-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathnote.txt
38 lines (33 loc) · 1.2 KB
/
note.txt
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
from urllib.request import urlopen
from bs4 import BeautifulSoup
class DataExtractor:
    """Fetch an HTML page and read the cell text of one of its tables.

    Intended call order: ``fetch_page()``, then ``extract_table(index)``,
    then ``get_table_data()``. Problems are reported with ``print`` instead
    of being raised, so callers should check ``soup`` / ``table`` or the
    return value of ``get_table_data()`` for ``None``.
    """

    def __init__(self, url):
        """Store *url*; nothing is downloaded until ``fetch_page()`` runs."""
        self.url = url
        self.soup = None   # parsed document, assigned by fetch_page()
        self.table = None  # selected table element, assigned by extract_table()

    def fetch_page(self):
        """Download ``self.url`` and parse it into ``self.soup``.

        On any failure a message is printed and ``self.soup`` keeps its
        previous value.
        """
        try:
            # The context manager closes the response even if parsing fails,
            # so the underlying connection is not leaked.
            with urlopen(self.url) as page:
                self.soup = BeautifulSoup(page, "html.parser")
        except Exception as e:
            # Deliberately broad: this class reports errors, it never raises.
            print(f"An error occurred while fetching the page: {e}")

    def extract_table(self, table_index):
        """Select the ``table_index``-th ``<table>`` of the fetched page.

        The element is stored in ``self.table``. If no page has been fetched
        a message is printed. If the index is out of range a message is
        printed and ``self.table`` is reset to ``None``.
        """
        if self.soup is None:
            print("Soup object is None. Please fetch the page first.")
            return

        try:
            self.table = self.soup.find_all('table')[table_index]
        except IndexError:
            # Drop any earlier selection so get_table_data() cannot return
            # rows from a table the caller did not ask for.
            self.table = None
            print(f"Table index {table_index} is out of range.")

    def get_table_data(self):
        """Return the selected table as a list of rows of stripped cell text.

        Each row is a list with one string per ``<th>``/``<td>`` cell.
        Returns ``None`` (after printing a message) when no table has been
        selected.
        """
        if self.table is None:
            print("Table object is None. Please extract the table first.")
            return None

        return [
            [cell.get_text(strip=True) for cell in row.find_all(['th', 'td'])]
            for row in self.table.find_all('tr')
        ]