Skip to content

Commit 9425e01

Browse files
committed
fix: [crawler] debug signal timeout
1 parent 0287a13 commit 9425e01

File tree

1 file changed

+13
-13
lines changed

1 file changed

+13
-13
lines changed

bin/lib/crawlers.py

+13 -13
Original file line number | Diff line number | Diff line change
@@ -326,20 +326,20 @@ def extract_favicon_from_html(html, url):
326326
# # # # # # # #
327327

328328
def extract_title_from_html(html, item_id):
329-
signal.alarm(60)
330-
try:
331-
soup = BeautifulSoup(html, 'html.parser')
332-
title = soup.title
329+
# signal.alarm(60)
330+
# try:
331+
soup = BeautifulSoup(html, 'html.parser')
332+
title = soup.title
333+
if title:
334+
title = title.string
333335
if title:
334-
title = title.string
335-
if title:
336-
return str(title)
337-
except TimeoutException:
338-
signal.alarm(0)
339-
logger_crawler.warning(f'BeautifulSoup HTML parser timeout: {item_id}')
340-
else:
341-
signal.alarm(0)
342-
signal.alarm(0)
336+
return str(title)
337+
# except TimeoutException:
338+
# signal.alarm(0)
339+
# logger_crawler.warning(f'BeautifulSoup HTML parser timeout: {item_id}')
340+
# else:
341+
# signal.alarm(0)
342+
# signal.alarm(0)
343343
return ''
344344

345345
def extract_description_from_html(html):

0 commit comments

Comments
 (0)