Commit 868da3c 1 parent 8692d9b commit 868da3c Copy full SHA for 868da3c
File tree 1 file changed +24
-5
lines changed
1 file changed +24
-5
lines changed Original file line number Diff line number Diff line change 27
27
28
28
from pyfaup .faup import Faup
29
29
30
+
31
+ import signal
32
+
33
class TimeoutException(Exception):
    """Raised by the SIGALRM handler when a guarded operation runs too long."""
35
+
36
def timeout_handler(signum, frame):
    """Signal handler: abort the interrupted code by raising TimeoutException.

    ``signum`` and ``frame`` are the standard arguments the ``signal`` module
    passes to a handler; neither is used here.
    """
    raise TimeoutException()


# Install the handler at import time so that a pending signal.alarm() turns
# into a TimeoutException in whatever code is running when it fires.
signal.signal(signal.SIGALRM, timeout_handler)
41
+
42
+
30
43
# interact with splash_crawler API
31
44
import requests
32
45
requests .packages .urllib3 .disable_warnings (requests .packages .urllib3 .exceptions .InsecureRequestWarning )
@@ -310,12 +323,18 @@ def extract_favicon_from_html(html, url):
310
323
# # # # # # # #
311
324
312
325
def extract_title_from_html(html):
    """Return the text of the HTML ``<title>`` element, or ``''``.

    The document is parsed with BeautifulSoup's ``html.parser`` backend under
    a 60-second SIGALRM budget. If the alarm fires, the module's SIGALRM
    handler raises ``TimeoutException``, which is swallowed here so the caller
    simply gets an empty title.

    :param html: HTML content handed to ``BeautifulSoup``.
    :return: ``str`` of the title's string when present and non-empty,
        otherwise ``''`` (no title, empty title, or parsing timed out).
    """
    signal.alarm(60)
    try:
        soup = BeautifulSoup(html, 'html.parser')
        title = soup.title
        if title:
            title = title.string
            if title:
                return str(title)
    except TimeoutException:
        # Best effort: parsing took too long, fall through to the empty title.
        pass
    finally:
        # Always disarm the alarm. An ``else:`` clause is skipped when the
        # ``try`` suite exits via ``return`` or via another exception, which
        # would leave the alarm armed and raise TimeoutException later in
        # unrelated code.
        signal.alarm(0)
    return ''
320
339
321
340
def extract_description_from_html (html ):
You can’t perform that action at this time.
0 commit comments