mirror of
https://github.com/pirate/ArchiveBox.git
synced 2025-08-20 05:11:40 +02:00
add timeout to title fetching
This commit is contained in:
@@ -234,7 +234,7 @@ def fetch_page_title(url, default=True):
|
||||
default = url
|
||||
|
||||
try:
|
||||
-html_content = urllib.request.urlopen(url).read().decode('utf-8')
|
||||
+html_content = urllib.request.urlopen(url, timeout=10).read().decode('utf-8')
|
||||
|
||||
match = re.search('<title>(.*?)</title>', html_content)
|
||||
return match.group(1) if match else default or None
|
||||
|
Reference in New Issue
Block a user