Mirror of https://github.com/pirate/ArchiveBox.git (synced 2025-08-21 13:52:30 +02:00)
add timeout to title fetching
This commit is contained in:
@@ -234,7 +234,7 @@ def fetch_page_title(url, default=True):
 default = url

 try:
-html_content = urllib.request.urlopen(url).read().decode('utf-8')
+html_content = urllib.request.urlopen(url, timeout=10).read().decode('utf-8')

 match = re.search('<title>(.*?)</title>', html_content)
 return match.group(1) if match else default or None
|
Reference in New Issue
Block a user