mirror of
https://github.com/pirate/ArchiveBox.git
synced 2025-08-27 00:05:27 +02:00
fix fetch page title default
This commit is contained in:
@@ -212,15 +212,19 @@ def download_url(url):
|
||||
return source_path
|
||||
|
||||
|
||||
def fetch_page_title(url, default=True):
    """Attempt to guess a page's title by downloading its HTML.

    Args:
        url: Address of the page to download.
        default: Fallback used when no title can be determined.
            ``True`` (the default) falls back to ``url`` itself.
            ``False`` re-raises any download/decoding error instead of
            swallowing it, and yields ``None`` when the page simply has
            no title.  Any other value is returned as-is on error; when
            the page has no title, a falsy value is normalised to ``None``.

    Returns:
        The whitespace-normalised contents of the first ``<title>`` element,
        or the fallback described above.

    Raises:
        Exception: whatever the download or decoding step raised, but only
            when ``default`` is ``False``.
    """
    if default is True:
        default = url

    try:
        # Use the response as a context manager so the connection is closed
        # even if reading or decoding fails.
        with urllib.request.urlopen(url) as response:
            # Honour the charset declared by the server; fall back to UTF-8.
            charset = response.headers.get_content_charset() or 'utf-8'
            # Undecodable bytes must not prevent finding an otherwise
            # readable title, hence errors='replace'.
            html_content = response.read().decode(charset, errors='replace')

        # Tag names are case-insensitive, may carry attributes, and the title
        # text may span several lines.
        match = re.search(
            r'<title(?:\s[^>]*)?>(.*?)</title>',
            html_content,
            re.IGNORECASE | re.DOTALL,
        )
        # Collapse newlines/indentation inside the element to single spaces.
        title = ' '.join(match.group(1).split()) if match else ''
        return title or default or None
    except Exception:
        # Best-effort by design: only propagate when the caller opted in.
        if default is False:
            raise
        return default
|
||||
|
||||
|
||||
|
Reference in New Issue
Block a user