1
0
mirror of https://github.com/pirate/ArchiveBox.git synced 2025-09-03 03:13:12 +02:00

Don't match closing tags in full text

This commit is contained in:
Nick Sweeting
2019-01-31 19:46:27 -08:00
parent ee796b287b
commit c37941efd1

View File

@@ -43,7 +43,7 @@ base_url = lambda url: without_scheme(url) # uniq base url used to dedupe links
# Return the part of `ts` before its first '.'; if `ts` has no '.', the whole
# string is returned unchanged.
short_ts = lambda ts: ts.split('.')[0]
# NOTE(review): this looks like a diff hunk whose +/- markers were stripped, so
# this first assignment is presumably the pre-commit (removed) line -- confirm
# against the repository. As written it is overridden by the next assignment.
# Pattern: 'http' or 'https', '://', then one or more URL characters or
# %XX hex escapes. Inside the class, `$-_` is a character range, not three
# literal characters.
URL_REGEX = 'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+'
# Effective value of URL_REGEX. Same scheme prefix and character group, but the
# group is matched exactly once and is followed by `[^<]+`: one or more
# characters of any kind except '<'. A match therefore never extends past a
# '<' (the commit message gives the intent: do not match closing tags).
# NOTE(review): `[^<]` also accepts whitespace and newlines -- confirm that is
# intended for the inputs this is applied to.
URL_REGEX = 'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))[^<]+'
def check_dependencies():