mirror of
https://github.com/pirate/ArchiveBox.git
synced 2025-08-25 15:31:22 +02:00
add comment about why DOM is preferred over singlefile for readability parsing
This commit is contained in:
@@ -99,6 +99,8 @@ def save_readability(link: Link, out_dir: Optional[str]=None, timeout: int=TIMEO
|
||||
except (Exception, OSError) as err:
|
||||
status = 'failed'
|
||||
output = err
|
||||
|
||||
# Prefer Chrome's DOM output over SingleFile's output: SingleFile HTML often contains huge url(data:image/...base64) strings that make the HTML too long to parse with readability.
|
||||
cmd = [cmd[0], './{dom,singlefile}.html']
|
||||
finally:
|
||||
timer.end()
|
||||
|
@@ -177,6 +177,7 @@
|
||||
}
|
||||
</script>
|
||||
{% endif %}
|
||||
|
||||
<script>
|
||||
$ = django.jQuery;
|
||||
$.fn.reverse = [].reverse;
|
||||
|
Reference in New Issue
Block a user