mirror of
https://github.com/pirate/ArchiveBox.git
synced 2025-08-27 08:14:38 +02:00
add comment about why DOM is preferred over singlefile for readability parsing
This commit is contained in:
@@ -99,6 +99,8 @@ def save_readability(link: Link, out_dir: Optional[str]=None, timeout: int=TIMEO
|
|||||||
except (Exception, OSError) as err:
|
except (Exception, OSError) as err:
|
||||||
status = 'failed'
|
status = 'failed'
|
||||||
output = err
|
output = err
|
||||||
|
|
||||||
|
# Prefer Chrome's DOM output over the SingleFile output: SingleFile pages often embed huge url(data:image/...;base64,...) strings that make the HTML too long for readability to parse.
|
||||||
cmd = [cmd[0], './{dom,singlefile}.html']
|
cmd = [cmd[0], './{dom,singlefile}.html']
|
||||||
finally:
|
finally:
|
||||||
timer.end()
|
timer.end()
|
||||||
|
@@ -177,6 +177,7 @@
|
|||||||
}
|
}
|
||||||
</script>
|
</script>
|
||||||
{% endif %}
|
{% endif %}
|
||||||
|
|
||||||
<script>
|
<script>
|
||||||
$ = django.jQuery;
|
$ = django.jQuery;
|
||||||
$.fn.reverse = [].reverse;
|
$.fn.reverse = [].reverse;
|
||||||
|
Reference in New Issue
Block a user