mirror of
https://github.com/pirate/ArchiveBox.git
synced 2025-08-21 05:41:54 +02:00
ignore robots.txt when using wget
This commit is contained in:
@@ -217,6 +217,7 @@ def fetch_wget(link_dir, link, requisites=FETCH_WGET_REQUISITES, warc=FETCH_WARC
 '--backup-converted',
 '--span-hosts',
 '--no-parent',
+'-e', 'robots=off',
 '--restrict-file-names=unix',
 '--timeout={}'.format(timeout),
 *(() if warc else ('--timestamping',)),
Reference in New Issue
Block a user