diff --git a/archivebox/archive_methods.py b/archivebox/archive_methods.py index db48f41c..fd726de2 100644 --- a/archivebox/archive_methods.py +++ b/archivebox/archive_methods.py @@ -41,6 +41,7 @@ from config import ( GIT_VERSION, YOUTUBEDL_VERSION, ONLY_NEW, + WGET_AUTO_COMPRESSION, ) from util import ( enforce_types, @@ -251,7 +252,6 @@ def fetch_wget(link_dir: str, link: Link, timeout: int=TIMEOUT) -> ArchiveResult '--backup-converted', '--span-hosts', '--no-parent', - '--compression=auto', '-e', 'robots=off', '--restrict-file-names=unix', '--timeout={}'.format(timeout), @@ -260,6 +260,7 @@ def fetch_wget(link_dir: str, link: Link, timeout: int=TIMEOUT) -> ArchiveResult *(('--page-requisites',) if FETCH_WGET_REQUISITES else ()), *(('--user-agent={}'.format(WGET_USER_AGENT),) if WGET_USER_AGENT else ()), *(('--load-cookies', COOKIES_FILE) if COOKIES_FILE else ()), + *(('--compression=auto',) if WGET_AUTO_COMPRESSION else ()), *((() if CHECK_SSL_VALIDITY else ('--no-check-certificate', '--no-hsts'))), link.url, ] diff --git a/archivebox/config.py b/archivebox/config.py index 38a12d4a..1a6b6d6d 100644 --- a/archivebox/config.py +++ b/archivebox/config.py @@ -213,14 +213,16 @@ try: else: FETCH_WGET = FETCH_WARC = False WGET_VERSION = None + WGET_AUTO_COMPRESSION = False if USE_WGET: WGET_VERSION = check_version(WGET_BINARY) + WGET_AUTO_COMPRESSION = not run([WGET_BINARY, "--compression=auto", "--help"], stdout=DEVNULL).returncode WGET_USER_AGENT = WGET_USER_AGENT.format( GIT_SHA=GIT_SHA[:9], WGET_VERSION=WGET_VERSION or '', ) - + ### Make sure git is installed GIT_VERSION = None if FETCH_GIT: