1
0
mirror of https://github.com/pirate/ArchiveBox.git synced 2025-08-22 22:25:24 +02:00

Merge branch 'dev' into DanielBatteryStapler-patch-1

This commit is contained in:
Nick Sweeting
2023-08-31 15:20:46 -07:00
committed by GitHub
9 changed files with 2138 additions and 1148 deletions

View File

@@ -159,10 +159,7 @@ CONFIG_SCHEMA: Dict[str, ConfigDefaultDict] = {
'--write-thumbnail', '--write-thumbnail',
'--no-call-home', '--no-call-home',
'--write-sub', '--write-sub',
'--all-subs', '--write-auto-subs',
# There are too many of these and youtube
# throttles you with HTTP error 429
#'--write-auto-subs',
'--convert-subs=srt', '--convert-subs=srt',
'--yes-playlist', '--yes-playlist',
'--continue', '--continue',
@@ -175,7 +172,7 @@ CONFIG_SCHEMA: Dict[str, ConfigDefaultDict] = {
'--ignore-errors', '--ignore-errors',
'--geo-bypass', '--geo-bypass',
'--add-metadata', '--add-metadata',
'--max-filesize={}'.format(c['MEDIA_MAX_SIZE']), '--format=(bv*+ba/b)[filesize<={}][filesize_approx<=?{}]/(bv*+ba/b)'.format(c['MEDIA_MAX_SIZE'], c['MEDIA_MAX_SIZE']),
]}, ]},

View File

@@ -9,6 +9,7 @@ from ..util import (
enforce_types, enforce_types,
is_static_file, is_static_file,
chrome_args, chrome_args,
chrome_cleanup,
) )
from ..config import ( from ..config import (
TIMEOUT, TIMEOUT,
@@ -57,6 +58,7 @@ def save_dom(link: Link, out_dir: Optional[Path]=None, timeout: int=TIMEOUT) ->
except Exception as err: except Exception as err:
status = 'failed' status = 'failed'
output = err output = err
chrome_cleanup()
finally: finally:
timer.end() timer.end()

View File

@@ -9,6 +9,7 @@ from ..util import (
enforce_types, enforce_types,
is_static_file, is_static_file,
chrome_args, chrome_args,
chrome_cleanup,
) )
from ..config import ( from ..config import (
TIMEOUT, TIMEOUT,
@@ -54,6 +55,7 @@ def save_pdf(link: Link, out_dir: Optional[Path]=None, timeout: int=TIMEOUT) ->
except Exception as err: except Exception as err:
status = 'failed' status = 'failed'
output = err output = err
chrome_cleanup()
finally: finally:
timer.end() timer.end()

View File

@@ -9,6 +9,7 @@ from ..util import (
enforce_types, enforce_types,
is_static_file, is_static_file,
chrome_args, chrome_args,
chrome_cleanup,
) )
from ..config import ( from ..config import (
TIMEOUT, TIMEOUT,
@@ -54,6 +55,7 @@ def save_screenshot(link: Link, out_dir: Optional[Path]=None, timeout: int=TIMEO
except Exception as err: except Exception as err:
status = 'failed' status = 'failed'
output = err output = err
chrome_cleanup()
finally: finally:
timer.end() timer.end()

View File

@@ -441,7 +441,7 @@ def log_archive_method_finished(result: "ArchiveResult"):
hints = ( hints = (
' {}{}{}'.format(ANSI['lightyellow'], line.strip(), ANSI['reset']) ' {}{}{}'.format(ANSI['lightyellow'], line.strip(), ANSI['reset'])
for line in hints[:5] if line.strip() for line in list(hints)[:5] if line.strip()
) )

View File

@@ -17,6 +17,8 @@ from requests.exceptions import RequestException, ReadTimeout
from .vendor.base32_crockford import encode as base32_encode # type: ignore from .vendor.base32_crockford import encode as base32_encode # type: ignore
from w3lib.encoding import html_body_declared_encoding, http_content_type_encoding from w3lib.encoding import html_body_declared_encoding, http_content_type_encoding
from os.path import lexists
from os import remove as remove_file
try: try:
import chardet import chardet
@@ -272,6 +274,16 @@ def chrome_args(**options) -> List[str]:
return cmd_args return cmd_args
def chrome_cleanup():
    """
    Cleans up any state or runtime files that chrome leaves behind when killed by
    a timeout or other error.

    Only acts when IN_DOCKER is truthy: removes the chromium SingletonLock entry
    under /home/archivebox/.config/chromium if it is present. Returns None.
    """
    # Imported lazily inside the function, as in the original code.
    from .config import IN_DOCKER

    singleton_lock = "/home/archivebox/.config/chromium/SingletonLock"

    # lexists() (rather than exists()) also reports True for a dangling symlink,
    # so a stale lock is still detected when it is a link whose target is gone.
    if IN_DOCKER and lexists(singleton_lock):
        try:
            remove_file(singleton_lock)
        except FileNotFoundError:
            # The lock vanished between the check and the removal (e.g. another
            # process cleaned it up first). The goal -- no lock left behind --
            # is already met, so do not let this escape into the caller's
            # exception handler.
            pass
def ansi_to_html(text): def ansi_to_html(text):
""" """

View File

@@ -1,4 +1,4 @@
#!/usr/bin/env bash #!/bin/bash
DATA_DIR="${DATA_DIR:-/data}" DATA_DIR="${DATA_DIR:-/data}"
ARCHIVEBOX_USER="${ARCHIVEBOX_USER:-archivebox}" ARCHIVEBOX_USER="${ARCHIVEBOX_USER:-archivebox}"

2768
package-lock.json generated

File diff suppressed because it is too large Load Diff

View File

@@ -7,7 +7,8 @@
"license": "MIT", "license": "MIT",
"dependencies": { "dependencies": {
"@postlight/mercury-parser": "git+https://github.com/postlight/mercury-parser.git", "@postlight/mercury-parser": "git+https://github.com/postlight/mercury-parser.git",
"playwright": "^1.37.1",
"readability-extractor": "git+https://github.com/ArchiveBox/readability-extractor.git", "readability-extractor": "git+https://github.com/ArchiveBox/readability-extractor.git",
"single-file": "git+https://github.com/gildas-lormeau/SingleFile.git" "single-file-cli": "^1.0.63"
} }
} }