mirror of
https://github.com/pirate/ArchiveBox.git
synced 2025-01-17 06:18:25 +01:00
improve config loading of TMP_DIR, LIB_DIR, move to separate files
This commit is contained in:
parent
7a895d9285
commit
cf1ea8f80f
14
Dockerfile
14
Dockerfile
@ -287,22 +287,12 @@ WORKDIR "$DATA_DIR"
|
|||||||
RUN openssl rand -hex 16 > /etc/machine-id \
|
RUN openssl rand -hex 16 > /etc/machine-id \
|
||||||
&& chown -R "$DEFAULT_PUID:$DEFAULT_PGID" "/tmp"
|
&& chown -R "$DEFAULT_PUID:$DEFAULT_PGID" "/tmp"
|
||||||
ENV IN_DOCKER=True \
|
ENV IN_DOCKER=True \
|
||||||
SYSTEM_LIB_DIR=/app/lib \
|
SYSTEM_LIB_DIR=/usr/share/archivebox \
|
||||||
SYSTEM_TMP_DIR=/tmp \
|
SYSTEM_TMP_DIR=/tmp/archivebox \
|
||||||
GOOGLE_API_KEY=no \
|
GOOGLE_API_KEY=no \
|
||||||
GOOGLE_DEFAULT_CLIENT_ID=no \
|
GOOGLE_DEFAULT_CLIENT_ID=no \
|
||||||
GOOGLE_DEFAULT_CLIENT_SECRET=no \
|
GOOGLE_DEFAULT_CLIENT_SECRET=no \
|
||||||
ALLOWED_HOSTS=*
|
ALLOWED_HOSTS=*
|
||||||
## No need to set explicitly, these values will be autodetected by archivebox in docker:
|
|
||||||
# WGET_BINARY="wget" \
|
|
||||||
# YOUTUBEDL_BINARY="yt-dlp" \
|
|
||||||
# CHROME_BINARY="/usr/bin/chromium-browser" \
|
|
||||||
# USE_SINGLEFILE=True \
|
|
||||||
# SINGLEFILE_BINARY="$NODE_MODULES/.bin/single-file" \
|
|
||||||
# USE_READABILITY=True \
|
|
||||||
# READABILITY_BINARY="$NODE_MODULES/.bin/readability-extractor" \
|
|
||||||
# USE_MERCURY=True \
|
|
||||||
# MERCURY_BINARY="$NODE_MODULES/.bin/postlight-parser"
|
|
||||||
|
|
||||||
# Print version for nice docker finish summary
|
# Print version for nice docker finish summary
|
||||||
RUN (echo -e "\n\n[√] Finished Docker build succesfully. Saving build summary in: /VERSION.txt" \
|
RUN (echo -e "\n\n[√] Finished Docker build succesfully. Saving build summary in: /VERSION.txt" \
|
||||||
|
@ -13,7 +13,7 @@ __package__ = 'archivebox'
|
|||||||
|
|
||||||
import os
|
import os
|
||||||
import sys
|
import sys
|
||||||
import tempfile
|
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
ASCII_LOGO = """
|
ASCII_LOGO = """
|
||||||
@ -25,37 +25,36 @@ ASCII_LOGO = """
|
|||||||
╚═╝ ╚═╝╚═╝ ╚═╝ ╚═════╝╚═╝ ╚═╝╚═╝ ╚═══╝ ╚══════╝ ╚═════╝ ╚═════╝ ╚═╝ ╚═╝
|
╚═╝ ╚═╝╚═╝ ╚═╝ ╚═════╝╚═╝ ╚═╝╚═╝ ╚═══╝ ╚══════╝ ╚═════╝ ╚═════╝ ╚═╝ ╚═╝
|
||||||
"""
|
"""
|
||||||
|
|
||||||
SYSTEM_TMP_DIR = Path(tempfile.gettempdir()) / 'archivebox'
|
# detect ArchiveBox user's UID/GID based on data dir ownership
|
||||||
SYSTEM_TMP_DIR.mkdir(parents=True, exist_ok=True)
|
from archivebox.config.permissions import drop_privileges # noqa
|
||||||
os.environ['SYSTEM_TMP_DIR'] = str(SYSTEM_TMP_DIR)
|
drop_privileges()
|
||||||
os.environ['DJANGO_SETTINGS_MODULE'] = 'core.settings'
|
|
||||||
|
|
||||||
# if we are outside a data dir, cd into an ephemeral tmp dir so that
|
from archivebox.misc.checks import check_not_root, check_io_encoding # noqa
|
||||||
# we can run version/help without polluting cwd with an index.sqlite3
|
check_not_root()
|
||||||
if len(sys.argv) > 1 and sys.argv[1] in ('version', 'help'):
|
check_io_encoding()
|
||||||
current_dir = Path(os.getcwd()).resolve()
|
|
||||||
if not (current_dir / 'index.sqlite3').exists():
|
|
||||||
os.chdir(SYSTEM_TMP_DIR)
|
|
||||||
|
|
||||||
# make sure PACKAGE_DIR is in sys.path so we can import all subfolders
|
# make sure PACKAGE_DIR is in sys.path so we can import all subfolders
|
||||||
# without necessarily waiting for django to load them thorugh INSTALLED_APPS
|
# without necessarily waiting for django to load them thorugh INSTALLED_APPS
|
||||||
PACKAGE_DIR = Path(__file__).resolve().parent
|
PACKAGE_DIR = Path(__file__).resolve().parent
|
||||||
if str(PACKAGE_DIR) not in sys.path:
|
if str(PACKAGE_DIR) not in sys.path:
|
||||||
sys.path.append(str(PACKAGE_DIR))
|
sys.path.append(str(PACKAGE_DIR))
|
||||||
|
os.environ['DJANGO_SETTINGS_MODULE'] = 'core.settings'
|
||||||
|
|
||||||
|
|
||||||
# print('INSTALLING MONKEY PATCHES')
|
# print('INSTALLING MONKEY PATCHES')
|
||||||
from .monkey_patches import * # noqa
|
from archivebox.monkey_patches import * # noqa
|
||||||
# print('DONE INSTALLING MONKEY PATCHES')
|
# print('DONE INSTALLING MONKEY PATCHES')
|
||||||
|
|
||||||
|
|
||||||
# print('LOADING VENDORED LIBRARIES')
|
# print('LOADING VENDORED LIBRARIES')
|
||||||
from .vendor import load_vendored_libs # noqa
|
from archivebox.vendor import load_vendored_libs # noqa
|
||||||
load_vendored_libs()
|
load_vendored_libs()
|
||||||
# print('DONE LOADING VENDORED LIBRARIES')
|
# print('DONE LOADING VENDORED LIBRARIES')
|
||||||
|
|
||||||
|
|
||||||
from .config.constants import CONSTANTS, DATA_DIR, PACKAGE_DIR, ARCHIVE_DIR, VERSION # noqa
|
from archivebox.config.constants import CONSTANTS # noqa
|
||||||
|
from archivebox.config.paths import PACKAGE_DIR, DATA_DIR, ARCHIVE_DIR # noqa
|
||||||
|
from archivebox.config.version import VERSION # noqa
|
||||||
|
|
||||||
__version__ = VERSION
|
__version__ = VERSION
|
||||||
__author__ = 'Nick Sweeting'
|
__author__ = 'Nick Sweeting'
|
||||||
|
@ -12,12 +12,13 @@ from ninja import NinjaAPI, Swagger
|
|||||||
|
|
||||||
# TODO: explore adding https://eadwincode.github.io/django-ninja-extra/
|
# TODO: explore adding https://eadwincode.github.io/django-ninja-extra/
|
||||||
|
|
||||||
from archivebox.config import SHELL_CONFIG, VERSION
|
from archivebox.config import VERSION
|
||||||
|
from archivebox.config.version import get_COMMIT_HASH
|
||||||
|
|
||||||
from api.auth import API_AUTH_METHODS
|
from api.auth import API_AUTH_METHODS
|
||||||
|
|
||||||
|
|
||||||
COMMIT_HASH = SHELL_CONFIG.COMMIT_HASH or 'unknown'
|
COMMIT_HASH = get_COMMIT_HASH() or 'unknown'
|
||||||
|
|
||||||
html_description=f'''
|
html_description=f'''
|
||||||
<h3>Welcome to your ArchiveBox server's REST API <code>[v1 ALPHA]</code> homepage!</h3>
|
<h3>Welcome to your ArchiveBox server's REST API <code>[v1 ALPHA]</code> homepage!</h3>
|
||||||
|
@ -13,7 +13,7 @@ from ..main import (
|
|||||||
schedule,
|
schedule,
|
||||||
)
|
)
|
||||||
from archivebox.misc.util import ansi_to_html
|
from archivebox.misc.util import ansi_to_html
|
||||||
from archivebox.config import ARCHIVING_CONFIG
|
from archivebox.config.common import ARCHIVING_CONFIG
|
||||||
|
|
||||||
|
|
||||||
from .auth import API_AUTH_METHODS
|
from .auth import API_AUTH_METHODS
|
||||||
|
@ -1,6 +1,7 @@
|
|||||||
__package__ = 'archivebox.cli'
|
__package__ = 'archivebox.cli'
|
||||||
__command__ = 'archivebox'
|
__command__ = 'archivebox'
|
||||||
|
|
||||||
|
import os
|
||||||
import sys
|
import sys
|
||||||
import argparse
|
import argparse
|
||||||
import threading
|
import threading
|
||||||
@ -25,6 +26,10 @@ if len(sys.argv) > 1 and sys.argv[1] == 'setup':
|
|||||||
print(':warning: [bold red]DEPRECATED[/bold red] `archivebox setup` is deprecated, use `archivebox install` instead')
|
print(':warning: [bold red]DEPRECATED[/bold red] `archivebox setup` is deprecated, use `archivebox install` instead')
|
||||||
sys.argv[1] = 'install'
|
sys.argv[1] = 'install'
|
||||||
|
|
||||||
|
if '--debug' in sys.argv:
|
||||||
|
os.environ['DEBUG'] = 'True'
|
||||||
|
sys.argv.remove('--debug')
|
||||||
|
|
||||||
|
|
||||||
# def list_subcommands() -> Dict[str, str]:
|
# def list_subcommands() -> Dict[str, str]:
|
||||||
# """find and import all valid archivebox_<subcommand>.py files in CLI_DIR"""
|
# """find and import all valid archivebox_<subcommand>.py files in CLI_DIR"""
|
||||||
@ -50,8 +55,8 @@ SUBCOMMAND_MODULES = {
|
|||||||
|
|
||||||
'init': 'archivebox_init',
|
'init': 'archivebox_init',
|
||||||
'install': 'archivebox_install',
|
'install': 'archivebox_install',
|
||||||
|
##############################################
|
||||||
'config': 'archivebox_config',
|
'config': 'archivebox_config',
|
||||||
|
|
||||||
'add': 'archivebox_add',
|
'add': 'archivebox_add',
|
||||||
'remove': 'archivebox_remove',
|
'remove': 'archivebox_remove',
|
||||||
'update': 'archivebox_update',
|
'update': 'archivebox_update',
|
||||||
@ -63,7 +68,7 @@ SUBCOMMAND_MODULES = {
|
|||||||
'shell': 'archivebox_shell',
|
'shell': 'archivebox_shell',
|
||||||
'manage': 'archivebox_manage',
|
'manage': 'archivebox_manage',
|
||||||
|
|
||||||
'oneshot': 'archivebox_oneshot',
|
# 'oneshot': 'archivebox_oneshot',
|
||||||
}
|
}
|
||||||
|
|
||||||
# every imported command module must have these properties in order to be valid
|
# every imported command module must have these properties in order to be valid
|
||||||
@ -102,11 +107,11 @@ CLI_SUBCOMMANDS = LazySubcommands()
|
|||||||
|
|
||||||
# these common commands will appear sorted before any others for ease-of-use
|
# these common commands will appear sorted before any others for ease-of-use
|
||||||
meta_cmds = ('help', 'version') # dont require valid data folder at all
|
meta_cmds = ('help', 'version') # dont require valid data folder at all
|
||||||
main_cmds = ('init', 'config', 'setup', 'install') # dont require existing db present
|
setup_cmds = ('init', 'setup', 'install') # require valid data folder, but dont require DB present in it yet
|
||||||
archive_cmds = ('add', 'remove', 'update', 'list', 'status') # require existing db present
|
archive_cmds = ('add', 'remove', 'update', 'list', 'status', 'schedule', 'server', 'shell', 'manage') # require valid data folder + existing db present
|
||||||
fake_db = ("oneshot",) # use fake in-memory db
|
fake_db = ("oneshot",) # use fake in-memory db
|
||||||
|
|
||||||
display_first = (*meta_cmds, *main_cmds, *archive_cmds)
|
display_first = (*meta_cmds, *setup_cmds, *archive_cmds)
|
||||||
|
|
||||||
|
|
||||||
IGNORED_BG_THREADS = ('MainThread', 'ThreadPoolExecutor', 'IPythonHistorySavingThread', 'Scheduler') # threads we dont have to wait for before exiting
|
IGNORED_BG_THREADS = ('MainThread', 'ThreadPoolExecutor', 'IPythonHistorySavingThread', 'Scheduler') # threads we dont have to wait for before exiting
|
||||||
@ -157,14 +162,16 @@ def run_subcommand(subcommand: str,
|
|||||||
from archivebox.config.legacy import setup_django
|
from archivebox.config.legacy import setup_django
|
||||||
|
|
||||||
# print('DATA_DIR is', DATA_DIR)
|
# print('DATA_DIR is', DATA_DIR)
|
||||||
# print('pwd is', os.getcwd())
|
# print('pwd is', os.getcwd())
|
||||||
|
|
||||||
cmd_requires_db = subcommand in archive_cmds
|
cmd_requires_db = subcommand in archive_cmds
|
||||||
init_pending = '--init' in subcommand_args or '--quick-init' in subcommand_args
|
init_pending = '--init' in subcommand_args or '--quick-init' in subcommand_args
|
||||||
|
|
||||||
setup_django(in_memory_db=subcommand in fake_db, check_db=cmd_requires_db and not init_pending)
|
check_db = cmd_requires_db and not init_pending
|
||||||
|
|
||||||
if subcommand not in meta_cmds:
|
setup_django(in_memory_db=subcommand in fake_db, check_db=check_db)
|
||||||
|
|
||||||
|
if subcommand in archive_cmds:
|
||||||
if cmd_requires_db:
|
if cmd_requires_db:
|
||||||
check_migrations()
|
check_migrations()
|
||||||
|
|
||||||
|
@ -9,7 +9,8 @@ import argparse
|
|||||||
from typing import List, Optional, IO
|
from typing import List, Optional, IO
|
||||||
|
|
||||||
from archivebox.misc.util import docstring
|
from archivebox.misc.util import docstring
|
||||||
from archivebox.config import DATA_DIR, ARCHIVING_CONFIG
|
from archivebox.config import DATA_DIR
|
||||||
|
from archivebox.config.common import ARCHIVING_CONFIG
|
||||||
|
|
||||||
from ..main import add
|
from ..main import add
|
||||||
from ..parsers import PARSERS
|
from ..parsers import PARSERS
|
||||||
|
@ -9,7 +9,8 @@ from pathlib import Path
|
|||||||
from typing import Optional, List, IO
|
from typing import Optional, List, IO
|
||||||
|
|
||||||
from archivebox.misc.util import docstring
|
from archivebox.misc.util import docstring
|
||||||
from archivebox.config import DATA_DIR, SERVER_CONFIG
|
from archivebox.config import DATA_DIR
|
||||||
|
from archivebox.config.common import SERVER_CONFIG
|
||||||
from ..logging_util import SmartFormatter, reject_stdin
|
from ..logging_util import SmartFormatter, reject_stdin
|
||||||
from ..main import server
|
from ..main import server
|
||||||
|
|
||||||
|
@ -1,27 +1,9 @@
|
|||||||
__package__ = 'archivebox.config'
|
__package__ = 'archivebox.config'
|
||||||
|
|
||||||
from .constants import CONSTANTS, CONSTANTS_CONFIG, PACKAGE_DIR, DATA_DIR, ARCHIVE_DIR, VERSION
|
from .paths import (
|
||||||
from .defaults import (
|
PACKAGE_DIR, # noqa
|
||||||
SHELL_CONFIG,
|
DATA_DIR, # noqa
|
||||||
STORAGE_CONFIG,
|
ARCHIVE_DIR, # noqa
|
||||||
GENERAL_CONFIG,
|
|
||||||
SERVER_CONFIG,
|
|
||||||
ARCHIVING_CONFIG,
|
|
||||||
SEARCH_BACKEND_CONFIG,
|
|
||||||
)
|
)
|
||||||
|
from .constants import CONSTANTS, CONSTANTS_CONFIG # noqa
|
||||||
|
from .version import VERSION # noqa
|
||||||
__all__ = [
|
|
||||||
'CONSTANTS',
|
|
||||||
'PACKAGE_DIR',
|
|
||||||
'DATA_DIR',
|
|
||||||
'ARCHIVE_DIR',
|
|
||||||
'VERSION',
|
|
||||||
'SHELL_CONFIG',
|
|
||||||
'STORAGE_CONFIG',
|
|
||||||
'GENERAL_CONFIG',
|
|
||||||
'SERVER_CONFIG',
|
|
||||||
'ARCHIVING_CONFIG',
|
|
||||||
'SEARCH_BACKEND_CONFIG',
|
|
||||||
'CONSTANTS_CONFIG',
|
|
||||||
]
|
|
||||||
|
@ -8,7 +8,7 @@ from abx.archivebox.base_hook import BaseHook
|
|||||||
|
|
||||||
|
|
||||||
from .constants import CONSTANTS, CONSTANTS_CONFIG, PACKAGE_DIR, DATA_DIR, ARCHIVE_DIR # noqa
|
from .constants import CONSTANTS, CONSTANTS_CONFIG, PACKAGE_DIR, DATA_DIR, ARCHIVE_DIR # noqa
|
||||||
from .defaults import (
|
from .common import (
|
||||||
ShellConfig, # noqa: F401
|
ShellConfig, # noqa: F401
|
||||||
StorageConfig, # noqa: F401
|
StorageConfig, # noqa: F401
|
||||||
GeneralConfig, # noqa: F401
|
GeneralConfig, # noqa: F401
|
||||||
|
@ -1,47 +0,0 @@
|
|||||||
# def get_versions_available_on_github(config):
|
|
||||||
# """
|
|
||||||
# returns a dictionary containing the ArchiveBox GitHub release info for
|
|
||||||
# the recommended upgrade version and the currently installed version
|
|
||||||
# """
|
|
||||||
|
|
||||||
# # we only want to perform the (relatively expensive) check for new versions
|
|
||||||
# # when its most relevant, e.g. when the user runs a long-running command
|
|
||||||
# subcommand_run_by_user = sys.argv[3] if len(sys.argv) > 3 else 'help'
|
|
||||||
# long_running_commands = ('add', 'schedule', 'update', 'status', 'server')
|
|
||||||
# if subcommand_run_by_user not in long_running_commands:
|
|
||||||
# return None
|
|
||||||
|
|
||||||
# github_releases_api = "https://api.github.com/repos/ArchiveBox/ArchiveBox/releases"
|
|
||||||
# response = requests.get(github_releases_api)
|
|
||||||
# if response.status_code != 200:
|
|
||||||
# stderr(f'[!] Warning: GitHub API call to check for new ArchiveBox version failed! (status={response.status_code})', color='lightyellow', config=config)
|
|
||||||
# return None
|
|
||||||
# all_releases = response.json()
|
|
||||||
|
|
||||||
# installed_version = parse_version_string(config['VERSION'])
|
|
||||||
|
|
||||||
# # find current version or nearest older version (to link to)
|
|
||||||
# current_version = None
|
|
||||||
# for idx, release in enumerate(all_releases):
|
|
||||||
# release_version = parse_version_string(release['tag_name'])
|
|
||||||
# if release_version <= installed_version:
|
|
||||||
# current_version = release
|
|
||||||
# break
|
|
||||||
|
|
||||||
# current_version = current_version or all_releases[-1]
|
|
||||||
|
|
||||||
# # recommended version is whatever comes after current_version in the release list
|
|
||||||
# # (perhaps too conservative to only recommend upgrading one version at a time, but it's safest)
|
|
||||||
# try:
|
|
||||||
# recommended_version = all_releases[idx+1]
|
|
||||||
# except IndexError:
|
|
||||||
# recommended_version = None
|
|
||||||
|
|
||||||
# return {'recommended_version': recommended_version, 'current_version': current_version}
|
|
||||||
|
|
||||||
# def can_upgrade(config):
|
|
||||||
# if config['VERSIONS_AVAILABLE'] and config['VERSIONS_AVAILABLE']['recommended_version']:
|
|
||||||
# recommended_version = parse_version_string(config['VERSIONS_AVAILABLE']['recommended_version']['tag_name'])
|
|
||||||
# current_version = parse_version_string(config['VERSIONS_AVAILABLE']['current_version']['tag_name'])
|
|
||||||
# return recommended_version > current_version
|
|
||||||
# return False
|
|
@ -1,21 +1,21 @@
|
|||||||
__package__ = 'archivebox.config'
|
__package__ = 'archivebox.config'
|
||||||
|
|
||||||
import os
|
|
||||||
import sys
|
import sys
|
||||||
import shutil
|
import shutil
|
||||||
|
|
||||||
from typing import Dict, Optional
|
from typing import Dict, Optional
|
||||||
from datetime import datetime
|
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
from rich import print
|
from rich import print
|
||||||
from pydantic import Field, field_validator, model_validator, computed_field
|
from pydantic import Field, field_validator, computed_field
|
||||||
from django.utils.crypto import get_random_string
|
from django.utils.crypto import get_random_string
|
||||||
|
|
||||||
from abx.archivebox.base_configset import BaseConfigSet
|
from abx.archivebox.base_configset import BaseConfigSet
|
||||||
|
|
||||||
|
|
||||||
from .constants import CONSTANTS, PACKAGE_DIR
|
from .constants import CONSTANTS
|
||||||
|
from .version import get_COMMIT_HASH, get_BUILD_TIME
|
||||||
|
from .permissions import IN_DOCKER
|
||||||
|
|
||||||
###################### Config ##########################
|
###################### Config ##########################
|
||||||
|
|
||||||
@ -27,14 +27,8 @@ class ShellConfig(BaseConfigSet):
|
|||||||
USE_COLOR: bool = Field(default=lambda c: c.IS_TTY)
|
USE_COLOR: bool = Field(default=lambda c: c.IS_TTY)
|
||||||
SHOW_PROGRESS: bool = Field(default=lambda c: c.IS_TTY)
|
SHOW_PROGRESS: bool = Field(default=lambda c: c.IS_TTY)
|
||||||
|
|
||||||
IN_DOCKER: bool = Field(default=False)
|
IN_DOCKER: bool = Field(default=IN_DOCKER)
|
||||||
IN_QEMU: bool = Field(default=False)
|
IN_QEMU: bool = Field(default=False)
|
||||||
|
|
||||||
USER: str = Field(default=Path('~').expanduser().resolve().name)
|
|
||||||
PUID: int = Field(default=os.getuid())
|
|
||||||
PGID: int = Field(default=os.getgid())
|
|
||||||
|
|
||||||
PYTHON_ENCODING: str = Field(default=(sys.__stdout__ or sys.stdout or sys.__stderr__ or sys.stderr).encoding.upper().replace('UTF8', 'UTF-8'))
|
|
||||||
|
|
||||||
ANSI: Dict[str, str] = Field(default=lambda c: CONSTANTS.DEFAULT_CLI_COLORS if c.USE_COLOR else CONSTANTS.DISABLED_CLI_COLORS)
|
ANSI: Dict[str, str] = Field(default=lambda c: CONSTANTS.DEFAULT_CLI_COLORS if c.USE_COLOR else CONSTANTS.DISABLED_CLI_COLORS)
|
||||||
|
|
||||||
@ -52,63 +46,12 @@ class ShellConfig(BaseConfigSet):
|
|||||||
@computed_field
|
@computed_field
|
||||||
@property
|
@property
|
||||||
def COMMIT_HASH(self) -> Optional[str]:
|
def COMMIT_HASH(self) -> Optional[str]:
|
||||||
try:
|
return get_COMMIT_HASH()
|
||||||
git_dir = PACKAGE_DIR / '../.git'
|
|
||||||
ref = (git_dir / 'HEAD').read_text().strip().split(' ')[-1]
|
|
||||||
commit_hash = git_dir.joinpath(ref).read_text().strip()
|
|
||||||
return commit_hash
|
|
||||||
except Exception:
|
|
||||||
pass
|
|
||||||
|
|
||||||
try:
|
|
||||||
return list((PACKAGE_DIR / '../.git/refs/heads/').glob('*'))[0].read_text().strip()
|
|
||||||
except Exception:
|
|
||||||
pass
|
|
||||||
|
|
||||||
return None
|
|
||||||
|
|
||||||
@computed_field
|
@computed_field
|
||||||
@property
|
@property
|
||||||
def BUILD_TIME(self) -> str:
|
def BUILD_TIME(self) -> str:
|
||||||
if self.IN_DOCKER:
|
return get_BUILD_TIME()
|
||||||
docker_build_end_time = Path('/VERSION.txt').read_text().rsplit('BUILD_END_TIME=')[-1].split('\n', 1)[0]
|
|
||||||
return docker_build_end_time
|
|
||||||
|
|
||||||
src_last_modified_unix_timestamp = (PACKAGE_DIR / 'README.md').stat().st_mtime
|
|
||||||
return datetime.fromtimestamp(src_last_modified_unix_timestamp).strftime('%Y-%m-%d %H:%M:%S %s')
|
|
||||||
|
|
||||||
|
|
||||||
@model_validator(mode='after')
|
|
||||||
def validate_not_running_as_root(self):
|
|
||||||
attempted_command = ' '.join(sys.argv[:3])
|
|
||||||
if self.PUID == 0 and attempted_command not in ('setup', 'install'):
|
|
||||||
# stderr('[!] ArchiveBox should never be run as root!', color='red')
|
|
||||||
# stderr(' For more information, see the security overview documentation:')
|
|
||||||
# stderr(' https://github.com/ArchiveBox/ArchiveBox/wiki/Security-Overview#do-not-run-as-root')
|
|
||||||
print('[red][!] ArchiveBox should never be run as root![/red]', file=sys.stderr)
|
|
||||||
print(' For more information, see the security overview documentation:', file=sys.stderr)
|
|
||||||
print(' https://github.com/ArchiveBox/ArchiveBox/wiki/Security-Overview#do-not-run-as-root', file=sys.stderr)
|
|
||||||
|
|
||||||
if self.IN_DOCKER:
|
|
||||||
print('[red][!] When using Docker, you must run commands with [green]docker run[/green] instead of [yellow3]docker exec[/yellow3], e.g.:', file=sys.stderr)
|
|
||||||
print(' docker compose run archivebox {attempted_command}', file=sys.stderr)
|
|
||||||
print(f' docker run -it -v $PWD/data:/data archivebox/archivebox {attempted_command}', file=sys.stderr)
|
|
||||||
print(' or:', file=sys.stderr)
|
|
||||||
print(f' docker compose exec --user=archivebox archivebox /bin/bash -c "archivebox {attempted_command}"', file=sys.stderr)
|
|
||||||
print(f' docker exec -it --user=archivebox <container id> /bin/bash -c "archivebox {attempted_command}"', file=sys.stderr)
|
|
||||||
raise SystemExit(2)
|
|
||||||
|
|
||||||
# check python locale
|
|
||||||
if self.PYTHON_ENCODING != 'UTF-8':
|
|
||||||
print(f'[red][X] Your system is running python3 scripts with a bad locale setting: {self.PYTHON_ENCODING} (it should be UTF-8).[/red]', file=sys.stderr)
|
|
||||||
print(' To fix it, add the line "export PYTHONIOENCODING=UTF-8" to your ~/.bashrc file (without quotes)', file=sys.stderr)
|
|
||||||
print(' Or if you\'re using ubuntu/debian, run "dpkg-reconfigure locales"', file=sys.stderr)
|
|
||||||
print('')
|
|
||||||
print(' Confirm that it\'s fixed by opening a new shell and running:', file=sys.stderr)
|
|
||||||
print(' python3 -c "import sys; print(sys.stdout.encoding)" # should output UTF-8', file=sys.stderr)
|
|
||||||
raise SystemExit(2)
|
|
||||||
|
|
||||||
return self
|
|
||||||
|
|
||||||
SHELL_CONFIG = ShellConfig()
|
SHELL_CONFIG = ShellConfig()
|
||||||
|
|
@ -1,115 +0,0 @@
|
|||||||
from pathlib import Path
|
|
||||||
from typing import Optional, Dict, Union, Tuple, Callable, Pattern, Type, Any, List
|
|
||||||
from mypy_extensions import TypedDict
|
|
||||||
|
|
||||||
from benedict import benedict
|
|
||||||
|
|
||||||
SimpleConfigValue = Union[str, bool, int, None, Pattern, Dict[str, Any]]
|
|
||||||
SimpleConfigValueDict = Dict[str, SimpleConfigValue]
|
|
||||||
SimpleConfigValueGetter = Callable[[], SimpleConfigValue]
|
|
||||||
ConfigValue = Union[SimpleConfigValue, SimpleConfigValueDict, SimpleConfigValueGetter]
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
class BaseConfig(TypedDict):
|
|
||||||
pass
|
|
||||||
|
|
||||||
class ConfigDict(BaseConfig, benedict, total=False):
|
|
||||||
"""
|
|
||||||
# Regenerate by pasting this quine into `archivebox shell` 🥚
|
|
||||||
from archivebox.config import ConfigDict, CONFIG_DEFAULTS
|
|
||||||
print('class ConfigDict(BaseConfig, total=False):')
|
|
||||||
print(' ' + '"'*3 + ConfigDict.__doc__ + '"'*3)
|
|
||||||
for section, configs in CONFIG_DEFAULTS.items():
|
|
||||||
for key, attrs in configs.items():
|
|
||||||
Type, default = attrs['type'], attrs['default']
|
|
||||||
if default is None:
|
|
||||||
print(f' {key}: Optional[{Type.__name__}]')
|
|
||||||
else:
|
|
||||||
print(f' {key}: {Type.__name__}')
|
|
||||||
print()
|
|
||||||
"""
|
|
||||||
|
|
||||||
IS_TTY: bool
|
|
||||||
USE_COLOR: bool
|
|
||||||
SHOW_PROGRESS: bool
|
|
||||||
IN_DOCKER: bool
|
|
||||||
|
|
||||||
PACKAGE_DIR: Path
|
|
||||||
CONFIG_FILE: Path
|
|
||||||
ONLY_NEW: bool
|
|
||||||
TIMEOUT: int
|
|
||||||
MEDIA_TIMEOUT: int
|
|
||||||
OUTPUT_PERMISSIONS: str
|
|
||||||
RESTRICT_FILE_NAMES: str
|
|
||||||
URL_DENYLIST: str
|
|
||||||
|
|
||||||
SECRET_KEY: Optional[str]
|
|
||||||
BIND_ADDR: str
|
|
||||||
ALLOWED_HOSTS: str
|
|
||||||
DEBUG: bool
|
|
||||||
PUBLIC_INDEX: bool
|
|
||||||
PUBLIC_SNAPSHOTS: bool
|
|
||||||
FOOTER_INFO: str
|
|
||||||
|
|
||||||
SAVE_TITLE: bool
|
|
||||||
SAVE_FAVICON: bool
|
|
||||||
SAVE_WGET: bool
|
|
||||||
SAVE_WGET_REQUISITES: bool
|
|
||||||
SAVE_SINGLEFILE: bool
|
|
||||||
SAVE_READABILITY: bool
|
|
||||||
SAVE_MERCURY: bool
|
|
||||||
SAVE_PDF: bool
|
|
||||||
SAVE_SCREENSHOT: bool
|
|
||||||
SAVE_DOM: bool
|
|
||||||
SAVE_WARC: bool
|
|
||||||
SAVE_GIT: bool
|
|
||||||
SAVE_MEDIA: bool
|
|
||||||
SAVE_ARCHIVE_DOT_ORG: bool
|
|
||||||
|
|
||||||
RESOLUTION: str
|
|
||||||
GIT_DOMAINS: str
|
|
||||||
CHECK_SSL_VALIDITY: bool
|
|
||||||
CURL_USER_AGENT: str
|
|
||||||
WGET_USER_AGENT: str
|
|
||||||
CHROME_USER_AGENT: str
|
|
||||||
COOKIES_FILE: Union[str, Path, None]
|
|
||||||
CHROME_USER_DATA_DIR: Union[str, Path, None]
|
|
||||||
CHROME_TIMEOUT: int
|
|
||||||
CHROME_HEADLESS: bool
|
|
||||||
CHROME_SANDBOX: bool
|
|
||||||
|
|
||||||
USE_CURL: bool
|
|
||||||
USE_WGET: bool
|
|
||||||
USE_SINGLEFILE: bool
|
|
||||||
USE_READABILITY: bool
|
|
||||||
USE_MERCURY: bool
|
|
||||||
USE_GIT: bool
|
|
||||||
USE_CHROME: bool
|
|
||||||
USE_YOUTUBEDL: bool
|
|
||||||
CURL_BINARY: str
|
|
||||||
GIT_BINARY: str
|
|
||||||
WGET_BINARY: str
|
|
||||||
SINGLEFILE_BINARY: str
|
|
||||||
READABILITY_BINARY: str
|
|
||||||
MERCURY_BINARY: str
|
|
||||||
YOUTUBEDL_BINARY: str
|
|
||||||
CHROME_BINARY: Optional[str]
|
|
||||||
|
|
||||||
YOUTUBEDL_ARGS: List[str]
|
|
||||||
WGET_ARGS: List[str]
|
|
||||||
CURL_ARGS: List[str]
|
|
||||||
GIT_ARGS: List[str]
|
|
||||||
TAG_SEPARATOR_PATTERN: str
|
|
||||||
|
|
||||||
|
|
||||||
ConfigDefaultValueGetter = Callable[[ConfigDict], ConfigValue]
|
|
||||||
ConfigDefaultValue = Union[ConfigValue, ConfigDefaultValueGetter]
|
|
||||||
|
|
||||||
ConfigDefault = TypedDict('ConfigDefault', {
|
|
||||||
'default': ConfigDefaultValue,
|
|
||||||
'type': Optional[Type],
|
|
||||||
'aliases': Optional[Tuple[str, ...]],
|
|
||||||
}, total=False)
|
|
||||||
|
|
||||||
ConfigDefaultDict = Dict[str, ConfigDefault]
|
|
@ -1,118 +1,115 @@
|
|||||||
__package__ = 'archivebox.config'
|
__package__ = 'archivebox.config'
|
||||||
|
|
||||||
|
|
||||||
import os
|
import os
|
||||||
import re
|
import re
|
||||||
import platform
|
import platform
|
||||||
import tempfile
|
|
||||||
|
|
||||||
from typing import Dict
|
from typing import Dict
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
import importlib.metadata
|
|
||||||
from collections.abc import Mapping
|
from collections.abc import Mapping
|
||||||
|
|
||||||
from benedict import benedict
|
from benedict import benedict
|
||||||
|
|
||||||
from ..misc.logging import DEFAULT_CLI_COLORS
|
from ..misc.logging import DEFAULT_CLI_COLORS
|
||||||
|
|
||||||
|
from .paths import (
|
||||||
|
PACKAGE_DIR,
|
||||||
|
DATA_DIR,
|
||||||
|
ARCHIVE_DIR,
|
||||||
|
get_collection_id,
|
||||||
|
get_LIB_DIR,
|
||||||
|
get_TMP_DIR,
|
||||||
|
)
|
||||||
|
from .permissions import (
|
||||||
|
IS_ROOT,
|
||||||
|
IN_DOCKER,
|
||||||
|
RUNNING_AS_UID,
|
||||||
|
RUNNING_AS_GID,
|
||||||
|
DEFAULT_PUID,
|
||||||
|
DEFAULT_PGID,
|
||||||
|
ARCHIVEBOX_USER,
|
||||||
|
ARCHIVEBOX_GROUP,
|
||||||
|
)
|
||||||
|
from .version import detect_installed_version
|
||||||
|
|
||||||
###################### Config ##########################
|
###################### Config ##########################
|
||||||
|
|
||||||
PACKAGE_DIR: Path = Path(__file__).resolve().parent.parent # archivebox source code dir
|
|
||||||
DATA_DIR: Path = Path(os.getcwd()).resolve() # archivebox user data dir
|
|
||||||
ARCHIVE_DIR: Path = DATA_DIR / 'archive' # archivebox snapshot data dir
|
|
||||||
|
|
||||||
def _detect_installed_version(PACKAGE_DIR: Path):
|
|
||||||
"""Autodetect the installed archivebox version by using pip package metadata, pyproject.toml file, or package.json file"""
|
|
||||||
try:
|
|
||||||
# if in production install, use pip-installed package metadata
|
|
||||||
return importlib.metadata.version(__package__ or 'archivebox').strip()
|
|
||||||
except importlib.metadata.PackageNotFoundError:
|
|
||||||
pass
|
|
||||||
|
|
||||||
try:
|
|
||||||
# if in dev Git repo dir, use pyproject.toml file
|
|
||||||
pyproject_config = (PACKAGE_DIR.parent / 'pyproject.toml').read_text().split('\n')
|
|
||||||
for line in pyproject_config:
|
|
||||||
if line.startswith('version = '):
|
|
||||||
return line.split(' = ', 1)[-1].strip('"').strip()
|
|
||||||
except FileNotFoundError:
|
|
||||||
# building docs, pyproject.toml is not available
|
|
||||||
pass
|
|
||||||
|
|
||||||
# raise Exception('Failed to detect installed archivebox version!')
|
|
||||||
return 'dev'
|
|
||||||
|
|
||||||
VERSION: str = _detect_installed_version(PACKAGE_DIR)
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
class ConstantsDict(Mapping):
|
class ConstantsDict(Mapping):
|
||||||
IN_DOCKER = os.environ.get('IN_DOCKER', False) in ('1', 'true', 'True', 'yes')
|
PACKAGE_DIR: Path = PACKAGE_DIR
|
||||||
OS = platform.system().lower() # darwin, linux, etc.
|
DATA_DIR: Path = DATA_DIR
|
||||||
ARCH = platform.machine().lower() # arm64, x86_64, etc.
|
ARCHIVE_DIR: Path = ARCHIVE_DIR
|
||||||
LIB_DIR_SCOPE = f'{ARCH}-{OS}' + ('-docker' if IN_DOCKER else '')
|
COLLECTION_ID: str = get_collection_id(DATA_DIR)
|
||||||
|
|
||||||
PACKAGE_DIR: Path = PACKAGE_DIR # archivebox source code dir
|
|
||||||
DATA_DIR: Path = DATA_DIR # archivebox user data dir
|
|
||||||
ARCHIVE_DIR: Path = ARCHIVE_DIR # archivebox snapshot data dir
|
|
||||||
VERSION: str = VERSION
|
|
||||||
|
|
||||||
|
# Host system
|
||||||
|
VERSION: str = detect_installed_version(PACKAGE_DIR)
|
||||||
|
OS: str = platform.system().lower() # darwin, linux, etc.
|
||||||
|
ARCH: str = platform.machine().lower() # arm64, x86_64, aarch64, etc.
|
||||||
|
IN_DOCKER: bool = IN_DOCKER
|
||||||
|
|
||||||
|
# Permissions
|
||||||
|
IS_ROOT: bool = IS_ROOT
|
||||||
|
ARCHIVEBOX_USER: int = ARCHIVEBOX_USER
|
||||||
|
ARCHIVEBOX_GROUP: int = ARCHIVEBOX_GROUP
|
||||||
|
RUNNING_AS_UID: int = RUNNING_AS_UID
|
||||||
|
RUNNING_AS_GID: int = RUNNING_AS_GID
|
||||||
|
DEFAULT_PUID: int = DEFAULT_PUID
|
||||||
|
DEFAULT_PGID: int = DEFAULT_PGID
|
||||||
|
|
||||||
|
# Source code dirs
|
||||||
PACKAGE_DIR_NAME: str = PACKAGE_DIR.name
|
PACKAGE_DIR_NAME: str = PACKAGE_DIR.name
|
||||||
TEMPLATES_DIR_NAME: str = 'templates'
|
TEMPLATES_DIR_NAME: str = 'templates'
|
||||||
TEMPLATES_DIR: Path = PACKAGE_DIR / TEMPLATES_DIR_NAME
|
TEMPLATES_DIR: Path = PACKAGE_DIR / TEMPLATES_DIR_NAME
|
||||||
STATIC_DIR: Path = TEMPLATES_DIR / 'static'
|
STATIC_DIR_NAME: str = 'static'
|
||||||
|
STATIC_DIR: Path = TEMPLATES_DIR / STATIC_DIR_NAME
|
||||||
|
|
||||||
|
# Data dirs
|
||||||
|
ARCHIVE_DIR_NAME: str = 'archive'
|
||||||
|
SOURCES_DIR_NAME: str = 'sources'
|
||||||
|
PERSONAS_DIR_NAME: str = 'personas'
|
||||||
|
CRONTABS_DIR_NAME: str = 'crontabs'
|
||||||
|
CACHE_DIR_NAME: str = 'cache'
|
||||||
|
LOGS_DIR_NAME: str = 'logs'
|
||||||
USER_PLUGINS_DIR_NAME: str = 'user_plugins'
|
USER_PLUGINS_DIR_NAME: str = 'user_plugins'
|
||||||
CUSTOM_TEMPLATES_DIR_NAME: str = 'user_templates'
|
CUSTOM_TEMPLATES_DIR_NAME: str = 'user_templates'
|
||||||
|
|
||||||
ARCHIVE_DIR_NAME: str = 'archive'
|
|
||||||
SOURCES_DIR_NAME: str = 'sources'
|
|
||||||
PERSONAS_DIR_NAME: str = 'personas'
|
|
||||||
CRONTABS_DIR_NAME: str = 'crontabs'
|
|
||||||
CACHE_DIR_NAME: str = 'cache'
|
|
||||||
LOGS_DIR_NAME: str = 'logs'
|
|
||||||
LIB_DIR_NAME: str = 'lib'
|
|
||||||
TMP_DIR_NAME: str = 'tmp'
|
|
||||||
|
|
||||||
SYSTEM_TMP_DIR: Path = Path(os.environ['SYSTEM_TMP_DIR']) if 'SYSTEM_TMP_DIR' in os.environ else (Path(tempfile.gettempdir()) / 'archivebox')
|
|
||||||
# DATA_DIR_TMP_DIR: Path = DATA_DIR / TMP_DIR_NAME / machineid.hashed_id('archivebox')[:16] # cant be used because of socket path length restrictions break too often if data dir is in some deep subdir: ocket.error reported AF_UNIX path too long
|
|
||||||
SYSTEM_LIB_DIR: Path = Path(os.environ['SYSTEM_LIB_DIR']) if 'SYSTEM_LIB_DIR' in os.environ else (PACKAGE_DIR / LIB_DIR_NAME)
|
|
||||||
DATA_DIR_LIB_DIR: Path = DATA_DIR / LIB_DIR_NAME / LIB_DIR_SCOPE
|
|
||||||
|
|
||||||
ARCHIVE_DIR: Path = DATA_DIR / ARCHIVE_DIR_NAME
|
ARCHIVE_DIR: Path = DATA_DIR / ARCHIVE_DIR_NAME
|
||||||
SOURCES_DIR: Path = DATA_DIR / SOURCES_DIR_NAME
|
SOURCES_DIR: Path = DATA_DIR / SOURCES_DIR_NAME
|
||||||
PERSONAS_DIR: Path = DATA_DIR / PERSONAS_DIR_NAME
|
PERSONAS_DIR: Path = DATA_DIR / PERSONAS_DIR_NAME
|
||||||
CACHE_DIR: Path = DATA_DIR / CACHE_DIR_NAME
|
|
||||||
LOGS_DIR: Path = DATA_DIR / LOGS_DIR_NAME
|
LOGS_DIR: Path = DATA_DIR / LOGS_DIR_NAME
|
||||||
LIB_DIR: Path = SYSTEM_LIB_DIR if IN_DOCKER else DATA_DIR_LIB_DIR # e.g. /app/lib or ./data/lib/arm64-darwin-docker
|
CACHE_DIR: Path = DATA_DIR / CACHE_DIR_NAME
|
||||||
TMP_DIR: Path = SYSTEM_TMP_DIR
|
|
||||||
CUSTOM_TEMPLATES_DIR: Path = DATA_DIR / CUSTOM_TEMPLATES_DIR_NAME
|
CUSTOM_TEMPLATES_DIR: Path = DATA_DIR / CUSTOM_TEMPLATES_DIR_NAME
|
||||||
USER_PLUGINS_DIR: Path = DATA_DIR / USER_PLUGINS_DIR_NAME
|
USER_PLUGINS_DIR: Path = DATA_DIR / USER_PLUGINS_DIR_NAME
|
||||||
|
|
||||||
|
# Data dir files
|
||||||
|
CONFIG_FILENAME: str = 'ArchiveBox.conf'
|
||||||
|
SQL_INDEX_FILENAME: str = 'index.sqlite3'
|
||||||
|
QUEUE_DATABASE_FILENAME: str = 'queue.sqlite3'
|
||||||
|
CONFIG_FILE: Path = DATA_DIR / CONFIG_FILENAME
|
||||||
|
DATABASE_FILE: Path = DATA_DIR / SQL_INDEX_FILENAME
|
||||||
|
QUEUE_DATABASE_FILE: Path = DATA_DIR / QUEUE_DATABASE_FILENAME
|
||||||
|
|
||||||
|
JSON_INDEX_FILENAME: str = 'index.json'
|
||||||
|
HTML_INDEX_FILENAME: str = 'index.html'
|
||||||
|
ROBOTS_TXT_FILENAME: str = 'robots.txt'
|
||||||
|
FAVICON_FILENAME: str = 'favicon.ico'
|
||||||
|
|
||||||
|
# Runtime dirs
|
||||||
|
TMP_DIR_NAME: str = 'tmp'
|
||||||
|
TMP_DIR: Path = get_TMP_DIR()
|
||||||
|
LIB_DIR_NAME: str = 'lib'
|
||||||
|
LIB_DIR: Path = get_LIB_DIR()
|
||||||
LIB_PIP_DIR: Path = LIB_DIR / 'pip'
|
LIB_PIP_DIR: Path = LIB_DIR / 'pip'
|
||||||
LIB_NPM_DIR: Path = LIB_DIR / 'npm'
|
LIB_NPM_DIR: Path = LIB_DIR / 'npm'
|
||||||
LIB_BROWSERS_DIR: Path = LIB_DIR / 'browsers'
|
LIB_BROWSERS_DIR: Path = LIB_DIR / 'browsers'
|
||||||
LIB_BIN_DIR: Path = LIB_DIR / 'bin'
|
LIB_BIN_DIR: Path = LIB_DIR / 'bin'
|
||||||
BIN_DIR: Path = LIB_BIN_DIR
|
BIN_DIR: Path = LIB_BIN_DIR
|
||||||
|
|
||||||
CONFIG_FILENAME: str = 'ArchiveBox.conf'
|
# Config constants
|
||||||
SQL_INDEX_FILENAME: str = 'index.sqlite3'
|
TIMEZONE: str = 'UTC'
|
||||||
QUEUE_DATABASE_FILENAME: str = 'queue.sqlite3'
|
DEFAULT_CLI_COLORS: Dict[str, str] = DEFAULT_CLI_COLORS
|
||||||
|
DISABLED_CLI_COLORS: Dict[str, str] = benedict({k: '' for k in DEFAULT_CLI_COLORS})
|
||||||
|
|
||||||
CONFIG_FILE: Path = DATA_DIR / CONFIG_FILENAME
|
ALLOWDENYLIST_REGEX_FLAGS: int = re.IGNORECASE | re.UNICODE | re.MULTILINE
|
||||||
DATABASE_FILE: Path = DATA_DIR / SQL_INDEX_FILENAME
|
|
||||||
QUEUE_DATABASE_FILE: Path = DATA_DIR / QUEUE_DATABASE_FILENAME
|
|
||||||
|
|
||||||
JSON_INDEX_FILENAME: str = 'index.json'
|
|
||||||
HTML_INDEX_FILENAME: str = 'index.html'
|
|
||||||
ROBOTS_TXT_FILENAME: str = 'robots.txt'
|
|
||||||
FAVICON_FILENAME: str = 'favicon.ico'
|
|
||||||
|
|
||||||
TIMEZONE: str = 'UTC'
|
|
||||||
DEFAULT_CLI_COLORS: Dict[str, str] = DEFAULT_CLI_COLORS
|
|
||||||
DISABLED_CLI_COLORS: Dict[str, str] = benedict({k: '' for k in DEFAULT_CLI_COLORS})
|
|
||||||
|
|
||||||
ALLOWDENYLIST_REGEX_FLAGS: int = re.IGNORECASE | re.UNICODE | re.MULTILINE
|
|
||||||
|
|
||||||
STATICFILE_EXTENSIONS: frozenset[str] = frozenset((
|
STATICFILE_EXTENSIONS: frozenset[str] = frozenset((
|
||||||
# 99.999% of the time, URLs ending in these extensions are static files
|
# 99.999% of the time, URLs ending in these extensions are static files
|
||||||
@ -136,17 +133,6 @@ class ConstantsDict(Mapping):
|
|||||||
# html, htm, shtml, xhtml, xml, aspx, php, cgi
|
# html, htm, shtml, xhtml, xml, aspx, php, cgi
|
||||||
))
|
))
|
||||||
|
|
||||||
INGORED_PATHS: frozenset[str] = frozenset((
|
|
||||||
".git",
|
|
||||||
".svn",
|
|
||||||
".DS_Store",
|
|
||||||
".gitignore",
|
|
||||||
"lost+found",
|
|
||||||
".DS_Store",
|
|
||||||
".env",
|
|
||||||
"Dockerfile",
|
|
||||||
".ArchiveBox.conf.bak",
|
|
||||||
))
|
|
||||||
PIP_RELATED_NAMES: frozenset[str] = frozenset((
|
PIP_RELATED_NAMES: frozenset[str] = frozenset((
|
||||||
".venv",
|
".venv",
|
||||||
"venv",
|
"venv",
|
||||||
@ -160,7 +146,15 @@ class ConstantsDict(Mapping):
|
|||||||
"yarn.lock",
|
"yarn.lock",
|
||||||
))
|
))
|
||||||
|
|
||||||
DATA_DIR_NAMES: frozenset[str] = frozenset((
|
# When initializing archivebox in a new directory, we check to make sure the dir is
|
||||||
|
# actually empty so that we dont clobber someone's home directory or desktop by accident.
|
||||||
|
# These files are exceptions to the is_empty check when we're trying to init a new dir,
|
||||||
|
# as they could be from a previous archivebox version, system artifacts, dependencies, etc.
|
||||||
|
ALLOWED_IN_DATA_DIR: frozenset[str] = frozenset((
|
||||||
|
*PIP_RELATED_NAMES,
|
||||||
|
*NPM_RELATED_NAMES,
|
||||||
|
|
||||||
|
### Dirs:
|
||||||
ARCHIVE_DIR_NAME,
|
ARCHIVE_DIR_NAME,
|
||||||
SOURCES_DIR_NAME,
|
SOURCES_DIR_NAME,
|
||||||
LOGS_DIR_NAME,
|
LOGS_DIR_NAME,
|
||||||
@ -171,9 +165,12 @@ class ConstantsDict(Mapping):
|
|||||||
CUSTOM_TEMPLATES_DIR_NAME,
|
CUSTOM_TEMPLATES_DIR_NAME,
|
||||||
USER_PLUGINS_DIR_NAME,
|
USER_PLUGINS_DIR_NAME,
|
||||||
CRONTABS_DIR_NAME,
|
CRONTABS_DIR_NAME,
|
||||||
))
|
"static", # created by old static exports <v0.6.0
|
||||||
DATA_DIRS: frozenset[Path] = frozenset(DATA_DIR / dirname for dirname in DATA_DIR_NAMES)
|
"sonic", # created by docker bind mount / sonic FTS process
|
||||||
DATA_FILE_NAMES: frozenset[str] = frozenset((
|
".git",
|
||||||
|
".svn",
|
||||||
|
|
||||||
|
### Files:
|
||||||
CONFIG_FILENAME,
|
CONFIG_FILENAME,
|
||||||
SQL_INDEX_FILENAME,
|
SQL_INDEX_FILENAME,
|
||||||
f"{SQL_INDEX_FILENAME}-wal",
|
f"{SQL_INDEX_FILENAME}-wal",
|
||||||
@ -188,43 +185,37 @@ class ConstantsDict(Mapping):
|
|||||||
FAVICON_FILENAME,
|
FAVICON_FILENAME,
|
||||||
CONFIG_FILENAME,
|
CONFIG_FILENAME,
|
||||||
f"{CONFIG_FILENAME}.bak",
|
f"{CONFIG_FILENAME}.bak",
|
||||||
|
f".{CONFIG_FILENAME}.bak",
|
||||||
"static_index.json",
|
"static_index.json",
|
||||||
))
|
".DS_Store",
|
||||||
|
".gitignore",
|
||||||
# When initializing archivebox in a new directory, we check to make sure the dir is
|
"lost+found",
|
||||||
# actually empty so that we dont clobber someone's home directory or desktop by accident.
|
".DS_Store",
|
||||||
# These files are exceptions to the is_empty check when we're trying to init a new dir,
|
".env",
|
||||||
# as they could be from a previous archivebox version, system artifacts, dependencies, etc.
|
".collection_id",
|
||||||
ALLOWED_IN_DATA_DIR: frozenset[str] = frozenset((
|
"Dockerfile",
|
||||||
*INGORED_PATHS,
|
|
||||||
*PIP_RELATED_NAMES,
|
|
||||||
*NPM_RELATED_NAMES,
|
|
||||||
*DATA_DIR_NAMES,
|
|
||||||
*DATA_FILE_NAMES,
|
|
||||||
"static", # created by old static exports <v0.6.0
|
|
||||||
"sonic", # created by docker bind mount
|
|
||||||
))
|
))
|
||||||
|
|
||||||
CODE_LOCATIONS = benedict({
|
CODE_LOCATIONS = benedict({
|
||||||
'PACKAGE_DIR': {
|
'PACKAGE_DIR': {
|
||||||
'path': (PACKAGE_DIR).resolve(),
|
'path': (PACKAGE_DIR).resolve(),
|
||||||
'enabled': True,
|
'enabled': True,
|
||||||
'is_valid': (PACKAGE_DIR / '__main__.py').exists(),
|
'is_valid': (PACKAGE_DIR / '__main__.py').exists(), # read + list
|
||||||
},
|
},
|
||||||
'TEMPLATES_DIR': {
|
'TEMPLATES_DIR': {
|
||||||
'path': TEMPLATES_DIR.resolve(),
|
'path': TEMPLATES_DIR.resolve(),
|
||||||
'enabled': True,
|
'enabled': True,
|
||||||
'is_valid': STATIC_DIR.exists(),
|
'is_valid': STATIC_DIR.exists() and os.access(STATIC_DIR, os.R_OK) and os.access(STATIC_DIR, os.X_OK), # read + list
|
||||||
},
|
},
|
||||||
'LIB_DIR': {
|
'LIB_DIR': {
|
||||||
'path': LIB_DIR.resolve(),
|
'path': LIB_DIR.resolve(),
|
||||||
'enabled': True,
|
'enabled': True,
|
||||||
'is_valid': LIB_DIR.is_dir(),
|
'is_valid': LIB_DIR.is_dir() and os.access(LIB_DIR, os.R_OK) and os.access(LIB_DIR, os.X_OK) and os.access(LIB_DIR, os.W_OK), # read + write
|
||||||
},
|
},
|
||||||
'TMP_DIR': {
|
'TMP_DIR': {
|
||||||
'path': TMP_DIR.resolve(),
|
'path': TMP_DIR.resolve(),
|
||||||
'enabled': True,
|
'enabled': True,
|
||||||
'is_valid': TMP_DIR.is_dir(),
|
'is_valid': TMP_DIR.is_dir() and os.access(TMP_DIR, os.R_OK) and os.access(TMP_DIR, os.X_OK) and os.access(TMP_DIR, os.W_OK), # read + write
|
||||||
},
|
},
|
||||||
})
|
})
|
||||||
|
|
||||||
@ -232,61 +223,61 @@ class ConstantsDict(Mapping):
|
|||||||
"DATA_DIR": {
|
"DATA_DIR": {
|
||||||
"path": DATA_DIR.resolve(),
|
"path": DATA_DIR.resolve(),
|
||||||
"enabled": True,
|
"enabled": True,
|
||||||
"is_valid": DATABASE_FILE.exists(),
|
"is_valid": DATABASE_FILE.exists() and os.access(DATA_DIR, os.R_OK) and os.access(DATA_DIR, os.W_OK) and os.access(DATA_DIR, os.X_OK),
|
||||||
"is_mount": os.path.ismount(DATA_DIR.resolve()),
|
"is_mount": os.path.ismount(DATA_DIR.resolve()),
|
||||||
},
|
},
|
||||||
"CONFIG_FILE": {
|
"CONFIG_FILE": {
|
||||||
"path": CONFIG_FILE.resolve(),
|
"path": CONFIG_FILE.resolve(),
|
||||||
"enabled": True,
|
"enabled": True,
|
||||||
"is_valid": CONFIG_FILE.exists(),
|
"is_valid": CONFIG_FILE.exists() and os.access(CONFIG_FILE, os.W_OK),
|
||||||
},
|
},
|
||||||
"SQL_INDEX": {
|
"SQL_INDEX": {
|
||||||
"path": DATABASE_FILE.resolve(),
|
"path": DATABASE_FILE.resolve(),
|
||||||
"enabled": True,
|
"enabled": True,
|
||||||
"is_valid": DATABASE_FILE.exists(),
|
"is_valid": DATABASE_FILE.exists() and os.access(DATABASE_FILE, os.R_OK) and os.access(DATABASE_FILE, os.W_OK),
|
||||||
"is_mount": os.path.ismount(DATABASE_FILE.resolve()),
|
"is_mount": os.path.ismount(DATABASE_FILE.resolve()),
|
||||||
},
|
},
|
||||||
"QUEUE_DATABASE": {
|
"QUEUE_DATABASE": {
|
||||||
"path": QUEUE_DATABASE_FILE.resolve(),
|
"path": QUEUE_DATABASE_FILE.resolve(),
|
||||||
"enabled": True,
|
"enabled": True,
|
||||||
"is_valid": QUEUE_DATABASE_FILE.exists(),
|
"is_valid": QUEUE_DATABASE_FILE.exists() and os.access(QUEUE_DATABASE_FILE, os.R_OK) and os.access(QUEUE_DATABASE_FILE, os.W_OK),
|
||||||
"is_mount": os.path.ismount(QUEUE_DATABASE_FILE.resolve()),
|
"is_mount": os.path.ismount(QUEUE_DATABASE_FILE.resolve()),
|
||||||
},
|
},
|
||||||
"ARCHIVE_DIR": {
|
"ARCHIVE_DIR": {
|
||||||
"path": ARCHIVE_DIR.resolve(),
|
"path": ARCHIVE_DIR.resolve(),
|
||||||
"enabled": True,
|
"enabled": True,
|
||||||
"is_valid": ARCHIVE_DIR.exists(),
|
"is_valid": ARCHIVE_DIR.exists() and os.access(ARCHIVE_DIR, os.R_OK) and os.access(ARCHIVE_DIR, os.W_OK) and os.access(ARCHIVE_DIR, os.X_OK),
|
||||||
"is_mount": os.path.ismount(ARCHIVE_DIR.resolve()),
|
"is_mount": os.path.ismount(ARCHIVE_DIR.resolve()),
|
||||||
},
|
},
|
||||||
"SOURCES_DIR": {
|
"SOURCES_DIR": {
|
||||||
"path": SOURCES_DIR.resolve(),
|
"path": SOURCES_DIR.resolve(),
|
||||||
"enabled": True,
|
"enabled": True,
|
||||||
"is_valid": SOURCES_DIR.exists(),
|
"is_valid": SOURCES_DIR.exists() and os.access(SOURCES_DIR, os.R_OK) and os.access(SOURCES_DIR, os.W_OK) and os.access(SOURCES_DIR, os.X_OK),
|
||||||
},
|
},
|
||||||
"LOGS_DIR": {
|
"LOGS_DIR": {
|
||||||
"path": LOGS_DIR.resolve(),
|
"path": LOGS_DIR.resolve(),
|
||||||
"enabled": True,
|
"enabled": True,
|
||||||
"is_valid": LOGS_DIR.is_dir(),
|
"is_valid": LOGS_DIR.is_dir() and os.access(LOGS_DIR, os.R_OK) and os.access(LOGS_DIR, os.W_OK) and os.access(LOGS_DIR, os.X_OK), # read + write
|
||||||
},
|
},
|
||||||
# "CACHE_DIR": {
|
# "CACHE_DIR": {
|
||||||
# "path": CACHE_DIR.resolve(),
|
# "path": CACHE_DIR.resolve(),
|
||||||
# "enabled": True,
|
# "enabled": True,
|
||||||
# "is_valid": CACHE_DIR.is_dir(),
|
# "is_valid": CACHE_DIR.is_dir() and os.access(CACHE_DIR, os.R_OK) and os.access(CACHE_DIR, os.W_OK) and os.access(CACHE_DIR, os.X_OK), # read + write
|
||||||
# },
|
# },
|
||||||
"PERSONAS_DIR": {
|
"PERSONAS_DIR": {
|
||||||
"path": PERSONAS_DIR.resolve(),
|
"path": PERSONAS_DIR.resolve(),
|
||||||
"enabled": PERSONAS_DIR.exists(),
|
"enabled": PERSONAS_DIR.exists(),
|
||||||
"is_valid": PERSONAS_DIR.is_dir(),
|
"is_valid": PERSONAS_DIR.is_dir() and os.access(PERSONAS_DIR, os.R_OK) and os.access(PERSONAS_DIR, os.W_OK) and os.access(PERSONAS_DIR, os.X_OK), # read + write
|
||||||
},
|
},
|
||||||
'CUSTOM_TEMPLATES_DIR': {
|
'CUSTOM_TEMPLATES_DIR': {
|
||||||
'path': CUSTOM_TEMPLATES_DIR.resolve(),
|
'path': CUSTOM_TEMPLATES_DIR.resolve(),
|
||||||
'enabled': CUSTOM_TEMPLATES_DIR.exists(),
|
'enabled': CUSTOM_TEMPLATES_DIR.exists(),
|
||||||
'is_valid': CUSTOM_TEMPLATES_DIR.is_dir(),
|
'is_valid': CUSTOM_TEMPLATES_DIR.is_dir() and os.access(CUSTOM_TEMPLATES_DIR, os.R_OK) and os.access(CUSTOM_TEMPLATES_DIR, os.X_OK), # read
|
||||||
},
|
},
|
||||||
'USER_PLUGINS_DIR': {
|
'USER_PLUGINS_DIR': {
|
||||||
'path': USER_PLUGINS_DIR.resolve(),
|
'path': USER_PLUGINS_DIR.resolve(),
|
||||||
'enabled': USER_PLUGINS_DIR.exists(),
|
'enabled': USER_PLUGINS_DIR.exists(),
|
||||||
'is_valid': USER_PLUGINS_DIR.is_dir(),
|
'is_valid': USER_PLUGINS_DIR.is_dir() and os.access(USER_PLUGINS_DIR, os.R_OK) and os.access(USER_PLUGINS_DIR, os.X_OK), # read
|
||||||
},
|
},
|
||||||
})
|
})
|
||||||
|
|
||||||
@ -314,5 +305,6 @@ globals().update(CONSTANTS)
|
|||||||
|
|
||||||
|
|
||||||
# these need to always exist as we need them to run almost everything
|
# these need to always exist as we need them to run almost everything
|
||||||
|
# TODO: figure out a better time to make these than import-time
|
||||||
CONSTANTS.LIB_DIR.mkdir(parents=True, exist_ok=True)
|
CONSTANTS.LIB_DIR.mkdir(parents=True, exist_ok=True)
|
||||||
CONSTANTS.TMP_DIR.mkdir(parents=True, exist_ok=True)
|
CONSTANTS.TMP_DIR.mkdir(parents=True, exist_ok=True)
|
||||||
|
@ -22,41 +22,34 @@ Documentation:
|
|||||||
__package__ = 'archivebox.config'
|
__package__ = 'archivebox.config'
|
||||||
|
|
||||||
import os
|
import os
|
||||||
import io
|
|
||||||
import re
|
import re
|
||||||
import sys
|
import sys
|
||||||
import json
|
import json
|
||||||
import shutil
|
import shutil
|
||||||
|
|
||||||
from hashlib import md5
|
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from datetime import datetime, timezone
|
from datetime import datetime, timezone
|
||||||
from typing import Optional, Type, Tuple, Dict
|
from typing import Optional, Type, Tuple, Dict, Any
|
||||||
from subprocess import run, PIPE, DEVNULL, STDOUT, TimeoutExpired
|
from subprocess import run, DEVNULL
|
||||||
from configparser import ConfigParser
|
from configparser import ConfigParser
|
||||||
|
|
||||||
from rich.progress import Progress
|
from rich.progress import Progress
|
||||||
from rich.console import Console
|
from rich.console import Console
|
||||||
from benedict import benedict
|
from benedict import benedict
|
||||||
from pydantic_pkgr import SemVer
|
|
||||||
|
|
||||||
import django
|
import django
|
||||||
from django.db.backends.sqlite3.base import Database as sqlite3
|
from django.db.backends.sqlite3.base import Database as sqlite3
|
||||||
|
|
||||||
|
|
||||||
from .constants import CONSTANTS, TIMEZONE
|
from .constants import CONSTANTS
|
||||||
from .constants import *
|
from .constants import *
|
||||||
from .config_stubs import (
|
|
||||||
ConfigValue,
|
|
||||||
ConfigDefaultValue,
|
|
||||||
ConfigDefaultDict,
|
|
||||||
)
|
|
||||||
from ..misc.logging import (
|
from ..misc.logging import (
|
||||||
stderr,
|
stderr,
|
||||||
hint, # noqa
|
hint, # noqa
|
||||||
)
|
)
|
||||||
|
|
||||||
from .defaults import SHELL_CONFIG, GENERAL_CONFIG, ARCHIVING_CONFIG, SERVER_CONFIG, SEARCH_BACKEND_CONFIG, STORAGE_CONFIG
|
from .common import SHELL_CONFIG, GENERAL_CONFIG, ARCHIVING_CONFIG, SERVER_CONFIG, SEARCH_BACKEND_CONFIG, STORAGE_CONFIG
|
||||||
from archivebox.plugins_auth.ldap.apps import LDAP_CONFIG
|
from archivebox.plugins_auth.ldap.apps import LDAP_CONFIG
|
||||||
from archivebox.plugins_extractor.favicon.apps import FAVICON_CONFIG
|
from archivebox.plugins_extractor.favicon.apps import FAVICON_CONFIG
|
||||||
from archivebox.plugins_extractor.wget.apps import WGET_CONFIG
|
from archivebox.plugins_extractor.wget.apps import WGET_CONFIG
|
||||||
@ -67,7 +60,7 @@ LDAP = LDAP_CONFIG.LDAP_ENABLED
|
|||||||
|
|
||||||
############################### Config Schema ##################################
|
############################### Config Schema ##################################
|
||||||
|
|
||||||
CONFIG_SCHEMA: Dict[str, ConfigDefaultDict] = {
|
CONFIG_SCHEMA: Dict[str, Dict[str, Any]] = {
|
||||||
'SHELL_CONFIG': SHELL_CONFIG.as_legacy_config_schema(),
|
'SHELL_CONFIG': SHELL_CONFIG.as_legacy_config_schema(),
|
||||||
|
|
||||||
'SERVER_CONFIG': SERVER_CONFIG.as_legacy_config_schema(),
|
'SERVER_CONFIG': SERVER_CONFIG.as_legacy_config_schema(),
|
||||||
@ -194,7 +187,7 @@ def get_real_name(key: str) -> str:
|
|||||||
|
|
||||||
# These are derived/computed values calculated *after* all user-provided config values are ingested
|
# These are derived/computed values calculated *after* all user-provided config values are ingested
|
||||||
# they appear in `archivebox config` output and are intended to be read-only for the user
|
# they appear in `archivebox config` output and are intended to be read-only for the user
|
||||||
DYNAMIC_CONFIG_SCHEMA: ConfigDefaultDict = {
|
DYNAMIC_CONFIG_SCHEMA: Dict[str, Any] = {
|
||||||
'URL_DENYLIST_PTN': {'default': lambda c: c['URL_DENYLIST'] and re.compile(c['URL_DENYLIST'] or '', CONSTANTS.ALLOWDENYLIST_REGEX_FLAGS)},
|
'URL_DENYLIST_PTN': {'default': lambda c: c['URL_DENYLIST'] and re.compile(c['URL_DENYLIST'] or '', CONSTANTS.ALLOWDENYLIST_REGEX_FLAGS)},
|
||||||
'URL_ALLOWLIST_PTN': {'default': lambda c: c['URL_ALLOWLIST'] and re.compile(c['URL_ALLOWLIST'] or '', CONSTANTS.ALLOWDENYLIST_REGEX_FLAGS)},
|
'URL_ALLOWLIST_PTN': {'default': lambda c: c['URL_ALLOWLIST'] and re.compile(c['URL_ALLOWLIST'] or '', CONSTANTS.ALLOWDENYLIST_REGEX_FLAGS)},
|
||||||
|
|
||||||
@ -209,12 +202,12 @@ DYNAMIC_CONFIG_SCHEMA: ConfigDefaultDict = {
|
|||||||
|
|
||||||
|
|
||||||
def load_config_val(key: str,
|
def load_config_val(key: str,
|
||||||
default: ConfigDefaultValue=None,
|
default: Any=None,
|
||||||
type: Optional[Type]=None,
|
type: Optional[Type]=None,
|
||||||
aliases: Optional[Tuple[str, ...]]=None,
|
aliases: Optional[Tuple[str, ...]]=None,
|
||||||
config: Optional[benedict]=None,
|
config: Optional[benedict]=None,
|
||||||
env_vars: Optional[os._Environ]=None,
|
env_vars: Optional[os._Environ]=None,
|
||||||
config_file_vars: Optional[Dict[str, str]]=None) -> ConfigValue:
|
config_file_vars: Optional[Dict[str, str]]=None) -> Any:
|
||||||
"""parse bool, int, and str key=value pairs from env"""
|
"""parse bool, int, and str key=value pairs from env"""
|
||||||
|
|
||||||
assert isinstance(config, dict)
|
assert isinstance(config, dict)
|
||||||
@ -372,7 +365,7 @@ def write_config_file(config: Dict[str, str], out_dir: str | None=CONSTANTS.DATA
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
def load_config(defaults: ConfigDefaultDict,
|
def load_config(defaults: Dict[str, Any],
|
||||||
config: Optional[benedict]=None,
|
config: Optional[benedict]=None,
|
||||||
out_dir: Optional[str]=None,
|
out_dir: Optional[str]=None,
|
||||||
env_vars: Optional[os._Environ]=None,
|
env_vars: Optional[os._Environ]=None,
|
||||||
@ -505,7 +498,7 @@ def load_all_config():
|
|||||||
# add all final config values in CONFIG to globals in this file
|
# add all final config values in CONFIG to globals in this file
|
||||||
CONFIG: benedict = load_all_config()
|
CONFIG: benedict = load_all_config()
|
||||||
globals().update(CONFIG)
|
globals().update(CONFIG)
|
||||||
# this lets us do: from .config import DEBUG, MEDIA_TIMEOUT, ...
|
|
||||||
|
|
||||||
# print("FINISHED LOADING CONFIG USING SCHEMAS + FILE + ENV")
|
# print("FINISHED LOADING CONFIG USING SCHEMAS + FILE + ENV")
|
||||||
|
|
||||||
@ -521,8 +514,8 @@ globals().update(CONFIG)
|
|||||||
|
|
||||||
|
|
||||||
# Set timezone to UTC and umask to OUTPUT_PERMISSIONS
|
# Set timezone to UTC and umask to OUTPUT_PERMISSIONS
|
||||||
assert TIMEZONE == 'UTC', f'The server timezone should always be set to UTC (got {TIMEZONE})' # noqa: F821
|
assert CONSTANTS.TIMEZONE == 'UTC', f'The server timezone should always be set to UTC (got {CONSTANTS.TIMEZONE})' # noqa: F821
|
||||||
os.environ["TZ"] = TIMEZONE # noqa: F821
|
os.environ["TZ"] = CONSTANTS.TIMEZONE # noqa: F821
|
||||||
os.umask(0o777 - int(STORAGE_CONFIG.DIR_OUTPUT_PERMISSIONS, base=8)) # noqa: F821
|
os.umask(0o777 - int(STORAGE_CONFIG.DIR_OUTPUT_PERMISSIONS, base=8)) # noqa: F821
|
||||||
|
|
||||||
########################### Config Validity Checkers ###########################
|
########################### Config Validity Checkers ###########################
|
||||||
@ -533,7 +526,8 @@ if not SHELL_CONFIG.SHOW_PROGRESS:
|
|||||||
os.environ['TERM'] = 'dumb'
|
os.environ['TERM'] = 'dumb'
|
||||||
|
|
||||||
# recreate rich console obj based on new config values
|
# recreate rich console obj based on new config values
|
||||||
CONSOLE = Console()
|
STDOUT = CONSOLE = Console()
|
||||||
|
STDERR = Console(stderr=True)
|
||||||
from ..misc import logging
|
from ..misc import logging
|
||||||
logging.CONSOLE = CONSOLE
|
logging.CONSOLE = CONSOLE
|
||||||
|
|
||||||
@ -541,11 +535,11 @@ logging.CONSOLE = CONSOLE
|
|||||||
INITIAL_STARTUP_PROGRESS = None
|
INITIAL_STARTUP_PROGRESS = None
|
||||||
INITIAL_STARTUP_PROGRESS_TASK = 0
|
INITIAL_STARTUP_PROGRESS_TASK = 0
|
||||||
|
|
||||||
def bump_startup_progress_bar():
|
def bump_startup_progress_bar(advance=1):
|
||||||
global INITIAL_STARTUP_PROGRESS
|
global INITIAL_STARTUP_PROGRESS
|
||||||
global INITIAL_STARTUP_PROGRESS_TASK
|
global INITIAL_STARTUP_PROGRESS_TASK
|
||||||
if INITIAL_STARTUP_PROGRESS:
|
if INITIAL_STARTUP_PROGRESS:
|
||||||
INITIAL_STARTUP_PROGRESS.update(INITIAL_STARTUP_PROGRESS_TASK, advance=1) # type: ignore
|
INITIAL_STARTUP_PROGRESS.update(INITIAL_STARTUP_PROGRESS_TASK, advance=advance) # type: ignore
|
||||||
|
|
||||||
|
|
||||||
def setup_django_minimal():
|
def setup_django_minimal():
|
||||||
@ -559,6 +553,8 @@ DJANGO_SET_UP = False
|
|||||||
|
|
||||||
|
|
||||||
def setup_django(out_dir: Path | None=None, check_db=False, config: benedict=CONFIG, in_memory_db=False) -> None:
|
def setup_django(out_dir: Path | None=None, check_db=False, config: benedict=CONFIG, in_memory_db=False) -> None:
|
||||||
|
from rich.panel import Panel
|
||||||
|
|
||||||
global INITIAL_STARTUP_PROGRESS
|
global INITIAL_STARTUP_PROGRESS
|
||||||
global INITIAL_STARTUP_PROGRESS_TASK
|
global INITIAL_STARTUP_PROGRESS_TASK
|
||||||
global DJANGO_SET_UP
|
global DJANGO_SET_UP
|
||||||
@ -568,7 +564,7 @@ def setup_django(out_dir: Path | None=None, check_db=False, config: benedict=CON
|
|||||||
# TODO: figure out why CLI entrypoints with init_pending are running this twice sometimes
|
# TODO: figure out why CLI entrypoints with init_pending are running this twice sometimes
|
||||||
return
|
return
|
||||||
|
|
||||||
with Progress(transient=True, expand=True, console=CONSOLE) as INITIAL_STARTUP_PROGRESS:
|
with Progress(transient=True, expand=True, console=STDERR) as INITIAL_STARTUP_PROGRESS:
|
||||||
INITIAL_STARTUP_PROGRESS_TASK = INITIAL_STARTUP_PROGRESS.add_task("[green]Loading modules...", total=25)
|
INITIAL_STARTUP_PROGRESS_TASK = INITIAL_STARTUP_PROGRESS.add_task("[green]Loading modules...", total=25)
|
||||||
|
|
||||||
output_dir = out_dir or CONSTANTS.DATA_DIR
|
output_dir = out_dir or CONSTANTS.DATA_DIR
|
||||||
@ -595,7 +591,14 @@ def setup_django(out_dir: Path | None=None, check_db=False, config: benedict=CON
|
|||||||
else:
|
else:
|
||||||
# Otherwise use default sqlite3 file-based database and initialize django
|
# Otherwise use default sqlite3 file-based database and initialize django
|
||||||
# without running migrations automatically (user runs them manually by calling init)
|
# without running migrations automatically (user runs them manually by calling init)
|
||||||
django.setup()
|
try:
|
||||||
|
django.setup()
|
||||||
|
except Exception as e:
|
||||||
|
bump_startup_progress_bar(advance=1000)
|
||||||
|
STDERR.print()
|
||||||
|
STDERR.print(Panel(f'\n[red]{e.__class__.__name__}[/red]: [yellow]{e}[/yellow]\nPlease check your config and [blue]DATA_DIR[/blue] permissions.\n', title='\n\n[red][X] Error while trying to load database!', subtitle='[grey53]NO WRITES CAN BE PERFORMED[/grey53]', expand=False, style='bold red'))
|
||||||
|
STDERR.print()
|
||||||
|
return
|
||||||
|
|
||||||
bump_startup_progress_bar()
|
bump_startup_progress_bar()
|
||||||
|
|
||||||
@ -608,6 +611,17 @@ def setup_django(out_dir: Path | None=None, check_db=False, config: benedict=CON
|
|||||||
f.write(f"\n> {command}; TS={ts} VERSION={CONSTANTS.VERSION} IN_DOCKER={SHELL_CONFIG.IN_DOCKER} IS_TTY={SHELL_CONFIG.IS_TTY}\n")
|
f.write(f"\n> {command}; TS={ts} VERSION={CONSTANTS.VERSION} IN_DOCKER={SHELL_CONFIG.IN_DOCKER} IS_TTY={SHELL_CONFIG.IS_TTY}\n")
|
||||||
|
|
||||||
if check_db:
|
if check_db:
|
||||||
|
# make sure the data dir is owned by a non-root user
|
||||||
|
if CONSTANTS.DATA_DIR.stat().st_uid == 0:
|
||||||
|
STDERR.print('[red][X] Error: ArchiveBox DATA_DIR cannot be owned by root![/red]')
|
||||||
|
STDERR.print(f' {CONSTANTS.DATA_DIR}')
|
||||||
|
STDERR.print()
|
||||||
|
STDERR.print('[violet]Hint:[/violet] Are you running archivebox in the right folder? (and as a non-root user?)')
|
||||||
|
STDERR.print(' cd path/to/your/archive/data')
|
||||||
|
STDERR.print(' archivebox [command]')
|
||||||
|
STDERR.print()
|
||||||
|
raise SystemExit(9)
|
||||||
|
|
||||||
# Create cache table in DB if needed
|
# Create cache table in DB if needed
|
||||||
try:
|
try:
|
||||||
from django.core.cache import cache
|
from django.core.cache import cache
|
||||||
|
152
archivebox/config/paths.py
Normal file
152
archivebox/config/paths.py
Normal file
@ -0,0 +1,152 @@
|
|||||||
|
__package__ = 'archivebox.config'
|
||||||
|
|
||||||
|
import os
|
||||||
|
import tempfile
|
||||||
|
import hashlib
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from functools import cache
|
||||||
|
from platformdirs import PlatformDirs
|
||||||
|
|
||||||
|
from .permissions import SudoPermission, IS_ROOT, ARCHIVEBOX_USER, ARCHIVEBOX_GROUP
|
||||||
|
|
||||||
|
#############################################################################################
|
||||||
|
|
||||||
|
PACKAGE_DIR: Path = Path(__file__).resolve().parent.parent # archivebox source code dir
|
||||||
|
DATA_DIR: Path = Path(os.getcwd()).resolve() # archivebox user data dir
|
||||||
|
ARCHIVE_DIR: Path = DATA_DIR / 'archive' # archivebox snapshot data dir
|
||||||
|
|
||||||
|
#############################################################################################
|
||||||
|
|
||||||
|
@cache
|
||||||
|
def get_collection_id(DATA_DIR=DATA_DIR):
|
||||||
|
"""Get a short, stable, unique ID for the current collection"""
|
||||||
|
collection_id_file = DATA_DIR / '.collection_id'
|
||||||
|
|
||||||
|
try:
|
||||||
|
return collection_id_file.read_text().strip()
|
||||||
|
except (OSError, FileNotFoundError, PermissionError):
|
||||||
|
pass
|
||||||
|
|
||||||
|
hash_key = str(DATA_DIR.resolve()).encode()
|
||||||
|
collection_id = hashlib.sha256(hash_key).hexdigest()[:8]
|
||||||
|
try:
|
||||||
|
collection_id_file.write_text(collection_id)
|
||||||
|
except (OSError, FileNotFoundError, PermissionError):
|
||||||
|
pass
|
||||||
|
return collection_id
|
||||||
|
|
||||||
|
|
||||||
|
def dir_is_writable(dir_path: Path, uid: int | None = None, gid: int | None = None, fallback=True) -> bool:
|
||||||
|
"""Check if a given directory is writable by a specific user and group (fallback=try as current user is unable to check with provided uid)"""
|
||||||
|
current_uid, current_gid = os.geteuid(), os.getegid()
|
||||||
|
uid, gid = uid or current_uid, gid or current_gid
|
||||||
|
|
||||||
|
test_file = dir_path / '.permissions_test'
|
||||||
|
try:
|
||||||
|
with SudoPermission(uid=uid, fallback=fallback):
|
||||||
|
test_file.exists()
|
||||||
|
test_file.write_text(f'Checking if PUID={uid} PGID={gid} can write to dir')
|
||||||
|
test_file.unlink()
|
||||||
|
return True
|
||||||
|
except (IOError, OSError, PermissionError):
|
||||||
|
pass
|
||||||
|
|
||||||
|
return False
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@cache
def get_LIB_DIR():
    """
    Get the machine-wide directory used to store binaries/packages installed by ArchiveBox.

    - should be shared with other collections on the same host
    - must be scoped by CPU architecture, OS family, and archivebox version
    - should not be shared with other hosts/archivebox versions
    - must be writable by any archivebox user
    - should be persistent across reboots
    - can be on a docker bind mount but probably shouldn't be
    - ok to have a long path (doesn't contain SOCKETS)
    """
    from .version import detect_installed_version

    # version= scopes the dir per-archivebox-release so upgrades get a fresh lib dir
    HOST_DIRS = PlatformDirs(appname='archivebox', appauthor='ArchiveBox', version=detect_installed_version(), opinion=True, ensure_exists=False)

    if 'SYSTEM_LIB_DIR' in os.environ:
        # explicit operator override always wins
        lib_dir = Path(os.environ['SYSTEM_LIB_DIR'])
    else:
        # do the platformdirs lookup as the archivebox user (result can depend on euid/home)
        with SudoPermission(uid=ARCHIVEBOX_USER, fallback=True):
            lib_dir = HOST_DIRS.site_data_path

    # Docker: /usr/local/share/archivebox/0.8.5
    # Ubuntu: /usr/local/share/archivebox/0.8.5
    # macOS: /Library/Application Support/archivebox
    try:
        with SudoPermission(uid=0, fallback=True):
            lib_dir.mkdir(parents=True, exist_ok=True)
    except PermissionError:
        # our user cannot create the system-wide dir -> fall back to a per-user data dir
        lib_dir = HOST_DIRS.user_data_path
        lib_dir.mkdir(parents=True, exist_ok=True)

    if not dir_is_writable(lib_dir):
        if IS_ROOT:
            # make sure lib dir is owned by the archivebox user, not root
            with SudoPermission(uid=0):
                os.system(f'chown {ARCHIVEBOX_USER}:{ARCHIVEBOX_GROUP} "{lib_dir}"')
        else:
            raise PermissionError(f'SYSTEM_LIB_DIR {lib_dir} is not writable by archivebox user {ARCHIVEBOX_USER}:{ARCHIVEBOX_GROUP}')

    return lib_dir
|
||||||
|
|
||||||
|
@cache
def get_TMP_DIR():
    """
    Get the ephemeral per-collection runtime dir (sockets, pidfiles, etc.).

    - must NOT be inside DATA_DIR / inside a docker volume bind mount
    - must NOT have a long PATH (UNIX socket path length restrictions)
    - must NOT be shared with other collections/hosts
    - must be writable by archivebox user & root
    - must be cleared on every boot / not persisted
    - must be cleared on every archivebox version upgrade
    """
    from .version import detect_installed_version

    # version= scopes the dir per-archivebox-release so it is effectively reset on upgrade
    HOST_DIRS = PlatformDirs(appname='archivebox', appauthor='ArchiveBox', version=detect_installed_version(), opinion=True, ensure_exists=False)

    # print('DATA_DIR OWNED BY:', ARCHIVEBOX_USER, ARCHIVEBOX_GROUP)
    # print('RUNNING AS:', self.PUID, self.PGID)

    if 'SYSTEM_TMP_DIR' in os.environ:
        # operator override: still suffixed with the collection id so multiple
        # collections pointed at the same SYSTEM_TMP_DIR don't collide
        run_dir = Path(os.environ['SYSTEM_TMP_DIR']).resolve() / get_collection_id(DATA_DIR=DATA_DIR)
        with SudoPermission(uid=0, fallback=True):
            run_dir.mkdir(parents=True, exist_ok=True)
        if not dir_is_writable(run_dir):
            if IS_ROOT:
                # chown the dir to the archivebox user so future non-root runs can use it
                with SudoPermission(uid=0, fallback=False):
                    os.system(f'chown {ARCHIVEBOX_USER}:{ARCHIVEBOX_GROUP} "{run_dir}"')
            else:
                raise PermissionError(f'SYSTEM_TMP_DIR {run_dir} is not writable by archivebox user {ARCHIVEBOX_USER}:{ARCHIVEBOX_GROUP}')
        # 95 leaves headroom under the ~108-char AF_UNIX sun_path limit
        # NOTE(review): asserts are stripped under `python -O` — confirm that is acceptable here
        assert len(str(run_dir / 'supervisord.conf')) < 95, 'SYSTEM_TMP_DIR path is too long, please set SYSTEM_TMP_DIR env variable to a shorter path (unfortunately unix requires socket paths be < 108 chars)'
        return run_dir

    run_dir = (HOST_DIRS.site_runtime_path / get_collection_id(DATA_DIR=DATA_DIR)).resolve()
    try:
        # reject the platform runtime path if the resulting socket path would be too long
        assert len(str(run_dir)) + len('/supervisord.sock') < 95
    except AssertionError:
        # fall back to the system tempdir, which usually has a much shorter path
        # NOTE(review): relies on `tempfile` being imported at module scope — confirm the
        # module-level `import tempfile` wasn't dropped in this refactor
        run_dir = Path(tempfile.gettempdir()).resolve() / 'archivebox' / get_collection_id(DATA_DIR=DATA_DIR)
        assert len(str(run_dir)) + len('/supervisord.sock') < 95, 'SYSTEM_TMP_DIR path is too long, please set SYSTEM_TMP_DIR env variable to a shorter path (unfortunately unix requires socket paths be < 108 chars)'

    with SudoPermission(uid=0, fallback=True):
        run_dir.mkdir(parents=True, exist_ok=True)

    if not dir_is_writable(run_dir):
        if IS_ROOT:
            # chown the dir to the archivebox user so future non-root runs can use it
            with SudoPermission(uid=0):
                os.system(f'chown {ARCHIVEBOX_USER}:{ARCHIVEBOX_GROUP} "{run_dir}"')
        else:
            raise PermissionError(f'SYSTEM_TMP_DIR {run_dir} is not writable by archivebox user {ARCHIVEBOX_USER}:{ARCHIVEBOX_GROUP}')

    # Docker: /tmp/archivebox/0.8.5/abc324235
    # Ubuntu: /tmp/archivebox/0.8.5/abc324235
    # macOS: /var/folders/qy/6tpfrpx100j1t4l312nz683m0000gn/T/archivebox/0.8.5/abc324235
    return run_dir
|
||||||
|
|
70
archivebox/config/permissions.py
Normal file
70
archivebox/config/permissions.py
Normal file
@ -0,0 +1,70 @@
|
|||||||
|
__package__ = 'archivebox.config'

import os
from pathlib import Path
from contextlib import contextmanager

#############################################################################################

# Module-level snapshot of the process identity and the data dir's on-disk ownership,
# used to decide which UID/GID ArchiveBox should run as.

DATA_DIR = Path(os.getcwd())               # collection data dir == cwd at import time

DATA_DIR_STAT = Path(DATA_DIR).stat()
DATA_DIR_UID = DATA_DIR_STAT.st_uid        # owner uid of the data dir on disk
DATA_DIR_GID = DATA_DIR_STAT.st_gid        # owner gid of the data dir on disk
DEFAULT_PUID = 911                         # fallback non-root uid (common container convention)
DEFAULT_PGID = 911                         # fallback non-root gid
RUNNING_AS_UID = os.getuid()               # real uid the process was started with
RUNNING_AS_GID = os.getgid()               # real gid the process was started with
EUID = os.geteuid()                        # effective uid (may differ after seteuid)
EGID = os.getegid()                        # effective gid
USER: str = Path('~').expanduser().resolve().name   # username taken from $HOME's last path component

IS_ROOT = RUNNING_AS_UID == 0
IN_DOCKER = os.environ.get('IN_DOCKER', False) in ('1', 'true', 'True', 'TRUE', 'yes')

# explicit PUID/PGID env vars win; otherwise prefer the data dir's owner, then the
# current user, then the 911 default (uid 0 is falsy here, so root is skipped on purpose)
os.environ.setdefault('PUID', str(DATA_DIR_UID or RUNNING_AS_UID or DEFAULT_PUID))
os.environ.setdefault('PGID', str(DATA_DIR_GID or RUNNING_AS_GID or DEFAULT_PGID))

ARCHIVEBOX_USER = int(os.environ['PUID'])  # uid archivebox should run as
ARCHIVEBOX_GROUP = int(os.environ['PGID']) # gid archivebox should run as
|
||||||
|
|
||||||
|
#############################################################################################
|
||||||
|
|
||||||
|
def drop_privileges():
    """If running as root, drop privileges to the user that owns the data dir (or PUID, or default=911)"""
    # archive work should never run as root; only the effective uid is switched here
    if os.getuid() != 0:
        return                                  # not root -> nothing to drop

    if os.geteuid() == ARCHIVEBOX_USER:
        return                                  # already running as the target user

    # switch effective uid to the data-dir owner / provided PUID
    os.seteuid(ARCHIVEBOX_USER)
    # code that genuinely needs root again (e.g. installing dependencies)
    # should wrap itself in the SudoPermission() context manager
|
||||||
|
|
||||||
|
|
||||||
|
@contextmanager
def SudoPermission(uid=0, fallback=False):
    """Attempt to run code with sudo permissions for a given user (or root).

    Temporarily sets the effective UID to `uid` for the duration of the
    `with` block, then reverts it to the DATA_DIR owner's uid on exit.
    With fallback=True, failures to change euid are tolerated and the
    block simply runs as the current user instead of raising.
    """

    if os.geteuid() == uid:
        # no need to change effective UID, we are already that user
        yield
        return

    try:
        # change our effective UID to the given UID
        os.seteuid(uid)
    except PermissionError as err:
        if not fallback:
            raise PermissionError(f'Not enough permissions to run code as uid={uid}, please retry with sudo') from err
        # fallback=True: continue with our current (unprivileged) euid
    try:
        # yield back to the caller so they can run code inside context as root
        yield
    finally:
        # then set effective UID back to DATA_DIR owner
        DATA_DIR_OWNER = DATA_DIR.stat().st_uid
        try:
            os.seteuid(DATA_DIR_OWNER)
        except PermissionError as err:
            if not fallback:
                raise PermissionError(f'Failed to revert uid={uid} back to {DATA_DIR_OWNER} after running code with sudo') from err
|
||||||
|
|
121
archivebox/config/version.py
Normal file
121
archivebox/config/version.py
Normal file
@ -0,0 +1,121 @@
|
|||||||
|
__package__ = 'archivebox.config'

import os
import importlib.metadata

from pathlib import Path
from functools import cache
from datetime import datetime
from typing import Optional

#############################################################################################

# truthy only for the exact spellings the Dockerfile/env files are expected to use
IN_DOCKER = os.environ.get('IN_DOCKER', False) in ('1', 'true', 'True', 'TRUE', 'yes')

PACKAGE_DIR: Path = Path(__file__).resolve().parent.parent      # archivebox source code dir
DATA_DIR: Path = Path(os.getcwd()).resolve()                    # archivebox user data dir
ARCHIVE_DIR: Path = DATA_DIR / 'archive'                        # archivebox snapshot data dir

#############################################################################################
|
||||||
|
|
||||||
|
|
||||||
|
@cache
def detect_installed_version(PACKAGE_DIR: Path=PACKAGE_DIR):
    """Autodetect the installed archivebox version by using pip package metadata, pyproject.toml file, or package.json file"""
    try:
        # if in production install, use pip-installed package metadata
        return importlib.metadata.version('archivebox').strip()
    except importlib.metadata.PackageNotFoundError:
        pass

    try:
        # if in dev Git repo dir, use pyproject.toml file
        pyproject_config = (PACKAGE_DIR.parent / 'pyproject.toml').read_text().split('\n')
        for line in pyproject_config:
            if line.startswith('version = '):
                # strip whitespace BEFORE the quotes: with the original order
                # ('"' first), a trailing space after the closing quote would
                # leave the quote character in the returned version string
                return line.split(' = ', 1)[-1].strip().strip('"')
    except FileNotFoundError:
        # building docs, pyproject.toml is not available
        pass

    # raise Exception('Failed to detect installed archivebox version!')
    return 'dev'
|
||||||
|
|
||||||
|
|
||||||
|
@cache
def get_COMMIT_HASH() -> Optional[str]:
    """Best-effort lookup of the current git commit hash from the source checkout (None if unavailable)."""
    # preferred path: resolve the ref named in .git/HEAD to its commit hash
    try:
        git_dir = PACKAGE_DIR / '../.git'
        head_ref = (git_dir / 'HEAD').read_text().strip().split(' ')[-1]
        return git_dir.joinpath(head_ref).read_text().strip()
    except Exception:
        pass

    # fallback: read the first branch ref file found under refs/heads/
    try:
        branch_refs = sorted((PACKAGE_DIR / '../.git/refs/heads/').glob('*'))
        return branch_refs[0].read_text().strip()
    except Exception:
        pass

    return None
|
||||||
|
|
||||||
|
@cache
def get_BUILD_TIME() -> str:
    """Return the time this ArchiveBox build was produced (Docker image build time, or source tree mtime otherwise)."""
    if IN_DOCKER:
        # the Docker image stamps BUILD_END_TIME=... into /VERSION.txt at build time
        version_txt = Path('/VERSION.txt').read_text()
        return version_txt.rsplit('BUILD_END_TIME=')[-1].split('\n', 1)[0]

    # outside docker, approximate the build time with the source tree's last-modified time
    # NOTE(review): '%s' (epoch seconds) is a glibc strftime extension, not portable — confirm
    src_mtime = (PACKAGE_DIR / 'README.md').stat().st_mtime
    return datetime.fromtimestamp(src_mtime).strftime('%Y-%m-%d %H:%M:%S %s')
|
||||||
|
|
||||||
|
|
||||||
|
# def get_versions_available_on_github(config):
|
||||||
|
# """
|
||||||
|
# returns a dictionary containing the ArchiveBox GitHub release info for
|
||||||
|
# the recommended upgrade version and the currently installed version
|
||||||
|
# """
|
||||||
|
|
||||||
|
# # we only want to perform the (relatively expensive) check for new versions
|
||||||
|
# # when its most relevant, e.g. when the user runs a long-running command
|
||||||
|
# subcommand_run_by_user = sys.argv[3] if len(sys.argv) > 3 else 'help'
|
||||||
|
# long_running_commands = ('add', 'schedule', 'update', 'status', 'server')
|
||||||
|
# if subcommand_run_by_user not in long_running_commands:
|
||||||
|
# return None
|
||||||
|
|
||||||
|
# github_releases_api = "https://api.github.com/repos/ArchiveBox/ArchiveBox/releases"
|
||||||
|
# response = requests.get(github_releases_api)
|
||||||
|
# if response.status_code != 200:
|
||||||
|
# stderr(f'[!] Warning: GitHub API call to check for new ArchiveBox version failed! (status={response.status_code})', color='lightyellow', config=config)
|
||||||
|
# return None
|
||||||
|
# all_releases = response.json()
|
||||||
|
|
||||||
|
# installed_version = parse_version_string(config['VERSION'])
|
||||||
|
|
||||||
|
# # find current version or nearest older version (to link to)
|
||||||
|
# current_version = None
|
||||||
|
# for idx, release in enumerate(all_releases):
|
||||||
|
# release_version = parse_version_string(release['tag_name'])
|
||||||
|
# if release_version <= installed_version:
|
||||||
|
# current_version = release
|
||||||
|
# break
|
||||||
|
|
||||||
|
# current_version = current_version or all_releases[-1]
|
||||||
|
|
||||||
|
# # recommended version is whatever comes after current_version in the release list
|
||||||
|
# # (perhaps too conservative to only recommend upgrading one version at a time, but it's safest)
|
||||||
|
# try:
|
||||||
|
# recommended_version = all_releases[idx+1]
|
||||||
|
# except IndexError:
|
||||||
|
# recommended_version = None
|
||||||
|
|
||||||
|
# return {'recommended_version': recommended_version, 'current_version': current_version}
|
||||||
|
|
||||||
|
# def can_upgrade(config):
|
||||||
|
# if config['VERSIONS_AVAILABLE'] and config['VERSIONS_AVAILABLE']['recommended_version']:
|
||||||
|
# recommended_version = parse_version_string(config['VERSIONS_AVAILABLE']['recommended_version']['tag_name'])
|
||||||
|
# current_version = parse_version_string(config['VERSIONS_AVAILABLE']['current_version']['tag_name'])
|
||||||
|
# return recommended_version > current_version
|
||||||
|
# return False
|
||||||
|
|
||||||
|
|
||||||
|
VERSION: str = detect_installed_version()
|
@ -5,7 +5,7 @@ from django.utils import timezone
|
|||||||
from django.contrib.auth.middleware import RemoteUserMiddleware
|
from django.contrib.auth.middleware import RemoteUserMiddleware
|
||||||
from django.core.exceptions import ImproperlyConfigured
|
from django.core.exceptions import ImproperlyConfigured
|
||||||
|
|
||||||
from archivebox.config import SERVER_CONFIG
|
from archivebox.config.common import SERVER_CONFIG
|
||||||
|
|
||||||
|
|
||||||
def detect_timezone(request, activate: bool=True):
|
def detect_timezone(request, activate: bool=True):
|
||||||
|
@ -13,7 +13,8 @@ import abx.archivebox
|
|||||||
import abx.archivebox.use
|
import abx.archivebox.use
|
||||||
import abx.django.use
|
import abx.django.use
|
||||||
|
|
||||||
from archivebox.config import VERSION, DATA_DIR, PACKAGE_DIR, ARCHIVE_DIR, CONSTANTS, SHELL_CONFIG, SERVER_CONFIG # noqa
|
from archivebox.config import DATA_DIR, PACKAGE_DIR, ARCHIVE_DIR, CONSTANTS
|
||||||
|
from archivebox.config.common import SHELL_CONFIG, SERVER_CONFIG # noqa
|
||||||
|
|
||||||
IS_MIGRATING = 'makemigrations' in sys.argv[:3] or 'migrate' in sys.argv[:3]
|
IS_MIGRATING = 'makemigrations' in sys.argv[:3] or 'migrate' in sys.argv[:3]
|
||||||
IS_TESTING = 'test' in sys.argv[:3] or 'PYTEST_CURRENT_TEST' in os.environ
|
IS_TESTING = 'test' in sys.argv[:3] or 'PYTEST_CURRENT_TEST' in os.environ
|
||||||
|
@ -27,7 +27,8 @@ from core.admin import result_url
|
|||||||
|
|
||||||
from queues.tasks import bg_add
|
from queues.tasks import bg_add
|
||||||
|
|
||||||
from archivebox.config import CONSTANTS_CONFIG, DATA_DIR, VERSION, SHELL_CONFIG, SERVER_CONFIG
|
from archivebox.config import CONSTANTS_CONFIG, DATA_DIR, VERSION
|
||||||
|
from archivebox.config.common import SHELL_CONFIG, SERVER_CONFIG
|
||||||
from archivebox.misc.util import base_url, htmlencode, ts_to_date_str
|
from archivebox.misc.util import base_url, htmlencode, ts_to_date_str
|
||||||
|
|
||||||
from .serve_static import serve_static_with_byterange_support
|
from .serve_static import serve_static_with_byterange_support
|
||||||
|
@ -5,7 +5,8 @@ import io
|
|||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Optional
|
from typing import Optional
|
||||||
|
|
||||||
from archivebox.config import VERSION, ARCHIVING_CONFIG
|
from archivebox.config import VERSION
|
||||||
|
from archivebox.config.common import ARCHIVING_CONFIG
|
||||||
from archivebox.config.legacy import SAVE_HTMLTOTEXT
|
from archivebox.config.legacy import SAVE_HTMLTOTEXT
|
||||||
from archivebox.misc.system import atomic_write
|
from archivebox.misc.system import atomic_write
|
||||||
from archivebox.misc.util import enforce_types, is_static_file
|
from archivebox.misc.util import enforce_types, is_static_file
|
||||||
|
@ -12,9 +12,11 @@ from urllib.parse import urlparse
|
|||||||
from django.db.models import QuerySet, Q
|
from django.db.models import QuerySet, Q
|
||||||
|
|
||||||
|
|
||||||
from archivebox.config import DATA_DIR, CONSTANTS, ARCHIVING_CONFIG, STORAGE_CONFIG, SEARCH_BACKEND_CONFIG
|
|
||||||
from archivebox.misc.util import scheme, enforce_types, ExtendedEncoder
|
|
||||||
from archivebox.misc.logging import stderr
|
from archivebox.misc.logging import stderr
|
||||||
|
from archivebox.misc.util import scheme, enforce_types, ExtendedEncoder
|
||||||
|
|
||||||
|
from archivebox.config import DATA_DIR, CONSTANTS
|
||||||
|
from archivebox.config.common import ARCHIVING_CONFIG, STORAGE_CONFIG, SEARCH_BACKEND_CONFIG
|
||||||
from archivebox.config.legacy import URL_DENYLIST_PTN, URL_ALLOWLIST_PTN
|
from archivebox.config.legacy import URL_DENYLIST_PTN, URL_ALLOWLIST_PTN
|
||||||
|
|
||||||
from ..logging_util import (
|
from ..logging_util import (
|
||||||
|
@ -16,7 +16,9 @@ from archivebox.misc.util import (
|
|||||||
htmlencode,
|
htmlencode,
|
||||||
urldecode,
|
urldecode,
|
||||||
)
|
)
|
||||||
from archivebox.config import CONSTANTS, DATA_DIR, VERSION, SHELL_CONFIG, SERVER_CONFIG
|
from archivebox.config import CONSTANTS, DATA_DIR, VERSION
|
||||||
|
from archivebox.config.common import SERVER_CONFIG
|
||||||
|
from archivebox.config.version import get_COMMIT_HASH
|
||||||
from archivebox.plugins_extractor.archivedotorg.apps import ARCHIVEDOTORG_CONFIG
|
from archivebox.plugins_extractor.archivedotorg.apps import ARCHIVEDOTORG_CONFIG
|
||||||
|
|
||||||
from .schema import Link
|
from .schema import Link
|
||||||
@ -56,7 +58,7 @@ def main_index_template(links: List[Link], template: str=MAIN_INDEX_TEMPLATE) ->
|
|||||||
|
|
||||||
return render_django_template(template, {
|
return render_django_template(template, {
|
||||||
'version': VERSION,
|
'version': VERSION,
|
||||||
'git_sha': SHELL_CONFIG.COMMIT_HASH or VERSION,
|
'git_sha': get_COMMIT_HASH() or VERSION,
|
||||||
'num_links': str(len(links)),
|
'num_links': str(len(links)),
|
||||||
'date_updated': datetime.now(timezone.utc).strftime('%Y-%m-%d'),
|
'date_updated': datetime.now(timezone.utc).strftime('%Y-%m-%d'),
|
||||||
'time_updated': datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M'),
|
'time_updated': datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M'),
|
||||||
|
@ -8,7 +8,8 @@ from pathlib import Path
|
|||||||
from datetime import datetime, timezone
|
from datetime import datetime, timezone
|
||||||
from typing import List, Optional, Iterator, Any, Union
|
from typing import List, Optional, Iterator, Any, Union
|
||||||
|
|
||||||
from archivebox.config import VERSION, DATA_DIR, CONSTANTS, SERVER_CONFIG, SHELL_CONFIG
|
from archivebox.config import VERSION, DATA_DIR, CONSTANTS
|
||||||
|
from archivebox.config.common import SERVER_CONFIG, SHELL_CONFIG
|
||||||
|
|
||||||
from .schema import Link
|
from .schema import Link
|
||||||
from archivebox.misc.system import atomic_write
|
from archivebox.misc.system import atomic_write
|
||||||
|
@ -9,7 +9,8 @@ from django.db.models import QuerySet
|
|||||||
from django.db import transaction
|
from django.db import transaction
|
||||||
|
|
||||||
from archivebox.misc.util import enforce_types, parse_date
|
from archivebox.misc.util import enforce_types, parse_date
|
||||||
from archivebox.config import DATA_DIR, GENERAL_CONFIG
|
from archivebox.config import DATA_DIR
|
||||||
|
from archivebox.config.common import GENERAL_CONFIG
|
||||||
|
|
||||||
from .schema import Link
|
from .schema import Link
|
||||||
|
|
||||||
|
@ -22,7 +22,8 @@ from rich.panel import Panel
|
|||||||
from rich_argparse import RichHelpFormatter
|
from rich_argparse import RichHelpFormatter
|
||||||
from django.core.management.base import DjangoHelpFormatter
|
from django.core.management.base import DjangoHelpFormatter
|
||||||
|
|
||||||
from archivebox.config import CONSTANTS, DATA_DIR, VERSION, SHELL_CONFIG
|
from archivebox.config import CONSTANTS, DATA_DIR, VERSION
|
||||||
|
from archivebox.config.common import SHELL_CONFIG
|
||||||
from archivebox.misc.system import get_dir_size
|
from archivebox.misc.system import get_dir_size
|
||||||
from archivebox.misc.util import enforce_types
|
from archivebox.misc.util import enforce_types
|
||||||
from archivebox.misc.logging import ANSI, stderr
|
from archivebox.misc.logging import ANSI, stderr
|
||||||
|
@ -14,13 +14,15 @@ from crontab import CronTab, CronSlices
|
|||||||
from django.db.models import QuerySet
|
from django.db.models import QuerySet
|
||||||
from django.utils import timezone
|
from django.utils import timezone
|
||||||
|
|
||||||
from archivebox.config import CONSTANTS, VERSION, DATA_DIR, ARCHIVE_DIR, SHELL_CONFIG, SEARCH_BACKEND_CONFIG, STORAGE_CONFIG, SERVER_CONFIG, ARCHIVING_CONFIG
|
from archivebox.config import CONSTANTS, VERSION, DATA_DIR, ARCHIVE_DIR
|
||||||
|
from archivebox.config.common import SHELL_CONFIG, SEARCH_BACKEND_CONFIG, STORAGE_CONFIG, SERVER_CONFIG, ARCHIVING_CONFIG
|
||||||
|
from archivebox.config.permissions import SudoPermission, IN_DOCKER
|
||||||
from .cli import (
|
from .cli import (
|
||||||
CLI_SUBCOMMANDS,
|
CLI_SUBCOMMANDS,
|
||||||
run_subcommand,
|
run_subcommand,
|
||||||
display_first,
|
display_first,
|
||||||
meta_cmds,
|
meta_cmds,
|
||||||
main_cmds,
|
setup_cmds,
|
||||||
archive_cmds,
|
archive_cmds,
|
||||||
)
|
)
|
||||||
from .parsers import (
|
from .parsers import (
|
||||||
@ -101,7 +103,7 @@ def help(out_dir: Path=DATA_DIR) -> None:
|
|||||||
) + '\n\n ' + '\n '.join(
|
) + '\n\n ' + '\n '.join(
|
||||||
f'[green]{cmd.ljust(20)}[/green] {func.__doc__}'
|
f'[green]{cmd.ljust(20)}[/green] {func.__doc__}'
|
||||||
for cmd, func in all_subcommands.items()
|
for cmd, func in all_subcommands.items()
|
||||||
if cmd in main_cmds
|
if cmd in setup_cmds
|
||||||
) + '\n\n ' + '\n '.join(
|
) + '\n\n ' + '\n '.join(
|
||||||
f'[green]{cmd.ljust(20)}[/green] {func.__doc__}'
|
f'[green]{cmd.ljust(20)}[/green] {func.__doc__}'
|
||||||
for cmd, func in all_subcommands.items()
|
for cmd, func in all_subcommands.items()
|
||||||
@ -119,10 +121,10 @@ def help(out_dir: Path=DATA_DIR) -> None:
|
|||||||
|
|
||||||
[grey53]# using Docker:[/grey53]
|
[grey53]# using Docker:[/grey53]
|
||||||
[blue]docker run[/blue] -v [light_slate_blue]$PWD:/data[/light_slate_blue] [grey53]-p 8000:8000[/grey53] -it [dark_green]archivebox/archivebox[/dark_green] [green]\\[command][/green] [green3][...args][/green3] [violet][--help][/violet] [grey53][--version][/grey53]
|
[blue]docker run[/blue] -v [light_slate_blue]$PWD:/data[/light_slate_blue] [grey53]-p 8000:8000[/grey53] -it [dark_green]archivebox/archivebox[/dark_green] [green]\\[command][/green] [green3][...args][/green3] [violet][--help][/violet] [grey53][--version][/grey53]
|
||||||
''' if SHELL_CONFIG.IN_DOCKER else ''
|
''' if IN_DOCKER else ''
|
||||||
DOCKER_DOCS = '\n [link=https://github.com/ArchiveBox/ArchiveBox/wiki/Docker#usage]https://github.com/ArchiveBox/ArchiveBox/wiki/Docker[/link]' if SHELL_CONFIG.IN_DOCKER else ''
|
DOCKER_DOCS = '\n [link=https://github.com/ArchiveBox/ArchiveBox/wiki/Docker#usage]https://github.com/ArchiveBox/ArchiveBox/wiki/Docker[/link]' if IN_DOCKER else ''
|
||||||
DOCKER_OUTSIDE_HINT = "\n [grey53]# outside of Docker:[/grey53]" if SHELL_CONFIG.IN_DOCKER else ''
|
DOCKER_OUTSIDE_HINT = "\n [grey53]# outside of Docker:[/grey53]" if IN_DOCKER else ''
|
||||||
DOCKER_CMD_PREFIX = "[blue]docker ... [/blue]" if SHELL_CONFIG.IN_DOCKER else ''
|
DOCKER_CMD_PREFIX = "[blue]docker ... [/blue]" if IN_DOCKER else ''
|
||||||
|
|
||||||
print(f'''{DOCKER_USAGE}
|
print(f'''{DOCKER_USAGE}
|
||||||
[deep_sky_blue4]Usage:[/deep_sky_blue4]{DOCKER_OUTSIDE_HINT}
|
[deep_sky_blue4]Usage:[/deep_sky_blue4]{DOCKER_OUTSIDE_HINT}
|
||||||
@ -158,7 +160,7 @@ def help(out_dir: Path=DATA_DIR) -> None:
|
|||||||
print(Panel(EXAMPLE_USAGE, expand=False, border_style='grey53', title='[green3]:white_check_mark: A collection [light_slate_blue]DATA DIR[/light_slate_blue] is currently active[/green3]', subtitle='Commands run inside this dir will only apply to this collection.'))
|
print(Panel(EXAMPLE_USAGE, expand=False, border_style='grey53', title='[green3]:white_check_mark: A collection [light_slate_blue]DATA DIR[/light_slate_blue] is currently active[/green3]', subtitle='Commands run inside this dir will only apply to this collection.'))
|
||||||
else:
|
else:
|
||||||
DATA_SETUP_HELP = '\n'
|
DATA_SETUP_HELP = '\n'
|
||||||
if SHELL_CONFIG.IN_DOCKER:
|
if IN_DOCKER:
|
||||||
DATA_SETUP_HELP += '[violet]Hint:[/violet] When using Docker, you need to mount a volume to use as your data dir:\n'
|
DATA_SETUP_HELP += '[violet]Hint:[/violet] When using Docker, you need to mount a volume to use as your data dir:\n'
|
||||||
DATA_SETUP_HELP += ' docker run [violet]-v /some/path/data:/data[/violet] archivebox/archivebox ...\n\n'
|
DATA_SETUP_HELP += ' docker run [violet]-v /some/path/data:/data[/violet] archivebox/archivebox ...\n\n'
|
||||||
DATA_SETUP_HELP += 'To load an [dark_blue]existing[/dark_blue] collection:\n'
|
DATA_SETUP_HELP += 'To load an [dark_blue]existing[/dark_blue] collection:\n'
|
||||||
@ -190,6 +192,8 @@ def version(quiet: bool=False,
|
|||||||
|
|
||||||
from plugins_auth.ldap.apps import LDAP_CONFIG
|
from plugins_auth.ldap.apps import LDAP_CONFIG
|
||||||
from django.conf import settings
|
from django.conf import settings
|
||||||
|
from archivebox.config.version import get_COMMIT_HASH, get_BUILD_TIME
|
||||||
|
from archivebox.config.permissions import ARCHIVEBOX_USER, ARCHIVEBOX_GROUP, RUNNING_AS_UID, RUNNING_AS_GID
|
||||||
|
|
||||||
# 0.7.1
|
# 0.7.1
|
||||||
# ArchiveBox v0.7.1+editable COMMIT_HASH=951bba5 BUILD_TIME=2023-12-17 16:46:05 1702860365
|
# ArchiveBox v0.7.1+editable COMMIT_HASH=951bba5 BUILD_TIME=2023-12-17 16:46:05 1702860365
|
||||||
@ -198,13 +202,14 @@ def version(quiet: bool=False,
|
|||||||
# DEBUG=False IS_TTY=True TZ=UTC SEARCH_BACKEND=ripgrep LDAP=False
|
# DEBUG=False IS_TTY=True TZ=UTC SEARCH_BACKEND=ripgrep LDAP=False
|
||||||
|
|
||||||
p = platform.uname()
|
p = platform.uname()
|
||||||
|
COMMIT_HASH = get_COMMIT_HASH()
|
||||||
prnt(
|
prnt(
|
||||||
'[dark_green]ArchiveBox[/dark_green] [dark_goldenrod]v{}[/dark_goldenrod]'.format(CONSTANTS.VERSION),
|
'[dark_green]ArchiveBox[/dark_green] [dark_goldenrod]v{}[/dark_goldenrod]'.format(CONSTANTS.VERSION),
|
||||||
f'COMMIT_HASH={SHELL_CONFIG.COMMIT_HASH[:7] if SHELL_CONFIG.COMMIT_HASH else "unknown"}',
|
f'COMMIT_HASH={COMMIT_HASH[:7] if COMMIT_HASH else "unknown"}',
|
||||||
f'BUILD_TIME={SHELL_CONFIG.BUILD_TIME}',
|
f'BUILD_TIME={get_BUILD_TIME()}',
|
||||||
)
|
)
|
||||||
prnt(
|
prnt(
|
||||||
f'IN_DOCKER={SHELL_CONFIG.IN_DOCKER}',
|
f'IN_DOCKER={IN_DOCKER}',
|
||||||
f'IN_QEMU={SHELL_CONFIG.IN_QEMU}',
|
f'IN_QEMU={SHELL_CONFIG.IN_QEMU}',
|
||||||
f'ARCH={p.machine}',
|
f'ARCH={p.machine}',
|
||||||
f'OS={p.system}',
|
f'OS={p.system}',
|
||||||
@ -212,11 +217,13 @@ def version(quiet: bool=False,
|
|||||||
f'PYTHON={sys.implementation.name.title()}',
|
f'PYTHON={sys.implementation.name.title()}',
|
||||||
)
|
)
|
||||||
OUTPUT_IS_REMOTE_FS = CONSTANTS.DATA_LOCATIONS.DATA_DIR.is_mount or CONSTANTS.DATA_LOCATIONS.ARCHIVE_DIR.is_mount
|
OUTPUT_IS_REMOTE_FS = CONSTANTS.DATA_LOCATIONS.DATA_DIR.is_mount or CONSTANTS.DATA_LOCATIONS.ARCHIVE_DIR.is_mount
|
||||||
|
DATA_DIR_STAT = CONSTANTS.DATA_DIR.stat()
|
||||||
prnt(
|
prnt(
|
||||||
|
f'EUID={os.geteuid()} UID={RUNNING_AS_UID} PUID={ARCHIVEBOX_USER} FS_UID={DATA_DIR_STAT.st_uid}',
|
||||||
|
f'EGID={os.getegid()} GID={RUNNING_AS_GID} PGID={ARCHIVEBOX_GROUP} FS_GID={DATA_DIR_STAT.st_gid}',
|
||||||
|
f'FS_PERMS={STORAGE_CONFIG.OUTPUT_PERMISSIONS}',
|
||||||
f'FS_ATOMIC={STORAGE_CONFIG.ENFORCE_ATOMIC_WRITES}',
|
f'FS_ATOMIC={STORAGE_CONFIG.ENFORCE_ATOMIC_WRITES}',
|
||||||
f'FS_REMOTE={OUTPUT_IS_REMOTE_FS}',
|
f'FS_REMOTE={OUTPUT_IS_REMOTE_FS}',
|
||||||
f'FS_USER={SHELL_CONFIG.PUID}:{SHELL_CONFIG.PGID}',
|
|
||||||
f'FS_PERMS={STORAGE_CONFIG.OUTPUT_PERMISSIONS}',
|
|
||||||
)
|
)
|
||||||
prnt(
|
prnt(
|
||||||
f'DEBUG={SHELL_CONFIG.DEBUG}',
|
f'DEBUG={SHELL_CONFIG.DEBUG}',
|
||||||
@ -261,8 +268,36 @@ def version(quiet: bool=False,
|
|||||||
else:
|
else:
|
||||||
prnt()
|
prnt()
|
||||||
prnt('[red][i] Data locations:[/red] (not in a data directory)')
|
prnt('[red][i] Data locations:[/red] (not in a data directory)')
|
||||||
|
|
||||||
prnt()
|
prnt()
|
||||||
|
|
||||||
|
from archivebox.config.permissions import ARCHIVEBOX_USER, ARCHIVEBOX_GROUP, DEFAULT_PUID, DEFAULT_PGID, IS_ROOT, USER
|
||||||
|
|
||||||
|
data_dir_stat = Path(DATA_DIR).stat()
|
||||||
|
data_dir_uid, data_dir_gid = data_dir_stat.st_uid, data_dir_stat.st_gid
|
||||||
|
data_owned_by_root = data_dir_uid == 0 or data_dir_gid == 0
|
||||||
|
|
||||||
|
data_owned_by_default_user = data_dir_uid == DEFAULT_PUID or data_dir_gid == DEFAULT_PGID
|
||||||
|
data_owner_doesnt_match = (data_dir_uid != ARCHIVEBOX_USER and data_dir_gid != ARCHIVEBOX_GROUP) and not IS_ROOT
|
||||||
|
data_not_writable = not (os.access(DATA_DIR, os.W_OK) and os.access(CONSTANTS.LIB_DIR, os.W_OK) and os.access(CONSTANTS.TMP_DIR, os.W_OK))
|
||||||
|
if data_owned_by_root:
|
||||||
|
prnt('[yellow]:warning: Warning: ArchiveBox [blue]DATA_DIR[/blue] is currently owned by [red]root[/red], ArchiveBox will refuse to run![/yellow]')
|
||||||
|
elif data_owner_doesnt_match or data_not_writable:
|
||||||
|
prnt(f'[yellow]:warning: Warning: ArchiveBox [blue]DATA_DIR[/blue] is currently owned by [red]{data_dir_uid}:{data_dir_gid}[/red], but ArchiveBox user is [blue]{ARCHIVEBOX_USER}:{ARCHIVEBOX_GROUP}[/blue] ({USER})! (ArchiveBox may not be able to write to the data dir)[/yellow]')
|
||||||
|
else:
|
||||||
|
prnt(f':information: [blue]DATA_DIR[/blue] is currently owned by [blue]{data_dir_uid}:{data_dir_gid}[/blue] (PUID:PGID)')
|
||||||
|
|
||||||
|
if data_owned_by_root or data_owner_doesnt_match or data_owned_by_default_user or data_not_writable:
|
||||||
|
prnt(f'[violet]Hint:[/violet] If you encounter permissions errors, change [red]{data_dir_uid}[/red]:{data_dir_gid} (PUID:PGID) to match the user that will run ArchiveBox, e.g.:')
|
||||||
|
prnt(f' [grey53]sudo[/grey53] chown -R [blue]{DEFAULT_PUID}:{DEFAULT_PGID}[/blue] {DATA_DIR.resolve()}')
|
||||||
|
prnt(f' [grey53]sudo[/grey53] chown -R [blue]{DEFAULT_PUID}:{DEFAULT_PGID}[/blue] {CONSTANTS.LIB_DIR.resolve()}')
|
||||||
|
prnt(f' [grey53]sudo[/grey53] chown -R [blue]{DEFAULT_PUID}:{DEFAULT_PGID}[/blue] {CONSTANTS.TMP_DIR.resolve()}')
|
||||||
|
prnt()
|
||||||
|
prnt('[blue]More info:[/blue]')
|
||||||
|
prnt(' [link=https://github.com/ArchiveBox/ArchiveBox#storage-requirements]https://github.com/ArchiveBox/ArchiveBox#storage-requirements[/link]')
|
||||||
|
prnt(' [link=https://github.com/ArchiveBox/ArchiveBox/wiki/Security-Overview#permissions]https://github.com/ArchiveBox/ArchiveBox/wiki/Security-Overview#permissions[/link]')
|
||||||
|
prnt(' [link=https://github.com/ArchiveBox/ArchiveBox/wiki/Configuration#puid--pgid]https://github.com/ArchiveBox/ArchiveBox/wiki/Configuration#puid--pgid[/link]')
|
||||||
|
prnt(' [link=https://github.com/ArchiveBox/ArchiveBox/wiki/Troubleshooting#filesystem-doesnt-support-fsync-eg-network-mounts]https://github.com/ArchiveBox/ArchiveBox/wiki/Troubleshooting#filesystem-doesnt-support-fsync-eg-network-mounts[/link]')
|
||||||
|
|
||||||
|
|
||||||
@enforce_types
|
@enforce_types
|
||||||
@ -948,23 +983,56 @@ def list_folders(links: List[Link],
|
|||||||
@enforce_types
|
@enforce_types
|
||||||
def install(out_dir: Path=DATA_DIR) -> None:
|
def install(out_dir: Path=DATA_DIR) -> None:
|
||||||
"""Automatically install all ArchiveBox dependencies and extras"""
|
"""Automatically install all ArchiveBox dependencies and extras"""
|
||||||
|
|
||||||
|
# if running as root:
|
||||||
|
# - run init to create index + lib dir
|
||||||
|
# - chown -R 911 DATA_DIR
|
||||||
|
# - install all binaries as root
|
||||||
|
# - chown -R 911 LIB_DIR
|
||||||
|
# else:
|
||||||
|
# - run init to create index + lib dir as current user
|
||||||
|
# - install all binaries as current user
|
||||||
|
# - recommend user re-run with sudo if any deps need to be installed as root
|
||||||
|
|
||||||
from rich import print
|
from rich import print
|
||||||
from django.conf import settings
|
from django.conf import settings
|
||||||
|
|
||||||
|
from archivebox import CONSTANTS
|
||||||
|
from archivebox.config.permissions import IS_ROOT, ARCHIVEBOX_USER, ARCHIVEBOX_GROUP
|
||||||
|
|
||||||
if not ARCHIVE_DIR.exists():
|
if not ARCHIVE_DIR.exists():
|
||||||
run_subcommand('init', stdin=None, pwd=out_dir)
|
run_subcommand('init', stdin=None, pwd=out_dir) # must init full index because we need a db to store InstalledBinary entries in
|
||||||
|
|
||||||
stderr('\n[+] Installing ArchiveBox dependencies automatically...', color='green')
|
|
||||||
|
|
||||||
|
print('\n[green][+] Installing ArchiveBox dependencies automatically...[/green]')
|
||||||
|
|
||||||
|
# we never want the data dir to be owned by root, detect owner of existing owner of DATA_DIR to try and guess desired non-root UID
|
||||||
|
if IS_ROOT:
|
||||||
|
# if we have sudo/root permissions, take advantage of them just while installing dependencies
|
||||||
|
print()
|
||||||
|
print('[yellow]:warning: Using [red]root[/red] privileges only to install dependencies that need it, all other operations should be done as a [blue]non-root[/blue] user.[/yellow]')
|
||||||
|
print(f' DATA_DIR, LIB_DIR, and TMP_DIR will be owned by [blue]{ARCHIVEBOX_USER}:{ARCHIVEBOX_GROUP}[/blue].')
|
||||||
|
print()
|
||||||
|
|
||||||
for binary in reversed(list(settings.BINARIES.values())):
|
for binary in reversed(list(settings.BINARIES.values())):
|
||||||
providers = ' [grey53]or[/grey53] '.join(provider.name for provider in binary.binproviders_supported)
|
providers = ' [grey53]or[/grey53] '.join(provider.name for provider in binary.binproviders_supported)
|
||||||
print(f'[+] Locating / Installing [yellow]{binary.name}[/yellow] using [red]{providers}[/red]...')
|
print(f'[+] Locating / Installing [yellow]{binary.name}[/yellow] using [red]{providers}[/red]...')
|
||||||
try:
|
try:
|
||||||
print(binary.load_or_install(fresh=True).model_dump(exclude={'provider_overrides', 'bin_dir', 'hook_type'}))
|
print(binary.load_or_install(fresh=True).model_dump(exclude={'provider_overrides', 'bin_dir', 'hook_type'}))
|
||||||
|
if IS_ROOT:
|
||||||
|
with SudoPermission(uid=0):
|
||||||
|
os.system(f'chown -R {ARCHIVEBOX_USER} "{CONSTANTS.LIB_DIR.resolve()}"')
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f'[X] Failed to install {binary.name}: {e}')
|
if IS_ROOT:
|
||||||
|
print(f'[yellow]:warning: Retrying {binary.name} installation with [red]sudo[/red]...[/yellow]')
|
||||||
|
with SudoPermission(uid=0):
|
||||||
|
try:
|
||||||
|
print(binary.load_or_install(fresh=True).model_dump(exclude={'provider_overrides', 'bin_dir', 'hook_type'}))
|
||||||
|
os.system(f'chown -R {ARCHIVEBOX_USER} "{CONSTANTS.LIB_DIR.resolve()}"')
|
||||||
|
except Exception as e:
|
||||||
|
print(f'[red]:cross_mark: Failed to install {binary.name} as root: {e}[/red]')
|
||||||
|
else:
|
||||||
|
print(f'[red]:cross_mark: Failed to install {binary.name} as user {ARCHIVEBOX_USER}: {e}[/red]')
|
||||||
|
|
||||||
|
|
||||||
from django.contrib.auth import get_user_model
|
from django.contrib.auth import get_user_model
|
||||||
User = get_user_model()
|
User = get_user_model()
|
||||||
@ -974,12 +1042,13 @@ def install(out_dir: Path=DATA_DIR) -> None:
|
|||||||
stderr(' archivebox manage createsuperuser')
|
stderr(' archivebox manage createsuperuser')
|
||||||
# run_subcommand('manage', subcommand_args=['createsuperuser'], pwd=out_dir)
|
# run_subcommand('manage', subcommand_args=['createsuperuser'], pwd=out_dir)
|
||||||
|
|
||||||
stderr('\n[√] Set up ArchiveBox and its dependencies successfully.', color='green')
|
print('\n[green][√] Set up ArchiveBox and its dependencies successfully.[/green]\n', file=sys.stderr)
|
||||||
|
|
||||||
from plugins_pkg.pip.apps import ARCHIVEBOX_BINARY
|
from plugins_pkg.pip.apps import ARCHIVEBOX_BINARY
|
||||||
|
|
||||||
run_shell([ARCHIVEBOX_BINARY.load().abspath, 'version'], capture_output=False, cwd=out_dir)
|
run_shell([ARCHIVEBOX_BINARY.load().abspath, 'version'], capture_output=False, cwd=out_dir)
|
||||||
|
|
||||||
|
|
||||||
# backwards-compatibility:
|
# backwards-compatibility:
|
||||||
setup = install
|
setup = install
|
||||||
|
|
||||||
@ -1100,6 +1169,7 @@ def schedule(add: bool=False,
|
|||||||
|
|
||||||
check_data_folder()
|
check_data_folder()
|
||||||
from archivebox.plugins_pkg.pip.apps import ARCHIVEBOX_BINARY
|
from archivebox.plugins_pkg.pip.apps import ARCHIVEBOX_BINARY
|
||||||
|
from archivebox.config.permissions import USER
|
||||||
|
|
||||||
Path(CONSTANTS.LOGS_DIR).mkdir(exist_ok=True)
|
Path(CONSTANTS.LOGS_DIR).mkdir(exist_ok=True)
|
||||||
|
|
||||||
@ -1156,7 +1226,7 @@ def schedule(add: bool=False,
|
|||||||
existing_jobs = list(cron.find_comment(CRON_COMMENT))
|
existing_jobs = list(cron.find_comment(CRON_COMMENT))
|
||||||
|
|
||||||
print()
|
print()
|
||||||
print('{green}[√] Scheduled new ArchiveBox cron job for user: {} ({} jobs are active).{reset}'.format(SHELL_CONFIG.USER, len(existing_jobs), **SHELL_CONFIG.ANSI))
|
print('{green}[√] Scheduled new ArchiveBox cron job for user: {} ({} jobs are active).{reset}'.format(USER, len(existing_jobs), **SHELL_CONFIG.ANSI))
|
||||||
print('\n'.join(f' > {cmd}' if str(cmd) == str(new_job) else f' {cmd}' for cmd in existing_jobs))
|
print('\n'.join(f' > {cmd}' if str(cmd) == str(new_job) else f' {cmd}' for cmd in existing_jobs))
|
||||||
if total_runs > 60 and not quiet:
|
if total_runs > 60 and not quiet:
|
||||||
stderr()
|
stderr()
|
||||||
@ -1170,7 +1240,7 @@ def schedule(add: bool=False,
|
|||||||
if existing_jobs:
|
if existing_jobs:
|
||||||
print('\n'.join(str(cmd) for cmd in existing_jobs))
|
print('\n'.join(str(cmd) for cmd in existing_jobs))
|
||||||
else:
|
else:
|
||||||
stderr('{red}[X] There are no ArchiveBox cron jobs scheduled for your user ({}).{reset}'.format(SHELL_CONFIG.USER, **SHELL_CONFIG.ANSI))
|
stderr('{red}[X] There are no ArchiveBox cron jobs scheduled for your user ({}).{reset}'.format(USER, **SHELL_CONFIG.ANSI))
|
||||||
stderr(' To schedule a new job, run:')
|
stderr(' To schedule a new job, run:')
|
||||||
stderr(' archivebox schedule --every=[timeperiod] --depth=1 https://example.com/some/rss/feed.xml')
|
stderr(' archivebox schedule --every=[timeperiod] --depth=1 https://example.com/some/rss/feed.xml')
|
||||||
raise SystemExit(0)
|
raise SystemExit(0)
|
||||||
@ -1294,7 +1364,7 @@ def manage(args: Optional[List[str]]=None, out_dir: Path=DATA_DIR) -> None:
|
|||||||
check_data_folder()
|
check_data_folder()
|
||||||
from django.core.management import execute_from_command_line
|
from django.core.management import execute_from_command_line
|
||||||
|
|
||||||
if (args and "createsuperuser" in args) and (SHELL_CONFIG.IN_DOCKER and not SHELL_CONFIG.IS_TTY):
|
if (args and "createsuperuser" in args) and (IN_DOCKER and not SHELL_CONFIG.IS_TTY):
|
||||||
stderr('[!] Warning: you need to pass -it to use interactive commands in docker', color='lightyellow')
|
stderr('[!] Warning: you need to pass -it to use interactive commands in docker', color='lightyellow')
|
||||||
stderr(' docker run -it archivebox manage {}'.format(' '.join(args or ['...'])), color='lightyellow')
|
stderr(' docker run -it archivebox manage {}'.format(' '.join(args or ['...'])), color='lightyellow')
|
||||||
stderr('')
|
stderr('')
|
||||||
|
@ -1,37 +1,44 @@
|
|||||||
__package__ = 'archivebox.misc'
|
__package__ = 'archivebox.misc'
|
||||||
|
|
||||||
from archivebox.config import DATA_DIR, ARCHIVE_DIR, CONSTANTS, SHELL_CONFIG
|
import sys
|
||||||
|
from rich import print
|
||||||
|
|
||||||
from .logging import stderr
|
# DO NOT ADD ANY TOP-LEVEL IMPORTS HERE
|
||||||
|
# this file is imported by archivebox/__init__.py
|
||||||
|
# and any imports here will be imported by EVERYTHING else
|
||||||
|
# so this file should only be used for pure python checks
|
||||||
|
# that don't need to import other parts of ArchiveBox
|
||||||
|
|
||||||
|
|
||||||
def check_data_folder() -> None:
|
def check_data_folder() -> None:
|
||||||
|
from archivebox import DATA_DIR, ARCHIVE_DIR
|
||||||
|
|
||||||
archive_dir_exists = ARCHIVE_DIR.exists()
|
archive_dir_exists = ARCHIVE_DIR.exists()
|
||||||
if not archive_dir_exists:
|
if not archive_dir_exists:
|
||||||
stderr('[X] No archivebox index found in the current directory.', color='red')
|
print('[red][X] No archivebox index found in the current directory.[/red]', file=sys.stderr)
|
||||||
stderr(f' {DATA_DIR}', color='lightyellow')
|
print(f' {DATA_DIR}', file=sys.stderr)
|
||||||
stderr()
|
print(file=sys.stderr)
|
||||||
stderr(' {lightred}Hint{reset}: Are you running archivebox in the right folder?'.format(**SHELL_CONFIG.ANSI))
|
print(' [violet]Hint[/violet]: Are you running archivebox in the right folder?', file=sys.stderr)
|
||||||
stderr(' cd path/to/your/archive/folder')
|
print(' cd path/to/your/archive/folder', file=sys.stderr)
|
||||||
stderr(' archivebox [command]')
|
print(' archivebox [command]', file=sys.stderr)
|
||||||
stderr()
|
print(file=sys.stderr)
|
||||||
stderr(' {lightred}Hint{reset}: To create a new archive collection or import existing data in this folder, run:'.format(**SHELL_CONFIG.ANSI))
|
print(' [violet]Hint[/violet]: To create a new archive collection or import existing data in this folder, run:', file=sys.stderr)
|
||||||
stderr(' archivebox init')
|
print(' archivebox init', file=sys.stderr)
|
||||||
raise SystemExit(2)
|
raise SystemExit(2)
|
||||||
|
|
||||||
|
|
||||||
def check_migrations():
|
def check_migrations():
|
||||||
|
from archivebox import DATA_DIR, CONSTANTS
|
||||||
from ..index.sql import list_migrations
|
from ..index.sql import list_migrations
|
||||||
|
|
||||||
pending_migrations = [name for status, name in list_migrations() if not status]
|
pending_migrations = [name for status, name in list_migrations() if not status]
|
||||||
|
|
||||||
if pending_migrations:
|
if pending_migrations:
|
||||||
stderr('[X] This collection was created with an older version of ArchiveBox and must be upgraded first.', color='lightyellow')
|
print('[red][X] This collection was created with an older version of ArchiveBox and must be upgraded first.[/red]')
|
||||||
stderr(f' {DATA_DIR}')
|
print(f' {DATA_DIR}', file=sys.stderr)
|
||||||
stderr()
|
print(file=sys.stderr)
|
||||||
stderr(f' To upgrade it to the latest version and apply the {len(pending_migrations)} pending migrations, run:')
|
print(f' [violet]Hint:[/violet] To upgrade it to the latest version and apply the {len(pending_migrations)} pending migrations, run:', file=sys.stderr)
|
||||||
stderr(' archivebox init')
|
print(' archivebox init', file=sys.stderr)
|
||||||
raise SystemExit(3)
|
raise SystemExit(3)
|
||||||
|
|
||||||
CONSTANTS.SOURCES_DIR.mkdir(exist_ok=True)
|
CONSTANTS.SOURCES_DIR.mkdir(exist_ok=True)
|
||||||
@ -39,3 +46,39 @@ def check_migrations():
|
|||||||
# CONSTANTS.CACHE_DIR.mkdir(exist_ok=True)
|
# CONSTANTS.CACHE_DIR.mkdir(exist_ok=True)
|
||||||
(CONSTANTS.LIB_DIR / 'bin').mkdir(exist_ok=True, parents=True)
|
(CONSTANTS.LIB_DIR / 'bin').mkdir(exist_ok=True, parents=True)
|
||||||
(CONSTANTS.PERSONAS_DIR / 'Default').mkdir(exist_ok=True, parents=True)
|
(CONSTANTS.PERSONAS_DIR / 'Default').mkdir(exist_ok=True, parents=True)
|
||||||
|
|
||||||
|
|
||||||
|
def check_io_encoding():
|
||||||
|
PYTHON_ENCODING = (sys.__stdout__ or sys.stdout or sys.__stderr__ or sys.stderr).encoding.upper().replace('UTF8', 'UTF-8')
|
||||||
|
|
||||||
|
if PYTHON_ENCODING != 'UTF-8':
|
||||||
|
print(f'[red][X] Your system is running python3 scripts with a bad locale setting: {PYTHON_ENCODING} (it should be UTF-8).[/red]', file=sys.stderr)
|
||||||
|
print(' To fix it, add the line "export PYTHONIOENCODING=UTF-8" to your ~/.bashrc file (without quotes)', file=sys.stderr)
|
||||||
|
print(' Or if you\'re using ubuntu/debian, run "dpkg-reconfigure locales"', file=sys.stderr)
|
||||||
|
print('')
|
||||||
|
print(' Confirm that it\'s fixed by opening a new shell and running:', file=sys.stderr)
|
||||||
|
print(' python3 -c "import sys; print(sys.stdout.encoding)" # should output UTF-8', file=sys.stderr)
|
||||||
|
raise SystemExit(2)
|
||||||
|
|
||||||
|
|
||||||
|
def check_not_root():
|
||||||
|
from archivebox.config.permissions import IS_ROOT, IN_DOCKER
|
||||||
|
|
||||||
|
attempted_command = ' '.join(sys.argv[1:]) if len(sys.argv) > 1 else ''
|
||||||
|
is_getting_help = '-h' in sys.argv or '--help' in sys.argv or 'help' in sys.argv[:2]
|
||||||
|
is_getting_version = '--version' in sys.argv or 'version' in sys.argv[:2]
|
||||||
|
is_installing = 'setup' in sys.argv[:2] or 'install' in sys.argv[:2]
|
||||||
|
|
||||||
|
if IS_ROOT and not (is_getting_help or is_getting_version or is_installing):
|
||||||
|
print('[red][!] ArchiveBox should never be run as root![/red]', file=sys.stderr)
|
||||||
|
print(' For more information, see the security overview documentation:', file=sys.stderr)
|
||||||
|
print(' https://github.com/ArchiveBox/ArchiveBox/wiki/Security-Overview#do-not-run-as-root', file=sys.stderr)
|
||||||
|
|
||||||
|
if IN_DOCKER:
|
||||||
|
print('[red][!] When using Docker, you must run commands with [green]docker run[/green] instead of [yellow3]docker exec[/yellow3], e.g.:', file=sys.stderr)
|
||||||
|
print(' docker compose run archivebox {attempted_command}', file=sys.stderr)
|
||||||
|
print(f' docker run -it -v $PWD/data:/data archivebox/archivebox {attempted_command}', file=sys.stderr)
|
||||||
|
print(' or:', file=sys.stderr)
|
||||||
|
print(f' docker compose exec --user=archivebox archivebox /bin/bash -c "archivebox {attempted_command}"', file=sys.stderr)
|
||||||
|
print(f' docker exec -it --user=archivebox <container id> /bin/bash -c "archivebox {attempted_command}"', file=sys.stderr)
|
||||||
|
raise SystemExit(2)
|
||||||
|
@ -13,6 +13,7 @@ from rich.highlighter import Highlighter
|
|||||||
|
|
||||||
# SETUP RICH CONSOLE / TTY detection / COLOR / PROGRESS BARS
|
# SETUP RICH CONSOLE / TTY detection / COLOR / PROGRESS BARS
|
||||||
CONSOLE = Console()
|
CONSOLE = Console()
|
||||||
|
STDERR = Console(stderr=True)
|
||||||
IS_TTY = CONSOLE.is_interactive
|
IS_TTY = CONSOLE.is_interactive
|
||||||
|
|
||||||
|
|
||||||
@ -51,7 +52,7 @@ COLOR_DICT = defaultdict(lambda: [(0, 0, 0), (0, 0, 0)], {
|
|||||||
'37': [(255, 255, 255), (255, 255, 255)],
|
'37': [(255, 255, 255), (255, 255, 255)],
|
||||||
})
|
})
|
||||||
|
|
||||||
# Logging Helpers
|
# Logging Helpers (DEPRECATED, use rich.print instead going forward)
|
||||||
def stdout(*args, color: Optional[str]=None, prefix: str='', config: Optional[benedict]=None) -> None:
|
def stdout(*args, color: Optional[str]=None, prefix: str='', config: Optional[benedict]=None) -> None:
|
||||||
ansi = DEFAULT_CLI_COLORS if (config or {}).get('USE_COLOR') else ANSI
|
ansi = DEFAULT_CLI_COLORS if (config or {}).get('USE_COLOR') else ANSI
|
||||||
|
|
||||||
|
@ -4,7 +4,6 @@ __package__ = 'archivebox.misc'
|
|||||||
import os
|
import os
|
||||||
import signal
|
import signal
|
||||||
import shutil
|
import shutil
|
||||||
import getpass
|
|
||||||
|
|
||||||
from json import dump
|
from json import dump
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
@ -14,7 +13,7 @@ from subprocess import _mswindows, PIPE, Popen, CalledProcessError, CompletedPro
|
|||||||
from crontab import CronTab
|
from crontab import CronTab
|
||||||
from atomicwrites import atomic_write as lib_atomic_write
|
from atomicwrites import atomic_write as lib_atomic_write
|
||||||
|
|
||||||
from archivebox.config import STORAGE_CONFIG
|
from archivebox.config.common import STORAGE_CONFIG
|
||||||
from archivebox.misc.util import enforce_types, ExtendedEncoder
|
from archivebox.misc.util import enforce_types, ExtendedEncoder
|
||||||
|
|
||||||
|
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
__package__ = 'archivebox'
|
__package__ = 'archivebox.misc'
|
||||||
|
|
||||||
import re
|
import re
|
||||||
import requests
|
import requests
|
||||||
@ -25,10 +25,10 @@ except ImportError:
|
|||||||
detect_encoding = lambda rawdata: "utf-8"
|
detect_encoding = lambda rawdata: "utf-8"
|
||||||
|
|
||||||
|
|
||||||
from archivebox.config.constants import STATICFILE_EXTENSIONS
|
from archivebox.config import CONSTANTS
|
||||||
from archivebox.config import ARCHIVING_CONFIG
|
from archivebox.config.common import ARCHIVING_CONFIG
|
||||||
|
|
||||||
from .misc.logging import COLOR_DICT
|
from .logging import COLOR_DICT
|
||||||
|
|
||||||
|
|
||||||
### Parsing Helpers
|
### Parsing Helpers
|
||||||
@ -120,7 +120,7 @@ def find_all_urls(urls_str: str):
|
|||||||
|
|
||||||
def is_static_file(url: str):
|
def is_static_file(url: str):
|
||||||
# TODO: the proper way is with MIME type detection + ext, not only extension
|
# TODO: the proper way is with MIME type detection + ext, not only extension
|
||||||
return extension(url).lower() in STATICFILE_EXTENSIONS
|
return extension(url).lower() in CONSTANTS.STATICFILE_EXTENSIONS
|
||||||
|
|
||||||
|
|
||||||
def enforce_types(func):
|
def enforce_types(func):
|
||||||
|
@ -13,7 +13,8 @@ from typing import IO, Tuple, List, Optional
|
|||||||
from datetime import datetime, timezone
|
from datetime import datetime, timezone
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
from archivebox.config import DATA_DIR, CONSTANTS, SHELL_CONFIG, ARCHIVING_CONFIG
|
from archivebox.config import DATA_DIR, CONSTANTS
|
||||||
|
from archivebox.config.common import SHELL_CONFIG, ARCHIVING_CONFIG
|
||||||
from archivebox.misc.system import atomic_write
|
from archivebox.misc.system import atomic_write
|
||||||
from archivebox.misc.logging import stderr, hint
|
from archivebox.misc.logging import stderr, hint
|
||||||
from archivebox.misc.util import (
|
from archivebox.misc.util import (
|
||||||
|
@ -25,7 +25,8 @@ from abx.archivebox.base_binary import BaseBinary, env
|
|||||||
from abx.archivebox.base_hook import BaseHook
|
from abx.archivebox.base_hook import BaseHook
|
||||||
|
|
||||||
# Depends on Other Plugins:
|
# Depends on Other Plugins:
|
||||||
from archivebox.config import CONSTANTS, ARCHIVING_CONFIG, SHELL_CONFIG
|
from archivebox.config import CONSTANTS
|
||||||
|
from archivebox.config.common import ARCHIVING_CONFIG, SHELL_CONFIG
|
||||||
from plugins_pkg.puppeteer.apps import PUPPETEER_BINPROVIDER
|
from plugins_pkg.puppeteer.apps import PUPPETEER_BINPROVIDER
|
||||||
from plugins_pkg.playwright.apps import PLAYWRIGHT_BINPROVIDER
|
from plugins_pkg.playwright.apps import PLAYWRIGHT_BINPROVIDER
|
||||||
|
|
||||||
|
@ -11,7 +11,7 @@ from abx.archivebox.base_configset import BaseConfigSet
|
|||||||
from abx.archivebox.base_binary import BaseBinary, env, apt, brew
|
from abx.archivebox.base_binary import BaseBinary, env, apt, brew
|
||||||
# from abx.archivebox.base_extractor import BaseExtractor, ExtractorName
|
# from abx.archivebox.base_extractor import BaseExtractor, ExtractorName
|
||||||
|
|
||||||
from archivebox.config import ARCHIVING_CONFIG
|
from archivebox.config.common import ARCHIVING_CONFIG
|
||||||
from archivebox.plugins_extractor.favicon.apps import FAVICON_CONFIG
|
from archivebox.plugins_extractor.favicon.apps import FAVICON_CONFIG
|
||||||
from archivebox.plugins_extractor.archivedotorg.apps import ARCHIVEDOTORG_CONFIG
|
from archivebox.plugins_extractor.archivedotorg.apps import ARCHIVEDOTORG_CONFIG
|
||||||
|
|
||||||
|
@ -11,7 +11,7 @@ from abx.archivebox.base_configset import BaseConfigSet
|
|||||||
from abx.archivebox.base_binary import BaseBinary, env, apt, brew
|
from abx.archivebox.base_binary import BaseBinary, env, apt, brew
|
||||||
from abx.archivebox.base_extractor import BaseExtractor, ExtractorName
|
from abx.archivebox.base_extractor import BaseExtractor, ExtractorName
|
||||||
|
|
||||||
from archivebox.config import ARCHIVING_CONFIG
|
from archivebox.config.common import ARCHIVING_CONFIG
|
||||||
|
|
||||||
|
|
||||||
class GitConfig(BaseConfigSet):
|
class GitConfig(BaseConfigSet):
|
||||||
|
@ -5,14 +5,14 @@ from pathlib import Path
|
|||||||
from subprocess import run
|
from subprocess import run
|
||||||
|
|
||||||
from pydantic import InstanceOf, Field
|
from pydantic import InstanceOf, Field
|
||||||
from pydantic_pkgr import BinProvider, BinName, bin_abspath
|
from pydantic_pkgr import BinProvider, BinName, BinProviderName, ProviderLookupDict, bin_abspath
|
||||||
|
|
||||||
from abx.archivebox.base_plugin import BasePlugin, BaseHook
|
from abx.archivebox.base_plugin import BasePlugin, BaseHook
|
||||||
from abx.archivebox.base_configset import BaseConfigSet
|
from abx.archivebox.base_configset import BaseConfigSet
|
||||||
from abx.archivebox.base_binary import BaseBinary, BinProviderName,ProviderLookupDict, env
|
from abx.archivebox.base_binary import BaseBinary, env
|
||||||
from abx.archivebox.base_extractor import BaseExtractor, ExtractorName
|
from abx.archivebox.base_extractor import BaseExtractor, ExtractorName
|
||||||
|
|
||||||
from archivebox.config import ARCHIVING_CONFIG, STORAGE_CONFIG
|
from archivebox.config.common import ARCHIVING_CONFIG, STORAGE_CONFIG
|
||||||
from archivebox.plugins_pkg.npm.apps import SYS_NPM_BINPROVIDER, LIB_NPM_BINPROVIDER
|
from archivebox.plugins_pkg.npm.apps import SYS_NPM_BINPROVIDER, LIB_NPM_BINPROVIDER
|
||||||
|
|
||||||
class MercuryConfig(BaseConfigSet):
|
class MercuryConfig(BaseConfigSet):
|
||||||
|
@ -16,7 +16,7 @@ from abx.archivebox.base_extractor import BaseExtractor
|
|||||||
from abx.archivebox.base_hook import BaseHook
|
from abx.archivebox.base_hook import BaseHook
|
||||||
|
|
||||||
# Depends on Other Plugins:
|
# Depends on Other Plugins:
|
||||||
from archivebox.config import ARCHIVING_CONFIG
|
from archivebox.config.common import ARCHIVING_CONFIG
|
||||||
from plugins_pkg.npm.apps import SYS_NPM_BINPROVIDER, LIB_NPM_BINPROVIDER
|
from plugins_pkg.npm.apps import SYS_NPM_BINPROVIDER, LIB_NPM_BINPROVIDER
|
||||||
|
|
||||||
###################### Config ##########################
|
###################### Config ##########################
|
||||||
|
@ -1,11 +1,11 @@
|
|||||||
__package__ = 'archivebox.plugins_extractor.singlefile'
|
__package__ = 'archivebox.plugins_extractor.singlefile'
|
||||||
|
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import List, Dict, Optional, ClassVar
|
from typing import List, Dict, Optional
|
||||||
# from typing_extensions import Self
|
# from typing_extensions import Self
|
||||||
|
|
||||||
# Depends on other PyPI/vendor packages:
|
# Depends on other PyPI/vendor packages:
|
||||||
from pydantic import InstanceOf, Field, validate_call
|
from pydantic import InstanceOf, Field
|
||||||
from pydantic_pkgr import BinProvider, BinProviderName, ProviderLookupDict, BinName, bin_abspath, ShallowBinary
|
from pydantic_pkgr import BinProvider, BinProviderName, ProviderLookupDict, BinName, bin_abspath, ShallowBinary
|
||||||
|
|
||||||
# Depends on other Django apps:
|
# Depends on other Django apps:
|
||||||
@ -17,7 +17,7 @@ from abx.archivebox.base_queue import BaseQueue
|
|||||||
from abx.archivebox.base_hook import BaseHook
|
from abx.archivebox.base_hook import BaseHook
|
||||||
|
|
||||||
# Depends on Other Plugins:
|
# Depends on Other Plugins:
|
||||||
from archivebox.config import ARCHIVING_CONFIG
|
from archivebox.config.common import ARCHIVING_CONFIG
|
||||||
from plugins_pkg.npm.apps import SYS_NPM_BINPROVIDER, LIB_NPM_BINPROVIDER
|
from plugins_pkg.npm.apps import SYS_NPM_BINPROVIDER, LIB_NPM_BINPROVIDER
|
||||||
|
|
||||||
###################### Config ##########################
|
###################### Config ##########################
|
||||||
|
@ -14,7 +14,7 @@ from abx.archivebox.base_configset import BaseConfigSet
|
|||||||
from abx.archivebox.base_binary import BaseBinary, env, apt, brew
|
from abx.archivebox.base_binary import BaseBinary, env, apt, brew
|
||||||
from abx.archivebox.base_extractor import BaseExtractor, ExtractorName
|
from abx.archivebox.base_extractor import BaseExtractor, ExtractorName
|
||||||
|
|
||||||
from archivebox.config import ARCHIVING_CONFIG, STORAGE_CONFIG
|
from archivebox.config.common import ARCHIVING_CONFIG, STORAGE_CONFIG
|
||||||
from .wget_util import wget_output_path
|
from .wget_util import wget_output_path
|
||||||
|
|
||||||
|
|
||||||
|
@ -11,7 +11,7 @@ from abx.archivebox.base_configset import BaseConfigSet
|
|||||||
from abx.archivebox.base_binary import BaseBinary, env, apt, brew
|
from abx.archivebox.base_binary import BaseBinary, env, apt, brew
|
||||||
from abx.archivebox.base_hook import BaseHook
|
from abx.archivebox.base_hook import BaseHook
|
||||||
|
|
||||||
from archivebox.config import ARCHIVING_CONFIG
|
from archivebox.config.common import ARCHIVING_CONFIG
|
||||||
from plugins_pkg.pip.apps import pip
|
from plugins_pkg.pip.apps import pip
|
||||||
|
|
||||||
###################### Config ##########################
|
###################### Config ##########################
|
||||||
|
@ -18,7 +18,8 @@ from abx.archivebox.base_hook import BaseHook
|
|||||||
from abx.archivebox.base_searchbackend import BaseSearchBackend
|
from abx.archivebox.base_searchbackend import BaseSearchBackend
|
||||||
|
|
||||||
# Depends on Other Plugins:
|
# Depends on Other Plugins:
|
||||||
from archivebox.config import CONSTANTS, SEARCH_BACKEND_CONFIG
|
from archivebox.config import CONSTANTS
|
||||||
|
from archivebox.config.common import SEARCH_BACKEND_CONFIG
|
||||||
|
|
||||||
###################### Config ##########################
|
###################### Config ##########################
|
||||||
|
|
||||||
|
@ -15,7 +15,7 @@ from abx.archivebox.base_hook import BaseHook
|
|||||||
from abx.archivebox.base_searchbackend import BaseSearchBackend
|
from abx.archivebox.base_searchbackend import BaseSearchBackend
|
||||||
|
|
||||||
# Depends on Other Plugins:
|
# Depends on Other Plugins:
|
||||||
from archivebox.config import SEARCH_BACKEND_CONFIG
|
from archivebox.config.common import SEARCH_BACKEND_CONFIG
|
||||||
|
|
||||||
SONIC_LIB = None
|
SONIC_LIB = None
|
||||||
try:
|
try:
|
||||||
|
@ -17,7 +17,7 @@ from abx.archivebox.base_hook import BaseHook
|
|||||||
from abx.archivebox.base_searchbackend import BaseSearchBackend
|
from abx.archivebox.base_searchbackend import BaseSearchBackend
|
||||||
|
|
||||||
# Depends on Other Plugins:
|
# Depends on Other Plugins:
|
||||||
from archivebox.config import SEARCH_BACKEND_CONFIG
|
from archivebox.config.common import SEARCH_BACKEND_CONFIG
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@ -1,5 +1,6 @@
|
|||||||
__package__ = 'archivebox.queues'
|
__package__ = 'archivebox.queues'
|
||||||
|
|
||||||
|
import os
|
||||||
import time
|
import time
|
||||||
import signal
|
import signal
|
||||||
import psutil
|
import psutil
|
||||||
@ -12,6 +13,8 @@ from typing import Dict, cast
|
|||||||
from supervisor.xmlrpc import SupervisorTransport
|
from supervisor.xmlrpc import SupervisorTransport
|
||||||
from xmlrpc.client import ServerProxy
|
from xmlrpc.client import ServerProxy
|
||||||
|
|
||||||
|
from archivebox.config.permissions import ARCHIVEBOX_USER
|
||||||
|
|
||||||
from .settings import SUPERVISORD_CONFIG_FILE, DATA_DIR, PID_FILE, SOCK_FILE, LOG_FILE, WORKERS_DIR, TMP_DIR, LOGS_DIR
|
from .settings import SUPERVISORD_CONFIG_FILE, DATA_DIR, PID_FILE, SOCK_FILE, LOG_FILE, WORKERS_DIR, TMP_DIR, LOGS_DIR
|
||||||
|
|
||||||
from typing import Iterator
|
from typing import Iterator
|
||||||
@ -42,6 +45,7 @@ childlogdir = {LOGS_DIR}
|
|||||||
directory = {DATA_DIR}
|
directory = {DATA_DIR}
|
||||||
strip_ansi = true
|
strip_ansi = true
|
||||||
nocleanup = true
|
nocleanup = true
|
||||||
|
user = {ARCHIVEBOX_USER}
|
||||||
|
|
||||||
[unix_http_server]
|
[unix_http_server]
|
||||||
file = {TMP_DIR}/{SOCK_FILE.name}
|
file = {TMP_DIR}/{SOCK_FILE.name}
|
||||||
|
@ -11,7 +11,7 @@ import abx.archivebox.use
|
|||||||
from archivebox.index.schema import Link
|
from archivebox.index.schema import Link
|
||||||
from archivebox.misc.util import enforce_types
|
from archivebox.misc.util import enforce_types
|
||||||
from archivebox.misc.logging import stderr
|
from archivebox.misc.logging import stderr
|
||||||
from archivebox.config import SEARCH_BACKEND_CONFIG
|
from archivebox.config.common import SEARCH_BACKEND_CONFIG
|
||||||
|
|
||||||
|
|
||||||
def log_index_started(url):
|
def log_index_started(url):
|
||||||
|
@ -110,12 +110,11 @@ if [[ -d "$PLAYWRIGHT_BROWSERS_PATH/.links" ]]; then
|
|||||||
chown -h $PUID:$PGID "$PLAYWRIGHT_BROWSERS_PATH"/.links/*
|
chown -h $PUID:$PGID "$PLAYWRIGHT_BROWSERS_PATH"/.links/*
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# also chown tmp dir
|
# also chown tmp dir and lib dir
|
||||||
mkdir -p /tmp/archivebox
|
mkdir -p "$SYSTEM_TMP_DIR"
|
||||||
chmod 777 /tmp
|
chown $PUID:$PGID "$SYSTEM_TMP_DIR"
|
||||||
chown $PUID:$PGID /tmp/archivebox
|
mkdir -p "$SYSTEM_LIB_DIR"
|
||||||
mkdir -p /app/lib
|
chown $PUID:$PGID "$SYSTEM_LIB_DIR" "$SYSTEM_LIB_DIR"/*
|
||||||
chown $PUID:$PGID /app/lib /app/lib/*
|
|
||||||
|
|
||||||
# (this check is written in blood in 2023, QEMU silently breaks things in ways that are not obvious)
|
# (this check is written in blood in 2023, QEMU silently breaks things in ways that are not obvious)
|
||||||
export IN_QEMU="$(pmap 1 | grep qemu >/dev/null && echo 'True' || echo 'False')"
|
export IN_QEMU="$(pmap 1 | grep qemu >/dev/null && echo 'True' || echo 'False')"
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
[project]
|
[project]
|
||||||
name = "archivebox"
|
name = "archivebox"
|
||||||
version = "0.8.5rc2"
|
version = "0.8.5rc3"
|
||||||
requires-python = ">=3.10"
|
requires-python = ">=3.10"
|
||||||
description = "Self-hosted internet archiving solution."
|
description = "Self-hosted internet archiving solution."
|
||||||
authors = [{name = "Nick Sweeting", email = "pyproject.toml@archivebox.io"}]
|
authors = [{name = "Nick Sweeting", email = "pyproject.toml@archivebox.io"}]
|
||||||
@ -77,6 +77,7 @@ dependencies = [
|
|||||||
"atomicwrites==1.4.1",
|
"atomicwrites==1.4.1",
|
||||||
"django-taggit==1.3.0",
|
"django-taggit==1.3.0",
|
||||||
"base32-crockford==0.3.0",
|
"base32-crockford==0.3.0",
|
||||||
|
"platformdirs>=4.3.6",
|
||||||
# "pocket@git+https://github.com/tapanpandita/pocket.git@v0.3.7",
|
# "pocket@git+https://github.com/tapanpandita/pocket.git@v0.3.7",
|
||||||
# "pydantic-pkgr>=0.4.7",
|
# "pydantic-pkgr>=0.4.7",
|
||||||
############# Plugin Dependencies ################
|
############# Plugin Dependencies ################
|
||||||
@ -133,7 +134,6 @@ dev-dependencies = [
|
|||||||
"django-autotyping>=0.5.1",
|
"django-autotyping>=0.5.1",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
[build-system]
|
[build-system]
|
||||||
requires = ["pdm-backend"]
|
requires = ["pdm-backend"]
|
||||||
build-backend = "pdm.backend"
|
build-backend = "pdm.backend"
|
||||||
|
@ -7,11 +7,11 @@ from pathlib import Path
|
|||||||
import json, shutil
|
import json, shutil
|
||||||
import sqlite3
|
import sqlite3
|
||||||
|
|
||||||
from archivebox.config import OUTPUT_PERMISSIONS
|
from archivebox.config.common import STORAGE_CONFIG
|
||||||
|
|
||||||
from .fixtures import *
|
from .fixtures import *
|
||||||
|
|
||||||
DIR_PERMISSIONS = OUTPUT_PERMISSIONS.replace('6', '7').replace('4', '5')
|
DIR_PERMISSIONS = STORAGE_CONFIG.OUTPUT_PERMISSIONS.replace('6', '7').replace('4', '5')
|
||||||
|
|
||||||
def test_init(tmp_path, process):
|
def test_init(tmp_path, process):
|
||||||
assert "Initializing a new ArchiveBox" in process.stdout.decode("utf-8")
|
assert "Initializing a new ArchiveBox" in process.stdout.decode("utf-8")
|
||||||
@ -57,7 +57,7 @@ def test_correct_permissions_output_folder(tmp_path, process):
|
|||||||
index_files = ['index.sqlite3', 'archive']
|
index_files = ['index.sqlite3', 'archive']
|
||||||
for file in index_files:
|
for file in index_files:
|
||||||
file_path = tmp_path / file
|
file_path = tmp_path / file
|
||||||
assert oct(file_path.stat().st_mode)[-3:] in (OUTPUT_PERMISSIONS, DIR_PERMISSIONS)
|
assert oct(file_path.stat().st_mode)[-3:] in (STORAGE_CONFIG.OUTPUT_PERMISSIONS, DIR_PERMISSIONS)
|
||||||
|
|
||||||
def test_correct_permissions_add_command_results(tmp_path, process, disable_extractors_dict):
|
def test_correct_permissions_add_command_results(tmp_path, process, disable_extractors_dict):
|
||||||
os.chdir(tmp_path)
|
os.chdir(tmp_path)
|
||||||
@ -65,7 +65,7 @@ def test_correct_permissions_add_command_results(tmp_path, process, disable_extr
|
|||||||
env=disable_extractors_dict)
|
env=disable_extractors_dict)
|
||||||
archived_item_path = list(tmp_path.glob('archive/**/*'))[0]
|
archived_item_path = list(tmp_path.glob('archive/**/*'))[0]
|
||||||
for path in archived_item_path.iterdir():
|
for path in archived_item_path.iterdir():
|
||||||
assert oct(path.stat().st_mode)[-3:] in (OUTPUT_PERMISSIONS, DIR_PERMISSIONS)
|
assert oct(path.stat().st_mode)[-3:] in (STORAGE_CONFIG.OUTPUT_PERMISSIONS, DIR_PERMISSIONS)
|
||||||
|
|
||||||
def test_collision_urls_different_timestamps(tmp_path, process, disable_extractors_dict):
|
def test_collision_urls_different_timestamps(tmp_path, process, disable_extractors_dict):
|
||||||
os.chdir(tmp_path)
|
os.chdir(tmp_path)
|
||||||
|
2
uv.lock
generated
2
uv.lock
generated
@ -41,7 +41,7 @@ wheels = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "archivebox"
|
name = "archivebox"
|
||||||
version = "0.8.5rc2"
|
version = "0.8.5rc3"
|
||||||
source = { editable = "." }
|
source = { editable = "." }
|
||||||
dependencies = [
|
dependencies = [
|
||||||
{ name = "atomicwrites" },
|
{ name = "atomicwrites" },
|
||||||
|
Loading…
x
Reference in New Issue
Block a user