1
0
mirror of https://github.com/pirate/ArchiveBox.git synced 2025-09-09 13:50:56 +02:00

add new binproviders and binaries args to install and version, bump pydantic-pkgr version

This commit is contained in:
Nick Sweeting
2024-10-11 00:45:59 -07:00
parent fbd2c458c3
commit 6e7071bd19
24 changed files with 318 additions and 235 deletions

View File

@@ -1,15 +1,14 @@
__package__ = "abx.archivebox"
import os
from typing import Dict, List, Optional
from typing import Optional, cast
from typing_extensions import Self
from pydantic import Field, InstanceOf, validate_call
from pydantic import validate_call
from pydantic_pkgr import (
Binary,
BinProvider,
BinProviderName,
ProviderLookupDict,
AptProvider,
BrewProvider,
EnvProvider,
@@ -25,18 +24,6 @@ from .base_hook import BaseHook, HookType
class BaseBinProvider(BaseHook, BinProvider):
hook_type: HookType = "BINPROVIDER"
# def on_get_abspath(self, bin_name: BinName, **context) -> Optional[HostBinPath]:
# Class = super()
# get_abspath_func = lambda: Class.on_get_abspath(bin_name, **context)
# # return cache.get_or_set(f'bin:abspath:{bin_name}', get_abspath_func)
# return get_abspath_func()
# def on_get_version(self, bin_name: BinName, abspath: Optional[HostBinPath]=None, **context) -> SemVer | None:
# Class = super()
# get_version_func = lambda: Class.on_get_version(bin_name, abspath, **context)
# # return cache.get_or_set(f'bin:version:{bin_name}:{abspath}', get_version_func)
# return get_version_func()
# TODO: add install/load/load_or_install methods as abx.hookimpl methods
@@ -52,9 +39,6 @@ class BaseBinProvider(BaseHook, BinProvider):
class BaseBinary(BaseHook, Binary):
hook_type: HookType = "BINARY"
binproviders_supported: List[InstanceOf[BinProvider]] = Field(default_factory=list, alias="binproviders")
provider_overrides: Dict[BinProviderName, ProviderLookupDict] = Field(default_factory=dict, alias="overrides")
@staticmethod
def symlink_to_lib(binary, bin_dir=None) -> None:
bin_dir = bin_dir or CONSTANTS.LIB_BIN_DIR
@@ -82,13 +66,13 @@ class BaseBinary(BaseHook, Binary):
# get cached binary from db
try:
from machine.models import InstalledBinary
installed_binary = InstalledBinary.objects.get_from_db_or_cache(self)
installed_binary = InstalledBinary.objects.get_from_db_or_cache(self) # type: ignore
binary = InstalledBinary.load_from_db(installed_binary)
except Exception:
# maybe we are not in a DATA dir so there is no db, fallback to reading from fs
# (e.g. when archivebox version is run outside of a DATA dir)
binary = super().load(**kwargs)
return binary
return cast(Self, binary)
@validate_call
def install(self, **kwargs) -> Self:

View File

@@ -9,7 +9,7 @@ from pydantic import model_validator, TypeAdapter
from pydantic_settings import BaseSettings, SettingsConfigDict, PydanticBaseSettingsSource
from pydantic_settings.sources import TomlConfigSettingsSource
from pydantic_pkgr.base_types import func_takes_args_or_kwargs
from pydantic_pkgr import func_takes_args_or_kwargs
import abx

View File

@@ -22,17 +22,33 @@ def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional
add_help=True,
formatter_class=SmartFormatter,
)
# parser.add_argument(
# '--force', # '-f',
# action='store_true',
# help='Overwrite any existing packages that conflict with the ones ArchiveBox is trying to install',
# )
parser.add_argument(
'--binproviders', '-p',
type=str,
help='Select binproviders to use DEFAULT=env,apt,brew,sys_pip,venv_pip,lib_pip,pipx,sys_npm,lib_npm,puppeteer,playwright (all)',
default=None,
)
parser.add_argument(
'--binaries', '-b',
type=str,
help='Select binaries to install DEFAULT=curl,wget,git,yt-dlp,chrome,single-file,readability-extractor,postlight-parser,... (all)',
default=None,
)
parser.add_argument(
'--dry-run', '-d',
action='store_true',
help='Show what would be installed without actually installing anything',
default=False,
)
command = parser.parse_args(args or ()) # noqa
reject_stdin(__command__, stdin)
install(
# force=command.force,
out_dir=Path(pwd) if pwd else DATA_DIR,
binaries=command.binaries.split(',') if command.binaries else None,
binproviders=command.binproviders.split(',') if command.binproviders else None,
dry_run=command.dry_run,
)

View File

@@ -27,6 +27,18 @@ def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional
action='store_true',
help='Only print ArchiveBox version number and nothing else.',
)
parser.add_argument(
'--binproviders', '-p',
type=str,
help='Select binproviders to detect DEFAULT=env,apt,brew,sys_pip,venv_pip,lib_pip,pipx,sys_npm,lib_npm,puppeteer,playwright (all)',
default=None,
)
parser.add_argument(
'--binaries', '-b',
type=str,
help='Select binaries to detect DEFAULT=curl,wget,git,yt-dlp,chrome,single-file,readability-extractor,postlight-parser,... (all)',
default=None,
)
command = parser.parse_args(args or ())
reject_stdin(__command__, stdin)
@@ -40,6 +52,8 @@ def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional
version(
quiet=command.quiet,
out_dir=Path(pwd) if pwd else DATA_DIR,
binproviders=command.binproviders.split(',') if command.binproviders else None,
binaries=command.binaries.split(',') if command.binaries else None,
)

View File

@@ -111,9 +111,9 @@ def binaries_list_view(request: HttpRequest, **kwargs) -> TableContext:
or config_value.lower().endswith(binary.name.lower())
# or binary.name.lower().replace('-', '').replace('_', '') in str(config_value).lower()
)))
# if not binary.provider_overrides:
# if not binary.overrides:
# import ipdb; ipdb.set_trace()
# rows['Overrides'].append(str(obj_to_yaml(binary.provider_overrides) or str(binary.provider_overrides))[:200])
# rows['Overrides'].append(str(obj_to_yaml(binary.overrides) or str(binary.overrides))[:200])
# rows['Description'].append(binary.description)
return TableContext(
@@ -153,7 +153,7 @@ def binary_detail_view(request: HttpRequest, key: str, **kwargs) -> ItemContext:
'binprovider': binary.loaded_binprovider,
'abspath': binary.loaded_abspath,
'version': binary.loaded_version,
'overrides': obj_to_yaml(binary.provider_overrides),
'overrides': obj_to_yaml(binary.overrides),
'providers': obj_to_yaml(binary.binproviders_supported),
},
"help_texts": {

View File

@@ -356,7 +356,7 @@ class InstalledBinary(ABIDModel, ModelWithHealthStats):
'sha256': self.sha256,
'loaded_binprovider': self.BINPROVIDER,
'binproviders_supported': self.BINARY.binproviders_supported,
'provider_overrides': self.BINARY.provider_overrides,
'overrides': self.BINARY.overrides,
})
def load_fresh(self) -> BaseBinary:

View File

@@ -179,7 +179,10 @@ def help(out_dir: Path=DATA_DIR) -> None:
@enforce_types
def version(quiet: bool=False,
out_dir: Path=DATA_DIR) -> None:
out_dir: Path=DATA_DIR,
binproviders: Optional[List[str]]=None,
binaries: Optional[List[str]]=None,
) -> None:
"""Print the ArchiveBox version and dependency information"""
print(VERSION)
@@ -244,6 +247,14 @@ def version(quiet: bool=False,
if binary.name == 'archivebox':
continue
# skip if the binary is not in the requested list of binaries
if binaries and binary.name not in binaries:
continue
# skip if the binary is not supported by any of the requested binproviders
if binproviders and binary.binproviders_supported and not any(provider.name in binproviders for provider in binary.binproviders_supported):
continue
err = None
try:
loaded_bin = binary.load()
@@ -266,6 +277,9 @@ def version(quiet: bool=False,
for name, binprovider in reversed(list(settings.BINPROVIDERS.items())):
err = None
if binproviders and binprovider.name not in binproviders:
continue
# TODO: implement a BinProvider.BINARY() method that gets the loaded binary for a binprovider's INSTALLER_BIN
loaded_bin = binprovider.INSTALLER_BINARY or BaseBinary(name=binprovider.INSTALLER_BIN, binproviders=[env, apt, brew])
@@ -278,25 +292,28 @@ def version(quiet: bool=False,
PATH = str(binprovider.PATH).replace(str(DATA_DIR), '[light_slate_blue].[/light_slate_blue]').replace(str(Path('~').expanduser()), '~')
ownership_summary = f'UID=[blue]{str(binprovider.EUID).ljust(4)}[/blue]'
provider_summary = f'[dark_sea_green3]{str(abspath).ljust(52)}[/dark_sea_green3]' if abspath else f'[grey23]{"not available".ljust(52)}[/grey23]'
prnt('', '[green]√[/green]' if binprovider.is_valid else '[red]X[/red]', '', binprovider.name.ljust(11), provider_summary, ownership_summary, f'PATH={PATH}', overflow='ellipsis', soft_wrap=True)
prnt('', '[green]√[/green]' if binprovider.is_valid else '[grey53]-[/grey53]', '', binprovider.name.ljust(11), provider_summary, ownership_summary, f'PATH={PATH}', overflow='ellipsis', soft_wrap=True)
prnt()
prnt('[deep_sky_blue3][i] Source-code locations:[/deep_sky_blue3]')
for name, path in CONSTANTS.CODE_LOCATIONS.items():
prnt(printable_folder_status(name, path), overflow='ignore', crop=False)
prnt()
if os.access(CONSTANTS.ARCHIVE_DIR, os.R_OK) or os.access(CONSTANTS.CONFIG_FILE, os.R_OK):
prnt('[bright_yellow][i] Data locations:[/bright_yellow]')
for name, path in CONSTANTS.DATA_LOCATIONS.items():
prnt(printable_folder_status(name, path), overflow='ignore', crop=False)
from archivebox.misc.checks import check_data_dir_permissions
if not (binaries or binproviders):
# dont show source code / data dir info if we just want to get version info for a binary or binprovider
check_data_dir_permissions()
else:
prnt()
prnt('[red][i] Data locations:[/red] (not in a data directory)')
prnt('[deep_sky_blue3][i] Code locations:[/deep_sky_blue3]')
for name, path in CONSTANTS.CODE_LOCATIONS.items():
prnt(printable_folder_status(name, path), overflow='ignore', crop=False)
prnt()
if os.access(CONSTANTS.ARCHIVE_DIR, os.R_OK) or os.access(CONSTANTS.CONFIG_FILE, os.R_OK):
prnt('[bright_yellow][i] Data locations:[/bright_yellow]')
for name, path in CONSTANTS.DATA_LOCATIONS.items():
prnt(printable_folder_status(name, path), overflow='ignore', crop=False)
from archivebox.misc.checks import check_data_dir_permissions
check_data_dir_permissions()
else:
prnt()
prnt('[red][i] Data locations:[/red] (not in a data directory)')
prnt()
@@ -986,7 +1003,7 @@ def list_folders(links: List[Link],
raise ValueError('Status not recognized.')
@enforce_types
def install(out_dir: Path=DATA_DIR) -> None:
def install(out_dir: Path=DATA_DIR, binproviders: Optional[List[str]]=None, binaries: Optional[List[str]]=None, dry_run: bool=False) -> None:
"""Automatically install all ArchiveBox dependencies and extras"""
# if running as root:
@@ -1021,9 +1038,15 @@ def install(out_dir: Path=DATA_DIR) -> None:
print()
package_manager_names = ', '.join(f'[yellow]{binprovider.name}[/yellow]' for binprovider in reversed(list(settings.BINPROVIDERS.values())))
package_manager_names = ', '.join(
f'[yellow]{binprovider.name}[/yellow]'
for binprovider in reversed(list(settings.BINPROVIDERS.values()))
if not binproviders or (binproviders and binprovider.name in binproviders)
)
print(f'[+] Setting up package managers {package_manager_names}...')
for binprovider in reversed(list(settings.BINPROVIDERS.values())):
if binproviders and binprovider.name not in binproviders:
continue
try:
binprovider.setup()
except Exception:
@@ -1035,12 +1058,46 @@ def install(out_dir: Path=DATA_DIR) -> None:
print()
for binary in reversed(list(settings.BINARIES.values())):
providers = ' [grey53]or[/grey53] '.join(provider.name for provider in binary.binproviders_supported)
if binary.name in ('archivebox', 'django', 'sqlite', 'python', 'pipx'):
# obviously must already be installed if we are running
continue
if binaries and binary.name not in binaries:
continue
providers = ' [grey53]or[/grey53] '.join(
provider.name for provider in binary.binproviders_supported
if not binproviders or (binproviders and provider.name in binproviders)
)
if not providers:
continue
print(f'[+] Detecting / Installing [yellow]{binary.name.ljust(22)}[/yellow] using [red]{providers}[/red]...')
try:
with SudoPermission(uid=0, fallback=True):
# print(binary.load_or_install(fresh=True).model_dump(exclude={'provider_overrides', 'bin_dir', 'hook_type'}))
binary.load_or_install(fresh=True).model_dump(exclude={'provider_overrides', 'bin_dir', 'hook_type'})
# print(binary.load_or_install(fresh=True).model_dump(exclude={'overrides', 'bin_dir', 'hook_type'}))
if binproviders:
providers_supported_by_binary = [provider.name for provider in binary.binproviders_supported]
for binprovider_name in binproviders:
if binprovider_name not in providers_supported_by_binary:
continue
if dry_run:
# always show install commands when doing a dry run
sys.stderr.write("\033[2;49;90m") # grey53
result = binary.install(binproviders=[binprovider_name], dry_run=dry_run).model_dump(exclude={'overrides', 'bin_dir', 'hook_type'})
sys.stderr.write("\033[00m\n") # reset
else:
result = binary.load_or_install(binproviders=[binprovider_name], fresh=True, dry_run=dry_run).model_dump(exclude={'overrides', 'bin_dir', 'hook_type'})
if result and result['loaded_version']:
break
else:
if dry_run:
sys.stderr.write("\033[2;49;90m") # grey53
binary.install(dry_run=dry_run).model_dump(exclude={'overrides', 'bin_dir', 'hook_type'})
sys.stderr.write("\033[00m\n") # reset
else:
binary.load_or_install(fresh=True, dry_run=dry_run).model_dump(exclude={'overrides', 'bin_dir', 'hook_type'})
if IS_ROOT:
with SudoPermission(uid=0):
if ARCHIVEBOX_USER == 0:
@@ -1049,6 +1106,9 @@ def install(out_dir: Path=DATA_DIR) -> None:
os.system(f'chown -R {ARCHIVEBOX_USER} "{CONSTANTS.LIB_DIR.resolve()}"')
except Exception as e:
print(f'[red]:cross_mark: Failed to install {binary.name} as user {ARCHIVEBOX_USER}: {e}[/red]')
if binaries and len(binaries) == 1:
# if we are only installing a single binary, raise the exception so the user can see what went wrong
raise
from django.contrib.auth import get_user_model
@@ -1063,7 +1123,13 @@ def install(out_dir: Path=DATA_DIR) -> None:
from plugins_pkg.pip.apps import ARCHIVEBOX_BINARY
proc = run_shell([ARCHIVEBOX_BINARY.load().abspath, 'version'], capture_output=False, cwd=out_dir)
extra_args = []
if binproviders:
extra_args.append(f'--binproviders={",".join(binproviders)}')
if binaries:
extra_args.append(f'--binaries={",".join(binaries)}')
proc = run_shell([ARCHIVEBOX_BINARY.load().abspath, 'version', *extra_args], capture_output=False, cwd=out_dir)
raise SystemExit(proc.returncode)

View File

@@ -3,11 +3,11 @@ __package__ = 'archivebox.plugins_auth.ldap'
import inspect
from typing import List, Dict
from typing import List
from pathlib import Path
from pydantic import InstanceOf
from pydantic_pkgr import BinProviderName, ProviderLookupDict, SemVer
from pydantic_pkgr import BinaryOverrides, SemVer
from abx.archivebox.base_plugin import BasePlugin
from abx.archivebox.base_hook import BaseHook
@@ -43,26 +43,26 @@ class LdapBinary(BaseBinary):
description: str = 'LDAP Authentication'
binproviders_supported: List[InstanceOf[BaseBinProvider]] = [VENV_PIP_BINPROVIDER, SYS_PIP_BINPROVIDER, LIB_PIP_BINPROVIDER, apt]
provider_overrides: Dict[BinProviderName, ProviderLookupDict] = {
overrides: BinaryOverrides = {
LIB_PIP_BINPROVIDER.name: {
"abspath": lambda: get_LDAP_LIB_path(LIB_SITE_PACKAGES),
"version": lambda: get_LDAP_LIB_version(),
"packages": lambda: ['python-ldap>=3.4.3', 'django-auth-ldap>=4.1.0'],
"packages": ['python-ldap>=3.4.3', 'django-auth-ldap>=4.1.0'],
},
VENV_PIP_BINPROVIDER.name: {
"abspath": lambda: get_LDAP_LIB_path(VENV_SITE_PACKAGES),
"version": lambda: get_LDAP_LIB_version(),
"packages": lambda: ['python-ldap>=3.4.3', 'django-auth-ldap>=4.1.0'],
"packages": ['python-ldap>=3.4.3', 'django-auth-ldap>=4.1.0'],
},
SYS_PIP_BINPROVIDER.name: {
"abspath": lambda: get_LDAP_LIB_path((*USER_SITE_PACKAGES, *SYS_SITE_PACKAGES)),
"version": lambda: get_LDAP_LIB_version(),
"packages": lambda: ['python-ldap>=3.4.3', 'django-auth-ldap>=4.1.0'],
"packages": ['python-ldap>=3.4.3', 'django-auth-ldap>=4.1.0'],
},
apt.name: {
"abspath": lambda: get_LDAP_LIB_path(),
"version": lambda: get_LDAP_LIB_version(),
"packages": lambda: ['libssl-dev', 'libldap2-dev', 'libsasl2-dev', 'python3-ldap', 'python3-msgpack', 'python3-mutagen'],
"packages": ['libssl-dev', 'libldap2-dev', 'libsasl2-dev', 'python3-ldap', 'python3-msgpack', 'python3-mutagen'],
},
}

View File

@@ -13,7 +13,7 @@ from pydantic_pkgr import (
BinProvider,
BinName,
BinProviderName,
ProviderLookupDict,
BinaryOverrides,
bin_abspath,
)
@@ -204,15 +204,15 @@ class ChromeBinary(BaseBinary):
name: BinName = CHROME_CONFIG.CHROME_BINARY
binproviders_supported: List[InstanceOf[BinProvider]] = [PUPPETEER_BINPROVIDER, env, PLAYWRIGHT_BINPROVIDER]
provider_overrides: Dict[BinProviderName, ProviderLookupDict] = {
overrides: BinaryOverrides = {
env.name: {
'abspath': lambda: autodetect_system_chrome_install(PATH=env.PATH), # /usr/bin/google-chrome-stable
},
PUPPETEER_BINPROVIDER.name: {
'packages': lambda: ['chrome@stable'], # npx @puppeteer/browsers install chrome@stable
'packages': ['chrome@stable'], # npx @puppeteer/browsers install chrome@stable
},
PLAYWRIGHT_BINPROVIDER.name: {
'packages': lambda: ['chromium'], # playwright install chromium
'packages': ['chromium'], # playwright install chromium
},
}

View File

@@ -1,10 +1,10 @@
__package__ = 'plugins_extractor.mercury'
from typing import List, Optional, Dict
from typing import List, Optional
from pathlib import Path
from pydantic import InstanceOf, Field
from pydantic_pkgr import BinProvider, BinName, BinProviderName, ProviderLookupDict, bin_abspath
from pydantic_pkgr import BinProvider, BinName, BinaryOverrides, bin_abspath
from abx.archivebox.base_plugin import BasePlugin, BaseHook
from abx.archivebox.base_configset import BaseConfigSet
@@ -38,13 +38,13 @@ class MercuryBinary(BaseBinary):
name: BinName = MERCURY_CONFIG.MERCURY_BINARY
binproviders_supported: List[InstanceOf[BinProvider]] = [LIB_NPM_BINPROVIDER, SYS_NPM_BINPROVIDER, env]
provider_overrides: Dict[BinProviderName, ProviderLookupDict] = {
overrides: BinaryOverrides = {
LIB_NPM_BINPROVIDER.name: {
'packages': lambda: ['@postlight/parser@^2.2.3'],
'packages': ['@postlight/parser@^2.2.3'],
},
SYS_NPM_BINPROVIDER.name: {
'packages': lambda: ['@postlight/parser@^2.2.3'],
'install': lambda: False, # never try to install things into global prefix
'packages': ['@postlight/parser@^2.2.3'],
'install': lambda: None, # never try to install things into global prefix
},
env.name: {
'version': lambda: '999.999.999' if bin_abspath('postlight-parser', PATH=env.PATH) else None,

View File

@@ -1,12 +1,12 @@
__package__ = 'archivebox.plugins_extractor.readability'
from pathlib import Path
from typing import List, Dict, Optional
from typing import List
# from typing_extensions import Self
# Depends on other PyPI/vendor packages:
from pydantic import InstanceOf, Field, validate_call
from pydantic_pkgr import BinProvider, BinProviderName, ProviderLookupDict, BinName, ShallowBinary
from pydantic import InstanceOf, Field
from pydantic_pkgr import BinProvider, BinaryOverrides, BinName
# Depends on other Django apps:
from abx.archivebox.base_plugin import BasePlugin
@@ -39,23 +39,10 @@ class ReadabilityBinary(BaseBinary):
name: BinName = READABILITY_CONFIG.READABILITY_BINARY
binproviders_supported: List[InstanceOf[BinProvider]] = [LIB_NPM_BINPROVIDER, SYS_NPM_BINPROVIDER, env]
provider_overrides: Dict[BinProviderName, ProviderLookupDict] = {
LIB_NPM_BINPROVIDER.name: {"packages": lambda: [READABILITY_PACKAGE_NAME]},
SYS_NPM_BINPROVIDER.name: {"packages": lambda: []}, # prevent modifying system global npm packages
overrides: BinaryOverrides = {
LIB_NPM_BINPROVIDER.name: {"packages": [READABILITY_PACKAGE_NAME]},
SYS_NPM_BINPROVIDER.name: {"packages": [READABILITY_PACKAGE_NAME], "install": lambda: None}, # prevent modifying system global npm packages
}
@validate_call
def install(self, binprovider_name: Optional[BinProviderName]=None, **kwargs) -> ShallowBinary:
# force install to only use lib/npm provider, we never want to modify global NPM packages
return BaseBinary.install(self, binprovider_name=binprovider_name or LIB_NPM_BINPROVIDER.name, **kwargs)
@validate_call
def load_or_install(self, binprovider_name: Optional[BinProviderName] = None, fresh=False, **kwargs) -> ShallowBinary:
try:
return self.load(fresh=fresh)
except Exception:
# force install to only use lib/npm provider, we never want to modify global NPM packages
return BaseBinary.install(self, binprovider_name=binprovider_name or LIB_NPM_BINPROVIDER.name, **kwargs)

View File

@@ -1,12 +1,12 @@
__package__ = 'archivebox.plugins_extractor.singlefile'
from pathlib import Path
from typing import List, Dict, Optional
from typing import List, Optional
# from typing_extensions import Self
# Depends on other PyPI/vendor packages:
from pydantic import InstanceOf, Field
from pydantic_pkgr import BinProvider, BinProviderName, ProviderLookupDict, BinName, bin_abspath, ShallowBinary
from pydantic_pkgr import BinProvider, BinaryOverrides, BinName, bin_abspath
# Depends on other Django apps:
from abx.archivebox.base_plugin import BasePlugin
@@ -45,22 +45,21 @@ class SinglefileBinary(BaseBinary):
name: BinName = SINGLEFILE_CONFIG.SINGLEFILE_BINARY
binproviders_supported: List[InstanceOf[BinProvider]] = [LIB_NPM_BINPROVIDER, SYS_NPM_BINPROVIDER, env]
provider_overrides: Dict[BinProviderName, ProviderLookupDict] = {
overrides: BinaryOverrides = {
LIB_NPM_BINPROVIDER.name: {
"abspath": lambda:
bin_abspath(SINGLEFILE_CONFIG.SINGLEFILE_BINARY, PATH=LIB_NPM_BINPROVIDER.PATH)
or bin_abspath("single-file", PATH=LIB_NPM_BINPROVIDER.PATH)
or bin_abspath("single-file-node.js", PATH=LIB_NPM_BINPROVIDER.PATH),
"packages": lambda:
[f"single-file-cli@>={SINGLEFILE_MIN_VERSION} <{SINGLEFILE_MAX_VERSION}"],
"packages": [f"single-file-cli@>={SINGLEFILE_MIN_VERSION} <{SINGLEFILE_MAX_VERSION}"],
},
SYS_NPM_BINPROVIDER.name: {
"abspath": lambda:
bin_abspath(SINGLEFILE_CONFIG.SINGLEFILE_BINARY, PATH=SYS_NPM_BINPROVIDER.PATH)
or bin_abspath("single-file", PATH=SYS_NPM_BINPROVIDER.PATH)
or bin_abspath("single-file-node.js", PATH=SYS_NPM_BINPROVIDER.PATH),
"packages": lambda:
[], # prevent modifying system global npm packages
"packages": [f"single-file-cli@>={SINGLEFILE_MIN_VERSION} <{SINGLEFILE_MAX_VERSION}"],
"install": lambda: None,
},
env.name: {
'abspath': lambda:
@@ -69,18 +68,6 @@ class SinglefileBinary(BaseBinary):
or bin_abspath('single-file-node.js', PATH=env.PATH),
},
}
def install(self, binprovider_name: Optional[BinProviderName]=None, **kwargs) -> ShallowBinary:
# force install to only use lib/npm provider, we never want to modify global NPM packages
return BaseBinary.install(self, binprovider_name=binprovider_name or LIB_NPM_BINPROVIDER.name, **kwargs)
def load_or_install(self, binprovider_name: Optional[BinProviderName]=None, fresh=False, **kwargs) -> ShallowBinary:
try:
return self.load(fresh=fresh)
except Exception:
# force install to only use lib/npm provider, we never want to modify global NPM packages
return BaseBinary.install(self, binprovider_name=binprovider_name or LIB_NPM_BINPROVIDER.name, **kwargs)
SINGLEFILE_BINARY = SinglefileBinary()

View File

@@ -1,10 +1,10 @@
import sys
from typing import List, Dict
from typing import List
from subprocess import run, PIPE
from rich import print
from pydantic import InstanceOf, Field, model_validator, AliasChoices
from pydantic_pkgr import BinProvider, BinName, BinProviderName, ProviderLookupDict
from pydantic_pkgr import BinProvider, BinName, BinaryOverrides
from abx.archivebox.base_plugin import BasePlugin
from abx.archivebox.base_configset import BaseConfigSet
@@ -54,10 +54,10 @@ class FfmpegBinary(BaseBinary):
name: BinName = 'ffmpeg'
binproviders_supported: List[InstanceOf[BinProvider]] = [apt, brew, env]
provider_overrides: Dict[BinProviderName, ProviderLookupDict] = {
overrides: BinaryOverrides = {
'env': {
# 'abspath': lambda: shutil.which('ffmpeg', PATH=env.PATH),
# 'version': lambda: run(['ffmpeg', '-version'], stdout=PIPE, stderr=PIPE, text=True).stdout,
'version': lambda: run(['ffmpeg', '-version'], stdout=PIPE, stderr=PIPE, text=True).stdout,
},
'apt': {
# 'abspath': lambda: shutil.which('ffmpeg', PATH=apt.PATH),

View File

@@ -1,11 +1,11 @@
__package__ = 'archivebox.plugins_pkg.npm'
from pathlib import Path
from typing import List, Optional, Dict
from typing import List, Optional
from pydantic import InstanceOf, model_validator
from pydantic_pkgr import BinProvider, NpmProvider, BinName, PATHStr, BinProviderName, ProviderLookupDict
from pydantic_pkgr import BinProvider, NpmProvider, BinName, PATHStr, BinProviderName, BinaryOverrides
from archivebox.config import DATA_DIR, CONSTANTS
@@ -60,8 +60,8 @@ class NodeBinary(BaseBinary):
name: BinName = 'node'
binproviders_supported: List[InstanceOf[BinProvider]] = [apt, brew, env]
overrides: Dict[BinProviderName, ProviderLookupDict] = {
apt.name: {'packages': lambda c: ['nodejs']},
overrides: BinaryOverrides = {
apt.name: {'packages': ['nodejs']},
}
@@ -72,7 +72,7 @@ class NpmBinary(BaseBinary):
name: BinName = 'npm'
binproviders_supported: List[InstanceOf[BinProvider]] = [apt, brew, env]
overrides: Dict[BinProviderName, ProviderLookupDict] = {
overrides: BinaryOverrides = {
apt.name: {'install': lambda: None}, # already installed when nodejs is installed
brew.name: {'install': lambda: None}, # already installed when nodejs is installed
}
@@ -84,7 +84,7 @@ class NpxBinary(BaseBinary):
name: BinName = 'npx'
binproviders_supported: List[InstanceOf[BinProvider]] = [apt, brew, env]
overrides: Dict[BinProviderName, ProviderLookupDict] = {
overrides: BinaryOverrides = {
apt.name: {'install': lambda: None}, # already installed when nodejs is installed
brew.name: {'install': lambda: None}, # already installed when nodejs is installed
}

View File

@@ -4,14 +4,14 @@ import os
import sys
import site
from pathlib import Path
from typing import List, Dict, Optional
from typing import List, Optional
from pydantic import InstanceOf, Field, model_validator, validate_call
import django
import django.db.backends.sqlite3.base
from django.db.backends.sqlite3.base import Database as django_sqlite3 # type: ignore[import-type]
from pydantic_pkgr import BinProvider, PipProvider, BinName, BinProviderName, ProviderLookupDict, SemVer
from pydantic_pkgr import BinProvider, PipProvider, BinName, BinProviderName, BinaryOverrides, SemVer
from archivebox.config import CONSTANTS, VERSION
@@ -105,18 +105,18 @@ class ArchiveboxBinary(BaseBinary):
name: BinName = 'archivebox'
binproviders_supported: List[InstanceOf[BinProvider]] = [VENV_PIP_BINPROVIDER, SYS_PIP_BINPROVIDER, apt, brew, env]
provider_overrides: Dict[BinProviderName, ProviderLookupDict] = {
VENV_PIP_BINPROVIDER.name: {'packages': lambda: [], 'version': lambda: VERSION},
SYS_PIP_BINPROVIDER.name: {'packages': lambda: [], 'version': lambda: VERSION},
apt.name: {'packages': lambda: [], 'version': lambda: VERSION},
brew.name: {'packages': lambda: [], 'version': lambda: VERSION},
overrides: BinaryOverrides = {
VENV_PIP_BINPROVIDER.name: {'packages': [], 'version': VERSION},
SYS_PIP_BINPROVIDER.name: {'packages': [], 'version': VERSION},
apt.name: {'packages': [], 'version': VERSION},
brew.name: {'packages': [], 'version': VERSION},
}
@validate_call
# @validate_call
def install(self, **kwargs):
return self.load() # obviously it's already installed if we are running this ;)
@validate_call
# @validate_call
def load_or_install(self, **kwargs):
return self.load() # obviously it's already installed if we are running this ;)
@@ -127,18 +127,18 @@ class PythonBinary(BaseBinary):
name: BinName = 'python'
binproviders_supported: List[InstanceOf[BinProvider]] = [VENV_PIP_BINPROVIDER, SYS_PIP_BINPROVIDER, apt, brew, env]
provider_overrides: Dict[BinProviderName, ProviderLookupDict] = {
overrides: BinaryOverrides = {
SYS_PIP_BINPROVIDER.name: {
'abspath': lambda: sys.executable,
'version': lambda: '{}.{}.{}'.format(*sys.version_info[:3]),
'abspath': sys.executable,
'version': '{}.{}.{}'.format(*sys.version_info[:3]),
},
}
@validate_call
# @validate_call
def install(self, **kwargs):
return self.load() # obviously it's already installed if we are running this ;)
@validate_call
# @validate_call
def load_or_install(self, **kwargs):
return self.load() # obviously it's already installed if we are running this ;)
@@ -152,14 +152,14 @@ LOADED_SQLITE_FROM_VENV = str(LOADED_SQLITE_PATH.absolute().resolve()).startswit
class SqliteBinary(BaseBinary):
name: BinName = 'sqlite'
binproviders_supported: List[InstanceOf[BaseBinProvider]] = Field(default=[VENV_PIP_BINPROVIDER, SYS_PIP_BINPROVIDER])
provider_overrides: Dict[BinProviderName, ProviderLookupDict] = {
overrides: BinaryOverrides = {
VENV_PIP_BINPROVIDER.name: {
"abspath": lambda: LOADED_SQLITE_PATH if LOADED_SQLITE_FROM_VENV else None,
"version": lambda: LOADED_SQLITE_VERSION if LOADED_SQLITE_FROM_VENV else None,
"abspath": LOADED_SQLITE_PATH if LOADED_SQLITE_FROM_VENV else None,
"version": LOADED_SQLITE_VERSION if LOADED_SQLITE_FROM_VENV else None,
},
SYS_PIP_BINPROVIDER.name: {
"abspath": lambda: LOADED_SQLITE_PATH if not LOADED_SQLITE_FROM_VENV else None,
"version": lambda: LOADED_SQLITE_VERSION if not LOADED_SQLITE_FROM_VENV else None,
"abspath": LOADED_SQLITE_PATH if not LOADED_SQLITE_FROM_VENV else None,
"version": LOADED_SQLITE_VERSION if not LOADED_SQLITE_FROM_VENV else None,
},
}
@@ -177,11 +177,11 @@ class SqliteBinary(BaseBinary):
])
return self
@validate_call
# @validate_call
def install(self, **kwargs):
return self.load() # obviously it's already installed if we are running this ;)
@validate_call
# @validate_call
def load_or_install(self, **kwargs):
return self.load() # obviously it's already installed if we are running this ;)
@@ -196,22 +196,22 @@ class DjangoBinary(BaseBinary):
name: BinName = 'django'
binproviders_supported: List[InstanceOf[BaseBinProvider]] = Field(default=[VENV_PIP_BINPROVIDER, SYS_PIP_BINPROVIDER])
provider_overrides: Dict[BinProviderName, ProviderLookupDict] = {
overrides: BinaryOverrides = {
VENV_PIP_BINPROVIDER.name: {
"abspath": lambda: LOADED_DJANGO_PATH if LOADED_DJANGO_FROM_VENV else None,
"version": lambda: LOADED_DJANGO_VERSION if LOADED_DJANGO_FROM_VENV else None,
"abspath": LOADED_DJANGO_PATH if LOADED_DJANGO_FROM_VENV else None,
"version": LOADED_DJANGO_VERSION if LOADED_DJANGO_FROM_VENV else None,
},
SYS_PIP_BINPROVIDER.name: {
"abspath": lambda: LOADED_DJANGO_PATH if not LOADED_DJANGO_FROM_VENV else None,
"version": lambda: LOADED_DJANGO_VERSION if not LOADED_DJANGO_FROM_VENV else None,
"abspath": LOADED_DJANGO_PATH if not LOADED_DJANGO_FROM_VENV else None,
"version": LOADED_DJANGO_VERSION if not LOADED_DJANGO_FROM_VENV else None,
},
}
@validate_call
# @validate_call
def install(self, **kwargs):
return self.load() # obviously it's already installed if we are running this ;)
@validate_call
# @validate_call
def load_or_install(self, **kwargs):
return self.load() # obviously it's already installed if we are running this ;)
@@ -221,11 +221,11 @@ class PipBinary(BaseBinary):
name: BinName = "pip"
binproviders_supported: List[InstanceOf[BinProvider]] = [LIB_PIP_BINPROVIDER, VENV_PIP_BINPROVIDER, SYS_PIP_BINPROVIDER, apt, brew, env]
@validate_call
# @validate_call
def install(self, **kwargs):
return self.load() # obviously it's already installed if we are running this ;)
@validate_call
# @validate_call
def load_or_install(self, **kwargs):
return self.load() # obviously it's already installed if we are running this ;)

View File

@@ -11,7 +11,7 @@ from pydantic_pkgr import (
BinName,
BinProvider,
BinProviderName,
ProviderLookupDict,
BinProviderOverrides,
InstallArgs,
PATHStr,
HostBinPath,
@@ -66,15 +66,15 @@ class PlaywrightBinProvider(BaseBinProvider):
PATH: PATHStr = f"{CONSTANTS.LIB_BIN_DIR}:{DEFAULT_ENV_PATH}"
playwright_browsers_dir: Optional[Path] = (
playwright_browsers_dir: Path = (
Path("~/Library/Caches/ms-playwright").expanduser() # macos playwright cache dir
if OPERATING_SYSTEM == "darwin" else
Path("~/.cache/ms-playwright").expanduser() # linux playwright cache dir
)
playwright_install_args: List[str] = ["install"] # --with-deps
packages_handler: ProviderLookupDict = Field(default={
"chrome": lambda: ["chromium"],
packages_handler: BinProviderOverrides = Field(default={
"chrome": ["chromium"],
}, exclude=True)
_browser_abspaths: ClassVar[Dict[str, HostBinPath]] = {}
@@ -104,9 +104,17 @@ class PlaywrightBinProvider(BaseBinProvider):
)
# ~/Library/caches/ms-playwright/chromium-1097/chrome-linux/chromium
return sorted(self.playwright_browsers_dir.glob(f"{browser_name}-*/*-linux/*"))
paths = []
for path in sorted(self.playwright_browsers_dir.glob(f"{browser_name}-*/*-linux/*")):
if 'xdg-settings' in str(path):
continue
if 'ffmpeg' in str(path):
continue
if '/chrom' in str(path) and 'chrom' in path.name.lower():
paths.append(path)
return paths
def on_get_abspath(self, bin_name: BinName, **context) -> Optional[HostBinPath]:
def default_abspath_handler(self, bin_name: BinName, **context) -> Optional[HostBinPath]:
assert bin_name == "chrome", "Only chrome is supported using the @puppeteer/browsers install method currently."
# already loaded, return abspath from cache
@@ -128,7 +136,7 @@ class PlaywrightBinProvider(BaseBinProvider):
return None
def on_install(self, bin_name: str, packages: Optional[InstallArgs] = None, **context) -> str:
def default_install_handler(self, bin_name: str, packages: Optional[InstallArgs] = None, **context) -> str:
"""playwright install chrome"""
self.setup()
assert bin_name == "chrome", "Only chrome is supported using the playwright install method currently."
@@ -137,7 +145,7 @@ class PlaywrightBinProvider(BaseBinProvider):
raise Exception(
f"{self.__class__.__name__} install method is not available on this host ({self.INSTALLER_BIN} not found in $PATH)"
)
packages = packages or self.on_get_packages(bin_name)
packages = packages or self.get_packages(bin_name)
# print(f'[*] {self.__class__.__name__}: Installing {bin_name}: {self.INSTALLER_BIN_ABSPATH} install {packages}')
@@ -155,7 +163,7 @@ class PlaywrightBinProvider(BaseBinProvider):
output_lines = [
line for line in proc.stdout.strip().split('\n')
if '/chrom' in line
and 'chrom' in line.rsplit('/', 1)[-1].lower() # make final path segment (filename) contains chrome or chromium
and 'chrom' in line.rsplit('/', 1)[-1].lower() # if final path segment (filename) contains chrome or chromium
and 'xdg-settings' not in line
and 'ffmpeg' not in line
]

View File

@@ -11,7 +11,7 @@ from pydantic_pkgr import (
BinProvider,
BinName,
BinProviderName,
ProviderLookupDict,
BinProviderOverrides,
InstallArgs,
PATHStr,
HostBinPath,
@@ -65,10 +65,10 @@ class PuppeteerBinProvider(BaseBinProvider):
euid: Optional[int] = ARCHIVEBOX_USER
puppeteer_browsers_dir: Optional[Path] = LIB_DIR_BROWSERS
puppeteer_browsers_dir: Path = LIB_DIR_BROWSERS
puppeteer_install_args: List[str] = ["@puppeteer/browsers", "install", "--path", str(LIB_DIR_BROWSERS)]
packages_handler: ProviderLookupDict = Field(default={
packages_handler: BinProviderOverrides = Field(default={
"chrome": lambda:
['chrome@stable'],
}, exclude=True)
@@ -90,7 +90,7 @@ class PuppeteerBinProvider(BaseBinProvider):
# /data/lib/browsers/chrome/linux-131.0.6730.0/chrome-linux64/chrome
return sorted(self.puppeteer_browsers_dir.glob(f"{browser_name}/linux*/chrome*/chrome"))
def on_get_abspath(self, bin_name: BinName, **context) -> Optional[HostBinPath]:
def default_abspath_handler(self, bin_name: BinName, **context) -> Optional[HostBinPath]:
assert bin_name == 'chrome', 'Only chrome is supported using the @puppeteer/browsers install method currently.'
# already loaded, return abspath from cache
@@ -106,7 +106,7 @@ class PuppeteerBinProvider(BaseBinProvider):
return None
def on_install(self, bin_name: str, packages: Optional[InstallArgs] = None, **context) -> str:
def default_install_handler(self, bin_name: str, packages: Optional[InstallArgs] = None, **context) -> str:
"""npx @puppeteer/browsers install chrome@stable"""
self.setup()
assert bin_name == 'chrome', 'Only chrome is supported using the @puppeteer/browsers install method currently.'
@@ -115,7 +115,7 @@ class PuppeteerBinProvider(BaseBinProvider):
raise Exception(
f"{self.__class__.__name__} install method is not available on this host ({self.INSTALLER_BIN} not found in $PATH)"
)
packages = packages or self.on_get_packages(bin_name)
packages = packages or self.get_packages(bin_name)
assert packages, f"No packages specified for installation of {bin_name}"
# print(f'[*] {self.__class__.__name__}: Installing {bin_name}: {self.INSTALLER_BIN_ABSPATH} install {packages}')

View File

@@ -3,12 +3,12 @@ __package__ = 'archivebox.plugins_search.ripgrep'
import re
from pathlib import Path
from subprocess import run
from typing import List, Dict, Iterable
from typing import List, Iterable
# from typing_extensions import Self
# Depends on other PyPI/vendor packages:
from pydantic import InstanceOf, Field
from pydantic_pkgr import BinProvider, BinProviderName, ProviderLookupDict, BinName
from pydantic_pkgr import BinProvider, BinaryOverrides, BinName
# Depends on other Django apps:
from abx.archivebox.base_plugin import BasePlugin
@@ -45,9 +45,9 @@ class RipgrepBinary(BaseBinary):
name: BinName = RIPGREP_CONFIG.RIPGREP_BINARY
binproviders_supported: List[InstanceOf[BinProvider]] = [apt, brew, env]
provider_overrides: Dict[BinProviderName, ProviderLookupDict] = {
apt.name: {'packages': lambda: ['ripgrep']},
brew.name: {'packages': lambda: ['ripgrep']},
overrides: BinaryOverrides = {
apt.name: {'packages': ['ripgrep']},
brew.name: {'packages': ['ripgrep']},
}
RIPGREP_BINARY = RipgrepBinary()

View File

@@ -1,11 +1,11 @@
__package__ = 'archivebox.plugins_search.sonic'
import sys
from typing import List, Dict, Generator, cast
from typing import List, Generator, cast
# Depends on other PyPI/vendor packages:
from pydantic import InstanceOf, Field, model_validator
from pydantic_pkgr import BinProvider, BinProviderName, ProviderLookupDict, BinName
from pydantic_pkgr import BinProvider, BinaryOverrides, BinName
# Depends on other Django apps:
from abx.archivebox.base_plugin import BasePlugin
@@ -55,9 +55,9 @@ class SonicBinary(BaseBinary):
name: BinName = SONIC_CONFIG.SONIC_BINARY
binproviders_supported: List[InstanceOf[BinProvider]] = [brew, env] # TODO: add cargo
provider_overrides: Dict[BinProviderName, ProviderLookupDict] = {
brew.name: {'packages': lambda: ['sonic']},
# cargo.name: {'packages': lambda: ['sonic-server']}, # TODO: add cargo
overrides: BinaryOverrides = {
brew.name: {'packages': ['sonic']},
# cargo.name: {'packages': ['sonic-server']}, # TODO: add cargo
}
# TODO: add version checking over protocol? for when sonic backend is on remote server and binary is not installed locally

View File

@@ -66,11 +66,11 @@ class SqliteftsConfig(BaseConfigSet):
# Only Python >= 3.11 supports sqlite3.Connection.getlimit(),
# so fall back to the default if the API to get the real value isn't present
try:
limit_id = sqlite3.SQLITE_LIMIT_LENGTH
limit_id = sqlite3.SQLITE_LIMIT_LENGTH # type: ignore[attr-defined]
if self.SQLITEFTS_SEPARATE_DATABASE:
cursor = self.get_connection()
return cursor.connection.getlimit(limit_id)
return cursor.connection.getlimit(limit_id) # type: ignore[attr-defined]
else:
with database.temporary_connection() as cursor: # type: ignore[attr-defined]
return cursor.connection.getlimit(limit_id)