1
0
mirror of https://github.com/pirate/ArchiveBox.git synced 2025-08-22 06:03:23 +02:00

Merge pull request #584 from jdcaballerov/ripgrep-configs

This commit is contained in:
Nick Sweeting
2020-12-12 10:21:49 -05:00
committed by GitHub
2 changed files with 12 additions and 4 deletions

View File

@@ -161,6 +161,7 @@ CONFIG_DEFAULTS: Dict[str, ConfigDefaultDict] = {
'USE_CHROME': {'type': bool, 'default': True}, 'USE_CHROME': {'type': bool, 'default': True},
'USE_NODE': {'type': bool, 'default': True}, 'USE_NODE': {'type': bool, 'default': True},
'USE_YOUTUBEDL': {'type': bool, 'default': True}, 'USE_YOUTUBEDL': {'type': bool, 'default': True},
'USE_RIPGREP': {'type': bool, 'default': True},
'CURL_BINARY': {'type': str, 'default': 'curl'}, 'CURL_BINARY': {'type': str, 'default': 'curl'},
'GIT_BINARY': {'type': str, 'default': 'git'}, 'GIT_BINARY': {'type': str, 'default': 'git'},
@@ -170,6 +171,7 @@ CONFIG_DEFAULTS: Dict[str, ConfigDefaultDict] = {
'MERCURY_BINARY': {'type': str, 'default': 'mercury-parser'}, 'MERCURY_BINARY': {'type': str, 'default': 'mercury-parser'},
'YOUTUBEDL_BINARY': {'type': str, 'default': 'youtube-dl'}, 'YOUTUBEDL_BINARY': {'type': str, 'default': 'youtube-dl'},
'NODE_BINARY': {'type': str, 'default': 'node'}, 'NODE_BINARY': {'type': str, 'default': 'node'},
'RIPGREP_BINARY': {'type': str, 'default': 'rg'},
'CHROME_BINARY': {'type': str, 'default': None}, 'CHROME_BINARY': {'type': str, 'default': None},
'POCKET_CONSUMER_KEY': {'type': str, 'default': None}, 'POCKET_CONSUMER_KEY': {'type': str, 'default': None},
@@ -312,6 +314,7 @@ DERIVED_CONFIG_DEFAULTS: ConfigDefaultDict = {
'SAVE_WARC': {'default': lambda c: c['USE_WGET'] and c['SAVE_WARC']}, 'SAVE_WARC': {'default': lambda c: c['USE_WGET'] and c['SAVE_WARC']},
'WGET_ARGS': {'default': lambda c: c['WGET_ARGS'] or []}, 'WGET_ARGS': {'default': lambda c: c['WGET_ARGS'] or []},
'RIPGREP_VERSION': {'default': lambda c: bin_version(c['RIPGREP_BINARY']) if c['USE_RIPGREP'] else None},
'USE_SINGLEFILE': {'default': lambda c: c['USE_SINGLEFILE'] and c['SAVE_SINGLEFILE']}, 'USE_SINGLEFILE': {'default': lambda c: c['USE_SINGLEFILE'] and c['SAVE_SINGLEFILE']},
'SINGLEFILE_VERSION': {'default': lambda c: bin_version(c['SINGLEFILE_BINARY']) if c['USE_SINGLEFILE'] else None}, 'SINGLEFILE_VERSION': {'default': lambda c: bin_version(c['SINGLEFILE_BINARY']) if c['USE_SINGLEFILE'] else None},
@@ -827,6 +830,13 @@ def get_dependency_info(config: ConfigDict) -> ConfigValue:
'enabled': config['USE_CHROME'], 'enabled': config['USE_CHROME'],
'is_valid': bool(config['CHROME_VERSION']), 'is_valid': bool(config['CHROME_VERSION']),
}, },
'RIPGREP_BINARY': {
'path': bin_path(config['RIPGREP_BINARY']),
'version': config['RIPGREP_VERSION'],
'hash': bin_hash(config['RIPGREP_BINARY']),
'enabled': config['USE_RIPGREP'],
'is_valid': bool(config['RIPGREP_VERSION']),
},
} }
def get_chrome_info(config: ConfigDict) -> ConfigValue: def get_chrome_info(config: ConfigDict) -> ConfigValue:

View File

@@ -2,7 +2,7 @@ import re
from subprocess import run, PIPE, DEVNULL from subprocess import run, PIPE, DEVNULL
from typing import List, Generator from typing import List, Generator
from archivebox.config import ARCHIVE_DIR from archivebox.config import ARCHIVE_DIR, RIPGREP_VERSION
from archivebox.util import enforce_types from archivebox.util import enforce_types
RG_IGNORE_EXTENSIONS = ('css','js','orig','svg') RG_IGNORE_EXTENSIONS = ('css','js','orig','svg')
@@ -26,8 +26,7 @@ def flush(snapshot_ids: Generator[str, None, None]):
@enforce_types @enforce_types
def search(text: str) -> List[str]: def search(text: str) -> List[str]:
is_rg_installed = run(['which', 'rg'], stdout=DEVNULL, stderr=DEVNULL) if not RIPGREP_VERSION:
if is_rg_installed.returncode:
raise Exception("ripgrep binary not found, install ripgrep to use this search backend") raise Exception("ripgrep binary not found, install ripgrep to use this search backend")
from core.models import Snapshot from core.models import Snapshot
@@ -44,4 +43,3 @@ def search(text: str) -> List[str]:
snap_ids = [str(id) for id in Snapshot.objects.filter(timestamp__in=timestamps).values_list('pk', flat=True)] snap_ids = [str(id) for id in Snapshot.objects.filter(timestamp__in=timestamps).values_list('pk', flat=True)]
return snap_ids return snap_ids