Merge pull request #584 from jdcaballerov/ripgrep-configs

2025-08-22 06:03:23 +02:00 · 2020-12-12 10:21:49 -05:00
parent 31ab762ee1 9b6afa36a3
commit 154d31263b
2 changed files with 12 additions and 4 deletions
--- a/archivebox/config.py
+++ b/archivebox/config.py
@@ -161,6 +161,7 @@ CONFIG_DEFAULTS: Dict[str, ConfigDefaultDict] = {
        'USE_CHROME':               {'type': bool,  'default': True},
        'USE_NODE':                 {'type': bool,  'default': True},
        'USE_YOUTUBEDL':            {'type': bool,  'default': True},
+        'USE_RIPGREP':              {'type': bool,  'default': True},
        'CURL_BINARY':              {'type': str,   'default': 'curl'},
        'GIT_BINARY':               {'type': str,   'default': 'git'},
@@ -170,6 +171,7 @@ CONFIG_DEFAULTS: Dict[str, ConfigDefaultDict] = {
        'MERCURY_BINARY':           {'type': str,   'default': 'mercury-parser'},
        'YOUTUBEDL_BINARY':         {'type': str,   'default': 'youtube-dl'},
        'NODE_BINARY':              {'type': str,   'default': 'node'},
+        'RIPGREP_BINARY':           {'type': str,   'default': 'rg'},
        'CHROME_BINARY':            {'type': str,   'default': None},
        'POCKET_CONSUMER_KEY':      {'type': str,   'default': None},
@@ -312,6 +314,7 @@ DERIVED_CONFIG_DEFAULTS: ConfigDefaultDict = {
    'SAVE_WARC':                {'default': lambda c: c['USE_WGET'] and c['SAVE_WARC']},
    'WGET_ARGS':                {'default': lambda c: c['WGET_ARGS'] or []},
+    'RIPGREP_VERSION':          {'default': lambda c: bin_version(c['RIPGREP_BINARY']) if c['USE_RIPGREP'] else None},
    'USE_SINGLEFILE':           {'default': lambda c: c['USE_SINGLEFILE'] and c['SAVE_SINGLEFILE']},
    'SINGLEFILE_VERSION':       {'default': lambda c: bin_version(c['SINGLEFILE_BINARY']) if c['USE_SINGLEFILE'] else None},
@@ -827,6 +830,13 @@ def get_dependency_info(config: ConfigDict) -> ConfigValue:
            'enabled': config['USE_CHROME'],
            'is_valid': bool(config['CHROME_VERSION']),
        },
+        'RIPGREP_BINARY': {
+            'path': bin_path(config['RIPGREP_BINARY']),
+            'version': config['RIPGREP_VERSION'],
+            'hash': bin_hash(config['RIPGREP_BINARY']),
+            'enabled': config['USE_RIPGREP'],
+            'is_valid': bool(config['RIPGREP_VERSION']),
+        },
    }
 def get_chrome_info(config: ConfigDict) -> ConfigValue:
--- a/archivebox/search/backends/ripgrep.py
+++ b/archivebox/search/backends/ripgrep.py
@@ -2,7 +2,7 @@ import re
 from subprocess import run, PIPE, DEVNULL
 from typing import List, Generator
-from archivebox.config import ARCHIVE_DIR
+from archivebox.config import ARCHIVE_DIR, RIPGREP_VERSION
 from archivebox.util import enforce_types
 RG_IGNORE_EXTENSIONS = ('css','js','orig','svg')
@@ -26,8 +26,7 @@ def flush(snapshot_ids: Generator[str, None, None]):
@enforce_types
 def search(text: str) -> List[str]:
-    is_rg_installed = run(['which', 'rg'], stdout=DEVNULL, stderr=DEVNULL)
+    if not RIPGREP_VERSION:
-    if is_rg_installed.returncode:
        raise Exception("ripgrep binary not found, install ripgrep to use this search backend")
    from core.models import Snapshot
@@ -44,4 +43,3 @@ def search(text: str) -> List[str]:
    snap_ids = [str(id) for id in Snapshot.objects.filter(timestamp__in=timestamps).values_list('pk', flat=True)]
    return snap_ids