diff --git a/archivebox/core/mixins.py b/archivebox/core/mixins.py index b361790a..d1203745 100644 --- a/archivebox/core/mixins.py +++ b/archivebox/core/mixins.py @@ -14,12 +14,10 @@ class SearchResultsAdminMixin(object): if not search_term: return qs, use_distinct try: - snapshot_ids = query_search_index(search_term) + qsearch = query_search_index(search_term) except Exception as err: messages.add_message(request, messages.WARNING, f'Error from the search backend, only showing results from default admin search fields - Error: {err}') else: - qsearch = queryset.filter(pk__in=snapshot_ids) qs |= qsearch - finally: return qs, use_distinct diff --git a/archivebox/main.py b/archivebox/main.py index 7d13a5c4..d533d58d 100644 --- a/archivebox/main.py +++ b/archivebox/main.py @@ -666,7 +666,7 @@ def remove(filter_str: Optional[str]=None, to_remove = snapshots.count() remove_from_sql_main_index(snapshots=snapshots, out_dir=out_dir) - flush_search_index(snapshot_ids=(str(pk) for pk in snapshots.values_list('pk',flat=True))) + flush_search_index(snapshots=snapshots) all_snapshots = load_main_index(out_dir=out_dir) log_removal_finished(all_snapshots.count(), to_remove) diff --git a/archivebox/search/__init__.py b/archivebox/search/__init__.py index 59bb6fe5..15efffb0 100644 --- a/archivebox/search/__init__.py +++ b/archivebox/search/__init__.py @@ -1,7 +1,8 @@ -from typing import List, Union, Generator +from typing import List, Union from pathlib import Path from importlib import import_module +from django.db.models import QuerySet from archivebox.index.schema import Link from archivebox.util import enforce_types @@ -39,16 +40,24 @@ def write_search_index(link: Link, texts: Union[List[str], None]=None, out_dir: backend.index(snapshot_id=str(snap.id), texts=texts) @enforce_types -def query_search_index(text: str) -> List[str]: +def query_search_index(query: str, out_dir: Path=OUTPUT_DIR) -> QuerySet: if search_backend_enabled(): + setup_django(out_dir, check_db=True) + from core.models import Snapshot + backend = import_backend() - return backend.search(text) + snapshot_ids = backend.search(query) + # TODO preserve ordering from backend + qsearch = Snapshot.objects.filter(pk__in=snapshot_ids) + return qsearch else: return [] @enforce_types -def flush_search_index(snapshot_ids: Generator[str, None, None]): - if not indexing_enabled() or not snapshot_ids: +def flush_search_index(snapshots: QuerySet): + if not indexing_enabled() or not snapshots: return backend = import_backend() + snapshot_ids=(str(pk) for pk in snapshots.values_list('pk',flat=True)) + backend.flush(snapshot_ids)