1
0
mirror of https://github.com/pirate/ArchiveBox.git synced 2025-08-22 06:03:23 +02:00

refactor: Remove if LENGTH and use text chunker for every input

This commit is contained in:
jdcaballerov
2020-11-26 18:12:54 -05:00
committed by Nick Sweeting
parent 5a6b814c79
commit 172197ae01

View File

@@ -11,12 +11,10 @@ MAX_SONIC_TEXT_LENGTH = 20000
# NOTE(review): this is a diff hunk shown without +/- markers or indentation, so it is
# not runnable as displayed. Going by the commit message ("Remove if LENGTH and use text
# chunker for every input"), the `if`/`else` section below is presumably the removed
# version and the final three statements are presumably its replacement -- confirm
# against the repository before relying on this reading.
#
# index: push every string in `texts` to the search backend under `snapshot_id`,
# using SONIC_COLLECTION and SONIC_BUCKET as the destination.
def index(snapshot_id: str, texts: List[str]):
# The ingest client is used as a context manager for the duration of all pushes.
with IngestClient(SEARCH_BACKEND_HOST_NAME, SEARCH_BACKEND_PORT, SEARCH_BACKEND_PASSWORD) as ingestcl:
for text in texts:
# (presumably removed) texts shorter than MAX_SONIC_TEXT_LENGTH were pushed in one call...
if len(text) < MAX_SONIC_TEXT_LENGTH:
ingestcl.push(SONIC_COLLECTION, SONIC_BUCKET, snapshot_id, str(text))
# (presumably removed) ...and only longer texts were sliced into chunks first.
else:
chunks = [text[i:i+MAX_SONIC_TEXT_LENGTH] for i in range(0, len(text), MAX_SONIC_TEXT_LENGTH)]
for chunk in chunks:
ingestcl.push(SONIC_COLLECTION, SONIC_BUCKET, snapshot_id, str(chunk))
# (presumably added) every text is sliced into consecutive pieces of at most
# MAX_SONIC_TEXT_LENGTH characters; an empty text yields no chunks, so nothing is pushed for it.
chunks = [text[i:i+MAX_SONIC_TEXT_LENGTH] for i in range(0, len(text), MAX_SONIC_TEXT_LENGTH)]
for chunk in chunks:
# Each chunk is passed through str() and pushed as a separate call under the same snapshot_id.
ingestcl.push(SONIC_COLLECTION, SONIC_BUCKET, snapshot_id, str(chunk))
@enforce_types
def search(text: str) -> List[str]:
with SearchClient(SEARCH_BACKEND_HOST_NAME, SEARCH_BACKEND_PORT, SEARCH_BACKEND_PASSWORD) as querycl: