1
0
mirror of https://github.com/nextapps-de/flexsearch.git synced 2025-08-25 15:01:19 +02:00

re-implement append

This commit is contained in:
Thomas Wilkerling
2025-06-05 08:33:21 +02:00
parent 1f75f5f9d9
commit 1c4bcaac2e
11 changed files with 406 additions and 272 deletions

View File

@@ -891,7 +891,7 @@ Global Members:
`Index` / `Worker`-Index Methods: `Index` / `Worker`-Index Methods:
- index.[**add**](#add-text-item-to-an-index)(id, string) - index.[**add**](#add-text-item-to-an-index)(id, string)
- ~~index.[**append**]()(id, string)~~ - index.[**append**]()(id, string)
- index.[**update**](#update-item-from-an-index)(id, string) - index.[**update**](#update-item-from-an-index)(id, string)
- index.[**remove**](#remove-item-from-an-index)(id) - index.[**remove**](#remove-item-from-an-index)(id)
- index.[**search**](#search-items)(string, \<limit\>, \<options\>) - index.[**search**](#search-items)(string, \<limit\>, \<options\>)
@@ -918,7 +918,7 @@ Global Members:
`Document` Methods: `Document` Methods:
- document.[**add**](doc/document-search.md#addupdateremove-documents)(\<id\>, document) - document.[**add**](doc/document-search.md#addupdateremove-documents)(\<id\>, document)
- ~~document.[**append**]()(\<id\>, document)~~ - document.[**append**]()(\<id\>, document)
- document.[**update**](doc/document-search.md#addupdateremove-documents)(\<id\>, document) - document.[**update**](doc/document-search.md#addupdateremove-documents)(\<id\>, document)
- document.[**remove**](doc/document-search.md#addupdateremove-documents)(id) - document.[**remove**](doc/document-search.md#addupdateremove-documents)(id)
- document.[**remove**](doc/document-search.md#addupdateremove-documents)(document) - document.[**remove**](doc/document-search.md#addupdateremove-documents)(document)
@@ -951,7 +951,7 @@ Global Members:
Async Equivalents (Non-Blocking Balanced): Async Equivalents (Non-Blocking Balanced):
- <small>_async_</small> [**.addAsync**](doc/async.md)( ... , \<callback\>) - <small>_async_</small> [**.addAsync**](doc/async.md)( ... , \<callback\>)
- <small>_async_</small> ~~[**.appendAsync**](doc/async.md)( ... , \<callback\>)~~ - <small>_async_</small> [**.appendAsync**](doc/async.md)( ... , \<callback\>)
- <small>_async_</small> [**.updateAsync**](doc/async.md)( ... , \<callback\>) - <small>_async_</small> [**.updateAsync**](doc/async.md)( ... , \<callback\>)
- <small>_async_</small> [**.removeAsync**](doc/async.md)( ... , \<callback\>) - <small>_async_</small> [**.removeAsync**](doc/async.md)( ... , \<callback\>)
- <small>_async_</small> [**.searchAsync**](doc/async.md)( ... , \<callback\>) - <small>_async_</small> [**.searchAsync**](doc/async.md)( ... , \<callback\>)
@@ -1439,7 +1439,7 @@ Try to choose the most upper of these tokenizer which covers your requirements:
<td><code>"tolerant"</code></td> <td><code>"tolerant"</code></td>
<td>index the full term by also being tolerant against typos like swapped letters and missing letters</td> <td>index the full term by also being tolerant against typos like swapped letters and missing letters</td>
<td><code>foobra</code><br><code>foboar</code><br><code>foobr</code><br><code>fooba</code></td> <td><code>foobra</code><br><code>foboar</code><br><code>foobr</code><br><code>fooba</code></td>
<td>1 + 2(n - 2)</td> <td>2(n - 2) + 2</td>
</tr> </tr>
<tr></tr> <tr></tr>
<tr> <tr>

View File

@@ -5,7 +5,7 @@
Those methods of each index type provide an async version: Those methods of each index type provide an async version:
- addAsync() - addAsync()
- ~~appendAsync()~~ - appendAsync()
- updateAsync() - updateAsync()
- removeAsync() - removeAsync()
- searchAsync() - searchAsync()

9
index.d.ts vendored
View File

@@ -231,9 +231,7 @@ declare module "flexsearch" {
add(id: Id, content: string): W extends false add(id: Id, content: string): W extends false
? this ? this
: Promise<this>; : Promise<this>;
/**
* @deprecated The method "append" will be removed in an upcoming release, only use "add" instead
*/
append(id: Id, content: string): W extends false append(id: Id, content: string): W extends false
? this ? this
: Promise<this>; : Promise<this>;
@@ -295,7 +293,6 @@ declare module "flexsearch" {
callback?: AsyncCallback<void>, callback?: AsyncCallback<void>,
): Promise<this>; ): Promise<this>;
/** @deprecated The method "append" will be removed in an upcoming release, just use "add" instead */
appendAsync( appendAsync(
id: Id, id: Id,
content: string, content: string,
@@ -633,11 +630,9 @@ declare module "flexsearch" {
? this ? this
: Promise<this>; : Promise<this>;
/** @deprecated The method "append" will be removed in an upcoming release, just use "add" instead */
append(id: Id, document: D): W extends false append(id: Id, document: D): W extends false
? this ? this
: Promise<this>; : Promise<this>;
/** @deprecated The method "append" will be removed in an upcoming release, just use "add" instead */
append(document: D): W extends false append(document: D): W extends false
? this ? this
: Promise<this>; : Promise<this>;
@@ -769,13 +764,11 @@ declare module "flexsearch" {
callback?: AsyncCallback<void>, callback?: AsyncCallback<void>,
): Promise<this>; ): Promise<this>;
/** @deprecated The method "append" will be removed in an upcoming release, just use "add" instead */
appendAsync( appendAsync(
id: Id, id: Id,
document: D, document: D,
callback?: AsyncCallback<void>, callback?: AsyncCallback<void>,
): Promise<this>; ): Promise<this>;
/** @deprecated The method "append" will be removed in an upcoming release, just use "add" instead */
appendAsync( appendAsync(
document: D, document: D,
callback?: AsyncCallback<void>, callback?: AsyncCallback<void>,

View File

@@ -255,10 +255,12 @@ function create_result(rows, resolve, enrich){
} }
SqliteDB.prototype.get = function(key, ctx, limit = 0, offset = 0, resolve = true, enrich = false, tags){ SqliteDB.prototype.get = function(key, ctx, limit = 0, offset = 0, resolve = true, enrich = false, tags){
let result; let result;
let stmt = ''; let stmt = '';
let params = ctx ? [ctx, key] : [key]; let params = ctx ? [ctx, key] : [key];
let table = "main." + (ctx ? "ctx" : "map") + this.field; let table = "main." + (ctx ? "ctx" : "map") + this.field;
if(tags){ if(tags){
for(let i = 0; i < tags.length; i+=2){ for(let i = 0; i < tags.length; i+=2){
stmt += ` AND ${ table }.id IN (SELECT id FROM main.tag_${ sanitize(tags[i]) } WHERE tag = ?)`; stmt += ` AND ${ table }.id IN (SELECT id FROM main.tag_${ sanitize(tags[i]) } WHERE tag = ?)`;
@@ -303,12 +305,14 @@ SqliteDB.prototype.get = function(key, ctx, limit = 0, offset = 0, resolve = tru
params params
}); });
} }
return result.then(function(rows){ return result.then(function(rows){
return create_result(rows, resolve, enrich); return create_result(rows, resolve, enrich);
}); });
}; };
SqliteDB.prototype.tag = function(tag, limit = 0, offset = 0, enrich = false){ SqliteDB.prototype.tag = function(tag, limit = 0, offset = 0, enrich = false){
const table = "main.tag" + this.field; const table = "main.tag" + this.field;
const promise = this.promisfy({ const promise = this.promisfy({
method: "all", method: "all",
@@ -325,17 +329,25 @@ SqliteDB.prototype.tag = function(tag, limit = 0, offset = 0, enrich = false){
`, `,
params: [tag] params: [tag]
}); });
enrich || promise.then(function(rows){ enrich || promise.then(function(rows){
return create_result(rows, true, false); return create_result(rows, true, false);
}); });
return promise; return promise;
}; };
function build_params(length, single_param){ function build_params(length, single_param){
// let stmt = "?";
// for(let i = 1; i < length; i++){
// stmt += ",?";
// }
let stmt = single_param let stmt = single_param
? ",(?)" ? ",(?)"
: ",?"; : ",?";
for(let i = 1; i < length;){ for(let i = 1; i < length;){
if(i <= (length - i)){ if(i <= (length - i)){
stmt += stmt; stmt += stmt;
@@ -346,6 +358,7 @@ function build_params(length, single_param){
break; break;
} }
} }
return stmt.substring(1); return stmt.substring(1);
} }
@@ -362,16 +375,9 @@ SqliteDB.prototype.enrich = function(ids){
const chunk = ids.length - count > MAXIMUM_QUERY_VARS const chunk = ids.length - count > MAXIMUM_QUERY_VARS
? ids.slice(count, count + MAXIMUM_QUERY_VARS) ? ids.slice(count, count + MAXIMUM_QUERY_VARS)
: count ? ids.slice(count) : ids; : count ? ids.slice(count) : ids;
const stmt = build_params(chunk.length);
count += chunk.length; count += chunk.length;
// let stmt = "?";
// for(let i = 1; i < chunk.length; i++){
// stmt += ",?";
// }
// 10x faster string concatenation
let stmt = build_params(chunk.length);
promises.push(this.promisfy({ promises.push(this.promisfy({
method: "all", method: "all",
stmt: `SELECT id, doc FROM main.reg WHERE id IN (${stmt})`, stmt: `SELECT id, doc FROM main.reg WHERE id IN (${stmt})`,
@@ -594,37 +600,29 @@ SqliteDB.prototype.transaction = async function(task, callback){
}); });
}; };
SqliteDB.prototype.commit = async function(flexsearch, _replace, _append){ SqliteDB.prototype.commit = async function(flexsearch){
// process cleanup tasks let tasks = flexsearch.commit_task;
if(_replace){ let removals = [];
await this.clear(); let inserts = [];
// there are just removals in the task queue flexsearch.commit_task = [];
flexsearch.commit_task = [];
} for(let i = 0, task; i < tasks.length; i++){
else{ task = tasks[i];
let tasks = flexsearch.commit_task; if(typeof task["del"] !== "undefined"){
flexsearch.commit_task = []; removals.push(task["del"]);
for(let i = 0, task; i < tasks.length; i++){
task = tasks[i];
// there are just removals in the task queue
if(task.clear){
await this.clear();
_replace = true;
break;
}
else{
tasks[i] = task.del;
}
} }
if(!_replace){ else if(typeof task["ins"] !== "undefined"){
if(!_append){ inserts.push(task["ins"]);
tasks = tasks.concat(toArray(flexsearch.reg));
}
tasks.length && await this.remove(tasks);
} }
} }
if(removals.length){
await this.remove(removals);
}
// if(inserts_map.length || inserts_ctx.length){
// await this.insert(inserts_map, inserts_ctx);
// }
if(!flexsearch.reg.size){ if(!flexsearch.reg.size){
return; return;
} }
@@ -645,7 +643,11 @@ SqliteDB.prototype.commit = async function(flexsearch, _replace, _append){
params.push(key, i, ids[j]); params.push(key, i, ids[j]);
// maximum count of variables supported // maximum count of variables supported
if((j === ids.length - 1) || (params.length + 3 > MAXIMUM_QUERY_VARS)){ if((j === ids.length - 1) || (params.length + 3 > MAXIMUM_QUERY_VARS)){
this.db.run("INSERT INTO main.map" + this.field + " (key, res, id) VALUES " + stmt, params); this.db.run(
"INSERT INTO main.map" + this.field + " (key, res, id) VALUES " + stmt
// " ON CONFLICT DO " +
// "UPDATE main.map" + this.field + " SET key = '', res = 0 WHERE key = ? AND res > ? AND id = ?"
, params);
stmt = ""; stmt = "";
params = []; params = [];
} }
@@ -700,13 +702,13 @@ SqliteDB.prototype.commit = async function(flexsearch, _replace, _append){
: doc || null : doc || null
); );
if(chunk.length + 2 > MAXIMUM_QUERY_VARS){ if(chunk.length + 2 > MAXIMUM_QUERY_VARS){
this.db.run("INSERT INTO main.reg (id, doc) VALUES " + stmt, chunk); this.db.run("INSERT INTO main.reg (id, doc) VALUES " + stmt + " ON CONFLICT DO NOTHING", chunk);
stmt = ""; stmt = "";
chunk = []; chunk = [];
} }
} }
if(chunk.length){ if(chunk.length){
this.db.run("INSERT INTO main.reg (id, doc) VALUES " + stmt, chunk); this.db.run("INSERT INTO main.reg (id, doc) VALUES " + stmt + " ON CONFLICT DO NOTHING", chunk);
} }
} }
else if(!flexsearch.bypass){ else if(!flexsearch.bypass){
@@ -717,7 +719,7 @@ SqliteDB.prototype.commit = async function(flexsearch, _replace, _append){
: count ? ids.slice(count) : ids; : count ? ids.slice(count) : ids;
count += chunk.length; count += chunk.length;
const stmt = build_params(chunk.length, /* single param */ true); const stmt = build_params(chunk.length, /* single param */ true);
this.db.run("INSERT INTO main.reg (id) VALUES " + stmt, chunk); this.db.run("INSERT INTO main.reg (id) VALUES " + stmt + " ON CONFLICT DO NOTHING", chunk);
} }
} }
//}); //});
@@ -746,6 +748,10 @@ SqliteDB.prototype.commit = async function(flexsearch, _replace, _append){
} }
}); });
if(inserts.length){
await this.cleanup();
}
// TODO // TODO
//await this.transaction(function(){ //await this.transaction(function(){
// this.db.run("INSERT INTO main.cfg" + this.field + " (cfg) VALUES (?)", [JSON.stringify({ // this.db.run("INSERT INTO main.cfg" + this.field + " (cfg) VALUES (?)", [JSON.stringify({
@@ -802,6 +808,34 @@ SqliteDB.prototype.remove = function(ids){
}); });
}; };
/**
 * Deletes duplicate rows from the "map" and "ctx" tables of this field.
 *
 * Rows of the map table are grouped by (id, key), rows of the ctx table by
 * (id, ctx, key). Inside every group the rows are numbered in ascending
 * order of "res"; each row numbered higher than 1 is deleted, so one row
 * per group remains.
 *
 * Both DELETE statements are issued from within one this.transaction() call,
 * map table first, ctx table second.
 *
 * NOTE(review): the callback reads this.db and this.field, so it relies on
 * transaction() invoking the callback with the SqliteDB instance as "this" -
 * confirm against the implementation of transaction().
 *
 * @returns the value returned by this.transaction()
 */
SqliteDB.prototype.cleanup = function(){
    return this.transaction(function(){
        // table name prefix paired with the window definition which describes a "duplicate"
        const targets = [
            ["main.map", "WINDOW dupes AS (PARTITION BY id, key ORDER BY res) "],
            ["main.ctx", "WINDOW dupes AS (PARTITION BY id, ctx, key ORDER BY res) "]
        ];
        for(const [prefix, window_clause] of targets){
            const table = prefix + this.field;
            const numbered_rows =
                "SELECT ROWID, row_number() OVER dupes AS count " +
                "FROM " + table + " _t " +
                window_clause;
            // keep row number 1 of every partition, drop the rest
            this.db.run(
                "DELETE FROM " + table + " " +
                "WHERE ROWID IN (" +
                    "SELECT ROWID FROM (" + numbered_rows + ") " +
                    "WHERE count > 1" +
                ")"
            );
        }
    });
};
SqliteDB.prototype.promisfy = function(opt){ SqliteDB.prototype.promisfy = function(opt){
const db = this.db; const db = this.db;
return new Promise(function(resolve, reject){ return new Promise(function(resolve, reject){

View File

@@ -42,7 +42,7 @@ Document.prototype.add = function(id, content, _append){
if(typeof tree === "function"){ if(typeof tree === "function"){
const tmp = tree(content); const tmp = tree(content);
if(tmp){ if(tmp){
index.add(id, tmp, /* append: */ false, /* skip update: */ true); index.add(id, tmp, /* append: */ _append, /* skip update: */ true);
} }
} }
else{ else{
@@ -225,7 +225,7 @@ function add_index(obj, tree, marker, pos, index, id, key, _append){
if((obj = obj[key])){ if((obj = obj[key])){
// reached target field // reached the target field
if(pos === (tree.length - 1)){ if(pos === (tree.length - 1)){
// handle target value // handle target value

View File

@@ -318,12 +318,23 @@ Document.prototype.search = function(query, limit, options, _promises){
} }
} }
// TODO merge sorted by score
// TODO add score property to results
// if(merge){
// opt.resolve = false;
// }
res = cache res = cache
? index.searchCache(query, limit, opt) ? index.searchCache(query, limit, opt)
: index.search(query, limit, opt); : index.search(query, limit, opt);
// restore state // restore state
opt_enrich && (opt.enrich = opt_enrich); // if(merge){
// opt.resolve = resolve;
// }
if(opt_enrich) {
opt.enrich = opt_enrich;
}
if(promises){ if(promises){
promises[i] = res; promises[i] = res;

View File

@@ -154,12 +154,12 @@ if(SUPPORT_PERSISTENT){
} }
return db.mount(this); return db.mount(this);
}; };
Index.prototype.commit = function(replace, append){ Index.prototype.commit = function(replace){
if(this.commit_timer){ if(this.commit_timer){
clearTimeout(this.commit_timer); clearTimeout(this.commit_timer);
this.commit_timer = null; this.commit_timer = null;
} }
return this.db.commit(this, replace, append); return this.db.commit(this, replace);
}; };
Index.prototype.destroy = function(){ Index.prototype.destroy = function(){
if(this.commit_timer){ if(this.commit_timer){

View File

@@ -8,7 +8,7 @@ import {
import { create_object } from "../common.js"; import { create_object } from "../common.js";
import Index, { autoCommit } from "../index.js"; import Index, { autoCommit } from "../index.js";
import default_compress from "../compress.js"; import default_compress from "../compress.js";
import { KeystoreArray } from "../keystore.js"; import { KeystoreArray, KeystoreMap } from "../keystore.js";
// TODO: // TODO:
// string + number as text // string + number as text
@@ -21,7 +21,6 @@ import { KeystoreArray } from "../keystore.js";
* @param {boolean=} _append * @param {boolean=} _append
* @param {boolean=} _skip_update * @param {boolean=} _skip_update
*/ */
Index.prototype.add = function(id, content, _append, _skip_update){ Index.prototype.add = function(id, content, _append, _skip_update){
if(content && (id || (id === 0))){ if(content && (id || (id === 0))){
@@ -39,9 +38,9 @@ Index.prototype.add = function(id, content, _append, _skip_update){
// do not force a string as input // do not force a string as input
// https://github.com/nextapps-de/flexsearch/issues/432 // https://github.com/nextapps-de/flexsearch/issues/432
content = this.encoder.encode(content, !depth); content = this.encoder.encode(content, !depth);
const word_length = content.length; const term_count = content.length;
if(word_length){ if(term_count){
// check context dupes to skip all contextual redundancy along a document // check context dupes to skip all contextual redundancy along a document
@@ -49,9 +48,9 @@ Index.prototype.add = function(id, content, _append, _skip_update){
const dupes = create_object(); const dupes = create_object();
const resolution = this.resolution; const resolution = this.resolution;
for(let i = 0; i < word_length; i++){ for(let i = 0; i < term_count; i++){
let term = content[this.rtl ? word_length - 1 - i : i]; let term = content[this.rtl ? term_count - 1 - i : i];
let term_length = term.length; let term_length = term.length;
// skip dupes will break the context chain // skip dupes will break the context chain
@@ -59,7 +58,7 @@ Index.prototype.add = function(id, content, _append, _skip_update){
let score = this.score let score = this.score
? this.score(content, term, i, null, 0) ? this.score(content, term, i, null, 0)
: get_score(resolution, word_length, i); : get_score(resolution, term_count, i);
let token = ""; let token = "";
switch(this.tokenize){ switch(this.tokenize){
@@ -70,22 +69,25 @@ Index.prototype.add = function(id, content, _append, _skip_update){
// A[CB]DE A[]CDE // A[CB]DE A[]CDE
// AB[DC]E AB[]DE // AB[DC]E AB[]DE
// ABC[ED] ABC[]E // ABC[ED] ABC[]E
// ABCD[]
case "tolerant": case "tolerant":
this.push_index(dupes, term, score, id, _append); this._push_index(dupes, term, score, id, _append);
if(term_length > 2){ if(term_length > 2){
for(let x = 1, char_a, char_b, prt_1, prt_2; x < term_length - 1; x++){ for(let x = 1, char_a, char_b, prt_1, prt_2; x < term_length - 1; x++){
char_a = term.charAt(x); char_a = term.charAt(x);
char_b = term.charAt(x + 1); char_b = term.charAt(x + 1);
prt_1 = term.substring(0, x) + char_b; prt_1 = term.substring(0, x) + char_b;
prt_2 = term.substring(x + 2); prt_2 = term.substring(x + 2);
// swap letters // swapped letters
token = prt_1 /*+ char_b*/ + char_a + prt_2; token = prt_1 /*+ char_b*/ + char_a + prt_2;
dupes[token] || this.push_index(dupes, token, score, id, _append); this._push_index(dupes, token, score, id, _append);
// remove letters // missing letters
token = prt_1 /*+ char_b*/ + prt_2; token = prt_1 /*+ char_b*/ + prt_2;
dupes[token] || this.push_index(dupes, token, score, id, _append); this._push_index(dupes, token, score, id, _append);
} }
// missing last letter
this._push_index(dupes, term.substring(0, term.length - 1), score, id, _append);
} }
break; break;
@@ -94,13 +96,11 @@ Index.prototype.add = function(id, content, _append, _skip_update){
for(let x = 0, _x; x < term_length; x++){ for(let x = 0, _x; x < term_length; x++){
for(let y = term_length; y > x; y--){ for(let y = term_length; y > x; y--){
token = term.substring(x, y); token = term.substring(x, y);
if(!dupes[token]){ _x = this.rtl ? term_length - 1 - x : x;
_x = this.rtl ? term_length - 1 - x : x; const partial_score = this.score
const partial_score = this.score ? this.score(content, term, i, token, _x)
? this.score(content, term, i, token, _x) : get_score(resolution, term_count, i, term_length, _x);
: get_score(resolution, word_length, i, term_length, _x); this._push_index(dupes, token, partial_score, id, _append);
this.push_index(dupes, token, partial_score, id, _append);
}
} }
} }
break; break;
@@ -117,12 +117,10 @@ Index.prototype.add = function(id, content, _append, _skip_update){
? term_length - 1 - x ? term_length - 1 - x
: x : x
] + token; ] + token;
if(!dupes[token]){ const partial_score = this.score
const partial_score = this.score ? this.score(content, term, i, token, x)
? this.score(content, term, i, token, x) : get_score(resolution, term_count, i, term_length, x);
: get_score(resolution, word_length, i, term_length, x); this._push_index(dupes, token, partial_score, id, _append);
this.push_index(dupes, token, partial_score, id, _append);
}
} }
token = ""; token = "";
} }
@@ -136,57 +134,47 @@ Index.prototype.add = function(id, content, _append, _skip_update){
? term_length - 1 - x ? term_length - 1 - x
: x : x
]; ];
dupes[token] || this.push_index(dupes, token, score, id, _append); this._push_index(dupes, token, score, id, _append);
} }
break; break;
} }
// fallthrough to next case when token has a length of 1 // fallthrough to next case when token has a length of 1
default: // "strict": default: // "strict":
this.push_index(dupes, term, score, id, _append); this._push_index(dupes, term, score, id, _append);
// context is just supported by tokenizer "strict" // context is just supported by tokenizer "strict"
if(depth){ if(depth && (term_count > 1) && (i < (term_count - 1))){
if((word_length > 1) && (i < (word_length - 1))){ const resolution = this.resolution_ctx;
const keyword = term;
const size = Math.min(
depth + 1,
this.rtl
? i + 1
: term_count - i
);
// check inner dupes to skip repeating words in the current context for(let x = 1; x < size; x++){
const dupes_inner = create_object();
const resolution = this.resolution_ctx; term = content[
const keyword = term;
const size = Math.min(
depth + 1,
this.rtl this.rtl
? i + 1 ? term_count - 1 - i - x
: word_length - i : i + x
];
const swap = this.bidirectional && (term > keyword);
const context_score = this.score
? this.score(content, keyword, i, term, x - 1)
: get_score(resolution + ((term_count / 2) > resolution ? 0 : 1), term_count, i, size - 1, x - 1);
this._push_index(
dupes_ctx,
swap ? keyword : term,
context_score,
id,
_append,
swap ? term : keyword
); );
dupes_inner[keyword] = 1;
for(let x = 1; x < size; x++){
term = content[
this.rtl
? word_length - 1 - i - x
: i + x
];
if(term && !dupes_inner[term]){
dupes_inner[term] = 1;
const context_score = this.score
? this.score(content, keyword, i, term, x - 1)
: get_score(resolution + ((word_length / 2) > resolution ? 0 : 1), word_length, i, size - 1, x - 1);
const swap = this.bidirectional && (term > keyword);
this.push_index(
dupes_ctx,
swap ? keyword : term,
context_score,
id,
_append,
swap ? term : keyword
);
}
}
} }
} }
} }
@@ -195,15 +183,10 @@ Index.prototype.add = function(id, content, _append, _skip_update){
this.fastupdate || this.reg.add(id); this.fastupdate || this.reg.add(id);
} }
else{
content = "";
}
} }
if(SUPPORT_PERSISTENT && this.db){ if(SUPPORT_PERSISTENT && this.db){
// when the term has no valid content (e.g., empty), this.commit_task.push(_append ? { "ins": id } : { "del": id });
// then it was not added to the ID registry for removal
content || this.commit_task.push({ "del": id });
this.commit_auto && autoCommit(this); this.commit_auto && autoCommit(this);
} }
@@ -219,29 +202,34 @@ Index.prototype.add = function(id, content, _append, _skip_update){
* @param {boolean=} append * @param {boolean=} append
* @param {string=} keyword * @param {string=} keyword
*/ */
Index.prototype._push_index = function(dupes, term, score, id, append, keyword){
Index.prototype.push_index = function(dupes, term, score, id, append, keyword){ let res, arr;
let arr = keyword ? this.ctx : this.map; if(!(res = dupes[term]) || (keyword && !res[keyword])){
let tmp;
if(!dupes[term] || (keyword && !(tmp = dupes[term])[keyword])){
if(keyword){ if(keyword){
dupes = tmp || (dupes[term] = create_object()); dupes = res || (dupes[term] = create_object());
dupes[keyword] = 1; dupes[keyword] = 1;
if(SUPPORT_COMPRESSION && this.compress){ if(SUPPORT_COMPRESSION && this.compress){
keyword = default_compress(keyword); keyword = default_compress(keyword);
} }
tmp = arr.get(keyword); arr = this.ctx;
tmp ? arr = tmp res = arr.get(keyword);
: arr.set(keyword, arr = new Map()); res ? arr = res
: arr.set(
keyword,
arr = SUPPORT_KEYSTORE && this.keystore
? new KeystoreMap(this.keystore)
: new Map()
);
} }
else{ else{
arr = this.map;
dupes[term] = 1; dupes[term] = 1;
} }
@@ -249,37 +237,56 @@ Index.prototype.push_index = function(dupes, term, score, id, append, keyword){
term = default_compress(term); term = default_compress(term);
} }
tmp = arr.get(term); res = arr.get(term);
tmp ? arr = tmp : arr.set(term, arr = tmp = []); res ? arr = res
// the ID array will be upgraded dynamically : arr.set(term, arr = res = []);
arr = arr[score] || (arr[score] = []);
if(!append || !arr.includes(id)){ if(append){
for(let i = 0, arr; i < res.length; i++){
// auto-upgrade to a keystore array if max size exceeded arr = res[i];
if(SUPPORT_KEYSTORE){ if(arr && arr.includes(id)){
if(arr.length === 2**31-1 /*|| !(arr instanceof KeystoreArray)*/){ if(i <= score){
const keystore = new KeystoreArray(arr); // already included in the right slot
if(this.fastupdate){ return;
for(let value of this.reg.values()){ }
if(value.includes(arr)){ else{
value[value.indexOf(arr)] = keystore; // update slot
} arr.splice(arr.indexOf(id), 1);
if(this.fastupdate){
const tmp = this.reg.get(id);
tmp && tmp.splice(tmp.indexOf(arr), 1);
} }
} }
tmp[score] = arr = keystore; break;
} }
} }
}
arr.push(id); // the ID keystore array will upgrade automatically
arr = arr[score] || (arr[score] = []);
arr.push(id);
// add a reference to the register for fast updates // auto-upgrade to a keystore array if max size exceeded
if(this.fastupdate){ if(SUPPORT_KEYSTORE){
const tmp = this.reg.get(id); if(arr.length === 2**31-1 /*|| !(arr instanceof KeystoreArray)*/){
tmp ? tmp.push(arr) const keystore = new KeystoreArray(arr);
: this.reg.set(id, [arr]); if(this.fastupdate){
for(let value of this.reg.values()){
if(value.includes(arr)){
value[value.indexOf(arr)] = keystore;
}
}
}
res[score] = arr = keystore;
} }
} }
// add a reference to the register for fast updates
if(this.fastupdate){
const tmp = this.reg.get(id);
tmp ? tmp.push(arr)
: this.reg.set(id, [arr]);
}
} }
} }
@@ -291,7 +298,6 @@ Index.prototype.push_index = function(dupes, term, score, id, append, keyword){
* @param {number=} x * @param {number=} x
* @returns {number} * @returns {number}
*/ */
function get_score(resolution, length, i, term_length, x){ function get_score(resolution, length, i, term_length, x){
// console.log("resolution", resolution); // console.log("resolution", resolution);

View File

@@ -112,112 +112,20 @@ Index.prototype.search = function(query, limit, options){
); );
} }
// let maxlength = 0;
// let minlength = 0;
//
// if(length > 1){
//
// // term deduplication will break the context chain
// // todo add context to dupe check
// const dupes = create_object();
// const query_new = [];
//
// // if(context){
// // keyword = query_terms[0];
// // dupes[keyword] = 1;
// // query_new.push(keyword);
// // maxlength = minlength = keyword.length;
// // i = 1;
// // }
//
// for(let i = 0, term; i < length; i++){
//
// term = query_terms[i];
//
// if(term && !dupes[term]){
//
// // todo add keyword check
// // this fast path can't apply to persistent indexes
// if(!suggest && !(SUPPORT_PERSISTENT && this.db) && !this.get_array(term/*, keyword*/)){
//
// // fast path "not found"
// return !SUPPORT_RESOLVER || resolve
// ? result
// : new Resolver(result);
// }
// else{
//
// query_new.push(term);
// dupes[term] = 1;
// }
//
// const term_length = term.length;
// maxlength = Math.max(maxlength, term_length);
// minlength = minlength ? Math.min(minlength, term_length) : term_length;
// }
// // else if(term && (!this.depth || context === false)){
// // query_new.push(term);
// // }
// }
//
// query_terms = query_new;
// length = query_terms.length;
// }
//
// // the term length could be changed after deduplication
//
// if(!length){
// return !SUPPORT_RESOLVER || resolve
// ? result
// : new Resolver(result);
// }
//
// // fast path single term
// if(length === 1){
// return single_term_query.call(
// this,
// query_terms[0], // term
// "", // ctx
// limit,
// offset,
// resolve,
// enrich,
// tag
// );
// }
//
// // fast path single context
// if(length === 2 && context && !suggest){
// return single_term_query.call(
// this,
// query_terms[0], // term
// query_terms[1], // ctx
// limit,
// offset,
// resolve,
// enrich,
// tag
// );
// }
let dupes = create_object(); let dupes = create_object();
let index = 0, keyword; let index = 0, keyword;
//if(length > 1){ if(context){
if(context){ // start with context right away
// start with context right away keyword = query_terms[0];
keyword = query_terms[0]; index = 1;
index = 1; }
} // else {
// todo // // sorting terms will break the context chain
// else if(maxlength > 9 && (maxlength / minlength) > 3){ // // bigger terms have a less occurrence
// // sorting terms will break the context chain // // this might reduce the intersection task
// // bigger terms has less occurrence // query_terms.sort(sort_by_length_down);
// // this might also reduce the intersection task // }
// // todo check intersection order
// query_terms.sort(sort_by_length_down);
// }
//}
if(!resolution && resolution !== 0){ if(!resolution && resolution !== 0){
resolution = keyword resolution = keyword
@@ -245,7 +153,7 @@ Index.prototype.search = function(query, limit, options){
if(term && !dupes[term]){ if(term && !dupes[term]){
dupes[term] = 1; dupes[term] = 1;
arr = await self.get_array(term, keyword, 0, 0, false, false); arr = await self._get_array(term, keyword, 0, 0, false, false);
arr = add_result(arr, /** @type {Array} */ (result), suggest, resolution); arr = add_result(arr, /** @type {Array} */ (result), suggest, resolution);
if(arr){ if(arr){
@@ -295,7 +203,7 @@ Index.prototype.search = function(query, limit, options){
if(term && !dupes[term]){ if(term && !dupes[term]){
dupes[term] = 1; dupes[term] = 1;
arr = this.get_array(term, keyword, 0, 0, false, false); arr = this._get_array(term, keyword, 0, 0, false, false);
arr = add_result(arr, /** @type {Array} */ (result), suggest, resolution); arr = add_result(arr, /** @type {Array} */ (result), suggest, resolution);
if(arr){ if(arr){
@@ -397,7 +305,7 @@ function return_result(result, resolution, limit, offset, suggest, boost, resolv
function single_term_query(term, keyword, limit, offset, resolve, enrich, tag){ function single_term_query(term, keyword, limit, offset, resolve, enrich, tag){
const result = this.get_array( const result = this._get_array(
term, term,
keyword, keyword,
limit, limit,
@@ -483,7 +391,7 @@ function add_result(arr, result, suggest, resolution){
* Promise<IntermediateSearchResults|EnrichedSearchResults> * Promise<IntermediateSearchResults|EnrichedSearchResults>
* } * }
*/ */
Index.prototype.get_array = function(term, keyword, limit, offset, resolve, enrich, tag){ Index.prototype._get_array = function(term, keyword, limit, offset, resolve, enrich, tag){
let arr, swap; let arr, swap;

View File

@@ -10,9 +10,6 @@ const build_compact = env && env.includes("compact");
const build_esm = !env || env.startsWith("module"); const build_esm = !env || env.startsWith("module");
const Charset = _Charset || (await import("../src/charset.js")).default; const Charset = _Charset || (await import("../src/charset.js")).default;
// global.FlexSearch = { Index, Document, Worker, Charset, Encoder, Resolver };
// global.build = { build_light, build_compact, build_esm };
describe("Initialize", function(){ describe("Initialize", function(){
const index = new Index(); const index = new Index();
@@ -157,6 +154,108 @@ describe("Add", function(){
}); });
}); });
describe("Append", function(){
// Appends four contents to a freshly created index and verifies the ID registry,
// the term map and the (unused) context map.
it("Should have been properly appended to the index", function(){
const index = new Index();
// the index is empty, so every append() call introduces a new ID
index.append(0, "foo");
index.append(2, "bar");
index.append(1, "FooBar");
index.append(3, "Some 'short' content.");
// every appended ID has to show up in the registry, regardless of the insertion order
expect(Array.from(index.reg.keys())).to.have.members([0, 1, 2, 3]);
// the map is keyed by the encoded terms; as the expected keys show,
// "foo" is stored as "fo" and "FooBar" as "fobar"
expect(Array.from(index.map.keys())).to.have.members(["fo", "bar", "fobar", "some", "short", "content"]);
// the index was created without a context option, the context map is expected to stay empty
expect(index.ctx.size).to.equal(0);
expect(index.reg.size).to.equal(4);
});
it("Should have been properly appended by score", function(){
let index = new Index();
index.add(0, "foo A B C D E F bar");
index.add(1, "foo A B C bar D E F");
index.add(2, "foo bar A B C D E F");
expect(index.search("foo")).to.eql([0, 1, 2]);
expect(index.search("bar")).to.eql([2, 1, 0]);
expect(index.reg.size).to.equal(3);
expect(index.map.size).to.equal(8);
expect(index.ctx.size).to.equal(0);
index.append(0, "bar");
index.append(0, "foo");
index.append(0, "bar");
expect(index.search("foo")).to.eql([0, 1, 2]);
expect(index.search("bar")).to.eql([0, 2, 1]);
expect(index.reg.size).to.equal(3);
expect(index.map.size).to.equal(8);
expect(index.ctx.size).to.equal(0);
index.remove(2).remove(1).remove(0);
expect(index.reg.size).to.equal(0);
expect(index.map.size).to.equal(0);
expect(index.ctx.size).to.equal(0);
index = new Index({ context: true });
index.add(0, "foo A B C D E F bar");
index.add(1, "foo A B C bar D E F");
index.add(2, "foo bar A B C D E F");
expect(index.search("foo")).to.eql([0, 1, 2]);
expect(index.search("bar")).to.eql([2, 1, 0]);
expect(index.reg.size).to.equal(3);
expect(index.map.size).to.equal(8);
expect(index.ctx.size).to.equal(7);
index.append(0, "bar");
index.append(0, "foo");
index.append(0, "bar");
expect(index.search("foo")).to.eql([0, 1, 2]);
expect(index.search("bar")).to.eql([0, 2, 1]);
expect(index.reg.size).to.equal(3);
expect(index.map.size).to.equal(8);
expect(index.ctx.size).to.equal(7);
index.remove(2).remove(1).remove(0);
expect(index.reg.size).to.equal(0);
expect(index.map.size).to.equal(0);
expect(index.ctx.size).to.equal(0);
});
it("Should not have been appended to the index (Parameter)", function(){
const index = new Index();
index.append("foo");
index.append(3);
index.append(null, "foobar");
index.append(void 0, "foobar");
index.append(3, null);
index.append(3, false);
expect(index.reg.size).to.equal(0);
});
it("Should not have been appended to the index (Empty)", function(){
const index = new Index();
index.append(1, "");
index.append(2, " ");
index.append(3, " ");
index.append(4, " - ");
index.append(5, ` ...
- : ,
<-- `);
expect(index.reg.size).to.equal(0);
});
});
describe("Search (Sync)", function(){ describe("Search (Sync)", function(){
it("Should have been matched properly", function(){ it("Should have been matched properly", function(){

View File

@@ -14,13 +14,13 @@ if(!build_light) describe("Document (Multi-Field Search)", function(){
const data = [{ const data = [{
id: 2, id: 2,
data: { title: "Title 3", body: "Body 3" } data: { title: "Title 3", body: "Body 3", cat: "A" }
},{ },{
id: 1, id: 1,
data: { title: "Title 2", body: "Body 2" } data: { title: "Title 2", body: "Body 2", cat: "B" }
},{ },{
id: 0, id: 0,
data: { title: "Title 1", body: "Body 1" } data: { title: "Title 1", body: "Body 1", cat: "A" }
}]; }];
const update = [{ const update = [{
@@ -404,6 +404,89 @@ if(!build_light) describe("Document (Multi-Field Search)", function(){
})).to.eql([]); })).to.eql([]);
}); });
it("Merge Results", function(){

    // Two indexed fields, only the title is stored, category acts as tag.
    const document = new Document({
        document: {
            store: [ "data:title" ],
            id: "id",
            field: [ "data:title", "data:body" ],
            tag: "data:cat"
        }
    });

    for(const entry of data){
        document.add(entry);
    }

    // Builds the shape the assertions below expect in `res.doc`:
    // just the stored title nested under `data`.
    const stored_doc = function(entry){
        return { data: { title: entry.data.title }};
    };
    const to_doc = function(res){
        return res.doc;
    };

    // 1. merged, not enriched: one entry per id listing the matched field(s)
    const merged = document.search({
        query: "title",
        enrich: false,
        suggest: true,
        merge: true
    });

    expect(merged).to.eql([
        { id: 2, field: [ 'data:title' ] },
        { id: 1, field: [ 'data:title' ] },
        { id: 0, field: [ 'data:title' ] }
    ]);

    // 2. merged and enriched: each entry carries the stored document
    const enriched = document.search({
        query: "title",
        enrich: true,
        suggest: true,
        merge: true
    });

    expect(enriched.map(to_doc)).to.eql([
        stored_doc(data[0]),
        stored_doc(data[1]),
        stored_doc(data[2])
    ]);

    // 3. additionally restricted to category "A" (first and last fixture entry)
    const tagged = document.search({
        query: "title",
        tag: { "data:cat": "A" },
        enrich: true,
        suggest: true,
        merge: true
    });

    expect(tagged.map(to_doc)).to.eql([
        stored_doc(data[0]),
        stored_doc(data[2])
    ]);
});
it("Using BigInt", function(){

    const document = new Document({
        document: {
            store: "data:title",
            id: "id",
            field: [ "data:title", "data:body" ],
            tag: "data:cat"
        }
    });

    // Add a shallow copy of every fixture entry whose id is replaced by a
    // BigInt counting up from 1n in fixture order.
    for(const [position, entry] of data.entries()){
        const copy = Object.assign({}, entry, { id: BigInt(position + 1) });
        document.add(copy);
    }

    const tagged = document.search({
        query: "title",
        tag: { "data:cat": "A" },
        merge: true
    });

    // Only the category "A" entries (1st and 3rd fixture) are expected,
    // with their BigInt ids intact.
    expect(tagged).to.eql([
        { id: BigInt(1), field: [ "data:title" ] },
        { id: BigInt(3), field: [ "data:title" ] }
    ]);
});
it("Custom Document Store", function(){ it("Custom Document Store", function(){
const document = new Document({ const document = new Document({