mirror of
https://github.com/nextapps-de/flexsearch.git
synced 2025-08-24 14:42:55 +02:00
re-implement append
This commit is contained in:
@@ -891,7 +891,7 @@ Global Members:
|
||||
`Index` / `Worker`-Index Methods:
|
||||
|
||||
- index.[**add**](#add-text-item-to-an-index)(id, string)
|
||||
- ~~index.[**append**]()(id, string)~~
|
||||
- index.[**append**]()(id, string)
|
||||
- index.[**update**](#update-item-from-an-index)(id, string)
|
||||
- index.[**remove**](#remove-item-from-an-index)(id)
|
||||
- index.[**search**](#search-items)(string, \<limit\>, \<options\>)
|
||||
@@ -918,7 +918,7 @@ Global Members:
|
||||
`Document` Methods:
|
||||
|
||||
- document.[**add**](doc/document-search.md#addupdateremove-documents)(\<id\>, document)
|
||||
- ~~document.[**append**]()(\<id\>, document)~~
|
||||
- document.[**append**]()(\<id\>, document)
|
||||
- document.[**update**](doc/document-search.md#addupdateremove-documents)(\<id\>, document)
|
||||
- document.[**remove**](doc/document-search.md#addupdateremove-documents)(id)
|
||||
- document.[**remove**](doc/document-search.md#addupdateremove-documents)(document)
|
||||
@@ -951,7 +951,7 @@ Global Members:
|
||||
Async Equivalents (Non-Blocking Balanced):
|
||||
|
||||
- <small>_async_</small> [**.addAsync**](doc/async.md)( ... , \<callback\>)
|
||||
- <small>_async_</small> ~~[**.appendAsync**](doc/async.md)( ... , \<callback\>)~~
|
||||
- <small>_async_</small> [**.appendAsync**](doc/async.md)( ... , \<callback\>)
|
||||
- <small>_async_</small> [**.updateAsync**](doc/async.md)( ... , \<callback\>)
|
||||
- <small>_async_</small> [**.removeAsync**](doc/async.md)( ... , \<callback\>)
|
||||
- <small>_async_</small> [**.searchAsync**](doc/async.md)( ... , \<callback\>)
|
||||
@@ -1439,7 +1439,7 @@ Try to choose the most upper of these tokenizer which covers your requirements:
|
||||
<td><code>"tolerant"</code></td>
|
||||
<td>index the full term by also being tolerant against typos like swapped letters and missing letters</td>
|
||||
<td><code>foobra</code><br><code>foboar</code><br><code>foobr</code><br><code>fooba</code></td>
|
||||
<td>1 + 2(n - 2)</td>
|
||||
<td>2(n - 2) + 2</td>
|
||||
</tr>
|
||||
<tr></tr>
|
||||
<tr>
|
||||
|
@@ -5,7 +5,7 @@
|
||||
Each index type provides an async version of the following methods:
|
||||
|
||||
- addAsync()
|
||||
- ~~appendAsync()~~
|
||||
- appendAsync()
|
||||
- updateAsync()
|
||||
- removeAsync()
|
||||
- searchAsync()
|
||||
|
9
index.d.ts
vendored
9
index.d.ts
vendored
@@ -231,9 +231,7 @@ declare module "flexsearch" {
|
||||
add(id: Id, content: string): W extends false
|
||||
? this
|
||||
: Promise<this>;
|
||||
/**
|
||||
* @deprecated The method "append" will be removed in an upcoming release, only use "add" instead
|
||||
*/
|
||||
|
||||
append(id: Id, content: string): W extends false
|
||||
? this
|
||||
: Promise<this>;
|
||||
@@ -295,7 +293,6 @@ declare module "flexsearch" {
|
||||
callback?: AsyncCallback<void>,
|
||||
): Promise<this>;
|
||||
|
||||
/** @deprecated The method "append" will be removed in an upcoming release, just use "add" instead */
|
||||
appendAsync(
|
||||
id: Id,
|
||||
content: string,
|
||||
@@ -633,11 +630,9 @@ declare module "flexsearch" {
|
||||
? this
|
||||
: Promise<this>;
|
||||
|
||||
/** @deprecated The method "append" will be removed in an upcoming release, just use "add" instead */
|
||||
append(id: Id, document: D): W extends false
|
||||
? this
|
||||
: Promise<this>;
|
||||
/** @deprecated The method "append" will be removed in an upcoming release, just use "add" instead */
|
||||
append(document: D): W extends false
|
||||
? this
|
||||
: Promise<this>;
|
||||
@@ -769,13 +764,11 @@ declare module "flexsearch" {
|
||||
callback?: AsyncCallback<void>,
|
||||
): Promise<this>;
|
||||
|
||||
/** @deprecated The method "append" will be removed in an upcoming release, just use "add" instead */
|
||||
appendAsync(
|
||||
id: Id,
|
||||
document: D,
|
||||
callback?: AsyncCallback<void>,
|
||||
): Promise<this>;
|
||||
/** @deprecated The method "append" will be removed in an upcoming release, just use "add" instead */
|
||||
appendAsync(
|
||||
document: D,
|
||||
callback?: AsyncCallback<void>,
|
||||
|
@@ -255,10 +255,12 @@ function create_result(rows, resolve, enrich){
|
||||
}
|
||||
|
||||
SqliteDB.prototype.get = function(key, ctx, limit = 0, offset = 0, resolve = true, enrich = false, tags){
|
||||
|
||||
let result;
|
||||
let stmt = '';
|
||||
let params = ctx ? [ctx, key] : [key];
|
||||
let table = "main." + (ctx ? "ctx" : "map") + this.field;
|
||||
|
||||
if(tags){
|
||||
for(let i = 0; i < tags.length; i+=2){
|
||||
stmt += ` AND ${ table }.id IN (SELECT id FROM main.tag_${ sanitize(tags[i]) } WHERE tag = ?)`;
|
||||
@@ -303,12 +305,14 @@ SqliteDB.prototype.get = function(key, ctx, limit = 0, offset = 0, resolve = tru
|
||||
params
|
||||
});
|
||||
}
|
||||
|
||||
return result.then(function(rows){
|
||||
return create_result(rows, resolve, enrich);
|
||||
});
|
||||
};
|
||||
|
||||
SqliteDB.prototype.tag = function(tag, limit = 0, offset = 0, enrich = false){
|
||||
|
||||
const table = "main.tag" + this.field;
|
||||
const promise = this.promisfy({
|
||||
method: "all",
|
||||
@@ -325,17 +329,25 @@ SqliteDB.prototype.tag = function(tag, limit = 0, offset = 0, enrich = false){
|
||||
`,
|
||||
params: [tag]
|
||||
});
|
||||
|
||||
enrich || promise.then(function(rows){
|
||||
return create_result(rows, true, false);
|
||||
});
|
||||
|
||||
return promise;
|
||||
};
|
||||
|
||||
function build_params(length, single_param){
|
||||
|
||||
// let stmt = "?";
|
||||
// for(let i = 1; i < length; i++){
|
||||
// stmt += ",?";
|
||||
// }
|
||||
|
||||
let stmt = single_param
|
||||
? ",(?)"
|
||||
: ",?";
|
||||
|
||||
for(let i = 1; i < length;){
|
||||
if(i <= (length - i)){
|
||||
stmt += stmt;
|
||||
@@ -346,6 +358,7 @@ function build_params(length, single_param){
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return stmt.substring(1);
|
||||
}
|
||||
|
||||
@@ -362,16 +375,9 @@ SqliteDB.prototype.enrich = function(ids){
|
||||
const chunk = ids.length - count > MAXIMUM_QUERY_VARS
|
||||
? ids.slice(count, count + MAXIMUM_QUERY_VARS)
|
||||
: count ? ids.slice(count) : ids;
|
||||
const stmt = build_params(chunk.length);
|
||||
count += chunk.length;
|
||||
|
||||
// let stmt = "?";
|
||||
// for(let i = 1; i < chunk.length; i++){
|
||||
// stmt += ",?";
|
||||
// }
|
||||
|
||||
// 10x faster string concatenation
|
||||
let stmt = build_params(chunk.length);
|
||||
|
||||
promises.push(this.promisfy({
|
||||
method: "all",
|
||||
stmt: `SELECT id, doc FROM main.reg WHERE id IN (${stmt})`,
|
||||
@@ -594,37 +600,29 @@ SqliteDB.prototype.transaction = async function(task, callback){
|
||||
});
|
||||
};
|
||||
|
||||
SqliteDB.prototype.commit = async function(flexsearch, _replace, _append){
|
||||
SqliteDB.prototype.commit = async function(flexsearch){
|
||||
|
||||
// process cleanup tasks
|
||||
if(_replace){
|
||||
await this.clear();
|
||||
// there are just removals in the task queue
|
||||
flexsearch.commit_task = [];
|
||||
}
|
||||
else{
|
||||
let tasks = flexsearch.commit_task;
|
||||
flexsearch.commit_task = [];
|
||||
for(let i = 0, task; i < tasks.length; i++){
|
||||
task = tasks[i];
|
||||
// there are just removals in the task queue
|
||||
if(task.clear){
|
||||
await this.clear();
|
||||
_replace = true;
|
||||
break;
|
||||
}
|
||||
else{
|
||||
tasks[i] = task.del;
|
||||
}
|
||||
let tasks = flexsearch.commit_task;
|
||||
let removals = [];
|
||||
let inserts = [];
|
||||
flexsearch.commit_task = [];
|
||||
|
||||
for(let i = 0, task; i < tasks.length; i++){
|
||||
task = tasks[i];
|
||||
if(typeof task["del"] !== "undefined"){
|
||||
removals.push(task["del"]);
|
||||
}
|
||||
if(!_replace){
|
||||
if(!_append){
|
||||
tasks = tasks.concat(toArray(flexsearch.reg));
|
||||
}
|
||||
tasks.length && await this.remove(tasks);
|
||||
else if(typeof task["ins"] !== "undefined"){
|
||||
inserts.push(task["ins"]);
|
||||
}
|
||||
}
|
||||
|
||||
if(removals.length){
|
||||
await this.remove(removals);
|
||||
}
|
||||
// if(inserts_map.length || inserts_ctx.length){
|
||||
// await this.insert(inserts_map, inserts_ctx);
|
||||
// }
|
||||
if(!flexsearch.reg.size){
|
||||
return;
|
||||
}
|
||||
@@ -645,7 +643,11 @@ SqliteDB.prototype.commit = async function(flexsearch, _replace, _append){
|
||||
params.push(key, i, ids[j]);
|
||||
// maximum count of variables supported
|
||||
if((j === ids.length - 1) || (params.length + 3 > MAXIMUM_QUERY_VARS)){
|
||||
this.db.run("INSERT INTO main.map" + this.field + " (key, res, id) VALUES " + stmt, params);
|
||||
this.db.run(
|
||||
"INSERT INTO main.map" + this.field + " (key, res, id) VALUES " + stmt
|
||||
// " ON CONFLICT DO " +
|
||||
// "UPDATE main.map" + this.field + " SET key = '', res = 0 WHERE key = ? AND res > ? AND id = ?"
|
||||
, params);
|
||||
stmt = "";
|
||||
params = [];
|
||||
}
|
||||
@@ -700,13 +702,13 @@ SqliteDB.prototype.commit = async function(flexsearch, _replace, _append){
|
||||
: doc || null
|
||||
);
|
||||
if(chunk.length + 2 > MAXIMUM_QUERY_VARS){
|
||||
this.db.run("INSERT INTO main.reg (id, doc) VALUES " + stmt, chunk);
|
||||
this.db.run("INSERT INTO main.reg (id, doc) VALUES " + stmt + " ON CONFLICT DO NOTHING", chunk);
|
||||
stmt = "";
|
||||
chunk = [];
|
||||
}
|
||||
}
|
||||
if(chunk.length){
|
||||
this.db.run("INSERT INTO main.reg (id, doc) VALUES " + stmt, chunk);
|
||||
this.db.run("INSERT INTO main.reg (id, doc) VALUES " + stmt + " ON CONFLICT DO NOTHING", chunk);
|
||||
}
|
||||
}
|
||||
else if(!flexsearch.bypass){
|
||||
@@ -717,7 +719,7 @@ SqliteDB.prototype.commit = async function(flexsearch, _replace, _append){
|
||||
: count ? ids.slice(count) : ids;
|
||||
count += chunk.length;
|
||||
const stmt = build_params(chunk.length, /* single param */ true);
|
||||
this.db.run("INSERT INTO main.reg (id) VALUES " + stmt, chunk);
|
||||
this.db.run("INSERT INTO main.reg (id) VALUES " + stmt + " ON CONFLICT DO NOTHING", chunk);
|
||||
}
|
||||
}
|
||||
//});
|
||||
@@ -746,6 +748,10 @@ SqliteDB.prototype.commit = async function(flexsearch, _replace, _append){
|
||||
}
|
||||
});
|
||||
|
||||
if(inserts.length){
|
||||
await this.cleanup();
|
||||
}
|
||||
|
||||
// TODO
|
||||
//await this.transaction(function(){
|
||||
// this.db.run("INSERT INTO main.cfg" + this.field + " (cfg) VALUES (?)", [JSON.stringify({
|
||||
@@ -802,6 +808,34 @@ SqliteDB.prototype.remove = function(ids){
|
||||
});
|
||||
};
|
||||
|
||||
/**
 * Removes duplicate rows from the tables `main.map<field>` and
 * `main.ctx<field>`, where `<field>` is the value of `this.field`.
 *
 * Both statements number the rows of each partition with `row_number()`,
 * ordered by `res` ascending, and delete every row numbered greater than 1.
 * As a result one row per partition remains: the one with the smallest `res`
 * (when several rows share the smallest `res`, which one is kept is up to SQLite).
 *
 * Partitions:
 *  - map table: (id, key)
 *  - ctx table: (id, ctx, key)
 *
 * NOTE(review): the callback reads `this.db` and `this.field`, so it relies on
 * `transaction` invoking it with the SqliteDB instance as `this` - confirm.
 *
 * @returns whatever `this.transaction(...)` returns (presumably a Promise,
 *          since `commit` awaits the call - verify against `transaction`)
 */
SqliteDB.prototype.cleanup = function(){
|
||||
return this.transaction(function(){
|
||||
// 1) term map: keep a single row per (id, key), the one with the smallest "res"
this.db.run(
|
||||
"DELETE FROM main.map" + this.field + " " +
|
||||
"WHERE ROWID IN (" +
|
||||
"SELECT ROWID FROM (" +
|
||||
// "count" is the 1-based position of the row inside its partition
"SELECT ROWID, row_number() OVER dupes AS count " +
|
||||
"FROM main.map" + this.field + " _t " +
|
||||
"WINDOW dupes AS (PARTITION BY id, key ORDER BY res) " +
|
||||
") " +
|
||||
// every row after the first one of a partition is a duplicate
"WHERE count > 1" +
|
||||
")"
|
||||
|
||||
);
|
||||
// 2) context map: same approach, but the partition also includes the "ctx" column
this.db.run(
|
||||
"DELETE FROM main.ctx" + this.field + " " +
|
||||
"WHERE ROWID IN (" +
|
||||
"SELECT ROWID FROM (" +
|
||||
"SELECT ROWID, row_number() OVER dupes AS count " +
|
||||
"FROM main.ctx" + this.field + " _t " +
|
||||
"WINDOW dupes AS (PARTITION BY id, ctx, key ORDER BY res) " +
|
||||
") " +
|
||||
"WHERE count > 1" +
|
||||
")"
|
||||
);
|
||||
});
|
||||
};
|
||||
|
||||
SqliteDB.prototype.promisfy = function(opt){
|
||||
const db = this.db;
|
||||
return new Promise(function(resolve, reject){
|
||||
|
@@ -42,7 +42,7 @@ Document.prototype.add = function(id, content, _append){
|
||||
if(typeof tree === "function"){
|
||||
const tmp = tree(content);
|
||||
if(tmp){
|
||||
index.add(id, tmp, /* append: */ false, /* skip update: */ true);
|
||||
index.add(id, tmp, /* append: */ _append, /* skip update: */ true);
|
||||
}
|
||||
}
|
||||
else{
|
||||
@@ -225,7 +225,7 @@ function add_index(obj, tree, marker, pos, index, id, key, _append){
|
||||
|
||||
if((obj = obj[key])){
|
||||
|
||||
// reached target field
|
||||
// reached the target field
|
||||
if(pos === (tree.length - 1)){
|
||||
|
||||
// handle target value
|
||||
|
@@ -318,12 +318,23 @@ Document.prototype.search = function(query, limit, options, _promises){
|
||||
}
|
||||
}
|
||||
|
||||
// TODO merge sorted by score
|
||||
// TODO add score property to results
|
||||
// if(merge){
|
||||
// opt.resolve = false;
|
||||
// }
|
||||
|
||||
res = cache
|
||||
? index.searchCache(query, limit, opt)
|
||||
: index.search(query, limit, opt);
|
||||
|
||||
// restore state
|
||||
opt_enrich && (opt.enrich = opt_enrich);
|
||||
// if(merge){
|
||||
// opt.resolve = resolve;
|
||||
// }
|
||||
if(opt_enrich) {
|
||||
opt.enrich = opt_enrich;
|
||||
}
|
||||
|
||||
if(promises){
|
||||
promises[i] = res;
|
||||
|
@@ -154,12 +154,12 @@ if(SUPPORT_PERSISTENT){
|
||||
}
|
||||
return db.mount(this);
|
||||
};
|
||||
Index.prototype.commit = function(replace, append){
|
||||
Index.prototype.commit = function(replace){
|
||||
if(this.commit_timer){
|
||||
clearTimeout(this.commit_timer);
|
||||
this.commit_timer = null;
|
||||
}
|
||||
return this.db.commit(this, replace, append);
|
||||
return this.db.commit(this, replace);
|
||||
};
|
||||
Index.prototype.destroy = function(){
|
||||
if(this.commit_timer){
|
||||
|
212
src/index/add.js
212
src/index/add.js
@@ -8,7 +8,7 @@ import {
|
||||
import { create_object } from "../common.js";
|
||||
import Index, { autoCommit } from "../index.js";
|
||||
import default_compress from "../compress.js";
|
||||
import { KeystoreArray } from "../keystore.js";
|
||||
import { KeystoreArray, KeystoreMap } from "../keystore.js";
|
||||
|
||||
// TODO:
|
||||
// string + number as text
|
||||
@@ -21,7 +21,6 @@ import { KeystoreArray } from "../keystore.js";
|
||||
* @param {boolean=} _append
|
||||
* @param {boolean=} _skip_update
|
||||
*/
|
||||
|
||||
Index.prototype.add = function(id, content, _append, _skip_update){
|
||||
|
||||
if(content && (id || (id === 0))){
|
||||
@@ -39,9 +38,9 @@ Index.prototype.add = function(id, content, _append, _skip_update){
|
||||
// do not force a string as input
|
||||
// https://github.com/nextapps-de/flexsearch/issues/432
|
||||
content = this.encoder.encode(content, !depth);
|
||||
const word_length = content.length;
|
||||
const term_count = content.length;
|
||||
|
||||
if(word_length){
|
||||
if(term_count){
|
||||
|
||||
// check context dupes to skip all contextual redundancy along a document
|
||||
|
||||
@@ -49,9 +48,9 @@ Index.prototype.add = function(id, content, _append, _skip_update){
|
||||
const dupes = create_object();
|
||||
const resolution = this.resolution;
|
||||
|
||||
for(let i = 0; i < word_length; i++){
|
||||
for(let i = 0; i < term_count; i++){
|
||||
|
||||
let term = content[this.rtl ? word_length - 1 - i : i];
|
||||
let term = content[this.rtl ? term_count - 1 - i : i];
|
||||
let term_length = term.length;
|
||||
|
||||
// skip dupes will break the context chain
|
||||
@@ -59,7 +58,7 @@ Index.prototype.add = function(id, content, _append, _skip_update){
|
||||
|
||||
let score = this.score
|
||||
? this.score(content, term, i, null, 0)
|
||||
: get_score(resolution, word_length, i);
|
||||
: get_score(resolution, term_count, i);
|
||||
let token = "";
|
||||
|
||||
switch(this.tokenize){
|
||||
@@ -70,22 +69,25 @@ Index.prototype.add = function(id, content, _append, _skip_update){
|
||||
// A[CB]DE A[]CDE
|
||||
// AB[DC]E AB[]DE
|
||||
// ABC[ED] ABC[]E
|
||||
// ABCD[]
|
||||
|
||||
case "tolerant":
|
||||
this.push_index(dupes, term, score, id, _append);
|
||||
this._push_index(dupes, term, score, id, _append);
|
||||
if(term_length > 2){
|
||||
for(let x = 1, char_a, char_b, prt_1, prt_2; x < term_length - 1; x++){
|
||||
char_a = term.charAt(x);
|
||||
char_b = term.charAt(x + 1);
|
||||
prt_1 = term.substring(0, x) + char_b;
|
||||
prt_2 = term.substring(x + 2);
|
||||
// swap letters
|
||||
// swapped letters
|
||||
token = prt_1 /*+ char_b*/ + char_a + prt_2;
|
||||
dupes[token] || this.push_index(dupes, token, score, id, _append);
|
||||
// remove letters
|
||||
this._push_index(dupes, token, score, id, _append);
|
||||
// missing letters
|
||||
token = prt_1 /*+ char_b*/ + prt_2;
|
||||
dupes[token] || this.push_index(dupes, token, score, id, _append);
|
||||
this._push_index(dupes, token, score, id, _append);
|
||||
}
|
||||
// missing last letter
|
||||
this._push_index(dupes, term.substring(0, term.length - 1), score, id, _append);
|
||||
}
|
||||
break;
|
||||
|
||||
@@ -94,13 +96,11 @@ Index.prototype.add = function(id, content, _append, _skip_update){
|
||||
for(let x = 0, _x; x < term_length; x++){
|
||||
for(let y = term_length; y > x; y--){
|
||||
token = term.substring(x, y);
|
||||
if(!dupes[token]){
|
||||
_x = this.rtl ? term_length - 1 - x : x;
|
||||
const partial_score = this.score
|
||||
? this.score(content, term, i, token, _x)
|
||||
: get_score(resolution, word_length, i, term_length, _x);
|
||||
this.push_index(dupes, token, partial_score, id, _append);
|
||||
}
|
||||
_x = this.rtl ? term_length - 1 - x : x;
|
||||
const partial_score = this.score
|
||||
? this.score(content, term, i, token, _x)
|
||||
: get_score(resolution, term_count, i, term_length, _x);
|
||||
this._push_index(dupes, token, partial_score, id, _append);
|
||||
}
|
||||
}
|
||||
break;
|
||||
@@ -117,12 +117,10 @@ Index.prototype.add = function(id, content, _append, _skip_update){
|
||||
? term_length - 1 - x
|
||||
: x
|
||||
] + token;
|
||||
if(!dupes[token]){
|
||||
const partial_score = this.score
|
||||
? this.score(content, term, i, token, x)
|
||||
: get_score(resolution, word_length, i, term_length, x);
|
||||
this.push_index(dupes, token, partial_score, id, _append);
|
||||
}
|
||||
const partial_score = this.score
|
||||
? this.score(content, term, i, token, x)
|
||||
: get_score(resolution, term_count, i, term_length, x);
|
||||
this._push_index(dupes, token, partial_score, id, _append);
|
||||
}
|
||||
token = "";
|
||||
}
|
||||
@@ -136,57 +134,47 @@ Index.prototype.add = function(id, content, _append, _skip_update){
|
||||
? term_length - 1 - x
|
||||
: x
|
||||
];
|
||||
dupes[token] || this.push_index(dupes, token, score, id, _append);
|
||||
this._push_index(dupes, token, score, id, _append);
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
// fallthrough to next case when token has a length of 1
|
||||
default: // "strict":
|
||||
this.push_index(dupes, term, score, id, _append);
|
||||
this._push_index(dupes, term, score, id, _append);
|
||||
|
||||
// context is just supported by tokenizer "strict"
|
||||
if(depth){
|
||||
if(depth && (term_count > 1) && (i < (term_count - 1))){
|
||||
|
||||
if((word_length > 1) && (i < (word_length - 1))){
|
||||
const resolution = this.resolution_ctx;
|
||||
const keyword = term;
|
||||
const size = Math.min(
|
||||
depth + 1,
|
||||
this.rtl
|
||||
? i + 1
|
||||
: term_count - i
|
||||
);
|
||||
|
||||
// check inner dupes to skip repeating words in the current context
|
||||
const dupes_inner = create_object();
|
||||
const resolution = this.resolution_ctx;
|
||||
const keyword = term;
|
||||
const size = Math.min(
|
||||
depth + 1,
|
||||
for(let x = 1; x < size; x++){
|
||||
|
||||
term = content[
|
||||
this.rtl
|
||||
? i + 1
|
||||
: word_length - i
|
||||
? term_count - 1 - i - x
|
||||
: i + x
|
||||
];
|
||||
|
||||
const swap = this.bidirectional && (term > keyword);
|
||||
const context_score = this.score
|
||||
? this.score(content, keyword, i, term, x - 1)
|
||||
: get_score(resolution + ((term_count / 2) > resolution ? 0 : 1), term_count, i, size - 1, x - 1);
|
||||
this._push_index(
|
||||
dupes_ctx,
|
||||
swap ? keyword : term,
|
||||
context_score,
|
||||
id,
|
||||
_append,
|
||||
swap ? term : keyword
|
||||
);
|
||||
|
||||
dupes_inner[keyword] = 1;
|
||||
|
||||
for(let x = 1; x < size; x++){
|
||||
|
||||
term = content[
|
||||
this.rtl
|
||||
? word_length - 1 - i - x
|
||||
: i + x
|
||||
];
|
||||
|
||||
if(term && !dupes_inner[term]){
|
||||
|
||||
dupes_inner[term] = 1;
|
||||
const context_score = this.score
|
||||
? this.score(content, keyword, i, term, x - 1)
|
||||
: get_score(resolution + ((word_length / 2) > resolution ? 0 : 1), word_length, i, size - 1, x - 1);
|
||||
const swap = this.bidirectional && (term > keyword);
|
||||
this.push_index(
|
||||
dupes_ctx,
|
||||
swap ? keyword : term,
|
||||
context_score,
|
||||
id,
|
||||
_append,
|
||||
swap ? term : keyword
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -195,15 +183,10 @@ Index.prototype.add = function(id, content, _append, _skip_update){
|
||||
|
||||
this.fastupdate || this.reg.add(id);
|
||||
}
|
||||
else{
|
||||
content = "";
|
||||
}
|
||||
}
|
||||
|
||||
if(SUPPORT_PERSISTENT && this.db){
|
||||
// when the term has no valid content (e.g., empty),
|
||||
// then it was not added to the ID registry for removal
|
||||
content || this.commit_task.push({ "del": id });
|
||||
this.commit_task.push(_append ? { "ins": id } : { "del": id });
|
||||
this.commit_auto && autoCommit(this);
|
||||
}
|
||||
|
||||
@@ -219,29 +202,34 @@ Index.prototype.add = function(id, content, _append, _skip_update){
|
||||
* @param {boolean=} append
|
||||
* @param {string=} keyword
|
||||
*/
|
||||
Index.prototype._push_index = function(dupes, term, score, id, append, keyword){
|
||||
|
||||
Index.prototype.push_index = function(dupes, term, score, id, append, keyword){
|
||||
let res, arr;
|
||||
|
||||
let arr = keyword ? this.ctx : this.map;
|
||||
let tmp;
|
||||
|
||||
if(!dupes[term] || (keyword && !(tmp = dupes[term])[keyword])){
|
||||
if(!(res = dupes[term]) || (keyword && !res[keyword])){
|
||||
|
||||
if(keyword){
|
||||
|
||||
dupes = tmp || (dupes[term] = create_object());
|
||||
dupes = res || (dupes[term] = create_object());
|
||||
dupes[keyword] = 1;
|
||||
|
||||
if(SUPPORT_COMPRESSION && this.compress){
|
||||
keyword = default_compress(keyword);
|
||||
}
|
||||
|
||||
tmp = arr.get(keyword);
|
||||
tmp ? arr = tmp
|
||||
: arr.set(keyword, arr = new Map());
|
||||
arr = this.ctx;
|
||||
res = arr.get(keyword);
|
||||
res ? arr = res
|
||||
: arr.set(
|
||||
keyword,
|
||||
arr = SUPPORT_KEYSTORE && this.keystore
|
||||
? new KeystoreMap(this.keystore)
|
||||
: new Map()
|
||||
);
|
||||
}
|
||||
else{
|
||||
|
||||
arr = this.map;
|
||||
dupes[term] = 1;
|
||||
}
|
||||
|
||||
@@ -249,37 +237,56 @@ Index.prototype.push_index = function(dupes, term, score, id, append, keyword){
|
||||
term = default_compress(term);
|
||||
}
|
||||
|
||||
tmp = arr.get(term);
|
||||
tmp ? arr = tmp : arr.set(term, arr = tmp = []);
|
||||
// the ID array will be upgraded dynamically
|
||||
arr = arr[score] || (arr[score] = []);
|
||||
res = arr.get(term);
|
||||
res ? arr = res
|
||||
: arr.set(term, arr = res = []);
|
||||
|
||||
if(!append || !arr.includes(id)){
|
||||
|
||||
// auto-upgrade to a keystore array if max size exceeded
|
||||
if(SUPPORT_KEYSTORE){
|
||||
if(arr.length === 2**31-1 /*|| !(arr instanceof KeystoreArray)*/){
|
||||
const keystore = new KeystoreArray(arr);
|
||||
if(this.fastupdate){
|
||||
for(let value of this.reg.values()){
|
||||
if(value.includes(arr)){
|
||||
value[value.indexOf(arr)] = keystore;
|
||||
}
|
||||
if(append){
|
||||
for(let i = 0, arr; i < res.length; i++){
|
||||
arr = res[i];
|
||||
if(arr && arr.includes(id)){
|
||||
if(i <= score){
|
||||
// already included in the right slot
|
||||
return;
|
||||
}
|
||||
else{
|
||||
// update slot
|
||||
arr.splice(arr.indexOf(id), 1);
|
||||
if(this.fastupdate){
|
||||
const tmp = this.reg.get(id);
|
||||
tmp && tmp.splice(tmp.indexOf(arr), 1);
|
||||
}
|
||||
}
|
||||
tmp[score] = arr = keystore;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
arr.push(id);
|
||||
// the ID keystore array will upgrade automatically
|
||||
arr = arr[score] || (arr[score] = []);
|
||||
arr.push(id);
|
||||
|
||||
// add a reference to the register for fast updates
|
||||
if(this.fastupdate){
|
||||
const tmp = this.reg.get(id);
|
||||
tmp ? tmp.push(arr)
|
||||
: this.reg.set(id, [arr]);
|
||||
// auto-upgrade to a keystore array if max size exceeded
|
||||
if(SUPPORT_KEYSTORE){
|
||||
if(arr.length === 2**31-1 /*|| !(arr instanceof KeystoreArray)*/){
|
||||
const keystore = new KeystoreArray(arr);
|
||||
if(this.fastupdate){
|
||||
for(let value of this.reg.values()){
|
||||
if(value.includes(arr)){
|
||||
value[value.indexOf(arr)] = keystore;
|
||||
}
|
||||
}
|
||||
}
|
||||
res[score] = arr = keystore;
|
||||
}
|
||||
}
|
||||
|
||||
// add a reference to the register for fast updates
|
||||
if(this.fastupdate){
|
||||
const tmp = this.reg.get(id);
|
||||
tmp ? tmp.push(arr)
|
||||
: this.reg.set(id, [arr]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -291,7 +298,6 @@ Index.prototype.push_index = function(dupes, term, score, id, append, keyword){
|
||||
* @param {number=} x
|
||||
* @returns {number}
|
||||
*/
|
||||
|
||||
function get_score(resolution, length, i, term_length, x){
|
||||
|
||||
// console.log("resolution", resolution);
|
||||
|
@@ -112,112 +112,20 @@ Index.prototype.search = function(query, limit, options){
|
||||
);
|
||||
}
|
||||
|
||||
// let maxlength = 0;
|
||||
// let minlength = 0;
|
||||
//
|
||||
// if(length > 1){
|
||||
//
|
||||
// // term deduplication will break the context chain
|
||||
// // todo add context to dupe check
|
||||
// const dupes = create_object();
|
||||
// const query_new = [];
|
||||
//
|
||||
// // if(context){
|
||||
// // keyword = query_terms[0];
|
||||
// // dupes[keyword] = 1;
|
||||
// // query_new.push(keyword);
|
||||
// // maxlength = minlength = keyword.length;
|
||||
// // i = 1;
|
||||
// // }
|
||||
//
|
||||
// for(let i = 0, term; i < length; i++){
|
||||
//
|
||||
// term = query_terms[i];
|
||||
//
|
||||
// if(term && !dupes[term]){
|
||||
//
|
||||
// // todo add keyword check
|
||||
// // this fast path can't apply to persistent indexes
|
||||
// if(!suggest && !(SUPPORT_PERSISTENT && this.db) && !this.get_array(term/*, keyword*/)){
|
||||
//
|
||||
// // fast path "not found"
|
||||
// return !SUPPORT_RESOLVER || resolve
|
||||
// ? result
|
||||
// : new Resolver(result);
|
||||
// }
|
||||
// else{
|
||||
//
|
||||
// query_new.push(term);
|
||||
// dupes[term] = 1;
|
||||
// }
|
||||
//
|
||||
// const term_length = term.length;
|
||||
// maxlength = Math.max(maxlength, term_length);
|
||||
// minlength = minlength ? Math.min(minlength, term_length) : term_length;
|
||||
// }
|
||||
// // else if(term && (!this.depth || context === false)){
|
||||
// // query_new.push(term);
|
||||
// // }
|
||||
// }
|
||||
//
|
||||
// query_terms = query_new;
|
||||
// length = query_terms.length;
|
||||
// }
|
||||
//
|
||||
// // the term length could be changed after deduplication
|
||||
//
|
||||
// if(!length){
|
||||
// return !SUPPORT_RESOLVER || resolve
|
||||
// ? result
|
||||
// : new Resolver(result);
|
||||
// }
|
||||
//
|
||||
// // fast path single term
|
||||
// if(length === 1){
|
||||
// return single_term_query.call(
|
||||
// this,
|
||||
// query_terms[0], // term
|
||||
// "", // ctx
|
||||
// limit,
|
||||
// offset,
|
||||
// resolve,
|
||||
// enrich,
|
||||
// tag
|
||||
// );
|
||||
// }
|
||||
//
|
||||
// // fast path single context
|
||||
// if(length === 2 && context && !suggest){
|
||||
// return single_term_query.call(
|
||||
// this,
|
||||
// query_terms[0], // term
|
||||
// query_terms[1], // ctx
|
||||
// limit,
|
||||
// offset,
|
||||
// resolve,
|
||||
// enrich,
|
||||
// tag
|
||||
// );
|
||||
// }
|
||||
|
||||
let dupes = create_object();
|
||||
let index = 0, keyword;
|
||||
|
||||
//if(length > 1){
|
||||
if(context){
|
||||
// start with context right away
|
||||
keyword = query_terms[0];
|
||||
index = 1;
|
||||
}
|
||||
// todo
|
||||
// else if(maxlength > 9 && (maxlength / minlength) > 3){
|
||||
// // sorting terms will break the context chain
|
||||
// // bigger terms has less occurrence
|
||||
// // this might also reduce the intersection task
|
||||
// // todo check intersection order
|
||||
// query_terms.sort(sort_by_length_down);
|
||||
// }
|
||||
//}
|
||||
if(context){
|
||||
// start with context right away
|
||||
keyword = query_terms[0];
|
||||
index = 1;
|
||||
}
|
||||
// else {
|
||||
// // sorting terms will break the context chain
|
||||
// // bigger terms have a less occurrence
|
||||
// // this might reduce the intersection task
|
||||
// query_terms.sort(sort_by_length_down);
|
||||
// }
|
||||
|
||||
if(!resolution && resolution !== 0){
|
||||
resolution = keyword
|
||||
@@ -245,7 +153,7 @@ Index.prototype.search = function(query, limit, options){
|
||||
if(term && !dupes[term]){
|
||||
|
||||
dupes[term] = 1;
|
||||
arr = await self.get_array(term, keyword, 0, 0, false, false);
|
||||
arr = await self._get_array(term, keyword, 0, 0, false, false);
|
||||
arr = add_result(arr, /** @type {Array} */ (result), suggest, resolution);
|
||||
|
||||
if(arr){
|
||||
@@ -295,7 +203,7 @@ Index.prototype.search = function(query, limit, options){
|
||||
if(term && !dupes[term]){
|
||||
|
||||
dupes[term] = 1;
|
||||
arr = this.get_array(term, keyword, 0, 0, false, false);
|
||||
arr = this._get_array(term, keyword, 0, 0, false, false);
|
||||
arr = add_result(arr, /** @type {Array} */ (result), suggest, resolution);
|
||||
|
||||
if(arr){
|
||||
@@ -397,7 +305,7 @@ function return_result(result, resolution, limit, offset, suggest, boost, resolv
|
||||
|
||||
function single_term_query(term, keyword, limit, offset, resolve, enrich, tag){
|
||||
|
||||
const result = this.get_array(
|
||||
const result = this._get_array(
|
||||
term,
|
||||
keyword,
|
||||
limit,
|
||||
@@ -483,7 +391,7 @@ function add_result(arr, result, suggest, resolution){
|
||||
* Promise<IntermediateSearchResults|EnrichedSearchResults>
|
||||
* }
|
||||
*/
|
||||
Index.prototype.get_array = function(term, keyword, limit, offset, resolve, enrich, tag){
|
||||
Index.prototype._get_array = function(term, keyword, limit, offset, resolve, enrich, tag){
|
||||
|
||||
let arr, swap;
|
||||
|
||||
|
105
test/basic.js
105
test/basic.js
@@ -10,9 +10,6 @@ const build_compact = env && env.includes("compact");
|
||||
const build_esm = !env || env.startsWith("module");
|
||||
const Charset = _Charset || (await import("../src/charset.js")).default;
|
||||
|
||||
// global.FlexSearch = { Index, Document, Worker, Charset, Encoder, Resolver };
|
||||
// global.build = { build_light, build_compact, build_esm };
|
||||
|
||||
describe("Initialize", function(){
|
||||
|
||||
const index = new Index();
|
||||
@@ -157,6 +154,108 @@ describe("Add", function(){
|
||||
});
|
||||
});
|
||||
|
||||
describe("Append", function(){
|
||||
|
||||
it("Should have been properly appended to the index", function(){
|
||||
|
||||
const index = new Index();
|
||||
|
||||
index.append(0, "foo");
|
||||
index.append(2, "bar");
|
||||
index.append(1, "FooBar");
|
||||
index.append(3, "Some 'short' content.");
|
||||
|
||||
expect(Array.from(index.reg.keys())).to.have.members([0, 1, 2, 3]);
|
||||
expect(Array.from(index.map.keys())).to.have.members(["fo", "bar", "fobar", "some", "short", "content"]);
|
||||
expect(index.ctx.size).to.equal(0);
|
||||
expect(index.reg.size).to.equal(4);
|
||||
});
|
||||
|
||||
it("Should have been properly appended by score", function(){
|
||||
|
||||
let index = new Index();
|
||||
index.add(0, "foo A B C D E F bar");
|
||||
index.add(1, "foo A B C bar D E F");
|
||||
index.add(2, "foo bar A B C D E F");
|
||||
|
||||
expect(index.search("foo")).to.eql([0, 1, 2]);
|
||||
expect(index.search("bar")).to.eql([2, 1, 0]);
|
||||
expect(index.reg.size).to.equal(3);
|
||||
expect(index.map.size).to.equal(8);
|
||||
expect(index.ctx.size).to.equal(0);
|
||||
|
||||
index.append(0, "bar");
|
||||
index.append(0, "foo");
|
||||
index.append(0, "bar");
|
||||
|
||||
expect(index.search("foo")).to.eql([0, 1, 2]);
|
||||
expect(index.search("bar")).to.eql([0, 2, 1]);
|
||||
expect(index.reg.size).to.equal(3);
|
||||
expect(index.map.size).to.equal(8);
|
||||
expect(index.ctx.size).to.equal(0);
|
||||
|
||||
index.remove(2).remove(1).remove(0);
|
||||
expect(index.reg.size).to.equal(0);
|
||||
expect(index.map.size).to.equal(0);
|
||||
expect(index.ctx.size).to.equal(0);
|
||||
|
||||
index = new Index({ context: true });
|
||||
index.add(0, "foo A B C D E F bar");
|
||||
index.add(1, "foo A B C bar D E F");
|
||||
index.add(2, "foo bar A B C D E F");
|
||||
|
||||
expect(index.search("foo")).to.eql([0, 1, 2]);
|
||||
expect(index.search("bar")).to.eql([2, 1, 0]);
|
||||
expect(index.reg.size).to.equal(3);
|
||||
expect(index.map.size).to.equal(8);
|
||||
expect(index.ctx.size).to.equal(7);
|
||||
|
||||
index.append(0, "bar");
|
||||
index.append(0, "foo");
|
||||
index.append(0, "bar");
|
||||
|
||||
expect(index.search("foo")).to.eql([0, 1, 2]);
|
||||
expect(index.search("bar")).to.eql([0, 2, 1]);
|
||||
expect(index.reg.size).to.equal(3);
|
||||
expect(index.map.size).to.equal(8);
|
||||
expect(index.ctx.size).to.equal(7);
|
||||
|
||||
index.remove(2).remove(1).remove(0);
|
||||
expect(index.reg.size).to.equal(0);
|
||||
expect(index.map.size).to.equal(0);
|
||||
expect(index.ctx.size).to.equal(0);
|
||||
});
|
||||
|
||||
it("Should not have been appended to the index (Parameter)", function(){
|
||||
|
||||
const index = new Index();
|
||||
|
||||
index.append("foo");
|
||||
index.append(3);
|
||||
index.append(null, "foobar");
|
||||
index.append(void 0, "foobar");
|
||||
index.append(3, null);
|
||||
index.append(3, false);
|
||||
|
||||
expect(index.reg.size).to.equal(0);
|
||||
});
|
||||
|
||||
it("Should not have been appended to the index (Empty)", function(){
|
||||
|
||||
const index = new Index();
|
||||
|
||||
index.append(1, "");
|
||||
index.append(2, " ");
|
||||
index.append(3, " ");
|
||||
index.append(4, " - ");
|
||||
index.append(5, ` ...
|
||||
- : ,
|
||||
<-- `);
|
||||
|
||||
expect(index.reg.size).to.equal(0);
|
||||
});
|
||||
});
|
||||
|
||||
describe("Search (Sync)", function(){
|
||||
|
||||
it("Should have been matched properly", function(){
|
||||
|
@@ -14,13 +14,13 @@ if(!build_light) describe("Document (Multi-Field Search)", function(){
|
||||
|
||||
const data = [{
|
||||
id: 2,
|
||||
data: { title: "Title 3", body: "Body 3" }
|
||||
data: { title: "Title 3", body: "Body 3", cat: "A" }
|
||||
},{
|
||||
id: 1,
|
||||
data: { title: "Title 2", body: "Body 2" }
|
||||
data: { title: "Title 2", body: "Body 2", cat: "B" }
|
||||
},{
|
||||
id: 0,
|
||||
data: { title: "Title 1", body: "Body 1" }
|
||||
data: { title: "Title 1", body: "Body 1", cat: "A" }
|
||||
}];
|
||||
|
||||
const update = [{
|
||||
@@ -404,6 +404,89 @@ if(!build_light) describe("Document (Multi-Field Search)", function(){
|
||||
})).to.eql([]);
|
||||
});
|
||||
|
||||
it("Merge Results", function(){
|
||||
|
||||
const document = new Document({
|
||||
document: {
|
||||
store: [
|
||||
"data:title"
|
||||
],
|
||||
id: "id",
|
||||
field: [
|
||||
"data:title",
|
||||
"data:body"
|
||||
],
|
||||
tag: "data:cat"
|
||||
}
|
||||
});
|
||||
|
||||
for(let i = 0; i < data.length; i++){
|
||||
document.add(data[i]);
|
||||
}
|
||||
|
||||
expect(document.search({
|
||||
query: "title",
|
||||
enrich: false,
|
||||
suggest: true,
|
||||
merge: true
|
||||
})).to.eql([
|
||||
{ id: 2, field: [ 'data:title' ] },
|
||||
{ id: 1, field: [ 'data:title' ] },
|
||||
{ id: 0, field: [ 'data:title' ] }
|
||||
]);
|
||||
|
||||
expect(document.search({
|
||||
query: "title",
|
||||
enrich: true,
|
||||
suggest: true,
|
||||
merge: true
|
||||
}).map(res => res.doc)).to.eql([
|
||||
{ data: { title: data[0].data.title }},
|
||||
{ data: { title: data[1].data.title }},
|
||||
{ data: { title: data[2].data.title }}
|
||||
]);
|
||||
|
||||
expect(document.search({
|
||||
query: "title",
|
||||
tag: { "data:cat": "A" },
|
||||
enrich: true,
|
||||
suggest: true,
|
||||
merge: true
|
||||
}).map(res => res.doc)).to.eql([
|
||||
{ data: { title: data[0].data.title }},
|
||||
{ data: { title: data[2].data.title }}
|
||||
]);
|
||||
});
|
||||
|
||||
it("Using BigInt", function(){
|
||||
|
||||
const document = new Document({
|
||||
document: {
|
||||
store: "data:title",
|
||||
id: "id",
|
||||
field: [
|
||||
"data:title",
|
||||
"data:body"
|
||||
],
|
||||
tag: "data:cat"
|
||||
}
|
||||
});
|
||||
|
||||
for(let i = 0, tmp; i < data.length; i++){
|
||||
tmp = Object.assign({}, data[i], { id: BigInt(i + 1) });
|
||||
document.add(tmp);
|
||||
}
|
||||
|
||||
expect(document.search({
|
||||
query: "title",
|
||||
tag: { "data:cat": "A" },
|
||||
merge: true
|
||||
})).to.eql([
|
||||
{ id: BigInt(1), field: [ "data:title" ] },
|
||||
{ id: BigInt(3), field: [ "data:title" ] }
|
||||
]);
|
||||
});
|
||||
|
||||
it("Custom Document Store", function(){
|
||||
|
||||
const document = new Document({
|
||||
|
Reference in New Issue
Block a user