diff --git a/README.md b/README.md index 9f6d942..c157c35 100644 --- a/README.md +++ b/README.md @@ -891,7 +891,7 @@ Global Members: `Index` / `Worker`-Index Methods: - index.[**add**](#add-text-item-to-an-index)(id, string) -- ~~index.[**append**]()(id, string)~~ +- index.[**append**]()(id, string) - index.[**update**](#update-item-from-an-index)(id, string) - index.[**remove**](#remove-item-from-an-index)(id) - index.[**search**](#search-items)(string, \, \) @@ -918,7 +918,7 @@ Global Members: `Document` Methods: - document.[**add**](doc/document-search.md#addupdateremove-documents)(\, document) -- ~~document.[**append**]()(\, document)~~ +- document.[**append**]()(\, document) - document.[**update**](doc/document-search.md#addupdateremove-documents)(\, document) - document.[**remove**](doc/document-search.md#addupdateremove-documents)(id) - document.[**remove**](doc/document-search.md#addupdateremove-documents)(document) @@ -951,7 +951,7 @@ Global Members: Async Equivalents (Non-Blocking Balanced): - _async_ [**.addAsync**](doc/async.md)( ... , \) -- _async_ ~~[**.appendAsync**](doc/async.md)( ... , \)~~ +- _async_ [**.appendAsync**](doc/async.md)( ... , \) - _async_ [**.updateAsync**](doc/async.md)( ... , \) - _async_ [**.removeAsync**](doc/async.md)( ... , \) - _async_ [**.searchAsync**](doc/async.md)( ... , \) @@ -1439,7 +1439,7 @@ Try to choose the most upper of these tokenizer which covers your requirements: "tolerant" index the full term by also being tolerant against typos like swapped letters and missing letters foobra
foboar
foobr
fooba - 1 + 2(n - 2) + 2(n - 2) + 2 diff --git a/doc/async.md b/doc/async.md index 9358e5f..ca71711 100644 --- a/doc/async.md +++ b/doc/async.md @@ -5,7 +5,7 @@ Those methods of each index type provides an async version: - addAsync() -- ~~appendAsync()~~ +- appendAsync() - updateAsync() - removeAsync() - searchAsync() diff --git a/index.d.ts b/index.d.ts index a3d76bd..004fd00 100644 --- a/index.d.ts +++ b/index.d.ts @@ -231,9 +231,7 @@ declare module "flexsearch" { add(id: Id, content: string): W extends false ? this : Promise; - /** - * @deprecated The method "append" will be removed in an upcoming release, only use "add" instead - */ + append(id: Id, content: string): W extends false ? this : Promise; @@ -295,7 +293,6 @@ declare module "flexsearch" { callback?: AsyncCallback, ): Promise; - /** @deprecated The method "append" will be removed in an upcoming release, just use "add" instead */ appendAsync( id: Id, content: string, @@ -633,11 +630,9 @@ declare module "flexsearch" { ? this : Promise; - /** @deprecated The method "append" will be removed in an upcoming release, just use "add" instead */ append(id: Id, document: D): W extends false ? this : Promise; - /** @deprecated The method "append" will be removed in an upcoming release, just use "add" instead */ append(document: D): W extends false ? this : Promise; @@ -769,13 +764,11 @@ declare module "flexsearch" { callback?: AsyncCallback, ): Promise; - /** @deprecated The method "append" will be removed in an upcoming release, just use "add" instead */ appendAsync( id: Id, document: D, callback?: AsyncCallback, ): Promise; - /** @deprecated The method "append" will be removed in an upcoming release, just use "add" instead */ appendAsync( document: D, callback?: AsyncCallback, diff --git a/src/db/sqlite/index.js b/src/db/sqlite/index.js index fe2dc84..61fd275 100644 --- a/src/db/sqlite/index.js +++ b/src/db/sqlite/index.js @@ -255,10 +255,12 @@ function create_result(rows, resolve, enrich){ } SqliteDB.prototype.get = function(key, ctx, limit = 0, offset = 0, resolve = true, enrich = false, tags){ + let result; let stmt = ''; let params = ctx ? [ctx, key] : [key]; let table = "main." + (ctx ? "ctx" : "map") + this.field; + if(tags){ for(let i = 0; i < tags.length; i+=2){ stmt += ` AND ${ table }.id IN (SELECT id FROM main.tag_${ sanitize(tags[i]) } WHERE tag = ?)`; @@ -303,12 +305,14 @@ SqliteDB.prototype.get = function(key, ctx, limit = 0, offset = 0, resolve = tru params }); } + return result.then(function(rows){ return create_result(rows, resolve, enrich); }); }; SqliteDB.prototype.tag = function(tag, limit = 0, offset = 0, enrich = false){ + const table = "main.tag" + this.field; const promise = this.promisfy({ method: "all", @@ -325,17 +329,25 @@ SqliteDB.prototype.tag = function(tag, limit = 0, offset = 0, enrich = false){ `, params: [tag] }); + enrich || promise.then(function(rows){ return create_result(rows, true, false); }); + return promise; }; function build_params(length, single_param){ + // let stmt = "?"; + // for(let i = 1; i < length; i++){ + // stmt += ",?"; + // } + let stmt = single_param ? ",(?)" : ",?"; + for(let i = 1; i < length;){ if(i <= (length - i)){ stmt += stmt; @@ -346,6 +358,7 @@ function build_params(length, single_param){ break; } } + return stmt.substring(1); } @@ -362,16 +375,9 @@ SqliteDB.prototype.enrich = function(ids){ const chunk = ids.length - count > MAXIMUM_QUERY_VARS ? ids.slice(count, count + MAXIMUM_QUERY_VARS) : count ? ids.slice(count) : ids; + const stmt = build_params(chunk.length); count += chunk.length; - // let stmt = "?"; - // for(let i = 1; i < chunk.length; i++){ - // stmt += ",?"; - // } - - // 10x faster string concatenation - let stmt = build_params(chunk.length); - promises.push(this.promisfy({ method: "all", stmt: `SELECT id, doc FROM main.reg WHERE id IN (${stmt})`, @@ -594,37 +600,29 @@ SqliteDB.prototype.transaction = async function(task, callback){ }); }; -SqliteDB.prototype.commit = async function(flexsearch, _replace, _append){ +SqliteDB.prototype.commit = async function(flexsearch){ - // process cleanup tasks - if(_replace){ - await this.clear(); - // there are just removals in the task queue - flexsearch.commit_task = []; - } - else{ - let tasks = flexsearch.commit_task; - flexsearch.commit_task = []; - for(let i = 0, task; i < tasks.length; i++){ - task = tasks[i]; - // there are just removals in the task queue - if(task.clear){ - await this.clear(); - _replace = true; - break; - } - else{ - tasks[i] = task.del; - } + let tasks = flexsearch.commit_task; + let removals = []; + let inserts = []; + flexsearch.commit_task = []; + + for(let i = 0, task; i < tasks.length; i++){ + task = tasks[i]; + if(typeof task["del"] !== "undefined"){ + removals.push(task["del"]); } - if(!_replace){ - if(!_append){ - tasks = tasks.concat(toArray(flexsearch.reg)); - } - tasks.length && await this.remove(tasks); + else if(typeof task["ins"] !== "undefined"){ + inserts.push(task["ins"]); } } + if(removals.length){ + await this.remove(removals); + } + // if(inserts_map.length || inserts_ctx.length){ + // await this.insert(inserts_map, inserts_ctx); + // } if(!flexsearch.reg.size){ return; } @@ -645,7 +643,11 @@ SqliteDB.prototype.commit = async function(flexsearch, _replace, _append){ params.push(key, i, ids[j]); // maximum count of variables supported if((j === ids.length - 1) || (params.length + 3 > MAXIMUM_QUERY_VARS)){ - this.db.run("INSERT INTO main.map" + this.field + " (key, res, id) VALUES " + stmt, params); + this.db.run( + "INSERT INTO main.map" + this.field + " (key, res, id) VALUES " + stmt + // " ON CONFLICT DO " + + // "UPDATE main.map" + this.field + " SET key = '', res = 0 WHERE key = ? AND res > ? AND id = ?" + , params); stmt = ""; params = []; } @@ -700,13 +702,13 @@ SqliteDB.prototype.commit = async function(flexsearch, _replace, _append){ : doc || null ); if(chunk.length + 2 > MAXIMUM_QUERY_VARS){ - this.db.run("INSERT INTO main.reg (id, doc) VALUES " + stmt, chunk); + this.db.run("INSERT INTO main.reg (id, doc) VALUES " + stmt + " ON CONFLICT DO NOTHING", chunk); stmt = ""; chunk = []; } } if(chunk.length){ - this.db.run("INSERT INTO main.reg (id, doc) VALUES " + stmt, chunk); + this.db.run("INSERT INTO main.reg (id, doc) VALUES " + stmt + " ON CONFLICT DO NOTHING", chunk); } } else if(!flexsearch.bypass){ @@ -717,7 +719,7 @@ SqliteDB.prototype.commit = async function(flexsearch, _replace, _append){ : count ? ids.slice(count) : ids; count += chunk.length; const stmt = build_params(chunk.length, /* single param */ true); - this.db.run("INSERT INTO main.reg (id) VALUES " + stmt, chunk); + this.db.run("INSERT INTO main.reg (id) VALUES " + stmt + " ON CONFLICT DO NOTHING", chunk); } } //}); @@ -746,6 +748,10 @@ SqliteDB.prototype.commit = async function(flexsearch, _replace, _append){ } }); + if(inserts.length){ + await this.cleanup(); + } + // TODO //await this.transaction(function(){ // this.db.run("INSERT INTO main.cfg" + this.field + " (cfg) VALUES (?)", [JSON.stringify({ @@ -802,6 +808,34 @@ SqliteDB.prototype.remove = function(ids){ }); }; +SqliteDB.prototype.cleanup = function(){ + return this.transaction(function(){ + this.db.run( + "DELETE FROM main.map" + this.field + " " + + "WHERE ROWID IN (" + + "SELECT ROWID FROM (" + + "SELECT ROWID, row_number() OVER dupes AS count " + + "FROM main.map" + this.field + " _t " + + "WINDOW dupes AS (PARTITION BY id, key ORDER BY res) " + + ") " + + "WHERE count > 1" + + ")" + + ); + this.db.run( + "DELETE FROM main.ctx" + this.field + " " + + "WHERE ROWID IN (" + + "SELECT ROWID FROM (" + + "SELECT ROWID, row_number() OVER dupes AS count " + + "FROM main.ctx" + this.field + " _t " + + "WINDOW dupes AS (PARTITION BY id, ctx, key ORDER BY res) " + + ") " + + "WHERE count > 1" + + ")" + ); + }); +}; + SqliteDB.prototype.promisfy = function(opt){ const db = this.db; return new Promise(function(resolve, reject){ diff --git a/src/document/add.js b/src/document/add.js index cfdddbf..d17f356 100644 --- a/src/document/add.js +++ b/src/document/add.js @@ -42,7 +42,7 @@ Document.prototype.add = function(id, content, _append){ if(typeof tree === "function"){ const tmp = tree(content); if(tmp){ - index.add(id, tmp, /* append: */ false, /* skip update: */ true); + index.add(id, tmp, /* append: */ _append, /* skip update: */ true); } } else{ @@ -225,7 +225,7 @@ function add_index(obj, tree, marker, pos, index, id, key, _append){ if((obj = obj[key])){ - // reached target field + // reached the target field if(pos === (tree.length - 1)){ // handle target value diff --git a/src/document/search.js b/src/document/search.js index b379ea3..a483cdd 100644 --- a/src/document/search.js +++ b/src/document/search.js @@ -318,12 +318,23 @@ Document.prototype.search = function(query, limit, options, _promises){ } } + // TODO merge sorted by score + // TODO add score property to results + // if(merge){ + // opt.resolve = false; + // } + res = cache ? index.searchCache(query, limit, opt) : index.search(query, limit, opt); // restore state - opt_enrich && (opt.enrich = opt_enrich); + // if(merge){ + // opt.resolve = resolve; + // } + if(opt_enrich) { + opt.enrich = opt_enrich; + } if(promises){ promises[i] = res; diff --git a/src/index.js b/src/index.js index 034d2d1..1b7e473 100644 --- a/src/index.js +++ b/src/index.js @@ -154,12 +154,12 @@ if(SUPPORT_PERSISTENT){ } return db.mount(this); }; - Index.prototype.commit = function(replace, append){ + Index.prototype.commit = function(replace){ if(this.commit_timer){ clearTimeout(this.commit_timer); this.commit_timer = null; } - return this.db.commit(this, replace, append); + return this.db.commit(this, replace); }; Index.prototype.destroy = function(){ if(this.commit_timer){ diff --git a/src/index/add.js b/src/index/add.js index 68bdb63..29bfc7b 100644 --- a/src/index/add.js +++ b/src/index/add.js @@ -8,7 +8,7 @@ import { import { create_object } from "../common.js"; import Index, { autoCommit } from "../index.js"; import default_compress from "../compress.js"; -import { KeystoreArray } from "../keystore.js"; +import { KeystoreArray, KeystoreMap } from "../keystore.js"; // TODO: // string + number as text @@ -21,7 +21,6 @@ import { KeystoreArray } from "../keystore.js"; * @param {boolean=} _append * @param {boolean=} _skip_update */ - Index.prototype.add = function(id, content, _append, _skip_update){ if(content && (id || (id === 0))){ @@ -39,9 +38,9 @@ Index.prototype.add = function(id, content, _append, _skip_update){ // do not force a string as input // https://github.com/nextapps-de/flexsearch/issues/432 content = this.encoder.encode(content, !depth); - const word_length = content.length; + const term_count = content.length; - if(word_length){ + if(term_count){ // check context dupes to skip all contextual redundancy along a document @@ -49,9 +48,9 @@ Index.prototype.add = function(id, content, _append, _skip_update){ const dupes = create_object(); const resolution = this.resolution; - for(let i = 0; i < word_length; i++){ + for(let i = 0; i < term_count; i++){ - let term = content[this.rtl ? word_length - 1 - i : i]; + let term = content[this.rtl ? term_count - 1 - i : i]; let term_length = term.length; // skip dupes will break the context chain @@ -59,7 +58,7 @@ Index.prototype.add = function(id, content, _append, _skip_update){ let score = this.score ? this.score(content, term, i, null, 0) - : get_score(resolution, word_length, i); + : get_score(resolution, term_count, i); let token = ""; switch(this.tokenize){ @@ -70,22 +69,25 @@ Index.prototype.add = function(id, content, _append, _skip_update){ // A[CB]DE A[]CDE // AB[DC]E AB[]DE // ABC[ED] ABC[]E + // ABCD[] case "tolerant": - this.push_index(dupes, term, score, id, _append); + this._push_index(dupes, term, score, id, _append); if(term_length > 2){ for(let x = 1, char_a, char_b, prt_1, prt_2; x < term_length - 1; x++){ char_a = term.charAt(x); char_b = term.charAt(x + 1); prt_1 = term.substring(0, x) + char_b; prt_2 = term.substring(x + 2); - // swap letters + // swapped letters token = prt_1 /*+ char_b*/ + char_a + prt_2; - dupes[token] || this.push_index(dupes, token, score, id, _append); - // remove letters + this._push_index(dupes, token, score, id, _append); + // missing letters token = prt_1 /*+ char_b*/ + prt_2; - dupes[token] || this.push_index(dupes, token, score, id, _append); + this._push_index(dupes, token, score, id, _append); } + // missing last letter + this._push_index(dupes, term.substring(0, term.length - 1), score, id, _append); } break; @@ -94,13 +96,11 @@ Index.prototype.add = function(id, content, _append, _skip_update){ for(let x = 0, _x; x < term_length; x++){ for(let y = term_length; y > x; y--){ token = term.substring(x, y); - if(!dupes[token]){ - _x = this.rtl ? term_length - 1 - x : x; - const partial_score = this.score - ? this.score(content, term, i, token, _x) - : get_score(resolution, word_length, i, term_length, _x); - this.push_index(dupes, token, partial_score, id, _append); - } + _x = this.rtl ? term_length - 1 - x : x; + const partial_score = this.score + ? this.score(content, term, i, token, _x) + : get_score(resolution, term_count, i, term_length, _x); + this._push_index(dupes, token, partial_score, id, _append); } } break; @@ -117,12 +117,10 @@ Index.prototype.add = function(id, content, _append, _skip_update){ ? term_length - 1 - x : x ] + token; - if(!dupes[token]){ - const partial_score = this.score - ? this.score(content, term, i, token, x) - : get_score(resolution, word_length, i, term_length, x); - this.push_index(dupes, token, partial_score, id, _append); - } + const partial_score = this.score + ? this.score(content, term, i, token, x) + : get_score(resolution, term_count, i, term_length, x); + this._push_index(dupes, token, partial_score, id, _append); } token = ""; } @@ -136,57 +134,47 @@ Index.prototype.add = function(id, content, _append, _skip_update){ ? term_length - 1 - x : x ]; - dupes[token] || this.push_index(dupes, token, score, id, _append); + this._push_index(dupes, token, score, id, _append); } break; } // fallthrough to next case when token has a length of 1 default: // "strict": - this.push_index(dupes, term, score, id, _append); + this._push_index(dupes, term, score, id, _append); + // context is just supported by tokenizer "strict" - if(depth){ + if(depth && (term_count > 1) && (i < (term_count - 1))){ - if((word_length > 1) && (i < (word_length - 1))){ + const resolution = this.resolution_ctx; + const keyword = term; + const size = Math.min( + depth + 1, + this.rtl + ? i + 1 + : term_count - i + ); - // check inner dupes to skip repeating words in the current context - const dupes_inner = create_object(); - const resolution = this.resolution_ctx; - const keyword = term; - const size = Math.min( - depth + 1, + for(let x = 1; x < size; x++){ + + term = content[ this.rtl - ? i + 1 - : word_length - i + ? term_count - 1 - i - x + : i + x + ]; + + const swap = this.bidirectional && (term > keyword); + const context_score = this.score + ? this.score(content, keyword, i, term, x - 1) + : get_score(resolution + ((term_count / 2) > resolution ? 0 : 1), term_count, i, size - 1, x - 1); + this._push_index( + dupes_ctx, + swap ? keyword : term, + context_score, + id, + _append, + swap ? term : keyword ); - - dupes_inner[keyword] = 1; - - for(let x = 1; x < size; x++){ - - term = content[ - this.rtl - ? word_length - 1 - i - x - : i + x - ]; - - if(term && !dupes_inner[term]){ - - dupes_inner[term] = 1; - const context_score = this.score - ? this.score(content, keyword, i, term, x - 1) - : get_score(resolution + ((word_length / 2) > resolution ? 0 : 1), word_length, i, size - 1, x - 1); - const swap = this.bidirectional && (term > keyword); - this.push_index( - dupes_ctx, - swap ? keyword : term, - context_score, - id, - _append, - swap ? term : keyword - ); - } - } } } } @@ -195,15 +183,10 @@ Index.prototype.add = function(id, content, _append, _skip_update){ this.fastupdate || this.reg.add(id); } - else{ - content = ""; - } } if(SUPPORT_PERSISTENT && this.db){ - // when the term has no valid content (e.g., empty), - // then it was not added to the ID registry for removal - content || this.commit_task.push({ "del": id }); + this.commit_task.push(_append ? { "ins": id } : { "del": id }); this.commit_auto && autoCommit(this); } @@ -219,29 +202,34 @@ Index.prototype.add = function(id, content, _append, _skip_update){ * @param {boolean=} append * @param {string=} keyword */ +Index.prototype._push_index = function(dupes, term, score, id, append, keyword){ -Index.prototype.push_index = function(dupes, term, score, id, append, keyword){ + let res, arr; - let arr = keyword ? this.ctx : this.map; - let tmp; - - if(!dupes[term] || (keyword && !(tmp = dupes[term])[keyword])){ + if(!(res = dupes[term]) || (keyword && !res[keyword])){ if(keyword){ - dupes = tmp || (dupes[term] = create_object()); + dupes = res || (dupes[term] = create_object()); dupes[keyword] = 1; if(SUPPORT_COMPRESSION && this.compress){ keyword = default_compress(keyword); } - tmp = arr.get(keyword); - tmp ? arr = tmp - : arr.set(keyword, arr = new Map()); + arr = this.ctx; + res = arr.get(keyword); + res ? arr = res + : arr.set( + keyword, + arr = SUPPORT_KEYSTORE && this.keystore + ? new KeystoreMap(this.keystore) + : new Map() + ); } else{ + arr = this.map; dupes[term] = 1; } @@ -249,37 +237,56 @@ Index.prototype.push_index = function(dupes, term, score, id, append, keyword){ term = default_compress(term); } - tmp = arr.get(term); - tmp ? arr = tmp : arr.set(term, arr = tmp = []); - // the ID array will be upgraded dynamically - arr = arr[score] || (arr[score] = []); + res = arr.get(term); + res ? arr = res + : arr.set(term, arr = res = []); - if(!append || !arr.includes(id)){ - - // auto-upgrade to a keystore array if max size exceeded - if(SUPPORT_KEYSTORE){ - if(arr.length === 2**31-1 /*|| !(arr instanceof KeystoreArray)*/){ - const keystore = new KeystoreArray(arr); - if(this.fastupdate){ - for(let value of this.reg.values()){ - if(value.includes(arr)){ - value[value.indexOf(arr)] = keystore; - } + if(append){ + for(let i = 0, arr; i < res.length; i++){ + arr = res[i]; + if(arr && arr.includes(id)){ + if(i <= score){ + // already included in the right slot + return; + } + else{ + // update slot + arr.splice(arr.indexOf(id), 1); + if(this.fastupdate){ + const tmp = this.reg.get(id); + tmp && tmp.splice(tmp.indexOf(arr), 1); } } - tmp[score] = arr = keystore; + break; } } + } - arr.push(id); + // the ID keystore array will upgrade automatically + arr = arr[score] || (arr[score] = []); + arr.push(id); - // add a reference to the register for fast updates - if(this.fastupdate){ - const tmp = this.reg.get(id); - tmp ? tmp.push(arr) - : this.reg.set(id, [arr]); + // auto-upgrade to a keystore array if max size exceeded + if(SUPPORT_KEYSTORE){ + if(arr.length === 2**31-1 /*|| !(arr instanceof KeystoreArray)*/){ + const keystore = new KeystoreArray(arr); + if(this.fastupdate){ + for(let value of this.reg.values()){ + if(value.includes(arr)){ + value[value.indexOf(arr)] = keystore; + } + } + } + res[score] = arr = keystore; } } + + // add a reference to the register for fast updates + if(this.fastupdate){ + const tmp = this.reg.get(id); + tmp ? tmp.push(arr) + : this.reg.set(id, [arr]); + } } } @@ -291,7 +298,6 @@ Index.prototype.push_index = function(dupes, term, score, id, append, keyword){ * @param {number=} x * @returns {number} */ - function get_score(resolution, length, i, term_length, x){ // console.log("resolution", resolution); diff --git a/src/index/search.js b/src/index/search.js index 90b59e6..f8db4e3 100644 --- a/src/index/search.js +++ b/src/index/search.js @@ -112,112 +112,20 @@ Index.prototype.search = function(query, limit, options){ ); } - // let maxlength = 0; - // let minlength = 0; - // - // if(length > 1){ - // - // // term deduplication will break the context chain - // // todo add context to dupe check - // const dupes = create_object(); - // const query_new = []; - // - // // if(context){ - // // keyword = query_terms[0]; - // // dupes[keyword] = 1; - // // query_new.push(keyword); - // // maxlength = minlength = keyword.length; - // // i = 1; - // // } - // - // for(let i = 0, term; i < length; i++){ - // - // term = query_terms[i]; - // - // if(term && !dupes[term]){ - // - // // todo add keyword check - // // this fast path can't apply to persistent indexes - // if(!suggest && !(SUPPORT_PERSISTENT && this.db) && !this.get_array(term/*, keyword*/)){ - // - // // fast path "not found" - // return !SUPPORT_RESOLVER || resolve - // ? result - // : new Resolver(result); - // } - // else{ - // - // query_new.push(term); - // dupes[term] = 1; - // } - // - // const term_length = term.length; - // maxlength = Math.max(maxlength, term_length); - // minlength = minlength ? Math.min(minlength, term_length) : term_length; - // } - // // else if(term && (!this.depth || context === false)){ - // // query_new.push(term); - // // } - // } - // - // query_terms = query_new; - // length = query_terms.length; - // } - // - // // the term length could be changed after deduplication - // - // if(!length){ - // return !SUPPORT_RESOLVER || resolve - // ? result - // : new Resolver(result); - // } - // - // // fast path single term - // if(length === 1){ - // return single_term_query.call( - // this, - // query_terms[0], // term - // "", // ctx - // limit, - // offset, - // resolve, - // enrich, - // tag - // ); - // } - // - // // fast path single context - // if(length === 2 && context && !suggest){ - // return single_term_query.call( - // this, - // query_terms[0], // term - // query_terms[1], // ctx - // limit, - // offset, - // resolve, - // enrich, - // tag - // ); - // } - let dupes = create_object(); let index = 0, keyword; - //if(length > 1){ - if(context){ - // start with context right away - keyword = query_terms[0]; - index = 1; - } - // todo - // else if(maxlength > 9 && (maxlength / minlength) > 3){ - // // sorting terms will break the context chain - // // bigger terms has less occurrence - // // this might also reduce the intersection task - // // todo check intersection order - // query_terms.sort(sort_by_length_down); - // } - //} + if(context){ + // start with context right away + keyword = query_terms[0]; + index = 1; + } + // else { + // // sorting terms will break the context chain + // // bigger terms have a less occurrence + // // this might reduce the intersection task + // query_terms.sort(sort_by_length_down); + // } if(!resolution && resolution !== 0){ resolution = keyword @@ -245,7 +153,7 @@ Index.prototype.search = function(query, limit, options){ if(term && !dupes[term]){ dupes[term] = 1; - arr = await self.get_array(term, keyword, 0, 0, false, false); + arr = await self._get_array(term, keyword, 0, 0, false, false); arr = add_result(arr, /** @type {Array} */ (result), suggest, resolution); if(arr){ @@ -295,7 +203,7 @@ Index.prototype.search = function(query, limit, options){ if(term && !dupes[term]){ dupes[term] = 1; - arr = this.get_array(term, keyword, 0, 0, false, false); + arr = this._get_array(term, keyword, 0, 0, false, false); arr = add_result(arr, /** @type {Array} */ (result), suggest, resolution); if(arr){ @@ -397,7 +305,7 @@ function return_result(result, resolution, limit, offset, suggest, boost, resolv function single_term_query(term, keyword, limit, offset, resolve, enrich, tag){ - const result = this.get_array( + const result = this._get_array( term, keyword, limit, @@ -483,7 +391,7 @@ function add_result(arr, result, suggest, resolution){ * Promise * } */ -Index.prototype.get_array = function(term, keyword, limit, offset, resolve, enrich, tag){ +Index.prototype._get_array = function(term, keyword, limit, offset, resolve, enrich, tag){ let arr, swap; diff --git a/test/basic.js b/test/basic.js index d39e843..2583176 100644 --- a/test/basic.js +++ b/test/basic.js @@ -10,9 +10,6 @@ const build_compact = env && env.includes("compact"); const build_esm = !env || env.startsWith("module"); const Charset = _Charset || (await import("../src/charset.js")).default; -// global.FlexSearch = { Index, Document, Worker, Charset, Encoder, Resolver }; -// global.build = { build_light, build_compact, build_esm }; - describe("Initialize", function(){ const index = new Index(); @@ -157,6 +154,108 @@ describe("Add", function(){ }); }); +describe("Append", function(){ + + it("Should have been properly appended to the index", function(){ + + const index = new Index(); + + index.append(0, "foo"); + index.append(2, "bar"); + index.append(1, "FooBar"); + index.append(3, "Some 'short' content."); + + expect(Array.from(index.reg.keys())).to.have.members([0, 1, 2, 3]); + expect(Array.from(index.map.keys())).to.have.members(["fo", "bar", "fobar", "some", "short", "content"]); + expect(index.ctx.size).to.equal(0); + expect(index.reg.size).to.equal(4); + }); + + it("Should have been properly appended by score", function(){ + + let index = new Index(); + index.add(0, "foo A B C D E F bar"); + index.add(1, "foo A B C bar D E F"); + index.add(2, "foo bar A B C D E F"); + + expect(index.search("foo")).to.eql([0, 1, 2]); + expect(index.search("bar")).to.eql([2, 1, 0]); + expect(index.reg.size).to.equal(3); + expect(index.map.size).to.equal(8); + expect(index.ctx.size).to.equal(0); + + index.append(0, "bar"); + index.append(0, "foo"); + index.append(0, "bar"); + + expect(index.search("foo")).to.eql([0, 1, 2]); + expect(index.search("bar")).to.eql([0, 2, 1]); + expect(index.reg.size).to.equal(3); + expect(index.map.size).to.equal(8); + expect(index.ctx.size).to.equal(0); + + index.remove(2).remove(1).remove(0); + expect(index.reg.size).to.equal(0); + expect(index.map.size).to.equal(0); + expect(index.ctx.size).to.equal(0); + + index = new Index({ context: true }); + index.add(0, "foo A B C D E F bar"); + index.add(1, "foo A B C bar D E F"); + index.add(2, "foo bar A B C D E F"); + + expect(index.search("foo")).to.eql([0, 1, 2]); + expect(index.search("bar")).to.eql([2, 1, 0]); + expect(index.reg.size).to.equal(3); + expect(index.map.size).to.equal(8); + expect(index.ctx.size).to.equal(7); + + index.append(0, "bar"); + index.append(0, "foo"); + index.append(0, "bar"); + + expect(index.search("foo")).to.eql([0, 1, 2]); + expect(index.search("bar")).to.eql([0, 2, 1]); + expect(index.reg.size).to.equal(3); + expect(index.map.size).to.equal(8); + expect(index.ctx.size).to.equal(7); + + index.remove(2).remove(1).remove(0); + expect(index.reg.size).to.equal(0); + expect(index.map.size).to.equal(0); + expect(index.ctx.size).to.equal(0); + }); + + it("Should not have been appended to the index (Parameter)", function(){ + + const index = new Index(); + + index.append("foo"); + index.append(3); + index.append(null, "foobar"); + index.append(void 0, "foobar"); + index.append(3, null); + index.append(3, false); + + expect(index.reg.size).to.equal(0); + }); + + it("Should not have been appended to the index (Empty)", function(){ + + const index = new Index(); + + index.append(1, ""); + index.append(2, " "); + index.append(3, " "); + index.append(4, " - "); + index.append(5, ` ... + - : , + <-- `); + + expect(index.reg.size).to.equal(0); + }); +}); + describe("Search (Sync)", function(){ it("Should have been matched properly", function(){ diff --git a/test/document.js b/test/document.js index 8e9fd4b..0bbc696 100644 --- a/test/document.js +++ b/test/document.js @@ -14,13 +14,13 @@ if(!build_light) describe("Document (Multi-Field Search)", function(){ const data = [{ id: 2, - data: { title: "Title 3", body: "Body 3" } + data: { title: "Title 3", body: "Body 3", cat: "A" } },{ id: 1, - data: { title: "Title 2", body: "Body 2" } + data: { title: "Title 2", body: "Body 2", cat: "B" } },{ id: 0, - data: { title: "Title 1", body: "Body 1" } + data: { title: "Title 1", body: "Body 1", cat: "A" } }]; const update = [{ @@ -404,6 +404,89 @@ if(!build_light) describe("Document (Multi-Field Search)", function(){ })).to.eql([]); }); + it("Merge Results", function(){ + + const document = new Document({ + document: { + store: [ + "data:title" + ], + id: "id", + field: [ + "data:title", + "data:body" + ], + tag: "data:cat" + } + }); + + for(let i = 0; i < data.length; i++){ + document.add(data[i]); + } + + expect(document.search({ + query: "title", + enrich: false, + suggest: true, + merge: true + })).to.eql([ + { id: 2, field: [ 'data:title' ] }, + { id: 1, field: [ 'data:title' ] }, + { id: 0, field: [ 'data:title' ] } + ]); + + expect(document.search({ + query: "title", + enrich: true, + suggest: true, + merge: true + }).map(res => res.doc)).to.eql([ + { data: { title: data[0].data.title }}, + { data: { title: data[1].data.title }}, + { data: { title: data[2].data.title }} + ]); + + expect(document.search({ + query: "title", + tag: { "data:cat": "A" }, + enrich: true, + suggest: true, + merge: true + }).map(res => res.doc)).to.eql([ + { data: { title: data[0].data.title }}, + { data: { title: data[2].data.title }} + ]); + }); + + it("Using BigInt", function(){ + + const document = new Document({ + document: { + store: "data:title", + id: "id", + field: [ + "data:title", + "data:body" + ], + tag: "data:cat" + } + }); + + for(let i = 0, tmp; i < data.length; i++){ + tmp = Object.assign({}, data[i], { id: BigInt(i + 1) }); + document.add(tmp); + } + + expect(document.search({ + query: "title", + tag: { "data:cat": "A" }, + merge: true + })).to.eql([ + { id: BigInt(1), field: [ "data:title" ] }, + { id: BigInt(3), field: [ "data:title" ] } + ]); + }); + it("Custom Document Store", function(){ const document = new Document({