From 0fa9bcb95d30900630156ddba5e62886e22abab0 Mon Sep 17 00:00:00 2001 From: Thomas Wilkerling Date: Wed, 2 Jun 2021 19:16:28 +0200 Subject: [PATCH] improve scoring calculation --- dist/flexsearch.bundle.js | 45 ++++--- dist/flexsearch.compact.js | 39 +++--- dist/flexsearch.debug.js | 250 +++++++++++++++++++------------------ dist/flexsearch.es5.js | 67 +++++----- dist/flexsearch.light.js | 23 ++-- doc/0.7.0.md | 18 +-- src/index.js | 199 ++++++++++++++++------------- src/webpack.js | 2 + 8 files changed, 334 insertions(+), 309 deletions(-) diff --git a/dist/flexsearch.bundle.js b/dist/flexsearch.bundle.js index 2e0c9b2..3e8c70f 100644 --- a/dist/flexsearch.bundle.js +++ b/dist/flexsearch.bundle.js @@ -6,30 +6,29 @@ * https://github.com/nextapps-de/flexsearch */ (function _f(self){'use strict';try{if(module)self=module}catch(e){}self._factory=_f;var r;function v(a){return"undefined"!==typeof a?a:!0}function w(a){const b=Array(a);for(let c=0;c=this.A&&(m||!u[k])){var f=Math.min(p/d*q|0,q),g="";switch(this.B){case "full":if(3h;n--)g=k.substring(h,n),g.length>=this.A&&M(this,u,g,l,a,c)}break}case "reverse":if(2=this.A&&M(this,u,g,f,a,c);g=""}case "forward":if(1=this.A&&M(this,u,g,f,a,c);break;default:if(M(this,u,k,f,a,c),m&&1=this.A&&!e[k]?(e[k]=1,l=this.F&&k>f,M(this,t,l?f:k,0,a,c,l?k:f)):g=Math.min(g+1,d-q)}}}this.o||(this.register[a]=1)}return this}; +function ea(a){let b="",c="";for(let d=0,e=a.length,f;d=this.A&&(m||!u[k])){var f=df;h--)if(h-f>=this.A){var l=d+e= +this.A&&M(this,u,g,d+e=this.A&&M(this,u,g,f,a,c);break;default:if(M(this,u,k,f,a,c),m&&1=this.A&&!g[k]){g[k]=1;const p=this.F&&k>f;M(this,t,p?f:k,h+d=this.A&&!c[t])if(this.s||f||this.map[t])l[u++]=t,c[t]=1;else return d;a=l;e=a.length}if(!e)return d;b||(b=100);h=this.depth&&1=d)))break;if(p){if(f)return sa(l,d,0);b[b.length]=l;return}}return!c&&l}function sa(a,b,c){a=1===a.length?a[0]:[].concat.apply([],a);return c||a.length>b?a.slice(c,c+b):a} -function ta(a,b,c,d){c?(d=d&&b>c,a=(a=a[d?b:c])&&a[d?c:b]):a=a[b];return a}r.contain=function(a){return!!this.register[a]};r.update=function(a,b){return this.remove(a).add(a,b)}; +r.search=function(a,b,c){D(a)?(c=a,a=c.query):D(b)&&(c=b);let d=[],e;let f,g=0;if(c){b=c.limit;g=c.offset||0;var h=c.context;f=c.suggest}if(a&&(a=this.encode(a),e=a.length,1=this.A&&!c[t])if(this.s||f||this.map[t])l[u++]=t,c[t]=1;else return d;a=l;e=a.length}if(!e)return d;b||(b=100);h=this.depth&&1=d)))break;if(n){if(f)return ta(l,d,0);b[b.length]=l;return}}return!c&&l}function ta(a,b,c){a=1===a.length?a[0]:[].concat.apply([],a);return c||a.length>b?a.slice(c,c+b):a} +function ua(a,b,c,d){c?(d=d&&b>c,a=(a=a[d?b:c])&&a[d?c:b]):a=a[b];return a}r.contain=function(a){return!!this.register[a]};r.update=function(a,b){return this.remove(a).add(a,b)}; r.remove=function(a,b){const c=this.register[a];if(c){if(this.o)for(let d=0,e;db||c)e=e.slice(c,c+b);d&&(e=ya.call(this,e));return{tag:a,result:e}}}function ya(a){const b=Array(a.length);for(let c=0,d;cb||c)e=e.slice(c,c+b);d&&(e=za.call(this,e));return{tag:a,result:e}}}function za(a){const b=Array(a.length);for(let c=0,d;c=this.m&&(m||!t[k])){var f=Math.min(p/e*q|0,q),h="";switch(this.G){case "full":if(3g;n--)h=k.substring(g,n),h.length>=this.m&&Q(this,t,h,l,a,c)}break}case "reverse":if(2=this.m&&Q(this,t,h,f,a,c);h=""}case "forward":if(1=this.m&&Q(this,t,h,f,a,c);break;default:if(Q(this,t,k,f,a,c),m&&1=this.m&&!d[k]?(d[k]=1,l=this.D&&k>f,Q(this,r,l?f:k,0,a,c,l?k:f)):h=Math.min(h+1,e-q)}}}this.B||(this.register[a]=1)}return this}; -function Q(a,b,c,e,d,f,h){let g=h?a.l:a.h;if(!b[c]||h&&!b[c][h])a.o&&(g=g[e]),h?(b=b[c]||(b[c]=z()),b[h]=1,g=g[h]||(g[h]=z())):b[c]=1,g=g[c]||(g[c]=[]),a.o||(g=g[e]||(g[e]=[])),f&&-1!==g.indexOf(d)||(g[g.length]=d,a.B&&(a=a.register[d]||(a.register[d]=[]),a[a.length]=g))} -u.search=function(a,b,c){D(a)?(c=a,a=c.query):D(b)&&(c=b);let e=[],d;let f,h=0;if(c){b=c.limit;h=c.offset||0;var g=c.context;f=c.suggest}if(a&&(a=this.encode(a),d=a.length,1=this.m&&!c[r])if(this.o||f||this.h[r])l[t++]=r,c[r]=1;else return e;a=l;d=a.length}if(!d)return e;b||(b=100);g=this.depth&&1=e)))break;if(p){if(f)return la(l,e,0);b[b.length]=l;return}}return!c&&l}function la(a,b,c){a=1===a.length?a[0]:[].concat.apply([],a);return c||a.length>b?a.slice(c,c+b):a} -function ma(a,b,c,e){c?(e=e&&b>c,a=(a=a[e?b:c])&&a[e?c:b]):a=a[b];return a}u.contain=function(a){return!!this.register[a]};u.update=function(a,b){return this.remove(a).add(a,b)};u.remove=function(a,b){const c=this.register[a];if(c){if(this.B)for(let e=0,d;e=this.o&&(m||!t[k])){var f=ef;g--)if(g-f>=this.o){var l=e+d= +this.o&&Q(this,t,h,e+d=this.o&&Q(this,t,h,f,a,c);break;default:if(Q(this,t,k,f,a,c),m&&1=this.o&&!h[k]){h[k]=1;const p=this.F&&k>f;Q(this,r,p?f:k,g+e=this.o&&!c[r])if(this.s||f||this.h[r])l[t++]=r,c[r]=1;else return e;a=l;d=a.length}if(!d)return e;b||(b=100);g=this.depth&&1=e)))break;if(n){if(f)return ma(l,e,0);b[b.length]=l;return}}return!c&&l}function ma(a,b,c){a=1===a.length?a[0]:[].concat.apply([],a);return c||a.length>b?a.slice(c,c+b):a} +function na(a,b,c,e){c?(e=e&&b>c,a=(a=a[e?b:c])&&a[e?c:b]):a=a[b];return a}u.contain=function(a){return!!this.register[a]};u.update=function(a,b){return this.remove(a).add(a,b)};u.remove=function(a,b){const c=this.register[a];if(c){if(this.C)for(let e=0,d;eb||c)d=d.slice(c,c+b);e&&(d=pa.call(this,d));return{tag:a,result:d}}}function pa(a){const b=Array(a.length);for(let c=0,e;cb||c)d=d.slice(c,c+b);e&&(d=qa.call(this,d));return{tag:a,result:d}}}function qa(a){const b=Array(a.length);for(let c=0,e;c= this.minlength && (m || !r[l])) { - var f = Math.min(n / d * p | 0, p), g = ""; - switch(this.tokenizer) { + if (l && e >= this.minlength && (p || !r[l])) { + var f = d < n ? m : n / d * m | 0, g = ""; + switch(this.tokenize) { case "full": if (3 < e) { - for (var h = 0; h < e; h++) { - var k = h ? Math.min(f / 2 + n / e * h / 2 | 0, f + h) : f; - for (let q = e; q > h; q--) { - g = l.substring(h, q), g.length >= this.minlength && this.push_index(r, g, k, a, c); + for (f = 0; f < e; f++) { + for (var h = e; h > f; h--) { + if (h - f >= this.minlength) { + var k = d + e < n ? m + f : n / (d + e) * (m + f) | 0; + g = l.substring(f, h); + this.push_index(r, g, k, a, c); + } } } break; @@ -315,7 +318,7 @@ M.prototype.add = function(a, b, c, d) { case "reverse": if (2 < e) { for (h = e - 1; 0 < h; h--) { - g = l[h] + g, g.length >= this.minlength && this.push_index(r, g, f, a, c); + g = l[h] + g, g.length >= this.minlength && this.push_index(r, g, d + e < n ? m + h : n / (d + e) * (m + h) | 0, a, c); } g = ""; } @@ -327,9 +330,13 @@ M.prototype.add = function(a, b, c, d) { } break; default: - if (this.push_index(r, l, f, a, c), m && 1 < d && p < d - 1) { - for (e = w(), f = l, g = Math.min(m + 1, d - p), e[f] = 1, h = 1; h < g; h++) { - (l = b[this.rtl ? d - 1 - p - h : p + h]) && l.length >= this.minlength && !e[l] ? (e[l] = 1, k = this.bidirectional && l > f, this.push_index(u, k ? f : l, 0, a, c, k ? l : f)) : g = Math.min(g + 1, d - p); + if (this.push_index(r, l, f, a, c), p && 1 < d && m < d - 1) { + for (e = this.resolution_ctx, g = w(), f = l, h = Math.min(p + 1, d - m), g[f] = 1, k = 1; k < h; k++) { + if ((l = b[this.rtl ? d - 1 - m - k : m + k]) && l.length >= this.minlength && !g[l]) { + g[l] = 1; + const q = this.bidirectional && l > f; + this.push_index(u, q ? f : l, h + d < e ? m + (k - 1) : e / (h + d) * (m + k) | 0, a, c, q ? l : f); + } } } } @@ -376,11 +383,11 @@ M.prototype.search = function(a, b, c) { b || (b = 100); h = this.depth && 1 < e && !1 !== h; c = 0; - let m; - h ? (m = a[0], c = 1) : 1 < e && a.sort(aa); + let p; + h ? (p = a[0], c = 1) : 1 < e && a.sort(aa); for (let n, r; c < e; c++) { r = a[c]; - h ? (n = this.add_result(d, f, b, g, 2 === e, r, m), f && !1 === n && d.length || (m = r)) : n = this.add_result(d, f, b, g, 1 === e, r); + h ? (n = this.add_result(d, f, b, g, 2 === e, r, p), f && !1 === n && d.length || (p = r)) : n = this.add_result(d, f, b, g, 1 === e, r); if (n) { return n; } @@ -395,28 +402,28 @@ M.prototype.search = function(a, b, c) { return d; } if (1 === k) { - return oa(d[0], b, g); + return pa(d[0], b, g); } } } - return ia(d, b, g, f); + return ja(d, b, g, f); }; M.prototype.add_result = function(a, b, c, d, e, f, g) { let h = [], k = g ? this.ctx : this.map; - this.optimize || (k = pa(k, f, g, this.bidirectional)); + this.optimize || (k = qa(k, f, g, this.bidirectional)); if (k) { - let m = 0; + let p = 0; const n = Math.min(k.length, g ? this.resolution_ctx : this.resolution); - for (let r = 0, u = 0, p, l; r < n; r++) { - if (p = k[r]) { - if (this.optimize && (p = pa(p, f, g, this.bidirectional)), d && p && e && (l = p.length, l <= d ? (d -= l, p = null) : (p = p.slice(d), d = 0)), p && (h[m++] = p, e && (u += p.length, u >= c))) { + for (let r = 0, u = 0, m, l; r < n; r++) { + if (m = k[r]) { + if (this.optimize && (m = qa(m, f, g, this.bidirectional)), d && m && e && (l = m.length, l <= d ? (d -= l, m = null) : (m = m.slice(d), d = 0)), m && (h[p++] = m, e && (u += m.length, u >= c))) { break; } } } - if (m) { + if (p) { if (e) { - return oa(h, c, 0); + return pa(h, c, 0); } a[a.length] = h; return; @@ -424,11 +431,11 @@ M.prototype.add_result = function(a, b, c, d, e, f, g) { } return !b && h; }; -function oa(a, b, c) { +function pa(a, b, c) { a = 1 === a.length ? a[0] : [].concat.apply([], a); return c || a.length > b ? a.slice(c, c + b) : a; } -function pa(a, b, c, d) { +function qa(a, b, c, d) { c ? (d = d && b > c, a = (a = a[d ? b : c]) && a[d ? c : b]) : a = a[b]; return a; } @@ -446,14 +453,14 @@ M.prototype.remove = function(a, b) { e = c[d], e.splice(e.indexOf(a), 1); } } else { - O(this.map, a, this.resolution, this.optimize), this.depth && O(this.ctx, a, this.resolution_ctx, this.optimize); + N(this.map, a, this.resolution, this.optimize), this.depth && N(this.ctx, a, this.resolution_ctx, this.optimize); } b || delete this.register[a]; this.cache && this.cache.del(a); } return this; }; -function O(a, b, c, d, e) { +function N(a, b, c, d, e) { let f = 0; if (a.constructor === Array) { if (e) { @@ -462,18 +469,18 @@ function O(a, b, c, d, e) { e = Math.min(a.length, c); for (let g = 0, h; g < e; g++) { if (h = a[g]) { - f = O(h, b, c, d, e), d || f || delete a[g]; + f = N(h, b, c, d, e), d || f || delete a[g]; } } } } else { for (let g in a) { - (f = O(a[g], b, c, d, e)) || delete a[g]; + (f = N(a[g], b, c, d, e)) || delete a[g]; } } return f; } -M.prototype.searchCache = ka; +M.prototype.searchCache = la; M.prototype.export = function(a, b, c, d, e) { let f, g; switch(e || (e = 0)) { @@ -503,7 +510,7 @@ M.prototype.export = function(a, b, c, d, e) { default: return; } - na(a, b || this, c ? c + "." + f : f, d, e, g); + oa(a, b || this, c ? c + "." + f : f, d, e, g); return !0; }; M.prototype.import = function(a, b) { @@ -524,8 +531,8 @@ M.prototype.import = function(a, b) { } } }; -ha(M.prototype); -function qa(a) { +ia(M.prototype); +function ra(a) { a = a.data; var b = self._index; const c = a.args; @@ -543,13 +550,13 @@ function qa(a) { a = a.id, b = b[d].apply(b, c), postMessage("search" === d ? {id:a, msg:b} : {id:a}); } } -;let ra = 0; +;let sa = 0; function P(a) { var b; a ? D(b = a.encode) && (a.encode = b.toString()) : a = {}; (b = (self || window)._factory) && (b = b.toString()); const c = self.exports, d = this; - this.worker = sa(b, c, a.worker); + this.worker = ta(b, c, a.worker); this.resolver = w(); if (this.worker) { if (c) { @@ -580,17 +587,17 @@ function Q(a) { D(d) && (e = d, c.splice(c.length - 1, 1)); d = new Promise(function(f) { setTimeout(function() { - b.resolver[++ra] = f; - b.worker.postMessage({task:a, id:ra, args:c}); + b.resolver[++sa] = f; + b.worker.postMessage({task:a, id:sa, args:c}); }); }); return e ? (d.then(e), this) : d; }; } -function sa(a, b, c) { +function ta(a, b, c) { let d; try { - d = b ? eval('new (require("worker_threads")["Worker"])("../dist/node/node.js")') : a ? new Worker(URL.createObjectURL(new Blob(["onmessage=" + qa.toString()], {type:"text/javascript"}))) : new Worker(y(c) ? c : "worker/worker.js", {type:"module"}); + d = b ? eval('new (require("worker_threads")["Worker"])("../dist/node/node.js")') : a ? new Worker(URL.createObjectURL(new Blob(["onmessage=" + ra.toString()], {type:"text/javascript"}))) : new Worker(y(c) ? c : "worker/worker.js", {type:"module"}); } catch (e) { } return d; @@ -745,20 +752,20 @@ R.prototype.remove = function(a) { }; R.prototype.search = function(a, b, c, d) { A(a) ? (c = a, a = c.query) : A(b) && (c = b, b = 0); - let e = [], f = [], g, h, k, m, n, r, u = 0; + let e = [], f = [], g, h, k, p, n, r, u = 0; if (c) { if (c.constructor === Array) { k = c, c = null; } else { k = (g = c.pluck) || c.index || c.field || c; - m = c.tag; + p = c.tag; h = this.store && c.enrich; n = "and" === c.bool; b = c.limit || 100; r = c.offset || 0; - if (m && (y(m) && (m = [m]), !a)) { - for (let l = 0, q; l < m.length; l++) { - if (q = ta.call(this, m[l], b, r, h)) { + if (p && (y(p) && (p = [p]), !a)) { + for (let l = 0, q; l < p.length; l++) { + if (q = ua.call(this, p[l], b, r, h)) { e[e.length] = q, u++; } } @@ -768,26 +775,26 @@ R.prototype.search = function(a, b, c, d) { } } k || (k = this.field); - n = n && (1 < k.length || m && 1 < m.length); - const p = !d && (this.worker || this.async) && []; + n = n && (1 < k.length || p && 1 < p.length); + const m = !d && (this.worker || this.async) && []; for (let l = 0, q, x, z; l < k.length; l++) { let B; x = k[l]; y(x) || (B = x, x = x.field); - if (p) { - p[l] = this.index[x].searchAsync(a, b, B || c); + if (m) { + m[l] = this.index[x].searchAsync(a, b, B || c); } else { z = (q = d ? d[l] : this.index[x].search(a, b, B || c)) && q.length; - if (m && z) { + if (p && z) { const C = []; let G = 0; n && (C[0] = [q]); - for (let V = 0, ma, N; V < m.length; V++) { - if (ma = m[V], z = (N = this.tagindex[ma]) && N.length) { - G++, C[C.length] = n ? [N] : N; + for (let V = 0, ma, O; V < p.length; V++) { + if (ma = p[V], z = (O = this.tagindex[ma]) && O.length) { + G++, C[C.length] = n ? [O] : O; } } - G && (q = n ? ia(C, b || 100, r || 0) : ja(q, C), z = q.length); + G && (q = n ? ja(C, b || 100, r || 0) : ka(q, C), z = q.length); } if (z) { f[u] = x, e[u++] = q; @@ -798,10 +805,10 @@ R.prototype.search = function(a, b, c, d) { } } } - if (p) { + if (m) { const l = this; return new Promise(function(q) { - Promise.all(p).then(function(x) { + Promise.all(m).then(function(x) { q(l.search(a, b, c, x)); }); }); @@ -814,7 +821,7 @@ R.prototype.search = function(a, b, c, d) { } for (let l = 0, q; l < f.length; l++) { q = e[l]; - q.length && h && (q = ua.call(this, q)); + q.length && h && (q = va.call(this, q)); if (g) { return q; } @@ -822,17 +829,17 @@ R.prototype.search = function(a, b, c, d) { } return e; }; -function ta(a, b, c, d) { +function ua(a, b, c, d) { let e = this.tagindex[a], f = e && e.length - c; if (f && 0 < f) { if (f > b || c) { e = e.slice(c, c + b); } - d && (e = ua.call(this, e)); + d && (e = va.call(this, e)); return {tag:a, result:e}; } } -function ua(a) { +function va(a) { const b = Array(a.length); for (let c = 0, d; c < a.length; c++) { d = a[c], b[c] = {key:d, doc:this.store[d]}; @@ -849,7 +856,7 @@ R.prototype.set = function(a, b) { this.store[a] = b; return this; }; -R.prototype.searchCache = ka; +R.prototype.searchCache = la; R.prototype.export = function(a, b, c, d, e) { e || (e = 0); d || (d = 0); @@ -873,7 +880,7 @@ R.prototype.export = function(a, b, c, d, e) { default: return; } - na(a, this, c, d, e, f); + oa(a, this, c, d, e, f); } }; R.prototype.import = function(a, b) { @@ -900,26 +907,26 @@ R.prototype.import = function(a, b) { } } }; -ha(R.prototype); -var wa = {encode:va, rtl:!1, tokenize:""}; -const xa = /[\W_]+/, ya = F("[\u00e0\u00e1\u00e2\u00e3\u00e4\u00e5]"), za = F("[\u00e8\u00e9\u00ea\u00eb]"), Aa = F("[\u00ec\u00ed\u00ee\u00ef]"), Ba = F("[\u00f2\u00f3\u00f4\u00f5\u00f6\u0151]"), Ca = F("[\u00f9\u00fa\u00fb\u00fc\u0171]"), Da = F("[\u00fd\u0177\u00ff]"), Ea = F("\u00f1"), Fa = F("[\u00e7c]"), Ga = F("\u00df"), Ha = F(" & "), Ia = [ya, "a", za, "e", Aa, "i", Ba, "o", Ca, "u", Da, "y", Ea, "n", Fa, "k", Ga, "s", Ha, " and "]; -function va(a) { - return this.pipeline(E(a).toLowerCase(), !a.normalize && Ia, xa, !1); +ia(R.prototype); +var xa = {encode:wa, rtl:!1, tokenize:""}; +const ya = /[\W_]+/, za = F("[\u00e0\u00e1\u00e2\u00e3\u00e4\u00e5]"), Aa = F("[\u00e8\u00e9\u00ea\u00eb]"), Ba = F("[\u00ec\u00ed\u00ee\u00ef]"), Ca = F("[\u00f2\u00f3\u00f4\u00f5\u00f6\u0151]"), Da = F("[\u00f9\u00fa\u00fb\u00fc\u0171]"), Ea = F("[\u00fd\u0177\u00ff]"), Fa = F("\u00f1"), Ga = F("[\u00e7c]"), Ha = F("\u00df"), Ia = F(" & "), Ja = [za, "a", Aa, "e", Ba, "i", Ca, "o", Da, "u", Ea, "y", Fa, "n", Ga, "k", Ha, "s", Ia, " and "]; +function wa(a) { + return this.pipeline(E(a).toLowerCase(), !a.normalize && Ja, ya, !1); } -;var Ka = {encode:Ja, rtl:!1, tokenize:"strict"}; -const La = /[^a-z0-9]+/, Ma = {b:"p", v:"f", w:"f", z:"s", x:"s", "\u00df":"s", d:"t", n:"m", c:"k", g:"k", j:"k", q:"k", i:"e", y:"e", u:"o"}; -function Ja(a) { - a = va.call(this, a).join(" "); +;var La = {encode:Ka, rtl:!1, tokenize:"strict"}; +const Ma = /[^a-z0-9]+/, Na = {b:"p", v:"f", w:"f", z:"s", x:"s", "\u00df":"s", d:"t", n:"m", c:"k", g:"k", j:"k", q:"k", i:"e", y:"e", u:"o"}; +function Ka(a) { + a = wa.call(this, a).join(" "); const b = []; if (a) { - const c = a.split(La), d = c.length; + const c = a.split(Ma), d = c.length; for (let e = 0, f, g = 0; e < d; e++) { if ((a = c[e]) && (!this.filter || !this.filter[a])) { f = a[0]; - let h = Ma[f] || f, k = h; - for (let m = 1; m < a.length; m++) { - f = a[m]; - const n = Ma[f] || f; + let h = Na[f] || f, k = h; + for (let p = 1; p < a.length; p++) { + f = a[p]; + const n = Na[f] || f; n && n !== k && (h += n, k = n); } b[g++] = h; @@ -928,28 +935,29 @@ function Ja(a) { } return b; } -;var Oa = {encode:Na, rtl:!1, tokenize:""}; -const Pa = F("ae"), Qa = F("oe"), Ra = F("sh"), Sa = F("th"), Ta = F("ph"), Ua = F("pf"), Va = [Pa, "a", Qa, "o", Ra, "s", Sa, "t", Ta, "f", Ua, "f", ]; -function Na(a, b) { - a && (a = Ja.call(this, a).join(" "), 2 < a.length && (a = H(a, Va)), b || (1 < a.length && (a = I(a)), a && (a = a.split(" ")))); +;var Pa = {encode:Oa, rtl:!1, tokenize:""}; +const Qa = F("ae"), Ra = F("oe"), Sa = F("sh"), Ta = F("th"), Ua = F("ph"), Va = F("pf"), Wa = [Qa, "a", Ra, "o", Sa, "s", Ta, "t", Ua, "f", Va, "f", ]; +function Oa(a, b) { + a && (a = Ka.call(this, a).join(" "), 2 < a.length && (a = H(a, Wa)), b || (1 < a.length && (a = I(a)), a && (a = a.split(" ")))); return a; } -;var Xa = {encode:Wa, rtl:!1, tokenize:""}; -const Ya = F("(?!\\b)[aeiouy]"); -function Wa(a) { - a && (a = Na.call(this, a, !0), 1 < a.length && (a = a.replace(Ya, "")), 1 < a.length && (a = I(a)), a && (a = a.split(" "))); +;var Ya = {encode:Xa, rtl:!1, tokenize:""}; +const Za = F("(?!\\b)[aeiouy]"); +function Xa(a) { + a && (a = Oa.call(this, a, !0), 1 < a.length && (a = a.replace(Za, "")), 1 < a.length && (a = I(a)), a && (a = a.split(" "))); return a; } -;J["latin:simple"] = wa; -J["latin:balance"] = Ka; -J["latin:advanced"] = Oa; -J["latin:extra"] = Xa; +;J["latin:default"] = ea; +J["latin:simple"] = xa; +J["latin:balance"] = La; +J["latin:advanced"] = Pa; +J["latin:extra"] = Ya; const X = self; let Y; const Z = {Index:M, Document:R, Worker:P, registerCharset:function(a, b) { J[a] = b; }, registerLanguage:function(a, b) { - fa[a] = b; + ha[a] = b; }}; (Y = X.define) && Y.amd ? Y([], function() { return Z; diff --git a/dist/flexsearch.es5.js b/dist/flexsearch.es5.js index 4073183..356fb1f 100644 --- a/dist/flexsearch.es5.js +++ b/dist/flexsearch.es5.js @@ -7,40 +7,39 @@ */ (function(self){'use strict';var u;function aa(a){var b=0;return function(){return b>>0)+"_",f=0;return b}); +x("Symbol",function(a){function b(g){if(this instanceof b)throw new TypeError("Symbol is not a constructor");return new c(d+(g||"")+"_"+f++,g)}function c(g,e){this.h=g;v(this,"description",{configurable:!0,writable:!0,value:e})}if(a)return a;c.prototype.toString=function(){return this.h};var d="jscomp_symbol_"+(1E9*Math.random()>>>0)+"_",f=0;return b}); x("Symbol.iterator",function(a){if(a)return a;a=Symbol("Symbol.iterator");for(var b="Array Int8Array Uint8Array Uint8ClampedArray Int16Array Uint16Array Int32Array Uint32Array Float32Array Float64Array".split(" "),c=0;c=this.D&&(f||!e[l])){var q=Math.min(h/d*k|0,k),n="";switch(this.m){case "full":if(3p;t--)n=l.substring(p,t),n.length>=this.D&&P(this,e,n,r,a,c);break}case "reverse":if(2= -this.D&&P(this,e,n,q,a,c);n=""}case "forward":if(1=this.D&&P(this,e,n,q,a,c);break;default:if(P(this,e,l,q,a,c),f&&1=this.D&&!m[l]?(m[l]=1,r=this.I&&l>q,P(this,g,r?q:l,0,a,c,r?l:q)):n=Math.min(n+1,d-k)}}}this.s||(this.register[a]=1)}return this}; -function P(a,b,c,d,f,h,e){var g=e?a.h:a.map;if(!b[c]||e&&!b[c][e])a.B&&(g=g[d]),e?(b=b[c]||(b[c]=B()),b[e]=1,g=g[e]||(g[e]=B())):b[c]=1,g=g[c]||(g[c]=[]),a.B||(g=g[d]||(g[d]=[])),h&&-1!==g.indexOf(f)||(g[g.length]=f,a.s&&(a=a.register[f]||(a.register[f]=[]),a[a.length]=g))} -u.search=function(a,b,c){E(a)?(c=a,a=c.query):E(b)&&(c=b);var d=[],f=0;if(c){b=c.limit;f=c.offset||0;var h=c.context;var e=c.suggest}if(a){a=this.encode(a);var g=a.length;if(1=this.D&&!c[q])if(this.B||e||this.map[q])k[m++]=q,c[q]=1;else return d;a=k;g=a.length}}if(!g)return d;b||(b=100);h=this.depth&&1=d)))break;if(m){if(h)return ya(k,d,0);b[b.length]=k;return}}return!c&&k}function ya(a,b,c){a=1===a.length?a[0]:[].concat.apply([],a);return c||a.length>b?a.slice(c,c+b):a} -function za(a,b,c,d){c?(d=d&&b>c,a=(a=a[d?b:c])&&a[d?c:b]):a=a[b];return a}u.contain=function(a){return!!this.register[a]};u.update=function(a,b){return this.remove(a).add(a,b)}; -u.remove=function(a,b){var c=this.register[a];if(c){if(this.s)for(var d=0,f;d=this.D&&(f||!e[l])){var p=dp;q--)if(q-p>=this.D){var r=d+m=this.D&&P(this, +e,n,d+m=this.D&&P(this,e,n,p,a,c);break;default:if(P(this,e,l,p,a,c),f&&1=this.D&&!n[l]){n[l]=1;var t=this.J&&l>p;P(this,h,t?p:l,q+d=this.D&&!c[p])if(this.B||e||this.map[p])k[m++]=p,c[p]=1;else return d;a=k;h=a.length}}if(!h)return d;b||(b=100);g=this.depth&&1=d)))break;if(m){if(g)return za(k,d,0);b[b.length]=k;return}}return!c&&k}function za(a,b,c){a=1===a.length?a[0]:[].concat.apply([],a);return c||a.length>b?a.slice(c,c+b):a} +function Aa(a,b,c,d){c?(d=d&&b>c,a=(a=a[d?b:c])&&a[d?c:b]):a=a[b];return a}u.contain=function(a){return!!this.register[a]};u.update=function(a,b){return this.remove(a).add(a,b)}; +u.remove=function(a,b){var c=this.register[a];if(c){if(this.s)for(var d=0,f;db||c)f=f.slice(c,c+b);d&&(f=Ea.call(this,f));return{tag:a,result:f}}}function Ea(a){for(var b=Array(a.length),c=0,d;cb||c)f=f.slice(c,c+b);d&&(f=Fa.call(this,f));return{tag:a,result:f}}}function Fa(a){for(var b=Array(a.length),c=0,d;c=this.h&&(m||!q[k])){var h=Math.min(n/d*p|0,p),g="";switch(this.C){case "full":if(3e;t--)g=k.substring(e,t),g.length>=this.h&&L(this,q,g,l,a,b)}break}case "reverse":if(2=this.h&&L(this,q,g,h,a,b);g=""}case "forward":if(1=this.h&&L(this,q,g,h,a,b);break;default:if(L(this,q,k,h,a,b),m&&1=this.h&&!f[k]?(f[k]=1,l=this.l&&k>h,L(this,r,l?h:k,0,a,b,l?k:h)):g=Math.min(g+1,d-p)}}}this.s||(this.register[a]=1)}return this}; -function L(a,c,b,d,f,h,g){let e=g?a.m:a.i;if(!c[b]||g&&!c[b][g])a.g&&(e=e[d]),g?(c=c[b]||(c[b]=z()),c[g]=1,e=e[g]||(e[g]=z())):c[b]=1,e=e[b]||(e[b]=[]),a.g||(e=e[d]||(e[d]=[])),h&&-1!==e.indexOf(f)||(e[e.length]=f,a.s&&(a=a.register[f]||(a.register[f]=[]),a[a.length]=e))} -K.prototype.search=function(a,c,b){"object"===typeof a?(b=a,a=b.query):"object"===typeof c&&(b=c);let d=[],f;let h,g=0;if(b){c=b.limit;g=b.offset||0;var e=b.context;h=!1}if(a&&(a=this.encode(a),f=a.length,1=this.h&&!b[r])if(this.g||h||this.i[r])l[q++]=r,b[r]=1;else return d;a=l;f=a.length}if(!f)return d;c||(c=100);e=this.j&&1=d)))break;if(n){if(h)return O(l,d,0);c[c.length]=l;return}}return!b&&l}function O(a,c,b){a=1===a.length?a[0]:[].concat.apply([],a);return b||a.length>c?a.slice(b,b+c):a} -function P(a,c,b,d){b?(d=d&&c>b,a=(a=a[d?c:b])&&a[d?b:c]):a=a[c];return a}K.prototype.contain=function(a){return!!this.register[a]};K.prototype.update=function(a,c){return this.remove(a).add(a,c)};K.prototype.remove=function(a,c){const b=this.register[a];if(b){if(this.s)for(let d=0,f;d=this.h&&(p||!q[k])){var h=dh;f--)if(f-h>=this.h){var l=d+e= +this.h&&L(this,q,g,d+e=this.h&&L(this,q,g,h,a,b);break;default:if(L(this,q,k,h,a,b),p&&1=this.h&&!g[k]){g[k]=1;const t=this.l&&k>h;L(this,r,t?h:k,f+d=this.h&&!b[r])if(this.g||h||this.i[r])l[q++]=r,b[r]=1;else return d;a=l;e=a.length}if(!e)return d;c||(c=100);f=this.j&&1=d)))break;if(m){if(h)return O(l,d,0);c[c.length]=l;return}}return!b&&l}function O(a,c,b){a=1===a.length?a[0]:[].concat.apply([],a);return b||a.length>c?a.slice(b,b+c):a} +function P(a,c,b,d){b?(d=d&&c>b,a=(a=a[d?c:b])&&a[d?b:c]):a=a[c];return a}K.prototype.contain=function(a){return!!this.register[a]};K.prototype.update=function(a,c){return this.remove(a).add(a,c)};K.prototype.remove=function(a,c){const b=this.register[a];if(b){if(this.s)for(let d=0,e;d - optimize: "memory" + optimize: true -7 -1 0 diff --git a/src/index.js b/src/index.js index 16c0dff..50778bc 100644 --- a/src/index.js +++ b/src/index.js @@ -27,7 +27,7 @@ import Cache, { searchCache } from "./cache.js"; import apply_preset from "./preset.js"; import { exportIndex, importIndex } from "./serialize.js"; -registerCharset("latin:default", default_encoder); +//registerCharset("latin:default", default_encoder); /** * @constructor @@ -82,10 +82,10 @@ function Index(options, _register){ this.encode = options["encode"] || (charset && charset.encode) || default_encoder; this.register = _register || create_object(); this.resolution = resolution = options["resolution"] || 9; - this.tokenizer = tmp = (charset && charset.tokenize) || options["tokenize"] || "strict"; + this.tokenize = tmp = (charset && charset.tokenize) || options["tokenize"] || "strict"; this.depth = (tmp === "strict") && context["depth"]; this.bidirectional = parse_option(context["bidirectional"], true); - this.optimize = optimize = options["optimize"] === "memory"; + this.optimize = optimize = parse_option(options["optimize"], true); this.fastupdate = parse_option(options["fastupdate"], true); this.minlength = options["minlength"] || 1; @@ -146,6 +146,8 @@ Index.prototype.add = function(id, content, _append, _skip_update){ // check context dupes to skip all contextual redundancy in the whole document const dupes_ctx = create_object(); + // TODO: stretch the partial score to its full resolution + for(let i = 0; i < length; i++){ let term = content[this.rtl ? length - 1 - i : i]; @@ -154,125 +156,142 @@ Index.prototype.add = function(id, content, _append, _skip_update){ // skip dupes will break the context chain if(term && (term_length >= this.minlength) && (depth || !dupes[term])){ - const score = Math.min((resolution / length * i) | 0, i); + const score = length < resolution ? i : (resolution / length * i) | 0; + let token = ""; - //if(score < resolution){ + switch(this.tokenize){ - let token = ""; + case "full": - switch(this.tokenizer){ + if(term_length > 3){ - case "full": + for(let x = 0; x < term_length; x++){ - if(term_length > 3){ + for(let y = term_length; y > x; y--){ - for(let x = 0; x < term_length; x++){ + if((y - x) >= this.minlength){ - const partial_score = x ? Math.min((score / 2 + resolution / term_length * x / 2) | 0, score + x) : score; + const partial_score = (length + term_length) < resolution ? i + x : (resolution / (length + term_length) * (i + x)) | 0; - //if(partial_score < resolution){ + // console.log("resolution", resolution); + // console.log("length", length); + // console.log("term_length", term_length); + // console.log("i", i); + // console.log((length + term_length) < resolution ? i + x : resolution / (length + term_length) * (i + x)); - for(let y = term_length; y > x; y--){ - - token = term.substring(x, y); - - if(token.length >= this.minlength){ - - this.push_index(dupes, token, partial_score, id, _append); - } - } - //} - } - - break; - } - - // fallthrough to next case when term length < 4 - - case "reverse": - - // skip last round (this token exist already in "forward") - - if(term_length > 2){ - - for(let a = term_length - 1; a > 0; a--){ - - token = term[a] + token; - - if(token.length >= this.minlength){ - - this.push_index(dupes, token, score, id, _append); - } - } - - token = ""; - } - - // fallthrough to next case to apply forward also - - case "forward": - - if(term_length > 1){ - - for(let a = 0; a < term_length; a++){ - - token += term[a]; - - if(token.length >= this.minlength){ - - this.push_index(dupes, token, score, id, _append); + token = term.substring(x, y); + this.push_index(dupes, token, partial_score, id, _append); } } } break; + } - //case "strict": - default: + // fallthrough to next case when term length < 4 - this.push_index(dupes, term, score, id, _append); + case "reverse": - if(depth){ + // skip last round (this token exist already in "forward") - if((length > 1) && (i < (length - 1))){ + if(term_length > 2){ - const resolution = this.resolution_ctx; - // check inner dupes to skip repeating words in the current context - const dupes_inner = create_object(); - const keyword = term; - let size = Math.min(depth + 1, length - i); + for(let x = term_length - 1; x > 0; x--){ - dupes_inner[keyword] = 1; + token = term[x] + token; - for(let x = 1; x < size; x++){ + if(token.length >= this.minlength){ - term = content[this.rtl ? length - 1 - i - x : i + x]; + const partial_score = (length + term_length) < resolution ? i + x : (resolution / (length + term_length) * (i + x)) | 0; - if(term && (term.length >= this.minlength) && !dupes_inner[term]){ + // console.log("token", token); + // console.log("resolution", resolution); + // console.log("length", length); + // console.log("term_length", term_length); + // console.log("i", i); + // console.log((length + term_length) < resolution ? i + x : (resolution / (length + term_length) * (i + x))); - dupes_inner[term] = 1; + this.push_index(dupes, token, partial_score, id, _append); + } + } - const context_score = 0; + token = ""; + } - // const context_score = Math.min(((resolution - size /*+ 1*/) / length * i + x) | 0, i + (x - 1)); - // - // // TODO: this check is not required when calculated properly - // if((context_score >= 0) && (context_score < resolution)){ + // fallthrough to next case to apply forward also - const swap = this.bidirectional && (term > keyword); + case "forward": - this.push_index(dupes_ctx, swap ? keyword : term, context_score, id, _append, swap ? term : keyword); - //} - } - else{ + if(term_length > 1){ - size = Math.min(size + 1, length - i); - } + for(let x = 0; x < term_length; x++){ + + token += term[x]; + + if(token.length >= this.minlength){ + + // console.log("token", token); + // console.log("resolution", resolution); + // console.log("length", length); + // console.log("term_length", term_length); + // console.log("i", i); + // console.log(score); + + this.push_index(dupes, token, score, id, _append); + } + } + } + + break; + + //case "strict": + default: + + // console.log("term", term); + // console.log("resolution", resolution); + // console.log("length", length); + // console.log("i", i); + // console.log(score); + + this.push_index(dupes, term, score, id, _append); + + if(depth){ + + if((length > 1) && (i < (length - 1))){ + + const resolution = this.resolution_ctx; + // check inner dupes to skip repeating words in the current context + const dupes_inner = create_object(); + const keyword = term; + const size = Math.min(depth + 1, length - i); + + dupes_inner[keyword] = 1; + + for(let x = 1; x < size; x++){ + + term = content[this.rtl ? length - 1 - i - x : i + x]; + + if(term && (term.length >= this.minlength) && !dupes_inner[term]){ + + dupes_inner[term] = 1; + + const context_score = (size + length) < resolution ? i + (x - 1) : ((resolution / (size + length) * (i + x)) | 0); + + // console.log("term", term); + // console.log("resolution", resolution); + // console.log("size", size); + // console.log("length", length); + // console.log("i", i); + // console.log("x", x); + // console.log(resolution / (size + length) * (i + x)); + + const swap = this.bidirectional && (term > keyword); + this.push_index(dupes_ctx, swap ? keyword : term, context_score, id, _append, swap ? term : keyword); } } } - } - //} + } + } } } diff --git a/src/webpack.js b/src/webpack.js index 3e0b21a..6951481 100644 --- a/src/webpack.js +++ b/src/webpack.js @@ -3,6 +3,7 @@ import Document from "./document.js"; import Index from "./index.js"; import WorkerIndex from "./worker/index.js"; import { registerCharset, registerLanguage } from "./global.js"; +import charset_default from "./lang/latin/default.js" import charset_simple from "./lang/latin/simple.js" import charset_balance from "./lang/latin/balance.js" import charset_advanced from "./lang/latin/advanced.js" @@ -55,6 +56,7 @@ if(SUPPORT_SERIALIZE){ if(SUPPORT_ENCODER){ + registerCharset("latin:default", charset_default); registerCharset("latin:simple", charset_simple); registerCharset("latin:balance", charset_balance); registerCharset("latin:advanced", charset_advanced);