From fe544b773a7cc17544e3dd88fd18a920b2cedd73 Mon Sep 17 00:00:00 2001 From: Thomas Wilkerling Date: Mon, 23 Apr 2018 09:32:12 +0200 Subject: [PATCH] MOD minor refactoring --- README.md | 2 +- flexsearch.js | 72 ++++++++++++++++++++++++++++++--------------- flexsearch.light.js | 16 +++++----- flexsearch.min.js | 28 +++++++++--------- package.json | 2 +- test/test.js | 2 +- 6 files changed, 73 insertions(+), 49 deletions(-) diff --git a/README.md b/README.md index 569e33a..7ca2dc4 100644 --- a/README.md +++ b/README.md @@ -72,7 +72,7 @@ __Note:__ This feature is actually not enabled by default. ###### TF-IDF / BM25? -> "TF-IDF and all kinds of variations (like BM25) is a big mistake in searching algorithm today. They does'nt provide either: a meaningful relevance of a term as well as the importance of it! Like many pseudo-intelligent algorithm this is also an example of mathematical stupidity." — Thomas Wilkerling, _Contextual-based Scoring_, 2018 +> "TF-IDF and all kinds of variations (like BM25) is a big mistake in searching algorithms today. They don't provide neither: a meaningful relevance of a term nor the importance of it! Like many pseudo-intelligent algorithms this is also just an example of mathematical stupidity." — Thomas Wilkerling, _Contextual-based Scoring_, 2018 #### Web-Worker Support diff --git a/flexsearch.js b/flexsearch.js index 4ef0b01..5e72a12 100644 --- a/flexsearch.js +++ b/flexsearch.js @@ -266,7 +266,7 @@ var SUPPORT_ASYNC = true; //if(options){ - options || (options = defaults); + options || (options = defaults); var custom = options['profile']; var profile = custom ? profiles[custom] : {}; @@ -279,10 +279,10 @@ var SUPPORT_ASYNC = true; options['worker'] = false; - if(SUPPORT_ASYNC){ - - options['async'] = true; - } + // if(SUPPORT_ASYNC){ + // + // options['async'] = true; + // } this._worker = null; } @@ -295,11 +295,14 @@ var SUPPORT_ASYNC = true; self._task_completed = 0; self._task_result = []; self._current_callback = null; + //self._ids_count = new Array(threads); self._worker = new Array(threads); for(var i = 0; i < threads; i++){ - self._worker[i] = add_worker(self.id, i, options || defaults, function(id, query, result, limit){ + //self._ids_count[i] = 0; + + self._worker[i] = add_worker(self.id, i, options /*|| defaults*/, function(id, query, result, limit){ if(self._task_completed === self.worker){ @@ -317,6 +320,7 @@ var SUPPORT_ASYNC = true; if(self._current_callback && (self._task_completed === self.worker)){ // store result to cache + // TODO: add worker cache, may remove global cache if(self.cache){ @@ -326,6 +330,8 @@ var SUPPORT_ASYNC = true; self._current_callback(self._task_result); self._task_result = []; } + + return self; }); } } @@ -433,10 +439,11 @@ var SUPPORT_ASYNC = true; */ this._timer = null; - this._status = true; if(SUPPORT_CACHE) { + this._cache_status = true; + this.cache = custom = ( options['cache'] || @@ -557,7 +564,10 @@ var SUPPORT_ASYNC = true; if(SUPPORT_WORKER && this.worker){ - if(++this._current_task >= this._worker.length) this._current_task = 0; + if(++this._current_task >= this._worker.length){ + + this._current_task = 0; + } this._worker[this._current_task].postMessage(this._current_task, { @@ -568,9 +578,15 @@ var SUPPORT_ASYNC = true; this._ids[id] = "" + this._current_task; + // TODO: improve auto-balancing + //this._ids_count[this._current_task]++; + return this; } + // collect tasks for non-blocking processing + // TODO: actually auto-enabled in worker + if(SUPPORT_ASYNC && this.async){ this._stack[id] || ( @@ -766,7 +782,11 @@ var SUPPORT_ASYNC = true; // update status this._ids[id] = "1"; - this._status = false; + + if(SUPPORT_CACHE){ + + this._cache_status = false; + } } } @@ -801,14 +821,16 @@ var SUPPORT_ASYNC = true; if(SUPPORT_WORKER && this.worker){ - var int = parseInt(this._ids[id], 10); + var current_task = parseInt(this._ids[id], 10); - this._worker[int].postMessage(int, { + this._worker[current_task].postMessage(current_task, { 'remove': true, 'id': id }); + //this._ids_count[current_task]--; + delete this._ids[id]; return this; @@ -844,7 +866,10 @@ var SUPPORT_ASYNC = true; delete this._ids[id]; - this._status = false; + if(SUPPORT_CACHE){ + + this._cache_status = false; + } } return this; @@ -928,27 +953,26 @@ var SUPPORT_ASYNC = true; /** @type {!string|Array} */ var _query = query; - // invalidate cache + if(SUPPORT_CACHE && this.cache){ - if(!this._status){ + // invalidate cache - if(SUPPORT_CACHE && this.cache){ + if(!this._cache_status){ this._cache.reset(); + this._cache_status = true; } - this._status = true; - } + // validate cache - // validate cache + else { - else if(SUPPORT_CACHE && this.cache){ + var cache = this._cache.get(query); - var cache = this._cache.get(query); + if(cache){ - if(cache){ - - return cache; + return cache; + } } } @@ -1153,7 +1177,7 @@ var SUPPORT_ASYNC = true; 'items': items, 'sequences': words, 'chars': chars, - 'status': this._status, + //'status': this._cache_status, 'cache': this._stack_keys.length, 'matcher': global_matcher.length, 'worker': this.worker diff --git a/flexsearch.light.js b/flexsearch.light.js index 4a6eb85..ec1fec6 100644 --- a/flexsearch.light.js +++ b/flexsearch.light.js @@ -7,13 +7,13 @@ 'use strict';(function(d,v,n){var t;(t=n.define)&&t.amd?t([],function(){return v}):(t=n.modules)?t[d.toLowerCase()]=v:"undefined"!==typeof module?module.exports=v:n[d]=v})("FlexSearch",function(){function d(a){"string"===typeof a&&(a=B[a]);a||(a=x);this.id=a.id||G++;this.init(a);v(this,"index",function(){return this.a});v(this,"length",function(){return Object.keys(this.a).length})}function v(a,b,c){Object.defineProperty(a,b,{get:c})}function n(a){return new RegExp(a,"g")}function t(a,b,c){if("undefined"=== typeof c){for(c=0;c=g&&(a=a[h+.5|0],a=a[c]||(a[c]=[]),a[a.length]=e)}return h||b[c]}function A(a,b){if(a)for(var c=Object.keys(a),e=0,f=c.length;e=(8=(8a?1:0a?-1:0b&&(e=e.slice(0,b)));return e}var x={encode:"icase",mode:"forward",f:!1,cache:!1,async:!1,j:!1,threshold:0,depth:0},B= +1;for(var q,d=0,m=1;mb&&(e=e.slice(0,b)));return e}var x={encode:"icase",mode:"forward",f:!1,cache:!1,async:!1,i:!1,threshold:0,depth:0},B= {memory:{encode:"extra",mode:"strict",threshold:7},speed:{encode:"icase",mode:"strict",threshold:7,depth:2},match:{encode:"extra",mode:"full"},score:{encode:"extra",mode:"strict",threshold:5,depth:4},balance:{encode:"balance",mode:"ngram",threshold:6,depth:3},fastest:{encode:"icase",mode:"strict",threshold:9,depth:1}},w=[],G=0,D=n("[ -/]"),E={},F={};d.new=function(a){return new this(a)};d.create=function(a){return d.new(a)};d.registerMatcher=function(a){for(var b in a)a.hasOwnProperty(b)&&(w[w.length]= -n(b),w[w.length]=a[b]);return this};d.registerEncoder=function(a,b){u[a]=b;return this};d.registerLanguage=function(a,b){E[a]=b.filter;F[a]=b.stemmer;return this};d.encode=function(a,b){return u[a].call(u,b)};d.prototype.init=function(a){this.i=[];a||(a=x);var b=a.profile;b=b?B[b]:{};this.mode=a.mode||b.mode||this.mode||x.mode;this.threshold=a.threshold||b.threshold||this.threshold||x.threshold;this.depth=a.depth||b.depth||this.depth||x.depth;this.f=a.suggest||this.f||x.f;this.c=(b=a.encode||b.encode)&& -u[b]||("function"===typeof b?b:this.c||!1);(b=a.matcher)&&this.addMatcher(b);if(b=a.filter){b=E[b]||b;var c=this.c,e={};if(b)for(var f=0;fl;r--)m=d.substring(l,r),z(g,e,m,a,t,n,f);break;default:if(l=z(g,e,d,a,1,n,f),p&&1=f)for(q=e._ctx[d]||(e._ctx[d]={}),d=this.b[d]||(this.b[d]= -[{},{},{},{},{},{},{},{},{},{}]),l=k-p,r=k+p+1,0>l&&(l=0),r>h&&(r=h);lb;b++)A(this.g[b],a);this.depth&&A(this.b,a);delete this.a[a];this.h=!1}return this};d.prototype.search=function(a,b,c){var e=[];if(a&&"object"===typeof a){c=a.callback|| -b;b=a.limit;var f=a.threshold;a=a.query}f=(f||this.threshold||0)|0;"function"===typeof b?(c=b,b=1E3):b||(b=1E3);if(c){var d=this;K(function(){c(d.search(a,b));d=null},1,"search-"+this.id);return null}if(!a||"string"!==typeof a)return e;this.h||(this.h=!0);var g=this.encode(a);if(!g.length)return e;var h=this.mode;g="function"===typeof h?h(g):"ngram"===h?C(g):g.split(D);h=g.length;var k=!0,n=[],q={};if(1=f;A--)if(v=(t?l[m]:this.g)[A][r])w[z++]=v,x=!0;if(x)n[n.length]=1l&&(l=0),r>h&&(r=h);lb;b++)A(this.g[b],a);this.depth&&A(this.b,a);delete this.a[a]}return this};d.prototype.search=function(a,b,c){var e=[];if(a&&"object"===typeof a){c=a.callback||b;b=a.limit;var f= +a.threshold;a=a.query}f=(f||this.threshold||0)|0;"function"===typeof b?(c=b,b=1E3):b||(b=1E3);if(c){var d=this;K(function(){c(d.search(a,b));d=null},1,"search-"+this.id);return null}if(!a||"string"!==typeof a)return e;var g=this.encode(a);if(!g.length)return e;var h=this.mode;g="function"===typeof h?h(g):"ngram"===h?C(g):g.split(D);h=g.length;var k=!0,n=[],q={};if(1=f;A--)if(v=(t?l[m]:this.g)[A][r])w[z++]=v,x=!0;if(x)n[n.length]=1=g&&(a=a[h+.5|0],a=a[e]||(a[e]=[]),a[a.length]=c)}return h||b[e]}function q(a,b){if(a)for(var e=Object.keys(a),c=0,k=e.length;c=(8=(8a?1:0a?-1:0b&&(c=c.slice(0,b)));return c}function F(a){a.B|| (a.B=I(function(){a.B=null;var b=a.async;b&&(a.async=!1);if(a.b.length){for(var e=J(),c;(c=a.b.shift())||0===c;){var d=a.f[c];switch(d[0]){case D.add:a.add(d[1],d[2]);break;case D.remove:a.remove(d[1])}a.f[c]=null;delete a.f[c];if(100=d&&(c.s=c.c),c.w&&c.s===c.c&&(c.cache&&c.i.set(b,c.g),c.w(c.g),c.g=[]))})}this.mode=a.mode||e.mode||this.mode||x.mode;this.async=a.async||this.async||x.async; -this.c=a.worker||this.c||x.c;this.threshold=a.threshold||e.threshold||this.threshold||x.threshold;this.depth=a.depth||e.depth||this.depth||x.depth;this.v=a.suggest||this.v||x.v;this.u=(b=a.encode||e.encode)&&A[b]||("function"===typeof b?b:this.u||!1);this.debug=a.debug||this.debug;(b=a.matcher)&&this.addMatcher(b);if(b=a.filter)this.filter=P(L[b]||b,this.u);if(b=a.stemmer)this.stemmer=Q(M[b]||b,this.u);this.j=[{},{},{},{},{},{},{},{},{},{}];this.l={};this.a={};this.f={};this.b=[];this.B=null;this.o= -!0;this.i=(this.cache=b=a.cache||this.cache||x.cache)?new V(b):!1;return this};f.prototype.encode=function(a){a&&B.length&&(a=y(a,B));a&&this.A.length&&(a=y(a,this.A));a&&this.u&&(a=this.u.call(A,a));a&&this.stemmer&&(a=y(a,this.stemmer));return a};f.prototype.addMatcher=function(a){var b=this.A,e;for(e in a)a.hasOwnProperty(e)&&(b[b.length]=d(e),b[b.length]=a[e]);return this};f.prototype.add=function(a,b,e){if("string"===typeof b&&b&&(a||0===a))if(this.a[a]&&!e)this.update(a,b);else{if(this.c)return++this.m>= +b)};f.prototype.init=function(a){this.A=[];a||(a=y);var b=a.profile,e=b?G[b]:{};if(b=a.worker)if("undefined"===typeof Worker)a.worker=!1,this.h=null;else{var c=this;b=parseInt(b,10)||4;c.m=-1;c.o=0;c.g=[];c.w=null;c.h=Array(b);for(var d=0;d=d&&(c.o=c.c),c.w&&c.o===c.c&&(c.cache&&c.i.set(b,c.g),c.w(c.g),c.g=[]),c})}this.mode=a.mode||e.mode||this.mode||y.mode;this.async=a.async||this.async||y.async; +this.c=a.worker||this.c||y.c;this.threshold=a.threshold||e.threshold||this.threshold||y.threshold;this.depth=a.depth||e.depth||this.depth||y.depth;this.u=a.suggest||this.u||y.u;this.s=(b=a.encode||e.encode)&&A[b]||("function"===typeof b?b:this.s||!1);this.debug=a.debug||this.debug;(b=a.matcher)&&this.addMatcher(b);if(b=a.filter)this.filter=P(L[b]||b,this.s);if(b=a.stemmer)this.stemmer=Q(M[b]||b,this.s);this.j=[{},{},{},{},{},{},{},{},{},{}];this.l={};this.a={};this.f={};this.b=[];this.B=null;this.v= +!0;this.i=(this.cache=b=a.cache||this.cache||y.cache)?new V(b):!1;return this};f.prototype.encode=function(a){a&&B.length&&(a=x(a,B));a&&this.A.length&&(a=x(a,this.A));a&&this.s&&(a=this.s.call(A,a));a&&this.stemmer&&(a=x(a,this.stemmer));return a};f.prototype.addMatcher=function(a){var b=this.A,e;for(e in a)a.hasOwnProperty(e)&&(b[b.length]=d(e),b[b.length]=a[e]);return this};f.prototype.add=function(a,b,e){if("string"===typeof b&&b&&(a||0===a))if(this.a[a]&&!e)this.update(a,b);else{if(this.c)return++this.m>= this.h.length&&(this.m=0),this.h[this.m].postMessage(this.m,{add:!0,id:a,content:b}),this.a[a]=""+this.m,this;if(this.async)return this.f[a]||(this.b[this.b.length]=a),this.f[a]=[D.add,a,b],F(this),this;b=this.encode(b);if(!b.length)return this;e=this.mode;b="function"===typeof e?e(b):"ngram"===e?H(b):b.split(K);for(var c={_ctx:{}},d=this.threshold,t=this.depth,g=this.j,h=b.length,f=0;fl;r--)n=m.substring(l,r),z(g,c,n,a,q,u,d);break;default:if(l=z(g,c,m,a,1,u,d),t&&1=d)for(p=c._ctx[m]||(c._ctx[m]={}),m=this.l[m]||(this.l[m]=[{},{},{},{},{},{},{},{},{},{}]),l=f-t,r=f+t+1,0>l&&(l=0),r>h&&(r=h);lb;b++)q(this.j[b],a);this.depth&&q(this.l,a);delete this.a[a];this.o=!1}return this};f.prototype.search=function(a,b,e){var c=[];if(a&&"object"===typeof a){e=a.callback||b; -b=a.limit;var d=a.threshold;a=a.query}d=(d||this.threshold||0)|0;"function"===typeof b?(e=b,b=1E3):b||(b=1E3);if(this.c){this.w=e;this.s=0;this.g=[];for(c=0;c=d;A--)if(w=(u?l[n]:this.j)[A][r])y[z++]=w,x=!0;if(x)m[m.length]=1g;g++)for(b=Object.keys(this.j[g]),a=0;al;r--)n=m.substring(l,r),z(g,c,n,a,q,u,d);break;default:if(l=z(g,c,m,a,1,u,d),t&&1=d)for(p=c._ctx[m]||(c._ctx[m]={}),m=this.l[m]||(this.l[m]=[{},{},{},{},{},{},{},{},{},{}]),l=f-t,r=f+t+1,0>l&&(l=0),r>h&&(r=h);lb;b++)q(this.j[b],a);this.depth&&q(this.l,a);delete this.a[a];this.v=!1}return this};f.prototype.search=function(a,b,e){var c=[];if(a&&"object"===typeof a){e=a.callback||b; +b=a.limit;var d=a.threshold;a=a.query}d=(d||this.threshold||0)|0;"function"===typeof b?(e=b,b=1E3):b||(b=1E3);if(this.c){this.w=e;this.o=0;this.g=[];for(c=0;c=d;A--)if(w=(u?l[n]:this.j)[A][r])x[z++]=w,y=!0;if(y)m[m.length]=1g;g++)for(b=Object.keys(this.j[g]),a=0;af;c--)h=g[c-1],g[c]=h,d[h]=c;g[f]=a;d[a]=f}}}return b};return a}();return f}(function(){var v={},C=!("undefined"===typeof Blob||"undefined"===typeof URL||!URL.createObjectURL);return function(f,w,d,y,z){var q=f;f=C?URL.createObjectURL(new Blob(["var SUPPORT_WORKER = true;var SUPPORT_BUILTINS = true;var SUPPORT_DEBUG = true;var SUPPORT_CACHE = true;var SUPPORT_ASYNC = true;("+d.toString()+")()"],{type:"text/javascript"})):"../"+q+".js"; -q+="-"+w;v[q]||(v[q]=[]);v[q][z]=new Worker(f);v[q][z].onmessage=y;console.log("Register Worker: "+q+"@"+z);return{postMessage:function(d,f){v[q][d].postMessage(f)}}}}()),this); +this.a,h=f;this.count[g[--f]]<=c&&-1!==f;);f++;if(f!==h){for(c=h;c>f;c--)h=g[c-1],g[c]=h,d[h]=c;g[f]=a;d[a]=f}}}return b};return a}();return f}(function(){var v={},C=!("undefined"===typeof Blob||"undefined"===typeof URL||!URL.createObjectURL);return function(f,w,d,x,z){var q=f;f=C?URL.createObjectURL(new Blob(["var SUPPORT_WORKER = true;var SUPPORT_BUILTINS = true;var SUPPORT_DEBUG = true;var SUPPORT_CACHE = true;var SUPPORT_ASYNC = true;("+d.toString()+")()"],{type:"text/javascript"})):"../"+q+".js"; +q+="-"+w;v[q]||(v[q]=[]);v[q][z]=new Worker(f);v[q][z].onmessage=x;console.log("Register Worker: "+q+"@"+z);return{postMessage:function(d,f){v[q][d].postMessage(f)}}}}()),this); diff --git a/package.json b/package.json index 9c092e1..c714a8a 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "flexsearch", - "version": "0.2.63", + "version": "0.2.65", "description": "Next-Generation full text search library with zero dependencies.", "homepage": "https://github.com/nextapps-de/flexsearch/", "author": "Thomas Wilkerling", diff --git a/test/test.js b/test/test.js index 48ef70e..0bf17f9 100644 --- a/test/test.js +++ b/test/test.js @@ -1126,7 +1126,7 @@ if(env !== 'light'){ 'id', 'chars', - 'status', + //'status', 'cache', 'items', 'matcher',