diff --git a/README.md b/README.md index 4575c71..ade75bb 100644 --- a/README.md +++ b/README.md @@ -828,11 +828,11 @@ The required memory for the index depends on several options: Contextual Index - Multiplied with: + Multiply the sum above with: - * 2(depth + 1) + * (depth * 2 + 1) diff --git a/flexsearch.js b/flexsearch.js index 2e6de68..40ed1e8 100644 --- a/flexsearch.js +++ b/flexsearch.js @@ -3,7 +3,7 @@ * ---------------------------------------------------------- * @author: Thomas Wilkerling * @preserve https://github.com/nextapps-de/flexsearch - * @version: 0.2.11 + * @version: 0.2.2 * @license: Apache 2.0 Licence */ @@ -928,9 +928,9 @@ if(this.depth){ var use_contextual = true; - var key = words[0]; + var ctx_root = words[0]; - check_words[key] = "1"; + check_words[ctx_root] = "1"; } else{ @@ -942,7 +942,7 @@ var ctx_map; - if(!use_contextual || (ctx_map = this._map[10])[key]){ + if(!use_contextual || (ctx_map = this._map[10])[ctx_root]){ for(var a = use_contextual ? 1 : 0; a < length; a++){ @@ -961,10 +961,11 @@ use_contextual ? - ctx_map[key][z][value] + ctx_map[ctx_root] : - this._map[z][value] - ); + this._map + + )[z][value]; if(map){ @@ -995,7 +996,7 @@ check_words[value] = "1"; } - key = value; + ctx_root = value; } } else{ @@ -1149,7 +1150,8 @@ 'simple': (function(){ - var regex_strip = regex('[^a-z0-9 ]'), + var regex_whitespace = regex('\\s\\s+'), + regex_strip = regex('[^a-z0-9 ]'), regex_split = regex('[-\/]'), regex_a = regex('[àáâãäå]'), regex_e = regex('[èéêë]'), @@ -1174,14 +1176,17 @@ regex_c, 'c', regex_s, 's', regex_split, ' ', - regex_strip, '' + regex_strip, '', + regex_whitespace, ' ' ]; return function(str){ + str = replace(str.toLowerCase(), regex_pairs); + return ( - replace(str.toLowerCase(), regex_pairs) + str !== ' ' ? str : '' ); }; }()), @@ -1318,7 +1323,7 @@ } } - str = str.join(""); + str = str.join(" "); str = collapseRepeatingChars(str); } @@ -1490,7 +1495,7 @@ var count_vowels = 0, count_literal = 0, - count_parts = -1; + count_parts = 0; var tmp = ""; var length = value.length; @@ -1524,20 +1529,28 @@ // dynamic n-gram sequences - if((char === ' ') || ((count_vowels >= 2) && (count_literal >= 2)) || (i === length - 1)){ + if((char === ' ') || ((count_vowels > 1) && (count_literal > 1)) || (count_vowels > 2) || (count_literal > 2) || (i === length - 1)){ if(tmp){ - var tmp_length = tmp.length; + if(parts[count_parts] && (tmp.length > 2)){ - if(tmp_length > 2){ - - parts[++count_parts] = tmp; + count_parts++; } - else if(parts[count_parts]){ + + if(parts[count_parts]){ parts[count_parts] += tmp; } + else{ + + parts[count_parts] = tmp; + } + + if(char === ' '){ + + count_parts++; + } tmp = ""; } @@ -1567,7 +1580,7 @@ if(char !== char_prev){ - if((i > 0) && (char === 'h')){ + if(i && (char === 'h')){ var char_prev_is_vowel = ( @@ -1589,7 +1602,7 @@ (char_next === 'y') ); - if(char_prev_is_vowel && char_next_is_vowel){ + if((char_prev_is_vowel && char_next_is_vowel) || (char_prev === ' ')){ collapsed_string += char; } @@ -1666,38 +1679,55 @@ } /** - * @param {!Array>} arr + * Fastest intersect method for a set of unsorted arrays so far + * @param {!Array>} arrays * @param {number=} limit * @returns {Array} */ - function intersect(arr, limit) { + function intersect(arrays, limit) { var result = []; - var length_z = arr.length; + var length_z = arrays.length; if(length_z > 1){ - arr.sort(sort_by_length_up); + // pre-sort arrays by length up - var map = {}; - var a = arr[0]; + arrays.sort(sort_by_length_up); - for(var i = 0, length = a.length; i < length; ++i) { + // fill initial map - map[a[i]] = 1; + var check = {}; + var arr = arrays[0]; + var length = arr.length; + var i = 0; + + while(i < length) { + + check[arr[i++]] = 1; } + // loop through arrays + var tmp, count = 0; + var z = 1; - for(var z = 1; z < length_z; ++z){ + while(z < length_z){ + + // get each array one by one - var b = arr[z]; var found = false; - for(var i = 0, length = b.length; i < length; ++i){ + arr = arrays[z]; + length = arr.length; + i = 0; - if((map[tmp = b[i]]) === z){ + while(i < length){ + + if((check[tmp = arr[i++]]) === z){ + + // fill in during last round if(z === (length_z - 1)){ @@ -1705,32 +1735,33 @@ if(limit && (count === limit)){ - return result; + found = false; + break; } } - found = true; - map[tmp] = z + 1; + // apply count status - break; + found = true; + check[tmp] = z + 1; } } if(!found){ - return []; + break; } - } - return result; + z++; + } } else if(length_z){ - result = arr[0]; + result = arrays[0]; if(limit && result && (result.length > limit)){ - // Note: do not touch original array + // Note: do not touch original array! result = result.slice(0, limit); } @@ -1739,6 +1770,60 @@ return result; } + /** + * Fastest intersect method for 2 sorted arrays so far + * @param {!Array} a + * @param {!Array} b + * @param {number=} limit + * @returns {Array} + */ + + function intersect_sorted(a, b, limit){ + + var result = []; + + var length_a = a.length, + length_b = b.length; + + if(length_a && length_b){ + + var x = 0, y = 0, count = 0; + + var current_a = 0, + current_b = 0; + + while(true){ + + if((current_a || (current_a = a[x])) === + (current_b || (current_b = b[y]))){ + + result[count++] = current_a; + + current_a = current_b = 0; + x++; + y++; + } + else if(current_a < current_b){ + + current_a = 0; + x++; + } + else{ + + current_b = 0; + y++; + } + + if((x === length_a) || (y === length_b)){ + + break; + } + } + } + + return result; + } + /** * @param {FlexSearch} ref */ diff --git a/flexsearch.min.js b/flexsearch.min.js index f4507b1..fabeac7 100644 --- a/flexsearch.min.js +++ b/flexsearch.min.js @@ -1,25 +1,25 @@ /* https://github.com/nextapps-de/flexsearch - @version: 0.2.11 + @version: 0.2.2 @license: Apache 2.0 Licence */ -'use strict';(function(m,C,f){var d;(d=f.define)&&d.amd?d([],function(){return C}):(d=f.modules)?d[m.toLowerCase()]=C:"undefined"!==typeof module?module.exports=C:f[m]=C})("FlexSearch",function H(m){function f(a){a||(a=t);this.id=a.id||I++;this.init(a);Object.defineProperty(this,"index",{get:function(){return this.a}});Object.defineProperty(this,"length",{get:function(){return Object.keys(this.a).length}})}function d(a){return new RegExp(a,"g")}function u(a,b,c){if("undefined"===typeof c){for(c=0;c< -b.length;c+=2)a=a.replace(b[c],b[c+1]);return a}return a.replace(b,c)}function p(a,b,c,e,l,n){if("undefined"===typeof b[c]){var g=l.indexOf(c);g=3/l.length*(l.length-g)+6/(g-l.lastIndexOf(" ",g))+.5|0;b[c]=g;g>n&&(a=a[g],a=a[c]||(a[c]=[]),a[a.length]=e)}return g||b[c]}function x(a){for(var b="",c="",e="",l=0;la?1:0a?-1:0b&&(c=c.slice(0,b)));return c}function z(a){a.w||(a.w=E(function(){a.w= -null;var b=a.async;b&&(a.async=!1);if(a.c.length){for(var c=F(),e;(e=a.c.shift())||0===e;){var l=a.h[e];switch(l[0]){case A.add:a.add(l[1],l[2]);break;case A.update:a.update(l[1],l[2]);break;case A.remove:a.remove(l[1])}a.h[e]=null;delete a.h[e];if(100=d&&(b.o=b.b),b.v&&b.o===b.b&&(b.i.length?b.f="":b.f||(b.f=c),b.cache&&b.l.set(c,b.i),b.v(b.i),b.i=[]))})}this.mode= -a.mode||this.mode||t.mode;this.cache=a.cache||this.cache||t.cache;this.async=a.async||this.async||t.async;this.b=a.worker||this.b||t.b;this.threshold=a.threshold||this.threshold||t.threshold;this.depth=a.depth||this.depth||t.depth;this.encoder=a.encode&&B[a.encode]||("function"===typeof a.encode?a.encode:this.encoder||!1);this.A=a.debug||this.A;a.matcher&&this.addMatcher(a.matcher)}this.g=[{},{},{},{},{},{},{},{},{},{},{}];this.a={};this.h={};this.c=[];this.w=null;this.f="";this.u=!0;this.l=this.cache? -new M(3E4,50,!0):!1;return this};f.prototype.encode=function(a){a&&v.length&&(a=u(a,v));a&&this.m.length&&(a=u(a,this.m));a&&this.encoder&&(a=this.encoder.call(B,a));return a};f.prototype.addMatcher=function(a){for(var b in a)a.hasOwnProperty(b)&&(this.m[this.m.length]=d(b),this.m[this.m.length]=a[b]);return this};f.prototype.add=function(a,b){if("string"===typeof b&&b&&(a||0===a))if(this.a[a])this.update(a,b);else{if(this.b)return++this.s>=this.j.length&&(this.s=0),this.j[this.s].postMessage(this.s, -{add:!0,id:a,content:b}),this.a[a]=""+this.s,this;if(this.async)return this.h[a]||(this.c[this.c.length]=a),this.h[a]=[A.add,a,b],z(this),this;b=this.encode(b);if(!b.length)return this;if("ngram"===this.mode){var c=b;var e=[];if(c)for(var d=0,n=0,g=-1,f="",q=c.length,k=0;kh;w--)r=k.substring(h,w),p(g,e,r,a,b,d);break;default:if(h=p(g,e,k,a,b,d),n&&1d)for(h=g[10],m=e._ctx[k]||(e._ctx[k]={}),k=h[k]||(h[k]=[{},{},{},{},{},{},{},{},{},{}]),h=q-n,w=q+n,0>h&&(h=0),w>f-1&&(w=f-1);h<=w;h++)h!== -q&&p(k,m,c[h],a,b,d)}}this.a[a]="1";this.u=!1}return this};f.prototype.update=function(a,b){if("string"===typeof b&&(a||0===a)&&this.a[a]){if(this.b){var c=parseInt(this.a[a],10);this.j[c].postMessage(c,{update:!0,id:a,content:b});return this}if(this.async)return this.h[a]||(this.c[this.c.length]=a),this.h[a]=[A.update,a,b],z(this),this;this.remove(a);b&&this.add(a,b)}return this};f.prototype.remove=function(a){if(this.a[a]){if(this.b){var b=parseInt(this.a[a],10);this.j[b].postMessage(b,{remove:!0, -id:a});delete this.a[a];return this}if(this.async)return this.h[a]||(this.c[this.c.length]=a),this.h[a]=[A.remove,a],z(this),this;for(b=0;10>b;b++)for(var c=Object.keys(this.g[b]),e=0;e=d;z--)if(v=t?w[r][z][p]:this.g[z][p])x[B++]=v,A=!0;if(A)k[k.length]=1g;g++)for(b=Object.keys(this.g[g]),a=0;ak&&(a=a[f],a=a[c]||(a[c]=[]),a[a.length]=h)}return f||b[c]}function x(a){for(var b="",c="",h="",n=0;na?1:0a?-1:0b&&(c=c.slice(0,b)));return c}function A(a){a.w|| +(a.w=E(function(){a.w=null;var b=a.async;b&&(a.async=!1);if(a.c.length){for(var c=F(),h;(h=a.c.shift())||0===h;){var d=a.h[h];switch(d[0]){case z.add:a.add(d[1],d[2]);break;case z.update:a.update(d[1],d[2]);break;case z.remove:a.remove(d[1])}a.h[h]=null;delete a.h[h];if(100=d&&(b.o=b.b),b.v&&b.o===b.b&&(b.i.length?b.f="":b.f||(b.f=c),b.cache&&b.l.set(c,b.i),b.v(b.i), +b.i=[]))})}this.mode=a.mode||this.mode||u.mode;this.cache=a.cache||this.cache||u.cache;this.async=a.async||this.async||u.async;this.b=a.worker||this.b||u.b;this.threshold=a.threshold||this.threshold||u.threshold;this.depth=a.depth||this.depth||u.depth;this.encoder=a.encode&&B[a.encode]||("function"===typeof a.encode?a.encode:this.encoder||!1);this.A=a.debug||this.A;a.matcher&&this.addMatcher(a.matcher)}this.g=[{},{},{},{},{},{},{},{},{},{},{}];this.a={};this.h={};this.c=[];this.w=null;this.f="";this.u= +!0;this.l=this.cache?new M(3E4,50,!0):!1;return this};e.prototype.encode=function(a){a&&w.length&&(a=v(a,w));a&&this.m.length&&(a=v(a,this.m));a&&this.encoder&&(a=this.encoder.call(B,a));return a};e.prototype.addMatcher=function(a){for(var b in a)a.hasOwnProperty(b)&&(this.m[this.m.length]=d(b),this.m[this.m.length]=a[b]);return this};e.prototype.add=function(a,b){if("string"===typeof b&&b&&(a||0===a))if(this.a[a])this.update(a,b);else{if(this.b)return++this.s>=this.j.length&&(this.s=0),this.j[this.s].postMessage(this.s, +{add:!0,id:a,content:b}),this.a[a]=""+this.s,this;if(this.async)return this.h[a]||(this.c[this.c.length]=a),this.h[a]=[z.add,a,b],A(this),this;b=this.encode(b);if(!b.length)return this;if("ngram"===this.mode){var c=b;var h=[];if(c)for(var d=0,k=0,f=0,e="",r=c.length,m=0;mg;l--)t=m.substring(g,l),q(f,h,t,a,b,d);break;default:if(g=q(f,h,m,a,b,d),k&&1d)for(g=f[10],p=h._ctx[m]||(h._ctx[m]={}),m=g[m]||(g[m]=[{},{},{},{},{},{},{},{},{},{}]),g=r-k,l=r+k,0>g&&(g=0), +l>e-1&&(l=e-1);g<=l;g++)g!==r&&q(m,p,c[g],a,b,d)}}this.a[a]="1";this.u=!1}return this};e.prototype.update=function(a,b){if("string"===typeof b&&(a||0===a)&&this.a[a]){if(this.b){var c=parseInt(this.a[a],10);this.j[c].postMessage(c,{update:!0,id:a,content:b});return this}if(this.async)return this.h[a]||(this.c[this.c.length]=a),this.h[a]=[z.update,a,b],A(this),this;this.remove(a);b&&this.add(a,b)}return this};e.prototype.remove=function(a){if(this.a[a]){if(this.b){var b=parseInt(this.a[a],10);this.j[b].postMessage(b, +{remove:!0,id:a});delete this.a[a];return this}if(this.async)return this.h[a]||(this.c[this.c.length]=a),this.h[a]=[z.remove,a],A(this),this;for(b=0;10>b;b++)for(var c=Object.keys(this.g[b]),d=0;d=e;A--)if(w=(p?u[t]:this.g)[A][q])x[B++]=w,z=!0;if(z)m[m.length]=1f;f++)for(b=Object.keys(this.g[f]),a=0;a + + + + Matching Test + + + + + + + + + + + + + +

Relevance Scoring Comparison

+

Indexed Text: "Gulliver's Travels" (Swift Jonathan 1726)

+
+ +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Queryflexsearchbulksearchelasticlunrlunrwadefusejssearchjsiibm25
"without breach of modesty"wait ...wait ...wait ...wait ...wait ...wait ...wait ...wait ...wait ...
"went softly stream"wait ...wait ...wait ...wait ...wait ...wait ...wait ...wait ...wait ...
"i already observed"wait ...wait ...wait ...wait ...wait ...wait ...wait ...wait ...wait ...
"let a of his"wait ...wait ...wait ...wait ...wait ...wait ...wait ...wait ...wait ...
"take that to the rocks"wait ...wait ...wait ...wait ...wait ...wait ...wait ...wait ...wait ...
"bignes of splaknuk"wait ...wait ...wait ...wait ...wait ...wait ...wait ...wait ...wait ...
"matematikal musikal instruments"wait ...wait ...wait ...wait ...wait ...wait ...wait ...wait ...wait ...
"lalkon the camberlayhn"wait ...wait ...wait ...wait ...wait ...wait ...wait ...wait ...wait ...
+
+
+Note: Open console and type e.g. data[493] + + + diff --git a/test/test.js b/test/test.js index 979a33e..ac45656 100644 --- a/test/test.js +++ b/test/test.js @@ -209,14 +209,6 @@ describe('Add (Sync)', function(){ flexsearch_extra.add(3, " - "); expect(flexsearch_extra.length).to.equal(0); - - flexsearch_extra.add(4, "Thomas"); - flexsearch_extra.add(5, "Arithmetic"); - flexsearch_extra.add(6, "Mahagoni"); - - expect(flexsearch_extra.search("tomass")).to.include(4); - expect(flexsearch_extra.search("arytmetik")).to.include(5); - expect(flexsearch_extra.search("mahagony")).to.include(6); }); }); @@ -230,6 +222,14 @@ describe('Search (Sync)', function(){ expect(flexsearch_sync.search("foo foo")).to.have.members([0, 1]); expect(flexsearch_sync.search("foo foo")).to.have.members([0, 1]); + + flexsearch_extra.add(4, "Thomas"); + flexsearch_extra.add(5, "Arithmetic"); + flexsearch_extra.add(6, "Mahagoni"); + + expect(flexsearch_extra.search("tomass")).to.include(4); + expect(flexsearch_extra.search("arytmetik")).to.include(5); + expect(flexsearch_extra.search("mahagony")).to.include(6); }); it('Should have been limited', function(){ @@ -914,6 +914,63 @@ describe('Options', function(){ }); }); +// ------------------------------------------------------------------------ +// Relevance Tests +// ------------------------------------------------------------------------ + +describe('Relevance', function(){ + + it('Should have been sorted by relevance properly', function(){ + + var index = new FlexSearch({ + encode: 'advanced', + mode: 'strict' + }); + + index.add(0, "1 2 3 2 4 1 5 3"); + index.add(1, "zero one two three four five six seven eight nine ten"); + index.add(2, "four two zero one three ten five seven eight six nine"); + + expect(index.search("1")).to.have.members([0]); + expect(index.search("one")).to.have.members([1, 2]); + expect(index.search("one two")).to.have.members([1, 2]); + expect(index.search("four one")).to.have.members([1, 2]); + + var index = new FlexSearch({ + encode: 'advanced', + mode: 'strict', + threshold: 5, + depth: 3 + }); + + index.add(0, "1 2 3 2 4 1 5 3"); + index.add(1, "zero one two three four five six seven eight nine ten"); + index.add(2, "four two zero one three ten five seven eight six nine"); + + expect(index.search("1")).to.have.members([0]); + expect(index.search("one")).to.have.members([1, 2]); + expect(index.search("one two")).to.have.members([1, 2]); + expect(index.search("four one")).to.have.members([1, 2]); + + var index = new FlexSearch({ + encode: 'extra', + mode: 'ngram', + threshold: 5, + depth: 3 + }); + + index.add(0, "1 2 3 2 4 1 5 3"); + index.add(1, "zero one two three four five six seven eight nine ten"); + index.add(2, "four two zero one three ten five seven eight six nine"); + + expect(index.search("1 3 4")).to.have.members([0]); + expect(index.search("1 5 3 4")).to.have.members([0]); + expect(index.search("one")).to.have.members([1, 2]); + expect(index.search("one two")).to.have.members([1, 2]); + expect(index.search("four one")).to.have.members([1, 2]); + }); +}); + // ------------------------------------------------------------------------ // Feature Tests // ------------------------------------------------------------------------