mirror of
https://github.com/nextapps-de/flexsearch.git
synced 2025-09-01 18:03:56 +02:00
ADD dynamically create context by "depth"
MOD move "ngram" to tokenizers FIX using "this" in global encoders
This commit is contained in:
26
README.md
26
README.md
@@ -48,16 +48,14 @@ All Features:
|
||||
<a name="contextual"></a>
|
||||
#### Contextual Search
|
||||
|
||||
FlexSearch introduces a new scoring mechanism called __Contextual Search__, which was invented by Thomas Wilkerling, the author of this library. A Contextual Search __incredibly boosts queries to a new level__.
|
||||
The basic idea of this concept is to limit relevance by context instead of calculating relevance through the whole (unlimited) distance.
|
||||
FlexSearch introduces a new scoring mechanism called __Contextual Search__, which was invented by Thomas Wilkerling, the author of this library. A Contextual Search __incredibly boosts queries to a completely new level__.
|
||||
The basic idea of this concept is to limit relevance by its context instead of calculating relevance through the whole (unlimited) distance.
|
||||
Imagine you add a text block of several sentences to an index ID. Assuming the query includes a combination of the first and the last word of this text block, are they really relevant to each other?
|
||||
In this way, contextual search also improves the results of relevance-based queries on large amounts of text data.
|
||||
|
||||
<br>
|
||||
<p align="center">
|
||||
<img src="https://rawgithub.com/nextapps-de/flexsearch/master/contextual_index.svg">
|
||||
</p>
|
||||
<br>
|
||||
|
||||
__Note:__ This feature is not enabled by default.
|
||||
|
||||
@@ -573,7 +571,7 @@ Tokenizer effects the required memory also as query time and flexibility of part
|
||||
<td><b>"ngram"</b> (default)</td>
|
||||
<td>index words partially through phonetic n-grams</td>
|
||||
<td><b>foo</b>bar<br>foo<b>bar</b></td>
|
||||
<td>* n/3.5</td>
|
||||
<td>* n / 3.5</td>
|
||||
</tr>
|
||||
<tr></tr>
|
||||
<tr>
|
||||
@@ -587,14 +585,14 @@ Tokenizer effects the required memory also as query time and flexibility of part
|
||||
<td><b>"reverse"</b></td>
|
||||
<td>incrementally index words in both directions</td>
|
||||
<td>foob<b>ar</b><br>fo<b>obar</b></td>
|
||||
<td>* 2n</td>
|
||||
<td>* 2n - 1</td>
|
||||
</tr>
|
||||
<tr></tr>
|
||||
<tr>
|
||||
<td><b>"full"</b></td>
|
||||
<td>index every possible combination</td>
|
||||
<td>fo<b>oba</b>r<br>f<b>oob</b>ar</td>
|
||||
<td>* n*(n-1)</td>
|
||||
<td>* n * (n - 1)</td>
|
||||
</tr>
|
||||
|
||||
</table>
|
||||
@@ -809,6 +807,11 @@ The required memory for the index depends on several options:
|
||||
<td>* 1</td>
|
||||
</tr>
|
||||
<tr></tr>
|
||||
<tr>
|
||||
<td>"ngram" (default)</td>
|
||||
<td>* n / 3.5</td>
|
||||
</tr>
|
||||
<tr></tr>
|
||||
<tr>
|
||||
<td>"forward"</td>
|
||||
<td>* n</td>
|
||||
@@ -816,17 +819,12 @@ The required memory for the index depends on several options:
|
||||
<tr></tr>
|
||||
<tr>
|
||||
<td>"reverse"</td>
|
||||
<td>* 2n</td>
|
||||
</tr>
|
||||
<tr></tr>
|
||||
<tr>
|
||||
<td>"ngram" (default)</td>
|
||||
<td>* n/4</td>
|
||||
<td>* 2n - 1</td>
|
||||
</tr>
|
||||
<tr></tr>
|
||||
<tr>
|
||||
<td>"full"</td>
|
||||
<td>* n*(n-1)</td>
|
||||
<td>* n * (n - 1)</td>
|
||||
</tr>
|
||||
</table>
|
||||
|
||||
|
238
flexsearch.js
238
flexsearch.js
@@ -3,7 +3,7 @@
|
||||
* ----------------------------------------------------------
|
||||
* @author: Thomas Wilkerling
|
||||
* @preserve https://github.com/nextapps-de/flexsearch
|
||||
* @version: 0.2.0
|
||||
* @version: 0.2.1
|
||||
* @license: Apache 2.0 Licence
|
||||
*/
|
||||
|
||||
@@ -45,7 +45,7 @@
|
||||
|
||||
// use one of the built-in functions
|
||||
// or pass custom encoding algorithm
|
||||
encode: false
|
||||
encode: 'icase'
|
||||
};
|
||||
|
||||
/**
|
||||
@@ -183,7 +183,7 @@
|
||||
|
||||
/**
 * Runs one of the registered global encoders on a value.
 *
 * @param {string} name  Key of the encoder inside global_encoder.
 * @param {string} value Text handed to the encoder.
 * @returns {*} Whatever the selected encoder returns.
 */

FlexSearch.encode = function(name, value){

    // Call with global_encoder as the receiver, because the built-in encoders
    // reach their siblings through "this" (e.g. this['advanced'](value)).
    return global_encoder[name].call(global_encoder, value);
};
|
||||
|
||||
/**
|
||||
@@ -311,9 +311,8 @@
|
||||
this.encoder = (
|
||||
|
||||
(options['encode'] && global_encoder[options['encode']]) ||
|
||||
this.encoder ||
|
||||
(typeof options['encode'] === 'function' ? options['encode'] : this.encoder || false)
|
||||
//(defaults.encode && global_encoder[defaults.encode]) ||
|
||||
options['encode']
|
||||
);
|
||||
|
||||
//if(DEBUG){
|
||||
@@ -486,9 +485,9 @@
|
||||
|
||||
var words = (
|
||||
|
||||
content.constructor === Array ?
|
||||
this.mode === 'ngram' ?
|
||||
|
||||
/** @type {!Array<string>} */ (content)
|
||||
/** @type {!Array<string>} */ (ngram(content))
|
||||
:
|
||||
/** @type {string} */ (content).split(regex_split)
|
||||
);
|
||||
@@ -582,13 +581,8 @@
|
||||
|
||||
break;
|
||||
|
||||
case 'ngram':
|
||||
|
||||
// TODO
|
||||
|
||||
break;
|
||||
|
||||
case 'strict':
|
||||
case 'ngram':
|
||||
default:
|
||||
|
||||
var score = addIndex(
|
||||
@@ -617,59 +611,28 @@
|
||||
{/* 7 */},
|
||||
{/* 8 */},
|
||||
{/* 9 */}
|
||||
// TODO test concept of deep trees instead of flat ones
|
||||
//{/* ctx */}
|
||||
]);
|
||||
|
||||
if(i) {
|
||||
var x = i - this.depth;
|
||||
var y = i + this.depth;
|
||||
|
||||
if(i > 1) {
|
||||
if(x < 0) x = 0;
|
||||
if(y > word_length - 1) y = word_length - 1;
|
||||
|
||||
for(; x <= y; x++){
|
||||
|
||||
addIndex(
|
||||
|
||||
ctx_tmp,
|
||||
ctx_dupes,
|
||||
words[i - 2],
|
||||
words[x],
|
||||
id,
|
||||
/** @type {string} */ (content),
|
||||
threshold
|
||||
);
|
||||
}
|
||||
|
||||
addIndex(
|
||||
|
||||
ctx_tmp,
|
||||
ctx_dupes,
|
||||
words[i - 1],
|
||||
id,
|
||||
/** @type {string} */ (content),
|
||||
threshold
|
||||
);
|
||||
}
|
||||
|
||||
if(i < (word_length - 1)) {
|
||||
|
||||
addIndex(
|
||||
|
||||
ctx_tmp,
|
||||
ctx_dupes,
|
||||
words[i + 1],
|
||||
id,
|
||||
/** @type {string} */ (content),
|
||||
threshold
|
||||
);
|
||||
|
||||
if(i < (word_length - 2)) {
|
||||
|
||||
addIndex(
|
||||
|
||||
ctx_tmp,
|
||||
ctx_dupes,
|
||||
words[i + 2],
|
||||
id,
|
||||
/** @type {string} */ (content),
|
||||
threshold
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
break;
|
||||
@@ -1360,97 +1323,7 @@
|
||||
|
||||
return str;
|
||||
};
|
||||
})(),
|
||||
|
||||
/**
 * Encoder that runs the 'advanced' encoder on the input and then cuts the
 * result into variable-length n-gram parts.
 *
 * A part is closed at a space, at the last character, or as soon as at least
 * two vowels and at least two non-vowels were counted since the previous cut.
 * An empty input, or an input that becomes empty after the 'advanced'
 * encoding, returns an empty array.
 *
 * @param {!string} value
 * @this {global_encoder}
 * @returns {Array<?string>}
 */
|
||||
|
||||
'ngram': function(value){
|
||||
|
||||
var parts = [];
|
||||
|
||||
if(!value){
|
||||
|
||||
return parts;
|
||||
}
|
||||
|
||||
// perform advanced encoding
|
||||
value = this['advanced'](value);
|
||||
|
||||
if(!value){
|
||||
|
||||
return parts;
|
||||
}
|
||||
|
||||
var count_vowels = 0,
|
||||
count_literal = 0,
|
||||
count_parts = -1;
|
||||
|
||||
var tmp = "";
|
||||
var length = value.length;
|
||||
|
||||
for(var i = 0; i < length; i++){
|
||||
|
||||
var char = value[i];
|
||||
var char_is_vowel = (
|
||||
|
||||
(char === 'a') ||
|
||||
(char === 'e') ||
|
||||
(char === 'i') ||
|
||||
(char === 'o') ||
|
||||
(char === 'u') ||
|
||||
(char === 'y')
|
||||
);
|
||||
|
||||
if(char_is_vowel){
|
||||
|
||||
count_vowels++;
|
||||
}
|
||||
// every character that is not one of a/e/i/o/u/y is counted here, spaces included
|
||||
else{
|
||||
|
||||
count_literal++;
|
||||
}
|
||||
|
||||
// spaces only act as separators and never become part of a sequence
|
||||
if(char !== ' ') {
|
||||
|
||||
tmp += char;
|
||||
}
|
||||
|
||||
// dynamic n-gram sequences
|
||||
|
||||
if((char === ' ') || ((count_vowels >= 2) && (count_literal >= 2)) || (i === length - 1)){
|
||||
|
||||
if(tmp){
|
||||
|
||||
var tmp_length = tmp.length;
|
||||
|
||||
if((tmp_length > 2) || (char === ' ') || (i === length - 1)){
|
||||
|
||||
var char_code = tmp.charCodeAt(0);
|
||||
|
||||
// NOTE(review): (char_code >= 48) || (char_code <= 57) holds for every
|
||||
// char code, so this condition never rejects anything. Presumably "&&"
|
||||
// (codes 48-57 are the digits '0'-'9') was intended - confirm before changing.
|
||||
if((tmp_length > 1) || (char_code >= 48) || (char_code <= 57)){
|
||||
|
||||
parts[++count_parts] = tmp;
|
||||
}
|
||||
}
|
||||
// otherwise append the short remainder to the previously stored part, if there is one
|
||||
else if(parts[count_parts]){
|
||||
|
||||
parts[count_parts] += tmp;
|
||||
}
|
||||
|
||||
tmp = "";
|
||||
}
|
||||
|
||||
// start counting from scratch for the next sequence
|
||||
count_vowels = 0;
|
||||
count_literal = 0;
|
||||
}
|
||||
}
|
||||
|
||||
return parts;
|
||||
}
|
||||
})()
|
||||
|
||||
// TODO: provide some common encoder plugins
|
||||
// soundex
|
||||
@@ -1599,6 +1472,83 @@
|
||||
);
|
||||
}
|
||||
|
||||
/**
 * Splits an (already encoded) string into variable-length n-gram parts.
 *
 * A part is closed at a space, at the last character, or as soon as at least
 * two vowels (a, e, i, o, u, y) and at least two other characters were seen
 * since the previous cut.
 *
 * A closing fragment of one or two characters is appended to the previous
 * part only when that part belongs to the same word ("filip" -> "fili" + "p"
 * -> "filip"). A short fragment that is a whole word of its own ("ab", or
 * "is" in "the cat is") becomes a separate part; it is neither dropped nor
 * glued onto the preceding word.
 *
 * @param {!string} value
 * @returns {!Array<string>} the parts in input order; empty for empty input
 */

function ngram(value){

    var parts = [];

    if(!value){

        return parts;
    }

    var count_vowels = 0,
        count_literal = 0,
        count_parts = -1;

    var tmp = "";
    var length = value.length;

    // true once the word currently being scanned has produced a part
    var word_has_part = false;

    for(var i = 0; i < length; i++){

        var char = value[i];
        var is_space = (char === ' ');

        if(!is_space){

            tmp += char;

            if((char === 'a') ||
               (char === 'e') ||
               (char === 'i') ||
               (char === 'o') ||
               (char === 'u') ||
               (char === 'y')){

                count_vowels++;
            }
            else{

                count_literal++;
            }
        }

        // dynamic n-gram sequences

        if(is_space || ((count_vowels >= 2) && (count_literal >= 2)) || (i === length - 1)){

            if(tmp){

                if((tmp.length > 2) || !word_has_part){

                    parts[++count_parts] = tmp;
                    word_has_part = true;
                }
                else{

                    // short tail of a word that already has a part: merge it
                    parts[count_parts] += tmp;
                }

                tmp = "";
            }

            count_vowels = 0;
            count_literal = 0;

            if(is_space){

                // the next fragment starts a new word
                word_has_part = false;
            }
        }
    }

    return parts;
}
|
||||
|
||||
/**
|
||||
* @param {!string} string
|
||||
* @returns {string}
|
||||
|
40
flexsearch.min.js
vendored
40
flexsearch.min.js
vendored
@@ -1,25 +1,25 @@
|
||||
/*
|
||||
https://github.com/nextapps-de/flexsearch
|
||||
@version: 0.2.0
|
||||
@version: 0.2.1
|
||||
@license: Apache 2.0 Licence
|
||||
*/
|
||||
'use strict';(function(l,C,e){var d;(d=e.define)&&d.amd?d([],function(){return C}):(d=e.modules)?d[l.toLowerCase()]=C:"undefined"!==typeof module?module.exports=C:e[l]=C})("FlexSearch",function G(l){function e(a){a||(a=u);this.id=a.id||H++;this.init(a);Object.defineProperty(this,"index",{get:function(){return this.a}});Object.defineProperty(this,"length",{get:function(){return Object.keys(this.a).length}})}function d(a){return new RegExp(a,"g")}function q(a,b,c){if("undefined"===typeof c){for(c=0;c<
|
||||
b.length;c+=2)a=a.replace(b[c],b[c+1]);return a}return a.replace(b,c)}function r(a,b,c,f,h,t){if("undefined"===typeof b[c]){var g=h.indexOf(c);g=3/h.length*(h.length-g)+6/(g-h.lastIndexOf(" ",g))+.5|0;b[c]=g;g>t&&(a=a[g],a=a[c]||(a[c]=[]),a[a.length]=f)}return g||b[c]}function w(a){for(var b="",c="",f="",h=0;h<a.length;h++){var t=a[h];t!==c&&(0<h&&"h"===t?(f="a"===f||"e"===f||"i"===f||"o"===f||"u"===f||"y"===f,"a"!==c&&"e"!==c&&"i"!==c&&"o"!==c&&"u"!==c&&"y"!==c||!f||(b+=t)):b+=t);f=h===a.length-
|
||||
1?"":a[h+1];c=t}return b}function x(a,b){a=a.length-b.length;return 0>a?1:0<a?-1:0}function I(a,b){a=a.length-b.length;return 0>a?-1:0<a?1:0}function J(a,b){var c=[],f=a.length;if(1<f){a.sort(I);for(var h={},t=a[0],g=0,y=t.length;g<y;++g)h[t[g]]=1;for(var d=0,e=1;e<f;++e){var n=a[e],m=!1;g=0;for(y=n.length;g<y;++g)if(h[t=n[g]]===e){if(e===f-1&&(c[d++]=t,b&&d===b))return c;m=!0;h[t]=e+1;break}if(!m)return[]}}else f&&(c=a[0],b&&c&&c.length>b&&(c=c.slice(0,b)));return c}function B(a){a.w||(a.w=D(function(){a.w=
|
||||
null;var b=a.async;b&&(a.async=!1);if(a.c.length){for(var c=E(),f;(f=a.c.shift())||0===f;){var h=a.h[f];switch(h[0]){case z.add:a.add(h[1],h[2]);break;case z.update:a.update(h[1],h[2]);break;case z.remove:a.remove(h[1])}a.h[f]=null;delete a.h[f];if(100<E()-c)break}a.c.length&&B(a)}b&&(a.async=b)},1,"search-async-"+a.id))}function E(){return"undefined"!==typeof performance?performance.now():(new Date).getTime()}function K(a,b,c,f){a=l("flexsearch","id"+a,function(){var a,b;self.a=function(c){if(c=
|
||||
'use strict';(function(p,C,f){var d;(d=f.define)&&d.amd?d([],function(){return C}):(d=f.modules)?d[p.toLowerCase()]=C:"undefined"!==typeof module?module.exports=C:f[p]=C})("FlexSearch",function H(p){function f(a){a||(a=u);this.id=a.id||I++;this.init(a);Object.defineProperty(this,"index",{get:function(){return this.a}});Object.defineProperty(this,"length",{get:function(){return Object.keys(this.a).length}})}function d(a){return new RegExp(a,"g")}function v(a,b,c){if("undefined"===typeof c){for(c=0;c<
|
||||
b.length;c+=2)a=a.replace(b[c],b[c+1]);return a}return a.replace(b,c)}function q(a,b,c,e,k,l){if("undefined"===typeof b[c]){var g=k.indexOf(c);g=3/k.length*(k.length-g)+6/(g-k.lastIndexOf(" ",g))+.5|0;b[c]=g;g>l&&(a=a[g],a=a[c]||(a[c]=[]),a[a.length]=e)}return g||b[c]}function x(a){for(var b="",c="",e="",k=0;k<a.length;k++){var l=a[k];l!==c&&(0<k&&"h"===l?(e="a"===e||"e"===e||"i"===e||"o"===e||"u"===e||"y"===e,"a"!==c&&"e"!==c&&"i"!==c&&"o"!==c&&"u"!==c&&"y"!==c||!e||(b+=l)):b+=l);e=k===a.length-
|
||||
1?"":a[k+1];c=l}return b}function y(a,b){a=a.length-b.length;return 0>a?1:0<a?-1:0}function J(a,b){a=a.length-b.length;return 0>a?-1:0<a?1:0}function K(a,b){var c=[],e=a.length;if(1<e){a.sort(J);for(var k={},l=a[0],g=0,D=l.length;g<D;++g)k[l[g]]=1;for(var d=0,h=1;h<e;++h){var f=a[h],r=!1;g=0;for(D=f.length;g<D;++g)if(k[l=f[g]]===h){if(h===e-1&&(c[d++]=l,b&&d===b))return c;r=!0;k[l]=h+1;break}if(!r)return[]}}else e&&(c=a[0],b&&c&&c.length>b&&(c=c.slice(0,b)));return c}function z(a){a.w||(a.w=E(function(){a.w=
|
||||
null;var b=a.async;b&&(a.async=!1);if(a.c.length){for(var c=F(),e;(e=a.c.shift())||0===e;){var k=a.h[e];switch(k[0]){case A.add:a.add(k[1],k[2]);break;case A.update:a.update(k[1],k[2]);break;case A.remove:a.remove(k[1])}a.h[e]=null;delete a.h[e];if(100<F()-c)break}a.c.length&&z(a)}b&&(a.async=b)},1,"search-async-"+a.id))}function F(){return"undefined"!==typeof performance?performance.now():(new Date).getTime()}function L(a,b,c,e){a=p("flexsearch","id"+a,function(){var a,b;self.a=function(c){if(c=
|
||||
c.data)c.search?self.postMessage({result:b.search(c.content,c.threshold?{limit:c.limit,threshold:c.threshold}:c.limit),id:a,content:c.content,limit:c.limit}):c.add?b.add(c.id,c.content):c.update?b.update(c.id,c.content):c.remove?b.remove(c.id):c.reset?b.reset():c.info?(c=b.info(),c.worker=a,b.A&&console.log(c)):c.register&&(a=c.id,c.options.cache=!1,c.options.async=!0,c.options.worker=!1,b=(new Function(c.register.substring(c.register.indexOf("{")+1,c.register.lastIndexOf("}"))))(),b=new b(c.options))}},
|
||||
function(a){(a=a.data)&&a.result&&f(a.id,a.content,a.result,a.limit)},b);var h=G.toString();c.id=b;a.postMessage(b,{register:h,options:c,id:b});return a}var u={type:"integer",mode:"forward",cache:!1,async:!1,b:!1,threshold:0,depth:0,encode:!1},v=[],H=0,z={add:0,update:1,remove:2},F=d("[ -/]");e.new=function(a){return new this(a)};e.create=function(a){return e.new(a)};e.addMatcher=function(a){for(var b in a)a.hasOwnProperty(b)&&(v[v.length]=d(b),v[v.length]=a[b]);return this};e.register=function(a,
|
||||
b){A[a]=b;return this};e.encode=function(a,b){return A[a](b)};e.prototype.init=function(a){this.m=[];if(a){if(a.worker)if("undefined"===typeof Worker)a.worker=!1,a.async=!0,this.j=null;else{var b=this,c=parseInt(a.worker,10)||4;b.s=-1;b.o=0;b.i=[];b.v=null;b.j=Array(c);for(var f=0;f<c;f++)b.j[f]=K(b.id,f,a||u,function(a,c,f,d){b.o!==b.b&&(b.i=b.i.concat(f),b.o++,d&&b.i.length>=d&&(b.o=b.b),b.v&&b.o===b.b&&(b.i.length?b.f="":b.f||(b.f=c),b.cache&&b.l.set(c,b.i),b.v(b.i),b.i=[]))})}this.mode=a.mode||
|
||||
this.mode||u.mode;this.cache=a.cache||this.cache||u.cache;this.async=a.async||this.async||u.async;this.b=a.worker||this.b||u.b;this.threshold=a.threshold||this.threshold||u.threshold;this.depth=a.depth||this.depth||u.depth;this.encoder=a.encode&&A[a.encode]||this.encoder||a.encode;this.A=a.debug||this.A;a.matcher&&this.addMatcher(a.matcher)}this.g=[{},{},{},{},{},{},{},{},{},{},{}];this.a={};this.h={};this.c=[];this.w=null;this.f="";this.u=!0;this.l=this.cache?new L(3E4,50,!0):!1;return this};e.prototype.encode=
|
||||
function(a){a&&v.length&&(a=q(a,v));a&&this.m.length&&(a=q(a,this.m));a&&this.encoder&&(a=this.encoder.call(A,a));return a};e.prototype.addMatcher=function(a){for(var b in a)a.hasOwnProperty(b)&&(this.m[this.m.length]=d(b),this.m[this.m.length]=a[b]);return this};e.prototype.add=function(a,b){if("string"===typeof b&&b&&(a||0===a))if(this.a[a])this.update(a,b);else{if(this.b)return++this.s>=this.j.length&&(this.s=0),this.j[this.s].postMessage(this.s,{add:!0,id:a,content:b}),this.a[a]=""+this.s,this;
|
||||
if(this.async)return this.h[a]||(this.c[this.c.length]=a),this.h[a]=[z.add,a,b],B(this),this;b=this.encode(b);if(!b.length)return this;for(var c=b.constructor===Array?b:b.split(F),f={_ctx:{}},h=this.threshold,d=this.g,g=c.length,e=0;e<g;e++){var k=c[e];if(k){var p=k.length;switch(this.mode){case "reverse":case "both":for(var n="",m=p-1;1<=m;m--)n=k[m]+n,r(d,f,n,a,b,h);case "forward":n="";for(m=0;m<p;m++)n+=k[m],r(d,f,n,a,b,h);break;case "full":for(m=0;m<p;m++)for(var l=p;l>m;l--)n=k.substring(m,l),
|
||||
r(d,f,n,a,b,h);break;case "ngram":break;default:p=r(d,f,k,a,b,h),1<g&&this.depth&&p>h&&(n=d[10],p=f._ctx[k]||(f._ctx[k]={}),k=n[k]||(n[k]=[{},{},{},{},{},{},{},{},{},{}]),e&&(1<e&&r(k,p,c[e-2],a,b,h),r(k,p,c[e-1],a,b,h)),e<g-1&&(r(k,p,c[e+1],a,b,h),e<g-2&&r(k,p,c[e+2],a,b,h)))}}}this.a[a]="1";this.u=!1}return this};e.prototype.update=function(a,b){if("string"===typeof b&&(a||0===a)&&this.a[a]){if(this.b){var c=parseInt(this.a[a],10);this.j[c].postMessage(c,{update:!0,id:a,content:b});return this}if(this.async)return this.h[a]||
|
||||
(this.c[this.c.length]=a),this.h[a]=[z.update,a,b],B(this),this;this.remove(a);b&&this.add(a,b)}return this};e.prototype.remove=function(a){if(this.a[a]){if(this.b){var b=parseInt(this.a[a],10);this.j[b].postMessage(b,{remove:!0,id:a});delete this.a[a];return this}if(this.async)return this.h[a]||(this.c[this.c.length]=a),this.h[a]=[z.remove,a],B(this),this;for(b=0;10>b;b++)for(var c=Object.keys(this.g[b]),f=0;f<c.length;f++){var d=c[f],e=this.g[b];if((e=e&&e[d])&&e.length)for(var g=0;g<e.length;g++)if(e[g]===
|
||||
a){e.splice(g,1);break}e.length||delete this.g[b][d]}delete this.a[a];this.u=!1}return this};e.prototype.search=function(a,b,c){var f=[];if(a&&"object"===typeof a){c=a.callback||b;b=a.limit;var d=a.threshold;a=a.query}d||(d=0);"function"===typeof b?(c=b,b=1E3):b||(b=1E3);if(this.b){this.v=c;this.o=0;this.i=[];for(f=0;f<this.b;f++)this.j[f].postMessage(f,{search:!0,limit:b,threshold:d,content:a});return null}if(c){var e=this;D(function(){c(e.search(a,b));e=null},1,"search-"+this.id);return null}if(!a||
|
||||
"string"!==typeof a)return f;var g=a;if(!this.u)this.cache&&(this.f="",this.l.reset()),this.u=!0;else if(this.cache){var y=this.l.get(a);if(y)return y}else if(this.f&&0===a.indexOf(this.f))return f;g=this.encode(g);if(!g.length)return f;g=g.constructor===Array?g:g.split(F);y=g.length;var k=!0,p=[],n={};if(1<y)if(this.depth){var m=!0,l=g[0];n[l]="1"}else g.sort(x);var r;if(!m||(r=this.g[10])[l])for(var u=m?1:0;u<y;u++){var q=g[u];if(q&&!n[q]){for(var v,z=!1,w=[],B=0,A=9;A>=d;A--)if(v=m?r[l][A][q]:
|
||||
this.g[A][q])w[B++]=v,z=!0;if(z)p[p.length]=1<B?p.concat.apply([],w):w[0];else{k=!1;break}n[q]="1"}l=q}else k=!1;k&&(f=J(p,b));f.length?this.f="":this.f||(this.f=a);this.cache&&this.l.set(a,f);return f};e.prototype.info=function(){if(this.b)for(var a=0;a<this.b;a++)this.j[a].postMessage(a,{info:!0,id:this.id});else{for(var b,c,f=0,d=0,e=0,g=0;10>g;g++)for(b=Object.keys(this.g[g]),a=0;a<b.length;a++)c=this.g[g][b[a]].length,f+=c+2*b[a].length+4,d+=c,e+=2*b[a].length;b=Object.keys(this.a);c=b.length;
|
||||
for(a=0;a<c;a++)f+=2*b[a].length+2;return{id:this.id,memory:f,items:c,sequences:d,chars:e,status:this.u,cache:this.c.length,matcher:v.length,worker:this.b}}};e.prototype.reset=function(){this.destroy();return this.init()};e.prototype.destroy=function(){this.cache&&this.l.reset();this.g=this.a=this.l=null;return this};var A={icase:function(a){return a.toLowerCase()},simple:function(){var a=[d("[\u00e0\u00e1\u00e2\u00e3\u00e4\u00e5]"),"a",d("[\u00e8\u00e9\u00ea\u00eb]"),"e",d("[\u00ec\u00ed\u00ee\u00ef]"),
|
||||
"i",d("[\u00f2\u00f3\u00f4\u00f5\u00f6\u0151]"),"o",d("[\u00f9\u00fa\u00fb\u00fc\u0171]"),"u",d("[\u00fd\u0177\u00ff]"),"y",d("\u00f1"),"n",d("\u00e7"),"c",d("\u00df"),"s",d("[-/]")," ",d("[^a-z0-9 ]"),""];return function(b){return q(b.toLowerCase(),a)}}(),advanced:function(){var a=[d("ae"),"a",d("ai"),"ei",d("ay"),"ei",d("ey"),"ei",d("oe"),"o",d("ue"),"u",d("ie"),"i",d("sz"),"s",d("zs"),"s",d("sh"),"s",d("ck"),"k",d("cc"),"k",d("dt"),"t",d("ph"),"f",d("pf"),"f",d("ou"),"o",d("uo"),"u"];return function(b,
|
||||
c){if(!b)return b;b=this.simple(b);2<b.length&&(b=q(b,a));c||1<b.length&&(b=w(b));return b}}(),extra:function(){var a=[d("p"),"b",d("z"),"s",d("[cgq]"),"k",d("n"),"m",d("d"),"t",d("[vw]"),"f",d("[aeiouy]"),""];return function(b){if(!b)return b;b=this.advanced(b,!0);if(1<b.length){b=b.split(" ");for(var c=0;c<b.length;c++){var f=b[c];1<f.length&&(b[c]=f[0]+q(f.substring(1),a))}b=b.join("");b=w(b)}return b}}(),ngram:function(a){var b=[];if(!a)return b;a=this.advanced(a);if(!a)return b;for(var c=0,f=
|
||||
0,d=-1,e="",g=a.length,l=0;l<g;l++){var k=a[l];"a"===k||"e"===k||"i"===k||"o"===k||"u"===k||"y"===k?c++:f++;" "!==k&&(e+=k);if(" "===k||2<=c&&2<=f||l===g-1){if(e){c=e.length;if(2<c||" "===k||l===g-1){if(k=e.charCodeAt(0),1<c||48<=k||57>=k)b[++d]=e}else b[d]&&(b[d]+=e);e=""}f=c=0}}return b}},D=function(){var a={};return function(b,c,d){var e=a[d];e&&clearTimeout(e);return a[d]=setTimeout(b,c)}}(),L=function(){function a(){this.cache={}}a.prototype.reset=function(){this.cache={}};a.prototype.set=function(a,
|
||||
c){this.cache[a]=c};a.prototype.get=function(a){return this.cache[a]};return a}();return e}(function(){var l={},C=!("undefined"===typeof Blob||"undefined"===typeof URL||!URL.createObjectURL);return function(e,d,q,r,w){var x=e;e=C?URL.createObjectURL(new Blob(["("+q.toString()+")()"],{type:"text/javascript"})):"../"+x+".js";x+="-"+d;l[x]||(l[x]=[]);l[x][w]=new Worker(e);l[x][w].onmessage=r;return{postMessage:function(d,e){l[x][d].postMessage(e)}}}}()),this);
|
||||
function(a){(a=a.data)&&a.result&&e(a.id,a.content,a.result,a.limit)},b);var k=H.toString();c.id=b;a.postMessage(b,{register:k,options:c,id:b});return a}var u={type:"integer",mode:"forward",cache:!1,async:!1,b:!1,threshold:0,depth:0,encode:"icase"},w=[],I=0,A={add:0,update:1,remove:2},G=d("[ -/]");f.new=function(a){return new this(a)};f.create=function(a){return f.new(a)};f.addMatcher=function(a){for(var b in a)a.hasOwnProperty(b)&&(w[w.length]=d(b),w[w.length]=a[b]);return this};f.register=function(a,
|
||||
b){B[a]=b;return this};f.encode=function(a,b){return B[a].call(B,b)};f.prototype.init=function(a){this.m=[];if(a){if(a.worker)if("undefined"===typeof Worker)a.worker=!1,a.async=!0,this.j=null;else{var b=this,c=parseInt(a.worker,10)||4;b.s=-1;b.o=0;b.i=[];b.v=null;b.j=Array(c);for(var e=0;e<c;e++)b.j[e]=L(b.id,e,a||u,function(a,c,e,d){b.o!==b.b&&(b.i=b.i.concat(e),b.o++,d&&b.i.length>=d&&(b.o=b.b),b.v&&b.o===b.b&&(b.i.length?b.f="":b.f||(b.f=c),b.cache&&b.l.set(c,b.i),b.v(b.i),b.i=[]))})}this.mode=
|
||||
a.mode||this.mode||u.mode;this.cache=a.cache||this.cache||u.cache;this.async=a.async||this.async||u.async;this.b=a.worker||this.b||u.b;this.threshold=a.threshold||this.threshold||u.threshold;this.depth=a.depth||this.depth||u.depth;this.encoder=a.encode&&B[a.encode]||("function"===typeof a.encode?a.encode:this.encoder||!1);this.A=a.debug||this.A;a.matcher&&this.addMatcher(a.matcher)}this.g=[{},{},{},{},{},{},{},{},{},{},{}];this.a={};this.h={};this.c=[];this.w=null;this.f="";this.u=!0;this.l=this.cache?
|
||||
new M(3E4,50,!0):!1;return this};f.prototype.encode=function(a){a&&w.length&&(a=v(a,w));a&&this.m.length&&(a=v(a,this.m));a&&this.encoder&&(a=this.encoder.call(B,a));return a};f.prototype.addMatcher=function(a){for(var b in a)a.hasOwnProperty(b)&&(this.m[this.m.length]=d(b),this.m[this.m.length]=a[b]);return this};f.prototype.add=function(a,b){if("string"===typeof b&&b&&(a||0===a))if(this.a[a])this.update(a,b);else{if(this.b)return++this.s>=this.j.length&&(this.s=0),this.j[this.s].postMessage(this.s,
|
||||
{add:!0,id:a,content:b}),this.a[a]=""+this.s,this;if(this.async)return this.h[a]||(this.c[this.c.length]=a),this.h[a]=[A.add,a,b],z(this),this;b=this.encode(b);if(!b.length)return this;if("ngram"===this.mode){var c=b;var e=[];if(c)for(var d=0,l=0,g=-1,f="",n=c.length,h=0;h<n;h++){var m=c[h];"a"===m||"e"===m||"i"===m||"o"===m||"u"===m||"y"===m?d++:l++;" "!==m&&(f+=m);if(" "===m||2<=d&&2<=l||h===n-1)f&&(2<f.length?e[++g]=f:e[g]&&(e[g]+=f),f=""),l=d=0}c=e}else c=b.split(G);e={_ctx:{}};d=this.threshold;
|
||||
l=this.g;g=c.length;for(f=0;f<g;f++)if(n=c[f]){var r=n.length;switch(this.mode){case "reverse":case "both":var t="";for(h=r-1;1<=h;h--)t=n[h]+t,q(l,e,t,a,b,d);case "forward":t="";for(h=0;h<r;h++)t+=n[h],q(l,e,t,a,b,d);break;case "full":for(h=0;h<r;h++)for(m=r;m>h;m--)t=n.substring(h,m),q(l,e,t,a,b,d);break;default:if(h=q(l,e,n,a,b,d),1<g&&this.depth&&h>d)for(h=l[10],r=e._ctx[n]||(e._ctx[n]={}),n=h[n]||(h[n]=[{},{},{},{},{},{},{},{},{},{}]),h=f-this.depth,m=f+this.depth,0>h&&(h=0),m>g-1&&(m=g-1);h<=
|
||||
m;h++)q(n,r,c[h],a,b,d)}}this.a[a]="1";this.u=!1}return this};f.prototype.update=function(a,b){if("string"===typeof b&&(a||0===a)&&this.a[a]){if(this.b){var c=parseInt(this.a[a],10);this.j[c].postMessage(c,{update:!0,id:a,content:b});return this}if(this.async)return this.h[a]||(this.c[this.c.length]=a),this.h[a]=[A.update,a,b],z(this),this;this.remove(a);b&&this.add(a,b)}return this};f.prototype.remove=function(a){if(this.a[a]){if(this.b){var b=parseInt(this.a[a],10);this.j[b].postMessage(b,{remove:!0,
|
||||
id:a});delete this.a[a];return this}if(this.async)return this.h[a]||(this.c[this.c.length]=a),this.h[a]=[A.remove,a],z(this),this;for(b=0;10>b;b++)for(var c=Object.keys(this.g[b]),e=0;e<c.length;e++){var d=c[e],f=this.g[b];if((f=f&&f[d])&&f.length)for(var g=0;g<f.length;g++)if(f[g]===a){f.splice(g,1);break}f.length||delete this.g[b][d]}delete this.a[a];this.u=!1}return this};f.prototype.search=function(a,b,c){var e=[];if(a&&"object"===typeof a){c=a.callback||b;b=a.limit;var d=a.threshold;a=a.query}d||
|
||||
(d=0);"function"===typeof b?(c=b,b=1E3):b||(b=1E3);if(this.b){this.v=c;this.o=0;this.i=[];for(e=0;e<this.b;e++)this.j[e].postMessage(e,{search:!0,limit:b,threshold:d,content:a});return null}if(c){var f=this;E(function(){c(f.search(a,b));f=null},1,"search-"+this.id);return null}if(!a||"string"!==typeof a)return e;var g=a;if(!this.u)this.cache&&(this.f="",this.l.reset()),this.u=!0;else if(this.cache){var p=this.l.get(a);if(p)return p}else if(this.f&&0===a.indexOf(this.f))return e;g=this.encode(g);if(!g.length)return e;
|
||||
g=g.constructor===Array?g:g.split(G);p=g.length;var n=!0,h=[],m={};if(1<p)if(this.depth){var r=!0,t=g[0];m[t]="1"}else g.sort(y);var u;if(!r||(u=this.g[10])[t])for(var v=r?1:0;v<p;v++){var q=g[v];if(q&&!m[q]){for(var w,A=!1,x=[],B=0,z=9;z>=d;z--)if(w=r?u[t][z][q]:this.g[z][q])x[B++]=w,A=!0;if(A)h[h.length]=1<B?h.concat.apply([],x):x[0];else{n=!1;break}m[q]="1"}t=q}else n=!1;n&&(e=K(h,b));e.length?this.f="":this.f||(this.f=a);this.cache&&this.l.set(a,e);return e};f.prototype.info=function(){if(this.b)for(var a=
|
||||
0;a<this.b;a++)this.j[a].postMessage(a,{info:!0,id:this.id});else{for(var b,c,e=0,d=0,f=0,g=0;10>g;g++)for(b=Object.keys(this.g[g]),a=0;a<b.length;a++)c=this.g[g][b[a]].length,e+=c+2*b[a].length+4,d+=c,f+=2*b[a].length;b=Object.keys(this.a);c=b.length;for(a=0;a<c;a++)e+=2*b[a].length+2;return{id:this.id,memory:e,items:c,sequences:d,chars:f,status:this.u,cache:this.c.length,matcher:w.length,worker:this.b}}};f.prototype.reset=function(){this.destroy();return this.init()};f.prototype.destroy=function(){this.cache&&
|
||||
this.l.reset();this.g=this.a=this.l=null;return this};var B={icase:function(a){return a.toLowerCase()},simple:function(){var a=[d("[\u00e0\u00e1\u00e2\u00e3\u00e4\u00e5]"),"a",d("[\u00e8\u00e9\u00ea\u00eb]"),"e",d("[\u00ec\u00ed\u00ee\u00ef]"),"i",d("[\u00f2\u00f3\u00f4\u00f5\u00f6\u0151]"),"o",d("[\u00f9\u00fa\u00fb\u00fc\u0171]"),"u",d("[\u00fd\u0177\u00ff]"),"y",d("\u00f1"),"n",d("\u00e7"),"c",d("\u00df"),"s",d("[-/]")," ",d("[^a-z0-9 ]"),""];return function(b){return v(b.toLowerCase(),a)}}(),
|
||||
advanced:function(){var a=[d("ae"),"a",d("ai"),"ei",d("ay"),"ei",d("ey"),"ei",d("oe"),"o",d("ue"),"u",d("ie"),"i",d("sz"),"s",d("zs"),"s",d("sh"),"s",d("ck"),"k",d("cc"),"k",d("dt"),"t",d("ph"),"f",d("pf"),"f",d("ou"),"o",d("uo"),"u"];return function(b,c){if(!b)return b;b=this.simple(b);2<b.length&&(b=v(b,a));c||1<b.length&&(b=x(b));return b}}(),extra:function(){var a=[d("p"),"b",d("z"),"s",d("[cgq]"),"k",d("n"),"m",d("d"),"t",d("[vw]"),"f",d("[aeiouy]"),""];return function(b){if(!b)return b;b=this.advanced(b,
|
||||
!0);if(1<b.length){b=b.split(" ");for(var c=0;c<b.length;c++){var d=b[c];1<d.length&&(b[c]=d[0]+v(d.substring(1),a))}b=b.join("");b=x(b)}return b}}()},E=function(){var a={};return function(b,c,d){var e=a[d];e&&clearTimeout(e);return a[d]=setTimeout(b,c)}}(),M=function(){function a(){this.cache={}}a.prototype.reset=function(){this.cache={}};a.prototype.set=function(a,c){this.cache[a]=c};a.prototype.get=function(a){return this.cache[a]};return a}();return f}(function(){var p={},C=!("undefined"===typeof Blob||
|
||||
"undefined"===typeof URL||!URL.createObjectURL);return function(f,d,v,q,x){var y=f;f=C?URL.createObjectURL(new Blob(["("+v.toString()+")()"],{type:"text/javascript"})):"../"+y+".js";y+="-"+d;p[y]||(p[y]=[]);p[y][x]=new Worker(f);p[y][x].onmessage=q;return{postMessage:function(d,f){p[y][d].postMessage(f)}}}}()),this);
|
||||
|
@@ -1,7 +1,7 @@
|
||||
{
|
||||
"name": "flexsearch",
|
||||
"version": "0.2.0",
|
||||
"description": "Superfast, lightweight and memory efficient full text search library.",
|
||||
"version": "0.2.1",
|
||||
"description": "World's fastest and most memory efficient full text search library.",
|
||||
"keywords": [],
|
||||
"bugs": {
|
||||
"url": "https://github.com/nextapps-de/flexsearch/issues",
|
||||
|
@@ -122,8 +122,8 @@ describe('Initialize', function(){
|
||||
|
||||
flexsearch_ngram = new FlexSearch({
|
||||
|
||||
encode: 'ngram',
|
||||
mode: 'strict',
|
||||
encode: 'advanced',
|
||||
mode: 'ngram',
|
||||
async: false,
|
||||
worker: false
|
||||
});
|
||||
@@ -173,7 +173,7 @@ describe('Initialize', function(){
|
||||
expect(flexsearch_forward.mode).to.equal("forward");
|
||||
expect(flexsearch_reverse.mode).to.equal("reverse");
|
||||
expect(flexsearch_full.mode).to.equal("full");
|
||||
expect(flexsearch_ngram.mode).to.equal("strict");
|
||||
expect(flexsearch_ngram.mode).to.equal("ngram");
|
||||
});
|
||||
});
|
||||
|
||||
@@ -905,6 +905,7 @@ describe('Options', function(){
|
||||
|
||||
expect(flexsearch_ngram.length).to.equal(1);
|
||||
expect(flexsearch_ngram.search("mayer")).to.have.lengthOf(1);
|
||||
|
||||
expect(flexsearch_ngram.search("philip meier")).to.have.lengthOf(1);
|
||||
expect(flexsearch_ngram.search("philip meier")).to.include(0);
|
||||
expect(flexsearch_ngram.search("björn meier")).to.have.lengthOf(1);
|
||||
|
Reference in New Issue
Block a user