diff --git a/README.md b/README.md index 195ede2..ecc7201 100644 --- a/README.md +++ b/README.md @@ -203,7 +203,7 @@ Comparison: *** - 0.3.3 + 0.3.6 363757 182603 1627219 @@ -689,8 +689,10 @@ index.search({ When suggestion is enabled all results will be filled up (until limit, default 1000) with similar matches ordered by relevance. +Phonetic suggestions are currently not supported; for that purpose use an encoder and a tokenizer, which provide similar functionality. Suggestions comes into game when a query has multiple words/phrases. Assume a query contains 3 words. When the index just match 2 of 3 words then normally you will get no results, but with suggestion enabled you will also get results when 2 of 3 words was matched as well 1 of 3 words was matched (depends on the limit), also sorted by relevance. + -#### Update item of an index +#### Update item from an index > Index.__update(id, string)__ @@ -699,7 +701,7 @@ index.update(10025, "Road Runner"); ``` -#### Remove item to the index +#### Remove item from an index > Index.__remove(id)__ @@ -923,6 +925,49 @@ It is also possible to compile language packs into the build +### Right-To-Left Support + +> Set the tokenizer at least to "reverse" or "full" when using RTL. + +Just set the field "rtl" to _true_ and use a compatible tokenizer: + +```js +var index = FlexSearch.create({ + encode: "icase", + tokenize: "reverse", + rtl: true +}); +``` + + +### CJK Word Break (Chinese, Japanese, Korean) + +Set a custom tokenizer which fits your needs, e.g.: + +```js +var index = FlexSearch.create({ + encode: false, + tokenize: function(str){ + return str.split(/[\x00-\x7F]+/); + } +}); +``` + +You can also pass a custom encoder function to apply some linguistic transformations. + +```js +index.add(0, "서울시가 잠이 든 시간에 아무 말, 미뤄, 미뤄"); +``` + +```js +var results = index.search("든"); +``` + +```js +var results = index.search("시간에"); +``` + ### Get info about an index @@ -1182,6 +1227,15 @@ FlexSearch ist highly customizable. 
Make use of the the righ Disable or pass in language shorthand flag (ISO-3166) or a custom array. + + + rtl


+ + true
+ false + + Enables Right-To-Left encoding. +
diff --git a/dist/flexsearch.compact.js b/dist/flexsearch.compact.js index f3739f6..763c897 100644 --- a/dist/flexsearch.compact.js +++ b/dist/flexsearch.compact.js @@ -1,23 +1,23 @@ /* - FlexSearch v0.3.51 + FlexSearch v0.3.61 Copyright 2019 Nextapps GmbH Author: Thomas Wilkerling Released under the Apache 2.0 Licence https://github.com/nextapps-de/flexsearch */ -'use strict';(function(f,r,d){let k;(k=d.define)&&k.amd?k([],function(){return r}):(k=d.modules)?k[f.toLowerCase()]=r:"object"===typeof exports?module.exports=r:d[f]=r})("FlexSearch",function(){function f(b){z(b)&&(b=F[b]);b||(b=v);this.id=b.id||L++;this.init(b);r(this,"index",function(){return this.b});r(this,"length",function(){return Object.keys(this.b).length})}function r(b,a,c){Object.defineProperty(b,a,{get:c})}function d(b){return new RegExp(b,"g")}function k(b,a){for(let c=0;c=m&&(b=b[9-(e+.5>>0)],b=b[c]||(b[c]=[]),b[b.length]=g);return e}function C(b,a){if(b){const c=Object.keys(b);for(let g=0,d=c.length;gb?1:b?-1:0}function N(b,a){b=b.length-a.length;return 0>b?-1:b?1:0}function O(b,a,c){let d=[],e;const t=b.length;if(1a&&(d=d.slice(0,a)));return d}function z(b){return"string"===typeof b}function A(b){return"function"===typeof b}function B(b){return"undefined"===typeof b}function H(b){const a= -Array(b);for(let c=0;ch;c--)n=m.substring(h,c),y(t,k,n,b,a,l,p)}break;default:if(f=y(t,k,m,b,1,l,p),q&&1=p)for(f=k._ctx[m]||(k._ctx[m]=u()),m=this.c[m]||(this.c[m]=H(10-(p||0))),l=a-q,n=a+q+1,0>l&&(l=0),n>w&&(n=w);lb?1:b?-1:0}function N(b,a){b=b.length-a.length;return 0>b?-1:b?1:0}function O(b,a,c){let d=[],e;const t=b.length;if(1a&&(d=d.slice(0,a)));return d}function z(b){return"string"===typeof b}function A(b){return"function"===typeof b}function B(b){return"undefined"===typeof b}function H(b){const a= +Array(b);for(let 
c=0;ch;c--)n=m.substring(h,c),y(t,k,n,b,a,l,q)}break;default:if(f=y(t,k,m,b,1,l,q),r&&1=q)for(f=k._ctx[m]||(k._ctx[m]=u()),m=this.c[m]||(this.c[m]=H(10-(q||0))),l=a-r,n=a+r+1,0>l&&(l=0),n>w&&(n=w);l=c&&(this.v=this.h), -this.B&&this.v===this.h&&(this.cache&&this.o.set(a,this.l),this.B(this.l),this.l=[]));return this}function l(b,a,f){Object.defineProperty(b,a,{get:f})}function c(b){return new RegExp(b,"g")}function d(b,a){for(var f=0;f=g&&(b=b[9-(d+.5>>0)],b=b[f]||(b[f]=[]),b[b.length]=c);return d}function z(b,a){if(b)for(var f=Object.keys(b),c=0,d=f.length;cb?1:b?-1:0}function V(b,a){b=b.length-a.length;return 0>b? --1:b?1:0}function U(b,a,f){var c=[],d=b.length;if(1a&&(c=c.slice(0,a)));return c}function C(b){return"string"===typeof b}function G(b){return"function"===typeof b}function w(b){return"undefined"===typeof b}function M(b){for(var a=Array(b),f=0;f=this.f.length&&(this.u=0),this.f[this.u].postMessage({add:!0,id:b,content:a}),this.a[e]=""+this.u,f&&f(),this;if(!d){if(this.async&& -"function"!==typeof importScripts){var g=this;e=new Promise(function(f){setTimeout(function(){g.add(b,a,null,c,!0);g=null;f()})});if(f)e.then(f);else return e;return this}if(f)return this.add(b,a,null,c,!0),f(),this}a=this.encode(a);if(!a.length)return this;f=this.g;d=G(f)?f(a):a.split(N);var h=y();h._ctx=y();for(var A=this.threshold,l=this.depth,H=this.b,r=d.length,q=0;qv;t--)p=u.substring(v,t),B(H,h,p,b,w,m,A);break;default:if(n=B(H,h,u,b,1,m,A),l&&1=A)for(n=h._ctx[u]||(h._ctx[u]=y()),u=this.c[u]||(this.c[u]=M(10-(A||0))),m=q-l,p=q+l+1,0>m&&(m=0),p>r&&(p=r);md;c--)h=g[c-1],g[c]=h,e[h]= +'use strict';function K(g){var k=0;return function(){return k=c&&(this.v=this.h), +this.C&&this.v===this.h&&(this.cache&&this.o.set(a,this.l),this.C(this.l),this.l=[]));return this}function l(b,a,f){Object.defineProperty(b,a,{get:f})}function c(b){return new RegExp(b,"g")}function d(b,a){for(var f=0;f=g&&(b=b[9-(d+.5>>0)],b=b[f]||(b[f]=[]),b[b.length]=c);return d}function D(b,a){if(b)for(var 
f=Object.keys(b),c=0,d=f.length;cb?1:b?-1:0}function W(b,a){b=b.length-a.length;return 0>b? +-1:b?1:0}function V(b,a,f){var c=[],d=b.length;if(1a&&(c=c.slice(0,a)));return c}function C(b){return"string"===typeof b}function H(b){return"function"===typeof b}function v(b){return"undefined"===typeof b}function N(b){for(var a=Array(b),f=0;f=this.f.length&&(this.u=0),this.f[this.u].postMessage({add:!0,id:b,content:a}),this.a[e]=""+this.u,f&&f(),this;if(!d){if(this.async&& +"function"!==typeof importScripts){var g=this;e=new Promise(function(f){setTimeout(function(){g.add(b,a,null,c,!0);g=null;f()})});if(f)e.then(f);else return e;return this}if(f)return this.add(b,a,null,c,!0),f(),this}a=this.encode(a);if(!a.length)return this;f=this.g;d=H(f)?f(a):a.split(O);var h=z();h._ctx=z();for(var B=this.threshold,l=this.depth,I=this.b,q=d.length,x=this.B,u=0;ut;r--)w=p.substring(t,r),A(I,h,w,b,v,n,B);break;default:if(m=A(I,h,p,b,1,n,B),l&&1=B)for(m=h._ctx[p]||(h._ctx[p]=z()),p=this.c[p]||(this.c[p]=N(10-(B||0))),n=u-l,w=u+l+1,0>n&&(n=0),w>q&&(w=q);nd;c--)h=g[c-1],g[c]=h,e[h]= c;g[d]=a;e[a]=d}}}return b};return b}();return e}(function(){var g={},k="undefined"!==typeof Blob&&"undefined"!==typeof URL&&URL.createObjectURL;return function(e,h,l,c,d){l=k?URL.createObjectURL(new Blob(["("+l.toString()+")()"],{type:"text/javascript"})):e+".es5.js";e+="-"+h;g[e]||(g[e]=[]);g[e][d]=new Worker(l);g[e][d].onmessage=c;console.log("Register Worker: "+e+"@"+d);return g[e][d]}}()),this); diff --git a/dist/flexsearch.light.js b/dist/flexsearch.light.js index 8579896..d07ebcc 100644 --- a/dist/flexsearch.light.js +++ b/dist/flexsearch.light.js @@ -1,18 +1,18 @@ /* - FlexSearch v0.3.51 + FlexSearch v0.3.61 Copyright 2019 Nextapps GmbH Author: Thomas Wilkerling Released under the Apache 2.0 Licence https://github.com/nextapps-de/flexsearch */ -'use strict';(function(e,u,v){let m;(m=v.define)&&m.amd?m([],function(){return u}):(m=v.modules)?m[e.toLowerCase()]=u:"object"===typeof 
exports?module.exports=u:v[e]=u})("FlexSearch",function(){function e(a){a||(a=x);this.id=a.id||G++;this.init(a);u(this,"index",function(){return this.a});u(this,"length",function(){return Object.keys(this.a).length})}function u(a,b,c){Object.defineProperty(a,b,{get:c})}function v(a,b){for(let c=0;c=g&&(a=a[9-(d+.5>>0)],a=a[c]||(a[c]=[]),a[a.length]=f);return d}function z(a,b){if(a){const c=Object.keys(a);for(let f=0,d=c.length;fa?1:a?-1:0}function I(a,b){a=a.length-b.length;return 0>a?-1:a?1:0}function y(a){return"function"===typeof a} -function A(a){return"undefined"===typeof a}function C(a){const b=Array(a);for(let c=0;ch;c--)l=n.substring(h,c),m(q,f,l,a,b,k,d)}break;default:if(g=m(q,f,n,a,1,k,d),r&&1=d)for(g=f._ctx[n]||(f._ctx[n]=t()),n=this.c[n]||(this.c[n]=C(10-(d||0))),k=e-r,l=e+r+1,0>k&&(k=0),l>p&&(l=p);kb&&(g=g.slice(0,b)));e=g}return e};e.prototype.clear=function(){return this.destroy().init()};e.prototype.destroy=function(){this.g=this.c=this.a=null;return this};const w={icase:function(a){return a.toLowerCase()}}; -return e}(!1),this); +'use strict';(function(e,l,u){let n;(n=u.define)&&n.amd?n([],function(){return l}):(n=u.modules)?n[e.toLowerCase()]=l:"object"===typeof exports?module.exports=l:u[e]=l})("FlexSearch",function(){function e(a){a||(a=v);this.id=a.id||G++;this.init(a);l(this,"index",function(){return this.a});l(this,"length",function(){return Object.keys(this.a).length})}function l(a,b,c){Object.defineProperty(a,b,{get:c})}function u(a,b){for(let c=0;c=g&&(a=a[9-(d+.5>>0)],a=a[c]||(a[c]=[]),a[a.length]=f);return d}function y(a,b){if(a){const c=Object.keys(a);for(let f=0,d=c.length;fa?1:a?-1:0}function I(a,b){a=a.length-b.length;return 0>a?-1:a?1:0}function x(a){return"function"===typeof a} +function z(a){return"undefined"===typeof a}function C(a){const b=Array(a);for(let 
c=0;ch;c--)m=p.substring(h,c),n(r,f,m,a,b,k,d)}break;default:if(g=n(r,f,p,a,1,k,d),B&&1=d)for(g=f._ctx[p]||(f._ctx[p]=t()),p=this.c[p]||(this.c[p]=C(10-(d||0))),k=e-B,m=e+B+1,0>k&&(k=0),m>q&&(m=q);kb&&(g=g.slice(0,b)));e=g}return e};e.prototype.clear=function(){return this.destroy().init()};e.prototype.destroy=function(){this.g=this.c= +this.a=null;return this};const w={icase:function(a){return a.toLowerCase()}};return e}(!1),this); diff --git a/dist/flexsearch.min.js b/dist/flexsearch.min.js index f123fca..99fc966 100644 --- a/dist/flexsearch.min.js +++ b/dist/flexsearch.min.js @@ -1,30 +1,30 @@ /* - FlexSearch v0.3.51 + FlexSearch v0.3.61 Copyright 2019 Nextapps GmbH Author: Thomas Wilkerling Released under the Apache 2.0 Licence https://github.com/nextapps-de/flexsearch */ -'use strict';(function(u,z,h){let v;(v=h.define)&&v.amd?v([],function(){return z}):(v=h.modules)?v[u.toLowerCase()]=z:"object"===typeof exports?module.exports=z:h[u]=z})("FlexSearch",function P(u){function h(b){D(b)&&(b=J[b]);b||(b=w);this.id=b.id||Q++;this.init(b);A(this,"index",function(){return this.a});A(this,"length",function(){return Object.keys(this.a).length})}function v(b,a,c,d){this.m!==this.c&&(this.g=this.g.concat(c),this.m++,d&&this.g.length>=d&&(this.m=this.c),this.A&&this.m===this.c&& -(this.cache&&this.j.set(a,this.g),this.A(this.g),this.g=[]));return this}function A(b,a,c){Object.defineProperty(b,a,{get:c})}function f(b){return new RegExp(b,"g")}function m(b,a){for(let c=0;c=g&&(b=b[9-(e+.5>>0)],b=b[c]||(b[c]=[]),b[b.length]=d);return e}function H(b,a){if(b){const c=Object.keys(b);for(let d=0,e=c.length;db?1:b?-1:0}function S(b,a){b=b.length-a.length;return 0>b?-1:b? 
-1:0}function T(b,a,c){let d=[],e;const l=b.length;if(1a&&(d=d.slice(0,a)));return d}function D(b){return"string"===typeof b}function F(b){return"function"===typeof b}function x(b){return"undefined"===typeof b}function L(b){const a=Array(b);for(let c=0;c=d&&(this.m=this.c),this.B&&this.m===this.c&& +(this.cache&&this.j.set(a,this.g),this.B(this.g),this.g=[]));return this}function B(b,a,c){Object.defineProperty(b,a,{get:c})}function f(b){return new RegExp(b,"g")}function m(b,a){for(let c=0;c=g&&(b=b[9-(e+.5>>0)],b=b[c]||(b[c]=[]),b[b.length]=d);return e}function I(b,a){if(b){const c=Object.keys(b);for(let d=0,e=c.length;db?1:b?-1:0}function S(b,a){b=b.length-a.length;return 0>b?-1:b? +1:0}function T(b,a,c){let d=[],e;const l=b.length;if(1a&&(d=d.slice(0,a)));return d}function E(b){return"string"===typeof b}function G(b){return"function"===typeof b}function x(b){return"undefined"===typeof b}function L(b){const a=Array(b);for(let c=0;c=this.i.length&&(this.w=0),this.i[this.w].postMessage({add:!0,id:b,content:a}),this.a[f]=""+this.w,c&&c(),this;if(!e){if(this.async&&"function"!==typeof importScripts){let e= -this;f=new Promise(function(c){setTimeout(function(){e.add(b,a,null,d,!0);e=null;c()})});if(c)f.then(c);else return f;return this}if(c)return this.add(b,a,null,d,!0),c(),this}a=this.encode(a);if(!a.length)return this;c=this.b;e=F(c)?c(a):a.split(M);const l=r();l._ctx=r();const q=this.threshold,t=this.depth,E=this.f,m=e.length;for(let a=0;ak;c--)n=g.substring(k,c),C(E,l,n,b,a,p,q)}break;default:if(h=C(E,l,g,b,1,p,q),t&&1=q)for(h=l._ctx[g]||(l._ctx[g]=r()),g=this.h[g]||(this.h[g]=L(10-(q||0))),p=a-t,n=a+t+1,0>p&&(p=0),n>m&&(n=m);pc;d--)e= -f[d-1],f[d]=e,b[e]=d;f[c]=a;b[a]=c}}}return b};return b}();return h}(function(){const u={},z="undefined"!==typeof Blob&&"undefined"!==typeof URL&&URL.createObjectURL;return function(h,v,A,f,m){A=z?URL.createObjectURL(new Blob(["("+A.toString()+")()"],{type:"text/javascript"})):h+".min.js";h+="-"+v;u[h]||(u[h]=[]);u[h][m]=new 
Worker(A);u[h][m].onmessage=f;return u[h][m]}}()),this); +options:c,id:a});return b}const v={encode:"icase",b:"forward",u:!1,cache:!1,async:!1,c:!1,A:!1,threshold:0,depth:0},K={memory:{encode:"extra",b:"strict",threshold:7},speed:{encode:"icase",b:"strict",threshold:7,depth:2},match:{encode:"extra",b:"full"},score:{encode:"extra",b:"strict",threshold:5,depth:4},balance:{encode:"balance",b:"strict",threshold:6,depth:3},fastest:{encode:"icase",b:"strict",threshold:9,depth:1}},H=[];let Q=0;const M=f("\\W+"),N={},O={};h.create=function(b){return new h(b)};h.registerMatcher= +function(b){for(const a in b)b.hasOwnProperty(a)&&H.push(f(a),b[a]);return this};h.registerEncoder=function(b,a){C[b]=a.bind(C);return this};h.registerLanguage=function(b,a){N[b]=a.filter;O[b]=a.stemmer;return this};h.encode=function(b,a){return C[b](a)};h.prototype.init=function(b){this.o=[];b||(b=v);var a=b.preset,c=a?K[a]:{};if(a=b.worker)if("undefined"===typeof Worker)b.worker=!1,this.i=null;else{var d=parseInt(a,10)||4;this.w=-1;this.m=0;this.g=[];this.B=null;this.i=Array(d);for(var e=0;e=this.i.length&&(this.w=0),this.i[this.w].postMessage({add:!0,id:b,content:a}),this.a[f]=""+this.w,c&&c(),this;if(!e){if(this.async&&"function"!== +typeof importScripts){let e=this;f=new Promise(function(c){setTimeout(function(){e.add(b,a,null,d,!0);e=null;c()})});if(c)f.then(c);else return f;return this}if(c)return this.add(b,a,null,d,!0),c(),this}a=this.encode(a);if(!a.length)return this;c=this.b;e=G(c)?c(a):a.split(M);const l=r();l._ctx=r();const q=this.threshold,t=this.depth,F=this.f,m=e.length,y=this.A;for(let a=0;ak;c--)n=g.substring(k,c),D(F,l,n,b,a,p,q)}break;default:if(h=D(F,l,g,b,1,p,q),t&&1=q)for(h=l._ctx[g]||(l._ctx[g]=r()),g=this.h[g]||(this.h[g]=L(10-(q||0))),p=a-t,n=a+t+1,0>p&&(p=0),n>m&&(n=m);pc;d--)e=f[d-1],f[d]=e,b[e]=d;f[c]=a;b[a]=c}}}return b};return b}();return h}(function(){const u={},A="undefined"!==typeof Blob&&"undefined"!==typeof URL&&URL.createObjectURL;return 
function(h,w,B,f,m){B=A?URL.createObjectURL(new Blob(["("+B.toString()+")()"],{type:"text/javascript"})):h+".min.js";h+="-"+w;u[h]||(u[h]=[]);u[h][m]=new Worker(B);u[h][m].onmessage=f;return u[h][m]}}()),this); diff --git a/dist/flexsearch.node.js b/dist/flexsearch.node.js index 73535b5..6eb1636 100644 --- a/dist/flexsearch.node.js +++ b/dist/flexsearch.node.js @@ -1,26 +1,27 @@ /* - FlexSearch v0.3.51 + FlexSearch v0.3.61 Copyright 2019 Nextapps GmbH Author: Thomas Wilkerling Released under the Apache 2.0 Licence https://github.com/nextapps-de/flexsearch */ -'use strict';(function(g,x,d){let m;(m=d.define)&&m.amd?m([],function(){return x}):(m=d.modules)?m[g.toLowerCase()]=x:"object"===typeof exports?module.exports=x:d[g]=x})("FlexSearch",function(){function g(b){B(b)&&(b=G[b]);b||(b=w);this.id=b.id||L++;this.init(b);x(this,"index",function(){return this.b});x(this,"length",function(){return Object.keys(this.b).length})}function x(b,a,c){Object.defineProperty(b,a,{get:c})}function d(b){return new RegExp(b,"g")}function m(b,a){for(let c=0;c=h&&(b=b[9-(e+.5>>0)],b=b[c]||(b[c]=[]),b[b.length]=f);return e}function E(b,a){if(b){const c=Object.keys(b);for(let f=0,e=c.length;fb?1:b?-1:0}function N(b,a){b=b.length-a.length;return 0>b?-1:b?1:0}function O(b,a,c){let f=[],e;const d=b.length;if(1a&&(f=f.slice(0,a)));return f}function B(b){return"string"===typeof b}function C(b){return"function"===typeof b}function y(b){return"undefined"===typeof b}function H(b){const a= -Array(b);for(let c=0;ck;c--)q=h.substring(k,c),A(m,n,q,b,a,l,p)}break;default:if(g=A(m,n,h,b,1,l,p),u&&1=p)for(g=n._ctx[h]||(n._ctx[h]=t()),h=this.f[h]||(this.f[h]=H(10-(p||0))),l=a-u,q=a+u+1,0>l&&(l=0),q>v&&(q=v);lc;d--)e=f[d-1],f[d]=e,b[e]=d;f[c]=a;b[a]=c}}}return b};return b}();return g}(!1),this); +d){if(f="a"===f||"e"===f||"i"===f||"o"===f||"u"===f||"y"===f,("a"===c||"e"===c||"i"===c||"o"===c||"u"===c||"y"===c)&&f||" "===c)a+=d}else a+=d;f=e===b.length-1?"":b[e+1];c=d}return a}function 
M(b,a){b=b.length-a.length;return 0>b?1:b?-1:0}function N(b,a){b=b.length-a.length;return 0>b?-1:b?1:0}function O(b,a,c){let f=[],e;const d=b.length;if(1a&&(f=f.slice(0,a)));return f}function B(b){return"string"===typeof b}function C(b){return"function"===typeof b}function y(b){return"undefined"===typeof b}function H(b){const a= +Array(b);for(let c=0;ck;c--)r=h.substring(k,c),A(m,p,r,b,a,l,q)}break;default:if(g=A(m,p,h,b,1,l,q),u&&1=q)for(g=p._ctx[h]||(p._ctx[h]=t()),h=this.f[h]||(this.f[h]=H(10- +(q||0))),l=a-u,r=a+u+1,0>l&&(l=0),r>v&&(r=v);lc;d--)e=f[d-1],f[d]=e,b[e]=d;f[c]=a;b[a]=c}}}return b}; +return b}();return g}(!1),this); diff --git a/flexsearch.js b/flexsearch.js index 435fff2..56bb6f6 100644 --- a/flexsearch.js +++ b/flexsearch.js @@ -1,5 +1,5 @@ /**! - * @preserve FlexSearch v0.3.51 + * @preserve FlexSearch v0.3.61 * Copyright 2019 Nextapps GmbH * Author: Thomas Wilkerling * Released under the Apache 2.0 Licence @@ -38,6 +38,7 @@ cache: false, async: false, worker: false, + rtl: false, // minimum scoring (0 - 9) threshold: 0, @@ -377,6 +378,14 @@ defaults.tokenize ); + /** @private */ + this.rtl = ( + + options["rtl"] || + this.rtl || + defaults.rtl + ); + if(SUPPORT_ASYNC) /** @private */ this.async = ( (typeof Promise === "undefined") || is_undefined(custom = options["async"]) ? @@ -620,6 +629,8 @@ if(content && is_string(content) && ((id /*&& !index_blacklist[id]*/) || (id === 0))){ + // TODO: do not mix ids as string "1" and as number 1 + const index = "@" + id; if(this._ids[index] && !_skip_update){ @@ -733,6 +744,7 @@ const depth = this.depth; const map = this._map; const word_length = words.length; + const rtl = this.rtl; // tokenize @@ -744,32 +756,34 @@ if(value){ const length = value.length; - const context_score = (word_length - i) / word_length; + const context_score = (rtl ? 
i + 1 : word_length - i) / word_length; - let tmp = ""; + let token = ""; switch(tokenizer){ case "reverse": case "both": - for(let a = length - 1; a >= 1; a--){ + // NOTE: skip last round (this token exist already in "forward") - tmp = value[a] + tmp; + for(let a = length; --a;){ + + token = value[a] + token; add_index( map, dupes, - tmp, + token, id, - (length - a) / length, + rtl ? 1 : (length - a) / length, context_score, threshold ); } - tmp = ""; + token = ""; // Note: no break here, fallthrough to next case @@ -777,15 +791,15 @@ for(let a = 0; a < length; a++){ - tmp += value[a]; + token += value[a]; add_index( map, dupes, - tmp, + token, id, - 1, + rtl ? (a + 1) / length : 1, context_score, threshold ); @@ -797,17 +811,17 @@ for(let x = 0; x < length; x++){ - const partial_score = (length - x) / length; + const partial_score = (rtl ? x + 1 : length - x) / length; for(let y = length; y > x; y--){ - tmp = value.substring(x, y); + token = value.substring(x, y); add_index( map, dupes, - tmp, + token, id, partial_score, context_score, @@ -1935,7 +1949,9 @@ partial_score ? 
- ((9 - (threshold || 6)) * context_score) + ((threshold || 6) * partial_score) + (9 - (threshold || 6)) * context_score + (threshold || 6) * partial_score + //(9 - (threshold || 4.5)) * context_score + (threshold || 4.5) * partial_score + //4.5 * (context_score + partial_score) : context_score ); diff --git a/package.json b/package.json index 7d80816..1993364 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "flexsearch", - "version": "0.3.51", + "version": "0.3.61", "description": "Next-Generation full text search library with zero dependencies.", "homepage": "https://github.com/nextapps-de/flexsearch/", "author": "Thomas Wilkerling", diff --git a/test/test.js b/test/test.js index a7d3af4..d8dcc07 100644 --- a/test/test.js +++ b/test/test.js @@ -861,6 +861,54 @@ describe("Encoding", function(){ }); }); + +// ------------------------------------------------------------------------ +// CJK Word Break +// ------------------------------------------------------------------------ + +describe("CJK Word Break", function(){ + + it("Should have been tokenized properly", function(){ + + var index = FlexSearch.create({ + encode: false, + tokenize: function(str){ + return str.split(/[\x00-\x7F]+/); + } + }); + + index.add(0, "서울시가 잠이 든 시간에 아무 말, 미뤄, 미뤄"); + + expect(index.search("든")).to.include(0); + expect(index.search("시간에")).to.include(0); + }); +}); + +// ------------------------------------------------------------------------ +// Right-To-Left +// ------------------------------------------------------------------------ + +describe("RTL Support", function(){ + + it("Should have been scored properly", function(){ + + var index = new FlexSearch({ + + encode: "icase", + tokenize: "reverse", + rtl: true + }); + + index.add(0, "54321 4 3 2 1 0"); + index.add(1, "0 1 2 3 4 54321"); + index.add(2, "0 1 2 3 4 12345"); + + expect(index.search("5")[0]).to.equal(2); + expect(index.search("5")[1]).to.equal(1); + expect(index.search("5")[2]).to.equal(0); + }); 
+}); + // ------------------------------------------------------------------------ // Contextual Indexing // ------------------------------------------------------------------------