h;c--)l=n.substring(h,c),m(q,f,l,a,b,k,d)}break;default:if(g=m(q,f,n,a,1,k,d),r&&1=d)for(g=f._ctx[n]||(f._ctx[n]=t()),n=this.c[n]||(this.c[n]=C(10-(d||0))),k=e-r,l=e+r+1,0>k&&(k=0),l>p&&(l=p);kb&&(g=g.slice(0,b)));e=g}return e};e.prototype.clear=function(){return this.destroy().init()};e.prototype.destroy=function(){this.g=this.c=this.a=null;return this};const w={icase:function(a){return a.toLowerCase()}};
-return e}(!1),this);
+'use strict';(function(e,l,u){let n;(n=u.define)&&n.amd?n([],function(){return l}):(n=u.modules)?n[e.toLowerCase()]=l:"object"===typeof exports?module.exports=l:u[e]=l})("FlexSearch",function(){function e(a){a||(a=v);this.id=a.id||G++;this.init(a);l(this,"index",function(){return this.a});l(this,"length",function(){return Object.keys(this.a).length})}function l(a,b,c){Object.defineProperty(a,b,{get:c})}function u(a,b){for(let c=0;c=g&&(a=a[9-(d+.5>>0)],a=a[c]||(a[c]=[]),a[a.length]=f);return d}function y(a,b){if(a){const c=Object.keys(a);for(let f=0,d=c.length;fa?1:a?-1:0}function I(a,b){a=a.length-b.length;return 0>a?-1:a?1:0}function x(a){return"function"===typeof a}
+function z(a){return"undefined"===typeof a}function C(a){const b=Array(a);for(let c=0;ch;c--)m=p.substring(h,c),n(r,f,m,a,b,k,d)}break;default:if(g=n(r,f,p,a,1,k,d),B&&1=d)for(g=f._ctx[p]||(f._ctx[p]=t()),p=this.c[p]||(this.c[p]=C(10-(d||0))),k=e-B,m=e+B+1,0>k&&(k=0),m>q&&(m=q);kb&&(g=g.slice(0,b)));e=g}return e};e.prototype.clear=function(){return this.destroy().init()};e.prototype.destroy=function(){this.g=this.c=
+this.a=null;return this};const w={icase:function(a){return a.toLowerCase()}};return e}(!1),this);
diff --git a/dist/flexsearch.min.js b/dist/flexsearch.min.js
index f123fca..99fc966 100644
--- a/dist/flexsearch.min.js
+++ b/dist/flexsearch.min.js
@@ -1,30 +1,30 @@
/*
- FlexSearch v0.3.51
+ FlexSearch v0.3.61
Copyright 2019 Nextapps GmbH
Author: Thomas Wilkerling
Released under the Apache 2.0 Licence
https://github.com/nextapps-de/flexsearch
*/
-'use strict';(function(u,z,h){let v;(v=h.define)&&v.amd?v([],function(){return z}):(v=h.modules)?v[u.toLowerCase()]=z:"object"===typeof exports?module.exports=z:h[u]=z})("FlexSearch",function P(u){function h(b){D(b)&&(b=J[b]);b||(b=w);this.id=b.id||Q++;this.init(b);A(this,"index",function(){return this.a});A(this,"length",function(){return Object.keys(this.a).length})}function v(b,a,c,d){this.m!==this.c&&(this.g=this.g.concat(c),this.m++,d&&this.g.length>=d&&(this.m=this.c),this.A&&this.m===this.c&&
-(this.cache&&this.j.set(a,this.g),this.A(this.g),this.g=[]));return this}function A(b,a,c){Object.defineProperty(b,a,{get:c})}function f(b){return new RegExp(b,"g")}function m(b,a){for(let c=0;c=g&&(b=b[9-(e+.5>>0)],b=b[c]||(b[c]=[]),b[b.length]=d);return e}function H(b,a){if(b){const c=Object.keys(b);for(let d=0,e=c.length;db?1:b?-1:0}function S(b,a){b=b.length-a.length;return 0>b?-1:b?
-1:0}function T(b,a,c){let d=[],e;const l=b.length;if(1a&&(d=d.slice(0,a)));return d}function D(b){return"string"===typeof b}function F(b){return"function"===typeof b}function x(b){return"undefined"===typeof b}function L(b){const a=Array(b);for(let c=0;c=d&&(this.m=this.c),this.B&&this.m===this.c&&
+(this.cache&&this.j.set(a,this.g),this.B(this.g),this.g=[]));return this}function B(b,a,c){Object.defineProperty(b,a,{get:c})}function f(b){return new RegExp(b,"g")}function m(b,a){for(let c=0;c=g&&(b=b[9-(e+.5>>0)],b=b[c]||(b[c]=[]),b[b.length]=d);return e}function I(b,a){if(b){const c=Object.keys(b);for(let d=0,e=c.length;db?1:b?-1:0}function S(b,a){b=b.length-a.length;return 0>b?-1:b?
+1:0}function T(b,a,c){let d=[],e;const l=b.length;if(1a&&(d=d.slice(0,a)));return d}function E(b){return"string"===typeof b}function G(b){return"function"===typeof b}function x(b){return"undefined"===typeof b}function L(b){const a=Array(b);for(let c=0;c=this.i.length&&(this.w=0),this.i[this.w].postMessage({add:!0,id:b,content:a}),this.a[f]=""+this.w,c&&c(),this;if(!e){if(this.async&&"function"!==typeof importScripts){let e=
-this;f=new Promise(function(c){setTimeout(function(){e.add(b,a,null,d,!0);e=null;c()})});if(c)f.then(c);else return f;return this}if(c)return this.add(b,a,null,d,!0),c(),this}a=this.encode(a);if(!a.length)return this;c=this.b;e=F(c)?c(a):a.split(M);const l=r();l._ctx=r();const q=this.threshold,t=this.depth,E=this.f,m=e.length;for(let a=0;ak;c--)n=g.substring(k,c),C(E,l,n,b,a,p,q)}break;default:if(h=C(E,l,g,b,1,p,q),t&&1=q)for(h=l._ctx[g]||(l._ctx[g]=r()),g=this.h[g]||(this.h[g]=L(10-(q||0))),p=a-t,n=a+t+1,0>p&&(p=0),n>m&&(n=m);pc;d--)e=
-f[d-1],f[d]=e,b[e]=d;f[c]=a;b[a]=c}}}return b};return b}();return h}(function(){const u={},z="undefined"!==typeof Blob&&"undefined"!==typeof URL&&URL.createObjectURL;return function(h,v,A,f,m){A=z?URL.createObjectURL(new Blob(["("+A.toString()+")()"],{type:"text/javascript"})):h+".min.js";h+="-"+v;u[h]||(u[h]=[]);u[h][m]=new Worker(A);u[h][m].onmessage=f;return u[h][m]}}()),this);
+options:c,id:a});return b}const v={encode:"icase",b:"forward",u:!1,cache:!1,async:!1,c:!1,A:!1,threshold:0,depth:0},K={memory:{encode:"extra",b:"strict",threshold:7},speed:{encode:"icase",b:"strict",threshold:7,depth:2},match:{encode:"extra",b:"full"},score:{encode:"extra",b:"strict",threshold:5,depth:4},balance:{encode:"balance",b:"strict",threshold:6,depth:3},fastest:{encode:"icase",b:"strict",threshold:9,depth:1}},H=[];let Q=0;const M=f("\\W+"),N={},O={};h.create=function(b){return new h(b)};h.registerMatcher=
+function(b){for(const a in b)b.hasOwnProperty(a)&&H.push(f(a),b[a]);return this};h.registerEncoder=function(b,a){C[b]=a.bind(C);return this};h.registerLanguage=function(b,a){N[b]=a.filter;O[b]=a.stemmer;return this};h.encode=function(b,a){return C[b](a)};h.prototype.init=function(b){this.o=[];b||(b=v);var a=b.preset,c=a?K[a]:{};if(a=b.worker)if("undefined"===typeof Worker)b.worker=!1,this.i=null;else{var d=parseInt(a,10)||4;this.w=-1;this.m=0;this.g=[];this.B=null;this.i=Array(d);for(var e=0;e=this.i.length&&(this.w=0),this.i[this.w].postMessage({add:!0,id:b,content:a}),this.a[f]=""+this.w,c&&c(),this;if(!e){if(this.async&&"function"!==
+typeof importScripts){let e=this;f=new Promise(function(c){setTimeout(function(){e.add(b,a,null,d,!0);e=null;c()})});if(c)f.then(c);else return f;return this}if(c)return this.add(b,a,null,d,!0),c(),this}a=this.encode(a);if(!a.length)return this;c=this.b;e=G(c)?c(a):a.split(M);const l=r();l._ctx=r();const q=this.threshold,t=this.depth,F=this.f,m=e.length,y=this.A;for(let a=0;ak;c--)n=g.substring(k,c),D(F,l,n,b,a,p,q)}break;default:if(h=D(F,l,g,b,1,p,q),t&&1=q)for(h=l._ctx[g]||(l._ctx[g]=r()),g=this.h[g]||(this.h[g]=L(10-(q||0))),p=a-t,n=a+t+1,0>p&&(p=0),n>m&&(n=m);pc;d--)e=f[d-1],f[d]=e,b[e]=d;f[c]=a;b[a]=c}}}return b};return b}();return h}(function(){const u={},A="undefined"!==typeof Blob&&"undefined"!==typeof URL&&URL.createObjectURL;return function(h,w,B,f,m){B=A?URL.createObjectURL(new Blob(["("+B.toString()+")()"],{type:"text/javascript"})):h+".min.js";h+="-"+w;u[h]||(u[h]=[]);u[h][m]=new Worker(B);u[h][m].onmessage=f;return u[h][m]}}()),this);
diff --git a/dist/flexsearch.node.js b/dist/flexsearch.node.js
index 73535b5..6eb1636 100644
--- a/dist/flexsearch.node.js
+++ b/dist/flexsearch.node.js
@@ -1,26 +1,27 @@
/*
- FlexSearch v0.3.51
+ FlexSearch v0.3.61
Copyright 2019 Nextapps GmbH
Author: Thomas Wilkerling
Released under the Apache 2.0 Licence
https://github.com/nextapps-de/flexsearch
*/
-'use strict';(function(g,x,d){let m;(m=d.define)&&m.amd?m([],function(){return x}):(m=d.modules)?m[g.toLowerCase()]=x:"object"===typeof exports?module.exports=x:d[g]=x})("FlexSearch",function(){function g(b){B(b)&&(b=G[b]);b||(b=w);this.id=b.id||L++;this.init(b);x(this,"index",function(){return this.b});x(this,"length",function(){return Object.keys(this.b).length})}function x(b,a,c){Object.defineProperty(b,a,{get:c})}function d(b){return new RegExp(b,"g")}function m(b,a){for(let c=0;c=h&&(b=b[9-(e+.5>>0)],b=b[c]||(b[c]=[]),b[b.length]=f);return e}function E(b,a){if(b){const c=Object.keys(b);for(let f=0,e=c.length;fb?1:b?-1:0}function N(b,a){b=b.length-a.length;return 0>b?-1:b?1:0}function O(b,a,c){let f=[],e;const d=b.length;if(1a&&(f=f.slice(0,a)));return f}function B(b){return"string"===typeof b}function C(b){return"function"===typeof b}function y(b){return"undefined"===typeof b}function H(b){const a=
-Array(b);for(let c=0;ck;c--)q=h.substring(k,c),A(m,n,q,b,a,l,p)}break;default:if(g=A(m,n,h,b,1,l,p),u&&1=p)for(g=n._ctx[h]||(n._ctx[h]=t()),h=this.f[h]||(this.f[h]=H(10-(p||0))),l=a-u,q=a+u+1,0>l&&(l=0),q>v&&(q=v);lc;d--)e=f[d-1],f[d]=e,b[e]=d;f[c]=a;b[a]=c}}}return b};return b}();return g}(!1),this);
+d){if(f="a"===f||"e"===f||"i"===f||"o"===f||"u"===f||"y"===f,("a"===c||"e"===c||"i"===c||"o"===c||"u"===c||"y"===c)&&f||" "===c)a+=d}else a+=d;f=e===b.length-1?"":b[e+1];c=d}return a}function M(b,a){b=b.length-a.length;return 0>b?1:b?-1:0}function N(b,a){b=b.length-a.length;return 0>b?-1:b?1:0}function O(b,a,c){let f=[],e;const d=b.length;if(1a&&(f=f.slice(0,a)));return f}function B(b){return"string"===typeof b}function C(b){return"function"===typeof b}function y(b){return"undefined"===typeof b}function H(b){const a=
+Array(b);for(let c=0;ck;c--)r=h.substring(k,c),A(m,p,r,b,a,l,q)}break;default:if(g=A(m,p,h,b,1,l,q),u&&1=q)for(g=p._ctx[h]||(p._ctx[h]=t()),h=this.f[h]||(this.f[h]=H(10-
+(q||0))),l=a-u,r=a+u+1,0>l&&(l=0),r>v&&(r=v);lc;d--)e=f[d-1],f[d]=e,b[e]=d;f[c]=a;b[a]=c}}}return b};
+return b}();return g}(!1),this);
diff --git a/flexsearch.js b/flexsearch.js
index 435fff2..56bb6f6 100644
--- a/flexsearch.js
+++ b/flexsearch.js
@@ -1,5 +1,5 @@
/**!
- * @preserve FlexSearch v0.3.51
+ * @preserve FlexSearch v0.3.61
* Copyright 2019 Nextapps GmbH
* Author: Thomas Wilkerling
* Released under the Apache 2.0 Licence
@@ -38,6 +38,7 @@
cache: false,
async: false,
worker: false,
+ rtl: false,
// minimum scoring (0 - 9)
threshold: 0,
@@ -377,6 +378,14 @@
defaults.tokenize
);
+ /** @private */
+ this.rtl = (
+
+ options["rtl"] ||
+ this.rtl ||
+ defaults.rtl
+ );
+
if(SUPPORT_ASYNC) /** @private */ this.async = (
(typeof Promise === "undefined") || is_undefined(custom = options["async"]) ?
@@ -620,6 +629,8 @@
if(content && is_string(content) && ((id /*&& !index_blacklist[id]*/) || (id === 0))){
+ // TODO: do not mix ids as string "1" and as number 1
+
const index = "@" + id;
if(this._ids[index] && !_skip_update){
@@ -733,6 +744,7 @@
const depth = this.depth;
const map = this._map;
const word_length = words.length;
+ const rtl = this.rtl;
// tokenize
@@ -744,32 +756,34 @@
if(value){
const length = value.length;
- const context_score = (word_length - i) / word_length;
+ const context_score = (rtl ? i + 1 : word_length - i) / word_length;
- let tmp = "";
+ let token = "";
switch(tokenizer){
case "reverse":
case "both":
- for(let a = length - 1; a >= 1; a--){
+ // NOTE: skip the last round (this token already exists in "forward")
- tmp = value[a] + tmp;
+ for(let a = length; --a;){
+
+ token = value[a] + token;
add_index(
map,
dupes,
- tmp,
+ token,
id,
- (length - a) / length,
+ rtl ? 1 : (length - a) / length,
context_score,
threshold
);
}
- tmp = "";
+ token = "";
// Note: no break here, fallthrough to next case
@@ -777,15 +791,15 @@
for(let a = 0; a < length; a++){
- tmp += value[a];
+ token += value[a];
add_index(
map,
dupes,
- tmp,
+ token,
id,
- 1,
+ rtl ? (a + 1) / length : 1,
context_score,
threshold
);
@@ -797,17 +811,17 @@
for(let x = 0; x < length; x++){
- const partial_score = (length - x) / length;
+ const partial_score = (rtl ? x + 1 : length - x) / length;
for(let y = length; y > x; y--){
- tmp = value.substring(x, y);
+ token = value.substring(x, y);
add_index(
map,
dupes,
- tmp,
+ token,
id,
partial_score,
context_score,
@@ -1935,7 +1949,9 @@
partial_score ?
- ((9 - (threshold || 6)) * context_score) + ((threshold || 6) * partial_score)
+ (9 - (threshold || 6)) * context_score + (threshold || 6) * partial_score
+ //(9 - (threshold || 4.5)) * context_score + (threshold || 4.5) * partial_score
+ //4.5 * (context_score + partial_score)
:
context_score
);
diff --git a/package.json b/package.json
index 7d80816..1993364 100644
--- a/package.json
+++ b/package.json
@@ -1,6 +1,6 @@
{
"name": "flexsearch",
- "version": "0.3.51",
+ "version": "0.3.61",
"description": "Next-Generation full text search library with zero dependencies.",
"homepage": "https://github.com/nextapps-de/flexsearch/",
"author": "Thomas Wilkerling",
diff --git a/test/test.js b/test/test.js
index a7d3af4..d8dcc07 100644
--- a/test/test.js
+++ b/test/test.js
@@ -861,6 +861,54 @@ describe("Encoding", function(){
});
});
+
+// ------------------------------------------------------------------------
+// CJK Word Break
+// ------------------------------------------------------------------------
+
+describe("CJK Word Break", function(){
+ // Indexes one Korean sentence with a custom tokenizer and checks that individual words can be found.
+ it("Should have been tokenized properly", function(){
+ // The tokenizer splits on runs of characters in the range \x00-\x7F, so spaces and ASCII punctuation act as separators.
+ var index = FlexSearch.create({
+ encode: false,
+ tokenize: function(str){
+ return str.split(/[\x00-\x7F]+/);
+ }
+ });
+
+ index.add(0, "서울시가 잠이 든 시간에 아무 말, 미뤄, 미뤄");
+ // Both a single-character word and a multi-character word must return document id 0.
+ expect(index.search("든")).to.include(0);
+ expect(index.search("시간에")).to.include(0);
+ });
+});
+
+// ------------------------------------------------------------------------
+// Right-To-Left
+// ------------------------------------------------------------------------
+
+describe("RTL Support", function(){
+ // Checks the order of search results when the index is created with rtl: true and the "reverse" tokenizer.
+ it("Should have been scored properly", function(){
+
+ var index = new FlexSearch({
+
+ encode: "icase",
+ tokenize: "reverse",
+ rtl: true
+ });
+ // Doc 0 has "54321" as its first word; doc 1 has "54321" as its last word; doc 2 has "12345" as its last word.
+ index.add(0, "54321 4 3 2 1 0");
+ index.add(1, "0 1 2 3 4 54321");
+ index.add(2, "0 1 2 3 4 12345");
+ // Expected ranking for the query "5": doc 2 first, then doc 1, then doc 0.
+ expect(index.search("5")[0]).to.equal(2);
+ expect(index.search("5")[1]).to.equal(1);
+ expect(index.search("5")[2]).to.equal(0);
+ });
+});
+
// ------------------------------------------------------------------------
// Contextual Indexing
// ------------------------------------------------------------------------