From 6aadad9c3d7b6d8b9c910167111396c88b905f5a Mon Sep 17 00:00:00 2001 From: Thomas Wilkerling Date: Sat, 12 Apr 2025 11:05:00 +0200 Subject: [PATCH] re-add CJK encoder supporting graphemes --- README.md | 4 ++++ dist/flexsearch.bundle.debug.js | 4 ++-- dist/flexsearch.bundle.min.js | 4 ++-- dist/flexsearch.bundle.module.debug.js | 4 ++-- dist/flexsearch.bundle.module.min.js | 4 ++-- dist/flexsearch.compact.debug.js | 4 ++-- dist/flexsearch.compact.min.js | 4 ++-- dist/flexsearch.compact.module.debug.js | 4 ++-- dist/flexsearch.compact.module.min.js | 4 ++-- dist/flexsearch.es5.debug.js | 4 ++-- dist/flexsearch.es5.min.js | 4 ++-- dist/flexsearch.light.debug.js | 2 +- dist/flexsearch.light.min.js | 2 +- dist/flexsearch.light.module.debug.js | 2 +- dist/flexsearch.light.module.min.js | 2 +- dist/module-debug/bundle.js | 1 + dist/module-debug/charset.js | 5 +++++ dist/module-debug/charset/cjk.js | 14 ++++++++++++++ dist/module-min/bundle.js | 2 +- dist/module-min/charset.js | 2 +- dist/module-min/charset/cjk.js | 1 + dist/module/bundle.js | 1 + dist/module/charset.js | 5 +++++ dist/module/charset/cjk.js | 14 ++++++++++++++ doc/encoder.md | 6 +++++- index.d.ts | 1 + package-lock.json | 4 ++-- package.json | 2 +- src/bundle.js | 1 + src/charset.js | 5 +++++ test/encoder.js | 13 ++++++++++++- 31 files changed, 98 insertions(+), 31 deletions(-) create mode 100644 dist/module-debug/charset/cjk.js create mode 100644 dist/module-min/charset/cjk.js create mode 100644 dist/module/charset/cjk.js diff --git a/README.md b/README.md index 6dd556b..aa099ca 100644 --- a/README.md +++ b/README.md @@ -967,6 +967,10 @@ Methods `.export()` and also `.import()` are always async as well as every metho - Charset.[**LatinExtra**](#charset-collection) - Charset.[**LatinSoundex**](#charset-collection) +`Charset` Chinese, Japanese, Korean Encoder Preset: + +- Charset.[**CJK**](#charset-collection) + --- `Language` Encoder Preset: diff --git a/dist/flexsearch.bundle.debug.js b/dist/flexsearch.bundle.debug.js index 64d3942..1c2c179 100644 --- a/dist/flexsearch.bundle.debug.js +++ b/dist/flexsearch.bundle.debug.js @@ -1,5 +1,5 @@ /**! - * FlexSearch.js v0.8.152 (Bundle/Debug) + * FlexSearch.js v0.8.153 (Bundle/Debug) * Author and Copyright: Thomas Wilkerling * Licence: Apache-2.0 * Hosted by Nextapps GmbH @@ -1821,7 +1821,7 @@ var bb = {Exact:Wa, Default:Xa, Normalize:Xa, LatinBalance:{mapper:Ya}, LatinAdv } a[b] = e; } -}}, LatinExact:Wa, LatinDefault:Xa, LatinSimple:Xa}; +}}, CJK:{split:""}, LatinExact:Wa, LatinDefault:Xa, LatinSimple:Xa}; N.prototype.remove = function(a, c) { const b = this.reg.size && (this.fastupdate ? this.reg.get(a) : this.reg.has(a)); if (b) { diff --git a/dist/flexsearch.bundle.min.js b/dist/flexsearch.bundle.min.js index f8280de..c3782e1 100644 --- a/dist/flexsearch.bundle.min.js +++ b/dist/flexsearch.bundle.min.js @@ -1,5 +1,5 @@ /**! - * FlexSearch.js v0.8.152 (Bundle) + * FlexSearch.js v0.8.153 (Bundle) * Author and Copyright: Thomas Wilkerling * Licence: Apache-2.0 * Hosted by Nextapps GmbH @@ -64,7 +64,7 @@ u.get=function(a){return this.db?this.index.get(this.field[0]).db.enrich(a).then u.export=function(a,b,c=0,e=0){if(cthis.limit&&this.cache.delete(this.cache.keys().next().value)}; -X.prototype.get=function(a){const b=this.cache.get(a);b&&this.h!==a&&(this.cache.delete(a),this.cache.set(this.h=a,b));return b};X.prototype.remove=function(a){for(const b of this.cache){const c=b[0];b[1].includes(a)&&this.cache.delete(c)}};X.prototype.clear=function(){this.cache.clear();this.h=""};const Wa={normalize:!1,numeric:!1,dedupe:!1};const Xa={};const Ya=new Map([["b","p"],["v","f"],["w","f"],["z","s"],["x","s"],["d","t"],["n","m"],["c","k"],["g","k"],["j","k"],["q","k"],["i","e"],["y","e"],["u","o"]]);const Za=new Map([["ae","a"],["oe","o"],["sh","s"],["kh","k"],["th","t"],["ph","f"],["pf","f"]]),$a=[/([^aeo])h(.)/g,"$1$2",/([aeo])h([^aeo]|$)/g,"$1$2",/(.)\1+/g,"$1"];const ab={a:"",e:"",i:"",o:"",u:"",y:"",b:1,f:1,p:1,v:1,c:2,g:2,j:2,k:2,q:2,s:2,x:2,z:2,"\u00df":2,d:3,t:3,l:4,m:5,n:5,r:6};var bb={Exact:Wa,Default:Xa,Normalize:Xa,LatinBalance:{mapper:Ya},LatinAdvanced:{mapper:Ya,matcher:Za,replacer:$a},LatinExtra:{mapper:Ya,replacer:$a.concat([/(?!^)[aeo]/g,""]),matcher:Za},LatinSoundex:{dedupe:!1,include:{letter:!0},finalize:function(a){for(let c=0;cd.length)d.pop();else{const f=d.indexOf(a);f===c.length-1?d.pop():d.splice(f,1)}}else cb(this.map,a),this.depth&&cb(this.ctx,a);b||this.reg.delete(a)}this.db&&(this.commit_task.push({del:a}),this.T&&db(this));this.cache&&this.cache.remove(a);return this}; +X.prototype.get=function(a){const b=this.cache.get(a);b&&this.h!==a&&(this.cache.delete(a),this.cache.set(this.h=a,b));return b};X.prototype.remove=function(a){for(const b of this.cache){const c=b[0];b[1].includes(a)&&this.cache.delete(c)}};X.prototype.clear=function(){this.cache.clear();this.h=""};const Wa={normalize:!1,numeric:!1,dedupe:!1};const Xa={};const Ya=new Map([["b","p"],["v","f"],["w","f"],["z","s"],["x","s"],["d","t"],["n","m"],["c","k"],["g","k"],["j","k"],["q","k"],["i","e"],["y","e"],["u","o"]]);const Za=new Map([["ae","a"],["oe","o"],["sh","s"],["kh","k"],["th","t"],["ph","f"],["pf","f"]]),$a=[/([^aeo])h(.)/g,"$1$2",/([aeo])h([^aeo]|$)/g,"$1$2",/(.)\1+/g,"$1"];const ab={a:"",e:"",i:"",o:"",u:"",y:"",b:1,f:1,p:1,v:1,c:2,g:2,j:2,k:2,q:2,s:2,x:2,z:2,"\u00df":2,d:3,t:3,l:4,m:5,n:5,r:6};var bb={Exact:Wa,Default:Xa,Normalize:Xa,LatinBalance:{mapper:Ya},LatinAdvanced:{mapper:Ya,matcher:Za,replacer:$a},LatinExtra:{mapper:Ya,replacer:$a.concat([/(?!^)[aeo]/g,""]),matcher:Za},LatinSoundex:{dedupe:!1,include:{letter:!0},finalize:function(a){for(let c=0;cd.length)d.pop();else{const f=d.indexOf(a);f===c.length-1?d.pop():d.splice(f,1)}}else cb(this.map,a),this.depth&&cb(this.ctx,a);b||this.reg.delete(a)}this.db&&(this.commit_task.push({del:a}),this.T&&db(this));this.cache&&this.cache.remove(a);return this}; function cb(a,b){let c=0;var e="undefined"===typeof b;if(a.constructor===Array)for(let d=0,f,g;dt;f--){g=r.substring(t,f);v=this.rtl?d-1-t:t;var k=this.score?this.score(b,r,p,g,v):fb(q,e,p,d,v); gb(this,n,g,k,a,c)}break}case "bidirectional":case "reverse":if(1g?0:1), e,p,k-1,h-1),v=this.bidirectional&&r>f;gb(this,l,v?f:r,t,a,c,v?r:f)}}}}this.fastupdate||this.reg.add(a)}else b=""}this.db&&(b||this.commit_task.push({del:a}),this.T&&db(this));return this}; diff --git a/dist/flexsearch.bundle.module.debug.js b/dist/flexsearch.bundle.module.debug.js index f1f3bbc..f1ba3b9 100644 --- a/dist/flexsearch.bundle.module.debug.js +++ b/dist/flexsearch.bundle.module.debug.js @@ -1,5 +1,5 @@ /**! - * FlexSearch.js v0.8.152 (Bundle/Module/Debug) + * FlexSearch.js v0.8.153 (Bundle/Module/Debug) * Author and Copyright: Thomas Wilkerling * Licence: Apache-2.0 * Hosted by Nextapps GmbH @@ -1820,7 +1820,7 @@ var bb = {Exact:Wa, Default:Xa, Normalize:Xa, LatinBalance:{mapper:Ya}, LatinAdv } a[b] = e; } -}}, LatinExact:Wa, LatinDefault:Xa, LatinSimple:Xa}; +}}, CJK:{split:""}, LatinExact:Wa, LatinDefault:Xa, LatinSimple:Xa}; N.prototype.remove = function(a, c) { const b = this.reg.size && (this.fastupdate ? this.reg.get(a) : this.reg.has(a)); if (b) { diff --git a/dist/flexsearch.bundle.module.min.js b/dist/flexsearch.bundle.module.min.js index f9bfbca..5b515e7 100644 --- a/dist/flexsearch.bundle.module.min.js +++ b/dist/flexsearch.bundle.module.min.js @@ -1,5 +1,5 @@ /**! - * FlexSearch.js v0.8.152 (Bundle/Module) + * FlexSearch.js v0.8.153 (Bundle/Module) * Author and Copyright: Thomas Wilkerling * Licence: Apache-2.0 * Hosted by Nextapps GmbH @@ -64,7 +64,7 @@ u.get=function(a){return this.db?this.index.get(this.field[0]).db.enrich(a).then u.export=function(a,b,c=0,e=0){if(cthis.limit&&this.cache.delete(this.cache.keys().next().value)}; -X.prototype.get=function(a){const b=this.cache.get(a);b&&this.h!==a&&(this.cache.delete(a),this.cache.set(this.h=a,b));return b};X.prototype.remove=function(a){for(const b of this.cache){const c=b[0];b[1].includes(a)&&this.cache.delete(c)}};X.prototype.clear=function(){this.cache.clear();this.h=""};const Wa={normalize:!1,numeric:!1,dedupe:!1};const Xa={};const Ya=new Map([["b","p"],["v","f"],["w","f"],["z","s"],["x","s"],["d","t"],["n","m"],["c","k"],["g","k"],["j","k"],["q","k"],["i","e"],["y","e"],["u","o"]]);const Za=new Map([["ae","a"],["oe","o"],["sh","s"],["kh","k"],["th","t"],["ph","f"],["pf","f"]]),$a=[/([^aeo])h(.)/g,"$1$2",/([aeo])h([^aeo]|$)/g,"$1$2",/(.)\1+/g,"$1"];const ab={a:"",e:"",i:"",o:"",u:"",y:"",b:1,f:1,p:1,v:1,c:2,g:2,j:2,k:2,q:2,s:2,x:2,z:2,"\u00df":2,d:3,t:3,l:4,m:5,n:5,r:6};var bb={Exact:Wa,Default:Xa,Normalize:Xa,LatinBalance:{mapper:Ya},LatinAdvanced:{mapper:Ya,matcher:Za,replacer:$a},LatinExtra:{mapper:Ya,replacer:$a.concat([/(?!^)[aeo]/g,""]),matcher:Za},LatinSoundex:{dedupe:!1,include:{letter:!0},finalize:function(a){for(let c=0;cd.length)d.pop();else{const f=d.indexOf(a);f===c.length-1?d.pop():d.splice(f,1)}}else cb(this.map,a),this.depth&&cb(this.ctx,a);b||this.reg.delete(a)}this.db&&(this.commit_task.push({del:a}),this.T&&db(this));this.cache&&this.cache.remove(a);return this}; +X.prototype.get=function(a){const b=this.cache.get(a);b&&this.h!==a&&(this.cache.delete(a),this.cache.set(this.h=a,b));return b};X.prototype.remove=function(a){for(const b of this.cache){const c=b[0];b[1].includes(a)&&this.cache.delete(c)}};X.prototype.clear=function(){this.cache.clear();this.h=""};const Wa={normalize:!1,numeric:!1,dedupe:!1};const Xa={};const Ya=new Map([["b","p"],["v","f"],["w","f"],["z","s"],["x","s"],["d","t"],["n","m"],["c","k"],["g","k"],["j","k"],["q","k"],["i","e"],["y","e"],["u","o"]]);const Za=new Map([["ae","a"],["oe","o"],["sh","s"],["kh","k"],["th","t"],["ph","f"],["pf","f"]]),$a=[/([^aeo])h(.)/g,"$1$2",/([aeo])h([^aeo]|$)/g,"$1$2",/(.)\1+/g,"$1"];const ab={a:"",e:"",i:"",o:"",u:"",y:"",b:1,f:1,p:1,v:1,c:2,g:2,j:2,k:2,q:2,s:2,x:2,z:2,"\u00df":2,d:3,t:3,l:4,m:5,n:5,r:6};var bb={Exact:Wa,Default:Xa,Normalize:Xa,LatinBalance:{mapper:Ya},LatinAdvanced:{mapper:Ya,matcher:Za,replacer:$a},LatinExtra:{mapper:Ya,replacer:$a.concat([/(?!^)[aeo]/g,""]),matcher:Za},LatinSoundex:{dedupe:!1,include:{letter:!0},finalize:function(a){for(let c=0;cd.length)d.pop();else{const f=d.indexOf(a);f===c.length-1?d.pop():d.splice(f,1)}}else cb(this.map,a),this.depth&&cb(this.ctx,a);b||this.reg.delete(a)}this.db&&(this.commit_task.push({del:a}),this.T&&db(this));this.cache&&this.cache.remove(a);return this}; function cb(a,b){let c=0;var e="undefined"===typeof b;if(a.constructor===Array)for(let d=0,f,g;dt;f--){g=r.substring(t,f);v=this.rtl?d-1-t:t;var k=this.score?this.score(b,r,p,g,v):fb(q,e,p,d,v); gb(this,n,g,k,a,c)}break}case "bidirectional":case "reverse":if(1g?0:1), e,p,k-1,h-1),v=this.bidirectional&&r>f;gb(this,l,v?f:r,t,a,c,v?r:f)}}}}this.fastupdate||this.reg.add(a)}else b=""}this.db&&(b||this.commit_task.push({del:a}),this.T&&db(this));return this}; diff --git a/dist/flexsearch.compact.debug.js b/dist/flexsearch.compact.debug.js index 509bae0..c71386b 100644 --- a/dist/flexsearch.compact.debug.js +++ b/dist/flexsearch.compact.debug.js @@ -1,5 +1,5 @@ /**! - * FlexSearch.js v0.8.152 (Bundle/Debug) + * FlexSearch.js v0.8.153 (Bundle/Debug) * Author and Copyright: Thomas Wilkerling * Licence: Apache-2.0 * Hosted by Nextapps GmbH @@ -1403,7 +1403,7 @@ var Ta = {Exact:Na, Default:Oa, Normalize:Oa, LatinBalance:{mapper:Pa}, LatinAdv } a[b] = e; } -}}, LatinExact:Na, LatinDefault:Oa, LatinSimple:Oa}; +}}, CJK:{split:""}, LatinExact:Na, LatinDefault:Oa, LatinSimple:Oa}; N.prototype.remove = function(a, c) { const b = this.reg.size && (this.fastupdate ? this.reg.get(a) : this.reg.has(a)); if (b) { diff --git a/dist/flexsearch.compact.min.js b/dist/flexsearch.compact.min.js index b34aff6..205f625 100644 --- a/dist/flexsearch.compact.min.js +++ b/dist/flexsearch.compact.min.js @@ -1,5 +1,5 @@ /**! - * FlexSearch.js v0.8.152 (Bundle) + * FlexSearch.js v0.8.153 (Bundle) * Author and Copyright: Thomas Wilkerling * Licence: Apache-2.0 * Hosted by Nextapps GmbH @@ -51,7 +51,7 @@ w.clear=function(){const a=[];for(const c of this.index.values()){const b=c.clea w.set=function(a,c){"object"===typeof a&&(c=a,a=H(c,this.key));this.store.set(a,c);return this};w.searchCache=Ma; w.export=function(a,c,b=0,e=0){if(bthis.limit&&this.cache.delete(this.cache.keys().next().value)}; -X.prototype.get=function(a){const c=this.cache.get(a);c&&this.h!==a&&(this.cache.delete(a),this.cache.set(this.h=a,c));return c};X.prototype.remove=function(a){for(const c of this.cache){const b=c[0];c[1].includes(a)&&this.cache.delete(b)}};X.prototype.clear=function(){this.cache.clear();this.h=""};const Na={normalize:!1,numeric:!1,dedupe:!1};const Oa={};const Pa=new Map([["b","p"],["v","f"],["w","f"],["z","s"],["x","s"],["d","t"],["n","m"],["c","k"],["g","k"],["j","k"],["q","k"],["i","e"],["y","e"],["u","o"]]);const Qa=new Map([["ae","a"],["oe","o"],["sh","s"],["kh","k"],["th","t"],["ph","f"],["pf","f"]]),Ra=[/([^aeo])h(.)/g,"$1$2",/([aeo])h([^aeo]|$)/g,"$1$2",/(.)\1+/g,"$1"];const Sa={a:"",e:"",i:"",o:"",u:"",y:"",b:1,f:1,p:1,v:1,c:2,g:2,j:2,k:2,q:2,s:2,x:2,z:2,"\u00df":2,d:3,t:3,l:4,m:5,n:5,r:6};var Ta={Exact:Na,Default:Oa,Normalize:Oa,LatinBalance:{mapper:Pa},LatinAdvanced:{mapper:Pa,matcher:Qa,replacer:Ra},LatinExtra:{mapper:Pa,replacer:Ra.concat([/(?!^)[aeo]/g,""]),matcher:Qa},LatinSoundex:{dedupe:!1,include:{letter:!0},finalize:function(a){for(let b=0;bd.length)d.pop();else{const f=d.indexOf(a);f===b.length-1?d.pop():d.splice(f,1)}}else Y(this.map,a),this.depth&&Y(this.ctx,a);c||this.reg.delete(a)}this.cache&&this.cache.remove(a);return this}; +X.prototype.get=function(a){const c=this.cache.get(a);c&&this.h!==a&&(this.cache.delete(a),this.cache.set(this.h=a,c));return c};X.prototype.remove=function(a){for(const c of this.cache){const b=c[0];c[1].includes(a)&&this.cache.delete(b)}};X.prototype.clear=function(){this.cache.clear();this.h=""};const Na={normalize:!1,numeric:!1,dedupe:!1};const Oa={};const Pa=new Map([["b","p"],["v","f"],["w","f"],["z","s"],["x","s"],["d","t"],["n","m"],["c","k"],["g","k"],["j","k"],["q","k"],["i","e"],["y","e"],["u","o"]]);const Qa=new Map([["ae","a"],["oe","o"],["sh","s"],["kh","k"],["th","t"],["ph","f"],["pf","f"]]),Ra=[/([^aeo])h(.)/g,"$1$2",/([aeo])h([^aeo]|$)/g,"$1$2",/(.)\1+/g,"$1"];const Sa={a:"",e:"",i:"",o:"",u:"",y:"",b:1,f:1,p:1,v:1,c:2,g:2,j:2,k:2,q:2,s:2,x:2,z:2,"\u00df":2,d:3,t:3,l:4,m:5,n:5,r:6};var Ta={Exact:Na,Default:Oa,Normalize:Oa,LatinBalance:{mapper:Pa},LatinAdvanced:{mapper:Pa,matcher:Qa,replacer:Ra},LatinExtra:{mapper:Pa,replacer:Ra.concat([/(?!^)[aeo]/g,""]),matcher:Qa},LatinSoundex:{dedupe:!1,include:{letter:!0},finalize:function(a){for(let b=0;bd.length)d.pop();else{const f=d.indexOf(a);f===b.length-1?d.pop():d.splice(f,1)}}else Y(this.map,a),this.depth&&Y(this.ctx,a);c||this.reg.delete(a)}this.cache&&this.cache.remove(a);return this}; function Y(a,c){let b=0;var e="undefined"===typeof c;if(a.constructor===Array)for(let d=0,f,g;dt;f--){g=q.substring(t,f);u=this.rtl?d-1-t:t;var k=this.score?this.score(c,q,p,g,u):Va(v,e,p,d,u); Z(this,m,g,k,a,b)}break}case "bidirectional":case "reverse":if(1g?0:1),e,p, k-1,h-1),u=this.bidirectional&&q>f;Z(this,l,u?f:q,t,a,b,u?q:f)}}}}this.fastupdate||this.reg.add(a)}}return this};function Z(a,c,b,e,d,f,g){let k=g?a.ctx:a.map,h;if(!c[b]||g&&!(h=c[b])[g])g?(c=h||(c[b]=C()),c[g]=1,(h=k.get(g))?k=h:k.set(g,k=new Map)):c[b]=1,(h=k.get(b))?k=h:k.set(b,k=[]),k=k[e]||(k[e]=[]),f&&k.includes(d)||(k.push(d),a.fastupdate&&((c=a.reg.get(d))?c.push(k):a.reg.set(d,[k])))}function Va(a,c,b,e,d){return b&&1this.limit&&this.cache.delete(this.cache.keys().next().value)}; -X.prototype.get=function(a){const c=this.cache.get(a);c&&this.h!==a&&(this.cache.delete(a),this.cache.set(this.h=a,c));return c};X.prototype.remove=function(a){for(const c of this.cache){const b=c[0];c[1].includes(a)&&this.cache.delete(b)}};X.prototype.clear=function(){this.cache.clear();this.h=""};const Na={normalize:!1,numeric:!1,dedupe:!1};const Oa={};const Pa=new Map([["b","p"],["v","f"],["w","f"],["z","s"],["x","s"],["d","t"],["n","m"],["c","k"],["g","k"],["j","k"],["q","k"],["i","e"],["y","e"],["u","o"]]);const Qa=new Map([["ae","a"],["oe","o"],["sh","s"],["kh","k"],["th","t"],["ph","f"],["pf","f"]]),Ra=[/([^aeo])h(.)/g,"$1$2",/([aeo])h([^aeo]|$)/g,"$1$2",/(.)\1+/g,"$1"];const Sa={a:"",e:"",i:"",o:"",u:"",y:"",b:1,f:1,p:1,v:1,c:2,g:2,j:2,k:2,q:2,s:2,x:2,z:2,"\u00df":2,d:3,t:3,l:4,m:5,n:5,r:6};var Ta={Exact:Na,Default:Oa,Normalize:Oa,LatinBalance:{mapper:Pa},LatinAdvanced:{mapper:Pa,matcher:Qa,replacer:Ra},LatinExtra:{mapper:Pa,replacer:Ra.concat([/(?!^)[aeo]/g,""]),matcher:Qa},LatinSoundex:{dedupe:!1,include:{letter:!0},finalize:function(a){for(let b=0;bd.length)d.pop();else{const f=d.indexOf(a);f===b.length-1?d.pop():d.splice(f,1)}}else Y(this.map,a),this.depth&&Y(this.ctx,a);c||this.reg.delete(a)}this.cache&&this.cache.remove(a);return this}; +X.prototype.get=function(a){const c=this.cache.get(a);c&&this.h!==a&&(this.cache.delete(a),this.cache.set(this.h=a,c));return c};X.prototype.remove=function(a){for(const c of this.cache){const b=c[0];c[1].includes(a)&&this.cache.delete(b)}};X.prototype.clear=function(){this.cache.clear();this.h=""};const Na={normalize:!1,numeric:!1,dedupe:!1};const Oa={};const Pa=new Map([["b","p"],["v","f"],["w","f"],["z","s"],["x","s"],["d","t"],["n","m"],["c","k"],["g","k"],["j","k"],["q","k"],["i","e"],["y","e"],["u","o"]]);const Qa=new Map([["ae","a"],["oe","o"],["sh","s"],["kh","k"],["th","t"],["ph","f"],["pf","f"]]),Ra=[/([^aeo])h(.)/g,"$1$2",/([aeo])h([^aeo]|$)/g,"$1$2",/(.)\1+/g,"$1"];const Sa={a:"",e:"",i:"",o:"",u:"",y:"",b:1,f:1,p:1,v:1,c:2,g:2,j:2,k:2,q:2,s:2,x:2,z:2,"\u00df":2,d:3,t:3,l:4,m:5,n:5,r:6};var Ta={Exact:Na,Default:Oa,Normalize:Oa,LatinBalance:{mapper:Pa},LatinAdvanced:{mapper:Pa,matcher:Qa,replacer:Ra},LatinExtra:{mapper:Pa,replacer:Ra.concat([/(?!^)[aeo]/g,""]),matcher:Qa},LatinSoundex:{dedupe:!1,include:{letter:!0},finalize:function(a){for(let b=0;bd.length)d.pop();else{const f=d.indexOf(a);f===b.length-1?d.pop():d.splice(f,1)}}else Y(this.map,a),this.depth&&Y(this.ctx,a);c||this.reg.delete(a)}this.cache&&this.cache.remove(a);return this}; function Y(a,c){let b=0;var e="undefined"===typeof c;if(a.constructor===Array)for(let d=0,f,g;dt;f--){g=q.substring(t,f);u=this.rtl?d-1-t:t;var k=this.score?this.score(c,q,p,g,u):Wa(v,e,p,d,u); Z(this,m,g,k,a,b)}break}case "bidirectional":case "reverse":if(1g?0:1),e,p, k-1,h-1),u=this.bidirectional&&q>f;Z(this,l,u?f:q,t,a,b,u?q:f)}}}}this.fastupdate||this.reg.add(a)}}return this};function Z(a,c,b,e,d,f,g){let k=g?a.ctx:a.map,h;if(!c[b]||g&&!(h=c[b])[g])g?(c=h||(c[b]=C()),c[g]=1,(h=k.get(g))?k=h:k.set(g,k=new Map)):c[b]=1,(h=k.get(b))?k=h:k.set(b,k=[]),k=k[e]||(k[e]=[]),f&&k.includes(d)||(k.push(d),a.fastupdate&&((c=a.reg.get(d))?c.push(k):a.reg.set(d,[k])))}function Wa(a,c,b,e,d){return b&&1this.limit&&this.cache.delete(this.cache.keys().next().value)}; -X.prototype.get=function(a){var b=this.cache.get(a);b&&this.h!==a&&(this.cache.delete(a),this.cache.set(this.h=a,b));return b};X.prototype.remove=function(a){for(var b=x(this.cache),c=b.next();!c.done;c=b.next()){c=c.value;var d=c[0];c[1].includes(a)&&this.cache.delete(d)}};X.prototype.clear=function(){this.cache.clear();this.h=""};var Bb={normalize:!1,numeric:!1,dedupe:!1};var Cb={};var Db=new Map([["b","p"],["v","f"],["w","f"],["z","s"],["x","s"],["d","t"],["n","m"],["c","k"],["g","k"],["j","k"],["q","k"],["i","e"],["y","e"],["u","o"]]);var Eb=new Map([["ae","a"],["oe","o"],["sh","s"],["kh","k"],["th","t"],["ph","f"],["pf","f"]]),Fb=[/([^aeo])h(.)/g,"$1$2",/([aeo])h([^aeo]|$)/g,"$1$2",/(.)\1+/g,"$1"];var Gb={a:"",e:"",i:"",o:"",u:"",y:"",b:1,f:1,p:1,v:1,c:2,g:2,j:2,k:2,q:2,s:2,x:2,z:2,"\u00df":2,d:3,t:3,l:4,m:5,n:5,r:6};var Hb={Exact:Bb,Default:Cb,Normalize:Cb,LatinBalance:{mapper:Db},LatinAdvanced:{mapper:Db,matcher:Eb,replacer:Fb},LatinExtra:{mapper:Db,replacer:Fb.concat([/(?!^)[aeo]/g,""]),matcher:Eb},LatinSoundex:{dedupe:!1,include:{letter:!0},finalize:function(a){for(var b=0;be.length)e.pop();else{var g=e.indexOf(a);g===c.length-1?e.pop():e.splice(g,1)}}else Ib(this.map,a),this.depth&&Ib(this.ctx,a);b||this.reg.delete(a)}this.db&&(this.commit_task.push({del:a}),this.ca&&Jb(this));this.cache&&this.cache.remove(a);return this}; +X.prototype.get=function(a){var b=this.cache.get(a);b&&this.h!==a&&(this.cache.delete(a),this.cache.set(this.h=a,b));return b};X.prototype.remove=function(a){for(var b=x(this.cache),c=b.next();!c.done;c=b.next()){c=c.value;var d=c[0];c[1].includes(a)&&this.cache.delete(d)}};X.prototype.clear=function(){this.cache.clear();this.h=""};var Bb={normalize:!1,numeric:!1,dedupe:!1};var Cb={};var Db=new Map([["b","p"],["v","f"],["w","f"],["z","s"],["x","s"],["d","t"],["n","m"],["c","k"],["g","k"],["j","k"],["q","k"],["i","e"],["y","e"],["u","o"]]);var Eb=new Map([["ae","a"],["oe","o"],["sh","s"],["kh","k"],["th","t"],["ph","f"],["pf","f"]]),Fb=[/([^aeo])h(.)/g,"$1$2",/([aeo])h([^aeo]|$)/g,"$1$2",/(.)\1+/g,"$1"];var Gb={a:"",e:"",i:"",o:"",u:"",y:"",b:1,f:1,p:1,v:1,c:2,g:2,j:2,k:2,q:2,s:2,x:2,z:2,"\u00df":2,d:3,t:3,l:4,m:5,n:5,r:6};var Hb={Exact:Bb,Default:Cb,Normalize:Cb,LatinBalance:{mapper:Db},LatinAdvanced:{mapper:Db,matcher:Eb,replacer:Fb},LatinExtra:{mapper:Db,replacer:Fb.concat([/(?!^)[aeo]/g,""]),matcher:Eb},LatinSoundex:{dedupe:!1,include:{letter:!0},finalize:function(a){for(var b=0;be.length)e.pop();else{var g=e.indexOf(a);g===c.length-1?e.pop():e.splice(g,1)}}else Ib(this.map,a),this.depth&&Ib(this.ctx,a);b||this.reg.delete(a)}this.db&&(this.commit_task.push({del:a}),this.ca&&Jb(this));this.cache&&this.cache.remove(a);return this}; function Ib(a,b){var c=0,d="undefined"===typeof b;if(a.constructor===Array)for(var e=0,g=void 0,f;en;r--)p=l.substring(n,r),q=this.rtl?m-1-n:n,q=this.score?this.score(b,l,k,p,q):Lb(h,d,k,m,q),Mb(this,g,p, q,a,c);break}case "bidirectional":case "reverse":if(1p?0:1),d,k,r-1,q-1),t=this.bidirectional&& l>n;Mb(this,e,t?n:l,w,a,c,t?l:n)}}}}this.fastupdate||this.reg.add(a)}else b=""}this.db&&(b||this.commit_task.push({del:a}),this.ca&&Jb(this));return this}; diff --git a/dist/flexsearch.light.debug.js b/dist/flexsearch.light.debug.js index d2d4367..539e3c2 100644 --- a/dist/flexsearch.light.debug.js +++ b/dist/flexsearch.light.debug.js @@ -1,5 +1,5 @@ /**! - * FlexSearch.js v0.8.152 (Light/Debug) + * FlexSearch.js v0.8.153 (Light/Debug) * Author and Copyright: Thomas Wilkerling * Licence: Apache-2.0 * Hosted by Nextapps GmbH diff --git a/dist/flexsearch.light.min.js b/dist/flexsearch.light.min.js index 7b19e68..0a71b0d 100644 --- a/dist/flexsearch.light.min.js +++ b/dist/flexsearch.light.min.js @@ -1,5 +1,5 @@ /**! - * FlexSearch.js v0.8.152 (Light) + * FlexSearch.js v0.8.153 (Light) * Author and Copyright: Thomas Wilkerling * Licence: Apache-2.0 * Hosted by Nextapps GmbH diff --git a/dist/flexsearch.light.module.debug.js b/dist/flexsearch.light.module.debug.js index c1f2eeb..eb04009 100644 --- a/dist/flexsearch.light.module.debug.js +++ b/dist/flexsearch.light.module.debug.js @@ -1,5 +1,5 @@ /**! - * FlexSearch.js v0.8.152 (Bundle/Debug) + * FlexSearch.js v0.8.153 (Bundle/Debug) * Author and Copyright: Thomas Wilkerling * Licence: Apache-2.0 * Hosted by Nextapps GmbH diff --git a/dist/flexsearch.light.module.min.js b/dist/flexsearch.light.module.min.js index f4d2e5e..f25f24b 100644 --- a/dist/flexsearch.light.module.min.js +++ b/dist/flexsearch.light.module.min.js @@ -1,5 +1,5 @@ /**! - * FlexSearch.js v0.8.152 (Bundle) + * FlexSearch.js v0.8.153 (Bundle) * Author and Copyright: Thomas Wilkerling * Licence: Apache-2.0 * Hosted by Nextapps GmbH diff --git a/dist/module-debug/bundle.js b/dist/module-debug/bundle.js index 6ec120b..62e0947 100644 --- a/dist/module-debug/bundle.js +++ b/dist/module-debug/bundle.js @@ -123,6 +123,7 @@ import { KeystoreMap, KeystoreArray, KeystoreSet } from "./keystore.js"; /** @export */Charset.LatinAdvanced; /** @export */Charset.LatinExtra; /** @export */Charset.LatinSoundex; +/** @export */Charset.CJK; /** @export @deprecated */Charset.LatinExact; /** @export @deprecated */Charset.LatinDefault; /** @export @deprecated */Charset.LatinSimple; diff --git a/dist/module-debug/charset.js b/dist/module-debug/charset.js index 812089f..659ce73 100644 --- a/dist/module-debug/charset.js +++ b/dist/module-debug/charset.js @@ -4,6 +4,7 @@ import charset_latin_balance from "./charset/latin/balance.js"; import charset_latin_advanced from "./charset/latin/advanced.js"; import charset_latin_extra from "./charset/latin/extra.js"; import charset_latin_soundex from "./charset/latin/soundex.js"; +import charset_cjk from "./charset/cjk.js"; // universal charset export const Exact = charset_exact; @@ -14,6 +15,8 @@ export const LatinBalance = charset_latin_balance; export const LatinAdvanced = charset_latin_advanced; export const LatinExtra = charset_latin_extra; export const LatinSoundex = charset_latin_soundex; +// CJK +export const CJK = charset_cjk; // deprecated export const LatinExact = charset_exact; export const LatinDefault = charset_normalize; @@ -29,6 +32,8 @@ export default { LatinAdvanced: charset_latin_advanced, LatinExtra: charset_latin_extra, LatinSoundex: charset_latin_soundex, + // CJK + CJK: charset_cjk, // deprecated LatinExact: charset_exact, LatinDefault: charset_normalize, diff --git a/dist/module-debug/charset/cjk.js b/dist/module-debug/charset/cjk.js new file mode 100644 index 0000000..0767906 --- /dev/null +++ b/dist/module-debug/charset/cjk.js @@ -0,0 +1,14 @@ +import { EncoderOptions } from "../type.js"; + +// https://en.wikipedia.org/wiki/CJK_characters + +/** @type EncoderOptions */ +const options = { + split: "" + //normalize: true + // normalize: function(str){ + // return str.toLowerCase(); + // }, + //dedupe: false +}; +export default options; \ No newline at end of file diff --git a/dist/module-min/bundle.js b/dist/module-min/bundle.js index b087aef..b0f5349 100644 --- a/dist/module-min/bundle.js +++ b/dist/module-min/bundle.js @@ -1 +1 @@ -import{SearchOptions,ContextOptions,DocumentDescriptor,DocumentSearchOptions,FieldOptions,IndexOptions,DocumentOptions,TagOptions,StoreOptions,EncoderOptions,EncoderSplitOptions,PersistentOptions,ResolverOptions}from"./type.js";import StorageInterface from"./db/interface.js";import Document from"./document.js";import Index from"./index.js";import WorkerIndex from"./worker.js";import Resolver from"./resolver.js";import Encoder from"./encoder.js";import IdxDB from"./db/indexeddb/index.js";import Charset from"./charset.js";import{KeystoreMap,KeystoreArray,KeystoreSet}from"./keystore.js";Index.prototype.add,Index.prototype.append,Index.prototype.search,Index.prototype.update,Index.prototype.remove,Index.prototype.contain,Index.prototype.clear,Index.prototype.cleanup,Index.prototype.searchCache,Index.prototype.addAsync,Index.prototype.appendAsync,Index.prototype.searchAsync,Index.prototype.updateAsync,Index.prototype.removeAsync,Index.prototype.export,Index.prototype.import,Index.prototype.serialize,Index.prototype.mount,Index.prototype.commit,Index.prototype.destroy,Index.prototype.encoder,Index.prototype.reg,Index.prototype.map,Index.prototype.ctx,Index.prototype.db,Index.prototype.tag,Index.prototype.store,Index.prototype.depth,Index.prototype.bidirectional,Index.prototype.commit_task,Index.prototype.commit_timer,Index.prototype.cache,Index.prototype.bypass,Index.prototype.document,Encoder.prototype.assign,Encoder.prototype.encode,Encoder.prototype.addMatcher,Encoder.prototype.addStemmer,Encoder.prototype.addFilter,Encoder.prototype.addMapper,Encoder.prototype.addReplacer,Document.prototype.add,Document.prototype.append,Document.prototype.search,Document.prototype.update,Document.prototype.remove,Document.prototype.contain,Document.prototype.clear,Document.prototype.cleanup,Document.prototype.addAsync,Document.prototype.appendAsync,Document.prototype.searchAsync,Document.prototype.updateAsync,Document.prototype.removeAsync,Document.prototype.mount,Document.prototype.commit,Document.prototype.destroy,Document.prototype.export,Document.prototype.import,Document.prototype.searchCache,Document.prototype.get,Document.prototype.set,Document.prototype.field,Document.prototype.index,Document.prototype.reg,Document.prototype.tag,Document.prototype.store,Document.prototype.fastupdate,Resolver.prototype.limit,Resolver.prototype.offset,Resolver.prototype.boost,Resolver.prototype.resolve,Resolver.prototype.or,Resolver.prototype.and,Resolver.prototype.xor,Resolver.prototype.not,Resolver.prototype.result,StorageInterface.db,StorageInterface.id,StorageInterface.support_tag_search,StorageInterface.fastupdate,StorageInterface.prototype.mount,StorageInterface.prototype.open,StorageInterface.prototype.close,StorageInterface.prototype.destroy,StorageInterface.prototype.clear,StorageInterface.prototype.get,StorageInterface.prototype.tag,StorageInterface.prototype.enrich,StorageInterface.prototype.has,StorageInterface.prototype.search,StorageInterface.prototype.info,StorageInterface.prototype.commit,StorageInterface.prototype.remove,KeystoreArray.length,KeystoreMap.size,KeystoreSet.size,Charset.Exact,Charset.Default,Charset.Normalize,Charset.LatinBalance,Charset.LatinAdvanced,Charset.LatinExtra,Charset.LatinSoundex,Charset.LatinExact,Charset.LatinDefault,Charset.LatinSimple,Charset.ArabicDefault,Charset.CjkDefault,Charset.CyrillicDefault,IndexOptions.preset,IndexOptions.context,IndexOptions.encoder,IndexOptions.encode,IndexOptions.resolution,IndexOptions.tokenize,IndexOptions.fastupdate,IndexOptions.score,IndexOptions.keystore,IndexOptions.rtl,IndexOptions.cache,IndexOptions.resolve,IndexOptions.db,IndexOptions.worker,IndexOptions.config,IndexOptions.priority,IndexOptions.export,IndexOptions.import,FieldOptions.preset,FieldOptions.context,FieldOptions.encoder,FieldOptions.encode,FieldOptions.resolution,FieldOptions.tokenize,FieldOptions.fastupdate,FieldOptions.score,FieldOptions.keystore,FieldOptions.rtl,FieldOptions.cache,FieldOptions.db,FieldOptions.config,FieldOptions.resolve,FieldOptions.field,FieldOptions.filter,FieldOptions.custom,FieldOptions.worker,DocumentOptions.context,DocumentOptions.encoder,DocumentOptions.encode,DocumentOptions.resolution,DocumentOptions.tokenize,DocumentOptions.fastupdate,DocumentOptions.score,DocumentOptions.keystore,DocumentOptions.rtl,DocumentOptions.cache,DocumentOptions.db,DocumentOptions.doc,DocumentOptions.document,DocumentOptions.worker,DocumentOptions.priority,DocumentOptions.export,DocumentOptions.import,ContextOptions.depth,ContextOptions.bidirectional,ContextOptions.resolution,DocumentDescriptor.field,DocumentDescriptor.index,DocumentDescriptor.tag,DocumentDescriptor.store,DocumentDescriptor.id,TagOptions.field,TagOptions.tag,TagOptions.filter,TagOptions.custom,TagOptions.keystore,TagOptions.db,TagOptions.config,StoreOptions.field,StoreOptions.filter,StoreOptions.custom,StoreOptions.config,SearchOptions.query,SearchOptions.limit,SearchOptions.offset,SearchOptions.context,SearchOptions.suggest,SearchOptions.resolve,SearchOptions.enrich,SearchOptions.resolution,DocumentSearchOptions.query,DocumentSearchOptions.limit,DocumentSearchOptions.offset,DocumentSearchOptions.context,DocumentSearchOptions.suggest,DocumentSearchOptions.enrich,DocumentSearchOptions.tag,DocumentSearchOptions.field,DocumentSearchOptions.index,DocumentSearchOptions.pluck,DocumentSearchOptions.merge,DocumentSearchOptions.highlight,EncoderOptions.rtl,EncoderOptions.dedupe,EncoderOptions.split,EncoderOptions.include,EncoderOptions.exclude,EncoderOptions.prepare,EncoderOptions.finalize,EncoderOptions.filter,EncoderOptions.matcher,EncoderOptions.mapper,EncoderOptions.stemmer,EncoderOptions.replacer,EncoderOptions.minlength,EncoderOptions.maxlength,EncoderOptions.cache,EncoderSplitOptions.letter,EncoderSplitOptions.number,EncoderSplitOptions.symbol,EncoderSplitOptions.punctuation,EncoderSplitOptions.control,EncoderSplitOptions.char,PersistentOptions.name,PersistentOptions.field,PersistentOptions.type,PersistentOptions.db,ResolverOptions.index,ResolverOptions.query,ResolverOptions.limit,ResolverOptions.offset,ResolverOptions.enrich,ResolverOptions.resolve,ResolverOptions.suggest,ResolverOptions.and,ResolverOptions.or,ResolverOptions.xor,ResolverOptions.not,ResolverOptions.pluck,ResolverOptions.field;const FlexSearch={Index:Index,Charset:Charset,Encoder:Encoder,Document:Document,Worker:WorkerIndex,Resolver:Resolver,IndexedDB:IdxDB,Language:{}};{const a="undefined"==typeof self?"undefined"==typeof global?self:global:self;let b;(b=a.define)&&b.amd?b([],function(){return FlexSearch}):"object"==typeof a.exports?a.exports=FlexSearch:a.FlexSearch=FlexSearch}export default FlexSearch;export{Index,Document,Encoder,Charset,WorkerIndex as Worker,Resolver,IdxDB as IndexedDB}; \ No newline at end of file +import{SearchOptions,ContextOptions,DocumentDescriptor,DocumentSearchOptions,FieldOptions,IndexOptions,DocumentOptions,TagOptions,StoreOptions,EncoderOptions,EncoderSplitOptions,PersistentOptions,ResolverOptions}from"./type.js";import StorageInterface from"./db/interface.js";import Document from"./document.js";import Index from"./index.js";import WorkerIndex from"./worker.js";import Resolver from"./resolver.js";import Encoder from"./encoder.js";import IdxDB from"./db/indexeddb/index.js";import Charset from"./charset.js";import{KeystoreMap,KeystoreArray,KeystoreSet}from"./keystore.js";Index.prototype.add,Index.prototype.append,Index.prototype.search,Index.prototype.update,Index.prototype.remove,Index.prototype.contain,Index.prototype.clear,Index.prototype.cleanup,Index.prototype.searchCache,Index.prototype.addAsync,Index.prototype.appendAsync,Index.prototype.searchAsync,Index.prototype.updateAsync,Index.prototype.removeAsync,Index.prototype.export,Index.prototype.import,Index.prototype.serialize,Index.prototype.mount,Index.prototype.commit,Index.prototype.destroy,Index.prototype.encoder,Index.prototype.reg,Index.prototype.map,Index.prototype.ctx,Index.prototype.db,Index.prototype.tag,Index.prototype.store,Index.prototype.depth,Index.prototype.bidirectional,Index.prototype.commit_task,Index.prototype.commit_timer,Index.prototype.cache,Index.prototype.bypass,Index.prototype.document,Encoder.prototype.assign,Encoder.prototype.encode,Encoder.prototype.addMatcher,Encoder.prototype.addStemmer,Encoder.prototype.addFilter,Encoder.prototype.addMapper,Encoder.prototype.addReplacer,Document.prototype.add,Document.prototype.append,Document.prototype.search,Document.prototype.update,Document.prototype.remove,Document.prototype.contain,Document.prototype.clear,Document.prototype.cleanup,Document.prototype.addAsync,Document.prototype.appendAsync,Document.prototype.searchAsync,Document.prototype.updateAsync,Document.prototype.removeAsync,Document.prototype.mount,Document.prototype.commit,Document.prototype.destroy,Document.prototype.export,Document.prototype.import,Document.prototype.searchCache,Document.prototype.get,Document.prototype.set,Document.prototype.field,Document.prototype.index,Document.prototype.reg,Document.prototype.tag,Document.prototype.store,Document.prototype.fastupdate,Resolver.prototype.limit,Resolver.prototype.offset,Resolver.prototype.boost,Resolver.prototype.resolve,Resolver.prototype.or,Resolver.prototype.and,Resolver.prototype.xor,Resolver.prototype.not,Resolver.prototype.result,StorageInterface.db,StorageInterface.id,StorageInterface.support_tag_search,StorageInterface.fastupdate,StorageInterface.prototype.mount,StorageInterface.prototype.open,StorageInterface.prototype.close,StorageInterface.prototype.destroy,StorageInterface.prototype.clear,StorageInterface.prototype.get,StorageInterface.prototype.tag,StorageInterface.prototype.enrich,StorageInterface.prototype.has,StorageInterface.prototype.search,StorageInterface.prototype.info,StorageInterface.prototype.commit,StorageInterface.prototype.remove,KeystoreArray.length,KeystoreMap.size,KeystoreSet.size,Charset.Exact,Charset.Default,Charset.Normalize,Charset.LatinBalance,Charset.LatinAdvanced,Charset.LatinExtra,Charset.LatinSoundex,Charset.CJK,Charset.LatinExact,Charset.LatinDefault,Charset.LatinSimple,Charset.ArabicDefault,Charset.CjkDefault,Charset.CyrillicDefault,IndexOptions.preset,IndexOptions.context,IndexOptions.encoder,IndexOptions.encode,IndexOptions.resolution,IndexOptions.tokenize,IndexOptions.fastupdate,IndexOptions.score,IndexOptions.keystore,IndexOptions.rtl,IndexOptions.cache,IndexOptions.resolve,IndexOptions.db,IndexOptions.worker,IndexOptions.config,IndexOptions.priority,IndexOptions.export,IndexOptions.import,FieldOptions.preset,FieldOptions.context,FieldOptions.encoder,FieldOptions.encode,FieldOptions.resolution,FieldOptions.tokenize,FieldOptions.fastupdate,FieldOptions.score,FieldOptions.keystore,FieldOptions.rtl,FieldOptions.cache,FieldOptions.db,FieldOptions.config,FieldOptions.resolve,FieldOptions.field,FieldOptions.filter,FieldOptions.custom,FieldOptions.worker,DocumentOptions.context,DocumentOptions.encoder,DocumentOptions.encode,DocumentOptions.resolution,DocumentOptions.tokenize,DocumentOptions.fastupdate,DocumentOptions.score,DocumentOptions.keystore,DocumentOptions.rtl,DocumentOptions.cache,DocumentOptions.db,DocumentOptions.doc,DocumentOptions.document,DocumentOptions.worker,DocumentOptions.priority,DocumentOptions.export,DocumentOptions.import,ContextOptions.depth,ContextOptions.bidirectional,ContextOptions.resolution,DocumentDescriptor.field,DocumentDescriptor.index,DocumentDescriptor.tag,DocumentDescriptor.store,DocumentDescriptor.id,TagOptions.field,TagOptions.tag,TagOptions.filter,TagOptions.custom,TagOptions.keystore,TagOptions.db,TagOptions.config,StoreOptions.field,StoreOptions.filter,StoreOptions.custom,StoreOptions.config,SearchOptions.query,SearchOptions.limit,SearchOptions.offset,SearchOptions.context,SearchOptions.suggest,SearchOptions.resolve,SearchOptions.enrich,SearchOptions.resolution,DocumentSearchOptions.query,DocumentSearchOptions.limit,DocumentSearchOptions.offset,DocumentSearchOptions.context,DocumentSearchOptions.suggest,DocumentSearchOptions.enrich,DocumentSearchOptions.tag,DocumentSearchOptions.field,DocumentSearchOptions.index,DocumentSearchOptions.pluck,DocumentSearchOptions.merge,DocumentSearchOptions.highlight,EncoderOptions.rtl,EncoderOptions.dedupe,EncoderOptions.split,EncoderOptions.include,EncoderOptions.exclude,EncoderOptions.prepare,EncoderOptions.finalize,EncoderOptions.filter,EncoderOptions.matcher,EncoderOptions.mapper,EncoderOptions.stemmer,EncoderOptions.replacer,EncoderOptions.minlength,EncoderOptions.maxlength,EncoderOptions.cache,EncoderSplitOptions.letter,EncoderSplitOptions.number,EncoderSplitOptions.symbol,EncoderSplitOptions.punctuation,EncoderSplitOptions.control,EncoderSplitOptions.char,PersistentOptions.name,PersistentOptions.field,PersistentOptions.type,PersistentOptions.db,ResolverOptions.index,ResolverOptions.query,ResolverOptions.limit,ResolverOptions.offset,ResolverOptions.enrich,ResolverOptions.resolve,ResolverOptions.suggest,ResolverOptions.and,ResolverOptions.or,ResolverOptions.xor,ResolverOptions.not,ResolverOptions.pluck,ResolverOptions.field;const FlexSearch={Index:Index,Charset:Charset,Encoder:Encoder,Document:Document,Worker:WorkerIndex,Resolver:Resolver,IndexedDB:IdxDB,Language:{}};{const a="undefined"==typeof self?"undefined"==typeof global?self:global:self;let b;(b=a.define)&&b.amd?b([],function(){return FlexSearch}):"object"==typeof a.exports?a.exports=FlexSearch:a.FlexSearch=FlexSearch}export default FlexSearch;export{Index,Document,Encoder,Charset,WorkerIndex as Worker,Resolver,IdxDB as IndexedDB}; \ No newline at end of file diff --git a/dist/module-min/charset.js b/dist/module-min/charset.js index 4993ae6..b9bbdab 100644 --- a/dist/module-min/charset.js +++ b/dist/module-min/charset.js @@ -1 +1 @@ -import charset_exact from"./charset/exact.js";import charset_normalize from"./charset/normalize.js";import charset_latin_balance from"./charset/latin/balance.js";import charset_latin_advanced from"./charset/latin/advanced.js";import charset_latin_extra from"./charset/latin/extra.js";import charset_latin_soundex from"./charset/latin/soundex.js";export const Exact=charset_exact;export const Default=charset_normalize;export const Normalize=charset_normalize;export const LatinBalance=charset_latin_balance;export const LatinAdvanced=charset_latin_advanced;export const LatinExtra=charset_latin_extra;export const LatinSoundex=charset_latin_soundex;export const LatinExact=charset_exact;export const LatinDefault=charset_normalize;export const LatinSimple=charset_normalize;export default{Exact:charset_exact,Default:charset_normalize,Normalize:charset_normalize,LatinBalance:charset_latin_balance,LatinAdvanced:charset_latin_advanced,LatinExtra:charset_latin_extra,LatinSoundex:charset_latin_soundex,LatinExact:charset_exact,LatinDefault:charset_normalize,LatinSimple:charset_normalize}; \ No newline at end of file +import charset_exact from"./charset/exact.js";import charset_normalize from"./charset/normalize.js";import charset_latin_balance from"./charset/latin/balance.js";import charset_latin_advanced from"./charset/latin/advanced.js";import charset_latin_extra from"./charset/latin/extra.js";import charset_latin_soundex from"./charset/latin/soundex.js";import charset_cjk from"./charset/cjk.js";export const Exact=charset_exact;export const Default=charset_normalize;export const Normalize=charset_normalize;export const LatinBalance=charset_latin_balance;export const LatinAdvanced=charset_latin_advanced;export const LatinExtra=charset_latin_extra;export const LatinSoundex=charset_latin_soundex;export const CJK=charset_cjk;export const LatinExact=charset_exact;export const LatinDefault=charset_normalize;export const LatinSimple=charset_normalize;export default{Exact:charset_exact,Default:charset_normalize,Normalize:charset_normalize,LatinBalance:charset_latin_balance,LatinAdvanced:charset_latin_advanced,LatinExtra:charset_latin_extra,LatinSoundex:charset_latin_soundex,CJK:charset_cjk,LatinExact:charset_exact,LatinDefault:charset_normalize,LatinSimple:charset_normalize}; \ No newline at end of file diff --git a/dist/module-min/charset/cjk.js b/dist/module-min/charset/cjk.js new file mode 100644 index 0000000..aa3b13d --- /dev/null +++ b/dist/module-min/charset/cjk.js @@ -0,0 +1 @@ +import{EncoderOptions}from"../type.js";const options={split:""};export default options; \ No newline at end of file diff --git a/dist/module/bundle.js b/dist/module/bundle.js index 6ec120b..62e0947 100644 --- a/dist/module/bundle.js +++ b/dist/module/bundle.js @@ -123,6 +123,7 @@ import { KeystoreMap, KeystoreArray, KeystoreSet } from "./keystore.js"; /** @export */Charset.LatinAdvanced; /** @export */Charset.LatinExtra; /** @export */Charset.LatinSoundex; +/** @export */Charset.CJK; /** @export @deprecated */Charset.LatinExact; /** @export @deprecated */Charset.LatinDefault; /** @export @deprecated */Charset.LatinSimple; diff --git a/dist/module/charset.js b/dist/module/charset.js index 812089f..659ce73 100644 --- a/dist/module/charset.js +++ b/dist/module/charset.js @@ -4,6 +4,7 @@ import charset_latin_balance from "./charset/latin/balance.js"; import charset_latin_advanced from "./charset/latin/advanced.js"; import charset_latin_extra from "./charset/latin/extra.js"; import charset_latin_soundex from "./charset/latin/soundex.js"; +import charset_cjk from "./charset/cjk.js"; // universal charset export const Exact = charset_exact; @@ -14,6 +15,8 @@ export const LatinBalance = charset_latin_balance; export const LatinAdvanced = charset_latin_advanced; export const LatinExtra = charset_latin_extra; export const LatinSoundex = charset_latin_soundex; +// CJK +export const CJK = charset_cjk; // deprecated export const LatinExact = charset_exact; export const LatinDefault = charset_normalize; @@ -29,6 +32,8 @@ export default { LatinAdvanced: charset_latin_advanced, LatinExtra: charset_latin_extra, LatinSoundex: charset_latin_soundex, + // CJK + CJK: charset_cjk, // deprecated LatinExact: charset_exact, LatinDefault: charset_normalize, diff --git a/dist/module/charset/cjk.js b/dist/module/charset/cjk.js new file mode 100644 index 0000000..0767906 --- /dev/null +++ b/dist/module/charset/cjk.js @@ -0,0 +1,14 @@ +import { EncoderOptions } from "../type.js"; + +// https://en.wikipedia.org/wiki/CJK_characters + +/** @type EncoderOptions */ +const options = { + split: "" + //normalize: true + // normalize: function(str){ + // return str.toLowerCase(); + // }, + //dedupe: false +}; +export default options; \ No newline at end of file diff --git a/doc/encoder.md b/doc/encoder.md index 0209032..cd585ff 100644 --- a/doc/encoder.md +++ b/doc/encoder.md @@ -62,6 +62,10 @@ const encoder = new Encoder({ 3. Charset.LatinExtra 4. Charset.LatinSoundex +#### Built-In CJK Encoder + +1. Charset.CJK + ### Basic Usage ```js @@ -566,7 +570,7 @@ const encoder = new Encoder({ rtl: true }); ## CJK Word Break (Chinese, Japanese, Korean) ```js -const index = new Index(); +const index = new Index({ encoder: Charset.CJK }); index.add(0, "一个单词"); var results = index.search("单词"); ``` diff --git a/index.d.ts b/index.d.ts index 7e3266a..c877f62 100644 --- a/index.d.ts +++ b/index.d.ts @@ -58,6 +58,7 @@ declare module "flexsearch" { | "LatinAdvanced" | "LatinExtra" | "LatinSoundex" + | "CJK" | ((content: string) => string[]); /** diff --git a/package-lock.json b/package-lock.json index 1cf6d63..d08e1d8 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,12 +1,12 @@ { "name": "flexsearch", - "version": "0.8.152", + "version": "0.8.153", "lockfileVersion": 2, "requires": true, "packages": { "": { "name": "flexsearch", - "version": "0.8.152", + "version": "0.8.153", "funding": [ { "type": "github", diff --git a/package.json b/package.json index d0df4c6..0584eec 100644 --- a/package.json +++ b/package.json @@ -2,7 +2,7 @@ "public": true, "preferGlobal": false, "name": "flexsearch", - "version": "0.8.152", + "version": "0.8.153", "description": "Next-Generation full-text search library for Browser and Node.js", "homepage": "https://github.com/nextapps-de/flexsearch/", "author": "Thomas Wilkerling", diff --git a/src/bundle.js b/src/bundle.js index 585580b..7abc0a3 100644 --- a/src/bundle.js +++ b/src/bundle.js @@ -154,6 +154,7 @@ if(SUPPORT_SERIALIZE){ /** @export */ Charset.LatinAdvanced; /** @export */ Charset.LatinExtra; /** @export */ Charset.LatinSoundex; +/** @export */ Charset.CJK; /** @export @deprecated */ Charset.LatinExact; /** @export @deprecated */ Charset.LatinDefault; /** @export @deprecated */ Charset.LatinSimple; diff --git a/src/charset.js b/src/charset.js index ac8e30a..c687b88 100644 --- a/src/charset.js +++ b/src/charset.js @@ -4,6 +4,7 @@ import charset_latin_balance from "./charset/latin/balance.js"; import charset_latin_advanced from "./charset/latin/advanced.js"; import charset_latin_extra from "./charset/latin/extra.js"; import charset_latin_soundex from "./charset/latin/soundex.js"; +import charset_cjk from "./charset/cjk.js"; // universal charset export const Exact = charset_exact; @@ -14,6 +15,8 @@ export const LatinBalance = charset_latin_balance; export const LatinAdvanced = charset_latin_advanced; export const LatinExtra = charset_latin_extra; export const LatinSoundex = charset_latin_soundex; +// CJK +export const CJK = charset_cjk; // deprecated export const LatinExact = charset_exact; export const LatinDefault = charset_normalize; @@ -29,6 +32,8 @@ export default { LatinAdvanced: charset_latin_advanced, LatinExtra: charset_latin_extra, LatinSoundex: charset_latin_soundex, + // CJK + CJK: charset_cjk, // deprecated LatinExact: charset_exact, LatinDefault: charset_normalize, diff --git a/test/encoder.js b/test/encoder.js index 7e9135d..21cea1f 100644 --- a/test/encoder.js +++ b/test/encoder.js @@ -108,7 +108,7 @@ describe("Encoder: CJK Charset", function(){ it("Should have been tokenized properly", function(){ - const index = Index({ tokenize: "forward" }); + const index = Index({ encoder: Charset.CJK }); index.add(0, "서울시가 잠이 든 시간에 아무 말, 미뤄, 미뤄"); expect(index.search("든")).to.include(0); @@ -116,6 +116,17 @@ describe("Encoder: CJK Charset", function(){ index.add(1, "一个单词"); expect(index.search("一个")).to.include(1); + expect(index.search("单词")).to.include(1); + expect(index.search("词单")).to.include(1); + }); + + it("Should have been tokenized properly", function(){ + + const index = Index({ encoder: Charset.CJK }); + index.add(1 , "多大的;多少平"); + + const result = index.search('是多少平的', { suggest: true }); + expect(result).to.include(1); }); });