From b75cccb0f8d07c0377af88931ec28d3cf05c186c Mon Sep 17 00:00:00 2001 From: Thomas Wilkerling Date: Thu, 17 Apr 2025 10:09:16 +0200 Subject: [PATCH] fix result highlighting #489 --- CHANGELOG.md | 7 ++- dist/flexsearch.bundle.debug.js | 4 +- dist/flexsearch.bundle.min.js | 4 +- dist/flexsearch.bundle.module.debug.js | 4 +- dist/flexsearch.bundle.module.min.js | 4 +- dist/flexsearch.compact.debug.js | 4 +- dist/flexsearch.compact.min.js | 4 +- dist/flexsearch.compact.module.debug.js | 4 +- dist/flexsearch.compact.module.min.js | 4 +- dist/flexsearch.es5.debug.js | 60 ++++++++++++------------- dist/flexsearch.es5.min.js | 36 +++++++-------- dist/flexsearch.light.debug.js | 2 +- dist/flexsearch.light.min.js | 2 +- dist/flexsearch.light.module.debug.js | 2 +- dist/flexsearch.light.module.min.js | 2 +- dist/lang/en.min.js | 5 +-- dist/module-debug/document/search.js | 2 +- dist/module-debug/lang/en.js | 29 ++++++++++-- dist/module-min/document/search.js | 2 +- dist/module-min/lang/en.js | 2 +- dist/module/document/search.js | 2 +- dist/module/lang/en.js | 29 ++++++++++-- package-lock.json | 4 +- package.json | 2 +- src/document/search.js | 2 +- test/highlight.js | 36 +++++++++++++++ 26 files changed, 171 insertions(+), 87 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index da59120..263f336 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,10 +2,13 @@ ### Current Version +- Improved Stemmer Handling +- Improved Result Highlighting - Use multi-language charset normalization as the default `Encoder` -- Simplified charset support for CJK, Arabic, Cyrillic, Hindi, ... no extra charset definition required, the default encoder can handle them all +- Simplified charset support for multi-language content - Charset renamed `LatinExact` => `Exact`, `LatinDefault` => `Default` and `LatinSimple` => `Normalize`, these are universal charset presets for any languages -- Charset `CjkDefault`, `ArabicDefault` and `CyrillicDefault` was removed, they are fully covered by the default universal charset presets +- Charset `ArabicDefault` and `CyrillicDefault` was removed, they are fully covered by the default universal charset presets +- Charset `Charset.CjkDefault` was renamed to `Charset.CJK` ### v0.8.1 diff --git a/dist/flexsearch.bundle.debug.js b/dist/flexsearch.bundle.debug.js index 33d526f..18fa21b 100644 --- a/dist/flexsearch.bundle.debug.js +++ b/dist/flexsearch.bundle.debug.js @@ -1,5 +1,5 @@ /**! - * FlexSearch.js v0.8.157 (Bundle/Debug) + * FlexSearch.js v0.8.158 (Bundle/Debug) * Author and Copyright: Thomas Wilkerling * Licence: Apache-2.0 * Hosted by Nextapps GmbH @@ -1488,7 +1488,7 @@ function Ra(a, b, c, d, e) { } else { const C = x.indexOf(y); if (-1 < C) { - q += (q ? " " : "") + w.substring(0, C) + e.replace("$1", w.substring(C, y.length)) + w.substring(C + y.length); + q += (q ? " " : "") + w.substring(0, C) + e.replace("$1", w.substring(C, C + y.length)) + w.substring(C + y.length); F = !0; break; } diff --git a/dist/flexsearch.bundle.min.js b/dist/flexsearch.bundle.min.js index afabc08..768e9dd 100644 --- a/dist/flexsearch.bundle.min.js +++ b/dist/flexsearch.bundle.min.js @@ -1,5 +1,5 @@ /**! - * FlexSearch.js v0.8.157 (Bundle) + * FlexSearch.js v0.8.158 (Bundle) * Author and Copyright: Thomas Wilkerling * Licence: Apache-2.0 * Hosted by Nextapps GmbH @@ -50,7 +50,7 @@ c,x=this.index.get(C),m&&(this.db&&(w.tag=m,F=x.db.support_tag_search,w.field=l) J.length)x++,w.push(J);else if(!q)return p?e:new W(e)}if(x){y=Ha(y,w,p);L=y.length;if(!L&&!q)return p?y:new W(y);x--}}if(L)f[n]=C,e.push(y),n++;else if(1===l.length)return p?e:new W(e)}if(v){if(this.db&&m&&m.length&&!F)for(h=0;hc||d)a=a.slice(d,d+c);e&&(a=V.call(this,a));return a}}function V(a){if(!this||!this.store)return a;const b=Array(a.length);for(let c=0,d;cc||d)a=a.slice(d,d+c);e&&(a=V.call(this,a));return a}}function V(a){if(!this||!this.store)return a;const b=Array(a.length);for(let c=0,d;cb||e)a=a.slice(e,e+b);d&&(a=V.call(this,a));return a}}function V(a){if(!this||!this.store)return a;const c=Array(a.length);for(let b=0,e;bb||e)a=a.slice(e,e+b);d&&(a=V.call(this,a));return a}}function V(a){if(!this||!this.store)return a;const c=Array(a.length);for(let b=0,e;bthis.maxlength)){if(b){if(g[m])continue;g[m]=1}else{if(f===m)continue;f=m}if(d)e.push(m);else if(!this.filter||("function"===typeof this.filter?this.filter(m):!this.filter.has(m))){if(this.cache&&m.length<=this.H)if(this.D){var p=this.C.get(m);if(p||""===p){p&&e.push(p);continue}}else this.D=setTimeout(K,50,this);if(this.stemmer)for(this.N||(this.N= new RegExp("(?!^)("+this.A+")$")),p=void 0;p!==m&&2this.T&&(this.C.clear(),this.H=this.H/1.1|0));if(m){if(m!==n)if(b){if(g[m])continue;g[m]=1}else{if(h===m)continue;h=m}e.push(m)}}}this.finalize&&(e=this.finalize(e)||e);this.cache&&a.length<=this.G&&(this.B.set(a,e),this.B.size>this.T&&(this.B.clear(),this.G=this.G/1.1|0));return e};function K(a){a.D=null;a.B.clear();a.C.clear()};var Ha,Ia; -function Ja(a){var b,c,d,e,g,f;return ta(function(h){switch(h.h){case 1:a=a.data;b=a.task;c=a.id;d=a.args;switch(b){case "init":Ia=a.options||{};(e=a.factory)?(Function("return "+e)()(self),Ha=new self.FlexSearch.Index(Ia),delete self.FlexSearch):Ha=new M(Ia);postMessage({id:c});break;default:h.h=2;return}h.h=0;break;case 2:"export"===b&&(d[1]?(d[0]=Ia.export,d[2]=0,d[3]=1):d=null);if("import"===b){if(!d[0]){h.h=5;break}return D(h,Ia.import.call(Ha,d[0]),9)}g=d&&Ha[b].apply(Ha,d);if(!g||!g.then){h.h= -5;break}return D(h,g,7);case 7:g=h.D;h.h=5;break;case 9:f=h.D,Ha.import(d[0],f);case 5:postMessage("search"===b?{id:c,msg:g}:{id:c}),h.h=0}})};function Ka(a){La.call(a,"add");La.call(a,"append");La.call(a,"search");La.call(a,"update");La.call(a,"remove")}var Na,Oa,Pa;function Qa(){Na=Pa=0} +function Ja(a){var b,c,d,e,g,f;return ta(function(h){switch(h.h){case 1:a=a.data;b=a.task;c=a.id;d=a.args;switch(b){case "init":Ia=a.options||{};(e=a.factory)?(Function("return "+e)()(self),Ha=new self.FlexSearch.Index(Ia),delete self.FlexSearch):Ha=new M(Ia);postMessage({id:c});break;default:h.h=2;return}h.h=0;break;case 2:"export"===b&&(d[1]?(d[0]=Ia.export,d[2]=0,d[3]=1):d=null);if("import"===b){if(!d[0]){h.h=5;break}return E(h,Ia.import.call(Ha,d[0]),9)}g=d&&Ha[b].apply(Ha,d);if(!g||!g.then){h.h= +5;break}return E(h,g,7);case 7:g=h.D;h.h=5;break;case 9:f=h.D,Ha.import(d[0],f);case 5:postMessage("search"===b?{id:c,msg:g}:{id:c}),h.h=0}})};function Ka(a){La.call(a,"add");La.call(a,"append");La.call(a,"search");La.call(a,"update");La.call(a,"remove")}var Na,Oa,Pa;function Qa(){Na=Pa=0} function La(a){this[a+"Async"]=function(){var b=arguments,c=b[b.length-1];if("function"===typeof c){var d=c;delete b[b.length-1]}Na?Pa||(Pa=Date.now()-Oa>=this.priority*this.priority*3):(Na=setTimeout(Qa,0),Oa=Date.now());if(Pa){var e=this;return new Promise(function(f){setTimeout(function(){f(e[a+"Async"].apply(e,b))},0)})}var g=this[a].apply(this,b);c=g.then?g:new Promise(function(f){return f(g)});d&&c.then(d);return c}};var Ra=0; function Sa(a){function b(f){function h(k){k=k.data||k;var l=k.id,m=l&&e.h[l];m&&(m(k.msg),delete e.h[l])}this.worker=f;this.h=I();if(this.worker){d?this.worker.on("message",h):this.worker.onmessage=h;if(a.config)return new Promise(function(k){e.h[++Ra]=function(){k(e);1E9c||d)a=a.slice(d,d+c);e&&(a=V.call(this,a));return a}}function V(a){if(!this||!this.store)return a;for(var b=Array(a.length),c=0,d;cn;Lb(this,g,t?n:l,v,a,c,t?l:n)}}}}this.fastupdate||this.reg.add(a)}else b=""}this.db&&(b||this.commit_task.push({del:a}),this.ca&&Ib(this));return this}; function Lb(a,b,c,d,e,g,f){var h=f?a.ctx:a.map,k;if(!b[c]||f&&!(k=b[c])[f])if(f?(b=k||(b[c]=I()),b[f]=1,(k=h.get(f))?h=k:h.set(f,h=new Map)):b[c]=1,(k=h.get(c))?h=k:h.set(c,h=k=[]),h=h[d]||(h[d]=[]),!g||!h.includes(e)){if(h.length===Math.pow(2,31)-1){b=new R(h);if(a.fastupdate)for(c=y(a.reg.values()),g=c.next();!g.done;g=c.next())g=g.value,g.includes(h)&&(g[g.indexOf(h)]=b);k[d]=h=b}h.push(e);a.fastupdate&&((d=a.reg.get(e))?d.push(h):a.reg.set(e,[h]))}} function Kb(a,b,c,d,e){return c&&1c)&&(k=c,c=b,b=k);if(a.db)return a.db.get(b,c,d,e,g,f,h);a=c?(a=a.ctx.get(c))&&a.get(b):a.map.get(b);return a};function M(a,b){if(!this||this.constructor!==M)return new M(a);if(a){var c=J(a)?a:a.preset;c&&(a=Object.assign({},Jb[c],a))}else a={};c=a.context;var d=!0===c?{depth:1}:c||{},e=J(a.encoder)?Gb[a.encoder]:a.encode||a.encoder||{};this.encoder=e.encode?e:"object"===typeof e?new Ga(e):{encode:e};this.resolution=a.resolution||9;this.tokenize=c=(c=a.tokenize)&&"default"!==c&&"exact"!==c&&c||"strict";this.depth="strict"===c&&d.depth||0;this.bidirectional=!1!==d.bidirectional;this.fastupdate=!!a.fastupdate; this.score=a.score||null;(c=a.keystore||0)&&(this.keystore=c);this.map=c?new S(c):new Map;this.ctx=c?new S(c):new Map;this.reg=b||(this.fastupdate?c?new S(c):new Map:c?new T(c):new Set);this.da=d.resolution||3;this.rtl=e.rtl||a.rtl||!1;this.cache=(c=a.cache||null)&&new X(c);this.resolve=!1!==a.resolve;if(c=a.db)this.db=this.mount(c);this.ca=!1!==a.commit;this.commit_task=[];this.commit_timer=null;this.priority=a.priority||4}u=M.prototype; @@ -124,10 +124,10 @@ u.get=function(a,b,c,d,e,g){c=void 0===c?0:c;d=void 0===d?0:d;e=void 0===e?!0:e; p=d;p=g.length)return[];if(!b&&!c)return g;g=g.slice(c,c+b);return d?e.enrich(g):g})}; u.enrich=function(a){"object"!==typeof a&&(a=[a]);for(var b=this.db.transaction("reg","readonly").objectStore("reg"),c=[],d=0;d} */ -export const stemmer = new Map([["ational", ""], ["iveness", ""], ["fulness", ""], ["ousness", ""], ["ization", ""], ["tional", "tion"], ["biliti", ""], ["icate", ""], ["ative", ""], ["alize", ""], ["iciti", ""], ["entli", ""], ["ousli", ""], ["alism", ""], ["ation", ""], ["aliti", ""], ["iviti", ""], ["ement", ""], ["izer", ""], ["able", ""], ["ible", ""], ["alli", ""], ["ator", ""], ["less", ""], ["logi", ""], ["ical", ""], ["ance", ""], ["ence", ""], ["ness", ""], ["ble", ""], ["ment", ""], ["eli", ""], ["bli", ""], ["ful", ""], ["ant", ""], ["ent", ""], ["ism", ""], ["ate", ""], ["iti", ""], ["ous", ""], ["ive", ""], ["ize", ""], ["ing", ""], ["ion", ""], ["ies", "y"], ["al", ""], ["ou", ""], ["er", ""], ["ed", ""], ["es", "e"], ["ic", ""], ["ly", ""], ["li", ""], ["s", ""]]); +export const stemmer = new Map([ +//["ational", "ate"], +//["iveness", ""], +//["fulness", ""], +//["ousness", ""], +["ization", ""], +//["tional", "tion"], +["biliti", ""], ["icate", ""], ["ative", ""], +//["alize", ""], +//["iciti", ""], +//["entli", ""], +//["ousli", ""], +//["alism", ""], +["ation", ""], +//["aliti", ""], +["iviti", ""], ["ement", ""], ["izer", ""], ["able", ""], ["ible", ""], ["alli", ""], ["ator", ""], ["less", ""], ["logi", ""], ["ical", ""], ["ance", ""], ["ence", ""], ["ness", ""], ["ble", ""], ["ment", ""], +//["nal", "n"], +["eli", ""], ["bli", ""], ["ful", ""], ["ant", ""], ["ent", ""], ["ism", ""], ["ate", ""], ["iti", ""], ["ous", ""], ["ive", ""], ["ize", ""], ["ing", ""], ["ion", ""], ["ies", "y"], ["al", ""], ["ou", ""], ["er", ""], ["ed", ""], +//["es", "e"], +["ic", ""], ["ly", ""], ["li", ""], ["s", ""]]); /* he’s (= he is / he has) @@ -63,7 +84,9 @@ const options = { prepare: function (str) { return str // normalize symbols - .replace(/´`’ʼ/g, "'").replace(/_+/g, " ").replace(/&/g, " and ").replace(/\$/g, " USD ").replace(/£/g, " GBP ") + .replace(/´`’ʼ/g, "'") + //.replace(/[_\-]+/g, " ") + .replace(/&/g, " and ").replace(/\$/g, " USD ").replace(/£/g, " GBP ") // explode short forms .replace(/\bi'm\b/g, "i am").replace(/\b(can't|cannot)\b/g, "can not").replace(/\bwon't\b/g, "will not").replace(/([a-z])'s\b/g, "$1 is has").replace(/([a-z])n't\b/g, "$1 not").replace(/([a-z])'ll\b/g, "$1 will").replace(/([a-z])'re\b/g, "$1 are").replace(/([a-z])'ve\b/g, "$1 have").replace(/([a-z])'d\b/g, "$1 would had"); }, diff --git a/dist/module-min/document/search.js b/dist/module-min/document/search.js index 201a4dc..e1de311 100644 --- a/dist/module-min/document/search.js +++ b/dist/module-min/document/search.js @@ -1 +1 @@ -import{DocumentSearchOptions,DocumentSearchResults,EnrichedDocumentSearchResults,MergedDocumentSearchResults,MergedDocumentSearchEntry,EnrichedSearchResults,SearchResults,IntermediateSearchResults}from"../type.js";import{create_object,is_array,is_object,is_string,parse_simple}from"../common.js";import{intersect_union}from"../intersect.js";import Document from"../document.js";import Index from"../index.js";import Resolver from"../resolver.js";import tick from"../profiler.js";Document.prototype.search=function(a,b,c,d){!1,c||(!b&&is_object(a)?(c=a,a=""):is_object(b)&&(c=b,b=0));let e,f,g,h,j,k,l,m,n=[],o=[],p=0,q=!0;if(c){if(is_array(c)&&(c={index:c}),a=c.query||a,e=c.pluck,g=c.merge,j=e||c.field||(j=c.index)&&(j.index?null:j),k=this.tag&&c.tag,h=c.suggest,q=!1!==c.resolve,(q||e||(j=j||this.field,j&&(is_string(j)?e=j:(is_array(j)&&1===j.length&&(j=j[0]),e=j.field||j.index))),!1,f=this.store&&c.enrich&&q,m=f&&c.highlight,b=c.limit||b,l=c.offset||0,b||(b=100),k&&(!this.db||!d))){!1,k.constructor!==Array&&(k=[k]);let c=[];for(let a,b=0;b} */ -export const stemmer = new Map([["ational", ""], ["iveness", ""], ["fulness", ""], ["ousness", ""], ["ization", ""], ["tional", "tion"], ["biliti", ""], ["icate", ""], ["ative", ""], ["alize", ""], ["iciti", ""], ["entli", ""], ["ousli", ""], ["alism", ""], ["ation", ""], ["aliti", ""], ["iviti", ""], ["ement", ""], ["izer", ""], ["able", ""], ["ible", ""], ["alli", ""], ["ator", ""], ["less", ""], ["logi", ""], ["ical", ""], ["ance", ""], ["ence", ""], ["ness", ""], ["ble", ""], ["ment", ""], ["eli", ""], ["bli", ""], ["ful", ""], ["ant", ""], ["ent", ""], ["ism", ""], ["ate", ""], ["iti", ""], ["ous", ""], ["ive", ""], ["ize", ""], ["ing", ""], ["ion", ""], ["ies", "y"], ["al", ""], ["ou", ""], ["er", ""], ["ed", ""], ["es", "e"], ["ic", ""], ["ly", ""], ["li", ""], ["s", ""]]); +export const stemmer = new Map([ +//["ational", "ate"], +//["iveness", ""], +//["fulness", ""], +//["ousness", ""], +["ization", ""], +//["tional", "tion"], +["biliti", ""], ["icate", ""], ["ative", ""], +//["alize", ""], +//["iciti", ""], +//["entli", ""], +//["ousli", ""], +//["alism", ""], +["ation", ""], +//["aliti", ""], +["iviti", ""], ["ement", ""], ["izer", ""], ["able", ""], ["ible", ""], ["alli", ""], ["ator", ""], ["less", ""], ["logi", ""], ["ical", ""], ["ance", ""], ["ence", ""], ["ness", ""], ["ble", ""], ["ment", ""], +//["nal", "n"], +["eli", ""], ["bli", ""], ["ful", ""], ["ant", ""], ["ent", ""], ["ism", ""], ["ate", ""], ["iti", ""], ["ous", ""], ["ive", ""], ["ize", ""], ["ing", ""], ["ion", ""], ["ies", "y"], ["al", ""], ["ou", ""], ["er", ""], ["ed", ""], +//["es", "e"], +["ic", ""], ["ly", ""], ["li", ""], ["s", ""]]); /* he’s (= he is / he has) @@ -63,7 +84,9 @@ const options = { prepare: function (str) { return str // normalize symbols - .replace(/´`’ʼ/g, "'").replace(/_+/g, " ").replace(/&/g, " and ").replace(/\$/g, " USD ").replace(/£/g, " GBP ") + .replace(/´`’ʼ/g, "'") + //.replace(/[_\-]+/g, " ") + .replace(/&/g, " and ").replace(/\$/g, " USD ").replace(/£/g, " GBP ") // explode short forms .replace(/\bi'm\b/g, "i am").replace(/\b(can't|cannot)\b/g, "can not").replace(/\bwon't\b/g, "will not").replace(/([a-z])'s\b/g, "$1 is has").replace(/([a-z])n't\b/g, "$1 not").replace(/([a-z])'ll\b/g, "$1 will").replace(/([a-z])'re\b/g, "$1 are").replace(/([a-z])'ve\b/g, "$1 have").replace(/([a-z])'d\b/g, "$1 would had"); }, diff --git a/package-lock.json b/package-lock.json index 8a8df5e..7349773 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,12 +1,12 @@ { "name": "flexsearch", - "version": "0.8.157", + "version": "0.8.158", "lockfileVersion": 2, "requires": true, "packages": { "": { "name": "flexsearch", - "version": "0.8.157", + "version": "0.8.158", "funding": [ { "type": "github", diff --git a/package.json b/package.json index 3f212c0..3f2bb2c 100644 --- a/package.json +++ b/package.json @@ -2,7 +2,7 @@ "public": true, "preferGlobal": false, "name": "flexsearch", - "version": "0.8.157", + "version": "0.8.158", "description": "Next-Generation full-text search library for Browser and Node.js", "homepage": "https://github.com/nextapps-de/flexsearch/", "author": "Thomas Wilkerling", diff --git a/src/document/search.js b/src/document/search.js index 6eca77a..2fcc8f9 100644 --- a/src/document/search.js +++ b/src/document/search.js @@ -587,7 +587,7 @@ function highlight_fields(query, result, index, pluck, template){ // prefix doc_org_cur.substring(0, position) + // match - template.replace("$1", doc_org_cur.substring(position, query_enc_cur.length)) + + template.replace("$1", doc_org_cur.substring(position, position + query_enc_cur.length)) + // suffix doc_org_cur.substring(position + query_enc_cur.length); found = true; diff --git a/test/highlight.js b/test/highlight.js index b470ebd..1840d1f 100644 --- a/test/highlight.js +++ b/test/highlight.js @@ -267,4 +267,40 @@ if(!build_light) describe("Result Highlighting", function(){ }] }); }); + + it("Should have been highlighted results properly (#489)", async function(){ + + const index = new Document({ + encoder: Charset.LatinBalance, + document: { + store: true, + index: [ + { + field: "title", + tokenize: "forward" + }, + { + field: "content", + tokenize: "forward" + } + ], + }, + }); + + await index.addAsync({ + id: 1, + title: "Tips For Decorating Easter Eggs", + content: `Published: April 14, 2025 From bold color choices to intricate patterns, there are many ways to make your springtime holiday decorations stand out from the rest. The Onion shares tips for dyeing Easter eggs.` + }); + + const search = await index.search("h", { + suggest: true, + enrich: true, + highlight: `$1`, + }); + + expect(search.length).to.equal(1); + expect(search[0].result.length).to.equal(1); + expect(search[0].result[0].highlight).to.equal('Published: April 14, 2025 From bold color choices to intricate patterns, there are many ways to make your springtime holiday decorations stand out from the rest. The Onion shares tips for dyeing Easter eggs.'); + }); });