From 4b90402025dfe049a83cd475b8eb0be8c2395aa8 Mon Sep 17 00:00:00 2001 From: Thomas Wilkerling Date: Tue, 19 Feb 2019 23:43:11 +0100 Subject: [PATCH] Update Readme --- README.md | 14 ++++++++++++-- dist/flexsearch.compact.js | 6 +++--- dist/flexsearch.es5.js | 12 ++++++------ dist/flexsearch.light.js | 2 +- dist/flexsearch.min.js | 4 ++-- dist/flexsearch.node.js | 22 +++++++++++----------- flexsearch.js | 15 ++++++++++++--- package.json | 12 ++++++------ test/test.js | 23 ++++++++++++++++++++++- 9 files changed, 75 insertions(+), 35 deletions(-) diff --git a/README.md b/README.md index aac40fb..1fbef05 100644 --- a/README.md +++ b/README.md @@ -754,7 +754,7 @@ index.search("John", { #### Pagination -FlexSearch is providing a cursor-based pagination which has the ability to inject into the most-inner process. This enables a lot of possible performance improvements. +FlexSearch is providing a cursor-based pagination which has the ability to inject into the most-inner process. This enables the possibility of many performance improvements. > The cursor implementation may be changed often. Just take the cursor as it is and do not expect any specific value or format. @@ -1830,7 +1830,7 @@ FlexSearch ist highly customizable. Make use of the the right false
{number} - Enable/Disable
contextual indexing and also sets contextual distance of relevance. + Enable/Disable contextual indexing and also sets contextual distance of relevance. Depth is the maximum number of words/tokens away a term to be considered as relevant. @@ -1880,6 +1880,16 @@ FlexSearch ist highly customizable. Make use of the the right +## Depth, Threshold, Resolution? + +Whereas __depth is the minimum relevance for the context-based index__, __threshold is the minimum relevance for the lexical index__. Threshold is an enhanced variation of a conventional scoring calculation; it uses document distance and partial distance instead of TF-IDF. The final scoring value is based on 3 kinds of distance. + +Resolution, on the other hand, specifies the maximum scoring value. The final score value is an integer value, so resolution affects how many segments the score may have. When the resolution is 1, then there exists just one scoring level for all matched terms. To get more differentiated results, you need to raise the resolution. + +> The difference between the two (_resolution_ - _threshold_) affects performance at higher values. + +The combination of resolution and threshold gives you good control over your matches as well as performance, e.g. when the resolution is 25 and the threshold is 22, then the result only contains matches which are super relevant. The goal should always be to have only those items in the result which are really needed. On top of that, this also improves performance a lot. 
+ ## Tokenizer diff --git a/dist/flexsearch.compact.js b/dist/flexsearch.compact.js index 84f4409..2b01c0a 100644 --- a/dist/flexsearch.compact.js +++ b/dist/flexsearch.compact.js @@ -6,7 +6,7 @@ https://github.com/nextapps-de/flexsearch */ 'use strict';(function(l,F,G){let A;(A=G.define)&&A.amd?A([],function(){return F}):(A=G.modules)?A[l.toLowerCase()]=F:"object"===typeof exports?module.exports=F:G[l]=F})("FlexSearch",function(){function l(b,a){const c=a?a.id:b&&b.id;this.id=c||0===c?c:Z++;this.init(b,a);A(this,"index",function(){return this.a?Object.keys(this.a.index[this.a.keys[0]].f):Object.keys(this.f)});A(this,"length",function(){return this.index.length})}function F(b,a){const c=b.length,d=H(a),e=[];for(let h=0,g=0;h=g&&(b=b[m-(e+.5>>0)],b=b[c]||(b[c]=[]),b[b.length]=d);return e}function O(b,a){if(b){const c=Object.keys(b);for(let d=0,e=c.length;db?1:b?-1:0}function ba(b,a){b=b[u];a=a[u];return ba?1:0}function aa(b,a){const c=u.length;for(let d=0;da?1:0}function I(b,a,c){return b?{page:b,next:a?""+a:null,result:c}:c}function R(b,a,c,d,e,h,g){let m;h=[];let n;const f=b.length;!0===c?(c="0",n=""):n=c&& c.split(":");if(1k;c--)v=g.substring(k,c),M(y,m,v,b,a,n,l,p-1)}break;default:if(f=M(y,m,g,b,1,n,l,p-1),q&&1=l)for(f=m._ctx[g]||(m._ctx[g]=x()),g=this.h[g]||(this.h[g]=U(p-(l||0))),n=a-q,v=a+q+1,0>n&&(n=0),v>r&&(v=r);nm;c--)n=l.substring(m,c),B(r,f,n,a,b,h,q,p-1)}break;default:if(g=B(r,f,l,a,1,h,q,p-1),k&&1=q)for(g=f._ctx[l]||(f._ctx[l]=y()),l=this.f[l]||(this.f[l]=H(p-(q||0))),h=e-k,n=e+k+1,0>h&&(h=0),n>d&&(n=d);hc;d--)e=f[d-1],f[d]=e,b[e]=d;f[c]=a;b[a]=c}}}return b};return b}();return l}(!1),this); +g;p=[];n=[];for(var u=0;uc;d--)e=f[d-1],f[d]=e,b[e]=d;f[c]=a;b[a]=c}}}return b};return b}();return l}(!1),this); diff --git a/flexsearch.js b/flexsearch.js index ccdd19f..6405e72 100644 --- a/flexsearch.js +++ b/flexsearch.js @@ -1383,7 +1383,16 @@ function merge_and_sort(query, bool, result, sort, limit, suggest, where, cursor, has_and, has_not){ - result = 
intersect(result, where ? 0 : limit, cursor, SUPPORT_SUGGESTION && suggest, bool, has_and, has_not); + result = intersect( + + result, + where ? 0 : limit, + SUPPORT_PAGINATION && cursor, + SUPPORT_SUGGESTION && suggest, + bool, + has_and, + has_not + ); let next; @@ -1490,7 +1499,7 @@ } sort = SUPPORT_DOCUMENT && query["sort"]; - cursor = query["page"]; + cursor = SUPPORT_PAGINATION && query["page"]; limit = query["limit"]; threshold = query["threshold"]; enrich = query["enrich"]; @@ -1557,7 +1566,7 @@ _query = queries[i]; } - if(!is_string(_query)){ + if(cursor && !is_string(_query)){ _query["page"] = null; _query["limit"] = 0; diff --git a/package.json b/package.json index c24787e..58fd7e2 100644 --- a/package.json +++ b/package.json @@ -26,12 +26,12 @@ "url": "https://github.com/nextapps-de/flexsearch.git" }, "scripts": { - "build": "node compile RELEASE=min DEBUG=false PROFILER=false SUPPORT_WORKER=true SUPPORT_ENCODER=true SUPPORT_CACHE=true SUPPORT_ASYNC=true SUPPORT_PRESET=true SUPPORT_SUGGESTION=true SUPPORT_SERIALIZE=true SUPPORT_INFO=true SUPPORT_DOCUMENT=true SUPPORT_WHERE=true SUPPORT_LANG_DE=false SUPPORT_LANG_EN=false", - "build-light": "node compile RELEASE=light DEBUG=false PROFILER=false SUPPORT_WORKER=false SUPPORT_ENCODER=false SUPPORT_CACHE=false SUPPORT_ASYNC=false SUPPORT_PRESET=false SUPPORT_SUGGESTION=false SUPPORT_SERIALIZE=false SUPPORT_INFO=false SUPPORT_DOCUMENT=false SUPPORT_WHERE=false SUPPORT_LANG_DE=false SUPPORT_LANG_EN=false", - "build-compact": "node compile RELEASE=compact DEBUG=false PROFILER=false SUPPORT_WORKER=false SUPPORT_ENCODER=true SUPPORT_CACHE=false SUPPORT_ASYNC=true SUPPORT_PRESET=true SUPPORT_SUGGESTION=false SUPPORT_SERIALIZE=false SUPPORT_INFO=false SUPPORT_DOCUMENT=true SUPPORT_WHERE=false SUPPORT_LANG_DE=false SUPPORT_LANG_EN=false", - "build-custom": "node compile RELEASE=custom DEBUG=false PROFILER=false SUPPORT_WORKER=false SUPPORT_ENCODER=false SUPPORT_CACHE=false SUPPORT_ASYNC=false SUPPORT_PRESET=false 
SUPPORT_SUGGESTION=false SUPPORT_SERIALIZE=false SUPPORT_INFO=false SUPPORT_DOCUMENT=false SUPPORT_WHERE=false SUPPORT_LANG_DE=false SUPPORT_LANG_EN=false", - "build-es5": "node compile RELEASE=es5 DEBUG=true PROFILER=false SUPPORT_WORKER=true SUPPORT_ENCODER=true SUPPORT_CACHE=true SUPPORT_ASYNC=true SUPPORT_PRESET=true SUPPORT_SUGGESTION=true SUPPORT_SERIALIZE=true SUPPORT_INFO=true SUPPORT_DOCUMENT=true SUPPORT_WHERE=true SUPPORT_LANG_DE=false SUPPORT_LANG_EN=false LANGUAGE_OUT=ECMASCRIPT5_STRICT", - "build-node": "node compile RELEASE=node DEBUG=false PROFILER=false SUPPORT_WORKER=false SUPPORT_ENCODER=true SUPPORT_CACHE=true SUPPORT_ASYNC=true SUPPORT_PRESET=true SUPPORT_SUGGESTION=true SUPPORT_SERIALIZE=true SUPPORT_INFO=true SUPPORT_DOCUMENT=true SUPPORT_WHERE=true SUPPORT_LANG_DE=false SUPPORT_LANG_EN=false", + "build": "node compile RELEASE=min DEBUG=false PROFILER=false SUPPORT_WORKER=true SUPPORT_ENCODER=true SUPPORT_CACHE=true SUPPORT_ASYNC=true SUPPORT_PRESET=true SUPPORT_SUGGESTION=true SUPPORT_SERIALIZE=true SUPPORT_INFO=true SUPPORT_DOCUMENT=true SUPPORT_OPERATOR=true SUPPORT_WHERE=true SUPPORT_PAGINATION=true SUPPORT_LANG_DE=false SUPPORT_LANG_EN=false", + "build-light": "node compile RELEASE=light DEBUG=false PROFILER=false SUPPORT_WORKER=false SUPPORT_ENCODER=false SUPPORT_CACHE=false SUPPORT_ASYNC=false SUPPORT_PRESET=false SUPPORT_SUGGESTION=false SUPPORT_SERIALIZE=false SUPPORT_INFO=false SUPPORT_DOCUMENT=false SUPPORT_OPERATOR=false SUPPORT_WHERE=false SUPPORT_PAGINATION=false SUPPORT_LANG_DE=false SUPPORT_LANG_EN=false", + "build-compact": "node compile RELEASE=compact DEBUG=false PROFILER=false SUPPORT_WORKER=false SUPPORT_ENCODER=true SUPPORT_CACHE=false SUPPORT_ASYNC=true SUPPORT_PRESET=true SUPPORT_SUGGESTION=false SUPPORT_SERIALIZE=false SUPPORT_INFO=false SUPPORT_DOCUMENT=true SUPPORT_OPERATOR=true SUPPORT_WHERE=false SUPPORT_PAGINATION=false SUPPORT_LANG_DE=false SUPPORT_LANG_EN=false", + "build-custom": "node compile RELEASE=custom 
DEBUG=false PROFILER=false SUPPORT_WORKER=false SUPPORT_ENCODER=false SUPPORT_CACHE=false SUPPORT_ASYNC=false SUPPORT_PRESET=false SUPPORT_SUGGESTION=false SUPPORT_SERIALIZE=false SUPPORT_INFO=false SUPPORT_DOCUMENT=false SUPPORT_OPERATOR=false SUPPORT_WHERE=false SUPPORT_PAGINATION=false SUPPORT_LANG_DE=false SUPPORT_LANG_EN=false", + "build-es5": "node compile RELEASE=es5 DEBUG=true PROFILER=false SUPPORT_WORKER=true SUPPORT_ENCODER=true SUPPORT_CACHE=true SUPPORT_ASYNC=true SUPPORT_PRESET=true SUPPORT_SUGGESTION=true SUPPORT_SERIALIZE=true SUPPORT_INFO=true SUPPORT_DOCUMENT=true SUPPORT_OPERATOR=true SUPPORT_WHERE=true SUPPORT_PAGINATION=true SUPPORT_LANG_DE=false SUPPORT_LANG_EN=false LANGUAGE_OUT=ECMASCRIPT5_STRICT", + "build-node": "node compile RELEASE=node DEBUG=false PROFILER=false SUPPORT_WORKER=false SUPPORT_ENCODER=true SUPPORT_CACHE=true SUPPORT_ASYNC=true SUPPORT_PRESET=true SUPPORT_SUGGESTION=true SUPPORT_SERIALIZE=true SUPPORT_INFO=true SUPPORT_DOCUMENT=true SUPPORT_OPERATOR=true SUPPORT_WHERE=true SUPPORT_PAGINATION=true SUPPORT_LANG_DE=false SUPPORT_LANG_EN=false", "build-lang": "node compile RELEASE=lang", "build-all": "npm run build && npm run build-light && npm run build-compact && npm run build-es5 && npm run build-node", "test-production": "nyc --reporter=html --reporter=text mocha --timeout=3000 test --exit", diff --git a/test/test.js b/test/test.js index 494ac30..f18dccb 100644 --- a/test/test.js +++ b/test/test.js @@ -870,7 +870,6 @@ describe("Encoding", function(){ }); }); - // ------------------------------------------------------------------------ // CJK Word Break // ------------------------------------------------------------------------ @@ -897,6 +896,28 @@ describe("CJK Word Break", function(){ }); }); +// ------------------------------------------------------------------------ +// Cyrillic Word Break +// ------------------------------------------------------------------------ + +describe("Cyrillic Word Break", function(){ + + 
it("Should have been tokenized properly", function(){ + + var index = FlexSearch.create({ + encode: false, + tokenize: function(str){ + return str.replace(/[\x00-\x7F]/g, "").split(""); + } + }); + + index.add(0, "Фообар"); + + expect(index.search("Фообар")).to.include(0); + expect(index.search("бар")).to.include(0); + }); +}); + // ------------------------------------------------------------------------ // Right-To-Left // ------------------------------------------------------------------------