diff --git a/dist/flexsearch.min.js b/dist/flexsearch.min.js index 5044c5c..3fb4649 100644 --- a/dist/flexsearch.min.js +++ b/dist/flexsearch.min.js @@ -5,26 +5,29 @@ * Licence: Apache-2.0 * https://github.com/nextapps-de/flexsearch */ -(function(self){'use strict';var u;const v=/[\W_]+/;function aa(a){if(a=a.toLowerCase())if(a&&this.B&&(a=y(a,this.B)),this.F&&1=b)return f.concat(l.slice(0,b-h));f=f.concat(l);h+=p}return f} -function fa(a,b){const c=C(),d=C(),e=[];for(let f=0;f=e&&(f=e-1);this.o=e;this.threshold=f;this.s=b=c&&c.I||a.tokenize||"strict";this.depth="strict"===b&&h.depth;this.u=A(h.bidirectional,!0);this.l=g="memory"=== -a.optimize;this.i=A(a.fastupdate,!0);this.m=a.minlength||1;this.map=g?B(e-f):C();e=h.resolution||e;f=h.threshold||f;f>=e&&(f=e-1);this.h=e;this.j=f;this.g=g?B(e-f):C();this.D=c&&c.D||a.rtl;this.B=(b=a.matcher||d&&d.B)&&G(b,!1);this.F=(b=a.stemmer||d&&d.F)&&G(b,!0);if(c=b=a.filter||d&&d.filter){c=b;d=C();for(let k=0,l=c.length;k=this.m&&(p||!w[m])){var e=Math.min(this.o/l*t|0,t);if(eg;k--)f=m.substring(g,k),f.length>=this.m&&N(this,w,f,h,a,c)}break}case "reverse":if(2< -d){for(g=d-1;0=this.m&&N(this,w,f,e,a,c);f=""}case "forward":if(1=this.m&&N(this,w,f,e,a,c);break;default:if(N(this,w,m,e,a,c),p&&1=this.m&&!e[m]){if(e[m]=1,k=Math.min((this.h-g)/l*t+h|0,t+(h-1)),kf;N(this,q,n?f:m,k,a,c,n?m:f)}}else g=Math.min(g+1,l-t)}}}}this.i||(this.register[a]=1)}}return this}; -function N(a,b,c,d,e,f,g){let h=g?a.g:a.map;if(!b[c]||g&&!b[c][g])a.l&&(h=h[d]),g?(b[c]||(b[c]=C()),b[c][g]=1,h=h[g]||(h[g]=C())):b[c]=1,h=h[c]||(h[c]=[]),a.l||(h=h[d]||(h[d]=[])),f&&-1!==h.indexOf(e)||(h[h.length]=e,a.i&&(a=a.register[e]||(a.register[e]=[]),a[a.length]=h))} -u.search=function(a,b,c){E(a)?(c=a,a=c.query):E(b)&&(c=b);let d=[],e;var f=this.threshold;let g,h=0;if(c){b=c.limit;h=c.offset||0;f=A(c.threshold,f);var k=c.context;g=c.suggest}if(a&&(a=this.encode(a),e=a.length,1=this.m&&!c[q])if(this.l||g||this.map[q])l[w++]=q,c[q]=1;else return 
d;a=l;e=a.length}if(!e)return d;b||(b=100);c=this.o-f;f=this.h-f;k=this.depth&&1b?d.slice(0,b):d}}return ea(d,b,h,g)};function na(a,b,c,d){c?(d=d&&b>c,a=(a=a[d?b:c])&&a[d?c:b]):a=a[b];return a} -function ma(a,b,c,d,e,f,g,h){let k=[],l=h?a.g:a.map;a.l||(l=na(l,g,h,a.u));if(l){let p=0;d=Math.min(l.length,d);for(let r=0,w=0,q;r=e)));r++);if(p){if(f)return k=1===p?k[0]:[].concat.apply([],k),k.length>e?k.slice(0,e):k;b[b.length]=k;return}}return!c&&k}u.contain=function(a){return!!this.register[a]};u.update=function(a,b){return this.remove(a).add(a,b)}; -u.remove=function(a,b){const c=this.register[a];if(c){if(this.i)for(let d=0,e;db||c)e=e.slice(c,c+b);d&&(e=ra.call(this,e));return{tag:a,result:e}}}function ra(a){const b=Array(a.length);for(let c=0,d;c=b)return f.concat(l.slice(c,b-h+c));f=f.concat(c? +l.slice(c):l);h+=n;c=0}return f}function ha(a,b){const c=B(),d=B(),e=[];for(let f=0;f=e&&(f=e-1);this.s=e;this.threshold=f;this.u=b=c&&c.I||a.tokenize||"strict";this.depth="strict"===b&&h.depth;this.v=z(h.bidirectional,!0);this.m=g="memory"=== +a.optimize;this.i=z(a.fastupdate,!0);this.o=a.minlength||1;this.map=g?A(e-f):B();e=h.resolution||e;f=h.threshold||f;f>=e&&(f=e-1);this.h=e;this.l=f;this.g=g?A(e-f):B();this.F=c&&c.F||a.rtl;this.C=(b=a.matcher||d&&d.C)&&H(b,!1);this.G=(b=a.stemmer||d&&d.G)&&H(b,!0);if(c=b=a.filter||d&&d.filter){c=b;d=B();for(let k=0,l=c.length;k=this.o&&(n||!r[m])){var e=Math.min(this.s/l*p|0,p);if(eg;k--)f=m.substring(g,k),f.length>=this.o&&M(this,r,f,h,a,c)}break}case "reverse":if(2< +d){for(g=d-1;0=this.o&&M(this,r,f,e,a,c);f=""}case "forward":if(1=this.o&&M(this,r,f,e,a,c);break;default:if(M(this,r,m,e,a,c),n&&1=this.o&&!e[m]){if(e[m]=1,k=Math.min((this.h-g)/l*p+h|0,p+(h-1)),kf;M(this,v,t?f:m,k,a,c,t?m:f)}}else g=Math.min(g+1,l-p)}}}}this.i||(this.register[a]=1)}}return this}; +function M(a,b,c,d,e,f,g){let 
h=g?a.g:a.map;if(!b[c]||g&&!b[c][g])a.m&&(h=h[d]),g?(b=b[c]||(b[c]=B()),b[g]=1,h=h[g]||(h[g]=B())):b[c]=1,h=h[c]||(h[c]=[]),a.m||(h=h[d]||(h[d]=[])),f&&-1!==h.indexOf(e)||(h[h.length]=e,a.i&&(a=a.register[e]||(a.register[e]=[]),a[a.length]=h))} +u.search=function(a,b,c){D(a)?(c=a,a=c.query):D(b)&&(c=b);let d=[],e;var f=this.threshold;let g,h=0;if(c){b=c.limit;h=c.offset||0;f=z(c.threshold,f);var k=c.context;g=c.suggest}if(a&&(a=this.encode(a),e=a.length,1=this.o&&!c[v])if(this.m||g||this.map[v])l[r++]=v,c[v]=1;else return d;a=l;e=a.length}if(!e)return d;b||(b=100);c=this.s-f;f=this.h-f;k=this.depth&&1=e)));r++);if(q){if(g)return oa(l,e,0);b[b.length]=l;return}}return!c&&l}function oa(a,b,c){a=1===a.length?a[0]:[].concat.apply([],a);return c||a.length>b?a.slice(c,c+b):a} +function pa(a,b,c,d){c?(d=d&&b>c,a=(a=a[d?b:c])&&a[d?c:b]):a=a[b];return a}u.contain=function(a){return!!this.register[a]};u.update=function(a,b){return this.remove(a).add(a,b)}; +u.remove=function(a,b){const c=this.register[a];if(c){if(this.i)for(let d=0,e;db||c)e=e.slice(c,c+b);d&&(e=va.call(this,e));return{tag:a,result:e}}}function va(a){const b=Array(a.length);for(let c=0,d;c What is not included yet but comes soon? -- ~~WebWorker (almost done)~~ +- ~~WebWorker~~ - Worker for Node.js (almost done) -- Inline Worker (optionally) -- Offset-Pagination (almost done) +- ~~Inline Worker (optionally)~~ +- ~~Offset-Pagination~~ - ~~Export/Import (not implemented yet)~~ - ~~Tags (almost done)~~ -- ~~Bundles: Light, Compact, Full (almost done)~~ +- ~~Bundles: Light, Compact, Full~~ - Test Automation (needs to be migrated) - Benchmark Suite (almost done) What is not included yet and comes later? -- Engines (almost done) +- Engines What will be dropped? 
@@ -442,7 +444,7 @@ const index = new Document({ Note: The context options from the field "content" also gets inherited by the corresponding field options, whereas this field options was inherited by the global option (so threshold would be "3" if not set in context options). -#### Nested Data Fields +### Nested Data Fields ```json { @@ -548,7 +550,7 @@ One advantage here is you can perform a search through the same field with diffe > When passing field-specific options you need to provide the full configuration for each field. They get not inherited like the document descriptor. -#### Complex Documents +### Complex Documents You need to follow 2 rules for your documents: @@ -625,7 +627,7 @@ index.search(query, { }); ``` -#### Not Supported Documents (Sequential Data) +### Not Supported Documents (Sequential Data) This example breaks both rules from above: @@ -699,7 +701,7 @@ add([{ You can skip the first loop when your document data has just one index as the outer array. -#### Join / Append Arrays +### Join / Append Arrays On the complex example above, the field `keywords` is an array but here the markup did not have brackets like `keywords[]`. That will also detect the array but instead of appending each entry to a new context, the array will be joined into on large string and added to the index. @@ -968,15 +970,25 @@ In this case the result-set looks like: }] ``` +### Limit & Offset + > By default, every query is limited to 100 entries. Unbounded queries leads into issues. You need to set the limit as an option to adjust the size. +You can set the limit and the offset for each query: + +```js +index.search(query, { limit: 20, offset: 100 }); +``` + +> You cannot pre-count the size of the result-set. That's a limit by the design of FlexSearch. When you really need a count of all results you are able to page through, then just assign a high enough limit and get back all results and apply your paging offset manually (this works also on server-side). 
FlexSearch is fast enough that this isn't an issue. + ## Document Stores -> Never use a store when: 1. you did not need the original data to process your query results, or 2. you already have the contents/documents stored elsewhere (outside the index). +> Do not use a store when: 1. an array of IDs as the result is good enough, or 2. you already have the contents/documents stored elsewhere (outside the index). -Only a document index have a store. You can use a document index instead of a flat index to get this functionality when only storing ID-content-pairs. +Only a document index can have a store. You can use a document index instead of a flat index to get this functionality also when only storing ID-content-pairs. -This will add the whole original content to the store (text as string or documents as objects): +This will add the whole original content to the store: ```js const index = new Document({ doc: "content", store: true }); @@ -1073,6 +1085,25 @@ Your results are now looking like: Both field "author" and "email" are not indexed. +### Extern Stores + +When the data already exists in your application runtime, you do not need to add it to the store again. You can assign your data as "extern store": + +```js +const data = [{ ... }, { ... }, { ... }]; + +const index = new Document({ + doc: "content", + extern: data // <--- extern store +}); + +index.add(data[0]); +``` + +> Entries from an extern store are not managed/changed automatically by FlexSearch. When removing items from the index, the corresponding data item from the extern dataset stays untouched. Please note that using the method `index.set(id, data)` will also change extern stores. + +When you don't use the data anywhere else in your application (just for searching), it is better to use an internal store and select just the fields you need in the results, which costs less memory. + ## WebWorker The whole worker implementation has changed by also keeping Node.js support in mind. 
The good news is worker will also get supported by Node.js by the library. @@ -1132,8 +1163,10 @@ When you perform a field search through all fields then this task is perfectly b Above we have seen that documents will create worker automatically for each field. You can also create a WorkerIndex directly (same like using `Index` instead of `Document`). +Use as an ES6 module: + ```js -import WorkerIndex from "./adapter.js"; +import WorkerIndex from "./worker/index.js"; const index = new WorkerIndex(options); index.add(1, "some") .add(2, "content") @@ -1141,6 +1174,16 @@ index.add(1, "some") .add(4, "index"); ``` +Or when the bundled version is used instead: + +```js +var index = new FlexSearch.Worker(options); +index.add(1, "some") + .add(2, "content") + .add(3, "to") + .add(4, "index"); +``` + Such a WorkerIndex works pretty much the same as a created instance of `Index`. > A WorkerIndex only support the `async` variant of all methods. That means when you call `index.search()` on a WorkerIndex this will perform also in async the same way as `index.searchAsync()` will do. 
diff --git a/src/adapter.js b/src/adapter.js deleted file mode 100644 index 70a844e..0000000 --- a/src/adapter.js +++ /dev/null @@ -1,82 +0,0 @@ -import { promise as Promise } from "./polyfill.js"; -import { is_function, is_object } from "./common.js"; - -let counter = 0; - -/** - * @param {number|string|Object} id - * @param {Object=} options - * @constructor - */ - -function WorkerAdapter(id, options){ - - if(is_object(id)){ - - options = /** @type {Object} */ (id); - id = 0; - } - - if(options && is_function(options["encode"])){ - - options["encode"] = options["encode"].toString(); - } - - const self = this; - - this.id = id || counter++; - this.resolver = null; - this.worker = new Worker("worker.js", { type: "module" }); - this.worker.onmessage = function(e){ self.resolver(e["data"]/*["results"]*/) }; - this.worker.postMessage({ task: "create", /*id: this.id,*/ options: options }); -} - -export default WorkerAdapter; - -register("add"); -register("append"); -register("search"); -register("update"); -register("remove"); - -function register(key){ - - WorkerAdapter.prototype[key] = - WorkerAdapter.prototype[key + "Async"] = function(){ - - const self = this; - const args = [].slice.call(arguments); - const arg = args[args.length - 1]; - let callback; - - if(is_function(arg)){ - - callback = arg; - args.splice(args.length - 1, 1); - } - - const promise = new Promise(function(resolve){ - - self.worker.postMessage({ task: key, /*id: this.id,*/ args: args }); - - if(key === "search"){ - - self.resolver = resolve; - } - else{ - - resolve(); - } - }); - - if(callback){ - - promise.then(callback); - return this; - } - else{ - - return promise; - } - }; -} diff --git a/src/common.js b/src/common.js index 9f93426..878eb1f 100644 --- a/src/common.js +++ b/src/common.js @@ -3,16 +3,6 @@ export function parse_option(value, default_value){ return typeof value !== "undefined" ? 
value : default_value; } -/** - * @param {!Object} obj - * @returns {Array} - */ - -export function get_keys(obj){ - - return Object.keys(obj); -} - /** * @param {!number} count * @returns {Array} @@ -42,6 +32,16 @@ export function create_arrays(count){ return array; } +/** + * @param {!Object} obj + * @returns {Array} + */ + +export function get_keys(obj){ + + return Object.keys(obj); +} + export function create_object(){ return Object.create(null); diff --git a/src/document.js b/src/document.js index a6d4e52..dc735c3 100644 --- a/src/document.js +++ b/src/document.js @@ -20,11 +20,11 @@ import { import Index from "./index.js"; import { DocumentInterface } from "./type.js"; import Cache, { searchCache } from "./cache.js"; -import { create_object, is_array, is_string, is_object, parse_option } from "./common.js"; +import { create_object, is_array, is_string, is_object, parse_option, get_keys } from "./common.js"; import apply_async from "./async.js"; import { intersect, intersect_union } from "./intersect.js"; import { exportDocument, importDocument } from "./serialize.js"; -import WorkerAdapter from "./adapter.js"; +import WorkerIndex from "./worker/index.js"; /** * @constructor @@ -53,8 +53,9 @@ function Document(options){ if(SUPPORT_STORE){ - this.store = (opt = options["store"]) && create_object(); - this.storetree = opt && (opt !== true) && []; + this.extern = !!(opt = options["extern"]); + this.storetree = !this.extern && (opt = options["store"]) && (opt !== true) && []; + this.store = opt && (this.extern ? 
opt : create_object()); } if(SUPPORT_TAGS){ @@ -66,6 +67,9 @@ function Document(options){ if(SUPPORT_CACHE){ this.cache = (opt = options["cache"]) && new Cache(opt); + + // do not apply cache again for the indexes + options["cache"] = false; } @@ -76,6 +80,8 @@ function Document(options){ if(SUPPORT_ASYNC){ + // this switch is used by recall of promise callbacks + this.async = false; } @@ -101,7 +107,7 @@ function parse_descriptor(options){ else if(!is_array(field)){ field_options = field; - field = Object.keys(field); + field = get_keys(field); } for(let i = 0, key, opt; i < field.length; i++){ @@ -120,11 +126,20 @@ function parse_descriptor(options){ opt = is_object(opt) ? Object.assign({}, options, opt) : options; - index[key] = this.worker ? + if(this.worker){ - new WorkerAdapter(opt) - : - new Index(opt, this.register); + index[key] = new WorkerIndex(opt); + + if(!index[key].worker){ + + this.worker = false; + } + } + + if(!this.worker){ + + index[key] = new Index(opt, this.register); + } this.tree[i] = parse_tree(key, this.marker); this.field[i] = key; @@ -349,7 +364,7 @@ Document.prototype.add = function(id, content, _append){ } } - if(SUPPORT_STORE && this.store){ + if(SUPPORT_STORE && this.store && !this.extern){ let store; @@ -374,7 +389,6 @@ Document.prototype.add = function(id, content, _append){ this.store[id] = store || content; } - } return this; @@ -441,7 +455,7 @@ Document.prototype.remove = function(id){ } } - if(SUPPORT_STORE && this.store){ + if(SUPPORT_STORE && this.store && !this.extern){ delete this.store[id]; } @@ -452,16 +466,24 @@ Document.prototype.remove = function(id){ return this; }; -Document.prototype.search = function(query, limit, options, resolve){ +/** + * @param {!string|Object} query + * @param {number|Object=} limit + * @param {Object=} options + * @param {Array=} _resolve For internal use only. 
+ * @returns {Promise|Array} + */ + +Document.prototype.search = function(query, limit, options, _resolve){ if(is_object(query)){ - options = query; + options = /** @type {Object} */ (query); query = options["query"]; } else if(is_object(limit)){ - options = limit; + options = /** @type {Object} */ (limit); limit = 0; } @@ -500,7 +522,7 @@ Document.prototype.search = function(query, limit, options, resolve){ else if(!is_array(field)){ field_options = field; - field = Object.keys(field); + field = get_keys(field); } } @@ -536,40 +558,7 @@ Document.prototype.search = function(query, limit, options, resolve){ field || (field = this.field); bool = bool && ((field.length > 1) || (tag && (tag.length > 1))); - // use Promise.all to get a change of processing requests in parallel - - if(!resolve && (this.worker || this.async)){ - - resolve = []; - - for(let i = 0, key; i < field.length; i++){ - - let opt; - - key = field[i]; - - if(!is_string(key)){ - - opt = key; - key = key["field"]; - } - else if(field_options){ - - opt = field_options[key]; - } - - resolve[i] = this.index[key].searchAsync(query, limit, opt || options); - } - - const self = this; - - // anyone knows a better workaround of optionally having async promises? 
- - return Promise.all(resolve).then(function(resolve){ - - self.search(query, limit, options, resolve); - }); - } + const promises = !_resolve && (this.worker || this.async) && []; // TODO solve this in one loop below @@ -589,9 +578,17 @@ Document.prototype.search = function(query, limit, options, resolve){ opt = field_options[key]; } - if(resolve){ + if(promises){ - res = resolve[i]; + promises[i] = this.index[key].searchAsync(query, limit, opt || options); + + // just collect and continue + + continue; + } + else if(_resolve){ + + res = _resolve[i]; } else{ @@ -653,6 +650,23 @@ Document.prototype.search = function(query, limit, options, resolve){ } } + if(promises){ + + const self = this; + + // anyone knows a better workaround of optionally having async promises? + // the promise.all() needs to be wrapped into additional promise, + // otherwise the recursive callback wouldn't run before return + + return new Promise(function(resolve){ + + Promise.all(promises).then(function(result){ + + resolve(self.search(query, limit, options, result)); + }); + }); + } + if(!count){ // fast path "not found" diff --git a/src/index.js b/src/index.js index 0ad00ce..9c69b71 100644 --- a/src/index.js +++ b/src/index.js @@ -319,8 +319,8 @@ Index.prototype.push_index = function(dupes, value, score, id, append, keyword){ if(keyword){ - dupes[value] || (dupes[value] = create_object()); - dupes[value][keyword] = 1; + dupes = dupes[value] || (dupes[value] = create_object()); + dupes[keyword] = 1; arr = arr[keyword] || (arr[keyword] = create_object()); } @@ -453,7 +453,7 @@ Index.prototype.search = function(query, limit, options){ if(depth){ - arr = this.add_result(result, suggest, resolution_ctx, limit, length === 2, term, keyword); + arr = this.add_result(result, suggest, resolution_ctx, limit, offset, length === 2, term, keyword); // when suggestion enabled just forward keyword if term was found // as long as the result is empty forward the pointer also @@ -465,7 +465,7 @@ 
Index.prototype.search = function(query, limit, options){ } else{ - arr = this.add_result(result, suggest, resolution, limit, length === 1, term); + arr = this.add_result(result, suggest, resolution, limit, offset, length === 1, term); } if(arr){ @@ -497,20 +497,7 @@ Index.prototype.search = function(query, limit, options){ // fast path optimization - result = result[0]; - - if(result.length === 1){ - - result = result[0]; - } - else{ - - result = concat(result); - } - - // TODO apply offset - - return result.length > limit ? result.slice(0, limit) : result; + return single_result(result[0], limit, offset); } } } @@ -518,6 +505,127 @@ Index.prototype.search = function(query, limit, options){ return intersect(result, limit, offset, suggest); }; +/** + * Returns an array when the result is done (to stop the process immediately), + * returns false when suggestions is enabled and no result was found, + * or returns nothing when a set was pushed successfully to the results + * + * @private + * @param {Array} result + * @param {Array} suggest + * @param {number} resolution + * @param {number} limit + * @param {number} offset + * @param {boolean} single_term + * @param {string} term + * @param {string=} keyword + * @return {Array>|boolean|undefined} + */ + +Index.prototype.add_result = function(result, suggest, resolution, limit, offset, single_term, term, keyword){ + + let word_arr = []; + let arr = keyword ? 
this.ctx : this.map; + + if(!this.optimize){ + + arr = get_array(arr, term, keyword, this.bidirectional); + } + + if(arr){ + + let count = 0; + const arr_len = Math.min(arr.length, resolution); + + for(let x = 0, size = 0, tmp, len; x < arr_len; x++){ + + tmp = arr[x]; + + if(this.optimize){ + + tmp = get_array(tmp, term, keyword, this.bidirectional); + } + + if(tmp && single_term){ + + len = tmp.length; + + if(len <= offset){ + + offset -= len; + tmp = null; + } + else{ + + if(offset){ + + tmp = tmp.slice(offset); + offset = 0; + } + } + } + + if(tmp){ + + // keep score (sparse array): + //word_arr[x] = tmp; + + // simplified score order: + word_arr[count++] = tmp; + + if(single_term){ + + size += tmp.length; + + if(size >= limit){ + + // fast path optimization + + break; + } + } + } + } + + if(count){ + + if(single_term){ + + // fast path optimization + // offset was already applied at this point + + return single_result(word_arr, limit, 0); + } + + result[result.length] = word_arr; + return; + } + } + + // return an empty array will stop the loop, + // to prevent stop when using suggestions return a false value + + return !suggest && word_arr; +}; + +function single_result(result, limit, offset){ + + if(result.length === 1){ + + result = result[0]; + } + else{ + + result = concat(result); + } + + return offset || (result.length > limit) ? 
+ + result.slice(offset, offset + limit) + : + result; +} + function get_array(arr, term, keyword, bidirectional){ if(keyword){ @@ -535,101 +643,6 @@ function get_array(arr, term, keyword, bidirectional){ return arr; } -/** - * Returns an array when the result is done (to stop the process immediately), - * returns false when suggestions is enabled and no result was found, - * or returns nothing when a set was pushed successfully to the results - * - * @private - * @param {Array} result - * @param {Array} suggest - * @param {number} resolution - * @param {number} limit - * @param {boolean} just_one_loop - * @param {string} term - * @param {string=} keyword - * @return {Array>|boolean|undefined} - */ - -Index.prototype.add_result = function(result, suggest, resolution, limit, just_one_loop, term, keyword){ - - let word_arr = []; - let arr = keyword ? this.ctx : this.map; - - if(!this.optimize){ - - arr = get_array(arr, term, keyword, this.bidirectional); - } - - if(arr){ - - let count = 0; - const arr_len = Math.min(arr.length, resolution); - - for(let x = 0, size = 0, tmp; x < arr_len; x++){ - - tmp = arr[x]; - - if(this.optimize){ - - tmp = get_array(tmp, term, keyword, this.bidirectional); - } - - if(tmp){ - - // TODO apply offset - - // keep score (sparse array): - //word_arr[x] = arr; - - // simplified score order: - word_arr[count++] = tmp; - - if(just_one_loop){ - - size += tmp.length; - - if(size >= limit){ - - // fast path optimization - - break; - } - } - } - } - - if(count){ - - if(just_one_loop){ - - // TODO apply offset - - // fast path optimization - - if(count === 1){ - - word_arr = word_arr[0]; - } - else{ - - word_arr = concat(word_arr); - } - - return word_arr.length > limit ? 
word_arr.slice(0, limit) : word_arr; - } - - result[result.length] = word_arr; - return; - } - } - - // return an empty array will stop the loop, - // to prevent stop when using suggestions return a false value - - return !suggest && word_arr; -}; - Index.prototype.contain = function(id){ return !!this.register[id]; diff --git a/src/intersect.js b/src/intersect.js index 52202c7..09054f9 100644 --- a/src/intersect.js +++ b/src/intersect.js @@ -18,7 +18,7 @@ export function intersect(arrays, limit, offset, suggest) { // return a.length - b.length; // }); - let check = create_object(); + let check; let count = 0; if(suggest){ @@ -26,16 +26,16 @@ export function intersect(arrays, limit, offset, suggest) { suggest = []; } - // terms - for(let x = 0; x < length; x++){ + // terms in reversed order! + for(let x = length - 1; x >= 0; x--){ const word_arr = arrays[x]; const word_arr_len = word_arr.length; const new_check = create_object(); - let found = !x; + let found = !check; - // relevance + // but relevance in forward order for(let y = 0; y < word_arr_len; y++){ //const arr = [].concat.apply([], word_arr); @@ -44,23 +44,18 @@ export function intersect(arrays, limit, offset, suggest) { if(arr_len){ - if(suggest){ - - suggest[y] = []; - } - // ids for(let z = 0, count_suggest = 0, id; z < arr_len; z++){ id = arr[z]; - if(!x){ + if(!check){ new_check[id] = 1; } else if(check[id]){ - if(x === (length - 1)){ + if(!x){ if(offset){ @@ -82,7 +77,9 @@ export function intersect(arrays, limit, offset, suggest) { if(suggest && (count_suggest < limit)){ - suggest[y][count_suggest++] = id; + const tmp = suggest[y] || (suggest[y] = []); + + tmp[count_suggest++] = id; } new_check[id] = 1; @@ -109,18 +106,30 @@ export function intersect(arrays, limit, offset, suggest) { res = suggest[i]; len = res && res.length; - // TODO apply offset + if(len && offset){ + + if(len <= offset){ + + offset -= len; + len = 0; + } + else{ + + len -= offset; + } + } if(len){ if(count + len >= limit){ - 
return result.concat(res.slice(0, limit - count)); + return result.concat(res.slice(offset, limit - count + offset)); } else{ - result = result.concat(res); + result = result.concat(offset ? res.slice(offset) : res); count += len; + offset = 0; } } } diff --git a/src/lang.js b/src/lang.js index b607a15..0a68de0 100644 --- a/src/lang.js +++ b/src/lang.js @@ -1,5 +1,5 @@ import { IndexInterface, DocumentInterface } from "./type.js"; -import { create_object } from "./common.js"; +import { create_object, get_keys } from "./common.js"; /** * @param {!string} str @@ -161,7 +161,7 @@ export function init_filter(words){ export function init_stemmer_or_matcher(obj, is_stemmer){ - const keys = Object.keys(obj); + const keys = get_keys(obj); const length = keys.length; const final = []; diff --git a/src/webpack.js b/src/webpack.js index b4888aa..9cebcb8 100644 --- a/src/webpack.js +++ b/src/webpack.js @@ -1,7 +1,7 @@ import { SUPPORT_ASYNC, SUPPORT_DOCUMENT, SUPPORT_CACHE, SUPPORT_SERIALIZE } from "./config.js"; import Document from "./document.js"; import Index from "./index.js"; -import WorkerAdapter from "./adapter.js"; +import WorkerIndex from "./worker/index.js"; import { registerCharset, registerLanguage } from "./global.js"; /** @export */ Document.prototype.add; @@ -49,19 +49,18 @@ if(SUPPORT_SERIALIZE){ /** @export */ Document.prototype.import; } -const root = self || window; +const root = self; +let tmp; const FlexSearch = { "Index": Index, "Document": SUPPORT_DOCUMENT ? 
Document : null, - "WorkerAdapter": WorkerAdapter, + "Worker": WorkerIndex, "registerCharset": registerCharset, "registerLanguage": registerLanguage }; -let tmp; - if((tmp = root["define"]) && tmp["amd"]){ tmp([], function(){ diff --git a/src/worker.js b/src/worker/handler.js similarity index 53% rename from src/worker.js rename to src/worker/handler.js index 0d22601..3e22434 100644 --- a/src/worker.js +++ b/src/worker/handler.js @@ -1,29 +1,44 @@ -import Index from "./index.js"; -import { is_string, is_object } from "./common.js"; +import Index from "../index.js"; -let index, id; +export default function handler(event) { -onmessage = function(event) { - - const data = event.data; + /** @type Index */ + const index = self["_index"]; + const data = event["data"]; const args = data["args"]; switch(data["task"]){ - case "create": + case "init": const options = data["options"] || {}; + const factory = data["factory"]; const encode = options["encode"]; options["cache"] = false; - id = data["id"]; + //root["_id"] = data["id"]; - if(is_string(encode)){ + if(typeof encode === "string"){ options["encode"] = new Function("return " + encode)(); } - index = new Index(options); + if(factory){ + + // export the FlexSearch global payload to "self" + new Function("return " + factory)()(self); + + /** @type Index */ + self["_index"] = self["FlexSearch"]["Index"](options); + + // destroy the exported payload + delete self["FlexSearch"]; + } + else{ + + self["_index"] = new Index(options); + } + break; case "add": diff --git a/src/worker/index.js b/src/worker/index.js new file mode 100644 index 0000000..0488b5d --- /dev/null +++ b/src/worker/index.js @@ -0,0 +1,134 @@ +import { promise as Promise } from "../polyfill.js"; +import { is_function, is_object } from "../common.js"; +import handler from "./handler.js"; + +let counter = 0; + +/** + * @param {number|string|Object} id + * @param {Object=} options + * @constructor + */ + +function WorkerIndex(id, options){ + + let opt; + + 
if(is_object(id)){ + + options = /** @type {Object} */ (id); + id = 0; + } + else{ + + if(options){ + + if(is_function(opt = options["encode"])){ + + options["encode"] = opt.toString(); + } + } + else{ + + options = {}; + } + } + + // the factory is the outer wrapper from the build + // we use "self" as a trap for node.js + + let factory = (self||window)["_factory"]; + + if(factory){ + + factory = factory.toString(); + } + + const _self = this; + + this.id = id || counter++; + this.resolver = null; + this.worker = create(factory); + + if(!this.worker){ + + return; + } + + this.worker.onmessage = function(e){ _self.resolver(e["data"]/*["results"]*/) }; + this.worker.postMessage({ "task": "init", "factory": factory, /*id: this.id,*/ "options": options }); +} + +export default WorkerIndex; + +register("add"); +register("append"); +register("search"); +register("update"); +register("remove"); + +function register(key){ + + WorkerIndex.prototype[key] = + WorkerIndex.prototype[key + "Async"] = function(){ + + const self = this; + const args = [].slice.call(arguments); + const arg = args[args.length - 1]; + let callback; + + if(is_function(arg)){ + + callback = arg; + args.splice(args.length - 1, 1); + } + + const promise = new Promise(function(resolve){ + + self.worker.postMessage({ "task": key, /*id: this.id,*/ "args": args }); + + if(key === "search"){ + + self.resolver = resolve; + } + else{ + + resolve(); + } + }); + + if(callback){ + + promise.then(callback); + return this; + } + else{ + + return promise; + } + }; +} + +function create(factory){ + + let worker + + try{ + + worker = factory ? 
+ + new Worker(URL.createObjectURL( + + new Blob([ + + "onmessage=" + handler.toString() + + ], { "type": "text/javascript" }) + )) + : + new Worker("worker.js", { type: "module" }); + } + catch(e){} + + return worker; +} \ No newline at end of file diff --git a/src/worker/worker.js b/src/worker/worker.js new file mode 100644 index 0000000..4772d6c --- /dev/null +++ b/src/worker/worker.js @@ -0,0 +1,2 @@ +import handler from "./handler.js"; +onmessage = handler; diff --git a/task/build.js b/task/build.js index f484f9e..ad0637d 100644 --- a/task/build.js +++ b/task/build.js @@ -137,7 +137,7 @@ let parameter = (function(opt){ rewrite_polyfills: use_polyfill || false, // isolation_mode: "IIFE", - output_wrapper: "(function(self){%output%}(module));" + output_wrapper: "(function(self){%output%}(this));" //formatting: "PRETTY_PRINT" }); @@ -232,6 +232,8 @@ else{ preserve = preserve.replace("* FlexSearch.js", "* FlexSearch.js v" + package_json.version + (light_version ? " (Light)" : es5_version ? " (ES5)" : "")); build = preserve.substring(0, preserve.indexOf('*/') + 2) + "\n" + build; + build = build.replace("(function(self){'use strict';", "(function _f(self){'use strict';try{if(module)self=module}catch(e){}self._factory=_f;"); + if(release === "pre"){ fs.existsSync("test/dist") || fs.mkdirSync("test/dist");