diff --git a/README.md b/README.md index 3779733..495569a 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,4 @@ +


Search Library @@ -10,12 +11,13 @@

-

World's fastest and most memory efficient full text search library with zero dependencies.

- When it comes to raw search speed FlexSearch outperforms every single searching library out there and also provides flexible search capabilities like multi-word matching, phonetic transformations or partial matching. -It also has the __most memory-efficient index__. Keep in mind that updating / removing existing items from the index has a significant cost. When your index needs to be updated continuously then BulkSearch may be a better choice. +It also has the most memory-efficient index. + FlexSearch also provides you a non-blocking asynchronous processing model as well as web workers to perform any updates or queries on the index in parallel through dedicated balanced threads. Installation Guide  •  API Reference  •  Example Options  •  Custom Builds @@ -39,32 +41,30 @@ Supported Module Definitions: All Features: These features are not available in the 50% smaller light version: - WebWorker -- Asynchronous +- Asynchronous Processing - Cache -- Built-in encoders except 'balance' and 'icase' (you can still pass in customs) -- Built-in stemmer and filter (you can still pass in customs) +- Built-in encoders except 'balance' and 'icase' (you can still pass in customs) - Debug logging -- _index.info()_ method +- Methods: _index.info()_ -The light version is just available as compiled version (flexsearch.light.js). +The light version is just available as compiled version (flexsearch.light.js). -> You can also make Custom Builds pretty simple +> It is also pretty simple to make Custom Builds #### Contextual Search @@ -75,7 +75,7 @@ Imagine you add a text block of some sentences to an index ID. Assuming the quer In this way contextual search also improves the results of relevance-based queries on large amount of text data.

- +

__Note:__ This feature is actually not enabled by default. @@ -105,7 +105,7 @@ __Note:__ It is slightly faster to use no web worker when the index or query isn Memory - Large: ~ 5 Mb per 100,000 words + Large: ~ 1 Mb per 100,000 words Tiny: ~ 100 Kb per 100,000 words @@ -138,6 +138,18 @@ __Note:__ It is slightly faster to use no web worker when the index or query isn No Yes + + + Super-Partial-Matching + Yes + No + + + + Wildcards (Query) + Yes + No + --> @@ -189,8 +201,8 @@ var FlexSearch = require("./flexsearch.js"); Global methods: - FlexSearch.__create__(\) -- FlexSearch.__addMatcher__({_KEY: VALUE_}) -- FlexSearch.__register__(name, encoder) +- FlexSearch.__registerMatcher__({_KEY: VALUE_}) +- FlexSearch.__registerEncoder__(name, encoder) - FlexSearch.__encode__(name, string) Index methods: @@ -275,6 +287,29 @@ index.search("John", function(result){ // array of results }); ``` + +Pass custom options for each query: + +```js +index.search({ + + query: "John", + limit: 1000, + threshold: 5, // >= initial threshold + depth: 3 // <= initial depth +}); +``` + +Get suggestions of a query: + +```js +index.search({ + + query: "John", + suggest: true +}); +``` + #### Update item to the index @@ -325,11 +360,11 @@ index.init({ #### Add custom matcher -> FlexSearch.__addMatcher({_REGEX: REPLACE_})__ +> FlexSearch.__registerMatcher({_REGEX: REPLACE_})__ Add global matchers for all instances: ```js -FlexSearch.addMatcher({ +FlexSearch.registerMatcher({ 'ä': 'a', // replaces all 'ä' to 'a' 'ó': 'o', @@ -364,10 +399,10 @@ var index = new FlexSearch({ ##### Register a global encoder to be used by all instances -> FlexSearch.__register(name, encoder)__ +> FlexSearch.__registerEncoder(name, encoder)__ ```js -FlexSearch.register('whitespace', function(str){ +FlexSearch.registerEncoder('whitespace', function(str){ return str.replace(/ /g, ''); }); @@ -393,7 +428,7 @@ var encoded = FlexSearch.encode("whitespace", "sample text"); ##### Mixup/Extend multiple encoders ```js 
-FlexSearch.register('mixed', function(str){ +FlexSearch.registerEncoder('mixed', function(str){ str = this.encode("icase", str); // built-in str = this.encode("whitespace", str); // custom @@ -402,7 +437,7 @@ FlexSearch.register('mixed', function(str){ }); ``` ```js -FlexSearch.register('extended', function(str){ +FlexSearch.registerEncoder('extended', function(str){ str = this.encode("custom", str); @@ -426,18 +461,89 @@ var index = new FlexSearch({ } }); ``` + +#### Add language-specific stemmer and/or filter + +> __Stemmer:__ several linguistic mutations of the same word (e.g. "run" and "running") + +> __Filter:__ a blacklist of words to be filtered out from indexing at all (e.g. "and", "to" or "be") + +Define a private custom stemmer or filter during creation/initialization: +```js +var index = new FlexSearch({ + + stemmer: { + + // object {key: replacement} + "ational": "ate", + "tional": "tion", + "enci": "ence", + "ing": "" + }, + filter: [ + + // array blacklist + "in", + "into", + "is", + "isn't", + "it", + "it's" + ] +}); +``` + +Or assign stemmer/filters globally to a language: +```js +FlexSearch.registerLanguage('us', { + + stemmer: {/* ... */}, + filter: [/* ... */] +}); +``` + +Or use built-in stemmer or filter of your preferred languages: +```html + + + + + + +... 
+``` + +Now you can assign built-in stemmer during creation/initialization: +```js +var index_en = new FlexSearch({stemmer: 'en', filter: 'en'}); +var index_de = new FlexSearch({stemmer: 'de', filter: [/* custom */]}); +``` + +In Node.js you just need require the language pack files to make them available: + +```js +require('lang/en.js'); +require('lang/de.js'); +``` + +It is also possible to compile language packs into the build as follows: + +```bash +node compile SUPPORT_LANG_EN=true SUPPORT_LANG_DE=true +``` + -#### Get info +#### Get info about an index ```js index.info(); ``` -Returns information about the index, e.g.: +Returns information e.g.: ```json { - "bytes": 3600356288, + "bytes": 64000, "id": 0, "matchers": 0, "size": 10000, @@ -462,7 +568,7 @@ index.remove(0).update(1, 'foo').add(2, 'foobar'); #### Enable Contextual Index -Create context-enabled index and also set the limit of relevance (depth): +Create index and just set the limit of relevance ("depth"): ```js var index = new FlexSearch({ @@ -472,9 +578,20 @@ var index = new FlexSearch({ }); ``` -#### Use WebWorker (Browser only) +#### Enable Auto-Balanced Cache -Create worker-enabled index and also set the count of parallel threads: +Create index and just set a limit of cache entries: +```js +var index = new FlexSearch({ + + profile: "score", + cache: 10000 +}); +``` + +#### Use WebWorker Sharding (Browser only) + +Create index and just set the count of parallel threads: ```js var index = new FlexSearch({ @@ -581,7 +698,7 @@ FlexSearch ist highly customizable. Make use of the the righ true
{number} - Enable/Disable and/or set capacity of cached entries.

When passing a number as a limit the cache automatically balance stored entries related to their popularity.

Note: When just using "true" the cache has no limits and is actually 5 times faster (the balancer should not run). + Enable/Disable and/or set capacity of cached entries.

When passing a number as a limit, the cache automatically balances stored entries according to their popularity.

Note: When just using "true" the cache has no limits and is actually 2-3 times faster (because the balancer does not have to run). @@ -606,36 +723,38 @@ FlexSearch ist highly customizable. Make use of the the
righ depth

false
- {number} + {number:0-9} - Enable/Disable
contextual indexing and also sets relevance depth (experimental). + Enable/Disable contextual indexing and also set the contextual distance of relevance. threshold

false
- {number} + {number:0-9} - Enable/Disable the threshold of minimum relevance results should have.

Note: You can take a lower threshold for indexing and pass a higher value when calling .search(), but not other turn around. + Enable/Disable the threshold of minimum relevance all results should have.

Note: It is also possible to set a lower threshold for indexing and pass a higher value when calling index.search(options). - stemmer

+ stemmer


false
+ {string}
{function} - Disable or pass in custom object/array. + Disable or pass in language shorthand flag (ISO-3166) or a custom object. - filter

+ filter


false
+ {string}
{function} - Disable or pass in custom object/array. + Disable or pass in language shorthand flag (ISO-3166) or a custom array. @@ -746,7 +865,7 @@ Encoding effects the required memory also as query time and phonetic matches. Tr -#### Comparison (Matches) +#### Comparison (Matching) > Reference String: __"Björn-Phillipp Mayer"__ @@ -954,9 +1073,7 @@ Memory-optimized profile: __"memory"__ { encode: "extra", mode: "strict", - threshold: 7, - stemmer: true, - filter: true + threshold: 7 } ``` @@ -987,7 +1104,7 @@ Relevance-optimized profile: __"score"__ encode: "extra", mode: "strict", threshold: 5, - depth: 4 + depth: 5 } ``` @@ -1019,7 +1136,7 @@ Compare these options above: ## Custom Builds -Default Build: +Full Build: ```bash npm run build ``` @@ -1029,6 +1146,11 @@ Light Build: npm run build-light ``` +Build Language Packs: +```bash +npm run build-lang +``` + Custom Build: ```bash npm run build-custom SUPPORT_WORKER=true SUPPORT_ASYNC=true @@ -1040,7 +1162,12 @@ Supported flags: - SUPPORT_WORKER - SUPPORT_CACHE - SUPPORT_ASYNC -- SUPPORT_BUILTINS (english stemmer and filter) +- SUPPORT_BUILTINS (built-in encoders) + +Supported language flags (includes stemmer and filter): + +- SUPPORT_LANG_EN +- SUPPORT_LANG_DE Alternatively you can also use: ```bash diff --git a/compile.js b/compile.js index 3d4c2c3..9e02266 100644 --- a/compile.js +++ b/compile.js @@ -1,41 +1,13 @@ var child_process = require('child_process'); +var supported_lang = [ + + 'en', + 'de' +]; + console.log("Start build ....."); -var parameter = (function(opt){ - - var parameter = ''; - - for(var index in opt){ - - if(opt.hasOwnProperty(index)){ - - parameter += ' --' + index + '=' + opt[index]; - } - } - - console.log(parameter); - - return parameter; -})({ - - compilation_level: "ADVANCED_OPTIMIZATIONS", - use_types_for_optimization: true, - new_type_inf: true, - jscomp_warning: "newCheckTypes", - generate_exports: true, - export_local_property_definitions: true, - language_in: 
"ECMASCRIPT5_STRICT", - language_out: "ECMASCRIPT5_STRICT", - process_closure_primitives: true, - summary_detail_level: 3, - warning_level: "VERBOSE", - emit_use_strict: true, - output_manifest: "log/manifest.log", - output_module_dependencies: "log/module_dependencies.log", - property_renaming_report: "log/renaming_report.log" -}); - var options = (function(argv){ var arr = {}; @@ -61,11 +33,62 @@ var options = (function(argv){ })(process.argv); -exec("java -jar node_modules/google-closure-compiler/compiler.jar" + parameter + " --define='SUPPORT_DEBUG=" + (options['SUPPORT_DEBUG'] || 'false') + "' --define='SUPPORT_WORKER=" + (options['SUPPORT_WORKER'] || 'false') + "' --define='SUPPORT_BUILTINS=" + (options['SUPPORT_BUILTINS'] || 'false') + "' --define='SUPPORT_CACHE=" + (options['SUPPORT_CACHE'] || 'false') + "' --define='SUPPORT_ASYNC=" + (options['SUPPORT_ASYNC'] || 'false') + "' --js='flexsearch.js' --js_output_file='flexsearch." + (options['RELEASE'] || 'custom') + ".js' && exit 0", function(){ +var parameter = (function(opt){ - console.log("Build Complete: flexsearch." 
+ (options['RELEASE'] || 'custom') + ".js"); + var parameter = ''; + + for(var index in opt){ + + if(opt.hasOwnProperty(index)){ + + parameter += ' --' + index + '=' + opt[index]; + } + } + + //console.log(parameter); + + return parameter; +})({ + + compilation_level: "ADVANCED_OPTIMIZATIONS", + use_types_for_optimization: true, + new_type_inf: true, + jscomp_warning: "newCheckTypes", + generate_exports: true, + export_local_property_definitions: true, + language_in: "ECMASCRIPT5_STRICT", + language_out: "ECMASCRIPT5_STRICT", + process_closure_primitives: true, + summary_detail_level: 3, + warning_level: "VERBOSE", + emit_use_strict: options['RELEASE'] !== 'lang', + output_manifest: "log/manifest.log", + output_module_dependencies: "log/module_dependencies.log", + property_renaming_report: "log/renaming_report.log" }); +if(options['RELEASE'] === 'lang'){ + + for(var i = 0; i < supported_lang.length; i++){ + + (function(i){ + + exec("java -jar node_modules/google-closure-compiler/compiler.jar" + parameter + " --define='SUPPORT_LANG_" + supported_lang[i].toUpperCase() + "=true' --js='lang/" + supported_lang[i] + ".js' --js_output_file='lang/" + supported_lang[i] + ".min.js' && exit 0", function(){ + + console.log("Build Complete: " + supported_lang[i] + ".min.js"); + }); + + })(i); + } +} +else{ + + exec("java -jar node_modules/google-closure-compiler/compiler.jar" + parameter + " --define='SUPPORT_DEBUG=" + (options['SUPPORT_DEBUG'] || 'false') + "' --define='SUPPORT_WORKER=" + (options['SUPPORT_WORKER'] || 'false') + "' --define='SUPPORT_BUILTINS=" + (options['SUPPORT_BUILTINS'] || 'false') + "' --define='SUPPORT_CACHE=" + (options['SUPPORT_CACHE'] || 'false') + "' --define='SUPPORT_ASYNC=" + (options['SUPPORT_ASYNC'] || 'false') + "' --define='SUPPORT_LANG_EN=" + (options['SUPPORT_LANG_EN'] || 'false') + "' --define='SUPPORT_LANG_DE=" + (options['SUPPORT_LANG_DE'] || 'false') + "' --js='flexsearch.js' --js='lang/**.js' --js='!lang/**.min.js' 
--js_output_file='flexsearch." + (options['RELEASE'] || 'custom') + ".js' && exit 0", function(){ + + console.log("Build Complete: flexsearch." + (options['RELEASE'] || 'custom') + ".js"); + }); +} + function exec(prompt, callback){ var child = child_process.exec(prompt, function(err, stdout, stderr){ diff --git a/doc/contextual-index.svg b/doc/contextual-index.svg new file mode 100644 index 0000000..5cd4406 --- /dev/null +++ b/doc/contextual-index.svg @@ -0,0 +1,2 @@ + +
Word 4
Word 4
Contextual Index
[Not supported by viewer]
Context of Relevance
Context of Relevance
Query
Query
2. contextual score
[Not supported by viewer]
1. partial score
1. partial score
context limit (depth)
[Not supported by viewer]
3. document score
[Not supported by viewer]
"Model of Contextual-based Scoring"
Copyright 2018 Thomas Wilkerling
Nextapps GmbH
[Not supported by viewer]
Word 2
Word 2
Word 3
Word 3
Word 5
Word 5
Word 6
Word 6
Word 1
Word 1
Word 7
Word 7
...
...
...
...
\ No newline at end of file diff --git a/doc/contextual_index.svg b/doc/contextual_index.svg deleted file mode 100644 index 04baad8..0000000 --- a/doc/contextual_index.svg +++ /dev/null @@ -1,2 +0,0 @@ - -
Word 2
Word 2
Word 3
Word 3
Word 4
Word 4
Word 5
Word 5
Word 6
Word 6
Word 1
Word 1
Word 7
Word 7
Contextual Index
[Not supported by viewer]
...
...
...
...
Context of Relevance
Context of Relevance
Query
Query
scored by
distance
scored by<br>distance
\ No newline at end of file diff --git a/flexsearch.js b/flexsearch.js index 2b1489d..6f22804 100644 --- a/flexsearch.js +++ b/flexsearch.js @@ -1,5 +1,5 @@ ;/**! - * @preserve FlexSearch v0.2.44 + * @preserve FlexSearch v0.2.46 * Copyright 2018 Thomas Wilkerling * Released under the Apache 2.0 Licence * https://github.com/nextapps-de/flexsearch @@ -125,273 +125,9 @@ var SUPPORT_ASYNC = true; /** @const {RegExp} */ var regex_split = regex("[ -\/]"); - /** - * http://www.ranks.nl/stopwords - * @const {Array} - */ + var filter = {}; - var filter = [ - - "a", - "about", - "above", - "after", - "again", - "against", - "all", - "also", - "am", - "an", - "and", - "any", - "are", - "aren't", - "as", - "at", - //"back", - "be", - "because", - "been", - "before", - "being", - "below", - //"between", - "both", - "but", - "by", - "can", - "cannot", - "can't", - "come", - "could", - "couldn't", - //"day", - "did", - "didn't", - "do", - "does", - "doesn't", - "doing", - "dont", - "down", - "during", - "each", - "even", - "few", - "first", - "for", - "from", - "further", - "get", - //"give", - "go", - //"good", - "had", - "hadn't", - "has", - "hasn't", - "have", - "haven't", - "having", - "he", - "hed", - //"hell", - "her", - "here", - "here's", - "hers", - "herself", - "hes", - "him", - "himself", - "his", - "how", - "how's", - "i", - "id", - "if", - "ill", - "im", - "in", - "into", - "is", - "isn't", - "it", - "it's", - "itself", - "i've", - "just", - "know", - "let's", - "like", - //"look", - "make", - "me", - "more", - "most", - "mustn't", - "my", - "myself", - "new", - "no", - "nor", - "not", - "now", - "of", - "off", - "on", - "once", - //"one", - "only", - "or", - "other", - "ought", - "our", - "our's", - "ourselves", - "out", - "over", - "own", - //"people", - "same", - "say", - "see", - "shan't", - "she", - "she'd", - "shell", - "shes", - "should", - "shouldn't", - "so", - "some", - "such", - //"take", - "than", - "that", - "that's", - "the", - "their", - "theirs", - 
"them", - "themselves", - "then", - "there", - "there's", - "these", - "they", - "they'd", - "they'll", - "they're", - "they've", - //"think", - "this", - "those", - "through", - "time", - "to", - "too", - //"two", - //"under", - "until", - "up", - "us", - //"use", - "very", - "want", - "was", - "wasn't", - "way", - "we", - "wed", - "well", - "were", - "weren't", - "we've", - "what", - "what's", - "when", - "when's", - "where", - "where's", - "which", - "while", - "who", - "whom", - "who's", - "why", - "why's", - "will", - "with", - "won't", - //"work", - "would", - "wouldn't", - //"year", - "you", - "you'd", - "you'll", - "your", - "you're", - "your's", - "yourself", - "yourselves", - "you've" - ]; - - /** - * @const {Object} - */ - - var stemmer = { - - "ational": "ate", - "tional": "tion", - "enci": "ence", - "anci": "ance", - "izer": "ize", - "bli": "ble", - "alli": "al", - "entli": "ent", - "eli": "e", - "ousli": "ous", - "ization": "ize", - "ation": "ate", - "ator": "ate", - "alism": "al", - "iveness": "ive", - "fulness": "ful", - "ousness": "ous", - "aliti": "al", - "iviti": "ive", - "biliti": "ble", - "logi": "log", - "icate": "ic", - "ative": "", - "alize": "al", - "iciti": "ic", - "ical": "ic", - "ful": "", - "ness": "", - "al": "", - "ance": "", - "ence": "", - "er": "", - "ic": "", - "able": "", - "ible": "", - "ant": "", - "ement": "", - "ment": "", - "ent": "", - "ou": "", - "ism": "", - "ate": "", - "iti": "", - "ous": "", - "ive": "", - "ize": "" - }; + var stemmer = {}; /** * @param {string|Object=} options @@ -455,7 +191,7 @@ var SUPPORT_ASYNC = true; * @export */ - FlexSearch.addMatcher = function(matcher){ + FlexSearch.registerMatcher = function(matcher){ for(var key in matcher){ @@ -475,13 +211,36 @@ var SUPPORT_ASYNC = true; * @export */ - FlexSearch.register = function(name, encoder){ + FlexSearch.registerEncoder = function(name, encoder){ global_encoder[name] = encoder; return this; }; + /** + * @param {string} lang + * @param {Object} 
language_pack + * @export + */ + + FlexSearch.registerLanguage = function(lang, language_pack){ + + /** + * @type {Array} + */ + + filter[lang] = language_pack['filter']; + + /** + * @type {Object} + */ + + stemmer[lang] = language_pack['stemmer']; + + return this; + }; + /** * @param {!string} name * @param {?string} value @@ -646,32 +405,14 @@ var SUPPORT_ASYNC = true; ); } - if(SUPPORT_BUILTINS && (custom = options['filter'])) { + if((custom = options['filter'])) { - this.filter = initFilter( - - (custom === true ? - - filter - : - /** @type {Array} */ - (custom) - - ), this.encoder); + this.filter = initFilter(filter[custom] || custom, this.encoder); } - if(SUPPORT_BUILTINS && (custom = options['stemmer'])) { + if((custom = options['stemmer'])) { - this.stemmer = initStemmer( - - (custom === true ? - - stemmer - : - /** @type {Object} */ - (custom) - - ), this.encoder); + this.stemmer = initStemmer(stemmer[custom] || custom, this.encoder); } //} diff --git a/flexsearch.light.js b/flexsearch.light.js index 080a256..1290389 100644 --- a/flexsearch.light.js +++ b/flexsearch.light.js @@ -1,18 +1,19 @@ /* - FlexSearch v0.2.44 + FlexSearch v0.2.46 Copyright 2018 Thomas Wilkerling Released under the Apache 2.0 Licence https://github.com/nextapps-de/flexsearch */ -'use strict';(function(d,u,p){var q;(q=p.define)&&q.amd?q([],function(){return u}):(q=p.modules)?q[d.toLowerCase()]=u:"undefined"!==typeof module?module.exports=u:p[d]=u})("FlexSearch",function(){function d(a){"string"===typeof a&&(a=A[a]);a||(a=x);this.id=a.id||E++;this.init(a);u(this,"index",function(){return this.a});u(this,"length",function(){return Object.keys(this.a).length})}function u(a,b,c){Object.defineProperty(a,b,{get:c})}function p(a){return new RegExp(a,"g")}function q(a,b,c){if("undefined"=== -typeof c){for(c=0;c=e&&(a=a[f+.5|0],a=a[c]||(a[c]=[]),a[a.length]=l)}return f||b[c]}function z(a,b){if(a)for(var c=Object.keys(a),l=0,g=c.length;l=(8=(8a?1:0a?-1:0b))return c.slice(0,b);return c}var 
x={encode:"icase",mode:"ngram",cache:!1,async:!1,m:!1,threshold:0,depth:0},A={memory:{encode:"extra",mode:"strict",threshold:7},speed:{encode:"icase",mode:"strict",threshold:7,depth:2},match:{encode:"extra",mode:"full"},score:{encode:"extra",mode:"strict",threshold:5,depth:4},balance:{encode:"balance", -mode:"ngram",threshold:6,depth:3},fastest:{encode:"icase",mode:"strict",threshold:9,depth:1}},w=[],E=0,D=p("[ -/]");d.new=function(a){return new this(a)};d.create=function(a){return d.new(a)};d.addMatcher=function(a){for(var b in a)a.hasOwnProperty(b)&&(w[w.length]=p(b),w[w.length]=a[b]);return this};d.register=function(a,b){y[a]=b;return this};d.encode=function(a,b){return y[a].call(y,b)};d.prototype.init=function(a){this.h=[];a||(a=x);var b=a.profile;b=b?A[b]:{};this.mode=a.mode||b.mode||this.mode|| -x.mode;this.threshold=a.threshold||b.threshold||this.threshold||x.threshold;this.depth=a.depth||b.depth||this.depth||x.depth;this.i=(b=a.encode||b.encode)&&y[b]||("function"===typeof b?b:this.i||!1);(b=a.matcher)&&this.addMatcher(b);this.f=[{},{},{},{},{},{},{},{},{},{}];this.b={};this.a={};this.c="";this.g=!0;return this};d.prototype.encode=function(a){a&&w.length&&(a=q(a,w));a&&this.h.length&&(a=q(a,this.h));a&&this.i&&(a=this.i.call(y,a));if(a&&this.j){a=a.split(" ");for(var b=0;bm;t--)r=d.substring(m,t),v(e,l,r,a,q,p,g);break;default:if(m=v(e,l,d,a,1,p,g),h&&1=g)for(n=l._ctx[d]||(l._ctx[d]={}),d=this.b[d]||(this.b[d]=[{},{},{},{},{},{},{},{},{},{}]),m=k-h,t=k+h+1,0>m&&(m=0),t>f&&(t=f);mb;b++)z(this.f[b],a);this.depth&&z(this.b,a);delete this.a[a];this.g=!1}return this};d.prototype.search=function(a,b,c){var l=[];if(a&&"object"===typeof a){c=a.callback||b;b=a.limit;var d=a.threshold;a=a.query}d=(d||this.threshold||0)|0;"function"===typeof b? 
-(c=b,b=1E3):b||(b=1E3);if(c){var h=this;I(function(){c(h.search(a,b));h=null},1,"search-"+this.id);return null}if(!a||"string"!==typeof a)return l;if(!this.g)this.g=!0;else if(this.c&&0===a.indexOf(this.c))return l;var e=this.encode(a);if(!e.length)return l;var f=this.mode;e="function"===typeof f?f(e):"ngram"===f?B(e):e.split(D);f=e.length;var k=!0,p=[],n={};if(1=d;z--)if(w=(q?m[r]:this.f)[z][t])v[y++]=w,x=!0;if(x)p[p.length]=1=e&&(a=a[h+.5|0],a=a[c]||(a[c]=[]),a[a.length]=f)}return h||b[c]}function A(a,b){if(a)for(var c=Object.keys(a),f=0,g=c.length;f=(8=(8a?1:0a?-1:0b))return c.slice(0,b);return c}var z={encode:"icase",mode:"ngram",cache:!1,async:!1,j:!1,threshold:0,depth:0},B={memory:{encode:"extra",mode:"strict",threshold:7},speed:{encode:"icase",mode:"strict",threshold:7,depth:2},match:{encode:"extra",mode:"full"},score:{encode:"extra",mode:"strict",threshold:5,depth:4},balance:{encode:"balance", +mode:"ngram",threshold:6,depth:3},fastest:{encode:"icase",mode:"strict",threshold:9,depth:1}},x=[],G=0,D=p("[ -/]"),E={},F={};d.new=function(a){return new this(a)};d.create=function(a){return d.new(a)};d.registerMatcher=function(a){for(var b in a)a.hasOwnProperty(b)&&(x[x.length]=p(b),x[x.length]=a[b]);return this};d.registerEncoder=function(a,b){v[a]=b;return this};d.registerLanguage=function(a,b){E[a]=b.filter;F[a]=b.stemmer;return this};d.encode=function(a,b){return v[a].call(v,b)};d.prototype.init= +function(a){this.i=[];a||(a=z);var b=a.profile;b=b?B[b]:{};this.mode=a.mode||b.mode||this.mode||z.mode;this.threshold=a.threshold||b.threshold||this.threshold||z.threshold;this.depth=a.depth||b.depth||this.depth||z.depth;this.f=(b=a.encode||b.encode)&&v[b]||("function"===typeof b?b:this.f||!1);(b=a.matcher)&&this.addMatcher(b);if(b=a.filter){b=E[b]||b;var c=this.f,f={};if(b)for(var 
g=0;gl;t--)r=d.substring(l,t),y(e,f,r,a,p,u,g);break;default:if(l=y(e,f,d,a,1,u,g),m&&1=g)for(n=f._ctx[d]||(f._ctx[d]={}),d=this.b[d]||(this.b[d]=[{},{},{},{},{},{},{},{},{},{}]),l=k-m,t=k+m+1,0>l&&(l=0),t>h&&(t=h);lb;b++)A(this.g[b],a);this.depth&&A(this.b,a);delete this.a[a];this.h=!1}return this};d.prototype.search=function(a,b,c){var f=[];if(a&&"object"===typeof a){c=a.callback||b;b=a.limit;var g=a.threshold;a=a.query}g=(g||this.threshold||0)|0;"function"===typeof b?(c=b,b=1E3):b||(b=1E3);if(c){var d=this; +K(function(){c(d.search(a,b));d=null},1,"search-"+this.id);return null}if(!a||"string"!==typeof a)return f;if(!this.h)this.h=!0;else if(this.c&&0===a.indexOf(this.c))return f;var e=this.encode(a);if(!e.length)return f;var h=this.mode;e="function"===typeof h?h(e):"ngram"===h?C(e):e.split(D);h=e.length;var k=!0,p=[],n={};if(1=g;y--)if(v= +(u?l[r]:this.g)[y][t])x[z++]=v,w=!0;if(w)p[p.length]=1=k&&(a=a[h+.5|0],a=a[d]||(a[d]=[]),a[a.length]=c)}return h||b[d]}function n(a,b){if(a)for(var d=Object.keys(a),c=0,f=d.length;c=(8=(8a?1:0a?-1:0b))return d.slice(0,b);return d}function E(a){a.D||(a.D=H(function(){a.D=null;var b=a.async;b&&(a.async=!1);if(a.f.length){for(var d=I(),c;(c=a.f.shift())||0===c;){var f=a.h[c];switch(f[0]){case C.add:a.add(f[1],f[2]);break;case C.update:a.update(f[1], -f[2]);break;case C.remove:a.remove(f[1])}a.h[c]=null;delete a.h[c];if(100=f&&(c.m=c.b),c.w&&c.m===c.b&&(c.g.length?c.c="":c.c||(c.c=b),c.cache&&c.j.set(b,c.g),c.w(c.g),c.g=[]))})}this.mode=a.mode||d.mode||this.mode||x.mode;this.async=a.async||this.async||x.async;this.b=a.worker||this.b||x.b;this.threshold=a.threshold||d.threshold||this.threshold||x.threshold;this.depth=a.depth||d.depth||this.depth||x.depth;this.v=(b=a.encode||d.encode)&&z[b]||("function"===typeof b?b:this.v||!1);this.G=a.debug||this.G;(b=a.matcher)&&this.addMatcher(b); 
-if(b=a.filter)this.B=M(!0===b?S:b,this.v);if(b=a.stemmer)this.F=N(!0===b?T:b,this.v);this.l=[{},{},{},{},{},{},{},{},{},{}];this.o={};this.a={};this.h={};this.f=[];this.D=null;this.c="";this.u=!0;this.j=(this.cache=b=a.cache||this.cache||x.cache)?new U(b):!1;return this};g.prototype.encode=function(a){a&&A.length&&(a=y(a,A));a&&this.C.length&&(a=y(a,this.C));a&&this.v&&(a=this.v.call(z,a));if(a&&this.B){a=a.split(" ");for(var b=0;b=this.i.length&&(this.s=0),this.i[this.s].postMessage(this.s,{add:!0,id:a,content:b}),this.a[a]=""+this.s,this;if(this.async)return this.h[a]||(this.f[this.f.length]=a),this.h[a]=[C.add,a,b],E(this),this;b=this.encode(b); -if(!b.length)return this;d=this.mode;b="function"===typeof d?d(b):"ngram"===d?G(b):b.split(J);for(var c={_ctx:{}},f=this.threshold,e=this.depth,k=this.l,h=b.length,g=0;gl;w--)v=r.substring(l,w),u(k,c,v,a,q,n,f);break;default:if(l=u(k,c, -r,a,1,n,f),e&&1=f)for(p=c._ctx[r]||(c._ctx[r]={}),r=this.o[r]||(this.o[r]=[{},{},{},{},{},{},{},{},{},{}]),l=g-e,w=g+e+1,0>l&&(l=0),w>h&&(w=h);lb;b++)n(this.l[b],a);this.depth&&n(this.o,a);delete this.a[a];this.u=!1}return this};g.prototype.search=function(a,b,d){var c=[];if(a&&"object"===typeof a){d=a.callback||b;b=a.limit;var f=a.threshold;a=a.query}f=(f||this.threshold||0)|0;"function"===typeof b?(d=b,b=1E3):b||(b=1E3);if(this.b){this.w=d;this.m=0;this.g=[];for(c=0;c=f;A--)if(x=(q?l[v]:this.l)[A][w])u[z++]=x,y=!0;if(y)n[n.length]=1g;g++)for(b=Object.keys(this.l[g]),a=0;ag;c--)h=k[c-1],k[c]=h,e[h]=c;k[g]=a;e[a]=g}}}return b};return a}(); -return g}(function(){var q={},B=!("undefined"===typeof Blob||"undefined"===typeof URL||!URL.createObjectURL);return function(g,t,e,y,u){var n=g;g=B?URL.createObjectURL(new Blob(["var SUPPORT_WORKER = true;var SUPPORT_BUILTINS = true;var SUPPORT_DEBUG = true;var SUPPORT_CACHE = true;var SUPPORT_ASYNC = true;("+e.toString()+")()"],{type:"text/javascript"})):"../"+n+".js";n+="-"+t;q[n]||(q[n]=[]);q[n][u]=new 
Worker(g);q[n][u].onmessage=y;console.log("Register Worker: "+n+"@"+u);return{postMessage:function(e, -g){q[n][e].postMessage(g)}}}}()),this); +m;c=f===a.length-1?"":a[f+1];d=m}return b}function O(a,b){var d={};if(a)for(var c=0;ca?1:0a?-1:0b))return d.slice(0,b);return d}function E(a){a.C||(a.C=H(function(){a.C=null;var b=a.async;b&&(a.async=!1);if(a.f.length){for(var d=I(),c;(c=a.f.shift())||0===c;){var f=a.h[c];switch(f[0]){case C.add:a.add(f[1],f[2]);break;case C.update:a.update(f[1], +f[2]);break;case C.remove:a.remove(f[1])}a.h[c]=null;delete a.h[c];if(100=f&&(c.m=c.b),c.w&&c.m===c.b&&(c.g.length?c.c="":c.c||(c.c=b),c.cache&&c.j.set(b,c.g),c.w(c.g),c.g=[]))})}this.mode=a.mode||d.mode||this.mode||x.mode;this.async=a.async||this.async||x.async;this.b=a.worker||this.b||x.b;this.threshold=a.threshold||d.threshold||this.threshold||x.threshold;this.depth=a.depth||d.depth||this.depth||x.depth;this.v= +(b=a.encode||d.encode)&&z[b]||("function"===typeof b?b:this.v||!1);this.D=a.debug||this.D;(b=a.matcher)&&this.addMatcher(b);if(b=a.filter)this.filter=O(K[b]||b,this.v);if(b=a.stemmer)this.stemmer=P(L[b]||b,this.v);this.l=[{},{},{},{},{},{},{},{},{},{}];this.o={};this.a={};this.h={};this.f=[];this.C=null;this.c="";this.u=!0;this.j=(this.cache=b=a.cache||this.cache||x.cache)?new U(b):!1;return this};g.prototype.encode=function(a){a&&A.length&&(a=y(a,A));a&&this.B.length&&(a=y(a,this.B));a&&this.v&& +(a=this.v.call(z,a));if(a&&this.filter){a=a.split(" ");for(var b=0;b=this.i.length&&(this.s=0),this.i[this.s].postMessage(this.s, +{add:!0,id:a,content:b}),this.a[a]=""+this.s,this;if(this.async)return this.h[a]||(this.f[this.f.length]=a),this.h[a]=[C.add,a,b],E(this),this;b=this.encode(b);if(!b.length)return this;d=this.mode;b="function"===typeof d?d(b):"ngram"===d?G(b):b.split(J);for(var 
c={_ctx:{}},f=this.threshold,e=this.depth,k=this.l,h=b.length,g=0;gl;w--)v=r.substring(l,w),u(k,c,v,a,q,n,f);break;default:if(l=u(k,c,r,a,1,n,f),e&&1=f)for(p=c._ctx[r]||(c._ctx[r]={}),r=this.o[r]||(this.o[r]=[{},{},{},{},{},{},{},{},{},{}]),l=g-e,w=g+e+1,0>l&&(l=0),w>h&&(w=h);lb;b++)n(this.l[b],a);this.depth&&n(this.o,a);delete this.a[a];this.u=!1}return this};g.prototype.search=function(a,b,d){var c=[];if(a&&"object"===typeof a){d=a.callback||b;b=a.limit;var f=a.threshold;a=a.query}f=(f||this.threshold||0)|0;"function"=== +typeof b?(d=b,b=1E3):b||(b=1E3);if(this.b){this.w=d;this.m=0;this.g=[];for(c=0;c=f;A--)if(x=(q?l[v]:this.l)[A][w])u[z++]=x,y=!0;if(y)n[n.length]=1g;g++)for(b=Object.keys(this.l[g]),a=0;ag;c--)h=k[c-1],k[c]=h,e[h]=c;k[g]=a;e[a]=g}}}return b};return a}();return g}(function(){var q={},B=!("undefined"===typeof Blob||"undefined"===typeof URL||!URL.createObjectURL);return function(g,t,e,y,u){var n=g;g=B?URL.createObjectURL(new Blob(["var SUPPORT_WORKER = true;var SUPPORT_BUILTINS = true;var SUPPORT_DEBUG = true;var SUPPORT_CACHE = true;var SUPPORT_ASYNC = true;("+ +e.toString()+")()"],{type:"text/javascript"})):"../"+n+".js";n+="-"+t;q[n]||(q[n]=[]);q[n][u]=new Worker(g);q[n][u].onmessage=y;console.log("Register Worker: "+n+"@"+u);return{postMessage:function(e,g){q[n][e].postMessage(g)}}}}()),this); diff --git a/lang/de.js b/lang/de.js new file mode 100644 index 0000000..ba3e321 --- /dev/null +++ b/lang/de.js @@ -0,0 +1,175 @@ +/** @define {boolean} */ +var SUPPORT_LANG_DE = true; + +if(SUPPORT_LANG_DE) (function(root){ + + root['FlexSearch']['registerLanguage']('de', /** @const */ { + + /** + * http://www.ranks.nl/stopwords + * @type {Array} + * @export + */ + + filter: [ + + "aber", + "als", + "am", + "an", + "auch", + "auf", + "aus", + "bei", + "bin", + "bis", + "bist", + "da", + "dadurch", + "daher", + "darum", + "das", + "daß", + "dass", + "dein", + "deine", + "dem", + "den", + "der", + "des", + "dessen", + "deshalb", + "die", + "dies", + 
"dieser", + "dieses", + "doch", + "dort", + "du", + "durch", + "ein", + "eine", + "einem", + "einen", + "einer", + "eines", + "er", + "es", + "euer", + "eure", + "für", + "hatte", + "hatten", + "hattest", + "hattet", + "hier", + "hinter", + "ich", + "ihr", + "ihre", + "im", + "in", + "ist", + "ja", + "jede", + "jedem", + "jeden", + "jeder", + "jedes", + "jener", + "jenes", + "jetzt", + "kann", + "kannst", + "können", + "könnt", + "machen", + "mein", + "meine", + "mit", + "muß", + "mußt", + "musst", + "müssen", + "müßt", + "nach", + "nachdem", + "nein", + "nicht", + "nun", + "oder", + "seid", + "sein", + "seine", + "sich", + "sie", + "sind", + "soll", + "sollen", + "sollst", + "sollt", + "sonst", + "soweit", + "sowie", + "und", + "unser", + "unsere", + "unter", + "vom", + "von", + "vor", + "wann", + "warum", + "was", + "weiter", + "weitere", + "wenn", + "wer", + "werde", + "werden", + "werdet", + "weshalb", + "wie", + "wieder", + "wieso", + "wir", + "wird", + "wirst", + "wo", + "woher", + "wohin", + "zu", + "zum", + "zur", + "über" + ], + + /** + * @type {Object} + * @export + */ + + stemmer: { + + "niss": "", + "isch": "", + "lich": "", + "heit": "", + "keit": "", + "end": "", + "ung": "", + "est": "", + "ern": "", + "em": "", + "er": "", + "en": "", + "es": "", + "st": "", + "ig": "", + "ik": "", + "e": "", + "s": "" + } + }); + +})(this); diff --git a/lang/de.min.js b/lang/de.min.js new file mode 100644 index 0000000..db72e3a --- /dev/null +++ b/lang/de.min.js @@ -0,0 +1,2 @@ +this.FlexSearch.registerLanguage("de",{filter:"aber als am an auch auf aus bei bin bis bist da dadurch daher darum das da\u00df dass dein deine dem den der des dessen deshalb die dies dieser dieses doch dort du durch ein eine einem einen einer eines er es euer eure f\u00fcr hatte hatten hattest hattet hier hinter ich ihr ihre im in ist ja jede jedem jeden jeder jedes jener jenes jetzt kann kannst k\u00f6nnen k\u00f6nnt machen mein meine mit mu\u00df mu\u00dft musst m\u00fcssen 
m\u00fc\u00dft nach nachdem nein nicht nun oder seid sein seine sich sie sind soll sollen sollst sollt sonst soweit sowie und unser unsere unter vom von vor wann warum was weiter weitere wenn wer werde werden werdet weshalb wie wieder wieso wir wird wirst wo woher wohin zu zum zur \u00fcber".split(" "), +stemmer:{niss:"",isch:"",lich:"",heit:"",keit:"",end:"",ung:"",est:"",ern:"",em:"",er:"",en:"",es:"",st:"",ig:"",ik:"",e:"",s:""}}); diff --git a/lang/en.js b/lang/en.js new file mode 100644 index 0000000..7374754 --- /dev/null +++ b/lang/en.js @@ -0,0 +1,279 @@ +/** @define {boolean} */ +var SUPPORT_LANG_EN = true; + +if(SUPPORT_LANG_EN) (function(root){ + + root['FlexSearch']['registerLanguage']('en', /** @const */ { + + /** + * http://www.ranks.nl/stopwords + * @type {Array} + * @export + */ + + filter: [ + + "a", + "about", + "above", + "after", + "again", + "against", + "all", + "also", + "am", + "an", + "and", + "any", + "are", + "aren't", + "as", + "at", + //"back", + "be", + "because", + "been", + "before", + "being", + "below", + //"between", + "both", + "but", + "by", + "can", + "cannot", + "can't", + "come", + "could", + "couldn't", + //"day", + "did", + "didn't", + "do", + "does", + "doesn't", + "doing", + "dont", + "down", + "during", + "each", + "even", + "few", + "first", + "for", + "from", + "further", + "get", + //"give", + "go", + //"good", + "had", + "hadn't", + "has", + "hasn't", + "have", + "haven't", + "having", + "he", + "hed", + //"hell", + "her", + "here", + "here's", + "hers", + "herself", + "hes", + "him", + "himself", + "his", + "how", + "how's", + "i", + "id", + "if", + "ill", + "im", + "in", + "into", + "is", + "isn't", + "it", + "it's", + "itself", + "i've", + "just", + "know", + "let's", + "like", + //"look", + "make", + "me", + "more", + "most", + "mustn't", + "my", + "myself", + "new", + "no", + "nor", + "not", + "now", + "of", + "off", + "on", + "once", + //"one", + "only", + "or", + "other", + "ought", + "our", + "our's", + 
"ourselves", + "out", + "over", + "own", + //"people", + "same", + "say", + "see", + "shan't", + "she", + "she'd", + "shell", + "shes", + "should", + "shouldn't", + "so", + "some", + "such", + //"take", + "than", + "that", + "that's", + "the", + "their", + "theirs", + "them", + "themselves", + "then", + "there", + "there's", + "these", + "they", + "they'd", + "they'll", + "they're", + "they've", + //"think", + "this", + "those", + "through", + "time", + "to", + "too", + //"two", + //"under", + "until", + "up", + "us", + //"use", + "very", + "want", + "was", + "wasn't", + "way", + "we", + "wed", + "well", + "were", + "weren't", + "we've", + "what", + "what's", + "when", + "when's", + "where", + "where's", + "which", + "while", + "who", + "whom", + "who's", + "why", + "why's", + "will", + "with", + "won't", + //"work", + "would", + "wouldn't", + //"year", + "you", + "you'd", + "you'll", + "your", + "you're", + "your's", + "yourself", + "yourselves", + "you've" + ], + + /** + * @type {Object} + * @export + */ + + stemmer: { + + "ational": "ate", + "iveness": "ive", + "fulness": "ful", + "ousness": "ous", + "ization": "ize", + "tional": "tion", + "biliti": "ble", + "icate": "ic", + "ative": "", + "alize": "al", + "iciti": "ic", + "entli": "ent", + "ousli": "ous", + "alism": "al", + "ation": "ate", + "aliti": "al", + "iviti": "ive", + "ement": "", + "enci": "ence", + "anci": "ance", + "izer": "ize", + "alli": "al", + "ator": "ate", + "logi": "log", + "ical": "ic", + "ance": "", + "ence": "", + "ness": "", + "able": "", + "ible": "", + "ment": "", + "eli": "e", + "bli": "ble", + "ful": "", + "ant": "", + "ent": "", + "ism": "", + "ate": "", + "iti": "", + "ous": "", + "ive": "", + "ize": "", + "al": "", + "ou": "", + "er": "", + "ic": "" + } + }); + +})(this); diff --git a/lang/en.min.js b/lang/en.min.js new file mode 100644 index 0000000..24d14e5 --- /dev/null +++ b/lang/en.min.js @@ -0,0 +1,2 @@ +this.FlexSearch.registerLanguage("en",{filter:"a about above after again 
against all also am an and any are aren't as at be because been before being below both but by can cannot can't come could couldn't did didn't do does doesn't doing dont down during each even few first for from further get go had hadn't has hasn't have haven't having he hed her here here's hers herself hes him himself his how how's i id if ill im in into is isn't it it's itself i've just know let's like make me more most mustn't my myself new no nor not now of off on once only or other ought our our's ourselves out over own same say see shan't she she'd shell shes should shouldn't so some such than that that's the their theirs them themselves then there there's these they they'd they'll they're they've this those through time to too until up us very want was wasn't way we wed well were weren't we've what what's when when's where where's which while who whom who's why why's will with won't would wouldn't you you'd you'll your you're your's yourself yourselves you've".split(" "), +stemmer:{ational:"ate",iveness:"ive",fulness:"ful",ousness:"ous",ization:"ize",tional:"tion",biliti:"ble",icate:"ic",ative:"",alize:"al",iciti:"ic",entli:"ent",ousli:"ous",alism:"al",ation:"ate",aliti:"al",iviti:"ive",ement:"",enci:"ence",anci:"ance",izer:"ize",alli:"al",ator:"ate",logi:"log",ical:"ic",ance:"",ence:"",ness:"",able:"",ible:"",ment:"",eli:"e",bli:"ble",ful:"",ant:"",ent:"",ism:"",ate:"",iti:"",ous:"",ive:"",ize:"",al:"",ou:"",er:"",ic:""}}); diff --git a/package.json b/package.json index c002e3e..4e40670 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "flexsearch", - "version": "0.2.44", + "version": "0.2.461", "description": "Next-Generation full text search library with zero dependencies.", "homepage": "https://nextapps-de.github.io/xone/", "author": "Thomas Wilkerling", @@ -25,9 +25,10 @@ }, "scripts": { "build": "node compile RELEASE=min SUPPORT_DEBUG=true SUPPORT_WORKER=true SUPPORT_BUILTINS=true SUPPORT_CACHE=true SUPPORT_ASYNC=true && exit 0", - 
"build-light": "node compile RELEASE=light SUPPORT_DEBUG=false SUPPORT_WORKER=false SUPPORT_BUILTINS=false SUPPORT_CACHE=false SUPPORT_ASYNC=false && exit 0", + "build-light": "node compile RELEASE=light && exit 0", "build-custom": "node compile", - "build-all": "npm run build && npm run build-light", + "build-lang": "node compile RELEASE=lang && exit 0", + "build-all": "npm run build && npm run build-light && npm run build-lang", "test-production": "nyc --reporter=html --reporter=text mocha --timeout=3000 test --exit", "test-light": "nyc --reporter=html --reporter=text mocha --timeout=3000 test/ --exit", "test-develop": "nyc --reporter=html --reporter=text mocha --timeout=3000 --exit", @@ -39,6 +40,7 @@ "files": [ "flexsearch.js", "flexsearch.min.js", + "lang/", "test/" ], "dependencies": {}, diff --git a/test/test.js b/test/test.js index 2c657e3..a85b1ed 100644 --- a/test/test.js +++ b/test/test.js @@ -837,7 +837,7 @@ describe('Encoding', function(){ it('Should have been encoded properly: Custom Encoder', function(){ - FlexSearch.register('custom', test_encoder); + FlexSearch.registerEncoder('custom', test_encoder); expect(FlexSearch.encode('custom', "Björn-Phillipp Mayer")).to.equal(flexsearch_custom.encode("Björn-Phillipp Mayer")); }); @@ -1006,7 +1006,7 @@ describe('Add Matchers', function(){ it('Should have been added properly', function(){ - FlexSearch.addMatcher({ + FlexSearch.registerMatcher({ '1': 'a', '2': 'b',