diff --git a/README.md b/README.md index 71064cd..684d9f7 100644 --- a/README.md +++ b/README.md @@ -16,7 +16,7 @@ npm install git+https://github.com/nextapps-de/flexsearch/tree/v0.8-preview - Greatly enhanced performance of the whole text encoding pipeline - Improved indexing of numeric content (Triplets) - Intermediate result sets and `Resolver` -- Basic Resolver: `and`, `or`, `xor`, `not`, `limit`, `offset`, `enrich`, `resolve`, Output formatter +- Basic Resolver: `and`, `or`, `xor`, `not`, `limit`, `offset`, `boost`, `resolve` - Improved charset collection - New charset preset `soundex` which further reduces memory consumption by also increasing "fuzziness" - Performance gain when polling tasks to the index by using "Event-Loop-Caches" @@ -81,10 +81,12 @@ All persistent variants are optimized for larger sized indexes under heavy workl - [basic-resolver](example/nodejs-commonjs/basic-resolver) - [basic-worker](example/nodejs-commonjs/basic-worker) - [basic-worker-extern-config](example/nodejs-commonjs/basic-worker-extern-config) + - [basic-export-import](example/nodejs-commonjs/basic-export-import) - [document](example/nodejs-commonjs/document) - [document-persistent](example/nodejs-commonjs/document-persistent) - [document-worker](example/nodejs-commonjs/document-worker) - [document-worker-extern-config](example/nodejs-commonjs/document-worker-extern-config) + - [document-export-import](example/nodejs-commonjs/document-export-import) - [language-pack](example/nodejs-commonjs/language-pack) - [nodejs-esm](example/nodejs-esm): - [basic](example/nodejs-esm/basic) @@ -93,10 +95,12 @@ All persistent variants are optimized for larger sized indexes under heavy workl - [basic-resolver](example/nodejs-esm/basic-resolver) - [basic-worker](example/nodejs-esm/basic-worker) - [basic-worker-extern-config](example/nodejs-esm/basic-worker-extern-config) + - [basic-export-import](example/nodejs-esm/basic-export-import) - [document](example/nodejs-esm/document) - 
[document-persistent](example/nodejs-esm/document-persistent) - [document-worker](example/nodejs-esm/document-worker) - [document-worker-extern-config](example/nodejs-esm/document-worker-extern-config) + - [document-export-import](example/nodejs-esm/document-export-import) - [language-pack](example/nodejs-esm/language-pack) ### Examples Browser @@ -2315,6 +2319,7 @@ for(let i = 0; i < files.length; i++){ - You can import language packs by `dist/module/lang/*` when using ESM and by `const EnglishPreset = require("flexsearch/lang/en");` when using CommonJS (Node.js) - The method `index.append()` is now deprecated and will be removed in the near future, because it isn't consistent and leads into unexpected behavior when not used properly. You should only use `index.add()` to push contents to the index. - Using the `async` variants like `.searchAsync` is now deprecated (but still works), asynchronous responses will always return from Worker-Index and from Persistent-Index, everything else will return a non-promised result. Having both types of methods looks like the developers can choose between them, but they can't. 
+- Exports created with versions below v0.8 are not compatible with v0.8 and cannot be imported ## Not finished yet diff --git a/example/nodejs-commonjs/basic-export-import/README.md b/example/nodejs-commonjs/basic-export-import/README.md new file mode 100644 index 0000000..24ee65e --- /dev/null +++ b/example/nodejs-commonjs/basic-export-import/README.md @@ -0,0 +1,7 @@ +```bash +npm install +``` + +```bash +node index.js +``` \ No newline at end of file diff --git a/example/nodejs-commonjs/basic-export-import/index.js b/example/nodejs-commonjs/basic-export-import/index.js new file mode 100644 index 0000000..c0bbc00 --- /dev/null +++ b/example/nodejs-commonjs/basic-export-import/index.js @@ -0,0 +1,71 @@ +const { Index } = require("flexsearch"); + +(async function(){ + + // you will need to keep the index configuration + // they will not export, also every change to the + // configuration requires a full re-index + const config = { + tokenize: "forward" + }; + + // create a simple index which can store id-content-pairs + let index = new Index(config); + + // some test data + const data = [ + 'cats abcd efgh ijkl mnop qrst uvwx cute', + 'cats abcd efgh ijkl mnop qrst cute', + 'cats abcd efgh ijkl mnop cute', + 'cats abcd efgh ijkl cute', + 'cats abcd efgh cute', + 'cats abcd cute', + 'cats cute' + ]; + + // add data to the index + data.forEach((item, id) => { + index.add(id, item); + }); + + // perform query + let result = index.search("cute cat"); + + // display results + result.forEach(i => { + console.log(data[i]); + }); + + // EXPORT + // ----------------------- + + const fs = require("fs").promises; + + await fs.mkdir("./export/").catch(e => {}); + await index.export(async function(key, data){ + await fs.writeFile("./export/" + key, data, "utf8"); + }); + + // IMPORT + // ----------------------- + + // create the same type of index you have used by .export() + // along with the same configuration + index = new Index(config); + + // load them in parallel + const files = 
await fs.readdir("./export/"); + await Promise.all(files.map(async file => { + const data = await fs.readFile("./export/" + file, "utf8"); + await index.import(file, data); + })); + + // perform query + result = index.search("cute cat"); + + // display results + console.log("-------------------------------------"); + result.forEach(i => { + console.log(data[i]); + }); +}()); \ No newline at end of file diff --git a/example/nodejs-commonjs/basic-export-import/package.json b/example/nodejs-commonjs/basic-export-import/package.json new file mode 100644 index 0000000..914e4d5 --- /dev/null +++ b/example/nodejs-commonjs/basic-export-import/package.json @@ -0,0 +1,6 @@ +{ + "name": "nodejs-commonjs-basic-export-import", + "dependencies": { + "flexsearch": "github:nextapps-de/flexsearch#v0.8-preview" + } +} diff --git a/example/nodejs-commonjs/document-export-import/README.md b/example/nodejs-commonjs/document-export-import/README.md new file mode 100644 index 0000000..24ee65e --- /dev/null +++ b/example/nodejs-commonjs/document-export-import/README.md @@ -0,0 +1,7 @@ +```bash +npm install +``` + +```bash +node index.js +``` \ No newline at end of file diff --git a/example/nodejs-commonjs/document-export-import/data.json b/example/nodejs-commonjs/document-export-import/data.json new file mode 100644 index 0000000..1fd5660 --- /dev/null +++ b/example/nodejs-commonjs/document-export-import/data.json @@ -0,0 +1,141 @@ +[ + { + "tconst": "tt0000001", + "titleType": "short", + "primaryTitle": "Carmencita", + "originalTitle": "Carmencita", + "isAdult": 0, + "startYear": "1894", + "endYear": "", + "runtimeMinutes": "1", + "genres": [ + "Documentary", + "Short" + ] + }, + { + "tconst": "tt0000002", + "titleType": "short", + "primaryTitle": "Le clown et ses chiens", + "originalTitle": "Le clown et ses chiens", + "isAdult": 0, + "startYear": "1892", + "endYear": "", + "runtimeMinutes": "5", + "genres": [ + "Animation", + "Short" + ] + }, + { + "tconst": "tt0000003", + "titleType": 
"short", + "primaryTitle": "Pauvre Pierrot", + "originalTitle": "Pauvre Pierrot", + "isAdult": 0, + "startYear": "1892", + "endYear": "", + "runtimeMinutes": "4", + "genres": [ + "Animation", + "Comedy", + "Romance" + ] + }, + { + "tconst": "tt0000004", + "titleType": "short", + "primaryTitle": "Un bon bock", + "originalTitle": "Un bon bock", + "isAdult": 0, + "startYear": "1892", + "endYear": "", + "runtimeMinutes": "12", + "genres": [ + "Animation", + "Short" + ] + }, + { + "tconst": "tt0000005", + "titleType": "short", + "primaryTitle": "Blacksmith Scene", + "originalTitle": "Blacksmith Scene", + "isAdult": 0, + "startYear": "1893", + "endYear": "", + "runtimeMinutes": "1", + "genres": [ + "Comedy", + "Short" + ] + }, + { + "tconst": "tt0000006", + "titleType": "short", + "primaryTitle": "Chinese Opium Den", + "originalTitle": "Chinese Opium Den", + "isAdult": 0, + "startYear": "1894", + "endYear": "", + "runtimeMinutes": "1", + "genres": [ + "Short" + ] + }, + { + "tconst": "tt0000007", + "titleType": "short", + "primaryTitle": "Corbett and Courtney Before the Kinetograph", + "originalTitle": "Corbett and Courtney Before the Kinetograph", + "isAdult": 0, + "startYear": "1894", + "endYear": "", + "runtimeMinutes": "1", + "genres": [ + "Short", + "Sport" + ] + }, + { + "tconst": "tt0000008", + "titleType": "short", + "primaryTitle": "Edison Kinetoscopic Record of a Sneeze", + "originalTitle": "Edison Kinetoscopic Record of a Sneeze", + "isAdult": 0, + "startYear": "1894", + "endYear": "", + "runtimeMinutes": "1", + "genres": [ + "Documentary", + "Short" + ] + }, + { + "tconst": "tt0000009", + "titleType": "movie", + "primaryTitle": "Miss Jerry", + "originalTitle": "Miss Jerry", + "isAdult": 0, + "startYear": "1894", + "endYear": "", + "runtimeMinutes": "45", + "genres": [ + "Romance" + ] + }, + { + "tconst": "tt0000010", + "titleType": "short", + "primaryTitle": "Leaving the Factory", + "originalTitle": "La sortie de l'usine Lumière à Lyon", + "isAdult": 0, + 
"startYear": "1895", + "endYear": "", + "runtimeMinutes": "1", + "genres": [ + "Documentary", + "Short" + ] + } +] \ No newline at end of file diff --git a/example/nodejs-commonjs/document-export-import/index.js b/example/nodejs-commonjs/document-export-import/index.js new file mode 100644 index 0000000..1f5dd4c --- /dev/null +++ b/example/nodejs-commonjs/document-export-import/index.js @@ -0,0 +1,99 @@ +const { Document, Charset } = require("flexsearch"); +const fs = require("fs").promises; + +(async function(){ + + // loading test data + const data = JSON.parse(await fs.readFile(__dirname + "/data.json", "utf8")); + + // you will need to keep the index configuration + // they will not export, also every change to the + // configuration requires a full re-index + const config = { + document: { + id: "tconst", + store: true, + index: [{ + field: "primaryTitle", + tokenize: "forward", + encoder: Charset.LatinBalance + },{ + field: "originalTitle", + tokenize: "forward", + encoder: Charset.LatinBalance + }], + tag: [{ + field: "startYear" + },{ + field: "genres" + }] + } + }; + + // create the document index + let document = new Document(config); + + // add test data + for(let i = 0; i < data.length; i++){ + document.add(data[i]); + } + + // perform a query + let result = document.search({ + query: "karmen", + tag: { + "startYear": "1894", + "genres": [ + "Documentary", + "Short" + ] + }, + suggest: true, + enrich: true, + merge: true + }); + + // output results + console.log(result); + + // EXPORT + // ----------------------- + + await fs.mkdir("./export/").catch(e => {}); + await document.export(async function(key, data){ + await fs.writeFile("./export/" + key, data, "utf8"); + }); + + // IMPORT + // ----------------------- + + // create the same type of index you have used by .export() + // along with the same configuration + document = new Document(config); + + // load them in parallel + const files = await fs.readdir("./export/"); + await 
Promise.all(files.map(async file => { + const data = await fs.readFile("./export/" + file, "utf8"); + await document.import(file, data); + })) + + // perform query + result = document.search({ + query: "karmen", + tag: { + "startYear": "1894", + "genres": [ + "Documentary", + "Short" + ] + }, + suggest: true, + enrich: true, + merge: true + }); + + // output results + console.log("-------------------------------------"); + console.log(result); +}()); \ No newline at end of file diff --git a/example/nodejs-commonjs/document-export-import/package.json b/example/nodejs-commonjs/document-export-import/package.json new file mode 100644 index 0000000..a48b758 --- /dev/null +++ b/example/nodejs-commonjs/document-export-import/package.json @@ -0,0 +1,6 @@ +{ + "name": "nodejs-commonjs-document-export-import", + "dependencies": { + "flexsearch": "github:nextapps-de/flexsearch#v0.8-preview" + } +} diff --git a/example/nodejs-esm/basic-export-import/README.md b/example/nodejs-esm/basic-export-import/README.md new file mode 100644 index 0000000..24ee65e --- /dev/null +++ b/example/nodejs-esm/basic-export-import/README.md @@ -0,0 +1,7 @@ +```bash +npm install +``` + +```bash +node index.js +``` \ No newline at end of file diff --git a/example/nodejs-esm/basic-export-import/index.js b/example/nodejs-esm/basic-export-import/index.js new file mode 100644 index 0000000..aeed3ca --- /dev/null +++ b/example/nodejs-esm/basic-export-import/index.js @@ -0,0 +1,68 @@ +import { Index } from "flexsearch/esm"; + +// you will need to keep the index configuration +// they will not export, also every change to the +// configuration requires a full re-index +const config = { + tokenize: "forward" +}; + +// create a simple index which can store id-content-pairs +let index = new Index(config); + +// some test data +const data = [ + 'cats abcd efgh ijkl mnop qrst uvwx cute', + 'cats abcd efgh ijkl mnop qrst cute', + 'cats abcd efgh ijkl mnop cute', + 'cats abcd efgh ijkl cute', + 'cats abcd efgh 
cute', + 'cats abcd cute', + 'cats cute' +]; + +// add data to the index +data.forEach((item, id) => { + index.add(id, item); +}); + +// perform query +let result = index.search("cute cat"); + +// display results +result.forEach(i => { + console.log(data[i]); +}); + +// EXPORT +// ----------------------- + +import { promises as fs } from "fs"; + +await fs.mkdir("./export/").catch(e => {}); +await index.export(async function(key, data){ + await fs.writeFile("./export/" + key, data, "utf8"); +}); + +// IMPORT +// ----------------------- + +// create the same type of index you have used by .export() +// along with the same configuration +index = new Index(config); + +// load them in parallel +const files = await fs.readdir("./export/"); +await Promise.all(files.map(async file => { + const data = await fs.readFile("./export/" + file, "utf8"); + await index.import(file, data); +})) + +// perform query +result = index.search("cute cat"); + +// display results +console.log("-------------------------------------"); +result.forEach(i => { + console.log(data[i]); +}); diff --git a/example/nodejs-esm/basic-export-import/package.json b/example/nodejs-esm/basic-export-import/package.json new file mode 100644 index 0000000..fe3354b --- /dev/null +++ b/example/nodejs-esm/basic-export-import/package.json @@ -0,0 +1,7 @@ +{ + "name": "nodejs-esm-basic-export-import", + "type": "module", + "dependencies": { + "flexsearch": "github:nextapps-de/flexsearch#v0.8-preview" + } +} diff --git a/example/nodejs-esm/document-export-import/README.md b/example/nodejs-esm/document-export-import/README.md new file mode 100644 index 0000000..24ee65e --- /dev/null +++ b/example/nodejs-esm/document-export-import/README.md @@ -0,0 +1,7 @@ +```bash +npm install +``` + +```bash +node index.js +``` \ No newline at end of file diff --git a/example/nodejs-esm/document-export-import/data.json b/example/nodejs-esm/document-export-import/data.json new file mode 100644 index 0000000..1fd5660 --- /dev/null +++ 
b/example/nodejs-esm/document-export-import/data.json @@ -0,0 +1,141 @@ +[ + { + "tconst": "tt0000001", + "titleType": "short", + "primaryTitle": "Carmencita", + "originalTitle": "Carmencita", + "isAdult": 0, + "startYear": "1894", + "endYear": "", + "runtimeMinutes": "1", + "genres": [ + "Documentary", + "Short" + ] + }, + { + "tconst": "tt0000002", + "titleType": "short", + "primaryTitle": "Le clown et ses chiens", + "originalTitle": "Le clown et ses chiens", + "isAdult": 0, + "startYear": "1892", + "endYear": "", + "runtimeMinutes": "5", + "genres": [ + "Animation", + "Short" + ] + }, + { + "tconst": "tt0000003", + "titleType": "short", + "primaryTitle": "Pauvre Pierrot", + "originalTitle": "Pauvre Pierrot", + "isAdult": 0, + "startYear": "1892", + "endYear": "", + "runtimeMinutes": "4", + "genres": [ + "Animation", + "Comedy", + "Romance" + ] + }, + { + "tconst": "tt0000004", + "titleType": "short", + "primaryTitle": "Un bon bock", + "originalTitle": "Un bon bock", + "isAdult": 0, + "startYear": "1892", + "endYear": "", + "runtimeMinutes": "12", + "genres": [ + "Animation", + "Short" + ] + }, + { + "tconst": "tt0000005", + "titleType": "short", + "primaryTitle": "Blacksmith Scene", + "originalTitle": "Blacksmith Scene", + "isAdult": 0, + "startYear": "1893", + "endYear": "", + "runtimeMinutes": "1", + "genres": [ + "Comedy", + "Short" + ] + }, + { + "tconst": "tt0000006", + "titleType": "short", + "primaryTitle": "Chinese Opium Den", + "originalTitle": "Chinese Opium Den", + "isAdult": 0, + "startYear": "1894", + "endYear": "", + "runtimeMinutes": "1", + "genres": [ + "Short" + ] + }, + { + "tconst": "tt0000007", + "titleType": "short", + "primaryTitle": "Corbett and Courtney Before the Kinetograph", + "originalTitle": "Corbett and Courtney Before the Kinetograph", + "isAdult": 0, + "startYear": "1894", + "endYear": "", + "runtimeMinutes": "1", + "genres": [ + "Short", + "Sport" + ] + }, + { + "tconst": "tt0000008", + "titleType": "short", + "primaryTitle": 
"Edison Kinetoscopic Record of a Sneeze", + "originalTitle": "Edison Kinetoscopic Record of a Sneeze", + "isAdult": 0, + "startYear": "1894", + "endYear": "", + "runtimeMinutes": "1", + "genres": [ + "Documentary", + "Short" + ] + }, + { + "tconst": "tt0000009", + "titleType": "movie", + "primaryTitle": "Miss Jerry", + "originalTitle": "Miss Jerry", + "isAdult": 0, + "startYear": "1894", + "endYear": "", + "runtimeMinutes": "45", + "genres": [ + "Romance" + ] + }, + { + "tconst": "tt0000010", + "titleType": "short", + "primaryTitle": "Leaving the Factory", + "originalTitle": "La sortie de l'usine Lumière à Lyon", + "isAdult": 0, + "startYear": "1895", + "endYear": "", + "runtimeMinutes": "1", + "genres": [ + "Documentary", + "Short" + ] + } +] \ No newline at end of file diff --git a/example/nodejs-esm/document-export-import/index.js b/example/nodejs-esm/document-export-import/index.js new file mode 100644 index 0000000..8322bf5 --- /dev/null +++ b/example/nodejs-esm/document-export-import/index.js @@ -0,0 +1,97 @@ +import { Document, Charset } from "flexsearch/esm"; +import { promises as fs } from "fs"; + +const dirname = import.meta.dirname; +// loading test data +const data = JSON.parse(await fs.readFile(dirname + "/data.json", "utf8")); + +// you will need to keep the index configuration +// they will not export, also every change to the +// configuration requires a full re-index +const config = { + document: { + id: "tconst", + store: true, + index: [{ + field: "primaryTitle", + tokenize: "forward", + encoder: Charset.LatinBalance + },{ + field: "originalTitle", + tokenize: "forward", + encoder: Charset.LatinBalance + }], + tag: [{ + field: "startYear" + },{ + field: "genres" + }] + } +}; + +// create the document index +let document = new Document(config); + +// add test data +for(let i = 0; i < data.length; i++){ + document.add(data[i]); +} + +// perform a query +let result = document.search({ + query: "karmen", + tag: { + "startYear": "1894", + "genres": [ 
+ "Documentary", + "Short" + ] + }, + suggest: true, + enrich: true, + merge: true +}); + +// output results +console.log(result); + +// EXPORT +// ----------------------- + +await fs.mkdir("./export/").catch(e => {}); +await document.export(async function(key, data){ + await fs.writeFile("./export/" + key, data, "utf8"); +}); + +// IMPORT +// ----------------------- + +// create the same type of index you have used by .export() +// along with the same configuration +document = new Document(config); + +// load them in parallel +const files = await fs.readdir("./export/"); +await Promise.all(files.map(async file => { + const data = await fs.readFile("./export/" + file, "utf8"); + await document.import(file, data); +})) + +// perform query +result = document.search({ + query: "karmen", + tag: { + "startYear": "1894", + "genres": [ + "Documentary", + "Short" + ] + }, + suggest: true, + enrich: true, + merge: true +}); + +// output results +console.log("-------------------------------------"); +console.log(result); diff --git a/example/nodejs-esm/document-export-import/package.json b/example/nodejs-esm/document-export-import/package.json new file mode 100644 index 0000000..62db8c9 --- /dev/null +++ b/example/nodejs-esm/document-export-import/package.json @@ -0,0 +1,7 @@ +{ + "name": "nodejs-esm-document-export-import", + "type": "module", + "dependencies": { + "flexsearch": "github:nextapps-de/flexsearch#v0.8-preview" + } +} diff --git a/src/serialize.js b/src/serialize.js index 0c655e9..2f58312 100644 --- a/src/serialize.js +++ b/src/serialize.js @@ -132,10 +132,10 @@ function save(callback, field, key, chunk, index_doc, index_obj, index_prt = 0){ } /** - * @param callback - * @param field - * @param index_doc - * @param index_obj + * @param {function(string,string):Promise|void} callback + * @param {string|null=} field + * @param {number=} index_doc + * @param {number=} index_obj * @this {Index} */ @@ -186,6 +186,8 @@ export function exportIndex(callback, field, 
index_doc, index_obj = 0){ } /** + * @param {string} key + * @param {string|*} data * @this {Index} */ @@ -195,14 +197,14 @@ export function importIndex(key, data){ return; } if(is_string(data)){ - data = JSON.parse(data); + data = JSON.parse(/** @type {string} */(data)); } - key = key.split("."); - if(key[key.length - 1] === "json"){ - key.pop(); + const split = key.split("."); + if(split[split.length - 1] === "json"){ + split.pop(); } - key = key.length > 1 ? key[1] : key[0]; + key = split.length > 1 ? split[1] : split[0]; switch(key){ @@ -230,6 +232,10 @@ export function importIndex(key, data){ } /** + * @param {function(string,string):Promise|void} callback + * @param {string|null=} field + * @param {number=} index_doc + * @param {number=} index_obj * @this {Document} */ @@ -302,6 +308,8 @@ export function exportDocument(callback, field, index_doc = 0, index_obj = 0){ } /** + * @param {string} key + * @param {string|*} data * @this {Document} */ @@ -311,15 +319,15 @@ export function importDocument(key, data){ return; } if(is_string(data)){ - data = JSON.parse(data); + data = JSON.parse(/** @type {string} */(data)); } - key = key.split("."); - if(key[key.length - 1] === "json"){ - key.pop(); + const split = key.split("."); + if(split[split.length - 1] === "json"){ + split.pop(); } - const field = key.length > 2 ? key[0] : ""; - key = key.length > 2 ? key[2] : key[1]; + const field = split.length > 2 ? split[0] : ""; + key = split.length > 2 ? split[2] : split[1]; if(!field){