1
0
mirror of https://github.com/nextapps-de/flexsearch.git synced 2025-08-12 09:04:35 +02:00

add examples for export/import

This commit is contained in:
Thomas Wilkerling
2025-03-15 17:57:14 +01:00
parent 41b55bead6
commit 870c8c702a
16 changed files with 700 additions and 16 deletions

View File

@@ -16,7 +16,7 @@ npm install git+https://github.com/nextapps-de/flexsearch/tree/v0.8-preview
- Greatly enhanced performance of the whole text encoding pipeline - Greatly enhanced performance of the whole text encoding pipeline
- Improved indexing of numeric content (Triplets) - Improved indexing of numeric content (Triplets)
- Intermediate result sets and `Resolver` - Intermediate result sets and `Resolver`
- Basic Resolver: `and`, `or`, `xor`, `not`, `limit`, `offset`, `enrich`, `resolve`, Output formatter - Basic Resolver: `and`, `or`, `xor`, `not`, `limit`, `offset`, `boost`, `resolve`
- Improved charset collection - Improved charset collection
- New charset preset `soundex` which further reduces memory consumption by also increasing "fuzziness" - New charset preset `soundex` which further reduces memory consumption by also increasing "fuzziness"
- Performance gain when polling tasks to the index by using "Event-Loop-Caches" - Performance gain when polling tasks to the index by using "Event-Loop-Caches"
@@ -81,10 +81,12 @@ All persistent variants are optimized for larger sized indexes under heavy workl
- [basic-resolver](example/nodejs-commonjs/basic-resolver) - [basic-resolver](example/nodejs-commonjs/basic-resolver)
- [basic-worker](example/nodejs-commonjs/basic-worker) - [basic-worker](example/nodejs-commonjs/basic-worker)
- [basic-worker-extern-config](example/nodejs-commonjs/basic-worker-extern-config) - [basic-worker-extern-config](example/nodejs-commonjs/basic-worker-extern-config)
- [basic-export-import](example/nodejs-commonjs/basic-export-import)
- [document](example/nodejs-commonjs/document) - [document](example/nodejs-commonjs/document)
- [document-persistent](example/nodejs-commonjs/document-persistent) - [document-persistent](example/nodejs-commonjs/document-persistent)
- [document-worker](example/nodejs-commonjs/document-worker) - [document-worker](example/nodejs-commonjs/document-worker)
- [document-worker-extern-config](example/nodejs-commonjs/document-worker-extern-config) - [document-worker-extern-config](example/nodejs-commonjs/document-worker-extern-config)
- [document-export-import](example/nodejs-commonjs/document-export-import)
- [language-pack](example/nodejs-commonjs/language-pack) - [language-pack](example/nodejs-commonjs/language-pack)
- [nodejs-esm](example/nodejs-esm): - [nodejs-esm](example/nodejs-esm):
- [basic](example/nodejs-esm/basic) - [basic](example/nodejs-esm/basic)
@@ -93,10 +95,12 @@ All persistent variants are optimized for larger sized indexes under heavy workl
- [basic-resolver](example/nodejs-esm/basic-resolver) - [basic-resolver](example/nodejs-esm/basic-resolver)
- [basic-worker](example/nodejs-esm/basic-worker) - [basic-worker](example/nodejs-esm/basic-worker)
- [basic-worker-extern-config](example/nodejs-esm/basic-worker-extern-config) - [basic-worker-extern-config](example/nodejs-esm/basic-worker-extern-config)
- [basic-export-import](example/nodejs-esm/basic-export-import)
- [document](example/nodejs-esm/document) - [document](example/nodejs-esm/document)
- [document-persistent](example/nodejs-esm/document-persistent) - [document-persistent](example/nodejs-esm/document-persistent)
- [document-worker](example/nodejs-esm/document-worker) - [document-worker](example/nodejs-esm/document-worker)
- [document-worker-extern-config](example/nodejs-esm/document-worker-extern-config) - [document-worker-extern-config](example/nodejs-esm/document-worker-extern-config)
- [document-export-import](example/nodejs-esm/document-export-import)
- [language-pack](example/nodejs-esm/language-pack) - [language-pack](example/nodejs-esm/language-pack)
### Examples Browser ### Examples Browser
@@ -2315,6 +2319,7 @@ for(let i = 0; i < files.length; i++){
- You can import language packs by `dist/module/lang/*` when using ESM and by `const EnglishPreset = require("flexsearch/lang/en");` when using CommonJS (Node.js) - You can import language packs by `dist/module/lang/*` when using ESM and by `const EnglishPreset = require("flexsearch/lang/en");` when using CommonJS (Node.js)
- The method `index.append()` is now deprecated and will be removed in the near future, because it isn't consistent and leads into unexpected behavior when not used properly. You should only use `index.add()` to push contents to the index. - The method `index.append()` is now deprecated and will be removed in the near future, because it isn't consistent and leads into unexpected behavior when not used properly. You should only use `index.add()` to push contents to the index.
- Using the `async` variants like `.searchAsync` is now deprecated (but still works), asynchronous responses will always return from Worker-Index and from Persistent-Index, everything else will return a non-promised result. Having both types of methods looks like the developers can choose between them, but they can't. - Using the `async` variants like `.searchAsync` is now deprecated (but still works), asynchronous responses will always return from Worker-Index and from Persistent-Index, everything else will return a non-promised result. Having both types of methods looks like the developers can choose between them, but they can't.
- Exports created with any version below v0.8 are not compatible with v0.8 and cannot be imported
## Not finished yet ## Not finished yet

View File

@@ -0,0 +1,7 @@
```bash
npm install
```
```bash
node index.js
```

View File

@@ -0,0 +1,71 @@
const { Index } = require("flexsearch");
const fs = require("fs").promises;

// folder (relative to the current working directory) which receives the export
const EXPORT_DIR = "./export/";

/**
 * Example: builds a simple index, exports it to files and imports
 * those files into a freshly created index of the same configuration.
 */
(async function(){

    // Keep the index configuration around: it is not part of the export.
    // Also, every change to the configuration requires a full re-index.
    const config = {
        tokenize: "forward"
    };

    // create a simple index which can store id-content-pairs
    let index = new Index(config);

    // some test data
    const data = [
        'cats abcd efgh ijkl mnop qrst uvwx cute',
        'cats abcd efgh ijkl mnop qrst cute',
        'cats abcd efgh ijkl mnop cute',
        'cats abcd efgh ijkl cute',
        'cats abcd efgh cute',
        'cats abcd cute',
        'cats cute'
    ];

    // add data to the index (the array position is used as the id)
    data.forEach((item, id) => {
        index.add(id, item);
    });

    // perform query
    let result = index.search("cute cat");

    // display results
    result.forEach(id => {
        console.log(data[id]);
    });

    // EXPORT
    // -----------------------

    // "recursive: true" lets mkdir succeed when the folder already exists,
    // while real failures (e.g. missing permissions) are still reported
    // instead of being silently swallowed
    await fs.mkdir(EXPORT_DIR, { recursive: true });

    // every key/content pair handed to the callback is written to its own file
    await index.export(async function(key, content){
        await fs.writeFile(EXPORT_DIR + key, content, "utf8");
    });

    // IMPORT
    // -----------------------

    // create the same type of index you have used by .export()
    // along with the same configuration
    index = new Index(config);

    // load them in parallel
    // note: every file found in the export folder gets imported,
    // so the folder should contain nothing but this export
    const files = await fs.readdir(EXPORT_DIR);
    await Promise.all(files.map(async file => {
        const content = await fs.readFile(EXPORT_DIR + file, "utf8");
        await index.import(file, content);
    }));

    // perform query
    result = index.search("cute cat");

    // display results
    console.log("-------------------------------------");
    result.forEach(id => {
        console.log(data[id]);
    });

}()).catch(function(err){
    // report failures explicitly and signal them through the exit code
    console.error(err);
    process.exitCode = 1;
});

View File

@@ -0,0 +1,6 @@
{
"name": "nodejs-commonjs-basic-export-import",
"dependencies": {
"flexsearch": "github:nextapps-de/flexsearch#v0.8-preview"
}
}

View File

@@ -0,0 +1,7 @@
```bash
npm install
```
```bash
node index.js
```

View File

@@ -0,0 +1,141 @@
[
{
"tconst": "tt0000001",
"titleType": "short",
"primaryTitle": "Carmencita",
"originalTitle": "Carmencita",
"isAdult": 0,
"startYear": "1894",
"endYear": "",
"runtimeMinutes": "1",
"genres": [
"Documentary",
"Short"
]
},
{
"tconst": "tt0000002",
"titleType": "short",
"primaryTitle": "Le clown et ses chiens",
"originalTitle": "Le clown et ses chiens",
"isAdult": 0,
"startYear": "1892",
"endYear": "",
"runtimeMinutes": "5",
"genres": [
"Animation",
"Short"
]
},
{
"tconst": "tt0000003",
"titleType": "short",
"primaryTitle": "Pauvre Pierrot",
"originalTitle": "Pauvre Pierrot",
"isAdult": 0,
"startYear": "1892",
"endYear": "",
"runtimeMinutes": "4",
"genres": [
"Animation",
"Comedy",
"Romance"
]
},
{
"tconst": "tt0000004",
"titleType": "short",
"primaryTitle": "Un bon bock",
"originalTitle": "Un bon bock",
"isAdult": 0,
"startYear": "1892",
"endYear": "",
"runtimeMinutes": "12",
"genres": [
"Animation",
"Short"
]
},
{
"tconst": "tt0000005",
"titleType": "short",
"primaryTitle": "Blacksmith Scene",
"originalTitle": "Blacksmith Scene",
"isAdult": 0,
"startYear": "1893",
"endYear": "",
"runtimeMinutes": "1",
"genres": [
"Comedy",
"Short"
]
},
{
"tconst": "tt0000006",
"titleType": "short",
"primaryTitle": "Chinese Opium Den",
"originalTitle": "Chinese Opium Den",
"isAdult": 0,
"startYear": "1894",
"endYear": "",
"runtimeMinutes": "1",
"genres": [
"Short"
]
},
{
"tconst": "tt0000007",
"titleType": "short",
"primaryTitle": "Corbett and Courtney Before the Kinetograph",
"originalTitle": "Corbett and Courtney Before the Kinetograph",
"isAdult": 0,
"startYear": "1894",
"endYear": "",
"runtimeMinutes": "1",
"genres": [
"Short",
"Sport"
]
},
{
"tconst": "tt0000008",
"titleType": "short",
"primaryTitle": "Edison Kinetoscopic Record of a Sneeze",
"originalTitle": "Edison Kinetoscopic Record of a Sneeze",
"isAdult": 0,
"startYear": "1894",
"endYear": "",
"runtimeMinutes": "1",
"genres": [
"Documentary",
"Short"
]
},
{
"tconst": "tt0000009",
"titleType": "movie",
"primaryTitle": "Miss Jerry",
"originalTitle": "Miss Jerry",
"isAdult": 0,
"startYear": "1894",
"endYear": "",
"runtimeMinutes": "45",
"genres": [
"Romance"
]
},
{
"tconst": "tt0000010",
"titleType": "short",
"primaryTitle": "Leaving the Factory",
"originalTitle": "La sortie de l'usine Lumière à Lyon",
"isAdult": 0,
"startYear": "1895",
"endYear": "",
"runtimeMinutes": "1",
"genres": [
"Documentary",
"Short"
]
}
]

View File

@@ -0,0 +1,99 @@
const { Document, Charset } = require("flexsearch");
const fs = require("fs").promises;

// folder (relative to the current working directory) which receives the export
const EXPORT_DIR = "./export/";

/**
 * Builds the search options used before and after the export/import roundtrip.
 * A fresh object is returned on every call, so both searches get their own copy.
 */
function createQuery(){
    return {
        query: "karmen",
        tag: {
            "startYear": "1894",
            "genres": [
                "Documentary",
                "Short"
            ]
        },
        suggest: true,
        enrich: true,
        merge: true
    };
}

/**
 * Example: builds a document index from data.json, exports it to files and
 * imports those files into a freshly created index of the same configuration.
 */
(async function(){

    // loading test data
    const data = JSON.parse(await fs.readFile(__dirname + "/data.json", "utf8"));

    // Keep the index configuration around: it is not part of the export.
    // Also, every change to the configuration requires a full re-index.
    const config = {
        document: {
            id: "tconst",
            store: true,
            index: [{
                field: "primaryTitle",
                tokenize: "forward",
                encoder: Charset.LatinBalance
            },{
                field: "originalTitle",
                tokenize: "forward",
                encoder: Charset.LatinBalance
            }],
            tag: [{
                field: "startYear"
            },{
                field: "genres"
            }]
        }
    };

    // create the document index
    let document = new Document(config);

    // add test data
    for(const entry of data){
        document.add(entry);
    }

    // perform a query
    let result = document.search(createQuery());

    // output results
    console.log(result);

    // EXPORT
    // -----------------------

    // "recursive: true" lets mkdir succeed when the folder already exists,
    // while real failures (e.g. missing permissions) are still reported
    // instead of being silently swallowed
    await fs.mkdir(EXPORT_DIR, { recursive: true });

    // every key/content pair handed to the callback is written to its own file
    await document.export(async function(key, content){
        await fs.writeFile(EXPORT_DIR + key, content, "utf8");
    });

    // IMPORT
    // -----------------------

    // create the same type of index you have used by .export()
    // along with the same configuration
    document = new Document(config);

    // load them in parallel
    // note: every file found in the export folder gets imported,
    // so the folder should contain nothing but this export
    const files = await fs.readdir(EXPORT_DIR);
    await Promise.all(files.map(async file => {
        const content = await fs.readFile(EXPORT_DIR + file, "utf8");
        await document.import(file, content);
    }));

    // perform query
    result = document.search(createQuery());

    // output results
    console.log("-------------------------------------");
    console.log(result);

}()).catch(function(err){
    // report failures explicitly and signal them through the exit code
    console.error(err);
    process.exitCode = 1;
});

View File

@@ -0,0 +1,6 @@
{
"name": "nodejs-commonjs-document-export-import",
"dependencies": {
"flexsearch": "github:nextapps-de/flexsearch#v0.8-preview"
}
}

View File

@@ -0,0 +1,7 @@
```bash
npm install
```
```bash
node index.js
```

View File

@@ -0,0 +1,68 @@
import { Index } from "flexsearch/esm";
import { promises as fs } from "fs";

// Example: builds a simple index, exports it to files and imports
// those files into a freshly created index of the same configuration.

// folder (relative to the current working directory) which receives the export
const EXPORT_DIR = "./export/";

// Keep the index configuration around: it is not part of the export.
// Also, every change to the configuration requires a full re-index.
const config = {
    tokenize: "forward"
};

// create a simple index which can store id-content-pairs
let index = new Index(config);

// some test data
const data = [
    'cats abcd efgh ijkl mnop qrst uvwx cute',
    'cats abcd efgh ijkl mnop qrst cute',
    'cats abcd efgh ijkl mnop cute',
    'cats abcd efgh ijkl cute',
    'cats abcd efgh cute',
    'cats abcd cute',
    'cats cute'
];

// add data to the index (the array position is used as the id)
data.forEach((item, id) => {
    index.add(id, item);
});

// perform query
let result = index.search("cute cat");

// display results
result.forEach(id => {
    console.log(data[id]);
});

// EXPORT
// -----------------------

// "recursive: true" lets mkdir succeed when the folder already exists,
// while real failures (e.g. missing permissions) are still reported
// instead of being silently swallowed
await fs.mkdir(EXPORT_DIR, { recursive: true });

// every key/content pair handed to the callback is written to its own file
await index.export(async function(key, content){
    await fs.writeFile(EXPORT_DIR + key, content, "utf8");
});

// IMPORT
// -----------------------

// create the same type of index you have used by .export()
// along with the same configuration
index = new Index(config);

// load them in parallel
// note: every file found in the export folder gets imported,
// so the folder should contain nothing but this export
const files = await fs.readdir(EXPORT_DIR);
await Promise.all(files.map(async file => {
    const content = await fs.readFile(EXPORT_DIR + file, "utf8");
    await index.import(file, content);
}));

// perform query
result = index.search("cute cat");

// display results
console.log("-------------------------------------");
result.forEach(id => {
    console.log(data[id]);
});

View File

@@ -0,0 +1,7 @@
{
"name": "nodejs-esm-basic-export-import",
"type": "module",
"dependencies": {
"flexsearch": "github:nextapps-de/flexsearch#v0.8-preview"
}
}

View File

@@ -0,0 +1,7 @@
```bash
npm install
```
```bash
node index.js
```

View File

@@ -0,0 +1,141 @@
[
{
"tconst": "tt0000001",
"titleType": "short",
"primaryTitle": "Carmencita",
"originalTitle": "Carmencita",
"isAdult": 0,
"startYear": "1894",
"endYear": "",
"runtimeMinutes": "1",
"genres": [
"Documentary",
"Short"
]
},
{
"tconst": "tt0000002",
"titleType": "short",
"primaryTitle": "Le clown et ses chiens",
"originalTitle": "Le clown et ses chiens",
"isAdult": 0,
"startYear": "1892",
"endYear": "",
"runtimeMinutes": "5",
"genres": [
"Animation",
"Short"
]
},
{
"tconst": "tt0000003",
"titleType": "short",
"primaryTitle": "Pauvre Pierrot",
"originalTitle": "Pauvre Pierrot",
"isAdult": 0,
"startYear": "1892",
"endYear": "",
"runtimeMinutes": "4",
"genres": [
"Animation",
"Comedy",
"Romance"
]
},
{
"tconst": "tt0000004",
"titleType": "short",
"primaryTitle": "Un bon bock",
"originalTitle": "Un bon bock",
"isAdult": 0,
"startYear": "1892",
"endYear": "",
"runtimeMinutes": "12",
"genres": [
"Animation",
"Short"
]
},
{
"tconst": "tt0000005",
"titleType": "short",
"primaryTitle": "Blacksmith Scene",
"originalTitle": "Blacksmith Scene",
"isAdult": 0,
"startYear": "1893",
"endYear": "",
"runtimeMinutes": "1",
"genres": [
"Comedy",
"Short"
]
},
{
"tconst": "tt0000006",
"titleType": "short",
"primaryTitle": "Chinese Opium Den",
"originalTitle": "Chinese Opium Den",
"isAdult": 0,
"startYear": "1894",
"endYear": "",
"runtimeMinutes": "1",
"genres": [
"Short"
]
},
{
"tconst": "tt0000007",
"titleType": "short",
"primaryTitle": "Corbett and Courtney Before the Kinetograph",
"originalTitle": "Corbett and Courtney Before the Kinetograph",
"isAdult": 0,
"startYear": "1894",
"endYear": "",
"runtimeMinutes": "1",
"genres": [
"Short",
"Sport"
]
},
{
"tconst": "tt0000008",
"titleType": "short",
"primaryTitle": "Edison Kinetoscopic Record of a Sneeze",
"originalTitle": "Edison Kinetoscopic Record of a Sneeze",
"isAdult": 0,
"startYear": "1894",
"endYear": "",
"runtimeMinutes": "1",
"genres": [
"Documentary",
"Short"
]
},
{
"tconst": "tt0000009",
"titleType": "movie",
"primaryTitle": "Miss Jerry",
"originalTitle": "Miss Jerry",
"isAdult": 0,
"startYear": "1894",
"endYear": "",
"runtimeMinutes": "45",
"genres": [
"Romance"
]
},
{
"tconst": "tt0000010",
"titleType": "short",
"primaryTitle": "Leaving the Factory",
"originalTitle": "La sortie de l'usine Lumière à Lyon",
"isAdult": 0,
"startYear": "1895",
"endYear": "",
"runtimeMinutes": "1",
"genres": [
"Documentary",
"Short"
]
}
]

View File

@@ -0,0 +1,97 @@
import { Document, Charset } from "flexsearch/esm";
import { promises as fs } from "fs";

// Example: builds a document index from data.json, exports it to files and
// imports those files into a freshly created index of the same configuration.

// folder (relative to the current working directory) which receives the export
const EXPORT_DIR = "./export/";

// Resolve data.json relative to this module. A file URL built from
// import.meta.url is used instead of import.meta.dirname, because the
// latter is only available on recent Node.js releases.
const dataUrl = new URL("./data.json", import.meta.url);

/**
 * Builds the search options used before and after the export/import roundtrip.
 * A fresh object is returned on every call, so both searches get their own copy.
 */
function createQuery(){
    return {
        query: "karmen",
        tag: {
            "startYear": "1894",
            "genres": [
                "Documentary",
                "Short"
            ]
        },
        suggest: true,
        enrich: true,
        merge: true
    };
}

// loading test data
const data = JSON.parse(await fs.readFile(dataUrl, "utf8"));

// Keep the index configuration around: it is not part of the export.
// Also, every change to the configuration requires a full re-index.
const config = {
    document: {
        id: "tconst",
        store: true,
        index: [{
            field: "primaryTitle",
            tokenize: "forward",
            encoder: Charset.LatinBalance
        },{
            field: "originalTitle",
            tokenize: "forward",
            encoder: Charset.LatinBalance
        }],
        tag: [{
            field: "startYear"
        },{
            field: "genres"
        }]
    }
};

// create the document index
let document = new Document(config);

// add test data
for(const entry of data){
    document.add(entry);
}

// perform a query
let result = document.search(createQuery());

// output results
console.log(result);

// EXPORT
// -----------------------

// "recursive: true" lets mkdir succeed when the folder already exists,
// while real failures (e.g. missing permissions) are still reported
// instead of being silently swallowed
await fs.mkdir(EXPORT_DIR, { recursive: true });

// every key/content pair handed to the callback is written to its own file
await document.export(async function(key, content){
    await fs.writeFile(EXPORT_DIR + key, content, "utf8");
});

// IMPORT
// -----------------------

// create the same type of index you have used by .export()
// along with the same configuration
document = new Document(config);

// load them in parallel
// note: every file found in the export folder gets imported,
// so the folder should contain nothing but this export
const files = await fs.readdir(EXPORT_DIR);
await Promise.all(files.map(async file => {
    const content = await fs.readFile(EXPORT_DIR + file, "utf8");
    await document.import(file, content);
}));

// perform query
result = document.search(createQuery());

// output results
console.log("-------------------------------------");
console.log(result);

View File

@@ -0,0 +1,7 @@
{
"name": "nodejs-esm-document-export-import",
"type": "module",
"dependencies": {
"flexsearch": "github:nextapps-de/flexsearch#v0.8-preview"
}
}

View File

@@ -132,10 +132,10 @@ function save(callback, field, key, chunk, index_doc, index_obj, index_prt = 0){
} }
/** /**
* @param callback * @param {function(string,string):Promise|void} callback
* @param field * @param {string|null=} field
* @param index_doc * @param {number=} index_doc
* @param index_obj * @param {number=} index_obj
* @this {Index} * @this {Index}
*/ */
@@ -186,6 +186,8 @@ export function exportIndex(callback, field, index_doc, index_obj = 0){
} }
/** /**
* @param {string} key
* @param {string|*} data
* @this {Index} * @this {Index}
*/ */
@@ -195,14 +197,14 @@ export function importIndex(key, data){
return; return;
} }
if(is_string(data)){ if(is_string(data)){
data = JSON.parse(data); data = JSON.parse(/** @type {string} */(data));
} }
key = key.split("."); const split = key.split(".");
if(key[key.length - 1] === "json"){ if(split[split.length - 1] === "json"){
key.pop(); split.pop();
} }
key = key.length > 1 ? key[1] : key[0]; key = split.length > 1 ? split[1] : split[0];
switch(key){ switch(key){
@@ -230,6 +232,10 @@ export function importIndex(key, data){
} }
/** /**
* @param {function(string,string):Promise|void} callback
* @param {string|null=} field
* @param {number=} index_doc
* @param {number=} index_obj
* @this {Document} * @this {Document}
*/ */
@@ -302,6 +308,8 @@ export function exportDocument(callback, field, index_doc = 0, index_obj = 0){
} }
/** /**
* @param key
* @param {string|*} data
* @this {Document} * @this {Document}
*/ */
@@ -311,15 +319,15 @@ export function importDocument(key, data){
return; return;
} }
if(is_string(data)){ if(is_string(data)){
data = JSON.parse(data); data = JSON.parse(/** @type {string} */(data));
} }
key = key.split("."); const split = key.split(".");
if(key[key.length - 1] === "json"){ if(split[split.length - 1] === "json"){
key.pop(); split.pop();
} }
const field = key.length > 2 ? key[0] : ""; const field = split.length > 2 ? split[0] : "";
key = key.length > 2 ? key[2] : key[1]; key = split.length > 2 ? split[2] : split[1];
if(!field){ if(!field){