flexsearch/dist/module-debug/index/add.js


import { create_object } from "../common.js";
import Index, { autoCommit } from "../index.js";
import default_compress from "../compress.js";
import { KeystoreArray } from "../keystore.js";

// TODO:
// string + number as text
// boolean, null, undefined as ?


/**
 * @param {!number|string} id
 * @param {!string} content
 * @param {boolean=} _append
 * @param {boolean=} _skip_update
 */

Index.prototype.add = function (id, content, _append, _skip_update) {

    if (content && (id || 0 === id)) {

        // todo check skip_update
        //_skip_update = false;

        if (!_skip_update && !_append) {
            if (this.reg.has(id)) {
                return this.update(id, content);
            }
        }

        const depth = this.depth;
        // do not force a string as input
        // https://github.com/nextapps-de/flexsearch/issues/432
        content = this.encoder.encode(content, !depth);
        const word_length = content.length;

        if (word_length) {

            // check context dupes to skip all contextual redundancy along a document

            const dupes_ctx = create_object(),
                  dupes = create_object(),
                  resolution = this.resolution;


            for (let i = 0; i < word_length; i++) {
                let term = content[this.rtl ? word_length - 1 - i : i],
                    term_length = term.length;


                // todo check context search
                // this check also wasn't applied on search, so it's useless here
                // skip dupes will break the context chain
                if (term_length && (depth || !dupes[term])) {
                    let score = this.score ? this.score(content, term, i, null, 0) : get_score(resolution, word_length, i),
                        token = "";


                    switch (this.tokenize) {

                        case "full":
                            if (2 < term_length) {
                                for (let x = 0, _x; x < term_length; x++) {
                                    for (let y = term_length; y > x; y--) {
                                        token = term.substring(x, y);
                                        _x = this.rtl ? term_length - 1 - x : x;
                                        const partial_score = this.score ? this.score(content, term, i, token, _x) : get_score(resolution, word_length, i, term_length, _x);
                                        this.push_index(dupes, token, partial_score, id, _append);
                                    }
                                }
                                break;
                            }
                        // fallthrough to next case when term length < 3
                        case "bidirectional":
                        case "reverse":
                            // skip last round (this token exist already in "forward")
                            if (1 < term_length) {
                                for (let x = term_length - 1; 0 < x; x--) {
                                    token = term[this.rtl ? term_length - 1 - x : x] + token;
                                    const partial_score = this.score ? this.score(content, term, i, token, x) : get_score(resolution, word_length, i, term_length, x);
                                    this.push_index(dupes, token, partial_score, id, _append);
                                }
                                token = "";
                            }

                        // fallthrough to next case to apply forward also
                        case "forward":
                            if (1 < term_length) {
                                for (let x = 0; x < term_length; x++) {
                                    token += term[this.rtl ? term_length - 1 - x : x];
                                    this.push_index(dupes, token, score, id, _append);
                                }
                                break;
                            }

                        // fallthrough to next case when token has a length of 1
                        default:
                            // "strict":
                            this.push_index(dupes, term, score, id, _append);
                            // context is just supported by tokenizer "strict"
                            if (depth) {

                                if (1 < word_length && i < word_length - 1) {

                                    // check inner dupes to skip repeating words in the current context
                                    const dupes_inner = create_object(),
                                          resolution = this.resolution_ctx,
                                          keyword = term,
                                          size = Math.min(depth + 1, this.rtl ? i + 1 : word_length - i);


                                    dupes_inner[keyword] = 1;

                                    for (let x = 1; x < size; x++) {

                                        term = content[this.rtl ? word_length - 1 - i - x : i + x];

                                        if (term && !dupes_inner[term]) {

                                            dupes_inner[term] = 1;
                                            const context_score = this.score ? this.score(content, keyword, i, term, x - 1) : get_score(resolution + (word_length / 2 > resolution ? 0 : 1), word_length, i, size - 1, x - 1),
                                                  swap = this.bidirectional && term > keyword;

                                            this.push_index(dupes_ctx, swap ? keyword : term, context_score, id, _append, swap ? term : keyword);
                                        }
                                    }
                                }
                            }
                    }
                }
            }

            this.fastupdate || this.reg.add(id);
        } else {
            content = "";
        }
    }

    if (this.db) {
        // when the term has no valid content (e.g. empty),
        // then it was not added to the ID registry for removal
        content || this.commit_task.push({ del: id });
        this.commit_auto && autoCommit(this);
    }

    return this;
};

/**
 * @private
 * @param dupes
 * @param term
 * @param score
 * @param id
 * @param {boolean=} append
 * @param {string=} keyword
 */

Index.prototype.push_index = function (dupes, term, score, id, append, keyword) {
    let arr = keyword ? this.ctx : this.map,
        tmp;


    if (!dupes[term] || keyword && !(tmp = dupes[term])[keyword]) {

        if (keyword) {

            dupes = tmp || (dupes[term] = create_object());
            dupes[keyword] = 1;

            if (this.compress) {
                keyword = default_compress(keyword);
            }

            tmp = arr.get(keyword);
            tmp ? arr = tmp : arr.set(keyword, arr = new Map());
        } else {

            dupes[term] = 1;
        }

        if (this.compress) {
            term = default_compress(term);
        }

        tmp = arr.get(term);
        tmp ? arr = tmp : arr.set(term, arr = tmp = []);
        // the ID array will be upgraded dynamically
        arr = arr[score] || (arr[score] = []);

        if (!append || !arr.includes(id)) {

            // auto-upgrade to keystore array if max size exceeded
            if (2147483647 === arr.length /*|| !(arr instanceof KeystoreArray)*/) {
                    const keystore = new KeystoreArray(arr);
                    if (this.fastupdate) {
                        for (let value of this.reg.values()) {
                            if (value.includes(arr)) {
                                value[value.indexOf(arr)] = keystore;
                            }
                        }
                    }
                    tmp[score] = arr = keystore;
                }


            arr.push(id);

            // add a reference to the register for fast updates
            if (this.fastupdate) {
                const tmp = this.reg.get(id);
                tmp ? tmp.push(arr) : this.reg.set(id, [arr]);
            }
        }
    }
};

/**
 * @param {number} resolution
 * @param {number} length
 * @param {number} i
 * @param {number=} term_length
 * @param {number=} x
 * @returns {number}
 */

function get_score(resolution, length, i, term_length, x) {

    // console.log("resolution", resolution);
    // console.log("length", length);
    // console.log("term_length", term_length);
    // console.log("i", i);
    // console.log("x", x);
    // console.log((resolution - 1) / (length + (term_length || 0)) * (i + (x || 0)) + 1);

    // the first resolution slot is reserved for the best match,
    // when a query matches the first word(s).

    // also to stretch score to the whole range of resolution, the
    // calculation is shift by one and cut the floating point.
    // this needs the resolution "1" to be handled additionally.

    // do not stretch the resolution more than the term length will
    // improve performance and memory, also it improves scoring in
    // most cases between a short document and a long document

    return i && 1 < resolution ? length + (term_length || 0) <= resolution ? i + (x || 0) : 0 | (resolution - 1) / (length + (term_length || 0)) * (i + (x || 0)) + 1 : 0;
}