import { create_object, concat, sort_by_length_up, get_max_len } from "./common.js"; /* from -> result[ res[score][id], res[score][id], ] to -> [id] */ /** * Implementation based on Object[key] provides better suggestions * capabilities and has less performance scaling issues on large indexes. * * @param arrays * @param limit * @param offset * @param {boolean|Array=} suggest * @param {boolean=} resolve * @returns {Array} */ export function intersect(arrays, limit, offset, suggest) { const length = arrays.length; // todo remove // if(length < 2){ // throw new Error("Not optimized intersect"); // } let result = [], size = 0, check, check_suggest, check_new, res_arr; if (suggest) { suggest = []; } // 1. a reversed order prioritize the order of words from a query // because it ends with the first term. // 2. process terms in reversed order often has advantage for // the fast path "end reached". // alternatively the results could be sorted by length up //arrays.sort(sort_by_length_up); // todo the outer loop should be the res array instead of term array, // this isn't just simple because the intersection calculation needs to reflect this //const maxlen = get_max_len(arrays); for (let x = length - 1, found; 0 <= x; x--) { //for(let x = 0, found; x < length; x++){ res_arr = arrays[x]; check_new = create_object(); found = !check; // process relevance in forward order (direction is // important for adding IDs during the last round) for (let y = 0, ids; y < res_arr.length; y++) { ids = res_arr[y]; if (!ids || !ids.length) continue; for (let z = 0, id; z < ids.length; z++) { id = ids[z]; // check exists starting from the 2nd slot if (check) { if (check[id]) { // check if in last round if (!x) { //if(x === length - 1){ if (offset) { offset--; } else { result[size++] = id; if (size === limit) { // fast path "end reached" return result; /*resolve === false ? { result, suggest } :*/ } } } if (x || suggest) { //if((x < length - 1) || suggest){ check_new[id] = 1; } found = /* tag? */ /* stringify */ /* stringify */ /* skip update: */ /* append: */ /* skip update: */ /* skip_update: */ /* skip deletion */!0 /*await rows.hasNext()*/ /*await rows.hasNext()*/ /*await rows.hasNext()*/; } if (suggest) { if (!check_suggest[id]) { check_suggest[id] = 1; const arr = suggest[y] || (suggest[y] = []); arr.push(id); } // OLD: // // check_idx = (check_suggest[id] || 0) + 1; // check_suggest[id] = check_idx; // // // do not adding IDs which are already included in the result (saves one loop) // // the first intersection match has the check index 2, so shift by -2 // // if(check_idx < length){ // // const tmp = suggest[check_idx - 2] || (suggest[check_idx - 2] = []); // tmp[tmp.length] = id; // } } } else { // pre-fill in first round check_new[id] = 1; } } } if (suggest) { // re-use the first pre-filled check for suggestions check || (check_suggest = check_new); } else if (!found) { return []; } check = check_new; } // return intermediate result // if(resolve === false){ // return { result, suggest }; // } if (suggest) { // needs to iterate in reverse direction for (let x = suggest.length - 1, ids, len; 0 <= x; x--) { ids = suggest[x]; len = ids.length; for (let y = 0, id; y < len; y++) { id = ids[y]; if (!check[id]) { if (offset) { offset--; } else { result[size++] = id; if (size === limit) { // fast path "end reached" return result; } } check[id] = 1; } } } } return result; } /** * @param mandatory * @param arrays * @returns {Array} */ export function intersect_union(mandatory, arrays) { const check = create_object(), union = create_object(), result = []; for (let x = 0; x < mandatory.length; x++) { check[mandatory[x]] = 1; } for (let x = 0, arr; x < arrays.length; x++) { arr = arrays[x]; for (let y = 0, id; y < arr.length; y++) { id = arr[y]; if (check[id]) { if (!union[id]) { union[id] = 1; result.push(id); } } } } return result; } /** * Implementation based on Array.includes() provides better performance, * but it needs at least one word in the query which is less frequent. * Also on large indexes it does not scale well performance-wise. * This strategy also lacks of suggestion capabilities (matching & sorting). * * @param arrays * @param limit * @param offset * @param {boolean|Array=} suggest * @returns {Array} */ // export function intersect(arrays, limit, offset, suggest) { // // const length = arrays.length; // let result = []; // let check; // // // determine shortest array and collect results // // from the sparse relevance arrays // // let smallest_size; // let smallest_arr; // let smallest_index; // // for(let x = 0; x < length; x++){ // // const arr = arrays[x]; // const len = arr.length; // // let size = 0; // // for(let y = 0, tmp; y < len; y++){ // // tmp = arr[y]; // // if(tmp){ // // size += tmp.length; // } // } // // if(!smallest_size || (size < smallest_size)){ // // smallest_size = size; // smallest_arr = arr; // smallest_index = x; // } // } // // smallest_arr = smallest_arr.length === 1 ? // // smallest_arr[0] // : // concat(smallest_arr); // // if(suggest){ // // suggest = [smallest_arr]; // check = create_object(); // } // // let size = 0; // let steps = 0; // // // process terms in reversed order often results in better performance. // // the outer loop must be the words array, using the // // smallest array here disables the "fast fail" optimization. // // for(let x = length - 1; x >= 0; x--){ // // if(x !== smallest_index){ // // steps++; // // const word_arr = arrays[x]; // const word_arr_len = word_arr.length; // const new_arr = []; // // let count = 0; // // for(let z = 0, id; z < smallest_arr.length; z++){ // // id = smallest_arr[z]; // // let found; // // // process relevance in forward order (direction is // // important for adding IDs during the last round) // // for(let y = 0; y < word_arr_len; y++){ // // const arr = word_arr[y]; // // if(arr.length){ // // found = arr.includes(id); // // if(found){ // // // check if in last round // // if(steps === length - 1){ // // if(offset){ // // offset--; // } // else{ // // result[size++] = id; // // if(size === limit){ // // // fast path "end reached" // // return result; // } // } // // if(suggest){ // // check[id] = 1; // } // } // // break; // } // } // } // // if(found){ // // new_arr[count++] = id; // } // } // // if(suggest){ // // suggest[steps] = new_arr; // } // else if(!count){ // // return []; // } // // smallest_arr = new_arr; // } // } // // if(suggest){ // // // needs to iterate in reverse direction // // for(let x = suggest.length - 1, arr, len; x >= 0; x--){ // // arr = suggest[x]; // len = arr && arr.length; // // if(len){ // // for(let y = 0, id; y < len; y++){ // // id = arr[y]; // // if(!check[id]){ // // check[id] = 1; // // if(offset){ // // offset--; // } // else{ // // result[size++] = id; // // if(size === limit){ // // // fast path "end reached" // // return result; // } // } // } // } // } // } // } // // return result; // }