mirror of
https://github.com/processwire/processwire.git
synced 2025-08-09 16:26:59 +02:00
Continued improvements to DatabaseQuerySelectFulltext class, especially to improve quality of results on query expansion operators
This commit is contained in:
@@ -24,7 +24,6 @@
|
||||
* https://processwire.com
|
||||
*
|
||||
* @property-read $tableField
|
||||
* @method array getWordAlternates($word)
|
||||
*
|
||||
*
|
||||
*
|
||||
@@ -75,6 +74,20 @@ class DatabaseQuerySelectFulltext extends Wire {
|
||||
*/
|
||||
protected $not = false;
|
||||
|
||||
/**
|
||||
* Cached minimum word length
|
||||
*
|
||||
* @var int|null
|
||||
*
|
||||
*/
|
||||
protected $minWordLength = null;
|
||||
|
||||
/**
|
||||
* @var array
|
||||
*
|
||||
*/
|
||||
static protected $scoreCnts = array();
|
||||
|
||||
/**
|
||||
* Method names to operators they handle
|
||||
*
|
||||
@@ -87,21 +100,13 @@ class DatabaseQuerySelectFulltext extends Wire {
|
||||
'matchPhraseExpand' => array('*+='),
|
||||
'matchRegular' => array('**=', '**+='),
|
||||
'matchStartEnd' => array('^=', '$='),
|
||||
'matchWords' => array('~=', '~+=', '~*=', '~~=', '~|=', '~|*='),
|
||||
'matchWords' => array('~=', '~+=', '~*=', '~~=', '~|=', '~|*=', '~|+='),
|
||||
'matchLikeWords' => array('~%=', '~|%='),
|
||||
'matchLikePhrase' => array('%='),
|
||||
'matchLikeStartEnd' => array('%^=', '%$='),
|
||||
'matchCommands' => array('#='),
|
||||
);
|
||||
|
||||
/**
|
||||
* Keep track of field names used for scores so that the same one isn't ever used more than once
|
||||
*
|
||||
* @var array
|
||||
*
|
||||
*/
|
||||
static $scoreFields = array();
|
||||
|
||||
/**
|
||||
* Construct
|
||||
*
|
||||
@@ -152,7 +157,7 @@ class DatabaseQuerySelectFulltext extends Wire {
|
||||
* @return string
|
||||
*
|
||||
*/
|
||||
protected function escapeLIKE($str) {
|
||||
protected function escapeLike($str) {
|
||||
return str_replace(array('%', '_'), array('\\%', '\\_'), $str);
|
||||
}
|
||||
|
||||
@@ -165,7 +170,7 @@ class DatabaseQuerySelectFulltext extends Wire {
|
||||
* @return string
|
||||
*
|
||||
*/
|
||||
protected function escapeAGAINST($str) {
|
||||
protected function escapeAgainst($str) {
|
||||
$str = str_replace(array('@', '+', '-', '*', '~', '<', '>', '(', ')', ':', '"', '&', '|', '=', '.'), ' ', $str);
|
||||
while(strpos($str, ' ')) $str = str_replace(' ', ' ', $str);
|
||||
return $str;
|
||||
@@ -281,11 +286,14 @@ class DatabaseQuerySelectFulltext extends Wire {
|
||||
*/
|
||||
protected function matchLikePhrase($value) {
|
||||
$likeType = $this->not ? 'NOT LIKE' : 'LIKE';
|
||||
$this->query->where("$this->tableField $likeType ?", '%' . $this->escapeLIKE($value) . '%');
|
||||
$this->query->where("$this->tableField $likeType ?", '%' . $this->escapeLike($value) . '%');
|
||||
}
|
||||
|
||||
/**
|
||||
* Match starts-with or ends-with using only LIKE
|
||||
* Match starts-with or ends-with using only LIKE (no match/against index)
|
||||
*
|
||||
* Does not ignore whitespace, closing tags or punctuation at start/end the way that the
|
||||
* matchStartEnd() method does, so this can be used to perform more literal start/end matches.
|
||||
*
|
||||
* @param string $value
|
||||
*
|
||||
@@ -293,9 +301,9 @@ class DatabaseQuerySelectFulltext extends Wire {
|
||||
protected function matchLikeStartEnd($value) {
|
||||
$likeType = $this->not ? 'NOT LIKE' : 'LIKE';
|
||||
if(strpos($this->operator, '^') !== false) {
|
||||
$this->query->where("$this->tableField $likeType ?", $this->escapeLIKE($value) . '%');
|
||||
$this->query->where("$this->tableField $likeType ?", $this->escapeLike($value) . '%');
|
||||
} else {
|
||||
$this->query->where("$this->tableField $likeType ?", '%' . $this->escapeLIKE($value));
|
||||
$this->query->where("$this->tableField $likeType ?", '%' . $this->escapeLike($value));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -318,7 +326,7 @@ class DatabaseQuerySelectFulltext extends Wire {
|
||||
$wheres = array(); // used only in $any mode
|
||||
|
||||
foreach($words as $word) {
|
||||
$word = $this->escapeLIKE($word);
|
||||
$word = $this->escapeLike($word);
|
||||
if(!strlen($word)) continue;
|
||||
if($any) {
|
||||
$bindKey = $this->query->getUniqueBindKey();
|
||||
@@ -351,13 +359,14 @@ class DatabaseQuerySelectFulltext extends Wire {
|
||||
// ~~= Contains all words live (all full words + partial last word)
|
||||
// ~|= Contains any full words
|
||||
// ~|*= Contains any partial words
|
||||
// ~|+= Contains any words + expand
|
||||
|
||||
$tableField = $this->tableField();
|
||||
$operator = $this->operator;
|
||||
$required = strpos($operator, '|') === false;
|
||||
$partial = strpos($operator, '*') !== false;
|
||||
$partialLast = $operator === '~~=';
|
||||
$expand = $operator === '~+=';
|
||||
$expand = strpos($operator, '+') !== false;
|
||||
$matchType = $this->not ? 'NOT MATCH' : 'MATCH';
|
||||
$scoreField = $this->getScoreFieldName();
|
||||
$matchAgainst = '';
|
||||
@@ -365,12 +374,23 @@ class DatabaseQuerySelectFulltext extends Wire {
|
||||
$data = $this->getBooleanModeWords($value, array(
|
||||
'required' => $required,
|
||||
'partial' => $partial,
|
||||
'partialLast' => $partialLast
|
||||
'partialLast' => $partialLast,
|
||||
'partialLess' => ($partial || $expand),
|
||||
'alternates' => $expand,
|
||||
));
|
||||
|
||||
if($expand) {
|
||||
$bindKey = $this->query->bindValueGetKey($this->escapeAGAINST($data['value']));
|
||||
if(!empty($data['booleanValue'])) {
|
||||
// ensure full matches are above expanded matches
|
||||
$preScoreField = $this->getScoreFieldName();
|
||||
$bindKey = $this->query->bindValueGetKey($data['booleanValue']);
|
||||
$this->query->select("$matchType($tableField) AGAINST($bindKey IN BOOLEAN MODE) + 111.1 AS $preScoreField");
|
||||
$this->query->orderby("$preScoreField DESC");
|
||||
}
|
||||
$bindValue = trim($data['value'] . ' ' . implode(' ', $data['altWords']));
|
||||
$bindKey = $this->query->bindValueGetKey($this->escapeAgainst($bindValue));
|
||||
$matchAgainst = "$matchType($tableField) AGAINST($bindKey WITH QUERY EXPANSION)";
|
||||
|
||||
} else if(!empty($data['booleanValue'])) {
|
||||
$bindKey = $this->query->bindValueGetKey($data['booleanValue']);
|
||||
$matchAgainst = "$matchType($tableField) AGAINST($bindKey IN BOOLEAN MODE)";
|
||||
@@ -387,7 +407,7 @@ class DatabaseQuerySelectFulltext extends Wire {
|
||||
$wheres = array();
|
||||
$likeType = $this->not ? 'NOT RLIKE' : 'RLIKE';
|
||||
foreach($data['likeWords'] as $word) {
|
||||
$word = $this->escapeLIKE($word);
|
||||
$word = $this->escapeLike($word);
|
||||
if(!strlen($word)) continue;
|
||||
$likeValue = '([[:blank:]]|[[:punct:]]|[[:space:]]|>|^)' . preg_quote($word);
|
||||
if($partial || ($partialLast && $word === $data['lastWord'])) {
|
||||
@@ -478,31 +498,63 @@ class DatabaseQuerySelectFulltext extends Wire {
|
||||
*/
|
||||
protected function matchPhraseExpand($value) {
|
||||
|
||||
// *+= phrase match with query expansion: use MATCH/AGAINST and confirm with LIKE
|
||||
|
||||
$tableField = $this->tableField();
|
||||
$not = strpos($this->operator, '!') === 0;
|
||||
$matchType = $not ? "\nNOT MATCH" : "\nMATCH";
|
||||
$words = $this->words($value, array('indexable' => true));
|
||||
$againstValue = $this->escapeAGAINST(implode(' ', $words));
|
||||
$wheres = array();
|
||||
$wordsAlternates = array();
|
||||
|
||||
if(count($words) && strlen($againstValue)) {
|
||||
// use MATCH/AGAINST as pre-filter
|
||||
$match = $not ? 'NOT MATCH' : 'MATCH';
|
||||
$bindKey = $this->query->bindValueGetKey($againstValue);
|
||||
$matchAgainst = "$match($tableField) AGAINST($bindKey WITH QUERY EXPANSION)";
|
||||
$scoreField = $this->getScoreFieldName();
|
||||
$wheres[] = $matchAgainst;
|
||||
$this->query->select("$matchAgainst AS $scoreField");
|
||||
$this->query->orderby("$scoreField DESC");
|
||||
// BOOLEAN PHRASE: full phrase matches come before expanded matches
|
||||
$scoreField = $this->getScoreFieldName();
|
||||
$againstValue = '+"' . $this->escapeAgainst($value) . '*"';
|
||||
$bindKey = $this->query->bindValueGetKey($againstValue);
|
||||
$matchAgainst = "$matchType($tableField) AGAINST($bindKey IN BOOLEAN MODE)";
|
||||
$this->query->select("$matchAgainst + 333.3 AS $scoreField");
|
||||
$this->query->orderby("$scoreField DESC");
|
||||
|
||||
if(!count($words)) {
|
||||
// no words to work with for query expansion (not likely, unless stopwords or too-short)
|
||||
$this->query->where($matchAgainst);
|
||||
return;
|
||||
}
|
||||
|
||||
$likeType = $not ? 'NOT RLIKE' : 'RLIKE';
|
||||
$likeValue = '([[:blank:]]|[[:punct:]]|[[:space:]]|>|^)' . preg_quote($value);
|
||||
$bindKey = $this->query->bindValueGetKey($likeValue);
|
||||
$wheres[] = "($tableField $likeType $bindKey)";
|
||||
// BOOLEAN WEIGHTED WORDS: word matches above query expansion matches
|
||||
$againstValue = '';
|
||||
$scoreField = $this->getScoreFieldName();
|
||||
foreach($words as $word) {
|
||||
$wordAlternates = array();
|
||||
foreach($this->getWordAlternates($word) as $w) {
|
||||
if($w === $word || !$this->isIndexableWord($w)) continue;
|
||||
$wordAlternates[$w] = $w; // alternates for just this word
|
||||
$wordsAlternates[$w] = $w; // alternates for all words
|
||||
}
|
||||
$word = $this->escapeAgainst($word);
|
||||
// full word match carries more weight than partial or alternate word match,
|
||||
// but at least one must be there in order to have a good score
|
||||
$againstValue .= "+(";
|
||||
$againstValue .= ">$word $word*";
|
||||
if(count($wordAlternates)) {
|
||||
$againstValue .= ' ' . $this->escapeAgainst(implode(' ', $wordAlternates));
|
||||
}
|
||||
$wordRoot = $this->getWordRoot($word);
|
||||
if($wordRoot && $wordRoot !== $word) {
|
||||
$againstValue .= ' ' . $this->escapeAgainst($wordRoot) . '*';
|
||||
}
|
||||
$againstValue .= ") ";
|
||||
}
|
||||
$bindKey = $this->query->bindValueGetKey(trim($againstValue));
|
||||
$this->query->select("$matchType($tableField) AGAINST($bindKey IN BOOLEAN MODE) + 222.2 AS $scoreField");
|
||||
$this->query->orderby("$scoreField DESC");
|
||||
|
||||
$this->query->where(implode(' OR ', $wheres));
|
||||
// QUERY EXPANSION: regular match/against words with query expansion
|
||||
$words = array_unique(array_merge($words, $wordsAlternates));
|
||||
$againstValue = $this->escapeAgainst(implode(' ', $words));
|
||||
$bindKey = $this->query->bindValueGetKey($againstValue);
|
||||
$matchAgainst = "$matchType($tableField) AGAINST($bindKey WITH QUERY EXPANSION)";
|
||||
$scoreField = $this->getScoreFieldName();
|
||||
$this->query->where($matchAgainst);
|
||||
$this->query->select("$matchAgainst AS $scoreField");
|
||||
$this->query->orderby("$scoreField DESC");
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -517,12 +569,30 @@ class DatabaseQuerySelectFulltext extends Wire {
|
||||
// **+= Contains match + expand
|
||||
|
||||
$tableField = $this->tableField();
|
||||
$scoreField = $this->getScoreFieldName();
|
||||
$expand = strpos($this->operator, '+') !== false;
|
||||
$expand = strpos($this->operator, '+') !== false;
|
||||
$matchType = $this->not ? 'NOT MATCH' : 'MATCH';
|
||||
|
||||
if($expand) {
|
||||
// boolean mode query for sorting purposes
|
||||
$scoreField = $this->getScoreFieldName();
|
||||
$data = $this->getBooleanModeWords($value, array(
|
||||
'partialLess' => true,
|
||||
'required' => false,
|
||||
'alternates' => true,
|
||||
));
|
||||
if(!empty($data['booleanValue'])) {
|
||||
$againstValue = $data['booleanValue'];
|
||||
$bindKey = $this->query->bindValueGetKey($againstValue);
|
||||
$matchAgainst = "$matchType($tableField) AGAINST($bindKey IN BOOLEAN MODE)";
|
||||
$this->query->select("$matchAgainst + 111.1 AS $scoreField");
|
||||
$this->query->orderby("$scoreField DESC");
|
||||
}
|
||||
}
|
||||
|
||||
// standard MATCH/AGAINST with optional query expansion
|
||||
$scoreField = $this->getScoreFieldName();
|
||||
$words = $this->words($value, array('indexable' => true, 'alternates' => $expand));
|
||||
$againstValue = $this->escapeAGAINST(implode(' ', $words));
|
||||
$againstValue = $this->escapeAgainst(implode(' ', $words));
|
||||
|
||||
if(!count($words) || !strlen(trim($againstValue))) {
|
||||
// query contains no indexable words: force non-match
|
||||
@@ -530,17 +600,19 @@ class DatabaseQuerySelectFulltext extends Wire {
|
||||
return;
|
||||
}
|
||||
|
||||
$match = $this->not ? 'NOT MATCH' : 'MATCH';
|
||||
$bindKey = $this->query->bindValueGetKey($againstValue);
|
||||
$againstType = $expand ? 'WITH QUERY EXPANSION' : '';
|
||||
$where = "$match($tableField) AGAINST($bindKey $againstType)";
|
||||
$where = "$matchType($tableField) AGAINST($bindKey $againstType)";
|
||||
$this->query->select("$where AS $scoreField");
|
||||
$this->query->where($where);
|
||||
$this->query->orderby("$scoreField DESC");
|
||||
}
|
||||
|
||||
/**
|
||||
* Match phrase at start or end of field value
|
||||
* Match phrase at start or end of field value (also uses fulltext index when possible)
|
||||
*
|
||||
* Ignores whitespace, punctuation and opening/closing tags, enabling it to match
|
||||
* start/end words or phrases surrounded by non-word characters.
|
||||
*
|
||||
* @param $value
|
||||
*
|
||||
@@ -552,15 +624,27 @@ class DatabaseQuerySelectFulltext extends Wire {
|
||||
|
||||
$tableField = $this->tableField();
|
||||
$not = strpos($this->operator, '!') === 0;
|
||||
$matchStart = strpos($this->operator, '^') !== false;
|
||||
$againstValue = '';
|
||||
|
||||
|
||||
$words = $this->words($value, array('indexable' => true));
|
||||
$againstValue = count($words) ? $this->escapeAGAINST(implode(' ', $words)) : '';
|
||||
if(count($words)) {
|
||||
if($matchStart) {
|
||||
$lastWord = $this->escapeAgainst(array_pop($words));
|
||||
$againstValue = count($words) ? '+' . $this->escapeAgainst(implode(' +', $words)) : '';
|
||||
$againstValue = trim("$againstValue +$lastWord*"); // 'partial*' match last word
|
||||
} else {
|
||||
array_shift($words); // skip first word since '*partial' match not possible with fulltext
|
||||
$againstValue = count($words) ? '+' . $this->escapeAgainst(implode(' +', $words)) : '';
|
||||
}
|
||||
}
|
||||
|
||||
if(strlen($againstValue)) {
|
||||
// use MATCH/AGAINST to pre-filter before RLIKE when possible
|
||||
$bindKey = $this->query->bindValueGetKey($againstValue);
|
||||
$match = $not ? 'NOT MATCH' : 'MATCH';
|
||||
$matchAgainst = "$match($tableField) AGAINST($bindKey)";
|
||||
$matchAgainst = "$match($tableField) AGAINST($bindKey IN BOOLEAN MODE)";
|
||||
$scoreField = $this->getScoreFieldName();
|
||||
$this->query->select("$matchAgainst AS $scoreField");
|
||||
$this->query->where($matchAgainst);
|
||||
@@ -570,7 +654,7 @@ class DatabaseQuerySelectFulltext extends Wire {
|
||||
$likeType = $not ? 'NOT RLIKE' : 'RLIKE';
|
||||
$likeValue = preg_quote($value);
|
||||
|
||||
if(strpos($this->operator, '^') !== false) {
|
||||
if($matchStart) {
|
||||
// starts with phrase, [optional non-visible html or whitespace] plus query text
|
||||
$likeValue = '^[[:space:]]*(<[^>]+>)*[[:space:]]*' . $likeValue;
|
||||
} else {
|
||||
@@ -608,23 +692,29 @@ class DatabaseQuerySelectFulltext extends Wire {
|
||||
* - `required` (bool): Are given words required in the query? (default=true)
|
||||
* - `partial` (bool): Is it okay to match a partial value? i.e. can "will" match "willy" (default=false)
|
||||
* - `partialLast` (bool): Use partial only for last word? (default=null, auto-detect)
|
||||
* - `partialLess` (bool): Weight partial match words less than full word match? (default=false)
|
||||
* - `phrase` (bool): Is entire $value a full phrase to match? (default=auto-detect)
|
||||
* - `useStopwords` (bool): Allow inclusion of stopwords? (default=null, auto-detect)
|
||||
* - `alternates` (bool): Get word alternates? (default=null, auto-detect)
|
||||
* @return string|array Value provided to the function with boolean operators added, or verbose array.
|
||||
*
|
||||
*/
|
||||
protected function getBooleanModeWords($value, array $options = array()) {
|
||||
|
||||
$expand = strpos($this->operator, '+') !== false;
|
||||
|
||||
$defaults = array(
|
||||
'required' => true,
|
||||
'partial' => false,
|
||||
'partialLast' => ($this->operator === '~~=' || $this->operator === '^='),
|
||||
'partialLess' => false,
|
||||
'useStopwords' => true,
|
||||
'alternates' => $expand,
|
||||
);
|
||||
|
||||
$options = array_merge($defaults, $options);
|
||||
$minWordLength = (int) $this->database->getVariable('ft_min_word_len');
|
||||
$value = $this->escapeAGAINST($value);
|
||||
$value = $this->escapeAgainst($value);
|
||||
$booleanValues = array();
|
||||
$partial = $options['partial'] ? '*' : '';
|
||||
$required = $options['required'] ? '+' : '';
|
||||
@@ -634,35 +724,55 @@ class DatabaseQuerySelectFulltext extends Wire {
|
||||
$stopWords = array();
|
||||
$shortWords = array();
|
||||
$likeWords = array();
|
||||
$altWords = array();
|
||||
|
||||
// get all words
|
||||
$words = $this->words($value);
|
||||
$allWords = $this->words($value);
|
||||
|
||||
if($options['partialLast']) {
|
||||
// treat last word separately (partial last word for live or starts-with searches)
|
||||
// only last word is partial
|
||||
$lastWord = end($words);
|
||||
$lastWord = end($allWords);
|
||||
$partial = '';
|
||||
}
|
||||
|
||||
// iterate through all words to build boolean query values
|
||||
foreach($words as $key => $word) {
|
||||
foreach($allWords as $key => $word) {
|
||||
|
||||
$length = strlen($word);
|
||||
if(!$length) continue;
|
||||
if(!$length || isset($booleanValues[$word])) continue;
|
||||
|
||||
if($this->isStopword($word)) {
|
||||
// handle stop-word
|
||||
$stopWords[$word] = $word;
|
||||
if($useStopwords) $booleanValues[$word] = $word . $partial;
|
||||
if($useStopwords) $booleanValues[$word] = "$word*";
|
||||
continue; // do nothing further with stopwords
|
||||
|
||||
} else if($length < $minWordLength) {
|
||||
// handle too-short word
|
||||
$booleanValues[$word] = $required . $word . $partial;
|
||||
$booleanValues[$word] = $required . "$word*";
|
||||
$shortWords[$word] = $word;
|
||||
continue; // do nothing further with short words
|
||||
|
||||
} else if($options['partialLess']) {
|
||||
// handle regular word and match full word (more weight), or partial word (less weight)
|
||||
$booleanValues[$word] = $required . "(>$word $word*)";
|
||||
$goodWords[$word] = $word;
|
||||
|
||||
} else {
|
||||
// handle regular word
|
||||
$booleanValues[$word] = $required . $word . $partial;
|
||||
$goodWords[$word] = $word;
|
||||
}
|
||||
|
||||
if($options['alternates']) {
|
||||
$booleanValue = $booleanValues[$word];
|
||||
$alternates = $this->getBooleanModeAlternateWords($word, $booleanValue, $minWordLength, $options);
|
||||
if($booleanValue !== $booleanValues[$word]) {
|
||||
$booleanValues[$word] = $booleanValue;
|
||||
$altWords = array_merge($altWords, $alternates);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if(strlen($lastWord)) {
|
||||
@@ -689,20 +799,89 @@ class DatabaseQuerySelectFulltext extends Wire {
|
||||
}
|
||||
|
||||
return array(
|
||||
'value' => trim(implode(' ', $words)),
|
||||
'value' => trim(implode(' ', $allWords)),
|
||||
'booleanValue' => trim(implode(' ', $booleanValues)),
|
||||
'booleanWords' => $booleanValues,
|
||||
'likeWords' => $likeWords,
|
||||
'allWords' => $words,
|
||||
'allWords' => $allWords,
|
||||
'goodWords' => $goodWords,
|
||||
'badWords' => $badWords,
|
||||
'stopWords' => $stopWords,
|
||||
'shortWords' => $shortWords,
|
||||
'altWords' => $altWords,
|
||||
'lastWord' => $lastWord,
|
||||
'minWordLength' => $minWordLength,
|
||||
);
|
||||
}
|
||||
|
||||
/**
|
||||
* Helper for getBooleanModeWords to handle population of alternate words in boolean value
|
||||
*
|
||||
* @param string $word Word to find alternates for
|
||||
* @param string &$booleanValue Existing boolean value which will be updated
|
||||
* @param int $minWordLength
|
||||
* @param array $options
|
||||
* @return array
|
||||
* @since 3.0.162
|
||||
*
|
||||
*/
|
||||
protected function getBooleanModeAlternateWords($word, &$booleanValue, $minWordLength, array $options) {
|
||||
|
||||
$required = strpos($booleanValue, '+') === 0 ? '+' : '';
|
||||
$alternateWords = $this->getWordAlternates($word);
|
||||
$rootWord = $this->getWordRoot($word);
|
||||
|
||||
if($rootWord) {
|
||||
if(!in_array($rootWord, $alternateWords)) {
|
||||
$alternateWords[] = $rootWord;
|
||||
} else {
|
||||
$rootWord = '';
|
||||
}
|
||||
}
|
||||
|
||||
$alternateWords = array_unique($alternateWords);
|
||||
|
||||
// prepare alternate words for inclusion in boolean value and remove any that aren’t indexable
|
||||
foreach($alternateWords as $key => $alternateWord) {
|
||||
$alternateWord = $this->escapeAgainst($alternateWord);
|
||||
$length = $this->strlen($alternateWord);
|
||||
|
||||
if($alternateWord === $rootWord && $length > 1) {
|
||||
// root word is always partial match. weight less if there are other alternates to match
|
||||
$less = count($alternateWords) > 1 && !empty($options['partialLess']) ? '<' : '';
|
||||
$alternateWords[$key] = $less . $alternateWord . '*';
|
||||
if($length >= $minWordLength && $length >= 3) $alternateWords[] = $less . $alternateWord;
|
||||
|
||||
} else if($length < $minWordLength || $this->isStopword($alternateWord)) {
|
||||
// alternate word not indexable, remove it
|
||||
unset($alternateWords[$key]);
|
||||
|
||||
} else {
|
||||
// replace with escaped version
|
||||
$alternateWords[$key] = $alternateWord;
|
||||
}
|
||||
}
|
||||
|
||||
if(!count($alternateWords)) return array();
|
||||
|
||||
// rebuild boolean value to include alternates: "+(word word)" or "+word" or ""
|
||||
if($required) $booleanValue = ltrim($booleanValue, '+');
|
||||
|
||||
// remove parens from boolean value, if present
|
||||
$booleanValue = trim($booleanValue, '()');
|
||||
|
||||
// assign higher weight to existing first word, if not already
|
||||
if($booleanValue && strpos($booleanValue, '>') !== 0) $booleanValue = ">$booleanValue";
|
||||
|
||||
// append alternate words
|
||||
$booleanValue = trim($booleanValue . ' ' . implode(' ', $alternateWords));
|
||||
|
||||
// package boolean value into parens and optional "+" prefix (indicating required)
|
||||
$booleanValue = "$required($booleanValue)";
|
||||
|
||||
return $alternateWords;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get boolean query value where "+" and "-" and "*" and '"' are allowed in query to affect results
|
||||
*
|
||||
@@ -802,9 +981,7 @@ class DatabaseQuerySelectFulltext extends Wire {
|
||||
*
|
||||
*/
|
||||
protected function isIndexableWord($word) {
|
||||
static $minWordLength = null;
|
||||
// note: ft_min_word_len is automatically changed to InnoDB’s equivalent when applicable
|
||||
if($minWordLength === null) $minWordLength = (int) $this->database->getVariable('ft_min_word_len');
|
||||
$minWordLength = $this->getMinWordLength();
|
||||
if($minWordLength && $this->strlen($word) < $minWordLength) return false;
|
||||
if($this->isStopword($word)) return false;
|
||||
return true;
|
||||
@@ -818,31 +995,50 @@ class DatabaseQuerySelectFulltext extends Wire {
|
||||
*
|
||||
*/
|
||||
protected function getScoreFieldName() {
|
||||
$n = 0;
|
||||
do {
|
||||
$scoreField = "_score_{$this->tableName}_{$this->fieldName}" . (++$n);
|
||||
// $locateField = "_locate_{$tableName}_{$fieldName}$n";
|
||||
} while(isset(self::$scoreFields[$scoreField]));
|
||||
self::$scoreFields[$scoreField] = 1;
|
||||
return $scoreField;
|
||||
$key = $this->tableName . '_' . $this->fieldName;
|
||||
self::$scoreCnts[$key] = isset(self::$scoreCnts[$key]) ? self::$scoreCnts[$key] + 1 : 0;
|
||||
return '_score_' . $key . self::$scoreCnts[$key];
|
||||
}
|
||||
|
||||
/**
|
||||
* Get minimum allowed indexable word length
|
||||
*
|
||||
* @return int
|
||||
*
|
||||
*/
|
||||
protected function getMinWordLength() {
|
||||
// note: ft_min_word_len is automatically changed to InnoDB’s equivalent when applicable
|
||||
if($this->minWordLength !== null) return $this->minWordLength;
|
||||
$this->minWordLength = (int) $this->database->getVariable('ft_min_word_len');
|
||||
return $this->minWordLength;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get other variations of given word to search (such as plural, singular, etc.)
|
||||
*
|
||||
* Method is currently used by the **+= operator but will be also added
|
||||
* to ~~= and ~+= operators shortly.
|
||||
*
|
||||
* This method is for hooks to implement.
|
||||
*
|
||||
* #pw-hooker
|
||||
* Get other variations of given word to search (such as plural, singular, lemmas, etc.)
|
||||
*
|
||||
* @param string $word
|
||||
* @param int|null $minLength Minimum length for returned words
|
||||
* @return array
|
||||
*
|
||||
*/
|
||||
public function ___getWordAlternates($word) {
|
||||
protected function getWordAlternates($word, $minLength = null) {
|
||||
if($minLength === null) $minLength = $this->getMinWordLength();
|
||||
return $this->wire()->sanitizer->getTextTools()->getWordAlternates($word, array(
|
||||
'operator' => $this->operator,
|
||||
'lowercase' => true,
|
||||
'minLength' => $minLength,
|
||||
));
|
||||
}
|
||||
|
||||
/**
|
||||
* Get root of word (currently not implemented)
|
||||
*
|
||||
* @param string $word
|
||||
* @return string
|
||||
*
|
||||
*/
|
||||
protected function getWordRoot($word) {
|
||||
if($word) {}
|
||||
return array();
|
||||
return '';
|
||||
}
|
||||
}
|
||||
|
Reference in New Issue
Block a user