mirror of
https://github.com/phpbb/phpbb.git
synced 2025-05-21 14:59:46 +02:00
2031 lines
57 KiB
PHP
2031 lines
57 KiB
PHP
<?php
|
||
/**
|
||
*
|
||
* This file is part of the phpBB Forum Software package.
|
||
*
|
||
* @copyright (c) phpBB Limited <https://www.phpbb.com>
|
||
* @license GNU General Public License, version 2 (GPL-2.0)
|
||
*
|
||
* For full copyright and license information, please see
|
||
* the docs/CREDITS.txt file.
|
||
*
|
||
*/
|
||
|
||
namespace phpbb\search\backend;
|
||
|
||
use phpbb\config\config;
|
||
use phpbb\db\driver\driver_interface;
|
||
use phpbb\event\dispatcher_interface;
|
||
use phpbb\language\language;
|
||
use phpbb\user;
|
||
|
||
/**
|
||
* phpBB's own db driven fulltext search, version 2
|
||
*/
|
||
class fulltext_native extends base implements search_backend_interface
|
||
{
|
||
protected const UTF8_HANGUL_FIRST = "\xEA\xB0\x80";
|
||
protected const UTF8_HANGUL_LAST = "\xED\x9E\xA3";
|
||
protected const UTF8_CJK_FIRST = "\xE4\xB8\x80";
|
||
protected const UTF8_CJK_LAST = "\xE9\xBE\xBB";
|
||
protected const UTF8_CJK_B_FIRST = "\xF0\xA0\x80\x80";
|
||
protected const UTF8_CJK_B_LAST = "\xF0\xAA\x9B\x96";
|
||
|
||
/**
|
||
* Associative array holding index stats
|
||
* @var array
|
||
*/
|
||
protected $stats = array();
|
||
|
||
/**
|
||
* Associative array stores the min and max word length to be searched
|
||
* @var array
|
||
*/
|
||
protected $word_length = array();
|
||
|
||
/**
|
||
* Contains tidied search query.
|
||
* Operators are prefixed in search query and common words excluded
|
||
* @var string
|
||
*/
|
||
protected $search_query = '';
|
||
|
||
/**
|
||
* Contains common words.
|
||
* Common words are words with length less/more than min/max length
|
||
* @var array
|
||
*/
|
||
protected $common_words = array();
|
||
|
||
/**
|
||
* Post ids of posts containing words that are to be included
|
||
* @var array
|
||
*/
|
||
protected $must_contain_ids = array();
|
||
|
||
/**
|
||
* Post ids of posts containing words that should not be included
|
||
* @var array
|
||
*/
|
||
protected $must_not_contain_ids = array();
|
||
|
||
/**
|
||
* Post ids of posts containing at least one word that needs to be excluded
|
||
* @var array
|
||
*/
|
||
protected $must_exclude_one_ids = array();
|
||
|
||
/**
|
||
* Relative path to board root
|
||
* @var string
|
||
*/
|
||
protected $phpbb_root_path;
|
||
|
||
/**
|
||
* PHP Extension
|
||
* @var string
|
||
*/
|
||
protected $php_ext;
|
||
|
||
/**
|
||
* phpBB event dispatcher object
|
||
* @var dispatcher_interface
|
||
*/
|
||
protected $phpbb_dispatcher;
|
||
|
||
/**
|
||
* @var language
|
||
*/
|
||
protected $language;
|
||
|
||
/**
* Sets up the fulltext_native search backend and its min/max word length
*
* The cache service is taken from the global scope and handed to the parent
* constructor together with the config, database and user objects.
*
* @param config					$config				Config object
* @param driver_interface		$db					Database object
* @param dispatcher_interface	$phpbb_dispatcher	Event dispatcher object
* @param language				$language			Language object
* @param user					$user				User object
* @param string					$phpbb_root_path	phpBB root path
* @param string					$phpEx				PHP file extension
*/
public function __construct(config $config, driver_interface $db, dispatcher_interface $phpbb_dispatcher, language $language, user $user, string $phpbb_root_path, string $phpEx)
{
	global $cache;

	parent::__construct($cache, $config, $db, $user);

	$this->phpbb_root_path = $phpbb_root_path;
	$this->php_ext = $phpEx;

	$this->language = $language;
	$this->phpbb_dispatcher = $phpbb_dispatcher;

	// Word length limits come straight from the board configuration
	$min_chars = (int) $this->config['fulltext_native_min_chars'];
	$max_chars = (int) $this->config['fulltext_native_max_chars'];
	$this->word_length = array('min' => $min_chars, 'max' => $max_chars);

	// The UTF tools only need to be loaded when utf8_decode_ncr() is not defined yet
	if (function_exists('utf8_decode_ncr'))
	{
		return;
	}

	include($this->phpbb_root_path . 'includes/utf/utf_tools.' . $this->php_ext);
}
|
||
|
||
/**
* {@inheritdoc}
*
* @return string	Always the fixed label 'phpBB Native Fulltext'
*/
public function get_name(): string
{
	$backend_label = 'phpBB Native Fulltext';

	return $backend_label;
}
|
||
|
||
/**
* {@inheritdoc}
*
* No precondition is checked: this backend unconditionally reports itself as available.
*
* @return bool	Always true
*/
public function is_available(): bool
{
	$available = true;

	return $available;
}
|
||
|
||
/**
* {@inheritdoc}
*
* Performs no work and always returns false.
* NOTE(review): presumably false means "no error" in the interface contract - confirm against search_backend_interface.
*
* @return bool	Always false
*/
public function init()
{
	$init_result = false;

	return $init_result;
}
|
||
|
||
/**
* {@inheritdoc}
*
* @return string	The tidied search query as last stored in $this->search_query
*					(empty string until split_keywords() has stored one)
*/
public function get_search_query(): string
{
	$tidied_query = $this->search_query;

	return $tidied_query;
}
|
||
|
||
/**
* {@inheritdoc}
*
* @return array	The common words collected so far in $this->common_words
*/
public function get_common_words(): array
{
	$collected_words = $this->common_words;

	return $collected_words;
}
|
||
|
||
/**
* {@inheritdoc}
*
* @return array	The word length limits held in $this->word_length
*				(the constructor stores them under the keys 'min' and 'max')
*/
public function get_word_length()
{
	$length_limits = $this->word_length;

	return $length_limits;
}
|
||
|
||
/**
* {@inheritdoc}
*
* Normalises the keyword string in place, stores the tidied query in
* $this->search_query and rebuilds the must_contain, must_not_contain and
* must_exclude_one id lists. Words flagged as common in the word list table,
* and unknown or wildcard words outside the configured min/max length, are
* appended to $this->common_words.
*
* @param string	$keywords	Raw search keywords; overwritten with the normalised query
* @param string	$terms		'any' folds all words into one OR group, any other value keeps the operators as typed
*
* @return bool	True if the query holds more extracted words than common words were collected, false otherwise
*/
public function split_keywords(string &$keywords, string $terms): bool
{
	$tokens = '+-|()* ';

	$keywords = trim($this->cleanup($keywords, $tokens));

	// allow word|word|word without brackets
	if ((strpos($keywords, ' ') === false) && (strpos($keywords, '|') !== false) && (strpos($keywords, '(') === false))
	{
		$keywords = '(' . $keywords . ')';
	}

	// $open_bracket holds the offset of the currently open '(' (false when outside a group),
	// $space remembers a '+' or '-' operator that may still be followed by spaces
	$open_bracket = $space = false;
	for ($i = 0, $n = strlen($keywords); $i < $n; $i++)
	{
		if ($open_bracket !== false)
		{
			// Inside a group every separator is turned into the OR operator
			switch ($keywords[$i])
			{
				case ')':
					// "()" with nothing in between: blank out both brackets
					if ($open_bracket + 1 == $i)
					{
						$keywords[$i - 1] = '|';
						$keywords[$i] = '|';
					}
					$open_bracket = false;
				break;

				case '(':
					// nested groups are not supported, flatten them
					$keywords[$i] = '|';
				break;

				case '+':
				case '-':
				case ' ':
					$keywords[$i] = '|';
				break;

				case '*':
					// $i can never be 0 here since $open_bracket is initialised to false
					// A wildcard with separators on both sides (or at the very end) matches nothing useful
					if (strpos($tokens, $keywords[$i - 1]) !== false && ($i + 1 === $n || strpos($tokens, $keywords[$i + 1]) !== false))
					{
						$keywords[$i] = '|';
					}
				break;
			}
		}
		else
		{
			switch ($keywords[$i])
			{
				case ')':
					// closing bracket without an opening one
					$keywords[$i] = ' ';
				break;

				case '(':
					$open_bracket = $i;
					$space = false;
				break;

				case '|':
					// OR is only meaningful inside a group
					$keywords[$i] = ' ';
				break;

				case '-':
				case '+':
					$space = $keywords[$i];
				break;

				case ' ':
					// spaces directly after an operator are replaced by that operator,
					// the duplicates are collapsed by the regular expressions below
					if ($space !== false)
					{
						$keywords[$i] = $space;
					}
				break;

				default:
					$space = false;
			}
		}
	}

	// close a group the user left open
	if ($open_bracket !== false)
	{
		$keywords .= ')';
	}

	// collapse repeated spaces, repeated '|', runs of +/- (the first one wins)
	// and drop a '|' sitting directly next to a bracket
	$match = array(
		'# +#',
		'#\|\|+#',
		'#(\+|\-)(?:\+|\-)+#',
		'#\(\|#',
		'#\|\)#',
	);
	$replace = array(
		' ',
		'|',
		'$1',
		'(',
		')',
	);

	$keywords = preg_replace($match, $replace, $keywords);
	$num_keywords = count(explode(' ', $keywords));

	// We limit the number of allowed keywords to minimize load on the database
	if ($this->config['max_num_search_keywords'] && $num_keywords > $this->config['max_num_search_keywords'])
	{
		trigger_error($this->language->lang('MAX_NUM_SEARCH_KEYWORDS_REFINE', (int) $this->config['max_num_search_keywords'], $num_keywords));
	}

	// $keywords input format: each word separated by a space, words in a bracket are not separated

	// the user wants to search for any word, convert the search query
	if ($terms == 'any')
	{
		$words = array();

		preg_match_all('#([^\\s+\\-|()]+)(?:$|[\\s+\\-|()])#u', $keywords, $words);
		if (count($words[1]))
		{
			$keywords = '(' . implode('|', $words[1]) . ')';
		}
	}

	// Remove non trailing wildcards from each word to prevent a full table scan (it's now using the database index)
	// The pattern has no capture group, so the wildcard is simply replaced by an empty string
	$keywords = preg_replace('#\*(?!$|\s)#', '', $keywords);

	// Only allow one wildcard in the search query to limit the database load
	$count_wildcards = substr_count($keywords, '*');

	// Reverse the string to remove all wildcards except the first one.
	// With no wildcard the limit is -1 (unlimited, nothing to match), with exactly one it is 0 (nothing replaced).
	$keywords = strrev(preg_replace('#\*#', '', strrev($keywords), $count_wildcards - 1));
	unset($count_wildcards);

	// set the search_query which is shown to the user
	$this->search_query = $keywords;

	$exact_words = array();
	preg_match_all('#([^\\s+\\-|()]+)(?:$|[\\s+\\-|()])#u', $keywords, $exact_words);
	$exact_words = $exact_words[1];

	// $words maps word text => word id for usable words, $common_ids does the same for common words
	$common_ids = $words = array();

	if (count($exact_words))
	{
		$sql = 'SELECT word_id, word_text, word_common
			FROM ' . SEARCH_WORDLIST_TABLE . '
			WHERE ' . $this->db->sql_in_set('word_text', $exact_words) . '
			ORDER BY word_count ASC';
		$result = $this->db->sql_query($sql);

		// store an array of words and ids, remove common words
		while ($row = $this->db->sql_fetchrow($result))
		{
			if ($row['word_common'])
			{
				$this->common_words[] = $row['word_text'];
				$common_ids[$row['word_text']] = (int) $row['word_id'];
				continue;
			}

			$words[$row['word_text']] = (int) $row['word_id'];
		}
		$this->db->sql_freeresult($result);
	}

	// Handle +, - without preceding whitespace character
	// NOTE(review): both operators are rewritten to ' +', so a '-' glued to the preceding word
	// becomes "must contain" - this looks deliberate for hyphenated words, confirm before changing
	$match = array('#(\S)\+#', '#(\S)-#');
	$replace = array('$1 +', '$1 +');

	$keywords = preg_replace($match, $replace, $keywords);

	// now analyse the search query, first split it using the spaces
	$query = explode(' ', $keywords);

	$this->must_contain_ids = array();
	$this->must_not_contain_ids = array();
	$this->must_exclude_one_ids = array();

	foreach ($query as $word)
	{
		if (empty($word))
		{
			continue;
		}

		// words which should not be included
		if ($word[0] == '-')
		{
			$word = substr($word, 1);

			// a group of which at least one may not be in the resulting posts
			// (isset() guards against a lone '-' which leaves nothing behind the operator)
			if (isset($word[0]) && $word[0] == '(')
			{
				$word = array_unique(explode('|', substr($word, 1, -1)));
				$mode = 'must_exclude_one';
			}
			// one word which should not be in the resulting posts
			else
			{
				$mode = 'must_not_contain';
			}
			$ignore_no_id = true;
		}
		// words which have to be included
		else
		{
			// no prefix is the same as a +prefix
			if ($word[0] == '+')
			{
				$word = substr($word, 1);
			}

			// a group of words of which at least one word should be in every resulting post
			if (isset($word[0]) && $word[0] == '(')
			{
				$word = array_unique(explode('|', substr($word, 1, -1)));
			}
			$ignore_no_id = false;
			$mode = 'must_contain';
		}

		// nothing left once the operator has been stripped
		if (empty($word))
		{
			continue;
		}

		// if this is an array of words then retrieve an id for each
		if (is_array($word))
		{
			$non_common_words = array();
			$id_words = array();
			foreach ($word as $word_part)
			{
				if (strpos($word_part, '*') !== false)
				{
					// wildcard words are stored as quoted LIKE patterns instead of numeric ids
					$len = utf8_strlen(str_replace('*', '', $word_part));
					if ($len >= $this->word_length['min'] && $len <= $this->word_length['max'])
					{
						$id_words[] = '\'' . $this->db->sql_escape(str_replace('*', '%', $word_part)) . '\'';
						$non_common_words[] = $word_part;
					}
					else
					{
						$this->common_words[] = $word_part;
					}
				}
				else if (isset($words[$word_part]))
				{
					$id_words[] = $words[$word_part];
					$non_common_words[] = $word_part;
				}
				else
				{
					// unknown word: only report it as common when its length is out of range
					$len = utf8_strlen($word_part);
					if ($len < $this->word_length['min'] || $len > $this->word_length['max'])
					{
						$this->common_words[] = $word_part;
					}
				}
			}

			if (count($id_words))
			{
				sort($id_words);
				if (count($id_words) > 1)
				{
					$this->{$mode . '_ids'}[] = $id_words;
				}
				else
				{
					// a group with a single usable word behaves like a plain word
					$mode = ($mode == 'must_exclude_one') ? 'must_not_contain' : $mode;
					$this->{$mode . '_ids'}[] = $id_words[0];
				}
			}
			// throw an error if we shall not ignore unexistant words
			// NOTE(review): $id_words and $non_common_words are always filled together above,
			// so this branch cannot be reached as the code is written
			else if (!$ignore_no_id && count($non_common_words))
			{
				trigger_error(sprintf($this->language->lang('WORDS_IN_NO_POST'), implode($this->language->lang('COMMA_SEPARATOR'), $non_common_words)));
			}
			unset($non_common_words);
		}
		// else we only need one id
		else if (($wildcard = (strpos($word, '*') !== false)) || isset($words[$word]))
		{
			if ($wildcard)
			{
				$len = utf8_strlen(str_replace('*', '', $word));
				if ($len >= $this->word_length['min'] && $len <= $this->word_length['max'])
				{
					$this->{$mode . '_ids'}[] = '\'' . $this->db->sql_escape(str_replace('*', '%', $word)) . '\'';
				}
				else
				{
					$this->common_words[] = $word;
				}
			}
			else
			{
				$this->{$mode . '_ids'}[] = $words[$word];
			}
		}
		else
		{
			// words already recorded as common by the word list query are not added twice
			if (!isset($common_ids[$word]))
			{
				$len = utf8_strlen($word);
				if ($len < $this->word_length['min'] || $len > $this->word_length['max'])
				{
					$this->common_words[] = $word;
				}
			}
		}
	}

	// Return true if all words are not common words
	return (count($exact_words) - count($this->common_words)) > 0;
}
|
||
|
||
/**
* {@inheritdoc}
*
* Runs the keyword search prepared by split_keywords(): builds one word match
* join per required word or group, excludes posts matching the negative ids,
* applies the topic, author, forum and time filters, then fetches one block of
* ids and stores it through save_ids().
*
* @param string	$type				'posts' selects post ids, any other value selects topic ids
* @param string	$fields				Searching fields ('titleonly', 'msgonly', 'firstpost', 'all')
* @param string	$terms				Searching terms ('all', 'any'); only used for the cache key here
* @param array	$sort_by_sql		Sort key => SQL sort expression
* @param string	$sort_key			Key into $sort_by_sql
* @param string	$sort_dir			'a' sorts ascending, anything else descending
* @param string	$sort_days			Maximum post age in days; a falsy value disables the filter
* @param array	$ex_fid_ary			Forum ids to exclude
* @param string	$post_visibility	SQL condition appended to the WHERE clause as-is
* @param int	$topic_id			Restrict the search to this topic when non-zero
* @param array	$author_ary			User ids the results are limited to
* @param string	$author_name		Appended verbatim after 'p.post_username ' - presumably a prepared SQL comparison, verify against the caller
* @param array	$id_ary				Receives the ids of the requested page
* @param int	$start				Result offset; moved to the start of the last page when it lies beyond the result count
* @param int	$per_page			Number of ids per page
*
* @return bool|int	False if there is no query, no required word or no result; the total number of results otherwise
*/
public function keyword_search(string $type, string $fields, string $terms, array $sort_by_sql, string $sort_key, string $sort_dir, string $sort_days, array $ex_fid_ary, string $post_visibility, int $topic_id, array $author_ary, string $author_name, array &$id_ary, int &$start, int $per_page)
{
	// No keywords? No posts.
	if (empty($this->search_query))
	{
		return false;
	}

	// we can't search for negatives only
	if (empty($this->must_contain_ids))
	{
		return false;
	}

	// Sorted copies keep the cache key independent of the order the words were typed in
	$must_contain_ids = $this->must_contain_ids;
	$must_not_contain_ids = $this->must_not_contain_ids;
	$must_exclude_one_ids = $this->must_exclude_one_ids;

	sort($must_contain_ids);
	sort($must_not_contain_ids);
	sort($must_exclude_one_ids);

	// generate a search_key from all the options to identify the results
	$search_key_array = array(
		serialize($must_contain_ids),
		serialize($must_not_contain_ids),
		serialize($must_exclude_one_ids),
		$type,
		$fields,
		$terms,
		$sort_days,
		$sort_key,
		$topic_id,
		implode(',', $ex_fid_ary),
		$post_visibility,
		implode(',', $author_ary),
		$author_name,
	);

	/**
	* Allow changing the search_key for cached results
	*
	* @event core.search_native_by_keyword_modify_search_key
	* @var	array	search_key_array		Array with search parameters to generate the search_key
	* @var	array	must_contain_ids		Array with post ids of posts containing words that are to be included
	* @var	array	must_not_contain_ids	Array with post ids of posts containing words that should not be included
	* @var	array	must_exclude_one_ids	Array with post ids of posts containing at least one word that needs to be excluded
	* @var	string	type					Searching type ('posts', 'topics')
	* @var	string	fields					Searching fields ('titleonly', 'msgonly', 'firstpost', 'all')
	* @var	string	terms					Searching terms ('all', 'any')
	* @var	int		sort_days				Time, in days, of the oldest possible post to list
	* @var	string	sort_key				The sort type used from the possible sort types
	* @var	int		topic_id				Limit the search to this topic_id only
	* @var	array	ex_fid_ary				Which forums not to search on
	* @var	string	post_visibility			Post visibility data
	* @var	array	author_ary				Array of user_id containing the users to filter the results to
	* @since 3.1.7-RC1
	*/
	$vars = array(
		'search_key_array',
		'must_contain_ids',
		'must_not_contain_ids',
		'must_exclude_one_ids',
		'type',
		'fields',
		'terms',
		'sort_days',
		'sort_key',
		'topic_id',
		'ex_fid_ary',
		'post_visibility',
		'author_ary',
	);
	extract($this->phpbb_dispatcher->trigger_event('core.search_native_by_keyword_modify_search_key', compact($vars)));

	$search_key = md5(implode('#', $search_key_array));

	// try reading the results from cache
	$total_results = 0;
	if ($this->obtain_ids($search_key, $total_results, $id_ary, $start, $per_page, $sort_dir) == self::SEARCH_RESULT_IN_CACHE)
	{
		return $total_results;
	}

	$id_ary = array();

	$sql_where = array();
	// running counters for the word match (m0, m1, ...) and word list (w0, w1, ...) table aliases
	$m_num = 0;
	$w_num = 0;

	$sql_array = array(
		'SELECT'	=> ($type == 'posts') ? 'p.post_id' : 'p.topic_id',
		'FROM'		=> array(
			SEARCH_WORDMATCH_TABLE	=> array(),
			SEARCH_WORDLIST_TABLE	=> array(),
		),
		'LEFT_JOIN'	=> array(array(
			'FROM'	=> array(POSTS_TABLE => 'p'),
			'ON'	=> 'm0.post_id = p.post_id',
		)),
	);

	$title_match = '';
	$left_join_topics = false;
	$group_by = true;

	// Build some display specific sql strings
	switch ($fields)
	{
		case 'titleonly':
			$title_match = 'title_match = 1';
			$group_by = false;
		// no break

		case 'firstpost':
			$left_join_topics = true;
			$sql_where[] = 'p.post_id = t.topic_first_post_id';
		break;

		case 'msgonly':
			$title_match = 'title_match = 0';
			$group_by = false;
		break;
	}

	if ($type == 'topics')
	{
		$left_join_topics = true;
		$group_by = true;
	}

	/**
	* @todo Add a query optimizer (handle stuff like "+(4|3) +4")
	*/

	// One word match alias per required word or group; all of them are tied to m0's post
	foreach ($this->must_contain_ids as $subquery)
	{
		if (is_array($subquery))
		{
			$group_by = true;

			$word_ids = array();
			foreach ($subquery as $id)
			{
				if (is_string($id))
				{
					// strings are quoted LIKE patterns built from wildcard words
					$sql_array['LEFT_JOIN'][] = array(
						'FROM'	=> array(SEARCH_WORDLIST_TABLE => 'w' . $w_num),
						'ON'	=> "w$w_num.word_text LIKE $id"
					);
					$word_ids[] = "w$w_num.word_id";

					$w_num++;
				}
				else
				{
					$word_ids[] = $id;
				}
			}

			$sql_where[] = $this->db->sql_in_set("m$m_num.word_id", $word_ids);

			unset($word_ids);
		}
		else if (is_string($subquery))
		{
			$sql_array['FROM'][SEARCH_WORDLIST_TABLE][] = 'w' . $w_num;

			$sql_where[] = "w$w_num.word_text LIKE $subquery";
			$sql_where[] = "m$m_num.word_id = w$w_num.word_id";

			$group_by = true;
			$w_num++;
		}
		else
		{
			$sql_where[] = "m$m_num.word_id = $subquery";
		}

		$sql_array['FROM'][SEARCH_WORDMATCH_TABLE][] = 'm' . $m_num;

		if ($title_match)
		{
			$sql_where[] = "m$m_num.$title_match";
		}

		if ($m_num != 0)
		{
			$sql_where[] = "m$m_num.post_id = m0.post_id";
		}
		$m_num++;
	}

	// Wildcard exclusions are resolved through a word list join; note that the
	// stored property entry itself is replaced by the alias column
	foreach ($this->must_not_contain_ids as $key => $subquery)
	{
		if (is_string($subquery))
		{
			$sql_array['LEFT_JOIN'][] = array(
				'FROM'	=> array(SEARCH_WORDLIST_TABLE => 'w' . $w_num),
				'ON'	=> "w$w_num.word_text LIKE $subquery"
			);

			$this->must_not_contain_ids[$key] = "w$w_num.word_id";

			$group_by = true;
			$w_num++;
		}
	}

	// A post qualifies only if the left join on the excluded words finds no row
	if (count($this->must_not_contain_ids))
	{
		$sql_array['LEFT_JOIN'][] = array(
			'FROM'	=> array(SEARCH_WORDMATCH_TABLE => 'm' . $m_num),
			'ON'	=> $this->db->sql_in_set("m$m_num.word_id", $this->must_not_contain_ids) . (($title_match) ? " AND m$m_num.$title_match" : '') . " AND m$m_num.post_id = m0.post_id"
		);

		$sql_where[] = "m$m_num.word_id IS NULL";
		$m_num++;
	}

	// For every excluded group at least one of its words has to be missing from the post
	foreach ($this->must_exclude_one_ids as $ids)
	{
		$is_null_joins = array();
		foreach ($ids as $id)
		{
			if (is_string($id))
			{
				$sql_array['LEFT_JOIN'][] = array(
					'FROM'	=> array(SEARCH_WORDLIST_TABLE => 'w' . $w_num),
					'ON'	=> "w$w_num.word_text LIKE $id"
				);
				$id = "w$w_num.word_id";

				$group_by = true;
				$w_num++;
			}

			$sql_array['LEFT_JOIN'][] = array(
				'FROM'	=> array(SEARCH_WORDMATCH_TABLE => 'm' . $m_num),
				'ON'	=> "m$m_num.word_id = $id AND m$m_num.post_id = m0.post_id" . (($title_match) ? " AND m$m_num.$title_match" : '')
			);
			$is_null_joins[] = "m$m_num.word_id IS NULL";

			$m_num++;
		}
		$sql_where[] = '(' . implode(' OR ', $is_null_joins) . ')';
	}

	$sql_where[] = $post_visibility;

	// Local copies exposed to the event below
	$search_query = $this->search_query;
	$must_exclude_one_ids = $this->must_exclude_one_ids;
	$must_not_contain_ids = $this->must_not_contain_ids;
	$must_contain_ids = $this->must_contain_ids;

	$sql_sort_table = $sql_sort_join = $sql_match = $sql_match_where = $sql_sort = '';

	/**
	* Allow changing the query used for counting for posts using fulltext_native
	*
	* @event core.search_native_keywords_count_query_before
	* @var	string	search_query			The parsed keywords used for this search
	* @var	array	must_not_contain_ids	Ids that cannot be taken into account for the results
	* @var	array	must_exclude_one_ids	Ids that cannot be on the results
	* @var	array	must_contain_ids		Ids that must be on the results
	* @var	int		total_results			The previous result count for the format of the query
	*										Set to 0 to force a re-count
	* @var	array	sql_array				The data on how to search in the DB at this point
	* @var	bool	left_join_topics		Whether or not TOPICS_TABLE should be CROSS JOIN'ED
	* @var	array	author_ary				Array of user_id containing the users to filter the results to
	* @var	string	author_name				An extra username to search on (!empty(author_ary) must be true, to be relevant)
	* @var	array	ex_fid_ary				Which forums not to search on
	* @var	int		topic_id				Limit the search to this topic_id only
	* @var	string	sql_sort_table			Extra tables to include in the SQL query.
	*										Used in conjunction with sql_sort_join
	* @var	string	sql_sort_join			SQL conditions to join all the tables used together.
	*										Used in conjunction with sql_sort_table
	* @var	int		sort_days				Time, in days, of the oldest possible post to list
	* @var	array	sql_where				An array of the current WHERE clause conditions
	* @var	string	sql_match				Which columns to do the search on
	* @var	string	sql_match_where			Extra conditions to use to properly filter the matching process
	* @var	bool	group_by				Whether or not the SQL query requires a GROUP BY for the elements in the SELECT clause
	* @var	array	sort_by_sql				The possible predefined sort types
	* @var	string	sort_key				The sort type used from the possible sort types
	* @var	string	sort_dir				"a" for ASC or "d" for DESC for the sort order used
	* @var	string	sql_sort				The result SQL when processing sort_by_sql + sort_key + sort_dir
	* @var	int		start					How many posts to skip in the search results (used for pagination)
	* @since 3.1.5-RC1
	*/
	$vars = array(
		'search_query',
		'must_not_contain_ids',
		'must_exclude_one_ids',
		'must_contain_ids',
		'total_results',
		'sql_array',
		'left_join_topics',
		'author_ary',
		'author_name',
		'ex_fid_ary',
		'topic_id',
		'sql_sort_table',
		'sql_sort_join',
		'sort_days',
		'sql_where',
		'sql_match',
		'sql_match_where',
		'group_by',
		'sort_by_sql',
		'sort_key',
		'sort_dir',
		'sql_sort',
		'start',
	);
	extract($this->phpbb_dispatcher->trigger_event('core.search_native_keywords_count_query_before', compact($vars)));

	if ($topic_id)
	{
		// extract() above may have replaced the typed parameter, so cast before
		// concatenating (author_search() does the same)
		$sql_where[] = 'p.topic_id = ' . (int) $topic_id;
	}

	if (count($author_ary))
	{
		if ($author_name)
		{
			// first one matches post of registered users, second one guests and deleted users
			$sql_author = '(' . $this->db->sql_in_set('p.poster_id', array_diff($author_ary, array(ANONYMOUS)), false, true) . ' OR p.post_username ' . $author_name . ')';
		}
		else
		{
			$sql_author = $this->db->sql_in_set('p.poster_id', $author_ary);
		}
		$sql_where[] = $sql_author;
	}

	if (count($ex_fid_ary))
	{
		$sql_where[] = $this->db->sql_in_set('p.forum_id', $ex_fid_ary, true);
	}

	if ($sort_days)
	{
		$sql_where[] = 'p.post_time >= ' . (time() - ($sort_days * 86400));
	}

	$sql_array['WHERE'] = implode(' AND ', $sql_where);

	$is_mysql = false;
	// if the total result count is not cached yet, retrieve it from the db
	if (!$total_results)
	{
		$sql = '';
		$sql_array_count = $sql_array;

		if ($left_join_topics)
		{
			$sql_array_count['LEFT_JOIN'][] = array(
				'FROM'	=> array(TOPICS_TABLE => 't'),
				'ON'	=> 'p.topic_id = t.topic_id'
			);
		}

		switch ($this->db->get_sql_layer())
		{
			case 'mysqli':
				// No separate count query: the total is read through
				// SQL_CALC_FOUND_ROWS / FOUND_ROWS() around the main query below
				$is_mysql = true;
			break;

			case 'sqlite3':
				$sql_array_count['SELECT'] = ($type == 'posts') ? 'DISTINCT p.post_id' : 'DISTINCT p.topic_id';
				$sql = 'SELECT COUNT(' . (($type == 'posts') ? 'post_id' : 'topic_id') . ') as total_results
						FROM (' . $this->db->sql_build_query('SELECT', $sql_array_count) . ')';
			// no break

			default:
				// for sqlite3 $sql is already set above and is used unchanged
				$sql_array_count['SELECT'] = ($type == 'posts') ? 'COUNT(DISTINCT p.post_id) AS total_results' : 'COUNT(DISTINCT p.topic_id) AS total_results';
				$sql = (!$sql) ? $this->db->sql_build_query('SELECT', $sql_array_count) : $sql;

				$result = $this->db->sql_query($sql);
				$total_results = (int) $this->db->sql_fetchfield('total_results');
				$this->db->sql_freeresult($result);

				if (!$total_results)
				{
					return false;
				}
			break;
		}

		unset($sql_array_count, $sql);
	}

	// Build sql strings for sorting
	$sql_sort = $sort_by_sql[$sort_key] . (($sort_dir == 'a') ? ' ASC' : ' DESC');

	// The first character of the sort expression is its table alias
	switch ($sql_sort[0])
	{
		case 'u':
			$sql_array['FROM'][USERS_TABLE] = 'u';
			$sql_where[] = 'u.user_id = p.poster_id ';
		break;

		case 't':
			$left_join_topics = true;
		break;

		case 'f':
			$sql_array['FROM'][FORUMS_TABLE] = 'f';
			$sql_where[] = 'f.forum_id = p.forum_id';
		break;
	}

	if ($left_join_topics)
	{
		$sql_array['LEFT_JOIN'][] = array(
			'FROM'	=> array(TOPICS_TABLE => 't'),
			'ON'	=> 'p.topic_id = t.topic_id'
		);
	}

	// if using mysql and the total result count is not calculated yet, get it from the db
	if (!$total_results && $is_mysql)
	{
		// Also count rows for the query as if there was not LIMIT. Add SQL_CALC_FOUND_ROWS to SQL
		$sql_array['SELECT'] = 'SQL_CALC_FOUND_ROWS ' . $sql_array['SELECT'];
	}

	// The sort switch may have added join conditions, so the WHERE clause is rebuilt
	$sql_array['WHERE'] = implode(' AND ', $sql_where);
	$sql_array['GROUP_BY'] = ($group_by) ? (($type == 'posts') ? 'p.post_id' : 'p.topic_id') . ', ' . $sort_by_sql[$sort_key] : '';
	$sql_array['ORDER_BY'] = $sql_sort;

	unset($sql_where, $sql_sort, $group_by);

	$id_column = ($type == 'posts') ? 'post_id' : 'topic_id';

	$sql = $this->db->sql_build_query('SELECT', $sql_array);
	$result = $this->db->sql_query_limit($sql, $this->config['search_block_size'], $start);

	while ($row = $this->db->sql_fetchrow($result))
	{
		$id_ary[] = (int) $row[$id_column];
	}
	$this->db->sql_freeresult($result);

	if (!$total_results && $is_mysql)
	{
		// Get the number of results as calculated by MySQL
		$sql_count = 'SELECT FOUND_ROWS() as total_results';
		$result = $this->db->sql_query($sql_count);
		$total_results = (int) $this->db->sql_fetchfield('total_results');
		$this->db->sql_freeresult($result);

		if (!$total_results)
		{
			return false;
		}
	}

	if ($start >= $total_results)
	{
		// Requested offset lies beyond the last result: fall back to the start of the last page.
		// floor() returns a float, $start is an int reference handed back to the caller.
		$start = (int) (floor(($total_results - 1) / $per_page) * $per_page);

		$result = $this->db->sql_query_limit($sql, $this->config['search_block_size'], $start);

		while ($row = $this->db->sql_fetchrow($result))
		{
			$id_ary[] = (int) $row[$id_column];
		}
		$this->db->sql_freeresult($result);
	}

	// store the ids, from start on then delete anything that isn't on the current page because we only need ids for one page
	$this->save_ids($search_key, $this->search_query, $author_ary, $total_results, $id_ary, $start, $sort_dir);
	$id_ary = array_slice($id_ary, 0, (int) $per_page);

	return $total_results;
}
|
||
|
||
/**
|
||
* {@inheritdoc}
|
||
*/
|
||
public function author_search(string $type, bool $firstpost_only, array $sort_by_sql, string $sort_key, string $sort_dir, string $sort_days, array $ex_fid_ary, string $post_visibility, int $topic_id, array $author_ary, string $author_name, array &$id_ary, int &$start, int $per_page)
|
||
{
|
||
// No author? No posts
|
||
if (!count($author_ary))
|
||
{
|
||
return 0;
|
||
}
|
||
|
||
// generate a search_key from all the options to identify the results
|
||
$search_key_array = array(
|
||
'',
|
||
$type,
|
||
($firstpost_only) ? 'firstpost' : '',
|
||
'',
|
||
'',
|
||
$sort_days,
|
||
$sort_key,
|
||
$topic_id,
|
||
implode(',', $ex_fid_ary),
|
||
$post_visibility,
|
||
implode(',', $author_ary),
|
||
$author_name,
|
||
);
|
||
|
||
/**
|
||
* Allow changing the search_key for cached results
|
||
*
|
||
* @event core.search_native_by_author_modify_search_key
|
||
* @var array search_key_array Array with search parameters to generate the search_key
|
||
* @var string type Searching type ('posts', 'topics')
|
||
* @var boolean firstpost_only Flag indicating if only topic starting posts are considered
|
||
* @var int sort_days Time, in days, of the oldest possible post to list
|
||
* @var string sort_key The sort type used from the possible sort types
|
||
* @var int topic_id Limit the search to this topic_id only
|
||
* @var array ex_fid_ary Which forums not to search on
|
||
* @var string post_visibility Post visibility data
|
||
* @var array author_ary Array of user_id containing the users to filter the results to
|
||
* @var string author_name The username to search on
|
||
* @since 3.1.7-RC1
|
||
*/
|
||
$vars = array(
|
||
'search_key_array',
|
||
'type',
|
||
'firstpost_only',
|
||
'sort_days',
|
||
'sort_key',
|
||
'topic_id',
|
||
'ex_fid_ary',
|
||
'post_visibility',
|
||
'author_ary',
|
||
'author_name',
|
||
);
|
||
extract($this->phpbb_dispatcher->trigger_event('core.search_native_by_author_modify_search_key', compact($vars)));
|
||
|
||
$search_key = md5(implode('#', $search_key_array));
|
||
|
||
// try reading the results from cache
|
||
$total_results = 0;
|
||
if ($this->obtain_ids($search_key, $total_results, $id_ary, $start, $per_page, $sort_dir) == self::SEARCH_RESULT_IN_CACHE)
|
||
{
|
||
return $total_results;
|
||
}
|
||
|
||
$id_ary = array();
|
||
|
||
// Create some display specific sql strings
|
||
if ($author_name)
|
||
{
|
||
// first one matches post of registered users, second one guests and deleted users
|
||
$sql_author = '(' . $this->db->sql_in_set('p.poster_id', array_diff($author_ary, array(ANONYMOUS)), false, true) . ' OR p.post_username ' . $author_name . ')';
|
||
}
|
||
else
|
||
{
|
||
$sql_author = $this->db->sql_in_set('p.poster_id', $author_ary);
|
||
}
|
||
$sql_fora = (count($ex_fid_ary)) ? ' AND ' . $this->db->sql_in_set('p.forum_id', $ex_fid_ary, true) : '';
|
||
$sql_time = ($sort_days) ? ' AND p.post_time >= ' . (time() - ($sort_days * 86400)) : '';
|
||
$sql_topic_id = ($topic_id) ? ' AND p.topic_id = ' . (int) $topic_id : '';
|
||
$sql_firstpost = ($firstpost_only) ? ' AND p.post_id = t.topic_first_post_id' : '';
|
||
$post_visibility = ($post_visibility) ? ' AND ' . $post_visibility : '';
|
||
|
||
// Build sql strings for sorting
|
||
$sql_sort = $sort_by_sql[$sort_key] . (($sort_dir == 'a') ? ' ASC' : ' DESC');
|
||
$sql_sort_table = $sql_sort_join = '';
|
||
switch ($sql_sort[0])
|
||
{
|
||
case 'u':
|
||
$sql_sort_table = USERS_TABLE . ' u, ';
|
||
$sql_sort_join = ' AND u.user_id = p.poster_id ';
|
||
break;
|
||
|
||
case 't':
|
||
$sql_sort_table = ($type == 'posts' && !$firstpost_only) ? TOPICS_TABLE . ' t, ' : '';
|
||
$sql_sort_join = ($type == 'posts' && !$firstpost_only) ? ' AND t.topic_id = p.topic_id ' : '';
|
||
break;
|
||
|
||
case 'f':
|
||
$sql_sort_table = FORUMS_TABLE . ' f, ';
|
||
$sql_sort_join = ' AND f.forum_id = p.forum_id ';
|
||
break;
|
||
}
|
||
|
||
$select = ($type == 'posts') ? 'p.post_id' : 't.topic_id';
|
||
$is_mysql = false;
|
||
|
||
/**
|
||
* Allow changing the query used to search for posts by author in fulltext_native
|
||
*
|
||
* @event core.search_native_author_count_query_before
|
||
* @var int total_results The previous result count for the format of the query.
|
||
* Set to 0 to force a re-count
|
||
* @var string type The type of search being made
|
||
* @var string select SQL SELECT clause for what to get
|
||
* @var string sql_sort_table CROSS JOIN'ed table to allow doing the sort chosen
|
||
* @var string sql_sort_join Condition to define how to join the CROSS JOIN'ed table specified in sql_sort_table
|
||
* @var array sql_author SQL WHERE condition for the post author ids
|
||
* @var int topic_id Limit the search to this topic_id only
|
||
* @var string sort_by_sql The possible predefined sort types
|
||
* @var string sort_key The sort type used from the possible sort types
|
||
* @var string sort_dir "a" for ASC or "d" for DESC for the sort order used
|
||
* @var string sql_sort The result SQL when processing sort_by_sql + sort_key + sort_dir
|
||
* @var string sort_days Time, in days, that the oldest post showing can have
|
||
* @var string sql_time The SQL to search on the time specified by sort_days
|
||
* @var bool firstpost_only Whether or not to search only on the first post of the topics
|
||
* @var string sql_firstpost The SQL used in the WHERE clause to filter by firstpost.
|
||
* @var array ex_fid_ary Forum ids that must not be searched on
|
||
* @var array sql_fora SQL query for ex_fid_ary
|
||
* @var int start How many posts to skip in the search results (used for pagination)
|
||
* @since 3.1.5-RC1
|
||
*/
|
||
$vars = array(
|
||
'total_results',
|
||
'type',
|
||
'select',
|
||
'sql_sort_table',
|
||
'sql_sort_join',
|
||
'sql_author',
|
||
'topic_id',
|
||
'sort_by_sql',
|
||
'sort_key',
|
||
'sort_dir',
|
||
'sql_sort',
|
||
'sort_days',
|
||
'sql_time',
|
||
'firstpost_only',
|
||
'sql_firstpost',
|
||
'ex_fid_ary',
|
||
'sql_fora',
|
||
'start',
|
||
);
|
||
extract($this->phpbb_dispatcher->trigger_event('core.search_native_author_count_query_before', compact($vars)));
|
||
|
||
// If the cache was completely empty count the results
|
||
if (!$total_results)
|
||
{
|
||
switch ($this->db->get_sql_layer())
|
||
{
|
||
case 'mysqli':
|
||
// $select = 'SQL_CALC_FOUND_ROWS ' . $select;
|
||
$is_mysql = true;
|
||
break;
|
||
|
||
default:
|
||
if ($type == 'posts')
|
||
{
|
||
$sql = 'SELECT COUNT(p.post_id) as total_results
|
||
FROM ' . POSTS_TABLE . ' p' . (($firstpost_only) ? ', ' . TOPICS_TABLE . ' t ' : ' ') . "
|
||
WHERE $sql_author
|
||
$sql_topic_id
|
||
$sql_firstpost
|
||
$post_visibility
|
||
$sql_fora
|
||
$sql_time";
|
||
}
|
||
else
|
||
{
|
||
if ($this->db->get_sql_layer() == 'sqlite3')
|
||
{
|
||
$sql = 'SELECT COUNT(topic_id) as total_results
|
||
FROM (SELECT DISTINCT t.topic_id';
|
||
}
|
||
else
|
||
{
|
||
$sql = 'SELECT COUNT(DISTINCT t.topic_id) as total_results';
|
||
}
|
||
|
||
$sql .= ' FROM ' . TOPICS_TABLE . ' t, ' . POSTS_TABLE . " p
|
||
WHERE $sql_author
|
||
$sql_topic_id
|
||
$sql_firstpost
|
||
$post_visibility
|
||
$sql_fora
|
||
AND t.topic_id = p.topic_id
|
||
$sql_time" . ($this->db->get_sql_layer() == 'sqlite3' ? ')' : '');
|
||
}
|
||
$result = $this->db->sql_query($sql);
|
||
|
||
$total_results = (int) $this->db->sql_fetchfield('total_results');
|
||
$this->db->sql_freeresult($result);
|
||
|
||
if (!$total_results)
|
||
{
|
||
return false;
|
||
}
|
||
break;
|
||
}
|
||
}
|
||
|
||
// Build the query for really selecting the post_ids
|
||
if ($type == 'posts')
|
||
{
|
||
$sql = "SELECT $select
|
||
FROM " . $sql_sort_table . POSTS_TABLE . ' p' . (($firstpost_only) ? ', ' . TOPICS_TABLE . ' t' : '') . "
|
||
WHERE $sql_author
|
||
$sql_topic_id
|
||
$sql_firstpost
|
||
$post_visibility
|
||
$sql_fora
|
||
$sql_sort_join
|
||
$sql_time
|
||
ORDER BY $sql_sort";
|
||
$field = 'post_id';
|
||
}
|
||
else
|
||
{
|
||
$sql = "SELECT $select
|
||
FROM " . $sql_sort_table . TOPICS_TABLE . ' t, ' . POSTS_TABLE . " p
|
||
WHERE $sql_author
|
||
$sql_topic_id
|
||
$sql_firstpost
|
||
$post_visibility
|
||
$sql_fora
|
||
AND t.topic_id = p.topic_id
|
||
$sql_sort_join
|
||
$sql_time
|
||
GROUP BY t.topic_id, " . $sort_by_sql[$sort_key] . '
|
||
ORDER BY ' . $sql_sort;
|
||
$field = 'topic_id';
|
||
}
|
||
|
||
// Only read one block of posts from the db and then cache it
|
||
$result = $this->db->sql_query_limit($sql, $this->config['search_block_size'], $start);
|
||
|
||
while ($row = $this->db->sql_fetchrow($result))
|
||
{
|
||
$id_ary[] = (int) $row[$field];
|
||
}
|
||
$this->db->sql_freeresult($result);
|
||
|
||
if (!$total_results && $is_mysql)
|
||
{
|
||
// Count rows for the executed queries. Replace $select within $sql with SQL_CALC_FOUND_ROWS, and run it.
|
||
$sql_calc = str_replace('SELECT ' . $select, 'SELECT SQL_CALC_FOUND_ROWS ' . $select, $sql);
|
||
|
||
$result = $this->db->sql_query($sql_calc);
|
||
$this->db->sql_freeresult($result);
|
||
|
||
$sql_count = 'SELECT FOUND_ROWS() as total_results';
|
||
$result = $this->db->sql_query($sql_count);
|
||
$total_results = (int) $this->db->sql_fetchfield('total_results');
|
||
$this->db->sql_freeresult($result);
|
||
|
||
if (!$total_results)
|
||
{
|
||
return false;
|
||
}
|
||
}
|
||
|
||
if ($start >= $total_results)
|
||
{
|
||
$start = floor(($total_results - 1) / $per_page) * $per_page;
|
||
|
||
$result = $this->db->sql_query_limit($sql, $this->config['search_block_size'], $start);
|
||
|
||
while ($row = $this->db->sql_fetchrow($result))
|
||
{
|
||
$id_ary[] = (int) $row[$field];
|
||
}
|
||
$this->db->sql_freeresult($result);
|
||
}
|
||
|
||
if (count($id_ary))
|
||
{
|
||
$this->save_ids($search_key, '', $author_ary, $total_results, $id_ary, $start, $sort_dir);
|
||
$id_ary = array_slice($id_ary, 0, $per_page);
|
||
|
||
return $total_results;
|
||
}
|
||
return false;
|
||
}
|
||
|
||
/**
 * {@inheritdoc}
 *
 * This backend always reports that phrase search is not supported.
 */
public function supports_phrase_search(): bool
{
	return false;
}
|
||
|
||
/**
 * {@inheritdoc}
 *
 * Works out which words of the message and subject have to be linked to or
 * unlinked from the post, lets listeners of core.search_native_index_before
 * adjust that, then updates the word list and word match tables inside a
 * transaction and destroys the cached search results for the affected words
 * and the poster.
 */
public function index(string $mode, int $post_id, string &$message, string &$subject, int $poster_id, int $forum_id)
{
	if (!$this->config['fulltext_native_load_upd'])
	{
		// The search indexer is disabled, return
		return;
	}

	// Split the new post/subject to obtain arrays of 'words'
	$split_text = $this->split_message($message);
	$split_title = $this->split_message($subject);

	// Words currently linked to the post (word_text => word_id); only filled in edit mode
	$cur_words = array('post' => array(), 'title' => array());

	// Unless an existing post is edited every word is added and nothing is removed
	$words = array(
		'add'	=> array('post' => $split_text, 'title' => $split_title),
		'del'	=> array('post' => array(), 'title' => array()),
	);

	if ($mode == 'edit')
	{
		$sql = 'SELECT w.word_id, w.word_text, m.title_match
			FROM ' . SEARCH_WORDLIST_TABLE . ' w, ' . SEARCH_WORDMATCH_TABLE . " m
			WHERE m.post_id = $post_id
				AND w.word_id = m.word_id";
		$result = $this->db->sql_query($sql);

		while ($row = $this->db->sql_fetchrow($result))
		{
			$which = ($row['title_match']) ? 'title' : 'post';
			$cur_words[$which][$row['word_text']] = $row['word_id'];
		}
		$this->db->sql_freeresult($result);

		$indexed_post_words = array_keys($cur_words['post']);
		$indexed_title_words = array_keys($cur_words['title']);

		// Add what is new in the text, delete what is no longer part of it
		$words['add']['post'] = array_diff($split_text, $indexed_post_words);
		$words['add']['title'] = array_diff($split_title, $indexed_title_words);
		$words['del']['post'] = array_diff($indexed_post_words, $split_text);
		$words['del']['title'] = array_diff($indexed_title_words, $split_title);
	}

	/**
	 * Event to modify method arguments and words before the native search index is updated
	 *
	 * @event core.search_native_index_before
	 * @var string	mode				Contains the post mode: edit, post, reply, quote
	 * @var int		post_id				The id of the post which is modified/created
	 * @var string	message				New or updated post content
	 * @var string	subject				New or updated post subject
	 * @var int		poster_id			Post author's user id
	 * @var int		forum_id			The id of the forum in which the post is located
	 * @var array	words				Grouped lists of words added to or remove from the index
	 * @var array	split_text			Array of words from the message
	 * @var array	split_title			Array of words from the title
	 * @var array	cur_words			Array of words currently in the index for comparing to new words
	 * 									when mode is edit. Empty for other modes.
	 * @since 3.2.3-RC1
	 */
	$vars = array(
		'mode',
		'post_id',
		'message',
		'subject',
		'poster_id',
		'forum_id',
		'words',
		'split_text',
		'split_title',
		'cur_words',
	);
	extract($this->phpbb_dispatcher->trigger_event('core.search_native_index_before', compact($vars)));

	unset($split_text, $split_title);

	// Get unique words from the above arrays
	$unique_add_words = array_unique(array_merge($words['add']['post'], $words['add']['title']));

	// We now have the unique list of all words to be added and individual
	// lists of added and removed words for text and title. What we need to
	// do now is add the new words (if they don't already exist) and then
	// add (or remove) matches between the words and this post
	$new_words = array();
	if (count($unique_add_words))
	{
		$sql = 'SELECT word_id, word_text
			FROM ' . SEARCH_WORDLIST_TABLE . '
			WHERE ' . $this->db->sql_in_set('word_text', $unique_add_words);
		$result = $this->db->sql_query($sql);

		$word_ids = array();
		while ($row = $this->db->sql_fetchrow($result))
		{
			$word_ids[$row['word_text']] = $row['word_id'];
		}
		$this->db->sql_freeresult($result);

		// Words that have no row in the word list yet
		$new_words = array_diff($unique_add_words, array_keys($word_ids));
	}

	$this->db->sql_transaction('begin');

	if (count($new_words))
	{
		$sql_ary = array();
		foreach ($new_words as $word)
		{
			$sql_ary[] = array('word_text' => (string) $word, 'word_count' => 0);
		}

		// Errors of this insert are returned instead of raised
		$this->db->sql_return_on_error(true);
		$this->db->sql_multi_insert(SEARCH_WORDLIST_TABLE, $sql_ary);
		$this->db->sql_return_on_error(false);
	}

	// now update the search match table, remove links to removed words and add links to new words
	foreach ($words['del'] as $word_in => $word_ary)
	{
		$title_match = ($word_in == 'title') ? 1 : 0;

		// Resolve the ids of the words to unlink. The lists may have been changed
		// by an event listener, so a word without a known id is skipped instead
		// of producing an undefined index and an invalid IN set
		$sql_in = array();
		foreach ($word_ary as $word)
		{
			if (isset($cur_words[$word_in][$word]))
			{
				$sql_in[] = $cur_words[$word_in][$word];
			}
		}

		if (!count($sql_in))
		{
			continue;
		}

		$sql = 'DELETE FROM ' . SEARCH_WORDMATCH_TABLE . '
			WHERE ' . $this->db->sql_in_set('word_id', $sql_in) . '
				AND post_id = ' . intval($post_id) . "
				AND title_match = $title_match";
		$this->db->sql_query($sql);

		$sql = 'UPDATE ' . SEARCH_WORDLIST_TABLE . '
			SET word_count = word_count - 1
			WHERE ' . $this->db->sql_in_set('word_id', $sql_in) . '
				AND word_count > 0';
		$this->db->sql_query($sql);
	}

	// Errors of the following statements are returned instead of raised
	$this->db->sql_return_on_error(true);
	foreach ($words['add'] as $word_in => $word_ary)
	{
		$title_match = ($word_in == 'title') ? 1 : 0;

		if (!count($word_ary))
		{
			continue;
		}

		// split_message() keeps duplicates; they do not change which rows the
		// IN set selects, so drop them to keep the statements short
		$word_ary = array_unique($word_ary);

		$sql = 'INSERT INTO ' . SEARCH_WORDMATCH_TABLE . ' (post_id, word_id, title_match)
			SELECT ' . (int) $post_id . ', word_id, ' . (int) $title_match . '
			FROM ' . SEARCH_WORDLIST_TABLE . '
			WHERE ' . $this->db->sql_in_set('word_text', $word_ary);
		$this->db->sql_query($sql);

		$sql = 'UPDATE ' . SEARCH_WORDLIST_TABLE . '
			SET word_count = word_count + 1
			WHERE ' . $this->db->sql_in_set('word_text', $word_ary);
		$this->db->sql_query($sql);
	}
	$this->db->sql_return_on_error(false);

	$this->db->sql_transaction('commit');

	// destroy cached search results containing any of the words removed or added
	$this->destroy_cache(array_unique(array_merge($words['add']['post'], $words['add']['title'], $words['del']['post'], $words['del']['title'])), array($poster_id));
}
|
||
|
||
/**
 * {@inheritdoc}
 *
 * Decrements the counters of the words linked to the given posts, deletes the
 * links themselves and destroys the cached search results for those words and
 * the given authors.
 */
public function index_remove(array $post_ids, array $author_ids, array $forum_ids): void
{
	// Has to exist even when no post ids were passed: it is always handed to
	// destroy_cache() at the end of the method
	$word_texts = array();

	if (count($post_ids))
	{
		$sql = 'SELECT w.word_id, w.word_text, m.title_match
			FROM ' . SEARCH_WORDMATCH_TABLE . ' m, ' . SEARCH_WORDLIST_TABLE . ' w
			WHERE ' . $this->db->sql_in_set('m.post_id', $post_ids) . '
				AND w.word_id = m.word_id';
		$result = $this->db->sql_query($sql);

		$message_word_ids = $title_word_ids = array();
		while ($row = $this->db->sql_fetchrow($result))
		{
			if ($row['title_match'])
			{
				$title_word_ids[] = $row['word_id'];
			}
			else
			{
				$message_word_ids[] = $row['word_id'];
			}
			$word_texts[] = $row['word_text'];
		}
		$this->db->sql_freeresult($result);

		// One decrement for title matches and one for message matches, in that order.
		// NOTE(review): an id that occurs several times within one list (same word
		// in several of the removed posts) is still decremented only once per list.
		foreach (array($title_word_ids, $message_word_ids) as $word_ids)
		{
			if (!count($word_ids))
			{
				continue;
			}

			$sql = 'UPDATE ' . SEARCH_WORDLIST_TABLE . '
				SET word_count = word_count - 1
				WHERE ' . $this->db->sql_in_set('word_id', $word_ids) . '
					AND word_count > 0';
			$this->db->sql_query($sql);
		}

		unset($title_word_ids, $message_word_ids);

		$sql = 'DELETE FROM ' . SEARCH_WORDMATCH_TABLE . '
			WHERE ' . $this->db->sql_in_set('post_id', $post_ids);
		$this->db->sql_query($sql);
	}

	$this->destroy_cache(array_unique($word_texts), array_unique($author_ids));
}
|
||
|
||
/**
 * {@inheritdoc}
 *
 * When the indexer is enabled, the board has at least 100 posts and a common
 * word threshold is configured, words whose count exceeds the threshold (or
 * that are already flagged) are flagged as common and their matches are
 * deleted. Cached results for those words are destroyed and search_last_gc is
 * set to the current time.
 */
public function tidy(): void
{
	// Is the fulltext indexer disabled? Then only record the run and stop
	if (!$this->config['fulltext_native_load_upd'])
	{
		$this->config->set('search_last_gc', time(), false);
		return;
	}

	$destroy_cache_words = array();

	// Remove common words
	if ($this->config['num_posts'] >= 100 && $this->config['fulltext_native_common_thres'])
	{
		// The threshold is configured as a percentage of the number of posts
		$common_threshold = ((float) $this->config['fulltext_native_common_thres']) / 100.0;
		$max_word_count = (int) floor($this->config['num_posts'] * $common_threshold);

		// First, get the IDs of common words
		$sql = 'SELECT word_id, word_text
			FROM ' . SEARCH_WORDLIST_TABLE . '
			WHERE word_count > ' . $max_word_count . '
				OR word_common = 1';
		$result = $this->db->sql_query($sql);

		$sql_in = array();
		while ($row = $this->db->sql_fetchrow($result))
		{
			$sql_in[] = $row['word_id'];
			$destroy_cache_words[] = $row['word_text'];
		}
		$this->db->sql_freeresult($result);

		if (count($sql_in))
		{
			// Flag the words
			$sql = 'UPDATE ' . SEARCH_WORDLIST_TABLE . '
				SET word_common = 1
				WHERE ' . $this->db->sql_in_set('word_id', $sql_in);
			$this->db->sql_query($sql);

			// by setting search_last_gc to the new time here we make sure that if a user reloads because the
			// following query takes too long, they won't run into it again
			$this->config->set('search_last_gc', time(), false);

			// Delete the matches
			$sql = 'DELETE FROM ' . SEARCH_WORDMATCH_TABLE . '
				WHERE ' . $this->db->sql_in_set('word_id', $sql_in);
			$this->db->sql_query($sql);
		}
		unset($sql_in);
	}

	if (count($destroy_cache_words))
	{
		// destroy cached search results containing any of the words that are now common or were removed
		$this->destroy_cache(array_unique($destroy_cache_words));
	}

	$this->config->set('search_last_gc', time(), false);
}
|
||
|
||
// create_index is inherited from base.php
|
||
|
||
/**
 * {@inheritdoc}
 *
 * Empties the word list, word match and search results tables. The statements
 * can be changed by listeners of core.search_native_delete_index_before before
 * they are executed. $post_counter is not used by this backend.
 *
 * @return null Always null
 */
public function delete_index(?int &$post_counter = null): ?array
{
	// sqlite3 gets DELETE statements, every other SQL layer TRUNCATE TABLE
	$statement = ($this->db->get_sql_layer() == 'sqlite3') ? 'DELETE FROM ' : 'TRUNCATE TABLE ';

	$sql_queries = [];
	foreach ([SEARCH_WORDLIST_TABLE, SEARCH_WORDMATCH_TABLE, SEARCH_RESULTS_TABLE] as $table)
	{
		$sql_queries[] = $statement . $table;
	}

	// Copy handed to the event; $this->stats itself is not written back
	$stats = $this->stats;

	/**
	 * Event to modify SQL queries before the native search index is deleted
	 *
	 * @event core.search_native_delete_index_before
	 * @var array	sql_queries			Array with queries for deleting the search index
	 * @var array	stats				Array with statistics of the current index (read only)
	 * @since 3.2.3-RC1
	 */
	$vars = array(
		'sql_queries',
		'stats',
	);
	extract($this->phpbb_dispatcher->trigger_event('core.search_native_delete_index_before', compact($vars)));

	foreach ($sql_queries as $sql_query)
	{
		$this->db->sql_query($sql_query);
	}

	return null;
}
|
||
|
||
/**
 * {@inheritdoc}
 *
 * The index counts as created when both the word and the match statistic are
 * non-zero. The statistics are computed first if they are not available yet.
 */
public function index_created(): bool
{
	if (empty($this->stats))
	{
		$this->get_stats();
	}

	if (!$this->stats['total_words'])
	{
		return false;
	}

	return (bool) $this->stats['total_matches'];
}
|
||
|
||
/**
 * {@inheritdoc}
 *
 * Returns the word and match totals keyed by their translated labels. The
 * statistics are computed first if they are not available yet.
 */
public function index_stats()
{
	if (empty($this->stats))
	{
		$this->get_stats();
	}

	$labelled_stats = array();
	$labelled_stats[$this->language->lang('TOTAL_WORDS')] = $this->stats['total_words'];
	$labelled_stats[$this->language->lang('TOTAL_MATCHES')] = $this->stats['total_matches'];

	return $labelled_stats;
}
|
||
|
||
/**
 * Fills $this->stats with the estimated row counts of the word list table
 * ('total_words') and the word match table ('total_matches')
 */
protected function get_stats()
{
	$stat_tables = array(
		'total_words'	=> SEARCH_WORDLIST_TABLE,
		'total_matches'	=> SEARCH_WORDMATCH_TABLE,
	);

	foreach ($stat_tables as $stat_name => $table_name)
	{
		$this->stats[$stat_name] = $this->db->get_estimated_row_count($table_name);
	}
}
|
||
|
||
/**
 * Split a text into the words that are to be indexed
 *
 * HTML tags, [code] blocks and BBCode tags are removed, the remaining text is
 * run through cleanup() and tokenised on spaces. Tokens longer than 255 bytes
 * or shorter than the configured minimum are dropped; tokens that start with a
 * Hangul or CJK character are exempt from the minimum character count (but
 * not from the preceding byte length filter).
 *
 * NOTE: duplicates are NOT removed from the return array
 *
 * @param string $text Text to split, encoded in UTF-8
 * @return array Array of UTF-8 words
 */
protected function split_message($text)
{
	// Everything matched by these patterns is replaced by a single space
	$strip_patterns = array(
		// Do not index code
		'#\[code(?:=.*?)?(\:?[0-9a-z]{5,})\].*?\[\/code(\:?[0-9a-z]{5,})\]#is',
		// BBcode
		'#\[\/?[a-z0-9\*\+\-]+(?:=.*?)?(?::[a-z])?(\:?[0-9a-z]{5,})\]#',
	);

	$min_chars = $this->word_length['min'];
	$max_ignored_bytes = $min_chars - 1;

	// Clean up the string, remove HTML tags, remove BBCodes
	$plain_text = preg_replace($strip_patterns, ' ', strip_tags($text));
	$clean_text = $this->cleanup($plain_text, -1);

	$found_words = array();

	for ($token = strtok($clean_text, ' '); strlen($token); $token = strtok(' '))
	{
		$byte_length = strlen($token);

		// Words longer than 255 bytes are ignored. This will have to be
		// changed whenever we change the length of search_wordlist.word_text
		//
		// Words of at most $max_ignored_bytes bytes are ignored, too
		if ($byte_length > 255 || $byte_length <= $max_ignored_bytes)
		{
			continue;
		}

		// Test whether the word is too short to be indexed. This limit does
		// NOT apply to words starting inside the Hangul or CJK ranges.
		//
		// Note: this could be optimized. If the codepoint is lower than Hangul's
		// range we know that it will also be lower than CJK ranges
		if (utf8_strlen($token) < $min_chars
			&& (strncmp($token, self::UTF8_HANGUL_FIRST, 3) < 0 || strncmp($token, self::UTF8_HANGUL_LAST, 3) > 0)
			&& (strncmp($token, self::UTF8_CJK_FIRST, 3) < 0 || strncmp($token, self::UTF8_CJK_LAST, 3) > 0)
			&& (strncmp($token, self::UTF8_CJK_B_FIRST, 4) < 0 || strncmp($token, self::UTF8_CJK_B_LAST, 4) > 0))
		{
			continue;
		}

		$found_words[] = $token;
	}

	return $found_words;
}
|
||
|
||
/**
 * Clean up a text to remove non-alphanumeric characters
 *
 * This method receives a UTF-8 string, normalizes it to NFC, lowercases the
 * ASCII letters and replaces all non-alphanumeric characters with spaces, then
 * returns the result. Hangul and CJK characters are kept and surrounded by
 * spaces; other multi-byte characters are mapped through the search indexer
 * conversion tables.
 *
 * Any number of "allowed chars" can be passed as a UTF-8 string in NFC. A
 * value without a first offset (null, or the -1 passed by split_message())
 * allows no extra characters.
 *
 * @param string $text Text to split, in UTF-8 (not normalized or sanitized)
 * @param string $allowed_chars String of special chars to allow
 * @param string $encoding Text encoding
 * @return string Cleaned up text, only alphanumeric chars are left
 */
protected function cleanup($text, $allowed_chars = null, $encoding = 'utf-8')
{
	// Conversion table (char => replacement) and the list of table blocks
	// already loaded; both are shared between calls
	static $conv = array(), $conv_loaded = array();
	$allow = array();

	// Convert the text to UTF-8
	$encoding = strtolower($encoding);
	if ($encoding != 'utf-8')
	{
		$text = utf8_recode($text, $encoding);
	}

	// Length of a UTF-8 sequence, keyed by the high nibble of its lead byte
	$utf_len_mask = array(
		"\xC0"	=>	2,
		"\xD0"	=>	2,
		"\xE0"	=>	3,
		"\xF0"	=>	4
	);

	/**
	 * Replace HTML entities and NCRs
	 */
	$text = htmlspecialchars_decode(utf8_decode_ncr($text), ENT_QUOTES);

	/**
	 * Normalize to NFC
	 */
	$text = \Normalizer::normalize($text);
	if (!is_string($text))
	{
		// Normalizer::normalize() returns false on error; there is nothing left to clean
		return '';
	}

	/**
	 * The first thing we do is:
	 *
	 * - convert ASCII-7 letters to lowercase
	 * - remove the ASCII-7 non-alpha characters
	 * - remove the bytes that should not appear in a valid UTF-8 string: 0xC0,
	 *   0xC1 and 0xF5-0xFF
	 *
	 * @todo in theory, the third one is already taken care of during normalization and those chars should have been replaced by Unicode replacement chars
	 */
	$sb_match = "ISTCPAMELRDOJBNHFGVWUQKYXZ\r\n\t!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~\x00\x01\x02\x03\x04\x05\x06\x07\x08\x0B\x0C\x0E\x0F\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F\xC0\xC1\xF5\xF6\xF7\xF8\xF9\xFA\xFB\xFC\xFD\xFE\xFF";

	// strtr() ignores the surplus characters of the longer string, so the
	// replacement has to be exactly as long as $sb_match: the lowercase
	// letters followed by one space for every remaining character
	$sb_replace = str_pad('istcpamelrdojbnhfgvwuqkyxz', strlen($sb_match), ' ');

	/**
	 * This is the list of legal ASCII chars, it is automatically extended
	 * with ASCII chars from $allowed_chars
	 */
	$legal_ascii = ' eaisntroludcpmghbfvq10xy2j9kw354867z';

	/**
	 * Prepare an array containing the extra chars to allow
	 */
	if (isset($allowed_chars[0]))
	{
		$pos = 0;
		$len = strlen($allowed_chars);
		do
		{
			$c = $allowed_chars[$pos];

			if ($c < "\x80")
			{
				/**
				 * ASCII char
				 */
				$sb_pos = strpos($sb_match, $c);
				if (is_int($sb_pos))
				{
					/**
					 * Remove the char from $sb_match and its corresponding
					 * replacement in $sb_replace
					 */
					$sb_match = substr($sb_match, 0, $sb_pos) . substr($sb_match, $sb_pos + 1);
					$sb_replace = substr($sb_replace, 0, $sb_pos) . substr($sb_replace, $sb_pos + 1);
					$legal_ascii .= $c;
				}

				++$pos;
			}
			else
			{
				/**
				 * UTF-8 char; a byte that is no valid lead byte is consumed
				 * on its own so that the loop always advances
				 */
				$utf_len = $utf_len_mask[$c & "\xF0"] ?? 1;
				$allow[substr($allowed_chars, $pos, $utf_len)] = 1;
				$pos += $utf_len;
			}
		}
		while ($pos < $len);
	}

	$text = strtr($text, $sb_match, $sb_replace);
	$ret = '';

	$pos = 0;
	$len = strlen($text);

	do
	{
		/**
		 * Do all consecutive ASCII chars at once
		 */
		if ($spn = strspn($text, $legal_ascii, $pos))
		{
			$ret .= substr($text, $pos, $spn);
			$pos += $spn;
		}

		if ($pos >= $len)
		{
			return $ret;
		}

		/**
		 * Capture the UTF char
		 */
		$lead_nibble = $text[$pos] & "\xF0";
		if (!isset($utf_len_mask[$lead_nibble]))
		{
			/**
			 * Not the start of a multi-byte sequence (e.g. DEL, which is
			 * neither translated above nor a legal ASCII char, or a stray
			 * continuation byte). Without a known length the position
			 * would never advance, so treat the byte as a separator.
			 */
			$ret .= ' ';
			++$pos;
			continue;
		}

		$utf_len = $utf_len_mask[$lead_nibble];
		$utf_char = substr($text, $pos, $utf_len);
		$pos += $utf_len;

		if (($utf_char >= self::UTF8_HANGUL_FIRST && $utf_char <= self::UTF8_HANGUL_LAST)
			|| ($utf_char >= self::UTF8_CJK_FIRST && $utf_char <= self::UTF8_CJK_LAST)
			|| ($utf_char >= self::UTF8_CJK_B_FIRST && $utf_char <= self::UTF8_CJK_B_LAST))
		{
			/**
			 * All characters within these ranges are valid
			 *
			 * We separate them with a space in order to index each character
			 * individually
			 */
			$ret .= ' ' . $utf_char . ' ';
			continue;
		}

		if (isset($allow[$utf_char]))
		{
			/**
			 * The char is explicitly allowed
			 */
			$ret .= $utf_char;
			continue;
		}

		if (isset($conv[$utf_char]))
		{
			/**
			 * The char is mapped to something, maybe to itself actually
			 */
			$ret .= $conv[$utf_char];
			continue;
		}

		/**
		 * The char isn't mapped, but did we load its conversion table?
		 *
		 * The search indexer table is split into blocks. The block number of
		 * each char is equal to its codepoint right-shifted for 11 bits. It
		 * means that out of the 11, 16 or 21 meaningful bits of a 2-, 3- or
		 * 4- byte sequence we only keep the leftmost 0, 5 or 10 bits. Thus,
		 * all UTF chars encoded in 2 bytes are in the same first block.
		 */
		if (isset($utf_char[2]))
		{
			if (isset($utf_char[3]))
			{
				/**
				 * 1111 0nnn 10nn nnnn 10nx xxxx 10xx xxxx
				 * 0000 0111 0011 1111 0010 0000
				 */
				$idx = ((ord($utf_char[0]) & 0x07) << 7) | ((ord($utf_char[1]) & 0x3F) << 1) | ((ord($utf_char[2]) & 0x20) >> 5);
			}
			else
			{
				/**
				 * 1110 nnnn 10nx xxxx 10xx xxxx
				 * 0000 0111 0010 0000
				 */
				$idx = ((ord($utf_char[0]) & 0x07) << 1) | ((ord($utf_char[1]) & 0x20) >> 5);
			}
		}
		else
		{
			/**
			 * 110x xxxx 10xx xxxx
			 * 0000 0000 0000 0000
			 */
			$idx = 0;
		}

		/**
		 * Check if the required conv table has been loaded already
		 */
		if (!isset($conv_loaded[$idx]))
		{
			$conv_loaded[$idx] = 1;
			$file = $this->phpbb_root_path . 'includes/utf/data/search_indexer_' . $idx . '.' . $this->php_ext;

			if (file_exists($file))
			{
				$conv += include($file);
			}
		}

		if (isset($conv[$utf_char]))
		{
			$ret .= $conv[$utf_char];
		}
		else
		{
			/**
			 * We add an entry to the conversion table so that we
			 * don't have to convert to codepoint and perform the checks
			 * that are above this block
			 */
			$conv[$utf_char] = ' ';
			$ret .= ' ';
		}
	}
	while (1);

	return $ret;
}
|
||
|
||
/**
 * {@inheritdoc}
 *
 * Builds the form fields for the four fulltext_native_* settings (index
 * updating, minimum and maximum word length, common word threshold) and
 * returns them together with the validation type of each config value.
 */
public function get_acp_options(): array
{
	// Form fields for the settings of this backend, pre-filled with the current config values
	$tpl = '
	<dl>
		<dt><label for="fulltext_native_load_upd">' . $this->language->lang('YES_SEARCH_UPDATE') . $this->language->lang('COLON') . '</label><br /><span>' . $this->language->lang('YES_SEARCH_UPDATE_EXPLAIN') . '</span></dt>
		<dd><label><input type="radio" id="fulltext_native_load_upd" name="config[fulltext_native_load_upd]" value="1"' . (($this->config['fulltext_native_load_upd']) ? ' checked="checked"' : '') . ' class="radio" /> ' . $this->language->lang('YES') . '</label><label><input type="radio" name="config[fulltext_native_load_upd]" value="0"' . ((!$this->config['fulltext_native_load_upd']) ? ' checked="checked"' : '') . ' class="radio" /> ' . $this->language->lang('NO') . '</label></dd>
	</dl>
	<dl>
		<dt><label for="fulltext_native_min_chars">' . $this->language->lang('MIN_SEARCH_CHARS') . $this->language->lang('COLON') . '</label><br /><span>' . $this->language->lang('MIN_SEARCH_CHARS_EXPLAIN') . '</span></dt>
		<dd><input id="fulltext_native_min_chars" type="number" min="0" max="255" name="config[fulltext_native_min_chars]" value="' . (int) $this->config['fulltext_native_min_chars'] . '" /></dd>
	</dl>
	<dl>
		<dt><label for="fulltext_native_max_chars">' . $this->language->lang('MAX_SEARCH_CHARS') . $this->language->lang('COLON') . '</label><br /><span>' . $this->language->lang('MAX_SEARCH_CHARS_EXPLAIN') . '</span></dt>
		<dd><input id="fulltext_native_max_chars" type="number" min="0" max="255" name="config[fulltext_native_max_chars]" value="' . (int) $this->config['fulltext_native_max_chars'] . '" /></dd>
	</dl>
	<dl>
		<dt><label for="fulltext_native_common_thres">' . $this->language->lang('COMMON_WORD_THRESHOLD') . $this->language->lang('COLON') . '</label><br /><span>' . $this->language->lang('COMMON_WORD_THRESHOLD_EXPLAIN') . '</span></dt>
		<dd><input id="fulltext_native_common_thres" type="text" name="config[fulltext_native_common_thres]" value="' . (float) $this->config['fulltext_native_common_thres'] . '" /> %</dd>
	</dl>
	';

	// These are fields required in the config table
	return array(
		'tpl'		=> $tpl,
		'config'	=> array('fulltext_native_load_upd' => 'bool', 'fulltext_native_min_chars' => 'integer:0:255', 'fulltext_native_max_chars' => 'integer:0:255', 'fulltext_native_common_thres' => 'double:0:100')
	);
}
|
||
}
|