mirror of
https://github.com/phpbb/phpbb.git
synced 2025-03-14 12:40:13 +01:00
- removed unneeded code from fulltext_mysql [Bug #9947]
- an additional combined index on post_subject and post_text is necessary for fulltext_mysql [Bug #9818] - added phrase searching support to fulltext_mysql while fixing some minor bugs in the keyword processing code, originally intended to do this about a year ago when I switched from split_words array to search_query string in the search backend interface, so finally it's there git-svn-id: file:///svn/phpbb/trunk@7385 89ea8834-ac86-4346-8a33-228a782c2dd0
This commit is contained in:
parent
550f270a00
commit
5742dcd68a
@ -122,39 +122,33 @@ class fulltext_mysql extends search_backend
|
||||
$keywords = preg_replace($match, $replace, $keywords);
|
||||
}
|
||||
|
||||
$match = array();
|
||||
// New lines, carriage returns
|
||||
$match[] = "#[\n\r]+#";
|
||||
// NCRs like etc.
|
||||
$match[] = '#(&|&)[\#a-z0-9]+?;#i';
|
||||
|
||||
// Filter out as above
|
||||
$keywords = preg_replace($match, ' ', trim($keywords));
|
||||
$split_keywords = preg_replace("#[\n\r\t]+#", ' ', trim(htmlspecialchars_decode($keywords)));
|
||||
|
||||
// Split words
|
||||
if ($this->pcre_properties)
|
||||
{
|
||||
$split_keywords = preg_replace('#([^\p{L}\p{N}\'*])#u', '$1$1', str_replace('\'\'', '\' \'', trim($keywords)));
|
||||
$split_keywords = preg_replace('#([^\p{L}\p{N}\'*"()])#u', '$1$1', str_replace('\'\'', '\' \'', trim($split_keywords)));
|
||||
}
|
||||
else if ($this->mbstring_regex)
|
||||
{
|
||||
$split_keywords = mb_ereg_replace('([^\w\'*])', '\\1\\1', str_replace('\'\'', '\' \'', trim($keywords)));
|
||||
$split_keywords = mb_ereg_replace('([^\w\'*"()])', '\\1\\1', str_replace('\'\'', '\' \'', trim($split_keywords)));
|
||||
}
|
||||
else
|
||||
{
|
||||
$split_keywords = preg_replace('#([^\w\'*])#u', '$1$1', str_replace('\'\'', '\' \'', trim($keywords)));
|
||||
$split_keywords = preg_replace('#([^\w\'*"()])#u', '$1$1', str_replace('\'\'', '\' \'', trim($split_keywords)));
|
||||
}
|
||||
|
||||
if ($this->pcre_properties)
|
||||
{
|
||||
$matches = array();
|
||||
preg_match_all('#(?:[^\p{L}\p{N}*]|^)([+\-|]?(?:[\p{L}\p{N}*]+\'?)*[\p{L}\p{N}*])(?:[^\p{L}\p{N}*]|$)#u', $split_keywords, $matches);
|
||||
preg_match_all('#(?:[^\p{L}\p{N}*"()]|^)([+\-|]?(?:[\p{L}\p{N}*"()]+\'?)*[\p{L}\p{N}*"()])(?:[^\p{L}\p{N}*"()]|$)#u', $split_keywords, $matches);
|
||||
$this->split_words = $matches[1];
|
||||
}
|
||||
else if ($this->mbstring_regex)
|
||||
{
|
||||
mb_regex_encoding('UTF-8');
|
||||
mb_ereg_search_init($split_keywords, '(?:[^\w*]|^)([+\-|]?(?:[\w*]+\'?)*[\w*])(?:[^\w*]|$)');
|
||||
mb_ereg_search_init($split_keywords, '(?:[^\w*"()]|^)([+\-|]?(?:[\w*"()]+\'?)*[\w*"()])(?:[^\w*"()]|$)');
|
||||
|
||||
while (($word = mb_ereg_search_regs()))
|
||||
{
|
||||
@ -164,13 +158,46 @@ class fulltext_mysql extends search_backend
|
||||
else
|
||||
{
|
||||
$matches = array();
|
||||
preg_match_all('#(?:[^\w*]|^)([+\-|]?(?:[\w*]+\'?)*[\w*])(?:[^\w*]|$)#u', $split_keywords, $matches);
|
||||
preg_match_all('#(?:[^\w*"()]|^)([+\-|]?(?:[\w*"()]+\'?)*[\w*"()])(?:[^\w*"()]|$)#u', $split_keywords, $matches);
|
||||
$this->split_words = $matches[1];
|
||||
}
|
||||
|
||||
// to allow phrase search, we need to concatenate quoted words
|
||||
$tmp_split_words = array();
|
||||
$phrase = '';
|
||||
foreach ($this->split_words as $word)
|
||||
{
|
||||
if ($phrase)
|
||||
{
|
||||
$phrase .= ' ' . $word;
|
||||
if (strpos($word, '"') !== false && substr_count($word, '"') % 2 == 1)
|
||||
{
|
||||
$tmp_split_words[] = $phrase;
|
||||
$phrase = '';
|
||||
}
|
||||
}
|
||||
else if (strpos($word, '"') !== false && substr_count($word, '"') % 2 == 1)
|
||||
{
|
||||
$phrase = $word;
|
||||
}
|
||||
else
|
||||
{
|
||||
$tmp_split_words[] = $word . ' ';
|
||||
}
|
||||
}
|
||||
if ($phrase)
|
||||
{
|
||||
$tmp_split_words[] = $phrase;
|
||||
}
|
||||
|
||||
$this->split_words = $tmp_split_words;
|
||||
|
||||
unset($tmp_split_words);
|
||||
unset($phrase);
|
||||
|
||||
foreach ($this->split_words as $i => $word)
|
||||
{
|
||||
$clean_word = preg_replace('#^[+\-|]#', '', $word);
|
||||
$clean_word = preg_replace('#^[+\-|"]#', '', $word);
|
||||
|
||||
// check word length
|
||||
$clean_len = utf8_strlen(str_replace('*', '', $clean_word));
|
||||
@ -181,9 +208,41 @@ class fulltext_mysql extends search_backend
|
||||
}
|
||||
}
|
||||
|
||||
$this->search_query = implode(' ', $this->split_words);
|
||||
if ($terms == 'any')
|
||||
{
|
||||
$this->search_query = '';
|
||||
foreach ($this->split_words as $word)
|
||||
{
|
||||
if ((strpos($word, '+') === 0) || (strpos($word, '-') === 0) || (strpos($word, '|') === 0))
|
||||
{
|
||||
$word = substr($word, 1);
|
||||
}
|
||||
$this->search_query .= $word . ' ';
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
$this->search_query = '';
|
||||
foreach ($this->split_words as $word)
|
||||
{
|
||||
if ((strpos($word, '+') === 0) || (strpos($word, '-') === 0))
|
||||
{
|
||||
$this->search_query .= $word . ' ';
|
||||
}
|
||||
else if (strpos($word, '|') === 0)
|
||||
{
|
||||
$this->search_query .= substr($word, 1) . ' ';
|
||||
}
|
||||
else
|
||||
{
|
||||
$this->search_query .= '+' . $word . ' ';
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (sizeof($this->split_words))
|
||||
$this->search_query = utf8_htmlspecialchars($this->search_query);
|
||||
|
||||
if ($this->search_query)
|
||||
{
|
||||
$this->split_words = array_values($this->split_words);
|
||||
sort($this->split_words);
|
||||
@ -252,26 +311,39 @@ class fulltext_mysql extends search_backend
|
||||
}
|
||||
|
||||
/**
|
||||
* Performs a search on keywords depending on display specific params.
|
||||
* Performs a search on keywords depending on display specific params. You have to run split_keywords() first.
|
||||
*
|
||||
* @param array &$id_ary passed by reference, to be filled with ids for the page specified by $start and $per_page, should be ordered
|
||||
* @param int $start indicates the first index of the page
|
||||
* @param int $per_page number of ids each page is supposed to contain
|
||||
* @return total number of results
|
||||
* @param string $type contains either posts or topics depending on what should be searched for
|
||||
* @param string &$fields contains either titleonly (topic titles should be searched), msgonly (only message bodies should be searched), firstpost (only subject and body of the first post should be searched) or all (all post bodies and subjects should be searched)
|
||||
* @param string &$terms is either 'all' (use query as entered, words without prefix should default to "have to be in field") or 'any' (ignore search query parts and just return all posts that contain any of the specified words)
|
||||
* @param array &$sort_by_sql contains SQL code for the ORDER BY part of a query
|
||||
* @param string &$sort_key is the key of $sort_by_sql for the selected sorting
|
||||
* @param string &$sort_dir is either a or d representing ASC and DESC
|
||||
* @param string &$sort_days specifies the maximum amount of days a post may be old
|
||||
* @param array &$ex_fid_ary specifies an array of forum ids which should not be searched
|
||||
* @param array &$m_approve_fid_ary specifies an array of forum ids in which the searcher is allowed to view unapproved posts
|
||||
* @param int &$topic_id is set to 0 or a topic id, if it is not 0 then only posts in this topic should be searched
|
||||
* @param array &$author_ary an array of author ids if the author should be ignored during the search the array is empty
|
||||
* @param array &$id_ary passed by reference, to be filled with ids for the page specified by $start and $per_page, should be ordered
|
||||
* @param int $start indicates the first index of the page
|
||||
* @param int $per_page number of ids each page is supposed to contain
|
||||
* @return boolean|int total number of results
|
||||
*
|
||||
* @access public
|
||||
*/
|
||||
function keyword_search($type, &$fields, &$terms, &$sort_by_sql, &$sort_key, &$sort_dir, &$sort_days, &$ex_fid_ary, &$m_approve_fid_ary, &$topic_id, &$author_ary, &$id_ary, $start, $per_page)
|
||||
{
|
||||
global $config, $db;
|
||||
|
||||
// No keywords? No posts.
|
||||
if (!sizeof($this->split_words))
|
||||
if (!$this->search_query)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
// generate a search_key from all the options to identify the results
|
||||
$search_key = md5(implode('#', array(
|
||||
implode(',', $this->split_words),
|
||||
implode(', ', $this->split_words),
|
||||
$type,
|
||||
$fields,
|
||||
$terms,
|
||||
@ -369,47 +441,9 @@ class fulltext_mysql extends search_backend
|
||||
$sql_where_options .= ($sort_days) ? ' AND p.post_time >= ' . (time() - ($sort_days * 86400)) : '';
|
||||
$sql_where_options .= $sql_match_where;
|
||||
|
||||
// split the words into words that have to be included or must not be included and optional words
|
||||
$words = $any_words = array();
|
||||
if ($terms == 'all')
|
||||
{
|
||||
foreach ($this->split_words as $id => $word)
|
||||
{
|
||||
if ((strpos($word, '+') === 0) || (strpos($word, '-') === 0))
|
||||
{
|
||||
$words[] = $word;
|
||||
}
|
||||
else if (strpos($word, '|') === 0)
|
||||
{
|
||||
$any_words[] = substr($word, 1);
|
||||
}
|
||||
else
|
||||
{
|
||||
$words[] = '+' . $word;
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
foreach ($this->split_words as $id => $word)
|
||||
{
|
||||
if ((strpos($word, '+') === 0) || (strpos($word, '-') === 0) || (strpos($word, '|') === 0))
|
||||
{
|
||||
$words[] = substr($word, 1);
|
||||
}
|
||||
else
|
||||
{
|
||||
$words[] = $word;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Get the ids for the current result block
|
||||
$any_words = (sizeof($any_words)) ? ' +(' . implode(' ', $any_words) . ')' : '';
|
||||
|
||||
$sql = "SELECT $sql_select
|
||||
FROM $sql_from$sql_sort_table" . POSTS_TABLE . " p
|
||||
WHERE MATCH ($sql_match) AGAINST ('" . $db->sql_escape(implode(' ', $words)) . $any_words . "' IN BOOLEAN MODE)
|
||||
WHERE MATCH ($sql_match) AGAINST ('" . $db->sql_escape(htmlspecialchars_decode($this->search_query)) . "' IN BOOLEAN MODE)
|
||||
$sql_where_options
|
||||
ORDER BY $sql_sort";
|
||||
$result = $db->sql_query_limit($sql, $config['search_block_size'], $start);
|
||||
@ -608,37 +642,8 @@ class fulltext_mysql extends search_backend
|
||||
$split_text = $this->split_message($message);
|
||||
$split_title = ($subject) ? $this->split_message($subject) : array();
|
||||
|
||||
$words = array();
|
||||
if ($mode == 'edit')
|
||||
{
|
||||
$old_text = array();
|
||||
$old_title = array();
|
||||
$words = array_unique(array_merge($split_text, $split_title));
|
||||
|
||||
$sql = 'SELECT post_text, post_subject
|
||||
FROM ' . POSTS_TABLE . "
|
||||
WHERE post_id = $post_id";
|
||||
$result = $db->sql_query($sql);
|
||||
|
||||
if ($row = $db->sql_fetchrow($result))
|
||||
{
|
||||
$old_text = $this->split_message($row['post_text']);
|
||||
$old_title = $this->split_message($row['post_subject']);
|
||||
}
|
||||
$db->sql_freeresult($result);
|
||||
|
||||
$words = array_unique(array_merge(
|
||||
array_diff($split_text, $old_text),
|
||||
array_diff($split_title, $old_title),
|
||||
array_diff($old_text, $split_text),
|
||||
array_diff($old_title, $split_title)
|
||||
));
|
||||
unset($old_title);
|
||||
unset($old_text);
|
||||
}
|
||||
else
|
||||
{
|
||||
$words = array_unique(array_merge($split_text, $split_title));
|
||||
}
|
||||
unset($split_text);
|
||||
unset($split_title);
|
||||
|
||||
@ -715,6 +720,11 @@ class fulltext_mysql extends search_backend
|
||||
$alter[] = 'ADD FULLTEXT (post_text)';
|
||||
}
|
||||
|
||||
if (!isset($this->stats['post_content']))
|
||||
{
|
||||
$alter[] = 'ADD FULLTEXT post_content (post_subject, post_text)';
|
||||
}
|
||||
|
||||
if (sizeof($alter))
|
||||
{
|
||||
$db->sql_query('ALTER TABLE ' . POSTS_TABLE . ' ' . implode(', ', $alter));
|
||||
@ -755,6 +765,11 @@ class fulltext_mysql extends search_backend
|
||||
$alter[] = 'DROP INDEX post_text';
|
||||
}
|
||||
|
||||
if (isset($this->stats['post_content']))
|
||||
{
|
||||
$alter[] = 'DROP INDEX post_content';
|
||||
}
|
||||
|
||||
if (sizeof($alter))
|
||||
{
|
||||
$db->sql_query('ALTER TABLE ' . POSTS_TABLE . ' ' . implode(', ', $alter));
|
||||
@ -775,7 +790,7 @@ class fulltext_mysql extends search_backend
|
||||
$this->get_stats();
|
||||
}
|
||||
|
||||
return (isset($this->stats['post_text']) && isset($this->stats['post_subject'])) ? true : false;
|
||||
return (isset($this->stats['post_text']) && isset($this->stats['post_subject']) && isset($this->stats['post_content'])) ? true : false;
|
||||
}
|
||||
|
||||
/**
|
||||
@ -794,6 +809,7 @@ class fulltext_mysql extends search_backend
|
||||
$user->lang['FULLTEXT_MYSQL_TOTAL_POSTS'] => ($this->index_created()) ? $this->stats['total_posts'] : 0,
|
||||
$user->lang['FULLTEXT_MYSQL_TEXT_CARDINALITY'] => isset($this->stats['post_text']['Cardinality']) ? $this->stats['post_text']['Cardinality'] : 0,
|
||||
$user->lang['FULLTEXT_MYSQL_SUBJECT_CARDINALITY'] => isset($this->stats['post_subject']['Cardinality']) ? $this->stats['post_subject']['Cardinality'] : 0,
|
||||
$user->lang['FULLTEXT_MYSQL_COMBINED_CARDINALITY'] => isset($this->stats['post_content']['Cardinality']) ? $this->stats['post_content']['Cardinality'] : 0,
|
||||
);
|
||||
}
|
||||
|
||||
@ -826,6 +842,10 @@ class fulltext_mysql extends search_backend
|
||||
{
|
||||
$this->stats['post_subject'] = $row;
|
||||
}
|
||||
else if ($row['Column_name'] == 'post_content')
|
||||
{
|
||||
$this->stats['post_content'] = $row;
|
||||
}
|
||||
}
|
||||
}
|
||||
$db->sql_freeresult($result);
|
||||
|
@ -51,6 +51,7 @@ $lang = array_merge($lang, array(
|
||||
'FULLTEXT_MYSQL_NOT_MYISAM' => 'MySQL fulltext indexes can only be used with MyISAM tables.',
|
||||
'FULLTEXT_MYSQL_SUBJECT_CARDINALITY' => 'Cardinality of the post_subject fulltext index (estimate of unique values)',
|
||||
'FULLTEXT_MYSQL_TEXT_CARDINALITY' => 'Cardinality of the post_text fulltext index (estimate of unique values)',
|
||||
'FULLTEXT_MYSQL_COMBINED_CARDINALITY' => 'Cardinality of the post_content fulltext index (estimate of unique values)',
|
||||
'FULLTEXT_MYSQL_TOTAL_POSTS' => 'Total number of indexed posts',
|
||||
'FULLTEXT_MYSQL_MBSTRING' => 'Support for non-latin UTF-8 characters using mbstring:',
|
||||
'FULLTEXT_MYSQL_PCRE' => 'Support for non-latin UTF-8 characters using PCRE:',
|
||||
|
@ -453,8 +453,8 @@ if ($keywords || $author || $author_id || $search_id || $submit)
|
||||
}
|
||||
|
||||
// define some vars for urls
|
||||
$hilit = implode('|', explode(' ', preg_replace('#\s+#u', ' ', str_replace(array('+', '-', '|', '(', ')'), ' ', $keywords))));
|
||||
$u_hilit = urlencode(htmlspecialchars_decode($keywords));
|
||||
$hilit = implode('|', explode(' ', preg_replace('#\s+#u', ' ', str_replace(array('+', '-', '|', '(', ')', '"'), ' ', $keywords))));
|
||||
$u_hilit = urlencode(htmlspecialchars_decode(str_replace('|', ' ', $hilit)));
|
||||
$u_show_results = ($show_results != 'posts') ? '&sr=' . $show_results : '';
|
||||
$u_search_forum = implode('&fid%5B%5D=', $search_forum);
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user