mirror of
https://github.com/phpbb/phpbb.git
synced 2025-06-06 14:35:56 +02:00
speed up split words a tad
git-svn-id: file:///svn/phpbb/trunk@4459 89ea8834-ac86-4346-8a33-228a782c2dd0
This commit is contained in:
parent
9f7db6a21c
commit
311cc6c3f0
@ -853,11 +853,11 @@ class parse_message
|
|||||||
// Parses a given message and updates/maintains the fulltext tables
|
// Parses a given message and updates/maintains the fulltext tables
|
||||||
class fulltext_search
|
class fulltext_search
|
||||||
{
|
{
|
||||||
function split_words(&$text)
|
function split_words($mode, &$text, &$stopped_words)
|
||||||
{
|
{
|
||||||
global $user, $config;
|
global $user, $config;
|
||||||
|
|
||||||
static $drop_char_match, $drop_char_replace, $stopwords, $synonyms;
|
static $drop_char_match, $drop_char_replace, $stopwords, $replace_synonym, $match_synonym;
|
||||||
|
|
||||||
// Is the fulltext indexer disabled? If yes then we need not
|
// Is the fulltext indexer disabled? If yes then we need not
|
||||||
// carry on ... it's okay ... I know when I'm not wanted boo hoo
|
// carry on ... it's okay ... I know when I'm not wanted boo hoo
|
||||||
@ -868,10 +868,22 @@ class fulltext_search
|
|||||||
|
|
||||||
if (empty($drop_char_match))
|
if (empty($drop_char_match))
|
||||||
{
|
{
|
||||||
$drop_char_match = array('^', '$', '&', '(', ')', '<', '>', '`', '\'', '"', '|', ',', '@', '_', '?', '%', '-', '~', '+', '.', '[', ']', '{', '}', ':', '\\', '/', '=', '#', '\'', ';', '!', '*');
|
$drop_char_match = array('^', '$', '&', '(', ')', '<', '>', '`', '\'', '"', '|', ',', '@', '_', '?', '%', '~', '.', '[', ']', '{', '}', ':', '\\', '/', '=', '#', '\'', ';', '!', '*');
|
||||||
$drop_char_replace = array(' ', ' ', ' ', ' ', ' ', ' ', ' ', '', '', ' ', ' ', ' ', ' ', '', ' ', ' ', '', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '' , ' ', ' ', ' ', ' ', ' ', ' ', ' ');
|
$drop_char_replace = array(' ', ' ', ' ', ' ', ' ', ' ', ' ', '', '', ' ', ' ', ' ', ' ', '', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '' , ' ', ' ', ' ', ' ', ' ', ' ', ' ');
|
||||||
$stopwords = @file($user->lang_path . '/search_stopwords.txt');
|
|
||||||
$synonyms = @file($user->lang_path . '/search_synonyms.txt');
|
if ($fp = @fopen($user->lang_path . '/search_stopwords.txt', 'rb'))
|
||||||
|
{
|
||||||
|
$stopwords = explode("\n", str_replace("\r\n", "\n", fread($fp, filesize($user->lang_path . '/search_stopwords.txt'))));
|
||||||
|
}
|
||||||
|
fclose($fp);
|
||||||
|
|
||||||
|
if ($fp = @fopen($user->lang_path . '/search_synonyms.txt', 'rb'))
|
||||||
|
{
|
||||||
|
preg_match_all('#^(.*?) (.*?)$#ms', fread($fp, filesize($user->lang_path . '/search_synonyms.txt')), $match);
|
||||||
|
$replace_synonym = &$match[1];
|
||||||
|
$match_synonym = &$match[2];
|
||||||
|
}
|
||||||
|
fclose($fp);
|
||||||
}
|
}
|
||||||
|
|
||||||
$match = array();
|
$match = array();
|
||||||
@ -888,31 +900,28 @@ class fulltext_search
|
|||||||
// Sequences <= min_search_chars or >= max_search_chars
|
// Sequences <= min_search_chars or >= max_search_chars
|
||||||
$match[] = '#\b([a-z0-9]{1,' . $config['min_search_chars'] . '}|[a-z0-9]{' . $config['max_search_chars'] . ',})\b#is';
|
$match[] = '#\b([a-z0-9]{1,' . $config['min_search_chars'] . '}|[a-z0-9]{' . $config['max_search_chars'] . ',})\b#is';
|
||||||
|
|
||||||
$text = preg_replace($match, ' ', ' ' . strtolower($text) . ' ');
|
$text = str_replace($match, ' ', ' ' . strtolower($text) . ' ');
|
||||||
|
$text = str_replace(' and ', ' + ', $text);
|
||||||
|
$text = str_replace(' not ', ' - ', $text);
|
||||||
|
|
||||||
// Filter out non-alphabetical chars
|
// Filter out non-alphabetical chars
|
||||||
$text = str_replace($drop_char_match, $drop_char_replace, $text);
|
$text = str_replace($drop_char_match, $drop_char_replace, $text);
|
||||||
|
|
||||||
if (!empty($stopwords_list))
|
// Split words
|
||||||
|
$text = explode(' ', preg_replace('#\s+#', ' ', $text));
|
||||||
|
|
||||||
|
if (!empty($stopwords))
|
||||||
{
|
{
|
||||||
$text = str_replace($stopwords, '', $text);
|
$stopped_words = array_intersect($text, $stopwords);
|
||||||
|
$text = array_diff($text, $stopwords);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!empty($synonyms))
|
if (!empty($replace_synonym))
|
||||||
{
|
{
|
||||||
for ($j = 0; $j < count($synonyms); $j++)
|
$text = str_replace($replace_synonym, $match_synonym, $text);
|
||||||
{
|
|
||||||
list($replace_synonym, $match_synonym) = split(' ', trim(strtolower($synonyms[$j])));
|
|
||||||
if ( $mode == 'post' || ( $match_synonym != 'not' && $match_synonym != 'and' && $match_synonym != 'or' ) )
|
|
||||||
{
|
|
||||||
$text = preg_replace('#\b' . trim($match_synonym) . '\b#', ' ' . trim($replace_synonym) . ' ', $text);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
preg_match_all('#\b([\w]+)\b#', $text, $split_entries);
|
return $text;
|
||||||
|
|
||||||
return array_unique($split_entries[1]);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
function add(&$mode, &$post_id, &$message, &$subject)
|
function add(&$mode, &$post_id, &$message, &$subject)
|
||||||
@ -930,8 +939,10 @@ class fulltext_search
|
|||||||
// $starttime = $mtime[1] + $mtime[0];
|
// $starttime = $mtime[1] + $mtime[0];
|
||||||
|
|
||||||
// Split old and new post/subject to obtain array of 'words'
|
// Split old and new post/subject to obtain array of 'words'
|
||||||
$split_text = $this->split_words($message);
|
$stopped_words = array();
|
||||||
$split_title = ($subject) ? $this->split_words($subject) : array();
|
$split_text = $this->split_words('post', $message, $stopped_words);
|
||||||
|
$split_title = ($subject) ? $this->split_words('post', $subject, $stopped_words) : array();
|
||||||
|
unset($stopped_words);
|
||||||
|
|
||||||
$words = array();
|
$words = array();
|
||||||
if ($mode == 'edit')
|
if ($mode == 'edit')
|
||||||
@ -1080,8 +1091,8 @@ class fulltext_search
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Remove common (> 60% of posts ) words
|
// Remove common (> 60% of posts ) words
|
||||||
$sql = "SELECT SUM(forum_posts) AS total_posts
|
$sql = 'SELECT SUM(forum_posts) AS total_posts
|
||||||
FROM " . FORUMS_TABLE;
|
FROM ' . FORUMS_TABLE;
|
||||||
$result = $db->sql_query($sql);
|
$result = $db->sql_query($sql);
|
||||||
|
|
||||||
$row = $db->sql_fetchrow($result);
|
$row = $db->sql_fetchrow($result);
|
||||||
|
Loading…
x
Reference in New Issue
Block a user