mirror of
https://github.com/phpbb/phpbb.git
synced 2025-07-30 21:40:43 +02:00
- search updates
git-svn-id: file:///svn/phpbb/trunk@5003 89ea8834-ac86-4346-8a33-228a782c2dd0
This commit is contained in:
@@ -111,7 +111,7 @@ class bbcode_firstpass extends bbcode
|
||||
'url' => array('bbcode_id' => 3, 'regexp' => array('#\[url=?(.*?)?\](.*?)\[/url\]#ise' => "\$this->validate_url('\$1', '\$2')")),
|
||||
'img' => array('bbcode_id' => 4, 'regexp' => array('#\[img\](https?://)([a-z0-9\-\.,\?!%\*_:;~\\&$@/=\+]+)\[/img\]#i' => '[img:' . $this->bbcode_uid . ']$1$2[/img:' . $this->bbcode_uid . ']')),
|
||||
'size' => array('bbcode_id' => 5, 'regexp' => array('#\[size=([\-\+]?[1-2]?[0-9])\](.*?)\[/size\]#is' => '[size=$1:' . $this->bbcode_uid . ']$2[/size:' . $this->bbcode_uid . ']')),
|
||||
'color' => array('bbcode_id' => 6, 'regexp' => array('!\[color=(#[0-9A-F]{6}|[a-z\-]+)\](.*?)\[/color\]!is' => '[color=$1:' . $this->bbcode_uid . ']$2[/color:' . $this->bbcode_uid . ']')),
|
||||
'color' => array('bbcode_id' => 6, 'regexp' => array('#\[color=(#[0-9A-F]{6}|[a-z\-]+)\](.*?)\[/color\]#is' => '[color=$1:' . $this->bbcode_uid . ']$2[/color:' . $this->bbcode_uid . ']')),
|
||||
'u' => array('bbcode_id' => 7, 'regexp' => array('#\[u\](.*?)\[/u\]#is' => '[u:' . $this->bbcode_uid . ']$1[/u:' . $this->bbcode_uid . ']')),
|
||||
'list' => array('bbcode_id' => 9, 'regexp' => array('#\[list(=[a-z|0-9|(?:disc|circle|square))]+)?\].*\[/list\]#ise' => "\$this->bbcode_list('\$0')")),
|
||||
'email' => array('bbcode_id' => 10, 'regexp' => array('#\[email=?(.*?)?\](.*?)\[/email\]#ise' => "\$this->validate_email('\$1', '\$2')")),
|
||||
@@ -1084,7 +1084,7 @@ class parse_message extends bbcode_firstpass
|
||||
// Parses a given message and updates/maintains the fulltext tables
|
||||
class fulltext_search
|
||||
{
|
||||
function split_words($mode, $text, &$stopped_words)
|
||||
function split_words($mode, $text)
|
||||
{
|
||||
global $user, $config;
|
||||
|
||||
@@ -1099,8 +1099,8 @@ class fulltext_search
|
||||
|
||||
if (!$drop_char_match)
|
||||
{
|
||||
$drop_char_match = array('^', '$', '&', '(', ')', '<', '>', '`', '\'', '"', '|', ',', '@', '_', '?', '%', '~', '.', '[', ']', '{', '}', ':', '\\', '/', '=', '#', '\'', ';', '!', '*');
|
||||
$drop_char_replace = array(' ', ' ', ' ', ' ', ' ', ' ', ' ', '', '', ' ', ' ', ' ', ' ', '', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '' , ' ', ' ', ' ', ' ', ' ', ' ', ' ');
|
||||
$drop_char_match = array('-', '^', '$', ';', '#', '&', '(', ')', '<', '>', '`', '\'', '"', '|', ',', '@', '_', '?', '%', '~', '.', '[', ']', '{', '}', ':', '\\', '/', '=', '\'', '!', '*');
|
||||
$drop_char_replace = array(' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '', '', ' ', ' ', ' ', ' ', '', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '' , ' ', ' ', ' ', ' ', ' ');
|
||||
|
||||
if ($fp = @fopen($user->lang_path . '/search_stopwords.txt', 'rb'))
|
||||
{
|
||||
@@ -1118,42 +1118,50 @@ class fulltext_search
|
||||
}
|
||||
|
||||
$match = array();
|
||||
// Comments for hardcoded bbcode elements (urls, smilies, html)
|
||||
$match[] = '#<!\-\- .* \-\->(.*?)<!\-\- .* \-\->#is';
|
||||
// New lines, carriage returns
|
||||
$match[] = "#[\n\r]+#";
|
||||
// NCRs like etc.
|
||||
$match[] = '#&[\#a-z0-9]+?;#i';
|
||||
// URL's
|
||||
$match[] = '#\b[\w]+:\/\/[a-z0-9\.\-]+(\/[a-z0-9\?\.%_\-\+=&\/]+)?#';
|
||||
$match[] = '#(&|&)[\#a-z0-9]+?;#i';
|
||||
// Do not index code
|
||||
$match[] = '#\[code(=.*)?(\:?[0-9a-z]{5,})\].*?\[\/code(\:?[0-9a-z]{5,})\]#is';
|
||||
// BBcode
|
||||
$match[] = '#\[img:[a-z0-9]{5,}\].*?\[\/img:[a-z0-9]{5,}\]#';
|
||||
$match[] = '#\[\/?[a-z\*=\+\-]+(\:?[0-9a-z]+)?:[a-z0-9]{5,}(\:[a-z0-9]+)?=?.*?\]#';
|
||||
$match[] = '#<!\-\-(.*?)\-\->#is';
|
||||
// Sequences < min_search_chars & < max_search_chars
|
||||
$match[] = '#\b([\S]{1,' . $config['min_search_chars'] . '}|[\S]{' . $config['max_search_chars'] . ',})\b#is';
|
||||
$match[] = '#\[\/?[a-z\*\+\-]+(=.*)?(\:?[0-9a-z]{5,})\]#';
|
||||
// Sequences > min_search_chars & < max_search_chars
|
||||
// $match[] = '#\s([\b]{1,' . $config['min_search_chars'] . '}|[\b]{' . $config['max_search_chars'] . ',})\s#is';
|
||||
// $match[] = '#\s((&\#[0-9]+;){1,' . $config['min_search_chars'] . '}|(&\#[0-9]+;){' . $config['max_search_chars'] . ',})\s#is';
|
||||
// Filter out ; and # but not &#[0-9]+;
|
||||
// $match[] = '#(&\#[0-9]+;)|;|\#|&#';
|
||||
|
||||
$replace = array('', '', '', '', '', '', '');
|
||||
|
||||
$text = preg_replace($match, $replace, ' ' . strtolower($text) . ' ');
|
||||
$text = str_replace(' and ', ' + ', $text);
|
||||
$text = str_replace(' not ', ' - ', $text);
|
||||
$text = preg_replace($match, ' ', ' ' . strtolower(trim($text)) . ' ');
|
||||
$text = str_replace(array(' + ', ' - '), array(' and ', ' not '), $text);
|
||||
|
||||
// Filter out non-alphabetical chars
|
||||
$text = str_replace($drop_char_match, $drop_char_replace, $text);
|
||||
|
||||
// Split words
|
||||
$text = explode(' ', preg_replace('#\s+#', ' ', $text));
|
||||
$text = explode(' ', preg_replace('#\s+#', ' ', trim($text)));
|
||||
|
||||
if ($stopwords)
|
||||
if (sizeof($stopwords))
|
||||
{
|
||||
$stopped_words = array_intersect($text, $stopwords);
|
||||
$text = array_diff($text, $stopwords);
|
||||
}
|
||||
|
||||
if ($replace_synonym)
|
||||
if (sizeof($replace_synonym))
|
||||
{
|
||||
$text = str_replace($replace_synonym, $match_synonym, $text);
|
||||
}
|
||||
|
||||
|
||||
foreach ($text as $index => $word)
|
||||
{
|
||||
if (strlen($word) < $config['min_search_chars'] || strlen($word) > $config['max_search_chars'])
|
||||
{
|
||||
unset($text[$index]);
|
||||
}
|
||||
}
|
||||
|
||||
return $text;
|
||||
}
|
||||
|
||||
@@ -1169,10 +1177,8 @@ class fulltext_search
|
||||
}
|
||||
|
||||
// Split old and new post/subject to obtain array of 'words'
|
||||
$stopped_words = array();
|
||||
$split_text = $this->split_words('post', $message, $stopped_words);
|
||||
$split_title = ($subject) ? $this->split_words('post', $subject, $stopped_words) : array();
|
||||
unset($stopped_words);
|
||||
$split_text = $this->split_words('post', $message);
|
||||
$split_title = ($subject) ? $this->split_words('post', $subject) : array();
|
||||
|
||||
$words = array();
|
||||
if ($mode == 'edit')
|
||||
@@ -1230,7 +1236,6 @@ class fulltext_search
|
||||
$new_words = array_diff($unique_add_words, array_keys($word_ids));
|
||||
unset($unique_add_words);
|
||||
|
||||
$db->sql_return_on_error(true);
|
||||
if (sizeof($new_words))
|
||||
{
|
||||
switch (SQL_LAYER)
|
||||
@@ -1238,7 +1243,7 @@ class fulltext_search
|
||||
case 'mysql':
|
||||
case 'mysql4':
|
||||
$sql = 'INSERT INTO ' . SEARCH_WORD_TABLE . ' (word_text)
|
||||
VALUES ' . implode(', ', preg_replace('#^(.*)$#', '(\'$1\')', $new_words));
|
||||
VALUES ' . implode(', ', preg_replace('#^(.*)$#', '(\'$1\')', $new_words));
|
||||
$db->sql_query($sql);
|
||||
break;
|
||||
|
||||
@@ -1258,7 +1263,6 @@ class fulltext_search
|
||||
break;
|
||||
}
|
||||
}
|
||||
$db->sql_return_on_error(false);
|
||||
unset($new_words);
|
||||
}
|
||||
|
||||
@@ -1343,7 +1347,7 @@ class fulltext_search
|
||||
$sql_in[] = $row['word_id'];
|
||||
}
|
||||
while ($row = $db->sql_fetchrow($result));
|
||||
|
||||
|
||||
$sql_in = implode(', ', $sql_in);
|
||||
|
||||
$sql = 'UPDATE ' . SEARCH_WORD_TABLE . "
|
||||
@@ -1363,7 +1367,7 @@ class fulltext_search
|
||||
$sql = 'SELECT w.word_id
|
||||
FROM ' . SEARCH_WORD_TABLE . ' w
|
||||
LEFT JOIN ' . SEARCH_MATCH_TABLE . ' m ON w.word_id = m.word_id
|
||||
AND m.word_id IS NULL
|
||||
WHERE w.word_common = 0 AND m.word_id IS NULL
|
||||
GROUP BY m.word_id';
|
||||
$result = $db->sql_query($sql);
|
||||
|
||||
|
Reference in New Issue
Block a user