1
0
mirror of https://github.com/phpbb/phpbb.git synced 2025-07-31 22:10:45 +02:00

Various updates to fix problems with large result sets ... code is quite messy, will clean up

git-svn-id: file:///svn/phpbb/trunk@1938 89ea8834-ac86-4346-8a33-228a782c2dd0
This commit is contained in:
Paul S. Owen
2002-01-25 00:48:46 +00:00
parent bd4f94c0a9
commit c8c850f104
2 changed files with 267 additions and 385 deletions

View File

@@ -19,11 +19,11 @@
*
***************************************************************************/
function clean_words($mode, $entry, &$stopword_list, &$synonym_list)
function clean_words($mode, &$entry, &$synonym_list)
{
// Weird, $init_match doesn't work with static when double quotes (") are used...
static $drop_char_match = array('^', '$', '&', '(', ')', '<', '>', '`', "'", '|', ',', '@', '_', '?', '%', '-', '~', '+', '.', '[', ']', '{', '}', ':', '\\', '/', '=', '#', '\'', ';', '*', '!');
static $drop_char_replace = array(" ", " ", " ", " ", " ", " ", " ", " ", "", " ", " ", " ", " ", " ", " ", " ", " ", " ", " ", " ", " ", " ", " ", " ", " " , " ", " ", " ", " ", " ", " ", " ");
static $drop_char_match = array('^', '$', '&', '(', ')', '<', '>', '`', "'", '|', ',', '@', '_', '?', '%', '-', '~', '+', '.', '[', ']', '{', '}', ':', '\\', '/', '=', '#', '\'', ';', '!');
static $drop_char_replace = array(" ", " ", " ", " ", " ", " ", " ", " ", "", " ", " ", " ", " ", " ", " ", " ", " ", " ", " ", " ", " ", " ", " ", " ", " " , " ", " ", " ", " ", " ", " ");
static $accent_match = array("<EFBFBD>", "<EFBFBD>", "<EFBFBD>", "<EFBFBD>", "<EFBFBD>", "<EFBFBD>", "<EFBFBD>", "<EFBFBD>", "<EFBFBD>", "<EFBFBD>", "<EFBFBD>", "<EFBFBD>", "<EFBFBD>", "<EFBFBD>", "<EFBFBD>", "<EFBFBD>", "<EFBFBD>", "<EFBFBD>", "<EFBFBD>", "<EFBFBD>", "<EFBFBD>", "<EFBFBD>", "<EFBFBD>", "<EFBFBD>", "<EFBFBD>", "<EFBFBD>", "<EFBFBD>", "<EFBFBD>", "<EFBFBD>", "<EFBFBD>", "<EFBFBD>", "<EFBFBD>");
static $accent_replace = array("s", "a", "a", "a", "a", "a", "a", "a", "c", "e", "e", "e", "e", "i", "i", "i", "i", "o", "n", "o", "o", "o", "o", "o", "o", "u", "u", "u", "u", "y", "t", "y");
@@ -62,26 +62,15 @@ function clean_words($mode, $entry, &$stopword_list, &$synonym_list)
//
for($i = 0; $i < count($drop_char_match); $i++)
{
$entry = str_replace($drop_char_match[$i], $drop_char_replace[$i], $entry);
$entry = str_replace($drop_char_match[$i], $drop_char_replace[$i], $entry);
}
if( $mode == "post" )
{
// 'words' that consist of <=2 or >=50 characters are removed.
$entry = preg_replace("/\b([a-z0-9]{1,2}|[a-z0-9]{50,})\b/si", " ", $entry);
}
$entry = str_replace("*", " ", $entry);
if( !empty($stopword_list) )
{
for ($j = 0; $j < count($stopword_list); $j++)
{
$filter_word = trim(strtolower($stopword_list[$j]));
if( ( $filter_word != "and" && $filter_word != "or" && $filter_word != "not" ) || $mode == "post" )
{
$entry = preg_replace("/\b" . phpbb_preg_quote($filter_word, "/") . "\b/is", " ", $entry);
}
}
// 'words' that consist of <=3 or >=50 characters are removed.
$entry = preg_replace("/\b([a-z0-9]{1,3}|[a-z0-9]{50,})\b/si", " ", $entry);
}
if( !empty($synonym_list) )
@@ -101,9 +90,16 @@ function clean_words($mode, $entry, &$stopword_list, &$synonym_list)
return $entry;
}
function split_words(&$entry)
function split_words(&$entry, $mode = "post")
{
preg_match_all("/\b(\w[\w']*\w+|\w+?)\b/", $entry, $split_entries);
if( $mode == "post" )
{
preg_match_all("/\b(\w[\w']*\w+|\w+?)\b/", $entry, $split_entries);
}
else
{
preg_match_all("/(\*?[a-z0-9]+\*?)|\b([a-z0-9]+)\b/is", $entry, $split_entries);
}
return $split_entries[1];
}
@@ -112,12 +108,11 @@ function add_search_words($post_id, $post_text, $post_title = "")
{
global $db, $phpbb_root_path, $board_config, $lang;
$stopword_array = @file($phpbb_root_path . "language/lang_" . $board_config['default_lang'] . "/search_stopwords.txt");
$synonym_array = @file($phpbb_root_path . "language/lang_" . $board_config['default_lang'] . "/search_synonyms.txt");
$search_raw_words = array();
$search_raw_words['text'] = split_words(clean_words("post", $post_text, $stopword_array, $synonym_array));
$search_raw_words['title'] = split_words(clean_words("post", $post_title, $stopword_array, $synonym_array));
$search_raw_words['text'] = split_words(clean_words("post", $post_text, $synonym_array));
$search_raw_words['title'] = split_words(clean_words("post", $post_title, $synonym_array));
while( list($word_in, $search_matches) = @each($search_raw_words) )
{