1
0
mirror of https://github.com/phpbb/phpbb.git synced 2025-08-05 16:27:38 +02:00

More fixes and updates

git-svn-id: file:///svn/phpbb/branches/phpBB-2_0_0@2572 89ea8834-ac86-4346-8a33-228a782c2dd0
This commit is contained in:
Paul S. Owen
2002-05-13 01:30:59 +00:00
parent 6ca24b8a9f
commit 9fe7c358a1
7 changed files with 74 additions and 78 deletions

View File

@@ -21,9 +21,8 @@
function clean_words($mode, &$entry, &$stopword_list, &$synonym_list)
{
// Weird, $init_match doesn't work with static when double quotes (") are used...
static $drop_char_match = array('^', '$', '&', '(', ')', '<', '>', '`', '\'', '"', '|', ',', '@', '_', '?', '%', '-', '~', '+', '.', '[', ']', '{', '}', ':', '\\', '/', '=', '#', '\'', ';', '!');
static $drop_char_replace = array(' ', ' ', ' ', ' ', ' ', ' ', ' ', '', '', ' ', ' ', ' ', ' ', '', ' ', ' ', '', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ' , ' ', ' ', ' ', ' ', ' ', ' ');
static $drop_char_replace = array(' ', ' ', ' ', ' ', ' ', ' ', ' ', '', '', ' ', ' ', ' ', ' ', '', ' ', ' ', '', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ' , ' ', ' ', ' ', ' ', ' ', ' ');
$entry = ' ' . strip_tags(strtolower($entry)) . ' ';
@@ -42,13 +41,10 @@ function clean_words($mode, &$entry, &$stopword_list, &$synonym_list)
}
else if ( $mode == 'search' )
{
$entry = str_replace('+', ' and ', $entry);
$entry = str_replace('-', ' not ', $entry);
$entry = str_replace(' +', ' and ', $entry);
$entry = str_replace(' -', ' not ', $entry);
}
// Replace numbers on their own
$entry = preg_replace('/\b[0-9]+\b/', ' ', $entry);
//
// Filter out strange characters like ^, $, &, change "it's" to "its"
//
@@ -61,8 +57,8 @@ function clean_words($mode, &$entry, &$stopword_list, &$synonym_list)
{
$entry = str_replace('*', ' ', $entry);
// 'words' that consist of <=3 or >=25 characters are removed.
$entry = preg_replace('/\b([a-z0-9]{1,3}|[a-z0-9]{20,})\b/',' ', $entry);
// 'words' that consist of <=3 or >=20 characters are removed.
$entry = preg_replace('/\b([a-z0-9]{1,3}|[a-z0-9]{21,})\b/',' ', $entry);
}
if ( !empty($stopword_list) )
@@ -95,14 +91,8 @@ function clean_words($mode, &$entry, &$stopword_list, &$synonym_list)
function split_words(&$entry, $mode = 'post')
{
if ( $mode == 'post' )
{
preg_match_all("/\b(\w[\w']*\w+|\w+?)\b/", $entry, $split_entries);
}
else
{
preg_match_all('/(\*?[<5B>-<2D>a-z0-9]+\*?)|\b([<5B>-<2D>a-z0-9]+)\b/', $entry, $split_entries);
}
$rex = ( $mode == 'post' ) ? "/\b(\w[\w']*\w+|\w+?)\b/" : '/(\*?[<5B>-<2D>a-z0-9]+\*?)|\b([<5B>-<2D>a-z0-9]+)\b/';
preg_match_all($rex, $entry, $split_entries);
return $split_entries[1];
}