2005-01-13 22:30:42 +00:00
< ? php
2007-10-05 14:36:34 +00:00
/**
2005-04-09 12:26:45 +00:00
*
2014-05-27 20:18:06 +02:00
* This file is part of the phpBB Forum Software package .
*
* @ copyright ( c ) phpBB Limited < https :// www . phpbb . com >
* @ license GNU General Public License , version 2 ( GPL - 2.0 )
*
* For full copyright and license information , please see
* the docs / CREDITS . txt file .
2005-04-09 12:26:45 +00:00
*
*/
2005-01-13 22:30:42 +00:00
2013-09-10 14:01:09 +02:00
namespace phpbb\search ;
2005-04-09 12:26:45 +00:00
/**
2006-07-27 19:02:47 +00:00
* phpBB ' s own db driven fulltext search , version 2
2005-04-09 12:26:45 +00:00
*/
2013-09-10 14:01:09 +02:00
class fulltext_native extends \phpbb\search\base
2005-01-13 22:30:42 +00:00
{
2013-11-20 13:47:31 +01:00
/**
* First and last code points of the Unicode blocks referenced by this backend,
* encoded as raw UTF-8 byte sequences. Each literal must be exactly one
* character, so no whitespace may appear between the byte escapes.
*
* HANGUL: U+AC00 - U+D7A3
* CJK:    U+4E00 - U+9FBB
* CJK_B:  U+20000 - U+2A6D6
*/
const UTF8_HANGUL_FIRST = "\xEA\xB0\x80";
const UTF8_HANGUL_LAST = "\xED\x9E\xA3";

const UTF8_CJK_FIRST = "\xE4\xB8\x80";
const UTF8_CJK_LAST = "\xE9\xBE\xBB";

const UTF8_CJK_B_FIRST = "\xF0\xA0\x80\x80";
const UTF8_CJK_B_LAST = "\xF0\xAA\x9B\x96";

/**
* Associative array holding index stats
* @var array
*/
protected $stats = array();

/**
* Associative array storing the min and max word length to be searched
* (keys 'min' and 'max', filled by the constructor from the config)
* @var array
*/
protected $word_length = array();

/**
* Contains tidied search query.
* Operators are prefixed in search query and common words excluded
* @var string
*/
protected $search_query;

/**
* Contains common words.
* Common words are words with length less/more than min/max length
* @var array
*/
protected $common_words = array();

/**
* Post ids of posts containing words that are to be included
* @var array
*/
protected $must_contain_ids = array();

/**
* Post ids of posts containing words that should not be included
* @var array
*/
protected $must_not_contain_ids = array();

/**
* Post ids of posts containing at least one word that needs to be excluded
* @var array
*/
protected $must_exclude_one_ids = array();

/**
* Relative path to board root
* @var string
*/
protected $phpbb_root_path;

/**
* PHP Extension
* @var string
*/
protected $php_ext;

/**
* Config object
* @var \phpbb\config\config
*/
protected $config;

/**
* Database connection
* @var \phpbb\db\driver\driver_interface
*/
protected $db;

/**
* phpBB event dispatcher object
* @var \phpbb\event\dispatcher_interface
*/
protected $phpbb_dispatcher;

/**
* User object
* @var \phpbb\user
*/
protected $user;
2012-07-23 15:41:15 +05:30
2006-07-27 19:02:47 +00:00
/**
* Initialises the fulltext_native search backend with min/max word length
*
* Stores the injected services, reads the word length limits from the config
* and makes sure the UTF tools are loaded.
*
* @param	boolean|string						&$error				passed by reference; set to false here (an error message would signal failure)
* @param	string								$phpbb_root_path	Relative path to board root
* @param	string								$phpEx				PHP file extension
* @param	mixed								$auth				Not used by this constructor
* @param	\phpbb\config\config				$config				Config object
* @param	\phpbb\db\driver\driver_interface	$db					Database connection
* @param	\phpbb\user							$user				User object
* @param	\phpbb\event\dispatcher_interface	$phpbb_dispatcher	Event dispatcher object
*/
public function __construct(&$error, $phpbb_root_path, $phpEx, $auth, $config, $db, $user, $phpbb_dispatcher)
{
	// Injected services
	$this->config = $config;
	$this->db = $db;
	$this->user = $user;
	$this->phpbb_dispatcher = $phpbb_dispatcher;

	// Filesystem information
	$this->phpbb_root_path = $phpbb_root_path;
	$this->php_ext = $phpEx;

	$this->word_length = array(
		'min'	=> $this->config['fulltext_native_min_chars'],
		'max'	=> $this->config['fulltext_native_max_chars'],
	);

	// Load the UTF tools unless they have been loaded already
	if (!function_exists('utf8_decode_ncr'))
	{
		$utf_tools_file = $this->phpbb_root_path . 'includes/utf/utf_tools.' . $this->php_ext;
		include($utf_tools_file);
	}

	$error = false;
}
2011-11-18 14:46:30 +01:00
/**
* Returns the name of this search backend to be displayed to administrators
*
* The name is a fixed string; it is not run through the language system here.
*
* @return string Name
*/
public function get_name()
{
	return 'phpBB Native Fulltext';
}
2006-01-11 18:56:07 +00:00
/**
* Returns the search_query
*
* The value is only assigned by split_keywords(); before that call the
* property has no value.
*
* @return string search query
*/
public function get_search_query()
{
	return $this->search_query;
}
/**
* Returns the common_words array
*
* split_keywords() appends to this list: words flagged as common in the
* word list table and words outside the min/max length limits.
*
* @return array common words that are ignored by search backend
*/
public function get_common_words()
{
	return $this->common_words;
}
/**
* Returns the word_length array
*
* The array has the keys 'min' and 'max', as set up by the constructor.
*
* @return array min and max word length for searching
*/
public function get_word_length()
{
	return $this->word_length;
}
2006-01-11 18:56:07 +00:00
/**
* This function fills $this->search_query with the cleaned user search query
*
* If $terms is 'any' then the words will be extracted from the search query
* and combined with | inside brackets. They will afterwards be treated like
* a standard search query.
*
* Then it analyses the query and fills the internal arrays $must_not_contain_ids,
* $must_contain_ids and $must_exclude_one_ids which are later used by keyword_search()
*
* Words that are flagged as common in the word list table, or whose length is
* outside the configured limits, are appended to $this->common_words (the
* list is not reset by this method).
*
* Calls trigger_error() when the number of keywords exceeds the configured
* maximum, or when a required group of words matches no indexed word.
*
* @param string $keywords contains the search query string as entered by the user
* @param string $terms is either 'all' (use search query as entered, default words to 'must be contained in post')
* 	or 'any' (find all posts containing at least one of the given words)
* @return boolean false if no valid keywords were found and otherwise true
*/
public function split_keywords($keywords, $terms)
{
	$tokens = '+-|()*';

	$keywords = trim($this->cleanup($keywords, $tokens));

	// allow word|word|word without brackets
	if ((strpos($keywords, ' ') === false) && (strpos($keywords, '|') !== false) && (strpos($keywords, '(') === false))
	{
		$keywords = '(' . $keywords . ')';
	}

	// $open_bracket holds the offset of the currently open bracket (or false),
	// $space holds the last seen +/- operator so that following spaces can be
	// replaced by it (or false)
	$open_bracket = $space = false;
	for ($i = 0, $n = strlen($keywords); $i < $n; $i++)
	{
		if ($open_bracket !== false)
		{
			// inside a bracket every separator becomes an alternative (|)
			switch ($keywords[$i])
			{
				case ')':
					// empty bracket pair "()": neutralise both characters
					if ($open_bracket + 1 == $i)
					{
						$keywords[$i - 1] = '|';
						$keywords[$i] = '|';
					}
					$open_bracket = false;
				break;

				case '(':
					$keywords[$i] = '|';
				break;

				case '+':
				case '-':
				case ' ':
					$keywords[$i] = '|';
				break;

				case '*':
					// drop a wildcard that stands alone between two operator characters
					if ($i === 0 || ($keywords[$i - 1] !== '*' && strcspn($keywords[$i - 1], $tokens) === 0))
					{
						if ($i === $n - 1 || ($keywords[$i + 1] !== '*' && strcspn($keywords[$i + 1], $tokens) === 0))
						{
							$keywords = substr($keywords, 0, $i) . substr($keywords, $i + 1);

							// The string is one character shorter now: keep the
							// length in sync and re-examine the character that
							// moved into the current offset. Otherwise it would be
							// skipped and the loop could read past the end.
							$n--;
							$i--;
						}
					}
				break;
			}
		}
		else
		{
			switch ($keywords[$i])
			{
				case ')':
					// closing bracket without an opening one
					$keywords[$i] = ' ';
				break;

				case '(':
					$open_bracket = $i;
					$space = false;
				break;

				case '|':
					// alternatives are only allowed inside brackets
					$keywords[$i] = ' ';
				break;

				case '-':
				case '+':
					$space = $keywords[$i];
				break;

				case ' ':
					// "+ word" / "- word": glue the operator to the word
					if ($space !== false)
					{
						$keywords[$i] = $space;
					}
				break;

				default:
					$space = false;
			}
		}
	}

	// Close a bracket that was left open. The offset may be 0, so it has to be
	// compared against false explicitly.
	if ($open_bracket !== false)
	{
		$keywords .= ')';
	}

	$match = array(
		'# +#',
		'#\|\|+#',
		'#(\+|\-)(?:\+|\-)+#',
		'#\(\|#',
		'#\|\)#',
	);
	$replace = array(
		' ',
		'|',
		'$1',
		'(',
		')',
	);

	$keywords = preg_replace($match, $replace, $keywords);
	$num_keywords = sizeof(explode(' ', $keywords));

	// We limit the number of allowed keywords to minimize load on the database
	if ($this->config['max_num_search_keywords'] && $num_keywords > $this->config['max_num_search_keywords'])
	{
		trigger_error($this->user->lang('MAX_NUM_SEARCH_KEYWORDS_REFINE', (int) $this->config['max_num_search_keywords'], $num_keywords));
	}

	// $keywords input format: each word separated by a space, words in a bracket are not separated

	// the user wants to search for any word, convert the search query
	if ($terms == 'any')
	{
		$words = array();

		preg_match_all('#([^\\s+\\-|()]+)(?:$|[\\s+\\-|()])#u', $keywords, $words);
		if (sizeof($words[1]))
		{
			$keywords = '(' . implode('|', $words[1]) . ')';
		}
	}

	// set the search_query which is shown to the user
	$this->search_query = $keywords;

	$exact_words = array();
	preg_match_all('#([^\\s+\\-|()]+)(?:$|[\\s+\\-|()])#u', $keywords, $exact_words);
	$exact_words = $exact_words[1];

	// $words maps word_text => word_id for usable words,
	// $common_ids does the same for words flagged as common
	$common_ids = $words = array();

	if (sizeof($exact_words))
	{
		$sql = 'SELECT word_id, word_text, word_common
			FROM ' . SEARCH_WORDLIST_TABLE . '
			WHERE ' . $this->db->sql_in_set('word_text', $exact_words) . '
			ORDER BY word_count ASC';
		$result = $this->db->sql_query($sql);

		// store an array of words and ids, remove common words
		while ($row = $this->db->sql_fetchrow($result))
		{
			if ($row['word_common'])
			{
				$this->common_words[] = $row['word_text'];
				$common_ids[$row['word_text']] = (int) $row['word_id'];
				continue;
			}

			$words[$row['word_text']] = (int) $row['word_id'];
		}
		$this->db->sql_freeresult($result);
	}

	// Handle +, - without preceding whitespace character
	// NOTE(review): both operators are rewritten to " +", presumably so that the
	// parts of a hyphenated word are all required instead of excluded - confirm
	$match = array('#(\S)\+#', '#(\S)-#');
	$replace = array('$1 +', '$1 +');

	$keywords = preg_replace($match, $replace, $keywords);

	// now analyse the search query, first split it using the spaces
	$query = explode(' ', $keywords);

	$this->must_contain_ids = array();
	$this->must_not_contain_ids = array();
	$this->must_exclude_one_ids = array();

	$mode = '';
	$ignore_no_id = true;

	foreach ($query as $word)
	{
		if (empty($word))
		{
			continue;
		}

		// words which should not be included
		if ($word[0] == '-')
		{
			$word = substr($word, 1);

			// a group of which at least one may not be in the resulting posts
			if ($word[0] == '(')
			{
				$word = array_unique(explode('|', substr($word, 1, -1)));
				$mode = 'must_exclude_one';
			}
			// one word which should not be in the resulting posts
			else
			{
				$mode = 'must_not_contain';
			}
			$ignore_no_id = true;
		}
		// words which have to be included
		else
		{
			// no prefix is the same as a +prefix
			if ($word[0] == '+')
			{
				$word = substr($word, 1);
			}

			// a group of words of which at least one word should be in every resulting post
			if ($word[0] == '(')
			{
				$word = array_unique(explode('|', substr($word, 1, -1)));
			}
			$ignore_no_id = false;
			$mode = 'must_contain';
		}

		// nothing left after stripping the operator
		if (empty($word))
		{
			continue;
		}

		// if this is an array of words then retrieve an id for each
		if (is_array($word))
		{
			$non_common_words = array();
			$id_words = array();
			foreach ($word as $word_part)
			{
				if (strpos($word_part, '*') !== false)
				{
					// wildcards are stored as quoted LIKE patterns (strings)
					$id_words[] = '\'' . $this->db->sql_escape(str_replace('*', '%', $word_part)) . '\'';
					$non_common_words[] = $word_part;
				}
				else if (isset($words[$word_part]))
				{
					// known words are stored by their integer word id
					$id_words[] = $words[$word_part];
					$non_common_words[] = $word_part;
				}
				else
				{
					$len = utf8_strlen($word_part);
					if ($len < $this->word_length['min'] || $len > $this->word_length['max'])
					{
						$this->common_words[] = $word_part;
					}
				}
			}

			if (sizeof($id_words))
			{
				sort($id_words);
				if (sizeof($id_words) > 1)
				{
					$this->{$mode . '_ids'}[] = $id_words;
				}
				else
				{
					// a group with a single usable word degrades to a plain word
					$mode = ($mode == 'must_exclude_one') ? 'must_not_contain' : $mode;
					$this->{$mode . '_ids'}[] = $id_words[0];
				}
			}
			// throw an error if we shall not ignore nonexistent words
			else if (!$ignore_no_id && sizeof($non_common_words))
			{
				trigger_error(sprintf($this->user->lang['WORDS_IN_NO_POST'], implode($this->user->lang['COMMA_SEPARATOR'], $non_common_words)));
			}
			unset($non_common_words);
		}
		// else we only need one id
		else if (($wildcard = strpos($word, '*') !== false) || isset($words[$word]))
		{
			if ($wildcard)
			{
				// the length limits apply to the word without its wildcards
				$len = utf8_strlen(str_replace('*', '', $word));
				if ($len >= $this->word_length['min'] && $len <= $this->word_length['max'])
				{
					$this->{$mode . '_ids'}[] = '\'' . $this->db->sql_escape(str_replace('*', '%', $word)) . '\'';
				}
				else
				{
					$this->common_words[] = $word;
				}
			}
			else
			{
				$this->{$mode . '_ids'}[] = $words[$word];
			}
		}
		else
		{
			// unknown word: only report it as common if it was not already
			// recorded as such from the word list table
			if (!isset($common_ids[$word]))
			{
				$len = utf8_strlen($word);
				if ($len < $this->word_length['min'] || $len > $this->word_length['max'])
				{
					$this->common_words[] = $word;
				}
			}
		}
	}

	// Return true if not all of the words are common words
	return (sizeof($exact_words) - sizeof($this->common_words) > 0);
}
/**
* Performs a search on keywords depending on display specific params. You have to run split_keywords() first
*
* The id arrays prepared by split_keywords() are only read here; wildcard
* entries are resolved on a local copy so that the method can be called more
* than once on the same instance.
*
* @param	string		$type				contains either posts or topics depending on what should be searched for
* @param	string		$fields				contains either titleonly (topic titles should be searched), msgonly (only message bodies should be searched), firstpost (only subject and body of the first post should be searched) or all (all post bodies and subjects should be searched)
* @param	string		$terms				is either 'all' (use query as entered, words without prefix should default to "have to be in field") or 'any' (ignore search query parts and just return all posts that contain any of the specified words)
* @param	array		$sort_by_sql		contains SQL code for the ORDER BY part of a query
* @param	string		$sort_key			is the key of $sort_by_sql for the selected sorting
* @param	string		$sort_dir			is either a or d representing ASC and DESC
* @param	string		$sort_days			specifies the maximum amount of days a post may be old
* @param	array		$ex_fid_ary			specifies an array of forum ids which should not be searched
* @param	string		$post_visibility	specifies which types of posts the user can view in which forums
* @param	int			$topic_id			is set to 0 or a topic id, if it is not 0 then only posts in this topic should be searched
* @param	array		$author_ary			an array of author ids if the author should be ignored during the search the array is empty
* @param	string		$author_name		specifies the author match, when ANONYMOUS is also a search-match
* @param	array		&$id_ary			passed by reference, to be filled with ids for the page specified by $start and $per_page, should be ordered
* @param	int			&$start				indicates the first index of the page; moved back to the last page if it lies beyond the results
* @param	int			$per_page			number of ids each page is supposed to contain
* @return	boolean|int						total number of results, false if there is nothing to search for or nothing was found
*/
public function keyword_search($type, $fields, $terms, $sort_by_sql, $sort_key, $sort_dir, $sort_days, $ex_fid_ary, $post_visibility, $topic_id, $author_ary, $author_name, &$id_ary, &$start, $per_page)
{
	// No keywords? No posts.
	if (empty($this->search_query))
	{
		return false;
	}

	// we can't search for negatives only
	if (empty($this->must_contain_ids))
	{
		return false;
	}

	// sorted copies, only used to build an order independent cache key
	$must_contain_ids = $this->must_contain_ids;
	$must_not_contain_ids = $this->must_not_contain_ids;
	$must_exclude_one_ids = $this->must_exclude_one_ids;

	sort($must_contain_ids);
	sort($must_not_contain_ids);
	sort($must_exclude_one_ids);

	// generate a search_key from all the options to identify the results
	$search_key = md5(implode('#', array(
		serialize($must_contain_ids),
		serialize($must_not_contain_ids),
		serialize($must_exclude_one_ids),
		$type,
		$fields,
		$terms,
		$sort_days,
		$sort_key,
		$topic_id,
		implode(',', $ex_fid_ary),
		$post_visibility,
		implode(',', $author_ary),
		$author_name,
	)));

	// try reading the results from cache
	$total_results = 0;
	if ($this->obtain_ids($search_key, $total_results, $id_ary, $start, $per_page, $sort_dir) == SEARCH_RESULT_IN_CACHE)
	{
		return $total_results;
	}

	$id_ary = array();

	$sql_where = array();
	$m_num = 0;		// counter for word match table aliases (m0, m1, ...)
	$w_num = 0;		// counter for word list table aliases (w0, w1, ...)

	$sql_array = array(
		'SELECT'	=> ($type == 'posts') ? 'p.post_id' : 'p.topic_id',
		'FROM'		=> array(
			SEARCH_WORDMATCH_TABLE	=> array(),
			SEARCH_WORDLIST_TABLE	=> array(),
		),
		'LEFT_JOIN' => array(array(
			'FROM'	=> array(POSTS_TABLE => 'p'),
			'ON'	=> 'm0.post_id = p.post_id',
		)),
	);

	$title_match = '';
	$left_join_topics = false;
	$group_by = true;

	// Build some display specific sql strings
	switch ($fields)
	{
		case 'titleonly':
			$title_match = 'title_match = 1';
			$group_by = false;
		// no break
		case 'firstpost':
			$left_join_topics = true;
			$sql_where[] = 'p.post_id = t.topic_first_post_id';
		break;

		case 'msgonly':
			$title_match = 'title_match = 0';
			$group_by = false;
		break;
	}

	if ($type == 'topics')
	{
		$left_join_topics = true;
		$group_by = true;
	}

	/**
	* @todo Add a query optimizer (handle stuff like "+(4|3) +4")
	*/

	// Every required word (or group of alternatives) gets its own word match
	// alias; strings are LIKE patterns (wildcards), integers are word ids.
	foreach ($this->must_contain_ids as $subquery)
	{
		if (is_array($subquery))
		{
			$group_by = true;

			$word_ids = array();
			foreach ($subquery as $id)
			{
				if (is_string($id))
				{
					$sql_array['LEFT_JOIN'][] = array(
						'FROM'	=> array(SEARCH_WORDLIST_TABLE => 'w' . $w_num),
						'ON'	=> "w$w_num.word_text LIKE $id"
					);
					$word_ids[] = "w$w_num.word_id";

					$w_num++;
				}
				else
				{
					$word_ids[] = $id;
				}
			}

			$sql_where[] = $this->db->sql_in_set("m$m_num.word_id", $word_ids);

			unset($word_ids);
		}
		else if (is_string($subquery))
		{
			$sql_array['FROM'][SEARCH_WORDLIST_TABLE][] = 'w' . $w_num;

			$sql_where[] = "w$w_num.word_text LIKE $subquery";
			$sql_where[] = "m$m_num.word_id = w$w_num.word_id";

			$group_by = true;
			$w_num++;
		}
		else
		{
			$sql_where[] = "m$m_num.word_id = $subquery";
		}

		$sql_array['FROM'][SEARCH_WORDMATCH_TABLE][] = 'm' . $m_num;

		if ($title_match)
		{
			$sql_where[] = "m$m_num.$title_match";
		}

		// all word matches have to belong to the same post as the first one
		if ($m_num != 0)
		{
			$sql_where[] = "m$m_num.post_id = m0.post_id";
		}
		$m_num++;
	}

	// Work on a copy: wildcard entries are replaced by the word list alias that
	// resolves them, which must not leak back into the object's state.
	$excluded_word_ids = $this->must_not_contain_ids;
	foreach ($excluded_word_ids as $key => $subquery)
	{
		if (is_string($subquery))
		{
			$sql_array['LEFT_JOIN'][] = array(
				'FROM'	=> array(SEARCH_WORDLIST_TABLE => 'w' . $w_num),
				'ON'	=> "w$w_num.word_text LIKE $subquery"
			);

			$excluded_word_ids[$key] = "w$w_num.word_id";

			$group_by = true;
			$w_num++;
		}
	}

	if (sizeof($excluded_word_ids))
	{
		// anti join: the post matches if none of the excluded words is found
		$sql_array['LEFT_JOIN'][] = array(
			'FROM'	=> array(SEARCH_WORDMATCH_TABLE => 'm' . $m_num),
			'ON'	=> $this->db->sql_in_set("m$m_num.word_id", $excluded_word_ids) . (($title_match) ? " AND m$m_num.$title_match" : '') . " AND m$m_num.post_id = m0.post_id"
		);

		$sql_where[] = "m$m_num.word_id IS NULL";
		$m_num++;
	}

	foreach ($this->must_exclude_one_ids as $ids)
	{
		// at least one word of the group has to be missing from the post
		$is_null_joins = array();
		foreach ($ids as $id)
		{
			if (is_string($id))
			{
				$sql_array['LEFT_JOIN'][] = array(
					'FROM'	=> array(SEARCH_WORDLIST_TABLE => 'w' . $w_num),
					'ON'	=> "w$w_num.word_text LIKE $id"
				);
				$id = "w$w_num.word_id";

				$group_by = true;
				$w_num++;
			}

			$sql_array['LEFT_JOIN'][] = array(
				'FROM'	=> array(SEARCH_WORDMATCH_TABLE => 'm' . $m_num),
				'ON'	=> "m$m_num.word_id = $id AND m$m_num.post_id = m0.post_id" . (($title_match) ? " AND m$m_num.$title_match" : '')
			);
			$is_null_joins[] = "m$m_num.word_id IS NULL";

			$m_num++;
		}
		$sql_where[] = '(' . implode(' OR ', $is_null_joins) . ')';
	}

	$sql_where[] = $post_visibility;

	if ($topic_id)
	{
		// cast: the value is concatenated into the statement
		$sql_where[] = 'p.topic_id = ' . (int) $topic_id;
	}

	if (sizeof($author_ary))
	{
		if ($author_name)
		{
			// first one matches post of registered users, second one guests and deleted users
			$sql_author = '(' . $this->db->sql_in_set('p.poster_id', array_diff($author_ary, array(ANONYMOUS)), false, true) . ' OR p.post_username ' . $author_name . ')';
		}
		else
		{
			$sql_author = $this->db->sql_in_set('p.poster_id', $author_ary);
		}
		$sql_where[] = $sql_author;
	}

	if (sizeof($ex_fid_ary))
	{
		$sql_where[] = $this->db->sql_in_set('p.forum_id', $ex_fid_ary, true);
	}

	if ($sort_days)
	{
		// cast: the value is concatenated into the statement
		$sql_where[] = 'p.post_time >= ' . (time() - ((int) $sort_days * 86400));
	}

	$sql_array['WHERE'] = implode(' AND ', $sql_where);

	$is_mysql = false;
	// if the total result count is not cached yet, retrieve it from the db
	if (!$total_results)
	{
		$sql = '';
		$sql_array_count = $sql_array;

		if ($left_join_topics)
		{
			$sql_array_count['LEFT_JOIN'][] = array(
				'FROM'	=> array(TOPICS_TABLE => 't'),
				'ON'	=> 'p.topic_id = t.topic_id'
			);
		}

		switch ($this->db->get_sql_layer())
		{
			case 'mysql4':
			case 'mysqli':
				// no separate count query here: the total is fetched with
				// SQL_CALC_FOUND_ROWS / FOUND_ROWS() around the main query below
				$is_mysql = true;
			break;

			case 'sqlite':
			case 'sqlite3':
				// count via a subquery over the distinct ids
				$sql_array_count['SELECT'] = ($type == 'posts') ? 'DISTINCT p.post_id' : 'DISTINCT p.topic_id';
				$sql = 'SELECT COUNT(' . (($type == 'posts') ? 'post_id' : 'topic_id') . ') as total_results
					FROM (' . $this->db->sql_build_query('SELECT', $sql_array_count) . ')';
			// no break

			default:
				$sql_array_count['SELECT'] = ($type == 'posts') ? 'COUNT(DISTINCT p.post_id) AS total_results' : 'COUNT(DISTINCT p.topic_id) AS total_results';
				// keep the sqlite statement if one was built above
				$sql = (!$sql) ? $this->db->sql_build_query('SELECT', $sql_array_count) : $sql;

				$result = $this->db->sql_query($sql);
				$total_results = (int) $this->db->sql_fetchfield('total_results');
				$this->db->sql_freeresult($result);

				if (!$total_results)
				{
					return false;
				}
			break;
		}

		unset($sql_array_count, $sql);
	}

	// Build sql strings for sorting
	$sql_sort = $sort_by_sql[$sort_key] . (($sort_dir == 'a') ? ' ASC' : ' DESC');

	// the first character of the sort expression is the table alias it refers to
	switch ($sql_sort[0])
	{
		case 'u':
			$sql_array['FROM'][USERS_TABLE] = 'u';
			$sql_where[] = 'u.user_id = p.poster_id ';
		break;

		case 't':
			$left_join_topics = true;
		break;

		case 'f':
			$sql_array['FROM'][FORUMS_TABLE] = 'f';
			$sql_where[] = 'f.forum_id = p.forum_id';
		break;
	}

	if ($left_join_topics)
	{
		$sql_array['LEFT_JOIN'][] = array(
			'FROM'	=> array(TOPICS_TABLE => 't'),
			'ON'	=> 'p.topic_id = t.topic_id'
		);
	}

	// if using mysql and the total result count is not calculated yet, get it from the db
	if (!$total_results && $is_mysql)
	{
		// Also count rows for the query as if there was not LIMIT. Add SQL_CALC_FOUND_ROWS to SQL
		$sql_array['SELECT'] = 'SQL_CALC_FOUND_ROWS ' . $sql_array['SELECT'];
	}

	// rebuild WHERE, the sort handling above may have added conditions
	$sql_array['WHERE'] = implode(' AND ', $sql_where);
	$sql_array['GROUP_BY'] = ($group_by) ? (($type == 'posts') ? 'p.post_id' : 'p.topic_id') . ', ' . $sort_by_sql[$sort_key] : '';
	$sql_array['ORDER_BY'] = $sql_sort;

	unset($sql_where, $sql_sort, $group_by);

	$id_column = ($type == 'posts') ? 'post_id' : 'topic_id';

	$sql = $this->db->sql_build_query('SELECT', $sql_array);
	$result = $this->db->sql_query_limit($sql, $this->config['search_block_size'], $start);

	while ($row = $this->db->sql_fetchrow($result))
	{
		$id_ary[] = (int) $row[$id_column];
	}
	$this->db->sql_freeresult($result);

	if (!$total_results && $is_mysql)
	{
		// Get the number of results as calculated by MySQL
		$sql_count = 'SELECT FOUND_ROWS() as total_results';
		$result = $this->db->sql_query($sql_count);
		$total_results = (int) $this->db->sql_fetchfield('total_results');
		$this->db->sql_freeresult($result);

		if (!$total_results)
		{
			return false;
		}
	}

	// the requested page lies behind the last result: fall back to the last page
	if ($start >= $total_results)
	{
		$start = floor(($total_results - 1) / $per_page) * $per_page;

		$result = $this->db->sql_query_limit($sql, $this->config['search_block_size'], $start);

		while ($row = $this->db->sql_fetchrow($result))
		{
			$id_ary[] = (int) $row[$id_column];
		}
		$this->db->sql_freeresult($result);
	}

	// store the ids, from start on then delete anything that isn't on the current page because we only need ids for one page
	$this->save_ids($search_key, $this->search_query, $author_ary, $total_results, $id_ary, $start, $sort_dir);
	$id_ary = array_slice($id_ary, 0, (int) $per_page);

	return $total_results;
}
/**
* Performs a search on an author's posts without caring about message contents. Depends on display specific params
*
* The search cache is consulted first through obtain_ids(). On a miss the ids are
* read from the database, one block of $config['search_block_size'] rows starting
* at $start, and handed to save_ids() before being cut down to a single page.
*
* @param	string		$type				contains either posts or topics depending on what should be searched for
* @param	boolean		$firstpost_only		if true, only topic starting posts will be considered
* @param	array		$sort_by_sql		contains SQL code for the ORDER BY part of a query
* @param	string		$sort_key			is the key of $sort_by_sql for the selected sorting
* @param	string		$sort_dir			is either a or d representing ASC and DESC
* @param	string		$sort_days			specifies the maximum amount of days a post may be old
* @param	array		$ex_fid_ary			specifies an array of forum ids which should not be searched
* @param	string		$post_visibility	specifies which types of posts the user can view in which forums
* @param	int			$topic_id			is set to 0 or a topic id, if it is not 0 then only posts in this topic should be searched
* @param	array		$author_ary			an array of author ids
* @param	string		$author_name		specifies the author match, when ANONYMOUS is also a search-match
* @param	array		&$id_ary			passed by reference, to be filled with ids for the page specified by $start and $per_page, should be ordered
* @param	int			&$start				indicates the first index of the page; moved back to the last page when it lies beyond the result count
* @param	int			$per_page			number of ids each page is supposed to contain
* @return	boolean|int						0 if no author was given, false if nothing was found, otherwise the total number of results
*/
public function author_search($type, $firstpost_only, $sort_by_sql, $sort_key, $sort_dir, $sort_days, $ex_fid_ary, $post_visibility, $topic_id, $author_ary, $author_name, &$id_ary, &$start, $per_page)
{
	// Nothing to look for without at least one author
	if (!count($author_ary))
	{
		return 0;
	}

	// Every option that influences the result set is folded into the cache key
	$key_parts = array(
		'',
		$type,
		($firstpost_only) ? 'firstpost' : '',
		'',
		'',
		$sort_days,
		$sort_key,
		$topic_id,
		implode(',', $ex_fid_ary),
		$post_visibility,
		implode(',', $author_ary),
		$author_name,
	);
	$search_key = md5(implode('#', $key_parts));

	// Serve the page straight from the cache whenever possible
	$total_results = 0;
	$cache_state = $this->obtain_ids($search_key, $total_results, $id_ary, $start, $per_page, $sort_dir);
	if ($cache_state == SEARCH_RESULT_IN_CACHE)
	{
		return $total_results;
	}

	$id_ary = array();

	// Display specific SQL fragments
	if ($author_name)
	{
		// first alternative matches posts of registered users, second one guests and deleted users
		$registered_ids = array_diff($author_ary, array(ANONYMOUS));
		$sql_author = '(' . $this->db->sql_in_set('p.poster_id', $registered_ids, false, true) . ' OR p.post_username ' . $author_name . ')';
	}
	else
	{
		$sql_author = $this->db->sql_in_set('p.poster_id', $author_ary);
	}

	$sql_fora = '';
	if (count($ex_fid_ary))
	{
		$sql_fora = ' AND ' . $this->db->sql_in_set('p.forum_id', $ex_fid_ary, true);
	}

	$sql_time = '';
	if ($sort_days)
	{
		$sql_time = ' AND p.post_time >= ' . (time() - ($sort_days * 86400));
	}

	$sql_topic_id = ($topic_id) ? ' AND p.topic_id = ' . (int) $topic_id : '';
	$sql_firstpost = ($firstpost_only) ? ' AND p.post_id = t.topic_first_post_id' : '';
	$sql_visibility = ($post_visibility) ? ' AND ' . $post_visibility : '';

	// Conditions shared by the counting query and the id query
	$sql_where = "WHERE $sql_author
			$sql_topic_id
			$sql_firstpost
			$sql_visibility
			$sql_fora";

	// Sorting: the first character of the sort expression is the alias of the table sorted on
	$sort_column = $sort_by_sql[$sort_key];
	$sql_sort = $sort_column . (($sort_dir == 'a') ? ' ASC' : ' DESC');

	$sql_sort_table = $sql_sort_join = '';
	$sort_alias = $sql_sort[0];

	if ($sort_alias === 'u')
	{
		$sql_sort_table = USERS_TABLE . ' u, ';
		$sql_sort_join = ' AND u.user_id = p.poster_id ';
	}
	else if ($sort_alias === 't')
	{
		// The topics table is already part of the query for topic searches and first-post searches
		if ($type == 'posts' && !$firstpost_only)
		{
			$sql_sort_table = TOPICS_TABLE . ' t, ';
			$sql_sort_join = ' AND t.topic_id = p.topic_id ';
		}
	}
	else if ($sort_alias === 'f')
	{
		$sql_sort_table = FORUMS_TABLE . ' f, ';
		$sql_sort_join = ' AND f.forum_id = p.forum_id ';
	}

	$select = ($type == 'posts') ? 'p.post_id' : 't.topic_id';
	$is_mysql = false;

	// If the cache was completely empty count the results
	if (!$total_results)
	{
		$sql_layer = $this->db->get_sql_layer();

		if ($sql_layer == 'mysql4' || $sql_layer == 'mysqli')
		{
			// Counted further down with SQL_CALC_FOUND_ROWS / FOUND_ROWS()
			$is_mysql = true;
		}
		else
		{
			if ($type == 'posts')
			{
				$sql = 'SELECT COUNT(p.post_id) as total_results
					FROM ' . POSTS_TABLE . ' p' . (($firstpost_only) ? ', ' . TOPICS_TABLE . ' t ' : ' ') . "
					$sql_where
						$sql_time";
			}
			else
			{
				// The sqlite layers count through a DISTINCT sub-select instead of COUNT(DISTINCT ...)
				$is_sqlite = ($sql_layer == 'sqlite' || $sql_layer == 'sqlite3');

				if ($is_sqlite)
				{
					$sql = 'SELECT COUNT(topic_id) as total_results
						FROM (SELECT DISTINCT t.topic_id';
				}
				else
				{
					$sql = 'SELECT COUNT(DISTINCT t.topic_id) as total_results';
				}

				$sql .= ' FROM ' . TOPICS_TABLE . ' t, ' . POSTS_TABLE . " p
					$sql_where
						AND t.topic_id = p.topic_id
						$sql_time" . (($is_sqlite) ? ')' : '');
			}
			$result = $this->db->sql_query($sql);
			$total_results = (int) $this->db->sql_fetchfield('total_results');
			$this->db->sql_freeresult($result);

			if (!$total_results)
			{
				return false;
			}
		}
	}

	// Build the query for really selecting the ids
	if ($type == 'posts')
	{
		$field = 'post_id';
		$sql = "SELECT $select
			FROM " . $sql_sort_table . POSTS_TABLE . ' p' . (($firstpost_only) ? ', ' . TOPICS_TABLE . ' t' : '') . "
			$sql_where
				$sql_sort_join
				$sql_time
			ORDER BY $sql_sort";
	}
	else
	{
		$field = 'topic_id';
		$sql = "SELECT $select
			FROM " . $sql_sort_table . TOPICS_TABLE . ' t, ' . POSTS_TABLE . " p
			$sql_where
				AND t.topic_id = p.topic_id
				$sql_sort_join
				$sql_time
			GROUP BY t.topic_id, " . $sort_column . '
			ORDER BY ' . $sql_sort;
	}

	// Only read one block of ids from the db and then cache it
	$result = $this->db->sql_query_limit($sql, $this->config['search_block_size'], $start);
	while ($row = $this->db->sql_fetchrow($result))
	{
		$id_ary[] = (int) $row[$field];
	}
	$this->db->sql_freeresult($result);

	if (!$total_results && $is_mysql)
	{
		// Count rows for the executed query: swap the select list for SQL_CALC_FOUND_ROWS, run it
		// without a limit and ask MySQL for FOUND_ROWS() immediately afterwards.
		$sql_calc = str_replace('SELECT ' . $select, 'SELECT DISTINCT SQL_CALC_FOUND_ROWS p.post_id', $sql);

		$result = $this->db->sql_query($sql_calc);
		$this->db->sql_freeresult($result);

		$sql_count = 'SELECT FOUND_ROWS() as total_results';
		$result = $this->db->sql_query($sql_count);
		$total_results = (int) $this->db->sql_fetchfield('total_results');
		$this->db->sql_freeresult($result);

		if (!$total_results)
		{
			return false;
		}
	}

	// The requested page lies behind the last result: fall back to the last page and read again
	if ($start >= $total_results)
	{
		$start = floor(($total_results - 1) / $per_page) * $per_page;

		$result = $this->db->sql_query_limit($sql, $this->config['search_block_size'], $start);
		while ($row = $this->db->sql_fetchrow($result))
		{
			$id_ary[] = (int) $row[$field];
		}
		$this->db->sql_freeresult($result);
	}

	if (!count($id_ary))
	{
		return false;
	}

	// Store the whole block, then keep only the ids of the current page
	$this->save_ids($search_key, '', $author_ary, $total_results, $id_ary, $start, $sort_dir);
	$id_ary = array_slice($id_ary, 0, $per_page);

	return $total_results;
}
2006-01-11 18:56:07 +00:00
2006-07-27 19:02:47 +00:00
/**
* Split a text into indexable words
*
* HTML tags, [code] blocks and BBCode tags are stripped, the remainder is passed
* through cleanup() and split on single spaces. A word is dropped when
* - it is longer than 255 bytes (this will have to be changed whenever the
*   length of search_wordlist.word_text changes), or
* - it has fewer bytes than the configured minimum word length, or
* - it has fewer characters than that minimum and does not start inside the
*   Hangul, CJK or CJK extension B ranges defined by the class constants.
*
* NOTE: duplicates are NOT removed from the return array
*
* @param	string	$text	Text to split, encoded in UTF-8
* @return	array			Array of UTF-8 words
*/
public function split_message($text)
{
	$match = array(
		// Do not index code
		'#\[code(?:=.*?)?(\:?[0-9a-z]{5,})\].*?\[\/code(\:?[0-9a-z]{5,})\]#is',
		// BBcode
		'#\[\/?[a-z0-9\*\+\-]+(?:=.*?)?(?::[a-z])?(\:?[0-9a-z]{5,})\]#',
	);

	$min = $this->word_length['min'];

	// Clean up the string, remove HTML tags, remove BBCodes
	$cleaned = $this->cleanup(preg_replace($match, ' ', strip_tags($text)), -1);

	$words = array();
	foreach (explode(' ', $cleaned) as $word)
	{
		$byte_length = strlen($word);

		// Consecutive spaces yield empty tokens, those are never words
		if ($byte_length === 0)
		{
			continue;
		}

		// Cheap byte based pre-filter: too long for the word table or too short in any encoding
		if ($byte_length > 255 || $byte_length < $min)
		{
			continue;
		}

		// Character based minimum length. Note that this limit does NOT apply to CJK and Hangul
		if (utf8_strlen($word) < $min)
		{
			$is_hangul = strncmp($word, self::UTF8_HANGUL_FIRST, 3) >= 0 && strncmp($word, self::UTF8_HANGUL_LAST, 3) <= 0;
			$is_cjk = strncmp($word, self::UTF8_CJK_FIRST, 3) >= 0 && strncmp($word, self::UTF8_CJK_LAST, 3) <= 0;
			$is_cjk_b = strncmp($word, self::UTF8_CJK_B_FIRST, 4) >= 0 && strncmp($word, self::UTF8_CJK_B_LAST, 4) <= 0;

			if (!$is_hangul && !$is_cjk && !$is_cjk_b)
			{
				continue;
			}
		}

		$words[] = $word;
	}

	return $words;
}
2006-01-11 18:56:07 +00:00
/**
* Updates wordlist and wordmatch tables when a message is posted or changed
*
* Does nothing when $config['fulltext_native_load_upd'] is off. Otherwise the
* message and subject are split into words, unknown words are added to the word
* list, the word/post links are brought up to date inside one transaction and
* cached search results touching any affected word are destroyed.
*
* @param	string	$mode		Contains the post mode: edit, post, reply, quote
* @param	int		$post_id	The id of the post which is modified/created
* @param	string	&$message	New or updated post content
* @param	string	&$subject	New or updated post subject
* @param	int		$poster_id	Post author's user id
* @param	int		$forum_id	The id of the forum in which the post is located (not used by this method)
*/
public function index($mode, $post_id, &$message, &$subject, $poster_id, $forum_id)
{
	if (!$this->config['fulltext_native_load_upd'])
	{
		// The search indexer is disabled, return
		return;
	}

	// The id ends up in several hand-built queries, so force it to an integer once
	$post_id = (int) $post_id;

	// Split post text and subject to obtain arrays of 'words'
	$split_text = $this->split_message($message);
	$split_title = $this->split_message($subject);

	// word_text => word_id of the words currently linked to the post (filled in edit mode only)
	$cur_words = array('post' => array(), 'title' => array());

	// For anything but an edit every word is new and nothing has to be removed
	$words = array(
		'add'	=> array('post' => $split_text, 'title' => $split_title),
		'del'	=> array('post' => array(), 'title' => array()),
	);

	if ($mode == 'edit')
	{
		$sql = 'SELECT w.word_id, w.word_text, m.title_match
			FROM ' . SEARCH_WORDLIST_TABLE . ' w, ' . SEARCH_WORDMATCH_TABLE . ' m
			WHERE m.post_id = ' . $post_id . '
				AND w.word_id = m.word_id';
		$result = $this->db->sql_query($sql);

		while ($row = $this->db->sql_fetchrow($result))
		{
			$which = ($row['title_match']) ? 'title' : 'post';
			$cur_words[$which][$row['word_text']] = $row['word_id'];
		}
		$this->db->sql_freeresult($result);

		// Only the difference between the indexed and the new words needs to be written
		foreach (array('post' => $split_text, 'title' => $split_title) as $which => $split_words)
		{
			$indexed_words = array_keys($cur_words[$which]);

			$words['add'][$which] = array_diff($split_words, $indexed_words);
			$words['del'][$which] = array_diff($indexed_words, $split_words);
		}
	}
	unset($split_text, $split_title);

	// Get unique words from the above arrays
	$unique_add_words = array_unique(array_merge($words['add']['post'], $words['add']['title']));

	// We now have a unique array of all words to be added and individual arrays of
	// added and removed words for text and title. What we need to do now is add the
	// new words (if they don't already exist) and then add (or remove) matches
	// between the words and this post
	$new_words = array();
	if (count($unique_add_words))
	{
		$sql = 'SELECT word_id, word_text
			FROM ' . SEARCH_WORDLIST_TABLE . '
			WHERE ' . $this->db->sql_in_set('word_text', $unique_add_words);
		$result = $this->db->sql_query($sql);

		$word_ids = array();
		while ($row = $this->db->sql_fetchrow($result))
		{
			$word_ids[$row['word_text']] = $row['word_id'];
		}
		$this->db->sql_freeresult($result);

		$new_words = array_diff($unique_add_words, array_keys($word_ids));
	}

	$this->db->sql_transaction('begin');

	if (count($new_words))
	{
		$sql_ary = array();
		foreach ($new_words as $word)
		{
			$sql_ary[] = array('word_text' => (string) $word, 'word_count' => 0);
		}

		// NOTE(review): errors are returned instead of raised here, presumably so that a word
		// inserted concurrently by another request does not abort the indexing - confirm
		$this->db->sql_return_on_error(true);
		$this->db->sql_multi_insert(SEARCH_WORDLIST_TABLE, $sql_ary);
		$this->db->sql_return_on_error(false);
	}
	unset($new_words);

	// now update the search match table, remove links to removed words and add links to new words
	foreach ($words['del'] as $word_in => $word_ary)
	{
		if (!count($word_ary))
		{
			continue;
		}

		$title_match = ($word_in == 'title') ? 1 : 0;

		$sql_in = array();
		foreach ($word_ary as $word)
		{
			$sql_in[] = $cur_words[$word_in][$word];
		}

		$sql = 'DELETE FROM ' . SEARCH_WORDMATCH_TABLE . '
			WHERE ' . $this->db->sql_in_set('word_id', $sql_in) . '
				AND post_id = ' . $post_id . '
				AND title_match = ' . $title_match;
		$this->db->sql_query($sql);

		$sql = 'UPDATE ' . SEARCH_WORDLIST_TABLE . '
			SET word_count = word_count - 1
			WHERE ' . $this->db->sql_in_set('word_id', $sql_in) . '
				AND word_count > 0';
		$this->db->sql_query($sql);
	}

	$this->db->sql_return_on_error(true);
	foreach ($words['add'] as $word_in => $word_ary)
	{
		if (!count($word_ary))
		{
			continue;
		}

		$title_match = ($word_in == 'title') ? 1 : 0;

		// Link the post to every listed word in a single statement
		$sql = 'INSERT INTO ' . SEARCH_WORDMATCH_TABLE . ' (post_id, word_id, title_match)
			SELECT ' . $post_id . ', word_id, ' . $title_match . '
			FROM ' . SEARCH_WORDLIST_TABLE . '
			WHERE ' . $this->db->sql_in_set('word_text', $word_ary);
		$this->db->sql_query($sql);

		$sql = 'UPDATE ' . SEARCH_WORDLIST_TABLE . '
			SET word_count = word_count + 1
			WHERE ' . $this->db->sql_in_set('word_text', $word_ary);
		$this->db->sql_query($sql);
	}
	$this->db->sql_return_on_error(false);

	$this->db->sql_transaction('commit');

	// destroy cached search results containing any of the words removed or added
	$changed_words = array_unique(array_merge(
		$words['add']['post'],
		$words['add']['title'],
		$words['del']['post'],
		$words['del']['title']
	));
	$this->destroy_cache($changed_words, array($poster_id));
}
/**
* Removes entries from the wordmatch table for the specified post_ids
*
* The usage counter of every word linked to one of the posts is decreased (once
* for title matches, once for message matches), the links are deleted and cached
* search results for the affected words and the given authors are destroyed.
* The cache is also destroyed for the authors when $post_ids is empty.
*
* @param	array	$post_ids		Ids of the posts to remove from the index
* @param	array	$author_ids		Ids of the authors whose cached results are destroyed
* @param	array	$forum_ids		Not used by this method
*/
public function index_remove($post_ids, $author_ids, $forum_ids)
{
	// Has to exist even without post ids, destroy_cache() below always receives it
	$word_texts = array();

	if (count($post_ids))
	{
		$sql = 'SELECT w.word_id, w.word_text, m.title_match
			FROM ' . SEARCH_WORDMATCH_TABLE . ' m, ' . SEARCH_WORDLIST_TABLE . ' w
			WHERE ' . $this->db->sql_in_set('m.post_id', $post_ids) . '
				AND w.word_id = m.word_id';
		$result = $this->db->sql_query($sql);

		$message_word_ids = $title_word_ids = array();
		while ($row = $this->db->sql_fetchrow($result))
		{
			if ($row['title_match'])
			{
				$title_word_ids[] = $row['word_id'];
			}
			else
			{
				$message_word_ids[] = $row['word_id'];
			}
			$word_texts[] = $row['word_text'];
		}
		$this->db->sql_freeresult($result);

		// Title and message matches are counted separately, so they are decreased separately
		foreach (array($title_word_ids, $message_word_ids) as $word_ids)
		{
			if (!count($word_ids))
			{
				continue;
			}

			$sql = 'UPDATE ' . SEARCH_WORDLIST_TABLE . '
				SET word_count = word_count - 1
				WHERE ' . $this->db->sql_in_set('word_id', $word_ids) . '
					AND word_count > 0';
			$this->db->sql_query($sql);
		}
		unset($title_word_ids, $message_word_ids);

		$sql = 'DELETE FROM ' . SEARCH_WORDMATCH_TABLE . '
			WHERE ' . $this->db->sql_in_set('post_id', $post_ids);
		$this->db->sql_query($sql);
	}

	$this->destroy_cache(array_unique($word_texts), array_unique($author_ids));
}
/**
* Tidy up indexes: tag 'common words' and remove their entries from the match table
*
* A word is treated as common when it is already flagged or when its word_count
* exceeds $config['fulltext_native_common_thres'] percent of $config['num_posts'].
* This only happens on boards with at least 100 posts and a non-zero threshold.
* $config['search_last_gc'] is updated on every call.
*/
public function tidy()
{
	// Is the fulltext indexer disabled? Then only remember that the gc ran
	if (!$this->config['fulltext_native_load_upd'])
	{
		$this->config->set('search_last_gc', time(), false);
		return;
	}

	$common_word_texts = array();

	// Remove common words
	if ($this->config['num_posts'] >= 100 && $this->config['fulltext_native_common_thres'])
	{
		$common_threshold = ((float) $this->config['fulltext_native_common_thres']) / 100.0;
		$max_word_count = floor($this->config['num_posts'] * $common_threshold);

		// First, get the IDs of common words
		$sql = 'SELECT word_id, word_text
			FROM ' . SEARCH_WORDLIST_TABLE . '
			WHERE word_count > ' . $max_word_count . '
				OR word_common = 1';
		$result = $this->db->sql_query($sql);

		$common_word_ids = array();
		while ($row = $this->db->sql_fetchrow($result))
		{
			$common_word_ids[] = $row['word_id'];
			$common_word_texts[] = $row['word_text'];
		}
		$this->db->sql_freeresult($result);

		if (count($common_word_ids))
		{
			// Flag the words
			$sql = 'UPDATE ' . SEARCH_WORDLIST_TABLE . '
				SET word_common = 1
				WHERE ' . $this->db->sql_in_set('word_id', $common_word_ids);
			$this->db->sql_query($sql);

			// by setting search_last_gc to the new time here we make sure that if a user reloads because the
			// following query takes too long, he won't run into it again
			$this->config->set('search_last_gc', time(), false);

			// Delete the matches
			$sql = 'DELETE FROM ' . SEARCH_WORDMATCH_TABLE . '
				WHERE ' . $this->db->sql_in_set('word_id', $common_word_ids);
			$this->db->sql_query($sql);
		}
	}

	if (count($common_word_texts))
	{
		// destroy cached search results containing any of the words that are now common or were removed
		$this->destroy_cache(array_unique($common_word_texts));
	}

	$this->config->set('search_last_gc', time(), false);
}
2006-03-15 23:20:04 +00:00
/**
* Deletes all words from the index
*
* Empties the word list, the word match and the search results tables.
*
* @param	mixed	$acp_module		Not used by this method
* @param	mixed	$u_action		Not used by this method
*/
public function delete_index($acp_module, $u_action)
{
	// The sqlite layers get a plain DELETE, every other layer a TRUNCATE
	$is_sqlite = in_array($this->db->get_sql_layer(), array('sqlite', 'sqlite3'));
	$statement = ($is_sqlite) ? 'DELETE FROM ' : 'TRUNCATE TABLE ';

	foreach (array(SEARCH_WORDLIST_TABLE, SEARCH_WORDMATCH_TABLE, SEARCH_RESULTS_TABLE) as $table)
	{
		$this->db->sql_query($statement . $table);
	}
}
/**
* Returns true if the index holds data, i.e. both the word list and the
* word match table report at least one row
*
* @return	boolean
*/
public function index_created()
{
	// Load the row counts on first use
	if (empty($this->stats))
	{
		$this->get_stats();
	}

	return $this->stats['total_words'] && $this->stats['total_matches'];
}
/**
* Returns an associative array containing information about the indexes
*
* @return	array	Row counts of the word list and word match tables, keyed by
*					the TOTAL_WORDS and TOTAL_MATCHES language strings
*/
public function index_stats()
{
	// Load the row counts on first use
	if (empty($this->stats))
	{
		$this->get_stats();
	}

	$index_stats = array();
	$index_stats[$this->user->lang['TOTAL_WORDS']] = $this->stats['total_words'];
	$index_stats[$this->user->lang['TOTAL_MATCHES']] = $this->stats['total_matches'];

	return $index_stats;
}
2012-08-14 17:46:17 +05:30
/**
* Fills $this->stats with the estimated row counts of the word list
* ('total_words') and word match ('total_matches') tables
*/
protected function get_stats()
{
	$tables = array(
		'total_words'	=> SEARCH_WORDLIST_TABLE,
		'total_matches'	=> SEARCH_WORDMATCH_TABLE,
	);

	foreach ($tables as $stat => $table)
	{
		$this->stats[$stat] = $this->db->get_estimated_row_count($table);
	}
}
2006-07-27 19:02:47 +00:00
/**
* Clean up a text to remove non-alphanumeric characters
*
* Receives a string, recodes it to UTF-8 when $encoding names another
* encoding, decodes NCRs and HTML entities, normalizes it with
* \Normalizer::normalize() and then rebuilds it piece by piece:
*
* - runs of legal ASCII chars (lowercase letters, digits, space and any ASCII
*   char taken from $allowed_chars) are copied as they are
* - Hangul and CJK chars inside the ranges given by the class constants are
*   kept and surrounded by spaces so that each one is indexed on its own
* - multibyte chars listed in $allowed_chars are copied as they are
* - every other multibyte char is replaced with its entry in the search
*   indexer conversion tables, or with a space when it has no entry
*
* Any number of "allowed chars" can be passed as a UTF-8 string in NFC.
*
* @param	string	$text			Text to split, in UTF-8 unless $encoding says otherwise (not normalized or sanitized)
* @param	string	$allowed_chars	String of special chars to allow
* @param	string	$encoding		Text encoding
* @return	string	Cleaned up text made of legal ASCII chars, allowed chars, mapped chars and separating spaces
*/
2012-08-14 17:46:17 +05:30
protected function cleanup ( $text , $allowed_chars = null , $encoding = 'utf-8' )
2006-07-27 19:02:47 +00:00
{
// $conv caches "UTF-8 char => replacement" and $conv_loaded remembers which
// conversion table blocks were already tried. Both are static, so they are
// shared by every call made during the request.
static $conv = array (), $conv_loaded = array ();
// $allow collects the multibyte chars from $allowed_chars (as array keys).
// NOTE(review): $words is initialised here but never used in this method.
$words = $allow = array ();
2006-11-15 15:35:50 +00:00
// Convert the text to UTF-8
2006-07-27 19:02:47 +00:00
$encoding = strtolower ( $encoding );
if ( $encoding != 'utf-8' )
{
$text = utf8_recode ( $text , $encoding );
}
// Length in bytes of a multibyte sequence, keyed by the high nibble of its
// lead byte. It is looked up below with ($byte & "\xF0").
$utf_len_mask = array (
" \xC0 " => 2 ,
" \xD0 " => 2 ,
" \xE0 " => 3 ,
" \xF0 " => 4
);
/**
* Replace HTML entities and NCRs
*/
2006-11-03 21:05:25 +00:00
$text = htmlspecialchars_decode ( utf8_decode_ncr ( $text ), ENT_QUOTES );
2006-07-27 19:02:47 +00:00
/**
2013-11-20 13:47:31 +01:00
* Normalize to NFC
2006-07-27 19:02:47 +00:00
*/
2013-11-20 13:47:31 +01:00
$text = \Normalizer :: normalize ( $text );
2006-07-27 19:02:47 +00:00
/**
* The first thing we do is:
*
* - convert ASCII-7 letters to lowercase
* - remove the ASCII-7 non-alpha characters
* - remove the bytes that should not appear in a valid UTF-8 string: 0xC0,
*   0xC1 and 0xF5-0xFF
*
* $sb_match lists the single bytes to translate and $sb_replace holds the
* replacement for the byte at the same offset; the pair is applied with
* strtr() further down.
*
* @todo in theory, the third one is already taken care of during normalization and those chars should have been replaced by Unicode replacement chars
*/
$sb_match = " ISTCPAMELRDOJBNHFGVWUQKYXZ \r \n \t ! \" # $ %&'()*+,-./:;<=>?@[ \\ ]^_` { |}~ \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x0B \x0C \x0E \x0F \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 \x1A \x1B \x1C \x1D \x1E \x1F \xC0 \xC1 \xF5 \xF6 \xF7 \xF8 \xF9 \xFA \xFB \xFC \xFD \xFE \xFF " ;
$sb_replace = 'istcpamelrdojbnhfgvwuqkyxz ' ;
/**
* This is the list of legal ASCII chars, it is automatically extended
* with ASCII chars from $allowed_chars
*/
$legal_ascii = ' eaisntroludcpmghbfvq10xy2j9kw354867z' ;
/**
* Prepare an array containing the extra chars to allow
*
* isset() on offset 0 is only true for a non-empty string, so the default
* null (and an empty string) skips this step entirely.
*/
if ( isset ( $allowed_chars [ 0 ]))
{
$pos = 0 ;
$len = strlen ( $allowed_chars );
// Walk $allowed_chars one char (not one byte) at a time
do
{
$c = $allowed_chars [ $pos ];
if ( $c < " \x80 " )
{
/**
* ASCII char
*/
$sb_pos = strpos ( $sb_match , $c );
if ( is_int ( $sb_pos ))
{
/**
* Remove the char from $sb_match and its corresponding
* replacement in $sb_replace, so that strtr() leaves it
* untouched, then whitelist it for the ASCII fast path
*/
$sb_match = substr ( $sb_match , 0 , $sb_pos ) . substr ( $sb_match , $sb_pos + 1 );
$sb_replace = substr ( $sb_replace , 0 , $sb_pos ) . substr ( $sb_replace , $sb_pos + 1 );
$legal_ascii .= $c ;
}
++ $pos ;
}
else
{
/**
* UTF-8 char: take the whole sequence and remember it as a key
* of $allow
*
* NOTE(review): a byte whose high nibble has no entry in
* $utf_len_mask (e.g. a stray continuation byte 0x80-0xBF) would
* leave $utf_len empty and $pos would not advance - this looks
* like it relies on $allowed_chars being valid UTF-8; confirm
*/
$utf_len = $utf_len_mask [ $c & " \xF0 " ];
$allow [ substr ( $allowed_chars , $pos , $utf_len )] = 1 ;
$pos += $utf_len ;
}
}
while ( $pos < $len );
}
// Apply the single byte translation table built above
$text = strtr ( $text , $sb_match , $sb_replace );
$ret = '' ;
$pos = 0 ;
$len = strlen ( $text );
// Main loop: alternate between copying a run of legal ASCII chars and
// handling exactly one multibyte char. It only ends through the return
// that is reached once $pos has moved past the end of the text.
do
{
/**
* Do all consecutive ASCII chars at once
*
* The assignment inside the condition is intentional: $spn is the length
* of the run of legal ASCII chars starting at $pos (0 means no run)
*/
if ( $spn = strspn ( $text , $legal_ascii , $pos ))
{
$ret .= substr ( $text , $pos , $spn );
$pos += $spn ;
}
if ( $pos >= $len )
{
return $ret ;
}
/**
* Capture the UTF char
*
* NOTE(review): any byte reaching this point that is neither in
* $legal_ascii nor a lead byte with a high nibble of 0xC-0xF has no entry
* in $utf_len_mask; $utf_len would then be empty and $pos would not
* advance. 0x7F does not appear in the $sb_match list above, so it looks
* like it could end up here - confirm that earlier sanitizing rules out
* such bytes
*/
$utf_len = $utf_len_mask [ $text [ $pos ] & " \xF0 " ];
$utf_char = substr ( $text , $pos , $utf_len );
$pos += $utf_len ;
2013-11-20 13:47:31 +01:00
// Byte-wise string comparison against the first/last char of each range
if (( $utf_char >= self :: UTF8_HANGUL_FIRST && $utf_char <= self :: UTF8_HANGUL_LAST )
|| ( $utf_char >= self :: UTF8_CJK_FIRST && $utf_char <= self :: UTF8_CJK_LAST )
|| ( $utf_char >= self :: UTF8_CJK_B_FIRST && $utf_char <= self :: UTF8_CJK_B_LAST ))
2006-07-27 19:02:47 +00:00
{
/**
* All characters within these ranges are valid
*
* We separate them with a space in order to index each character
* individually
*/
$ret .= ' ' . $utf_char . ' ' ;
continue ;
}
if ( isset ( $allow [ $utf_char ]))
{
/**
* The char is explicitly allowed
*/
$ret .= $utf_char ;
continue ;
}
if ( isset ( $conv [ $utf_char ]))
{
/**
* The char is mapped to something, maybe to itself actually
*/
$ret .= $conv [ $utf_char ];
continue ;
}
/**
* The char isn't mapped, but did we load its conversion table?
*
* The search indexer table is split into blocks. The block number of
* each char is equal to its codepoint right-shifted for 11 bits. It
* means that out of the 11, 16 or 21 meaningful bits of a 2-, 3- or
* 4-byte sequence we only keep the leftmost 0, 5 or 10 bits. Thus,
* all UTF chars encoded in 2 bytes are in the same first block.
*
* isset() on offsets 2 and 3 tells 3- and 4-byte sequences apart from
* 2-byte ones.
*/
if ( isset ( $utf_char [ 2 ]))
{
if ( isset ( $utf_char [ 3 ]))
{
/**
* 1111 0nnn 10nn nnnn 10nx xxxx 10xx xxxx
* 0000 0111 0011 1111 0010 0000
*/
$idx = (( ord ( $utf_char [ 0 ]) & 0x07 ) << 7 ) | (( ord ( $utf_char [ 1 ]) & 0x3F ) << 1 ) | (( ord ( $utf_char [ 2 ]) & 0x20 ) >> 5 );
}
else
{
/**
* 1110 nnnn 10nx xxxx 10xx xxxx
* 0000 0111 0010 0000
*
* NOTE(review): the 0x07 mask keeps three of the four payload bits
* of the lead byte, so this produces a 4 bit index while the
* description above speaks of 5 bits for 3-byte sequences - confirm
* against the numbering of the search_indexer data files
*/
$idx = (( ord ( $utf_char [ 0 ]) & 0x07 ) << 1 ) | (( ord ( $utf_char [ 1 ]) & 0x20 ) >> 5 );
}
}
else
{
/**
* 110x xxxx 10xx xxxx
* 0000 0000 0000 0000
*/
$idx = 0 ;
}
/**
* Check if the required conv table has been loaded already
*
* The block is flagged as loaded before the file is looked up, so a
* missing file is only probed once
*/
if ( ! isset ( $conv_loaded [ $idx ]))
{
$conv_loaded [ $idx ] = 1 ;
2012-08-08 11:16:46 +05:30
$file = $this -> phpbb_root_path . 'includes/utf/data/search_indexer_' . $idx . '.' . $this -> php_ext ;
2006-07-27 19:02:47 +00:00
if ( file_exists ( $file ))
{
// Array union: entries already present in $conv are kept.
// Presumably the included file returns an array of
// "UTF-8 char => replacement" pairs - verify against the data files
$conv += include ( $file );
}
}
if ( isset ( $conv [ $utf_char ]))
{
$ret .= $conv [ $utf_char ];
}
else
{
/**
* We add an entry to the conversion table so that we
* don't have to convert to codepoint and perform the checks
* that are above this block
*/
$conv [ $utf_char ] = ' ' ;
$ret .= ' ' ;
}
}
while ( 1 );
// NOTE(review): the loop above only exits through the return inside it, so
// this statement looks unreachable
return $ret ;
}
2006-03-15 23:20:04 +00:00
/**
* Returns a list of options for the ACP to display
*
* Builds the HTML for four settings of this search backend and returns it
* together with the list of config keys those settings belong to:
*
* - fulltext_native_load_upd: yes/no radio buttons
* - fulltext_native_min_chars: number input limited to 0-255
* - fulltext_native_max_chars: number input limited to 0-255
* - fulltext_native_common_thres: text input followed by a percent sign
*
* Labels and explanations are taken from $this->user->lang and inserted as
* they are; the current values are read from $this->config and cast to
* int / double before being written into the markup.
*
* @return array	'tpl' => HTML string of the form fields,
*					'config' => array of config key => validation string
*/
2012-08-10 12:23:25 +05:30
public function acp ()
2006-03-15 23:20:04 +00:00
{
2006-07-27 19:02:47 +00:00
/**
* Template with the option fields of this backend
*
* NOTE(review): the original remark here said these options were "copied
* from fulltext_native for now" and would have to be adjusted or removed;
* it looks stale - confirm whether anything is still pending
*/
2006-03-15 23:20:04 +00:00
$tpl = '
< dl >
2012-11-10 00:30:46 +01:00
< dt >< label for = " fulltext_native_load_upd " > ' . $this->user->lang[' YES_SEARCH_UPDATE '] . $this->user->lang[' COLON '] . ' </ label >< br />< span > ' . $this->user->lang[' YES_SEARCH_UPDATE_EXPLAIN '] . ' </ span ></ dt >
2012-07-23 15:41:15 +05:30
< dd >< label >< input type = " radio " id = " fulltext_native_load_upd " name = " config[fulltext_native_load_upd] " value = " 1 " ' . (($this->config[' fulltext_native_load_upd ']) ? ' checked = " checked " ' : ' ') . ' class = " radio " /> ' . $this->user->lang[' YES '] . ' </ label >< label >< input type = " radio " name = " config[fulltext_native_load_upd] " value = " 0 " ' . ((!$this->config[' fulltext_native_load_upd ']) ? ' checked = " checked " ' : ' ') . ' class = " radio " /> ' . $this->user->lang[' NO '] . ' </ label ></ dd >
2006-03-15 23:20:04 +00:00
</ dl >
< dl >
2012-11-10 00:30:46 +01:00
< dt >< label for = " fulltext_native_min_chars " > ' . $this->user->lang[' MIN_SEARCH_CHARS '] . $this->user->lang[' COLON '] . ' </ label >< br />< span > ' . $this->user->lang[' MIN_SEARCH_CHARS_EXPLAIN '] . ' </ span ></ dt >
2013-05-13 23:20:34 +03:00
< dd >< input id = " fulltext_native_min_chars " type = " number " size = " 3 " maxlength = " 3 " min = " 0 " max = " 255 " name = " config[fulltext_native_min_chars] " value = " ' . (int) $this->config ['fulltext_native_min_chars'] . ' " /></ dd >
2006-03-15 23:20:04 +00:00
</ dl >
< dl >
2012-11-10 00:30:46 +01:00
< dt >< label for = " fulltext_native_max_chars " > ' . $this->user->lang[' MAX_SEARCH_CHARS '] . $this->user->lang[' COLON '] . ' </ label >< br />< span > ' . $this->user->lang[' MAX_SEARCH_CHARS_EXPLAIN '] . ' </ span ></ dt >
2013-05-13 23:20:34 +03:00
< dd >< input id = " fulltext_native_max_chars " type = " number " size = " 3 " maxlength = " 3 " min = " 0 " max = " 255 " name = " config[fulltext_native_max_chars] " value = " ' . (int) $this->config ['fulltext_native_max_chars'] . ' " /></ dd >
2006-03-15 23:20:04 +00:00
</ dl >
2006-12-27 00:06:39 +00:00
< dl >
2012-11-10 00:30:46 +01:00
< dt >< label for = " fulltext_native_common_thres " > ' . $this->user->lang[' COMMON_WORD_THRESHOLD '] . $this->user->lang[' COLON '] . ' </ label >< br />< span > ' . $this->user->lang[' COMMON_WORD_THRESHOLD_EXPLAIN '] . ' </ span ></ dt >
2012-07-23 15:41:15 +05:30
< dd >< input id = " fulltext_native_common_thres " type = " text " size = " 3 " maxlength = " 3 " name = " config[fulltext_native_common_thres] " value = " ' . (double) $this->config ['fulltext_native_common_thres'] . ' " /> %</ dd >
2006-12-27 00:06:39 +00:00
</ dl >
2006-03-15 23:20:04 +00:00
' ;
// These are fields required in the config table. Each key matches the name of
// a form field in the template above; the value presumably encodes the
// expected type and its min:max bounds - verify against the ACP code that
// consumes this array
return array (
'tpl' => $tpl ,
2006-12-27 00:06:39 +00:00
'config' => array ( 'fulltext_native_load_upd' => 'bool' , 'fulltext_native_min_chars' => 'integer:0:255' , 'fulltext_native_max_chars' => 'integer:0:255' , 'fulltext_native_common_thres' => 'double:0:100' )
2006-03-15 23:20:04 +00:00
);
}
2005-01-13 22:30:42 +00:00
}