Fixed a couple of bugs in query, and improved the logic of querylib.

This commit is contained in:
mchampan 2006-09-08 01:18:48 +00:00
parent 3a27bb0575
commit 0d46c846da
5 changed files with 121 additions and 70 deletions

View File

@ -1,3 +1,12 @@
2006/09/08
----------
Google Summer of Code is finished, spent a couple of weeks away from
the project to think about it and also to take a break. Working on it
now I discovered bugs in the query parser (now fixed), and I also
un-convoluted the querylib logic (well, slightly).
Updated ZFS files to latest SVN.
2006/08/21
----------
Fixed index document count, and created new config variable to store

View File

@ -1,9 +1,8 @@
We are running cutting-edge (i.e. HEAD) Zend Framework:
URL: http://framework.zend.com/svn/framework/trunk
Revision: 924
Last Changed Rev: 924
Last Changed Date: 2006-07-27 10:23:04 +0200 (Thu, 27 Jul 2006)
Revision: 1042
Last Changed Rev: 1042
Last Changed Date: 2006-09-07 23:14:50 +0200 (Thu, 07 Sep 2006)
The copy of Zend Framework present in this directory contains only the minimum
to run Zend_Search_Lucene - I don't foresee any problems, since the license

View File

@ -20,6 +20,11 @@
*/
/** Zend_Search_Lucene_Exception */
require_once 'Zend/Search/Lucene/Exception.php';
/**
* @category Zend
* @package Zend_Search_Lucene
@ -157,46 +162,74 @@ abstract class Zend_Search_Lucene_Storage_File
* and advances the file pointer.
*
* @return integer
* @throws Zend_Search_Lucene_Exception
*/
public function readLong()
{
// Fetch the 8 bytes of the stored long via _fread(). They are combined below
// with $str{0} as the most significant byte and $str{7} as the least.
$str = $this->_fread(8);
/**
* PHP uses long as largest integer. fseek() uses long for offset.
* long has 4 bytes in a lot of systems. 4 bytes are discarded to prevent
* conversion to float.
* So, largest index segment file is 2Gb
* Check, that we work in 64-bit mode.
* fseek() uses long for offset. Thus, largest index segment file size in 32bit mode is 2Gb
*/
// NOTE(review): this unconditional return builds the value from the low four
// bytes only and silently ignores the high four. While it is present, the
// PHP_INT_SIZE branches below are unreachable. It looks like the pre-change
// body shown next to its replacement by the diff view -- confirm which one
// is live in the actual file.
return /* ord($str{0}) << 56 | */
/* ord($str{1}) << 48 | */
/* ord($str{2}) << 40 | */
/* ord($str{3}) << 32 | */
ord($str{4}) << 24 |
ord($str{5}) << 16 |
ord($str{6}) << 8 |
ord($str{7});
// Integers wider than 4 bytes: all eight bytes are combined into the result.
if (PHP_INT_SIZE > 4) {
return ord($str{0}) << 56 |
ord($str{1}) << 48 |
ord($str{2}) << 40 |
ord($str{3}) << 32 |
ord($str{4}) << 24 |
ord($str{5}) << 16 |
ord($str{6}) << 8 |
ord($str{7});
} else {
// 4-byte integers: refuse any value whose high four bytes are non-zero.
// NOTE(review): the last test, (ord($str{0}) & 0x80) != 0, is already implied
// by the ord($str{0}) != 0 test on the first line of this condition, so it
// can never change the outcome. It was probably meant to test $str{4}, the
// top byte of the returned 32-bit value, so that a set sign bit is rejected
// instead of producing a negative result -- confirm and fix.
if ((ord($str{0}) != 0) ||
(ord($str{1}) != 0) ||
(ord($str{2}) != 0) ||
(ord($str{3}) != 0) ||
((ord($str{0}) & 0x80) != 0)) {
throw new Zend_Search_Lucene_Exception('Largest supported segment size (for 32-bit mode) is 2Gb');
}
// Only the low four bytes contribute once the high four are known to be zero.
return ord($str{4}) << 24 |
ord($str{5}) << 16 |
ord($str{6}) << 8 |
ord($str{7});
}
}
/**
* Writes long integer to the end of file
*
* @param integer $value
* @throws Zend_Search_Lucene_Exception
*/
public function writeLong($value)
{
/**
* PHP uses long as largest integer. fseek() uses long for offset.
* long has 4 bytes in a lot of systems. 4 bytes are discarded to prevent
* conversion to float.
* So, largest index segment file is 2Gb
* Check, that we work in 64-bit mode.
* fseek() and ftell() use long for offset. Thus, largest index segment file size in 32bit mode is 2Gb
*/
// NOTE(review): the settype() and _fwrite() calls directly below run
// unconditionally, before the PHP_INT_SIZE check, so as written every call
// passes two 8-byte strings to _fwrite(). This looks like the pre-change body
// shown next to its replacement by the diff view -- confirm which one is live
// in the actual file.
settype($value, 'integer');
// Four zero bytes followed by the low 32 bits of $value, most significant byte first.
$this->_fwrite( "\x00\x00\x00\x00" .
chr($value>>24 & 0xFF) .
chr($value>>16 & 0xFF) .
chr($value>>8 & 0xFF) .
chr($value & 0xFF), 8 );
// Integers wider than 4 bytes: emit all eight bytes, most significant first.
if (PHP_INT_SIZE > 4) {
settype($value, 'integer');
$this->_fwrite( chr($value>>56 & 0xFF) .
chr($value>>48 & 0xFF) .
chr($value>>40 & 0xFF) .
chr($value>>32 & 0xFF) .
chr($value>>24 & 0xFF) .
chr($value>>16 & 0xFF) .
chr($value>>8 & 0xFF) .
chr($value & 0xFF), 8 );
} else {
// 4-byte integers: reject anything above the largest positive 32-bit value.
// NOTE(review): this check does not reject negative values; they would be
// written with four zero high bytes -- confirm callers never pass them.
if ($value > 0x7FFFFFFF) {
throw new Zend_Search_Lucene_Exception('Largest supported segment size (for 32-bit mode) is 2Gb');
}
// High four bytes are always written as zero in this branch.
$this->_fwrite( "\x00\x00\x00\x00" .
chr($value>>24 & 0xFF) .
chr($value>>16 & 0xFF) .
chr($value>>8 & 0xFF) .
chr($value & 0xFF), 8 );
}
}

View File

@ -41,6 +41,9 @@
//otherwise we are dealing with a new advanced query
unset($_SESSION['search_advanced_query']);
session_unregister('search_advanced_query');
//chars to strip from strings (whitespace)
$chars = " \t\n\r\0\x0B,-+";
//retrieve advanced query variables
$adv->mustappear = trim(optional_param('mustappear', '', PARAM_CLEAN), $chars);
@ -55,8 +58,6 @@
//parse the advanced variables into a query string
//TODO: move out to external query class (QueryParse?)
//chars to strip from strings (whitespace)
$chars = ' \t\n\r\0\x0B,;';
$query_string = '';
//get all available module types
@ -105,7 +106,7 @@
} //if
//run the query against the index
$sq = new SearchQuery($query_string, $page_number, 10, true);
$sq = new SearchQuery($query_string, $page_number, 10, false);
} //if
if (!$site = get_site()) {

View File

@ -93,7 +93,8 @@
$validquery,
$validindex,
$results,
$results_per_page;
$results_per_page,
$total_results;
public function __construct($term='', $page=1, $results_per_page=10, $cache=false) {
global $CFG;
@ -140,37 +141,52 @@
} //set_query
// Returns the search results held by this query object.
// NOTE(review): both arms of the if/else below return, so the trailing
// "return $this->results;" is unreachable as written. It looks like the
// pre-change body (if/else) and the post-change body (single return) are shown
// together by the diff view -- confirm which one is live in the actual file.
public function results() {
// Valid query and index: return the page-sized slice from get_subset_results().
if ($this->validquery and $this->validindex) {
return $this->get_subset_results();
} else {
// Otherwise return an empty array.
return array();
} //else
return $this->results;
} //results
// Returns the slice of $this->results that belongs to the current page,
// adjusting $this->pagenumber first when it is out of range.
// NOTE(review): relies on total_pages(), which is not visible in this excerpt.
private function get_subset_results() {
// Fewer results than one page holds: force page 1.
if ($this->count() < $this->results_per_page) {
$this->pagenumber = 1;
} else if ($this->pagenumber > $this->total_pages()) {
// Requested page is past the end: fall back to the last page.
$this->pagenumber = $this->total_pages();
} //if
// Zero-based offset of the first result on the current page.
$start = ($this->pagenumber - 1) * $this->results_per_page;
return array_slice($this->results, $start, $this->results_per_page);
} //get_subset_results
private function get_all_results() {
private function process_results($all=false) {
global $USER;
$term = strtolower($this->term);
//experimental - return more results
$strip_arr = array('author:', 'title:', '+', '-', 'doctype:');
$stripped_term = str_replace($strip_arr, '', $term);
$hits = $this->index->find($term." title:".$stripped_term." author:".$stripped_term);
//--
$hitcount = count($hits);
$this->total_results = $hitcount;
if ($hitcount == 0) return array();
$totalpages = ceil($hitcount/$this->results_per_page);
if (!$all) {
if ($hitcount < $this->results_per_page) {
$this->pagenumber = 1;
} else if ($this->pagenumber > $totalpages) {
$this->pagenumber =$totalpages;
} //if
$start = ($this->pagenumber - 1) * $this->results_per_page;
$end = $start + $this->results_per_page;
if ($end > $hitcount) {
$end = $hitcount;
} //if
} else {
$start = 0;
$end = $hitcount;
} //else
$resultdoc = new SearchResult();
$resultdocs = array();
$i = 0;
$term = strtolower($this->term);
$hits = $this->index->find($term." title:".$term." author:".$term);
foreach ($hits as $hit) {
$resultdocs = array();
for ($i = $start; $i < $end; $i++) {
$hit = $hits[$i];
//check permissions on each result
if ($this->can_display($USER, $hit->id, $hit->doctype, $hit->course_id, $hit->group_id)) {
$resultdoc->number = $i;
@ -181,21 +197,19 @@
$resultdoc->author = $hit->author;
//and store it
$resultdocs[] = clone($resultdoc);
$i++;
$resultdocs[] = clone($resultdoc);
} //if
} //foreach
return $resultdocs;
} //get_all_results
} //process_results
private function get_results() {
$cache = new SearchCache();
if ($this->cache and $cache->can_cache()) {
if (!($resultdocs = $cache->cache($this->term))) {
$resultdocs = $this->get_all_results();
$resultdocs = $this->process_results();
//cache the results so we don't have to compute this on every page-load
$cache->cache($this->term, $resultdocs);
//print "Using new results.";
@ -206,7 +220,7 @@
} else {
//no caching :(
//print "Caching disabled!";
$resultdocs = $this->get_all_results();
$resultdocs = $this->process_results();
} //else
return $resultdocs;
@ -271,14 +285,9 @@
} //can_display
// Returns the number of results for this query.
// NOTE(review): there are two consecutive returns, so the second
// ($this->total_results) is unreachable as written. It looks like the
// pre-change and post-change lines are shown together by the diff view --
// confirm which one is live in the actual file.
public function count() {
return count($this->results);
return $this->total_results;
} //count
//this shouldn't be in this class
//public function index_count() {
// return $this->index->count();
//} //index_count
/**
* Reports whether both the validquery and validindex flags are truthy.
*
* @return bool true only when both flags are set
*/
public function is_valid() {
return $this->validquery && $this->validindex;
} //is_valid