From 0d46c846dabe15bc68d0fa6730048f260d249861 Mon Sep 17 00:00:00 2001 From: mchampan Date: Fri, 8 Sep 2006 01:18:48 +0000 Subject: [PATCH] Fixed couple bugs in query, and improved logic of querylib. --- search/README.txt | 9 +++ search/Zend/IMPORTANT.txt | 7 +- search/Zend/Search/Lucene/Storage/File.php | 77 ++++++++++++------ search/query.php | 7 +- search/querylib.php | 91 ++++++++++++---------- 5 files changed, 121 insertions(+), 70 deletions(-) diff --git a/search/README.txt b/search/README.txt index 379a27277c8..14cc4f79d68 100644 --- a/search/README.txt +++ b/search/README.txt @@ -1,3 +1,12 @@ +2006/09/08 +---------- +Google Summer of Code is finished, spent a couple of weeks away from +the project to think about it and also to take a break. Working on it +now I discovered bugs in the query parser (now fixed), and I also +un-convoluted the querylib logic (well slightly). + +Updated ZFS files to latest SVN. + 2006/08/21 ---------- Fixed index document count, and created new config variable to store diff --git a/search/Zend/IMPORTANT.txt b/search/Zend/IMPORTANT.txt index d8b80df59e8..c20ea579ee9 100644 --- a/search/Zend/IMPORTANT.txt +++ b/search/Zend/IMPORTANT.txt @@ -1,9 +1,8 @@ We are running cutting-edge (i.e. 
HEAD) Zend Framework: URL: http://framework.zend.com/svn/framework/trunk - Revision: 924 - Last Changed Rev: 924 - Last Changed Date: 2006-07-27 10:23:04 +0200 (Thu, 27 Jul 2006) - + Revision: 1042 + Last Changed Rev: 1042 + Last Changed Date: 2006-09-07 23:14:50 +0200 (Thu, 07 Sep 2006) This Zend Framework present in this directory only contains the minimum to run Zend_Search_Lucene - I don't foresee any problems, since the license diff --git a/search/Zend/Search/Lucene/Storage/File.php b/search/Zend/Search/Lucene/Storage/File.php index a53c75b7093..5a195ae85f4 100644 --- a/search/Zend/Search/Lucene/Storage/File.php +++ b/search/Zend/Search/Lucene/Storage/File.php @@ -20,6 +20,11 @@ */ + +/** Zend_Search_Lucene_Exception */ +require_once 'Zend/Search/Lucene/Exception.php'; + + /** * @category Zend * @package Zend_Search_Lucene @@ -157,46 +162,74 @@ abstract class Zend_Search_Lucene_Storage_File * and advances the file pointer. * * @return integer + * @throws Zend_Search_Lucene_Exception */ public function readLong() { $str = $this->_fread(8); /** - * PHP uses long as largest integer. fseek() uses long for offset. - * long has 4 bytes in a lot of systems. 4 bytes are discarded to prevent - * conversion to float. - * So, largest index segment file is 2Gb + * Check, that we work in 64-bit mode. + * fseek() uses long for offset. 
Thus, largest index segment file size in 32bit mode is 2Gb */ - return /* ord($str{0}) << 56 | */ - /* ord($str{1}) << 48 | */ - /* ord($str{2}) << 40 | */ - /* ord($str{3}) << 32 | */ - ord($str{4}) << 24 | - ord($str{5}) << 16 | - ord($str{6}) << 8 | - ord($str{7}); + if (PHP_INT_SIZE > 4) { + return ord($str{0}) << 56 | + ord($str{1}) << 48 | + ord($str{2}) << 40 | + ord($str{3}) << 32 | + ord($str{4}) << 24 | + ord($str{5}) << 16 | + ord($str{6}) << 8 | + ord($str{7}); + } else { + if ((ord($str{0}) != 0) || + (ord($str{1}) != 0) || + (ord($str{2}) != 0) || + (ord($str{3}) != 0) || + ((ord($str{0}) & 0x80) != 0)) { + throw new Zend_Search_Lucene_Exception('Largest supported segment size (for 32-bit mode) is 2Gb'); + } + + return ord($str{4}) << 24 | + ord($str{5}) << 16 | + ord($str{6}) << 8 | + ord($str{7}); + } } /** * Writes long integer to the end of file * * @param integer $value + * @throws Zend_Search_Lucene_Exception */ public function writeLong($value) { /** - * PHP uses long as largest integer. fseek() uses long for offset. - * long has 4 bytes in a lot of systems. 4 bytes are discarded to prevent - * conversion to float. - * So, largest index segment file is 2Gb + * Check, that we work in 64-bit mode. + * fseek() and ftell() use long for offset. Thus, largest index segment file size in 32bit mode is 2Gb */ - settype($value, 'integer'); - $this->_fwrite( "\x00\x00\x00\x00" . - chr($value>>24 & 0xFF) . - chr($value>>16 & 0xFF) . - chr($value>>8 & 0xFF) . - chr($value & 0xFF), 8 ); + if (PHP_INT_SIZE > 4) { + settype($value, 'integer'); + $this->_fwrite( chr($value>>56 & 0xFF) . + chr($value>>48 & 0xFF) . + chr($value>>40 & 0xFF) . + chr($value>>32 & 0xFF) . + chr($value>>24 & 0xFF) . + chr($value>>16 & 0xFF) . + chr($value>>8 & 0xFF) . + chr($value & 0xFF), 8 ); + } else { + if ($value > 0x7FFFFFFF) { + throw new Zend_Search_Lucene_Exception('Largest supported segment size (for 32-bit mode) is 2Gb'); + } + + $this->_fwrite( "\x00\x00\x00\x00" . 
+ chr($value>>24 & 0xFF) . + chr($value>>16 & 0xFF) . + chr($value>>8 & 0xFF) . + chr($value & 0xFF), 8 ); + } } diff --git a/search/query.php b/search/query.php index 2bd6f5b3c07..0d2b90e7a5c 100644 --- a/search/query.php +++ b/search/query.php @@ -41,6 +41,9 @@ //otherwise we are dealing with a new advanced query unset($_SESSION['search_advanced_query']); session_unregister('search_advanced_query'); + + //chars to strip from strings (whitespace) + $chars = " \t\n\r\0\x0B,-+"; //retrieve advanced query variables $adv->mustappear = trim(optional_param('mustappear', '', PARAM_CLEAN), $chars); @@ -55,8 +58,6 @@ //parse the advanced variables into a query string //TODO: move out to external query class (QueryParse?) - //chars to strip from strings (whitespace) - $chars = ' \t\n\r\0\x0B,;'; $query_string = ''; //get all available module types @@ -105,7 +106,7 @@ } //if //run the query against the index - $sq = new SearchQuery($query_string, $page_number, 10, true); + $sq = new SearchQuery($query_string, $page_number, 10, false); } //if if (!$site = get_site()) { diff --git a/search/querylib.php b/search/querylib.php index 87007ff361d..e435b0dd274 100644 --- a/search/querylib.php +++ b/search/querylib.php @@ -93,7 +93,8 @@ $validquery, $validindex, $results, - $results_per_page; + $results_per_page, + $total_results; public function __construct($term='', $page=1, $results_per_page=10, $cache=false) { global $CFG; @@ -140,37 +141,52 @@ } //set_query public function results() { - if ($this->validquery and $this->validindex) { - return $this->get_subset_results(); - } else { - return array(); - } //else + return $this->results; } //results - - private function get_subset_results() { - if ($this->count() < $this->results_per_page) { - $this->pagenumber = 1; - } else if ($this->pagenumber > $this->total_pages()) { - $this->pagenumber = $this->total_pages(); - } //if - - $start = ($this->pagenumber - 1) * $this->results_per_page; - - return array_slice($this->results, $start, 
$this->results_per_page); - } //get_results - - private function get_all_results() { + + private function process_results($all=false) { global $USER; + + $term = strtolower($this->term); + + //experimental - return more results + $strip_arr = array('author:', 'title:', '+', '-', 'doctype:'); + $stripped_term = str_replace($strip_arr, '', $term); + + $hits = $this->index->find($term." title:".$stripped_term." author:".$stripped_term); + //-- + + $hitcount = count($hits); + $this->total_results = $hitcount; + + if ($hitcount == 0) return array(); + + $totalpages = ceil($hitcount/$this->results_per_page); + + if (!$all) { + if ($hitcount < $this->results_per_page) { + $this->pagenumber = 1; + } else if ($this->pagenumber > $totalpages) { + $this->pagenumber =$totalpages; + } //if + + $start = ($this->pagenumber - 1) * $this->results_per_page; + $end = $start + $this->results_per_page; + + if ($end > $hitcount) { + $end = $hitcount; + } //if + } else { + $start = 0; + $end = $hitcount; + } //else $resultdoc = new SearchResult(); - $resultdocs = array(); - $i = 0; - - $term = strtolower($this->term); - - $hits = $this->index->find($term." title:".$term." 
author:".$term); - - foreach ($hits as $hit) { + $resultdocs = array(); + + for ($i = $start; $i < $end; $i++) { + $hit = $hits[$i]; + //check permissions on each result if ($this->can_display($USER, $hit->id, $hit->doctype, $hit->course_id, $hit->group_id)) { $resultdoc->number = $i; @@ -181,21 +197,19 @@ $resultdoc->author = $hit->author; //and store it - $resultdocs[] = clone($resultdoc); - - $i++; + $resultdocs[] = clone($resultdoc); } //if } //foreach - + return $resultdocs; - } //get_all_results + } //process_results private function get_results() { $cache = new SearchCache(); if ($this->cache and $cache->can_cache()) { if (!($resultdocs = $cache->cache($this->term))) { - $resultdocs = $this->get_all_results(); + $resultdocs = $this->process_results(); //cache the results so we don't have to compute this on every page-load $cache->cache($this->term, $resultdocs); //print "Using new results."; @@ -206,7 +220,7 @@ } else { //no caching :( //print "Caching disabled!"; - $resultdocs = $this->get_all_results(); + $resultdocs = $this->process_results(); } //else return $resultdocs; @@ -271,14 +285,9 @@ } //can_display public function count() { - return count($this->results); + return $this->total_results; } //count - //this shouldn't be in this class - //public function index_count() { - // return $this->index->count(); - //} //index_count - public function is_valid() { return ($this->validquery and $this->validindex); } //is_valid