diff --git a/search/.cvsignore b/search/.cvsignore new file mode 100644 index 00000000000..04fd92e9f34 --- /dev/null +++ b/search/.cvsignore @@ -0,0 +1 @@ +delete_log.php diff --git a/search/README.txt b/search/README.txt index 0737770d362..4841d3e4863 100644 --- a/search/README.txt +++ b/search/README.txt @@ -1,3 +1,19 @@ +2006/08/16 +---------- +Add/delete/update cron functions finished - can be called separately +or all at once via cron.php. + +Document date field added to index and database summary. + +Some index db functionality abstracted out to indexlib.php - can +use IndexDBControl class to add/del documents from database, and +to make sure the db table is functioning. + +DB sql files changed to add some extra fields. + +Default 'simple' query modified to search title and author, as well +as contents of document, to provide better results for users. + 2006/08/14 ---------- First revision of the advanced search page completed. Functional, @@ -101,5 +117,6 @@ To index for the first time, login as an admin user and browse to /search/index. or /search/stats.php - there will be a message and a link telling you to go index. 
-- Michael Champanis (mchampan) - cynnical@gmail.com + email: cynnical@gmail.com + skype: mchampan Summer of Code 2006 \ No newline at end of file diff --git a/search/add.php b/search/add.php new file mode 100644 index 00000000000..b862fdda028 --- /dev/null +++ b/search/add.php @@ -0,0 +1,83 @@ +dirroot/search/lib.php"); + + require_login(); + + if (!isadmin()) { + error("You need to be an admin user to use this page.", "$CFG->wwwroot/login/index.php"); + } //if + + //check for php5 (lib.php) + if (!search_check_php5()) { + $phpversion = phpversion(); + mtrace("Sorry, global search requires PHP 5.0.0 or later (currently using version $phpversion)"); + exit(0); + } //if + + require_once("$CFG->dirroot/search/indexlib.php"); + + $index = new Zend_Search_Lucene(SEARCH_INDEX_PATH); + $dbcontrol = new IndexDBControl(); + $addition_count = 0; + + mtrace('
Starting index update (additions)...'); + mtrace('Index size before: '.$index->count()."\n"); + + if ($mods = get_records_select('modules')) { + foreach ($mods as $mod) { + $class_file = $CFG->dirroot.'/search/documents/'.$mod->name.'_document.php'; + $db_names_function = $mod->name.'_db_names'; + $get_document_function = $mod->name.'_single_document'; + $additions = array(); + + if (file_exists($class_file)) { + require_once($class_file); + + if (function_exists($db_names_function) and function_exists($get_document_function)) { + mtrace("Checking $mod->name module for additions."); + $values = $db_names_function(); + + $sql = "select id, ".$values[0]." as docid from ".$values[1]." + where id not in + (select docid from ".SEARCH_DATABASE_TABLE." where doctype like '$mod->name')"; + + $records = get_records_sql($sql); + + if (is_array($records)) { + foreach($records as $record) { + $additions[] = $get_document_function($record->id); + } //foreach + } //if + + foreach ($additions as $add) { + ++$addition_count; + + //object to insert into db + $dbid = $dbcontrol->addDocument($add); + + //synchronise db with index + $add->addField(Zend_Search_Lucene_Field::Keyword('dbid', $dbid)); + + mtrace(" Add: $add->title (database id = $add->dbid, moodle instance id = $add->docid)"); + + $index->addDocument($add); + } //foreach + + mtrace("Finished $mod->name.\n"); + } //if + } //if + } //foreach + } //if + + //commit changes + $index->commit(); + + //update index date + set_config("search_indexer_run_date", time()); + + mtrace("Added $addition_count documents."); + mtrace('Index size after: '.$index->count().''); + +?> \ No newline at end of file diff --git a/search/cron.php b/search/cron.php index 9bc2c2025dd..68e867745c3 100644 --- a/search/cron.php +++ b/search/cron.php @@ -4,7 +4,15 @@ require_once("$CFG->dirroot/search/lib.php"); mtrace("
Starting cron...\n"); + + mtrace("--DELETE----"); + require_once("$CFG->dirroot/search/delete.php"); + mtrace("--UPDATE----"); + require_once("$CFG->dirroot/search/update.php"); + mtrace("--ADD-------"); + require_once("$CFG->dirroot/search/add.php"); + mtrace("------------"); - mtrace(""); + mtrace("cron finished."); ?> \ No newline at end of file diff --git a/search/delete.php b/search/delete.php index 56c71223da7..a7aae71eb48 100644 --- a/search/delete.php +++ b/search/delete.php @@ -2,23 +2,82 @@ require_once('../config.php'); require_once("$CFG->dirroot/search/lib.php"); + + require_login(); + + if (!isadmin()) { + error("You need to be an admin user to use this page.", "$CFG->wwwroot/login/index.php"); + } //if + + //check for php5 (lib.php) + if (!search_check_php5()) { + $phpversion = phpversion(); + mtrace("Sorry, global search requires PHP 5.0.0 or later (currently using version $phpversion)"); + exit(0); + } //if - mtrace("
Starting clean-up...\n"); + require_once("$CFG->dirroot/search/indexlib.php"); + + $index = new Zend_Search_Lucene(SEARCH_INDEX_PATH); + $dbcontrol = new IndexDBControl(); + $deletion_count = 0; + + mtrace(''); ?> \ No newline at end of file diff --git a/search/indexer.php b/search/indexer.php index c7d8df8ac28..874775e2f90 100644 --- a/search/indexer.php +++ b/search/indexer.php @@ -47,7 +47,8 @@ } //if //php5 found, continue including php5-only files - require_once("$CFG->dirroot/search/Zend/Search/Lucene.php"); + //require_once("$CFG->dirroot/search/Zend/Search/Lucene.php"); + require_once("$CFG->dirroot/search/indexlib.php"); mtrace('Starting clean-up of removed records...'); + mtrace('Index size before: '.$index->count()."\n"); if ($mods = get_records_select('modules')) { foreach ($mods as $mod) { $class_file = $CFG->dirroot.'/search/documents/'.$mod->name.'_document.php'; + $delete_function = $mod->name.'_delete'; + $db_names_function = $mod->name.'_db_names'; + $deletions = array(); if (file_exists($class_file)) { - mtrace("Checking $mod->name module for deletions.\n"); - - $records = get_records_sql("select * from ".$CFG->prefix."log where module = '$mod->name' and action like '%delete%'"); - - print_r($records); - } //if + require_once($class_file); + + if (function_exists($delete_function) and function_exists($db_names_function)) { + mtrace("Checking $mod->name module for deletions."); + $values = $db_names_function(); + + $sql = "select id, docid from ".SEARCH_DATABASE_TABLE." + where doctype like '$mod->name' + and docid not in + (select ".$values[0]." 
from ".$values[1].")"; + + $records = get_records_sql($sql); + + if (is_array($records)) { + foreach($records as $record) { + $deletions[] = $delete_function($record->docid); + } //foreach + } //if + + foreach ($deletions as $delete) { + $doc = $index->find("+docid:$delete +doctype:$mod->name"); + + //get the record, should only be one + foreach ($doc as $thisdoc) { + ++$deletion_count; + mtrace(" Delete: $thisdoc->title (database id = $thisdoc->dbid, index id = $thisdoc->id, moodle instance id = $thisdoc->docid)"); + + $dbcontrol->delDocument($thisdoc); + $index->delete($thisdoc->id); + } //foreach + } //foreach + + mtrace("Finished $mod->name.\n"); + } //if + } //if } //foreach } //if + + //commit changes + $index->commit(); + + //update index date + set_config("search_indexer_run_date", time()); - mtrace(""); + mtrace("Finished $deletion_count removals."); + mtrace('Index size after: '.$index->count().'
Server Time: '.date('r',time())."\n"); @@ -61,7 +62,8 @@ //paths $index_path = SEARCH_INDEX_PATH; - $index_db_file = "$CFG->dirroot/search/db/$CFG->dbtype.sql"; + $index_db_file = "$CFG->dirroot/search/db/$CFG->dbtype.sql"; + $dbcontrol = new IndexDBControl(); //setup directory in data root if (!file_exists($index_path)) { @@ -77,23 +79,9 @@ $index = new Zend_Search_Lucene($index_path, true); - //create the database tables - $tables = $db->MetaTables(); - - if (in_array($CFG->prefix.'search_documents', $tables)) { - //delete_records('search_documents'); - //temporary measure - db doesn't have update scripts and I realised that cvs 1.1 db - //is incompatible with cvs 1.2! Must fix ASAP. - execute_sql('drop table '.$CFG->prefix.'search_documents', false); - - ob_start(); //turn output buffering on - to hide modify_database() output - modify_database($index_db_file, '', false); - ob_end_clean(); //chuck the buffer and resume normal operation - } else { - ob_start(); //turn output buffering on - to hide modify_database() output - modify_database($index_db_file, '', false); - ob_end_clean(); //chuck the buffer and resume normal operation - } //else + if (!$dbcontrol->checkDB()) { + search_pexit("Database error. 
Please check settings/files."); + } //if //begin timer search_stopwatch(); @@ -130,19 +118,11 @@ foreach($documents as $document) { $counter++; - //object to insert into db - $doc->doctype = $document->doctype; - $doc->title = search_escape_string($document->title); - $doc->url = search_escape_string($document->url); - $doc->update = time(); - $doc->courseid = $document->course_id; - $doc->groupid = $document->group_id; - - //insert summary into db - $id = insert_record('search_documents', $doc); + //object to insert into db + $dbid = $dbcontrol->addDocument($document); //synchronise db with index - $document->addField(Zend_Search_Lucene_Field::Keyword('dbid', $id)); + $document->addField(Zend_Search_Lucene_Field::Keyword('dbid', $dbid)); //add document to index $index->addDocument($document); diff --git a/search/indexlib.php b/search/indexlib.php index de1cf20c46e..305768c0072 100644 --- a/search/indexlib.php +++ b/search/indexlib.php @@ -6,7 +6,8 @@ * and the index itself. * */ - require_once("$CFG->dirroot/search/Zend/Search/Lucene.php"); + require_once("$CFG->dirroot/search/lib.php"); + require_once("$CFG->dirroot/search/Zend/Search/Lucene.php"); class IndexInfo { private $path, //index data directory @@ -45,18 +46,18 @@ $admin_tables = $db->MetaTables(); - if (in_array($CFG->prefix.'search_documents', $admin_tables)) { + if (in_array($CFG->prefix.SEARCH_DATABASE_TABLE, $admin_tables)) { $db_exists = true; //total documents - $this->dbcount = count_records('search_documents'); + $this->dbcount = count_records(SEARCH_DATABASE_TABLE); //individual document types $types = search_get_document_types(); sort($types); foreach($types as $type) { - $c = count_records('search_documents', 'doctype', $type); + $c = count_records(SEARCH_DATABASE_TABLE, 'doctype', $type); $this->types[$type] = (int)$c; } //foreach } else { @@ -121,5 +122,76 @@ } //if } //__get } //IndexInfo + + + /* DB Index control class + * Creates the SEARCH_DATABASE_TABLE summary table and adds/removes document rows in it. + * */ + + class IndexDBControl { + /* Returns true when the prefixed SEARCH_DATABASE_TABLE is listed by $db->MetaTables(). */ + public function 
checkTableExists() { + global $CFG, $db; + + $table = SEARCH_DATABASE_TABLE; + $tables = $db->MetaTables(); + + if (in_array($CFG->prefix.$table, $tables)) { + return true; + } else { + return false; + } //else + } //checkTableExists + + /* Drops the summary table if it exists, re-runs the dbtype-specific .sql file and returns modify_database()'s result. */ + public function checkDB() { + global $CFG, $db; + + $sqlfile = "$CFG->dirroot/search/db/$CFG->dbtype.sql"; + $ret = false; + + if ($this->checkTableExists()) { + execute_sql('drop table '.$CFG->prefix.SEARCH_DATABASE_TABLE, false); + } //if + + ob_start(); //turn output buffering on - to hide modify_database() output + $ret = modify_database($sqlfile, '', false); + ob_end_clean(); //chuck the buffer and resume normal operation + + return $ret; + } //checkDB + + /* Inserts a summary row for $document and returns insert_record()'s result; returns false when $document is null. */ + public function addDocument($document=null) { + global $db; + + if ($document == null) { + return false; + } //if + + //object to insert into db + $doc = new stdClass(); + $doc->doctype = $document->doctype; + $doc->docid = $document->docid; + $doc->title = search_escape_string($document->title); + $doc->url = search_escape_string($document->url); + $doc->update = time(); + $doc->docdate = $document->date; + $doc->courseid = $document->course_id; + $doc->groupid = $document->group_id; + + //insert summary into db + $id = insert_record(SEARCH_DATABASE_TABLE, $doc); + + return $id; + } //addDocument + + /* Deletes the summary row whose id equals $document->dbid. */ + public function delDocument($document) { + global $db; + + delete_records(SEARCH_DATABASE_TABLE, 'id', $document->dbid); + } //delDocument + } //IndexDBControl ?> \ No newline at end of file diff --git a/search/querylib.php b/search/querylib.php index 83705cec8fc..c31a0dfd9e9 100644 --- a/search/querylib.php +++ b/search/querylib.php @@ -166,8 +166,10 @@ $resultdocs = array(); $i = 0; - $hits = $this->index->find(strtolower($this->term)); + $term = strtolower($this->term); + $hits = $this->index->find($term." title:".$term." 
author:".$term); + foreach ($hits as $hit) { //check permissions on each result if ($this->can_display($USER, $hit->id, $hit->doctype, $hit->course_id, $hit->group_id)) { diff --git a/search/update.php b/search/update.php new file mode 100644 index 00000000000..6b11e4d0db6 --- /dev/null +++ b/search/update.php @@ -0,0 +1,97 @@ +dirroot/search/lib.php"); + + require_login(); + + if (!isadmin()) { + error("You need to be an admin user to use this page.", "$CFG->wwwroot/login/index.php"); + } //if + + //check for php5 (lib.php) + if (!search_check_php5()) { + $phpversion = phpversion(); + mtrace("Sorry, global search requires PHP 5.0.0 or later (currently using version $phpversion)"); + exit(0); + } //if + + require_once("$CFG->dirroot/search/indexlib.php"); + + $index = new Zend_Search_Lucene(SEARCH_INDEX_PATH); + $dbcontrol = new IndexDBControl(); + $update_count = 0; + + $indexdate = $CFG->search_indexer_run_date; + + mtrace("Starting index update (updates)...\n"); + + if ($mods = get_records_select('modules')) { + foreach ($mods as $mod) { + $class_file = $CFG->dirroot.'/search/documents/'.$mod->name.'_document.php'; + $get_document_function = $mod->name.'_single_document'; + $delete_function = $mod->name.'_delete'; + $db_names_function = $mod->name.'_db_names'; + $updates = array(); + + if (file_exists($class_file)) { + require_once($class_file); + + if (function_exists($delete_function) and function_exists($db_names_function) and function_exists($get_document_function)) { + mtrace("Checking $mod->name module for updates."); + $values = $db_names_function(); + + $sql = "select id, ".$values[0]." as docid from ".$values[1]." + where ".$values[2]." 
> $indexdate"; + + $records = get_records_sql($sql); + + if (is_array($records)) { + foreach($records as $record) { + $updates[] = $delete_function($record->docid); + } //foreach + } //if + + foreach ($updates as $update) { + ++$update_count; + + //delete old document + $doc = $index->find("+docid:$update +doctype:$mod->name"); + + //get the record, should only be one + foreach ($doc as $thisdoc) { + mtrace(" Delete: $thisdoc->title (database id = $thisdoc->dbid, index id = $thisdoc->id, moodle instance id = $thisdoc->docid)"); + + $dbcontrol->delDocument($thisdoc); + $index->delete($thisdoc->id); + } //foreach + + //add new modified document back into index + $add = $get_document_function($update); + + //object to insert into db + $dbid = $dbcontrol->addDocument($add); + + //synchronise db with index + $add->addField(Zend_Search_Lucene_Field::Keyword('dbid', $dbid)); + + mtrace(" Add: $add->title (database id = $add->dbid, moodle instance id = $add->docid)"); + + $index->addDocument($add); + } //foreach + + mtrace("Finished $mod->name.\n"); + } //if + } //if + } //foreach + } //if + + //commit changes + $index->commit(); + + //update index date + set_config("search_indexer_run_date", time()); + + mtrace("Finished $update_count updates."); + +?> \ No newline at end of file