diff --git a/search/.cvsignore b/search/.cvsignore new file mode 100644 index 00000000000..04fd92e9f34 --- /dev/null +++ b/search/.cvsignore @@ -0,0 +1 @@ +delete_log.php diff --git a/search/README.txt b/search/README.txt index 0737770d362..4841d3e4863 100644 --- a/search/README.txt +++ b/search/README.txt @@ -1,3 +1,19 @@ +2006/08/16 +---------- +Add/delete/update cron functions finished - can be called separately +or all at once via cron.php. + +Document date field added to index and database summary. + +Some index db functionality abstracted out to indexlib.php - can +use IndexDBControl class to add/del documents from database, and +to make sure the db table is functioning. + +DB sql files changed to add some extra fields. + +Default 'simple' query modified to search title and author, as well +as contents of document, to provide better results for users. + 2006/08/14 ---------- First revision of the advanced search page completed. Functional, @@ -101,5 +117,6 @@ To index for the first time, login as an admin user and browse to /search/index. or /search/stats.php - there will be a message and a link telling you to go index. 
-- Michael Champanis (mchampan) - cynnical@gmail.com + email: cynnical@gmail.com + skype: mchampan Summer of Code 2006 \ No newline at end of file diff --git a/search/add.php b/search/add.php new file mode 100644 index 00000000000..b862fdda028 --- /dev/null +++ b/search/add.php @@ -0,0 +1,83 @@ +dirroot/search/lib.php"); + + require_login(); + + if (!isadmin()) { + error("You need to be an admin user to use this page.", "$CFG->wwwroot/login/index.php"); + } //if + + //check for php5 (lib.php) + if (!search_check_php5()) { + $phpversion = phpversion(); + mtrace("Sorry, global search requires PHP 5.0.0 or later (currently using version $phpversion)"); + exit(0); + } //if + + require_once("$CFG->dirroot/search/indexlib.php"); + + $index = new Zend_Search_Lucene(SEARCH_INDEX_PATH); + $dbcontrol = new IndexDBControl(); + $addition_count = 0; + + mtrace('
Starting index update (additions)...');
+  mtrace('Index size before: '.$index->count()."\n");
+  
+  if ($mods = get_records_select('modules')) { //every row of the modules table
+  foreach ($mods as $mod) {
+    $class_file = $CFG->dirroot.'/search/documents/'.$mod->name.'_document.php';    
+    $db_names_function = $mod->name.'_db_names';
+    $get_document_function = $mod->name.'_single_document';
+    $additions = array();    
+    //only modules that have a search document file are considered
+    if (file_exists($class_file)) {
+      require_once($class_file);
+      //the document file must define both helper functions for the module to take part
+      if (function_exists($db_names_function) and function_exists($get_document_function)) {
+        mtrace("Checking $mod->name module for additions.");
+        $values = $db_names_function();
+        //find module records whose id has no row of this doctype in the search table
+        $sql = "select id, ".$values[0]." as docid from ".$values[1]."
+                where id not in
+                (select docid from ".SEARCH_DATABASE_TABLE." where doctype like '$mod->name')";
+        //NOTE(review): the filter uses "id" although $values[0] is what is selected as docid - confirm these are the same column
+        $records = get_records_sql($sql);     
+        //build one document per missing record (nothing is done when the query returns no array)
+        if (is_array($records)) {       
+          foreach($records as $record) {
+            $additions[] = $get_document_function($record->id);
+          } //foreach
+        } //if    
+          
+        foreach ($additions as $add) {
+          ++$addition_count;
+          
+          //insert the summary row into the db and keep the value addDocument() returns
+          $dbid = $dbcontrol->addDocument($add);          
+              
+          //synchronise db with index
+          $add->addField(Zend_Search_Lucene_Field::Keyword('dbid', $dbid));          
+          
+          mtrace("  Add: $add->title (database id = $add->dbid, moodle instance id = $add->docid)");
+          //hand the document to the index; commit() is called once after the module loop
+          $index->addDocument($add);
+        } //foreach
+                    
+        mtrace("Finished $mod->name.\n");
+      } //if              
+    } //if
+  } //foreach
+  } //if
+  
+  //commit changes
+  $index->commit();
+  
+  //update index date
+  set_config("search_indexer_run_date", time());
+
+  mtrace("Added $addition_count documents.");
+  mtrace('Index size after: '.$index->count().'
'); + +?> \ No newline at end of file diff --git a/search/cron.php b/search/cron.php index 9bc2c2025dd..68e867745c3 100644 --- a/search/cron.php +++ b/search/cron.php @@ -4,7 +4,15 @@ require_once("$CFG->dirroot/search/lib.php"); mtrace("
Starting cron...\n");
+  
+  mtrace("--DELETE----"); //pass 1: clean-up of removed records
+  require_once("$CFG->dirroot/search/delete.php");
+  mtrace("--UPDATE----"); //pass 2: re-index records changed since the stored run date
+  require_once("$CFG->dirroot/search/update.php");
+  mtrace("--ADD-------"); //pass 3: index records that have no row in the search table yet
+  require_once("$CFG->dirroot/search/add.php");
+  mtrace("------------");
 
-  mtrace("
"); + mtrace("cron finished."); ?> \ No newline at end of file diff --git a/search/delete.php b/search/delete.php index 56c71223da7..a7aae71eb48 100644 --- a/search/delete.php +++ b/search/delete.php @@ -2,23 +2,82 @@ require_once('../config.php'); require_once("$CFG->dirroot/search/lib.php"); + + require_login(); + + if (!isadmin()) { + error("You need to be an admin user to use this page.", "$CFG->wwwroot/login/index.php"); + } //if + + //check for php5 (lib.php) + if (!search_check_php5()) { + $phpversion = phpversion(); + mtrace("Sorry, global search requires PHP 5.0.0 or later (currently using version $phpversion)"); + exit(0); + } //if - mtrace("
Starting clean-up...\n");
+  require_once("$CFG->dirroot/search/indexlib.php");  
+  
+  $index = new Zend_Search_Lucene(SEARCH_INDEX_PATH);
+  $dbcontrol = new IndexDBControl();
+  $deletion_count = 0;   
+  
+  mtrace('
Starting clean-up of removed records...');
+  mtrace('Index size before: '.$index->count()."\n");
   
   if ($mods = get_records_select('modules')) {
   foreach ($mods as $mod) {
     $class_file = $CFG->dirroot.'/search/documents/'.$mod->name.'_document.php';
+    $delete_function = $mod->name.'_delete';
+    $db_names_function = $mod->name.'_db_names';
+    $deletions = array();    
     
     if (file_exists($class_file)) {
-      mtrace("Checking $mod->name module for deletions.\n");
-      
-      $records = get_records_sql("select * from ".$CFG->prefix."log where module = '$mod->name' and action like '%delete%'");
-      
-      print_r($records);
-    } //if    
+      require_once($class_file);
+      //the document file must define both helper functions for the module to be checked
+      if (function_exists($delete_function) and function_exists($db_names_function)) {
+        mtrace("Checking $mod->name module for deletions.");
+        $values = $db_names_function();
+        //find search table rows of this doctype whose docid no longer appears in the module's own table
+        $sql = "select id, docid from ".SEARCH_DATABASE_TABLE."
+                where doctype like '$mod->name'
+                and docid not in
+                (select ".$values[0]." from ".$values[1].")";
+
+        $records = get_records_sql($sql);     
+        //pass each stale docid through the module's delete function; its return value is what gets searched for in the index
+        if (is_array($records)) {       
+          foreach($records as $record) {
+            $deletions[] = $delete_function($record->docid);
+          } //foreach
+        } //if    
+          
+        foreach ($deletions as $delete) {        
+          $doc = $index->find("+docid:$delete +doctype:$mod->name");            
+          
+          //get the record, should only be one
+          foreach ($doc as $thisdoc) {
+            ++$deletion_count;
+            mtrace("  Delete: $thisdoc->title (database id = $thisdoc->dbid, index id = $thisdoc->id, moodle instance id = $thisdoc->docid)");
+            //remove the hit from the summary table first, then from the index
+            $dbcontrol->delDocument($thisdoc);
+            $index->delete($thisdoc->id);              
+          } //foreach
+        } //foreach
+                    
+        mtrace("Finished $mod->name.\n");
+      } //if              
+    } //if
   } //foreach
   } //if
+  
+  //commit changes
+  $index->commit();
+  
+  //update index date
+  set_config("search_indexer_run_date", time());
 
-  mtrace("
"); + mtrace("Finished $deletion_count removals."); + mtrace('Index size after: '.$index->count().'
'); ?> \ No newline at end of file diff --git a/search/indexer.php b/search/indexer.php index c7d8df8ac28..874775e2f90 100644 --- a/search/indexer.php +++ b/search/indexer.php @@ -47,7 +47,8 @@ } //if //php5 found, continue including php5-only files - require_once("$CFG->dirroot/search/Zend/Search/Lucene.php"); + //require_once("$CFG->dirroot/search/Zend/Search/Lucene.php"); + require_once("$CFG->dirroot/search/indexlib.php"); mtrace('
Server Time: '.date('r',time())."\n");
 
@@ -61,7 +62,8 @@
   
   //paths
   $index_path = SEARCH_INDEX_PATH;
-  $index_db_file = "$CFG->dirroot/search/db/$CFG->dbtype.sql";  
+  $index_db_file = "$CFG->dirroot/search/db/$CFG->dbtype.sql";
+  $dbcontrol = new IndexDBControl();  
   
   //setup directory in data root
   if (!file_exists($index_path)) {
@@ -77,23 +79,9 @@
   
   $index = new Zend_Search_Lucene($index_path, true);
   
-  //create the database tables
-  $tables = $db->MetaTables();
-    
-  if (in_array($CFG->prefix.'search_documents', $tables)) {
-    //delete_records('search_documents');    
-    //temporary measure - db doesn't have update scripts and I realised that cvs 1.1 db
-    //is incompatible with cvs 1.2! Must fix ASAP.    
-    execute_sql('drop table '.$CFG->prefix.'search_documents', false);
-    
-    ob_start(); //turn output buffering on - to hide modify_database() output
-    modify_database($index_db_file, '', false);
-    ob_end_clean(); //chuck the buffer and resume normal operation
-  } else {        
-    ob_start(); //turn output buffering on - to hide modify_database() output
-    modify_database($index_db_file, '', false);
-    ob_end_clean(); //chuck the buffer and resume normal operation
-  } //else
+  if (!$dbcontrol->checkDB()) {
+    search_pexit("Database error. Please check settings/files.");
+  } //if     
 
   //begin timer
   search_stopwatch();
@@ -130,19 +118,11 @@
             foreach($documents as $document) {
               $counter++;
                             
-              //object to insert into db                            
-              $doc->doctype   = $document->doctype;
-              $doc->title     = search_escape_string($document->title);
-              $doc->url       = search_escape_string($document->url);              
-              $doc->update    = time();                            
-              $doc->courseid  = $document->course_id;              
-              $doc->groupid   = $document->group_id;              
-              
-              //insert summary into db
-              $id = insert_record('search_documents', $doc);
+              //object to insert into db
+              $dbid = $dbcontrol->addDocument($document);
               
               //synchronise db with index
-              $document->addField(Zend_Search_Lucene_Field::Keyword('dbid', $id));
+              $document->addField(Zend_Search_Lucene_Field::Keyword('dbid', $dbid));
               
               //add document to index
               $index->addDocument($document);                  
diff --git a/search/indexlib.php b/search/indexlib.php
index de1cf20c46e..305768c0072 100644
--- a/search/indexlib.php
+++ b/search/indexlib.php
@@ -6,7 +6,8 @@
    * and the index itself.
    * */
 
-  require_once("$CFG->dirroot/search/Zend/Search/Lucene.php");
+  require_once("$CFG->dirroot/search/lib.php");
+  require_once("$CFG->dirroot/search/Zend/Search/Lucene.php");   
 
   class IndexInfo {
     private $path,        //index data directory
@@ -45,18 +46,18 @@
             
       $admin_tables = $db->MetaTables();
       
-      if (in_array($CFG->prefix.'search_documents', $admin_tables)) {
+      if (in_array($CFG->prefix.SEARCH_DATABASE_TABLE, $admin_tables)) {
         $db_exists = true;
         
         //total documents
-        $this->dbcount = count_records('search_documents');
+        $this->dbcount = count_records(SEARCH_DATABASE_TABLE);
 
         //individual document types
         $types = search_get_document_types();
         sort($types);
   
         foreach($types as $type) {
-          $c = count_records('search_documents', 'doctype', $type);
+          $c = count_records(SEARCH_DATABASE_TABLE, 'doctype', $type);
           $this->types[$type] = (int)$c;
         } //foreach
       } else {
@@ -121,5 +122,71 @@
       } //if
     } //__get        
   } //IndexInfo
+  
+  
+  /* DB Index control class - checks/rebuilds the search summary table
+   * (SEARCH_DATABASE_TABLE) and adds or removes document rows in it.
+   * */
+   
+  class IndexDBControl {  
+    //Checks whether the search table has been created.
+    //Returns true when the prefixed table name is among the tables
+    //reported by $db->MetaTables(), false otherwise.
+    public function checkTableExists() {
+      global $CFG, $db;
+      
+      $table = SEARCH_DATABASE_TABLE;
+      $tables = $db->MetaTables();
+      
+      //the name is looked up with the site's table prefix prepended
+      return in_array($CFG->prefix.$table, $tables);
+    } //checkTableExists
+    
+    //Rebuilds the search table from the sql file for the current db type and returns
+    //modify_database()'s result. NOTE: an existing table is dropped first (rows are lost).
+    public function checkDB() {
+      global $CFG;
+            
+      $sqlfile = "$CFG->dirroot/search/db/$CFG->dbtype.sql";
+   
+      if ($this->checkTableExists()) {                 
+        execute_sql('drop table '.$CFG->prefix.SEARCH_DATABASE_TABLE, false);
+      } //if
+        
+      ob_start(); //turn output buffering on - to hide modify_database() output
+      $ret = modify_database($sqlfile, '', false);
+      ob_end_clean(); //chuck the buffer and resume normal operation
+      return $ret;
+    } //checkDB
+    
+    //Stores a summary row for an index document in the search table.
+    //Returns false when no document is given, otherwise the value of
+    //insert_record() (callers store it in the index as the 'dbid' field).
+    public function addDocument($document=null) {
+      if ($document == null) {
+        return false;
+      } //if
+      
+      //object to insert into db (created explicitly - PHP 8 rejects properties on an undefined variable)
+      $doc = new stdClass();
+      $doc->doctype   = $document->doctype;
+      $doc->docid     = $document->docid;
+      $doc->title     = search_escape_string($document->title);
+      $doc->url       = search_escape_string($document->url);
+      $doc->update    = time();
+      $doc->docdate   = $document->date;
+      $doc->courseid  = $document->course_id;
+      $doc->groupid   = $document->group_id;
+        
+      //insert summary into db
+      return insert_record(SEARCH_DATABASE_TABLE, $doc);
+    } //addDocument
+    
+    //Deletes the summary row whose id equals the dbid
+    //field of the given index document.
+    public function delDocument($document) {
+      delete_records(SEARCH_DATABASE_TABLE, 'id', $document->dbid);
+    } //delDocument
+  } //IndexDBControl
       
 ?>
\ No newline at end of file
diff --git a/search/querylib.php b/search/querylib.php
index 83705cec8fc..c31a0dfd9e9 100644
--- a/search/querylib.php
+++ b/search/querylib.php
@@ -166,8 +166,10 @@
       $resultdocs = array();
       $i = 0;
       
-      $hits = $this->index->find(strtolower($this->term));
+      $term = strtolower($this->term);
       
+      $hits = $this->index->find($term." title:".$term." author:".$term);
+            
       foreach ($hits as $hit) {            
         //check permissions on each result
         if ($this->can_display($USER, $hit->id, $hit->doctype, $hit->course_id, $hit->group_id)) {
diff --git a/search/update.php b/search/update.php
new file mode 100644
index 00000000000..6b11e4d0db6
--- /dev/null
+++ b/search/update.php
@@ -0,0 +1,97 @@
+dirroot/search/lib.php");
+  
+  require_login();
+
+  if (!isadmin()) {
+    error("You need to be an admin user to use this page.", "$CFG->wwwroot/login/index.php");
+  } //if
+    
+  //check for php5 (lib.php)
+  if (!search_check_php5()) {
+    $phpversion = phpversion();
+    mtrace("Sorry, global search requires PHP 5.0.0 or later (currently using version $phpversion)");
+    exit(0);
+  } //if  
+    
+  require_once("$CFG->dirroot/search/indexlib.php");   
+  
+  $index = new Zend_Search_Lucene(SEARCH_INDEX_PATH);
+  $dbcontrol = new IndexDBControl();
+  $update_count = 0;
+  
+  $indexdate = $CFG->search_indexer_run_date;
+
+  mtrace("
Starting index update (updates)...\n");  
+  
+  if ($mods = get_records_select('modules')) { //every row of the modules table
+  foreach ($mods as $mod) {
+    $class_file = $CFG->dirroot.'/search/documents/'.$mod->name.'_document.php';
+    $get_document_function = $mod->name.'_single_document';
+    $delete_function = $mod->name.'_delete';
+    $db_names_function = $mod->name.'_db_names';
+    $updates = array();    
+    //only modules that have a search document file are considered
+    if (file_exists($class_file)) {
+      require_once($class_file);
+      //all three helper functions must be defined by the document file
+      if (function_exists($delete_function) and function_exists($db_names_function) and function_exists($get_document_function)) {
+        mtrace("Checking $mod->name module for updates.");
+        $values = $db_names_function();
+        //select module records whose $values[2] column is greater than the stored index run date
+        $sql = "select id, ".$values[0]." as docid from ".$values[1]."
+                where ".$values[2]." > $indexdate";
+        //NOTE(review): delete.php also resets search_indexer_run_date via set_config() - confirm $indexdate is not already "now" when run from cron.php
+        $records = get_records_sql($sql);     
+        //pass each changed docid through the module's delete function; its return value is what gets searched for in the index
+        if (is_array($records)) {       
+          foreach($records as $record) {
+            $updates[] = $delete_function($record->docid);
+          } //foreach
+        } //if    
+          
+        foreach ($updates as $update) {
+          ++$update_count;
+                
+          //find the old document in the index so it can be deleted
+          $doc = $index->find("+docid:$update +doctype:$mod->name");            
+          
+          //get the record, should only be one
+          foreach ($doc as $thisdoc) {            
+            mtrace("  Delete: $thisdoc->title (database id = $thisdoc->dbid, index id = $thisdoc->id, moodle instance id = $thisdoc->docid)");
+            //remove the hit from the summary table first, then from the index
+            $dbcontrol->delDocument($thisdoc);
+            $index->delete($thisdoc->id);              
+          } //foreach
+          
+          //add new modified document back into index
+          $add = $get_document_function($update);
+          
+          //insert the summary row into the db and keep the value addDocument() returns
+          $dbid = $dbcontrol->addDocument($add);                   
+              
+          //synchronise db with index
+          $add->addField(Zend_Search_Lucene_Field::Keyword('dbid', $dbid));          
+          
+          mtrace("  Add: $add->title (database id = $add->dbid, moodle instance id = $add->docid)");
+          //hand the document to the index; commit() is called once after the module loop
+          $index->addDocument($add);          
+        } //foreach
+                    
+        mtrace("Finished $mod->name.\n");
+      } //if              
+    } //if
+  } //foreach
+  } //if
+  
+  //commit changes
+  $index->commit();
+  
+  //update index date
+  set_config("search_indexer_run_date", time());
+
+  mtrace("Finished $update_count updates.
"); + +?> \ No newline at end of file