MDL-66326 Global search: Delete from index when courses are deleted

Adds new API support within search engines for optional methods to
delete data for courses and contexts, and implements this for the
two core search plugins (simpledb and solr).

The new API is automatically called when courses or contexts are
deleted. When a whole course is deleted, it only sends the course
delete rather than sending 1,000 separate context deletions as
each activity/block is deleted.
This commit is contained in:
sam marshall 2019-08-08 10:38:51 +01:00
parent 9e4178a465
commit 7ba2a20166
13 changed files with 438 additions and 2 deletions

View File

@ -5390,6 +5390,9 @@ abstract class context extends stdClass implements IteratorAggregate {
$DB->delete_records('context', array('id'=>$this->_id));
// purge static context cache if entry present
context::cache_remove($this);
// Inform search engine to delete data related to this context.
\core_search\manager::context_deleted($this);
}
// ====== context level related methods ======

View File

@ -4309,6 +4309,9 @@ function delete_user(stdClass $user) {
// Delete all content associated with the user context, but not the context itself.
$usercontext->delete_content();
// Delete any search data.
\core_search\manager::context_deleted($usercontext);
// Any plugin that needs to cleanup should register this event.
// Trigger event.
$event = \core\event\user_deleted::create(
@ -5061,6 +5064,10 @@ function delete_course($courseorid, $showfeedback = true) {
}
}
// Tell the search manager we are about to delete a course. This prevents us sending updates
// for each individual context being deleted.
\core_search\manager::course_deleting_start($courseid);
$handler = core_course\customfield\course_handler::create();
$handler->delete_instance($courseid);
@ -5078,6 +5085,9 @@ function delete_course($courseorid, $showfeedback = true) {
format_base::reset_course_cache($courseid);
}
// Tell search that we have deleted the course so it can delete course data from the index.
\core_search\manager::course_deleting_finish($courseid);
// Trigger a course deleted event.
$event = \core\event\course_deleted::create(array(
'objectid' => $course->id,

View File

@ -521,6 +521,47 @@ abstract class engine {
*/
abstract function delete($areaid = null);
/**
* Deletes information related to a specific context id. This should be used when the context
* itself is deleted from Moodle.
*
* This only deletes information for the specified context - not for any child contexts.
*
* This function is optional; if not supported it will return false and the information will
* not be deleted from the search index.
*
* If an engine implements this function it should also implement delete_index_for_course;
* otherwise, nothing will be deleted when users delete an entire course at once.
*
* @param int $oldcontextid ID of context that has been deleted
* @return bool True if implemented
* @throws \core_search\engine_exception Engines may throw this exception for any problem
*/
public function delete_index_for_context(int $oldcontextid) {
return false;
}
/**
* Deletes information related to a specific course id. This should be used when the course
* itself is deleted from Moodle.
*
* This deletes all information relating to that course from the index, including all child
* contexts.
*
* This function is optional; if not supported it will return false and the information will
* not be deleted from the search index.
*
* If an engine implements this function then, ideally, it should also implement
* delete_index_for_context so that deletion of single activities/blocks also works.
*
* @param int $oldcourseid ID of course that has been deleted
* @return bool True if implemented
* @throws \core_search\engine_exception Engines may throw this exception for any problem
*/
public function delete_index_for_course(int $oldcourseid) {
return false;
}
/**
* Checks that the schema is the latest version. If the version stored in config does not match
* the current, this function will attempt to upgrade the schema.

View File

@ -142,6 +142,11 @@ class manager {
*/
protected static $instance = null;
/**
* @var array IDs (as keys) of course deletions in progress in this requuest, if any.
*/
protected static $coursedeleting = [];
/**
* @var \core_search\engine
*/
@ -1789,4 +1794,76 @@ class manager {
$engine->delete($areaid);
}
/**
* Informs the search system that a context has been deleted.
*
* This will clear the data from the search index, where the search engine supports that.
*
* This function does not usually throw an exception (so as not to get in the way of the
* context deletion finishing).
*
* This is called for all types of context deletion.
*
* @param \context $context Context object that has just been deleted
*/
public static function context_deleted(\context $context) {
if (self::is_indexing_enabled()) {
try {
// Hold on, are we deleting a course? If so, and this context is part of the course,
// then don't bother to send a delete because we delete the whole course at once
// later.
if (!empty(self::$coursedeleting)) {
$coursecontext = $context->get_course_context(false);
if ($coursecontext && array_key_exists($coursecontext->instanceid, self::$coursedeleting)) {
// Skip further processing.
return;
}
}
$engine = self::instance()->get_engine();
$engine->delete_index_for_context($context->id);
} catch (\moodle_exception $e) {
debugging('Error deleting search index data for context ' . $context->id . ': ' . $e->getMessage());
}
}
}
/**
* Informs the search system that a course is about to be deleted.
*
* This prevents it from sending hundreds of 'delete context' updates for all the individual
* contexts that are deleted.
*
* If you call this, you must call course_deleting_finish().
*
* @param int $courseid Course id that is being deleted
*/
public static function course_deleting_start(int $courseid) {
self::$coursedeleting[$courseid] = true;
}
/**
* Informs the search engine that a course has now been deleted.
*
* This causes the search engine to actually delete the index for the whole course.
*
* @param int $courseid Course id that no longer exists
*/
public static function course_deleting_finish(int $courseid) {
if (!array_key_exists($courseid, self::$coursedeleting)) {
// Show a debug warning. It doesn't actually matter very much, as we will now delete
// the course data anyhow.
debugging('course_deleting_start not called before deletion of ' . $courseid, DEBUG_DEVELOPER);
}
unset(self::$coursedeleting[$courseid]);
if (self::is_indexing_enabled()) {
try {
$engine = self::instance()->get_engine();
$engine->delete_index_for_course($courseid);
} catch (\moodle_exception $e) {
debugging('Error deleting search index data for course ' . $courseid . ': ' . $e->getMessage());
}
}
}
}

View File

@ -354,4 +354,38 @@ class engine extends \core_search\engine {
);
return array($sql, $params);
}
/**
* Simpledb supports deleting the index for a context.
*
* @param int $oldcontextid Context that has been deleted
* @return bool True to indicate that any data was actually deleted
* @throws \core_search\engine_exception
*/
public function delete_index_for_context(int $oldcontextid) {
global $DB;
try {
$DB->delete_records('search_simpledb_index', ['contextid' => $oldcontextid]);
} catch (\dml_exception $e) {
throw new \core_search\engine_exception('dbupdatefailed');
}
return true;
}
/**
* Simpledb supports deleting the index for a course.
*
* @param int $oldcourseid
* @return bool True to indicate that any data was actually deleted
* @throws \core_search\engine_exception
*/
public function delete_index_for_course(int $oldcourseid) {
global $DB;
try {
$DB->delete_records('search_simpledb_index', ['courseid' => $oldcourseid]);
} catch (\dml_exception $e) {
throw new \core_search\engine_exception('dbupdatefailed');
}
return true;
}
}

View File

@ -74,8 +74,6 @@ class search_simpledb_engine_testcase extends advanced_testcase {
$this->engine = new \search_simpledb\engine();
$this->search = testable_core_search::instance($this->engine);
$areaid = \core_search\manager::generate_areaid('core_mocksearch', 'mock_search_area');
$this->search->add_search_area($areaid, new core_mocksearch\search\mock_search_area());
$this->generator = self::getDataGenerator()->get_plugin_generator('core_search');
$this->generator->setup();
@ -105,6 +103,8 @@ class search_simpledb_engine_testcase extends advanced_testcase {
public function test_index() {
global $DB;
$this->add_mock_search_area();
$record = new \stdClass();
$record->timemodified = time() - 1;
$this->generator->create_record($record);
@ -130,6 +130,8 @@ class search_simpledb_engine_testcase extends advanced_testcase {
public function test_search() {
global $USER, $DB;
$this->add_mock_search_area();
$this->generator->create_record();
$record = new \stdClass();
$record->title = "Special title";
@ -214,6 +216,8 @@ class search_simpledb_engine_testcase extends advanced_testcase {
*/
public function test_delete() {
$this->add_mock_search_area();
$this->generator->create_record();
$this->generator->create_record();
$this->search->index();
@ -237,6 +241,8 @@ class search_simpledb_engine_testcase extends advanced_testcase {
*/
public function test_alloweduserid() {
$this->add_mock_search_area();
$area = new core_mocksearch\search\mock_search_area();
$record = $this->generator->create_record();
@ -309,6 +315,8 @@ class search_simpledb_engine_testcase extends advanced_testcase {
public function test_delete_by_id() {
$this->add_mock_search_area();
$this->generator->create_record();
$this->generator->create_record();
$this->search->index();
@ -334,6 +342,67 @@ class search_simpledb_engine_testcase extends advanced_testcase {
$this->assertNotEquals($deleteid, $result->get('id'));
}
/**
* Tries out deleting data for a context or a course.
*
* @throws moodle_exception
*/
public function test_deleted_contexts_and_courses() {
// Create some courses and activities.
$generator = $this->getDataGenerator();
$course1 = $generator->create_course(['fullname' => 'C1', 'summary' => 'xyzzy']);
$course1page1 = $generator->create_module('page', ['course' => $course1, 'name' => 'C1P1', 'content' => 'xyzzy']);
$generator->create_module('page', ['course' => $course1, 'name' => 'C1P2', 'content' => 'xyzzy']);
$course2 = $generator->create_course(['fullname' => 'C2', 'summary' => 'xyzzy']);
$course2page = $generator->create_module('page', ['course' => $course2, 'name' => 'C2P', 'content' => 'xyzzy']);
$course2pagecontext = \context_module::instance($course2page->cmid);
$this->search->index();
// By default we have all data in the index.
$this->assert_raw_index_contents('xyzzy', ['C1', 'C1P1', 'C1P2', 'C2', 'C2P']);
// Say we delete the course2pagecontext...
$this->engine->delete_index_for_context($course2pagecontext->id);
$this->assert_raw_index_contents('xyzzy', ['C1', 'C1P1', 'C1P2', 'C2']);
// Now delete the second course...
$this->engine->delete_index_for_course($course2->id);
$this->assert_raw_index_contents('xyzzy', ['C1', 'C1P1', 'C1P2']);
// Finally let's delete using Moodle functions to check that works. Single context first.
course_delete_module($course1page1->cmid);
$this->assert_raw_index_contents('xyzzy', ['C1', 'C1P2']);
delete_course($course1, false);
$this->assert_raw_index_contents('xyzzy', []);
}
/**
* Check the contents of the index.
*
* @param string $searchword Word to search for within the content field
* @param string[] $expected Array of expected result titles, in alphabetical order
* @throws dml_exception
*/
protected function assert_raw_index_contents(string $searchword, array $expected) {
global $DB;
$results = $DB->get_records_select('search_simpledb_index',
$DB->sql_like('content', '?'), ['%' . $searchword . '%'], 'id, title');
$titles = array_map(function($x) {
return $x->title;
}, $results);
sort($titles);
$this->assertEquals($expected, $titles);
}
/**
* Adds a mock search area to the search system.
*/
protected function add_mock_search_area() {
$areaid = \core_search\manager::generate_areaid('core_mocksearch', 'mock_search_area');
$this->search->add_search_area($areaid, new core_mocksearch\search\mock_search_area());
}
/**
* Updates mssql fulltext index if necessary.
*

View File

@ -1445,4 +1445,40 @@ class engine extends \core_search\engine {
public function supports_users() {
return true;
}
/**
* Solr supports deleting the index for a context.
*
* @param int $oldcontextid Context that has been deleted
* @return bool True to indicate that any data was actually deleted
* @throws \core_search\engine_exception
*/
public function delete_index_for_context(int $oldcontextid) {
$client = $this->get_search_client();
try {
$client->deleteByQuery('contextid:' . $oldcontextid);
$client->commit(true);
return true;
} catch (\Exception $e) {
throw new \core_search\engine_exception('error_solr', 'search_solr', '', $e->getMessage());
}
}
/**
* Solr supports deleting the index for a course.
*
* @param int $oldcourseid
* @return bool True to indicate that any data was actually deleted
* @throws \core_search\engine_exception
*/
public function delete_index_for_course(int $oldcourseid) {
$client = $this->get_search_client();
try {
$client->deleteByQuery('courseid:' . $oldcourseid);
$client->commit(true);
return true;
} catch (\Exception $e) {
throw new \core_search\engine_exception('error_solr', 'search_solr', '', $e->getMessage());
}
}
}

View File

@ -26,6 +26,7 @@ $string['connectionerror'] = 'The specified Solr server is not available or the
$string['connectionsettings'] = 'Connection settings';
$string['errorcreatingschema'] = 'Error creating the Solr schema: {$a}';
$string['errorvalidatingschema'] = 'Error validating Solr schema: field {$a->fieldname} does not exist. Please <a href="{$a->setupurl}">follow this link</a> to set up the required fields.';
$string['errorsolr'] = 'The Solr search engine reported an error: {$a}';
$string['extensionerror'] = 'The Apache Solr PHP extension is not installed. Please check the documentation.';
$string['fileindexing'] = 'Enable file indexing';
$string['fileindexing_help'] = 'If your Solr install supports it, this feature allows Moodle to send files to be indexed.<br/>

View File

@ -1250,4 +1250,73 @@ class search_solr_engine_testcase extends advanced_testcase {
$record->contextid = $contextid;
$this->generator->create_record($record);
}
/**
* Tries out deleting data for a context or a course.
*
* @throws coding_exception
* @throws moodle_exception
*/
public function test_deleted_contexts_and_courses() {
// Create some courses and activities.
$generator = $this->getDataGenerator();
$course1 = $generator->create_course(['fullname' => 'Course 1']);
$course1context = \context_course::instance($course1->id);
$course1page1 = $generator->create_module('page', ['course' => $course1]);
$course1page1context = \context_module::instance($course1page1->cmid);
$course1page2 = $generator->create_module('page', ['course' => $course1]);
$course1page2context = \context_module::instance($course1page2->cmid);
$course2 = $generator->create_course(['fullname' => 'Course 2']);
$course2context = \context_course::instance($course2->id);
$course2page = $generator->create_module('page', ['course' => $course2]);
$course2pagecontext = \context_module::instance($course2page->cmid);
// Create one search record in each activity and course.
$this->create_search_record($course1->id, $course1context->id, 'C1', 'Xyzzy');
$this->create_search_record($course1->id, $course1page1context->id, 'C1P1', 'Xyzzy');
$this->create_search_record($course1->id, $course1page2context->id, 'C1P2', 'Xyzzy');
$this->create_search_record($course2->id, $course2context->id, 'C2', 'Xyzzy');
$this->create_search_record($course2->id, $course2pagecontext->id, 'C2P', 'Xyzzy plugh');
$this->search->index();
// By default we have all results.
$this->assert_raw_solr_query_result('content:xyzzy', ['C1', 'C1P1', 'C1P2', 'C2', 'C2P']);
// Say we delete the course2pagecontext...
$this->engine->delete_index_for_context($course2pagecontext->id);
$this->assert_raw_solr_query_result('content:xyzzy', ['C1', 'C1P1', 'C1P2', 'C2']);
// Now delete the second course...
$this->engine->delete_index_for_course($course2->id);
$this->assert_raw_solr_query_result('content:xyzzy', ['C1', 'C1P1', 'C1P2']);
// Finally let's delete using Moodle functions to check that works. Single context first.
course_delete_module($course1page1->cmid);
$this->assert_raw_solr_query_result('content:xyzzy', ['C1', 'C1P2']);
delete_course($course1, false);
$this->assert_raw_solr_query_result('content:xyzzy', []);
}
/**
* Carries out a raw Solr query using the Solr basic query syntax.
*
* This is used to test data contained in the index without going through Moodle processing.
*
* @param string $q Search query
* @param string[] $expected Expected titles of results, in alphabetical order
*/
protected function assert_raw_solr_query_result(string $q, array $expected) {
$solr = $this->engine->get_search_client_public();
$query = new SolrQuery($q);
$results = $solr->query($query)->getResponse()->response->docs;
if ($results) {
$titles = array_map(function($x) {
return $x->title;
}, $results);
sort($titles);
} else {
$titles = [];
}
$this->assertEquals($expected, $titles);
}
}

View File

@ -34,4 +34,14 @@ class testable_engine extends \search_solr\engine {
public function test_set_config($name, $value) {
$this->config->$name = $value;
}
/**
* Gets the search client (this function is usually protected) for testing.
*
* @return \SolrClient Solr client object
* @throws \core_search\engine_exception
*/
public function get_search_client_public(): \SolrClient {
return parent::get_search_client();
}
}

View File

@ -116,4 +116,38 @@ class engine extends \core_search\engine {
$this->schemaupdates = [];
return $result;
}
/**
* Records delete of course index so it can be checked later.
*
* @param int $oldcourseid Course id
* @return bool True to indicate action taken
*/
public function delete_index_for_course(int $oldcourseid) {
$this->deletes[] = ['course', $oldcourseid];
return true;
}
/**
* Records delete of context index so it can be checked later.
*
* @param int $oldcontextid Context id
* @return bool True to indicate action taken
*/
public function delete_index_for_context(int $oldcontextid) {
$this->deletes[] = ['context', $oldcontextid];
return true;
}
/**
* Gets all course/context deletes applied, as an array. Each entry is an array with two
* values, the first is either 'course' or 'context' and the second is the id deleted.
*
* @return array List of deletes for comparison
*/
public function get_and_clear_deletes() {
$deletes = $this->deletes;
$this->deletes = [];
return $deletes;
}
}

View File

@ -1463,4 +1463,52 @@ class search_manager_testcase extends advanced_testcase {
}
}
/**
* Tests the context_deleted, course_deleting_start, and course_deleting_finish methods.
*/
public function test_context_deletion() {
$this->resetAfterTest();
// Create one course with 4 activities, and another with one.
$generator = $this->getDataGenerator();
$course1 = $generator->create_course();
$page1 = $generator->create_module('page', ['course' => $course1]);
$context1 = \context_module::instance($page1->cmid);
$page2 = $generator->create_module('page', ['course' => $course1]);
$page3 = $generator->create_module('page', ['course' => $course1]);
$context3 = \context_module::instance($page3->cmid);
$page4 = $generator->create_module('page', ['course' => $course1]);
$course2 = $generator->create_course();
$page5 = $generator->create_module('page', ['course' => $course2]);
$context5 = \context_module::instance($page5->cmid);
// Also create a user.
$user = $generator->create_user();
$usercontext = \context_user::instance($user->id);
$search = testable_core_search::instance();
// Delete two of the pages individually.
course_delete_module($page1->cmid);
course_delete_module($page3->cmid);
// Delete the course with another two.
delete_course($course1->id, false);
// Delete the user.
delete_user($user);
// Delete the page from the other course.
course_delete_module($page5->cmid);
// It should have deleted the contexts and the course, but not the contexts in the course.
$expected = [
['context', $context1->id],
['context', $context3->id],
['course', $course1->id],
['context', $usercontext->id],
['context', $context5->id]
];
$this->assertEquals($expected, $search->get_engine()->get_and_clear_deletes());
}
}

View File

@ -7,6 +7,10 @@ information provided here is intended especially for developers.
this to work, search engine plugins will need to implement the 'stopat' parameter if they
override the add_documents() function, and return an extra parameter from this function (see base
class in engine.php). Unmodified plugins will not work anymore.
* New search engine functions delete_index_for_context and delete_index_for_course are called by
the search manager to inform the search engine it can remove some documents from its index.
(Otherwise, documents from delete courses are never removed unless you reindex.) It is optional
for search engines to support these; if they don't implement them then behaviour is unchanged.
=== 3.7 ===