From d12989b797e63772f713ae227f4bdf05824e457b Mon Sep 17 00:00:00 2001 From: sam marshall Date: Thu, 19 Sep 2024 10:05:42 +0100 Subject: [PATCH] MDL-83211 core_search: Indexing can miss items due to race condition Changes search indexing so that (except for test scripts) each indexing run indexes only items up to 5 seconds ago, and the following indexing will start from the same point. This ensures all items are indexed, even if an item is written to the database one or two seconds after its timecreated value. --- search/classes/manager.php | 32 +++++++++++++++-- search/tests/manager_test.php | 68 +++++++++++++++++++++++++++++++++++ 2 files changed, 97 insertions(+), 3 deletions(-) diff --git a/search/classes/manager.php b/search/classes/manager.php index 907a8913d74..6751a415a76 100644 --- a/search/classes/manager.php +++ b/search/classes/manager.php @@ -122,6 +122,11 @@ class manager { */ const SEARCH_AREA_CATEGORY_OTHER = 'core-other'; + /** + * @var int To avoid race conditions, do not index documents newer than this many seconds. + */ + const INDEXING_DELAY = 5; + /** * @var \core_search\base[] Enabled search areas. */ @@ -1194,7 +1199,7 @@ class manager { * @return bool Whether there was any updated document or not. */ public function index($fullindex = false, $timelimit = 0, ?\progress_trace $progress = null) { - global $DB; + global $DB, $CFG; // Cannot combine time limit with reindex. if ($timelimit && $fullindex) { @@ -1229,6 +1234,11 @@ class manager { $stopat = self::get_current_time() + $timelimit; } + // Work out if we are in test mode, in which case we disable the indexing delay (because + // the normal pattern is to add a document and immediately index it). + $testmode = (PHPUNIT_TEST || defined('BEHAT_TEST')) && + empty($CFG->searchindexingdelayfortestscript); + foreach ($searchareas as $areaid => $searcharea) { $progress->output('Processing area: ' . $searcharea->get_visible_name()); @@ -1244,6 +1254,17 @@ class manager { $prevtimestart = intval(get_config($componentconfigname, $varname . '_indexingstart')); + // The effective start time of previous indexing was some seconds earlier because we + // only index data up to that time, to avoid race conditions (if it takes a while to + // write a document to the database and the timecreated for that document ends up being + // a second or two out of date). This mechanism is disabled for tests. + if (!$testmode) { + // The -1 here is because for example, if _indexingstart is 123, we will have + // indexed everything up to 123 - 5 = 118 (inclusive). So next time, we can start + // at 119 = 123 - 4 and we don't have to repeat 118. + $prevtimestart -= (self::INDEXING_DELAY - 1); + } + if ($fullindex === true) { $referencestarttime = 0; @@ -1278,8 +1299,13 @@ class manager { $options['stopat'] = $stopat; } $options['progress'] = $progress; - $iterator = new skip_future_documents_iterator(new \core\dml\recordset_walk( - $recordset, array($searcharea, 'get_document'), $options)); + // Skip 'future' documents, also any written very recently (to avoid race conditions). + // The exception is for PHPunit and Behat (step 'I update the global search index') + // where we allow it to index recent documents as well, we don't want it to have to wait. + $iterator = new skip_future_documents_iterator( + new \core\dml\recordset_walk($recordset, [$searcharea, 'get_document'], $options), + $indexingstart - ($testmode ? 0 : self::INDEXING_DELAY), + ); $result = $this->engine->add_documents($iterator, $searcharea, $options); $recordset->close(); $batchinfo = ''; diff --git a/search/tests/manager_test.php b/search/tests/manager_test.php index a24cef37161..0258d06a363 100644 --- a/search/tests/manager_test.php +++ b/search/tests/manager_test.php @@ -1625,4 +1625,72 @@ class manager_test extends \advanced_testcase { ]; $this->assertEquals($expected, $search->get_engine()->get_and_clear_deletes()); } + + /** + * Tests the indexing delay (used to avoid race conditions) in {@see manager::index()}. + * + * @covers \core_search\manager::index + */ + public function test_indexing_delay(): void { + global $USER, $CFG; + + $this->resetAfterTest(); + + // Normally the indexing delay is turned off for test scripts because we don't want to have + // to wait 5 seconds after creating anything to index it and it's not like there will be a + // race condition (indexing doesn't run at same time as adding). This turns it on. + $CFG->searchindexingdelayfortestscript = true; + + $this->setAdminUser(); + + // Create a course and a forum. + $generator = $this->getDataGenerator(); + $course = $generator->create_course(); + $forum = $generator->create_module('forum', ['course' => $course->id]); + + // Skip ahead 5 seconds so everything gets indexed. + $now = time(); + $now += manager::INDEXING_DELAY; + $search = \testable_core_search::instance(); + $search->fake_current_time($now); + $search->index(); + $search->get_engine()->get_and_clear_added_documents(); + + // Basic discussion data. + $basicdata = [ + 'course' => $course->id, + 'forum' => $forum->id, + 'userid' => $USER->id, + ]; + // Discussion so old it's prior to indexing delay (not realistic). + $generator->get_plugin_generator('mod_forum')->create_discussion(array_merge($basicdata, + ['timemodified' => $now - manager::INDEXING_DELAY, 'name' => 'Frog'])); + // Discussion just within indexing delay (simulates if it took a while to add to database). + $generator->get_plugin_generator('mod_forum')->create_discussion(array_merge($basicdata, + ['timemodified' => $now - (manager::INDEXING_DELAY - 1), 'name' => 'Toad'])); + // Move time along a bit. + $now += 100; + $search->fake_current_time($now); + // Discussion that happened 5 seconds before the new now. + $generator->get_plugin_generator('mod_forum')->create_discussion(array_merge($basicdata, + ['timemodified' => $now - (manager::INDEXING_DELAY), 'name' => 'Zombie'])); + // This one only happened 4 seconds before so it shouldn't be indexed yet. + $generator->get_plugin_generator('mod_forum')->create_discussion(array_merge($basicdata, + ['timemodified' => $now - (manager::INDEXING_DELAY - 1), 'name' => 'Werewolf'])); + + // Reindex and check that it added the middle two discussions. + $search->index(); + $added = $search->get_engine()->get_and_clear_added_documents(); + $this->assertCount(2, $added); + $this->assertEquals('Toad', $added[0]->get('title')); + $this->assertEquals('Zombie', $added[1]->get('title')); + + // Move time forwards a couple of seconds and now the last one will get indexed. + $now += 2; + $search->fake_current_time($now); + $search->index(); + $added = $search->get_engine()->get_and_clear_added_documents(); + $this->assertCount(1, $added); + $this->assertEquals('Werewolf', $added[0]->get('title')); + } }