mirror of
https://github.com/moodle/moodle.git
synced 2025-04-13 20:42:22 +02:00
MDL-65806 Search: Solr does not find words in italic
This commit is contained in:
parent
f3507273e9
commit
c207289127
@ -272,7 +272,7 @@ class engine extends \core_search\engine {
|
||||
|
||||
$query = new \SolrDisMaxQuery();
|
||||
|
||||
$this->set_query($query, $data->q);
|
||||
$this->set_query($query, self::replace_underlines($data->q));
|
||||
$this->add_fields($query);
|
||||
|
||||
// Search filters applied, we don't cache these filters as we don't want to pollute the cache with tmp filters
|
||||
@ -750,6 +750,23 @@ class engine extends \core_search\engine {
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
 * Removes underlines at the edges of words in the content.
 *
 * For example '_frogs_' will become 'frogs', '_frogs and toads_' will become 'frogs and toads',
 * and 'frogs_and_toads' will be left as 'frogs_and_toads'.
 *
 * The reason for this is that for italic content_to_text puts _italic_ underlines at the start
 * and end of the italicised phrase (not between words). Solr treats underlines as part of the
 * word, which means that if you search for a word in italic then you can't find it.
 *
 * Matching is done in Unicode mode so that an underline between two non-ASCII letters (for
 * example 'été_and_été') is recognised as being inside a word and is left alone. If the string
 * is not valid UTF-8 the byte-wise pattern is used instead, and if that fails too the string
 * is returned unchanged.
 *
 * @param string $str String to replace
 * @return string Replaced string
 */
protected static function replace_underlines(string $str): string {
    // With the 'u' modifier \b uses Unicode letter properties, so accented and non-Latin
    // letters count as word characters when deciding where a word starts or ends.
    $result = preg_replace('~\b_|_\b~u', '', $str);
    if ($result === null) {
        // preg_replace returns null when the subject is not valid UTF-8; retry byte-wise.
        $result = preg_replace('~\b_|_\b~', '', $str);
    }

    // Never hand null back to a caller that was promised a string.
    return $result ?? $str;
}
|
||||
|
||||
/**
|
||||
* Adds a text document to the search engine.
|
||||
*
|
||||
@ -758,6 +775,14 @@ class engine extends \core_search\engine {
|
||||
*/
|
||||
protected function add_solr_document($doc) {
|
||||
$solrdoc = new \SolrInputDocument();
|
||||
|
||||
// Strip underlines at the edges of words in the content. The reason for this is that for italic
|
||||
// text, content_to_text puts _italic_ underlines. Solr treats underlines as part of the
|
||||
// word, which means that if you search for a word in italic then you can't find it.
|
||||
if (array_key_exists('content', $doc)) {
|
||||
$doc['content'] = self::replace_underlines($doc['content']);
|
||||
}
|
||||
|
||||
foreach ($doc as $field => $value) {
|
||||
$solrdoc->addField($field, $value);
|
||||
}
|
||||
|
@ -1010,6 +1010,68 @@ class search_solr_engine_testcase extends advanced_testcase {
|
||||
['Post1', 'Post2'], $results);
|
||||
}
|
||||
|
||||
/**
 * Checks that words which appear inside italic markup can be found by a search, and that
 * literal underlines in the text still behave sensibly. (Italic words used not to be found.)
 */
public function test_italics() {
    global $USER;

    // Switch to the real search areas.
    $this->search->clear_static();
    $this->search->add_core_search_areas();

    // Set up a course containing one forum.
    $generator = $this->getDataGenerator();
    $course = $generator->create_course();
    $forum = $generator->create_module('forum', ['course' => $course->id]);

    // Discussion name => message body. The bodies mix italic markup with literal underlines.
    $messages = [
        'Post1' => '<p>This is a post about <i>frogs</i>.</p>',
        'Post2' => '<p>This is a post about <i>toads and zombies</i>.</p>',
        'Post3' => '<p>This is a post about toads_and_zombies.</p>',
        'Post4' => '<p>This is a post about _leading and trailing_ underlines.</p>',
    ];

    // Post every discussion as the admin user.
    $this->setAdminUser();
    $forumgen = $generator->get_plugin_generator('mod_forum');
    foreach ($messages as $name => $message) {
        $forumgen->create_discussion([
            'course' => $course->id,
            'forum' => $forum->id,
            'userid' => $USER->id,
            'name' => $name,
            'message' => $message,
        ]);
    }

    // Index the data.
    $this->search->index();

    // Each entry is a search string followed by the titles it is expected to return.
    $expectations = [
        // A single italic word is found.
        ['frogs', ['Post1']],
        // Each word of an italic phrase finds post 2 (and not post 3).
        ['toads', ['Post2']],
        ['zombies', ['Post2']],
        // Underlines inside a word are kept, so the joined form finds post 3.
        ['toads_and_zombies', ['Post3']],
        // Queries with a leading or trailing underline find post 4.
        ['_leading', ['Post4']],
        ['trailing_', ['Post4']],
    ];

    // The same query object is reused for every search, only the query text changes.
    $querydata = new stdClass();
    foreach ($expectations as $expectation) {
        list($query, $titles) = $expectation;
        $querydata->q = $query;
        $results = $this->search->search($querydata);
        $this->assert_result_titles($titles, $results);
    }
}
|
||||
|
||||
/**
|
||||
* Asserts that the returned documents have the expected titles (regardless of order).
|
||||
*
|
||||
|
Loading…
x
Reference in New Issue
Block a user