diff --git a/search/engine/solr/classes/document.php b/search/engine/solr/classes/document.php index f428dee5e26..34760f0b47b 100644 --- a/search/engine/solr/classes/document.php +++ b/search/engine/solr/classes/document.php @@ -63,17 +63,25 @@ class document extends \core_search\document { 'solr_fileid' => array( 'type' => 'string', 'stored' => true, - 'indexed' => false + 'indexed' => true ), 'solr_filecontenthash' => array( 'type' => 'string', 'stored' => true, - 'indexed' => false + 'indexed' => true ), - 'solr_fileindexedcontent' => array( + // Stores the status of file indexing. + 'solr_fileindexstatus' => array( 'type' => 'int', 'stored' => true, 'indexed' => true + ), + // Field to index, but not store, file contents. + 'solr_filecontent' => array( + 'type' => 'text', + 'stored' => false, + 'indexed' => true, + 'mainquery' => true ) ); @@ -186,7 +194,7 @@ class document extends \core_search\document { $data['type'] = \core_search\manager::TYPE_FILE; $data['solr_fileid'] = $file->get_id(); $data['solr_filecontenthash'] = $file->get_contenthash(); - $data['solr_fileindexedcontent'] = self::INDEXED_FILE_TRUE; + $data['solr_fileindexstatus'] = self::INDEXED_FILE_TRUE; $data['title'] = $file->get_filename(); return $data; diff --git a/search/engine/solr/classes/engine.php b/search/engine/solr/classes/engine.php index 4fe1f7db4eb..702fa17236e 100644 --- a/search/engine/solr/classes/engine.php +++ b/search/engine/solr/classes/engine.php @@ -234,7 +234,7 @@ class engine extends \core_search\engine { $fields = $documentclass::get_default_fields_definition(); $dismax = false; - if ($query instanceof SolrDisMaxQuery) { + if ($query instanceof \SolrDisMaxQuery) { $dismax = true; } @@ -618,7 +618,7 @@ class engine extends \core_search\engine { if ($indexedfile->solr_filecontenthash != $files[$fileid]->get_contenthash()) { continue; } - if ($indexedfile->solr_fileindexedcontent == document::INDEXED_FILE_FALSE && + if ($indexedfile->solr_fileindexstatus == document::INDEXED_FILE_FALSE && $this->file_is_indexable($files[$fileid])) { // This means that the last time we indexed this file, filtering blocked it. // Current settings say it is indexable, so we will allow it to be indexed. @@ -682,7 +682,7 @@ class engine extends \core_search\engine { $query->addField('title'); $query->addField('solr_fileid'); $query->addField('solr_filecontenthash'); - $query->addField('solr_fileindexedcontent'); + $query->addField('solr_fileindexstatus'); $query->addFilterQuery('{!cache=false}solr_filegroupingid:(' . $document->get('id') . ')'); $query->addFilterQuery('type:' . \core_search\manager::TYPE_FILE); @@ -729,7 +729,7 @@ class engine extends \core_search\engine { $result->title = $doc->title; $result->solr_fileid = $doc->solr_fileid; $result->solr_filecontenthash = $doc->solr_filecontenthash; - $result->solr_fileindexedcontent = $doc->solr_fileindexedcontent; + $result->solr_fileindexstatus = $doc->solr_fileindexstatus; $out[] = $result; } @@ -752,7 +752,7 @@ class engine extends \core_search\engine { if (!$this->file_is_indexable($storedfile)) { // For files that we don't consider indexable, we will still place a reference in the search engine. - $filedoc['solr_fileindexedcontent'] = document::INDEXED_FILE_FALSE; + $filedoc['solr_fileindexstatus'] = document::INDEXED_FILE_FALSE; $this->add_solr_document($filedoc); return; } @@ -764,6 +764,11 @@ class engine extends \core_search\engine { // This will prevent solr from automatically making fields for every tika output. $url->param('uprefix', 'ignored_'); + // Control how content is captured. This will keep our file content clean of non-important metadata. + $url->param('captureAttr', 'true'); + // Move the content to a field for indexing. + $url->param('fmap.content', 'solr_filecontent'); + // These are common fields that matches the standard *_point dynamic field and causes an error. $url->param('fmap.media_white_point', 'ignored_mwp'); $url->param('fmap.media_black_point', 'ignored_mbp'); @@ -833,7 +838,7 @@ class engine extends \core_search\engine { } // If we get here, the document was not indexed due to an error. So we will index just the base info without the file. - $filedoc['solr_fileindexedcontent'] = document::INDEXED_FILE_ERROR; + $filedoc['solr_fileindexstatus'] = document::INDEXED_FILE_ERROR; $this->add_solr_document($filedoc); } diff --git a/search/engine/solr/tests/engine_test.php b/search/engine/solr/tests/engine_test.php index 2e05935099c..319cb0a2ce6 100644 --- a/search/engine/solr/tests/engine_test.php +++ b/search/engine/solr/tests/engine_test.php @@ -165,6 +165,11 @@ class search_solr_engine_testcase extends advanced_testcase { $this->assertEquals($USER->id, $results[0]->get('userid')); $this->assertEquals(\context_system::instance()->id, $results[0]->get('contextid')); + // Do a test to make sure we aren't searching non-query fields, like areaid. + $querydata->q = \core_search\manager::generate_areaid('core_mocksearch', 'role_capabilities'); + $this->assertCount(0, $this->search->search($querydata)); + $querydata->q = 'message'; + sleep(1); $beforeadding = time(); sleep(1);