mirror of
https://github.com/moodle/moodle.git
synced 2025-01-19 06:18:28 +01:00
Merge branch 'MDL-53516-master' of git://github.com/merrill-oakland/moodle
This commit is contained in:
commit
e195da19e0
@ -33,6 +33,49 @@ defined('MOODLE_INTERNAL') || die();
|
||||
* @license http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
|
||||
*/
|
||||
class document extends \core_search\document {
|
||||
/**
|
||||
* Indicates the file contents were not indexed due to an error.
|
||||
*/
|
||||
const INDEXED_FILE_ERROR = -1;
|
||||
|
||||
/**
|
||||
* Indicates the file contents were not indexed due to filtering/settings.
|
||||
*/
|
||||
const INDEXED_FILE_FALSE = 0;
|
||||
|
||||
/**
|
||||
* Indicates the file contents are indexed with the record.
|
||||
*/
|
||||
const INDEXED_FILE_TRUE = 1;
|
||||
|
||||
/**
|
||||
* Any fields that are engine specific. These are fields that are solely used by a search engine plugin
|
||||
* for internal purposes.
|
||||
*
|
||||
* @var array
|
||||
*/
|
||||
protected static $enginefields = array(
|
||||
'solr_filegroupingid' => array(
|
||||
'type' => 'string',
|
||||
'stored' => true,
|
||||
'indexed' => true
|
||||
),
|
||||
'solr_fileid' => array(
|
||||
'type' => 'string',
|
||||
'stored' => true,
|
||||
'indexed' => false
|
||||
),
|
||||
'solr_filecontenthash' => array(
|
||||
'type' => 'string',
|
||||
'stored' => true,
|
||||
'indexed' => false
|
||||
),
|
||||
'solr_fileindexedcontent' => array(
|
||||
'type' => 'int',
|
||||
'stored' => true,
|
||||
'indexed' => true
|
||||
)
|
||||
);
|
||||
|
||||
/**
|
||||
* Formats the timestamp according to the search engine needs.
|
||||
@ -109,4 +152,43 @@ class document extends \core_search\document {
|
||||
|
||||
return parent::format_text($out);
|
||||
}
|
||||
|
||||
/**
 * Fills in default values for fields that are still unset. Called after document building, but before export.
 *
 * Sub-classes overriding this must call parent::apply_defaults().
 */
protected function apply_defaults() {
    parent::apply_defaults();

    if (isset($this->data['solr_filegroupingid'])) {
        // A grouping id has already been set, leave it alone.
        return;
    }

    // No grouping id yet: fall back to the document's own id.
    $this->data['solr_filegroupingid'] = $this->data['id'];
}
|
||||
|
||||
/**
 * Builds the engine record for one file attached to this document.
 *
 * The record starts from this document's own export, drops the text fields, and is then given a
 * file-specific id, type, title and the solr_file* fields.
 *
 * @param \stored_file $file The stored file to export.
 * @return array
 */
public function export_file_for_engine($file) {
    $data = $this->export_for_engine();

    // The content is indexed in the main document, so it is removed from the file record.
    unset($data['content'], $data['description1'], $data['description2']);

    $fileid = $file->get_id();

    // Append the file id so that the file record gets a unique id.
    $data['id'] .= '-solrfile' . $fileid;
    $data['type'] = \core_search\manager::TYPE_FILE;
    $data['solr_fileid'] = $fileid;
    $data['solr_filecontenthash'] = $file->get_contenthash();
    $data['solr_fileindexedcontent'] = self::INDEXED_FILE_TRUE;
    $data['title'] = $file->get_filename();

    return $data;
}
|
||||
}
|
||||
|
@ -98,7 +98,12 @@ class engine extends \core_search\engine {
|
||||
}
|
||||
|
||||
$query = new \SolrQuery();
|
||||
$this->set_query($query, $data->q);
|
||||
$maxrows = \core_search\manager::MAX_RESULTS;
|
||||
if ($this->file_indexing_enabled()) {
|
||||
// When using file indexing and grouping, we are going to collapse results, so we want extra results.
|
||||
$maxrows *= 2;
|
||||
}
|
||||
$this->set_query($query, $data->q, $maxrows);
|
||||
$this->add_fields($query);
|
||||
|
||||
// Search filters applied, we don't cache these filters as we don't want to pollute the cache with tmp filters
|
||||
@ -150,7 +155,15 @@ class engine extends \core_search\engine {
|
||||
}
|
||||
|
||||
try {
|
||||
return $this->query_response($this->client->query($query));
|
||||
if ($this->file_indexing_enabled()) {
|
||||
// Now group records by solr_filegroupingid. Limit to 3 results per group.
|
||||
$query->setGroup(true);
|
||||
$query->setGroupLimit(3);
|
||||
$query->addGroupField('solr_filegroupingid');
|
||||
return $this->grouped_files_query_response($this->client->query($query));
|
||||
} else {
|
||||
return $this->query_response($this->client->query($query));
|
||||
}
|
||||
} catch (\SolrClientException $ex) {
|
||||
debugging('Error executing the provided query: ' . $ex->getMessage(), DEBUG_DEVELOPER);
|
||||
$this->queryerror = $ex->getMessage();
|
||||
@ -166,9 +179,13 @@ class engine extends \core_search\engine {
|
||||
/**
|
||||
* Prepares a new query by setting the query, start offset and rows to return.
|
||||
* @param SolrQuery $query
|
||||
* @param object $q Containing query and filters.
|
||||
* @param object $q Containing query and filters.
|
||||
* @param null|int $maxresults The number of results to limit. manager::MAX_RESULTS if not set.
|
||||
*/
|
||||
protected function set_query($query, $q) {
|
||||
protected function set_query($query, $q, $maxresults = null) {
|
||||
if (!is_numeric($maxresults)) {
|
||||
$maxresults = \core_search\manager::MAX_RESULTS;
|
||||
}
|
||||
|
||||
// Set hightlighting.
|
||||
$query->setHighlight(true);
|
||||
@ -183,7 +200,7 @@ class engine extends \core_search\engine {
|
||||
$query->setQuery($q);
|
||||
|
||||
// A reasonable max.
|
||||
$query->setRows(\core_search\manager::MAX_RESULTS);
|
||||
$query->setRows($maxresults);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -204,6 +221,11 @@ class engine extends \core_search\engine {
|
||||
* @param object $response containing results.
|
||||
*/
|
||||
public function add_highlight_content($response) {
|
||||
if (!isset($response->highlighting)) {
|
||||
// There is no highlighting to add.
|
||||
return;
|
||||
}
|
||||
|
||||
$highlightedobject = $response->highlighting;
|
||||
foreach ($response->response->docs as $doc) {
|
||||
$x = $doc->id;
|
||||
@ -302,6 +324,155 @@ class engine extends \core_search\engine {
|
||||
return $docs;
|
||||
}
|
||||
|
||||
/**
 * Processes grouped file results into documents, with attached matching files.
 *
 * Results are grouped by solr_filegroupingid, so every group holds a "master document" and/or file
 * records that belong to it. Groups whose master document was not part of the results are completed
 * afterwards through get_missing_docs().
 *
 * @param SolrQueryResponse $queryresponse The response returned from solr server
 * @return array Final results to be displayed.
 */
protected function grouped_files_query_response($queryresponse) {
    $response = $queryresponse->getResponse();

    // If we can't find the grouping, or there are no matches in the grouping, return empty.
    if (!isset($response->grouped->solr_filegroupingid) || empty($response->grouped->solr_filegroupingid->matches)) {
        return array();
    }

    $numgranted = 0;
    $orderedids = array();
    $completedocs = array();
    $incompletedocs = array();

    // Highlighting may be absent from the response (add_highlight_content() guards for that too).
    $highlightingobj = isset($response->highlighting) ? $response->highlighting : null;

    // Each group represents a "master document".
    $groups = $response->grouped->solr_filegroupingid->groups;
    foreach ($groups as $group) {
        $groupid = $group->groupValue;
        $groupdocs = $group->doclist->docs;
        if (empty($groupdocs)) {
            // Nothing usable in this group; reset() would return false and break the checks below.
            continue;
        }
        $firstdoc = reset($groupdocs);

        if (!$searcharea = $this->get_search_area($firstdoc->areaid)) {
            // Well, this is a problem.
            continue;
        }

        // Check for access.
        $access = $searcharea->check_access($firstdoc->itemid);
        if ($access == \core_search\manager::ACCESS_DELETED) {
            // If deleted from Moodle, delete from index and then continue.
            // The first record of a group can be a file record, whose id does not match any
            // solr_filegroupingid, so delete using the group id. Fall back to the record id when the
            // group carries no value.
            $this->delete_by_id(empty($groupid) ? $firstdoc->id : $groupid);
            continue;
        }
        if ($access == \core_search\manager::ACCESS_DENIED) {
            // This means we should just skip for the current user.
            continue;
        }
        $numgranted++;

        $maindoc = false;
        $fileids = array();
        // Separate the main document and any files returned.
        foreach ($groupdocs as $groupdoc) {
            if ($groupdoc->id == $groupid) {
                $maindoc = $groupdoc;
            } else if (isset($groupdoc->solr_fileid)) {
                $fileids[] = $groupdoc->solr_fileid;
            }
        }

        // Store the id of this group, in order, for later merging.
        $orderedids[] = $groupid;

        if (!$maindoc) {
            // We don't have the main doc, store what we know for later building.
            $incompletedocs[$groupid] = $fileids;
        } else {
            if (isset($highlightingobj->$groupid)) {
                // Merge the highlighting for this doc.
                $this->merge_highlight_field_values($maindoc, $highlightingobj->$groupid);
            }
            $docdata = $this->standarize_solr_obj($maindoc);
            $doc = $this->to_document($searcharea, $docdata);
            // Now we need to attach the result files to the doc.
            foreach ($fileids as $fileid) {
                $doc->add_stored_file($fileid);
            }
            $completedocs[$groupid] = $doc;
        }

        if ($numgranted >= \core_search\manager::MAX_RESULTS) {
            // We have hit the max results, we will just ignore the rest.
            break;
        }
    }

    $incompletedocs = $this->get_missing_docs($incompletedocs);

    $out = array();
    // Now merge the complete and incomplete documents, in results order.
    foreach ($orderedids as $docid) {
        if (isset($completedocs[$docid])) {
            $out[] = $completedocs[$docid];
        } else if (isset($incompletedocs[$docid])) {
            $out[] = $incompletedocs[$docid];
        }
    }

    return $out;
}
|
||||
|
||||
/**
 * Retrieves main documents that were missing from a grouped result and attaches the given files to them.
 *
 * $missingdocs is indexed by document id; each value is an array of stored_files or stored file ids
 * to attach to the retrieved document. The returned array is indexed by document id as well.
 * An empty array is returned when nothing was requested or when the Solr query throws.
 *
 * @param array $missingdocs An array, indexed by document id, with arrays of files/ids to attach.
 * @return document[]
 */
protected function get_missing_docs($missingdocs) {
    if (empty($missingdocs)) {
        return array();
    }

    $wantedids = array_keys($missingdocs);

    // One custom query fetches all the missing documents, filtered by id.
    $query = new \SolrQuery();
    $this->set_query($query, '*', count($wantedids));
    $this->add_fields($query);
    $idfilter = '{!cache=false}id:(' . implode(' OR ', $wantedids) . ')';
    $query->addFilterQuery($idfilter);

    try {
        $rawresponse = $this->get_search_client()->query($query);
        $founddocs = $this->query_response($rawresponse);
    } catch (\SolrClientException $ex) {
        return array();
    } catch (\SolrServerException $ex) {
        return array();
    }

    $out = array();
    foreach ($founddocs as $founddoc) {
        $foundid = $founddoc->get('id');
        // Results we did not ask for are ignored.
        if (isset($missingdocs[$foundid])) {
            // Attach the files.
            foreach ($missingdocs[$foundid] as $file) {
                $founddoc->add_stored_file($file);
            }
            $out[$foundid] = $founddoc;
        }
    }

    return $out;
}
|
||||
|
||||
/**
|
||||
* Returns a standard php array from a \SolrObject instance.
|
||||
*
|
||||
@ -332,20 +503,25 @@ class engine extends \core_search\engine {
|
||||
public function add_document($document, $fileindexing = false) {
|
||||
$docdata = $document->export_for_engine();
|
||||
|
||||
if (!$this->add_text_document($docdata)) {
|
||||
if (!$this->add_solr_document($docdata)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if ($fileindexing) {
|
||||
// This will take care of updating all attached files in the index.
|
||||
$this->process_document_files($document);
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Adds a text document to the search engine.
|
||||
*
|
||||
* @param array $filedoc
|
||||
* @param array $doc
|
||||
* @return bool
|
||||
*/
|
||||
protected function add_text_document($doc) {
|
||||
protected function add_solr_document($doc) {
|
||||
$solrdoc = new \SolrInputDocument();
|
||||
foreach ($doc as $field => $value) {
|
||||
$solrdoc->addField($field, $value);
|
||||
@ -365,6 +541,293 @@ class engine extends \core_search\engine {
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
 * Indexes the files attached to the document, making the index match the document's current files.
 *
 * For documents that aren't known to be new, the index is checked for existing file records:
 * - New files are added.
 * - Existing and unchanged files are skipped.
 * - Files that are in the index but no longer on the document are deleted from the index.
 * - Files that have changed are re-indexed.
 *
 * Does nothing when file indexing is not enabled.
 *
 * @param document $document
 */
protected function process_document_files($document) {
    if (!$this->file_indexing_enabled()) {
        return;
    }

    // Maximum number of indexed file records to process at a time.
    $pagesize = 500;

    // Files currently attached to the document; whatever is left at the end gets (re-)indexed.
    $pending = $document->get_files();

    // If this isn't a new document, compare against the file records that are already indexed.
    if (!$document->get_is_new()) {
        $offset = 0;
        $stale = array();

        // Walk the indexed records page by page, so lots of files can be handled cleanly.
        list($total, $batch) = $this->get_indexed_files($document, 0, $pagesize);

        do {
            foreach ($batch as $indexed) {
                $fileid = $indexed->solr_fileid;

                if (!isset($pending[$fileid])) {
                    // Indexed but no longer attached. Deletion is deferred until paging is done,
                    // since deleting now could reorder the results.
                    $stale[] = $indexed->id;
                    continue;
                }

                $current = $pending[$fileid];

                // Filelib does not guarantee time modified is updated, so the other important values are
                // checked too. The last check covers a file that filtering blocked last time but that the
                // current settings consider indexable.
                $needsindexing = ($indexed->modified < $current->get_timemodified())
                        || (strcmp($indexed->title, $current->get_filename()) !== 0)
                        || ($indexed->solr_filecontenthash != $current->get_contenthash())
                        || ($indexed->solr_fileindexedcontent == document::INDEXED_FILE_FALSE
                            && $this->file_is_indexable($current));

                if (!$needsindexing) {
                    // Already indexed and unchanged, so it does not need to be sent again.
                    unset($pending[$fileid]);
                }
            }

            $offset += $pagesize;

            if ($offset < $total) {
                // The total count has not been reached yet, fetch the next page.
                list($total, $batch) = $this->get_indexed_files($document, $offset, $pagesize);
            }
        } while ($offset < $total);

        // Delete files that are no longer attached.
        foreach ($stale as $staleid) {
            // Deleted directly through the client, as the engine delete_by_id won't work on file docs.
            $this->get_search_client()->deleteById($staleid);
        }
    }

    // Now actually index all the remaining files.
    foreach ($pending as $file) {
        $this->add_stored_file($document, $file);
    }
}
|
||||
|
||||
/**
 * Fetches one page of the file records currently indexed for a document, plus the total count.
 *
 * On a Solr client or server exception the error is reported through debugging(), stored in
 * $this->queryerror, and an empty result is returned.
 *
 * @param document $document
 * @param int      $start The row to start the results on. Zero indexed.
 * @param int      $rows The number of rows to fetch
 * @return array   A two element array, the first is the total number of available results, the second is an array
 *                 of documents for the current request.
 */
protected function get_indexed_files($document, $start = 0, $rows = 500) {
    // A stripped down query, built by hand for efficiency, matching every record in the window.
    $query = new \SolrQuery();
    $query->setQuery('*');
    $query->setRows($rows);
    $query->setStart($start);
    // Sorting by id keeps the ordering consistent between pages.
    $query->addSortField('id');

    // Only the bare minimum of fields is requested.
    $wantedfields = array('id', 'modified', 'title', 'solr_fileid', 'solr_filecontenthash', 'solr_fileindexedcontent');
    foreach ($wantedfields as $wantedfield) {
        $query->addField($wantedfield);
    }

    // Restrict to file records that belong to this document's solr_filegroupingid.
    $query->addFilterQuery('{!cache=false}solr_filegroupingid:(' . $document->get('id') . ')');
    $query->addFilterQuery('type:' . \core_search\manager::TYPE_FILE);

    try {
        $responsedoc = $this->get_search_client()->query($query)->getResponse();

        if (!empty($responsedoc->response->numFound)) {
            $numfound = $responsedoc->response->numFound;
            return array($numfound, $this->convert_file_results($responsedoc));
        }
    } catch (\SolrClientException $ex) {
        debugging('Error executing the provided query: ' . $ex->getMessage(), DEBUG_DEVELOPER);
        $this->queryerror = $ex->getMessage();
    } catch (\SolrServerException $ex) {
        debugging('Error executing the provided query: ' . $ex->getMessage(), DEBUG_DEVELOPER);
        $this->queryerror = $ex->getMessage();
    }

    // Nothing found, or the query failed.
    return array(0, array());
}
|
||||
|
||||
/**
 * Lightweight conversion of a Solr response into plain objects describing already indexed files.
 *
 * @param SolrObject $responsedoc A Solr response document
 * @return stdClass[] An array of objects that contain the basic information for file processing.
 */
protected function convert_file_results($responsedoc) {
    $docs = $responsedoc->response->docs;
    if (!$docs) {
        return array();
    }

    $out = array();
    foreach ($docs as $doc) {
        // Only the bare minimum of information is copied; the modified time is converted from the engine format.
        $out[] = (object) array(
            'id' => $doc->id,
            'modified' => document::import_time_from_engine($doc->modified),
            'title' => $doc->title,
            'solr_fileid' => $doc->solr_fileid,
            'solr_filecontenthash' => $doc->solr_filecontenthash,
            'solr_fileindexedcontent' => $doc->solr_fileindexedcontent,
        );
    }

    return $out;
}
|
||||
|
||||
/**
 * Adds a file to the search engine.
 *
 * Files that are not indexable are stored as a plain record flagged INDEXED_FILE_FALSE. Indexable
 * files are posted to Solr's /update/extract handler; if that fails for any reason, a plain record
 * flagged INDEXED_FILE_ERROR is stored instead, so the index still holds a reference to the file.
 *
 * Notes about Solr and Tika indexing. We do not send the mime type, only the filename.
 * Tika has much better content type detection than Moodle, and we will have many more doc failures
 * if we try to send mime types.
 *
 * @param document $document
 * @param \stored_file $storedfile
 * @return void
 */
protected function add_stored_file($document, $storedfile) {
    $filedoc = $document->export_file_for_engine($storedfile);

    if (!$this->file_is_indexable($storedfile)) {
        // For files that we don't consider indexable, we will still place a reference in the search engine.
        $filedoc['solr_fileindexedcontent'] = document::INDEXED_FILE_FALSE;
        $this->add_solr_document($filedoc);
        return;
    }

    $curl = $this->get_curl_object();

    $url = $this->get_connection_url('/update/extract');

    // This will prevent solr from automatically making fields for every tika output.
    $url->param('uprefix', 'ignored_');

    // These are common fields that match the standard *_point dynamic field and cause an error.
    $url->param('fmap.media_white_point', 'ignored_mwp');
    $url->param('fmap.media_black_point', 'ignored_mbp');

    // Copy each key to the url with literal.
    // We place in a temp name then copy back to the true field, which prevents errors or Tika overwriting common field names.
    foreach ($filedoc as $key => $value) {
        // This will take any fields from tika that match our schema and discard them, so they don't overwrite ours.
        $url->param('fmap.'.$key, 'ignored_'.$key);
        // Place data in a tmp field.
        $url->param('literal.mdltmp_'.$key, $value);
        // Then move to the final field.
        $url->param('fmap.mdltmp_'.$key, $key);
    }

    // This sets the true filename for Tika.
    $url->param('resource.name', $storedfile->get_filename());

    // Everything in the try block is error checking around the curl request. It is just informational,
    // since we aren't tracking per file/doc results.
    try {
        // Now actually do the request.
        $result = $curl->post($url->out(false), array('myfile' => $storedfile));

        $code = $curl->get_errno();
        $info = $curl->get_info();

        if ($code != 0) {
            // This means an internal cURL error occurred; the error text is in the result.
            $message = 'Curl error '.$code.' while indexing file with document id '.$filedoc['id'].': '.$result.'.';
            debugging($message, DEBUG_DEVELOPER);
        } else if (isset($info['http_code']) && ((int)$info['http_code'] !== 200)) {
            // Unexpected HTTP response code.
            $message = 'Error while indexing file with document id '.$filedoc['id'];
            // Try to get error message out of msg or title if it exists.
            if (preg_match('|<str [^>]*name="msg"[^>]*>(.*?)</str>|i', $result, $matches)) {
                $message .= ': '.$matches[1];
            } else if (preg_match('|<title[^>]*>([^>]*)</title>|i', $result, $matches)) {
                $message .= ': '.$matches[1];
            }
            // This is a common error, happening whenever a file fails to index for any reason, so we will make it quieter.
            if (CLI_SCRIPT && !PHPUNIT_TEST) {
                mtrace($message);
            }
        } else if (preg_match('|<int [^>]*name="status"[^>]*>(\d*)</int>|i', $result, $matches)) {
            // We found the expected status field; a status of 0 means success.
            if ((int)$matches[1] === 0) {
                // The document was successfully indexed.
                return;
            }
            $message = 'Unexpected Solr status code '.(int)$matches[1];
            $message .= ' while indexing file with document id '.$filedoc['id'].'.';
            debugging($message, DEBUG_DEVELOPER);
        } else {
            // We received an unprocessable response.
            $message = 'Unexpected Solr response while indexing file with document id '.$filedoc['id'].': ';
            $message .= strtok($result, "\n");
            debugging($message, DEBUG_DEVELOPER);
        }
    } catch (\Exception $e) {
        // There was an error, but we are not tracking per-file success, so we just continue on.
        // The exception message is included so that the cause is not lost.
        $message = 'Unknown exception while indexing file "'.$storedfile->get_filename().'": '.$e->getMessage();
        debugging($message, DEBUG_DEVELOPER);
    }

    // If we get here, the document was not indexed due to an error. So we will index just the base info without the file.
    $filedoc['solr_fileindexedcontent'] = document::INDEXED_FILE_ERROR;
    $this->add_solr_document($filedoc);
}
|
||||
|
||||
/**
 * Decides whether the contents of the given file should be sent for indexing.
 *
 * A file is rejected when a maximum size is configured (maxindexfilekb, in kB) and the file exceeds it,
 * or when it is a Moodle backup file.
 *
 * @param \stored_file $file The file to check
 * @return bool True if the file can be indexed
 */
protected function file_is_indexable($file) {
    $toobig = !empty($this->config->maxindexfilekb)
            && ($file->get_filesize() > ($this->config->maxindexfilekb * 1024));
    if ($toobig) {
        // The file is too big to index.
        return false;
    }

    // We don't index Moodle backup files. There is nothing usefully indexable in them.
    return !($file->get_mimetype() == 'application/vnd.moodle.backup');
}
|
||||
|
||||
/**
|
||||
* Commits all pending changes.
|
||||
*
|
||||
@ -390,6 +853,15 @@ class engine extends \core_search\engine {
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
 * Return true if file indexing is supported and enabled. False otherwise.
 *
 * A missing fileindexing setting counts as disabled. It is read with empty() so that a config
 * object without the setting does not raise an undefined property notice.
 *
 * @return bool
 */
public function file_indexing_enabled() {
    return !empty($this->config->fileindexing);
}
|
||||
|
||||
/**
|
||||
* Defragments the index.
|
||||
*
|
||||
@ -406,7 +878,8 @@ class engine extends \core_search\engine {
|
||||
* @return void
|
||||
*/
|
||||
public function delete_by_id($id) {
    $client = $this->get_search_client();

    // Remove the record stored under this exact id.
    $client->deleteById($id);

    // We need to make sure we delete the item and all related files, which can be done with solr_filegroupingid.
    $client->deleteByQuery('solr_filegroupingid:' . $id);

    $this->commit();
}
|
||||
|
||||
|
@ -23,9 +23,15 @@
|
||||
*/
|
||||
|
||||
$string['connectionerror'] = 'The specified Solr server is not available or the specified index does not exist';
|
||||
$string['connectionsettings'] = 'Connection settings';
|
||||
$string['errorcreatingschema'] = 'Error creating the Solr schema: {$a}';
|
||||
$string['errorvalidatingschema'] = 'Error validating Solr schema, field {$a->fieldname} does not exist. Please <a href="{$a->setupurl}">follow this link</a> to setup the fields required by Moodle.';
|
||||
$string['extensionerror'] = 'The Apache Solr PHP extension is not installed. Please check the documentation.';
|
||||
$string['fileindexing'] = 'Enable file indexing';
|
||||
$string['fileindexing_help'] = 'If your Solr install supports it, this feature allows Moodle to send files to be indexed.';
|
||||
$string['fileindexsettings'] = 'File indexing settings';
|
||||
$string['maxindexfilekb'] = 'Maximum file size to index (kB)';
|
||||
$string['maxindexfilekb_help'] = 'Files larger than this number of kilobytes will be skipped for search indexing. 0 to index files of any size.';
|
||||
$string['missingconfig'] = 'Your Apache Solr server is not yet configured in Moodle.';
|
||||
$string['multivaluedfield'] = 'Field "{$a}" returned an array instead of a scalar, the field is probably defined in Solr with "Multivalued" to true, this means that Solr autocreated the field for you when you indexed data because you forgot to run search/engine/solr/cli/setup_schema.php. Please delete the current index, create a new one and run setup_schema.php before indexing data in Solr.';
|
||||
$string['nodatafromserver'] = 'No data from server';
|
||||
|
@ -31,6 +31,8 @@ if ($ADMIN->fulltree) {
|
||||
$settings->add(new admin_setting_heading('search_solr_settings', '', get_string('extensionerror', 'search_solr')));
|
||||
|
||||
} else {
|
||||
$settings->add(new admin_setting_heading('search_solr_connection',
|
||||
new lang_string('connectionsettings', 'search_solr'), ''));
|
||||
$settings->add(new admin_setting_configtext('search_solr/server_hostname', new lang_string('solrserverhostname', 'search_solr'), new lang_string('solrserverhostname_desc', 'search_solr'), '127.0.0.1', PARAM_TEXT));
|
||||
$settings->add(new admin_setting_configtext('search_solr/indexname', new lang_string('solrindexname', 'search_solr'), '', 'moodle', PARAM_TEXT));
|
||||
$settings->add(new admin_setting_configcheckbox('search_solr/secure', new lang_string('solrsecuremode', 'search_solr'), '', 0, 1, 0));
|
||||
@ -46,6 +48,15 @@ if ($ADMIN->fulltree) {
|
||||
$settings->add(new admin_setting_configtext('search_solr/ssl_keypassword', new lang_string('solrsslkeypassword', 'search_solr'), new lang_string('solrsslkeypassword_desc', 'search_solr'), '', PARAM_RAW));
|
||||
$settings->add(new admin_setting_configtext('search_solr/ssl_cainfo', new lang_string('solrsslcainfo', 'search_solr'), new lang_string('solrsslcainfo_desc', 'search_solr'), '', PARAM_RAW));
|
||||
$settings->add(new admin_setting_configtext('search_solr/ssl_capath', new lang_string('solrsslcapath', 'search_solr'), new lang_string('solrsslcapath_desc', 'search_solr'), '', PARAM_RAW));
|
||||
|
||||
$settings->add(new admin_setting_heading('search_solr_fileindexing',
        new lang_string('fileindexsettings', 'search_solr'), ''));
$settings->add(new admin_setting_configcheckbox('search_solr/fileindexing',
        new lang_string('fileindexing', 'search_solr'),
        new lang_string('fileindexing_help', 'search_solr'), 1));
// The default limit is 2 GB expressed in kB (2 * 1024 * 1024 = 2097152).
$settings->add(new admin_setting_configtext('search_solr/maxindexfilekb',
        new lang_string('maxindexfilekb', 'search_solr'),
        new lang_string('maxindexfilekb_help', 'search_solr'), '2097152', PARAM_INT));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -23,6 +23,7 @@
|
||||
* - define('TEST_SEARCH_SOLR_INDEXNAME', 'unittest');
|
||||
*
|
||||
* Optional params:
|
||||
* - define('TEST_SEARCH_SOLR_DISABLE_FILE_INDEXING', 1);
|
||||
* - define('TEST_SEARCH_SOLR_USERNAME', '');
|
||||
* - define('TEST_SEARCH_SOLR_PASSWORD', '');
|
||||
* - define('TEST_SEARCH_SOLR_SSLCERT', '');
|
||||
@ -99,6 +100,14 @@ class search_solr_engine_testcase extends advanced_testcase {
|
||||
set_config('ssl_cainfo', TEST_SEARCH_SOLR_CAINFOCERT, 'search_solr');
|
||||
}
|
||||
|
||||
if (defined('TEST_SEARCH_SOLR_DISABLE_FILE_INDEXING') && (TEST_SEARCH_SOLR_DISABLE_FILE_INDEXING == 1)) {
|
||||
set_config('fileindexing', 0, 'search_solr');
|
||||
} else {
|
||||
set_config('fileindexing', 1, 'search_solr');
|
||||
}
|
||||
|
||||
// We are only test indexing small string files, so setting this as low as we can.
|
||||
set_config('maxindexfilekb', 1, 'search_solr');
|
||||
|
||||
// Inject search solr engine into the testable core search as we need to add the mock
|
||||
// search component to it.
|
||||
@ -298,4 +307,222 @@ class search_solr_engine_testcase extends advanced_testcase {
|
||||
$regex = '|<span class="highlight">message</span>|';
|
||||
$this->assertRegExp($regex, $exported['content']);
|
||||
}
|
||||
|
||||
public function test_index_file() {
    $fileindexingoff = defined('TEST_SEARCH_SOLR_DISABLE_FILE_INDEXING') && (TEST_SEARCH_SOLR_DISABLE_FILE_INDEXING == 1);
    if ($fileindexingoff) {
        $this->markTestSkipped('Solr file indexing not enabled.');
        return;
    }

    // Very simple test: run the indexer, then search for a quoted phrase and expect exactly two results.
    $this->search->index();

    $querydata = new stdClass();
    $querydata->q = '"File contents"';
    $results = $this->search->search($querydata);

    $this->assertCount(2, $results);
}
|
||||
|
||||
/**
 * Indexes one document with a large set of files, then re-indexes the same document with some
 * files removed and one new file added, and checks which files remain searchable.
 *
 * Skipped when TEST_SEARCH_SOLR_DISABLE_FILE_INDEXING is defined and set to 1.
 */
public function test_reindexing_files() {
    $indexingdisabled = defined('TEST_SEARCH_SOLR_DISABLE_FILE_INDEXING')
            && (TEST_SEARCH_SOLR_DISABLE_FILE_INDEXING == 1);
    if ($indexingdisabled) {
        $this->markTestSkipped('Solr file indexing not enabled.');
        return;
    }

    // Engine and search area used for the whole test.
    $engine = $this->search->get_engine();
    $area = \core_search\manager::get_search_area(
        \core_search\manager::generate_areaid('core_mocksearch', 'role_capabilities')
    );

    // Take a single record from the area; both documents below are built from it.
    $records = $area->get_recordset_by_timestamp(0);
    $record = $records->current();
    $records->close();

    $doc = $area->get_document($record);

    // Template for the stored files; only the filename changes per file.
    $fs = get_file_storage();
    $syscontext = \context_system::instance();
    $filerecord = array(
        'contextid' => $syscontext->id,
        'component' => 'core',
        'filearea' => 'unittest',
        'itemid' => 0,
        'filepath' => '/',
    );

    // Create enough files to pass the 500 files threshold, which is the boundary when getting files.
    $boundary = 500;
    $top = (int)($boundary * 1.1);
    $files = array();
    foreach (range(0, $top - 1) as $index) {
        $filerecord['filename'] = 'searchfile'.$index;
        $stored = $fs->create_file_from_string($filerecord, 'Some FileContents'.$index);
        $doc->add_stored_file($stored);
        $files[$index] = $stored;
    }

    // Index the document with all of its files, then commit.
    $engine->add_document($doc, true);
    $engine->area_index_complete($area->get_area_id());

    // File indexes to verify. A value of 0 means the file gets deleted, 1 means it is kept.
    $checkfiles = array(
        0 => 0, // Beginning of the set.
        1 => 1,
        2 => 0,
        ($top - 3) => 0, // End of the set.
        ($top - 2) => 1,
        ($top - 1) => 0,
        ($boundary - 2) => 0, // Around the boundary between fetch groups.
        ($boundary - 1) => 0,
        $boundary => 0,
        ($boundary + 1) => 0,
        ((int)($boundary * 0.5)) => 1, // Some kept files from the middle.
        ((int)($boundary * 1.05)) => 1
    );

    // Each file is looked up once by content and once by filename.
    $prefixes = array('FileContents', 'searchfile');
    $query = new stdClass();

    // Before any change, every checked file must be found.
    foreach (array_keys($checkfiles) as $key) {
        foreach ($prefixes as $prefix) {
            $query->q = $prefix.$key;
            $this->assertCount(1, $this->search->search($query));
        }
    }

    // Drop the files flagged for deletion from the working set.
    foreach ($checkfiles as $key => $keep) {
        if ($keep) {
            continue;
        }
        unset($files[$key]);
    }

    // Add one brand new file, which is expected to be found afterwards.
    $filerecord['filename'] = 'searchfileNew';
    $files[] = $fs->create_file_from_string($filerecord, 'Some FileContentsNew');
    $checkfiles['New'] = 1;

    // Build a fresh document from the same record and attach the remaining files.
    $doc = $area->get_document($record);
    foreach ($files as $stored) {
        $doc->add_stored_file($stored);
    }

    // Reindex the document with the changed file set, then clear cached search results.
    $engine->add_document($doc, true);
    $engine->area_index_complete($area->get_area_id());
    cache_helper::purge_by_definition('core', 'search_results');

    // The keep flag doubles as the expected result count: 1 if kept, 0 if deleted.
    foreach ($checkfiles as $key => $keep) {
        foreach ($prefixes as $prefix) {
            $query->q = $prefix.$key;
            $this->assertCount($keep, $this->search->search($query));
        }
    }

    // A term present in every file must still give exactly one result.
    $query->q = 'Some';
    $this->assertCount(1, $this->search->search($query));
}
|
||||
|
||||
/**
 * Indexes a document with one file larger than 1kB and one small file, then checks that the
 * large file's contents cannot be found while its filename can, and that both the contents
 * and the filename of the small file can be found.
 *
 * Skipped when TEST_SEARCH_SOLR_DISABLE_FILE_INDEXING is defined and set to 1.
 */
public function test_index_filtered_file() {
    if (defined('TEST_SEARCH_SOLR_DISABLE_FILE_INDEXING') && (TEST_SEARCH_SOLR_DISABLE_FILE_INDEXING == 1)) {
        $this->markTestSkipped('Solr file indexing not enabled.');
        return;
    }

    // Get engine and area to work with.
    $engine = $this->search->get_engine();
    $areaid = \core_search\manager::generate_areaid('core_mocksearch', 'role_capabilities');
    $area = \core_search\manager::get_search_area($areaid);

    // Get a single record to make a doc from.
    $recordset = $area->get_recordset_by_timestamp(0);
    $record = $recordset->current();
    $recordset->close();

    $doc = $area->get_document($record);

    // Now we are going to make some files.
    $fs = get_file_storage();
    $syscontext = \context_system::instance();

    $filerecord = array(
        'contextid' => $syscontext->id,
        'component' => 'core',
        'filearea' => 'unittest',
        'itemid' => 0,
        'filepath' => '/',
        'filename' => 'largefile'
    );

    // We need to make a file greater than 1kB in size, which is the lowest filter size.
    $contents = 'Some LargeFindContent to find.'
            . str_repeat(' The quick brown fox jumps over the lazy dog.', 200);

    // Guard against the filler being changed to something too short to trigger the filter.
    $this->assertGreaterThan(1024, strlen($contents));

    $file = $fs->create_file_from_string($filerecord, $contents);
    $doc->add_stored_file($file);

    // Same file record, different name, with contents well under the size limit.
    $filerecord['filename'] = 'smallfile';
    $file = $fs->create_file_from_string($filerecord, 'Some SmallFindContent to find.');
    $doc->add_stored_file($file);

    $engine->add_document($doc, true);
    $engine->area_index_complete($area->get_area_id());

    $querydata = new stdClass();

    // We shouldn't be able to find the large file contents.
    $querydata->q = 'LargeFindContent';
    $this->assertCount(0, $this->search->search($querydata));

    // But we should be able to find the filename.
    $querydata->q = 'largefile';
    $this->assertCount(1, $this->search->search($querydata));

    // We should be able to find the small file contents.
    $querydata->q = 'SmallFindContent';
    $this->assertCount(1, $this->search->search($querydata));

    // And we should be able to find the filename.
    $querydata->q = 'smallfile';
    $this->assertCount(1, $this->search->search($querydata));
}
|
||||
|
||||
/**
 * Indexes everything, deletes one of the two matching results by its id, and checks that only
 * the other result is still returned.
 *
 * Skipped when TEST_SEARCH_SOLR_DISABLE_FILE_INDEXING is defined and set to 1.
 */
public function test_delete_by_id() {
    $indexingdisabled = defined('TEST_SEARCH_SOLR_DISABLE_FILE_INDEXING')
            && (TEST_SEARCH_SOLR_DISABLE_FILE_INDEXING == 1);
    if ($indexingdisabled) {
        $this->markTestSkipped('Solr file indexing not enabled.');
        return;
    }

    // Populate the index before grabbing the engine.
    $this->search->index();
    $engine = $this->search->get_engine();

    $query = new stdClass();
    $query->q = '"File contents"';

    // Both matching results must be present before the delete.
    $before = $this->search->search($query);
    $this->assertCount(2, $before);

    // Delete whichever result comes first and remember its id.
    $deleteid = reset($before)->get('id');
    $engine->delete_by_id($deleteid);
    cache_helper::purge_by_definition('core', 'search_results');

    // Only one result should be left, and it must not be the deleted one.
    $after = $this->search->search($query);
    $this->assertCount(1, $after);

    $remaining = reset($after);
    $this->assertNotEquals($deleteid, $remaining->get('id'));
}
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user