mirror of
https://github.com/moodle/moodle.git
synced 2025-01-18 22:08:20 +01:00
Fixes parsing of HTM files
This commit is contained in:
parent
e1778c0c5e
commit
0f4471f5d3
@ -32,14 +32,24 @@ function get_text_for_indexing_htm(&$resource){
|
||||
$meta_attributes = $matches[2];
|
||||
$suffix = $matches[3];
|
||||
if (preg_match('/name="(keywords|description)"/i', $meta_attributes)){
|
||||
preg_match('/content="[^"]+"/i', $meta_attributes, $matches);
|
||||
$text = $prefix.' '.$matches[0].' '.$suffix;
|
||||
preg_match('/content="([^"]+)"/i', $meta_attributes, $matches);
|
||||
$text = $prefix.' '.$matches[1].' '.$suffix;
|
||||
}
|
||||
}
|
||||
// filter all html tags
|
||||
// $text = clean_text($text, FORMAT_PLAIN);
|
||||
// NOTE : this is done in ResourceSearchDocument __constructor
|
||||
$text = preg_replace("/<!--[^>]*?-->/", '', $text);
|
||||
// brutally filters all html tags
|
||||
$text = preg_replace("/<[^>]*>/", '', $text);
|
||||
$text = preg_replace("/<!--[^>]*-->/", '', $text);
|
||||
$text = html_entity_decode($text, ENT_COMPAT, 'UTF-8');
|
||||
$text = mb_convert_encoding($text, 'UTF-8', 'AUTO');
|
||||
|
||||
/*
|
||||
* debug code for tracing input
|
||||
echo "<hr/>";
|
||||
$FILE = fopen("filetrace.log", 'w');
|
||||
fwrite($FILE, $text);
|
||||
fclose($FILE);
|
||||
echo "<hr/>";
|
||||
*/
|
||||
|
||||
if (!empty($CFG->block_search_limit_index_body)){
|
||||
$text = shorten($text, $CFG->block_search_limit_index_body);
|
||||
|
Loading…
x
Reference in New Issue
Block a user