mirror of
https://github.com/moodle/moodle.git
synced 2025-03-20 15:40:17 +01:00
MDL-47962 filter_glossary: use preg features for fullmatch
This eliminates a lot of necessary string manipulation for something that preg can just do with the \b assertion. The change also extracts all the work to prepare ->work_phrase (renamed ->regexp) into a separate function. This is to pave the way for future efficiency gains, but for now I have not done them, so it is easier to verify by inspection that this commit does not change behaviour. Also, some tidy-ups to the filterobject structure, to eliminate some redundant fields.
This commit is contained in:
parent
d22699af7f
commit
2abf8fbf86
@ -469,18 +469,21 @@ abstract class moodle_text_filter {
|
||||
class filterobject {
|
||||
/** @var string */
|
||||
public $phrase;
|
||||
/** @var bool whether to only recognise full word matches. */
|
||||
public $fullmatch;
|
||||
|
||||
public $hreftagbegin;
|
||||
public $hreftagend;
|
||||
/** @var bool */
|
||||
public $casesensitive;
|
||||
public $fullmatch;
|
||||
|
||||
/** @var null|string once initialised, holds the regexp for matching this phrase. */
|
||||
public $workregexp = null;
|
||||
|
||||
/** @var mixed */
|
||||
public $replacementphrase;
|
||||
public $work_phrase;
|
||||
public $work_hreftagbegin;
|
||||
public $work_hreftagend;
|
||||
public $work_casesensitive;
|
||||
public $work_fullmatch;
|
||||
public $work_replacementphrase;
|
||||
/** @var bool */
|
||||
public $work_calculated;
|
||||
@ -504,8 +507,8 @@ class filterobject {
|
||||
$this->phrase = $phrase;
|
||||
$this->hreftagbegin = $hreftagbegin;
|
||||
$this->hreftagend = $hreftagend;
|
||||
$this->casesensitive = $casesensitive;
|
||||
$this->fullmatch = $fullmatch;
|
||||
$this->casesensitive = !empty($casesensitive);
|
||||
$this->fullmatch = !empty($fullmatch);
|
||||
$this->replacementphrase = $replacementphrase;
|
||||
$this->work_calculated = false;
|
||||
|
||||
@ -1240,7 +1243,7 @@ function filter_context_may_have_filter_settings($context) {
|
||||
* Process phrases intelligently found within a HTML text (such as adding links).
|
||||
*
|
||||
* @param string $text the text that we are filtering
|
||||
* @param array $linkarray an array of filterobjects
|
||||
* @param filterobject[] $linkarray an array of filterobjects
|
||||
* @param array $ignoretagsopen an array of opening tags that we should ignore while filtering
|
||||
* @param array $ignoretagsclose an array of corresponding closing tags
|
||||
* @param bool $overridedefaultignore True to only use tags provided by arguments
|
||||
@ -1251,11 +1254,15 @@ function filter_phrases($text, $linkarray, $ignoretagsopen = null, $ignoretagscl
|
||||
|
||||
global $CFG;
|
||||
|
||||
static $usedphrases;
|
||||
// Used if $CFG->filtermatchoneperpage is on. Array with keys being the workregexp
|
||||
// for things that have already been matched on this page.
|
||||
static $usedphrases = [];
|
||||
|
||||
$ignoretags = array(); // To store all the enclosig tags to be completely ignored.
|
||||
$tags = array(); // To store all the simple tags to be ignored.
|
||||
|
||||
$linkarray = filter_prepare_phrases_for_filtering($linkarray);
|
||||
|
||||
if (!$overridedefaultignore) {
|
||||
// A list of open/close tags that we should not replace within.
|
||||
// Extended to include <script>, <textarea>, <select> and <a> tags.
|
||||
@ -1281,12 +1288,6 @@ function filter_phrases($text, $linkarray, $ignoretagsopen = null, $ignoretagscl
|
||||
}
|
||||
}
|
||||
|
||||
// Invalid prefixes and suffixes for the fullmatch searches.
|
||||
// Every "word" character, but the underscore, is a invalid suffix or prefix.
|
||||
// (nice to use this because it includes national characters (accents...) as word characters.
|
||||
$filterinvalidprefixes = '([^\W_])';
|
||||
$filterinvalidsuffixes = '([^\W_])';
|
||||
|
||||
// Double up some magic chars to avoid "accidental matches".
|
||||
$text = preg_replace('/([#*%])/', '\1\1', $text);
|
||||
|
||||
@ -1296,18 +1297,21 @@ function filter_phrases($text, $linkarray, $ignoretagsopen = null, $ignoretagscl
|
||||
// Remove tags from $text.
|
||||
filter_save_tags($text, $tags);
|
||||
|
||||
// Prepare the limit for preg_match calls.
|
||||
if (!empty($CFG->filtermatchonepertext) || !empty($CFG->filtermatchoneperpage)) {
|
||||
$pregreplacelimit = 1;
|
||||
} else {
|
||||
$pregreplacelimit = -1; // No limit.
|
||||
}
|
||||
|
||||
// Time to cycle through each phrase to be linked.
|
||||
foreach ($linkarray as $linkobject) {
|
||||
|
||||
// Set some defaults if certain properties are missing.
|
||||
// Properties may be missing if the filterobject class has not been used to construct the object.
|
||||
if (empty($linkobject->phrase)) {
|
||||
if ($linkobject->workregexp === null) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Avoid integers < 1000 to be linked. See bug 1446.
|
||||
$intcurrent = intval($linkobject->phrase);
|
||||
if (!empty($intcurrent) && strval($intcurrent) == $linkobject->phrase && $intcurrent < 1000) {
|
||||
// If $CFG->filtermatchoneperpage, avoid previously matched linked phrases.
|
||||
if (!empty($CFG->filtermatchoneperpage) && isset($usedphrases[$linkobject->workregexp])) {
|
||||
continue;
|
||||
}
|
||||
|
||||
@ -1325,24 +1329,6 @@ function filter_phrases($text, $linkarray, $ignoretagsopen = null, $ignoretagscl
|
||||
// be cleared up before returning to the user.
|
||||
$linkobject->work_hreftagbegin = preg_replace('/([#*%])/', '\1\1', $linkobject->work_hreftagbegin);
|
||||
|
||||
if (empty($linkobject->casesensitive)) {
|
||||
$linkobject->work_casesensitive = false;
|
||||
} else {
|
||||
$linkobject->work_casesensitive = true;
|
||||
}
|
||||
if (empty($linkobject->fullmatch)) {
|
||||
$linkobject->work_fullmatch = false;
|
||||
} else {
|
||||
$linkobject->work_fullmatch = true;
|
||||
}
|
||||
|
||||
// Strip tags out of the phrase.
|
||||
$linkobject->work_phrase = strip_tags($linkobject->phrase);
|
||||
|
||||
// Double up chars that might cause a false match -- the duplicates will
|
||||
// be cleared up before returning to the user.
|
||||
$linkobject->work_phrase = preg_replace('/([#*%])/', '\1\1', $linkobject->work_phrase);
|
||||
|
||||
// Set the replacement phrase properly.
|
||||
if ($linkobject->replacementphrase) { // We have specified a replacement phrase.
|
||||
// Strip tags.
|
||||
@ -1351,54 +1337,15 @@ function filter_phrases($text, $linkarray, $ignoretagsopen = null, $ignoretagscl
|
||||
$linkobject->work_replacementphrase = '$1';
|
||||
}
|
||||
|
||||
// Quote any regular expression characters and the delimiter in the work phrase to be searched.
|
||||
$linkobject->work_phrase = preg_quote($linkobject->work_phrase, '/');
|
||||
|
||||
// Work calculated.
|
||||
$linkobject->work_calculated = true;
|
||||
}
|
||||
|
||||
// If $CFG->filtermatchoneperpage, avoid previously (request) linked phrases.
|
||||
if (!empty($CFG->filtermatchoneperpage)) {
|
||||
if (!empty($usedphrases) && in_array($linkobject->work_phrase, $usedphrases)) {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
// Regular expression modifiers.
|
||||
$modifiers = ($linkobject->work_casesensitive) ? 's' : 'isu'; // Works in unicode mode!
|
||||
|
||||
// Do we need to do a fullmatch?
|
||||
// If yes then go through and remove any non full matching entries.
|
||||
if ($linkobject->work_fullmatch) {
|
||||
$notfullmatches = array();
|
||||
$regexp = '/'.$filterinvalidprefixes.'('.$linkobject->work_phrase.')|('.
|
||||
$linkobject->work_phrase.')'.$filterinvalidsuffixes.'/'.$modifiers;
|
||||
|
||||
preg_match_all($regexp, $text, $listofnotfullmatches);
|
||||
|
||||
if ($listofnotfullmatches) {
|
||||
foreach (array_unique($listofnotfullmatches[0]) as $key => $value) {
|
||||
$notfullmatches['<*'.$key.'*>'] = $value;
|
||||
}
|
||||
if (!empty($notfullmatches)) {
|
||||
$text = str_replace($notfullmatches, array_keys($notfullmatches), $text);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Finally we do our highlighting.
|
||||
if (!empty($CFG->filtermatchonepertext) || !empty($CFG->filtermatchoneperpage)) {
|
||||
$resulttext = preg_replace('/('.$linkobject->work_phrase.')/'.$modifiers,
|
||||
$linkobject->work_hreftagbegin.
|
||||
$linkobject->work_replacementphrase.
|
||||
$linkobject->work_hreftagend, $text, 1);
|
||||
} else {
|
||||
$resulttext = preg_replace('/('.$linkobject->work_phrase.')/'.$modifiers,
|
||||
$linkobject->work_hreftagbegin.
|
||||
$linkobject->work_replacementphrase.
|
||||
$linkobject->work_hreftagend, $text);
|
||||
}
|
||||
$resulttext = preg_replace($linkobject->workregexp,
|
||||
$linkobject->work_hreftagbegin.
|
||||
$linkobject->work_replacementphrase.
|
||||
$linkobject->work_hreftagend, $text, $pregreplacelimit);
|
||||
|
||||
// If the text has changed we have to look for links again.
|
||||
if ($resulttext != $text) {
|
||||
@ -1409,15 +1356,9 @@ function filter_phrases($text, $linkarray, $ignoretagsopen = null, $ignoretagscl
|
||||
filter_save_tags($text, $tags);
|
||||
// If $CFG->filtermatchoneperpage, save linked phrases to request.
|
||||
if (!empty($CFG->filtermatchoneperpage)) {
|
||||
$usedphrases[] = $linkobject->work_phrase;
|
||||
$usedphrases[$linkobject->workregexp] = 1;
|
||||
}
|
||||
}
|
||||
|
||||
// Replace the not full matches before cycling to next link object.
|
||||
if (!empty($notfullmatches)) {
|
||||
$text = str_replace(array_keys($notfullmatches), $notfullmatches, $text);
|
||||
unset($notfullmatches);
|
||||
}
|
||||
}
|
||||
|
||||
// Rebuild the text with all the excluded areas.
|
||||
@ -1439,6 +1380,60 @@ function filter_phrases($text, $linkarray, $ignoretagsopen = null, $ignoretagscl
|
||||
return $text;
|
||||
}
|
||||
|
||||
/**
|
||||
* Prepare a list of link for processing with {@link filter_phrases()}.
|
||||
*
|
||||
* @param filterobject[] $linkarray the links that will be passed to filter_phrases().
|
||||
* @return filterobject[] the updated list of links with necessary pre-processing done.
|
||||
*/
|
||||
function filter_prepare_phrases_for_filtering(array $linkarray) {
|
||||
// Time to cycle through each phrase to be linked.
|
||||
foreach ($linkarray as $linkobject) {
|
||||
|
||||
// Set some defaults if certain properties are missing.
|
||||
// Properties may be missing if the filterobject class has not been used to construct the object.
|
||||
if (empty($linkobject->phrase)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Avoid integers < 1000 to be linked. See bug 1446.
|
||||
$intcurrent = intval($linkobject->phrase);
|
||||
if (!empty($intcurrent) && strval($intcurrent) == $linkobject->phrase && $intcurrent < 1000) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Strip tags out of the phrase.
|
||||
$linkobject->workregexp = strip_tags($linkobject->phrase);
|
||||
|
||||
if (!$linkobject->casesensitive) {
|
||||
$linkobject->workregexp = core_text::strtolower($linkobject->workregexp);
|
||||
}
|
||||
|
||||
// Double up chars that might cause a false match -- the duplicates will
|
||||
// be cleared up before returning to the user.
|
||||
$linkobject->workregexp = preg_replace('/([#*%])/', '\1\1', $linkobject->workregexp);
|
||||
|
||||
// Quote any regular expression characters and the delimiter in the work phrase to be searched.
|
||||
$linkobject->workregexp = preg_quote($linkobject->workregexp, '/');
|
||||
|
||||
if ($linkobject->fullmatch) {
|
||||
$linkobject->workregexp = '\b' . $linkobject->workregexp . '\b';
|
||||
}
|
||||
|
||||
$linkobject->workregexp = '/(' . $linkobject->workregexp . ')/s';
|
||||
|
||||
if (!$linkobject->casesensitive) {
|
||||
$linkobject->workregexp .= 'iu';
|
||||
}
|
||||
}
|
||||
|
||||
return $linkarray;
|
||||
}
|
||||
|
||||
function filter_prepare_phrases_for_replacement() {
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* Remove duplicate from a list of {@link filterobject}.
|
||||
*
|
||||
|
Loading…
x
Reference in New Issue
Block a user