1
0
mirror of https://github.com/processwire/processwire.git synced 2025-08-13 18:24:57 +02:00

Add $sanitizer->word() method and upgrade $sanitizer->entitiesMarkdown() method to support escaped characters, making it possible to avoid collisions with basic markdown

This commit is contained in:
Ryan Cramer
2020-07-03 15:46:34 -04:00
parent 09c67d5478
commit 803e5c26d3

View File

@@ -1187,6 +1187,33 @@ class Sanitizer extends Wire {
if($headerName) $value = trim(preg_replace('/[^-_a-zA-Z0-9]/', '-', trim($value, ':')), '-'); if($headerName) $value = trim(preg_replace('/[^-_a-zA-Z0-9]/', '-', trim($value, ':')), '-');
return $value; return $value;
} }
/**
 * Get the first word found in the given string
 *
 * Delegates to Sanitizer::wordsArray() with the `maxWords` option forced to 1
 * (any `maxWords` value supplied by the caller is overridden), then returns
 * the first item of the resulting array, or a blank string when there is none.
 *
 * #pw-group-strings
 *
 * @param string $value String containing one or more words. Non-string values are passed through $sanitizer->string() first.
 * @param array $options Options to adjust behavior (forwarded to wordsArray):
 *  - `keepNumbers` (bool): Allow numbers as return value? (default=true)
 *  - `keepNumberFormat` (bool): Keep minus/comma/period in numbers rather than splitting into words?
 *     Also requires keepNumbers==true. (documented default=false; NOTE(review): the defaults array that
 *     appears to belong to wordsArray() sets this to true — confirm which is intended)
 *  - `keepUnderscore` (bool): Keep underscores as part of words? (default=false)
 *  - `keepHyphen` (bool): Keep hyphenated words? (default=false)
 *  - `keepChars` (array): Specify any of these to also keep as part of words ['.', ',', ';', '/', '*', ':', '+', '<', '>', '_', '-' ] (default=[])
 *  - `minWordLength` (int): Minimum word length (default=1)
 *  - `maxWordLength` (int): Maximum word length (default=80)
 *  - `stripTags` (bool): Strip markup tags so they don't contribute to returned word? (default=true)
 * @return string First word, or blank string if no word was found
 * @see Sanitizer::wordsArray()
 * @since 3.0.162
 *
 */
public function word($value, array $options = array()) {
	// normalize anything that is not already a string
	$text = is_string($value) ? $value : $this->string($value);

	// only one word is ever needed, regardless of what the caller asked for
	$wordOptions = $options;
	$wordOptions['maxWords'] = 1;

	$words = $this->wordsArray($text, $wordOptions);
	if(!count($words)) return '';

	// keys may not start at 0, so use reset() rather than $words[0]
	return reset($words);
}
/** /**
* Sanitize short string of text to single line without HTML * Sanitize short string of text to single line without HTML
@@ -2435,13 +2462,16 @@ class Sanitizer extends Wire {
'allow' => array('a', 'strong', 'em', 'code', 's', 'span', 'u', 'small', 'i'), 'allow' => array('a', 'strong', 'em', 'code', 's', 'span', 'u', 'small', 'i'),
'disallow' => array(), 'disallow' => array(),
'linkMarkup' => '<a href="{url}" rel="noopener noreferrer nofollow" target="_blank">{text}</a>', 'linkMarkup' => '<a href="{url}" rel="noopener noreferrer nofollow" target="_blank">{text}</a>',
'escapableChars' => array('*', '[', ']', '(', ')', '`', '_', '~'), // for basic markdown or brackets modes
); );
if($options === true || (is_int($options) && $options > 0)) $defaults['fullMarkdown'] = $options; if($options === true || (is_int($options) && $options > 0)) $defaults['fullMarkdown'] = $options;
if(!is_array($options)) $options = array(); if(!is_array($options)) $options = array();
$options = array_merge($defaults, $options); $options = array_merge($defaults, $options);
$findReplace = array();
if($options['fullMarkdown']) { if($options['fullMarkdown']) {
// full markdown
$markdown = $this->wire('modules')->get('TextformatterMarkdownExtra'); $markdown = $this->wire('modules')->get('TextformatterMarkdownExtra');
if(is_int($options['fullMarkdown'])) { if(is_int($options['fullMarkdown'])) {
@@ -2452,6 +2482,12 @@ class Sanitizer extends Wire {
$markdown->format($str); $markdown->format($str);
} else { } else {
// basic (inline) markdown
if(strpos($str, '\\') !== false) {
// allow certain escaped markdown characters to be ignored by our regexps i.e. "\*" or "\[", etc.
$findReplace = $this->getTextTools()->findReplaceEscapeChars($str, $options['escapableChars']);
}
$str = $this->entities($str, $options['flags'], $options['encoding'], $options['doubleEncode']); $str = $this->entities($str, $options['flags'], $options['encoding'], $options['doubleEncode']);
@@ -2507,9 +2543,13 @@ class Sanitizer extends Wire {
if(count($reps)) $str = str_replace(array_keys($reps), array_values($reps), $str); if(count($reps)) $str = str_replace(array_keys($reps), array_values($reps), $str);
} }
if(count($findReplace)) {
$str = str_replace(array_keys($findReplace), array_values($findReplace), $str);
}
return $str; return $str;
} }
/** /**
* Remove entity encoded characters from a string. * Remove entity encoded characters from a string.
* *
@@ -3971,7 +4011,8 @@ class Sanitizer extends Wire {
* *
* @param string|array $value String containing words * @param string|array $value String containing words
* @param array $options * @param array $options
* - `keepNumberFormat` (bool): Keep minus/comma/period in numbers rather than splitting into words? (default=false) * - `keepNumbers` (bool): Keep number-only words in return value? (default=true)
* - `keepNumberFormat` (bool): Keep minus/comma/period in numbers rather than splitting into words? Also requires keepNumbers==true. (default=false)
* - `keepUnderscore` (bool): Keep underscores as part of words? (default=false) * - `keepUnderscore` (bool): Keep underscores as part of words? (default=false)
* - `keepHyphen` (bool): Keep hyphenated words? (default=false) * - `keepHyphen` (bool): Keep hyphenated words? (default=false)
* - `keepChars` (array): Specify any of these to also keep as part of words ['.', ',', ';', '/', '*', ':', '+', '<', '>', '_', '-' ] (default=[]) * - `keepChars` (array): Specify any of these to also keep as part of words ['.', ',', ';', '/', '*', ':', '+', '<', '>', '_', '-' ] (default=[])
@@ -3990,11 +4031,11 @@ class Sanitizer extends Wire {
'maxWordLength' => 80, 'maxWordLength' => 80,
'maxWords' => 0, 'maxWords' => 0,
'keepHyphen' => false, 'keepHyphen' => false,
'keepUnderscore' => false, 'keepUnderscore' => false,
'keepChars' => array(), 'keepNumbers' => true,
'keepNumberFormat' => true, 'keepNumberFormat' => true,
'keepChars' => array(),
'stripTags' => true, 'stripTags' => true,
'getString' => false,
); );
$options = array_merge($defaults, $options); $options = array_merge($defaults, $options);
@@ -4017,7 +4058,10 @@ class Sanitizer extends Wire {
if(!strlen($value)) return array(); if(!strlen($value)) return array();
if($options['keepNumberFormat']) { if(!$options['keepNumbers']) {
$options['keepNumberFormat'] = false;
if(!ctype_alpha($value)) $value = preg_replace('/\d+[-\d,. ]*/', ' ', $value);
} else if($options['keepNumberFormat']) {
$replacements = $this->wordsArrayNumberReplacements($value, $replacementPrefix); $replacements = $this->wordsArrayNumberReplacements($value, $replacementPrefix);
} }
@@ -4044,30 +4088,48 @@ class Sanitizer extends Wire {
$regex = '!\pP*[' . $splitWith . ']\pP*!u'; $regex = '!\pP*[' . $splitWith . ']\pP*!u';
$words = preg_split($regex, "$value ", -1, PREG_SPLIT_NO_EMPTY); $words = preg_split($regex, "$value ", -1, PREG_SPLIT_NO_EMPTY);
if($words === false) { if($words === false) $words = array();
$words = array();
} else if($options['maxWords'] && count($words) > $options['maxWords']) {
$words = array_slice($words, 0, $options['maxWords']);
}
$hasReplacements = count($replacements); $hasReplacements = count($replacements);
$keepChars = $hasReplacements && count($options['keepChars']) ? implode('', $options['keepChars']) : ''; $keepChars = $hasReplacements && count($options['keepChars']) ? implode('', $options['keepChars']) : '';
$numWords = 0;
foreach($words as $key => $word) { foreach($words as $key => $word) {
if(!strlen(trim($word))) {
unset($words[$key]);
continue;
}
if($options['maxWords'] && $numWords >= $options['maxWords']) {
unset($words[$key]);
continue;
}
if($hasReplacements && strpos($word, $replacementPrefix) !== false) { if($hasReplacements && strpos($word, $replacementPrefix) !== false) {
$word = str_replace(array_keys($replacements), array_values($replacements), $word); $word = str_replace(array_keys($replacements), array_values($replacements), $word);
$words[$key] = $word; $words[$key] = $word;
} }
if(!$options['keepNumbers'] && ctype_digit($word)) {
// remove numbers
unset($words[$key]);
continue;
}
$length = $this->multibyteSupport ? mb_strlen($word) : strlen($word); $length = $this->multibyteSupport ? mb_strlen($word) : strlen($word);
if($length < $minLength || $length > $maxLength) { if($length < $minLength || $length > $maxLength) {
// remove any words that are outside the min/max length requirements // remove any words that are outside the min/max length requirements
unset($words[$key]); unset($words[$key]);
} else if($keepChars !== '') { continue;
} else if($keepChars !== '' && !strlen(trim($word, $keepChars))) {
// remove any words that consist only of keepChars // remove any words that consist only of keepChars
if(!strlen(trim($word, $keepChars))) unset($words[$key]); unset($words[$key]);
continue;
} }
$numWords++;
} }
if($options['maxWords'] && count($words) > $options['maxWords']) {
// may be impossible to reach but here as a backup
$words = array_slice($words, 0, $options['maxWords']);
}
return $words; return $words;
} }