mirror of
https://github.com/processwire/processwire.git
synced 2025-08-13 02:04:35 +02:00
Add new $sanitizer->words() method and rewrite existing $sanitizer->word() method
This commit is contained in:
@@ -400,7 +400,9 @@ class Sanitizer extends Wire {
|
|||||||
|
|
||||||
// remove leading or trailing dashes, underscores, dots
|
// remove leading or trailing dashes, underscores, dots
|
||||||
if($beautify) {
|
if($beautify) {
|
||||||
if(strpos($extras, $replacementChar) === false) $extras .= $replacementChar;
|
if($replacementChar !== null && strlen($replacementChar)) {
|
||||||
|
if(strpos($extras, $replacementChar) === false) $extras .= $replacementChar;
|
||||||
|
}
|
||||||
$value = trim($value, $extras);
|
$value = trim($value, $extras);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1307,17 +1309,101 @@ class Sanitizer extends Wire {
|
|||||||
* - `keepChars` (array): Specify any of these to also keep as part of words ['.', ',', ';', '/', '*', ':', '+', '<', '>', '_', '-' ] (default=[])
|
* - `keepChars` (array): Specify any of these to also keep as part of words ['.', ',', ';', '/', '*', ':', '+', '<', '>', '_', '-' ] (default=[])
|
||||||
* - `minWordLength` (int): Minimum word length (default=1)
|
* - `minWordLength` (int): Minimum word length (default=1)
|
||||||
* - `maxWordLength` (int): Maximum word length (default=80)
|
* - `maxWordLength` (int): Maximum word length (default=80)
|
||||||
|
* - `maxWords` (int): Maximum words (default=1 or 99 if a separator option is specified)
|
||||||
|
* - `maxLength` (int): Maximum returned string length (default=1024)
|
||||||
* - `stripTags` (bool): Strip markup tags so they don’t contribute to returned word? (default=true)
|
* - `stripTags` (bool): Strip markup tags so they don’t contribute to returned word? (default=true)
|
||||||
|
* - `separator` (string): Merge multiple words into one word split by this character? (default='', disabled) 3.0.195+
|
||||||
|
* - `ascii` (bool): Allow only ASCII word characters? (default=false)
|
||||||
|
* - `beautify` (bool): Make ugly strings more pretty? This collapses and trims redundant separators (default=false)
|
||||||
* @return string
|
* @return string
|
||||||
* @see Sanitizer::wordsArray()
|
* @see Sanitizer::wordsArray()
|
||||||
* @since 3.0.162
|
* @since 3.0.162
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
public function word($value, array $options = array()) {
|
public function word($value, array $options = array()) {
|
||||||
|
|
||||||
if(!is_string($value)) $value = $this->string($value);
|
if(!is_string($value)) $value = $this->string($value);
|
||||||
$options['maxWords'] = 1;
|
|
||||||
|
$separator = isset($options['separator']) ? $options['separator'] : null;
|
||||||
|
$keepChars = isset($options['keepChars']) ? $options['keepChars'] : array();
|
||||||
|
$maxLength = isset($options['maxLength']) ? (int) $options['maxLength'] : 1024;
|
||||||
|
$minWordLength = isset($options['minWordLength']) ? $options['minWordLength'] : 1;
|
||||||
|
|
||||||
|
if(empty($options['maxWords'])) $options['maxWords'] = $separator !== null ? 99 : 1;
|
||||||
|
if(!empty($options['keepHyphen']) && !in_array('-', $keepChars)) $keepChars[] = '-';
|
||||||
|
if(!empty($options['keepUnderscore']) && !in_array('_', $keepChars)) $keepChars[] = '_';
|
||||||
|
|
||||||
|
$options['keepChars'] = $keepChars;
|
||||||
|
|
||||||
$a = $this->wordsArray($value, $options);
|
$a = $this->wordsArray($value, $options);
|
||||||
return count($a) ? reset($a) : '';
|
$count = count($a);
|
||||||
|
if(!$count) return '';
|
||||||
|
|
||||||
|
if($separator !== null && $count > 1) {
|
||||||
|
$value = implode($separator, $a);
|
||||||
|
} else if($count) {
|
||||||
|
$value = reset($a);
|
||||||
|
}
|
||||||
|
|
||||||
|
if(!empty($options['ascii'])) {
|
||||||
|
$sep = $separator === null ? '' : $separator;
|
||||||
|
$value = $this->nameFilter($value, $keepChars, $sep, Sanitizer::translate, $maxLength);
|
||||||
|
} else if($maxLength) {
|
||||||
|
$length = $this->multibyteSupport ? mb_strlen($value) : strlen($value);
|
||||||
|
if($length > $maxLength) {
|
||||||
|
$value = $this->multibyteSupport ? mb_substr($value, 0, $maxLength) : substr($value, 0, $maxLength);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if(!empty($options['beautify'])) {
|
||||||
|
foreach($keepChars as $s) {
|
||||||
|
while(strpos($value, "$s$s") !== false) $value = str_replace("$s$s", $s, $value);
|
||||||
|
}
|
||||||
|
$value = trim($value, implode('', $keepChars));
|
||||||
|
}
|
||||||
|
|
||||||
|
if($minWordLength > 1 && strlen($value) < $minWordLength) $value = '';
|
||||||
|
|
||||||
|
return $value;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Given string return a new string containing only words
|
||||||
|
*
|
||||||
|
* #pw-group-strings
|
||||||
|
*
|
||||||
|
* @param $value
|
||||||
|
* @param array $options
|
||||||
|
* - `separator` (string): String to use to separate words (default=' ')
|
||||||
|
* - `ascii` (bool): Only allow ASCII characters in words? (default=false)
|
||||||
|
* - `keepUnderscore` (bool): Keep underscores as part of words? (default=true)
|
||||||
|
* - `keepHyphen` (bool): Keep hyphenated words? (default=true)
|
||||||
|
* - `keepChars` (array): Additional non word characters to keep (default=[])
|
||||||
|
* - `maxWordLength` (int): Maximum word length (default=255)
|
||||||
|
* - `minWordLength` (int): Minimum word length (default=1)
|
||||||
|
* - `maxLength` (int): Maximum return value length (default=1024)
|
||||||
|
* - `beautify` (bool): Make ugly strings more pretty? This collapses and trims redundant separators (default=true)
|
||||||
|
* @since 3.0.195
|
||||||
|
* @return string
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
public function words($value, array $options = array()) {
|
||||||
|
|
||||||
|
$defaults = array(
|
||||||
|
'ascii' => false,
|
||||||
|
'separator' => ' ',
|
||||||
|
'keepHyphen' => true,
|
||||||
|
'keepUnderscore' => true,
|
||||||
|
'keepChars' => array(),
|
||||||
|
'maxWordLength' => 255,
|
||||||
|
'maxLength' => 1024,
|
||||||
|
'beautify' => true,
|
||||||
|
);
|
||||||
|
|
||||||
|
$options = array_merge($defaults, $options);
|
||||||
|
$value = $this->word($value, $options);
|
||||||
|
|
||||||
|
return $value;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -4621,8 +4707,18 @@ class Sanitizer extends Wire {
|
|||||||
// pC=Other (control, format, surrogate)
|
// pC=Other (control, format, surrogate)
|
||||||
// p{Pd}=Dash punctuation
|
// p{Pd}=Dash punctuation
|
||||||
// pP=Punctuation (all)
|
// pP=Punctuation (all)
|
||||||
|
// pPs=Open punctuation
|
||||||
|
// pPe=Close punctuation
|
||||||
|
// pPf=Final punctuation
|
||||||
|
// pPo=Other punctuation
|
||||||
|
// pPi=Initial punctuation
|
||||||
|
// pM=Mark
|
||||||
|
// pMc=Spacing mark
|
||||||
|
// pMe=Enclosing mark
|
||||||
|
// pMn=Non-spacing mark
|
||||||
|
|
||||||
$splitWith = '.,;/*:+<>\s\pZ\pS\pC\p{Pd}\\\\';
|
//$splitWith = '.,;/*:+<>\s\pZ\pS\pC\p{Pd}\\\\';
|
||||||
|
$splitWith = '.,;/*:+<>\s\pZ\pS\pC\p{Pd}\p{Ps}\p{Pe}\p{Pf}\p{Pi}\p{Po}\\\\';
|
||||||
$regex = '!\pP*[' . $splitWith . ']\pP*!u';
|
$regex = '!\pP*[' . $splitWith . ']\pP*!u';
|
||||||
$words = preg_split($regex, "$value ", -1, PREG_SPLIT_NO_EMPTY);
|
$words = preg_split($regex, "$value ", -1, PREG_SPLIT_NO_EMPTY);
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user