1
0
mirror of https://github.com/processwire/processwire.git synced 2025-08-12 17:54:44 +02:00

Add new $sanitizer->trim() method that can trim all known UTF-8 whitespace types (or given chars) from the beginning and end of a string. This is something I discovered PHP's trim() function does not do.

This commit is contained in:
Ryan Cramer
2019-01-11 09:30:51 -05:00
parent f9ded64ba3
commit 52e20c489d

View File

@@ -76,9 +76,10 @@ class Sanitizer extends Wire {
*
*/
protected $whitespaceUTF8 = array(
'0000', // null byte
'0009', // character tab
'000A', // line feed
'000B', // line tab
'000B', // line tab (vertical tab)
'000C', // form feed
'000D', // carriage return
'0020', // space
@@ -2163,6 +2164,101 @@ class Sanitizer extends Wire {
return $str;
}
/**
 * Trim all known UTF-8 whitespace types (or given chars) from beginning and ending of string
 *
 * Like PHP's trim() but works with multibyte strings and recognizes all types of UTF-8 whitespace
 * as well as HTML whitespace entities. This method also optionally accepts an array for the $chars
 * argument, which enables you to trim out string sequences greater than one character long.
 *
 * If you do not need an extensive multibyte trim, use PHP's trim() instead because this takes more overhead.
 * PHP multibyte support (mbstring) is strongly recommended if using this function.
 *
 * #pw-group-strings
 *
 * @param string $str String to trim
 * @param string|array $chars Characters to trim, given either as a string (each character is trimmed
 *  individually) or as an array of strings (each item is trimmed as a whole sequence). Omit (blank string
 *  or empty array) to trim all known whitespace (including UTF-8) and HTML-entity whitespace.
 * @return string
 * @since 3.0.124
 *
 */
public function trim($str, $chars = '') {

	$mb = $this->multibyteSupport;
	$len = $mb ? mb_strlen($str) : strlen($str);
	if(!$len) return $str;
	if(is_array($chars) && !count($chars)) $chars = '';
	$trims = array();

	// setup trim
	if($chars === '') {
		// default whitespace characters
		$trims = $this->getWhitespaceArray(true);
		// let PHP default whitespace trim run first
		$str = trim($str);
	} else if(is_array($chars)) {
		// user-specified sequences, each trimmed as a whole
		$trims = $chars;
	} else {
		// user-specified characters: split $chars (not $str) into individual characters,
		// using the multibyte functions only when they are available
		$numChars = $mb ? mb_strlen($chars) : strlen($chars);
		for($n = 0; $n < $numChars; $n++) {
			$trim = $mb ? mb_substr($chars, $n, 1) : substr($chars, $n, 1);
			$trimLen = $mb ? mb_strlen($trim) : strlen($trim);
			if($trimLen) $trims[] = $trim;
		}
	}

	// begin trim: keep making passes until a full pass removes nothing, since removing one
	// trim sequence can expose another one at the start or end of the string
	do {
		$numRemovedStart = 0; // num removed from start
		$numRemovedEnd = 0; // num removed from end

		foreach($trims as $trimKey => $trim) {

			$trimLen = $mb ? mb_strlen($trim) : strlen($trim);

			// an empty trim can never shorten the string; it would match at position 0
			// forever (PHP 8+) or trigger "empty needle" warnings (PHP 7), so discard it
			if(!$trimLen) {
				unset($trims[$trimKey]);
				continue;
			}

			$trimPos = $mb ? mb_strpos($str, $trim) : strpos($str, $trim);

			// if trim not present anywhere in string it can be removed from our trims list
			if($trimPos === false) {
				unset($trims[$trimKey]);
				continue;
			}

			// at this point we know the trim sequence is present somewhere in the string

			// while this trim sequence matches at beginning of string, remove it
			while($trimPos === 0) {
				$str = $mb ? mb_substr($str, $trimLen) : substr($str, $trimLen);
				$trimPos = $mb ? mb_strpos($str, $trim) : strpos($str, $trim);
				$numRemovedStart++;
			}

			// trim from end, only when the sequence still occurs somewhere past the start
			if($trimPos > 0) {
				do {
					$removed = false; // whether anything was removed in this iteration
					$trimPos = $mb ? mb_strrpos($str, $trim) : strrpos($str, $trim);
					if($trimPos === false) break;
					$strLen = $mb ? mb_strlen($str) : strlen($str);
					// last occurrence reaches the end of the string, so cut it off
					if($trimPos + $trimLen >= $strLen) {
						$str = $mb ? mb_substr($str, 0, $trimPos) : substr($str, 0, $trimPos);
						$numRemovedEnd++;
						$removed = true;
					}
				} while($removed);
			}

			// if trim no longer present, remove it
			if($trimPos === false) unset($trims[$trimKey]);

		} // foreach

		$strLen = $mb ? mb_strlen($str) : strlen($str);

	} while($numRemovedStart + $numRemovedEnd > 0 && $strLen > 0);

	return $str;
}
/**
* Get array of all characters (including UTF-8) that can be used as whitespace in strings
*