1
0
mirror of https://github.com/processwire/processwire.git synced 2025-08-15 11:14:12 +02:00

Update to allow spaces in URL segments when allowed by config.pageNameWhitelist and config.pageNameCharset=UTF8 per processwire/processwire-issues#720

This commit is contained in:
Ryan Cramer
2019-03-21 09:19:01 -04:00
parent 7905fb5180
commit 02f05c6f67
2 changed files with 34 additions and 22 deletions

View File

@@ -650,29 +650,36 @@ class Sanitizer extends Wire {
if(strpos($value, 'xn-') === 0) $value = substr($value, 3);
// word separators that we always allow
$separators = array('.', '-', '_');
// we let regular pageName handle chars like these, if they appear without other UTF-8
$extras = array('.', '-', '_', ' ', ',', ';', ':', '(', ')', '!', '?', '&', '%', '$', '#', '@');
$separators = array('.', '-', '_');
// proceed only if value has some non-ascii characters
if(ctype_alnum(str_replace($extras, '', $value))) return $this->pageName($value, false, $maxLength);
// validate that all characters are in our whitelist
// whitelist of allowed characters and blacklist of disallowed characters
$whitelist = $this->wire('config')->pageNameWhitelist;
if(!strlen($whitelist)) $whitelist = false;
$blacklist = '/\\%"\'<>?#@:;,+=*^$()[]{}|&';
// we let regular pageName handle chars like these, if they appear without other UTF-8
$extras = array('.', '-', '_', ',', ';', ':', '(', ')', '!', '?', '&', '%', '$', '#', '@');
if($whitelist === false || strpos($whitelist, ' ') === false) $extras[] = ' ';
// proceed only if value has some non-ascii characters
if(ctype_alnum(str_replace($extras, '', $value))) {
// let regular pageName sanitizer handle this
return $this->pageName($value, false, $maxLength);
}
// validate that all characters are in our whitelist
$replacements = array();
for($n = 0; $n < mb_strlen($value); $n++) {
$c = mb_substr($value, $n, 1);
if(!strlen(trim($c)) || ctype_cntrl($c)) {
// character does not resolve to something visible
$inBlacklist = mb_strpos($blacklist, $c) !== false || strpos($blacklist, $c) !== false;
$inWhitelist = !$inBlacklist && $whitelist !== false && mb_strpos($whitelist, $c) !== false;
if($inWhitelist && !$inBlacklist) {
// in whitelist
} else if($inBlacklist || !strlen(trim($c)) || ctype_cntrl($c)) {
// character does not resolve to something visible or is in blacklist
$replacements[] = $c;
} else if(mb_strpos($blacklist, $c) !== false || strpos($blacklist, $c) !== false) {
// character that is in blacklist
$replacements[] = $c;
} else if($whitelist !== false && mb_strpos($whitelist, $c) === false) {
} else {
// character that is not in whitelist, double check case variants
$cLower = mb_strtolower($c);
$cUpper = mb_strtoupper($c);