From 02f05c6f6787f590f5598cbe20f693127ad9620d Mon Sep 17 00:00:00 2001 From: Ryan Cramer Date: Thu, 21 Mar 2019 09:19:01 -0400 Subject: [PATCH] Update to allow spaces in URL segments when allowed by config.pageNameWhitelist and config.pageNameCharset=UTF8 per processwire/processwire-issues#720 --- wire/core/Sanitizer.php | 35 ++++++++++++--------- wire/modules/Process/ProcessPageView.module | 21 ++++++++----- 2 files changed, 34 insertions(+), 22 deletions(-) diff --git a/wire/core/Sanitizer.php b/wire/core/Sanitizer.php index a3fc96f2..da79f5e2 100644 --- a/wire/core/Sanitizer.php +++ b/wire/core/Sanitizer.php @@ -650,29 +650,36 @@ class Sanitizer extends Wire { if(strpos($value, 'xn-') === 0) $value = substr($value, 3); // word separators that we always allow - $separators = array('.', '-', '_'); - - // we let regular pageName handle chars like these, if they appear without other UTF-8 - $extras = array('.', '-', '_', ' ', ',', ';', ':', '(', ')', '!', '?', '&', '%', '$', '#', '@'); + $separators = array('.', '-', '_'); - // proceed only if value has some non-ascii characters - if(ctype_alnum(str_replace($extras, '', $value))) return $this->pageName($value, false, $maxLength); - - // validate that all characters are in our whitelist + // whitelist of allowed characters and blacklist of disallowed characters $whitelist = $this->wire('config')->pageNameWhitelist; if(!strlen($whitelist)) $whitelist = false; $blacklist = '/\\%"\'<>?#@:;,+=*^$()[]{}|&'; + + // we let regular pageName handle chars like these, if they appear without other UTF-8 + $extras = array('.', '-', '_', ',', ';', ':', '(', ')', '!', '?', '&', '%', '$', '#', '@'); + if($whitelist === false || strpos($whitelist, ' ') === false) $extras[] = ' '; + + // proceed only if value has some non-ascii characters + if(ctype_alnum(str_replace($extras, '', $value))) { + // let regular pageName sanitizer handle this + return $this->pageName($value, false, $maxLength); + } + + // validate that all characters are in our whitelist $replacements = array(); for($n = 0; $n < mb_strlen($value); $n++) { $c = mb_substr($value, $n, 1); - if(!strlen(trim($c)) || ctype_cntrl($c)) { - // character does not resolve to something visible + $inBlacklist = mb_strpos($blacklist, $c) !== false || strpos($blacklist, $c) !== false; + $inWhitelist = !$inBlacklist && $whitelist !== false && mb_strpos($whitelist, $c) !== false; + if($inWhitelist && !$inBlacklist) { + // in whitelist + } else if($inBlacklist || !strlen(trim($c)) || ctype_cntrl($c)) { + // character does not resolve to something visible or is in blacklist $replacements[] = $c; - } else if(mb_strpos($blacklist, $c) !== false || strpos($blacklist, $c) !== false) { - // character that is in blacklist - $replacements[] = $c; - } else if($whitelist !== false && mb_strpos($whitelist, $c) === false) { + } else { // character that is not in whitelist, double check case variants $cLower = mb_strtolower($c); $cUpper = mb_strtoupper($c); diff --git a/wire/modules/Process/ProcessPageView.module b/wire/modules/Process/ProcessPageView.module index 02839acb..b6b4e362 100644 --- a/wire/modules/Process/ProcessPageView.module +++ b/wire/modules/Process/ProcessPageView.module @@ -281,6 +281,10 @@ class ProcessPageView extends Process { /** @var Config $config */ $config = $this->wire('config'); + /** @var Sanitizer $sanitizer */ + $sanitizer = $this->wire('sanitizer'); + /** @var Pages $pages */ + $pages = $this->wire('pages'); // force redirect to actual page URL? (if different from request URL) $forceRedirect = false; @@ -324,7 +328,7 @@ class ProcessPageView extends Process { // sanitized URL does not match requested URL if($config->pageNameCharset == 'UTF8') { // test for extended page name URL - $it = $this->wire('sanitizer')->pagePathNameUTF8($shit); + $it = $sanitizer->pagePathNameUTF8($shit); } if($shit !== $it) { // if still does not match then fail @@ -368,7 +372,8 @@ class ProcessPageView extends Process { $this->pageNum = (int) $matches[2]; $page = null; } else { - $page = $this->pages->get("path=$it, status<" . Page::statusMax); + $spit = $sanitizer->selectorValue($it); + $page = $pages->get("path=$spit, status<" . Page::statusMax); } $hasTrailingSlash = substr($it, -1) == '/'; @@ -389,11 +394,11 @@ class ProcessPageView extends Process { } else { // check for globally unique page which can redirect $trit = trim($it, '/'); - $spit = $this->wire('sanitizer')->pageNameUTF8($trit); + $spit = $sanitizer->pageNameUTF8($trit); if($trit === $spit) { // one segment off root - $spit = $this->wire('sanitizer')->selectorValue($spit); - $page = $this->wire('pages')->get("name=$spit, status=" . Page::statusUnique); + $spit = $sanitizer->selectorValue($spit); + $page = $pages->get("name=$spit, status=" . Page::statusUnique); if($page->id && $page->viewable()) { $this->redirectURL = $page->url; } else { @@ -416,8 +421,8 @@ class ProcessPageView extends Process { $urlSegment = substr($it, $pos); $urlSegments[$cnt] = $urlSegment; $it = substr($it, 0, $pos); // $it no longer includes the urlSegment - $selector = "path=" . $this->wire('sanitizer')->selectorValue($it, 2048) . ", status<" . Page::statusMax; - $page = $this->pages->get($selector); + $selector = "path=" . $sanitizer->selectorValue($it, 2048) . ", status<" . Page::statusMax; + $page = $pages->get($selector); $cnt++; } @@ -570,7 +575,7 @@ class ProcessPageView extends Process { // now set the URL segments to the $input API variable $cnt = 1; foreach($urlSegments as $urlSegment) { - if($cnt == 1) $page->urlSegment = $urlSegment; // backwards compatibility + if($cnt == 1) $page->setQuietly('urlSegment', $urlSegment); // backwards compatibility $this->input->setUrlSegment($cnt, $urlSegment); $cnt++; }