1
0
mirror of https://github.com/processwire/processwire.git synced 2025-08-13 10:15:28 +02:00

Refactoring and improvements to ProcessPageView module

This commit is contained in:
Ryan Cramer
2020-10-02 11:56:54 -04:00
parent 45ad9473d2
commit 337488f117

View File

@@ -64,12 +64,12 @@ class ProcessPageView extends Process {
protected $delayRedirects = false;
/**
* Sanitized URL that generated this request
* Sanitized path that generated this request
*
* Set by the getPage() method and passed to the pageNotFound function.
*
*/
protected $requestURL = '';
protected $requestPath = '';
/**
* Unsanitized URL from $_SERVER['REQUEST_URI']
@@ -103,10 +103,18 @@ class ProcessPageView extends Process {
*/
protected $pageNumPrefix = null;
/**
* Construct
*
* Deliberately left empty: the parent constructor is intentionally not called.
*
*/
public function __construct() {
// no parent call intentional
}
/**
* Init
*
*/
public function init() {
$this->dirtyURL = isset($_SERVER['REQUEST_URI']) ? $_SERVER['REQUEST_URI'] : '';
@@ -164,7 +172,7 @@ class ProcessPageView extends Process {
if(!$page || $_page->id == $config->http404PageID) {
$s = 'access not allowed';
$e = new Wire404Exception($s, Wire404Exception::codePermission);
return $this->pageNotFound($_page, $this->requestURL, true, $s, $e);
return $this->pageNotFound($_page, $this->requestPath, true, $s, $e);
}
if(!$this->delayRedirects) {
@@ -215,7 +223,7 @@ class ProcessPageView extends Process {
}
} catch(Wire404Exception $e) {
return $this->pageNotFound($page, $this->requestURL, false, '404 thrown during page render', $e);
return $this->pageNotFound($page, $this->requestPath, false, '404 thrown during page render', $e);
} catch(\Exception $e) {
$this->responseType = self::responseTypeError;
@@ -224,7 +232,7 @@ class ProcessPageView extends Process {
}
} else {
return $this->pageNotFound(new NullPage(), $this->requestURL, true, 'Requested URL did not resolve to a Page');
return $this->pageNotFound(new NullPage(), $this->requestPath, true, 'Requested URL did not resolve to a Page');
}
return '';
@@ -294,84 +302,29 @@ class ProcessPageView extends Process {
*/
protected function getPage() {
/** @var Config $config */
$config = $this->wire('config');
/** @var Sanitizer $sanitizer */
$sanitizer = $this->wire('sanitizer');
/** @var Pages $pages */
$pages = $this->wire('pages');
$config = $this->wire()->config;
$sanitizer = $this->wire()->sanitizer;
// force redirect to actual page URL? (if different from request URL)
$forceRedirect = false;
// did URL end with index.php|htm|html? If so we might redirect if a page matches without it.
$indexRedirect = false;
// options for $sanitizer->selectorValue() call
$selectorValueOptions = array(
'maxLength' => 2048,
'maxBytes' => 6144,
'allowArray' => false,
);
/** @var string $shit Dirty URL */
/** @var string $it Clean URL */
if(isset($_GET['it'])) {
// normal request
$shit = trim($_GET['it']);
} else if(isset($_SERVER['REQUEST_URI'])) {
// abnormal request, something about request URL made .htaccess skip it, or index.php called directly
$shit = trim($_SERVER['REQUEST_URI']);
if(strpos($shit, '?') !== false) list($shit,) = explode('?', $shit);
if($config->urls->root != '/') {
if(strpos($shit, $config->urls->root) === 0) {
// remove root URL from request
$shit = substr($shit, strlen($config->urls->root) - 1);
} else {
// request URL outside of our root directory
return null;
}
}
} else {
$shit = '/';
}
if($shit === '/') {
$it = '/';
} else {
$it = preg_replace('{[^-_./a-zA-Z0-9]}', '', $shit); // clean
}
unset($_GET['it']);
if($shit !== $it) {
// sanitized URL does not match requested URL
if($config->pageNameCharset == 'UTF8') {
// test for extended page name URL
$it = $sanitizer->pagePathNameUTF8($shit);
}
if($shit !== $it) {
// if still does not match then fail
return null;
}
}
if(!isset($it[0]) || $it[0] != '/') $it = "/$it";
if(strpos($it, '//') !== false) return null;
$hasIndexFile = false;
// get the requested path
$it = $this->getPageRequestPath();
if($it === false) return null;
// check if there are index files in the request
if(strpos($it, '/index.') !== false && preg_match('{/index\.(php|html?)$}', $it, $matches)) {
// if request is to index.php|htm|html, make note of it to determine if we can redirect later
$indexRedirect = true;
$hasIndexFile = true;
} else if(strpos($this->dirtyURL, 'index.php') !== false && strpos($it, 'index.php') === false) {
// if request contains index.php and the $it string does not, make it redirect to correct version
// if request contains index.php and the request path ($it) does not, force redirect to correct version
if(preg_match('!/index\.php$!', parse_url($this->dirtyURL, PHP_URL_PATH))) $forceRedirect = true;
}
$numParts = substr_count($it, '/');
if($numParts > $config->maxUrlDepth) return null;
// check if request is for a secure pagefile
if($this->pagefileSecurePossible($it)) {
$page = $this->checkRequestFile($it);
if(is_object($page)) {
@@ -380,90 +333,199 @@ class ProcessPageView extends Process {
}
}
// optimization to filter out page numbers first
$maybePrefix = false;
foreach($this->pageNumUrlPrefixes as $prefix) {
if(strpos($it, '/' . $prefix) !== false) {
$maybePrefix = true;
break;
}
}
if($maybePrefix && preg_match('{/(' . implode('|', $this->pageNumUrlPrefixes) . ')(\d+)/?$}', $it, $matches)) {
// URL contains a page number, but we'll let it be handled by the checkUrlSegments function later
$this->pageNumPrefix = $matches[1];
$this->pageNum = (int) $matches[2];
// check for pagination segment
if($this->checkPageNumPath($it)) {
// path has a pagination prefix and number in it,
// populated to $this->pageNumPrefix and $this->pageNum
$page = null;
} else {
$spit = $sanitizer->selectorValue($it, $selectorValueOptions);
$page = $pages->get("path=$spit, status<" . Page::statusMax);
} else {
// no pagination number, see if it already resolves to a Page
$page = $this->pagesGet($it);
}
$hasTrailingSlash = substr($it, -1) == '/';
if($page && $page->id) {
// path resolves to page with NO pageNum or urlSegments present
if($forceRedirect) {
$this->redirectURL = $page->url;
} else {
// index.php in URL redirect to actual page URL
$this->redirectURL = $page->url;
} else if($page->id > 1) {
// trailing slash vs. non trailing slash, enforced if not homepage
// redirect to proper trailed slash version if incorrect version is present.
// note: this section only executed if no URL segments or page numbers were present
$s = $page->template->slashUrls;
if($page->id > 1 && ((!$hasTrailingSlash && $s !== 0) || ($hasTrailingSlash && $s === 0))) {
$hasTrailingSlash = substr($it, -1) == '/';
$slashUrls = $page->template->slashUrls;
if((!$hasTrailingSlash && $slashUrls !== 0) || ($hasTrailingSlash && $slashUrls === 0)) {
$this->redirectURL = $page->url;
}
}
return $page;
} else {
// check for globally unique page which can redirect
$trit = trim($it, '/');
$spit = $sanitizer->pageNameUTF8($trit);
if($trit === $spit) {
// one segment off root
$spit = $sanitizer->selectorValue($spit, $selectorValueOptions);
$page = $pages->get("name=$spit, status=" . Page::statusUnique);
if($page->id && $page->viewable()) {
$this->redirectURL = $page->url;
} else {
$page = null;
}
}
}
$this->requestURL = $it;
// check for globally unique page which can redirect
$name = trim($it, '/');
if($name && strpos($name, '/') === false && $sanitizer->pageNameUTF8($name) === $name) {
$page = $this->pagesGet($name, 'name', 'status=' . Page::statusUnique);
// if found, redirect to globally unique page
if($page && $page->viewable()) $this->redirectURL = $page->url;
}
// populate request path to class as other methods will now use it
$this->requestPath = $it;
// check if path with URL segments can resolve to a page
$urlSegments = array();
$maxSegments = $config->maxUrlSegments;
if(is_null($maxSegments)) $maxSegments = 4; // default
$cnt = 0;
// if the page isn't found, then check if a page one path level before exists
// this loop allows for us to have both a urlSegment and a pageNum
while((!$page || !$page->id) && $cnt < $maxSegments) {
$it = rtrim($it, '/');
$pos = strrpos($it, '/')+1;
$urlSegment = substr($it, $pos);
$urlSegments[$cnt] = $urlSegment;
$it = substr($it, 0, $pos); // $it no longer includes the urlSegment
$selector = "path=" . $sanitizer->selectorValue($it, $selectorValueOptions) . ", status<" . Page::statusMax;
$page = $pages->get($selector);
$cnt++;
}
// if we still found no page, then we can abort
if(!$page || !$page->id) return null;
if(!$page) $page = $this->getPageUrlSegments($it, $urlSegments);
// if URL segments and/or page numbers are present and not allowed then abort
if(!$this->checkUrlSegments($urlSegments, $page)) {
if($indexRedirect && $cnt === 1) {
if($page && count($urlSegments) && !$this->checkUrlSegments($urlSegments, $page)) {
// found URL segments were checked and found not to be allowed here
if($hasIndexFile && count($urlSegments) === 1) {
// index.php|htm|html segments if not used by page can redirect to URL without it
$forceRedirect = true;
} else {
return null;
// page with invalid URL segments becomes a 404
$page = null;
}
}
// if no page found for guest user, check if path was in admin and map to admin root
if(!$page && $this->wire()->user->isGuest()) {
// this ensures that no admin requests resolve to a 404 and instead show login form
$adminPath = substr($config->urls->admin, strlen($config->urls->root)-1);
if(strpos($this->requestPath, $adminPath) === 0) {
$page = $this->wire()->pages->get($config->adminRootPageID);
}
$forceRedirect = false;
}
if($forceRedirect && $page && !$this->redirectURL) {
$this->redirectURL = $page->url;
}
return $page;
}
/**
 * Resolve a path that contains URL segments to the closest existing parent Page
 *
 * Removes one trailing path part per iteration and records it in $urlSegments (in the
 * order removed, so the last part of the path is stored first). Stops at the first
 * remaining path that resolves to a Page, or once $config->maxUrlSegments parts
 * (4 when that setting is null) have been removed.
 *
 * @param string $path Requested path
 * @param array $urlSegments Populated by reference with the parts removed from $path
 * @return null|Page Page found for a parent path, or null if none was found
 *
 */
protected function getPageUrlSegments($path, array &$urlSegments) {

	$limit = $this->wire()->config->maxUrlSegments;
	if($limit === null) {
		$limit = 4; // default when not configured
	} else {
		$limit = (int) $limit;
	}

	$found = null;

	// walk up one path level at a time until a page resolves; a pagination
	// segment, when present, is collected here like any other segment
	for($n = 0; $n < $limit && !$found; $n++) {
		$trimmed = rtrim($path, '/');
		$cut = strrpos($trimmed, '/') + 1;
		$urlSegments[$n] = substr($trimmed, $cut);
		$path = substr($trimmed, 0, $cut); // parent path, no longer includes the segment
		$found = $this->pagesGet($path);
	}

	return $found;
}
/**
 * Get the requested path
 *
 * Reads the path from $_GET['it'] when present, otherwise derives it from
 * $_SERVER['REQUEST_URI'] with the query string and root URL removed. The path is
 * accepted only when it consists solely of the characters -_./a-zA-Z0-9, or, when
 * $config->pageNameCharset is 'UTF8', when $sanitizer->pagePathNameUTF8() returns
 * it unchanged.
 *
 * Side effect: $_GET['it'] is removed.
 *
 * @return bool|string Return false on fail or path on success
 *
 */
protected function getPageRequestPath() {

	$config = $this->wire()->config;
	$sanitizer = $this->wire()->sanitizer;

	/** @var string $dirty Path as requested (unsanitized) */
	/** @var string $it Clean path */

	if(isset($_GET['it'])) {
		// normal request
		$dirty = $_GET['it'];
		unset($_GET['it']);
		// a non-string value (i.e. ?it[]=x) can never be a valid path
		if(!is_string($dirty)) return false;
		$dirty = trim($dirty);

	} else if(isset($_SERVER['REQUEST_URI'])) {
		// abnormal request, something about request URL made .htaccess skip it, or index.php called directly
		$rootUrl = $config->urls->root;
		$dirty = trim($_SERVER['REQUEST_URI']);
		if(strpos($dirty, '?') !== false) list($dirty,) = explode('?', $dirty, 2);
		if($rootUrl != '/') {
			// request URL outside of our root directory
			if(strpos($dirty, $rootUrl) !== 0) return false;
			// remove root URL from request (keeping its trailing slash as the leading slash)
			$dirty = substr($dirty, strlen($rootUrl) - 1);
		}

	} else {
		$dirty = '/';
	}

	if($dirty === '/') {
		$it = '/';
	} else {
		$it = preg_replace('{[^-_./a-zA-Z0-9]}', '', $dirty); // clean
	}

	if($dirty !== $it) {
		// sanitized path does not match requested path
		if($config->pageNameCharset == 'UTF8') {
			// test for extended page name URL
			$it = $sanitizer->pagePathNameUTF8($dirty);
		}
		// if still does not match then fail
		if($dirty !== $it) return false;
	}

	// enforce maximum path depth, when one is configured
	$maxUrlDepth = $config->maxUrlDepth;
	if($maxUrlDepth > 0 && substr_count($it, '/') > $maxUrlDepth) return false;

	// path must begin with a slash and may not contain empty segments
	if(!isset($it[0]) || $it[0] != '/') $it = "/$it";
	if(strpos($it, '//') !== false) return false;

	return $it;
}
/**
 * Check if given path ends with a page/pagination number and return it if so (return 0 if not)
 *
 * When a pagination segment is found, the matched prefix is stored in
 * $this->pageNumPrefix and the number in $this->pageNum.
 *
 * @param string $path
 * @return int
 *
 */
protected function checkPageNumPath($path) {

	// optimization: only run the regex when some prefix follows a slash in the path
	$hasPrefix = false;
	foreach($this->pageNumUrlPrefixes as $prefix) {
		if(strpos($path, '/' . $prefix) !== false) {
			$hasPrefix = true;
			break;
		}
	}
	if(!$hasPrefix) return 0;

	// quote each prefix so that any regex metacharacters in it are matched literally
	$quoted = array();
	foreach($this->pageNumUrlPrefixes as $prefix) {
		$quoted[] = preg_quote($prefix);
	}

	if(!preg_match('{/(' . implode('|', $quoted) . ')(\d+)/?$}', $path, $matches)) return 0;

	// URL contains a page number, but we'll let it be handled by the checkUrlSegments function later
	$this->pageNumPrefix = $matches[1];
	$this->pageNum = (int) $matches[2];

	return $this->pageNum;
}
/**
@@ -570,28 +632,30 @@ class ProcessPageView extends Process {
protected function checkUrlSegments(array $urlSegments, Page $page) {
if(!count($urlSegments)) return true;
$input = $this->wire()->input;
$lastSegment = reset($urlSegments);
$urlSegments = array_reverse($urlSegments);
$pageNum = 1;
$template = $page->template;
// check if the last urlSegment is setting a page number and that page numbers are allowed
if(!is_null($this->pageNum) && $lastSegment === "$this->pageNumPrefix$this->pageNum" && $page->template->allowPageNum) {
if(!is_null($this->pageNum) && $lastSegment === "$this->pageNumPrefix$this->pageNum" && $template->allowPageNum) {
// meets the requirements for a page number: last portion of URL and starts with 'page'
$pageNum = (int) $this->pageNum;
if($pageNum < 1) $pageNum = 1;
if($pageNum > 1 && !$this->wire('user')->isLoggedin()) {
$maxPageNum = $this->wire('config')->maxPageNum;
if($pageNum > 1 && !$this->wire()->user->isLoggedin()) {
$maxPageNum = $this->wire()->config->maxPageNum;
if(!$maxPageNum) $maxPageNum = 999;
if($pageNum > $maxPageNum) return false;
}
$page->setQuietly('pageNum', $pageNum); // backwards compatibility
$this->input->setPageNum($pageNum);
$input->setPageNum($pageNum);
array_pop($urlSegments);
}
// return false if URL segments aren't allowed with this page template
if($page->template != 'admin' && count($urlSegments)) {
if($template->name !== 'admin' && count($urlSegments)) {
if(!$this->isAllowedUrlSegment($page, $urlSegments)) return false;
}
@@ -599,33 +663,33 @@ class ProcessPageView extends Process {
$cnt = 1;
foreach($urlSegments as $urlSegment) {
if($cnt == 1) $page->setQuietly('urlSegment', $urlSegment); // backwards compatibility
$this->input->setUrlSegment($cnt, $urlSegment);
$input->setUrlSegment($cnt, $urlSegment);
$cnt++;
}
if($pageNum > 1 || count($urlSegments)) {
$hasTrailingSlash = substr($this->requestURL, -1) == '/';
$hasTrailingSlash = substr($this->requestPath, -1) == '/';
// $url=URL with urlSegments and no trailing slash
// $url = rtrim(rtrim($page->url, '/') . '/' . $this->input->urlSegmentStr, '/');
$redirectPath = null;
if($pageNum > 1 && $page->template->slashPageNum) {
if($page->template->slashPageNum == 1 && !$hasTrailingSlash) {
if($pageNum > 1 && $template->slashPageNum) {
if($template->slashPageNum == 1 && !$hasTrailingSlash) {
// enforce trailing slash on page numbers
//$this->redirectURL = "$url/$this->pageNumPrefix$pageNum/";
$redirectPath = "/$this->pageNumPrefix$pageNum/";
} else if($page->template->slashPageNum == -1 && $hasTrailingSlash) {
} else if($template->slashPageNum == -1 && $hasTrailingSlash) {
// enforce NO trailing slash on page numbers
// $this->redirectURL = "$url/$this->pageNumPrefix$pageNum";
$redirectPath = "/$this->pageNumPrefix$pageNum";
}
} else if(count($urlSegments) && $page->template->slashUrlSegments) {
if($page->template->slashUrlSegments == 1 && !$hasTrailingSlash) {
} else if(count($urlSegments) && $template->slashUrlSegments) {
if($template->slashUrlSegments == 1 && !$hasTrailingSlash) {
// enforce trailing slash with URL segments
// $this->redirectURL = "$url/";
$redirectPath = "/";
} else if($page->template->slashUrlSegments == -1 && $hasTrailingSlash) {
} else if($template->slashUrlSegments == -1 && $hasTrailingSlash) {
// enforce no trailing slash with URL segments
// $this->redirectURL = $url;
$redirectPath = "";
@@ -634,13 +698,14 @@ class ProcessPageView extends Process {
if($redirectPath !== null) {
// redirect will occur to a proper slash format
if($this->wire('modules')->isInstalled('LanguageSupportPageNames')) {
$modules = $this->wire()->modules;
if($modules->isInstalled('LanguageSupportPageNames')) {
// ensure that LanguageSupportPageNames reaches a ready() state, since
// it can modify the output of $page->url (if installed)
$this->wire('page', $page);
$this->wire('modules')->get('LanguageSupportPageNames')->ready();
$modules->get('LanguageSupportPageNames')->ready();
}
$this->redirectURL = rtrim(rtrim($page->url, '/') . '/' . $this->input->urlSegmentStr, '/') . $redirectPath;
$this->redirectURL = rtrim(rtrim($page->url, '/') . '/' . $input->urlSegmentStr, '/') . $redirectPath;
}
}
@@ -699,7 +764,7 @@ class ProcessPageView extends Process {
protected function checkAccess($page) {
$user = $this->wire()->user;
if($this->requestFile) {
// if a file was requested, we still allow view even if page doesn't have template file
if($page->viewable($this->requestFile) === false) return null;
@@ -723,31 +788,61 @@ class ProcessPageView extends Process {
$accessTemplate = $page->getAccessTemplate();
$redirectLogin = $accessTemplate ? $accessTemplate->redirectLogin : false;
// if we wont be presenting a login form then $page converts to null (404)
if(!$redirectLogin) return null;
$config = $this->wire()->config;
$disallowIDs = array($config->trashPageID); // don't allow login redirect for these pages
$loginRequestURL = $this->redirectURL;
$loginPageID = $this->wire()->config->loginPageID;
$requestPage = $page;
if($redirectLogin) {
$config = $this->wire()->config;
$disallowIDs = array($config->trashPageID); // don't allow login redirect for these pages
if($page->id && in_array($page->id, $disallowIDs)) {
$page = null;
} else if(ctype_digit("$redirectLogin")) {
// redirect login provided as a page ID
$redirectLogin = (int) $redirectLogin;
if($redirectLogin == 1) $redirectLogin = $this->config->loginPageID;
$page = $this->pages->get($redirectLogin);
} else {
// redirect login provided as a URL, optionally with an {id} tag for requested page ID
$redirectLogin = str_replace('{id}', $page->id, $redirectLogin);
$this->redirectURL = $redirectLogin;
}
$session = $this->wire()->session;
$input = $this->wire()->input;
$ns = 'ProcessPageView';
if($page->id && in_array($page->id, $disallowIDs)) {
// don't allow login redirect when matching disallowIDs
$page = null;
// in case anything needs to know the originally requested login page
$this->wire()->session->setFor('ProcessPageView', 'loginRequestPageID', $requestPage->id);
} else if(ctype_digit("$redirectLogin")) {
// redirect login provided as a page ID
$redirectLogin = (int) $redirectLogin;
// if given ID 1 then this maps to the admin login page
if($redirectLogin === 1) $redirectLogin = $loginPageID;
$page = $this->pages->get($redirectLogin);
} else {
$page = null;
// redirect login provided as a URL, optionally with an {id} tag for requested page ID
$redirectLogin = str_replace('{id}', $page->id, $redirectLogin);
$this->redirectURL = $redirectLogin;
}
if(empty($loginRequestURL)) {
$loginRequestURL = $session->getFor($ns, 'loginRequestURL');
}
// in case anything after login needs to know the originally requested page/URL
if(empty($loginRequestURL) && $page && $requestPage && $requestPage->id) {
if($requestPage->id != $loginPageID && !$input->get('loggedout')) {
$loginRequestURL = $input->url(array('page' => $requestPage));
if(!empty($_GET)) {
$queryString = $input->queryStringClean(array(
'maxItems' => 10,
'maxLength' => 500,
'maxNameLength' => 20,
'maxValueLength' => 200,
'sanitizeName' => 'fieldName',
'sanitizeValue' => 'name',
'entityEncode' => false,
));
if(strlen($queryString)) $loginRequestURL .= "?$queryString";
}
$session->setFor($ns, 'loginRequestPageID', $requestPage->id);
$session->setFor($ns, 'loginRequestURL', $loginRequestURL);
}
}
return $page;
}
@@ -836,7 +931,7 @@ class ProcessPageView extends Process {
if($page->template->slashUrlSegments == 1) $url .= '/';
} else {
// use whatever the request came with
if(substr($this->requestURL, -1) == '/') $url .= '/';
if(substr($this->requestPath, -1) == '/') $url .= '/';
}
}
@@ -850,7 +945,7 @@ class ProcessPageView extends Process {
if($page->template->slashPageNum == 1) $url .= '/';
} else {
// use whatever setting the URL came with
if(substr($this->requestURL, '-1') == '/') $url .= '/';
if(substr($this->requestPath, '-1') == '/') $url .= '/';
}
}
}
@@ -858,6 +953,28 @@ class ProcessPageView extends Process {
$this->redirectURL = $url;
}
/**
 * Get Page from $pages by path (or other property), or null when there is no match
 *
 * Integer values are used in the selector as-is. Any other value is first passed
 * through $sanitizer->selectorValue().
 *
 * @param string $value Value to match
 * @param string $property Property being matched (default='path')
 * @param string $selector Additional selector to apply (default='status<9999999');
 * @return null|Page
 * @since 3.0.168
 *
 */
protected function pagesGet($value, $property = 'path', $selector = 'status<9999999') {

	if(!is_int($value)) {
		$sanitizeOptions = array(
			'maxLength' => 2048,
			'maxBytes' => 6144,
			'allowArray' => false,
			'allowSpace' => false,
			'blacklist' => array(',', "'"),
		);
		$value = $this->wire()->sanitizer->selectorValue($value, $sanitizeOptions);
	}

	$match = $this->wire()->pages->get("$property=$value, $selector");

	// a result without an id means nothing was found
	if($match->id) return $match;

	return null;
}
/**
* Passthru a file for a non-public page
*
@@ -1009,6 +1126,50 @@ class ProcessPageView extends Process {
// if at least one template supports pagefileSecure option we will return true here
return $allow;
}
/**
 * Given a request path (that does not exist) return the closest parent that does exist
 *
 * CURRENTLY NOT USED (future use)
 *
 * Walks the request path from its first segment onward and remembers the last path
 * that resolved to a Page. The walk stops at the first segment that is not already a
 * sanitized page name, or at the first path that does not resolve to a Page.
 *
 * @param string $requestPath Request path
 * @param string $parentPath Optional minimum required parent path
 * @return null|Page Returns found parent on success or null if none found
 * @since 3.0.168
 *
 */
private function getClosestParentPage($requestPath, $parentPath = '') {

	$requestPath = trim($requestPath, '/');
	$parentPath = trim($parentPath, '/');

	// if request path is not in the required start path then exit early
	if($parentPath !== '') {
		if(stripos("/$requestPath/", "/$parentPath/") !== 0) return null;
	}

	$sanitizer = $this->wire()->sanitizer;
	$parent = null;
	$path = '';

	// attempt to match page from beginning of path to find closest parent
	foreach(explode('/', $requestPath) as $segment) {
		$seg = $sanitizer->pageName($segment);
		if($seg !== $segment) break;
		$path .= "/$seg";
		// the required parent path itself is not looked up here; see fallback below
		if($parentPath !== '' && $path === "/$parentPath") continue;
		$page = $this->pagesGet($path);
		// pagesGet() returns null (not a NullPage) when nothing matches
		if(!$page) break;
		$parent = $page;
	}

	// nothing matched below the required parent path, so fall back to that parent
	if($parentPath !== '' && !$parent) $parent = $this->pagesGet("/$parentPath");

	return $parent && $parent->id ? $parent : null;
}
}