From be1a203247b45c01e46803dece96e630202fcfb3 Mon Sep 17 00:00:00 2001 From: Ryan Cramer Date: Wed, 27 Jun 2018 10:24:59 -0400 Subject: [PATCH] =?UTF-8?q?Fix=20issue=20processwire/processwire-issues#62?= =?UTF-8?q?2=20in=20$sanitizer->url()=20to=20workaround=20that=20PHP?= =?UTF-8?q?=E2=80=99s=20FILTER=5FVALIDATE=5FURL=20does=20not=20accept=20un?= =?UTF-8?q?derscores=20in=20hostnames,=20despite=20their=20use=20being=20f?= =?UTF-8?q?airly=20common=20(even=20if=20not=20technically=20valid).?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- wire/core/Sanitizer.php | 28 +++++++++++++++++++++++++--- 1 file changed, 25 insertions(+), 3 deletions(-) diff --git a/wire/core/Sanitizer.php b/wire/core/Sanitizer.php index 1ad18b51..d7018561 100644 --- a/wire/core/Sanitizer.php +++ b/wire/core/Sanitizer.php @@ -1508,7 +1508,7 @@ class Sanitizer extends Wire { } /** - * Implementation of PHP's FILTER_VALIDATE_URL with IDN support (will convert to valid) + * Implementation of PHP's FILTER_VALIDATE_URL with IDN and underscore support (will convert to valid) * * Example: http://трикотаж-леко.рф * @@ -1518,10 +1518,28 @@ class Sanitizer extends Wire { * */ protected function filterValidateURL($url, array $options) { - + + // placeholders are characters known to be rejected by FILTER_VALIDATE_URL that should not be + $placeholders = array(); + + if(strpos($url, '_') !== false && strpos(parse_url($url, PHP_URL_HOST), '_') !== false) { + // hostname contains an underscore and FILTER_VALIDATE_URL does not support them in hostnames + do { + $placeholder = 'UNDER' . mt_rand() . 'SCORE'; + } while(strpos($url, $placeholder) !== false); + $url = str_replace('_', $placeholder, $url); + $placeholders[$placeholder] = '_'; + } + $_url = $url; $url = filter_var($url, FILTER_VALIDATE_URL); - if($url !== false && strlen($url)) return $url; + if($url !== false && strlen($url)) { + // if filter_var returns a URL, then we know there is no IDN present and we can exit now + if(count($placeholders)) { + $url = str_replace(array_keys($placeholders), array_values($placeholders), $url); + } + return $url; + } // if allowIDN was specifically set false, don't proceed further if(isset($options['allowIDN']) && !$options['allowIDN']) return $url; @@ -1573,6 +1591,10 @@ class Sanitizer extends Wire { $url = $scheme . $domain . $rest; } } + + if(count($placeholders)) { + $url = str_replace(array_keys($placeholders), array_values($placeholders), $url); + } return $url; }