From 4be389067de38450bad4d25448f16ec64d44e18e Mon Sep 17 00:00:00 2001
From: Ryan Cramer
Date: Tue, 31 Dec 2024 10:59:47 -0500
Subject: [PATCH] Fix issue processwire/processwire-issues#2015

---
 wire/core/Sanitizer.php | 30 +++++++++++++++++++++++++-----
 1 file changed, 25 insertions(+), 5 deletions(-)

diff --git a/wire/core/Sanitizer.php b/wire/core/Sanitizer.php
index 564fc208..cdf3b95b 100644
--- a/wire/core/Sanitizer.php
+++ b/wire/core/Sanitizer.php
@@ -992,7 +992,14 @@ class Sanitizer extends Wire {
 			$_value = $value;
 			$parts = explode('__', $_value);
 			foreach($parts as $n => $part) {
-				$parts[$n] = $this->punyDecodeName($part);
+				if(strpos($part, "xn-=") === 0) $part = substr($part, 3);
+				if(strpos($part, '=') === 0) {
+					// equals at beginning means keep this part as-is
+					$part = $this->name(substr($part, 1));
+				} else {
+					$part = $this->punyDecodeName($part);
+				}
+				$parts[$n] = $part;
 			}
 			$value = implode('', $parts);
 			return $value;
@@ -1003,7 +1010,7 @@ class Sanitizer extends Wire {
 		if(strpos($value, 'xn--') !== 0) $value = 'xn--' . substr($value, 3);
 		if(function_exists('idn_to_utf8')) {
 			// use native php function if available
-			$value = @idn_to_utf8($value);
+			$value = idn_to_utf8($value, 32); // 32=IDNA_NONTRANSITIONAL_TO_UNICODE
 		} else {
 			// otherwise use Punycode class
 			$pc = new Punycode();
@@ -1037,9 +1044,18 @@ class Sanitizer extends Wire {
 			while(strlen($_value)) {
 				$part = $tt->substr($_value, 0, 12);
 				$_value = $tt->substr($_value, 12);
-				$parts[] = $this->punyEncodeName($part);
+				$part = $this->punyEncodeName($part);
+				if(strpos($part, 'xn-') !== 0) {
+					// if encoding didn't result in an xn- string then
+					// prefix an equals to indicate this part should be taken literally
+					$part = "=$part";
+				}
+				$parts[] = $part;
 			}
 			$value = implode('__', $parts);
+			if(strpos($value, 'xn--') !== false && strpos($value, 'xn-') !== 0) {
+				$value = "xn-$value";
+			}
 			return $value;
 		}

@@ -1047,7 +1063,7 @@ class Sanitizer extends Wire {

 		if(function_exists("idn_to_ascii")) {
 			// use native php function if available
-			$value = substr(@idn_to_ascii($value), 3);
+			$value = substr(idn_to_ascii($value, 16), 3); // 16=IDNA_NONTRANSITIONAL_TO_ASCII
 		} else {
 			// otherwise use Punycode class
 			$pc = new Punycode();
@@ -1064,6 +1080,10 @@ class Sanitizer extends Wire {
 		}
 		return $value;
 	}
+
+	protected function punycode() {
+		return new Punycode();
+	}

 	/**
 	 * Format required by ProcessWire user names
@@ -2382,7 +2402,7 @@ class Sanitizer extends Wire {

 			} else {
 				// domain contains utf8
-				$pc = function_exists("idn_to_ascii") ? false : new Punycode();
+				$pc = function_exists("idn_to_ascii") ? false : $this->punycode();
 				$domain = $pc ? $pc->encode($domain) : @idn_to_ascii($domain);
 				if($domain === false || !strlen($domain)) return '';
 				$url = $scheme . $domain . $rest;