1
0
mirror of https://github.com/processwire/processwire.git synced 2025-08-06 23:06:59 +02:00
This commit is contained in:
Ryan Cramer
2025-01-03 12:43:17 -05:00
parent 4be389067d
commit e6ace73c02
2 changed files with 95 additions and 44 deletions

View File

@@ -151,11 +151,11 @@ class Punycode {
if($q < $t) { if($q < $t) {
break; break;
} }
$code = $t + (($q - $t) % (static::BASE - $t)); $code = $t + ((floor($q) - $t) % (static::BASE - $t));
$output .= static::$encodeTable[$code]; $output .= static::$encodeTable[$code];
$q = ($q - $t) / (static::BASE - $t); $q = ($q - $t) / (static::BASE - $t);
} }
$output .= static::$encodeTable[$q]; $output .= static::$encodeTable[floor($q)];
$bias = $this->adapt($delta, $h + 1, ($h === $b)); $bias = $this->adapt($delta, $h + 1, ($h === $b));
$delta = 0; $delta = 0;
$h++; $h++;

View File

@@ -777,7 +777,8 @@ class Sanitizer extends Wire {
* - `Sanitizer::okUTF8` (constant): Allow UTF-8 characters to appear in path (implied if $config->pageNameCharset is 'UTF8'). * - `Sanitizer::okUTF8` (constant): Allow UTF-8 characters to appear in path (implied if $config->pageNameCharset is 'UTF8').
* @param int|array $maxLength Maximum number of characters allowed in the name. * @param int|array $maxLength Maximum number of characters allowed in the name.
* You may also specify the $options array for this argument instead. * You may also specify the $options array for this argument instead.
* @param array $options Array of options to modify default behavior. See Sanitizer::name() method for available options. * @param array $options Array of options to modify default behavior. See Sanitizer::name() method for available options, plus:
* - `punycodeVersion` (int): Punycode version to use with UTF-8 page names, see Sanitizer::getPunycodeVersion() method for details.
* @return string * @return string
* @see Sanitizer::name() * @see Sanitizer::name()
* *
@@ -788,7 +789,8 @@ class Sanitizer extends Wire {
if(!strlen($value)) return ''; if(!strlen($value)) return '';
$defaults = array( $defaults = array(
'charset' => $this->wire()->config->pageNameCharset 'charset' => $this->wire()->config->pageNameCharset,
'punycodeVersion' => 0,
); );
if(is_array($beautify)) { if(is_array($beautify)) {
@@ -820,13 +822,16 @@ class Sanitizer extends Wire {
&& !ctype_alnum(str_replace(array('-', '_', '.'), '', $value)) && !ctype_alnum(str_replace(array('-', '_', '.'), '', $value))
&& strpos($value, 'xn-') !== 0) { && strpos($value, 'xn-') !== 0) {
$tt = $this->getTextTools();
$max = $maxLength;
do { do {
// encode value // encode value
$value = $this->punyEncodeName($_value); $value = $this->punyEncodeName($_value, $options['punycodeVersion']);
// if result stayed within our allowed character limit, then good, we're done // if result stayed within our allowed character limit, then good, we're done
if(strlen($value) <= $maxLength) break; if(strlen($value) <= $maxLength) break;
// continue loop until encoded value is equal or less than allowed max length // continue loop until encoded value is equal or less than allowed max length
$_value = substr($_value, 0, strlen($_value) - 1); $_value = $tt->substr($_value, 0, $max--);
} while(true); } while(true);
// if encode was necessary and successful, return with no further processing // if encode was necessary and successful, return with no further processing
@@ -843,7 +848,7 @@ class Sanitizer extends Wire {
$beautify = self::okUTF8; $beautify = self::okUTF8;
if(strpos($value, 'xn-') === 0) { if(strpos($value, 'xn-') === 0) {
// found something to convert // found something to convert
$value = $this->punyDecodeName($value); $value = $this->punyDecodeName($value, $options['punycodeVersion']);
// now it will run through okUTF8 // now it will run through okUTF8
} }
} }
@@ -981,43 +986,51 @@ class Sanitizer extends Wire {
* Decode a PW-punycode'd name value * Decode a PW-punycode'd name value
* *
* @param string $value * @param string $value
* @param int $version 0=auto-detect, 1=original/buggy, 2=punycode library, 3=php idn function
* @return string * @return string
* *
*/ */
protected function punyDecodeName($value) { protected function punyDecodeName($value, $version = 0) {
// exclude values that we know can't be converted // exclude values that we know can't be converted
if(strlen($value) < 4 || strpos($value, 'xn-') !== 0) return $value; if(strlen($value) < 4 || strpos($value, 'xn-') !== 0) return $value;
$version = $this->getPunycodeVersion($version);
if(strpos($value, '__')) { if(strpos($value, '__')) {
// as used by punycode version 1 to split long strings
$_value = $value; $_value = $value;
$parts = explode('__', $_value); $parts = explode('__', $_value);
foreach($parts as $n => $part) { foreach($parts as $n => $part) {
if(strpos($part, "xn-=") === 0) $part = substr($part, 3); $parts[$n] = $this->punyDecodeName($part, $version);
if(strpos($part, '=') === 0) {
// equals at beginning means keep this part as-is
$part = $this->name(substr($part, 1));
} else {
$part = $this->punyDecodeName($part);
}
$parts[$n] = $part;
} }
$value = implode('', $parts); $value = implode('', $parts);
return $value; return $value;
} }
$_value = $value; $_value = $value;
// convert "xn-" single hyphen to recognized punycode "xn--" double hyphen // convert "xn-" single hyphen to recognized punycode "xn--" double hyphen
if(strpos($value, 'xn--') !== 0) $value = 'xn--' . substr($value, 3); if(strpos($value, 'xn--') !== 0) $value = 'xn--' . substr($value, 3);
if(function_exists('idn_to_utf8')) {
// use native php function if available if($version >= 3) {
$value = idn_to_utf8($value, 32); // 32=IDNA_NONTRANSITIONAL_TO_UNICODE // PHP IDN function
} else { // 32=IDNA_NONTRANSITIONAL_TO_UNICODE
// otherwise use Punycode class $info = array();
$value = idn_to_utf8($value, 32, INTL_IDNA_VARIANT_UTS46, $info);
if(empty($value)) $value = $info['result'];
} else if($version === 2) {
// Punycode library
$pc = new Punycode(); $pc = new Punycode();
$value = $pc->decode($value); $value = $pc->decode($value);
} else {
// PHP IDN with old/buggy behavior post PHP 7.4
$value = @idn_to_utf8($value);
} }
// if utf8 conversion failed, restore original value // if utf8 conversion failed, restore original value
if($value === false || !strlen($value)) $value = $_value; if($value === false || !strlen($value)) $value = $_value;
return $value; return $value;
} }
@@ -1025,50 +1038,57 @@ class Sanitizer extends Wire {
* Encode a name value to PW-punycode * Encode a name value to PW-punycode
* *
* @param string $value * @param string $value
* @param int $version 0=auto-detect, 1=original/buggy, 2=punycode library, 3=php idn function
* @return string * @return string
* *
*/ */
protected function punyEncodeName($value) { protected function punyEncodeName($value, $version = 0) {
// exclude values that don't need to be converted
if(strpos($value, 'xn-') === 0) return $value; if(strpos($value, 'xn-') === 0) return $value;
if(ctype_alnum(str_replace(array('.', '-', '_'), '', $value))) return $value; if(ctype_alnum(str_replace(array('.', '-', '_'), '', $value))) return $value;
$tt = $this->getTextTools();
$version = $this->getPunycodeVersion($version);
while(strpos($value, '__') !== false) { while(strpos($value, '__') !== false) {
$value = str_replace('__', '_', $value); $value = str_replace('__', '_', $value);
} }
if(strlen($value) >= 50) { if(strlen($value) >= 50 && $version < 2) {
$tt = $this->getTextTools();
$_value = $value; $_value = $value;
$parts = array(); $parts = array();
while(strlen($_value)) { while(strlen($_value)) {
$part = $tt->substr($_value, 0, 12); $part = $tt->substr($_value, 0, 12);
$_value = $tt->substr($_value, 12); $_value = $tt->substr($_value, 12);
$part = $this->punyEncodeName($part); $parts[] = $this->punyEncodeName($part, $version);
if(strpos($part, 'xn-') !== 0) {
// if encoding didn't result in an xn- string then
// prefix an equals to indicate this part should be taken literally
$part = "=$part";
}
$parts[] = $part;
} }
$value = implode('__', $parts); $value = implode('__', $parts);
if(strpos($value, 'xn--') !== false && strpos($value, 'xn-') !== 0) { return $value;
$value = "xn-$value";
}
return $value;
} }
$_value = $value; $_value = $value;
if(function_exists("idn_to_ascii")) { if($version >= 3) {
// use native php function if available // PHP 7.4+ idn_to_ascii
$value = substr(idn_to_ascii($value, 16), 3); // 16=IDNA_NONTRANSITIONAL_TO_ASCII $info = array();
} else { // 16=IDNA_NONTRANSITIONAL_TO_ASCII
// otherwise use Punycode class $value = idn_to_ascii($value, 16, INTL_IDNA_VARIANT_UTS46, $info);
// IDN return value fails on longer strings, but populates result correctly
if(strlen($_value) >= 50) $value = $info['result'];
} else if($version === 2) {
// Punycode library
$pc = new Punycode(); $pc = new Punycode();
$value = substr($pc->encode($value), 3); $value = substr($pc->encode($value), 3);
} else {
// buggy behavior in PHP 7.4+ but pages may already be present with it
// INTL_IDNA_VARIANT_2003 is default prior to PHP 7.4
$value = @idn_to_ascii($value);
} }
if(strpos($value, 'xn-') === 0) $value = substr($value, 3);
if(strlen($value) && $value !== '-') { if(strlen($value) && $value !== '-') {
// in PW the xn- prefix has one fewer hyphen than in native Punycode // in PW the xn- prefix has one fewer hyphen than in native Punycode
// for compatibility with pageName sanitization and beautification // for compatibility with pageName sanitization and beautification
@@ -1078,9 +1098,40 @@ class Sanitizer extends Wire {
// return value is always ascii // return value is always ascii
$value = $this->name($_value); $value = $this->name($_value);
} }
return $value; return $value;
} }
/**
 * Get internal Punycode version to use
 *
 * 0: Auto-detect from current environment.
 * 1: PHP IDN function used by all PW versions prior to 3.0.244, but buggy in PHP 7.4+.
 * 2: Dedicated Punycode PHP library (no known issues at present).
 * 3: PHP IDN function call updated for PHP 7.4+ (default in new installations after January 2025).
 *
 * When 0 is given, the version is resolved in this order: a "v" + digit prefix at the
 * beginning of $config->pageNameWhitelist (i.e. "v3"), then the installation date.
 * Regardless of the requested version, version 2 is returned when the PHP IDN functions
 * are not available, or when version 3+ is requested on a PHP version older than 7.4.
 *
 * @param int $version Requested version or 0 to auto-detect (numeric strings are accepted)
 * @return int 1=PHP IDN function (buggy in PHP 7.4+), 2=Punycode library, 3=PHP IDN function for PHP 7.4+
 * @since 3.0.244
 *
 */
protected function getPunycodeVersion($version) {
	$config = $this->wire()->config;
	// the encode/decode methods compare the version strictly (i.e. $version === 2),
	// so normalize to int in case a numeric string like "2" was given
	$version = (int) $version;
	// cast to string because strpos() on a null value is deprecated as of PHP 8.1
	$whitelist = (string) $config->pageNameWhitelist;
	if(!$version && strpos($whitelist, 'v') === 0) {
		// i.e. "v3" specified at beginning of pageNameWhitelist
		$digit = substr($whitelist, 1, 1);
		$version = ctype_digit($digit) ? (int) $digit : 0;
	}
	if(!$version) $version = $config->installedAfter('2025-01-04') ? 3 : 1;
	// versions 1 and 3 need idn_to_utf8() for decoding and idn_to_ascii() for encoding,
	// so fall back to the Punycode library when either one is missing
	if(!function_exists('idn_to_utf8') || !function_exists('idn_to_ascii')) $version = 2;
	if($version >= 3 && version_compare(phpversion(), '7.4.0', '<')) $version = 2;
	return $version;
}
/**
* @return Punycode
*
*/
protected function punycode() { protected function punycode() {
return new Punycode(); return new Punycode();
} }