mirror of
https://github.com/ezyang/htmlpurifier.git
synced 2025-07-31 03:10:09 +02:00
[3.1.0] Revamp URI handling of percent encoding and validation.
git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@1709 48356398-32a2-884e-a903-53898d9a118a
This commit is contained in:
@@ -36,11 +36,23 @@ class HTMLPurifier_AttrDef_URI_Host extends HTMLPurifier_AttrDef
|
||||
$ipv4 = $this->ipv4->validate($string, $config, $context);
|
||||
if ($ipv4 !== false) return $ipv4;
|
||||
|
||||
// validate a domain name here, do filtering, etc etc etc
|
||||
// A regular domain name.
|
||||
|
||||
// We could use this, but it would break I18N domain names
|
||||
//$match = preg_match('/^[a-z0-9][\w\-\.]*[a-z0-9]$/i', $string);
|
||||
//if (!$match) return false;
|
||||
// This breaks I18N domain names, but we don't have proper IRI support,
|
||||
// so force users to insert Punycode. If there's complaining we'll
|
||||
// try to fix things into an international friendly form.
|
||||
|
||||
// The productions describing this are:
|
||||
$a = '[a-z]'; // alpha
|
||||
$an = '[a-z0-9]'; // alphanum
|
||||
$and = '[a-z0-9-]'; // alphanum | "-"
|
||||
// domainlabel = alphanum | alphanum *( alphanum | "-" ) alphanum
|
||||
$domainlabel = "$an($and*$an)?";
|
||||
// toplabel = alpha | alpha *( alphanum | "-" ) alphanum
|
||||
$toplabel = "$a($and*$an)?";
|
||||
// hostname = *( domainlabel "." ) toplabel [ "." ]
|
||||
$match = preg_match("/^($domainlabel\.)*$toplabel\.?$/i", $string);
|
||||
if (!$match) return false;
|
||||
|
||||
return $string;
|
||||
}
|
||||
|
Reference in New Issue
Block a user