1
0
mirror of https://github.com/ezyang/htmlpurifier.git synced 2025-08-06 14:16:32 +02:00

[3.1.0] Revamp URI handling of percent encoding and validation.

git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@1709 48356398-32a2-884e-a903-53898d9a118a
This commit is contained in:
Edward Z. Yang
2008-05-14 02:19:00 +00:00
parent 77ce3e8b4a
commit cb5d5d0648
13 changed files with 261 additions and 50 deletions

View File

@@ -7,7 +7,7 @@
class HTMLPurifier_AttrDef_URI extends HTMLPurifier_AttrDef
{
protected $parser, $percentEncoder;
protected $parser;
protected $embedsResource;
/**
@@ -15,7 +15,6 @@ class HTMLPurifier_AttrDef_URI extends HTMLPurifier_AttrDef
*/
public function __construct($embeds_resource = false) {
$this->parser = new HTMLPurifier_URIParser();
$this->percentEncoder = new HTMLPurifier_PercentEncoder();
$this->embedsResource = (bool) $embeds_resource;
}
@@ -23,9 +22,7 @@ class HTMLPurifier_AttrDef_URI extends HTMLPurifier_AttrDef
if ($config->get('URI', 'Disable')) return false;
// initial operations
$uri = $this->parseCDATA($uri);
$uri = $this->percentEncoder->normalize($uri);
// parse the URI
$uri = $this->parser->parse($uri);
@@ -61,13 +58,6 @@ class HTMLPurifier_AttrDef_URI extends HTMLPurifier_AttrDef
$context->destroy('EmbeddedURI');
if (!$ok) return false;
// munge scheme off if necessary (this must be last)
if (!is_null($uri->scheme) && is_null($uri->host)) {
if ($uri_def->defaultScheme == $uri->scheme) {
$uri->scheme = null;
}
}
// back to string
$result = $uri->toString();

View File

@@ -36,11 +36,23 @@ class HTMLPurifier_AttrDef_URI_Host extends HTMLPurifier_AttrDef
$ipv4 = $this->ipv4->validate($string, $config, $context);
if ($ipv4 !== false) return $ipv4;
// validate a domain name here, do filtering, etc etc etc
// A regular domain name.
// We could use this, but it would break I18N domain names
//$match = preg_match('/^[a-z0-9][\w\-\.]*[a-z0-9]$/i', $string);
//if (!$match) return false;
// This breaks I18N domain names, but we don't have proper IRI support,
// so force users to insert Punycode. If there's complaining we'll
// try to fix things into an international friendly form.
// The productions describing this are:
$a = '[a-z]'; // alpha
$an = '[a-z0-9]'; // alphanum
$and = '[a-z0-9-]'; // alphanum | "-"
// domainlabel = alphanum | alphanum *( alphanum | "-" ) alphanum
$domainlabel = "$an($and*$an)?";
// toplabel = alpha | alpha *( alphanum | "-" ) alphanum
$toplabel = "$a($and*$an)?";
// hostname = *( domainlabel "." ) toplabel [ "." ]
$match = preg_match("/^($domainlabel\.)*$toplabel\.?$/i", $string);
if (!$match) return false;
return $string;
}