1
0
mirror of https://github.com/ezyang/htmlpurifier.git synced 2025-08-07 14:46:48 +02:00

[1.2.0] Added percent encoding normalization

git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@509 48356398-32a2-884e-a903-53898d9a118a
This commit is contained in:
Edward Z. Yang
2006-11-07 17:15:28 +00:00
parent e998b034d1
commit 504203c0f3
6 changed files with 99 additions and 5 deletions

View File

@@ -4,6 +4,7 @@ require_once 'HTMLPurifier/AttrDef.php';
require_once 'HTMLPurifier/URIScheme.php';
require_once 'HTMLPurifier/URISchemeRegistry.php';
require_once 'HTMLPurifier/AttrDef/Host.php';
require_once 'HTMLPurifier/PercentEncoder.php';
HTMLPurifier_ConfigSchema::define(
'URI', 'DefaultScheme', 'http', 'string',
@@ -19,9 +20,11 @@ class HTMLPurifier_AttrDef_URI extends HTMLPurifier_AttrDef
{
var $host;
var $PercentEncoder;
function HTMLPurifier_AttrDef_URI() {
$this->host = new HTMLPurifier_AttrDef_Host();
$this->PercentEncoder = new HTMLPurifier_PercentEncoder();
}
function validate($uri, $config, &$context) {
@@ -32,6 +35,9 @@ class HTMLPurifier_AttrDef_URI extends HTMLPurifier_AttrDef
// parse as CDATA
$uri = $this->parseCDATA($uri);
// fix up percent-encoding
$uri = $this->PercentEncoder->normalize($uri);
// while it would be nice to use parse_url(), that's specifically
// for HTTP and thus won't work for our generic URI parsing

View File

@@ -0,0 +1,44 @@
<?php
class HTMLPurifier_PercentEncoder
{
/**
* Fix up percent-encoding by decoding unreserved characters and normalizing
* @param $string String to normalize
*/
function normalize($string) {
if ($string == '') return '';
$parts = explode('%', $string);
$ret = array_shift($parts);
foreach ($parts as $part) {
$length = strlen($part);
if ($length < 2) {
$ret .= '%25' . $part;
continue;
}
$encoding = substr($part, 0, 2);
$text = substr($part, 2);
if (!ctype_xdigit($encoding)) {
$ret .= '%25' . $part;
continue;
}
$int = hexdec($encoding);
if (
($int >= 48 && $int <= 57) || // digits
($int >= 65 && $int <= 90) || // uppercase letters
($int >= 97 && $int <= 122) || // lowercase letters
$int == 126 || $int == 45 || $int == 46 || $int == 95 // ~-._
) {
$ret .= chr($int) . $text;
continue;
}
$encoding = strtoupper($encoding);
$ret .= '%' . $encoding . $text;
}
return $ret;
}
}
?>