1
0
mirror of https://github.com/ezyang/htmlpurifier.git synced 2025-08-05 13:47:24 +02:00

Commit IPv6 fix, with majoring factoring out. Thank you Feyd!

git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@284 48356398-32a2-884e-a903-53898d9a118a
This commit is contained in:
Edward Z. Yang
2006-08-17 01:05:35 +00:00
parent cd0108d656
commit 1cadb08fbb
9 changed files with 291 additions and 28 deletions

View File

@@ -0,0 +1,41 @@
<?php
require_once 'HTMLPurifier/AttrDef.php';
require_once 'HTMLPurifier/AttrDef/IPv4.php';
require_once 'HTMLPurifier/AttrDef/IPv6.php';
class HTMLPurifier_AttrDef_Host extends HTMLPurifier_AttrDef
{
var $ipv4, $ipv6;
function HTMLPurifier_AttrDef_Host() {
$this->ipv4 = new HTMLPurifier_AttrDef_IPv4();
$this->ipv6 = new HTMLPurifier_AttrDef_IPv6();
}
function validate($string, $config, &$context) {
$length = strlen($string);
if ($string === '') return '';
if ($length > 1 && $string[0] === '[' && $string[$length-1] === ']') {
//IPv6
$ip = substr($string, 1, $length - 2);
$valid = $this->ipv6->validate($ip, $config, $context);
if ($valid === false) return false;
return '['. $valid . ']';
}
$ipv4 = $this->ipv4->validate($string, $config, $context);
if ($ipv4 !== false) return $ipv4;
// validate a domain name here, do filtering, etc etc etc
// We could use this, but it would break I18N domain names
//$match = preg_match('/^[a-z0-9][\w\-\.]*[a-z0-9]$/i', $string);
//if (!$match) return false;
return $string;
}
}
?>

View File

@@ -0,0 +1,31 @@
<?php
require_once 'HTMLPurifier/AttrDef.php';
// spliced from Feyd's IPv6 function (pd)
class HTMLPurifier_AttrDef_IPv4 extends HTMLPurifier_AttrDef
{
// regex is public so that IPv6 can reuse it
var $ip4;
function HTMLPurifier_AttrDef_IPv4() {
$oct = '(?:25[0-5]|2[0-4][0-9]|1[0-9]{2}|[1-9][0-9]|[0-9])'; // 0-255
$this->ip4 = "(?:{$oct}\\.{$oct}\\.{$oct}\\.{$oct})";
}
function validate($aIP, $config, &$context) {
if (preg_match('#^' . $this->ip4 . '$#s', $aIP))
{
return $aIP;
}
return false;
}
}
?>

View File

@@ -0,0 +1,98 @@
<?php
require_once 'HTMLPurifier/AttrDef/IPv4.php';
// IPv6 by Feyd, source is in public domain
// note that this expects the brackets to be removed from IPv6 addresses
// extends from the IPv4 impl. so we can borrow its regex
class HTMLPurifier_AttrDef_IPv6 extends HTMLPurifier_AttrDef_IPv4
{
function validate($aIP, $config, &$context) {
$original = $aIP;
$hex = '[0-9a-fA-F]';
$blk = '(?:' . $hex . '{1,4})';
$pre = '(?:/(?:12[0-8]|1[0-1][0-9]|[1-9][0-9]|[0-9]))'; // /0 - /128
// prefix check
if (strpos($aIP, '/') !== false)
{
if (preg_match('#' . $pre . '$#s', $aIP, $find))
{
$aIP = substr($aIP, 0, 0-strlen($find[0]));
unset($find);
}
else
{
return false;
}
}
// IPv4-compatiblity check
if (preg_match('#(?<=:'.')' . $this->ip4 . '$#s', $aIP, $find))
{
$aIP = substr($aIP, 0, 0-strlen($find[0]));
$ip = explode('.', $find[0]);
$ip = array_map('dechex', $ip);
$aIP .= $ip[0] . $ip[1] . ':' . $ip[2] . $ip[3];
unset($find, $ip);
}
// compression check
$aIP = explode('::', $aIP);
$c = count($aIP);
if ($c > 2)
{
return false;
}
elseif ($c == 2)
{
list($first, $second) = $aIP;
$first = explode(':', $first);
$second = explode(':', $second);
if (count($first) + count($second) > 8)
{
return false;
}
while(count($first) < 8)
{
array_push($first, '0');
}
array_splice($first, 8 - count($second), 8, $second);
$aIP = $first;
unset($first,$second);
}
else
{
$aIP = explode(':', $aIP[0]);
}
$c = count($aIP);
if ($c != 8)
{
return false;
}
// All the pieces should be 16-bit hex strings. Are they?
foreach ($aIP as $piece)
{
if (!preg_match('#^[0-9a-fA-F]{4}$#s', sprintf('%04s', $piece)))
{
return false;
}
}
return $original;
}
}
?>

View File

@@ -1,7 +1,9 @@
<?php
require_once 'HTMLPurifier/AttrDef.php';
require_once 'HTMLPurifier/URIScheme.php';
require_once 'HTMLPurifier/URISchemeRegistry.php';
require_once 'HTMLPurifier/AttrDef/Host.php';
HTMLPurifier_ConfigDef::define(
'URI', 'DefaultScheme', 'http',
@@ -12,6 +14,12 @@ HTMLPurifier_ConfigDef::define(
class HTMLPurifier_AttrDef_URI extends HTMLPurifier_AttrDef
{
var $host;
function HTMLPurifier_AttrDef_URI() {
$this->host = new HTMLPurifier_AttrDef_Host();
}
function validate($uri, $config, &$context) {
// We'll write stack-based parsers later, for now, use regexps to
@@ -63,34 +71,12 @@ class HTMLPurifier_AttrDef_URI extends HTMLPurifier_AttrDef
if ($authority !== null) {
// define regexps
// this stuff may need to be factored out so Email can get to it
$HEXDIG = '[A-Fa-f0-9]';
$unreserved = 'A-Za-z0-9-._~'; // make sure you wrap with []
$sub_delims = '!$&\'()'; // needs []
$pct_encoded = "%$HEXDIG$HEXDIG";
$h16 = "{$HEXDIG}{1,4}";
$dec_octet = '(?:25[0-5]|2[0-4]\d|1\d\d|1\d|[0-9])';
$IPv4address = "$dec_octet.$dec_octet.$dec_octet.$dec_octet";
$ls32 = "(?:$h16:$h16|$IPv4address)";
$IPvFuture = "v$HEXDIG+\.[:$unreserved$sub_delims]+";
$IPv6Address = "(?:".
"(?:$h16:){6}$ls32" .
"|::(?:$h16:){5}$ls32" .
"|(?:$h16)?::(?:$h16:){4}$ls32" .
"|(?:(?:$h16:){1}$h16)?::(?:$h16:){3}$ls32" .
"|(?:(?:$h16:){2}$h16)?::(?:$h16:){2}$ls32" .
"|(?:(?:$h16:){3}$h16)?::(?:$h16:){1}$ls32" .
"|(?:(?:$h16:){4}$h16)?::$ls32" .
"|(?:(?:$h16:){5}$h16)?::$h16" .
"|(?:(?:$h16:){6}$h16)?::" .
")";
$IP_literal = "\[(?:$IPvFuture|$IPv6Address)\]";
$r_userinfo = "(?:[$unreserved$sub_delims:]|$pct_encoded)*";
// IPv6 is broken
$r_authority = "/^(($r_userinfo)@)?(\[$IP_literal\]|[^:]*)(:(\d*))?/";
$r_authority = "/^(($r_userinfo)@)?(\[[^\]]+\]|[^:]*)(:(\d*))?/";
$matches = array();
preg_match($r_authority, $authority, $matches);
// overloads regexp!
@@ -104,6 +90,9 @@ class HTMLPurifier_AttrDef_URI extends HTMLPurifier_AttrDef
if ($port < 1 || $port > 65535) $port = null;
}
$host = $this->host->validate($host, $config, $context);
if ($host === false) $host = null;
// userinfo and host are validated within the regexp
} else {