From b0f3116b9e78908b0038d3570bfc1deb5ad3bdfb Mon Sep 17 00:00:00 2001 From: "Edward Z. Yang" Date: Wed, 1 Aug 2007 13:15:33 +0000 Subject: [PATCH] [2.1.0] URI scheme is munged off if there is no authority and the scheme is the default one git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@1330 48356398-32a2-884e-a903-53898d9a118a --- NEWS | 2 + library/HTMLPurifier/AttrDef/URI.php | 9 ++++- tests/HTMLPurifier/AttrDef/URITest.php | 56 ++++++++++++++++++++------ 3 files changed, 53 insertions(+), 14 deletions(-) diff --git a/NEWS b/NEWS index f8540264..07c01d94 100644 --- a/NEWS +++ b/NEWS @@ -90,6 +90,8 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier doctype use new %HTML.CustomDoctype . ConfigForm truncates long directives to keep the form small, and does not re-output namespaces +. URI scheme is munged off if there is no authority and the scheme is the + default one 2.0.0, released 2007-06-20 # Completely refactored HTMLModuleManager, decentralizing safety diff --git a/library/HTMLPurifier/AttrDef/URI.php b/library/HTMLPurifier/AttrDef/URI.php index 6250d08e..979e44b3 100644 --- a/library/HTMLPurifier/AttrDef/URI.php +++ b/library/HTMLPurifier/AttrDef/URI.php @@ -134,7 +134,7 @@ class HTMLPurifier_AttrDef_URI extends HTMLPurifier_AttrDef $matches = array(); $result = preg_match($r_URI, $uri, $matches); - if (!$result) return false; // invalid URI + if (!$result) return false; // *really* invalid URI // seperate out parts $scheme = !empty($matches[1]) ? $matches[2] : null; @@ -146,6 +146,7 @@ class HTMLPurifier_AttrDef_URI extends HTMLPurifier_AttrDef $registry =& HTMLPurifier_URISchemeRegistry::instance(); + $default_scheme = $config->get('URI', 'DefaultScheme'); if ($scheme !== null) { // no need to validate the scheme's fmt since we do that when we // retrieve the specific scheme object from the registry @@ -154,7 +155,7 @@ class HTMLPurifier_AttrDef_URI extends HTMLPurifier_AttrDef if (!$scheme_obj) return false; // invalid scheme, clean it out } else { $scheme_obj = $registry->getScheme( - $config->get('URI', 'DefaultScheme'), $config, $context + $default_scheme, $config, $context ); } @@ -176,6 +177,8 @@ class HTMLPurifier_AttrDef_URI extends HTMLPurifier_AttrDef if ($authority !== null) { + // ridiculously inefficient + // remove URI if it's absolute and we disabled externals or // if it's absolute and embedded and we disabled external resources unset($our_host); @@ -259,6 +262,8 @@ class HTMLPurifier_AttrDef_URI extends HTMLPurifier_AttrDef if($userinfo !== null) $authority .= $userinfo . '@'; $authority .= $host; if($port !== null) $authority .= ':' . $port; + } else { + if ($default_scheme == $scheme) $scheme = null; // munge scheme off when unnecessary } // reconstruct the result diff --git a/tests/HTMLPurifier/AttrDef/URITest.php b/tests/HTMLPurifier/AttrDef/URITest.php index 5c75920c..64209fa1 100644 --- a/tests/HTMLPurifier/AttrDef/URITest.php +++ b/tests/HTMLPurifier/AttrDef/URITest.php @@ -183,12 +183,9 @@ class HTMLPurifier_AttrDef_URITest extends HTMLPurifier_AttrDefHarness ); } - // scheme munging (i.e. removal when unnecessary) not implemented - function testParsingPathAbsolute() { // note this is different from path-rootless $this->assertParsing( 'http:/this/is/path', - // do not munge scheme off null, null, null, '/this/is/path', null ); } @@ -199,7 +196,6 @@ class HTMLPurifier_AttrDef_URITest extends HTMLPurifier_AttrDefHarness 'http:this/is/path', null, null, null, 'this/is/path', null ); - // TODO: scheme should be munged off } function testParsingPathEmpty() { @@ -207,7 +203,6 @@ class HTMLPurifier_AttrDef_URITest extends HTMLPurifier_AttrDefHarness 'http:', null, null, null, '', null ); - // TODO: scheme should be munged off } function testParsingRelativeURI() { @@ -229,37 +224,74 @@ class HTMLPurifier_AttrDef_URITest extends HTMLPurifier_AttrDefHarness '', null, null, null, '', null ); - // TODO: should be returned unharmed } // OUTPUT RELATED TESTS + // scheme is mocked to ensure only the URI is being tested - function assertOutput($expect_uri, $userinfo, $host, $port, $path, $query, $config = null, $context = null) { + function assertOutput($input_uri, $expect_uri, $userinfo, $host, $port, $path, $query, $config = null, $context = null) { // prepare mock machinery $this->prepareCommon($config, $context); $scheme =& $this->generateSchemeMock(); - $components = array($userinfo, $host, $port, $path, $query, '*', '*'); + $components = array($userinfo, $host, $port, $path, $query); $scheme->setReturnValue('validateComponents', $components); - // dummy URI is passed as input, MUST NOT HAVE FRAGMENT $def = new HTMLPurifier_AttrDef_URI(); - $result_uri = $def->validate('http://example.com/', $config, $context); + $result_uri = $def->validate($input_uri, $config, $context); + if ($expect_uri === true) $expect_uri = $input_uri; $this->assertEqual($result_uri, $expect_uri); } function testOutputRegular() { $this->assertOutput( - 'http://user@authority.part:8080/now/the/path?query', + 'http://user@authority.part:8080/now/the/path?query#frag', true, 'user', 'authority.part', 8080, '/now/the/path', 'query' ); } + function testOutputEmpty() { + $this->assertOutput( + '', true, + null, null, null, '', null + ); + } + + function testOutputNullPath() { + $this->assertOutput( + '', true, + null, null, null, null, null // usually shouldn't happen + ); + } + + function testOutputPathAbsolute() { + $this->assertOutput( + 'http:/this/is/path', '/this/is/path', + null, null, null, '/this/is/path', null + ); + } + + function testOutputPathRootless() { + $this->assertOutput( + 'http:this/is/path', 'this/is/path', + null, null, null, 'this/is/path', null + ); + } + + function testOutputPathEmpty() { + $this->assertOutput( + 'http:', '', + null, null, null, '', null + ); + } + // INTEGRATION TESTS function testIntegration() { $this->assertDef('http://www.google.com/'); + $this->assertDef('http:', ''); + $this->assertDef('http:/foo', '/foo'); $this->assertDef('javascript:bad_stuff();', false); $this->assertDef('ftp://www.example.com/'); $this->assertDef('news:rec.alt'); @@ -336,7 +368,7 @@ class HTMLPurifier_AttrDef_URITest extends HTMLPurifier_AttrDefHarness } function testWhitelist() { - /* + /* unimplemented $this->config->set('URI', 'HostPolicy', 'DenyAll'); $this->config->set('URI', 'HostWhitelist', array(null, 'google.com'));