diff --git a/NEWS b/NEWS index f5f89d32..f8540264 100644 --- a/NEWS +++ b/NEWS @@ -43,6 +43,9 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier already exists. May clobber autoload, so I need to keep an eye on it . ConfigSchema heavily optimized, will only collect information and validate definitions when HTMLPURIFIER_SCHEMA_STRICT is true. +. AttrDef_URI unit tests refactored +. benchmarks/ directory now protected from public view with .htaccess file; + run the tests via command line 2.0.1, released 2007-06-27 ! Tag auto-closing now based on a ChildDef heuristic rather than a diff --git a/benchmarks/.htaccess b/benchmarks/.htaccess new file mode 100644 index 00000000..3a428827 --- /dev/null +++ b/benchmarks/.htaccess @@ -0,0 +1 @@ +Deny from all diff --git a/tests/HTMLPurifier/AttrDef/URITest.php b/tests/HTMLPurifier/AttrDef/URITest.php index a4d2521e..af718034 100644 --- a/tests/HTMLPurifier/AttrDef/URITest.php +++ b/tests/HTMLPurifier/AttrDef/URITest.php @@ -14,66 +14,106 @@ class HTMLPurifier_AttrDef_URITest extends HTMLPurifier_AttrDefHarness var $scheme, $components, $return_components; - function testGenericURI() { - + var $oldRegistry; + + function setUp() { + // setup ensures that any twiddling around with the registry is reverted + $this->oldRegistry = HTMLPurifier_URISchemeRegistry::instance(); + $this->def = new HTMLPurifier_AttrDef_URI(); // default + parent::setUp(); + } + + function tearDown() { + HTMLPurifier_URISchemeRegistry::instance($this->oldRegistry); + } + + function prepareCommon(&$config, &$context) { + $config = HTMLPurifier_Config::create($config); + if (!$context) $context = new HTMLPurifier_Context(); + } + + function &generateSchemeMock($scheme_names = array('http', 'mailto')) { generate_mock_once('HTMLPurifier_URIScheme'); generate_mock_once('HTMLPurifier_URISchemeRegistry'); - $old_registry = HTMLPurifier_URISchemeRegistry::instance(); - - // finally, lets get a copy of the actual class - $this->def = new HTMLPurifier_AttrDef_URI(); - - // initialize test inputs - $uri = // input URI - $components = // what components the URI should be parsed to - $return_components = // return components - $expect_uri = array(); // what reassembled URI to expect - - ////////////////////////////////////////////////////////////////////// - - // test a regular instance, return identical URI - $uri[0] = 'http://www.example.com/webhp?q=foo#result2'; - $components[0] = array( - null, // userinfo - 'www.example.com', // host - null, // port - '/webhp', // path - 'q=foo' // query + // load a scheme registry mock to the singleton + $registry =& HTMLPurifier_URISchemeRegistry::instance( + new HTMLPurifier_URISchemeRegistryMock() ); - // test an amended URI (the actual logic is irrelevant) - // test that user and port get parsed correctly (3.2.1 and 3.2.3) - $uri[1] = 'http://user@authority.part:80/now/the/path?query#fragment'; - $components[1] = array( - 'user', 'authority.part', 80, - '/now/the/path', 'query' - ); - $return_components[1] = array( // removed port (it's standard) - 'user', 'authority.part', null, '/now/the/path', 'query' - ); - $expect_uri[1] = 'http://user@authority.part/now/the/path?query#fragment'; + // add a pseudo-scheme to the registry for $scheme_names + $scheme = new HTMLPurifier_URISchemeMock(); + foreach ($scheme_names as $name) { + $registry->setReturnReference('getScheme', $scheme, array($name, '*', '*')); + } + // registry returns false if an invalid scheme is requested + $registry->setReturnValue('getScheme', false, array('*', '*', '*')); - // percent encoded characters are not resolved during generic URI - // parsing even though RFC 3986 defines this notation - // also test what happens when query/fragment are missing - $uri[2] = 'http://en.wikipedia.org/wiki/Clich%C3%A9'; - $components[2] = array( + return $scheme; + } + + // PARSING RELATED TESTS + + function assertParsing($uri, $userinfo, $host, $port, $path, $query, $config = null, $context = null) { + + $this->prepareCommon($config, $context); + $scheme =& $this->generateSchemeMock(); + + // create components parameter list + // Config and Context are wildcards due to PHP4 reference funkiness + $components = array($userinfo, $host, $port, $path, $query, '*', '*'); + $scheme->expectOnce('validateComponents', $components); + + $def = new HTMLPurifier_AttrDef_URI(); + $def->validate($uri, $config, $context); + + $scheme->tally(); + + } + + function testParsingRegular() { + $this->assertParsing( + 'http://www.example.com/webhp?q=foo#result2', + null, 'www.example.com', null, '/webhp', 'q=foo' + ); + } + + function testParsingPortAndUsername() { + $this->assertParsing( + 'http://user@authority.part:80/now/the/path?query#fragment', + 'user', 'authority.part', 80, '/now/the/path', 'query' + ); + } + + function testParsingPercentEncoding() { + $this->assertParsing( + 'http://en.wikipedia.org/wiki/Clich%C3%A9', null, 'en.wikipedia.org', null, '/wiki/Clich%C3%A9', null ); - - // test distinction between empty query and undefined query (above) - $uri[3] = 'http://www.example.com/?#'; - $components[3] = array(null, 'www.example.com', null, '/', ''); - - // path is always defined, even if empty - $uri[4] = 'http://www.example.com'; - $components[4] = array(null, 'www.example.com', null, '', null); - - // test parsing of an opaque URI - $uri[5] = 'mailto:bob@example.com'; - $components[5] = array(null, null, null, 'bob@example.com', null); - + } + + function testParsingEmptyQuery() { + $this->assertParsing( + 'http://www.example.com/?#', + null, 'www.example.com', null, '/', '' + ); + } + + function testParsingEmptyPath() { + $this->assertParsing( + 'http://www.example.com', + null, 'www.example.com', null, '', null + ); + } + + function testParsingOpaqueURI() { + $this->assertParsing( + 'mailto:bob@example.com', + null, null, null, 'bob@example.com', null + ); + } + + function testParsingImproperPercentEncoding() { // even though we don't resolve percent entities, we have to fix // improper percent-encodes. Taken one at a time: // %56 - V, which is an unreserved character @@ -84,168 +124,157 @@ class HTMLPurifier_AttrDef_URITest extends HTMLPurifier_AttrDefHarness // note that Apache doesn't do such fixing, rather, it just claims // that the browser sent a "Bad Request". See PercentEncoder.php // for more details - $uri[6] = 'http://www.example.com/%56%fc%GJ%5%FC'; - $components[6] = array(null, 'www.example.com', null, '/V%FC%25GJ%255%FC', null); - $expect_uri[6] = 'http://www.example.com/V%FC%25GJ%255%FC'; + $this->assertParsing( + 'http://www.example.com/%56%fc%GJ%5%FC', + null, 'www.example.com', null, '/V%FC%25GJ%255%FC', null + ); + } + + function testParsingIPv4Address() { + $this->assertParsing( + 'http://192.0.34.166/', + null, '192.0.34.166', null, '/', null + ); + } + + function testParsingFakeIPv4Address() { + $this->assertParsing( + 'http://333.123.32.123/', + null, '333.123.32.123', null, '/', null + ); + } + + function testParsingIPv6Address() { + $this->assertParsing( + 'http://[2001:db8::7]/c=GB?objectClass?one', + null, '[2001:db8::7]', null, '/c=GB', 'objectClass?one' + ); + } + + // We will not implement punycode encoding, that's up to the browsers + // We also will not implement percent to IDNA encoding transformations: + // if you need to use an international domain in a link, make sure that + // you've got it in UTF-8 and send it in raw (no encoding). + function testParsingInternationalizedDomainName() { + $this->assertParsing( + "http://t\xC5\xABdali\xC5\x86.lv", + null, "t\xC5\xABdali\xC5\x86.lv", null, '', null + ); + } + + function testParsingInvalidHostThatLooksLikeIPv6Address() { + $this->assertParsing( + 'http://[2001:0db8:85z3:08d3:1319:8a2e:0370:7334]', + null, null, null, '', null + ); + } + + function testParsingInvalidPort() { + $this->assertParsing( + 'http://example.com:foobar', + null, 'example.com', null, '', null + ); + } + + function testParsingOverLargePort() { + $this->assertParsing( + 'http://example.com:65536', + null, 'example.com', null, '', null + ); + } + + // scheme munging (i.e. removal when unnecessary) not implemented + + function testParsingPathAbsolute() { // note this is different from path-rootless + $this->assertParsing( + 'http:/this/is/path', + // do not munge scheme off + null, null, null, '/this/is/path', null + ); + } + + function testParsingPathRootless() { + // this should not be used but is allowed + $this->assertParsing( + 'http:this/is/path', + null, null, null, 'this/is/path', null + ); + // TODO: scheme should be munged off + } + + function testParsingPathEmpty() { + $this->assertParsing( + 'http:', + null, null, null, '', null + ); + // TODO: scheme should be munged off + } + + function testParsingRelativeURI() { + $this->assertParsing( + '/a/b', + null, null, null, '/a/b', null + ); + } + + function testParsingMalformedTag() { + $this->assertParsing( + 'http://www.google.com/\'>"', + null, 'www.google.com', null, '/', null + ); + } + + function testParsingEmpty() { + $this->assertParsing( + '', + null, null, null, '', null + ); + // TODO: should be returned unharmed + } + + // OUTPUT RELATED TESTS + + function assertOutput($expect_uri, $userinfo, $host, $port, $path, $query, $config = null, $context = null) { - // test IPv4 address (behavior may vary with configuration) - $uri[7] = 'http://192.0.34.166/'; - $components[7] = array(null, '192.0.34.166', null, '/', null); + // prepare mock machinery + $this->prepareCommon($config, $context); + $scheme =& $this->generateSchemeMock(); + $components = array($userinfo, $host, $port, $path, $query, '*', '*'); + $scheme->expectOnce('validateComponents'); + $scheme->setReturnValue('validateComponents', $components); - // while it may look like an IPv4 address, it's really a reg-name. - // don't destroy it - $uri[8] = 'http://333.123.32.123/'; - $components[8] = array(null, '333.123.32.123', null, '/', null); + $def = new HTMLPurifier_AttrDef_URI(); + // dummy URI is passed as input, MUST NOT HAVE FRAGMENT + $result_uri = $def->validate('http://example.com/', $config, $context); - // test IPv6 address, using amended form of RFC's example - $uri[9] = 'http://[2001:db8::7]/c=GB?objectClass?one'; - $components[9] = array(null, '[2001:db8::7]', null, '/c=GB', - 'objectClass?one'); + $this->assertEqual($result_uri, $expect_uri); - // We will not implement punycode encoding, that's up to the browsers - // We also will not implement percent to IDNA encoding transformations: - // if you need to use an international domain in a link, make sure that - // you've got it in UTF-8 and send it in raw (no encoding). - - // break the RFC a little and allow international characters - // WARNING: UTF-8 encoded! - $uri[10] = 'http://tūdaliņ.lv'; - $components[10] = array(null, 'tūdaliņ.lv', null, '', null); - - // test invalid IPv6 address and invalid reg-name - $uri[11] = 'http://[2001:0db8:85z3:08d3:1319:8a2e:0370:7334]'; - $components[11] = array(null, null, null, '', null); - $expect_uri[11] = 'http:'; - - // test invalid port - $uri[12] = 'http://example.com:foobar'; - $components[12] = array(null, 'example.com', null, '', null); - $expect_uri[12] = 'http://example.com'; - - // test overlarge port (max is 65535, although this isn't official) - $uri[13] = 'http://example.com:65536'; - $components[13] = array(null, 'example.com', null, '', null); - $expect_uri[13] = 'http://example.com'; - - // some spec abnf tests - - // "authority . path-abempty" omitted, it is a trivial case - - // "path-absolute", note this is different from path-rootless - $uri[14] = 'http:/this/is/path'; - $components[14] = array(null, null, null, '/this/is/path', null); - $expect_uri[14] = 'http:/this/is/path'; // do not munge scheme off - - // scheme munging is not being tested yet, it's an extra feature - - // "path-rootless" - this should not be used but is allowed - $uri[15] = 'http:this/is/path'; - $components[15] = array(null, null, null, 'this/is/path', null); - //$expect_uri[15] = 'this/is/path'; // munge scheme off - - // "path-empty" - a rather interesting case, remove the scheme - $uri[16] = 'http:'; - $components[16] = array(null, null, null, '', null); - //$expect_uri[16] = ''; // munge scheme off - - // test invalid scheme, components shouldn't be passed - $uri[17] = 'javascript:alert("moo");'; - $expect_uri[17] = false; - - // relative URIs - basic case - $uri[18] = '/a/b'; - $components[18] = array(null, null, null, '/a/b', null); - - // result of malformed tag, gracefully handle error - $uri[19] = 'http://www.google.com/\'>"'; - $components[19] = array(null, 'www.google.com', null, '/', null); - $expect_uri[19] = 'http://www.google.com/'; - - // test empty - $uri[20] = ''; - $components[20] = array(null, null, null, '', null); - $expect_uri[20] = ''; - - foreach ($uri as $i => $value) { - - // the read in values - $this->config = isset($config[$i]) ? $config[$i] : HTMLPurifier_Config::createDefault(); - $this->context = isset($context[$i]) ? $context[$i] : new HTMLPurifier_Context(); - - // setUpAssertDef - if ( isset($components[$i]) ) { - $this->components = $components[$i]; - } else { - $this->components = false; - } - if ( isset($return_components[$i]) ) { - $this->return_components = $return_components[$i]; - } else { - $this->return_components = $this->components; - } - - // parameters - if (!isset($expect_uri[$i])) { - $expect_uri[$i] = $value; // untouched - } - - $this->assertDef($value, $expect_uri[$i], true, "Test $i: %s"); - - } - - // reset to regular implementation - HTMLPurifier_URISchemeRegistry::instance($old_registry); + $scheme->tally(); } - function setUpAssertDef() { - // $fake_registry isn't the real mock, because due to PHP 4 weirdness - // I cannot set a default value to function parameters that are passed - // by reference. So we use the value instance() returns. - $fake_registry = new HTMLPurifier_URISchemeRegistryMock(); - $registry =& HTMLPurifier_URISchemeRegistry::instance($fake_registry); - - // now, let's add a pseudo-scheme to the registry - $this->scheme = new HTMLPurifier_URISchemeMock(); - - // here are the schemes we will support with overloaded mocks - $registry->setReturnReference('getScheme', $this->scheme, array('http', '*', '*')); - $registry->setReturnReference('getScheme', $this->scheme, array('mailto', '*', '*')); - - // default return value is false (meaning no scheme defined: reject) - $registry->setReturnValue('getScheme', false, array('*', '*', '*')); - - if ($this->components === false) { - $this->scheme->expectNever('validateComponents'); - } else { - $this->components[] = '*'; // append the configuration - $this->components[] = '*'; // append context - $this->scheme->setReturnValue( - 'validateComponents', $this->return_components, $this->components); - $this->scheme->expectOnce('validateComponents', $this->components); - } + function testOutputRegular() { + $this->assertOutput( + 'http://user@authority.part:8080/now/the/path?query', + 'user', 'authority.part', 8080, '/now/the/path', 'query' + ); } - function tearDownAssertDef() { - $this->scheme->tally(); - } + // INTEGRATION TESTS function testIntegration() { - - $this->def = new HTMLPurifier_AttrDef_URI(); - $this->assertDef('http://www.google.com/'); $this->assertDef('javascript:bad_stuff();', false); $this->assertDef('ftp://www.example.com/'); $this->assertDef('news:rec.alt'); $this->assertDef('nntp://news.example.com/324234'); $this->assertDef('mailto:bob@example.com'); - } - function testDisableExternal() { + function testConfigDisableExternal() { $this->def = new HTMLPurifier_AttrDef_URI(); + $this->config->set('URI', 'DisableExternal', true); $this->config->set('URI', 'Host', 'sub.example.com'); @@ -268,7 +297,7 @@ class HTMLPurifier_AttrDef_URITest extends HTMLPurifier_AttrDefHarness } - function testDisableExternalResources() { + function testConfigDisableExternalResources() { $this->config->set('URI', 'DisableExternalResources', true); @@ -282,10 +311,9 @@ class HTMLPurifier_AttrDef_URITest extends HTMLPurifier_AttrDefHarness } - function testMunge() { + function testConfigMunge() { $this->config->set('URI', 'Munge', 'http://www.google.com/url?q=%s'); - $this->def = new HTMLPurifier_AttrDef_URI(); $this->assertDef( 'http://www.example.com/', diff --git a/tests/HTMLPurifier/AttrDefHarness.php b/tests/HTMLPurifier/AttrDefHarness.php index 84d8cc9e..39bd676d 100644 --- a/tests/HTMLPurifier/AttrDefHarness.php +++ b/tests/HTMLPurifier/AttrDefHarness.php @@ -4,8 +4,7 @@ class HTMLPurifier_AttrDefHarness extends UnitTestCase { var $def; - var $context; - var $config; + var $context, $config; function setUp() { $this->config = HTMLPurifier_Config::createDefault(); @@ -13,20 +12,15 @@ class HTMLPurifier_AttrDefHarness extends UnitTestCase } // cannot be used for accumulator - function assertDef($string, $expect = true, $ini = false, $message = '%s') { + function assertDef($string, $expect = true) { // $expect can be a string or bool - if ($ini) $this->setUpAssertDef(); $result = $this->def->validate($string, $this->config, $this->context); if ($expect === true) { - $this->assertIdentical($string, $result, $message); + $this->assertIdentical($string, $result); } else { - $this->assertIdentical($expect, $result, $message); + $this->assertIdentical($expect, $result); } - if ($ini) $this->tearDownAssertDef(); } - function setUpAssertDef() {} - function tearDownAssertDef() {} - }