diff --git a/NEWS b/NEWS index 27c3188e..b84a19a7 100644 --- a/NEWS +++ b/NEWS @@ -54,6 +54,8 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier . Interface for URIScheme changed . Generic URI object to hold components of URI added, most systems involved in URI validation have been migrated to use it +. Custom filtering for URIs factored out to URIDefinition interface for + maximum extensibility 2.0.1, released 2007-06-27 ! Tag auto-closing now based on a ChildDef heuristic rather than a diff --git a/library/HTMLPurifier/AttrDef/URI.php b/library/HTMLPurifier/AttrDef/URI.php index 4f383c57..a21d4f4b 100644 --- a/library/HTMLPurifier/AttrDef/URI.php +++ b/library/HTMLPurifier/AttrDef/URI.php @@ -25,29 +25,6 @@ HTMLPurifier_ConfigSchema::define( 'This directive has been available since 1.2.0.' ); -HTMLPurifier_ConfigSchema::define( - 'URI', 'DisableExternal', false, 'bool', - 'Disables links to external websites. This is a highly effective '. - 'anti-spam and anti-pagerank-leech measure, but comes at a hefty price: no'. - 'links or images outside of your domain will be allowed. Non-linkified '. - 'URIs will still be preserved. If you want to be able to link to '. - 'subdomains or use absolute URIs, specify %URI.Host for your website. '. - 'This directive has been available since 1.2.0.' -); - -HTMLPurifier_ConfigSchema::define( - 'URI', 'DisableExternalResources', false, 'bool', - 'Disables the embedding of external resources, preventing users from '. - 'embedding things like images from other hosts. This prevents '. - 'access tracking (good for email viewers), bandwidth leeching, '. - 'cross-site request forging, goatse.cx posting, and '. - 'other nasties, but also results in '. - 'a loss of end-user functionality (they can\'t directly post a pic '. - 'they posted from Flickr anymore). Use it if you don\'t have a '. - 'robust user-content moderation team. This directive has been '. - 'available since 1.3.0.' -); - HTMLPurifier_ConfigSchema::define( 'URI', 'DisableResources', false, 'bool', 'Disables embedding resources, essentially meaning no pictures. You can '. @@ -117,18 +94,35 @@ class HTMLPurifier_AttrDef_URI extends HTMLPurifier_AttrDef $uri = $this->parser->parse($uri); if ($uri === false) return false; - // generic validation - $context->register('EmbeddedURI', $this->embedsResource); // flag - $result = $uri->validate($config, $context); - $context->destroy('EmbeddedURI'); - if (!$result) return false; + // add embedded flag to context for validators + $context->register('EmbeddedURI', $this->embedsResource); - // scheme-specific validation - $scheme_obj = $uri->getSchemeObj($config, $context); - if (!$scheme_obj) return false; - if ($this->embedsResource && !$scheme_obj->browsable) return false; - $result = $scheme_obj->validate($uri, $config, $context); - if (!$result) return false; + $ok = false; + do { + + // generic validation + $result = $uri->validate($config, $context); + if (!$result) break; + + // chained validation + $uri_def =& $config->getDefinition('URI'); + $result = $uri_def->filter($uri, $config, $context); + if (!$result) break; + + // scheme-specific validation + $scheme_obj = $uri->getSchemeObj($config, $context); + if (!$scheme_obj) break; + if ($this->embedsResource && !$scheme_obj->browsable) break; + $result = $scheme_obj->validate($uri, $config, $context); + if (!$result) break; + + // survived gauntlet + $ok = true; + + } while (false); + + $context->destroy('EmbeddedURI'); + if (!$ok) return false; // back to string $result = $uri->toString(); diff --git a/library/HTMLPurifier/Config.php b/library/HTMLPurifier/Config.php index 2b811fa9..1a845649 100644 --- a/library/HTMLPurifier/Config.php +++ b/library/HTMLPurifier/Config.php @@ -5,6 +5,7 @@ require_once 'HTMLPurifier/ConfigSchema.php'; // member variables require_once 'HTMLPurifier/HTMLDefinition.php'; require_once 'HTMLPurifier/CSSDefinition.php'; +require_once 'HTMLPurifier/URIDefinition.php'; require_once 'HTMLPurifier/Doctype.php'; require_once 'HTMLPurifier/DefinitionCacheFactory.php'; @@ -313,6 +314,8 @@ class HTMLPurifier_Config $this->definitions[$type] = new HTMLPurifier_HTMLDefinition(); } elseif ($type == 'CSS') { $this->definitions[$type] = new HTMLPurifier_CSSDefinition(); + } elseif ($type == 'URI') { + $this->definitions[$type] = new HTMLPurifier_URIDefinition(); } else { trigger_error("Definition of $type type not supported"); $false = false; diff --git a/library/HTMLPurifier/URI.php b/library/HTMLPurifier/URI.php index 968471b2..da56872f 100644 --- a/library/HTMLPurifier/URI.php +++ b/library/HTMLPurifier/URI.php @@ -1,6 +1,7 @@ host)) { - // remove URI if it's absolute and we disabled externals or - // if it's absolute and embedded and we disabled external resources - unset($our_host); // ensure this variable is not set - if ( - $config->get('URI', 'DisableExternal') || - ( - $config->get('URI', 'DisableExternalResources') && - $context->get('EmbeddedURI', true) // suppress errors - ) - ) { - $our_host = $config->get('URI', 'Host'); - if ($our_host === null) return false; - } + $host_def = new HTMLPurifier_AttrDef_URI_Host(); $this->host = $host_def->validate($this->host, $config, $context); if ($this->host === false) $this->host = null; @@ -70,16 +59,6 @@ class HTMLPurifier_URI // check host against blacklist if ($this->checkBlacklist($this->host, $config, $context)) return false; - // more lenient absolute checking - if (isset($our_host)) { - $host_parts = array_reverse(explode('.', $this->host)); - // could be cached - $our_host_parts = array_reverse(explode('.', $our_host)); - foreach ($our_host_parts as $i => $discard) { - if (!isset($host_parts[$i])) return false; - if ($host_parts[$i] != $our_host_parts[$i]) return false; - } - } } // munge scheme off if necessary diff --git a/library/HTMLPurifier/URIDefinition.php b/library/HTMLPurifier/URIDefinition.php new file mode 100644 index 00000000..c3efa6ad --- /dev/null +++ b/library/HTMLPurifier/URIDefinition.php @@ -0,0 +1,53 @@ + + Revision identifier for your custom definition. See + %HTML.DefinitionRev for details. This directive has been available + since 2.1.0. +

+'); + +class HTMLPurifier_URIDefinition extends HTMLPurifier_Definition +{ + + var $type = 'URI'; + var $filters = array(); + var $registeredFilters = array(); + + function HTMLPurifier_URIDefinition() { + $this->registerFilter(new HTMLPurifier_URIFilter_DisableExternal()); + $this->registerFilter(new HTMLPurifier_URIFilter_DisableExternalResources()); + } + + function registerFilter($filter) { + $this->registeredFilters[$filter->name] = $filter; + } + + function doSetup($config) { + foreach ($this->registeredFilters as $name => $filter) { + $conf = $config->get('URI', $name); + if ($conf !== false && $conf !== null) { + $this->filters[$name] = $filter; + } + } + foreach ($this->filters as $n => $x) $this->filters[$n]->prepare($config); + unset($this->registeredFilters); + } + + function filter(&$uri, $config, &$context) { + foreach ($this->filters as $name => $x) { + $result = $this->filters[$name]->filter($uri, $config, $context); + if (!$result) return false; + } + return true; + } + +} diff --git a/library/HTMLPurifier/URIFilter.php b/library/HTMLPurifier/URIFilter.php new file mode 100644 index 00000000..e0066f3b --- /dev/null +++ b/library/HTMLPurifier/URIFilter.php @@ -0,0 +1,24 @@ +get('URI', 'Host'); + if ($our_host !== null) $this->ourHostParts = array_reverse(explode('.', $our_host)); + } + function filter(&$uri, $config, &$context) { + if (is_null($uri->host)) return true; + if ($this->ourHostParts === false) return false; + $host_parts = array_reverse(explode('.', $uri->host)); + foreach ($this->ourHostParts as $i => $x) { + if (!isset($host_parts[$i])) return false; + if ($host_parts[$i] != $this->ourHostParts[$i]) return false; + } + return true; + } +} + diff --git a/library/HTMLPurifier/URIFilter/DisableExternalResources.php b/library/HTMLPurifier/URIFilter/DisableExternalResources.php new file mode 100644 index 00000000..dc00e741 --- /dev/null +++ b/library/HTMLPurifier/URIFilter/DisableExternalResources.php @@ -0,0 +1,26 @@ +get('EmbeddedURI', true)) return true; + return parent::filter($uri, $config, $context); + } +} + diff --git a/maintenance/flush-definition-cache.php b/maintenance/flush-definition-cache.php index 07f1442b..921a00c2 100644 --- a/maintenance/flush-definition-cache.php +++ b/maintenance/flush-definition-cache.php @@ -16,7 +16,8 @@ require_once(dirname(__FILE__) . '/../library/HTMLPurifier.auto.php'); $config = HTMLPurifier_Config::createDefault(); -$names = array('HTML', 'CSS', 'Test'); +//$names = array('HTML', 'CSS', 'URI', 'Test'); +$names = array('URI'); foreach ($names as $name) { echo " - Flushing $name\n"; $cache = new HTMLPurifier_DefinitionCache_Serializer($name); diff --git a/tests/HTMLPurifier/AttrDef/URITest.php b/tests/HTMLPurifier/AttrDef/URITest.php index d6f55ccd..43b041cd 100644 --- a/tests/HTMLPurifier/AttrDef/URITest.php +++ b/tests/HTMLPurifier/AttrDef/URITest.php @@ -48,6 +48,64 @@ class HTMLPurifier_AttrDef_URITest extends HTMLPurifier_AttrDefHarness $this->assertDef('javascript:foobar();', false); } + function test_validate_configDisableExternal() { + + $this->def = new HTMLPurifier_AttrDef_URI(); + + $this->config->set('URI', 'DisableExternal', true); + $this->config->set('URI', 'Host', 'sub.example.com'); + + $this->assertDef('/foobar.txt'); + $this->assertDef('http://google.com/', false); + $this->assertDef('http://sub.example.com/alas?foo=asd'); + $this->assertDef('http://example.com/teehee', false); + $this->assertDef('http://www.example.com/#man', false); + $this->assertDef('http://go.sub.example.com/perhaps?p=foo'); + + } + + function test_validate_configDisableExternalResources() { + + $this->config->set('URI', 'DisableExternalResources', true); + + $this->assertDef('http://sub.example.com/alas?foo=asd'); + $this->assertDef('/img.png'); + + $this->def = new HTMLPurifier_AttrDef_URI(true); + + $this->assertDef('http://sub.example.com/alas?foo=asd', false); + $this->assertDef('/img.png'); + + } + + function test_validate_configBlacklist() { + + $this->config->set('URI', 'HostBlacklist', array('example.com', 'moo')); + + $this->assertDef('foo.txt'); + $this->assertDef('http://www.google.com/example.com/moo'); + + $this->assertDef('http://example.com/#23', false); + $this->assertDef('https://sub.domain.example.com/foobar', false); + $this->assertDef('http://example.com.example.net/?whoo=foo', false); + $this->assertDef('ftp://moo-moo.net/foo/foo/', false); + + } + + /* + function test_validate_configWhitelist() { + + $this->config->set('URI', 'HostPolicy', 'DenyAll'); + $this->config->set('URI', 'HostWhitelist', array(null, 'google.com')); + + $this->assertDef('http://example.com/fo/google.com', false); + $this->assertDef('server.txt'); + $this->assertDef('ftp://www.google.com/?t=a'); + $this->assertDef('http://google.com.tricky.spamsite.net', false); + + } + */ + } diff --git a/tests/HTMLPurifier/Harness.php b/tests/HTMLPurifier/Harness.php index 5cc8c379..897a9f23 100644 --- a/tests/HTMLPurifier/Harness.php +++ b/tests/HTMLPurifier/Harness.php @@ -1,5 +1,7 @@ config, $this->context) = $this->createCommon(); } + /** + * Accepts config and context and prepares them into a valid state + * @param &$config Reference to config variable + * @param &$context Reference to context variable + */ function prepareCommon(&$config, &$context) { $config = HTMLPurifier_Config::create($config); if (!$context) $context = new HTMLPurifier_Context(); } + /** + * Generates default configuration and context objects + * @return Defaults in form of array($config, $context) + */ function createCommon() { return array(HTMLPurifier_Config::createDefault(), new HTMLPurifier_Context); } + /** + * If $expect is false, ignore $result and check if status failed. + * Otherwise, check if $status if true and $result === $expect. + * @param $status Boolean status + * @param $result Mixed result from processing + * @param $expect Mixed expectation for result + */ + function assertEitherFailOrIdentical($status, $result, $expect) { + if ($expect === false) { + $this->assertFalse($status, 'Expected false result, got true'); + } else { + $this->assertTrue($status, 'Expected true result, got false'); + $this->assertIdentical($result, $expect); + } + } + } diff --git a/tests/HTMLPurifier/URIDefinitionTest.php b/tests/HTMLPurifier/URIDefinitionTest.php new file mode 100644 index 00000000..cc782840 --- /dev/null +++ b/tests/HTMLPurifier/URIDefinitionTest.php @@ -0,0 +1,34 @@ +expectOnce('filter'); + else $mock->expectNever('filter'); + $mock->setReturnValue('filter', $result); + return $mock; + } + + function test_filter() { + $def = new HTMLPurifier_URIDefinition(); + $def->filters[] = $this->createFilterMock(); + $def->filters[] = $this->createFilterMock(); + $uri = $this->createURI('test'); + $this->assertTrue($def->filter($uri, $this->config, $this->context)); + } + + function test_filter_earlyAbortIfFail() { + $def = new HTMLPurifier_URIDefinition(); + $def->filters[] = $this->createFilterMock(true, false); + $def->filters[] = $this->createFilterMock(false); // never called + $uri = $this->createURI('test'); + $this->assertFalse($def->filter($uri, $this->config, $this->context)); + } + +} diff --git a/tests/HTMLPurifier/URIFilter/DisableExternalResourcesTest.php b/tests/HTMLPurifier/URIFilter/DisableExternalResourcesTest.php new file mode 100644 index 00000000..4362cbd8 --- /dev/null +++ b/tests/HTMLPurifier/URIFilter/DisableExternalResourcesTest.php @@ -0,0 +1,23 @@ +context->register('EmbeddedURI', $var); + } + + function testPreserveWhenNotEmbedded() { + $this->context->destroy('EmbeddedURI'); // undo setUp + $this->assertFiltering( + 'http://example.com' + ); + } + +} diff --git a/tests/HTMLPurifier/URIFilter/DisableExternalTest.php b/tests/HTMLPurifier/URIFilter/DisableExternalTest.php new file mode 100644 index 00000000..e4a0e89f --- /dev/null +++ b/tests/HTMLPurifier/URIFilter/DisableExternalTest.php @@ -0,0 +1,47 @@ +filter = new HTMLPurifier_URIFilter_DisableExternal(); + } + + function testRemoveExternal() { + $this->assertFiltering( + 'http://example.com', false + ); + } + + function testPreserveInternal() { + $this->assertFiltering( + '/foo/bar' + ); + } + + function testPreserveOurHost() { + $this->config->set('URI', 'Host', 'example.com'); + $this->assertFiltering( + 'http://example.com' + ); + } + + function testPreserveOurSubdomain() { + $this->config->set('URI', 'Host', 'example.com'); + $this->assertFiltering( + 'http://www.example.com' + ); + } + + function testRemoveSuperdomain() { + $this->config->set('URI', 'Host', 'www.example.com'); + $this->assertFiltering( + 'http://example.com', false + ); + } + +} diff --git a/tests/HTMLPurifier/URIFilterHarness.php b/tests/HTMLPurifier/URIFilterHarness.php new file mode 100644 index 00000000..04e101f2 --- /dev/null +++ b/tests/HTMLPurifier/URIFilterHarness.php @@ -0,0 +1,15 @@ +prepareURI($uri, $expect_uri); + $this->filter->prepare($this->config, $this->context); + $result = $this->filter->filter($uri, $this->config, $this->context); + $this->assertEitherFailOrIdentical($result, $uri, $expect_uri); + } + +} diff --git a/tests/HTMLPurifier/URIHarness.php b/tests/HTMLPurifier/URIHarness.php new file mode 100644 index 00000000..bec80845 --- /dev/null +++ b/tests/HTMLPurifier/URIHarness.php @@ -0,0 +1,31 @@ +parse($uri); + if ($expect_uri !== false) { + $expect_uri = $parser->parse($expect_uri); + } + } + + /** + * Generates a URI object from the corresponding string + */ + function createURI($uri) { + $parser = new HTMLPurifier_URIParser(); + return $parser->parse($uri); + } + +} diff --git a/tests/HTMLPurifier/URISchemeTest.php b/tests/HTMLPurifier/URISchemeTest.php index 0ace9a21..6f43d10c 100644 --- a/tests/HTMLPurifier/URISchemeTest.php +++ b/tests/HTMLPurifier/URISchemeTest.php @@ -1,7 +1,6 @@ parse($uri); - if ($expect_uri !== false) { - $expect_uri = $parser->parse($expect_uri); - } + $this->prepareURI($uri, $expect_uri); // convenience hack: the scheme should be explicitly specified $scheme = $uri->getSchemeObj($this->config, $this->context); $result = $scheme->validate($uri, $this->config, $this->context); - if ($expect_uri !== false) { - $this->assertTrue($result); - $this->assertIdentical($uri, $expect_uri); - } else { - $this->assertFalse($result); - } + $this->assertEitherFailOrIdentical($result, $uri, $expect_uri); } function test_http_regular() { diff --git a/tests/HTMLPurifier/URITest.php b/tests/HTMLPurifier/URITest.php index cebc79cf..85017be1 100644 --- a/tests/HTMLPurifier/URITest.php +++ b/tests/HTMLPurifier/URITest.php @@ -3,7 +3,7 @@ require_once 'HTMLPurifier/URI.php'; require_once 'HTMLPurifier/URIParser.php'; -class HTMLPurifier_URITest extends HTMLPurifier_Harness +class HTMLPurifier_URITest extends HTMLPurifier_URIHarness { function createURI($uri) { @@ -179,62 +179,4 @@ class HTMLPurifier_URITest extends HTMLPurifier_Harness $this->assertValidation('http://[2001:0db8:85z3:08d3:1319:8a2e:0370:7334]', ''); } - function test_validate_configDisableExternal() { - - $this->def = new HTMLPurifier_AttrDef_URI(); - - $this->config->set('URI', 'DisableExternal', true); - $this->config->set('URI', 'Host', 'sub.example.com'); - - $this->assertValidation('/foobar.txt'); - $this->assertValidation('http://google.com/', false); - $this->assertValidation('http://sub.example.com/alas?foo=asd'); - $this->assertValidation('http://example.com/teehee', false); - $this->assertValidation('http://www.example.com/#man', false); - $this->assertValidation('http://go.sub.example.com/perhaps?p=foo'); - - } - - function test_validate_configDisableExternalResources() { - - $this->config->set('URI', 'DisableExternalResources', true); - - $this->assertValidation('http://sub.example.com/alas?foo=asd'); - $this->assertValidation('/img.png'); - - $embeds = true; // passed by reference - $this->context->register('EmbeddedURI', $embeds); - $this->assertValidation('http://sub.example.com/alas?foo=asd', false); - $this->assertValidation('/img.png'); - - } - - function test_validate_configBlacklist() { - - $this->config->set('URI', 'HostBlacklist', array('example.com', 'moo')); - - $this->assertValidation('foo.txt'); - $this->assertValidation('http://www.google.com/example.com/moo'); - - $this->assertValidation('http://example.com/#23', false); - $this->assertValidation('https://sub.domain.example.com/foobar', false); - $this->assertValidation('http://example.com.example.net/?whoo=foo', false); - $this->assertValidation('ftp://moo-moo.net/foo/foo/', false); - - } - - /* - function test_validate_configWhitelist() { - - $this->config->set('URI', 'HostPolicy', 'DenyAll'); - $this->config->set('URI', 'HostWhitelist', array(null, 'google.com')); - - $this->assertValidation('http://example.com/fo/google.com', false); - $this->assertValidation('server.txt'); - $this->assertValidation('ftp://www.google.com/?t=a'); - $this->assertValidation('http://google.com.tricky.spamsite.net', false); - - } - */ - } diff --git a/tests/index.php b/tests/index.php index 4c6b974a..70ed9f9b 100644 --- a/tests/index.php +++ b/tests/index.php @@ -21,7 +21,6 @@ require_once $simpletest_location . 'unit_tester.php'; require_once $simpletest_location . 'reporter.php'; require_once $simpletest_location . 'mock_objects.php'; require_once 'HTMLPurifier/SimpleTest/Reporter.php'; -require_once 'HTMLPurifier/Harness.php'; // load Debugger require_once 'Debugger.php'; @@ -47,6 +46,7 @@ if (isset($_GET['standalone']) || (isset($argv[1]) && $argv[1] == 'standalone')) } else { require_once '../library/HTMLPurifier.auto.php'; } +require_once 'HTMLPurifier/Harness.php'; // setup special DefinitionCacheFactory decorator $factory =& HTMLPurifier_DefinitionCacheFactory::instance(); diff --git a/tests/test_files.php b/tests/test_files.php index c3a78120..cd61b5ce 100644 --- a/tests/test_files.php +++ b/tests/test_files.php @@ -102,6 +102,9 @@ $test_files[] = 'HTMLPurifier/Strategy/RemoveForeignElements_ErrorsTest.php'; $test_files[] = 'HTMLPurifier/Strategy/ValidateAttributesTest.php'; $test_files[] = 'HTMLPurifier/TagTransformTest.php'; $test_files[] = 'HTMLPurifier/TokenTest.php'; +$test_files[] = 'HTMLPurifier/URIDefinitionTest.php'; +$test_files[] = 'HTMLPurifier/URIFilter/DisableExternalTest.php'; +$test_files[] = 'HTMLPurifier/URIFilter/DisableExternalResourcesTest.php'; $test_files[] = 'HTMLPurifier/URIParserTest.php'; $test_files[] = 'HTMLPurifier/URISchemeRegistryTest.php'; $test_files[] = 'HTMLPurifier/URISchemeTest.php';