From 218eb67167c8d9c1c7ea8d6d9a8764a5cd4cd140 Mon Sep 17 00:00:00 2001 From: "Edward Z. Yang" Date: Mon, 14 Aug 2006 21:06:57 +0000 Subject: [PATCH] Remove legacy required code from AttrDef_URI, also explicitly disallow < and > in URIs. git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@253 48356398-32a2-884e-a903-53898d9a118a --- docs/config.txt | 8 ++++++-- library/HTMLPurifier/AttrDef/URI.php | 21 ++++++++++----------- library/HTMLPurifier/Definition.php | 3 +-- tests/HTMLPurifier/AttrDef/URITest.php | 4 ++++ 4 files changed, 21 insertions(+), 15 deletions(-) diff --git a/docs/config.txt b/docs/config.txt index a4cdd965..0562c0af 100644 --- a/docs/config.txt +++ b/docs/config.txt @@ -3,5 +3,9 @@ Configuration Configuration is documented on a per-use case: if a class uses a certain value from the configuration object, it has to define its name and what the -value is used for. This means decentralized configuration declaration that -is nevertheless error checking. +value is used for. This means decentralized configuration declarations that +are nevertheless error checking and a centralized configuration object. + +Directives are divided into namespaces, indicating the major portion of +functionality they cover (although there may be overlaps). Please consult +the documentation in ConfigDef for more information on these namespaces. 
diff --git a/library/HTMLPurifier/AttrDef/URI.php b/library/HTMLPurifier/AttrDef/URI.php index 6e2c7449..b457efdc 100644 --- a/library/HTMLPurifier/AttrDef/URI.php +++ b/library/HTMLPurifier/AttrDef/URI.php @@ -12,12 +12,6 @@ HTMLPurifier_ConfigDef::define( class HTMLPurifier_AttrDef_URI extends HTMLPurifier_AttrDef { - var $required = false; - - function HTMLPurifier_AttrDef_URI($required = false) { - $this->required = $required; - } - function validate($uri, $config, &$context) { // We'll write stack-based parsers later, for now, use regexps to @@ -30,18 +24,23 @@ class HTMLPurifier_AttrDef_URI extends HTMLPurifier_AttrDef // for HTTP and thus won't work for our generic URI parsing // according to the RFC... (but this cuts corners, i.e. non-validating) - $r_URI = '!^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?!'; - // 12 3 4 5 6 7 8 9 + $r_URI = '!^'. + '(([^:/?#<>]+):)?'. // 2. Scheme + '(//([^/?#<>]*))?'. // 4. Authority + '([^?#<>]*)'. // 5. Path + '(\?([^#<>]*))?'. // 7. Query + '(#([^<>]*))?'. // 8. Fragment + '$!'; $matches = array(); $result = preg_match($r_URI, $uri, $matches); - if (!$result) return ''; + if (!$result) return false; // invalid URI // seperate out parts $scheme = !empty($matches[1]) ? $matches[2] : null; $authority = !empty($matches[3]) ? $matches[4] : null; - $path = $matches[5]; // always present + $path = $matches[5]; // always present, can be empty $query = !empty($matches[6]) ? $matches[7] : null; $fragment = !empty($matches[8]) ? $matches[9] : null; @@ -53,7 +52,7 @@ class HTMLPurifier_AttrDef_URI extends HTMLPurifier_AttrDef // retrieve the specific scheme object from the registry $scheme = ctype_lower($scheme) ? $scheme : strtolower($scheme); $scheme_obj =& $registry->getScheme($scheme, $config); - if (!$scheme_obj) return $this->required ? 
'' : false; // invalid scheme, clean it out + if (!$scheme_obj) return false; // invalid scheme, clean it out } else { $scheme_obj =& $registry->getScheme( $config->get('URI', 'DefaultScheme'), $config diff --git a/library/HTMLPurifier/Definition.php b/library/HTMLPurifier/Definition.php index 2670401e..e2e852e4 100644 --- a/library/HTMLPurifier/Definition.php +++ b/library/HTMLPurifier/Definition.php @@ -317,13 +317,12 @@ class HTMLPurifier_Definition $e_URI = new HTMLPurifier_AttrDef_URI(); $this->info['a']->attr['href'] = $this->info['img']->attr['longdesc'] = + $this->info['img']->attr['src'] = $this->info['del']->attr['cite'] = $this->info['ins']->attr['cite'] = $this->info['blockquote']->attr['cite'] = $this->info['q']->attr['cite'] = $e_URI; - $this->info['img']->attr['src'] = new HTMLPurifier_AttrDef_URI(true); - ////////////////////////////////////////////////////////////////////// // UNIMP : info_tag_transform : transformations of tags diff --git a/tests/HTMLPurifier/AttrDef/URITest.php b/tests/HTMLPurifier/AttrDef/URITest.php index d59b8e35..921aa910 100644 --- a/tests/HTMLPurifier/AttrDef/URITest.php +++ b/tests/HTMLPurifier/AttrDef/URITest.php @@ -153,6 +153,10 @@ class HTMLPurifier_AttrDef_URITest extends HTMLPurifier_AttrDefHarness $uri[18] = '/a/b'; $components[18] = array(null, null, null, '/a/b', null); + // it's not allowed, so generic URI should get it + $uri[19] = '<'; + $expect_uri[19] = false; + foreach ($uri as $i => $value) { // setUpAssertDef