From 0db1cbb7acd25df735c3df3fa09065c43ec57550 Mon Sep 17 00:00:00 2001 From: "Edward Z. Yang" Date: Fri, 11 Aug 2006 20:23:41 +0000 Subject: [PATCH] Revamp Configuration classes, breaking backwards configuration compatibility (not that there was much to broken to begin with). Fix bug involving PHP 4 object typecasting. git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@203 48356398-32a2-884e-a903-53898d9a118a --- docs/config.txt | 7 ++ library/HTMLPurifier/AttrDef/ID.php | 2 +- library/HTMLPurifier/Config.php | 98 +++++-------------- library/HTMLPurifier/ConfigDef.php | 48 +++++++++ .../Strategy/ValidateAttributes.php | 9 +- tests/HTMLPurifier/ConfigDefTest.php | 63 ++++++++++++ tests/HTMLPurifier/ConfigTest.php | 44 +++++++++ .../Strategy/ValidateAttributesTest.php | 2 +- tests/index.php | 32 +++++- 9 files changed, 222 insertions(+), 83 deletions(-) create mode 100644 docs/config.txt create mode 100644 library/HTMLPurifier/ConfigDef.php create mode 100644 tests/HTMLPurifier/ConfigDefTest.php create mode 100644 tests/HTMLPurifier/ConfigTest.php diff --git a/docs/config.txt b/docs/config.txt new file mode 100644 index 00000000..a4cdd965 --- /dev/null +++ b/docs/config.txt @@ -0,0 +1,7 @@ + +Configuration + +Configuration is documented on a per-use case: if a class uses a certain +value from the configuration object, it has to define its name and what the +value is used for. This means decentralized configuration declaration that +is nevertheless error checking. diff --git a/library/HTMLPurifier/AttrDef/ID.php b/library/HTMLPurifier/AttrDef/ID.php index 4d777ee0..1b33932a 100644 --- a/library/HTMLPurifier/AttrDef/ID.php +++ b/library/HTMLPurifier/AttrDef/ID.php @@ -4,7 +4,7 @@ require_once 'HTMLPurifier/AttrDef.php'; require_once 'HTMLPurifier/IDAccumulator.php'; // NOTE QUIRKY BEHAVIOR: even though this is the id processor, it -// will ignore HTMLPurifier_Config::$attr_id_blacklist: it will only +// will ignore directive Attr:IDBlacklist, since it will only // go according to the ID accumulator. Since the accumulator is // automatically generated, it will have already absorbed the // blacklist. If you're hacking around, make sure you use load()! diff --git a/library/HTMLPurifier/Config.php b/library/HTMLPurifier/Config.php index 7667d533..6eb43227 100644 --- a/library/HTMLPurifier/Config.php +++ b/library/HTMLPurifier/Config.php @@ -4,86 +4,36 @@ class HTMLPurifier_Config { - // which ids do we not allow? - var $attr_id_blacklist = array(); + var $conf; - ////////////////////////////////////////////////////////////////////////// - // all below properties have not been implemented yet - - // prefix all ids with this - var $attr_id_prefix = ''; - - // if there's a prefix, we may want to transparently rewrite the - // URLs we parse too. However, we can only do it when it's a pure - // anchor link, so it's not foolproof - var $attr_id_rewrite_urls = false; - - // determines how the classes array should be construed: - // blacklist - allow allow except those in $classes_blacklist - // whitelist - only allow those in $classes_whitelist - // when one is chosen, the other has no effect - var $attr_class_mode = 'blacklist'; - var $attr_class_blacklist = array(); - var $attr_class_whitelist = array(); - - // designate whether or not to allow numerals in language code subtags - // RFC 1766, the current standard referenced by XML, does not permit - // numbers, but, - // RFC 3066, the superseding best practice standard since January 2001, - // permits them. - // we allow numbers by default, although you generally never see them - // at all. - var $attr_lang_alpha = false; - - // max amount of pixels allowed to be specified - var $attr_pixels_hmax = 600; // horizontal context - var $attr_pixels_vmax = 1200; // vertical context - - // allowed URI schemes - var $uri_schemes = array( - // based off of MediaWiki's default settings - // the ones that definitely must be implemented (they're the same though) - 'http' => true, // "Hypertext Transfer Protocol", nuf' said - 'https' => true, // HTTP over SSL (Secure Socket Layer) - // quite useful, but not necessary - 'mailto' => true,// Email - 'ftp' => true, // "File Transfer Protocol" - 'irc' => true, // "Internet Relay Chat", usually needs another app - // obscure - 'telnet' => true,// network protocol for non-secure remote terminal sessions - // for Usenet, these two are similar, but distinct - 'nntp' => true, // individual Netnews articles - 'news' => true // newsgroup or individual Netnews articles - // gopher and worldwind excluded - ); - - // will munge all URIs to a different URI, which should redirect - // the user to the applicable page. A urlencoded version of the URI - // will replace any instances of %s in the string. One possible - // string is 'http://www.google.com/url?q=%s'. Useful for preventing - // pagerank from being sent to other sites - var $uri_munge = false; - - // will add rel="nofollow" to all links, also helps prevent pagerank - // from going around - var $uri_add_relnofollow = false; - - // web root of the website, we'll try to auto-detect it. Something - // like 'www.example.com/'??? - var $uri_webroot = null; - - // transform all relative URIs into their absolute forms, requires - // $uri_webroot - var $uri_make_absolute = false; - - // disables external links, requires $uri_webroot - var $uri_disable_external = false; + function HTMLPurifier_Config(&$definition) { + $this->conf = $definition->info; // set up the defaults + } function createDefault() { - $config = new HTMLPurifier_Config(); + $definition =& HTMLPurifier_ConfigDef::instance(); + $config = new HTMLPurifier_Config($definition); return $config; } + function get($namespace, $key) { + if (!isset($this->conf[$namespace][$key])) { + trigger_error('Cannot retrieve value of undefined directive', + E_USER_ERROR); + return; + } + return $this->conf[$namespace][$key]; + } + + function set($namespace, $key, $value) { + if (!isset($this->conf[$namespace][$key])) { + trigger_error('Cannot set undefined directive to value', + E_USER_ERROR); + return; + } + $this->conf[$namespace][$key] = $value; + } + } ?> \ No newline at end of file diff --git a/library/HTMLPurifier/ConfigDef.php b/library/HTMLPurifier/ConfigDef.php new file mode 100644 index 00000000..de548ea0 --- /dev/null +++ b/library/HTMLPurifier/ConfigDef.php @@ -0,0 +1,48 @@ +defineNamespace('Core', 'Core features that are always available.'); + $this->defineNamespace('Attr', 'Features regarding attribute validation.'); + } + + function &instance($prototype = null) { + static $instance; + if ($prototype !== null) { + $instance = $prototype; + } elseif ($instance === null || $prototype === true) { + $instance = new HTMLPurifier_ConfigDef(); + $instance->initialize(); + } + return $instance; + } + + function define($namespace, $name, $default, $description) { + $def =& HTMLPurifier_ConfigDef::instance(); + if (!isset($def->info[$namespace])) { + trigger_error('Cannot define directive for undefined namespace', + E_USER_ERROR); + return; + } + if (isset($def->info[$namespace][$name])) { + trigger_error('Cannot redefine directive', E_USER_ERROR); + return; + } + $def->info[$namespace][$name] = $default; + } + + function defineNamespace($namespace, $description) { + $def =& HTMLPurifier_ConfigDef::instance(); + if (isset($def->info[$namespace])) { + trigger_error('Cannot redefine namespace', E_USER_ERROR); + return; + } + $def->info[$namespace] = array(); + } + +} + +?> \ No newline at end of file diff --git a/library/HTMLPurifier/Strategy/ValidateAttributes.php b/library/HTMLPurifier/Strategy/ValidateAttributes.php index 988e3bd3..af5b2c10 100644 --- a/library/HTMLPurifier/Strategy/ValidateAttributes.php +++ b/library/HTMLPurifier/Strategy/ValidateAttributes.php @@ -3,6 +3,11 @@ require_once 'HTMLPurifier/Strategy.php'; require_once 'HTMLPurifier/Definition.php'; require_once 'HTMLPurifier/IDAccumulator.php'; +require_once 'HTMLPurifier/ConfigDef.php'; + +HTMLPurifier_ConfigDef::define( + 'Attr', 'IDBlacklist', array(), + 'Array of IDs not allowed in the document.'); /** * Validate all attributes in the tokens. @@ -26,7 +31,7 @@ class HTMLPurifier_Strategy_ValidateAttributes extends HTMLPurifier_Strategy // eventually, we'll have a dedicated context object to hold // all these accumulators and caches. For now, just an IDAccumulator $accumulator = new HTMLPurifier_IDAccumulator(); - $accumulator->load($config->attr_id_blacklist); + $accumulator->load($config->get('Attr', 'IDBlacklist')); // create alias to global definition array, see also $defs // DEFINITION CALL @@ -69,7 +74,7 @@ class HTMLPurifier_Strategy_ValidateAttributes extends HTMLPurifier_Strategy // call the definition if ( isset($defs[$attr_key]) ) { // there is a local definition defined - if (!$defs[$attr_key]) { + if ($defs[$attr_key] === false) { // We've explicitly been told not to allow this element. // This is usually when there's a global definition // that must be overridden. diff --git a/tests/HTMLPurifier/ConfigDefTest.php b/tests/HTMLPurifier/ConfigDefTest.php new file mode 100644 index 00000000..7c57fa8f --- /dev/null +++ b/tests/HTMLPurifier/ConfigDefTest.php @@ -0,0 +1,63 @@ +old_copy = HTMLPurifier_ConfigDef::instance(); + // put in our copy, and reassign to the REAL reference + $this->our_copy =& HTMLPurifier_ConfigDef::instance($our_copy); + } + + function tearDown() { + // testing is done, restore the old copy + HTMLPurifier_ConfigDef::instance($this->old_copy); + } + + function testNormal() { + + HTMLPurifier_ConfigDef::defineNamespace('Core', 'Configuration that '. + 'is always available.'); + $this->assertIdentical( array( + 'Core' => array() + ), $this->our_copy->info); + + // note that the description is silently dropped + HTMLPurifier_ConfigDef::define('Core', 'Name', 'default value', + 'This is a description of the directive.'); + $this->assertIdentical( array( + 'Core' => array( + 'Name' => 'default value' + ) + ), $this->our_copy->info); + + // test an invalid namespace + HTMLPurifier_ConfigDef::define('Extension', 'Name', false, 'This is '. + 'for an extension, but we have not defined its namespace!'); + $this->assertError('Cannot define directive for undefined namespace'); + $this->assertNoErrors(); + $this->swallowErrors(); + + // test overloading already defined value + HTMLPurifier_ConfigDef::define('Core', 'Name', 89, + 'What, you\'re not allowed to overload directives? Bummer!'); + $this->assertError('Cannot redefine directive'); + $this->assertNoErrors(); + $this->swallowErrors(); + + } + +} + +?> \ No newline at end of file diff --git a/tests/HTMLPurifier/ConfigTest.php b/tests/HTMLPurifier/ConfigTest.php new file mode 100644 index 00000000..45991a55 --- /dev/null +++ b/tests/HTMLPurifier/ConfigTest.php @@ -0,0 +1,44 @@ +info = array( + 'Core' => array('Key' => false), + 'Attr' => array('Key' => 42), + 'Extension' => array('Pert' => 'moo') + ); + + $config = new HTMLPurifier_Config($def); + + // test default value retrieval + $this->assertIdentical($config->get('Core', 'Key'), false); + $this->assertIdentical($config->get('Attr', 'Key'), 42); + $this->assertIdentical($config->get('Extension', 'Pert'), 'moo'); + + // set some values + $config->set('Core', 'Key', 'foobar'); + $this->assertIdentical($config->get('Core', 'Key'), 'foobar'); + + // try to retrieve undefined value + $config->get('Core', 'NotDefined'); + $this->assertError('Cannot retrieve value of undefined directive'); + $this->assertNoErrors(); + $this->swallowErrors(); + + // try to set undefined value + $config->set('Foobar', 'Key', 'foobar'); + $this->assertError('Cannot set undefined directive to value'); + $this->assertNoErrors(); + $this->swallowErrors(); + + } + +} + +?> \ No newline at end of file diff --git a/tests/HTMLPurifier/Strategy/ValidateAttributesTest.php b/tests/HTMLPurifier/Strategy/ValidateAttributesTest.php index 51685754..69873e30 100644 --- a/tests/HTMLPurifier/Strategy/ValidateAttributesTest.php +++ b/tests/HTMLPurifier/Strategy/ValidateAttributesTest.php @@ -46,7 +46,7 @@ class HTMLPurifier_Strategy_ValidateAttributesTest extends $inputs[7] = '
Invalid
'; $expect[7] = '
Invalid
'; $config[7] = HTMLPurifier_Config::createDefault(); - $config[7]->attr_id_blacklist = array('invalid'); + $config[7]->set('Attr', 'IDBlacklist', array('invalid')); // test classes $inputs[8] = '
Valid
'; diff --git a/tests/index.php b/tests/index.php index 72826aac..7c28cdb4 100644 --- a/tests/index.php +++ b/tests/index.php @@ -34,11 +34,16 @@ function generate_mock_once($name) { Mock::generate($name, $mock_name); } +// this has to be defined before we do any includes of library files +require_once 'HTMLPurifier/ConfigDef.php'; + // define callable test files $test_files = array(); +$test_files[] = 'ConfigTest.php'; +$test_files[] = 'ConfigDefTest.php'; $test_files[] = 'LexerTest.php'; $test_files[] = 'Lexer/DirectLexTest.php'; -//$test_files[] = 'TokenTest.php'; +$test_files[] = 'TokenTest.php'; $test_files[] = 'ChildDefTest.php'; $test_files[] = 'GeneratorTest.php'; $test_files[] = 'EntityLookupTest.php'; @@ -57,28 +62,45 @@ $test_files[] = 'AttrDef/LangTest.php'; $test_files[] = 'AttrDef/PixelsTest.php'; $test_files[] = 'AttrDef/LengthTest.php'; $test_files[] = 'AttrDef/NumberSpanTest.php'; +//$test_files[] = 'AttrDef/URITest.php'; $test_files[] = 'IDAccumulatorTest.php'; $test_files[] = 'TagTransformTest.php'; $test_files[] = 'AttrTransform/LangTest.php'; $test_files[] = 'AttrTransform/TextAlignTest.php'; - $test_file_lookup = array_flip($test_files); +function htmlpurifier_path2class($path) { + $temp = $path; + $temp = str_replace('./', '', $temp); // remove leading './' + $temp = str_replace('.\\', '', $temp); // remove leading '.\' + $temp = str_replace('\\', '_', $temp); // normalize \ to _ + $temp = str_replace('/', '_', $temp); // normalize / to _ + while(strpos($temp, '__') !== false) $temp = str_replace('__', '_', $temp); + $temp = str_replace('.php', '', $temp); + return $temp; +} + +// we can't use addTestFile because SimpleTest chokes on E_STRICT warnings + if (isset($_GET['file']) && isset($test_file_lookup[$_GET['file']])) { // execute only one test $test_file = $_GET['file']; $test = new GroupTest('HTMLPurifier - ' . $test_file); - $test->addTestFile('HTMLPurifier/' . $test_file); + $path = 'HTMLPurifier/' . $test_file; + require_once $path; + $test->addTestClass(htmlpurifier_path2class($path)); } else { $test = new GroupTest('HTMLPurifier'); foreach ($test_files as $test_file) { - $test->addTestFile('HTMLPurifier/' . $test_file); + $path = 'HTMLPurifier/' . $test_file; + require_once $path; + $test->addTestClass(htmlpurifier_path2class($path)); } } @@ -88,4 +110,4 @@ else $reporter = new HTMLReporter(); $test->run($reporter); -?> \ No newline at end of file +?>