From 12f73605a3ab0379e711960dd17b5cccbf35cb47 Mon Sep 17 00:00:00 2001 From: "Edward Z. Yang" Date: Tue, 29 May 2007 21:26:43 +0000 Subject: [PATCH] [1.7.0] Implement HTML.Allowed, a TinyMCE style whitelist format. git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@1119 48356398-32a2-884e-a903-53898d9a118a --- NEWS | 2 + TODO | 11 ++-- configdoc/generate.php | 1 + library/HTMLPurifier/HTMLDefinition.php | 67 ++++++++++++++++++++++- tests/HTMLPurifier/HTMLDefinitionTest.php | 57 +++++++++++++++++++ tests/test_files.php | 1 + 6 files changed, 132 insertions(+), 7 deletions(-) create mode 100644 tests/HTMLPurifier/HTMLDefinitionTest.php diff --git a/NEWS b/NEWS index 06f6721b..f280ddbc 100644 --- a/NEWS +++ b/NEWS @@ -30,6 +30,8 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier format: key1:value1,key2:value2 ! ConfigDoc now factored into OOP design ! All deprecated elements now natively supported +! Implement TinyMCE styled whitelist specification format in + %HTML.Allowed - Deprecated and removed EnableRedundantUTF8Cleaning. It didn't even work! . Unit test for ElementDef created, ElementDef behavior modified to be more flexible diff --git a/TODO b/TODO index 5b6f6f42..1189c929 100644 --- a/TODO +++ b/TODO @@ -10,10 +10,7 @@ TODO List 1.7 release [Advanced API] # Complete advanced API, and fully document it - Add framework for unsafe attributes - - Reorganize configuration directives - Set up anonymous module management by HTMLDefinition (Advanced API) - - Get all AttrTypes into string form - - Parse TinyMCE-style whitelist into our %HTML.Allow* whitelists 1.8 release [Refactor, refactor!] # URI validation routines tighter (see docs/dev-code-quality.html) (COMPLEX) @@ -48,6 +45,9 @@ TODO List - Append something to duplicate IDs so they're still usable (impl. 
note: the dupe detector would also need to detect the suffix as well) +1.11 release [It's All About Trust] (floating) + # Implement untrusted, dangerous elements/attributes + 2.0 release [Beyond HTML] # Legit token based CSS parsing (will require revamping almost every AttrDef class) @@ -81,8 +81,6 @@ Ongoing - eFiction - more! (look for ones that use WYSIWYGs) - Complete basic smoketests - - Reorganize Unit Tests - - Refactor loop tests (esp. AttrDef_URI) Unknown release (on a scratch-an-itch basis) ? Semi-lossy dumb alternate character encoding transfor @@ -91,6 +89,9 @@ Unknown release (on a scratch-an-itch basis) - Explain how to use HTML Purifier in non-PHP languages - Abstract ChildDef_BlockQuote to work with all elements that only allow blocks in them, required or optional + - Reorganize Unit Tests + - Refactor loop tests (esp. AttrDef_URI) + - Reorganize configuration directives (Create more namespaces! Get messy!) Requested ? Native content compression, whitespace stripping (don't rely on Tidy, make diff --git a/configdoc/generate.php b/configdoc/generate.php index 4bf68d6a..80eacd76 100644 --- a/configdoc/generate.php +++ b/configdoc/generate.php @@ -12,6 +12,7 @@ TODO: - allow generation of packaged docs that can be easily moved - multipage documentation - determine how to multilingualize +- add blurbs to ToC */ if (version_compare('5', PHP_VERSION, '>')) exit('Requires PHP 5 or higher.'); diff --git a/library/HTMLPurifier/HTMLDefinition.php b/library/HTMLPurifier/HTMLDefinition.php index 4d27771e..ec6a9c0d 100644 --- a/library/HTMLPurifier/HTMLDefinition.php +++ b/library/HTMLPurifier/HTMLDefinition.php @@ -65,6 +65,25 @@ HTMLPurifier_ConfigSchema::define(

'); +HTMLPurifier_ConfigSchema::define( + 'HTML', 'Allowed', null, 'string/null', ' +

+ This is a convenience directive that rolls the functionality of + %HTML.AllowedElements and %HTML.AllowedAttributes into one directive. + Specify elements and attributes that are allowed using: + element1[attr1|attr2],element2.... +

+

+ Warning: + All of the constraints on the component directives are still enforced. + The syntax is a subset of TinyMCE\'s valid_elements + whitelist: directly copy-pasting it here will probably result in + broken whitelists. If %HTML.AllowedElements or %HTML.AllowedAttributes + are set, this directive has no effect. + This directive has been available since 1.7.0. +

+'); + /** * Definition of the purified HTML that describes allowed children, * attributes, and many other things. @@ -233,8 +252,18 @@ class HTMLPurifier_HTMLDefinition extends HTMLPurifier_Definition $support = "(for information on implementing this, see the ". "support forums) "; - // setup allowed elements, SubtractiveWhitelist module(?) + // setup allowed elements + $allowed_elements = $config->get('HTML', 'AllowedElements'); + $allowed_attributes = $config->get('HTML', 'AllowedAttributes'); + + if (!is_array($allowed_elements) && !is_array($allowed_attributes)) { + $allowed = $config->get('HTML', 'Allowed'); + if (is_string($allowed)) { + list($allowed_elements, $allowed_attributes) = $this->parseTinyMCEAllowedList($allowed); + } + } + if (is_array($allowed_elements)) { foreach ($this->info as $name => $d) { if(!isset($allowed_elements[$name])) unset($this->info[$name]); @@ -247,7 +276,6 @@ class HTMLPurifier_HTMLDefinition extends HTMLPurifier_Definition } } - $allowed_attributes = $config->get('HTML', 'AllowedAttributes'); $allowed_attributes_mutable = $allowed_attributes; // by copy! if (is_array($allowed_attributes)) { foreach ($this->info_global_attr as $attr_key => $info) { @@ -289,6 +317,41 @@ class HTMLPurifier_HTMLDefinition extends HTMLPurifier_Definition } + /** + * Parses a TinyMCE-flavored Allowed Elements and Attributes list into + * separate lists for processing. Format is element[attr1|attr2],element2... 
+ * @warning Although it's largely drawn from TinyMCE's implementation, + * it is different, and you'll probably have to modify your lists + * @param $list String list to parse + * @return array($allowed_elements, $allowed_attributes) + */ + function parseTinyMCEAllowedList($list) { + + $elements = array(); + $attributes = array(); + + $chunks = explode(',', $list); + foreach ($chunks as $chunk) { + // remove TinyMCE element control characters + if (!strpos($chunk, '[')) { + $element = $chunk; + $attr = false; + } else { + list($element, $attr) = explode('[', $chunk); + } + if ($element !== '*') $elements[$element] = true; + if (!$attr) continue; + $attr = substr($attr, 0, strlen($attr) - 1); // remove trailing ] + $attr = explode('|', $attr); + foreach ($attr as $key) { + $attributes["$element.$key"] = true; + } + } + + return array($elements, $attributes); + + } + } diff --git a/tests/HTMLPurifier/HTMLDefinitionTest.php b/tests/HTMLPurifier/HTMLDefinitionTest.php new file mode 100644 index 00000000..b9c77e07 --- /dev/null +++ b/tests/HTMLPurifier/HTMLDefinitionTest.php @@ -0,0 +1,57 @@ +assertEqual( + $def->parseTinyMCEAllowedList('a,b,c'), + array(array('a' => true, 'b' => true, 'c' => true), array()) + ); + + $this->assertEqual( + $def->parseTinyMCEAllowedList('a[x|y|z]'), + array(array('a' => true), array('a.x' => true, 'a.y' => true, 'a.z' => true)) + ); + + $this->assertEqual( + $def->parseTinyMCEAllowedList('*[id]'), + array(array(), array('*.id' => true)) + ); + + $this->assertEqual( + $def->parseTinyMCEAllowedList('a[*]'), + array(array('a' => true), array('a.*' => true)) + ); + + $this->assertEqual( + $def->parseTinyMCEAllowedList('span[style],strong,a[href|title]'), + array(array('span' => true, 'strong' => true, 'a' => true), + array('span.style' => true, 'a.href' => true, 'a.title' => true)) + ); + + } + + function test_Allowed() { + + $config1 = HTMLPurifier_Config::create(array( + 'HTML.AllowedElements' => array('b', 'i', 'p', 'a'), +
'HTML.AllowedAttributes' => array('a.href', '*.id') + )); + + $config2 = HTMLPurifier_Config::create(array( + 'HTML.Allowed' => 'b,i,p,a[href],*[id]' + )); + + $this->assertEqual($config1->getHTMLDefinition(), $config2->getHTMLDefinition()); + + } + +} + +?> \ No newline at end of file diff --git a/tests/test_files.php b/tests/test_files.php index a6549d9f..58a1d127 100644 --- a/tests/test_files.php +++ b/tests/test_files.php @@ -74,6 +74,7 @@ $test_files[] = 'HTMLPurifier/EncoderTest.php'; $test_files[] = 'HTMLPurifier/EntityLookupTest.php'; $test_files[] = 'HTMLPurifier/EntityParserTest.php'; $test_files[] = 'HTMLPurifier/GeneratorTest.php'; +$test_files[] = 'HTMLPurifier/HTMLDefinitionTest.php'; $test_files[] = 'HTMLPurifier/HTMLModuleManagerTest.php'; $test_files[] = 'HTMLPurifier/HTMLModuleTest.php'; $test_files[] = 'HTMLPurifier/HTMLModule/ScriptingTest.php';