diff --git a/NEWS b/NEWS index 5131f617..160bc4a6 100644 --- a/NEWS +++ b/NEWS @@ -16,11 +16,14 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier ! Added GET method to demo for easier validation, added 50kb max input size ! New directive %HTML.BlockWrapper, for block-ifying inline elements ! New directive %HTML.Parent, allows you to only allow inline content +! New directives %HTML.AllowedElements and %HTML.AllowedAttributes to let + users narrow the set of allowed tags - Added missing type to ChildDef_Chameleon - Remove Tidy option from demo if there is not Tidy available . ChildDef_Required guards against empty tags . Lookup table HTMLDefinition->info_flow_elements added . Added peace-of-mind variable initialization to Strategy_FixNesting +. Added HTMLDefinition->info_parent_def, parent child processing made special 1.2.1, unknown release date (bugfix/minor feature release, may be dropped if 1.2.0 is stable) diff --git a/library/HTMLPurifier/HTMLDefinition.php b/library/HTMLPurifier/HTMLDefinition.php index ae585a86..d3b88e2e 100644 --- a/library/HTMLPurifier/HTMLDefinition.php +++ b/library/HTMLPurifier/HTMLDefinition.php @@ -63,6 +63,30 @@ HTMLPurifier_ConfigSchema::define( 'parent element, meaning that only inline tags would be allowed.' ); +HTMLPurifier_ConfigSchema::define( + 'HTML', 'AllowedElements', null, 'lookup/null', + 'If HTML Purifier\'s tag set is unsatisfactory for your needs, you '. + 'can overload it with your own list of tags to allow. Note that this '. + 'method is subtractive: it does its job by taking away from HTML Purifier\'s '. + 'usual feature set, so you cannot add a tag that HTML Purifier never '. + 'supported in the first place (like embed). If you change this, you '. + 'probably also want to change %HTML.AllowedAttributes. '. + 'Warning: If another directive conflicts with the '. + 'elements here, that directive will win and override.' 
+); + +HTMLPurifier_ConfigSchema::define( + 'HTML', 'AllowedAttributes', null, 'lookup/null', + 'If HTML Purifier\'s attribute set is unsatisfactory, overload it! '. + 'The syntax is \'tag.attr\' or \'*.attr\' for the global attributes '. + '(style, id, class, dir, lang, xml:lang). '. + 'Warning: If another directive conflicts with the '. + 'attributes here, that directive will win and override. For '. + 'example, %HTML.EnableAttrID will take precedence over *.id in this '. + 'directive. You must set that directive to true before you can use '. + 'IDs at all.' +); + /** * Defines the purified HTML type with large amounts of objects. * @@ -101,6 +125,13 @@ class HTMLPurifier_HTMLDefinition */ var $info_parent = 'div'; + /** + * Definition for parent element, allows parent element to be a + * tag that's not allowed inside the HTML fragment. + * @public + */ + var $info_parent_def; + /** * String name of element used to wrap inline elements in block context * @note This is rarely used except for BLOCKQUOTEs in strict mode @@ -498,7 +529,32 @@ class HTMLPurifier_HTMLDefinition trigger_error('Cannot use unrecognized element as parent.', E_USER_ERROR); } + $this->info_parent_def = $this->info[$this->info_parent]; + ////////////////////////////////////////////////////////////////////// + // %HTML.Allowed(Elements|Attributes) : cut non-allowed elements + $allowed_elements = $config->get('HTML', 'AllowedElements'); + if (is_array($allowed_elements)) { + // $allowed_elements[$this->info_parent] = true; // allow parent element + foreach ($this->info as $name => $d) { + if(!isset($allowed_elements[$name])) unset($this->info[$name]); + } + } + $allowed_attributes = $config->get('HTML', 'AllowedAttributes'); + if (is_array($allowed_attributes)) { + foreach ($this->info_global_attr as $attr => $info) { + if (!isset($allowed_attributes["*.$attr"])) { + unset($this->info_global_attr[$attr]); + } + } + foreach ($this->info as $tag => $info) { + foreach ($info->attr as $attr => 
$attr_info) { + if (!isset($allowed_attributes["$tag.$attr"])) { + unset($this->info[$tag]->attr[$attr]); + } + } + } + } } function setAttrForTableElements($attr, $def) { diff --git a/library/HTMLPurifier/Strategy/FixNesting.php b/library/HTMLPurifier/Strategy/FixNesting.php index 3357937e..9ae614e8 100644 --- a/library/HTMLPurifier/Strategy/FixNesting.php +++ b/library/HTMLPurifier/Strategy/FixNesting.php @@ -104,7 +104,11 @@ class HTMLPurifier_Strategy_FixNesting extends HTMLPurifier_Strategy if ($count = count($stack)) { $parent_index = $stack[$count-1]; $parent_name = $tokens[$parent_index]->name; - $parent_def = $definition->info[$parent_name]; + if ($parent_index == 0) { + $parent_def = $definition->info_parent_def; + } else { + $parent_def = $definition->info[$parent_name]; + } } else { // unknown info, it won't be used anyway $parent_index = $parent_name = $parent_def = null; @@ -144,7 +148,14 @@ class HTMLPurifier_Strategy_FixNesting extends HTMLPurifier_Strategy $excludes = array(); // not used, but good to initialize anyway } else { // DEFINITION CALL - $def = $definition->info[$tokens[$i]->name]; + if ($i === 0) { + // special processing for the first node + $def = $definition->info_parent_def; + } else { + $def = $definition->info[$tokens[$i]->name]; + + } + $child_def = $def->child; // have DTD child def validate children @@ -229,13 +240,20 @@ class HTMLPurifier_Strategy_FixNesting extends HTMLPurifier_Strategy // Test if the token indeed is a start tag, if not, move forward // and test again. 
+ $size = count($tokens); while ($i < $size and $tokens[$i]->type != 'start') { if ($tokens[$i]->type == 'end') { // pop a token index off the stack if we ended a node array_pop($stack); // pop an exclusion lookup off exclusion stack if // we ended node and that node had exclusions - if ($definition->info[$tokens[$i]->name]->excludes) { + if ($i == 0 || $i == $size - 1) { + // use specialized var if it's the super-parent + $s_excludes = $definition->info_parent_def->excludes; + } else { + $s_excludes = $definition->info[$tokens[$i]->name]->excludes; + } + if ($s_excludes) { array_pop($exclude_stack); } } diff --git a/tests/HTMLPurifier/Test.php b/tests/HTMLPurifier/Test.php index aaa8341c..805e25ff 100644 --- a/tests/HTMLPurifier/Test.php +++ b/tests/HTMLPurifier/Test.php @@ -36,6 +36,23 @@ class HTMLPurifier_Test extends UnitTestCase } + function testDifferentAllowedElements() { + $config = HTMLPurifier_Config::createDefault(); + $config->set('HTML', 'AllowedElements', array('b', 'i', 'p', 'a')); + $config->set('HTML', 'AllowedAttributes', array('a.href', '*.id')); + $this->purifier = new HTMLPurifier($config); + + $this->assertPurification( + '

Par.

Paragraph

TextBold' + ); + + $this->assertPurification( + 'Not allowedFoobar', + 'Not allowedFoobar' // no ID!!! + ); + + } + } ?> \ No newline at end of file