From 0ea04db5594f474e0d95b3b299973fa928142b58 Mon Sep 17 00:00:00 2001 From: "Edward Z. Yang" Date: Mon, 21 May 2007 01:36:15 +0000 Subject: [PATCH] [1.7.0] Finish implementing legacy elements, begin implementing legacy attributes - Migrated most unit tests over to XHTML 1.0 Strict to preserve transformation behavior - Created %Core.ColorKeywords to be shared between CSS_Color and HTML_Color - Added AttrDef_HTML_Color as AttrType Color - HTMLPurifier_Config::create(HTMLPurifier_Config $config) now clones the object - Attribute minimization for HTML implemented in Generator - Move div@align fix from proprietary to regular set - Color keywords now map to full six digit hexadecimal codes - Harness will now tack on per-use-case configuration git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@1084 48356398-32a2-884e-a903-53898d9a118a --- TODO | 3 +- library/HTMLPurifier/AttrDef/CSS/Color.php | 46 +++++++++++-------- library/HTMLPurifier/AttrDef/HTML/Bool.php | 23 ++++++++++ library/HTMLPurifier/AttrDef/HTML/Color.php | 35 ++++++++++++++ library/HTMLPurifier/AttrTypes.php | 1 + library/HTMLPurifier/Config.php | 4 +- library/HTMLPurifier/Generator.php | 19 ++++++-- library/HTMLPurifier/HTMLModule/Legacy.php | 43 ++++++++++++++++- .../HTMLModule/Tidy/Proprietary.php | 11 +---- .../HTMLModule/Tidy/XHTMLAndHTML4.php | 3 +- tests/HTMLPurifier/AttrDef/CSS/BorderTest.php | 4 +- tests/HTMLPurifier/AttrDef/CSS/ColorTest.php | 2 +- tests/HTMLPurifier/AttrDef/HTML/ColorTest.php | 23 ++++++++++ tests/HTMLPurifier/GeneratorTest.php | 10 +++- tests/HTMLPurifier/Harness.php | 3 +- .../HTMLPurifier/Strategy/FixNestingTest.php | 2 + .../Strategy/MakeWellFormedTest.php | 2 + .../Strategy/RemoveForeignElementsTest.php | 1 + .../Strategy/ValidateAttributesTest.php | 4 +- tests/test_files.php | 1 + 20 files changed, 197 insertions(+), 43 deletions(-) create mode 100644 library/HTMLPurifier/AttrDef/HTML/Bool.php create mode 100644 library/HTMLPurifier/AttrDef/HTML/Color.php create mode 100644 tests/HTMLPurifier/AttrDef/HTML/ColorTest.php diff --git a/TODO b/TODO index 433e0eec..f65c0e23 100644 --- a/TODO +++ b/TODO @@ -13,9 +13,10 @@ TODO List - Reorganize configuration directives - Set up doctype object inside configuration object - Set up anonymous module management by HTMLDefinition - - Revamp/create unit tests for modules, esp. the Tidy ones # Implement HTMLDefinition caching using serialize # Implement all deprecated tags and attributes + # Create parsing/standards compliance smoketest + # Reorganize Unit Tests - Parse TinyMCE-style whitelist into our %HTML.Allow* whitelists (possibly do this earlier) ? HTML interface for tweaking configuration to see changes diff --git a/library/HTMLPurifier/AttrDef/CSS/Color.php b/library/HTMLPurifier/AttrDef/CSS/Color.php index 4f8ed1fd..53a4fa89 100644 --- a/library/HTMLPurifier/AttrDef/CSS/Color.php +++ b/library/HTMLPurifier/AttrDef/CSS/Color.php @@ -2,6 +2,31 @@ require_once 'HTMLPurifier/AttrDef.php'; +HTMLPurifier_ConfigSchema::define( + 'Core', 'ColorKeywords', array( + 'maroon' => '#800000', + 'red' => '#FF0000', + 'orange' => '#FFA500', + 'yellow' => '#FFFF00', + 'olive' => '#808000', + 'purple' => '#800080', + 'fuchsia' => '#FF00FF', + 'white' => '#FFFFFF', + 'lime' => '#00FF00', + 'green' => '#008000', + 'navy' => '#000080', + 'blue' => '#0000FF', + 'aqua' => '#00FFFF', + 'teal' => '#008080', + 'black' => '#000000', + 'silver' => '#C0C0C0', + 'gray' => '#808080' + ), 'hash', ' +Lookup array of color names to six digit hexadecimal number corresponding +to color, with preceding hash mark. Used when parsing colors. +This directive has been available since 1.7.0. +'); + /** * Validates Color as defined by CSS. */ @@ -10,25 +35,8 @@ class HTMLPurifier_AttrDef_CSS_Color extends HTMLPurifier_AttrDef function validate($color, $config, &$context) { - static $colors = array( - 'maroon' => '#800000', - 'red' => '#F00', - 'orange' => '#FFA500', - 'yellow' => '#FF0', - 'olive' => '#808000', - 'purple' => '#800080', - 'fuchsia' => '#F0F', - 'white' => '#FFF', - 'lime' => '#0F0', - 'green' => '#008000', - 'navy' => '#000080', - 'blue' => '#00F', - 'aqua' => '#0FF', - 'teal' => '#008080', - 'black' => '#000', - 'silver' => '#C0C0C0', - 'gray' => '#808080' - ); + static $colors = null; + if ($colors === null) $colors = $config->get('Core', 'ColorKeywords'); $color = trim($color); if (!$color) return false; diff --git a/library/HTMLPurifier/AttrDef/HTML/Bool.php b/library/HTMLPurifier/AttrDef/HTML/Bool.php new file mode 100644 index 00000000..dfbfb838 --- /dev/null +++ b/library/HTMLPurifier/AttrDef/HTML/Bool.php @@ -0,0 +1,23 @@ +name = $name;} + + function validate($string, $config, &$context) { + if (empty($string)) return false; + return $name; + } + +} + +?> \ No newline at end of file diff --git a/library/HTMLPurifier/AttrDef/HTML/Color.php b/library/HTMLPurifier/AttrDef/HTML/Color.php new file mode 100644 index 00000000..8cfcfda5 --- /dev/null +++ b/library/HTMLPurifier/AttrDef/HTML/Color.php @@ -0,0 +1,35 @@ +get('Core', 'ColorKeywords'); + + $string = trim($string); + + if (empty($string)) return false; + if (isset($colors[$string])) return $colors[$string]; + if ($string[0] === '#') $hex = substr($string, 1); + else $hex = $string; + + $length = strlen($hex); + if ($length !== 3 && $length !== 6) return false; + if (!ctype_xdigit($hex)) return false; + if ($length === 3) $hex = $hex[0].$hex[0].$hex[1].$hex[1].$hex[2].$hex[2]; + + return "#$hex"; + + } + +} + +?> \ No newline at end of file diff --git a/library/HTMLPurifier/AttrTypes.php b/library/HTMLPurifier/AttrTypes.php index 9bf64023..df0c179c 100644 --- a/library/HTMLPurifier/AttrTypes.php +++ b/library/HTMLPurifier/AttrTypes.php @@ -34,6 +34,7 @@ class HTMLPurifier_AttrTypes $this->info['Text'] = new HTMLPurifier_AttrDef_Text(); $this->info['URI'] = new HTMLPurifier_AttrDef_URI(); $this->info['LanguageCode'] = new HTMLPurifier_AttrDef_Lang(); + $this->info['Color'] = new HTMLPurifier_AttrDef_HTML_Color(); // number is really a positive integer (one or more digits) $this->info['Number'] = new HTMLPurifier_AttrDef_Integer(false, false, true); diff --git a/library/HTMLPurifier/Config.php b/library/HTMLPurifier/Config.php index 2bc2f460..e082463f 100644 --- a/library/HTMLPurifier/Config.php +++ b/library/HTMLPurifier/Config.php @@ -70,7 +70,9 @@ class HTMLPurifier_Config * @return Configured HTMLPurifier_Config object */ function create($config) { - if (is_a($config, 'HTMLPurifier_Config')) return $config; + if (is_a($config, 'HTMLPurifier_Config')) { + $config = $config->conf; // create a clone + } $ret = HTMLPurifier_Config::createDefault(); if (is_string($config)) $ret->loadIni($config); elseif (is_array($config)) $ret->loadArray($config); diff --git a/library/HTMLPurifier/Generator.php b/library/HTMLPurifier/Generator.php index 14b147c6..028e81df 100644 --- a/library/HTMLPurifier/Generator.php +++ b/library/HTMLPurifier/Generator.php @@ -74,6 +74,12 @@ class HTMLPurifier_Generator */ var $_scriptFix = false; + /** + * Cache of HTMLDefinition + * @private + */ + var $_def; + /** * Generates HTML from an array of tokens. * @param $tokens Array of HTMLPurifier_Token @@ -89,6 +95,8 @@ class HTMLPurifier_Generator $doctype = $config->getDoctype(); $this->_xhtml = $doctype->xml; + $this->_def = $config->getHTMLDefinition(); + if (!$tokens) return ''; for ($i = 0, $size = count($tokens); $i < $size; $i++) { if ($this->_scriptFix && $tokens[$i]->name === 'script') { @@ -136,14 +144,14 @@ class HTMLPurifier_Generator function generateFromToken($token) { if (!isset($token->type)) return ''; if ($token->type == 'start') { - $attr = $this->generateAttributes($token->attr); + $attr = $this->generateAttributes($token->attr, $token->name); return '<' . $token->name . ($attr ? ' ' : '') . $attr . '>'; } elseif ($token->type == 'end') { return 'name . '>'; } elseif ($token->type == 'empty') { - $attr = $this->generateAttributes($token->attr); + $attr = $this->generateAttributes($token->attr, $token->name); return '<' . $token->name . ($attr ? ' ' : '') . $attr . ( $this->_xhtml ? ' /': '' ) . '>'; @@ -174,13 +182,16 @@ class HTMLPurifier_Generator * @param $assoc_array_of_attributes Attribute array * @return Generate HTML fragment for insertion. */ - function generateAttributes($assoc_array_of_attributes) { + function generateAttributes($assoc_array_of_attributes, $element) { $html = ''; foreach ($assoc_array_of_attributes as $key => $value) { if (!$this->_xhtml) { // remove namespaced attributes if (strpos($key, ':') !== false) continue; - // also needed: check for attribute minimization + if (!empty($this->_def->info[$element]->attr[$key]->minimized)) { + $html .= $key . ' '; + continue; + } } $html .= $key.'="'.$this->escape($value).'" '; } diff --git a/library/HTMLPurifier/HTMLModule/Legacy.php b/library/HTMLPurifier/HTMLModule/Legacy.php index 8563023d..d0f87357 100644 --- a/library/HTMLPurifier/HTMLModule/Legacy.php +++ b/library/HTMLPurifier/HTMLModule/Legacy.php @@ -1,5 +1,7 @@ addElement('u', true, 'Inline', 'Inline', 'Common'); + + $this->addElement('basefont', true, 'Inline', 'Empty', false, array( + 'color' => 'Color', + 'face' => 'Text', // extremely broad, we should + 'size' => 'Text', // tighten it + 'id' => 'ID' + )); + $this->addElement('center', true, 'Block', 'Flow', 'Common'); + $this->addElement('dir', true, 'Block', 'Required: li', 'Common', array( + 'compact' => new HTMLPurifier_AttrDef_HTML_Bool('compact') + )); + $this->addElement('font', true, 'Inline', 'Inline', array('Core', 'I18N'), array( + 'color' => 'Color', + 'face' => 'Text', // extremely broad, we should + 'size' => 'Text', // tighten it + )); + $this->addElement('menu', true, 'Block', 'Required: li', 'Common', array( + 'compact' => new HTMLPurifier_AttrDef_HTML_Bool('compact') + )); $this->addElement('s', true, 'Inline', 'Inline', 'Common'); $this->addElement('strike', true, 'Inline', 'Inline', 'Common'); + $this->addElement('u', true, 'Inline', 'Inline', 'Common'); // setup modifications to old elements @@ -36,6 +57,8 @@ class HTMLPurifier_HTMLModule_Legacy extends HTMLPurifier_HTMLModule $ol =& $this->addBlankElement('ol'); $ol->attr['start'] = new HTMLPurifier_AttrDef_Integer(); + $align = new HTMLPurifier_AttrDef_Enum(array('left', 'right', 'center', 'justify')); + $address =& $this->addBlankElement('address'); $address->content_model = 'Inline | #PCDATA | p'; $address->content_model_type = 'optional'; @@ -46,6 +69,24 @@ class HTMLPurifier_HTMLModule_Legacy extends HTMLPurifier_HTMLModule $blockquote->content_model_type = 'optional'; $blockquote->child = false; + $br =& $this->addBlankElement('br'); + $br->attr['clear'] = new HTMLPurifier_AttrDef_Enum(array('left', 'all', 'right', 'none')); + + $caption =& $this->addBlankElement('caption'); + $caption->attr['align'] = new HTMLPurifier_AttrDef_Enum(array('top', 'bottom', 'left', 'right')); + + $div =& $this->addBlankElement('div'); + $div->attr['align'] = $align; + + // dl.compact omitted + + for ($i = 1; $i <= 6; $i++) { + $h =& $this->addBlankElement("h$i"); + $h->attr['align'] = $align; + } + + // to be continued... + } } diff --git a/library/HTMLPurifier/HTMLModule/Tidy/Proprietary.php b/library/HTMLPurifier/HTMLModule/Tidy/Proprietary.php index 2412f0fb..624b066d 100644 --- a/library/HTMLPurifier/HTMLModule/Tidy/Proprietary.php +++ b/library/HTMLPurifier/HTMLModule/Tidy/Proprietary.php @@ -10,16 +10,7 @@ class HTMLPurifier_HTMLModule_Tidy_Proprietary extends var $defaultLevel = 'light'; function makeFixes() { - $r = array(); - - // {{{ // duplicated from XHTMLAndHTML4: not sure how to factor out - $align_lookup = array(); - $align_values = array('left', 'right', 'center', 'justify'); - foreach ($align_values as $v) $align_lookup[$v] = "text-align:$v;"; - // }}} - $r['div@align'] = new HTMLPurifier_AttrTransform_EnumToCSS('align', $align_lookup); - - return $r; + return array(); } } diff --git a/library/HTMLPurifier/HTMLModule/Tidy/XHTMLAndHTML4.php b/library/HTMLPurifier/HTMLModule/Tidy/XHTMLAndHTML4.php index a486560e..4d70cafc 100644 --- a/library/HTMLPurifier/HTMLModule/Tidy/XHTMLAndHTML4.php +++ b/library/HTMLPurifier/HTMLModule/Tidy/XHTMLAndHTML4.php @@ -88,6 +88,7 @@ class HTMLPurifier_HTMLModule_Tidy_XHTMLAndHTML4 extends $r['h5@align'] = $r['h6@align'] = $r['p@align'] = + $r['div@align'] = new HTMLPurifier_AttrTransform_EnumToCSS('align', $align_lookup); // @bgcolor for table, tr, td, th --------------------------------- @@ -179,7 +180,7 @@ class HTMLPurifier_HTMLModule_Tidy_Transitional extends HTMLPurifier_HTMLModule_Tidy_XHTMLAndHTML4 { var $name = 'Tidy_Transitional'; - var $defaultLevel = 'light'; // switch this to heavy once we implement legacy fully + var $defaultLevel = 'heavy'; } class HTMLPurifier_HTMLModule_Tidy_Strict extends diff --git a/tests/HTMLPurifier/AttrDef/CSS/BorderTest.php b/tests/HTMLPurifier/AttrDef/CSS/BorderTest.php index 521588db..6e130e46 100644 --- a/tests/HTMLPurifier/AttrDef/CSS/BorderTest.php +++ b/tests/HTMLPurifier/AttrDef/CSS/BorderTest.php @@ -10,9 +10,9 @@ class HTMLPurifier_AttrDef_CSS_BorderTest extends HTMLPurifier_AttrDefHarness $config = HTMLPurifier_Config::createDefault(); $this->def = new HTMLPurifier_AttrDef_CSS_Border($config); - $this->assertDef('thick solid red', 'thick solid #F00'); + $this->assertDef('thick solid red', 'thick solid #FF0000'); $this->assertDef('thick solid'); - $this->assertDef('solid red', 'solid #F00'); + $this->assertDef('solid red', 'solid #FF0000'); $this->assertDef('1px solid #000'); } diff --git a/tests/HTMLPurifier/AttrDef/CSS/ColorTest.php b/tests/HTMLPurifier/AttrDef/CSS/ColorTest.php index 1c29ae68..89afd121 100644 --- a/tests/HTMLPurifier/AttrDef/CSS/ColorTest.php +++ b/tests/HTMLPurifier/AttrDef/CSS/ColorTest.php @@ -25,7 +25,7 @@ class HTMLPurifier_AttrDef_CSS_ColorTest extends HTMLPurifier_AttrDefHarness $this->assertDef('rgb(256,-23,34)', 'rgb(255,0,34)'); // color keywords, of course - $this->assertDef('red', '#F00'); + $this->assertDef('red', '#FF0000'); // maybe hex transformations would be another nice feature // at the very least transform rgb percent to rgb integer diff --git a/tests/HTMLPurifier/AttrDef/HTML/ColorTest.php b/tests/HTMLPurifier/AttrDef/HTML/ColorTest.php new file mode 100644 index 00000000..39bd80db --- /dev/null +++ b/tests/HTMLPurifier/AttrDef/HTML/ColorTest.php @@ -0,0 +1,23 @@ +def = new HTMLPurifier_AttrDef_HTML_Color(); + $this->assertDef('', false); + $this->assertDef('foo', false); + $this->assertDef('43', false); + $this->assertDef('red', '#FF0000'); + $this->assertDef('#FF0000'); + $this->assertDef('#453443'); + $this->assertDef('453443', '#453443'); + $this->assertDef('#345', '#334455'); + $this->assertDef('120', '#112200'); + } +} + +?> \ No newline at end of file diff --git a/tests/HTMLPurifier/GeneratorTest.php b/tests/HTMLPurifier/GeneratorTest.php index 7528f9fa..2656e82f 100644 --- a/tests/HTMLPurifier/GeneratorTest.php +++ b/tests/HTMLPurifier/GeneratorTest.php @@ -89,13 +89,21 @@ class HTMLPurifier_GeneratorTest extends HTMLPurifier_Harness $expect[4] = 'title="Theta is ' . $theta_char . '"'; foreach ($inputs as $i => $input) { - $result = $this->obj->generateAttributes($input); + $result = $this->obj->generateAttributes($input, 'irrelevant'); $this->assertIdentical($result, $expect[$i]); paintIf($result, $result != $expect[$i]); } } + function test_generateAttributes_minimized() { + $gen = new HTMLPurifier_Generator(); + $context = new HTMLPurifier_Context(); + $gen->generateFromTokens(array(), HTMLPurifier_Config::create(array('HTML.Doctype' => 'HTML 4.01 Transitional')), $context); + $result = $gen->generateAttributes(array('compact' => 'compact'), 'menu'); + $this->assertIdentical($result, 'compact'); + } + function test_generateFromTokens() { $this->func = 'generateFromTokens'; diff --git a/tests/HTMLPurifier/Harness.php b/tests/HTMLPurifier/Harness.php index efb821f0..c591db32 100644 --- a/tests/HTMLPurifier/Harness.php +++ b/tests/HTMLPurifier/Harness.php @@ -63,12 +63,13 @@ class HTMLPurifier_Harness extends UnitTestCase * context object. */ function assertResult($input, $expect = true, - $config_array = false, $context_array = array() + $config_array = array(), $context_array = array() ) { // setup config if ($this->config) { $config = HTMLPurifier_Config::create($this->config); + $config->loadArray($config_array); } else { $config = HTMLPurifier_Config::create($config_array); } diff --git a/tests/HTMLPurifier/Strategy/FixNestingTest.php b/tests/HTMLPurifier/Strategy/FixNestingTest.php index d61e1713..4c8a04a9 100644 --- a/tests/HTMLPurifier/Strategy/FixNestingTest.php +++ b/tests/HTMLPurifier/Strategy/FixNestingTest.php @@ -13,6 +13,8 @@ class HTMLPurifier_Strategy_FixNestingTest extends HTMLPurifier_StrategyHarness function test() { + $this->config = array('HTML.Doctype' => 'XHTML 1.0 Strict'); + // legal inline $this->assertResult('Bold text'); diff --git a/tests/HTMLPurifier/Strategy/MakeWellFormedTest.php b/tests/HTMLPurifier/Strategy/MakeWellFormedTest.php index 07e9202d..e1760a64 100644 --- a/tests/HTMLPurifier/Strategy/MakeWellFormedTest.php +++ b/tests/HTMLPurifier/Strategy/MakeWellFormedTest.php @@ -13,6 +13,8 @@ class HTMLPurifier_Strategy_MakeWellFormedTest extends HTMLPurifier_StrategyHarn function test() { + $this->config = array('HTML.Doctype' => 'XHTML 1.0 Strict'); + $this->assertResult(''); $this->assertResult('This is bold text.'); diff --git a/tests/HTMLPurifier/Strategy/RemoveForeignElementsTest.php b/tests/HTMLPurifier/Strategy/RemoveForeignElementsTest.php index 9ec193dc..52a87753 100644 --- a/tests/HTMLPurifier/Strategy/RemoveForeignElementsTest.php +++ b/tests/HTMLPurifier/Strategy/RemoveForeignElementsTest.php @@ -14,6 +14,7 @@ class HTMLPurifier_Strategy_RemoveForeignElementsTest function test() { + $this->config = array('HTML.Doctype' => 'XHTML 1.0 Strict'); $this->assertResult(''); diff --git a/tests/HTMLPurifier/Strategy/ValidateAttributesTest.php b/tests/HTMLPurifier/Strategy/ValidateAttributesTest.php index 8f69b990..54a49a31 100644 --- a/tests/HTMLPurifier/Strategy/ValidateAttributesTest.php +++ b/tests/HTMLPurifier/Strategy/ValidateAttributesTest.php @@ -11,6 +11,7 @@ class HTMLPurifier_Strategy_ValidateAttributesTest extends function setUp() { parent::setUp(); $this->obj = new HTMLPurifier_Strategy_ValidateAttributes(); + $this->config = array('HTML.Doctype' => 'XHTML 1.0 Strict'); } function testEmpty() { @@ -297,7 +298,8 @@ class HTMLPurifier_Strategy_ValidateAttributesTest extends $this->assertResult( '', true, - array('Attr.AllowedFrameTargets' => '_top') + array('Attr.AllowedFrameTargets' => '_top', + 'HTML.Doctype' => 'XHTML 1.0 Transitional') ); $this->assertResult( '', diff --git a/tests/test_files.php b/tests/test_files.php index 21c7b943..af128882 100644 --- a/tests/test_files.php +++ b/tests/test_files.php @@ -20,6 +20,7 @@ $test_files[] = 'AttrDef/CSS/TextDecorationTest.php'; $test_files[] = 'AttrDef/CSS/URITest.php'; $test_files[] = 'AttrDef/CSSTest.php'; $test_files[] = 'AttrDef/EnumTest.php'; +$test_files[] = 'AttrDef/HTML/ColorTest.php'; $test_files[] = 'AttrDef/HTML/IDTest.php'; $test_files[] = 'AttrDef/HTML/LengthTest.php'; $test_files[] = 'AttrDef/HTML/FrameTargetTest.php';