From 4d38c0293298f2c00b009c4d0e4c4c3d9ae8f700 Mon Sep 17 00:00:00 2001 From: "Edward Z. Yang" Date: Sat, 19 May 2007 21:00:12 +0000 Subject: [PATCH] [1.7.0] Implement and hook-in Tidy module setup. - CommonAttributes factored into XMLCommonAttributes and NonXMLCommonAttributes - Tidy abstract module was completely refactored in interest of usability - Add friendly error message if module does not have name git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@1070 48356398-32a2-884e-a903-53898d9a118a --- NEWS | 2 + TODO | 5 +- .../HTMLModule/CommonAttributes.php | 6 +- .../HTMLModule/NonXMLCommonAttributes.php | 16 ++ library/HTMLPurifier/HTMLModule/Tidy.php | 139 ++++++++++- .../HTMLPurifier/HTMLModule/Tidy/XHTML.php | 20 ++ .../HTMLModule/Tidy/XHTMLAndHTML4.php | 179 +++++++++++++++ .../HTMLModule/Tidy/XHTMLStrict.php | 27 +++ .../HTMLModule/TransformToStrict.php | 216 ------------------ .../HTMLModule/TransformToXHTML11.php | 36 --- .../HTMLModule/XMLCommonAttributes.php | 16 ++ library/HTMLPurifier/HTMLModuleManager.php | 45 ++-- tests/HTMLPurifier/HTMLModule/TidyTest.php | 142 +++++++++++- 13 files changed, 552 insertions(+), 297 deletions(-) create mode 100644 library/HTMLPurifier/HTMLModule/NonXMLCommonAttributes.php create mode 100644 library/HTMLPurifier/HTMLModule/Tidy/XHTML.php create mode 100644 library/HTMLPurifier/HTMLModule/Tidy/XHTMLAndHTML4.php create mode 100644 library/HTMLPurifier/HTMLModule/Tidy/XHTMLStrict.php delete mode 100644 library/HTMLPurifier/HTMLModule/TransformToStrict.php delete mode 100644 library/HTMLPurifier/HTMLModule/TransformToXHTML11.php create mode 100644 library/HTMLPurifier/HTMLModule/XMLCommonAttributes.php diff --git a/NEWS b/NEWS index 273e5e34..9375910b 100644 --- a/NEWS +++ b/NEWS @@ -12,6 +12,8 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier 1.7.0, unknown release date # Completely refactored HTMLModuleManager, decentralizing safety information +# Transform modules changed to Tidy modules, which offer more flexibility + and better modularization . Unit test for ElementDef created, ElementDef behavior modified to be more flexible . Added convenience functions for HTMLModule constructors diff --git a/TODO b/TODO index 268a45ef..46e6bcde 100644 --- a/TODO +++ b/TODO @@ -10,10 +10,9 @@ TODO List 1.7 release [Advanced API] # Complete advanced API, and fully document it - Add framework for unsafe attributes - - Wire in modes (configuration, module and manager wise) + - Document Tidy modules - Reorganize configuration directives - - Determine handling for complex/cascading configuration directives - - Reorganize transformation modules + - Set up doctype object inside configuration object - Set up anonymous module management by HTMLDefinition # Implement HTMLDefinition caching using serialize # Implement all deprecated tags and attributes diff --git a/library/HTMLPurifier/HTMLModule/CommonAttributes.php b/library/HTMLPurifier/HTMLModule/CommonAttributes.php index cad3c6da..34b991f6 100644 --- a/library/HTMLPurifier/HTMLModule/CommonAttributes.php +++ b/library/HTMLPurifier/HTMLModule/CommonAttributes.php @@ -1,5 +1,7 @@ 'ID', 'title' => 'CDATA', ), - 'Lang' => array( - 'xml:lang' => 'LanguageCode', - ), + 'Lang' => array(), 'I18N' => array( 0 => array('Lang'), // proprietary, for xml:lang/lang ), diff --git a/library/HTMLPurifier/HTMLModule/NonXMLCommonAttributes.php b/library/HTMLPurifier/HTMLModule/NonXMLCommonAttributes.php new file mode 100644 index 00000000..a0fed7e6 --- /dev/null +++ b/library/HTMLPurifier/HTMLModule/NonXMLCommonAttributes.php @@ -0,0 +1,16 @@ + array( + 'lang' => 'LanguageCode', + ) + ); +} + +?> \ No newline at end of file diff --git a/library/HTMLPurifier/HTMLModule/Tidy.php b/library/HTMLPurifier/HTMLModule/Tidy.php index 1b29f826..5c3addd9 100644 --- a/library/HTMLPurifier/HTMLModule/Tidy.php +++ b/library/HTMLPurifier/HTMLModule/Tidy.php @@ -25,13 +25,13 @@ HTMLPurifier_ConfigSchema::defineAllowedValues( ); HTMLPurifier_ConfigSchema::define( - 'HTML', 'TidyAdd', array(), 'list', ' + 'HTML', 'TidyAdd', array(), 'lookup', ' Fixes to add to the default set of Tidy fixes as per your level. This directive has been available since 1.7.0. ' ); HTMLPurifier_ConfigSchema::define( - 'HTML', 'TidyRemove', array(), 'list', ' + 'HTML', 'TidyRemove', array(), 'lookup', ' Fixes to remove from the default set of Tidy fixes as per your level. This directive has been available since 1.7.0. ' ); @@ -49,6 +49,11 @@ class HTMLPurifier_HTMLModule_Tidy extends HTMLPurifier_HTMLModule */ var $levels = array(0 => 'none', 'light', 'medium', 'heavy'); + /** + * Default level to place all fixes in. Disabled by default + */ + var $defaultLevel = null; + /** * Lists of fixes used by getFixesForLevel(). Format is: * HTMLModule_Tidy->fixesForLevel[$level] = array('fix-1', 'fix-2'); @@ -66,20 +71,32 @@ class HTMLPurifier_HTMLModule_Tidy extends HTMLPurifier_HTMLModule * subtracted fix has no effect. */ function construct($config) { + + // create fixes, initialize fixesForLevel + $fixes = $this->makeFixes(); + $this->makeFixesForLevel($fixes); + + // figure out which fixes to use $level = $config->get('HTML', 'TidyLevel'); - $fixes = $this->getFixesForLevel($level); - - $add_fixes = $config->get('HTML', 'TidyAdd'); - foreach ($add_fixes as $fix) { - $fixes[$fix] = true; - } + $fixes_lookup = $this->getFixesForLevel($level); + // get custom fix declarations: these need namespace processing + $add_fixes = $config->get('HTML', 'TidyAdd'); $remove_fixes = $config->get('HTML', 'TidyRemove'); - foreach ($remove_fixes as $fix) { - unset($fixes[$fix]); + + foreach ($fixes as $name => $fix) { + // needs to be refactored a little to implement globbing + if ( + isset($remove_fixes[$name]) || + (!isset($add_fixes[$name]) && !isset($fixes_lookup[$name])) + ) { + unset($fixes[$name]); + } } + // populate this module with necessary fixes $this->populate($fixes); + } /** @@ -113,13 +130,111 @@ class HTMLPurifier_HTMLModule_Tidy extends HTMLPurifier_HTMLModule return $ret; } + /** + * Dynamically populates the $fixesForLevel member variable using + * the fixes array. It may be custom overloaded, used in conjunction + * with $defaultLevel, or not used at all. + */ + function makeFixesForLevel($fixes) { + if (!isset($this->defaultLevel)) return; + if (!isset($this->fixesForLevel[$this->defaultLevel])) { + trigger_error( + 'Default level ' . $this->defaultLevel . ' does not exist', + E_USER_ERROR + ); + return; + } + $this->fixesForLevel[$this->defaultLevel] = array_keys($fixes); + } + /** * Populates the module with transforms and other special-case code * based on a list of fixes passed to it - * @abstract * @param $lookup Lookup table of fixes to activate */ - function populate($lookup) {} + function populate($fixes) { + foreach ($fixes as $name => $fix) { + // determine what the fix is for + list($type, $params) = $this->getFixType($name); + switch ($type) { + case 'attr_transform_pre': + case 'attr_transform_post': + $attr = $params['attr']; + if (isset($params['element'])) { + $element = $params['element']; + if (empty($this->info[$element])) { + $e =& $this->addBlankElement($element); + } else { + $e =& $this->info[$element]; + } + } else { + $type = "info_$type"; + $e =& $this; + } + $f =& $e->$type; + $f[$attr] = $fix; + break; + case 'tag_transform': + $this->info_tag_transform[$params['element']] = $fix; + break; + case 'child': + case 'content_model_type': + $element = $params['element']; + if (empty($this->info[$element])) { + $e =& $this->addBlankElement($element); + } else { + $e =& $this->info[$element]; + } + $e->$type = $fix; + break; + default: + trigger_error("Fix type $type not supported", E_USER_ERROR); + break; + } + } + } + + /** + * Parses a fix name and determines what kind of fix it is, as well + * as other information defined by the fix + * @param $name String name of fix + * @return array(string $fix_type, array $fix_parameters) + * @note $fix_parameters is type dependant, see populate() for usage + * of these parameters + */ + function getFixType($name) { + // parse it + $property = $attr = null; + if (strpos($name, '#') !== false) list($name, $property) = explode('#', $name); + if (strpos($name, '@') !== false) list($name, $attr) = explode('@', $name); + + // figure out the parameters + $params = array(); + if ($name !== '') $params['element'] = $name; + if (!is_null($attr)) $params['attr'] = $attr; + + // special case: attribute transform + if (!is_null($attr)) { + if (is_null($property)) $property = 'pre'; + $type = 'attr_transform_' . $property; + return array($type, $params); + } + + // special case: tag transform + if (is_null($property)) { + return array('tag_transform', $params); + } + + return array($property, $params); + + } + + /** + * Defines all fixes the module will perform in a compact + * associative array of fix name to fix implementation. + * @abstract + */ + function makeFixes() {} } diff --git a/library/HTMLPurifier/HTMLModule/Tidy/XHTML.php b/library/HTMLPurifier/HTMLModule/Tidy/XHTML.php new file mode 100644 index 00000000..24b084e8 --- /dev/null +++ b/library/HTMLPurifier/HTMLModule/Tidy/XHTML.php @@ -0,0 +1,20 @@ + \ No newline at end of file diff --git a/library/HTMLPurifier/HTMLModule/Tidy/XHTMLAndHTML4.php b/library/HTMLPurifier/HTMLModule/Tidy/XHTMLAndHTML4.php new file mode 100644 index 00000000..74f91936 --- /dev/null +++ b/library/HTMLPurifier/HTMLModule/Tidy/XHTMLAndHTML4.php @@ -0,0 +1,179 @@ + 'text-align:left;', + 'right' => 'text-align:right;', + 'top' => 'caption-side:top;', + 'bottom' => 'caption-side:bottom;' // not supported by IE + )); + + // @align for img ------------------------------------------------- + $r['img@align'] = + new HTMLPurifier_AttrTransform_EnumToCSS('align', array( + 'left' => 'float:left;', + 'right' => 'float:right;', + 'top' => 'vertical-align:top;', + 'middle' => 'vertical-align:middle;', + 'bottom' => 'vertical-align:baseline;', + )); + + // @align for table ----------------------------------------------- + $r['table@align'] = + new HTMLPurifier_AttrTransform_EnumToCSS('align', array( + 'left' => 'float:left;', + 'center' => 'margin-left:auto;margin-right:auto;', + 'right' => 'float:right;' + )); + + // @align for hr ----------------------------------------------- + $r['hr@align'] = + new HTMLPurifier_AttrTransform_EnumToCSS('align', array( + // we use both text-align and margin because these work + // for different browsers (IE and Firefox, respectively) + // and the melange makes for a pretty cross-compatible + // solution + 'left' => 'margin-left:0;margin-right:auto;text-align:left;', + 'center' => 'margin-left:auto;margin-right:auto;text-align:center;', + 'right' => 'margin-left:auto;margin-right:0;text-align:right;' + )); + + // @align for h1, h2, h3, h4, h5, h6, p, div ---------------------- + // {{{ + $align_lookup = array(); + $align_values = array('left', 'right', 'center', 'justify'); + foreach ($align_values as $v) $align_lookup[$v] = "text-align:$v;"; + // }}} + $r['h1@align'] = + $r['h2@align'] = + $r['h3@align'] = + $r['h4@align'] = + $r['h5@align'] = + $r['h6@align'] = + $r['p@align'] = + $r['div@align'] = + new HTMLPurifier_AttrTransform_EnumToCSS('align', $align_lookup); + + // @bgcolor for table, tr, td, th --------------------------------- + $r['table@bgcolor'] = + $r['td@bgcolor'] = + $r['th@bgcolor'] = + new HTMLPurifier_AttrTransform_BgColor(); + + // @border for img ------------------------------------------------ + $r['img@border'] = new HTMLPurifier_AttrTransform_Border(); + + // @clear for br -------------------------------------------------- + $r['br@clear'] = + new HTMLPurifier_AttrTransform_EnumToCSS('clear', array( + 'left' => 'clear:left;', + 'right' => 'clear:right;', + 'all' => 'clear:both;', + 'none' => 'clear:none;', + )); + + // @height for td, th --------------------------------------------- + $r['td@height'] = + $r['th@height'] = + new HTMLPurifier_AttrTransform_Length('height'); + + // @hspace for img ------------------------------------------------ + $r['img@hspace'] = new HTMLPurifier_AttrTransform_ImgSpace('hspace'); + + // @name for img, a ----------------------------------------------- + $r['img@name'] = + $r['a@name'] = new HTMLPurifier_AttrTransform_Name(); + + // @noshade for hr ------------------------------------------------ + // this transformation is not precise but often good enough. + // different browsers use different styles to designate noshade + $r['hr@noshade'] = + new HTMLPurifier_AttrTransform_BoolToCSS( + 'noshade', + 'color:#808080;background-color:#808080;border:0;' + ); + + // @nowrap for td, th --------------------------------------------- + $r['td@nowrap'] = + $r['th@nowrap'] = + new HTMLPurifier_AttrTransform_BoolToCSS( + 'nowrap', + 'white-space:nowrap;' + ); + + // @size for hr -------------------------------------------------- + $r['hr@size'] = new HTMLPurifier_AttrTransform_Length('size', 'height'); + + // @type for li, ol, ul ------------------------------------------- + // {{{ + $ul_types = array( + 'disc' => 'list-style-type:disc;', + 'square' => 'list-style-type:square;', + 'circle' => 'list-style-type:circle;' + ); + $ol_types = array( + '1' => 'list-style-type:decimal;', + 'i' => 'list-style-type:lower-roman;', + 'I' => 'list-style-type:upper-roman;', + 'a' => 'list-style-type:lower-alpha;', + 'A' => 'list-style-type:upper-alpha;' + ); + $li_types = $ul_types + $ol_types; + // }}} + + $r['ul@type'] = new HTMLPurifier_AttrTransform_EnumToCSS('type', $ul_types); + $r['ol@type'] = new HTMLPurifier_AttrTransform_EnumToCSS('type', $ol_types, true); + $r['li@type'] = new HTMLPurifier_AttrTransform_EnumToCSS('type', $li_types, true); + + // @vspace for img ------------------------------------------------ + $r['img@vspace'] = new HTMLPurifier_AttrTransform_ImgSpace('vspace'); + + // @width for hr, td, th ------------------------------------------ + $r['td@width'] = + $r['th@width'] = + $r['hr@width'] = new HTMLPurifier_AttrTransform_Length('width'); + + return $r; + + } + +} + +class HTMLPurifier_HTMLModule_Tidy_Transitional extends + HTMLPurifier_HTMLModule_Tidy_XHTMLAndHTML4 +{ + var $name = 'Tidy_Transitional'; + var $defaultLevel = 'light'; // switch this to heavy once we implement legacy fully +} + +class HTMLPurifier_HTMLModule_Tidy_Strict extends + HTMLPurifier_HTMLModule_Tidy_XHTMLAndHTML4 +{ + var $name = 'Tidy_Strict'; + var $defaultLevel = 'light'; +} + +?> \ No newline at end of file diff --git a/library/HTMLPurifier/HTMLModule/Tidy/XHTMLStrict.php b/library/HTMLPurifier/HTMLModule/Tidy/XHTMLStrict.php new file mode 100644 index 00000000..97c50010 --- /dev/null +++ b/library/HTMLPurifier/HTMLModule/Tidy/XHTMLStrict.php @@ -0,0 +1,27 @@ +content_model_type != 'strictblockquote') return false; + return new HTMLPurifier_ChildDef_StrictBlockquote($def->content_model); + } + +} + +?> \ No newline at end of file diff --git a/library/HTMLPurifier/HTMLModule/TransformToStrict.php b/library/HTMLPurifier/HTMLModule/TransformToStrict.php deleted file mode 100644 index 516e09da..00000000 --- a/library/HTMLPurifier/HTMLModule/TransformToStrict.php +++ /dev/null @@ -1,216 +0,0 @@ - array() - ); - - function HTMLPurifier_HTMLModule_TransformToStrict() { - - // behavior with transformations when there's another CSS property - // working on it is interesting: the CSS will *always* override - // the deprecated attribute, whereas an inline CSS declaration will - // override the corresponding declaration in, say, an external - // stylesheet. This behavior won't affect most people, but it - // does represent an operational difference we CANNOT fix. - - // == deprecated tag transforms =================================== - - $this->info_tag_transform['font'] = new HTMLPurifier_TagTransform_Font(); - $this->info_tag_transform['menu'] = new HTMLPurifier_TagTransform_Simple('ul'); - $this->info_tag_transform['dir'] = new HTMLPurifier_TagTransform_Simple('ul'); - $this->info_tag_transform['center'] = new HTMLPurifier_TagTransform_Center(); - - // == miscellaneous transforms ==================================== - - // initialize blank element definitions - $elements = array( // alphabetically sorted - 'a', 'blockquote', 'br', 'caption', 'h1', 'h2', 'h3', 'h4', - 'h5', 'h6', 'hr', 'img', 'li', 'ol', 'p', 'table', 'td', - 'th', 'tr', 'ul' ); - foreach ($elements as $name) $this->addBlankElement($name); - - // this should not be applied to XHTML 1.0 Transitional, ONLY - // XHTML 1.0 Strict. We may need to put this in another class. - $this->info['blockquote']->content_model_type = 'strictblockquote'; - $this->info['blockquote']->child = false; - - // == deprecated attribute transforms ============================= - - // this segment will probably need to modularized in some fashion - // in order to allow for different "levels" of transformation - - // @align for caption --------------------------------------------- - $this->info['caption']->attr_transform_pre['align'] = - new HTMLPurifier_AttrTransform_EnumToCSS('align', array( - // we're following IE's behavior, not Firefox's, due - // to the fact that no one supports caption-side:right, - // W3C included (with CSS 2.1). This is a slightly - // unreasonable attribute! - 'left' => 'text-align:left;', - 'right' => 'text-align:right;', - 'top' => 'caption-side:top;', - 'bottom' => 'caption-side:bottom;' // not supported by IE - )); - - // @align for img ------------------------------------------------- - $this->info['img']->attr_transform_pre['align'] = - new HTMLPurifier_AttrTransform_EnumToCSS('align', array( - 'left' => 'float:left;', - 'right' => 'float:right;', - 'top' => 'vertical-align:top;', - 'middle' => 'vertical-align:middle;', - 'bottom' => 'vertical-align:baseline;', - )); - - // @align for table ----------------------------------------------- - $this->info['table']->attr_transform_pre['align'] = - new HTMLPurifier_AttrTransform_EnumToCSS('align', array( - 'left' => 'float:left;', - 'center' => 'margin-left:auto;margin-right:auto;', - 'right' => 'float:right;' - )); - - // @align for hr ----------------------------------------------- - $this->info['hr']->attr_transform_pre['align'] = - new HTMLPurifier_AttrTransform_EnumToCSS('align', array( - // we use both text-align and margin because these work - // for different browsers (IE and Firefox, respectively) - // and the melange makes for a pretty cross-compatible - // solution - 'left' => 'margin-left:0;margin-right:auto;text-align:left;', - 'center' => 'margin-left:auto;margin-right:auto;text-align:center;', - 'right' => 'margin-left:auto;margin-right:0;text-align:right;' - )); - - // @align for h1, h2, h3, h4, h5, h6, p --------------------------- - $align_lookup = array(); - $align_values = array('left', 'right', 'center', 'justify'); - foreach ($align_values as $v) $align_lookup[$v] = "text-align:$v;"; - $this->info['h1']->attr_transform_pre['align'] = - $this->info['h2']->attr_transform_pre['align'] = - $this->info['h3']->attr_transform_pre['align'] = - $this->info['h4']->attr_transform_pre['align'] = - $this->info['h5']->attr_transform_pre['align'] = - $this->info['h6']->attr_transform_pre['align'] = - $this->info['p'] ->attr_transform_pre['align'] = - new HTMLPurifier_AttrTransform_EnumToCSS('align', $align_lookup); - - // @bgcolor for table, tr, td, th --------------------------------- - $this->info['table']->attr_transform_pre['bgcolor'] = - $this->info['tr']->attr_transform_pre['bgcolor'] = - $this->info['td']->attr_transform_pre['bgcolor'] = - $this->info['th']->attr_transform_pre['bgcolor'] = - new HTMLPurifier_AttrTransform_BgColor(); - - // @border for img ------------------------------------------------ - $this->info['img']->attr_transform_pre['border'] = new HTMLPurifier_AttrTransform_Border(); - - // @clear for br -------------------------------------------------- - $this->info['br']->attr_transform_pre['clear'] = - new HTMLPurifier_AttrTransform_EnumToCSS('clear', array( - 'left' => 'clear:left;', - 'right' => 'clear:right;', - 'all' => 'clear:both;', - 'none' => 'clear:none;', - )); - - // @height for td, th --------------------------------------------- - $this->info['td']->attr_transform_pre['height'] = - $this->info['th']->attr_transform_pre['height'] = - new HTMLPurifier_AttrTransform_Length('height'); - - // @hspace for img ------------------------------------------------ - $this->info['img']->attr_transform_pre['hspace'] = new HTMLPurifier_AttrTransform_ImgSpace('hspace'); - - // @lang ---------------------------------------------------------- - // this is overridden in TransformToXHTML11 - $this->info_attr_transform_post['lang'] = new HTMLPurifier_AttrTransform_Lang(); - $this->attr_collections['Lang']['lang'] = new HTMLPurifier_AttrDef_Lang(); - - // @name for img, a ----------------------------------------------- - $this->info['img']->attr_transform_pre['name'] = - $this->info['a']->attr_transform_pre['name'] = new HTMLPurifier_AttrTransform_Name(); - - // @noshade for hr ------------------------------------------------ - // this transformation is not precise but often good enough. - // different browsers use different styles to designate noshade - $this->info['hr']->attr_transform_pre['noshade'] = - new HTMLPurifier_AttrTransform_BoolToCSS( - 'noshade', - 'color:#808080;background-color:#808080;border:0;' - ); - - // @nowrap for td, th --------------------------------------------- - $this->info['td']->attr_transform_pre['nowrap'] = - $this->info['th']->attr_transform_pre['nowrap'] = - new HTMLPurifier_AttrTransform_BoolToCSS( - 'nowrap', - 'white-space:nowrap;' - ); - - // @size for hr -------------------------------------------------- - $this->info['hr']->attr_transform_pre['size'] = new HTMLPurifier_AttrTransform_Length('size', 'height'); - - // @type for li, ol, ul ------------------------------------------- - $ul_types = array( - 'disc' => 'list-style-type:disc;', - 'square' => 'list-style-type:square;', - 'circle' => 'list-style-type:circle;' - ); - $ol_types = array( - '1' => 'list-style-type:decimal;', - 'i' => 'list-style-type:lower-roman;', - 'I' => 'list-style-type:upper-roman;', - 'a' => 'list-style-type:lower-alpha;', - 'A' => 'list-style-type:upper-alpha;' - ); - $li_types = $ul_types + $ol_types; - - $this->info['ul']->attr_transform_pre['type'] = new HTMLPurifier_AttrTransform_EnumToCSS('type', $ul_types); - $this->info['ol']->attr_transform_pre['type'] = new HTMLPurifier_AttrTransform_EnumToCSS('type', $ol_types, true); - $this->info['li']->attr_transform_pre['type'] = new HTMLPurifier_AttrTransform_EnumToCSS('type', $li_types, true); - - // @vspace for img ------------------------------------------------ - $this->info['img']->attr_transform_pre['vspace'] = new HTMLPurifier_AttrTransform_ImgSpace('vspace'); - - // @width for hr, td, th ------------------------------------------ - $this->info['td']->attr_transform_pre['width'] = - $this->info['th']->attr_transform_pre['width'] = - $this->info['hr']->attr_transform_pre['width'] = new HTMLPurifier_AttrTransform_Length('width'); - - } - - var $defines_child_def = true; - function getChildDef($def) { - if ($def->content_model_type != 'strictblockquote') return false; - return new HTMLPurifier_ChildDef_StrictBlockquote($def->content_model); - } - -} - -?> \ No newline at end of file diff --git a/library/HTMLPurifier/HTMLModule/TransformToXHTML11.php b/library/HTMLPurifier/HTMLModule/TransformToXHTML11.php deleted file mode 100644 index 68aac613..00000000 --- a/library/HTMLPurifier/HTMLModule/TransformToXHTML11.php +++ /dev/null @@ -1,36 +0,0 @@ - array( - 'lang' => false // remove it - ) - ); - - var $info_attr_transform_post = array( - 'lang' => false // remove it - ); - - function HTMLPurifier_HTMLModule_TransformToXHTML11() { - $this->info_attr_transform_pre['lang'] = new HTMLPurifier_AttrTransform_Lang(); - } - -} - -?> \ No newline at end of file diff --git a/library/HTMLPurifier/HTMLModule/XMLCommonAttributes.php b/library/HTMLPurifier/HTMLModule/XMLCommonAttributes.php new file mode 100644 index 00000000..341a8761 --- /dev/null +++ b/library/HTMLPurifier/HTMLModule/XMLCommonAttributes.php @@ -0,0 +1,16 @@ + array( + 'xml:lang' => 'LanguageCode', + ) + ); +} + +?> \ No newline at end of file diff --git a/library/HTMLPurifier/HTMLModuleManager.php b/library/HTMLPurifier/HTMLModuleManager.php index 1d127857..a4a35ddf 100644 --- a/library/HTMLPurifier/HTMLModuleManager.php +++ b/library/HTMLPurifier/HTMLModuleManager.php @@ -26,10 +26,14 @@ require_once 'HTMLPurifier/HTMLModule/StyleAttribute.php'; require_once 'HTMLPurifier/HTMLModule/Legacy.php'; require_once 'HTMLPurifier/HTMLModule/Target.php'; require_once 'HTMLPurifier/HTMLModule/Scripting.php'; +require_once 'HTMLPurifier/HTMLModule/XMLCommonAttributes.php'; +require_once 'HTMLPurifier/HTMLModule/NonXMLCommonAttributes.php'; -// proprietary modules -require_once 'HTMLPurifier/HTMLModule/TransformToStrict.php'; -require_once 'HTMLPurifier/HTMLModule/TransformToXHTML11.php'; +// tidy modules +require_once 'HTMLPurifier/HTMLModule/Tidy.php'; +require_once 'HTMLPurifier/HTMLModule/Tidy/XHTMLAndHTML4.php'; +require_once 'HTMLPurifier/HTMLModule/Tidy/XHTML.php'; +require_once 'HTMLPurifier/HTMLModule/Tidy/XHTMLStrict.php'; HTMLPurifier_ConfigSchema::define( 'HTML', 'Doctype', null, 'string/null', @@ -113,40 +117,37 @@ class HTMLPurifier_HTMLModuleManager 'StyleAttribute', 'Scripting' ); $transitional = array('Legacy', 'Target'); + $xml = array('XMLCommonAttributes'); + $non_xml = array('NonXMLCommonAttributes'); $this->doctypes->register( 'HTML 4.01 Transitional', false, - array_merge($common, $transitional), - array('TransformToStrict') - // Tidy: Transitional + array_merge($common, $transitional, $non_xml), + array('Tidy_Transitional') ); $this->doctypes->register( 'HTML 4.01 Strict', false, - array_merge($common), - array('TransformToStrict') - // Tidy: Strict + array_merge($common, $non_xml), + array('Tidy_Strict') ); $this->doctypes->register( 'XHTML 1.0 Transitional', true, - array_merge($common, $transitional), - array('TransformToStrict') - // Tidy: Transitional, XHTML + array_merge($common, $transitional, $xml, $non_xml), + array('Tidy_Transitional', 'Tidy_XHTML') ); $this->doctypes->register( 'XHTML 1.0 Strict', true, - array_merge($common), - array('TransformToStrict') - // Tidy: Strict, XHTML + array_merge($common, $xml, $non_xml), + array('Tidy_Strict', 'Tidy_XHTML', 'Tidy_XHTMLStrict') ); $this->doctypes->register( 'XHTML 1.1', true, - array_merge($common), - array('TransformToStrict', 'TransformToXHTML11') - // Tidy: Strict, XHTML1_1 + array_merge($common, $xml), + array('Tidy_Strict', 'Tidy_XHTML') // Tidy_XHTML1_1 ); } @@ -194,6 +195,10 @@ class HTMLPurifier_HTMLModuleManager } $module = new $module(); } + if (empty($module->name)) { + trigger_error('Module instance of ' . get_class($module) . ' must have name'); + return; + } $this->registeredModules[$module->name] = $module; } @@ -257,7 +262,9 @@ class HTMLPurifier_HTMLModuleManager foreach ($doctype->tidyModules as $module) { $this->processModule($module); - // FIXME!!! initialize the tidy modules here + if (method_exists($this->modules[$module], 'construct')) { + $this->modules[$module]->construct($config); + } } // setup lookup table based on all valid modules diff --git a/tests/HTMLPurifier/HTMLModule/TidyTest.php b/tests/HTMLPurifier/HTMLModule/TidyTest.php index 0b9bfd11..f3a1f977 100644 --- a/tests/HTMLPurifier/HTMLModule/TidyTest.php +++ b/tests/HTMLPurifier/HTMLModule/TidyTest.php @@ -5,7 +5,7 @@ require_once 'HTMLPurifier/HTMLModule/Tidy.php'; Mock::generatePartial( 'HTMLPurifier_HTMLModule_Tidy', 'HTMLPurifier_HTMLModule_Tidy_TestForConstruct', - array('populate') + array('makeFixes', 'makeFixesForLevel', 'populate') ); class HTMLPurifier_HTMLModule_TidyTest extends UnitTestCase @@ -49,7 +49,17 @@ class HTMLPurifier_HTMLModule_TidyTest extends UnitTestCase $module->fixesForLevel['light'] = array('light-fix-1', 'light-fix-2'); $module->fixesForLevel['medium'] = array('medium-fix-1', 'medium-fix-2'); $module->fixesForLevel['heavy'] = array('heavy-fix-1', 'heavy-fix-2'); - // $module->HTMLPurifier_HTMLModule_Tidy(); // constructor + + $j = 0; + $fixes = array( + 'light-fix-1' => $lf1 = $j++, + 'light-fix-2' => $lf2 = $j++, + 'medium-fix-1' => $mf1 = $j++, + 'medium-fix-2' => $mf2 = $j++, + 'heavy-fix-1' => $hf1 = $j++, + 'heavy-fix-2' => $hf2 = $j++ + ); + $module->setReturnValue('makeFixes', $fixes); $config = HTMLPurifier_Config::create(array( 'HTML.TidyLevel' => 'none' @@ -62,13 +72,23 @@ class HTMLPurifier_HTMLModule_TidyTest extends UnitTestCase $config = HTMLPurifier_Config::create(array( 'HTML.TidyLevel' => 'light' )); - $module->expectAt($i++, 'populate', array($module->getFixesForLevel('light'))); + $module->expectAt($i++, 'populate', array(array( + 'light-fix-1' => $lf1, + 'light-fix-2' => $lf2 + ))); $module->construct($config); $config = HTMLPurifier_Config::create(array( 'HTML.TidyLevel' => 'heavy' )); - $module->expectAt($i++, 'populate', array($module->getFixesForLevel('heavy'))); + $module->expectAt($i++, 'populate', array(array( + 'light-fix-1' => $lf1, + 'light-fix-2' => $lf2, + 'medium-fix-1' => $mf1, + 'medium-fix-2' => $mf2, + 'heavy-fix-1' => $hf1, + 'heavy-fix-2' => $hf2 + ))); $module->construct($config); // fine grained tuning @@ -78,8 +98,8 @@ class HTMLPurifier_HTMLModule_TidyTest extends UnitTestCase 'HTML.TidyAdd' => array('light-fix-1', 'medium-fix-1') )); $module->expectAt($i++, 'populate', array(array( - 'light-fix-1' => true, - 'medium-fix-1' => true + 'light-fix-1' => $lf1, + 'medium-fix-1' => $mf1 ))); $module->construct($config); @@ -88,8 +108,8 @@ class HTMLPurifier_HTMLModule_TidyTest extends UnitTestCase 'HTML.TidyRemove' => array('light-fix-1', 'medium-fix-1') )); $module->expectAt($i++, 'populate', array(array( - 'light-fix-2' => true, - 'medium-fix-2' => true + 'light-fix-2' => $lf2, + 'medium-fix-2' => $mf2 ))); $module->construct($config); @@ -99,6 +119,112 @@ class HTMLPurifier_HTMLModule_TidyTest extends UnitTestCase } + function test_makeFixesForLevel() { + + $module = new HTMLPurifier_HTMLModule_Tidy(); + $module->defaultLevel = 'heavy'; + + $module->makeFixesForLevel(array( + 'fix-1' => 0, + 'fix-2' => 1, + 'fix-3' => 2 + )); + + $this->assertIdentical($module->fixesForLevel['heavy'], array('fix-1', 'fix-2', 'fix-3')); + $this->assertIdentical($module->fixesForLevel['medium'], array()); + $this->assertIdentical($module->fixesForLevel['light'], array()); + + } + function test_makeFixesForLevel_undefinedLevel() { + + $module = new HTMLPurifier_HTMLModule_Tidy(); + $module->defaultLevel = 'bananas'; + + $this->expectError('Default level bananas does not exist'); + + $module->makeFixesForLevel(array( + 'fix-1' => 0 + )); + + } + + function test_getFixType() { + + // syntax needs documenting + + $module = new HTMLPurifier_HTMLModule_Tidy(); + + $this->assertIdentical( + $module->getFixType('a'), + array('tag_transform', array('element' => 'a')) + ); + + $this->assertIdentical( + $module->getFixType('a@href'), + $reuse = array('attr_transform_pre', array('element' => 'a', 'attr' => 'href')) + ); + + $this->assertIdentical( + $module->getFixType('a@href#pre'), + $reuse + ); + + $this->assertIdentical( + $module->getFixType('a@href#post'), + array('attr_transform_post', array('element' => 'a', 'attr' => 'href')) + ); + + $this->assertIdentical( + $module->getFixType('xml:foo@xml:bar'), + array('attr_transform_pre', array('element' => 'xml:foo', 'attr' => 'xml:bar')) + ); + + $this->assertIdentical( + $module->getFixType('blockquote#child'), + array('child', array('element' => 'blockquote')) + ); + + $this->assertIdentical( + $module->getFixType('@lang'), + array('attr_transform_pre', array('attr' => 'lang')) + ); + + $this->assertIdentical( + $module->getFixType('@lang#post'), + array('attr_transform_post', array('attr' => 'lang')) + ); + + } + + function test_populate() { + + $i = 0; + + $module = new HTMLPurifier_HTMLModule_Tidy(); + $module->populate(array( + 'element' => $element = $i++, + 'element@attr' => $attr = $i++, + 'element@attr#post' => $attr_post = $i++, + 'element#child' => $child = $i++, + 'element#content_model_type' => $content_model_type = $i++, + '@attr' => $global_attr = $i++, + '@attr#post' => $global_attr_post = $i++ + )); + + $module2 = new HTMLPurifier_HTMLModule_Tidy(); + $e =& $module2->addBlankElement('element'); + $e->attr_transform_pre['attr'] = $attr; + $e->attr_transform_post['attr'] = $attr_post; + $e->child = $child; + $e->content_model_type = $content_model_type; + $module2->info_tag_transform['element'] = $element; + $module2->info_attr_transform_pre['attr'] = $global_attr; + $module2->info_attr_transform_post['attr'] = $global_attr_post; + + $this->assertEqual($module, $module2); + + } + } ?> \ No newline at end of file