diff --git a/library/HTMLPurifier/ContentSets.php b/library/HTMLPurifier/ContentSets.php index 97b6a43e..5f975e1d 100644 --- a/library/HTMLPurifier/ContentSets.php +++ b/library/HTMLPurifier/ContentSets.php @@ -5,7 +5,6 @@ require_once 'HTMLPurifier/ChildDef.php'; require_once 'HTMLPurifier/ChildDef/Empty.php'; require_once 'HTMLPurifier/ChildDef/Required.php'; require_once 'HTMLPurifier/ChildDef/Optional.php'; -require_once 'HTMLPurifier/ChildDef/StrictBlockquote.php'; // transform class HTMLPurifier_ContentSets { @@ -111,8 +110,6 @@ class HTMLPurifier_ContentSets return new HTMLPurifier_ChildDef_Optional($value); case 'empty': return new HTMLPurifier_ChildDef_Empty(); - case 'strictblockquote': - return new HTMLPurifier_ChildDef_StrictBlockquote($value); case 'custom': return new HTMLPurifier_ChildDef_Custom($value); } diff --git a/library/HTMLPurifier/ElementDef.php b/library/HTMLPurifier/ElementDef.php index 51063c01..7c37d956 100644 --- a/library/HTMLPurifier/ElementDef.php +++ b/library/HTMLPurifier/ElementDef.php @@ -7,6 +7,12 @@ class HTMLPurifier_ElementDef { + /** + * Does the definition work by itself, or is it created solely + * for the purpose of merging into another definition? 
/**
 * Merges the values of another element definition into this one.
 * Values from the new element def take precedence if a value is
 * not mergeable.
 *
 * Array-valued members (attr, attr_transform_pre, attr_transform_post,
 * auto_close, excludes) are merged key by key, with the incoming key
 * overwriting an existing one. The content model is extended with the
 * incoming model as an additional ' | ' alternative; the remaining
 * scalar members are replaced when the incoming definition sets them.
 *
 * @param $def Element definition whose values are merged into this one
 */
function mergeIn($def) {
    
    // later keys take precedence
    foreach ($def->attr as $k => $v) {
        $this->attr[$k] = $v;
    }
    foreach ($def->attr_transform_pre as $k => $v) {
        $this->attr_transform_pre[$k] = $v;
    }
    foreach ($def->attr_transform_post as $k => $v) {
        $this->attr_transform_post[$k] = $v;
    }
    foreach ($def->auto_close as $k => $v) {
        $this->auto_close[$k] = $v;
    }
    foreach ($def->excludes as $k => $v) {
        $this->excludes[$k] = $v;
    }
    
    // any non-null value is copied, including false
    if (!is_null($def->child)) {
        $this->child = $def->child;
    }
    
    if (!empty($def->content_model)) {
        if (empty($this->content_model)) {
            // nothing to extend: appending here would yield ' | X',
            // i.e. a model that starts with an empty alternative
            $this->content_model = $def->content_model;
        } else {
            $this->content_model .= ' | ' . $def->content_model;
        }
    }
    
    if (!empty($def->content_model_type)) {
        $this->content_model_type = $def->content_model_type;
    }
    if (!is_null($def->descendants_are_inline)) {
        $this->descendants_are_inline = $def->descendants_are_inline;
    }
    
}
$this->modules['Text'] = new HTMLPurifier_HTMLModule_Text(); $this->modules['Hypertext'] = new HTMLPurifier_HTMLModule_Hypertext(); $this->modules['List'] = new HTMLPurifier_HTMLModule_List(); @@ -251,6 +255,8 @@ class HTMLPurifier_HTMLDefinition $this->modules['Image'] = new HTMLPurifier_HTMLModule_Image(); $this->modules['StyleAttribute']= new HTMLPurifier_HTMLModule_StyleAttribute(); + $this->modules['TransformToStrict'] = new HTMLPurifier_HTMLModule_TransformToStrict($config); + $this->attr_types = new HTMLPurifier_AttrTypes(); $this->attr_collections = new HTMLPurifier_AttrCollections(); $this->content_sets = new HTMLPurifier_ContentSets(); @@ -279,7 +285,7 @@ class HTMLPurifier_HTMLDefinition // would be nice if we could put each of these in their // own object, would make this hookable too! - $this->setupInfo($config); + $this->processModules($config); $this->setupAttrTransform($config); $this->setupBlockWrapper($config); $this->setupParent($config); @@ -288,24 +294,39 @@ class HTMLPurifier_HTMLDefinition } /** - * Sets up the info array. 
+ * Processes the modules, setting up related info variables * @param $config Instance of HTMLPurifier_Config */ - function setupInfo($config) { + function processModules($config) { $this->attr_collections->setup($this->attr_types, $this->modules); $this->content_sets->setup($this->modules); $this->info_content_sets = $this->content_sets->lookup; foreach ($this->modules as $module_i => $module) { + // process element-wise definitions foreach ($module->info as $name => $def) { - $def =& $this->modules[$module_i]->info[$name]; + // setup info + if (!isset($this->info[$name])) { + if ($def->standalone) { + $this->info[$name] = $this->modules[$module_i]->info[$name]; + } else { + // attempting to merge into an element that doesn't + // exist, ignore it + continue; + } + } else { + $this->info[$name]->mergeIn($this->modules[$module_i]->info[$name]); + } + + // process info + $def = $this->info[$name]; // attribute value expansions $this->attr_collections->performInclusions($def->attr); $this->attr_collections->expandIdentifiers( $def->attr, $this->attr_types); - // chameleon data, set descendants_are_inline + // descendants_are_inline, for ChildDef_Chameleon if (is_string($def->content_model) && strpos($def->content_model, 'Inline') !== false) { if ($name != 'del' && $name != 'ins') { @@ -317,13 +338,16 @@ class HTMLPurifier_HTMLDefinition // set child def from content model $this->content_sets->generateChildDef($def, $module); - // setup info $this->info[$name] = $def; - if ($this->info_parent == $name) { - $this->info_parent_def = $this->info[$name]; - } + } + + // merge in global info variables from module + foreach($module->info_tag_transform as $k => $v) $this->info_tag_transform[$k] = $v; + foreach($module->info_attr_transform_pre as $k => $v) $this->info_attr_transform_pre[$k] = $v; + foreach($module->info_attr_transform_post as $k => $v) $this->info_attr_transform_post[$k]= $v; } + } /** @@ -369,17 +393,13 @@ class HTMLPurifier_HTMLDefinition */ function 
setupCompat($config) { + // convenience for compat $e_Inline = new HTMLPurifier_ChildDef_Optional( $this->info_content_sets['Inline'] + array('#PCDATA' => true)); - // blockquote changes, implement in TransformStrict and Legacy - if ($this->strict) { - $this->info['blockquote']->child = - new HTMLPurifier_ChildDef_StrictBlockquote( - $this->info_content_sets['Block'] + - array('#PCDATA' => true)); - } else { + // blockquote alt child def, implement in Legacy + if (!$this->strict) { $this->info['blockquote']->child = new HTMLPurifier_ChildDef_Optional( $this->info_content_sets['Flow'] + @@ -409,8 +429,7 @@ class HTMLPurifier_HTMLDefinition array('#PCDATA' => true, 'p' => true)); } - // custom, not sure where to implement, because it's not - // just /one/ module + // deprecated config setting, implement in DisableURI module if ($config->get('Attr', 'DisableURI')) { $this->info['a']->attr['href'] = $this->info['img']->attr['longdesc'] = @@ -427,28 +446,7 @@ class HTMLPurifier_HTMLDefinition $this->info['ol']->attr['start'] = new HTMLPurifier_AttrDef_Integer(); } - // deprecated elements transforms, implement in TransformToStrict - $this->info_tag_transform['font'] = new HTMLPurifier_TagTransform_Font(); - $this->info_tag_transform['menu'] = new HTMLPurifier_TagTransform_Simple('ul'); - $this->info_tag_transform['dir'] = new HTMLPurifier_TagTransform_Simple('ul'); - $this->info_tag_transform['center'] = new HTMLPurifier_TagTransform_Center(); - - // deprecated attribute transforms, implement in TransformToStrict - $this->info['h1']->attr_transform_pre[] = - $this->info['h2']->attr_transform_pre[] = - $this->info['h3']->attr_transform_pre[] = - $this->info['h4']->attr_transform_pre[] = - $this->info['h5']->attr_transform_pre[] = - $this->info['h6']->attr_transform_pre[] = - $this->info['p'] ->attr_transform_pre[] = - new HTMLPurifier_AttrTransform_TextAlign(); - - // xml:lang <=> lang mirroring, implement in TransformToStrict? 
- $this->info_attr_transform_post[] = new HTMLPurifier_AttrTransform_Lang(); - $this->info_global_attr['lang'] = new HTMLPurifier_AttrDef_Lang(); - - // setup allowed elements, obsoleted by Modules? (does offer - // different functionality) + // setup allowed elements, SubtractiveWhitelist module $allowed_elements = $config->get('HTML', 'AllowedElements'); if (is_array($allowed_elements)) { foreach ($this->info as $name => $d) { diff --git a/library/HTMLPurifier/HTMLModule.php b/library/HTMLPurifier/HTMLModule.php index 74a08e05..168010ae 100644 --- a/library/HTMLPurifier/HTMLModule.php +++ b/library/HTMLPurifier/HTMLModule.php @@ -53,6 +53,24 @@ class HTMLPurifier_HTMLModule */ var $attr_collections = array(); + /** + * Associative array of deprecated tag name to HTMLPurifier_TagTransform + * @public + */ + var $info_tag_transform = array(); + + /** + * List of HTMLPurifier_AttrTransform to be performed before validation. + * @public + */ + var $info_attr_transform_pre = array(); + + /** + * List of HTMLPurifier_AttrTransform to be performed after validation. + * @public + */ + var $info_attr_transform_post = array(); + /** * Boolean flag that indicates whether or not getChildDef is implemented. * For optimization reasons: may save a call to a function. 
Be sure @@ -72,6 +90,7 @@ class HTMLPurifier_HTMLModule * @public */ function getChildDef($def) {return false;} + } ?> \ No newline at end of file diff --git a/library/HTMLPurifier/HTMLModule/Bdo.php b/library/HTMLPurifier/HTMLModule/Bdo.php index 26fd90e1..d7e13acc 100644 --- a/library/HTMLPurifier/HTMLModule/Bdo.php +++ b/library/HTMLPurifier/HTMLModule/Bdo.php @@ -13,13 +13,13 @@ class HTMLPurifier_HTMLModule_Bdo extends HTMLPurifier_HTMLModule var $elements = array('bdo'); var $info = array(); var $content_sets = array('Inline' => 'bdo'); - var $attr_collections_info = array( + var $attr_collections = array( 'I18N' => array('dir' => false) ); function HTMLPurifier_HTMLModule_Bdo() { $dir = new HTMLPurifier_AttrDef_Enum(array('ltr','rtl'), false); - $this->attr_collections_info['I18N']['dir'] = $dir; + $this->attr_collections['I18N']['dir'] = $dir; $this->info['bdo'] = new HTMLPurifier_ElementDef(); $this->info['bdo']->attr = array( 0 => array('Core', 'Lang'), @@ -33,7 +33,7 @@ class HTMLPurifier_HTMLModule_Bdo extends HTMLPurifier_HTMLModule $this->info['bdo']->content_model = '#PCDATA | Inline'; $this->info['bdo']->content_model_type = 'optional'; // provides fallback behavior if dir's missing (dir is required) - $this->info['bdo']->attr_transform_post[] = + $this->info['bdo']->attr_transform_post['required-dir'] = new HTMLPurifier_AttrTransform_BdoDir(); } diff --git a/library/HTMLPurifier/HTMLModule/Text.php b/library/HTMLPurifier/HTMLModule/Text.php index 68900826..ad7bd8f2 100644 --- a/library/HTMLPurifier/HTMLModule/Text.php +++ b/library/HTMLPurifier/HTMLModule/Text.php @@ -45,7 +45,7 @@ class HTMLPurifier_HTMLModule_Text extends HTMLPurifier_HTMLModule $this->info[$element]->content_model_type = 'empty'; } elseif ($element == 'blockquote') { $this->info[$element]->content_model = 'Heading | Block | List'; - $this->info[$element]->content_model_type = 'strictblockquote'; + $this->info[$element]->content_model_type = 'optional'; } elseif ($element == 
/**
 * Sets up the tag transforms, the merge-only element definitions and
 * the lang attribute handling provided by this module.
 * @param $config Instance of HTMLPurifier_Config
 */
function HTMLPurifier_HTMLModule_TransformToStrict($config) {
    
    // none of the listed elements is defined here; each one gets a
    // non-standalone definition that only carries the changes below
    foreach ($this->elements as $element) {
        $stub = new HTMLPurifier_ElementDef();
        $stub->standalone = false;
        $this->info[$element] = $stub;
    }
    
    // transforms for the deprecated tags
    $this->info_tag_transform['font']   = new HTMLPurifier_TagTransform_Font();
    $this->info_tag_transform['menu']   = new HTMLPurifier_TagTransform_Simple('ul');
    $this->info_tag_transform['dir']    = new HTMLPurifier_TagTransform_Simple('ul');
    $this->info_tag_transform['center'] = new HTMLPurifier_TagTransform_Center();
    
    // deprecated align attribute on headings and paragraphs
    $text_align = new HTMLPurifier_AttrTransform_TextAlign();
    foreach (array('h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'p') as $tag) {
        $this->info[$tag]->attr_transform_pre['align'] = $text_align;
    }
    
    // xml:lang <=> lang mirroring; per the original note this is
    // overridden in TransformToXHTML11
    $this->info_attr_transform_post['lang'] = new HTMLPurifier_AttrTransform_Lang();
    $this->attr_collections['Lang']['lang'] = new HTMLPurifier_AttrDef_Lang();
    
    $strict = $config->get('HTML', 'Strict');
    if ($strict) {
        $this->info['blockquote']->content_model_type = 'strictblockquote';
        // false rather than null: the original note asks for the
        // child definition to be recalculated
        $this->info['blockquote']->child = false;
    }
    
}

/**
 * Flag telling callers that getChildDef() is implemented here.
 */
var $defines_child_def = true;

/**
 * Builds the child definition for the 'strictblockquote' content
 * model type.
 * @param $def Element definition; its content_model_type and
 *             content_model are read
 * @return HTMLPurifier_ChildDef_StrictBlockquote for that type,
 *         false for every other type
 */
function getChildDef($def) {
    if ($def->content_model_type == 'strictblockquote') {
        return new HTMLPurifier_ChildDef_StrictBlockquote($def->content_model);
    }
    return false;
}