diff --git a/library/HTMLPurifier/AttrCollections.php b/library/HTMLPurifier/AttrCollections.php index 455e50bf..8efb1931 100644 --- a/library/HTMLPurifier/AttrCollections.php +++ b/library/HTMLPurifier/AttrCollections.php @@ -107,6 +107,10 @@ class HTMLPurifier_AttrCollections foreach ($attr as $def_i => $def) { if ($def_i === 0) continue; if (!is_string($def)) continue; + if ($def === false) { + unset($attr[$def_i]); + continue; + } if (isset($attr_types->info[$def])) { $attr[$def_i] = $attr_types->info[$def]; } else { diff --git a/library/HTMLPurifier/AttrTypes.php b/library/HTMLPurifier/AttrTypes.php index 7b7f4ca9..c942c856 100644 --- a/library/HTMLPurifier/AttrTypes.php +++ b/library/HTMLPurifier/AttrTypes.php @@ -4,6 +4,10 @@ require_once 'HTMLPurifier/AttrDef/Nmtokens.php'; require_once 'HTMLPurifier/AttrDef/Text.php'; require_once 'HTMLPurifier/AttrDef/ID.php'; require_once 'HTMLPurifier/AttrDef/URI.php'; +require_once 'HTMLPurifier/AttrDef/Pixels.php'; +require_once 'HTMLPurifier/AttrDef/Length.php'; +require_once 'HTMLPurifier/AttrDef/MultiLength.php'; +require_once 'HTMLPurifier/AttrDef/Integer.php'; /** * Provides lookup array of attribute types to HTMLPurifier_AttrDef objects diff --git a/library/HTMLPurifier/ChildDef/Required.php b/library/HTMLPurifier/ChildDef/Required.php index 16ba5e95..c6f706e2 100644 --- a/library/HTMLPurifier/ChildDef/Required.php +++ b/library/HTMLPurifier/ChildDef/Required.php @@ -20,10 +20,13 @@ class HTMLPurifier_ChildDef_Required extends HTMLPurifier_ChildDef $elements = str_replace(' ', '', $elements); $elements = explode('|', $elements); } - $elements = array_flip($elements); - foreach ($elements as $i => $x) { - $elements[$i] = true; - if (empty($i)) unset($elements[$i]); + $keys = array_keys($elements); + if ($keys == array_keys($keys)) { + $elements = array_flip($elements); + foreach ($elements as $i => $x) { + $elements[$i] = true; + if (empty($i)) unset($elements[$i]); + } } $this->elements = $elements; $this->gen = new HTMLPurifier_Generator(); diff --git a/library/HTMLPurifier/Config.php b/library/HTMLPurifier/Config.php index e71c003a..3e2b09a7 100644 --- a/library/HTMLPurifier/Config.php +++ b/library/HTMLPurifier/Config.php @@ -152,20 +152,32 @@ class HTMLPurifier_Config } /** - * Retrieves a copy of the HTML definition. + * Retrieves reference to the HTML definition. + * @param $raw Return a copy that has not been setup yet. Must be + * called before it's been setup, otherwise won't work. */ - function getHTMLDefinition() { + function &getHTMLDefinition($raw = false) { if ($this->html_definition === null) { - $this->html_definition = new HTMLPurifier_HTMLDefinition(); + $this->html_definition = new HTMLPurifier_HTMLDefinition($this); + if ($raw) { + return $this->html_definition; // no setup! + } $this->html_definition->setup($this); } + if ($raw && $this->html_definition->setup) { + trigger_error('HTMLDefinition already setup, overwriting old '. + 'definition (set $config->definition manually to null '. + 'if this is desired behavior).', E_USER_NOTICE); + $this->html_definition = new HTMLPurifier_HTMLDefinition($this); + return $this->html_definition; + } return $this->html_definition; } /** - * Retrieves a copy of the CSS definition + * Retrieves reference to the CSS definition */ - function getCSSDefinition() { + function &getCSSDefinition() { if ($this->css_definition === null) { $this->css_definition = new HTMLPurifier_CSSDefinition(); $this->css_definition->setup($this); diff --git a/library/HTMLPurifier/HTMLDefinition.php b/library/HTMLPurifier/HTMLDefinition.php index 1ad9bacb..6686455b 100644 --- a/library/HTMLPurifier/HTMLDefinition.php +++ b/library/HTMLPurifier/HTMLDefinition.php @@ -1,604 +1,6 @@ <blockquote>Foo</blockquote> '. - 'would become <blockquote><p>Foo</p></blockquote>. The '. - '<p> tags can be replaced '. - 'with whatever you desire, as long as it is a block level element. '. - 'This directive has been available since 1.3.0.' -); - -HTMLPurifier_ConfigSchema::define( - 'HTML', 'Parent', 'div', 'string', - 'String name of element that HTML fragment passed to library will be '. - 'inserted in. An interesting variation would be using span as the '. - 'parent element, meaning that only inline tags would be allowed. '. - 'This directive has been available since 1.3.0.' -); - -HTMLPurifier_ConfigSchema::define( - 'HTML', 'AllowedElements', null, 'lookup/null', - 'If HTML Purifier\'s tag set is unsatisfactory for your needs, you '. - 'can overload it with your own list of tags to allow. Note that this '. - 'method is subtractive: it does its job by taking away from HTML Purifier '. - 'usual feature set, so you cannot add a tag that HTML Purifier never '. - 'supported in the first place (like embed, form or head). If you change this, you '. - 'probably also want to change %HTML.AllowedAttributes. '. - 'Warning: If another directive conflicts with the '. - 'elements here, that directive will win and override. '. - 'This directive has been available since 1.3.0.' -); - -HTMLPurifier_ConfigSchema::define( - 'HTML', 'AllowedAttributes', null, 'lookup/null', - 'IF HTML Purifier\'s attribute set is unsatisfactory, overload it! '. - 'The syntax is \'tag.attr\' or \'*.attr\' for the global attributes '. - '(style, id, class, dir, lang, xml:lang).'. - 'Warning: If another directive conflicts with the '. - 'elements here, that directive will win and override. For '. - 'example, %HTML.EnableAttrID will take precedence over *.id in this '. - 'directive. You must set that directive to true before you can use '. - 'IDs at all. This directive has been available since 1.3.0.' -); - -HTMLPurifier_ConfigSchema::define( - 'Attr', 'DisableURI', false, 'bool', - 'Disables all URIs in all forms. Not sure why you\'d want to do that '. - '(after all, the Internet\'s founded on the notion of a hyperlink). '. - 'This directive has been available since 1.3.0.' -); - -/** - * Definition of the purified HTML that describes allowed children, - * attributes, and many other things. - * - * Conventions: - * - * All member variables that are prefixed with info - * (including the main $info array) are used by HTML Purifier internals - * and should not be directly edited when customizing the HTMLDefinition. - * They can usually be set via configuration directives or custom - * modules. - * - * On the other hand, member variables without the info prefix are used - * internally by the HTMLDefinition and MUST NOT be used by other HTML - * Purifier internals. Many of them, however, are public, and may be - * edited by userspace code to tweak the behavior of HTMLDefinition. - * In practice, there will not be too many of them. - * - * HTMLPurifier_Printer_HTMLDefinition is a notable exception to this - * rule: in the interest of comprehensiveness, it will sniff everything. - */ - -class HTMLPurifier_HTMLDefinition -{ - - /** - * Associative array of element names to HTMLPurifier_ElementDef - * @public - */ - var $info = array(); - - /** - * Associative array of global attribute name to attribute definition. - * @public - */ - var $info_global_attr = array(); - - /** - * String name of parent element HTML will be going into. - * @public - */ - var $info_parent = 'div'; - - /** - * Definition for parent element, allows parent element to be a - * tag that's not allowed inside the HTML fragment. - * @public - */ - var $info_parent_def; - - /** - * String name of element used to wrap inline elements in block context - * @note This is rarely used except for BLOCKQUOTEs in strict mode - * @public - */ - var $info_block_wrapper = 'p'; - - /** - * Associative array of deprecated tag name to HTMLPurifier_TagTransform - * @public - */ - var $info_tag_transform = array(); - - /** - * List of HTMLPurifier_AttrTransform to be performed before validation. - * @public - */ - var $info_attr_transform_pre = array(); - - /** - * List of HTMLPurifier_AttrTransform to be performed after validation. - * @public - */ - var $info_attr_transform_post = array(); - - /** - * Nested lookup array of content set name (Block, Inline) to - * element name to whether or not it belongs in that content set. - * @public - */ - var $info_content_sets = array(); - - /** - * Boolean is a strict definition? - * @public - */ - var $strict; - - /** - * Initializes the definition, the meat of the class. - */ - function setup($config) { - - // some cached config values - $this->strict = $config->get('HTML', 'Strict'); - - ////////////////////////////////////////////////////////////////////// - // info[] : initializes the definition objects - - // if you attempt to define rules later on for a tag not in this array - // PHP will create an stdclass - - $allowed_tags = - array( - 'ins', 'del', 'blockquote', 'dd', 'li', 'div', 'em', 'strong', - 'dfn', 'code', 'samp', 'kbd', 'var', 'cite', 'abbr', 'acronym', - 'q', 'sub', 'tt', 'sup', 'i', 'b', 'big', 'small', - 'bdo', 'span', 'dt', 'p', 'h1', 'h2', 'h3', 'h4', - 'h5', 'h6', 'ol', 'ul', 'dl', 'address', 'img', 'br', 'hr', - 'pre', 'a', 'table', 'caption', 'thead', 'tfoot', 'tbody', - 'colgroup', 'col', 'td', 'th', 'tr' - ); - - if (!$this->strict) { - $allowed_tags[] = 'u'; - $allowed_tags[] = 's'; - $allowed_tags[] = 'strike'; - } - - foreach ($allowed_tags as $tag) { - $this->info[$tag] = new HTMLPurifier_ElementDef(); - } - - ////////////////////////////////////////////////////////////////////// - // info[]->child : defines allowed children for elements - - // emulates the structure of the DTD - // however, these are condensed, with bad stuff taken out - // screening process was done by hand - - // entities: prefixed with e_ and _ replaces . from DTD - // double underlines are entities we made up - - // we don't use an array because that complicates interpolation - // strings are used instead of arrays because if you use arrays, - // you have to do some hideous manipulation with array_merge() - - // ALL ELEMENTS, regardless of whether or not they're allowed, - // are defined here. $allowed_tags then determines what to - // ignore - - $e_special_extra = 'object | applet | img | map | iframe'; - $e_special_basic = 'br | span | bdo'; - $e_special = "$e_special_basic | $e_special_extra"; - $e_fontstyle_extra = 'big | small | font | basefont'; - $e_fontstyle_basic = 'tt | i | b | u | s | strike'; - $e_fontstyle = "$e_fontstyle_basic | $e_fontstyle_extra"; - $e_phrase_extra = 'sub | sup'; - $e_phrase_basic = 'em | strong | dfn | code | q | samp | kbd | var'. - ' | cite | abbr | acronym'; - $e_phrase = "$e_phrase_basic | $e_phrase_extra"; - $e_inline_forms = 'input | select | textarea | label | button'; - $e_misc_inline = 'ins | del | script'; - $e_misc = "noscript | $e_misc_inline"; - $e_inline = "a | $e_special | $e_fontstyle | $e_phrase | $e_inline_forms"; - // pseudo-property we created for convenience, see later on - $e__inline = "#PCDATA | $e_inline | $e_misc_inline"; - // note the casing - $e_Inline = new HTMLPurifier_ChildDef_Optional($e__inline); - $e_heading = 'h1|h2|h3|h4|h5|h6'; - $e_lists = 'ul | ol | dl | menu | dir'; - $e_blocktext = 'pre | hr | blockquote | address | center | noframes'; - $e_block = "p | $e_heading | div | $e_lists | $e_blocktext | isindex | fieldset | table"; - $e_Block = new HTMLPurifier_ChildDef_Optional($e_block); - $e__flow = "#PCDATA | $e_block | form | $e_inline | $e_misc"; - $e_Flow = new HTMLPurifier_ChildDef_Optional($e__flow); - $e_form_content = new HTMLPurifier_ChildDef_Optional("#PCDATA | $e_block | $e_inline | $e_misc");//unused - $e_form_button_content = new HTMLPurifier_ChildDef_Optional( - "#PCDATA | p | $e_heading | div | $e_lists | $e_blocktext |". - "table | br | span | bdo | object | applet | img | map |". - "$e_fontstyle | $e_phrase | $e_misc");//unused - - $this->info['ins']->child = - $this->info['del']->child = - new HTMLPurifier_ChildDef_Chameleon($e__inline, $e__flow); - - $this->info['dd']->child = - $this->info['li']->child = - $this->info['div']->child = $e_Flow; - - if ($this->strict) { - $this->info['blockquote']->child = new HTMLPurifier_ChildDef_StrictBlockquote($e_block); - } else { - $this->info['blockquote']->child = $e_Flow; - } - - $this->info['caption']->child = - $this->info['em']->child = - $this->info['strong']->child = - $this->info['dfn']->child = - $this->info['code']->child = - $this->info['samp']->child = - $this->info['kbd']->child = - $this->info['var']->child = - $this->info['cite']->child = - $this->info['abbr']->child = - $this->info['acronym']->child = - $this->info['q']->child = - $this->info['sub']->child = - $this->info['tt']->child = - $this->info['sup']->child = - $this->info['i']->child = - $this->info['b']->child = - $this->info['big']->child = - $this->info['small']->child= - $this->info['bdo']->child = - $this->info['span']->child = - $this->info['dt']->child = - $this->info['p']->child = - $this->info['h1']->child = - $this->info['h2']->child = - $this->info['h3']->child = - $this->info['h4']->child = - $this->info['h5']->child = - $this->info['h6']->child = $e_Inline; - - if (!$this->strict) { - $this->info['u']->child = - $this->info['s']->child = - $this->info['strike']->child = $e_Inline; - } - - // the only three required definitions, besides custom table code - $this->info['ol']->child = - $this->info['ul']->child = new HTMLPurifier_ChildDef_Required('li'); - - $this->info['dl']->child = new HTMLPurifier_ChildDef_Required('dt|dd'); - - if ($this->strict) { - $this->info['address']->child = $e_Inline; - } else { - $this->info['address']->child = - new HTMLPurifier_ChildDef_Optional("#PCDATA | p | $e_inline". - " | $e_misc_inline"); - } - - $this->info['img']->child = - $this->info['br']->child = - $this->info['hr']->child = new HTMLPurifier_ChildDef_Empty(); - - // exclusionary - $this->info['pre']->child = $e_Inline; - $this->info['a']->child = $e_Inline; - - $this->info['table']->child = new HTMLPurifier_ChildDef_Table(); - - // not a real entity, watch the double underscore - $e__row = new HTMLPurifier_ChildDef_Required('tr'); - $this->info['thead']->child = $e__row; - $this->info['tfoot']->child = $e__row; - $this->info['tbody']->child = $e__row; - $this->info['colgroup']->child = new HTMLPurifier_ChildDef_Optional('col'); - $this->info['col']->child = new HTMLPurifier_ChildDef_Empty(); - $this->info['tr']->child = new HTMLPurifier_ChildDef_Required('th | td'); - $this->info['th']->child = $e_Flow; - $this->info['td']->child = $e_Flow; - - ////////////////////////////////////////////////////////////////////// - // misc compat stuff with XHTMLDefinition - - foreach ($this->info as $key => $def) { - if ($this->info[$key]->child == $e_Inline) { - $this->info[$key]->descendants_are_inline = true; - } - } - - foreach ($e_Flow->elements as $name => $bool) { - $this->info_content_sets['Flow'][$name] = true; - } - - ////////////////////////////////////////////////////////////////////// - // info[]->excludes : defines elements that aren't allowed in here - - // make sure you test using isset() and not !empty() - - $this->info['a']->excludes = array('a' => true); - $this->info['pre']->excludes = array_flip(array('img', 'big', 'small', - // technically useless, but good to be indepth - 'object', 'applet', 'font', 'basefont')); - - ////////////////////////////////////////////////////////////////////// - // info[]->attr : defines allowed attributes for elements - - // this doesn't include REQUIRED declarations, those are handled - // by the transform classes. It will, however, do simple and slightly - // complex attribute value substitution - - // the question of varying allowed attributes is more entangling. - - $e_Text = new HTMLPurifier_AttrDef_Text(); - - // attrs, included in almost every single one except for a few, - // which manually override these in their local definitions - $this->info_global_attr = array( - // core attrs - 'class' => new HTMLPurifier_AttrDef_Nmtokens(), - 'title' => $e_Text, - 'style' => new HTMLPurifier_AttrDef_CSS(), - // i18n - 'dir' => new HTMLPurifier_AttrDef_Enum(array('ltr','rtl'), false), - 'lang' => new HTMLPurifier_AttrDef_Lang(), - 'xml:lang' => new HTMLPurifier_AttrDef_Lang(), - ); - - if ($config->get('HTML', 'EnableAttrID')) { - $this->info_global_attr['id'] = new HTMLPurifier_AttrDef_ID(); - } - - // required attribute stipulation handled in attribute transformation - $this->info['bdo']->attr = array(); // nothing else - - $this->info['br']->attr['dir'] = false; - $this->info['br']->attr['lang'] = false; - $this->info['br']->attr['xml:lang'] = false; - - $this->info['td']->attr['abbr'] = $e_Text; - $this->info['th']->attr['abbr'] = $e_Text; - - $this->setAttrForTableElements('align', new HTMLPurifier_AttrDef_Enum( - array('left', 'center', 'right', 'justify', 'char'), false)); - - $this->setAttrForTableElements('valign', new HTMLPurifier_AttrDef_Enum( - array('top', 'middle', 'bottom', 'baseline'), false)); - - $this->info['img']->attr['alt'] = $e_Text; - - $e_TFrame = new HTMLPurifier_AttrDef_Enum(array('void', 'above', - 'below', 'hsides', 'lhs', 'rhs', 'vsides', 'box', 'border'), false); - $this->info['table']->attr['frame'] = $e_TFrame; - - $e_TRules = new HTMLPurifier_AttrDef_Enum(array('none', 'groups', - 'rows', 'cols', 'all'), false); - $this->info['table']->attr['rules'] = $e_TRules; - - $this->info['table']->attr['summary'] = $e_Text; - - $this->info['table']->attr['border'] = - new HTMLPurifier_AttrDef_Pixels(); - - $e_Length = new HTMLPurifier_AttrDef_Length(); - $this->info['table']->attr['cellpadding'] = - $this->info['table']->attr['cellspacing'] = - $this->info['table']->attr['width'] = - $this->info['img']->attr['height'] = - $this->info['img']->attr['width'] = $e_Length; - $this->setAttrForTableElements('charoff', $e_Length); - - $e_MultiLength = new HTMLPurifier_AttrDef_MultiLength(); - $this->info['col']->attr['width'] = - $this->info['colgroup']->attr['width'] = $e_MultiLength; - - $e__NumberSpan = new HTMLPurifier_AttrDef_Integer(false, false, true); - $this->info['colgroup']->attr['span'] = - $this->info['col']->attr['span'] = - $this->info['td']->attr['rowspan'] = - $this->info['th']->attr['rowspan'] = - $this->info['td']->attr['colspan'] = - $this->info['th']->attr['colspan'] = $e__NumberSpan; - - if (!$config->get('Attr', 'DisableURI')) { - $e_URI = new HTMLPurifier_AttrDef_URI(); - $this->info['a']->attr['href'] = - $this->info['img']->attr['longdesc'] = - $this->info['del']->attr['cite'] = - $this->info['ins']->attr['cite'] = - $this->info['blockquote']->attr['cite'] = - $this->info['q']->attr['cite'] = $e_URI; - - // URI that causes HTTP request - $this->info['img']->attr['src'] = new HTMLPurifier_AttrDef_URI(true); - } - - if (!$this->strict) { - $this->info['li']->attr['value'] = new HTMLPurifier_AttrDef_Integer(); - $this->info['ol']->attr['start'] = new HTMLPurifier_AttrDef_Integer(); - } - - ////////////////////////////////////////////////////////////////////// - // info_tag_transform : transformations of tags - - $this->info_tag_transform['font'] = new HTMLPurifier_TagTransform_Font(); - $this->info_tag_transform['menu'] = new HTMLPurifier_TagTransform_Simple('ul'); - $this->info_tag_transform['dir'] = new HTMLPurifier_TagTransform_Simple('ul'); - $this->info_tag_transform['center'] = new HTMLPurifier_TagTransform_Center(); - - ////////////////////////////////////////////////////////////////////// - // info[]->auto_close : tags that automatically close another - - // todo: determine whether or not SGML-like modeling based on - // mandatory/optional end tags would be a better policy - - // make sure you test using isset() not !empty() - - // these are all block elements: blocks aren't allowed in P - $this->info['p']->auto_close = array_flip(array( - 'address', 'blockquote', 'dd', 'dir', 'div', 'dl', 'dt', - 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'hr', 'ol', 'p', 'pre', - 'table', 'ul' - )); - - $this->info['li']->auto_close = array('li' => true); - - // we need TABLE and heading mismatch code - // we may need to make this more flexible for heading mismatch, - // or we can just create another info - - ////////////////////////////////////////////////////////////////////// - // info[]->attr_transform_* : attribute transformations in elements - // pre is applied before any validation is done, post is done after - - $this->info['h1']->attr_transform_pre[] = - $this->info['h2']->attr_transform_pre[] = - $this->info['h3']->attr_transform_pre[] = - $this->info['h4']->attr_transform_pre[] = - $this->info['h5']->attr_transform_pre[] = - $this->info['h6']->attr_transform_pre[] = - $this->info['p'] ->attr_transform_pre[] = - new HTMLPurifier_AttrTransform_TextAlign(); - - $this->info['bdo']->attr_transform_post[] = - new HTMLPurifier_AttrTransform_BdoDir(); - - $this->info['img']->attr_transform_post[] = - new HTMLPurifier_AttrTransform_ImgRequired(); - - ////////////////////////////////////////////////////////////////////// - // info_attr_transform_* : global attribute transformation that is - // unconditionally called. Good for transformations that have complex - // start conditions - // pre is applied before any validation is done, post is done after - - $this->info_attr_transform_post[] = new HTMLPurifier_AttrTransform_Lang(); - - // protect against stdclasses floating around - foreach ($this->info as $key => $obj) { - if (is_a($obj, 'stdclass')) { - unset($this->info[$key]); - } - } - - ////////////////////////////////////////////////////////////////////// - // info_block_wrapper : wraps inline elements in block context - - $block_wrapper = $config->get('HTML', 'BlockWrapper'); - if (isset($e_Block->elements[$block_wrapper])) { - $this->info_block_wrapper = $block_wrapper; - } else { - trigger_error('Cannot use non-block element as block wrapper.', - E_USER_ERROR); - } - - ////////////////////////////////////////////////////////////////////// - // info_parent : parent element of the HTML fragment - - $parent = $config->get('HTML', 'Parent'); - if (isset($this->info[$parent])) { - $this->info_parent = $parent; - } else { - trigger_error('Cannot use unrecognized element as parent.', - E_USER_ERROR); - } - $this->info_parent_def = $this->info[$this->info_parent]; - - ////////////////////////////////////////////////////////////////////// - // %HTML.Allowed(Elements|Attributes) : cut non-allowed elements - - $allowed_elements = $config->get('HTML', 'AllowedElements'); - if (is_array($allowed_elements)) { - foreach ($this->info as $name => $d) { - if(!isset($allowed_elements[$name])) unset($this->info[$name]); - } - } - $allowed_attributes = $config->get('HTML', 'AllowedAttributes'); - if (is_array($allowed_attributes)) { - foreach ($this->info_global_attr as $attr_key => $info) { - if (!isset($allowed_attributes["*.$attr_key"])) { - unset($this->info_global_attr[$attr_key]); - } - } - foreach ($this->info as $tag => $info) { - foreach ($info->attr as $attr => $attr_info) { - if (!isset($allowed_attributes["$tag.$attr"])) { - unset($this->info[$tag]->attr[$attr]); - } - } - } - } - } - - function setAttrForTableElements($attr, $def) { - $this->info['col']->attr[$attr] = - $this->info['colgroup']->attr[$attr] = - $this->info['tbody']->attr[$attr] = - $this->info['td']->attr[$attr] = - $this->info['tfoot']->attr[$attr] = - $this->info['th']->attr[$attr] = - $this->info['thead']->attr[$attr] = - $this->info['tr']->attr[$attr] = $def; - } - -} +require_once 'HTMLPurifier/XHTMLDefinition.php'; /** * Structure that stores an element definition. diff --git a/library/HTMLPurifier/XHTMLDefinition.php b/library/HTMLPurifier/XHTMLDefinition.php index 4b588d15..2f4b18ca 100644 --- a/library/HTMLPurifier/XHTMLDefinition.php +++ b/library/HTMLPurifier/XHTMLDefinition.php @@ -1,7 +1,5 @@ <blockquote>Foo</blockquote> '. + 'would become <blockquote><p>Foo</p></blockquote>. The '. + '<p> tags can be replaced '. + 'with whatever you desire, as long as it is a block level element. '. + 'This directive has been available since 1.3.0.' +); + +HTMLPurifier_ConfigSchema::define( + 'HTML', 'Parent', 'div', 'string', + 'String name of element that HTML fragment passed to library will be '. + 'inserted in. An interesting variation would be using span as the '. + 'parent element, meaning that only inline tags would be allowed. '. + 'This directive has been available since 1.3.0.' +); + +HTMLPurifier_ConfigSchema::define( + 'HTML', 'AllowedElements', null, 'lookup/null', + 'If HTML Purifier\'s tag set is unsatisfactory for your needs, you '. + 'can overload it with your own list of tags to allow. Note that this '. + 'method is subtractive: it does its job by taking away from HTML Purifier '. + 'usual feature set, so you cannot add a tag that HTML Purifier never '. + 'supported in the first place (like embed, form or head). If you change this, you '. + 'probably also want to change %HTML.AllowedAttributes. '. + 'Warning: If another directive conflicts with the '. + 'elements here, that directive will win and override. '. + 'This directive has been available since 1.3.0.' +); + +HTMLPurifier_ConfigSchema::define( + 'HTML', 'AllowedAttributes', null, 'lookup/null', + 'IF HTML Purifier\'s attribute set is unsatisfactory, overload it! '. + 'The syntax is \'tag.attr\' or \'*.attr\' for the global attributes '. + '(style, id, class, dir, lang, xml:lang).'. + 'Warning: If another directive conflicts with the '. + 'elements here, that directive will win and override. For '. + 'example, %HTML.EnableAttrID will take precedence over *.id in this '. + 'directive. You must set that directive to true before you can use '. + 'IDs at all. This directive has been available since 1.3.0.' +); + +HTMLPurifier_ConfigSchema::define( + 'Attr', 'DisableURI', false, 'bool', + 'Disables all URIs in all forms. Not sure why you\'d want to do that '. + '(after all, the Internet\'s founded on the notion of a hyperlink). '. + 'This directive has been available since 1.3.0.' +); + /** - * Next-generation HTML definition that will supplant HTMLPurifier_HTMLDefinition + * Definition of the purified HTML that describes allowed children, + * attributes, and many other things. + * + * @note This is the next-gen definition that will be renamed to + * HTMLDefinition soon! + * + * Conventions: + * + * All member variables that are prefixed with info + * (including the main $info array) are used by HTML Purifier internals + * and should not be directly edited when customizing the HTMLDefinition. + * They can usually be set via configuration directives or custom + * modules. + * + * On the other hand, member variables without the info prefix are used + * internally by the HTMLDefinition and MUST NOT be used by other HTML + * Purifier internals. Many of them, however, are public, and may be + * edited by userspace code to tweak the behavior of HTMLDefinition. + * In practice, there will not be too many of them. + * + * HTMLPurifier_Printer_HTMLDefinition is a notable exception to this + * rule: in the interest of comprehensiveness, it will sniff everything. */ -class HTMLPurifier_XHTMLDefinition extends HTMLPurifier_HTMLDefinition +class HTMLPurifier_HTMLDefinition { + /** FULLY-PUBLIC VARIABLES */ + + /** + * Associative array of element names to HTMLPurifier_ElementDef + * @public + */ + var $info = array(); + + /** + * Associative array of global attribute name to attribute definition. + * @public + */ + var $info_global_attr = array(); + + /** + * String name of parent element HTML will be going into. + * @public + */ + var $info_parent = 'div'; + + /** + * Definition for parent element, allows parent element to be a + * tag that's not allowed inside the HTML fragment. + * @public + */ + var $info_parent_def; + + /** + * String name of element used to wrap inline elements in block context + * @note This is rarely used except for BLOCKQUOTEs in strict mode + * @public + */ + var $info_block_wrapper = 'p'; + + /** + * Associative array of deprecated tag name to HTMLPurifier_TagTransform + * @public + */ + var $info_tag_transform = array(); + + /** + * List of HTMLPurifier_AttrTransform to be performed before validation. + * @public + */ + var $info_attr_transform_pre = array(); + + /** + * List of HTMLPurifier_AttrTransform to be performed after validation. + * @public + */ + var $info_attr_transform_post = array(); + + /** + * Nested lookup array of content set name (Block, Inline) to + * element name to whether or not it belongs in that content set. + * @public + */ + var $info_content_sets = array(); + + + + /** PUBLIC BUT INTERNAL VARIABLES */ + + /** + * Boolean is a strict definition? + * @public + */ + var $strict; + /** * Array of HTMLPurifier_Module instances, indexed by module name * @public @@ -53,11 +225,23 @@ class HTMLPurifier_XHTMLDefinition extends HTMLPurifier_HTMLDefinition */ var $attr_collections; + /** + * Is setup? + * @public + */ + var $setup = false; + + + /** * Performs low-cost, preliminary initialization. * @param $config Instance of HTMLPurifier_Config */ - function HTMLPurifier_XHTMLDefinition($config) { + function HTMLPurifier_HTMLDefinition($config) { + + // setup some cached config variables + // this will eventually influence module loading + $this->strict = $config->get('HTML', 'Strict'); $this->modules['Text'] = new HTMLPurifier_HTMLModule_Text(); $this->modules['Hypertext'] = new HTMLPurifier_HTMLModule_Hypertext(); @@ -72,8 +256,17 @@ class HTMLPurifier_XHTMLDefinition extends HTMLPurifier_HTMLDefinition $this->attr_types = new HTMLPurifier_AttrTypes(); $this->attr_collections = new HTMLPurifier_AttrCollections(); + // some compat stuff, will be factored to modules + + // remove ID module + if (!$config->get('HTML', 'EnableAttrID')) { + $this->attr_collections->info['Core']['id'] = false; + } + } + + /** * Processes internals into form usable by HTMLPurifier internals. * Modifying the definition after calling this function should not @@ -82,6 +275,10 @@ class HTMLPurifier_XHTMLDefinition extends HTMLPurifier_HTMLDefinition */ function setup($config) { + // multiple call guard + if ($this->setup) return; + $this->setup = true; + // perform attribute collection substitutions $this->attr_collections->setup($this->attr_types, $this->modules); @@ -153,6 +350,7 @@ class HTMLPurifier_XHTMLDefinition extends HTMLPurifier_HTMLDefinition $this->setupAttrTransform($config); $this->setupBlockWrapper($config); $this->setupParent($config); + $this->setupCompat($config); } @@ -193,6 +391,116 @@ class HTMLPurifier_XHTMLDefinition extends HTMLPurifier_HTMLDefinition $this->info_parent_def = $this->info[$this->info_parent]; } + /** + * Sets up compat code from HTMLDefinition that has not been + * delegated to modules yet + */ + function setupCompat($config) { + + $e_Inline = new HTMLPurifier_ChildDef_Optional( + $this->info_content_sets['Inline'] + + array('#PCDATA' => true)); + + // blockquote changes, implement in TransformStrict and Legacy + if ($this->strict) { + $this->info['blockquote']->child = + new HTMLPurifier_ChildDef_StrictBlockquote( + $this->info_content_sets['Block'] + + array('#PCDATA' => true)); + } else { + $this->info['blockquote']->child = + new HTMLPurifier_ChildDef_Optional( + $this->info_content_sets['Flow'] + + array('#PCDATA' => true)); + } + + // deprecated element definitions, implement in Legacy + if (!$this->strict) { + $this->info['u'] = + $this->info['s'] = + $this->info['strike'] = new HTMLPurifier_ElementDef(); + $this->info['u']->child = + $this->info['s']->child = + $this->info['strike']->child = $e_Inline; + $this->info['u']->descendants_are_inline = + $this->info['s']->descendants_are_inline = + $this->info['strike']->descendants_are_inline = true; + } + + // changed content model for loose, implement in Legacy + if ($this->strict) { + $this->info['address']->child = $e_Inline; + } else { + $this->info['address']->child = + new HTMLPurifier_ChildDef_Optional( + $this->info_content_sets['Inline'] + + array('#PCDATA' => true, 'p' => true)); + } + + // custom, not sure where to implement, because it's not + // just /one/ module + if ($config->get('Attr', 'DisableURI')) { + $this->info['a']->attr['href'] = + $this->info['img']->attr['longdesc'] = + $this->info['del']->attr['cite'] = + $this->info['ins']->attr['cite'] = + $this->info['blockquote']->attr['cite'] = + $this->info['q']->attr['cite'] = + $this->info['img']->attr['src'] = null; + } + + // deprecated attributes implementations, implement in Legacy + if (!$this->strict) { + $this->info['li']->attr['value'] = new HTMLPurifier_AttrDef_Integer(); + $this->info['ol']->attr['start'] = new HTMLPurifier_AttrDef_Integer(); + } + + // deprecated elements transforms, implement in TransformToStrict + $this->info_tag_transform['font'] = new HTMLPurifier_TagTransform_Font(); + $this->info_tag_transform['menu'] = new HTMLPurifier_TagTransform_Simple('ul'); + $this->info_tag_transform['dir'] = new HTMLPurifier_TagTransform_Simple('ul'); + $this->info_tag_transform['center'] = new HTMLPurifier_TagTransform_Center(); + + // deprecated attribute transforms, implement in TransformToStrict + $this->info['h1']->attr_transform_pre[] = + $this->info['h2']->attr_transform_pre[] = + $this->info['h3']->attr_transform_pre[] = + $this->info['h4']->attr_transform_pre[] = + $this->info['h5']->attr_transform_pre[] = + $this->info['h6']->attr_transform_pre[] = + $this->info['p'] ->attr_transform_pre[] = + new HTMLPurifier_AttrTransform_TextAlign(); + + // xml:lang <=> lang mirroring, implement in TransformToStrict? + $this->info_attr_transform_post[] = new HTMLPurifier_AttrTransform_Lang(); + $this->info_global_attr['lang'] = new HTMLPurifier_AttrDef_Lang(); + + // setup allowed elements, obsoleted by Modules? (does offer + // different functionality) + $allowed_elements = $config->get('HTML', 'AllowedElements'); + if (is_array($allowed_elements)) { + foreach ($this->info as $name => $d) { + if(!isset($allowed_elements[$name])) unset($this->info[$name]); + } + } + $allowed_attributes = $config->get('HTML', 'AllowedAttributes'); + if (is_array($allowed_attributes)) { + foreach ($this->info_global_attr as $attr_key => $info) { + if (!isset($allowed_attributes["*.$attr_key"])) { + unset($this->info_global_attr[$attr_key]); + } + } + foreach ($this->info as $tag => $info) { + foreach ($info->attr as $attr => $attr_info) { + if (!isset($allowed_attributes["$tag.$attr"])) { + unset($this->info[$tag]->attr[$attr]); + } + } + } + } + + } + /** * Instantiates a ChildDef based on content_model and content_model_type * member variables in HTMLPurifier_ElementDef