diff --git a/library/HTMLPurifier/AttrCollections.php b/library/HTMLPurifier/AttrCollections.php
index 455e50bf..8efb1931 100644
--- a/library/HTMLPurifier/AttrCollections.php
+++ b/library/HTMLPurifier/AttrCollections.php
@@ -107,6 +107,10 @@ class HTMLPurifier_AttrCollections
foreach ($attr as $def_i => $def) {
if ($def_i === 0) continue;
+ if ($def === false) { // must precede the is_string() guard below, which skips false
+ unset($attr[$def_i]);
+ continue;
+ }
if (!is_string($def)) continue;
if (isset($attr_types->info[$def])) {
$attr[$def_i] = $attr_types->info[$def];
} else {
diff --git a/library/HTMLPurifier/AttrTypes.php b/library/HTMLPurifier/AttrTypes.php
index 7b7f4ca9..c942c856 100644
--- a/library/HTMLPurifier/AttrTypes.php
+++ b/library/HTMLPurifier/AttrTypes.php
@@ -4,6 +4,10 @@ require_once 'HTMLPurifier/AttrDef/Nmtokens.php';
require_once 'HTMLPurifier/AttrDef/Text.php';
require_once 'HTMLPurifier/AttrDef/ID.php';
require_once 'HTMLPurifier/AttrDef/URI.php';
+require_once 'HTMLPurifier/AttrDef/Pixels.php';
+require_once 'HTMLPurifier/AttrDef/Length.php';
+require_once 'HTMLPurifier/AttrDef/MultiLength.php';
+require_once 'HTMLPurifier/AttrDef/Integer.php';
/**
* Provides lookup array of attribute types to HTMLPurifier_AttrDef objects
diff --git a/library/HTMLPurifier/ChildDef/Required.php b/library/HTMLPurifier/ChildDef/Required.php
index 16ba5e95..c6f706e2 100644
--- a/library/HTMLPurifier/ChildDef/Required.php
+++ b/library/HTMLPurifier/ChildDef/Required.php
@@ -20,10 +20,13 @@ class HTMLPurifier_ChildDef_Required extends HTMLPurifier_ChildDef
$elements = str_replace(' ', '', $elements);
$elements = explode('|', $elements);
}
- $elements = array_flip($elements);
- foreach ($elements as $i => $x) {
- $elements[$i] = true;
- if (empty($i)) unset($elements[$i]);
+ $keys = array_keys($elements);
+ if ($keys == array_keys($keys)) {
+ $elements = array_flip($elements);
+ foreach ($elements as $i => $x) {
+ $elements[$i] = true;
+ if (empty($i)) unset($elements[$i]);
+ }
}
$this->elements = $elements;
$this->gen = new HTMLPurifier_Generator();
diff --git a/library/HTMLPurifier/Config.php b/library/HTMLPurifier/Config.php
index e71c003a..3e2b09a7 100644
--- a/library/HTMLPurifier/Config.php
+++ b/library/HTMLPurifier/Config.php
@@ -152,20 +152,32 @@ class HTMLPurifier_Config
}
/**
- * Retrieves a copy of the HTML definition.
+ * Retrieves reference to the HTML definition, setting it up on demand.
+ * @param $raw If true, return a definition that has not been setup yet
+ * so it can be customized; a later non-raw call performs the setup.
*/
- function getHTMLDefinition() {
+ function &getHTMLDefinition($raw = false) {
if ($this->html_definition === null) {
- $this->html_definition = new HTMLPurifier_HTMLDefinition();
+ $this->html_definition = new HTMLPurifier_HTMLDefinition($this);
+ if ($raw) {
+ return $this->html_definition; // no setup!
+ }
- $this->html_definition->setup($this);
}
+ if ($raw && $this->html_definition->setup) {
+ trigger_error('HTMLDefinition already setup, overwriting old '.
+ 'definition (set $config->html_definition manually to null '.
+ 'if this is desired behavior).', E_USER_NOTICE);
+ $this->html_definition = new HTMLPurifier_HTMLDefinition($this);
+ return $this->html_definition;
+ }
+ if (!$raw && !$this->html_definition->setup) $this->html_definition->setup($this);
return $this->html_definition;
}
/**
- * Retrieves a copy of the CSS definition
+ * Retrieves reference to the CSS definition
*/
- function getCSSDefinition() {
+ function &getCSSDefinition() {
if ($this->css_definition === null) {
$this->css_definition = new HTMLPurifier_CSSDefinition();
$this->css_definition->setup($this);
diff --git a/library/HTMLPurifier/HTMLDefinition.php b/library/HTMLPurifier/HTMLDefinition.php
index 1ad9bacb..6686455b 100644
--- a/library/HTMLPurifier/HTMLDefinition.php
+++ b/library/HTMLPurifier/HTMLDefinition.php
@@ -1,604 +1,6 @@
<blockquote>Foo</blockquote> '.
- 'would become <blockquote><p>Foo</p></blockquote>
. The '.
- '<p>
tags can be replaced '.
- 'with whatever you desire, as long as it is a block level element. '.
- 'This directive has been available since 1.3.0.'
-);
-
-HTMLPurifier_ConfigSchema::define(
- 'HTML', 'Parent', 'div', 'string',
- 'String name of element that HTML fragment passed to library will be '.
- 'inserted in. An interesting variation would be using span as the '.
- 'parent element, meaning that only inline tags would be allowed. '.
- 'This directive has been available since 1.3.0.'
-);
-
-HTMLPurifier_ConfigSchema::define(
- 'HTML', 'AllowedElements', null, 'lookup/null',
- 'If HTML Purifier\'s tag set is unsatisfactory for your needs, you '.
- 'can overload it with your own list of tags to allow. Note that this '.
- 'method is subtractive: it does its job by taking away from HTML Purifier '.
- 'usual feature set, so you cannot add a tag that HTML Purifier never '.
- 'supported in the first place (like embed, form or head). If you change this, you '.
- 'probably also want to change %HTML.AllowedAttributes. '.
- 'Warning: If another directive conflicts with the '.
- 'elements here, that directive will win and override. '.
- 'This directive has been available since 1.3.0.'
-);
-
-HTMLPurifier_ConfigSchema::define(
- 'HTML', 'AllowedAttributes', null, 'lookup/null',
- 'IF HTML Purifier\'s attribute set is unsatisfactory, overload it! '.
- 'The syntax is \'tag.attr\' or \'*.attr\' for the global attributes '.
- '(style, id, class, dir, lang, xml:lang).'.
- 'Warning: If another directive conflicts with the '.
- 'elements here, that directive will win and override. For '.
- 'example, %HTML.EnableAttrID will take precedence over *.id in this '.
- 'directive. You must set that directive to true before you can use '.
- 'IDs at all. This directive has been available since 1.3.0.'
-);
-
-HTMLPurifier_ConfigSchema::define(
- 'Attr', 'DisableURI', false, 'bool',
- 'Disables all URIs in all forms. Not sure why you\'d want to do that '.
- '(after all, the Internet\'s founded on the notion of a hyperlink). '.
- 'This directive has been available since 1.3.0.'
-);
-
-/**
- * Definition of the purified HTML that describes allowed children,
- * attributes, and many other things.
- *
- * Conventions:
- *
- * All member variables that are prefixed with info
- * (including the main $info array) are used by HTML Purifier internals
- * and should not be directly edited when customizing the HTMLDefinition.
- * They can usually be set via configuration directives or custom
- * modules.
- *
- * On the other hand, member variables without the info prefix are used
- * internally by the HTMLDefinition and MUST NOT be used by other HTML
- * Purifier internals. Many of them, however, are public, and may be
- * edited by userspace code to tweak the behavior of HTMLDefinition.
- * In practice, there will not be too many of them.
- *
- * HTMLPurifier_Printer_HTMLDefinition is a notable exception to this
- * rule: in the interest of comprehensiveness, it will sniff everything.
- */
-
-class HTMLPurifier_HTMLDefinition
-{
-
- /**
- * Associative array of element names to HTMLPurifier_ElementDef
- * @public
- */
- var $info = array();
-
- /**
- * Associative array of global attribute name to attribute definition.
- * @public
- */
- var $info_global_attr = array();
-
- /**
- * String name of parent element HTML will be going into.
- * @public
- */
- var $info_parent = 'div';
-
- /**
- * Definition for parent element, allows parent element to be a
- * tag that's not allowed inside the HTML fragment.
- * @public
- */
- var $info_parent_def;
-
- /**
- * String name of element used to wrap inline elements in block context
- * @note This is rarely used except for BLOCKQUOTEs in strict mode
- * @public
- */
- var $info_block_wrapper = 'p';
-
- /**
- * Associative array of deprecated tag name to HTMLPurifier_TagTransform
- * @public
- */
- var $info_tag_transform = array();
-
- /**
- * List of HTMLPurifier_AttrTransform to be performed before validation.
- * @public
- */
- var $info_attr_transform_pre = array();
-
- /**
- * List of HTMLPurifier_AttrTransform to be performed after validation.
- * @public
- */
- var $info_attr_transform_post = array();
-
- /**
- * Nested lookup array of content set name (Block, Inline) to
- * element name to whether or not it belongs in that content set.
- * @public
- */
- var $info_content_sets = array();
-
- /**
- * Boolean is a strict definition?
- * @public
- */
- var $strict;
-
- /**
- * Initializes the definition, the meat of the class.
- */
- function setup($config) {
-
- // some cached config values
- $this->strict = $config->get('HTML', 'Strict');
-
- //////////////////////////////////////////////////////////////////////
- // info[] : initializes the definition objects
-
- // if you attempt to define rules later on for a tag not in this array
- // PHP will create an stdclass
-
- $allowed_tags =
- array(
- 'ins', 'del', 'blockquote', 'dd', 'li', 'div', 'em', 'strong',
- 'dfn', 'code', 'samp', 'kbd', 'var', 'cite', 'abbr', 'acronym',
- 'q', 'sub', 'tt', 'sup', 'i', 'b', 'big', 'small',
- 'bdo', 'span', 'dt', 'p', 'h1', 'h2', 'h3', 'h4',
- 'h5', 'h6', 'ol', 'ul', 'dl', 'address', 'img', 'br', 'hr',
- 'pre', 'a', 'table', 'caption', 'thead', 'tfoot', 'tbody',
- 'colgroup', 'col', 'td', 'th', 'tr'
- );
-
- if (!$this->strict) {
- $allowed_tags[] = 'u';
- $allowed_tags[] = 's';
- $allowed_tags[] = 'strike';
- }
-
- foreach ($allowed_tags as $tag) {
- $this->info[$tag] = new HTMLPurifier_ElementDef();
- }
-
- //////////////////////////////////////////////////////////////////////
- // info[]->child : defines allowed children for elements
-
- // emulates the structure of the DTD
- // however, these are condensed, with bad stuff taken out
- // screening process was done by hand
-
- // entities: prefixed with e_ and _ replaces . from DTD
- // double underlines are entities we made up
-
- // we don't use an array because that complicates interpolation
- // strings are used instead of arrays because if you use arrays,
- // you have to do some hideous manipulation with array_merge()
-
- // ALL ELEMENTS, regardless of whether or not they're allowed,
- // are defined here. $allowed_tags then determines what to
- // ignore
-
- $e_special_extra = 'object | applet | img | map | iframe';
- $e_special_basic = 'br | span | bdo';
- $e_special = "$e_special_basic | $e_special_extra";
- $e_fontstyle_extra = 'big | small | font | basefont';
- $e_fontstyle_basic = 'tt | i | b | u | s | strike';
- $e_fontstyle = "$e_fontstyle_basic | $e_fontstyle_extra";
- $e_phrase_extra = 'sub | sup';
- $e_phrase_basic = 'em | strong | dfn | code | q | samp | kbd | var'.
- ' | cite | abbr | acronym';
- $e_phrase = "$e_phrase_basic | $e_phrase_extra";
- $e_inline_forms = 'input | select | textarea | label | button';
- $e_misc_inline = 'ins | del | script';
- $e_misc = "noscript | $e_misc_inline";
- $e_inline = "a | $e_special | $e_fontstyle | $e_phrase | $e_inline_forms";
- // pseudo-property we created for convenience, see later on
- $e__inline = "#PCDATA | $e_inline | $e_misc_inline";
- // note the casing
- $e_Inline = new HTMLPurifier_ChildDef_Optional($e__inline);
- $e_heading = 'h1|h2|h3|h4|h5|h6';
- $e_lists = 'ul | ol | dl | menu | dir';
- $e_blocktext = 'pre | hr | blockquote | address | center | noframes';
- $e_block = "p | $e_heading | div | $e_lists | $e_blocktext | isindex | fieldset | table";
- $e_Block = new HTMLPurifier_ChildDef_Optional($e_block);
- $e__flow = "#PCDATA | $e_block | form | $e_inline | $e_misc";
- $e_Flow = new HTMLPurifier_ChildDef_Optional($e__flow);
- $e_form_content = new HTMLPurifier_ChildDef_Optional("#PCDATA | $e_block | $e_inline | $e_misc");//unused
- $e_form_button_content = new HTMLPurifier_ChildDef_Optional(
- "#PCDATA | p | $e_heading | div | $e_lists | $e_blocktext |".
- "table | br | span | bdo | object | applet | img | map |".
- "$e_fontstyle | $e_phrase | $e_misc");//unused
-
- $this->info['ins']->child =
- $this->info['del']->child =
- new HTMLPurifier_ChildDef_Chameleon($e__inline, $e__flow);
-
- $this->info['dd']->child =
- $this->info['li']->child =
- $this->info['div']->child = $e_Flow;
-
- if ($this->strict) {
- $this->info['blockquote']->child = new HTMLPurifier_ChildDef_StrictBlockquote($e_block);
- } else {
- $this->info['blockquote']->child = $e_Flow;
- }
-
- $this->info['caption']->child =
- $this->info['em']->child =
- $this->info['strong']->child =
- $this->info['dfn']->child =
- $this->info['code']->child =
- $this->info['samp']->child =
- $this->info['kbd']->child =
- $this->info['var']->child =
- $this->info['cite']->child =
- $this->info['abbr']->child =
- $this->info['acronym']->child =
- $this->info['q']->child =
- $this->info['sub']->child =
- $this->info['tt']->child =
- $this->info['sup']->child =
- $this->info['i']->child =
- $this->info['b']->child =
- $this->info['big']->child =
- $this->info['small']->child=
- $this->info['bdo']->child =
- $this->info['span']->child =
- $this->info['dt']->child =
- $this->info['p']->child =
- $this->info['h1']->child =
- $this->info['h2']->child =
- $this->info['h3']->child =
- $this->info['h4']->child =
- $this->info['h5']->child =
- $this->info['h6']->child = $e_Inline;
-
- if (!$this->strict) {
- $this->info['u']->child =
- $this->info['s']->child =
- $this->info['strike']->child = $e_Inline;
- }
-
- // the only three required definitions, besides custom table code
- $this->info['ol']->child =
- $this->info['ul']->child = new HTMLPurifier_ChildDef_Required('li');
-
- $this->info['dl']->child = new HTMLPurifier_ChildDef_Required('dt|dd');
-
- if ($this->strict) {
- $this->info['address']->child = $e_Inline;
- } else {
- $this->info['address']->child =
- new HTMLPurifier_ChildDef_Optional("#PCDATA | p | $e_inline".
- " | $e_misc_inline");
- }
-
- $this->info['img']->child =
- $this->info['br']->child =
- $this->info['hr']->child = new HTMLPurifier_ChildDef_Empty();
-
- // exclusionary
- $this->info['pre']->child = $e_Inline;
- $this->info['a']->child = $e_Inline;
-
- $this->info['table']->child = new HTMLPurifier_ChildDef_Table();
-
- // not a real entity, watch the double underscore
- $e__row = new HTMLPurifier_ChildDef_Required('tr');
- $this->info['thead']->child = $e__row;
- $this->info['tfoot']->child = $e__row;
- $this->info['tbody']->child = $e__row;
- $this->info['colgroup']->child = new HTMLPurifier_ChildDef_Optional('col');
- $this->info['col']->child = new HTMLPurifier_ChildDef_Empty();
- $this->info['tr']->child = new HTMLPurifier_ChildDef_Required('th | td');
- $this->info['th']->child = $e_Flow;
- $this->info['td']->child = $e_Flow;
-
- //////////////////////////////////////////////////////////////////////
- // misc compat stuff with XHTMLDefinition
-
- foreach ($this->info as $key => $def) {
- if ($this->info[$key]->child == $e_Inline) {
- $this->info[$key]->descendants_are_inline = true;
- }
- }
-
- foreach ($e_Flow->elements as $name => $bool) {
- $this->info_content_sets['Flow'][$name] = true;
- }
-
- //////////////////////////////////////////////////////////////////////
- // info[]->excludes : defines elements that aren't allowed in here
-
- // make sure you test using isset() and not !empty()
-
- $this->info['a']->excludes = array('a' => true);
- $this->info['pre']->excludes = array_flip(array('img', 'big', 'small',
- // technically useless, but good to be indepth
- 'object', 'applet', 'font', 'basefont'));
-
- //////////////////////////////////////////////////////////////////////
- // info[]->attr : defines allowed attributes for elements
-
- // this doesn't include REQUIRED declarations, those are handled
- // by the transform classes. It will, however, do simple and slightly
- // complex attribute value substitution
-
- // the question of varying allowed attributes is more entangling.
-
- $e_Text = new HTMLPurifier_AttrDef_Text();
-
- // attrs, included in almost every single one except for a few,
- // which manually override these in their local definitions
- $this->info_global_attr = array(
- // core attrs
- 'class' => new HTMLPurifier_AttrDef_Nmtokens(),
- 'title' => $e_Text,
- 'style' => new HTMLPurifier_AttrDef_CSS(),
- // i18n
- 'dir' => new HTMLPurifier_AttrDef_Enum(array('ltr','rtl'), false),
- 'lang' => new HTMLPurifier_AttrDef_Lang(),
- 'xml:lang' => new HTMLPurifier_AttrDef_Lang(),
- );
-
- if ($config->get('HTML', 'EnableAttrID')) {
- $this->info_global_attr['id'] = new HTMLPurifier_AttrDef_ID();
- }
-
- // required attribute stipulation handled in attribute transformation
- $this->info['bdo']->attr = array(); // nothing else
-
- $this->info['br']->attr['dir'] = false;
- $this->info['br']->attr['lang'] = false;
- $this->info['br']->attr['xml:lang'] = false;
-
- $this->info['td']->attr['abbr'] = $e_Text;
- $this->info['th']->attr['abbr'] = $e_Text;
-
- $this->setAttrForTableElements('align', new HTMLPurifier_AttrDef_Enum(
- array('left', 'center', 'right', 'justify', 'char'), false));
-
- $this->setAttrForTableElements('valign', new HTMLPurifier_AttrDef_Enum(
- array('top', 'middle', 'bottom', 'baseline'), false));
-
- $this->info['img']->attr['alt'] = $e_Text;
-
- $e_TFrame = new HTMLPurifier_AttrDef_Enum(array('void', 'above',
- 'below', 'hsides', 'lhs', 'rhs', 'vsides', 'box', 'border'), false);
- $this->info['table']->attr['frame'] = $e_TFrame;
-
- $e_TRules = new HTMLPurifier_AttrDef_Enum(array('none', 'groups',
- 'rows', 'cols', 'all'), false);
- $this->info['table']->attr['rules'] = $e_TRules;
-
- $this->info['table']->attr['summary'] = $e_Text;
-
- $this->info['table']->attr['border'] =
- new HTMLPurifier_AttrDef_Pixels();
-
- $e_Length = new HTMLPurifier_AttrDef_Length();
- $this->info['table']->attr['cellpadding'] =
- $this->info['table']->attr['cellspacing'] =
- $this->info['table']->attr['width'] =
- $this->info['img']->attr['height'] =
- $this->info['img']->attr['width'] = $e_Length;
- $this->setAttrForTableElements('charoff', $e_Length);
-
- $e_MultiLength = new HTMLPurifier_AttrDef_MultiLength();
- $this->info['col']->attr['width'] =
- $this->info['colgroup']->attr['width'] = $e_MultiLength;
-
- $e__NumberSpan = new HTMLPurifier_AttrDef_Integer(false, false, true);
- $this->info['colgroup']->attr['span'] =
- $this->info['col']->attr['span'] =
- $this->info['td']->attr['rowspan'] =
- $this->info['th']->attr['rowspan'] =
- $this->info['td']->attr['colspan'] =
- $this->info['th']->attr['colspan'] = $e__NumberSpan;
-
- if (!$config->get('Attr', 'DisableURI')) {
- $e_URI = new HTMLPurifier_AttrDef_URI();
- $this->info['a']->attr['href'] =
- $this->info['img']->attr['longdesc'] =
- $this->info['del']->attr['cite'] =
- $this->info['ins']->attr['cite'] =
- $this->info['blockquote']->attr['cite'] =
- $this->info['q']->attr['cite'] = $e_URI;
-
- // URI that causes HTTP request
- $this->info['img']->attr['src'] = new HTMLPurifier_AttrDef_URI(true);
- }
-
- if (!$this->strict) {
- $this->info['li']->attr['value'] = new HTMLPurifier_AttrDef_Integer();
- $this->info['ol']->attr['start'] = new HTMLPurifier_AttrDef_Integer();
- }
-
- //////////////////////////////////////////////////////////////////////
- // info_tag_transform : transformations of tags
-
- $this->info_tag_transform['font'] = new HTMLPurifier_TagTransform_Font();
- $this->info_tag_transform['menu'] = new HTMLPurifier_TagTransform_Simple('ul');
- $this->info_tag_transform['dir'] = new HTMLPurifier_TagTransform_Simple('ul');
- $this->info_tag_transform['center'] = new HTMLPurifier_TagTransform_Center();
-
- //////////////////////////////////////////////////////////////////////
- // info[]->auto_close : tags that automatically close another
-
- // todo: determine whether or not SGML-like modeling based on
- // mandatory/optional end tags would be a better policy
-
- // make sure you test using isset() not !empty()
-
- // these are all block elements: blocks aren't allowed in P
- $this->info['p']->auto_close = array_flip(array(
- 'address', 'blockquote', 'dd', 'dir', 'div', 'dl', 'dt',
- 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'hr', 'ol', 'p', 'pre',
- 'table', 'ul'
- ));
-
- $this->info['li']->auto_close = array('li' => true);
-
- // we need TABLE and heading mismatch code
- // we may need to make this more flexible for heading mismatch,
- // or we can just create another info
-
- //////////////////////////////////////////////////////////////////////
- // info[]->attr_transform_* : attribute transformations in elements
- // pre is applied before any validation is done, post is done after
-
- $this->info['h1']->attr_transform_pre[] =
- $this->info['h2']->attr_transform_pre[] =
- $this->info['h3']->attr_transform_pre[] =
- $this->info['h4']->attr_transform_pre[] =
- $this->info['h5']->attr_transform_pre[] =
- $this->info['h6']->attr_transform_pre[] =
- $this->info['p'] ->attr_transform_pre[] =
- new HTMLPurifier_AttrTransform_TextAlign();
-
- $this->info['bdo']->attr_transform_post[] =
- new HTMLPurifier_AttrTransform_BdoDir();
-
- $this->info['img']->attr_transform_post[] =
- new HTMLPurifier_AttrTransform_ImgRequired();
-
- //////////////////////////////////////////////////////////////////////
- // info_attr_transform_* : global attribute transformation that is
- // unconditionally called. Good for transformations that have complex
- // start conditions
- // pre is applied before any validation is done, post is done after
-
- $this->info_attr_transform_post[] = new HTMLPurifier_AttrTransform_Lang();
-
- // protect against stdclasses floating around
- foreach ($this->info as $key => $obj) {
- if (is_a($obj, 'stdclass')) {
- unset($this->info[$key]);
- }
- }
-
- //////////////////////////////////////////////////////////////////////
- // info_block_wrapper : wraps inline elements in block context
-
- $block_wrapper = $config->get('HTML', 'BlockWrapper');
- if (isset($e_Block->elements[$block_wrapper])) {
- $this->info_block_wrapper = $block_wrapper;
- } else {
- trigger_error('Cannot use non-block element as block wrapper.',
- E_USER_ERROR);
- }
-
- //////////////////////////////////////////////////////////////////////
- // info_parent : parent element of the HTML fragment
-
- $parent = $config->get('HTML', 'Parent');
- if (isset($this->info[$parent])) {
- $this->info_parent = $parent;
- } else {
- trigger_error('Cannot use unrecognized element as parent.',
- E_USER_ERROR);
- }
- $this->info_parent_def = $this->info[$this->info_parent];
-
- //////////////////////////////////////////////////////////////////////
- // %HTML.Allowed(Elements|Attributes) : cut non-allowed elements
-
- $allowed_elements = $config->get('HTML', 'AllowedElements');
- if (is_array($allowed_elements)) {
- foreach ($this->info as $name => $d) {
- if(!isset($allowed_elements[$name])) unset($this->info[$name]);
- }
- }
- $allowed_attributes = $config->get('HTML', 'AllowedAttributes');
- if (is_array($allowed_attributes)) {
- foreach ($this->info_global_attr as $attr_key => $info) {
- if (!isset($allowed_attributes["*.$attr_key"])) {
- unset($this->info_global_attr[$attr_key]);
- }
- }
- foreach ($this->info as $tag => $info) {
- foreach ($info->attr as $attr => $attr_info) {
- if (!isset($allowed_attributes["$tag.$attr"])) {
- unset($this->info[$tag]->attr[$attr]);
- }
- }
- }
- }
- }
-
- function setAttrForTableElements($attr, $def) {
- $this->info['col']->attr[$attr] =
- $this->info['colgroup']->attr[$attr] =
- $this->info['tbody']->attr[$attr] =
- $this->info['td']->attr[$attr] =
- $this->info['tfoot']->attr[$attr] =
- $this->info['th']->attr[$attr] =
- $this->info['thead']->attr[$attr] =
- $this->info['tr']->attr[$attr] = $def;
- }
-
-}
+require_once 'HTMLPurifier/XHTMLDefinition.php';
/**
* Structure that stores an element definition.
diff --git a/library/HTMLPurifier/XHTMLDefinition.php b/library/HTMLPurifier/XHTMLDefinition.php
index 4b588d15..2f4b18ca 100644
--- a/library/HTMLPurifier/XHTMLDefinition.php
+++ b/library/HTMLPurifier/XHTMLDefinition.php
@@ -1,7 +1,5 @@
<blockquote>Foo</blockquote> '.
+ 'would become <blockquote><p>Foo</p></blockquote>
. The '.
+ '<p>
tags can be replaced '.
+ 'with whatever you desire, as long as it is a block level element. '.
+ 'This directive has been available since 1.3.0.'
+);
+
+HTMLPurifier_ConfigSchema::define(
+ 'HTML', 'Parent', 'div', 'string',
+ 'String name of element that HTML fragment passed to library will be '.
+ 'inserted in. An interesting variation would be using span as the '.
+ 'parent element, meaning that only inline tags would be allowed. '.
+ 'This directive has been available since 1.3.0.'
+);
+
+HTMLPurifier_ConfigSchema::define(
+ 'HTML', 'AllowedElements', null, 'lookup/null',
+ 'If HTML Purifier\'s tag set is unsatisfactory for your needs, you '.
+ 'can overload it with your own list of tags to allow. Note that this '.
+ 'method is subtractive: it does its job by taking away from HTML Purifier '.
+ 'usual feature set, so you cannot add a tag that HTML Purifier never '.
+ 'supported in the first place (like embed, form or head). If you change this, you '.
+ 'probably also want to change %HTML.AllowedAttributes. '.
+ 'Warning: If another directive conflicts with the '.
+ 'elements here, that directive will win and override. '.
+ 'This directive has been available since 1.3.0.'
+);
+
+HTMLPurifier_ConfigSchema::define(
+ 'HTML', 'AllowedAttributes', null, 'lookup/null',
+ 'IF HTML Purifier\'s attribute set is unsatisfactory, overload it! '.
+ 'The syntax is \'tag.attr\' or \'*.attr\' for the global attributes '.
+ '(style, id, class, dir, lang, xml:lang).'.
+ 'Warning: If another directive conflicts with the '.
+ 'elements here, that directive will win and override. For '.
+ 'example, %HTML.EnableAttrID will take precedence over *.id in this '.
+ 'directive. You must set that directive to true before you can use '.
+ 'IDs at all. This directive has been available since 1.3.0.'
+);
+
+HTMLPurifier_ConfigSchema::define(
+ 'Attr', 'DisableURI', false, 'bool',
+ 'Disables all URIs in all forms. Not sure why you\'d want to do that '.
+ '(after all, the Internet\'s founded on the notion of a hyperlink). '.
+ 'This directive has been available since 1.3.0.'
+);
+
/**
- * Next-generation HTML definition that will supplant HTMLPurifier_HTMLDefinition
+ * Definition of the purified HTML that describes allowed children,
+ * attributes, and many other things.
+ *
+ * @note This is the next-gen definition; the class is already named
+ * HTMLDefinition and this file will be renamed to match soon!
+ *
+ * Conventions:
+ *
+ * All member variables that are prefixed with info
+ * (including the main $info array) are used by HTML Purifier internals
+ * and should not be directly edited when customizing the HTMLDefinition.
+ * They can usually be set via configuration directives or custom
+ * modules.
+ *
+ * On the other hand, member variables without the info prefix are used
+ * internally by the HTMLDefinition and MUST NOT be used by other HTML
+ * Purifier internals. Many of them, however, are public, and may be
+ * edited by userspace code to tweak the behavior of HTMLDefinition.
+ * In practice, there will not be too many of them.
+ *
+ * HTMLPurifier_Printer_HTMLDefinition is a notable exception to this
+ * rule: in the interest of comprehensiveness, it will sniff everything.
*/
-class HTMLPurifier_XHTMLDefinition extends HTMLPurifier_HTMLDefinition
+class HTMLPurifier_HTMLDefinition
{
+ /** FULLY-PUBLIC VARIABLES */
+
+ /**
+ * Associative array of element names to HTMLPurifier_ElementDef
+ * @public
+ */
+ var $info = array();
+
+ /**
+ * Associative array of global attribute name to attribute definition.
+ * @public
+ */
+ var $info_global_attr = array();
+
+ /**
+ * String name of parent element HTML will be going into.
+ * @public
+ */
+ var $info_parent = 'div';
+
+ /**
+ * Definition for parent element, allows parent element to be a
+ * tag that's not allowed inside the HTML fragment.
+ * @public
+ */
+ var $info_parent_def;
+
+ /**
+ * String name of element used to wrap inline elements in block context
+ * @note This is rarely used except for BLOCKQUOTEs in strict mode
+ * @public
+ */
+ var $info_block_wrapper = 'p';
+
+ /**
+ * Associative array of deprecated tag name to HTMLPurifier_TagTransform
+ * @public
+ */
+ var $info_tag_transform = array();
+
+ /**
+ * List of HTMLPurifier_AttrTransform to be performed before validation.
+ * @public
+ */
+ var $info_attr_transform_pre = array();
+
+ /**
+ * List of HTMLPurifier_AttrTransform to be performed after validation.
+ * @public
+ */
+ var $info_attr_transform_post = array();
+
+ /**
+ * Nested lookup array of content set name (Block, Inline) to
+ * element name to whether or not it belongs in that content set.
+ * @public
+ */
+ var $info_content_sets = array();
+
+
+
+ /** PUBLIC BUT INTERNAL VARIABLES */
+
+ /**
+ * Boolean is a strict definition?
+ * @public
+ */
+ var $strict;
+
/**
* Array of HTMLPurifier_Module instances, indexed by module name
* @public
@@ -53,11 +225,23 @@ class HTMLPurifier_XHTMLDefinition extends HTMLPurifier_HTMLDefinition
*/
var $attr_collections;
+ /**
+ * Is setup?
+ * @public
+ */
+ var $setup = false;
+
+
+
/**
* Performs low-cost, preliminary initialization.
* @param $config Instance of HTMLPurifier_Config
*/
- function HTMLPurifier_XHTMLDefinition($config) {
+ function HTMLPurifier_HTMLDefinition($config) {
+
+ // setup some cached config variables
+ // this will eventually influence module loading
+ $this->strict = $config->get('HTML', 'Strict');
$this->modules['Text'] = new HTMLPurifier_HTMLModule_Text();
$this->modules['Hypertext'] = new HTMLPurifier_HTMLModule_Hypertext();
@@ -72,8 +256,17 @@ class HTMLPurifier_XHTMLDefinition extends HTMLPurifier_HTMLDefinition
$this->attr_types = new HTMLPurifier_AttrTypes();
$this->attr_collections = new HTMLPurifier_AttrCollections();
+ // some compat stuff, will be factored to modules
+
+ // disable the id attribute in the Core collection unless explicitly enabled
+ if (!$config->get('HTML', 'EnableAttrID')) {
+ $this->attr_collections->info['Core']['id'] = false;
+ }
+
}
+
+
/**
* Processes internals into form usable by HTMLPurifier internals.
* Modifying the definition after calling this function should not
@@ -82,6 +275,10 @@ class HTMLPurifier_XHTMLDefinition extends HTMLPurifier_HTMLDefinition
*/
function setup($config) {
+ // multiple call guard
+ if ($this->setup) return;
+ $this->setup = true;
+
// perform attribute collection substitutions
$this->attr_collections->setup($this->attr_types, $this->modules);
@@ -153,6 +350,7 @@ class HTMLPurifier_XHTMLDefinition extends HTMLPurifier_HTMLDefinition
$this->setupAttrTransform($config);
$this->setupBlockWrapper($config);
$this->setupParent($config);
+ $this->setupCompat($config);
}
@@ -193,6 +391,116 @@ class HTMLPurifier_XHTMLDefinition extends HTMLPurifier_HTMLDefinition
$this->info_parent_def = $this->info[$this->info_parent];
}
+ /**
+ * Sets up compat code from the old HTMLDefinition not yet delegated to modules.
+ * @param $config Instance of HTMLPurifier_Config (reads Attr.DisableURI, HTML.Allowed*)
+ */
+ function setupCompat($config) {
+
+ $e_Inline = new HTMLPurifier_ChildDef_Optional(
+ $this->info_content_sets['Inline'] +
+ array('#PCDATA' => true)); // Inline content set plus the #PCDATA entry
+
+ // blockquote changes, implement in TransformStrict and Legacy
+ if ($this->strict) {
+ $this->info['blockquote']->child =
+ new HTMLPurifier_ChildDef_StrictBlockquote(
+ $this->info_content_sets['Block'] +
+ array('#PCDATA' => true));
+ } else {
+ $this->info['blockquote']->child =
+ new HTMLPurifier_ChildDef_Optional(
+ $this->info_content_sets['Flow'] +
+ array('#PCDATA' => true));
+ }
+
+ // deprecated element definitions, implement in Legacy
+ if (!$this->strict) {
+ $this->info['u'] =
+ $this->info['s'] =
+ $this->info['strike'] = new HTMLPurifier_ElementDef(); // NOTE(review): on PHP 5 all three keys share one object handle - confirm intended
+ $this->info['u']->child =
+ $this->info['s']->child =
+ $this->info['strike']->child = $e_Inline;
+ $this->info['u']->descendants_are_inline =
+ $this->info['s']->descendants_are_inline =
+ $this->info['strike']->descendants_are_inline = true;
+ }
+
+ // changed content model for loose, implement in Legacy
+ if ($this->strict) {
+ $this->info['address']->child = $e_Inline;
+ } else {
+ $this->info['address']->child =
+ new HTMLPurifier_ChildDef_Optional(
+ $this->info_content_sets['Inline'] +
+ array('#PCDATA' => true, 'p' => true)); // loose mode additionally admits <p>
+ }
+
+ // custom, not sure where to implement, because it's not
+ // just /one/ module
+ if ($config->get('Attr', 'DisableURI')) {
+ $this->info['a']->attr['href'] =
+ $this->info['img']->attr['longdesc'] =
+ $this->info['del']->attr['cite'] =
+ $this->info['ins']->attr['cite'] =
+ $this->info['blockquote']->attr['cite'] =
+ $this->info['q']->attr['cite'] =
+ $this->info['img']->attr['src'] = null; // NOTE(review): set to null rather than unset - presumably consumers test with isset(); confirm
+ }
+
+ // deprecated attributes implementations, implement in Legacy
+ if (!$this->strict) {
+ $this->info['li']->attr['value'] = new HTMLPurifier_AttrDef_Integer();
+ $this->info['ol']->attr['start'] = new HTMLPurifier_AttrDef_Integer();
+ }
+
+ // deprecated elements transforms, implement in TransformToStrict
+ $this->info_tag_transform['font'] = new HTMLPurifier_TagTransform_Font();
+ $this->info_tag_transform['menu'] = new HTMLPurifier_TagTransform_Simple('ul');
+ $this->info_tag_transform['dir'] = new HTMLPurifier_TagTransform_Simple('ul');
+ $this->info_tag_transform['center'] = new HTMLPurifier_TagTransform_Center();
+
+ // deprecated attribute transforms, implement in TransformToStrict
+ $this->info['h1']->attr_transform_pre[] =
+ $this->info['h2']->attr_transform_pre[] =
+ $this->info['h3']->attr_transform_pre[] =
+ $this->info['h4']->attr_transform_pre[] =
+ $this->info['h5']->attr_transform_pre[] =
+ $this->info['h6']->attr_transform_pre[] =
+ $this->info['p'] ->attr_transform_pre[] =
+ new HTMLPurifier_AttrTransform_TextAlign();
+
+ // xml:lang <=> lang mirroring, implement in TransformToStrict?
+ $this->info_attr_transform_post[] = new HTMLPurifier_AttrTransform_Lang();
+ $this->info_global_attr['lang'] = new HTMLPurifier_AttrDef_Lang();
+
+ // setup allowed elements, obsoleted by Modules? (does offer
+ // different functionality)
+ $allowed_elements = $config->get('HTML', 'AllowedElements');
+ if (is_array($allowed_elements)) { // directive defaults to null, in which case nothing is filtered
+ foreach ($this->info as $name => $d) {
+ if(!isset($allowed_elements[$name])) unset($this->info[$name]);
+ }
+ }
+ $allowed_attributes = $config->get('HTML', 'AllowedAttributes');
+ if (is_array($allowed_attributes)) { // directive defaults to null, in which case nothing is filtered
+ foreach ($this->info_global_attr as $attr_key => $info) {
+ if (!isset($allowed_attributes["*.$attr_key"])) { // global attributes are keyed as '*.attr'
+ unset($this->info_global_attr[$attr_key]);
+ }
+ }
+ foreach ($this->info as $tag => $info) {
+ foreach ($info->attr as $attr => $attr_info) {
+ if (!isset($allowed_attributes["$tag.$attr"])) { // per-element attributes are keyed as 'tag.attr'
+ unset($this->info[$tag]->attr[$attr]);
+ }
+ }
+ }
+ }
+
+ }
+
/**
* Instantiates a ChildDef based on content_model and content_model_type
* member variables in HTMLPurifier_ElementDef