diff --git a/NEWS b/NEWS
index ae82e27f..e39b050a 100644
--- a/NEWS
+++ b/NEWS
@@ -11,6 +11,8 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier
1.5.0, unknown release date
! Added a rudimentary I18N and L10N system modeled off MediaWiki
+! Newly structured HTMLDefinition modeled off of XHTML 1.1 modules.
+ Tutorials upcoming!
- Allow 'x' subtag in language codes
- Fixed buggy chameleon-support for ins and del
. Added support for IDREF attributes (i.e. for)
diff --git a/library/HTMLPurifier/ElementDef.php b/library/HTMLPurifier/ElementDef.php
new file mode 100644
index 00000000..56615c41
--- /dev/null
+++ b/library/HTMLPurifier/ElementDef.php
@@ -0,0 +1,67 @@
+type, used to determine which ChildDef to use
+ * @public
+ */
+ var $content_model_type;
+
+ /**
+ * Does the element have a content model (#PCDATA | Inline)*? This
+ * is important for chameleon ins and del processing.
+ * @public
+ */
+ var $descendants_are_inline;
+
+ /**
+ * Lookup table of tags excluded from all descendants of this tag.
+ * @public
+ */
+ var $excludes = array();
+
+}
+
+?>
diff --git a/library/HTMLPurifier/HTMLDefinition.php b/library/HTMLPurifier/HTMLDefinition.php
index 6686455b..12293835 100644
--- a/library/HTMLPurifier/HTMLDefinition.php
+++ b/library/HTMLPurifier/HTMLDefinition.php
@@ -1,67 +1,558 @@
<blockquote>Foo</blockquote> '.
+ 'would become <blockquote><p>Foo</p></blockquote>
. The '.
+ '<p>
tags can be replaced '.
+ 'with whatever you desire, as long as it is a block level element. '.
+ 'This directive has been available since 1.3.0.'
+);
+
+// %HTML.Parent: name of the element the purified fragment is inserted in.
+HTMLPurifier_ConfigSchema::define(
+    'HTML', 'Parent', 'div', 'string',
+    'String name of element that HTML fragment passed to library will be '.
+    'inserted in. An interesting variation would be using span as the '.
+    'parent element, meaning that only inline tags would be allowed. '.
+    'This directive has been available since 1.3.0.'
+);
+
+// %HTML.AllowedElements: optional lookup of element names to keep.
+HTMLPurifier_ConfigSchema::define(
+    'HTML', 'AllowedElements', null, 'lookup/null',
+    'If HTML Purifier\'s tag set is unsatisfactory for your needs, you '.
+    'can overload it with your own list of tags to allow. Note that this '.
+    'method is subtractive: it does its job by taking away from HTML Purifier\'s '.
+    'usual feature set, so you cannot add a tag that HTML Purifier never '.
+    'supported in the first place (like embed, form or head). If you change this, you '.
+    'probably also want to change %HTML.AllowedAttributes. '.
+    'Warning: If another directive conflicts with the '.
+    'elements here, that directive will win and override. '.
+    'This directive has been available since 1.3.0.'
+);
+
+// %HTML.AllowedAttributes: optional lookup of 'tag.attr' / '*.attr' keys
+// to keep.
+HTMLPurifier_ConfigSchema::define(
+    'HTML', 'AllowedAttributes', null, 'lookup/null',
+    'If HTML Purifier\'s attribute set is unsatisfactory, overload it! '.
+    'The syntax is \'tag.attr\' or \'*.attr\' for the global attributes '.
+    '(style, id, class, dir, lang, xml:lang). '.
+    'Warning: If another directive conflicts with the '.
+    'elements here, that directive will win and override. For '.
+    'example, %HTML.EnableAttrID will take precedence over *.id in this '.
+    'directive. You must set that directive to true before you can use '.
+    'IDs at all. This directive has been available since 1.3.0.'
+);
+
+// %Attr.DisableURI: when true, every URI-carrying attribute is disabled.
+HTMLPurifier_ConfigSchema::define(
+    'Attr', 'DisableURI', false, 'bool',
+    'Disables all URIs in all forms. Not sure why you\'d want to do that '.
+    '(after all, the Internet\'s founded on the notion of a hyperlink). '.
+    'This directive has been available since 1.3.0.'
+);
/**
- * Structure that stores an element definition.
+ * Definition of the purified HTML that describes allowed children,
+ * attributes, and many other things.
+ *
+ * @note This is the module-based, next-generation definition; it was
+ * previously kept in XHTMLDefinition.php.
+ *
+ * Conventions:
+ *
+ * All member variables that are prefixed with info
+ * (including the main $info array) are used by HTML Purifier internals
+ * and should not be directly edited when customizing the HTMLDefinition.
+ * They can usually be set via configuration directives or custom
+ * modules.
+ *
+ * On the other hand, member variables without the info prefix are used
+ * internally by the HTMLDefinition and MUST NOT be used by other HTML
+ * Purifier internals. Many of them, however, are public, and may be
+ * edited by userspace code to tweak the behavior of HTMLDefinition.
+ * In practice, there will not be too many of them.
+ *
+ * HTMLPurifier_Printer_HTMLDefinition is a notable exception to this
+ * rule: in the interest of comprehensiveness, it will sniff everything.
*/
-class HTMLPurifier_ElementDef
+class HTMLPurifier_HTMLDefinition
{
- /**
- * Associative array of attribute name to HTMLPurifier_AttrDef
- * @public
- */
- var $attr = array();
+ /** FULLY-PUBLIC VARIABLES */
/**
- * List of tag's HTMLPurifier_AttrTransform to be done before validation
+ * Associative array of element names to HTMLPurifier_ElementDef
* @public
*/
- var $attr_transform_pre = array();
+ var $info = array();
/**
- * List of tag's HTMLPurifier_AttrTransform to be done after validation
+ * Associative array of global attribute name to attribute definition.
* @public
*/
- var $attr_transform_post = array();
+ var $info_global_attr = array();
/**
- * Lookup table of tags that close this tag.
+ * String name of parent element HTML will be going into.
* @public
*/
- var $auto_close = array();
+ var $info_parent = 'div';
/**
- * HTMLPurifier_ChildDef of this tag.
+ * Definition for parent element, allows parent element to be a
+ * tag that's not allowed inside the HTML fragment.
* @public
*/
- var $child;
+ var $info_parent_def;
/**
- * Abstract string representation of internal ChildDef rules
+ * String name of element used to wrap inline elements in block context
+ * @note This is rarely used except for BLOCKQUOTEs in strict mode
* @public
*/
- var $content_model;
+ var $info_block_wrapper = 'p';
/**
- * Value of $child->type, used to determine which ChildDef to use
+ * Associative array of deprecated tag name to HTMLPurifier_TagTransform
* @public
*/
- var $content_model_type;
+ var $info_tag_transform = array();
/**
- * Does the element have a content model (#PCDATA | Inline)*? This
- * is important for chameleon ins and del processing.
+ * List of HTMLPurifier_AttrTransform to be performed before validation.
* @public
*/
- var $descendants_are_inline;
+ var $info_attr_transform_pre = array();
/**
- * Lookup table of tags excluded from all descendants of this tag.
+ * List of HTMLPurifier_AttrTransform to be performed after validation.
* @public
*/
- var $excludes = array();
+ var $info_attr_transform_post = array();
+
+ /**
+ * Nested lookup array of content set name (Block, Inline) to
+ * element name to whether or not it belongs in that content set.
+ * @public
+ */
+ var $info_content_sets = array();
+
+
+
+ /** PUBLIC BUT INTERNAL VARIABLES */
+
+ /**
+ * Whether this is a strict definition; cached from %HTML.Strict.
+ * @public
+ */
+ var $strict;
+
+ /**
+ * Array of HTMLPurifier_Module instances, indexed by module name
+ * @public
+ */
+ var $modules = array();
+
+ /**
+ * Instance of HTMLPurifier_AttrTypes
+ * @public
+ */
+ var $attr_types;
+
+ /**
+ * Instance of HTMLPurifier_AttrCollections
+ * @public
+ */
+ var $attr_collections;
+
+ /**
+ * Whether setup() has already been run; guards against repeated setup.
+ * @public
+ */
+ var $setup = false;
+
+
+
+    /**
+     * Cheap first-stage construction: caches the strictness flag,
+     * instantiates the stock modules and the attribute helper objects.
+     * The expensive processing is left to setup().
+     * @param $config Instance of HTMLPurifier_Config
+     */
+    function HTMLPurifier_HTMLDefinition($config) {
+
+        // cached up front; this will eventually influence module loading
+        $this->strict = $config->get('HTML', 'Strict');
+
+        // stock modules, registered under their own name in this order
+        $module_names = array(
+            'Text', 'Hypertext', 'List', 'Presentation', 'Edit',
+            'Bdo', 'Tables', 'Image', 'StyleAttribute'
+        );
+        foreach ($module_names as $module_name) {
+            $module_class = 'HTMLPurifier_HTMLModule_' . $module_name;
+            $this->modules[$module_name] = new $module_class();
+        }
+
+        $this->attr_types       = new HTMLPurifier_AttrTypes();
+        $this->attr_collections = new HTMLPurifier_AttrCollections();
+
+        // compat stuff, will be factored to modules:
+        // disable id in the Core attribute collection unless enabled
+        $ids_enabled = $config->get('HTML', 'EnableAttrID');
+        if (!$ids_enabled) {
+            $this->attr_collections->info['Core']['id'] = false;
+        }
+
+    }
+
+
+
+    /**
+     * Processes internals into form usable by HTMLPurifier internals.
+     * Modifying the definition after calling this function should not
+     * be done.
+     *
+     * Steps, in order: set up the attribute collections, merge and expand
+     * the content sets declared by the modules, expand every module
+     * element definition (attributes, content model, child definition)
+     * and publish it in $info, then run the setup*() helpers.
+     *
+     * @note Calling this more than once is a no-op.
+     * @param $config Instance of HTMLPurifier_Config
+     */
+    function setup($config) {
+
+        // multiple call guard
+        if ($this->setup) return;
+        $this->setup = true;
+
+        // perform attribute collection substitutions
+        $this->attr_collections->setup($this->attr_types, $this->modules);
+
+        // populate content_sets based on module hints; a set declared by
+        // several modules is joined into one list of alternatives
+        $content_sets = array();
+        foreach ($this->modules as $module) {
+            foreach ($module->content_sets as $key => $value) {
+                if (isset($content_sets[$key])) {
+                    $content_sets[$key] .= ' | ' . $value;
+                } else {
+                    $content_sets[$key] = $value;
+                }
+            }
+        }
+
+        // perform content_set expansions
+        foreach ($content_sets as $i => $set) {
+            // only performed once, so infinite recursion is not
+            // a problem, you'll just have a stray $Set lying around
+            // at the end. Keys and values are re-read on every pass so
+            // that sets expanded earlier are substituted in expanded form.
+            $content_sets[$i] =
+                str_replace(
+                    array_keys($content_sets),
+                    array_values($content_sets),
+                    $set);
+        }
+        // define convenient variables
+        $content_sets_keys   = array_keys($content_sets);
+        $content_sets_values = array_values($content_sets);
+        foreach ($content_sets as $name => $set) {
+            $this->info_content_sets[$name] = $this->convertToLookup($set);
+        }
+
+        foreach ($this->modules as $module_i => $module) {
+            // iterate over the names only: $def is bound by reference
+            // below, and letting foreach assign to a variable that is
+            // still a reference would overwrite the previously processed
+            // definition inside the module
+            foreach (array_keys($module->info) as $name) {
+                $def =& $this->modules[$module_i]->info[$name];
+
+                // attribute value expansions
+                $this->attr_collections->performInclusions($def->attr);
+                $this->attr_collections->expandIdentifiers(
+                    $def->attr, $this->attr_types);
+
+                // perform content model expansions
+                $content_model = $def->content_model;
+                if (is_string($content_model)) {
+                    if (strpos($content_model, 'Inline') !== false) {
+                        if ($name != 'del' && $name != 'ins') {
+                            // this is for you, ins/del
+                            $def->descendants_are_inline = true;
+                        }
+                    }
+                    $def->content_model = str_replace(
+                        $content_sets_keys, $content_sets_values, $content_model);
+                }
+
+                // get child def from content model
+                $def->child = $this->getChildDef($def);
+
+                // setup info
+                $this->info[$name] = $def;
+                if ($this->info_parent == $name) {
+                    $this->info_parent_def = $this->info[$name];
+                }
+
+                // break the reference before the next element is bound
+                unset($def);
+            }
+        }
+
+        $this->setupAttrTransform($config);
+        $this->setupBlockWrapper($config);
+        $this->setupParent($config);
+        $this->setupCompat($config);
+
+    }
+
+    /**
+     * Registers the attribute transformations that apply to every
+     * element; currently only the Lang transform, run after validation.
+     * @param $config Instance of HTMLPurifier_Config
+     */
+    function setupAttrTransform($config) {
+        $lang_transform = new HTMLPurifier_AttrTransform_Lang();
+        $this->info_attr_transform_post[] = $lang_transform;
+    }
+
+    /**
+     * Reads %HTML.BlockWrapper and adopts it as the block wrapper when
+     * the element belongs to the Block content set; otherwise raises an
+     * E_USER_ERROR and leaves the current wrapper untouched.
+     * @param $config Instance of HTMLPurifier_Config
+     */
+    function setupBlockWrapper($config) {
+        $wrapper = $config->get('HTML', 'BlockWrapper');
+        if (!isset($this->info_content_sets['Block'][$wrapper])) {
+            trigger_error('Cannot use non-block element as block wrapper.',
+                E_USER_ERROR);
+            return;
+        }
+        $this->info_block_wrapper = $wrapper;
+    }
+
+    /**
+     * Reads %HTML.Parent and adopts it as the fragment's parent element
+     * when it is a known element; otherwise raises an E_USER_ERROR and
+     * keeps the current parent. The parent definition is then taken from
+     * $info for whichever parent name is in effect.
+     * @param $config Instance of HTMLPurifier_Config
+     */
+    function setupParent($config) {
+        $requested = $config->get('HTML', 'Parent');
+        if (!isset($this->info[$requested])) {
+            trigger_error('Cannot use unrecognized element as parent.',
+                E_USER_ERROR);
+        } else {
+            $this->info_parent = $requested;
+        }
+        // looked up now, before any element filtering happens
+        $this->info_parent_def = $this->info[$this->info_parent];
+    }
+
+    /**
+     * Sets up compat code from HTMLDefinition that has not been
+     * delegated to modules yet: strict/loose content model differences,
+     * deprecated elements and attributes, tag and attribute transforms,
+     * %Attr.DisableURI, and the %HTML.AllowedElements and
+     * %HTML.AllowedAttributes filters.
+     * @param $config Instance of HTMLPurifier_Config
+     */
+    function setupCompat($config) {
+
+        // inline elements plus text; reused by several elements below
+        $e_Inline = new HTMLPurifier_ChildDef_Optional(
+                        $this->info_content_sets['Inline'] +
+                        array('#PCDATA' => true));
+
+        // blockquote changes, implement in TransformStrict and Legacy
+        if ($this->strict) {
+            $this->info['blockquote']->child =
+                new HTMLPurifier_ChildDef_StrictBlockquote(
+                    $this->info_content_sets['Block'] +
+                    array('#PCDATA' => true));
+        } else {
+            $this->info['blockquote']->child =
+                new HTMLPurifier_ChildDef_Optional(
+                    $this->info_content_sets['Flow'] +
+                    array('#PCDATA' => true));
+        }
+
+        // deprecated element definitions, implement in Legacy
+        if (!$this->strict) {
+            // every element gets its own ElementDef: with object handles
+            // a single shared instance would make a later change to one
+            // of these elements silently apply to the other two
+            foreach (array('strike', 's', 'u') as $name) {
+                $this->info[$name] = new HTMLPurifier_ElementDef();
+                $this->info[$name]->child = $e_Inline;
+                $this->info[$name]->descendants_are_inline = true;
+            }
+        }
+
+        // changed content model for loose, implement in Legacy
+        if ($this->strict) {
+            $this->info['address']->child = $e_Inline;
+        } else {
+            $this->info['address']->child =
+                new HTMLPurifier_ChildDef_Optional(
+                    $this->info_content_sets['Inline'] +
+                    array('#PCDATA' => true, 'p' => true));
+        }
+
+        // custom, not sure where to implement, because it's not
+        // just /one/ module
+        if ($config->get('Attr', 'DisableURI')) {
+            $this->info['a']->attr['href'] =
+            $this->info['img']->attr['longdesc'] =
+            $this->info['del']->attr['cite'] =
+            $this->info['ins']->attr['cite'] =
+            $this->info['blockquote']->attr['cite'] =
+            $this->info['q']->attr['cite'] =
+            $this->info['img']->attr['src'] = null;
+        }
+
+        // deprecated attributes implementations, implement in Legacy
+        if (!$this->strict) {
+            $this->info['li']->attr['value'] = new HTMLPurifier_AttrDef_Integer();
+            $this->info['ol']->attr['start'] = new HTMLPurifier_AttrDef_Integer();
+        }
+
+        // deprecated elements transforms, implement in TransformToStrict
+        $this->info_tag_transform['font']   = new HTMLPurifier_TagTransform_Font();
+        $this->info_tag_transform['menu']   = new HTMLPurifier_TagTransform_Simple('ul');
+        $this->info_tag_transform['dir']    = new HTMLPurifier_TagTransform_Simple('ul');
+        $this->info_tag_transform['center'] = new HTMLPurifier_TagTransform_Center();
+
+        // deprecated attribute transforms, implement in TransformToStrict
+        $this->info['h1']->attr_transform_pre[] =
+        $this->info['h2']->attr_transform_pre[] =
+        $this->info['h3']->attr_transform_pre[] =
+        $this->info['h4']->attr_transform_pre[] =
+        $this->info['h5']->attr_transform_pre[] =
+        $this->info['h6']->attr_transform_pre[] =
+        $this->info['p'] ->attr_transform_pre[] =
+            new HTMLPurifier_AttrTransform_TextAlign();
+
+        // xml:lang <=> lang mirroring, implement in TransformToStrict?
+        // The Lang post-transform itself is registered by
+        // setupAttrTransform(), which setup() runs before this method;
+        // adding it here as well would run the transform twice.
+        $this->info_global_attr['lang'] = new HTMLPurifier_AttrDef_Lang();
+
+        // setup allowed elements, obsoleted by Modules? (does offer
+        // different functionality)
+        $allowed_elements = $config->get('HTML', 'AllowedElements');
+        if (is_array($allowed_elements)) {
+            foreach ($this->info as $name => $d) {
+                if(!isset($allowed_elements[$name])) unset($this->info[$name]);
+            }
+        }
+        $allowed_attributes = $config->get('HTML', 'AllowedAttributes');
+        if (is_array($allowed_attributes)) {
+            // global attributes are addressed as '*.attr'
+            foreach ($this->info_global_attr as $attr_key => $info) {
+                if (!isset($allowed_attributes["*.$attr_key"])) {
+                    unset($this->info_global_attr[$attr_key]);
+                }
+            }
+            // element attributes are addressed as 'tag.attr'
+            foreach ($this->info as $tag => $info) {
+                foreach ($info->attr as $attr => $attr_info) {
+                    if (!isset($allowed_attributes["$tag.$attr"])) {
+                        unset($this->info[$tag]->attr[$attr]);
+                    }
+                }
+            }
+        }
+
+    }
+
+    /**
+     * Builds the ChildDef for an element definition from its
+     * content_model and content_model_type members.
+     * @note A content model that is already an object is returned as-is.
+     *       Types not handled here are offered to every module that
+     *       declares defines_child_def; if none of them answers, an
+     *       E_USER_ERROR is raised.
+     * @param $def HTMLPurifier_ElementDef to have ChildDef extracted
+     * @return HTMLPurifier_ChildDef corresponding to ElementDef, or
+     *         false if none could be determined
+     */
+    function getChildDef($def) {
+        $model = $def->content_model;
+        if (is_object($model)) {
+            // direct object, return
+            return $model;
+        }
+
+        // built-in content model types
+        $type = $def->content_model_type;
+        if ($type == 'required') {
+            return new HTMLPurifier_ChildDef_Required($model);
+        } elseif ($type == 'optional') {
+            return new HTMLPurifier_ChildDef_Optional($model);
+        } elseif ($type == 'empty') {
+            return new HTMLPurifier_ChildDef_Empty();
+        } elseif ($type == 'strictblockquote') {
+            return new HTMLPurifier_ChildDef_StrictBlockquote($model);
+        } elseif ($type == 'custom') {
+            return new HTMLPurifier_ChildDef_Custom($model);
+        }
+
+        // not a built-in type: see whether a module knows what to use,
+        // skipping modules that do not define child defs at all
+        foreach ($this->modules as $module) {
+            if ($module->defines_child_def) {
+                $child_def = $module->getChildDef($def);
+                if ($child_def !== false) {
+                    return $child_def;
+                }
+            }
+        }
+
+        // nobody could handle it
+        trigger_error(
+            'Could not determine which ChildDef class to instantiate',
+            E_USER_ERROR
+        );
+        return false;
+    }
+
+    /**
+     * Converts a string list of elements separated by pipes into
+     * a lookup array.
+     * @note Spaces are ignored, and empty entries (from an empty string
+     *       or a doubled pipe) are skipped rather than being recorded
+     *       as an element with an empty name.
+     * @param $string List of elements
+     * @return Lookup array of elements, element name => true
+     */
+    function convertToLookup($string) {
+        $ret = array();
+        foreach (explode('|', str_replace(' ', '', $string)) as $name) {
+            // explode() yields '' for an empty input and between
+            // adjacent pipes; that does not name any element
+            if ($name === '') continue;
+            $ret[$name] = true;
+        }
+        return $ret;
+    }
}
diff --git a/library/HTMLPurifier/XHTMLDefinition.php b/library/HTMLPurifier/XHTMLDefinition.php
deleted file mode 100644
index 2f4b18ca..00000000
--- a/library/HTMLPurifier/XHTMLDefinition.php
+++ /dev/null
@@ -1,558 +0,0 @@
-<blockquote>Foo</blockquote> '.
- 'would become <blockquote><p>Foo</p></blockquote>
. The '.
- '<p>
tags can be replaced '.
- 'with whatever you desire, as long as it is a block level element. '.
- 'This directive has been available since 1.3.0.'
-);
-
-HTMLPurifier_ConfigSchema::define(
- 'HTML', 'Parent', 'div', 'string',
- 'String name of element that HTML fragment passed to library will be '.
- 'inserted in. An interesting variation would be using span as the '.
- 'parent element, meaning that only inline tags would be allowed. '.
- 'This directive has been available since 1.3.0.'
-);
-
-HTMLPurifier_ConfigSchema::define(
- 'HTML', 'AllowedElements', null, 'lookup/null',
- 'If HTML Purifier\'s tag set is unsatisfactory for your needs, you '.
- 'can overload it with your own list of tags to allow. Note that this '.
- 'method is subtractive: it does its job by taking away from HTML Purifier '.
- 'usual feature set, so you cannot add a tag that HTML Purifier never '.
- 'supported in the first place (like embed, form or head). If you change this, you '.
- 'probably also want to change %HTML.AllowedAttributes. '.
- 'Warning: If another directive conflicts with the '.
- 'elements here, that directive will win and override. '.
- 'This directive has been available since 1.3.0.'
-);
-
-HTMLPurifier_ConfigSchema::define(
- 'HTML', 'AllowedAttributes', null, 'lookup/null',
- 'IF HTML Purifier\'s attribute set is unsatisfactory, overload it! '.
- 'The syntax is \'tag.attr\' or \'*.attr\' for the global attributes '.
- '(style, id, class, dir, lang, xml:lang).'.
- 'Warning: If another directive conflicts with the '.
- 'elements here, that directive will win and override. For '.
- 'example, %HTML.EnableAttrID will take precedence over *.id in this '.
- 'directive. You must set that directive to true before you can use '.
- 'IDs at all. This directive has been available since 1.3.0.'
-);
-
-HTMLPurifier_ConfigSchema::define(
- 'Attr', 'DisableURI', false, 'bool',
- 'Disables all URIs in all forms. Not sure why you\'d want to do that '.
- '(after all, the Internet\'s founded on the notion of a hyperlink). '.
- 'This directive has been available since 1.3.0.'
-);
-
-/**
- * Definition of the purified HTML that describes allowed children,
- * attributes, and many other things.
- *
- * @note This is the next-gen definition that will be renamed to
- * HTMLDefinition soon!
- *
- * Conventions:
- *
- * All member variables that are prefixed with info
- * (including the main $info array) are used by HTML Purifier internals
- * and should not be directly edited when customizing the HTMLDefinition.
- * They can usually be set via configuration directives or custom
- * modules.
- *
- * On the other hand, member variables without the info prefix are used
- * internally by the HTMLDefinition and MUST NOT be used by other HTML
- * Purifier internals. Many of them, however, are public, and may be
- * edited by userspace code to tweak the behavior of HTMLDefinition.
- * In practice, there will not be too many of them.
- *
- * HTMLPurifier_Printer_HTMLDefinition is a notable exception to this
- * rule: in the interest of comprehensiveness, it will sniff everything.
- */
-class HTMLPurifier_HTMLDefinition
-{
-
- /** FULLY-PUBLIC VARIABLES */
-
- /**
- * Associative array of element names to HTMLPurifier_ElementDef
- * @public
- */
- var $info = array();
-
- /**
- * Associative array of global attribute name to attribute definition.
- * @public
- */
- var $info_global_attr = array();
-
- /**
- * String name of parent element HTML will be going into.
- * @public
- */
- var $info_parent = 'div';
-
- /**
- * Definition for parent element, allows parent element to be a
- * tag that's not allowed inside the HTML fragment.
- * @public
- */
- var $info_parent_def;
-
- /**
- * String name of element used to wrap inline elements in block context
- * @note This is rarely used except for BLOCKQUOTEs in strict mode
- * @public
- */
- var $info_block_wrapper = 'p';
-
- /**
- * Associative array of deprecated tag name to HTMLPurifier_TagTransform
- * @public
- */
- var $info_tag_transform = array();
-
- /**
- * List of HTMLPurifier_AttrTransform to be performed before validation.
- * @public
- */
- var $info_attr_transform_pre = array();
-
- /**
- * List of HTMLPurifier_AttrTransform to be performed after validation.
- * @public
- */
- var $info_attr_transform_post = array();
-
- /**
- * Nested lookup array of content set name (Block, Inline) to
- * element name to whether or not it belongs in that content set.
- * @public
- */
- var $info_content_sets = array();
-
-
-
- /** PUBLIC BUT INTERNAL VARIABLES */
-
- /**
- * Boolean is a strict definition?
- * @public
- */
- var $strict;
-
- /**
- * Array of HTMLPurifier_Module instances, indexed by module name
- * @public
- */
- var $modules = array();
-
- /**
- * Instance of HTMLPurifier_AttrTypes
- * @public
- */
- var $attr_types;
-
- /**
- * Instance of HTMLPurifier_AttrCollections
- * @public
- */
- var $attr_collections;
-
- /**
- * Is setup?
- * @public
- */
- var $setup = false;
-
-
-
- /**
- * Performs low-cost, preliminary initialization.
- * @param $config Instance of HTMLPurifier_Config
- */
- function HTMLPurifier_HTMLDefinition($config) {
-
- // setup some cached config variables
- // this will eventually influence module loading
- $this->strict = $config->get('HTML', 'Strict');
-
- $this->modules['Text'] = new HTMLPurifier_HTMLModule_Text();
- $this->modules['Hypertext'] = new HTMLPurifier_HTMLModule_Hypertext();
- $this->modules['List'] = new HTMLPurifier_HTMLModule_List();
- $this->modules['Presentation'] = new HTMLPurifier_HTMLModule_Presentation();
- $this->modules['Edit'] = new HTMLPurifier_HTMLModule_Edit();
- $this->modules['Bdo'] = new HTMLPurifier_HTMLModule_Bdo();
- $this->modules['Tables'] = new HTMLPurifier_HTMLModule_Tables();
- $this->modules['Image'] = new HTMLPurifier_HTMLModule_Image();
- $this->modules['StyleAttribute']= new HTMLPurifier_HTMLModule_StyleAttribute();
-
- $this->attr_types = new HTMLPurifier_AttrTypes();
- $this->attr_collections = new HTMLPurifier_AttrCollections();
-
- // some compat stuff, will be factored to modules
-
- // remove ID module
- if (!$config->get('HTML', 'EnableAttrID')) {
- $this->attr_collections->info['Core']['id'] = false;
- }
-
- }
-
-
-
- /**
- * Processes internals into form usable by HTMLPurifier internals.
- * Modifying the definition after calling this function should not
- * be done.
- * @param $config Instance of HTMLPurifier_Config
- */
- function setup($config) {
-
- // multiple call guard
- if ($this->setup) return;
- $this->setup = true;
-
- // perform attribute collection substitutions
- $this->attr_collections->setup($this->attr_types, $this->modules);
-
- // populate content_sets based on module hints
- $content_sets = array();
- foreach ($this->modules as $module_i => $module) {
- foreach ($module->content_sets as $key => $value) {
- if (isset($content_sets[$key])) {
- // add it into the existing content set
- $content_sets[$key] = $content_sets[$key] . ' | ' . $value;
- } else {
- $content_sets[$key] = $value;
- }
- }
- }
-
- // perform content_set expansions
- foreach ($content_sets as $i => $set) {
- // only performed once, so infinite recursion is not
- // a problem, you'll just have a stray $Set lying around
- // at the end
- $content_sets[$i] =
- str_replace(
- array_keys($content_sets),
- array_values($content_sets),
- $set);
- }
- // define convenient variables
- $content_sets_keys = array_keys($content_sets);
- $content_sets_values = array_values($content_sets);
- foreach ($content_sets as $name => $set) {
- $this->info_content_sets[$name] = $this->convertToLookup($set);
- }
-
- foreach ($this->modules as $module_i => $module) {
- foreach ($module->info as $name => $def) {
- $def =& $this->modules[$module_i]->info[$name];
-
- // attribute value expansions
-
- $this->attr_collections->performInclusions($def->attr);
- $this->attr_collections->expandIdentifiers(
- $def->attr, $this->attr_types);
-
- // perform content model expansions
- $content_model = $def->content_model;
- if (is_string($content_model)) {
- if (strpos($content_model, 'Inline') !== false) {
- if ($name != 'del' && $name != 'ins') {
- // this is for you, ins/del
- $def->descendants_are_inline = true;
- }
- }
- $def->content_model = str_replace(
- $content_sets_keys, $content_sets_values, $content_model);
- }
-
- // get child def from content model
- $def->child = $this->getChildDef($def);
-
- // setup info
- $this->info[$name] = $def;
- if ($this->info_parent == $name) {
- $this->info_parent_def = $this->info[$name];
- }
- }
- }
-
- $this->setupAttrTransform($config);
- $this->setupBlockWrapper($config);
- $this->setupParent($config);
- $this->setupCompat($config);
-
- }
-
- /**
- * Sets up attribute transformations
- * @param $config Instance of HTMLPurifier_Config
- */
- function setupAttrTransform($config) {
- $this->info_attr_transform_post[] = new HTMLPurifier_AttrTransform_Lang();
- }
-
- /**
- * Sets up block wrapper based on config
- * @param $config Instance of HTMLPurifier_Config
- */
- function setupBlockWrapper($config) {
- $block_wrapper = $config->get('HTML', 'BlockWrapper');
- if (isset($this->info_content_sets['Block'][$block_wrapper])) {
- $this->info_block_wrapper = $block_wrapper;
- } else {
- trigger_error('Cannot use non-block element as block wrapper.',
- E_USER_ERROR);
- }
- }
-
- /**
- * Sets up parent of fragment based on config
- * @param $config Instance of HTMLPurifier_Config
- */
- function setupParent($config) {
- $parent = $config->get('HTML', 'Parent');
- if (isset($this->info[$parent])) {
- $this->info_parent = $parent;
- } else {
- trigger_error('Cannot use unrecognized element as parent.',
- E_USER_ERROR);
- }
- $this->info_parent_def = $this->info[$this->info_parent];
- }
-
- /**
- * Sets up compat code from HTMLDefinition that has not been
- * delegated to modules yet
- */
- function setupCompat($config) {
-
- $e_Inline = new HTMLPurifier_ChildDef_Optional(
- $this->info_content_sets['Inline'] +
- array('#PCDATA' => true));
-
- // blockquote changes, implement in TransformStrict and Legacy
- if ($this->strict) {
- $this->info['blockquote']->child =
- new HTMLPurifier_ChildDef_StrictBlockquote(
- $this->info_content_sets['Block'] +
- array('#PCDATA' => true));
- } else {
- $this->info['blockquote']->child =
- new HTMLPurifier_ChildDef_Optional(
- $this->info_content_sets['Flow'] +
- array('#PCDATA' => true));
- }
-
- // deprecated element definitions, implement in Legacy
- if (!$this->strict) {
- $this->info['u'] =
- $this->info['s'] =
- $this->info['strike'] = new HTMLPurifier_ElementDef();
- $this->info['u']->child =
- $this->info['s']->child =
- $this->info['strike']->child = $e_Inline;
- $this->info['u']->descendants_are_inline =
- $this->info['s']->descendants_are_inline =
- $this->info['strike']->descendants_are_inline = true;
- }
-
- // changed content model for loose, implement in Legacy
- if ($this->strict) {
- $this->info['address']->child = $e_Inline;
- } else {
- $this->info['address']->child =
- new HTMLPurifier_ChildDef_Optional(
- $this->info_content_sets['Inline'] +
- array('#PCDATA' => true, 'p' => true));
- }
-
- // custom, not sure where to implement, because it's not
- // just /one/ module
- if ($config->get('Attr', 'DisableURI')) {
- $this->info['a']->attr['href'] =
- $this->info['img']->attr['longdesc'] =
- $this->info['del']->attr['cite'] =
- $this->info['ins']->attr['cite'] =
- $this->info['blockquote']->attr['cite'] =
- $this->info['q']->attr['cite'] =
- $this->info['img']->attr['src'] = null;
- }
-
- // deprecated attributes implementations, implement in Legacy
- if (!$this->strict) {
- $this->info['li']->attr['value'] = new HTMLPurifier_AttrDef_Integer();
- $this->info['ol']->attr['start'] = new HTMLPurifier_AttrDef_Integer();
- }
-
- // deprecated elements transforms, implement in TransformToStrict
- $this->info_tag_transform['font'] = new HTMLPurifier_TagTransform_Font();
- $this->info_tag_transform['menu'] = new HTMLPurifier_TagTransform_Simple('ul');
- $this->info_tag_transform['dir'] = new HTMLPurifier_TagTransform_Simple('ul');
- $this->info_tag_transform['center'] = new HTMLPurifier_TagTransform_Center();
-
- // deprecated attribute transforms, implement in TransformToStrict
- $this->info['h1']->attr_transform_pre[] =
- $this->info['h2']->attr_transform_pre[] =
- $this->info['h3']->attr_transform_pre[] =
- $this->info['h4']->attr_transform_pre[] =
- $this->info['h5']->attr_transform_pre[] =
- $this->info['h6']->attr_transform_pre[] =
- $this->info['p'] ->attr_transform_pre[] =
- new HTMLPurifier_AttrTransform_TextAlign();
-
- // xml:lang <=> lang mirroring, implement in TransformToStrict?
- $this->info_attr_transform_post[] = new HTMLPurifier_AttrTransform_Lang();
- $this->info_global_attr['lang'] = new HTMLPurifier_AttrDef_Lang();
-
- // setup allowed elements, obsoleted by Modules? (does offer
- // different functionality)
- $allowed_elements = $config->get('HTML', 'AllowedElements');
- if (is_array($allowed_elements)) {
- foreach ($this->info as $name => $d) {
- if(!isset($allowed_elements[$name])) unset($this->info[$name]);
- }
- }
- $allowed_attributes = $config->get('HTML', 'AllowedAttributes');
- if (is_array($allowed_attributes)) {
- foreach ($this->info_global_attr as $attr_key => $info) {
- if (!isset($allowed_attributes["*.$attr_key"])) {
- unset($this->info_global_attr[$attr_key]);
- }
- }
- foreach ($this->info as $tag => $info) {
- foreach ($info->attr as $attr => $attr_info) {
- if (!isset($allowed_attributes["$tag.$attr"])) {
- unset($this->info[$tag]->attr[$attr]);
- }
- }
- }
- }
-
- }
-
- /**
- * Instantiates a ChildDef based on content_model and content_model_type
- * member variables in HTMLPurifier_ElementDef
- * @note This will also defer to modules for custom HTMLPurifier_ChildDef
- * subclasses that need content set expansion
- * @param $def HTMLPurifier_ElementDef to have ChildDef extracted
- * @return HTMLPurifier_ChildDef corresponding to ElementDef
- */
- function getChildDef($def) {
- $value = $def->content_model;
- if (is_object($value)) return $value; // direct object, return
- switch ($def->content_model_type) {
- case 'required':
- return new HTMLPurifier_ChildDef_Required($value);
- case 'optional':
- return new HTMLPurifier_ChildDef_Optional($value);
- case 'empty':
- return new HTMLPurifier_ChildDef_Empty();
- case 'strictblockquote':
- return new HTMLPurifier_ChildDef_StrictBlockquote($value);
- case 'custom':
- return new HTMLPurifier_ChildDef_Custom($value);
- }
- // defer to modules, see if they know what child_def to use
- foreach ($this->modules as $module) {
- if (!$module->defines_child_def) continue; // save a func call
- $return = $module->getChildDef($def);
- if ($return !== false) return $return;
- }
- // error-out
- trigger_error(
- 'Could not determine which ChildDef class to instantiate',
- E_USER_ERROR
- );
- return false;
- }
-
- /**
- * Converts a string list of elements separated by pipes into
- * a lookup array.
- * @param $string List of elements
- * @return Lookup array of elements
- */
- function convertToLookup($string) {
- $array = explode('|', str_replace(' ', '', $string));
- $ret = array();
- foreach ($array as $i => $k) {
- $ret[$k] = true;
- }
- return $ret;
- }
-
-}
-
-?>