From edf20018f0866d9cde56e3cff6ab803ef92b5812 Mon Sep 17 00:00:00 2001 From: "Edward Z. Yang" Date: Thu, 15 Feb 2007 14:00:18 +0000 Subject: [PATCH] Add an HTMLModuleManager. git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@751 48356398-32a2-884e-a903-53898d9a118a --- library/HTMLPurifier/ContentSets.php | 5 +- library/HTMLPurifier/HTMLDefinition.php | 319 ++++---------- library/HTMLPurifier/HTMLModule.php | 8 +- library/HTMLPurifier/HTMLModule/SetParent.php | 33 -- .../HTMLModule/TransformToStrict.php | 11 +- .../HTMLModule/TweakSubtractiveWhitelist.php | 70 ---- library/HTMLPurifier/HTMLModuleManager.php | 388 ++++++++++++++++++ .../HTMLPurifier/Printer/HTMLDefinition.php | 3 +- 8 files changed, 489 insertions(+), 348 deletions(-) delete mode 100644 library/HTMLPurifier/HTMLModule/SetParent.php delete mode 100644 library/HTMLPurifier/HTMLModule/TweakSubtractiveWhitelist.php create mode 100644 library/HTMLPurifier/HTMLModuleManager.php diff --git a/library/HTMLPurifier/ContentSets.php b/library/HTMLPurifier/ContentSets.php index 5f975e1d..de5c532e 100644 --- a/library/HTMLPurifier/ContentSets.php +++ b/library/HTMLPurifier/ContentSets.php @@ -36,8 +36,10 @@ class HTMLPurifier_ContentSets * sets and populates the keys, values and lookup member variables. * @param $modules List of HTMLPurifier_HTMLModule */ - function setup($modules) { + function HTMLPurifier_ContentSets($modules) { + if (!is_array($modules)) $modules = array($modules); // populate content_sets based on module hints + // sorry, no way of overloading foreach ($modules as $module_i => $module) { foreach ($module->content_sets as $key => $value) { if (isset($this->info[$key])) { @@ -48,7 +50,6 @@ class HTMLPurifier_ContentSets } } } - // perform content_set expansions $this->keys = array_keys($this->info); foreach ($this->info as $i => $set) { diff --git a/library/HTMLPurifier/HTMLDefinition.php b/library/HTMLPurifier/HTMLDefinition.php index e545ebcc..9f6dff3f 100644 --- a/library/HTMLPurifier/HTMLDefinition.php +++ b/library/HTMLPurifier/HTMLDefinition.php @@ -3,7 +3,7 @@ // components require_once 'HTMLPurifier/AttrTypes.php'; require_once 'HTMLPurifier/AttrCollections.php'; -require_once 'HTMLPurifier/ContentSets.php'; +require_once 'HTMLPurifier/HTMLModuleManager.php'; require_once 'HTMLPurifier/ElementDef.php'; require_once 'HTMLPurifier/AttrDef.php'; @@ -22,31 +22,13 @@ require_once 'HTMLPurifier/TagTransform/Simple.php'; require_once 'HTMLPurifier/TagTransform/Center.php'; require_once 'HTMLPurifier/TagTransform/Font.php'; -// default modules -require_once 'HTMLPurifier/HTMLModule.php'; -require_once 'HTMLPurifier/HTMLModule/Text.php'; -require_once 'HTMLPurifier/HTMLModule/Hypertext.php'; -require_once 'HTMLPurifier/HTMLModule/List.php'; -require_once 'HTMLPurifier/HTMLModule/Presentation.php'; -require_once 'HTMLPurifier/HTMLModule/Edit.php'; -require_once 'HTMLPurifier/HTMLModule/Bdo.php'; -require_once 'HTMLPurifier/HTMLModule/Tables.php'; -require_once 'HTMLPurifier/HTMLModule/Image.php'; -require_once 'HTMLPurifier/HTMLModule/StyleAttribute.php'; - -// compat modules -require_once 'HTMLPurifier/HTMLModule/TransformToStrict.php'; -require_once 'HTMLPurifier/HTMLModule/Legacy.php'; - -// config modules -require_once 'HTMLPurifier/HTMLModule/SetParent.php'; - // tweak modules require_once 'HTMLPurifier/HTMLModule/TweakSubtractiveWhitelist.php'; // this definition and its modules MUST NOT define configuration directives // outside of the HTML or Attr namespaces +// will be superceded by more accurate doctype declaration schemes HTMLPurifier_ConfigSchema::define( 'HTML', 'Strict', false, 'bool', 'Determines whether or not to use Transitional (loose) or Strict rulesets. '. @@ -64,6 +46,39 @@ HTMLPurifier_ConfigSchema::define( 'This directive has been available since 1.3.0.' ); +HTMLPurifier_ConfigSchema::define( + 'HTML', 'Parent', 'div', 'string', + 'String name of element that HTML fragment passed to library will be '. + 'inserted in. An interesting variation would be using span as the '. + 'parent element, meaning that only inline tags would be allowed. '. + 'This directive has been available since 1.3.0.' +); + +HTMLPurifier_ConfigSchema::define( + 'HTML', 'AllowedElements', null, 'lookup/null', + 'If HTML Purifier\'s tag set is unsatisfactory for your needs, you '. + 'can overload it with your own list of tags to allow. Note that this '. + 'method is subtractive: it does its job by taking away from HTML Purifier '. + 'usual feature set, so you cannot add a tag that HTML Purifier never '. + 'supported in the first place (like embed, form or head). If you change this, you '. + 'probably also want to change %HTML.AllowedAttributes. '. + 'Warning: If another directive conflicts with the '. + 'elements here, that directive will win and override. '. + 'This directive has been available since 1.3.0.' +); + +HTMLPurifier_ConfigSchema::define( + 'HTML', 'AllowedAttributes', null, 'lookup/null', + 'IF HTML Purifier\'s attribute set is unsatisfactory, overload it! '. + 'The syntax is \'tag.attr\' or \'*.attr\' for the global attributes '. + '(style, id, class, dir, lang, xml:lang).'. + 'Warning: If another directive conflicts with the '. + 'elements here, that directive will win and override. For '. + 'example, %HTML.EnableAttrID will take precedence over *.id in this '. + 'directive. You must set that directive to true before you can use '. + 'IDs at all. This directive has been available since 1.3.0.' +); + /** * Definition of the purified HTML that describes allowed children, * attributes, and many other things. @@ -150,147 +165,24 @@ class HTMLPurifier_HTMLDefinition /** PUBLIC BUT INTERNAL VARIABLES */ - /** - * Boolean is a strict definition? - * @public - */ - var $strict; - - /** - * Array of HTMLPurifier_Module instances, indexed by module's class name - * @public - */ - var $modules = array(); - - /** - * Associative array of module class name to module order keywords or - * numbers (keyword is preferred, all keywords are resolved at beginning - * of setup()) - * @public - */ - var $modules_order = array(); - - /** - * List of prefixes HTML Purifier should try to resolve short names to. - * @public - */ - var $module_prefixes = array('HTMLPurifier_HTMLModule_'); - - /** - * Instance of HTMLPurifier_AttrTypes - * @public - */ - var $attr_types; - - /** - * Instance of HTMLPurifier_AttrCollections - * @public - */ - var $attr_collections; - - /** - * Has setup() been called yet? - * @public - */ - var $setup = false; - - /** - * Instance of HTMLPurifier_ContentSets - * @public - */ - var $content_sets; - - /** - * Lookup table of module order "names" and an integer index - * @public - */ - var $order_keywords = array( - 'begin' => 10, - 'setup' => 20, - - 'pre' => 30, - - 'early' => 40, - 'main' => 50, - 'late' => 60, - - 'post' => 70, - - 'cleanup' => 80, - 'end' => 90 - ); - - /** - * Temporary instance of HTMLPurifier_Config for convenience reasons, - * is removed after setup(). - * @public - */ - var $config; + var $setup = false; /**< Has setup() been called yet? */ + var $config; /**< Temporary instance of HTMLPurifier_Config */ + var $manager; /**< Instance of HTMLPurifier_HTMLModuleManager */ /** * Performs low-cost, preliminary initialization. * @param $config Instance of HTMLPurifier_Config */ function HTMLPurifier_HTMLDefinition(&$config) { - $this->config =& $config; - - // set up public internals - $this->strict = $config->get('HTML', 'Strict'); - $this->attr_types = new HTMLPurifier_AttrTypes(); - $this->attr_collections = new HTMLPurifier_AttrCollections(); - $this->content_sets = new HTMLPurifier_ContentSets(); - - // modules - - $main_modules = array('Text', 'Hypertext', 'List', 'Presentation', - 'Edit', 'Bdo', 'Tables', 'Image', 'StyleAttribute'); - foreach ($main_modules as $module) $this->addModule($module, 'main'); - - if (!$this->strict) $this->addModule('Legacy', 'late'); - - $this->addModule('SetParent', 'post'); - - $this->addModule('TransformToStrict', 'cleanup'); - $this->addModule('TweakSubtractiveWhitelist', 'cleanup'); - - } - - /** - * Adds a module to the ordered list. - * @param $module Mixed: string module name, with or without - * HTMLPurifier_HTMLModule prefix, or instance of - * subclass of HTMLPurifier_HTMLModule. - */ - function addModule($module, $order = 'main') { - if (is_string($module)) { - $original_module = $module; - if (!class_exists($module)) { - foreach ($this->module_prefixes as $prefix) { - $module = $prefix . $original_module; - if (class_exists($module)) break; - } - } - if (!class_exists($module)) { - trigger_error($original_module . ' module does not exist', E_USER_ERROR); - return; - } - $module = new $module($this); - } - if (!isset($this->order_keywords[$order])) { - trigger_error('Order keyword does not exist', E_USER_ERROR); - return; - } - $this->modules[$module->name] = $module; - $this->modules_order[$module->name] = $order; + $this->manager = new HTMLPurifier_HTMLModuleManager(); } /** * Processes internals into form usable by HTMLPurifier internals. * Modifying the definition after calling this function should not * be done. - * @param $config Instance of HTMLPurifier_Config */ function setup() { @@ -298,109 +190,37 @@ class HTMLPurifier_HTMLDefinition if ($this->setup) {return;} else {$this->setup = true;} $this->processModules(); - $this->setupAttrTransform(); - $this->setupBlockWrapper(); + $this->setupConfigStuff(); unset($this->config); + unset($this->manager); } /** - * Processes the modules, setting up related info variables + * Extract out the information from the manager */ function processModules() { - // substitute out the order keywords - foreach ($this->modules_order as $name => $order) { - if (empty($this->modules[$name])) { - trigger_error('Orphan module order definition for module: ' . $name, E_USER_ERROR); - return; - } - if (is_int($order)) continue; - if (empty($this->order_keywords[$order])) { - trigger_error('Unknown order keyword: ' . $order, E_USER_ERROR); - return; - } - $this->modules_order[$name] = $this->order_keywords[$order]; - } + $this->manager->setup($this->config); + $modules = $this->manager->getModules($this->config); - // sort modules member variable - array_multisort( - $this->modules_order, SORT_ASC, SORT_NUMERIC, - $this->modules - ); - - // setup the global registries - $this->attr_collections->setup($this->attr_types, $this->modules); - $this->content_sets->setup($this->modules); - $this->info_content_sets = $this->content_sets->lookup; - - // process the modules - foreach ($this->modules as $module_i => $module) { - - $module->preProcess($this); - - // process element-wise definitions - foreach ($module->info as $name => $def) { - // setup info - if (!isset($this->info[$name])) { - if ($def->standalone) { - $this->info[$name] = $this->modules[$module_i]->info[$name]; - } else { - // attempting to merge into an element that doesn't - // exist, ignore it - continue; - } - } else { - $this->info[$name]->mergeIn($this->modules[$module_i]->info[$name]); - } - - // process info - $def = $this->info[$name]; - - // attribute value expansions - $this->attr_collections->performInclusions($def->attr); - $this->attr_collections->expandIdentifiers( - $def->attr, $this->attr_types); - - // descendants_are_inline, for ChildDef_Chameleon - if (is_string($def->content_model) && - strpos($def->content_model, 'Inline') !== false) { - if ($name != 'del' && $name != 'ins') { - // this is for you, ins/del - $def->descendants_are_inline = true; - } - } - - // set child def from content model - $this->content_sets->generateChildDef($def, $module); - - $this->info[$name] = $def; - - } - - // merge in global info variables from module + foreach ($modules as $module) { foreach($module->info_tag_transform as $k => $v) $this->info_tag_transform[$k] = $v; foreach($module->info_attr_transform_pre as $k => $v) $this->info_attr_transform_pre[$k] = $v; foreach($module->info_attr_transform_post as $k => $v) $this->info_attr_transform_post[$k]= $v; - - $module->postProcess($this); - } + $this->info = $this->manager->getElements($this->config); + $this->info_content_sets = $this->manager->contentSets->lookup; + } /** - * Sets up attribute transformations + * Sets up stuff based on config. We need a better way of doing this. */ - function setupAttrTransform() { - $this->info_attr_transform_post[] = new HTMLPurifier_AttrTransform_Lang(); - } - - /** - * Sets up block wrapper based on config - */ - function setupBlockWrapper() { + function setupConfigStuff() { + $block_wrapper = $this->config->get('HTML', 'BlockWrapper'); if (isset($this->info_content_sets['Block'][$block_wrapper])) { $this->info_block_wrapper = $block_wrapper; @@ -408,6 +228,43 @@ class HTMLPurifier_HTMLDefinition trigger_error('Cannot use non-block element as block wrapper.', E_USER_ERROR); } + + $parent = $this->config->get('HTML', 'Parent'); + $def = $this->manager->getElement($parent, $this->config); + if ($def) { + $this->info_parent = $parent; + $this->info_parent_def = $def; + } else { + trigger_error('Cannot use unrecognized element as parent.', + E_USER_ERROR); + $this->info_parent_def = $this->manager->getElement( + $this->info_parent, $this->config); + } + + // setup allowed elements, SubtractiveWhitelist module + $allowed_elements = $this->config->get('HTML', 'AllowedElements'); + if (is_array($allowed_elements)) { + foreach ($this->info as $name => $d) { + if(!isset($allowed_elements[$name])) unset($this->info[$name]); + } + } + $allowed_attributes = $this->config->get('HTML', 'AllowedAttributes'); + if (is_array($allowed_attributes)) { + foreach ($this->info_global_attr as $attr_key => $info) { + if (!isset($allowed_attributes["*.$attr_key"])) { + unset($this->info_global_attr[$attr_key]); + } + } + foreach ($this->info as $tag => $info) { + foreach ($info->attr as $attr => $attr_info) { + if (!isset($allowed_attributes["$tag.$attr"]) && + !isset($allowed_attributes["*.$attr"])) { + unset($this->info[$tag]->attr[$attr]); + } + } + } + } + } diff --git a/library/HTMLPurifier/HTMLModule.php b/library/HTMLPurifier/HTMLModule.php index e6a30328..8ccde459 100644 --- a/library/HTMLPurifier/HTMLModule.php +++ b/library/HTMLPurifier/HTMLModule.php @@ -22,10 +22,10 @@ class HTMLPurifier_HTMLModule var $name; /** - * List of elements that the module implements. - * @note This is only for convention, as a module will often loop - * through the $elements array to define HTMLPurifier_ElementDef - * in the $info array. + * List of elements that the module implements or substantially + * modifies, either through a new ElementDef or a modified + * content set that directly affects the element (if the element was + * removed or added from a content set). * @protected */ var $elements = array(); diff --git a/library/HTMLPurifier/HTMLModule/SetParent.php b/library/HTMLPurifier/HTMLModule/SetParent.php deleted file mode 100644 index 14468787..00000000 --- a/library/HTMLPurifier/HTMLModule/SetParent.php +++ /dev/null @@ -1,33 +0,0 @@ -config->get('HTML', 'Parent'); - if (isset($definition->info[$parent])) { - $definition->info_parent = $parent; - } else { - trigger_error('Cannot use unrecognized element as parent.', - E_USER_ERROR); - } - $definition->info_parent_def = $definition->info[$definition->info_parent]; - } - -} - -?> \ No newline at end of file diff --git a/library/HTMLPurifier/HTMLModule/TransformToStrict.php b/library/HTMLPurifier/HTMLModule/TransformToStrict.php index 7786afee..5c75a0da 100644 --- a/library/HTMLPurifier/HTMLModule/TransformToStrict.php +++ b/library/HTMLPurifier/HTMLModule/TransformToStrict.php @@ -33,8 +33,7 @@ class HTMLPurifier_HTMLModule_TransformToStrict extends HTMLPurifier_HTMLModule 'lang' => false // placeholder ); - function HTMLPurifier_HTMLModule_TransformToStrict(&$definition) { - $config = $definition->config; + function HTMLPurifier_HTMLModule_TransformToStrict() { // deprecated tag transforms $this->info_tag_transform['font'] = new HTMLPurifier_TagTransform_Font(); @@ -62,10 +61,10 @@ class HTMLPurifier_HTMLModule_TransformToStrict extends HTMLPurifier_HTMLModule $this->info_attr_transform_post['lang'] = new HTMLPurifier_AttrTransform_Lang(); $this->attr_collections['Lang']['lang'] = new HTMLPurifier_AttrDef_Lang(); - if ($config->get('HTML', 'Strict')) { - $this->info['blockquote']->content_model_type = 'strictblockquote'; - $this->info['blockquote']->child = false; // recalculate please! - } + // this should not be applied to XHTML 1.0 Transitional, ONLY + // XHTML 1.0 Strict. We may need three classes + $this->info['blockquote']->content_model_type = 'strictblockquote'; + $this->info['blockquote']->child = false; // recalculate please! } diff --git a/library/HTMLPurifier/HTMLModule/TweakSubtractiveWhitelist.php b/library/HTMLPurifier/HTMLModule/TweakSubtractiveWhitelist.php deleted file mode 100644 index f00fef9b..00000000 --- a/library/HTMLPurifier/HTMLModule/TweakSubtractiveWhitelist.php +++ /dev/null @@ -1,70 +0,0 @@ -Warning: If another directive conflicts with the '. - 'elements here, that directive will win and override. '. - 'This directive has been available since 1.3.0.' -); - -HTMLPurifier_ConfigSchema::define( - 'HTML', 'AllowedAttributes', null, 'lookup/null', - 'IF HTML Purifier\'s attribute set is unsatisfactory, overload it! '. - 'The syntax is \'tag.attr\' or \'*.attr\' for the global attributes '. - '(style, id, class, dir, lang, xml:lang).'. - 'Warning: If another directive conflicts with the '. - 'elements here, that directive will win and override. For '. - 'example, %HTML.EnableAttrID will take precedence over *.id in this '. - 'directive. You must set that directive to true before you can use '. - 'IDs at all. This directive has been available since 1.3.0.' -); - -/** - * Proprietary module that further narrows down allowed elements and - * attributes that were allowed to a user-defined whitelist. - * @warning This module cannot ADD elements or attributes, you must - * implement full definitions yourself! - */ - -class HTMLPurifier_HTMLModule_TweakSubtractiveWhitelist extends HTMLPurifier_HTMLModule -{ - - var $name = 'TweakSubtractiveWhitelist'; - - function postProcess(&$definition) { - - // setup allowed elements, SubtractiveWhitelist module - $allowed_elements = $definition->config->get('HTML', 'AllowedElements'); - if (is_array($allowed_elements)) { - foreach ($definition->info as $name => $d) { - if(!isset($allowed_elements[$name])) unset($definition->info[$name]); - } - } - $allowed_attributes = $definition->config->get('HTML', 'AllowedAttributes'); - if (is_array($allowed_attributes)) { - foreach ($definition->info_global_attr as $attr_key => $info) { - if (!isset($allowed_attributes["*.$attr_key"])) { - unset($definition->info_global_attr[$attr_key]); - } - } - foreach ($definition->info as $tag => $info) { - foreach ($info->attr as $attr => $attr_info) { - if (!isset($allowed_attributes["$tag.$attr"]) && - !isset($allowed_attributes["*.$attr"])) { - unset($definition->info[$tag]->attr[$attr]); - } - } - } - } - - } - -} - -?> \ No newline at end of file diff --git a/library/HTMLPurifier/HTMLModuleManager.php b/library/HTMLPurifier/HTMLModuleManager.php new file mode 100644 index 00000000..739ac2ce --- /dev/null +++ b/library/HTMLPurifier/HTMLModuleManager.php @@ -0,0 +1,388 @@ + array( // leading _ indicates private + 'Text', + 'Hypertext', + 'List', + 'Presentation', + 'Edit', + 'Bdo', + 'Tables', + 'Image', + 'StyleAttribute' + ), + // HTML definitions, defer completely to XHTML definitions + 'HTML 4.01 Transitional' => 'XHTML 1.0 Transitional', + 'HTML 4.01 Strict' => 'XHTML 1.0 Strict', + // XHTML definitions + 'XHTML 1.0 Transitional' => array( array('XHTML 1.0 Strict'), 'Legacy' ), + 'XHTML 1.0 Strict' => array(array('_Common')), + 'XHTML 1.1' => array(array('_Common')), + ); + + /** + * Modules to import if lenient mode (attempt to convert everything + * to a valid representation) is on + */ + var $collectionsLenient = array( + 'HTML 4.01 Strict' => 'XHTML 1.0 Strict', + 'XHTML 1.0 Strict' => array('TransformToStrict'), + 'XHTML 1.1' => array(array('XHTML 1.0 Strict'), 'TransformToXHTML11') + ); + + /** + * Modules to import if correctional mode (correct everything that + * is feasible to strict mode) is on + */ + var $collectionsCorrectional = array( + 'HTML 4.01 Transitional' => 'XHTML 1.0 Transitional', + 'XHTML 1.0 Transitional' => array('TransformToStrict'), // probably want a different one + ); + + /** Associative array of element name to defining modules (always array) */ + var $elementModuleLookup = array(); + + /** List of prefixes we should use for resolving small names */ + var $prefixes = array('HTMLPurifier_HTMLModule_'); + + /** Associative array of order keywords to an integer index */ + var $orderKeywords = array( + 'define' => 10, + 'define-redefine' => 20, + 'redefine' => 30, + ); + + /** Instance of HTMLPurifier_ContentSets configured with full modules. */ + var $contentSets; + + var $attrTypes; /**< Instance of HTMLPurifier_AttrTypes */ + var $attrCollections; /**< Instance of HTMLPurifier_AttrCollections */ + + function HTMLPurifier_HTMLModuleManager() { + + // modules + $modules = array( + 'define' => array( + 'Text', 'Hypertext', 'List', 'Presentation', + 'Edit', 'Bdo', 'Tables', 'Image', 'StyleAttribute' + ), + 'define-redefine' => array( + 'Legacy' + ), + 'redefine' => array( + 'TransformToStrict', 'TransformToXHTML11' + ) + ); + + foreach ($modules as $order => $modules_of_order) { + foreach ($modules_of_order as $module) { + $this->addModule($module, $order); + } + } + + $this->attrTypes = new HTMLPurifier_AttrTypes(); + $this->attrCollections = new HTMLPurifier_AttrCollections(); + + } + + /** + * Adds a module to the ordered list. + * @param $module Mixed: string module name, with or without + * HTMLPurifier_HTMLModule prefix, or instance of + * subclass of HTMLPurifier_HTMLModule. + */ + function addModule($module, $order = 'main') { + if (is_string($module)) { + $original_module = $module; + if (!class_exists($module)) { + foreach ($this->prefixes as $prefix) { + $module = $prefix . $original_module; + if (class_exists($module)) break; + } + } + if (!class_exists($module)) { + trigger_error($original_module . ' module does not exist', E_USER_ERROR); + return; + } + $module = new $module(); + } + if (!isset($this->orderKeywords[$order])) { + trigger_error('Order keyword does not exist', E_USER_ERROR); + return; + } + $this->modules[$module->name] = $module; + $this->order[$module->name] = $order; + foreach ($module->elements as $name) { + if (!isset($this->elementModuleLookup[$name])) { + $this->elementModuleLookup[$name] = array(); + } + $this->elementModuleLookup[$name][] = $module->name; + } + } + + function setup($config) { + // substitute out the order keywords + foreach ($this->order as $name => $order) { + if (empty($this->modules[$name])) { + trigger_error('Orphan module order definition for module: ' . $name, E_USER_ERROR); + return; + } + if (is_int($order)) continue; + if (empty($this->orderKeywords[$order])) { + trigger_error('Unknown order keyword: ' . $order, E_USER_ERROR); + return; + } + $this->order[$name] = $this->orderKeywords[$order]; + } + + // sort modules member variable + array_multisort( + $this->order, SORT_ASC, SORT_NUMERIC, + $this->modules + ); + + // sort the lookup modules + foreach ($this->elementModuleLookup as $k => $modules) { + if (count($modules) > 1) { + $this->elementModuleLookup[$k] = array(); + $module_lookup = array_flip($modules); + foreach ($this->order as $name => $v) { + if (isset($module_lookup[$name])) { + $this->elementModuleLookup[$k][] = $name; + } + } + } + } + + $this->processCollections($this->collectionsSafe); + $this->processCollections($this->collectionsLenient); + $this->processCollections($this->collectionsCorrectional); + + // notice that it is vital that we get a full content sets + // elements lineup, but attr collections must not go by + // anything other than the modules the user wants + $this->contentSets = new HTMLPurifier_ContentSets( + $this->getModules($config, true) + ); + $this->attrCollections->setup($this->attrTypes, + $this->getModules($config)); + + } + + function processCollections(&$cols) { + + // $cols is the set of collections + // $col_i is the name (index) of a collection + // $col is a collection/list of modules + + // perform inclusions + foreach ($cols as $col_i => $col) { + if (is_string($col)) continue; // alias, save for later + if (!is_array($col[0])) continue; // no inclusions to do + $includes = $col[0]; + unset($cols[$col_i][0]); // remove inclusions value + for ($i = 0; isset($includes[$i]); $i++) { + $inc = $includes[$i]; + foreach ($cols[$inc] as $module) { + if (is_array($module)) { // another inclusion! + foreach ($module as $inc2) $includes[] = $inc2; + continue; + } + $cols[$col_i][] = $module; // merge in the other modules + } + } + } + + // replace with real modules + foreach ($cols as $col_i => $col) { + if (is_string($col)) continue; + $seen = array(); // lookup array to prevent dupes + foreach ($col as $module_i => $module) { + if (isset($seen[$module])) { + unset($cols[$col_i][$module_i]); + continue; + } + $cols[$col_i][$module_i] = $this->modules[$module]; + $seen[$module] = true; + } + } + + // hook up aliases + foreach ($cols as $col_i => $col) { + if (!is_string($col)) continue; + $cols[$col_i] = $cols[$col]; + } + + // delete pseudo-collections + foreach ($cols as $col_i => $col) { + if ($col_i[0] == '_') unset($cols[$col_i]); + } + + } + + function getDoctype($config) { + // get rid of this later + if ($config->get('HTML', 'Strict')) { + $doctype = 'XHTML 1.0 Strict'; + } else { + $doctype = 'XHTML 1.0 Transitional'; + } + return $doctype; + } + + /** + * @param $config + * @param $full Whether or not to retrieve *all* applicable modules + * for the doctype and not just the safe/whitelisted ones. + * Leniency modules are added based on config though. + */ + function getModules($config, $full = false) { + + // CACHE!!! + + $doctype = $this->getDoctype($config); + + // more logic is needed here to retrieve modules based on + // configuration's leniency, etc. + $modules = $this->collectionsSafe[$doctype]; + + if(isset($this->collectionsLenient[$doctype])) { + $modules = array_merge($modules, $this->collectionsLenient[$doctype]); + } + + if(isset($this->collectionsCorrectional[$doctype])) { + $modules = array_merge($modules, $this->collectionsCorrectional[$doctype]); + } + + // convert from numeric to module name indexing, also prevents + // duplicates + $ret = array(); + foreach ($modules as $module) { + $ret[$module->name] = $module; + } + + return $ret; + + } + + /** + * @param $config + */ + function getElements($config) { + + $modules = $this->getModules($config); + + $elements = array(); + foreach ($modules as $module) { + foreach ($module->elements as $name) { + $elements[$name] = $this->getElement($name, $config); + } + } + + return $elements; + + } + + function getElement($name, $config) { + + $def = false; + + $modules = $this->getModules($config, true); + + if (!isset($this->elementModuleLookup[$name])) { + return false; + } + + foreach($this->elementModuleLookup[$name] as $module_name) { + + // oops, we can't use that module at all + if (!isset($modules[$module_name])) continue; + + $module = $modules[$module_name]; + $new_def = $module->info[$name]; + + if (!$def && $new_def->standalone) { + $def = $new_def; + } elseif ($def) { + $def->mergeIn($new_def); + } else { + continue; + } + + // attribute value expansions + $this->attrCollections->performInclusions($def->attr); + $this->attrCollections->expandIdentifiers($def->attr, $this->attrTypes); + + // descendants_are_inline, for ChildDef_Chameleon + if (is_string($def->content_model) && + strpos($def->content_model, 'Inline') !== false) { + if ($name != 'del' && $name != 'ins') { + // this is for you, ins/del + $def->descendants_are_inline = true; + } + } + + $this->contentSets->generateChildDef($def, $module); + } + + return $def; + + } + + /** + * Retrieves full child definition for child, for the parent. Parent + * is a special case because it may not be allowed in the document. + */ + function getFullChildDef($element, $config) { + $def = $this->getElement($element, $config); + if ($def === false) { + trigger_error('Cannot get child def of element not available in doctype', + E_USER_ERROR); + return false; + } + return $def->child; + } + +} + +?> \ No newline at end of file diff --git a/library/HTMLPurifier/Printer/HTMLDefinition.php b/library/HTMLPurifier/Printer/HTMLDefinition.php index ff172e84..a677c58b 100644 --- a/library/HTMLPurifier/Printer/HTMLDefinition.php +++ b/library/HTMLPurifier/Printer/HTMLDefinition.php @@ -23,8 +23,7 @@ class HTMLPurifier_Printer_HTMLDefinition extends HTMLPurifier_Printer $ret .= $this->row('Parent of fragment', $def->info_parent); $ret .= $this->renderChildren($def->info_parent_def->child); - $ret .= $this->row('Strict mode', $def->strict); - if ($def->strict) $ret .= $this->row('Block wrap name', $def->info_block_wrapper); + $ret .= $this->row('Block wrap name', $def->info_block_wrapper); $ret .= $this->start('tr'); $ret .= $this->element('th', 'Global attributes');