1
0
mirror of https://github.com/ezyang/htmlpurifier.git synced 2025-08-02 12:21:09 +02:00

[1.6.1] Fix fatal error with XHTML 1.1 validation.

git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@1013 48356398-32a2-884e-a903-53898d9a118a
This commit is contained in:
Edward Z. Yang
2007-05-04 01:17:00 +00:00
parent bd58a7ba77
commit 4e77a1adbd
4 changed files with 304 additions and 281 deletions

2
NEWS
View File

@@ -25,6 +25,8 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier
to enable to enable
! CSS property white-space now allows nowrap (supported in all modern ! CSS property white-space now allows nowrap (supported in all modern
browsers) but not others (which have spotty browser implementations) browsers) but not others (which have spotty browser implementations)
! XHTML 1.1 mode now sort-of works without any fatal errors, and
lang is now moved over to xml:lang.
- Possibly fatal bug with __autoload() fixed in module manager - Possibly fatal bug with __autoload() fixed in module manager
- Invert HTMLModuleManager->addModule() processing order to check - Invert HTMLModuleManager->addModule() processing order to check
prefixes first and then the literal module prefixes first and then the literal module

View File

@@ -1,281 +1,290 @@
<?php <?php
// components // components
require_once 'HTMLPurifier/HTMLModuleManager.php'; require_once 'HTMLPurifier/HTMLModuleManager.php';
// this definition and its modules MUST NOT define configuration directives // this definition and its modules MUST NOT define configuration directives
// outside of the HTML or Attr namespaces // outside of the HTML or Attr namespaces
// will be superseded by more accurate doctype declaration schemes // will be superseded by more accurate doctype declaration schemes
HTMLPurifier_ConfigSchema::define( HTMLPurifier_ConfigSchema::define(
'HTML', 'Strict', false, 'bool', 'HTML', 'Strict', false, 'bool',
'Determines whether or not to use Transitional (loose) or Strict rulesets. '. 'Determines whether or not to use Transitional (loose) or Strict rulesets. '.
'This directive has been available since 1.3.0.' 'This directive has been available since 1.3.0.'
); );
HTMLPurifier_ConfigSchema::define( HTMLPurifier_ConfigSchema::define(
'HTML', 'BlockWrapper', 'p', 'string', 'HTML', 'BlockWrapper', 'p', 'string',
'String name of element to wrap inline elements that are inside a block '. 'String name of element to wrap inline elements that are inside a block '.
'context. This only occurs in the children of blockquote in strict mode. '. 'context. This only occurs in the children of blockquote in strict mode. '.
'Example: by default value, <code>&lt;blockquote&gt;Foo&lt;/blockquote&gt;</code> '. 'Example: by default value, <code>&lt;blockquote&gt;Foo&lt;/blockquote&gt;</code> '.
'would become <code>&lt;blockquote&gt;&lt;p&gt;Foo&lt;/p&gt;&lt;/blockquote&gt;</code>. The '. 'would become <code>&lt;blockquote&gt;&lt;p&gt;Foo&lt;/p&gt;&lt;/blockquote&gt;</code>. The '.
'<code>&lt;p&gt;</code> tags can be replaced '. '<code>&lt;p&gt;</code> tags can be replaced '.
'with whatever you desire, as long as it is a block level element. '. 'with whatever you desire, as long as it is a block level element. '.
'This directive has been available since 1.3.0.' 'This directive has been available since 1.3.0.'
); );
HTMLPurifier_ConfigSchema::define( HTMLPurifier_ConfigSchema::define(
'HTML', 'Parent', 'div', 'string', 'HTML', 'Parent', 'div', 'string',
'String name of element that HTML fragment passed to library will be '. 'String name of element that HTML fragment passed to library will be '.
'inserted in. An interesting variation would be using span as the '. 'inserted in. An interesting variation would be using span as the '.
'parent element, meaning that only inline tags would be allowed. '. 'parent element, meaning that only inline tags would be allowed. '.
'This directive has been available since 1.3.0.' 'This directive has been available since 1.3.0.'
); );
HTMLPurifier_ConfigSchema::define( HTMLPurifier_ConfigSchema::define(
'HTML', 'AllowedElements', null, 'lookup/null', 'HTML', 'AllowedElements', null, 'lookup/null',
'If HTML Purifier\'s tag set is unsatisfactory for your needs, you '. 'If HTML Purifier\'s tag set is unsatisfactory for your needs, you '.
'can overload it with your own list of tags to allow. Note that this '. 'can overload it with your own list of tags to allow. Note that this '.
'method is subtractive: it does its job by taking away from HTML Purifier '. 'method is subtractive: it does its job by taking away from HTML Purifier '.
'usual feature set, so you cannot add a tag that HTML Purifier never '. 'usual feature set, so you cannot add a tag that HTML Purifier never '.
'supported in the first place (like embed, form or head). If you change this, you '. 'supported in the first place (like embed, form or head). If you change this, you '.
'probably also want to change %HTML.AllowedAttributes. '. 'probably also want to change %HTML.AllowedAttributes. '.
'<strong>Warning:</strong> If another directive conflicts with the '. '<strong>Warning:</strong> If another directive conflicts with the '.
'elements here, <em>that</em> directive will win and override. '. 'elements here, <em>that</em> directive will win and override. '.
'This directive has been available since 1.3.0.' 'This directive has been available since 1.3.0.'
); );
HTMLPurifier_ConfigSchema::define( HTMLPurifier_ConfigSchema::define(
'HTML', 'AllowedAttributes', null, 'lookup/null', 'HTML', 'AllowedAttributes', null, 'lookup/null',
'IF HTML Purifier\'s attribute set is unsatisfactory, overload it! '. 'IF HTML Purifier\'s attribute set is unsatisfactory, overload it! '.
'The syntax is \'tag.attr\' or \'*.attr\' for the global attributes '. 'The syntax is \'tag.attr\' or \'*.attr\' for the global attributes '.
'(style, id, class, dir, lang, xml:lang).'. '(style, id, class, dir, lang, xml:lang).'.
'<strong>Warning:</strong> If another directive conflicts with the '. '<strong>Warning:</strong> If another directive conflicts with the '.
'elements here, <em>that</em> directive will win and override. For '. 'elements here, <em>that</em> directive will win and override. For '.
'example, %HTML.EnableAttrID will take precedence over *.id in this '. 'example, %HTML.EnableAttrID will take precedence over *.id in this '.
'directive. You must set that directive to true before you can use '. 'directive. You must set that directive to true before you can use '.
'IDs at all. This directive has been available since 1.3.0.' 'IDs at all. This directive has been available since 1.3.0.'
); );
/** /**
* Definition of the purified HTML that describes allowed children, * Definition of the purified HTML that describes allowed children,
* attributes, and many other things. * attributes, and many other things.
* *
* Conventions: * Conventions:
* *
* All member variables that are prefixed with info * All member variables that are prefixed with info
* (including the main $info array) are used by HTML Purifier internals * (including the main $info array) are used by HTML Purifier internals
* and should not be directly edited when customizing the HTMLDefinition. * and should not be directly edited when customizing the HTMLDefinition.
* They can usually be set via configuration directives or custom * They can usually be set via configuration directives or custom
* modules. * modules.
* *
* On the other hand, member variables without the info prefix are used * On the other hand, member variables without the info prefix are used
* internally by the HTMLDefinition and MUST NOT be used by other HTML * internally by the HTMLDefinition and MUST NOT be used by other HTML
* Purifier internals. Many of them, however, are public, and may be * Purifier internals. Many of them, however, are public, and may be
* edited by userspace code to tweak the behavior of HTMLDefinition. * edited by userspace code to tweak the behavior of HTMLDefinition.
* *
* HTMLPurifier_Printer_HTMLDefinition is a notable exception to this * HTMLPurifier_Printer_HTMLDefinition is a notable exception to this
* rule: in the interest of comprehensiveness, it will sniff everything. * rule: in the interest of comprehensiveness, it will sniff everything.
*/ */
class HTMLPurifier_HTMLDefinition class HTMLPurifier_HTMLDefinition
{ {
/** FULLY-PUBLIC VARIABLES */ /** FULLY-PUBLIC VARIABLES */
/** /**
* Associative array of element names to HTMLPurifier_ElementDef * Associative array of element names to HTMLPurifier_ElementDef
* @public * @public
*/ */
var $info = array(); var $info = array();
/** /**
* Associative array of global attribute name to attribute definition. * Associative array of global attribute name to attribute definition.
* @public * @public
*/ */
var $info_global_attr = array(); var $info_global_attr = array();
/** /**
* String name of parent element HTML will be going into. * String name of parent element HTML will be going into.
* @public * @public
*/ */
var $info_parent = 'div'; var $info_parent = 'div';
/** /**
* Definition for parent element, allows parent element to be a * Definition for parent element, allows parent element to be a
* tag that's not allowed inside the HTML fragment. * tag that's not allowed inside the HTML fragment.
* @public * @public
*/ */
var $info_parent_def; var $info_parent_def;
/** /**
* String name of element used to wrap inline elements in block context * String name of element used to wrap inline elements in block context
* @note This is rarely used except for BLOCKQUOTEs in strict mode * @note This is rarely used except for BLOCKQUOTEs in strict mode
* @public * @public
*/ */
var $info_block_wrapper = 'p'; var $info_block_wrapper = 'p';
/** /**
* Associative array of deprecated tag name to HTMLPurifier_TagTransform * Associative array of deprecated tag name to HTMLPurifier_TagTransform
* @public * @public
*/ */
var $info_tag_transform = array(); var $info_tag_transform = array();
/** /**
* Indexed list of HTMLPurifier_AttrTransform to be performed before validation. * Indexed list of HTMLPurifier_AttrTransform to be performed before validation.
* @public * @public
*/ */
var $info_attr_transform_pre = array(); var $info_attr_transform_pre = array();
/** /**
* Indexed list of HTMLPurifier_AttrTransform to be performed after validation. * Indexed list of HTMLPurifier_AttrTransform to be performed after validation.
* @public * @public
*/ */
var $info_attr_transform_post = array(); var $info_attr_transform_post = array();
/** /**
* Nested lookup array of content set name (Block, Inline) to * Nested lookup array of content set name (Block, Inline) to
* element name to whether or not it belongs in that content set. * element name to whether or not it belongs in that content set.
* @public * @public
*/ */
var $info_content_sets = array(); var $info_content_sets = array();
/** PUBLIC BUT INTERNAL VARIABLES */ /** PUBLIC BUT INTERNAL VARIABLES */
var $setup = false; /**< Has setup() been called yet? */ var $setup = false; /**< Has setup() been called yet? */
var $config; /**< Temporary instance of HTMLPurifier_Config */ var $config; /**< Temporary instance of HTMLPurifier_Config */
var $manager; /**< Instance of HTMLPurifier_HTMLModuleManager */ var $manager; /**< Instance of HTMLPurifier_HTMLModuleManager */
/** /**
* Performs low-cost, preliminary initialization. * Performs low-cost, preliminary initialization.
* @param $config Instance of HTMLPurifier_Config * @param $config Instance of HTMLPurifier_Config
*/ */
function HTMLPurifier_HTMLDefinition(&$config) { function HTMLPurifier_HTMLDefinition(&$config) {
$this->config =& $config; $this->config =& $config;
$this->manager = new HTMLPurifier_HTMLModuleManager(); $this->manager = new HTMLPurifier_HTMLModuleManager();
} }
/** /**
* Processes internals into form usable by HTMLPurifier internals. * Processes internals into form usable by HTMLPurifier internals.
* Modifying the definition after calling this function should not * Modifying the definition after calling this function should not
* be done. * be done.
*/ */
function setup() { function setup() {
// multiple call guard // multiple call guard
if ($this->setup) {return;} else {$this->setup = true;} if ($this->setup) {return;} else {$this->setup = true;}
$this->processModules(); $this->processModules();
$this->setupConfigStuff(); $this->setupConfigStuff();
unset($this->config); unset($this->config);
unset($this->manager); unset($this->manager);
} }
/** /**
* Extract out the information from the manager * Extract out the information from the manager
*/ */
function processModules() { function processModules() {
$this->manager->setup($this->config); $this->manager->setup($this->config);
foreach ($this->manager->activeModules as $module) { foreach ($this->manager->activeModules as $module) {
foreach($module->info_tag_transform as $k => $v) $this->info_tag_transform[$k] = $v; foreach($module->info_tag_transform as $k => $v) {
foreach($module->info_attr_transform_pre as $k => $v) $this->info_attr_transform_pre[$k] = $v; if ($v === false) unset($this->info_tag_transform[$k]);
foreach($module->info_attr_transform_post as $k => $v) $this->info_attr_transform_post[$k]= $v; else $this->info_tag_transform[$k] = $v;
} }
foreach($module->info_attr_transform_pre as $k => $v) {
$this->info = $this->manager->getElements($this->config); if ($v === false) unset($this->info_attr_transform_pre[$k]);
$this->info_content_sets = $this->manager->contentSets->lookup; else $this->info_attr_transform_pre[$k] = $v;
}
} foreach($module->info_attr_transform_post as $k => $v) {
if ($v === false) unset($this->info_attr_transform_post[$k]);
/** else $this->info_attr_transform_post[$k] = $v;
* Sets up stuff based on config. We need a better way of doing this. }
*/ }
function setupConfigStuff() {
$this->info = $this->manager->getElements($this->config);
$block_wrapper = $this->config->get('HTML', 'BlockWrapper'); $this->info_content_sets = $this->manager->contentSets->lookup;
if (isset($this->info_content_sets['Block'][$block_wrapper])) {
$this->info_block_wrapper = $block_wrapper; }
} else {
trigger_error('Cannot use non-block element as block wrapper.', /**
E_USER_ERROR); * Sets up stuff based on config. We need a better way of doing this.
} */
function setupConfigStuff() {
$parent = $this->config->get('HTML', 'Parent');
$def = $this->manager->getElement($parent, $this->config); $block_wrapper = $this->config->get('HTML', 'BlockWrapper');
if ($def) { if (isset($this->info_content_sets['Block'][$block_wrapper])) {
$this->info_parent = $parent; $this->info_block_wrapper = $block_wrapper;
$this->info_parent_def = $def; } else {
} else { trigger_error('Cannot use non-block element as block wrapper.',
trigger_error('Cannot use unrecognized element as parent.', E_USER_ERROR);
E_USER_ERROR); }
$this->info_parent_def = $this->manager->getElement(
$this->info_parent, $this->config); $parent = $this->config->get('HTML', 'Parent');
} $def = $this->manager->getElement($parent, $this->config);
if ($def) {
// support template text $this->info_parent = $parent;
$support = "(for information on implementing this, see the ". $this->info_parent_def = $def;
"support forums) "; } else {
trigger_error('Cannot use unrecognized element as parent.',
// setup allowed elements, SubtractiveWhitelist module E_USER_ERROR);
$allowed_elements = $this->config->get('HTML', 'AllowedElements'); $this->info_parent_def = $this->manager->getElement(
if (is_array($allowed_elements)) { $this->info_parent, $this->config);
foreach ($this->info as $name => $d) { }
if(!isset($allowed_elements[$name])) unset($this->info[$name]);
unset($allowed_elements[$name]); // support template text
} $support = "(for information on implementing this, see the ".
// emit errors "support forums) ";
foreach ($allowed_elements as $element => $d) {
trigger_error("Element '$element' is not supported $support", E_USER_WARNING); // setup allowed elements, SubtractiveWhitelist module
} $allowed_elements = $this->config->get('HTML', 'AllowedElements');
} if (is_array($allowed_elements)) {
foreach ($this->info as $name => $d) {
$allowed_attributes = $this->config->get('HTML', 'AllowedAttributes'); if(!isset($allowed_elements[$name])) unset($this->info[$name]);
$allowed_attributes_mutable = $allowed_attributes; // by copy! unset($allowed_elements[$name]);
if (is_array($allowed_attributes)) { }
foreach ($this->info_global_attr as $attr_key => $info) { // emit errors
if (!isset($allowed_attributes["*.$attr_key"])) { foreach ($allowed_elements as $element => $d) {
unset($this->info_global_attr[$attr_key]); trigger_error("Element '$element' is not supported $support", E_USER_WARNING);
} elseif (isset($allowed_attributes_mutable["*.$attr_key"])) { }
unset($allowed_attributes_mutable["*.$attr_key"]); }
}
} $allowed_attributes = $this->config->get('HTML', 'AllowedAttributes');
foreach ($this->info as $tag => $info) { $allowed_attributes_mutable = $allowed_attributes; // by copy!
foreach ($info->attr as $attr => $attr_info) { if (is_array($allowed_attributes)) {
if (!isset($allowed_attributes["$tag.$attr"]) && foreach ($this->info_global_attr as $attr_key => $info) {
!isset($allowed_attributes["*.$attr"])) { if (!isset($allowed_attributes["*.$attr_key"])) {
unset($this->info[$tag]->attr[$attr]); unset($this->info_global_attr[$attr_key]);
} else { } elseif (isset($allowed_attributes_mutable["*.$attr_key"])) {
if (isset($allowed_attributes_mutable["$tag.$attr"])) { unset($allowed_attributes_mutable["*.$attr_key"]);
unset($allowed_attributes_mutable["$tag.$attr"]); }
} elseif (isset($allowed_attributes_mutable["*.$attr"])) { }
unset($allowed_attributes_mutable["*.$attr"]); foreach ($this->info as $tag => $info) {
} foreach ($info->attr as $attr => $attr_info) {
} if (!isset($allowed_attributes["$tag.$attr"]) &&
} !isset($allowed_attributes["*.$attr"])) {
} unset($this->info[$tag]->attr[$attr]);
// emit errors } else {
foreach ($allowed_attributes_mutable as $elattr => $d) { if (isset($allowed_attributes_mutable["$tag.$attr"])) {
list($element, $attribute) = explode('.', $elattr); unset($allowed_attributes_mutable["$tag.$attr"]);
if ($element == '*') { } elseif (isset($allowed_attributes_mutable["*.$attr"])) {
trigger_error("Global attribute '$attribute' is not ". unset($allowed_attributes_mutable["*.$attr"]);
"supported in any elements $support", }
E_USER_WARNING); }
} else { }
trigger_error("Attribute '$attribute' in element '$element' not supported $support", }
E_USER_WARNING); // emit errors
} foreach ($allowed_attributes_mutable as $elattr => $d) {
} list($element, $attribute) = explode('.', $elattr);
} if ($element == '*') {
trigger_error("Global attribute '$attribute' is not ".
} "supported in any elements $support",
E_USER_WARNING);
} else {
} trigger_error("Attribute '$attribute' in element '$element' not supported $support",
E_USER_WARNING);
?> }
}
}
}
}
?>

View File

@@ -1,5 +1,7 @@
<?php <?php
require_once 'HTMLPurifier/AttrTransform/Lang.php';
/** /**
* Proprietary module that transforms XHTML 1.0 deprecated aspects into * Proprietary module that transforms XHTML 1.0 deprecated aspects into
* XHTML 1.1 compliant ones, when possible. For maximum effectiveness, * XHTML 1.1 compliant ones, when possible. For maximum effectiveness,
@@ -25,6 +27,10 @@ class HTMLPurifier_HTMLModule_TransformToXHTML11 extends HTMLPurifier_HTMLModule
'lang' => false // remove it 'lang' => false // remove it
); );
function HTMLPurifier_HTMLModule_TransformToXHTML11() {
$this->info_attr_transform_pre['lang'] = new HTMLPurifier_AttrTransform_Lang();
}
} }
?> ?>

View File

@@ -231,6 +231,12 @@ class HTMLPurifier_Strategy_ValidateAttributesTest extends
'<td style="white-space:nowrap;" />' '<td style="white-space:nowrap;" />'
); );
// test only one lang
$this->assertResult(
'<b lang="en">asdf</b>',
'<b xml:lang="en">asdf</b>', array('HTML.Doctype' => 'XHTML 1.1')
);
} }
} }