mirror of
https://github.com/ezyang/htmlpurifier.git
synced 2025-08-04 05:07:55 +02:00
[1.3.0] Huge upgrade, (X)HTML Strict now supported
+ Transparently handles inline elements in block context (blockquote) ! Added GET method to demo for easier validation, added 50kb max input size ! New directive %HTML.BlockWrapper, for block-ifying inline elements ! New directive %HTML.Parent, allows you to only allow inline content - Added missing type to ChildDef_Chameleon . ChildDef_Required guards against empty tags . Lookup table HTMLDefinition->info_flow_elements added . Added peace-of-mind variable initialization to Strategy_FixNesting git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@560 48356398-32a2-884e-a903-53898d9a118a
This commit is contained in:
@@ -23,6 +23,8 @@ class HTMLPurifier_ChildDef_Chameleon extends HTMLPurifier_ChildDef
|
||||
*/
|
||||
var $block;
|
||||
|
||||
var $type = 'chameleon';
|
||||
|
||||
/**
|
||||
* @param $inline List of elements to allow when inline.
|
||||
* @param $block List of elements to allow when block.
|
||||
|
@@ -20,7 +20,10 @@ class HTMLPurifier_ChildDef_Required extends HTMLPurifier_ChildDef
|
||||
$elements = explode('|', $elements);
|
||||
}
|
||||
$elements = array_flip($elements);
|
||||
foreach ($elements as $i => $x) $elements[$i] = true;
|
||||
foreach ($elements as $i => $x) {
|
||||
$elements[$i] = true;
|
||||
if (empty($i)) unset($elements[$i]);
|
||||
}
|
||||
$this->elements = $elements;
|
||||
$this->gen = new HTMLPurifier_Generator();
|
||||
}
|
||||
|
70
library/HTMLPurifier/ChildDef/StrictBlockquote.php
Normal file
70
library/HTMLPurifier/ChildDef/StrictBlockquote.php
Normal file
@@ -0,0 +1,70 @@
|
||||
<?php
|
||||
|
||||
require_once 'HTMLPurifier/ChildDef/Required.php';
|
||||
|
||||
/**
|
||||
* Takes the contents of blockquote when in strict and reformats for validation.
|
||||
*
|
||||
* From XHTML 1.0 Transitional to Strict, there is a notable change where
|
||||
*/
|
||||
class HTMLPurifier_ChildDef_StrictBlockquote
|
||||
extends HTMLPurifier_ChildDef_Required
|
||||
{
|
||||
var $allow_empty = true;
|
||||
var $type = 'strictblockquote';
|
||||
var $init = false;
|
||||
function HTMLPurifier_ChildDef_StrictBlockquote() {}
|
||||
function validateChildren($tokens_of_children, $config, &$context) {
|
||||
|
||||
$def = $config->getHTMLDefinition();
|
||||
if (!$this->init) {
|
||||
// allow all inline elements
|
||||
$this->elements = $def->info_flow_elements;
|
||||
$this->elements['#PCDATA'] = true;
|
||||
$this->init = true;
|
||||
}
|
||||
|
||||
$result = parent::validateChildren($tokens_of_children, $config, $context);
|
||||
if ($result === false) return array();
|
||||
if ($result === true) $result = $tokens_of_children;
|
||||
|
||||
$block_wrap_start = new HTMLPurifier_Token_Start($def->info_block_wrapper);
|
||||
$block_wrap_end = new HTMLPurifier_Token_End( $def->info_block_wrapper);
|
||||
$is_inline = false;
|
||||
$depth = 0;
|
||||
$ret = array();
|
||||
|
||||
// assuming that there are no comment tokens
|
||||
foreach ($result as $i => $token) {
|
||||
$token = $result[$i];
|
||||
// ifs are nested for readability
|
||||
if (!$is_inline) {
|
||||
if (!$depth) {
|
||||
if (($token->type == 'text') ||
|
||||
($def->info[$token->name]->type == 'inline')) {
|
||||
$is_inline = true;
|
||||
$ret[] = $block_wrap_start;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
if (!$depth) {
|
||||
// starting tokens have been inline text / empty
|
||||
if ($token->type == 'start' || $token->type == 'empty') {
|
||||
if ($def->info[$token->name]->type == 'block') {
|
||||
// ended
|
||||
$ret[] = $block_wrap_end;
|
||||
$is_inline = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
$ret[] = $token;
|
||||
if ($token->type == 'start') $depth++;
|
||||
if ($token->type == 'end') $depth--;
|
||||
}
|
||||
if ($is_inline) $ret[] = $block_wrap_end;
|
||||
return $ret;
|
||||
}
|
||||
}
|
||||
|
||||
?>
|
@@ -23,6 +23,7 @@ require_once 'HTMLPurifier/ChildDef.php';
|
||||
require_once 'HTMLPurifier/ChildDef/Required.php';
|
||||
require_once 'HTMLPurifier/ChildDef/Optional.php';
|
||||
require_once 'HTMLPurifier/ChildDef/Table.php';
|
||||
require_once 'HTMLPurifier/ChildDef/StrictBlockquote.php';
|
||||
require_once 'HTMLPurifier/Generator.php';
|
||||
require_once 'HTMLPurifier/Token.php';
|
||||
require_once 'HTMLPurifier/TagTransform.php';
|
||||
@@ -45,6 +46,23 @@ HTMLPurifier_ConfigSchema::define(
|
||||
'Determines whether or not to use Transitional (loose) or Strict rulesets.'
|
||||
);
|
||||
|
||||
HTMLPurifier_ConfigSchema::define(
|
||||
'HTML', 'BlockWrapper', 'p', 'string',
|
||||
'String name of element to wrap inline elements that are inside a block '.
|
||||
'context. This only occurs in the children of blockquote in strict mode. '.
|
||||
'Example: by default value, <code><blockquote>Foo</blockquote></code> '.
|
||||
'would become <code><blockquote><p>Foo</p></blockquote></code>. The '.
|
||||
'<code><p></code> tags can be replaced '.
|
||||
'with whatever you desire, as long as it is a block level element.'
|
||||
);
|
||||
|
||||
HTMLPurifier_ConfigSchema::define(
|
||||
'HTML', 'Parent', 'div', 'string',
|
||||
'String name of element that HTML fragment passed to library will be '.
|
||||
'inserted in. An interesting variation would be using span as the '.
|
||||
'parent element, meaning that only inline tags would be allowed.'
|
||||
);
|
||||
|
||||
/**
|
||||
* Defines the purified HTML type with large amounts of objects.
|
||||
*
|
||||
@@ -79,11 +97,17 @@ class HTMLPurifier_HTMLDefinition
|
||||
|
||||
/**
|
||||
* String name of parent element HTML will be going into.
|
||||
* @todo Allow this to be overloaded by user config
|
||||
* @public
|
||||
*/
|
||||
var $info_parent = 'div';
|
||||
|
||||
/**
|
||||
* String name of element used to wrap inline elements in block context
|
||||
* @note This is rarely used except for BLOCKQUOTEs in strict mode
|
||||
* @public
|
||||
*/
|
||||
var $info_block_wrapper = 'p';
|
||||
|
||||
/**
|
||||
* Associative array of deprecated tag name to HTMLPurifier_TagTransform
|
||||
* @public
|
||||
@@ -102,6 +126,11 @@ class HTMLPurifier_HTMLDefinition
|
||||
*/
|
||||
var $info_attr_transform_post = array();
|
||||
|
||||
/**
|
||||
* Lookup table of flow elements
|
||||
*/
|
||||
var $info_flow_elements = array();
|
||||
|
||||
/**
|
||||
* Initializes the definition, the meat of the class.
|
||||
*/
|
||||
@@ -164,11 +193,9 @@ class HTMLPurifier_HTMLDefinition
|
||||
$e_phrase_basic = 'em | strong | dfn | code | q | samp | kbd | var'.
|
||||
' | cite | abbr | acronym';
|
||||
$e_phrase = "$e_phrase_basic | $e_phrase_extra";
|
||||
$e_inline_forms = ''; // humor the dtd
|
||||
$e_misc_inline = 'ins | del';
|
||||
$e_misc = "$e_misc_inline";
|
||||
$e_inline = "a | $e_special | $e_fontstyle | $e_phrase".
|
||||
" | $e_inline_forms";
|
||||
$e_inline = "a | $e_special | $e_fontstyle | $e_phrase";
|
||||
// pseudo-property we created for convenience, see later on
|
||||
$e__inline = "#PCDATA | $e_inline | $e_misc_inline";
|
||||
// note the casing
|
||||
@@ -181,11 +208,10 @@ class HTMLPurifier_HTMLDefinition
|
||||
$e__flow = "#PCDATA | $e_block | $e_inline | $e_misc";
|
||||
$e_Flow = new HTMLPurifier_ChildDef_Optional($e__flow);
|
||||
$e_a_content = new HTMLPurifier_ChildDef_Optional("#PCDATA".
|
||||
" | $e_special | $e_fontstyle | $e_phrase | $e_inline_forms".
|
||||
" | $e_misc_inline");
|
||||
" | $e_special | $e_fontstyle | $e_phrase | $e_misc_inline");
|
||||
$e_pre_content = new HTMLPurifier_ChildDef_Optional("#PCDATA | a".
|
||||
" | $e_special_basic | $e_fontstyle_basic | $e_phrase_basic".
|
||||
" | $e_inline_forms | $e_misc_inline");
|
||||
" | $e_misc_inline");
|
||||
$e_form_content = new HTMLPurifier_ChildDef_Optional('');//unused
|
||||
$e_form_button_content = new HTMLPurifier_ChildDef_Optional('');//unused
|
||||
|
||||
@@ -198,7 +224,7 @@ class HTMLPurifier_HTMLDefinition
|
||||
$this->info['div']->child = $e_Flow;
|
||||
|
||||
if ($config->get('HTML', 'Strict')) {
|
||||
$this->info['blockquote']->child = $e_Block;
|
||||
$this->info['blockquote']->child = new HTMLPurifier_ChildDef_StrictBlockquote();
|
||||
} else {
|
||||
$this->info['blockquote']->child = $e_Flow;
|
||||
}
|
||||
@@ -276,7 +302,7 @@ class HTMLPurifier_HTMLDefinition
|
||||
|
||||
// reuses $e_Inline and $e_Block
|
||||
foreach ($e_Inline->elements as $name => $bool) {
|
||||
if ($name == '#PCDATA' || $name == '') continue;
|
||||
if ($name == '#PCDATA') continue;
|
||||
$this->info[$name]->type = 'inline';
|
||||
}
|
||||
|
||||
@@ -284,6 +310,10 @@ class HTMLPurifier_HTMLDefinition
|
||||
$this->info[$name]->type = 'block';
|
||||
}
|
||||
|
||||
foreach ($e_Flow->elements as $name => $bool) {
|
||||
$this->info_flow_elements[$name] = true;
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
// info[]->excludes : defines elements that aren't allowed in here
|
||||
|
||||
@@ -447,6 +477,28 @@ class HTMLPurifier_HTMLDefinition
|
||||
}
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
// info_block_wrapper : wraps inline elements in block context
|
||||
|
||||
$block_wrapper = $config->get('HTML', 'BlockWrapper');
|
||||
if (isset($e_Block->elements[$block_wrapper])) {
|
||||
$this->info_block_wrapper = $block_wrapper;
|
||||
} else {
|
||||
trigger_error('Cannot use non-block element as block wrapper.',
|
||||
E_USER_ERROR);
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
// info_parent : parent element of the HTML fragment
|
||||
|
||||
$parent = $config->get('HTML', 'Parent');
|
||||
if (isset($this->info[$parent])) {
|
||||
$this->info_parent = $parent;
|
||||
} else {
|
||||
trigger_error('Cannot use unrecognized element as parent.',
|
||||
E_USER_ERROR);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
function setAttrForTableElements($attr, $def) {
|
||||
|
@@ -141,6 +141,7 @@ class HTMLPurifier_Strategy_FixNesting extends HTMLPurifier_Strategy
|
||||
if ($excluded) {
|
||||
// there is an exclusion, remove the entire node
|
||||
$result = false;
|
||||
$excludes = array(); // not used, but good to initialize anyway
|
||||
} else {
|
||||
// DEFINITION CALL
|
||||
$def = $definition->info[$tokens[$i]->name];
|
||||
|
Reference in New Issue
Block a user