1
0
mirror of https://github.com/ezyang/htmlpurifier.git synced 2025-10-22 17:16:34 +02:00

Release 2.0.1, merged in 1181 to HEAD.

git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/branches/strict@1255 48356398-32a2-884e-a903-53898d9a118a
This commit is contained in:
Edward Z. Yang
2007-06-27 14:30:45 +00:00
parent 42858ad594
commit 495164e938
326 changed files with 3025 additions and 826 deletions

View File

@@ -27,4 +27,3 @@ class HTMLPurifier_Strategy_Composite extends HTMLPurifier_Strategy
}
?>

View File

@@ -22,4 +22,3 @@ class HTMLPurifier_Strategy_Core extends HTMLPurifier_Strategy_Composite
}
?>

View File

@@ -54,6 +54,9 @@ class HTMLPurifier_Strategy_FixNesting extends HTMLPurifier_Strategy
$is_inline = $definition->info_parent_def->descendants_are_inline;
$context->register('IsInline', $is_inline);
// setup error collector
$e =& $context->get('ErrorCollector', true);
//####################################################################//
// Loop initialization
@@ -67,6 +70,11 @@ class HTMLPurifier_Strategy_FixNesting extends HTMLPurifier_Strategy
// processed, i.e. there won't be empty exclusions.
$exclude_stack = array();
// variable that contains the start token while we are processing
// nodes. This enables error reporting to do its job
$start_token = false;
$context->register('CurrentToken', $start_token);
//####################################################################//
// Loop
@@ -100,6 +108,8 @@ class HTMLPurifier_Strategy_FixNesting extends HTMLPurifier_Strategy
// $i is index of start token
// $j is index of end token
$start_token = $tokens[$i]; // to make token available via CurrentToken
//################################################################//
// Gather information on parent
@@ -200,6 +210,14 @@ class HTMLPurifier_Strategy_FixNesting extends HTMLPurifier_Strategy
} elseif($result === false) {
// remove entire node
if ($e) {
if ($excluded) {
$e->send(E_ERROR, 'Strategy_FixNesting: Node excluded');
} else {
$e->send(E_ERROR, 'Strategy_FixNesting: Node removed');
}
}
// calculate length of inner tokens and current tokens
$length = $j - $i + 1;
@@ -216,7 +234,7 @@ class HTMLPurifier_Strategy_FixNesting extends HTMLPurifier_Strategy
// this is a rough heuristic that covers 100% of HTML's
// cases and 99% of all other cases. A child definition
// that would be tricked by this would be something like:
// ( | a b c) where it's all or nothing. Fortunantely,
// ( | a b c) where it's all or nothing. Fortunately,
// our current implementation claims that that case would
// not allow empty, even if it did
if (!$parent_def->child->allow_empty) {
@@ -234,6 +252,14 @@ class HTMLPurifier_Strategy_FixNesting extends HTMLPurifier_Strategy
// calculate length of inner tokens
$length = $j - $i - 1;
if ($e) {
if (empty($result) && $length) {
$e->send(E_ERROR, 'Strategy_FixNesting: Node contents removed');
} else {
$e->send(E_WARNING, 'Strategy_FixNesting: Node reorganized');
}
}
// perform replacement
array_splice($tokens, $i + 1, $length, $result);
@@ -291,6 +317,7 @@ class HTMLPurifier_Strategy_FixNesting extends HTMLPurifier_Strategy
// remove context variables
$context->destroy('IsInline');
$context->destroy('CurrentToken');
//####################################################################//
// Return
@@ -301,4 +328,4 @@ class HTMLPurifier_Strategy_FixNesting extends HTMLPurifier_Strategy
}
?>

View File

@@ -4,129 +4,229 @@ require_once 'HTMLPurifier/Strategy.php';
require_once 'HTMLPurifier/HTMLDefinition.php';
require_once 'HTMLPurifier/Generator.php';
require_once 'HTMLPurifier/Injector/AutoParagraph.php';
require_once 'HTMLPurifier/Injector/Linkify.php';
require_once 'HTMLPurifier/Injector/PurifierLinkify.php';
// Declares the AutoFormat/Custom directive: its value is a 'list' that
// defaults to an empty array; the trailing string is the HTML description
// of the directive.
HTMLPurifier_ConfigSchema::define(
'AutoFormat', 'Custom', array(), 'list', '
<p>
This directive can be used to add custom auto-format injectors.
Specify an array of injector names (class name minus the prefix)
or concrete implementations. Injector class must exist. This directive
has been available since 2.0.1.
</p>
'
);
/**
* Takes tokens and makes them well-formed (balances end tags, etc.)
*/
class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
{
/**
* Locally shared variable references
* @private
*/
var $inputTokens, $inputIndex, $outputTokens, $currentNesting,
$currentInjector, $injectors;
function execute($tokens, $config, &$context) {
$definition = $config->getHTMLDefinition();
$generator = new HTMLPurifier_Generator();
// CurrentNesting
$this->currentNesting = array();
$context->register('CurrentNesting', $this->currentNesting);
// InputIndex
$this->inputIndex = false;
$context->register('InputIndex', $this->inputIndex);
// InputTokens
$context->register('InputTokens', $tokens);
$this->inputTokens =& $tokens;
// OutputTokens
$result = array();
$current_nesting = array();
$this->outputTokens =& $result;
// %Core.EscapeInvalidTags
$escape_invalid_tags = $config->get('Core', 'EscapeInvalidTags');
foreach ($tokens as $token) {
$generator = new HTMLPurifier_Generator();
$e =& $context->get('ErrorCollector', true);
// -- begin INJECTOR --
$this->injectors = array();
$injectors = $config->getBatch('AutoFormat');
$custom_injectors = $injectors['Custom'];
unset($injectors['Custom']); // special case
foreach ($injectors as $injector => $b) {
$injector = "HTMLPurifier_Injector_$injector";
if ($b) $this->injectors[] = new $injector;
}
foreach ($custom_injectors as $injector) {
if (is_string($injector)) {
$injector = "HTMLPurifier_Injector_$injector";
$injector = new $injector;
}
$this->injectors[] = $injector;
}
// array index of the injector that resulted in an array
// substitution. This enables processTokens() to know which
// injectors are affected by the added tokens and which are
// not (namely, the ones after the current injector are not
// affected)
$this->currentInjector = false;
// give the injectors references to the definition and context
// variables for performance reasons
foreach ($this->injectors as $i => $x) {
$this->injectors[$i]->prepare($config, $context);
}
// -- end INJECTOR --
$token = false;
$context->register('CurrentToken', $token);
for ($this->inputIndex = 0; isset($tokens[$this->inputIndex]); $this->inputIndex++) {
// if all goes well, this token will be passed through unharmed
$token = $tokens[$this->inputIndex];
foreach ($this->injectors as $i => $x) {
if ($x->skip > 0) $this->injectors[$i]->skip--;
}
// quick-check: if it's not a tag, no need to process
if (empty( $token->is_tag )) {
$result[] = $token;
if ($token->type === 'text') {
// injector handler code; duplicated for performance reasons
foreach ($this->injectors as $i => $x) {
if (!$x->skip) $x->handleText($token, $config, $context);
if (is_array($token)) {
$this->currentInjector = $i;
break;
}
}
}
$this->processToken($token, $config, $context);
continue;
}
// DEFINITION CALL
$info = $definition->info[$token->name]->child;
// quick checks:
// test if it claims to be a start tag but is empty
if ($info->type == 'empty' &&
$token->type == 'start' ) {
$result[] = new HTMLPurifier_Token_Empty($token->name,
$token->attr);
if ($info->type == 'empty' && $token->type == 'start') {
$result[] = new HTMLPurifier_Token_Empty($token->name, $token->attr);
continue;
}
// test if it claims to be empty but really is a start tag
if ($info->type != 'empty' &&
$token->type == 'empty' ) {
$result[] = new HTMLPurifier_Token_Start($token->name,
$token->attr);
if ($info->type != 'empty' && $token->type == 'empty' ) {
$result[] = new HTMLPurifier_Token_Start($token->name, $token->attr);
$result[] = new HTMLPurifier_Token_End($token->name);
continue;
}
// automatically insert empty tags
if ($token->type == 'empty') {
$result[] = $token;
continue;
}
// we give start tags precedence, so automatically accept unless...
// it's one of those special cases
// start tags have precedence, so they get passed through...
if ($token->type == 'start') {
// if there's a parent, check for special case
if (!empty($current_nesting)) {
// ...unless they also have to close their parent
if (!empty($this->currentNesting)) {
$parent = array_pop($current_nesting);
$parent_name = $parent->name;
$parent_info = $definition->info[$parent_name];
$parent = array_pop($this->currentNesting);
$parent_info = $definition->info[$parent->name];
// we need to replace this with a more general
// algorithm
if (isset($parent_info->auto_close[$token->name])) {
$result[] = new HTMLPurifier_Token_End($parent_name);
// this can be replaced with a more general algorithm:
// if the token is not allowed by the parent, auto-close
// the parent
if (!isset($parent_info->child->elements[$token->name])) {
if ($e) $e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag auto closed', $parent);
// close the parent, then append the token
$result[] = new HTMLPurifier_Token_End($parent->name);
$result[] = $token;
$current_nesting[] = $token;
$this->currentNesting[] = $token;
continue;
}
$current_nesting[] = $parent; // undo the pop
$this->currentNesting[] = $parent; // undo the pop
}
$result[] = $token;
$current_nesting[] = $token;
// injector handler code; duplicated for performance reasons
foreach ($this->injectors as $i => $x) {
if (!$x->skip) $x->handleStart($token, $config, $context);
if (is_array($token)) {
$this->currentInjector = $i;
break;
}
}
$this->processToken($token, $config, $context);
continue;
}
// sanity check
// sanity check: we should be dealing with a closing tag
if ($token->type != 'end') continue;
// okay, we're dealing with a closing tag
// make sure that we have something open
if (empty($current_nesting)) {
if (empty($this->currentNesting)) {
if ($escape_invalid_tags) {
if ($e) $e->send(E_WARNING, 'Strategy_MakeWellFormed: Unnecessary end tag to text');
$result[] = new HTMLPurifier_Token_Text(
$generator->generateFromToken($token, $config, $context)
);
} elseif ($e) {
$e->send(E_WARNING, 'Strategy_MakeWellFormed: Unnecessary end tag removed');
}
continue;
}
// first, check for the simplest case: everything closes neatly
// current_nesting is modified
$current_parent = array_pop($current_nesting);
$current_parent = array_pop($this->currentNesting);
if ($current_parent->name == $token->name) {
$result[] = $token;
continue;
}
// undo the array_pop
$current_nesting[] = $current_parent;
// okay, so we're trying to close the wrong tag
// scroll back the entire nest, trying to find our tag
// feature could be to specify how far you'd like to go
$size = count($current_nesting);
// undo the previous pop
$this->currentNesting[] = $current_parent;
// scroll back the entire nest, trying to find our tag.
// (feature could be to specify how far you'd like to go)
$size = count($this->currentNesting);
// -2 because -1 is the last element, but we already checked that
$skipped_tags = false;
for ($i = $size - 2; $i >= 0; $i--) {
if ($current_nesting[$i]->name == $token->name) {
if ($this->currentNesting[$i]->name == $token->name) {
// current nesting is modified
$skipped_tags = array_splice($current_nesting, $i);
$skipped_tags = array_splice($this->currentNesting, $i);
break;
}
}
// we still didn't find the tag, so translate to text
// we still didn't find the tag, so remove
if ($skipped_tags === false) {
if ($escape_invalid_tags) {
$result[] = new HTMLPurifier_Token_Text(
$generator->generateFromToken($token, $config, $context)
);
if ($e) $e->send(E_WARNING, 'Strategy_MakeWellFormed: Stray end tag to text');
} elseif ($e) {
$e->send(E_WARNING, 'Strategy_MakeWellFormed: Stray end tag removed');
}
continue;
}
@@ -134,27 +234,66 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
// okay, we found it, close all the skipped tags
// note that skipped tags contains the element we need closed
$size = count($skipped_tags);
for ($i = $size - 1; $i >= 0; $i--) {
for ($i = $size - 1; $i > 0; $i--) {
if ($e && !isset($skipped_tags[$i]->armor['MakeWellFormed_TagClosedError'])) {
$e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag closed by element end', $skipped_tags[$i]);
}
$result[] = new HTMLPurifier_Token_End($skipped_tags[$i]->name);
}
// done!
$result[] = new HTMLPurifier_Token_End($skipped_tags[$i]->name);
}
// we're at the end now, fix all still unclosed tags
$context->destroy('CurrentNesting');
$context->destroy('InputTokens');
$context->destroy('InputIndex');
$context->destroy('CurrentToken');
if (!empty($current_nesting)) {
$size = count($current_nesting);
// we're at the end now, fix all still unclosed tags
// not using processToken() because at this point we don't
// care about current nesting
if (!empty($this->currentNesting)) {
$size = count($this->currentNesting);
for ($i = $size - 1; $i >= 0; $i--) {
if ($e && !isset($this->currentNesting[$i]->armor['MakeWellFormed_TagClosedError'])) {
$e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag closed by document end', $this->currentNesting[$i]);
}
$result[] =
new HTMLPurifier_Token_End($current_nesting[$i]->name);
new HTMLPurifier_Token_End($this->currentNesting[$i]->name);
}
}
unset($this->outputTokens, $this->injectors, $this->currentInjector,
$this->currentNesting, $this->inputTokens, $this->inputIndex);
return $result;
}
/**
 * Commits the outcome of handling one token. The outcome is either a
 * single token, or an array of tokens that an injector substituted for
 * the original one.
 *
 * @param $token   Token object, array of replacement tokens, or a falsy
 *                 value (in which case nothing is done)
 * @param $config  Not read by this method
 * @param $context Not read by this method
 */
function processToken($token, $config, &$context) {
    if (is_array($token)) {
        // An injector overloaded the original token with a list of
        // tokens. Put that list where the single token used to be in
        // the input, then step the input index back by one so that the
        // entire set gets re-processed.
        array_splice($this->inputTokens, $this->inputIndex, 1, $token);
        $this->inputIndex--;

        // adjust the skip counters, based on the size of the
        // substitution, for every injector up to and including the one
        // that performed it
        $skip_increase = count($token) + 1;
        $n = 0;
        while ($n <= $this->currentInjector) {
            $this->injectors[$n]->skip += $skip_increase;
            $n++;
        }
        return;
    }

    // nothing to append
    if (!$token) return;

    // regular case: emit the token and keep the nesting stack in step
    $this->outputTokens[] = $token;
    switch ($token->type) {
        case 'start':
            $this->currentNesting[] = $token;
            break;
        case 'end':
            // marked by the original author as not actually used
            array_pop($this->currentNesting);
            break;
    }
}
}
?>

View File

@@ -17,9 +17,11 @@ HTMLPurifier_ConfigSchema::define(
HTMLPurifier_ConfigSchema::define(
'Core', 'RemoveScriptContents', true, 'bool', '
This directive enables HTML Purifier to remove not only script tags
but all of their contents. This directive has been available since 2.0.0,
revert to pre-2.0.0 behavior by setting to false.
<p>
This directive enables HTML Purifier to remove not only script tags
but all of their contents. This directive has been available since 2.0.0,
revert to pre-2.0.0 behavior by setting to false.
</p>
'
);
@@ -48,6 +50,17 @@ class HTMLPurifier_Strategy_RemoveForeignElements extends HTMLPurifier_Strategy
// removes tokens until it reaches a closing tag with its value
$remove_until = false;
// converts comments into text tokens when this is equal to a tag name
$textify_comments = false;
$token = false;
$context->register('CurrentToken', $token);
$e = false;
if ($config->get('Core', 'CollectErrors')) {
$e =& $context->get('ErrorCollector');
}
foreach($tokens as $token) {
if ($remove_until) {
if (empty($token->is_tag) || $token->name !== $remove_until) {
@@ -61,11 +74,13 @@ class HTMLPurifier_Strategy_RemoveForeignElements extends HTMLPurifier_Strategy
if (
isset($definition->info_tag_transform[$token->name])
) {
$original_name = $token->name;
// there is a transformation for this tag
// DEFINITION CALL
$token = $definition->
info_tag_transform[$token->name]->
transform($token, $config, $context);
if ($e) $e->send(E_NOTICE, 'Strategy_RemoveForeignElements: Tag transform', $original_name);
}
if (isset($definition->info[$token->name])) {
@@ -76,7 +91,7 @@ class HTMLPurifier_Strategy_RemoveForeignElements extends HTMLPurifier_Strategy
$definition->info[$token->name]->required_attr &&
($token->name != 'img' || $remove_invalid_img) // ensure config option still works
) {
$token = $attr_validator->validateToken($token, $config, $context);
$attr_validator->validateToken($token, $config, $context);
$ok = true;
foreach ($definition->info[$token->name]->required_attr as $name) {
if (!isset($token->attr[$name])) {
@@ -84,12 +99,23 @@ class HTMLPurifier_Strategy_RemoveForeignElements extends HTMLPurifier_Strategy
break;
}
}
if (!$ok) continue;
if (!$ok) {
if ($e) $e->send(E_ERROR, 'Strategy_RemoveForeignElements: Missing required attribute', $name);
continue;
}
$token->armor['ValidateAttributes'] = true;
}
// CAN BE GENERICIZED
if ($token->name == 'script' && $token->type == 'start') {
$textify_comments = $token->name;
} elseif ($token->name === $textify_comments && $token->type == 'end') {
$textify_comments = false;
}
} elseif ($escape_invalid_tags) {
// invalid tag, generate HTML and insert in
// invalid tag, generate HTML representation and insert in
if ($e) $e->send(E_WARNING, 'Strategy_RemoveForeignElements: Foreign element to text');
$token = new HTMLPurifier_Token_Text(
$generator->generateFromToken($token, $config, $context)
);
@@ -104,21 +130,37 @@ class HTMLPurifier_Strategy_RemoveForeignElements extends HTMLPurifier_Strategy
} else {
$remove_until = false;
}
if ($e) $e->send(E_ERROR, 'Strategy_RemoveForeignElements: Script removed');
} else {
if ($e) $e->send(E_ERROR, 'Strategy_RemoveForeignElements: Foreign element removed');
}
continue;
}
} elseif ($token->type == 'comment') {
// strip comments
continue;
// textify comments in script tags when they are allowed
if ($textify_comments !== false) {
$data = $token->data;
$token = new HTMLPurifier_Token_Text($data);
} else {
// strip comments
if ($e) $e->send(E_NOTICE, 'Strategy_RemoveForeignElements: Comment removed');
continue;
}
} elseif ($token->type == 'text') {
} else {
continue;
}
$result[] = $token;
}
if ($remove_until && $e) {
// we removed tokens until the end, throw error
$e->send(E_ERROR, 'Strategy_RemoveForeignElements: Token removed to end', $remove_until);
}
$context->destroy('CurrentToken');
return $result;
}
}
?>

View File

@@ -27,6 +27,9 @@ class HTMLPurifier_Strategy_ValidateAttributes extends HTMLPurifier_Strategy
// setup validator
$validator = new HTMLPurifier_AttrValidator();
$token = false;
$context->register('CurrentToken', $token);
foreach ($tokens as $key => $token) {
// only process tokens that have attributes,
@@ -36,7 +39,10 @@ class HTMLPurifier_Strategy_ValidateAttributes extends HTMLPurifier_Strategy
// skip tokens that are armored
if (!empty($token->armor['ValidateAttributes'])) continue;
$tokens[$key] = $validator->validateToken($token, $config, $context);
// note that we have no facilities here for removing tokens
$validator->validateToken($token, $config, $context);
$tokens[$key] = $token; // for PHP 4
}
$context->destroy('IDAccumulator');
@@ -46,4 +52,3 @@ class HTMLPurifier_Strategy_ValidateAttributes extends HTMLPurifier_Strategy
}
?>