1
0
mirror of https://github.com/ezyang/htmlpurifier.git synced 2025-08-04 13:18:00 +02:00

[2.0.1] Implement error collection for RemoveForeignElements.

- Register Generator context variable.
- Implement special substitutions for error collector.
- Also sort errors by the order in which they came in.
- Fix line number determination bug in Lexer::create().
- Remove vestigial variables.
- Force all tag transforms to use copy(); implement a serialize/unserialize algorithm for copy() in tokens.

git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@1238 48356398-32a2-884e-a903-53898d9a118a
This commit is contained in:
Edward Z. Yang
2007-06-26 02:49:21 +00:00
parent 98b4e70a93
commit 7a8edc88f9
15 changed files with 203 additions and 42 deletions

View File

@@ -11,10 +11,12 @@ class HTMLPurifier_ErrorCollector
var $errors = array();
var $locale;
var $generator;
var $context;
/**
 * Constructor: binds this collector to the objects it reads from the context.
 * @param $context Context object, taken by reference. The 'Locale' and
 *        'Generator' entries are fetched from it here (presumably they must
 *        already be registered -- confirm against the caller).
 */
function HTMLPurifier_ErrorCollector(&$context) {
// =& is reference assignment: the properties alias the context's entries
$this->locale =& $context->get('Locale');
$this->generator =& $context->get('Generator');
// keep the context itself; this class later reads entries such as
// 'CurrentToken' through $this->context
$this->context =& $context;
}
@@ -40,7 +42,21 @@ class HTMLPurifier_ErrorCollector
$token = $this->context->get('CurrentToken', true);
$line = $token ? $token->line : $this->context->get('CurrentLine', true);
$attr = $this->context->get('CurrentAttr', true);
// perform special substitutions
// Currently defined: $CurrentToken.Name, $CurrentToken.Serialized,
// $CurrentAttr.Name, $CurrentAttr.Value
if (strpos($msg, '$') !== false) {
$subst = array();
if (!is_null($token)) {
if (isset($token->name)) $subst['$CurrentToken.Name'] = $token->name;
$subst['$CurrentToken.Serialized'] = $this->generator->generateFromToken($token);
}
if (!is_null($attr)) {
$subst['$CurrentAttr.Name'] = $attr;
if (isset($token->attr[$attr])) $subst['$CurrentAttr.Value'] = $token->attr[$attr];
}
if (!empty($subst)) $msg = strtr($msg, $subst);
}
$this->errors[] = array($line, $severity, $msg);
}
@@ -68,10 +84,12 @@ class HTMLPurifier_ErrorCollector
// line numbers are enabled if they aren't explicitly disabled
if ($config->get('Core', 'MaintainLineNumbers') !== false) {
$lines = array();
foreach ($errors as $error) {
$original_order = array();
foreach ($errors as $i => $error) {
$lines[] = $error[0];
$original_order[] = $i;
}
array_multisort($lines, SORT_ASC, $errors);
array_multisort($lines, SORT_ASC, $original_order, SORT_ASC, $errors);
}
foreach ($errors as $error) {
@@ -80,6 +98,8 @@ class HTMLPurifier_ErrorCollector
$string .= $this->locale->getErrorName($severity) . ': ';
$string .= $generator->escape($msg);
if ($line) {
// have javascript link generation that causes
// textarea to skip to the specified line
$string .= $this->locale->formatMessage(
'ErrorCollector: At line', array('line' => $line));
}

View File

@@ -46,7 +46,8 @@ HTMLPurifier_ConfigSchema::define('Output', 'Newline', null, 'string/null', '
/**
* Generates HTML from tokens.
* @todo Create a configuration-wide instance that all objects retrieve
* @todo Refactor interface so that configuration/context is determined
* upon instantiation, no need for messy generateFromTokens() calls
*/
class HTMLPurifier_Generator
{

View File

@@ -16,6 +16,15 @@ $messages = array(
'Lexer: Missing attribute key' => 'Attribute declaration has no key',
'Lexer: Missing end quote' => 'Attribute declaration has no end quote',
'Strategy_RemoveForeignElements: Tag transform' => '$1 element transformed into $CurrentToken.Serialized',
'Strategy_RemoveForeignElements: Missing required attribute' => '$1 element missing required attribute $2',
'Strategy_RemoveForeignElements: Foreign element to text' => 'Unrecognized $1 element converted to text',
'Strategy_RemoveForeignElements: Foreign element removed' => 'Unrecognized $1 element removed',
'Strategy_RemoveForeignElements: Comment removed' => 'Comment containing "$1" removed',
'Strategy_RemoveForeignElements: Script removed' => 'Inline scripting removed',
'Strategy_RemoveForeignElements: Token removed to end' => 'Tags and text starting from $1 element where removed to end',
);
$errorNames = array(

View File

@@ -155,7 +155,11 @@ class HTMLPurifier_Lexer
// once PHP DOM implements native line numbers, or we
// hack out something using XSLT, remove this stipulation
if ($config->get('Core', 'MaintainLineNumbers')) {
$line_numbers = $config->get('Core', 'MaintainLineNumbers');
if (
$line_numbers === true ||
($line_numbers === null && $config->get('Core', 'CollectErrors'))
) {
$lexer = 'DirectLex';
break;
}

View File

@@ -59,7 +59,7 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer
// but it's pretty wasteful. Set to 0 to turn off
$synchronize_interval = $config->get('Core', 'DirectLexLineNumberSyncInterval');
$e = $l = false;
$e = false;
if ($config->get('Core', 'CollectErrors')) {
$e =& $context->get('ErrorCollector');
}
@@ -285,7 +285,6 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer
}
$context->destroy('CurrentLine');
return $array;
}

View File

@@ -56,6 +56,11 @@ class HTMLPurifier_Strategy_RemoveForeignElements extends HTMLPurifier_Strategy
$token = false;
$context->register('CurrentToken', $token);
$e = false;
if ($config->get('Core', 'CollectErrors')) {
$e =& $context->get('ErrorCollector');
}
foreach($tokens as $token) {
if ($remove_until) {
if (empty($token->is_tag) || $token->name !== $remove_until) {
@@ -69,11 +74,13 @@ class HTMLPurifier_Strategy_RemoveForeignElements extends HTMLPurifier_Strategy
if (
isset($definition->info_tag_transform[$token->name])
) {
$original_name = $token->name;
// there is a transformation for this tag
// DEFINITION CALL
$token = $definition->
info_tag_transform[$token->name]->
transform($token, $config, $context);
if ($e) $e->send(E_NOTICE, 'Strategy_RemoveForeignElements: Tag transform', $original_name);
}
if (isset($definition->info[$token->name])) {
@@ -92,7 +99,10 @@ class HTMLPurifier_Strategy_RemoveForeignElements extends HTMLPurifier_Strategy
break;
}
}
if (!$ok) continue;
if (!$ok) {
if ($e) $e->send(E_ERROR, 'Strategy_RemoveForeignElements: Missing required attribute', $token->name, $name);
continue;
}
$token->armor['ValidateAttributes'] = true;
}
@@ -104,7 +114,8 @@ class HTMLPurifier_Strategy_RemoveForeignElements extends HTMLPurifier_Strategy
}
} elseif ($escape_invalid_tags) {
// invalid tag, generate HTML and insert in
// invalid tag, generate HTML representation and insert in
if ($e) $e->send(E_WARNING, 'Strategy_RemoveForeignElements: Foreign element to text', $token->name);
$token = new HTMLPurifier_Token_Text(
$generator->generateFromToken($token, $config, $context)
);
@@ -119,6 +130,9 @@ class HTMLPurifier_Strategy_RemoveForeignElements extends HTMLPurifier_Strategy
} else {
$remove_until = false;
}
if ($e) $e->send(E_ERROR, 'Strategy_RemoveForeignElements: Script removed');
} else {
if ($e) $e->send(E_ERROR, 'Strategy_RemoveForeignElements: Foreign element removed', $token->name);
}
continue;
}
@@ -129,6 +143,7 @@ class HTMLPurifier_Strategy_RemoveForeignElements extends HTMLPurifier_Strategy
$token = new HTMLPurifier_Token_Text($data);
} else {
// strip comments
if ($e) $e->send(E_ERROR, 'Strategy_RemoveForeignElements: Comment removed', $token->data);
continue;
}
} elseif ($token->type == 'text') {
@@ -137,6 +152,10 @@ class HTMLPurifier_Strategy_RemoveForeignElements extends HTMLPurifier_Strategy
}
$result[] = $token;
}
if ($remove_until && $e) {
// we removed tokens until the end, throw error
$e->send(E_ERROR, 'Strategy_RemoveForeignElements: Token removed to end', $remove_until);
}
$context->destroy('CurrentToken');

View File

@@ -39,7 +39,8 @@ class HTMLPurifier_TagTransform_Font extends HTMLPurifier_TagTransform
function transform($tag, $config, &$context) {
if ($tag->type == 'end') {
$new_tag = new HTMLPurifier_Token_End($this->transform_to);
$new_tag = $tag->copy();
$new_tag->name = $this->transform_to;
return $new_tag;
}

View File

@@ -24,7 +24,7 @@ class HTMLPurifier_Token {
* @return Copied token
*/
function copy() {
trigger_error('Cannot copy abstract class', E_USER_ERROR);
return unserialize(serialize($this));
}
}
@@ -89,9 +89,6 @@ class HTMLPurifier_Token_Tag extends HTMLPurifier_Token // abstract
class HTMLPurifier_Token_Start extends HTMLPurifier_Token_Tag
{
var $type = 'start';
function copy() {
return new HTMLPurifier_Token_Start($this->name, $this->attr);
}
}
/**
@@ -100,9 +97,6 @@ class HTMLPurifier_Token_Start extends HTMLPurifier_Token_Tag
class HTMLPurifier_Token_Empty extends HTMLPurifier_Token_Tag
{
var $type = 'empty';
function copy() {
return new HTMLPurifier_Token_Empty($this->name, $this->attr);
}
}
/**
@@ -115,9 +109,6 @@ class HTMLPurifier_Token_Empty extends HTMLPurifier_Token_Tag
class HTMLPurifier_Token_End extends HTMLPurifier_Token_Tag
{
var $type = 'end';
function copy() {
return new HTMLPurifier_Token_End($this->name);
}
}
/**
@@ -146,9 +137,6 @@ class HTMLPurifier_Token_Text extends HTMLPurifier_Token
$this->data = $data;
$this->is_whitespace = ctype_space($data);
}
function copy() {
return new HTMLPurifier_Token_Text($this->data);
}
}
@@ -167,9 +155,6 @@ class HTMLPurifier_Token_Comment extends HTMLPurifier_Token
function HTMLPurifier_Token_Comment($data) {
$this->data = $data;
}
function copy() {
return new HTMLPurifier_Token_Comment($this->data);
}
}
?>