1
0
mirror of https://github.com/ezyang/htmlpurifier.git synced 2025-01-17 14:08:15 +01:00

[2.0.1] Implement error collection for RemoveForeignElements.

- Register Generator context variable.
- Implement special substitutions for error collector.
- Also sort by order the errors came in.
- Fix line number determination bug in Lexer::create().
- Remove vestigial variables.
- Force all tag transforms to use copy(), implement serialize, unserialize algorithm for copy() in tokens.

git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@1238 48356398-32a2-884e-a903-53898d9a118a
This commit is contained in:
Edward Z. Yang 2007-06-26 02:49:21 +00:00
parent 98b4e70a93
commit 7a8edc88f9
15 changed files with 203 additions and 42 deletions

2
NEWS
View File

@ -18,6 +18,8 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier
! Newlines normalized internally, and then converted back to the
value of PHP_EOL. If this is not desired, set your newline format
using %Output.Newline.
! Beta error collection, messages are implemented for Lexer and
RemoveForeignElements
- Clean up special case code for <script> tags
- Reorder includes for DefinitionCache decorators, fixes a possible
missing class error

View File

@ -140,6 +140,10 @@ class HTMLPurifier
$context = new HTMLPurifier_Context();
// our friendly neighborhood generator, all primed with configuration too!
$this->generator->generateFromTokens(array(), $config, $context);
$context->register('Generator', $this->generator);
// set up global context variables
if ($config->get('Core', 'CollectErrors')) {
// may get moved out if other facilities use it

View File

@ -11,10 +11,12 @@ class HTMLPurifier_ErrorCollector
var $errors = array();
var $locale;
var $generator;
var $context;
/**
 * Constructor: wires the collector to the shared context.
 *
 * Fetches the 'Locale' and 'Generator' entries from $context and stores
 * them, along with $context itself, on the collector.
 *
 * @param $context Context object exposing get(); presumably an
 *                 HTMLPurifier_Context in which 'Locale' and 'Generator'
 *                 have already been registered (TODO confirm with callers).
 */
function HTMLPurifier_ErrorCollector(&$context) {
// all three are bound with =& (reference assignment) rather than plain assignment
$this->locale =& $context->get('Locale');
$this->generator =& $context->get('Generator');
// the context is kept so later lookups can go through $this->context
$this->context =& $context;
}
@ -40,7 +42,21 @@ class HTMLPurifier_ErrorCollector
$token = $this->context->get('CurrentToken', true);
$line = $token ? $token->line : $this->context->get('CurrentLine', true);
$attr = $this->context->get('CurrentAttr', true);
// perform special substitutions
// Currently defined: $CurrentToken.Name, $CurrentToken.Serialized,
// $CurrentAttr.Name, $CurrentAttr.Value
if (strpos($msg, '$') !== false) {
$subst = array();
if (!is_null($token)) {
if (isset($token->name)) $subst['$CurrentToken.Name'] = $token->name;
$subst['$CurrentToken.Serialized'] = $this->generator->generateFromToken($token);
}
if (!is_null($attr)) {
$subst['$CurrentAttr.Name'] = $attr;
if (isset($token->attr[$attr])) $subst['$CurrentAttr.Value'] = $token->attr[$attr];
}
if (!empty($subst)) $msg = strtr($msg, $subst);
}
$this->errors[] = array($line, $severity, $msg);
}
@ -68,10 +84,12 @@ class HTMLPurifier_ErrorCollector
// line numbers are enabled if they aren't explicitly disabled
if ($config->get('Core', 'MaintainLineNumbers') !== false) {
$lines = array();
foreach ($errors as $error) {
$original_order = array();
foreach ($errors as $i => $error) {
$lines[] = $error[0];
$original_order[] = $i;
}
array_multisort($lines, SORT_ASC, $errors);
array_multisort($lines, SORT_ASC, $original_order, SORT_ASC, $errors);
}
foreach ($errors as $error) {
@ -80,6 +98,8 @@ class HTMLPurifier_ErrorCollector
$string .= $this->locale->getErrorName($severity) . ': ';
$string .= $generator->escape($msg);
if ($line) {
// have javascript link generation that causes
// textarea to skip to the specified line
$string .= $this->locale->formatMessage(
'ErrorCollector: At line', array('line' => $line));
}

View File

@ -46,7 +46,8 @@ HTMLPurifier_ConfigSchema::define('Output', 'Newline', null, 'string/null', '
/**
* Generates HTML from tokens.
* @todo Create a configuration-wide instance that all objects retrieve
* @todo Refactor interface so that configuration/context is determined
* upon instantiation, no need for messy generateFromTokens() calls
*/
class HTMLPurifier_Generator
{

View File

@ -16,6 +16,15 @@ $messages = array(
'Lexer: Missing attribute key' => 'Attribute declaration has no key',
'Lexer: Missing end quote' => 'Attribute declaration has no end quote',
'Strategy_RemoveForeignElements: Tag transform' => '$1 element transformed into $CurrentToken.Serialized',
'Strategy_RemoveForeignElements: Missing required attribute' => '$1 element missing required attribute $2',
'Strategy_RemoveForeignElements: Foreign element to text' => 'Unrecognized $1 element converted to text',
'Strategy_RemoveForeignElements: Foreign element removed' => 'Unrecognized $1 element removed',
'Strategy_RemoveForeignElements: Comment removed' => 'Comment containing "$1" removed',
'Strategy_RemoveForeignElements: Script removed' => 'Inline scripting removed',
'Strategy_RemoveForeignElements: Token removed to end' => 'Tags and text starting from $1 element were removed to end',
);
$errorNames = array(

View File

@ -155,7 +155,11 @@ class HTMLPurifier_Lexer
// once PHP DOM implements native line numbers, or we
// hack out something using XSLT, remove this stipulation
if ($config->get('Core', 'MaintainLineNumbers')) {
$line_numbers = $config->get('Core', 'MaintainLineNumbers');
if (
$line_numbers === true ||
($line_numbers === null && $config->get('Core', 'CollectErrors'))
) {
$lexer = 'DirectLex';
break;
}

View File

@ -59,7 +59,7 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer
// but it's pretty wasteful. Set to 0 to turn off
$synchronize_interval = $config->get('Core', 'DirectLexLineNumberSyncInterval');
$e = $l = false;
$e = false;
if ($config->get('Core', 'CollectErrors')) {
$e =& $context->get('ErrorCollector');
}
@ -285,7 +285,6 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer
}
$context->destroy('CurrentLine');
return $array;
}

View File

@ -56,6 +56,11 @@ class HTMLPurifier_Strategy_RemoveForeignElements extends HTMLPurifier_Strategy
$token = false;
$context->register('CurrentToken', $token);
$e = false;
if ($config->get('Core', 'CollectErrors')) {
$e =& $context->get('ErrorCollector');
}
foreach($tokens as $token) {
if ($remove_until) {
if (empty($token->is_tag) || $token->name !== $remove_until) {
@ -69,11 +74,13 @@ class HTMLPurifier_Strategy_RemoveForeignElements extends HTMLPurifier_Strategy
if (
isset($definition->info_tag_transform[$token->name])
) {
$original_name = $token->name;
// there is a transformation for this tag
// DEFINITION CALL
$token = $definition->
info_tag_transform[$token->name]->
transform($token, $config, $context);
if ($e) $e->send(E_NOTICE, 'Strategy_RemoveForeignElements: Tag transform', $original_name);
}
if (isset($definition->info[$token->name])) {
@ -92,7 +99,10 @@ class HTMLPurifier_Strategy_RemoveForeignElements extends HTMLPurifier_Strategy
break;
}
}
if (!$ok) continue;
if (!$ok) {
if ($e) $e->send(E_ERROR, 'Strategy_RemoveForeignElements: Missing required attribute', $token->name, $name);
continue;
}
$token->armor['ValidateAttributes'] = true;
}
@ -104,7 +114,8 @@ class HTMLPurifier_Strategy_RemoveForeignElements extends HTMLPurifier_Strategy
}
} elseif ($escape_invalid_tags) {
// invalid tag, generate HTML and insert in
// invalid tag, generate HTML representation and insert in
if ($e) $e->send(E_WARNING, 'Strategy_RemoveForeignElements: Foreign element to text', $token->name);
$token = new HTMLPurifier_Token_Text(
$generator->generateFromToken($token, $config, $context)
);
@ -119,6 +130,9 @@ class HTMLPurifier_Strategy_RemoveForeignElements extends HTMLPurifier_Strategy
} else {
$remove_until = false;
}
if ($e) $e->send(E_ERROR, 'Strategy_RemoveForeignElements: Script removed');
} else {
if ($e) $e->send(E_ERROR, 'Strategy_RemoveForeignElements: Foreign element removed', $token->name);
}
continue;
}
@ -129,6 +143,7 @@ class HTMLPurifier_Strategy_RemoveForeignElements extends HTMLPurifier_Strategy
$token = new HTMLPurifier_Token_Text($data);
} else {
// strip comments
if ($e) $e->send(E_ERROR, 'Strategy_RemoveForeignElements: Comment removed', $token->data);
continue;
}
} elseif ($token->type == 'text') {
@ -137,6 +152,10 @@ class HTMLPurifier_Strategy_RemoveForeignElements extends HTMLPurifier_Strategy
}
$result[] = $token;
}
if ($remove_until && $e) {
// we removed tokens until the end, throw error
$e->send(E_ERROR, 'Strategy_RemoveForeignElements: Token removed to end', $remove_until);
}
$context->destroy('CurrentToken');

View File

@ -39,7 +39,8 @@ class HTMLPurifier_TagTransform_Font extends HTMLPurifier_TagTransform
function transform($tag, $config, &$context) {
if ($tag->type == 'end') {
$new_tag = new HTMLPurifier_Token_End($this->transform_to);
$new_tag = $tag->copy();
$new_tag->name = $this->transform_to;
return $new_tag;
}

View File

@ -24,7 +24,7 @@ class HTMLPurifier_Token {
* @return Copied token
*/
function copy() {
trigger_error('Cannot copy abstract class', E_USER_ERROR);
return unserialize(serialize($this));
}
}
@ -89,9 +89,6 @@ class HTMLPurifier_Token_Tag extends HTMLPurifier_Token // abstract
class HTMLPurifier_Token_Start extends HTMLPurifier_Token_Tag
{
var $type = 'start';
function copy() {
return new HTMLPurifier_Token_Start($this->name, $this->attr);
}
}
/**
@ -100,9 +97,6 @@ class HTMLPurifier_Token_Start extends HTMLPurifier_Token_Tag
class HTMLPurifier_Token_Empty extends HTMLPurifier_Token_Tag
{
var $type = 'empty';
function copy() {
return new HTMLPurifier_Token_Empty($this->name, $this->attr);
}
}
/**
@ -115,9 +109,6 @@ class HTMLPurifier_Token_Empty extends HTMLPurifier_Token_Tag
class HTMLPurifier_Token_End extends HTMLPurifier_Token_Tag
{
var $type = 'end';
function copy() {
return new HTMLPurifier_Token_End($this->name);
}
}
/**
@ -146,9 +137,6 @@ class HTMLPurifier_Token_Text extends HTMLPurifier_Token
$this->data = $data;
$this->is_whitespace = ctype_space($data);
}
function copy() {
return new HTMLPurifier_Token_Text($this->data);
}
}
@ -167,9 +155,6 @@ class HTMLPurifier_Token_Comment extends HTMLPurifier_Token
function HTMLPurifier_Token_Comment($data) {
$this->data = $data;
}
function copy() {
return new HTMLPurifier_Token_Comment($this->data);
}
}
?>

View File

@ -7,6 +7,7 @@ class HTMLPurifier_ErrorCollectorTest extends UnitTestCase
/**
 * Makes sure mock classes exist for the collaborators the tests replace:
 * the language object and the generator.
 */
function setup() {
$mocked_classes = array('HTMLPurifier_Language', 'HTMLPurifier_Generator');
foreach ($mocked_classes as $class_name) {
generate_mock_once($class_name);
}
}
function test() {
@ -25,6 +26,9 @@ class HTMLPurifier_ErrorCollectorTest extends UnitTestCase
$context->register('Locale', $language);
$context->register('CurrentLine', $line);
$generator = new HTMLPurifier_GeneratorMock();
$context->register('Generator', $generator);
$collector = new HTMLPurifier_ErrorCollector($context);
$line = 23;
@ -56,6 +60,9 @@ class HTMLPurifier_ErrorCollectorTest extends UnitTestCase
$context = new HTMLPurifier_Context();
$context->register('Locale', $language);
$generator = new HTMLPurifier_GeneratorMock();
$context->register('Generator', $generator);
$collector = new HTMLPurifier_ErrorCollector($context);
$formatted_result = '<p>No errors</p>';
$config = HTMLPurifier_Config::createDefault();
@ -70,6 +77,9 @@ class HTMLPurifier_ErrorCollectorTest extends UnitTestCase
$context = new HTMLPurifier_Context();
$context->register('Locale', $language);
$generator = new HTMLPurifier_GeneratorMock();
$context->register('Generator', $generator);
$collector = new HTMLPurifier_ErrorCollector($context);
$collector->send(E_ERROR, 'message-1');
$collector->send(E_ERROR, 'message-2');
@ -87,6 +97,42 @@ class HTMLPurifier_ErrorCollectorTest extends UnitTestCase
$this->assertIdentical($collector->getHTMLFormatted($config), $formatted_result);
}
/**
 * Verifies the special '$CurrentToken.*' / '$CurrentAttr.*' placeholders in
 * error messages against a mocked language object and generator.
 *
 * Three messages are sent: one using the token placeholders, one using the
 * attribute placeholders before 'CurrentAttr' is registered, and the same
 * one again after it is registered. The raw collector output is then
 * compared against the expected (line, severity, message) triples.
 */
function testContextSubstitutions() {
// language mock: getMessage() is stubbed to return a placeholder template per message key
$language = new HTMLPurifier_LanguageMock();
$language->setReturnValue('getMessage',
'$CurrentToken.Name, $CurrentToken.Serialized', array('message-token'));
$language->setReturnValue('getMessage',
'$CurrentAttr.Name => $CurrentAttr.Value', array('message-attr'));
$context = new HTMLPurifier_Context();
$context->register('Locale', $language);
// token that will act as the current token; its line (32) is the line expected in every result row
$current_token = new HTMLPurifier_Token_Start('a', array('href' => 'http://example.com'));
$current_token->line = 32;
$current_attr = 'href';
// generator mock: stubbed serialization returned for generateFromToken($current_token)
$generator = new HTMLPurifier_GeneratorMock();
$generator->setReturnValue('generateFromToken', '<a href="http://example.com">', array($current_token));
$context->register('Generator', $generator);
// 'Locale' and 'Generator' are registered before the collector is constructed
$collector = new HTMLPurifier_ErrorCollector($context);
$context->register('CurrentToken', $current_token);
$collector->send(E_NOTICE, 'message-token');
// 'CurrentAttr' is not registered yet, so the attribute placeholders are expected to stay verbatim (row 1)
$collector->send(E_NOTICE, 'message-attr'); // test when context isn't available
$context->register('CurrentAttr', $current_attr);
$collector->send(E_NOTICE, 'message-attr');
// expected rows, in send order: array(line, severity, substituted message)
$result = array(
0 => array(32, E_NOTICE, 'a, <a href="http://example.com">'),
1 => array(32, E_NOTICE, '$CurrentAttr.Name => $CurrentAttr.Value'),
2 => array(32, E_NOTICE, 'href => http://example.com')
);
$this->assertIdentical($collector->getRaw(), $result);
}
}
?>

View File

@ -0,0 +1,27 @@
<?php
require_once 'HTMLPurifier/ErrorCollector.php';
require_once 'HTMLPurifier/Lexer/DirectLex.php';
/**
 * Shared base class for error-collection tests.
 *
 * setup() builds a configuration with Core.CollectErrors enabled, a fresh
 * context, and a mocked HTMLPurifier_ErrorCollector registered in that
 * context under 'ErrorCollector'. Subclasses declare the send() calls they
 * expect through expectErrorCollection().
 */
class HTMLPurifier_ErrorsHarness extends UnitTestCase
{
// configuration and context handed to the code under test
var $config, $context;
// $collector is the mocked error collector; $generator is declared but never assigned in this class
var $collector, $generator;
/**
 * Creates the config, context and mock collector used by each test.
 */
function setup() {
$this->config = HTMLPurifier_Config::create(array('Core.CollectErrors' => true));
$this->context = new HTMLPurifier_Context();
generate_mock_once('HTMLPurifier_ErrorCollector');
$this->collector = new HTMLPurifier_ErrorCollectorMock($this);
// code under test looks the collector up in the context under this name
$this->context->register('ErrorCollector', $this->collector);
}
/**
 * Registers an expectOnce() expectation on the mock collector's send()
 * method. Accepts a variable number of arguments; all of them are passed
 * on, in order, as the expected arguments of send().
 */
function expectErrorCollection() {
// stored in a local first; older PHP versions reject func_get_args() used directly as a call argument
$args = func_get_args();
$this->collector->expectOnce('send', $args);
}
}
?>

View File

@ -1,21 +1,11 @@
<?php
require_once 'HTMLPurifier/ErrorsHarness.php';
require_once 'HTMLPurifier/Lexer/DirectLex.php';
class HTMLPurifier_Lexer_DirectLex_ErrorsTest extends UnitTestCase
class HTMLPurifier_Lexer_DirectLex_ErrorsTest extends HTMLPurifier_ErrorsHarness
{
var $config, $context;
var $collector;
function setup() {
$this->config = HTMLPurifier_Config::create(array('Core.CollectErrors' => true));
$this->context = new HTMLPurifier_Context();
generate_mock_once('HTMLPurifier_ErrorCollector');
$this->collector = new HTMLPurifier_ErrorCollectorMock($this);
$this->context->register('ErrorCollector', $this->collector);
}
function invoke($input) {
$lexer = new HTMLPurifier_Lexer_DirectLex();
$lexer->tokenizeHTML($input, $this->config, $this->context);
@ -26,10 +16,6 @@ class HTMLPurifier_Lexer_DirectLex_ErrorsTest extends UnitTestCase
$lexer->parseAttributeString($input, $this->config, $this->context);
}
function expectErrorCollection($severity, $msg) {
$this->collector->expectOnce('send', array($severity, $msg));
}
function testUnclosedComment() {
$this->expectErrorCollection(E_WARNING, 'Lexer: Unclosed comment');
$this->invoke('<!-- >');

View File

@ -0,0 +1,57 @@
<?php
require_once 'HTMLPurifier/ErrorsHarness.php';
require_once 'HTMLPurifier/Strategy/RemoveForeignElements.php';
/**
 * Checks which messages HTMLPurifier_Strategy_RemoveForeignElements sends
 * to the (mocked) error collector provided by the errors harness.
 */
class HTMLPurifier_Strategy_RemoveForeignElements_ErrorsTest extends HTMLPurifier_ErrorsHarness
{
/**
 * Runs the harness setup, then switches HTML.TidyLevel to 'heavy'.
 */
function setup() {
parent::setup();
$this->config->set('HTML', 'TidyLevel', 'heavy');
}
/**
 * Tokenizes $input with DirectLex and runs the resulting tokens through
 * the strategy, using the harness config and context.
 *
 * @param $input HTML string to process
 */
function invoke($input) {
$remove_foreign = new HTMLPurifier_Strategy_RemoveForeignElements();
$direct_lex = new HTMLPurifier_Lexer_DirectLex();
$token_stream = $direct_lex->tokenizeHTML($input, $this->config, $this->context);
$remove_foreign->execute($token_stream, $this->config, $this->context);
}
/**
 * A transformed tag is reported with the original element name.
 */
function testTagTransform() {
// the message template for this key uses $CurrentToken.Serialized
$this->expectErrorCollection(
E_NOTICE,
'Strategy_RemoveForeignElements: Tag transform',
'center'
);
$this->invoke('<center>');
}
/**
 * An element lacking a required attribute is reported with the element
 * and attribute names.
 */
function testMissingRequiredAttr() {
// somewhat fragile: img has two required attributes, 'alt' is the one expected here
$this->expectErrorCollection(
E_ERROR,
'Strategy_RemoveForeignElements: Missing required attribute',
'img',
'alt'
);
$this->invoke('<img />');
}
/**
 * With Core.EscapeInvalidTags on, an unknown element is reported as
 * converted to text.
 */
function testForeignElementToText() {
$this->config->set('Core', 'EscapeInvalidTags', true);
$this->expectErrorCollection(
E_WARNING,
'Strategy_RemoveForeignElements: Foreign element to text',
'cannot-possibly-exist-element'
);
$this->invoke('<cannot-possibly-exist-element>');
}
/**
 * Without escaping, an unknown element is reported as removed.
 */
function testForeignElementRemoved() {
$this->expectErrorCollection(
E_ERROR,
'Strategy_RemoveForeignElements: Foreign element removed',
'cannot-possibly-exist-element'
);
$this->invoke('<cannot-possibly-exist-element>');
}
/**
 * A comment is reported as removed, with its contents as the argument.
 */
function testCommentRemoved() {
$this->expectErrorCollection(
E_ERROR,
'Strategy_RemoveForeignElements: Comment removed',
' test '
);
$this->invoke('<!-- test -->');
}
/**
 * An unclosed script element yields two reports, in order: the script
 * removal, then the removal of everything up to the end of input.
 */
function testScriptRemoved() {
$expected_sends = array(
0 => array(E_ERROR, 'Strategy_RemoveForeignElements: Script removed'),
1 => array(E_ERROR, 'Strategy_RemoveForeignElements: Token removed to end', 'script'),
);
foreach ($expected_sends as $position => $arguments) {
$this->collector->expectAt($position, 'send', $arguments);
}
$this->invoke('<script>asdf');
}
}
?>

View File

@ -95,6 +95,7 @@ $test_files[] = 'HTMLPurifier/Strategy/CoreTest.php';
$test_files[] = 'HTMLPurifier/Strategy/FixNestingTest.php';
$test_files[] = 'HTMLPurifier/Strategy/MakeWellFormedTest.php';
$test_files[] = 'HTMLPurifier/Strategy/RemoveForeignElementsTest.php';
$test_files[] = 'HTMLPurifier/Strategy/RemoveForeignElements_ErrorsTest.php';
$test_files[] = 'HTMLPurifier/Strategy/ValidateAttributesTest.php';
$test_files[] = 'HTMLPurifier/TagTransformTest.php';
$test_files[] = 'HTMLPurifier/TokenTest.php';