1
0
mirror of https://github.com/ezyang/htmlpurifier.git synced 2025-01-16 13:39:02 +01:00

[2.0.1] Implement haphazard error collection for AttrValidator.

- Error collector / Language can take arrays and listify them
- AttrValidator takes token by reference
- Formatted errors now have their severity <strong>
- 100 test-cases! W00t!

git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@1250 48356398-32a2-884e-a903-53898d9a118a
This commit is contained in:
Edward Z. Yang 2007-06-27 02:03:15 +00:00
parent a005da8a4c
commit 3a1d505b3d
12 changed files with 205 additions and 44 deletions

4
NEWS
View File

@ -18,8 +18,8 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier
! Newlines normalized internally, and then converted back to the
value of PHP_EOL. If this is not desired, set your newline format
using %Output.Newline.
! Beta error collection, messages are implemented for Lexer and
RemoveForeignElements
! Beta error collection, messages are implemented for the most generic
cases involving Lexing or Strategies
- Clean up special case code for <script> tags
- Reorder includes for DefinitionCache decorators, fixes a possible
missing class error

BIN
art/100cases.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 2.7 KiB

View File

@ -1,12 +1,31 @@
<?php
/**
* Validates the attributes of a token. Doesn't manage required attributes
* very well. The only reason we factored this out was because RemoveForeignElements
* also needed it besides ValidateAttributes.
*/
class HTMLPurifier_AttrValidator
{
function validateToken($token, &$config, &$context) {
/**
* Validates the attributes of a token, returning a modified token
* that has valid tokens
* @param $token Reference to token to validate. We require a reference
* because the operation this class performs on the token are
* not atomic, so the context CurrentToken to be updated
* throughout
* @param $config Instance of HTMLPurifier_Config
* @param $context Instance of HTMLPurifier_Context
*/
function validateToken(&$token, &$config, &$context) {
$definition = $config->getHTMLDefinition();
$e =& $context->get('ErrorCollector', true);
// initialize CurrentToken if necessary
$current_token =& $context->get('CurrentToken', true);
if (!$current_token) $context->register('CurrentToken', $token);
if ($token->type !== 'start' && $token->type !== 'empty') return $token;
@ -14,21 +33,21 @@ class HTMLPurifier_AttrValidator
// DEFINITION CALL
$d_defs = $definition->info_global_attr;
// copy out attributes for easy manipulation
$attr = $token->attr;
// reference attributes for easy manipulation
$attr =& $token->attr;
// do global transformations (pre)
// nothing currently utilizes this
foreach ($definition->info_attr_transform_pre as $transform) {
$attr = $transform->transform($attr, $config, $context);
$attr = $transform->transform($o = $attr, $config, $context);
if ($e && ($attr != $o)) $e->send(E_NOTICE, 'AttrValidator: Attributes transformed', $o, $attr);
}
// do local transformations only applicable to this element (pre)
// ex. <p align="right"> to <p style="text-align:right;">
foreach ($definition->info[$token->name]->attr_transform_pre
as $transform
) {
$attr = $transform->transform($attr, $config, $context);
foreach ($definition->info[$token->name]->attr_transform_pre as $transform) {
$attr = $transform->transform($o = $attr, $config, $context);
if ($e && ($attr != $o)) $e->send(E_NOTICE, 'AttrValidator: Attributes transformed', $o, $attr);
}
// create alias to this element's attribute definition array, see
@ -36,6 +55,9 @@ class HTMLPurifier_AttrValidator
// DEFINITION CALL
$defs = $definition->info[$token->name]->attr;
$attr_key = false;
$context->register('CurrentAttr', $attr_key);
// iterate through all the attribute keypairs
// Watch out for name collisions: $key has previously been used
foreach ($attr as $attr_key => $value) {
@ -69,9 +91,17 @@ class HTMLPurifier_AttrValidator
// put the results into effect
if ($result === false || $result === null) {
// this is a generic error message that should replaced
// with more specific ones when possible
if ($e) $e->send(E_ERROR, 'AttrValidator: Attribute removed');
// remove the attribute
unset($attr[$attr_key]);
} elseif (is_string($result)) {
// generally, if a substitution is happening, there
// was some sort of implicit correction going on. We'll
// delegate it to the attribute classes to say exactly what.
// simple substitution
$attr[$attr_key] = $result;
}
@ -83,21 +113,24 @@ class HTMLPurifier_AttrValidator
// others would prepend themselves).
}
$context->destroy('CurrentAttr');
// post transforms
// ex. <x lang="fr"> to <x lang="fr" xml:lang="fr">
// global (error reporting untested)
foreach ($definition->info_attr_transform_post as $transform) {
$attr = $transform->transform($attr, $config, $context);
$attr = $transform->transform($o = $attr, $config, $context);
if ($e && ($attr != $o)) $e->send(E_NOTICE, 'AttrValidator: Attributes transformed', $o, $attr);
}
// ex. <bdo> to <bdo dir="ltr">
// local (error reporting untested)
foreach ($definition->info[$token->name]->attr_transform_post as $transform) {
$attr = $transform->transform($attr, $config, $context);
$attr = $transform->transform($o = $attr, $config, $context);
if ($e && ($attr != $o)) $e->send(E_NOTICE, 'AttrValidator: Attributes transformed', $o, $attr);
}
// commit changes
$token->attr = $attr;
return $token;
// destroy CurrentToken if we made it ourselves
if (!$current_token) $context->destroy('CurrentToken');
}

View File

@ -26,9 +26,10 @@ class HTMLPurifier_ErrorCollector
* @param $severity int Error severity, PHP error style (don't use E_USER_)
* @param $msg string Error message text
*/
function send($severity, $msg, $args = array()) {
function send($severity, $msg) {
if (!is_array($args)) {
$args = array();
if (func_num_args() > 2) {
$args = func_get_args();
array_shift($args);
unset($args[0]);
@ -94,7 +95,7 @@ class HTMLPurifier_ErrorCollector
foreach ($errors as $error) {
list($line, $severity, $msg) = $error;
$string = '';
$string .= $this->locale->getErrorName($severity) . ': ';
$string .= '<strong>' . $this->locale->getErrorName($severity) . '</strong>: ';
$string .= $this->generator->escape($msg);
if ($line) {
// have javascript link generation that causes

View File

@ -78,6 +78,25 @@ class HTMLPurifier_Language
return $this->errorNames[$int];
}
/**
* Converts an array list into a string readable representation
*/
function listify($array) {
$sep = $this->getMessage('Item separator');
$sep_last = $this->getMessage('Item separator last');
$ret = '';
for ($i = 0, $c = count($array); $i < $c; $i++) {
if ($i == 0) {
} elseif ($i + 1 < $c) {
$ret .= $sep;
} else {
$ret .= $sep_last;
}
$ret .= $array[$i];
}
return $ret;
}
/**
* Formats a localised message with passed parameters
* @param $key string identifier of message
@ -94,22 +113,35 @@ class HTMLPurifier_Language
$generator = false;
foreach ($args as $i => $value) {
if (is_object($value)) {
// complicated stuff
if (!$generator) $generator = $this->context->get('Generator');
// assuming it's a token
if (isset($value->name)) $subst['$'.$i.'.Name'] = $value->name;
if (isset($value->data)) $subst['$'.$i.'.Data'] = $value->data;
$subst['$'.$i.'.Compact'] =
$subst['$'.$i.'.Serialized'] = $generator->generateFromToken($value);
// a more complex algorithm for compact representation
// could be introduced for all types of tokens. This
// may need to be factored out into a dedicated class
if (!empty($value->attr)) {
$stripped_token = $value->copy();
$stripped_token->attr = array();
$subst['$'.$i.'.Compact'] = $generator->generateFromToken($stripped_token);
if (is_a($value, 'HTMLPurifier_Token')) {
// factor this out some time
if (!$generator) $generator = $this->context->get('Generator');
if (isset($value->name)) $subst['$'.$i.'.Name'] = $value->name;
if (isset($value->data)) $subst['$'.$i.'.Data'] = $value->data;
$subst['$'.$i.'.Compact'] =
$subst['$'.$i.'.Serialized'] = $generator->generateFromToken($value);
// a more complex algorithm for compact representation
// could be introduced for all types of tokens. This
// may need to be factored out into a dedicated class
if (!empty($value->attr)) {
$stripped_token = $value->copy();
$stripped_token->attr = array();
$subst['$'.$i.'.Compact'] = $generator->generateFromToken($stripped_token);
}
$subst['$'.$i.'.Line'] = $value->line ? $value->line : 'unknown';
}
continue;
} elseif (is_array($value)) {
$keys = array_keys($value);
if (array_keys($keys) === $keys) {
// list
$subst['$'.$i] = $this->listify($value);
} else {
// associative array
// no $i implementation yet, sorry
$subst['$'.$i.'.Keys'] = $this->listify($keys);
$subst['$'.$i.'.Values'] = $this->listify(array_values($value));
}
$subst['$'.$i.'.Line'] = $value->line ? $value->line : 'unknown';
continue;
}
$subst['$' . $i] = $value;

View File

@ -5,7 +5,14 @@ $fallback = false;
$messages = array(
'HTMLPurifier' => 'HTML Purifier',
'LanguageFactoryTest: Pizza' => 'Pizza', // for unit testing purposes
// for unit testing purposes
'LanguageFactoryTest: Pizza' => 'Pizza',
'LanguageTest: List' => '$1',
'LanguageTest: Hash' => '$1.Keys; $1.Values',
'Item separator' => ', ',
'Item separator last' => ' and ', // non-Harvard style
'ErrorCollector: No errors' => 'No errors detected. However, because error reporting is still incomplete, there may have been errors that the error collector was not notified of; please inspect the output HTML carefully.',
'ErrorCollector: At line' => ' at line $line',
@ -37,6 +44,9 @@ $messages = array(
'Strategy_FixNesting: Node reorganized' => 'Contents of $CurrentToken.Compact node reorganized to enforce its content model',
'Strategy_FixNesting: Node contents removed' => 'Contents of $CurrentToken.Compact node removed',
'AttrValidator: Attributes transformed' => 'Attributes on $CurrentToken.Compact transformed from $1.Keys to $2.Keys',
'AttrValidator: Attribute removed' => '$CurrentAttr.Name attribute on $CurrentToken.Compact removed',
);
$errorNames = array(

View File

@ -91,7 +91,7 @@ class HTMLPurifier_Strategy_RemoveForeignElements extends HTMLPurifier_Strategy
$definition->info[$token->name]->required_attr &&
($token->name != 'img' || $remove_invalid_img) // ensure config option still works
) {
$token = $attr_validator->validateToken($token, $config, $context);
$attr_validator->validateToken($token, $config, $context);
$ok = true;
foreach ($definition->info[$token->name]->required_attr as $name) {
if (!isset($token->attr[$name])) {

View File

@ -27,6 +27,9 @@ class HTMLPurifier_Strategy_ValidateAttributes extends HTMLPurifier_Strategy
// setup validator
$validator = new HTMLPurifier_AttrValidator();
$token = false;
$context->register('CurrentToken', $token);
foreach ($tokens as $key => $token) {
// only process tokens that have attributes,
@ -36,7 +39,8 @@ class HTMLPurifier_Strategy_ValidateAttributes extends HTMLPurifier_Strategy
// skip tokens that are armored
if (!empty($token->armor['ValidateAttributes'])) continue;
$tokens[$key] = $validator->validateToken($token, $config, $context);
// note that we have no facilities here for removing tokens
$validator->validateToken($token, $config, $context);
}
$context->destroy('IDAccumulator');

View File

@ -0,0 +1,50 @@
<?php
require_once 'HTMLPurifier/ErrorsHarness.php';
require_once 'HTMLPurifier/AttrValidator.php';
class HTMLPurifier_AttrValidator_ErrorsTest extends HTMLPurifier_ErrorsHarness
{
function invoke($input) {
$validator = new HTMLPurifier_AttrValidator();
$validator->validateToken($input, $this->config, $this->context);
}
function testAttributesTransformedGlobalPre() {
$this->config->set('HTML', 'DefinitionID',
'HTMLPurifier_AttrValidator_ErrorsTest::testAttributesTransformedGlobalPre');
$def =& $this->config->getHTMLDefinition(true);
generate_mock_once('HTMLPurifier_AttrTransform');
$transform = new HTMLPurifier_AttrTransformMock();
$input = array('original' => 'value');
$output = array('class' => 'value'); // must be valid
$transform->setReturnValue('transform', $output, array($input, new AnythingExpectation(), new AnythingExpectation()));
$def->info_attr_transform_pre[] = $transform;
$this->expectErrorCollection(E_NOTICE, 'AttrValidator: Attributes transformed', $input, $output);
$token = new HTMLPurifier_Token_Start('span', $input, 1);
$this->invoke($token);
}
function testAttributesTransformedLocalPre() {
$this->config->set('HTML', 'TidyLevel', 'heavy');
$input = array('align' => 'right');
$output = array('style' => 'text-align:right;');
$this->expectErrorCollection(E_NOTICE, 'AttrValidator: Attributes transformed', $input, $output);
$token = new HTMLPurifier_Token_Start('p', $input, 1);
$this->invoke($token);
}
// to lazy to check for global post and global pre
function testAttributeRemoved() {
$this->expectErrorCollection(E_ERROR, 'AttrValidator: Attribute removed');
$this->expectContext('CurrentAttr', 'foobar');
$token = new HTMLPurifier_Token_Start('p', array('foobar' => 'right'), 1);
$this->expectContext('CurrentToken', $token);
$this->invoke($token);
}
}
?>

View File

@ -45,8 +45,8 @@ class HTMLPurifier_ErrorCollectorTest extends UnitTestCase
$this->assertIdentical($collector->getRaw(), $result);
$formatted_result =
'<ul><li>Warning: Message 2 at line 3</li>'.
'<li>Error: Message 1 at line 23</li></ul>';
'<ul><li><strong>Warning</strong>: Message 2 at line 3</li>'.
'<li><strong>Error</strong>: Message 1 at line 23</li></ul>';
$config = HTMLPurifier_Config::create(array('Core.MaintainLineNumbers' => true));
@ -91,8 +91,8 @@ class HTMLPurifier_ErrorCollectorTest extends UnitTestCase
$this->assertIdentical($collector->getRaw(), $result);
$formatted_result =
'<ul><li>Error: Message 1</li>'.
'<li>Error: Message 2</li></ul>';
'<ul><li><strong>Error</strong>: Message 1</li>'.
'<li><strong>Error</strong>: Message 2</li></ul>';
$config = HTMLPurifier_Config::createDefault();
$this->assertIdentical($collector->getHTMLFormatted($config), $formatted_result);
}

View File

@ -7,6 +7,13 @@ class HTMLPurifier_LanguageTest extends UnitTestCase
var $lang;
function generateEnLanguage() {
$factory = HTMLPurifier_LanguageFactory::instance();
$config = HTMLPurifier_Config::create(array('Core.Language' => 'en'));
$context = new HTMLPurifier_Context();
return $factory->create($config, $context);
}
function test_getMessage() {
$config = HTMLPurifier_Config::createDefault();
$context = new HTMLPurifier_Context();
@ -26,7 +33,7 @@ class HTMLPurifier_LanguageTest extends UnitTestCase
$this->assertIdentical($lang->formatMessage('LanguageTest: Error', array(1=>'fatal', 32)), 'Error is fatal on line 32');
}
function test_formatMessage_complexParameter() {
function test_formatMessage_tokenParameter() {
$config = HTMLPurifier_Config::createDefault();
$context = new HTMLPurifier_Context();
$generator = new HTMLPurifier_Generator(); // replace with mock if this gets icky
@ -43,6 +50,29 @@ class HTMLPurifier_LanguageTest extends UnitTestCase
'Data Token: data>, data&gt;, data&gt;, 23');
}
function test_listify() {
$lang = $this->generateEnLanguage();
$this->assertEqual($lang->listify(array('Item')), 'Item');
$this->assertEqual($lang->listify(array('Item', 'Item2')), 'Item and Item2');
$this->assertEqual($lang->listify(array('Item', 'Item2', 'Item3')), 'Item, Item2 and Item3');
}
function test_formatMessage_arrayParameter() {
$lang = $this->generateEnLanguage();
$array = array('Item1', 'Item2', 'Item3');
$this->assertIdentical(
$lang->formatMessage('LanguageTest: List', array(1=>$array)),
'Item1, Item2 and Item3'
);
$array = array('Key1' => 'Value1', 'Key2' => 'Value2');
$this->assertIdentical(
$lang->formatMessage('LanguageTest: Hash', array(1=>$array)),
'Key1 and Key2; Value1 and Value2'
);
}
}
?>

View File

@ -52,6 +52,7 @@ $test_files[] = 'HTMLPurifier/AttrTransform/LangTest.php';
$test_files[] = 'HTMLPurifier/AttrTransform/LengthTest.php';
$test_files[] = 'HTMLPurifier/AttrTransform/NameTest.php';
$test_files[] = 'HTMLPurifier/AttrTypesTest.php';
$test_files[] = 'HTMLPurifier/AttrValidator_ErrorsTest.php';
$test_files[] = 'HTMLPurifier/ChildDef/ChameleonTest.php';
$test_files[] = 'HTMLPurifier/ChildDef/CustomTest.php';
$test_files[] = 'HTMLPurifier/ChildDef/OptionalTest.php';