1
0
mirror of https://github.com/ezyang/htmlpurifier.git synced 2025-07-31 19:30:21 +02:00

[2.0.1] Implement haphazard error collection for AttrValidator.

- Error collector / Language can take arrays and listify them
- AttrValidator takes token by reference
- Formatted errors now have their severity <strong>
- 100 test-cases! W00t!

git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@1250 48356398-32a2-884e-a903-53898d9a118a
This commit is contained in:
Edward Z. Yang
2007-06-27 02:03:15 +00:00
parent a005da8a4c
commit 3a1d505b3d
12 changed files with 205 additions and 44 deletions

View File

@@ -1,12 +1,31 @@
<?php
/**
* Validates the attributes of a token. Doesn't manage required attributes
* very well. The only reason we factored this out was because RemoveForeignElements
* also needed it besides ValidateAttributes.
*/
class HTMLPurifier_AttrValidator
{
function validateToken($token, &$config, &$context) {
/**
* Validates the attributes of a token, returning a modified token
* that has valid attributes
* @param $token Reference to token to validate. We require a reference
* because the operations this class performs on the token are
* not atomic, so the context's CurrentToken needs to be updated
* throughout
* @param $config Instance of HTMLPurifier_Config
* @param $context Instance of HTMLPurifier_Context
*/
function validateToken(&$token, &$config, &$context) {
$definition = $config->getHTMLDefinition();
$e =& $context->get('ErrorCollector', true);
// initialize CurrentToken if necessary
$current_token =& $context->get('CurrentToken', true);
if (!$current_token) $context->register('CurrentToken', $token);
if ($token->type !== 'start' && $token->type !== 'empty') return $token;
@@ -14,21 +33,21 @@ class HTMLPurifier_AttrValidator
// DEFINITION CALL
$d_defs = $definition->info_global_attr;
// copy out attributes for easy manipulation
$attr = $token->attr;
// reference attributes for easy manipulation
$attr =& $token->attr;
// do global transformations (pre)
// nothing currently utilizes this
foreach ($definition->info_attr_transform_pre as $transform) {
$attr = $transform->transform($attr, $config, $context);
$attr = $transform->transform($o = $attr, $config, $context);
if ($e && ($attr != $o)) $e->send(E_NOTICE, 'AttrValidator: Attributes transformed', $o, $attr);
}
// do local transformations only applicable to this element (pre)
// ex. <p align="right"> to <p style="text-align:right;">
foreach ($definition->info[$token->name]->attr_transform_pre
as $transform
) {
$attr = $transform->transform($attr, $config, $context);
foreach ($definition->info[$token->name]->attr_transform_pre as $transform) {
$attr = $transform->transform($o = $attr, $config, $context);
if ($e && ($attr != $o)) $e->send(E_NOTICE, 'AttrValidator: Attributes transformed', $o, $attr);
}
// create alias to this element's attribute definition array, see
@@ -36,6 +55,9 @@ class HTMLPurifier_AttrValidator
// DEFINITION CALL
$defs = $definition->info[$token->name]->attr;
$attr_key = false;
$context->register('CurrentAttr', $attr_key);
// iterate through all the attribute keypairs
// Watch out for name collisions: $key has previously been used
foreach ($attr as $attr_key => $value) {
@@ -69,9 +91,17 @@ class HTMLPurifier_AttrValidator
// put the results into effect
if ($result === false || $result === null) {
// this is a generic error message that should be replaced
// with more specific ones when possible
if ($e) $e->send(E_ERROR, 'AttrValidator: Attribute removed');
// remove the attribute
unset($attr[$attr_key]);
} elseif (is_string($result)) {
// generally, if a substitution is happening, there
// was some sort of implicit correction going on. We'll
// delegate it to the attribute classes to say exactly what.
// simple substitution
$attr[$attr_key] = $result;
}
@@ -83,21 +113,24 @@ class HTMLPurifier_AttrValidator
// others would prepend themselves).
}
$context->destroy('CurrentAttr');
// post transforms
// ex. <x lang="fr"> to <x lang="fr" xml:lang="fr">
// global (error reporting untested)
foreach ($definition->info_attr_transform_post as $transform) {
$attr = $transform->transform($attr, $config, $context);
$attr = $transform->transform($o = $attr, $config, $context);
if ($e && ($attr != $o)) $e->send(E_NOTICE, 'AttrValidator: Attributes transformed', $o, $attr);
}
// ex. <bdo> to <bdo dir="ltr">
// local (error reporting untested)
foreach ($definition->info[$token->name]->attr_transform_post as $transform) {
$attr = $transform->transform($attr, $config, $context);
$attr = $transform->transform($o = $attr, $config, $context);
if ($e && ($attr != $o)) $e->send(E_NOTICE, 'AttrValidator: Attributes transformed', $o, $attr);
}
// commit changes
$token->attr = $attr;
return $token;
// destroy CurrentToken if we made it ourselves
if (!$current_token) $context->destroy('CurrentToken');
}

View File

@@ -26,9 +26,10 @@ class HTMLPurifier_ErrorCollector
* @param $severity int Error severity, PHP error style (don't use E_USER_)
* @param $msg string Error message text
*/
function send($severity, $msg, $args = array()) {
function send($severity, $msg) {
if (!is_array($args)) {
$args = array();
if (func_num_args() > 2) {
$args = func_get_args();
array_shift($args);
unset($args[0]);
@@ -94,7 +95,7 @@ class HTMLPurifier_ErrorCollector
foreach ($errors as $error) {
list($line, $severity, $msg) = $error;
$string = '';
$string .= $this->locale->getErrorName($severity) . ': ';
$string .= '<strong>' . $this->locale->getErrorName($severity) . '</strong>: ';
$string .= $this->generator->escape($msg);
if ($line) {
// have javascript link generation that causes

View File

@@ -78,6 +78,25 @@ class HTMLPurifier_Language
return $this->errorNames[$int];
}
/**
 * Converts an array of items into a human-readable list string.
 *
 * Items are joined with the localised 'Item separator' message, except
 * for the final pair, which is joined with 'Item separator last'
 * (with the English messages this presumably yields "a, b and c").
 * An empty array produces an empty string; a single item is returned
 * without any separator.
 *
 * @param $array Array of string-convertible items. Keys are ignored, so
 *               sparse or associative arrays are listed in iteration order.
 * @return String representation of the list
 */
function listify($array) {
    $sep      = $this->getMessage('Item separator');
    $sep_last = $this->getMessage('Item separator last');
    // Re-index so positional access below is safe even when the caller
    // passes an array whose keys are not a contiguous 0..n-1 range
    // (previously that raised undefined-offset notices and lost items).
    $items = array_values($array);
    $ret = '';
    for ($i = 0, $c = count($items); $i < $c; $i++) {
        if ($i > 0) {
            // the last gap gets the special "last" separator
            $ret .= ($i + 1 < $c) ? $sep : $sep_last;
        }
        $ret .= $items[$i];
    }
    return $ret;
}
/**
* Formats a localised message with passed parameters
* @param $key string identifier of message
@@ -94,22 +113,35 @@ class HTMLPurifier_Language
$generator = false;
foreach ($args as $i => $value) {
if (is_object($value)) {
// complicated stuff
if (!$generator) $generator = $this->context->get('Generator');
// assuming it's a token
if (isset($value->name)) $subst['$'.$i.'.Name'] = $value->name;
if (isset($value->data)) $subst['$'.$i.'.Data'] = $value->data;
$subst['$'.$i.'.Compact'] =
$subst['$'.$i.'.Serialized'] = $generator->generateFromToken($value);
// a more complex algorithm for compact representation
// could be introduced for all types of tokens. This
// may need to be factored out into a dedicated class
if (!empty($value->attr)) {
$stripped_token = $value->copy();
$stripped_token->attr = array();
$subst['$'.$i.'.Compact'] = $generator->generateFromToken($stripped_token);
if (is_a($value, 'HTMLPurifier_Token')) {
// factor this out some time
if (!$generator) $generator = $this->context->get('Generator');
if (isset($value->name)) $subst['$'.$i.'.Name'] = $value->name;
if (isset($value->data)) $subst['$'.$i.'.Data'] = $value->data;
$subst['$'.$i.'.Compact'] =
$subst['$'.$i.'.Serialized'] = $generator->generateFromToken($value);
// a more complex algorithm for compact representation
// could be introduced for all types of tokens. This
// may need to be factored out into a dedicated class
if (!empty($value->attr)) {
$stripped_token = $value->copy();
$stripped_token->attr = array();
$subst['$'.$i.'.Compact'] = $generator->generateFromToken($stripped_token);
}
$subst['$'.$i.'.Line'] = $value->line ? $value->line : 'unknown';
}
continue;
} elseif (is_array($value)) {
$keys = array_keys($value);
if (array_keys($keys) === $keys) {
// list
$subst['$'.$i] = $this->listify($value);
} else {
// associative array
// no $i implementation yet, sorry
$subst['$'.$i.'.Keys'] = $this->listify($keys);
$subst['$'.$i.'.Values'] = $this->listify(array_values($value));
}
$subst['$'.$i.'.Line'] = $value->line ? $value->line : 'unknown';
continue;
}
$subst['$' . $i] = $value;

View File

@@ -5,7 +5,14 @@ $fallback = false;
$messages = array(
'HTMLPurifier' => 'HTML Purifier',
'LanguageFactoryTest: Pizza' => 'Pizza', // for unit testing purposes
// for unit testing purposes
'LanguageFactoryTest: Pizza' => 'Pizza',
'LanguageTest: List' => '$1',
'LanguageTest: Hash' => '$1.Keys; $1.Values',
'Item separator' => ', ',
'Item separator last' => ' and ', // non-Harvard style
'ErrorCollector: No errors' => 'No errors detected. However, because error reporting is still incomplete, there may have been errors that the error collector was not notified of; please inspect the output HTML carefully.',
'ErrorCollector: At line' => ' at line $line',
@@ -37,6 +44,9 @@ $messages = array(
'Strategy_FixNesting: Node reorganized' => 'Contents of $CurrentToken.Compact node reorganized to enforce its content model',
'Strategy_FixNesting: Node contents removed' => 'Contents of $CurrentToken.Compact node removed',
'AttrValidator: Attributes transformed' => 'Attributes on $CurrentToken.Compact transformed from $1.Keys to $2.Keys',
'AttrValidator: Attribute removed' => '$CurrentAttr.Name attribute on $CurrentToken.Compact removed',
);
$errorNames = array(

View File

@@ -91,7 +91,7 @@ class HTMLPurifier_Strategy_RemoveForeignElements extends HTMLPurifier_Strategy
$definition->info[$token->name]->required_attr &&
($token->name != 'img' || $remove_invalid_img) // ensure config option still works
) {
$token = $attr_validator->validateToken($token, $config, $context);
$attr_validator->validateToken($token, $config, $context);
$ok = true;
foreach ($definition->info[$token->name]->required_attr as $name) {
if (!isset($token->attr[$name])) {

View File

@@ -27,6 +27,9 @@ class HTMLPurifier_Strategy_ValidateAttributes extends HTMLPurifier_Strategy
// setup validator
$validator = new HTMLPurifier_AttrValidator();
$token = false;
$context->register('CurrentToken', $token);
foreach ($tokens as $key => $token) {
// only process tokens that have attributes,
@@ -36,7 +39,8 @@ class HTMLPurifier_Strategy_ValidateAttributes extends HTMLPurifier_Strategy
// skip tokens that are armored
if (!empty($token->armor['ValidateAttributes'])) continue;
$tokens[$key] = $validator->validateToken($token, $config, $context);
// note that we have no facilities here for removing tokens
$validator->validateToken($token, $config, $context);
}
$context->destroy('IDAccumulator');