1
0
mirror of https://github.com/ezyang/htmlpurifier.git synced 2025-08-04 21:28:06 +02:00

[2.0.1] Revamp error collector scheme: we now have custom mocks and an exchange of responsibilities

- Fix oversight in AutoParagraph dealing with armor.
- Order errors with no line number last
- Language object now needs $config and $context objects in order to format messages whose parameters are objects (e.g. tokens)
- Auto-close notice added
- Token constructors accept line numbers

git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@1245 48356398-32a2-884e-a903-53898d9a118a
This commit is contained in:
Edward Z. Yang
2007-06-26 19:33:37 +00:00
parent 275932ec05
commit 6a95d91a1a
17 changed files with 248 additions and 109 deletions

View File

@@ -54,14 +54,6 @@ require_once 'HTMLPurifier/Encoder.php';
require_once 'HTMLPurifier/ErrorCollector.php';
require_once 'HTMLPurifier/LanguageFactory.php';
HTMLPurifier_ConfigSchema::define(
'Core', 'Language', 'en', 'string', '
ISO 639 language code for localizable things in HTML Purifier to use,
which is mainly error reporting. There is currently only an English (en)
translation, so this directive is currently useless.
This directive has been available since 2.0.0.
');
HTMLPurifier_ConfigSchema::define(
'Core', 'CollectErrors', false, 'bool', '
Whether or not to collect errors found while filtering the document. This
@@ -148,7 +140,7 @@ class HTMLPurifier
if ($config->get('Core', 'CollectErrors')) {
// may get moved out if other facilities use it
$language_factory = HTMLPurifier_LanguageFactory::instance();
$language = $language_factory->create($config->get('Core', 'Language'));
$language = $language_factory->create($config, $context);
$context->register('Locale', $language);
$error_collector = new HTMLPurifier_ErrorCollector($context);

View File

@@ -27,36 +27,35 @@ class HTMLPurifier_ErrorCollector
* @param $msg string Error message text
*/
function send($severity, $msg, $args = array()) {
if (func_num_args() == 2) {
$msg = $this->locale->getMessage($msg);
} else {
// setup one-based array if necessary
if (!is_array($args)) {
$args = func_get_args();
array_shift($args);
unset($args[0]);
}
$msg = $this->locale->formatMessage($msg, $args);
if (!is_array($args)) {
$args = func_get_args();
array_shift($args);
unset($args[0]);
}
$token = $this->context->get('CurrentToken', true);
$line = $token ? $token->line : $this->context->get('CurrentLine', true);
$attr = $this->context->get('CurrentAttr', true);
// perform special substitutions
// Currently defined: $CurrentToken.Name, $CurrentToken.Serialized,
// $CurrentAttr.Name, $CurrentAttr.Value
if (strpos($msg, '$') !== false) {
$subst = array();
if (!is_null($token)) {
if (isset($token->name)) $subst['$CurrentToken.Name'] = $token->name;
$subst['$CurrentToken.Serialized'] = $this->generator->generateFromToken($token);
}
if (!is_null($attr)) {
$subst['$CurrentAttr.Name'] = $attr;
if (isset($token->attr[$attr])) $subst['$CurrentAttr.Value'] = $token->attr[$attr];
}
if (!empty($subst)) $msg = strtr($msg, $subst);
// perform special substitutions, also add custom parameters
$subst = array();
if (!is_null($token)) {
$args['CurrentToken'] = $token;
}
if (!is_null($attr)) {
$subst['$CurrentAttr.Name'] = $attr;
if (isset($token->attr[$attr])) $subst['$CurrentAttr.Value'] = $token->attr[$attr];
}
if (empty($args)) {
$msg = $this->locale->getMessage($msg);
} else {
$msg = $this->locale->formatMessage($msg, $args);
}
if (!empty($subst)) $msg = strtr($msg, $subst);
$this->errors[] = array($line, $severity, $msg);
}
@@ -74,8 +73,6 @@ class HTMLPurifier_ErrorCollector
* @param $config Configuration array, vital for HTML output nature
*/
function getHTMLFormatted($config) {
$generator = new HTMLPurifier_Generator();
$generator->generateFromTokens(array(), $config, $this->context); // initialize
$ret = array();
$errors = $this->errors;
@@ -83,20 +80,22 @@ class HTMLPurifier_ErrorCollector
// sort error array by line
// line numbers are enabled if they aren't explicitly disabled
if ($config->get('Core', 'MaintainLineNumbers') !== false) {
$lines = array();
$has_line = array();
$lines = array();
$original_order = array();
foreach ($errors as $i => $error) {
$has_line[] = (int) (bool) $error[0];
$lines[] = $error[0];
$original_order[] = $i;
}
array_multisort($lines, SORT_ASC, $original_order, SORT_ASC, $errors);
array_multisort($has_line, SORT_DESC, $lines, SORT_ASC, $original_order, SORT_ASC, $errors);
}
foreach ($errors as $error) {
list($line, $severity, $msg) = $error;
$string = '';
$string .= $this->locale->getErrorName($severity) . ': ';
$string .= $generator->escape($msg);
$string .= $this->generator->escape($msg);
if ($line) {
// have javascript link generation that causes
// textarea to skip to the specified line

View File

@@ -38,7 +38,7 @@ class HTMLPurifier_Injector_AutoParagraph extends HTMLPurifier_Injector
if (empty($this->currentNesting)) {
if (!$this->allowsElement('p')) return;
// case 1: we're in root node (and it allows paragraphs)
$token = array(new HTMLPurifier_Token_Start('p'));
$token = array($this->_pStart());
$this->_splitText($text, $token);
} elseif ($this->currentNesting[count($this->currentNesting)-1]->name == 'p') {
// case 2: we're in a paragraph
@@ -229,7 +229,6 @@ class HTMLPurifier_Injector_AutoParagraph extends HTMLPurifier_Injector
array_pop($result);
}
}
/**

View File

@@ -31,6 +31,16 @@ class HTMLPurifier_Language
*/
var $_loaded = false;
/**
* Instances of HTMLPurifier_Config and HTMLPurifier_Context
*/
var $config, $context;
/**
 * Constructor: stores the configuration and context on the instance so
 * that later calls (e.g. message formatting) can reach them.
 * @param $config Instance of HTMLPurifier_Config, kept in $this->config
 * @param $context Instance of HTMLPurifier_Context, taken by reference
 *                 and kept in $this->context
 */
function HTMLPurifier_Language($config, &$context) {
$this->config = $config;
// reference assignment: $this->context aliases the caller's variable
// rather than holding a separate copy
$this->context =& $context;
}
/**
* Loads language object with necessary info from factory cache
* @note This is a lazy loader
@@ -73,16 +83,38 @@ class HTMLPurifier_Language
* @param $key string identifier of message
* @param $args Parameters to substitute in
* @return string localised message
* @todo Implement conditionals? Right now, some messages make
* reference to line numbers, but those aren't always available
*/
function formatMessage($key, $args = array()) {
if (!$this->_loaded) $this->load();
if (!isset($this->messages[$key])) return "[$key]";
$raw = $this->messages[$key];
$substitutions = array();
$subst = array();
$generator = false;
foreach ($args as $i => $value) {
$substitutions['$' . $i] = $value;
if (is_object($value)) {
// complicated stuff
if (!$generator) $generator = $this->context->get('Generator');
// assuming it's a token
if (isset($value->name)) $subst['$'.$i.'.Name'] = $value->name;
if (isset($value->data)) $subst['$'.$i.'.Data'] = $value->data;
$subst['$'.$i.'.Compact'] =
$subst['$'.$i.'.Serialized'] = $generator->generateFromToken($value);
// a more complex algorithm for compact representation
// could be introduced for all types of tokens. This
// may need to be factored out into a dedicated class
if (!empty($value->attr)) {
$stripped_token = $value->copy();
$stripped_token->attr = array();
$subst['$'.$i.'.Compact'] = $generator->generateFromToken($stripped_token);
}
$subst['$'.$i.'.Line'] = $value->line ? $value->line : 'unknown';
continue;
}
$subst['$' . $i] = $value;
}
return strtr($raw, $substitutions);
return strtr($raw, $subst);
}
}

View File

@@ -17,19 +17,20 @@ $messages = array(
'Lexer: Missing end quote' => 'Attribute declaration has no end quote',
'Strategy_RemoveForeignElements: Tag transform' => '<$1> element transformed into $CurrentToken.Serialized',
'Strategy_RemoveForeignElements: Missing required attribute' => '<$1> element missing required attribute $2',
'Strategy_RemoveForeignElements: Missing required attribute' => '$CurrentToken.Compact element missing required attribute $1',
'Strategy_RemoveForeignElements: Foreign element to text' => 'Unrecognized $CurrentToken.Serialized tag converted to text',
'Strategy_RemoveForeignElements: Foreign element removed' => 'Unrecognized $CurrentToken.Serialized tag removed',
'Strategy_RemoveForeignElements: Comment removed' => 'Comment containing "$1" removed',
'Strategy_RemoveForeignElements: Script removed' => 'Inline scripting removed',
'Strategy_RemoveForeignElements: Comment removed' => 'Comment containing "$CurrentToken.Data" removed',
'Strategy_RemoveForeignElements: Script removed' => 'Script removed',
'Strategy_RemoveForeignElements: Token removed to end' => 'Tags and text starting from $1 element where removed to end',
'Strategy_MakeWellFormed: Unnecessary end tag removed' => 'Unnecessary </$1> tag removed',
'Strategy_MakeWellFormed: Unnecessary end tag to text' => 'Unnecessary </$1> tag converted to text',
'Strategy_MakeWellFormed: Stray end tag removed' => 'Stray </$1> tag removed',
'Strategy_MakeWellFormed: Stray end tag to text' => 'Stray </$1> tag converted to text',
'Strategy_MakeWellFormed: Tag closed by element end' => '<$1> tag closed by end of $CurrentToken.Serialized',
'Strategy_MakeWellFormed: Tag closed by document end' => '<$1> tag closed by end of document',
'Strategy_MakeWellFormed: Unnecessary end tag removed' => 'Unnecessary $CurrentToken.Serialized tag removed',
'Strategy_MakeWellFormed: Unnecessary end tag to text' => 'Unnecessary $CurrentToken.Serialized tag converted to text',
'Strategy_MakeWellFormed: Tag auto closed' => '$1.Compact started on line $1.Line auto-closed by $CurrentToken.Compact',
'Strategy_MakeWellFormed: Stray end tag removed' => 'Stray $CurrentToken.Serialized tag removed',
'Strategy_MakeWellFormed: Stray end tag to text' => 'Stray $CurrentToken.Serialized tag converted to text',
'Strategy_MakeWellFormed: Tag closed by element end' => '$1.Compact tag started on line $1.Line closed by end of $CurrentToken.Serialized',
'Strategy_MakeWellFormed: Tag closed by document end' => '$1.Compact tag started on line $1.Line closed by end of document',
);

View File

@@ -3,6 +3,14 @@
require_once 'HTMLPurifier/Language.php';
require_once 'HTMLPurifier/AttrDef/Lang.php';
HTMLPurifier_ConfigSchema::define(
'Core', 'Language', 'en', 'string', '
ISO 639 language code for localizable things in HTML Purifier to use,
which is mainly error reporting. There is currently only an English (en)
translation, so this directive is currently useless.
This directive has been available since 2.0.0.
');
/**
* Class responsible for generating HTMLPurifier_Language objects, managing
* caching and fallbacks.
@@ -79,12 +87,15 @@ class HTMLPurifier_LanguageFactory
/**
* Creates a language object, handles class fallbacks
* @param $code string language code
* @param $config Instance of HTMLPurifier_Config
* @param $context Instance of HTMLPurifier_Context
*/
function create($code) {
function create($config, &$context) {
$config = $context = false; // hope it doesn't use these!
$code = $this->validator->validate($code, $config, $context);
// validate language code
$code = $this->validator->validate(
$config->get('Core', 'Language'), $config, $context
);
if ($code === false) $code = 'en'; // malformed code becomes English
$pcode = str_replace('-', '_', $code); // make valid PHP classname
@@ -111,7 +122,7 @@ class HTMLPurifier_LanguageFactory
$lang = HTMLPurifier_LanguageFactory::factory( $fallback );
$depth--;
} else {
$lang = new $class;
$lang = new $class($config, $context);
}
$lang->code = $code;

View File

@@ -153,6 +153,7 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
// if the token is not allowed by the parent, auto-close
// the parent
if (!isset($parent_info->child->elements[$token->name])) {
if ($e) $e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag auto closed', $parent);
// close the parent, then append the token
$result[] = new HTMLPurifier_Token_End($parent->name);
$result[] = $token;
@@ -182,12 +183,12 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
// make sure that we have something open
if (empty($this->currentNesting)) {
if ($escape_invalid_tags) {
if ($e) $e->send(E_WARNING, 'Strategy_MakeWellFormed: Unnecessary end tag to text', $token->name);
if ($e) $e->send(E_WARNING, 'Strategy_MakeWellFormed: Unnecessary end tag to text');
$result[] = new HTMLPurifier_Token_Text(
$generator->generateFromToken($token, $config, $context)
);
} elseif ($e) {
$e->send(E_WARNING, 'Strategy_MakeWellFormed: Unnecessary end tag removed', $token->name);
$e->send(E_WARNING, 'Strategy_MakeWellFormed: Unnecessary end tag removed');
}
continue;
}
@@ -223,9 +224,9 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
$result[] = new HTMLPurifier_Token_Text(
$generator->generateFromToken($token, $config, $context)
);
if ($e) $e->send(E_WARNING, 'Strategy_MakeWellFormed: Stray end tag to text', $token->name);
if ($e) $e->send(E_WARNING, 'Strategy_MakeWellFormed: Stray end tag to text');
} elseif ($e) {
$e->send(E_WARNING, 'Strategy_MakeWellFormed: Stray end tag removed', $token->name);
$e->send(E_WARNING, 'Strategy_MakeWellFormed: Stray end tag removed');
}
continue;
}
@@ -235,7 +236,7 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
$size = count($skipped_tags);
for ($i = $size - 1; $i > 0; $i--) {
if ($e && !isset($skipped_tags[$i]->armor['MakeWellFormed_TagClosedError'])) {
$e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag closed by element end', $skipped_tags[$i]->name);
$e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag closed by element end', $skipped_tags[$i]);
}
$result[] = new HTMLPurifier_Token_End($skipped_tags[$i]->name);
}
@@ -244,25 +245,25 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
}
$context->destroy('CurrentNesting');
$context->destroy('InputTokens');
$context->destroy('InputIndex');
$context->destroy('CurrentToken');
// we're at the end now, fix all still unclosed tags
// not using processToken() because at this point we don't
// care about current nesting
if (!empty($this->currentNesting)) {
$size = count($this->currentNesting);
for ($i = $size - 1; $i >= 0; $i--) {
if ($e && !isset($skipped_tags[$i]->armor['MakeWellFormed_TagClosedError'])) {
$e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag closed by document end', $this->currentNesting[$i]->name);
if ($e && !isset($this->currentNesting[$i]->armor['MakeWellFormed_TagClosedError'])) {
$e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag closed by document end', $this->currentNesting[$i]);
}
$result[] =
new HTMLPurifier_Token_End($this->currentNesting[$i]->name);
}
}
$context->destroy('CurrentNesting');
$context->destroy('InputTokens');
$context->destroy('InputIndex');
$context->destroy('CurrentToken');
unset($this->outputTokens, $this->injectors, $this->currentInjector,
$this->currentNesting, $this->inputTokens, $this->inputIndex);

View File

@@ -100,7 +100,7 @@ class HTMLPurifier_Strategy_RemoveForeignElements extends HTMLPurifier_Strategy
}
}
if (!$ok) {
if ($e) $e->send(E_ERROR, 'Strategy_RemoveForeignElements: Missing required attribute', $token->name, $name);
if ($e) $e->send(E_ERROR, 'Strategy_RemoveForeignElements: Missing required attribute', $name);
continue;
}
$token->armor['ValidateAttributes'] = true;
@@ -143,7 +143,7 @@ class HTMLPurifier_Strategy_RemoveForeignElements extends HTMLPurifier_Strategy
$token = new HTMLPurifier_Token_Text($data);
} else {
// strip comments
if ($e) $e->send(E_NOTICE, 'Strategy_RemoveForeignElements: Comment removed', $token->data);
if ($e) $e->send(E_NOTICE, 'Strategy_RemoveForeignElements: Comment removed');
continue;
}
} elseif ($token->type == 'text') {

View File

@@ -66,7 +66,7 @@ class HTMLPurifier_Token_Tag extends HTMLPurifier_Token // abstract
* @param $name String name.
* @param $attr Associative array of attributes.
*/
function HTMLPurifier_Token_Tag($name, $attr = array()) {
function HTMLPurifier_Token_Tag($name, $attr = array(), $line = null) {
$this->name = ctype_lower($name) ? $name : strtolower($name);
foreach ($attr as $key => $value) {
// normalization only necessary when key is not lowercase
@@ -81,6 +81,7 @@ class HTMLPurifier_Token_Tag extends HTMLPurifier_Token // abstract
}
}
$this->attr = $attr;
$this->line = $line;
}
}
@@ -134,9 +135,10 @@ class HTMLPurifier_Token_Text extends HTMLPurifier_Token
*
* @param $data String parsed character data.
*/
function HTMLPurifier_Token_Text($data) {
function HTMLPurifier_Token_Text($data, $line = null) {
$this->data = $data;
$this->is_whitespace = ctype_space($data);
$this->line = $line;
}
}
@@ -153,8 +155,9 @@ class HTMLPurifier_Token_Comment extends HTMLPurifier_Token
*
* @param $data String comment data.
*/
function HTMLPurifier_Token_Comment($data) {
function HTMLPurifier_Token_Comment($data, $line = null) {
$this->data = $data;
$this->line = $line;
}
}