diff --git a/library/HTMLPurifier.php b/library/HTMLPurifier.php
index 731e1dfd..e9e8219b 100644
--- a/library/HTMLPurifier.php
+++ b/library/HTMLPurifier.php
@@ -51,6 +51,7 @@ require_once 'HTMLPurifier/Generator.php';
require_once 'HTMLPurifier/Strategy/Core.php';
require_once 'HTMLPurifier/Encoder.php';
+require_once 'HTMLPurifier/ErrorCollector.php';
require_once 'HTMLPurifier/LanguageFactory.php';
HTMLPurifier_ConfigSchema::define(
@@ -146,8 +147,8 @@ class HTMLPurifier
$language = $language_factory->create($config->get('Core', 'Language'));
$context->register('Locale', $language);
- $error_collector = new HTMLPurifier_ErrorCollector();
- $context->register('ErrorCollector', $language);
+ $error_collector = new HTMLPurifier_ErrorCollector($language);
+ $context->register('ErrorCollector', $error_collector);
}
$html = HTMLPurifier_Encoder::convertToUTF8($html, $config, $context);
diff --git a/library/HTMLPurifier/ErrorCollector.php b/library/HTMLPurifier/ErrorCollector.php
index ddad3d8d..762c8dd6 100644
--- a/library/HTMLPurifier/ErrorCollector.php
+++ b/library/HTMLPurifier/ErrorCollector.php
@@ -10,16 +10,31 @@ class HTMLPurifier_ErrorCollector
{
var $errors = array();
+ var $locale;
+
+ function HTMLPurifier_ErrorCollector(&$locale) {$this->locale =& $locale;}
/**
* Sends an error message to the collector for later use
- * @param string Error message text
- * @param int Error severity, PHP error style (don't use E_USER_)
- * @param HTMLPurifier_Token Token that caused error
- * @param array Tokens surrounding the offending token above, use true as placeholder
+ * @param $line Integer line number, or HTMLPurifier_Token that caused error
+ * @param $severity int Error severity, PHP error style (don't use E_USER_)
+ * @param $msg string Error message text
*/
- function send($msg, $severity, $token, $context_tokens = array(true)) {
- $this->errors[] = array($msg, $severity, $token, $context_tokens);
+ function send($line, $severity, $msg) {
+ $token = false;
+ if ($line && !is_int($line)) {
+ $token = $line;
+ $line = $token->line;
+ if (!$line) $line = false;
+ }
+ if (func_num_args() == 3) $msg = $this->locale->getMessage($msg);
+ else {
+ $args = func_get_args();
+ array_splice($args, 0, 2); // one less to make 1-based
+ unset($args[0]);
+ $msg = $this->locale->formatMessage($msg, $args);
+ }
+ $this->errors[] = array($line, $severity, $msg);
}
/**
@@ -35,41 +50,40 @@ class HTMLPurifier_ErrorCollector
* Default HTML formatting implementation for error messages
* @param $config Configuration array, vital for HTML output nature
*/
- function getHTMLFormatted($config, &$context) {
+ function getHTMLFormatted($config) {
$generator = new HTMLPurifier_Generator();
- $generator->generateFromTokens(array(), $config, $context); // initialize
+ $generator->generateFromTokens(array(), $config, $this->context); // initialize
$ret = array();
$errors = $this->errors;
- $locale = $context->get('Locale');
// sort error array by line
if ($config->get('Core', 'MaintainLineNumbers')) {
$lines = array();
- foreach ($errors as $error) $lines[] = $error[2]->line;
+ foreach ($errors as $error) {
+ $lines[] = $error[0];
+ }
array_multisort($lines, SORT_ASC, $errors);
}
foreach ($errors as $error) {
- list($msg, $severity, $token, $context_tokens) = $error;
+ list($line, $severity, $msg) = $error;
$string = '';
- $string .= $locale->getErrorName($severity) . ': ';
+ $string .= $this->locale->getErrorName($severity) . ': ';
$string .= $generator->escape($msg);
- if (!empty($token->line)) {
- $string .= ' at line ' . $token->line;
+ if ($line) {
+ $string .= $this->locale->formatMessage(
+ 'ErrorCollector: At line', array('line' => $line));
}
- $string .= ' (';
- foreach ($context_tokens as $context_token) {
- if ($context_token !== true) {
- $string .= $generator->escape($generator->generateFromToken($context_token));
- } else {
- $string .= '' . $generator->escape($generator->generateFromToken($token)) . '';
- }
- }
- $string .= '
)';
$ret[] = $string;
}
- return $ret;
+
+ if (empty($errors)) {
+ return '
' . $this->locale->getMessage('ErrorCollector: No errors') . '
';
+ } else {
+ return '- ' . implode('
- ', $ret) . '
';
+ }
+
}
}
diff --git a/library/HTMLPurifier/Language.php b/library/HTMLPurifier/Language.php
index 51051c97..d7de2ab5 100644
--- a/library/HTMLPurifier/Language.php
+++ b/library/HTMLPurifier/Language.php
@@ -71,17 +71,16 @@ class HTMLPurifier_Language
/**
* Formats a localised message with passed parameters
* @param $key string identifier of message
- * @param $param Parameter to substitute in (arbitrary number)
+ * @param $args Parameters to substitute in
* @return string localised message
*/
- function formatMessage($key) {
+ function formatMessage($key, $args = array()) {
if (!$this->_loaded) $this->load();
if (!isset($this->messages[$key])) return "[$key]";
$raw = $this->messages[$key];
- $args = func_get_args();
$substitutions = array();
- for ($i = 1; $i < count($args); $i++) {
- $substitutions['$' . $i] = $args[$i];
+ foreach ($args as $i => $value) {
+ $substitutions['$' . $i] = $value;
}
return strtr($raw, $substitutions);
}
diff --git a/library/HTMLPurifier/Language/messages/en.php b/library/HTMLPurifier/Language/messages/en.php
index d62a30cb..11c1d842 100644
--- a/library/HTMLPurifier/Language/messages/en.php
+++ b/library/HTMLPurifier/Language/messages/en.php
@@ -7,7 +7,14 @@ $messages = array(
'htmlpurifier' => 'HTML Purifier',
'pizza' => 'Pizza', // for unit testing purposes
+'ErrorCollector: No errors' => 'No errors',
+'ErrorCollector: At line' => ' at line $line',
+'Lexer: Unclosed comment' => 'Unclosed comment',
+'Lexer: Unescaped lt' => 'Unescaped less-than sign (<) should be <',
+'Lexer: Missing gt' => 'Missing greater-than sign (>), previous less-than sign (<) should be escaped',
+'Lexer: Missing attribute key' => 'Attribute declaration has no key',
+'Lexer: Missing end quote' => 'Attribute declaration has no end quote',
);
diff --git a/library/HTMLPurifier/LanguageFactory.php b/library/HTMLPurifier/LanguageFactory.php
index c8891ffd..4acbf0e4 100644
--- a/library/HTMLPurifier/LanguageFactory.php
+++ b/library/HTMLPurifier/LanguageFactory.php
@@ -24,7 +24,7 @@ class HTMLPurifier_LanguageFactory
* variables to slurp out of a message file.
* @value array list
*/
- var $keys = array('fallback', 'messages');
+ var $keys = array('fallback', 'messages', 'errorNames');
/**
* Instance of HTMLPurifier_AttrDef_Lang to validate language codes
diff --git a/library/HTMLPurifier/Lexer/DirectLex.php b/library/HTMLPurifier/Lexer/DirectLex.php
index 892f7d64..1cdc1a64 100644
--- a/library/HTMLPurifier/Lexer/DirectLex.php
+++ b/library/HTMLPurifier/Lexer/DirectLex.php
@@ -44,12 +44,19 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer
$array = array(); // result array
$maintain_line_numbers = $config->get('Core', 'MaintainLineNumbers');
- $current_line = 1;
+ if ($maintain_line_numbers) $current_line = 1;
+ else $current_line = false;
+ $context->register('CurrentLine', $current_line);
$nl = PHP_EOL;
// how often to manually recalculate. This will ALWAYS be right,
// but it's pretty wasteful. Set to 0 to turn off
$synchronize_interval = $config->get('Core', 'DirectLexLineNumberSyncInterval');
+ $e = $l = false;
+ if ($config->get('Core', 'CollectErrors')) {
+ $e =& $context->get('ErrorCollector');
+ }
+
// infinite loop protection
// has to be pretty big, since html docs can be big
// we're allow two hundred thousand tags... more than enough?
@@ -131,6 +138,7 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer
// uh oh, we have a comment that extends to
// infinity. Can't be helped: set comment
// end position to end of string
+ if ($e) $e->send($current_line, E_WARNING, 'Lexer: Unclosed comment');
$position_comment_end = strlen($html);
$end = true;
} else {
@@ -173,6 +181,7 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer
// have accidently grabbed an emoticon. Translate into
// text and go our merry way
if (!ctype_alnum($segment[0])) {
+ if ($e) $e->send($current_line, E_NOTICE, 'Lexer: Unescaped lt');
$token = new
HTMLPurifier_Token_Text(
'<' .
@@ -251,6 +260,8 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer
$inside_tag = false;
continue;
} else {
+ // inside tag, but there's no ending > sign
+ if ($e) $e->send($current_line, E_WARNING, 'Lexer: Missing gt');
$token = new
HTMLPurifier_Token_Text(
'<' .
@@ -265,6 +276,9 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer
}
break;
}
+
+ $context->destroy('CurrentLine');
+
return $array;
}
@@ -295,6 +309,12 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer
if ($string == '') return array(); // no attributes
+ $e = false;
+ if ($config->get('Core', 'CollectErrors')) {
+ $e =& $context->get('ErrorCollector');
+ $current_line =& $context->get('CurrentLine');
+ }
+
// let's see if we can abort as quickly as possible
// one equal sign, no spaces => one attribute
$num_equal = substr_count($string, '=');
@@ -306,7 +326,10 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer
// only one attribute
list($key, $quoted_value) = explode('=', $string);
$quoted_value = trim($quoted_value);
- if (!$key) return array();
+ if (!$key) {
+ if ($e) $e->send($current_line, E_ERROR, 'Lexer: Missing attribute key');
+ return array();
+ }
if (!$quoted_value) return array($key => '');
$first_char = @$quoted_value[0];
$last_char = @$quoted_value[strlen($quoted_value)-1];
@@ -320,6 +343,7 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer
} else {
// not well behaved
if ($open_quote) {
+ if ($e) $e->send($current_line, E_ERROR, 'Lexer: Missing end quote');
$value = substr($quoted_value, 1);
} else {
$value = $quoted_value;
@@ -343,7 +367,10 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer
while(true) {
// infinite loop protection
- if (++$loops > 1000) return array();
+ if (++$loops > 1000) {
+ trigger_error('Infinite loop detected in attribute parsing', E_USER_WARNING);
+ return array();
+ }
if ($cursor >= $size) {
break;
@@ -362,7 +389,11 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer
$key = substr($string, $key_begin, $key_end - $key_begin);
- if (!$key) continue; // empty key
+ if (!$key) {
+ if ($e) $e->send($current_line, E_ERROR, 'Lexer: Missing attribute key');
+ $cursor += strcspn($string, $this->_whitespace, $cursor + 1); // prevent infinite loop
+ continue; // empty key
+ }
// scroll past all whitespace
$cursor += strspn($string, $this->_whitespace, $cursor);
@@ -407,6 +438,9 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer
// boolattr
if ($key !== '') {
$array[$key] = $key;
+ } else {
+ // purely theoretical
+ if ($e) $e->send($current_line, E_ERROR, 'Lexer: Missing attribute key');
}
}
diff --git a/tests/HTMLPurifier/ErrorCollectorTest.php b/tests/HTMLPurifier/ErrorCollectorTest.php
index f25579be..95c0578e 100644
--- a/tests/HTMLPurifier/ErrorCollectorTest.php
+++ b/tests/HTMLPurifier/ErrorCollectorTest.php
@@ -5,45 +5,74 @@ require_once 'HTMLPurifier/ErrorCollector.php';
class HTMLPurifier_ErrorCollectorTest extends UnitTestCase
{
+ function setup() {
+ generate_mock_once('HTMLPurifier_Language');
+ }
+
function test() {
- $tok1 = new HTMLPurifier_Token_Text('Token that caused error');
- $tok1->line = 23;
- $tok2 = new HTMLPurifier_Token_Start('a'); // also caused error
- $tok2->line = 3;
- $tok3 = new HTMLPurifier_Token_Text('Context before'); // before $tok2
- $tok3->line = 3;
- $tok4 = new HTMLPurifier_Token_Text('Context after'); // after $tok2
- $tok4->line = 3;
+ $tok = new HTMLPurifier_Token_Start('a'); // also caused error
+ $tok->line = 3;
- $collector = new HTMLPurifier_ErrorCollector();
- $collector->send('Big fat error', E_ERROR, $tok1);
- $collector->send('Another ', E_WARNING, $tok2, array($tok3, true, $tok4));
+ $language = new HTMLPurifier_LanguageMock();
+ $language->setReturnValue('getErrorName', 'Error', array(E_ERROR));
+ $language->setReturnValue('getErrorName', 'Warning', array(E_WARNING));
+ $language->setReturnValue('getMessage', 'Message 1', array('message-1'));
+ $language->setReturnValue('formatMessage', 'Message 2', array('message-2', array(1 => 'param')));
+ $language->setReturnValue('formatMessage', ' at line 23', array('ErrorCollector: At line', array('line' => 23)));
+ $language->setReturnValue('formatMessage', ' at line 3', array('ErrorCollector: At line', array('line' => 3)));
+
+ $collector = new HTMLPurifier_ErrorCollector($language);
+ $collector->send(23, E_ERROR, 'message-1');
+ $collector->send($tok, E_WARNING, 'message-2', 'param');
$result = array(
- 0 => array('Big fat error', E_ERROR, $tok1, array(true)),
- 1 => array('Another ', E_WARNING, $tok2, array($tok3, true, $tok4))
+ 0 => array(23, E_ERROR, 'Message 1'),
+ 1 => array(3, E_WARNING, 'Message 2')
);
$this->assertIdentical($collector->getRaw(), $result);
- $formatted_result = array(
- 0 => 'Warning: Another <warning> at line 3 (Context before<a>Context after
)',
- 1 => 'Error: Big fat error at line 23 (Token that caused error
)'
- );
+ $formatted_result =
+ '- Warning: Message 2 at line 3
'.
+ '- Error: Message 1 at line 23
';
$config = HTMLPurifier_Config::create(array('Core.MaintainLineNumbers' => true));
- $context = new HTMLPurifier_Context();
+ $this->assertIdentical($collector->getHTMLFormatted($config), $formatted_result);
- generate_mock_once('HTMLPurifier_Language');
+ }
+
+ function testNoErrors() {
$language = new HTMLPurifier_LanguageMock();
+ $language->setReturnValue('getMessage', 'No errors', array('ErrorCollector: No errors'));
+ $collector = new HTMLPurifier_ErrorCollector($language);
+ $formatted_result = 'No errors
';
+ $config = HTMLPurifier_Config::createDefault();
+ $this->assertIdentical($collector->getHTMLFormatted($config), $formatted_result);
+ }
+
+ function testNoLineNumbers() {
+ $token = new HTMLPurifier_Token_Start('a'); // no line number!
+ $language = new HTMLPurifier_LanguageMock();
+ $language->setReturnValue('getMessage', 'Message 1', array('message-1'));
+ $language->setReturnValue('getMessage', 'Message 2', array('message-2'));
$language->setReturnValue('getErrorName', 'Error', array(E_ERROR));
- $language->setReturnValue('getErrorName', 'Warning', array(E_WARNING));
- $context->register('Locale', $language);
+ $collector = new HTMLPurifier_ErrorCollector($language);
+ $collector->send(false, E_ERROR, 'message-1');
+ $collector->send($token, E_ERROR, 'message-2');
- $this->assertIdentical($collector->getHTMLFormatted($config, $context), $formatted_result);
+ $result = array(
+ 0 => array(false, E_ERROR, 'Message 1'),
+ 1 => array(false, E_ERROR, 'Message 2')
+ );
+ $this->assertIdentical($collector->getRaw(), $result);
+ $formatted_result =
+ '- Error: Message 1
'.
+ '- Error: Message 2
';
+ $config = HTMLPurifier_Config::createDefault();
+ $this->assertIdentical($collector->getHTMLFormatted($config), $formatted_result);
}
}
diff --git a/tests/HTMLPurifier/LanguageTest.php b/tests/HTMLPurifier/LanguageTest.php
index 21e55206..63f37fbd 100644
--- a/tests/HTMLPurifier/LanguageTest.php
+++ b/tests/HTMLPurifier/LanguageTest.php
@@ -19,7 +19,7 @@ class HTMLPurifier_LanguageTest extends UnitTestCase
$lang = new HTMLPurifier_Language();
$lang->_loaded = true;
$lang->messages['error'] = 'Error is $1 on line $2';
- $this->assertIdentical($lang->formatMessage('error', 'fatal', 32), 'Error is fatal on line 32');
+ $this->assertIdentical($lang->formatMessage('error', array(1=>'fatal', 32)), 'Error is fatal on line 32');
}
}
diff --git a/tests/HTMLPurifier/Lexer/DirectLexTest.php b/tests/HTMLPurifier/Lexer/DirectLexTest.php
index 37c516f3..92e230c3 100644
--- a/tests/HTMLPurifier/Lexer/DirectLexTest.php
+++ b/tests/HTMLPurifier/Lexer/DirectLexTest.php
@@ -53,6 +53,12 @@ class HTMLPurifier_Lexer_DirectLexTest extends UnitTestCase
$input[10] = 'name="input" selected';
$expect[10] = array('name' => 'input', 'selected' => 'selected');
+ $input[11] = '=""';
+ $expect[11] = array();
+
+ $input[12] = '="" =""';
+ $expect[12] = array('"' => ''); // tough to say, just don't throw a loop
+
$config = HTMLPurifier_Config::createDefault();
$context = new HTMLPurifier_Context();
$size = count($input);