diff --git a/library/HTMLPurifier.php b/library/HTMLPurifier.php index 731e1dfd..e9e8219b 100644 --- a/library/HTMLPurifier.php +++ b/library/HTMLPurifier.php @@ -51,6 +51,7 @@ require_once 'HTMLPurifier/Generator.php'; require_once 'HTMLPurifier/Strategy/Core.php'; require_once 'HTMLPurifier/Encoder.php'; +require_once 'HTMLPurifier/ErrorCollector.php'; require_once 'HTMLPurifier/LanguageFactory.php'; HTMLPurifier_ConfigSchema::define( @@ -146,8 +147,8 @@ class HTMLPurifier $language = $language_factory->create($config->get('Core', 'Language')); $context->register('Locale', $language); - $error_collector = new HTMLPurifier_ErrorCollector(); - $context->register('ErrorCollector', $language); + $error_collector = new HTMLPurifier_ErrorCollector($language); + $context->register('ErrorCollector', $error_collector); } $html = HTMLPurifier_Encoder::convertToUTF8($html, $config, $context); diff --git a/library/HTMLPurifier/ErrorCollector.php b/library/HTMLPurifier/ErrorCollector.php index ddad3d8d..762c8dd6 100644 --- a/library/HTMLPurifier/ErrorCollector.php +++ b/library/HTMLPurifier/ErrorCollector.php @@ -10,16 +10,31 @@ class HTMLPurifier_ErrorCollector { var $errors = array(); + var $locale; + + function HTMLPurifier_ErrorCollector(&$locale) {$this->locale =& $locale;} /** * Sends an error message to the collector for later use - * @param string Error message text - * @param int Error severity, PHP error style (don't use E_USER_) - * @param HTMLPurifier_Token Token that caused error - * @param array Tokens surrounding the offending token above, use true as placeholder + * @param $line Integer line number, or HTMLPurifier_Token that caused error + * @param $severity int Error severity, PHP error style (don't use E_USER_) + * @param $msg string Error message text */ - function send($msg, $severity, $token, $context_tokens = array(true)) { - $this->errors[] = array($msg, $severity, $token, $context_tokens); + function send($line, $severity, $msg) { + $token = false; + if ($line && !is_int($line)) { + $token = $line; + $line = $token->line; + if (!$line) $line = false; + } + if (func_num_args() == 3) $msg = $this->locale->getMessage($msg); + else { + $args = func_get_args(); + array_splice($args, 0, 2); // one less to make 1-based + unset($args[0]); + $msg = $this->locale->formatMessage($msg, $args); + } + $this->errors[] = array($line, $severity, $msg); } /** @@ -35,41 +50,40 @@ class HTMLPurifier_ErrorCollector * Default HTML formatting implementation for error messages * @param $config Configuration array, vital for HTML output nature */ - function getHTMLFormatted($config, &$context) { + function getHTMLFormatted($config) { $generator = new HTMLPurifier_Generator(); - $generator->generateFromTokens(array(), $config, $context); // initialize + $generator->generateFromTokens(array(), $config, $this->context); // initialize $ret = array(); $errors = $this->errors; - $locale = $context->get('Locale'); // sort error array by line if ($config->get('Core', 'MaintainLineNumbers')) { $lines = array(); - foreach ($errors as $error) $lines[] = $error[2]->line; + foreach ($errors as $error) { + $lines[] = $error[0]; + } array_multisort($lines, SORT_ASC, $errors); } foreach ($errors as $error) { - list($msg, $severity, $token, $context_tokens) = $error; + list($line, $severity, $msg) = $error; $string = ''; - $string .= $locale->getErrorName($severity) . ': '; + $string .= $this->locale->getErrorName($severity) . ': '; $string .= $generator->escape($msg); - if (!empty($token->line)) { - $string .= ' at line ' . $token->line; + if ($line) { + $string .= $this->locale->formatMessage( + 'ErrorCollector: At line', array('line' => $line)); } - $string .= ' ('; - foreach ($context_tokens as $context_token) { - if ($context_token !== true) { - $string .= $generator->escape($generator->generateFromToken($context_token)); - } else { - $string .= '' . $generator->escape($generator->generateFromToken($token)) . ''; - } - } - $string .= ')'; $ret[] = $string; } - return $ret; + + if (empty($errors)) { + return '

' . $this->locale->getMessage('ErrorCollector: No errors') . '

'; + } else { + return ''; + } + } } diff --git a/library/HTMLPurifier/Language.php b/library/HTMLPurifier/Language.php index 51051c97..d7de2ab5 100644 --- a/library/HTMLPurifier/Language.php +++ b/library/HTMLPurifier/Language.php @@ -71,17 +71,16 @@ class HTMLPurifier_Language /** * Formats a localised message with passed parameters * @param $key string identifier of message - * @param $param Parameter to substitute in (arbitrary number) + * @param $args Parameters to substitute in * @return string localised message */ - function formatMessage($key) { + function formatMessage($key, $args = array()) { if (!$this->_loaded) $this->load(); if (!isset($this->messages[$key])) return "[$key]"; $raw = $this->messages[$key]; - $args = func_get_args(); $substitutions = array(); - for ($i = 1; $i < count($args); $i++) { - $substitutions['$' . $i] = $args[$i]; + foreach ($args as $i => $value) { + $substitutions['$' . $i] = $value; } return strtr($raw, $substitutions); } diff --git a/library/HTMLPurifier/Language/messages/en.php b/library/HTMLPurifier/Language/messages/en.php index d62a30cb..11c1d842 100644 --- a/library/HTMLPurifier/Language/messages/en.php +++ b/library/HTMLPurifier/Language/messages/en.php @@ -7,7 +7,14 @@ $messages = array( 'htmlpurifier' => 'HTML Purifier', 'pizza' => 'Pizza', // for unit testing purposes +'ErrorCollector: No errors' => 'No errors', +'ErrorCollector: At line' => ' at line $line', +'Lexer: Unclosed comment' => 'Unclosed comment', +'Lexer: Unescaped lt' => 'Unescaped less-than sign (<) should be <', +'Lexer: Missing gt' => 'Missing greater-than sign (>), previous less-than sign (<) should be escaped', +'Lexer: Missing attribute key' => 'Attribute declaration has no key', +'Lexer: Missing end quote' => 'Attribute declaration has no end quote', ); diff --git a/library/HTMLPurifier/LanguageFactory.php b/library/HTMLPurifier/LanguageFactory.php index c8891ffd..4acbf0e4 100644 --- a/library/HTMLPurifier/LanguageFactory.php +++ b/library/HTMLPurifier/LanguageFactory.php @@ -24,7 +24,7 @@ class HTMLPurifier_LanguageFactory * variables to slurp out of a message file. * @value array list */ - var $keys = array('fallback', 'messages'); + var $keys = array('fallback', 'messages', 'errorNames'); /** * Instance of HTMLPurifier_AttrDef_Lang to validate language codes diff --git a/library/HTMLPurifier/Lexer/DirectLex.php b/library/HTMLPurifier/Lexer/DirectLex.php index 892f7d64..1cdc1a64 100644 --- a/library/HTMLPurifier/Lexer/DirectLex.php +++ b/library/HTMLPurifier/Lexer/DirectLex.php @@ -44,12 +44,19 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer $array = array(); // result array $maintain_line_numbers = $config->get('Core', 'MaintainLineNumbers'); - $current_line = 1; + if ($maintain_line_numbers) $current_line = 1; + else $current_line = false; + $context->register('CurrentLine', $current_line); $nl = PHP_EOL; // how often to manually recalculate. This will ALWAYS be right, // but it's pretty wasteful. Set to 0 to turn off $synchronize_interval = $config->get('Core', 'DirectLexLineNumberSyncInterval'); + $e = $l = false; + if ($config->get('Core', 'CollectErrors')) { + $e =& $context->get('ErrorCollector'); + } + // infinite loop protection // has to be pretty big, since html docs can be big // we're allow two hundred thousand tags... more than enough? @@ -131,6 +138,7 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer // uh oh, we have a comment that extends to // infinity. Can't be helped: set comment // end position to end of string + if ($e) $e->send($current_line, E_WARNING, 'Lexer: Unclosed comment'); $position_comment_end = strlen($html); $end = true; } else { @@ -173,6 +181,7 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer // have accidently grabbed an emoticon. Translate into // text and go our merry way if (!ctype_alnum($segment[0])) { + if ($e) $e->send($current_line, E_NOTICE, 'Lexer: Unescaped lt'); $token = new HTMLPurifier_Token_Text( '<' . @@ -251,6 +260,8 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer $inside_tag = false; continue; } else { + // inside tag, but there's no ending > sign + if ($e) $e->send($current_line, E_WARNING, 'Lexer: Missing gt'); $token = new HTMLPurifier_Token_Text( '<' . @@ -265,6 +276,9 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer } break; } + + $context->destroy('CurrentLine'); + return $array; } @@ -295,6 +309,12 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer if ($string == '') return array(); // no attributes + $e = false; + if ($config->get('Core', 'CollectErrors')) { + $e =& $context->get('ErrorCollector'); + $current_line =& $context->get('CurrentLine'); + } + // let's see if we can abort as quickly as possible // one equal sign, no spaces => one attribute $num_equal = substr_count($string, '='); @@ -306,7 +326,10 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer // only one attribute list($key, $quoted_value) = explode('=', $string); $quoted_value = trim($quoted_value); - if (!$key) return array(); + if (!$key) { + if ($e) $e->send($current_line, E_ERROR, 'Lexer: Missing attribute key'); + return array(); + } if (!$quoted_value) return array($key => ''); $first_char = @$quoted_value[0]; $last_char = @$quoted_value[strlen($quoted_value)-1]; @@ -320,6 +343,7 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer } else { // not well behaved if ($open_quote) { + if ($e) $e->send($current_line, E_ERROR, 'Lexer: Missing end quote'); $value = substr($quoted_value, 1); } else { $value = $quoted_value; @@ -343,7 +367,10 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer while(true) { // infinite loop protection - if (++$loops > 1000) return array(); + if (++$loops > 1000) { + trigger_error('Infinite loop detected in attribute parsing', E_USER_WARNING); + return array(); + } if ($cursor >= $size) { break; @@ -362,7 +389,11 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer $key = substr($string, $key_begin, $key_end - $key_begin); - if (!$key) continue; // empty key + if (!$key) { + if ($e) $e->send($current_line, E_ERROR, 'Lexer: Missing attribute key'); + $cursor += strcspn($string, $this->_whitespace, $cursor + 1); // prevent infinite loop + continue; // empty key + } // scroll past all whitespace $cursor += strspn($string, $this->_whitespace, $cursor); @@ -407,6 +438,9 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer // boolattr if ($key !== '') { $array[$key] = $key; + } else { + // purely theoretical + if ($e) $e->send($current_line, E_ERROR, 'Lexer: Missing attribute key'); } } diff --git a/tests/HTMLPurifier/ErrorCollectorTest.php b/tests/HTMLPurifier/ErrorCollectorTest.php index f25579be..95c0578e 100644 --- a/tests/HTMLPurifier/ErrorCollectorTest.php +++ b/tests/HTMLPurifier/ErrorCollectorTest.php @@ -5,45 +5,74 @@ require_once 'HTMLPurifier/ErrorCollector.php'; class HTMLPurifier_ErrorCollectorTest extends UnitTestCase { + function setup() { + generate_mock_once('HTMLPurifier_Language'); + } + function test() { - $tok1 = new HTMLPurifier_Token_Text('Token that caused error'); - $tok1->line = 23; - $tok2 = new HTMLPurifier_Token_Start('a'); // also caused error - $tok2->line = 3; - $tok3 = new HTMLPurifier_Token_Text('Context before'); // before $tok2 - $tok3->line = 3; - $tok4 = new HTMLPurifier_Token_Text('Context after'); // after $tok2 - $tok4->line = 3; + $tok = new HTMLPurifier_Token_Start('a'); // also caused error + $tok->line = 3; - $collector = new HTMLPurifier_ErrorCollector(); - $collector->send('Big fat error', E_ERROR, $tok1); - $collector->send('Another ', E_WARNING, $tok2, array($tok3, true, $tok4)); + $language = new HTMLPurifier_LanguageMock(); + $language->setReturnValue('getErrorName', 'Error', array(E_ERROR)); + $language->setReturnValue('getErrorName', 'Warning', array(E_WARNING)); + $language->setReturnValue('getMessage', 'Message 1', array('message-1')); + $language->setReturnValue('formatMessage', 'Message 2', array('message-2', array(1 => 'param'))); + $language->setReturnValue('formatMessage', ' at line 23', array('ErrorCollector: At line', array('line' => 23))); + $language->setReturnValue('formatMessage', ' at line 3', array('ErrorCollector: At line', array('line' => 3))); + + $collector = new HTMLPurifier_ErrorCollector($language); + $collector->send(23, E_ERROR, 'message-1'); + $collector->send($tok, E_WARNING, 'message-2', 'param'); $result = array( - 0 => array('Big fat error', E_ERROR, $tok1, array(true)), - 1 => array('Another ', E_WARNING, $tok2, array($tok3, true, $tok4)) + 0 => array(23, E_ERROR, 'Message 1'), + 1 => array(3, E_WARNING, 'Message 2') ); $this->assertIdentical($collector->getRaw(), $result); - $formatted_result = array( - 0 => 'Warning: Another <warning> at line 3 (Context before<a>Context after)', - 1 => 'Error: Big fat error at line 23 (Token that caused error)' - ); + $formatted_result = + ''; $config = HTMLPurifier_Config::create(array('Core.MaintainLineNumbers' => true)); - $context = new HTMLPurifier_Context(); + $this->assertIdentical($collector->getHTMLFormatted($config), $formatted_result); - generate_mock_once('HTMLPurifier_Language'); + } + + function testNoErrors() { $language = new HTMLPurifier_LanguageMock(); + $language->setReturnValue('getMessage', 'No errors', array('ErrorCollector: No errors')); + $collector = new HTMLPurifier_ErrorCollector($language); + $formatted_result = '

No errors

'; + $config = HTMLPurifier_Config::createDefault(); + $this->assertIdentical($collector->getHTMLFormatted($config), $formatted_result); + } + + function testNoLineNumbers() { + $token = new HTMLPurifier_Token_Start('a'); // no line number! + $language = new HTMLPurifier_LanguageMock(); + $language->setReturnValue('getMessage', 'Message 1', array('message-1')); + $language->setReturnValue('getMessage', 'Message 2', array('message-2')); $language->setReturnValue('getErrorName', 'Error', array(E_ERROR)); - $language->setReturnValue('getErrorName', 'Warning', array(E_WARNING)); - $context->register('Locale', $language); + $collector = new HTMLPurifier_ErrorCollector($language); + $collector->send(false, E_ERROR, 'message-1'); + $collector->send($token, E_ERROR, 'message-2'); - $this->assertIdentical($collector->getHTMLFormatted($config, $context), $formatted_result); + $result = array( + 0 => array(false, E_ERROR, 'Message 1'), + 1 => array(false, E_ERROR, 'Message 2') + ); + $this->assertIdentical($collector->getRaw(), $result); + $formatted_result = + ''; + $config = HTMLPurifier_Config::createDefault(); + $this->assertIdentical($collector->getHTMLFormatted($config), $formatted_result); } } diff --git a/tests/HTMLPurifier/LanguageTest.php b/tests/HTMLPurifier/LanguageTest.php index 21e55206..63f37fbd 100644 --- a/tests/HTMLPurifier/LanguageTest.php +++ b/tests/HTMLPurifier/LanguageTest.php @@ -19,7 +19,7 @@ class HTMLPurifier_LanguageTest extends UnitTestCase $lang = new HTMLPurifier_Language(); $lang->_loaded = true; $lang->messages['error'] = 'Error is $1 on line $2'; - $this->assertIdentical($lang->formatMessage('error', 'fatal', 32), 'Error is fatal on line 32'); + $this->assertIdentical($lang->formatMessage('error', array(1=>'fatal', 32)), 'Error is fatal on line 32'); } } diff --git a/tests/HTMLPurifier/Lexer/DirectLexTest.php b/tests/HTMLPurifier/Lexer/DirectLexTest.php index 37c516f3..92e230c3 100644 --- a/tests/HTMLPurifier/Lexer/DirectLexTest.php +++ b/tests/HTMLPurifier/Lexer/DirectLexTest.php @@ -53,6 +53,12 @@ class HTMLPurifier_Lexer_DirectLexTest extends UnitTestCase $input[10] = 'name="input" selected'; $expect[10] = array('name' => 'input', 'selected' => 'selected'); + $input[11] = '=""'; + $expect[11] = array(); + + $input[12] = '="" =""'; + $expect[12] = array('"' => ''); // tough to say, just don't throw a loop + $config = HTMLPurifier_Config::createDefault(); $context = new HTMLPurifier_Context(); $size = count($input);