diff --git a/library/HTMLPurifier/Generator.php b/library/HTMLPurifier/Generator.php
index deecc084..19295dbc 100644
--- a/library/HTMLPurifier/Generator.php
+++ b/library/HTMLPurifier/Generator.php
@@ -13,6 +13,7 @@ class HTMLPurifier_Generator
}
function generateFromToken($token) {
+ if (!isset($token->type)) return '';
if ($token->type == 'start') {
$attr = $this->generateAttributes($token->attributes);
return '<' . $token->name . ($attr ? ' ' : '') . $attr . '>';
@@ -25,7 +26,7 @@ class HTMLPurifier_Generator
return '<' . $token->name . ($attr ? ' ' : '') . $attr . ' />';
} elseif ($token->type == 'text') {
- return htmlentities($token->data, ENT_COMPAT, 'UTF-8');
+ return htmlspecialchars($token->data, ENT_COMPAT, 'UTF-8');
} else {
return '';
@@ -36,7 +37,7 @@ class HTMLPurifier_Generator
function generateAttributes($assoc_array_of_attributes) {
$html = '';
foreach ($assoc_array_of_attributes as $key => $value) {
- $html .= $key.'="'.htmlentities($value, ENT_COMPAT, 'UTF-8').'" ';
+ $html .= $key.'="'.htmlspecialchars($value, ENT_COMPAT, 'UTF-8').'" ';
}
return rtrim($html);
}
diff --git a/library/HTMLPurifier/Lexer.php b/library/HTMLPurifier/Lexer.php
index ee341ff8..086d7fb9 100644
--- a/library/HTMLPurifier/Lexer.php
+++ b/library/HTMLPurifier/Lexer.php
@@ -5,18 +5,20 @@ require_once 'HTMLPurifier/Token.php';
/**
* Forgivingly lexes HTML (SGML-style) markup into tokens.
*
- * The lexer parses a string of SGML-style markup and converts them into
+ * A lexer parses a string of SGML-style markup and converts it into
* corresponding tokens. It doesn't check for well-formedness, although its
* internal mechanism may make this automatic (such as the case of
* HTMLPurifier_Lexer_DOMLex). There are several implementations to choose
* from.
*
- * The lexer is HTML-oriented: it might work with XML, but it's not
+ * A lexer is HTML-oriented: it might work with XML, but it's not
* recommended, as we adhere to a subset of the specification for optimization
* reasons.
*
* This class should not be directly instantiated, but you may use create() to
- * retrieve a default copy of the lexer.
+ * retrieve a default copy of the lexer. Being a supertype, this class
+ * does not actually define any implementation, but offers commonly used
+ * convenience functions for subclasses.
*
* @note The unit tests will instantiate this class for testing purposes, as
* many of the utility functions require a class to be instantiated.
diff --git a/tests/HTMLPurifier/GeneratorTest.php b/tests/HTMLPurifier/GeneratorTest.php
index d93decf0..5806868b 100644
--- a/tests/HTMLPurifier/GeneratorTest.php
+++ b/tests/HTMLPurifier/GeneratorTest.php
@@ -1,15 +1,18 @@
UnitTestCase();
$this->gen = new HTMLPurifier_Generator();
+ $this->_entity_lookup = HTMLPurifier_EntityLookup::instance();
}
function test_generateFromToken() {
@@ -39,6 +42,15 @@ class HTMLPurifier_GeneratorTest extends UnitTestCase
$inputs[5] = new HTMLPurifier_Token_Empty('br');
$expect[5] = '
';
+ // test fault tolerance
+ $inputs[6] = null;
+ $expect[6] = '';
+
+ // don't convert non-special characters
+ $theta_char = $this->_entity_lookup->table['theta'];
+ $inputs[7] = new HTMLPurifier_Token_Text($theta_char);
+ $expect[7] = $theta_char;
+
foreach ($inputs as $i => $input) {
$result = $this->gen->generateFromToken($input);
$this->assertEqual($result, $expect[$i]);
@@ -64,6 +76,11 @@ class HTMLPurifier_GeneratorTest extends UnitTestCase
$inputs[3] = array('src' => 'picture.jpg', 'alt' => 'Short & interesting');
$expect[3] = 'src="picture.jpg" alt="Short & interesting"';
+ // don't escape non-special characters
+ $theta_char = $this->_entity_lookup->table['theta'];
+ $inputs[4] = array('title' => 'Theta is ' . $theta_char);
+ $expect[4] = 'title="Theta is ' . $theta_char . '"';
+
foreach ($inputs as $i => $input) {
$result = $this->gen->generateAttributes($input);
$this->assertEqual($result, $expect[$i]);