diff --git a/library/HTMLPurifier/Lexer/DOMLex.php b/library/HTMLPurifier/Lexer/DOMLex.php
index 202b4c8a..1ba9675f 100644
--- a/library/HTMLPurifier/Lexer/DOMLex.php
+++ b/library/HTMLPurifier/Lexer/DOMLex.php
@@ -75,16 +75,15 @@ class HTMLPurifier_Lexer_DOMLex extends HTMLPurifier_Lexer
protected function tokenizeDOM($node, &$tokens, $collect = false) {
// recursive goodness!
- // intercept non element nodes
-
- if ( isset($node->data) ) {
- if ($node->nodeType === XML_TEXT_NODE ||
- $node->nodeType === XML_CDATA_SECTION_NODE) {
- $tokens[] = $this->factory->createText($node->data);
- } elseif ($node->nodeType === XML_COMMENT_NODE) {
- $tokens[] = $this->factory->createComment($node->data);
- }
- // quite possibly, the object wasn't handled, that's fine
+ // intercept non element nodes. WE MUST catch all of them,
+ // but we're not getting the character reference nodes because
+ // those should have been preprocessed
+ if ($node->nodeType === XML_TEXT_NODE ||
+ $node->nodeType === XML_CDATA_SECTION_NODE) {
+ $tokens[] = $this->factory->createText($node->data);
+ return;
+ } elseif ($node->nodeType === XML_COMMENT_NODE) {
+ $tokens[] = $this->factory->createComment($node->data);
return;
}