Release 2.0.1, merged in 1181 to HEAD.

git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/branches/strict@1255 48356398-32a2-884e-a903-53898d9a118a
2025-08-06 14:16:32 +02:00 · 2007-06-27 14:30:45 +00:00
parent 42858ad594
commit 495164e938
326 changed files with 3025 additions and 826 deletions
--- a/library/HTMLPurifier/Lexer/DOMLex.php
+++ b/library/HTMLPurifier/Lexer/DOMLex.php
@@ -38,25 +38,25 @@ class HTMLPurifier_Lexer_DOMLex extends HTMLPurifier_Lexer
        $this->factory = new HTMLPurifier_TokenFactory();
    }
    
-    public function tokenizeHTML($string, $config, &$context) {
+    public function tokenizeHTML($html, $config, &$context) {
        
-        $string = $this->normalize($string, $config, $context);
+        $html = $this->normalize($html, $config, $context);
        
-        // preprocess string, essential for UTF-8
-        $string =
+        // preprocess html, essential for UTF-8
+        $html =
            '<!DOCTYPE html '.
                'PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"'.
                '"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">'.
            '<html><head>'.
            '<meta http-equiv="Content-Type" content="text/html;'.
                ' charset=utf-8" />'.
-            '</head><body><div>'.$string.'</div></body></html>';
+            '</head><body><div>'.$html.'</div></body></html>';
        
        $doc = new DOMDocument();
        $doc->encoding = 'UTF-8'; // theoretically, the above has this covered
        
        set_error_handler(array($this, 'muteErrorHandler'));
-        $doc->loadHTML($string);
+        $doc->loadHTML($html);
        restore_error_handler();
        
        $tokens = array();
@@ -83,10 +83,13 @@ class HTMLPurifier_Lexer_DOMLex extends HTMLPurifier_Lexer
        // intercept non element nodes. WE MUST catch all of them,
        // but we're not getting the character reference nodes because
        // those should have been preprocessed
-        if ($node->nodeType === XML_TEXT_NODE ||
-                  $node->nodeType === XML_CDATA_SECTION_NODE) {
+        if ($node->nodeType === XML_TEXT_NODE) {
            $tokens[] = $this->factory->createText($node->data);
            return;
+        } elseif ($node->nodeType === XML_CDATA_SECTION_NODE) {
+            // undo DOM's special treatment of <script> tags
+            $tokens[] = $this->factory->createText($this->parseData($node->data));
+            return;
        } elseif ($node->nodeType === XML_COMMENT_NODE) {
            $tokens[] = $this->factory->createComment($node->data);
            return;
@@ -150,4 +153,3 @@ class HTMLPurifier_Lexer_DOMLex extends HTMLPurifier_Lexer
    
 }

-?>