[2.0.1] Improve special case handling for <script>

- DirectLex now honors comments with greater than or less than signs in them
- Comments are transformed into script elements, ending comments are scrapped
- Buggy generator code rewritten to be more error-proof
- AttrValidator checks if token has attributes before processing
- Remove invalid documentation from Scripting
- "Commenting" of script elements switched to the more advanced version

git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@1189 48356398-32a2-884e-a903-53898d9a118a
2025-10-17 23:16:06 +02:00 · 2007-06-21 14:44:26 +00:00
parent e55551ecdd
commit bf0d659c47
13 changed files with 179 additions and 32 deletions
--- a/library/HTMLPurifier/Lexer/DOMLex.php
+++ b/library/HTMLPurifier/Lexer/DOMLex.php
@@ -83,10 +83,13 @@ class HTMLPurifier_Lexer_DOMLex extends HTMLPurifier_Lexer
        // intercept non element nodes. WE MUST catch all of them,
        // but we're not getting the character reference nodes because
        // those should have been preprocessed
-        if ($node->nodeType === XML_TEXT_NODE ||
-                  $node->nodeType === XML_CDATA_SECTION_NODE) {
+        if ($node->nodeType === XML_TEXT_NODE) {
            $tokens[] = $this->factory->createText($node->data);
            return;
+        } elseif ($node->nodeType === XML_CDATA_SECTION_NODE) {
+            // undo DOM's special treatment of <script> tags
+            $tokens[] = $this->factory->createText($this->parseData($node->data));
+            return;
        } elseif ($node->nodeType === XML_COMMENT_NODE) {
            $tokens[] = $this->factory->createComment($node->data);
            return;
--- a/library/HTMLPurifier/Lexer/DirectLex.php
+++ b/library/HTMLPurifier/Lexer/DirectLex.php
@@ -126,22 +126,34 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer
                
                // Check if it's a comment
                if (
-                    substr($segment, 0, 3) == '!--' &&
-                    substr($segment, $strlen_segment-2, 2) == '--'
+                    substr($segment, 0, 3) == '!--'
                ) {
+                    // re-determine segment length, looking for -->
+                    $position_comment_end = strpos($html, '-->', $cursor);
+                    if ($position_comment_end === false) {
+                        // uh oh, we have a comment that extends to
+                        // infinity. Can't be helped: set comment
+                        // end position to end of string
+                        $position_comment_end = strlen($html);
+                        $end = true;
+                    } else {
+                        $end = false;
+                    }
+                    $strlen_segment = $position_comment_end - $cursor;
+                    $segment = substr($html, $cursor, $strlen_segment);
                    $token = new
                        HTMLPurifier_Token_Comment(
                            substr(
-                                $segment, 3, $strlen_segment - 5
+                                $segment, 3, $strlen_segment - 3
                            )
                        );
                    if ($maintain_line_numbers) {
                        $token->line = $current_line;
-                        $current_line += $this->substrCount($html, $nl, $cursor, $position_next_gt - $cursor);
+                        $current_line += $this->substrCount($html, $nl, $cursor, $strlen_segment);
                    }
                    $array[] = $token;
+                    $cursor = $end ? $position_comment_end : $position_comment_end + 3;
                    $inside_tag = false;
-                    $cursor = $position_next_gt + 1;
                    continue;
                }