From 6c9c8f238060996dae260a5d62d96e08da588832 Mon Sep 17 00:00:00 2001 From: "Edward Z. Yang" Date: Wed, 20 Feb 2008 00:15:44 +0000 Subject: [PATCH] [3.1.0] [BACKPORT] Fix bug with comments in styles, and some associated issues - Restore printTokens() git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@1570 48356398-32a2-884e-a903-53898d9a118a --- NEWS | 6 ++++ .../Filter/ExtractStyleBlocks.php | 9 +++++ library/HTMLPurifier/Generator.php | 2 ++ library/HTMLPurifier/Lexer.php | 1 - library/HTMLPurifier/Lexer/DOMLex.php | 2 +- library/HTMLPurifier/Lexer/DirectLex.php | 36 +++++-------------- smoketests/common.php | 18 ++++++++++ tests/Debugger.php | 12 ------- .../Filter/ExtractStyleBlocksTest.php | 13 +++++++ tests/HTMLPurifier/LexerTest.php | 23 ++++++++++++ tests/common.php | 18 +++++++++- 11 files changed, 98 insertions(+), 42 deletions(-) diff --git a/NEWS b/NEWS index 1f45b4a7..993405ab 100644 --- a/NEWS +++ b/NEWS @@ -32,6 +32,8 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier $schema which defines what HTMLPurifier_ConfigSchema to use besides the global default. - Fix bug with trusted script handling in libxml versions later than 2.6.28. +- Fix bug in ExtractStyleBlocks with comments in style tags +- Fix bug in comment parsing for DirectLex . Plugins now get their own changelogs according to project conventions. . Convert tokens to use instanceof, reducing memory footprint and improving comparison speed. @@ -53,6 +55,10 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier . Debugger class is deprecated and will be removed soon. . Command line argument parsing for testing scripts revamped, now --opt value format is supported. +. Smoketests now cleanup after magic quotes +. Generator now can output comments (however, comments are still stripped + from HTML Purifier output) +. substr_count PHP4 compatibility cludge removed 3.0.0, released 2008-01-06 # HTML Purifier is PHP 5 only! 
The 2.1.x branch will be maintained diff --git a/library/HTMLPurifier/Filter/ExtractStyleBlocks.php b/library/HTMLPurifier/Filter/ExtractStyleBlocks.php index ae799d66..9938234c 100644 --- a/library/HTMLPurifier/Filter/ExtractStyleBlocks.php +++ b/library/HTMLPurifier/Filter/ExtractStyleBlocks.php @@ -72,6 +72,15 @@ class HTMLPurifier_Filter_ExtractStyleBlocks extends HTMLPurifier_Filter } else { $scopes = array(); } + // remove comments from CSS + $css = trim($css); + if (strncmp('<!--', $css, 4) === 0) { + $css = substr($css, 4); + } + if (strlen($css) > 3 && substr($css, -3) == '-->') { + $css = substr($css, 0, -3); + } + $css = trim($css); $this->_tidy->parse($css); $css_definition = $config->getDefinition('CSS'); foreach ($this->_tidy->css as $k => $decls) { diff --git a/library/HTMLPurifier/Generator.php b/library/HTMLPurifier/Generator.php index 77fbb412..5837c19e 100644 --- a/library/HTMLPurifier/Generator.php +++ b/library/HTMLPurifier/Generator.php @@ -112,6 +112,8 @@ class HTMLPurifier_Generator } elseif ($token instanceof HTMLPurifier_Token_Text) { return $this->escape($token->data); + } elseif ($token instanceof HTMLPurifier_Token_Comment) { + return '<!--' . $token->data . '-->'; } else { return ''; diff --git a/library/HTMLPurifier/Lexer.php b/library/HTMLPurifier/Lexer.php index ed91fe34..220ff149 100644 --- a/library/HTMLPurifier/Lexer.php +++ b/library/HTMLPurifier/Lexer.php @@ -112,7 +112,6 @@ class HTMLPurifier_Lexer case 'DirectLex': return new HTMLPurifier_Lexer_DirectLex(); case 'PH5P': - // experimental Lexer that must be manually included return new HTMLPurifier_Lexer_PH5P(); default: trigger_error("Cannot instantiate unrecognized Lexer type " . 
htmlspecialchars($lexer), E_USER_ERROR); diff --git a/library/HTMLPurifier/Lexer/DOMLex.php b/library/HTMLPurifier/Lexer/DOMLex.php index ed4101c8..dc0adff2 100644 --- a/library/HTMLPurifier/Lexer/DOMLex.php +++ b/library/HTMLPurifier/Lexer/DOMLex.php @@ -91,7 +91,7 @@ class HTMLPurifier_Lexer_DOMLex extends HTMLPurifier_Lexer $last = end($tokens); $data = $node->data; // (note $node->tagname is already normalized) - if ($last instanceof HTMLPurifier_Token_Start && $last->name == 'script') { + if ($last instanceof HTMLPurifier_Token_Start && ($last->name == 'script' || $last->name == 'style')) { $new_data = trim($data); if (substr($new_data, 0, 4) === ' $position_comment_end = strpos($html, '-->', $cursor); @@ -168,13 +168,11 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer $segment = substr($html, $cursor, $strlen_segment); $token = new HTMLPurifier_Token_Comment( - substr( - $segment, 3, $strlen_segment - 3 - ) + substr($segment, 3) ); if ($maintain_line_numbers) { $token->line = $current_line; - $current_line += $this->substrCount($html, $nl, $cursor, $strlen_segment); + $current_line += substr_count($html, $nl, $cursor, $strlen_segment); } $array[] = $token; $cursor = $end ? 
$position_comment_end : $position_comment_end + 3; @@ -189,7 +187,7 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer $token = new HTMLPurifier_Token_End($type); if ($maintain_line_numbers) { $token->line = $current_line; - $current_line += $this->substrCount($html, $nl, $cursor, $position_next_gt - $cursor); + $current_line += substr_count($html, $nl, $cursor, $position_next_gt - $cursor); } $array[] = $token; $inside_tag = false; @@ -213,7 +211,7 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer ); if ($maintain_line_numbers) { $token->line = $current_line; - $current_line += $this->substrCount($html, $nl, $cursor, $position_next_gt - $cursor); + $current_line += substr_count($html, $nl, $cursor, $position_next_gt - $cursor); } $array[] = $token; $cursor = $position_next_gt + 1; @@ -242,7 +240,7 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer } if ($maintain_line_numbers) { $token->line = $current_line; - $current_line += $this->substrCount($html, $nl, $cursor, $position_next_gt - $cursor); + $current_line += substr_count($html, $nl, $cursor, $position_next_gt - $cursor); } $array[] = $token; $inside_tag = false; @@ -274,7 +272,7 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer } if ($maintain_line_numbers) { $token->line = $current_line; - $current_line += $this->substrCount($html, $nl, $cursor, $position_next_gt - $cursor); + $current_line += substr_count($html, $nl, $cursor, $position_next_gt - $cursor); } $array[] = $token; $cursor = $position_next_gt + 1; @@ -302,22 +300,6 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer return $array; } - /** - * PHP 4 compatible substr_count that implements offset and length - */ - protected function substrCount($haystack, $needle, $offset, $length) { - static $oldVersion; - if ($oldVersion === null) { - $oldVersion = version_compare(PHP_VERSION, '5.1', '<'); - } - if ($oldVersion) { - $haystack = substr($haystack, $offset, $length); - return 
substr_count($haystack, $needle); - } else { - return substr_count($haystack, $needle, $offset, $length); - } - } - /** * Takes the inside of an HTML tag and makes an assoc array of attributes. * diff --git a/smoketests/common.php b/smoketests/common.php index f6576093..d01081f8 100644 --- a/smoketests/common.php +++ b/smoketests/common.php @@ -15,3 +15,21 @@ function escapeHTML($string) { return $string; } +if (function_exists('get_magic_quotes_gpc') && get_magic_quotes_gpc()) { + function fix_magic_quotes(&$array) { + foreach ($array as $k => $val) { + if (!is_array($val)) { + $array[$k] = stripslashes($val); + } else { + fix_magic_quotes($array[$k]); + } + } + } + + fix_magic_quotes($_GET); + fix_magic_quotes($_POST); + fix_magic_quotes($_COOKIE); + fix_magic_quotes($_REQUEST); + fix_magic_quotes($_ENV); + fix_magic_quotes($_SERVER); +} diff --git a/tests/Debugger.php b/tests/Debugger.php index 6bc44554..91e21617 100644 --- a/tests/Debugger.php +++ b/tests/Debugger.php @@ -55,18 +55,6 @@ function isInScopes($array = array()) { } /**#@-*/ -function printTokens($tokens, $index = null) { - $string = '
<pre>';
-    $generator = new HTMLPurifier_Generator();
-    foreach ($tokens as $i => $token) {
-        if ($index === $i) $string .= '[<strong>';
-        $string .= "<sup>$i</sup>";
-        $string .= $generator->escape($generator->generateFromToken($token));
-        if ($index === $i) $string .= '</strong>]';
-    }
-    $string .= '</pre>
'; - echo $string; -} /** * The debugging singleton. Most interesting stuff happens here. diff --git a/tests/HTMLPurifier/Filter/ExtractStyleBlocksTest.php b/tests/HTMLPurifier/Filter/ExtractStyleBlocksTest.php index f1c3dafb..571ab18b 100644 --- a/tests/HTMLPurifier/Filter/ExtractStyleBlocksTest.php +++ b/tests/HTMLPurifier/Filter/ExtractStyleBlocksTest.php @@ -168,6 +168,19 @@ text-align:right; p p div { text-align:left; +}" + ); + } + + function test_removeComments() { + $this->assertCleanCSS( +"", +"div { +text-align:right; }" ); } diff --git a/tests/HTMLPurifier/LexerTest.php b/tests/HTMLPurifier/LexerTest.php index c1f05e57..257d0584 100644 --- a/tests/HTMLPurifier/LexerTest.php +++ b/tests/HTMLPurifier/LexerTest.php @@ -509,6 +509,29 @@ class HTMLPurifier_LexerTest extends HTMLPurifier_Harness ); } + function test_tokenizeHTML_() { + $this->assertTokenization( +'', + array( + new HTMLPurifier_Token_Start('style', array('type' => 'text/css')), + new HTMLPurifier_Token_Text("\ndiv {}\n"), + new HTMLPurifier_Token_End('style'), + ), + array( + // PH5P doesn't seem to like style tags + 'PH5P' => false, + // DirectLex defers to RemoveForeignElements for textification + 'DirectLex' => array( + new HTMLPurifier_Token_Start('style', array('type' => 'text/css')), + new HTMLPurifier_Token_Comment("\ndiv {}\n"), + new HTMLPurifier_Token_End('style'), + ), + ) + ); + } + /* function test_tokenizeHTML_() { diff --git a/tests/common.php b/tests/common.php index b8cecde0..31820d19 100644 --- a/tests/common.php +++ b/tests/common.php @@ -159,4 +159,20 @@ function htmlpurifier_add_test($test, $test_file, $only_phpt = false) { default: trigger_error("$test_file is an invalid file for testing", E_USER_ERROR); } -} \ No newline at end of file +} + +/** + * Debugging function that prints tokens in a user-friendly manner. + */ +function printTokens($tokens, $index = null) { + $string = '
<pre>';
+    $generator = new HTMLPurifier_Generator();
+    foreach ($tokens as $i => $token) {
+        if ($index === $i) $string .= '[<strong>';
+        $string .= "<sup>$i</sup>";
+        $string .= $generator->escape($generator->generateFromToken($token));
+        if ($index === $i) $string .= '</strong>]';
+    }
+    $string .= '</pre>';
+    echo $string;
+}