Move the %Core.ConvertDocumentToFragment body extraction in
HTMLPurifier_Lexer::normalize() so that it runs after CDATA sections have been
escaped. A <body> element that appears only inside a CDATA section is then
treated as text and no longer triggers body extraction. Adds a regression test
and a NEWS entry.

diff --git a/NEWS b/NEWS
index a3c5c8cf..7aacdc20 100644
--- a/NEWS
+++ b/NEWS
@@ -36,6 +36,7 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier
 - Do not re-munge URL if the output URL has the same host as the input URL.
   Requested by Chris.
 - Fix error in documentation regarding %Filter.ExtractStyleBlocks
+- Prevent <![CDATA[<body></body>]]> from triggering %Core.ConvertDocumentToFragment
 . Strategy_MakeWellFormed now operates in-place, saving memory and allowing
   for more interesting filter-backtracking
 . New HTMLPurifier_Injector->rewind() functionality, allows injectors to rewind
diff --git a/library/HTMLPurifier/Lexer.php b/library/HTMLPurifier/Lexer.php
index 01364f65..8c60ab33 100644
--- a/library/HTMLPurifier/Lexer.php
+++ b/library/HTMLPurifier/Lexer.php
@@ -226,11 +226,6 @@ class HTMLPurifier_Lexer
      */
     public function normalize($html, $config, $context) {
 
-        // extract body from document if applicable
-        if ($config->get('Core', 'ConvertDocumentToFragment')) {
-            $html = $this->extractBody($html);
-        }
-
         // normalize newlines to \n
         $html = str_replace("\r\n", "\n", $html);
         $html = str_replace("\r", "\n", $html);
@@ -243,6 +238,11 @@ class HTMLPurifier_Lexer
         // escape CDATA
         $html = $this->escapeCDATA($html);
 
+        // extract body from document if applicable
+        if ($config->get('Core', 'ConvertDocumentToFragment')) {
+            $html = $this->extractBody($html);
+        }
+
         // expand entities that aren't the big five
         $html = $this->_entity_parser->substituteNonSpecialEntities($html);
 
diff --git a/tests/HTMLPurifier/LexerTest.php b/tests/HTMLPurifier/LexerTest.php
index 605ee2f2..349e5b24 100644
--- a/tests/HTMLPurifier/LexerTest.php
+++ b/tests/HTMLPurifier/LexerTest.php
@@ -634,6 +634,26 @@ div {}
         );
     }
 
+    function test_tokenizeHTML_bodyInCDATA() {
+        $this->assertTokenization(
+            '<![CDATA[<body>Foo</body>]]>',
+            array(
+                new HTMLPurifier_Token_Text('<body>Foo</body>'),
+            ),
+            array(
+                'PH5P' => array(
+                    new HTMLPurifier_Token_Text('<'),
+                    new HTMLPurifier_Token_Text('body'),
+                    new HTMLPurifier_Token_Text('>'),
+                    new HTMLPurifier_Token_Text('Foo'),
+                    new HTMLPurifier_Token_Text('<'),
+                    new HTMLPurifier_Token_Text('/body'),
+                    new HTMLPurifier_Token_Text('>'),
+                ),
+            )
+        );
+    }
+
     /*
 
     function test_tokenizeHTML_() {