diff --git a/NEWS b/NEWS
index 90a05462..e6e0b9b0 100644
--- a/NEWS
+++ b/NEWS
@@ -9,6 +9,10 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier
. Internal change
==========================
+4.7.0, unknown release date
+- Don't truncate upon encountering when using DOMLex. Thanks
+ Myrto Christina for finally convincing me to fix this.
+
4.6.0, released 2013-11-30
# Secure URI munge hashing algorithm has changed to hash_hmac("sha256", $url, $secret).
Please update any verification scripts you may have.
diff --git a/library/HTMLPurifier/Lexer/DOMLex.php b/library/HTMLPurifier/Lexer/DOMLex.php
index 72075445..b8181929 100644
--- a/library/HTMLPurifier/Lexer/DOMLex.php
+++ b/library/HTMLPurifier/Lexer/DOMLex.php
@@ -75,8 +75,7 @@ class HTMLPurifier_Lexer_DOMLex extends HTMLPurifier_Lexer
$tokens = array();
$this->tokenizeDOM(
$doc->getElementsByTagName('html')->item(0)-> //
- getElementsByTagName('body')->item(0)-> //
- getElementsByTagName('div')->item(0), //
+ getElementsByTagName('body')->item(0), //
$tokens
);
return $tokens;
@@ -272,7 +271,7 @@ class HTMLPurifier_Lexer_DOMLex extends HTMLPurifier_Lexer
$ret .= '';
$ret .= '';
// No protection if $html contains a stray
!
- $ret .= '' . $html . '
';
+ $ret .= '' . $html . '';
return $ret;
}
}
diff --git a/library/HTMLPurifier/Lexer/PH5P.php b/library/HTMLPurifier/Lexer/PH5P.php
index a4587e4c..ff4fa218 100644
--- a/library/HTMLPurifier/Lexer/PH5P.php
+++ b/library/HTMLPurifier/Lexer/PH5P.php
@@ -34,8 +34,7 @@ class HTMLPurifier_Lexer_PH5P extends HTMLPurifier_Lexer_DOMLex
$tokens = array();
$this->tokenizeDOM(
$doc->getElementsByTagName('html')->item(0)-> //
- getElementsByTagName('body')->item(0)-> //
- getElementsByTagName('div')->item(0) //
+ getElementsByTagName('body')->item(0) //
,
$tokens
);
diff --git a/tests/HTMLPurifier/LexerTest.php b/tests/HTMLPurifier/LexerTest.php
index a2438a31..ecdbe1b8 100644
--- a/tests/HTMLPurifier/LexerTest.php
+++ b/tests/HTMLPurifier/LexerTest.php
@@ -264,7 +264,8 @@ class HTMLPurifier_LexerTest extends HTMLPurifier_Harness
new HTMLPurifier_Token_End('poolasdf'),
new HTMLPurifier_Token_End('pooloka'),
),
- 'PH5P' => $alt,
+ // 20140831: Weird, but whatever...
+ 'PH5P' => array(new HTMLPurifier_Token_Empty('asdf')),
)
);
}
@@ -800,6 +801,21 @@ div {}
);
}
+ public function test_tokenizeHTML_prematureDivClose()
+ {
+ $this->assertTokenization(
+ '
dontdie',
+ array(
+ new HTMLPurifier_Token_End('div'),
+ new HTMLPurifier_Token_Text('dontdie')
+ ),
+ array(
+ 'DOMLex' => $alt = array(new HTMLPurifier_Token_Text('dontdie')),
+ 'PH5P' => $alt
+ )
+ );
+ }
+
/*