1
0
mirror of https://github.com/ezyang/htmlpurifier.git synced 2025-08-04 13:18:00 +02:00

[3.1.0] Fixed fatal error in PH5P lexer with invalid tag names

git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@1650 48356398-32a2-884e-a903-53898d9a118a
This commit is contained in:
Edward Z. Yang
2008-04-05 04:28:37 +00:00
parent c216968087
commit 9f1e678b48
6 changed files with 93 additions and 24 deletions

View File

@@ -63,16 +63,10 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer
$e =& $context->get('ErrorCollector');
}
// infinite loop protection
// has to be pretty big, since html docs can be big
// we allow two hundred thousand tags... more than enough?
// NOTE: this is also used for synchronization, so watch out
// for testing synchronization
$loops = 0;
while(true) {
// infinite loop protection
if (++$loops > 200000) return array();
while(++$loops) {
// recalculate lines
if (
@@ -381,16 +375,8 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer
// space, so let's guarantee that there's always a terminating space.
$string .= ' ';
// infinite loop protection
$loops = 0;
while(true) {
// infinite loop protection
if (++$loops > 1000) {
trigger_error('Infinite loop detected in attribute parsing', E_USER_WARNING);
return array();
}
if ($cursor >= $size) {
break;
}

View File

@@ -115,7 +115,7 @@ class HTML5 {
public function __construct($data) {
$data = str_replace("\r\n", "\n", $data);
$date = str_replace("\r", null, $data);
$data = str_replace("\r", null, $data);
$this->data = $data;
$this->char = -1;
@@ -2143,7 +2143,7 @@ class HTML5TreeConstructer {
/* Reconstruct the active formatting elements, if any. */
$this->reconstructActiveFormattingElements();
$this->insertElement($token);
$this->insertElement($token, true, true);
break;
}
break;
@@ -3524,7 +3524,18 @@ class HTML5TreeConstructer {
}
}
private function insertElement($token, $append = true) {
private function insertElement($token, $append = true, $check = false) {
// Proprietary workaround for libxml2's limitations with tag names
if ($check) {
// Slightly modified HTML5 tag-name modification,
// removing anything that's not an ASCII letter, digit, or hyphen
$token['name'] = preg_replace('/[^a-z0-9-]/i', '', $token['name']);
// Remove leading hyphens and numbers
$token['name'] = ltrim($token['name'], '-0..9');
// In theory, this should never be needed, but just in case
if ($token['name'] === '') $token['name'] = 'span'; // arbitrary generic choice
}
$el = $this->dom->createElement($token['name']);
foreach($token['attr'] as $attr) {