mirror of
https://github.com/ezyang/htmlpurifier.git
synced 2025-08-03 20:58:11 +02:00
fix: catastrophic backtracking in Core.AggressivelyFixLt (#440)
This commit is contained in:
@@ -52,14 +52,7 @@ class HTMLPurifier_Lexer_DOMLex extends HTMLPurifier_Lexer
|
||||
// attempt to armor stray angled brackets that cannot possibly
|
||||
// form tags and thus are probably being used as emoticons
|
||||
if ($config->get('Core.AggressivelyFixLt')) {
|
||||
$char = '[^a-z!\/]';
|
||||
$comment = "/<!--(.*?)(-->|\z)/is";
|
||||
$html = preg_replace_callback($comment, array($this, 'callbackArmorCommentEntities'), $html);
|
||||
do {
|
||||
$old = $html;
|
||||
$html = preg_replace("/<($char)/i", '<\\1', $html);
|
||||
} while ($html !== $old);
|
||||
$html = preg_replace_callback($comment, array($this, 'callbackUndoCommentSubst'), $html); // fix comments
|
||||
$html = $this->aggressivelyFixLt($html);
|
||||
}
|
||||
|
||||
// preprocess html, essential for UTF-8
|
||||
@@ -288,7 +281,7 @@ class HTMLPurifier_Lexer_DOMLex extends HTMLPurifier_Lexer
|
||||
*/
|
||||
public function callbackUndoCommentSubst($matches)
{
    // $matches[1] is the comment body and $matches[2] is whatever terminated
    // it, as captured by the pattern handed to preg_replace_callback().
    // The decoding itself lives in undoCommentSubstr() so the callback and
    // the string-scanning code path share one implementation; the stale
    // inline strtr() return that preceded this line made it unreachable.
    return '<!--' . $this->undoCommentSubstr($matches[1]) . $matches[2];
}
|
||||
|
||||
/**
|
||||
@@ -299,7 +292,25 @@ class HTMLPurifier_Lexer_DOMLex extends HTMLPurifier_Lexer
|
||||
*/
|
||||
public function callbackArmorCommentEntities($matches)
{
    // $matches[1] is the comment body and $matches[2] is whatever terminated
    // it, as captured by the pattern handed to preg_replace_callback().
    // The encoding itself lives in armorEntities() so the callback and the
    // string-scanning code path share one implementation; the stale inline
    // str_replace() return that preceded this line made it unreachable.
    return '<!--' . $this->armorEntities($matches[1]) . $matches[2];
}
|
||||
|
||||
/**
 * Encode every ampersand in the given string as `&amp;`.
 *
 * This protects text that already looks like an entity (for example a
 * literal `&lt;` inside a comment) so that a later decoding pass restores
 * it unchanged instead of turning it into a raw character.
 *
 * @param string $string
 * @return string
 */
protected function armorEntities($string)
{
    // The replacement must be the entity, not a bare '&' (which would be a no-op).
    return str_replace('&', '&amp;', $string);
}
|
||||
|
||||
/**
 * Decode `&amp;` and `&lt;` back to `&` and `<`.
 *
 * strtr() with an array replaces in a single pass and never rescans its
 * own output, so `&amp;lt;` becomes `&lt;` (not `<`). That makes this the
 * exact inverse of armoring ampersands and then escaping `<`.
 *
 * @param string $string
 * @return string
 */
protected function undoCommentSubstr($string)
{
    // Keys must be the entities; mapping '&' => '&' and '<' => '<' would be a no-op.
    return strtr($string, array('&amp;' => '&', '&lt;' => '<'));
}
|
||||
|
||||
/**
|
||||
@@ -335,6 +346,66 @@ class HTMLPurifier_Lexer_DOMLex extends HTMLPurifier_Lexer
|
||||
$ret .= '</body></html>';
|
||||
return $ret;
|
||||
}
|
||||
|
||||
/**
 * Escape stray `<` characters that cannot start a tag.
 *
 * A `<` is treated as stray when the character after it is not an ASCII
 * letter, `!` or `/`; such a `<` is rewritten to `&lt;`. Comment bodies
 * are shielded first and restored afterwards so their text comes out
 * unchanged.
 *
 * @param string $html
 * @return string
 */
protected function aggressivelyFixLt($html)
{
    // Encode ampersands inside comments so the final decoding pass only
    // undoes substitutions made by this method.
    $html = $this->manipulateHtmlComments($html, array($this, 'armorEntities'));

    // The lookahead leaves the following character unconsumed, so runs such
    // as "<<3" are handled in one pass; no repeat-until-stable loop is needed.
    $escaped = preg_replace('/<(?=[^a-z!\/])/i', '&lt;', $html);

    // preg_replace() yields null on a PCRE failure; keep the input in that
    // case rather than letting the document collapse to an empty value.
    if ($escaped !== null) {
        $html = $escaped;
    }

    // Restore comment bodies to their original text.
    return $this->manipulateHtmlComments($html, array($this, 'undoCommentSubstr'));
}
|
||||
|
||||
/**
 * Modify HTML comments in the given HTML content using a callback.
 *
 * Each `<!-- ... -->` comment is located with plain string searches. The
 * text between the delimiters is passed to $callback and replaced with
 * its return value; the delimiters themselves are kept. A `<!--` without
 * a later `-->` ends the scan, and everything from that point on is
 * returned unchanged.
 *
 * @param string $html
 * @param callable $callback Receives the comment body, returns its replacement.
 * @return string
 */
protected function manipulateHtmlComments($html, callable $callback)
{
    $startTag = '<!--';
    $endTag = '-->';
    $startLen = strlen($startTag);
    $endLen = strlen($endTag);

    // Build the output piece by piece rather than re-assembling the whole
    // document for every comment, which keeps the work linear in its size.
    $result = '';
    $offset = 0;

    while (($openPos = strpos($html, $startTag, $offset)) !== false) {
        $bodyPos = $openPos + $startLen; // first byte after `<!--`
        $closePos = strpos($html, $endTag, $bodyPos);

        if ($closePos === false) {
            // No matching ending comment tag found
            break;
        }

        // Text before the comment is copied verbatim, then the rewritten comment.
        $result .= substr($html, $offset, $openPos - $offset)
            . $startTag
            . $callback(substr($html, $bodyPos, $closePos - $bodyPos))
            . $endTag;

        // Resume scanning right after the original `-->`.
        $offset = $closePos + $endLen;
    }

    // Append whatever follows the last complete comment (possibly everything).
    return $result . substr($html, $offset);
}
|
||||
}
|
||||
|
||||
// vim: et sw=4 sts=4
|
||||
|
Reference in New Issue
Block a user