mirror of
https://github.com/ezyang/htmlpurifier.git
synced 2025-08-02 20:27:40 +02:00
Refine Lexers for parsing stray angled brackets; %Core.AggressivelyFixLt = true
By default, the DirectLex and DOMLex behavior with stray angled brackets varied a great deal due to their implementations. A little-known directive, %Core.AggressivelyFixLt, attempted to match DOMLex's behavior with DirectLex's, but it was off by default. Now that it is on by default, users enjoy these benefits, and performance-minded users can turn it back off. Also, several refinements to stray angled bracket parsing were made. Specifically: * DirectLex: Handle each left angled bracket individually, which prevents strange behavior as reported by eon. * DOMLex: Iterate aggressive lt fix, so that stacked brackets like << are handled. Signed-off-by: Edward Z. Yang <edwardzyang@thewritingpot.com>
This commit is contained in:
File diff suppressed because one or more lines are too long
@@ -1,13 +1,17 @@
|
||||
Core.AggressivelyFixLt
|
||||
TYPE: bool
|
||||
VERSION: 2.1.0
|
||||
DEFAULT: false
|
||||
DEFAULT: true
|
||||
--DESCRIPTION--
|
||||
|
||||
This directive enables aggressive pre-filter fixes HTML Purifier can
|
||||
perform in order to ensure that open angled-brackets do not get killed
|
||||
during parsing stage. Enabling this will result in two preg_replace_callback
|
||||
calls and one preg_replace call for every bit of HTML passed through here.
|
||||
It is not necessary and will have no effect for PHP 4.
|
||||
|
||||
|
||||
<p>
|
||||
This directive enables aggressive pre-filter fixes HTML Purifier can
|
||||
perform in order to ensure that open angled-brackets do not get killed
|
||||
during parsing stage. Enabling this will result in two preg_replace_callback
|
||||
calls and at least two preg_replace calls for every HTML document parsed;
|
||||
if your users make very well-formed HTML, you can set this directive false.
|
||||
This has no effect when DirectLex is used.
|
||||
</p>
|
||||
<p>
|
||||
<strong>Notice:</strong> This directive's default turned from false to true
|
||||
in HTML Purifier 3.1.2.
|
||||
</p>
|
@@ -45,7 +45,10 @@ class HTMLPurifier_Lexer_DOMLex extends HTMLPurifier_Lexer
|
||||
$char = '[^a-z!\/]';
|
||||
$comment = "/<!--(.*?)(-->|\z)/is";
|
||||
$html = preg_replace_callback($comment, array($this, 'callbackArmorCommentEntities'), $html);
|
||||
$html = preg_replace("/<($char)/i", '&lt;\\1', $html);
|
||||
do {
|
||||
$old = $html;
|
||||
$html = preg_replace("/<($char)/i", '&lt;\\1', $html);
|
||||
} while ($html !== $old);
|
||||
$html = preg_replace_callback($comment, array($this, 'callbackUndoCommentSubst'), $html); // fix comments
|
||||
}
|
||||
|
||||
|
@@ -197,20 +197,12 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer
|
||||
if (!ctype_alpha($segment[0])) {
|
||||
// XML: $segment[0] !== '_' && $segment[0] !== ':'
|
||||
if ($e) $e->send(E_NOTICE, 'Lexer: Unescaped lt');
|
||||
$token = new
|
||||
HTMLPurifier_Token_Text(
|
||||
'<' .
|
||||
$this->parseData(
|
||||
$segment
|
||||
) .
|
||||
'>'
|
||||
);
|
||||
$token = new HTMLPurifier_Token_Text('<');
|
||||
if ($maintain_line_numbers) {
|
||||
$token->line = $current_line;
|
||||
$current_line += $this->substrCount($html, $nl, $cursor, $position_next_gt - $cursor);
|
||||
}
|
||||
$array[] = $token;
|
||||
$cursor = $position_next_gt + 1;
|
||||
$inside_tag = false;
|
||||
continue;
|
||||
}
|
||||
|
Reference in New Issue
Block a user