mirror of
https://github.com/ezyang/htmlpurifier.git
synced 2025-08-04 21:28:06 +02:00
Implement %HTML.AllowedComments and %HTML.AllowedCommentsRegexp
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
This commit is contained in:
Binary file not shown.
@@ -0,0 +1,10 @@
|
||||
HTML.AllowedComments
|
||||
TYPE: lookup
|
||||
VERSION: 4.3.1
|
||||
DEFAULT: array()
|
||||
--DESCRIPTION--
|
||||
A whitelist that indicates which exact comment bodies should be
|
||||
allowed, modulo leading and trailing whitespace. See also %HTML.AllowedCommentsRegexp
|
||||
(these directives are unioned together, so a comment is considered
|
||||
valid if any directive deems it valid.)
|
||||
--# vim: et sw=4 sts=4
|
@@ -0,0 +1,15 @@
|
||||
HTML.AllowedCommentsRegexp
|
||||
TYPE: string/null
|
||||
VERSION: 4.3.1
|
||||
DEFAULT: NULL
|
||||
--DESCRIPTION--
|
||||
A regexp, which if it matches the body of a comment, indicates that
|
||||
it should be allowed. Leading and trailing whitespace is removed prior
|
||||
to running this regular expression.
|
||||
<strong>Warning:</strong> Make sure you specify
|
||||
correct anchor metacharacters <code>^regex$</code>; otherwise you may accept
|
||||
comments that you did not mean to! In particular, the regex <code>/foo|bar/</code>
|
||||
is probably not sufficiently strict, since it also allows <code>foobar</code>.
|
||||
See also %HTML.AllowedComments (these directives are unioned together,
|
||||
so a comment is considered valid if any directive deems it valid.)
|
||||
--# vim: et sw=4 sts=4
|
@@ -21,6 +21,9 @@ class HTMLPurifier_Strategy_RemoveForeignElements extends HTMLPurifier_Strategy
|
||||
|
||||
// currently only used to determine if comments should be kept
|
||||
$trusted = $config->get('HTML.Trusted');
|
||||
$comment_lookup = $config->get('HTML.AllowedComments');
|
||||
$comment_regexp = $config->get('HTML.AllowedCommentsRegexp');
|
||||
$check_comments = $comment_lookup !== array() || $comment_regexp !== null;
|
||||
|
||||
$remove_script_contents = $config->get('Core.RemoveScriptContents');
|
||||
$hidden_elements = $config->get('Core.HiddenElements');
|
||||
@@ -128,23 +131,37 @@ class HTMLPurifier_Strategy_RemoveForeignElements extends HTMLPurifier_Strategy
|
||||
if ($textify_comments !== false) {
|
||||
$data = $token->data;
|
||||
$token = new HTMLPurifier_Token_Text($data);
|
||||
} elseif ($trusted) {
|
||||
// keep, but perform comment cleaning
|
||||
} elseif ($trusted || $check_comments) {
|
||||
// always cleanup comments
|
||||
$trailing_hyphen = false;
|
||||
if ($e) {
|
||||
// perform check whether or not there's a trailing hyphen
|
||||
if (substr($token->data, -1) == '-') {
|
||||
$e->send(E_NOTICE, 'Strategy_RemoveForeignElements: Trailing hyphen in comment removed');
|
||||
$trailing_hyphen = true;
|
||||
}
|
||||
}
|
||||
$token->data = rtrim($token->data, '-');
|
||||
$found_double_hyphen = false;
|
||||
while (strpos($token->data, '--') !== false) {
|
||||
if ($e && !$found_double_hyphen) {
|
||||
$e->send(E_NOTICE, 'Strategy_RemoveForeignElements: Hyphens in comment collapsed');
|
||||
}
|
||||
$found_double_hyphen = true; // prevent double-erroring
|
||||
$found_double_hyphen = true;
|
||||
$token->data = str_replace('--', '-', $token->data);
|
||||
}
|
||||
if ($trusted || !empty($comment_lookup[trim($token->data)]) || ($comment_regexp !== NULL && preg_match($comment_regexp, trim($token->data)))) {
|
||||
// OK good
|
||||
if ($e) {
|
||||
if ($trailing_hyphen) {
|
||||
$e->send(E_NOTICE, 'Strategy_RemoveForeignElements: Trailing hyphen in comment removed');
|
||||
}
|
||||
if ($found_double_hyphen) {
|
||||
$e->send(E_NOTICE, 'Strategy_RemoveForeignElements: Hyphens in comment collapsed');
|
||||
}
|
||||
}
|
||||
} else {
|
||||
if ($e) {
|
||||
$e->send(E_NOTICE, 'Strategy_RemoveForeignElements: Comment removed');
|
||||
}
|
||||
continue;
|
||||
}
|
||||
} else {
|
||||
// strip comments
|
||||
if ($e) $e->send(E_NOTICE, 'Strategy_RemoveForeignElements: Comment removed');
|
||||
|
Reference in New Issue
Block a user