mirror of
https://github.com/ezyang/htmlpurifier.git
synced 2025-08-02 20:27:40 +02:00
Implement %HTML.AllowedComments and %HTML.AllowedCommentsRegexp
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
This commit is contained in:
2
NEWS
2
NEWS
@@ -18,6 +18,8 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier
|
|||||||
! Added %HTML.TargetBlank, which adds target="_blank" to all outgoing links.
|
! Added %HTML.TargetBlank, which adds target="_blank" to all outgoing links.
|
||||||
! Properly handle sub-lists directly nested inside of lists in
|
! Properly handle sub-lists directly nested inside of lists in
|
||||||
a standards compliant way, by moving them into the preceding <li>
|
a standards compliant way, by moving them into the preceding <li>
|
||||||
|
! Added %HTML.AllowedComments and %HTML.AllowedCommentsRegexp for
|
||||||
|
allowing a limited set of comments in untrusted situations.
|
||||||
- Color keywords are now case insensitive. Thanks Yzmir Ramirez
|
- Color keywords are now case insensitive. Thanks Yzmir Ramirez
|
||||||
<yramirez-htmlpurifier@adicio.com> for reporting.
|
<yramirez-htmlpurifier@adicio.com> for reporting.
|
||||||
- Explicitly initialize anonModule variable to null.
|
- Explicitly initialize anonModule variable to null.
|
||||||
|
@@ -14,7 +14,7 @@
|
|||||||
<line>348</line>
|
<line>348</line>
|
||||||
</file>
|
</file>
|
||||||
<file name="HTMLPurifier/Strategy/RemoveForeignElements.php">
|
<file name="HTMLPurifier/Strategy/RemoveForeignElements.php">
|
||||||
<line>47</line>
|
<line>50</line>
|
||||||
</file>
|
</file>
|
||||||
</directive>
|
</directive>
|
||||||
<directive id="CSS.MaxImgLength">
|
<directive id="CSS.MaxImgLength">
|
||||||
@@ -478,14 +478,24 @@
|
|||||||
<line>19</line>
|
<line>19</line>
|
||||||
</file>
|
</file>
|
||||||
</directive>
|
</directive>
|
||||||
<directive id="Core.RemoveScriptContents">
|
<directive id="HTML.AllowedComments">
|
||||||
|
<file name="HTMLPurifier/Strategy/RemoveForeignElements.php">
|
||||||
|
<line>24</line>
|
||||||
|
</file>
|
||||||
|
</directive>
|
||||||
|
<directive id="HTML.AllowedCommentsRegexp">
|
||||||
<file name="HTMLPurifier/Strategy/RemoveForeignElements.php">
|
<file name="HTMLPurifier/Strategy/RemoveForeignElements.php">
|
||||||
<line>25</line>
|
<line>25</line>
|
||||||
</file>
|
</file>
|
||||||
</directive>
|
</directive>
|
||||||
|
<directive id="Core.RemoveScriptContents">
|
||||||
|
<file name="HTMLPurifier/Strategy/RemoveForeignElements.php">
|
||||||
|
<line>28</line>
|
||||||
|
</file>
|
||||||
|
</directive>
|
||||||
<directive id="Core.HiddenElements">
|
<directive id="Core.HiddenElements">
|
||||||
<file name="HTMLPurifier/Strategy/RemoveForeignElements.php">
|
<file name="HTMLPurifier/Strategy/RemoveForeignElements.php">
|
||||||
<line>26</line>
|
<line>29</line>
|
||||||
</file>
|
</file>
|
||||||
</directive>
|
</directive>
|
||||||
<directive id="URI.HostBlacklist">
|
<directive id="URI.HostBlacklist">
|
||||||
|
Binary file not shown.
@@ -0,0 +1,10 @@
|
|||||||
|
HTML.AllowedComments
|
||||||
|
TYPE: lookup
|
||||||
|
VERSION: 4.3.1
|
||||||
|
DEFAULT: array()
|
||||||
|
--DESCRIPTION--
|
||||||
|
A whitelist which indicates what explicit comment bodies should be
|
||||||
|
allowed, ignoring leading and trailing whitespace. See also %HTML.AllowedCommentsRegexp
|
||||||
|
(these directives are union'ed together, so a comment is considered
|
||||||
|
valid if any directive deems it valid.)
|
||||||
|
--# vim: et sw=4 sts=4
|
@@ -0,0 +1,15 @@
|
|||||||
|
HTML.AllowedCommentsRegexp
|
||||||
|
TYPE: string/null
|
||||||
|
VERSION: 4.3.1
|
||||||
|
DEFAULT: NULL
|
||||||
|
--DESCRIPTION--
|
||||||
|
A regexp, which if it matches the body of a comment, indicates that
|
||||||
|
it should be allowed. Leading and trailing whitespace is removed prior
|
||||||
|
to running this regular expression.
|
||||||
|
<strong>Warning:</strong> Make sure you specify
|
||||||
|
correct anchor metacharacters <code>^regex$</code>, otherwise you may accept
|
||||||
|
comments that you did not mean to! In particular, the regex <code>/foo|bar/</code>
|
||||||
|
is probably not sufficiently strict, since it also allows <code>foobar</code>.
|
||||||
|
See also %HTML.AllowedComments (these directives are union'ed together,
|
||||||
|
so a comment is considered valid if any directive deems it valid.)
|
||||||
|
--# vim: et sw=4 sts=4
|
@@ -21,6 +21,9 @@ class HTMLPurifier_Strategy_RemoveForeignElements extends HTMLPurifier_Strategy
|
|||||||
|
|
||||||
// currently only used to determine if comments should be kept
|
// currently only used to determine if comments should be kept
|
||||||
$trusted = $config->get('HTML.Trusted');
|
$trusted = $config->get('HTML.Trusted');
|
||||||
|
$comment_lookup = $config->get('HTML.AllowedComments');
|
||||||
|
$comment_regexp = $config->get('HTML.AllowedCommentsRegexp');
|
||||||
|
$check_comments = $comment_lookup !== array() || $comment_regexp !== null;
|
||||||
|
|
||||||
$remove_script_contents = $config->get('Core.RemoveScriptContents');
|
$remove_script_contents = $config->get('Core.RemoveScriptContents');
|
||||||
$hidden_elements = $config->get('Core.HiddenElements');
|
$hidden_elements = $config->get('Core.HiddenElements');
|
||||||
@@ -128,23 +131,37 @@ class HTMLPurifier_Strategy_RemoveForeignElements extends HTMLPurifier_Strategy
|
|||||||
if ($textify_comments !== false) {
|
if ($textify_comments !== false) {
|
||||||
$data = $token->data;
|
$data = $token->data;
|
||||||
$token = new HTMLPurifier_Token_Text($data);
|
$token = new HTMLPurifier_Token_Text($data);
|
||||||
} elseif ($trusted) {
|
} elseif ($trusted || $check_comments) {
|
||||||
// keep, but perform comment cleaning
|
// always clean up comments
|
||||||
|
$trailing_hyphen = false;
|
||||||
if ($e) {
|
if ($e) {
|
||||||
// perform check whether or not there's a trailing hyphen
|
// perform check whether or not there's a trailing hyphen
|
||||||
if (substr($token->data, -1) == '-') {
|
if (substr($token->data, -1) == '-') {
|
||||||
$e->send(E_NOTICE, 'Strategy_RemoveForeignElements: Trailing hyphen in comment removed');
|
$trailing_hyphen = true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
$token->data = rtrim($token->data, '-');
|
$token->data = rtrim($token->data, '-');
|
||||||
$found_double_hyphen = false;
|
$found_double_hyphen = false;
|
||||||
while (strpos($token->data, '--') !== false) {
|
while (strpos($token->data, '--') !== false) {
|
||||||
if ($e && !$found_double_hyphen) {
|
$found_double_hyphen = true;
|
||||||
$e->send(E_NOTICE, 'Strategy_RemoveForeignElements: Hyphens in comment collapsed');
|
|
||||||
}
|
|
||||||
$found_double_hyphen = true; // prevent double-erroring
|
|
||||||
$token->data = str_replace('--', '-', $token->data);
|
$token->data = str_replace('--', '-', $token->data);
|
||||||
}
|
}
|
||||||
|
if ($trusted || !empty($comment_lookup[trim($token->data)]) || ($comment_regexp !== NULL && preg_match($comment_regexp, trim($token->data)))) {
|
||||||
|
// OK good
|
||||||
|
if ($e) {
|
||||||
|
if ($trailing_hyphen) {
|
||||||
|
$e->send(E_NOTICE, 'Strategy_RemoveForeignElements: Trailing hyphen in comment removed');
|
||||||
|
}
|
||||||
|
if ($found_double_hyphen) {
|
||||||
|
$e->send(E_NOTICE, 'Strategy_RemoveForeignElements: Hyphens in comment collapsed');
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if ($e) {
|
||||||
|
$e->send(E_NOTICE, 'Strategy_RemoveForeignElements: Comment removed');
|
||||||
|
}
|
||||||
|
continue;
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
// strip comments
|
// strip comments
|
||||||
if ($e) $e->send(E_NOTICE, 'Strategy_RemoveForeignElements: Comment removed');
|
if ($e) $e->send(E_NOTICE, 'Strategy_RemoveForeignElements: Comment removed');
|
||||||
|
@@ -100,6 +100,16 @@ alert(<b>bold</b>);
|
|||||||
$this->assertResult('<!-- bo --- asdf--as -->', '<!-- bo - asdf-as -->');
|
$this->assertResult('<!-- bo --- asdf--as -->', '<!-- bo - asdf-as -->');
|
||||||
}
|
}
|
||||||
|
|
||||||
|
function testPreserveCommentsWithLookup() {
|
||||||
|
$this->config->set('HTML.AllowedComments', array('allowed'));
|
||||||
|
$this->assertResult('<!-- allowed --><!-- not allowed -->', '<!-- allowed -->');
|
||||||
|
}
|
||||||
|
|
||||||
|
function testPreserveCommentsWithRegexp() {
|
||||||
|
$this->config->set('HTML.AllowedCommentsRegexp', '/^allowed[1-9]$/');
|
||||||
|
$this->assertResult('<!-- allowed1 --><!-- not allowed -->', '<!-- allowed1 -->');
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// vim: et sw=4 sts=4
|
// vim: et sw=4 sts=4
|
||||||
|
@@ -48,14 +48,14 @@ class HTMLPurifier_Strategy_RemoveForeignElements_ErrorsTest extends HTMLPurifie
|
|||||||
function testTrailingHyphenInCommentRemoved() {
|
function testTrailingHyphenInCommentRemoved() {
|
||||||
$this->config->set('HTML.Trusted', true);
|
$this->config->set('HTML.Trusted', true);
|
||||||
$this->expectErrorCollection(E_NOTICE, 'Strategy_RemoveForeignElements: Trailing hyphen in comment removed');
|
$this->expectErrorCollection(E_NOTICE, 'Strategy_RemoveForeignElements: Trailing hyphen in comment removed');
|
||||||
$this->expectContext('CurrentToken', new HTMLPurifier_Token_Comment(' test --', 1));
|
$this->expectContext('CurrentToken', new HTMLPurifier_Token_Comment(' test ', 1));
|
||||||
$this->invoke('<!-- test ---->');
|
$this->invoke('<!-- test ---->');
|
||||||
}
|
}
|
||||||
|
|
||||||
function testDoubleHyphenInCommentRemoved() {
|
function testDoubleHyphenInCommentRemoved() {
|
||||||
$this->config->set('HTML.Trusted', true);
|
$this->config->set('HTML.Trusted', true);
|
||||||
$this->expectErrorCollection(E_NOTICE, 'Strategy_RemoveForeignElements: Hyphens in comment collapsed');
|
$this->expectErrorCollection(E_NOTICE, 'Strategy_RemoveForeignElements: Hyphens in comment collapsed');
|
||||||
$this->expectContext('CurrentToken', new HTMLPurifier_Token_Comment(' test --- test -- test ', 1));
|
$this->expectContext('CurrentToken', new HTMLPurifier_Token_Comment(' test - test - test ', 1));
|
||||||
$this->invoke('<!-- test --- test -- test -->');
|
$this->invoke('<!-- test --- test -- test -->');
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user