1
0
mirror of https://github.com/ezyang/htmlpurifier.git synced 2025-08-05 13:47:24 +02:00

Add %Core.DisableExcludes directive

Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
This commit is contained in:
Edward Z. Yang
2013-02-17 15:47:38 -08:00
parent 344e0640b6
commit 631021733b
6 changed files with 62 additions and 16 deletions

View File

@@ -0,0 +1,14 @@
Core.DisableExcludes
TYPE: bool
DEFAULT: false
VERSION: 4.5.0
--DESCRIPTION--
<p>
This directive disables SGML-style exclusions, e.g. the exclusion of
<code>&lt;object&gt;</code> in any descendant of a
<code>&lt;pre&gt;</code> tag. Disabling excludes will allow some
invalid documents to pass through HTML Purifier, but HTML Purifier
will also be less likely to accidentally remove large documents during
processing.
</p>
--# vim: et sw=4 sts=4

View File

@@ -26,6 +26,22 @@
* translated into text depends on the child definitions.
*
* @todo Enable nodes to be bubbled out of the structure.
*
* @warning This algorithm (though it may be hard to see) proceeds from
* a top-down fashion. Thus, parents are processed before
* children. This is easy to implement and has a nice effiency
* benefit, in that if a node is removed, we never waste any
* time processing it, but it also means that if a child
* changes in a non-encapsulated way (e.g. it is removed), we
* need to go back and reprocess the parent to see if those
* changes resulted in problems for the parent. See
* [BACKTRACK] for an example of this. In the current
* implementation, this backtracking can only be triggered when
* a node is removed and if that node was the sole node, the
* parent would need to be removed. As such, it is easy to see
* that backtracking only incurs constant overhead. If more
* sophisticated backtracking is implemented, care must be
* taken to avoid nontermination or exponential blowup.
*/
class HTMLPurifier_Strategy_FixNesting extends HTMLPurifier_Strategy
@@ -38,6 +54,8 @@ class HTMLPurifier_Strategy_FixNesting extends HTMLPurifier_Strategy
// get a copy of the HTML definition
$definition = $config->getHTMLDefinition();
$excludes_enabled = !$config->get('Core.DisableExcludes');
// insert implicit "parent" node, will be removed at end.
// DEFINITION CALL
$parent_name = $definition->info_parent;
@@ -147,7 +165,7 @@ class HTMLPurifier_Strategy_FixNesting extends HTMLPurifier_Strategy
// parent exclusions. The array should not be very large, two
// elements at most.
$excluded = false;
if (!empty($exclude_stack)) {
if (!empty($exclude_stack) && $excludes_enabled) {
foreach ($exclude_stack as $lookup) {
if (isset($lookup[$tokens[$i]->name])) {
$excluded = true;
@@ -235,7 +253,7 @@ class HTMLPurifier_Strategy_FixNesting extends HTMLPurifier_Strategy
// our current implementation claims that that case would
// not allow empty, even if it did
if (!$parent_def->child->allow_empty) {
// we need to do a double-check
// we need to do a double-check [BACKTRACK]
$i = $parent_index;
array_pop($stack);
}