53
diff --git a/library/HTMLPurifier/ConfigSchema/schema.ser b/library/HTMLPurifier/ConfigSchema/schema.ser
index 32a02c4c..fa0bacb9 100644
Binary files a/library/HTMLPurifier/ConfigSchema/schema.ser and b/library/HTMLPurifier/ConfigSchema/schema.ser differ
diff --git a/library/HTMLPurifier/ConfigSchema/schema/Core.DisableExcludes.txt b/library/HTMLPurifier/ConfigSchema/schema/Core.DisableExcludes.txt
new file mode 100644
index 00000000..1cd4c2c9
--- /dev/null
+++ b/library/HTMLPurifier/ConfigSchema/schema/Core.DisableExcludes.txt
@@ -0,0 +1,14 @@
+Core.DisableExcludes
+TYPE: bool
+DEFAULT: false
+VERSION: 4.5.0
+--DESCRIPTION--
+<p>
+ This directive disables SGML-style exclusions, e.g. the exclusion of
+ <code>&lt;object&gt;</code> in any descendant of a
+ <code>&lt;pre&gt;</code> tag. Disabling excludes will allow some
+ invalid documents to pass through HTML Purifier, but HTML Purifier
+ will also be less likely to accidentally remove large documents during
+ processing.
+</p>
+--# vim: et sw=4 sts=4
diff --git a/library/HTMLPurifier/Strategy/FixNesting.php b/library/HTMLPurifier/Strategy/FixNesting.php
index f8180239..d1588b93 100644
--- a/library/HTMLPurifier/Strategy/FixNesting.php
+++ b/library/HTMLPurifier/Strategy/FixNesting.php
@@ -26,6 +26,22 @@
* translated into text depends on the child definitions.
*
* @todo Enable nodes to be bubbled out of the structure.
+ *
+ * @warning This algorithm (though it may be hard to see) proceeds in
+ * a top-down fashion. Thus, parents are processed before
+ * children. This is easy to implement and has a nice efficiency
+ * benefit, in that if a node is removed, we never waste any
+ * time processing it, but it also means that if a child
+ * changes in a non-encapsulated way (e.g. it is removed), we
+ * need to go back and reprocess the parent to see if those
+ * changes resulted in problems for the parent. See
+ * [BACKTRACK] for an example of this. In the current
+ * implementation, this backtracking can only be triggered when
+ * a node is removed and if that node was the sole node, the
+ * parent would need to be removed. As such, it is easy to see
+ * that backtracking only incurs constant overhead. If more
+ * sophisticated backtracking is implemented, care must be
+ * taken to avoid nontermination or exponential blowup.
*/
class HTMLPurifier_Strategy_FixNesting extends HTMLPurifier_Strategy
@@ -38,6 +54,8 @@ class HTMLPurifier_Strategy_FixNesting extends HTMLPurifier_Strategy
// get a copy of the HTML definition
$definition = $config->getHTMLDefinition();
+ $excludes_enabled = !$config->get('Core.DisableExcludes');
+
// insert implicit "parent" node, will be removed at end.
// DEFINITION CALL
$parent_name = $definition->info_parent;
@@ -147,7 +165,7 @@ class HTMLPurifier_Strategy_FixNesting extends HTMLPurifier_Strategy
// parent exclusions. The array should not be very large, two
// elements at most.
$excluded = false;
- if (!empty($exclude_stack)) {
+ if (!empty($exclude_stack) && $excludes_enabled) {
foreach ($exclude_stack as $lookup) {
if (isset($lookup[$tokens[$i]->name])) {
$excluded = true;
@@ -235,7 +253,7 @@ class HTMLPurifier_Strategy_FixNesting extends HTMLPurifier_Strategy
// our current implementation claims that that case would
// not allow empty, even if it did
if (!$parent_def->child->allow_empty) {
- // we need to do a double-check
+ // we need to do a double-check [BACKTRACK]
$i = $parent_index;
array_pop($stack);
}
diff --git a/tests/HTMLPurifier/Strategy/FixNestingTest.php b/tests/HTMLPurifier/Strategy/FixNestingTest.php
index 9394352e..965ae2a8 100644
--- a/tests/HTMLPurifier/Strategy/FixNestingTest.php
+++ b/tests/HTMLPurifier/Strategy/FixNestingTest.php
@@ -139,6 +139,11 @@ class HTMLPurifier_Strategy_FixNestingTest extends HTMLPurifier_StrategyHarness
$this->assertResult('text
', 'text
');
}
+ function testDisabledExcludes() {
+ $this->config->set('Core.DisableExcludes', true);
+ $this->assertResult('');
+ }
+
}
// vim: et sw=4 sts=4