mirror of
https://github.com/ezyang/htmlpurifier.git
synced 2025-07-31 03:10:09 +02:00
Rewrite FixNesting implementation to be tree-based.
This mega-patch rips out the FixNesting implementation and the related ChildDef components. The primary algorithmic change is to convert from use of tokens to tree nodes, which are far more amenable to the style of processing that FixNesting uses. Additionally, FixNesting has been changed to go bottom-up rather than top-down, in order to avoid needing to implement backtracking.

This patch simplifies a good deal of the relevant logic, since we no longer need to continually recalculate the nesting structure when processing things. However, the conversion to the alternate format incurs some overhead, so for small inputs these changes are not a win. One possibility to greatly reduce the constant factors here is to switch to entirely using libxml's representation, and never serializing tokens; this would require one to rewrite injectors, however.

The iterative post-order traversal in FixNesting is a bit subtle, but we have essentially reified the stack and continuations.

We've removed support for %Core.EscapeInvalidChildren.

Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
This commit is contained in:
@@ -3,6 +3,11 @@
|
||||
class HTMLPurifier_ChildDef_CustomTest extends HTMLPurifier_ChildDefHarness
|
||||
{
|
||||
|
||||
public function setUp()
|
||||
{
|
||||
parent::setUp();
|
||||
}
|
||||
|
||||
public function test()
|
||||
{
|
||||
$this->obj = new HTMLPurifier_ChildDef_Custom('(a,b?,c*,d+,(a,b)*)');
|
||||
|
@@ -24,12 +24,6 @@ class HTMLPurifier_ChildDef_ListTest extends HTMLPurifier_ChildDefHarness
|
||||
$this->assertResult('<li>asdf</li><li />');
|
||||
}
|
||||
|
||||
public function testIllegal()
|
||||
{
|
||||
// XXX actually this never gets triggered in practice
|
||||
$this->assertResult('<li /><b />', '<li /><li><b /></li>');
|
||||
}
|
||||
|
||||
public function testOlAtBeginning()
|
||||
{
|
||||
$this->assertResult('<ol />', '<li><ol /></li>');
|
||||
|
@@ -68,15 +68,10 @@ class HTMLPurifier_ChildDef_RequiredTest extends HTMLPurifier_ChildDefHarness
|
||||
$this->obj = new HTMLPurifier_ChildDef_Required('#PCDATA | b');
|
||||
$this->assertResult('Out <b>Bold text</b><img />', 'Out <b>Bold text</b>');
|
||||
}
|
||||
|
||||
public function testPCDATAAllowedWithEscaping()
|
||||
public function testPCDATAAllowedJump()
|
||||
{
|
||||
$this->obj = new HTMLPurifier_ChildDef_Required('#PCDATA | b');
|
||||
$this->config->set('Core.EscapeInvalidChildren', true);
|
||||
$this->assertResult(
|
||||
'Out <b>Bold text</b><img />',
|
||||
'Out <b>Bold text</b><img />'
|
||||
);
|
||||
$this->assertResult('A <i>foo</i>', 'A foo');
|
||||
}
|
||||
}
|
||||
|
||||
|
@@ -76,7 +76,7 @@ class HTMLPurifier_ChildDef_TableTest extends HTMLPurifier_ChildDefHarness
|
||||
$this->config->set('Output.Newline', "\n");
|
||||
$this->assertResult(
|
||||
"\n\t<tbody />\n\t\t<tfoot />\n\t\t\t",
|
||||
"\n\t\t<tfoot />\n\t<tbody />\n\t\t\t"
|
||||
"\n\t<tfoot />\n\t\t\t<tbody />\n\t\t"
|
||||
);
|
||||
|
||||
}
|
||||
|
@@ -8,8 +8,8 @@ class HTMLPurifier_ChildDefHarness extends HTMLPurifier_ComplexHarness
|
||||
parent::setUp();
|
||||
$this->obj = null;
|
||||
$this->func = 'validateChildren';
|
||||
$this->to_tokens = true;
|
||||
$this->to_html = true;
|
||||
$this->to_node_list = true;
|
||||
}
|
||||
|
||||
}
|
||||
|
@@ -28,6 +28,14 @@ class HTMLPurifier_ComplexHarness extends HTMLPurifier_Harness
|
||||
*/
|
||||
protected $to_tokens = false;
|
||||
|
||||
/**
|
||||
* Whether or not the method deals in a node list.
|
||||
* If set to true, assertResult() will transparently convert HTML
|
||||
* to a node list and back again.
|
||||
* @type bool
|
||||
*/
|
||||
protected $to_node_list = false;
|
||||
|
||||
/**
|
||||
* Whether or not to convert tokens back into HTML before performing
|
||||
* equality check, has no effect on bools.
|
||||
@@ -54,9 +62,12 @@ class HTMLPurifier_ComplexHarness extends HTMLPurifier_Harness
|
||||
*/
|
||||
protected function assertResult($input, $expect = true)
|
||||
{
|
||||
if ($this->to_tokens && is_string($input)) {
|
||||
// $func may cause $input to change, so "clone" another copy
|
||||
// to sacrifice
|
||||
// $func may cause $input to change, so "clone" another copy
|
||||
// to sacrifice
|
||||
if ($this->to_node_list && is_string($input)) {
|
||||
$input = HTMLPurifier_Arborize::arborize($this->tokenize($temp = $input), $this->config, $this->context)->children;
|
||||
$input_c = HTMLPurifier_Arborize::arborize($this->tokenize($temp), $this->config, $this->context)->children;
|
||||
} elseif ($this->to_tokens && is_string($input)) {
|
||||
$input = $this->tokenize($temp = $input);
|
||||
$input_c = $this->tokenize($temp);
|
||||
} else {
|
||||
@@ -76,6 +87,12 @@ class HTMLPurifier_ComplexHarness extends HTMLPurifier_Harness
|
||||
}
|
||||
|
||||
if ($this->to_html) {
|
||||
if ($this->to_node_list) {
|
||||
$result = $this->generateTokens($result);
|
||||
if (is_array($expect) && !empty($expect) && $expect[0] instanceof HTMLPurifier_Node) {
|
||||
$expect = $this->generateTokens($expect);
|
||||
}
|
||||
}
|
||||
$result = $this->generate($result);
|
||||
if (is_array($expect)) {
|
||||
$expect = $this->generate($expect);
|
||||
@@ -106,6 +123,16 @@ class HTMLPurifier_ComplexHarness extends HTMLPurifier_Harness
|
||||
return $generator->generateFromTokens($tokens);
|
||||
}
|
||||
|
||||
/**
|
||||
* Generate tokens from node list
|
||||
*/
|
||||
protected function generateTokens($children)
|
||||
{
|
||||
$dummy = new HTMLPurifier_Node_Element("dummy");
|
||||
$dummy->children = $children;
|
||||
return HTMLPurifier_Arborize::flatten($dummy, $this->context, $this->config);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// vim: et sw=4 sts=4
|
||||
|
@@ -27,15 +27,6 @@ class HTMLPurifier_Strategy_FixNestingTest extends HTMLPurifier_StrategyHarness
|
||||
);
|
||||
}
|
||||
|
||||
public function testEscapeBlockInInline()
|
||||
{
|
||||
$this->config->set('Core.EscapeInvalidChildren', true);
|
||||
$this->assertResult(
|
||||
'<b><div>Illegal div.</div></b>',
|
||||
'<b><div>Illegal div.</div></b>'
|
||||
);
|
||||
}
|
||||
|
||||
public function testRemoveNodeWithMissingRequiredElements()
|
||||
{
|
||||
$this->assertResult('<ul></ul>', '');
|
||||
@@ -98,15 +89,6 @@ class HTMLPurifier_Strategy_FixNestingTest extends HTMLPurifier_StrategyHarness
|
||||
);
|
||||
}
|
||||
|
||||
public function testChameleonEscapeInvalidBlockInInline()
|
||||
{
|
||||
$this->config->set('Core.EscapeInvalidChildren', true);
|
||||
$this->assertResult( // alt config
|
||||
'<span><ins><div>Not allowed!</div></ins></span>',
|
||||
'<span><ins><div>Not allowed!</div></ins></span>'
|
||||
);
|
||||
}
|
||||
|
||||
public function testExclusionsIntegration()
|
||||
{
|
||||
// test exclusions
|
||||
|
Reference in New Issue
Block a user