1
0
mirror of https://github.com/ezyang/htmlpurifier.git synced 2025-07-31 19:30:21 +02:00

Implement %AutoFormat.RemoveEmpty, end to start ref, and injector rewind.

Injector rewind: Injectors can now use the method rewind() in order to move
the input index backwards, so that they can reprocess tokens (other injectors
are not affected by a rewind). This functionality was necessary to implement
nested node removals in %AutoFormat.RemoveEmpty.

End to start ref: To facilitate rewinding, HTMLPurifier_Token_End now
maintains a reference called $start to the starting token for its node.

%AutoFormat.RemoveEmpty removes empty nodes. Lots of people have requested
it, so here is a partially effective implementation. Because it is implemented
as an Injector, it cannot handle empty nodes that are newly introduced by
later validators, specifically auto-closing and child validation.
The Injector is only meant to be used on HTML-ish languages.

Signed-off-by: Edward Z. Yang <edwardzyang@thewritingpot.com>
This commit is contained in:
Edward Z. Yang
2008-06-27 16:09:14 -04:00
parent fd384129bf
commit 700d5bcbfc
12 changed files with 252 additions and 24 deletions

View File

@@ -0,0 +1,58 @@
<?php
/**
 * Test cases for the injector enabled by %AutoFormat.RemoveEmpty.
 *
 * Every test runs with AutoFormat.RemoveEmpty switched on (see setup()).
 *
 * NOTE(review): a one-argument assertResult() call presumably asserts that
 * the input comes back unchanged, while the two-argument form compares the
 * output against the second argument; confirm against
 * HTMLPurifier_InjectorHarness.
 */
class HTMLPurifier_Injector_RemoveEmptyTest extends HTMLPurifier_InjectorHarness
{

    /**
     * Runs the harness set-up, then enables AutoFormat.RemoveEmpty.
     */
    public function setup() {
        parent::setup();
        $this->config->set('AutoFormat', 'RemoveEmpty', true);
    }

    /**
     * An element with text content is passed with no expected replacement.
     */
    public function testPreserve() {
        $this->assertResult('<b>asdf</b>');
    }

    /**
     * An element with no content is expected to produce empty output.
     */
    public function testRemove() {
        $this->assertResult('<b></b>', '');
    }

    /**
     * An element containing only a single space is expected to produce
     * empty output.
     */
    public function testRemoveWithSpace() {
        $this->assertResult('<b> </b>', '');
    }

    /**
     * A class attribute on an otherwise empty element does not keep it:
     * the expected output is empty.
     */
    public function testRemoveWithAttr() {
        $this->assertResult('<b class="asdf"></b>', '');
    }

    /**
     * An empty anchor carrying both id and name is expected to produce
     * empty output when Attr.EnableID has not been set by the test.
     *
     * NOTE(review): presumably id/name are stripped under the default
     * configuration, leaving nothing worth preserving; confirm.
     */
    public function testRemoveIdAndName() {
        $this->assertResult('<a id="asdf" name="asdf"></a>', '');
    }

    /**
     * An empty colgroup is passed with no expected replacement.
     */
    public function testPreserveColgroup() {
        $this->assertResult('<colgroup></colgroup>');
    }

    /**
     * With Attr.EnableID set to true, an empty anchor that has an id is
     * passed with no expected replacement.
     */
    public function testPreserveId() {
        $this->config->set('Attr', 'EnableID', true);
        $this->assertResult('<a id="asdf"></a>');
    }

    /**
     * With Attr.EnableID set to true, an empty anchor that has a name is
     * passed with no expected replacement.
     */
    public function testPreserveName() {
        $this->config->set('Attr', 'EnableID', true);
        $this->assertResult('<a name="asdf"></a>');
    }

    /**
     * An empty element nested one level deep: the expected output is empty,
     * i.e. the outer element goes too.
     */
    public function testRemoveNested() {
        $this->assertResult('<b><i></i></b>', '');
    }

    /**
     * Same as testRemoveNested(), nested two levels deep.
     */
    public function testRemoveNested2() {
        $this->assertResult('<b><i><u></u></i></b>', '');
    }

    /**
     * Two levels of nesting with a single space between every tag: the
     * expected output is still empty.
     */
    public function testRemoveNested3() {
        $this->assertResult('<b> <i> <u> </u> </i> </b>', '');
    }

}

View File

@@ -10,6 +10,7 @@ class HTMLPurifier_Injector_SafeObjectTest extends HTMLPurifier_InjectorHarness
/**
 * Runs the harness set-up, registers a SafeObject injector through
 * AutoFormat.Custom, and sets HTML.Trusted to true.
 */
function setup() {
    parent::setup();
    // there is no AutoFormat.SafeObject directive, so the injector is
    // handed over via the AutoFormat.Custom list instead
    $customInjectors = array(new HTMLPurifier_Injector_SafeObject());
    $this->config->set('AutoFormat', 'Custom', $customInjectors);
    $this->config->set('HTML', 'Trusted', true);
}

View File

@@ -8,14 +8,19 @@ class HTMLPurifier_Strategy_MakeWellFormed_InjectorTest extends HTMLPurifier_Str
$this->obj = new HTMLPurifier_Strategy_MakeWellFormed();
$this->config->set('AutoFormat', 'AutoParagraph', true);
$this->config->set('AutoFormat', 'Linkify', true);
$this->config->set('AutoFormat', 'RemoveEmpty', true);
generate_mock_once('HTMLPurifier_Injector');
}
function testEndNotification() {
$mock = new HTMLPurifier_InjectorMock();
$mock->skip = false;
$mock->expectAt(0, 'notifyEnd', array(new HTMLPurifier_Token_End('b')));
$mock->expectAt(1, 'notifyEnd', array(new HTMLPurifier_Token_End('i')));
$b = new HTMLPurifier_Token_End('b');
$b->start = new HTMLPurifier_Token_Start('b');
$mock->expectAt(0, 'notifyEnd', array($b));
$i = new HTMLPurifier_Token_End('i');
$i->start = new HTMLPurifier_Token_Start('i');
$mock->expectAt(1, 'notifyEnd', array($i));
$mock->expectCallCount('notifyEnd', 2);
$this->config->set('AutoFormat', 'AutoParagraph', false);
$this->config->set('AutoFormat', 'Linkify', false);
@@ -92,4 +97,20 @@ class HTMLPurifier_Strategy_MakeWellFormed_InjectorTest extends HTMLPurifier_Str
);
}
/**
 * Feeds markup that mixes double-newline breaks with empty elements and
 * checks it against two plain paragraphs.
 */
function testEmptyAndParagraph() {
    // A fairly degenerate input; its value is in showing that the two
    // features do not error out when active together.
    // NOTE(review): "the two" presumably means AutoParagraph and
    // RemoveEmpty - confirm against this class's setup().
    $html     = "<p>asdf\n\nasdf<b></b></p>\n\n<p></p><i></i>";
    $expected = "<p>asdf</p><p>asdf</p>";
    $this->assertResult($html, $expected);
}
/**
 * Checks that text surrounding a paragraph which holds only an empty
 * inline element and blank lines comes out as two paragraphs.
 */
function testRewindAndParagraph() {
    $html     = "bar\n\n<p><i></i>\n\n</p>\n\nfoo";
    $expected = "<p>bar</p><p>foo</p>";
    $this->assertResult($html, $expected);
}
}