diff --git a/NEWS b/NEWS
index 10da3844..242f0688 100644
--- a/NEWS
+++ b/NEWS
@@ -16,6 +16,8 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier
entities, even if target encoding is UTF-8.
! Added support for 'scope' attribute on tables.
! Added %HTML.TargetBlank, which adds target="blank" to all outgoing links.
+! Properly handle sub-lists directly nested inside of lists in
+ a standards compliant way, by moving them into the preceding
- Color keywords are now case insensitive. Thanks Yzmir Ramirez
for reporting.
- Explicitly initialize anonModule variable to null.
diff --git a/library/HTMLPurifier.includes.php b/library/HTMLPurifier.includes.php
index 58b66205..e44ab1b3 100644
--- a/library/HTMLPurifier.includes.php
+++ b/library/HTMLPurifier.includes.php
@@ -135,6 +135,7 @@ require 'HTMLPurifier/AttrTransform/Textarea.php';
require 'HTMLPurifier/ChildDef/Chameleon.php';
require 'HTMLPurifier/ChildDef/Custom.php';
require 'HTMLPurifier/ChildDef/Empty.php';
+require 'HTMLPurifier/ChildDef/List.php';
require 'HTMLPurifier/ChildDef/Required.php';
require 'HTMLPurifier/ChildDef/Optional.php';
require 'HTMLPurifier/ChildDef/StrictBlockquote.php';
diff --git a/library/HTMLPurifier.safe-includes.php b/library/HTMLPurifier.safe-includes.php
index 344a7128..d2a11792 100644
--- a/library/HTMLPurifier.safe-includes.php
+++ b/library/HTMLPurifier.safe-includes.php
@@ -129,6 +129,7 @@ require_once $__dir . '/HTMLPurifier/AttrTransform/Textarea.php';
require_once $__dir . '/HTMLPurifier/ChildDef/Chameleon.php';
require_once $__dir . '/HTMLPurifier/ChildDef/Custom.php';
require_once $__dir . '/HTMLPurifier/ChildDef/Empty.php';
+require_once $__dir . '/HTMLPurifier/ChildDef/List.php';
require_once $__dir . '/HTMLPurifier/ChildDef/Required.php';
require_once $__dir . '/HTMLPurifier/ChildDef/Optional.php';
require_once $__dir . '/HTMLPurifier/ChildDef/StrictBlockquote.php';
diff --git a/library/HTMLPurifier/ChildDef/List.php b/library/HTMLPurifier/ChildDef/List.php
new file mode 100644
index 00000000..cdaa2893
--- /dev/null
+++ b/library/HTMLPurifier/ChildDef/List.php
@@ -0,0 +1,120 @@
+ true, 'ul' => true, 'ol' => true);
+ public function validateChildren($tokens_of_children, $config, $context) {
+ // Flag for subclasses
+ $this->whitespace = false;
+
+ // if there are no tokens, delete parent node
+ if (empty($tokens_of_children)) return false;
+
+ // the new set of children
+ $result = array();
+
+ // current depth into the nest
+ $nesting = 0;
+
+ // a little sanity check to make sure it's not ALL whitespace
+ $all_whitespace = true;
+
+ $seen_li = false;
+ $need_close_li = false;
+
+ foreach ($tokens_of_children as $token) {
+ if (!empty($token->is_whitespace)) {
+ $result[] = $token;
+ continue;
+ }
+ $all_whitespace = false; // phew, we're not talking about whitespace
+
+ if ($nesting == 1 && $need_close_li) {
+ $result[] = new HTMLPurifier_Token_End('li');
+ $nesting--;
+ $need_close_li = false;
+ }
+
+ $is_child = ($nesting == 0);
+
+ if ($token instanceof HTMLPurifier_Token_Start) {
+ $nesting++;
+ } elseif ($token instanceof HTMLPurifier_Token_End) {
+ $nesting--;
+ }
+
+ if ($is_child) {
+ if ($token->name === 'li') {
+ // good
+ $seen_li = true;
+ } elseif ($token->name === 'ul' || $token->name === 'ol') {
+ // we want to tuck this into the previous li
+ $need_close_li = true;
+ $nesting++;
+ if (!$seen_li) {
+ // create a new li element
+ $result[] = new HTMLPurifier_Token_Start('li');
+ } else {
+ // backtrack until found
+ while(true) {
+ $t = array_pop($result);
+ if ($t instanceof HTMLPurifier_Token_End) {
+ // XXX actually, these invariants could very plausibly be violated
+ // if we are doing silly things with modifying the set of allowed elements.
+ // FORTUNATELY, it doesn't make a difference, since the allowed
+ // elements are hard-coded here!
+ if ($t->name !== 'li') {
+ trigger_error("Only li present invariant violated in List ChildDef", E_USER_ERROR);
+ return false;
+ }
+ break;
+ } elseif ($t instanceof HTMLPurifier_Token_Empty) { // bleagh
+ if ($t->name !== 'li') {
+ trigger_error("Only li present invariant violated in List ChildDef", E_USER_ERROR);
+ return false;
+ }
+ // XXX this should have a helper for it...
+ $result[] = new HTMLPurifier_Token_Start('li', $t->attr, $t->line, $t->col, $t->armor);
+ break;
+ } else {
+ if (!$t->is_whitespace) {
+ trigger_error("Only whitespace present invariant violated in List ChildDef", E_USER_ERROR);
+ return false;
+ }
+ }
+ }
+ }
+ } else {
+ // start wrapping (this doesn't precisely mimic
+ // browser behavior, but what browsers do is kind of
+ // hard to mimic in a standards compliant way
+ // XXX Actually, this has no impact in practice,
+ // because this gets handled earlier. Arguably,
+ // we should rip out all of that processing
+ $result[] = new HTMLPurifier_Token_Start('li');
+ $nesting++;
+ $seen_li = true;
+ $need_close_li = true;
+ }
+ }
+ $result[] = $token;
+ }
+ if ($need_close_li) {
+ $result[] = new HTMLPurifier_Token_End('li');
+ }
+ if (empty($result)) return false;
+ if ($all_whitespace) {
+ return false;
+ }
+ if ($tokens_of_children == $result) return true;
+ return $result;
+ }
+}
+
+// vim: et sw=4 sts=4
diff --git a/library/HTMLPurifier/HTMLModule/List.php b/library/HTMLPurifier/HTMLModule/List.php
index 74d4522f..79ccefaf 100644
--- a/library/HTMLPurifier/HTMLModule/List.php
+++ b/library/HTMLPurifier/HTMLModule/List.php
@@ -20,10 +20,16 @@ class HTMLPurifier_HTMLModule_List extends HTMLPurifier_HTMLModule
public $content_sets = array('Flow' => 'List');
public function setup($config) {
- $ol = $this->addElement('ol', 'List', 'Required: li', 'Common');
- $ol->wrap = "li";
- $ul = $this->addElement('ul', 'List', 'Required: li', 'Common');
- $ul->wrap = "li";
+ $ol = $this->addElement('ol', 'List', new HTMLPurifier_ChildDef_List(), 'Common');
+ $ul = $this->addElement('ul', 'List', new HTMLPurifier_ChildDef_List(), 'Common');
+ // XXX The wrap attribute is handled by MakeWellFormed. This is all
+ // quite unsatisfactory, because we generated this
+ // *specifically* for lists, and now a big chunk of the handling
+ // is done properly by the List ChildDef. So actually, we just
+ // want enough information to make autoclosing work properly,
+ // and then hand off the tricky stuff to the ChildDef.
+ $ol->wrap = 'li';
+ $ul->wrap = 'li';
$this->addElement('dl', 'List', 'Required: dt | dd', 'Common');
$this->addElement('li', false, 'Flow', 'Common');
diff --git a/tests/HTMLPurifier/ChildDef/ListTest.php b/tests/HTMLPurifier/ChildDef/ListTest.php
new file mode 100644
index 00000000..02dcab0f
--- /dev/null
+++ b/tests/HTMLPurifier/ChildDef/ListTest.php
@@ -0,0 +1,50 @@
+obj = new HTMLPurifier_ChildDef_List();
+ }
+
+ function testEmptyInput() {
+ $this->assertResult('', false);
+ }
+
+ function testSingleLi() {
+ $this->assertResult('');
+ }
+
+ function testSomeLi() {
+ $this->assertResult('asdf');
+ }
+
+ function testIllegal() {
+ // XXX actually this never gets triggered in practice
+ $this->assertResult('', '');
+ }
+
+ function testOlAtBeginning() {
+ $this->assertResult('
', '
');
+ }
+
+ function testOlAtBeginningWithOtherJunk() {
+ $this->assertResult('
', '
');
+ }
+
+ function testOlInMiddle() {
+ $this->assertResult('Foo- Bar
', 'Foo- Bar
');
+ }
+
+ function testMultipleOl() {
+ $this->assertResult('
', '
');
+ }
+
+ function testUlAtBeginning() {
+ $this->assertResult('', '');
+ }
+
+}
+
+// vim: et sw=4 sts=4
diff --git a/tests/HTMLPurifier/HTMLT/list-nesting.htmlt b/tests/HTMLPurifier/HTMLT/list-nesting.htmlt
new file mode 100644
index 00000000..22ebf605
--- /dev/null
+++ b/tests/HTMLPurifier/HTMLT/list-nesting.htmlt
@@ -0,0 +1,5 @@
+--HTML--
+
+--EXPECT--
+
+--# vim: et sw=4 sts=4
diff --git a/tests/HTMLPurifier/Strategy/FixNestingTest.php b/tests/HTMLPurifier/Strategy/FixNestingTest.php
index 7bbc53a5..9394352e 100644
--- a/tests/HTMLPurifier/Strategy/FixNestingTest.php
+++ b/tests/HTMLPurifier/Strategy/FixNestingTest.php
@@ -35,10 +35,17 @@ class HTMLPurifier_Strategy_FixNestingTest extends HTMLPurifier_StrategyHarness
$this->assertResult('', '');
}
- function testRemoveIllegalPCDATA() {
+ function testListHandleIllegalPCDATA() {
$this->assertResult(
'',
- ''
+ ''
+ );
+ }
+
+ function testRemoveIllegalPCDATA() {
+ $this->assertResult(
+ '',
+ ''
);
}
diff --git a/tests/HTMLPurifier/Strategy/MakeWellFormedTest.php b/tests/HTMLPurifier/Strategy/MakeWellFormedTest.php
index 75ee646e..c6d775c5 100644
--- a/tests/HTMLPurifier/Strategy/MakeWellFormedTest.php
+++ b/tests/HTMLPurifier/Strategy/MakeWellFormedTest.php
@@ -119,21 +119,21 @@ class HTMLPurifier_Strategy_MakeWellFormedTest extends HTMLPurifier_StrategyHarn
function testNestedOl() {
$this->assertResult(
'- foo
',
- '- foo
'
+ '- foo
'
);
}
function testNestedUl() {
$this->assertResult(
'',
- ''
+ ''
);
}
function testNestedOlWithStrangeEnding() {
$this->assertResult(
'- foo
- foo
',
- '- foo
- foo
'
+ '- foo
- foo
'
);
}