diff --git a/NEWS b/NEWS index 10da3844..242f0688 100644 --- a/NEWS +++ b/NEWS @@ -16,6 +16,8 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier entities, even if target encoding is UTF-8. ! Added support for 'scope' attribute on tables. ! Added %HTML.TargetBlank, which adds target="blank" to all outgoing links. +! Properly handle sub-lists directly nested inside of lists in + a standards compliant way, by moving them into the preceding
  • - Color keywords are now case insensitive. Thanks Yzmir Ramirez for reporting. - Explicitly initialize anonModule variable to null. diff --git a/library/HTMLPurifier.includes.php b/library/HTMLPurifier.includes.php index 58b66205..e44ab1b3 100644 --- a/library/HTMLPurifier.includes.php +++ b/library/HTMLPurifier.includes.php @@ -135,6 +135,7 @@ require 'HTMLPurifier/AttrTransform/Textarea.php'; require 'HTMLPurifier/ChildDef/Chameleon.php'; require 'HTMLPurifier/ChildDef/Custom.php'; require 'HTMLPurifier/ChildDef/Empty.php'; +require 'HTMLPurifier/ChildDef/List.php'; require 'HTMLPurifier/ChildDef/Required.php'; require 'HTMLPurifier/ChildDef/Optional.php'; require 'HTMLPurifier/ChildDef/StrictBlockquote.php'; diff --git a/library/HTMLPurifier.safe-includes.php b/library/HTMLPurifier.safe-includes.php index 344a7128..d2a11792 100644 --- a/library/HTMLPurifier.safe-includes.php +++ b/library/HTMLPurifier.safe-includes.php @@ -129,6 +129,7 @@ require_once $__dir . '/HTMLPurifier/AttrTransform/Textarea.php'; require_once $__dir . '/HTMLPurifier/ChildDef/Chameleon.php'; require_once $__dir . '/HTMLPurifier/ChildDef/Custom.php'; require_once $__dir . '/HTMLPurifier/ChildDef/Empty.php'; +require_once $__dir . '/HTMLPurifier/ChildDef/List.php'; require_once $__dir . '/HTMLPurifier/ChildDef/Required.php'; require_once $__dir . '/HTMLPurifier/ChildDef/Optional.php'; require_once $__dir . '/HTMLPurifier/ChildDef/StrictBlockquote.php'; diff --git a/library/HTMLPurifier/ChildDef/List.php b/library/HTMLPurifier/ChildDef/List.php new file mode 100644 index 00000000..cdaa2893 --- /dev/null +++ b/library/HTMLPurifier/ChildDef/List.php @@ -0,0 +1,120 @@ + true, 'ul' => true, 'ol' => true); + public function validateChildren($tokens_of_children, $config, $context) { + // Flag for subclasses + $this->whitespace = false; + + // if there are no tokens, delete parent node + if (empty($tokens_of_children)) return false; + + // the new set of children + $result = array(); + + // current depth into the nest + $nesting = 0; + + // a little sanity check to make sure it's not ALL whitespace + $all_whitespace = true; + + $seen_li = false; + $need_close_li = false; + + foreach ($tokens_of_children as $token) { + if (!empty($token->is_whitespace)) { + $result[] = $token; + continue; + } + $all_whitespace = false; // phew, we're not talking about whitespace + + if ($nesting == 1 && $need_close_li) { + $result[] = new HTMLPurifier_Token_End('li'); + $nesting--; + $need_close_li = false; + } + + $is_child = ($nesting == 0); + + if ($token instanceof HTMLPurifier_Token_Start) { + $nesting++; + } elseif ($token instanceof HTMLPurifier_Token_End) { + $nesting--; + } + + if ($is_child) { + if ($token->name === 'li') { + // good + $seen_li = true; + } elseif ($token->name === 'ul' || $token->name === 'ol') { + // we want to tuck this into the previous li + $need_close_li = true; + $nesting++; + if (!$seen_li) { + // create a new li element + $result[] = new HTMLPurifier_Token_Start('li'); + } else { + // backtrack until
  • found + while(true) { + $t = array_pop($result); + if ($t instanceof HTMLPurifier_Token_End) { + // XXX actually, these invariants could very plausibly be violated + // if we are doing silly things with modifying the set of allowed elements. + // FORTUNATELY, it doesn't make a difference, since the allowed + // elements are hard-coded here! + if ($t->name !== 'li') { + trigger_error("Only li present invariant violated in List ChildDef", E_USER_ERROR); + return false; + } + break; + } elseif ($t instanceof HTMLPurifier_Token_Empty) { // bleagh + if ($t->name !== 'li') { + trigger_error("Only li present invariant violated in List ChildDef", E_USER_ERROR); + return false; + } + // XXX this should have a helper for it... + $result[] = new HTMLPurifier_Token_Start('li', $t->attr, $t->line, $t->col, $t->armor); + break; + } else { + if (!$t->is_whitespace) { + trigger_error("Only whitespace present invariant violated in List ChildDef", E_USER_ERROR); + return false; + } + } + } + } + } else { + // start wrapping (this doesn't precisely mimic + // browser behavior, but what browsers do is kind of + // hard to mimic in a standards compliant way + // XXX Actually, this has no impact in practice, + // because this gets handled earlier. Arguably, + // we should rip out all of that processing + $result[] = new HTMLPurifier_Token_Start('li'); + $nesting++; + $seen_li = true; + $need_close_li = true; + } + } + $result[] = $token; + } + if ($need_close_li) { + $result[] = new HTMLPurifier_Token_End('li'); + } + if (empty($result)) return false; + if ($all_whitespace) { + return false; + } + if ($tokens_of_children == $result) return true; + return $result; + } +} + +// vim: et sw=4 sts=4 diff --git a/library/HTMLPurifier/HTMLModule/List.php b/library/HTMLPurifier/HTMLModule/List.php index 74d4522f..79ccefaf 100644 --- a/library/HTMLPurifier/HTMLModule/List.php +++ b/library/HTMLPurifier/HTMLModule/List.php @@ -20,10 +20,16 @@ class HTMLPurifier_HTMLModule_List extends HTMLPurifier_HTMLModule public $content_sets = array('Flow' => 'List'); public function setup($config) { - $ol = $this->addElement('ol', 'List', 'Required: li', 'Common'); - $ol->wrap = "li"; - $ul = $this->addElement('ul', 'List', 'Required: li', 'Common'); - $ul->wrap = "li"; + $ol = $this->addElement('ol', 'List', new HTMLPurifier_ChildDef_List(), 'Common'); + $ul = $this->addElement('ul', 'List', new HTMLPurifier_ChildDef_List(), 'Common'); + // XXX The wrap attribute is handled by MakeWellFormed. This is all + // quite unsatisfactory, because we generated this + // *specifically* for lists, and now a big chunk of the handling + // is done properly by the List ChildDef. So actually, we just + // want enough information to make autoclosing work properly, + // and then hand off the tricky stuff to the ChildDef. + $ol->wrap = 'li'; + $ul->wrap = 'li'; $this->addElement('dl', 'List', 'Required: dt | dd', 'Common'); $this->addElement('li', false, 'Flow', 'Common'); diff --git a/tests/HTMLPurifier/ChildDef/ListTest.php b/tests/HTMLPurifier/ChildDef/ListTest.php new file mode 100644 index 00000000..02dcab0f --- /dev/null +++ b/tests/HTMLPurifier/ChildDef/ListTest.php @@ -0,0 +1,50 @@ +obj = new HTMLPurifier_ChildDef_List(); + } + + function testEmptyInput() { + $this->assertResult('', false); + } + + function testSingleLi() { + $this->assertResult('
  • '); + } + + function testSomeLi() { + $this->assertResult('
  • asdf
  • '); + } + + function testIllegal() { + // XXX actually this never gets triggered in practice + $this->assertResult('
  • ', '
  • '); + } + + function testOlAtBeginning() { + $this->assertResult('
      ', '
      1. '); + } + + function testOlAtBeginningWithOtherJunk() { + $this->assertResult('
        1. ', '
          1. '); + } + + function testOlInMiddle() { + $this->assertResult('
          2. Foo
            1. Bar
            ', '
          3. Foo
            1. Bar
          4. '); + } + + function testMultipleOl() { + $this->assertResult('
              1. ', '
                  1. '); + } + + function testUlAtBeginning() { + $this->assertResult('
                      ', '
                      • '); + } + +} + +// vim: et sw=4 sts=4 diff --git a/tests/HTMLPurifier/HTMLT/list-nesting.htmlt b/tests/HTMLPurifier/HTMLT/list-nesting.htmlt new file mode 100644 index 00000000..22ebf605 --- /dev/null +++ b/tests/HTMLPurifier/HTMLT/list-nesting.htmlt @@ -0,0 +1,5 @@ +--HTML-- +
                        • Sublist 1
                          • Bullet
                        +--EXPECT-- +
                        • Sublist 1
                          • Bullet
                        +--# vim: et sw=4 sts=4 diff --git a/tests/HTMLPurifier/Strategy/FixNestingTest.php b/tests/HTMLPurifier/Strategy/FixNestingTest.php index 7bbc53a5..9394352e 100644 --- a/tests/HTMLPurifier/Strategy/FixNestingTest.php +++ b/tests/HTMLPurifier/Strategy/FixNestingTest.php @@ -35,10 +35,17 @@ class HTMLPurifier_Strategy_FixNestingTest extends HTMLPurifier_StrategyHarness $this->assertResult('
                          ', ''); } - function testRemoveIllegalPCDATA() { + function testListHandleIllegalPCDATA() { $this->assertResult( '
                            Illegal text
                          • Legal item
                          ', - '
                          • Legal item
                          ' + '
                          • Illegal text
                          • Legal item
                          ' + ); + } + + function testRemoveIllegalPCDATA() { + $this->assertResult( + 'Illegal text
                          ', + '
                          ' ); } diff --git a/tests/HTMLPurifier/Strategy/MakeWellFormedTest.php b/tests/HTMLPurifier/Strategy/MakeWellFormedTest.php index 75ee646e..c6d775c5 100644 --- a/tests/HTMLPurifier/Strategy/MakeWellFormedTest.php +++ b/tests/HTMLPurifier/Strategy/MakeWellFormedTest.php @@ -119,21 +119,21 @@ class HTMLPurifier_Strategy_MakeWellFormedTest extends HTMLPurifier_StrategyHarn function testNestedOl() { $this->assertResult( '
                            1. foo
                          ', - '
                            1. foo
                          ' + '
                            1. foo
                          ' ); } function testNestedUl() { $this->assertResult( '
                            • foo
                          ', - '
                            • foo
                          ' + '
                            • foo
                          ' ); } function testNestedOlWithStrangeEnding() { $this->assertResult( '
                              1. foo
                            1. foo
                            ', - '
                                1. foo
                              1. foo
                            ' + '
                                1. foo
                            1. foo
                            ' ); }