diff --git a/tests/HTMLPurifier/DefinitionTest.php b/tests/HTMLPurifier/DefinitionTest.php index 9ac5893b..f1c168e8 100644 --- a/tests/HTMLPurifier/DefinitionTest.php +++ b/tests/HTMLPurifier/DefinitionTest.php @@ -1,18 +1,30 @@ UnitTestCase(); $this->def = new HTMLPurifier_Definition(); $this->def->loadData(); - $this->lex = new HTMLPurifier_Lexer(); + + // we can't use the DOM lexer since it does too much stuff + // automatically, however, we should be able to use it + // interchangeably if we wanted to... + + if (true) { + $this->lex = new HTMLPurifier_Lexer_DirectLex(); + } else { + require_once 'HTMLPurifier/Lexer/DOMLex.php'; + $this->lex = new HTMLPurifier_Lexer_DOMLex(); + } + + $this->gen = new HTMLPurifier_Generator(); } function test_removeForeignElements() { @@ -20,44 +32,21 @@ class HTMLPurifier_DefinitionTest extends UnitTestCase $inputs = array(); $expect = array(); - $inputs[0] = array(); + $inputs[0] = ''; $expect[0] = $inputs[0]; - $inputs[1] = array( - new HTMLPurifier_Token_Text('This is ') - ,new HTMLPurifier_Token_Start('b', array()) - ,new HTMLPurifier_Token_Text('bold') - ,new HTMLPurifier_Token_End('b') - ,new HTMLPurifier_Token_Text(' text') - ); + $inputs[1] = 'This is bold text.'; $expect[1] = $inputs[1]; - $inputs[2] = array( - new HTMLPurifier_Token_Start('asdf') - ,new HTMLPurifier_Token_End('asdf') - ,new HTMLPurifier_Token_Start('d', array('href' => 'bang!')) - ,new HTMLPurifier_Token_End('d') - ,new HTMLPurifier_Token_Start('pooloka') - ,new HTMLPurifier_Token_Start('poolasdf') - ,new HTMLPurifier_Token_Start('ds', array('moogle' => '&')) - ,new HTMLPurifier_Token_End('asdf') - ,new HTMLPurifier_Token_End('asdf') - ); - $expect[2] = array( - new HTMLPurifier_Token_Text('') - ,new HTMLPurifier_Token_Text('') - ,new HTMLPurifier_Token_Text('') - ,new HTMLPurifier_Token_Text('') - ,new HTMLPurifier_Token_Text('') - ,new HTMLPurifier_Token_Text('') - ,new HTMLPurifier_Token_Text('') - ,new HTMLPurifier_Token_Text('') - ,new HTMLPurifier_Token_Text('') - ); + // [INVALID] + $inputs[2] = 'BlingBong'; + $expect[2] = htmlspecialchars($inputs[2]); foreach ($inputs as $i => $input) { - $result = $this->def->removeForeignElements($input); - $this->assertEqual($expect[$i], $result); + $tokens = $this->lex->tokenizeHTML($input); + $result_tokens = $this->def->removeForeignElements($tokens); + $result = $this->gen->generateFromTokens($result_tokens); + $this->assertEqual($expect[$i], $result, "Test $i: %s"); paintIf($result, $result != $expect[$i]); } @@ -68,122 +57,46 @@ class HTMLPurifier_DefinitionTest extends UnitTestCase $inputs = array(); $expect = array(); - $inputs[0] = array(); + $inputs[0] = ''; $expect[0] = $inputs[0]; - $inputs[1] = array( - new HTMLPurifier_Token_Text('This is ') - ,new HTMLPurifier_Token_Start('b') - ,new HTMLPurifier_Token_Text('bold') - ,new HTMLPurifier_Token_End('b') - ,new HTMLPurifier_Token_Text(' text') - ,new HTMLPurifier_Token_Empty('br') - ); + $inputs[1] = 'This is bold text.'; $expect[1] = $inputs[1]; - $inputs[2] = array( - new HTMLPurifier_Token_Start('b') - ,new HTMLPurifier_Token_Text('Unclosed tag, gasp!') - ); - $expect[2] = array( - new HTMLPurifier_Token_Start('b') - ,new HTMLPurifier_Token_Text('Unclosed tag, gasp!') - ,new HTMLPurifier_Token_End('b') - ); + $inputs[2] = 'Unclosed tag, gasp!'; + $expect[2] = 'Unclosed tag, gasp!'; - $inputs[3] = array( - new HTMLPurifier_Token_Start('b') - ,new HTMLPurifier_Token_Start('i') - ,new HTMLPurifier_Token_Text('The b is closed, but the i is not') - ,new HTMLPurifier_Token_End('b') - ); - $expect[3] = array( - new HTMLPurifier_Token_Start('b') - ,new HTMLPurifier_Token_Start('i') - ,new HTMLPurifier_Token_Text('The b is closed, but the i is not') - ,new HTMLPurifier_Token_End('i') - ,new HTMLPurifier_Token_End('b') - ); + $inputs[3] = 'Bold and italic?'; + $expect[3] = 'Bold and italic?'; - $inputs[4] = array( - new HTMLPurifier_Token_Text('Hey, recycle unused end tags!') - ,new HTMLPurifier_Token_End('b') - ); - $expect[4] = array( - new HTMLPurifier_Token_Text('Hey, recycle unused end tags!') - ,new HTMLPurifier_Token_Text('') - ); + // CHANGE THIS BEHAVIOR! + $inputs[4] = 'Unused end tags... recycle!'; + $expect[4] = 'Unused end tags... recycle!</b>'; - $inputs[5] = array(new HTMLPurifier_Token_Start('br', array('style' => 'clear:both;'))); - $expect[5] = array(new HTMLPurifier_Token_Empty('br', array('style' => 'clear:both;'))); + $inputs[5] = '
'; + $expect[5] = '
'; - $inputs[6] = array(new HTMLPurifier_Token_Empty('div', array('style' => 'clear:both;'))); - $expect[6] = array( - new HTMLPurifier_Token_Start('div', array('style' => 'clear:both;')) - ,new HTMLPurifier_Token_End('div') - ); + $inputs[6] = '
'; + $expect[6] = '
'; // test automatic paragraph closing - $inputs[7] = array( - new HTMLPurifier_Token_Start('p') - ,new HTMLPurifier_Token_Text('Paragraph 1') - ,new HTMLPurifier_Token_Start('p') - ,new HTMLPurifier_Token_Text('Paragraph 2') - ); - $expect[7] = array( - new HTMLPurifier_Token_Start('p') - ,new HTMLPurifier_Token_Text('Paragraph 1') - ,new HTMLPurifier_Token_End('p') - ,new HTMLPurifier_Token_Start('p') - ,new HTMLPurifier_Token_Text('Paragraph 2') - ,new HTMLPurifier_Token_End('p') - ); + $inputs[7] = '

Paragraph 1

Paragraph 2'; + $expect[7] = '

Paragraph 1

Paragraph 2

'; - $inputs[8] = array( - new HTMLPurifier_Token_Start('div') - ,new HTMLPurifier_Token_Start('p') - ,new HTMLPurifier_Token_Text('Paragraph 1 in a div') - ,new HTMLPurifier_Token_End('div') - ); - $expect[8] = array( - new HTMLPurifier_Token_Start('div') - ,new HTMLPurifier_Token_Start('p') - ,new HTMLPurifier_Token_Text('Paragraph 1 in a div') - ,new HTMLPurifier_Token_End('p') - ,new HTMLPurifier_Token_End('div') - ); + $inputs[8] = '

Paragraphs

In

A

Div

'; + $expect[8] = '

Paragraphs

In

A

Div

'; // automatic list closing - $inputs[9] = array( - new HTMLPurifier_Token_Start('ol') - - ,new HTMLPurifier_Token_Start('li') - ,new HTMLPurifier_Token_Text('Item 1') - - ,new HTMLPurifier_Token_Start('li') - ,new HTMLPurifier_Token_Text('Item 2') - - ,new HTMLPurifier_Token_End('ol') - ); - $expect[9] = array( - new HTMLPurifier_Token_Start('ol') - - ,new HTMLPurifier_Token_Start('li') - ,new HTMLPurifier_Token_Text('Item 1') - ,new HTMLPurifier_Token_End('li') - - ,new HTMLPurifier_Token_Start('li') - ,new HTMLPurifier_Token_Text('Item 2') - ,new HTMLPurifier_Token_End('li') - - ,new HTMLPurifier_Token_End('ol') - ); + $inputs[9] = '
  1. Item 1
  2. Item 2
'; + $expect[9] = '
  1. Item 1
  2. Item 2
'; foreach ($inputs as $i => $input) { - $result = $this->def->makeWellFormed($input); - $this->assertEqual($expect[$i], $result); + $tokens = $this->lex->tokenizeHTML($input); + $result_tokens = $this->def->makeWellFormed($tokens); + $result = $this->gen->generateFromTokens($result_tokens); + $this->assertEqual($expect[$i], $result, "Test $i: %s"); paintIf($result, $result != $expect[$i]); } @@ -196,68 +109,31 @@ class HTMLPurifier_DefinitionTest extends UnitTestCase // next id = 4 // legal inline nesting - $inputs[0] = array( - new HTMLPurifier_Token_Start('b'), - new HTMLPurifier_Token_Text('Bold text'), - new HTMLPurifier_Token_End('b'), - ); + $inputs[0] = 'Bold text'; $expect[0] = $inputs[0]; // legal inline and block // as the parent element is considered FLOW - $inputs[1] = array( - new HTMLPurifier_Token_Start('a', array('href' => 'http://www.example.com/')), - new HTMLPurifier_Token_Text('Linky'), - new HTMLPurifier_Token_End('a'), - new HTMLPurifier_Token_Start('div'), - new HTMLPurifier_Token_Text('Block element'), - new HTMLPurifier_Token_End('div'), - ); + $inputs[1] = 'Blank
Block
'; $expect[1] = $inputs[1]; // illegal block in inline, element -> text - $inputs[2] = array( - new HTMLPurifier_Token_Start('b'), - new HTMLPurifier_Token_Start('div'), - new HTMLPurifier_Token_Text('Illegal Div'), - new HTMLPurifier_Token_End('div'), - new HTMLPurifier_Token_End('b'), - ); - $expect[2] = array( - new HTMLPurifier_Token_Start('b'), - new HTMLPurifier_Token_Text('
'), - new HTMLPurifier_Token_Text('Illegal Div'), - new HTMLPurifier_Token_Text('
'), - new HTMLPurifier_Token_End('b'), - ); + $inputs[2] = '
Illegal div.
'; + $expect[2] = '<div>Illegal div.</div>'; // test of empty set that's required, resulting in removal of node - $inputs[3] = array( - new HTMLPurifier_Token_Start('ul'), - new HTMLPurifier_Token_End('ul') - ); - $expect[3] = array(); + $inputs[3] = '
    '; + $expect[3] = ''; // test illegal text which gets removed - $inputs[4] = array( - new HTMLPurifier_Token_Start('ul'), - new HTMLPurifier_Token_Text('Illegal Text'), - new HTMLPurifier_Token_Start('li'), - new HTMLPurifier_Token_Text('Legal item'), - new HTMLPurifier_Token_End('li'), - new HTMLPurifier_Token_End('ul') - ); - $expect[4] = array( - new HTMLPurifier_Token_Start('ul'), - new HTMLPurifier_Token_Start('li'), - new HTMLPurifier_Token_Text('Legal item'), - new HTMLPurifier_Token_End('li'), - new HTMLPurifier_Token_End('ul') - ); + $inputs[4] = '
      Illegal text
    • Legal item
    '; + $expect[4] = '
    • Legal item
    '; foreach ($inputs as $i => $input) { - $result = $this->def->fixNesting($input); - $this->assertEqual($expect[$i], $result); + $tokens = $this->lex->tokenizeHTML($input); + $result_tokens = $this->def->fixNesting($tokens); + $result = $this->gen->generateFromTokens($result_tokens); + $this->assertEqual($expect[$i], $result, "Test $i: %s"); paintIf($result, $result != $expect[$i]); } }