''))
+ new HTMLPurifier_Token_Start('a', array('href'=>''))
);
$input[9] = '<b>';
$expect[9] = array(
- new MF_Text('')
+ new HTMLPurifier_Token_Text('')
);
$sax_expect[9] = array(
- new MF_Text('<')
- ,new MF_Text('b')
- ,new MF_Text('>')
+ new HTMLPurifier_Token_Text('<')
+ ,new HTMLPurifier_Token_Text('b')
+ ,new HTMLPurifier_Token_Text('>')
);
// note that SAX can clump text nodes together. We won't be
// too picky though
@@ -130,16 +130,16 @@ class Test_HTML_Lexer extends UnitTestCase
// [INVALID]
$input[10] = '';
$expect[10] = array(
- new MF_StartTag('a', array('"' => ''))
+ new HTMLPurifier_Token_Start('a', array('"' => ''))
);
// [INVALID] [RECOVERABLE]
$input[11] = '"';
- $expect[11] = array( new MF_Text('"') );
+ $expect[11] = array( new HTMLPurifier_Token_Text('"') );
// compare with this valid one:
$input[12] = '"';
- $expect[12] = array( new MF_Text('"') );
+ $expect[12] = array( new HTMLPurifier_Token_Text('"') );
$sax_expect[12] = false;
// SAX chokes on this? We do have entity parsing on, so it should work!
diff --git a/tests/PureHTMLDefinition.php b/tests/PureHTMLDefinition.php
index 5e9c6511..d9b58a10 100644
--- a/tests/PureHTMLDefinition.php
+++ b/tests/PureHTMLDefinition.php
@@ -3,14 +3,25 @@
class Test_HTMLDTD_ChildDef extends UnitTestCase
{
+ var $lex;
+ var $gen;
+
+ function Test_HTMLDTD_ChildDef() {
+ $this->lex = new HTML_Lexer();
+ $this->gen = new HTML_Generator();
+ parent::UnitTestCase();
+ }
+
function assertSeries($inputs, $expect, $def) {
foreach ($inputs as $i => $input) {
- $result = $def->validateChildren($input);
+ $tokens = $this->lex->tokenizeHTML($input);
+ $result = $def->validateChildren($tokens);
if (is_bool($expect[$i])) {
$this->assertIdentical($expect[$i], $result);
} else {
- $this->assertEqual($expect[$i], $result);
- paintIf($result, $result != $expect[$i]);
+ $result_html = $this->gen->generateFromTokens($result);
+ $this->assertEqual($expect[$i], $result_html);
+ paintIf($result_html, $result_html != $expect[$i]);
}
}
}
@@ -21,32 +32,20 @@ class Test_HTMLDTD_ChildDef extends UnitTestCase
$def = new HTMLDTD_ChildDef(
'(caption?, (col*|colgroup*), thead?, tfoot?, (tbody+|tr+))');
- $inputs[0] = array();
+ $inputs[0] = '';
$expect[0] = false;
// we really don't care what's inside, because if it turns out
// this tr is illegal, we'll end up re-evaluating the parent node
// anyway.
- $inputs[1] = array(
- new MF_StartTag('tr') ,new MF_EndTag('tr')
- );
+ $inputs[1] = '
';
$expect[1] = true;
- $inputs[2] = array(
- new MF_StartTag('caption') ,new MF_EndTag('caption')
- ,new MF_StartTag('col') ,new MF_EndTag('col')
- ,new MF_StartTag('thead') ,new MF_EndTag('thead')
- ,new MF_StartTag('tfoot') ,new MF_EndTag('tfoot')
- ,new MF_StartTag('tbody') ,new MF_EndTag('tbody')
- );
+ $inputs[2] = '' .
+ '';
$expect[2] = true;
- $inputs[3] = array(
- new MF_StartTag('col') ,new MF_EndTag('col')
- ,new MF_StartTag('col') ,new MF_EndTag('col')
- ,new MF_StartTag('col') ,new MF_EndTag('col')
- ,new MF_StartTag('tr') ,new MF_EndTag('tr')
- );
+ $inputs[3] = '
';
$expect[3] = true;
$this->assertSeries($inputs, $expect, $def);
@@ -81,63 +80,22 @@ class Test_HTMLDTD_ChildDef extends UnitTestCase
$inputs[0] = array();
$expect[0] = false;
- $inputs[1] = array(
- new MF_StartTag('dt')
- ,new MF_Text('Term')
- ,new MF_EndTag('dt')
-
- ,new MF_Text('Text in an illegal location')
-
- ,new MF_StartTag('dd')
- ,new MF_Text('Definition')
- ,new MF_EndTag('dd')
-
- ,new MF_StartTag('b') // test tag removal too
- ,new MF_EndTag('b')
- );
- $expect[1] = array(
- new MF_StartTag('dt')
- ,new MF_Text('Term')
- ,new MF_EndTag('dt')
-
- ,new MF_StartTag('dd')
- ,new MF_Text('Definition')
- ,new MF_EndTag('dd')
- );
+ $inputs[1] = 'TermText in an illegal location'.
+ 'DefinitionIllegal tag';
- $inputs[2] = array(new MF_Text('How do you do!'));
+ $expect[1] = 'TermDefinition';
+
+ $inputs[2] = 'How do you do!';
$expect[2] = false;
// whitespace shouldn't trigger it
- $inputs[3] = array(
- new MF_Text("\n")
- ,new MF_StartTag('dd')
- ,new MF_Text('Definition')
- ,new MF_EndTag('dd')
- ,new MF_Text(' ')
- );
+ $inputs[3] = "\nDefinition ";
$expect[3] = true;
- $inputs[4] = array(
- new MF_StartTag('dd')
- ,new MF_Text('Definition')
- ,new MF_EndTag('dd')
- ,new MF_Text(' ')
- ,new MF_StartTag('b')
- ,new MF_EndTag('b')
- ,new MF_Text(' ')
- );
- $expect[4] = array(
- new MF_StartTag('dd')
- ,new MF_Text('Definition')
- ,new MF_EndTag('dd')
- ,new MF_Text(' ')
- ,new MF_Text(' ')
- );
- $inputs[5] = array(
- new MF_Text(' ')
- ,new MF_Text("\t")
- );
+ $inputs[4] ='Definition ';
+ $expect[4] = 'Definition ';
+
+ $inputs[5] = "\t ";
$expect[5] = false;
$this->assertSeries($inputs, $expect, $def);
@@ -146,41 +104,23 @@ class Test_HTMLDTD_ChildDef extends UnitTestCase
function test_required_pcdata_allowed() {
$def = new HTMLDTD_ChildDef_Required('#PCDATA | b');
- $input = array(
- new MF_StartTag('b')
- ,new MF_Text('Bold text')
- ,new MF_EndTag('b')
- ,new MF_EmptyTag('img') // illegal tag
- );
- $expect = array(
- new MF_StartTag('b')
- ,new MF_Text('Bold text')
- ,new MF_EndTag('b')
- ,new MF_Text('
')
- );
- $this->assertEqual($expect, $def->validateChildren($input));
+
+ $inputs[0] = 'Bold text
';
+ $expect[0] = 'Bold text<img />';
+
+ $this->assertSeries($inputs, $expect, $def);
}
function test_optional() {
$def = new HTMLDTD_ChildDef_Optional('b | i');
- $input = array(
- new MF_StartTag('b')
- ,new MF_Text('Bold text')
- ,new MF_EndTag('b')
- ,new MF_EmptyTag('img') // illegal tag
- );
- $expect = array(
- new MF_StartTag('b')
- ,new MF_Text('Bold text')
- ,new MF_EndTag('b')
- );
- $this->assertEqual($expect, $def->validateChildren($input));
- $input = array(
- new MF_Text('Not allowed text')
- );
- $expect = array();
- $this->assertEqual($expect, $def->validateChildren($input));
+ $inputs[0] = 'Bold text
';
+ $expect[0] = 'Bold text';
+
+ $inputs[1] = 'Not allowed text';
+ $expect[1] = '';
+
+ $this->assertSeries($inputs, $expect, $def);
}
}
@@ -188,13 +128,13 @@ class Test_HTMLDTD_ChildDef extends UnitTestCase
class Test_PureHTMLDefinition extends UnitTestCase
{
- var $def, $lexer;
+ var $def, $lex;
function Test_PureHTMLDefinition() {
$this->UnitTestCase();
$this->def = new PureHTMLDefinition();
$this->def->loadData();
- $this->lexer = new HTML_Lexer();
+ $this->lex = new HTML_Lexer();
}
function test_removeForeignElements() {
@@ -206,35 +146,35 @@ class Test_PureHTMLDefinition extends UnitTestCase
$expect[0] = $inputs[0];
$inputs[1] = array(
- new MF_Text('This is ')
- ,new MF_StartTag('b', array())
- ,new MF_Text('bold')
- ,new MF_EndTag('b')
- ,new MF_Text(' text')
+ new HTMLPurifier_Token_Text('This is ')
+ ,new HTMLPurifier_Token_Start('b', array())
+ ,new HTMLPurifier_Token_Text('bold')
+ ,new HTMLPurifier_Token_End('b')
+ ,new HTMLPurifier_Token_Text(' text')
);
$expect[1] = $inputs[1];
$inputs[2] = array(
- new MF_StartTag('asdf')
- ,new MF_EndTag('asdf')
- ,new MF_StartTag('d', array('href' => 'bang!'))
- ,new MF_EndTag('d')
- ,new MF_StartTag('pooloka')
- ,new MF_StartTag('poolasdf')
- ,new MF_StartTag('ds', array('moogle' => '&'))
- ,new MF_EndTag('asdf')
- ,new MF_EndTag('asdf')
+ new HTMLPurifier_Token_Start('asdf')
+ ,new HTMLPurifier_Token_End('asdf')
+ ,new HTMLPurifier_Token_Start('d', array('href' => 'bang!'))
+ ,new HTMLPurifier_Token_End('d')
+ ,new HTMLPurifier_Token_Start('pooloka')
+ ,new HTMLPurifier_Token_Start('poolasdf')
+ ,new HTMLPurifier_Token_Start('ds', array('moogle' => '&'))
+ ,new HTMLPurifier_Token_End('asdf')
+ ,new HTMLPurifier_Token_End('asdf')
);
$expect[2] = array(
- new MF_Text('')
- ,new MF_Text('')
- ,new MF_Text('')
- ,new MF_Text('')
- ,new MF_Text('')
- ,new MF_Text('')
- ,new MF_Text('')
- ,new MF_Text('')
- ,new MF_Text('')
+ new HTMLPurifier_Token_Text('')
+ ,new HTMLPurifier_Token_Text('')
+ ,new HTMLPurifier_Token_Text('')
+ ,new HTMLPurifier_Token_Text('')
+ ,new HTMLPurifier_Token_Text('')
+ ,new HTMLPurifier_Token_Text('')
+ ,new HTMLPurifier_Token_Text('')
+ ,new HTMLPurifier_Token_Text('')
+ ,new HTMLPurifier_Token_Text('')
);
foreach ($inputs as $i => $input) {
@@ -254,113 +194,113 @@ class Test_PureHTMLDefinition extends UnitTestCase
$expect[0] = $inputs[0];
$inputs[1] = array(
- new MF_Text('This is ')
- ,new MF_StartTag('b')
- ,new MF_Text('bold')
- ,new MF_EndTag('b')
- ,new MF_Text(' text')
- ,new MF_EmptyTag('br')
+ new HTMLPurifier_Token_Text('This is ')
+ ,new HTMLPurifier_Token_Start('b')
+ ,new HTMLPurifier_Token_Text('bold')
+ ,new HTMLPurifier_Token_End('b')
+ ,new HTMLPurifier_Token_Text(' text')
+ ,new HTMLPurifier_Token_Empty('br')
);
$expect[1] = $inputs[1];
$inputs[2] = array(
- new MF_StartTag('b')
- ,new MF_Text('Unclosed tag, gasp!')
+ new HTMLPurifier_Token_Start('b')
+ ,new HTMLPurifier_Token_Text('Unclosed tag, gasp!')
);
$expect[2] = array(
- new MF_StartTag('b')
- ,new MF_Text('Unclosed tag, gasp!')
- ,new MF_EndTag('b')
+ new HTMLPurifier_Token_Start('b')
+ ,new HTMLPurifier_Token_Text('Unclosed tag, gasp!')
+ ,new HTMLPurifier_Token_End('b')
);
$inputs[3] = array(
- new MF_StartTag('b')
- ,new MF_StartTag('i')
- ,new MF_Text('The b is closed, but the i is not')
- ,new MF_EndTag('b')
+ new HTMLPurifier_Token_Start('b')
+ ,new HTMLPurifier_Token_Start('i')
+ ,new HTMLPurifier_Token_Text('The b is closed, but the i is not')
+ ,new HTMLPurifier_Token_End('b')
);
$expect[3] = array(
- new MF_StartTag('b')
- ,new MF_StartTag('i')
- ,new MF_Text('The b is closed, but the i is not')
- ,new MF_EndTag('i')
- ,new MF_EndTag('b')
+ new HTMLPurifier_Token_Start('b')
+ ,new HTMLPurifier_Token_Start('i')
+ ,new HTMLPurifier_Token_Text('The b is closed, but the i is not')
+ ,new HTMLPurifier_Token_End('i')
+ ,new HTMLPurifier_Token_End('b')
);
$inputs[4] = array(
- new MF_Text('Hey, recycle unused end tags!')
- ,new MF_EndTag('b')
+ new HTMLPurifier_Token_Text('Hey, recycle unused end tags!')
+ ,new HTMLPurifier_Token_End('b')
);
$expect[4] = array(
- new MF_Text('Hey, recycle unused end tags!')
- ,new MF_Text('')
+ new HTMLPurifier_Token_Text('Hey, recycle unused end tags!')
+ ,new HTMLPurifier_Token_Text('')
);
- $inputs[5] = array(new MF_StartTag('br', array('style' => 'clear:both;')));
- $expect[5] = array(new MF_EmptyTag('br', array('style' => 'clear:both;')));
+ $inputs[5] = array(new HTMLPurifier_Token_Start('br', array('style' => 'clear:both;')));
+ $expect[5] = array(new HTMLPurifier_Token_Empty('br', array('style' => 'clear:both;')));
- $inputs[6] = array(new MF_EmptyTag('div', array('style' => 'clear:both;')));
+ $inputs[6] = array(new HTMLPurifier_Token_Empty('div', array('style' => 'clear:both;')));
$expect[6] = array(
- new MF_StartTag('div', array('style' => 'clear:both;'))
- ,new MF_EndTag('div')
+ new HTMLPurifier_Token_Start('div', array('style' => 'clear:both;'))
+ ,new HTMLPurifier_Token_End('div')
);
// test automatic paragraph closing
$inputs[7] = array(
- new MF_StartTag('p')
- ,new MF_Text('Paragraph 1')
- ,new MF_StartTag('p')
- ,new MF_Text('Paragraph 2')
+ new HTMLPurifier_Token_Start('p')
+ ,new HTMLPurifier_Token_Text('Paragraph 1')
+ ,new HTMLPurifier_Token_Start('p')
+ ,new HTMLPurifier_Token_Text('Paragraph 2')
);
$expect[7] = array(
- new MF_StartTag('p')
- ,new MF_Text('Paragraph 1')
- ,new MF_EndTag('p')
- ,new MF_StartTag('p')
- ,new MF_Text('Paragraph 2')
- ,new MF_EndTag('p')
+ new HTMLPurifier_Token_Start('p')
+ ,new HTMLPurifier_Token_Text('Paragraph 1')
+ ,new HTMLPurifier_Token_End('p')
+ ,new HTMLPurifier_Token_Start('p')
+ ,new HTMLPurifier_Token_Text('Paragraph 2')
+ ,new HTMLPurifier_Token_End('p')
);
$inputs[8] = array(
- new MF_StartTag('div')
- ,new MF_StartTag('p')
- ,new MF_Text('Paragraph 1 in a div')
- ,new MF_EndTag('div')
+ new HTMLPurifier_Token_Start('div')
+ ,new HTMLPurifier_Token_Start('p')
+ ,new HTMLPurifier_Token_Text('Paragraph 1 in a div')
+ ,new HTMLPurifier_Token_End('div')
);
$expect[8] = array(
- new MF_StartTag('div')
- ,new MF_StartTag('p')
- ,new MF_Text('Paragraph 1 in a div')
- ,new MF_EndTag('p')
- ,new MF_EndTag('div')
+ new HTMLPurifier_Token_Start('div')
+ ,new HTMLPurifier_Token_Start('p')
+ ,new HTMLPurifier_Token_Text('Paragraph 1 in a div')
+ ,new HTMLPurifier_Token_End('p')
+ ,new HTMLPurifier_Token_End('div')
);
// automatic list closing
$inputs[9] = array(
- new MF_StartTag('ol')
+ new HTMLPurifier_Token_Start('ol')
- ,new MF_StartTag('li')
- ,new MF_Text('Item 1')
+ ,new HTMLPurifier_Token_Start('li')
+ ,new HTMLPurifier_Token_Text('Item 1')
- ,new MF_StartTag('li')
- ,new MF_Text('Item 2')
+ ,new HTMLPurifier_Token_Start('li')
+ ,new HTMLPurifier_Token_Text('Item 2')
- ,new MF_EndTag('ol')
+ ,new HTMLPurifier_Token_End('ol')
);
$expect[9] = array(
- new MF_StartTag('ol')
+ new HTMLPurifier_Token_Start('ol')
- ,new MF_StartTag('li')
- ,new MF_Text('Item 1')
- ,new MF_EndTag('li')
+ ,new HTMLPurifier_Token_Start('li')
+ ,new HTMLPurifier_Token_Text('Item 1')
+ ,new HTMLPurifier_Token_End('li')
- ,new MF_StartTag('li')
- ,new MF_Text('Item 2')
- ,new MF_EndTag('li')
+ ,new HTMLPurifier_Token_Start('li')
+ ,new HTMLPurifier_Token_Text('Item 2')
+ ,new HTMLPurifier_Token_End('li')
- ,new MF_EndTag('ol')
+ ,new HTMLPurifier_Token_End('ol')
);
foreach ($inputs as $i => $input) {
@@ -379,62 +319,62 @@ class Test_PureHTMLDefinition extends UnitTestCase
// legal inline nesting
$inputs[0] = array(
- new MF_StartTag('b'),
- new MF_Text('Bold text'),
- new MF_EndTag('b'),
+ new HTMLPurifier_Token_Start('b'),
+ new HTMLPurifier_Token_Text('Bold text'),
+ new HTMLPurifier_Token_End('b'),
);
$expect[0] = $inputs[0];
// legal inline and block
// as the parent element is considered FLOW
$inputs[1] = array(
- new MF_StartTag('a', array('href' => 'http://www.example.com/')),
- new MF_Text('Linky'),
- new MF_EndTag('a'),
- new MF_StartTag('div'),
- new MF_Text('Block element'),
- new MF_EndTag('div'),
+ new HTMLPurifier_Token_Start('a', array('href' => 'http://www.example.com/')),
+ new HTMLPurifier_Token_Text('Linky'),
+ new HTMLPurifier_Token_End('a'),
+ new HTMLPurifier_Token_Start('div'),
+ new HTMLPurifier_Token_Text('Block element'),
+ new HTMLPurifier_Token_End('div'),
);
$expect[1] = $inputs[1];
// illegal block in inline, element -> text
$inputs[2] = array(
- new MF_StartTag('b'),
- new MF_StartTag('div'),
- new MF_Text('Illegal Div'),
- new MF_EndTag('div'),
- new MF_EndTag('b'),
+ new HTMLPurifier_Token_Start('b'),
+ new HTMLPurifier_Token_Start('div'),
+ new HTMLPurifier_Token_Text('Illegal Div'),
+ new HTMLPurifier_Token_End('div'),
+ new HTMLPurifier_Token_End('b'),
);
$expect[2] = array(
- new MF_StartTag('b'),
- new MF_Text(''),
- new MF_Text('Illegal Div'),
- new MF_Text('
'),
- new MF_EndTag('b'),
+ new HTMLPurifier_Token_Start('b'),
+ new HTMLPurifier_Token_Text(''),
+ new HTMLPurifier_Token_Text('Illegal Div'),
+ new HTMLPurifier_Token_Text('
'),
+ new HTMLPurifier_Token_End('b'),
);
// test of empty set that's required, resulting in removal of node
$inputs[3] = array(
- new MF_StartTag('ul'),
- new MF_EndTag('ul')
+ new HTMLPurifier_Token_Start('ul'),
+ new HTMLPurifier_Token_End('ul')
);
$expect[3] = array();
// test illegal text which gets removed
$inputs[4] = array(
- new MF_StartTag('ul'),
- new MF_Text('Illegal Text'),
- new MF_StartTag('li'),
- new MF_Text('Legal item'),
- new MF_EndTag('li'),
- new MF_EndTag('ul')
+ new HTMLPurifier_Token_Start('ul'),
+ new HTMLPurifier_Token_Text('Illegal Text'),
+ new HTMLPurifier_Token_Start('li'),
+ new HTMLPurifier_Token_Text('Legal item'),
+ new HTMLPurifier_Token_End('li'),
+ new HTMLPurifier_Token_End('ul')
);
$expect[4] = array(
- new MF_StartTag('ul'),
- new MF_StartTag('li'),
- new MF_Text('Legal item'),
- new MF_EndTag('li'),
- new MF_EndTag('ul')
+ new HTMLPurifier_Token_Start('ul'),
+ new HTMLPurifier_Token_Start('li'),
+ new HTMLPurifier_Token_Text('Legal item'),
+ new HTMLPurifier_Token_End('li'),
+ new HTMLPurifier_Token_End('ul')
);
foreach ($inputs as $i => $input) {