From 23dba8b55e83abb11a96fe4d490cf96e25efcb47 Mon Sep 17 00:00:00 2001 From: "Edward Z. Yang" Date: Fri, 21 Jul 2006 11:27:54 +0000 Subject: [PATCH] Rename MarkupFragment.php to Token.php, change internal class names and rewire the classes. We also started adding more dependence on the Lexer and Generator in unrelated tests. git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@63 48356398-32a2-884e-a903-53898d9a118a --- HTML_Generator.php | 8 +- HTML_Lexer.php | 30 +-- MarkupFragment.php | 65 ------ PureHTMLDefinition.php | 38 ++-- Token.php | 66 ++++++ tester.php | 6 +- tests/HTML_Generator.php | 22 +- tests/HTML_Lexer.php | 80 ++++---- tests/PureHTMLDefinition.php | 380 +++++++++++++++-------------------- 9 files changed, 323 insertions(+), 372 deletions(-) delete mode 100644 MarkupFragment.php create mode 100644 Token.php diff --git a/HTML_Generator.php b/HTML_Generator.php index 9726267d..4e41376c 100644 --- a/HTML_Generator.php +++ b/HTML_Generator.php @@ -12,18 +12,18 @@ class HTML_Generator } function generateFromToken($token) { - if (is_a($token, 'MF_StartTag')) { + if ($token->type == 'start') { $attr = $this->generateAttributes($token->attributes); return '<' . $token->name . ($attr ? ' ' : '') . $attr . '>'; - } elseif (is_a($token, 'MF_EndTag')) { + } elseif ($token->type == 'end') { return 'name . '>'; - } elseif (is_a($token, 'MF_EmptyTag')) { + } elseif ($token->type == 'empty') { $attr = $this->generateAttributes($token->attributes); return '<' . $token->name . ($attr ? ' ' : '') . $attr . ' />'; - } elseif (is_a($token, 'MF_Text')) { + } elseif ($token->type == 'text') { return htmlentities($token->data, ENT_COMPAT, 'UTF-8'); } else { diff --git a/HTML_Lexer.php b/HTML_Lexer.php index 2a2bd710..1af3d5d0 100644 --- a/HTML_Lexer.php +++ b/HTML_Lexer.php @@ -121,7 +121,7 @@ class HTML_Lexer if (!$inside_tag && $position_next_lt !== false) { // We are not inside tag and there still is another tag to parse $array[] = new - MF_Text( + HTMLPurifier_Token_Text( html_entity_decode( substr( $string, $cursor, $position_next_lt - $cursor @@ -138,7 +138,7 @@ class HTML_Lexer if ($cursor === strlen($string)) break; // Create Text of rest of string $array[] = new - MF_Text( + HTMLPurifier_Token_Text( html_entity_decode( substr( $string, $cursor @@ -158,7 +158,7 @@ class HTML_Lexer substr($segment,strlen($segment)-2,2) == '--' ) { $array[] = new - MF_Comment( + HTMLPurifier_Token_Comment( substr( $segment, 3, strlen($segment) - 5 ) @@ -172,7 +172,7 @@ class HTML_Lexer $is_end_tag = (strpos($segment,'/') === 0); if ($is_end_tag) { $type = substr($segment, 1); - $array[] = new MF_EndTag($type); + $array[] = new HTMLPurifier_Token_End($type); $inside_tag = false; $cursor = $position_next_gt + 1; continue; @@ -191,9 +191,9 @@ class HTML_Lexer $position_first_space = $this->nextWhiteSpace($segment); if ($position_first_space === false) { if ($is_self_closing) { - $array[] = new MF_EmptyTag($segment); + $array[] = new HTMLPurifier_Token_Empty($segment); } else { - $array[] = new MF_StartTag($segment, array()); + $array[] = new HTMLPurifier_Token_Start($segment); } $inside_tag = false; $cursor = $position_next_gt + 1; @@ -210,16 +210,16 @@ class HTML_Lexer ); $attributes = $this->tokenizeAttributeString($attribute_string); if ($is_self_closing) { - $array[] = new MF_EmptyTag($type, $attributes); + $array[] = new HTMLPurifier_Token_Empty($type, $attributes); } else { - $array[] = new MF_StartTag($type, $attributes); + $array[] = new HTMLPurifier_Token_Start($type, $attributes); } $cursor = $position_next_gt + 1; $inside_tag = false; continue; } else { $array[] = new - MF_Text( + HTMLPurifier_Token_Text( '<' . html_entity_decode( substr($string, $cursor), @@ -362,9 +362,9 @@ class HTML_Lexer_Sax extends HTML_Lexer function openHandler(&$parser, $name, $attrs, $closed) { if ($closed) { - $this->tokens[] = new MF_EmptyTag($name, $attrs); + $this->tokens[] = new HTMLPurifier_Token_Empty($name, $attrs); } else { - $this->tokens[] = new MF_StartTag($name, $attrs); + $this->tokens[] = new HTMLPurifier_Token_Start($name, $attrs); } return true; } @@ -373,21 +373,21 @@ class HTML_Lexer_Sax extends HTML_Lexer // HTMLSax3 seems to always send empty tags an extra close tag // check and ignore if you see it: // [TESTME] to make sure it doesn't overreach - if (is_a($this->tokens[count($this->tokens)-1], 'MF_EmptyTag')) { + if ($this->tokens[count($this->tokens)-1]->type == 'empty') { return true; } - $this->tokens[] = new MF_EndTag($name); + $this->tokens[] = new HTMLPurifier_Token_End($name); return true; } function dataHandler(&$parser, $data) { - $this->tokens[] = new MF_Text($data); + $this->tokens[] = new HTMLPurifier_Token_Text($data); return true; } function escapeHandler(&$parser, $data) { if (strpos($data, '-') === 0) { - $this->tokens[] = new MF_Comment($data); + $this->tokens[] = new HTMLPurifier_Token_Comment($data); } return true; } diff --git a/MarkupFragment.php b/MarkupFragment.php deleted file mode 100644 index 43bea19e..00000000 --- a/MarkupFragment.php +++ /dev/null @@ -1,65 +0,0 @@ -name = strtolower($name); // for some reason, the SAX parser - // uses uppercase. Investigate? - } -} - -class MF_TagWithAttributes extends MF_Tag // abstract -{ - var $attributes = array(); - function MF_TagWithAttributes($name, $attributes = array()) { - $this->MF_Tag($name); - $this->attributes = $attributes; - } -} - -class MF_StartTag extends MF_TagWithAttributes -{ - var $type = 'start'; -} - -class MF_EmptyTag extends MF_TagWithAttributes -{ - var $type = 'empty'; -} - -class MF_EndTag extends MF_Tag -{ - var $type = 'end'; -} - -class MF_Text extends MF -{ - var $name = '#PCDATA'; - var $type = 'text'; - var $data; - var $is_whitespace = false; - function MF_Text($data) { - $this->data = $data; - if (trim($data, " \n\r\t") === '') $this->is_whitespace = true; - } - function append($mf_text) { - return new MF_Text($this->data . $mf_text->data); - } -} - -class MF_Comment extends MF -{ - var $data; - var $type = 'comment'; - function MF_Comment($data) { - $this->data = $data; - } -} - -?> \ No newline at end of file diff --git a/PureHTMLDefinition.php b/PureHTMLDefinition.php index d9ae2f30..cb51bfe6 100644 --- a/PureHTMLDefinition.php +++ b/PureHTMLDefinition.php @@ -162,10 +162,10 @@ class PureHTMLDefinition if (empty($this->info)) $this->loadData(); $result = array(); foreach($tokens as $token) { - if (is_subclass_of($token, 'MF_Tag')) { + if (!empty( $token->is_tag )) { if (!isset($this->info[$token->name])) { // invalid tag, generate HTML and insert in - $token = new MF_Text( + $token = new HTMLPurifier_Token_Text( $this->generator->generateFromToken($token) ); } @@ -186,7 +186,7 @@ class PureHTMLDefinition $result = array(); $current_nesting = array(); foreach ($tokens as $token) { - if (!is_subclass_of($token, 'MF_Tag')) { + if (empty( $token->is_tag )) { $result[] = $token; continue; } @@ -196,7 +196,8 @@ class PureHTMLDefinition if ($info->child_def->type == 'empty' && $token->type == 'start' ) { - $result[] = new MF_EmptyTag($token->name, $token->attributes); + $result[] = new HTMLPurifier_Token_Empty($token->name, + $token->attributes); continue; } @@ -204,8 +205,9 @@ class PureHTMLDefinition if ($info->child_def->type != 'empty' && $token->type == 'empty' ) { - $result[] = new MF_StartTag($token->name, $token->attributes); - $result[] = new MF_EndTag($token->name); + $result[] = new HTMLPurifier_Token_Start($token->name, + $token->attributes); + $result[] = new HTMLPurifier_Token_End($token->name); continue; } @@ -228,7 +230,7 @@ class PureHTMLDefinition if ($current_parent->name == 'p' && isset($this->info_closes_p[$token->name]) ) { - $result[] = new MF_EndTag('p'); + $result[] = new HTMLPurifier_Token_End('p'); $result[] = $token; $current_nesting[] = $token; continue; @@ -238,7 +240,7 @@ class PureHTMLDefinition if ($current_parent->name == 'li' && $token->name == 'li' ) { - $result[] = new MF_EndTag('li'); + $result[] = new HTMLPurifier_Token_End('li'); $result[] = $token; $current_nesting[] = $token; continue; @@ -263,7 +265,7 @@ class PureHTMLDefinition // make sure that we have something open if (empty($current_nesting)) { - $result[] = new MF_Text( + $result[] = new HTMLPurifier_Token_Text( $this->generator->generateFromToken($token) ); continue; @@ -298,7 +300,7 @@ class PureHTMLDefinition // we still didn't find the tag, so translate to text if ($skipped_tags === false) { - $result[] = new MF_Text( + $result[] = new HTMLPurifier_Token_Text( $this->generator->generateFromToken($token) ); continue; @@ -308,7 +310,7 @@ class PureHTMLDefinition // note that skipped tags contains the element we need closed $size = count($skipped_tags); for ($i = $size - 1; $i >= 0; $i--) { - $result[] = new MF_EndTag($skipped_tags[$i]->name); + $result[] = new HTMLPurifier_Token_End($skipped_tags[$i]->name); } // done! @@ -320,7 +322,8 @@ class PureHTMLDefinition if (!empty($current_nesting)) { $size = count($current_nesting); for ($i = $size - 1; $i >= 0; $i--) { - $result[] = new MF_EndTag($current_nesting[$i]->name); + $result[] = + new HTMLPurifier_Token_End($current_nesting[$i]->name); } } @@ -331,8 +334,8 @@ class PureHTMLDefinition if (empty($this->info)) $this->loadData(); // insert implicit "parent" node, will be removed at end - array_unshift($tokens, new MF_StartTag('div')); - $tokens[] = new MF_EndTag('div'); + array_unshift($tokens, new HTMLPurifier_Token_Start('div')); + $tokens[] = new HTMLPurifier_Token_End('div'); for ($i = 0, $size = count($tokens) ; $i < $size; ) { @@ -553,7 +556,7 @@ class HTMLDTD_ChildDef_Required extends HTMLDTD_ChildDef_Simple if (!isset($this->elements[$token->name])) { $is_deleting = true; if ($pcdata_allowed) { - $result[] = new MF_Text( + $result[] = new HTMLPurifier_Token_Text( $this->gen->generateFromToken($token) ); } @@ -563,7 +566,10 @@ class HTMLDTD_ChildDef_Required extends HTMLDTD_ChildDef_Simple if (!$is_deleting) { $result[] = $token; } elseif ($pcdata_allowed) { - $result[] = new MF_Text($this->gen->generateFromToken($token)); + $result[] = + new HTMLPurifier_Token_Text( + $this->gen->generateFromToken( $token ) + ); } else { // drop silently } diff --git a/Token.php b/Token.php new file mode 100644 index 00000000..1ca8a1db --- /dev/null +++ b/Token.php @@ -0,0 +1,66 @@ +name = strtolower($name); // for some reason, the SAX parser + // uses uppercase. Investigate? + } +} + +// a rich tag has attributes +class HTMLPurifier_Token_RichTag extends HTMLPurifier_Token_Tag // abstract +{ + var $attributes = array(); + function HTMLPurifier_Token_RichTag($name, $attributes = array()) { + $this->HTMLPurifier_Token_Tag($name); + $this->attributes = $attributes; + } +} + +class HTMLPurifier_Token_Start extends HTMLPurifier_Token_RichTag +{ + var $type = 'start'; +} + +class HTMLPurifier_Token_Empty extends HTMLPurifier_Token_RichTag +{ + var $type = 'empty'; +} + +class HTMLPurifier_Token_End extends HTMLPurifier_Token_Tag +{ + var $type = 'end'; +} + +class HTMLPurifier_Token_Text extends HTMLPurifier_Token +{ + var $name = '#PCDATA'; + var $type = 'text'; + var $data; + var $is_whitespace = false; + function HTMLPurifier_Token_Text($data) { + $this->data = $data; + if (trim($data, " \n\r\t") === '') $this->is_whitespace = true; + } + function append($text) { + return new HTMLPurifier_Token_Text($this->data . $text->data); + } +} + +class HTMLPurifier_Token_Comment extends HTMLPurifier_Token +{ + var $data; + var $type = 'comment'; + function HTMLPurifier_Token_Comment($data) { + $this->data = $data; + } +} + +?> \ No newline at end of file diff --git a/tester.php b/tester.php index 552c15e2..dfaa5916 100644 --- a/tester.php +++ b/tester.php @@ -6,7 +6,7 @@ require_once 'XML/HTMLSax3.php'; // optional PEAR class require_once 'HTML_Purifier.php'; require_once 'HTML_Lexer.php'; -require_once 'MarkupFragment.php'; +require_once 'Token.php'; require_once 'PureHTMLDefinition.php'; require_once 'HTML_Generator.php'; @@ -15,11 +15,11 @@ $test = new GroupTest('HTML_Purifier'); chdir('tests/'); $test->addTestFile('HTML_Purifier.php'); $test->addTestFile('HTML_Lexer.php'); -//$test->addTestFile('MarkupFragment.php'); +//$test->addTestFile('Token.php'); $test->addTestFile('PureHTMLDefinition.php'); $test->addTestFile('HTML_Generator.php'); chdir('../'); -$test->run(new HtmlReporter()); +$test->run( new HtmlReporter() ); ?> \ No newline at end of file diff --git a/tests/HTML_Generator.php b/tests/HTML_Generator.php index f867ea22..d7e3da55 100644 --- a/tests/HTML_Generator.php +++ b/tests/HTML_Generator.php @@ -15,22 +15,26 @@ class Test_HTML_Generator extends UnitTestCase $inputs = array(); $expect = array(); - $inputs[0] = new MF_Text('Foobar.<>'); + $inputs[0] = new HTMLPurifier_Token_Text('Foobar.<>'); $expect[0] = 'Foobar.<>'; - $inputs[1] = new MF_StartTag('a', array('href' => 'dyn?a=foo&b=bar')); + $inputs[1] = new HTMLPurifier_Token_Start('a', + array('href' => 'dyn?a=foo&b=bar') + ); $expect[1] = ''; - $inputs[2] = new MF_EndTag('b'); + $inputs[2] = new HTMLPurifier_Token_End('b'); $expect[2] = ''; - $inputs[3] = new MF_EmptyTag('br', array('style' => 'font-family:"Courier New";')); + $inputs[3] = new HTMLPurifier_Token_Empty('br', + array('style' => 'font-family:"Courier New";') + ); $expect[3] = '
'; - $inputs[4] = new MF_StartTag('asdf'); + $inputs[4] = new HTMLPurifier_Token_Start('asdf'); $expect[4] = ''; - $inputs[5] = new MF_EmptyTag('br'); + $inputs[5] = new HTMLPurifier_Token_Empty('br'); $expect[5] = '
'; foreach ($inputs as $i => $input) { @@ -69,9 +73,9 @@ class Test_HTML_Generator extends UnitTestCase function test_generateFromTokens() { $tokens = array( - new MF_StartTag('b'), - new MF_Text('Foobar!'), - new MF_EndTag('b') + new HTMLPurifier_Token_Start('b'), + new HTMLPurifier_Token_Text('Foobar!'), + new HTMLPurifier_Token_End('b') ); $expect = 'Foobar!'; $this->assertEqual($expect, $this->gen->generateFromTokens($tokens)); diff --git a/tests/HTML_Lexer.php b/tests/HTML_Lexer.php index 2cef97d3..afa75fa1 100644 --- a/tests/HTML_Lexer.php +++ b/tests/HTML_Lexer.php @@ -46,83 +46,83 @@ class Test_HTML_Lexer extends UnitTestCase $input[1] = 'This is regular text.'; $expect[1] = array( - new MF_Text('This is regular text.') + new HTMLPurifier_Token_Text('This is regular text.') ); $input[2] = 'This is bold text'; $expect[2] = array( - new MF_Text('This is ') - ,new MF_StartTag('b', array()) - ,new MF_Text('bold') - ,new MF_EndTag('b') - ,new MF_Text(' text') + new HTMLPurifier_Token_Text('This is ') + ,new HTMLPurifier_Token_Start('b', array()) + ,new HTMLPurifier_Token_Text('bold') + ,new HTMLPurifier_Token_End('b') + ,new HTMLPurifier_Token_Text(' text') ); $input[3] = '
Totally rad dude. asdf
'; $expect[3] = array( - new MF_StartTag('DIV', array()) - ,new MF_Text('Totally rad dude. ') - ,new MF_StartTag('b', array()) - ,new MF_Text('asdf') - ,new MF_EndTag('b') - ,new MF_EndTag('div') + new HTMLPurifier_Token_Start('DIV', array()) + ,new HTMLPurifier_Token_Text('Totally rad dude. ') + ,new HTMLPurifier_Token_Start('b', array()) + ,new HTMLPurifier_Token_Text('asdf') + ,new HTMLPurifier_Token_End('b') + ,new HTMLPurifier_Token_End('div') ); $input[4] = '
'; $expect[4] = array( - new MF_StartTag('asdf') - ,new MF_EndTag('asdf') - ,new MF_StartTag('d') - ,new MF_EndTag('d') - ,new MF_StartTag('poOloka') - ,new MF_StartTag('poolasdf') - ,new MF_StartTag('ds') - ,new MF_EndTag('asdf') - ,new MF_EndTag('ASDF') + new HTMLPurifier_Token_Start('asdf') + ,new HTMLPurifier_Token_End('asdf') + ,new HTMLPurifier_Token_Start('d') + ,new HTMLPurifier_Token_End('d') + ,new HTMLPurifier_Token_Start('poOloka') + ,new HTMLPurifier_Token_Start('poolasdf') + ,new HTMLPurifier_Token_Start('ds') + ,new HTMLPurifier_Token_End('asdf') + ,new HTMLPurifier_Token_End('ASDF') ); $input[5] = 'Link to foobar
'; $expect[5] = array( - new MF_StartTag('a',array('href'=>'foobar.php','title'=>'foo!')) - ,new MF_Text('Link to ') - ,new MF_StartTag('b',array('id'=>'asdf')) - ,new MF_Text('foobar') - ,new MF_EndTag('b') - ,new MF_EndTag('a') + new HTMLPurifier_Token_Start('a',array('href'=>'foobar.php','title'=>'foo!')) + ,new HTMLPurifier_Token_Text('Link to ') + ,new HTMLPurifier_Token_Start('b',array('id'=>'asdf')) + ,new HTMLPurifier_Token_Text('foobar') + ,new HTMLPurifier_Token_End('b') + ,new HTMLPurifier_Token_End('a') ); $input[6] = '
'; $expect[6] = array( - new MF_EmptyTag('br') + new HTMLPurifier_Token_Empty('br') ); // [INVALID] [RECOVERABLE] $input[7] = ' '; $expect[7] = array( - new MF_Comment(' Comment ') - ,new MF_Text(' ') - ,new MF_Comment(' not so well formed -') + new HTMLPurifier_Token_Comment(' Comment ') + ,new HTMLPurifier_Token_Text(' ') + ,new HTMLPurifier_Token_Comment(' not so well formed -') ); $sax_expect[7] = false; // we need to figure out proper comment output // [INVALID] $input[8] = ''')) + new HTMLPurifier_Token_Start('a', array('href'=>'')) ); $input[9] = '<b>'; $expect[9] = array( - new MF_Text('') + new HTMLPurifier_Token_Text('') ); $sax_expect[9] = array( - new MF_Text('<') - ,new MF_Text('b') - ,new MF_Text('>') + new HTMLPurifier_Token_Text('<') + ,new HTMLPurifier_Token_Text('b') + ,new HTMLPurifier_Token_Text('>') ); // note that SAX can clump text nodes together. We won't be // too picky though @@ -130,16 +130,16 @@ class Test_HTML_Lexer extends UnitTestCase // [INVALID] $input[10] = ''; $expect[10] = array( - new MF_StartTag('a', array('"' => '')) + new HTMLPurifier_Token_Start('a', array('"' => '')) ); // [INVALID] [RECOVERABLE] $input[11] = '"'; - $expect[11] = array( new MF_Text('"') ); + $expect[11] = array( new HTMLPurifier_Token_Text('"') ); // compare with this valid one: $input[12] = '"'; - $expect[12] = array( new MF_Text('"') ); + $expect[12] = array( new HTMLPurifier_Token_Text('"') ); $sax_expect[12] = false; // SAX chokes on this? We do have entity parsing on, so it should work! diff --git a/tests/PureHTMLDefinition.php b/tests/PureHTMLDefinition.php index 5e9c6511..d9b58a10 100644 --- a/tests/PureHTMLDefinition.php +++ b/tests/PureHTMLDefinition.php @@ -3,14 +3,25 @@ class Test_HTMLDTD_ChildDef extends UnitTestCase { + var $lex; + var $gen; + + function Test_HTMLDTD_ChildDef() { + $this->lex = new HTML_Lexer(); + $this->gen = new HTML_Generator(); + parent::UnitTestCase(); + } + function assertSeries($inputs, $expect, $def) { foreach ($inputs as $i => $input) { - $result = $def->validateChildren($input); + $tokens = $this->lex->tokenizeHTML($input); + $result = $def->validateChildren($tokens); if (is_bool($expect[$i])) { $this->assertIdentical($expect[$i], $result); } else { - $this->assertEqual($expect[$i], $result); - paintIf($result, $result != $expect[$i]); + $result_html = $this->gen->generateFromTokens($result); + $this->assertEqual($expect[$i], $result_html); + paintIf($result_html, $result_html != $expect[$i]); } } } @@ -21,32 +32,20 @@ class Test_HTMLDTD_ChildDef extends UnitTestCase $def = new HTMLDTD_ChildDef( '(caption?, (col*|colgroup*), thead?, tfoot?, (tbody+|tr+))'); - $inputs[0] = array(); + $inputs[0] = ''; $expect[0] = false; // we really don't care what's inside, because if it turns out // this tr is illegal, we'll end up re-evaluating the parent node // anyway. - $inputs[1] = array( - new MF_StartTag('tr') ,new MF_EndTag('tr') - ); + $inputs[1] = ''; $expect[1] = true; - $inputs[2] = array( - new MF_StartTag('caption') ,new MF_EndTag('caption') - ,new MF_StartTag('col') ,new MF_EndTag('col') - ,new MF_StartTag('thead') ,new MF_EndTag('thead') - ,new MF_StartTag('tfoot') ,new MF_EndTag('tfoot') - ,new MF_StartTag('tbody') ,new MF_EndTag('tbody') - ); + $inputs[2] = '' . + ''; $expect[2] = true; - $inputs[3] = array( - new MF_StartTag('col') ,new MF_EndTag('col') - ,new MF_StartTag('col') ,new MF_EndTag('col') - ,new MF_StartTag('col') ,new MF_EndTag('col') - ,new MF_StartTag('tr') ,new MF_EndTag('tr') - ); + $inputs[3] = ''; $expect[3] = true; $this->assertSeries($inputs, $expect, $def); @@ -81,63 +80,22 @@ class Test_HTMLDTD_ChildDef extends UnitTestCase $inputs[0] = array(); $expect[0] = false; - $inputs[1] = array( - new MF_StartTag('dt') - ,new MF_Text('Term') - ,new MF_EndTag('dt') - - ,new MF_Text('Text in an illegal location') - - ,new MF_StartTag('dd') - ,new MF_Text('Definition') - ,new MF_EndTag('dd') - - ,new MF_StartTag('b') // test tag removal too - ,new MF_EndTag('b') - ); - $expect[1] = array( - new MF_StartTag('dt') - ,new MF_Text('Term') - ,new MF_EndTag('dt') - - ,new MF_StartTag('dd') - ,new MF_Text('Definition') - ,new MF_EndTag('dd') - ); + $inputs[1] = '
Term
Text in an illegal location'. + '
Definition
Illegal tag'; - $inputs[2] = array(new MF_Text('How do you do!')); + $expect[1] = '
Term
Definition
'; + + $inputs[2] = 'How do you do!'; $expect[2] = false; // whitespace shouldn't trigger it - $inputs[3] = array( - new MF_Text("\n") - ,new MF_StartTag('dd') - ,new MF_Text('Definition') - ,new MF_EndTag('dd') - ,new MF_Text(' ') - ); + $inputs[3] = "\n
Definition
"; $expect[3] = true; - $inputs[4] = array( - new MF_StartTag('dd') - ,new MF_Text('Definition') - ,new MF_EndTag('dd') - ,new MF_Text(' ') - ,new MF_StartTag('b') - ,new MF_EndTag('b') - ,new MF_Text(' ') - ); - $expect[4] = array( - new MF_StartTag('dd') - ,new MF_Text('Definition') - ,new MF_EndTag('dd') - ,new MF_Text(' ') - ,new MF_Text(' ') - ); - $inputs[5] = array( - new MF_Text(' ') - ,new MF_Text("\t") - ); + $inputs[4] ='
Definition
'; + $expect[4] = '
Definition
'; + + $inputs[5] = "\t "; $expect[5] = false; $this->assertSeries($inputs, $expect, $def); @@ -146,41 +104,23 @@ class Test_HTMLDTD_ChildDef extends UnitTestCase function test_required_pcdata_allowed() { $def = new HTMLDTD_ChildDef_Required('#PCDATA | b'); - $input = array( - new MF_StartTag('b') - ,new MF_Text('Bold text') - ,new MF_EndTag('b') - ,new MF_EmptyTag('img') // illegal tag - ); - $expect = array( - new MF_StartTag('b') - ,new MF_Text('Bold text') - ,new MF_EndTag('b') - ,new MF_Text('') - ); - $this->assertEqual($expect, $def->validateChildren($input)); + + $inputs[0] = 'Bold text'; + $expect[0] = 'Bold text<img />'; + + $this->assertSeries($inputs, $expect, $def); } function test_optional() { $def = new HTMLDTD_ChildDef_Optional('b | i'); - $input = array( - new MF_StartTag('b') - ,new MF_Text('Bold text') - ,new MF_EndTag('b') - ,new MF_EmptyTag('img') // illegal tag - ); - $expect = array( - new MF_StartTag('b') - ,new MF_Text('Bold text') - ,new MF_EndTag('b') - ); - $this->assertEqual($expect, $def->validateChildren($input)); - $input = array( - new MF_Text('Not allowed text') - ); - $expect = array(); - $this->assertEqual($expect, $def->validateChildren($input)); + $inputs[0] = 'Bold text'; + $expect[0] = 'Bold text'; + + $inputs[1] = 'Not allowed text'; + $expect[1] = ''; + + $this->assertSeries($inputs, $expect, $def); } } @@ -188,13 +128,13 @@ class Test_HTMLDTD_ChildDef extends UnitTestCase class Test_PureHTMLDefinition extends UnitTestCase { - var $def, $lexer; + var $def, $lex; function Test_PureHTMLDefinition() { $this->UnitTestCase(); $this->def = new PureHTMLDefinition(); $this->def->loadData(); - $this->lexer = new HTML_Lexer(); + $this->lex = new HTML_Lexer(); } function test_removeForeignElements() { @@ -206,35 +146,35 @@ class Test_PureHTMLDefinition extends UnitTestCase $expect[0] = $inputs[0]; $inputs[1] = array( - new MF_Text('This is ') - ,new MF_StartTag('b', array()) - ,new MF_Text('bold') - ,new MF_EndTag('b') - ,new MF_Text(' text') + new HTMLPurifier_Token_Text('This is ') + ,new HTMLPurifier_Token_Start('b', array()) + ,new HTMLPurifier_Token_Text('bold') + ,new HTMLPurifier_Token_End('b') + ,new HTMLPurifier_Token_Text(' text') ); $expect[1] = $inputs[1]; $inputs[2] = array( - new MF_StartTag('asdf') - ,new MF_EndTag('asdf') - ,new MF_StartTag('d', array('href' => 'bang!')) - ,new MF_EndTag('d') - ,new MF_StartTag('pooloka') - ,new MF_StartTag('poolasdf') - ,new MF_StartTag('ds', array('moogle' => '&')) - ,new MF_EndTag('asdf') - ,new MF_EndTag('asdf') + new HTMLPurifier_Token_Start('asdf') + ,new HTMLPurifier_Token_End('asdf') + ,new HTMLPurifier_Token_Start('d', array('href' => 'bang!')) + ,new HTMLPurifier_Token_End('d') + ,new HTMLPurifier_Token_Start('pooloka') + ,new HTMLPurifier_Token_Start('poolasdf') + ,new HTMLPurifier_Token_Start('ds', array('moogle' => '&')) + ,new HTMLPurifier_Token_End('asdf') + ,new HTMLPurifier_Token_End('asdf') ); $expect[2] = array( - new MF_Text('') - ,new MF_Text('') - ,new MF_Text('') - ,new MF_Text('') - ,new MF_Text('') - ,new MF_Text('') - ,new MF_Text('') - ,new MF_Text('') - ,new MF_Text('') + new HTMLPurifier_Token_Text('') + ,new HTMLPurifier_Token_Text('') + ,new HTMLPurifier_Token_Text('') + ,new HTMLPurifier_Token_Text('') + ,new HTMLPurifier_Token_Text('') + ,new HTMLPurifier_Token_Text('') + ,new HTMLPurifier_Token_Text('') + ,new HTMLPurifier_Token_Text('') + ,new HTMLPurifier_Token_Text('') ); foreach ($inputs as $i => $input) { @@ -254,113 +194,113 @@ class Test_PureHTMLDefinition extends UnitTestCase $expect[0] = $inputs[0]; $inputs[1] = array( - new MF_Text('This is ') - ,new MF_StartTag('b') - ,new MF_Text('bold') - ,new MF_EndTag('b') - ,new MF_Text(' text') - ,new MF_EmptyTag('br') + new HTMLPurifier_Token_Text('This is ') + ,new HTMLPurifier_Token_Start('b') + ,new HTMLPurifier_Token_Text('bold') + ,new HTMLPurifier_Token_End('b') + ,new HTMLPurifier_Token_Text(' text') + ,new HTMLPurifier_Token_Empty('br') ); $expect[1] = $inputs[1]; $inputs[2] = array( - new MF_StartTag('b') - ,new MF_Text('Unclosed tag, gasp!') + new HTMLPurifier_Token_Start('b') + ,new HTMLPurifier_Token_Text('Unclosed tag, gasp!') ); $expect[2] = array( - new MF_StartTag('b') - ,new MF_Text('Unclosed tag, gasp!') - ,new MF_EndTag('b') + new HTMLPurifier_Token_Start('b') + ,new HTMLPurifier_Token_Text('Unclosed tag, gasp!') + ,new HTMLPurifier_Token_End('b') ); $inputs[3] = array( - new MF_StartTag('b') - ,new MF_StartTag('i') - ,new MF_Text('The b is closed, but the i is not') - ,new MF_EndTag('b') + new HTMLPurifier_Token_Start('b') + ,new HTMLPurifier_Token_Start('i') + ,new HTMLPurifier_Token_Text('The b is closed, but the i is not') + ,new HTMLPurifier_Token_End('b') ); $expect[3] = array( - new MF_StartTag('b') - ,new MF_StartTag('i') - ,new MF_Text('The b is closed, but the i is not') - ,new MF_EndTag('i') - ,new MF_EndTag('b') + new HTMLPurifier_Token_Start('b') + ,new HTMLPurifier_Token_Start('i') + ,new HTMLPurifier_Token_Text('The b is closed, but the i is not') + ,new HTMLPurifier_Token_End('i') + ,new HTMLPurifier_Token_End('b') ); $inputs[4] = array( - new MF_Text('Hey, recycle unused end tags!') - ,new MF_EndTag('b') + new HTMLPurifier_Token_Text('Hey, recycle unused end tags!') + ,new HTMLPurifier_Token_End('b') ); $expect[4] = array( - new MF_Text('Hey, recycle unused end tags!') - ,new MF_Text('
') + new HTMLPurifier_Token_Text('Hey, recycle unused end tags!') + ,new HTMLPurifier_Token_Text('
') ); - $inputs[5] = array(new MF_StartTag('br', array('style' => 'clear:both;'))); - $expect[5] = array(new MF_EmptyTag('br', array('style' => 'clear:both;'))); + $inputs[5] = array(new HTMLPurifier_Token_Start('br', array('style' => 'clear:both;'))); + $expect[5] = array(new HTMLPurifier_Token_Empty('br', array('style' => 'clear:both;'))); - $inputs[6] = array(new MF_EmptyTag('div', array('style' => 'clear:both;'))); + $inputs[6] = array(new HTMLPurifier_Token_Empty('div', array('style' => 'clear:both;'))); $expect[6] = array( - new MF_StartTag('div', array('style' => 'clear:both;')) - ,new MF_EndTag('div') + new HTMLPurifier_Token_Start('div', array('style' => 'clear:both;')) + ,new HTMLPurifier_Token_End('div') ); // test automatic paragraph closing $inputs[7] = array( - new MF_StartTag('p') - ,new MF_Text('Paragraph 1') - ,new MF_StartTag('p') - ,new MF_Text('Paragraph 2') + new HTMLPurifier_Token_Start('p') + ,new HTMLPurifier_Token_Text('Paragraph 1') + ,new HTMLPurifier_Token_Start('p') + ,new HTMLPurifier_Token_Text('Paragraph 2') ); $expect[7] = array( - new MF_StartTag('p') - ,new MF_Text('Paragraph 1') - ,new MF_EndTag('p') - ,new MF_StartTag('p') - ,new MF_Text('Paragraph 2') - ,new MF_EndTag('p') + new HTMLPurifier_Token_Start('p') + ,new HTMLPurifier_Token_Text('Paragraph 1') + ,new HTMLPurifier_Token_End('p') + ,new HTMLPurifier_Token_Start('p') + ,new HTMLPurifier_Token_Text('Paragraph 2') + ,new HTMLPurifier_Token_End('p') ); $inputs[8] = array( - new MF_StartTag('div') - ,new MF_StartTag('p') - ,new MF_Text('Paragraph 1 in a div') - ,new MF_EndTag('div') + new HTMLPurifier_Token_Start('div') + ,new HTMLPurifier_Token_Start('p') + ,new HTMLPurifier_Token_Text('Paragraph 1 in a div') + ,new HTMLPurifier_Token_End('div') ); $expect[8] = array( - new MF_StartTag('div') - ,new MF_StartTag('p') - ,new MF_Text('Paragraph 1 in a div') - ,new MF_EndTag('p') - ,new MF_EndTag('div') + new HTMLPurifier_Token_Start('div') + ,new HTMLPurifier_Token_Start('p') + ,new HTMLPurifier_Token_Text('Paragraph 1 in a div') + ,new HTMLPurifier_Token_End('p') + ,new HTMLPurifier_Token_End('div') ); // automatic list closing $inputs[9] = array( - new MF_StartTag('ol') + new HTMLPurifier_Token_Start('ol') - ,new MF_StartTag('li') - ,new MF_Text('Item 1') + ,new HTMLPurifier_Token_Start('li') + ,new HTMLPurifier_Token_Text('Item 1') - ,new MF_StartTag('li') - ,new MF_Text('Item 2') + ,new HTMLPurifier_Token_Start('li') + ,new HTMLPurifier_Token_Text('Item 2') - ,new MF_EndTag('ol') + ,new HTMLPurifier_Token_End('ol') ); $expect[9] = array( - new MF_StartTag('ol') + new HTMLPurifier_Token_Start('ol') - ,new MF_StartTag('li') - ,new MF_Text('Item 1') - ,new MF_EndTag('li') + ,new HTMLPurifier_Token_Start('li') + ,new HTMLPurifier_Token_Text('Item 1') + ,new HTMLPurifier_Token_End('li') - ,new MF_StartTag('li') - ,new MF_Text('Item 2') - ,new MF_EndTag('li') + ,new HTMLPurifier_Token_Start('li') + ,new HTMLPurifier_Token_Text('Item 2') + ,new HTMLPurifier_Token_End('li') - ,new MF_EndTag('ol') + ,new HTMLPurifier_Token_End('ol') ); foreach ($inputs as $i => $input) { @@ -379,62 +319,62 @@ class Test_PureHTMLDefinition extends UnitTestCase // legal inline nesting $inputs[0] = array( - new MF_StartTag('b'), - new MF_Text('Bold text'), - new MF_EndTag('b'), + new HTMLPurifier_Token_Start('b'), + new HTMLPurifier_Token_Text('Bold text'), + new HTMLPurifier_Token_End('b'), ); $expect[0] = $inputs[0]; // legal inline and block // as the parent element is considered FLOW $inputs[1] = array( - new MF_StartTag('a', array('href' => 'http://www.example.com/')), - new MF_Text('Linky'), - new MF_EndTag('a'), - new MF_StartTag('div'), - new MF_Text('Block element'), - new MF_EndTag('div'), + new HTMLPurifier_Token_Start('a', array('href' => 'http://www.example.com/')), + new HTMLPurifier_Token_Text('Linky'), + new HTMLPurifier_Token_End('a'), + new HTMLPurifier_Token_Start('div'), + new HTMLPurifier_Token_Text('Block element'), + new HTMLPurifier_Token_End('div'), ); $expect[1] = $inputs[1]; // illegal block in inline, element -> text $inputs[2] = array( - new MF_StartTag('b'), - new MF_StartTag('div'), - new MF_Text('Illegal Div'), - new MF_EndTag('div'), - new MF_EndTag('b'), + new HTMLPurifier_Token_Start('b'), + new HTMLPurifier_Token_Start('div'), + new HTMLPurifier_Token_Text('Illegal Div'), + new HTMLPurifier_Token_End('div'), + new HTMLPurifier_Token_End('b'), ); $expect[2] = array( - new MF_StartTag('b'), - new MF_Text('
'), - new MF_Text('Illegal Div'), - new MF_Text('
'), - new MF_EndTag('b'), + new HTMLPurifier_Token_Start('b'), + new HTMLPurifier_Token_Text('
'), + new HTMLPurifier_Token_Text('Illegal Div'), + new HTMLPurifier_Token_Text('
'), + new HTMLPurifier_Token_End('b'), ); // test of empty set that's required, resulting in removal of node $inputs[3] = array( - new MF_StartTag('ul'), - new MF_EndTag('ul') + new HTMLPurifier_Token_Start('ul'), + new HTMLPurifier_Token_End('ul') ); $expect[3] = array(); // test illegal text which gets removed $inputs[4] = array( - new MF_StartTag('ul'), - new MF_Text('Illegal Text'), - new MF_StartTag('li'), - new MF_Text('Legal item'), - new MF_EndTag('li'), - new MF_EndTag('ul') + new HTMLPurifier_Token_Start('ul'), + new HTMLPurifier_Token_Text('Illegal Text'), + new HTMLPurifier_Token_Start('li'), + new HTMLPurifier_Token_Text('Legal item'), + new HTMLPurifier_Token_End('li'), + new HTMLPurifier_Token_End('ul') ); $expect[4] = array( - new MF_StartTag('ul'), - new MF_StartTag('li'), - new MF_Text('Legal item'), - new MF_EndTag('li'), - new MF_EndTag('ul') + new HTMLPurifier_Token_Start('ul'), + new HTMLPurifier_Token_Start('li'), + new HTMLPurifier_Token_Text('Legal item'), + new HTMLPurifier_Token_End('li'), + new HTMLPurifier_Token_End('ul') ); foreach ($inputs as $i => $input) {