1
0
mirror of https://github.com/ezyang/htmlpurifier.git synced 2025-08-06 06:07:26 +02:00

Begin getting parsing of character data into shape, not done yet.

git-svn-id: http://htmlpurifier.org/svnroot/html_purifier/trunk@60 48356398-32a2-884e-a903-53898d9a118a
This commit is contained in:
Edward Z. Yang
2006-07-20 02:43:06 +00:00
parent dadfa87acc
commit 3427c6c079
2 changed files with 74 additions and 6 deletions

View File

@@ -25,6 +25,16 @@ class Test_HTML_Lexer extends UnitTestCase
$this->assertIdentical(2, $HP->nextWhiteSpace("as\t\r\nasdf as"));
}
/**
 * Checks that parseData() returns plain text unchanged and turns
 * character references into the literal characters they stand for.
 *
 * NOTE(review): the entity-encoded inputs below were reconstructed; the
 * previous listing passed the already-decoded characters (and an
 * unterminated ''' literal, which does not parse). The named entities
 * are unambiguous; the exact numeric spellings (&#039;, &#x2D;) should be
 * confirmed against the upstream revision.
 */
function test_parseData() {
    $HP =& $this->HTML_Lexer;

    // Text without any entities passes straight through.
    $this->assertIdentical('asdf', $HP->parseData('asdf'));

    // Named entities.
    $this->assertIdentical('&', $HP->parseData('&amp;'));
    $this->assertIdentical('"', $HP->parseData('&quot;'));

    // Decimal numeric character reference (apostrophe, code point 39).
    $this->assertIdentical("'", $HP->parseData('&#039;'));

    // Hexadecimal numeric character reference (hyphen-minus, U+002D).
    $this->assertIdentical('-', $HP->parseData('&#x2D;'));

    // UTF-8 needed!!! (no multi-byte code points are covered yet)
}
function test_tokenizeHTML() {
$input = array();
@@ -114,8 +124,8 @@ class Test_HTML_Lexer extends UnitTestCase
,new MF_Text('b')
,new MF_Text('>')
);
// however, we may want to change both styles
// into parsed: '<b>'. SAX has an option for this
// note that SAX can clump text nodes together. We won't be
// too picky though
// [INVALID]
$input[10] = '<a "=>';
@@ -123,6 +133,16 @@ class Test_HTML_Lexer extends UnitTestCase
new MF_StartTag('a', array('"' => ''))
);
// [INVALID] [RECOVERABLE]
$input[11] = '"';
$expect[11] = array( new MF_Text('"') );
// compare with this valid one:
$input[12] = '&quot;';
$expect[12] = array( new MF_Text('"') );
$sax_expect[12] = false;
// SAX chokes on this? We do have entity parsing on, so it should work!
foreach($input as $i => $discard) {
$result = $this->HTML_Lexer->tokenizeHTML($input[$i]);
$this->assertEqual($expect[$i], $result);