mirror of
https://github.com/ezyang/htmlpurifier.git
synced 2025-08-06 06:07:26 +02:00
Begin getting parsing of character data into shape, not done yet.
git-svn-id: http://htmlpurifier.org/svnroot/html_purifier/trunk@60 48356398-32a2-884e-a903-53898d9a118a
This commit is contained in:
@@ -25,6 +25,16 @@ class Test_HTML_Lexer extends UnitTestCase
|
||||
$this->assertIdentical(2, $HP->nextWhiteSpace("as\t\r\nasdf as"));
|
||||
}
|
||||
|
||||
/**
 * Checks HTML_Lexer::parseData() on character data.
 *
 * Plain text must come back unchanged, and each entity reference must
 * come back as the literal character it stands for.
 *
 * NOTE(review): the entity-encoded inputs below were reconstructed from
 * the expected values, because the rendered diff had decoded them (which
 * made the assertions vacuous and left an unparseable ''' literal).
 * Confirm the exact spellings (e.g. &#039; vs. &#39;) against the
 * repository copy of this test.
 */
function test_parseData() {
    $HP =& $this->HTML_Lexer;

    // Text without any entity references passes through untouched.
    $this->assertIdentical('asdf', $HP->parseData('asdf'));

    // Named entity references.
    $this->assertIdentical('&', $HP->parseData('&amp;'));
    $this->assertIdentical('"', $HP->parseData('&quot;'));

    // Decimal numeric character reference (39 is the apostrophe).
    $this->assertIdentical("'", $HP->parseData('&#039;'));

    // Hexadecimal numeric character reference (0x2D is the hyphen-minus).
    $this->assertIdentical('-', $HP->parseData('&#x2D;'));

    // UTF-8 needed!!!
}
|
||||
|
||||
function test_tokenizeHTML() {
|
||||
|
||||
$input = array();
|
||||
@@ -114,8 +124,8 @@ class Test_HTML_Lexer extends UnitTestCase
|
||||
,new MF_Text('b')
|
||||
,new MF_Text('>')
|
||||
);
|
||||
// however, we may want to change both styles
|
||||
// into parsed: '<b>'. SAX has an option for this
|
||||
// note that SAX can clump text nodes together. We won't be
|
||||
// too picky though
|
||||
|
||||
// [INVALID]
|
||||
$input[10] = '<a "=>';
|
||||
@@ -123,6 +133,16 @@ class Test_HTML_Lexer extends UnitTestCase
|
||||
new MF_StartTag('a', array('"' => ''))
|
||||
);
|
||||
|
||||
// [INVALID] [RECOVERABLE]
|
||||
$input[11] = '"';
|
||||
$expect[11] = array( new MF_Text('"') );
|
||||
|
||||
// compare with this valid one:
|
||||
$input[12] = '&quot;';
|
||||
$expect[12] = array( new MF_Text('"') );
|
||||
$sax_expect[12] = false;
|
||||
// SAX chokes on this? We do have entity parsing on, so it should work!
|
||||
|
||||
foreach($input as $i => $discard) {
|
||||
$result = $this->HTML_Lexer->tokenizeHTML($input[$i]);
|
||||
$this->assertEqual($expect[$i], $result);
|
||||
|
Reference in New Issue
Block a user