1
0
mirror of https://github.com/ezyang/htmlpurifier.git synced 2025-07-31 03:10:09 +02:00

- Implemented special entity conversion.

- Optimized and documented DirectLex.
- Rearranged test cases.

git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@100 48356398-32a2-884e-a903-53898d9a118a
This commit is contained in:
Edward Z. Yang
2006-07-23 18:13:04 +00:00
parent dc24215874
commit 2fa1161d3d
2 changed files with 226 additions and 112 deletions

View File

@@ -11,66 +11,76 @@ class HTMLPurifier_Lexer_DirectLexTest extends UnitTestCase
$this->DirectLex = new HTMLPurifier_Lexer_DirectLex();
}
/**
 * Exercises DirectLex::nextWhiteSpace().
 *
 * The expectations below pin the following contract: the method returns the
 * zero-based position of the first whitespace character (space, newline, tab
 * or carriage return) in the string, or boolean false when there is none.
 * The optional second argument appears to be the offset at which the search
 * starts.
 */
function test_nextWhiteSpace() {
    $lexer =& $this->DirectLex;

    // Each entry is array(expected result, string to scan); the search
    // starts at the default offset for all of them.
    $defaultOffsetCases = array(
        array(false, 'asdf'),               // no whitespace at all
        array(0,     ' asdf'),              // leading space
        array(0,     "\nasdf"),             // leading newline
        array(1,     "a\tsdf"),             // tab in the middle
        array(4,     "asdf\r"),             // trailing carriage return
        array(2,     "as\t\r\nasdf as"),    // only the first hit counts
    );

    foreach ($defaultOffsetCases as $case) {
        list($expected, $haystack) = $case;
        $this->assertIdentical($expected, $lexer->nextWhiteSpace($haystack));
    }

    // With an explicit offset of 2 the space at index 1 is skipped and the
    // one at index 3 is reported.
    $this->assertIdentical(3, $lexer->nextWhiteSpace('a a ', 2));
}
/**
 * Exercises DirectLex::parseData(), comparing its return value for a series
 * of input strings against an expected string with assertIdentical().
 *
 * NOTE(review): this listing looks like it was extracted from a rendered
 * diff page. The inputs presumably contained HTML entities that the page
 * decoded, because as shown most assertions pass the very same literal as
 * input and expectation, and the ''' literals are not valid PHP. Some
 * assertions also appear twice, which suggests removed and added diff lines
 * were merged. Restore the exact literals from the upstream repository
 * before relying on this test.
 */
function test_parseData() {
$HP =& $this->DirectLex;
// Text without any entity is expected to come back unchanged.
$this->assertIdentical('asdf', $HP->parseData('asdf'));
// NOTE(review): the inputs below presumably were entity-encoded forms of the
// expected characters (ampersand, double quote, apostrophe, hyphen) - confirm.
$this->assertIdentical('&', $HP->parseData('&'));
$this->assertIdentical('"', $HP->parseData('"'));
// NOTE(review): ''' cannot be parsed as a PHP string literal; the original
// was most likely an apostrophe entity - confirm against upstream.
$this->assertIdentical("'", $HP->parseData('''));
$this->assertIdentical('-', $HP->parseData('-'));
// UTF-8 needed!!!
$this->assertIdentical("'", $HP->parseData('''));
$this->assertIdentical('&&&', $HP->parseData('&&&'));
$this->assertIdentical('&&', $HP->parseData('&&')); // [INVALID]
$this->assertIdentical('Procter & Gamble',
$HP->parseData('Procter & Gamble')); // [INVALID]
// This is not special, thus not converted. Test of fault tolerance,
// realistically speaking, this should never happen
$this->assertIdentical('-', $HP->parseData('-'));
}
/**
 * Exercises DirectLex::specialEntityCallback() with a hand-built match
 * array and expects a literal apostrophe back.
 *
 * The array presumably mirrors what a regex callback receives: element 0 is
 * the full matched entity text and element 2 the captured decimal code
 * ('39'); elements 1 and 3 are left null here.
 */
function test_specialEntityCallback() {
    $HP =& $this->DirectLex;

    // The first element used to read ''' (an apostrophe between quotes),
    // which is not a valid PHP string literal. The decimal code '39' passed
    // alongside it identifies the entity as the numeric character reference
    // for code point 39.
    // NOTE(review): confirm the exact spelling against the upstream
    // repository.
    $this->assertIdentical("'", $HP->specialEntityCallback(
        array('&#39;', null, '39', null) ));
}
// internals testing
function test_tokenizeAttributeString() {
function test_parseAttributeString() {
$input[0] = 'href="asdf" boom="assdf"';
$expect[0] = array('href'=>'asdf', 'boom'=>'assdf');
$input[0] = 'href="about:blank" rel="nofollow"';
$expect[0] = array('href'=>'about:blank', 'rel'=>'nofollow');
$input[1] = "href='r'";
$expect[1] = array('href'=>'r');
$input[1] = "href='about:blank'";
$expect[1] = array('href'=>'about:blank');
// note that the single quotes aren't /really/ escaped
$input[2] = 'onclick="javascript:alert(\'asdf\');"';
$expect[2] = array('onclick' => "javascript:alert('asdf');");
$input[3] = 'selected';
$expect[3] = array('selected'=>'selected');
$input[4] = '="asdf"';
// [INVALID]
$input[4] = '="nokey"';
$expect[4] = array();
$input[5] = 'missile=launch';
$expect[5] = array('missile' => 'launch');
// [SIMPLE]
$input[5] = 'color=blue';
$expect[5] = array('color' => 'blue');
$input[6] = 'href="foo';
$expect[6] = array('href' => 'foo');
// [INVALID]
$input[6] = 'href="about:blank';
$expect[6] = array('href' => 'about:blank');
// [INVALID]
$input[7] = '"=';
$expect[7] = array('"' => '');
// we ought to get array()
$input[8] = 'href ="about:blank"rel ="nofollow"';
$expect[8] = array('href' => 'about:blank', 'rel' => 'nofollow');
$input[9] = 'foo bar';
$expect[9] = array('foo' => 'foo', 'bar' => 'bar');
$input[9] = 'two bool';
$expect[9] = array('two' => 'two', 'bool' => 'bool');
$input[10] = 'foo="bar" blue';
$expect[10] = array('foo' => 'bar', 'blue' => 'blue');
$input[10] = 'name="input" selected';
$expect[10] = array('name' => 'input', 'selected' => 'selected');
$size = count($input);
for($i = 0; $i < $size; $i++) {
$result = $this->DirectLex->tokenizeAttributeString($input[$i]);
$result = $this->DirectLex->parseAttributeString($input[$i]);
$this->assertEqual($expect[$i], $result, 'Test ' . $i . ': %s');
paintIf($result, $expect[$i] != $result);
}