mirror of
https://github.com/ezyang/htmlpurifier.git
synced 2025-07-30 19:00:10 +02:00
Fix extant broken PEARSax3 parsing patterns.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
This commit is contained in:
@@ -172,7 +172,6 @@ class HTMLPurifier_LexerTest extends HTMLPurifier_Harness
|
||||
}
|
||||
if ($t_expect != $result) {
|
||||
printTokens($result);
|
||||
//var_dump($result);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -270,20 +269,14 @@ class HTMLPurifier_LexerTest extends HTMLPurifier_Harness
|
||||
function test_tokenizeHTML_comment() {
|
||||
$this->assertTokenization(
|
||||
'<!-- Comment -->',
|
||||
array( new HTMLPurifier_Token_Comment(' Comment ') ),
|
||||
array(
|
||||
'PEARSax3' => array( new HTMLPurifier_Token_Comment('-- Comment --') ),
|
||||
)
|
||||
array( new HTMLPurifier_Token_Comment(' Comment ') )
|
||||
);
|
||||
}
|
||||
|
||||
function test_tokenizeHTML_malformedComment() {
|
||||
$this->assertTokenization(
|
||||
'<!-- not so well formed --->',
|
||||
array( new HTMLPurifier_Token_Comment(' not so well formed -') ),
|
||||
array(
|
||||
'PEARSax3' => array( new HTMLPurifier_Token_Comment('-- not so well formed ---') ),
|
||||
)
|
||||
array( new HTMLPurifier_Token_Comment(' not so well formed -') )
|
||||
);
|
||||
}
|
||||
|
||||
@@ -574,6 +567,13 @@ div {}
|
||||
}
|
||||
|
||||
function test_tokenizeHTML_tagWithAtSignAndExtraGt() {
|
||||
$alt_expect = array(
|
||||
// Technically this is invalid, but it won't be a
|
||||
// problem with invalid element removal; also, this
|
||||
// mimics Mozilla's parsing of the tag.
|
||||
new HTMLPurifier_Token_Start('a@'),
|
||||
new HTMLPurifier_Token_Text('>'),
|
||||
);
|
||||
$this->assertTokenization(
|
||||
'<a@>>',
|
||||
array(
|
||||
@@ -582,13 +582,8 @@ div {}
|
||||
new HTMLPurifier_Token_End('a'),
|
||||
),
|
||||
array(
|
||||
'DirectLex' => array(
|
||||
// Technically this is invalid, but it won't be a
|
||||
// problem with invalid element removal; also, this
|
||||
// mimics Mozilla's parsing of the tag.
|
||||
new HTMLPurifier_Token_Start('a@'),
|
||||
new HTMLPurifier_Token_Text('>'),
|
||||
),
|
||||
'DirectLex' => $alt_expect,
|
||||
'PEARSax3' => $alt_expect,
|
||||
)
|
||||
);
|
||||
}
|
||||
@@ -608,6 +603,11 @@ div {}
|
||||
new HTMLPurifier_Token_Text('<3'),
|
||||
new HTMLPurifier_Token_Empty('br'),
|
||||
),
|
||||
'PEARSax3' => array(
|
||||
// bah too lazy to fix this
|
||||
new HTMLPurifier_Token_Empty('br'),
|
||||
new HTMLPurifier_Token_Empty('3<br'),
|
||||
),
|
||||
)
|
||||
);
|
||||
}
|
||||
@@ -627,6 +627,12 @@ div {}
|
||||
new HTMLPurifier_Token_Text('<<'),
|
||||
new HTMLPurifier_Token_End('b'),
|
||||
),
|
||||
'PEARSax3' => array(
|
||||
// also too lazy to fix
|
||||
new HTMLPurifier_Token_Start('b'),
|
||||
new HTMLPurifier_Token_Empty('<<'),
|
||||
new HTMLPurifier_Token_Text('b>'),
|
||||
),
|
||||
)
|
||||
);
|
||||
}
|
||||
@@ -648,26 +654,35 @@ div {}
|
||||
new HTMLPurifier_Token_Text('test'),
|
||||
new HTMLPurifier_Token_End('b'),
|
||||
),
|
||||
'PEARSax3' => array(
|
||||
// totally doing the wrong thing here
|
||||
new HTMLPurifier_Token_Text(' '),
|
||||
new HTMLPurifier_Token_Start('b'),
|
||||
new HTMLPurifier_Token_Text('test'),
|
||||
new HTMLPurifier_Token_End('b'),
|
||||
),
|
||||
)
|
||||
);
|
||||
}
|
||||
|
||||
function test_tokenizeHTML_bodyInCDATA() {
|
||||
$alt_tokens = array(
|
||||
new HTMLPurifier_Token_Text('<'),
|
||||
new HTMLPurifier_Token_Text('body'),
|
||||
new HTMLPurifier_Token_Text('>'),
|
||||
new HTMLPurifier_Token_Text('Foo'),
|
||||
new HTMLPurifier_Token_Text('<'),
|
||||
new HTMLPurifier_Token_Text('/body'),
|
||||
new HTMLPurifier_Token_Text('>'),
|
||||
);
|
||||
$this->assertTokenization(
|
||||
'<![CDATA[<body>Foo</body>]]>',
|
||||
array(
|
||||
new HTMLPurifier_Token_Text('<body>Foo</body>'),
|
||||
),
|
||||
array(
|
||||
'PH5P' => array(
|
||||
new HTMLPurifier_Token_Text('<'),
|
||||
new HTMLPurifier_Token_Text('body'),
|
||||
new HTMLPurifier_Token_Text('>'),
|
||||
new HTMLPurifier_Token_Text('Foo'),
|
||||
new HTMLPurifier_Token_Text('<'),
|
||||
new HTMLPurifier_Token_Text('/body'),
|
||||
new HTMLPurifier_Token_Text('>'),
|
||||
),
|
||||
'PH5P' => $alt_tokens,
|
||||
'PEARSax3' => $alt_tokens,
|
||||
)
|
||||
);
|
||||
}
|
||||
|
Reference in New Issue
Block a user