mirror of
https://github.com/ezyang/htmlpurifier.git
synced 2025-07-31 19:30:21 +02:00
Refine Lexers for parsing stray angled brackets; %Core.AggressivelyFixLt = true
By default, the behavior of DirectLex and DOMLex with stray angled brackets varied a great deal due to their implementations. A little-known directive, %Core.AggressivelyFixLt, attempted to match DOMLex's behavior with DirectLex's, but it was off by default. Now that it is turned on by default, users enjoy these benefits, and performance-minded users can turn it back off. Also, several refinements to stray angled bracket parsing were made. Specifically: * DirectLex: Handle each left angled bracket individually, which prevents strange behavior as reported by eon. * DOMLex: Iterate aggressive lt fix, so that stacked brackets like << are handled. Signed-off-by: Edward Z. Yang <edwardzyang@thewritingpot.com>
This commit is contained in:
@@ -418,14 +418,13 @@ class HTMLPurifier_LexerTest extends HTMLPurifier_Harness
|
||||
}
|
||||
|
||||
function test_tokenizeHTML_emoticonProtection() {
|
||||
$this->config->set('Core', 'AggressivelyFixLt', true);
|
||||
$this->assertTokenization(
|
||||
'<b>Whoa! <3 That\'s not good >.></b>',
|
||||
array(
|
||||
new HTMLPurifier_Token_Start('b'),
|
||||
new HTMLPurifier_Token_Text('Whoa! '),
|
||||
new HTMLPurifier_Token_Text('<3 That\'s not good >'),
|
||||
new HTMLPurifier_Token_Text('.>'),
|
||||
new HTMLPurifier_Token_Text('<'),
|
||||
new HTMLPurifier_Token_Text('3 That\'s not good >.>'),
|
||||
new HTMLPurifier_Token_End('b')
|
||||
),
|
||||
array(
|
||||
@@ -491,7 +490,6 @@ class HTMLPurifier_LexerTest extends HTMLPurifier_Harness
|
||||
}
|
||||
|
||||
function test_tokenizeHTML_entitiesInComment() {
|
||||
$this->config->set('Core', 'AggressivelyFixLt', true);
|
||||
$this->assertTokenization(
|
||||
'<!-- This comment < < & -->',
|
||||
array( new HTMLPurifier_Token_Comment(' This comment < < & ') ),
|
||||
@@ -508,7 +506,8 @@ class HTMLPurifier_LexerTest extends HTMLPurifier_Harness
|
||||
array(
|
||||
'DirectLex' => array(
|
||||
new HTMLPurifier_Token_Start('a', array('href' => '')),
|
||||
new HTMLPurifier_Token_Text('<">'),
|
||||
new HTMLPurifier_Token_Text('<'),
|
||||
new HTMLPurifier_Token_Text('">'),
|
||||
),
|
||||
'PEARSax3' => false,
|
||||
)
|
||||
@@ -556,7 +555,7 @@ div {}
|
||||
);
|
||||
}
|
||||
|
||||
function test_tokenizeHTML_() {
|
||||
function test_tokenizeHTML_tagWithAtSignAndExtraGt() {
|
||||
$this->assertTokenization(
|
||||
'<a@>>',
|
||||
array(
|
||||
@@ -576,6 +575,65 @@ div {}
|
||||
);
|
||||
}
|
||||
|
||||
function test_tokenizeHTML_emoticonHeart() {
|
||||
$this->assertTokenization(
|
||||
'<br /><3<br />',
|
||||
array(
|
||||
new HTMLPurifier_Token_Empty('br'),
|
||||
new HTMLPurifier_Token_Text('<'),
|
||||
new HTMLPurifier_Token_Text('3'),
|
||||
new HTMLPurifier_Token_Empty('br'),
|
||||
),
|
||||
array(
|
||||
'DOMLex' => array(
|
||||
new HTMLPurifier_Token_Empty('br'),
|
||||
new HTMLPurifier_Token_Text('<3'),
|
||||
new HTMLPurifier_Token_Empty('br'),
|
||||
),
|
||||
)
|
||||
);
|
||||
}
|
||||
|
||||
function test_tokenizeHTML_emoticonShiftyEyes() {
|
||||
$this->assertTokenization(
|
||||
'<b><<</b>',
|
||||
array(
|
||||
new HTMLPurifier_Token_Start('b'),
|
||||
new HTMLPurifier_Token_Text('<'),
|
||||
new HTMLPurifier_Token_Text('<'),
|
||||
new HTMLPurifier_Token_End('b'),
|
||||
),
|
||||
array(
|
||||
'DOMLex' => array(
|
||||
new HTMLPurifier_Token_Start('b'),
|
||||
new HTMLPurifier_Token_Text('<<'),
|
||||
new HTMLPurifier_Token_End('b'),
|
||||
),
|
||||
)
|
||||
);
|
||||
}
|
||||
|
||||
function test_tokenizeHTML_eon1996() {
|
||||
$this->assertTokenization(
|
||||
'< <b>test</b>',
|
||||
array(
|
||||
new HTMLPurifier_Token_Text('<'),
|
||||
new HTMLPurifier_Token_Text(' '),
|
||||
new HTMLPurifier_Token_Start('b'),
|
||||
new HTMLPurifier_Token_Text('test'),
|
||||
new HTMLPurifier_Token_End('b'),
|
||||
),
|
||||
array(
|
||||
'DOMLex' => array(
|
||||
new HTMLPurifier_Token_Text('< '),
|
||||
new HTMLPurifier_Token_Start('b'),
|
||||
new HTMLPurifier_Token_Text('test'),
|
||||
new HTMLPurifier_Token_End('b'),
|
||||
),
|
||||
)
|
||||
);
|
||||
}
|
||||
|
||||
/*
|
||||
|
||||
function test_tokenizeHTML_() {
|
||||
|
Reference in New Issue
Block a user