1
0
mirror of https://github.com/ezyang/htmlpurifier.git synced 2025-07-31 19:30:21 +02:00

Refine Lexers for parsing stray angled brackets; %Core.AggressivelyFixLt = true

By default, the DirectLex and DOMLex behavior with stray angled brackets
varied a great deal due to their implementations. A little-known directive,
%Core.AggressivelyFixLt attempted to match DOMLex's behavior with DirectLex's,
but it was off by default. By turning it on by default, users now enjoy these
benefits, and performance-minded users can turn it back off.

Also, several refinements to stray angled bracket parsing were made. Specifically:

* DirectLex: Handle each left angled bracket individually, which prevents
  strange behavior as reported by eon.
* DOMLex: Iterate aggressive lt fix, so that stacked brackets like << are
  handled.

Signed-off-by: Edward Z. Yang <edwardzyang@thewritingpot.com>
This commit is contained in:
Edward Z. Yang
2008-06-28 00:43:02 -04:00
parent ba418a1f19
commit aa0fdeee30
7 changed files with 86 additions and 29 deletions

View File

@@ -418,14 +418,13 @@ class HTMLPurifier_LexerTest extends HTMLPurifier_Harness
}
function test_tokenizeHTML_emoticonProtection() {
$this->config->set('Core', 'AggressivelyFixLt', true);
$this->assertTokenization(
'<b>Whoa! <3 That\'s not good >.></b>',
array(
new HTMLPurifier_Token_Start('b'),
new HTMLPurifier_Token_Text('Whoa! '),
new HTMLPurifier_Token_Text('<3 That\'s not good >'),
new HTMLPurifier_Token_Text('.>'),
new HTMLPurifier_Token_Text('<'),
new HTMLPurifier_Token_Text('3 That\'s not good >.>'),
new HTMLPurifier_Token_End('b')
),
array(
@@ -491,7 +490,6 @@ class HTMLPurifier_LexerTest extends HTMLPurifier_Harness
}
function test_tokenizeHTML_entitiesInComment() {
$this->config->set('Core', 'AggressivelyFixLt', true);
$this->assertTokenization(
'<!-- This comment < &lt; & -->',
array( new HTMLPurifier_Token_Comment(' This comment < &lt; & ') ),
@@ -508,7 +506,8 @@ class HTMLPurifier_LexerTest extends HTMLPurifier_Harness
array(
'DirectLex' => array(
new HTMLPurifier_Token_Start('a', array('href' => '')),
new HTMLPurifier_Token_Text('<">'),
new HTMLPurifier_Token_Text('<'),
new HTMLPurifier_Token_Text('">'),
),
'PEARSax3' => false,
)
@@ -556,7 +555,7 @@ div {}
);
}
function test_tokenizeHTML_() {
function test_tokenizeHTML_tagWithAtSignAndExtraGt() {
$this->assertTokenization(
'<a@>>',
array(
@@ -576,6 +575,65 @@ div {}
);
}
/**
 * Tokenizes a "<3" emoticon placed between two self-closing br elements.
 *
 * The default expectation has the stray "<" and the following "3" as two
 * separate text tokens; the expectation keyed 'DOMLex' has them as a single
 * "<3" text token.
 */
function test_tokenizeHTML_emoticonHeart() {
    $html = '<br /><3<br />';

    $defaultTokens = array(
        new HTMLPurifier_Token_Empty('br'),
        new HTMLPurifier_Token_Text('<'),
        new HTMLPurifier_Token_Text('3'),
        new HTMLPurifier_Token_Empty('br'),
    );

    // NOTE(review): the third argument looks like per-lexer overrides keyed
    // by lexer name -- confirm against assertTokenization().
    $lexerSpecificTokens = array(
        'DOMLex' => array(
            new HTMLPurifier_Token_Empty('br'),
            new HTMLPurifier_Token_Text('<3'),
            new HTMLPurifier_Token_Empty('br'),
        ),
    );

    $this->assertTokenization($html, $defaultTokens, $lexerSpecificTokens);
}
/**
 * Tokenizes two stacked left angled brackets ("<<") wrapped in a b element.
 *
 * The default expectation has each "<" as its own text token; the
 * expectation keyed 'DOMLex' has both brackets in one "<<" text token.
 */
function test_tokenizeHTML_emoticonShiftyEyes() {
    $html = '<b><<</b>';

    $defaultTokens = array(
        new HTMLPurifier_Token_Start('b'),
        new HTMLPurifier_Token_Text('<'),
        new HTMLPurifier_Token_Text('<'),
        new HTMLPurifier_Token_End('b'),
    );

    // NOTE(review): the third argument looks like per-lexer overrides keyed
    // by lexer name -- confirm against assertTokenization().
    $lexerSpecificTokens = array(
        'DOMLex' => array(
            new HTMLPurifier_Token_Start('b'),
            new HTMLPurifier_Token_Text('<<'),
            new HTMLPurifier_Token_End('b'),
        ),
    );

    $this->assertTokenization($html, $defaultTokens, $lexerSpecificTokens);
}
/**
 * Tokenizes a stray "<" followed by a space and then a well-formed b element.
 *
 * The default expectation has the "<" and the space as two separate text
 * tokens ahead of the b element; the expectation keyed 'DOMLex' has them as
 * a single "< " text token.
 */
function test_tokenizeHTML_eon1996() {
    $html = '< <b>test</b>';

    $defaultTokens = array(
        new HTMLPurifier_Token_Text('<'),
        new HTMLPurifier_Token_Text(' '),
        new HTMLPurifier_Token_Start('b'),
        new HTMLPurifier_Token_Text('test'),
        new HTMLPurifier_Token_End('b'),
    );

    // NOTE(review): the third argument looks like per-lexer overrides keyed
    // by lexer name -- confirm against assertTokenization().
    $lexerSpecificTokens = array(
        'DOMLex' => array(
            new HTMLPurifier_Token_Text('< '),
            new HTMLPurifier_Token_Start('b'),
            new HTMLPurifier_Token_Text('test'),
            new HTMLPurifier_Token_End('b'),
        ),
    );

    $this->assertTokenization($html, $defaultTokens, $lexerSpecificTokens);
}
/*
function test_tokenizeHTML_() {