1
0
mirror of https://github.com/ezyang/htmlpurifier.git synced 2025-07-31 03:10:09 +02:00

Revamp entity decoding to be more like HTML5.

See %Core.LegacyEntityDecoder for more details.

Signed-off-by: Edward Z. Yang <ezyang@cs.stanford.edu>
This commit is contained in:
Edward Z. Yang
2017-03-07 13:34:55 -08:00
parent 66bbae73a9
commit 7e11c271b9
10 changed files with 272 additions and 35 deletions

View File

@@ -46,11 +46,11 @@ class HTMLPurifier_LexerTest extends HTMLPurifier_Harness
// HTMLPurifier_Lexer->parseData() -----------------------------------------
/**
 * Asserts that HTMLPurifier_Lexer::parseData() turns $input into $expect.
 *
 * NOTE(review): this span looks like a rendered diff hunk with both sides
 * shown: the signature and the assertIdentical() call each appear twice,
 * once without and once with $is_attr / $this->config. Confirm which pair
 * is live before relying on this block.
 *
 * @param string      $input   Raw data handed to parseData().
 * @param string|bool $expect  Expected result; true means "identical to $input".
 * @param bool        $is_attr Forwarded as the second argument of parseData().
 */
public function assertParseData($input, $expect = true)
public function assertParseData($input, $expect = true, $is_attr = false)
{
// Passing true as $expect is shorthand for "output must equal the input".
if ($expect === true) $expect = $input;
$lexer = new HTMLPurifier_Lexer();
$this->assertIdentical($expect, $lexer->parseData($input));
$this->assertIdentical($expect, $lexer->parseData($input, $is_attr, $this->config));
}
public function test_parseData_plainText()
@@ -95,7 +95,58 @@ class HTMLPurifier_LexerTest extends HTMLPurifier_Harness
/**
 * Feeds the hexadecimal numeric entity '&#x2D;' through assertParseData().
 *
 * NOTE(review): two assertions are shown for the same input and they
 * disagree: the first expects the input back unchanged, the second expects
 * '-'. They appear to be the before/after sides of a diff; confirm which
 * one is current.
 */
public function test_parseData_improperEntityFaultToleranceTest()
{
$this->assertParseData('&#x2D;');
$this->assertParseData('&#x2D;', '-');
}
/**
 * With the default (non-attribute) arguments, '&ampA' is expected to come
 * back from parseData() as '&A' even though the entity has no semicolon.
 */
public function test_parseData_noTrailingSemi()
{
    $input = '&ampA';
    $expected = '&A';

    $this->assertParseData($input, $expected);
}
/**
 * With $is_attr set to true, '&ampA' is expected to come back unchanged.
 */
public function test_parseData_noTrailingSemiAttr()
{
    $input = '&ampA';
    $expected = '&ampA';
    $isAttr = true;

    $this->assertParseData($input, $expected, $isAttr);
}
/**
 * Attribute-mode check: '&ampA' must be returned as '&ampA'.
 *
 * NOTE(review): the input, expectation and $is_attr flag are the same as in
 * test_parseData_noTrailingSemiAttr; confirm whether a different case was
 * intended here.
 */
public function test_parseData_T119()
{
    $attrValue = '&ampA';
    $unchanged = '&ampA';

    $this->assertParseData($attrValue, $unchanged, true);
}
/**
 * Attribute-mode check: '&trade=' is expected to be returned unchanged
 * (an $expect of true means "identical to the input").
 */
public function test_parseData_T119b()
{
    $attrValue = '&trade=';
    $expectUnchanged = true;
    $isAttr = true;

    $this->assertParseData($attrValue, $expectUnchanged, $isAttr);
}
/**
 * Exercises parseData() with Core.LegacyEntityDecoder switched on.
 *
 * For both '&amp' and '&lt' without a semicolon, and in both text and
 * attribute mode, the expectations are the same: followed by 'a' the input
 * is returned unchanged, followed by '=' the entity is decoded.
 */
public function test_parseData_legacy1()
{
    $this->config->set('Core.LegacyEntityDecoder', true);

    // Each row: input, expected output (true = unchanged), $is_attr.
    $cases = array(
        array('&ampa', true, false),
        array('&amp=', "&=", false),
        array('&ampa', true, true),
        array('&amp=', "&=", true),
        array('&lta', true, false),
        array('&lt=', "<=", false),
        array('&lta', true, true),
        array('&lt=', "<=", true),
    );

    foreach ($cases as $case) {
        $this->assertParseData($case[0], $case[1], $case[2]);
    }
}
/**
 * Exercises parseData() without touching Core.LegacyEntityDecoder.
 *
 * In text mode '&amp' and '&lt' without a semicolon are expected to be
 * decoded whether followed by 'a' or '='; in attribute mode the same inputs
 * are expected back unchanged. The last row checks that '&lta;' becomes
 * '<a;' in text mode.
 */
public function test_parseData_nonlegacy1()
{
    // Each row: input, expected output (true = unchanged), $is_attr.
    $cases = array(
        array('&ampa', "&a", false),
        array('&amp=', "&=", false),
        array('&ampa', true, true),
        array('&amp=', true, true),
        array('&lta', "<a", false),
        array('&lt=', "<=", false),
        array('&lta', true, true),
        array('&lt=', true, true),
        array('&lta;', "<a;", false),
    );

    foreach ($cases as $case) {
        $this->assertParseData($case[0], $case[1], $case[2]);
    }
}
/**
 * '&imath' without a trailing semicolon is expected to be returned
 * unchanged (assertParseData() defaults $expect to "identical to input").
 */
public function test_parseData_noTrailingSemiNever()
{
    $input = '&imath';

    $this->assertParseData($input);
}
// HTMLPurifier_Lexer->extractBody() ---------------------------------------