1
0
mirror of https://github.com/ezyang/htmlpurifier.git synced 2025-07-31 03:10:09 +02:00

Revamp entity decoding to be more like HTML5.

See %Core.LegacyEntityDecoder for more details.

Signed-off-by: Edward Z. Yang <ezyang@cs.stanford.edu>
This commit is contained in:
Edward Z. Yang
2017-03-07 13:34:55 -08:00
parent 66bbae73a9
commit 7e11c271b9
10 changed files with 272 additions and 35 deletions

View File

@@ -36,7 +36,7 @@ class HTMLPurifier_Lexer_PH5P extends HTMLPurifier_Lexer_DOMLex
$doc->getElementsByTagName('html')->item(0)-> // <html>
getElementsByTagName('body')->item(0) // <body>
,
$tokens
$tokens, $config
);
return $tokens;
}
@@ -1515,6 +1515,7 @@ class HTML5
// Consume the maximum number of characters possible, with the
// consumed characters case-sensitively matching one of the
// identifiers in the first column of the entities table.
$e_name = $this->characters('0-9A-Za-z;', $this->char + 1);
$len = strlen($e_name);
@@ -1547,7 +1548,7 @@ class HTML5
// Return a character token for the character corresponding to the
// entity name (as given by the second column of the entities table).
return html_entity_decode('&' . $entity . ';', ENT_QUOTES, 'UTF-8');
return html_entity_decode('&' . rtrim($entity, ';') . ';', ENT_QUOTES, 'UTF-8');
}
private function emitToken($token)