mirror of
https://github.com/ezyang/htmlpurifier.git
synced 2025-08-06 14:16:32 +02:00
Revamp entity decoding to be more like HTML5.
See %Core.LegacyEntityDecoder for more details.

Signed-off-by: Edward Z. Yang <ezyang@cs.stanford.edu>
This commit is contained in:
@@ -169,21 +169,24 @@ class HTMLPurifier_Lexer
|
||||
'&#x27;' => "'"
|
||||
);
|
||||
|
||||
public function parseText($string, $config) {
|
||||
return $this->parseData($string, false, $config);
|
||||
}
|
||||
|
||||
public function parseAttr($string, $config) {
|
||||
return $this->parseData($string, true, $config);
|
||||
}
|
||||
|
||||
/**
|
||||
* Parses special entities into the proper characters.
|
||||
*
|
||||
* This function translates escaped versions of the special characters
|
||||
* into the correct ones.
|
||||
*
|
||||
* @warning
|
||||
* You should be able to treat the output of this function as
|
||||
* completely parsed, but that's only because all other entities should
|
||||
* have been handled previously in substituteNonSpecialEntities()
|
||||
*
|
||||
* @param string $string String character data to be parsed.
|
||||
* @return string Parsed character data.
|
||||
*/
|
||||
public function parseData($string)
|
||||
public function parseData($string, $is_attr, $config)
|
||||
{
|
||||
// following functions require at least one character
|
||||
if ($string === '') {
|
||||
@@ -209,7 +212,15 @@ class HTMLPurifier_Lexer
|
||||
}
|
||||
|
||||
// hmm... now we have some uncommon entities. Use the callback.
|
||||
$string = $this->_entity_parser->substituteSpecialEntities($string);
|
||||
if ($config->get('Core.LegacyEntityDecoder')) {
|
||||
$string = $this->_entity_parser->substituteSpecialEntities($string);
|
||||
} else {
|
||||
if ($is_attr) {
|
||||
$string = $this->_entity_parser->substituteAttrEntities($string);
|
||||
} else {
|
||||
$string = $this->_entity_parser->substituteTextEntities($string);
|
||||
}
|
||||
}
|
||||
return $string;
|
||||
}
|
||||
|
||||
@@ -323,7 +334,9 @@ class HTMLPurifier_Lexer
|
||||
}
|
||||
|
||||
// expand entities that aren't the big five
|
||||
$html = $this->_entity_parser->substituteNonSpecialEntities($html);
|
||||
if ($config->get('Core.LegacyEntityDecoder')) {
|
||||
$html = $this->_entity_parser->substituteNonSpecialEntities($html);
|
||||
}
|
||||
|
||||
// clean into wellformed UTF-8 string for an SGML context: this has
|
||||
// to be done after entity expansion because the entities sometimes
|
||||
|
Reference in New Issue
Block a user