mirror of
https://github.com/ezyang/htmlpurifier.git
synced 2025-07-31 03:10:09 +02:00
Remove a huge swath of duplicated function calls by factoring them into a normalize() function. Also made DirectLex's variable names consistent with the rest of the classes.
git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@340 48356398-32a2-884e-a903-53898d9a118a
This commit is contained in:
@@ -137,6 +137,31 @@ class HTMLPurifier_Lexer
|
||||
return htmlspecialchars($matches[1], ENT_COMPAT, 'UTF-8');
|
||||
}
|
||||
|
||||
/**
|
||||
* Takes a piece of HTML and normalizes it by converting entities, fixing
|
||||
* encoding, extracting bits, and other good stuff.
|
||||
*/
|
||||
function normalize($html, $config) {
|
||||
|
||||
// extract body from document if applicable
|
||||
if ($config->get('Core', 'AcceptFullDocuments')) {
|
||||
$html = $this->extractBody($html);
|
||||
}
|
||||
|
||||
// escape CDATA
|
||||
$html = $this->escapeCDATA($html);
|
||||
|
||||
// expand entities that aren't the big five
|
||||
$html = $this->_encoder->substituteNonSpecialEntities($html);
|
||||
|
||||
// clean into wellformed UTF-8 string for an SGML context: this has
|
||||
// to be done after entity expansion because the entities sometimes
|
||||
// represent non-SGML characters (horror, horror!)
|
||||
$html = $this->_encoder->cleanUTF8($html);
|
||||
|
||||
return $html;
|
||||
}
|
||||
|
||||
/**
|
||||
* Takes a string of HTML (fragment or document) and returns the content
|
||||
*/
|
||||
|
Reference in New Issue
Block a user