1
0
mirror of https://github.com/ezyang/htmlpurifier.git synced 2025-07-31 03:10:09 +02:00

Remove a huge swath of duplicated function calls by factoring them into a normalize() function. Also made DirectLex's variable names consistent with the rest of the classes.

git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@340 48356398-32a2-884e-a903-53898d9a118a
This commit is contained in:
Edward Z. Yang
2006-08-29 20:05:26 +00:00
parent 1de3088276
commit 89376a11e3
4 changed files with 46 additions and 51 deletions

View File

@@ -137,6 +137,31 @@ class HTMLPurifier_Lexer
return htmlspecialchars($matches[1], ENT_COMPAT, 'UTF-8');
}
/**
* Takes a piece of HTML and normalizes it by converting entities, fixing
* encoding, extracting bits, and other good stuff.
*/
function normalize($html, $config) {
// extract body from document if applicable
if ($config->get('Core', 'AcceptFullDocuments')) {
$html = $this->extractBody($html);
}
// escape CDATA
$html = $this->escapeCDATA($html);
// expand entities that aren't the big five
$html = $this->_encoder->substituteNonSpecialEntities($html);
// clean into wellformed UTF-8 string for an SGML context: this has
// to be done after entity expansion because the entities sometimes
// represent non-SGML characters (horror, horror!)
$html = $this->_encoder->cleanUTF8($html);
return $html;
}
/**
* Takes a string of HTML (fragment or document) and returns the content
*/