mirror of
https://github.com/ezyang/htmlpurifier.git
synced 2025-08-06 14:16:32 +02:00
Release 2.0.1, merged in 1181 to HEAD.
git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/branches/strict@1255 48356398-32a2-884e-a903-53898d9a118a
This commit is contained in:
@@ -38,25 +38,25 @@ class HTMLPurifier_Lexer_DOMLex extends HTMLPurifier_Lexer
|
||||
$this->factory = new HTMLPurifier_TokenFactory();
|
||||
}
|
||||
|
||||
public function tokenizeHTML($string, $config, &$context) {
|
||||
public function tokenizeHTML($html, $config, &$context) {
|
||||
|
||||
$string = $this->normalize($string, $config, $context);
|
||||
$html = $this->normalize($html, $config, $context);
|
||||
|
||||
// preprocess string, essential for UTF-8
|
||||
$string =
|
||||
// preprocess html, essential for UTF-8
|
||||
$html =
|
||||
'<!DOCTYPE html '.
|
||||
'PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"'.
|
||||
'"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">'.
|
||||
'<html><head>'.
|
||||
'<meta http-equiv="Content-Type" content="text/html;'.
|
||||
' charset=utf-8" />'.
|
||||
'</head><body><div>'.$string.'</div></body></html>';
|
||||
'</head><body><div>'.$html.'</div></body></html>';
|
||||
|
||||
$doc = new DOMDocument();
|
||||
$doc->encoding = 'UTF-8'; // theoretically, the above has this covered
|
||||
|
||||
set_error_handler(array($this, 'muteErrorHandler'));
|
||||
$doc->loadHTML($string);
|
||||
$doc->loadHTML($html);
|
||||
restore_error_handler();
|
||||
|
||||
$tokens = array();
|
||||
@@ -83,10 +83,13 @@ class HTMLPurifier_Lexer_DOMLex extends HTMLPurifier_Lexer
|
||||
// intercept non element nodes. WE MUST catch all of them,
|
||||
// but we're not getting the character reference nodes because
|
||||
// those should have been preprocessed
|
||||
if ($node->nodeType === XML_TEXT_NODE ||
|
||||
$node->nodeType === XML_CDATA_SECTION_NODE) {
|
||||
if ($node->nodeType === XML_TEXT_NODE) {
|
||||
$tokens[] = $this->factory->createText($node->data);
|
||||
return;
|
||||
} elseif ($node->nodeType === XML_CDATA_SECTION_NODE) {
|
||||
// undo DOM's special treatment of <script> tags
|
||||
$tokens[] = $this->factory->createText($this->parseData($node->data));
|
||||
return;
|
||||
} elseif ($node->nodeType === XML_COMMENT_NODE) {
|
||||
$tokens[] = $this->factory->createComment($node->data);
|
||||
return;
|
||||
@@ -150,4 +153,3 @@ class HTMLPurifier_Lexer_DOMLex extends HTMLPurifier_Lexer
|
||||
|
||||
}
|
||||
|
||||
?>
|
Reference in New Issue
Block a user