mirror of
https://github.com/ezyang/htmlpurifier.git
synced 2025-07-31 19:30:21 +02:00
Removed the hacky full-document parsing from DOMLex; this fixes barfing on full HTML documents.
git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@328 48356398-32a2-884e-a903-53898d9a118a
This commit is contained in:
@@ -295,10 +295,9 @@ class HTMLPurifier_Lexer
|
|||||||
/**
|
/**
|
||||||
* Takes a string of HTML (fragment or document) and returns the content
|
* Takes a string of HTML (fragment or document) and returns the content
|
||||||
*/
|
*/
|
||||||
function extractBody($html, $return_bool = false) {
|
function extractBody($html) {
|
||||||
$matches = array();
|
$matches = array();
|
||||||
$result = preg_match('!<body[^>]*>(.+?)</body>!is', $html, $matches);
|
$result = preg_match('!<body[^>]*>(.+?)</body>!is', $html, $matches);
|
||||||
if ($return_bool) return $result;
|
|
||||||
if ($result) {
|
if ($result) {
|
||||||
return $matches[1];
|
return $matches[1];
|
||||||
} else {
|
} else {
|
||||||
|
@@ -37,7 +37,7 @@ class HTMLPurifier_Lexer_DOMLex extends HTMLPurifier_Lexer
|
|||||||
if (!$config) $config = HTMLPurifier_Config::createDefault();
|
if (!$config) $config = HTMLPurifier_Config::createDefault();
|
||||||
|
|
||||||
if ($config->get('Core', 'AcceptFullDocuments')) {
|
if ($config->get('Core', 'AcceptFullDocuments')) {
|
||||||
$is_full = $this->extractBody($string, true);
|
$string = $this->extractBody($string);
|
||||||
}
|
}
|
||||||
|
|
||||||
$doc = new DOMDocument();
|
$doc = new DOMDocument();
|
||||||
@@ -55,9 +55,8 @@ class HTMLPurifier_Lexer_DOMLex extends HTMLPurifier_Lexer
|
|||||||
// clean it into well-formed UTF-8 string
|
// clean it into well-formed UTF-8 string
|
||||||
$string = $this->cleanUTF8($string);
|
$string = $this->cleanUTF8($string);
|
||||||
|
|
||||||
if (!$is_full) {
|
|
||||||
// preprocess string, essential for UTF-8
|
// preprocess string, essential for UTF-8
|
||||||
$string =
|
$string =
|
||||||
'<!DOCTYPE html '.
|
'<!DOCTYPE html '.
|
||||||
'PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"'.
|
'PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"'.
|
||||||
'"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">'.
|
'"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">'.
|
||||||
@@ -65,7 +64,6 @@ class HTMLPurifier_Lexer_DOMLex extends HTMLPurifier_Lexer
|
|||||||
'<meta http-equiv="Content-Type" content="text/html;'.
|
'<meta http-equiv="Content-Type" content="text/html;'.
|
||||||
' charset=utf-8" />'.
|
' charset=utf-8" />'.
|
||||||
'</head><body><div>'.$string.'</div></body></html>';
|
'</head><body><div>'.$string.'</div></body></html>';
|
||||||
}
|
|
||||||
|
|
||||||
@$doc->loadHTML($string); // mute all errors, handle it transparently
|
@$doc->loadHTML($string); // mute all errors, handle it transparently
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user