mirror of
https://github.com/ezyang/htmlpurifier.git
synced 2025-08-04 05:07:55 +02:00
Outfit a bunch of other classes so they can accept a configuration object. Put in basic scaffolding for extractBody() functionality.
git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@257 48356398-32a2-884e-a903-53898d9a118a
This commit is contained in:
@@ -137,7 +137,7 @@ class HTMLPurifier_ChildDef_Required extends HTMLPurifier_ChildDef
|
||||
$is_deleting = true;
|
||||
if ($pcdata_allowed && $escape_invalid_children) {
|
||||
$result[] = new HTMLPurifier_Token_Text(
|
||||
$this->gen->generateFromToken($token)
|
||||
$this->gen->generateFromToken($token, $config)
|
||||
);
|
||||
}
|
||||
continue;
|
||||
@@ -148,7 +148,7 @@ class HTMLPurifier_ChildDef_Required extends HTMLPurifier_ChildDef
|
||||
} elseif ($pcdata_allowed && $escape_invalid_children) {
|
||||
$result[] =
|
||||
new HTMLPurifier_Token_Text(
|
||||
$this->gen->generateFromToken( $token )
|
||||
$this->gen->generateFromToken( $token, $config )
|
||||
);
|
||||
} else {
|
||||
// drop silently
|
||||
|
@@ -5,26 +5,28 @@
|
||||
class HTMLPurifier_Generator
|
||||
{
|
||||
|
||||
function generateFromTokens($tokens) {
|
||||
// only unit tests may omit configuration: internals MUST pass config
|
||||
function generateFromTokens($tokens, $config = null) {
|
||||
$html = '';
|
||||
if (!$config) $config = HTMLPurifier_Config::createDefault();
|
||||
if (!$tokens) return '';
|
||||
foreach ($tokens as $token) {
|
||||
$html .= $this->generateFromToken($token);
|
||||
$html .= $this->generateFromToken($token, $config);
|
||||
}
|
||||
return $html;
|
||||
}
|
||||
|
||||
function generateFromToken($token) {
|
||||
function generateFromToken($token, $config) {
|
||||
if (!isset($token->type)) return '';
|
||||
if ($token->type == 'start') {
|
||||
$attr = $this->generateAttributes($token->attributes);
|
||||
$attr = $this->generateAttributes($token->attributes, $config);
|
||||
return '<' . $token->name . ($attr ? ' ' : '') . $attr . '>';
|
||||
|
||||
} elseif ($token->type == 'end') {
|
||||
return '</' . $token->name . '>';
|
||||
|
||||
} elseif ($token->type == 'empty') {
|
||||
$attr = $this->generateAttributes($token->attributes);
|
||||
$attr = $this->generateAttributes($token->attributes, $config);
|
||||
return '<' . $token->name . ($attr ? ' ' : '') . $attr . ' />';
|
||||
|
||||
} elseif ($token->type == 'text') {
|
||||
@@ -36,7 +38,7 @@ class HTMLPurifier_Generator
|
||||
}
|
||||
}
|
||||
|
||||
function generateAttributes($assoc_array_of_attributes) {
|
||||
function generateAttributes($assoc_array_of_attributes, $config) {
|
||||
$html = '';
|
||||
foreach ($assoc_array_of_attributes as $key => $value) {
|
||||
$html .= $key.'="'.htmlspecialchars($value, ENT_COMPAT, 'UTF-8').'" ';
|
||||
|
@@ -2,6 +2,14 @@
|
||||
|
||||
require_once 'HTMLPurifier/Token.php';
|
||||
|
||||
HTMLPurifier_ConfigDef::define(
|
||||
'Core', 'AcceptFullDocuments', true,
|
||||
'This parameter determines whether or not the filter should accept full '.
|
||||
'HTML documents, not just HTML fragments. When on, it will '.
|
||||
'drop all sections except the content between body. Depending on '.
|
||||
'the implementation in use, this may speed up document parse times.'
|
||||
);
|
||||
|
||||
/**
|
||||
* Forgivingly lexes HTML (SGML-style) markup into tokens.
|
||||
*
|
||||
@@ -52,7 +60,7 @@ class HTMLPurifier_Lexer
|
||||
* @param $string String HTML.
|
||||
* @return HTMLPurifier_Token array representation of HTML.
|
||||
*/
|
||||
function tokenizeHTML($string) {
|
||||
function tokenizeHTML($string, $config = null) {
|
||||
trigger_error('Call to abstract class', E_USER_ERROR);
|
||||
}
|
||||
|
||||
@@ -228,6 +236,14 @@ class HTMLPurifier_Lexer
|
||||
return htmlspecialchars($matches[1], ENT_COMPAT, 'UTF-8');
|
||||
}
|
||||
|
||||
/**
 * Takes a string of HTML (fragment or document) and returns the content
 * found between the opening and closing body tags.
 *
 * Matching is case-insensitive and spans newlines. When no complete
 * body element is present the input is assumed to already be a
 * fragment and is returned unchanged, so this never returns null.
 *
 * @param $html String HTML fragment or full HTML document.
 * @return String body contents, or the original string if no body
 *         element was found.
 */
function extractBody($html) {
    $matches = array();
    // 'i': tag names are case-insensitive; 's': let '.' cross line breaks.
    // The greedy (.*) runs to the LAST </body>, so a stray closing tag
    // inside the content does not truncate the result.
    $found = preg_match(
        '!<body(?:\s[^>]*)?>(.*)</body>!is',
        $html,
        $matches
    );
    if ($found) {
        return $matches[1];
    }
    return $html; // already a fragment (or no usable body element)
}
|
||||
|
||||
}
|
||||
|
||||
?>
|
@@ -25,7 +25,9 @@ require_once 'HTMLPurifier/Lexer.php';
|
||||
class HTMLPurifier_Lexer_DOMLex extends HTMLPurifier_Lexer
|
||||
{
|
||||
|
||||
public function tokenizeHTML($string) {
|
||||
public function tokenizeHTML($string, $config = null) {
|
||||
if (!$config) $config = HTMLPurifier_Config::createDefault();
|
||||
|
||||
$doc = new DOMDocument();
|
||||
$doc->encoding = 'UTF-8'; // technically does nothing, but comprehensive
|
||||
|
||||
|
@@ -106,7 +106,9 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer
|
||||
}
|
||||
}
|
||||
|
||||
function tokenizeHTML($string) {
|
||||
function tokenizeHTML($string, $config = null) {
|
||||
|
||||
if (!$config) $config = HTMLPurifier_Config::createDefault();
|
||||
|
||||
// some quick checking (if empty, return empty)
|
||||
$string = @ (string) $string;
|
||||
|
@@ -29,7 +29,8 @@ class HTMLPurifier_Lexer_PEARSax3 extends HTMLPurifier_Lexer
|
||||
*/
|
||||
var $tokens = array();
|
||||
|
||||
function tokenizeHTML($html) {
|
||||
function tokenizeHTML($html, $config = null) {
|
||||
if (!$config) $config = HTMLPurifier_Config::createDefault();
|
||||
$html = $this->escapeCDATA($html);
|
||||
$html = $this->substituteNonSpecialEntities($html);
|
||||
$parser=& new XML_HTMLSax3();
|
||||
|
@@ -87,7 +87,7 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
|
||||
// make sure that we have something open
|
||||
if (empty($current_nesting)) {
|
||||
$result[] = new HTMLPurifier_Token_Text(
|
||||
$this->generator->generateFromToken($token)
|
||||
$this->generator->generateFromToken($token, $config)
|
||||
);
|
||||
continue;
|
||||
}
|
||||
@@ -122,7 +122,7 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
|
||||
// we still didn't find the tag, so translate to text
|
||||
if ($skipped_tags === false) {
|
||||
$result[] = new HTMLPurifier_Token_Text(
|
||||
$this->generator->generateFromToken($token)
|
||||
$this->generator->generateFromToken($token, $config)
|
||||
);
|
||||
continue;
|
||||
}
|
||||
|
@@ -43,7 +43,7 @@ class HTMLPurifier_Strategy_RemoveForeignElements extends HTMLPurifier_Strategy
|
||||
} else {
|
||||
// invalid tag, generate HTML and insert in
|
||||
$token = new HTMLPurifier_Token_Text(
|
||||
$this->generator->generateFromToken($token)
|
||||
$this->generator->generateFromToken($token, $config)
|
||||
);
|
||||
}
|
||||
} elseif ($token->type == 'comment') {
|
||||
|
Reference in New Issue
Block a user