1
0
mirror of https://github.com/ezyang/htmlpurifier.git synced 2025-08-04 05:07:55 +02:00

Outfit a bunch of other classes so they can accept a configuration object. Put in basic scaffolding for extractBody() functionality.

git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@257 48356398-32a2-884e-a903-53898d9a118a
This commit is contained in:
Edward Z. Yang
2006-08-15 00:31:12 +00:00
parent 24c64dbbac
commit d7140f2e05
14 changed files with 75 additions and 26 deletions

View File

@@ -137,7 +137,7 @@ class HTMLPurifier_ChildDef_Required extends HTMLPurifier_ChildDef
$is_deleting = true;
if ($pcdata_allowed && $escape_invalid_children) {
$result[] = new HTMLPurifier_Token_Text(
$this->gen->generateFromToken($token)
$this->gen->generateFromToken($token, $config)
);
}
continue;
@@ -148,7 +148,7 @@ class HTMLPurifier_ChildDef_Required extends HTMLPurifier_ChildDef
} elseif ($pcdata_allowed && $escape_invalid_children) {
$result[] =
new HTMLPurifier_Token_Text(
$this->gen->generateFromToken( $token )
$this->gen->generateFromToken( $token, $config )
);
} else {
// drop silently

View File

@@ -5,26 +5,28 @@
class HTMLPurifier_Generator
{
function generateFromTokens($tokens) {
// only unit tests may omit configuration: internals MUST pass config
function generateFromTokens($tokens, $config = null) {
$html = '';
if (!$config) $config = HTMLPurifier_Config::createDefault();
if (!$tokens) return '';
foreach ($tokens as $token) {
$html .= $this->generateFromToken($token);
$html .= $this->generateFromToken($token, $config);
}
return $html;
}
function generateFromToken($token) {
function generateFromToken($token, $config) {
if (!isset($token->type)) return '';
if ($token->type == 'start') {
$attr = $this->generateAttributes($token->attributes);
$attr = $this->generateAttributes($token->attributes, $config);
return '<' . $token->name . ($attr ? ' ' : '') . $attr . '>';
} elseif ($token->type == 'end') {
return '</' . $token->name . '>';
} elseif ($token->type == 'empty') {
$attr = $this->generateAttributes($token->attributes);
$attr = $this->generateAttributes($token->attributes, $config);
return '<' . $token->name . ($attr ? ' ' : '') . $attr . ' />';
} elseif ($token->type == 'text') {
@@ -36,7 +38,7 @@ class HTMLPurifier_Generator
}
}
function generateAttributes($assoc_array_of_attributes) {
function generateAttributes($assoc_array_of_attributes, $config) {
$html = '';
foreach ($assoc_array_of_attributes as $key => $value) {
$html .= $key.'="'.htmlspecialchars($value, ENT_COMPAT, 'UTF-8').'" ';

View File

@@ -2,6 +2,14 @@
require_once 'HTMLPurifier/Token.php';
// Registers the AcceptFullDocuments directive under Core, passing true as the
// third argument and a human-readable description as the fourth.
// NOTE(review): the arguments look like (namespace, name, default value,
// description) -- confirm against HTMLPurifier_ConfigDef::define().
HTMLPurifier_ConfigDef::define(
'Core', 'AcceptFullDocuments', true,
'This parameter determines whether or not the filter should accept full '.
'HTML documents, not just HTML fragments. When on, it will '.
'drop all sections except the content between body. Depending on '.
'the implementation in use, this may speed up document parse times.'
);
/**
* Forgivingly lexes HTML (SGML-style) markup into tokens.
*
@@ -52,7 +60,7 @@ class HTMLPurifier_Lexer
* @param $string String HTML.
* @return HTMLPurifier_Token array representation of HTML.
*/
function tokenizeHTML($string) {
function tokenizeHTML($string, $config = null) {
trigger_error('Call to abstract class', E_USER_ERROR);
}
@@ -228,6 +236,14 @@ class HTMLPurifier_Lexer
return htmlspecialchars($matches[1], ENT_COMPAT, 'UTF-8');
}
/**
 * Takes a string of HTML (fragment or document) and returns the content
 * found between the opening and closing body tags.
 *
 * @param $html String of HTML. Input that does not contain '<html' is
 *              treated as a fragment.
 * @return String contents of the body element, or the input unchanged when
 *         it is a fragment or no complete body element can be located.
 */
function extractBody($html) {
    if (strpos($html, '<html') === false) return $html; // already fragment
    $matches = array();
    // s: let '.' span newlines; i: accept BODY in any letter case.
    // The lazy quantifier stops at the first closing body tag.
    if (preg_match('!<body(?:\s[^>]*)?>(.*?)</body>!is', $html, $matches)) {
        return $matches[1];
    }
    // Document-like input without a usable body element: hand the input
    // back instead of falling off the end and returning null.
    return $html;
}
}
?>

View File

@@ -25,7 +25,9 @@ require_once 'HTMLPurifier/Lexer.php';
class HTMLPurifier_Lexer_DOMLex extends HTMLPurifier_Lexer
{
public function tokenizeHTML($string) {
public function tokenizeHTML($string, $config = null) {
if (!$config) $config = HTMLPurifier_Config::createDefault();
$doc = new DOMDocument();
$doc->encoding = 'UTF-8'; // technically does nothing, but comprehensive

View File

@@ -106,7 +106,9 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer
}
}
function tokenizeHTML($string) {
function tokenizeHTML($string, $config = null) {
if (!$config) $config = HTMLPurifier_Config::createDefault();
// some quick checking (if empty, return empty)
$string = @ (string) $string;

View File

@@ -29,7 +29,8 @@ class HTMLPurifier_Lexer_PEARSax3 extends HTMLPurifier_Lexer
*/
var $tokens = array();
function tokenizeHTML($html) {
function tokenizeHTML($html, $config = null) {
if (!$config) $config = HTMLPurifier_Config::createDefault();
$html = $this->escapeCDATA($html);
$html = $this->substituteNonSpecialEntities($html);
$parser=& new XML_HTMLSax3();

View File

@@ -87,7 +87,7 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
// make sure that we have something open
if (empty($current_nesting)) {
$result[] = new HTMLPurifier_Token_Text(
$this->generator->generateFromToken($token)
$this->generator->generateFromToken($token, $config)
);
continue;
}
@@ -122,7 +122,7 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
// we still didn't find the tag, so translate to text
if ($skipped_tags === false) {
$result[] = new HTMLPurifier_Token_Text(
$this->generator->generateFromToken($token)
$this->generator->generateFromToken($token, $config)
);
continue;
}

View File

@@ -43,7 +43,7 @@ class HTMLPurifier_Strategy_RemoveForeignElements extends HTMLPurifier_Strategy
} else {
// invalid tag, generate HTML and insert in
$token = new HTMLPurifier_Token_Text(
$this->generator->generateFromToken($token)
$this->generator->generateFromToken($token, $config)
);
}
} elseif ($token->type == 'comment') {