1
0
mirror of https://github.com/ezyang/htmlpurifier.git synced 2025-08-04 05:07:55 +02:00
- Partially finished migrating to new Context object (done in r485).
- Created HTMLPurifier_Harness to assist with testing, ChildDefTest migrated to that framework.

git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@484 48356398-32a2-884e-a903-53898d9a118a
This commit is contained in:
Edward Z. Yang
2006-10-01 20:47:07 +00:00
parent 58be73fcf7
commit 8f515b9cda
21 changed files with 261 additions and 203 deletions

View File

@@ -44,6 +44,7 @@
// they get included
require_once 'HTMLPurifier/ConfigSchema.php';
require_once 'HTMLPurifier/Config.php';
require_once 'HTMLPurifier/Context.php';
require_once 'HTMLPurifier/Lexer.php';
require_once 'HTMLPurifier/Generator.php';
@@ -95,16 +96,17 @@ class HTMLPurifier
*/
function purify($html, $config = null) {
$config = $config ? $config : $this->config;
$html = $this->encoder->convertToUTF8($html, $config);
$context =& new HTMLPurifier_Context();
$html = $this->encoder->convertToUTF8($html, $config, $context);
$html =
$this->generator->generateFromTokens(
$this->strategy->execute(
$this->lexer->tokenizeHTML($html, $config),
$config
$this->lexer->tokenizeHTML($html, $config, $context),
$config, $context
),
$config
$config, $context
);
$html = $this->encoder->convertFromUTF8($html, $config);
$html = $this->encoder->convertFromUTF8($html, $config, $context);
return $html;
}

View File

@@ -38,15 +38,14 @@ class HTMLPurifier_ChildDef
/**
* Validates nodes according to definition and returns modification.
*
* @warning $context is NOT HTMLPurifier_AttrContext
* @param $tokens_of_children Array of HTMLPurifier_Token
* @param $config HTMLPurifier_Config object
* @param $context String context indicating inline, block or unknown
* @param $context HTMLPurifier_Context object
* @return bool true to leave nodes as is
* @return bool false to remove parent node
* @return array of replacement child tokens
*/
function validateChildren($tokens_of_children, $config, $context) {
function validateChildren($tokens_of_children, $config, &$context) {
trigger_error('Call to abstract function', E_USER_ERROR);
}
}
@@ -91,7 +90,7 @@ class HTMLPurifier_ChildDef_Custom extends HTMLPurifier_ChildDef
$reg = preg_replace('/([#a-zA-Z0-9_.-]+)/', '(,?\\0)', $reg);
$this->_pcre_regex = $reg;
}
function validateChildren($tokens_of_children, $config, $context) {
function validateChildren($tokens_of_children, $config, &$context) {
$list_of_children = '';
$nesting = 0; // depth into the nest
foreach ($tokens_of_children as $token) {
@@ -145,7 +144,7 @@ class HTMLPurifier_ChildDef_Required extends HTMLPurifier_ChildDef
}
var $allow_empty = false;
var $type = 'required';
function validateChildren($tokens_of_children, $config, $context) {
function validateChildren($tokens_of_children, $config, &$context) {
// if there are no tokens, delete parent node
if (empty($tokens_of_children)) return false;
@@ -227,7 +226,7 @@ class HTMLPurifier_ChildDef_Optional extends HTMLPurifier_ChildDef_Required
{
var $allow_empty = true;
var $type = 'optional';
function validateChildren($tokens_of_children, $config, $context) {
function validateChildren($tokens_of_children, $config, &$context) {
$result = parent::validateChildren($tokens_of_children, $config, $context);
if ($result === false) return array();
return $result;
@@ -246,7 +245,7 @@ class HTMLPurifier_ChildDef_Empty extends HTMLPurifier_ChildDef
var $allow_empty = true;
var $type = 'empty';
function HTMLPurifier_ChildDef_Empty() {}
function validateChildren($tokens_of_children, $config, $context) {
function validateChildren($tokens_of_children, $config, &$context) {
return array();
}
}
@@ -281,8 +280,9 @@ class HTMLPurifier_ChildDef_Chameleon extends HTMLPurifier_ChildDef
$this->block = new HTMLPurifier_ChildDef_Optional($block);
}
function validateChildren($tokens_of_children, $config, $context) {
switch ($context) {
function validateChildren($tokens_of_children, $config, &$context) {
$parent_type = $context->get('ParentType');
switch ($parent_type) {
case 'unknown':
case 'inline':
$result = $this->inline->validateChildren(
@@ -308,7 +308,7 @@ class HTMLPurifier_ChildDef_Table extends HTMLPurifier_ChildDef
var $allow_empty = false;
var $type = 'table';
function HTMLPurifier_ChildDef_Table() {}
function validateChildren($tokens_of_children, $config, $context) {
function validateChildren($tokens_of_children, $config, &$context) {
if (empty($tokens_of_children)) return false;
// this ensures that the loop gets run one last time before closing

View File

@@ -266,7 +266,7 @@ class HTMLPurifier_Encoder
/**
* Converts a string to UTF-8 based on configuration.
*/
function convertToUTF8($str, $config) {
function convertToUTF8($str, $config, &$context) {
static $iconv = null;
if ($iconv === null) $iconv = function_exists('iconv');
$encoding = $config->get('Core', 'Encoding');
@@ -283,7 +283,7 @@ class HTMLPurifier_Encoder
* @note Currently, this is a lossy conversion, with unexpressable
* characters being omitted.
*/
function convertFromUTF8($str, $config) {
function convertFromUTF8($str, $config, &$context) {
static $iconv = null;
if ($iconv === null) $iconv = function_exists('iconv');
$encoding = $config->get('Core', 'Encoding');

View File

@@ -122,7 +122,7 @@ class HTMLPurifier_Lexer
* @param $string String HTML.
* @return HTMLPurifier_Token array representation of HTML.
*/
function tokenizeHTML($string, $config = null) {
function tokenizeHTML($string, $config, &$context) {
trigger_error('Call to abstract class', E_USER_ERROR);
}
@@ -196,7 +196,7 @@ class HTMLPurifier_Lexer
* Takes a piece of HTML and normalizes it by converting entities, fixing
* encoding, extracting bits, and other good stuff.
*/
function normalize($html, $config) {
function normalize($html, $config, &$context) {
// extract body from document if applicable
if ($config->get('Core', 'AcceptFullDocuments')) {

View File

@@ -38,10 +38,9 @@ class HTMLPurifier_Lexer_DOMLex extends HTMLPurifier_Lexer
$this->factory = new HTMLPurifier_TokenFactory();
}
public function tokenizeHTML($string, $config = null) {
if (!$config) $config = HTMLPurifier_Config::createDefault();
public function tokenizeHTML($string, $config, &$context) {
$string = $this->normalize($string, $config);
$string = $this->normalize($string, $config, $context);
// preprocess string, essential for UTF-8
$string =

View File

@@ -24,11 +24,9 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer
*/
var $_whitespace = "\x20\x09\x0D\x0A";
function tokenizeHTML($html, $config = null) {
function tokenizeHTML($html, $config, &$context) {
if (!$config) $config = HTMLPurifier_Config::createDefault();
$html = $this->normalize($html, $config);
$html = $this->normalize($html, $config, $context);
$cursor = 0; // our location in the text
$inside_tag = false; // whether or not we're parsing the inside of a tag
@@ -147,6 +145,7 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer
if ($attribute_string) {
$attributes = $this->parseAttributeString(
$attribute_string
, $config, $context
);
} else {
$attributes = array();
@@ -181,7 +180,7 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer
* @param $string Inside of tag excluding name.
* @returns Assoc array of attributes.
*/
function parseAttributeString($string) {
function parseAttributeString($string, $config, &$context) {
$string = (string) $string; // quick typecast
if ($string == '') return array(); // no attributes

View File

@@ -31,12 +31,11 @@ class HTMLPurifier_Lexer_PEARSax3 extends HTMLPurifier_Lexer
*/
var $tokens = array();
function tokenizeHTML($string, $config = null) {
function tokenizeHTML($string, $config, &$context) {
$this->tokens = array();
if (!$config) $config = HTMLPurifier_Config::createDefault();
$string = $this->normalize($string, $config);
$string = $this->normalize($string, $config, $context);
$parser=& new XML_HTMLSax3();
$parser->set_object($this);

View File

@@ -24,7 +24,7 @@ class HTMLPurifier_Strategy
* @param $config Configuration options
* @returns Processed array of token objects.
*/
function execute($tokens, $config = null) {
function execute($tokens, $config, &$context) {
trigger_error('Cannot call abstract function', E_USER_ERROR);
}

View File

@@ -18,9 +18,9 @@ class HTMLPurifier_Strategy_Composite extends HTMLPurifier_Strategy
trigger_error('Attempt to instantiate abstract object', E_USER_ERROR);
}
function execute($tokens, $config) {
function execute($tokens, $config, &$context) {
foreach ($this->strategies as $strategy) {
$tokens = $strategy->execute($tokens, $config);
$tokens = $strategy->execute($tokens, $config, $context);
}
return $tokens;
}

View File

@@ -34,8 +34,7 @@ require_once 'HTMLPurifier/HTMLDefinition.php';
class HTMLPurifier_Strategy_FixNesting extends HTMLPurifier_Strategy
{
function execute($tokens, $config) {
function execute($tokens, $config, &$context) {
//####################################################################//
// Pre-processing
@@ -49,6 +48,10 @@ class HTMLPurifier_Strategy_FixNesting extends HTMLPurifier_Strategy
array_unshift($tokens, new HTMLPurifier_Token_Start($parent_name));
$tokens[] = new HTMLPurifier_Token_End($parent_name);
// setup the context variables
$parent_type = 'unknown'; // reference var that we alter
$context->register('ParentType', $parent_type);
//####################################################################//
// Loop initialization
@@ -109,10 +112,10 @@ class HTMLPurifier_Strategy_FixNesting extends HTMLPurifier_Strategy
// calculate context
if (isset($parent_def)) {
$context = $parent_def->type;
$parent_type = $parent_def->type;
} else {
// generally found in specialized elements like UL
$context = 'unknown';
$parent_type = 'unknown';
}
//################################################################//
@@ -145,7 +148,7 @@ class HTMLPurifier_Strategy_FixNesting extends HTMLPurifier_Strategy
// have DTD child def validate children
$result = $child_def->validateChildren(
$child_tokens, $config,$context);
$child_tokens, $config, $context);
// determine whether or not this element has any exclusions
$excludes = $def->excludes;
@@ -247,6 +250,9 @@ class HTMLPurifier_Strategy_FixNesting extends HTMLPurifier_Strategy
array_shift($tokens);
array_pop($tokens);
// remove context variables
$context->destroy('ParentType');
//####################################################################//
// Return

View File

@@ -10,7 +10,7 @@ require_once 'HTMLPurifier/Generator.php';
class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
{
function execute($tokens, $config) {
function execute($tokens, $config, &$context) {
$definition = $config->getHTMLDefinition();
$generator = new HTMLPurifier_Generator();
$result = array();
@@ -86,7 +86,7 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
if (empty($current_nesting)) {
if ($escape_invalid_tags) {
$result[] = new HTMLPurifier_Token_Text(
$generator->generateFromToken($token, $config)
$generator->generateFromToken($token, $config, $context)
);
}
continue;
@@ -123,7 +123,7 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
if ($skipped_tags === false) {
if ($escape_invalid_tags) {
$result[] = new HTMLPurifier_Token_Text(
$generator->generateFromToken($token, $config)
$generator->generateFromToken($token, $config, $context)
);
}
continue;

View File

@@ -16,7 +16,7 @@ require_once 'HTMLPurifier/TagTransform.php';
class HTMLPurifier_Strategy_RemoveForeignElements extends HTMLPurifier_Strategy
{
function execute($tokens, $config) {
function execute($tokens, $config, &$context) {
$definition = $config->getHTMLDefinition();
$generator = new HTMLPurifier_Generator();
$result = array();
@@ -37,7 +37,7 @@ class HTMLPurifier_Strategy_RemoveForeignElements extends HTMLPurifier_Strategy
} elseif ($escape_invalid_tags) {
// invalid tag, generate HTML and insert in
$token = new HTMLPurifier_Token_Text(
$generator->generateFromToken($token, $config)
$generator->generateFromToken($token, $config, $context)
);
} else {
continue;

View File

@@ -17,18 +17,18 @@ HTMLPurifier_ConfigSchema::define(
class HTMLPurifier_Strategy_ValidateAttributes extends HTMLPurifier_Strategy
{
function execute($tokens, $config) {
function execute($tokens, $config, &$context) {
$definition = $config->getHTMLDefinition();
// setup StrategyContext
$context = new HTMLPurifier_AttrContext();
$attr_context = new HTMLPurifier_AttrContext();
// setup ID accumulator and load it with blacklisted IDs
// eventually, we'll have a dedicated context object to hold
// all these accumulators and caches. For now, just an IDAccumulator
$context->id_accumulator = new HTMLPurifier_IDAccumulator();
$context->id_accumulator->load($config->get('Attr', 'IDBlacklist'));
$attr_context->id_accumulator = new HTMLPurifier_IDAccumulator();
$attr_context->id_accumulator->load($config->get('Attr', 'IDBlacklist'));
// create alias to global definition array, see also $defs
// DEFINITION CALL
@@ -81,14 +81,14 @@ class HTMLPurifier_Strategy_ValidateAttributes extends HTMLPurifier_Strategy
} else {
// validate according to the element's definition
$result = $defs[$attr_key]->validate(
$value, $config, $context
$value, $config, $attr_context
);
}
} elseif ( isset($d_defs[$attr_key]) ) {
// there is a global definition defined, validate according
// to the global definition
$result = $d_defs[$attr_key]->validate(
$value, $config, $context
$value, $config, $attr_context
);
} else {
// system never heard of the attribute? DELETE!