1
0
mirror of https://github.com/ezyang/htmlpurifier.git synced 2025-10-25 02:26:32 +02:00

Release 2.1.3, merged in 1404 to HEAD.

git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/branches/strict@1444 48356398-32a2-884e-a903-53898d9a118a
This commit is contained in:
Edward Z. Yang
2007-11-06 04:34:33 +00:00
parent b3f0e6c86c
commit 9db861e356
47 changed files with 5526 additions and 278 deletions

View File

@@ -22,8 +22,8 @@
*/
/*
HTML Purifier 2.1.2 - Standards Compliant HTML Filtering
Copyright (C) 2006 Edward Z. Yang
HTML Purifier 2.1.3 - Standards Compliant HTML Filtering
Copyright (C) 2006-2007 Edward Z. Yang
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
@@ -43,9 +43,8 @@
// constants are slow, but we'll make one exception
define('HTMLPURIFIER_PREFIX', dirname(__FILE__));
// almost every class has an undocumented dependency to these, so make sure
// they get included
require_once 'HTMLPurifier/ConfigSchema.php'; // important
// every class has an undocumented dependency to these, must be included!
require_once 'HTMLPurifier/ConfigSchema.php'; // fatal errors if not included
require_once 'HTMLPurifier/Config.php';
require_once 'HTMLPurifier/Context.php';
@@ -60,16 +59,23 @@ require_once 'HTMLPurifier/LanguageFactory.php';
HTMLPurifier_ConfigSchema::define(
'Core', 'CollectErrors', false, 'bool', '
Whether or not to collect errors found while filtering the document. This
is a useful way to give feedback to your users. CURRENTLY NOT IMPLEMENTED.
This directive has been available since 2.0.0.
is a useful way to give feedback to your users. <strong>Warning:</strong>
Currently this feature is very patchy and experimental, with lots of
possible error messages not yet implemented. It will not cause any problems,
but it may not help your users either. This directive has been available
since 2.0.0.
');
/**
* Main library execution class.
* Facade that coordinates HTML Purifier's subsystems in order to purify HTML.
*
* Facade that performs calls to the HTMLPurifier_Lexer,
* HTMLPurifier_Strategy and HTMLPurifier_Generator subsystems in order to
* purify HTML.
* @note There are several points in which configuration can be specified
* for HTML Purifier. The precedence of these (from lowest to
* highest) is as follows:
* -# Instance: new HTMLPurifier($config)
* -# Invocation: purify($html, $config)
* These configurations are entirely independent of each other and
* are *not* merged.
*
* @todo We need an easier way to inject strategies, it'll probably end
* up getting done through config though.
@@ -77,15 +83,16 @@ This directive has been available since 2.0.0.
class HTMLPurifier
{
var $version = '2.1.2';
var $version = '2.1.3';
var $config;
var $filters;
var $filters = array();
var $strategy, $generator;
/**
* Final HTMLPurifier_Context of last run purification. Might be an array.
* Resultant HTMLPurifier_Context of last run purification. Is an array
* of contexts if the last called method was purifyArray().
* @public
*/
var $context;
@@ -150,6 +157,11 @@ class HTMLPurifier
$context->register('ErrorCollector', $error_collector);
}
// setup id_accumulator context, necessary due to the fact that
// AttrValidator can be called from many places
$id_accumulator = HTMLPurifier_IDAccumulator::build($config, $context);
$context->register('IDAccumulator', $id_accumulator);
$html = HTMLPurifier_Encoder::convertToUTF8($html, $config, $context);
for ($i = 0, $size = count($this->filters); $i < $size; $i++) {
@@ -198,6 +210,8 @@ class HTMLPurifier
/**
* Singleton for enforcing just one HTML Purifier in your system
* @param $prototype Optional prototype HTMLPurifier instance to
* overload singleton with.
*/
static function &getInstance($prototype = null) {
static $htmlpurifier;

View File

@@ -102,7 +102,7 @@ class HTMLPurifier_AttrDef_URI extends HTMLPurifier_AttrDef
$result = $uri->validate($config, $context);
if (!$result) break;
// chained validation
// chained filtering
$uri_def =& $config->getDefinition('URI');
$result = $uri_def->filter($uri, $config, $context);
if (!$result) break;

View File

@@ -1,7 +1,6 @@
<?php
require_once 'HTMLPurifier/AttrDef.php';
require_once 'HTMLPurifier/AttrDef/URI/Email/SimpleCheck.php';
class HTMLPurifier_AttrDef_URI_Email extends HTMLPurifier_AttrDef
{
@@ -15,3 +14,5 @@ class HTMLPurifier_AttrDef_URI_Email extends HTMLPurifier_AttrDef
}
// sub-implementations
require_once 'HTMLPurifier/AttrDef/URI/Email/SimpleCheck.php';

View File

@@ -23,6 +23,13 @@ class HTMLPurifier_AttrValidator
$definition = $config->getHTMLDefinition();
$e =& $context->get('ErrorCollector', true);
// initialize IDAccumulator if necessary
$ok =& $context->get('IDAccumulator', true);
if (!$ok) {
$id_accumulator = HTMLPurifier_IDAccumulator::build($config, $context);
$context->register('IDAccumulator', $id_accumulator);
}
// initialize CurrentToken if necessary
$current_token =& $context->get('CurrentToken', true);
if (!$current_token) $context->register('CurrentToken', $token);

View File

@@ -15,7 +15,10 @@ class HTMLPurifier_ChildDef_Optional extends HTMLPurifier_ChildDef_Required
var $type = 'optional';
function validateChildren($tokens_of_children, $config, &$context) {
$result = parent::validateChildren($tokens_of_children, $config, $context);
if ($result === false) return array();
if ($result === false) {
if (empty($tokens_of_children)) return true;
else return array();
}
return $result;
}
}

View File

@@ -42,7 +42,7 @@ class HTMLPurifier_Config
/**
* HTML Purifier's version
*/
var $version = '2.1.2';
var $version = '2.1.3';
/**
* Two-level associative array of configuration directives

View File

@@ -236,13 +236,26 @@ class HTMLPurifier_HTMLDefinition extends HTMLPurifier_Definition
/**
* Adds a custom element to your HTML definition
* @note See HTMLPurifier_HTMLModule::addElement for detailed
* parameter descriptions.
* parameter and return value descriptions.
*/
function addElement($element_name, $type, $contents, $attr_collections, $attributes) {
function &addElement($element_name, $type, $contents, $attr_collections, $attributes) {
$module =& $this->getAnonymousModule();
// assume that if the user is calling this, the element
// is safe. This may not be a good idea
$module->addElement($element_name, true, $type, $contents, $attr_collections, $attributes);
$element =& $module->addElement($element_name, true, $type, $contents, $attr_collections, $attributes);
return $element;
}
/**
* Adds a blank element to your HTML definition, for overriding
* existing behavior
* @note See HTMLPurifier_HTMLModule::addBlankElement for detailed
* parameter and return value descriptions.
*/
function &addBlankElement($element_name) {
$module =& $this->getAnonymousModule();
$element =& $module->addBlankElement($element_name);
return $element;
}
/**

View File

@@ -13,6 +13,8 @@ require_once 'HTMLPurifier/AttrTransform/Length.php';
require_once 'HTMLPurifier/AttrTransform/ImgSpace.php';
require_once 'HTMLPurifier/AttrTransform/EnumToCSS.php';
require_once 'HTMLPurifier/ChildDef/StrictBlockquote.php';
class HTMLPurifier_HTMLModule_Tidy_XHTMLAndHTML4 extends
HTMLPurifier_HTMLModule_Tidy
{
@@ -188,5 +190,17 @@ class HTMLPurifier_HTMLModule_Tidy_Strict extends
{
var $name = 'Tidy_Strict';
var $defaultLevel = 'light';
function makeFixes() {
$r = parent::makeFixes();
$r['blockquote#content_model_type'] = 'strictblockquote';
return $r;
}
var $defines_child_def = true;
function getChildDef($def) {
if ($def->content_model_type != 'strictblockquote') return parent::getChildDef($def);
return new HTMLPurifier_ChildDef_StrictBlockquote($def->content_model);
}
}

View File

@@ -1,26 +0,0 @@
<?php
require_once 'HTMLPurifier/HTMLModule/Tidy.php';
require_once 'HTMLPurifier/ChildDef/StrictBlockquote.php';
class HTMLPurifier_HTMLModule_Tidy_XHTMLStrict extends
HTMLPurifier_HTMLModule_Tidy
{
var $name = 'Tidy_XHTMLStrict';
var $defaultLevel = 'light';
function makeFixes() {
$r = array();
$r['blockquote#content_model_type'] = 'strictblockquote';
return $r;
}
var $defines_child_def = true;
function getChildDef($def) {
if ($def->content_model_type != 'strictblockquote') return false;
return new HTMLPurifier_ChildDef_StrictBlockquote($def->content_model);
}
}

View File

@@ -35,7 +35,6 @@ require_once 'HTMLPurifier/HTMLModule/Object.php';
require_once 'HTMLPurifier/HTMLModule/Tidy.php';
require_once 'HTMLPurifier/HTMLModule/Tidy/XHTMLAndHTML4.php';
require_once 'HTMLPurifier/HTMLModule/Tidy/XHTML.php';
require_once 'HTMLPurifier/HTMLModule/Tidy/XHTMLStrict.php';
require_once 'HTMLPurifier/HTMLModule/Tidy/Proprietary.php';
HTMLPurifier_ConfigSchema::define(
@@ -209,7 +208,7 @@ class HTMLPurifier_HTMLModuleManager
$this->doctypes->register(
'XHTML 1.0 Strict', true,
array_merge($common, $xml, $non_xml),
array('Tidy_Strict', 'Tidy_XHTML', 'Tidy_XHTMLStrict', 'Tidy_Proprietary'),
array('Tidy_Strict', 'Tidy_XHTML', 'Tidy_Strict', 'Tidy_Proprietary'),
array(),
'-//W3C//DTD XHTML 1.0 Strict//EN',
'http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd'
@@ -218,7 +217,7 @@ class HTMLPurifier_HTMLModuleManager
$this->doctypes->register(
'XHTML 1.1', true,
array_merge($common, $xml, array('Ruby')),
array('Tidy_Strict', 'Tidy_XHTML', 'Tidy_Proprietary', 'Tidy_XHTMLStrict'), // Tidy_XHTML1_1
array('Tidy_Strict', 'Tidy_XHTML', 'Tidy_Proprietary', 'Tidy_Strict'), // Tidy_XHTML1_1
array(),
'-//W3C//DTD XHTML 1.1//EN',
'http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd'

View File

@@ -1,11 +1,15 @@
<?php
HTMLPurifier_ConfigSchema::define(
'Attr', 'IDBlacklist', array(), 'list',
'Array of IDs not allowed in the document.'
);
/**
* Component of HTMLPurifier_AttrContext that accumulates IDs to prevent dupes
* @note In Slashdot-speak, dupe means duplicate.
* @note This class does not accept $config or $context, thus, it is the
* burden of the callee to register the appropriate errors or
* configuration.
* @note The default constructor does not accept $config or $context objects:
* use must use the static build() factory method to perform initialization.
*/
class HTMLPurifier_IDAccumulator
{
@@ -16,6 +20,19 @@ class HTMLPurifier_IDAccumulator
*/
var $ids = array();
/**
* Builds an IDAccumulator, also initializing the default blacklist
* @param $config Instance of HTMLPurifier_Config
* @param $context Instance of HTMLPurifier_Context
* @return Fully initialized HTMLPurifier_IDAccumulator
* @static
*/
static function build($config, &$context) {
$id_accumulator = new HTMLPurifier_IDAccumulator();
$id_accumulator->load($config->get('Attr', 'IDBlacklist'));
return $id_accumulator;
}
/**
* Add an ID to the lookup table.
* @param $id ID to be added.

View File

@@ -4,6 +4,9 @@
* Injects tokens into the document while parsing for well-formedness.
* This enables "formatter-like" functionality such as auto-paragraphing,
* smiley-ification and linkification to take place.
*
* @todo Allow injectors to request a re-run on their output. This
* would help if an operation is recursive.
*/
class HTMLPurifier_Injector
{
@@ -107,5 +110,12 @@ class HTMLPurifier_Injector
*/
function handleElement(&$token) {}
/**
* Notifier that is called when an end token is processed
* @note This differs from handlers in that the token is read-only
*/
function notifyEnd($token) {}
}

View File

@@ -6,20 +6,28 @@ HTMLPurifier_ConfigSchema::define(
'AutoFormat', 'AutoParagraph', false, 'bool', '
<p>
This directive turns on auto-paragraphing, where double newlines are
converted in to paragraphs whenever possible. Auto-paragraphing
applies when:
converted in to paragraphs whenever possible. Auto-paragraphing:
</p>
<ul>
<li>There are inline elements or text in the root node</li>
<li>There are inline elements or text with double newlines or
block elements in nodes that allow paragraph tags</li>
<li>There are double newlines in paragraph tags</li>
<li>Always applies to inline elements or text in the root node,</li>
<li>Applies to inline elements or text with double newlines in nodes
that allow paragraph tags,</li>
<li>Applies to double newlines in paragraph tags</li>
</ul>
<p>
<code>p</code> tags must be allowed for this directive to take effect.
We do not use <code>br</code> tags for paragraphing, as that is
semantically incorrect.
</p>
<p>
To prevent auto-paragraphing as a content-producer, refrain from using
double-newlines except to specify a new paragraph or in contexts where
it has special meaning (whitespace usually has no meaning except in
tags like <code>pre</code>, so this should not be difficult.) To prevent
the paragraphing of inline text adjacent to block elements, wrap them
in <code>div</code> tags (the behavior is slightly different outside of
the root node.)
</p>
<p>
This directive has been available since 2.0.1.
</p>
@@ -62,19 +70,27 @@ class HTMLPurifier_Injector_AutoParagraph extends HTMLPurifier_Injector
$ok = false;
// test if up-coming tokens are either block or have
// a double newline in them
$nesting = 0;
for ($i = $this->inputIndex + 1; isset($this->inputTokens[$i]); $i++) {
if ($this->inputTokens[$i]->type == 'start'){
if (!$this->_isInline($this->inputTokens[$i])) {
$ok = true;
// we haven't found a double-newline, and
// we've hit a block element, so don't paragraph
$ok = false;
break;
}
break;
$nesting++;
}
if ($this->inputTokens[$i]->type == 'end') {
if ($nesting <= 0) break;
$nesting--;
}
if ($this->inputTokens[$i]->type == 'end') break;
if ($this->inputTokens[$i]->type == 'text') {
// found it!
if (strpos($this->inputTokens[$i]->data, "\n\n") !== false) {
$ok = true;
break;
}
if (!$this->inputTokens[$i]->is_whitespace) break;
}
}
if ($ok) {

View File

@@ -13,11 +13,14 @@ if (version_compare(PHP_VERSION, "5", ">=")) {
}
HTMLPurifier_ConfigSchema::define(
'Core', 'AcceptFullDocuments', true, 'bool',
'This parameter determines whether or not the filter should accept full '.
'HTML documents, not just HTML fragments. When on, it will '.
'drop all sections except the content between body.'
);
'Core', 'ConvertDocumentToFragment', true, 'bool', '
This parameter determines whether or not the filter should convert
input that is a full document with html and body tags to a fragment
of just the contents of a body tag. This parameter is simply something
HTML Purifier can do during an edge-case: for most inputs, this
processing is not necessary.
');
HTMLPurifier_ConfigSchema::defineAlias('Core', 'AcceptFullDocuments', 'Core', 'ConvertDocumentToFragment');
HTMLPurifier_ConfigSchema::define(
'Core', 'LexerImpl', null, 'mixed/null', '
@@ -316,7 +319,7 @@ class HTMLPurifier_Lexer
function normalize($html, $config, &$context) {
// extract body from document if applicable
if ($config->get('Core', 'AcceptFullDocuments')) {
if ($config->get('Core', 'ConvertDocumentToFragment')) {
$html = $this->extractBody($html);
}

View File

@@ -160,9 +160,15 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer
$segment = substr($html, $cursor, $strlen_segment);
if ($segment === false) {
// somehow, we attempted to access beyond the end of
// the string, defense-in-depth, reported by Nate Abele
break;
}
// Check if it's a comment
if (
substr($segment, 0, 3) == '!--'
substr($segment, 0, 3) === '!--'
) {
// re-determine segment length, looking for -->
$position_comment_end = strpos($html, '-->', $cursor);
@@ -237,7 +243,7 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer
// trailing slash. Remember, we could have a tag like <br>, so
// any later token processing scripts must convert improperly
// classified EmptyTags from StartTags.
$is_self_closing= (strrpos($segment,'/') === $strlen_segment-1);
$is_self_closing = (strrpos($segment,'/') === $strlen_segment-1);
if ($is_self_closing) {
$strlen_segment--;
$segment = substr($segment, 0, $strlen_segment);

View File

@@ -26,8 +26,6 @@ class HTMLPurifier_Lexer_PH5P extends HTMLPurifier_Lexer_DOMLex {
}
// begin PHP5P source code here
/*
Copyright 2007 Jeroen van der Meer <http://jero.net/>
@@ -3722,7 +3720,7 @@ class HTML5TreeConstructer {
}
}
private function generateImpliedEndTags(array $exclude = array()) {
private function generateImpliedEndTags($exclude = array()) {
/* When the steps below require the UA to generate implied end tags,
then, if the current node is a dd element, a dt element, an li element,
a p element, a td element, a th element, or a tr element, the UA must
@@ -3736,7 +3734,8 @@ class HTML5TreeConstructer {
}
}
private function getElementCategory($name) {
private function getElementCategory($node) {
$name = $node->tagName;
if(in_array($name, $this->special))
return self::SPECIAL;
@@ -3884,3 +3883,4 @@ class HTML5TreeConstructer {
return $this->dom;
}
}
?>

View File

@@ -195,7 +195,7 @@ class HTMLPurifier_Strategy_FixNesting extends HTMLPurifier_Strategy
//################################################################//
// Process result by interpreting $result
if ($result === true) {
if ($result === true || $child_tokens === $result) {
// leave the node as is
// register start token as a parental node start

View File

@@ -36,28 +36,23 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
$definition = $config->getHTMLDefinition();
// CurrentNesting
$this->currentNesting = array();
$context->register('CurrentNesting', $this->currentNesting);
// InputIndex
$this->inputIndex = false;
$context->register('InputIndex', $this->inputIndex);
// InputTokens
$context->register('InputTokens', $tokens);
$this->inputTokens =& $tokens;
// OutputTokens
// local variables
$result = array();
$this->outputTokens =& $result;
// %Core.EscapeInvalidTags
$escape_invalid_tags = $config->get('Core', 'EscapeInvalidTags');
$generator = new HTMLPurifier_Generator();
$escape_invalid_tags = $config->get('Core', 'EscapeInvalidTags');
$e =& $context->get('ErrorCollector', true);
// member variables
$this->currentNesting = array();
$this->inputIndex = false;
$this->inputTokens =& $tokens;
$this->outputTokens =& $result;
// context variables
$context->register('CurrentNesting', $this->currentNesting);
$context->register('InputIndex', $this->inputIndex);
$context->register('InputTokens', $tokens);
// -- begin INJECTOR --
$this->injectors = array();
@@ -95,6 +90,10 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
trigger_error("Cannot enable $name injector because $error is not allowed", E_USER_WARNING);
}
// warning: most foreach loops follow the convention $i => $x.
// be sure, for PHP4 compatibility, to only perform write operations
// directly referencing the object using $i: $x is only safe for reads
// -- end INJECTOR --
$token = false;
@@ -105,6 +104,8 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
// if all goes well, this token will be passed through unharmed
$token = $tokens[$this->inputIndex];
//printTokens($tokens, $this->inputIndex);
foreach ($this->injectors as $i => $x) {
if ($x->skip > 0) $this->injectors[$i]->skip--;
}
@@ -114,7 +115,7 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
if ($token->type === 'text') {
// injector handler code; duplicated for performance reasons
foreach ($this->injectors as $i => $x) {
if (!$x->skip) $x->handleText($token);
if (!$x->skip) $this->injectors[$i]->handleText($token);
if (is_array($token)) {
$this->currentInjector = $i;
break;
@@ -172,7 +173,7 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
// injector handler code; duplicated for performance reasons
if ($ok) {
foreach ($this->injectors as $i => $x) {
if (!$x->skip) $x->handleElement($token);
if (!$x->skip) $this->injectors[$i]->handleElement($token);
if (is_array($token)) {
$this->currentInjector = $i;
break;
@@ -202,6 +203,9 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
$current_parent = array_pop($this->currentNesting);
if ($current_parent->name == $token->name) {
$result[] = $token;
foreach ($this->injectors as $i => $x) {
$this->injectors[$i]->notifyEnd($token);
}
continue;
}
@@ -238,16 +242,16 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
// okay, we found it, close all the skipped tags
// note that skipped tags contains the element we need closed
$size = count($skipped_tags);
for ($i = $size - 1; $i > 0; $i--) {
if ($e && !isset($skipped_tags[$i]->armor['MakeWellFormed_TagClosedError'])) {
for ($i = count($skipped_tags) - 1; $i >= 0; $i--) {
if ($i && $e && !isset($skipped_tags[$i]->armor['MakeWellFormed_TagClosedError'])) {
$e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag closed by element end', $skipped_tags[$i]);
}
$result[] = new HTMLPurifier_Token_End($skipped_tags[$i]->name);
$result[] = $new_token = new HTMLPurifier_Token_End($skipped_tags[$i]->name);
foreach ($this->injectors as $j => $x) { // $j, not $i!!!
$this->injectors[$j]->notifyEnd($new_token);
}
}
$result[] = new HTMLPurifier_Token_End($skipped_tags[$i]->name);
}
$context->destroy('CurrentNesting');
@@ -255,17 +259,18 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
$context->destroy('InputIndex');
$context->destroy('CurrentToken');
// we're at the end now, fix all still unclosed tags
// not using processToken() because at this point we don't
// care about current nesting
// we're at the end now, fix all still unclosed tags (this is
// duplicated from the end of the loop with some slight modifications)
// not using $skipped_tags since it would invariably be all of them
if (!empty($this->currentNesting)) {
$size = count($this->currentNesting);
for ($i = $size - 1; $i >= 0; $i--) {
for ($i = count($this->currentNesting) - 1; $i >= 0; $i--) {
if ($e && !isset($this->currentNesting[$i]->armor['MakeWellFormed_TagClosedError'])) {
$e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag closed by document end', $this->currentNesting[$i]);
}
$result[] =
new HTMLPurifier_Token_End($this->currentNesting[$i]->name);
$result[] = $new_token = new HTMLPurifier_Token_End($this->currentNesting[$i]->name);
foreach ($this->injectors as $j => $x) { // $j, not $i!!!
$this->injectors[$j]->notifyEnd($new_token);
}
}
}
@@ -286,8 +291,14 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
// adjust the injector skips based on the array substitution
if ($this->injectors) {
$offset = count($token) + 1;
$offset = count($token);
for ($i = 0; $i <= $this->currentInjector; $i++) {
// because of the skip back, we need to add one more
// for uninitialized injectors. I'm not exactly
// sure why this is the case, but I think it has to
// do with the fact that we're decrementing skips
// before re-checking text
if (!$this->injectors[$i]->skip) $this->injectors[$i]->skip++;
$this->injectors[$i]->skip += $offset;
}
}

View File

@@ -116,6 +116,7 @@ class HTMLPurifier_Strategy_RemoveForeignElements extends HTMLPurifier_Strategy
// mostly everything's good, but
// we need to make sure required attributes are in order
if (
($token->type === 'start' || $token->type === 'empty') &&
$definition->info[$token->name]->required_attr &&
($token->name != 'img' || $remove_invalid_img) // ensure config option still works
) {
@@ -134,7 +135,6 @@ class HTMLPurifier_Strategy_RemoveForeignElements extends HTMLPurifier_Strategy
$token->armor['ValidateAttributes'] = true;
}
// CAN BE GENERICIZED
if (isset($hidden_elements[$token->name]) && $token->type == 'start') {
$textify_comments = $token->name;
} elseif ($token->name === $textify_comments && $token->type == 'end') {

View File

@@ -6,10 +6,6 @@ require_once 'HTMLPurifier/IDAccumulator.php';
require_once 'HTMLPurifier/AttrValidator.php';
HTMLPurifier_ConfigSchema::define(
'Attr', 'IDBlacklist', array(), 'list',
'Array of IDs not allowed in the document.');
/**
* Validate all attributes in the tokens.
*/
@@ -19,11 +15,6 @@ class HTMLPurifier_Strategy_ValidateAttributes extends HTMLPurifier_Strategy
function execute($tokens, $config, &$context) {
// setup id_accumulator context
$id_accumulator = new HTMLPurifier_IDAccumulator();
$id_accumulator->load($config->get('Attr', 'IDBlacklist'));
$context->register('IDAccumulator', $id_accumulator);
// setup validator
$validator = new HTMLPurifier_AttrValidator();
@@ -44,8 +35,6 @@ class HTMLPurifier_Strategy_ValidateAttributes extends HTMLPurifier_Strategy
$tokens[$key] = $token; // for PHP 4
}
$context->destroy('IDAccumulator');
$context->destroy('CurrentToken');
return $tokens;

View File

@@ -1,10 +1,22 @@
<?php
/**
* Chainable filters for custom URI processing
* Chainable filters for custom URI processing.
*
* These filters can perform custom actions on a URI filter object,
* including transformation or blacklisting.
*
* @warning This filter is called before scheme object validation occurs.
* Make sure, if you require a specific scheme object, you
* you check that it exists. This allows filters to convert
* proprietary URI schemes into regular ones.
*/
class HTMLPurifier_URIFilter
{
/**
* Unique identifier of filter
*/
var $name;
/**
@@ -17,8 +29,12 @@ class HTMLPurifier_URIFilter
* @param &$uri Reference to URI object
* @param $config Instance of HTMLPurifier_Config
* @param &$context Instance of HTMLPurifier_Context
* @return bool Whether or not to continue processing: false indicates
* URL is no good, true indicates continue processing. Note that
* all changes are committed directly on the URI object
*/
function filter(&$uri, $config, &$context) {
trigger_error('Cannot call abstract function', E_USER_ERROR);
}
}

View File

@@ -47,6 +47,10 @@ class HTMLPurifier_URIFilter_MakeAbsolute extends HTMLPurifier_URIFilter
// absolute URI already: don't change
if (!is_null($uri->host)) return true;
$scheme_obj = $uri->getSchemeObj($config, $context);
if (!$scheme_obj) {
// scheme not recognized
return false;
}
if (!$scheme_obj->hierarchical) {
// non-hierarchal URI with explicit scheme, don't change
return true;