mirror of
https://github.com/ezyang/htmlpurifier.git
synced 2025-08-11 16:44:59 +02:00
Release 2.1.3, merged in 1404 to HEAD.
git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/branches/strict@1444 48356398-32a2-884e-a903-53898d9a118a
This commit is contained in:
@@ -22,8 +22,8 @@
|
||||
*/
|
||||
|
||||
/*
|
||||
HTML Purifier 2.1.2 - Standards Compliant HTML Filtering
|
||||
Copyright (C) 2006 Edward Z. Yang
|
||||
HTML Purifier 2.1.3 - Standards Compliant HTML Filtering
|
||||
Copyright (C) 2006-2007 Edward Z. Yang
|
||||
|
||||
This library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
@@ -43,9 +43,8 @@
|
||||
// constants are slow, but we'll make one exception
|
||||
define('HTMLPURIFIER_PREFIX', dirname(__FILE__));
|
||||
|
||||
// almost every class has an undocumented dependency to these, so make sure
|
||||
// they get included
|
||||
require_once 'HTMLPurifier/ConfigSchema.php'; // important
|
||||
// every class has an undocumented dependency to these, must be included!
|
||||
require_once 'HTMLPurifier/ConfigSchema.php'; // fatal errors if not included
|
||||
require_once 'HTMLPurifier/Config.php';
|
||||
require_once 'HTMLPurifier/Context.php';
|
||||
|
||||
@@ -60,16 +59,23 @@ require_once 'HTMLPurifier/LanguageFactory.php';
|
||||
HTMLPurifier_ConfigSchema::define(
|
||||
'Core', 'CollectErrors', false, 'bool', '
|
||||
Whether or not to collect errors found while filtering the document. This
|
||||
is a useful way to give feedback to your users. CURRENTLY NOT IMPLEMENTED.
|
||||
This directive has been available since 2.0.0.
|
||||
is a useful way to give feedback to your users. <strong>Warning:</strong>
|
||||
Currently this feature is very patchy and experimental, with lots of
|
||||
possible error messages not yet implemented. It will not cause any problems,
|
||||
but it may not help your users either. This directive has been available
|
||||
since 2.0.0.
|
||||
');
|
||||
|
||||
/**
|
||||
* Main library execution class.
|
||||
* Facade that coordinates HTML Purifier's subsystems in order to purify HTML.
|
||||
*
|
||||
* Facade that performs calls to the HTMLPurifier_Lexer,
|
||||
* HTMLPurifier_Strategy and HTMLPurifier_Generator subsystems in order to
|
||||
* purify HTML.
|
||||
* @note There are several points in which configuration can be specified
|
||||
* for HTML Purifier. The precedence of these (from lowest to
|
||||
* highest) is as follows:
|
||||
* -# Instance: new HTMLPurifier($config)
|
||||
* -# Invocation: purify($html, $config)
|
||||
* These configurations are entirely independent of each other and
|
||||
* are *not* merged.
|
||||
*
|
||||
* @todo We need an easier way to inject strategies, it'll probably end
|
||||
* up getting done through config though.
|
||||
@@ -77,15 +83,16 @@ This directive has been available since 2.0.0.
|
||||
class HTMLPurifier
|
||||
{
|
||||
|
||||
var $version = '2.1.2';
|
||||
var $version = '2.1.3';
|
||||
|
||||
var $config;
|
||||
var $filters;
|
||||
var $filters = array();
|
||||
|
||||
var $strategy, $generator;
|
||||
|
||||
/**
|
||||
* Final HTMLPurifier_Context of last run purification. Might be an array.
|
||||
* Resultant HTMLPurifier_Context of last run purification. Is an array
|
||||
* of contexts if the last called method was purifyArray().
|
||||
* @public
|
||||
*/
|
||||
var $context;
|
||||
@@ -150,6 +157,11 @@ class HTMLPurifier
|
||||
$context->register('ErrorCollector', $error_collector);
|
||||
}
|
||||
|
||||
// setup id_accumulator context, necessary due to the fact that
|
||||
// AttrValidator can be called from many places
|
||||
$id_accumulator = HTMLPurifier_IDAccumulator::build($config, $context);
|
||||
$context->register('IDAccumulator', $id_accumulator);
|
||||
|
||||
$html = HTMLPurifier_Encoder::convertToUTF8($html, $config, $context);
|
||||
|
||||
for ($i = 0, $size = count($this->filters); $i < $size; $i++) {
|
||||
@@ -198,6 +210,8 @@ class HTMLPurifier
|
||||
|
||||
/**
|
||||
* Singleton for enforcing just one HTML Purifier in your system
|
||||
* @param $prototype Optional prototype HTMLPurifier instance to
|
||||
* overload singleton with.
|
||||
*/
|
||||
static function &getInstance($prototype = null) {
|
||||
static $htmlpurifier;
|
||||
|
@@ -102,7 +102,7 @@ class HTMLPurifier_AttrDef_URI extends HTMLPurifier_AttrDef
|
||||
$result = $uri->validate($config, $context);
|
||||
if (!$result) break;
|
||||
|
||||
// chained validation
|
||||
// chained filtering
|
||||
$uri_def =& $config->getDefinition('URI');
|
||||
$result = $uri_def->filter($uri, $config, $context);
|
||||
if (!$result) break;
|
||||
|
@@ -1,7 +1,6 @@
|
||||
<?php
|
||||
|
||||
require_once 'HTMLPurifier/AttrDef.php';
|
||||
require_once 'HTMLPurifier/AttrDef/URI/Email/SimpleCheck.php';
|
||||
|
||||
class HTMLPurifier_AttrDef_URI_Email extends HTMLPurifier_AttrDef
|
||||
{
|
||||
@@ -15,3 +14,5 @@ class HTMLPurifier_AttrDef_URI_Email extends HTMLPurifier_AttrDef
|
||||
|
||||
}
|
||||
|
||||
// sub-implementations
|
||||
require_once 'HTMLPurifier/AttrDef/URI/Email/SimpleCheck.php';
|
||||
|
@@ -23,6 +23,13 @@ class HTMLPurifier_AttrValidator
|
||||
$definition = $config->getHTMLDefinition();
|
||||
$e =& $context->get('ErrorCollector', true);
|
||||
|
||||
// initialize IDAccumulator if necessary
|
||||
$ok =& $context->get('IDAccumulator', true);
|
||||
if (!$ok) {
|
||||
$id_accumulator = HTMLPurifier_IDAccumulator::build($config, $context);
|
||||
$context->register('IDAccumulator', $id_accumulator);
|
||||
}
|
||||
|
||||
// initialize CurrentToken if necessary
|
||||
$current_token =& $context->get('CurrentToken', true);
|
||||
if (!$current_token) $context->register('CurrentToken', $token);
|
||||
|
@@ -15,7 +15,10 @@ class HTMLPurifier_ChildDef_Optional extends HTMLPurifier_ChildDef_Required
|
||||
var $type = 'optional';
|
||||
function validateChildren($tokens_of_children, $config, &$context) {
|
||||
$result = parent::validateChildren($tokens_of_children, $config, $context);
|
||||
if ($result === false) return array();
|
||||
if ($result === false) {
|
||||
if (empty($tokens_of_children)) return true;
|
||||
else return array();
|
||||
}
|
||||
return $result;
|
||||
}
|
||||
}
|
||||
|
@@ -42,7 +42,7 @@ class HTMLPurifier_Config
|
||||
/**
|
||||
* HTML Purifier's version
|
||||
*/
|
||||
var $version = '2.1.2';
|
||||
var $version = '2.1.3';
|
||||
|
||||
/**
|
||||
* Two-level associative array of configuration directives
|
||||
|
@@ -236,13 +236,26 @@ class HTMLPurifier_HTMLDefinition extends HTMLPurifier_Definition
|
||||
/**
|
||||
* Adds a custom element to your HTML definition
|
||||
* @note See HTMLPurifier_HTMLModule::addElement for detailed
|
||||
* parameter descriptions.
|
||||
* parameter and return value descriptions.
|
||||
*/
|
||||
function addElement($element_name, $type, $contents, $attr_collections, $attributes) {
|
||||
function &addElement($element_name, $type, $contents, $attr_collections, $attributes) {
|
||||
$module =& $this->getAnonymousModule();
|
||||
// assume that if the user is calling this, the element
|
||||
// is safe. This may not be a good idea
|
||||
$module->addElement($element_name, true, $type, $contents, $attr_collections, $attributes);
|
||||
$element =& $module->addElement($element_name, true, $type, $contents, $attr_collections, $attributes);
|
||||
return $element;
|
||||
}
|
||||
|
||||
/**
|
||||
* Adds a blank element to your HTML definition, for overriding
|
||||
* existing behavior
|
||||
* @note See HTMLPurifier_HTMLModule::addBlankElement for detailed
|
||||
* parameter and return value descriptions.
|
||||
*/
|
||||
function &addBlankElement($element_name) {
|
||||
$module =& $this->getAnonymousModule();
|
||||
$element =& $module->addBlankElement($element_name);
|
||||
return $element;
|
||||
}
|
||||
|
||||
/**
|
||||
|
@@ -13,6 +13,8 @@ require_once 'HTMLPurifier/AttrTransform/Length.php';
|
||||
require_once 'HTMLPurifier/AttrTransform/ImgSpace.php';
|
||||
require_once 'HTMLPurifier/AttrTransform/EnumToCSS.php';
|
||||
|
||||
require_once 'HTMLPurifier/ChildDef/StrictBlockquote.php';
|
||||
|
||||
class HTMLPurifier_HTMLModule_Tidy_XHTMLAndHTML4 extends
|
||||
HTMLPurifier_HTMLModule_Tidy
|
||||
{
|
||||
@@ -188,5 +190,17 @@ class HTMLPurifier_HTMLModule_Tidy_Strict extends
|
||||
{
|
||||
var $name = 'Tidy_Strict';
|
||||
var $defaultLevel = 'light';
|
||||
|
||||
function makeFixes() {
|
||||
$r = parent::makeFixes();
|
||||
$r['blockquote#content_model_type'] = 'strictblockquote';
|
||||
return $r;
|
||||
}
|
||||
|
||||
var $defines_child_def = true;
|
||||
function getChildDef($def) {
|
||||
if ($def->content_model_type != 'strictblockquote') return parent::getChildDef($def);
|
||||
return new HTMLPurifier_ChildDef_StrictBlockquote($def->content_model);
|
||||
}
|
||||
}
|
||||
|
||||
|
@@ -1,26 +0,0 @@
|
||||
<?php
|
||||
|
||||
require_once 'HTMLPurifier/HTMLModule/Tidy.php';
|
||||
require_once 'HTMLPurifier/ChildDef/StrictBlockquote.php';
|
||||
|
||||
class HTMLPurifier_HTMLModule_Tidy_XHTMLStrict extends
|
||||
HTMLPurifier_HTMLModule_Tidy
|
||||
{
|
||||
|
||||
var $name = 'Tidy_XHTMLStrict';
|
||||
var $defaultLevel = 'light';
|
||||
|
||||
function makeFixes() {
|
||||
$r = array();
|
||||
$r['blockquote#content_model_type'] = 'strictblockquote';
|
||||
return $r;
|
||||
}
|
||||
|
||||
var $defines_child_def = true;
|
||||
function getChildDef($def) {
|
||||
if ($def->content_model_type != 'strictblockquote') return false;
|
||||
return new HTMLPurifier_ChildDef_StrictBlockquote($def->content_model);
|
||||
}
|
||||
|
||||
}
|
||||
|
@@ -35,7 +35,6 @@ require_once 'HTMLPurifier/HTMLModule/Object.php';
|
||||
require_once 'HTMLPurifier/HTMLModule/Tidy.php';
|
||||
require_once 'HTMLPurifier/HTMLModule/Tidy/XHTMLAndHTML4.php';
|
||||
require_once 'HTMLPurifier/HTMLModule/Tidy/XHTML.php';
|
||||
require_once 'HTMLPurifier/HTMLModule/Tidy/XHTMLStrict.php';
|
||||
require_once 'HTMLPurifier/HTMLModule/Tidy/Proprietary.php';
|
||||
|
||||
HTMLPurifier_ConfigSchema::define(
|
||||
@@ -209,7 +208,7 @@ class HTMLPurifier_HTMLModuleManager
|
||||
$this->doctypes->register(
|
||||
'XHTML 1.0 Strict', true,
|
||||
array_merge($common, $xml, $non_xml),
|
||||
array('Tidy_Strict', 'Tidy_XHTML', 'Tidy_XHTMLStrict', 'Tidy_Proprietary'),
|
||||
array('Tidy_Strict', 'Tidy_XHTML', 'Tidy_Strict', 'Tidy_Proprietary'),
|
||||
array(),
|
||||
'-//W3C//DTD XHTML 1.0 Strict//EN',
|
||||
'http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd'
|
||||
@@ -218,7 +217,7 @@ class HTMLPurifier_HTMLModuleManager
|
||||
$this->doctypes->register(
|
||||
'XHTML 1.1', true,
|
||||
array_merge($common, $xml, array('Ruby')),
|
||||
array('Tidy_Strict', 'Tidy_XHTML', 'Tidy_Proprietary', 'Tidy_XHTMLStrict'), // Tidy_XHTML1_1
|
||||
array('Tidy_Strict', 'Tidy_XHTML', 'Tidy_Proprietary', 'Tidy_Strict'), // Tidy_XHTML1_1
|
||||
array(),
|
||||
'-//W3C//DTD XHTML 1.1//EN',
|
||||
'http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd'
|
||||
|
@@ -1,11 +1,15 @@
|
||||
<?php
|
||||
|
||||
HTMLPurifier_ConfigSchema::define(
|
||||
'Attr', 'IDBlacklist', array(), 'list',
|
||||
'Array of IDs not allowed in the document.'
|
||||
);
|
||||
|
||||
/**
|
||||
* Component of HTMLPurifier_AttrContext that accumulates IDs to prevent dupes
|
||||
* @note In Slashdot-speak, dupe means duplicate.
|
||||
* @note This class does not accept $config or $context, thus, it is the
|
||||
* burden of the callee to register the appropriate errors or
|
||||
* configuration.
|
||||
* @note The default constructor does not accept $config or $context objects:
|
||||
* use must use the static build() factory method to perform initialization.
|
||||
*/
|
||||
class HTMLPurifier_IDAccumulator
|
||||
{
|
||||
@@ -16,6 +20,19 @@ class HTMLPurifier_IDAccumulator
|
||||
*/
|
||||
var $ids = array();
|
||||
|
||||
/**
|
||||
* Builds an IDAccumulator, also initializing the default blacklist
|
||||
* @param $config Instance of HTMLPurifier_Config
|
||||
* @param $context Instance of HTMLPurifier_Context
|
||||
* @return Fully initialized HTMLPurifier_IDAccumulator
|
||||
* @static
|
||||
*/
|
||||
static function build($config, &$context) {
|
||||
$id_accumulator = new HTMLPurifier_IDAccumulator();
|
||||
$id_accumulator->load($config->get('Attr', 'IDBlacklist'));
|
||||
return $id_accumulator;
|
||||
}
|
||||
|
||||
/**
|
||||
* Add an ID to the lookup table.
|
||||
* @param $id ID to be added.
|
||||
|
@@ -4,6 +4,9 @@
|
||||
* Injects tokens into the document while parsing for well-formedness.
|
||||
* This enables "formatter-like" functionality such as auto-paragraphing,
|
||||
* smiley-ification and linkification to take place.
|
||||
*
|
||||
* @todo Allow injectors to request a re-run on their output. This
|
||||
* would help if an operation is recursive.
|
||||
*/
|
||||
class HTMLPurifier_Injector
|
||||
{
|
||||
@@ -107,5 +110,12 @@ class HTMLPurifier_Injector
|
||||
*/
|
||||
function handleElement(&$token) {}
|
||||
|
||||
/**
|
||||
* Notifier that is called when an end token is processed
|
||||
* @note This differs from handlers in that the token is read-only
|
||||
*/
|
||||
function notifyEnd($token) {}
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
@@ -6,20 +6,28 @@ HTMLPurifier_ConfigSchema::define(
|
||||
'AutoFormat', 'AutoParagraph', false, 'bool', '
|
||||
<p>
|
||||
This directive turns on auto-paragraphing, where double newlines are
|
||||
converted in to paragraphs whenever possible. Auto-paragraphing
|
||||
applies when:
|
||||
converted in to paragraphs whenever possible. Auto-paragraphing:
|
||||
</p>
|
||||
<ul>
|
||||
<li>There are inline elements or text in the root node</li>
|
||||
<li>There are inline elements or text with double newlines or
|
||||
block elements in nodes that allow paragraph tags</li>
|
||||
<li>There are double newlines in paragraph tags</li>
|
||||
<li>Always applies to inline elements or text in the root node,</li>
|
||||
<li>Applies to inline elements or text with double newlines in nodes
|
||||
that allow paragraph tags,</li>
|
||||
<li>Applies to double newlines in paragraph tags</li>
|
||||
</ul>
|
||||
<p>
|
||||
<code>p</code> tags must be allowed for this directive to take effect.
|
||||
We do not use <code>br</code> tags for paragraphing, as that is
|
||||
semantically incorrect.
|
||||
</p>
|
||||
<p>
|
||||
To prevent auto-paragraphing as a content-producer, refrain from using
|
||||
double-newlines except to specify a new paragraph or in contexts where
|
||||
it has special meaning (whitespace usually has no meaning except in
|
||||
tags like <code>pre</code>, so this should not be difficult.) To prevent
|
||||
the paragraphing of inline text adjacent to block elements, wrap them
|
||||
in <code>div</code> tags (the behavior is slightly different outside of
|
||||
the root node.)
|
||||
</p>
|
||||
<p>
|
||||
This directive has been available since 2.0.1.
|
||||
</p>
|
||||
@@ -62,19 +70,27 @@ class HTMLPurifier_Injector_AutoParagraph extends HTMLPurifier_Injector
|
||||
$ok = false;
|
||||
// test if up-coming tokens are either block or have
|
||||
// a double newline in them
|
||||
$nesting = 0;
|
||||
for ($i = $this->inputIndex + 1; isset($this->inputTokens[$i]); $i++) {
|
||||
if ($this->inputTokens[$i]->type == 'start'){
|
||||
if (!$this->_isInline($this->inputTokens[$i])) {
|
||||
$ok = true;
|
||||
// we haven't found a double-newline, and
|
||||
// we've hit a block element, so don't paragraph
|
||||
$ok = false;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
$nesting++;
|
||||
}
|
||||
if ($this->inputTokens[$i]->type == 'end') {
|
||||
if ($nesting <= 0) break;
|
||||
$nesting--;
|
||||
}
|
||||
if ($this->inputTokens[$i]->type == 'end') break;
|
||||
if ($this->inputTokens[$i]->type == 'text') {
|
||||
// found it!
|
||||
if (strpos($this->inputTokens[$i]->data, "\n\n") !== false) {
|
||||
$ok = true;
|
||||
break;
|
||||
}
|
||||
if (!$this->inputTokens[$i]->is_whitespace) break;
|
||||
}
|
||||
}
|
||||
if ($ok) {
|
||||
|
@@ -13,11 +13,14 @@ if (version_compare(PHP_VERSION, "5", ">=")) {
|
||||
}
|
||||
|
||||
HTMLPurifier_ConfigSchema::define(
|
||||
'Core', 'AcceptFullDocuments', true, 'bool',
|
||||
'This parameter determines whether or not the filter should accept full '.
|
||||
'HTML documents, not just HTML fragments. When on, it will '.
|
||||
'drop all sections except the content between body.'
|
||||
);
|
||||
'Core', 'ConvertDocumentToFragment', true, 'bool', '
|
||||
This parameter determines whether or not the filter should convert
|
||||
input that is a full document with html and body tags to a fragment
|
||||
of just the contents of a body tag. This parameter is simply something
|
||||
HTML Purifier can do during an edge-case: for most inputs, this
|
||||
processing is not necessary.
|
||||
');
|
||||
HTMLPurifier_ConfigSchema::defineAlias('Core', 'AcceptFullDocuments', 'Core', 'ConvertDocumentToFragment');
|
||||
|
||||
HTMLPurifier_ConfigSchema::define(
|
||||
'Core', 'LexerImpl', null, 'mixed/null', '
|
||||
@@ -316,7 +319,7 @@ class HTMLPurifier_Lexer
|
||||
function normalize($html, $config, &$context) {
|
||||
|
||||
// extract body from document if applicable
|
||||
if ($config->get('Core', 'AcceptFullDocuments')) {
|
||||
if ($config->get('Core', 'ConvertDocumentToFragment')) {
|
||||
$html = $this->extractBody($html);
|
||||
}
|
||||
|
||||
|
@@ -160,9 +160,15 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer
|
||||
|
||||
$segment = substr($html, $cursor, $strlen_segment);
|
||||
|
||||
if ($segment === false) {
|
||||
// somehow, we attempted to access beyond the end of
|
||||
// the string, defense-in-depth, reported by Nate Abele
|
||||
break;
|
||||
}
|
||||
|
||||
// Check if it's a comment
|
||||
if (
|
||||
substr($segment, 0, 3) == '!--'
|
||||
substr($segment, 0, 3) === '!--'
|
||||
) {
|
||||
// re-determine segment length, looking for -->
|
||||
$position_comment_end = strpos($html, '-->', $cursor);
|
||||
@@ -237,7 +243,7 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer
|
||||
// trailing slash. Remember, we could have a tag like <br>, so
|
||||
// any later token processing scripts must convert improperly
|
||||
// classified EmptyTags from StartTags.
|
||||
$is_self_closing= (strrpos($segment,'/') === $strlen_segment-1);
|
||||
$is_self_closing = (strrpos($segment,'/') === $strlen_segment-1);
|
||||
if ($is_self_closing) {
|
||||
$strlen_segment--;
|
||||
$segment = substr($segment, 0, $strlen_segment);
|
||||
|
@@ -26,8 +26,6 @@ class HTMLPurifier_Lexer_PH5P extends HTMLPurifier_Lexer_DOMLex {
|
||||
|
||||
}
|
||||
|
||||
// begin PHP5P source code here
|
||||
|
||||
/*
|
||||
|
||||
Copyright 2007 Jeroen van der Meer <http://jero.net/>
|
||||
@@ -3722,7 +3720,7 @@ class HTML5TreeConstructer {
|
||||
}
|
||||
}
|
||||
|
||||
private function generateImpliedEndTags(array $exclude = array()) {
|
||||
private function generateImpliedEndTags($exclude = array()) {
|
||||
/* When the steps below require the UA to generate implied end tags,
|
||||
then, if the current node is a dd element, a dt element, an li element,
|
||||
a p element, a td element, a th element, or a tr element, the UA must
|
||||
@@ -3736,7 +3734,8 @@ class HTML5TreeConstructer {
|
||||
}
|
||||
}
|
||||
|
||||
private function getElementCategory($name) {
|
||||
private function getElementCategory($node) {
|
||||
$name = $node->tagName;
|
||||
if(in_array($name, $this->special))
|
||||
return self::SPECIAL;
|
||||
|
||||
@@ -3884,3 +3883,4 @@ class HTML5TreeConstructer {
|
||||
return $this->dom;
|
||||
}
|
||||
}
|
||||
?>
|
||||
|
@@ -195,7 +195,7 @@ class HTMLPurifier_Strategy_FixNesting extends HTMLPurifier_Strategy
|
||||
//################################################################//
|
||||
// Process result by interpreting $result
|
||||
|
||||
if ($result === true) {
|
||||
if ($result === true || $child_tokens === $result) {
|
||||
// leave the node as is
|
||||
|
||||
// register start token as a parental node start
|
||||
|
@@ -36,28 +36,23 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
|
||||
|
||||
$definition = $config->getHTMLDefinition();
|
||||
|
||||
// CurrentNesting
|
||||
$this->currentNesting = array();
|
||||
$context->register('CurrentNesting', $this->currentNesting);
|
||||
|
||||
// InputIndex
|
||||
$this->inputIndex = false;
|
||||
$context->register('InputIndex', $this->inputIndex);
|
||||
|
||||
// InputTokens
|
||||
$context->register('InputTokens', $tokens);
|
||||
$this->inputTokens =& $tokens;
|
||||
|
||||
// OutputTokens
|
||||
// local variables
|
||||
$result = array();
|
||||
$this->outputTokens =& $result;
|
||||
|
||||
// %Core.EscapeInvalidTags
|
||||
$escape_invalid_tags = $config->get('Core', 'EscapeInvalidTags');
|
||||
$generator = new HTMLPurifier_Generator();
|
||||
|
||||
$escape_invalid_tags = $config->get('Core', 'EscapeInvalidTags');
|
||||
$e =& $context->get('ErrorCollector', true);
|
||||
|
||||
// member variables
|
||||
$this->currentNesting = array();
|
||||
$this->inputIndex = false;
|
||||
$this->inputTokens =& $tokens;
|
||||
$this->outputTokens =& $result;
|
||||
|
||||
// context variables
|
||||
$context->register('CurrentNesting', $this->currentNesting);
|
||||
$context->register('InputIndex', $this->inputIndex);
|
||||
$context->register('InputTokens', $tokens);
|
||||
|
||||
// -- begin INJECTOR --
|
||||
|
||||
$this->injectors = array();
|
||||
@@ -95,6 +90,10 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
|
||||
trigger_error("Cannot enable $name injector because $error is not allowed", E_USER_WARNING);
|
||||
}
|
||||
|
||||
// warning: most foreach loops follow the convention $i => $x.
|
||||
// be sure, for PHP4 compatibility, to only perform write operations
|
||||
// directly referencing the object using $i: $x is only safe for reads
|
||||
|
||||
// -- end INJECTOR --
|
||||
|
||||
$token = false;
|
||||
@@ -105,6 +104,8 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
|
||||
// if all goes well, this token will be passed through unharmed
|
||||
$token = $tokens[$this->inputIndex];
|
||||
|
||||
//printTokens($tokens, $this->inputIndex);
|
||||
|
||||
foreach ($this->injectors as $i => $x) {
|
||||
if ($x->skip > 0) $this->injectors[$i]->skip--;
|
||||
}
|
||||
@@ -114,7 +115,7 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
|
||||
if ($token->type === 'text') {
|
||||
// injector handler code; duplicated for performance reasons
|
||||
foreach ($this->injectors as $i => $x) {
|
||||
if (!$x->skip) $x->handleText($token);
|
||||
if (!$x->skip) $this->injectors[$i]->handleText($token);
|
||||
if (is_array($token)) {
|
||||
$this->currentInjector = $i;
|
||||
break;
|
||||
@@ -172,7 +173,7 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
|
||||
// injector handler code; duplicated for performance reasons
|
||||
if ($ok) {
|
||||
foreach ($this->injectors as $i => $x) {
|
||||
if (!$x->skip) $x->handleElement($token);
|
||||
if (!$x->skip) $this->injectors[$i]->handleElement($token);
|
||||
if (is_array($token)) {
|
||||
$this->currentInjector = $i;
|
||||
break;
|
||||
@@ -202,6 +203,9 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
|
||||
$current_parent = array_pop($this->currentNesting);
|
||||
if ($current_parent->name == $token->name) {
|
||||
$result[] = $token;
|
||||
foreach ($this->injectors as $i => $x) {
|
||||
$this->injectors[$i]->notifyEnd($token);
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
@@ -238,16 +242,16 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
|
||||
|
||||
// okay, we found it, close all the skipped tags
|
||||
// note that skipped tags contains the element we need closed
|
||||
$size = count($skipped_tags);
|
||||
for ($i = $size - 1; $i > 0; $i--) {
|
||||
if ($e && !isset($skipped_tags[$i]->armor['MakeWellFormed_TagClosedError'])) {
|
||||
for ($i = count($skipped_tags) - 1; $i >= 0; $i--) {
|
||||
if ($i && $e && !isset($skipped_tags[$i]->armor['MakeWellFormed_TagClosedError'])) {
|
||||
$e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag closed by element end', $skipped_tags[$i]);
|
||||
}
|
||||
$result[] = new HTMLPurifier_Token_End($skipped_tags[$i]->name);
|
||||
$result[] = $new_token = new HTMLPurifier_Token_End($skipped_tags[$i]->name);
|
||||
foreach ($this->injectors as $j => $x) { // $j, not $i!!!
|
||||
$this->injectors[$j]->notifyEnd($new_token);
|
||||
}
|
||||
}
|
||||
|
||||
$result[] = new HTMLPurifier_Token_End($skipped_tags[$i]->name);
|
||||
|
||||
}
|
||||
|
||||
$context->destroy('CurrentNesting');
|
||||
@@ -255,17 +259,18 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
|
||||
$context->destroy('InputIndex');
|
||||
$context->destroy('CurrentToken');
|
||||
|
||||
// we're at the end now, fix all still unclosed tags
|
||||
// not using processToken() because at this point we don't
|
||||
// care about current nesting
|
||||
// we're at the end now, fix all still unclosed tags (this is
|
||||
// duplicated from the end of the loop with some slight modifications)
|
||||
// not using $skipped_tags since it would invariably be all of them
|
||||
if (!empty($this->currentNesting)) {
|
||||
$size = count($this->currentNesting);
|
||||
for ($i = $size - 1; $i >= 0; $i--) {
|
||||
for ($i = count($this->currentNesting) - 1; $i >= 0; $i--) {
|
||||
if ($e && !isset($this->currentNesting[$i]->armor['MakeWellFormed_TagClosedError'])) {
|
||||
$e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag closed by document end', $this->currentNesting[$i]);
|
||||
}
|
||||
$result[] =
|
||||
new HTMLPurifier_Token_End($this->currentNesting[$i]->name);
|
||||
$result[] = $new_token = new HTMLPurifier_Token_End($this->currentNesting[$i]->name);
|
||||
foreach ($this->injectors as $j => $x) { // $j, not $i!!!
|
||||
$this->injectors[$j]->notifyEnd($new_token);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -286,8 +291,14 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
|
||||
|
||||
// adjust the injector skips based on the array substitution
|
||||
if ($this->injectors) {
|
||||
$offset = count($token) + 1;
|
||||
$offset = count($token);
|
||||
for ($i = 0; $i <= $this->currentInjector; $i++) {
|
||||
// because of the skip back, we need to add one more
|
||||
// for uninitialized injectors. I'm not exactly
|
||||
// sure why this is the case, but I think it has to
|
||||
// do with the fact that we're decrementing skips
|
||||
// before re-checking text
|
||||
if (!$this->injectors[$i]->skip) $this->injectors[$i]->skip++;
|
||||
$this->injectors[$i]->skip += $offset;
|
||||
}
|
||||
}
|
||||
|
@@ -116,6 +116,7 @@ class HTMLPurifier_Strategy_RemoveForeignElements extends HTMLPurifier_Strategy
|
||||
// mostly everything's good, but
|
||||
// we need to make sure required attributes are in order
|
||||
if (
|
||||
($token->type === 'start' || $token->type === 'empty') &&
|
||||
$definition->info[$token->name]->required_attr &&
|
||||
($token->name != 'img' || $remove_invalid_img) // ensure config option still works
|
||||
) {
|
||||
@@ -134,7 +135,6 @@ class HTMLPurifier_Strategy_RemoveForeignElements extends HTMLPurifier_Strategy
|
||||
$token->armor['ValidateAttributes'] = true;
|
||||
}
|
||||
|
||||
// CAN BE GENERICIZED
|
||||
if (isset($hidden_elements[$token->name]) && $token->type == 'start') {
|
||||
$textify_comments = $token->name;
|
||||
} elseif ($token->name === $textify_comments && $token->type == 'end') {
|
||||
|
@@ -6,10 +6,6 @@ require_once 'HTMLPurifier/IDAccumulator.php';
|
||||
|
||||
require_once 'HTMLPurifier/AttrValidator.php';
|
||||
|
||||
HTMLPurifier_ConfigSchema::define(
|
||||
'Attr', 'IDBlacklist', array(), 'list',
|
||||
'Array of IDs not allowed in the document.');
|
||||
|
||||
/**
|
||||
* Validate all attributes in the tokens.
|
||||
*/
|
||||
@@ -19,11 +15,6 @@ class HTMLPurifier_Strategy_ValidateAttributes extends HTMLPurifier_Strategy
|
||||
|
||||
function execute($tokens, $config, &$context) {
|
||||
|
||||
// setup id_accumulator context
|
||||
$id_accumulator = new HTMLPurifier_IDAccumulator();
|
||||
$id_accumulator->load($config->get('Attr', 'IDBlacklist'));
|
||||
$context->register('IDAccumulator', $id_accumulator);
|
||||
|
||||
// setup validator
|
||||
$validator = new HTMLPurifier_AttrValidator();
|
||||
|
||||
@@ -44,8 +35,6 @@ class HTMLPurifier_Strategy_ValidateAttributes extends HTMLPurifier_Strategy
|
||||
|
||||
$tokens[$key] = $token; // for PHP 4
|
||||
}
|
||||
|
||||
$context->destroy('IDAccumulator');
|
||||
$context->destroy('CurrentToken');
|
||||
|
||||
return $tokens;
|
||||
|
@@ -1,10 +1,22 @@
|
||||
<?php
|
||||
|
||||
/**
|
||||
* Chainable filters for custom URI processing
|
||||
* Chainable filters for custom URI processing.
|
||||
*
|
||||
* These filters can perform custom actions on a URI filter object,
|
||||
* including transformation or blacklisting.
|
||||
*
|
||||
* @warning This filter is called before scheme object validation occurs.
|
||||
* Make sure, if you require a specific scheme object, you
|
||||
* you check that it exists. This allows filters to convert
|
||||
* proprietary URI schemes into regular ones.
|
||||
*/
|
||||
class HTMLPurifier_URIFilter
|
||||
{
|
||||
|
||||
/**
|
||||
* Unique identifier of filter
|
||||
*/
|
||||
var $name;
|
||||
|
||||
/**
|
||||
@@ -17,8 +29,12 @@ class HTMLPurifier_URIFilter
|
||||
* @param &$uri Reference to URI object
|
||||
* @param $config Instance of HTMLPurifier_Config
|
||||
* @param &$context Instance of HTMLPurifier_Context
|
||||
* @return bool Whether or not to continue processing: false indicates
|
||||
* URL is no good, true indicates continue processing. Note that
|
||||
* all changes are committed directly on the URI object
|
||||
*/
|
||||
function filter(&$uri, $config, &$context) {
|
||||
trigger_error('Cannot call abstract function', E_USER_ERROR);
|
||||
}
|
||||
|
||||
}
|
||||
|
@@ -47,6 +47,10 @@ class HTMLPurifier_URIFilter_MakeAbsolute extends HTMLPurifier_URIFilter
|
||||
// absolute URI already: don't change
|
||||
if (!is_null($uri->host)) return true;
|
||||
$scheme_obj = $uri->getSchemeObj($config, $context);
|
||||
if (!$scheme_obj) {
|
||||
// scheme not recognized
|
||||
return false;
|
||||
}
|
||||
if (!$scheme_obj->hierarchical) {
|
||||
// non-hierarchal URI with explicit scheme, don't change
|
||||
return true;
|
||||
|
Reference in New Issue
Block a user