MDL-10937 upgraded html purifier to 2.1.1

This commit is contained in:
skodak 2007-08-21 22:06:47 +00:00
parent 3d436d8ac5
commit 5adad31057
168 changed files with 6307 additions and 2100 deletions

View File

@ -7,4 +7,3 @@
set_include_path(dirname(__FILE__) . PATH_SEPARATOR . get_include_path() );
require_once 'HTMLPurifier.php';
?>

View File

@ -18,4 +18,3 @@ function HTMLPurifier($html, $config = null) {
return $purifier->purify($html, $config);
}
?>

View File

@ -22,7 +22,7 @@
*/
/*
HTML Purifier 1.6.1 - Standards Compliant HTML Filtering
HTML Purifier 2.1.1 - Standards Compliant HTML Filtering
Copyright (C) 2006 Edward Z. Yang
This library is free software; you can redistribute it and/or
@ -40,9 +40,12 @@
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
// constants are slow, but we'll make one exception
define('HTMLPURIFIER_PREFIX', dirname(__FILE__));
// almost every class has an undocumented dependency to these, so make sure
// they get included
require_once 'HTMLPurifier/ConfigSchema.php';
require_once 'HTMLPurifier/ConfigSchema.php'; // important
require_once 'HTMLPurifier/Config.php';
require_once 'HTMLPurifier/Context.php';
@ -51,6 +54,16 @@ require_once 'HTMLPurifier/Generator.php';
require_once 'HTMLPurifier/Strategy/Core.php';
require_once 'HTMLPurifier/Encoder.php';
require_once 'HTMLPurifier/ErrorCollector.php';
require_once 'HTMLPurifier/LanguageFactory.php';
HTMLPurifier_ConfigSchema::define(
'Core', 'CollectErrors', false, 'bool', '
Whether or not to collect errors found while filtering the document. This
is a useful way to give feedback to your users. CURRENTLY NOT IMPLEMENTED.
This directive has been available since 2.0.0.
');
/**
* Main library execution class.
*
@ -64,12 +77,12 @@ require_once 'HTMLPurifier/Encoder.php';
class HTMLPurifier
{
var $version = '1.6.1';
var $version = '2.1.1';
var $config;
var $filters;
var $lexer, $strategy, $generator;
var $strategy, $generator;
/**
* Final HTMLPurifier_Context of last run purification. Might be an array.
@ -89,7 +102,6 @@ class HTMLPurifier
$this->config = HTMLPurifier_Config::create($config);
$this->lexer = HTMLPurifier_Lexer::create();
$this->strategy = new HTMLPurifier_Strategy_Core();
$this->generator = new HTMLPurifier_Generator();
@ -117,7 +129,27 @@ class HTMLPurifier
$config = $config ? HTMLPurifier_Config::create($config) : $this->config;
// implementation is partially environment dependent, partially
// configuration dependent
$lexer = HTMLPurifier_Lexer::create($config);
$context = new HTMLPurifier_Context();
// our friendly neighborhood generator, all primed with configuration too!
$this->generator->generateFromTokens(array(), $config, $context);
$context->register('Generator', $this->generator);
// set up global context variables
if ($config->get('Core', 'CollectErrors')) {
// may get moved out if other facilities use it
$language_factory = HTMLPurifier_LanguageFactory::instance();
$language = $language_factory->create($config, $context);
$context->register('Locale', $language);
$error_collector = new HTMLPurifier_ErrorCollector($context);
$context->register('ErrorCollector', $error_collector);
}
$html = HTMLPurifier_Encoder::convertToUTF8($html, $config, $context);
for ($i = 0, $size = count($this->filters); $i < $size; $i++) {
@ -130,7 +162,7 @@ class HTMLPurifier
// list of tokens
$this->strategy->execute(
// list of un-purified tokens
$this->lexer->tokenizeHTML(
$lexer->tokenizeHTML(
// un-purified HTML
$html, $config, $context
),
@ -164,7 +196,23 @@ class HTMLPurifier
return $array_of_html;
}
/**
 * Singleton accessor: hands back the one shared HTMLPurifier object,
 * building or replacing it when needed.
 * @param $prototype Optional. When this is an HTMLPurifier object it becomes
 *        the shared instance; any other truthy value is passed to the
 *        HTMLPurifier constructor to build a fresh shared instance.
 * @return Reference to the shared HTMLPurifier object
 */
function &getInstance($prototype = null) {
    static $htmlpurifier;
    // nothing to (re)build: an instance exists and no prototype was given
    if ($htmlpurifier && !$prototype) {
        return $htmlpurifier;
    }
    if (is_a($prototype, 'HTMLPurifier')) {
        // adopt the caller's ready-made purifier
        $htmlpurifier = $prototype;
    } else {
        $htmlpurifier = $prototype
            ? new HTMLPurifier($prototype)
            : new HTMLPurifier();
    }
    return $htmlpurifier;
}
}
?>

View File

@ -1,7 +1,6 @@
<?php
require_once 'HTMLPurifier/AttrTypes.php';
require_once 'HTMLPurifier/AttrDef/Lang.php';
/**
* Defines common attribute collections that modules reference
@ -12,8 +11,6 @@ class HTMLPurifier_AttrCollections
/**
* Associative array of attribute collections, indexed by name
* @note Technically, the composition of these is more complicated,
* but we bypass it using our own excludes property
*/
var $info = array();
@ -25,27 +22,29 @@ class HTMLPurifier_AttrCollections
* @param $modules Hash array of HTMLPurifier_HTMLModule members
*/
function HTMLPurifier_AttrCollections($attr_types, $modules) {
$info =& $this->info;
// load extensions from the modules
foreach ($modules as $module) {
foreach ($module->attr_collections as $coll_i => $coll) {
if (!isset($this->info[$coll_i])) {
$this->info[$coll_i] = array();
}
foreach ($coll as $attr_i => $attr) {
if ($attr_i === 0 && isset($info[$coll_i][$attr_i])) {
if ($attr_i === 0 && isset($this->info[$coll_i][$attr_i])) {
// merge in includes
$info[$coll_i][$attr_i] = array_merge(
$info[$coll_i][$attr_i], $attr);
$this->info[$coll_i][$attr_i] = array_merge(
$this->info[$coll_i][$attr_i], $attr);
continue;
}
$info[$coll_i][$attr_i] = $attr;
$this->info[$coll_i][$attr_i] = $attr;
}
}
}
// perform internal expansions and inclusions
foreach ($info as $name => $attr) {
foreach ($this->info as $name => $attr) {
// merge attribute collections that include others
$this->performInclusions($info[$name]);
$this->performInclusions($this->info[$name]);
// replace string identifiers with actual attribute objects
$this->expandIdentifiers($info[$name], $attr_types);
$this->expandIdentifiers($this->info[$name], $attr_types);
}
}
@ -57,16 +56,20 @@ class HTMLPurifier_AttrCollections
function performInclusions(&$attr) {
if (!isset($attr[0])) return;
$merge = $attr[0];
$seen = array(); // recursion guard
// loop through all the inclusions
for ($i = 0; isset($merge[$i]); $i++) {
if (isset($seen[$merge[$i]])) continue;
$seen[$merge[$i]] = true;
// foreach attribute of the inclusion, copy it over
if (!isset($this->info[$merge[$i]])) continue;
foreach ($this->info[$merge[$i]] as $key => $value) {
if (isset($attr[$key])) continue; // also catches more inclusions
$attr[$key] = $value;
}
if (isset($info[$merge[$i]][0])) {
if (isset($this->info[$merge[$i]][0])) {
// recursion
$merge = array_merge($merge, isset($info[$merge[$i]][0]));
$merge = array_merge($merge, $this->info[$merge[$i]][0]);
}
}
unset($attr[0]);
@ -79,22 +82,48 @@ class HTMLPurifier_AttrCollections
* @param $attr_types HTMLPurifier_AttrTypes instance
*/
function expandIdentifiers(&$attr, $attr_types) {
// because foreach will process new elements we add, make sure we
// skip duplicates
$processed = array();
foreach ($attr as $def_i => $def) {
// skip inclusions
if ($def_i === 0) continue;
if (!is_string($def)) continue;
if (isset($processed[$def_i])) continue;
// determine whether or not attribute is required
if ($required = (strpos($def_i, '*') !== false)) {
// rename the definition
unset($attr[$def_i]);
$def_i = trim($def_i, '*');
$attr[$def_i] = $def;
}
$processed[$def_i] = true;
// if we've already got a literal object, move on
if (is_object($def)) {
// preserve previous required
$attr[$def_i]->required = ($required || $attr[$def_i]->required);
continue;
}
if ($def === false) {
unset($attr[$def_i]);
continue;
}
if (isset($attr_types->info[$def])) {
$attr[$def_i] = $attr_types->info[$def];
if ($t = $attr_types->get($def)) {
$attr[$def_i] = $t;
$attr[$def_i]->required = $required;
} else {
trigger_error('Attempted to reference undefined attribute type', E_USER_ERROR);
unset($attr[$def_i]);
}
}
}
}
?>

View File

@ -14,11 +14,17 @@ class HTMLPurifier_AttrDef
{
/**
* Tells us whether or not an HTML attribute is minimized. Only the
* boolean attribute vapourware would use this.
* Tells us whether or not an HTML attribute is minimized. Has no
* meaning in other contexts.
*/
var $minimized = false;
/**
* Tells us whether or not an HTML attribute is required. Has no
* meaning in other contexts
*/
var $required = false;
/**
* Validates and cleans passed string according to a definition.
*
@ -62,6 +68,19 @@ class HTMLPurifier_AttrDef
$string = str_replace(array("\r", "\t"), ' ', $string);
return $string;
}
/**
* Factory method for creating this class from a string.
* The default implementation below ignores $string entirely and simply
* returns the object it was called on.
* @param $string String construction info (unused by this default version)
* @return Created AttrDef object corresponding to $string
* @public
*/
function make($string) {
// default implementation: no per-string state, so hand back this very
// object (flyweight) instead of building a new one.
// if overloaded, it is *necessary* for you to clone the
// object (usually by instantiating a new copy) and return that
return $this;
}
}
?>

View File

@ -66,4 +66,3 @@ class HTMLPurifier_AttrDef_CSS extends HTMLPurifier_AttrDef
}
?>

View File

@ -84,4 +84,3 @@ class HTMLPurifier_AttrDef_CSS_Background extends HTMLPurifier_AttrDef
}
?>

View File

@ -127,4 +127,3 @@ class HTMLPurifier_AttrDef_CSS_BackgroundPosition extends HTMLPurifier_AttrDef
}
?>

View File

@ -42,4 +42,3 @@ class HTMLPurifier_AttrDef_CSS_Border extends HTMLPurifier_AttrDef
}
?>

View File

@ -2,43 +2,47 @@
require_once 'HTMLPurifier/AttrDef.php';
HTMLPurifier_ConfigSchema::define(
'Core', 'ColorKeywords', array(
'maroon' => '#800000',
'red' => '#FF0000',
'orange' => '#FFA500',
'yellow' => '#FFFF00',
'olive' => '#808000',
'purple' => '#800080',
'fuchsia' => '#FF00FF',
'white' => '#FFFFFF',
'lime' => '#00FF00',
'green' => '#008000',
'navy' => '#000080',
'blue' => '#0000FF',
'aqua' => '#00FFFF',
'teal' => '#008080',
'black' => '#000000',
'silver' => '#C0C0C0',
'gray' => '#808080'
), 'hash', '
Lookup array of color names to six digit hexadecimal number corresponding
to color, with preceding hash mark. Used when parsing colors.
This directive has been available since 2.0.0.
');
/**
* Validates Color as defined by CSS.
*/
class HTMLPurifier_AttrDef_CSS_Color extends HTMLPurifier_AttrDef
{
/**
* Color keyword lookup table.
* @todo Extend it to include all usually allowed colors.
*/
var $colors = array(
'maroon' => '#800000',
'red' => '#F00',
'orange' => '#FFA500',
'yellow' => '#FF0',
'olive' => '#808000',
'purple' => '#800080',
'fuchsia' => '#F0F',
'white' => '#FFF',
'lime' => '#0F0',
'green' => '#008000',
'navy' => '#000080',
'blue' => '#00F',
'aqua' => '#0FF',
'teal' => '#008080',
'black' => '#000',
'silver' => '#C0C0C0',
'gray' => '#808080'
);
function validate($color, $config, &$context) {
static $colors = null;
if ($colors === null) $colors = $config->get('Core', 'ColorKeywords');
$color = trim($color);
if (!$color) return false;
$lower = strtolower($color);
if (isset($this->colors[$lower])) return $this->colors[$lower];
if (isset($colors[$lower])) return $colors[$lower];
if ($color[0] === '#') {
// hexadecimal handling
@ -94,4 +98,3 @@ class HTMLPurifier_AttrDef_CSS_Color extends HTMLPurifier_AttrDef
}
?>

View File

@ -35,4 +35,3 @@ class HTMLPurifier_AttrDef_CSS_Composite extends HTMLPurifier_AttrDef
}
?>

View File

@ -18,18 +18,6 @@ class HTMLPurifier_AttrDef_CSS_Font extends HTMLPurifier_AttrDef
*/
var $info = array();
/**
* System font keywords.
*/
var $system_fonts = array(
'caption' => true,
'icon' => true,
'menu' => true,
'message-box' => true,
'small-caption' => true,
'status-bar' => true
);
function HTMLPurifier_AttrDef_CSS_Font($config) {
$def = $config->getCSSDefinition();
$this->info['font-style'] = $def->info['font-style'];
@ -42,13 +30,22 @@ class HTMLPurifier_AttrDef_CSS_Font extends HTMLPurifier_AttrDef
function validate($string, $config, &$context) {
static $system_fonts = array(
'caption' => true,
'icon' => true,
'menu' => true,
'message-box' => true,
'small-caption' => true,
'status-bar' => true
);
// regular pre-processing
$string = $this->parseCDATA($string);
if ($string === '') return false;
// check if it's one of the keywords
$lowercase_string = strtolower($string);
if (isset($this->system_fonts[$lowercase_string])) {
if (isset($system_fonts[$lowercase_string])) {
return $lowercase_string;
}
@ -151,4 +148,3 @@ class HTMLPurifier_AttrDef_CSS_Font extends HTMLPurifier_AttrDef
}
?>

View File

@ -10,19 +10,15 @@ require_once 'HTMLPurifier/AttrDef.php';
class HTMLPurifier_AttrDef_CSS_FontFamily extends HTMLPurifier_AttrDef
{
/**
* Generic font family keywords.
* @protected
*/
var $generic_names = array(
'serif' => true,
'sans-serif' => true,
'monospace' => true,
'fantasy' => true,
'cursive' => true
);
function validate($string, $config, &$context) {
static $generic_names = array(
'serif' => true,
'sans-serif' => true,
'monospace' => true,
'fantasy' => true,
'cursive' => true
);
$string = $this->parseCDATA($string);
// assume that no font names contain commas in them
$fonts = explode(',', $string);
@ -31,7 +27,7 @@ class HTMLPurifier_AttrDef_CSS_FontFamily extends HTMLPurifier_AttrDef
$font = trim($font);
if ($font === '') continue;
// match a generic name
if (isset($this->generic_names[$font])) {
if (isset($generic_names[$font])) {
$final .= $font . ', ';
continue;
}
@ -42,19 +38,24 @@ class HTMLPurifier_AttrDef_CSS_FontFamily extends HTMLPurifier_AttrDef
$quote = $font[0];
if ($font[$length - 1] !== $quote) continue;
$font = substr($font, 1, $length - 2);
// double-backslash processing is buggy
$font = str_replace("\\$quote", $quote, $font); // de-escape quote
$font = str_replace("\\\n", "\n", $font); // de-escape newlines
}
// process font
// $font is a pure representation of the font name
if (ctype_alnum($font)) {
// very simple font, allow it in unharmed
$final .= $font . ', ';
continue;
}
$nospace = str_replace(array(' ', '.', '!'), '', $font);
if (ctype_alnum($nospace)) {
// font with spaces in it
$final .= "'$font', ";
continue;
}
// complicated font, requires quoting
// armor single quotes and new lines
$font = str_replace("'", "\\'", $font);
$font = str_replace("\n", "\\\n", $font);
$final .= "'$font', ";
}
$final = rtrim($final, ', ');
if ($final === '') return false;
@ -63,4 +64,3 @@ class HTMLPurifier_AttrDef_CSS_FontFamily extends HTMLPurifier_AttrDef
}
?>

View File

@ -53,4 +53,3 @@ class HTMLPurifier_AttrDef_CSS_Length extends HTMLPurifier_AttrDef
}
?>

View File

@ -77,4 +77,3 @@ class HTMLPurifier_AttrDef_CSS_ListStyle extends HTMLPurifier_AttrDef
}
?>

View File

@ -55,4 +55,3 @@ class HTMLPurifier_AttrDef_CSS_Multiple extends HTMLPurifier_AttrDef
}
?>

View File

@ -58,4 +58,3 @@ class HTMLPurifier_AttrDef_CSS_Number extends HTMLPurifier_AttrDef
}
?>

View File

@ -40,4 +40,3 @@ class HTMLPurifier_AttrDef_CSS_Percentage extends HTMLPurifier_AttrDef
}
?>

View File

@ -10,23 +10,19 @@ require_once 'HTMLPurifier/AttrDef.php';
class HTMLPurifier_AttrDef_CSS_TextDecoration extends HTMLPurifier_AttrDef
{
/**
* Lookup table of allowed values.
* @protected
*/
var $allowed_values = array(
'line-through' => true,
'overline' => true,
'underline' => true
);
function validate($string, $config, &$context) {
static $allowed_values = array(
'line-through' => true,
'overline' => true,
'underline' => true
);
$string = strtolower($this->parseCDATA($string));
$parts = explode(' ', $string);
$final = '';
foreach ($parts as $part) {
if (isset($this->allowed_values[$part])) {
if (isset($allowed_values[$part])) {
$final .= $part . ' ';
}
}
@ -38,4 +34,3 @@ class HTMLPurifier_AttrDef_CSS_TextDecoration extends HTMLPurifier_AttrDef
}
?>

View File

@ -15,7 +15,7 @@ class HTMLPurifier_AttrDef_CSS_URI extends HTMLPurifier_AttrDef_URI
{
function HTMLPurifier_AttrDef_CSS_URI() {
$this->HTMLPurifier_AttrDef_URI(true); // always embedded
parent::HTMLPurifier_AttrDef_URI(true); // always embedded
}
function validate($uri_string, $config, &$context) {
@ -29,7 +29,7 @@ class HTMLPurifier_AttrDef_CSS_URI extends HTMLPurifier_AttrDef_URI
if ($uri_string[$new_length] != ')') return false;
$uri = trim(substr($uri_string, 0, $new_length));
if (isset($uri[0]) && ($uri[0] == "'" || $uri[0] == '"')) {
if (!empty($uri) && ($uri[0] == "'" || $uri[0] == '"')) {
$quote = $uri[0];
$new_length = strlen($uri) - 1;
if ($uri[$new_length] !== $quote) return false;
@ -55,4 +55,3 @@ class HTMLPurifier_AttrDef_CSS_URI extends HTMLPurifier_AttrDef_URI
}
?>

View File

@ -45,6 +45,21 @@ class HTMLPurifier_AttrDef_Enum extends HTMLPurifier_AttrDef
return $result ? $string : false;
}
/**
 * Builds an enumeration validator from its string description.
 * @param $string Comma-delimited list of valid values, matched
 *        case-insensitively, e.g. "foo,bar,baz". A leading "s:" (followed
 *        by at least one more character) switches to case-sensitive matching.
 */
function make($string) {
    // the marker only counts when something follows it
    $sensitive = (strlen($string) > 2 && substr($string, 0, 2) === 's:');
    if ($sensitive) {
        $string = substr($string, 2);
    }
    return new HTMLPurifier_AttrDef_Enum(explode(',', $string), $sensitive);
}
}
?>

View File

@ -0,0 +1,29 @@
<?php
require_once 'HTMLPurifier/AttrDef.php';
/**
 * Validator for boolean (minimizable) HTML attributes.
 */
class HTMLPurifier_AttrDef_HTML_Bool extends HTMLPurifier_AttrDef
{

    /** Attribute name; this is what validate() returns on success. */
    var $name;

    /** Boolean attributes are flagged as minimized. */
    var $minimized = true;

    /**
     * @param $name Name of the attribute this instance validates
     */
    function HTMLPurifier_AttrDef_HTML_Bool($name = false) {
        $this->name = $name;
    }

    /**
     * @param $string Attribute value as supplied
     * @return The stored attribute name, or false when the value is empty()
     */
    function validate($string, $config, &$context) {
        return empty($string) ? false : $this->name;
    }

    /**
     * @param $string Name of attribute
     */
    function make($string) {
        $def = new HTMLPurifier_AttrDef_HTML_Bool($string);
        return $def;
    }

}

View File

@ -0,0 +1,34 @@
<?php
require_once 'HTMLPurifier/AttrDef.php';
require_once 'HTMLPurifier/AttrDef/CSS/Color.php'; // for %Core.ColorKeywords
/**
 * Validates a color according to the HTML spec.
 *
 * Accepts either a color keyword found in %Core.ColorKeywords or a three-
 * or six-digit hexadecimal value, with or without the leading hash mark.
 */
class HTMLPurifier_AttrDef_HTML_Color extends HTMLPurifier_AttrDef
{

    /**
     * @param $string  Raw attribute value
     * @param $config  Configuration object; %Core.ColorKeywords is read from it
     * @param $context Context object (not used by this validator)
     * @return The value mapped to a matching keyword, or "#" followed by six
     *         hex digits (letter case as supplied), or false if invalid
     */
    function validate($string, $config, &$context) {

        // NOTE(review): the lookup table is cached in a static, so the first
        // configuration seen is reused for every later call - confirm that
        // per-config keyword tables are not expected here.
        static $colors = null;
        if ($colors === null) $colors = $config->get('Core', 'ColorKeywords');

        $string = trim($string);

        if (empty($string)) return false;

        // exact match first, so any existing mixed-case keys keep working
        if (isset($colors[$string])) return $colors[$string];

        // HTML color names are case-insensitive: retry with the lowercased
        // name so that e.g. "Red" or "BLUE" resolves like "red" / "blue"
        $lower = strtolower($string);
        if (isset($colors[$lower])) return $colors[$lower];

        // otherwise treat the value as hex digits, hash mark optional
        if ($string[0] === '#') $hex = substr($string, 1);
        else $hex = $string;

        $length = strlen($hex);
        if ($length !== 3 && $length !== 6) return false;
        if (!ctype_xdigit($hex)) return false;

        // expand shorthand "abc" to "aabbcc"
        if ($length === 3) $hex = $hex[0].$hex[0].$hex[1].$hex[1].$hex[2].$hex[2];

        return "#$hex";

    }

}

View File

@ -31,4 +31,3 @@ class HTMLPurifier_AttrDef_HTML_FrameTarget extends HTMLPurifier_AttrDef_Enum
}
?>

View File

@ -118,4 +118,3 @@ class HTMLPurifier_AttrDef_HTML_ID extends HTMLPurifier_AttrDef
}
?>

View File

@ -41,4 +41,3 @@ class HTMLPurifier_AttrDef_HTML_Length extends HTMLPurifier_AttrDef_HTML_Pixels
}
?>

View File

@ -26,22 +26,20 @@ HTMLPurifier_ConfigSchema::define(
class HTMLPurifier_AttrDef_HTML_LinkTypes extends HTMLPurifier_AttrDef
{
/** Lookup array of attribute names to configuration name */
var $configLookup = array(
'rel' => 'AllowedRel',
'rev' => 'AllowedRev'
);
/** Name config attribute to pull. */
var $name;
function HTMLPurifier_AttrDef_HTML_LinkTypes($name) {
if (!isset($this->configLookup[$name])) {
$configLookup = array(
'rel' => 'AllowedRel',
'rev' => 'AllowedRev'
);
if (!isset($configLookup[$name])) {
trigger_error('Unrecognized attribute name for link '.
'relationship.', E_USER_ERROR);
return;
}
$this->name = $this->configLookup[$name];
$this->name = $configLookup[$name];
}
function validate($string, $config, &$context) {
@ -72,4 +70,3 @@ class HTMLPurifier_AttrDef_HTML_LinkTypes extends HTMLPurifier_AttrDef
}
?>

View File

@ -41,4 +41,3 @@ class HTMLPurifier_AttrDef_HTML_MultiLength extends HTMLPurifier_AttrDef_HTML_Le
}
?>

View File

@ -48,4 +48,3 @@ class HTMLPurifier_AttrDef_HTML_Nmtokens extends HTMLPurifier_AttrDef
}
?>

View File

@ -34,4 +34,3 @@ class HTMLPurifier_AttrDef_HTML_Pixels extends HTMLPurifier_AttrDef
}
?>

View File

@ -72,4 +72,3 @@ class HTMLPurifier_AttrDef_Integer extends HTMLPurifier_AttrDef
}
?>

View File

@ -72,4 +72,3 @@ class HTMLPurifier_AttrDef_Lang extends HTMLPurifier_AttrDef
}
?>

View File

@ -14,4 +14,3 @@ class HTMLPurifier_AttrDef_Text extends HTMLPurifier_AttrDef
}
?>

View File

@ -1,90 +1,65 @@
<?php
require_once 'HTMLPurifier/AttrDef.php';
require_once 'HTMLPurifier/URIParser.php';
require_once 'HTMLPurifier/URIScheme.php';
require_once 'HTMLPurifier/URISchemeRegistry.php';
require_once 'HTMLPurifier/AttrDef/URI/Host.php';
require_once 'HTMLPurifier/PercentEncoder.php';
HTMLPurifier_ConfigSchema::define(
'URI', 'DefaultScheme', 'http', 'string',
'Defines through what scheme the output will be served, in order to '.
'select the proper object validator when no scheme information is present.'
);
// special case filtering directives
HTMLPurifier_ConfigSchema::define(
'URI', 'Host', null, 'string/null',
'Defines the domain name of the server, so we can determine whether or '.
'an absolute URI is from your website or not. Not strictly necessary, '.
'as users should be using relative URIs to reference resources on your '.
'website. It will, however, let you use absolute URIs to link to '.
'subdomains of the domain you post here: i.e. example.com will allow '.
'sub.example.com. However, higher up domains will still be excluded: '.
'if you set %URI.Host to sub.example.com, example.com will be blocked. '.
'This directive has been available since 1.2.0.'
);
'URI', 'Munge', null, 'string/null', '
<p>
Munges all browsable (usually http, https and ftp)
absolute URI\'s into another URI, usually a URI redirection service.
This directive accepts a URI, formatted with a <code>%s</code> where
the url-encoded original URI should be inserted (sample:
<code>http://www.google.com/url?q=%s</code>).
</p>
<p>
Uses for this directive:
</p>
<ul>
<li>
Prevent PageRank leaks, while being fairly transparent
to users (you may also want to add some client side JavaScript to
override the text in the statusbar). <strong>Notice</strong>:
Many security experts believe that this form of protection does not deter spam-bots.
</li>
<li>
Redirect users to a splash page telling them they are leaving your
website. While this is poor usability practice, it is often mandated
in corporate environments.
</li>
</ul>
<p>
This directive has been available since 1.3.0.
</p>
');
// disabling directives
HTMLPurifier_ConfigSchema::define(
'URI', 'DisableExternal', false, 'bool',
'Disables links to external websites. This is a highly effective '.
'anti-spam and anti-pagerank-leech measure, but comes at a hefty price: no'.
'links or images outside of your domain will be allowed. Non-linkified '.
'URIs will still be preserved. If you want to be able to link to '.
'subdomains or use absolute URIs, specify %URI.Host for your website. '.
'This directive has been available since 1.2.0.'
);
HTMLPurifier_ConfigSchema::define(
'URI', 'DisableExternalResources', false, 'bool',
'Disables the embedding of external resources, preventing users from '.
'embedding things like images from other hosts. This prevents '.
'access tracking (good for email viewers), bandwidth leeching, '.
'cross-site request forging, goatse.cx posting, and '.
'other nasties, but also results in '.
'a loss of end-user functionality (they can\'t directly post a pic '.
'they posted from Flickr anymore). Use it if you don\'t have a '.
'robust user-content moderation team. This directive has been '.
'available since 1.3.0.'
);
HTMLPurifier_ConfigSchema::define(
'URI', 'DisableResources', false, 'bool',
'Disables embedding resources, essentially meaning no pictures. You can '.
'still link to them though. See %URI.DisableExternalResources for why '.
'this might be a good idea. This directive has been available since 1.3.0.'
);
HTMLPurifier_ConfigSchema::define(
'URI', 'Munge', null, 'string/null',
'Munges all browsable (usually http, https and ftp) URI\'s into some URL '.
'redirection service. Pass this directive a URI, with %s inserted where '.
'the url-encoded original URI should be inserted (sample: '.
'<code>http://www.google.com/url?q=%s</code>). '.
'This prevents PageRank leaks, while being as transparent as possible '.
'to users (you may also want to add some client side JavaScript to '.
'override the text in the statusbar). Warning: many security experts '.
'believe that this form of protection does not deter spam-bots. '.
'You can also use this directive to redirect users to a splash page '.
'telling them they are leaving your website. '.
'This directive has been available since 1.3.0.'
);
HTMLPurifier_ConfigSchema::define(
'URI', 'HostBlacklist', array(), 'list',
'List of strings that are forbidden in the host of any URI. Use it to '.
'kill domain names of spam, etc. Note that it will catch anything in '.
'the domain, so <tt>moo.com</tt> will catch <tt>moo.com.example.com</tt>. '.
'This directive has been available since 1.3.0.'
);
HTMLPurifier_ConfigSchema::define(
'URI', 'Disable', false, 'bool',
'Disables all URIs in all forms. Not sure why you\'d want to do that '.
'(after all, the Internet\'s founded on the notion of a hyperlink). '.
'This directive has been available since 1.3.0.'
);
'URI', 'Disable', false, 'bool', '
<p>
Disables all URIs in all forms. Not sure why you\'d want to do that
(after all, the Internet\'s founded on the notion of a hyperlink).
This directive has been available since 1.3.0.
</p>
');
HTMLPurifier_ConfigSchema::defineAlias('Attr', 'DisableURI', 'URI', 'Disable');
HTMLPurifier_ConfigSchema::define(
'URI', 'DisableResources', false, 'bool', '
<p>
Disables embedding resources, essentially meaning no pictures. You can
still link to them though. See %URI.DisableExternalResources for why
this might be a good idea. This directive has been available since 1.3.0.
</p>
');
/**
* Validates a URI as defined by RFC 3986.
* @note Scheme-specific mechanics deferred to HTMLPurifier_URIScheme
@ -92,205 +67,83 @@ HTMLPurifier_ConfigSchema::defineAlias('Attr', 'DisableURI', 'URI', 'Disable');
class HTMLPurifier_AttrDef_URI extends HTMLPurifier_AttrDef
{
var $host;
var $PercentEncoder;
var $embeds_resource;
var $parser, $percentEncoder;
var $embedsResource;
/**
* @param $embeds_resource Does the URI here result in an extra HTTP request?
*/
function HTMLPurifier_AttrDef_URI($embeds_resource = false) {
$this->host = new HTMLPurifier_AttrDef_URI_Host();
$this->PercentEncoder = new HTMLPurifier_PercentEncoder();
$this->embeds_resource = (bool) $embeds_resource;
$this->parser = new HTMLPurifier_URIParser();
$this->percentEncoder = new HTMLPurifier_PercentEncoder();
$this->embedsResource = (bool) $embeds_resource;
}
function validate($uri, $config, &$context) {
// We'll write stack-based parsers later, for now, use regexps to
// get things working as fast as possible (irony)
if ($config->get('URI', 'Disable')) return false;
// parse as CDATA
// initial operations
$uri = $this->parseCDATA($uri);
$uri = $this->percentEncoder->normalize($uri);
// fix up percent-encoding
$uri = $this->PercentEncoder->normalize($uri);
// parse the URI
$uri = $this->parser->parse($uri);
if ($uri === false) return false;
// while it would be nice to use parse_url(), that's specifically
// for HTTP and thus won't work for our generic URI parsing
// add embedded flag to context for validators
$context->register('EmbeddedURI', $this->embedsResource);
// according to the RFC... (but this cuts corners, i.e. non-validating)
$r_URI = '!'.
'(([^:/?#<>\'"]+):)?'. // 2. Scheme
'(//([^/?#<>\'"]*))?'. // 4. Authority
'([^?#<>\'"]*)'. // 5. Path
'(\?([^#<>\'"]*))?'. // 7. Query
'(#([^<>\'"]*))?'. // 8. Fragment
'!';
$ok = false;
do {
// generic validation
$result = $uri->validate($config, $context);
if (!$result) break;
// chained validation
$uri_def =& $config->getDefinition('URI');
$result = $uri_def->filter($uri, $config, $context);
if (!$result) break;
// scheme-specific validation
$scheme_obj = $uri->getSchemeObj($config, $context);
if (!$scheme_obj) break;
if ($this->embedsResource && !$scheme_obj->browsable) break;
$result = $scheme_obj->validate($uri, $config, $context);
if (!$result) break;
// survived gauntlet
$ok = true;
} while (false);
$matches = array();
$result = preg_match($r_URI, $uri, $matches);
$context->destroy('EmbeddedURI');
if (!$ok) return false;
if (!$result) return false; // invalid URI
// separate out parts
$scheme = !empty($matches[1]) ? $matches[2] : null;
$authority = !empty($matches[3]) ? $matches[4] : null;
$path = $matches[5]; // always present, can be empty
$query = !empty($matches[6]) ? $matches[7] : null;
$fragment = !empty($matches[8]) ? $matches[9] : null;
$registry =& HTMLPurifier_URISchemeRegistry::instance();
if ($scheme !== null) {
// no need to validate the scheme's fmt since we do that when we
// retrieve the specific scheme object from the registry
$scheme = ctype_lower($scheme) ? $scheme : strtolower($scheme);
$scheme_obj = $registry->getScheme($scheme, $config, $context);
if (!$scheme_obj) return false; // invalid scheme, clean it out
} else {
$scheme_obj = $registry->getScheme(
$config->get('URI', 'DefaultScheme'), $config, $context
);
// munge scheme off if necessary (this must be last)
if (!is_null($uri->scheme) && is_null($uri->host)) {
if ($uri_def->defaultScheme == $uri->scheme) {
$uri->scheme = null;
}
}
// back to string
$result = $uri->toString();
// the URI we're processing embeds_resource a resource in the page, but the URI
// it references cannot be located
if ($this->embeds_resource && !$scheme_obj->browsable) {
return false;
}
if ($authority !== null) {
// remove URI if it's absolute and we disabled externals or
// if it's absolute and embedded and we disabled external resources
unset($our_host);
if (
$config->get('URI', 'DisableExternal') ||
(
$config->get('URI', 'DisableExternalResources') &&
$this->embeds_resource
)
) {
$our_host = $config->get('URI', 'Host');
if ($our_host === null) return false;
}
$HEXDIG = '[A-Fa-f0-9]';
$unreserved = 'A-Za-z0-9-._~'; // make sure you wrap with []
$sub_delims = '!$&\'()'; // needs []
$pct_encoded = "%$HEXDIG$HEXDIG";
$r_userinfo = "(?:[$unreserved$sub_delims:]|$pct_encoded)*";
$r_authority = "/^(($r_userinfo)@)?(\[[^\]]+\]|[^:]*)(:(\d*))?/";
$matches = array();
preg_match($r_authority, $authority, $matches);
// overloads regexp!
$userinfo = !empty($matches[1]) ? $matches[2] : null;
$host = !empty($matches[3]) ? $matches[3] : null;
$port = !empty($matches[4]) ? $matches[5] : null;
// validate port
if ($port !== null) {
$port = (int) $port;
if ($port < 1 || $port > 65535) $port = null;
}
$host = $this->host->validate($host, $config, $context);
if ($host === false) $host = null;
if ($this->checkBlacklist($host, $config, $context)) return false;
// more lenient absolute checking
if (isset($our_host)) {
$host_parts = array_reverse(explode('.', $host));
// could be cached
$our_host_parts = array_reverse(explode('.', $our_host));
foreach ($our_host_parts as $i => $discard) {
if (!isset($host_parts[$i])) return false;
if ($host_parts[$i] != $our_host_parts[$i]) return false;
}
}
// userinfo and host are validated within the regexp
} else {
$port = $host = $userinfo = null;
}
// query and fragment are quite simple in terms of definition:
// *( pchar / "/" / "?" ), so define their validation routines
// when we start fixing percent encoding
// path gets to be validated against a hodge-podge of rules depending
// on the status of authority and scheme, but it's not that important,
// esp. since it won't be applicable to everyone
// okay, now we defer execution to the subobject for more processing
// note that $fragment is omitted
list($userinfo, $host, $port, $path, $query) =
$scheme_obj->validateComponents(
$userinfo, $host, $port, $path, $query, $config, $context
);
// reconstruct authority
$authority = null;
if (!is_null($userinfo) || !is_null($host) || !is_null($port)) {
$authority = '';
if($userinfo !== null) $authority .= $userinfo . '@';
$authority .= $host;
if($port !== null) $authority .= ':' . $port;
}
// reconstruct the result
$result = '';
if ($scheme !== null) $result .= "$scheme:";
if ($authority !== null) $result .= "//$authority";
$result .= $path;
if ($query !== null) $result .= "?$query";
if ($fragment !== null) $result .= "#$fragment";
// munge if necessary
$munge = $config->get('URI', 'Munge');
if (!empty($scheme_obj->browsable) && $munge !== null) {
if ($authority !== null) {
$result = str_replace('%s', rawurlencode($result), $munge);
}
// munge entire URI if necessary
if (
!is_null($uri->host) && // indicator for authority
!empty($scheme_obj->browsable) &&
!is_null($munge = $config->get('URI', 'Munge'))
) {
$result = str_replace('%s', rawurlencode($result), $munge);
}
return $result;
}
/**
 * Checks a host against the blacklist stored in the URI.HostBlacklist
 * directive. The blacklist is a list of host fragments; the host is
 * rejected as soon as one fragment occurs anywhere inside it (plain,
 * case-sensitive substring match).
 * @param $host Host to check; may be null, the caller nulls out hosts
 *              that failed validation before calling this
 * @param $config HTMLPurifier_Config instance
 * @param $context HTMLPurifier_Context instance (not used here)
 * @return bool Is spam?
 */
function checkBlacklist($host, &$config, &$context) {
    $blacklist = $config->get('URI', 'HostBlacklist');
    if (empty($blacklist)) return false;
    // a missing host cannot contain any fragment; return before handing
    // a non-string haystack to strpos()
    if ($host === null || $host === false || $host === '') return false;
    foreach ($blacklist as $blacklisted_host_fragment) {
        // an empty fragment is an empty strpos() needle: it raises a
        // warning on older PHP and matches every host on PHP 8, so
        // it is ignored instead
        if ((string) $blacklisted_host_fragment === '') continue;
        if (strpos($host, (string) $blacklisted_host_fragment) !== false) {
            return true;
        }
    }
    return false;
}
}
?>

View File

@ -14,4 +14,3 @@ class HTMLPurifier_AttrDef_URI_Email extends HTMLPurifier_AttrDef
}
?>

View File

@ -20,4 +20,3 @@ class HTMLPurifier_AttrDef_URI_Email_SimpleCheck extends HTMLPurifier_AttrDef_UR
}
?>

View File

@ -51,4 +51,3 @@ class HTMLPurifier_AttrDef_URI_Host extends HTMLPurifier_AttrDef
}
?>

View File

@ -15,13 +15,10 @@ class HTMLPurifier_AttrDef_URI_IPv4 extends HTMLPurifier_AttrDef
*/
var $ip4;
function HTMLPurifier_AttrDef_URI_IPv4() {
$oct = '(?:25[0-5]|2[0-4][0-9]|1[0-9]{2}|[1-9][0-9]|[0-9])'; // 0-255
$this->ip4 = "(?:{$oct}\\.{$oct}\\.{$oct}\\.{$oct})";
}
function validate($aIP, $config, &$context) {
if (!$this->ip4) $this->_loadRegex();
if (preg_match('#^' . $this->ip4 . '$#s', $aIP))
{
return $aIP;
@ -31,6 +28,14 @@ class HTMLPurifier_AttrDef_URI_IPv4 extends HTMLPurifier_AttrDef
}
/**
 * Lazily builds the dotted-quad IPv4 pattern and stores it in
 * $this->ip4, so the regex is not stuffed into the cache.
 */
function _loadRegex() {
    // one octet, 0-255
    $octet = '(?:25[0-5]|2[0-4][0-9]|1[0-9]{2}|[1-9][0-9]|[0-9])';
    // four octets joined by literal dots
    $dotted_quad = implode('\\.', array($octet, $octet, $octet, $octet));
    $this->ip4 = '(?:' . $dotted_quad . ')';
}
}
?>

View File

@ -13,6 +13,8 @@ class HTMLPurifier_AttrDef_URI_IPv6 extends HTMLPurifier_AttrDef_URI_IPv4
function validate($aIP, $config, &$context) {
if (!$this->ip4) $this->_loadRegex();
$original = $aIP;
$hex = '[0-9a-fA-F]';
@ -96,4 +98,3 @@ class HTMLPurifier_AttrDef_URI_IPv6 extends HTMLPurifier_AttrDef_URI_IPv4
}
?>

View File

@ -55,4 +55,3 @@ class HTMLPurifier_AttrTransform
}
?>

View File

@ -28,4 +28,3 @@ class HTMLPurifier_AttrTransform_BdoDir extends HTMLPurifier_AttrTransform
}
?>

View File

@ -23,4 +23,3 @@ extends HTMLPurifier_AttrTransform {
}
?>

View File

@ -36,4 +36,3 @@ extends HTMLPurifier_AttrTransform {
}
?>

View File

@ -17,4 +17,3 @@ class HTMLPurifier_AttrTransform_Border extends HTMLPurifier_AttrTransform {
}
?>

View File

@ -57,4 +57,3 @@ class HTMLPurifier_AttrTransform_EnumToCSS extends HTMLPurifier_AttrTransform {
}
?>

View File

@ -20,7 +20,10 @@ HTMLPurifier_ConfigSchema::define(
);
/**
* Post-transform that ensures the required attrs of img (alt and src) are set
* Transform that supplies default values for the src and alt attributes
* in img tags, as well as prevents the img tag from being removed
* because of a missing alt tag. This needs to be registered as both
* a pre and post attribute transform.
*/
class HTMLPurifier_AttrTransform_ImgRequired extends HTMLPurifier_AttrTransform
{
@ -29,6 +32,7 @@ class HTMLPurifier_AttrTransform_ImgRequired extends HTMLPurifier_AttrTransform
$src = true;
if (!isset($attr['src'])) {
if ($config->get('Core', 'RemoveInvalidImg')) return $attr;
$attr['src'] = $config->get('Attr', 'DefaultInvalidImage');
$src = false;
}
@ -47,4 +51,3 @@ class HTMLPurifier_AttrTransform_ImgRequired extends HTMLPurifier_AttrTransform
}
?>

View File

@ -44,4 +44,3 @@ extends HTMLPurifier_AttrTransform {
}
?>

View File

@ -27,4 +27,3 @@ class HTMLPurifier_AttrTransform_Lang extends HTMLPurifier_AttrTransform
}
?>

View File

@ -26,4 +26,3 @@ class HTMLPurifier_AttrTransform_Length extends HTMLPurifier_AttrTransform
}
?>

View File

@ -18,4 +18,3 @@ class HTMLPurifier_AttrTransform_Name extends HTMLPurifier_AttrTransform
}
?>

View File

@ -1,36 +0,0 @@
<?php
require_once 'HTMLPurifier/AttrTransform.php';
/**
 * Pre-transform that converts the deprecated align attribute into an
 * equivalent text-align CSS declaration.
 */
class HTMLPurifier_AttrTransform_TextAlign
extends HTMLPurifier_AttrTransform {

    /**
     * Replaces align="..." with a text-align declaration in style.
     * @param $attr Assoc array of attributes
     * @param $config HTMLPurifier_Config instance (not used here)
     * @param $context HTMLPurifier_Context instance (not used here)
     * @return Assoc array of attributes, with align removed
     */
    function transform($attr, $config, &$context) {

        // nothing to convert
        if (!isset($attr['align'])) return $attr;

        $requested = strtolower(trim($attr['align']));

        // align is consumed whether or not its value turns out usable
        unset($attr['align']);

        $supported = array('left', 'right', 'center', 'justify');
        if (!in_array($requested, $supported, true)) {
            return $attr;
        }

        // the new declaration goes in front of any existing style text
        $existing = isset($attr['style']) ? $attr['style'] : '';
        $attr['style'] = "text-align:$requested;" . $existing;

        return $attr;

    }

}
?>

View File

@ -1,10 +1,14 @@
<?php
require_once 'HTMLPurifier/AttrDef/Lang.php';
require_once 'HTMLPurifier/AttrDef/Enum.php';
require_once 'HTMLPurifier/AttrDef/HTML/Bool.php';
require_once 'HTMLPurifier/AttrDef/HTML/ID.php';
require_once 'HTMLPurifier/AttrDef/HTML/Length.php';
require_once 'HTMLPurifier/AttrDef/HTML/MultiLength.php';
require_once 'HTMLPurifier/AttrDef/HTML/Nmtokens.php';
require_once 'HTMLPurifier/AttrDef/HTML/Pixels.php';
require_once 'HTMLPurifier/AttrDef/HTML/Color.php';
require_once 'HTMLPurifier/AttrDef/Integer.php';
require_once 'HTMLPurifier/AttrDef/Text.php';
require_once 'HTMLPurifier/AttrDef/URI.php';
@ -16,14 +20,19 @@ class HTMLPurifier_AttrTypes
{
/**
* Lookup array of attribute string identifiers to concrete implementations
* @public
* @protected
*/
var $info = array();
/**
* Constructs the info array
* Constructs the info array, supplying default implementations for attribute
* types.
*/
function HTMLPurifier_AttrTypes() {
// pseudo-types, must be instantiated via shorthand
$this->info['Enum'] = new HTMLPurifier_AttrDef_Enum();
$this->info['Bool'] = new HTMLPurifier_AttrDef_HTML_Bool();
$this->info['CDATA'] = new HTMLPurifier_AttrDef_Text();
$this->info['ID'] = new HTMLPurifier_AttrDef_HTML_ID();
$this->info['Length'] = new HTMLPurifier_AttrDef_HTML_Length();
@ -32,10 +41,42 @@ class HTMLPurifier_AttrTypes
$this->info['Pixels'] = new HTMLPurifier_AttrDef_HTML_Pixels();
$this->info['Text'] = new HTMLPurifier_AttrDef_Text();
$this->info['URI'] = new HTMLPurifier_AttrDef_URI();
$this->info['LanguageCode'] = new HTMLPurifier_AttrDef_Lang();
$this->info['Color'] = new HTMLPurifier_AttrDef_HTML_Color();
// number is really a positive integer (one or more digits)
// FIXME: ^^ not always, see start and value of list items
$this->info['Number'] = new HTMLPurifier_AttrDef_Integer(false, false, true);
}
/**
 * Retrieves a type. Anything after the first '#' in the name is split
 * off and passed to the registered implementation's make() method.
 * @param $type String type name, optionally followed by '#' and extra info
 * @return Object AttrDef for type; nothing if the type is not registered
 */
function get($type) {

    // split "Name#extra" into the type name and the extra info
    $parts = explode('#', $type, 2);
    $type = $parts[0];
    $string = isset($parts[1]) ? $parts[1] : '';

    if (isset($this->info[$type])) {
        return $this->info[$type]->make($string);
    }

    trigger_error('Cannot retrieve undefined attribute type ' . $type, E_USER_ERROR);
    return;

}
/**
* Sets a new implementation for a type, replacing any implementation
* already stored under the same name in $this->info.
* @param $type String type name
* @param $impl Object AttrDef for type
*/
function set($type, $impl) {
$this->info[$type] = $impl;
}
}
?>

View File

@ -0,0 +1,139 @@
<?php
/**
 * Validates the attributes of a token. Doesn't manage required attributes
 * very well. The only reason we factored this out was because RemoveForeignElements
 * also needed it besides ValidateAttributes.
 */
class HTMLPurifier_AttrValidator
{

    /**
     * Validates the attributes of a token in place: pre-transforms are
     * applied, every attribute is checked against the element's local
     * definition (or, failing that, the global one), and post-transforms
     * are applied. Attributes without a definition, or whose validation
     * yields false/null, are removed; string results replace the value.
     * @param $token Reference to token to validate. We require a reference
     *               because the operations this class performs on the token
     *               are not atomic, so the context CurrentToken has to be
     *               updated throughout
     * @param $config Instance of HTMLPurifier_Config
     * @param $context Instance of HTMLPurifier_Context
     * @return The untouched token when it is neither a start nor an empty
     *         tag; nothing otherwise (changes are made through the reference)
     */
    function validateToken(&$token, &$config, &$context) {

        $definition = $config->getHTMLDefinition();
        $e =& $context->get('ErrorCollector', true);

        // only start and empty tags carry attributes; leave before
        // registering anything in the context, otherwise a CurrentToken
        // we registered ourselves would never be destroyed again
        if ($token->type !== 'start' && $token->type !== 'empty') return $token;

        // initialize CurrentToken if necessary
        $current_token =& $context->get('CurrentToken', true);
        if (!$current_token) $context->register('CurrentToken', $token);

        // create alias to global definition array, see also $defs
        // DEFINITION CALL
        $d_defs = $definition->info_global_attr;

        // reference attributes for easy manipulation
        $attr =& $token->attr;

        // do global transformations (pre)
        // nothing currently utilizes this
        foreach ($definition->info_attr_transform_pre as $transform) {
            $attr = $transform->transform($o = $attr, $config, $context);
            if ($e && ($attr != $o)) $e->send(E_NOTICE, 'AttrValidator: Attributes transformed', $o, $attr);
        }

        // do local transformations only applicable to this element (pre)
        // ex. <p align="right"> to <p style="text-align:right;">
        foreach ($definition->info[$token->name]->attr_transform_pre as $transform) {
            $attr = $transform->transform($o = $attr, $config, $context);
            if ($e && ($attr != $o)) $e->send(E_NOTICE, 'AttrValidator: Attributes transformed', $o, $attr);
        }

        // create alias to this element's attribute definition array, see
        // also $d_defs (global attribute definition array)
        // DEFINITION CALL
        $defs = $definition->info[$token->name]->attr;

        // registered by reference, so the context entry follows the
        // loop variable below
        $attr_key = false;
        $context->register('CurrentAttr', $attr_key);

        // iterate through all the attribute keypairs
        // Watch out for name collisions: $key has previously been used
        foreach ($attr as $attr_key => $value) {

            // call the definition
            if ( isset($defs[$attr_key]) ) {
                // there is a local definition defined
                if ($defs[$attr_key] === false) {
                    // We've explicitly been told not to allow this attribute.
                    // This is usually when there's a global definition
                    // that must be overridden.
                    // Theoretically speaking, we could have a
                    // AttrDef_DenyAll, but this is faster!
                    $result = false;
                } else {
                    // validate according to the element's definition
                    $result = $defs[$attr_key]->validate(
                                    $value, $config, $context
                               );
                }
            } elseif ( isset($d_defs[$attr_key]) ) {
                // there is a global definition defined, validate according
                // to the global definition
                $result = $d_defs[$attr_key]->validate(
                                $value, $config, $context
                           );
            } else {
                // system never heard of the attribute? DELETE!
                $result = false;
            }

            // put the results into effect
            if ($result === false || $result === null) {
                // this is a generic error message that should be replaced
                // with more specific ones when possible
                if ($e) $e->send(E_ERROR, 'AttrValidator: Attribute removed');

                // remove the attribute
                unset($attr[$attr_key]);
            } elseif (is_string($result)) {
                // generally, if a substitution is happening, there
                // was some sort of implicit correction going on. We'll
                // delegate it to the attribute classes to say exactly what.

                // simple substitution
                $attr[$attr_key] = $result;
            }

            // we'd also want slightly more complicated substitution
            // involving an array as the return value,
            // although we're not sure how colliding attributes would
            // resolve (certain ones would be completely overridden,
            // others would prepend themselves).
        }

        $context->destroy('CurrentAttr');

        // post transforms

        // global (error reporting untested)
        foreach ($definition->info_attr_transform_post as $transform) {
            $attr = $transform->transform($o = $attr, $config, $context);
            if ($e && ($attr != $o)) $e->send(E_NOTICE, 'AttrValidator: Attributes transformed', $o, $attr);
        }

        // local (error reporting untested)
        foreach ($definition->info[$token->name]->attr_transform_post as $transform) {
            $attr = $transform->transform($o = $attr, $config, $context);
            if ($e && ($attr != $o)) $e->send(E_NOTICE, 'AttrValidator: Attributes transformed', $o, $attr);
        }

        // destroy CurrentToken if we made it ourselves
        if (!$current_token) $context->destroy('CurrentToken');

    }

}

View File

@ -1,5 +1,7 @@
<?php
require_once 'HTMLPurifier/Definition.php';
require_once 'HTMLPurifier/AttrDef/CSS/Background.php';
require_once 'HTMLPurifier/AttrDef/CSS/BackgroundPosition.php';
require_once 'HTMLPurifier/AttrDef/CSS/Border.php';
@ -15,13 +17,24 @@ require_once 'HTMLPurifier/AttrDef/CSS/TextDecoration.php';
require_once 'HTMLPurifier/AttrDef/CSS/URI.php';
require_once 'HTMLPurifier/AttrDef/Enum.php';
HTMLPurifier_ConfigSchema::define(
'CSS', 'DefinitionRev', 1, 'int', '
<p>
Revision identifier for your custom definition. See
%HTML.DefinitionRev for details. This directive has been available
since 2.0.0.
</p>
');
/**
* Defines allowed CSS attributes and what their values are.
* @see HTMLPurifier_HTMLDefinition
*/
class HTMLPurifier_CSSDefinition
class HTMLPurifier_CSSDefinition extends HTMLPurifier_Definition
{
var $type = 'CSS';
/**
* Assoc array of attribute name to definition object.
*/
@ -30,7 +43,7 @@ class HTMLPurifier_CSSDefinition
/**
* Constructs the info array. The meat of this class.
*/
function setup($config) {
function doSetup($config) {
$this->info['text-align'] = new HTMLPurifier_AttrDef_Enum(
array('left', 'right', 'center', 'justify'), false);
@ -213,4 +226,3 @@ class HTMLPurifier_CSSDefinition
}
?>

View File

@ -36,6 +36,11 @@ class HTMLPurifier_ChildDef
*/
var $allow_empty;
/**
* Lookup array of all elements that this definition could possibly allow
*/
var $elements = array();
/**
* Validates nodes according to definition and returns modification.
*
@ -52,4 +57,4 @@ class HTMLPurifier_ChildDef
}
}
?>

View File

@ -35,6 +35,7 @@ class HTMLPurifier_ChildDef_Chameleon extends HTMLPurifier_ChildDef
function HTMLPurifier_ChildDef_Chameleon($inline, $block) {
$this->inline = new HTMLPurifier_ChildDef_Optional($inline);
$this->block = new HTMLPurifier_ChildDef_Optional($block);
$this->elements = $this->block->elements;
}
function validateChildren($tokens_of_children, $config, &$context) {
@ -48,4 +49,3 @@ class HTMLPurifier_ChildDef_Chameleon extends HTMLPurifier_ChildDef
}
}
?>

View File

@ -38,8 +38,27 @@ class HTMLPurifier_ChildDef_Custom extends HTMLPurifier_ChildDef
if ($raw{0} != '(') {
$raw = "($raw)";
}
$reg = str_replace(',', ',?', $raw);
$reg = preg_replace('/([#a-zA-Z0-9_.-]+)/', '(,?\\0)', $reg);
$el = '[#a-zA-Z0-9_.-]+';
$reg = $raw;
// COMPLICATED! AND MIGHT BE BUGGY! I HAVE NO CLUE WHAT I'M
// DOING! Seriously: if there's problems, please report them.
// collect all elements into the $elements array
preg_match_all("/$el/", $reg, $matches);
foreach ($matches[0] as $match) {
$this->elements[$match] = true;
}
// setup all elements as parentheticals with leading commas
$reg = preg_replace("/$el/", '(,\\0)', $reg);
// remove commas when they were not solicited
$reg = preg_replace("/([^,(|]\(+),/", '\\1', $reg);
// remove all non-parenthetical commas: they are handled by first regex
$reg = preg_replace("/,\(/", '(', $reg);
$this->_pcre_regex = $reg;
}
function validateChildren($tokens_of_children, $config, &$context) {
@ -60,11 +79,11 @@ class HTMLPurifier_ChildDef_Custom extends HTMLPurifier_ChildDef
$list_of_children .= $token->name . ',';
}
}
$list_of_children = rtrim($list_of_children, ',');
// add leading comma to deal with stray comma declarations
$list_of_children = ',' . rtrim($list_of_children, ',');
$okay =
preg_match(
'/^'.$this->_pcre_regex.'$/',
'/^,?'.$this->_pcre_regex.'$/',
$list_of_children
);
@ -72,4 +91,3 @@ class HTMLPurifier_ChildDef_Custom extends HTMLPurifier_ChildDef
}
}
?>

View File

@ -19,4 +19,3 @@ class HTMLPurifier_ChildDef_Empty extends HTMLPurifier_ChildDef
}
}
?>

View File

@ -20,4 +20,3 @@ class HTMLPurifier_ChildDef_Optional extends HTMLPurifier_ChildDef_Required
}
}
?>

View File

@ -25,11 +25,10 @@ class HTMLPurifier_ChildDef_Required extends HTMLPurifier_ChildDef
$elements = array_flip($elements);
foreach ($elements as $i => $x) {
$elements[$i] = true;
if (empty($i)) unset($elements[$i]);
if (empty($i)) unset($elements[$i]); // remove blank
}
}
$this->elements = $elements;
$this->gen = new HTMLPurifier_Generator();
}
var $allow_empty = false;
var $type = 'required';
@ -57,6 +56,12 @@ class HTMLPurifier_ChildDef_Required extends HTMLPurifier_ChildDef
// some configuration
$escape_invalid_children = $config->get('Core', 'EscapeInvalidChildren');
// generator
static $gen = null;
if ($gen === null) {
$gen = new HTMLPurifier_Generator();
}
foreach ($tokens_of_children as $token) {
if (!empty($token->is_whitespace)) {
$result[] = $token;
@ -80,7 +85,7 @@ class HTMLPurifier_ChildDef_Required extends HTMLPurifier_ChildDef
$result[] = $token;
} elseif ($pcdata_allowed && $escape_invalid_children) {
$result[] = new HTMLPurifier_Token_Text(
$this->gen->generateFromToken($token, $config)
$gen->generateFromToken($token, $config)
);
}
continue;
@ -91,7 +96,7 @@ class HTMLPurifier_ChildDef_Required extends HTMLPurifier_ChildDef
} elseif ($pcdata_allowed && $escape_invalid_children) {
$result[] =
new HTMLPurifier_Token_Text(
$this->gen->generateFromToken( $token, $config )
$gen->generateFromToken( $token, $config )
);
} else {
// drop silently
@ -104,4 +109,3 @@ class HTMLPurifier_ChildDef_Required extends HTMLPurifier_ChildDef
}
}
?>

View File

@ -45,8 +45,8 @@ extends HTMLPurifier_ChildDef_Required
if (!$is_inline) {
if (!$depth) {
if (
$token->type == 'text' ||
!isset($this->elements[$token->name])
($token->type == 'text' && !$token->is_whitespace) ||
($token->type != 'text' && !isset($this->elements[$token->name]))
) {
$is_inline = true;
$ret[] = $block_wrap_start;
@ -73,4 +73,3 @@ extends HTMLPurifier_ChildDef_Required
}
}
?>

View File

@ -9,6 +9,8 @@ class HTMLPurifier_ChildDef_Table extends HTMLPurifier_ChildDef
{
var $allow_empty = false;
var $type = 'table';
var $elements = array('tr' => true, 'tbody' => true, 'thead' => true,
'tfoot' => true, 'caption' => true, 'colgroup' => true, 'col' => true);
function HTMLPurifier_ChildDef_Table() {}
function validateChildren($tokens_of_children, $config, &$context) {
if (empty($tokens_of_children)) return false;
@ -139,4 +141,3 @@ class HTMLPurifier_ChildDef_Table extends HTMLPurifier_ChildDef
}
}
?>

View File

@ -1,5 +1,29 @@
<?php
require_once 'HTMLPurifier/ConfigSchema.php';
// member variables
require_once 'HTMLPurifier/HTMLDefinition.php';
require_once 'HTMLPurifier/CSSDefinition.php';
require_once 'HTMLPurifier/URIDefinition.php';
require_once 'HTMLPurifier/Doctype.php';
require_once 'HTMLPurifier/DefinitionCacheFactory.php';
// accommodations for versions earlier than 4.3.10 and 5.0.2
// borrowed from PHP_Compat, LGPL licensed, by Aidan Lister <aidan@php.net>
// Only Windows uses "\r\n". Darwin (Mac OS X) uses "\n" like every other
// Unix; the "\r" convention belonged to classic Mac OS, so no separate
// Darwin case is defined.
if (!defined('PHP_EOL')) {
    if (strtoupper(substr(PHP_OS, 0, 3)) === 'WIN') {
        define('PHP_EOL', "\r\n");
    } else {
        define('PHP_EOL', "\n");
    }
}
/**
* Configuration object that triggers customizable behavior.
*
@ -15,6 +39,11 @@
class HTMLPurifier_Config
{
/**
* HTML Purifier's version
*/
var $version = '2.1.1';
/**
* Two-level associative array of configuration directives
*/
@ -26,14 +55,31 @@ class HTMLPurifier_Config
var $def;
/**
* Cached instance of HTMLPurifier_HTMLDefinition
* Indexed array of definitions
*/
var $html_definition;
var $definitions;
/**
* Cached instance of HTMLPurifier_CSSDefinition
* Bool indicator whether or not config is finalized
*/
var $css_definition;
var $finalized = false;
/**
* Bool indicator whether or not to automatically finalize
* the object if a read operation is done
*/
var $autoFinalize = true;
/**
* Namespace indexed array of serials for specific namespaces (see
* getSerial for more info).
*/
var $serials = array();
/**
* Serial for entire configuration object
*/
var $serial;
/**
* @param $definition HTMLPurifier_ConfigSchema that defines what directives
@ -54,7 +100,10 @@ class HTMLPurifier_Config
* @return Configured HTMLPurifier_Config object
*/
function create($config) {
if (is_a($config, 'HTMLPurifier_Config')) return $config;
if (is_a($config, 'HTMLPurifier_Config')) {
// pass-through
return $config;
}
$ret = HTMLPurifier_Config::createDefault();
if (is_string($config)) $ret->loadIni($config);
elseif (is_array($config)) $ret->loadArray($config);
@ -78,13 +127,16 @@ class HTMLPurifier_Config
* @param $key String key
*/
function get($namespace, $key, $from_alias = false) {
if (!$this->finalized && $this->autoFinalize) $this->finalize();
if (!isset($this->def->info[$namespace][$key])) {
trigger_error('Cannot retrieve value of undefined directive',
// can't add % due to SimpleTest bug
trigger_error('Cannot retrieve value of undefined directive ' . htmlspecialchars("$namespace.$key"),
E_USER_WARNING);
return;
}
if ($this->def->info[$namespace][$key]->class == 'alias') {
trigger_error('Cannot get value from aliased directive, use real name',
$d = $this->def->info[$namespace][$key];
trigger_error('Cannot get value from aliased directive, use real name ' . $d->namespace . '.' . $d->name,
E_USER_ERROR);
return;
}
@ -96,14 +148,50 @@ class HTMLPurifier_Config
* @param $namespace String namespace
*/
function getBatch($namespace) {
if (!$this->finalized && $this->autoFinalize) $this->finalize();
if (!isset($this->def->info[$namespace])) {
trigger_error('Cannot retrieve undefined namespace',
trigger_error('Cannot retrieve undefined namespace ' . htmlspecialchars($namespace),
E_USER_WARNING);
return;
}
return $this->conf[$namespace];
}
/**
 * Returns an md5 signature of one namespace of the configuration
 * object. The signature is computed once and then kept in
 * $this->serials.
 * @note DefinitionRev is handled specially and is removed from the
 *       batch before the signature is computed!
 * @param $namespace Namespace to get serial for
 */
function getBatchSerial($namespace) {
    // already computed for this namespace
    if (!empty($this->serials[$namespace])) {
        return $this->serials[$namespace];
    }
    $directives = $this->getBatch($namespace);
    unset($directives['DefinitionRev']);
    $signature = md5(serialize($directives));
    $this->serials[$namespace] = $signature;
    return $signature;
}
/**
 * Returns an md5 signature of every directive in the configuration
 * object. The signature is computed once and then kept in $this->serial.
 */
function getSerial() {
    // already computed
    if (!empty($this->serial)) return $this->serial;
    $everything = $this->getAll();
    $this->serial = md5(serialize($everything));
    return $this->serial;
}
/**
 * Retrieves all directive values, organized by namespace. Finalizes the
 * configuration first if it is not finalized yet and auto-finalization
 * is switched on.
 */
function getAll() {
    $needs_finalizing = !$this->finalized && $this->autoFinalize;
    if ($needs_finalizing) {
        $this->finalize();
    }
    return $this->conf;
}
/**
* Sets a value to configuration.
* @param $namespace String namespace
@ -111,15 +199,16 @@ class HTMLPurifier_Config
* @param $value Mixed value
*/
function set($namespace, $key, $value, $from_alias = false) {
if ($this->isFinalized('Cannot set directive after finalization')) return;
if (!isset($this->def->info[$namespace][$key])) {
trigger_error('Cannot set undefined directive to value',
trigger_error('Cannot set undefined directive ' . htmlspecialchars("$namespace.$key") . ' to value',
E_USER_WARNING);
return;
}
if ($this->def->info[$namespace][$key]->class == 'alias') {
if ($from_alias) {
trigger_error('Double-aliases not allowed, please fix '.
'ConfigSchema bug');
'ConfigSchema bug with' . "$namespace.$key");
}
$this->set($this->def->info[$namespace][$key]->namespace,
$this->def->info[$namespace][$key]->name,
@ -128,7 +217,7 @@ class HTMLPurifier_Config
}
$value = $this->def->validate(
$value,
$this->def->info[$namespace][$key]->type,
$type = $this->def->info[$namespace][$key]->type,
$this->def->info[$namespace][$key]->allow_null
);
if (is_string($value)) {
@ -139,23 +228,36 @@ class HTMLPurifier_Config
if ($this->def->info[$namespace][$key]->allowed !== true) {
// check to see if the value is allowed
if (!isset($this->def->info[$namespace][$key]->allowed[$value])) {
trigger_error('Value not supported', E_USER_WARNING);
trigger_error('Value not supported, valid values are: ' .
$this->_listify($this->def->info[$namespace][$key]->allowed), E_USER_WARNING);
return;
}
}
}
if ($this->def->isError($value)) {
trigger_error('Value is of invalid type', E_USER_WARNING);
trigger_error('Value for ' . "$namespace.$key" . ' is of invalid type, should be ' . $type, E_USER_WARNING);
return;
}
$this->conf[$namespace][$key] = $value;
if ($namespace == 'HTML' || $namespace == 'Attr') {
// reset HTML definition if relevant attributes changed
$this->html_definition = null;
}
if ($namespace == 'CSS') {
$this->css_definition = null;
// reset definitions if the directives they depend on changed
// this is a very costly process, so it's discouraged
// with finalization
if ($namespace == 'HTML' || $namespace == 'CSS') {
$this->definitions[$namespace] = null;
}
$this->serials[$namespace] = false;
}
/**
 * Convenience function for error reporting: turns a lookup array into
 * a comma-separated list of its keys.
 * @param $lookup Assoc array whose keys are the names to list
 * @return String of keys joined by ', '
 * @private
 */
function _listify($lookup) {
    // only the keys matter, the values are ignored
    return implode(', ', array_keys($lookup));
}
/**
@ -164,26 +266,76 @@ class HTMLPurifier_Config
* called before it's been setup, otherwise won't work.
*/
function &getHTMLDefinition($raw = false) {
if (
empty($this->html_definition) || // hasn't ever been setup
($raw && $this->html_definition->setup) // requesting new one
) {
$this->html_definition = new HTMLPurifier_HTMLDefinition($this);
if ($raw) return $this->html_definition; // no setup!
}
if (!$this->html_definition->setup) $this->html_definition->setup();
return $this->html_definition;
$def =& $this->getDefinition('HTML', $raw);
return $def; // prevent PHP 4.4.0 from complaining
}
/**
* Retrieves reference to the CSS definition
*/
function &getCSSDefinition() {
if ($this->css_definition === null) {
$this->css_definition = new HTMLPurifier_CSSDefinition();
$this->css_definition->setup($this);
function &getCSSDefinition($raw = false) {
$def =& $this->getDefinition('CSS', $raw);
return $def;
}
/**
* Retrieves a definition, looking first at the copy held in
* $this->definitions, then at the definition cache, and building a new
* object only when both miss.
* @param $type Type of definition: HTML, CSS, etc
* @param $raw Whether or not definition should be returned raw, that is
*             without setup() having been called on it
* @return Reference to the definition; false for an unsupported type;
*         an HTMLPurifier_Error object when a raw definition is requested
*         but the DefinitionID directive of that namespace is null
*/
function &getDefinition($type, $raw = false) {
// reading a definition counts as a read operation, so finalize first
if (!$this->finalized && $this->autoFinalize) $this->finalize();
$factory = HTMLPurifier_DefinitionCacheFactory::instance();
$cache = $factory->create($type, $this);
if (!$raw) {
// see if we can quickly supply a definition
if (!empty($this->definitions[$type])) {
// a definition held in memory that has not been set up yet is set up
// and written to the cache before being handed out
if (!$this->definitions[$type]->setup) {
$this->definitions[$type]->setup($this);
$cache->set($this->definitions[$type], $this);
}
return $this->definitions[$type];
}
// memory check missed, try cache
$this->definitions[$type] = $cache->get($this);
if ($this->definitions[$type]) {
// definition in cache, return it
return $this->definitions[$type];
}
} elseif (
!empty($this->definitions[$type]) &&
!$this->definitions[$type]->setup
) {
// raw requested, raw in memory, quick return
return $this->definitions[$type];
}
// NOTE(review): this return makes everything below unreachable and reads
// css_definition, which nothing else in this method uses - it looks like
// a leftover from the previous getCSSDefinition() body; confirm and remove
return $this->css_definition;
// quick checks failed, let's create the object
if ($type == 'HTML') {
$this->definitions[$type] = new HTMLPurifier_HTMLDefinition();
} elseif ($type == 'CSS') {
$this->definitions[$type] = new HTMLPurifier_CSSDefinition();
} elseif ($type == 'URI') {
$this->definitions[$type] = new HTMLPurifier_URIDefinition();
} else {
trigger_error("Definition of $type type not supported");
// the function returns by reference, so a variable is needed
$false = false;
return $false;
}
// quick abort if raw
if ($raw) {
if (is_null($this->get($type, 'DefinitionID'))) {
// fatally error out if definition ID not set
trigger_error("Cannot retrieve raw version without specifying %$type.DefinitionID", E_USER_ERROR);
// despite its name, $false holds an error object here
$false = new HTMLPurifier_Error();
return $false;
}
// the raw definition is neither set up nor written to the cache
return $this->definitions[$type];
}
// set it up
$this->definitions[$type]->setup($this);
// save in cache
$cache->set($this->definitions[$type], $this);
return $this->definitions[$type];
}
/**
@ -192,6 +344,7 @@ class HTMLPurifier_Config
* @param $config_array Configuration associative array
*/
function loadArray($config_array) {
if ($this->isFinalized('Cannot load directives after finalization')) return;
foreach ($config_array as $key => $value) {
$key = str_replace('_', '.', $key);
if (strpos($key, '.') !== false) {
@ -208,15 +361,134 @@ class HTMLPurifier_Config
}
}
/**
 * Returns a list of array(namespace, directive) for all directives
 * that are allowed in a web-form context as per an allowed
 * namespaces/directives list. Aliases and the DefinitionID and
 * DefinitionRev directives are never included.
 * @param $allowed True to allow everything, otherwise a string or list
 *                 of strings: an entry containing a dot names a single
 *                 directive (blacklisted if it starts with '-'), any
 *                 other entry names a whole namespace
 * @static
 */
function getAllowedDirectivesForForm($allowed) {
    $schema = HTMLPurifier_ConfigSchema::instance();

    // sort the entries of the list into three lookup tables
    $restricted = ($allowed !== true);
    if ($restricted) {
        if (is_string($allowed)) $allowed = array($allowed);
        $allowed_ns = array();
        $allowed_directives = array();
        $blacklisted_directives = array();
        foreach ($allowed as $entry) {
            if (strpos($entry, '.') === false) {
                // namespace
                $allowed_ns[$entry] = true;
            } elseif ($entry[0] == '-') {
                // blacklisted directive, stored without the leading dash
                $blacklisted_directives[substr($entry, 1)] = true;
            } else {
                // directive
                $allowed_directives[$entry] = true;
            }
        }
    }

    // walk the schema and keep whatever passes the filters
    $ret = array();
    foreach ($schema->info as $ns => $keypairs) {
        foreach ($keypairs as $directive => $def) {
            if ($restricted) {
                $full_name = "$ns.$directive";
                if (isset($blacklisted_directives[$full_name])) continue;
                if (!isset($allowed_directives[$full_name]) && !isset($allowed_ns[$ns])) continue;
            }
            if ($def->class == 'alias') continue;
            if ($directive == 'DefinitionID' || $directive == 'DefinitionRev') continue;
            $ret[] = array($ns, $directive);
        }
    }
    return $ret;
}
/**
 * Creates a configuration object from values in $_GET/$_POST that were
 * posted via ConfigForm
 * @param $array $_GET or $_POST array to import
 * @param $index Index/name that the config variables are in
 * @param $allowed List of allowed namespaces/directives
 * @param $mq_fix Boolean whether or not to enable magic quotes fix
 * @return Result of HTMLPurifier_Config::create() on the prepared array
 * @static
 */
function loadArrayFromForm($array, $index, $allowed = true, $mq_fix = true) {
    return HTMLPurifier_Config::create(
        HTMLPurifier_Config::prepareArrayFromForm($array, $index, $allowed, $mq_fix)
    );
}
/**
 * Merges configuration values from $_GET/$_POST into this object by
 * preparing them and handing the result to loadArray(). NOT STATIC.
 * @note Same parameters as loadArrayFromForm
 */
function mergeArrayFromForm($array, $index, $allowed = true, $mq_fix = true) {
    $this->loadArray(
        HTMLPurifier_Config::prepareArrayFromForm($array, $index, $allowed, $mq_fix)
    );
}
/**
 * Prepares an array from a form into something usable for the more
 * strict parts of HTMLPurifier_Config. Only directives permitted by
 * getAllowedDirectivesForForm() are copied; a non-empty
 * "Null_Namespace.Directive" entry sets that directive to null.
 * @param $array $_GET or $_POST array to import
 * @param $index Index/name that the config variables are in
 * @param $allowed List of allowed namespaces/directives
 * @param $mq_fix Boolean whether or not to enable magic quotes fix
 * @return Two-level array of namespace => directive => value
 * @static
 */
function prepareArrayFromForm($array, $index, $allowed = true, $mq_fix = true) {
    $array = (isset($array[$index]) && is_array($array[$index])) ? $array[$index] : array();
    // get_magic_quotes_gpc() was removed in PHP 8.0; calling it there
    // is a fatal error, so only consult it where it still exists
    $mq = $mq_fix && function_exists('get_magic_quotes_gpc') && get_magic_quotes_gpc();

    $allowed = HTMLPurifier_Config::getAllowedDirectivesForForm($allowed);
    $ret = array();
    foreach ($allowed as $key) {
        list($ns, $directive) = $key;
        $skey = "$ns.$directive";
        // an explicit null request wins over any submitted value
        if (!empty($array["Null_$skey"])) {
            $ret[$ns][$directive] = null;
            continue;
        }
        if (!isset($array[$skey])) continue;
        $value = $mq ? stripslashes($array[$skey]) : $array[$skey];
        $ret[$ns][$directive] = $value;
    }
    return $ret;
}
/**
 * Loads configuration values from an ini file. The file is parsed with
 * sections enabled and the result is handed to loadArray(). Does
 * nothing but raise an error once the configuration is finalized.
 * @param $filename Name of ini file
 */
function loadIni($filename) {
    if ($this->isFinalized('Cannot load directives after finalization')) {
        return;
    }
    $this->loadArray(parse_ini_file($filename, true));
}
/**
 * Checks whether or not the configuration object is finalized, and
 * optionally raises an E_USER_ERROR if it is.
 * @param $error String error message, or false for no error
 * @return Bool value of the finalized flag
 */
function isFinalized($error = false) {
    // complain only when a message was supplied and we really are finalized
    $complain = $this->finalized && $error;
    if ($complain) trigger_error($error, E_USER_ERROR);
    return $this->finalized;
}
/**
 * Finalizes configuration only if auto finalize is on and not
 * already finalized
 */
function autoFinalize() {
    if ($this->finalized) return;
    if (!$this->autoFinalize) return;
    $this->finalize();
}
/**
* Finalizes a configuration object, prohibiting further change. Only
* sets the finalized flag; the refusal itself happens in the methods
* that consult isFinalized() before writing.
*/
function finalize() {
$this->finalized = true;
}
}
?>

View File

@ -7,4 +7,3 @@ class HTMLPurifier_ConfigDef {
var $class = false;
}
?>

View File

@ -61,6 +61,12 @@ class HTMLPurifier_ConfigDef_Directive extends HTMLPurifier_ConfigDef
*/
var $aliases = array();
/**
* Advisory list of directive aliases, i.e. other directives that
* redirect here
*/
var $directiveAliases = array();
/**
* Adds a description to the array
*/
@ -71,4 +77,3 @@ class HTMLPurifier_ConfigDef_Directive extends HTMLPurifier_ConfigDef
}
?>

View File

@ -24,4 +24,3 @@ class HTMLPurifier_ConfigDef_DirectiveAlias extends HTMLPurifier_ConfigDef
}
}
?>

View File

@ -20,4 +20,3 @@ class HTMLPurifier_ConfigDef_Namespace extends HTMLPurifier_ConfigDef {
}
?>

View File

@ -6,8 +6,11 @@ require_once 'HTMLPurifier/ConfigDef/Namespace.php';
require_once 'HTMLPurifier/ConfigDef/Directive.php';
require_once 'HTMLPurifier/ConfigDef/DirectiveAlias.php';
if (!defined('HTMLPURIFIER_SCHEMA_STRICT')) define('HTMLPURIFIER_SCHEMA_STRICT', false);
/**
* Configuration definition, defines directives and their defaults.
* @note If you update this, please update Printer_ConfigForm
* @todo The ability to define things multiple times is confusing and should
* be factored out to its own function named registerDependency() or
* addNote(), where only the namespace.name and an extra descriptions
@ -48,6 +51,8 @@ class HTMLPurifier_ConfigSchema {
var $types = array(
'string' => 'String',
'istring' => 'Case-insensitive string',
'text' => 'Text',
'itext' => 'Case-insensitive text',
'int' => 'Integer',
'float' => 'Float',
'bool' => 'Boolean',
@ -66,6 +71,10 @@ class HTMLPurifier_ConfigSchema {
$this->defineNamespace('URI', 'Features regarding Uniform Resource Identifiers.');
$this->defineNamespace('HTML', 'Configuration regarding allowed HTML.');
$this->defineNamespace('CSS', 'Configuration regarding allowed CSS.');
$this->defineNamespace('AutoFormat', 'Configuration for activating auto-formatting functionality (also known as <code>Injector</code>s)');
$this->defineNamespace('AutoFormatParam', 'Configuration for customizing auto-formatting functionality');
$this->defineNamespace('Output', 'Configuration relating to the generation of (X)HTML.');
$this->defineNamespace('Cache', 'Configuration for DefinitionCache and related subclasses.');
$this->defineNamespace('Test', 'Developer testing configuration for our unit tests.');
}
@ -95,27 +104,30 @@ class HTMLPurifier_ConfigSchema {
* HTMLPurifier_DirectiveDef::$type for allowed values
* @param $description Description of directive for documentation
*/
function define(
$namespace, $name, $default, $type,
$description
) {
function define($namespace, $name, $default, $type, $description) {
$def =& HTMLPurifier_ConfigSchema::instance();
if (!isset($def->info[$namespace])) {
trigger_error('Cannot define directive for undefined namespace',
E_USER_ERROR);
return;
}
if (!ctype_alnum($name)) {
trigger_error('Directive name must be alphanumeric',
E_USER_ERROR);
return;
}
if (empty($description)) {
trigger_error('Description must be non-empty',
E_USER_ERROR);
return;
// basic sanity checks
if (HTMLPURIFIER_SCHEMA_STRICT) {
if (!isset($def->info[$namespace])) {
trigger_error('Cannot define directive for undefined namespace',
E_USER_ERROR);
return;
}
if (!ctype_alnum($name)) {
trigger_error('Directive name must be alphanumeric',
E_USER_ERROR);
return;
}
if (empty($description)) {
trigger_error('Description must be non-empty',
E_USER_ERROR);
return;
}
}
if (isset($def->info[$namespace][$name])) {
// already defined
if (
$def->info[$namespace][$name]->type !== $type ||
$def->defaults[$namespace][$name] !== $default
@ -124,29 +136,35 @@ class HTMLPurifier_ConfigSchema {
return;
}
} else {
// process modifiers
// needs defining
// process modifiers (OPTIMIZE!)
$type_values = explode('/', $type, 2);
$type = $type_values[0];
$modifier = isset($type_values[1]) ? $type_values[1] : false;
$allow_null = ($modifier === 'null');
if (!isset($def->types[$type])) {
trigger_error('Invalid type for configuration directive',
E_USER_ERROR);
return;
}
$default = $def->validate($default, $type, $allow_null);
if ($def->isError($default)) {
trigger_error('Default value does not match directive type',
E_USER_ERROR);
return;
if (HTMLPURIFIER_SCHEMA_STRICT) {
if (!isset($def->types[$type])) {
trigger_error('Invalid type for configuration directive',
E_USER_ERROR);
return;
}
$default = $def->validate($default, $type, $allow_null);
if ($def->isError($default)) {
trigger_error('Default value does not match directive type',
E_USER_ERROR);
return;
}
}
$def->info[$namespace][$name] =
new HTMLPurifier_ConfigDef_Directive();
$def->info[$namespace][$name]->type = $type;
$def->info[$namespace][$name]->allow_null = $allow_null;
$def->defaults[$namespace][$name] = $default;
}
if (!HTMLPURIFIER_SCHEMA_STRICT) return;
$backtrace = debug_backtrace();
$file = $def->mungeFilename($backtrace[0]['file']);
$line = $backtrace[0]['line'];
@ -161,19 +179,21 @@ class HTMLPurifier_ConfigSchema {
*/
function defineNamespace($namespace, $description) {
$def =& HTMLPurifier_ConfigSchema::instance();
if (isset($def->info[$namespace])) {
trigger_error('Cannot redefine namespace', E_USER_ERROR);
return;
}
if (!ctype_alnum($namespace)) {
trigger_error('Namespace name must be alphanumeric',
E_USER_ERROR);
return;
}
if (empty($description)) {
trigger_error('Description must be non-empty',
E_USER_ERROR);
return;
if (HTMLPURIFIER_SCHEMA_STRICT) {
if (isset($def->info[$namespace])) {
trigger_error('Cannot redefine namespace', E_USER_ERROR);
return;
}
if (!ctype_alnum($namespace)) {
trigger_error('Namespace name must be alphanumeric',
E_USER_ERROR);
return;
}
if (empty($description)) {
trigger_error('Description must be non-empty',
E_USER_ERROR);
return;
}
}
$def->info[$namespace] = array();
$def->info_namespace[$namespace] = new HTMLPurifier_ConfigDef_Namespace();
@ -194,23 +214,25 @@ class HTMLPurifier_ConfigSchema {
*/
function defineValueAliases($namespace, $name, $aliases) {
$def =& HTMLPurifier_ConfigSchema::instance();
if (!isset($def->info[$namespace][$name])) {
if (HTMLPURIFIER_SCHEMA_STRICT && !isset($def->info[$namespace][$name])) {
trigger_error('Cannot set value alias for non-existant directive',
E_USER_ERROR);
return;
}
foreach ($aliases as $alias => $real) {
if (!$def->info[$namespace][$name] !== true &&
!isset($def->info[$namespace][$name]->allowed[$real])
) {
trigger_error('Cannot define alias to value that is not allowed',
E_USER_ERROR);
return;
}
if (isset($def->info[$namespace][$name]->allowed[$alias])) {
trigger_error('Cannot define alias over allowed value',
E_USER_ERROR);
return;
if (HTMLPURIFIER_SCHEMA_STRICT) {
if (!$def->info[$namespace][$name] !== true &&
!isset($def->info[$namespace][$name]->allowed[$real])
) {
trigger_error('Cannot define alias to value that is not allowed',
E_USER_ERROR);
return;
}
if (isset($def->info[$namespace][$name]->allowed[$alias])) {
trigger_error('Cannot define alias over allowed value',
E_USER_ERROR);
return;
}
}
$def->info[$namespace][$name]->aliases[$alias] = $real;
}
@ -225,14 +247,14 @@ class HTMLPurifier_ConfigSchema {
*/
function defineAllowedValues($namespace, $name, $allowed_values) {
$def =& HTMLPurifier_ConfigSchema::instance();
if (!isset($def->info[$namespace][$name])) {
if (HTMLPURIFIER_SCHEMA_STRICT && !isset($def->info[$namespace][$name])) {
trigger_error('Cannot define allowed values for undefined directive',
E_USER_ERROR);
return;
}
$directive =& $def->info[$namespace][$name];
$type = $directive->type;
if ($type != 'string' && $type != 'istring') {
if (HTMLPURIFIER_SCHEMA_STRICT && $type != 'string' && $type != 'istring') {
trigger_error('Cannot define allowed values for directive whose type is not string',
E_USER_ERROR);
return;
@ -243,8 +265,11 @@ class HTMLPurifier_ConfigSchema {
foreach ($allowed_values as $value) {
$directive->allowed[$value] = true;
}
if ($def->defaults[$namespace][$name] !== null &&
!isset($directive->allowed[$def->defaults[$namespace][$name]])) {
if (
HTMLPURIFIER_SCHEMA_STRICT &&
$def->defaults[$namespace][$name] !== null &&
!isset($directive->allowed[$def->defaults[$namespace][$name]])
) {
trigger_error('Default value must be in allowed range of variables',
E_USER_ERROR);
$directive->allowed = true; // undo undo!
@ -262,34 +287,37 @@ class HTMLPurifier_ConfigSchema {
*/
function defineAlias($namespace, $name, $new_namespace, $new_name) {
$def =& HTMLPurifier_ConfigSchema::instance();
if (!isset($def->info[$namespace])) {
trigger_error('Cannot define directive alias in undefined namespace',
E_USER_ERROR);
return;
}
if (!ctype_alnum($name)) {
trigger_error('Directive name must be alphanumeric',
E_USER_ERROR);
return;
}
if (isset($def->info[$namespace][$name])) {
trigger_error('Cannot define alias over directive',
E_USER_ERROR);
return;
}
if (!isset($def->info[$new_namespace][$new_name])) {
trigger_error('Cannot define alias to undefined directive',
E_USER_ERROR);
return;
}
if ($def->info[$new_namespace][$new_name]->class == 'alias') {
trigger_error('Cannot define alias to alias',
E_USER_ERROR);
return;
if (HTMLPURIFIER_SCHEMA_STRICT) {
if (!isset($def->info[$namespace])) {
trigger_error('Cannot define directive alias in undefined namespace',
E_USER_ERROR);
return;
}
if (!ctype_alnum($name)) {
trigger_error('Directive name must be alphanumeric',
E_USER_ERROR);
return;
}
if (isset($def->info[$namespace][$name])) {
trigger_error('Cannot define alias over directive',
E_USER_ERROR);
return;
}
if (!isset($def->info[$new_namespace][$new_name])) {
trigger_error('Cannot define alias to undefined directive',
E_USER_ERROR);
return;
}
if ($def->info[$new_namespace][$new_name]->class == 'alias') {
trigger_error('Cannot define alias to alias',
E_USER_ERROR);
return;
}
}
$def->info[$namespace][$name] =
new HTMLPurifier_ConfigDef_DirectiveAlias(
$new_namespace, $new_name);
$def->info[$new_namespace][$new_name]->directiveAliases[] = "$namespace.$name";
}
/**
@ -303,11 +331,14 @@ class HTMLPurifier_ConfigSchema {
if ($allow_null && $var === null) return null;
switch ($type) {
case 'mixed':
//if (is_string($var)) $var = unserialize($var);
return $var;
case 'istring':
case 'string':
case 'text': // no difference, just is longer/multiple line string
case 'itext':
if (!is_string($var)) break;
if ($type === 'istring') $var = strtolower($var);
if ($type === 'istring' || $type === 'itext') $var = strtolower($var);
return $var;
case 'int':
if (is_string($var) && ctype_digit($var)) $var = (int) $var;
@ -338,11 +369,25 @@ class HTMLPurifier_ConfigSchema {
// a single empty string item, but having an empty
// array is more intuitive
if ($var == '') return array();
// simplistic string to array method that only works
// for simple lists of tag names or alphanumeric characters
$var = explode(',',$var);
if (strpos($var, "\n") === false && strpos($var, "\r") === false) {
// simplistic string to array method that only works
// for simple lists of tag names or alphanumeric characters
$var = explode(',',$var);
} else {
$var = preg_split('/(,|[\n\r]+)/', $var);
}
// remove spaces
foreach ($var as $i => $j) $var[$i] = trim($j);
if ($type === 'hash') {
// key:value,key2:value2
$nvar = array();
foreach ($var as $keypair) {
$c = explode(':', $keypair, 2);
if (!isset($c[1])) continue;
$nvar[$c[0]] = $c[1];
}
$var = $nvar;
}
}
if (!is_array($var)) break;
$keys = array_keys($var);
@ -371,6 +416,7 @@ class HTMLPurifier_ConfigSchema {
* Takes an absolute path and munges it into a more manageable relative path
*/
function mungeFilename($filename) {
if (!HTMLPURIFIER_SCHEMA_STRICT) return $filename;
$offset = strrpos($filename, 'HTMLPurifier');
$filename = substr($filename, $offset);
$filename = str_replace('\\', '/', $filename);
@ -387,4 +433,4 @@ class HTMLPurifier_ConfigSchema {
}
}
?>

View File

@ -5,6 +5,9 @@ require_once 'HTMLPurifier/ChildDef.php';
require_once 'HTMLPurifier/ChildDef/Empty.php';
require_once 'HTMLPurifier/ChildDef/Required.php';
require_once 'HTMLPurifier/ChildDef/Optional.php';
require_once 'HTMLPurifier/ChildDef/Custom.php';
// NOT UNIT TESTED!!!
class HTMLPurifier_ContentSets
{
@ -145,4 +148,3 @@ class HTMLPurifier_ContentSets
}
?>

View File

@ -2,6 +2,8 @@
/**
* Registry object that contains information about the current context.
* @warning Is a bit buggy when variables are set to null: it thinks
* they don't exist! So use false instead, please.
*/
class HTMLPurifier_Context
{
@ -19,7 +21,7 @@ class HTMLPurifier_Context
*/
function register($name, &$ref) {
if (isset($this->_storage[$name])) {
trigger_error('Name collision, cannot re-register',
trigger_error("Name $name produces collision, cannot re-register",
E_USER_ERROR);
return;
}
@ -29,11 +31,14 @@ class HTMLPurifier_Context
/**
* Retrieves a variable reference from the context.
* @param $name String name
* @param $ignore_error Boolean whether or not to ignore error
*/
function &get($name) {
function &get($name, $ignore_error = false) {
if (!isset($this->_storage[$name])) {
trigger_error('Attempted to retrieve non-existent variable',
E_USER_ERROR);
if (!$ignore_error) {
trigger_error("Attempted to retrieve non-existent variable $name",
E_USER_ERROR);
}
$var = null; // so we can return by reference
return $var;
}
@ -46,7 +51,7 @@ class HTMLPurifier_Context
*/
function destroy($name) {
if (!isset($this->_storage[$name])) {
trigger_error('Attempted to destroy non-existent variable',
trigger_error("Attempted to destroy non-existent variable $name",
E_USER_ERROR);
return;
}
@ -73,4 +78,3 @@ class HTMLPurifier_Context
}
?>

View File

@ -0,0 +1,40 @@
<?php
/**
 * Base class for definition datatype objects. Provides a run-once
 * setup() wrapper around the doSetup() hook that subclasses override.
 */
class HTMLPurifier_Definition
{

    /**
     * Flag recording whether setup() has already run
     */
    var $setup = false;

    /**
     * Identifier for the kind of definition this object is
     */
    var $type;

    /**
     * Hook that brings the definition into its final form; the constructor
     * does not do this. The base implementation only raises an error, so
     * subclasses are expected to override it.
     * @param $config HTMLPurifier_Config instance
     */
    function doSetup($config) {
        trigger_error('Cannot call abstract method', E_USER_ERROR);
    }

    /**
     * Runs doSetup() the first time it is called and is a no-op afterwards.
     * The flag is raised before doSetup() is invoked.
     * @param $config HTMLPurifier_Config instance
     */
    function setup($config) {
        if (!$this->setup) {
            $this->setup = true;
            $this->doSetup($config);
        }
    }

}

View File

@ -0,0 +1,128 @@
<?php
require_once 'HTMLPurifier/DefinitionCache/Serializer.php';
require_once 'HTMLPurifier/DefinitionCache/Null.php';
require_once 'HTMLPurifier/DefinitionCache/Decorator.php';
require_once 'HTMLPurifier/DefinitionCache/Decorator/Memory.php';
require_once 'HTMLPurifier/DefinitionCache/Decorator/Cleanup.php';
/**
 * Base class for definition cache managers. Supplies the key handling
 * shared by all implementations; the storage operations themselves are
 * placeholders that raise an error until a subclass overrides them.
 * @todo Get some sort of versioning variable so the library can easily
 *       invalidate the cache with a new version
 * @todo Make the test runner cache aware and allow the user to easily
 *       flush the cache
 * @todo Create a separate maintenance file advanced users can use to
 *       cache their custom HTMLDefinition, which can be loaded
 *       via a configuration directive
 * @todo Implement memcached
 */
class HTMLPurifier_DefinitionCache
{

    /**
     * Type of definition objects this cache handles
     */
    var $type;

    /**
     * @param $type Type of definition objects this instance of the
     *      cache will handle.
     */
    function HTMLPurifier_DefinitionCache($type) {
        $this->type = $type;
    }

    /**
     * Builds the cache key for a configuration: library version, batch
     * serial for this cache's type and definition revision, joined by '-'.
     * @param $config Instance of HTMLPurifier_Config
     */
    function generateKey($config) {
        $pieces = array(
            $config->version, // possibly replace with function calls
            $config->getBatchSerial($this->type),
            $config->get($this->type, 'DefinitionRev'),
        );
        return implode('-', $pieces);
    }

    /**
     * Decides whether a key is stale relative to the configuration's
     * version and revision number.
     * @param $key Key to test
     * @param $config Instance of HTMLPurifier_Config to test against
     * @return Boolean true if the key is malformed, was produced by a
     *         different version, or has the same batch serial but a lower
     *         revision; false otherwise
     */
    function isOld($key, $config) {
        $parts = explode('-', $key, 3);
        // fewer than two dashes: not a key we generated
        if (count($parts) < 3) return true;
        // any version mismatch is always old
        if (version_compare($parts[0], $config->version) != 0) return true;
        // versions match; only a matching serial with a lower revision is old
        $same_serial = ($parts[1] == $config->getBatchSerial($this->type));
        return $same_serial &&
            $parts[2] < $config->get($this->type, 'DefinitionRev');
    }

    /**
     * Verifies that a definition's type matches this cache's type
     * @note Triggers an error on mismatch
     * @param $def Definition object to check
     * @return Boolean true if good, false if not
     */
    function checkDefType($def) {
        if ($def->type === $this->type) return true;
        trigger_error("Cannot use definition of type {$def->type} in cache for {$this->type}");
        return false;
    }

    /**
     * Adds a definition object to the cache (to be overridden)
     */
    function add($def, $config) {
        trigger_error('Cannot call abstract method', E_USER_ERROR);
    }

    /**
     * Unconditionally saves a definition object to the cache (to be overridden)
     */
    function set($def, $config) {
        trigger_error('Cannot call abstract method', E_USER_ERROR);
    }

    /**
     * Replaces an object in the cache (to be overridden)
     */
    function replace($def, $config) {
        trigger_error('Cannot call abstract method', E_USER_ERROR);
    }

    /**
     * Retrieves a definition object from the cache (to be overridden)
     */
    function get($config) {
        trigger_error('Cannot call abstract method', E_USER_ERROR);
    }

    /**
     * Removes a definition object from the cache (to be overridden)
     */
    function remove($config) {
        trigger_error('Cannot call abstract method', E_USER_ERROR);
    }

    /**
     * Clears all objects from the cache (to be overridden)
     */
    function flush($config) {
        trigger_error('Cannot call abstract method', E_USER_ERROR);
    }

    /**
     * Clears all expired (older version or revision) objects from the
     * cache (to be overridden)
     */
    function cleanup($config) {
        trigger_error('Cannot call abstract method', E_USER_ERROR);
    }

}

View File

@ -0,0 +1,59 @@
<?php
require_once 'HTMLPurifier/DefinitionCache.php';
/**
 * Base decorator for definition caches: holds a wrapped cache object and
 * forwards every storage operation to it. Concrete decorators extend this
 * class and override only the operations they want to alter.
 */
class HTMLPurifier_DefinitionCache_Decorator extends HTMLPurifier_DefinitionCache
{

    /**
     * Cache object we are decorating
     */
    var $cache;

    // intentionally empty: unlike the parent constructor, no type is
    // required up front; decorate() copies it from the wrapped cache
    function HTMLPurifier_DefinitionCache_Decorator() {}

    /**
     * Lazy decorator function
     * @param $cache Reference to cache object to decorate
     * @return New decorator (obtained from copy()) that wraps $cache and
     *         carries the same type as $cache
     */
    function decorate(&$cache) {
        $decorator = $this->copy();
        // reference is necessary for mocks in PHP 4
        $decorator->cache =& $cache;
        $decorator->type  = $cache->type;
        return $decorator;
    }

    /**
     * Cross-compatible clone substitute; subclasses override this to
     * return an instance of their own class
     */
    function copy() {
        return new HTMLPurifier_DefinitionCache_Decorator();
    }

    function add($def, $config) {
        return $this->cache->add($def, $config);
    }

    function set($def, $config) {
        return $this->cache->set($def, $config);
    }

    function replace($def, $config) {
        return $this->cache->replace($def, $config);
    }

    function get($config) {
        return $this->cache->get($config);
    }

    // remove() must be forwarded like every other operation; without this
    // override the call lands in the base class placeholder, which raises
    // 'Cannot call abstract method' instead of reaching the wrapped cache
    function remove($config) {
        return $this->cache->remove($config);
    }

    function flush($config) {
        return $this->cache->flush($config);
    }

    function cleanup($config) {
        return $this->cache->cleanup($config);
    }

}

View File

@ -0,0 +1,44 @@
<?php
require_once 'HTMLPurifier/DefinitionCache/Decorator.php';
/**
 * Definition cache decorator that triggers a cleanup of the cache
 * whenever a wrapped add/set/replace/get call comes back with a
 * false-like result (e.g. a cache miss).
 */
class HTMLPurifier_DefinitionCache_Decorator_Cleanup extends
      HTMLPurifier_DefinitionCache_Decorator
{

    /**
     * Name this decorator is registered under
     */
    var $name = 'Cleanup';

    /**
     * Returns a fresh instance of this decorator class
     */
    function copy() {
        return new HTMLPurifier_DefinitionCache_Decorator_Cleanup();
    }

    function add($def, $config) {
        $result = parent::add($def, $config);
        if ($result) return $result;
        parent::cleanup($config);
        return $result;
    }

    function set($def, $config) {
        $result = parent::set($def, $config);
        if ($result) return $result;
        parent::cleanup($config);
        return $result;
    }

    function replace($def, $config) {
        $result = parent::replace($def, $config);
        if ($result) return $result;
        parent::cleanup($config);
        return $result;
    }

    function get($config) {
        $definition = parent::get($config);
        if ($definition) return $definition;
        parent::cleanup($config);
        return $definition;
    }

}

View File

@ -0,0 +1,47 @@
<?php
require_once 'HTMLPurifier/DefinitionCache/Decorator.php';
/**
 * Definition cache decorator that additionally keeps definitions in a
 * PHP array keyed by cache key, so repeated retrievals do not hit the
 * wrapped cache again; good for unit tests or circumstances where
 * there are lots of configuration objects floating around.
 */
class HTMLPurifier_DefinitionCache_Decorator_Memory extends
      HTMLPurifier_DefinitionCache_Decorator
{

    /**
     * In-memory store: cache key => definition (or whatever the wrapped
     * cache's get() returned for that key)
     */
    var $definitions;

    /**
     * Name this decorator is registered under
     */
    var $name = 'Memory';

    /**
     * Returns a fresh instance of this decorator class
     */
    function copy() {
        return new HTMLPurifier_DefinitionCache_Decorator_Memory();
    }

    function add($def, $config) {
        $stored = parent::add($def, $config);
        if ($stored) {
            // remember only what the wrapped cache accepted
            $this->definitions[$this->generateKey($config)] = $def;
        }
        return $stored;
    }

    function set($def, $config) {
        $stored = parent::set($def, $config);
        if ($stored) {
            $this->definitions[$this->generateKey($config)] = $def;
        }
        return $stored;
    }

    function replace($def, $config) {
        $stored = parent::replace($def, $config);
        if ($stored) {
            $this->definitions[$this->generateKey($config)] = $def;
        }
        return $stored;
    }

    function get($config) {
        $id = $this->generateKey($config);
        if (!isset($this->definitions[$id])) {
            // first request for this key: ask the wrapped cache and keep the answer
            $this->definitions[$id] = parent::get($config);
        }
        return $this->definitions[$id];
    }

}

View File

@ -0,0 +1,46 @@
<?php
require_once 'HTMLPurifier/DefinitionCache/Decorator.php';
/**
 * Definition cache decorator template: a copy-and-edit starting point
 * in which every method simply forwards to the parent decorator.
 */
class HTMLPurifier_DefinitionCache_Decorator_Template extends
      HTMLPurifier_DefinitionCache_Decorator
{

    var $name = 'Template'; // replace this

    function copy() {
        // replace class name with yours
        return new HTMLPurifier_DefinitionCache_Decorator_Template();
    }

    // remove methods you don't need

    function add($def, $config) {
        return parent::add($def, $config);
    }

    function set($def, $config) {
        return parent::set($def, $config);
    }

    function replace($def, $config) {
        return parent::replace($def, $config);
    }

    function get($config) {
        return parent::get($config);
    }

    // takes $config like the parent's flush($config): the override must
    // keep the parent's parameter list and has to pass the configuration on
    function flush($config) {
        return parent::flush($config);
    }

    function cleanup($config) {
        return parent::cleanup($config);
    }

}

View File

@ -0,0 +1,36 @@
<?php
require_once 'HTMLPurifier/DefinitionCache.php';
/**
 * Null cache object to use when no caching is on. Every operation is a
 * no-op that reports failure by returning false.
 */
class HTMLPurifier_DefinitionCache_Null extends HTMLPurifier_DefinitionCache
{

    function add($def, $config) {
        return false;
    }

    function set($def, $config) {
        return false;
    }

    function replace($def, $config) {
        return false;
    }

    function get($config) {
        return false;
    }

    // overridden so that remove() is a harmless no-op like the other
    // operations instead of falling through to the base class placeholder,
    // which raises 'Cannot call abstract method'
    function remove($config) {
        return false;
    }

    function flush($config) {
        return false;
    }

    function cleanup($config) {
        return false;
    }

}

View File

@ -0,0 +1,190 @@
<?php
require_once 'HTMLPurifier/DefinitionCache.php';
// Registers the %Cache.SerializerPath directive: default null, type
// 'string/null' (a string that may also be null). The text below is the
// user-facing description stored with the directive.
HTMLPurifier_ConfigSchema::define(
'Cache', 'SerializerPath', null, 'string/null', '
<p>
Absolute path with no trailing slash to store serialized definitions in.
Default is within the
HTML Purifier library inside DefinitionCache/Serializer. This
path must be writable by the webserver. This directive has been
available since 2.0.0.
</p>
');
/**
 * Definition cache that stores each definition as a serialized file.
 * Files are named after the cache key (plus '.ser') and live in a
 * directory named after the definition type, below a base directory
 * taken from %Cache.SerializerPath or a default inside the library.
 */
class HTMLPurifier_DefinitionCache_Serializer extends
      HTMLPurifier_DefinitionCache
{

    /**
     * Stores a definition only if no file exists for its key yet
     * @return Bytes written on success, false on any failure (wrong
     *         definition type, file already present, directory problem)
     */
    function add($def, $config) {
        if (!$this->checkDefType($def)) return false;
        $file = $this->generateFilePath($config);
        if (file_exists($file)) return false;
        if (!$this->_prepareDir($config)) return false;
        return $this->_write($file, serialize($def));
    }

    /**
     * Stores a definition, overwriting any existing file for its key
     * @return Bytes written on success, false on any failure
     */
    function set($def, $config) {
        if (!$this->checkDefType($def)) return false;
        $file = $this->generateFilePath($config);
        if (!$this->_prepareDir($config)) return false;
        return $this->_write($file, serialize($def));
    }

    /**
     * Stores a definition only if a file for its key already exists
     * @return Bytes written on success, false on any failure
     */
    function replace($def, $config) {
        if (!$this->checkDefType($def)) return false;
        $file = $this->generateFilePath($config);
        if (!file_exists($file)) return false;
        if (!$this->_prepareDir($config)) return false;
        return $this->_write($file, serialize($def));
    }

    /**
     * Loads the definition stored for the configuration's key
     * @return Unserialized definition, or false if no file exists
     */
    function get($config) {
        $file = $this->generateFilePath($config);
        if (!file_exists($file)) return false;
        // NOTE(review): unserialize() instantiates whatever the file
        // contains, so the cache directory must not be writable by
        // untrusted parties
        return unserialize(file_get_contents($file));
    }

    /**
     * Deletes the file stored for the configuration's key
     * @return Result of unlink(), or false if no file exists
     */
    function remove($config) {
        $file = $this->generateFilePath($config);
        if (!file_exists($file)) return false;
        return unlink($file);
    }

    /**
     * Deletes every non-dot file in this type's cache directory
     * @return True once the directory has been walked, false if it could
     *         not be prepared or opened
     */
    function flush($config) {
        if (!$this->_prepareDir($config)) return false;
        $dir = $this->generateDirectoryPath($config);
        $dh = opendir($dir);
        // bail out instead of looping on an invalid handle
        if (!$dh) return false;
        while (false !== ($filename = readdir($dh))) {
            if (empty($filename)) continue;
            if ($filename[0] === '.') continue;
            unlink($dir . '/' . $filename);
        }
        closedir($dh);
        return true;
    }

    /**
     * Deletes the files in this type's cache directory whose key is
     * reported as old by isOld()
     * @return True once the directory has been walked, false if it could
     *         not be prepared or opened
     */
    function cleanup($config) {
        if (!$this->_prepareDir($config)) return false;
        $dir = $this->generateDirectoryPath($config);
        $dh = opendir($dir);
        // bail out instead of looping on an invalid handle
        if (!$dh) return false;
        while (false !== ($filename = readdir($dh))) {
            if (empty($filename)) continue;
            if ($filename[0] === '.') continue;
            // drop the 4-character '.ser' extension to recover the key
            $key = substr($filename, 0, strlen($filename) - 4);
            if ($this->isOld($key, $config)) unlink($dir . '/' . $filename);
        }
        closedir($dh);
        return true;
    }

    /**
     * Generates the file path to the serial file corresponding to
     * the configuration and definition name
     */
    function generateFilePath($config) {
        $key = $this->generateKey($config);
        return $this->generateDirectoryPath($config) . '/' . $key . '.ser';
    }

    /**
     * Generates the path to the directory contain this cache's serial files
     * @note No trailing slash
     */
    function generateDirectoryPath($config) {
        $base = $this->generateBaseDirectoryPath($config);
        return $base . '/' . $this->type;
    }

    /**
     * Generates path to base directory that contains all definition type
     * serials: %Cache.SerializerPath if set, otherwise a directory inside
     * the library (relative to HTMLPURIFIER_PREFIX)
     */
    function generateBaseDirectoryPath($config) {
        $base = $config->get('Cache', 'SerializerPath');
        $base = is_null($base) ? HTMLPURIFIER_PREFIX . '/HTMLPurifier/DefinitionCache/Serializer' : $base;
        return $base;
    }

    /**
     * Convenience wrapper function for file_put_contents, with an
     * fopen/fwrite fallback when that function is unavailable
     * @param $file File name to write to
     * @param $data Data to write into file
     * @return Number of bytes written if success, or false if failure.
     */
    function _write($file, $data) {
        // the availability check is done once and remembered
        static $file_put_contents;
        if ($file_put_contents === null) {
            $file_put_contents = function_exists('file_put_contents');
        }
        if ($file_put_contents) {
            return file_put_contents($file, $data);
        }
        $fh = fopen($file, 'w');
        if (!$fh) return false;
        $status = fwrite($fh, $data);
        fclose($fh);
        return $status;
    }

    /**
     * Prepares the directory that this type stores the serials in:
     * creates it below the base directory if missing, otherwise checks
     * that it is writable
     * @return True if successful
     */
    function _prepareDir($config) {
        $directory = $this->generateDirectoryPath($config);
        if (!is_dir($directory)) {
            $base = $this->generateBaseDirectoryPath($config);
            if (!is_dir($base)) {
                trigger_error('Base directory '.$base.' does not exist,
please create or change using %Cache.SerializerPath',
                    E_USER_ERROR);
                return false;
            } elseif (!$this->_testPermissions($base)) {
                return false;
            }
            // report failure unless the directory exists afterwards (it
            // may have been created by a concurrent request in the meantime)
            if (!mkdir($directory) && !is_dir($directory)) {
                return false;
            }
        } elseif (!$this->_testPermissions($directory)) {
            return false;
        }
        return true;
    }

    /**
     * Tests permissions on a directory and throws out friendly
     * error messages and attempts to chmod it itself if possible
     * @param $dir Directory to test
     * @return True if the directory is writable (or was chmod'ed by us),
     *         false otherwise
     */
    function _testPermissions($dir) {
        // early abort, if it is writable, everything is hunky-dory
        if (is_writable($dir)) return true;
        if (!is_dir($dir)) {
            // generally, you'll want to handle this beforehand
            // so a more specific error message can be given
            trigger_error('Directory '.$dir.' does not exist',
                E_USER_ERROR);
            return false;
        }
        if (function_exists('posix_getuid')) {
            // POSIX system, we can give more specific advice
            if (fileowner($dir) === posix_getuid()) {
                // we can chmod it ourselves
                chmod($dir, 0755);
                return true;
            } elseif (filegroup($dir) === posix_getgid()) {
                $chmod = '775';
            } else {
                // PHP's probably running as nobody, so we'll
                // need to give global permissions
                $chmod = '777';
            }
            trigger_error('Directory '.$dir.' not writable, '.
                'please chmod to ' . $chmod,
                E_USER_ERROR);
        } else {
            // generic error message
            trigger_error('Directory '.$dir.' not writable, '.
                'please alter file permissions',
                E_USER_ERROR);
        }
        return false;
    }

}

View File

@ -0,0 +1,94 @@
<?php
require_once 'HTMLPurifier/DefinitionCache.php';
// Registers the %Cache.DefinitionImpl directive: default 'Serializer',
// type 'string/null' (a string that may also be null).
HTMLPurifier_ConfigSchema::define(
'Cache', 'DefinitionImpl', 'Serializer', 'string/null', '
This directive defines which method to use when caching definitions,
the complex data-type that makes HTML Purifier tick. Set to null
to disable caching (not recommended, as you will see a definite
performance degradation). This directive has been available since 2.0.0.
');
// 'Serializer' is the only string value accepted for the directive.
HTMLPurifier_ConfigSchema::defineAllowedValues(
'Cache', 'DefinitionImpl', array('Serializer')
);
// %Core.DefinitionCache is declared as an alias that redirects to
// %Cache.DefinitionImpl.
HTMLPurifier_ConfigSchema::defineAlias(
'Core', 'DefinitionCache',
'Cache', 'DefinitionImpl'
);
/**
 * Responsible for creating definition caches. Keeps one cache object per
 * implementation and definition type, and wraps every new cache in the
 * registered decorators.
 */
class HTMLPurifier_DefinitionCacheFactory
{

    /**
     * Cache objects created so far: implementation name => type => cache
     */
    var $caches = array('Serializer' => array());

    /**
     * Decorators applied to new caches, keyed by decorator name
     */
    var $decorators = array();

    /**
     * Initialize default decorators
     */
    function setup() {
        $this->addDecorator('Cleanup');
    }

    /**
     * Retrieves an instance of global definition cache factory.
     * @param $prototype Object to install as the global instance; true to
     *        discard the current instance and build a fresh default one;
     *        null (default) to just fetch the current instance, creating
     *        it on first use
     * @static
     */
    function &instance($prototype = null) {
        static $instance;
        // true is a reset request, not a prototype: it must not be stored
        // as the instance, so it is excluded from the first branch
        if ($prototype !== null && $prototype !== true) {
            $instance = $prototype;
        } elseif ($instance === null || $prototype === true) {
            $instance = new HTMLPurifier_DefinitionCacheFactory();
            $instance->setup();
        }
        return $instance;
    }

    /**
     * Factory method that creates a cache object based on configuration
     * @param $type Name of definitions handled by cache
     * @param $config Instance of HTMLPurifier_Config
     * @return A Null cache when %Cache.DefinitionImpl is null, otherwise
     *         the (possibly already created) decorated Serializer cache
     *         for this type
     */
    function &create($type, $config) {
        // only one implementation as for right now, $config will
        // be used to determine implementation
        $method = $config->get('Cache', 'DefinitionImpl');
        if ($method === null) {
            $null = new HTMLPurifier_DefinitionCache_Null($type);
            return $null;
        }
        if (!empty($this->caches[$method][$type])) {
            return $this->caches[$method][$type];
        }
        $cache = new HTMLPurifier_DefinitionCache_Serializer($type);
        foreach ($this->decorators as $decorator) {
            $new_cache = $decorator->decorate($cache);
            // prevent infinite recursion in PHP 4
            unset($cache);
            $cache = $new_cache;
        }
        $this->caches[$method][$type] = $cache;
        return $this->caches[$method][$type];
    }

    /**
     * Registers a decorator to add to all new cache objects
     * @param $decorator Decorator object, or the string suffix of a
     *        HTMLPurifier_DefinitionCache_Decorator_* class to instantiate;
     *        it is stored under its name property
     */
    function addDecorator($decorator) {
        if (is_string($decorator)) {
            $class = "HTMLPurifier_DefinitionCache_Decorator_$decorator";
            $decorator = new $class;
        }
        $this->decorators[$decorator->name] = $decorator;
    }

}

View File

@ -0,0 +1,66 @@
<?php
/**
 * Value object describing a document type: its name, the modules it
 * loads, whether it is XML based, its aliases and its DTD identifiers.
 * @note This class is inspected by Printer_HTMLDefinition->renderDoctype.
 *       If structure changes, please update that function.
 */
class HTMLPurifier_Doctype
{

    /** Full name of doctype */
    var $name;

    /**
     * Standard modules this doctype uses (string identifiers or
     * literal objects)
     */
    var $modules = array();

    /** Modules to use for tidying up code */
    var $tidyModules = array();

    /** Whether the language is derived from XML (i.e. XHTML) */
    var $xml = true;

    /** Alias names for this doctype */
    var $aliases = array();

    /** Public DTD identifier */
    var $dtdPublic;

    /** System DTD identifier */
    var $dtdSystem;

    /**
     * Stores each argument in the property of the corresponding name
     * ($dtd_public / $dtd_system go to dtdPublic / dtdSystem).
     */
    function HTMLPurifier_Doctype($name = null, $xml = true, $modules = array(),
        $tidyModules = array(), $aliases = array(), $dtd_public = null, $dtd_system = null
    ) {
        // identity
        $this->name        = $name;
        $this->aliases     = $aliases;
        $this->xml         = $xml;
        // module lists
        $this->modules     = $modules;
        $this->tidyModules = $tidyModules;
        // DTD identifiers
        $this->dtdPublic   = $dtd_public;
        $this->dtdSystem   = $dtd_system;
    }

    /**
     * Clones the doctype via a serialize/unserialize round trip; use
     * before resolving modes and the like
     */
    function copy() {
        $frozen = serialize($this);
        return unserialize($frozen);
    }

}

View File

@ -0,0 +1,124 @@
<?php
require_once 'HTMLPurifier/Doctype.php';
// Legacy directives for doctype specification

// %HTML.Strict: boolean, default false.
HTMLPurifier_ConfigSchema::define(
'HTML', 'Strict', false, 'bool',
'Determines whether or not to use Transitional (loose) or Strict rulesets. '.
'This directive is deprecated in favor of %HTML.Doctype. '.
'This directive has been available since 1.3.0.'
);
// %HTML.XHTML: boolean, default true.
HTMLPurifier_ConfigSchema::define(
'HTML', 'XHTML', true, 'bool',
'Determines whether or not output is XHTML 1.0 or HTML 4.01 flavor. '.
'This directive is deprecated in favor of %HTML.Doctype. '.
'This directive was available since 1.1.'
);
// %Core.XHTML is declared as an alias that redirects to %HTML.XHTML.
HTMLPurifier_ConfigSchema::defineAlias('Core', 'XHTML', 'HTML', 'XHTML');
/**
 * Registry of doctype objects, addressable by name or by alias, that can
 * also pick the doctype requested by a configuration object.
 */
class HTMLPurifier_DoctypeRegistry
{

    /**
     * Map of doctype name => doctype object
     * @protected
     */
    var $doctypes;

    /**
     * Map of alias => real doctype name
     * @protected
     */
    var $aliases;

    /**
     * Registers a doctype to the registry
     * @note Accepts a fully-formed doctype object, or the
     *       parameters for constructing a doctype object
     * @param $doctype Name of doctype or literal doctype object
     * @param $xml Passed to the doctype constructor when $doctype is a name
     * @param $modules Modules doctype will load (scalar is wrapped in an array)
     * @param $tidy_modules Tidy modules (scalar is wrapped in an array)
     * @param $aliases Alias names for doctype (scalar is wrapped in an array)
     * @param $dtd_public Passed to the doctype constructor
     * @param $dtd_system Passed to the doctype constructor
     * @return Reference to registered doctype (usable for further editing)
     */
    function &register($doctype, $xml = true, $modules = array(),
        $tidy_modules = array(), $aliases = array(), $dtd_public = null, $dtd_system = null
    ) {
        // normalize the list arguments
        $modules      = is_array($modules)      ? $modules      : array($modules);
        $tidy_modules = is_array($tidy_modules) ? $tidy_modules : array($tidy_modules);
        $aliases      = is_array($aliases)      ? $aliases      : array($aliases);
        if (!is_object($doctype)) {
            $doctype = new HTMLPurifier_Doctype(
                $doctype, $xml, $modules, $tidy_modules, $aliases, $dtd_public, $dtd_system
            );
        }
        $name = $doctype->name;
        $this->doctypes[$name] =& $doctype;
        // hookup aliases, never shadowing a registered doctype name
        foreach ($doctype->aliases as $alias) {
            if (!isset($this->doctypes[$alias])) {
                $this->aliases[$alias] = $name;
            }
        }
        // a real doctype now owns this name, so an alias of the same
        // name is obsolete
        if (isset($this->aliases[$name])) {
            unset($this->aliases[$name]);
        }
        return $doctype;
    }

    /**
     * Retrieves reference to a doctype of a certain name
     * @note This function resolves aliases
     * @note When possible, use the more fully-featured make()
     * @param $doctype Name of doctype
     * @return Reference to doctype object; for an unknown name an error
     *         is triggered and a new doctype carrying that name is returned
     */
    function &get($doctype) {
        if (isset($this->aliases[$doctype])) {
            $doctype = $this->aliases[$doctype];
        }
        if (isset($this->doctypes[$doctype])) {
            return $this->doctypes[$doctype];
        }
        trigger_error('Doctype ' . htmlspecialchars($doctype) . ' does not exist', E_USER_ERROR);
        $anon = new HTMLPurifier_Doctype($doctype);
        return $anon;
    }

    /**
     * Creates a doctype based on a configuration object: looks up the
     * doctype the configuration asks for and returns a copy of it
     * @note Use this function to get a copy of doctype that config
     *       can hold on to (this is necessary in order to tell
     *       Generator whether or not the current document is XML
     *       based or not).
     */
    function make($config) {
        $wanted = $this->getDoctypeFromConfig($config);
        $registered = $this->get($wanted);
        $duplicate = $registered->copy();
        return $duplicate;
    }

    /**
     * Retrieves the doctype name from the configuration object:
     * %HTML.Doctype if non-empty, else %HTML.CustomDoctype if non-empty,
     * else a name assembled from the legacy %HTML.XHTML and %HTML.Strict
     * flags
     */
    function getDoctypeFromConfig($config) {
        // recommended directives, in order of precedence
        foreach (array('Doctype', 'CustomDoctype') as $directive) {
            $explicit = $config->get('HTML', $directive);
            if (!empty($explicit)) return $explicit;
        }
        // backwards-compatibility
        $family = $config->get('HTML', 'XHTML')  ? 'XHTML 1.0' : 'HTML 4.01';
        $flavor = $config->get('HTML', 'Strict') ? ' Strict'   : ' Transitional';
        return $family . $flavor;
    }

}

View File

@ -3,6 +3,8 @@
/**
* Structure that stores an HTML element definition. Used by
* HTMLPurifier_HTMLDefinition and HTMLPurifier_HTMLModule.
* @note This class is inspected by HTMLPurifier_Printer_HTMLDefinition.
* Please update that class too.
*/
class HTMLPurifier_ElementDef
{
@ -51,6 +53,8 @@ class HTMLPurifier_ElementDef
* Abstract string representation of internal ChildDef rules. See
* HTMLPurifier_ContentSets for how this is parsed and then transformed
* into an HTMLPurifier_ChildDef.
* @warning This is a temporary variable that is not available after
* being processed by HTMLDefinition
* @public
*/
var $content_model;
@ -58,19 +62,15 @@ class HTMLPurifier_ElementDef
/**
* Value of $child->type, used to determine which ChildDef to use,
* used in combination with $content_model.
* @warning This must be lowercase
* @warning This is a temporary variable that is not available after
* being processed by HTMLDefinition
* @public
*/
var $content_model_type;
/**
* Lookup table of tags that close this tag. Used during parsing
* to make sure we don't attempt to nest unclosed tags.
* @public
*/
var $auto_close = array();
/**
* Does the element have a content model (#PCDATA | Inline)*? This
* is important for chameleon ins and del processing in
@ -78,14 +78,47 @@ class HTMLPurifier_ElementDef
* have to worry about this one.
* @public
*/
var $descendants_are_inline;
var $descendants_are_inline = false;
/**
* List of the names of required attributes this element has. Dynamically
* populated.
* @public
*/
var $required_attr = array();
/**
* Lookup table of tags excluded from all descendants of this tag.
* @note SGML permits exclusions for all descendants, but this is
* not possible with DTDs or XML Schemas. W3C has elected to
* use complicated compositions of content_models to simulate
* exclusion for children, but we go the simpler, SGML-style
* route of flat-out exclusions, which correctly apply to
* all descendants and not just children. Note that the XHTML
* Modularization Abstract Modules are blithely unaware of such
* distinctions.
* @public
*/
var $excludes = array();
/**
* Is this element safe for untrusted users to use?
*/
var $safe;
/**
 * Low-level factory: builds a new standalone element definition and
 * fills in its safe flag (cast to bool), content model, content model
 * type and attribute array from the arguments.
 * @static
 */
function create($safe, $content_model, $content_model_type, $attr) {
    $element = new HTMLPurifier_ElementDef();
    $element->safe               = (bool) $safe;
    $element->content_model      = $content_model;
    $element->content_model_type = $content_model_type;
    $element->attr               = $attr;
    return $element;
}
/**
* Merges the values of another element definition into this one.
* Values from the new element def take precedence if a value is
@ -99,24 +132,56 @@ class HTMLPurifier_ElementDef
// merge in the includes
// sorry, no way to override an include
foreach ($v as $v2) {
$def->attr[0][] = $v2;
$this->attr[0][] = $v2;
}
continue;
}
if ($v === false) {
if (isset($this->attr[$k])) unset($this->attr[$k]);
continue;
}
$this->attr[$k] = $v;
}
foreach($def->attr_transform_pre as $k => $v) $this->attr_transform_pre[$k] = $v;
foreach($def->attr_transform_post as $k => $v) $this->attr_transform_post[$k] = $v;
foreach($def->auto_close as $k => $v) $this->auto_close[$k] = $v;
foreach($def->excludes as $k => $v) $this->excludes[$k] = $v;
$this->_mergeAssocArray($this->attr_transform_pre, $def->attr_transform_pre);
$this->_mergeAssocArray($this->attr_transform_post, $def->attr_transform_post);
$this->_mergeAssocArray($this->excludes, $def->excludes);
if(!empty($def->content_model)) {
$this->content_model .= ' | ' . $def->content_model;
$this->child = false;
}
if(!empty($def->content_model_type)) {
$this->content_model_type = $def->content_model_type;
$this->child = false;
}
if(!is_null($def->child)) $this->child = $def->child;
if(!empty($def->content_model)) $this->content_model .= ' | ' . $def->content_model;
if(!empty($def->content_model_type)) $this->content_model_type = $def->content_model_type;
if(!is_null($def->descendants_are_inline)) $this->descendants_are_inline = $def->descendants_are_inline;
if($def->descendants_are_inline) $this->descendants_are_inline = $def->descendants_are_inline;
if(!is_null($def->safe)) $this->safe = $def->safe;
}
/**
 * Folds one associative array into another. A value of exactly false
 * in the incoming array removes that key from the target (when the
 * key is set there); every other value overwrites or adds the key.
 * @param $a1 Array by reference that is merged into
 * @param $a2 Array that merges into $a1
 */
function _mergeAssocArray(&$a1, $a2) {
    foreach ($a2 as $key => $value) {
        if ($value !== false) {
            $a1[$key] = $value;
        } elseif (isset($a1[$key])) {
            unset($a1[$key]);
        }
    }
}
/**
 * Returns a duplicate of this element definition, produced by
 * serializing the object and unserializing the result.
 */
function copy() {
    $frozen = serialize($this);
    return unserialize($frozen);
}
}
?>

View File

@ -1,7 +1,5 @@
<?php
require_once 'HTMLPurifier/EntityLookup.php';
HTMLPurifier_ConfigSchema::define(
'Core', 'Encoding', 'utf-8', 'istring',
'If for some reason you are unable to convert all webpages to UTF-8, '.
@ -400,4 +398,3 @@ class HTMLPurifier_Encoder
}
?>

View File

@ -19,7 +19,7 @@ class HTMLPurifier_EntityLookup {
*/
function setup($file = false) {
if (!$file) {
$file = dirname(__FILE__) . '/EntityLookup/entities.ser';
$file = HTMLPURIFIER_PREFIX . '/HTMLPurifier/EntityLookup/entities.ser';
}
$this->table = unserialize(file_get_contents($file));
}
@ -43,4 +43,3 @@ class HTMLPurifier_EntityLookup {
}
?>

View File

@ -24,8 +24,8 @@ class HTMLPurifier_EntityParser
* @protected
*/
var $_substituteEntitiesRegex =
'/&(?:[#]x([a-fA-F0-9]+)|[#]0*(\d+)|([A-Za-z]+));?/';
// 1. hex 2. dec 3. string
'/&(?:[#]x([a-fA-F0-9]+)|[#]0*(\d+)|([A-Za-z_:][A-Za-z0-9.\-_:]*));?/';
// 1. hex 2. dec 3. string (XML style)
/**
@ -97,7 +97,6 @@ class HTMLPurifier_EntityParser
} else {
if (isset($this->_special_ent2dec[$matches[3]])) return $entity;
if (!$this->_entity_lookup) {
require_once 'HTMLPurifier/EntityLookup.php';
$this->_entity_lookup = HTMLPurifier_EntityLookup::instance();
}
if (isset($this->_entity_lookup->table[$matches[3]])) {
@ -155,4 +154,3 @@ class HTMLPurifier_EntityParser
}
?>

View File

@ -5,4 +5,3 @@
*/
class HTMLPurifier_Error {}
?>

View File

@ -0,0 +1,118 @@
<?php
require_once 'HTMLPurifier/Generator.php';
/**
 * Error collection class that enables HTML Purifier to report HTML
 * problems back to the user. Errors are accumulated through send() and
 * read back either raw (getRaw()) or as an HTML list (getHTMLFormatted()).
 */
class HTMLPurifier_ErrorCollector
{
/** Collected errors; each entry is array(line, severity, message) */
var $errors = array();
/** Object fetched from the context under 'Locale'; looks up and formats messages */
var $locale;
/** Object fetched from the context under 'Generator'; escapes message text for HTML */
var $generator;
/** Context passed to the constructor; queried again on every send() */
var $context;
/**
 * @param $context Context from which 'Locale' and 'Generator' are fetched;
 *        a reference to it is kept for later lookups
 */
function HTMLPurifier_ErrorCollector(&$context) {
$this->locale =& $context->get('Locale');
$this->generator =& $context->get('Generator');
$this->context =& $context;
}
/**
 * Sends an error message to the collector for later use
 * @param $severity int Error severity, PHP error style (don't use E_USER_)
 * @param $msg string Message identifier, resolved through the locale's
 *        getMessage() (no extra arguments) or formatMessage()
 * @param ... Any further arguments are handed to formatMessage() as an
 *        array whose keys start at 1
 * @note The line number is not a parameter: it is read from the token
 *       stored in the context as 'CurrentToken', falling back to the
 *       context's 'CurrentLine' when there is no token.
 */
function send($severity, $msg) {
$args = array();
if (func_num_args() > 2) {
$args = func_get_args();
// drop $severity (array_shift reindexes, leaving $msg at key 0) ...
array_shift($args);
// ... then drop $msg without reindexing, so the extras keep keys 1, 2, ...
unset($args[0]);
}
// NOTE(review): the second argument `true` presumably tells the context
// not to complain when the entry is missing -- confirm against Context::get()
$token = $this->context->get('CurrentToken', true);
$line = $token ? $token->line : $this->context->get('CurrentLine', true);
$attr = $this->context->get('CurrentAttr', true);
// perform special substitutions, also add custom parameters
$subst = array();
if (!is_null($token)) {
$args['CurrentToken'] = $token;
}
if (!is_null($attr)) {
$subst['$CurrentAttr.Name'] = $attr;
if (isset($token->attr[$attr])) $subst['$CurrentAttr.Value'] = $token->attr[$attr];
}
if (empty($args)) {
$msg = $this->locale->getMessage($msg);
} else {
$msg = $this->locale->formatMessage($msg, $args);
}
// the $CurrentAttr.* placeholders are replaced after locale formatting
if (!empty($subst)) $msg = strtr($msg, $subst);
$this->errors[] = array($line, $severity, $msg);
}
/**
 * Retrieves raw error data for custom formatter to use
 * @return List of arrays in the format array(line, severity,
 *         message text), in the order they were sent
 */
function getRaw() {
return $this->errors;
}
/**
 * Default HTML formatting implementation for error messages
 * @param $config Configuration object; %Core.MaintainLineNumbers is read
 *        to decide whether the errors are sorted by line
 * @return String of HTML: a paragraph with the "no errors" message when
 *         nothing was collected, otherwise an unordered list with one
 *         item per error
 */
function getHTMLFormatted($config) {
$ret = array();
$errors = $this->errors;
// sort error array by line
// line numbers are enabled if they aren't explicitly disabled
if ($config->get('Core', 'MaintainLineNumbers') !== false) {
$has_line = array();
$lines = array();
$original_order = array();
foreach ($errors as $i => $error) {
$has_line[] = (int) (bool) $error[0];
$lines[] = $error[0];
$original_order[] = $i;
}
// sort keys, in priority order: errors that have a line first, then
// ascending line number, then the order in which they were sent
array_multisort($has_line, SORT_DESC, $lines, SORT_ASC, $original_order, SORT_ASC, $errors);
}
foreach ($errors as $error) {
list($line, $severity, $msg) = $error;
$string = '';
$string .= '<strong>' . $this->locale->getErrorName($severity) . '</strong>: ';
$string .= $this->generator->escape($msg);
if ($line) {
// have javascript link generation that causes
// textarea to skip to the specified line
$string .= $this->locale->formatMessage(
'ErrorCollector: At line', array('line' => $line));
}
$ret[] = $string;
}
if (empty($errors)) {
return '<p>' . $this->locale->getMessage('ErrorCollector: No errors') . '</p>';
} else {
return '<ul><li>' . implode('</li><li>', $ret) . '</li></ul>';
}
}
}

View File

@ -36,4 +36,3 @@ class HTMLPurifier_Filter
}
?>

View File

@ -31,4 +31,3 @@ class HTMLPurifier_Filter_YouTube extends HTMLPurifier_Filter
}
?>

View File

@ -1,59 +1,75 @@
<?php
require_once 'HTMLPurifier/Lexer.php';
HTMLPurifier_ConfigSchema::define(
'Core', 'CleanUTF8DuringGeneration', false, 'bool',
'When true, HTMLPurifier_Generator will also check all strings it '.
'escapes for UTF-8 well-formedness as a defense in depth measure. '.
'This could cause a considerable performance impact, and is not '.
'strictly necessary due to the fact that the Lexers should have '.
'ensured that all the UTF-8 strings were well-formed. Note that '.
'the configuration value is only read at the beginning of '.
'generateFromTokens.'
);
HTMLPurifier_ConfigSchema::define(
'Core', 'XHTML', true, 'bool',
'Determines whether or not output is XHTML or not. When disabled, HTML '.
'Purifier goes into HTML 4.01 removes XHTML-specific markup constructs, '.
'such as boolean attribute expansion and trailing slashes in empty tags. '.
'This directive was available since 1.1.'
'Output', 'CommentScriptContents', true, 'bool',
'Determines whether or not HTML Purifier should attempt to fix up '.
'the contents of script tags for legacy browsers with comments. This '.
'directive was available since 2.0.0.'
);
HTMLPurifier_ConfigSchema::defineAlias('Core', 'CommentScriptContents', 'Output', 'CommentScriptContents');
// extension constraints could be factored into ConfigSchema
HTMLPurifier_ConfigSchema::define(
'Core', 'TidyFormat', false, 'bool',
'<p>Determines whether or not to run Tidy on the final output for pretty '.
'formatting reasons, such as indentation and wrap.</p><p>This can greatly '.
'improve readability for editors who are hand-editing the HTML, but is '.
'by no means necessary as HTML Purifier has already fixed all major '.
'errors the HTML may have had. Tidy is a non-default extension, and this directive '.
'will silently fail if Tidy is not available.</p><p>If you are looking to make '.
'the overall look of your page\'s source better, I recommend running Tidy '.
'on the entire page rather than just user-content (after all, the '.
'indentation relative to the containing blocks will be incorrect).</p><p>This '.
'directive was available since 1.1.1.</p>'
'Output', 'TidyFormat', false, 'bool', <<<HTML
<p>
Determines whether or not to run Tidy on the final output for pretty
formatting reasons, such as indentation and wrap.
</p>
<p>
This can greatly improve readability for editors who are hand-editing
the HTML, but is by no means necessary as HTML Purifier has already
fixed all major errors the HTML may have had. Tidy is a non-default
extension, and this directive will silently fail if Tidy is not
available.
</p>
<p>
If you are looking to make the overall look of your page's source
better, I recommend running Tidy on the entire page rather than just
user-content (after all, the indentation relative to the containing
blocks will be incorrect).
</p>
<p>
This directive was available since 1.1.1.
</p>
HTML
);
HTMLPurifier_ConfigSchema::defineAlias('Core', 'TidyFormat', 'Output', 'TidyFormat');
HTMLPurifier_ConfigSchema::define('Output', 'Newline', null, 'string/null', '
<p>
Newline string to format final output with. If left null, HTML Purifier
will auto-detect the default newline type of the system and use that;
you can manually override it here. Remember, \r\n is Windows, \r
is Mac, and \n is Unix. This directive was available since 2.0.1.
</p>
');
/**
* Generates HTML from tokens.
* @todo Refactor interface so that configuration/context is determined
* upon instantiation, no need for messy generateFromTokens() calls
*/
class HTMLPurifier_Generator
{
/**
* Bool cache of %Core.CleanUTF8DuringGeneration
* @private
*/
var $_clean_utf8 = false;
/**
* Bool cache of %Core.XHTML
* Bool cache of %HTML.XHTML
* @private
*/
var $_xhtml = true;
/**
* Bool cache of %Output.CommentScriptContents
* @private
*/
var $_scriptFix = false;
/**
* Cache of HTMLDefinition
* @private
*/
var $_def;
/**
* Generates HTML from an array of tokens.
* @param $tokens Array of HTMLPurifier_Token
@ -63,13 +79,28 @@ class HTMLPurifier_Generator
function generateFromTokens($tokens, $config, &$context) {
$html = '';
if (!$config) $config = HTMLPurifier_Config::createDefault();
$this->_clean_utf8 = $config->get('Core', 'CleanUTF8DuringGeneration');
$this->_xhtml = $config->get('Core', 'XHTML');
$this->_scriptFix = $config->get('Output', 'CommentScriptContents');
$this->_def = $config->getHTMLDefinition();
$this->_xhtml = $this->_def->doctype->xml;
if (!$tokens) return '';
foreach ($tokens as $token) {
$html .= $this->generateFromToken($token);
for ($i = 0, $size = count($tokens); $i < $size; $i++) {
if ($this->_scriptFix && $tokens[$i]->name === 'script'
&& $i + 2 < $size && $tokens[$i+2]->type == 'end') {
// script special case
// the contents of the script block must be ONE token
// for this to work
$html .= $this->generateFromToken($tokens[$i++]);
$html .= $this->generateScriptFromToken($tokens[$i++]);
// We're not going to do this: it wouldn't be valid anyway
//while ($tokens[$i]->name != 'script') {
// $html .= $this->generateScriptFromToken($tokens[$i++]);
//}
}
$html .= $this->generateFromToken($tokens[$i]);
}
if ($config->get('Core', 'TidyFormat') && extension_loaded('tidy')) {
if ($config->get('Output', 'TidyFormat') && extension_loaded('tidy')) {
$tidy_options = array(
'indent'=> true,
@ -93,6 +124,10 @@ class HTMLPurifier_Generator
$html = (string) $tidy;
}
}
// normalize newlines to system
$nl = $config->get('Output', 'Newline');
if ($nl === null) $nl = PHP_EOL;
$html = str_replace("\n", $nl, $html);
return $html;
}
@ -104,14 +139,14 @@ class HTMLPurifier_Generator
function generateFromToken($token) {
if (!isset($token->type)) return '';
if ($token->type == 'start') {
$attr = $this->generateAttributes($token->attr);
$attr = $this->generateAttributes($token->attr, $token->name);
return '<' . $token->name . ($attr ? ' ' : '') . $attr . '>';
} elseif ($token->type == 'end') {
return '</' . $token->name . '>';
} elseif ($token->type == 'empty') {
$attr = $this->generateAttributes($token->attr);
$attr = $this->generateAttributes($token->attr, $token->name);
return '<' . $token->name . ($attr ? ' ' : '') . $attr .
( $this->_xhtml ? ' /': '' )
. '>';
@ -125,18 +160,35 @@ class HTMLPurifier_Generator
}
}
/**
 * Special case processor for the contents of script tags: wraps the
 * text of a script block in a comment/CDATA guard. Tokens that are not
 * text tokens are handed to generateFromToken() unchanged.
 * @warning This runs into problems if there's already a literal
 *          --> somewhere inside the script contents.
 */
function generateScriptFromToken($token) {
    if ($token->type == 'text') {
        // guard technique taken from
        // <http://lachy.id.au/log/2005/05/script-comments>;
        // a trailing "//" is stripped first, then surrounding whitespace
        $script = trim(preg_replace('#//\s*$#', '', $token->data));
        return '<!--//--><![CDATA[//><!--' . "\n" . $script . "\n" . '//--><!]]>';
    }
    return $this->generateFromToken($token);
}
/**
* Generates attribute declarations from attribute array.
* @param $assoc_array_of_attributes Attribute array
* @return Generate HTML fragment for insertion.
*/
function generateAttributes($assoc_array_of_attributes) {
function generateAttributes($assoc_array_of_attributes, $element) {
$html = '';
foreach ($assoc_array_of_attributes as $key => $value) {
if (!$this->_xhtml) {
// remove namespaced attributes
if (strpos($key, ':') !== false) continue;
// also needed: check for attribute minimization
if (!empty($this->_def->info[$element]->attr[$key]->minimized)) {
$html .= $key . ' ';
continue;
}
}
$html .= $key.'="'.$this->escape($value).'" ';
}
@ -149,10 +201,8 @@ class HTMLPurifier_Generator
* @return String escaped data.
*/
function escape($string) {
if ($this->_clean_utf8) $string = HTMLPurifier_Lexer::cleanUTF8($string);
return htmlspecialchars($string, ENT_COMPAT, 'UTF-8');
}
}
?>

View File

@ -1,61 +1,133 @@
<?php
// components
require_once 'HTMLPurifier/Definition.php';
require_once 'HTMLPurifier/HTMLModuleManager.php';
// this definition and its modules MUST NOT define configuration directives
// outside of the HTML or Attr namespaces
// will be superceded by more accurate doctype declaration schemes
HTMLPurifier_ConfigSchema::define(
'HTML', 'Strict', false, 'bool',
'Determines whether or not to use Transitional (loose) or Strict rulesets. '.
'This directive has been available since 1.3.0.'
);
'HTML', 'DefinitionID', null, 'string/null', '
<p>
Unique identifier for a custom-built HTML definition. If you edit
the raw version of the HTMLDefinition, introducing changes that the
configuration object does not reflect, you must specify this variable.
If you change your custom edits, you should change this directive, or
clear your cache. Example:
</p>
<pre>
$config = HTMLPurifier_Config::createDefault();
$config->set(\'HTML\', \'DefinitionID\', \'1\');
$def = $config->getHTMLDefinition();
$def->addAttribute(\'a\', \'tabindex\', \'Number\');
</pre>
<p>
In the above example, the configuration is still at the defaults, but
using the advanced API, an extra attribute has been added. The
configuration object normally has no way of knowing that this change
has taken place, so it needs an extra directive: %HTML.DefinitionID.
If someone else attempts to use the default configuration, these two
pieces of code will not clobber each other in the cache, since one has
an extra directive attached to it.
</p>
<p>
This directive has been available since 2.0.0, and in that version or
later you <em>must</em> specify a value to this directive to use the
advanced API features.
</p>
');
HTMLPurifier_ConfigSchema::define(
'HTML', 'BlockWrapper', 'p', 'string',
'String name of element to wrap inline elements that are inside a block '.
'context. This only occurs in the children of blockquote in strict mode. '.
'Example: by default value, <code>&lt;blockquote&gt;Foo&lt;/blockquote&gt;</code> '.
'would become <code>&lt;blockquote&gt;&lt;p&gt;Foo&lt;/p&gt;&lt;/blockquote&gt;</code>. The '.
'<code>&lt;p&gt;</code> tags can be replaced '.
'with whatever you desire, as long as it is a block level element. '.
'This directive has been available since 1.3.0.'
);
'HTML', 'DefinitionRev', 1, 'int', '
<p>
Revision identifier for your custom definition specified in
%HTML.DefinitionID. This serves the same purpose: uniquely identifying
your custom definition, but this one does so in a chronological
context: revision 3 is more up-to-date then revision 2. Thus, when
this gets incremented, the cache handling is smart enough to clean
up any older revisions of your definition as well as flush the
cache. This directive has been available since 2.0.0.
</p>
');
HTMLPurifier_ConfigSchema::define(
'HTML', 'Parent', 'div', 'string',
'String name of element that HTML fragment passed to library will be '.
'inserted in. An interesting variation would be using span as the '.
'parent element, meaning that only inline tags would be allowed. '.
'This directive has been available since 1.3.0.'
);
'HTML', 'BlockWrapper', 'p', 'string', '
<p>
String name of element to wrap inline elements that are inside a block
context. This only occurs in the children of blockquote in strict mode.
</p>
<p>
Example: by default value,
<code>&lt;blockquote&gt;Foo&lt;/blockquote&gt;</code> would become
<code>&lt;blockquote&gt;&lt;p&gt;Foo&lt;/p&gt;&lt;/blockquote&gt;</code>.
The <code>&lt;p&gt;</code> tags can be replaced with whatever you desire,
as long as it is a block level element. This directive has been available
since 1.3.0.
</p>
');
HTMLPurifier_ConfigSchema::define(
'HTML', 'AllowedElements', null, 'lookup/null',
'If HTML Purifier\'s tag set is unsatisfactory for your needs, you '.
'can overload it with your own list of tags to allow. Note that this '.
'method is subtractive: it does its job by taking away from HTML Purifier '.
'usual feature set, so you cannot add a tag that HTML Purifier never '.
'supported in the first place (like embed, form or head). If you change this, you '.
'probably also want to change %HTML.AllowedAttributes. '.
'<strong>Warning:</strong> If another directive conflicts with the '.
'elements here, <em>that</em> directive will win and override. '.
'This directive has been available since 1.3.0.'
);
'HTML', 'Parent', 'div', 'string', '
<p>
String name of element that HTML fragment passed to library will be
inserted in. An interesting variation would be using span as the
parent element, meaning that only inline tags would be allowed.
This directive has been available since 1.3.0.
</p>
');
HTMLPurifier_ConfigSchema::define(
'HTML', 'AllowedAttributes', null, 'lookup/null',
'IF HTML Purifier\'s attribute set is unsatisfactory, overload it! '.
'The syntax is \'tag.attr\' or \'*.attr\' for the global attributes '.
'(style, id, class, dir, lang, xml:lang).'.
'<strong>Warning:</strong> If another directive conflicts with the '.
'elements here, <em>that</em> directive will win and override. For '.
'example, %HTML.EnableAttrID will take precedence over *.id in this '.
'directive. You must set that directive to true before you can use '.
'IDs at all. This directive has been available since 1.3.0.'
);
'HTML', 'AllowedElements', null, 'lookup/null', '
<p>
If HTML Purifier\'s tag set is unsatisfactory for your needs, you
can overload it with your own list of tags to allow. Note that this
method is subtractive: it does its job by taking away from HTML Purifier
usual feature set, so you cannot add a tag that HTML Purifier never
supported in the first place (like embed, form or head). If you
change this, you probably also want to change %HTML.AllowedAttributes.
</p>
<p>
<strong>Warning:</strong> If another directive conflicts with the
elements here, <em>that</em> directive will win and override.
This directive has been available since 1.3.0.
</p>
');
HTMLPurifier_ConfigSchema::define(
'HTML', 'AllowedAttributes', null, 'lookup/null', '
<p>
If HTML Purifier\'s attribute set is unsatisfactory, overload it!
The syntax is "tag.attr" or "*.attr" for the global attributes
(style, id, class, dir, lang, xml:lang).
</p>
<p>
<strong>Warning:</strong> If another directive conflicts with the
elements here, <em>that</em> directive will win and override. For
example, %HTML.EnableAttrID will take precedence over *.id in this
directive. You must set that directive to true before you can use
IDs at all. This directive has been available since 1.3.0.
</p>
');
HTMLPurifier_ConfigSchema::define(
'HTML', 'Allowed', null, 'itext/null', '
<p>
This is a convenience directive that rolls the functionality of
%HTML.AllowedElements and %HTML.AllowedAttributes into one directive.
Specify elements and attributes that are allowed using:
<code>element1[attr1|attr2],element2...</code>. You can also use
newlines instead of commas to separate elements.
</p>
<p>
<strong>Warning</strong>:
All of the constraints on the component directives are still enforced.
The syntax is a <em>subset</em> of TinyMCE\'s <code>valid_elements</code>
whitelist: directly copy-pasting it here will probably result in
broken whitelists. If %HTML.AllowedElements or %HTML.AllowedAttributes
are set, this directive has no effect.
This directive has been available since 2.0.0.
</p>
');
/**
* Definition of the purified HTML that describes allowed children,
@ -74,13 +146,13 @@ HTMLPurifier_ConfigSchema::define(
* Purifier internals. Many of them, however, are public, and may be
* edited by userspace code to tweak the behavior of HTMLDefinition.
*
* HTMLPurifier_Printer_HTMLDefinition is a notable exception to this
* rule: in the interest of comprehensiveness, it will sniff everything.
* @note This class is inspected by Printer_HTMLDefinition; please
* update that class if things here change.
*/
class HTMLPurifier_HTMLDefinition
class HTMLPurifier_HTMLDefinition extends HTMLPurifier_Definition
{
/** FULLY-PUBLIC VARIABLES */
// FULLY-PUBLIC VARIABLES ---------------------------------------------
/**
* Associative array of element names to HTMLPurifier_ElementDef
@ -139,50 +211,97 @@ class HTMLPurifier_HTMLDefinition
*/
var $info_content_sets = array();
/**
* Doctype object
*/
var $doctype;
/** PUBLIC BUT INTERNAL VARIABLES */
var $setup = false; /**< Has setup() been called yet? */
var $config; /**< Temporary instance of HTMLPurifier_Config */
// RAW CUSTOMIZATION STUFF --------------------------------------------
/**
 * Attaches a custom attribute to an element through the anonymous
 * module: a blank element entry is requested from the module and the
 * attribute definition is stored on it.
 * @param $element_name String element name to add attribute to
 * @param $attr_name String name of attribute
 * @param $def Attribute definition, can be string or object, see
 *             HTMLPurifier_AttrTypes for details
 */
function addAttribute($element_name, $attr_name, $def) {
    $anon =& $this->getAnonymousModule();
    $blank =& $anon->addBlankElement($element_name);
    $blank->attr[$attr_name] = $def;
}
/**
 * Registers a custom element with the anonymous module.
 * @note See HTMLPurifier_HTMLModule::addElement for detailed
 *       parameter descriptions.
 * @note The element is always registered as safe (second argument
 *       true): anyone calling this is assumed to vouch for the
 *       element. This may not be a good idea.
 */
function addElement($element_name, $type, $contents, $attr_collections, $attributes) {
    $anon =& $this->getAnonymousModule();
    $anon->addElement($element_name, true, $type, $contents, $attr_collections, $attributes);
}
/**
 * Returns a reference to the anonymous module, creating it (named
 * 'Anonymous') on first use, so advanced features can be used without
 * writing a module of your own.
 */
function &getAnonymousModule() {
    if ($this->_anonModule) {
        return $this->_anonModule;
    }
    $this->_anonModule = new HTMLPurifier_HTMLModule();
    $this->_anonModule->name = 'Anonymous';
    return $this->_anonModule;
}
var $_anonModule;
// PUBLIC BUT INTERNAL VARIABLES --------------------------------------
var $type = 'HTML';
var $manager; /**< Instance of HTMLPurifier_HTMLModuleManager */
/**
* Performs low-cost, preliminary initialization.
* @param $config Instance of HTMLPurifier_Config
*/
function HTMLPurifier_HTMLDefinition(&$config) {
$this->config =& $config;
function HTMLPurifier_HTMLDefinition() {
$this->manager = new HTMLPurifier_HTMLModuleManager();
}
/**
* Processes internals into form usable by HTMLPurifier internals.
* Modifying the definition after calling this function should not
* be done.
*/
function setup() {
// multiple call guard
if ($this->setup) {return;} else {$this->setup = true;}
$this->processModules();
$this->setupConfigStuff();
unset($this->config);
function doSetup($config) {
$this->processModules($config);
$this->setupConfigStuff($config);
unset($this->manager);
// cleanup some of the element definitions
foreach ($this->info as $k => $v) {
unset($this->info[$k]->content_model);
unset($this->info[$k]->content_model_type);
}
}
/**
* Extract out the information from the manager
*/
function processModules() {
function processModules($config) {
$this->manager->setup($this->config);
if ($this->_anonModule) {
// for user specific changes
// this is late-loaded so we don't have to deal with PHP4
// reference wonky-ness
$this->manager->addModule($this->_anonModule);
unset($this->_anonModule);
}
foreach ($this->manager->activeModules as $module) {
$this->manager->setup($config);
$this->doctype = $this->manager->doctype;
foreach ($this->manager->modules as $module) {
foreach($module->info_tag_transform as $k => $v) {
if ($v === false) unset($this->info_tag_transform[$k]);
else $this->info_tag_transform[$k] = $v;
@ -197,7 +316,7 @@ class HTMLPurifier_HTMLDefinition
}
}
$this->info = $this->manager->getElements($this->config);
$this->info = $this->manager->getElements();
$this->info_content_sets = $this->manager->contentSets->lookup;
}
@ -205,9 +324,9 @@ class HTMLPurifier_HTMLDefinition
/**
* Sets up stuff based on config. We need a better way of doing this.
*/
function setupConfigStuff() {
function setupConfigStuff($config) {
$block_wrapper = $this->config->get('HTML', 'BlockWrapper');
$block_wrapper = $config->get('HTML', 'BlockWrapper');
if (isset($this->info_content_sets['Block'][$block_wrapper])) {
$this->info_block_wrapper = $block_wrapper;
} else {
@ -215,24 +334,33 @@ class HTMLPurifier_HTMLDefinition
E_USER_ERROR);
}
$parent = $this->config->get('HTML', 'Parent');
$def = $this->manager->getElement($parent, $this->config);
$parent = $config->get('HTML', 'Parent');
$def = $this->manager->getElement($parent, true);
if ($def) {
$this->info_parent = $parent;
$this->info_parent_def = $def;
} else {
trigger_error('Cannot use unrecognized element as parent.',
E_USER_ERROR);
$this->info_parent_def = $this->manager->getElement(
$this->info_parent, $this->config);
$this->info_parent_def = $this->manager->getElement($this->info_parent, true);
}
// support template text
$support = "(for information on implementing this, see the ".
"support forums) ";
// setup allowed elements, SubtractiveWhitelist module
$allowed_elements = $this->config->get('HTML', 'AllowedElements');
// setup allowed elements
$allowed_elements = $config->get('HTML', 'AllowedElements');
$allowed_attributes = $config->get('HTML', 'AllowedAttributes');
if (!is_array($allowed_elements) && !is_array($allowed_attributes)) {
$allowed = $config->get('HTML', 'Allowed');
if (is_string($allowed)) {
list($allowed_elements, $allowed_attributes) = $this->parseTinyMCEAllowedList($allowed);
}
}
if (is_array($allowed_elements)) {
foreach ($this->info as $name => $d) {
if(!isset($allowed_elements[$name])) unset($this->info[$name]);
@ -240,11 +368,11 @@ class HTMLPurifier_HTMLDefinition
}
// emit errors
foreach ($allowed_elements as $element => $d) {
$element = htmlspecialchars($element);
trigger_error("Element '$element' is not supported $support", E_USER_WARNING);
}
}
$allowed_attributes = $this->config->get('HTML', 'AllowedAttributes');
$allowed_attributes_mutable = $allowed_attributes; // by copy!
if (is_array($allowed_attributes)) {
foreach ($this->info_global_attr as $attr_key => $info) {
@ -271,6 +399,8 @@ class HTMLPurifier_HTMLDefinition
// emit errors
foreach ($allowed_attributes_mutable as $elattr => $d) {
list($element, $attribute) = explode('.', $elattr);
$element = htmlspecialchars($element);
$attribute = htmlspecialchars($attribute);
if ($element == '*') {
trigger_error("Global attribute '$attribute' is not ".
"supported in any elements $support",
@ -284,7 +414,43 @@ class HTMLPurifier_HTMLDefinition
}
/**
 * Parses a TinyMCE-flavored Allowed Elements and Attributes list into
 * separate lists for processing. Format is element[attr1|attr2],element2...
 * Entries are separated by commas or line breaks. Spaces and tabs are
 * ignored, so "a[href], b" is read the same as "a[href],b".
 * @warning Although it's largely drawn from TinyMCE's implementation,
 *      it is different, and you'll probably have to modify your lists
 * @param $list String list to parse
 * @return array($allowed_elements, $allowed_attributes); both are lookup
 *      arrays with boolean true as value, attribute keys have the form
 *      "element.attribute"
 */
function parseTinyMCEAllowedList($list) {
    // whitespace has no meaning in the list; if it were left in place it
    // would become part of the element and attribute names (" b", not "b")
    $list = str_replace(array(' ', "\t"), '', $list);
    $elements = array();
    $attributes = array();
    $chunks = preg_split('/(,|[\n\r]+)/', $list);
    foreach ($chunks as $chunk) {
        if (empty($chunk)) continue;
        // split the chunk into the element name and its bracketed
        // attribute list, if it has one
        if (!strpos($chunk, '[')) {
            $element = $chunk;
            $attr = false;
        } else {
            list($element, $attr) = explode('[', $chunk);
        }
        // '*' only carries attributes ("*.attr" keys below), it is not
        // registered as an element itself
        if ($element !== '*') $elements[$element] = true;
        if (!$attr) continue;
        $attr = substr($attr, 0, strlen($attr) - 1); // remove trailing ]
        $attr = explode('|', $attr);
        foreach ($attr as $key) {
            $attributes["$element.$key"] = true;
        }
    }
    return array($elements, $attributes);
}
}
?>

View File

@ -16,16 +16,14 @@
class HTMLPurifier_HTMLModule
{
// -- Overloadable ----------------------------------------------------
/**
* Short unique string identifier of the module
*/
var $name;
/**
* Dynamically set integer that specifies when the module was loaded in.
*/
var $order;
/**
* Informally, a list of elements this module changes. Not used in
* any significant way.
@ -99,27 +97,127 @@ class HTMLPurifier_HTMLModule
*/
function getChildDef($def) {return false;}
/**
* Hook method that lets module perform arbitrary operations on
* HTMLPurifier_HTMLDefinition before the module gets processed.
* @param $definition Reference to HTMLDefinition being setup
*/
function preProcess(&$definition) {}
// -- Convenience -----------------------------------------------------
/**
* Hook method that lets module perform arbitrary operations
* on HTMLPurifier_HTMLDefinition after the module gets processed.
* @param $definition Reference to HTMLDefinition being setup
* Convenience function that sets up a new element
* @param $element Name of element to add
* @param $safe Is element safe for untrusted users to use?
* @param $type What content set should element be registered to?
* Set as false to skip this step.
* @param $contents Allowed children in form of:
* "$content_model_type: $content_model"
* @param $attr_includes What attribute collections to register to
* element?
* @param $attr What unique attributes does the element define?
* @note See ElementDef for in-depth descriptions of these parameters.
* @return Reference to created element definition object, so you
* can set advanced parameters
* @protected
*/
function postProcess(&$definition) {}
function &addElement($element, $safe, $type, $contents, $attr_includes = array(), $attr = array()) {
    // record the element name in this module's element list
    $this->elements[] = $element;

    // break "$content_model_type: $content_model" into its two parts
    // (both come back null when $contents is not a string)
    list($model_type, $model) = $this->parseContents($contents);

    // store the attribute collection includes under $attr[0]
    $this->mergeInAttrIncludes($attr, $attr_includes);

    // a false $type skips content set registration
    if ($type) {
        $this->addElementToContentSet($element, $type);
    }

    // build and store the definition
    $this->info[$element] = HTMLPurifier_ElementDef::create(
        $safe, $model, $model_type, $attr
    );

    // non-string $contents is used directly as the child definition
    if (!is_string($contents)) {
        $this->info[$element]->child = $contents;
    }

    // hand back the stored definition so callers can tweak it further
    return $this->info[$element];
}
/**
* Hook method that is called when a module gets registered to
* the definition.
* @param $definition Reference to HTMLDefinition being setup
* Convenience function that creates a totally blank, non-standalone
* element.
* @param $element Name of element to create
* @return Reference to created element
*/
function setup(&$definition) {}
function &addBlankElement($element) {
    // an element may only be defined once per module: complain and hand
    // back the existing definition untouched
    if (isset($this->info[$element])) {
        trigger_error("Definition for $element already exists in module, cannot redefine");
        return $this->info[$element];
    }

    // register the name and store an empty definition flagged as
    // non-standalone
    $this->elements[] = $element;
    $blank = new HTMLPurifier_ElementDef();
    $blank->standalone = false;
    $this->info[$element] = $blank;

    return $this->info[$element];
}
/**
 * Convenience function that appends an element to one of this module's
 * content sets, creating the set if it does not exist yet. Members of a
 * set are kept as a single string separated by ' | '.
 * @param $element Element to register
 * @param $type Name of content set (warning: case sensitive, usually
 *      upper-case first letter)
 * @protected
 */
function addElementToContentSet($element, $type) {
    // existing members need a separator before the new one; a fresh set
    // starts out empty
    $prefix = isset($this->content_sets[$type])
        ? $this->content_sets[$type] . ' | '
        : '';
    $this->content_sets[$type] = $prefix . $element;
}
/**
 * Convenience function that transforms single-string contents
 * into separate content model and content model type
 * @param $contents Allowed children in form of:
 *      "$content_model_type: $content_model"
 *      or one of the shorthands 'Empty', 'Inline', 'Flow'
 * @note If contents is not a string (e.g. an object), an array of two
 *      nulls will be returned, and the caller needs to take the original
 *      $contents and use it directly.
 * @return array($content_model_type, $content_model); the type is
 *      lower-cased, both parts are trimmed
 */
function parseContents($contents) {
    if (!is_string($contents)) return array(null, null); // defer
    switch ($contents) {
        // check for shorthand content model forms
        case 'Empty':
            return array('empty', '');
        case 'Inline':
            return array('optional', 'Inline | #PCDATA');
        case 'Flow':
            return array('optional', 'Flow | #PCDATA');
    }
    // split on the first colon only, so that a content model which itself
    // contains a colon is kept whole instead of being cut short
    $parts = explode(':', $contents, 2);
    $content_model_type = strtolower(trim($parts[0]));
    // a string without any colon has no model part; treat it as empty
    // rather than reading a missing array offset
    $content_model = isset($parts[1]) ? trim($parts[1]) : '';
    return array($content_model_type, $content_model);
}
/**
 * Convenience function that stores a list of attribute includes in an
 * attribute array, under index 0 (any previous value there is replaced).
 * @param $attr Reference to attr array to modify
 * @param $attr_includes Array of includes, or a single include as a
 *      string; an empty non-array value results in no includes
 */
function mergeInAttrIncludes(&$attr, $attr_includes) {
    if (is_array($attr_includes)) {
        // already a list, take it as it is
        $includes = $attr_includes;
    } elseif (empty($attr_includes)) {
        // false, null, '' and the like: nothing to include
        $includes = array();
    } else {
        // single include, wrap it into a list
        $includes = array($attr_includes);
    }
    $attr[0] = $includes;
}
/**
 * Convenience function that turns a list of values into a lookup table,
 * i.e. an array keyed by those values with boolean true as value.
 * @param $list List of values to turn into a lookup
 * @note If the first argument is a string, all arguments passed to the
 *      function are used as the list instead
 * @note Null values in the list are skipped
 * @return Lookup array equivalent of list
 */
function makeLookup($list) {
    // a string first argument signals the variadic calling style
    $values = is_string($list) ? func_get_args() : $list;
    $lookup = array();
    foreach ($values as $item) {
        if ($item !== null) {
            $lookup[$item] = true;
        }
    }
    return $lookup;
}
}
?>

View File

@ -11,32 +11,23 @@ class HTMLPurifier_HTMLModule_Bdo extends HTMLPurifier_HTMLModule
{
var $name = 'Bdo';
var $elements = array('bdo');
var $content_sets = array('Inline' => 'bdo');
var $attr_collections = array(
'I18N' => array('dir' => false)
);
function HTMLPurifier_HTMLModule_Bdo() {
$dir = new HTMLPurifier_AttrDef_Enum(array('ltr','rtl'), false);
$this->attr_collections['I18N']['dir'] = $dir;
$this->info['bdo'] = new HTMLPurifier_ElementDef();
$this->info['bdo']->attr = array(
0 => array('Core', 'Lang'),
'dir' => $dir, // required
// The Abstract Module specification has the attribute
// inclusions wrong for bdo: bdo allows
// xml:lang too (and we'll toss in lang for good measure,
// though it is not allowed for XHTML 1.1, this will
// be managed with a global attribute transform)
$bdo =& $this->addElement(
'bdo', true, 'Inline', 'Inline', array('Core', 'Lang'),
array(
'dir' => 'Enum#ltr,rtl', // required
// The Abstract Module specification has the attribute
// inclusions wrong for bdo: bdo allows Lang
)
);
$this->info['bdo']->content_model = '#PCDATA | Inline';
$this->info['bdo']->content_model_type = 'optional';
// provides fallback behavior if dir's missing (dir is required)
$this->info['bdo']->attr_transform_post['required-dir'] =
new HTMLPurifier_AttrTransform_BdoDir();
$bdo->attr_transform_post['required-dir'] = new HTMLPurifier_AttrTransform_BdoDir();
$this->attr_collections['I18N']['dir'] = 'Enum#ltr,rtl';
}
}
?>

View File

@ -1,5 +1,7 @@
<?php
require_once 'HTMLPurifier/HTMLModule.php';
class HTMLPurifier_HTMLModule_CommonAttributes extends HTMLPurifier_HTMLModule
{
var $name = 'CommonAttributes';
@ -12,9 +14,7 @@ class HTMLPurifier_HTMLModule_CommonAttributes extends HTMLPurifier_HTMLModule
'id' => 'ID',
'title' => 'CDATA',
),
'Lang' => array(
'xml:lang' => false, // see constructor
),
'Lang' => array(),
'I18N' => array(
0 => array('Lang'), // proprietary, for xml:lang/lang
),
@ -22,10 +22,5 @@ class HTMLPurifier_HTMLModule_CommonAttributes extends HTMLPurifier_HTMLModule
0 => array('Core', 'I18N')
)
);
function HTMLPurifier_HTMLModule_CommonAttributes() {
$this->attr_collections['Lang']['xml:lang'] = new HTMLPurifier_AttrDef_Lang();
}
}
?>

View File

@ -11,28 +11,24 @@ class HTMLPurifier_HTMLModule_Edit extends HTMLPurifier_HTMLModule
{
var $name = 'Edit';
var $elements = array('del', 'ins');
var $content_sets = array('Inline' => 'del | ins');
function HTMLPurifier_HTMLModule_Edit() {
foreach ($this->elements as $element) {
$this->info[$element] = new HTMLPurifier_ElementDef();
$this->info[$element]->attr = array(
0 => array('Common'),
'cite' => 'URI',
// 'datetime' => 'Datetime' // Datetime not implemented
);
// Inline context ! Block context (exclamation mark is
// separator, see getChildDef for parsing)
$this->info[$element]->content_model =
'#PCDATA | Inline ! #PCDATA | Flow';
// HTML 4.01 specifies that ins/del must not contain block
// elements when used in an inline context, chameleon is
// a complicated workaround to achieve this effect
$this->info[$element]->content_model_type = 'chameleon';
}
$contents = 'Chameleon: #PCDATA | Inline ! #PCDATA | Flow';
$attr = array(
'cite' => 'URI',
// 'datetime' => 'Datetime', // not implemented
);
$this->addElement('del', true, 'Inline', $contents, 'Common', $attr);
$this->addElement('ins', true, 'Inline', $contents, 'Common', $attr);
}
// HTML 4.01 specifies that ins/del must not contain block
// elements when used in an inline context, chameleon is
// a complicated workaround to achieve this effect
// Inline context ! Block context (exclamation mark is
// separator, see getChildDef for parsing)
var $defines_child_def = true;
function getChildDef($def) {
if ($def->content_model_type != 'chameleon') return false;
@ -42,4 +38,3 @@ class HTMLPurifier_HTMLModule_Edit extends HTMLPurifier_HTMLModule
}
?>

View File

@ -10,27 +10,23 @@ class HTMLPurifier_HTMLModule_Hypertext extends HTMLPurifier_HTMLModule
{
var $name = 'Hypertext';
var $elements = array('a');
var $content_sets = array('Inline' => 'a');
function HTMLPurifier_HTMLModule_Hypertext() {
$this->info['a'] = new HTMLPurifier_ElementDef();
$this->info['a']->attr = array(
0 => array('Common'),
// 'accesskey' => 'Character',
// 'charset' => 'Charset',
'href' => 'URI',
//'hreflang' => 'LanguageCode',
'rel' => new HTMLPurifier_AttrDef_HTML_LinkTypes('rel'),
'rev' => new HTMLPurifier_AttrDef_HTML_LinkTypes('rev'),
//'tabindex' => 'Number',
//'type' => 'ContentType',
$a =& $this->addElement(
'a', true, 'Inline', 'Inline', 'Common',
array(
// 'accesskey' => 'Character',
// 'charset' => 'Charset',
'href' => 'URI',
// 'hreflang' => 'LanguageCode',
'rel' => new HTMLPurifier_AttrDef_HTML_LinkTypes('rel'),
'rev' => new HTMLPurifier_AttrDef_HTML_LinkTypes('rev'),
// 'tabindex' => 'Number',
// 'type' => 'ContentType',
)
);
$this->info['a']->content_model = '#PCDATA | Inline';
$this->info['a']->content_model_type = 'optional';
$this->info['a']->excludes = array('a' => true);
$a->excludes = array('a' => true);
}
}
?>

View File

@ -14,24 +14,23 @@ class HTMLPurifier_HTMLModule_Image extends HTMLPurifier_HTMLModule
{
var $name = 'Image';
var $elements = array('img');
var $content_sets = array('Inline' => 'img');
function HTMLPurifier_HTMLModule_Image() {
$this->info['img'] = new HTMLPurifier_ElementDef();
$this->info['img']->attr = array(
0 => array('Common'),
'alt' => 'Text',
'height' => 'Length',
'longdesc' => 'URI',
'src' => new HTMLPurifier_AttrDef_URI(true), // embedded
'width' => 'Length'
$img =& $this->addElement(
'img', true, 'Inline', 'Empty', 'Common',
array(
'alt*' => 'Text',
'height' => 'Length',
'longdesc' => 'URI',
'src*' => new HTMLPurifier_AttrDef_URI(true), // embedded
'width' => 'Length'
)
);
$this->info['img']->content_model_type = 'empty';
$this->info['img']->attr_transform_post[] =
// kind of strange, but splitting things up would be inefficient
$img->attr_transform_pre[] =
$img->attr_transform_post[] =
new HTMLPurifier_AttrTransform_ImgRequired();
}
}
?>

View File

@ -1,5 +1,7 @@
<?php
require_once 'HTMLPurifier/AttrDef/HTML/Bool.php';
/**
* XHTML 1.1 Legacy module defines elements that were previously
* deprecated.
@ -22,39 +24,117 @@ class HTMLPurifier_HTMLModule_Legacy extends HTMLPurifier_HTMLModule
// incomplete
var $name = 'Legacy';
var $elements = array('u', 's', 'strike');
var $non_standalone_elements = array('li', 'ol', 'address', 'blockquote');
function HTMLPurifier_HTMLModule_Legacy() {
// setup new elements
foreach ($this->elements as $name) {
$this->info[$name] = new HTMLPurifier_ElementDef();
// for u, s, strike, as more elements get added, add
// conditionals as necessary
$this->info[$name]->content_model = 'Inline | #PCDATA';
$this->info[$name]->content_model_type = 'optional';
$this->info[$name]->attr[0] = array('Common');
}
$this->addElement('basefont', true, 'Inline', 'Empty', false, array(
'color' => 'Color',
'face' => 'Text', // extremely broad, we should
'size' => 'Text', // tighten it
'id' => 'ID'
));
$this->addElement('center', true, 'Block', 'Flow', 'Common');
$this->addElement('dir', true, 'Block', 'Required: li', 'Common', array(
'compact' => 'Bool#compact'
));
$this->addElement('font', true, 'Inline', 'Inline', array('Core', 'I18N'), array(
'color' => 'Color',
'face' => 'Text', // extremely broad, we should
'size' => 'Text', // tighten it
));
$this->addElement('menu', true, 'Block', 'Required: li', 'Common', array(
'compact' => 'Bool#compact'
));
$this->addElement('s', true, 'Inline', 'Inline', 'Common');
$this->addElement('strike', true, 'Inline', 'Inline', 'Common');
$this->addElement('u', true, 'Inline', 'Inline', 'Common');
// setup modifications to old elements
foreach ($this->non_standalone_elements as $name) {
$this->info[$name] = new HTMLPurifier_ElementDef();
$this->info[$name]->standalone = false;
$align = 'Enum#left,right,center,justify';
$address =& $this->addBlankElement('address');
$address->content_model = 'Inline | #PCDATA | p';
$address->content_model_type = 'optional';
$address->child = false;
$blockquote =& $this->addBlankElement('blockquote');
$blockquote->content_model = 'Flow | #PCDATA';
$blockquote->content_model_type = 'optional';
$blockquote->child = false;
$br =& $this->addBlankElement('br');
$br->attr['clear'] = 'Enum#left,all,right,none';
$caption =& $this->addBlankElement('caption');
$caption->attr['align'] = 'Enum#top,bottom,left,right';
$div =& $this->addBlankElement('div');
$div->attr['align'] = $align;
$dl =& $this->addBlankElement('dl');
$dl->attr['compact'] = 'Bool#compact';
for ($i = 1; $i <= 6; $i++) {
$h =& $this->addBlankElement("h$i");
$h->attr['align'] = $align;
}
$this->info['li']->attr['value'] = new HTMLPurifier_AttrDef_Integer();
$this->info['ol']->attr['start'] = new HTMLPurifier_AttrDef_Integer();
$hr =& $this->addBlankElement('hr');
$hr->attr['align'] = $align;
$hr->attr['noshade'] = 'Bool#noshade';
$hr->attr['size'] = 'Pixels';
$hr->attr['width'] = 'Length';
$this->info['address']->content_model = 'Inline | #PCDATA | p';
$this->info['address']->content_model_type = 'optional';
$this->info['address']->child = false;
$img =& $this->addBlankElement('img');
$img->attr['align'] = 'Enum#top,middle,bottom,left,right';
$img->attr['border'] = 'Pixels';
$img->attr['hspace'] = 'Pixels';
$img->attr['vspace'] = 'Pixels';
$this->info['blockquote']->content_model = 'Flow | #PCDATA';
$this->info['blockquote']->content_model_type = 'optional';
$this->info['blockquote']->child = false;
// figure out this integer business
$li =& $this->addBlankElement('li');
$li->attr['value'] = new HTMLPurifier_AttrDef_Integer();
$li->attr['type'] = 'Enum#s:1,i,I,a,A,disc,square,circle';
$ol =& $this->addBlankElement('ol');
$ol->attr['compact'] = 'Bool#compact';
$ol->attr['start'] = new HTMLPurifier_AttrDef_Integer();
$ol->attr['type'] = 'Enum#s:1,i,I,a,A';
$p =& $this->addBlankElement('p');
$p->attr['align'] = $align;
$pre =& $this->addBlankElement('pre');
$pre->attr['width'] = 'Number';
// script omitted
$table =& $this->addBlankElement('table');
$table->attr['align'] = 'Enum#left,center,right';
$table->attr['bgcolor'] = 'Color';
$tr =& $this->addBlankElement('tr');
$tr->attr['bgcolor'] = 'Color';
$th =& $this->addBlankElement('th');
$th->attr['bgcolor'] = 'Color';
$th->attr['height'] = 'Length';
$th->attr['nowrap'] = 'Bool#nowrap';
$th->attr['width'] = 'Length';
$td =& $this->addBlankElement('td');
$td->attr['bgcolor'] = 'Color';
$td->attr['height'] = 'Length';
$td->attr['nowrap'] = 'Bool#nowrap';
$td->attr['width'] = 'Length';
$ul =& $this->addBlankElement('ul');
$ul->attr['compact'] = 'Bool#compact';
$ul->attr['type'] = 'Enum#square,disc,circle';
}
}
?>

View File

@ -9,7 +9,6 @@ class HTMLPurifier_HTMLModule_List extends HTMLPurifier_HTMLModule
{
var $name = 'List';
var $elements = array('dl', 'dt', 'dd', 'ol', 'ul', 'li');
// According to the abstract schema, the List content set is a fully formed
// one or more expr, but it invariably occurs in an optional declaration
@ -19,28 +18,19 @@ class HTMLPurifier_HTMLModule_List extends HTMLPurifier_HTMLModule
// Furthermore, the actual XML Schema may disagree. Regardless,
// we don't have support for such nested expressions without using
// the incredibly inefficient and draconic Custom ChildDef.
var $content_sets = array('List' => 'dl | ol | ul', 'Flow' => 'List');
var $content_sets = array('Flow' => 'List');
function HTMLPurifier_HTMLModule_List() {
foreach ($this->elements as $element) {
$this->info[$element] = new HTMLPurifier_ElementDef();
$this->info[$element]->attr = array(0 => array('Common'));
if ($element == 'li' || $element == 'dd') {
$this->info[$element]->content_model = '#PCDATA | Flow';
$this->info[$element]->content_model_type = 'optional';
} elseif ($element == 'ol' || $element == 'ul') {
$this->info[$element]->content_model = 'li';
$this->info[$element]->content_model_type = 'required';
}
}
$this->info['dt']->content_model = '#PCDATA | Inline';
$this->info['dt']->content_model_type = 'optional';
$this->info['dl']->content_model = 'dt | dd';
$this->info['dl']->content_model_type = 'required';
// this could be a LOT more robust
$this->info['li']->auto_close = array('li' => true);
$this->addElement('ol', true, 'List', 'Required: li', 'Common');
$this->addElement('ul', true, 'List', 'Required: li', 'Common');
$this->addElement('dl', true, 'List', 'Required: dt | dd', 'Common');
$this->addElement('li', true, false, 'Flow', 'Common');
$this->addElement('dd', true, false, 'Flow', 'Common');
$this->addElement('dt', true, false, 'Inline', 'Common');
}
}
?>

View File

@ -0,0 +1,15 @@
<?php
require_once 'HTMLPurifier/HTMLModule.php';
/**
 * Module that declares the plain lang attribute, of type LanguageCode,
 * in the Lang attribute collection.
 */
class HTMLPurifier_HTMLModule_NonXMLCommonAttributes extends HTMLPurifier_HTMLModule
{

    /** Identifier of this module */
    var $name = 'NonXMLCommonAttributes';

    /** Attribute collections this module contributes to */
    var $attr_collections = array(
        'Lang' => array('lang' => 'LanguageCode')
    );

}

Some files were not shown because too many files have changed in this diff Show More