1
0
mirror of https://github.com/ezyang/htmlpurifier.git synced 2025-07-30 19:00:10 +02:00

Revamp configuration files so that more rules can be added, internal organization is more logical, and descriptions are captured.

git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@327 48356398-32a2-884e-a903-53898d9a118a
This commit is contained in:
Edward Z. Yang
2006-08-27 18:49:16 +00:00
parent 0d4ee2ba37
commit 24cde9c891
15 changed files with 600 additions and 54 deletions

View File

@@ -6,7 +6,7 @@ require_once 'HTMLPurifier/URISchemeRegistry.php';
require_once 'HTMLPurifier/AttrDef/Host.php';
HTMLPurifier_ConfigDef::define(
'URI', 'DefaultScheme', 'http',
'URI', 'DefaultScheme', 'http', 'string',
'Defines through what scheme the output will be served, in order to '.
'select the proper object validator when no scheme information is present.'
);

View File

@@ -5,11 +5,14 @@ require_once 'HTMLPurifier/AttrTransform.php';
// this MUST be placed in post, as it assumes that any value in dir is valid
HTMLPurifier_ConfigDef::define(
'Attr', 'DefaultTextDir', 'ltr',
'Attr', 'DefaultTextDir', 'ltr', 'string',
'Defines the default text direction (ltr or rtl) of the document '.
'being parsed. This generally is the same as the value of the dir '.
'attribute in HTML, or ltr if that is not specified.'
);
HTMLPurifier_ConfigDef::defineAllowedValues(
'Attr', 'DefaultTextDir', array( 'ltr', 'rtl' )
);
/**
* Post-trasnform that ensures that bdo tags have the dir attribute set.

View File

@@ -5,7 +5,7 @@ require_once 'HTMLPurifier/AttrTransform.php';
// must be called POST validation
HTMLPurifier_ConfigDef::define(
'Attr', 'DefaultInvalidImage', '',
'Attr', 'DefaultInvalidImage', '', 'string',
'This is the default image an img tag will be pointed to if it does '.
'not have a valid src attribute. In future versions, we may allow the '.
'image tag to be removed completely, but due to design issues, this is '.
@@ -13,7 +13,7 @@ HTMLPurifier_ConfigDef::define(
);
HTMLPurifier_ConfigDef::define(
'Attr', 'DefaultInvalidImageAlt', 'Invalid image',
'Attr', 'DefaultInvalidImageAlt', 'Invalid image', 'string',
'This is the content of the alt tag of an invalid image if the user '.
'had not previously specified an alt attribute. It has no effect when the '.
'image is valid but there was no alt attribute present.'

View File

@@ -13,7 +13,7 @@
// in order to make it self correcting
HTMLPurifier_ConfigDef::define(
'Core', 'EscapeInvalidChildren', false,
'Core', 'EscapeInvalidChildren', false, 'bool',
'When true, a child is found that is not allowed in the context of the '.
'parent element will be transformed into text as if it were ASCII. When '.
'false, that element and all internal tags will be dropped, though text '.

View File

@@ -20,12 +20,18 @@ class HTMLPurifier_Config
*/
var $conf;
/**
* Reference HTMLPurifier_ConfigDef for value checking
*/
var $def;
/**
* @param $definition HTMLPurifier_ConfigDef that defines what directives
* are allowed.
*/
function HTMLPurifier_Config(&$definition) {
$this->conf = $definition->info; // set up the defaults
$this->conf = $definition->defaults; // set up, copy in defaults
$this->def = $definition; // keep a copy around for checking
}
/**
@@ -46,7 +52,7 @@ class HTMLPurifier_Config
function get($namespace, $key) {
if (!isset($this->conf[$namespace][$key])) {
trigger_error('Cannot retrieve value of undefined directive',
E_USER_ERROR);
E_USER_WARNING);
return;
}
return $this->conf[$namespace][$key];
@@ -61,7 +67,26 @@ class HTMLPurifier_Config
function set($namespace, $key, $value) {
if (!isset($this->conf[$namespace][$key])) {
trigger_error('Cannot set undefined directive to value',
E_USER_ERROR);
E_USER_WARNING);
return;
}
if (is_string($value)) {
// resolve value alias if defined
if (isset($this->def->info[$namespace][$key]->aliases[$value])) {
$value = $this->def->info[$namespace][$key]->aliases[$value];
}
if ($this->def->info[$namespace][$key]->allowed !== true) {
// check to see if the value is allowed
if (!isset($this->def->info[$namespace][$key]->allowed[$value])) {
trigger_error('Value not supported', E_USER_WARNING);
return;
}
}
}
$value = $this->def->validate($value,
$this->def->info[$namespace][$key]->type);
if ($value === null) {
trigger_error('Value is of invalid type', E_USER_WARNING);
return;
}
$this->conf[$namespace][$key] = $value;

View File

@@ -7,11 +7,36 @@
class HTMLPurifier_ConfigDef {
/**
* Currently defined directives (and namespaces).
* Defaults of the directives and namespaces.
* @note This shares the exact same structure as HTMLPurifier_Config::$conf
*/
var $defaults = array();
/**
* Definition of the directives.
*/
var $info = array();
/**
* Definition of namespaces.
*/
var $info_namespace = array();
/**
* Lookup table of allowed types.
*/
var $types = array(
'string' => true,
'istring' => true,
'int' => true,
'float' => true,
'bool' => true,
'lookup' => true,
'list' => true,
'hash' => true,
'mixed' => true
);
/**
* Initializes the default namespaces.
*/
@@ -44,9 +69,14 @@ class HTMLPurifier_ConfigDef {
* @param $namespace Namespace the directive is in
* @param $name Key of directive
* @param $default Default value of directive
* @param $type Allowed type of the directive. See
* HTMLPurifier_DirectiveDef::$type for allowed values
* @param $description Description of directive for documentation
*/
function define($namespace, $name, $default, $description) {
function define(
$namespace, $name, $default, $type,
$description
) {
$def =& HTMLPurifier_ConfigDef::instance();
if (!isset($def->info[$namespace])) {
trigger_error('Cannot define directive for undefined namespace',
@@ -54,11 +84,33 @@ class HTMLPurifier_ConfigDef {
return;
}
if (isset($def->info[$namespace][$name])) {
// this behavior is at risk of change
trigger_error('Cannot redefine directive', E_USER_ERROR);
return;
if (
$def->info[$namespace][$name]->type !== $type ||
$def->defaults[$namespace][$name] !== $default
) {
trigger_error('Inconsistent default or type, cannot redefine');
return;
}
} else {
if (!isset($def->types[$type])) {
trigger_error('Invalid type for configuration directive',
E_USER_ERROR);
return;
}
if ($def->validate($default, $type) === null) {
trigger_error('Default value does not match directive type',
E_USER_ERROR);
return;
}
$def->info[$namespace][$name] =
new HTMLPurifier_ConfigEntity_Directive();
$def->info[$namespace][$name]->type = $type;
$def->defaults[$namespace][$name] = $default;
}
$def->info[$namespace][$name] = $default;
$backtrace = debug_backtrace();
$file = $def->mungeFilename($backtrace[0]['file']);
$line = $backtrace[0]['line'];
$def->info[$namespace][$name]->addDescription($file,$line,$description);
}
/**
@@ -73,8 +125,187 @@ class HTMLPurifier_ConfigDef {
return;
}
$def->info[$namespace] = array();
$def->info_namespace[$namespace] = new HTMLPurifier_ConfigEntity_Namespace();
$backtrace = debug_backtrace();
$file = $def->mungeFilename($backtrace[0]['file']);
$line = $backtrace[0]['line'];
$def->info_namespace[$namespace]->addDescription($file,$line,$description);
$def->defaults[$namespace] = array();
}
/**
* Defines a directive value alias.
*
* Directive value aliases are convenient for developers because it lets
* them set a directive to several values and get the same result.
* @param $namespace Directive's namespace
* @param $name Name of Directive
* @param $alias Name of aliased value
* @param $real Value aliased value will be converted into
*/
function defineValueAliases($namespace, $name, $aliases) {
$def =& HTMLPurifier_ConfigDef::instance();
if (!isset($def->info[$namespace][$name])) {
trigger_error('Cannot set value alias for non-existant directive',
E_USER_ERROR);
return;
}
foreach ($aliases as $alias => $real) {
if (!$def->info[$namespace][$name] !== true &&
!isset($def->info[$namespace][$name]->allowed[$real])
) {
trigger_error('Cannot define alias to value that is not allowed',
E_USER_ERROR);
return;
}
if (isset($def->info[$namespace][$name]->allowed[$alias])) {
trigger_error('Cannot define alias over allowed value',
E_USER_ERROR);
return;
}
$def->info[$namespace][$name]->aliases[$alias] = $real;
}
}
/**
* Defines a set of allowed values for a directive.
* @param $namespace Namespace of directive
* @param $name Name of directive
* @param $allowed_values Arraylist of allowed values
*/
function defineAllowedValues($namespace, $name, $allowed_values) {
$def =& HTMLPurifier_ConfigDef::instance();
if (!isset($def->info[$namespace][$name])) {
trigger_error('Cannot define allowed values for undefined directive',
E_USER_ERROR);
return;
}
if ($def->info[$namespace][$name]->allowed === true) {
$def->info[$namespace][$name]->allowed = array();
}
foreach ($allowed_values as $value) {
$def->info[$namespace][$name]->allowed[$value] = true;
}
}
/**
* Validate a variable according to type. Return null if invalid.
*/
function validate($var, $type) {
if (!isset($this->types[$type])) {
trigger_error('Invalid type', E_USER_ERROR);
return;
}
switch ($type) {
case 'mixed':
return $var;
case 'istring':
case 'string':
if (!is_string($var)) return;
if ($type === 'istring') $var = strtolower($var);
return $var;
case 'int':
if (is_string($var) && ctype_digit($var)) $var = (int) $var;
elseif (!is_int($var)) return;
return $var;
case 'float':
if (is_string($var) && is_numeric($var)) $var = (float) $var;
elseif (!is_float($var)) return;
return $var;
case 'bool':
if (is_int($var) && ($var === 0 || $var === 1)) {
$var = (bool) $var;
} elseif (!is_bool($var)) return;
return $var;
case 'list':
case 'hash':
case 'lookup':
if (!is_array($var)) return;
$keys = array_keys($var);
if ($keys === array_keys($keys)) {
if ($type == 'list') return $var;
elseif ($type == 'lookup') {
$new = array();
foreach ($var as $key) {
$new[$key] = true;
}
return $new;
} else return;
}
if ($type === 'lookup') {
foreach ($var as $key => $value) {
$var[$key] = true;
}
}
return $var;
}
}
function mungeFilename($filename) {
$offset = strrpos($filename, 'HTMLPurifier');
$filename = substr($filename, $offset);
$filename = str_replace('\\', '/', $filename);
return $filename;
}
}
/**
* Base class for configuration entity
*/
class HTMLPurifier_ConfigEntity
{
/**
* Plaintext descriptions of the configuration entity is. Organized by
* file and line number, so multiple descriptions are allowed.
*/
var $descriptions = array();
/**
* Adds a description to the array
*/
function addDescription($file, $line, $description) {
if (!isset($this->descriptions[$file])) $this->descriptions[$file] = array();
$this->descriptions[$file][$line] = $description;
}
}
/**
* Structure object describing of a namespace
*/
class HTMLPurifier_ConfigEntity_Namespace extends HTMLPurifier_ConfigEntity {}
/**
* Structure object containing definition of a directive.
* @note This structure does not contain default values
*/
class HTMLPurifier_ConfigEntity_Directive extends HTMLPurifier_ConfigEntity
{
/**
* Hash of value aliases, i.e. values that are equivalent.
*/
var $aliases = array();
/**
* Lookup table of allowed values of the element, bool true if all allowed.
*/
var $allowed = true;
/**
* Allowed type of the directive. Values are:
* - string
* - istring (case insensitive string)
* - int
* - float
* - bool
* - lookup (array of value => true)
* - list (regular numbered index array)
* - hash (array of key => value)
* - mixed (anything goes)
*/
var $type = 'mixed';
}
?>

View File

@@ -5,7 +5,7 @@
require_once 'HTMLPurifier/Lexer.php';
HTMLPurifier_ConfigDef::define(
'Core', 'CleanUTF8DuringGeneration', false,
'Core', 'CleanUTF8DuringGeneration', false, 'bool',
'When true, HTMLPurifier_Generator will also check all strings it '.
'escapes for UTF-8 well-formedness as a defense in depth measure. '.
'This could cause a considerable performance impact, and is not '.

View File

@@ -3,7 +3,7 @@
require_once 'HTMLPurifier/Token.php';
HTMLPurifier_ConfigDef::define(
'Core', 'AcceptFullDocuments', true,
'Core', 'AcceptFullDocuments', true, 'bool',
'This parameter determines whether or not the filter should accept full '.
'HTML documents, not just HTML fragments. When on, it will '.
'drop all sections except the content between body. Depending on '.

View File

@@ -9,7 +9,7 @@
*/
HTMLPurifier_ConfigDef::define(
'Core', 'EscapeInvalidTags', false,
'Core', 'EscapeInvalidTags', false, 'bool',
'When true, invalid tags will be written back to the document as plain '.
'text. Otherwise, they are silently dropped.'
);

View File

@@ -7,7 +7,7 @@ require_once 'HTMLPurifier/ConfigDef.php';
require_once 'HTMLPurifier/AttrContext.php';
HTMLPurifier_ConfigDef::define(
'Attr', 'IDBlacklist', array(),
'Attr', 'IDBlacklist', array(), 'list',
'Array of IDs not allowed in the document.');
/**

View File

@@ -11,13 +11,13 @@ HTMLPurifier_ConfigDef::define(
// for Usenet, these two are similar, but distinct
'nntp' => true, // individual Netnews articles
'news' => true // newsgroup or individual Netnews articles),
),
), 'lookup',
'Whitelist that defines the schemes that a URI is allowed to have. This '.
'prevents XSS attacks from using pseudo-schemes like javascript or mocha.'
);
HTMLPurifier_ConfigDef::define(
'URI', 'OverrideAllowedSchemes', true,
'URI', 'OverrideAllowedSchemes', true, 'bool',
'If this is set to true (which it is by default), you can override '.
'%URI.AllowedSchemes by simply registering a HTMLPurifier_URIScheme '.
'to the registry. If false, you will also have to update that directive '.