1
0
mirror of https://github.com/ezyang/htmlpurifier.git synced 2025-08-05 05:37:49 +02:00

Commit strict version of HTML Purifier.

git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk-strict@647 48356398-32a2-884e-a903-53898d9a118a
This commit is contained in:
Edward Z. Yang
2007-01-16 21:59:29 +00:00
parent a909632d2d
commit 2bf912d528
45 changed files with 1022 additions and 153 deletions

View File

@@ -8,6 +8,11 @@ require_once 'HTMLPurifier/CSSDefinition.php';
* @note We don't implement the whole CSS specification, so it might be
* difficult to reuse this component in the context of validating
* actual stylesheet declarations.
* @note If we were really serious about validating the CSS, we would
* tokenize the styles and then parse the tokens. Obviously, we
* are not doing that. Doing that could seriously harm performance,
* but would make these components a lot more viable for a CSS
* filtering solution.
*/
class HTMLPurifier_AttrDef_CSS extends HTMLPurifier_AttrDef
{
@@ -20,6 +25,9 @@ class HTMLPurifier_AttrDef_CSS extends HTMLPurifier_AttrDef
// we're going to break the spec and explode by semicolons.
// This is because semicolon rarely appears in escaped form
// Doing this is generally flaky but fast
// IT MIGHT APPEAR IN URIs, see HTMLPurifier_AttrDef_CSSURI
// for details
$declarations = explode(';', $css);
$propvalues = array();

View File

@@ -0,0 +1,58 @@
<?php
require_once 'HTMLPurifier/AttrDef/URI.php';
/**
* Validates a URI in CSS syntax, which uses url('http://example.com')
* @note While theoretically speaking we a URI in a CSS document could
* be non-embedded, as of CSS2 there is no such usage so we're
* generalizing it. This may need to be changed in the future.
* @warning Since HTMLPurifier_AttrDef_CSS blindly uses semicolons as
* the separator, you cannot put a literal semicolon in
* in the URI. Try percent encoding it, in that case.
*/
class HTMLPurifier_AttrDef_CSSURI extends HTMLPurifier_AttrDef_URI
{
function HTMLPurifier_AttrDef_CSSURI() {
$this->HTMLPurifier_AttrDef_URI(true); // always embedded
}
function validate($uri_string, $config, &$context) {
// parse the URI out of the string and then pass it onto
// the parent object
$uri_string = $this->parseCDATA($uri_string);
if (strpos($uri_string, 'url(') !== 0) return false;
$uri_string = substr($uri_string, 4);
$new_length = strlen($uri_string) - 1;
if ($uri_string[$new_length] != ')') return false;
$uri = trim(substr($uri_string, 0, $new_length));
if (isset($uri[0]) && ($uri[0] == "'" || $uri[0] == '"')) {
$quote = $uri[0];
$new_length = strlen($uri) - 1;
if ($uri[$new_length] !== $quote) return false;
$uri = substr($uri, 1, $new_length - 1);
}
$keys = array( '(', ')', ',', ' ', '"', "'");
$values = array('\\(', '\\)', '\\,', '\\ ', '\\"', "\\'");
$uri = str_replace($values, $keys, $uri);
$result = parent::validate($uri, $config, $context);
if ($result === false) return false;
// escape necessary characters according to CSS spec
// except for the comma, none of these should appear in the
// URI at all
$result = str_replace($keys, $values, $result);
return "url($result)";
}
}
?>

View File

@@ -4,8 +4,7 @@ require_once 'HTMLPurifier/AttrDef.php';
/**
* Validates shorthand CSS property list-style.
* @note This currently does not support list-style-image, as that functionality
* is not implemented yet elsewhere.
* @warning Does not support url tokens that have internal spaces.
*/
class HTMLPurifier_AttrDef_ListStyle extends HTMLPurifier_AttrDef
{
@@ -20,6 +19,7 @@ class HTMLPurifier_AttrDef_ListStyle extends HTMLPurifier_AttrDef
$def = $config->getCSSDefinition();
$this->info['list-style-type'] = $def->info['list-style-type'];
$this->info['list-style-position'] = $def->info['list-style-position'];
$this->info['list-style-image'] = $def->info['list-style-image'];
}
function validate($string, $config, &$context) {
@@ -28,48 +28,49 @@ class HTMLPurifier_AttrDef_ListStyle extends HTMLPurifier_AttrDef
$string = $this->parseCDATA($string);
if ($string === '') return false;
// assumes URI doesn't have spaces in it
$bits = explode(' ', strtolower($string)); // bits to process
$caught_type = false;
$caught_position = false;
$caught_none = false; // as in keyword none, which is in all of them
$caught = array();
$caught['type'] = false;
$caught['position'] = false;
$caught['image'] = false;
$ret = '';
$i = 0; // number of catches
$none = false;
foreach ($bits as $bit) {
if ($caught_none && ($caught_type || $caught_position)) break;
if ($caught_type && $caught_position) break;
if ($i >= 3) return; // optimization bit
if ($bit === '') continue;
if ($bit === 'none') {
if ($caught_none) continue;
$caught_none = true;
$ret .= 'none ';
continue;
}
// if we add anymore, roll it into a loop
$r = $this->info['list-style-type']->validate($bit, $config, $context);
if ($r !== false) {
if ($caught_type) continue;
$caught_type = true;
$ret .= $r . ' ';
continue;
}
$r = $this->info['list-style-position']->validate($bit, $config, $context);
if ($r !== false) {
if ($caught_position) continue;
$caught_position = true;
$ret .= $r . ' ';
continue;
foreach ($caught as $key => $status) {
if ($status !== false) continue;
$r = $this->info['list-style-' . $key]->validate($bit, $config, $context);
if ($r === false) continue;
if ($r === 'none') {
if ($none) continue;
else $none = true;
if ($key == 'image') continue;
}
$caught[$key] = $r;
$i++;
}
}
$ret = rtrim($ret);
return $ret ? $ret : false;
if (!$i) return false;
$ret = array();
// construct type
if ($caught['type']) $ret[] = $caught['type'];
// construct image
if ($caught['image']) $ret[] = $caught['image'];
// construct position
if ($caught['position']) $ret[] = $caught['position'];
if (empty($ret)) return false;
return implode(' ', $ret);
}

View File

@@ -139,10 +139,10 @@ class HTMLPurifier_AttrDef_URI extends HTMLPurifier_AttrDef
// no need to validate the scheme's fmt since we do that when we
// retrieve the specific scheme object from the registry
$scheme = ctype_lower($scheme) ? $scheme : strtolower($scheme);
$scheme_obj =& $registry->getScheme($scheme, $config, $context);
$scheme_obj = $registry->getScheme($scheme, $config, $context);
if (!$scheme_obj) return false; // invalid scheme, clean it out
} else {
$scheme_obj =& $registry->getScheme(
$scheme_obj = $registry->getScheme(
$config->get('URI', 'DefaultScheme'), $config, $context
);
}

View File

@@ -20,7 +20,7 @@ HTMLPurifier_ConfigSchema::defineAllowedValues(
class HTMLPurifier_AttrTransform_BdoDir extends HTMLPurifier_AttrTransform
{
function transform($attr, $config, $context) {
function transform($attr, $config, &$context) {
if (isset($attr['dir'])) return $attr;
$attr['dir'] = $config->get('Attr', 'DefaultTextDir');
return $attr;

View File

@@ -25,7 +25,7 @@ HTMLPurifier_ConfigSchema::define(
class HTMLPurifier_AttrTransform_ImgRequired extends HTMLPurifier_AttrTransform
{
function transform($attr, $config, $context) {
function transform($attr, $config, &$context) {
$src = true;
if (!isset($attr['src'])) {

View File

@@ -10,7 +10,7 @@ require_once 'HTMLPurifier/AttrTransform.php';
class HTMLPurifier_AttrTransform_Lang extends HTMLPurifier_AttrTransform
{
function transform($attr, $config, $context) {
function transform($attr, $config, &$context) {
$lang = isset($attr['lang']) ? $attr['lang'] : false;
$xml_lang = isset($attr['xml:lang']) ? $attr['xml:lang'] : false;

View File

@@ -8,7 +8,7 @@ require_once 'HTMLPurifier/AttrTransform.php';
class HTMLPurifier_AttrTransform_TextAlign
extends HTMLPurifier_AttrTransform {
function transform($attr, $config, $context) {
function transform($attr, $config, &$context) {
if (!isset($attr['align'])) return $attr;

View File

@@ -11,6 +11,7 @@ require_once 'HTMLPurifier/AttrDef/FontFamily.php';
require_once 'HTMLPurifier/AttrDef/Font.php';
require_once 'HTMLPurifier/AttrDef/Border.php';
require_once 'HTMLPurifier/AttrDef/ListStyle.php';
require_once 'HTMLPurifier/AttrDef/CSSURI.php';
/**
* Defines allowed CSS attributes and what their values are.
@@ -51,11 +52,19 @@ class HTMLPurifier_CSSDefinition
$this->info['font-variant'] = new HTMLPurifier_AttrDef_Enum(
array('normal', 'small-caps'), false);
$uri_or_none = new HTMLPurifier_AttrDef_Composite(
array(
new HTMLPurifier_AttrDef_Enum(array('none')),
new HTMLPurifier_AttrDef_CSSURI()
)
);
$this->info['list-style-position'] = new HTMLPurifier_AttrDef_Enum(
array('inside', 'outside'), false);
$this->info['list-style-type'] = new HTMLPurifier_AttrDef_Enum(
array('disc', 'circle', 'square', 'decimal', 'lower-roman',
'upper-roman', 'lower-alpha', 'upper-alpha'), false);
'upper-roman', 'lower-alpha', 'upper-alpha', 'none'), false);
$this->info['list-style-image'] = $uri_or_none;
$this->info['list-style'] = new HTMLPurifier_AttrDef_ListStyle($config);
@@ -63,13 +72,15 @@ class HTMLPurifier_CSSDefinition
array('capitalize', 'uppercase', 'lowercase', 'none'), false);
$this->info['color'] = new HTMLPurifier_AttrDef_Color();
// technically speaking, this one should get its own validator, but
// since we don't support background images, it effectively is
// equivalent to color. The only trouble is that if the author
// specifies an image and a color, they'll both end up getting dropped,
// even though we ought to implement it and just discard the image
// info. This will be fixed in a later version (see TODO) when
// better URI filtering is implemented.
$this->info['background-image'] = $uri_or_none;
$this->info['background-repeat'] = new HTMLPurifier_AttrDef_Enum(
array('repeat', 'repeat-x', 'repeat-y', 'no-repeat')
);
$this->info['background-attachment'] = new HTMLPurifier_AttrDef_Enum(
array('scroll', 'fixed')
);
// pending its own validator as a shorthand
$this->info['background'] =
$border_color =

View File

@@ -51,8 +51,8 @@ class HTMLPurifier_Config
* an array of directives based on loadArray().
* @return Configured HTMLPurifier_Config object
*/
function create($config) {
if (is_a($config, 'HTMLPurifier_Config')) return $config;
static function create($config) {
if ($config instanceof HTMLPurifier_Config) return $config;
$ret = HTMLPurifier_Config::createDefault();
if (is_array($config)) $ret->loadArray($config);
return $ret;
@@ -62,7 +62,7 @@ class HTMLPurifier_Config
* Convenience constructor that creates a default configuration object.
* @return Default HTMLPurifier_Config object.
*/
function createDefault() {
static function createDefault() {
$definition =& HTMLPurifier_ConfigSchema::instance();
$config = new HTMLPurifier_Config($definition);
return $config;

View File

@@ -68,7 +68,7 @@ class HTMLPurifier_ConfigSchema {
/**
* Retrieves an instance of the application-wide configuration definition.
*/
function &instance($prototype = null) {
static function &instance($prototype = null) {
static $instance;
if ($prototype !== null) {
$instance = $prototype;
@@ -89,7 +89,7 @@ class HTMLPurifier_ConfigSchema {
* HTMLPurifier_DirectiveDef::$type for allowed values
* @param $description Description of directive for documentation
*/
function define(
static function define(
$namespace, $name, $default, $type,
$description
) {
@@ -147,7 +147,7 @@ class HTMLPurifier_ConfigSchema {
* @param $namespace Namespace's name
* @param $description Description of the namespace
*/
function defineNamespace($namespace, $description) {
static function defineNamespace($namespace, $description) {
$def =& HTMLPurifier_ConfigSchema::instance();
if (isset($def->info[$namespace])) {
trigger_error('Cannot redefine namespace', E_USER_ERROR);
@@ -174,7 +174,7 @@ class HTMLPurifier_ConfigSchema {
* @param $alias Name of aliased value
* @param $real Value aliased value will be converted into
*/
function defineValueAliases($namespace, $name, $aliases) {
static function defineValueAliases($namespace, $name, $aliases) {
$def =& HTMLPurifier_ConfigSchema::instance();
if (!isset($def->info[$namespace][$name])) {
trigger_error('Cannot set value alias for non-existant directive',
@@ -204,7 +204,7 @@ class HTMLPurifier_ConfigSchema {
* @param $name Name of directive
* @param $allowed_values Arraylist of allowed values
*/
function defineAllowedValues($namespace, $name, $allowed_values) {
static function defineAllowedValues($namespace, $name, $allowed_values) {
$def =& HTMLPurifier_ConfigSchema::instance();
if (!isset($def->info[$namespace][$name])) {
trigger_error('Cannot define allowed values for undefined directive',
@@ -305,7 +305,7 @@ class HTMLPurifier_ConfigSchema {
*/
function isError($var) {
if (!is_object($var)) return false;
if (!is_a($var, 'HTMLPurifier_Error')) return false;
if (!($var instanceof HTMLPurifier_Error)) return false;
return true;
}
}

View File

@@ -67,7 +67,7 @@ class HTMLPurifier_Encoder
* would need that, and I'm probably not going to implement them.
* Once again, PHP 6 should solve all our problems.
*/
function cleanUTF8($str, $force_php = false) {
static function cleanUTF8($str, $force_php = false) {
static $non_sgml_chars = array();
if (empty($non_sgml_chars)) {
@@ -249,7 +249,7 @@ class HTMLPurifier_Encoder
// | 00000000 | 00010000 | 11111111 | 11111111 | Defined upper limit of legal scalar codes
// +----------+----------+----------+----------+
function unichr($code) {
static function unichr($code) {
if($code > 1114111 or $code < 0 or
($code >= 55296 and $code <= 57343) ) {
// bits are set outside the "valid" range as defined

View File

@@ -28,7 +28,7 @@ class HTMLPurifier_EntityLookup {
* Retrieves sole instance of the object.
* @param Optional prototype of custom lookup table to overload with.
*/
function instance($prototype = false) {
static function instance($prototype = false) {
// no references, since PHP doesn't copy unless modified
static $instance = null;
if ($prototype) {

View File

@@ -300,9 +300,6 @@ class HTMLPurifier_HTMLDefinition
$this->info['b']->child =
$this->info['big']->child =
$this->info['small']->child=
$this->info['u']->child =
$this->info['s']->child =
$this->info['strike']->child =
$this->info['bdo']->child =
$this->info['span']->child =
$this->info['dt']->child =
@@ -314,6 +311,12 @@ class HTMLPurifier_HTMLDefinition
$this->info['h5']->child =
$this->info['h6']->child = $e_Inline;
if (!$this->strict) {
$this->info['u']->child =
$this->info['s']->child =
$this->info['strike']->child = $e_Inline;
}
// the only three required definitions, besides custom table code
$this->info['ol']->child =
$this->info['ul']->child = new HTMLPurifier_ChildDef_Required('li');
@@ -355,10 +358,12 @@ class HTMLPurifier_HTMLDefinition
// reuses $e_Inline and $e_Block
foreach ($e_Inline->elements as $name => $bool) {
if ($name == '#PCDATA') continue;
if (!isset($this->info[$name])) continue;
$this->info[$name]->type = 'inline';
}
foreach ($e_Block->elements as $name => $bool) {
if (!isset($this->info[$name])) continue;
$this->info[$name]->type = 'block';
}
@@ -531,7 +536,7 @@ class HTMLPurifier_HTMLDefinition
// protect against stdclasses floating around
foreach ($this->info as $key => $obj) {
if (is_a($obj, 'stdclass')) {
if ($obj instanceof stdClass) {
unset($this->info[$key]);
}
}

View File

@@ -145,7 +145,7 @@ class HTMLPurifier_Lexer
* @param $prototype Optional prototype lexer.
* @return Concrete lexer.
*/
function create($prototype = null) {
static function create($prototype = null) {
// we don't really care if it's a reference or a copy
static $lexer = null;
if ($prototype) {
@@ -170,7 +170,7 @@ class HTMLPurifier_Lexer
* @param $string HTML string to process.
* @returns HTML with CDATA sections escaped.
*/
function escapeCDATA($string) {
static function escapeCDATA($string) {
return preg_replace_callback(
'/<!\[CDATA\[(.+?)\]\]>/',
array('HTMLPurifier_Lexer', 'CDATACallback'),
@@ -187,7 +187,7 @@ class HTMLPurifier_Lexer
* and 1 the inside of the CDATA section.
* @returns Escaped internals of the CDATA section.
*/
function CDATACallback($matches) {
static function CDATACallback($matches) {
// not exactly sure why the character set is needed, but whatever
return htmlspecialchars($matches[1], ENT_COMPAT, 'UTF-8');
}

View File

@@ -88,6 +88,11 @@ class HTMLPurifier_Lexer_DOMLex extends HTMLPurifier_Lexer
} elseif ($node->nodeType === XML_COMMENT_NODE) {
$tokens[] = $this->factory->createComment($node->data);
return;
} elseif (
// not-well tested: there may be other nodes we have to grab
$node->nodeType !== XML_ELEMENT_NODE
) {
return;
}
$attr = $node->hasAttributes() ?

View File

@@ -37,7 +37,7 @@ class HTMLPurifier_Lexer_PEARSax3 extends HTMLPurifier_Lexer
$string = $this->normalize($string, $config, $context);
$parser=& new XML_HTMLSax3();
$parser= new XML_HTMLSax3();
$parser->set_object($this);
$parser->set_element_handler('openHandler','closeHandler');
$parser->set_data_handler('dataHandler');

View File

@@ -10,10 +10,10 @@ class HTMLPurifier_Printer_HTMLDefinition extends HTMLPurifier_Printer
*/
var $def;
function render(&$config) {
function render($config) {
$ret = '';
$this->config =& $config;
$this->def =& $config->getHTMLDefinition();
$this->def = $config->getHTMLDefinition();
$def =& $this->def;
$ret .= $this->start('div', array('class' => 'HTMLPurifier_Printer'));

View File

@@ -37,7 +37,7 @@ class HTMLPurifier_URISchemeRegistry
* @note Pass a registry object $prototype with a compatible interface and
* the function will copy it and return it all further times.
*/
function &instance($prototype = null) {
static function &instance($prototype = null) {
static $instance = null;
if ($prototype !== null) {
$instance = $prototype;