1
0
mirror of https://github.com/ezyang/htmlpurifier.git synced 2025-08-02 20:27:40 +02:00

Compare commits

...

2 Commits

Author SHA1 Message Date
Edward Z. Yang
3b979ee846 Merged revisions for 1.3.1 release into branch, with local modifications to keep NEWS items in present.
git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/branches/1.3@604 48356398-32a2-884e-a903-53898d9a118a
2006-12-06 23:19:59 +00:00
Edward Z. Yang
d151ffd9e6 Create 1.3 release series.
git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/branches/1.3@590 48356398-32a2-884e-a903-53898d9a118a
2006-11-26 23:30:22 +00:00
19 changed files with 149 additions and 72 deletions

6
NEWS
View File

@@ -9,6 +9,12 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier
. Internal change
==========================
1.3.1, released 2006-12-06
! Added HTMLPurifier.func.php stub for a convenient function to call the library
- Fixed bug in RemoveInvalidImg code that caused all images to be dropped
(thanks to .mario for reporting this)
. Standardized all attribute handling variables to attr, made it plural
1.3.0, released 2006-11-26
# Invalid images are now removed, rather than replaced with a dud
<img src="" alt="Invalid image" />. Previous behavior can be restored

1
TODO
View File

@@ -84,6 +84,7 @@ Requested
3. Extend the tag exclusion system to specify whether or not the
contents should be dropped or not (currently, there's code that could do
something like this if it didn't drop the inner text too.)
- Accept array input, by iterating and purifying all of the items
Wontfix
- Non-lossy smart alternate character encoding transformations (unless

View File

@@ -54,8 +54,9 @@ help you find the correct functionality more quickly. Here they are:</p>
abbreviated version is more readable than the full version. Here, we
list common abbreviations:
<ul>
<li>Attr(s) to Attribute(s)</li>
<li>Attr to Attributes (note that it is plural, i.e. <code>$attr = array()</code>)</li>
<li>Def to Definition</li>
<li><code>$ret</code> is the value to be returned in a function</li>
</ul>
</dd>

View File

@@ -0,0 +1,21 @@
<?php
/**
 * Convenience function that purifies HTML through a single, lazily created
 * HTMLPurifier object which is reused on every later call.
 * @note The library file is only included the first time this function is
 *       called. While this is efficient for instances when you only use
 *       HTML Purifier on a few of your pages, it murders bytecode caching.
 *       You still need to add HTML Purifier to your include path.
 * @param $html   Passed through unchanged as the first argument of purify()
 * @param $config Passed through unchanged as the second argument of
 *                purify(); defaults to null
 * @return Whatever the HTMLPurifier object's purify() method returns
 */
function HTMLPurifier($html, $config = null) {
    // static: the instance created below is kept between calls
    static $purifier = false;
    if ($purifier) {
        // already set up by an earlier call, reuse it
        return $purifier->purify($html, $config);
    }
    // NOTE(review): $init is never read in this function. The file included
    // below runs in this function's variable scope, so it may depend on it;
    // confirm against HTMLPurifier.php before removing.
    $init = true;
    require_once 'HTMLPurifier.php';
    $purifier = new HTMLPurifier();
    return $purifier->purify($html, $config);
}
?>

View File

@@ -22,7 +22,7 @@
*/
/*
HTML Purifier 1.3.0 - Standards Compliant HTML Filtering
HTML Purifier 1.3.1 - Standards Compliant HTML Filtering
Copyright (C) 2006 Edward Z. Yang
This library is free software; you can redistribute it and/or

View File

@@ -21,7 +21,7 @@ class HTMLPurifier_AttrTransform
* Abstract: makes changes to the attributes dependent on multiple values.
*
* @param $attr Assoc array of attributes, usually from
* HTMLPurifier_Token_Tag::$attributes
* HTMLPurifier_Token_Tag::$attr
* @param $config Mandatory HTMLPurifier_Config object.
* @param $context Mandatory HTMLPurifier_Context object
* @returns Processed attribute array.

View File

@@ -104,14 +104,14 @@ class HTMLPurifier_Generator
function generateFromToken($token) {
if (!isset($token->type)) return '';
if ($token->type == 'start') {
$attr = $this->generateAttributes($token->attributes);
$attr = $this->generateAttributes($token->attr);
return '<' . $token->name . ($attr ? ' ' : '') . $attr . '>';
} elseif ($token->type == 'end') {
return '</' . $token->name . '>';
} elseif ($token->type == 'empty') {
$attr = $this->generateAttributes($token->attributes);
$attr = $this->generateAttributes($token->attr);
return '<' . $token->name . ($attr ? ' ' : '') . $attr .
( $this->_xhtml ? ' /': '' )
. '>';

View File

@@ -72,7 +72,7 @@ HTMLPurifier_ConfigSchema::define(
'can overload it with your own list of tags to allow. Note that this '.
'method is subtractive: it does its job by taking away from HTML Purifier '.
'usual feature set, so you cannot add a tag that HTML Purifier never '.
'supported in the first place (like embed). If you change this, you '.
'supported in the first place (like embed, form or head). If you change this, you '.
'probably also want to change %HTML.AllowedAttributes. '.
'<strong>Warning:</strong> If another directive conflicts with the '.
'elements here, <em>that</em> directive will win and override. '.
@@ -561,18 +561,18 @@ class HTMLPurifier_HTMLDefinition
//////////////////////////////////////////////////////////////////////
// %HTML.Allowed(Elements|Attributes) : cut non-allowed elements
$allowed_elements = $config->get('HTML', 'AllowedElements');
if (is_array($allowed_elements)) {
// $allowed_elements[$this->info_parent] = true; // allow parent element
foreach ($this->info as $name => $d) {
if(!isset($allowed_elements[$name])) unset($this->info[$name]);
}
}
$allowed_attributes = $config->get('HTML', 'AllowedAttributes');
if (is_array($allowed_attributes)) {
foreach ($this->info_global_attr as $attr => $info) {
if (!isset($allowed_attributes["*.$attr"])) {
unset($this->info_global_attr[$attr]);
foreach ($this->info_global_attr as $attr_key => $info) {
if (!isset($allowed_attributes["*.$attr_key"])) {
unset($this->info_global_attr[$attr_key]);
}
}
foreach ($this->info as $tag => $info) {

View File

@@ -143,18 +143,18 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer
)
);
if ($attribute_string) {
$attributes = $this->parseAttributeString(
$attribute_string
, $config, $context
);
$attr = $this->parseAttributeString(
$attribute_string
, $config, $context
);
} else {
$attributes = array();
$attr = array();
}
if ($is_self_closing) {
$array[] = new HTMLPurifier_Token_Empty($type, $attributes);
$array[] = new HTMLPurifier_Token_Empty($type, $attr);
} else {
$array[] = new HTMLPurifier_Token_Start($type, $attributes);
$array[] = new HTMLPurifier_Token_Start($type, $attr);
}
$cursor = $position_next_gt + 1;
$inside_tag = false;

View File

@@ -156,11 +156,14 @@ class HTMLPurifier_Strategy_FixNesting extends HTMLPurifier_Strategy
}
$child_def = $def->child;
// have DTD child def validate children
$result = $child_def->validateChildren(
$child_tokens, $config, $context);
if (!empty($def->child)) {
// have DTD child def validate children
$result = $def->child->validateChildren(
$child_tokens, $config, $context);
} else {
// weird, no child definition, get rid of everything
$result = false;
}
// determine whether or not this element has any exclusions
$excludes = $def->excludes;

View File

@@ -30,7 +30,7 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
$token->type == 'start' ) {
$result[] = new HTMLPurifier_Token_Empty($token->name,
$token->attributes);
$token->attr);
continue;
}
@@ -39,7 +39,7 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
$token->type == 'empty' ) {
$result[] = new HTMLPurifier_Token_Start($token->name,
$token->attributes);
$token->attr);
$result[] = new HTMLPurifier_Token_End($token->name);
continue;

View File

@@ -38,7 +38,9 @@ class HTMLPurifier_Strategy_RemoveForeignElements extends HTMLPurifier_Strategy
// hard-coded image special case, pre-emptively drop
// if not available. Probably not abstract-able
if ( $token->name == 'img' ) {
if (!isset($token->attr['src'])) continue;
if (!isset($token->attr['src'])) {
continue;
}
if (!isset($definition->info['img']->attr['src'])) {
continue;
}
@@ -46,7 +48,8 @@ class HTMLPurifier_Strategy_RemoveForeignElements extends HTMLPurifier_Strategy
$definition->
info['img']->
attr['src']->
validate($token->attr['src']);
validate($token->attr['src'],
$config, $context);
if ($token->attr['src'] === false) continue;
}

View File

@@ -35,7 +35,7 @@ class HTMLPurifier_Strategy_ValidateAttributes extends HTMLPurifier_Strategy
if ($token->type !== 'start' && $token->type !== 'empty') continue;
// copy out attributes for easy manipulation
$attr = $token->attributes;
$attr = $token->attr;
// do global transformations (pre)
// nothing currently utilizes this
@@ -117,7 +117,7 @@ class HTMLPurifier_Strategy_ValidateAttributes extends HTMLPurifier_Strategy
// commit changes
// could interfere with flyweight implementation
$tokens[$key]->attributes = $attr;
$tokens[$key]->attr = $attr;
}
$context->destroy('IDAccumulator');

View File

@@ -62,16 +62,16 @@ class HTMLPurifier_TagTransform_Center extends HTMLPurifier_TagTransform
$new_tag = new HTMLPurifier_Token_End($this->transform_to);
return $new_tag;
}
$attributes = $tag->attributes;
$attr = $tag->attr;
$prepend_css = 'text-align:center;';
if (isset($attributes['style'])) {
$attributes['style'] = $prepend_css . $attributes['style'];
if (isset($attr['style'])) {
$attr['style'] = $prepend_css . $attr['style'];
} else {
$attributes['style'] = $prepend_css;
$attr['style'] = $prepend_css;
}
$new_tag = $tag->copy();
$new_tag->name = $this->transform_to;
$new_tag->attributes = $attributes;
$new_tag->attr = $attr;
return $new_tag;
}
}
@@ -115,39 +115,39 @@ class HTMLPurifier_TagTransform_Font extends HTMLPurifier_TagTransform
return $new_tag;
}
$attributes = $tag->attributes;
$attr = $tag->attr;
$prepend_style = '';
// handle color transform
if (isset($attributes['color'])) {
$prepend_style .= 'color:' . $attributes['color'] . ';';
unset($attributes['color']);
if (isset($attr['color'])) {
$prepend_style .= 'color:' . $attr['color'] . ';';
unset($attr['color']);
}
// handle face transform
if (isset($attributes['face'])) {
$prepend_style .= 'font-family:' . $attributes['face'] . ';';
unset($attributes['face']);
if (isset($attr['face'])) {
$prepend_style .= 'font-family:' . $attr['face'] . ';';
unset($attr['face']);
}
// handle size transform
if (isset($attributes['size'])) {
if (isset($this->_size_lookup[$attributes['size']])) {
if (isset($attr['size'])) {
if (isset($this->_size_lookup[$attr['size']])) {
$prepend_style .= 'font-size:' .
$this->_size_lookup[$attributes['size']] . ';';
$this->_size_lookup[$attr['size']] . ';';
}
unset($attributes['size']);
unset($attr['size']);
}
if ($prepend_style) {
$attributes['style'] = isset($attributes['style']) ?
$prepend_style . $attributes['style'] :
$attr['style'] = isset($attr['style']) ?
$prepend_style . $attr['style'] :
$prepend_style;
}
$new_tag = $tag->copy();
$new_tag->name = $this->transform_to;
$new_tag->attributes = $attributes;
$new_tag->attr = $attr;
return $new_tag;

View File

@@ -50,30 +50,29 @@ class HTMLPurifier_Token_Tag extends HTMLPurifier_Token // abstract
/**
* Associative array of the tag's attributes.
*/
var $attributes = array();
var $attr = array();
/**
* Non-overloaded constructor, which lower-cases passed tag name.
*
* @param $name String name.
* @param $attributes Associative array of attributes.
* @param $name String name.
* @param $attr Associative array of attributes.
*/
function HTMLPurifier_Token_Tag($name, $attributes = array()) {
//if ($attributes === null) var_dump(debug_backtrace());
function HTMLPurifier_Token_Tag($name, $attr = array()) {
$this->name = ctype_lower($name) ? $name : strtolower($name);
foreach ($attributes as $key => $value) {
foreach ($attr as $key => $value) {
// normalization only necessary when key is not lowercase
if (!ctype_lower($key)) {
$new_key = strtolower($key);
if (!isset($attributes[$new_key])) {
$attributes[$new_key] = $attributes[$key];
if (!isset($attr[$new_key])) {
$attr[$new_key] = $attr[$key];
}
if ($new_key !== $key) {
unset($attributes[$key]);
unset($attr[$key]);
}
}
}
$this->attributes = $attributes;
$this->attr = $attr;
}
}
@@ -84,7 +83,7 @@ class HTMLPurifier_Token_Start extends HTMLPurifier_Token_Tag
{
var $type = 'start';
function copy() {
return new HTMLPurifier_Token_Start($this->name, $this->attributes);
return new HTMLPurifier_Token_Start($this->name, $this->attr);
}
}
@@ -95,7 +94,7 @@ class HTMLPurifier_Token_Empty extends HTMLPurifier_Token_Tag
{
var $type = 'empty';
function copy() {
return new HTMLPurifier_Token_Empty($this->name, $this->attributes);
return new HTMLPurifier_Token_Empty($this->name, $this->attr);
}
}

View File

@@ -37,12 +37,12 @@ class HTMLPurifier_TokenFactory
/**
* Creates a HTMLPurifier_Token_Start.
* @param $name Tag name
* @param $attribute Associative array of attributes
* @param $attr Associative array of attributes
* @return Generated HTMLPurifier_Token_Start
*/
public function createStart($name, $attributes = array()) {
public function createStart($name, $attr = array()) {
$p = clone $this->p_start;
$p->HTMLPurifier_Token_Tag($name, $attributes);
$p->HTMLPurifier_Token_Tag($name, $attr);
return $p;
}
@@ -60,12 +60,12 @@ class HTMLPurifier_TokenFactory
/**
* Creates a HTMLPurifier_Token_Empty.
* @param $name Tag name
* @param $attribute Associative array of attributes
* @param $attr Associative array of attributes
* @return Generated HTMLPurifier_Token_Empty
*/
public function createEmpty($name, $attributes = array()) {
public function createEmpty($name, $attr = array()) {
$p = clone $this->p_empty;
$p->HTMLPurifier_Token_Tag($name, $attributes);
$p->HTMLPurifier_Token_Tag($name, $attr);
return $p;
}

40
smoketests/loadFunc.php Normal file
View File

@@ -0,0 +1,40 @@
<?php
// Put ../library/ at the front of the include path so that the
// require_once calls made later in this smoketest can find files there.
set_include_path('../library/' . PATH_SEPARATOR . get_include_path() );
// Declare the response as UTF-8 HTML; must run before any output is sent.
header('Content-type: text/html; charset=UTF-8');
// The XML declaration is echoed from a PHP string rather than written
// literally in the template.
// NOTE(review): presumably to keep "<?xml" from being parsed as a PHP short
// open tag when short_open_tag is enabled -- confirm.
echo '<?xml version="1.0" encoding="UTF-8" ?>';
/**
 * Echoes a bold "Pass" or "Fail" marker for a smoketest check.
 * @param $bool Truthy prints Pass, falsy prints Fail
 */
function printb($bool) {
    if ($bool) {
        $verdict = 'Pass';
    } else {
        $verdict = 'Fail';
    }
    echo '<strong>', $verdict, '</strong>';
}
/**
 * Shows a snippet of PHP code on the page, then executes it.
 * @param $code PHP source to display (HTML-escaped, inside a pre element)
 *              and then run with eval()
 * @warning Uses eval(); every call visible in this file passes a
 *          hard-coded string. Never pass external input.
 */
function printEval($code) {
    // No extra local variables on purpose: eval() below runs in this
    // function's scope and would be able to see them.
    echo '<pre>', htmlspecialchars($code), '</pre>';
    eval($code);
}
?><!DOCTYPE html
PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
<head>
<title>HTML Purifier Function Include Smoketest</title>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
</head>
<body>
<h1>HTML Purifier Function Include Smoketest</h1>
<p>Tests whether or not the includes are done properly and whether or
not the library is lazy loaded.</p>
<?php printEval("require_once 'HTMLPurifier.func.php';"); ?>
<p>HTMLPurifier class doesn't exist: <?php printb(!class_exists('HTMLPurifier')); ?></p>
<?php printEval("HTMLPurifier('foobar');"); ?>
<p>HTMLPurifier class exists: <?php printb(class_exists('HTMLPurifier')); ?></p>
</body>
</html>

View File

@@ -42,12 +42,15 @@ class HTMLPurifier_Strategy_RemoveForeignElementsTest
' Warning!</span>'
);
// test removal of img tag
// test removal of invalid img tag
$this->assertResult(
'<img />',
''
);
// test preservation of valid img tag
$this->assertResult('<img src="foobar.gif" />');
}
}

View File

@@ -5,15 +5,15 @@ require_once 'HTMLPurifier/Token.php';
class HTMLPurifier_TokenTest extends UnitTestCase
{
function assertTokenConstruction($name, $attributes,
$expect_name = null, $expect_attributes = null
function assertTokenConstruction($name, $attr,
$expect_name = null, $expect_attr = null
) {
if ($expect_name === null) $expect_name = $name;
if ($expect_attributes === null) $expect_attributes = $attributes;
$token = new HTMLPurifier_Token_Start($name, $attributes);
if ($expect_attr === null) $expect_attr = $attr;
$token = new HTMLPurifier_Token_Start($name, $attr);
$this->assertEqual($expect_name, $token->name);
$this->assertEqual($expect_attributes, $token->attributes);
$this->assertEqual($expect_name, $token->name);
$this->assertEqual($expect_attr, $token->attr);
}
function testConstruct() {