1
0
mirror of https://github.com/ezyang/htmlpurifier.git synced 2025-08-02 20:27:40 +02:00

Compare commits

...

4 Commits

Author SHA1 Message Date
Edward Z. Yang
ed38579fa2 Fix duplicated items in TODO.
git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/branches/1.3@623 48356398-32a2-884e-a903-53898d9a118a
2006-12-26 17:13:26 +00:00
Edward Z. Yang
54f615f1d3 Merged r608-621 for 1.3.2 release from trunk.
git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/branches/1.3@622 48356398-32a2-884e-a903-53898d9a118a
2006-12-26 17:10:29 +00:00
Edward Z. Yang
3b979ee846 Merged revisions for 1.3.1 release into branch, with local modifications to keep NEWS items in present.
git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/branches/1.3@604 48356398-32a2-884e-a903-53898d9a118a
2006-12-06 23:19:59 +00:00
Edward Z. Yang
d151ffd9e6 Create 1.3 release series.
git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/branches/1.3@590 48356398-32a2-884e-a903-53898d9a118a
2006-11-26 23:30:22 +00:00
30 changed files with 546 additions and 106 deletions

View File

@@ -4,7 +4,7 @@
# Project related configuration options
#---------------------------------------------------------------------------
PROJECT_NAME = HTML Purifier
PROJECT_NUMBER = 1.3.0
PROJECT_NUMBER = 1.3.2
OUTPUT_DIRECTORY = "C:/Documents and Settings/Edward/My Documents/My Webs/htmlpurifier/docs/doxygen"
CREATE_SUBDIRS = NO
OUTPUT_LANGUAGE = English

20
NEWS
View File

@@ -9,6 +9,26 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier
. Internal change
==========================
1.3.2, released 2006-12-25
! HTMLPurifier object now accepts configuration arrays, no need to manually
instantiate a configuration object
! Context object now accessible to outside
! Added enduser-youtube.html, explains how to embed YouTube videos. See
also corresponding smoketest preserveYouTube.php.
! Added purifyArray(), which takes a list of HTML and purifies it all
! Added static member variable $version to HTML Purifier with PHP-compatible
version number string.
- Fixed fatal error thrown by upper-cased language attributes
- printDefinition.php: added labels, added better clarification
. HTMLPurifier_Config::create() added, takes mixed variable and converts into
a HTMLPurifier_Config object.
1.3.1, released 2006-12-06
! Added HTMLPurifier.func.php stub for a convenient function to call the library
- Fixed bug in RemoveInvalidImg code that caused all images to be dropped
(thanks to .mario for reporting this)
. Standardized all attribute handling variables to attr, made it plural
1.3.0, released 2006-11-26
# Invalid images are now removed, rather than replaced with a dud
<img src="" alt="Invalid image" />. Previous behavior can be restored

8
TODO
View File

@@ -10,6 +10,7 @@ TODO List
1.4 release
# More extensive URI filtering schemes (see docs/proposal-new-directives.txt)
# Allow for background-image and list-style-image (intrinsically tied to above)
# Add hooks for custom behavior (for instance, YouTube preservation)
- Aggressive caching
? Rich set* methods and config file loaders for HTMLPurifier_Config
? Configuration profiles: sets of directives that get set with one func call
@@ -66,7 +67,6 @@ Unknown release (on a scratch-an-itch basis)
- Append something to duplicate IDs so they're still usable (impl. note: the
dupe detector would also need to detect the suffix as well)
- Have 'lang' attribute be checked against official lists
- Docs on how to embed YouTube videos (and friends) without patches
Encoding workarounds
- Non-lossy dumb alternate character encoding transformations, achieved by
@@ -84,6 +84,12 @@ Requested
3. Extend the tag exclusion system to specify whether or not the
contents should be dropped or not (currently, there's code that could do
something like this if it didn't drop the inner text too.)
- More user-friendly warnings when %HTML.Allow* attempts to specify a
tag or attribute that is not supported
- Allow specifying global attributes on a tag-by-tag basis in
%HTML.AllowAttributes
- Parse TinyMCE whitelist into our %HTML.Allow* whitelists
- XSS-attempt detection
Wontfix
- Non-lossy smart alternate character encoding transformations (unless

View File

@@ -54,8 +54,9 @@ help you find the correct functionality more quickly. Here they are:</p>
abbreviated version is more readable than the full version. Here, we
list common abbreviations:
<ul>
<li>Attr(s) to Attribute(s)</li>
<li>Attr to Attributes (note that it is plural, i.e. <code>$attr = array()</code>)</li>
<li>Def to Definition</li>
<li><code>$ret</code> is the value to be returned in a function</li>
</ul>
</dd>

179
docs/enduser-youtube.html Normal file
View File

@@ -0,0 +1,179 @@
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"><head>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
<meta name="description" content="Explains how to safely allow the embedding of flash from trusted sites in HTML Purifier." />
<link rel="stylesheet" type="text/css" href="./style.css" />
<title>Embedding YouTube Videos - HTML Purifier</title>
</head><body>
<h1 class="subtitled">Embedding YouTube Videos</h1>
<div class="subtitle">...as well as other dangerous active content</div>
<div id="filing">Filed under End-User</div>
<div id="index">Return to the <a href="index.html">index</a>.</div>
<p>Clients like their YouTube videos. It gives them a warm fuzzy feeling when
they see a neat little embedded video player on their websites that can play
the latest clips from their documentary &quot;Fido and the Bones of Spring&quot;.
All joking aside, the ability to embed YouTube videos or other active
content in their pages is something that a lot of people like.</p>
<p>This is a <em>bad</em> idea. The moment you embed anything untrusted,
you will definitely be slammed by a manner of nasties that can be
embedded in things from your run of the mill Flash movie to
<a href="http://blog.spywareguide.com/2006/12/myspace_phish_attack_leads_use.html">Quicktime movies</a>.
Even <code>img</code> tags, which HTML Purifier allows by default, can be
dangerous. Be distrustful of anything that tells a browser to load content
from another website automatically.</p>
<p>Luckily for us, however, whitelisting saves the day. Sure, letting users
include any old random flash file could be dangerous, but if it's
from a specific website, it probably is okay. If no amount of pleading will
convince the people upstairs that they should just settle with just linking
to their movies, you may find this technique very useful.</p>
<h2>Sample</h2>
<p>Below is custom code that allows users to embed
YouTube videos. This is not favoritism: this trick can easily be adapted for
other forms of embeddable content.</p>
<p>Usually, websites like YouTube give us boilerplate code that you can insert
into your documents. YouTube's code goes like this:</p>
<pre>
&lt;object width=&quot;425&quot; height=&quot;350&quot;&gt;
&lt;param name=&quot;movie&quot; value=&quot;http://www.youtube.com/v/AyPzM5WK8ys&quot; /&gt;
&lt;param name=&quot;wmode&quot; value=&quot;transparent&quot; /&gt;
&lt;embed src=&quot;http://www.youtube.com/v/AyPzM5WK8ys&quot;
type=&quot;application/x-shockwave-flash&quot;
wmode=&quot;transparent&quot; width=&quot;425&quot; height=&quot;350&quot; /&gt;
&lt;/object&gt;
</pre>
<p>There are two things to note about this code:</p>
<ol>
<li><code>&lt;embed&gt;</code> is not recognized by W3C, so if you want
standards-compliant code, you'll have to get rid of it.</li>
<li>The code is exactly the same for all instances, except for the
identifier <tt>AyPzM5WK8ys</tt> which tells us which movie file
to retrieve.</li>
</ol>
<p>What point 2 means is that if we have code like <code>&lt;span
class=&quot;embed-youtube&quot;&gt;AyPzM5WK8ys&lt;/span&gt;</code> your
application can reconstruct the full object from this small snippet that
passes through HTML Purifier <em>unharmed</em>.</p>
<pre>
&lt;?php
class HTMLPurifierX_PreserveYouTube extends HTMLPurifier
{
function purify($html, $config = null) {
$pre_regex = '#&lt;object[^&gt;]+&gt;.+?'.
'http://www.youtube.com/v/([A-Za-z0-9]+).+?&lt;/object&gt;#';
$pre_replace = '&lt;span class=&quot;youtube-embed&quot;&gt;\1&lt;/span&gt;';
$html = preg_replace($pre_regex, $pre_replace, $html);
$html = parent::purify($html, $config);
$post_regex = '#&lt;span class=&quot;youtube-embed&quot;&gt;([A-Za-z0-9]+)&lt;/span&gt;#';
$post_replace = '&lt;object width=&quot;425&quot; height=&quot;350&quot; '.
'data=&quot;http://www.youtube.com/v/\1&quot;&gt;'.
'&lt;param name=&quot;movie&quot; value=&quot;http://www.youtube.com/v/\1&quot;&gt;&lt;/param&gt;'.
'&lt;param name=&quot;wmode&quot; value=&quot;transparent&quot;&gt;&lt;/param&gt;'.
'&lt;!--[if IE]&gt;'.
'&lt;embed src=&quot;http://www.youtube.com/v/\1&quot;'.
'type=&quot;application/x-shockwave-flash&quot;'.
'wmode=&quot;transparent&quot; width=&quot;425&quot; height=&quot;350&quot; /&gt;'.
'&lt;![endif]--&gt;'.
'&lt;/object&gt;';
$html = preg_replace($post_regex, $post_replace, $html);
return $html;
}
}
$purifier = new HTMLPurifierX_PreserveYouTube();
$html_still_with_youtube = $purifier->purify($html_with_youtube);
?&gt;
</pre>
<p>There is a bit going on here, so let's explain.</p>
<ol>
<li>The class uses the prefix <code>HTMLPurifierX</code> because it's
userspace code. Don't use <code>HTMLPurifier</code> in front of your
class, since it might clobber another class in the library.</li>
<li>In order to keep the interface compatible, we've extended HTMLPurifier
into a new class that preserves the YouTube videos. This means that
all you have to do is replace all instances of
<code>new HTMLPurifier</code> to <code>new
HTMLPurifierX_PreserveYouTube</code>. There's other ways to go about
doing this: if you were calling a function that wrapped HTML Purifier,
you could paste the PHP right there. If you wanted to be really
fancy, you could make a decorator for HTMLPurifier.</li>
<li>The first preg_replace call replaces any YouTube code users may have
embedded into the benign span tag. Span is used because it is inline,
and objects are inline too. We are very careful to be extremely
restrictive on what goes inside the span tag, as if an errant code
gets in there it could get messy.</li>
<li>The HTML is then purified as usual.</li>
<li>Then, another preg_replace replaces the span tag with a fully fledged
object. Note that the embed is removed, and, in its place, a data
attribute was added to the object. This makes the tag standards
compliant! It also breaks Internet Explorer, so we add in a bit of
conditional comments with the old embed code to make it work again.
It's all quite convoluted but works.</li>
</ol>
<h2>Warning</h2>
<p>There are a number of possible problems with the code above, depending
on how you look at it.</p>
<h3>Cannot change width and height</h3>
<p>The width and height of the final YouTube movie cannot be adjusted. This
is because I am lazy. If you really insist on letting users change the size
of the movie, what you need to do is package up the attributes inside the
span tag (along with the movie ID). It gets complicated though: a malicious
user can specify an outrageously large height and width and attempt to crash
the user's operating system/browser. You need to either cap it by limiting
the amount of digits allowed in the regex or using a callback to check the
number.</p>
<h3>Trusts media's host's security</h3>
<p>By allowing this code onto our website, we are trusting that YouTube has
tech-savvy enough people not to allow their users to inject malicious
code into the Flash files. An exploit on YouTube means an exploit on your
site. Even though YouTube is run by the reputable Google, it
<a href="http://ha.ckers.org/blog/20061213/google-xss-vuln/">doesn't</a>
mean they are
<a href="http://ha.ckers.org/blog/20061208/xss-in-googles-orkut/">invulnerable.</a>
You're putting a certain measure of the job on an external provider (just as
you have by entrusting your user input to HTML Purifier), and
it is important that you are cognizant of the risk.</p>
<h3>Poorly written adaptations compromise security</h3>
<p>This should go without saying, but if you're going to adapt this code
for Google Video or the like, make sure you do it <em>right</em>. It's
extremely easy to allow a character too many in the final section and
suddenly you're introducing XSS into HTML Purifier's XSS free output. HTML
Purifier may be well written, but it cannot guard against vulnerabilities
introduced after it has finished.</p>
<h2>Future plans</h2>
<p>It would probably be a good idea if this code was added to the core
library. Look out for the inclusion of this into the core as a decorator
or the like.</p>
</body>
</html>

View File

@@ -23,7 +23,10 @@ information for casual developers using HTML Purifier.</p>
<dl>
<dt><a href="enduser-id.html">IDs</a></dt>
<dd>Explains various methods for allowing IDs in documents safely in HTML Purifier.</dd>
<dd>Explains various methods for allowing IDs in documents safely.</dd>
<dt><a href="enduser-youtube.html">Embedding YouTube videos</a></dt>
<dd>Explains how to safely allow the embedding of flash from trusted sites.</dd>
</dl>

View File

@@ -10,7 +10,8 @@ It's quite simple, according to <http://www.w3.org/TR/xhtml11/changes.html>
...but that's only an informative section. More things to do:
1. Scratch style attribute (it's deprecated)
2. Be module-aware
2. Be module-aware (this might entail intelligent grouping in the definition
and allowing users to specifically remove certain modules (see 5))
3. Cross-reference minimal content models with existing DTDs and determine
changes (todo)
4. Watch out for the Legacy Module

View File

@@ -0,0 +1,21 @@
<?php
/**
* Function wrapper for HTML Purifier for quick use.
* @note This function only includes the library when it is called. While
* this is efficient for instances when you only use HTML Purifier
* on a few of your pages, it murders bytecode caching. You still
* need to add HTML Purifier to your path.
*/
function HTMLPurifier($html, $config = null) {
static $purifier = false;
if (!$purifier) {
$init = true;
require_once 'HTMLPurifier.php';
$purifier = new HTMLPurifier();
}
return $purifier->purify($html, $config);
}
?>

View File

@@ -22,7 +22,7 @@
*/
/*
HTML Purifier 1.3.0 - Standards Compliant HTML Filtering
HTML Purifier 1.3.2 - Standards Compliant HTML Filtering
Copyright (C) 2006 Edward Z. Yang
This library is free software; you can redistribute it and/or
@@ -64,19 +64,29 @@ require_once 'HTMLPurifier/Encoder.php';
class HTMLPurifier
{
var $version = '1.3.2';
var $config;
var $lexer, $strategy, $generator;
/**
* Final HTMLPurifier_Context of last run purification. Might be an array.
* @public
*/
var $context;
/**
* Initializes the purifier.
* @param $config Optional HTMLPurifier_Config object for all instances of
* the purifier, if omitted, a default configuration is
* supplied (which can be overridden on a per-use basis).
* The parameter can also be any type that
* HTMLPurifier_Config::create() supports.
*/
function HTMLPurifier($config = null) {
$this->config = $config ? $config : HTMLPurifier_Config::createDefault();
$this->config = HTMLPurifier_Config::create($config);
$this->lexer = HTMLPurifier_Lexer::create();
$this->strategy = new HTMLPurifier_Strategy_Core();
@@ -91,25 +101,54 @@ class HTMLPurifier
* @param $html String of HTML to purify
* @param $config HTMLPurifier_Config object for this operation, if omitted,
* defaults to the config object specified during this
* object's construction.
* object's construction. The parameter can also be any type
* that HTMLPurifier_Config::create() supports.
* @return Purified HTML
*/
function purify($html, $config = null) {
$config = $config ? $config : $this->config;
$config = $config ? HTMLPurifier_Config::create($config) : $this->config;
$context =& new HTMLPurifier_Context();
$html = $this->encoder->convertToUTF8($html, $config, $context);
// purified HTML
$html =
$this->generator->generateFromTokens(
// list of tokens
$this->strategy->execute(
$this->lexer->tokenizeHTML($html, $config, $context),
// list of un-purified tokens
$this->lexer->tokenizeHTML(
// un-purified HTML
$html, $config, $context
),
$config, $context
),
$config, $context
);
$html = $this->encoder->convertFromUTF8($html, $config, $context);
$this->context =& $context;
return $html;
}
/**
* Filters an array of HTML snippets
* @param $config Optional HTMLPurifier_Config object for this operation.
* See HTMLPurifier::purify() for more details.
* @return Array of purified HTML
*/
function purifyArray($array_of_html, $config = null) {
$context_array = array();
foreach ($array_of_html as $key => $html) {
$array_of_html[$key] = $this->purify($html, $config);
$context_array[$key] = $this->context;
}
$this->context = $context_array;
return $array_of_html;
}
}
?>

View File

@@ -49,7 +49,7 @@ class HTMLPurifier_AttrDef_Lang extends HTMLPurifier_AttrDef
if ($length == 0 || $length == 1 || $length > 8 || !ctype_alnum($subtags[1])) {
return $new_string;
}
if (!ctype_lower($subtags[1])) $subtags[1] = strotolower($subtags[1]);
if (!ctype_lower($subtags[1])) $subtags[1] = strtolower($subtags[1]);
$new_string .= '-' . $subtags[1];
if ($num_subtags == 2) return $new_string;
@@ -61,7 +61,7 @@ class HTMLPurifier_AttrDef_Lang extends HTMLPurifier_AttrDef
return $new_string;
}
if (!ctype_lower($subtags[$i])) {
$subtags[$i] = strotolower($subtags[$i]);
$subtags[$i] = strtolower($subtags[$i]);
}
$new_string .= '-' . $subtags[$i];
}

View File

@@ -21,7 +21,7 @@ class HTMLPurifier_AttrTransform
* Abstract: makes changes to the attributes dependent on multiple values.
*
* @param $attr Assoc array of attributes, usually from
* HTMLPurifier_Token_Tag::$attributes
* HTMLPurifier_Token_Tag::$attr
* @param $config Mandatory HTMLPurifier_Config object.
* @param $context Mandatory HTMLPurifier_Context object
* @returns Processed attribute array.

View File

@@ -44,6 +44,20 @@ class HTMLPurifier_Config
$this->def = $definition; // keep a copy around for checking
}
/**
* Convenience constructor that creates a config object based on a mixed var
* @param mixed $config Variable that defines the state of the config
* object. Can be: a HTMLPurifier_Config() object or
* an array of directives based on loadArray().
* @return Configured HTMLPurifier_Config object
*/
function create($config) {
if (is_a($config, 'HTMLPurifier_Config')) return $config;
$ret = HTMLPurifier_Config::createDefault();
if (is_array($config)) $ret->loadArray($config);
return $ret;
}
/**
* Convenience constructor that creates a default configuration object.
* @return Default HTMLPurifier_Config object.

View File

@@ -104,14 +104,14 @@ class HTMLPurifier_Generator
function generateFromToken($token) {
if (!isset($token->type)) return '';
if ($token->type == 'start') {
$attr = $this->generateAttributes($token->attributes);
$attr = $this->generateAttributes($token->attr);
return '<' . $token->name . ($attr ? ' ' : '') . $attr . '>';
} elseif ($token->type == 'end') {
return '</' . $token->name . '>';
} elseif ($token->type == 'empty') {
$attr = $this->generateAttributes($token->attributes);
$attr = $this->generateAttributes($token->attr);
return '<' . $token->name . ($attr ? ' ' : '') . $attr .
( $this->_xhtml ? ' /': '' )
. '>';

View File

@@ -72,7 +72,7 @@ HTMLPurifier_ConfigSchema::define(
'can overload it with your own list of tags to allow. Note that this '.
'method is subtractive: it does its job by taking away from HTML Purifier '.
'usual feature set, so you cannot add a tag that HTML Purifier never '.
'supported in the first place (like embed). If you change this, you '.
'supported in the first place (like embed, form or head). If you change this, you '.
'probably also want to change %HTML.AllowedAttributes. '.
'<strong>Warning:</strong> If another directive conflicts with the '.
'elements here, <em>that</em> directive will win and override. '.
@@ -561,18 +561,18 @@ class HTMLPurifier_HTMLDefinition
//////////////////////////////////////////////////////////////////////
// %HTML.Allowed(Elements|Attributes) : cut non-allowed elements
$allowed_elements = $config->get('HTML', 'AllowedElements');
if (is_array($allowed_elements)) {
// $allowed_elements[$this->info_parent] = true; // allow parent element
foreach ($this->info as $name => $d) {
if(!isset($allowed_elements[$name])) unset($this->info[$name]);
}
}
$allowed_attributes = $config->get('HTML', 'AllowedAttributes');
if (is_array($allowed_attributes)) {
foreach ($this->info_global_attr as $attr => $info) {
if (!isset($allowed_attributes["*.$attr"])) {
unset($this->info_global_attr[$attr]);
foreach ($this->info_global_attr as $attr_key => $info) {
if (!isset($allowed_attributes["*.$attr_key"])) {
unset($this->info_global_attr[$attr_key]);
}
}
foreach ($this->info as $tag => $info) {

View File

@@ -143,18 +143,18 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer
)
);
if ($attribute_string) {
$attributes = $this->parseAttributeString(
$attribute_string
, $config, $context
);
$attr = $this->parseAttributeString(
$attribute_string
, $config, $context
);
} else {
$attributes = array();
$attr = array();
}
if ($is_self_closing) {
$array[] = new HTMLPurifier_Token_Empty($type, $attributes);
$array[] = new HTMLPurifier_Token_Empty($type, $attr);
} else {
$array[] = new HTMLPurifier_Token_Start($type, $attributes);
$array[] = new HTMLPurifier_Token_Start($type, $attr);
}
$cursor = $position_next_gt + 1;
$inside_tag = false;

View File

@@ -156,11 +156,14 @@ class HTMLPurifier_Strategy_FixNesting extends HTMLPurifier_Strategy
}
$child_def = $def->child;
// have DTD child def validate children
$result = $child_def->validateChildren(
$child_tokens, $config, $context);
if (!empty($def->child)) {
// have DTD child def validate children
$result = $def->child->validateChildren(
$child_tokens, $config, $context);
} else {
// weird, no child definition, get rid of everything
$result = false;
}
// determine whether or not this element has any exclusions
$excludes = $def->excludes;

View File

@@ -30,7 +30,7 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
$token->type == 'start' ) {
$result[] = new HTMLPurifier_Token_Empty($token->name,
$token->attributes);
$token->attr);
continue;
}
@@ -39,7 +39,7 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
$token->type == 'empty' ) {
$result[] = new HTMLPurifier_Token_Start($token->name,
$token->attributes);
$token->attr);
$result[] = new HTMLPurifier_Token_End($token->name);
continue;

View File

@@ -38,7 +38,9 @@ class HTMLPurifier_Strategy_RemoveForeignElements extends HTMLPurifier_Strategy
// hard-coded image special case, pre-emptively drop
// if not available. Probably not abstract-able
if ( $token->name == 'img' ) {
if (!isset($token->attr['src'])) continue;
if (!isset($token->attr['src'])) {
continue;
}
if (!isset($definition->info['img']->attr['src'])) {
continue;
}
@@ -46,7 +48,8 @@ class HTMLPurifier_Strategy_RemoveForeignElements extends HTMLPurifier_Strategy
$definition->
info['img']->
attr['src']->
validate($token->attr['src']);
validate($token->attr['src'],
$config, $context);
if ($token->attr['src'] === false) continue;
}

View File

@@ -35,7 +35,7 @@ class HTMLPurifier_Strategy_ValidateAttributes extends HTMLPurifier_Strategy
if ($token->type !== 'start' && $token->type !== 'empty') continue;
// copy out attributes for easy manipulation
$attr = $token->attributes;
$attr = $token->attr;
// do global transformations (pre)
// nothing currently utilizes this
@@ -117,7 +117,7 @@ class HTMLPurifier_Strategy_ValidateAttributes extends HTMLPurifier_Strategy
// commit changes
// could interfere with flyweight implementation
$tokens[$key]->attributes = $attr;
$tokens[$key]->attr = $attr;
}
$context->destroy('IDAccumulator');

View File

@@ -62,16 +62,16 @@ class HTMLPurifier_TagTransform_Center extends HTMLPurifier_TagTransform
$new_tag = new HTMLPurifier_Token_End($this->transform_to);
return $new_tag;
}
$attributes = $tag->attributes;
$attr = $tag->attr;
$prepend_css = 'text-align:center;';
if (isset($attributes['style'])) {
$attributes['style'] = $prepend_css . $attributes['style'];
if (isset($attr['style'])) {
$attr['style'] = $prepend_css . $attr['style'];
} else {
$attributes['style'] = $prepend_css;
$attr['style'] = $prepend_css;
}
$new_tag = $tag->copy();
$new_tag->name = $this->transform_to;
$new_tag->attributes = $attributes;
$new_tag->attr = $attr;
return $new_tag;
}
}
@@ -115,39 +115,39 @@ class HTMLPurifier_TagTransform_Font extends HTMLPurifier_TagTransform
return $new_tag;
}
$attributes = $tag->attributes;
$attr = $tag->attr;
$prepend_style = '';
// handle color transform
if (isset($attributes['color'])) {
$prepend_style .= 'color:' . $attributes['color'] . ';';
unset($attributes['color']);
if (isset($attr['color'])) {
$prepend_style .= 'color:' . $attr['color'] . ';';
unset($attr['color']);
}
// handle face transform
if (isset($attributes['face'])) {
$prepend_style .= 'font-family:' . $attributes['face'] . ';';
unset($attributes['face']);
if (isset($attr['face'])) {
$prepend_style .= 'font-family:' . $attr['face'] . ';';
unset($attr['face']);
}
// handle size transform
if (isset($attributes['size'])) {
if (isset($this->_size_lookup[$attributes['size']])) {
if (isset($attr['size'])) {
if (isset($this->_size_lookup[$attr['size']])) {
$prepend_style .= 'font-size:' .
$this->_size_lookup[$attributes['size']] . ';';
$this->_size_lookup[$attr['size']] . ';';
}
unset($attributes['size']);
unset($attr['size']);
}
if ($prepend_style) {
$attributes['style'] = isset($attributes['style']) ?
$prepend_style . $attributes['style'] :
$attr['style'] = isset($attr['style']) ?
$prepend_style . $attr['style'] :
$prepend_style;
}
$new_tag = $tag->copy();
$new_tag->name = $this->transform_to;
$new_tag->attributes = $attributes;
$new_tag->attr = $attr;
return $new_tag;

View File

@@ -50,30 +50,29 @@ class HTMLPurifier_Token_Tag extends HTMLPurifier_Token // abstract
/**
* Associative array of the tag's attributes.
*/
var $attributes = array();
var $attr = array();
/**
* Non-overloaded constructor, which lower-cases passed tag name.
*
* @param $name String name.
* @param $attributes Associative array of attributes.
* @param $name String name.
* @param $attr Associative array of attributes.
*/
function HTMLPurifier_Token_Tag($name, $attributes = array()) {
//if ($attributes === null) var_dump(debug_backtrace());
function HTMLPurifier_Token_Tag($name, $attr = array()) {
$this->name = ctype_lower($name) ? $name : strtolower($name);
foreach ($attributes as $key => $value) {
foreach ($attr as $key => $value) {
// normalization only necessary when key is not lowercase
if (!ctype_lower($key)) {
$new_key = strtolower($key);
if (!isset($attributes[$new_key])) {
$attributes[$new_key] = $attributes[$key];
if (!isset($attr[$new_key])) {
$attr[$new_key] = $attr[$key];
}
if ($new_key !== $key) {
unset($attributes[$key]);
unset($attr[$key]);
}
}
}
$this->attributes = $attributes;
$this->attr = $attr;
}
}
@@ -84,7 +83,7 @@ class HTMLPurifier_Token_Start extends HTMLPurifier_Token_Tag
{
var $type = 'start';
function copy() {
return new HTMLPurifier_Token_Start($this->name, $this->attributes);
return new HTMLPurifier_Token_Start($this->name, $this->attr);
}
}
@@ -95,7 +94,7 @@ class HTMLPurifier_Token_Empty extends HTMLPurifier_Token_Tag
{
var $type = 'empty';
function copy() {
return new HTMLPurifier_Token_Empty($this->name, $this->attributes);
return new HTMLPurifier_Token_Empty($this->name, $this->attr);
}
}

View File

@@ -37,12 +37,12 @@ class HTMLPurifier_TokenFactory
/**
* Creates a HTMLPurifier_Token_Start.
* @param $name Tag name
* @param $attribute Associative array of attributes
* @param $attr Associative array of attributes
* @return Generated HTMLPurifier_Token_Start
*/
public function createStart($name, $attributes = array()) {
public function createStart($name, $attr = array()) {
$p = clone $this->p_start;
$p->HTMLPurifier_Token_Tag($name, $attributes);
$p->HTMLPurifier_Token_Tag($name, $attr);
return $p;
}
@@ -60,12 +60,12 @@ class HTMLPurifier_TokenFactory
/**
* Creates a HTMLPurifier_Token_Empty.
* @param $name Tag name
* @param $attribute Associative array of attributes
* @param $attr Associative array of attributes
* @return Generated HTMLPurifier_Token_Empty
*/
public function createEmpty($name, $attributes = array()) {
public function createEmpty($name, $attr = array()) {
$p = clone $this->p_empty;
$p->HTMLPurifier_Token_Tag($name, $attributes);
$p->HTMLPurifier_Token_Tag($name, $attr);
return $p;
}

40
smoketests/loadFunc.php Normal file
View File

@@ -0,0 +1,40 @@
<?php
set_include_path('../library/' . PATH_SEPARATOR . get_include_path() );
header('Content-type: text/html; charset=UTF-8');
echo '<?xml version="1.0" encoding="UTF-8" ?>';
function printb($bool) {
echo '<strong>' . ($bool ? 'Pass' : 'Fail') . '</strong>';
}
function printEval($code) {
echo '<pre>' . htmlspecialchars($code) . '</pre>';
eval($code);
}
?><!DOCTYPE html
PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
<head>
<title>HTML Purifier Function Include Smoketest</title>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
</head>
<body>
<h1>HTML Purifier Function Include Smoketest</h1>
<p>Tests whether or not the includes are done properly and whether or
not the library is lazy loaded.</p>
<?php printEval("require_once 'HTMLPurifier.func.php';"); ?>
<p>HTMLPurifier class doesn't exist: <?php printb(!class_exists('HTMLPurifier')); ?></li></p>
<?php printEval("HTMLPurifier('foobar');"); ?>
<p>HTMLPurifier class exists: <?php printb(class_exists('HTMLPurifier')); ?></li></p>
</body>
</html>

View File

@@ -0,0 +1,65 @@
<?php
require_once 'common.php';
echo '<?xml version="1.0" encoding="UTF-8" ?>';
?><!DOCTYPE html
PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html>
<head>
<title>HTML Purifier Preserve YouTube Smoketest</title>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
</head>
<body>
<h1>HTML Purifier Preserve YouTube Smoketest</h1>
<?php
class HTMLPurifierX_PreserveYouTube extends HTMLPurifier
{
function purify($html, $config = null) {
$pre_regex = '#<object[^>]+>.+?'.
'http://www.youtube.com/v/([A-Za-z0-9]+).+?</object>#';
$pre_replace = '<span class="youtube-embed">\1</span>';
$html = preg_replace($pre_regex, $pre_replace, $html);
$html = parent::purify($html, $config);
$post_regex = '#<span class="youtube-embed">([A-Za-z0-9]+)</span>#';
$post_replace = '<object width="425" height="350" '.
'data="http://www.youtube.com/v/\1">'.
'<param name="movie" value="http://www.youtube.com/v/\1"></param>'.
'<param name="wmode" value="transparent"></param>'.
'<!--[if IE]>'.
'<embed src="http://www.youtube.com/v/\1"'.
'type="application/x-shockwave-flash"'.
'wmode="transparent" width="425" height="350" />'.
'<![endif]-->'.
'</object>';
$html = preg_replace($post_regex, $post_replace, $html);
return $html;
}
}
$string = '<object width="425" height="350"><param name="movie" value="http://www.youtube.com/v/JzqumbhfxRo"></param><param name="wmode" value="transparent"></param><embed src="http://www.youtube.com/v/JzqumbhfxRo" type="application/x-shockwave-flash" wmode="transparent" width="425" height="350"></embed></object>';
$regular_purifier = new HTMLPurifier();
$youtube_purifier = new HTMLPurifierX_PreserveYouTube();
?>
<h2>Unpurified</h2>
<p><a href="?break">Click here to see the unpurified version (breaks validation).</a></p>
<div><?php
if (isset($_GET['break'])) echo $string;
?></div>
<h2>Without YouTube exception</h2>
<div><?php
echo $regular_purifier->purify($string);
?></div>
<h2>With YouTube exception</h2>
<div><?php
echo $youtube_purifier->purify($string);
?></div>
</body>
</html>

View File

@@ -36,6 +36,7 @@ echo '<?xml version="1.0" encoding="UTF-8" ?>';
<style type="text/css">
form table {margin:1em auto;}
form th {text-align:right;padding-right:1em;}
form .c {display:none;}
.HTMLPurifier_Printer table {border-collapse:collapse;
border:1px solid #000; width:600px;
margin:1em auto;font-family:sans-serif;font-size:75%;}
@@ -62,11 +63,9 @@ influences the internal workings of the definition objects.</p>
<p>You can specify an array by typing in a comma-separated
list of items, HTML Purifier will take care of the rest (including
transformation into a real array list or a lookup table). If a
directive can be set to null, that usually means that the feature
is disabled when it is null (not that, say, no tags are allowed).</p>
transformation into a real array list or a lookup table).</p>
<form id="edit-config" method="get" action="printDefinition.php">
<form id="edit-config" name="edit-config" method="get" action="printDefinition.php">
<table>
<?php
$directives = $config->getBatch('HTML');
@@ -91,27 +90,31 @@ is disabled when it is null (not that, say, no tags are allowed).</p>
<tr>
<th>
<a href="http://hp.jpsband.org/live/configdoc/plain.html#<?php echo $directive ?>">
%<?php echo $directive; ?>
<label for="<?php echo $directive; ?>">%<?php echo $directive; ?></label>
</a>
</th>
<td>
<?php if (is_bool($value)) { ?>
Yes <input type="radio" name="<?php echo $directive; ?>" value="1"<?php if ($value) { ?> checked="checked"<?php } ?> /> &nbsp;
No <input type="radio" name="<?php echo $directive; ?>" value="0"<?php if (!$value) { ?> checked="checked"<?php } ?> />
<label for="Yes_<?php echo $directive; ?>"><span class="c">%<?php echo $directive; ?>:</span> Yes</label>
<input type="radio" name="<?php echo $directive; ?>" id="Yes_<?php echo $directive; ?>" value="1"<?php if ($value) { ?> checked="checked"<?php } ?> /> &nbsp;
<label for="No_<?php echo $directive; ?>"><span class="c">%<?php echo $directive; ?>:</span> No</label>
<input type="radio" name="<?php echo $directive; ?>" id="No_<?php echo $directive; ?>" value="0"<?php if (!$value) { ?> checked="checked"<?php } ?> />
<?php } else { ?>
<?php if($allow_null) { ?>
Null/Disabled <input
type="checkbox"
value="1"
onclick="toggleWriteability('<?php echo $directive ?>',checked)"
name="Null_<?php echo $directive; ?>"
<?php if ($value === null) { ?> checked="checked"<?php } ?>
/> or <br />
<label for="Null_<?php echo $directive; ?>"><span class="c">%<?php echo $directive; ?>:</span> Null/Disabled*</label>
<input
type="checkbox"
value="1"
onclick="toggleWriteability('<?php echo $directive ?>',checked)"
name="Null_<?php echo $directive; ?>"
id="Null_<?php echo $directive; ?>"
<?php if ($value === null) { ?> checked="checked"<?php } ?>
/> or <br />
<?php } ?>
<input
type="text"
id="<?php echo $directive; ?>"
name="<?php echo $directive; ?>"
id="<?php echo $directive; ?>"
value="<?php echo escapeHTML($value); ?>"
<?php if($value === null) {echo 'disabled="disabled"';} ?>
/>
@@ -128,6 +131,10 @@ is disabled when it is null (not that, say, no tags are allowed).</p>
</td>
</tr>
</table>
<p>* Some configuration directives make a distinction between an empty
variable and a null variable. A whitelist, for example, will take an
empty array as meaning <em>no</em> allowed elements, while checking
Null/Disabled will mean that user whitelisting functionality is disabled.</p>
</form>
<h2>HTMLDefinition</h2>
<?php echo $printer_html_definition->render($config) ?>

View File

@@ -17,6 +17,9 @@ class HTMLPurifier_AttrDef_LangTest extends HTMLPurifier_AttrDefHarness
$this->assertDef(' en ', 'en'); // trim
$this->assertDef('EN', 'en'); // case insensitivity
// (thanks Eugen Pankratz for noticing the typos!)
$this->assertDef('En-Us-Edison', 'en-us-edison'); // complex ci
$this->assertDef('fr en', false); // multiple languages
$this->assertDef('%', false); // bad character
@@ -26,7 +29,7 @@ class HTMLPurifier_AttrDef_LangTest extends HTMLPurifier_AttrDefHarness
// primary subtag rules
// I'm somewhat hesitant to allow x and i as primary language codes,
// because they usually are never used in real life. However,
// theoretically speaking, having them alone is permissble, so
// theoretically speaking, having them alone is permissable, so
// I'll be lenient. No XML parser is going to complain anyway.
$this->assertDef('x');
$this->assertDef('i');

View File

@@ -180,6 +180,25 @@ class HTMLPurifier_ConfigTest extends UnitTestCase
}
function test_create() {
HTMLPurifier_ConfigSchema::defineNamespace('Cake', 'Properties of it.');
HTMLPurifier_ConfigSchema::define('Cake', 'Sprinkles', 666, 'int', 'Number of.');
HTMLPurifier_ConfigSchema::define('Cake', 'Flavor', 'vanilla', 'string', 'Flavor of the batter.');
$config = HTMLPurifier_Config::createDefault();
$config->set('Cake', 'Sprinkles', 42);
// test flat pass-through
$created_config = HTMLPurifier_Config::create($config);
$this->assertEqual($config, $created_config);
// test loadArray
$created_config = HTMLPurifier_Config::create(array('Cake.Sprinkles' => 42));
$this->assertEqual($config, $created_config);
}
}
?>

View File

@@ -42,12 +42,15 @@ class HTMLPurifier_Strategy_RemoveForeignElementsTest
' Warning!</span>'
);
// test removal of img tag
// test removal of invalid img tag
$this->assertResult(
'<img />',
''
);
// test preservation of valid img tag
$this->assertResult('<img src="foobar.gif" />');
}
}

View File

@@ -25,7 +25,7 @@ class HTMLPurifier_Test extends UnitTestCase
function testStrict() {
$config = HTMLPurifier_Config::createDefault();
$config->set('HTML', 'Strict', true);
$this->purifier = new HTMLPurifier($config);
$this->purifier = new HTMLPurifier( $config ); // verbose syntax
$this->assertPurification(
'<u>Illegal underline</u>',
@@ -40,10 +40,11 @@ class HTMLPurifier_Test extends UnitTestCase
}
function testDifferentAllowedElements() {
$config = HTMLPurifier_Config::createDefault();
$config->set('HTML', 'AllowedElements', array('b', 'i', 'p', 'a'));
$config->set('HTML', 'AllowedAttributes', array('a.href', '*.id'));
$this->purifier = new HTMLPurifier($config);
$this->purifier = new HTMLPurifier(array(
'HTML.AllowedElements' => array('b', 'i', 'p', 'a'),
'HTML.AllowedAttributes' => array('a.href', '*.id')
));
$this->assertPurification(
'<p>Par.</p><p>Para<a href="http://google.com/">gr</a>aph</p>Text<b>Bol<i>d</i></b>'
@@ -58,9 +59,7 @@ class HTMLPurifier_Test extends UnitTestCase
function testDisableURI() {
$config = HTMLPurifier_Config::createDefault();
$config->set('Attr', 'DisableURI', true);
$this->purifier = new HTMLPurifier($config);
$this->purifier = new HTMLPurifier( array('Attr.DisableURI' => true) );
$this->assertPurification(
'<img src="foobar"/>',
@@ -69,6 +68,21 @@ class HTMLPurifier_Test extends UnitTestCase
}
function test_purifyArray() {
$this->purifier = new HTMLPurifier();
$this->assertEqual(
$this->purifier->purifyArray(
array('Good', '<b>Sketchy', 'foo' => '<script>bad</script>')
),
array('Good', '<b>Sketchy</b>', 'foo' => 'bad')
);
$this->assertIsA($this->purifier->context, 'array');
}
}
?>

View File

@@ -5,15 +5,15 @@ require_once 'HTMLPurifier/Token.php';
class HTMLPurifier_TokenTest extends UnitTestCase
{
function assertTokenConstruction($name, $attributes,
$expect_name = null, $expect_attributes = null
function assertTokenConstruction($name, $attr,
$expect_name = null, $expect_attr = null
) {
if ($expect_name === null) $expect_name = $name;
if ($expect_attributes === null) $expect_attributes = $attributes;
$token = new HTMLPurifier_Token_Start($name, $attributes);
if ($expect_attr === null) $expect_attr = $attr;
$token = new HTMLPurifier_Token_Start($name, $attr);
$this->assertEqual($expect_name, $token->name);
$this->assertEqual($expect_attributes, $token->attributes);
$this->assertEqual($expect_name, $token->name);
$this->assertEqual($expect_attr, $token->attr);
}
function testConstruct() {