mirror of
https://github.com/ezyang/htmlpurifier.git
synced 2025-08-05 21:57:26 +02:00
Compare commits
2 Commits
Author | SHA1 | Date | |
---|---|---|---|
|
ed38579fa2 | ||
|
54f615f1d3 |
2
Doxyfile
2
Doxyfile
@@ -4,7 +4,7 @@
|
|||||||
# Project related configuration options
|
# Project related configuration options
|
||||||
#---------------------------------------------------------------------------
|
#---------------------------------------------------------------------------
|
||||||
PROJECT_NAME = HTML Purifier
|
PROJECT_NAME = HTML Purifier
|
||||||
PROJECT_NUMBER = 1.3.0
|
PROJECT_NUMBER = 1.3.2
|
||||||
OUTPUT_DIRECTORY = "C:/Documents and Settings/Edward/My Documents/My Webs/htmlpurifier/docs/doxygen"
|
OUTPUT_DIRECTORY = "C:/Documents and Settings/Edward/My Documents/My Webs/htmlpurifier/docs/doxygen"
|
||||||
CREATE_SUBDIRS = NO
|
CREATE_SUBDIRS = NO
|
||||||
OUTPUT_LANGUAGE = English
|
OUTPUT_LANGUAGE = English
|
||||||
|
14
NEWS
14
NEWS
@@ -9,6 +9,20 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier
|
|||||||
. Internal change
|
. Internal change
|
||||||
==========================
|
==========================
|
||||||
|
|
||||||
|
1.3.2, released 2006-12-25
|
||||||
|
! HTMLPurifier object now accepts configuration arrays, no need to manually
|
||||||
|
instantiate a configuration object
|
||||||
|
! Context object now accessible to outside
|
||||||
|
! Added enduser-youtube.html, explains how to embed YouTube videos. See
|
||||||
|
also corresponding smoketest preserveYouTube.php.
|
||||||
|
! Added purifyArray(), which takes a list of HTML and purifies it all
|
||||||
|
! Added static member variable $version to HTML Purifier with PHP-compatible
|
||||||
|
version number string.
|
||||||
|
- Fixed fatal error thrown by upper-cased language attributes
|
||||||
|
- printDefinition.php: added labels, added better clarification
|
||||||
|
. HTMLPurifier_Config::create() added, takes mixed variable and converts into
|
||||||
|
a HTMLPurifier_Config object.
|
||||||
|
|
||||||
1.3.1, released 2006-12-06
|
1.3.1, released 2006-12-06
|
||||||
! Added HTMLPurifier.func.php stub for a convenient function to call the library
|
! Added HTMLPurifier.func.php stub for a convenient function to call the library
|
||||||
- Fixed bug in RemoveInvalidImg code that caused all images to be dropped
|
- Fixed bug in RemoveInvalidImg code that caused all images to be dropped
|
||||||
|
9
TODO
9
TODO
@@ -10,6 +10,7 @@ TODO List
|
|||||||
1.4 release
|
1.4 release
|
||||||
# More extensive URI filtering schemes (see docs/proposal-new-directives.txt)
|
# More extensive URI filtering schemes (see docs/proposal-new-directives.txt)
|
||||||
# Allow for background-image and list-style-image (intrinsically tied to above)
|
# Allow for background-image and list-style-image (intrinsically tied to above)
|
||||||
|
# Add hooks for custom behavior (for instance, YouTube preservation)
|
||||||
- Aggressive caching
|
- Aggressive caching
|
||||||
? Rich set* methods and config file loaders for HTMLPurifier_Config
|
? Rich set* methods and config file loaders for HTMLPurifier_Config
|
||||||
? Configuration profiles: sets of directives that get set with one func call
|
? Configuration profiles: sets of directives that get set with one func call
|
||||||
@@ -66,7 +67,6 @@ Unknown release (on a scratch-an-itch basis)
|
|||||||
- Append something to duplicate IDs so they're still usable (impl. note: the
|
- Append something to duplicate IDs so they're still usable (impl. note: the
|
||||||
dupe detector would also need to detect the suffix as well)
|
dupe detector would also need to detect the suffix as well)
|
||||||
- Have 'lang' attribute be checked against official lists
|
- Have 'lang' attribute be checked against official lists
|
||||||
- Docs on how to embed YouTube videos (and friends) without patches
|
|
||||||
|
|
||||||
Encoding workarounds
|
Encoding workarounds
|
||||||
- Non-lossy dumb alternate character encoding transformations, achieved by
|
- Non-lossy dumb alternate character encoding transformations, achieved by
|
||||||
@@ -84,7 +84,12 @@ Requested
|
|||||||
3. Extend the tag exclusion system to specify whether or not the
|
3. Extend the tag exclusion system to specify whether or not the
|
||||||
contents should be dropped or not (currently, there's code that could do
|
contents should be dropped or not (currently, there's code that could do
|
||||||
something like this if it didn't drop the inner text too.)
|
something like this if it didn't drop the inner text too.)
|
||||||
- Accept array input, by iterating and purifying all of the items
|
- More user-friendly warnings when %HTML.Allow* attempts to specify a
|
||||||
|
tag or attribute that is not supported
|
||||||
|
- Allow specifying global attributes on a tag-by-tag basis in
|
||||||
|
%HTML.AllowAttributes
|
||||||
|
- Parse TinyMCE whitelist into our %HTML.Allow* whitelists
|
||||||
|
- XSS-attempt detection
|
||||||
|
|
||||||
Wontfix
|
Wontfix
|
||||||
- Non-lossy smart alternate character encoding transformations (unless
|
- Non-lossy smart alternate character encoding transformations (unless
|
||||||
|
179
docs/enduser-youtube.html
Normal file
179
docs/enduser-youtube.html
Normal file
@@ -0,0 +1,179 @@
|
|||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
|
||||||
|
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
|
||||||
|
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"><head>
|
||||||
|
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
|
||||||
|
<meta name="description" content="Explains how to safely allow the embedding of flash from trusted sites in HTML Purifier." />
|
||||||
|
<link rel="stylesheet" type="text/css" href="./style.css" />
|
||||||
|
|
||||||
|
<title>Embedding YouTube Videos - HTML Purifier</title>
|
||||||
|
|
||||||
|
</head><body>
|
||||||
|
|
||||||
|
<h1 class="subtitled">Embedding YouTube Videos</h1>
|
||||||
|
<div class="subtitle">...as well as other dangerous active content</div>
|
||||||
|
|
||||||
|
<div id="filing">Filed under End-User</div>
|
||||||
|
<div id="index">Return to the <a href="index.html">index</a>.</div>
|
||||||
|
|
||||||
|
<p>Clients like their YouTube videos. It gives them a warm fuzzy feeling when
|
||||||
|
they see a neat little embedded video player on their websites that can play
|
||||||
|
the latest clips from their documentary "Fido and the Bones of Spring".
|
||||||
|
All joking aside, the ability to embed YouTube videos or other active
|
||||||
|
content in their pages is something that a lot of people like.</p>
|
||||||
|
|
||||||
|
<p>This is a <em>bad</em> idea. The moment you embed anything untrusted,
|
||||||
|
you will definitely be slammed by all manner of nasties that can be
|
||||||
|
embedded in things from your run of the mill Flash movie to
|
||||||
|
<a href="http://blog.spywareguide.com/2006/12/myspace_phish_attack_leads_use.html">Quicktime movies</a>.
|
||||||
|
Even <code>img</code> tags, which HTML Purifier allows by default, can be
|
||||||
|
dangerous. Be distrustful of anything that tells a browser to load content
|
||||||
|
from another website automatically.</p>
|
||||||
|
|
||||||
|
<p>Luckily for us, however, whitelisting saves the day. Sure, letting users
|
||||||
|
include any old random flash file could be dangerous, but if it's
|
||||||
|
from a specific website, it probably is okay. If no amount of pleading will
|
||||||
|
convince the people upstairs that they should settle for just linking
|
||||||
|
to their movies, you may find this technique very useful.</p>
|
||||||
|
|
||||||
|
<h2>Sample</h2>
|
||||||
|
|
||||||
|
<p>Below is custom code that allows users to embed
|
||||||
|
YouTube videos. This is not favoritism: this trick can easily be adapted for
|
||||||
|
other forms of embeddable content.</p>
|
||||||
|
|
||||||
|
<p>Usually, websites like YouTube give us boilerplate code that you can insert
|
||||||
|
into your documents. YouTube's code goes like this:</p>
|
||||||
|
|
||||||
|
<pre>
|
||||||
|
<object width="425" height="350">
|
||||||
|
<param name="movie" value="http://www.youtube.com/v/AyPzM5WK8ys" />
|
||||||
|
<param name="wmode" value="transparent" />
|
||||||
|
<embed src="http://www.youtube.com/v/AyPzM5WK8ys"
|
||||||
|
type="application/x-shockwave-flash"
|
||||||
|
wmode="transparent" width="425" height="350" />
|
||||||
|
</object>
|
||||||
|
</pre>
|
||||||
|
|
||||||
|
<p>There are two things to note about this code:</p>
|
||||||
|
|
||||||
|
<ol>
|
||||||
|
<li><code><embed></code> is not recognized by W3C, so if you want
|
||||||
|
standards-compliant code, you'll have to get rid of it.</li>
|
||||||
|
<li>The code is exactly the same for all instances, except for the
|
||||||
|
identifier <tt>AyPzM5WK8ys</tt> which tells us which movie file
|
||||||
|
to retrieve.</li>
|
||||||
|
</ol>
|
||||||
|
|
||||||
|
<p>What point 2 means is that if we have code like <code><span
|
||||||
|
class="youtube-embed">AyPzM5WK8ys</span></code> your
|
||||||
|
application can reconstruct the full object from this small snippet that
|
||||||
|
passes through HTML Purifier <em>unharmed</em>.</p>
|
||||||
|
|
||||||
|
<pre>
|
||||||
|
<?php
|
||||||
|
|
||||||
|
class HTMLPurifierX_PreserveYouTube extends HTMLPurifier
|
||||||
|
{
|
||||||
|
function purify($html, $config = null) {
|
||||||
|
$pre_regex = '#<object[^>]+>.+?'.
|
||||||
|
'http://www.youtube.com/v/([A-Za-z0-9]+).+?</object>#';
|
||||||
|
$pre_replace = '<span class="youtube-embed">\1</span>';
|
||||||
|
$html = preg_replace($pre_regex, $pre_replace, $html);
|
||||||
|
$html = parent::purify($html, $config);
|
||||||
|
$post_regex = '#<span class="youtube-embed">([A-Za-z0-9]+)</span>#';
|
||||||
|
$post_replace = '<object width="425" height="350" '.
|
||||||
|
'data="http://www.youtube.com/v/\1">'.
|
||||||
|
'<param name="movie" value="http://www.youtube.com/v/\1"></param>'.
|
||||||
|
'<param name="wmode" value="transparent"></param>'.
|
||||||
|
'<!--[if IE]>'.
|
||||||
|
'<embed src="http://www.youtube.com/v/\1"'.
|
||||||
|
'type="application/x-shockwave-flash"'.
|
||||||
|
'wmode="transparent" width="425" height="350" />'.
|
||||||
|
'<![endif]-->'.
|
||||||
|
'</object>';
|
||||||
|
$html = preg_replace($post_regex, $post_replace, $html);
|
||||||
|
return $html;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
$purifier = new HTMLPurifierX_PreserveYouTube();
|
||||||
|
$html_still_with_youtube = $purifier->purify($html_with_youtube);
|
||||||
|
|
||||||
|
?>
|
||||||
|
</pre>
|
||||||
|
|
||||||
|
<p>There is a bit going on here, so let's explain.</p>
|
||||||
|
|
||||||
|
<ol>
|
||||||
|
<li>The class uses the prefix <code>HTMLPurifierX</code> because it's
|
||||||
|
userspace code. Don't use <code>HTMLPurifier</code> in front of your
|
||||||
|
class, since it might clobber another class in the library.</li>
|
||||||
|
<li>In order to keep the interface compatible, we've extended HTMLPurifier
|
||||||
|
into a new class that preserves the YouTube videos. This means that
|
||||||
|
all you have to do is replace all instances of
|
||||||
|
<code>new HTMLPurifier</code> with <code>new
|
||||||
|
HTMLPurifierX_PreserveYouTube</code>. There are other ways to go about
|
||||||
|
doing this: if you were calling a function that wrapped HTML Purifier,
|
||||||
|
you could paste the PHP right there. If you wanted to be really
|
||||||
|
fancy, you could make a decorator for HTMLPurifier.</li>
|
||||||
|
<li>The first preg_replace call replaces any YouTube code users may have
|
||||||
|
embedded into the benign span tag. Span is used because it is inline,
|
||||||
|
and objects are inline too. We are very careful to be extremely
|
||||||
|
restrictive on what goes inside the span tag, as if an errant code
|
||||||
|
gets in there it could get messy.</li>
|
||||||
|
<li>The HTML is then purified as usual.</li>
|
||||||
|
<li>Then, another preg_replace replaces the span tag with a fully fledged
|
||||||
|
object. Note that the embed is removed, and, in its place, a data
|
||||||
|
attribute was added to the object. This makes the tag standards
|
||||||
|
compliant! It also breaks Internet Explorer, so we add in a bit of
|
||||||
|
conditional comments with the old embed code to make it work again.
|
||||||
|
It's all quite convoluted but works.</li>
|
||||||
|
</ol>
|
||||||
|
|
||||||
|
<h2>Warning</h2>
|
||||||
|
|
||||||
|
<p>There are a number of possible problems with the code above, depending
|
||||||
|
on how you look at it.</p>
|
||||||
|
|
||||||
|
<h3>Cannot change width and height</h3>
|
||||||
|
|
||||||
|
<p>The width and height of the final YouTube movie cannot be adjusted. This
|
||||||
|
is because I am lazy. If you really insist on letting users change the size
|
||||||
|
of the movie, what you need to do is package up the attributes inside the
|
||||||
|
span tag (along with the movie ID). It gets complicated though: a malicious
|
||||||
|
user can specify an outrageously large height and width and attempt to crash
|
||||||
|
the user's operating system/browser. You need to either cap it by limiting
|
||||||
|
the number of digits allowed in the regex or using a callback to check the
|
||||||
|
number.</p>
|
||||||
|
|
||||||
|
<h3>Trusts media's host's security</h3>
|
||||||
|
|
||||||
|
<p>By allowing this code onto our website, we are trusting that YouTube has
|
||||||
|
tech-savvy enough people not to allow their users to inject malicious
|
||||||
|
code into the Flash files. An exploit on YouTube means an exploit on your
|
||||||
|
site. Even though YouTube is run by the reputable Google, it
|
||||||
|
<a href="http://ha.ckers.org/blog/20061213/google-xss-vuln/">doesn't</a>
|
||||||
|
mean they are
|
||||||
|
<a href="http://ha.ckers.org/blog/20061208/xss-in-googles-orkut/">invulnerable.</a>
|
||||||
|
You're putting a certain measure of the job on an external provider (just as
|
||||||
|
you have by entrusting your user input to HTML Purifier), and
|
||||||
|
it is important that you are cognizant of the risk.</p>
|
||||||
|
|
||||||
|
<h3>Poorly written adaptations compromise security</h3>
|
||||||
|
|
||||||
|
<p>This should go without saying, but if you're going to adapt this code
|
||||||
|
for Google Video or the like, make sure you do it <em>right</em>. It's
|
||||||
|
extremely easy to allow a character too many in the final section and
|
||||||
|
suddenly you're introducing XSS into HTML Purifier's XSS free output. HTML
|
||||||
|
Purifier may be well written, but it cannot guard against vulnerabilities
|
||||||
|
introduced after it has finished.</p>
|
||||||
|
|
||||||
|
<h2>Future plans</h2>
|
||||||
|
|
||||||
|
<p>It would probably be a good idea if this code was added to the core
|
||||||
|
library. Look out for the inclusion of this into the core as a decorator
|
||||||
|
or the like.</p>
|
||||||
|
|
||||||
|
</body>
|
||||||
|
</html>
|
@@ -23,7 +23,10 @@ information for casual developers using HTML Purifier.</p>
|
|||||||
<dl>
|
<dl>
|
||||||
|
|
||||||
<dt><a href="enduser-id.html">IDs</a></dt>
|
<dt><a href="enduser-id.html">IDs</a></dt>
|
||||||
<dd>Explains various methods for allowing IDs in documents safely in HTML Purifier.</dd>
|
<dd>Explains various methods for allowing IDs in documents safely.</dd>
|
||||||
|
|
||||||
|
<dt><a href="enduser-youtube.html">Embedding YouTube videos</a></dt>
|
||||||
|
<dd>Explains how to safely allow the embedding of flash from trusted sites.</dd>
|
||||||
|
|
||||||
</dl>
|
</dl>
|
||||||
|
|
||||||
|
@@ -10,7 +10,8 @@ It's quite simple, according to <http://www.w3.org/TR/xhtml11/changes.html>
|
|||||||
...but that's only an informative section. More things to do:
|
...but that's only an informative section. More things to do:
|
||||||
|
|
||||||
1. Scratch style attribute (it's deprecated)
|
1. Scratch style attribute (it's deprecated)
|
||||||
2. Be module-aware
|
2. Be module-aware (this might entail intelligent grouping in the definition
|
||||||
|
and allowing users to specifically remove certain modules (see 5))
|
||||||
3. Cross-reference minimal content models with existing DTDs and determine
|
3. Cross-reference minimal content models with existing DTDs and determine
|
||||||
changes (todo)
|
changes (todo)
|
||||||
4. Watch out for the Legacy Module
|
4. Watch out for the Legacy Module
|
||||||
|
@@ -22,7 +22,7 @@
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
/*
|
/*
|
||||||
HTML Purifier 1.3.1 - Standards Compliant HTML Filtering
|
HTML Purifier 1.3.2 - Standards Compliant HTML Filtering
|
||||||
Copyright (C) 2006 Edward Z. Yang
|
Copyright (C) 2006 Edward Z. Yang
|
||||||
|
|
||||||
This library is free software; you can redistribute it and/or
|
This library is free software; you can redistribute it and/or
|
||||||
@@ -64,19 +64,29 @@ require_once 'HTMLPurifier/Encoder.php';
|
|||||||
class HTMLPurifier
|
class HTMLPurifier
|
||||||
{
|
{
|
||||||
|
|
||||||
|
var $version = '1.3.2';
|
||||||
|
|
||||||
var $config;
|
var $config;
|
||||||
|
|
||||||
var $lexer, $strategy, $generator;
|
var $lexer, $strategy, $generator;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Final HTMLPurifier_Context of last run purification. Might be an array.
|
||||||
|
* @public
|
||||||
|
*/
|
||||||
|
var $context;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Initializes the purifier.
|
* Initializes the purifier.
|
||||||
* @param $config Optional HTMLPurifier_Config object for all instances of
|
* @param $config Optional HTMLPurifier_Config object for all instances of
|
||||||
* the purifier, if omitted, a default configuration is
|
* the purifier, if omitted, a default configuration is
|
||||||
* supplied (which can be overridden on a per-use basis).
|
* supplied (which can be overridden on a per-use basis).
|
||||||
|
* The parameter can also be any type that
|
||||||
|
* HTMLPurifier_Config::create() supports.
|
||||||
*/
|
*/
|
||||||
function HTMLPurifier($config = null) {
|
function HTMLPurifier($config = null) {
|
||||||
|
|
||||||
$this->config = $config ? $config : HTMLPurifier_Config::createDefault();
|
$this->config = HTMLPurifier_Config::create($config);
|
||||||
|
|
||||||
$this->lexer = HTMLPurifier_Lexer::create();
|
$this->lexer = HTMLPurifier_Lexer::create();
|
||||||
$this->strategy = new HTMLPurifier_Strategy_Core();
|
$this->strategy = new HTMLPurifier_Strategy_Core();
|
||||||
@@ -91,25 +101,54 @@ class HTMLPurifier
|
|||||||
* @param $html String of HTML to purify
|
* @param $html String of HTML to purify
|
||||||
* @param $config HTMLPurifier_Config object for this operation, if omitted,
|
* @param $config HTMLPurifier_Config object for this operation, if omitted,
|
||||||
* defaults to the config object specified during this
|
* defaults to the config object specified during this
|
||||||
* object's construction.
|
* object's construction. The parameter can also be any type
|
||||||
|
* that HTMLPurifier_Config::create() supports.
|
||||||
* @return Purified HTML
|
* @return Purified HTML
|
||||||
*/
|
*/
|
||||||
function purify($html, $config = null) {
|
function purify($html, $config = null) {
|
||||||
$config = $config ? $config : $this->config;
|
|
||||||
|
$config = $config ? HTMLPurifier_Config::create($config) : $this->config;
|
||||||
|
|
||||||
$context =& new HTMLPurifier_Context();
|
$context =& new HTMLPurifier_Context();
|
||||||
$html = $this->encoder->convertToUTF8($html, $config, $context);
|
$html = $this->encoder->convertToUTF8($html, $config, $context);
|
||||||
|
|
||||||
|
// purified HTML
|
||||||
$html =
|
$html =
|
||||||
$this->generator->generateFromTokens(
|
$this->generator->generateFromTokens(
|
||||||
|
// list of tokens
|
||||||
$this->strategy->execute(
|
$this->strategy->execute(
|
||||||
$this->lexer->tokenizeHTML($html, $config, $context),
|
// list of un-purified tokens
|
||||||
|
$this->lexer->tokenizeHTML(
|
||||||
|
// un-purified HTML
|
||||||
|
$html, $config, $context
|
||||||
|
),
|
||||||
$config, $context
|
$config, $context
|
||||||
),
|
),
|
||||||
$config, $context
|
$config, $context
|
||||||
);
|
);
|
||||||
|
|
||||||
$html = $this->encoder->convertFromUTF8($html, $config, $context);
|
$html = $this->encoder->convertFromUTF8($html, $config, $context);
|
||||||
|
$this->context =& $context;
|
||||||
return $html;
|
return $html;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Filters an array of HTML snippets
|
||||||
|
* @param $config Optional HTMLPurifier_Config object for this operation.
|
||||||
|
* See HTMLPurifier::purify() for more details.
|
||||||
|
* @return Array of purified HTML
|
||||||
|
*/
|
||||||
|
function purifyArray($array_of_html, $config = null) {
|
||||||
|
$context_array = array();
|
||||||
|
foreach ($array_of_html as $key => $html) {
|
||||||
|
$array_of_html[$key] = $this->purify($html, $config);
|
||||||
|
$context_array[$key] = $this->context;
|
||||||
|
}
|
||||||
|
$this->context = $context_array;
|
||||||
|
return $array_of_html;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
?>
|
?>
|
@@ -49,7 +49,7 @@ class HTMLPurifier_AttrDef_Lang extends HTMLPurifier_AttrDef
|
|||||||
if ($length == 0 || $length == 1 || $length > 8 || !ctype_alnum($subtags[1])) {
|
if ($length == 0 || $length == 1 || $length > 8 || !ctype_alnum($subtags[1])) {
|
||||||
return $new_string;
|
return $new_string;
|
||||||
}
|
}
|
||||||
if (!ctype_lower($subtags[1])) $subtags[1] = strotolower($subtags[1]);
|
if (!ctype_lower($subtags[1])) $subtags[1] = strtolower($subtags[1]);
|
||||||
|
|
||||||
$new_string .= '-' . $subtags[1];
|
$new_string .= '-' . $subtags[1];
|
||||||
if ($num_subtags == 2) return $new_string;
|
if ($num_subtags == 2) return $new_string;
|
||||||
@@ -61,7 +61,7 @@ class HTMLPurifier_AttrDef_Lang extends HTMLPurifier_AttrDef
|
|||||||
return $new_string;
|
return $new_string;
|
||||||
}
|
}
|
||||||
if (!ctype_lower($subtags[$i])) {
|
if (!ctype_lower($subtags[$i])) {
|
||||||
$subtags[$i] = strotolower($subtags[$i]);
|
$subtags[$i] = strtolower($subtags[$i]);
|
||||||
}
|
}
|
||||||
$new_string .= '-' . $subtags[$i];
|
$new_string .= '-' . $subtags[$i];
|
||||||
}
|
}
|
||||||
|
@@ -44,6 +44,20 @@ class HTMLPurifier_Config
|
|||||||
$this->def = $definition; // keep a copy around for checking
|
$this->def = $definition; // keep a copy around for checking
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Convenience constructor that creates a config object based on a mixed var
|
||||||
|
* @param mixed $config Variable that defines the state of the config
|
||||||
|
* object. Can be: a HTMLPurifier_Config() object or
|
||||||
|
* an array of directives based on loadArray().
|
||||||
|
* @return Configured HTMLPurifier_Config object
|
||||||
|
*/
|
||||||
|
function create($config) {
|
||||||
|
if (is_a($config, 'HTMLPurifier_Config')) return $config;
|
||||||
|
$ret = HTMLPurifier_Config::createDefault();
|
||||||
|
if (is_array($config)) $ret->loadArray($config);
|
||||||
|
return $ret;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Convenience constructor that creates a default configuration object.
|
* Convenience constructor that creates a default configuration object.
|
||||||
* @return Default HTMLPurifier_Config object.
|
* @return Default HTMLPurifier_Config object.
|
||||||
|
65
smoketests/preserveYouTube.php
Normal file
65
smoketests/preserveYouTube.php
Normal file
@@ -0,0 +1,65 @@
|
|||||||
|
<?php
|
||||||
|
|
||||||
|
require_once 'common.php';
|
||||||
|
|
||||||
|
echo '<?xml version="1.0" encoding="UTF-8" ?>';
|
||||||
|
?><!DOCTYPE html
|
||||||
|
PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
|
||||||
|
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
|
||||||
|
<html>
|
||||||
|
<head>
|
||||||
|
<title>HTML Purifier Preserve YouTube Smoketest</title>
|
||||||
|
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
<h1>HTML Purifier Preserve YouTube Smoketest</h1>
|
||||||
|
<?php
|
||||||
|
|
||||||
|
class HTMLPurifierX_PreserveYouTube extends HTMLPurifier
|
||||||
|
{
|
||||||
|
function purify($html, $config = null) {
|
||||||
|
$pre_regex = '#<object[^>]+>.+?'.
|
||||||
|
'http://www.youtube.com/v/([A-Za-z0-9]+).+?</object>#';
|
||||||
|
$pre_replace = '<span class="youtube-embed">\1</span>';
|
||||||
|
$html = preg_replace($pre_regex, $pre_replace, $html);
|
||||||
|
$html = parent::purify($html, $config);
|
||||||
|
$post_regex = '#<span class="youtube-embed">([A-Za-z0-9]+)</span>#';
|
||||||
|
$post_replace = '<object width="425" height="350" '.
|
||||||
|
'data="http://www.youtube.com/v/\1">'.
|
||||||
|
'<param name="movie" value="http://www.youtube.com/v/\1"></param>'.
|
||||||
|
'<param name="wmode" value="transparent"></param>'.
|
||||||
|
'<!--[if IE]>'.
|
||||||
|
'<embed src="http://www.youtube.com/v/\1"'.
|
||||||
|
'type="application/x-shockwave-flash"'.
|
||||||
|
'wmode="transparent" width="425" height="350" />'.
|
||||||
|
'<![endif]-->'.
|
||||||
|
'</object>';
|
||||||
|
$html = preg_replace($post_regex, $post_replace, $html);
|
||||||
|
return $html;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
$string = '<object width="425" height="350"><param name="movie" value="http://www.youtube.com/v/JzqumbhfxRo"></param><param name="wmode" value="transparent"></param><embed src="http://www.youtube.com/v/JzqumbhfxRo" type="application/x-shockwave-flash" wmode="transparent" width="425" height="350"></embed></object>';
|
||||||
|
|
||||||
|
$regular_purifier = new HTMLPurifier();
|
||||||
|
$youtube_purifier = new HTMLPurifierX_PreserveYouTube();
|
||||||
|
|
||||||
|
?>
|
||||||
|
<h2>Unpurified</h2>
|
||||||
|
<p><a href="?break">Click here to see the unpurified version (breaks validation).</a></p>
|
||||||
|
<div><?php
|
||||||
|
if (isset($_GET['break'])) echo $string;
|
||||||
|
?></div>
|
||||||
|
|
||||||
|
<h2>Without YouTube exception</h2>
|
||||||
|
<div><?php
|
||||||
|
echo $regular_purifier->purify($string);
|
||||||
|
?></div>
|
||||||
|
|
||||||
|
<h2>With YouTube exception</h2>
|
||||||
|
<div><?php
|
||||||
|
echo $youtube_purifier->purify($string);
|
||||||
|
?></div>
|
||||||
|
|
||||||
|
</body>
|
||||||
|
</html>
|
@@ -36,6 +36,7 @@ echo '<?xml version="1.0" encoding="UTF-8" ?>';
|
|||||||
<style type="text/css">
|
<style type="text/css">
|
||||||
form table {margin:1em auto;}
|
form table {margin:1em auto;}
|
||||||
form th {text-align:right;padding-right:1em;}
|
form th {text-align:right;padding-right:1em;}
|
||||||
|
form .c {display:none;}
|
||||||
.HTMLPurifier_Printer table {border-collapse:collapse;
|
.HTMLPurifier_Printer table {border-collapse:collapse;
|
||||||
border:1px solid #000; width:600px;
|
border:1px solid #000; width:600px;
|
||||||
margin:1em auto;font-family:sans-serif;font-size:75%;}
|
margin:1em auto;font-family:sans-serif;font-size:75%;}
|
||||||
@@ -62,11 +63,9 @@ influences the internal workings of the definition objects.</p>
|
|||||||
|
|
||||||
<p>You can specify an array by typing in a comma-separated
|
<p>You can specify an array by typing in a comma-separated
|
||||||
list of items, HTML Purifier will take care of the rest (including
|
list of items, HTML Purifier will take care of the rest (including
|
||||||
transformation into a real array list or a lookup table). If a
|
transformation into a real array list or a lookup table).</p>
|
||||||
directive can be set to null, that usually means that the feature
|
|
||||||
is disabled when it is null (not that, say, no tags are allowed).</p>
|
|
||||||
|
|
||||||
<form id="edit-config" method="get" action="printDefinition.php">
|
<form id="edit-config" name="edit-config" method="get" action="printDefinition.php">
|
||||||
<table>
|
<table>
|
||||||
<?php
|
<?php
|
||||||
$directives = $config->getBatch('HTML');
|
$directives = $config->getBatch('HTML');
|
||||||
@@ -91,27 +90,31 @@ is disabled when it is null (not that, say, no tags are allowed).</p>
|
|||||||
<tr>
|
<tr>
|
||||||
<th>
|
<th>
|
||||||
<a href="http://hp.jpsband.org/live/configdoc/plain.html#<?php echo $directive ?>">
|
<a href="http://hp.jpsband.org/live/configdoc/plain.html#<?php echo $directive ?>">
|
||||||
%<?php echo $directive; ?>
|
<label for="<?php echo $directive; ?>">%<?php echo $directive; ?></label>
|
||||||
</a>
|
</a>
|
||||||
</th>
|
</th>
|
||||||
<td>
|
<td>
|
||||||
<?php if (is_bool($value)) { ?>
|
<?php if (is_bool($value)) { ?>
|
||||||
Yes <input type="radio" name="<?php echo $directive; ?>" value="1"<?php if ($value) { ?> checked="checked"<?php } ?> />
|
<label for="Yes_<?php echo $directive; ?>"><span class="c">%<?php echo $directive; ?>:</span> Yes</label>
|
||||||
No <input type="radio" name="<?php echo $directive; ?>" value="0"<?php if (!$value) { ?> checked="checked"<?php } ?> />
|
<input type="radio" name="<?php echo $directive; ?>" id="Yes_<?php echo $directive; ?>" value="1"<?php if ($value) { ?> checked="checked"<?php } ?> />
|
||||||
|
<label for="No_<?php echo $directive; ?>"><span class="c">%<?php echo $directive; ?>:</span> No</label>
|
||||||
|
<input type="radio" name="<?php echo $directive; ?>" id="No_<?php echo $directive; ?>" value="0"<?php if (!$value) { ?> checked="checked"<?php } ?> />
|
||||||
<?php } else { ?>
|
<?php } else { ?>
|
||||||
<?php if($allow_null) { ?>
|
<?php if($allow_null) { ?>
|
||||||
Null/Disabled <input
|
<label for="Null_<?php echo $directive; ?>"><span class="c">%<?php echo $directive; ?>:</span> Null/Disabled*</label>
|
||||||
type="checkbox"
|
<input
|
||||||
value="1"
|
type="checkbox"
|
||||||
onclick="toggleWriteability('<?php echo $directive ?>',checked)"
|
value="1"
|
||||||
name="Null_<?php echo $directive; ?>"
|
onclick="toggleWriteability('<?php echo $directive ?>',checked)"
|
||||||
<?php if ($value === null) { ?> checked="checked"<?php } ?>
|
name="Null_<?php echo $directive; ?>"
|
||||||
/> or <br />
|
id="Null_<?php echo $directive; ?>"
|
||||||
|
<?php if ($value === null) { ?> checked="checked"<?php } ?>
|
||||||
|
/> or <br />
|
||||||
<?php } ?>
|
<?php } ?>
|
||||||
<input
|
<input
|
||||||
type="text"
|
type="text"
|
||||||
id="<?php echo $directive; ?>"
|
|
||||||
name="<?php echo $directive; ?>"
|
name="<?php echo $directive; ?>"
|
||||||
|
id="<?php echo $directive; ?>"
|
||||||
value="<?php echo escapeHTML($value); ?>"
|
value="<?php echo escapeHTML($value); ?>"
|
||||||
<?php if($value === null) {echo 'disabled="disabled"';} ?>
|
<?php if($value === null) {echo 'disabled="disabled"';} ?>
|
||||||
/>
|
/>
|
||||||
@@ -128,6 +131,10 @@ is disabled when it is null (not that, say, no tags are allowed).</p>
|
|||||||
</td>
|
</td>
|
||||||
</tr>
|
</tr>
|
||||||
</table>
|
</table>
|
||||||
|
<p>* Some configuration directives make a distinction between an empty
|
||||||
|
variable and a null variable. A whitelist, for example, will take an
|
||||||
|
empty array as meaning <em>no</em> allowed elements, while checking
|
||||||
|
Null/Disabled will mean that user whitelisting functionality is disabled.</p>
|
||||||
</form>
|
</form>
|
||||||
<h2>HTMLDefinition</h2>
|
<h2>HTMLDefinition</h2>
|
||||||
<?php echo $printer_html_definition->render($config) ?>
|
<?php echo $printer_html_definition->render($config) ?>
|
||||||
|
@@ -17,6 +17,9 @@ class HTMLPurifier_AttrDef_LangTest extends HTMLPurifier_AttrDefHarness
|
|||||||
$this->assertDef(' en ', 'en'); // trim
|
$this->assertDef(' en ', 'en'); // trim
|
||||||
$this->assertDef('EN', 'en'); // case insensitivity
|
$this->assertDef('EN', 'en'); // case insensitivity
|
||||||
|
|
||||||
|
// (thanks Eugen Pankratz for noticing the typos!)
|
||||||
|
$this->assertDef('En-Us-Edison', 'en-us-edison'); // complex ci
|
||||||
|
|
||||||
$this->assertDef('fr en', false); // multiple languages
|
$this->assertDef('fr en', false); // multiple languages
|
||||||
$this->assertDef('%', false); // bad character
|
$this->assertDef('%', false); // bad character
|
||||||
|
|
||||||
@@ -26,7 +29,7 @@ class HTMLPurifier_AttrDef_LangTest extends HTMLPurifier_AttrDefHarness
|
|||||||
// primary subtag rules
|
// primary subtag rules
|
||||||
// I'm somewhat hesitant to allow x and i as primary language codes,
|
// I'm somewhat hesitant to allow x and i as primary language codes,
|
||||||
// because they usually are never used in real life. However,
|
// because they usually are never used in real life. However,
|
||||||
// theoretically speaking, having them alone is permissble, so
|
// theoretically speaking, having them alone is permissible, so
|
||||||
// I'll be lenient. No XML parser is going to complain anyway.
|
// I'll be lenient. No XML parser is going to complain anyway.
|
||||||
$this->assertDef('x');
|
$this->assertDef('x');
|
||||||
$this->assertDef('i');
|
$this->assertDef('i');
|
||||||
|
@@ -180,6 +180,25 @@ class HTMLPurifier_ConfigTest extends UnitTestCase
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
function test_create() {
|
||||||
|
|
||||||
|
HTMLPurifier_ConfigSchema::defineNamespace('Cake', 'Properties of it.');
|
||||||
|
HTMLPurifier_ConfigSchema::define('Cake', 'Sprinkles', 666, 'int', 'Number of.');
|
||||||
|
HTMLPurifier_ConfigSchema::define('Cake', 'Flavor', 'vanilla', 'string', 'Flavor of the batter.');
|
||||||
|
|
||||||
|
$config = HTMLPurifier_Config::createDefault();
|
||||||
|
$config->set('Cake', 'Sprinkles', 42);
|
||||||
|
|
||||||
|
// test flat pass-through
|
||||||
|
$created_config = HTMLPurifier_Config::create($config);
|
||||||
|
$this->assertEqual($config, $created_config);
|
||||||
|
|
||||||
|
// test loadArray
|
||||||
|
$created_config = HTMLPurifier_Config::create(array('Cake.Sprinkles' => 42));
|
||||||
|
$this->assertEqual($config, $created_config);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
?>
|
?>
|
@@ -25,7 +25,7 @@ class HTMLPurifier_Test extends UnitTestCase
|
|||||||
function testStrict() {
|
function testStrict() {
|
||||||
$config = HTMLPurifier_Config::createDefault();
|
$config = HTMLPurifier_Config::createDefault();
|
||||||
$config->set('HTML', 'Strict', true);
|
$config->set('HTML', 'Strict', true);
|
||||||
$this->purifier = new HTMLPurifier($config);
|
$this->purifier = new HTMLPurifier( $config ); // verbose syntax
|
||||||
|
|
||||||
$this->assertPurification(
|
$this->assertPurification(
|
||||||
'<u>Illegal underline</u>',
|
'<u>Illegal underline</u>',
|
||||||
@@ -40,10 +40,11 @@ class HTMLPurifier_Test extends UnitTestCase
|
|||||||
}
|
}
|
||||||
|
|
||||||
function testDifferentAllowedElements() {
|
function testDifferentAllowedElements() {
|
||||||
$config = HTMLPurifier_Config::createDefault();
|
|
||||||
$config->set('HTML', 'AllowedElements', array('b', 'i', 'p', 'a'));
|
$this->purifier = new HTMLPurifier(array(
|
||||||
$config->set('HTML', 'AllowedAttributes', array('a.href', '*.id'));
|
'HTML.AllowedElements' => array('b', 'i', 'p', 'a'),
|
||||||
$this->purifier = new HTMLPurifier($config);
|
'HTML.AllowedAttributes' => array('a.href', '*.id')
|
||||||
|
));
|
||||||
|
|
||||||
$this->assertPurification(
|
$this->assertPurification(
|
||||||
'<p>Par.</p><p>Para<a href="http://google.com/">gr</a>aph</p>Text<b>Bol<i>d</i></b>'
|
'<p>Par.</p><p>Para<a href="http://google.com/">gr</a>aph</p>Text<b>Bol<i>d</i></b>'
|
||||||
@@ -58,9 +59,7 @@ class HTMLPurifier_Test extends UnitTestCase
|
|||||||
|
|
||||||
function testDisableURI() {
|
function testDisableURI() {
|
||||||
|
|
||||||
$config = HTMLPurifier_Config::createDefault();
|
$this->purifier = new HTMLPurifier( array('Attr.DisableURI' => true) );
|
||||||
$config->set('Attr', 'DisableURI', true);
|
|
||||||
$this->purifier = new HTMLPurifier($config);
|
|
||||||
|
|
||||||
$this->assertPurification(
|
$this->assertPurification(
|
||||||
'<img src="foobar"/>',
|
'<img src="foobar"/>',
|
||||||
@@ -69,6 +68,21 @@ class HTMLPurifier_Test extends UnitTestCase
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
function test_purifyArray() {
|
||||||
|
|
||||||
|
$this->purifier = new HTMLPurifier();
|
||||||
|
|
||||||
|
$this->assertEqual(
|
||||||
|
$this->purifier->purifyArray(
|
||||||
|
array('Good', '<b>Sketchy', 'foo' => '<script>bad</script>')
|
||||||
|
),
|
||||||
|
array('Good', '<b>Sketchy</b>', 'foo' => 'bad')
|
||||||
|
);
|
||||||
|
|
||||||
|
$this->assertIsA($this->purifier->context, 'array');
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
?>
|
?>
|
Reference in New Issue
Block a user