mirror of
https://github.com/ezyang/htmlpurifier.git
synced 2025-08-03 12:47:56 +02:00
Compare commits
4 Commits
Author | SHA1 | Date | |
---|---|---|---|
|
ed38579fa2 | ||
|
54f615f1d3 | ||
|
3b979ee846 | ||
|
d151ffd9e6 |
2
Doxyfile
2
Doxyfile
@@ -4,7 +4,7 @@
|
|||||||
# Project related configuration options
|
# Project related configuration options
|
||||||
#---------------------------------------------------------------------------
|
#---------------------------------------------------------------------------
|
||||||
PROJECT_NAME = HTML Purifier
|
PROJECT_NAME = HTML Purifier
|
||||||
PROJECT_NUMBER = 1.3.0
|
PROJECT_NUMBER = 1.3.2
|
||||||
OUTPUT_DIRECTORY = "C:/Documents and Settings/Edward/My Documents/My Webs/htmlpurifier/docs/doxygen"
|
OUTPUT_DIRECTORY = "C:/Documents and Settings/Edward/My Documents/My Webs/htmlpurifier/docs/doxygen"
|
||||||
CREATE_SUBDIRS = NO
|
CREATE_SUBDIRS = NO
|
||||||
OUTPUT_LANGUAGE = English
|
OUTPUT_LANGUAGE = English
|
||||||
|
20
NEWS
20
NEWS
@@ -9,6 +9,26 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier
|
|||||||
. Internal change
|
. Internal change
|
||||||
==========================
|
==========================
|
||||||
|
|
||||||
|
1.3.2, released 2006-12-25
|
||||||
|
! HTMLPurifier object now accepts configuration arrays, no need to manually
|
||||||
|
instantiate a configuration object
|
||||||
|
! Context object now accessible to outside
|
||||||
|
! Added enduser-youtube.html, explains how to embed YouTube videos. See
|
||||||
|
also corresponding smoketest preserveYouTube.php.
|
||||||
|
! Added purifyArray(), which takes a list of HTML and purifies it all
|
||||||
|
! Added static member variable $version to HTML Purifier with PHP-compatible
|
||||||
|
version number string.
|
||||||
|
- Fixed fatal error thrown by upper-cased language attributes
|
||||||
|
- printDefinition.php: added labels, added better clarification
|
||||||
|
. HTMLPurifier_Config::create() added, takes mixed variable and converts into
|
||||||
|
a HTMLPurifier_Config object.
|
||||||
|
|
||||||
|
1.3.1, released 2006-12-06
|
||||||
|
! Added HTMLPurifier.func.php stub for a convenient function to call the library
|
||||||
|
- Fixed bug in RemoveInvalidImg code that caused all images to be dropped
|
||||||
|
(thanks to .mario for reporting this)
|
||||||
|
. Standardized all attribute handling variables to attr, made it plural
|
||||||
|
|
||||||
1.3.0, released 2006-11-26
|
1.3.0, released 2006-11-26
|
||||||
# Invalid images are now removed, rather than replaced with a dud
|
# Invalid images are now removed, rather than replaced with a dud
|
||||||
<img src="" alt="Invalid image" />. Previous behavior can be restored
|
<img src="" alt="Invalid image" />. Previous behavior can be restored
|
||||||
|
8
TODO
8
TODO
@@ -10,6 +10,7 @@ TODO List
|
|||||||
1.4 release
|
1.4 release
|
||||||
# More extensive URI filtering schemes (see docs/proposal-new-directives.txt)
|
# More extensive URI filtering schemes (see docs/proposal-new-directives.txt)
|
||||||
# Allow for background-image and list-style-image (intrinsically tied to above)
|
# Allow for background-image and list-style-image (intrinsically tied to above)
|
||||||
|
# Add hooks for custom behavior (for instance, YouTube preservation)
|
||||||
- Aggressive caching
|
- Aggressive caching
|
||||||
? Rich set* methods and config file loaders for HTMLPurifier_Config
|
? Rich set* methods and config file loaders for HTMLPurifier_Config
|
||||||
? Configuration profiles: sets of directives that get set with one func call
|
? Configuration profiles: sets of directives that get set with one func call
|
||||||
@@ -66,7 +67,6 @@ Unknown release (on a scratch-an-itch basis)
|
|||||||
- Append something to duplicate IDs so they're still usable (impl. note: the
|
- Append something to duplicate IDs so they're still usable (impl. note: the
|
||||||
dupe detector would also need to detect the suffix as well)
|
dupe detector would also need to detect the suffix as well)
|
||||||
- Have 'lang' attribute be checked against official lists
|
- Have 'lang' attribute be checked against official lists
|
||||||
- Docs on how to embed YouTube videos (and friends) without patches
|
|
||||||
|
|
||||||
Encoding workarounds
|
Encoding workarounds
|
||||||
- Non-lossy dumb alternate character encoding transformations, achieved by
|
- Non-lossy dumb alternate character encoding transformations, achieved by
|
||||||
@@ -84,6 +84,12 @@ Requested
|
|||||||
3. Extend the tag exclusion system to specify whether or not the
|
3. Extend the tag exclusion system to specify whether or not the
|
||||||
contents should be dropped or not (currently, there's code that could do
|
contents should be dropped or not (currently, there's code that could do
|
||||||
something like this if it didn't drop the inner text too.)
|
something like this if it didn't drop the inner text too.)
|
||||||
|
- More user-friendly warnings when %HTML.Allow* attempts to specify a
|
||||||
|
tag or attribute that is not supported
|
||||||
|
- Allow specifying global attributes on a tag-by-tag basis in
|
||||||
|
%HTML.AllowAttributes
|
||||||
|
- Parse TinyMCE whitelist into our %HTML.Allow* whitelists
|
||||||
|
- XSS-attempt detection
|
||||||
|
|
||||||
Wontfix
|
Wontfix
|
||||||
- Non-lossy smart alternate character encoding transformations (unless
|
- Non-lossy smart alternate character encoding transformations (unless
|
||||||
|
@@ -54,8 +54,9 @@ help you find the correct functionality more quickly. Here they are:</p>
|
|||||||
abbreviated version is more readable than the full version. Here, we
|
abbreviated version is more readable than the full version. Here, we
|
||||||
list common abbreviations:
|
list common abbreviations:
|
||||||
<ul>
|
<ul>
|
||||||
<li>Attr(s) to Attribute(s)</li>
|
<li>Attr to Attributes (note that it is plural, i.e. <code>$attr = array()</code>)</li>
|
||||||
<li>Def to Definition</li>
|
<li>Def to Definition</li>
|
||||||
|
<li><code>$ret</code> is the value to be returned in a function</li>
|
||||||
</ul>
|
</ul>
|
||||||
</dd>
|
</dd>
|
||||||
|
|
||||||
|
179
docs/enduser-youtube.html
Normal file
179
docs/enduser-youtube.html
Normal file
@@ -0,0 +1,179 @@
|
|||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
|
||||||
|
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
|
||||||
|
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"><head>
|
||||||
|
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
|
||||||
|
<meta name="description" content="Explains how to safely allow the embedding of flash from trusted sites in HTML Purifier." />
|
||||||
|
<link rel="stylesheet" type="text/css" href="./style.css" />
|
||||||
|
|
||||||
|
<title>Embedding YouTube Videos - HTML Purifier</title>
|
||||||
|
|
||||||
|
</head><body>
|
||||||
|
|
||||||
|
<h1 class="subtitled">Embedding YouTube Videos</h1>
|
||||||
|
<div class="subtitle">...as well as other dangerous active content</div>
|
||||||
|
|
||||||
|
<div id="filing">Filed under End-User</div>
|
||||||
|
<div id="index">Return to the <a href="index.html">index</a>.</div>
|
||||||
|
|
||||||
|
<p>Clients like their YouTube videos. It gives them a warm fuzzy feeling when
|
||||||
|
they see a neat little embedded video player on their websites that can play
|
||||||
|
the latest clips from their documentary "Fido and the Bones of Spring".
|
||||||
|
All joking aside, the ability to embed YouTube videos or other active
|
||||||
|
content in their pages is something that a lot of people like.</p>
|
||||||
|
|
||||||
|
<p>This is a <em>bad</em> idea. The moment you embed anything untrusted,
|
||||||
|
you will definitely be slammed by all manner of nasties that can be
|
||||||
|
embedded in things from your run of the mill Flash movie to
|
||||||
|
<a href="http://blog.spywareguide.com/2006/12/myspace_phish_attack_leads_use.html">Quicktime movies</a>.
|
||||||
|
Even <code>img</code> tags, which HTML Purifier allows by default, can be
|
||||||
|
dangerous. Be distrustful of anything that tells a browser to load content
|
||||||
|
from another website automatically.</p>
|
||||||
|
|
||||||
|
<p>Luckily for us, however, whitelisting saves the day. Sure, letting users
|
||||||
|
include any old random flash file could be dangerous, but if it's
|
||||||
|
from a specific website, it probably is okay. If no amount of pleading will
|
||||||
|
convince the people upstairs that they should just settle for linking
|
||||||
|
to their movies, you may find this technique very useful.</p>
|
||||||
|
|
||||||
|
<h2>Sample</h2>
|
||||||
|
|
||||||
|
<p>Below is custom code that allows users to embed
|
||||||
|
YouTube videos. This is not favoritism: this trick can easily be adapted for
|
||||||
|
other forms of embeddable content.</p>
|
||||||
|
|
||||||
|
<p>Usually, websites like YouTube give us boilerplate code that you can insert
|
||||||
|
into your documents. YouTube's code goes like this:</p>
|
||||||
|
|
||||||
|
<pre>
|
||||||
|
<object width="425" height="350">
|
||||||
|
<param name="movie" value="http://www.youtube.com/v/AyPzM5WK8ys" />
|
||||||
|
<param name="wmode" value="transparent" />
|
||||||
|
<embed src="http://www.youtube.com/v/AyPzM5WK8ys"
|
||||||
|
type="application/x-shockwave-flash"
|
||||||
|
wmode="transparent" width="425" height="350" />
|
||||||
|
</object>
|
||||||
|
</pre>
|
||||||
|
|
||||||
|
<p>There are two things to note about this code:</p>
|
||||||
|
|
||||||
|
<ol>
|
||||||
|
<li><code><embed></code> is not recognized by W3C, so if you want
|
||||||
|
standards-compliant code, you'll have to get rid of it.</li>
|
||||||
|
<li>The code is exactly the same for all instances, except for the
|
||||||
|
identifier <tt>AyPzM5WK8ys</tt> which tells us which movie file
|
||||||
|
to retrieve.</li>
|
||||||
|
</ol>
|
||||||
|
|
||||||
|
<p>What point 2 means is that if we have code like <code><span
|
||||||
|
class="youtube-embed">AyPzM5WK8ys</span></code> your
|
||||||
|
application can reconstruct the full object from this small snippet that
|
||||||
|
passes through HTML Purifier <em>unharmed</em>.</p>
|
||||||
|
|
||||||
|
<pre>
|
||||||
|
<?php
|
||||||
|
|
||||||
|
class HTMLPurifierX_PreserveYouTube extends HTMLPurifier
{
    /**
     * Purifies HTML while letting embedded YouTube players survive.
     *
     * YouTube <object> blocks are collapsed into a harmless placeholder
     * <span class="youtube-embed">ID</span> before purification, and the
     * placeholder is expanded back into a full player afterwards.
     *
     * @param $html   String of HTML to purify
     * @param $config Optional configuration, passed through unchanged to
     *                HTMLPurifier::purify()
     * @return Purified HTML with the YouTube players reconstructed
     */
    function purify($html, $config = null) {
        // The "s" modifier lets "." match newlines: the boilerplate YouTube
        // hands out spans several lines, and would never match without it.
        // Video IDs may contain "-" and "_" besides letters and digits; the
        // character class stays strictly limited to those characters.
        $pre_regex = '#<object[^>]+>.+?'.
            'http://www.youtube.com/v/([A-Za-z0-9\-_]+).+?</object>#s';
        $pre_replace = '<span class="youtube-embed">\1</span>';
        $html = preg_replace($pre_regex, $pre_replace, $html);

        $html = parent::purify($html, $config);

        // Must accept exactly the same ID alphabet as $pre_regex, otherwise
        // a placeholder created above would be left unexpanded.
        $post_regex = '#<span class="youtube-embed">([A-Za-z0-9\-_]+)</span>#';
        // Each attribute fragment ends with a space so the concatenated
        // <embed> tag does not run its attributes together.
        $post_replace = '<object width="425" height="350" '.
            'data="http://www.youtube.com/v/\1">'.
            '<param name="movie" value="http://www.youtube.com/v/\1"></param>'.
            '<param name="wmode" value="transparent"></param>'.
            '<!--[if IE]>'.
            '<embed src="http://www.youtube.com/v/\1" '.
            'type="application/x-shockwave-flash" '.
            'wmode="transparent" width="425" height="350" />'.
            '<![endif]-->'.
            '</object>';
        $html = preg_replace($post_regex, $post_replace, $html);
        return $html;
    }
}
|
||||||
|
|
||||||
|
$purifier = new HTMLPurifierX_PreserveYouTube();
|
||||||
|
$html_still_with_youtube = $purifier->purify($html_with_youtube);
|
||||||
|
|
||||||
|
?>
|
||||||
|
</pre>
|
||||||
|
|
||||||
|
<p>There is a bit going on here, so let's explain.</p>
|
||||||
|
|
||||||
|
<ol>
|
||||||
|
<li>The class uses the prefix <code>HTMLPurifierX</code> because it's
|
||||||
|
userspace code. Don't use <code>HTMLPurifier</code> in front of your
|
||||||
|
class, since it might clobber another class in the library.</li>
|
||||||
|
<li>In order to keep the interface compatible, we've extended HTMLPurifier
|
||||||
|
into a new class that preserves the YouTube videos. This means that
|
||||||
|
all you have to do is replace all instances of
|
||||||
|
<code>new HTMLPurifier</code> with <code>new
|
||||||
|
HTMLPurifierX_PreserveYouTube</code>. There are other ways to go about
|
||||||
|
doing this: if you were calling a function that wrapped HTML Purifier,
|
||||||
|
you could paste the PHP right there. If you wanted to be really
|
||||||
|
fancy, you could make a decorator for HTMLPurifier.</li>
|
||||||
|
<li>The first preg_replace call replaces any YouTube code users may have
|
||||||
|
embedded into the benign span tag. Span is used because it is inline,
|
||||||
|
and objects are inline too. We are very careful to be extremely
|
||||||
|
restrictive on what goes inside the span tag, as if an errant code
|
||||||
|
gets in there it could get messy.</li>
|
||||||
|
<li>The HTML is then purified as usual.</li>
|
||||||
|
<li>Then, another preg_replace replaces the span tag with a fully fledged
|
||||||
|
object. Note that the embed is removed, and, in its place, a data
|
||||||
|
attribute was added to the object. This makes the tag standards
|
||||||
|
compliant! It also breaks Internet Explorer, so we add in a bit of
|
||||||
|
conditional comments with the old embed code to make it work again.
|
||||||
|
It's all quite convoluted but works.</li>
|
||||||
|
</ol>
|
||||||
|
|
||||||
|
<h2>Warning</h2>
|
||||||
|
|
||||||
|
<p>There are a number of possible problems with the code above, depending
|
||||||
|
on how you look at it.</p>
|
||||||
|
|
||||||
|
<h3>Cannot change width and height</h3>
|
||||||
|
|
||||||
|
<p>The width and height of the final YouTube movie cannot be adjusted. This
|
||||||
|
is because I am lazy. If you really insist on letting users change the size
|
||||||
|
of the movie, what you need to do is package up the attributes inside the
|
||||||
|
span tag (along with the movie ID). It gets complicated though: a malicious
|
||||||
|
user can specify an outrageously large height and width and attempt to crash
|
||||||
|
the user's operating system/browser. You need to either cap it by limiting
|
||||||
|
the number of digits allowed in the regex or by using a callback to check the
|
||||||
|
number.</p>
|
||||||
|
|
||||||
|
<h3>Trusts media's host's security</h3>
|
||||||
|
|
||||||
|
<p>By allowing this code onto our website, we are trusting that YouTube has
|
||||||
|
tech-savvy enough people not to allow their users to inject malicious
|
||||||
|
code into the Flash files. An exploit on YouTube means an exploit on your
|
||||||
|
site. Even though YouTube is run by the reputable Google, it
|
||||||
|
<a href="http://ha.ckers.org/blog/20061213/google-xss-vuln/">doesn't</a>
|
||||||
|
mean they are
|
||||||
|
<a href="http://ha.ckers.org/blog/20061208/xss-in-googles-orkut/">invulnerable.</a>
|
||||||
|
You're putting a certain measure of the job on an external provider (just as
|
||||||
|
you have by entrusting your user input to HTML Purifier), and
|
||||||
|
it is important that you are cognizant of the risk.</p>
|
||||||
|
|
||||||
|
<h3>Poorly written adaptations compromise security</h3>
|
||||||
|
|
||||||
|
<p>This should go without saying, but if you're going to adapt this code
|
||||||
|
for Google Video or the like, make sure you do it <em>right</em>. It's
|
||||||
|
extremely easy to allow a character too many in the final section and
|
||||||
|
suddenly you're introducing XSS into HTML Purifier's XSS-free output. HTML
|
||||||
|
Purifier may be well written, but it cannot guard against vulnerabilities
|
||||||
|
introduced after it has finished.</p>
|
||||||
|
|
||||||
|
<h2>Future plans</h2>
|
||||||
|
|
||||||
|
<p>It would probably be a good idea if this code was added to the core
|
||||||
|
library. Look out for the inclusion of this into the core as a decorator
|
||||||
|
or the like.</p>
|
||||||
|
|
||||||
|
</body>
|
||||||
|
</html>
|
@@ -23,7 +23,10 @@ information for casual developers using HTML Purifier.</p>
|
|||||||
<dl>
|
<dl>
|
||||||
|
|
||||||
<dt><a href="enduser-id.html">IDs</a></dt>
|
<dt><a href="enduser-id.html">IDs</a></dt>
|
||||||
<dd>Explains various methods for allowing IDs in documents safely in HTML Purifier.</dd>
|
<dd>Explains various methods for allowing IDs in documents safely.</dd>
|
||||||
|
|
||||||
|
<dt><a href="enduser-youtube.html">Embedding YouTube videos</a></dt>
|
||||||
|
<dd>Explains how to safely allow the embedding of flash from trusted sites.</dd>
|
||||||
|
|
||||||
</dl>
|
</dl>
|
||||||
|
|
||||||
|
@@ -10,7 +10,8 @@ It's quite simple, according to <http://www.w3.org/TR/xhtml11/changes.html>
|
|||||||
...but that's only an informative section. More things to do:
|
...but that's only an informative section. More things to do:
|
||||||
|
|
||||||
1. Scratch style attribute (it's deprecated)
|
1. Scratch style attribute (it's deprecated)
|
||||||
2. Be module-aware
|
2. Be module-aware (this might entail intelligent grouping in the definition
|
||||||
|
and allowing users to specifically remove certain modules (see 5))
|
||||||
3. Cross-reference minimal content models with existing DTDs and determine
|
3. Cross-reference minimal content models with existing DTDs and determine
|
||||||
changes (todo)
|
changes (todo)
|
||||||
4. Watch out for the Legacy Module
|
4. Watch out for the Legacy Module
|
||||||
|
21
library/HTMLPurifier.func.php
Normal file
21
library/HTMLPurifier.func.php
Normal file
@@ -0,0 +1,21 @@
|
|||||||
|
<?php
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Function wrapper for HTML Purifier for quick use.
|
||||||
|
* @note This function only includes the library when it is called. While
|
||||||
|
* this is efficient for instances when you only use HTML Purifier
|
||||||
|
* on a few of your pages, it murders bytecode caching. You still
|
||||||
|
* need to add HTML Purifier to your path.
|
||||||
|
*/
|
||||||
|
|
||||||
|
function HTMLPurifier($html, $config = null) {
    // One purifier instance is kept for the lifetime of the request; the
    // library itself is only loaded the first time this function is called.
    static $purifier = false;
    if (!$purifier) {
        require_once 'HTMLPurifier.php';
        $purifier = new HTMLPurifier();
    }
    // $config is forwarded untouched; when omitted, purify() receives null.
    return $purifier->purify($html, $config);
}
|
||||||
|
|
||||||
|
?>
|
@@ -22,7 +22,7 @@
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
/*
|
/*
|
||||||
HTML Purifier 1.3.0 - Standards Compliant HTML Filtering
|
HTML Purifier 1.3.2 - Standards Compliant HTML Filtering
|
||||||
Copyright (C) 2006 Edward Z. Yang
|
Copyright (C) 2006 Edward Z. Yang
|
||||||
|
|
||||||
This library is free software; you can redistribute it and/or
|
This library is free software; you can redistribute it and/or
|
||||||
@@ -64,19 +64,29 @@ require_once 'HTMLPurifier/Encoder.php';
|
|||||||
class HTMLPurifier
|
class HTMLPurifier
|
||||||
{
|
{
|
||||||
|
|
||||||
|
var $version = '1.3.2';
|
||||||
|
|
||||||
var $config;
|
var $config;
|
||||||
|
|
||||||
var $lexer, $strategy, $generator;
|
var $lexer, $strategy, $generator;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Final HTMLPurifier_Context of last run purification. Might be an array.
|
||||||
|
* @public
|
||||||
|
*/
|
||||||
|
var $context;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Initializes the purifier.
|
* Initializes the purifier.
|
||||||
* @param $config Optional HTMLPurifier_Config object for all instances of
|
* @param $config Optional HTMLPurifier_Config object for all instances of
|
||||||
* the purifier, if omitted, a default configuration is
|
* the purifier, if omitted, a default configuration is
|
||||||
* supplied (which can be overridden on a per-use basis).
|
* supplied (which can be overridden on a per-use basis).
|
||||||
|
* The parameter can also be any type that
|
||||||
|
* HTMLPurifier_Config::create() supports.
|
||||||
*/
|
*/
|
||||||
function HTMLPurifier($config = null) {
|
function HTMLPurifier($config = null) {
|
||||||
|
|
||||||
$this->config = $config ? $config : HTMLPurifier_Config::createDefault();
|
$this->config = HTMLPurifier_Config::create($config);
|
||||||
|
|
||||||
$this->lexer = HTMLPurifier_Lexer::create();
|
$this->lexer = HTMLPurifier_Lexer::create();
|
||||||
$this->strategy = new HTMLPurifier_Strategy_Core();
|
$this->strategy = new HTMLPurifier_Strategy_Core();
|
||||||
@@ -91,25 +101,54 @@ class HTMLPurifier
|
|||||||
* @param $html String of HTML to purify
|
* @param $html String of HTML to purify
|
||||||
* @param $config HTMLPurifier_Config object for this operation, if omitted,
|
* @param $config HTMLPurifier_Config object for this operation, if omitted,
|
||||||
* defaults to the config object specified during this
|
* defaults to the config object specified during this
|
||||||
* object's construction.
|
* object's construction. The parameter can also be any type
|
||||||
|
* that HTMLPurifier_Config::create() supports.
|
||||||
* @return Purified HTML
|
* @return Purified HTML
|
||||||
*/
|
*/
|
||||||
function purify($html, $config = null) {
|
function purify($html, $config = null) {
|
||||||
$config = $config ? $config : $this->config;
|
|
||||||
|
$config = $config ? HTMLPurifier_Config::create($config) : $this->config;
|
||||||
|
|
||||||
$context =& new HTMLPurifier_Context();
|
$context =& new HTMLPurifier_Context();
|
||||||
$html = $this->encoder->convertToUTF8($html, $config, $context);
|
$html = $this->encoder->convertToUTF8($html, $config, $context);
|
||||||
|
|
||||||
|
// purified HTML
|
||||||
$html =
|
$html =
|
||||||
$this->generator->generateFromTokens(
|
$this->generator->generateFromTokens(
|
||||||
|
// list of tokens
|
||||||
$this->strategy->execute(
|
$this->strategy->execute(
|
||||||
$this->lexer->tokenizeHTML($html, $config, $context),
|
// list of un-purified tokens
|
||||||
|
$this->lexer->tokenizeHTML(
|
||||||
|
// un-purified HTML
|
||||||
|
$html, $config, $context
|
||||||
|
),
|
||||||
$config, $context
|
$config, $context
|
||||||
),
|
),
|
||||||
$config, $context
|
$config, $context
|
||||||
);
|
);
|
||||||
|
|
||||||
$html = $this->encoder->convertFromUTF8($html, $config, $context);
|
$html = $this->encoder->convertFromUTF8($html, $config, $context);
|
||||||
|
$this->context =& $context;
|
||||||
return $html;
|
return $html;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Filters an array of HTML snippets
|
||||||
|
* @param $config Optional HTMLPurifier_Config object for this operation.
|
||||||
|
* See HTMLPurifier::purify() for more details.
|
||||||
|
* @return Array of purified HTML
|
||||||
|
*/
|
||||||
|
function purifyArray($array_of_html, $config = null) {
    // Start from a copy so keys and their order are returned exactly as given.
    $purified = $array_of_html;
    $contexts = array();
    foreach ($array_of_html as $index => $snippet) {
        $purified[$index] = $this->purify($snippet, $config);
        // purify() leaves the context of its run in $this->context;
        // collect one per snippet under the snippet's own key.
        $contexts[$index] = $this->context;
    }
    // After a batch run, $this->context holds the array of contexts.
    $this->context = $contexts;
    return $purified;
}
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
?>
|
?>
|
@@ -49,7 +49,7 @@ class HTMLPurifier_AttrDef_Lang extends HTMLPurifier_AttrDef
|
|||||||
if ($length == 0 || $length == 1 || $length > 8 || !ctype_alnum($subtags[1])) {
|
if ($length == 0 || $length == 1 || $length > 8 || !ctype_alnum($subtags[1])) {
|
||||||
return $new_string;
|
return $new_string;
|
||||||
}
|
}
|
||||||
if (!ctype_lower($subtags[1])) $subtags[1] = strotolower($subtags[1]);
|
if (!ctype_lower($subtags[1])) $subtags[1] = strtolower($subtags[1]);
|
||||||
|
|
||||||
$new_string .= '-' . $subtags[1];
|
$new_string .= '-' . $subtags[1];
|
||||||
if ($num_subtags == 2) return $new_string;
|
if ($num_subtags == 2) return $new_string;
|
||||||
@@ -61,7 +61,7 @@ class HTMLPurifier_AttrDef_Lang extends HTMLPurifier_AttrDef
|
|||||||
return $new_string;
|
return $new_string;
|
||||||
}
|
}
|
||||||
if (!ctype_lower($subtags[$i])) {
|
if (!ctype_lower($subtags[$i])) {
|
||||||
$subtags[$i] = strotolower($subtags[$i]);
|
$subtags[$i] = strtolower($subtags[$i]);
|
||||||
}
|
}
|
||||||
$new_string .= '-' . $subtags[$i];
|
$new_string .= '-' . $subtags[$i];
|
||||||
}
|
}
|
||||||
|
@@ -21,7 +21,7 @@ class HTMLPurifier_AttrTransform
|
|||||||
* Abstract: makes changes to the attributes dependent on multiple values.
|
* Abstract: makes changes to the attributes dependent on multiple values.
|
||||||
*
|
*
|
||||||
* @param $attr Assoc array of attributes, usually from
|
* @param $attr Assoc array of attributes, usually from
|
||||||
* HTMLPurifier_Token_Tag::$attributes
|
* HTMLPurifier_Token_Tag::$attr
|
||||||
* @param $config Mandatory HTMLPurifier_Config object.
|
* @param $config Mandatory HTMLPurifier_Config object.
|
||||||
* @param $context Mandatory HTMLPurifier_Context object
|
* @param $context Mandatory HTMLPurifier_Context object
|
||||||
* @returns Processed attribute array.
|
* @returns Processed attribute array.
|
||||||
|
@@ -44,6 +44,20 @@ class HTMLPurifier_Config
|
|||||||
$this->def = $definition; // keep a copy around for checking
|
$this->def = $definition; // keep a copy around for checking
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Convenience constructor that creates a config object based on a mixed var
|
||||||
|
* @param mixed $config Variable that defines the state of the config
|
||||||
|
* object. Can be: a HTMLPurifier_Config() object or
|
||||||
|
* an array of directives based on loadArray().
|
||||||
|
* @return Configured HTMLPurifier_Config object
|
||||||
|
*/
|
||||||
|
function create($config) {
    // An existing configuration object is handed back untouched.
    if (is_a($config, 'HTMLPurifier_Config')) {
        return $config;
    }
    // Anything else starts from the defaults; only an array adds directives.
    $fresh = HTMLPurifier_Config::createDefault();
    if (is_array($config)) {
        $fresh->loadArray($config);
    }
    return $fresh;
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Convenience constructor that creates a default configuration object.
|
* Convenience constructor that creates a default configuration object.
|
||||||
* @return Default HTMLPurifier_Config object.
|
* @return Default HTMLPurifier_Config object.
|
||||||
|
@@ -104,14 +104,14 @@ class HTMLPurifier_Generator
|
|||||||
function generateFromToken($token) {
|
function generateFromToken($token) {
|
||||||
if (!isset($token->type)) return '';
|
if (!isset($token->type)) return '';
|
||||||
if ($token->type == 'start') {
|
if ($token->type == 'start') {
|
||||||
$attr = $this->generateAttributes($token->attributes);
|
$attr = $this->generateAttributes($token->attr);
|
||||||
return '<' . $token->name . ($attr ? ' ' : '') . $attr . '>';
|
return '<' . $token->name . ($attr ? ' ' : '') . $attr . '>';
|
||||||
|
|
||||||
} elseif ($token->type == 'end') {
|
} elseif ($token->type == 'end') {
|
||||||
return '</' . $token->name . '>';
|
return '</' . $token->name . '>';
|
||||||
|
|
||||||
} elseif ($token->type == 'empty') {
|
} elseif ($token->type == 'empty') {
|
||||||
$attr = $this->generateAttributes($token->attributes);
|
$attr = $this->generateAttributes($token->attr);
|
||||||
return '<' . $token->name . ($attr ? ' ' : '') . $attr .
|
return '<' . $token->name . ($attr ? ' ' : '') . $attr .
|
||||||
( $this->_xhtml ? ' /': '' )
|
( $this->_xhtml ? ' /': '' )
|
||||||
. '>';
|
. '>';
|
||||||
|
@@ -72,7 +72,7 @@ HTMLPurifier_ConfigSchema::define(
|
|||||||
'can overload it with your own list of tags to allow. Note that this '.
|
'can overload it with your own list of tags to allow. Note that this '.
|
||||||
'method is subtractive: it does its job by taking away from HTML Purifier '.
|
'method is subtractive: it does its job by taking away from HTML Purifier '.
|
||||||
'usual feature set, so you cannot add a tag that HTML Purifier never '.
|
'usual feature set, so you cannot add a tag that HTML Purifier never '.
|
||||||
'supported in the first place (like embed). If you change this, you '.
|
'supported in the first place (like embed, form or head). If you change this, you '.
|
||||||
'probably also want to change %HTML.AllowedAttributes. '.
|
'probably also want to change %HTML.AllowedAttributes. '.
|
||||||
'<strong>Warning:</strong> If another directive conflicts with the '.
|
'<strong>Warning:</strong> If another directive conflicts with the '.
|
||||||
'elements here, <em>that</em> directive will win and override. '.
|
'elements here, <em>that</em> directive will win and override. '.
|
||||||
@@ -561,18 +561,18 @@ class HTMLPurifier_HTMLDefinition
|
|||||||
|
|
||||||
//////////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////////
|
||||||
// %HTML.Allowed(Elements|Attributes) : cut non-allowed elements
|
// %HTML.Allowed(Elements|Attributes) : cut non-allowed elements
|
||||||
|
|
||||||
$allowed_elements = $config->get('HTML', 'AllowedElements');
|
$allowed_elements = $config->get('HTML', 'AllowedElements');
|
||||||
if (is_array($allowed_elements)) {
|
if (is_array($allowed_elements)) {
|
||||||
// $allowed_elements[$this->info_parent] = true; // allow parent element
|
|
||||||
foreach ($this->info as $name => $d) {
|
foreach ($this->info as $name => $d) {
|
||||||
if(!isset($allowed_elements[$name])) unset($this->info[$name]);
|
if(!isset($allowed_elements[$name])) unset($this->info[$name]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
$allowed_attributes = $config->get('HTML', 'AllowedAttributes');
|
$allowed_attributes = $config->get('HTML', 'AllowedAttributes');
|
||||||
if (is_array($allowed_attributes)) {
|
if (is_array($allowed_attributes)) {
|
||||||
foreach ($this->info_global_attr as $attr => $info) {
|
foreach ($this->info_global_attr as $attr_key => $info) {
|
||||||
if (!isset($allowed_attributes["*.$attr"])) {
|
if (!isset($allowed_attributes["*.$attr_key"])) {
|
||||||
unset($this->info_global_attr[$attr]);
|
unset($this->info_global_attr[$attr_key]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
foreach ($this->info as $tag => $info) {
|
foreach ($this->info as $tag => $info) {
|
||||||
|
@@ -143,18 +143,18 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer
|
|||||||
)
|
)
|
||||||
);
|
);
|
||||||
if ($attribute_string) {
|
if ($attribute_string) {
|
||||||
$attributes = $this->parseAttributeString(
|
$attr = $this->parseAttributeString(
|
||||||
$attribute_string
|
$attribute_string
|
||||||
, $config, $context
|
, $config, $context
|
||||||
);
|
);
|
||||||
} else {
|
} else {
|
||||||
$attributes = array();
|
$attr = array();
|
||||||
}
|
}
|
||||||
|
|
||||||
if ($is_self_closing) {
|
if ($is_self_closing) {
|
||||||
$array[] = new HTMLPurifier_Token_Empty($type, $attributes);
|
$array[] = new HTMLPurifier_Token_Empty($type, $attr);
|
||||||
} else {
|
} else {
|
||||||
$array[] = new HTMLPurifier_Token_Start($type, $attributes);
|
$array[] = new HTMLPurifier_Token_Start($type, $attr);
|
||||||
}
|
}
|
||||||
$cursor = $position_next_gt + 1;
|
$cursor = $position_next_gt + 1;
|
||||||
$inside_tag = false;
|
$inside_tag = false;
|
||||||
|
@@ -156,11 +156,14 @@ class HTMLPurifier_Strategy_FixNesting extends HTMLPurifier_Strategy
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
$child_def = $def->child;
|
if (!empty($def->child)) {
|
||||||
|
// have DTD child def validate children
|
||||||
// have DTD child def validate children
|
$result = $def->child->validateChildren(
|
||||||
$result = $child_def->validateChildren(
|
$child_tokens, $config, $context);
|
||||||
$child_tokens, $config, $context);
|
} else {
|
||||||
|
// weird, no child definition, get rid of everything
|
||||||
|
$result = false;
|
||||||
|
}
|
||||||
|
|
||||||
// determine whether or not this element has any exclusions
|
// determine whether or not this element has any exclusions
|
||||||
$excludes = $def->excludes;
|
$excludes = $def->excludes;
|
||||||
|
@@ -30,7 +30,7 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
|
|||||||
$token->type == 'start' ) {
|
$token->type == 'start' ) {
|
||||||
|
|
||||||
$result[] = new HTMLPurifier_Token_Empty($token->name,
|
$result[] = new HTMLPurifier_Token_Empty($token->name,
|
||||||
$token->attributes);
|
$token->attr);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -39,7 +39,7 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
|
|||||||
$token->type == 'empty' ) {
|
$token->type == 'empty' ) {
|
||||||
|
|
||||||
$result[] = new HTMLPurifier_Token_Start($token->name,
|
$result[] = new HTMLPurifier_Token_Start($token->name,
|
||||||
$token->attributes);
|
$token->attr);
|
||||||
$result[] = new HTMLPurifier_Token_End($token->name);
|
$result[] = new HTMLPurifier_Token_End($token->name);
|
||||||
|
|
||||||
continue;
|
continue;
|
||||||
|
@@ -38,7 +38,9 @@ class HTMLPurifier_Strategy_RemoveForeignElements extends HTMLPurifier_Strategy
|
|||||||
// hard-coded image special case, pre-emptively drop
|
// hard-coded image special case, pre-emptively drop
|
||||||
// if not available. Probably not abstract-able
|
// if not available. Probably not abstract-able
|
||||||
if ( $token->name == 'img' ) {
|
if ( $token->name == 'img' ) {
|
||||||
if (!isset($token->attr['src'])) continue;
|
if (!isset($token->attr['src'])) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
if (!isset($definition->info['img']->attr['src'])) {
|
if (!isset($definition->info['img']->attr['src'])) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
@@ -46,7 +48,8 @@ class HTMLPurifier_Strategy_RemoveForeignElements extends HTMLPurifier_Strategy
|
|||||||
$definition->
|
$definition->
|
||||||
info['img']->
|
info['img']->
|
||||||
attr['src']->
|
attr['src']->
|
||||||
validate($token->attr['src']);
|
validate($token->attr['src'],
|
||||||
|
$config, $context);
|
||||||
if ($token->attr['src'] === false) continue;
|
if ($token->attr['src'] === false) continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -35,7 +35,7 @@ class HTMLPurifier_Strategy_ValidateAttributes extends HTMLPurifier_Strategy
|
|||||||
if ($token->type !== 'start' && $token->type !== 'empty') continue;
|
if ($token->type !== 'start' && $token->type !== 'empty') continue;
|
||||||
|
|
||||||
// copy out attributes for easy manipulation
|
// copy out attributes for easy manipulation
|
||||||
$attr = $token->attributes;
|
$attr = $token->attr;
|
||||||
|
|
||||||
// do global transformations (pre)
|
// do global transformations (pre)
|
||||||
// nothing currently utilizes this
|
// nothing currently utilizes this
|
||||||
@@ -117,7 +117,7 @@ class HTMLPurifier_Strategy_ValidateAttributes extends HTMLPurifier_Strategy
|
|||||||
|
|
||||||
// commit changes
|
// commit changes
|
||||||
// could interfere with flyweight implementation
|
// could interfere with flyweight implementation
|
||||||
$tokens[$key]->attributes = $attr;
|
$tokens[$key]->attr = $attr;
|
||||||
}
|
}
|
||||||
$context->destroy('IDAccumulator');
|
$context->destroy('IDAccumulator');
|
||||||
|
|
||||||
|
@@ -62,16 +62,16 @@ class HTMLPurifier_TagTransform_Center extends HTMLPurifier_TagTransform
|
|||||||
$new_tag = new HTMLPurifier_Token_End($this->transform_to);
|
$new_tag = new HTMLPurifier_Token_End($this->transform_to);
|
||||||
return $new_tag;
|
return $new_tag;
|
||||||
}
|
}
|
||||||
$attributes = $tag->attributes;
|
$attr = $tag->attr;
|
||||||
$prepend_css = 'text-align:center;';
|
$prepend_css = 'text-align:center;';
|
||||||
if (isset($attributes['style'])) {
|
if (isset($attr['style'])) {
|
||||||
$attributes['style'] = $prepend_css . $attributes['style'];
|
$attr['style'] = $prepend_css . $attr['style'];
|
||||||
} else {
|
} else {
|
||||||
$attributes['style'] = $prepend_css;
|
$attr['style'] = $prepend_css;
|
||||||
}
|
}
|
||||||
$new_tag = $tag->copy();
|
$new_tag = $tag->copy();
|
||||||
$new_tag->name = $this->transform_to;
|
$new_tag->name = $this->transform_to;
|
||||||
$new_tag->attributes = $attributes;
|
$new_tag->attr = $attr;
|
||||||
return $new_tag;
|
return $new_tag;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -115,39 +115,39 @@ class HTMLPurifier_TagTransform_Font extends HTMLPurifier_TagTransform
|
|||||||
return $new_tag;
|
return $new_tag;
|
||||||
}
|
}
|
||||||
|
|
||||||
$attributes = $tag->attributes;
|
$attr = $tag->attr;
|
||||||
$prepend_style = '';
|
$prepend_style = '';
|
||||||
|
|
||||||
// handle color transform
|
// handle color transform
|
||||||
if (isset($attributes['color'])) {
|
if (isset($attr['color'])) {
|
||||||
$prepend_style .= 'color:' . $attributes['color'] . ';';
|
$prepend_style .= 'color:' . $attr['color'] . ';';
|
||||||
unset($attributes['color']);
|
unset($attr['color']);
|
||||||
}
|
}
|
||||||
|
|
||||||
// handle face transform
|
// handle face transform
|
||||||
if (isset($attributes['face'])) {
|
if (isset($attr['face'])) {
|
||||||
$prepend_style .= 'font-family:' . $attributes['face'] . ';';
|
$prepend_style .= 'font-family:' . $attr['face'] . ';';
|
||||||
unset($attributes['face']);
|
unset($attr['face']);
|
||||||
}
|
}
|
||||||
|
|
||||||
// handle size transform
|
// handle size transform
|
||||||
if (isset($attributes['size'])) {
|
if (isset($attr['size'])) {
|
||||||
if (isset($this->_size_lookup[$attributes['size']])) {
|
if (isset($this->_size_lookup[$attr['size']])) {
|
||||||
$prepend_style .= 'font-size:' .
|
$prepend_style .= 'font-size:' .
|
||||||
$this->_size_lookup[$attributes['size']] . ';';
|
$this->_size_lookup[$attr['size']] . ';';
|
||||||
}
|
}
|
||||||
unset($attributes['size']);
|
unset($attr['size']);
|
||||||
}
|
}
|
||||||
|
|
||||||
if ($prepend_style) {
|
if ($prepend_style) {
|
||||||
$attributes['style'] = isset($attributes['style']) ?
|
$attr['style'] = isset($attr['style']) ?
|
||||||
$prepend_style . $attributes['style'] :
|
$prepend_style . $attr['style'] :
|
||||||
$prepend_style;
|
$prepend_style;
|
||||||
}
|
}
|
||||||
|
|
||||||
$new_tag = $tag->copy();
|
$new_tag = $tag->copy();
|
||||||
$new_tag->name = $this->transform_to;
|
$new_tag->name = $this->transform_to;
|
||||||
$new_tag->attributes = $attributes;
|
$new_tag->attr = $attr;
|
||||||
|
|
||||||
return $new_tag;
|
return $new_tag;
|
||||||
|
|
||||||
|
@@ -50,30 +50,29 @@ class HTMLPurifier_Token_Tag extends HTMLPurifier_Token // abstract
|
|||||||
/**
|
/**
|
||||||
* Associative array of the tag's attributes.
|
* Associative array of the tag's attributes.
|
||||||
*/
|
*/
|
||||||
var $attributes = array();
|
var $attr = array();
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Non-overloaded constructor, which lower-cases passed tag name.
|
* Non-overloaded constructor, which lower-cases passed tag name.
|
||||||
*
|
*
|
||||||
* @param $name String name.
|
* @param $name String name.
|
||||||
* @param $attributes Associative array of attributes.
|
* @param $attr Associative array of attributes.
|
||||||
*/
|
*/
|
||||||
function HTMLPurifier_Token_Tag($name, $attributes = array()) {
|
function HTMLPurifier_Token_Tag($name, $attr = array()) {
|
||||||
//if ($attributes === null) var_dump(debug_backtrace());
|
|
||||||
$this->name = ctype_lower($name) ? $name : strtolower($name);
|
$this->name = ctype_lower($name) ? $name : strtolower($name);
|
||||||
foreach ($attributes as $key => $value) {
|
foreach ($attr as $key => $value) {
|
||||||
// normalization only necessary when key is not lowercase
|
// normalization only necessary when key is not lowercase
|
||||||
if (!ctype_lower($key)) {
|
if (!ctype_lower($key)) {
|
||||||
$new_key = strtolower($key);
|
$new_key = strtolower($key);
|
||||||
if (!isset($attributes[$new_key])) {
|
if (!isset($attr[$new_key])) {
|
||||||
$attributes[$new_key] = $attributes[$key];
|
$attr[$new_key] = $attr[$key];
|
||||||
}
|
}
|
||||||
if ($new_key !== $key) {
|
if ($new_key !== $key) {
|
||||||
unset($attributes[$key]);
|
unset($attr[$key]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
$this->attributes = $attributes;
|
$this->attr = $attr;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -84,7 +83,7 @@ class HTMLPurifier_Token_Start extends HTMLPurifier_Token_Tag
|
|||||||
{
|
{
|
||||||
var $type = 'start';
|
var $type = 'start';
|
||||||
function copy() {
|
function copy() {
|
||||||
return new HTMLPurifier_Token_Start($this->name, $this->attributes);
|
return new HTMLPurifier_Token_Start($this->name, $this->attr);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -95,7 +94,7 @@ class HTMLPurifier_Token_Empty extends HTMLPurifier_Token_Tag
|
|||||||
{
|
{
|
||||||
var $type = 'empty';
|
var $type = 'empty';
|
||||||
function copy() {
|
function copy() {
|
||||||
return new HTMLPurifier_Token_Empty($this->name, $this->attributes);
|
return new HTMLPurifier_Token_Empty($this->name, $this->attr);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -37,12 +37,12 @@ class HTMLPurifier_TokenFactory
|
|||||||
/**
|
/**
|
||||||
* Creates a HTMLPurifier_Token_Start.
|
* Creates a HTMLPurifier_Token_Start.
|
||||||
* @param $name Tag name
|
* @param $name Tag name
|
||||||
* @param $attribute Associative array of attributes
|
* @param $attr Associative array of attributes
|
||||||
* @return Generated HTMLPurifier_Token_Start
|
* @return Generated HTMLPurifier_Token_Start
|
||||||
*/
|
*/
|
||||||
public function createStart($name, $attributes = array()) {
|
public function createStart($name, $attr = array()) {
|
||||||
$p = clone $this->p_start;
|
$p = clone $this->p_start;
|
||||||
$p->HTMLPurifier_Token_Tag($name, $attributes);
|
$p->HTMLPurifier_Token_Tag($name, $attr);
|
||||||
return $p;
|
return $p;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -60,12 +60,12 @@ class HTMLPurifier_TokenFactory
|
|||||||
/**
|
/**
|
||||||
* Creates a HTMLPurifier_Token_Empty.
|
* Creates a HTMLPurifier_Token_Empty.
|
||||||
* @param $name Tag name
|
* @param $name Tag name
|
||||||
* @param $attribute Associative array of attributes
|
* @param $attr Associative array of attributes
|
||||||
* @return Generated HTMLPurifier_Token_Empty
|
* @return Generated HTMLPurifier_Token_Empty
|
||||||
*/
|
*/
|
||||||
public function createEmpty($name, $attributes = array()) {
|
public function createEmpty($name, $attr = array()) {
|
||||||
$p = clone $this->p_empty;
|
$p = clone $this->p_empty;
|
||||||
$p->HTMLPurifier_Token_Tag($name, $attributes);
|
$p->HTMLPurifier_Token_Tag($name, $attr);
|
||||||
return $p;
|
return $p;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
40
smoketests/loadFunc.php
Normal file
40
smoketests/loadFunc.php
Normal file
@@ -0,0 +1,40 @@
|
|||||||
|
<?php
|
||||||
|
|
||||||
|
set_include_path('../library/' . PATH_SEPARATOR . get_include_path() );
|
||||||
|
|
||||||
|
header('Content-type: text/html; charset=UTF-8');
|
||||||
|
echo '<?xml version="1.0" encoding="UTF-8" ?>';
|
||||||
|
|
||||||
|
function printb($bool) {
|
||||||
|
echo '<strong>' . ($bool ? 'Pass' : 'Fail') . '</strong>';
|
||||||
|
}
|
||||||
|
|
||||||
|
function printEval($code) {
|
||||||
|
echo '<pre>' . htmlspecialchars($code) . '</pre>';
|
||||||
|
eval($code);
|
||||||
|
}
|
||||||
|
|
||||||
|
?><!DOCTYPE html
|
||||||
|
PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
|
||||||
|
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
|
||||||
|
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
|
||||||
|
<head>
|
||||||
|
<title>HTML Purifier Function Include Smoketest</title>
|
||||||
|
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
<h1>HTML Purifier Function Include Smoketest</h1>
|
||||||
|
|
||||||
|
<p>Tests whether or not the includes are done properly and whether or
|
||||||
|
not the library is lazy loaded.</p>
|
||||||
|
|
||||||
|
<?php printEval("require_once 'HTMLPurifier.func.php';"); ?>
|
||||||
|
|
||||||
|
<p>HTMLPurifier class doesn't exist: <?php printb(!class_exists('HTMLPurifier')); ?></p>
|
||||||
|
|
||||||
|
<?php printEval("HTMLPurifier('foobar');"); ?>
|
||||||
|
|
||||||
|
<p>HTMLPurifier class exists: <?php printb(class_exists('HTMLPurifier')); ?></p>
|
||||||
|
|
||||||
|
</body>
|
||||||
|
</html>
|
65
smoketests/preserveYouTube.php
Normal file
65
smoketests/preserveYouTube.php
Normal file
@@ -0,0 +1,65 @@
|
|||||||
|
<?php
|
||||||
|
|
||||||
|
require_once 'common.php';
|
||||||
|
|
||||||
|
echo '<?xml version="1.0" encoding="UTF-8" ?>';
|
||||||
|
?><!DOCTYPE html
|
||||||
|
PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
|
||||||
|
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
|
||||||
|
<html>
|
||||||
|
<head>
|
||||||
|
<title>HTML Purifier Preserve YouTube Smoketest</title>
|
||||||
|
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
<h1>HTML Purifier Preserve YouTube Smoketest</h1>
|
||||||
|
<?php
|
||||||
|
|
||||||
|
class HTMLPurifierX_PreserveYouTube extends HTMLPurifier
|
||||||
|
{
|
||||||
|
function purify($html, $config = null) {
|
||||||
|
$pre_regex = '#<object[^>]+>.+?'.
|
||||||
|
'http://www.youtube.com/v/([A-Za-z0-9]+).+?</object>#';
|
||||||
|
$pre_replace = '<span class="youtube-embed">\1</span>';
|
||||||
|
$html = preg_replace($pre_regex, $pre_replace, $html);
|
||||||
|
$html = parent::purify($html, $config);
|
||||||
|
$post_regex = '#<span class="youtube-embed">([A-Za-z0-9]+)</span>#';
|
||||||
|
$post_replace = '<object width="425" height="350" '.
|
||||||
|
'data="http://www.youtube.com/v/\1">'.
|
||||||
|
'<param name="movie" value="http://www.youtube.com/v/\1"></param>'.
|
||||||
|
'<param name="wmode" value="transparent"></param>'.
|
||||||
|
'<!--[if IE]>'.
|
||||||
|
'<embed src="http://www.youtube.com/v/\1"'.
|
||||||
|
'type="application/x-shockwave-flash"'.
|
||||||
|
'wmode="transparent" width="425" height="350" />'.
|
||||||
|
'<![endif]-->'.
|
||||||
|
'</object>';
|
||||||
|
$html = preg_replace($post_regex, $post_replace, $html);
|
||||||
|
return $html;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
$string = '<object width="425" height="350"><param name="movie" value="http://www.youtube.com/v/JzqumbhfxRo"></param><param name="wmode" value="transparent"></param><embed src="http://www.youtube.com/v/JzqumbhfxRo" type="application/x-shockwave-flash" wmode="transparent" width="425" height="350"></embed></object>';
|
||||||
|
|
||||||
|
$regular_purifier = new HTMLPurifier();
|
||||||
|
$youtube_purifier = new HTMLPurifierX_PreserveYouTube();
|
||||||
|
|
||||||
|
?>
|
||||||
|
<h2>Unpurified</h2>
|
||||||
|
<p><a href="?break">Click here to see the unpurified version (breaks validation).</a></p>
|
||||||
|
<div><?php
|
||||||
|
if (isset($_GET['break'])) echo $string;
|
||||||
|
?></div>
|
||||||
|
|
||||||
|
<h2>Without YouTube exception</h2>
|
||||||
|
<div><?php
|
||||||
|
echo $regular_purifier->purify($string);
|
||||||
|
?></div>
|
||||||
|
|
||||||
|
<h2>With YouTube exception</h2>
|
||||||
|
<div><?php
|
||||||
|
echo $youtube_purifier->purify($string);
|
||||||
|
?></div>
|
||||||
|
|
||||||
|
</body>
|
||||||
|
</html>
|
@@ -36,6 +36,7 @@ echo '<?xml version="1.0" encoding="UTF-8" ?>';
|
|||||||
<style type="text/css">
|
<style type="text/css">
|
||||||
form table {margin:1em auto;}
|
form table {margin:1em auto;}
|
||||||
form th {text-align:right;padding-right:1em;}
|
form th {text-align:right;padding-right:1em;}
|
||||||
|
form .c {display:none;}
|
||||||
.HTMLPurifier_Printer table {border-collapse:collapse;
|
.HTMLPurifier_Printer table {border-collapse:collapse;
|
||||||
border:1px solid #000; width:600px;
|
border:1px solid #000; width:600px;
|
||||||
margin:1em auto;font-family:sans-serif;font-size:75%;}
|
margin:1em auto;font-family:sans-serif;font-size:75%;}
|
||||||
@@ -62,11 +63,9 @@ influences the internal workings of the definition objects.</p>
|
|||||||
|
|
||||||
<p>You can specify an array by typing in a comma-separated
|
<p>You can specify an array by typing in a comma-separated
|
||||||
list of items, HTML Purifier will take care of the rest (including
|
list of items, HTML Purifier will take care of the rest (including
|
||||||
transformation into a real array list or a lookup table). If a
|
transformation into a real array list or a lookup table).</p>
|
||||||
directive can be set to null, that usually means that the feature
|
|
||||||
is disabled when it is null (not that, say, no tags are allowed).</p>
|
|
||||||
|
|
||||||
<form id="edit-config" method="get" action="printDefinition.php">
|
<form id="edit-config" name="edit-config" method="get" action="printDefinition.php">
|
||||||
<table>
|
<table>
|
||||||
<?php
|
<?php
|
||||||
$directives = $config->getBatch('HTML');
|
$directives = $config->getBatch('HTML');
|
||||||
@@ -91,27 +90,31 @@ is disabled when it is null (not that, say, no tags are allowed).</p>
|
|||||||
<tr>
|
<tr>
|
||||||
<th>
|
<th>
|
||||||
<a href="http://hp.jpsband.org/live/configdoc/plain.html#<?php echo $directive ?>">
|
<a href="http://hp.jpsband.org/live/configdoc/plain.html#<?php echo $directive ?>">
|
||||||
%<?php echo $directive; ?>
|
<label for="<?php echo $directive; ?>">%<?php echo $directive; ?></label>
|
||||||
</a>
|
</a>
|
||||||
</th>
|
</th>
|
||||||
<td>
|
<td>
|
||||||
<?php if (is_bool($value)) { ?>
|
<?php if (is_bool($value)) { ?>
|
||||||
Yes <input type="radio" name="<?php echo $directive; ?>" value="1"<?php if ($value) { ?> checked="checked"<?php } ?> />
|
<label for="Yes_<?php echo $directive; ?>"><span class="c">%<?php echo $directive; ?>:</span> Yes</label>
|
||||||
No <input type="radio" name="<?php echo $directive; ?>" value="0"<?php if (!$value) { ?> checked="checked"<?php } ?> />
|
<input type="radio" name="<?php echo $directive; ?>" id="Yes_<?php echo $directive; ?>" value="1"<?php if ($value) { ?> checked="checked"<?php } ?> />
|
||||||
|
<label for="No_<?php echo $directive; ?>"><span class="c">%<?php echo $directive; ?>:</span> No</label>
|
||||||
|
<input type="radio" name="<?php echo $directive; ?>" id="No_<?php echo $directive; ?>" value="0"<?php if (!$value) { ?> checked="checked"<?php } ?> />
|
||||||
<?php } else { ?>
|
<?php } else { ?>
|
||||||
<?php if($allow_null) { ?>
|
<?php if($allow_null) { ?>
|
||||||
Null/Disabled <input
|
<label for="Null_<?php echo $directive; ?>"><span class="c">%<?php echo $directive; ?>:</span> Null/Disabled*</label>
|
||||||
type="checkbox"
|
<input
|
||||||
value="1"
|
type="checkbox"
|
||||||
onclick="toggleWriteability('<?php echo $directive ?>',checked)"
|
value="1"
|
||||||
name="Null_<?php echo $directive; ?>"
|
onclick="toggleWriteability('<?php echo $directive ?>',checked)"
|
||||||
<?php if ($value === null) { ?> checked="checked"<?php } ?>
|
name="Null_<?php echo $directive; ?>"
|
||||||
/> or <br />
|
id="Null_<?php echo $directive; ?>"
|
||||||
|
<?php if ($value === null) { ?> checked="checked"<?php } ?>
|
||||||
|
/> or <br />
|
||||||
<?php } ?>
|
<?php } ?>
|
||||||
<input
|
<input
|
||||||
type="text"
|
type="text"
|
||||||
id="<?php echo $directive; ?>"
|
|
||||||
name="<?php echo $directive; ?>"
|
name="<?php echo $directive; ?>"
|
||||||
|
id="<?php echo $directive; ?>"
|
||||||
value="<?php echo escapeHTML($value); ?>"
|
value="<?php echo escapeHTML($value); ?>"
|
||||||
<?php if($value === null) {echo 'disabled="disabled"';} ?>
|
<?php if($value === null) {echo 'disabled="disabled"';} ?>
|
||||||
/>
|
/>
|
||||||
@@ -128,6 +131,10 @@ is disabled when it is null (not that, say, no tags are allowed).</p>
|
|||||||
</td>
|
</td>
|
||||||
</tr>
|
</tr>
|
||||||
</table>
|
</table>
|
||||||
|
<p>* Some configuration directives make a distinction between an empty
|
||||||
|
variable and a null variable. A whitelist, for example, will take an
|
||||||
|
empty array as meaning <em>no</em> allowed elements, while checking
|
||||||
|
Null/Disabled will mean that user whitelisting functionality is disabled.</p>
|
||||||
</form>
|
</form>
|
||||||
<h2>HTMLDefinition</h2>
|
<h2>HTMLDefinition</h2>
|
||||||
<?php echo $printer_html_definition->render($config) ?>
|
<?php echo $printer_html_definition->render($config) ?>
|
||||||
|
@@ -17,6 +17,9 @@ class HTMLPurifier_AttrDef_LangTest extends HTMLPurifier_AttrDefHarness
|
|||||||
$this->assertDef(' en ', 'en'); // trim
|
$this->assertDef(' en ', 'en'); // trim
|
||||||
$this->assertDef('EN', 'en'); // case insensitivity
|
$this->assertDef('EN', 'en'); // case insensitivity
|
||||||
|
|
||||||
|
// (thanks Eugen Pankratz for noticing the typos!)
|
||||||
|
$this->assertDef('En-Us-Edison', 'en-us-edison'); // complex ci
|
||||||
|
|
||||||
$this->assertDef('fr en', false); // multiple languages
|
$this->assertDef('fr en', false); // multiple languages
|
||||||
$this->assertDef('%', false); // bad character
|
$this->assertDef('%', false); // bad character
|
||||||
|
|
||||||
@@ -26,7 +29,7 @@ class HTMLPurifier_AttrDef_LangTest extends HTMLPurifier_AttrDefHarness
|
|||||||
// primary subtag rules
|
// primary subtag rules
|
||||||
// I'm somewhat hesitant to allow x and i as primary language codes,
|
// I'm somewhat hesitant to allow x and i as primary language codes,
|
||||||
// because they usually are never used in real life. However,
|
// because they usually are never used in real life. However,
|
||||||
// theoretically speaking, having them alone is permissble, so
|
// theoretically speaking, having them alone is permissible, so
|
||||||
// I'll be lenient. No XML parser is going to complain anyway.
|
// I'll be lenient. No XML parser is going to complain anyway.
|
||||||
$this->assertDef('x');
|
$this->assertDef('x');
|
||||||
$this->assertDef('i');
|
$this->assertDef('i');
|
||||||
|
@@ -180,6 +180,25 @@ class HTMLPurifier_ConfigTest extends UnitTestCase
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
function test_create() {
|
||||||
|
|
||||||
|
HTMLPurifier_ConfigSchema::defineNamespace('Cake', 'Properties of it.');
|
||||||
|
HTMLPurifier_ConfigSchema::define('Cake', 'Sprinkles', 666, 'int', 'Number of.');
|
||||||
|
HTMLPurifier_ConfigSchema::define('Cake', 'Flavor', 'vanilla', 'string', 'Flavor of the batter.');
|
||||||
|
|
||||||
|
$config = HTMLPurifier_Config::createDefault();
|
||||||
|
$config->set('Cake', 'Sprinkles', 42);
|
||||||
|
|
||||||
|
// test flat pass-through
|
||||||
|
$created_config = HTMLPurifier_Config::create($config);
|
||||||
|
$this->assertEqual($config, $created_config);
|
||||||
|
|
||||||
|
// test loadArray
|
||||||
|
$created_config = HTMLPurifier_Config::create(array('Cake.Sprinkles' => 42));
|
||||||
|
$this->assertEqual($config, $created_config);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
?>
|
?>
|
@@ -42,12 +42,15 @@ class HTMLPurifier_Strategy_RemoveForeignElementsTest
|
|||||||
' Warning!</span>'
|
' Warning!</span>'
|
||||||
);
|
);
|
||||||
|
|
||||||
// test removal of img tag
|
// test removal of invalid img tag
|
||||||
$this->assertResult(
|
$this->assertResult(
|
||||||
'<img />',
|
'<img />',
|
||||||
''
|
''
|
||||||
);
|
);
|
||||||
|
|
||||||
|
// test preservation of valid img tag
|
||||||
|
$this->assertResult('<img src="foobar.gif" />');
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@@ -25,7 +25,7 @@ class HTMLPurifier_Test extends UnitTestCase
|
|||||||
function testStrict() {
|
function testStrict() {
|
||||||
$config = HTMLPurifier_Config::createDefault();
|
$config = HTMLPurifier_Config::createDefault();
|
||||||
$config->set('HTML', 'Strict', true);
|
$config->set('HTML', 'Strict', true);
|
||||||
$this->purifier = new HTMLPurifier($config);
|
$this->purifier = new HTMLPurifier( $config ); // verbose syntax
|
||||||
|
|
||||||
$this->assertPurification(
|
$this->assertPurification(
|
||||||
'<u>Illegal underline</u>',
|
'<u>Illegal underline</u>',
|
||||||
@@ -40,10 +40,11 @@ class HTMLPurifier_Test extends UnitTestCase
|
|||||||
}
|
}
|
||||||
|
|
||||||
function testDifferentAllowedElements() {
|
function testDifferentAllowedElements() {
|
||||||
$config = HTMLPurifier_Config::createDefault();
|
|
||||||
$config->set('HTML', 'AllowedElements', array('b', 'i', 'p', 'a'));
|
$this->purifier = new HTMLPurifier(array(
|
||||||
$config->set('HTML', 'AllowedAttributes', array('a.href', '*.id'));
|
'HTML.AllowedElements' => array('b', 'i', 'p', 'a'),
|
||||||
$this->purifier = new HTMLPurifier($config);
|
'HTML.AllowedAttributes' => array('a.href', '*.id')
|
||||||
|
));
|
||||||
|
|
||||||
$this->assertPurification(
|
$this->assertPurification(
|
||||||
'<p>Par.</p><p>Para<a href="http://google.com/">gr</a>aph</p>Text<b>Bol<i>d</i></b>'
|
'<p>Par.</p><p>Para<a href="http://google.com/">gr</a>aph</p>Text<b>Bol<i>d</i></b>'
|
||||||
@@ -58,9 +59,7 @@ class HTMLPurifier_Test extends UnitTestCase
|
|||||||
|
|
||||||
function testDisableURI() {
|
function testDisableURI() {
|
||||||
|
|
||||||
$config = HTMLPurifier_Config::createDefault();
|
$this->purifier = new HTMLPurifier( array('Attr.DisableURI' => true) );
|
||||||
$config->set('Attr', 'DisableURI', true);
|
|
||||||
$this->purifier = new HTMLPurifier($config);
|
|
||||||
|
|
||||||
$this->assertPurification(
|
$this->assertPurification(
|
||||||
'<img src="foobar"/>',
|
'<img src="foobar"/>',
|
||||||
@@ -69,6 +68,21 @@ class HTMLPurifier_Test extends UnitTestCase
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
function test_purifyArray() {
|
||||||
|
|
||||||
|
$this->purifier = new HTMLPurifier();
|
||||||
|
|
||||||
|
$this->assertEqual(
|
||||||
|
$this->purifier->purifyArray(
|
||||||
|
array('Good', '<b>Sketchy', 'foo' => '<script>bad</script>')
|
||||||
|
),
|
||||||
|
array('Good', '<b>Sketchy</b>', 'foo' => 'bad')
|
||||||
|
);
|
||||||
|
|
||||||
|
$this->assertIsA($this->purifier->context, 'array');
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
?>
|
?>
|
@@ -5,15 +5,15 @@ require_once 'HTMLPurifier/Token.php';
|
|||||||
class HTMLPurifier_TokenTest extends UnitTestCase
|
class HTMLPurifier_TokenTest extends UnitTestCase
|
||||||
{
|
{
|
||||||
|
|
||||||
function assertTokenConstruction($name, $attributes,
|
function assertTokenConstruction($name, $attr,
|
||||||
$expect_name = null, $expect_attributes = null
|
$expect_name = null, $expect_attr = null
|
||||||
) {
|
) {
|
||||||
if ($expect_name === null) $expect_name = $name;
|
if ($expect_name === null) $expect_name = $name;
|
||||||
if ($expect_attributes === null) $expect_attributes = $attributes;
|
if ($expect_attr === null) $expect_attr = $attr;
|
||||||
$token = new HTMLPurifier_Token_Start($name, $attributes);
|
$token = new HTMLPurifier_Token_Start($name, $attr);
|
||||||
|
|
||||||
$this->assertEqual($expect_name, $token->name);
|
$this->assertEqual($expect_name, $token->name);
|
||||||
$this->assertEqual($expect_attributes, $token->attributes);
|
$this->assertEqual($expect_attr, $token->attr);
|
||||||
}
|
}
|
||||||
|
|
||||||
function testConstruct() {
|
function testConstruct() {
|
||||||
|
Reference in New Issue
Block a user