mirror of
https://github.com/ezyang/htmlpurifier.git
synced 2025-08-03 20:58:11 +02:00
Compare commits
2 Commits
Author | SHA1 | Date | |
---|---|---|---|
|
3af239c70f | ||
|
c6d5016626 |
2
Doxyfile
2
Doxyfile
@@ -4,7 +4,7 @@
|
||||
# Project related configuration options
|
||||
#---------------------------------------------------------------------------
|
||||
PROJECT_NAME = HTML Purifier
|
||||
PROJECT_NUMBER = 1.3.2
|
||||
PROJECT_NUMBER = 1.2.0
|
||||
OUTPUT_DIRECTORY = "C:/Documents and Settings/Edward/My Documents/My Webs/htmlpurifier/docs/doxygen"
|
||||
CREATE_SUBDIRS = NO
|
||||
OUTPUT_LANGUAGE = English
|
||||
|
51
NEWS
51
NEWS
@@ -9,57 +9,6 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier
|
||||
. Internal change
|
||||
==========================
|
||||
|
||||
1.3.2, released 2006-12-25
|
||||
! HTMLPurifier object now accepts configuration arrays, no need to manually
|
||||
instantiate a configuration object
|
||||
! Context object now accessible to outside
|
||||
! Added enduser-youtube.html, explains how to embed YouTube videos. See
|
||||
also corresponding smoketest preserveYouTube.php.
|
||||
! Added purifyArray(), which takes a list of HTML and purifies it all
|
||||
! Added static member variable $version to HTML Purifier with PHP-compatible
|
||||
version number string.
|
||||
- Fixed fatal error thrown by upper-cased language attributes
|
||||
- printDefinition.php: added labels, added better clarification
|
||||
. HTMLPurifier_Config::create() added, takes mixed variable and converts into
|
||||
a HTMLPurifier_Config object.
|
||||
|
||||
1.3.1, released 2006-12-06
|
||||
! Added HTMLPurifier.func.php stub for a convenient function to call the library
|
||||
- Fixed bug in RemoveInvalidImg code that caused all images to be dropped
|
||||
(thanks to .mario for reporting this)
|
||||
. Standardized all attribute handling variables to attr, made it plural
|
||||
|
||||
1.3.0, released 2006-11-26
|
||||
# Invalid images are now removed, rather than replaced with a dud
|
||||
<img src="" alt="Invalid image" />. Previous behavior can be restored
|
||||
with new directive %Core.RemoveInvalidImg set to false.
|
||||
! (X)HTML Strict now supported
|
||||
+ Transparently handles inline elements in block context (blockquote)
|
||||
! Added GET method to demo for easier validation, added 50kb max input size
|
||||
! New directive %HTML.BlockWrapper, for block-ifying inline elements
|
||||
! New directive %HTML.Parent, allows you to only allow inline content
|
||||
! New directives %HTML.AllowedElements and %HTML.AllowedAttributes to let
|
||||
users narrow the set of allowed tags
|
||||
! <li value="4"> and <ul start="2"> now allowed in loose mode
|
||||
! New directives %URI.DisableExternalResources and %URI.DisableResources
|
||||
! New directive %Attr.DisableURI, which eliminates all hyperlinking
|
||||
! New directive %URI.Munge, munges URI so you can use some sort of redirector
|
||||
service to avoid PageRank leaks or warn users that they are exiting your site.
|
||||
! Added spiffy new smoketest printDefinition.php, which lets you twiddle with
|
||||
the configuration settings and see how the internal rules are affected.
|
||||
! New directive %URI.HostBlacklist for blocking links to bad hosts.
|
||||
xssAttacks.php smoketest updated accordingly.
|
||||
- Added missing type to ChildDef_Chameleon
|
||||
- Remove Tidy option from demo if there is not Tidy available
|
||||
. ChildDef_Required guards against empty tags
|
||||
. Lookup table HTMLDefinition->info_flow_elements added
|
||||
. Added peace-of-mind variable initialization to Strategy_FixNesting
|
||||
. Added HTMLPurifier->info_parent_def, parent child processing made special
|
||||
. Added internal documents briefly summarizing future progression of HTML
|
||||
. HTMLPurifier_Config->getBatch($namespace) added
|
||||
. More lenient casting to bool from string in HTMLPurifier_ConfigSchema
|
||||
. Refactored ChildDef classes into their own files
|
||||
|
||||
1.2.0, released 2006-11-19
|
||||
# ID attributes now disabled by default. New directives:
|
||||
+ %HTML.EnableAttrID - restores old behavior by allowing IDs
|
||||
|
57
TODO
57
TODO
@@ -1,62 +1,45 @@
|
||||
|
||||
TODO List
|
||||
|
||||
= KEY ====================
|
||||
# Flagship
|
||||
- Regular
|
||||
? At-risk
|
||||
==========================
|
||||
1.3 release
|
||||
- Make URI validation routines tighter (especially mailto)
|
||||
- More extensive URI filtering schemes (see docs/proposal-new-directives.txt)
|
||||
- Allow for background-image and list-style-image (see above)
|
||||
- Error logging for filtering/cleanup procedures
|
||||
- Rich set* methods and config file loaders for HTMLPurifier_Config
|
||||
|
||||
1.4 release
|
||||
# More extensive URI filtering schemes (see docs/proposal-new-directives.txt)
|
||||
# Allow for background-image and list-style-image (intrinsically tied to above)
|
||||
# Add hooks for custom behavior (for instance, YouTube preservation)
|
||||
- Aggressive caching
|
||||
? Rich set* methods and config file loaders for HTMLPurifier_Config
|
||||
? Configuration profiles: sets of directives that get set with one func call
|
||||
? ConfigSchema directive aliases (so we can rename some of them)
|
||||
? URI validation routines tighter (see docs/dev-code-quality.html) (COMPLEX)
|
||||
|
||||
1.5 release
|
||||
# Error logging for filtering/cleanup procedures
|
||||
- Requires I18N facilities to be created first (COMPLEX)
|
||||
|
||||
1.6 release
|
||||
# Add pre-packaged "levels" of cleaning (custom behavior already done)
|
||||
- Add various "levels" of cleaning
|
||||
- Related: Allow strict (X)HTML
|
||||
- More fine-grained control over escaping behavior
|
||||
- Silently drop content in between SCRIPT tags (can be generalized to allow
|
||||
specification of elements that, when detected as foreign, trigger removal
|
||||
of children, although unbalanced tags could wreak havoc (or at least
|
||||
delete the rest of the document)).
|
||||
|
||||
1.7 release
|
||||
# Additional support for poorly written HTML
|
||||
- Implement all non-essential attribute transforms (BIG!)
|
||||
- Microsoft Word HTML cleaning (i.e. MsoNormal, but research essential!)
|
||||
- Friendly strict handling of <address> (block -> <br>)
|
||||
1.5 release
|
||||
- Additional support for poorly written HTML
|
||||
- Implement all non-essential attribute transforms
|
||||
- Microsoft Word HTML cleaning (i.e. MsoNormal)
|
||||
|
||||
2.0 release
|
||||
# Formatters for plaintext (COMPLEX)
|
||||
- Formatters for plaintext
|
||||
- Auto-paragraphing (be sure to leverage fact that we know when things
|
||||
shouldn't be paragraphed, such as lists and tables).
|
||||
- Linkify URLs
|
||||
- Smileys
|
||||
- Linkification for HTML Purifier docs: notably configuration and classes
|
||||
- Linkification for HTML Purifier docs: notably configuration and
|
||||
class names
|
||||
|
||||
3.0 release
|
||||
- Extended HTML capabilities based on namespacing and tag transforms (COMPLEX)
|
||||
- Extended HTML capabilities based on namespacing and tag transforms
|
||||
- Hooks for adding custom processors to custom namespaced tags and
|
||||
attributes, offer default implementation
|
||||
- Lots of documentation and samples
|
||||
- XHTML 1.1 support
|
||||
|
||||
Ongoing
|
||||
- Lots of profiling, make it faster!
|
||||
- Plugins for major CMSes (COMPLEX)
|
||||
- Drupal
|
||||
- WordPress
|
||||
- eFiction
|
||||
- more! (look for ones that use WYSIWYGs)
|
||||
- Plugins for major CMSes (very tricky issue)
|
||||
|
||||
Unknown release (on a scratch-an-itch basis)
|
||||
- Fixes for Firefox's inability to handle COL alignment props (Bug 915)
|
||||
@@ -84,12 +67,6 @@ Requested
|
||||
3. Extend the tag exclusion system to specify whether or not the
|
||||
contents should be dropped or not (currently, there's code that could do
|
||||
something like this if it didn't drop the inner text too.)
|
||||
- More user-friendly warnings when %HTML.Allow* attempts to specify a
|
||||
tag or attribute that is not supported
|
||||
- Allow specifying global attributes on a tag-by-tag basis in
|
||||
%HTML.AllowAttributes
|
||||
- Parse TinyMCE whitelist into our %HTML.Allow* whitelists
|
||||
- XSS-attempt detection
|
||||
|
||||
Wontfix
|
||||
- Non-lossy smart alternate character encoding transformations (unless
|
||||
|
@@ -22,8 +22,6 @@ of code that should be aggressively refactored. This does not list
|
||||
optimization issues, that needs to be done after intense profiling.</p>
|
||||
|
||||
<pre>
|
||||
docs/examples/demo.php - ad hoc HTML/PHP soup to the extreme
|
||||
|
||||
AttrDef
|
||||
Class - doesn't support Unicode characters (fringe); uses regular
|
||||
expressions
|
||||
@@ -34,8 +32,7 @@ AttrDef
|
||||
Number - constructor interface inconsistent with Integer
|
||||
ConfigSchema - redefinition is a mess
|
||||
Strategy
|
||||
FixNesting - cannot bubble nodes out of structures, duplicated checks
|
||||
for special-case parent node
|
||||
FixNesting - cannot bubble nodes out of structures
|
||||
MakeWellFormed - insufficient automatic closing definitions (check HTML
|
||||
spec for optional end tags, also, closing based on type (block/inline)
|
||||
might be efficient).
|
||||
|
@@ -54,9 +54,8 @@ help you find the correct functionality more quickly. Here they are:</p>
|
||||
abbreviated version is more readable than the full version. Here, we
|
||||
list common abbreviations:
|
||||
<ul>
|
||||
<li>Attr to Attributes (note that it is plural, i.e. <code>$attr = array()</code>)</li>
|
||||
<li>Attr(s) to Attribute(s)</li>
|
||||
<li>Def to Definition</li>
|
||||
<li><code>$ret</code> is the value to be returned in a function</li>
|
||||
</ul>
|
||||
</dd>
|
||||
|
||||
|
@@ -128,20 +128,19 @@ thead th {text-align:left;padding:0.1em;background-color:#EEE;}
|
||||
|
||||
<tbody>
|
||||
<tr><th colspan="2">Absolute positioning, unknown release milestone</th></tr>
|
||||
<tr class="danger impl-no"><td>bottom</td><td rowspan="4">Dangerous, must be non-negative to even be considered,
|
||||
but it's still possible to arbitrarily position by running over.</td></tr>
|
||||
<tr class="danger impl-no"><td>left</td></tr>
|
||||
<tr class="danger impl-no"><td>right</td></tr>
|
||||
<tr class="danger impl-no"><td>top</td></tr>
|
||||
<tr class="impl-no"><td>clip</td><td>-</td></tr>
|
||||
<tr class="danger impl-no"><td>position</td><td>ENUM(static, relative, absolute, fixed)
|
||||
<tr class="danger"><td>bottom</td><td rowspan="4">Dangerous, must be non-negative</td></tr>
|
||||
<tr class="danger"><td>left</td></tr>
|
||||
<tr class="danger"><td>right</td></tr>
|
||||
<tr class="danger"><td>top</td></tr>
|
||||
<tr><td>clip</td><td>-</td></tr>
|
||||
<tr class="danger"><td>position</td><td>ENUM(static, relative, absolute, fixed), permit
|
||||
relative not absolute?</td></tr>
|
||||
<tr class="danger impl-no"><td>z-index</td><td>Dangerous</td></tr>
|
||||
<tr class="danger"><td>z-index</td><td>Dangerous</td></tr>
|
||||
</tbody>
|
||||
|
||||
<tbody>
|
||||
<tr><th colspan="2">Unknown</th></tr>
|
||||
<tr class="danger css1"><td>background-image</td><td>Dangerous, target milestone 1.3</td></tr>
|
||||
<tr class="danger css1"><td>background-image</td><td>Dangerous, target milestone 1.2</td></tr>
|
||||
<tr class="css1"><td>background-attachment</td><td>ENUM(scroll, fixed),
|
||||
Depends on background-image</td></tr>
|
||||
<tr class="css1"><td>background-position</td><td>Depends on background-image</td></tr>
|
||||
@@ -151,7 +150,7 @@ thead th {text-align:left;padding:0.1em;background-color:#EEE;}
|
||||
inline-block has incomplete IE6 support and requires -moz-inline-box
|
||||
for Mozilla. Unknown target milestone.</td></tr>
|
||||
<tr><td class="css1">height</td><td>Interesting, why use it? Unknown target milestone.</td></tr>
|
||||
<tr class="danger css1"><td>list-style-image</td><td>Dangerous? Target milestone 1.3</td></tr>
|
||||
<tr class="danger css1"><td>list-style-image</td><td>Dangerous? Target milestone 1.2</td></tr>
|
||||
<tr class="impl-no"><td>max-height</td><td rowspan="4">No IE 5/6</td></tr>
|
||||
<tr class="impl-no"><td>min-height</td></tr>
|
||||
<tr class="impl-no"><td>max-width</td></tr>
|
||||
@@ -237,7 +236,7 @@ Mozilla on inside and needs -moz-outline, no IE support.</td></tr>
|
||||
<tr><th colspan="3">Questionable</th></tr>
|
||||
<tr class="impl-no"><td>accesskey</td><td>A</td><td>May interfere with main interface</td></tr>
|
||||
<tr class="impl-no"><td>tabindex</td><td>A</td><td>May interfere with main interface</td></tr>
|
||||
<tr><td>target</td><td>A</td><td>Config enabled, only useful for frame layouts, disallowed in strict</td></tr>
|
||||
<tr><td>target</td><td>A</td><td>Config enabled, only useful for frame layouts</td></tr>
|
||||
</tbody>
|
||||
|
||||
<tbody>
|
||||
@@ -284,11 +283,11 @@ Mozilla on inside and needs -moz-outline, no IE support.</td></tr>
|
||||
<tr><td>nowrap</td><td>TD, TH</td><td>Boolean, style 'white-space:nowrap;' (not compat with IE5)</td></tr>
|
||||
<tr><td>size</td><td>HR</td><td>Near-equiv 'width', needs px suffix if original was pixels</td></tr>
|
||||
<tr class="required impl-yes"><td>src</td><td>IMG</td><td>Required, insert blank or default img if not set</td></tr>
|
||||
<tr class="impl-yes"><td>start</td><td>OL</td><td>Poorly supported 'counter-reset', allowed in loose, dropped in strict</td></tr>
|
||||
<tr><td>start</td><td>OL</td><td>Poorly supported 'counter-reset', transform may not be desirable</td></tr>
|
||||
<tr><td rowspan="3">type</td><td>LI</td><td rowspan="3">Equivalent style 'list-style-type', different allowed values though. (needs testing)</td></tr>
|
||||
<tr><td>OL</td></tr>
|
||||
<tr><td>UL</td></tr>
|
||||
<tr class="impl-yes"><td>value</td><td>LI</td><td>Poorly supported 'counter-reset', allowed in loose, dropped in strict</td></tr>
|
||||
<tr><td>value</td><td>LI</td><td>Poorly supported 'counter-reset', transform may not be desirable, see ol.start. Configurable.</td></tr>
|
||||
<tr><td>vspace</td><td>IMG</td><td>Near-equiv styles 'margin-left' and 'margin-right', needs px suffix, see hspace</td></tr>
|
||||
<tr><td rowspan="2">width</td><td>HR</td><td rowspan="2">Near-equiv style 'width', needs px suffix if original was pixels</td></tr>
|
||||
<tr><td>TD, TH</td></tr>
|
||||
|
@@ -1,179 +0,0 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
|
||||
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
|
||||
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"><head>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
|
||||
<meta name="description" content="Explains how to safely allow the embedding of flash from trusted sites in HTML Purifier." />
|
||||
<link rel="stylesheet" type="text/css" href="./style.css" />
|
||||
|
||||
<title>Embedding YouTube Videos - HTML Purifier</title>
|
||||
|
||||
</head><body>
|
||||
|
||||
<h1 class="subtitled">Embedding YouTube Videos</h1>
|
||||
<div class="subtitle">...as well as other dangerous active content</div>
|
||||
|
||||
<div id="filing">Filed under End-User</div>
|
||||
<div id="index">Return to the <a href="index.html">index</a>.</div>
|
||||
|
||||
<p>Clients like their YouTube videos. It gives them a warm fuzzy feeling when
|
||||
they see a neat little embedded video player on their websites that can play
|
||||
the latest clips from their documentary "Fido and the Bones of Spring".
|
||||
All joking aside, the ability to embed YouTube videos or other active
|
||||
content in their pages is something that a lot of people like.</p>
|
||||
|
||||
<p>This is a <em>bad</em> idea. The moment you embed anything untrusted,
|
||||
you will definitely be slammed by all manner of nasties that can be
|
||||
embedded in things from your run of the mill Flash movie to
|
||||
<a href="http://blog.spywareguide.com/2006/12/myspace_phish_attack_leads_use.html">Quicktime movies</a>.
|
||||
Even <code>img</code> tags, which HTML Purifier allows by default, can be
|
||||
dangerous. Be distrustful of anything that tells a browser to load content
|
||||
from another website automatically.</p>
|
||||
|
||||
<p>Luckily for us, however, whitelisting saves the day. Sure, letting users
|
||||
include any old random flash file could be dangerous, but if it's
|
||||
from a specific website, it probably is okay. If no amount of pleading will
|
||||
convince the people upstairs that they should settle for just linking
|
||||
to their movies, you may find this technique very useful.</p>
|
||||
|
||||
<h2>Sample</h2>
|
||||
|
||||
<p>Below is custom code that allows users to embed
|
||||
YouTube videos. This is not favoritism: this trick can easily be adapted for
|
||||
other forms of embeddable content.</p>
|
||||
|
||||
<p>Usually, websites like YouTube give us boilerplate code that you can insert
|
||||
into your documents. YouTube's code goes like this:</p>
|
||||
|
||||
<pre>
|
||||
<object width="425" height="350">
|
||||
<param name="movie" value="http://www.youtube.com/v/AyPzM5WK8ys" />
|
||||
<param name="wmode" value="transparent" />
|
||||
<embed src="http://www.youtube.com/v/AyPzM5WK8ys"
|
||||
type="application/x-shockwave-flash"
|
||||
wmode="transparent" width="425" height="350" />
|
||||
</object>
|
||||
</pre>
|
||||
|
||||
<p>There are two things to note about this code:</p>
|
||||
|
||||
<ol>
|
||||
<li><code><embed></code> is not recognized by W3C, so if you want
|
||||
standards-compliant code, you'll have to get rid of it.</li>
|
||||
<li>The code is exactly the same for all instances, except for the
|
||||
identifier <tt>AyPzM5WK8ys</tt> which tells us which movie file
|
||||
to retrieve.</li>
|
||||
</ol>
|
||||
|
||||
<p>What point 2 means is that if we have code like <code><span
|
||||
class="youtube-embed">AyPzM5WK8ys</span></code> your
|
||||
application can reconstruct the full object from this small snippet that
|
||||
passes through HTML Purifier <em>unharmed</em>.</p>
|
||||
|
||||
<pre>
|
||||
<?php
|
||||
|
||||
class HTMLPurifierX_PreserveYouTube extends HTMLPurifier
|
||||
{
|
||||
function purify($html, $config = null) {
|
||||
$pre_regex = '#<object[^>]+>.+?'.
|
||||
'http://www.youtube.com/v/([A-Za-z0-9]+).+?</object>#';
|
||||
$pre_replace = '<span class="youtube-embed">\1</span>';
|
||||
$html = preg_replace($pre_regex, $pre_replace, $html);
|
||||
$html = parent::purify($html, $config);
|
||||
$post_regex = '#<span class="youtube-embed">([A-Za-z0-9]+)</span>#';
|
||||
$post_replace = '<object width="425" height="350" '.
|
||||
'data="http://www.youtube.com/v/\1">'.
|
||||
'<param name="movie" value="http://www.youtube.com/v/\1"></param>'.
|
||||
'<param name="wmode" value="transparent"></param>'.
|
||||
'<!--[if IE]>'.
|
||||
'<embed src="http://www.youtube.com/v/\1"'.
|
||||
'type="application/x-shockwave-flash"'.
|
||||
'wmode="transparent" width="425" height="350" />'.
|
||||
'<![endif]-->'.
|
||||
'</object>';
|
||||
$html = preg_replace($post_regex, $post_replace, $html);
|
||||
return $html;
|
||||
}
|
||||
}
|
||||
|
||||
$purifier = new HTMLPurifierX_PreserveYouTube();
|
||||
$html_still_with_youtube = $purifier->purify($html_with_youtube);
|
||||
|
||||
?>
|
||||
</pre>
|
||||
|
||||
<p>There is a bit going on here, so let's explain.</p>
|
||||
|
||||
<ol>
|
||||
<li>The class uses the prefix <code>HTMLPurifierX</code> because it's
|
||||
userspace code. Don't use <code>HTMLPurifier</code> in front of your
|
||||
class, since it might clobber another class in the library.</li>
|
||||
<li>In order to keep the interface compatible, we've extended HTMLPurifier
|
||||
into a new class that preserves the YouTube videos. This means that
|
||||
all you have to do is replace all instances of
|
||||
<code>new HTMLPurifier</code> with <code>new
|
||||
HTMLPurifierX_PreserveYouTube</code>. There are other ways to go about
|
||||
doing this: if you were calling a function that wrapped HTML Purifier,
|
||||
you could paste the PHP right there. If you wanted to be really
|
||||
fancy, you could make a decorator for HTMLPurifier.</li>
|
||||
<li>The first preg_replace call replaces any YouTube code users may have
|
||||
embedded into the benign span tag. Span is used because it is inline,
|
||||
and objects are inline too. We are very careful to be extremely
|
||||
restrictive on what goes inside the span tag, as if an errant code
|
||||
gets in there it could get messy.</li>
|
||||
<li>The HTML is then purified as usual.</li>
|
||||
<li>Then, another preg_replace replaces the span tag with a fully fledged
|
||||
object. Note that the embed is removed, and, in its place, a data
|
||||
attribute was added to the object. This makes the tag standards
|
||||
compliant! It also breaks Internet Explorer, so we add in a bit of
|
||||
conditional comments with the old embed code to make it work again.
|
||||
It's all quite convoluted but works.</li>
|
||||
</ol>
|
||||
|
||||
<h2>Warning</h2>
|
||||
|
||||
<p>There are a number of possible problems with the code above, depending
|
||||
on how you look at it.</p>
|
||||
|
||||
<h3>Cannot change width and height</h3>
|
||||
|
||||
<p>The width and height of the final YouTube movie cannot be adjusted. This
|
||||
is because I am lazy. If you really insist on letting users change the size
|
||||
of the movie, what you need to do is package up the attributes inside the
|
||||
span tag (along with the movie ID). It gets complicated though: a malicious
|
||||
user can specify an outrageously large height and width and attempt to crash
|
||||
the user's operating system/browser. You need to either cap it by limiting
|
||||
the number of digits allowed in the regex or using a callback to check the
|
||||
number.</p>
|
||||
|
||||
<h3>Trusts media's host's security</h3>
|
||||
|
||||
<p>By allowing this code onto our website, we are trusting that YouTube has
|
||||
tech-savvy enough people not to allow their users to inject malicious
|
||||
code into the Flash files. An exploit on YouTube means an exploit on your
|
||||
site. Even though YouTube is run by the reputable Google, it
|
||||
<a href="http://ha.ckers.org/blog/20061213/google-xss-vuln/">doesn't</a>
|
||||
mean they are
|
||||
<a href="http://ha.ckers.org/blog/20061208/xss-in-googles-orkut/">invulnerable.</a>
|
||||
You're putting a certain measure of the job on an external provider (just as
|
||||
you have by entrusting your user input to HTML Purifier), and
|
||||
it is important that you are cognizant of the risk.</p>
|
||||
|
||||
<h3>Poorly written adaptations compromise security</h3>
|
||||
|
||||
<p>This should go without saying, but if you're going to adapt this code
|
||||
for Google Video or the like, make sure you do it <em>right</em>. It's
|
||||
extremely easy to allow a character too many in the final section and
|
||||
suddenly you're introducing XSS into HTML Purifier's XSS free output. HTML
|
||||
Purifier may be well written, but it cannot guard against vulnerabilities
|
||||
introduced after it has finished.</p>
|
||||
|
||||
<h2>Future plans</h2>
|
||||
|
||||
<p>It would probably be a good idea if this code was added to the core
|
||||
library. Look out for the inclusion of this into the core as a decorator
|
||||
or the like.</p>
|
||||
|
||||
</body>
|
||||
</html>
|
@@ -1,66 +1,34 @@
|
||||
<?php
|
||||
|
||||
// using _REQUEST because we accept GET and POST requests
|
||||
header('Content-type:text/html;charset=UTF-8');
|
||||
|
||||
$content = empty($_REQUEST['xml']) ? 'text/html' : 'application/xhtml+xml';
|
||||
header("Content-type:$content;charset=UTF-8");
|
||||
|
||||
// prevent PHP versions with shorttags from barfing
|
||||
echo '<?xml version="1.0" encoding="UTF-8" ?>
|
||||
';
|
||||
|
||||
function getFormMethod() {
|
||||
return (isset($_REQUEST['post'])) ? 'post' : 'get';
|
||||
}
|
||||
|
||||
if (empty($_REQUEST['strict'])) {
|
||||
?><!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
|
||||
?><!DOCTYPE html
|
||||
PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
|
||||
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
|
||||
<?php
|
||||
} else {
|
||||
?>
|
||||
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
|
||||
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
|
||||
<?php
|
||||
}
|
||||
?>
|
||||
<html xmlns="http://www.w3.org/1999/xhtml" lang="en" xml:lang="en">
|
||||
<html>
|
||||
<head>
|
||||
<title>HTML Purifier Live Demo</title>
|
||||
<title>HTMLPurifier Live Demo</title>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
|
||||
</head>
|
||||
<body>
|
||||
<h1>HTML Purifier Live Demo</h1>
|
||||
<h1>HTMLPurifier Live Demo</h1>
|
||||
<?php
|
||||
|
||||
require_once '../../library/HTMLPurifier.auto.php';
|
||||
set_include_path('../../library' . PATH_SEPARATOR . get_include_path());
|
||||
require_once 'HTMLPurifier.php';
|
||||
|
||||
if (!empty($_REQUEST['html'])) { // start result
|
||||
if (!empty($_POST['html'])) {
|
||||
|
||||
if (strlen($_REQUEST['html']) > 50000) {
|
||||
?>
|
||||
<p>Request exceeds maximum allowed text size of 50kb.</p>
|
||||
<?php
|
||||
} else { // start main processing
|
||||
|
||||
$html = get_magic_quotes_gpc() ? stripslashes($_REQUEST['html']) : $_REQUEST['html'];
|
||||
$html = get_magic_quotes_gpc() ? stripslashes($_POST['html']) : $_POST['html'];
|
||||
|
||||
$config = HTMLPurifier_Config::createDefault();
|
||||
$config->set('Core', 'TidyFormat', !empty($_REQUEST['tidy']));
|
||||
$config->set('HTML', 'Strict', !empty($_REQUEST['strict']));
|
||||
$config->set('Core', 'TidyFormat', !empty($_POST['tidy']));
|
||||
$purifier = new HTMLPurifier($config);
|
||||
$pure_html = $purifier->purify($html);
|
||||
|
||||
?>
|
||||
<p>Here is your purified HTML:</p>
|
||||
<div style="border:5px solid #CCC;margin:0 10%;padding:1em;">
|
||||
<?php if(getFormMethod() == 'get') { ?>
|
||||
<div style="float:right;">
|
||||
<a href="http://validator.w3.org/check?uri=referer"><img
|
||||
src="http://www.w3.org/Icons/valid-xhtml10"
|
||||
alt="Valid XHTML 1.0 Transitional" height="31" width="88" style="border:0;" /></a>
|
||||
</div>
|
||||
<?php } ?>
|
||||
<?php
|
||||
|
||||
echo $pure_html;
|
||||
@@ -75,34 +43,23 @@ echo htmlspecialchars($pure_html, ENT_COMPAT, 'UTF-8');
|
||||
|
||||
?></pre>
|
||||
<?php
|
||||
if (getFormMethod() == 'post') { // start POST validation notice
|
||||
?>
|
||||
<p>If you would like to validate the code with
|
||||
<a href="http://validator.w3.org/#validate-by-input">W3C's
|
||||
validator</a>, copy and paste the <em>entire</em> demo page's source.</p>
|
||||
<?php
|
||||
} // end POST validation notice
|
||||
|
||||
} // end main processing
|
||||
|
||||
// end result
|
||||
|
||||
} else {
|
||||
|
||||
?>
|
||||
<p>Welcome to the live demo. Enter some HTML and see how HTML Purifier
|
||||
<p>Welcome to the live demo. Enter some HTML and see how HTMLPurifier
|
||||
will filter it.</p>
|
||||
<?php
|
||||
|
||||
}
|
||||
|
||||
?>
|
||||
<form id="filter" action="demo.php<?php
|
||||
echo '?' . getFormMethod();
|
||||
if (isset($_REQUEST['profile']) || isset($_REQUEST['XDEBUG_PROFILE'])) {
|
||||
echo '&XDEBUG_PROFILE=1';
|
||||
} ?>" method="<?php echo getFormMethod(); ?>">
|
||||
<form name="filter" action="demo.php<?php
|
||||
if (isset($_GET['profile']) || isset($_GET['XDEBUG_PROFILE'])) {
|
||||
echo '?XDEBUG_PROFILE=1';
|
||||
} ?>" method="post">
|
||||
<fieldset>
|
||||
<legend>HTML Purifier Input (<?php echo getFormMethod(); ?>)</legend>
|
||||
<legend>HTML</legend>
|
||||
<textarea name="html" cols="60" rows="15"><?php
|
||||
|
||||
if (isset($html)) {
|
||||
@@ -110,27 +67,13 @@ if (isset($html)) {
|
||||
HTMLPurifier_Encoder::cleanUTF8($html), ENT_COMPAT, 'UTF-8');
|
||||
}
|
||||
?></textarea>
|
||||
<?php if (getFormMethod() == 'get') { ?>
|
||||
<p><strong>Warning:</strong> GET request method can only hold
|
||||
8129 characters (probably less depending on your browser).
|
||||
If you need to test anything
|
||||
larger than that, try the <a href="demo.php?post">POST form</a>.</p>
|
||||
<?php } ?>
|
||||
<?php if (extension_loaded('tidy')) { ?>
|
||||
<div>Nicely format output with Tidy? <input type="checkbox" value="1"
|
||||
name="tidy"<?php if (!empty($_REQUEST['tidy'])) echo ' checked="checked"'; ?> /></div>
|
||||
<?php } ?>
|
||||
<div>XHTML 1.0 Strict output? <input type="checkbox" value="1"
|
||||
name="strict"<?php if (!empty($_REQUEST['strict'])) echo ' checked="checked"'; ?> /></div>
|
||||
<div>Serve as application/xhtml+xml? (not for IE) <input type="checkbox" value="1"
|
||||
name="xml"<?php if (!empty($_REQUEST['xml'])) echo ' checked="checked"'; ?> /></div>
|
||||
<div>Nicely format output with Tidy? <input type="checkbox" value="1"
|
||||
name="tidy"<?php if (!empty($_POST['tidy'])) echo ' checked="checked"'; ?> /></div>
|
||||
<div>
|
||||
<input type="submit" value="Submit" name="submit" class="button" />
|
||||
</div>
|
||||
</fieldset>
|
||||
</form>
|
||||
<p>Return to <a href="http://hp.jpsband.org/">HTML Purifier's home page</a>.
|
||||
Try the form in <a href="demo.php?get">GET</a> and <a href="demo.php?post">POST</a> request
|
||||
flavors (GET is easy to validate with W3C, but POST allows larger inputs).</p>
|
||||
<p>Return to <a href="http://hp.jpsband.org/">HTMLPurifier's home page</a>.</p>
|
||||
</body>
|
||||
</html>
|
@@ -23,10 +23,7 @@ information for casual developers using HTML Purifier.</p>
|
||||
<dl>
|
||||
|
||||
<dt><a href="enduser-id.html">IDs</a></dt>
|
||||
<dd>Explains various methods for allowing IDs in documents safely.</dd>
|
||||
|
||||
<dt><a href="enduser-youtube.html">Embedding YouTube videos</a></dt>
|
||||
<dd>Explains how to safely allow the embedding of flash from trusted sites.</dd>
|
||||
<dd>Explains various methods for allowing IDs in documents safely in HTML Purifier.</dd>
|
||||
|
||||
</dl>
|
||||
|
||||
@@ -68,85 +65,6 @@ that may not directly discuss HTML Purifier.</p>
|
||||
<dd>Credits and links to DevNetwork forum topics.</dd>
|
||||
</dl>
|
||||
|
||||
<h2>Internal memos</h2>
|
||||
|
||||
<p>Plaintext documents that are more for use by active developers of
|
||||
the code. They may be upgraded to HTML files or stay as TXT scratchpads.</p>
|
||||
|
||||
<table class="table">
|
||||
|
||||
<thead><tr>
|
||||
<th width="10%">Type</th>
|
||||
<th width="20%">Name</th>
|
||||
<th>Description</th>
|
||||
</tr></thead>
|
||||
|
||||
<tbody>
|
||||
|
||||
<tr>
|
||||
<td>End-user</td>
|
||||
<td><a href="enduser-overview.txt">Overview</a></td>
|
||||
<td>High level overview of the general control flow (mostly obsolete).</td>
|
||||
</tr>
|
||||
|
||||
<tr>
|
||||
<td>End-user</td>
|
||||
<td><a href="enduser-security.txt">Security</a></td>
|
||||
<td>Common security issues that may still arise (half-baked).</td>
|
||||
</tr>
|
||||
|
||||
<tr>
|
||||
<td>Proposal</td>
|
||||
<td><a href="proposal-filter-levels.txt">Filter levels</a></td>
|
||||
<td>Outlines details of projected configurable level of filtering.</td>
|
||||
</tr>
|
||||
|
||||
<tr>
|
||||
<td>Proposal</td>
|
||||
<td><a href="proposal-language.txt">Language</a></td>
|
||||
<td>Specification of I18N for error messages derived from MediaWiki (half-baked).</td>
|
||||
</tr>
|
||||
|
||||
<tr>
|
||||
<td>Proposal</td>
|
||||
<td><a href="proposal-new-directives.txt">New directives</a></td>
|
||||
<td>Assorted configuration options that could be implemented.</td>
|
||||
</tr>
|
||||
|
||||
<tr>
|
||||
<td>Reference</td>
|
||||
<td><a href="ref-loose-vs-strict.txt">Loose vs. Strict</a></td>
|
||||
<td>Differences between HTML Strict and Transitional versions.</td>
|
||||
</tr>
|
||||
|
||||
<tr>
|
||||
<td>Reference</td>
|
||||
<td><a href="ref-proprietary-tags.txt">Proprietary tags</a></td>
|
||||
<td>List of vendor-specific tags we may want to transform to W3C compliant markup.</td>
|
||||
</tr>
|
||||
|
||||
<tr>
|
||||
<td>Reference</td>
|
||||
<td><a href="ref-strictness.txt">Strictness</a></td>
|
||||
<td>Short essay on how loose definition isn't really loose.</td>
|
||||
</tr>
|
||||
|
||||
<tr>
|
||||
<td>Reference</td>
|
||||
<td><a href="ref-xhtml-1.1.txt">XHTML 1.1</a></td>
|
||||
<td>What we'd have to do to support XHTML 1.1.</td>
|
||||
</tr>
|
||||
|
||||
<tr>
|
||||
<td>Reference</td>
|
||||
<td><a href="ref-whatwg.txt">WHATWG</a></td>
|
||||
<td>How WHATWG plays into what we need to do.</td>
|
||||
</tr>
|
||||
|
||||
</tbody>
|
||||
|
||||
</table>
|
||||
|
||||
<div id="version">$Id$</div>
|
||||
</body>
|
||||
</html>
|
@@ -8,11 +8,11 @@ could go into this definition: the set of HTML good for blog entries is
|
||||
definitely too large for HTML that would be allowed in blog comments. Going
|
||||
from Transitional to Strict requires changes to the definition.
|
||||
|
||||
Allowing users to specify their own whitelists is one step (implemented, btw),
|
||||
but I have doubts on only doing this. Simply put, the typical programmer is too
|
||||
lazy to actually go through the trouble of investigating which tags, attributes
|
||||
and properties to allow. HTMLDefinition makes a big part of what HTMLPurifier
|
||||
is.
|
||||
However, allowing users to specify their own whitelists was an idea I
|
||||
rejected from the start. Simply put, the typical programmer is too lazy
|
||||
to actually go through the trouble of investigating which tags, attributes
|
||||
and properties to allow. HTMLDefinition makes a big part of what HTMLPurifier
|
||||
is.
|
||||
|
||||
The idea, then, is to set up fundamentally different sets of definitions, which
|
||||
can further be customized using simpler configuration options.
|
||||
@@ -28,7 +28,7 @@ Here are some fuzzy levels you could set:
|
||||
to be useful)
|
||||
3. Pages - As permissive as possible without allowing XSS. No protection
|
||||
against bad design sense, unfortunately. Suitable for wiki and page
|
||||
environments. (probably what we have now)
|
||||
environments.
|
||||
4. Lint - Accept everything in the spec, a Tidy wannabe. (This probably won't
|
||||
get implemented as it would require routines for things like <object>
|
||||
and friends to be implemented, which is a lot of work for not a lot of
|
||||
|
@@ -21,11 +21,20 @@ time. Note the naming convention: %Namespace.Directive
|
||||
%Attr.MaxHeight - caps for width and height related checks.
|
||||
(the hack in Pixels for an image crashing attack could be replaced by this)
|
||||
|
||||
%URI.Munge - will munge all external URIs to a different URI, which redirects
|
||||
the user to the applicable page. A urlencoded version of the URI
|
||||
will replace any instances of %s in the string. One possible
|
||||
string is 'http://www.google.com/url?q=%s'. Useful for preventing
|
||||
pagerank from being sent to other sites, but can also be used to
|
||||
redirect to a splash page notifying user that they are leaving your
|
||||
website.
|
||||
|
||||
%URI.AddRelNofollow - will add rel="nofollow" to all links, preventing the
|
||||
spread of ill-gotten pagerank
|
||||
|
||||
%URI.RelativeToAbsolute - transforms all relative URIs to absolute form
|
||||
|
||||
%URI.HostBlacklist - strings that if found in the host of a URI are disallowed
|
||||
%URI.HostBlacklistRegex - regexes that if matching the host are disallowed
|
||||
%URI.HostWhitelist - domain names that are excluded from the host blacklist
|
||||
%URI.HostPolicy - determines whether or not it's reject all and then whitelist
|
||||
@@ -44,3 +53,7 @@ time. Note the naming convention: %Namespace.Directive
|
||||
absolute DNS. While this is actually the preferred method according to
|
||||
the RFC, most people opt to use a relative domain name relative to . (root).
|
||||
|
||||
%URI.DisableExternalResources - disallow resource links (i.e. URIs that result
|
||||
in immediate requests, such as src in IMG) to external websites
|
||||
|
||||
%HTML.DisableImg - disables all images
|
||||
|
@@ -1,37 +0,0 @@
|
||||
|
||||
Loose versus Strict
|
||||
Changes from one doctype to another
|
||||
|
||||
There are changes. Wow, how insightful. Not everything changed is relevant
|
||||
to HTML Purifier, though, so let's take a look:
|
||||
|
||||
== Major incompatibilities ==
|
||||
|
||||
[done] BLOCKQUOTE changes from 'flow' to 'block'
|
||||
current behavior: inline inner contents should not be nuked, block-ify as necessary
|
||||
[partially-done] U, S, STRIKE cut
|
||||
current behavior: removed completely
|
||||
projected behavior: replace with appropriate inline span + CSS
|
||||
[done] ADDRESS from potpourri to Inline (removes p tags)
|
||||
current behavior: block tags silently dropped
|
||||
ideal behavior: replace tags with something like <br>. (not high priority)
|
||||
|
||||
== Things we can loosen up ==
|
||||
|
||||
Tags DIR, MENU, CENTER, ISINDEX, FONT, BASEFONT? allowed in loose
|
||||
current behavior: transform to strict-valid forms
|
||||
Attributes allowed in loose (see attribute transforms in 'dev-progress.html')
|
||||
current behavior: projected to transform into strict-valid forms
|
||||
|
||||
== Periphery issues ==
|
||||
|
||||
A tag's attribute 'target' (for selecting frames) cut
|
||||
current behavior: not allowed at all
|
||||
projected behavior: use loose doctype if needed, needs valid values
|
||||
[done] OL/LI tag's attribute 'start'/'value' (for renumbering lists) cut
|
||||
current behavior: no substitute, just delete when in strict, allow in loose
|
||||
Attribute 'name' deprecated in favor of 'id'
|
||||
current behavior: dropped silently
|
||||
projected behavior: create proper AttrTransform (currently not allowed at all)
|
||||
[done] PRE tag allows SUB/SUP? (strict dtd comment vs syntax, loose disallows)
|
||||
current behavior: disallow as usual
|
@@ -1,22 +0,0 @@
|
||||
|
||||
Proprietary Tags
|
||||
<nobr> and friends
|
||||
|
||||
Here are some proprietary tags that W3C does not define but occasionally show
|
||||
up in the wild. We have only included tags that would make sense in an
|
||||
HTML Purifier context.
|
||||
|
||||
<align>, block element that aligns (extremely rare)
|
||||
<blackface>, inline that double-bolds text (extremely rare)
|
||||
<comment>, hidden comment for IE and WebTV
|
||||
<multicol cols=number gutter=pixels width=pixels>, multiple columns
|
||||
<nobr>, no linebreaks
|
||||
<spacer align=* type="vertical|horizontal|block">, whitespace in doc,
|
||||
use width/height for block and size for vertical/horizontal (attributes)
|
||||
(extremely rare)
|
||||
<wbr>, potential word break point: allows linebreaks. Only works in <nobr>
|
||||
|
||||
<listing>, monospace pre-variant (extremely rare)
|
||||
<plaintext>, escapes all tags to the end of document
|
||||
<ruby> and friends, (more research needed, appears to be XHTML 1.1 markup)
|
||||
<xmp>, monospace, replace with pre
|
@@ -22,15 +22,4 @@ whole point about CSS is to seperate styling from content, so inline styling
|
||||
doesn't solve that problem.
|
||||
|
||||
It's an icky question, and we'll have to deal with it as more and more
|
||||
transforms get implemented. As of right now, however, we currently support
|
||||
these loose-only constructs in loose mode:
|
||||
|
||||
- <ul start="1">, <li value="1"> attributes
|
||||
- <u>, <strike>, <s> tags
|
||||
- flow children in <blockquote>
|
||||
- mixed children in <address>
|
||||
|
||||
The changed child definitions as well as the ul.start li.value are the most
|
||||
compelling reasons why loose should be used. We may want to offer disabling <u>,
|
||||
<strike> and <s> by themselves.
|
||||
|
||||
transforms get implemented.
|
||||
|
@@ -1,9 +0,0 @@
|
||||
|
||||
Web Hypertext Application Technology Working Group
|
||||
WHATWG
|
||||
|
||||
I don't think we need to worry about them. Untrusted users shouldn't be
|
||||
submitting applications, eh? But if some interesting attribute pops up in
|
||||
their spec, and might be worth supporting, stick it here.
|
||||
|
||||
(none so far, as you can see)
|
@@ -1,21 +0,0 @@
|
||||
|
||||
Getting XHTML 1.1 Working
|
||||
|
||||
It's quite simple, according to <http://www.w3.org/TR/xhtml11/changes.html>
|
||||
|
||||
1. Scratch lang entirely in favor of xml:lang
|
||||
2. Scratch name entirely in favor of id (partially-done)
|
||||
3. Support Ruby <http://www.w3.org/TR/2001/REC-ruby-20010531/>
|
||||
|
||||
...but that's only an informative section. More things to do:
|
||||
|
||||
1. Scratch style attribute (it's deprecated)
|
||||
2. Be module-aware (this might entail intelligent grouping in the definition
|
||||
and allowing users to specifically remove certain modules (see 5))
|
||||
3. Cross-reference minimal content models with existing DTDs and determine
|
||||
changes (todo)
|
||||
4. Watch out for the Legacy Module
|
||||
<http://www.w3.org/TR/2001/REC-xhtml-modularization-20010410/abstract_modules.html#s_legacymodule>
|
||||
5. Let users specify their own custom modules
|
||||
6. Study Modularization document
|
||||
<http://www.w3.org/TR/2001/REC-xhtml-modularization-20010410/>
|
@@ -1,21 +0,0 @@
|
||||
<?php
|
||||
|
||||
/**
 * Function wrapper for HTML Purifier for quick use.
 *
 * The library file is included and a single HTMLPurifier object is created
 * on the first call; every later call reuses that same object.
 *
 * @note This function only includes the library when it is called. While
 *       this is efficient for instances when you only use HTML Purifier
 *       on a few of your pages, it murders bytecode caching. You still
 *       need to add HTML Purifier to your path.
 * @param $html   String of HTML to purify
 * @param $config Optional configuration, passed through unchanged to
 *                HTMLPurifier::purify()
 * @return Whatever HTMLPurifier::purify() returns for the given input
 */
function HTMLPurifier($html, $config = null) {
    // static local keeps the purifier alive between calls
    static $purifier = false;
    if (!$purifier) {
        require_once 'HTMLPurifier.php';
        $purifier = new HTMLPurifier();
    }
    return $purifier->purify($html, $config);
}
|
||||
|
||||
?>
|
@@ -22,7 +22,7 @@
|
||||
*/
|
||||
|
||||
/*
|
||||
HTML Purifier 1.3.2 - Standards Compliant HTML Filtering
|
||||
HTML Purifier 1.2.0 - Standards Compliant HTML Filtering
|
||||
Copyright (C) 2006 Edward Z. Yang
|
||||
|
||||
This library is free software; you can redistribute it and/or
|
||||
@@ -64,29 +64,19 @@ require_once 'HTMLPurifier/Encoder.php';
|
||||
class HTMLPurifier
|
||||
{
|
||||
|
||||
var $version = '1.3.2';
|
||||
|
||||
var $config;
|
||||
|
||||
var $lexer, $strategy, $generator;
|
||||
|
||||
/**
|
||||
* Final HTMLPurifier_Context of last run purification. Might be an array.
|
||||
* @public
|
||||
*/
|
||||
var $context;
|
||||
|
||||
/**
|
||||
* Initializes the purifier.
|
||||
* @param $config Optional HTMLPurifier_Config object for all instances of
|
||||
* the purifier, if omitted, a default configuration is
|
||||
* supplied (which can be overridden on a per-use basis).
|
||||
* The parameter can also be any type that
|
||||
* HTMLPurifier_Config::create() supports.
|
||||
*/
|
||||
function HTMLPurifier($config = null) {
|
||||
|
||||
$this->config = HTMLPurifier_Config::create($config);
|
||||
$this->config = $config ? $config : HTMLPurifier_Config::createDefault();
|
||||
|
||||
$this->lexer = HTMLPurifier_Lexer::create();
|
||||
$this->strategy = new HTMLPurifier_Strategy_Core();
|
||||
@@ -101,54 +91,25 @@ class HTMLPurifier
|
||||
* @param $html String of HTML to purify
|
||||
* @param $config HTMLPurifier_Config object for this operation, if omitted,
|
||||
* defaults to the config object specified during this
|
||||
* object's construction. The parameter can also be any type
|
||||
* that HTMLPurifier_Config::create() supports.
|
||||
* object's construction.
|
||||
* @return Purified HTML
|
||||
*/
|
||||
function purify($html, $config = null) {
|
||||
|
||||
$config = $config ? HTMLPurifier_Config::create($config) : $this->config;
|
||||
|
||||
$config = $config ? $config : $this->config;
|
||||
$context =& new HTMLPurifier_Context();
|
||||
$html = $this->encoder->convertToUTF8($html, $config, $context);
|
||||
|
||||
// purified HTML
|
||||
$html =
|
||||
$this->generator->generateFromTokens(
|
||||
// list of tokens
|
||||
$this->strategy->execute(
|
||||
// list of un-purified tokens
|
||||
$this->lexer->tokenizeHTML(
|
||||
// un-purified HTML
|
||||
$html, $config, $context
|
||||
),
|
||||
$this->lexer->tokenizeHTML($html, $config, $context),
|
||||
$config, $context
|
||||
),
|
||||
$config, $context
|
||||
);
|
||||
|
||||
$html = $this->encoder->convertFromUTF8($html, $config, $context);
|
||||
$this->context =& $context;
|
||||
return $html;
|
||||
}
|
||||
|
||||
/**
 * Purifies every HTML snippet in an array, preserving the array's keys.
 *
 * After the call, $this->context holds an array (same keys as the input)
 * of the context left behind by each individual purify() run.
 *
 * @param $array_of_html Array of HTML strings to purify
 * @param $config Optional HTMLPurifier_Config object for this operation.
 *                See HTMLPurifier::purify() for more details.
 * @return Array of purified HTML
 */
function purifyArray($array_of_html, $config = null) {
    $purified = array();
    $contexts = array();
    foreach ($array_of_html as $index => $snippet) {
        $purified[$index] = $this->purify($snippet, $config);
        // purify() has just replaced $this->context; remember it per key
        $contexts[$index] = $this->context;
    }
    $this->context = $contexts;
    return $purified;
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
?>
|
@@ -49,7 +49,7 @@ class HTMLPurifier_AttrDef_Lang extends HTMLPurifier_AttrDef
|
||||
if ($length == 0 || $length == 1 || $length > 8 || !ctype_alnum($subtags[1])) {
|
||||
return $new_string;
|
||||
}
|
||||
if (!ctype_lower($subtags[1])) $subtags[1] = strtolower($subtags[1]);
|
||||
if (!ctype_lower($subtags[1])) $subtags[1] = strotolower($subtags[1]);
|
||||
|
||||
$new_string .= '-' . $subtags[1];
|
||||
if ($num_subtags == 2) return $new_string;
|
||||
@@ -61,7 +61,7 @@ class HTMLPurifier_AttrDef_Lang extends HTMLPurifier_AttrDef
|
||||
return $new_string;
|
||||
}
|
||||
if (!ctype_lower($subtags[$i])) {
|
||||
$subtags[$i] = strtolower($subtags[$i]);
|
||||
$subtags[$i] = strotolower($subtags[$i]);
|
||||
}
|
||||
$new_string .= '-' . $subtags[$i];
|
||||
}
|
||||
|
@@ -24,7 +24,7 @@ HTMLPurifier_ConfigSchema::define(
|
||||
'This directive has been available since 1.2.0.'
|
||||
);
|
||||
|
||||
HTMLPurifier_ConfigSchema::define(
|
||||
HTMLPurifier_ConfigSchema::Define(
|
||||
'URI', 'DisableExternal', false, 'bool',
|
||||
'Disables links to external websites. This is a highly effective '.
|
||||
'anti-spam and anti-pagerank-leech measure, but comes at a hefty price: no'.
|
||||
@@ -34,49 +34,6 @@ HTMLPurifier_ConfigSchema::define(
|
||||
'This directive has been available since 1.2.0.'
|
||||
);
|
||||
|
||||
HTMLPurifier_ConfigSchema::define(
|
||||
'URI', 'DisableExternalResources', false, 'bool',
|
||||
'Disables the embedding of external resources, preventing users from '.
|
||||
'embedding things like images from other hosts. This prevents '.
|
||||
'access tracking (good for email viewers), bandwidth leeching, '.
|
||||
'cross-site request forging, goatse.cx posting, and '.
|
||||
'other nasties, but also results in '.
|
||||
'a loss of end-user functionality (they can\'t directly post a pic '.
|
||||
'they posted from Flickr anymore). Use it if you don\'t have a '.
|
||||
'robust user-content moderation team. This directive has been '.
|
||||
'available since 1.3.0.'
|
||||
);
|
||||
|
||||
HTMLPurifier_ConfigSchema::define(
|
||||
'URI', 'DisableResources', false, 'bool',
|
||||
'Disables embedding resources, essentially meaning no pictures. You can '.
|
||||
'still link to them though. See %URI.DisableExternalResources for why '.
|
||||
'this might be a good idea. This directive has been available since 1.3.0.'
|
||||
);
|
||||
|
||||
HTMLPurifier_ConfigSchema::define(
|
||||
'URI', 'Munge', null, 'string/null',
|
||||
'Munges all browsable (usually http, https and ftp) URI\'s into some URL '.
|
||||
'redirection service. Pass this directive a URI, with %s inserted where '.
|
||||
'the url-encoded original URI should be inserted (sample: '.
|
||||
'<code>http://www.google.com/url?q=%s</code>). '.
|
||||
'This prevents PageRank leaks, while being as transparent as possible '.
|
||||
'to users (you may also want to add some client side JavaScript to '.
|
||||
'override the text in the statusbar). Warning: many security experts '.
|
||||
'believe that this form of protection does not deter spam-bots. '.
|
||||
'You can also use this directive to redirect users to a splash page '.
|
||||
'telling them they are leaving your website. '.
|
||||
'This directive has been available since 1.3.0.'
|
||||
);
|
||||
|
||||
HTMLPurifier_ConfigSchema::define(
|
||||
'URI', 'HostBlacklist', array(), 'list',
|
||||
'List of strings that are forbidden in the host of any URI. Use it to '.
|
||||
'kill domain names of spam, etc. Note that it will catch anything in '.
|
||||
'the domain, so <tt>moo.com</tt> will catch <tt>moo.com.example.com</tt>. '.
|
||||
'This directive has been available since 1.3.0.'
|
||||
);
|
||||
|
||||
/**
|
||||
* Validates a URI as defined by RFC 3986.
|
||||
* @note Scheme-specific mechanics deferred to HTMLPurifier_URIScheme
|
||||
@@ -86,15 +43,15 @@ class HTMLPurifier_AttrDef_URI extends HTMLPurifier_AttrDef
|
||||
|
||||
var $host;
|
||||
var $PercentEncoder;
|
||||
var $embeds_resource;
|
||||
var $embeds;
|
||||
|
||||
/**
|
||||
* @param $embeds_resource_resource Does the URI here result in an extra HTTP request?
|
||||
* @param $embeds Does the URI here result in an extra HTTP request?
|
||||
*/
|
||||
function HTMLPurifier_AttrDef_URI($embeds_resource = false) {
|
||||
function HTMLPurifier_AttrDef_URI($embeds = false) {
|
||||
$this->host = new HTMLPurifier_AttrDef_Host();
|
||||
$this->PercentEncoder = new HTMLPurifier_PercentEncoder();
|
||||
$this->embeds_resource = (bool) $embeds_resource;
|
||||
$this->embeds = (bool) $embeds;
|
||||
}
|
||||
|
||||
function validate($uri, $config, &$context) {
|
||||
@@ -148,25 +105,18 @@ class HTMLPurifier_AttrDef_URI extends HTMLPurifier_AttrDef
|
||||
}
|
||||
|
||||
|
||||
// the URI we're processing embeds_resource a resource in the page, but the URI
|
||||
// the URI we're processing embeds a resource in the page, but the URI
|
||||
// it references cannot be located
|
||||
if ($this->embeds_resource && !$scheme_obj->browsable) {
|
||||
if ($this->embeds && !$scheme_obj->browsable) {
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
if ($authority !== null) {
|
||||
|
||||
// remove URI if it's absolute and we disabled externals or
|
||||
// if it's absolute and embedded and we disabled external resources
|
||||
// remove URI if it's absolute and we disallow externals
|
||||
unset($our_host);
|
||||
if (
|
||||
$config->get('URI', 'DisableExternal') ||
|
||||
(
|
||||
$config->get('URI', 'DisableExternalResources') &&
|
||||
$this->embeds_resource
|
||||
)
|
||||
) {
|
||||
if ($config->get('URI', 'DisableExternal')) {
|
||||
$our_host = $config->get('URI', 'Host');
|
||||
if ($our_host === null) return false;
|
||||
}
|
||||
@@ -193,8 +143,6 @@ class HTMLPurifier_AttrDef_URI extends HTMLPurifier_AttrDef
|
||||
$host = $this->host->validate($host, $config, $context);
|
||||
if ($host === false) $host = null;
|
||||
|
||||
if ($this->checkBlacklist($host, $config, $context)) return false;
|
||||
|
||||
// more lenient absolute checking
|
||||
if (isset($our_host)) {
|
||||
$host_parts = array_reverse(explode('.', $host));
|
||||
@@ -250,37 +198,10 @@ class HTMLPurifier_AttrDef_URI extends HTMLPurifier_AttrDef
|
||||
if ($query !== null) $result .= "?$query";
|
||||
if ($fragment !== null) $result .= "#$fragment";
|
||||
|
||||
// munge if necessary
|
||||
$munge = $config->get('URI', 'Munge');
|
||||
if (!empty($scheme_obj->browsable) && $munge !== null) {
|
||||
if ($authority !== null) {
|
||||
$result = str_replace('%s', rawurlencode($result), $munge);
|
||||
}
|
||||
}
|
||||
|
||||
return $result;
|
||||
|
||||
}
|
||||
|
||||
/**
 * Checks a host against an array blacklist
 *
 * A host matches when any entry of %URI.HostBlacklist occurs anywhere
 * inside it (plain substring search).
 *
 * @param $host Host to check; null or an empty string never matches
 * @param $config HTMLPurifier_Config instance
 * @param $context HTMLPurifier_Context instance
 * @return bool Is spam?
 */
function checkBlacklist($host, &$config, &$context) {
    // a missing host cannot contain a blacklisted fragment, and must not
    // be handed to strpos() as the haystack
    if ($host === null || $host === false || $host === '') return false;

    $blacklist = $config->get('URI', 'HostBlacklist');
    if (empty($blacklist)) return false;

    foreach ($blacklist as $blacklisted_host_fragment) {
        // an empty needle is an error on older PHP and matches every
        // haystack on PHP 8+, which would blacklist all hosts; skip it
        if ($blacklisted_host_fragment === null || $blacklisted_host_fragment === '') {
            continue;
        }
        if (strpos($host, $blacklisted_host_fragment) !== false) {
            return true;
        }
    }
    return false;
}
|
||||
|
||||
}
|
||||
|
||||
?>
|
||||
|
@@ -21,7 +21,7 @@ class HTMLPurifier_AttrTransform
|
||||
* Abstract: makes changes to the attributes dependent on multiple values.
|
||||
*
|
||||
* @param $attr Assoc array of attributes, usually from
|
||||
* HTMLPurifier_Token_Tag::$attr
|
||||
* HTMLPurifier_Token_Tag::$attributes
|
||||
* @param $config Mandatory HTMLPurifier_Config object.
|
||||
* @param $context Mandatory HTMLPurifier_Context object
|
||||
* @returns Processed attribute array.
|
||||
|
@@ -20,9 +20,10 @@ HTMLPurifier_ConfigSchema::define(
|
||||
class HTMLPurifier_ChildDef
|
||||
{
|
||||
/**
|
||||
* Type of child definition, usually right-most part of class name lowercase.
|
||||
* Used occasionally in terms of context.
|
||||
* @public
|
||||
* Type of child definition, usually right-most part of class name lowercase
|
||||
*
|
||||
* Used occasionally in terms of context. Possible values include
|
||||
* custom, required, optional and empty.
|
||||
*/
|
||||
var $type;
|
||||
|
||||
@@ -31,15 +32,12 @@ class HTMLPurifier_ChildDef
|
||||
*
|
||||
* This is necessary for redundant checking when changes affecting
|
||||
* a child node may cause a parent node to now be disallowed.
|
||||
*
|
||||
* @public
|
||||
*/
|
||||
var $allow_empty;
|
||||
|
||||
/**
|
||||
* Validates nodes according to definition and returns modification.
|
||||
*
|
||||
* @public
|
||||
* @param $tokens_of_children Array of HTMLPurifier_Token
|
||||
* @param $config HTMLPurifier_Config object
|
||||
* @param $context HTMLPurifier_Context object
|
||||
@@ -52,4 +50,391 @@ class HTMLPurifier_ChildDef
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Custom validation class, accepts DTD child definitions
 *
 * @warning Currently this class is an all or nothing proposition, that is,
 *          it will only give a bool return value.
 * @note This class is currently not used by any code, although it is unit
 *       tested.
 */
class HTMLPurifier_ChildDef_Custom extends HTMLPurifier_ChildDef
{
    var $type = 'custom';
    var $allow_empty = false;

    /**
     * Allowed child pattern as defined by the DTD
     */
    var $dtd_regex;

    /**
     * PCRE regex derived from $dtd_regex
     * @private
     */
    var $_pcre_regex;

    /**
     * @param $dtd_regex Allowed child pattern from the DTD
     */
    function HTMLPurifier_ChildDef_Custom($dtd_regex) {
        $this->dtd_regex = $dtd_regex;
        $this->_compileRegex();
    }

    /**
     * Compiles the PCRE regex from a DTD regex ($dtd_regex to $_pcre_regex)
     *
     * Spaces are stripped, the pattern is wrapped in parentheses if it does
     * not already start with one, every comma is made optional, and every
     * element name is wrapped in a group with an optional leading comma.
     */
    function _compileRegex() {
        $raw = str_replace(' ', '', $this->dtd_regex);
        // square-bracket offset: the curly-brace form is a parse error on
        // PHP 8; the emptiness check avoids reading offset 0 of ''
        if ($raw === '' || $raw[0] != '(') {
            $raw = "($raw)";
        }
        $reg = str_replace(',', ',?', $raw);
        $reg = preg_replace('/([#a-zA-Z0-9_.-]+)/', '(,?\\0)', $reg);
        $this->_pcre_regex = $reg;
    }

    /**
     * Matches the names of the direct children against the compiled pattern.
     *
     * @param $tokens_of_children Array of HTMLPurifier_Token
     * @param $config HTMLPurifier_Config object
     * @param $context HTMLPurifier_Context object
     * @return bool True if the comma-separated list of direct child names
     *         matches the pattern, false otherwise
     */
    function validateChildren($tokens_of_children, $config, &$context) {
        $list_of_children = '';
        $nesting = 0; // depth into the nest
        foreach ($tokens_of_children as $token) {
            if (!empty($token->is_whitespace)) continue;

            // decided before the depth is adjusted, so a start tag at depth
            // zero counts as a child while its matching end tag does not
            $is_child = ($nesting == 0); // direct

            if ($token->type == 'start') {
                $nesting++;
            } elseif ($token->type == 'end') {
                $nesting--;
            }

            if ($is_child) {
                $list_of_children .= $token->name . ',';
            }
        }
        $list_of_children = rtrim($list_of_children, ',');

        $okay =
            preg_match(
                '/^'.$this->_pcre_regex.'$/',
                $list_of_children
            );

        return (bool) $okay;
    }
}
|
||||
|
||||
/**
 * Definition that allows a set of elements, but disallows empty children.
 */
class HTMLPurifier_ChildDef_Required extends HTMLPurifier_ChildDef
{
    /**
     * Lookup table of allowed elements.
     */
    var $elements = array();

    /**
     * HTMLPurifier_Generator instance, used to turn a disallowed token back
     * into text when invalid children are escaped rather than dropped.
     */
    var $gen;

    var $allow_empty = false;
    var $type = 'required';

    /**
     * @param $elements List of allowed element names (lowercase). May also
     *                  be a single string of names separated by '|'.
     */
    function HTMLPurifier_ChildDef_Required($elements) {
        if (is_string($elements)) {
            $elements = str_replace(' ', '', $elements);
            $elements = explode('|', $elements);
        }
        // turn the list into a name => true lookup table
        $elements = array_flip($elements);
        foreach ($elements as $i => $x) $elements[$i] = true;
        $this->elements = $elements;
        $this->gen = new HTMLPurifier_Generator();
    }

    /**
     * Filters the children down to the allowed elements.
     *
     * @param $tokens_of_children Array of HTMLPurifier_Token
     * @param $config HTMLPurifier_Config object
     * @param $context HTMLPurifier_Context object
     * @return false if nothing but whitespace (or nothing at all) remains,
     *         true if the children were left unchanged, otherwise the new
     *         array of child tokens
     */
    function validateChildren($tokens_of_children, $config, &$context) {
        // if there are no tokens, delete parent node
        if (empty($tokens_of_children)) return false;

        // the new set of children
        $result = array();

        // current depth into the nest
        $nesting = 0;

        // whether or not we're deleting a node
        $is_deleting = false;

        // whether or not parsed character data is allowed
        // this controls whether or not we silently drop a tag
        // or generate escaped HTML from it
        $pcdata_allowed = isset($this->elements['#PCDATA']);

        // a little sanity check to make sure it's not ALL whitespace
        $all_whitespace = true;

        // some configuration
        $escape_invalid_children = $config->get('Core', 'EscapeInvalidChildren');

        foreach ($tokens_of_children as $token) {
            if (!empty($token->is_whitespace)) {
                $result[] = $token;
                continue;
            }
            $all_whitespace = false; // phew, we're not talking about whitespace

            // decided before the depth changes: a token seen at depth zero
            // is a direct child of the node being validated
            $is_child = ($nesting == 0);

            if ($token->type == 'start') {
                $nesting++;
            } elseif ($token->type == 'end') {
                $nesting--;
            }

            if ($is_child) {
                $is_deleting = false;
                if (!isset($this->elements[$token->name])) {
                    // disallowed direct child: everything up to the next
                    // direct child is treated as being deleted
                    $is_deleting = true;
                    if ($pcdata_allowed && $token->type == 'text') {
                        $result[] = $token;
                    } elseif ($pcdata_allowed && $escape_invalid_children) {
                        $result[] = new HTMLPurifier_Token_Text(
                            $this->gen->generateFromToken($token, $config)
                        );
                    }
                    continue;
                }
            }
            if (!$is_deleting || ($pcdata_allowed && $token->type == 'text')) {
                $result[] = $token;
            } elseif ($pcdata_allowed && $escape_invalid_children) {
                $result[] =
                    new HTMLPurifier_Token_Text(
                        $this->gen->generateFromToken( $token, $config )
                    );
            } else {
                // drop silently
            }
        }
        if (empty($result)) return false;
        if ($all_whitespace) return false;
        if ($tokens_of_children == $result) return true;
        return $result;
    }
}
|
||||
|
||||
/**
 * Definition that allows a set of elements, and allows no children.
 * @note This reuses the code of HTMLPurifier_ChildDef_Required as a hack;
 *       really, one shouldn't inherit from the other. The only altered
 *       behavior is that a false coming back from the parent is replaced
 *       with an empty array, so this class never returns false.
 */
class HTMLPurifier_ChildDef_Optional extends HTMLPurifier_ChildDef_Required
{
    var $type = 'optional';
    var $allow_empty = true;

    /**
     * Same as the parent's validation, except that false becomes array().
     */
    function validateChildren($tokens_of_children, $config, &$context) {
        $outcome = parent::validateChildren($tokens_of_children, $config, $context);
        return ($outcome === false) ? array() : $outcome;
    }
}
|
||||
|
||||
/**
 * Definition that disallows all elements.
 * @warning validateChildren() in this class is actually never called, because
 *          empty elements are corrected in HTMLPurifier_Strategy_MakeWellFormed
 *          before child definitions are parsed in earnest by
 *          HTMLPurifier_Strategy_FixNesting.
 */
class HTMLPurifier_ChildDef_Empty extends HTMLPurifier_ChildDef
{
    var $type = 'empty';
    var $allow_empty = true;

    function HTMLPurifier_ChildDef_Empty() {}

    /**
     * Always answers with an empty set of children, whatever was passed in.
     */
    function validateChildren($tokens_of_children, $config, &$context) {
        $no_children = array();
        return $no_children;
    }
}
|
||||
|
||||
/**
 * Definition that uses different definitions depending on context.
 *
 * The del and ins tags are notable because they allow different types of
 * elements depending on whether or not they're in a block or inline context.
 * Chameleon allows this behavior to happen by using two different
 * definitions depending on context. While this is somewhat generalized,
 * it is specifically intended for those two tags.
 */
class HTMLPurifier_ChildDef_Chameleon extends HTMLPurifier_ChildDef
{

    /**
     * Instance of the definition object to use when inline. Usually stricter.
     */
    var $inline;

    /**
     * Instance of the definition object to use when block.
     */
    var $block;

    /**
     * @param $inline List of elements to allow when inline.
     * @param $block List of elements to allow when block.
     */
    function HTMLPurifier_ChildDef_Chameleon($inline, $block) {
        $this->inline = new HTMLPurifier_ChildDef_Optional($inline);
        $this->block  = new HTMLPurifier_ChildDef_Optional($block);
    }

    /**
     * Delegates to the inline or block definition, chosen by the
     * 'ParentType' value read from the context.
     *
     * @return Result of the chosen definition's validateChildren(); false
     *         (after raising an E_USER_ERROR) for an unrecognized type
     */
    function validateChildren($tokens_of_children, $config, &$context) {
        $parent_type = $context->get('ParentType');

        // an unknown parent is handled the same way as an inline one
        if ($parent_type == 'unknown' || $parent_type == 'inline') {
            return $this->inline->validateChildren(
                $tokens_of_children, $config, $context);
        }

        if ($parent_type == 'block') {
            return $this->block->validateChildren(
                $tokens_of_children, $config, $context);
        }

        trigger_error('Invalid context', E_USER_ERROR);
        return false;
    }
}
|
||||
|
||||
/**
|
||||
* Definition for tables
|
||||
*/
|
||||
class HTMLPurifier_ChildDef_Table extends HTMLPurifier_ChildDef
|
||||
{
|
||||
var $allow_empty = false;
|
||||
var $type = 'table';
|
||||
function HTMLPurifier_ChildDef_Table() {}
|
||||
/**
 * Validates and reorders the direct children of a table.
 *
 * Direct children are sorted into five buckets: one caption, any number of
 * col/colgroup entries, one thead, one tfoot and any number of tbody/tr
 * entries. The result is emitted in exactly that order. A second caption is
 * discarded; a second thead or tfoot is renamed to tbody and treated as
 * content. Any other direct child is dropped, except whitespace text, which
 * is kept in front of the next collected element (or at the very end).
 *
 * @param $tokens_of_children Array of tokens that are the table's children
 * @param $config Configuration object (not read by this method)
 * @param $context Context object (not read by this method)
 * @return false when there are no children or no tbody/tr content, true
 *         when the children are already in the produced order, otherwise
 *         the replacement array of tokens
 */
function validateChildren($tokens_of_children, $config, &$context) {
    if (empty($tokens_of_children)) return false;

    // Sentinel: makes the loop run one extra time so that the collection
    // still in progress when the real tokens run out gets stashed. It is
    // popped off again before the final comparison.
    $tokens_of_children[] = false;

    // only one of each of these is allowed in a table
    $caption = false;
    $thead   = false;
    $tfoot   = false;

    // as many of these as you want
    $cols    = array();
    $content = array();

    $nesting = 0;           // current depth, used to spot direct children
    $is_collecting = false; // are we globbing together tokens to package
                            // into one of the collectors?
    $collection = array();  // collected nodes
    $tag_index = 0;         // leading whitespace may sit in front of the
                            // start tag, so this is the start tag's index

    foreach ($tokens_of_children as $token) {
        // depth is sampled before this token changes it, so a start tag at
        // depth zero is a child while its matching end tag is not
        $is_child = ($nesting == 0);

        if ($token === false) {
            // terminating sequence started
        } elseif ($token->type == 'start') {
            $nesting++;
        } elseif ($token->type == 'end') {
            $nesting--;
        }

        // handle node collection
        if ($is_collecting) {
            if ($is_child) {
                // a new direct child (or the sentinel) means the element
                // being collected is complete; its start tag names the
                // bucket it goes into
                switch ($collection[$tag_index]->name) {
                    case 'tr':
                    case 'tbody':
                        $content[] = $collection;
                        break;
                    case 'caption':
                        // only the first caption is kept
                        if ($caption !== false) break;
                        $caption = $collection;
                        break;
                    case 'thead':
                    case 'tfoot':
                        // variable variable: refers to $thead or $tfoot
                        $var = $collection[$tag_index]->name;
                        if ($$var === false) {
                            $$var = $collection;
                        } else {
                            // duplicate: rename the start tag and the last
                            // token to tbody, then file under content
                            $collection[$tag_index]->name = 'tbody';
                            $collection[count($collection)-1]->name = 'tbody';
                            $content[] = $collection;
                        }
                        break;
                    case 'colgroup':
                        $cols[] = $collection;
                        break;
                }
                $collection = array();
                $is_collecting = false;
                $tag_index = 0;
            } else {
                // add the node to the collection
                $collection[] = $token;
            }
        }

        // terminate
        if ($token === false) break;

        if ($is_child) {
            // determine what we're dealing with
            if ($token->name == 'col') {
                // the only empty tag of the lot, so it can be handled
                // immediately (together with any whitespace before it)
                $cols[] = array_merge($collection, array($token));
                $collection = array();
                $tag_index = 0;
                continue;
            }
            // NOTE: "break", not "continue", inside this switch. In PHP a
            // "continue" here only leaves the switch anyway and triggers a
            // warning on PHP 7.3 and later.
            switch ($token->name) {
                case 'caption':
                case 'colgroup':
                case 'thead':
                case 'tfoot':
                case 'tbody':
                case 'tr':
                    $is_collecting = true;
                    $collection[] = $token;
                    break;
                default:
                    // anything else is dropped, apart from whitespace,
                    // which is held for the next element
                    if ($token->type == 'text' && $token->is_whitespace) {
                        $collection[] = $token;
                        $tag_index++;
                    }
                    break;
            }
        }
    }

    if (empty($content)) return false;

    // reassemble in the required order
    $ret = array();
    if ($caption !== false) $ret = array_merge($ret, $caption);
    foreach ($cols as $token_array) $ret = array_merge($ret, $token_array);
    if ($thead !== false) $ret = array_merge($ret, $thead);
    if ($tfoot !== false) $ret = array_merge($ret, $tfoot);
    foreach ($content as $token_array) $ret = array_merge($ret, $token_array);
    if (!empty($collection) && $is_collecting == false) {
        // grab the trailing space
        $ret = array_merge($ret, $collection);
    }

    array_pop($tokens_of_children); // remove phantom token

    return ($ret === $tokens_of_children) ? true : $ret;
}
|
||||
}
|
||||
|
||||
?>
|
||||
|
@@ -1,60 +0,0 @@
|
||||
<?php
|
||||
|
||||
require_once 'HTMLPurifier/ChildDef.php';
|
||||
|
||||
/**
 * Definition that uses different definitions depending on context.
 *
 * The del and ins tags are notable because they allow different types of
 * elements depending on whether or not they're in a block or inline context.
 * Chameleon allows this behavior to happen by using two different
 * definitions depending on context. While this is somewhat generalized,
 * it is specifically intended for those two tags.
 */
class HTMLPurifier_ChildDef_Chameleon extends HTMLPurifier_ChildDef
{

    /**
     * Instance of the definition object to use when inline. Usually stricter.
     * @public
     */
    var $inline;

    /**
     * Instance of the definition object to use when block.
     * @public
     */
    var $block;

    var $type = 'chameleon';

    /**
     * Wraps each element list in its own HTMLPurifier_ChildDef_Optional.
     * @param $inline List of elements to allow when inline.
     * @param $block List of elements to allow when block.
     */
    function HTMLPurifier_ChildDef_Chameleon($inline, $block) {
        $this->inline = new HTMLPurifier_ChildDef_Optional($inline);
        $this->block  = new HTMLPurifier_ChildDef_Optional($block);
    }

    /**
     * Delegates validation to the inline or the block definition, chosen by
     * the 'ParentType' value held in the context.
     *
     * @param $tokens_of_children Array of tokens to validate
     * @param $config Configuration object, passed through to the delegate
     * @param $context Context object; its 'ParentType' entry is read
     * @return Whatever the chosen definition returns; false (after raising
     *         an E_USER_ERROR) when the parent type is not recognised
     */
    function validateChildren($tokens_of_children, $config, &$context) {
        $parent_type = $context->get('ParentType');

        // an unknown parent is treated like an inline one
        if ($parent_type == 'unknown' || $parent_type == 'inline') {
            return $this->inline->validateChildren(
                $tokens_of_children, $config, $context);
        }

        if ($parent_type == 'block') {
            return $this->block->validateChildren(
                $tokens_of_children, $config, $context);
        }

        trigger_error('Invalid context', E_USER_ERROR);
        return false;
    }
}
|
||||
|
||||
?>
|
@@ -1,75 +0,0 @@
|
||||
<?php
|
||||
|
||||
require_once 'HTMLPurifier/ChildDef.php';
|
||||
|
||||
/**
 * Custom validation class, accepts DTD child definitions
 *
 * @warning Currently this class is an all or nothing proposition, that is,
 *          it will only give a bool return value.
 * @note This class is currently not used by any code, although it is unit
 *       tested.
 */
class HTMLPurifier_ChildDef_Custom extends HTMLPurifier_ChildDef
{
    var $type = 'custom';
    var $allow_empty = false;
    /**
     * Allowed child pattern as defined by the DTD
     */
    var $dtd_regex;
    /**
     * PCRE regex derived from $dtd_regex
     * @private
     */
    var $_pcre_regex;
    /**
     * Stores the DTD pattern and compiles it straight away.
     * @param $dtd_regex Allowed child pattern from the DTD
     */
    function HTMLPurifier_ChildDef_Custom($dtd_regex) {
        $this->dtd_regex = $dtd_regex;
        $this->_compileRegex();
    }
    /**
     * Compiles the PCRE regex from a DTD regex ($dtd_regex to $_pcre_regex)
     *
     * Spaces are stripped, the pattern is wrapped in parentheses if it does
     * not already start with one, every comma is made optional, and every
     * element name becomes a group with an optional leading comma.
     */
    function _compileRegex() {
        $raw = str_replace(' ', '', $this->dtd_regex);
        // substr() rather than a string offset: the curly-brace offset
        // syntax no longer parses on PHP 8, and any offset access raises a
        // notice when the pattern is empty
        if (substr($raw, 0, 1) != '(') {
            $raw = "($raw)";
        }
        $reg = str_replace(',', ',?', $raw);
        $reg = preg_replace('/([#a-zA-Z0-9_.-]+)/', '(,?\\0)', $reg);
        $this->_pcre_regex = $reg;
    }
    /**
     * Checks the names of the direct children against the compiled pattern.
     *
     * Whitespace tokens are ignored. The names of the remaining direct
     * children are joined with commas and matched as a whole.
     *
     * @param $tokens_of_children Array of tokens to validate
     * @param $config Configuration object (not read by this method)
     * @param $context Context object (not read by this method)
     * @return Bool true if the list of children matches, false otherwise
     */
    function validateChildren($tokens_of_children, $config, &$context) {
        $list_of_children = '';
        $nesting = 0; // depth into the nest
        foreach ($tokens_of_children as $token) {
            if (!empty($token->is_whitespace)) continue;

            // sampled before the depth is updated, so only start/empty/text
            // tokens at depth zero count as direct children
            $is_child = ($nesting == 0); // direct

            if ($token->type == 'start') {
                $nesting++;
            } elseif ($token->type == 'end') {
                $nesting--;
            }

            if ($is_child) {
                $list_of_children .= $token->name . ',';
            }
        }
        $list_of_children = rtrim($list_of_children, ',');

        $okay =
            preg_match(
                '/^'.$this->_pcre_regex.'$/',
                $list_of_children
            );

        return (bool) $okay;
    }
}
|
||||
|
||||
?>
|
@@ -1,22 +0,0 @@
|
||||
<?php
|
||||
|
||||
require_once 'HTMLPurifier/ChildDef.php';
|
||||
|
||||
/**
 * Definition that disallows all elements.
 * @warning validateChildren() in this class is actually never called, because
 *          empty elements are corrected in HTMLPurifier_Strategy_MakeWellFormed
 *          before child definitions are parsed in earnest by
 *          HTMLPurifier_Strategy_FixNesting.
 */
class HTMLPurifier_ChildDef_Empty extends HTMLPurifier_ChildDef
{
    var $allow_empty = true;
    var $type = 'empty';

    /**
     * Nothing to configure.
     */
    function HTMLPurifier_ChildDef_Empty()
    {
    }

    /**
     * Replaces whatever children were passed in with none at all.
     * @param $tokens_of_children Array of tokens (ignored)
     * @param $config Configuration object (ignored)
     * @param $context Context object (ignored)
     * @return Always an empty array
     */
    function validateChildren($tokens_of_children, $config, &$context)
    {
        $no_children = array();
        return $no_children;
    }
}
|
||||
|
||||
?>
|
@@ -1,23 +0,0 @@
|
||||
<?php
|
||||
|
||||
require_once 'HTMLPurifier/ChildDef/Required.php';
|
||||
|
||||
/**
 * Definition that allows a set of elements, and also allows there to be no
 * children at all.
 * @note This is a hack to reuse code from HTMLPurifier_ChildDef_Required,
 *       really, one shouldn't inherit from the other. Only altered behavior
 *       is to overload a returned false with an array. Thus, it will never
 *       return false.
 */
class HTMLPurifier_ChildDef_Optional extends HTMLPurifier_ChildDef_Required
{
    var $allow_empty = true;
    var $type = 'optional';

    /**
     * Runs the parent validation and converts its false result into an
     * empty array; every other result is passed through unchanged.
     * @param $tokens_of_children Array of tokens to validate
     * @param $config Configuration object, passed to the parent
     * @param $context Context object, passed to the parent
     * @return true or an array of tokens, never false
     */
    function validateChildren($tokens_of_children, $config, &$context)
    {
        $outcome = parent::validateChildren($tokens_of_children, $config, $context);
        return ($outcome === false) ? array() : $outcome;
    }
}
|
||||
|
||||
?>
|
@@ -1,104 +0,0 @@
|
||||
<?php
|
||||
|
||||
require_once 'HTMLPurifier/ChildDef.php';
|
||||
|
||||
/**
 * Definition that allows a set of elements, but disallows empty children.
 */
class HTMLPurifier_ChildDef_Required extends HTMLPurifier_ChildDef
{
    /**
     * Lookup table of allowed elements.
     * @public
     */
    var $elements = array();
    /**
     * Generator used to turn invalid tokens into escaped text. Set by the
     * constructor, or on first use by validateChildren() for subclasses
     * whose own constructor does not call this one.
     * @private
     */
    var $gen;
    /**
     * Builds the lookup table of allowed elements.
     *
     * A string is stripped of spaces and split on '|'. The names then become
     * the keys of the lookup table; empty names are discarded.
     *
     * @param $elements List of allowed element names (lowercase).
     */
    function HTMLPurifier_ChildDef_Required($elements) {
        if (is_string($elements)) {
            $elements = str_replace(' ', '', $elements);
            $elements = explode('|', $elements);
        }
        $elements = array_flip($elements);
        foreach ($elements as $i => $x) {
            $elements[$i] = true;
            if (empty($i)) unset($elements[$i]);
        }
        $this->elements = $elements;
        $this->gen = new HTMLPurifier_Generator();
    }
    var $allow_empty = false;
    var $type = 'required';
    /**
     * Filters the children down to the allowed elements.
     *
     * Whitespace tokens are always kept. A direct child whose name is not in
     * $elements is removed together with everything nested inside it; when
     * #PCDATA is allowed, text inside it is kept and, if
     * %Core.EscapeInvalidChildren is on, the removed tags are kept as
     * escaped text instead of being dropped.
     *
     * @param $tokens_of_children Array of tokens to validate
     * @param $config Configuration object; Core.EscapeInvalidChildren is read
     * @param $context Context object (not read by this method)
     * @return false if nothing (or only whitespace) is left, true if nothing
     *         had to change, otherwise the new array of child tokens
     */
    function validateChildren($tokens_of_children, $config, &$context) {
        // if there are no tokens, delete parent node
        if (empty($tokens_of_children)) return false;

        // the new set of children
        $result = array();

        // current depth into the nest
        $nesting = 0;

        // whether or not we're deleting a node
        $is_deleting = false;

        // whether or not parsed character data is allowed
        // this controls whether or not we silently drop a tag
        // or generate escaped HTML from it
        $pcdata_allowed = isset($this->elements['#PCDATA']);

        // a little sanity check to make sure it's not ALL whitespace
        $all_whitespace = true;

        // some configuration
        $escape_invalid_children = $config->get('Core', 'EscapeInvalidChildren');

        // A subclass that defines its own constructor never runs ours, so
        // the generator may still be missing; create it before it is needed
        // rather than calling a method on a non-object below.
        if (!isset($this->gen)) {
            $this->gen = new HTMLPurifier_Generator();
        }

        foreach ($tokens_of_children as $token) {
            if (!empty($token->is_whitespace)) {
                $result[] = $token;
                continue;
            }
            $all_whitespace = false; // phew, we're not talking about whitespace

            // sampled before the depth is updated: a start tag at depth zero
            // is a direct child, its matching end tag is not
            $is_child = ($nesting == 0);

            if ($token->type == 'start') {
                $nesting++;
            } elseif ($token->type == 'end') {
                $nesting--;
            }

            if ($is_child) {
                // every direct child gets a fresh verdict
                $is_deleting = false;
                if (!isset($this->elements[$token->name])) {
                    $is_deleting = true;
                    if ($pcdata_allowed && $token->type == 'text') {
                        $result[] = $token;
                    } elseif ($pcdata_allowed && $escape_invalid_children) {
                        $result[] = new HTMLPurifier_Token_Text(
                            $this->gen->generateFromToken($token, $config)
                        );
                    }
                    continue;
                }
            }
            if (!$is_deleting || ($pcdata_allowed && $token->type == 'text')) {
                $result[] = $token;
            } elseif ($pcdata_allowed && $escape_invalid_children) {
                $result[] =
                    new HTMLPurifier_Token_Text(
                        $this->gen->generateFromToken( $token, $config )
                    );
            } else {
                // drop silently
            }
        }
        if (empty($result)) return false;
        if ($all_whitespace) return false;
        if ($tokens_of_children == $result) return true;
        return $result;
    }
}
|
||||
|
||||
?>
|
@@ -1,70 +0,0 @@
|
||||
<?php
|
||||
|
||||
require_once 'HTMLPurifier/ChildDef/Required.php';
|
||||
|
||||
/**
 * Takes the contents of blockquote when in strict and reformats for validation.
 *
 * From XHTML 1.0 Transitional to Strict, there is a notable change where
 * blockquote no longer takes inline content directly. This definition
 * accepts the same children as a flow context and then wraps every run of
 * top-level text and inline elements in the block wrapper element
 * ($def->info_block_wrapper), leaving block elements where they are.
 */
class HTMLPurifier_ChildDef_StrictBlockquote
extends HTMLPurifier_ChildDef_Required
{
    var $allow_empty = true;
    var $type = 'strictblockquote';
    /**
     * Whether $elements has been filled in from the HTML definition yet.
     */
    var $init = false;
    /**
     * The list of allowed elements is filled in lazily by validateChildren(),
     * but the generator used by the parent's validation has to exist up
     * front: the parent constructor, which normally creates it, is not run
     * because this class defines its own constructor.
     */
    function HTMLPurifier_ChildDef_StrictBlockquote() {
        $this->gen = new HTMLPurifier_Generator();
    }
    /**
     * Validates the children as flow content, then wraps top-level inline
     * runs in the block wrapper element.
     *
     * @param $tokens_of_children Array of tokens to validate
     * @param $config Configuration object; supplies the HTML definition
     * @param $context Context object, passed to the parent validation
     * @return Array of tokens (empty if the parent validation returned
     *         false); never true or false
     */
    function validateChildren($tokens_of_children, $config, &$context) {

        $def = $config->getHTMLDefinition();
        if (!$this->init) {
            // allow every flow element plus character data
            $this->elements = $def->info_flow_elements;
            $this->elements['#PCDATA'] = true;
            $this->init = true;
        }

        $result = parent::validateChildren($tokens_of_children, $config, $context);
        if ($result === false) return array();
        if ($result === true) $result = $tokens_of_children;

        $block_wrap_start = new HTMLPurifier_Token_Start($def->info_block_wrapper);
        $block_wrap_end   = new HTMLPurifier_Token_End(  $def->info_block_wrapper);
        $is_inline = false; // are we inside a wrapper we opened?
        $depth = 0;         // nesting depth of the current token
        $ret = array();

        // assuming that there are no comment tokens
        foreach ($result as $token) {
            // ifs are nested for readability
            if (!$is_inline) {
                if (!$depth) {
                    // top-level text or inline element: open a wrapper
                    if (($token->type == 'text') ||
                        ($def->info[$token->name]->type == 'inline')) {
                        $is_inline = true;
                        $ret[] = $block_wrap_start;
                    }
                }
            } else {
                if (!$depth) {
                    // starting tokens have been inline text / empty
                    if ($token->type == 'start' || $token->type == 'empty') {
                        if ($def->info[$token->name]->type == 'block') {
                            // a block element ends the inline run
                            $ret[] = $block_wrap_end;
                            $is_inline = false;
                        }
                    }
                }
            }
            $ret[] = $token;
            if ($token->type == 'start') $depth++;
            if ($token->type == 'end')   $depth--;
        }
        // close a wrapper that is still open at the end
        if ($is_inline) $ret[] = $block_wrap_end;
        return $ret;
    }
}
|
||||
|
||||
?>
|
@@ -1,142 +0,0 @@
|
||||
<?php
|
||||
|
||||
require_once 'HTMLPurifier/ChildDef.php';
|
||||
|
||||
/**
 * Definition for tables
 */
class HTMLPurifier_ChildDef_Table extends HTMLPurifier_ChildDef
{
    var $allow_empty = false;
    var $type = 'table';
    /**
     * Nothing to configure.
     */
    function HTMLPurifier_ChildDef_Table() {}
    /**
     * Validates and reorders the direct children of a table.
     *
     * Direct children are sorted into five buckets: one caption, any number
     * of col/colgroup entries, one thead, one tfoot and any number of
     * tbody/tr entries. The result is emitted in exactly that order. A
     * second caption is discarded; a second thead or tfoot is renamed to
     * tbody and treated as content. Any other direct child is dropped,
     * except whitespace text, which is kept in front of the next collected
     * element (or at the very end).
     *
     * @param $tokens_of_children Array of tokens that are the table's children
     * @param $config Configuration object (not read by this method)
     * @param $context Context object (not read by this method)
     * @return false when there are no children or no tbody/tr content, true
     *         when the children are already in the produced order, otherwise
     *         the replacement array of tokens
     */
    function validateChildren($tokens_of_children, $config, &$context) {
        if (empty($tokens_of_children)) return false;

        // Sentinel: makes the loop run one extra time so that the collection
        // still in progress when the real tokens run out gets stashed. It is
        // popped off again before the final comparison.
        $tokens_of_children[] = false;

        // only one of each of these is allowed in a table
        $caption = false;
        $thead   = false;
        $tfoot   = false;

        // as many of these as you want
        $cols    = array();
        $content = array();

        $nesting = 0;           // current depth, used to spot direct children
        $is_collecting = false; // are we globbing together tokens to package
                                // into one of the collectors?
        $collection = array();  // collected nodes
        $tag_index = 0;         // leading whitespace may sit in front of the
                                // start tag, so this is the start tag's index

        foreach ($tokens_of_children as $token) {
            // depth is sampled before this token changes it, so a start tag
            // at depth zero is a child while its matching end tag is not
            $is_child = ($nesting == 0);

            if ($token === false) {
                // terminating sequence started
            } elseif ($token->type == 'start') {
                $nesting++;
            } elseif ($token->type == 'end') {
                $nesting--;
            }

            // handle node collection
            if ($is_collecting) {
                if ($is_child) {
                    // a new direct child (or the sentinel) means the element
                    // being collected is complete; its start tag names the
                    // bucket it goes into
                    switch ($collection[$tag_index]->name) {
                        case 'tr':
                        case 'tbody':
                            $content[] = $collection;
                            break;
                        case 'caption':
                            // only the first caption is kept
                            if ($caption !== false) break;
                            $caption = $collection;
                            break;
                        case 'thead':
                        case 'tfoot':
                            // variable variable: refers to $thead or $tfoot
                            $var = $collection[$tag_index]->name;
                            if ($$var === false) {
                                $$var = $collection;
                            } else {
                                // duplicate: rename the start tag and the
                                // last token to tbody, then file under content
                                $collection[$tag_index]->name = 'tbody';
                                $collection[count($collection)-1]->name = 'tbody';
                                $content[] = $collection;
                            }
                            break;
                        case 'colgroup':
                            $cols[] = $collection;
                            break;
                    }
                    $collection = array();
                    $is_collecting = false;
                    $tag_index = 0;
                } else {
                    // add the node to the collection
                    $collection[] = $token;
                }
            }

            // terminate
            if ($token === false) break;

            if ($is_child) {
                // determine what we're dealing with
                if ($token->name == 'col') {
                    // the only empty tag of the lot, so it can be handled
                    // immediately (together with any whitespace before it)
                    $cols[] = array_merge($collection, array($token));
                    $collection = array();
                    $tag_index = 0;
                    continue;
                }
                // NOTE: "break", not "continue", inside this switch. In PHP
                // a "continue" here only leaves the switch anyway and
                // triggers a warning on PHP 7.3 and later.
                switch ($token->name) {
                    case 'caption':
                    case 'colgroup':
                    case 'thead':
                    case 'tfoot':
                    case 'tbody':
                    case 'tr':
                        $is_collecting = true;
                        $collection[] = $token;
                        break;
                    default:
                        // anything else is dropped, apart from whitespace,
                        // which is held for the next element
                        if ($token->type == 'text' && $token->is_whitespace) {
                            $collection[] = $token;
                            $tag_index++;
                        }
                        break;
                }
            }
        }

        if (empty($content)) return false;

        // reassemble in the required order
        $ret = array();
        if ($caption !== false) $ret = array_merge($ret, $caption);
        foreach ($cols as $token_array) $ret = array_merge($ret, $token_array);
        if ($thead !== false) $ret = array_merge($ret, $thead);
        if ($tfoot !== false) $ret = array_merge($ret, $tfoot);
        foreach ($content as $token_array) $ret = array_merge($ret, $token_array);
        if (!empty($collection) && $is_collecting == false) {
            // grab the trailing space
            $ret = array_merge($ret, $collection);
        }

        array_pop($tokens_of_children); // remove phantom token

        return ($ret === $tokens_of_children) ? true : $ret;
    }
}
|
||||
|
||||
?>
|
@@ -44,20 +44,6 @@ class HTMLPurifier_Config
|
||||
$this->def = $definition; // keep a copy around for checking
|
||||
}
|
||||
|
||||
/**
 * Convenience constructor that creates a config object based on a mixed var
 * @param mixed $config Variable that defines the state of the config
 *                      object. Can be: a HTMLPurifier_Config() object or
 *                      an array of directives based on loadArray().
 *                      Anything else yields a default configuration.
 * @return Configured HTMLPurifier_Config object
 */
function create($config) {
    if (is_a($config, 'HTMLPurifier_Config')) {
        // already a config object, hand it straight back
        return $config;
    }
    $created = HTMLPurifier_Config::createDefault();
    if (is_array($config)) {
        $created->loadArray($config);
    }
    return $created;
}
|
||||
|
||||
/**
|
||||
* Convenience constructor that creates a default configuration object.
|
||||
* @return Default HTMLPurifier_Config object.
|
||||
@@ -82,19 +68,6 @@ class HTMLPurifier_Config
|
||||
return $this->conf[$namespace][$key];
|
||||
}
|
||||
|
||||
/**
 * Retrieves an array of directives to values from a given namespace
 * @param $namespace String namespace
 * @return The stored values for that namespace; nothing (after raising an
 *         E_USER_WARNING) if the namespace is not defined
 */
function getBatch($namespace) {
    if (isset($this->def->info[$namespace])) {
        return $this->conf[$namespace];
    }
    trigger_error('Cannot retrieve undefined namespace',
        E_USER_WARNING);
    return;
}
|
||||
|
||||
/**
|
||||
* Sets a value to configuration.
|
||||
* @param $namespace String namespace
|
||||
@@ -161,7 +134,6 @@ class HTMLPurifier_Config
|
||||
*/
|
||||
function loadArray($config_array) {
|
||||
foreach ($config_array as $key => $value) {
|
||||
$key = str_replace('_', '.', $key);
|
||||
if (strpos($key, '.') !== false) {
|
||||
// condensed form
|
||||
list($namespace, $directive) = explode('.', $key);
|
||||
|
@@ -247,26 +247,11 @@ class HTMLPurifier_ConfigSchema {
|
||||
case 'bool':
|
||||
if (is_int($var) && ($var === 0 || $var === 1)) {
|
||||
$var = (bool) $var;
|
||||
} elseif (is_string($var)) {
|
||||
if ($var == 'on' || $var == 'true' || $var == '1') {
|
||||
$var = true;
|
||||
} elseif ($var == 'off' || $var == 'false' || $var == '0') {
|
||||
$var = false;
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
} elseif (!is_bool($var)) break;
|
||||
return $var;
|
||||
case 'list':
|
||||
case 'hash':
|
||||
case 'lookup':
|
||||
if (is_string($var)) {
|
||||
// simplistic string to array method that only works
|
||||
// for simple lists of tag names or alphanumeric characters
|
||||
$var = explode(',',$var);
|
||||
// remove spaces
|
||||
foreach ($var as $i => $j) $var[$i] = trim($j);
|
||||
}
|
||||
if (!is_array($var)) break;
|
||||
$keys = array_keys($var);
|
||||
if ($keys === array_keys($keys)) {
|
||||
|
@@ -104,14 +104,14 @@ class HTMLPurifier_Generator
|
||||
function generateFromToken($token) {
|
||||
if (!isset($token->type)) return '';
|
||||
if ($token->type == 'start') {
|
||||
$attr = $this->generateAttributes($token->attr);
|
||||
$attr = $this->generateAttributes($token->attributes);
|
||||
return '<' . $token->name . ($attr ? ' ' : '') . $attr . '>';
|
||||
|
||||
} elseif ($token->type == 'end') {
|
||||
return '</' . $token->name . '>';
|
||||
|
||||
} elseif ($token->type == 'empty') {
|
||||
$attr = $this->generateAttributes($token->attr);
|
||||
$attr = $this->generateAttributes($token->attributes);
|
||||
return '<' . $token->name . ($attr ? ' ' : '') . $attr .
|
||||
( $this->_xhtml ? ' /': '' )
|
||||
. '>';
|
||||
|
@@ -18,12 +18,6 @@ require_once 'HTMLPurifier/AttrTransform.php';
|
||||
require_once 'HTMLPurifier/AttrTransform/BdoDir.php';
|
||||
require_once 'HTMLPurifier/AttrTransform/ImgRequired.php';
|
||||
require_once 'HTMLPurifier/ChildDef.php';
|
||||
require_once 'HTMLPurifier/ChildDef/Chameleon.php';
|
||||
require_once 'HTMLPurifier/ChildDef/Empty.php';
|
||||
require_once 'HTMLPurifier/ChildDef/Required.php';
|
||||
require_once 'HTMLPurifier/ChildDef/Optional.php';
|
||||
require_once 'HTMLPurifier/ChildDef/Table.php';
|
||||
require_once 'HTMLPurifier/ChildDef/StrictBlockquote.php';
|
||||
require_once 'HTMLPurifier/Generator.php';
|
||||
require_once 'HTMLPurifier/Token.php';
|
||||
require_once 'HTMLPurifier/TagTransform.php';
|
||||
@@ -41,63 +35,6 @@ HTMLPurifier_ConfigSchema::define(
|
||||
'versions.'
|
||||
);
|
||||
|
||||
HTMLPurifier_ConfigSchema::define(
|
||||
'HTML', 'Strict', false, 'bool',
|
||||
'Determines whether or not to use Transitional (loose) or Strict rulesets. '.
|
||||
'This directive has been available since 1.3.0.'
|
||||
);
|
||||
|
||||
HTMLPurifier_ConfigSchema::define(
|
||||
'HTML', 'BlockWrapper', 'p', 'string',
|
||||
'String name of element to wrap inline elements that are inside a block '.
|
||||
'context. This only occurs in the children of blockquote in strict mode. '.
|
||||
'Example: by default value, <code><blockquote>Foo</blockquote></code> '.
|
||||
'would become <code><blockquote><p>Foo</p></blockquote></code>. The '.
|
||||
'<code><p></code> tags can be replaced '.
|
||||
'with whatever you desire, as long as it is a block level element. '.
|
||||
'This directive has been available since 1.3.0.'
|
||||
);
|
||||
|
||||
HTMLPurifier_ConfigSchema::define(
|
||||
'HTML', 'Parent', 'div', 'string',
|
||||
'String name of element that HTML fragment passed to library will be '.
|
||||
'inserted in. An interesting variation would be using span as the '.
|
||||
'parent element, meaning that only inline tags would be allowed. '.
|
||||
'This directive has been available since 1.3.0.'
|
||||
);
|
||||
|
||||
HTMLPurifier_ConfigSchema::define(
|
||||
'HTML', 'AllowedElements', null, 'lookup/null',
|
||||
'If HTML Purifier\'s tag set is unsatisfactory for your needs, you '.
|
||||
'can overload it with your own list of tags to allow. Note that this '.
|
||||
'method is subtractive: it does its job by taking away from HTML Purifier '.
|
||||
'usual feature set, so you cannot add a tag that HTML Purifier never '.
|
||||
'supported in the first place (like embed, form or head). If you change this, you '.
|
||||
'probably also want to change %HTML.AllowedAttributes. '.
|
||||
'<strong>Warning:</strong> If another directive conflicts with the '.
|
||||
'elements here, <em>that</em> directive will win and override. '.
|
||||
'This directive has been available since 1.3.0.'
|
||||
);
|
||||
|
||||
// Registers the HTML.AllowedAttributes directive (default null, type
// 'lookup/null'). The last argument is its description text.
HTMLPurifier_ConfigSchema::define(
    'HTML', 'AllowedAttributes', null, 'lookup/null',
    'If HTML Purifier\'s attribute set is unsatisfactory, overload it! '.
    'The syntax is \'tag.attr\' or \'*.attr\' for the global attributes '.
    '(style, id, class, dir, lang, xml:lang). '.
    '<strong>Warning:</strong> If another directive conflicts with the '.
    'elements here, <em>that</em> directive will win and override. For '.
    'example, %HTML.EnableAttrID will take precedence over *.id in this '.
    'directive.  You must set that directive to true before you can use '.
    'IDs at all. This directive has been available since 1.3.0.'
);
|
||||
|
||||
HTMLPurifier_ConfigSchema::define(
|
||||
'Attr', 'DisableURI', false, 'bool',
|
||||
'Disables all URIs in all forms. Not sure why you\'d want to do that '.
|
||||
'(after all, the Internet\'s founded on the notion of a hyperlink). '.
|
||||
'This directive has been available since 1.3.0.'
|
||||
);
|
||||
|
||||
/**
|
||||
* Defines the purified HTML type with large amounts of objects.
|
||||
*
|
||||
@@ -132,24 +69,11 @@ class HTMLPurifier_HTMLDefinition
|
||||
|
||||
/**
|
||||
* String name of parent element HTML will be going into.
|
||||
* @todo Allow this to be overloaded by user config
|
||||
* @public
|
||||
*/
|
||||
var $info_parent = 'div';
|
||||
|
||||
/**
|
||||
* Definition for parent element, allows parent element to be a
|
||||
* tag that's not allowed inside the HTML fragment.
|
||||
* @public
|
||||
*/
|
||||
var $info_parent_def;
|
||||
|
||||
/**
|
||||
* String name of element used to wrap inline elements in block context
|
||||
* @note This is rarely used except for BLOCKQUOTEs in strict mode
|
||||
* @public
|
||||
*/
|
||||
var $info_block_wrapper = 'p';
|
||||
|
||||
/**
|
||||
* Associative array of deprecated tag name to HTMLPurifier_TagTransform
|
||||
* @public
|
||||
@@ -168,25 +92,14 @@ class HTMLPurifier_HTMLDefinition
|
||||
*/
|
||||
var $info_attr_transform_post = array();
|
||||
|
||||
/**
|
||||
* Lookup table of flow elements
|
||||
* @public
|
||||
*/
|
||||
var $info_flow_elements = array();
|
||||
|
||||
/**
|
||||
* Boolean is a strict definition?
|
||||
* @public
|
||||
*/
|
||||
var $strict;
|
||||
|
||||
/**
|
||||
* Initializes the definition, the meat of the class.
|
||||
*/
|
||||
function setup($config) {
|
||||
|
||||
// some cached config values
|
||||
$this->strict = $config->get('HTML', 'Strict');
|
||||
// emulates the structure of the DTD
|
||||
// these are condensed, however, with bad stuff taken out
|
||||
// screening process was done by hand
|
||||
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
// info[] : initializes the definition objects
|
||||
@@ -198,19 +111,13 @@ class HTMLPurifier_HTMLDefinition
|
||||
array(
|
||||
'ins', 'del', 'blockquote', 'dd', 'li', 'div', 'em', 'strong',
|
||||
'dfn', 'code', 'samp', 'kbd', 'var', 'cite', 'abbr', 'acronym',
|
||||
'q', 'sub', 'tt', 'sup', 'i', 'b', 'big', 'small',
|
||||
'bdo', 'span', 'dt', 'p', 'h1', 'h2', 'h3', 'h4',
|
||||
'q', 'sub', 'tt', 'sup', 'i', 'b', 'big', 'small', 'u', 's',
|
||||
'strike', 'bdo', 'span', 'dt', 'p', 'h1', 'h2', 'h3', 'h4',
|
||||
'h5', 'h6', 'ol', 'ul', 'dl', 'address', 'img', 'br', 'hr',
|
||||
'pre', 'a', 'table', 'caption', 'thead', 'tfoot', 'tbody',
|
||||
'colgroup', 'col', 'td', 'th', 'tr'
|
||||
);
|
||||
|
||||
if (!$this->strict) {
|
||||
$allowed_tags[] = 'u';
|
||||
$allowed_tags[] = 's';
|
||||
$allowed_tags[] = 'strike';
|
||||
}
|
||||
|
||||
foreach ($allowed_tags as $tag) {
|
||||
$this->info[$tag] = new HTMLPurifier_ElementDef();
|
||||
}
|
||||
@@ -218,10 +125,6 @@ class HTMLPurifier_HTMLDefinition
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
// info[]->child : defines allowed children for elements
|
||||
|
||||
// emulates the structure of the DTD
|
||||
// however, these are condensed, with bad stuff taken out
|
||||
// screening process was done by hand
|
||||
|
||||
// entities: prefixed with e_ and _ replaces . from DTD
|
||||
// double underlines are entities we made up
|
||||
|
||||
@@ -245,9 +148,11 @@ class HTMLPurifier_HTMLDefinition
|
||||
$e_phrase_basic = 'em | strong | dfn | code | q | samp | kbd | var'.
|
||||
' | cite | abbr | acronym';
|
||||
$e_phrase = "$e_phrase_basic | $e_phrase_extra";
|
||||
$e_inline_forms = ''; // humor the dtd
|
||||
$e_misc_inline = 'ins | del';
|
||||
$e_misc = "$e_misc_inline";
|
||||
$e_inline = "a | $e_special | $e_fontstyle | $e_phrase";
|
||||
$e_inline = "a | $e_special | $e_fontstyle | $e_phrase".
|
||||
" | $e_inline_forms";
|
||||
// pseudo-property we created for convenience, see later on
|
||||
$e__inline = "#PCDATA | $e_inline | $e_misc_inline";
|
||||
// note the casing
|
||||
@@ -256,14 +161,14 @@ class HTMLPurifier_HTMLDefinition
|
||||
$e_lists = 'ul | ol | dl';
|
||||
$e_blocktext = 'pre | hr | blockquote | address';
|
||||
$e_block = "p | $e_heading | div | $e_lists | $e_blocktext | table";
|
||||
$e_Block = new HTMLPurifier_ChildDef_Optional($e_block);
|
||||
$e__flow = "#PCDATA | $e_block | $e_inline | $e_misc";
|
||||
$e_Flow = new HTMLPurifier_ChildDef_Optional($e__flow);
|
||||
$e_a_content = new HTMLPurifier_ChildDef_Optional("#PCDATA".
|
||||
" | $e_special | $e_fontstyle | $e_phrase | $e_misc_inline");
|
||||
" | $e_special | $e_fontstyle | $e_phrase | $e_inline_forms".
|
||||
" | $e_misc_inline");
|
||||
$e_pre_content = new HTMLPurifier_ChildDef_Optional("#PCDATA | a".
|
||||
" | $e_special_basic | $e_fontstyle_basic | $e_phrase_basic".
|
||||
" | $e_misc_inline");
|
||||
" | $e_inline_forms | $e_misc_inline");
|
||||
$e_form_content = new HTMLPurifier_ChildDef_Optional('');//unused
|
||||
$e_form_button_content = new HTMLPurifier_ChildDef_Optional('');//unused
|
||||
|
||||
@@ -271,16 +176,11 @@ class HTMLPurifier_HTMLDefinition
|
||||
$this->info['del']->child =
|
||||
new HTMLPurifier_ChildDef_Chameleon($e__inline, $e__flow);
|
||||
|
||||
$this->info['blockquote']->child=
|
||||
$this->info['dd']->child =
|
||||
$this->info['li']->child =
|
||||
$this->info['div']->child = $e_Flow;
|
||||
|
||||
if ($this->strict) {
|
||||
$this->info['blockquote']->child = new HTMLPurifier_ChildDef_StrictBlockquote();
|
||||
} else {
|
||||
$this->info['blockquote']->child = $e_Flow;
|
||||
}
|
||||
|
||||
$this->info['caption']->child =
|
||||
$this->info['em']->child =
|
||||
$this->info['strong']->child =
|
||||
@@ -320,13 +220,9 @@ class HTMLPurifier_HTMLDefinition
|
||||
|
||||
$this->info['dl']->child = new HTMLPurifier_ChildDef_Required('dt|dd');
|
||||
|
||||
if ($this->strict) {
|
||||
$this->info['address']->child = $e_Inline;
|
||||
} else {
|
||||
$this->info['address']->child =
|
||||
new HTMLPurifier_ChildDef_Optional("#PCDATA | p | $e_inline".
|
||||
" | $e_misc_inline");
|
||||
}
|
||||
$this->info['address']->child =
|
||||
new HTMLPurifier_ChildDef_Optional("#PCDATA | p | $e_inline".
|
||||
" | $e_misc_inline");
|
||||
|
||||
$this->info['img']->child =
|
||||
$this->info['br']->child =
|
||||
@@ -354,18 +250,15 @@ class HTMLPurifier_HTMLDefinition
|
||||
|
||||
// reuses $e_Inline and $e_Block
|
||||
foreach ($e_Inline->elements as $name => $bool) {
|
||||
if ($name == '#PCDATA') continue;
|
||||
if ($name == '#PCDATA' || $name == '') continue;
|
||||
$this->info[$name]->type = 'inline';
|
||||
}
|
||||
|
||||
$e_Block = new HTMLPurifier_ChildDef_Optional($e_block);
|
||||
foreach ($e_Block->elements as $name => $bool) {
|
||||
$this->info[$name]->type = 'block';
|
||||
}
|
||||
|
||||
foreach ($e_Flow->elements as $name => $bool) {
|
||||
$this->info_flow_elements[$name] = true;
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
// info[]->excludes : defines elements that aren't allowed in here
|
||||
|
||||
@@ -455,23 +348,16 @@ class HTMLPurifier_HTMLDefinition
|
||||
$this->info['td']->attr['colspan'] =
|
||||
$this->info['th']->attr['colspan'] = $e__NumberSpan;
|
||||
|
||||
if (!$config->get('Attr', 'DisableURI')) {
|
||||
$e_URI = new HTMLPurifier_AttrDef_URI();
|
||||
$this->info['a']->attr['href'] =
|
||||
$this->info['img']->attr['longdesc'] =
|
||||
$this->info['del']->attr['cite'] =
|
||||
$this->info['ins']->attr['cite'] =
|
||||
$this->info['blockquote']->attr['cite'] =
|
||||
$this->info['q']->attr['cite'] = $e_URI;
|
||||
|
||||
// URI that causes HTTP request
|
||||
$this->info['img']->attr['src'] = new HTMLPurifier_AttrDef_URI(true);
|
||||
}
|
||||
$e_URI = new HTMLPurifier_AttrDef_URI();
|
||||
$this->info['a']->attr['href'] =
|
||||
$this->info['img']->attr['longdesc'] =
|
||||
$this->info['del']->attr['cite'] =
|
||||
$this->info['ins']->attr['cite'] =
|
||||
$this->info['blockquote']->attr['cite'] =
|
||||
$this->info['q']->attr['cite'] = $e_URI;
|
||||
|
||||
if (!$this->strict) {
|
||||
$this->info['li']->attr['value'] = new HTMLPurifier_AttrDef_Integer();
|
||||
$this->info['ol']->attr['start'] = new HTMLPurifier_AttrDef_Integer();
|
||||
}
|
||||
// URI that causes HTTP request
|
||||
$this->info['img']->attr['src'] = new HTMLPurifier_AttrDef_URI(true);
|
||||
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
// info_tag_transform : transformations of tags
|
||||
@@ -536,53 +422,6 @@ class HTMLPurifier_HTMLDefinition
|
||||
}
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
// info_block_wrapper : wraps inline elements in block context
|
||||
|
||||
$block_wrapper = $config->get('HTML', 'BlockWrapper');
|
||||
if (isset($e_Block->elements[$block_wrapper])) {
|
||||
$this->info_block_wrapper = $block_wrapper;
|
||||
} else {
|
||||
trigger_error('Cannot use non-block element as block wrapper.',
|
||||
E_USER_ERROR);
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
// info_parent : parent element of the HTML fragment
|
||||
|
||||
$parent = $config->get('HTML', 'Parent');
|
||||
if (isset($this->info[$parent])) {
|
||||
$this->info_parent = $parent;
|
||||
} else {
|
||||
trigger_error('Cannot use unrecognized element as parent.',
|
||||
E_USER_ERROR);
|
||||
}
|
||||
$this->info_parent_def = $this->info[$this->info_parent];
|
||||
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
// %HTML.Allowed(Elements|Attributes) : cut non-allowed elements
|
||||
|
||||
$allowed_elements = $config->get('HTML', 'AllowedElements');
|
||||
if (is_array($allowed_elements)) {
|
||||
foreach ($this->info as $name => $d) {
|
||||
if(!isset($allowed_elements[$name])) unset($this->info[$name]);
|
||||
}
|
||||
}
|
||||
$allowed_attributes = $config->get('HTML', 'AllowedAttributes');
|
||||
if (is_array($allowed_attributes)) {
|
||||
foreach ($this->info_global_attr as $attr_key => $info) {
|
||||
if (!isset($allowed_attributes["*.$attr_key"])) {
|
||||
unset($this->info_global_attr[$attr_key]);
|
||||
}
|
||||
}
|
||||
foreach ($this->info as $tag => $info) {
|
||||
foreach ($info->attr as $attr => $attr_info) {
|
||||
if (!isset($allowed_attributes["$tag.$attr"])) {
|
||||
unset($this->info[$tag]->attr[$attr]);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
function setAttrForTableElements($attr, $def) {
|
||||
|
@@ -143,18 +143,18 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer
|
||||
)
|
||||
);
|
||||
if ($attribute_string) {
|
||||
$attr = $this->parseAttributeString(
|
||||
$attribute_string
|
||||
, $config, $context
|
||||
);
|
||||
$attributes = $this->parseAttributeString(
|
||||
$attribute_string
|
||||
, $config, $context
|
||||
);
|
||||
} else {
|
||||
$attr = array();
|
||||
$attributes = array();
|
||||
}
|
||||
|
||||
if ($is_self_closing) {
|
||||
$array[] = new HTMLPurifier_Token_Empty($type, $attr);
|
||||
$array[] = new HTMLPurifier_Token_Empty($type, $attributes);
|
||||
} else {
|
||||
$array[] = new HTMLPurifier_Token_Start($type, $attr);
|
||||
$array[] = new HTMLPurifier_Token_Start($type, $attributes);
|
||||
}
|
||||
$cursor = $position_next_gt + 1;
|
||||
$inside_tag = false;
|
||||
|
@@ -1,149 +0,0 @@
|
||||
<?php
|
||||
|
||||
require_once 'HTMLPurifier/Generator.php';
|
||||
require_once 'HTMLPurifier/Token.php';
|
||||
require_once 'HTMLPurifier/Encoder.php';
|
||||
|
||||
class HTMLPurifier_Printer
|
||||
{
|
||||
|
||||
/**
|
||||
* Instance of HTMLPurifier_Generator for HTML generation convenience funcs
|
||||
*/
|
||||
var $generator;
|
||||
|
||||
/**
|
||||
* Instance of HTMLPurifier_Config, for easy access
|
||||
*/
|
||||
var $config;
|
||||
|
||||
/**
|
||||
* Initialize $generator.
|
||||
*/
|
||||
function HTMLPurifier_Printer() {
|
||||
$this->generator = new HTMLPurifier_Generator();
|
||||
}
|
||||
|
||||
/**
|
||||
* Main function that renders object or aspect of that object
|
||||
* @param $config Configuration object
|
||||
*/
|
||||
function render($config) {}
|
||||
|
||||
/**
|
||||
* Returns a start tag
|
||||
* @param $tag Tag name
|
||||
* @param $attr Attribute array
|
||||
*/
|
||||
function start($tag, $attr = array()) {
|
||||
return $this->generator->generateFromToken(
|
||||
new HTMLPurifier_Token_Start($tag, $attr ? $attr : array())
|
||||
);
|
||||
}
|
||||
|
||||
/**
|
||||
     * Returns an end tag
|
||||
* @param $tag Tag name
|
||||
*/
|
||||
function end($tag) {
|
||||
return $this->generator->generateFromToken(
|
||||
new HTMLPurifier_Token_End($tag)
|
||||
);
|
||||
}
|
||||
|
||||
/**
|
||||
* Prints a complete element with content inside
|
||||
* @param $tag Tag name
|
||||
* @param $contents Element contents
|
||||
* @param $attr Tag attributes
|
||||
* @param $escape Bool whether or not to escape contents
|
||||
*/
|
||||
function element($tag, $contents, $attr = array(), $escape = true) {
|
||||
return $this->start($tag, $attr) .
|
||||
($escape ? $this->escape($contents) : $contents) .
|
||||
$this->end($tag);
|
||||
}
|
||||
|
||||
/**
|
||||
* Prints a simple key/value row in a table.
|
||||
* @param $name Key
|
||||
* @param $value Value
|
||||
*/
|
||||
function row($name, $value) {
|
||||
if (is_bool($value)) $value = $value ? 'On' : 'Off';
|
||||
return
|
||||
$this->start('tr') . "\n" .
|
||||
$this->element('th', $name) . "\n" .
|
||||
$this->element('td', $value) . "\n" .
|
||||
$this->end('tr')
|
||||
;
|
||||
}
|
||||
|
||||
/**
|
||||
* Escapes a string for HTML output.
|
||||
* @param $string String to escape
|
||||
*/
|
||||
function escape($string) {
|
||||
$string = HTMLPurifier_Encoder::cleanUTF8($string);
|
||||
$string = htmlspecialchars($string, ENT_COMPAT, 'UTF-8');
|
||||
return $string;
|
||||
}
|
||||
|
||||
/**
|
||||
     * Takes a list of strings and turns them into a single comma-separated string
|
||||
* @param $array List of strings
|
||||
     * @param $polite Bool whether or not to add an "and" before the last
|
||||
*/
|
||||
function listify($array, $polite = false) {
|
||||
if (empty($array)) return 'None';
|
||||
$ret = '';
|
||||
$i = count($array);
|
||||
foreach ($array as $value) {
|
||||
$i--;
|
||||
$ret .= $value;
|
||||
if ($i > 0 && !($polite && $i == 1)) $ret .= ', ';
|
||||
if ($polite && $i == 1) $ret .= 'and ';
|
||||
}
|
||||
return $ret;
|
||||
}
|
||||
|
||||
/**
|
||||
* Retrieves the class of an object without prefixes, as well as metadata
|
||||
* @param $obj Object to determine class of
|
||||
     * @param $sec_prefix Further prefix to remove
|
||||
*/
|
||||
function getClass($obj, $sec_prefix = '') {
|
||||
static $five = null;
|
||||
if ($five === null) $five = version_compare(PHP_VERSION, '5', '>=');
|
||||
$prefix = 'HTMLPurifier_' . $sec_prefix;
|
||||
if (!$five) $prefix = strtolower($prefix);
|
||||
$class = str_replace($prefix, '', get_class($obj));
|
||||
$lclass = strtolower($class);
|
||||
$class .= '(';
|
||||
switch ($lclass) {
|
||||
case 'enum':
|
||||
$values = array();
|
||||
foreach ($obj->valid_values as $value => $bool) {
|
||||
$values[] = $value;
|
||||
}
|
||||
$class .= implode(', ', $values);
|
||||
break;
|
||||
case 'composite':
|
||||
$values = array();
|
||||
foreach ($obj->defs as $def) {
|
||||
$values[] = $this->getClass($def, $sec_prefix);
|
||||
}
|
||||
$class .= implode(', ', $values);
|
||||
break;
|
||||
case 'multiple':
|
||||
$class .= $this->getClass($obj->single, $sec_prefix) . ', ';
|
||||
$class .= $obj->max;
|
||||
break;
|
||||
}
|
||||
$class .= ')';
|
||||
return $class;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
?>
|
@@ -1,40 +0,0 @@
|
||||
<?php
|
||||
|
||||
require_once 'HTMLPurifier/Printer.php';
|
||||
|
||||
class HTMLPurifier_Printer_CSSDefinition extends HTMLPurifier_Printer
|
||||
{
|
||||
|
||||
var $def;
|
||||
|
||||
function render($config) {
|
||||
$this->def = $config->getCSSDefinition();
|
||||
$ret = '';
|
||||
|
||||
$ret .= $this->start('div', array('class' => 'HTMLPurifier_Printer'));
|
||||
$ret .= $this->start('table');
|
||||
|
||||
$ret .= $this->element('caption', 'Properties ($info)');
|
||||
|
||||
$ret .= $this->start('thead');
|
||||
$ret .= $this->start('tr');
|
||||
$ret .= $this->element('th', 'Property', array('class' => 'heavy'));
|
||||
$ret .= $this->element('th', 'Definition', array('class' => 'heavy', 'style' => 'width:auto;'));
|
||||
$ret .= $this->end('tr');
|
||||
$ret .= $this->end('thead');
|
||||
|
||||
ksort($this->def->info);
|
||||
foreach ($this->def->info as $property => $obj) {
|
||||
$name = $this->getClass($obj, 'AttrDef_');
|
||||
$ret .= $this->row($property, $name);
|
||||
}
|
||||
|
||||
$ret .= $this->end('table');
|
||||
$ret .= $this->end('div');
|
||||
|
||||
return $ret;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
?>
|
@@ -1,206 +0,0 @@
|
||||
<?php
|
||||
|
||||
require_once 'HTMLPurifier/Printer.php';
|
||||
|
||||
class HTMLPurifier_Printer_HTMLDefinition extends HTMLPurifier_Printer
|
||||
{
|
||||
|
||||
/**
|
||||
* Instance of HTMLPurifier_HTMLDefinition, for easy access
|
||||
*/
|
||||
var $def;
|
||||
|
||||
function render(&$config) {
|
||||
$ret = '';
|
||||
$this->config =& $config;
|
||||
$this->def =& $config->getHTMLDefinition();
|
||||
$def =& $this->def;
|
||||
|
||||
$ret .= $this->start('div', array('class' => 'HTMLPurifier_Printer'));
|
||||
$ret .= $this->start('table');
|
||||
$ret .= $this->element('caption', 'Environment');
|
||||
|
||||
$ret .= $this->row('Parent of fragment', $def->info_parent);
|
||||
$ret .= $this->row('Strict mode', $def->strict);
|
||||
if ($def->strict) $ret .= $this->row('Block wrap name', $def->info_block_wrapper);
|
||||
|
||||
$ret .= $this->start('tr');
|
||||
$ret .= $this->element('th', 'Global attributes');
|
||||
$ret .= $this->element('td', $this->listifyAttr($def->info_global_attr),0,0);
|
||||
$ret .= $this->end('tr');
|
||||
|
||||
$ret .= $this->renderChildren($def->info_parent_def->child);
|
||||
|
||||
$ret .= $this->start('tr');
|
||||
$ret .= $this->element('th', 'Tag transforms');
|
||||
$list = array();
|
||||
foreach ($def->info_tag_transform as $old => $new) {
|
||||
$new = $this->getClass($new, 'TagTransform_');
|
||||
$list[] = "<$old> with $new";
|
||||
}
|
||||
$ret .= $this->element('td', $this->listify($list));
|
||||
$ret .= $this->end('tr');
|
||||
|
||||
$ret .= $this->start('tr');
|
||||
$ret .= $this->element('th', 'Pre-AttrTransform');
|
||||
$ret .= $this->element('td', $this->listifyObjectList($def->info_attr_transform_pre));
|
||||
$ret .= $this->end('tr');
|
||||
|
||||
$ret .= $this->start('tr');
|
||||
$ret .= $this->element('th', 'Post-AttrTransform');
|
||||
$ret .= $this->element('td', $this->listifyObjectList($def->info_attr_transform_post));
|
||||
$ret .= $this->end('tr');
|
||||
|
||||
$ret .= $this->end('table');
|
||||
|
||||
|
||||
$ret .= $this->renderInfo();
|
||||
|
||||
|
||||
$ret .= $this->end('div');
|
||||
|
||||
return $ret;
|
||||
}
|
||||
|
||||
/**
|
||||
* Renders the Elements ($info) table
|
||||
*/
|
||||
function renderInfo() {
|
||||
$ret = '';
|
||||
$ret .= $this->start('table');
|
||||
$ret .= $this->element('caption', 'Elements ($info)');
|
||||
ksort($this->def->info);
|
||||
$ret .= $this->start('tr');
|
||||
$ret .= $this->element('th', 'Allowed tags', array('colspan' => 2, 'class' => 'heavy'));
|
||||
$ret .= $this->end('tr');
|
||||
$ret .= $this->start('tr');
|
||||
$ret .= $this->element('td', $this->listifyTagLookup($this->def->info), array('colspan' => 2));
|
||||
$ret .= $this->end('tr');
|
||||
foreach ($this->def->info as $name => $def) {
|
||||
$ret .= $this->start('tr');
|
||||
$ret .= $this->element('th', "<$name>", array('class'=>'heavy', 'colspan' => 2));
|
||||
$ret .= $this->end('tr');
|
||||
$ret .= $this->start('tr');
|
||||
$ret .= $this->element('th', 'Type');
|
||||
$ret .= $this->element('td', ucfirst($def->type));
|
||||
$ret .= $this->end('tr');
|
||||
if (!empty($def->excludes)) {
|
||||
$ret .= $this->start('tr');
|
||||
$ret .= $this->element('th', 'Excludes');
|
||||
$ret .= $this->element('td', $this->listifyTagLookup($def->excludes));
|
||||
$ret .= $this->end('tr');
|
||||
}
|
||||
if (!empty($def->attr_transform_pre)) {
|
||||
$ret .= $this->start('tr');
|
||||
$ret .= $this->element('th', 'Pre-AttrTransform');
|
||||
$ret .= $this->element('td', $this->listifyObjectList($def->attr_transform_pre));
|
||||
$ret .= $this->end('tr');
|
||||
}
|
||||
if (!empty($def->attr_transform_post)) {
|
||||
$ret .= $this->start('tr');
|
||||
$ret .= $this->element('th', 'Post-AttrTransform');
|
||||
$ret .= $this->element('td', $this->listifyObjectList($def->attr_transform_post));
|
||||
$ret .= $this->end('tr');
|
||||
}
|
||||
if (!empty($def->auto_close)) {
|
||||
$ret .= $this->start('tr');
|
||||
$ret .= $this->element('th', 'Auto closed by');
|
||||
$ret .= $this->element('td', $this->listifyTagLookup($def->auto_close));
|
||||
$ret .= $this->end('tr');
|
||||
}
|
||||
$ret .= $this->start('tr');
|
||||
$ret .= $this->element('th', 'Allowed attributes');
|
||||
$ret .= $this->element('td',$this->listifyAttr($def->attr),0,0);
|
||||
$ret .= $this->end('tr');
|
||||
|
||||
$ret .= $this->renderChildren($def->child);
|
||||
}
|
||||
$ret .= $this->end('table');
|
||||
return $ret;
|
||||
}
|
||||
|
||||
/**
|
||||
* Renders a row describing the allowed children of an element
|
||||
* @param $def HTMLPurifier_ChildDef of pertinent element
|
||||
*/
|
||||
function renderChildren($def) {
|
||||
$context = new HTMLPurifier_Context();
|
||||
$ret = '';
|
||||
$ret .= $this->start('tr');
|
||||
$elements = array();
|
||||
$attr = array();
|
||||
if (isset($def->elements)) {
|
||||
if ($def->type == 'strictblockquote') $def->validateChildren(array(), $this->config, $context);
|
||||
$elements = $def->elements;
|
||||
} elseif ($def->type == 'chameleon') {
|
||||
$attr['rowspan'] = 2;
|
||||
} elseif ($def->type == 'empty') {
|
||||
$elements = array();
|
||||
} elseif ($def->type == 'table') {
|
||||
$elements = array('col', 'caption', 'colgroup', 'thead',
|
||||
'tfoot', 'tbody', 'tr');
|
||||
}
|
||||
$ret .= $this->element('th', 'Allowed children', $attr);
|
||||
|
||||
if ($def->type == 'chameleon') {
|
||||
|
||||
$ret .= $this->element('td',
|
||||
'<em>Block</em>: ' .
|
||||
$this->escape($this->listifyTagLookup($def->block->elements)),0,0);
|
||||
$ret .= $this->end('tr');
|
||||
$ret .= $this->start('tr');
|
||||
$ret .= $this->element('td',
|
||||
'<em>Inline</em>: ' .
|
||||
$this->escape($this->listifyTagLookup($def->inline->elements)),0,0);
|
||||
|
||||
} else {
|
||||
$ret .= $this->element('td',
|
||||
'<em>'.ucfirst($def->type).'</em>: ' .
|
||||
$this->escape($this->listifyTagLookup($elements)),0,0);
|
||||
}
|
||||
$ret .= $this->end('tr');
|
||||
return $ret;
|
||||
}
|
||||
|
||||
/**
|
||||
* Listifies a tag lookup table.
|
||||
* @param $array Tag lookup array in form of array('tagname' => true)
|
||||
*/
|
||||
function listifyTagLookup($array) {
|
||||
$list = array();
|
||||
foreach ($array as $name => $discard) {
|
||||
if ($name !== '#PCDATA' && !isset($this->def->info[$name])) continue;
|
||||
$list[] = $name;
|
||||
}
|
||||
return $this->listify($list);
|
||||
}
|
||||
|
||||
/**
|
||||
* Listifies a list of objects by retrieving class names and internal state
|
||||
* @param $array List of objects
|
||||
* @todo Also add information about internal state
|
||||
*/
|
||||
function listifyObjectList($array) {
|
||||
$list = array();
|
||||
foreach ($array as $discard => $obj) {
|
||||
$list[] = $this->getClass($obj, 'AttrTransform_');
|
||||
}
|
||||
return $this->listify($list);
|
||||
}
|
||||
|
||||
/**
|
||||
* Listifies a hash of attributes to AttrDef classes
|
||||
* @param $array Array hash in form of array('attrname' => HTMLPurifier_AttrDef)
|
||||
*/
|
||||
function listifyAttr($array) {
|
||||
$list = array();
|
||||
foreach ($array as $name => $obj) {
|
||||
if ($obj === false) continue;
|
||||
$list[] = "$name = <i>" . $this->getClass($obj, 'AttrDef_') . '</i>';
|
||||
}
|
||||
return $this->listify($list);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
?>
|
@@ -104,11 +104,7 @@ class HTMLPurifier_Strategy_FixNesting extends HTMLPurifier_Strategy
|
||||
if ($count = count($stack)) {
|
||||
$parent_index = $stack[$count-1];
|
||||
$parent_name = $tokens[$parent_index]->name;
|
||||
if ($parent_index == 0) {
|
||||
$parent_def = $definition->info_parent_def;
|
||||
} else {
|
||||
$parent_def = $definition->info[$parent_name];
|
||||
}
|
||||
$parent_def = $definition->info[$parent_name];
|
||||
} else {
|
||||
// unknown info, it won't be used anyway
|
||||
$parent_index = $parent_name = $parent_def = null;
|
||||
@@ -145,25 +141,14 @@ class HTMLPurifier_Strategy_FixNesting extends HTMLPurifier_Strategy
|
||||
if ($excluded) {
|
||||
// there is an exclusion, remove the entire node
|
||||
$result = false;
|
||||
$excludes = array(); // not used, but good to initialize anyway
|
||||
} else {
|
||||
// DEFINITION CALL
|
||||
if ($i === 0) {
|
||||
// special processing for the first node
|
||||
$def = $definition->info_parent_def;
|
||||
} else {
|
||||
$def = $definition->info[$tokens[$i]->name];
|
||||
|
||||
}
|
||||
$def = $definition->info[$tokens[$i]->name];
|
||||
$child_def = $def->child;
|
||||
|
||||
if (!empty($def->child)) {
|
||||
// have DTD child def validate children
|
||||
$result = $def->child->validateChildren(
|
||||
$child_tokens, $config, $context);
|
||||
} else {
|
||||
// weird, no child definition, get rid of everything
|
||||
$result = false;
|
||||
}
|
||||
// have DTD child def validate children
|
||||
$result = $child_def->validateChildren(
|
||||
$child_tokens, $config, $context);
|
||||
|
||||
// determine whether or not this element has any exclusions
|
||||
$excludes = $def->excludes;
|
||||
@@ -243,20 +228,13 @@ class HTMLPurifier_Strategy_FixNesting extends HTMLPurifier_Strategy
|
||||
|
||||
// Test if the token indeed is a start tag, if not, move forward
|
||||
// and test again.
|
||||
$size = count($tokens);
|
||||
while ($i < $size and $tokens[$i]->type != 'start') {
|
||||
if ($tokens[$i]->type == 'end') {
|
||||
// pop a token index off the stack if we ended a node
|
||||
array_pop($stack);
|
||||
// pop an exclusion lookup off exclusion stack if
|
||||
// we ended node and that node had exclusions
|
||||
if ($i == 0 || $i == $size - 1) {
|
||||
// use specialized var if it's the super-parent
|
||||
$s_excludes = $definition->info_parent_def->excludes;
|
||||
} else {
|
||||
$s_excludes = $definition->info[$tokens[$i]->name]->excludes;
|
||||
}
|
||||
if ($s_excludes) {
|
||||
if ($definition->info[$tokens[$i]->name]->excludes) {
|
||||
array_pop($exclude_stack);
|
||||
}
|
||||
}
|
||||
|
@@ -30,7 +30,7 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
|
||||
$token->type == 'start' ) {
|
||||
|
||||
$result[] = new HTMLPurifier_Token_Empty($token->name,
|
||||
$token->attr);
|
||||
$token->attributes);
|
||||
continue;
|
||||
}
|
||||
|
||||
@@ -39,7 +39,7 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
|
||||
$token->type == 'empty' ) {
|
||||
|
||||
$result[] = new HTMLPurifier_Token_Start($token->name,
|
||||
$token->attr);
|
||||
$token->attributes);
|
||||
$result[] = new HTMLPurifier_Token_End($token->name);
|
||||
|
||||
continue;
|
||||
|
@@ -5,14 +5,6 @@ require_once 'HTMLPurifier/HTMLDefinition.php';
|
||||
require_once 'HTMLPurifier/Generator.php';
|
||||
require_once 'HTMLPurifier/TagTransform.php';
|
||||
|
||||
HTMLPurifier_ConfigSchema::define(
|
||||
'Core', 'RemoveInvalidImg', true, 'bool',
|
||||
'This directive enables pre-emptive URI checking in <code>img</code> '.
|
||||
'tags, as the attribute validation strategy is not authorized to '.
|
||||
'remove elements from the document. This directive has been available '.
|
||||
'since 1.3.0, revert to pre-1.3.0 behavior by setting to false.'
|
||||
);
|
||||
|
||||
/**
|
||||
* Removes all unrecognized tags from the list of tokens.
|
||||
*
|
||||
@@ -33,26 +25,7 @@ class HTMLPurifier_Strategy_RemoveForeignElements extends HTMLPurifier_Strategy
|
||||
if (!empty( $token->is_tag )) {
|
||||
// DEFINITION CALL
|
||||
if (isset($definition->info[$token->name])) {
|
||||
// leave untouched, except for a few special cases:
|
||||
|
||||
// hard-coded image special case, pre-emptively drop
|
||||
// if not available. Probably not abstract-able
|
||||
if ( $token->name == 'img' ) {
|
||||
if (!isset($token->attr['src'])) {
|
||||
continue;
|
||||
}
|
||||
if (!isset($definition->info['img']->attr['src'])) {
|
||||
continue;
|
||||
}
|
||||
$token->attr['src'] =
|
||||
$definition->
|
||||
info['img']->
|
||||
attr['src']->
|
||||
validate($token->attr['src'],
|
||||
$config, $context);
|
||||
if ($token->attr['src'] === false) continue;
|
||||
}
|
||||
|
||||
// leave untouched
|
||||
} elseif (
|
||||
isset($definition->info_tag_transform[$token->name])
|
||||
) {
|
||||
|
@@ -35,7 +35,7 @@ class HTMLPurifier_Strategy_ValidateAttributes extends HTMLPurifier_Strategy
|
||||
if ($token->type !== 'start' && $token->type !== 'empty') continue;
|
||||
|
||||
// copy out attributes for easy manipulation
|
||||
$attr = $token->attr;
|
||||
$attr = $token->attributes;
|
||||
|
||||
// do global transformations (pre)
|
||||
// nothing currently utilizes this
|
||||
@@ -117,7 +117,7 @@ class HTMLPurifier_Strategy_ValidateAttributes extends HTMLPurifier_Strategy
|
||||
|
||||
// commit changes
|
||||
// could interfere with flyweight implementation
|
||||
$tokens[$key]->attr = $attr;
|
||||
$tokens[$key]->attributes = $attr;
|
||||
}
|
||||
$context->destroy('IDAccumulator');
|
||||
|
||||
|
@@ -62,16 +62,16 @@ class HTMLPurifier_TagTransform_Center extends HTMLPurifier_TagTransform
|
||||
$new_tag = new HTMLPurifier_Token_End($this->transform_to);
|
||||
return $new_tag;
|
||||
}
|
||||
$attr = $tag->attr;
|
||||
$attributes = $tag->attributes;
|
||||
$prepend_css = 'text-align:center;';
|
||||
if (isset($attr['style'])) {
|
||||
$attr['style'] = $prepend_css . $attr['style'];
|
||||
if (isset($attributes['style'])) {
|
||||
$attributes['style'] = $prepend_css . $attributes['style'];
|
||||
} else {
|
||||
$attr['style'] = $prepend_css;
|
||||
$attributes['style'] = $prepend_css;
|
||||
}
|
||||
$new_tag = $tag->copy();
|
||||
$new_tag->name = $this->transform_to;
|
||||
$new_tag->attr = $attr;
|
||||
$new_tag->attributes = $attributes;
|
||||
return $new_tag;
|
||||
}
|
||||
}
|
||||
@@ -115,39 +115,39 @@ class HTMLPurifier_TagTransform_Font extends HTMLPurifier_TagTransform
|
||||
return $new_tag;
|
||||
}
|
||||
|
||||
$attr = $tag->attr;
|
||||
$attributes = $tag->attributes;
|
||||
$prepend_style = '';
|
||||
|
||||
// handle color transform
|
||||
if (isset($attr['color'])) {
|
||||
$prepend_style .= 'color:' . $attr['color'] . ';';
|
||||
unset($attr['color']);
|
||||
if (isset($attributes['color'])) {
|
||||
$prepend_style .= 'color:' . $attributes['color'] . ';';
|
||||
unset($attributes['color']);
|
||||
}
|
||||
|
||||
// handle face transform
|
||||
if (isset($attr['face'])) {
|
||||
$prepend_style .= 'font-family:' . $attr['face'] . ';';
|
||||
unset($attr['face']);
|
||||
if (isset($attributes['face'])) {
|
||||
$prepend_style .= 'font-family:' . $attributes['face'] . ';';
|
||||
unset($attributes['face']);
|
||||
}
|
||||
|
||||
// handle size transform
|
||||
if (isset($attr['size'])) {
|
||||
if (isset($this->_size_lookup[$attr['size']])) {
|
||||
if (isset($attributes['size'])) {
|
||||
if (isset($this->_size_lookup[$attributes['size']])) {
|
||||
$prepend_style .= 'font-size:' .
|
||||
$this->_size_lookup[$attr['size']] . ';';
|
||||
$this->_size_lookup[$attributes['size']] . ';';
|
||||
}
|
||||
unset($attr['size']);
|
||||
unset($attributes['size']);
|
||||
}
|
||||
|
||||
if ($prepend_style) {
|
||||
$attr['style'] = isset($attr['style']) ?
|
||||
$prepend_style . $attr['style'] :
|
||||
$attributes['style'] = isset($attributes['style']) ?
|
||||
$prepend_style . $attributes['style'] :
|
||||
$prepend_style;
|
||||
}
|
||||
|
||||
$new_tag = $tag->copy();
|
||||
$new_tag->name = $this->transform_to;
|
||||
$new_tag->attr = $attr;
|
||||
$new_tag->attributes = $attributes;
|
||||
|
||||
return $new_tag;
|
||||
|
||||
|
@@ -50,29 +50,30 @@ class HTMLPurifier_Token_Tag extends HTMLPurifier_Token // abstract
|
||||
/**
|
||||
* Associative array of the tag's attributes.
|
||||
*/
|
||||
var $attr = array();
|
||||
var $attributes = array();
|
||||
|
||||
/**
|
||||
* Non-overloaded constructor, which lower-cases passed tag name.
|
||||
*
|
||||
* @param $name String name.
|
||||
* @param $attr Associative array of attributes.
|
||||
* @param $name String name.
|
||||
* @param $attributes Associative array of attributes.
|
||||
*/
|
||||
function HTMLPurifier_Token_Tag($name, $attr = array()) {
|
||||
function HTMLPurifier_Token_Tag($name, $attributes = array()) {
|
||||
//if ($attributes === null) var_dump(debug_backtrace());
|
||||
$this->name = ctype_lower($name) ? $name : strtolower($name);
|
||||
foreach ($attr as $key => $value) {
|
||||
foreach ($attributes as $key => $value) {
|
||||
// normalization only necessary when key is not lowercase
|
||||
if (!ctype_lower($key)) {
|
||||
$new_key = strtolower($key);
|
||||
if (!isset($attr[$new_key])) {
|
||||
$attr[$new_key] = $attr[$key];
|
||||
if (!isset($attributes[$new_key])) {
|
||||
$attributes[$new_key] = $attributes[$key];
|
||||
}
|
||||
if ($new_key !== $key) {
|
||||
unset($attr[$key]);
|
||||
unset($attributes[$key]);
|
||||
}
|
||||
}
|
||||
}
|
||||
$this->attr = $attr;
|
||||
$this->attributes = $attributes;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -83,7 +84,7 @@ class HTMLPurifier_Token_Start extends HTMLPurifier_Token_Tag
|
||||
{
|
||||
var $type = 'start';
|
||||
function copy() {
|
||||
return new HTMLPurifier_Token_Start($this->name, $this->attr);
|
||||
return new HTMLPurifier_Token_Start($this->name, $this->attributes);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -94,7 +95,7 @@ class HTMLPurifier_Token_Empty extends HTMLPurifier_Token_Tag
|
||||
{
|
||||
var $type = 'empty';
|
||||
function copy() {
|
||||
return new HTMLPurifier_Token_Empty($this->name, $this->attr);
|
||||
return new HTMLPurifier_Token_Empty($this->name, $this->attributes);
|
||||
}
|
||||
}
|
||||
|
||||
|
@@ -37,12 +37,12 @@ class HTMLPurifier_TokenFactory
|
||||
/**
|
||||
* Creates a HTMLPurifier_Token_Start.
|
||||
* @param $name Tag name
|
||||
* @param $attr Associative array of attributes
|
||||
     * @param $attributes Associative array of attributes
|
||||
* @return Generated HTMLPurifier_Token_Start
|
||||
*/
|
||||
public function createStart($name, $attr = array()) {
|
||||
public function createStart($name, $attributes = array()) {
|
||||
$p = clone $this->p_start;
|
||||
$p->HTMLPurifier_Token_Tag($name, $attr);
|
||||
$p->HTMLPurifier_Token_Tag($name, $attributes);
|
||||
return $p;
|
||||
}
|
||||
|
||||
@@ -60,12 +60,12 @@ class HTMLPurifier_TokenFactory
|
||||
/**
|
||||
* Creates a HTMLPurifier_Token_Empty.
|
||||
* @param $name Tag name
|
||||
* @param $attr Associative array of attributes
|
||||
     * @param $attributes Associative array of attributes
|
||||
* @return Generated HTMLPurifier_Token_Empty
|
||||
*/
|
||||
public function createEmpty($name, $attr = array()) {
|
||||
public function createEmpty($name, $attributes = array()) {
|
||||
$p = clone $this->p_empty;
|
||||
$p->HTMLPurifier_Token_Tag($name, $attr);
|
||||
$p->HTMLPurifier_Token_Tag($name, $attributes);
|
||||
return $p;
|
||||
}
|
||||
|
||||
|
@@ -2,7 +2,8 @@
|
||||
|
||||
header('Content-type: text/html; charset=UTF-8');
|
||||
|
||||
require_once '../library/HTMLPurifier.auto.php';
|
||||
set_include_path('../library' . PATH_SEPARATOR . get_include_path());
|
||||
require_once 'HTMLPurifier.php';
|
||||
|
||||
function escapeHTML($string) {
|
||||
$string = HTMLPurifier_Encoder::cleanUTF8($string);
|
||||
|
@@ -1,40 +0,0 @@
|
||||
<?php
|
||||
|
||||
set_include_path('../library/' . PATH_SEPARATOR . get_include_path() );
|
||||
|
||||
header('Content-type: text/html; charset=UTF-8');
|
||||
echo '<?xml version="1.0" encoding="UTF-8" ?>';
|
||||
|
||||
function printb($bool) {
|
||||
echo '<strong>' . ($bool ? 'Pass' : 'Fail') . '</strong>';
|
||||
}
|
||||
|
||||
function printEval($code) {
|
||||
echo '<pre>' . htmlspecialchars($code) . '</pre>';
|
||||
eval($code);
|
||||
}
|
||||
|
||||
?><!DOCTYPE html
|
||||
PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
|
||||
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
|
||||
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
|
||||
<head>
|
||||
<title>HTML Purifier Function Include Smoketest</title>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
|
||||
</head>
|
||||
<body>
|
||||
<h1>HTML Purifier Function Include Smoketest</h1>
|
||||
|
||||
<p>Tests whether or not the includes are done properly and whether or
|
||||
not the library is lazy loaded.</p>
|
||||
|
||||
<?php printEval("require_once 'HTMLPurifier.func.php';"); ?>
|
||||
|
||||
<p>HTMLPurifier class doesn't exist: <?php printb(!class_exists('HTMLPurifier')); ?></li></p>
|
||||
|
||||
<?php printEval("HTMLPurifier('foobar');"); ?>
|
||||
|
||||
<p>HTMLPurifier class exists: <?php printb(class_exists('HTMLPurifier')); ?></li></p>
|
||||
|
||||
</body>
|
||||
</html>
|
@@ -1,65 +0,0 @@
|
||||
<?php
|
||||
|
||||
require_once 'common.php';
|
||||
|
||||
echo '<?xml version="1.0" encoding="UTF-8" ?>';
|
||||
?><!DOCTYPE html
|
||||
PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
|
||||
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
|
||||
<html>
|
||||
<head>
|
||||
<title>HTML Purifier Preserve YouTube Smoketest</title>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
|
||||
</head>
|
||||
<body>
|
||||
<h1>HTML Purifier Preserve YouTube Smoketest</h1>
|
||||
<?php
|
||||
|
||||
/**
 * Purifier subclass that lets YouTube embeds survive purification.
 *
 * Before purification every <object> that references a YouTube video is
 * collapsed into a placeholder <span> carrying only the video id; after
 * purification each placeholder is expanded into a fixed, known-good
 * <object> embed.
 */
class HTMLPurifierX_PreserveYouTube extends HTMLPurifier
{

    /**
     * Purifies HTML while preserving YouTube video embeds.
     *
     * @param string $html   HTML to purify.
     * @param mixed  $config Optional configuration, handed unchanged to
     *                       the parent purify().
     * @return string Purified HTML with the YouTube embeds rebuilt.
     */
    function purify($html, $config = null) {
        // Video ids may contain '-' and '_' as well as alphanumerics. The
        // 's' modifier lets '.' span newlines so embeds that are broken
        // across several lines are matched too, and the host dots are
        // escaped so they only match literal dots.
        $pre_regex = '#<object[^>]+>.+?'.
            'http://www\.youtube\.com/v/([A-Za-z0-9\-_]+).+?</object>#s';
        $pre_replace = '<span class="youtube-embed">\1</span>';
        $html = preg_replace($pre_regex, $pre_replace, $html);

        $html = parent::purify($html, $config);

        // Any placeholder span left in the purified output is expanded;
        // the captured id is limited to [A-Za-z0-9_-], so nothing else
        // can be carried into the rebuilt markup.
        $post_regex = '#<span class="youtube-embed">([A-Za-z0-9\-_]+)</span>#';
        // Each <embed> attribute fragment ends in a space so that the
        // concatenated attributes stay separated.
        $post_replace = '<object width="425" height="350" '.
            'data="http://www.youtube.com/v/\1">'.
            '<param name="movie" value="http://www.youtube.com/v/\1"></param>'.
            '<param name="wmode" value="transparent"></param>'.
            '<!--[if IE]>'.
            '<embed src="http://www.youtube.com/v/\1" '.
            'type="application/x-shockwave-flash" '.
            'wmode="transparent" width="425" height="350" />'.
            '<![endif]-->'.
            '</object>';
        $html = preg_replace($post_regex, $post_replace, $html);

        return $html;
    }

}
|
||||
|
||||
$string = '<object width="425" height="350"><param name="movie" value="http://www.youtube.com/v/JzqumbhfxRo"></param><param name="wmode" value="transparent"></param><embed src="http://www.youtube.com/v/JzqumbhfxRo" type="application/x-shockwave-flash" wmode="transparent" width="425" height="350"></embed></object>';
|
||||
|
||||
$regular_purifier = new HTMLPurifier();
|
||||
$youtube_purifier = new HTMLPurifierX_PreserveYouTube();
|
||||
|
||||
?>
|
||||
<h2>Unpurified</h2>
|
||||
<p><a href="?break">Click here to see the unpurified version (breaks validation).</a></p>
|
||||
<div><?php
|
||||
if (isset($_GET['break'])) echo $string;
|
||||
?></div>
|
||||
|
||||
<h2>Without YouTube exception</h2>
|
||||
<div><?php
|
||||
echo $regular_purifier->purify($string);
|
||||
?></div>
|
||||
|
||||
<h2>With YouTube exception</h2>
|
||||
<div><?php
|
||||
echo $youtube_purifier->purify($string);
|
||||
?></div>
|
||||
|
||||
</body>
|
||||
</html>
|
@@ -1,144 +0,0 @@
|
||||
<?php
|
||||
|
||||
require_once 'common.php'; // load library
|
||||
|
||||
require_once 'HTMLPurifier/Printer/HTMLDefinition.php';
|
||||
require_once 'HTMLPurifier/Printer/CSSDefinition.php';
|
||||
|
||||
$config = HTMLPurifier_Config::createDefault();
|
||||
|
||||
// you can do custom configuration!
|
||||
if (file_exists('printDefinition.settings.php')) {
|
||||
include 'printDefinition.settings.php';
|
||||
}
|
||||
|
||||
$get = $_GET;
|
||||
foreach ($_GET as $key => $value) {
|
||||
if (!strncmp($key, 'Null_', 5) && !empty($value)) {
|
||||
unset($get[substr($key, 5)]);
|
||||
unset($get[$key]);
|
||||
}
|
||||
}
|
||||
|
||||
@$config->loadArray($get);
|
||||
|
||||
$printer_html_definition = new HTMLPurifier_Printer_HTMLDefinition();
|
||||
$printer_css_definition = new HTMLPurifier_Printer_CSSDefinition();
|
||||
|
||||
echo '<?xml version="1.0" encoding="UTF-8" ?>';
|
||||
?>
|
||||
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
|
||||
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
|
||||
<html xmlns="http://www.w3.org/1999/xhtml" lang="en" xml:lang="en">
|
||||
<head>
|
||||
<title>HTML Purifier Printer Smoketest</title>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
|
||||
<style type="text/css">
|
||||
form table {margin:1em auto;}
|
||||
form th {text-align:right;padding-right:1em;}
|
||||
form .c {display:none;}
|
||||
.HTMLPurifier_Printer table {border-collapse:collapse;
|
||||
border:1px solid #000; width:600px;
|
||||
margin:1em auto;font-family:sans-serif;font-size:75%;}
|
||||
.HTMLPurifier_Printer td, .HTMLPurifier_Printer th {padding:3px;
|
||||
border:1px solid #000;background:#CCC; vertical-align: baseline;}
|
||||
.HTMLPurifier_Printer th {text-align:left;background:#CCF;width:20%;}
|
||||
.HTMLPurifier_Printer caption {font-size:1.5em; font-weight:bold;
|
||||
width:100%;}
|
||||
.HTMLPurifier_Printer .heavy {background:#99C;text-align:center;}
|
||||
</style>
|
||||
<script type="text/javascript">
|
||||
/**
 * Disables or re-enables the form control with the given id.
 *
 * @param {string}  id_of_patient id attribute of the control to toggle
 * @param {boolean} checked       true disables the control, false enables it
 */
function toggleWriteability(id_of_patient, checked) {
    var patient = document.getElementById(id_of_patient);
    // getElementById returns null for an unknown id; do nothing rather
    // than throw from the click handler
    if (!patient) return;
    patient.disabled = checked;
}
|
||||
</script>
|
||||
</head>
|
||||
<body>
|
||||
<h1>HTML Purifier Printer Smoketest</h1>
|
||||
<p>This page will allow you to see precisely what HTML Purifier's internal
|
||||
whitelist is. You can
|
||||
also twiddle with the configuration settings to see how a directive
|
||||
influences the internal workings of the definition objects.</p>
|
||||
<h2>Modify configuration</h2>
|
||||
|
||||
<p>You can specify an array by typing in a comma-separated
|
||||
list of items, HTML Purifier will take care of the rest (including
|
||||
transformation into a real array list or a lookup table).</p>
|
||||
|
||||
<form id="edit-config" name="edit-config" method="get" action="printDefinition.php">
|
||||
<table>
|
||||
<?php
|
||||
$directives = $config->getBatch('HTML');
|
||||
// can't handle hashes
|
||||
foreach ($directives as $key => $value) {
|
||||
$directive = "HTML.$key";
|
||||
// Flatten array-valued directives into the comma-separated string that
// is shown in the text input.
if (is_array($value)) {
    $keys = array_keys($value);
    if ($keys === array_keys($keys)) {
        // keys are exactly 0..n-1, i.e. a plain list: the items are the
        // values (imploding the keys would print "0,1,2,...")
        $value = implode(',', $value);
    } else {
        // lookup table: the items are the keys whose value is exactly true
        $new_value = '';
        foreach ($value as $name => $bool) {
            if ($bool !== true) continue;
            $new_value .= "$name,";
        }
        $value = rtrim($new_value, ',');
    }
}
|
||||
$allow_null = $config->def->info['HTML'][$key]->allow_null;
|
||||
?>
|
||||
<tr>
|
||||
<th>
|
||||
<a href="http://hp.jpsband.org/live/configdoc/plain.html#<?php echo $directive ?>">
|
||||
<label for="<?php echo $directive; ?>">%<?php echo $directive; ?></label>
|
||||
</a>
|
||||
</th>
|
||||
<td>
|
||||
<?php if (is_bool($value)) { ?>
|
||||
<label for="Yes_<?php echo $directive; ?>"><span class="c">%<?php echo $directive; ?>:</span> Yes</label>
|
||||
<input type="radio" name="<?php echo $directive; ?>" id="Yes_<?php echo $directive; ?>" value="1"<?php if ($value) { ?> checked="checked"<?php } ?> />
|
||||
<label for="No_<?php echo $directive; ?>"><span class="c">%<?php echo $directive; ?>:</span> No</label>
|
||||
<input type="radio" name="<?php echo $directive; ?>" id="No_<?php echo $directive; ?>" value="0"<?php if (!$value) { ?> checked="checked"<?php } ?> />
|
||||
<?php } else { ?>
|
||||
<?php if($allow_null) { ?>
|
||||
<label for="Null_<?php echo $directive; ?>"><span class="c">%<?php echo $directive; ?>:</span> Null/Disabled*</label>
|
||||
<input
|
||||
type="checkbox"
|
||||
value="1"
|
||||
onclick="toggleWriteability('<?php echo $directive ?>',checked)"
|
||||
name="Null_<?php echo $directive; ?>"
|
||||
id="Null_<?php echo $directive; ?>"
|
||||
<?php if ($value === null) { ?> checked="checked"<?php } ?>
|
||||
/> or <br />
|
||||
<?php } ?>
|
||||
<input
|
||||
type="text"
|
||||
name="<?php echo $directive; ?>"
|
||||
id="<?php echo $directive; ?>"
|
||||
value="<?php echo escapeHTML($value); ?>"
|
||||
<?php if($value === null) {echo 'disabled="disabled"';} ?>
|
||||
/>
|
||||
<?php } ?>
|
||||
</td>
|
||||
</tr>
|
||||
<?php
|
||||
}
|
||||
?>
|
||||
<tr>
|
||||
<td colspan="2" style="text-align:right;">
|
||||
[<a href="printDefinition.php">Reset</a>]
|
||||
<input type="submit" value="Submit" />
|
||||
</td>
|
||||
</tr>
|
||||
</table>
|
||||
<p>* Some configuration directives make a distinction between an empty
|
||||
variable and a null variable. A whitelist, for example, will take an
|
||||
empty array as meaning <em>no</em> allowed elements, while checking
|
||||
Null/Disabled will mean that user whitelisting functionality is disabled.</p>
|
||||
</form>
|
||||
<h2>HTMLDefinition</h2>
|
||||
<?php echo $printer_html_definition->render($config) ?>
|
||||
<h2>CSSDefinition</h2>
|
||||
<?php echo $printer_css_definition->render($config) ?>
|
||||
</body>
|
||||
</html>
|
@@ -2,17 +2,16 @@
|
||||
|
||||
require_once 'common.php';
|
||||
|
||||
echo '<?xml version="1.0" encoding="UTF-8" ?>';
|
||||
?><!DOCTYPE html
|
||||
PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
|
||||
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
|
||||
<html>
|
||||
<head>
|
||||
<title>HTML Purifier UTF-8 Smoketest</title>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
|
||||
<title>HTMLPurifier UTF-8 Smoketest</title>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
|
||||
</head>
|
||||
<body>
|
||||
<h1>HTML Purifier UTF-8 Smoketest</h1>
|
||||
<h1>HTMLPurifier UTF-8 Smoketest</h1>
|
||||
<?php
|
||||
|
||||
$purifier = new HTMLPurifier();
|
||||
|
@@ -2,17 +2,16 @@
|
||||
|
||||
require_once 'common.php';
|
||||
|
||||
echo '<?xml version="1.0" encoding="UTF-8" ?>';
|
||||
?><!DOCTYPE html
|
||||
PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
|
||||
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
|
||||
<html>
|
||||
<head>
|
||||
<title>HTML Purifier Variable Width Attack Smoketest</title>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
|
||||
<title>HTMLPurifier Variable Width Attack Smoketest</title>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
|
||||
</head>
|
||||
<body>
|
||||
<h1>HTML Purifier Variable Width Attack Smoketest</h1>
|
||||
<h1>HTMLPurifier Variable Width Attack Smoketest</h1>
|
||||
<p>For more information, see
|
||||
<a href="http://applesoup.googlepages.com/bypass_filter.txt">Cheng Peng Su's
|
||||
original advisory.</a> This particular exploit code appears only to work
|
||||
|
@@ -20,7 +20,7 @@ function formatCode($string) {
|
||||
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
|
||||
<html>
|
||||
<head>
|
||||
<title>HTML Purifier XSS Attacks Smoketest</title>
|
||||
<title>HTMLPurifier XSS Attacks Smoketest</title>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
|
||||
<style type="text/css">
|
||||
.scroll {overflow:auto; width:100%;}
|
||||
@@ -31,13 +31,13 @@ function formatCode($string) {
|
||||
</style>
|
||||
</head>
|
||||
<body>
|
||||
<h1>HTML Purifier XSS Attacks Smoketest</h1>
|
||||
<h1>HTMLPurifier XSS Attacks Smoketest</h1>
|
||||
<p>XSS attacks are from
|
||||
<a href="http://ha.ckers.org/xss.html">http://ha.ckers.org/xss.html</a>.</p>
|
||||
<p><strong>Caveats:</strong>
|
||||
<tt>Google.com</tt> has been programmatically disallowed, but as you can
|
||||
see, there are ways of getting around that, so coverage in this area
|
||||
is not complete. Most XSS broadcasts its presence by spawning an alert dialogue.
|
||||
The last segment of tests regarding blacklisted websites is not
|
||||
applicable at the moment, but when we add that functionality they'll be
|
||||
relevant. Most XSS broadcasts its presence by spawning an alert dialogue.
|
||||
The displayed code is not strictly correct, as linebreaks have been forced for
|
||||
readability. Linewraps have been marked with <tt>»</tt>. Some tests are
|
||||
omitted for your convenience. Not all control characters are displayed.</p>
|
||||
@@ -48,12 +48,7 @@ omitted for your convenience. Not all control characters are displayed.</p>
|
||||
if (version_compare(PHP_VERSION, '5', '<')) exit('<p>Requires PHP 5.</p>');
|
||||
|
||||
$xml = simplexml_load_file('xssAttacks.xml');
|
||||
|
||||
// programmatically disallow google.com for URI evasion tests
|
||||
// not complete
|
||||
$config = HTMLPurifier_Config::createDefault();
|
||||
$config->set('URI', 'HostBlacklist', array('google.com'));
|
||||
$purifier = new HTMLPurifier($config);
|
||||
$purifier = new HTMLPurifier();
|
||||
|
||||
?>
|
||||
<table cellspacing="0" cellpadding="2">
|
||||
|
@@ -2,7 +2,7 @@
|
||||
<xss>
|
||||
<attack>
|
||||
<name>XSS Locator</name>
|
||||
<code>';alert(String.fromCharCode(88,83,83))//\';alert(String.fromCharCode(88,83,83))//";alert(String.fromCharCode(88,83,83))//\";alert(String.fromCharCode(88,83,83))//--></SCRIPT>">'><SCRIPT>alert(String.fromCharCode(88,83,83))</SCRIPT>=&{}</code>
|
||||
<code>';alert(String.fromCharCode(88,83,83))//\';alert(String.fromCharCode(88,83,83))//";alert(String.fromCharCode(88,83,83))//\";alert(String.fromCharCode(88,83,83))//></SCRIPT>!--<SCRIPT>alert(String.fromCharCode(88,83,83))</SCRIPT>=&{}</code>
|
||||
|
||||
<desc>Inject this string, and in most cases where a script is vulnerable with no special XSS vector requirements the word "XSS" will pop up. You'll need to replace the "&" with "%26" if you are submitting this XSS string via HTTP GET or it will be ignored and everything after it will be interpreted as another variable. Tip: If you're in a rush and need to quickly check a page, often times injecting the deprecated "<PLAINTEXT>" tag will be enough to check to see if something is vulnerable to XSS by messing up the output appreciably.</desc>
|
||||
<label>Basic XSS Attacks</label>
|
||||
|
@@ -17,9 +17,6 @@ class HTMLPurifier_AttrDef_LangTest extends HTMLPurifier_AttrDefHarness
|
||||
$this->assertDef(' en ', 'en'); // trim
|
||||
$this->assertDef('EN', 'en'); // case insensitivity
|
||||
|
||||
// (thanks Eugen Pankratz for noticing the typos!)
|
||||
$this->assertDef('En-Us-Edison', 'en-us-edison'); // complex ci
|
||||
|
||||
$this->assertDef('fr en', false); // multiple languages
|
||||
$this->assertDef('%', false); // bad character
|
||||
|
||||
@@ -29,7 +26,7 @@ class HTMLPurifier_AttrDef_LangTest extends HTMLPurifier_AttrDefHarness
|
||||
// primary subtag rules
|
||||
// I'm somewhat hesitant to allow x and i as primary language codes,
|
||||
// because they usually are never used in real life. However,
|
||||
// theoretically speaking, having them alone is permissible, so
|
||||
// theoretically speaking, having them alone is permissible, so
|
||||
// I'll be lenient. No XML parser is going to complain anyway.
|
||||
$this->assertDef('x');
|
||||
$this->assertDef('i');
|
||||
|
@@ -271,61 +271,6 @@ class HTMLPurifier_AttrDef_URITest extends HTMLPurifier_AttrDefHarness
|
||||
|
||||
}
|
||||
|
||||
function testDisableExternalResources() {
|
||||
|
||||
$this->config->set('URI', 'DisableExternalResources', true);
|
||||
|
||||
$this->def = new HTMLPurifier_AttrDef_URI();
|
||||
$this->assertDef('http://sub.example.com/alas?foo=asd');
|
||||
$this->assertDef('/img.png');
|
||||
|
||||
$this->def = new HTMLPurifier_AttrDef_URI(true);
|
||||
$this->assertDef('http://sub.example.com/alas?foo=asd', false);
|
||||
$this->assertDef('/img.png');
|
||||
|
||||
}
|
||||
|
||||
function testMunge() {
|
||||
|
||||
$this->config->set('URI', 'Munge', 'http://www.google.com/url?q=%s');
|
||||
$this->def = new HTMLPurifier_AttrDef_URI();
|
||||
|
||||
$this->assertDef(
|
||||
'http://www.example.com/',
|
||||
'http://www.google.com/url?q=http%3A%2F%2Fwww.example.com%2F'
|
||||
);
|
||||
|
||||
$this->assertDef('index.html');
|
||||
$this->assertDef('javascript:foobar();', false);
|
||||
|
||||
}
|
||||
|
||||
function testBlacklist() {
|
||||
|
||||
$this->config->set('URI', 'HostBlacklist', array('example.com', 'moo'));
|
||||
|
||||
$this->assertDef('foo.txt');
|
||||
$this->assertDef('http://www.google.com/example.com/moo');
|
||||
|
||||
$this->assertDef('http://example.com/#23', false);
|
||||
$this->assertDef('https://sub.domain.example.com/foobar', false);
|
||||
$this->assertDef('http://example.com.example.net/?whoo=foo', false);
|
||||
$this->assertDef('ftp://moo-moo.net/foo/foo/', false);
|
||||
|
||||
}
|
||||
|
||||
function testWhitelist() {
|
||||
/*
|
||||
$this->config->set('URI', 'HostPolicy', 'DenyAll');
|
||||
$this->config->set('URI', 'HostWhitelist', array(null, 'google.com'));
|
||||
|
||||
$this->assertDef('http://example.com/fo/google.com', false);
|
||||
$this->assertDef('server.txt');
|
||||
$this->assertDef('ftp://www.google.com/?t=a');
|
||||
$this->assertDef('http://google.com.tricky.spamsite.net', false);
|
||||
*/
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
?>
|
@@ -1,35 +0,0 @@
|
||||
<?php
|
||||
|
||||
require_once 'HTMLPurifier/ChildDefHarness.php';
|
||||
require_once 'HTMLPurifier/ChildDef/Chameleon.php';
|
||||
|
||||
/**
 * Unit test for HTMLPurifier_ChildDef_Chameleon, a child definition built
 * from two element lists: one for an inline context and one for a block
 * context. The context is supplied to assertResult() as 'ParentType'.
 */
class HTMLPurifier_ChildDef_ChameleonTest extends HTMLPurifier_ChildDefHarness
{

    /**
     * Runs the same definition against inline and block parent types.
     */
    function test() {

        $this->obj = new HTMLPurifier_ChildDef_Chameleon(
            'b | i', // allowed only when in inline context
            'b | i | div' // allowed only when in block context
        );

        // <b> is in the inline list
        // NOTE(review): an expectation of true presumably means "accepted
        // unchanged" - confirm against HTMLPurifier_Harness
        $this->assertResult(
            '<b>Allowed.</b>', true,
            array(), array('ParentType' => 'inline')
        );

        // <div> is not in the inline list; the expected output is empty
        $this->assertResult(
            '<div>Not allowed.</div>', '',
            array(), array('ParentType' => 'inline')
        );

        // <div> is in the block list
        $this->assertResult(
            '<div>Allowed.</div>', true,
            array(), array('ParentType' => 'block')
        );

    }

}
|
||||
|
||||
?>
|
@@ -1,24 +0,0 @@
|
||||
<?php
|
||||
|
||||
require_once 'HTMLPurifier/ChildDefHarness.php';
|
||||
require_once 'HTMLPurifier/ChildDef/Custom.php';
|
||||
|
||||
/**
 * Unit test for HTMLPurifier_ChildDef_Custom, constructed here from the
 * DTD-style content model '(a,b?,c*,d+,(a,b)*)'.
 */
class HTMLPurifier_ChildDef_CustomTest extends HTMLPurifier_ChildDefHarness
{

    /**
     * Checks child sequences against the content model.
     */
    function test() {

        $this->obj = new HTMLPurifier_ChildDef_Custom('(a,b?,c*,d+,(a,b)*)');

        // expected result false: no children at all, and two <a> with no <d>
        $this->assertResult('', false);
        $this->assertResult('<a /><a />', false);

        // no explicit expectation given
        // NOTE(review): presumably means "accepted unchanged" - confirm
        // against HTMLPurifier_Harness::assertResult
        $this->assertResult('<a /><b /><c /><d /><a /><b />');
        $this->assertResult('<a /><d>Dob</d><a /><b>foo</b>'.
            '<a href="moo" /><b>foo</b>');

    }

}
|
||||
|
||||
?>
|
@@ -1,20 +0,0 @@
|
||||
<?php
|
||||
|
||||
require_once 'HTMLPurifier/ChildDefHarness.php';
|
||||
require_once 'HTMLPurifier/ChildDef/Optional.php';
|
||||
|
||||
/**
 * Unit test for HTMLPurifier_ChildDef_Optional, constructed here with the
 * element list 'b | i'.
 */
class HTMLPurifier_ChildDef_OptionalTest extends HTMLPurifier_ChildDefHarness
{

    /**
     * Checks that children outside the list are removed.
     */
    function test() {

        $this->obj = new HTMLPurifier_ChildDef_Optional('b | i');

        // <img /> is not in the list and is expected to be dropped
        $this->assertResult('<b>Bold text</b><img />', '<b>Bold text</b>');
        // bare text is expected to be dropped, leaving an empty result
        $this->assertResult('Not allowed text', '');

    }

}
|
||||
|
||||
?>
|
@@ -1,69 +0,0 @@
|
||||
<?php
|
||||
|
||||
require_once 'HTMLPurifier/ChildDefHarness.php';
|
||||
require_once 'HTMLPurifier/ChildDef/Required.php';
|
||||
|
||||
/**
 * Unit test for HTMLPurifier_ChildDef_Required: parsing of the element
 * list given to the constructor, and validation with and without #PCDATA
 * in that list.
 */
class HTMLPurifier_ChildDef_RequiredTest extends HTMLPurifier_ChildDefHarness
{

    /**
     * The constructor accepts a '|'-separated string (whitespace around
     * names is ignored) or an array, and exposes a name => true lookup
     * in $elements.
     */
    function testParsing() {

        $def = new HTMLPurifier_ChildDef_Required('foobar | bang |gizmo');
        $this->assertEqual($def->elements,
            array(
                'foobar' => true
               ,'bang'   => true
               ,'gizmo'  => true
            ));

        $def = new HTMLPurifier_ChildDef_Required(array('href', 'src'));
        $this->assertEqual($def->elements,
            array(
                'href' => true
               ,'src'  => true
            ));

    }

    /**
     * With 'dt | dd' (no #PCDATA) text and foreign tags are expected to be
     * removed, and input without any allowed element is expected to fail.
     */
    function testPCDATAForbidden() {

        $this->obj = new HTMLPurifier_ChildDef_Required('dt | dd');

        $this->assertResult('', false);
        $this->assertResult(
            '<dt>Term</dt>Text in an illegal location'.
            '<dd>Definition</dd><b>Illegal tag</b>',
            '<dt>Term</dt><dd>Definition</dd>');
        $this->assertResult('How do you do!', false);

        // whitespace shouldn't trigger it
        $this->assertResult("\n<dd>Definition</dd> ");

        $this->assertResult(
            '<dd>Definition</dd> <b></b> ',
            '<dd>Definition</dd> '
        );
        // whitespace alone does not satisfy the requirement
        $this->assertResult("\t ", false);

    }

    /**
     * With '#PCDATA | b' an invalid child is dropped by default and
     * escaped into text when Core.EscapeInvalidChildren is on.
     */
    function testPCDATAAllowed() {

        $this->obj = new HTMLPurifier_ChildDef_Required('#PCDATA | b');

        $this->assertResult('<b>Bold text</b><img />', '<b>Bold text</b>');

        // with child escaping on: the invalid <img /> must come back
        // entity-escaped, not as live markup
        $this->assertResult(
            '<b>Bold text</b><img />',
            '<b>Bold text</b>&lt;img /&gt;',
            array(
                'Core.EscapeInvalidChildren' => true
            )
        );

    }

}
|
||||
|
||||
?>
|
@@ -1,50 +0,0 @@
|
||||
<?php
|
||||
|
||||
require_once 'HTMLPurifier/ChildDefHarness.php';
|
||||
require_once 'HTMLPurifier/ChildDef/StrictBlockquote.php';
|
||||
|
||||
/**
 * Unit test for HTMLPurifier_ChildDef_StrictBlockquote. The expectations
 * below show inline content being wrapped in a block element (<p> unless
 * HTML.BlockWrapper is configured) while block children are left alone.
 */
class HTMLPurifier_ChildDef_StrictBlockquoteTest
extends HTMLPurifier_ChildDefHarness
{

    /**
     * Covers pass-through, wrapping, unknown tags and the BlockWrapper
     * directive.
     */
    function test() {

        $this->obj = new HTMLPurifier_ChildDef_StrictBlockquote();

        // no explicit expectation given
        // NOTE(review): presumably means "accepted unchanged" - confirm
        // against HTMLPurifier_Harness::assertResult
        $this->assertResult('');
        $this->assertResult('<p>Valid</p>');
        $this->assertResult('<div>Still valid</div>');
        // bare text gets wrapped
        $this->assertResult('Needs wrap', '<p>Needs wrap</p>');
        $this->assertResult(
            'Wrap'. '<p>Do not wrap</p>',
            '<p>Wrap</p><p>Do not wrap</p>'
        );
        $this->assertResult(
            '<p>Do not</p>'.'<b>Wrap</b>',
            '<p>Do not</p><p><b>Wrap</b></p>'
        );
        // the <li> tags are dropped; their text joins the wrapped paragraph
        $this->assertResult(
            '<li>Not allowed</li>Paragraph.<p>Hmm.</p>',
            '<p>Not allowedParagraph.</p><p>Hmm.</p>'
        );
        // $var is assigned inside the first argument and reused in the
        // second, so the expectation is the input wrapped in one <p>
        $this->assertResult(
            $var = 'He said<br />perhaps<br />we should <b>nuke</b> them.',
            "<p>$var</p>"
        );
        $this->assertResult(
            '<foo>Bar</foo><bas /><b>People</b>Conniving.'. '<p>Fools!</p>',
            '<p>Bar'. '<b>People</b>Conniving.</p><p>Fools!</p>'
        );
        // a configured wrapper element replaces <p>
        $this->assertResult('Needs wrap', '<div>Needs wrap</div>',
            array('HTML.BlockWrapper' => 'div'));

        // an unusable wrapper ('dav') still yields <p> and raises one error
        $this->assertResult('Needs wrap', '<p>Needs wrap</p>',
            array('HTML.BlockWrapper' => 'dav'));
        $this->assertError('Cannot use non-block element as block wrapper.');
        $this->assertNoErrors();

    }

}
|
||||
|
||||
?>
|
@@ -1,51 +0,0 @@
|
||||
<?php
|
||||
|
||||
require_once 'HTMLPurifier/ChildDefHarness.php';
|
||||
require_once 'HTMLPurifier/ChildDef/Table.php';
|
||||
|
||||
/**
 * Unit test for HTMLPurifier_ChildDef_Table, the child definition applied
 * to the contents of a table. The expectations cover reordering of table
 * sections, duplicate singleton elements, stray text and whitespace.
 */
class HTMLPurifier_ChildDef_TableTest extends HTMLPurifier_ChildDefHarness
{

    /**
     * Exercises ordering, de-duplication and whitespace handling.
     */
    function test() {

        $this->obj = new HTMLPurifier_ChildDef_Table();

        // no children: expected result is false
        $this->assertResult('', false);

        // we're using empty tags to compact the tests: under real circumstances
        // there would be contents in them

        $this->assertResult('<tr />');
        $this->assertResult('<caption /><col /><thead /><tfoot /><tbody>'.
            '<tr><td>asdf</td></tr></tbody>');
        $this->assertResult('<col /><col /><col /><tr />');

        // mixed up order
        $this->assertResult(
            '<col /><colgroup /><tbody /><tfoot /><thead /><tr>1</tr><caption /><tr />',
            '<caption /><col /><colgroup /><thead /><tfoot /><tbody /><tr>1</tr><tr />');

        // duplicates of singles
        // - first caption serves
        // - trailing tfoots/theads get turned into tbodys
        $this->assertResult(
            '<caption>1</caption><caption /><tbody /><tbody /><tfoot>1</tfoot><tfoot />',
            '<caption>1</caption><tfoot>1</tfoot><tbody /><tbody /><tbody />'
        );

        // errant text dropped (until bubbling is implemented)
        $this->assertResult('foo', false);

        // whitespace sticks to the previous element, last whitespace is
        // stationary
        $this->assertResult("\n <tr />\n <tr />\n ");
        $this->assertResult(
            "\n\t<tbody />\n\t\t<tfoot />\n\t\t\t",
            "\n\t\t<tfoot />\n\t<tbody />\n\t\t\t"
        );

    }

}
|
||||
|
||||
?>
|
@@ -1,18 +0,0 @@
|
||||
<?php
|
||||
|
||||
require_once 'HTMLPurifier/Harness.php';
|
||||
require_once 'HTMLPurifier/ChildDef.php';
|
||||
|
||||
/**
 * Shared base class for the ChildDef unit tests.
 *
 * Its setUp() resets four harness properties before each test. How
 * HTMLPurifier_Harness consumes them is not visible in this file.
 */
class HTMLPurifier_ChildDefHarness extends HTMLPurifier_Harness
{

    /**
     * Resets the fixture state.
     */
    function setUp() {
        // each test assigns the ChildDef instance it exercises
        $this->obj = null;
        // NOTE(review): presumably the method the harness calls on
        // $this->obj - confirm in HTMLPurifier_Harness
        $this->func = 'validateChildren';
        // NOTE(review): presumably make the harness lex string input into
        // tokens and render the result back to HTML - confirm
        $this->to_tokens = true;
        $this->to_html = true;
    }

}
|
||||
|
||||
?>
|
168
tests/HTMLPurifier/ChildDefTest.php
Normal file
168
tests/HTMLPurifier/ChildDefTest.php
Normal file
@@ -0,0 +1,168 @@
|
||||
<?php
|
||||
|
||||
require_once 'HTMLPurifier/Harness.php';
|
||||
|
||||
require_once 'HTMLPurifier/ChildDef.php';
|
||||
require_once 'HTMLPurifier/Lexer/DirectLex.php';
|
||||
require_once 'HTMLPurifier/Generator.php';
|
||||
|
||||
class HTMLPurifier_ChildDefTest extends HTMLPurifier_Harness
|
||||
{
|
||||
|
||||
function setUp() {
|
||||
$this->obj = null;
|
||||
$this->func = 'validateChildren';
|
||||
$this->to_tokens = true;
|
||||
$this->to_html = true;
|
||||
}
|
||||
|
||||
function test_custom() {
|
||||
|
||||
$this->obj = new HTMLPurifier_ChildDef_Custom('(a,b?,c*,d+,(a,b)*)');
|
||||
|
||||
$this->assertResult('', false);
|
||||
$this->assertResult('<a /><a />', false);
|
||||
|
||||
$this->assertResult('<a /><b /><c /><d /><a /><b />');
|
||||
$this->assertResult('<a /><d>Dob</d><a /><b>foo</b>'.
|
||||
'<a href="moo" /><b>foo</b>');
|
||||
|
||||
}
|
||||
|
||||
/**
 * Exercises HTMLPurifier_ChildDef_Table: ordering of table sections,
 * duplicate singleton elements, stray text and whitespace handling.
 */
function test_table() {

    // the table definition
    $this->obj = new HTMLPurifier_ChildDef_Table();

    // no children: expected result is false
    $this->assertResult('', false);

    // we're using empty tags to compact the tests: under real circumstances
    // there would be contents in them

    $this->assertResult('<tr />');
    $this->assertResult('<caption /><col /><thead /><tfoot /><tbody>'.
        '<tr><td>asdf</td></tr></tbody>');
    $this->assertResult('<col /><col /><col /><tr />');

    // mixed up order
    $this->assertResult(
        '<col /><colgroup /><tbody /><tfoot /><thead /><tr>1</tr><caption /><tr />',
        '<caption /><col /><colgroup /><thead /><tfoot /><tbody /><tr>1</tr><tr />');

    // duplicates of singles
    // - first caption serves
    // - trailing tfoots/theads get turned into tbodys
    $this->assertResult(
        '<caption>1</caption><caption /><tbody /><tbody /><tfoot>1</tfoot><tfoot />',
        '<caption>1</caption><tfoot>1</tfoot><tbody /><tbody /><tbody />'
    );

    // errant text dropped (until bubbling is implemented)
    $this->assertResult('foo', false);

    // whitespace sticks to the previous element, last whitespace is
    // stationary
    $this->assertResult("\n <tr />\n <tr />\n ");
    $this->assertResult(
        "\n\t<tbody />\n\t\t<tfoot />\n\t\t\t",
        "\n\t\t<tfoot />\n\t<tbody />\n\t\t\t"
    );

}
|
||||
|
||||
function testParsing() {
|
||||
|
||||
$def = new HTMLPurifier_ChildDef_Required('foobar | bang |gizmo');
|
||||
$this->assertEqual($def->elements,
|
||||
array(
|
||||
'foobar' => true
|
||||
,'bang' => true
|
||||
,'gizmo' => true
|
||||
));
|
||||
|
||||
$def = new HTMLPurifier_ChildDef_Required(array('href', 'src'));
|
||||
$this->assertEqual($def->elements,
|
||||
array(
|
||||
'href' => true
|
||||
,'src' => true
|
||||
));
|
||||
|
||||
}
|
||||
|
||||
function test_required_pcdata_forbidden() {
|
||||
|
||||
$this->obj = new HTMLPurifier_ChildDef_Required('dt | dd');
|
||||
|
||||
$this->assertResult('', false);
|
||||
$this->assertResult(
|
||||
'<dt>Term</dt>Text in an illegal location'.
|
||||
'<dd>Definition</dd><b>Illegal tag</b>',
|
||||
'<dt>Term</dt><dd>Definition</dd>');
|
||||
$this->assertResult('How do you do!', false);
|
||||
|
||||
// whitespace shouldn't trigger it
|
||||
$this->assertResult("\n<dd>Definition</dd> ");
|
||||
|
||||
$this->assertResult(
|
||||
'<dd>Definition</dd> <b></b> ',
|
||||
'<dd>Definition</dd> '
|
||||
);
|
||||
$this->assertResult("\t ", false);
|
||||
|
||||
}
|
||||
|
||||
/**
 * With '#PCDATA | b' an invalid child is dropped by default and escaped
 * into text when Core.EscapeInvalidChildren is on.
 */
function test_required_pcdata_allowed() {

    $this->obj = new HTMLPurifier_ChildDef_Required('#PCDATA | b');

    $this->assertResult('<b>Bold text</b><img />', '<b>Bold text</b>');

    // with child escaping on: the invalid <img /> must come back
    // entity-escaped, not as live markup
    $this->assertResult(
        '<b>Bold text</b><img />',
        '<b>Bold text</b>&lt;img /&gt;',
        array(
            'Core.EscapeInvalidChildren' => true
        )
    );

}
|
||||
|
||||
function test_optional() {
|
||||
|
||||
$this->obj = new HTMLPurifier_ChildDef_Optional('b | i');
|
||||
|
||||
$this->assertResult('<b>Bold text</b><img />', '<b>Bold text</b>');
|
||||
$this->assertResult('Not allowed text', '');
|
||||
|
||||
}
|
||||
|
||||
function test_chameleon() {
|
||||
|
||||
$this->obj = new HTMLPurifier_ChildDef_Chameleon(
|
||||
'b | i', // allowed only when in inline context
|
||||
'b | i | div' // allowed only when in block context
|
||||
);
|
||||
|
||||
$this->assertResult(
|
||||
'<b>Allowed.</b>', true,
|
||||
array(), array('ParentType' => 'inline')
|
||||
);
|
||||
|
||||
$this->assertResult(
|
||||
'<div>Not allowed.</div>', '',
|
||||
array(), array('ParentType' => 'inline')
|
||||
);
|
||||
|
||||
$this->assertResult(
|
||||
'<div>Allowed.</div>', true,
|
||||
array(), array('ParentType' => 'block')
|
||||
);
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
?>
|
@@ -284,11 +284,6 @@ class HTMLPurifier_ConfigSchemaTest extends UnitTestCase
|
||||
$this->assertInvalid(array(0 => 'moo'), 'hash');
|
||||
$this->assertValid(array(1 => 'moo'), 'hash');
|
||||
$this->assertValid(23, 'mixed');
|
||||
$this->assertValid('foo,bar, cow', 'list', array('foo', 'bar', 'cow'));
|
||||
$this->assertValid('foo,bar', 'lookup', array('foo' => true, 'bar' => true));
|
||||
$this->assertValid('true', 'bool', true);
|
||||
$this->assertValid('false', 'bool', false);
|
||||
$this->assertValid('1', 'bool', true);
|
||||
|
||||
}
|
||||
|
||||
|
@@ -106,20 +106,6 @@ class HTMLPurifier_ConfigTest extends UnitTestCase
|
||||
$this->assertError('Value is of invalid type');
|
||||
$this->assertNoErrors();
|
||||
|
||||
// grab a namespace
|
||||
$config->set('Attr', 'Key', 0xBEEF);
|
||||
$this->assertIdentical(
|
||||
$config->getBatch('Attr'),
|
||||
array(
|
||||
'Key' => 0xBEEF
|
||||
)
|
||||
);
|
||||
|
||||
// grab a non-existent namespace
|
||||
$config->getBatch('FurnishedGoods');
|
||||
$this->assertError('Cannot retrieve undefined namespace');
|
||||
$this->assertNoErrors();
|
||||
|
||||
}
|
||||
|
||||
function test_getDefinition() {
|
||||
@@ -180,25 +166,6 @@ class HTMLPurifier_ConfigTest extends UnitTestCase
|
||||
|
||||
}
|
||||
|
||||
function test_create() {
|
||||
|
||||
HTMLPurifier_ConfigSchema::defineNamespace('Cake', 'Properties of it.');
|
||||
HTMLPurifier_ConfigSchema::define('Cake', 'Sprinkles', 666, 'int', 'Number of.');
|
||||
HTMLPurifier_ConfigSchema::define('Cake', 'Flavor', 'vanilla', 'string', 'Flavor of the batter.');
|
||||
|
||||
$config = HTMLPurifier_Config::createDefault();
|
||||
$config->set('Cake', 'Sprinkles', 42);
|
||||
|
||||
// test flat pass-through
|
||||
$created_config = HTMLPurifier_Config::create($config);
|
||||
$this->assertEqual($config, $created_config);
|
||||
|
||||
// test loadArray
|
||||
$created_config = HTMLPurifier_Config::create(array('Cake.Sprinkles' => 42));
|
||||
$this->assertEqual($config, $created_config);
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
?>
|
@@ -83,20 +83,6 @@ class HTMLPurifier_Strategy_FixNestingTest extends HTMLPurifier_StrategyHarness
|
||||
'<a><span></span></a>'
|
||||
);
|
||||
|
||||
// test inline parent
|
||||
$this->assertResult(
|
||||
'<b>Bold</b>', true, array('HTML.Parent' => 'span')
|
||||
);
|
||||
$this->assertResult(
|
||||
'<div>Reject</div>', 'Reject', array('HTML.Parent' => 'span')
|
||||
);
|
||||
|
||||
$this->assertResult(
|
||||
'<div>Accept</div>', true, array('HTML.Parent' => 'script')
|
||||
);
|
||||
$this->assertError('Cannot use unrecognized element as parent.');
|
||||
$this->assertNoErrors();
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
@@ -42,15 +42,6 @@ class HTMLPurifier_Strategy_RemoveForeignElementsTest
|
||||
' Warning!</span>'
|
||||
);
|
||||
|
||||
// test removal of invalid img tag
|
||||
$this->assertResult(
|
||||
'<img />',
|
||||
''
|
||||
);
|
||||
|
||||
// test preservation of valid img tag
|
||||
$this->assertResult('<img src="foobar.gif" />');
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
@@ -125,9 +125,6 @@ class HTMLPurifier_Strategy_ValidateAttributesTest extends
|
||||
);
|
||||
|
||||
// test required attributes for img
|
||||
|
||||
// (this should never happen, as RemoveForeignElements
|
||||
// should have removed the offending image tag)
|
||||
$this->assertResult(
|
||||
'<img />',
|
||||
'<img src="" alt="Invalid image" />'
|
||||
|
@@ -8,81 +8,17 @@ class HTMLPurifier_Test extends UnitTestCase
|
||||
{
|
||||
var $purifier;
|
||||
|
||||
/**
 * Stores a newly constructed HTMLPurifier (no arguments) in
 * $this->purifier.
 */
function setUp() {
    $fresh = new HTMLPurifier();
    $this->purifier = $fresh;
}
|
||||
|
||||
/**
 * Purifies $input with $this->purifier and asserts the outcome is
 * identical to the expected string.
 *
 * @param $input  HTML handed to purify()
 * @param $expect Expected output; when null the input itself is
 *                expected back unchanged
 */
function assertPurification($input, $expect = null) {
    // a missing expectation means "output must equal input"
    $wanted = ($expect === null) ? $input : $expect;
    $actual = $this->purifier->purify($input);
    $this->assertIdentical($wanted, $actual);
}
|
||||
|
||||
function testNull() {
|
||||
function test() {
|
||||
$config = HTMLPurifier_Config::createDefault();
|
||||
$this->purifier = new HTMLPurifier($config);
|
||||
$this->assertPurification("Null byte\0", "Null byte");
|
||||
}
|
||||
|
||||
/**
 * Runs the purifier with HTML.Strict switched on and checks the two
 * transformations listed below.
 */
function testStrict() {

    // verbose syntax: build the config object by hand, then inject it
    $strict_config = HTMLPurifier_Config::createDefault();
    $strict_config->set('HTML', 'Strict', true);
    $this->purifier = new HTMLPurifier($strict_config);

    // input => expected output, checked in this order
    $expectations = array(
        '<u>Illegal underline</u>'
            => 'Illegal underline',
        '<blockquote>Illegal contents</blockquote>'
            => '<blockquote><p>Illegal contents</p></blockquote>'
    );

    foreach ($expectations as $html => $purified) {
        $this->assertPurification($html, $purified);
    }

}
|
||||
|
||||
/**
 * Configures the purifier through a plain array restricting the
 * allowed elements and attributes, then checks one input that must
 * come back unchanged and one that must be stripped down.
 */
function testDifferentAllowedElements() {

    $settings = array(
        'HTML.AllowedElements' => array('b', 'i', 'p', 'a'),
        'HTML.AllowedAttributes' => array('a.href', '*.id')
    );
    $this->purifier = new HTMLPurifier($settings);

    // no expectation given, so the output must equal the input
    $untouched = '<p>Par.</p><p>Para<a href="http://google.com/">gr</a>aph</p>Text<b>Bol<i>d</i></b>';
    $this->assertPurification($untouched);

    $dirty = '<span>Not allowed</span><a class="mef" id="foobar">Foobar</a>';
    $clean = 'Not allowed<a>Foobar</a>'; // no ID!!!
    $this->assertPurification($dirty, $clean);

}
|
||||
|
||||
/**
 * With Attr.DisableURI set to true, an image carrying only a src
 * attribute is expected to purify to the empty string.
 */
function testDisableURI() {

    $settings = array('Attr.DisableURI' => true);
    $this->purifier = new HTMLPurifier($settings);

    $image = '<img src="foobar"/>';
    $this->assertPurification($image, '');

}
|
||||
|
||||
/**
 * Feeds purifyArray() a list with both integer and string keys and
 * checks that each entry is purified with its key preserved, then
 * checks that the purifier's context member is an array afterwards.
 */
function test_purifyArray() {

    $this->purifier = new HTMLPurifier();

    $dirty = array('Good', '<b>Sketchy', 'foo' => '<script>bad</script>');
    $clean = array('Good', '<b>Sketchy</b>', 'foo' => 'bad');

    $purified = $this->purifier->purifyArray($dirty);
    $this->assertEqual($purified, $clean);

    $this->assertIsA($this->purifier->context, 'array');

}
|
||||
|
||||
}
|
||||
|
||||
?>
|
@@ -5,15 +5,15 @@ require_once 'HTMLPurifier/Token.php';
|
||||
class HTMLPurifier_TokenTest extends UnitTestCase
|
||||
{
|
||||
|
||||
function assertTokenConstruction($name, $attr,
|
||||
$expect_name = null, $expect_attr = null
|
||||
function assertTokenConstruction($name, $attributes,
|
||||
$expect_name = null, $expect_attributes = null
|
||||
) {
|
||||
if ($expect_name === null) $expect_name = $name;
|
||||
if ($expect_attr === null) $expect_attr = $attr;
|
||||
$token = new HTMLPurifier_Token_Start($name, $attr);
|
||||
if ($expect_attributes === null) $expect_attributes = $attributes;
|
||||
$token = new HTMLPurifier_Token_Start($name, $attributes);
|
||||
|
||||
$this->assertEqual($expect_name, $token->name);
|
||||
$this->assertEqual($expect_attr, $token->attr);
|
||||
$this->assertEqual($expect_name, $token->name);
|
||||
$this->assertEqual($expect_attributes, $token->attributes);
|
||||
}
|
||||
|
||||
function testConstruct() {
|
||||
|
@@ -44,12 +44,7 @@ $test_files[] = 'ConfigSchemaTest.php';
|
||||
$test_files[] = 'LexerTest.php';
|
||||
$test_files[] = 'Lexer/DirectLexTest.php';
|
||||
$test_files[] = 'TokenTest.php';
|
||||
$test_files[] = 'ChildDef/RequiredTest.php';
|
||||
$test_files[] = 'ChildDef/OptionalTest.php';
|
||||
$test_files[] = 'ChildDef/ChameleonTest.php';
|
||||
$test_files[] = 'ChildDef/CustomTest.php';
|
||||
$test_files[] = 'ChildDef/TableTest.php';
|
||||
$test_files[] = 'ChildDef/StrictBlockquoteTest.php';
|
||||
$test_files[] = 'ChildDefTest.php';
|
||||
$test_files[] = 'GeneratorTest.php';
|
||||
$test_files[] = 'EntityLookupTest.php';
|
||||
$test_files[] = 'Strategy/RemoveForeignElementsTest.php';
|
||||
|
Reference in New Issue
Block a user