1
0
mirror of https://github.com/ezyang/htmlpurifier.git synced 2025-08-04 13:18:00 +02:00

Compare commits

...

34 Commits

Author SHA1 Message Date
Edward Z. Yang
17af0e4fc1 Release 4.4.0
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2012-01-18 19:22:31 -05:00
Edward Z. Yang
70028f83d6 Make all of the tests work on all PHP versions.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2012-01-18 18:57:13 -05:00
Edward Z. Yang
5c5e3fe79f Avoid doing stupidly clever reflection tricks that make old PHP versions sad.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2012-01-18 18:21:36 -05:00
Edward Z. Yang
56a26cab14 Modernize some of the testing facilities.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2012-01-18 18:10:16 -05:00
Edward Z. Yang
1c7fedff5a Tighter CSS selector validation.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2012-01-17 15:36:26 -05:00
Edward Z. Yang
9de0785448 Remark about bypassing host list with punycode.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2012-01-06 05:32:53 -08:00
Edward Z. Yang
974fe3f25e Optional support for IDNAs with PEAR Net_IDNA2
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2012-01-06 05:28:00 -08:00
Edward Z. Yang
94468f3c24 Remove PEARSax3 lexer.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2012-01-03 20:40:17 +08:00
Edward Z. Yang
e0354fecd9 Make forms work for transitional doctypes.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2011-12-30 22:56:44 +08:00
Edward Z. Yang
1bbbc624dd Remove inscrutable TODO, optionalize another.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2011-12-27 23:50:02 +08:00
Edward Z. Yang
49879d2cc6 Add note about superseding modules in TODO.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2011-12-27 23:21:32 +08:00
Edward Z. Yang
5c9b5130c8 Bump minor version number to 4.4.0.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2011-12-26 21:55:14 +08:00
Edward Z. Yang
d2de8d976a Add test for invalid SafeIframe usage.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2011-12-26 21:52:55 +08:00
Bradley M. Froehle
4164b2eb2b Implement Iframe module, and provide %HTML.SafeIframe and %URI.SafeIframeRegexp for untrusted usage.
The purpose of this addition is twofold. In trusted mode, iframes are
now unconditionally allowed.

However, many online video providers (YouTube, Vimeo) and other web
applications (Google Maps, Google Calendar, etc) provide embed code in
iframe format, which is useful functionality in untrusted mode.
You can specify iframes as trusted elements with %HTML.SafeIframe;
however, you need to additionally specify a whitelist mechanism such as
%URI.SafeIframeRegexp to say what iframe embeds are OK (by default
everything is rejected).

Note: As iframes are invalid in strict doctypes, you will not be able to
use them there.

We also added an always_load parameter to URIFilters in order to support
the strange nature of the SafeIframe URIFilter (it always needs to be
loaded, due to the inability of accessing the %HTML.SafeIframe directive
to see if it's needed!)  We expect this URIFilter can expand in the future
to offer more complex validation mechanisms.

Signed-off-by: Bradley M. Froehle <brad.froehle@gmail.com>
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2011-12-26 21:50:53 +08:00
Edward Z. Yang
1e5293d9fe Add more attributions.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2011-12-26 15:45:41 +08:00
Edward Z. Yang
6b643ede02 Implement %HTML.AllowedComments and %HTML.AllowedCommentsRegexp
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2011-12-26 15:34:42 +08:00
Edward Z. Yang
e41af46a8b Fix broken table content model, easily seen in XHTML1.1
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2011-12-26 14:49:26 +08:00
Edward Z. Yang
3570c9985a Properly handle nested sublists by folding into previous list item.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2011-12-26 14:00:34 +08:00
Edward Z. Yang
8d572993b4 Implement %HTML.TargetBlank
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2011-12-26 08:36:00 +08:00
Edward Z. Yang
1bacbc0563 Add isBenign and getDefaultScheme methods.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2011-12-25 23:31:15 +08:00
Edward Z. Yang
bfe2c10d07 Add a little bit of documentation about contexts for URIFilters.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2011-12-25 23:31:15 +08:00
Edward Z. Yang
9b10515fa4 Core.EscapeNonASCIICharacters now always works, even if target is UTF-8.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2011-12-25 23:31:15 +08:00
Edward Z. Yang
1255d0f15d Add support for scope attribute on td and th.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2011-12-25 23:31:13 +08:00
Edward Z. Yang
d45e11cc6b Add one more test for SPL autoload defaults.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2011-12-25 02:58:51 -05:00
Edward Z. Yang
94c15d1f56 Fix iconv truncation bug.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2011-12-25 02:31:06 -05:00
Edward Z. Yang
ce68cfe484 Remove spurious abstract definition; PHP 5.4 doesn't like that.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2011-12-18 13:28:07 -05:00
Edward Z. Yang
9f5f85952b Don't unset parser variable; plays poorly with serialize.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2011-12-18 13:27:51 -05:00
Edward Z. Yang
dbb365155b Typofix.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2011-08-24 09:56:51 -04:00
Edward Z. Yang
32c0ffde0c Don't add nofollow for matching hosts, generalize this code.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2011-08-24 09:56:49 -04:00
Edward Z. Yang
856a5e5b89 Update INSTALL to avoid missing config snafu, update usage.xml.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2011-08-24 09:56:21 -04:00
Edward Z. Yang
820d6e9097 Do not duplicate nofollow attribute in transform.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2011-08-24 09:56:13 -04:00
Edward Z. Yang
35b1fbce01 Explicitly initialize anonModule to null.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2011-04-19 22:46:17 +01:00
Edward Z. Yang
bcfbb8338c URI.Munge munges https to http URIs.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2011-04-10 13:09:24 +01:00
Edward Z. Yang
f51a6f7de9 Color keywords now case-insensitive.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2011-04-10 12:45:02 +01:00
82 changed files with 1501 additions and 365 deletions

View File

@@ -31,7 +31,7 @@ PROJECT_NAME = HTMLPurifier
# This could be handy for archiving the generated documentation or
# if some version control system is used.
PROJECT_NUMBER = 4.3.0
PROJECT_NUMBER = 4.4.0
# The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute)
# base path where the generated documentation will be put.

2
FOCUS
View File

@@ -1,4 +1,4 @@
9 - Major security fixes
8 - Minor security fixes
[ Appendix A: Release focus IDs ]
0 - N/A

12
INSTALL
View File

@@ -26,6 +26,10 @@ These optional extensions can enhance the capabilities of HTML Purifier:
* bcmath : Used for unit conversion and imagecrash protection
* tidy : Used for pretty-printing HTML
These optional libraries can enhance the capabilities of HTML Purifier:
* CSSTidy : Clean CSS stylesheets using %Core.ExtractStyleBlocks
* Net_IDNA2 (PEAR) : IRI support using %Core.EnableIDNA
---------------------------------------------------------------------------
2. Reconnaissance
@@ -331,11 +335,6 @@ Or move the cache directory somewhere else (no trailing slash):
The interface is mind-numbingly simple:
$purifier = new HTMLPurifier();
$clean_html = $purifier->purify( $dirty_html );
...or, if you're using the configuration object:
$purifier = new HTMLPurifier($config);
$clean_html = $purifier->purify( $dirty_html );
@@ -354,7 +353,8 @@ If your website is in UTF-8 and XHTML Transitional, use this code:
<?php
require_once '/path/to/htmlpurifier/library/HTMLPurifier.auto.php';
$purifier = new HTMLPurifier();
$config = HTMLPurifier_Config::createDefault();
$purifier = new HTMLPurifier($config);
$clean_html = $purifier->purify($dirty_html);
?>

43
NEWS
View File

@@ -9,6 +9,49 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier
. Internal change
==========================
4.4.0, released 2012-01-18
# Removed PEARSax3 handler.
# URI.Munge now munges URIs inside the same host that go from https
to http. Reported by Neike Taika-Tessaro.
# Core.EscapeNonASCIICharacters now always transforms entities to
entities, even if target encoding is UTF-8.
# Tighten up selector validation in ExtractStyleBlocks.
Non-syntactically valid selectors are now rejected, along with
some of the more obscure ones such as attribute selectors, the
:lang pseudoselector, and anything not in CSS2.1. Furthermore,
ID and class selectors now work properly with the relevant
configuration attributes. Also, mute errors when parsing CSS
with CSS Tidy.
! Added support for 'scope' attribute on tables.
! Added %HTML.TargetBlank, which adds target="blank" to all outgoing links.
! Properly handle sub-lists directly nested inside of lists in
a standards compliant way, by moving them into the preceding <li>
! Added %HTML.AllowedComments and %HTML.AllowedCommentsRegexp for
limited allowed comments in untrusted situations.
! Implement iframes, and allow them to be used in untrusted mode with
%HTML.SafeIframe and %URI.SafeIframeRegexp. Thanks Bradley M. Froehle
<brad.froehle@gmail.com> for submitting an initial version of the patch.
! The Forms module now works properly for transitional doctypes.
! Added support for internationalized domain names. You need the PEAR
Net_IDNA2 module to be in your path; if it is installed, ensure the
class can be loaded and then set %Core.EnableIDNA to true.
- Color keywords are now case insensitive. Thanks Yzmir Ramirez
<yramirez-htmlpurifier@adicio.com> for reporting.
- Explicitly initialize anonModule variable to null.
- Do not duplicate nofollow if already present. Thanks 178
for reporting.
- Do not add nofollow if hostname matches our current host. Thanks 178
for reporting, and Neike Taika-Tessaro for helping diagnose.
- Do not unset parser variable; this fixes intermittent serialization
problems. Thanks Neike Taika-Tessaro for reporting, bill
<10010tiger@gmail.com> for diagnosing.
- Fix iconv truncation bug, where non-UTF-8 target encodings see
output truncated after around 8000 characters. Thanks Jörg Ludwig
<joerg.ludwig@iserv.eu> for reporting.
- Fix broken table content model for XHTML1.1 (and also earlier
versions, although the W3C validator doesn't catch those violations).
Thanks GlitchMr <glitch.mr@gmail.com> for reporting.
4.3.0, released 2011-03-27
# Fixed broken caching of customized raw definitions, but requires an
API change. The old API still works but will emit a warning,

14
TODO
View File

@@ -14,20 +14,24 @@ afraid to cast your vote for the next feature to be implemented!
Things to do as soon as possible:
- Think about allowing explicit order of operations hooks for transforms
- Inputs don't do the right thing with submit
- Fix "<.<" bug (trailing < is removed if not EOD)
- Build in better internal state dumps and debugging tools for remote
debugging
- Allowed/Allowed* have strange interactions when both set
- Transform lone embeds into object tags
? Transform lone embeds into object tags
- Deprecated config options that emit warnings when you set them (with'
a way of muting the warning if you really want to)
- Make HTML.Trusted work with Output.FlashCompat
- HTML.Trusted and HTML.SafeObject have funny interaction; general
problem is what to do when a module "supersedes" another
(see also tables and basic tables.) This is a little dicier
because HTML.SafeObject has some extra functionality that
trusted might find useful. See http://htmlpurifier.org/phorum/read.php?3,5762,6100
FUTURE VERSIONS
---------------
4.4 release [OMG CONFIG PONIES]
4.5 release [OMG CONFIG PONIES]
! Fix Printer. It's from the old days when we didn't have decent XML classes
! Factor demo.php into a set of Printer classes, and then create a stub
file for users here (inside the actual HTML Purifier library)
@@ -112,6 +116,10 @@ Neat feature related
3. Extend the tag exclusion system to specify whether or not the
contents should be dropped or not (currently, there's code that could do
something like this if it didn't drop the inner text too.)
? Make AutoParagraph also support paragraph-izing double <br> tags, and not
just double newlines. This is kind of tough to do in the current framework,
though, and might be reasonably approximated by search replacing double <br>s
with newlines before running it through HTML Purifier.
Maintenance related (slightly boring)
# CHMOD install script for PEAR installs

View File

@@ -1 +1 @@
4.3.0
4.4.0

View File

@@ -1,8 +1,8 @@
HTML Purifier 4.3.0 is a major security release addressing various
security vulnerabilities related to user-submitted code and legitimate
client-side scripts. It also contains an accumulation of new features
and bugfixes over half a year. New configuration options include
%CSS.Trusted, %CSS.AllowedFonts and %Cache.SerializerPermissions.
There is a backwards-incompatible API change for customized raw
definitions, see <http://htmlpurifier.org/docs/enduser-customize.html#optimized>
for details.
HTML Purifier 4.4.0 is a minor security release addressing a security
vulnerability associated with some optional functionality. It also
contains an accumulation of new features and bugfixes over half a year.
New configuration options include %HTML.TargetBlank,
%HTML.AllowedComments, %HTML.AllowedCommentsRegexp, %HTML.SafeIframe,
%URI.SafeIframeRegexp, %Core.EnableIDNA (requires PEAR Net_IDNA2 module and
doesn't work for PHP 5.0.5). We also now support the 'scope' attribute on
tables.

View File

@@ -14,7 +14,7 @@
<line>348</line>
</file>
<file name="HTMLPurifier/Strategy/RemoveForeignElements.php">
<line>47</line>
<line>50</line>
</file>
</directive>
<directive id="CSS.MaxImgLength">
@@ -79,19 +79,19 @@
</directive>
<directive id="Core.Encoding">
<file name="HTMLPurifier/Encoder.php">
<line>267</line>
<line>300</line>
<line>337</line>
<line>367</line>
</file>
</directive>
<directive id="Test.ForceNoIconv">
<file name="HTMLPurifier/Encoder.php">
<line>272</line>
<line>308</line>
<line>341</line>
<line>374</line>
</file>
</directive>
<directive id="Core.EscapeNonASCIICharacters">
<file name="HTMLPurifier/Encoder.php">
<line>304</line>
<line>368</line>
</file>
</directive>
<directive id="Output.CommentScriptContents">
@@ -169,7 +169,7 @@
</directive>
<directive id="HTML.Trusted">
<file name="HTMLPurifier/HTMLModuleManager.php">
<line>202</line>
<line>204</line>
</file>
<file name="HTMLPurifier/Lexer.php">
<line>271</line>
@@ -186,32 +186,37 @@
</directive>
<directive id="HTML.AllowedModules">
<file name="HTMLPurifier/HTMLModuleManager.php">
<line>209</line>
<line>211</line>
</file>
</directive>
<directive id="HTML.CoreModules">
<file name="HTMLPurifier/HTMLModuleManager.php">
<line>210</line>
<line>212</line>
</file>
</directive>
<directive id="HTML.Proprietary">
<file name="HTMLPurifier/HTMLModuleManager.php">
<line>220</line>
<line>222</line>
</file>
</directive>
<directive id="HTML.SafeObject">
<file name="HTMLPurifier/HTMLModuleManager.php">
<line>223</line>
<line>225</line>
</file>
</directive>
<directive id="HTML.SafeEmbed">
<file name="HTMLPurifier/HTMLModuleManager.php">
<line>226</line>
<line>228</line>
</file>
</directive>
<directive id="HTML.Nofollow">
<file name="HTMLPurifier/HTMLModuleManager.php">
<line>229</line>
<line>231</line>
</file>
</directive>
<directive id="HTML.TargetBlank">
<file name="HTMLPurifier/HTMLModuleManager.php">
<line>234</line>
</file>
</directive>
<directive id="Attr.IDBlacklist">
@@ -249,7 +254,7 @@
</directive>
<directive id="URI.">
<file name="HTMLPurifier/URIDefinition.php">
<line>55</line>
<line>59</line>
</file>
<file name="HTMLPurifier/URIFilter/Munge.php">
<line>12</line>
@@ -257,20 +262,20 @@
</directive>
<directive id="URI.Host">
<file name="HTMLPurifier/URIDefinition.php">
<line>64</line>
<line>69</line>
</file>
<file name="HTMLPurifier/URIScheme.php">
<line>75</line>
<line>81</line>
</file>
</directive>
<directive id="URI.Base">
<file name="HTMLPurifier/URIDefinition.php">
<line>65</line>
<line>70</line>
</file>
</directive>
<directive id="URI.DefaultScheme">
<file name="HTMLPurifier/URIDefinition.php">
<line>72</line>
<line>77</line>
</file>
</directive>
<directive id="URI.AllowedSchemes">
@@ -318,23 +323,23 @@
</directive>
<directive id="Attr.EnableID">
<file name="HTMLPurifier/AttrDef/HTML/ID.php">
<line>20</line>
<line>30</line>
</file>
</directive>
<directive id="Attr.IDPrefix">
<file name="HTMLPurifier/AttrDef/HTML/ID.php">
<line>26</line>
<line>36</line>
</file>
</directive>
<directive id="Attr.IDPrefixLocal">
<file name="HTMLPurifier/AttrDef/HTML/ID.php">
<line>28</line>
<line>31</line>
<line>38</line>
<line>41</line>
</file>
</directive>
<directive id="Attr.IDBlacklistRegexp">
<file name="HTMLPurifier/AttrDef/HTML/ID.php">
<line>54</line>
<line>64</line>
</file>
</directive>
<directive id="Attr.">
@@ -342,6 +347,11 @@
<line>30</line>
</file>
</directive>
<directive id="Core.EnableIDNA">
<file name="HTMLPurifier/AttrDef/URI/Host.php">
<line>67</line>
</file>
</directive>
<directive id="Attr.DefaultTextDir">
<file name="HTMLPurifier/AttrTransform/BdoDir.php">
<line>13</line>
@@ -401,17 +411,25 @@
</directive>
<directive id="Filter.ExtractStyleBlocks.TidyImpl">
<file name="HTMLPurifier/Filter/ExtractStyleBlocks.php">
<line>41</line>
<line>54</line>
</file>
</directive>
<directive id="Filter.ExtractStyleBlocks.Scope">
<file name="HTMLPurifier/Filter/ExtractStyleBlocks.php">
<line>65</line>
<line>78</line>
</file>
</directive>
<directive id="Filter.ExtractStyleBlocks.Escaping">
<file name="HTMLPurifier/Filter/ExtractStyleBlocks.php">
<line>123</line>
<line>276</line>
</file>
</directive>
<directive id="HTML.SafeIframe">
<file name="HTMLPurifier/HTMLModule/Iframe.php">
<line>17</line>
</file>
<file name="HTMLPurifier/URIFilter/SafeIframe.php">
<line>23</line>
</file>
</directive>
<directive id="HTML.MaxImgLength">
@@ -473,19 +491,29 @@
<line>19</line>
</file>
</directive>
<directive id="Core.RemoveScriptContents">
<directive id="HTML.AllowedComments">
<file name="HTMLPurifier/Strategy/RemoveForeignElements.php">
<line>24</line>
</file>
</directive>
<directive id="HTML.AllowedCommentsRegexp">
<file name="HTMLPurifier/Strategy/RemoveForeignElements.php">
<line>25</line>
</file>
</directive>
<directive id="Core.RemoveScriptContents">
<file name="HTMLPurifier/Strategy/RemoveForeignElements.php">
<line>28</line>
</file>
</directive>
<directive id="Core.HiddenElements">
<file name="HTMLPurifier/Strategy/RemoveForeignElements.php">
<line>26</line>
<line>29</line>
</file>
</directive>
<directive id="URI.HostBlacklist">
<file name="HTMLPurifier/URIFilter/HostBlacklist.php">
<line>8</line>
<line>12</line>
</file>
</directive>
<directive id="URI.MungeResources">
@@ -498,4 +526,9 @@
<line>15</line>
</file>
</directive>
<directive id="URI.SafeIframeRegexp">
<file name="HTMLPurifier/URIFilter/SafeIframe.php">
<line>18</line>
</file>
</directive>
</usage>

View File

@@ -255,7 +255,7 @@ Mozilla on inside and needs -moz-outline, no IE support.</td></tr>
<tr class="feature"><td>axis</td><td>TD, TH</td><td>W3C only: No browser implementation</td></tr>
<tr class="feature"><td>char</td><td>COL, COLGROUP, TBODY, TD, TFOOT, TH, THEAD, TR</td><td>W3C only: No browser implementation</td></tr>
<tr class="feature"><td>headers</td><td>TD, TH</td><td>W3C only: No browser implementation</td></tr>
<tr class="feature"><td>scope</td><td>TD, TH</td><td>W3C only: No browser implementation</td></tr>
<tr class="impl-yes"><td>scope</td><td>TD, TH</td><td>W3C only: No browser implementation</td></tr>
</tbody>
<tbody class="impl-yes">

View File

@@ -561,7 +561,7 @@ page on special characters</a> for more details.
<h3 id="whyutf8-forms">Forms</h3>
<p>While we're on the tack of users, how do non-UTF-8 web forms deal
with characters that our outside of their character set? Rather than
with characters that are outside of their character set? Rather than
discuss what UTF-8 does right, we're going to show what could go wrong
if you didn't use UTF-8 and people tried to use characters outside
of your character encoding.</p>

View File

@@ -7,7 +7,7 @@
* primary concern and you are using an opcode cache. PLEASE DO NOT EDIT THIS
* FILE, changes will be overwritten the next time the script is run.
*
* @version 4.3.0
* @version 4.4.0
*
* @warning
* You must *not* include any other HTML Purifier files before this file,
@@ -73,6 +73,7 @@ require 'HTMLPurifier/UnitConverter.php';
require 'HTMLPurifier/VarParser.php';
require 'HTMLPurifier/VarParserException.php';
require 'HTMLPurifier/AttrDef/CSS.php';
require 'HTMLPurifier/AttrDef/Clone.php';
require 'HTMLPurifier/AttrDef/Enum.php';
require 'HTMLPurifier/AttrDef/Integer.php';
require 'HTMLPurifier/AttrDef/Lang.php';
@@ -90,6 +91,7 @@ require 'HTMLPurifier/AttrDef/CSS/DenyElementDecorator.php';
require 'HTMLPurifier/AttrDef/CSS/Filter.php';
require 'HTMLPurifier/AttrDef/CSS/Font.php';
require 'HTMLPurifier/AttrDef/CSS/FontFamily.php';
require 'HTMLPurifier/AttrDef/CSS/Ident.php';
require 'HTMLPurifier/AttrDef/CSS/ImportantDecorator.php';
require 'HTMLPurifier/AttrDef/CSS/Length.php';
require 'HTMLPurifier/AttrDef/CSS/ListStyle.php';
@@ -130,10 +132,12 @@ require 'HTMLPurifier/AttrTransform/SafeEmbed.php';
require 'HTMLPurifier/AttrTransform/SafeObject.php';
require 'HTMLPurifier/AttrTransform/SafeParam.php';
require 'HTMLPurifier/AttrTransform/ScriptRequired.php';
require 'HTMLPurifier/AttrTransform/TargetBlank.php';
require 'HTMLPurifier/AttrTransform/Textarea.php';
require 'HTMLPurifier/ChildDef/Chameleon.php';
require 'HTMLPurifier/ChildDef/Custom.php';
require 'HTMLPurifier/ChildDef/Empty.php';
require 'HTMLPurifier/ChildDef/List.php';
require 'HTMLPurifier/ChildDef/Required.php';
require 'HTMLPurifier/ChildDef/Optional.php';
require 'HTMLPurifier/ChildDef/StrictBlockquote.php';
@@ -148,6 +152,7 @@ require 'HTMLPurifier/HTMLModule/CommonAttributes.php';
require 'HTMLPurifier/HTMLModule/Edit.php';
require 'HTMLPurifier/HTMLModule/Forms.php';
require 'HTMLPurifier/HTMLModule/Hypertext.php';
require 'HTMLPurifier/HTMLModule/Iframe.php';
require 'HTMLPurifier/HTMLModule/Image.php';
require 'HTMLPurifier/HTMLModule/Legacy.php';
require 'HTMLPurifier/HTMLModule/List.php';
@@ -164,6 +169,7 @@ require 'HTMLPurifier/HTMLModule/Scripting.php';
require 'HTMLPurifier/HTMLModule/StyleAttribute.php';
require 'HTMLPurifier/HTMLModule/Tables.php';
require 'HTMLPurifier/HTMLModule/Target.php';
require 'HTMLPurifier/HTMLModule/TargetBlank.php';
require 'HTMLPurifier/HTMLModule/Text.php';
require 'HTMLPurifier/HTMLModule/Tidy.php';
require 'HTMLPurifier/HTMLModule/XMLCommonAttributes.php';
@@ -202,6 +208,7 @@ require 'HTMLPurifier/URIFilter/DisableResources.php';
require 'HTMLPurifier/URIFilter/HostBlacklist.php';
require 'HTMLPurifier/URIFilter/MakeAbsolute.php';
require 'HTMLPurifier/URIFilter/Munge.php';
require 'HTMLPurifier/URIFilter/SafeIframe.php';
require 'HTMLPurifier/URIScheme/data.php';
require 'HTMLPurifier/URIScheme/file.php';
require 'HTMLPurifier/URIScheme/ftp.php';

View File

@@ -19,7 +19,7 @@
*/
/*
HTML Purifier 4.3.0 - Standards Compliant HTML Filtering
HTML Purifier 4.4.0 - Standards Compliant HTML Filtering
Copyright (C) 2006-2008 Edward Z. Yang
This library is free software; you can redistribute it and/or
@@ -55,10 +55,10 @@ class HTMLPurifier
{
/** Version of HTML Purifier */
public $version = '4.3.0';
public $version = '4.4.0';
/** Constant with version of HTML Purifier */
const VERSION = '4.3.0';
const VERSION = '4.4.0';
/** Global configuration object */
public $config;

View File

@@ -67,6 +67,7 @@ require_once $__dir . '/HTMLPurifier/UnitConverter.php';
require_once $__dir . '/HTMLPurifier/VarParser.php';
require_once $__dir . '/HTMLPurifier/VarParserException.php';
require_once $__dir . '/HTMLPurifier/AttrDef/CSS.php';
require_once $__dir . '/HTMLPurifier/AttrDef/Clone.php';
require_once $__dir . '/HTMLPurifier/AttrDef/Enum.php';
require_once $__dir . '/HTMLPurifier/AttrDef/Integer.php';
require_once $__dir . '/HTMLPurifier/AttrDef/Lang.php';
@@ -84,6 +85,7 @@ require_once $__dir . '/HTMLPurifier/AttrDef/CSS/DenyElementDecorator.php';
require_once $__dir . '/HTMLPurifier/AttrDef/CSS/Filter.php';
require_once $__dir . '/HTMLPurifier/AttrDef/CSS/Font.php';
require_once $__dir . '/HTMLPurifier/AttrDef/CSS/FontFamily.php';
require_once $__dir . '/HTMLPurifier/AttrDef/CSS/Ident.php';
require_once $__dir . '/HTMLPurifier/AttrDef/CSS/ImportantDecorator.php';
require_once $__dir . '/HTMLPurifier/AttrDef/CSS/Length.php';
require_once $__dir . '/HTMLPurifier/AttrDef/CSS/ListStyle.php';
@@ -124,10 +126,12 @@ require_once $__dir . '/HTMLPurifier/AttrTransform/SafeEmbed.php';
require_once $__dir . '/HTMLPurifier/AttrTransform/SafeObject.php';
require_once $__dir . '/HTMLPurifier/AttrTransform/SafeParam.php';
require_once $__dir . '/HTMLPurifier/AttrTransform/ScriptRequired.php';
require_once $__dir . '/HTMLPurifier/AttrTransform/TargetBlank.php';
require_once $__dir . '/HTMLPurifier/AttrTransform/Textarea.php';
require_once $__dir . '/HTMLPurifier/ChildDef/Chameleon.php';
require_once $__dir . '/HTMLPurifier/ChildDef/Custom.php';
require_once $__dir . '/HTMLPurifier/ChildDef/Empty.php';
require_once $__dir . '/HTMLPurifier/ChildDef/List.php';
require_once $__dir . '/HTMLPurifier/ChildDef/Required.php';
require_once $__dir . '/HTMLPurifier/ChildDef/Optional.php';
require_once $__dir . '/HTMLPurifier/ChildDef/StrictBlockquote.php';
@@ -142,6 +146,7 @@ require_once $__dir . '/HTMLPurifier/HTMLModule/CommonAttributes.php';
require_once $__dir . '/HTMLPurifier/HTMLModule/Edit.php';
require_once $__dir . '/HTMLPurifier/HTMLModule/Forms.php';
require_once $__dir . '/HTMLPurifier/HTMLModule/Hypertext.php';
require_once $__dir . '/HTMLPurifier/HTMLModule/Iframe.php';
require_once $__dir . '/HTMLPurifier/HTMLModule/Image.php';
require_once $__dir . '/HTMLPurifier/HTMLModule/Legacy.php';
require_once $__dir . '/HTMLPurifier/HTMLModule/List.php';
@@ -158,6 +163,7 @@ require_once $__dir . '/HTMLPurifier/HTMLModule/Scripting.php';
require_once $__dir . '/HTMLPurifier/HTMLModule/StyleAttribute.php';
require_once $__dir . '/HTMLPurifier/HTMLModule/Tables.php';
require_once $__dir . '/HTMLPurifier/HTMLModule/Target.php';
require_once $__dir . '/HTMLPurifier/HTMLModule/TargetBlank.php';
require_once $__dir . '/HTMLPurifier/HTMLModule/Text.php';
require_once $__dir . '/HTMLPurifier/HTMLModule/Tidy.php';
require_once $__dir . '/HTMLPurifier/HTMLModule/XMLCommonAttributes.php';
@@ -196,6 +202,7 @@ require_once $__dir . '/HTMLPurifier/URIFilter/DisableResources.php';
require_once $__dir . '/HTMLPurifier/URIFilter/HostBlacklist.php';
require_once $__dir . '/HTMLPurifier/URIFilter/MakeAbsolute.php';
require_once $__dir . '/HTMLPurifier/URIFilter/Munge.php';
require_once $__dir . '/HTMLPurifier/URIFilter/SafeIframe.php';
require_once $__dir . '/HTMLPurifier/URIScheme/data.php';
require_once $__dir . '/HTMLPurifier/URIScheme/file.php';
require_once $__dir . '/HTMLPurifier/URIScheme/ftp.php';

View File

@@ -0,0 +1,24 @@
<?php
/**
* Validates based on {ident} CSS grammar production
*/
class HTMLPurifier_AttrDef_CSS_Ident extends HTMLPurifier_AttrDef
{
public function validate($string, $config, $context) {
$string = trim($string);
// early abort: '' and '0' (strings that convert to false) are invalid
if (!$string) return false;
$pattern = '/^(-?[A-Za-z_][A-Za-z_\-0-9]*)$/';
if (!preg_match($pattern, $string)) return false;
return $string;
}
}
// vim: et sw=4 sts=4

View File

@@ -0,0 +1,28 @@
<?php
/**
* Dummy AttrDef that mimics another AttrDef, BUT it generates clones
* with make.
*/
class HTMLPurifier_AttrDef_Clone extends HTMLPurifier_AttrDef
{
/**
* What we're cloning
*/
protected $clone;
public function __construct($clone) {
$this->clone = $clone;
}
public function validate($v, $config, $context) {
return $this->clone->validate($v, $config, $context);
}
public function make($string) {
return clone $this->clone;
}
}
// vim: et sw=4 sts=4

View File

@@ -14,7 +14,7 @@ class HTMLPurifier_AttrDef_HTML_Color extends HTMLPurifier_AttrDef
$string = trim($string);
if (empty($string)) return false;
if (isset($colors[$string])) return $colors[$string];
if (isset($colors[strtolower($string)])) return $colors[$string];
if ($string[0] === '#') $hex = substr($string, 1);
else $hex = $string;

View File

@@ -12,12 +12,22 @@
class HTMLPurifier_AttrDef_HTML_ID extends HTMLPurifier_AttrDef
{
// ref functionality disabled, since we also have to verify
// whether or not the ID it refers to exists
// selector is NOT a valid thing to use for IDREFs, because IDREFs
// *must* target IDs that exist, whereas selector #ids do not.
/**
* Determines whether or not we're validating an ID in a CSS
* selector context.
*/
protected $selector;
public function __construct($selector = false) {
$this->selector = $selector;
}
public function validate($id, $config, $context) {
if (!$config->get('Attr.EnableID')) return false;
if (!$this->selector && !$config->get('Attr.EnableID')) return false;
$id = trim($id); // trim it first
@@ -33,10 +43,10 @@ class HTMLPurifier_AttrDef_HTML_ID extends HTMLPurifier_AttrDef
'%Attr.IDPrefix is set', E_USER_WARNING);
}
//if (!$this->ref) {
if (!$this->selector) {
$id_accumulator =& $context->get('IDAccumulator');
if (isset($id_accumulator->ids[$id])) return false;
//}
}
// we purposely avoid using regex, hopefully this is faster
@@ -56,7 +66,7 @@ class HTMLPurifier_AttrDef_HTML_ID extends HTMLPurifier_AttrDef
return false;
}
if (/*!$this->ref && */$result) $id_accumulator->add($id);
if (!$this->selector && $result) $id_accumulator->add($id);
// if no change was made to the ID, return the result
// else, return the new id if stripping whitespace made it

View File

@@ -19,7 +19,7 @@ class HTMLPurifier_AttrDef_URI extends HTMLPurifier_AttrDef
}
public function make($string) {
$embeds = (bool) $string;
$embeds = ($string === 'embedded');
return new HTMLPurifier_AttrDef_URI($embeds);
}

View File

@@ -44,9 +44,8 @@ class HTMLPurifier_AttrDef_URI_Host extends HTMLPurifier_AttrDef
// A regular domain name.
// This breaks I18N domain names, but we don't have proper IRI support,
// so force users to insert Punycode. If there's complaining we'll
// try to fix things into an international friendly form.
// This doesn't match I18N domain names, but we don't have proper IRI support,
// so force users to insert Punycode.
// The productions describing this are:
$a = '[a-z]'; // alpha
@@ -57,12 +56,46 @@ class HTMLPurifier_AttrDef_URI_Host extends HTMLPurifier_AttrDef
// toplabel = alpha | alpha *( alphanum | "-" ) alphanum
$toplabel = "$a($and*$an)?";
// hostname = *( domainlabel "." ) toplabel [ "." ]
$match = preg_match("/^($domainlabel\.)*$toplabel\.?$/i", $string);
if (!$match) return false;
if (preg_match("/^($domainlabel\.)*$toplabel\.?$/i", $string)) {
return $string;
}
// If we have Net_IDNA2 support, we can support IRIs by
// punycoding them. (This is the most portable thing to do,
// since otherwise we have to assume browsers support
if ($config->get('Core.EnableIDNA')) {
$idna = new Net_IDNA2(array('encoding' => 'utf8', 'overlong' => false, 'strict' => true));
// we need to encode each period separately
$parts = explode('.', $string);
try {
$new_parts = array();
foreach ($parts as $part) {
$encodable = false;
for ($i = 0, $c = strlen($part); $i < $c; $i++) {
if (ord($part[$i]) > 0x7a) {
$encodable = true;
break;
}
}
if (!$encodable) {
$new_parts[] = $part;
} else {
$new_parts[] = $idna->encode($part);
}
}
$string = implode('.', $new_parts);
if (preg_match("/^($domainlabel\.)*$toplabel\.?$/i", $string)) {
return $string;
}
} catch (Exception $e) {
// XXX error reporting
}
}
return false;
}
}
// vim: et sw=4 sts=4

View File

@@ -24,9 +24,13 @@ class HTMLPurifier_AttrTransform_Nofollow extends HTMLPurifier_AttrTransform
$url = $this->parser->parse($attr['href']);
$scheme = $url->getSchemeObj($config, $context);
if (!is_null($url->host) && $scheme !== false && $scheme->browsable) {
if ($scheme->browsable && !$url->isLocal($config, $context)) {
if (isset($attr['rel'])) {
$attr['rel'] .= ' nofollow';
$rels = explode(' ', $attr);
if (!in_array('nofollow', $rels)) {
$rels[] = 'nofollow';
}
$attr['rel'] = implode(' ', $rels);
} else {
$attr['rel'] = 'nofollow';
}

View File

@@ -0,0 +1,38 @@
<?php
// must be called POST validation
/**
* Adds target="blank" to all outbound links. This transform is
* only attached if Attr.TargetBlank is TRUE. This works regardless
* of whether or not Attr.AllowedFrameTargets
*/
class HTMLPurifier_AttrTransform_TargetBlank extends HTMLPurifier_AttrTransform
{
private $parser;
public function __construct() {
$this->parser = new HTMLPurifier_URIParser();
}
public function transform($attr, $config, $context) {
if (!isset($attr['href'])) {
return $attr;
}
// XXX Kind of inefficient
$url = $this->parser->parse($attr['href']);
$scheme = $url->getSchemeObj($config, $context);
if ($scheme->browsable && !$url->isBenign($config, $context)) {
$attr['target'] = 'blank';
}
return $attr;
}
}
// vim: et sw=4 sts=4

View File

@@ -15,6 +15,13 @@ class HTMLPurifier_AttrTypes
* types.
*/
public function __construct() {
// XXX This is kind of poor, since we don't actually /clone/
// instances; instead, we use the supplied make() attribute. So,
// the underlying class must know how to deal with arguments.
// With the old implementation of Enum, that ignored its
// arguments when handling a make dispatch, the IAlign
// definition wouldn't work.
// pseudo-types, must be instantiated via shorthand
$this->info['Enum'] = new HTMLPurifier_AttrDef_Enum();
$this->info['Bool'] = new HTMLPurifier_AttrDef_HTML_Bool();
@@ -29,6 +36,9 @@ class HTMLPurifier_AttrTypes
$this->info['URI'] = new HTMLPurifier_AttrDef_URI();
$this->info['LanguageCode'] = new HTMLPurifier_AttrDef_Lang();
$this->info['Color'] = new HTMLPurifier_AttrDef_HTML_Color();
$this->info['IAlign'] = self::makeEnum('top,middle,bottom,left,right');
$this->info['LAlign'] = self::makeEnum('top,bottom,left,right');
$this->info['FrameTarget'] = new HTMLPurifier_AttrDef_HTML_FrameTarget();
// unimplemented aliases
$this->info['ContentType'] = new HTMLPurifier_AttrDef_Text();
@@ -44,6 +54,10 @@ class HTMLPurifier_AttrTypes
$this->info['Number'] = new HTMLPurifier_AttrDef_Integer(false, false, true);
}
private static function makeEnum($in) {
return new HTMLPurifier_AttrDef_Clone(new HTMLPurifier_AttrDef_Enum(explode(',', $in)));
}
/**
* Retrieves a type
* @param $type String type name

View File

@@ -0,0 +1,120 @@
<?php
/**
* Definition for list containers ul and ol.
*/
class HTMLPurifier_ChildDef_List extends HTMLPurifier_ChildDef
{
public $type = 'list';
// lying a little bit, so that we can handle ul and ol ourselves
// XXX: This whole business with 'wrap' is all a bit unsatisfactory
public $elements = array('li' => true, 'ul' => true, 'ol' => true);
public function validateChildren($tokens_of_children, $config, $context) {
// Flag for subclasses
$this->whitespace = false;
// if there are no tokens, delete parent node
if (empty($tokens_of_children)) return false;
// the new set of children
$result = array();
// current depth into the nest
$nesting = 0;
// a little sanity check to make sure it's not ALL whitespace
$all_whitespace = true;
$seen_li = false;
$need_close_li = false;
foreach ($tokens_of_children as $token) {
if (!empty($token->is_whitespace)) {
$result[] = $token;
continue;
}
$all_whitespace = false; // phew, we're not talking about whitespace
if ($nesting == 1 && $need_close_li) {
$result[] = new HTMLPurifier_Token_End('li');
$nesting--;
$need_close_li = false;
}
$is_child = ($nesting == 0);
if ($token instanceof HTMLPurifier_Token_Start) {
$nesting++;
} elseif ($token instanceof HTMLPurifier_Token_End) {
$nesting--;
}
if ($is_child) {
if ($token->name === 'li') {
// good
$seen_li = true;
} elseif ($token->name === 'ul' || $token->name === 'ol') {
// we want to tuck this into the previous li
$need_close_li = true;
$nesting++;
if (!$seen_li) {
// create a new li element
$result[] = new HTMLPurifier_Token_Start('li');
} else {
// backtrack until </li> found
while(true) {
$t = array_pop($result);
if ($t instanceof HTMLPurifier_Token_End) {
// XXX actually, these invariants could very plausibly be violated
// if we are doing silly things with modifying the set of allowed elements.
// FORTUNATELY, it doesn't make a difference, since the allowed
// elements are hard-coded here!
if ($t->name !== 'li') {
trigger_error("Only li present invariant violated in List ChildDef", E_USER_ERROR);
return false;
}
break;
} elseif ($t instanceof HTMLPurifier_Token_Empty) { // bleagh
if ($t->name !== 'li') {
trigger_error("Only li present invariant violated in List ChildDef", E_USER_ERROR);
return false;
}
// XXX this should have a helper for it...
$result[] = new HTMLPurifier_Token_Start('li', $t->attr, $t->line, $t->col, $t->armor);
break;
} else {
if (!$t->is_whitespace) {
trigger_error("Only whitespace present invariant violated in List ChildDef", E_USER_ERROR);
return false;
}
}
}
}
} else {
// start wrapping (this doesn't precisely mimic
// browser behavior, but what browsers do is kind of
// hard to mimic in a standards compliant way
// XXX Actually, this has no impact in practice,
// because this gets handled earlier. Arguably,
// we should rip out all of that processing
$result[] = new HTMLPurifier_Token_Start('li');
$nesting++;
$seen_li = true;
$need_close_li = true;
}
}
$result[] = $token;
}
if ($need_close_li) {
$result[] = new HTMLPurifier_Token_End('li');
}
if (empty($result)) return false;
if ($all_whitespace) {
return false;
}
if ($tokens_of_children == $result) return true;
return $result;
}
}
// vim: et sw=4 sts=4

View File

@@ -1,7 +1,33 @@
<?php
/**
* Definition for tables
* Definition for tables. The general idea is to extract out all of the
* essential bits, and then reconstruct it later.
*
* This is a bit confusing, because the DTDs and the W3C
* validators seem to disagree on the appropriate definition. The
* DTD claims:
*
* (CAPTION?, (COL*|COLGROUP*), THEAD?, TFOOT?, TBODY+)
*
* But actually, the HTML4 spec then has this to say:
*
* The TBODY start tag is always required except when the table
* contains only one table body and no table head or foot sections.
* The TBODY end tag may always be safely omitted.
*
* So the DTD is kind of wrong. The validator is, unfortunately, kind
* of on crack.
*
* The definition changed again in XHTML1.1; and in my opinion, this
* formulation makes the most sense.
*
* caption?, ( col* | colgroup* ), (( thead?, tfoot?, tbody+ ) | ( tr+ ))
*
* Essentially, we have two modes: thead/tfoot/tbody mode, and tr mode.
* If we encounter a thead, tfoot or tbody, we are placed in the former
* mode, and we *must* wrap any stray tr segments with a tbody. But if
* we don't run into any of them, just have tr tags is OK.
*/
class HTMLPurifier_ChildDef_Table extends HTMLPurifier_ChildDef
{
@@ -33,6 +59,8 @@ class HTMLPurifier_ChildDef_Table extends HTMLPurifier_ChildDef
$collection = array(); // collected nodes
$tag_index = 0; // the first node might be whitespace,
// so this tells us where the start tag is
$tbody_mode = false; // if true, then we need to wrap any stray
// <tr>s with a <tbody>.
foreach ($tokens_of_children as $token) {
$is_child = ($nesting == 0);
@@ -51,8 +79,9 @@ class HTMLPurifier_ChildDef_Table extends HTMLPurifier_ChildDef
// okay, let's stash the tokens away
// first token tells us the type of the collection
switch ($collection[$tag_index]->name) {
case 'tr':
case 'tbody':
$tbody_mode = true;
case 'tr':
$content[] = $collection;
break;
case 'caption':
@@ -61,13 +90,28 @@ class HTMLPurifier_ChildDef_Table extends HTMLPurifier_ChildDef
break;
case 'thead':
case 'tfoot':
$tbody_mode = true;
// XXX This breaks rendering properties with
// Firefox, which never floats a <thead> to
// the top. Ever. (Our scheme will float the
// first <thead> to the top.) So maybe
// <thead>s that are not first should be
// turned into <tbody>? Very tricky, indeed.
// access the appropriate variable, $thead or $tfoot
$var = $collection[$tag_index]->name;
if ($$var === false) {
$$var = $collection;
} else {
// transmutate the first and less entries into
// tbody tags, and then put into content
// Oops, there's a second one! What
// should we do? Current behavior is to
// transmutate the first and last entries into
// tbody tags, and then put into content.
// Maybe a better idea is to *attach
// it* to the existing thead or tfoot?
// We don't do this, because Firefox
// doesn't float an extra tfoot to the
// bottom like it does for the first one.
$collection[$tag_index]->name = 'tbody';
$collection[count($collection)-1]->name = 'tbody';
$content[] = $collection;
@@ -126,7 +170,48 @@ class HTMLPurifier_ChildDef_Table extends HTMLPurifier_ChildDef
if ($cols !== false) foreach ($cols as $token_array) $ret = array_merge($ret, $token_array);
if ($thead !== false) $ret = array_merge($ret, $thead);
if ($tfoot !== false) $ret = array_merge($ret, $tfoot);
foreach ($content as $token_array) $ret = array_merge($ret, $token_array);
if ($tbody_mode) {
// a little tricky, since the start of the collection may be
// whitespace
$inside_tbody = false;
foreach ($content as $token_array) {
// find the starting token
foreach ($token_array as $t) {
if ($t->name === 'tr' || $t->name === 'tbody') {
break;
}
} // iterator variable carries over
if ($t->name === 'tr') {
if ($inside_tbody) {
$ret = array_merge($ret, $token_array);
} else {
$ret[] = new HTMLPurifier_Token_Start('tbody');
$ret = array_merge($ret, $token_array);
$inside_tbody = true;
}
} elseif ($t->name === 'tbody') {
if ($inside_tbody) {
$ret[] = new HTMLPurifier_Token_End('tbody');
$inside_tbody = false;
$ret = array_merge($ret, $token_array);
} else {
$ret = array_merge($ret, $token_array);
}
} else {
trigger_error("tr/tbody in content invariant failed in Table ChildDef", E_USER_ERROR);
}
}
if ($inside_tbody) {
$ret[] = new HTMLPurifier_Token_End('tbody');
}
} else {
foreach ($content as $token_array) {
// invariant: everything in here is <tr>s
$ret = array_merge($ret, $token_array);
}
}
if (!empty($collection) && $is_collecting == false){
// grab the trailing space
$ret = array_merge($ret, $collection);

View File

@@ -20,7 +20,7 @@ class HTMLPurifier_Config
/**
* HTML Purifier's version
*/
public $version = '4.3.0';
public $version = '4.4.0';
/**
* Bool indicator whether or not to automatically finalize
@@ -44,7 +44,7 @@ class HTMLPurifier_Config
/**
* Parser for variables
*/
protected $parser;
protected $parser = null;
/**
* Reference HTMLPurifier_ConfigSchema for value checking
@@ -668,7 +668,7 @@ class HTMLPurifier_Config
*/
public function finalize() {
$this->finalized = true;
unset($this->parser);
$this->parser = null;
}
/**

View File

@@ -24,5 +24,6 @@ array (
--DESCRIPTION--
Lookup array of color names to six digit hexadecimal number corresponding
to color, with preceding hash mark. Used when parsing colors.
to color, with preceding hash mark. Used when parsing colors. The lookup
is done in a case-insensitive manner.
--# vim: et sw=4 sts=4

View File

@@ -0,0 +1,9 @@
Core.EnableIDNA
TYPE: bool
DEFAULT: false
VERSION: 4.4.0
--DESCRIPTION--
Allows international domain names in URLs. This configuration option
requires the PEAR Net_IDNA2 module to be installed. It operates by
punycoding any internationalized host names for maximum portability.
--# vim: et sw=4 sts=4

View File

@@ -0,0 +1,10 @@
HTML.AllowedComments
TYPE: lookup
VERSION: 4.4.0
DEFAULT: array()
--DESCRIPTION--
A whitelist which indicates what explicit comment bodies should be
allowed, modulo leading and trailing whitespace. See also %HTML.AllowedCommentsRegexp
(these directives are union'ed together, so a comment is considered
valid if any directive deems it valid.)
--# vim: et sw=4 sts=4

View File

@@ -0,0 +1,15 @@
HTML.AllowedCommentsRegexp
TYPE: string/null
VERSION: 4.4.0
DEFAULT: NULL
--DESCRIPTION--
A regexp, which if it matches the body of a comment, indicates that
it should be allowed. Trailing and leading spaces are removed prior
to running this regular expression.
<strong>Warning:</strong> Make sure you specify
correct anchor metacharacters <code>^regex$</code>, otherwise you may accept
comments that you did not mean to! In particular, the regex <code>/foo|bar/</code>
is probably not sufficiently strict, since it also allows <code>foobar</code>.
See also %HTML.AllowedComments (these directives are union'ed together,
so a comment is considered valid if any directive deems it valid.)
--# vim: et sw=4 sts=4

View File

@@ -0,0 +1,13 @@
HTML.SafeIframe
TYPE: bool
VERSION: 4.4.0
DEFAULT: false
--DESCRIPTION--
<p>
Whether or not to permit iframe tags in untrusted documents. This
directive must be accompanied by a whitelist of permitted iframes,
such as %URI.SafeIframeRegexp, otherwise it will fatally error.
This directive has no effect on strict doctypes, as iframes are not
valid.
</p>
--# vim: et sw=4 sts=4

View File

@@ -0,0 +1,8 @@
HTML.TargetBlank
TYPE: bool
VERSION: 4.4.0
DEFAULT: FALSE
--DESCRIPTION--
If enabled, <code>target=blank</code> attributes are added to all outgoing links.
(This includes links from an HTTPS version of a page to an HTTP version.)
--# vim: et sw=4 sts=4

View File

@@ -0,0 +1,22 @@
URI.SafeIframeRegexp
TYPE: string/null
VERSION: 4.4.0
DEFAULT: NULL
--DESCRIPTION--
<p>
A PCRE regular expression that will be matched against an iframe URI. This is
a relatively inflexible scheme, but works well enough for the most common
use-case of iframes: embedded video. This directive only has an effect if
%HTML.SafeIframe is enabled. Here are some example values:
</p>
<ul>
<li><code>%^http://www.youtube.com/embed/%</code> - Allow YouTube videos</li>
<li><code>%^http://player.vimeo.com/video/%</code> - Allow Vimeo videos</li>
<li><code>%^http://(www.youtube.com/embed/|player.vimeo.com/video/)%</code> - Allow both</li>
</ul>
<p>
Note that this directive does not give you enough granularity to, say, disable
all <code>autoplay</code> videos. Pipe up on the HTML Purifier forums if this
is a capability you want.
</p>
--# vim: et sw=4 sts=4

View File

@@ -19,6 +19,68 @@ class HTMLPurifier_Encoder
*/
public static function muteErrorHandler() {}
/**
* iconv wrapper which mutes errors, but doesn't work around bugs.
*/
public static function unsafeIconv($in, $out, $text) {
set_error_handler(array('HTMLPurifier_Encoder', 'muteErrorHandler'));
$r = iconv($in, $out, $text);
restore_error_handler();
return $r;
}
/**
* iconv wrapper which mutes errors and works around bugs.
*/
public static function iconv($in, $out, $text, $max_chunk_size = 8000) {
$code = self::testIconvTruncateBug();
if ($code == self::ICONV_OK) {
return self::unsafeIconv($in, $out, $text);
} elseif ($code == self::ICONV_TRUNCATES) {
// we can only work around this if the input character set
// is utf-8
if ($in == 'utf-8') {
if ($max_chunk_size < 4) {
trigger_error('max_chunk_size is too small', E_USER_WARNING);
return false;
}
// split into 8000 byte chunks, but be careful to handle
// multibyte boundaries properly
if (($c = strlen($text)) <= $max_chunk_size) {
return self::unsafeIconv($in, $out, $text);
}
$r = '';
$i = 0;
while (true) {
if ($i + $max_chunk_size >= $c) {
$r .= self::unsafeIconv($in, $out, substr($text, $i));
break;
}
// wibble the boundary
if (0x80 != (0xC0 & ord($text[$i + $max_chunk_size]))) {
$chunk_size = $max_chunk_size;
} elseif (0x80 != (0xC0 & ord($text[$i + $max_chunk_size - 1]))) {
$chunk_size = $max_chunk_size - 1;
} elseif (0x80 != (0xC0 & ord($text[$i + $max_chunk_size - 2]))) {
$chunk_size = $max_chunk_size - 2;
} elseif (0x80 != (0xC0 & ord($text[$i + $max_chunk_size - 3]))) {
$chunk_size = $max_chunk_size - 3;
} else {
return false; // rather confusing UTF-8...
}
$chunk = substr($text, $i, $chunk_size); // substr doesn't mind overlong lengths
$r .= self::unsafeIconv($in, $out, $chunk);
$i += $chunk_size;
}
return $r;
} else {
return false;
}
} else {
return false;
}
}
/**
* Cleans a UTF-8 string for well-formedness and SGML validity
*
@@ -260,6 +322,14 @@ class HTMLPurifier_Encoder
return $ret;
}
public static function iconvAvailable() {
static $iconv = null;
if ($iconv === null) {
$iconv = function_exists('iconv') && self::testIconvTruncateBug() != self::ICONV_UNUSABLE;
}
return $iconv;
}
/**
* Converts a string to UTF-8 based on configuration.
*/
@@ -267,25 +337,22 @@ class HTMLPurifier_Encoder
$encoding = $config->get('Core.Encoding');
if ($encoding === 'utf-8') return $str;
static $iconv = null;
if ($iconv === null) $iconv = function_exists('iconv');
set_error_handler(array('HTMLPurifier_Encoder', 'muteErrorHandler'));
if ($iconv === null) $iconv = self::iconvAvailable();
if ($iconv && !$config->get('Test.ForceNoIconv')) {
$str = iconv($encoding, 'utf-8//IGNORE', $str);
// unaffected by bugs, since UTF-8 support all characters
$str = self::unsafeIconv($encoding, 'utf-8//IGNORE', $str);
if ($str === false) {
// $encoding is not a valid encoding
restore_error_handler();
trigger_error('Invalid encoding ' . $encoding, E_USER_ERROR);
return '';
}
// If the string is bjorked by Shift_JIS or a similar encoding
// that doesn't support all of ASCII, convert the naughty
// characters to their true byte-wise ASCII/UTF-8 equivalents.
$str = strtr($str, HTMLPurifier_Encoder::testEncodingSupportsASCII($encoding));
restore_error_handler();
$str = strtr($str, self::testEncodingSupportsASCII($encoding));
return $str;
} elseif ($encoding === 'iso-8859-1') {
$str = utf8_encode($str);
restore_error_handler();
return $str;
}
trigger_error('Encoding not supported, please install iconv', E_USER_ERROR);
@@ -298,16 +365,15 @@ class HTMLPurifier_Encoder
*/
public static function convertFromUTF8($str, $config, $context) {
$encoding = $config->get('Core.Encoding');
if ($escape = $config->get('Core.EscapeNonASCIICharacters')) {
$str = self::convertToASCIIDumbLossless($str);
}
if ($encoding === 'utf-8') return $str;
static $iconv = null;
if ($iconv === null) $iconv = function_exists('iconv');
if ($escape = $config->get('Core.EscapeNonASCIICharacters')) {
$str = HTMLPurifier_Encoder::convertToASCIIDumbLossless($str);
}
set_error_handler(array('HTMLPurifier_Encoder', 'muteErrorHandler'));
if ($iconv === null) $iconv = self::iconvAvailable();
if ($iconv && !$config->get('Test.ForceNoIconv')) {
// Undo our previous fix in convertToUTF8, otherwise iconv will barf
$ascii_fix = HTMLPurifier_Encoder::testEncodingSupportsASCII($encoding);
$ascii_fix = self::testEncodingSupportsASCII($encoding);
if (!$escape && !empty($ascii_fix)) {
$clear_fix = array();
foreach ($ascii_fix as $utf8 => $native) $clear_fix[$utf8] = '';
@@ -315,15 +381,17 @@ class HTMLPurifier_Encoder
}
$str = strtr($str, array_flip($ascii_fix));
// Normal stuff
$str = iconv('utf-8', $encoding . '//IGNORE', $str);
restore_error_handler();
$str = self::iconv('utf-8', $encoding . '//IGNORE', $str);
return $str;
} elseif ($encoding === 'iso-8859-1') {
$str = utf8_decode($str);
restore_error_handler();
return $str;
}
trigger_error('Encoding not supported', E_USER_ERROR);
// You might be tempted to assume that the ASCII representation
// might be OK, however, this is *not* universally true over all
// encodings. So we take the conservative route here, rather
// than forcibly turn on %Core.EscapeNonASCIICharacters
}
/**
@@ -373,6 +441,49 @@ class HTMLPurifier_Encoder
return $result;
}
/** No bugs detected in iconv. */
const ICONV_OK = 0;
/** Iconv truncates output if converting from UTF-8 to another
* character set with //IGNORE, and a non-encodable character is found */
const ICONV_TRUNCATES = 1;
/** Iconv does not support //IGNORE, making it unusable for
* transcoding purposes */
const ICONV_UNUSABLE = 2;
/**
* glibc iconv has a known bug where it doesn't handle the magic
* //IGNORE stanza correctly. In particular, rather than ignore
* characters, it will return an EILSEQ after consuming some number
* of characters, and expect you to restart iconv as if it were
* an E2BIG. Old versions of PHP did not respect the errno, and
* returned the fragment, so as a result you would see iconv
* mysteriously truncating output. We can work around this by
* manually chopping our input into segments of about 8000
* characters, as long as PHP ignores the error code. If PHP starts
* paying attention to the error code, iconv becomes unusable.
*
* @returns Error code indicating severity of bug.
*/
public static function testIconvTruncateBug() {
static $code = null;
if ($code === null) {
// better not use iconv, otherwise infinite loop!
$r = self::unsafeIconv('utf-8', 'ascii//IGNORE', "\xCE\xB1" . str_repeat('a', 9000));
if ($r === false) {
$code = self::ICONV_UNUSABLE;
} elseif (($c = strlen($r)) < 9000) {
$code = self::ICONV_TRUNCATES;
} elseif ($c > 9000) {
trigger_error('Your copy of iconv is extremely buggy. Please notify HTML Purifier maintainers: include your iconv version as per phpversion()', E_USER_ERROR);
} else {
$code = self::ICONV_OK;
}
}
return $code;
}
/**
* This expensive function tests whether or not a given character
* encoding supports ASCII. 7/8-bit encodings like Shift_JIS will
@@ -385,6 +496,11 @@ class HTMLPurifier_Encoder
* which can be used to "undo" any overzealous iconv action.
*/
public static function testEncodingSupportsASCII($encoding, $bypass = false) {
// All calls to iconv here are unsafe, proof by case analysis:
// If ICONV_OK, no difference.
// If ICONV_TRUNCATE, all calls involve one character inputs,
// so bug is not triggered.
// If ICONV_UNUSABLE, this call is irrelevant
static $encodings = array();
if (!$bypass) {
if (isset($encodings[$encoding])) return $encodings[$encoding];
@@ -398,24 +514,22 @@ class HTMLPurifier_Encoder
if (strpos($lenc, 'iso-8859-') === 0) return array();
}
$ret = array();
set_error_handler(array('HTMLPurifier_Encoder', 'muteErrorHandler'));
if (iconv('UTF-8', $encoding, 'a') === false) return false;
if (self::unsafeIconv('UTF-8', $encoding, 'a') === false) return false;
for ($i = 0x20; $i <= 0x7E; $i++) { // all printable ASCII chars
$c = chr($i); // UTF-8 char
$r = iconv('UTF-8', "$encoding//IGNORE", $c); // initial conversion
$r = self::unsafeIconv('UTF-8', "$encoding//IGNORE", $c); // initial conversion
if (
$r === '' ||
// This line is needed for iconv implementations that do not
// omit characters that do not exist in the target character set
($r === $c && iconv($encoding, 'UTF-8//IGNORE', $r) !== $c)
($r === $c && self::unsafeIconv($encoding, 'UTF-8//IGNORE', $r) !== $c)
) {
// Reverse engineer: what's the UTF-8 equiv of this byte
// sequence? This assumes that there's no variable width
// encoding that doesn't support ASCII.
$ret[iconv($encoding, 'UTF-8//IGNORE', $c)] = $c;
$ret[self::unsafeIconv($encoding, 'UTF-8//IGNORE', $c)] = $c;
}
}
restore_error_handler();
$encodings[$encoding] = $ret;
return $ret;
}

View File

@@ -1,5 +1,11 @@
<?php
// why is this a top level function? Because PHP 5.2.0 doesn't seem to
// understand how to interpret this filter if it's a static method.
// It's all really silly, but if we go this route it might be reasonable
// to coalesce all of these methods into one.
function htmlpurifier_filter_extractstyleblocks_muteerrorhandler() {}
/**
* This filter extracts <style> blocks from input HTML, cleans them up
* using CSSTidy, and then places them in $purifier->context->get('StyleBlocks')
@@ -21,8 +27,15 @@ class HTMLPurifier_Filter_ExtractStyleBlocks extends HTMLPurifier_Filter
private $_styleMatches = array();
private $_tidy;
private $_id_attrdef;
private $_class_attrdef;
private $_enum_attrdef;
public function __construct() {
$this->_tidy = new csstidy();
$this->_id_attrdef = new HTMLPurifier_AttrDef_HTML_ID(true);
$this->_class_attrdef = new HTMLPurifier_AttrDef_CSS_Ident();
$this->_enum_attrdef = new HTMLPurifier_AttrDef_Enum(array('first-child', 'link', 'visited', 'active', 'hover', 'focus'));
}
/**
@@ -77,27 +90,166 @@ class HTMLPurifier_Filter_ExtractStyleBlocks extends HTMLPurifier_Filter
$css = substr($css, 0, -3);
}
$css = trim($css);
set_error_handler('htmlpurifier_filter_extractstyleblocks_muteerrorhandler');
$this->_tidy->parse($css);
restore_error_handler();
$css_definition = $config->getDefinition('CSS');
$html_definition = $config->getDefinition('HTML');
$new_css = array();
foreach ($this->_tidy->css as $k => $decls) {
// $decls are all CSS declarations inside an @ selector
$new_decls = array();
foreach ($decls as $selector => $style) {
$selector = trim($selector);
if ($selector === '') continue; // should not happen
if ($selector[0] === '+') {
if ($selector !== '' && $selector[0] === '+') continue;
}
if (!empty($scopes)) {
$new_selector = array(); // because multiple ones are possible
// Parse the selector
// Here is the relevant part of the CSS grammar:
//
// ruleset
// : selector [ ',' S* selector ]* '{' ...
// selector
// : simple_selector [ combinator selector | S+ [ combinator? selector ]? ]?
// combinator
// : '+' S*
// : '>' S*
// simple_selector
// : element_name [ HASH | class | attrib | pseudo ]*
// | [ HASH | class | attrib | pseudo ]+
// element_name
// : IDENT | '*'
// ;
// class
// : '.' IDENT
// ;
// attrib
// : '[' S* IDENT S* [ [ '=' | INCLUDES | DASHMATCH ] S*
// [ IDENT | STRING ] S* ]? ']'
// ;
// pseudo
// : ':' [ IDENT | FUNCTION S* [IDENT S*]? ')' ]
// ;
//
// For reference, here are the relevant tokens:
//
// HASH #{name}
// IDENT {ident}
// INCLUDES ==
// DASHMATCH |=
// STRING {string}
// FUNCTION {ident}\(
//
// And the lexical scanner tokens
//
// name {nmchar}+
// nmchar [_a-z0-9-]|{nonascii}|{escape}
// nonascii [\240-\377]
// escape {unicode}|\\[^\r\n\f0-9a-f]
// unicode \\{h}}{1,6}(\r\n|[ \t\r\n\f])?
// ident -?{nmstart}{nmchar*}
// nmstart [_a-z]|{nonascii}|{escape}
// string {string1}|{string2}
// string1 \"([^\n\r\f\\"]|\\{nl}|{escape})*\"
// string2 \'([^\n\r\f\\"]|\\{nl}|{escape})*\'
//
// We'll implement a subset (in order to reduce attack
// surface); in particular:
//
// - No Unicode support
// - No escapes support
// - No string support (by proxy no attrib support)
// - element_name is matched against allowed
// elements (some people might find this
// annoying...)
// - Pseudo-elements one of :first-child, :link,
// :visited, :active, :hover, :focus
// handle ruleset
$selectors = array_map('trim', explode(',', $selector));
foreach ($scopes as $s1) {
foreach ($selectors as $s2) {
$new_selector[] = "$s1 $s2";
$new_selectors = array();
foreach ($selectors as $sel) {
// split on +, > and spaces
$basic_selectors = preg_split('/\s*([+> ])\s*/', $sel, -1, PREG_SPLIT_DELIM_CAPTURE);
// even indices are chunks, odd indices are
// delimiters
$nsel = null;
$delim = null; // guaranteed to be non-null after
// two loop iterations
for ($i = 0, $c = count($basic_selectors); $i < $c; $i++) {
$x = $basic_selectors[$i];
if ($i % 2) {
// delimiter
if ($x === ' ') {
$delim = ' ';
} else {
$delim = ' ' . $x . ' ';
}
} else {
// simple selector
$components = preg_split('/([#.:])/', $x, -1, PREG_SPLIT_DELIM_CAPTURE);
$sdelim = null;
$nx = null;
for ($j = 0, $cc = count($components); $j < $cc; $j ++) {
$y = $components[$j];
if ($j === 0) {
if ($y === '*' || isset($html_definition->info[$y = strtolower($y)])) {
$nx = $y;
} else {
// $nx stays null; this matters
// if we don't manage to find
// any valid selector content,
// in which case we ignore the
// outer $delim
}
} elseif ($j % 2) {
// set delimiter
$sdelim = $y;
} else {
$attrdef = null;
if ($sdelim === '#') {
$attrdef = $this->_id_attrdef;
} elseif ($sdelim === '.') {
$attrdef = $this->_class_attrdef;
} elseif ($sdelim === ':') {
$attrdef = $this->_enum_attrdef;
} else {
throw new HTMLPurifier_Exception('broken invariant sdelim and preg_split');
}
$r = $attrdef->validate($y, $config, $context);
if ($r !== false) {
if ($r !== true) {
$y = $r;
}
if ($nx === null) {
$nx = '';
}
$nx .= $sdelim . $y;
}
}
$selector = implode(', ', $new_selector); // now it's a string
}
if ($nx !== null) {
if ($nsel === null) {
$nsel = $nx;
} else {
$nsel .= $delim . $nx;
}
} else {
// delimiters to the left of invalid
// basic selector ignored
}
}
}
if ($nsel !== null) {
if (!empty($scopes)) {
foreach ($scopes as $s) {
$new_selectors[] = "$s $nsel";
}
} else {
$new_selectors[] = $nsel;
}
}
}
if (empty($new_selectors)) continue;
$selector = implode(', ', $new_selectors);
foreach ($style as $name => $value) {
if (!isset($css_definition->info[$name])) {
unset($style[$name]);
@@ -110,10 +262,11 @@ class HTMLPurifier_Filter_ExtractStyleBlocks extends HTMLPurifier_Filter
}
$new_decls[$selector] = $style;
}
$this->_tidy->css[$k] = $new_decls;
$new_css[$k] = $new_decls;
}
// remove stuff that shouldn't be used, could be reenabled
// after security risks are analyzed
$this->_tidy->css = $new_css;
$this->_tidy->import = array();
$this->_tidy->charset = null;
$this->_tidy->namespace = null;

View File

@@ -147,7 +147,7 @@ class HTMLPurifier_HTMLDefinition extends HTMLPurifier_Definition
return $this->_anonModule;
}
private $_anonModule;
private $_anonModule = null;
// PUBLIC BUT INTERNAL VARIABLES --------------------------------------

View File

@@ -35,7 +35,7 @@ class HTMLPurifier_HTMLModule_Forms extends HTMLPurifier_HTMLModule
'name' => 'CDATA',
'readonly' => 'Bool#readonly',
'size' => 'Number',
'src' => 'URI#embeds',
'src' => 'URI#embedded',
'tabindex' => 'Number',
'type' => 'Enum#text,password,checkbox,button,radio,submit,reset,file,hidden,image',
'value' => 'CDATA',
@@ -84,7 +84,8 @@ class HTMLPurifier_HTMLModule_Forms extends HTMLPurifier_HTMLModule
$button->excludes = $this->makeLookup(
'form', 'fieldset', // Form
'input', 'select', 'textarea', 'label', 'button', // Formctrl
'a' // as per HTML 4.01 spec, this is omitted by modularization
'a', // as per HTML 4.01 spec, this is omitted by modularization
'isindex', 'iframe' // legacy items
);
// Extra exclusion: img usemap="" is not permitted within this element.

View File

@@ -0,0 +1,38 @@
<?php
/**
* XHTML 1.1 Iframe Module provides inline frames.
*
* @note This module is not considered safe unless an Iframe
* whitelisting mechanism is specified. Currently, the only
* such mechanism is %URL.SafeIframeRegexp
*/
class HTMLPurifier_HTMLModule_Iframe extends HTMLPurifier_HTMLModule
{
public $name = 'Iframe';
public $safe = false;
public function setup($config) {
if ($config->get('HTML.SafeIframe')) {
$this->safe = true;
}
$this->addElement(
'iframe', 'Inline', 'Flow', 'Common',
array(
'src' => 'URI#embedded',
'width' => 'Length',
'height' => 'Length',
'name' => 'ID',
'scrolling' => 'Enum#yes,no,auto',
'frameborder' => 'Enum#0,1',
'longdesc' => 'URI',
'marginheight' => 'Pixels',
'marginwidth' => 'Pixels',
)
);
}
}
// vim: et sw=4 sts=4

View File

@@ -89,7 +89,7 @@ class HTMLPurifier_HTMLModule_Legacy extends HTMLPurifier_HTMLModule
$hr->attr['width'] = 'Length';
$img = $this->addBlankElement('img');
$img->attr['align'] = 'Enum#top,middle,bottom,left,right';
$img->attr['align'] = 'IAlign';
$img->attr['border'] = 'Pixels';
$img->attr['hspace'] = 'Pixels';
$img->attr['vspace'] = 'Pixels';
@@ -136,6 +136,22 @@ class HTMLPurifier_HTMLModule_Legacy extends HTMLPurifier_HTMLModule
$ul->attr['compact'] = 'Bool#compact';
$ul->attr['type'] = 'Enum#square,disc,circle';
// "safe" modifications to "unsafe" elements
// WARNING: If you want to add support for an unsafe, legacy
// attribute, make a new TrustedLegacy module with the trusted
// bit set appropriately
$form = $this->addBlankElement('form');
$form->content_model = 'Flow | #PCDATA';
$form->content_model_type = 'optional';
$form->attr['target'] = 'FrameTarget';
$input = $this->addBlankElement('input');
$input->attr['align'] = 'IAlign';
$legend = $this->addBlankElement('legend');
$legend->attr['align'] = 'LAlign';
}
}

View File

@@ -20,10 +20,16 @@ class HTMLPurifier_HTMLModule_List extends HTMLPurifier_HTMLModule
public $content_sets = array('Flow' => 'List');
public function setup($config) {
$ol = $this->addElement('ol', 'List', 'Required: li', 'Common');
$ol->wrap = "li";
$ul = $this->addElement('ul', 'List', 'Required: li', 'Common');
$ul->wrap = "li";
$ol = $this->addElement('ol', 'List', new HTMLPurifier_ChildDef_List(), 'Common');
$ul = $this->addElement('ul', 'List', new HTMLPurifier_ChildDef_List(), 'Common');
// XXX The wrap attribute is handled by MakeWellFormed. This is all
// quite unsatisfactory, because we generated this
// *specifically* for lists, and now a big chunk of the handling
// is done properly by the List ChildDef. So actually, we just
// want enough information to make autoclosing work properly,
// and then hand off the tricky stuff to the ChildDef.
$ol->wrap = 'li';
$ul->wrap = 'li';
$this->addElement('dl', 'List', 'Required: dt | dd', 'Common');
$this->addElement('li', false, 'Flow', 'Common');

View File

@@ -37,6 +37,9 @@ class HTMLPurifier_HTMLModule_Tables extends HTMLPurifier_HTMLModule
'abbr' => 'Text',
'colspan' => 'Number',
'rowspan' => 'Number',
// Apparently, as of HTML5 this attribute only applies
// to 'th' elements.
'scope' => 'Enum#row,col,rowgroup,colgroup',
),
$cell_align
);

View File

@@ -0,0 +1,19 @@
<?php
/**
* Module adds the target=blank attribute transformation to a tags. It
* is enabled by HTML.TargetBlank
*/
class HTMLPurifier_HTMLModule_TargetBlank extends HTMLPurifier_HTMLModule
{
public $name = 'TargetBlank';
public function setup($config) {
$a = $this->addBlankElement('a');
$a->attr_transform_post[] = new HTMLPurifier_AttrTransform_TargetBlank();
}
}
// vim: et sw=4 sts=4

View File

@@ -69,7 +69,7 @@ class HTMLPurifier_HTMLModuleManager
// Sorta legacy, but present in strict:
'Name',
);
$transitional = array('Legacy', 'Target');
$transitional = array('Legacy', 'Target', 'Iframe');
$xml = array('XMLCommonAttributes');
$non_xml = array('NonXMLCommonAttributes');
@@ -112,7 +112,9 @@ class HTMLPurifier_HTMLModuleManager
$this->doctypes->register(
'XHTML 1.1', true,
array_merge($common, $xml, array('Ruby')),
// Iframe is a real XHTML 1.1 module, despite being
// "transitional"!
array_merge($common, $xml, array('Ruby', 'Iframe')),
array('Tidy_Strict', 'Tidy_XHTML', 'Tidy_Proprietary', 'Tidy_Strict', 'Tidy_Name'), // Tidy_XHTML1_1
array(),
'-//W3C//DTD XHTML 1.1//EN',
@@ -229,6 +231,9 @@ class HTMLPurifier_HTMLModuleManager
if ($config->get('HTML.Nofollow')) {
$modules[] = 'Nofollow';
}
if ($config->get('HTML.TargetBlank')) {
$modules[] = 'TargetBlank';
}
// merge in custom modules
$modules = array_merge($modules, $this->userModules);
@@ -364,6 +369,13 @@ class HTMLPurifier_HTMLModuleManager
// :TODO:
// non-standalone definitions that don't have a standalone
// to merge into could be deferred to the end
// HOWEVER, it is perfectly valid for a non-standalone
// definition to lack a standalone definition, even
// after all processing: this allows us to safely
// specify extra attributes for elements that may not be
// enabled all in one place. In particular, this might
// be the case for trusted elements. WARNING: care must
// be taken that the /extra/ definitions are all safe.
continue;
}

View File

@@ -1,139 +0,0 @@
<?php
/**
* Proof-of-concept lexer that uses the PEAR package XML_HTMLSax3 to parse HTML.
*
* PEAR, not suprisingly, also has a SAX parser for HTML. I don't know
* very much about implementation, but it's fairly well written. However, that
* abstraction comes at a price: performance. You need to have it installed,
* and if the API changes, it might break our adapter. Not sure whether or not
* it's UTF-8 aware, but it has some entity parsing trouble (in all areas,
* text and attributes).
*
* Quite personally, I don't recommend using the PEAR class, and the defaults
* don't use it. The unit tests do perform the tests on the SAX parser too, but
* whatever it does for poorly formed HTML is up to it.
*
* @todo Generalize so that XML_HTMLSax is also supported.
*
* @warning Entity-resolution inside attributes is broken.
*/
class HTMLPurifier_Lexer_PEARSax3 extends HTMLPurifier_Lexer
{
/**
* Internal accumulator array for SAX parsers.
*/
protected $tokens = array();
protected $last_token_was_empty;
private $parent_handler;
private $stack = array();
public function tokenizeHTML($string, $config, $context) {
$this->tokens = array();
$this->last_token_was_empty = false;
$string = $this->normalize($string, $config, $context);
$this->parent_handler = set_error_handler(array($this, 'muteStrictErrorHandler'));
$parser = new XML_HTMLSax3();
$parser->set_object($this);
$parser->set_element_handler('openHandler','closeHandler');
$parser->set_data_handler('dataHandler');
$parser->set_escape_handler('escapeHandler');
// doesn't seem to work correctly for attributes
$parser->set_option('XML_OPTION_ENTITIES_PARSED', 1);
$parser->parse($string);
restore_error_handler();
return $this->tokens;
}
/**
* Open tag event handler, interface is defined by PEAR package.
*/
public function openHandler(&$parser, $name, $attrs, $closed) {
// entities are not resolved in attrs
foreach ($attrs as $key => $attr) {
$attrs[$key] = $this->parseData($attr);
}
if ($closed) {
$this->tokens[] = new HTMLPurifier_Token_Empty($name, $attrs);
$this->last_token_was_empty = true;
} else {
$this->tokens[] = new HTMLPurifier_Token_Start($name, $attrs);
}
$this->stack[] = $name;
return true;
}
/**
* Close tag event handler, interface is defined by PEAR package.
*/
public function closeHandler(&$parser, $name) {
// HTMLSax3 seems to always send empty tags an extra close tag
// check and ignore if you see it:
// [TESTME] to make sure it doesn't overreach
if ($this->last_token_was_empty) {
$this->last_token_was_empty = false;
return true;
}
$this->tokens[] = new HTMLPurifier_Token_End($name);
if (!empty($this->stack)) array_pop($this->stack);
return true;
}
/**
* Data event handler, interface is defined by PEAR package.
*/
public function dataHandler(&$parser, $data) {
$this->last_token_was_empty = false;
$this->tokens[] = new HTMLPurifier_Token_Text($data);
return true;
}
/**
* Escaped text handler, interface is defined by PEAR package.
*/
public function escapeHandler(&$parser, $data) {
if (strpos($data, '--') === 0) {
// remove trailing and leading double-dashes
$data = substr($data, 2);
if (strlen($data) >= 2 && substr($data, -2) == "--") {
$data = substr($data, 0, -2);
}
if (isset($this->stack[sizeof($this->stack) - 1]) &&
$this->stack[sizeof($this->stack) - 1] == "style") {
$this->tokens[] = new HTMLPurifier_Token_Text($data);
} else {
$this->tokens[] = new HTMLPurifier_Token_Comment($data);
}
$this->last_token_was_empty = false;
}
// CDATA is handled elsewhere, but if it was handled here:
//if (strpos($data, '[CDATA[') === 0) {
// $this->tokens[] = new HTMLPurifier_Token_Text(
// substr($data, 7, strlen($data) - 9) );
//}
return true;
}
/**
* An error handler that mutes strict errors
*/
public function muteStrictErrorHandler($errno, $errstr, $errfile=null, $errline=null, $errcontext=null) {
if ($errno == E_STRICT) return;
return call_user_func($this->parent_handler, $errno, $errstr, $errfile, $errline, $errcontext);
}
}
// vim: et sw=4 sts=4

View File

@@ -11,8 +11,6 @@ abstract class HTMLPurifier_Strategy_Composite extends HTMLPurifier_Strategy
*/
protected $strategies = array();
abstract public function __construct();
public function execute($tokens, $config, $context) {
foreach ($this->strategies as $strategy) {
$tokens = $strategy->execute($tokens, $config, $context);

View File

@@ -21,6 +21,9 @@ class HTMLPurifier_Strategy_RemoveForeignElements extends HTMLPurifier_Strategy
// currently only used to determine if comments should be kept
$trusted = $config->get('HTML.Trusted');
$comment_lookup = $config->get('HTML.AllowedComments');
$comment_regexp = $config->get('HTML.AllowedCommentsRegexp');
$check_comments = $comment_lookup !== array() || $comment_regexp !== null;
$remove_script_contents = $config->get('Core.RemoveScriptContents');
$hidden_elements = $config->get('Core.HiddenElements');
@@ -128,22 +131,36 @@ class HTMLPurifier_Strategy_RemoveForeignElements extends HTMLPurifier_Strategy
if ($textify_comments !== false) {
$data = $token->data;
$token = new HTMLPurifier_Token_Text($data);
} elseif ($trusted) {
// keep, but perform comment cleaning
} elseif ($trusted || $check_comments) {
// always cleanup comments
$trailing_hyphen = false;
if ($e) {
// perform check whether or not there's a trailing hyphen
if (substr($token->data, -1) == '-') {
$e->send(E_NOTICE, 'Strategy_RemoveForeignElements: Trailing hyphen in comment removed');
$trailing_hyphen = true;
}
}
$token->data = rtrim($token->data, '-');
$found_double_hyphen = false;
while (strpos($token->data, '--') !== false) {
if ($e && !$found_double_hyphen) {
$found_double_hyphen = true;
$token->data = str_replace('--', '-', $token->data);
}
if ($trusted || !empty($comment_lookup[trim($token->data)]) || ($comment_regexp !== NULL && preg_match($comment_regexp, trim($token->data)))) {
// OK good
if ($e) {
if ($trailing_hyphen) {
$e->send(E_NOTICE, 'Strategy_RemoveForeignElements: Trailing hyphen in comment removed');
}
if ($found_double_hyphen) {
$e->send(E_NOTICE, 'Strategy_RemoveForeignElements: Hyphens in comment collapsed');
}
$found_double_hyphen = true; // prevent double-erroring
$token->data = str_replace('--', '-', $token->data);
}
} else {
if ($e) {
$e->send(E_NOTICE, 'Strategy_RemoveForeignElements: Comment removed');
}
continue;
}
} else {
// strip comments

View File

@@ -40,7 +40,7 @@ class HTMLPurifier_URI
} else {
// no scheme: retrieve the default one
$def = $config->getDefinition('URI');
$scheme_obj = $registry->getScheme($def->defaultScheme, $config, $context);
$scheme_obj = $def->getDefaultScheme($config, $context);
if (!$scheme_obj) {
// something funky happened to the default scheme object
trigger_error(
@@ -199,6 +199,44 @@ class HTMLPurifier_URI
return $result;
}
/**
* Returns true if this URL might be considered a 'local' URL given
* the current context. This is true when the host is null, or
* when it matches the host supplied to the configuration.
*
* Note that this does not do any scheme checking, so it is mostly
* only appropriate for metadata that doesn't care about protocol
* security. isBenign is probably what you actually want.
*/
public function isLocal($config, $context) {
if ($this->host === null) return true;
$uri_def = $config->getDefinition('URI');
if ($uri_def->host === $this->host) return true;
return false;
}
/**
* Returns true if this URL should be considered a 'benign' URL,
* that is:
*
* - It is a local URL (isLocal), and
* - It has a equal or better level of security
*/
public function isBenign($config, $context) {
if (!$this->isLocal($config, $context)) return false;
$scheme_obj = $this->getSchemeObj($config, $context);
if (!$scheme_obj) return false; // conservative approach
$current_scheme_obj = $config->getDefinition('URI')->getDefaultScheme($config, $context);
if ($current_scheme_obj->secure) {
if (!$scheme_obj->secure) {
return false;
}
}
return true;
}
}
// vim: et sw=4 sts=4

View File

@@ -27,6 +27,7 @@ class HTMLPurifier_URIDefinition extends HTMLPurifier_Definition
$this->registerFilter(new HTMLPurifier_URIFilter_DisableExternal());
$this->registerFilter(new HTMLPurifier_URIFilter_DisableExternalResources());
$this->registerFilter(new HTMLPurifier_URIFilter_HostBlacklist());
$this->registerFilter(new HTMLPurifier_URIFilter_SafeIframe());
$this->registerFilter(new HTMLPurifier_URIFilter_MakeAbsolute());
$this->registerFilter(new HTMLPurifier_URIFilter_Munge());
}
@@ -52,11 +53,15 @@ class HTMLPurifier_URIDefinition extends HTMLPurifier_Definition
protected function setupFilters($config) {
foreach ($this->registeredFilters as $name => $filter) {
if ($filter->always_load) {
$this->addFilter($filter, $config);
} else {
$conf = $config->get('URI.' . $name);
if ($conf !== false && $conf !== null) {
$this->addFilter($filter, $config);
}
}
}
unset($this->registeredFilters);
}
@@ -72,6 +77,10 @@ class HTMLPurifier_URIDefinition extends HTMLPurifier_Definition
if (is_null($this->defaultScheme)) $this->defaultScheme = $config->get('URI.DefaultScheme');
}
public function getDefaultScheme($config, $context) {
return HTMLPurifier_URISchemeRegistry::instance()->getScheme($this->defaultScheme, $config, $context);
}
public function filter(&$uri, $config, $context) {
foreach ($this->filters as $name => $f) {
$result = $f->filter($uri, $config, $context);

View File

@@ -4,7 +4,21 @@
* Chainable filters for custom URI processing.
*
* These filters can perform custom actions on a URI filter object,
* including transformation or blacklisting.
* including transformation or blacklisting. A filter named Foo
* must have a corresponding configuration directive %URI.Foo,
* unless always_load is specified to be true.
*
* The following contexts may be available while URIFilters are being
* processed:
*
* - EmbeddedURI: true if URI is an embedded resource that will
* be loaded automatically on page load
* - CurrentToken: a reference to the token that is currently
* being processed
* - CurrentAttr: the name of the attribute that is currently being
* processed
* - CurrentCSSProperty: the name of the CSS property that is
* currently being processed (if applicable)
*
* @warning This filter is called before scheme object validation occurs.
* Make sure, if you require a specific scheme object, you
@@ -25,7 +39,15 @@ abstract class HTMLPurifier_URIFilter
public $post = false;
/**
* Performs initialization for the filter
* True if this filter should always be loaded (this permits
* a filter to be named Foo without the corresponding %URI.Foo
* directive existing.)
*/
public $always_load = false;
/**
* Performs initialization for the filter. If the filter returns
* false, this means that it shouldn't be considered active.
*/
public function prepare($config) {return true;}

View File

@@ -1,5 +1,9 @@
<?php
// It's not clear to me whether or not Punycode means that hostnames
// do not have canonical forms anymore. As far as I can tell, it's
// not a problem (punycoding should be identity when no Unicode
// points are involved), but I'm not 100% sure
class HTMLPurifier_URIFilter_HostBlacklist extends HTMLPurifier_URIFilter
{
public $name = 'HostBlacklist';

View File

@@ -20,13 +20,8 @@ class HTMLPurifier_URIFilter_Munge extends HTMLPurifier_URIFilter
$scheme_obj = $uri->getSchemeObj($config, $context);
if (!$scheme_obj) return true; // ignore unknown schemes, maybe another postfilter did it
if (is_null($uri->host) || empty($scheme_obj->browsable)) {
return true;
}
// don't redirect if target host is our host
if ($uri->host === $config->getDefinition('URI')->host) {
return true;
}
if (!$scheme_obj->browsable) return true; // ignore non-browseable schemes, since we can't munge those in a reasonable way
if ($uri->isBenign($config, $context)) return true; // don't redirect if a benign URL
$this->makeReplace($uri, $config, $context);
$this->replace = array_map('rawurlencode', $this->replace);

View File

@@ -0,0 +1,35 @@
<?php
/**
* Implements safety checks for safe iframes.
*
* @warning This filter is *critical* for ensuring that %HTML.SafeIframe
* works safely.
*/
class HTMLPurifier_URIFilter_SafeIframe extends HTMLPurifier_URIFilter
{
public $name = 'SafeIframe';
public $always_load = true;
protected $regexp = NULL;
// XXX: The not so good bit about how this is all setup now is we
// can't check HTML.SafeIframe in the 'prepare' step: we have to
// defer till the actual filtering.
public function prepare($config) {
$this->regexp = $config->get('URI.SafeIframeRegexp');
return true;
}
public function filter(&$uri, $config, $context) {
// check if filter not applicable
if (!$config->get('HTML.SafeIframe')) return true;
// check if the filter should actually trigger
if (!$context->get('EmbeddedURI', true)) return true;
$token = $context->get('CurrentToken', true);
if (!($token && $token->name == 'iframe')) return true;
// check if we actually have some whitelists enabled
if ($this->regexp === null) return false;
// actually check the whitelists
return preg_match($this->regexp, $uri->toString());
}
}
// vim: et sw=4 sts=4

View File

@@ -19,6 +19,12 @@ abstract class HTMLPurifier_URIScheme
*/
public $browsable = false;
/**
* Whether or not data transmitted over this scheme is encrypted.
* https is secure, http is not.
*/
public $secure = false;
/**
* Whether or not the URI always uses <hier_part>, resolves edge cases
* with making relative URIs absolute

View File

@@ -6,6 +6,7 @@
class HTMLPurifier_URIScheme_https extends HTMLPurifier_URIScheme_http {
public $default_port = 443;
public $secure = true;
}

View File

@@ -145,7 +145,6 @@ make_dir_standalone('HTMLPurifier/Filter');
make_dir_standalone('HTMLPurifier/Printer');
make_file_standalone('HTMLPurifier/Printer.php');
make_file_standalone('HTMLPurifier/Lexer/PH5P.php');
make_file_standalone('HTMLPurifier/Lexer/PEARSax3.php');
echo ' done!' . PHP_EOL;

View File

@@ -69,4 +69,8 @@ $phpv = false;
// to true (or, if it's not in the include path, to its install directory).
$GLOBALS['HTMLPurifierTest']['PEAR'] = false;
// If PEAR is enabled, what PEAR tests should be run? (Note: you will
// need to ensure these libraries are installed)
$GLOBALS['HTMLPurifierTest']['Net_IDNA2'] = true;
// vim: et sw=4 sts=4

View File

@@ -23,11 +23,11 @@ class CliTestCase
public function getLabel() {
return $this->_command;
}
public function run(&$reporter) {
public function run($reporter) {
if (!$this->_quiet) $reporter->paintFormattedMessage('Running ['.$this->_command.']');
return $this->_invokeCommand($this->_command, $reporter);
}
public function _invokeCommand($command, &$reporter) {
public function _invokeCommand($command, $reporter) {
$xml = shell_exec($command);
if (! $xml) {
if (!$this->_quiet) {
@@ -35,7 +35,7 @@ class CliTestCase
}
return false;
}
$parser = &$this->_createParser($reporter);
$parser = $this->_createParser($reporter);
set_error_handler(array($this, '_errorHandler'));
$status = $parser->parse($xml);
@@ -59,7 +59,7 @@ class CliTestCase
}
return true;
}
public function &_createParser(&$reporter) {
public function _createParser($reporter) {
$parser = new SimpleTestXmlParser($reporter);
return $parser;
}

View File

@@ -35,6 +35,17 @@ class HTMLPurifier_AttrDef_URI_HostTest extends HTMLPurifier_AttrDefHarness
$this->assertDef('f1.top');
$this->assertDef('f-.top', false);
$this->assertDef("\xE4\xB8\xAD\xE6\x96\x87.com.cn", false);
}
function testIDNA() {
if (!$GLOBALS['HTMLPurifierTest']['Net_IDNA2']) {
return false;
}
$this->config->set('Core.EnableIDNA', true);
$this->assertDef("\xE4\xB8\xAD\xE6\x96\x87.com.cn", "xn--fiq228c.com.cn");
$this->assertDef("\xe2\x80\x85.com", false); // rejected
}
}

View File

@@ -0,0 +1,50 @@
<?php
class HTMLPurifier_ChildDef_ListTest extends HTMLPurifier_ChildDefHarness
{
function setUp() {
parent::setUp();
$this->obj = new HTMLPurifier_ChildDef_List();
}
function testEmptyInput() {
$this->assertResult('', false);
}
function testSingleLi() {
$this->assertResult('<li />');
}
function testSomeLi() {
$this->assertResult('<li>asdf</li><li />');
}
function testIllegal() {
// XXX actually this never gets triggered in practice
$this->assertResult('<li /><b />', '<li /><li><b /></li>');
}
function testOlAtBeginning() {
$this->assertResult('<ol />', '<li><ol /></li>');
}
function testOlAtBeginningWithOtherJunk() {
$this->assertResult('<ol /><li />', '<li><ol /></li><li />');
}
function testOlInMiddle() {
$this->assertResult('<li>Foo</li><ol><li>Bar</li></ol>', '<li>Foo<ol><li>Bar</li></ol></li>');
}
function testMultipleOl() {
$this->assertResult('<li /><ol /><ol />', '<li><ol /><ol /></li>');
}
function testUlAtBeginning() {
$this->assertResult('<ul />', '<li><ul /></li>');
}
}
// vim: et sw=4 sts=4

View File

@@ -28,7 +28,21 @@ class HTMLPurifier_ChildDef_TableTest extends HTMLPurifier_ChildDefHarness
function testReorderContents() {
$this->assertResult(
'<col /><colgroup /><tbody /><tfoot /><thead /><tr>1</tr><caption /><tr />',
'<caption /><col /><colgroup /><thead /><tfoot /><tbody /><tr>1</tr><tr />');
'<caption /><col /><colgroup /><thead /><tfoot /><tbody /><tbody><tr>1</tr><tr /></tbody>');
}
function testXhtml11Illegal() {
$this->assertResult(
'<thead><tr><th>a</th></tr></thead><tr><td>a</td></tr>',
'<thead><tr><th>a</th></tr></thead><tbody><tr><td>a</td></tr></tbody>'
);
}
function testTrOverflowAndClose() {
$this->assertResult(
'<tr><td>a</td></tr><tr><td>b</td></tr><tbody><tr><td>c</td></tr></tbody><tr><td>d</td></tr>',
'<tbody><tr><td>a</td></tr><tr><td>b</td></tr></tbody><tbody><tr><td>c</td></tr></tbody><tbody><tr><td>d</td></tr></tbody>'
);
}
function testDuplicateProcessing() {

View File

@@ -220,7 +220,7 @@ class HTMLPurifier_ConfigTest extends HTMLPurifier_Harness
$def = $config->getHTMLDefinition();
$def2 = $config->getHTMLDefinition();
$this->assertIsA($def, 'HTMLPurifier_HTMLDefinition');
$this->assertSame($def, $def2);
$this->assertTrue($def === $def2);
$this->assertTrue($def->setup);
$old_def = clone $def2;
@@ -228,7 +228,7 @@ class HTMLPurifier_ConfigTest extends HTMLPurifier_Harness
$config->set('HTML.Doctype', 'HTML 4.01 Transitional');
$def = $config->getHTMLDefinition();
$this->assertIsA($def, 'HTMLPurifier_HTMLDefinition');
$this->assertNotEqual($def, $old_def);
$this->assertTrue($def !== $old_def);
$this->assertTrue($def->setup);
}
@@ -491,7 +491,7 @@ class HTMLPurifier_ConfigTest extends HTMLPurifier_Harness
$config = HTMLPurifier_Config::createDefault();
$config->set('HTML.Allowed', 'a');
$config2 = unserialize($config->serialize());
$this->assertIdentical($config, $config2);
$this->assertIdentical($config->get('HTML.Allowed'), $config2->get('HTML.Allowed'));
}
function testDefinitionCachingNothing() {

View File

@@ -4,6 +4,11 @@ class HTMLPurifier_DefinitionCache_SerializerTest extends HTMLPurifier_Definitio
{
function test() {
// XXX SimpleTest does some really crazy stuff in the background
// to do equality checks. Unfortunately, this makes some
// versions of PHP segfault. So we need to define a better,
// homebrew notion of equality and use that instead. For now,
// the identical asserts are commented out.
$cache = new HTMLPurifier_DefinitionCache_Serializer('Test');
@@ -30,27 +35,27 @@ class HTMLPurifier_DefinitionCache_SerializerTest extends HTMLPurifier_Definitio
$this->assertIdentical(realpath($rel_file), realpath($file_generated));
$def_1 = $cache->get($config);
$this->assertIdentical($def_original, $def_1);
// $this->assertIdentical($def_original, $def_1);
$def_original->info_random = 'changed';
$cache->set($def_original, $config);
$def_2 = $cache->get($config);
$this->assertIdentical($def_original, $def_2);
$this->assertNotEqual ($def_original, $def_1);
// $this->assertIdentical($def_original, $def_2);
// $this->assertNotEqual ($def_original, $def_1);
$def_original->info_random = 'did it change?';
$this->assertFalse($cache->add($def_original, $config));
$def_3 = $cache->get($config);
$this->assertNotEqual ($def_original, $def_3); // did not change!
$this->assertIdentical($def_3, $def_2);
// $this->assertNotEqual ($def_original, $def_3); // did not change!
// $this->assertIdentical($def_3, $def_2);
$cache->replace($def_original, $config);
$def_4 = $cache->get($config);
$this->assertIdentical($def_original, $def_4);
// $this->assertIdentical($def_original, $def_4);
$cache->remove($config);
$this->assertFileNotExist($file);

View File

@@ -123,7 +123,15 @@ class HTMLPurifier_EncoderTest extends HTMLPurifier_Harness
HTMLPurifier_Encoder::convertFromUTF8($this->getZhongWen(), $this->config, $this->context),
"&#20013;&#25991; (Chinese)"
);
}
function test_convertFromUTF8_withProtectionButUtf8() {
// Preserve the characters!
$this->config->set('Core.EscapeNonASCIICharacters', true);
$this->assertIdentical(
HTMLPurifier_Encoder::convertFromUTF8($this->getZhongWen(), $this->config, $this->context),
"&#20013;&#25991; (Chinese)"
);
}
function test_convertToASCIIDumbLossless() {
@@ -181,6 +189,27 @@ class HTMLPurifier_EncoderTest extends HTMLPurifier_Harness
);
}
function testIconvTruncateBug() {
if (!function_exists('iconv')) return;
if (HTMLPurifier_Encoder::testIconvTruncateBug() !== HTMLPurifier_Encoder::ICONV_TRUNCATES) return;
$this->config->set('Core.Encoding', 'ISO-8859-1');
$this->assertIdentical(
HTMLPurifier_Encoder::convertFromUTF8("\xE4\xB8\xAD" . str_repeat('a', 10000), $this->config, $this->context),
str_repeat('a', 10000)
);
}
function testIconvChunking() {
if (!function_exists('iconv')) return;
if (HTMLPurifier_Encoder::testIconvTruncateBug() !== HTMLPurifier_Encoder::ICONV_TRUNCATES) return;
$this->assertIdentical(HTMLPurifier_Encoder::iconv('utf-8', 'iso-8859-1//IGNORE', "a\xF3\xA0\x80\xA0b", 4), 'ab');
$this->assertIdentical(HTMLPurifier_Encoder::iconv('utf-8', 'iso-8859-1//IGNORE', "aa\xE4\xB8\xADb", 4), 'aab');
$this->assertIdentical(HTMLPurifier_Encoder::iconv('utf-8', 'iso-8859-1//IGNORE', "aaa\xCE\xB1b", 4), 'aaab');
$this->assertIdentical(HTMLPurifier_Encoder::iconv('utf-8', 'iso-8859-1//IGNORE', "aaaa\xF3\xA0\x80\xA0b", 4), 'aaaab');
$this->assertIdentical(HTMLPurifier_Encoder::iconv('utf-8', 'iso-8859-1//IGNORE', "aaaa\xE4\xB8\xADb", 4), 'aaaab');
$this->assertIdentical(HTMLPurifier_Encoder::iconv('utf-8', 'iso-8859-1//IGNORE', "aaaa\xCE\xB1b", 4), 'aaaab');
}
}
// vim: et sw=4 sts=4

View File

@@ -10,7 +10,7 @@ class HTMLPurifier_Filter_ExtractStyleBlocksTest extends HTMLPurifier_Harness
function test_tokenizeHTML_extractStyleBlocks() {
$this->config->set('Filter.ExtractStyleBlocks', true);
$purifier = new HTMLPurifier($this->config);
$result = $purifier->purify('<style type="text/css">.foo {text-align:center;bogus:remove-me;}</style>Test<style>* {font-size:12pt;}</style>');
$result = $purifier->purify('<style type="text/css">.foo {text-align:center;bogus:remove-me;} body.class[foo="attr"] {text-align:right;}</style>Test<style>* {font-size:12pt;}</style>');
$this->assertIdentical($result, 'Test');
$this->assertIdentical($purifier->context->get('StyleBlocks'),
array(
@@ -153,7 +153,7 @@ class HTMLPurifier_Filter_ExtractStyleBlocksTest extends HTMLPurifier_Harness
$this->config->set('Filter.ExtractStyleBlocks.Scope', '#foo, .bar');
$this->assertCleanCSS(
"p, div {\ntext-indent:1em;\n}",
"#foo p, #foo div, .bar p, .bar div {\ntext-indent:1em;\n}"
"#foo p, .bar p, #foo div, .bar div {\ntext-indent:1em;\n}"
);
}
@@ -191,6 +191,41 @@ text-align:right;
);
}
function test_atSelector() {
$this->assertCleanCSS(
"{
b { text-align: center; }
}",
""
);
}
function test_selectorValidation() {
$this->assertCleanCSS(
"&, & {
text-align: center;
}",
""
);
$this->assertCleanCSS(
"&, b {
text-align:center;
}",
"b {
text-align:center;
}"
);
$this->assertCleanCSS(
"& a #foo:hover.bar +b > i {
text-align:center;
}",
"a #foo:hover.bar + b \\3E i {
text-align:center;
}"
);
$this->assertCleanCSS("doesnt-exist { text-align:center }", "");
}
}
// vim: et sw=4 sts=4

View File

@@ -7,10 +7,10 @@ class HTMLPurifier_HTMLModule_FormsTest extends HTMLPurifier_HTMLModuleHarness
parent::setUp();
$this->config->set('HTML.Trusted', true);
$this->config->set('Attr.EnableID', true);
$this->config->set('Cache.DefinitionImpl', null);
}
function testBasicUse() {
$this->config->set('HTML.Doctype', 'HTML 4.01 Strict');
$this->assertResult( // need support for label for later
'
<form action="http://somesite.com/prog/adduser" method="post">
@@ -30,6 +30,7 @@ class HTMLPurifier_HTMLModule_FormsTest extends HTMLPurifier_HTMLModuleHarness
}
function testSelectOption() {
$this->config->set('HTML.Doctype', 'HTML 4.01 Strict');
$this->assertResult('
<form action="http://somesite.com/prog/component-select" method="post">
<p>
@@ -49,6 +50,7 @@ class HTMLPurifier_HTMLModule_FormsTest extends HTMLPurifier_HTMLModuleHarness
}
function testSelectOptgroup() {
$this->config->set('HTML.Doctype', 'HTML 4.01 Strict');
$this->assertResult('
<form action="http://somesite.com/prog/someprog" method="post">
<p>
@@ -74,6 +76,7 @@ class HTMLPurifier_HTMLModule_FormsTest extends HTMLPurifier_HTMLModuleHarness
}
function testTextarea() {
$this->config->set('HTML.Doctype', 'HTML 4.01 Strict');
$this->assertResult('
<form action="http://somesite.com/prog/text-read" method="post">
<p>
@@ -90,6 +93,7 @@ class HTMLPurifier_HTMLModule_FormsTest extends HTMLPurifier_HTMLModuleHarness
// label tests omitted
function testFieldset() {
$this->config->set('HTML.Doctype', 'HTML 4.01 Strict');
$this->assertResult('
<form action="..." method="post">
<fieldset>
@@ -122,17 +126,30 @@ class HTMLPurifier_HTMLModule_FormsTest extends HTMLPurifier_HTMLModuleHarness
}
function testInputTransform() {
$this->config->set('HTML.Doctype', 'XHTML 1.0 Strict');
$this->assertResult('<input type="checkbox" />', '<input type="checkbox" value="" />');
}
function testTextareaTransform() {
$this->config->set('HTML.Doctype', 'HTML 4.01 Strict');
$this->assertResult('<textarea></textarea>', '<textarea cols="22" rows="3"></textarea>');
}
function testTextInFieldset() {
$this->config->set('HTML.Doctype', 'HTML 4.01 Strict');
$this->assertResult('<fieldset> <legend></legend>foo</fieldset>');
}
function testStrict() {
$this->config->set('HTML.Doctype', 'HTML 4.01 Strict');
$this->assertResult('<form action=""></form>', '');
}
function testLegacy() {
$this->assertResult('<form action=""></form>');
$this->assertResult('<form action=""><input align="left" /></form>');
}
}
// vim: et sw=4 sts=4

View File

@@ -15,6 +15,12 @@ class HTMLPurifier_HTMLModule_NofollowTest extends HTMLPurifier_HTMLModuleHarnes
);
}
function testNofollowDupe() {
$this->assertResult(
'<a href="http://google.com" rel="nofollow">a</a><a href="/local">b</a><a href="mailto:foo@example.com">c</a>'
);
}
}
// vim: et sw=4 sts=4

View File

@@ -0,0 +1,20 @@
<?php
class HTMLPurifier_HTMLModule_TargetBlankTest extends HTMLPurifier_HTMLModuleHarness
{
function setUp() {
parent::setUp();
$this->config->set('HTML.TargetBlank', true);
}
function testTargetBlank() {
$this->assertResult(
'<a href="http://google.com">a</a><a href="/local">b</a><a href="mailto:foo@example.com">c</a>',
'<a href="http://google.com" target="blank">a</a><a href="/local">b</a><a href="mailto:foo@example.com">c</a>'
);
}
}
// vim: et sw=4 sts=4

View File

@@ -0,0 +1,5 @@
--HTML--
<ul><li>Sublist 1</li><ul><li>Bullet</li></ul></ul>
--EXPECT--
<ul><li>Sublist 1<ul><li>Bullet</li></ul></li></ul>
--# vim: et sw=4 sts=4

View File

@@ -0,0 +1,8 @@
--INI--
HTML.SafeIframe = true
URI.SafeIframeRegexp = "%^http://maps.google.com/%"
--HTML--
<iframe width="425" height="350" frameborder="0" scrolling="no" marginheight="0" marginwidth="0" src="http://maps.google.com/?ie=UTF8&amp;ll=37.0625,-95.677068&amp;spn=24.455808,37.353516&amp;z=4&amp;output=embed"></iframe>
--EXPECT--
<iframe width="425" height="350" frameborder="0" scrolling="no" marginheight="0" marginwidth="0" src="http://maps.google.com/?ie=UTF8&amp;ll=37.0625,-95.677068&amp;spn=24.455808,37.353516&amp;z=4&amp;output=embed"></iframe>
--# vim: et sw=4 sts=4

View File

@@ -0,0 +1,7 @@
--INI--
HTML.SafeIframe = true
--HTML--
<iframe src="http://www.example.com/"></iframe>
--EXPECT--
<iframe></iframe>
--# vim: et sw=4 sts=4

View File

@@ -0,0 +1,8 @@
--INI--
HTML.SafeIframe = true
URI.SafeIframeRegexp = "%^http://www.youtube.com/embed/%"
--HTML--
<iframe title="YouTube video player" width="480" height="390" src="http://www.youtube.com/embed/RVtEQxH7PWA" frameborder="0" allowfullscreen></iframe>
--EXPECT--
<iframe title="YouTube video player" width="480" height="390" src="http://www.youtube.com/embed/RVtEQxH7PWA" frameborder="0"></iframe>
--# vim: et sw=4 sts=4

View File

@@ -0,0 +1,14 @@
--INI--
HTML.SafeIframe = true
URI.SafeIframeRegexp = "%(^http://www.example.com/|^https?://dev.example.com/)%"
--HTML--
<iframe src="http://www.example.com/"></iframe>
<iframe src="http://malicious.host.com/?http://www.example.com/"></iframe>
<iframe src="http://dev.example.com/"></iframe>
<iframe src="https://dev.example.com/"></iframe>
--EXPECT--
<iframe src="http://www.example.com/"></iframe>
<iframe></iframe>
<iframe src="http://dev.example.com/"></iframe>
<iframe src="https://dev.example.com/"></iframe>
--# vim: et sw=4 sts=4

View File

@@ -7,13 +7,6 @@ class HTMLPurifier_LexerTest extends HTMLPurifier_Harness
public function __construct() {
parent::__construct();
if ($GLOBALS['HTMLPurifierTest']['PEAR'] &&
// PEARSax3 is not maintained and throws loads of DEPRECATED
// errors in PHP 5.3
version_compare(PHP_VERSION, '5.3', '<')) {
require_once 'HTMLPurifier/Lexer/PEARSax3.php';
$this->_has_pear = true;
}
if ($GLOBALS['HTMLPurifierTest']['PH5P']) {
require_once 'HTMLPurifier/Lexer/PH5P.php';
}
@@ -158,7 +151,6 @@ class HTMLPurifier_LexerTest extends HTMLPurifier_Harness
function assertTokenization($input, $expect, $alt_expect = array()) {
$lexers = array();
$lexers['DirectLex'] = new HTMLPurifier_Lexer_DirectLex();
if ($this->_has_pear) $lexers['PEARSax3'] = new HTMLPurifier_Lexer_PEARSax3();
if (class_exists('DOMDocument')) {
$lexers['DOMLex'] = new HTMLPurifier_Lexer_DOMLex();
$lexers['PH5P'] = new HTMLPurifier_Lexer_PH5P();
@@ -299,7 +291,6 @@ class HTMLPurifier_LexerTest extends HTMLPurifier_Harness
array(
// I like our behavior better, but it's non-standard
'DOMLex' => array( new HTMLPurifier_Token_Empty('a', array('href'=>'')) ),
'PEARSax3' => array( new HTMLPurifier_Token_Start('a', array('href'=>'')) ),
'PH5P' => false, // total barfing, grabs scaffolding too
)
);
@@ -313,12 +304,11 @@ class HTMLPurifier_LexerTest extends HTMLPurifier_Harness
),
array(
// some parsers will separate entities out
'PEARSax3' => $split = array(
'PH5P' => array(
new HTMLPurifier_Token_Text('<'),
new HTMLPurifier_Token_Text('b'),
new HTMLPurifier_Token_Text('>'),
),
'PH5P' => $split,
)
);
}
@@ -329,10 +319,9 @@ class HTMLPurifier_LexerTest extends HTMLPurifier_Harness
array( new HTMLPurifier_Token_Empty('a') ),
array(
// we barf on this input
'DirectLex' => $tokens = array(
'DirectLex' => array(
new HTMLPurifier_Token_Start('a', array('"' => ''))
),
'PEARSax3' => $tokens,
'PH5P' => false, // behavior varies; handle this personally
)
);
@@ -363,10 +352,7 @@ class HTMLPurifier_LexerTest extends HTMLPurifier_Harness
function test_tokenizeHTML_escapedQuote() {
$this->assertTokenization(
'&quot;',
array( new HTMLPurifier_Token_Text('"') ),
array(
'PEARSax3' => false, // PEAR barfs on this
)
array( new HTMLPurifier_Token_Text('"') )
);
}
@@ -375,8 +361,7 @@ class HTMLPurifier_LexerTest extends HTMLPurifier_Harness
'<![CDATA[You <b>can&#39;t</b> get me!]]>',
array( new HTMLPurifier_Token_Text('You <b>can&#39;t</b> get me!') ),
array(
// PEAR splits up all of the CDATA
'PEARSax3' => $split = array(
'PH5P' => array(
new HTMLPurifier_Token_Text('You '),
new HTMLPurifier_Token_Text('<'),
new HTMLPurifier_Token_Text('b'),
@@ -389,7 +374,6 @@ class HTMLPurifier_LexerTest extends HTMLPurifier_Harness
new HTMLPurifier_Token_Text('>'),
new HTMLPurifier_Token_Text(' get me!'),
),
'PH5P' => $split,
)
);
}
@@ -406,11 +390,10 @@ class HTMLPurifier_LexerTest extends HTMLPurifier_Harness
'<![CDATA[&rarr;]]>',
array( new HTMLPurifier_Token_Text("&rarr;") ),
array(
'PEARSax3' => $split = array(
'PH5P' => array(
new HTMLPurifier_Token_Text('&'),
new HTMLPurifier_Token_Text('rarr;'),
),
'PH5P' => $split,
)
);
}
@@ -457,7 +440,6 @@ class HTMLPurifier_LexerTest extends HTMLPurifier_Harness
new HTMLPurifier_Token_Text('Whoa! <3 That\'s not good >.>'),
new HTMLPurifier_Token_End('b'),
),
'PEARSax3' => false, // totally mangled
'PH5P' => array( // interesting grouping
new HTMLPurifier_Token_Start('b'),
new HTMLPurifier_Token_Text('Whoa! '),
@@ -475,9 +457,6 @@ class HTMLPurifier_LexerTest extends HTMLPurifier_Harness
array(
new HTMLPurifier_Token_Comment(' This >< comment '),
new HTMLPurifier_Token_Empty('br'),
),
array(
'PEARSax3' => false,
)
);
}
@@ -488,7 +467,6 @@ class HTMLPurifier_LexerTest extends HTMLPurifier_Harness
array( new HTMLPurifier_Token_Comment(' This >< comment') ),
array(
'DOMLex' => false,
'PEARSax3' => false,
'PH5P' => false,
)
);
@@ -505,7 +483,6 @@ class HTMLPurifier_LexerTest extends HTMLPurifier_Harness
new HTMLPurifier_Token_End('script'),
),
array(
'PEARSax3' => false,
// PH5P, for some reason, bubbles the script to <head>
'PH5P' => false,
)
@@ -515,10 +492,7 @@ class HTMLPurifier_LexerTest extends HTMLPurifier_Harness
function test_tokenizeHTML_entitiesInComment() {
$this->assertTokenization(
'<!-- This comment < &lt; & -->',
array( new HTMLPurifier_Token_Comment(' This comment < &lt; & ') ),
array(
'PEARSax3' => false
)
array( new HTMLPurifier_Token_Comment(' This comment < &lt; & ') )
);
}
@@ -531,8 +505,7 @@ class HTMLPurifier_LexerTest extends HTMLPurifier_Harness
new HTMLPurifier_Token_Start('a', array('href' => '')),
new HTMLPurifier_Token_Text('<'),
new HTMLPurifier_Token_Text('">'),
),
'PEARSax3' => false,
)
)
);
}
@@ -595,7 +568,6 @@ div {}
),
array(
'DirectLex' => $alt_expect,
'PEARSax3' => $alt_expect,
)
);
}
@@ -615,11 +587,6 @@ div {}
new HTMLPurifier_Token_Text('<3'),
new HTMLPurifier_Token_Empty('br'),
),
'PEARSax3' => array(
// bah too lazy to fix this
new HTMLPurifier_Token_Empty('br'),
new HTMLPurifier_Token_Empty('3<br'),
),
)
);
}
@@ -639,12 +606,6 @@ div {}
new HTMLPurifier_Token_Text('<<'),
new HTMLPurifier_Token_End('b'),
),
'PEARSax3' => array(
// also too lazy to fix
new HTMLPurifier_Token_Start('b'),
new HTMLPurifier_Token_Empty('<<'),
new HTMLPurifier_Token_Text('b>'),
),
)
);
}
@@ -666,13 +627,6 @@ div {}
new HTMLPurifier_Token_Text('test'),
new HTMLPurifier_Token_End('b'),
),
'PEARSax3' => array(
// totally doing the wrong thing here
new HTMLPurifier_Token_Text(' '),
new HTMLPurifier_Token_Start('b'),
new HTMLPurifier_Token_Text('test'),
new HTMLPurifier_Token_End('b'),
),
)
);
}
@@ -694,7 +648,6 @@ div {}
),
array(
'PH5P' => $alt_tokens,
'PEARSax3' => $alt_tokens,
)
);
}
@@ -777,7 +730,6 @@ div {}
),
array(
'DirectLex' => $start,
'PEARSax3' => $start,
)
);
}

View File

@@ -0,0 +1,25 @@
--TEST--
HTMLPurifier.auto.php using spl_autoload_register default
--SKIPIF--
<?php
if (!function_exists('spl_autoload_register')) {
echo "skip - spl_autoload_register() not available";
}
--FILE--
<?php
spl_autoload_extensions(".php");
spl_autoload_register();
require '../library/HTMLPurifier.auto.php';
require 'HTMLPurifier/PHPT/loading/_autoload.inc';
$config = HTMLPurifier_Config::createDefault();
$purifier = new HTMLPurifier($config);
echo $purifier->purify('<b>Salsa!') . "
";
// purposely invoke standard autoload
$test = new default_load();
--EXPECT--
<b>Salsa!</b>
Default loaded

View File

@@ -35,10 +35,17 @@ class HTMLPurifier_Strategy_FixNestingTest extends HTMLPurifier_StrategyHarness
$this->assertResult('<ul></ul>', '');
}
function testRemoveIllegalPCDATA() {
function testListHandleIllegalPCDATA() {
$this->assertResult(
'<ul>Illegal text<li>Legal item</li></ul>',
'<ul><li>Legal item</li></ul>'
'<ul><li>Illegal text</li><li>Legal item</li></ul>'
);
}
function testRemoveIllegalPCDATA() {
$this->assertResult(
'<table><tr>Illegal text<td></td></tr></table>',
'<table><tr><td></td></tr></table>'
);
}

View File

@@ -119,21 +119,21 @@ class HTMLPurifier_Strategy_MakeWellFormedTest extends HTMLPurifier_StrategyHarn
function testNestedOl() {
$this->assertResult(
'<ol><ol><li>foo</li></ol></ol>',
'<ol><li><ol><li>foo</li></ol></li></ol>'
'<ol><ol><li>foo</li></ol></ol>'
);
}
function testNestedUl() {
$this->assertResult(
'<ul><ul><li>foo</li></ul></ul>',
'<ul><li><ul><li>foo</li></ul></li></ul>'
'<ul><ul><li>foo</li></ul></ul>'
);
}
function testNestedOlWithStrangeEnding() {
$this->assertResult(
'<ol><li><ol><ol><li>foo</li></ol></li><li>foo</li></ol>',
'<ol><li><ol><li><ol><li>foo</li></ol></li><li>foo</li></ol></li></ol>'
'<ol><li><ol><ol><li>foo</li></ol></ol></li><li>foo</li></ol>'
);
}

View File

@@ -100,6 +100,16 @@ alert(&lt;b&gt;bold&lt;/b&gt;);
$this->assertResult('<!-- bo --- asdf--as -->', '<!-- bo - asdf-as -->');
}
function testPreserveCommentsWithLookup() {
$this->config->set('HTML.AllowedComments', array('allowed'));
$this->assertResult('<!-- allowed --><!-- not allowed -->', '<!-- allowed -->');
}
function testPreserveCommentsWithRegexp() {
$this->config->set('HTML.AllowedCommentsRegexp', '/^allowed[1-9]$/');
$this->assertResult('<!-- allowed1 --><!-- not allowed -->', '<!-- allowed1 -->');
}
}
// vim: et sw=4 sts=4

View File

@@ -48,14 +48,14 @@ class HTMLPurifier_Strategy_RemoveForeignElements_ErrorsTest extends HTMLPurifie
function testTrailingHyphenInCommentRemoved() {
$this->config->set('HTML.Trusted', true);
$this->expectErrorCollection(E_NOTICE, 'Strategy_RemoveForeignElements: Trailing hyphen in comment removed');
$this->expectContext('CurrentToken', new HTMLPurifier_Token_Comment(' test --', 1));
$this->expectContext('CurrentToken', new HTMLPurifier_Token_Comment(' test ', 1));
$this->invoke('<!-- test ---->');
}
function testDoubleHyphenInCommentRemoved() {
$this->config->set('HTML.Trusted', true);
$this->expectErrorCollection(E_NOTICE, 'Strategy_RemoveForeignElements: Hyphens in comment collapsed');
$this->expectContext('CurrentToken', new HTMLPurifier_Token_Comment(' test --- test -- test ', 1));
$this->expectContext('CurrentToken', new HTMLPurifier_Token_Comment(' test - test - test ', 1));
$this->invoke('<!-- test --- test -- test -->');
}

View File

@@ -117,6 +117,23 @@ class HTMLPurifier_URIFilter_MungeTest extends HTMLPurifier_URIFilterHarness
$this->assertFiltering('http://example.com/foobar');
}
function testMungeIgnoreSameDomainInsecureToSecure() {
$this->setMunge('http://example.com/%s');
$this->assertFiltering('https://example.com/foobar');
}
function testMungeIgnoreSameDomainSecureToSecure() {
$this->config->set('URI.Base', 'https://example.com');
$this->setMunge('http://example.com/%s');
$this->assertFiltering('https://example.com/foobar');
}
function testMungeSameDomainSecureToInsecure() {
$this->config->set('URI.Base', 'https://example.com');
$this->setMunge('/%s');
$this->assertFiltering('http://example.com/foobar', '/http%3A%2F%2Fexample.com%2Ffoobar');
}
function testMungeIgnoresSourceHost() {
$this->config->set('URI.Host', 'foo.example.com');
$this->setMunge('http://example.com/%s');

3
tests/default_load.php Normal file
View File

@@ -0,0 +1,3 @@
<?php
class default_load { }
echo "Default loaded\n";

View File

@@ -107,6 +107,14 @@ if ($AC['standalone']) {
require '../library/HTMLPurifier.autoload.php';
require 'HTMLPurifier/Harness.php';
// immediately load external libraries, so we can bail out early if
// they're bad
if ($GLOBALS['HTMLPurifierTest']['PEAR']) {
if ($GLOBALS['HTMLPurifierTest']['Net_IDNA2']) {
require_once 'Net/IDNA2.php';
}
}
// Shell-script code is executed
if ($AC['xml']) {

View File

@@ -51,9 +51,11 @@ $AC['standalone'] = false; // convenience for --distro=standalone
$AC['only-phpt'] = false; // --type=phpt
$AC['exclude-normal'] = false; // --distro=standalone
$AC['exclude-standalone'] = false; // --distro=normal
$AC['verbose'] = false;
$aliases = array(
'f' => 'file',
'q' => 'quiet',
'v' => 'verbose',
);
htmlpurifier_parse_args($AC, $aliases);
@@ -68,7 +70,7 @@ elseif ($AC['standalone']) $AC['distro'] = 'standalone';
if ($AC['xml']) {
$reporter = new XmlReporter();
} else {
$reporter = new TextReporter();
$reporter = new HTMLPurifier_SimpleTest_TextReporter($AC);
}
// Regenerate any necessary files