1
0
mirror of https://github.com/ezyang/htmlpurifier.git synced 2025-08-06 14:16:32 +02:00

Compare commits

...

50 Commits

Author SHA1 Message Date
Edward Z. Yang
882ffed9ba Release 4.2.0.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2010-09-15 02:52:57 -04:00
Edward Z. Yang
86990a21f1 Rename newline normalization directive to something better.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2010-09-15 02:50:39 -04:00
Tomasz Muras
9573f0933d Make newline normalization optional. 2010-09-14 23:49:28 -04:00
Edward Z. Yang
632bf2bbd4 Shift to 4.2.0 release cycle.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2010-09-14 23:38:51 -04:00
Edward Z. Yang
ec86598446 Add support for file:// URI scheme.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2010-09-09 00:01:26 -04:00
Edward Z. Yang
b6c3f5e89b Update TODO.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2010-09-08 23:42:05 -04:00
Edward Z. Yang
7c91104532 Implement HTML.FlashAllowFullScreen.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2010-09-08 23:39:20 -04:00
Edward Z. Yang
eac628f490 Add %CSS.ForbiddenProperties directive.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2010-09-04 02:59:03 -04:00
Edward Z. Yang
92913bc816 Add documentation about configuration directive types.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2010-09-04 02:28:53 -04:00
Edward Z. Yang
479d793562 Reword documentation to be clearer, and give warning on common user error.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2010-09-04 01:31:20 -04:00
Edward Z. Yang
e2c15f1c98 Fix Mac Snow Leopard APC bug.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2010-08-26 21:40:58 -07:00
Edward Z. Yang
57ced3f361 Tighten up ignore spec.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2010-06-30 06:00:45 -07:00
Edward Z. Yang
c04a441b3e Actually make URI.DisableResources do something.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2010-06-30 05:59:17 -07:00
Edward Z. Yang
1bed8b6d5f Added %Core.RemoveProcessingInstructions.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2010-06-20 18:26:44 -07:00
Edward Z. Yang
33afd7d9e0 Fix improper handling of IE conditional comments.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2010-06-18 06:08:54 -07:00
Edward Z. Yang
18e538317a Release 4.1.1.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2010-05-31 20:17:31 -07:00
Edward Z. Yang
96a4193fc9 Fix undefined index warnings in maintenance scripts.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2010-05-31 20:07:27 -07:00
Edward Z. Yang
00c66fa9cb Fix bug in parsing single attribute with entities.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2010-05-31 19:44:18 -07:00
Edward Z. Yang
d3abcb90e3 Rewrite CSS url() and font-family output logic.
The new logic is as follows:

* Given a URL to insert into url(), check that it is properly URL
  encoded (in particular, a doublequote and backslash never occurs
  within it) and then place it as url("http://example.com").

* Given a font name, if it is strictly alphanumeric, it is safe to omit
  quotes. Otherwise, wrap in double quotes and replace '"' with '\22 '
  (note trailing space) and '\' with '\5C ' (ditto).

We introduce expandCSSEscape() which is a hack for common parsing
idioms in CSS; this means that CSS escapes are now recognized inside
URLs as well as unquoted font names.

Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2010-05-31 18:45:21 -07:00
Edward Z. Yang
df3100b1b3 Make test script less chatty when log_errors is on.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2010-05-20 21:50:44 -04:00
Edward Z. Yang
143e1ad718 Remove shebang and +x from test script.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2010-05-20 21:21:26 -04:00
Edward Z. Yang
875b0febde Fix infinite loop involving wrapping formedness.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2010-05-17 23:22:51 -04:00
Edward Z. Yang
3166b8a10f Fix bug in background-position with center keyword.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2010-05-05 15:08:57 -04:00
Edward Z. Yang
1a70bffd5a Emit errors when body is extracted.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2010-05-04 13:41:09 -04:00
Edward Z. Yang
f4c6e10ff7 Release 4.1.0.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2010-04-26 18:31:40 -04:00
Edward Z. Yang
c1cbd9e565 Mute STRICT errors from CSSTidy and don't run PEARSax3 on PHP 5.3.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2010-04-26 18:27:32 -04:00
Edward Z. Yang
da94d3d6ac Always quote the contents of url() in CSS.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2010-04-26 12:10:15 -04:00
Edward Z. Yang
80793e925e Remove +x bit from RemoveSpansWithoutAttributes.php
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2010-04-17 00:23:09 -04:00
Edward Z. Yang
8ef4fb22db Support for flashvars in HTML.SafeEmbed.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2010-03-30 13:33:13 -04:00
Edward Z. Yang
70a7a3f5dd Handle <ol><ol> properly by adding missing <li> tag.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2010-03-10 00:58:37 -05:00
Edward Z. Yang
4d612d5a77 Improve handling of malformed object parameters.
When specifying source material for <object> tags, you must use
data inside the object tag as well as specify movie in a param.
If you specify a src (which is the appropriate markup for <embed>)
we now convert and fill in the other attributes appropriately.

Also, fix a PHP warning in Generator code.

Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2010-03-09 17:29:38 -05:00
Edward Z. Yang
63a854ee5d Remove call-time pass-by-reference.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2010-03-08 03:45:11 -05:00
Edward Z. Yang
0229458f8f Implement Internet Explorer compatibility code for embedded content.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2010-03-08 01:56:40 -05:00
Edward Z. Yang
baa477ac08 Truncate alt text from src if it's too long.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2010-03-08 01:22:21 -05:00
Edward Z. Yang
dc90e8e85b Support flashvars.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2010-03-08 01:16:57 -05:00
Edward Z. Yang
97125ed18b Implement data URI scheme.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2010-03-07 21:45:39 -05:00
Paul Stone
9a9036c689 Implement auto-formatter that removes empty span tags.
Signed-off-by: Paul Stone <patches@pdjs.co.uk>
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2010-03-07 18:59:33 -05:00
Edward Z. Yang
aea7d02dfe Support YouTube slideshow embedding.
YouTube slideshows contain a /cp/, not a /v/, in their URL;
relax the YouTube filter to allow them.

Signed-off-by: Nigel McNie <nigel@catalyst.net.nz>
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2010-03-07 18:57:22 -05:00
Brian DeRocher
b3ca1498c2 Add boolean value flag for PEARSax3 for testing if a token is empty.
Signed-off-by: Brian DeRocher <brian@derocher.org>
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2010-02-26 21:36:51 -05:00
Edward Z. Yang
ac18672aba Fix extant broken PEARSax3 parsing patterns.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2010-02-26 21:14:52 -05:00
Edward Z. Yang
faf28682ad Manually work around PEARSax3 E_STRICT errors.
Previously, my development environment was not running the PEARSax3
tests because my environment was set to E_STRICT error handling, and
thus the tests were skipped.  Relax this requirement by making the
wrapper class E_STRICT safe.  This introduces a few failing tests.

Also update TODO and add another fresh test.

Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2010-02-26 20:42:42 -05:00
Edward Z. Yang
e2cd852bcf Add shebang line to tests index script.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2010-02-15 02:55:43 -05:00
Edward Z. Yang
694583259c Fix autoparagraph bug with non-inline elements.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2010-02-15 02:55:33 -05:00
Edward Z. Yang
bde4de3c78 Update TODO.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2009-08-27 20:17:41 -04:00
Edward Z. Yang
5b4e5c983e Support proprietary height attribute on table.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2009-08-27 20:17:24 -04:00
Edward Z. Yang
1ad8fd5ce9 Gracefully deal with null injectors.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2009-08-27 20:03:31 -04:00
Edward Z. Yang
6bdf161afd Update TODO.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2009-07-15 14:50:52 -04:00
Edward Z. Yang
af45a6c191 Release Phorum module 4.0.0.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2009-07-09 21:12:35 -04:00
Edward Z. Yang
2b72d0445f Add 4.1.0 release NEWS entry.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2009-07-09 21:03:46 -04:00
Edward Z. Yang
d7b3117678 Add doxygen doc scripts, and fix package.php
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2009-07-08 22:11:15 -04:00
94 changed files with 1302 additions and 289 deletions

2
.gitignore vendored
View File

@@ -18,3 +18,5 @@ docs/doxygen*
*.phpt.php
*.phpt.skip.php
*.htmlt.ini
*.patch
/*.php

View File

@@ -31,7 +31,7 @@ PROJECT_NAME = HTMLPurifier
# This could be handy for archiving the generated documentation or
# if some version control system is used.
PROJECT_NUMBER = 4.0.0
PROJECT_NUMBER = 4.2.0
# The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute)
# base path where the generated documentation will be put.

2
FOCUS
View File

@@ -1,4 +1,4 @@
5 - Major feature enhancements
4 - Minor feature enhancements
[ Appendix A: Release focus IDs ]
0 - N/A

46
NEWS
View File

@@ -9,6 +9,52 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier
. Internal change
==========================
4.2.0, released 2010-09-15
! Added %Core.RemoveProcessingInstructions, which lets you remove
<? ... ?> statements.
! Added %URI.DisableResources functionality; the directive originally
did nothing. Thanks David Rothstein for reporting.
! Add documentation about configuration directive types.
! Add %CSS.ForbiddenProperties configuration directive.
! Add %HTML.FlashAllowFullScreen to permit embedded Flash objects
to utilize full-screen mode.
! Add optional support for the <code>file</code> URI scheme, enable
by explicitly setting %URI.AllowedSchemes.
! Add %Core.NormalizeNewlines option to allow turning off newline
normalization.
- Fix improper handling of Internet Explorer conditional comments
by parser. Thanks zmonteca for reporting.
- Fix missing attributes bug when running on Mac Snow Leopard and APC.
Thanks sidepodcast for the fix.
- Warn if an element is allowed, but an attribute it requires is
not allowed.
4.1.1, released 2010-05-31
- Fix undefined index warnings in maintenance scripts.
- Fix bug in DirectLex for parsing elements with a single attribute
with entities.
- Rewrite CSS output logic for font-family and url(). Thanks Mario
Heiderich <mario.heiderich@googlemail.com> for reporting and Takeshi
Terada <t-terada@violet.plala.or.jp> for suggesting the fix.
- Emit an error for CollectErrors if a body is extracted
- Fix bug in background-position center keyword handling.
- Fix infinite loop when a wrapper element is inserted in a context
where it's not allowed. Thanks Lars <lars@renoz.dk> for reporting.
- Remove +x bit and shebang from index.php; only supported mode is to
explicitly call it with php.
- Make test script less chatty when log_errors is on.
4.1.0, released 2010-04-26
! Support proprietary height attribute on table element
! Support YouTube slideshows that contain /cp/ in their URL.
! Support for data: URI scheme; not enabled by default, add it using
%URI.AllowedSchemes
! Support flashvars when using %HTML.SafeObject and %HTML.SafeEmbed.
! Support for Internet Explorer compatibility with %HTML.SafeObject
using %Output.FlashCompat.
! Handle <ol><ol> properly, by inserting the necessary <li> tag.
- Always quote the insides of url(...) in CSS.
4.0.0, released 2009-07-07
# APIs for ConfigSchema subsystem have substantially changed. See
docs/dev-config-bcbreaks.txt for details; in essence, anything that

87
TODO
View File

@@ -11,22 +11,38 @@ If no interest is expressed for a feature that may require a considerable
amount of effort to implement, it may get endlessly delayed. Do not be
afraid to cast your vote for the next feature to be implemented!
- Built-in support for target="_blank" on all external links
- Incorporate data: support as implemented here:
http://htmlpurifier.org/phorum/read.php?3,3491,3548
- Fix ImgRequired to handle data correctly
- Incorporate download and resize support as implemented here:
http://htmlpurifier.org/phorum/read.php?3,2795,3628
- Think about allowing explicit order of operations hooks for transforms
- Add "register" field to config schemas to eliminate dependence on
naming conventions
- Add examples to everything (make built-in which also automatically
gives output)
Things to do as soon as possible:
- Think about allowing explicit order of operations hooks for transforms
- Inputs don't do the right thing with submit
- Fix "<.<" bug (trailing < is removed if not EOD)
- Build in better internal state dumps and debugging tools for remote
debugging
- Allowed/Allowed* have strange interactions when both set
- Transform lone embeds into object tags
- Deprecated config options that emit warnings when you set them (with
a way of muting the warning if you really want to)
- Make HTML.Trusted work with Output.FlashCompat
FUTURE VERSIONS
---------------
4.1 release [It's All About Trust] (floating)
4.3 release [OMG CONFIG PONIES]
! Fix Printer. It's from the old days when we didn't have decent XML classes
! Factor demo.php into a set of Printer classes, and then create a stub
file for users here (inside the actual HTML Purifier library)
- Fix error handling with form construction
- Do encoding validation in Printers, or at least, where user data comes in
- Config: Add examples to everything (make built-in which also automatically
gives output)
- Add "register" field to config schemas to eliminate dependence on
naming conventions (try to remember why we ultimately decided on this)
5.0 release [HTML 5]
# Swap out code to use html5lib tokenizer and tree-builder
! Allow turning off of FixNesting and required attribute insertion
5.1 release [It's All About Trust] (floating)
# Implement untrusted, dangerous elements/attributes
# Implement IDREF support (harder than it seems, since you cannot have
IDREFs to non-existent IDs)
@@ -35,36 +51,23 @@ FUTURE VERSIONS
# Frameset XHTML 1.0 and HTML 4.01 doctypes
- Figure out how to simultaneously set %CSS.Trusted and %HTML.Trusted (?)
4.2 release [Error'ed]
5.2 release [Error'ed]
# Error logging for filtering/cleanup procedures
- XSS-attempt detection--certain errors are flagged XSS-like
4.3 release [Do What I Mean, Not What I Say]
# Additional support for poorly written HTML
- Microsoft Word HTML cleaning (i.e. MsoNormal, but research essential!)
- Friendly strict handling of <address> (block -> <br>)
? Remove redundant tags, ex. <u><u>Underlined</u></u>. Implementation notes:
1. Analyzing which tags to remove duplicants
2. Ensure attributes are merged into the parent tag
3. Extend the tag exclusion system to specify whether or not the
contents should be dropped or not (currently, there's code that could do
something like this if it didn't drop the inner text too.)
- Remove <span> tags that don't do anything (no attributes)
- XSS-attempt detection--certain errors are flagged XSS-like
- Append something to duplicate IDs so they're still usable (impl. note: the
dupe detector would also need to detect the suffix as well)
- Externalize inline CSS to promote clean HTML, proposed by Sander Tekelenburg
5.0 release [Beyond HTML]
6.0 release [Beyond HTML]
# Legit token based CSS parsing (will require revamping almost every
AttrDef class). Probably will use CSSTidy class?
AttrDef class). Probably will use CSSTidy
# More control over allowed CSS properties using a modularization
# HTML 5 support
# IRI support (this includes IDN)
- Standardize token armor for all areas of processing
- Convert RTL/LTR override characters to <bdo> tags, or vice versa on demand.
Also, enable disabling of directionality
6.0 release [To XML and Beyond]
7.0 release [To XML and Beyond]
- Extended HTML capabilities based on namespacing and tag transforms (COMPLEX)
- Hooks for adding custom processors to custom namespaced tags and
attributes, offer default implementation
@@ -75,25 +78,14 @@ Ongoing
- Refactor unit tests into lots of test methods
- Plugins for major CMSes (COMPLEX)
- phpBB
- Drupal needs loving!
- Phorum need loving!
- more! (look for ones that use WYSIWYGs)
- Also, maybe a FAQ for extension writers with HTML Purifier
- Also, a FAQ for extension writers with HTML Purifier
AutoFormat
- Smileys
- Syntax highlighting (with GeSHi) with <pre> and possibly <?php
- Look at http://drupal.org/project/Modules/category/63 for ideas
Optimizations
- Reduce size of internal data-structures (esp. HTMLDefinition)
- Get PH5P working with the latest versions of DOM, which have much more
stringent error checking procedures. Maybe convert straight to tokens.
- Get rid of set_include_path(). Save this for another major release.
Neat feature related
! Factor demo.php into a set of Printer classes, and then create a stub
file for users here (inside the actual HTML Purifier library)
! Support exporting configuration, so users can easily tweak settings
in the demo, and then copy-paste into their own setup
- Advanced URI filtering schemes (see docs/proposal-new-directives.txt)
@@ -110,10 +102,21 @@ Neat feature related
- Full set of color keywords. Also, a way to add onto them without
finalizing the configuration object.
- Write a var_export and memcached DefinitionCache - Denis
- Built-in support for target="_blank" on all external links
- Convert RTL/LTR override characters to <bdo> tags, or vice versa on demand.
Also, enable disabling of directionality
? Externalize inline CSS to promote clean HTML, proposed by Sander Tekelenburg
? Remove redundant tags, ex. <u><u>Underlined</u></u>. Implementation notes:
1. Analyzing which tags to remove duplicants
2. Ensure attributes are merged into the parent tag
3. Extend the tag exclusion system to specify whether or not the
contents should be dropped or not (currently, there's code that could do
something like this if it didn't drop the inner text too.)
Maintenance related (slightly boring)
# CHMOD install script for PEAR installs
! Factor out command line parser into its own class, and unit test it
- Reduce size of internal data-structures (esp. HTMLDefinition)
- Allow merging configurations. Thus,
a -> b -> default
c -> d -> default

View File

@@ -1 +1 @@
4.0.0
4.2.0

View File

@@ -1,7 +1,8 @@
HTML Purifier 4.0 is a major feature release focused on configuration
It deprecates the $config->set('Ns', 'Directive', $value) syntax for
$config->set('Ns.Directive', $value); both syntaxes work but the
former will throw errors. There are also some new features: robust
support for name/id, configuration inheritance, remove nbsp in
the RemoveEmpty autoformatter, userland configuration directives
and configuration serialization.
HTML Purifier 4.2.0 is a minor release that implements a number of
feature requests accumulated over half a year. New configuration
options include %Core.RemoveProcessingInstructions,
%CSS.ForbiddenProperties, %HTML.FlashAllowFullScreen and
%Core.NormalizeNewlines. Additionally, %URI.DisableResources is
now functional and file: is an optionally supported URI scheme.
There are also some minor bugfixes, usability improvements and
documentation updates.

View File

@@ -40,12 +40,26 @@
</xsl:apply-templates>
</ul>
</div>
<div id="typesContainer">
<h2>Types</h2>
<xsl:apply-templates select="$typeLookup" mode="types" />
</div>
<xsl:apply-templates />
</div>
</body>
</html>
</xsl:template>
<xsl:template match="type" mode="types">
<div class="type-block">
<xsl:attribute name="id">type-<xsl:value-of select="@id" /></xsl:attribute>
<h3><code><xsl:value-of select="@id" /></code>: <xsl:value-of select="@name" /></h3>
<div class="type-description">
<xsl:copy-of xmlns:xhtml="http://www.w3.org/1999/xhtml" select="xhtml:div/node()" />
</div>
</div>
</xsl:template>
<xsl:template match="title" mode="toc" />
<xsl:template match="namespace" mode="toc">
<xsl:param name="overflowNumber" />
@@ -192,10 +206,13 @@
<td>
<xsl:variable name="type" select="text()" />
<xsl:attribute name="class">type type-<xsl:value-of select="$type" /></xsl:attribute>
<xsl:value-of select="$typeLookup/type[@id=$type]/text()" />
<xsl:if test="@allow-null='yes'">
(or null)
</xsl:if>
<a>
<xsl:attribute name="href">#type-<xsl:value-of select="$type" /></xsl:attribute>
<xsl:value-of select="$typeLookup/type[@id=$type]/@name" />
<xsl:if test="@allow-null='yes'">
(or null)
</xsl:if>
</a>
</td>
</tr>
</xsl:template>

View File

@@ -1,16 +1,68 @@
<?xml version="1.0" encoding="UTF-8"?>
<types>
<type id="string">String</type>
<type id="istring">Case-insensitive string</type>
<type id="text">Text</type>
<type id="itext">Case-insensitive text</type>
<type id="int">Integer</type>
<type id="float">Float</type>
<type id="bool">Boolean</type>
<type id="lookup">Lookup array</type>
<type id="list">Array list</type>
<type id="hash">Associative array</type>
<type id="mixed">Mixed</type>
<type id="string" name="String"><div xmlns="http://www.w3.org/1999/xhtml">
A <a
href="http://docs.php.net/manual/en/language.types.string.php">sequence
of characters</a>.
</div></type>
<type id="istring" name="Case-insensitive string"><div xmlns="http://www.w3.org/1999/xhtml">
A series of case-insensitive characters. Internally, upper-case
ASCII characters will be converted to lower-case.
</div></type>
<type id="text" name="Text"><div xmlns="http://www.w3.org/1999/xhtml">
A series of characters that may contain newlines. Text tends to
indicate human-oriented text, as opposed to a machine format.
</div></type>
<type id="itext" name="Case-insensitive text"><div xmlns="http://www.w3.org/1999/xhtml">
A series of case-insensitive characters that may contain newlines.
</div></type>
<type id="int" name="Integer"><div xmlns="http://www.w3.org/1999/xhtml">
An <a
href="http://docs.php.net/manual/en/language.types.integer.php">
integer</a>. You are alternatively permitted to pass a string of
digits instead, which will be cast to an integer using
<code>(int)</code>.
</div></type>
<type id="float" name="Float"><div xmlns="http://www.w3.org/1999/xhtml">
A <a href="http://docs.php.net/manual/en/language.types.float.php">
floating point number</a>. You are alternatively permitted to
pass a numeric string (as defined by <code>is_numeric()</code>),
which will be cast to a float using <code>(float)</code>.
</div></type>
<type id="bool" name="Boolean"><div xmlns="http://www.w3.org/1999/xhtml">
A <a
href="http://docs.php.net/manual/en/language.types.boolean.php">boolean</a>.
You are alternatively permitted to pass an integer <code>0</code> or
<code>1</code> (other integers are not permitted) or a string
<code>"on"</code>, <code>"true"</code> or <code>"1"</code> for
<code>true</code>, and <code>"off"</code>, <code>"false"</code> or
<code>"0"</code> for <code>false</code>.
</div></type>
<type id="lookup" name="Lookup array"><div xmlns="http://www.w3.org/1999/xhtml">
An array whose values are <code>true</code>, e.g. <code>array('key'
=> true, 'key2' => true)</code>. You are alternatively permitted
to pass an array list of the keys <code>array('key', 'key2')</code>
or a comma-separated string of keys <code>"key, key2"</code>. If
you pass an array list of values, ensure that your values are
strictly numerically indexed: <code>array('key1', 2 =>
'key2')</code> will not do what you expect and emits a warning.
</div></type>
<type id="list" name="Array list"><div xmlns="http://www.w3.org/1999/xhtml">
An array which has consecutive integer indexes, e.g.
<code>array('val1', 'val2')</code>. You are alternatively permitted
to pass a comma-separated string of keys <code>"val1, val2"</code>.
If your array is not in this form, <code>array_values</code> is run
on the array and a warning is emitted.
</div></type>
<type id="hash" name="Associative array"><div xmlns="http://www.w3.org/1999/xhtml">
An array which is a mapping of keys to values, e.g.
<code>array('key1' => 'val1', 'key2' => 'val2')</code>. You are
alternatively permitted to pass a comma-separated string of
key-colon-value strings, e.g. <code>"key1: val1, key2: val2"</code>.
</div></type>
<type id="mixed" name="Mixed"><div xmlns="http://www.w3.org/1999/xhtml">
An arbitrary PHP value of any type.
</div></type>
</types>
<!-- vim: et sw=4 sts=4

View File

@@ -6,6 +6,7 @@
</file>
<file name="HTMLPurifier/Lexer.php">
<line>81</line>
<line>284</line>
</file>
<file name="HTMLPurifier/Lexer/DirectLex.php">
<line>53</line>
@@ -41,6 +42,11 @@
<line>275</line>
</file>
</directive>
<directive id="CSS.ForbiddenProperties">
<file name="HTMLPurifier/CSSDefinition.php">
<line>289</line>
</file>
</directive>
<directive id="Cache.DefinitionImpl">
<file name="HTMLPurifier/DefinitionCacheFactory.php">
<line>49</line>
@@ -85,22 +91,35 @@
</directive>
<directive id="Output.CommentScriptContents">
<file name="HTMLPurifier/Generator.php">
<line>45</line>
<line>56</line>
</file>
</directive>
<directive id="Output.SortAttr">
<file name="HTMLPurifier/Generator.php">
<line>46</line>
<line>57</line>
</file>
</directive>
<directive id="Output.FlashCompat">
<file name="HTMLPurifier/Generator.php">
<line>58</line>
</file>
</directive>
<directive id="Output.TidyFormat">
<file name="HTMLPurifier/Generator.php">
<line>75</line>
<line>87</line>
</file>
</directive>
<directive id="Core.NormalizeNewlines">
<file name="HTMLPurifier/Generator.php">
<line>101</line>
</file>
<file name="HTMLPurifier/Lexer.php">
<line>266</line>
</file>
</directive>
<directive id="Output.Newline">
<file name="HTMLPurifier/Generator.php">
<line>89</line>
<line>102</line>
</file>
</directive>
<directive id="HTML.BlockWrapper">
@@ -130,12 +149,12 @@
</directive>
<directive id="HTML.ForbiddenElements">
<file name="HTMLPurifier/HTMLDefinition.php">
<line>337</line>
<line>342</line>
</file>
</directive>
<directive id="HTML.ForbiddenAttributes">
<file name="HTMLPurifier/HTMLDefinition.php">
<line>338</line>
<line>343</line>
</file>
</directive>
<directive id="HTML.Trusted">
@@ -143,7 +162,7 @@
<line>202</line>
</file>
<file name="HTMLPurifier/Lexer.php">
<line>258</line>
<line>271</line>
</file>
<file name="HTMLPurifier/HTMLModule/Image.php">
<line>27</line>
@@ -205,7 +224,12 @@
</directive>
<directive id="Core.ConvertDocumentToFragment">
<file name="HTMLPurifier/Lexer.php">
<line>267</line>
<line>282</line>
</file>
</directive>
<directive id="Core.RemoveProcessingInstructions">
<file name="HTMLPurifier/Lexer.php">
<line>303</line>
</file>
</directive>
<directive id="URI.">
@@ -320,7 +344,7 @@
</directive>
<directive id="Attr.DefaultInvalidImageAlt">
<file name="HTMLPurifier/AttrTransform/ImgRequired.php">
<line>32</line>
<line>33</line>
</file>
</directive>
<directive id="HTML.Attr.Name.UseCDATA">
@@ -331,6 +355,11 @@
<line>13</line>
</file>
</directive>
<directive id="HTML.FlashAllowFullScreen">
<file name="HTMLPurifier/AttrTransform/SafeParam.php">
<line>37</line>
</file>
</directive>
<directive id="Core.EscapeInvalidChildren">
<file name="HTMLPurifier/ChildDef/Required.php">
<line>62</line>

View File

@@ -7,7 +7,7 @@
* primary concern and you are using an opcode cache. PLEASE DO NOT EDIT THIS
* FILE, changes will be overwritten the next time the script is run.
*
* @version 4.0.0
* @version 4.2.0
*
* @warning
* You must *not* include any other HTML Purifier files before this file,
@@ -176,6 +176,7 @@ require 'HTMLPurifier/Injector/DisplayLinkURI.php';
require 'HTMLPurifier/Injector/Linkify.php';
require 'HTMLPurifier/Injector/PurifierLinkify.php';
require 'HTMLPurifier/Injector/RemoveEmpty.php';
require 'HTMLPurifier/Injector/RemoveSpansWithoutAttributes.php';
require 'HTMLPurifier/Injector/SafeObject.php';
require 'HTMLPurifier/Lexer/DOMLex.php';
require 'HTMLPurifier/Lexer/DirectLex.php';
@@ -195,9 +196,12 @@ require 'HTMLPurifier/Token/Start.php';
require 'HTMLPurifier/Token/Text.php';
require 'HTMLPurifier/URIFilter/DisableExternal.php';
require 'HTMLPurifier/URIFilter/DisableExternalResources.php';
require 'HTMLPurifier/URIFilter/DisableResources.php';
require 'HTMLPurifier/URIFilter/HostBlacklist.php';
require 'HTMLPurifier/URIFilter/MakeAbsolute.php';
require 'HTMLPurifier/URIFilter/Munge.php';
require 'HTMLPurifier/URIScheme/data.php';
require 'HTMLPurifier/URIScheme/file.php';
require 'HTMLPurifier/URIScheme/ftp.php';
require 'HTMLPurifier/URIScheme/http.php';
require 'HTMLPurifier/URIScheme/https.php';

View File

@@ -19,7 +19,7 @@
*/
/*
HTML Purifier 4.0.0 - Standards Compliant HTML Filtering
HTML Purifier 4.2.0 - Standards Compliant HTML Filtering
Copyright (C) 2006-2008 Edward Z. Yang
This library is free software; you can redistribute it and/or
@@ -55,10 +55,10 @@ class HTMLPurifier
{
/** Version of HTML Purifier */
public $version = '4.0.0';
public $version = '4.2.0';
/** Constant with version of HTML Purifier */
const VERSION = '4.0.0';
const VERSION = '4.2.0';
/** Global configuration object */
public $config;

View File

@@ -170,6 +170,7 @@ require_once $__dir . '/HTMLPurifier/Injector/DisplayLinkURI.php';
require_once $__dir . '/HTMLPurifier/Injector/Linkify.php';
require_once $__dir . '/HTMLPurifier/Injector/PurifierLinkify.php';
require_once $__dir . '/HTMLPurifier/Injector/RemoveEmpty.php';
require_once $__dir . '/HTMLPurifier/Injector/RemoveSpansWithoutAttributes.php';
require_once $__dir . '/HTMLPurifier/Injector/SafeObject.php';
require_once $__dir . '/HTMLPurifier/Lexer/DOMLex.php';
require_once $__dir . '/HTMLPurifier/Lexer/DirectLex.php';
@@ -189,9 +190,12 @@ require_once $__dir . '/HTMLPurifier/Token/Start.php';
require_once $__dir . '/HTMLPurifier/Token/Text.php';
require_once $__dir . '/HTMLPurifier/URIFilter/DisableExternal.php';
require_once $__dir . '/HTMLPurifier/URIFilter/DisableExternalResources.php';
require_once $__dir . '/HTMLPurifier/URIFilter/DisableResources.php';
require_once $__dir . '/HTMLPurifier/URIFilter/HostBlacklist.php';
require_once $__dir . '/HTMLPurifier/URIFilter/MakeAbsolute.php';
require_once $__dir . '/HTMLPurifier/URIFilter/Munge.php';
require_once $__dir . '/HTMLPurifier/URIScheme/data.php';
require_once $__dir . '/HTMLPurifier/URIScheme/file.php';
require_once $__dir . '/HTMLPurifier/URIScheme/ftp.php';
require_once $__dir . '/HTMLPurifier/URIScheme/http.php';
require_once $__dir . '/HTMLPurifier/URIScheme/https.php';

View File

@@ -82,6 +82,42 @@ abstract class HTMLPurifier_AttrDef
return preg_replace('/rgb\((\d+)\s*,\s*(\d+)\s*,\s*(\d+)\)/', 'rgb(\1,\2,\3)', $string);
}
/**
* Parses a possibly escaped CSS string and returns the "pure"
* version of it.
*
* Backslash sequences are handled as follows:
* - a backslash followed by one to six hexadecimal digits is replaced
*   by the character HTMLPurifier_Encoder::unichr() produces for that
*   code; if a single character that trim() would strip directly
*   follows the digits, it is consumed as part of the escape;
* - a backslash followed by "\n" is removed together with the newline;
* - a backslash followed by any other character yields that character
*   without the backslash;
* - a lone backslash at the very end of the input is kept as-is.
*
* @param string $string CSS string that may contain backslash escapes
* @return string The string with the escapes expanded
*/
protected function expandCSSEscape($string) {
// flexibly parse it
$ret = '';
for ($i = 0, $c = strlen($string); $i < $c; $i++) {
if ($string[$i] === '\\') {
// step onto the character that follows the backslash
$i++;
if ($i >= $c) {
// nothing follows the backslash: emit it literally and stop
$ret .= '\\';
break;
}
if (ctype_xdigit($string[$i])) {
// collect up to six hex digits in total ($a counts the digits
// gathered so far, the first one is already in $code)
$code = $string[$i];
for ($a = 1, $i++; $i < $c && $a < 6; $i++, $a++) {
if (!ctype_xdigit($string[$i])) break;
$code .= $string[$i];
}
// We have to be extremely careful when adding
// new characters, to make sure we're not breaking
// the encoding.
$char = HTMLPurifier_Encoder::unichr(hexdec($code));
// NOTE(review): on this path $i already points at the character
// after the hex digits, so the loop increment skips that character
// even when it is not whitespace -- confirm this is intended.
if (HTMLPurifier_Encoder::cleanUTF8($char) === '') continue;
$ret .= $char;
// $i points just past the digits; if that character is not
// whitespace, step back so the loop increment lands on it again,
// otherwise let the increment swallow the whitespace terminator
if ($i < $c && trim($string[$i]) !== '') $i--;
continue;
}
// backslash-newline: drop both characters
if ($string[$i] === "\n") continue;
}
// ordinary character, or the character following a non-hex escape
$ret .= $string[$i];
}
return $ret;
}
}
// vim: et sw=4 sts=4

View File

@@ -59,7 +59,8 @@ class HTMLPurifier_AttrDef_CSS_BackgroundPosition extends HTMLPurifier_AttrDef
$keywords = array();
$keywords['h'] = false; // left, right
$keywords['v'] = false; // top, bottom
$keywords['c'] = false; // center
$keywords['ch'] = false; // center (first word)
$keywords['cv'] = false; // center (second word)
$measures = array();
$i = 0;
@@ -79,6 +80,13 @@ class HTMLPurifier_AttrDef_CSS_BackgroundPosition extends HTMLPurifier_AttrDef
$lbit = ctype_lower($bit) ? $bit : strtolower($bit);
if (isset($lookup[$lbit])) {
$status = $lookup[$lbit];
if ($status == 'c') {
if ($i == 0) {
$status = 'ch';
} else {
$status = 'cv';
}
}
$keywords[$status] = $lbit;
$i++;
}
@@ -101,20 +109,19 @@ class HTMLPurifier_AttrDef_CSS_BackgroundPosition extends HTMLPurifier_AttrDef
if (!$i) return false; // no valid values were caught
$ret = array();
// first keyword
if ($keywords['h']) $ret[] = $keywords['h'];
elseif (count($measures)) $ret[] = array_shift($measures);
elseif ($keywords['c']) {
$ret[] = $keywords['c'];
$keywords['c'] = false; // prevent re-use: center = center center
elseif ($keywords['ch']) {
$ret[] = $keywords['ch'];
$keywords['cv'] = false; // prevent re-use: center = center center
}
elseif (count($measures)) $ret[] = array_shift($measures);
if ($keywords['v']) $ret[] = $keywords['v'];
elseif ($keywords['cv']) $ret[] = $keywords['cv'];
elseif (count($measures)) $ret[] = array_shift($measures);
elseif ($keywords['c']) $ret[] = $keywords['c'];
if (empty($ret)) return false;
return implode(' ', $ret);

View File

@@ -34,37 +34,10 @@ class HTMLPurifier_AttrDef_CSS_FontFamily extends HTMLPurifier_AttrDef
$quote = $font[0];
if ($font[$length - 1] !== $quote) continue;
$font = substr($font, 1, $length - 2);
$new_font = '';
for ($i = 0, $c = strlen($font); $i < $c; $i++) {
if ($font[$i] === '\\') {
$i++;
if ($i >= $c) {
$new_font .= '\\';
break;
}
if (ctype_xdigit($font[$i])) {
$code = $font[$i];
for ($a = 1, $i++; $i < $c && $a < 6; $i++, $a++) {
if (!ctype_xdigit($font[$i])) break;
$code .= $font[$i];
}
// We have to be extremely careful when adding
// new characters, to make sure we're not breaking
// the encoding.
$char = HTMLPurifier_Encoder::unichr(hexdec($code));
if (HTMLPurifier_Encoder::cleanUTF8($char) === '') continue;
$new_font .= $char;
if ($i < $c && trim($font[$i]) !== '') $i--;
continue;
}
if ($font[$i] === "\n") continue;
}
$new_font .= $font[$i];
}
$font = $new_font;
}
$font = $this->expandCSSEscape($font);
// $font is a pure representation of the font name
if (ctype_alnum($font) && $font !== '') {
@@ -73,12 +46,21 @@ class HTMLPurifier_AttrDef_CSS_FontFamily extends HTMLPurifier_AttrDef
continue;
}
// complicated font, requires quoting
// bugger out on whitespace. form feed (0C) really
// shouldn't show up regardless
$font = str_replace(array("\n", "\t", "\r", "\x0C"), ' ', $font);
// armor single quotes and new lines
$font = str_replace("\\", "\\\\", $font);
$font = str_replace("'", "\\'", $font);
$final .= "'$font', ";
// These ugly transforms don't pose a security
// risk (as \\ and \" might). We could try to be clever and
// use single-quote wrapping when there is a double quote
// present, but I have chosen not to implement that.
// (warning: this code relies on the selection of quotation
// mark below)
$font = str_replace('\\', '\\5C ', $font);
$font = str_replace('"', '\\22 ', $font);
// complicated font, requires quoting
$final .= "\"$font\", "; // note that this will later get turned into &quot;
}
$final = rtrim($final, ', ');
if ($final === '') return false;

View File

@@ -34,20 +34,16 @@ class HTMLPurifier_AttrDef_CSS_URI extends HTMLPurifier_AttrDef_URI
$uri = substr($uri, 1, $new_length - 1);
}
$keys = array( '(', ')', ',', ' ', '"', "'");
$values = array('\\(', '\\)', '\\,', '\\ ', '\\"', "\\'");
$uri = str_replace($values, $keys, $uri);
$uri = $this->expandCSSEscape($uri);
$result = parent::validate($uri, $config, $context);
if ($result === false) return false;
// escape necessary characters according to CSS spec
// except for the comma, none of these should appear in the
// URI at all
$result = str_replace($keys, $values, $result);
// extra sanity check; should have been done by URI
$result = str_replace(array('"', "\\", "\n", "\x0c", "\r"), "", $result);
return "url($result)";
return "url(\"$result\")";
}

View File

@@ -24,7 +24,8 @@ class HTMLPurifier_AttrTransform_ImgRequired extends HTMLPurifier_AttrTransform
if ($src) {
$alt = $config->get('Attr.DefaultImageAlt');
if ($alt === null) {
$attr['alt'] = basename($attr['src']);
// truncate if the alt is too long
$attr['alt'] = substr(basename($attr['src']),0,40);
} else {
$attr['alt'] = $alt;
}

View File

@@ -33,12 +33,25 @@ class HTMLPurifier_AttrTransform_SafeParam extends HTMLPurifier_AttrTransform
case 'allowNetworking':
$attr['value'] = 'internal';
break;
case 'allowFullScreen':
if ($config->get('HTML.FlashAllowFullScreen')) {
$attr['value'] = ($attr['value'] == 'true') ? 'true' : 'false';
} else {
$attr['value'] = 'false';
}
break;
case 'wmode':
$attr['value'] = 'window';
break;
case 'movie':
case 'src':
$attr['name'] = "movie";
$attr['value'] = $this->uri->validate($attr['value'], $config, $context);
break;
case 'flashvars':
// we're going to allow arbitrary inputs to the SWF, on
// the reasoning that it could only hack the SWF, not us.
break;
// add other cases to support other param name/value pairs
default:
$attr['name'] = $attr['value'] = null;

View File

@@ -272,20 +272,29 @@ class HTMLPurifier_CSSDefinition extends HTMLPurifier_Definition
// setup allowed elements
$support = "(for information on implementing this, see the ".
"support forums) ";
$allowed_attributes = $config->get('CSS.AllowedProperties');
if ($allowed_attributes !== null) {
$allowed_properties = $config->get('CSS.AllowedProperties');
if ($allowed_properties !== null) {
foreach ($this->info as $name => $d) {
if(!isset($allowed_attributes[$name])) unset($this->info[$name]);
unset($allowed_attributes[$name]);
if(!isset($allowed_properties[$name])) unset($this->info[$name]);
unset($allowed_properties[$name]);
}
// emit errors
foreach ($allowed_attributes as $name => $d) {
foreach ($allowed_properties as $name => $d) {
// :TODO: Is this htmlspecialchars() call really necessary?
$name = htmlspecialchars($name);
trigger_error("Style attribute '$name' is not supported $support", E_USER_WARNING);
}
}
$forbidden_properties = $config->get('CSS.ForbiddenProperties');
if ($forbidden_properties !== null) {
foreach ($this->info as $name => $d) {
if (isset($forbidden_properties[$name])) {
unset($this->info[$name]);
}
}
}
}
}

View File

@@ -20,7 +20,7 @@ class HTMLPurifier_Config
/**
* HTML Purifier's version
*/
public $version = '4.0.0';
public $version = '4.2.0';
/**
* Bool indicator whether or not to automatically finalize

View File

@@ -0,0 +1,11 @@
AutoFormat.RemoveSpansWithoutAttributes
TYPE: bool
VERSION: 4.0.1
DEFAULT: false
--DESCRIPTION--
<p>
This directive causes <code>span</code> tags without any attributes
to be removed. It will also remove spans that had all attributes
removed during processing.
</p>
--# vim: et sw=4 sts=4

View File

@@ -0,0 +1,13 @@
CSS.ForbiddenProperties
TYPE: lookup
VERSION: 4.2.0
DEFAULT: array()
--DESCRIPTION--
<p>
This is the logical inverse of %CSS.AllowedProperties, and it will
override that directive or any other directive. If possible,
%CSS.AllowedProperties is recommended over this directive,
because it can sometimes be difficult to tell whether or not you've
forbidden all of the CSS properties you truly would like to disallow.
</p>
--# vim: et sw=4 sts=4

View File

@@ -0,0 +1,11 @@
Core.NormalizeNewlines
TYPE: bool
VERSION: 4.2.0
DEFAULT: true
--DESCRIPTION--
<p>
Whether or not to normalize newlines to the operating
system default. When <code>false</code>, HTML Purifier
will attempt to preserve mixed newline files.
</p>
--# vim: et sw=4 sts=4

View File

@@ -0,0 +1,11 @@
Core.RemoveProcessingInstructions
TYPE: bool
VERSION: 4.2.0
DEFAULT: false
--DESCRIPTION--
Instead of escaping processing instructions in the form <code>&lt;? ...
?&gt;</code>, remove them outright. This may be useful if the HTML
you are validating contains XML processing instruction gunk; however,
it can also be user-unfriendly for people attempting to post PHP
snippets.
--# vim: et sw=4 sts=4

View File

@@ -3,6 +3,11 @@ TYPE: bool
VERSION: 3.1.0
DEFAULT: false
--DESCRIPTION--
<p>
<strong>Warning:</strong> Deprecated in favor of %HTML.SafeObject and
%Output.FlashCompat (turn both on to allow YouTube videos and other
Flash content).
</p>
<p>
This directive enables YouTube video embedding in HTML Purifier. Check
<a href="http://htmlpurifier.org/docs/enduser-youtube.html">this document

View File

@@ -5,11 +5,14 @@ DEFAULT: NULL
--DESCRIPTION--
<p>
This is a convenience directive that rolls the functionality of
%HTML.AllowedElements and %HTML.AllowedAttributes into one directive.
This is a preferred convenience directive that combines
%HTML.AllowedElements and %HTML.AllowedAttributes.
Specify elements and attributes that are allowed using:
<code>element1[attr1|attr2],element2...</code>. You can also use
newlines instead of commas to separate elements.
<code>element1[attr1|attr2],element2...</code>. For example,
if you would like to only allow paragraphs and links, specify
<code>a[href],p</code>. You can specify attributes that apply
to all elements using an asterisk, e.g. <code>*[lang]</code>.
You can also use newlines instead of commas to separate elements.
</p>
<p>
<strong>Warning</strong>:

View File

@@ -4,12 +4,17 @@ VERSION: 1.3.0
DEFAULT: NULL
--DESCRIPTION--
<p>
If HTML Purifier's tag set is unsatisfactory for your needs, you
can overload it with your own list of tags to allow. Note that this
method is subtractive: it does its job by taking away from HTML Purifier
usual feature set, so you cannot add a tag that HTML Purifier never
supported in the first place (like embed, form or head). If you
change this, you probably also want to change %HTML.AllowedAttributes.
If HTML Purifier's tag set is unsatisfactory for your needs, you can
overload it with your own list of tags to allow. If you change
this, you probably also want to change %HTML.AllowedAttributes; see
also %HTML.Allowed which lets you set allowed elements and
attributes at the same time.
</p>
<p>
If you attempt to allow an element that HTML Purifier does not know
about, HTML Purifier will raise an error. You will need to manually
tell HTML Purifier about this element by using the
<a href="http://htmlpurifier.org/docs/enduser-customize.html">advanced customization features.</a>
</p>
<p>
<strong>Warning:</strong> If another directive conflicts with the

View File

@@ -0,0 +1,11 @@
HTML.FlashAllowFullScreen
TYPE: bool
VERSION: 4.2.0
DEFAULT: false
--DESCRIPTION--
<p>
Whether or not to permit embedded Flash content from
%HTML.SafeObject to expand to the full screen. Corresponds to
the <code>allowFullScreen</code> parameter.
</p>
--# vim: et sw=4 sts=4

View File

@@ -7,8 +7,7 @@ DEFAULT: false
Whether or not to permit embed tags in documents, with a number of extra
security features added to prevent script execution. This is similar to
what websites like MySpace do to embed tags. Embed is a proprietary
element and will cause your website to stop validating. You probably want
to enable this with %HTML.SafeObject.
<strong>Highly experimental.</strong>
</p>
element and will cause your website to stop validating; you should
see if you can use %Output.FlashCompat with %HTML.SafeObject instead
first.</p>
--# vim: et sw=4 sts=4

View File

@@ -6,9 +6,8 @@ DEFAULT: false
<p>
Whether or not to permit object tags in documents, with a number of extra
security features added to prevent script execution. This is similar to
what websites like MySpace do to object tags. You may also want to
enable %HTML.SafeEmbed for maximum interoperability with Internet Explorer,
although embed tags will cause your website to stop validating.
<strong>Highly experimental.</strong>
what websites like MySpace do to object tags. You should also enable
%Output.FlashCompat in order to generate Internet Explorer
compatibility code for your object tags.
</p>
--# vim: et sw=4 sts=4

View File

@@ -0,0 +1,11 @@
Output.FlashCompat
TYPE: bool
VERSION: 4.1.0
DEFAULT: false
--DESCRIPTION--
<p>
If true, HTML Purifier will generate Internet Explorer compatibility
code for all object code. This is highly recommended if you enable
%HTML.SafeObject.
</p>
--# vim: et sw=4 sts=4

View File

@@ -12,4 +12,6 @@ array (
--DESCRIPTION--
Whitelist that defines the schemes that a URI is allowed to have. This
prevents XSS attacks from using pseudo-schemes like javascript or mocha.
There is also support for the <code>data</code> and <code>file</code>
URI schemes, but they are not enabled by default.
--# vim: et sw=4 sts=4

View File

@@ -1,12 +1,15 @@
URI.DisableResources
TYPE: bool
VERSION: 1.3.0
VERSION: 4.2.0
DEFAULT: false
--DESCRIPTION--
<p>
Disables embedding resources, essentially meaning no pictures. You can
still link to them though. See %URI.DisableExternalResources for why
this might be a good idea.
</p>
<p>
<em>Note:</em> While this directive has been available since 1.3.0,
it didn't actually start doing anything until 4.2.0.
</p>
--# vim: et sw=4 sts=4

View File

@@ -97,6 +97,13 @@ class HTMLPurifier_ElementDef
*/
public $autoclose = array();
/**
* If a foreign element is found in this element, test if it is
* allowed by this sub-element; if it is, instead of closing the
* current element, place it inside this element.
*/
public $wrap;
/**
* Whether or not this is a formatting element affected by the
* "Active Formatting Elements" algorithm.

View File

@@ -7,13 +7,13 @@ class HTMLPurifier_Filter_YouTube extends HTMLPurifier_Filter
public function preFilter($html, $config, $context) {
$pre_regex = '#<object[^>]+>.+?'.
'http://www.youtube.com/v/([A-Za-z0-9\-_]+).+?</object>#s';
'http://www.youtube.com/((?:v|cp)/[A-Za-z0-9\-_=]+).+?</object>#s';
$pre_replace = '<span class="youtube-embed">\1</span>';
return preg_replace($pre_regex, $pre_replace, $html);
}
public function postFilter($html, $config, $context) {
$post_regex = '#<span class="youtube-embed">([A-Za-z0-9\-_]+)</span>#';
$post_regex = '#<span class="youtube-embed">((?:v|cp)/[A-Za-z0-9\-_=]+)</span>#';
return preg_replace_callback($post_regex, array($this, 'postFilterCallback'), $html);
}
@@ -24,10 +24,10 @@ class HTMLPurifier_Filter_YouTube extends HTMLPurifier_Filter
protected function postFilterCallback($matches) {
$url = $this->armorUrl($matches[1]);
return '<object width="425" height="350" type="application/x-shockwave-flash" '.
'data="http://www.youtube.com/v/'.$url.'">'.
'<param name="movie" value="http://www.youtube.com/v/'.$url.'"></param>'.
'data="http://www.youtube.com/'.$url.'">'.
'<param name="movie" value="http://www.youtube.com/'.$url.'"></param>'.
'<!--[if IE]>'.
'<embed src="http://www.youtube.com/v/'.$url.'"'.
'<embed src="http://www.youtube.com/'.$url.'"'.
'type="application/x-shockwave-flash"'.
'wmode="transparent" width="425" height="350" />'.
'<![endif]-->'.

View File

@@ -31,6 +31,17 @@ class HTMLPurifier_Generator
*/
private $_sortAttr;
/**
* Cache of %Output.FlashCompat
*/
private $_flashCompat;
/**
* Stack for keeping track of object information when outputting IE
* compatibility code.
*/
private $_flashStack = array();
/**
* Configuration for the generator
*/
@@ -44,6 +55,7 @@ class HTMLPurifier_Generator
$this->config = $config;
$this->_scriptFix = $config->get('Output.CommentScriptContents');
$this->_sortAttr = $config->get('Output.SortAttr');
$this->_flashCompat = $config->get('Output.FlashCompat');
$this->_def = $config->getHTMLDefinition();
$this->_xhtml = $this->_def->doctype->xml;
}
@@ -86,9 +98,11 @@ class HTMLPurifier_Generator
}
// Normalize newlines to system defined value
$nl = $this->config->get('Output.Newline');
if ($nl === null) $nl = PHP_EOL;
if ($nl !== "\n") $html = str_replace("\n", $nl, $html);
if ($this->config->get('Core.NormalizeNewlines')) {
$nl = $this->config->get('Output.Newline');
if ($nl === null) $nl = PHP_EOL;
if ($nl !== "\n") $html = str_replace("\n", $nl, $html);
}
return $html;
}
@@ -104,12 +118,41 @@ class HTMLPurifier_Generator
} elseif ($token instanceof HTMLPurifier_Token_Start) {
$attr = $this->generateAttributes($token->attr, $token->name);
if ($this->_flashCompat) {
if ($token->name == "object") {
$flash = new stdclass();
$flash->attr = $token->attr;
$flash->param = array();
$this->_flashStack[] = $flash;
}
}
return '<' . $token->name . ($attr ? ' ' : '') . $attr . '>';
} elseif ($token instanceof HTMLPurifier_Token_End) {
return '</' . $token->name . '>';
$_extra = '';
if ($this->_flashCompat) {
if ($token->name == "object" && !empty($this->_flashStack)) {
$flash = array_pop($this->_flashStack);
$compat_token = new HTMLPurifier_Token_Empty("embed");
foreach ($flash->attr as $name => $val) {
if ($name == "classid") continue;
if ($name == "type") continue;
if ($name == "data") $name = "src";
$compat_token->attr[$name] = $val;
}
foreach ($flash->param as $name => $val) {
if ($name == "movie") $name = "src";
$compat_token->attr[$name] = $val;
}
$_extra = "<!--[if IE]>".$this->generateFromToken($compat_token)."<![endif]-->";
}
}
return $_extra . '</' . $token->name . '>';
} elseif ($token instanceof HTMLPurifier_Token_Empty) {
if ($this->_flashCompat && $token->name == "param" && !empty($this->_flashStack)) {
$this->_flashStack[count($this->_flashStack)-1]->param[$token->attr['name']] = $token->attr['value'];
}
$attr = $this->generateAttributes($token->attr, $token->name);
return '<' . $token->name . ($attr ? ' ' : '') . $attr .
( $this->_xhtml ? ' /': '' ) // <br /> v. <br>
@@ -174,7 +217,10 @@ class HTMLPurifier_Generator
* permissible for non-attribute output.
* @return String escaped data.
*/
public function escape($string, $quote = ENT_COMPAT) {
public function escape($string, $quote = null) {
// Workaround for APC bug on Mac Leopard reported by sidepodcast
// http://htmlpurifier.org/phorum/read.php?3,4823,4846
if ($quote === null) $quote = ENT_COMPAT;
return htmlspecialchars($string, $quote, 'UTF-8');
}

View File

@@ -300,7 +300,12 @@ class HTMLPurifier_HTMLDefinition extends HTMLPurifier_Definition
unset($allowed_attributes_mutable[$key]);
}
}
if ($delete) unset($this->info[$tag]->attr[$attr]);
if ($delete) {
if ($this->info[$tag]->attr[$attr]->required) {
trigger_error("Required attribute '$attr' in element '$tag' was not allowed, which means '$tag' will not be allowed either", E_USER_WARNING);
}
unset($this->info[$tag]->attr[$attr]);
}
}
}
// emit errors

View File

@@ -20,8 +20,10 @@ class HTMLPurifier_HTMLModule_List extends HTMLPurifier_HTMLModule
public $content_sets = array('Flow' => 'List');
public function setup($config) {
$this->addElement('ol', 'List', 'Required: li', 'Common');
$this->addElement('ul', 'List', 'Required: li', 'Common');
$ol = $this->addElement('ol', 'List', 'Required: li', 'Common');
$ol->wrap = "li";
$ul = $this->addElement('ul', 'List', 'Required: li', 'Common');
$ul->wrap = "li";
$this->addElement('dl', 'List', 'Required: dt | dd', 'Common');
$this->addElement('li', false, 'Flow', 'Common');

View File

@@ -20,6 +20,7 @@ class HTMLPurifier_HTMLModule_SafeEmbed extends HTMLPurifier_HTMLModule
'height' => 'Pixels#' . $max,
'allowscriptaccess' => 'Enum#never',
'allownetworking' => 'Enum#internal',
'flashvars' => 'Text',
'wmode' => 'Enum#window',
'name' => 'ID',
)

View File

@@ -28,7 +28,10 @@ class HTMLPurifier_HTMLModule_SafeObject extends HTMLPurifier_HTMLModule
'type' => 'Enum#application/x-shockwave-flash',
'width' => 'Pixels#' . $max,
'height' => 'Pixels#' . $max,
'data' => 'URI#embedded'
'data' => 'URI#embedded',
'classid' => 'Enum#clsid:d27cdb6e-ae6d-11cf-96b8-444553540000',
'codebase' => new HTMLPurifier_AttrDef_Enum(array(
'http://download.macromedia.com/pub/shockwave/cabs/flash/swflash.cab#version=6,0,40,0')),
)
);
$object->attr_transform_post[] = new HTMLPurifier_AttrTransform_SafeObject();

View File

@@ -15,6 +15,7 @@ class HTMLPurifier_HTMLModule_Tidy_Proprietary extends HTMLPurifier_HTMLModule_T
$r['thead@background'] = new HTMLPurifier_AttrTransform_Background();
$r['tfoot@background'] = new HTMLPurifier_AttrTransform_Background();
$r['tbody@background'] = new HTMLPurifier_AttrTransform_Background();
$r['table@height'] = new HTMLPurifier_AttrTransform_Length('height');
return $r;
}

View File

@@ -34,16 +34,21 @@ class HTMLPurifier_Injector_AutoParagraph extends HTMLPurifier_Injector
// ----
// This is a degenerate case
} else {
// State 1.2: PAR1
// ----
if (!$token->is_whitespace || $this->_isInline($current)) {
// State 1.2: PAR1
// ----
// State 1.3: PAR1\n\nPAR2
// ------------
// State 1.3: PAR1\n\nPAR2
// ------------
// State 1.4: <div>PAR1\n\nPAR2 (see State 2)
// ------------
$token = array($this->_pStart());
$this->_splitText($text, $token);
// State 1.4: <div>PAR1\n\nPAR2 (see State 2)
// ------------
$token = array($this->_pStart());
$this->_splitText($text, $token);
} else {
// State 1.5: \n<hr />
// --
}
}
} else {
// State 2: <div>PAR1... (similar to 1.4)

View File

@@ -0,0 +1,60 @@
<?php
/**
 * Injector that removes spans with no attributes.
 *
 * A span start tag whose attribute list is empty (either originally or
 * after attribute validation) is removed together with its matching
 * end tag; the content between the two tags is not touched by this class.
 */
class HTMLPurifier_Injector_RemoveSpansWithoutAttributes extends HTMLPurifier_Injector
{
    public $name = 'RemoveSpansWithoutAttributes';
    public $needed = array('span');

    private $attrValidator;

    /**
     * Used by AttrValidator
     */
    private $config;
    private $context;

    /**
     * Stores the config/context for later attribute validation and
     * delegates the rest of the setup to the parent injector.
     *
     * @param mixed $config  Configuration object, passed on to the validator
     * @param mixed $context Context object, passed on to the validator
     * @return mixed Whatever the parent prepare() returns
     */
    public function prepare($config, $context) {
        $this->attrValidator = new HTMLPurifier_AttrValidator();
        $this->config = $config;
        $this->context = $context;
        return parent::prepare($config, $context);
    }

    /**
     * Handles a tag token; only span start tags are of interest.
     *
     * @param mixed $token Token under inspection (by reference); set to
     *                     false when the span start tag is to be deleted
     */
    public function handleElement(&$token) {
        if ($token->name !== 'span' || !$token instanceof HTMLPurifier_Token_Start) {
            return;
        }

        // We need to validate the attributes now since this doesn't normally
        // happen until after MakeWellFormed. If all the attributes are removed
        // the span needs to be removed too.
        $this->attrValidator->validateToken($token, $this->config, $this->context);
        // armor the token so the attributes are not validated a second time
        $token->armor['ValidateAttributes'] = true;

        if (!empty($token->attr)) {
            return;
        }

        // The cursor variables are handed to forwardUntilEndToken() by
        // reference; initialise them explicitly instead of relying on PHP
        // creating them implicitly as null.
        $nesting = 0;
        $i = null;
        $current = null;
        while ($this->forwardUntilEndToken($i, $current, $nesting)) {}

        if ($current instanceof HTMLPurifier_Token_End && $current->name === 'span') {
            // Mark closing span tag for deletion
            $current->markForDeletion = true;
            // Delete open span tag
            $token = false;
        }
    }

    /**
     * Deletes end tags that handleElement() flagged for removal.
     *
     * @param mixed $token End token under inspection (by reference); set to
     *                     false when it carries the deletion mark
     */
    public function handleEnd(&$token) {
        // Most end tokens were never marked, so the property may not exist
        // on them; empty() avoids an undefined-property notice in that case.
        if (!empty($token->markForDeletion)) {
            $token = false;
        }
    }
}
// vim: et sw=4 sts=4

View File

@@ -20,6 +20,9 @@ class HTMLPurifier_Injector_SafeObject extends HTMLPurifier_Injector
protected $allowedParam = array(
'wmode' => true,
'movie' => true,
'flashvars' => true,
'src' => true,
'allowFullScreen' => true, // if omitted, assume to be 'false'
);
public function prepare($config, $context) {
@@ -47,7 +50,8 @@ class HTMLPurifier_Injector_SafeObject extends HTMLPurifier_Injector
// We need this fix because YouTube doesn't supply a data
// attribute, which we need if a type is specified. This is
// *very* Flash specific.
if (!isset($this->objectStack[$i]->attr['data']) && $token->attr['name'] == 'movie') {
if (!isset($this->objectStack[$i]->attr['data']) &&
($token->attr['name'] == 'movie' || $token->attr['name'] == 'src')) {
$this->objectStack[$i]->attr['data'] = $token->attr['value'];
}
// Check if the parameter is the correct value but has not

View File

@@ -23,6 +23,7 @@ $messages = array(
'Lexer: Missing gt' => 'Missing greater-than sign (>), previous less-than sign (<) should be escaped',
'Lexer: Missing attribute key' => 'Attribute declaration has no key',
'Lexer: Missing end quote' => 'Attribute declaration has no end quote',
'Lexer: Extracted body' => 'Removed document metadata tags',
'Strategy_RemoveForeignElements: Tag transform' => '<$1> element transformed into $CurrentToken.Serialized',
'Strategy_RemoveForeignElements: Missing required attribute' => '$CurrentToken.Compact element missing required attribute $1',

View File

@@ -230,6 +230,17 @@ class HTMLPurifier_Lexer
);
}
/**
 * Special Internet Explorer conditional comments should be removed.
 *
 * Each <!--[if ...]> ... <![endif]--> block is stripped individually.
 * The match between opener and closer is non-greedy, so markup that sits
 * between two separate conditional comments is preserved.
 *
 * @param string $string HTML to process
 * @return string HTML with conditional comment blocks removed
 */
protected static function removeIEConditional($string) {
    return preg_replace(
        '#<!--\[if [^>]+\]>.*?<!\[endif\]-->#si', // probably should generalize for all strings
        '',
        $string
    );
}
/**
* Callback function for escapeCDATA() that does the work.
*
@@ -252,20 +263,32 @@ class HTMLPurifier_Lexer
public function normalize($html, $config, $context) {
// normalize newlines to \n
$html = str_replace("\r\n", "\n", $html);
$html = str_replace("\r", "\n", $html);
if ($config->get('Core.NormalizeNewlines')) {
$html = str_replace("\r\n", "\n", $html);
$html = str_replace("\r", "\n", $html);
}
if ($config->get('HTML.Trusted')) {
// escape convoluted CDATA
$html = $this->escapeCommentedCDATA($html);
}
$html = $this->removeIEConditional($html);
// escape CDATA
$html = $this->escapeCDATA($html);
// extract body from document if applicable
if ($config->get('Core.ConvertDocumentToFragment')) {
$html = $this->extractBody($html);
$e = false;
if ($config->get('Core.CollectErrors')) {
$e =& $context->get('ErrorCollector');
}
$new_html = $this->extractBody($html);
if ($e && $new_html != $html) {
$e->send(E_WARNING, 'Lexer: Extracted body');
}
$html = $new_html;
}
// expand entities that aren't the big five
@@ -276,6 +299,11 @@ class HTMLPurifier_Lexer
// represent non-SGML characters (horror, horror!)
$html = HTMLPurifier_Encoder::cleanUTF8($html);
// if processing instructions are to be removed, remove them now
if ($config->get('Core.RemoveProcessingInstructions')) {
$html = preg_replace('#<\?.+?\?>#s', '', $html);
}
return $html;
}

View File

@@ -384,7 +384,7 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer
}
}
if ($value === false) $value = '';
return array($key => $value);
return array($key => $this->parseData($value));
}
// setup loop environment

View File

@@ -26,13 +26,20 @@ class HTMLPurifier_Lexer_PEARSax3 extends HTMLPurifier_Lexer
* Internal accumulator array for SAX parsers.
*/
protected $tokens = array();
protected $last_token_was_empty;
private $parent_handler;
private $stack = array();
public function tokenizeHTML($string, $config, $context) {
$this->tokens = array();
$this->last_token_was_empty = false;
$string = $this->normalize($string, $config, $context);
$this->parent_handler = set_error_handler(array($this, 'muteStrictErrorHandler'));
$parser = new XML_HTMLSax3();
$parser->set_object($this);
$parser->set_element_handler('openHandler','closeHandler');
@@ -44,6 +51,8 @@ class HTMLPurifier_Lexer_PEARSax3 extends HTMLPurifier_Lexer
$parser->parse($string);
restore_error_handler();
return $this->tokens;
}
@@ -58,9 +67,11 @@ class HTMLPurifier_Lexer_PEARSax3 extends HTMLPurifier_Lexer
}
if ($closed) {
$this->tokens[] = new HTMLPurifier_Token_Empty($name, $attrs);
$this->last_token_was_empty = true;
} else {
$this->tokens[] = new HTMLPurifier_Token_Start($name, $attrs);
}
$this->stack[] = $name;
return true;
}
@@ -71,10 +82,12 @@ class HTMLPurifier_Lexer_PEARSax3 extends HTMLPurifier_Lexer
// HTMLSax3 seems to always send empty tags an extra close tag
// check and ignore if you see it:
// [TESTME] to make sure it doesn't overreach
if ($this->tokens[count($this->tokens)-1] instanceof HTMLPurifier_Token_Empty) {
if ($this->last_token_was_empty) {
$this->last_token_was_empty = false;
return true;
}
$this->tokens[] = new HTMLPurifier_Token_End($name);
if (!empty($this->stack)) array_pop($this->stack);
return true;
}
@@ -82,6 +95,7 @@ class HTMLPurifier_Lexer_PEARSax3 extends HTMLPurifier_Lexer
* Data event handler, interface is defined by PEAR package.
*/
public function dataHandler(&$parser, $data) {
$this->last_token_was_empty = false;
$this->tokens[] = new HTMLPurifier_Token_Text($data);
return true;
}
@@ -91,7 +105,18 @@ class HTMLPurifier_Lexer_PEARSax3 extends HTMLPurifier_Lexer
*/
public function escapeHandler(&$parser, $data) {
if (strpos($data, '--') === 0) {
$this->tokens[] = new HTMLPurifier_Token_Comment($data);
// remove trailing and leading double-dashes
$data = substr($data, 2);
if (strlen($data) >= 2 && substr($data, -2) == "--") {
$data = substr($data, 0, -2);
}
if (isset($this->stack[sizeof($this->stack) - 1]) &&
$this->stack[sizeof($this->stack) - 1] == "style") {
$this->tokens[] = new HTMLPurifier_Token_Text($data);
} else {
$this->tokens[] = new HTMLPurifier_Token_Comment($data);
}
$this->last_token_was_empty = false;
}
// CDATA is handled elsewhere, but if it was handled here:
//if (strpos($data, '[CDATA[') === 0) {
@@ -101,6 +126,14 @@ class HTMLPurifier_Lexer_PEARSax3 extends HTMLPurifier_Lexer
return true;
}
/**
 * An error handler that mutes strict errors.
 *
 * E_STRICT errors are swallowed; every other error is forwarded to the
 * handler that was active before this one was installed (stored in
 * $this->parent_handler). When there was no such handler, false is
 * returned so that PHP falls back to its built-in error handling.
 *
 * @param int         $errno      Error level
 * @param string      $errstr     Error message
 * @param string|null $errfile    File in which the error was raised
 * @param int|null    $errline    Line on which the error was raised
 * @param array|null  $errcontext Symbol table at the point of the error
 * @return mixed null for muted errors, false when no previous handler
 *               exists, otherwise the previous handler's return value
 */
public function muteStrictErrorHandler($errno, $errstr, $errfile=null, $errline=null, $errcontext=null) {
    if ($errno == E_STRICT) return;
    // set_error_handler() yields null when only the built-in handler was
    // active; there is then nothing to call, so defer to PHP itself.
    if ($this->parent_handler === null) {
        return false;
    }
    return call_user_func($this->parent_handler, $errno, $errstr, $errfile, $errline, $errcontext);
}
}
// vim: et sw=4 sts=4

View File

@@ -125,8 +125,6 @@ class HTML5 {
const EOF = 5;
public function __construct($data) {
$data = str_replace("\r\n", "\n", $data);
$data = str_replace("\r", null, $data);
$this->data = $data;
$this->char = -1;

View File

@@ -83,6 +83,7 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
$this->injectors[] = $injector;
}
foreach ($custom_injectors as $injector) {
if (!$injector) continue;
if (is_string($injector)) {
$injector = "HTMLPurifier_Injector_$injector";
$injector = new $injector;
@@ -164,6 +165,7 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
$token = $tokens[$t];
//echo '<br>'; printTokens($tokens, $t); printTokens($this->stack);
//flush();
// quick-check: if it's not a tag, no need to process
if (empty($token->is_tag)) {
@@ -219,6 +221,22 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
$autoclose = false;
}
if ($autoclose && $definition->info[$token->name]->wrap) {
// Check if an element can be wrapped by another
// element to make it valid in a context (for
// example, <ul><ul> needs a <li> in between)
$wrapname = $definition->info[$token->name]->wrap;
$wrapdef = $definition->info[$wrapname];
$elements = $wrapdef->child->getAllowedElements($config);
$parent_elements = $definition->info[$parent->name]->child->getAllowedElements($config);
if (isset($elements[$token->name]) && isset($parent_elements[$wrapname])) {
$newtoken = new HTMLPurifier_Token_Start($wrapname);
$this->insertBefore($newtoken);
$reprocess = true;
continue;
}
}
$carryover = false;
if ($autoclose && $definition->info[$parent->name]->formatting) {
$carryover = true;

View File

@@ -0,0 +1,11 @@
<?php
/**
 * URI filter whose verdict depends solely on the context's 'EmbeddedURI'
 * value: the filter answers false when that value is truthy and true
 * otherwise. The URI itself is never inspected or modified.
 */
class HTMLPurifier_URIFilter_DisableResources extends HTMLPurifier_URIFilter
{
    public $name = 'DisableResources';

    /**
     * @param mixed $uri     URI being filtered (by reference, left untouched)
     * @param mixed $config  Configuration object (unused here)
     * @param mixed $context Context object queried for 'EmbeddedURI'
     * @return bool false for embedded URIs, true for everything else
     */
    public function filter(&$uri, $config, $context) {
        $isEmbedded = $context->get('EmbeddedURI', true);
        if ($isEmbedded) {
            return false;
        }
        return true;
    }
}
// vim: et sw=4 sts=4

View File

@@ -0,0 +1,93 @@
<?php
/**
 * Implements data: URI for base64 encoded images supported by GD.
 *
 * The payload is decoded, written to a temporary file and sniffed with
 * exif_imagetype() or getimagesize(); only payloads whose detected MIME
 * type is listed in $allowed_types are accepted. Accepted URIs are
 * rewritten to the canonical "<type>;base64,<data>" form.
 */
class HTMLPurifier_URIScheme_data extends HTMLPurifier_URIScheme {

    public $browsable = true;
    public $allowed_types = array(
        // you better write validation code for other types if you
        // decide to allow them
        'image/jpeg' => true,
        'image/gif' => true,
        'image/png' => true,
    );

    /**
     * Validates and normalizes a data: URI.
     *
     * @param mixed $uri     URI object (by reference); its path is parsed and,
     *                       on success, rewritten, and its other components
     *                       are cleared
     * @param mixed $config  Configuration object (unused here)
     * @param mixed $context Context object (unused here)
     * @return bool true when the payload is an allowed image, false otherwise
     */
    public function validate(&$uri, $config, $context) {
        $result = explode(',', $uri->path, 2);
        $is_base64 = false;
        $charset = null;
        $content_type = null;
        if (count($result) == 2) {
            list($metadata, $data) = $result;
            // do some legwork on the metadata
            $metas = explode(';', $metadata);
            while(!empty($metas)) {
                $cur = array_shift($metas);
                if ($cur == 'base64') {
                    // anything after the base64 marker is ignored
                    $is_base64 = true;
                    break;
                }
                if (substr($cur, 0, 8) == 'charset=') {
                    // doesn't match if there are arbitrary spaces, but
                    // whatever dude
                    if ($charset !== null) continue; // garbage
                    $charset = substr($cur, 8); // not used
                } else {
                    if ($content_type !== null) continue; // garbage
                    $content_type = $cur;
                }
            }
        } else {
            // no comma: the whole path is treated as data without metadata
            $data = $result[0];
        }
        if ($content_type !== null && empty($this->allowed_types[$content_type])) {
            return false;
        }
        if ($charset !== null) {
            // error; we don't allow plaintext stuff
            $charset = null;
        }
        $data = rawurldecode($data);
        if ($is_base64) {
            $raw_data = base64_decode($data);
        } else {
            $raw_data = $data;
        }
        // XXX probably want to refactor this into a general mechanism
        // for filtering arbitrary content types
        $file = tempnam("/tmp", "");
        if ($file === false) {
            // could not create a scratch file, so the payload cannot be vetted
            return false;
        }
        if (file_put_contents($file, $raw_data) === false) {
            unlink($file);
            return false;
        }
        // The scratch file is removed on every path below as soon as the
        // image type has been determined, so validations don't pile up files.
        if (function_exists('exif_imagetype')) {
            $image_code = exif_imagetype($file);
            unlink($file);
        } elseif (function_exists('getimagesize')) {
            set_error_handler(array($this, 'muteErrorHandler'));
            $info = getimagesize($file);
            restore_error_handler();
            unlink($file);
            if ($info == false) return false;
            $image_code = $info[2];
        } else {
            unlink($file);
            trigger_error("could not find exif_imagetype or getimagesize functions", E_USER_ERROR);
            // a custom error handler may resume execution; without a way to
            // inspect the payload it must be rejected
            return false;
        }
        $real_content_type = image_type_to_mime_type($image_code);
        if ($real_content_type != $content_type) {
            // we're nice guys; if the content type is something else we
            // support, change it over
            if (empty($this->allowed_types[$real_content_type])) return false;
            $content_type = $real_content_type;
        }
        // ok, it's kosher, rewrite what we need
        $uri->userinfo = null;
        $uri->host = null;
        $uri->port = null;
        $uri->fragment = null;
        $uri->query = null;
        $uri->path = "$content_type;base64," . base64_encode($raw_data);
        return true;
    }

    /**
     * No-op error handler, installed temporarily to silence getimagesize().
     *
     * @param int    $errno  Error level (ignored)
     * @param string $errstr Error message (ignored)
     */
    public function muteErrorHandler($errno, $errstr) {}

}

View File

@@ -0,0 +1,26 @@
<?php
/**
 * Validates file as defined by RFC 1630 and RFC 1738.
 */
class HTMLPurifier_URIScheme_file extends HTMLPurifier_URIScheme {

    // file:// URLs generally cannot be reached from other machines,
    // so using one as an img src would be incorrect.
    public $browsable = false;

    /**
     * Runs the parent validation, then strips the URI components that
     * this scheme does not keep.
     *
     * @param object $uri     URI object; userinfo, port and query are cleared.
     * @param object $config  Configuration object, forwarded to the parent.
     * @param object $context Context object, forwarded to the parent.
     * @return bool Always true; the parent's return value is not consulted.
     */
    public function validate(&$uri, $config, $context) {
        parent::validate($uri, $config, $context);

        // The querystring has no possible effect (even though Firefox
        // appears to tolerate it), so it is dropped.
        $uri->query = null;

        // file:// makes no provisions for accessing the resource
        $uri->port = null;

        // Authentication is not supported for this scheme.
        $uri->userinfo = null;

        return true;
    }

}
// vim: et sw=4 sts=4

View File

@@ -62,7 +62,7 @@ class HTMLPurifier_VarParser_Flexible extends HTMLPurifier_VarParser
foreach ($var as $keypair) {
$c = explode(':', $keypair, 2);
if (!isset($c[1])) continue;
$nvar[$c[0]] = $c[1];
$nvar[trim($c[0])] = trim($c[1]);
}
$var = $nvar;
}
@@ -79,8 +79,15 @@ class HTMLPurifier_VarParser_Flexible extends HTMLPurifier_VarParser
return $new;
} else break;
}
if ($type === self::ALIST) {
trigger_error("Array list did not have consecutive integer indexes", E_USER_WARNING);
return array_values($var);
}
if ($type === self::LOOKUP) {
foreach ($var as $key => $value) {
if ($value !== true) {
trigger_error("Lookup array has non-true value at key '$key'; maybe your input array was not indexed numerically", E_USER_WARNING);
}
$var[$key] = true;
}
}

11
maintenance/compile-doxygen.sh Executable file
View File

@@ -0,0 +1,11 @@
#!/bin/bash
# Builds the Doxygen documentation into docs/doxygen and packs it into
# docs/doxygen.tgz. Paths are relative to the parent of the current
# directory, so run this from the directory that contains the script.
# Exits with doxygen's own status if the build fails.

# Abort if we cannot move up: the rm below must not run in the wrong place.
cd .. || exit 1

# -p: do not complain when the output directory is left over from a prior run.
mkdir -p docs/doxygen
rm -Rf docs/doxygen/*

doxygen 1>docs/doxygen/info.log 2>docs/doxygen/errors.log
status=$?
if [ "$status" != 0 ]; then
    cat docs/doxygen/errors.log
    # A bare `exit` would report cat's status (normally 0) and hide the
    # failure from callers; propagate doxygen's status instead.
    exit "$status"
fi

cd docs || exit 1
tar czf doxygen.tgz doxygen

View File

@@ -18,8 +18,7 @@ function e($cmd) {
if ($status) exit($status);
}
$php = $_SERVER['argv'][1];
if (!$php) $php = 'php';
$php = empty($_SERVER['argv'][1]) ? 'php' : $_SERVER['argv'][1];
e($php . ' generate-includes.php');
e($php . ' generate-schema-cache.php');

View File

@@ -80,8 +80,9 @@ function get_dependency_lookup($file) {
if (strncmp('class', $line, 5) === 0) {
// The implementation here is fragile and will break if we attempt
// to use interfaces. Beware!
list(, $parent) = explode(' extends ', trim($line, ' {'."\n\r"), 2);
if (empty($parent)) break;
$arr = explode(' extends ', trim($line, ' {'."\n\r"), 2);
if (count($arr) < 2) break;
$parent = $arr[1];
$dep_file = HTMLPurifier_Bootstrap::getPath($parent);
if (!$dep_file) break;
$deps[$dep_file] = true;

5
maintenance/regenerate-docs.sh Executable file
View File

@@ -0,0 +1,5 @@
#!/bin/bash
# Rebuilds the Doxygen archive, uploads it to htmlpurifier.org and asks the
# server to reload the documentation. Run from the directory that contains
# this script, since all paths are relative.

# Stop at the first failing step. Set here rather than on the shebang line
# so it also applies when the script is started as `bash regenerate-docs.sh`.
set -e

./compile-doxygen.sh
cd ../docs
scp doxygen.tgz htmlpurifier.org:/home/ezyang/htmlpurifier.org
ssh htmlpurifier.org "cd /home/ezyang/htmlpurifier.org && ./reload-docs.sh"

View File

@@ -10,7 +10,7 @@ $pkg = new PEAR_PackageFileManager2;
$pkg->setOptions(
array(
'baseinstalldir' => '/',
'packagefile' => 'package2.xml',
'packagefile' => 'package.xml',
'packagedirectory' => realpath(dirname(__FILE__) . '/library'),
'filelistgenerator' => 'file',
'include' => array('*'),
@@ -56,8 +56,6 @@ $pkg->setPearinstallerDep('1.4.3');
$pkg->generateContents();
$compat =& $pkg->exportCompatiblePackageFile1();
$compat->writePackageFile();
$pkg->writePackageFile();
// vim: et sw=4 sts=4

View File

@@ -9,7 +9,8 @@ Changelog HTMLPurifier : Phorum Mod
. Internal change
==========================
Version 3.0.0.1 for Phorum 5.2, unknown release date
Version 4.0.0 for Phorum 5.2, released July 9, 2009
# Works only with HTML Purifier 4.0.0
! Better installation documentation
- Fixed double encoded quotes
- Fixed fatal error when migrate.php is blank

View File

@@ -2,6 +2,11 @@
Install
How to install the Phorum HTML Purifier plugin
0. PREREQUISITES
----------------
This Phorum module only works on PHP5 and with HTML Purifier 4.0.0
or later.
1. UNZIP
--------
Unzip phorum-htmlpurifier-x.y.z, producing an htmlpurifier folder.

View File

@@ -17,7 +17,7 @@
* administrators who need to edit other people's comments may be at
* risk for some nasty attacks.
*
* Tested with Phorum 5.2.6.
* Tested with Phorum 5.2.11.
*/
// Note: Cache data is base64 encoded because Phorum insists on flinging

View File

@@ -2,7 +2,7 @@ title: HTML Purifier Phorum Mod
desc: This module enables standards-compliant HTML filtering on Phorum. Please check migrate.bbcode.php before enabling this mod.
author: Edward Z. Yang
url: http://htmlpurifier.org/
version: 3.0.0
version: 4.0.0
hook: format|phorum_htmlpurifier_format
hook: quote|phorum_htmlpurifier_quote

37
smoketests/dataScheme.php Normal file
View File

@@ -0,0 +1,37 @@
<?php
require_once 'common.php';
echo '<?xml version="1.0" encoding="UTF-8" ?>';
?><!DOCTYPE html
PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<title>HTML Purifier data Scheme Smoketest</title>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
</head>
<body>
<h1>HTML Purifier data Scheme Smoketest</h1>
<?php
// Sample input: an <img> whose src is a data: URI declaring image/png with a
// base64 payload. The payload spans several lines inside the literal.
$string = '<img src="data:image/png;base64,
iVBORw0KGgoAAAANSUhEUgAAAAoAAAAKCAYAAACNMs+9AAAABGdBTUEAALGP
C/xhBQAAAAlwSFlzAAALEwAACxMBAJqcGAAAAAd0SU1FB9YGARc5KB0XV+IA
AAAddEVYdENvbW1lbnQAQ3JlYXRlZCB3aXRoIFRoZSBHSU1Q72QlbgAAAF1J
REFUGNO9zL0NglAAxPEfdLTs4BZM4DIO4C7OwQg2JoQ9LE1exdlYvBBeZ7jq
ch9//q1uH4TLzw4d6+ErXMMcXuHWxId3KOETnnXXV6MJpcq2MLaI97CER3N0
vr4MkhoXe0rZigAAAABJRU5ErkJggg==" alt="Red dot" />';
// Purifier configured with 'data' as the value of URI.AllowedSchemes, so the
// data: URI above is what gets exercised when $string is purified below.
$purifier = new HTMLPurifier(array('URI.AllowedSchemes' => 'data'));
?>
<div><?php
echo $purifier->purify($string);
?></div>
</body>
</html>
<?php
// vim: et sw=4 sts=4

View File

@@ -15,12 +15,20 @@ echo '<?xml version="1.0" encoding="UTF-8" ?>';
<h1>HTML Purifier Preserve YouTube Smoketest</h1>
<?php
$string = '<object width="425" height="350"><param name="movie" value="http://www.youtube.com/v/BdU--T8rLns"></param><param name="wmode" value="transparent"></param><embed src="http://www.youtube.com/v/BdU--T8rLns" type="application/x-shockwave-flash" wmode="transparent" width="425" height="350"></embed></object>';
$string = '<object width="425" height="350"><param name="movie" value="http://www.youtube.com/v/BdU--T8rLns"></param><param name="wmode" value="transparent"></param><embed src="http://www.youtube.com/v/BdU--T8rLns" type="application/x-shockwave-flash" wmode="transparent" width="425" height="350"></embed></object>
<object width="416" height="337"><param name="movie" value="http://www.youtube.com/cp/vjVQa1PpcFNbP_fag8PvopkXZyiXyT0J8U47lw7x5Fc="></param><embed src="http://www.youtube.com/cp/vjVQa1PpcFNbP_fag8PvopkXZyiXyT0J8U47lw7x5Fc=" type="application/x-shockwave-flash" width="416" height="337"></embed></object>
<object width="640" height="385"><param name="movie" value="http://www.youtube.com/v/uNxBeJNyAqA&hl=en_US&fs=1&"></param><param name="allowFullScreen" value="true"></param><param name="allowscriptaccess" value="always"></param><embed src="http://www.youtube.com/v/uNxBeJNyAqA&hl=en_US&fs=1&" type="application/x-shockwave-flash" allowscriptaccess="always" allowfullscreen="true" width="640" height="385"></embed></object>
<object classid="clsid:d27cdb6e-ae6d-11cf-96b8-444553540000" codebase="http://download.macromedia.com/pub/shockwave/cabs/flash/swflash.cab#version=6,0,40,0" height="385" width="480"><param name="width" value="480" /><param name="height" value="385" /><param name="src" value="http://www.youtube.com/p/E37ADDDFCA0FD050&amp;hl=en" /><embed height="385" src="http://www.youtube.com/p/E37ADDDFCA0FD050&amp;hl=en" type="application/x-shockwave-flash" width="480"></embed></object>
';
$regular_purifier = new HTMLPurifier();
$youtube_purifier = new HTMLPurifier(array(
'Filter.YouTube' => true,
$safeobject_purifier = new HTMLPurifier(array(
'HTML.SafeObject' => true,
'Output.FlashCompat' => true,
));
?>
@@ -35,9 +43,9 @@ if (isset($_GET['break'])) echo $string;
echo $regular_purifier->purify($string);
?></div>
<h2>With YouTube exception</h2>
<h2>With SafeObject exception and flash compatibility</h2>
<div><?php
echo $youtube_purifier->purify($string);
echo $safeobject_purifier->purify($string);
?></div>
</body>

View File

@@ -28,13 +28,13 @@ class HTMLPurifier_AttrDef_CSS_BackgroundPositionTest extends HTMLPurifier_AttrD
// reordered due to internal impl details
$this->assertDef('top left', 'left top');
$this->assertDef('top center', 'center top');
$this->assertDef('top center', 'top');
$this->assertDef('top right', 'right top');
$this->assertDef('center left', 'left center');
$this->assertDef('center center', 'center'); // two centers collide
$this->assertDef('center right', 'right center');
$this->assertDef('center left', 'left');
$this->assertDef('center center', 'center');
$this->assertDef('center right', 'right');
$this->assertDef('bottom left', 'left bottom');
$this->assertDef('bottom center', 'center bottom');
$this->assertDef('bottom center', 'bottom');
$this->assertDef('bottom right', 'right bottom');
// more cases from the defined syntax

View File

@@ -8,12 +8,12 @@ class HTMLPurifier_AttrDef_CSS_BackgroundTest extends HTMLPurifier_AttrDefHarnes
$config = HTMLPurifier_Config::createDefault();
$this->def = new HTMLPurifier_AttrDef_CSS_Background($config);
$valid = '#333 url(chess.png) repeat fixed 50% top';
$valid = '#333 url("chess.png") repeat fixed 50% top';
$this->assertDef($valid);
$this->assertDef('url("chess.png") #333 50% top repeat fixed', $valid);
$this->assertDef('url(\'chess.png\') #333 50% top repeat fixed', $valid);
$this->assertDef(
'rgb(34, 56, 33) url(chess.png) repeat fixed top',
'rgb(34,56,33) url(chess.png) repeat fixed top'
'rgb(34,56,33) url("chess.png") repeat fixed top'
);
}

View File

@@ -8,29 +8,29 @@ class HTMLPurifier_AttrDef_CSS_FontFamilyTest extends HTMLPurifier_AttrDefHarnes
$this->def = new HTMLPurifier_AttrDef_CSS_FontFamily();
$this->assertDef('Gill, Helvetica, sans-serif');
$this->assertDef('\'Times New Roman\', serif');
$this->assertDef('"Times New Roman"', "'Times New Roman'");
$this->assertDef('"Times New Roman", serif');
$this->assertDef('\'Times New Roman\'', '"Times New Roman"');
$this->assertDef('01234');
$this->assertDef(',', false);
$this->assertDef('Times New Roman, serif', '\'Times New Roman\', serif');
$this->assertDef($d = "'John\\'s Font'");
$this->assertDef('Times New Roman, serif', '"Times New Roman", serif');
$this->assertDef($d = '"John\'s Font"');
$this->assertDef("John's Font", $d);
$this->assertDef($d = "'\xE5\xAE\x8B\xE4\xBD\x93'");
$this->assertDef($d = "\"\xE5\xAE\x8B\xE4\xBD\x93\"");
$this->assertDef("\xE5\xAE\x8B\xE4\xBD\x93", $d);
$this->assertDef("'\\','f'", "'\\\\', f");
$this->assertDef("'\\01'", "''");
$this->assertDef("'\\20'", "' '");
$this->assertDef("\\0020", "'\\\\0020'");
$this->assertDef("'\\','f'", "\"\\5C \", f");
$this->assertDef("'\\01'", "\"\"");
$this->assertDef("'\\20'", "\" \"");
$this->assertDef("\\0020", "\" \"");
$this->assertDef("'\\000045'", "E");
$this->assertDef("','", false);
$this->assertDef("',' foobar','", "' foobar'");
$this->assertDef("'\\27'", "'\''");
$this->assertDef('"\\22"', "'\"'");
$this->assertDef('"\\""', "'\"'");
$this->assertDef('"\'"', "'\\''");
$this->assertDef("',' foobar','", "\" foobar\"");
$this->assertDef("'\\27'", "\"'\"");
$this->assertDef('"\\22"', "\"\\22 \"");
$this->assertDef('"\\""', "\"\\22 \"");
$this->assertDef('"\'"', "\"'\"");
$this->assertDef("'\\000045a'", "Ea");
$this->assertDef("'\\00045 a'", "Ea");
$this->assertDef("'\\00045 a'", "'E a'");
$this->assertDef("'\\00045 a'", "\"E a\"");
$this->assertDef("'\\\nf'", "f");
}

View File

@@ -11,10 +11,10 @@ class HTMLPurifier_AttrDef_CSS_FontTest extends HTMLPurifier_AttrDefHarness
// hodgepodge of usage cases from W3C spec, but " -> '
$this->assertDef('12px/14px sans-serif');
$this->assertDef('80% sans-serif');
$this->assertDef('x-large/110% \'New Century Schoolbook\', serif');
$this->assertDef('x-large/110% "New Century Schoolbook", serif');
$this->assertDef('bold italic large Palatino, serif');
$this->assertDef('normal small-caps 120%/120% fantasy');
$this->assertDef('300 italic 1.3em/1.7em \'FB Armada\', sans-serif');
$this->assertDef('300 italic 1.3em/1.7em "FB Armada", sans-serif');
$this->assertDef('600 9px Charcoal');
$this->assertDef('600 9px/ 12px Charcoal', '600 9px/12px Charcoal');

View File

@@ -13,14 +13,14 @@ class HTMLPurifier_AttrDef_CSS_ListStyleTest extends HTMLPurifier_AttrDefHarness
$this->assertDef('circle outside');
$this->assertDef('inside');
$this->assertDef('none');
$this->assertDef('url(foo.gif)');
$this->assertDef('circle url(foo.gif) inside');
$this->assertDef('url("foo.gif")');
$this->assertDef('circle url("foo.gif") inside');
// invalid values
$this->assertDef('outside inside', 'outside');
// ordering
$this->assertDef('url(foo.gif) none', 'none url(foo.gif)');
$this->assertDef('url(foo.gif) none', 'none url("foo.gif")');
$this->assertDef('circle lower-alpha', 'circle');
// the spec is ambiguous about what happens in these
// cases, so we're going off the W3C CSS validator

View File

@@ -12,20 +12,16 @@ class HTMLPurifier_AttrDef_CSS_URITest extends HTMLPurifier_AttrDefHarness
// we could be nice but we won't be
$this->assertDef('http://www.example.com/', false);
// no quotes are used, since that's the most widely supported
// syntax
$this->assertDef('url(', false);
$this->assertDef('url()', true);
$result = "url(http://www.example.com/)";
$this->assertDef('url("")', true);
$result = 'url("http://www.example.com/")';
$this->assertDef('url(http://www.example.com/)', $result);
$this->assertDef('url("http://www.example.com/")', $result);
$this->assertDef("url('http://www.example.com/')", $result);
$this->assertDef(
' url( "http://www.example.com/" ) ', $result);
// escaping
$this->assertDef("url(http://www.example.com/foo,bar\))",
"url(http://www.example.com/foo\,bar\))");
'url("http://www.example.com/foo,bar)")');
}
}

View File

@@ -25,7 +25,7 @@ class HTMLPurifier_AttrDef_CSSTest extends HTMLPurifier_AttrDefHarness
$this->assertDef('text-transform:capitalize;');
$this->assertDef('background-color:rgb(0,0,255);');
$this->assertDef('background-color:transparent;');
$this->assertDef('background:#333 url(chess.png) repeat fixed 50% top;');
$this->assertDef('background:#333 url("chess.png") repeat fixed 50% top;');
$this->assertDef('color:#F00;');
$this->assertDef('border-top-color:#F00;');
$this->assertDef('border-color:#F00 #FF0;');
@@ -62,7 +62,7 @@ class HTMLPurifier_AttrDef_CSSTest extends HTMLPurifier_AttrDefHarness
$this->assertDef('width:-50px;', false);
$this->assertDef('text-decoration:underline;');
$this->assertDef('font-family:sans-serif;');
$this->assertDef('font-family:Gill, \'Times New Roman\', sans-serif;');
$this->assertDef('font-family:Gill, "Times New Roman", sans-serif;');
$this->assertDef('font:12px serif;');
$this->assertDef('border:1px solid #000;');
$this->assertDef('border-bottom:2em double #FF00FA;');
@@ -73,9 +73,9 @@ class HTMLPurifier_AttrDef_CSSTest extends HTMLPurifier_AttrDefHarness
$this->assertDef('vertical-align:12px;');
$this->assertDef('vertical-align:50%;');
$this->assertDef('table-layout:fixed;');
$this->assertDef('list-style-image:url(nice.jpg);');
$this->assertDef('list-style:disc url(nice.jpg) inside;');
$this->assertDef('background-image:url(foo.jpg);');
$this->assertDef('list-style-image:url("nice.jpg");');
$this->assertDef('list-style:disc url("nice.jpg") inside;');
$this->assertDef('background-image:url("foo.jpg");');
$this->assertDef('background-image:none;');
$this->assertDef('background-repeat:repeat-y;');
$this->assertDef('background-attachment:fixed;');
@@ -101,7 +101,7 @@ class HTMLPurifier_AttrDef_CSSTest extends HTMLPurifier_AttrDefHarness
// bad props
$this->assertDef('nodice:foobar;', false);
$this->assertDef('position:absolute;', false);
$this->assertDef('background-image:url(javascript:alert\(\));', false);
$this->assertDef('background-image:url(\'javascript:alert\(\)\');', false);
// airy input
$this->assertDef(' font-weight : bold; color : #ff0000',
@@ -144,6 +144,12 @@ class HTMLPurifier_AttrDef_CSSTest extends HTMLPurifier_AttrDefHarness
$this->assertDef('overflow:scroll;');
}
function testForbidden() {
$this->config->set('CSS.ForbiddenProperties', 'float');
$this->assertDef('float:left;', false);
$this->assertDef('text-align:right;');
}
}
// vim: et sw=4 sts=4

View File

@@ -81,7 +81,7 @@ class HTMLPurifier_Filter_ExtractStyleBlocksTest extends HTMLPurifier_Harness
function test_cleanCSS_angledBrackets() {
$this->assertCleanCSS(
".class {\nfont-family:'</style>';\n}",
".class {\nfont-family:'\\3C /style\\3E ';\n}"
".class {\nfont-family:\"\\3C /style\\3E \";\n}"
);
}
@@ -99,14 +99,14 @@ class HTMLPurifier_Filter_ExtractStyleBlocksTest extends HTMLPurifier_Harness
function test_cleanCSS_escapeCodes() {
$this->assertCleanCSS(
".class {\nfont-family:'\\3C /style\\3E ';\n}"
".class {\nfont-family:\"\\3C /style\\3E \";\n}"
);
}
function test_cleanCSS_noEscapeCodes() {
$this->config->set('Filter.ExtractStyleBlocks.Escaping', false);
$this->assertCleanCSS(
".class {\nfont-family:'</style>';\n}"
".class {\nfont-family:\"</style>\";\n}"
);
}

View File

@@ -122,17 +122,20 @@ a[href|title]
}
function test_AllowedAttributes_global_preferredSyntax() {
$this->config->set('HTML.AllowedElements', array('p', 'br'));
$this->config->set('HTML.AllowedAttributes', 'style');
$this->assertPurification_AllowedAttributes_global_style();
}
function test_AllowedAttributes_global_verboseSyntax() {
$this->config->set('HTML.AllowedElements', array('p', 'br'));
$this->config->set('HTML.AllowedAttributes', '*@style');
$this->assertPurification_AllowedAttributes_global_style();
}
function test_AllowedAttributes_global_discouragedSyntax() {
// Emit errors eventually
$this->config->set('HTML.AllowedElements', array('p', 'br'));
$this->config->set('HTML.AllowedAttributes', '*.style');
$this->assertPurification_AllowedAttributes_global_style();
}
@@ -144,16 +147,19 @@ a[href|title]
}
function test_AllowedAttributes_local_preferredSyntax() {
$this->config->set('HTML.AllowedElements', array('p', 'br'));
$this->config->set('HTML.AllowedAttributes', 'p@style');
$this->assertPurification_AllowedAttributes_local_p_style();
}
function test_AllowedAttributes_local_discouragedSyntax() {
$this->config->set('HTML.AllowedElements', array('p', 'br'));
$this->config->set('HTML.AllowedAttributes', 'p.style');
$this->assertPurification_AllowedAttributes_local_p_style();
}
function test_AllowedAttributes_multiple() {
$this->config->set('HTML.AllowedElements', array('p', 'br'));
$this->config->set('HTML.AllowedAttributes', 'p@style,br@class,title');
$this->assertPurification(
'<p style="font-weight:bold;" class="foo" title="foo">Jelly</p><br style="clear:both;" class="foo" title="foo" />',
@@ -162,29 +168,34 @@ a[href|title]
}
function test_AllowedAttributes_local_invalidAttribute() {
$this->config->set('HTML.AllowedElements', array('p', 'br'));
$this->config->set('HTML.AllowedAttributes', array('p@style', 'p@<foo>'));
$this->expectError(new PatternExpectation("/Attribute '&lt;foo&gt;' in element 'p' not supported/"));
$this->assertPurification_AllowedAttributes_local_p_style();
}
function test_AllowedAttributes_global_invalidAttribute() {
$this->config->set('HTML.AllowedElements', array('p', 'br'));
$this->config->set('HTML.AllowedAttributes', array('style', '<foo>'));
$this->expectError(new PatternExpectation("/Global attribute '&lt;foo&gt;' is not supported in any elements/"));
$this->assertPurification_AllowedAttributes_global_style();
}
function test_AllowedAttributes_local_invalidAttributeDueToMissingElement() {
$this->config->set('HTML.AllowedElements', array('p', 'br'));
$this->config->set('HTML.AllowedAttributes', 'p.style,foo.style');
$this->expectError(new PatternExpectation("/Cannot allow attribute 'style' if element 'foo' is not allowed\/supported/"));
$this->assertPurification_AllowedAttributes_local_p_style();
}
function test_AllowedAttributes_duplicate() {
$this->config->set('HTML.AllowedElements', array('p', 'br'));
$this->config->set('HTML.AllowedAttributes', 'p.style,p@style');
$this->assertPurification_AllowedAttributes_local_p_style();
}
function test_AllowedAttributes_multipleErrors() {
$this->config->set('HTML.AllowedElements', array('p', 'br'));
$this->config->set('HTML.AllowedAttributes', 'p.style,foo.style,<foo>');
$this->expectError(new PatternExpectation("/Cannot allow attribute 'style' if element 'foo' is not allowed\/supported/"));
$this->expectError(new PatternExpectation("/Global attribute '&lt;foo&gt;' is not supported in any elements/"));
@@ -347,6 +358,12 @@ a[href|title]
);
}
function test_notAllowedRequiredAttributeError() {
$this->expectError("Required attribute 'src' in element 'img' was not allowed, which means 'img' will not be allowed either");
$this->config->set('HTML.Allowed', 'img[alt]');
$this->config->getHTMLDefinition();
}
}
// vim: et sw=4 sts=4

View File

@@ -6,8 +6,7 @@ class HTMLPurifier_HTMLModule_SafeObjectTest extends HTMLPurifier_HTMLModuleHarn
function setUp() {
parent::setUp();
$this->config->set('HTML.DefinitionID', 'HTMLPurifier_HTMLModule_SafeObjectTest');
$def = $this->config->getHTMLDefinition(true);
$def->manager->addModule('SafeObject');
$this->config->set('HTML.SafeObject', true);
}
function testMinimal() {
@@ -34,7 +33,14 @@ class HTMLPurifier_HTMLModule_SafeObjectTest extends HTMLPurifier_HTMLModuleHarn
function testFull() {
$this->assertResult(
'<b><object width="425" height="344" type="application/x-shockwave-flash" data="Foobar"><param name="allowScriptAccess" value="never" /><param name="allowNetworking" value="internal" /><param name="movie" value="http://www.youtube.com/v/RVtEQxH7PWA&amp;hl=en" /><param name="wmode" value="window" /></object></b>'
'<b><object width="425" height="344" type="application/x-shockwave-flash" data="Foobar"><param name="allowScriptAccess" value="never" /><param name="allowNetworking" value="internal" /><param name="flashvars" value="foobarbaz=bally" /><param name="movie" value="http://www.youtube.com/v/RVtEQxH7PWA&amp;hl=en" /><param name="wmode" value="window" /></object></b>'
);
}
function testFullScreen() {
$this->config->set('HTML.FlashAllowFullScreen', true);
$this->assertResult(
'<b><object width="425" height="344" type="application/x-shockwave-flash" data="Foobar"><param name="allowScriptAccess" value="never" /><param name="allowNetworking" value="internal" /><param name="flashvars" value="foobarbaz=bally" /><param name="movie" value="http://www.youtube.com/v/RVtEQxH7PWA&amp;hl=en" /><param name="wmode" value="window" /><param name="allowFullScreen" value="true" /></object></b>'
);
}

View File

@@ -0,0 +1,6 @@
--INI--
HTML.SafeObject = true
Output.FlashCompat = true
--HTML--
<object width="425" height="350" data="http://www.youtube.com/v/BdU--T8rLns" type="application/x-shockwave-flash"><param name="allowScriptAccess" value="never" /><param name="allowNetworking" value="internal" /><param name="movie" value="http://www.youtube.com/v/BdU--T8rLns" /><param name="wmode" value="window" /><!--[if IE]><embed width="425" height="350" src="http://www.youtube.com/v/BdU--T8rLns" allowScriptAccess="never" allowNetworking="internal" wmode="window" /><![endif]--></object>
--# vim: et sw=4 sts=4

View File

@@ -0,0 +1,5 @@
--INI--
URI.AllowedSchemes = file
--HTML--
<a href="file:///foo">foo</a>
--# vim: et sw=4 sts=4

View File

@@ -0,0 +1,5 @@
--HTML--
<i><ul></ul></i>
--EXPECT--
<i></i><i></i>
--# vim: et sw=4 sts=4

View File

@@ -7,5 +7,5 @@ URI.MungeResources = true
<img src="http://example.com" style="background-image:url(http://example.com);" alt="example.com" />
--EXPECT--
<a href="/redirect?s=http%3A%2F%2Fexample.com&amp;t=c15354f3953dfec262c55b1403067e0d045a3059&amp;r=&amp;n=a&amp;m=href&amp;p=">Link</a>
<img src="/redirect?s=http%3A%2F%2Fexample.com&amp;t=c15354f3953dfec262c55b1403067e0d045a3059&amp;r=1&amp;n=img&amp;m=src&amp;p=" style="background-image:url(/redirect?s=http%3A%2F%2Fexample.com&amp;t=c15354f3953dfec262c55b1403067e0d045a3059&amp;r=1&amp;n=img&amp;m=style&amp;p=background-image);" alt="example.com" />
<img src="/redirect?s=http%3A%2F%2Fexample.com&amp;t=c15354f3953dfec262c55b1403067e0d045a3059&amp;r=1&amp;n=img&amp;m=src&amp;p=" style="background-image:url(&quot;/redirect?s=http%3A%2F%2Fexample.com&amp;t=c15354f3953dfec262c55b1403067e0d045a3059&amp;r=1&amp;n=img&amp;m=style&amp;p=background-image&quot;);" alt="example.com" />
--# vim: et sw=4 sts=4

View File

@@ -4,5 +4,5 @@ if (!function_exists('iconv')) return true;
Core.Encoding = "Shift_JIS"
Core.EscapeNonASCIICharacters = true
--HTML--
<b style="font-family:'&#165;';">111</b>
<b style="font-family:&quot;&#165;&quot;;">111</b>
--# vim: et sw=4 sts=4

View File

@@ -3,7 +3,7 @@ if (!function_exists('iconv')) return true;
--INI--
Core.Encoding = Shift_JIS
--HTML--
<b style="font-family:'&#165;';">111</b>
<b style="font-family:&quot;&#165;&quot;;">111</b>
--EXPECT--
<b style="font-family:'';">111</b>
<b style="font-family:&quot;&quot;;">111</b>
--# vim: et sw=4 sts=4

View File

@@ -1,5 +1,5 @@
--HTML--
<table background="logo.png"><tr><td>asdf</td></tr></table>
--EXPECT--
<table style="background-image:url(logo.png);"><tr><td>asdf</td></tr></table>
<table style="background-image:url(&quot;logo.png&quot;);"><tr><td>asdf</td></tr></table>
--# vim: et sw=4 sts=4

View File

@@ -497,6 +497,13 @@ Bar</div>",
);
}
function testAutoClose() {
$this->assertResult(
'<p></p>
<hr />'
);
}
function testErrorNeeded() {
$this->config->set('HTML.Allowed', 'b');
$this->expectError('Cannot enable AutoParagraph injector because p is not allowed');

View File

@@ -0,0 +1,99 @@
<?php
/**
 * Exercises the AutoFormat.RemoveSpansWithoutAttributes option: each test
 * feeds HTML through assertResult() and states the expected output.
 */
class HTMLPurifier_Injector_RemoveSpansWithoutAttributesTest extends HTMLPurifier_InjectorHarness
{

    /**
     * Allows span (with class only), div, p, strong and em, and switches
     * the option under test on.
     */
    function setup() {
        parent::setup();
        $this->config->set('HTML.Allowed', 'span[class],div,p,strong,em');
        $this->config->set('AutoFormat.RemoveSpansWithoutAttributes', true);
    }

    // A bare span is unwrapped, leaving only its text.
    function testSingleSpan() {
        $input = '<span>foo</span>';
        $expect = 'foo';
        $this->assertResult($input, $expect);
    }

    // A span carrying an allowed attribute is kept as-is.
    function testSingleSpanWithAttributes() {
        $input = '<span class="bar">foo</span>';
        $expect = '<span class="bar">foo</span>';
        $this->assertResult($input, $expect);
    }

    // A bare span inside a paragraph is unwrapped.
    function testSingleNestedSpan() {
        $input = '<p><span>foo</span></p>';
        $expect = '<p>foo</p>';
        $this->assertResult($input, $expect);
    }

    // A span with an attribute inside a paragraph is kept.
    function testSingleNestedSpanWithAttributes() {
        $input = '<p><span class="bar">foo</span></p>';
        $expect = '<p><span class="bar">foo</span></p>';
        $this->assertResult($input, $expect);
    }

    // Child elements of a removed span survive.
    function testSpanWithChildren() {
        $input = '<span>foo <strong>bar</strong> <em>baz</em></span>';
        $expect = 'foo <strong>bar</strong> <em>baz</em>';
        $this->assertResult($input, $expect);
    }

    // Siblings around a removed span are untouched.
    function testSpanWithSiblings() {
        $input = '<p>before <span>inside</span> <strong>after</strong></p>';
        $expect = '<p>before inside <strong>after</strong></p>';
        $this->assertResult($input, $expect);
    }

    // Siblings and children together.
    function testNestedSpanWithSiblingsAndChildren() {
        $input = '<p>a <span>b <em>c</em> d</span> e</p>';
        $expect = '<p>a b <em>c</em> d e</p>';
        $this->assertResult($input, $expect);
    }

    // Every level of nested bare spans is removed.
    function testNestedSpansWithoutAttributes() {
        $input = '<span>one<span>two<span>three</span></span></span>';
        $expect = 'onetwothree';
        $this->assertResult($input, $expect);
    }

    // Only the bare inner span is removed; the outer one has a class.
    function testDeeplyNestedSpan() {
        $input = '<div><div><div><span class="a">a <span>b</span> c</span></div></div></div>';
        $expect = '<div><div><div><span class="a">a b c</span></div></div></div>';
        $this->assertResult($input, $expect);
    }

    // A span whose attributes are all invalid ends up removed.
    function testSpanWithInvalidAttributes() {
        $input = '<p><span snorkel buzzer="emu">foo</span></p>';
        $expect = '<p>foo</p>';
        $this->assertResult($input, $expect);
    }

    // Alternating bare and classed spans: only the bare ones go.
    function testNestedAlternateSpans() {
        $input = '<span>a <span class="x">b <span>c <span class="y">d <span>e <span class="z">f
</span></span></span></span></span></span>';
        $expect = 'a <span class="x">b c <span class="y">d e <span class="z">f
</span></span></span>';
        $this->assertResult($input, $expect);
    }

    // A span keeping at least one valid attribute is retained.
    function testSpanWithSomeInvalidAttributes() {
        $input = '<p><span buzzer="emu" class="bar">foo</span></p>';
        $expect = '<p><span class="bar">foo</span></p>';
        $this->assertResult($input, $expect);
    }

}
// vim: et sw=4 sts=4

View File

@@ -13,6 +13,11 @@ class HTMLPurifier_Lexer_DirectLex_ErrorsTest extends HTMLPurifier_ErrorsHarness
$lexer->parseAttributeString($input, $this->config, $this->context);
}
function testExtractBody() {
$this->expectErrorCollection(E_WARNING, 'Lexer: Extracted body');
$this->invoke('<body>foo</body>');
}
function testUnclosedComment() {
$this->expectErrorCollection(E_WARNING, 'Lexer: Unclosed comment');
$this->expectContext('CurrentLine', 1);

View File

@@ -7,12 +7,10 @@ class HTMLPurifier_LexerTest extends HTMLPurifier_Harness
public function __construct() {
parent::__construct();
// E_STRICT = 2048, int used for PHP4 compat: this check disables
// PEAR if PHP 5 strict mode is on, since the class is not strict safe
if (
$GLOBALS['HTMLPurifierTest']['PEAR'] &&
((error_reporting() & 2048) != 2048) // ought to be a better way
) {
if ($GLOBALS['HTMLPurifierTest']['PEAR'] &&
// PEARSax3 is not maintained and throws loads of DEPRECATED
// errors in PHP 5.3
version_compare(PHP_VERSION, '5.3', '<')) {
require_once 'HTMLPurifier/Lexer/PEARSax3.php';
$this->_has_pear = true;
}
@@ -177,7 +175,6 @@ class HTMLPurifier_LexerTest extends HTMLPurifier_Harness
}
if ($t_expect != $result) {
printTokens($result);
//var_dump($result);
}
}
}
@@ -265,6 +262,15 @@ class HTMLPurifier_LexerTest extends HTMLPurifier_Harness
);
}
function test_tokenizeHTML_singleAttribute() {
$this->assertTokenization(
'<br style="&amp;" />',
array(
new HTMLPurifier_Token_Empty('br', array('style' => '&'))
)
);
}
function test_tokenizeHTML_emptyTag() {
$this->assertTokenization(
'<br />',
@@ -275,20 +281,14 @@ class HTMLPurifier_LexerTest extends HTMLPurifier_Harness
function test_tokenizeHTML_comment() {
$this->assertTokenization(
'<!-- Comment -->',
array( new HTMLPurifier_Token_Comment(' Comment ') ),
array(
'PEARSax3' => array( new HTMLPurifier_Token_Comment('-- Comment --') ),
)
array( new HTMLPurifier_Token_Comment(' Comment ') )
);
}
function test_tokenizeHTML_malformedComment() {
$this->assertTokenization(
'<!-- not so well formed --->',
array( new HTMLPurifier_Token_Comment(' not so well formed -') ),
array(
'PEARSax3' => array( new HTMLPurifier_Token_Comment('-- not so well formed ---') ),
)
array( new HTMLPurifier_Token_Comment(' not so well formed -') )
);
}
@@ -579,6 +579,13 @@ div {}
}
function test_tokenizeHTML_tagWithAtSignAndExtraGt() {
$alt_expect = array(
// Technically this is invalid, but it won't be a
// problem with invalid element removal; also, this
// mimics Mozilla's parsing of the tag.
new HTMLPurifier_Token_Start('a@'),
new HTMLPurifier_Token_Text('>'),
);
$this->assertTokenization(
'<a@>>',
array(
@@ -587,13 +594,8 @@ div {}
new HTMLPurifier_Token_End('a'),
),
array(
'DirectLex' => array(
// Technically this is invalid, but it won't be a
// problem with invalid element removal; also, this
// mimics Mozilla's parsing of the tag.
new HTMLPurifier_Token_Start('a@'),
new HTMLPurifier_Token_Text('>'),
),
'DirectLex' => $alt_expect,
'PEARSax3' => $alt_expect,
)
);
}
@@ -613,6 +615,11 @@ div {}
new HTMLPurifier_Token_Text('<3'),
new HTMLPurifier_Token_Empty('br'),
),
'PEARSax3' => array(
// bah too lazy to fix this
new HTMLPurifier_Token_Empty('br'),
new HTMLPurifier_Token_Empty('3<br'),
),
)
);
}
@@ -632,6 +639,12 @@ div {}
new HTMLPurifier_Token_Text('<<'),
new HTMLPurifier_Token_End('b'),
),
'PEARSax3' => array(
// also too lazy to fix
new HTMLPurifier_Token_Start('b'),
new HTMLPurifier_Token_Empty('<<'),
new HTMLPurifier_Token_Text('b>'),
),
)
);
}
@@ -653,30 +666,83 @@ div {}
new HTMLPurifier_Token_Text('test'),
new HTMLPurifier_Token_End('b'),
),
'PEARSax3' => array(
// totally doing the wrong thing here
new HTMLPurifier_Token_Text(' '),
new HTMLPurifier_Token_Start('b'),
new HTMLPurifier_Token_Text('test'),
new HTMLPurifier_Token_End('b'),
),
)
);
}
/**
 * Markup inside a CDATA section must come out as text, never as tags.
 *
 * The default expectation is a single text token. The third argument
 * maps a lexer name to the alternative token list accepted for that
 * lexer; both lexers named here share the same split-up text tokens.
 */
function test_tokenizeHTML_bodyInCDATA() {
    // Shared alternative: the payload broken into seven text tokens.
    $alt_tokens = array(
        new HTMLPurifier_Token_Text('<'),
        new HTMLPurifier_Token_Text('body'),
        new HTMLPurifier_Token_Text('>'),
        new HTMLPurifier_Token_Text('Foo'),
        new HTMLPurifier_Token_Text('<'),
        new HTMLPurifier_Token_Text('/body'),
        new HTMLPurifier_Token_Text('>'),
    );
    $this->assertTokenization(
        '<![CDATA[<body>Foo</body>]]>',
        array(
            new HTMLPurifier_Token_Text('<body>Foo</body>'),
        ),
        array(
            // Each key appears once; a repeated 'PH5P' key would be
            // silently overwritten by the later entry.
            'PH5P' => $alt_tokens,
            'PEARSax3' => $alt_tokens,
        )
    );
}
/**
 * A start tag, a self-closing child and an end tag are expected to come
 * back as three tokens in document order.
 */
function test_tokenizeHTML_() {
    $html = '<a><img /></a>';
    $tokens = array(
        new HTMLPurifier_Token_Start('a'),
        new HTMLPurifier_Token_Empty('img'),
        new HTMLPurifier_Token_End('a'),
    );
    $this->assertTokenization($html, $tokens);
}
/**
 * An IE conditional comment, including the markup and the inner comment
 * nested in it, is expected to produce no tokens at all.
 */
function test_tokenizeHTML_ignoreIECondComment() {
    $html = '<!--[if IE]>foo<a>bar<!-- baz --><![endif]-->';
    $noTokens = array();
    $this->assertTokenization($html, $noTokens);
}
/**
 * With Core.RemoveProcessingInstructions switched on, a processing
 * instruction is expected to leave no tokens behind.
 */
function test_tokenizeHTML_removeProcessingInstruction() {
    $this->config->set('Core.RemoveProcessingInstructions', true);
    $html = '<?xml blah blah ?>';
    $noTokens = array();
    $this->assertTokenization($html, $noTokens);
}
/**
 * With Core.NormalizeNewlines switched on, both a bare \r and a \r\n
 * pair in text are expected to come back as \n.
 */
function test_tokenizeHTML_removeNewline() {
    $this->config->set('Core.NormalizeNewlines', true);
    $input = "plain\rtext\r\n";
    $expect = array(
        new HTMLPurifier_Token_Text("plain\ntext\n")
    );
    // Without this call the test builds its fixtures and checks nothing.
    $this->assertTokenization($input, $expect);
}
/**
 * With Core.NormalizeNewlines switched off, \r and \r\n in text are
 * expected to pass through the lexer untouched.
 */
function test_tokenizeHTML_noRemoveNewline() {
    $this->config->set('Core.NormalizeNewlines', false);
    $this->assertTokenization(
        "plain\rtext\r\n",
        array(
            new HTMLPurifier_Token_Text("plain\rtext\r\n"),
        )
    );
}
/*
function test_tokenizeHTML_() {

View File

@@ -116,6 +116,27 @@ class HTMLPurifier_Strategy_MakeWellFormedTest extends HTMLPurifier_StrategyHarn
);
}
/**
 * An <ol> placed directly inside another <ol> is expected to end up
 * wrapped in an <li>.
 */
function testNestedOl() {
    $html = '<ol><ol><li>foo</li></ol></ol>';
    $fixed = '<ol><li><ol><li>foo</li></ol></li></ol>';
    $this->assertResult($html, $fixed);
}
/**
 * An empty <ul> placed directly inside another <ul> is expected to end
 * up wrapped in an <li>.
 */
function testNestedUl() {
    $html = '<ul><ul></ul></ul>';
    $fixed = '<ul><li><ul></ul></li></ul>';
    $this->assertResult($html, $fixed);
}
/**
 * Directly nested lists combined with unbalanced closing tags: the
 * expected output wraps the inner list in an <li> and closes every
 * element that was left open.
 */
function testNestedOlWithStrangeEnding() {
    $html = '<ol><li><ol><ol><li>foo</li></ol></li><li>foo</li></ol>';
    $fixed = '<ol><li><ol><li><ol><li>foo</li></ol></li><li>foo</li></ol></li></ol>';
    $this->assertResult($html, $fixed);
}
}
// vim: et sw=4 sts=4

View File

@@ -123,8 +123,6 @@ asdf<b></b></p>
<p>asdf</p>
"
);
}

View File

@@ -0,0 +1,24 @@
<?php
/**
 * Exercises HTMLPurifier_URIFilter_DisableResources with and without the
 * 'EmbeddedURI' context entry present.
 */
class HTMLPurifier_URIFilter_DisableResourcesTest extends HTMLPurifier_URIFilterHarness
{

    /**
     * Installs the filter under test and registers 'EmbeddedURI' as true
     * in the context, so tests start out in the embedded-resource case.
     */
    function setUp() {
        parent::setUp();
        $this->filter = new HTMLPurifier_URIFilter_DisableResources();
        // Handed over through a variable rather than a literal.
        // NOTE(review): presumably register() takes its value by
        // reference - confirm against HTMLPurifier_Context.
        $embedded = true;
        $this->context->register('EmbeddedURI', $embedded);
    }

    /**
     * With 'EmbeddedURI' registered, the URI is asserted against false.
     * NOTE(review): presumably false means "rejected" - confirm in the
     * harness.
     */
    function testRemoveResource() {
        $uri = '/foo/bar';
        $this->assertFiltering($uri, false);
    }

    /**
     * Once the 'EmbeddedURI' entry is removed again, the same URI is
     * asserted with the harness default expectation.
     */
    function testPreserveRegular() {
        // Drop the entry that setUp() registered.
        $this->context->destroy('EmbeddedURI');
        $uri = '/foo/bar';
        $this->assertFiltering($uri);
    }

}
// vim: et sw=4 sts=4

View File

@@ -6,8 +6,21 @@
class HTMLPurifier_URISchemeTest extends HTMLPurifier_URIHarness
{
private $pngBase64;
/**
 * Stores the base64 text that the data: URI tests append after their
 * 'base64,' prefix.
 */
public function __construct() {
    // Kept as short chunks purely for line length; joined with no separator.
    $chunks = array(
        'iVBORw0KGgoAAAANSUhEUgAAAAoAAAAKCAYAAACNMs+9AAAABGdBTUEAALGP',
        'C/xhBQAAAAlwSFlzAAALEwAACxMBAJqcGAAAAAd0SU1FB9YGARc5KB0XV+IA',
        'AAAddEVYdENvbW1lbnQAQ3JlYXRlZCB3aXRoIFRoZSBHSU1Q72QlbgAAAF1J',
        'REFUGNO9zL0NglAAxPEfdLTs4BZM4DIO4C7OwQg2JoQ9LE1exdlYvBBeZ7jq',
        'ch9//q1uH4TLzw4d6+ErXMMcXuHWxId3KOETnnXXV6MJpcq2MLaI97CER3N0',
        'vr4MkhoXe0rZigAAAABJRU5ErkJggg==',
    );
    $this->pngBase64 = implode('', $chunks);
}
protected function assertValidation($uri, $expect_uri = true) {
$this->prepareURI($uri, $expect_uri);
$this->config->set('URI.AllowedSchemes', array($uri->scheme));
// convenience hack: the scheme should be explicitly specified
$scheme = $uri->getSchemeObj($this->config, $this->context);
$result = $scheme->validate($uri, $this->config, $this->context);
@@ -132,6 +145,33 @@ class HTMLPurifier_URISchemeTest extends HTMLPurifier_URIHarness
);
}
/**
 * A data: URI with an explicit image/png type and the stored base64
 * payload is asserted with the default expectation.
 */
function test_data_png() {
    $uri = 'data:image/png;base64,' . $this->pngBase64;
    $this->assertValidation($uri);
}
/**
 * A data: URI labelled image/png but carrying only a short fragment of
 * base64 text is asserted against false.
 */
function test_data_malformed() {
    $uri = 'data:image/png;base64,vr4MkhoXJRU5ErkJggg==';
    $this->assertValidation($uri, false);
}
/**
 * A data: URI that omits the content type is expected to come back with
 * image/png filled in ahead of the base64 marker.
 */
function test_data_implicit() {
    $given = 'data:base64,' . $this->pngBase64;
    $wanted = 'data:image/png;base64,' . $this->pngBase64;
    $this->assertValidation($given, $wanted);
}
/**
 * A file: URI given with userinfo, port and query is expected to come
 * back with only host, path and fragment left.
 */
function test_file_basic() {
    $given = 'file://user@MYCOMPUTER:12/foo/bar?baz#frag';
    $wanted = 'file://MYCOMPUTER/foo/bar#frag';
    $this->assertValidation($given, $wanted);
}
}
// vim: et sw=4 sts=4

View File

@@ -48,7 +48,9 @@ require_once $simpletest_location . 'remote.php';
// load CSS Tidy
if ($csstidy_location !== false) {
$old = error_reporting(E_ALL);
require $csstidy_location . 'class.csstidy.php';
error_reporting($old);
}
// load PEAR to include path

10
tests/index.php Executable file → Normal file
View File

@@ -23,6 +23,16 @@
* $test_files) do not have underscores in their names.
*/
// HTML Purifier runs error free on E_STRICT, so if code reports
// errors, we want to know about it.
error_reporting(E_ALL | E_STRICT);
// Because we always want to know about errors, and because SimpleTest
// will notify us about them, logging the errors to stderr is
// counterproductive and in fact the wrong thing when a test case
// exercises an error condition to detect for it.
ini_set('log_errors', false);
// Constant marking that the test runner is active.
// NOTE(review): where it is read is not visible here - confirm its use.
define('HTMLPurifierTest', 1);
define('HTMLPURIFIER_SCHEMA_STRICT', true); // validate schemas
// Make the directory containing this script the working directory.
chdir(dirname(__FILE__));