1
0
mirror of https://github.com/ezyang/htmlpurifier.git synced 2025-08-03 12:47:56 +02:00

Compare commits

...

102 Commits

Author SHA1 Message Date
Edward Z. Yang
c768146e4d Gusev's proposed patch
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2013-10-12 21:24:38 -07:00
Edward Z. Yang
6e37ecd1c8 Make URI parsing algorithm more strict.
Thanks Michael Gusev <mgusev@sugarcrm.com> for contributing this patch.

Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2013-04-16 13:56:43 -07:00
Edward Z. Yang
20eff0a3a0 Fix NEWS entry.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2013-02-21 14:08:36 -08:00
Edward Z. Yang
d516e2f8de Release 4.5.0
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2013-02-17 16:04:08 -08:00
Edward Z. Yang
631021733b Add %Core.DisableExcludes directive
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2013-02-17 15:47:38 -08:00
Michael Tibben
344e0640b6 Add required constant for composer autoloading
Signed-off-by: Michael Tibben <michael.tibben@99designs.com>
2012-12-21 16:16:16 +08:00
Edward Z. Yang
62d2550e16 Use SHA-1 instead of MD5.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2012-10-27 02:33:22 -07:00
Edward Z. Yang
087145a71b Blacklist more tags from RemoveEmpty.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2012-10-27 02:32:48 -07:00
Edward Z. Yang
a44187a5c1 Cleanup after data validation.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2012-10-27 02:30:58 -07:00
Edward Z. Yang
c0ad68108a Do checks against iconvAvailable because PHP 5.4 has botched iconv support.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2012-10-27 02:27:57 -07:00
Edward Z. Yang
83a574491e Comment for bug that needs to get fixed.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2012-10-11 11:40:02 -07:00
Edward Z. Yang
3b537365a4 CSS properties page-break-*
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2012-10-11 11:39:52 -07:00
Rob Loach
8a8b123d33 Autoloading support for Composer 2012-09-16 18:11:46 +02:00
Edward Z. Yang
72db575446 Fix bug with non-lower case color names in HTML.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2012-07-30 10:54:32 -04:00
Edward Z. Yang
d8bb73ce46 Permit underscores in font-families.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2012-07-27 18:28:29 -04:00
Edward Z. Yang
f90372f8ab More support for white-space.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2012-06-16 17:10:36 -04:00
Edward Z. Yang
f38fca32a9 Don't lower-case components of background.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2012-06-02 11:22:58 -04:00
Edward Z. Yang
5a23004652 Support for inline-block.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2012-05-25 23:55:48 -04:00
Edward Z. Yang
6705140082 Fix in AttrTransform_Nofollow
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2012-05-14 23:07:27 -04:00
Edward Z. Yang
cb7162a995 Use prepend for autoloading on PHP 5.3+
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2012-05-02 11:07:24 -04:00
Edward Z. Yang
2189a9430f Support for safe external scripts via explicit whitelist.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2012-04-27 17:44:49 -04:00
Edward Z. Yang
7291f19347 Fix problem where stacked AttrTransforms clobber each other.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2012-03-16 23:12:16 -04:00
Benjamin Steininger
9fcffd6533 Add composer.json file for easy install via composer.
Composer: http://getcomposer.org/

Since HTML Purifier is not completely psr-0 compatible (a classmap is
not enough for autoloading), the package-description does not contain
anything autoload-related. The user has to include the autoloader
himself.

This lets us create an entry on packagist which allows installing HTML
Purifier without the need to declare a repository in projects; it also
makes it easy to create libraries which want to use HTML Purifier using
composer.

Signed-off-by: Benjamin Steininger <robo47@robo47.net>
2012-03-16 01:05:02 -04:00
Edward Z. Yang
31dce298ea Actually make URI.DisableResources do something.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2012-03-02 13:25:00 -05:00
Edward Z. Yang
8c9d461a62 Bugfix: _blank not blank.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2012-02-18 11:28:01 -05:00
Edward Z. Yang
7291a9647e Update NEWS.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2012-01-25 07:06:30 -05:00
Edward Z. Yang
17af0e4fc1 Release 4.4.0
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2012-01-18 19:22:31 -05:00
Edward Z. Yang
70028f83d6 Make all of the tests work on all PHP versions.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2012-01-18 18:57:13 -05:00
Edward Z. Yang
5c5e3fe79f Avoid doing stupidly clever reflection tricks that make old PHP versions sad.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2012-01-18 18:21:36 -05:00
Edward Z. Yang
56a26cab14 Modernize some of the testing facilities.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2012-01-18 18:10:16 -05:00
Edward Z. Yang
1c7fedff5a Tighter CSS selector validation.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2012-01-17 15:36:26 -05:00
Edward Z. Yang
9de0785448 Remark about bypassing host list with punycode.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2012-01-06 05:32:53 -08:00
Edward Z. Yang
974fe3f25e Optional support for IDNAs with PEAR Net_IDNA2
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2012-01-06 05:28:00 -08:00
Edward Z. Yang
94468f3c24 Remove PEARSax3 lexer.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2012-01-03 20:40:17 +08:00
Edward Z. Yang
e0354fecd9 Make forms work for transitional doctypes.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2011-12-30 22:56:44 +08:00
Edward Z. Yang
1bbbc624dd Remove inscrutable TODO, optionalize another.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2011-12-27 23:50:02 +08:00
Edward Z. Yang
49879d2cc6 Add note about superseding modules in TODO.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2011-12-27 23:21:32 +08:00
Edward Z. Yang
5c9b5130c8 Bump minor version number to 4.4.0.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2011-12-26 21:55:14 +08:00
Edward Z. Yang
d2de8d976a Add test for invalid SafeIframe usage.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2011-12-26 21:52:55 +08:00
Bradley M. Froehle
4164b2eb2b Implement Iframe module, and provide %HTML.SafeIframe and %URI.SafeIframeRegexp for untrusted usage.
The purpose of this addition is twofold. In trusted mode, iframes are
now unconditionally allowed.

However, many online video providers (YouTube, Vimeo) and other web
applications (Google Maps, Google Calendar, etc) provide embed code in
iframe format, which is useful functionality in untrusted mode.
You can specify iframes as trusted elements with %HTML.SafeIframe;
however, you need to additionally specify a whitelist mechanism such as
%URI.SafeIframeRegexp to say what iframe embeds are OK (by default
everything is rejected).

Note: As iframes are invalid in strict doctypes, you will not be able to
use them there.

We also added an always_load parameter to URIFilters in order to support
the strange nature of the SafeIframe URIFilter (it always needs to be
loaded, due to the inability of accessing the %HTML.SafeIframe directive
to see if it's needed!)  We expect this URIFilter can expand in the future
to offer more complex validation mechanisms.

Signed-off-by: Bradley M. Froehle <brad.froehle@gmail.com>
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2011-12-26 21:50:53 +08:00
Edward Z. Yang
1e5293d9fe Add more attributions.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2011-12-26 15:45:41 +08:00
Edward Z. Yang
6b643ede02 Implement %HTML.AllowedComments and %HTML.AllowedCommentsRegexp
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2011-12-26 15:34:42 +08:00
Edward Z. Yang
e41af46a8b Fix broken table content model, easily seen in XHTML1.1
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2011-12-26 14:49:26 +08:00
Edward Z. Yang
3570c9985a Properly handle nested sublists by folding into previous list item.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2011-12-26 14:00:34 +08:00
Edward Z. Yang
8d572993b4 Implement %HTML.TargetBlank
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2011-12-26 08:36:00 +08:00
Edward Z. Yang
1bacbc0563 Add isBenign and getDefaultScheme methods.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2011-12-25 23:31:15 +08:00
Edward Z. Yang
bfe2c10d07 Add a little bit of documentation about contexts for URIFilters.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2011-12-25 23:31:15 +08:00
Edward Z. Yang
9b10515fa4 Core.EscapeNonASCIICharacters now always works, even if target is UTF-8.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2011-12-25 23:31:15 +08:00
Edward Z. Yang
1255d0f15d Add support for scope attribute on td and th.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2011-12-25 23:31:13 +08:00
Edward Z. Yang
d45e11cc6b Add one more test for SPL autoload defaults.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2011-12-25 02:58:51 -05:00
Edward Z. Yang
94c15d1f56 Fix iconv truncation bug.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2011-12-25 02:31:06 -05:00
Edward Z. Yang
ce68cfe484 Remove spurious abstract definition; PHP 5.4 doesn't like that.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2011-12-18 13:28:07 -05:00
Edward Z. Yang
9f5f85952b Don't unset parser variable; plays poorly with serialize.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2011-12-18 13:27:51 -05:00
Edward Z. Yang
dbb365155b Typofix.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2011-08-24 09:56:51 -04:00
Edward Z. Yang
32c0ffde0c Don't add nofollow for matching hosts, generalize this code.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2011-08-24 09:56:49 -04:00
Edward Z. Yang
856a5e5b89 Update INSTALL to avoid missing config snafu, update usage.xml.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2011-08-24 09:56:21 -04:00
Edward Z. Yang
820d6e9097 Do not duplicate nofollow attribute in transform.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2011-08-24 09:56:13 -04:00
Edward Z. Yang
35b1fbce01 Explicitly initialize anonModule to null.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2011-04-19 22:46:17 +01:00
Edward Z. Yang
bcfbb8338c URI.Munge munges https to http URIs.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2011-04-10 13:09:24 +01:00
Edward Z. Yang
f51a6f7de9 Color keywords now case-insensitive.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2011-04-10 12:45:02 +01:00
Edward Z. Yang
f1439f0af5 Release 4.3.0
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2011-03-27 23:02:49 +01:00
Edward Z. Yang
0124605918 Fix CSS URL innerHTML/cssText escaping bug.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2011-03-27 21:24:32 +01:00
Edward Z. Yang
afb007d22f Protect against font family innerHTML/cssText attacks.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2011-03-27 20:35:43 +01:00
Edward Z. Yang
0dd9e4faf4 Fix Internet Explorer innerHTML bug.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2011-03-27 11:50:52 +01:00
Edward Z. Yang
94ed3b1231 Implement CSS.AllowedFonts.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2011-03-24 22:54:39 +00:00
Edward Z. Yang
6a6c0ed5d7 Don't autoclose if no parents support the tag.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2011-03-22 00:26:41 +00:00
Edward Z. Yang
e05b555448 Safety update for nested ul test.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2011-03-21 21:05:23 +00:00
Edward Z. Yang
ee9c70ab7f Fix E_NOTICE from indexing into empty string.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2011-03-17 17:33:11 +00:00
Edward Z. Yang
b4469f17aa Fix missing numeric entities (shows up when DirectLexing).
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2011-02-27 11:58:37 +00:00
Edward Z. Yang
e76f4b45d0 Dramatically rewrite null host URI handling.
Basically, browsers don't parse what should be valid URIs correctly, so
we have to go through some backbends to accomodate them.  Specifically,
for browseable URIs, the following URIs have unintended behavior:

    - ///example.com
    - http:/example.com
    - http:///example.com

Furthermore, if the path begins with //, modifying these URLs must
be done with care, as if you remove the host-name component, the
parse tree changes.

I've modified the engine to follow correct URI semantics as much
as possible while outputting browser compatible code, and invalidate
the URI in cases where we can't deal.  There has been a refactoring
of URIScheme so that this important check is always performed,
introducing a new member variable allow_empty_host which is true
on data, file, mailto and news schemes.

This also fixes bypass bugs on URI.Munge.

Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2011-01-25 18:56:46 +00:00
Edward Z. Yang
a32d5b52e1 Fix embedding flash on non-IE browsers and allow more wmode.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2011-01-22 12:28:57 +00:00
Maxim Krizhanovsky
a3d71fe606 Iterative traversal of DOM.
There are some deep DOMs you can hit the maximum nesting level
limit in tokenizeDOM (we've experienced this even with maximum nesting
level of 300). Here is an iterative version of the same function with
simple queue/dequeue approach.

Signed-off-by: Maxim Krizhanovsky <darhazer@gmail.com>
2011-01-19 22:06:40 +00:00
Edward Z. Yang
77982bd61d Bump version number for Cache.SerializerPermissions.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2011-01-14 00:40:39 +00:00
Petr Skoda
78c4e62245 Add new Cache.SerializerPermissions option. 2011-01-13 22:57:40 +00:00
Edward Z. Yang
5803c06765 Check that argv is set before operating on it.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2011-01-13 22:42:47 +00:00
Edward Z. Yang
b63569ac22 Fix bad interaction between bootstrap autoloader and Zend Debugger/APC.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2010-12-31 09:48:28 +00:00
Edward Z. Yang
f3d050c517 Fix two bugs with caching of customized raw definitions.
The first bug is that we will repeatedly write out the result
of a customized raw definition to the filesystem, even when a cache
entry already exists.

The second bug is that caching these definitions doesn't actually
work (the cache entry is written but never used.)  A new API
for retrieving raw definitions permits the user to take advantage
of caching.

Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2010-12-30 23:51:53 +00:00
Edward Z. Yang
6dcc37cb55 Update PHPT instructions.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2010-11-21 14:00:20 +00:00
Edward Z. Yang
cfc4ee1faf Add initial implementation of CSS.Trusted.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2010-11-12 18:45:03 +00:00
Edward Z. Yang
598c5b60c9 Add sanity check against ze1_compatibility_mode.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2010-11-12 16:15:03 +00:00
Edward Z. Yang
c9e7ffc172 Fix incorrect PEARSax3 test assertion.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2010-11-12 16:06:34 +00:00
Edward Z. Yang
feeffe6ed2 Check if schema.ser was corrupted.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2010-10-29 14:47:40 +01:00
Edward Z. Yang
4754d407aa Fix removal of id with DirectLex by preserving armor.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2010-10-28 17:25:31 +01:00
Nick Pope
0b9db1f54b Allow non-static autoload methods w/ PHP >= 5.2.11
HTML Purifier loads itself as the first autoload function by
unregistering all existing functions and re-registering them after
registering itself.

Originally an exception was thrown when a non-static object method was
encountered as the behaviour of spl_autoload_functions() did not return
the object instance, but only the class name.  This was filed on PHP
bugs (#44144).

The bug was fixed for PHP >= 5.2.11 and >= 5.3

Signed-off-by: Nick Pope <nick@nickpope.me.uk>
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2010-10-28 17:25:17 +01:00
Edward Z. Yang
1d4a38d055 Escape CDATA before handling conditional comments.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2010-09-28 12:11:26 -04:00
Edward Z. Yang
8c80349f9d Implement HTML.Nofollow for external links.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2010-09-28 12:01:57 -04:00
Edward Z. Yang
d848c99b74 Make IE conditional comment matching ungreedy.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2010-09-28 10:22:38 -04:00
Edward Z. Yang
882ffed9ba Release 4.2.0.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2010-09-15 02:52:57 -04:00
Edward Z. Yang
86990a21f1 Rename newline normalization directive to something better.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2010-09-15 02:50:39 -04:00
Tomasz Muras
9573f0933d Make newline normalization optional. 2010-09-14 23:49:28 -04:00
Edward Z. Yang
632bf2bbd4 Shift to 4.2.0 release cycle.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2010-09-14 23:38:51 -04:00
Edward Z. Yang
ec86598446 Add support for file:// URI scheme.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2010-09-09 00:01:26 -04:00
Edward Z. Yang
b6c3f5e89b Update TODO.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2010-09-08 23:42:05 -04:00
Edward Z. Yang
7c91104532 Implement HTML.FlashAllowFullScreen.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2010-09-08 23:39:20 -04:00
Edward Z. Yang
eac628f490 Add %CSS.ForbiddenProperties directive.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2010-09-04 02:59:03 -04:00
Edward Z. Yang
92913bc816 Add documentation about configuration directive types.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2010-09-04 02:28:53 -04:00
Edward Z. Yang
479d793562 Reword documentation to be clearer, and give warning on common user error.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2010-09-04 01:31:20 -04:00
Edward Z. Yang
e2c15f1c98 Fix Mac Snow Leopard APC bug.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2010-08-26 21:40:58 -07:00
Edward Z. Yang
57ced3f361 Tighten up ignore spec.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2010-06-30 06:00:45 -07:00
Edward Z. Yang
c04a441b3e Actually make URI.DisableResources do something.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2010-06-30 05:59:17 -07:00
Edward Z. Yang
1bed8b6d5f Added %Core.RemoveProcessingInstructions.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2010-06-20 18:26:44 -07:00
Edward Z. Yang
33afd7d9e0 Fix improper handling of IE conditional comments.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2010-06-18 06:08:54 -07:00
182 changed files with 4210 additions and 1124 deletions

4
.gitignore vendored
View File

@@ -18,3 +18,7 @@ docs/doxygen*
*.phpt.php
*.phpt.skip.php
*.htmlt.ini
*.patch
/*.php
vendor
composer.lock

View File

@@ -31,7 +31,7 @@ PROJECT_NAME = HTMLPurifier
# This could be handy for archiving the generated documentation or
# if some version control system is used.
PROJECT_NUMBER = 4.1.1
PROJECT_NUMBER = 4.5.0
# The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute)
# base path where the generated documentation will be put.

2
FOCUS
View File

@@ -1,4 +1,4 @@
9 - Major security fixes
4 - Minor feature enhancements
[ Appendix A: Release focus IDs ]
0 - N/A

13
INSTALL
View File

@@ -18,6 +18,7 @@ with these contents.
HTML Purifier is PHP 5 only, and is actively tested from PHP 5.0.5 and
up. It has no core dependencies with other libraries. PHP
4 support was deprecated on December 31, 2007 with HTML Purifier 3.0.0.
HTML Purifier is not compatible with zend.ze1_compatibility_mode.
These optional extensions can enhance the capabilities of HTML Purifier:
@@ -25,6 +26,10 @@ These optional extensions can enhance the capabilities of HTML Purifier:
* bcmath : Used for unit conversion and imagecrash protection
* tidy : Used for pretty-printing HTML
These optional libraries can enhance the capabilities of HTML Purifier:
* CSSTidy : Clean CSS stylesheets using %Core.ExtractStyleBlocks
* Net_IDNA2 (PEAR) : IRI support using %Core.EnableIDNA
---------------------------------------------------------------------------
2. Reconnaissance
@@ -330,11 +335,6 @@ Or move the cache directory somewhere else (no trailing slash):
The interface is mind-numbingly simple:
$purifier = new HTMLPurifier();
$clean_html = $purifier->purify( $dirty_html );
...or, if you're using the configuration object:
$purifier = new HTMLPurifier($config);
$clean_html = $purifier->purify( $dirty_html );
@@ -353,7 +353,8 @@ If your website is in UTF-8 and XHTML Transitional, use this code:
<?php
require_once '/path/to/htmlpurifier/library/HTMLPurifier.auto.php';
$purifier = new HTMLPurifier();
$config = HTMLPurifier_Config::createDefault();
$purifier = new HTMLPurifier($config);
$clean_html = $purifier->purify($dirty_html);
?>

143
NEWS
View File

@@ -9,6 +9,149 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier
. Internal change
==========================
4.6.0, unknown release date
# URI parsing algorithm was made more strict, so only prefixes which
looks like schemes will actually be schemes. Thanks
Michael Gusev <mgusev@sugarcrm.com> for fixing.
4.5.0, released 2013-02-17
# Fix bug where stacked attribute transforms clobber each other;
this also means it's no longer possible to override attribute
transforms in later modules. No internal code was using this
but this may break some clients.
# We now use SHA-1 to identify cached definitions, instead of MD5.
! Support display:inline-block
! Support for more white-space CSS values.
! Permit underscores in font families
! Support for page-break-* CSS3 properties when proprietary properties
are enabled.
! New directive %Core.DisableExcludes; can be set to 'true' to turn off
SGML excludes checking. If HTML Purifier is removing too much text
and you don't care about full standards compliance, try setting this to
'true'.
- Use prepend for SPL autoloading on PHP 5.3 and later.
- Fix bug with nofollow transform when pre-existing rel exists.
- Fix bug where background:url() always gets lower-cased
(but not background-image:url())
- Fix bug with non lower-case color names in HTML
- Fix bug where data URI validation doesn't remove temporary files.
Thanks Javier Marín Ros <javiermarinros@gmail.com> for reporting.
- Don't remove certain empty tags on RemoveEmpty.
4.4.0, released 2012-01-18
# Removed PEARSax3 handler.
# URI.Munge now munges URIs inside the same host that go from https
to http. Reported by Neike Taika-Tessaro.
# Core.EscapeNonASCIICharacters now always transforms entities to
entities, even if target encoding is UTF-8.
# Tighten up selector validation in ExtractStyleBlocks.
Non-syntactically valid selectors are now rejected, along with
some of the more obscure ones such as attribute selectors, the
:lang pseudoselector, and anything not in CSS2.1. Furthermore,
ID and class selectors now work properly with the relevant
configuration attributes. Also, mute errors when parsing CSS
with CSS Tidy. Reported by Mario Heiderich and Norman Hippert.
! Added support for 'scope' attribute on tables.
! Added %HTML.TargetBlank, which adds target="blank" to all outgoing links.
! Properly handle sub-lists directly nested inside of lists in
a standards compliant way, by moving them into the preceding <li>
! Added %HTML.AllowedComments and %HTML.AllowedCommentsRegexp for
limited allowed comments in untrusted situations.
! Implement iframes, and allow them to be used in untrusted mode with
%HTML.SafeIframe and %URI.SafeIframeRegexp. Thanks Bradley M. Froehle
<brad.froehle@gmail.com> for submitting an initial version of the patch.
! The Forms module now works properly for transitional doctypes.
! Added support for internationalized domain names. You need the PEAR
Net_IDNA2 module to be in your path; if it is installed, ensure the
class can be loaded and then set %Core.EnableIDNA to true.
- Color keywords are now case insensitive. Thanks Yzmir Ramirez
<yramirez-htmlpurifier@adicio.com> for reporting.
- Explicitly initialize anonModule variable to null.
- Do not duplicate nofollow if already present. Thanks 178
for reporting.
- Do not add nofollow if hostname matches our current host. Thanks 178
for reporting, and Neike Taika-Tessaro for helping diagnose.
- Do not unset parser variable; this fixes intermittent serialization
problems. Thanks Neike Taika-Tessaro for reporting, bill
<10010tiger@gmail.com> for diagnosing.
- Fix iconv truncation bug, where non-UTF-8 target encodings see
output truncated after around 8000 characters. Thanks Jörg Ludwig
<joerg.ludwig@iserv.eu> for reporting.
- Fix broken table content model for XHTML1.1 (and also earlier
versions, although the W3C validator doesn't catch those violations).
Thanks GlitchMr <glitch.mr@gmail.com> for reporting.
4.3.0, released 2011-03-27
# Fixed broken caching of customized raw definitions, but requires an
API change. The old API still works but will emit a warning,
see http://htmlpurifier.org/docs/enduser-customize.html#optimized
for how to upgrade your code.
# Protect against Internet Explorer innerHTML behavior by specially
treating attributes with backticks but no angled brackets, quotes or
spaces. This constitutes a slight semantic change, which can be
reverted using %Output.FixInnerHTML. Reported by Neike Taika-Tessaro
and Mario Heiderich.
# Protect against cssText/innerHTML by restricting allowed characters
used in fonts further than mandated by the specification and encoding
some extra special characters in URLs. Reported by Neike
Taika-Tessaro and Mario Heiderich.
! Added %HTML.Nofollow to add rel="nofollow" to external links.
! More types of SPL autoloaders allowed on later versions of PHP.
! Implementations for position, top, left, right, bottom, z-index
when %CSS.Trusted is on.
! Add %Cache.SerializerPermissions option for custom serializer
directory/file permissions
! Fix longstanding bug in Flash support for non-IE browsers, and
allow more wmode attributes.
! Add %CSS.AllowedFonts to restrict permissible font names.
- Switch to an iterative traversal of the DOM, which prevents us
from running out of stack space for deeply nested documents.
Thanks Maxim Krizhanovsky for contributing a patch.
- Make removal of conditional IE comments ungreedy; thanks Bernd
for reporting.
- Escape CDATA before removing Internet Explorer comments.
- Fix removal of id attributes under certain conditions by ensuring
armor attributes are preserved when recreating tags.
- Check if schema.ser was corrupted.
- Check if zend.ze1_compatibility_mode is on, and error out if it is.
This safety check is only done for HTMLPurifier.auto.php; if you
are using standalone or the specialized includes files, you're
expected to know what you're doing.
- Stop repeatedly writing the cache file after I'm done customizing a
raw definition. Reported by ajh.
- Switch to using require_once in the Bootstrap to work around bad
interaction with Zend Debugger and APC. Reported by Antonio Parraga.
- Fix URI handling when hostname is missing but scheme is present.
Reported by Neike Taika-Tessaro.
- Fix missing numeric entities on DirectLex; thanks Neike Taika-Tessaro
for reporting.
- Fix harmless notice from indexing into empty string. Thanks Matthijs
Kooijman <matthijs@stdin.nl> for reporting.
- Don't autoclose no parent elements are able to support the element
that triggered the autoclose. In particular fixes strange behavior
of stray <li> tags. Thanks pkuliga@gmail.com for reporting and
Neike Taika-Tessaro <pinkgothic@gmail.com> for debugging assistance.
4.2.0, released 2010-09-15
! Added %Core.RemoveProcessingInstructions, which lets you remove
<? ... ?> statements.
! Added %URI.DisableResources functionality; the directive originally
did nothing. Thanks David Rothstein for reporting.
! Add documentation about configuration directive types.
! Add %CSS.ForbiddenProperties configuration directive.
! Add %HTML.FlashAllowFullScreen to permit embedded Flash objects
to utilize full-screen mode.
! Add optional support for the <code>file</code> URI scheme, enable
by explicitly setting %URI.AllowedSchemes.
! Add %Core.NormalizeNewlines options to allow turning off newline
normalization.
- Fix improper handling of Internet Explorer conditional comments
by parser. Thanks zmonteca for reporting.
- Fix missing attributes bug when running on Mac Snow Leopard and APC.
Thanks sidepodcast for the fix.
- Warn if an element is allowed, but an attribute it requires is
not allowed.
4.1.1, released 2010-05-31
- Fix undefined index warnings in maintenance scripts.
- Fix bug in DirectLex for parsing elements with a single attribute

18
TODO
View File

@@ -13,18 +13,26 @@ afraid to cast your vote for the next feature to be implemented!
Things to do as soon as possible:
- http://htmlpurifier.org/phorum/read.php?3,5560,6307#msg-6307
- Think about allowing explicit order of operations hooks for transforms
- Inputs don't do the right thing with submit
- Fix "<.<" bug (trailing < is removed if not EOD)
- Build in better internal state dumps and debugging tools for remote
debugging
- Allowed/Allowed* have strange interactions when both set
- Transform lone embeds into object tags
? Transform lone embeds into object tags
- Deprecated config options that emit warnings when you set them (with'
a way of muting the warning if you really want to)
- Make HTML.Trusted work with Output.FlashCompat
- HTML.Trusted and HTML.SafeObject have funny interaction; general
problem is what to do when a module "supersedes" another
(see also tables and basic tables.) This is a little dicier
because HTML.SafeObject has some extra functionality that
trusted might find useful. See http://htmlpurifier.org/phorum/read.php?3,5762,6100
FUTURE VERSIONS
---------------
4.2 release [OMG CONFIG PONIES]
4.6 release [OMG CONFIG PONIES]
! Fix Printer. It's from the old days when we didn't have decent XML classes
! Factor demo.php into a set of Printer classes, and then create a stub
file for users here (inside the actual HTML Purifier library)
@@ -109,6 +117,10 @@ Neat feature related
3. Extend the tag exclusion system to specify whether or not the
contents should be dropped or not (currently, there's code that could do
something like this if it didn't drop the inner text too.)
? Make AutoParagraph also support paragraph-izing double <br> tags, and not
just double newlines. This is kind of tough to do in the current framework,
though, and might be reasonably approximated by search replacing double <br>s
with newlines before running it through HTML Purifier.
Maintenance related (slightly boring)
# CHMOD install script for PEAR installs

View File

@@ -1 +1 @@
4.1.1
4.5.0

View File

@@ -1,5 +1,6 @@
HTML Purifier 4.1.1 is a major security and bugfix release that
improves on 4.1's fix for an XSS vulnerability exploitable on Internet
Explorer. It also contains a number of important bugfixes, including
the removal of improper logic that could result in infinite loops and
fixed parsing for single-attributes with entities with DirectLex.
HTML Purifier 4.5.0 is a minor bugfix and feature release, containing an
accumulation of changes over a year. CSS support has been extended to
support display:inline-block, white-space, underscores in font families,
page-break-* CSS3 properties (when proprietary is enabled.) We now use
SHA-1 to identify cached definitions, and the semantics of stacked
attribute transforms has changed slightly.

22
composer.json Normal file
View File

@@ -0,0 +1,22 @@
{
"name": "ezyang/htmlpurifier",
"description": "Standards compliant HTML filter written in PHP",
"type": "library",
"keywords": ["html"],
"homepage": "http://htmlpurifier.org/",
"license": "LGPL",
"authors": [
{
"name": "Edward Z. Yang",
"email": "admin@htmlpurifier.org",
"homepage": "http://ezyang.com"
}
],
"require": {
"php": ">=5.2"
},
"autoload": {
"psr-0": { "HTMLPurifier": "library/" },
"files": ["library/HTMLPurifier.composer.php"]
}
}

View File

@@ -40,12 +40,26 @@
</xsl:apply-templates>
</ul>
</div>
<div id="typesContainer">
<h2>Types</h2>
<xsl:apply-templates select="$typeLookup" mode="types" />
</div>
<xsl:apply-templates />
</div>
</body>
</html>
</xsl:template>
<xsl:template match="type" mode="types">
<div class="type-block">
<xsl:attribute name="id">type-<xsl:value-of select="@id" /></xsl:attribute>
<h3><code><xsl:value-of select="@id" /></code>: <xsl:value-of select="@name" /></h3>
<div class="type-description">
<xsl:copy-of xmlns:xhtml="http://www.w3.org/1999/xhtml" select="xhtml:div/node()" />
</div>
</div>
</xsl:template>
<xsl:template match="title" mode="toc" />
<xsl:template match="namespace" mode="toc">
<xsl:param name="overflowNumber" />
@@ -192,10 +206,13 @@
<td>
<xsl:variable name="type" select="text()" />
<xsl:attribute name="class">type type-<xsl:value-of select="$type" /></xsl:attribute>
<xsl:value-of select="$typeLookup/type[@id=$type]/text()" />
<xsl:if test="@allow-null='yes'">
(or null)
</xsl:if>
<a>
<xsl:attribute name="href">#type-<xsl:value-of select="$type" /></xsl:attribute>
<xsl:value-of select="$typeLookup/type[@id=$type]/@name" />
<xsl:if test="@allow-null='yes'">
(or null)
</xsl:if>
</a>
</td>
</tr>
</xsl:template>

View File

@@ -1,16 +1,68 @@
<?xml version="1.0" encoding="UTF-8"?>
<types>
<type id="string">String</type>
<type id="istring">Case-insensitive string</type>
<type id="text">Text</type>
<type id="itext">Case-insensitive text</type>
<type id="int">Integer</type>
<type id="float">Float</type>
<type id="bool">Boolean</type>
<type id="lookup">Lookup array</type>
<type id="list">Array list</type>
<type id="hash">Associative array</type>
<type id="mixed">Mixed</type>
<type id="string" name="String"><div xmlns="http://www.w3.org/1999/xhtml">
A <a
href="http://docs.php.net/manual/en/language.types.string.php">sequence
of characters</a>.
</div></type>
<type id="istring" name="Case-insensitive string"><div xmlns="http://www.w3.org/1999/xhtml">
A series of case-insensitive characters. Internally, upper-case
ASCII characters will be converted to lower-case.
</div></type>
<type id="text" name="Text"><div xmlns="http://www.w3.org/1999/xhtml">
A series of characters that may contain newlines. Text tends to
indicate human-oriented text, as opposed to a machine format.
</div></type>
<type id="itext" name="Case-insensitive text"><div xmlns="http://www.w3.org/1999/xhtml">
A series of case-insensitive characters that may contain newlines.
</div></type>
<type id="int" name="Integer"><div xmlns="http://www.w3.org/1999/xhtml">
An <a
href="http://docs.php.net/manual/en/language.types.integer.php">
integer</a>. You are alternatively permitted to pass a string of
digits instead, which will be cast to an integer using
<code>(int)</code>.
</div></type>
<type id="float" name="Float"><div xmlns="http://www.w3.org/1999/xhtml">
A <a href="http://docs.php.net/manual/en/language.types.float.php">
floating point number</a>. You are alternatively permitted to
pass a numeric string (as defined by <code>is_numeric()</code>),
which will be cast to a float using <code>(float)</code>.
</div></type>
<type id="bool" name="Boolean"><div xmlns="http://www.w3.org/1999/xhtml">
A <a
href="http://docs.php.net/manual/en/language.types.boolean.php">boolean</a>.
You are alternatively permitted to pass an integer <code>0</code> or
<code>1</code> (other integers are not permitted) or a string
<code>"on"</code>, <code>"true"</code> or <code>"1"</code> for
<code>true</code>, and <code>"off"</code>, <code>"false"</code> or
<code>"0"</code> for <code>false</code>.
</div></type>
<type id="lookup" name="Lookup array"><div xmlns="http://www.w3.org/1999/xhtml">
An array whose values are <code>true</code>, e.g. <code>array('key'
=> true, 'key2' => true)</code>. You are alternatively permitted
to pass an array list of the keys <code>array('key', 'key2')</code>
or a comma-separated string of keys <code>"key, key2"</code>. If
you pass an array list of values, ensure that your values are
strictly numerically indexed: <code>array('key1', 2 =>
'key2')</code> will not do what you expect and emits a warning.
</div></type>
<type id="list" name="Array list"><div xmlns="http://www.w3.org/1999/xhtml">
An array which has consecutive integer indexes, e.g.
<code>array('val1', 'val2')</code>. You are alternatively permitted
to pass a comma-separated string of keys <code>"val1, val2"</code>.
If your array is not in this form, <code>array_values</code> is run
on the array and a warning is emitted.
</div></type>
<type id="hash" name="Associative array"><div xmlns="http://www.w3.org/1999/xhtml">
An array which is a mapping of keys to values, e.g.
<code>array('key1' => 'val1', 'key2' => 'val2')</code>. You are
alternatively permitted to pass a comma-separated string of
key-colon-value strings, e.g. <code>"key1: val1, key2: val2"</code>.
</div></type>
<type id="mixed" name="Mixed"><div xmlns="http://www.w3.org/1999/xhtml">
An arbitrary PHP value of any type.
</div></type>
</types>
<!-- vim: et sw=4 sts=4

View File

@@ -6,7 +6,7 @@
</file>
<file name="HTMLPurifier/Lexer.php">
<line>81</line>
<line>269</line>
<line>284</line>
</file>
<file name="HTMLPurifier/Lexer/DirectLex.php">
<line>53</line>
@@ -14,7 +14,7 @@
<line>348</line>
</file>
<file name="HTMLPurifier/Strategy/RemoveForeignElements.php">
<line>47</line>
<line>50</line>
</file>
</directive>
<directive id="CSS.MaxImgLength">
@@ -24,22 +24,32 @@
</directive>
<directive id="CSS.Proprietary">
<file name="HTMLPurifier/CSSDefinition.php">
<line>214</line>
<line>215</line>
</file>
</directive>
<directive id="CSS.AllowTricky">
<file name="HTMLPurifier/CSSDefinition.php">
<line>218</line>
<line>219</line>
</file>
</directive>
<directive id="CSS.Trusted">
<file name="HTMLPurifier/CSSDefinition.php">
<line>223</line>
</file>
</directive>
<directive id="CSS.AllowImportant">
<file name="HTMLPurifier/CSSDefinition.php">
<line>222</line>
<line>227</line>
</file>
</directive>
<directive id="CSS.AllowedProperties">
<file name="HTMLPurifier/CSSDefinition.php">
<line>275</line>
<line>302</line>
</file>
</directive>
<directive id="CSS.ForbiddenProperties">
<file name="HTMLPurifier/CSSDefinition.php">
<line>316</line>
</file>
</directive>
<directive id="Cache.DefinitionImpl">
@@ -69,44 +79,57 @@
</directive>
<directive id="Core.Encoding">
<file name="HTMLPurifier/Encoder.php">
<line>267</line>
<line>300</line>
<line>337</line>
<line>372</line>
</file>
</directive>
<directive id="Test.ForceNoIconv">
<file name="HTMLPurifier/Encoder.php">
<line>272</line>
<line>308</line>
<line>341</line>
<line>379</line>
</file>
</directive>
<directive id="Core.EscapeNonASCIICharacters">
<file name="HTMLPurifier/Encoder.php">
<line>304</line>
<line>373</line>
</file>
</directive>
<directive id="Output.CommentScriptContents">
<file name="HTMLPurifier/Generator.php">
<line>56</line>
<line>61</line>
</file>
</directive>
<directive id="Output.FixInnerHTML">
<file name="HTMLPurifier/Generator.php">
<line>62</line>
</file>
</directive>
<directive id="Output.SortAttr">
<file name="HTMLPurifier/Generator.php">
<line>57</line>
<line>63</line>
</file>
</directive>
<directive id="Output.FlashCompat">
<file name="HTMLPurifier/Generator.php">
<line>58</line>
<line>64</line>
</file>
</directive>
<directive id="Output.TidyFormat">
<file name="HTMLPurifier/Generator.php">
<line>87</line>
<line>93</line>
</file>
</directive>
<directive id="Core.NormalizeNewlines">
<file name="HTMLPurifier/Generator.php">
<line>107</line>
</file>
<file name="HTMLPurifier/Lexer.php">
<line>266</line>
</file>
</directive>
<directive id="Output.Newline">
<file name="HTMLPurifier/Generator.php">
<line>101</line>
<line>108</line>
</file>
</directive>
<directive id="HTML.BlockWrapper">
@@ -136,20 +159,20 @@
</directive>
<directive id="HTML.ForbiddenElements">
<file name="HTMLPurifier/HTMLDefinition.php">
<line>337</line>
<line>342</line>
</file>
</directive>
<directive id="HTML.ForbiddenAttributes">
<file name="HTMLPurifier/HTMLDefinition.php">
<line>338</line>
<line>343</line>
</file>
</directive>
<directive id="HTML.Trusted">
<file name="HTMLPurifier/HTMLModuleManager.php">
<line>202</line>
<line>204</line>
</file>
<file name="HTMLPurifier/Lexer.php">
<line>258</line>
<line>271</line>
</file>
<file name="HTMLPurifier/HTMLModule/Image.php">
<line>27</line>
@@ -163,27 +186,45 @@
</directive>
<directive id="HTML.AllowedModules">
<file name="HTMLPurifier/HTMLModuleManager.php">
<line>209</line>
<line>211</line>
</file>
</directive>
<directive id="HTML.CoreModules">
<file name="HTMLPurifier/HTMLModuleManager.php">
<line>210</line>
<line>212</line>
</file>
</directive>
<directive id="HTML.Proprietary">
<file name="HTMLPurifier/HTMLModuleManager.php">
<line>221</line>
<line>222</line>
</file>
</directive>
<directive id="HTML.SafeObject">
<file name="HTMLPurifier/HTMLModuleManager.php">
<line>226</line>
<line>225</line>
</file>
</directive>
<directive id="HTML.SafeEmbed">
<file name="HTMLPurifier/HTMLModuleManager.php">
<line>229</line>
<line>228</line>
</file>
</directive>
<directive id="HTML.SafeScripting">
<file name="HTMLPurifier/HTMLModuleManager.php">
<line>231</line>
</file>
<file name="HTMLPurifier/HTMLModule/SafeScripting.php">
<line>17</line>
</file>
</directive>
<directive id="HTML.Nofollow">
<file name="HTMLPurifier/HTMLModuleManager.php">
<line>234</line>
</file>
</directive>
<directive id="HTML.TargetBlank">
<file name="HTMLPurifier/HTMLModuleManager.php">
<line>237</line>
</file>
</directive>
<directive id="Attr.IDBlacklist">
@@ -211,12 +252,17 @@
</directive>
<directive id="Core.ConvertDocumentToFragment">
<file name="HTMLPurifier/Lexer.php">
<line>267</line>
<line>282</line>
</file>
</directive>
<directive id="Core.RemoveProcessingInstructions">
<file name="HTMLPurifier/Lexer.php">
<line>303</line>
</file>
</directive>
<directive id="URI.">
<file name="HTMLPurifier/URIDefinition.php">
<line>55</line>
<line>60</line>
</file>
<file name="HTMLPurifier/URIFilter/Munge.php">
<line>12</line>
@@ -224,17 +270,20 @@
</directive>
<directive id="URI.Host">
<file name="HTMLPurifier/URIDefinition.php">
<line>64</line>
<line>70</line>
</file>
<file name="HTMLPurifier/URIScheme.php">
<line>81</line>
</file>
</directive>
<directive id="URI.Base">
<file name="HTMLPurifier/URIDefinition.php">
<line>65</line>
<line>71</line>
</file>
</directive>
<directive id="URI.DefaultScheme">
<file name="HTMLPurifier/URIDefinition.php">
<line>72</line>
<line>78</line>
</file>
</directive>
<directive id="URI.AllowedSchemes">
@@ -260,6 +309,11 @@
<line>12</line>
</file>
</directive>
<directive id="CSS.AllowedFonts">
<file name="HTMLPurifier/AttrDef/CSS/FontFamily.php">
<line>50</line>
</file>
</directive>
<directive id="Attr.AllowedClasses">
<file name="HTMLPurifier/AttrDef/HTML/Class.php">
<line>18</line>
@@ -277,23 +331,23 @@
</directive>
<directive id="Attr.EnableID">
<file name="HTMLPurifier/AttrDef/HTML/ID.php">
<line>20</line>
<line>30</line>
</file>
</directive>
<directive id="Attr.IDPrefix">
<file name="HTMLPurifier/AttrDef/HTML/ID.php">
<line>26</line>
<line>36</line>
</file>
</directive>
<directive id="Attr.IDPrefixLocal">
<file name="HTMLPurifier/AttrDef/HTML/ID.php">
<line>28</line>
<line>31</line>
<line>38</line>
<line>41</line>
</file>
</directive>
<directive id="Attr.IDBlacklistRegexp">
<file name="HTMLPurifier/AttrDef/HTML/ID.php">
<line>54</line>
<line>64</line>
</file>
</directive>
<directive id="Attr.">
@@ -301,6 +355,11 @@
<line>30</line>
</file>
</directive>
<directive id="Core.EnableIDNA">
<file name="HTMLPurifier/AttrDef/URI/Host.php">
<line>67</line>
</file>
</directive>
<directive id="Attr.DefaultTextDir">
<file name="HTMLPurifier/AttrTransform/BdoDir.php">
<line>13</line>
@@ -337,6 +396,11 @@
<line>13</line>
</file>
</directive>
<directive id="HTML.FlashAllowFullScreen">
<file name="HTMLPurifier/AttrTransform/SafeParam.php">
<line>38</line>
</file>
</directive>
<directive id="Core.EscapeInvalidChildren">
<file name="HTMLPurifier/ChildDef/Required.php">
<line>62</line>
@@ -347,19 +411,33 @@
<line>91</line>
</file>
</directive>
<directive id="Cache.SerializerPermissions">
<file name="HTMLPurifier/DefinitionCache/Serializer.php">
<line>107</line>
<line>124</line>
</file>
</directive>
<directive id="Filter.ExtractStyleBlocks.TidyImpl">
<file name="HTMLPurifier/Filter/ExtractStyleBlocks.php">
<line>41</line>
<line>55</line>
</file>
</directive>
<directive id="Filter.ExtractStyleBlocks.Scope">
<file name="HTMLPurifier/Filter/ExtractStyleBlocks.php">
<line>65</line>
<line>79</line>
</file>
</directive>
<directive id="Filter.ExtractStyleBlocks.Escaping">
<file name="HTMLPurifier/Filter/ExtractStyleBlocks.php">
<line>123</line>
<line>277</line>
</file>
</directive>
<directive id="HTML.SafeIframe">
<file name="HTMLPurifier/HTMLModule/Iframe.php">
<line>17</line>
</file>
<file name="HTMLPurifier/URIFilter/SafeIframe.php">
<line>23</line>
</file>
</directive>
<directive id="HTML.MaxImgLength">
@@ -395,12 +473,12 @@
</directive>
<directive id="AutoFormat.RemoveEmpty.RemoveNbsp">
<file name="HTMLPurifier/Injector/RemoveEmpty.php">
<line>12</line>
<line>15</line>
</file>
</directive>
<directive id="AutoFormat.RemoveEmpty.RemoveNbsp.Exceptions">
<file name="HTMLPurifier/Injector/RemoveEmpty.php">
<line>13</line>
<line>16</line>
</file>
</directive>
<directive id="Core.AggressivelyFixLt">
@@ -413,27 +491,42 @@
<line>70</line>
</file>
</directive>
<directive id="Core.DisableExcludes">
<file name="HTMLPurifier/Strategy/FixNesting.php">
<line>57</line>
</file>
</directive>
<directive id="Core.EscapeInvalidTags">
<file name="HTMLPurifier/Strategy/MakeWellFormed.php">
<line>45</line>
<line>53</line>
</file>
<file name="HTMLPurifier/Strategy/RemoveForeignElements.php">
<line>19</line>
</file>
</directive>
<directive id="Core.RemoveScriptContents">
<directive id="HTML.AllowedComments">
<file name="HTMLPurifier/Strategy/RemoveForeignElements.php">
<line>24</line>
</file>
</directive>
<directive id="HTML.AllowedCommentsRegexp">
<file name="HTMLPurifier/Strategy/RemoveForeignElements.php">
<line>25</line>
</file>
</directive>
<directive id="Core.RemoveScriptContents">
<file name="HTMLPurifier/Strategy/RemoveForeignElements.php">
<line>28</line>
</file>
</directive>
<directive id="Core.HiddenElements">
<file name="HTMLPurifier/Strategy/RemoveForeignElements.php">
<line>26</line>
<line>29</line>
</file>
</directive>
<directive id="URI.HostBlacklist">
<file name="HTMLPurifier/URIFilter/HostBlacklist.php">
<line>8</line>
<line>12</line>
</file>
</directive>
<directive id="URI.MungeResources">
@@ -446,4 +539,9 @@
<line>15</line>
</file>
</directive>
<directive id="URI.SafeIframeRegexp">
<file name="HTMLPurifier/URIFilter/SafeIframe.php">
<line>18</line>
</file>
</directive>
</usage>

View File

@@ -255,7 +255,7 @@ Mozilla on inside and needs -moz-outline, no IE support.</td></tr>
<tr class="feature"><td>axis</td><td>TD, TH</td><td>W3C only: No browser implementation</td></tr>
<tr class="feature"><td>char</td><td>COL, COLGROUP, TBODY, TD, TFOOT, TH, THEAD, TR</td><td>W3C only: No browser implementation</td></tr>
<tr class="feature"><td>headers</td><td>TD, TH</td><td>W3C only: No browser implementation</td></tr>
<tr class="feature"><td>scope</td><td>TD, TH</td><td>W3C only: No browser implementation</td></tr>
<tr class="impl-yes"><td>scope</td><td>TD, TH</td><td>W3C only: No browser implementation</td></tr>
</tbody>
<tbody class="impl-yes">

View File

@@ -146,7 +146,9 @@
<pre>$config = HTMLPurifier_Config::createDefault();
$config-&gt;set('HTML.DefinitionID', 'enduser-customize.html tutorial');
$config-&gt;set('HTML.DefinitionRev', 1);
$def = $config-&gt;getHTMLDefinition(true);</pre>
if ($def = $config-&gt;maybeGetRawHTMLDefinition()) {
// our code will go here
}</pre>
<p>
Assuming that HTML Purifier has already been properly loaded (hint:
@@ -174,23 +176,15 @@ $def = $config-&gt;getHTMLDefinition(true);</pre>
</li>
<li>
The fourth line retrieves a raw <code>HTMLPurifier_HTMLDefinition</code>
object that we will be tweaking. If the parameter was removed, we
would be retrieving a fully formed definition object, which is somewhat
useless for customization purposes.
object that we will be tweaking. Interestingly enough, we have
placed it in an if block: this is because
<code>maybeGetRawHTMLDefinition</code>, as its name suggests, may
return a NULL, in which case we should skip doing any
initialization. This, in fact, will correspond to when our fully
customized object is already in the cache.
</li>
</ul>
<h3>Broken backwards-compatibility</h3>
<p>
Those of you who have already been twiddling around with the raw
HTML definition object, you'll be noticing that you're getting an error
when you attempt to retrieve the raw definition object without specifying
a DefinitionID. It is vital to caching (see below) that you make a unique
name for your customized definition, so make up something right now and
things will operate again.
</p>
<h2>Turn off caching</h2>
<p>
@@ -781,6 +775,75 @@ $form-&gt;excludes = array('form' => true);</strong></pre>
<li><a href="http://repo.or.cz/w/htmlpurifier.git?a=blob;hb=HEAD;f=library/HTMLPurifier/ElementDef.php"><code>library/HTMLPurifier/ElementDef.php</code></a></li>
</ul>
<h2 id="optimized">Notes for HTML Purifier 4.2.0 and earlier</h3>
<p>
Previously, this tutorial gave some incorrect template code for
editing raw definitions, and that template code will now produce the
error <q>Due to a documentation error in previous version of HTML
Purifier...</q> Here is how to mechanically transform old-style
code into new-style code.
</p>
<p>
First, identify all code that edits the raw definition object, and
put it together. Ensure none of this code must be run on every
request; if some sub-part needs to always be run, move it outside
this block. Here is an example below, with the raw definition
object code bolded.
</p>
<pre>$config = HTMLPurifier_Config::createDefault();
$config-&gt;set('HTML.DefinitionID', 'enduser-customize.html tutorial');
$config-&gt;set('HTML.DefinitionRev', 1);
$def = $config-&gt;getHTMLDefinition(true);
<strong>$def->addAttribute('a', 'target', 'Enum#_blank,_self,_target,_top');</strong>
$purifier = new HTMLPurifier($config);</pre>
<p>
Next, replace the raw definition retrieval with a
maybeGetRawHTMLDefinition method call inside an if conditional, and
place the editing code inside that if block.
</p>
<pre>$config = HTMLPurifier_Config::createDefault();
$config-&gt;set('HTML.DefinitionID', 'enduser-customize.html tutorial');
$config-&gt;set('HTML.DefinitionRev', 1);
<strong>if ($def = $config-&gt;maybeGetRawHTMLDefinition()) {
$def->addAttribute('a', 'target', 'Enum#_blank,_self,_target,_top');
}</strong>
$purifier = new HTMLPurifier($config);</pre>
<p>
And you're done! Alternatively, if you're OK with not ever caching
your code, the following will still work and not emit warnings.
</p>
<pre>$config = HTMLPurifier_Config::createDefault();
$def = $config-&gt;getHTMLDefinition(true);
$def->addAttribute('a', 'target', 'Enum#_blank,_self,_target,_top');
$purifier = new HTMLPurifier($config);</pre>
<p>
A slightly less efficient version of this was what was going on with
old versions of HTML Purifier.
</p>
<p>
<em>Technical notes:</em> ajh pointed out on <a
href="http://htmlpurifier.org/phorum/read.php?5,5164,5169#msg-5169">in a forum topic</a> that
HTML Purifier appeared to be repeatedly writing to the cache even
when a cache entry already existed. Investigation lead to the
discovery of the following infelicity: caching of customized
definitions didn't actually work! The problem was that even though
a cache file would be written out at the end of the process, there
was no way for HTML Purifier to say, <q>Actually, I've already got a
copy of your work, no need to reconfigure your
customizations</q>. This required the API to change: placing
all of the customizations to the raw definition object in a
conditional which could be skipped.
</p>
</body></html>
<!-- vim: et sw=4 sts=4

View File

@@ -561,7 +561,7 @@ page on special characters</a> for more details.
<h3 id="whyutf8-forms">Forms</h3>
<p>While we're on the tack of users, how do non-UTF-8 web forms deal
with characters that our outside of their character set? Rather than
with characters that are outside of their character set? Rather than
discuss what UTF-8 does right, we're going to show what could go wrong
if you didn't use UTF-8 and people tried to use characters outside
of your character encoding.</p>

View File

@@ -3,6 +3,7 @@
/**
* @file
* Convenience file that registers autoload handler for HTML Purifier.
* It also does some sanity checks.
*/
if (function_exists('spl_autoload_register') && function_exists('spl_autoload_unregister')) {
@@ -18,4 +19,8 @@ if (function_exists('spl_autoload_register') && function_exists('spl_autoload_un
}
}
if (ini_get('zend.ze1_compatibility_mode')) {
trigger_error("HTML Purifier is not compatible with zend.ze1_compatibility_mode; please turn it off", E_USER_ERROR);
}
// vim: et sw=4 sts=4

View File

@@ -0,0 +1,4 @@
<?php
if (!defined('HTMLPURIFIER_PREFIX')) {
define('HTMLPURIFIER_PREFIX', __DIR__);
}

View File

@@ -7,7 +7,7 @@
* primary concern and you are using an opcode cache. PLEASE DO NOT EDIT THIS
* FILE, changes will be overwritten the next time the script is run.
*
* @version 4.1.1
* @version 4.5.0
*
* @warning
* You must *not* include any other HTML Purifier files before this file,
@@ -19,6 +19,8 @@
*/
require 'HTMLPurifier.php';
require 'HTMLPurifier/Array.php';
require 'HTMLPurifier/ArrayNode.php';
require 'HTMLPurifier/AttrCollections.php';
require 'HTMLPurifier/AttrDef.php';
require 'HTMLPurifier/AttrTransform.php';
@@ -36,6 +38,7 @@ require 'HTMLPurifier/DefinitionCache.php';
require 'HTMLPurifier/DefinitionCacheFactory.php';
require 'HTMLPurifier/Doctype.php';
require 'HTMLPurifier/DoctypeRegistry.php';
require 'HTMLPurifier/DoublyLinkedList.php';
require 'HTMLPurifier/ElementDef.php';
require 'HTMLPurifier/Encoder.php';
require 'HTMLPurifier/EntityLookup.php';
@@ -73,6 +76,7 @@ require 'HTMLPurifier/UnitConverter.php';
require 'HTMLPurifier/VarParser.php';
require 'HTMLPurifier/VarParserException.php';
require 'HTMLPurifier/AttrDef/CSS.php';
require 'HTMLPurifier/AttrDef/Clone.php';
require 'HTMLPurifier/AttrDef/Enum.php';
require 'HTMLPurifier/AttrDef/Integer.php';
require 'HTMLPurifier/AttrDef/Lang.php';
@@ -90,6 +94,7 @@ require 'HTMLPurifier/AttrDef/CSS/DenyElementDecorator.php';
require 'HTMLPurifier/AttrDef/CSS/Filter.php';
require 'HTMLPurifier/AttrDef/CSS/Font.php';
require 'HTMLPurifier/AttrDef/CSS/FontFamily.php';
require 'HTMLPurifier/AttrDef/CSS/Ident.php';
require 'HTMLPurifier/AttrDef/CSS/ImportantDecorator.php';
require 'HTMLPurifier/AttrDef/CSS/Length.php';
require 'HTMLPurifier/AttrDef/CSS/ListStyle.php';
@@ -125,14 +130,17 @@ require 'HTMLPurifier/AttrTransform/Lang.php';
require 'HTMLPurifier/AttrTransform/Length.php';
require 'HTMLPurifier/AttrTransform/Name.php';
require 'HTMLPurifier/AttrTransform/NameSync.php';
require 'HTMLPurifier/AttrTransform/Nofollow.php';
require 'HTMLPurifier/AttrTransform/SafeEmbed.php';
require 'HTMLPurifier/AttrTransform/SafeObject.php';
require 'HTMLPurifier/AttrTransform/SafeParam.php';
require 'HTMLPurifier/AttrTransform/ScriptRequired.php';
require 'HTMLPurifier/AttrTransform/TargetBlank.php';
require 'HTMLPurifier/AttrTransform/Textarea.php';
require 'HTMLPurifier/ChildDef/Chameleon.php';
require 'HTMLPurifier/ChildDef/Custom.php';
require 'HTMLPurifier/ChildDef/Empty.php';
require 'HTMLPurifier/ChildDef/List.php';
require 'HTMLPurifier/ChildDef/Required.php';
require 'HTMLPurifier/ChildDef/Optional.php';
require 'HTMLPurifier/ChildDef/StrictBlockquote.php';
@@ -147,10 +155,12 @@ require 'HTMLPurifier/HTMLModule/CommonAttributes.php';
require 'HTMLPurifier/HTMLModule/Edit.php';
require 'HTMLPurifier/HTMLModule/Forms.php';
require 'HTMLPurifier/HTMLModule/Hypertext.php';
require 'HTMLPurifier/HTMLModule/Iframe.php';
require 'HTMLPurifier/HTMLModule/Image.php';
require 'HTMLPurifier/HTMLModule/Legacy.php';
require 'HTMLPurifier/HTMLModule/List.php';
require 'HTMLPurifier/HTMLModule/Name.php';
require 'HTMLPurifier/HTMLModule/Nofollow.php';
require 'HTMLPurifier/HTMLModule/NonXMLCommonAttributes.php';
require 'HTMLPurifier/HTMLModule/Object.php';
require 'HTMLPurifier/HTMLModule/Presentation.php';
@@ -158,10 +168,12 @@ require 'HTMLPurifier/HTMLModule/Proprietary.php';
require 'HTMLPurifier/HTMLModule/Ruby.php';
require 'HTMLPurifier/HTMLModule/SafeEmbed.php';
require 'HTMLPurifier/HTMLModule/SafeObject.php';
require 'HTMLPurifier/HTMLModule/SafeScripting.php';
require 'HTMLPurifier/HTMLModule/Scripting.php';
require 'HTMLPurifier/HTMLModule/StyleAttribute.php';
require 'HTMLPurifier/HTMLModule/Tables.php';
require 'HTMLPurifier/HTMLModule/Target.php';
require 'HTMLPurifier/HTMLModule/TargetBlank.php';
require 'HTMLPurifier/HTMLModule/Text.php';
require 'HTMLPurifier/HTMLModule/Tidy.php';
require 'HTMLPurifier/HTMLModule/XMLCommonAttributes.php';
@@ -196,10 +208,13 @@ require 'HTMLPurifier/Token/Start.php';
require 'HTMLPurifier/Token/Text.php';
require 'HTMLPurifier/URIFilter/DisableExternal.php';
require 'HTMLPurifier/URIFilter/DisableExternalResources.php';
require 'HTMLPurifier/URIFilter/DisableResources.php';
require 'HTMLPurifier/URIFilter/HostBlacklist.php';
require 'HTMLPurifier/URIFilter/MakeAbsolute.php';
require 'HTMLPurifier/URIFilter/Munge.php';
require 'HTMLPurifier/URIFilter/SafeIframe.php';
require 'HTMLPurifier/URIScheme/data.php';
require 'HTMLPurifier/URIScheme/file.php';
require 'HTMLPurifier/URIScheme/ftp.php';
require 'HTMLPurifier/URIScheme/http.php';
require 'HTMLPurifier/URIScheme/https.php';

View File

@@ -19,7 +19,7 @@
*/
/*
HTML Purifier 4.1.1 - Standards Compliant HTML Filtering
HTML Purifier 4.5.0 - Standards Compliant HTML Filtering
Copyright (C) 2006-2008 Edward Z. Yang
This library is free software; you can redistribute it and/or
@@ -55,10 +55,10 @@ class HTMLPurifier
{
/** Version of HTML Purifier */
public $version = '4.1.1';
public $version = '4.5.0';
/** Constant with version of HTML Purifier */
const VERSION = '4.1.1';
const VERSION = '4.5.0';
/** Global configuration object */
public $config;

View File

@@ -13,6 +13,8 @@
$__dir = dirname(__FILE__);
require_once $__dir . '/HTMLPurifier.php';
require_once $__dir . '/HTMLPurifier/Array.php';
require_once $__dir . '/HTMLPurifier/ArrayNode.php';
require_once $__dir . '/HTMLPurifier/AttrCollections.php';
require_once $__dir . '/HTMLPurifier/AttrDef.php';
require_once $__dir . '/HTMLPurifier/AttrTransform.php';
@@ -30,6 +32,7 @@ require_once $__dir . '/HTMLPurifier/DefinitionCache.php';
require_once $__dir . '/HTMLPurifier/DefinitionCacheFactory.php';
require_once $__dir . '/HTMLPurifier/Doctype.php';
require_once $__dir . '/HTMLPurifier/DoctypeRegistry.php';
require_once $__dir . '/HTMLPurifier/DoublyLinkedList.php';
require_once $__dir . '/HTMLPurifier/ElementDef.php';
require_once $__dir . '/HTMLPurifier/Encoder.php';
require_once $__dir . '/HTMLPurifier/EntityLookup.php';
@@ -67,6 +70,7 @@ require_once $__dir . '/HTMLPurifier/UnitConverter.php';
require_once $__dir . '/HTMLPurifier/VarParser.php';
require_once $__dir . '/HTMLPurifier/VarParserException.php';
require_once $__dir . '/HTMLPurifier/AttrDef/CSS.php';
require_once $__dir . '/HTMLPurifier/AttrDef/Clone.php';
require_once $__dir . '/HTMLPurifier/AttrDef/Enum.php';
require_once $__dir . '/HTMLPurifier/AttrDef/Integer.php';
require_once $__dir . '/HTMLPurifier/AttrDef/Lang.php';
@@ -84,6 +88,7 @@ require_once $__dir . '/HTMLPurifier/AttrDef/CSS/DenyElementDecorator.php';
require_once $__dir . '/HTMLPurifier/AttrDef/CSS/Filter.php';
require_once $__dir . '/HTMLPurifier/AttrDef/CSS/Font.php';
require_once $__dir . '/HTMLPurifier/AttrDef/CSS/FontFamily.php';
require_once $__dir . '/HTMLPurifier/AttrDef/CSS/Ident.php';
require_once $__dir . '/HTMLPurifier/AttrDef/CSS/ImportantDecorator.php';
require_once $__dir . '/HTMLPurifier/AttrDef/CSS/Length.php';
require_once $__dir . '/HTMLPurifier/AttrDef/CSS/ListStyle.php';
@@ -119,14 +124,17 @@ require_once $__dir . '/HTMLPurifier/AttrTransform/Lang.php';
require_once $__dir . '/HTMLPurifier/AttrTransform/Length.php';
require_once $__dir . '/HTMLPurifier/AttrTransform/Name.php';
require_once $__dir . '/HTMLPurifier/AttrTransform/NameSync.php';
require_once $__dir . '/HTMLPurifier/AttrTransform/Nofollow.php';
require_once $__dir . '/HTMLPurifier/AttrTransform/SafeEmbed.php';
require_once $__dir . '/HTMLPurifier/AttrTransform/SafeObject.php';
require_once $__dir . '/HTMLPurifier/AttrTransform/SafeParam.php';
require_once $__dir . '/HTMLPurifier/AttrTransform/ScriptRequired.php';
require_once $__dir . '/HTMLPurifier/AttrTransform/TargetBlank.php';
require_once $__dir . '/HTMLPurifier/AttrTransform/Textarea.php';
require_once $__dir . '/HTMLPurifier/ChildDef/Chameleon.php';
require_once $__dir . '/HTMLPurifier/ChildDef/Custom.php';
require_once $__dir . '/HTMLPurifier/ChildDef/Empty.php';
require_once $__dir . '/HTMLPurifier/ChildDef/List.php';
require_once $__dir . '/HTMLPurifier/ChildDef/Required.php';
require_once $__dir . '/HTMLPurifier/ChildDef/Optional.php';
require_once $__dir . '/HTMLPurifier/ChildDef/StrictBlockquote.php';
@@ -141,10 +149,12 @@ require_once $__dir . '/HTMLPurifier/HTMLModule/CommonAttributes.php';
require_once $__dir . '/HTMLPurifier/HTMLModule/Edit.php';
require_once $__dir . '/HTMLPurifier/HTMLModule/Forms.php';
require_once $__dir . '/HTMLPurifier/HTMLModule/Hypertext.php';
require_once $__dir . '/HTMLPurifier/HTMLModule/Iframe.php';
require_once $__dir . '/HTMLPurifier/HTMLModule/Image.php';
require_once $__dir . '/HTMLPurifier/HTMLModule/Legacy.php';
require_once $__dir . '/HTMLPurifier/HTMLModule/List.php';
require_once $__dir . '/HTMLPurifier/HTMLModule/Name.php';
require_once $__dir . '/HTMLPurifier/HTMLModule/Nofollow.php';
require_once $__dir . '/HTMLPurifier/HTMLModule/NonXMLCommonAttributes.php';
require_once $__dir . '/HTMLPurifier/HTMLModule/Object.php';
require_once $__dir . '/HTMLPurifier/HTMLModule/Presentation.php';
@@ -152,10 +162,12 @@ require_once $__dir . '/HTMLPurifier/HTMLModule/Proprietary.php';
require_once $__dir . '/HTMLPurifier/HTMLModule/Ruby.php';
require_once $__dir . '/HTMLPurifier/HTMLModule/SafeEmbed.php';
require_once $__dir . '/HTMLPurifier/HTMLModule/SafeObject.php';
require_once $__dir . '/HTMLPurifier/HTMLModule/SafeScripting.php';
require_once $__dir . '/HTMLPurifier/HTMLModule/Scripting.php';
require_once $__dir . '/HTMLPurifier/HTMLModule/StyleAttribute.php';
require_once $__dir . '/HTMLPurifier/HTMLModule/Tables.php';
require_once $__dir . '/HTMLPurifier/HTMLModule/Target.php';
require_once $__dir . '/HTMLPurifier/HTMLModule/TargetBlank.php';
require_once $__dir . '/HTMLPurifier/HTMLModule/Text.php';
require_once $__dir . '/HTMLPurifier/HTMLModule/Tidy.php';
require_once $__dir . '/HTMLPurifier/HTMLModule/XMLCommonAttributes.php';
@@ -190,10 +202,13 @@ require_once $__dir . '/HTMLPurifier/Token/Start.php';
require_once $__dir . '/HTMLPurifier/Token/Text.php';
require_once $__dir . '/HTMLPurifier/URIFilter/DisableExternal.php';
require_once $__dir . '/HTMLPurifier/URIFilter/DisableExternalResources.php';
require_once $__dir . '/HTMLPurifier/URIFilter/DisableResources.php';
require_once $__dir . '/HTMLPurifier/URIFilter/HostBlacklist.php';
require_once $__dir . '/HTMLPurifier/URIFilter/MakeAbsolute.php';
require_once $__dir . '/HTMLPurifier/URIFilter/Munge.php';
require_once $__dir . '/HTMLPurifier/URIFilter/SafeIframe.php';
require_once $__dir . '/HTMLPurifier/URIScheme/data.php';
require_once $__dir . '/HTMLPurifier/URIScheme/file.php';
require_once $__dir . '/HTMLPurifier/URIScheme/ftp.php';
require_once $__dir . '/HTMLPurifier/URIScheme/http.php';
require_once $__dir . '/HTMLPurifier/URIScheme/https.php';

View File

@@ -0,0 +1,184 @@
<?php
class HTMLPurifier_Array implements ArrayAccess
{
/**
* @param HTMLPurifier_ArrayNode
*/
public $head = null;
/**
* @var int
*/
protected $count = 0;
/**
* @var int
*/
protected $offset = 0;
/**
* @var HTMLPurifier_ArrayNode
*/
protected $offsetItem = null;
public function __construct(array $array = array())
{
/**
* @var HTMLPurifier_ArrayNode $temp
*/
$temp = null;
$i = 0;
foreach ($array as &$v) {
$item = new HTMLPurifier_ArrayNode($v);
if ($this->head == null) {
$this->head = &$item;
}
if ($temp instanceof HTMLPurifier_ArrayNode) {
$item->prev = &$temp;
$temp->next = &$item;
}
unset($temp);
$temp = &$item;
$i ++;
unset($item, $v);
}
$this->count = $i;
$this->offset = 0;
$this->offsetItem = &$this->head;
}
protected function findIndex($offset)
{
if ($this->head == null) {
return array(
'correct' => false,
'value' => null
);
}
$current = &$this->head;
$index = 0;
if ($this->offset <= $offset && $this->offsetItem instanceof HTMLPurifier_ArrayNode) {
$current = &$this->offsetItem;
$index = $this->offset;
}
while ($current->next instanceof HTMLPurifier_ArrayNode && $index != $offset) {
$current = &$current->next;
$index ++;
}
if ($index == $offset) {
$this->offset = $offset;
$this->offsetItem = &$current;
return array(
'correct' => true,
'value' => &$current
);
}
return array(
'correct' => false,
'value' => &$current
);
}
public function insertBefore($offset, $value)
{
$result = $this->findIndex($offset);
$this->count ++;
$item = new HTMLPurifier_ArrayNode($value);
if ($result['correct'] == false) {
if ($result['value'] instanceof HTMLPurifier_ArrayNode) {
$result['value']->next = &$item;
$item->prev = &$result['value'];
}
} else {
if ($result['value'] instanceof HTMLPurifier_ArrayNode) {
$item->prev = &$result['value']->prev;
$item->next = &$result['value'];
}
if ($item->prev instanceof HTMLPurifier_ArrayNode) {
$item->prev->next = &$item;
}
if ($result['value'] instanceof HTMLPurifier_ArrayNode) {
$result['value']->prev = &$item;
}
}
if ($offset == 0) {
$this->head = &$item;
}
if ($offset <= $this->offset && $this->offsetItem instanceof HTMLPurifier_ArrayNode) {
$this->offsetItem = &$this->offsetItem->prev;
}
}
public function remove($offset)
{
$result = $this->findIndex($offset);
if ($result['correct']) {
$this->count --;
$item = $result['value'];
$item->prev->next = &$result['value']->next;
$item->next->prev = &$result['value']->prev;
if ($offset == 0) {
$this->head = &$item->next;
}
if ($offset < $this->offset) {
$this->offset --;
} elseif ($offset == $this->offset) {
$this->offsetItem = &$item->next;
}
}
}
public function getArray()
{
$return = array();
$head = $this->head;
while ($head instanceof HTMLPurifier_ArrayNode) {
$return[] = $head->value;
$head = &$head->next;
}
return $return;
}
public function offsetExists($offset)
{
return $offset >= 0 && $offset < $this->count;
}
public function offsetGet($offset)
{
$result = $this->findIndex($offset);
if ($result['correct']) {
return $result['value']->value;
}
return null;
}
public function offsetSet($offset, $value)
{
$result = $this->findIndex($offset);
if ($result['correct']) {
$result['value']->value = &$value;
}
}
public function offsetUnset($offset)
{
$this->remove($offset);
}
}

View File

@@ -0,0 +1,24 @@
<?php
class HTMLPurifier_ArrayNode
{
public function __construct(&$value)
{
$this->value = &$value;
}
/**
* @var HTMLPurifier_ArrayNode
*/
public $prev = null;
/**
* @var HTMLPurifier_ArrayNode
*/
public $next = null;
/**
* @var mixed
*/
public $value = null;
}

View File

@@ -32,7 +32,7 @@ class HTMLPurifier_AttrDef_CSS_Background extends HTMLPurifier_AttrDef
$string = $this->mungeRgb($string);
// assumes URI doesn't have spaces in it
$bits = explode(' ', strtolower($string)); // bits to process
$bits = explode(' ', $string); // bits to process
$caught = array();
$caught['color'] = false;

View File

@@ -2,11 +2,43 @@
/**
* Validates a font family list according to CSS spec
* @todo whitelisting allowed fonts would be nice
*/
class HTMLPurifier_AttrDef_CSS_FontFamily extends HTMLPurifier_AttrDef
{
protected $mask = null;
public function __construct() {
$this->mask = '_- ';
for ($c = 'a'; $c <= 'z'; $c++) $this->mask .= $c;
for ($c = 'A'; $c <= 'Z'; $c++) $this->mask .= $c;
for ($c = '0'; $c <= '9'; $c++) $this->mask .= $c; // cast-y, but should be fine
// special bytes used by UTF-8
for ($i = 0x80; $i <= 0xFF; $i++) {
// We don't bother excluding invalid bytes in this range,
// because the our restriction of well-formed UTF-8 will
// prevent these from ever occurring.
$this->mask .= chr($i);
}
/*
PHP's internal strcspn implementation is
O(length of string * length of mask), making it inefficient
for large masks. However, it's still faster than
preg_match 8)
for (p = s1;;) {
spanp = s2;
do {
if (*spanp == c || p == s1_end) {
return p - s1;
}
} while (spanp++ < (s2_end - 1));
c = *++p;
}
*/
// possible optimization: invert the mask.
}
public function validate($string, $config, $context) {
static $generic_names = array(
'serif' => true,
@@ -15,6 +47,7 @@ class HTMLPurifier_AttrDef_CSS_FontFamily extends HTMLPurifier_AttrDef
'fantasy' => true,
'cursive' => true
);
$allowed_fonts = $config->get('CSS.AllowedFonts');
// assume that no font names contain commas in them
$fonts = explode(',', $string);
@@ -24,7 +57,9 @@ class HTMLPurifier_AttrDef_CSS_FontFamily extends HTMLPurifier_AttrDef
if ($font === '') continue;
// match a generic name
if (isset($generic_names[$font])) {
$final .= $font . ', ';
if ($allowed_fonts === null || isset($allowed_fonts[$font])) {
$final .= $font . ', ';
}
continue;
}
// match a quoted name
@@ -40,6 +75,10 @@ class HTMLPurifier_AttrDef_CSS_FontFamily extends HTMLPurifier_AttrDef
// $font is a pure representation of the font name
if ($allowed_fonts !== null && !isset($allowed_fonts[$font])) {
continue;
}
if (ctype_alnum($font) && $font !== '') {
// very simple font, allow it in unharmed
$final .= $font . ', ';
@@ -50,17 +89,103 @@ class HTMLPurifier_AttrDef_CSS_FontFamily extends HTMLPurifier_AttrDef
// shouldn't show up regardless
$font = str_replace(array("\n", "\t", "\r", "\x0C"), ' ', $font);
// These ugly transforms don't pose a security
// risk (as \\ and \" might). We could try to be clever and
// use single-quote wrapping when there is a double quote
// present, but I have choosen not to implement that.
// (warning: this code relies on the selection of quotation
// mark below)
$font = str_replace('\\', '\\5C ', $font);
$font = str_replace('"', '\\22 ', $font);
// Here, there are various classes of characters which need
// to be treated differently:
// - Alphanumeric characters are essentially safe. We
// handled these above.
// - Spaces require quoting, though most parsers will do
// the right thing if there aren't any characters that
// can be misinterpreted
// - Dashes rarely occur, but they fairly unproblematic
// for parsing/rendering purposes.
// The above characters cover the majority of Western font
// names.
// - Arbitrary Unicode characters not in ASCII. Because
// most parsers give little thought to Unicode, treatment
// of these codepoints is basically uniform, even for
// punctuation-like codepoints. These characters can
// show up in non-Western pages and are supported by most
// major browsers, for example: " 明朝" is a
// legitimate font-name
// <http://ja.wikipedia.org/wiki/MS_明朝>. See
// the CSS3 spec for more examples:
// <http://www.w3.org/TR/2011/WD-css3-fonts-20110324/localizedfamilynames.png>
// You can see live samples of these on the Internet:
// <http://www.google.co.jp/search?q=font-family++明朝|ゴシック>
// However, most of these fonts have ASCII equivalents:
// for example, 'MS Mincho', and it's considered
// professional to use ASCII font names instead of
// Unicode font names. Thanks Takeshi Terada for
// providing this information.
// The following characters, to my knowledge, have not been
// used to name font names.
// - Single quote. While theoretically you might find a
// font name that has a single quote in its name (serving
// as an apostrophe, e.g. Dave's Scribble), I haven't
// been able to find any actual examples of this.
// Internet Explorer's cssText translation (which I
// believe is invoked by innerHTML) normalizes any
// quoting to single quotes, and fails to escape single
// quotes. (Note that this is not IE's behavior for all
// CSS properties, just some sort of special casing for
// font-family). So a single quote *cannot* be used
// safely in the font-family context if there will be an
// innerHTML/cssText translation. Note that Firefox 3.x
// does this too.
// - Double quote. In IE, these get normalized to
// single-quotes, no matter what the encoding. (Fun
// fact, in IE8, the 'content' CSS property gained
// support, where they special cased to preserve encoded
// double quotes, but still translate unadorned double
// quotes into single quotes.) So, because their
// fixpoint behavior is identical to single quotes, they
// cannot be allowed either. Firefox 3.x displays
// single-quote style behavior.
// - Backslashes are reduced by one (so \\ -> \) every
// iteration, so they cannot be used safely. This shows
// up in IE7, IE8 and FF3
// - Semicolons, commas and backticks are handled properly.
// - The rest of the ASCII punctuation is handled properly.
// We haven't checked what browsers do to unadorned
// versions, but this is not important as long as the
// browser doesn't /remove/ surrounding quotes (as IE does
// for HTML).
//
// With these results in hand, we conclude that there are
// various levels of safety:
// - Paranoid: alphanumeric, spaces and dashes(?)
// - International: Paranoid + non-ASCII Unicode
// - Edgy: Everything except quotes, backslashes
// - NoJS: Standards compliance, e.g. sod IE. Note that
// with some judicious character escaping (since certain
// types of escaping doesn't work) this is theoretically
// OK as long as innerHTML/cssText is not called.
// We believe that international is a reasonable default
// (that we will implement now), and once we do more
// extensive research, we may feel comfortable with dropping
// it down to edgy.
// complicated font, requires quoting
$final .= "\"$font\", "; // note that this will later get turned into &quot;
// Edgy: alphanumeric, spaces, dashes, underscores and Unicode. Use of
// str(c)spn assumes that the string was already well formed
// Unicode (which of course it is).
if (strspn($font, $this->mask) !== strlen($font)) {
continue;
}
// Historical:
// In the absence of innerHTML/cssText, these ugly
// transforms don't pose a security risk (as \\ and \"
// might--these escapes are not supported by most browsers).
// We could try to be clever and use single-quote wrapping
// when there is a double quote present, but I have choosen
// not to implement that. (NOTE: you can reduce the amount
// of escapes by one depending on what quoting style you use)
// $font = str_replace('\\', '\\5C ', $font);
// $font = str_replace('"', '\\22 ', $font);
// $font = str_replace("'", '\\27 ', $font);
// font possibly with spaces, requires quoting
$final .= "'$font', ";
}
$final = rtrim($final, ', ');
if ($final === '') return false;

View File

@@ -0,0 +1,24 @@
<?php
/**
* Validates based on {ident} CSS grammar production
*/
class HTMLPurifier_AttrDef_CSS_Ident extends HTMLPurifier_AttrDef
{
public function validate($string, $config, $context) {
$string = trim($string);
// early abort: '' and '0' (strings that convert to false) are invalid
if (!$string) return false;
$pattern = '/^(-?[A-Za-z_][A-Za-z_\-0-9]*)$/';
if (!preg_match($pattern, $string)) return false;
return $string;
}
}
// vim: et sw=4 sts=4

View File

@@ -43,6 +43,15 @@ class HTMLPurifier_AttrDef_CSS_URI extends HTMLPurifier_AttrDef_URI
// extra sanity check; should have been done by URI
$result = str_replace(array('"', "\\", "\n", "\x0c", "\r"), "", $result);
// suspicious characters are ()'; we're going to percent encode
// them for safety.
$result = str_replace(array('(', ')', "'"), array('%28', '%29', '%27'), $result);
// there's an extra bug where ampersands lose their escaping on
// an innerHTML cycle, so a very unlucky query parameter could
// then change the meaning of the URL. Unfortunately, there's
// not much we can do about that...
return "url(\"$result\")";
}

View File

@@ -0,0 +1,28 @@
<?php
/**
* Dummy AttrDef that mimics another AttrDef, BUT it generates clones
* with make.
*/
class HTMLPurifier_AttrDef_Clone extends HTMLPurifier_AttrDef
{
/**
* What we're cloning
*/
protected $clone;
public function __construct($clone) {
$this->clone = $clone;
}
public function validate($v, $config, $context) {
return $this->clone->validate($v, $config, $context);
}
public function make($string) {
return clone $this->clone;
}
}
// vim: et sw=4 sts=4

View File

@@ -14,7 +14,8 @@ class HTMLPurifier_AttrDef_HTML_Color extends HTMLPurifier_AttrDef
$string = trim($string);
if (empty($string)) return false;
if (isset($colors[$string])) return $colors[$string];
$lower = strtolower($string);
if (isset($colors[$lower])) return $colors[$lower];
if ($string[0] === '#') $hex = substr($string, 1);
else $hex = $string;

View File

@@ -12,12 +12,22 @@
class HTMLPurifier_AttrDef_HTML_ID extends HTMLPurifier_AttrDef
{
// ref functionality disabled, since we also have to verify
// whether or not the ID it refers to exists
// selector is NOT a valid thing to use for IDREFs, because IDREFs
// *must* target IDs that exist, whereas selector #ids do not.
/**
* Determines whether or not we're validating an ID in a CSS
* selector context.
*/
protected $selector;
public function __construct($selector = false) {
$this->selector = $selector;
}
public function validate($id, $config, $context) {
if (!$config->get('Attr.EnableID')) return false;
if (!$this->selector && !$config->get('Attr.EnableID')) return false;
$id = trim($id); // trim it first
@@ -33,10 +43,10 @@ class HTMLPurifier_AttrDef_HTML_ID extends HTMLPurifier_AttrDef
'%Attr.IDPrefix is set', E_USER_WARNING);
}
//if (!$this->ref) {
if (!$this->selector) {
$id_accumulator =& $context->get('IDAccumulator');
if (isset($id_accumulator->ids[$id])) return false;
//}
}
// we purposely avoid using regex, hopefully this is faster
@@ -56,7 +66,7 @@ class HTMLPurifier_AttrDef_HTML_ID extends HTMLPurifier_AttrDef
return false;
}
if (/*!$this->ref && */$result) $id_accumulator->add($id);
if (!$this->selector && $result) $id_accumulator->add($id);
// if no change was made to the ID, return the result
// else, return the new id if stripping whitespace made it

View File

@@ -19,7 +19,7 @@ class HTMLPurifier_AttrDef_URI extends HTMLPurifier_AttrDef
}
public function make($string) {
$embeds = (bool) $string;
$embeds = ($string === 'embedded');
return new HTMLPurifier_AttrDef_URI($embeds);
}

View File

@@ -23,6 +23,12 @@ class HTMLPurifier_AttrDef_URI_Host extends HTMLPurifier_AttrDef
public function validate($string, $config, $context) {
$length = strlen($string);
// empty hostname is OK; it's usually semantically equivalent:
// the default host as defined by a URI scheme is used:
//
// If the URI scheme defines a default for host, then that
// default applies when the host subcomponent is undefined
// or when the registered name is empty (zero length).
if ($string === '') return '';
if ($length > 1 && $string[0] === '[' && $string[$length-1] === ']') {
//IPv6
@@ -38,9 +44,8 @@ class HTMLPurifier_AttrDef_URI_Host extends HTMLPurifier_AttrDef
// A regular domain name.
// This breaks I18N domain names, but we don't have proper IRI support,
// so force users to insert Punycode. If there's complaining we'll
// try to fix things into an international friendly form.
// This doesn't match I18N domain names, but we don't have proper IRI support,
// so force users to insert Punycode.
// The productions describing this are:
$a = '[a-z]'; // alpha
@@ -51,10 +56,44 @@ class HTMLPurifier_AttrDef_URI_Host extends HTMLPurifier_AttrDef
// toplabel = alpha | alpha *( alphanum | "-" ) alphanum
$toplabel = "$a($and*$an)?";
// hostname = *( domainlabel "." ) toplabel [ "." ]
$match = preg_match("/^($domainlabel\.)*$toplabel\.?$/i", $string);
if (!$match) return false;
if (preg_match("/^($domainlabel\.)*$toplabel\.?$/i", $string)) {
return $string;
}
return $string;
// If we have Net_IDNA2 support, we can support IRIs by
// punycoding them. (This is the most portable thing to do,
// since otherwise we have to assume browsers support
if ($config->get('Core.EnableIDNA')) {
$idna = new Net_IDNA2(array('encoding' => 'utf8', 'overlong' => false, 'strict' => true));
// we need to encode each period separately
$parts = explode('.', $string);
try {
$new_parts = array();
foreach ($parts as $part) {
$encodable = false;
for ($i = 0, $c = strlen($part); $i < $c; $i++) {
if (ord($part[$i]) > 0x7a) {
$encodable = true;
break;
}
}
if (!$encodable) {
$new_parts[] = $part;
} else {
$new_parts[] = $idna->encode($part);
}
}
$string = implode('.', $new_parts);
if (preg_match("/^($domainlabel\.)*$toplabel\.?$/i", $string)) {
return $string;
}
} catch (Exception $e) {
// XXX error reporting
}
}
return false;
}
}

View File

@@ -0,0 +1,45 @@
<?php
// must be called POST validation
/**
* Adds rel="nofollow" to all outbound links. This transform is
* only attached if Attr.Nofollow is TRUE.
*/
class HTMLPurifier_AttrTransform_Nofollow extends HTMLPurifier_AttrTransform
{
private $parser;
public function __construct() {
$this->parser = new HTMLPurifier_URIParser();
}
public function transform($attr, $config, $context) {
if (!isset($attr['href'])) {
return $attr;
}
// XXX Kind of inefficient
$url = $this->parser->parse($attr['href']);
$scheme = $url->getSchemeObj($config, $context);
if ($scheme->browsable && !$url->isLocal($config, $context)) {
if (isset($attr['rel'])) {
$rels = explode(' ', $attr['rel']);
if (!in_array('nofollow', $rels)) {
$rels[] = 'nofollow';
}
$attr['rel'] = implode(' ', $rels);
} else {
$attr['rel'] = 'nofollow';
}
}
return $attr;
}
}
// vim: et sw=4 sts=4

View File

@@ -19,6 +19,7 @@ class HTMLPurifier_AttrTransform_SafeParam extends HTMLPurifier_AttrTransform
public function __construct() {
$this->uri = new HTMLPurifier_AttrDef_URI(true); // embedded
$this->wmode = new HTMLPurifier_AttrDef_Enum(array('window', 'opaque', 'transparent'));
}
public function transform($attr, $config, $context) {
@@ -33,8 +34,15 @@ class HTMLPurifier_AttrTransform_SafeParam extends HTMLPurifier_AttrTransform
case 'allowNetworking':
$attr['value'] = 'internal';
break;
case 'allowFullScreen':
if ($config->get('HTML.FlashAllowFullScreen')) {
$attr['value'] = ($attr['value'] == 'true') ? 'true' : 'false';
} else {
$attr['value'] = 'false';
}
break;
case 'wmode':
$attr['value'] = 'window';
$attr['value'] = $this->wmode->validate($attr['value'], $config, $context);
break;
case 'movie':
case 'src':

View File

@@ -0,0 +1,38 @@
<?php
// must be called POST validation
/**
* Adds target="blank" to all outbound links. This transform is
* only attached if Attr.TargetBlank is TRUE. This works regardless
* of whether or not Attr.AllowedFrameTargets
*/
class HTMLPurifier_AttrTransform_TargetBlank extends HTMLPurifier_AttrTransform
{
private $parser;
public function __construct() {
$this->parser = new HTMLPurifier_URIParser();
}
public function transform($attr, $config, $context) {
if (!isset($attr['href'])) {
return $attr;
}
// XXX Kind of inefficient
$url = $this->parser->parse($attr['href']);
$scheme = $url->getSchemeObj($config, $context);
if ($scheme->browsable && !$url->isBenign($config, $context)) {
$attr['target'] = '_blank';
}
return $attr;
}
}
// vim: et sw=4 sts=4

View File

@@ -15,6 +15,13 @@ class HTMLPurifier_AttrTypes
* types.
*/
public function __construct() {
// XXX This is kind of poor, since we don't actually /clone/
// instances; instead, we use the supplied make() attribute. So,
// the underlying class must know how to deal with arguments.
// With the old implementation of Enum, that ignored its
// arguments when handling a make dispatch, the IAlign
// definition wouldn't work.
// pseudo-types, must be instantiated via shorthand
$this->info['Enum'] = new HTMLPurifier_AttrDef_Enum();
$this->info['Bool'] = new HTMLPurifier_AttrDef_HTML_Bool();
@@ -29,6 +36,9 @@ class HTMLPurifier_AttrTypes
$this->info['URI'] = new HTMLPurifier_AttrDef_URI();
$this->info['LanguageCode'] = new HTMLPurifier_AttrDef_Lang();
$this->info['Color'] = new HTMLPurifier_AttrDef_HTML_Color();
$this->info['IAlign'] = self::makeEnum('top,middle,bottom,left,right');
$this->info['LAlign'] = self::makeEnum('top,bottom,left,right');
$this->info['FrameTarget'] = new HTMLPurifier_AttrDef_HTML_FrameTarget();
// unimplemented aliases
$this->info['ContentType'] = new HTMLPurifier_AttrDef_Text();
@@ -44,6 +54,10 @@ class HTMLPurifier_AttrTypes
$this->info['Number'] = new HTMLPurifier_AttrDef_Integer(false, false, true);
}
private static function makeEnum($in) {
return new HTMLPurifier_AttrDef_Clone(new HTMLPurifier_AttrDef_Enum(explode(',', $in)));
}
/**
* Retrieves a type
* @param $type String type name

View File

@@ -37,7 +37,12 @@ class HTMLPurifier_Bootstrap
public static function autoload($class) {
$file = HTMLPurifier_Bootstrap::getPath($class);
if (!$file) return false;
require HTMLPURIFIER_PREFIX . '/' . $file;
// Technically speaking, it should be ok and more efficient to
// just do 'require', but Antonio Parraga reports that with
// Zend extensions such as Zend debugger and APC, this invariant
// may be broken. Since we have efficient alternatives, pay
// the cost here and avoid the bug.
require_once HTMLPURIFIER_PREFIX . '/' . $file;
return true;
}
@@ -65,31 +70,37 @@ class HTMLPurifier_Bootstrap
if ( ($funcs = spl_autoload_functions()) === false ) {
spl_autoload_register($autoload);
} elseif (function_exists('spl_autoload_unregister')) {
$compat = version_compare(PHP_VERSION, '5.1.2', '<=') &&
version_compare(PHP_VERSION, '5.1.0', '>=');
foreach ($funcs as $func) {
if (is_array($func)) {
// :TRICKY: There are some compatibility issues and some
// places where we need to error out
$reflector = new ReflectionMethod($func[0], $func[1]);
if (!$reflector->isStatic()) {
throw new Exception('
HTML Purifier autoloader registrar is not compatible
with non-static object methods due to PHP Bug #44144;
Please do not use HTMLPurifier.autoload.php (or any
file that includes this file); instead, place the code:
spl_autoload_register(array(\'HTMLPurifier_Bootstrap\', \'autoload\'))
after your own autoloaders.
');
if (version_compare(PHP_VERSION, '5.3.0', '>=')) {
// prepend flag exists, no need for shenanigans
spl_autoload_register($autoload, true, true);
} else {
$buggy = version_compare(PHP_VERSION, '5.2.11', '<');
$compat = version_compare(PHP_VERSION, '5.1.2', '<=') &&
version_compare(PHP_VERSION, '5.1.0', '>=');
foreach ($funcs as $func) {
if ($buggy && is_array($func)) {
// :TRICKY: There are some compatibility issues and some
// places where we need to error out
$reflector = new ReflectionMethod($func[0], $func[1]);
if (!$reflector->isStatic()) {
throw new Exception('
HTML Purifier autoloader registrar is not compatible
with non-static object methods due to PHP Bug #44144;
Please do not use HTMLPurifier.autoload.php (or any
file that includes this file); instead, place the code:
spl_autoload_register(array(\'HTMLPurifier_Bootstrap\', \'autoload\'))
after your own autoloaders.
');
}
// Suprisingly, spl_autoload_register supports the
// Class::staticMethod callback format, although call_user_func doesn't
if ($compat) $func = implode('::', $func);
}
// Suprisingly, spl_autoload_register supports the
// Class::staticMethod callback format, although call_user_func doesn't
if ($compat) $func = implode('::', $func);
spl_autoload_unregister($func);
}
spl_autoload_unregister($func);
spl_autoload_register($autoload);
foreach ($funcs as $func) spl_autoload_register($func);
}
spl_autoload_register($autoload);
foreach ($funcs as $func) spl_autoload_register($func);
}
}

View File

@@ -208,8 +208,9 @@ class HTMLPurifier_CSSDefinition extends HTMLPurifier_Definition
$this->info['border-spacing'] = new HTMLPurifier_AttrDef_CSS_Multiple(new HTMLPurifier_AttrDef_CSS_Length(), 2);
// partial support
$this->info['white-space'] = new HTMLPurifier_AttrDef_Enum(array('nowrap'));
// These CSS properties don't work on many browsers, but we live
// in THE FUTURE!
$this->info['white-space'] = new HTMLPurifier_AttrDef_Enum(array('nowrap', 'normal', 'pre', 'pre-wrap', 'pre-line'));
if ($config->get('CSS.Proprietary')) {
$this->doSetupProprietary($config);
@@ -219,6 +220,10 @@ class HTMLPurifier_CSSDefinition extends HTMLPurifier_Definition
$this->doSetupTricky($config);
}
if ($config->get('CSS.Trusted')) {
$this->doSetupTrusted($config);
}
$allow_important = $config->get('CSS.AllowImportant');
// wrap all attr-defs with decorator that handles !important
foreach ($this->info as $k => $v) {
@@ -245,12 +250,17 @@ class HTMLPurifier_CSSDefinition extends HTMLPurifier_Definition
// only opacity, for now
$this->info['filter'] = new HTMLPurifier_AttrDef_CSS_Filter();
// more CSS3
$this->info['page-break-after'] =
$this->info['page-break-before'] = new HTMLPurifier_AttrDef_Enum(array('auto','always','avoid','left','right'));
$this->info['page-break-inside'] = new HTMLPurifier_AttrDef_Enum(array('auto','avoid'));
}
protected function doSetupTricky($config) {
$this->info['display'] = new HTMLPurifier_AttrDef_Enum(array(
'inline', 'block', 'list-item', 'run-in', 'compact',
'marker', 'table', 'inline-table', 'table-row-group',
'marker', 'table', 'inline-block', 'inline-table', 'table-row-group',
'table-header-group', 'table-footer-group', 'table-row',
'table-column-group', 'table-column', 'table-cell', 'table-caption', 'none'
));
@@ -260,6 +270,23 @@ class HTMLPurifier_CSSDefinition extends HTMLPurifier_Definition
$this->info['overflow'] = new HTMLPurifier_AttrDef_Enum(array('visible', 'hidden', 'auto', 'scroll'));
}
protected function doSetupTrusted($config) {
$this->info['position'] = new HTMLPurifier_AttrDef_Enum(array(
'static', 'relative', 'absolute', 'fixed'
));
$this->info['top'] =
$this->info['left'] =
$this->info['right'] =
$this->info['bottom'] = new HTMLPurifier_AttrDef_CSS_Composite(array(
new HTMLPurifier_AttrDef_CSS_Length(),
new HTMLPurifier_AttrDef_CSS_Percentage(),
new HTMLPurifier_AttrDef_Enum(array('auto')),
));
$this->info['z-index'] = new HTMLPurifier_AttrDef_CSS_Composite(array(
new HTMLPurifier_AttrDef_Integer(),
new HTMLPurifier_AttrDef_Enum(array('auto')),
));
}
/**
* Performs extra config-based processing. Based off of
@@ -272,20 +299,29 @@ class HTMLPurifier_CSSDefinition extends HTMLPurifier_Definition
// setup allowed elements
$support = "(for information on implementing this, see the ".
"support forums) ";
$allowed_attributes = $config->get('CSS.AllowedProperties');
if ($allowed_attributes !== null) {
$allowed_properties = $config->get('CSS.AllowedProperties');
if ($allowed_properties !== null) {
foreach ($this->info as $name => $d) {
if(!isset($allowed_attributes[$name])) unset($this->info[$name]);
unset($allowed_attributes[$name]);
if(!isset($allowed_properties[$name])) unset($this->info[$name]);
unset($allowed_properties[$name]);
}
// emit errors
foreach ($allowed_attributes as $name => $d) {
foreach ($allowed_properties as $name => $d) {
// :TODO: Is this htmlspecialchars() call really necessary?
$name = htmlspecialchars($name);
trigger_error("Style attribute '$name' is not supported $support", E_USER_WARNING);
}
}
$forbidden_properties = $config->get('CSS.ForbiddenProperties');
if ($forbidden_properties !== null) {
foreach ($this->info as $name => $d) {
if (isset($forbidden_properties[$name])) {
unset($this->info[$name]);
}
}
}
}
}

View File

@@ -0,0 +1,120 @@
<?php
/**
* Definition for list containers ul and ol.
*/
class HTMLPurifier_ChildDef_List extends HTMLPurifier_ChildDef
{
public $type = 'list';
// lying a little bit, so that we can handle ul and ol ourselves
// XXX: This whole business with 'wrap' is all a bit unsatisfactory
public $elements = array('li' => true, 'ul' => true, 'ol' => true);
public function validateChildren($tokens_of_children, $config, $context) {
// Flag for subclasses
$this->whitespace = false;
// if there are no tokens, delete parent node
if (empty($tokens_of_children)) return false;
// the new set of children
$result = array();
// current depth into the nest
$nesting = 0;
// a little sanity check to make sure it's not ALL whitespace
$all_whitespace = true;
$seen_li = false;
$need_close_li = false;
foreach ($tokens_of_children as $token) {
if (!empty($token->is_whitespace)) {
$result[] = $token;
continue;
}
$all_whitespace = false; // phew, we're not talking about whitespace
if ($nesting == 1 && $need_close_li) {
$result[] = new HTMLPurifier_Token_End('li');
$nesting--;
$need_close_li = false;
}
$is_child = ($nesting == 0);
if ($token instanceof HTMLPurifier_Token_Start) {
$nesting++;
} elseif ($token instanceof HTMLPurifier_Token_End) {
$nesting--;
}
if ($is_child) {
if ($token->name === 'li') {
// good
$seen_li = true;
} elseif ($token->name === 'ul' || $token->name === 'ol') {
// we want to tuck this into the previous li
$need_close_li = true;
$nesting++;
if (!$seen_li) {
// create a new li element
$result[] = new HTMLPurifier_Token_Start('li');
} else {
// backtrack until </li> found
while(true) {
$t = array_pop($result);
if ($t instanceof HTMLPurifier_Token_End) {
// XXX actually, these invariants could very plausibly be violated
// if we are doing silly things with modifying the set of allowed elements.
// FORTUNATELY, it doesn't make a difference, since the allowed
// elements are hard-coded here!
if ($t->name !== 'li') {
trigger_error("Only li present invariant violated in List ChildDef", E_USER_ERROR);
return false;
}
break;
} elseif ($t instanceof HTMLPurifier_Token_Empty) { // bleagh
if ($t->name !== 'li') {
trigger_error("Only li present invariant violated in List ChildDef", E_USER_ERROR);
return false;
}
// XXX this should have a helper for it...
$result[] = new HTMLPurifier_Token_Start('li', $t->attr, $t->line, $t->col, $t->armor);
break;
} else {
if (!$t->is_whitespace) {
trigger_error("Only whitespace present invariant violated in List ChildDef", E_USER_ERROR);
return false;
}
}
}
}
} else {
// start wrapping (this doesn't precisely mimic
// browser behavior, but what browsers do is kind of
// hard to mimic in a standards compliant way
// XXX Actually, this has no impact in practice,
// because this gets handled earlier. Arguably,
// we should rip out all of that processing
$result[] = new HTMLPurifier_Token_Start('li');
$nesting++;
$seen_li = true;
$need_close_li = true;
}
}
$result[] = $token;
}
if ($need_close_li) {
$result[] = new HTMLPurifier_Token_End('li');
}
if (empty($result)) return false;
if ($all_whitespace) {
return false;
}
if ($tokens_of_children == $result) return true;
return $result;
}
}
// vim: et sw=4 sts=4

View File

@@ -1,7 +1,33 @@
<?php
/**
* Definition for tables
* Definition for tables. The general idea is to extract out all of the
* essential bits, and then reconstruct it later.
*
* This is a bit confusing, because the DTDs and the W3C
* validators seem to disagree on the appropriate definition. The
* DTD claims:
*
* (CAPTION?, (COL*|COLGROUP*), THEAD?, TFOOT?, TBODY+)
*
* But actually, the HTML4 spec then has this to say:
*
* The TBODY start tag is always required except when the table
* contains only one table body and no table head or foot sections.
* The TBODY end tag may always be safely omitted.
*
* So the DTD is kind of wrong. The validator is, unfortunately, kind
* of on crack.
*
* The definition changed again in XHTML1.1; and in my opinion, this
* formulation makes the most sense.
*
* caption?, ( col* | colgroup* ), (( thead?, tfoot?, tbody+ ) | ( tr+ ))
*
* Essentially, we have two modes: thead/tfoot/tbody mode, and tr mode.
* If we encounter a thead, tfoot or tbody, we are placed in the former
* mode, and we *must* wrap any stray tr segments with a tbody. But if
* we don't run into any of them, just have tr tags is OK.
*/
class HTMLPurifier_ChildDef_Table extends HTMLPurifier_ChildDef
{
@@ -33,6 +59,8 @@ class HTMLPurifier_ChildDef_Table extends HTMLPurifier_ChildDef
$collection = array(); // collected nodes
$tag_index = 0; // the first node might be whitespace,
// so this tells us where the start tag is
$tbody_mode = false; // if true, then we need to wrap any stray
// <tr>s with a <tbody>.
foreach ($tokens_of_children as $token) {
$is_child = ($nesting == 0);
@@ -51,8 +79,9 @@ class HTMLPurifier_ChildDef_Table extends HTMLPurifier_ChildDef
// okay, let's stash the tokens away
// first token tells us the type of the collection
switch ($collection[$tag_index]->name) {
case 'tr':
case 'tbody':
$tbody_mode = true;
case 'tr':
$content[] = $collection;
break;
case 'caption':
@@ -61,13 +90,28 @@ class HTMLPurifier_ChildDef_Table extends HTMLPurifier_ChildDef
break;
case 'thead':
case 'tfoot':
$tbody_mode = true;
// XXX This breaks rendering properties with
// Firefox, which never floats a <thead> to
// the top. Ever. (Our scheme will float the
// first <thead> to the top.) So maybe
// <thead>s that are not first should be
// turned into <tbody>? Very tricky, indeed.
// access the appropriate variable, $thead or $tfoot
$var = $collection[$tag_index]->name;
if ($$var === false) {
$$var = $collection;
} else {
// transmutate the first and less entries into
// tbody tags, and then put into content
// Oops, there's a second one! What
// should we do? Current behavior is to
// transmutate the first and last entries into
// tbody tags, and then put into content.
// Maybe a better idea is to *attach
// it* to the existing thead or tfoot?
// We don't do this, because Firefox
// doesn't float an extra tfoot to the
// bottom like it does for the first one.
$collection[$tag_index]->name = 'tbody';
$collection[count($collection)-1]->name = 'tbody';
$content[] = $collection;
@@ -126,7 +170,48 @@ class HTMLPurifier_ChildDef_Table extends HTMLPurifier_ChildDef
if ($cols !== false) foreach ($cols as $token_array) $ret = array_merge($ret, $token_array);
if ($thead !== false) $ret = array_merge($ret, $thead);
if ($tfoot !== false) $ret = array_merge($ret, $tfoot);
foreach ($content as $token_array) $ret = array_merge($ret, $token_array);
if ($tbody_mode) {
// a little tricky, since the start of the collection may be
// whitespace
$inside_tbody = false;
foreach ($content as $token_array) {
// find the starting token
foreach ($token_array as $t) {
if ($t->name === 'tr' || $t->name === 'tbody') {
break;
}
} // iterator variable carries over
if ($t->name === 'tr') {
if ($inside_tbody) {
$ret = array_merge($ret, $token_array);
} else {
$ret[] = new HTMLPurifier_Token_Start('tbody');
$ret = array_merge($ret, $token_array);
$inside_tbody = true;
}
} elseif ($t->name === 'tbody') {
if ($inside_tbody) {
$ret[] = new HTMLPurifier_Token_End('tbody');
$inside_tbody = false;
$ret = array_merge($ret, $token_array);
} else {
$ret = array_merge($ret, $token_array);
}
} else {
trigger_error("tr/tbody in content invariant failed in Table ChildDef", E_USER_ERROR);
}
}
if ($inside_tbody) {
$ret[] = new HTMLPurifier_Token_End('tbody');
}
} else {
foreach ($content as $token_array) {
// invariant: everything in here is <tr>s
$ret = array_merge($ret, $token_array);
}
}
if (!empty($collection) && $is_collecting == false){
// grab the trailing space
$ret = array_merge($ret, $collection);

View File

@@ -20,7 +20,7 @@ class HTMLPurifier_Config
/**
* HTML Purifier's version
*/
public $version = '4.1.1';
public $version = '4.5.0';
/**
* Bool indicator whether or not to automatically finalize
@@ -44,7 +44,7 @@ class HTMLPurifier_Config
/**
* Parser for variables
*/
protected $parser;
protected $parser = null;
/**
* Reference HTMLPurifier_ConfigSchema for value checking
@@ -76,7 +76,8 @@ class HTMLPurifier_Config
/**
* Set to false if you do not want line and file numbers in errors
* (useful when unit testing)
* (useful when unit testing). This will also compress some errors
* and exceptions.
*/
public $chatty = true;
@@ -188,7 +189,7 @@ class HTMLPurifier_Config
}
/**
* Returns a md5 signature of a segment of the configuration object
* Returns a SHA-1 signature of a segment of the configuration object
* that uniquely identifies that particular configuration
* @note Revision is handled specially and is removed from the batch
* before processing!
@@ -198,18 +199,18 @@ class HTMLPurifier_Config
if (empty($this->serials[$namespace])) {
$batch = $this->getBatch($namespace);
unset($batch['DefinitionRev']);
$this->serials[$namespace] = md5(serialize($batch));
$this->serials[$namespace] = sha1(serialize($batch));
}
return $this->serials[$namespace];
}
/**
* Returns a md5 signature for the entire configuration object
* Returns a SHA-1 signature for the entire configuration object
* that uniquely identifies that particular configuration
*/
public function getSerial() {
if (empty($this->serial)) {
$this->serial = md5(serialize($this->getAll()));
$this->serial = sha1(serialize($this->getAll()));
}
return $this->serial;
}
@@ -318,26 +319,64 @@ class HTMLPurifier_Config
* Retrieves object reference to the HTML definition.
* @param $raw Return a copy that has not been setup yet. Must be
* called before it's been setup, otherwise won't work.
* @param $optimized If true, this method may return null, to
* indicate that a cached version of the modified
* definition object is available and no further edits
* are necessary. Consider using
* maybeGetRawHTMLDefinition, which is more explicitly
* named, instead.
*/
public function getHTMLDefinition($raw = false) {
return $this->getDefinition('HTML', $raw);
public function getHTMLDefinition($raw = false, $optimized = false) {
return $this->getDefinition('HTML', $raw, $optimized);
}
/**
* Retrieves object reference to the CSS definition
* @param $raw Return a copy that has not been setup yet. Must be
* called before it's been setup, otherwise won't work.
* @param $optimized If true, this method may return null, to
* indicate that a cached version of the modified
* definition object is available and no further edits
* are necessary. Consider using
* maybeGetRawCSSDefinition, which is more explicitly
* named, instead.
*/
public function getCSSDefinition($raw = false) {
return $this->getDefinition('CSS', $raw);
public function getCSSDefinition($raw = false, $optimized = false) {
return $this->getDefinition('CSS', $raw, $optimized);
}
/**
* Retrieves object reference to the URI definition
* @param $raw Return a copy that has not been setup yet. Must be
* called before it's been setup, otherwise won't work.
* @param $optimized If true, this method may return null, to
* indicate that a cached version of the modified
* definition object is available and no further edits
* are necessary. Consider using
* maybeGetRawURIDefinition, which is more explicitly
* named, instead.
*/
public function getURIDefinition($raw = false, $optimized = false) {
return $this->getDefinition('URI', $raw, $optimized);
}
/**
* Retrieves a definition
* @param $type Type of definition: HTML, CSS, etc
* @param $raw Whether or not definition should be returned raw
* @param $optimized Only has an effect when $raw is true. Whether
* or not to return null if the result is already present in
* the cache. This is off by default for backwards
* compatibility reasons, but you need to do things this
* way in order to ensure that caching is done properly.
* Check out enduser-customize.html for more details.
* We probably won't ever change this default, as much as the
* maybe semantics is the "right thing to do."
*/
public function getDefinition($type, $raw = false) {
public function getDefinition($type, $raw = false, $optimized = false) {
if ($optimized && !$raw) {
throw new HTMLPurifier_Exception("Cannot set optimized = true when raw = false");
}
if (!$this->finalized) $this->autoFinalize();
// temporarily suspend locks, so we can handle recursive definition calls
$lock = $this->lock;
@@ -346,52 +385,137 @@ class HTMLPurifier_Config
$cache = $factory->create($type, $this);
$this->lock = $lock;
if (!$raw) {
// see if we can quickly supply a definition
// full definition
// ---------------
// check if definition is in memory
if (!empty($this->definitions[$type])) {
if (!$this->definitions[$type]->setup) {
$this->definitions[$type]->setup($this);
$cache->set($this->definitions[$type], $this);
$def = $this->definitions[$type];
// check if the definition is setup
if ($def->setup) {
return $def;
} else {
$def->setup($this);
if ($def->optimized) $cache->add($def, $this);
return $def;
}
return $this->definitions[$type];
}
// memory check missed, try cache
$this->definitions[$type] = $cache->get($this);
if ($this->definitions[$type]) {
// definition in cache, return it
return $this->definitions[$type];
// check if definition is in cache
$def = $cache->get($this);
if ($def) {
// definition in cache, save to memory and return it
$this->definitions[$type] = $def;
return $def;
}
} elseif (
!empty($this->definitions[$type]) &&
!$this->definitions[$type]->setup
) {
// raw requested, raw in memory, quick return
return $this->definitions[$type];
// initialize it
$def = $this->initDefinition($type);
// set it up
$this->lock = $type;
$def->setup($this);
$this->lock = null;
// save in cache
$cache->add($def, $this);
// return it
return $def;
} else {
// raw definition
// --------------
// check preconditions
$def = null;
if ($optimized) {
if (is_null($this->get($type . '.DefinitionID'))) {
// fatally error out if definition ID not set
throw new HTMLPurifier_Exception("Cannot retrieve raw version without specifying %$type.DefinitionID");
}
}
if (!empty($this->definitions[$type])) {
$def = $this->definitions[$type];
if ($def->setup && !$optimized) {
$extra = $this->chatty ? " (try moving this code block earlier in your initialization)" : "";
throw new HTMLPurifier_Exception("Cannot retrieve raw definition after it has already been setup" . $extra);
}
if ($def->optimized === null) {
$extra = $this->chatty ? " (try flushing your cache)" : "";
throw new HTMLPurifier_Exception("Optimization status of definition is unknown" . $extra);
}
if ($def->optimized !== $optimized) {
$msg = $optimized ? "optimized" : "unoptimized";
$extra = $this->chatty ? " (this backtrace is for the first inconsistent call, which was for a $msg raw definition)" : "";
throw new HTMLPurifier_Exception("Inconsistent use of optimized and unoptimized raw definition retrievals" . $extra);
}
}
// check if definition was in memory
if ($def) {
if ($def->setup) {
// invariant: $optimized === true (checked above)
return null;
} else {
return $def;
}
}
// if optimized, check if definition was in cache
// (because we do the memory check first, this formulation
// is prone to cache slamming, but I think
// guaranteeing that either /all/ of the raw
// setup code or /none/ of it is run is more important.)
if ($optimized) {
// This code path only gets run once; once we put
// something in $definitions (which is guaranteed by the
// trailing code), we always short-circuit above.
$def = $cache->get($this);
if ($def) {
// save the full definition for later, but don't
// return it yet
$this->definitions[$type] = $def;
return null;
}
}
// check invariants for creation
if (!$optimized) {
if (!is_null($this->get($type . '.DefinitionID'))) {
if ($this->chatty) {
$this->triggerError("Due to a documentation error in previous version of HTML Purifier, your definitions are not being cached. If this is OK, you can remove the %$type.DefinitionRev and %$type.DefinitionID declaration. Otherwise, modify your code to use maybeGetRawDefinition, and test if the returned value is null before making any edits (if it is null, that means that a cached version is available, and no raw operations are necessary). See <a href='http://htmlpurifier.org/docs/enduser-customize.html#optimized'>Customize</a> for more details", E_USER_WARNING);
} else {
$this->triggerError("Useless DefinitionID declaration", E_USER_WARNING);
}
}
}
// initialize it
$def = $this->initDefinition($type);
$def->optimized = $optimized;
return $def;
}
throw new HTMLPurifier_Exception("The impossible happened!");
}
private function initDefinition($type) {
// quick checks failed, let's create the object
if ($type == 'HTML') {
$this->definitions[$type] = new HTMLPurifier_HTMLDefinition();
$def = new HTMLPurifier_HTMLDefinition();
} elseif ($type == 'CSS') {
$this->definitions[$type] = new HTMLPurifier_CSSDefinition();
$def = new HTMLPurifier_CSSDefinition();
} elseif ($type == 'URI') {
$this->definitions[$type] = new HTMLPurifier_URIDefinition();
$def = new HTMLPurifier_URIDefinition();
} else {
throw new HTMLPurifier_Exception("Definition of $type type not supported");
}
// quick abort if raw
if ($raw) {
if (is_null($this->get($type . '.DefinitionID'))) {
// fatally error out if definition ID not set
throw new HTMLPurifier_Exception("Cannot retrieve raw version without specifying %$type.DefinitionID");
}
return $this->definitions[$type];
}
// set it up
$this->lock = $type;
$this->definitions[$type]->setup($this);
$this->lock = null;
// save in cache
$cache->set($this->definitions[$type], $this);
return $this->definitions[$type];
$this->definitions[$type] = $def;
return $def;
}
public function maybeGetRawDefinition($name) {
return $this->getDefinition($name, true, true);
}
public function maybeGetRawHTMLDefinition() {
return $this->getDefinition('HTML', true, true);
}
public function maybeGetRawCSSDefinition() {
return $this->getDefinition('CSS', true, true);
}
public function maybeGetRawURIDefinition() {
return $this->getDefinition('URI', true, true);
}
/**
@@ -544,22 +668,28 @@ class HTMLPurifier_Config
*/
public function finalize() {
$this->finalized = true;
unset($this->parser);
$this->parser = null;
}
/**
* Produces a nicely formatted error message by supplying the
* stack frame information from two levels up and OUTSIDE of
* HTMLPurifier_Config.
* stack frame information OUTSIDE of HTMLPurifier_Config.
*/
protected function triggerError($msg, $no) {
// determine previous stack frame
$backtrace = debug_backtrace();
if ($this->chatty && isset($backtrace[1])) {
$frame = $backtrace[1];
$extra = " on line {$frame['line']} in file {$frame['file']}";
} else {
$extra = '';
$extra = '';
if ($this->chatty) {
$trace = debug_backtrace();
// zip(tail(trace), trace) -- but PHP is not Haskell har har
for ($i = 0, $c = count($trace); $i < $c - 1; $i++) {
// XXX this is not correct on some versions of HTML Purifier
if ($trace[$i + 1]['class'] === 'HTMLPurifier_Config') {
continue;
}
$frame = $trace[$i];
$extra = " invoked on line {$frame['line']} in file {$frame['file']}";
break;
}
}
trigger_error($msg . $extra, $no);
}

View File

@@ -60,7 +60,13 @@ class HTMLPurifier_ConfigSchema {
* Unserializes the default ConfigSchema.
*/
public static function makeFromSerial() {
return unserialize(file_get_contents(HTMLPURIFIER_PREFIX . '/HTMLPurifier/ConfigSchema/schema.ser'));
$contents = file_get_contents(HTMLPURIFIER_PREFIX . '/HTMLPurifier/ConfigSchema/schema.ser');
$r = unserialize($contents);
if (!$r) {
$hash = sha1($contents);
trigger_error("Unserialization of configuration schema failed, sha1 of file was $hash", E_USER_ERROR);
}
return $r;
}
/**

View File

@@ -0,0 +1,12 @@
CSS.AllowedFonts
TYPE: lookup/null
VERSION: 4.3.0
DEFAULT: NULL
--DESCRIPTION--
<p>
Allows you to manually specify a set of allowed fonts. If
<code>NULL</code>, all fonts are allowed. This directive
affects generic names (serif, sans-serif, monospace, cursive,
fantasy) as well as specific font families.
</p>
--# vim: et sw=4 sts=4

View File

@@ -0,0 +1,13 @@
CSS.ForbiddenProperties
TYPE: lookup
VERSION: 4.2.0
DEFAULT: array()
--DESCRIPTION--
<p>
This is the logical inverse of %CSS.AllowedProperties, and it will
override that directive or any other directive. If possible,
%CSS.AllowedProperties is recommended over this directive,
because it can sometimes be difficult to tell whether or not you've
forbidden all of the CSS properties you truly would like to disallow.
</p>
--# vim: et sw=4 sts=4

View File

@@ -0,0 +1,9 @@
CSS.Trusted
TYPE: bool
VERSION: 4.2.1
DEFAULT: false
--DESCRIPTION--
Indicates whether or not the user's CSS input is trusted or not. If the
input is trusted, a more expansive set of allowed properties. See
also %HTML.Trusted.
--# vim: et sw=4 sts=4

View File

@@ -0,0 +1,11 @@
Cache.SerializerPermissions
TYPE: int
VERSION: 4.3.0
DEFAULT: 0755
--DESCRIPTION--
<p>
Directory permissions of the files and directories created inside
the DefinitionCache/Serializer or other custom serializer path.
</p>
--# vim: et sw=4 sts=4

View File

@@ -24,5 +24,6 @@ array (
--DESCRIPTION--
Lookup array of color names to six digit hexadecimal number corresponding
to color, with preceding hash mark. Used when parsing colors.
to color, with preceding hash mark. Used when parsing colors. The lookup
is done in a case-insensitive manner.
--# vim: et sw=4 sts=4

View File

@@ -0,0 +1,14 @@
Core.DisableExcludes
TYPE: bool
DEFAULT: false
VERSION: 4.5.0
--DESCRIPTION--
<p>
This directive disables SGML-style exclusions, e.g. the exclusion of
<code>&lt;object&gt;</code> in any descendant of a
<code>&lt;pre&gt;</code> tag. Disabling excludes will allow some
invalid documents to pass through HTML Purifier, but HTML Purifier
will also be less likely to accidentally remove large documents during
processing.
</p>
--# vim: et sw=4 sts=4

View File

@@ -0,0 +1,9 @@
Core.EnableIDNA
TYPE: bool
DEFAULT: false
VERSION: 4.4.0
--DESCRIPTION--
Allows international domain names in URLs. This configuration option
requires the PEAR Net_IDNA2 module to be installed. It operates by
punycoding any internationalized host names for maximum portability.
--# vim: et sw=4 sts=4

View File

@@ -0,0 +1,11 @@
Core.NormalizeNewlines
TYPE: bool
VERSION: 4.2.0
DEFAULT: true
--DESCRIPTION--
<p>
Whether or not to normalize newlines to the operating
system default. When <code>false</code>, HTML Purifier
will attempt to preserve mixed newline files.
</p>
--# vim: et sw=4 sts=4

View File

@@ -0,0 +1,11 @@
Core.RemoveProcessingInstructions
TYPE: bool
VERSION: 4.2.0
DEFAULT: false
--DESCRIPTION--
Instead of escaping processing instructions in the form <code>&lt;? ...
?&gt;</code>, remove it out-right. This may be useful if the HTML
you are validating contains XML processing instruction gunk, however,
it can also be user-unfriendly for people attempting to post PHP
snippets.
--# vim: et sw=4 sts=4

View File

@@ -3,6 +3,11 @@ TYPE: bool
VERSION: 3.1.0
DEFAULT: false
--DESCRIPTION--
<p>
<strong>Warning:</strong> Deprecated in favor of %HTML.SafeObject and
%Output.FlashCompat (turn both on to allow YouTube videos and other
Flash content).
</p>
<p>
This directive enables YouTube video embedding in HTML Purifier. Check
<a href="http://htmlpurifier.org/docs/enduser-youtube.html">this document

View File

@@ -5,11 +5,14 @@ DEFAULT: NULL
--DESCRIPTION--
<p>
This is a convenience directive that rolls the functionality of
%HTML.AllowedElements and %HTML.AllowedAttributes into one directive.
This is a preferred convenience directive that combines
%HTML.AllowedElements and %HTML.AllowedAttributes.
Specify elements and attributes that are allowed using:
<code>element1[attr1|attr2],element2...</code>. You can also use
newlines instead of commas to separate elements.
<code>element1[attr1|attr2],element2...</code>. For example,
if you would like to only allow paragraphs and links, specify
<code>a[href],p</code>. You can specify attributes that apply
to all elements using an asterisk, e.g. <code>*[lang]</code>.
You can also use newlines instead of commas to separate elements.
</p>
<p>
<strong>Warning</strong>:

View File

@@ -0,0 +1,10 @@
HTML.AllowedComments
TYPE: lookup
VERSION: 4.4.0
DEFAULT: array()
--DESCRIPTION--
A whitelist which indicates what explicit comment bodies should be
allowed, modulo leading and trailing whitespace. See also %HTML.AllowedCommentsRegexp
(these directives are union'ed together, so a comment is considered
valid if any directive deems it valid.)
--# vim: et sw=4 sts=4

View File

@@ -0,0 +1,15 @@
HTML.AllowedCommentsRegexp
TYPE: string/null
VERSION: 4.4.0
DEFAULT: NULL
--DESCRIPTION--
A regexp, which if it matches the body of a comment, indicates that
it should be allowed. Trailing and leading spaces are removed prior
to running this regular expression.
<strong>Warning:</strong> Make sure you specify
correct anchor metacharacters <code>^regex$</code>, otherwise you may accept
comments that you did not mean to! In particular, the regex <code>/foo|bar/</code>
is probably not sufficiently strict, since it also allows <code>foobar</code>.
See also %HTML.AllowedComments (these directives are union'ed together,
so a comment is considered valid if any directive deems it valid.)
--# vim: et sw=4 sts=4

View File

@@ -4,12 +4,17 @@ VERSION: 1.3.0
DEFAULT: NULL
--DESCRIPTION--
<p>
If HTML Purifier's tag set is unsatisfactory for your needs, you
can overload it with your own list of tags to allow. Note that this
method is subtractive: it does its job by taking away from HTML Purifier
usual feature set, so you cannot add a tag that HTML Purifier never
supported in the first place (like embed, form or head). If you
change this, you probably also want to change %HTML.AllowedAttributes.
If HTML Purifier's tag set is unsatisfactory for your needs, you can
overload it with your own list of tags to allow. If you change
this, you probably also want to change %HTML.AllowedAttributes; see
also %HTML.Allowed which lets you set allowed elements and
attributes at the same time.
</p>
<p>
If you attempt to allow an element that HTML Purifier does not know
about, HTML Purifier will raise an error. You will need to manually
tell HTML Purifier about this element by using the
<a href="http://htmlpurifier.org/docs/enduser-customize.html">advanced customization features.</a>
</p>
<p>
<strong>Warning:</strong> If another directive conflicts with the

View File

@@ -0,0 +1,11 @@
HTML.FlashAllowFullScreen
TYPE: bool
VERSION: 4.2.0
DEFAULT: false
--DESCRIPTION--
<p>
Whether or not to permit embedded Flash content from
%HTML.SafeObject to expand to the full screen. Corresponds to
the <code>allowFullScreen</code> parameter.
</p>
--# vim: et sw=4 sts=4

View File

@@ -0,0 +1,7 @@
HTML.Nofollow
TYPE: bool
VERSION: 4.3.0
DEFAULT: FALSE
--DESCRIPTION--
If enabled, nofollow rel attributes are added to all outgoing links.
--# vim: et sw=4 sts=4

View File

@@ -0,0 +1,13 @@
HTML.SafeIframe
TYPE: bool
VERSION: 4.4.0
DEFAULT: false
--DESCRIPTION--
<p>
Whether or not to permit iframe tags in untrusted documents. This
directive must be accompanied by a whitelist of permitted iframes,
such as %URI.SafeIframeRegexp, otherwise it will fatally error.
This directive has no effect on strict doctypes, as iframes are not
valid.
</p>
--# vim: et sw=4 sts=4

View File

@@ -0,0 +1,10 @@
HTML.SafeScripting
TYPE: lookup
VERSION: 4.5.0
DEFAULT: array()
--DESCRIPTION--
<p>
Whether or not to permit script tags to external scripts in documents.
Inline scripting is not allowed, and the script must match an explicit whitelist.
</p>
--# vim: et sw=4 sts=4

View File

@@ -0,0 +1,8 @@
HTML.TargetBlank
TYPE: bool
VERSION: 4.4.0
DEFAULT: FALSE
--DESCRIPTION--
If enabled, <code>target=blank</code> attributes are added to all outgoing links.
(This includes links from an HTTPS version of a page to an HTTP version.)
--# vim: et sw=4 sts=4

View File

@@ -5,4 +5,5 @@ DEFAULT: false
--DESCRIPTION--
Indicates whether or not the user input is trusted or not. If the input is
trusted, a more expansive set of allowed tags and attributes will be used.
See also %CSS.Trusted.
--# vim: et sw=4 sts=4

View File

@@ -0,0 +1,15 @@
Output.FixInnerHTML
TYPE: bool
VERSION: 4.3.0
DEFAULT: true
--DESCRIPTION--
<p>
If true, HTML Purifier will protect against Internet Explorer's
mishandling of the <code>innerHTML</code> attribute by appending
a space to any attribute that does not contain angled brackets, spaces
or quotes, but contains a backtick. This slightly changes the
semantics of any given attribute, so if this is unacceptable and
you do not use <code>innerHTML</code> on any of your pages, you can
turn this directive off.
</p>
--# vim: et sw=4 sts=4

View File

@@ -12,6 +12,6 @@ array (
--DESCRIPTION--
Whitelist that defines the schemes that a URI is allowed to have. This
prevents XSS attacks from using pseudo-schemes like javascript or mocha.
There is also support for the <code>data</code> URI scheme, but it is not
enabled by default.
There is also support for the <code>data</code> and <code>file</code>
URI schemes, but they are not enabled by default.
--# vim: et sw=4 sts=4

View File

@@ -1,12 +1,15 @@
URI.DisableResources
TYPE: bool
VERSION: 1.3.0
VERSION: 4.2.0
DEFAULT: false
--DESCRIPTION--
<p>
Disables embedding resources, essentially meaning no pictures. You can
still link to them though. See %URI.DisableExternalResources for why
this might be a good idea.
</p>
<p>
<em>Note:</em> While this directive has been available since 1.3.0,
it didn't actually start doing anything until 4.2.0.
</p>
--# vim: et sw=4 sts=4

View File

@@ -0,0 +1,22 @@
URI.SafeIframeRegexp
TYPE: string/null
VERSION: 4.4.0
DEFAULT: NULL
--DESCRIPTION--
<p>
A PCRE regular expression that will be matched against an iframe URI. This is
a relatively inflexible scheme, but works well enough for the most common
use-case of iframes: embedded video. This directive only has an effect if
%HTML.SafeIframe is enabled. Here are some example values:
</p>
<ul>
<li><code>%^http://www.youtube.com/embed/%</code> - Allow YouTube videos</li>
<li><code>%^http://player.vimeo.com/video/%</code> - Allow Vimeo videos</li>
<li><code>%^http://(www.youtube.com/embed/|player.vimeo.com/video/)%</code> - Allow both</li>
</ul>
<p>
Note that this directive does not give you enough granularity to, say, disable
all <code>autoplay</code> videos. Pipe up on the HTML Purifier forums if this
is a capability you want.
</p>
--# vim: et sw=4 sts=4

View File

@@ -12,6 +12,17 @@ abstract class HTMLPurifier_Definition
*/
public $setup = false;
/**
* If true, write out the final definition object to the cache after
* setup. This will be true only if all invocations to get a raw
* definition object are also optimized. This does not cause file
* system thrashing because on subsequent calls the cached object
* is used and any writes to the raw definition object are short
* circuited. See enduser-customize.html for the high-level
* picture.
*/
public $optimized = null;
/**
* What type of definition is it?
*/

View File

@@ -9,14 +9,14 @@ class HTMLPurifier_DefinitionCache_Serializer extends
$file = $this->generateFilePath($config);
if (file_exists($file)) return false;
if (!$this->_prepareDir($config)) return false;
return $this->_write($file, serialize($def));
return $this->_write($file, serialize($def), $config);
}
public function set($def, $config) {
if (!$this->checkDefType($def)) return;
$file = $this->generateFilePath($config);
if (!$this->_prepareDir($config)) return false;
return $this->_write($file, serialize($def));
return $this->_write($file, serialize($def), $config);
}
public function replace($def, $config) {
@@ -24,7 +24,7 @@ class HTMLPurifier_DefinitionCache_Serializer extends
$file = $this->generateFilePath($config);
if (!file_exists($file)) return false;
if (!$this->_prepareDir($config)) return false;
return $this->_write($file, serialize($def));
return $this->_write($file, serialize($def), $config);
}
public function get($config) {
@@ -97,18 +97,34 @@ class HTMLPurifier_DefinitionCache_Serializer extends
* Convenience wrapper function for file_put_contents
* @param $file File name to write to
* @param $data Data to write into file
* @param $config Config object
* @return Number of bytes written if success, or false if failure.
*/
private function _write($file, $data) {
return file_put_contents($file, $data);
private function _write($file, $data, $config) {
$result = file_put_contents($file, $data);
if ($result !== false) {
// set permissions of the new file (no execute)
$chmod = $config->get('Cache.SerializerPermissions');
if (!$chmod) {
$chmod = 0644; // invalid config or simpletest
}
$chmod = $chmod & 0666;
chmod($file, $chmod);
}
return $result;
}
/**
* Prepares the directory that this type stores the serials in
* @param $config Config object
* @return True if successful
*/
private function _prepareDir($config) {
$directory = $this->generateDirectoryPath($config);
$chmod = $config->get('Cache.SerializerPermissions');
if (!$chmod) {
$chmod = 0755; // invalid config or simpletest
}
if (!is_dir($directory)) {
$base = $this->generateBaseDirectoryPath($config);
if (!is_dir($base)) {
@@ -116,13 +132,13 @@ class HTMLPurifier_DefinitionCache_Serializer extends
please create or change using %Cache.SerializerPath',
E_USER_WARNING);
return false;
} elseif (!$this->_testPermissions($base)) {
} elseif (!$this->_testPermissions($base, $chmod)) {
return false;
}
$old = umask(0022); // disable group and world writes
mkdir($directory);
$old = umask(0000);
mkdir($directory, $chmod);
umask($old);
} elseif (!$this->_testPermissions($directory)) {
} elseif (!$this->_testPermissions($directory, $chmod)) {
return false;
}
return true;
@@ -131,8 +147,11 @@ class HTMLPurifier_DefinitionCache_Serializer extends
/**
* Tests permissions on a directory and throws out friendly
* error messages and attempts to chmod it itself if possible
* @param $dir Directory path
* @param $chmod Permissions
* @return True if directory writable
*/
private function _testPermissions($dir) {
private function _testPermissions($dir, $chmod) {
// early abort, if it is writable, everything is hunky-dory
if (is_writable($dir)) return true;
if (!is_dir($dir)) {
@@ -146,17 +165,17 @@ class HTMLPurifier_DefinitionCache_Serializer extends
// POSIX system, we can give more specific advice
if (fileowner($dir) === posix_getuid()) {
// we can chmod it ourselves
chmod($dir, 0755);
return true;
$chmod = $chmod | 0700;
if (chmod($dir, $chmod)) return true;
} elseif (filegroup($dir) === posix_getgid()) {
$chmod = '775';
$chmod = $chmod | 0070;
} else {
// PHP's probably running as nobody, so we'll
// need to give global permissions
$chmod = '777';
$chmod = $chmod | 0777;
}
trigger_error('Directory '.$dir.' not writable, '.
'please chmod to ' . $chmod,
'please chmod to ' . decoct($chmod),
E_USER_WARNING);
} else {
// generic error message

View File

@@ -30,13 +30,25 @@ class HTMLPurifier_ElementDef
*/
public $attr = array();
// XXX: Design note: currently, it's not possible to override
// previously defined AttrTransforms without messing around with
// the final generated config. This is by design; a previous version
// used an associated list of attr_transform, but it was extremely
// easy to accidentally override other attribute transforms by
// forgetting to specify an index (and just using 0.) While we
// could check this by checking the index number and complaining,
// there is a second problem which is that it is not at all easy to
// tell when something is getting overridden. Combine this with a
// codebase where this isn't really being used, and it's perfect for
// nuking.
/**
* Indexed list of tag's HTMLPurifier_AttrTransform to be done before validation
* List of tags HTMLPurifier_AttrTransform to be done before validation
*/
public $attr_transform_pre = array();
/**
* Indexed list of tag's HTMLPurifier_AttrTransform to be done after validation
* List of tags HTMLPurifier_AttrTransform to be done after validation
*/
public $attr_transform_post = array();
@@ -144,9 +156,9 @@ class HTMLPurifier_ElementDef
}
$this->attr[$k] = $v;
}
$this->_mergeAssocArray($this->attr_transform_pre, $def->attr_transform_pre);
$this->_mergeAssocArray($this->attr_transform_post, $def->attr_transform_post);
$this->_mergeAssocArray($this->excludes, $def->excludes);
$this->attr_transform_pre = array_merge($this->attr_transform_pre, $def->attr_transform_pre);
$this->attr_transform_post = array_merge($this->attr_transform_post, $def->attr_transform_post);
if(!empty($def->content_model)) {
$this->content_model =

View File

@@ -19,6 +19,68 @@ class HTMLPurifier_Encoder
*/
public static function muteErrorHandler() {}
/**
* iconv wrapper which mutes errors, but doesn't work around bugs.
*/
public static function unsafeIconv($in, $out, $text) {
set_error_handler(array('HTMLPurifier_Encoder', 'muteErrorHandler'));
$r = iconv($in, $out, $text);
restore_error_handler();
return $r;
}
/**
* iconv wrapper which mutes errors and works around bugs.
*/
public static function iconv($in, $out, $text, $max_chunk_size = 8000) {
$code = self::testIconvTruncateBug();
if ($code == self::ICONV_OK) {
return self::unsafeIconv($in, $out, $text);
} elseif ($code == self::ICONV_TRUNCATES) {
// we can only work around this if the input character set
// is utf-8
if ($in == 'utf-8') {
if ($max_chunk_size < 4) {
trigger_error('max_chunk_size is too small', E_USER_WARNING);
return false;
}
// split into 8000 byte chunks, but be careful to handle
// multibyte boundaries properly
if (($c = strlen($text)) <= $max_chunk_size) {
return self::unsafeIconv($in, $out, $text);
}
$r = '';
$i = 0;
while (true) {
if ($i + $max_chunk_size >= $c) {
$r .= self::unsafeIconv($in, $out, substr($text, $i));
break;
}
// wibble the boundary
if (0x80 != (0xC0 & ord($text[$i + $max_chunk_size]))) {
$chunk_size = $max_chunk_size;
} elseif (0x80 != (0xC0 & ord($text[$i + $max_chunk_size - 1]))) {
$chunk_size = $max_chunk_size - 1;
} elseif (0x80 != (0xC0 & ord($text[$i + $max_chunk_size - 2]))) {
$chunk_size = $max_chunk_size - 2;
} elseif (0x80 != (0xC0 & ord($text[$i + $max_chunk_size - 3]))) {
$chunk_size = $max_chunk_size - 3;
} else {
return false; // rather confusing UTF-8...
}
$chunk = substr($text, $i, $chunk_size); // substr doesn't mind overlong lengths
$r .= self::unsafeIconv($in, $out, $chunk);
$i += $chunk_size;
}
return $r;
} else {
return false;
}
} else {
return false;
}
}
/**
* Cleans a UTF-8 string for well-formedness and SGML validity
*
@@ -260,6 +322,14 @@ class HTMLPurifier_Encoder
return $ret;
}
public static function iconvAvailable() {
static $iconv = null;
if ($iconv === null) {
$iconv = function_exists('iconv') && self::testIconvTruncateBug() != self::ICONV_UNUSABLE;
}
return $iconv;
}
/**
* Converts a string to UTF-8 based on configuration.
*/
@@ -267,28 +337,30 @@ class HTMLPurifier_Encoder
$encoding = $config->get('Core.Encoding');
if ($encoding === 'utf-8') return $str;
static $iconv = null;
if ($iconv === null) $iconv = function_exists('iconv');
set_error_handler(array('HTMLPurifier_Encoder', 'muteErrorHandler'));
if ($iconv === null) $iconv = self::iconvAvailable();
if ($iconv && !$config->get('Test.ForceNoIconv')) {
$str = iconv($encoding, 'utf-8//IGNORE', $str);
// unaffected by bugs, since UTF-8 support all characters
$str = self::unsafeIconv($encoding, 'utf-8//IGNORE', $str);
if ($str === false) {
// $encoding is not a valid encoding
restore_error_handler();
trigger_error('Invalid encoding ' . $encoding, E_USER_ERROR);
return '';
}
// If the string is bjorked by Shift_JIS or a similar encoding
// that doesn't support all of ASCII, convert the naughty
// characters to their true byte-wise ASCII/UTF-8 equivalents.
$str = strtr($str, HTMLPurifier_Encoder::testEncodingSupportsASCII($encoding));
restore_error_handler();
$str = strtr($str, self::testEncodingSupportsASCII($encoding));
return $str;
} elseif ($encoding === 'iso-8859-1') {
$str = utf8_encode($str);
restore_error_handler();
return $str;
}
trigger_error('Encoding not supported, please install iconv', E_USER_ERROR);
$bug = HTMLPurifier_Encoder::testIconvTruncateBug();
if ($bug == self::ICONV_OK) {
trigger_error('Encoding not supported, please install iconv', E_USER_ERROR);
} else {
trigger_error('You have a buggy version of iconv, see https://bugs.php.net/bug.php?id=48147 and http://sourceware.org/bugzilla/show_bug.cgi?id=13541', E_USER_ERROR);
}
}
/**
@@ -298,16 +370,15 @@ class HTMLPurifier_Encoder
*/
public static function convertFromUTF8($str, $config, $context) {
$encoding = $config->get('Core.Encoding');
if ($escape = $config->get('Core.EscapeNonASCIICharacters')) {
$str = self::convertToASCIIDumbLossless($str);
}
if ($encoding === 'utf-8') return $str;
static $iconv = null;
if ($iconv === null) $iconv = function_exists('iconv');
if ($escape = $config->get('Core.EscapeNonASCIICharacters')) {
$str = HTMLPurifier_Encoder::convertToASCIIDumbLossless($str);
}
set_error_handler(array('HTMLPurifier_Encoder', 'muteErrorHandler'));
if ($iconv === null) $iconv = self::iconvAvailable();
if ($iconv && !$config->get('Test.ForceNoIconv')) {
// Undo our previous fix in convertToUTF8, otherwise iconv will barf
$ascii_fix = HTMLPurifier_Encoder::testEncodingSupportsASCII($encoding);
$ascii_fix = self::testEncodingSupportsASCII($encoding);
if (!$escape && !empty($ascii_fix)) {
$clear_fix = array();
foreach ($ascii_fix as $utf8 => $native) $clear_fix[$utf8] = '';
@@ -315,15 +386,17 @@ class HTMLPurifier_Encoder
}
$str = strtr($str, array_flip($ascii_fix));
// Normal stuff
$str = iconv('utf-8', $encoding . '//IGNORE', $str);
restore_error_handler();
$str = self::iconv('utf-8', $encoding . '//IGNORE', $str);
return $str;
} elseif ($encoding === 'iso-8859-1') {
$str = utf8_decode($str);
restore_error_handler();
return $str;
}
trigger_error('Encoding not supported', E_USER_ERROR);
// You might be tempted to assume that the ASCII representation
// might be OK, however, this is *not* universally true over all
// encodings. So we take the conservative route here, rather
// than forcibly turn on %Core.EscapeNonASCIICharacters
}
/**
@@ -373,6 +446,49 @@ class HTMLPurifier_Encoder
return $result;
}
/** No bugs detected in iconv. */
const ICONV_OK = 0;
/** Iconv truncates output if converting from UTF-8 to another
* character set with //IGNORE, and a non-encodable character is found */
const ICONV_TRUNCATES = 1;
/** Iconv does not support //IGNORE, making it unusable for
* transcoding purposes */
const ICONV_UNUSABLE = 2;
/**
* glibc iconv has a known bug where it doesn't handle the magic
* //IGNORE stanza correctly. In particular, rather than ignore
* characters, it will return an EILSEQ after consuming some number
* of characters, and expect you to restart iconv as if it were
* an E2BIG. Old versions of PHP did not respect the errno, and
* returned the fragment, so as a result you would see iconv
* mysteriously truncating output. We can work around this by
* manually chopping our input into segments of about 8000
* characters, as long as PHP ignores the error code. If PHP starts
* paying attention to the error code, iconv becomes unusable.
*
* @returns Error code indicating severity of bug.
*/
public static function testIconvTruncateBug() {
static $code = null;
if ($code === null) {
// better not use iconv, otherwise infinite loop!
$r = self::unsafeIconv('utf-8', 'ascii//IGNORE', "\xCE\xB1" . str_repeat('a', 9000));
if ($r === false) {
$code = self::ICONV_UNUSABLE;
} elseif (($c = strlen($r)) < 9000) {
$code = self::ICONV_TRUNCATES;
} elseif ($c > 9000) {
trigger_error('Your copy of iconv is extremely buggy. Please notify HTML Purifier maintainers: include your iconv version as per phpversion()', E_USER_ERROR);
} else {
$code = self::ICONV_OK;
}
}
return $code;
}
/**
* This expensive function tests whether or not a given character
* encoding supports ASCII. 7/8-bit encodings like Shift_JIS will
@@ -385,6 +501,11 @@ class HTMLPurifier_Encoder
* which can be used to "undo" any overzealous iconv action.
*/
public static function testEncodingSupportsASCII($encoding, $bypass = false) {
// All calls to iconv here are unsafe, proof by case analysis:
// If ICONV_OK, no difference.
// If ICONV_TRUNCATE, all calls involve one character inputs,
// so bug is not triggered.
// If ICONV_UNUSABLE, this call is irrelevant
static $encodings = array();
if (!$bypass) {
if (isset($encodings[$encoding])) return $encodings[$encoding];
@@ -398,24 +519,22 @@ class HTMLPurifier_Encoder
if (strpos($lenc, 'iso-8859-') === 0) return array();
}
$ret = array();
set_error_handler(array('HTMLPurifier_Encoder', 'muteErrorHandler'));
if (iconv('UTF-8', $encoding, 'a') === false) return false;
if (self::unsafeIconv('UTF-8', $encoding, 'a') === false) return false;
for ($i = 0x20; $i <= 0x7E; $i++) { // all printable ASCII chars
$c = chr($i); // UTF-8 char
$r = iconv('UTF-8', "$encoding//IGNORE", $c); // initial conversion
$r = self::unsafeIconv('UTF-8', "$encoding//IGNORE", $c); // initial conversion
if (
$r === '' ||
// This line is needed for iconv implementations that do not
// omit characters that do not exist in the target character set
($r === $c && iconv($encoding, 'UTF-8//IGNORE', $r) !== $c)
($r === $c && self::unsafeIconv($encoding, 'UTF-8//IGNORE', $r) !== $c)
) {
// Reverse engineer: what's the UTF-8 equiv of this byte
// sequence? This assumes that there's no variable width
// encoding that doesn't support ASCII.
$ret[iconv($encoding, 'UTF-8//IGNORE', $c)] = $c;
$ret[self::unsafeIconv($encoding, 'UTF-8//IGNORE', $c)] = $c;
}
}
restore_error_handler();
$encodings[$encoding] = $ret;
return $ret;
}

File diff suppressed because one or more lines are too long

View File

@@ -1,5 +1,11 @@
<?php
// why is this a top level function? Because PHP 5.2.0 doesn't seem to
// understand how to interpret this filter if it's a static method.
// It's all really silly, but if we go this route it might be reasonable
// to coalesce all of these methods into one.
function htmlpurifier_filter_extractstyleblocks_muteerrorhandler() {}
/**
* This filter extracts <style> blocks from input HTML, cleans them up
* using CSSTidy, and then places them in $purifier->context->get('StyleBlocks')
@@ -21,8 +27,16 @@ class HTMLPurifier_Filter_ExtractStyleBlocks extends HTMLPurifier_Filter
private $_styleMatches = array();
private $_tidy;
private $_id_attrdef;
private $_class_attrdef;
private $_enum_attrdef;
public function __construct() {
$this->_tidy = new csstidy();
$this->_tidy->set_cfg('lowercase_s', false);
$this->_id_attrdef = new HTMLPurifier_AttrDef_HTML_ID(true);
$this->_class_attrdef = new HTMLPurifier_AttrDef_CSS_Ident();
$this->_enum_attrdef = new HTMLPurifier_AttrDef_Enum(array('first-child', 'link', 'visited', 'active', 'hover', 'focus'));
}
/**
@@ -77,27 +91,166 @@ class HTMLPurifier_Filter_ExtractStyleBlocks extends HTMLPurifier_Filter
$css = substr($css, 0, -3);
}
$css = trim($css);
set_error_handler('htmlpurifier_filter_extractstyleblocks_muteerrorhandler');
$this->_tidy->parse($css);
restore_error_handler();
$css_definition = $config->getDefinition('CSS');
$html_definition = $config->getDefinition('HTML');
$new_css = array();
foreach ($this->_tidy->css as $k => $decls) {
// $decls are all CSS declarations inside an @ selector
$new_decls = array();
foreach ($decls as $selector => $style) {
$selector = trim($selector);
if ($selector === '') continue; // should not happen
if ($selector[0] === '+') {
if ($selector !== '' && $selector[0] === '+') continue;
}
if (!empty($scopes)) {
$new_selector = array(); // because multiple ones are possible
$selectors = array_map('trim', explode(',', $selector));
foreach ($scopes as $s1) {
foreach ($selectors as $s2) {
$new_selector[] = "$s1 $s2";
// Parse the selector
// Here is the relevant part of the CSS grammar:
//
// ruleset
// : selector [ ',' S* selector ]* '{' ...
// selector
// : simple_selector [ combinator selector | S+ [ combinator? selector ]? ]?
// combinator
// : '+' S*
// : '>' S*
// simple_selector
// : element_name [ HASH | class | attrib | pseudo ]*
// | [ HASH | class | attrib | pseudo ]+
// element_name
// : IDENT | '*'
// ;
// class
// : '.' IDENT
// ;
// attrib
// : '[' S* IDENT S* [ [ '=' | INCLUDES | DASHMATCH ] S*
// [ IDENT | STRING ] S* ]? ']'
// ;
// pseudo
// : ':' [ IDENT | FUNCTION S* [IDENT S*]? ')' ]
// ;
//
// For reference, here are the relevant tokens:
//
// HASH #{name}
// IDENT {ident}
// INCLUDES ==
// DASHMATCH |=
// STRING {string}
// FUNCTION {ident}\(
//
// And the lexical scanner tokens
//
// name {nmchar}+
// nmchar [_a-z0-9-]|{nonascii}|{escape}
// nonascii [\240-\377]
// escape {unicode}|\\[^\r\n\f0-9a-f]
// unicode \\{h}}{1,6}(\r\n|[ \t\r\n\f])?
// ident -?{nmstart}{nmchar*}
// nmstart [_a-z]|{nonascii}|{escape}
// string {string1}|{string2}
// string1 \"([^\n\r\f\\"]|\\{nl}|{escape})*\"
// string2 \'([^\n\r\f\\"]|\\{nl}|{escape})*\'
//
// We'll implement a subset (in order to reduce attack
// surface); in particular:
//
// - No Unicode support
// - No escapes support
// - No string support (by proxy no attrib support)
// - element_name is matched against allowed
// elements (some people might find this
// annoying...)
// - Pseudo-elements one of :first-child, :link,
// :visited, :active, :hover, :focus
// handle ruleset
$selectors = array_map('trim', explode(',', $selector));
$new_selectors = array();
foreach ($selectors as $sel) {
// split on +, > and spaces
$basic_selectors = preg_split('/\s*([+> ])\s*/', $sel, -1, PREG_SPLIT_DELIM_CAPTURE);
// even indices are chunks, odd indices are
// delimiters
$nsel = null;
$delim = null; // guaranteed to be non-null after
// two loop iterations
for ($i = 0, $c = count($basic_selectors); $i < $c; $i++) {
$x = $basic_selectors[$i];
if ($i % 2) {
// delimiter
if ($x === ' ') {
$delim = ' ';
} else {
$delim = ' ' . $x . ' ';
}
} else {
// simple selector
$components = preg_split('/([#.:])/', $x, -1, PREG_SPLIT_DELIM_CAPTURE);
$sdelim = null;
$nx = null;
for ($j = 0, $cc = count($components); $j < $cc; $j ++) {
$y = $components[$j];
if ($j === 0) {
if ($y === '*' || isset($html_definition->info[$y = strtolower($y)])) {
$nx = $y;
} else {
// $nx stays null; this matters
// if we don't manage to find
// any valid selector content,
// in which case we ignore the
// outer $delim
}
} elseif ($j % 2) {
// set delimiter
$sdelim = $y;
} else {
$attrdef = null;
if ($sdelim === '#') {
$attrdef = $this->_id_attrdef;
} elseif ($sdelim === '.') {
$attrdef = $this->_class_attrdef;
} elseif ($sdelim === ':') {
$attrdef = $this->_enum_attrdef;
} else {
throw new HTMLPurifier_Exception('broken invariant sdelim and preg_split');
}
$r = $attrdef->validate($y, $config, $context);
if ($r !== false) {
if ($r !== true) {
$y = $r;
}
if ($nx === null) {
$nx = '';
}
$nx .= $sdelim . $y;
}
}
}
if ($nx !== null) {
if ($nsel === null) {
$nsel = $nx;
} else {
$nsel .= $delim . $nx;
}
} else {
// delimiters to the left of invalid
// basic selector ignored
}
}
}
if ($nsel !== null) {
if (!empty($scopes)) {
foreach ($scopes as $s) {
$new_selectors[] = "$s $nsel";
}
} else {
$new_selectors[] = $nsel;
}
}
$selector = implode(', ', $new_selector); // now it's a string
}
if (empty($new_selectors)) continue;
$selector = implode(', ', $new_selectors);
foreach ($style as $name => $value) {
if (!isset($css_definition->info[$name])) {
unset($style[$name]);
@@ -110,10 +263,11 @@ class HTMLPurifier_Filter_ExtractStyleBlocks extends HTMLPurifier_Filter
}
$new_decls[$selector] = $style;
}
$this->_tidy->css[$k] = $new_decls;
$new_css[$k] = $new_decls;
}
// remove stuff that shouldn't be used, could be reenabled
// after security risks are analyzed
$this->_tidy->css = $new_css;
$this->_tidy->import = array();
$this->_tidy->charset = null;
$this->_tidy->namespace = null;

View File

@@ -36,6 +36,11 @@ class HTMLPurifier_Generator
*/
private $_flashCompat;
/**
* Cache of %Output.FixInnerHTML
*/
private $_innerHTMLFix;
/**
* Stack for keeping track of object information when outputting IE
* compatibility code.
@@ -54,6 +59,7 @@ class HTMLPurifier_Generator
public function __construct($config, $context) {
$this->config = $config;
$this->_scriptFix = $config->get('Output.CommentScriptContents');
$this->_innerHTMLFix = $config->get('Output.FixInnerHTML');
$this->_sortAttr = $config->get('Output.SortAttr');
$this->_flashCompat = $config->get('Output.FlashCompat');
$this->_def = $config->getHTMLDefinition();
@@ -98,9 +104,11 @@ class HTMLPurifier_Generator
}
// Normalize newlines to system defined value
$nl = $this->config->get('Output.Newline');
if ($nl === null) $nl = PHP_EOL;
if ($nl !== "\n") $html = str_replace("\n", $nl, $html);
if ($this->config->get('Core.NormalizeNewlines')) {
$nl = $this->config->get('Output.Newline');
if ($nl === null) $nl = PHP_EOL;
if ($nl !== "\n") $html = str_replace("\n", $nl, $html);
}
return $html;
}
@@ -130,19 +138,7 @@ class HTMLPurifier_Generator
$_extra = '';
if ($this->_flashCompat) {
if ($token->name == "object" && !empty($this->_flashStack)) {
$flash = array_pop($this->_flashStack);
$compat_token = new HTMLPurifier_Token_Empty("embed");
foreach ($flash->attr as $name => $val) {
if ($name == "classid") continue;
if ($name == "type") continue;
if ($name == "data") $name = "src";
$compat_token->attr[$name] = $val;
}
foreach ($flash->param as $name => $val) {
if ($name == "movie") $name = "src";
$compat_token->attr[$name] = $val;
}
$_extra = "<!--[if IE]>".$this->generateFromToken($compat_token)."<![endif]-->";
// doesn't do anything for now
}
}
return $_extra . '</' . $token->name . '>';
@@ -200,6 +196,37 @@ class HTMLPurifier_Generator
continue;
}
}
// Workaround for Internet Explorer innerHTML bug.
// Essentially, Internet Explorer, when calculating
// innerHTML, omits quotes if there are no instances of
// angled brackets, quotes or spaces. However, when parsing
// HTML (for example, when you assign to innerHTML), it
// treats backticks as quotes. Thus,
// <img alt="``" />
// becomes
// <img alt=`` />
// becomes
// <img alt='' />
// Fortunately, all we need to do is trigger an appropriate
// quoting style, which we do by adding an extra space.
// This also is consistent with the W3C spec, which states
// that user agents may ignore leading or trailing
// whitespace (in fact, most don't, at least for attributes
// like alt, but an extra space at the end is barely
// noticeable). Still, we have a configuration knob for
// this, since this transformation is not necesary if you
// don't process user input with innerHTML or you don't plan
// on supporting Internet Explorer.
if ($this->_innerHTMLFix) {
if (strpos($value, '`') !== false) {
// check if correct quoting style would not already be
// triggered
if (strcspn($value, '"\' <>') === strlen($value)) {
// protect!
$value .= ' ';
}
}
}
$html .= $key.'="'.$this->escape($value).'" ';
}
return rtrim($html);
@@ -215,7 +242,10 @@ class HTMLPurifier_Generator
* permissible for non-attribute output.
* @return String escaped data.
*/
public function escape($string, $quote = ENT_COMPAT) {
public function escape($string, $quote = null) {
// Workaround for APC bug on Mac Leopard reported by sidepodcast
// http://htmlpurifier.org/phorum/read.php?3,4823,4846
if ($quote === null) $quote = ENT_COMPAT;
return htmlspecialchars($string, $quote, 'UTF-8');
}

View File

@@ -147,7 +147,7 @@ class HTMLPurifier_HTMLDefinition extends HTMLPurifier_Definition
return $this->_anonModule;
}
private $_anonModule;
private $_anonModule = null;
// PUBLIC BUT INTERNAL VARIABLES --------------------------------------
@@ -300,7 +300,12 @@ class HTMLPurifier_HTMLDefinition extends HTMLPurifier_Definition
unset($allowed_attributes_mutable[$key]);
}
}
if ($delete) unset($this->info[$tag]->attr[$attr]);
if ($delete) {
if ($this->info[$tag]->attr[$attr]->required) {
trigger_error("Required attribute '$attr' in element '$tag' was not allowed, which means '$tag' will not be allowed either", E_USER_WARNING);
}
unset($this->info[$tag]->attr[$attr]);
}
}
}
// emit errors

View File

@@ -21,7 +21,7 @@ class HTMLPurifier_HTMLModule_Bdo extends HTMLPurifier_HTMLModule
// inclusions wrong for bdo: bdo allows Lang
)
);
$bdo->attr_transform_post['required-dir'] = new HTMLPurifier_AttrTransform_BdoDir();
$bdo->attr_transform_post[] = new HTMLPurifier_AttrTransform_BdoDir();
$this->attr_collections['I18N']['dir'] = 'Enum#ltr,rtl';
}

View File

@@ -35,7 +35,7 @@ class HTMLPurifier_HTMLModule_Forms extends HTMLPurifier_HTMLModule
'name' => 'CDATA',
'readonly' => 'Bool#readonly',
'size' => 'Number',
'src' => 'URI#embeds',
'src' => 'URI#embedded',
'tabindex' => 'Number',
'type' => 'Enum#text,password,checkbox,button,radio,submit,reset,file,hidden,image',
'value' => 'CDATA',
@@ -84,7 +84,8 @@ class HTMLPurifier_HTMLModule_Forms extends HTMLPurifier_HTMLModule
$button->excludes = $this->makeLookup(
'form', 'fieldset', // Form
'input', 'select', 'textarea', 'label', 'button', // Formctrl
'a' // as per HTML 4.01 spec, this is omitted by modularization
'a', // as per HTML 4.01 spec, this is omitted by modularization
'isindex', 'iframe' // legacy items
);
// Extra exclusion: img usemap="" is not permitted within this element.

View File

@@ -0,0 +1,38 @@
<?php
/**
* XHTML 1.1 Iframe Module provides inline frames.
*
* @note This module is not considered safe unless an Iframe
* whitelisting mechanism is specified. Currently, the only
* such mechanism is %URL.SafeIframeRegexp
*/
class HTMLPurifier_HTMLModule_Iframe extends HTMLPurifier_HTMLModule
{
public $name = 'Iframe';
public $safe = false;
public function setup($config) {
if ($config->get('HTML.SafeIframe')) {
$this->safe = true;
}
$this->addElement(
'iframe', 'Inline', 'Flow', 'Common',
array(
'src' => 'URI#embedded',
'width' => 'Length',
'height' => 'Length',
'name' => 'ID',
'scrolling' => 'Enum#yes,no,auto',
'frameborder' => 'Enum#0,1',
'longdesc' => 'URI',
'marginheight' => 'Pixels',
'marginwidth' => 'Pixels',
)
);
}
}
// vim: et sw=4 sts=4

View File

@@ -89,7 +89,7 @@ class HTMLPurifier_HTMLModule_Legacy extends HTMLPurifier_HTMLModule
$hr->attr['width'] = 'Length';
$img = $this->addBlankElement('img');
$img->attr['align'] = 'Enum#top,middle,bottom,left,right';
$img->attr['align'] = 'IAlign';
$img->attr['border'] = 'Pixels';
$img->attr['hspace'] = 'Pixels';
$img->attr['vspace'] = 'Pixels';
@@ -136,6 +136,22 @@ class HTMLPurifier_HTMLModule_Legacy extends HTMLPurifier_HTMLModule
$ul->attr['compact'] = 'Bool#compact';
$ul->attr['type'] = 'Enum#square,disc,circle';
// "safe" modifications to "unsafe" elements
// WARNING: If you want to add support for an unsafe, legacy
// attribute, make a new TrustedLegacy module with the trusted
// bit set appropriately
$form = $this->addBlankElement('form');
$form->content_model = 'Flow | #PCDATA';
$form->content_model_type = 'optional';
$form->attr['target'] = 'FrameTarget';
$input = $this->addBlankElement('input');
$input->attr['align'] = 'IAlign';
$legend = $this->addBlankElement('legend');
$legend->attr['align'] = 'LAlign';
}
}

View File

@@ -20,10 +20,16 @@ class HTMLPurifier_HTMLModule_List extends HTMLPurifier_HTMLModule
public $content_sets = array('Flow' => 'List');
public function setup($config) {
$ol = $this->addElement('ol', 'List', 'Required: li', 'Common');
$ol->wrap = "li";
$ul = $this->addElement('ul', 'List', 'Required: li', 'Common');
$ul->wrap = "li";
$ol = $this->addElement('ol', 'List', new HTMLPurifier_ChildDef_List(), 'Common');
$ul = $this->addElement('ul', 'List', new HTMLPurifier_ChildDef_List(), 'Common');
// XXX The wrap attribute is handled by MakeWellFormed. This is all
// quite unsatisfactory, because we generated this
// *specifically* for lists, and now a big chunk of the handling
// is done properly by the List ChildDef. So actually, we just
// want enough information to make autoclosing work properly,
// and then hand off the tricky stuff to the ChildDef.
$ol->wrap = 'li';
$ul->wrap = 'li';
$this->addElement('dl', 'List', 'Required: dt | dd', 'Common');
$this->addElement('li', false, 'Flow', 'Common');

View File

@@ -11,7 +11,7 @@ class HTMLPurifier_HTMLModule_Name extends HTMLPurifier_HTMLModule
$element = $this->addBlankElement($name);
$element->attr['name'] = 'CDATA';
if (!$config->get('HTML.Attr.Name.UseCDATA')) {
$element->attr_transform_post['NameSync'] = new HTMLPurifier_AttrTransform_NameSync();
$element->attr_transform_post[] = new HTMLPurifier_AttrTransform_NameSync();
}
}
}

View File

@@ -0,0 +1,19 @@
<?php
/**
* Module adds the nofollow attribute transformation to a tags. It
* is enabled by HTML.Nofollow
*/
class HTMLPurifier_HTMLModule_Nofollow extends HTMLPurifier_HTMLModule
{
public $name = 'Nofollow';
public function setup($config) {
$a = $this->addBlankElement('a');
$a->attr_transform_post[] = new HTMLPurifier_AttrTransform_Nofollow();
}
}
// vim: et sw=4 sts=4

View File

@@ -21,7 +21,7 @@ class HTMLPurifier_HTMLModule_SafeEmbed extends HTMLPurifier_HTMLModule
'allowscriptaccess' => 'Enum#never',
'allownetworking' => 'Enum#internal',
'flashvars' => 'Text',
'wmode' => 'Enum#window',
'wmode' => 'Enum#window,transparent,opaque',
'name' => 'ID',
)
);

View File

@@ -29,7 +29,6 @@ class HTMLPurifier_HTMLModule_SafeObject extends HTMLPurifier_HTMLModule
'width' => 'Pixels#' . $max,
'height' => 'Pixels#' . $max,
'data' => 'URI#embedded',
'classid' => 'Enum#clsid:d27cdb6e-ae6d-11cf-96b8-444553540000',
'codebase' => new HTMLPurifier_AttrDef_Enum(array(
'http://download.macromedia.com/pub/shockwave/cabs/flash/swflash.cab#version=6,0,40,0')),
)

View File

@@ -0,0 +1,37 @@
<?php
/**
* A "safe" script module. No inline JS is allowed, and pointed to JS
* files must match whitelist.
*/
class HTMLPurifier_HTMLModule_SafeScripting extends HTMLPurifier_HTMLModule
{
public $name = 'SafeScripting';
public function setup($config) {
// These definitions are not intrinsically safe: the attribute transforms
// are a vital part of ensuring safety.
$allowed = $config->get('HTML.SafeScripting');
$script = $this->addElement(
'script',
'Inline',
'Empty',
null,
array(
// While technically not required by the spec, we're forcing
// it to this value.
'type' => 'Enum#text/javascript',
'src*' => new HTMLPurifier_AttrDef_Enum(array_keys($allowed))
)
);
$script->attr_transform_pre[] =
$script->attr_transform_post[] = new HTMLPurifier_AttrTransform_ScriptRequired();
}
}
// vim: et sw=4 sts=4

View File

@@ -45,8 +45,8 @@ class HTMLPurifier_HTMLModule_Scripting extends HTMLPurifier_HTMLModule
);
$this->info['script']->content_model = '#PCDATA';
$this->info['script']->content_model_type = 'optional';
$this->info['script']->attr_transform_pre['type'] =
$this->info['script']->attr_transform_post['type'] =
$this->info['script']->attr_transform_pre[] =
$this->info['script']->attr_transform_post[] =
new HTMLPurifier_AttrTransform_ScriptRequired();
}
}

View File

@@ -37,6 +37,9 @@ class HTMLPurifier_HTMLModule_Tables extends HTMLPurifier_HTMLModule
'abbr' => 'Text',
'colspan' => 'Number',
'rowspan' => 'Number',
// Apparently, as of HTML5 this attribute only applies
// to 'th' elements.
'scope' => 'Enum#row,col,rowgroup,colgroup',
),
$cell_align
);

View File

@@ -0,0 +1,19 @@
<?php
/**
* Module adds the target=blank attribute transformation to a tags. It
* is enabled by HTML.TargetBlank
*/
class HTMLPurifier_HTMLModule_TargetBlank extends HTMLPurifier_HTMLModule
{
public $name = 'TargetBlank';
public function setup($config) {
$a = $this->addBlankElement('a');
$a->attr_transform_post[] = new HTMLPurifier_AttrTransform_TargetBlank();
}
}
// vim: et sw=4 sts=4

View File

@@ -65,11 +65,11 @@ class HTMLPurifier_HTMLModuleManager
'Presentation', 'Edit', 'Bdo', 'Tables', 'Image',
'StyleAttribute',
// Unsafe:
'Scripting', 'Object', 'Forms',
'Scripting', 'Object', 'Forms',
// Sorta legacy, but present in strict:
'Name',
);
$transitional = array('Legacy', 'Target');
$transitional = array('Legacy', 'Target', 'Iframe');
$xml = array('XMLCommonAttributes');
$non_xml = array('NonXMLCommonAttributes');
@@ -112,7 +112,9 @@ class HTMLPurifier_HTMLModuleManager
$this->doctypes->register(
'XHTML 1.1', true,
array_merge($common, $xml, array('Ruby')),
// Iframe is a real XHTML 1.1 module, despite being
// "transitional"!
array_merge($common, $xml, array('Ruby', 'Iframe')),
array('Tidy_Strict', 'Tidy_XHTML', 'Tidy_Proprietary', 'Tidy_Strict', 'Tidy_Name'), // Tidy_XHTML1_1
array(),
'-//W3C//DTD XHTML 1.1//EN',
@@ -216,19 +218,25 @@ class HTMLPurifier_HTMLModuleManager
}
}
// add proprietary module (this gets special treatment because
// it is completely removed from doctypes, etc.)
// custom modules
if ($config->get('HTML.Proprietary')) {
$modules[] = 'Proprietary';
}
// add SafeObject/Safeembed modules
if ($config->get('HTML.SafeObject')) {
$modules[] = 'SafeObject';
}
if ($config->get('HTML.SafeEmbed')) {
$modules[] = 'SafeEmbed';
}
if ($config->get('HTML.SafeScripting') !== array()) {
$modules[] = 'SafeScripting';
}
if ($config->get('HTML.Nofollow')) {
$modules[] = 'Nofollow';
}
if ($config->get('HTML.TargetBlank')) {
$modules[] = 'TargetBlank';
}
// merge in custom modules
$modules = array_merge($modules, $this->userModules);
@@ -364,6 +372,13 @@ class HTMLPurifier_HTMLModuleManager
// :TODO:
// non-standalone definitions that don't have a standalone
// to merge into could be deferred to the end
// HOWEVER, it is perfectly valid for a non-standalone
// definition to lack a standalone definition, even
// after all processing: this allows us to safely
// specify extra attributes for elements that may not be
// enabled all in one place. In particular, this might
// be the case for trusted elements. WARNING: care must
// be taken that the /extra/ definitions are all safe.
continue;
}

View File

@@ -5,6 +5,9 @@ class HTMLPurifier_Injector_RemoveEmpty extends HTMLPurifier_Injector
private $context, $config, $attrValidator, $removeNbsp, $removeNbspExceptions;
// TODO: make me configurable
private $_exclude = array('colgroup' => 1, 'th' => 1, 'td' => 1, 'iframe' => 1);
public function prepare($config, $context) {
parent::prepare($config, $context);
$this->config = $config;
@@ -30,7 +33,7 @@ class HTMLPurifier_Injector_RemoveEmpty extends HTMLPurifier_Injector
break;
}
if (!$next || ($next instanceof HTMLPurifier_Token_End && $next->name == $token->name)) {
if ($token->name == 'colgroup') return;
if (isset($this->_exclude[$token->name])) return;
$this->attrValidator->validateToken($token, $this->config, $this->context);
$token->armor['ValidateAttributes'] = true;
if (isset($token->attr['id']) || isset($token->attr['name'])) return;

View File

@@ -22,6 +22,7 @@ class HTMLPurifier_Injector_SafeObject extends HTMLPurifier_Injector
'movie' => true,
'flashvars' => true,
'src' => true,
'allowFullScreen' => true, // if omitted, assume to be 'false'
);
public function prepare($config, $context) {

View File

@@ -230,6 +230,17 @@ class HTMLPurifier_Lexer
);
}
/**
* Special Internet Explorer conditional comments should be removed.
*/
protected static function removeIEConditional($string) {
return preg_replace(
'#<!--\[if [^>]+\]>.*?<!\[endif\]-->#si', // probably should generalize for all strings
'',
$string
);
}
/**
* Callback function for escapeCDATA() that does the work.
*
@@ -252,8 +263,10 @@ class HTMLPurifier_Lexer
public function normalize($html, $config, $context) {
// normalize newlines to \n
$html = str_replace("\r\n", "\n", $html);
$html = str_replace("\r", "\n", $html);
if ($config->get('Core.NormalizeNewlines')) {
$html = str_replace("\r\n", "\n", $html);
$html = str_replace("\r", "\n", $html);
}
if ($config->get('HTML.Trusted')) {
// escape convoluted CDATA
@@ -263,6 +276,8 @@ class HTMLPurifier_Lexer
// escape CDATA
$html = $this->escapeCDATA($html);
$html = $this->removeIEConditional($html);
// extract body from document if applicable
if ($config->get('Core.ConvertDocumentToFragment')) {
$e = false;
@@ -284,6 +299,11 @@ class HTMLPurifier_Lexer
// represent non-SGML characters (horror, horror!)
$html = HTMLPurifier_Encoder::cleanUTF8($html);
// if processing instructions are to removed, remove them now
if ($config->get('Core.RemoveProcessingInstructions')) {
$html = preg_replace('#<\?.+?\?>#s', '', $html);
}
return $html;
}

View File

@@ -72,23 +72,57 @@ class HTMLPurifier_Lexer_DOMLex extends HTMLPurifier_Lexer
}
/**
* Recursive function that tokenizes a node, putting it into an accumulator.
*
* Iterative function that tokenizes a node, putting it into an accumulator.
* To iterate is human, to recurse divine - L. Peter Deutsch
* @param $node DOMNode to be tokenized.
* @param $tokens Array-list of already tokenized tokens.
* @param $collect Says whether or start and close are collected, set to
* false at first recursion because it's the implicit DIV
* tag you're dealing with.
* @returns Tokens of node appended to previously passed tokens.
*/
protected function tokenizeDOM($node, &$tokens, $collect = false) {
protected function tokenizeDOM($node, &$tokens) {
$level = 0;
$nodes = array($level => array($node));
$closingNodes = array();
do {
while (!empty($nodes[$level])) {
$node = array_shift($nodes[$level]); // FIFO
$collect = $level > 0 ? true : false;
$needEndingTag = $this->createStartNode($node, $tokens, $collect);
if ($needEndingTag) {
$closingNodes[$level][] = $node;
}
if ($node->childNodes && $node->childNodes->length) {
$level++;
$nodes[$level] = array();
foreach ($node->childNodes as $childNode) {
array_push($nodes[$level], $childNode);
}
}
}
$level--;
if ($level && isset($closingNodes[$level])) {
while($node = array_pop($closingNodes[$level])) {
$this->createEndNode($node, $tokens);
}
}
} while ($level > 0);
}
/**
* @param $node DOMNode to be tokenized.
* @param $tokens Array-list of already tokenized tokens.
* @param $collect Says whether or start and close are collected, set to
* false at first recursion because it's the implicit DIV
* tag you're dealing with.
* @returns bool if the token needs an endtoken
*/
protected function createStartNode($node, &$tokens, $collect) {
// intercept non element nodes. WE MUST catch all of them,
// but we're not getting the character reference nodes because
// those should have been preprocessed
if ($node->nodeType === XML_TEXT_NODE) {
$tokens[] = $this->factory->createText($node->data);
return;
return false;
} elseif ($node->nodeType === XML_CDATA_SECTION_NODE) {
// undo libxml's special treatment of <script> and <style> tags
$last = end($tokens);
@@ -106,48 +140,44 @@ class HTMLPurifier_Lexer_DOMLex extends HTMLPurifier_Lexer
}
}
$tokens[] = $this->factory->createText($this->parseData($data));
return;
return false;
} elseif ($node->nodeType === XML_COMMENT_NODE) {
// this is code is only invoked for comments in script/style in versions
// of libxml pre-2.6.28 (regular comments, of course, are still
// handled regularly)
$tokens[] = $this->factory->createComment($node->data);
return;
return false;
} elseif (
// not-well tested: there may be other nodes we have to grab
$node->nodeType !== XML_ELEMENT_NODE
) {
return;
return false;
}
$attr = $node->hasAttributes() ?
$this->transformAttrToAssoc($node->attributes) :
array();
$attr = $node->hasAttributes() ? $this->transformAttrToAssoc($node->attributes) : array();
// We still have to make sure that the element actually IS empty
if (!$node->childNodes->length) {
if ($collect) {
$tokens[] = $this->factory->createEmpty($node->tagName, $attr);
}
return false;
} else {
if ($collect) { // don't wrap on first iteration
if ($collect) {
$tokens[] = $this->factory->createStart(
$tag_name = $node->tagName, // somehow, it get's dropped
$attr
);
}
foreach ($node->childNodes as $node) {
// remember, it's an accumulator. Otherwise, we'd have
// to use array_merge
$this->tokenizeDOM($node, $tokens, true);
}
if ($collect) {
$tokens[] = $this->factory->createEnd($tag_name);
}
return true;
}
}
protected function createEndNode($node, &$tokens) {
$tokens[] = $this->factory->createEnd($node->tagName);
}
/**
* Converts a DOMNamedNodeMap of DOMAttr objects into an assoc array.
*

View File

@@ -1,139 +0,0 @@
<?php
/**
* Proof-of-concept lexer that uses the PEAR package XML_HTMLSax3 to parse HTML.
*
* PEAR, not suprisingly, also has a SAX parser for HTML. I don't know
* very much about implementation, but it's fairly well written. However, that
* abstraction comes at a price: performance. You need to have it installed,
* and if the API changes, it might break our adapter. Not sure whether or not
* it's UTF-8 aware, but it has some entity parsing trouble (in all areas,
* text and attributes).
*
* Quite personally, I don't recommend using the PEAR class, and the defaults
* don't use it. The unit tests do perform the tests on the SAX parser too, but
* whatever it does for poorly formed HTML is up to it.
*
* @todo Generalize so that XML_HTMLSax is also supported.
*
* @warning Entity-resolution inside attributes is broken.
*/
class HTMLPurifier_Lexer_PEARSax3 extends HTMLPurifier_Lexer
{
/**
* Internal accumulator array for SAX parsers.
*/
protected $tokens = array();
protected $last_token_was_empty;
private $parent_handler;
private $stack = array();
public function tokenizeHTML($string, $config, $context) {
$this->tokens = array();
$this->last_token_was_empty = false;
$string = $this->normalize($string, $config, $context);
$this->parent_handler = set_error_handler(array($this, 'muteStrictErrorHandler'));
$parser = new XML_HTMLSax3();
$parser->set_object($this);
$parser->set_element_handler('openHandler','closeHandler');
$parser->set_data_handler('dataHandler');
$parser->set_escape_handler('escapeHandler');
// doesn't seem to work correctly for attributes
$parser->set_option('XML_OPTION_ENTITIES_PARSED', 1);
$parser->parse($string);
restore_error_handler();
return $this->tokens;
}
/**
* Open tag event handler, interface is defined by PEAR package.
*/
public function openHandler(&$parser, $name, $attrs, $closed) {
// entities are not resolved in attrs
foreach ($attrs as $key => $attr) {
$attrs[$key] = $this->parseData($attr);
}
if ($closed) {
$this->tokens[] = new HTMLPurifier_Token_Empty($name, $attrs);
$this->last_token_was_empty = true;
} else {
$this->tokens[] = new HTMLPurifier_Token_Start($name, $attrs);
}
$this->stack[] = $name;
return true;
}
/**
* Close tag event handler, interface is defined by PEAR package.
*/
public function closeHandler(&$parser, $name) {
// HTMLSax3 seems to always send empty tags an extra close tag
// check and ignore if you see it:
// [TESTME] to make sure it doesn't overreach
if ($this->last_token_was_empty) {
$this->last_token_was_empty = false;
return true;
}
$this->tokens[] = new HTMLPurifier_Token_End($name);
if (!empty($this->stack)) array_pop($this->stack);
return true;
}
/**
* Data event handler, interface is defined by PEAR package.
*/
public function dataHandler(&$parser, $data) {
$this->last_token_was_empty = false;
$this->tokens[] = new HTMLPurifier_Token_Text($data);
return true;
}
/**
* Escaped text handler, interface is defined by PEAR package.
*/
public function escapeHandler(&$parser, $data) {
if (strpos($data, '--') === 0) {
// remove trailing and leading double-dashes
$data = substr($data, 2);
if (strlen($data) >= 2 && substr($data, -2) == "--") {
$data = substr($data, 0, -2);
}
if (isset($this->stack[sizeof($this->stack) - 1]) &&
$this->stack[sizeof($this->stack) - 1] == "style") {
$this->tokens[] = new HTMLPurifier_Token_Text($data);
} else {
$this->tokens[] = new HTMLPurifier_Token_Comment($data);
}
$this->last_token_was_empty = false;
}
// CDATA is handled elsewhere, but if it was handled here:
//if (strpos($data, '[CDATA[') === 0) {
// $this->tokens[] = new HTMLPurifier_Token_Text(
// substr($data, 7, strlen($data) - 9) );
//}
return true;
}
/**
* An error handler that mutes strict errors
*/
public function muteStrictErrorHandler($errno, $errstr, $errfile=null, $errline=null, $errcontext=null) {
if ($errno == E_STRICT) return;
return call_user_func($this->parent_handler, $errno, $errstr, $errfile, $errline, $errcontext);
}
}
// vim: et sw=4 sts=4

View File

@@ -125,8 +125,6 @@ class HTML5 {
const EOF = 5;
public function __construct($data) {
$data = str_replace("\r\n", "\n", $data);
$data = str_replace("\r", null, $data);
$this->data = $data;
$this->char = -1;

View File

@@ -11,8 +11,6 @@ abstract class HTMLPurifier_Strategy_Composite extends HTMLPurifier_Strategy
*/
protected $strategies = array();
abstract public function __construct();
public function execute($tokens, $config, $context) {
foreach ($this->strategies as $strategy) {
$tokens = $strategy->execute($tokens, $config, $context);

View File

@@ -26,6 +26,22 @@
* translated into text depends on the child definitions.
*
* @todo Enable nodes to be bubbled out of the structure.
*
* @warning This algorithm (though it may be hard to see) proceeds from
* a top-down fashion. Thus, parents are processed before
* children. This is easy to implement and has a nice effiency
* benefit, in that if a node is removed, we never waste any
* time processing it, but it also means that if a child
* changes in a non-encapsulated way (e.g. it is removed), we
* need to go back and reprocess the parent to see if those
* changes resulted in problems for the parent. See
* [BACKTRACK] for an example of this. In the current
* implementation, this backtracking can only be triggered when
* a node is removed and if that node was the sole node, the
* parent would need to be removed. As such, it is easy to see
* that backtracking only incurs constant overhead. If more
* sophisticated backtracking is implemented, care must be
* taken to avoid nontermination or exponential blowup.
*/
class HTMLPurifier_Strategy_FixNesting extends HTMLPurifier_Strategy
@@ -38,6 +54,8 @@ class HTMLPurifier_Strategy_FixNesting extends HTMLPurifier_Strategy
// get a copy of the HTML definition
$definition = $config->getHTMLDefinition();
$excludes_enabled = !$config->get('Core.DisableExcludes');
// insert implicit "parent" node, will be removed at end.
// DEFINITION CALL
$parent_name = $definition->info_parent;
@@ -147,7 +165,7 @@ class HTMLPurifier_Strategy_FixNesting extends HTMLPurifier_Strategy
// parent exclusions. The array should not be very large, two
// elements at most.
$excluded = false;
if (!empty($exclude_stack)) {
if (!empty($exclude_stack) && $excludes_enabled) {
foreach ($exclude_stack as $lookup) {
if (isset($lookup[$tokens[$i]->name])) {
$excluded = true;
@@ -235,7 +253,7 @@ class HTMLPurifier_Strategy_FixNesting extends HTMLPurifier_Strategy
// our current implementation claims that that case would
// not allow empty, even if it did
if (!$parent_def->child->allow_empty) {
// we need to do a double-check
// we need to do a double-check [BACKTRACK]
$i = $parent_index;
array_pop($stack);
}

View File

@@ -2,6 +2,14 @@
/**
* Takes tokens makes them well-formed (balance end tags, etc.)
*
* Specification of the armor attributes this strategy uses:
*
* - MakeWellFormed_TagClosedError: This armor field is used to
* suppress tag closed errors for certain tokens [TagClosedSuppress],
* in particular, if a tag was generated automatically by HTML
* Purifier, we may rely on our infrastructure to close it for us
* and shouldn't report an error to the user [TagClosedAuto].
*/
class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
{
@@ -37,12 +45,18 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
protected $context;
public function execute($tokens, $config, $context) {
$tokens = new HTMLPurifier_Array($tokens);
$definition = $config->getHTMLDefinition();
// local variables
$generator = new HTMLPurifier_Generator($config, $context);
$escape_invalid_tags = $config->get('Core.EscapeInvalidTags');
// used for autoclose early abortion
$global_parent_allowed_elements = array();
if (isset($definition->info[$definition->info_parent])) {
// may be unset under testing circumstances
$global_parent_allowed_elements = $definition->info[$definition->info_parent]->child->getAllowedElements($config);
}
$e = $context->get('ErrorCollector', true);
$t = false; // token index
$i = false; // injector index
@@ -102,7 +116,7 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
// -- end INJECTOR --
// a note on punting:
// a note on reprocessing:
// In order to reduce code duplication, whenever some code needs
// to make HTML changes in order to make things "correct", the
// new HTML gets sent through the purifier, regardless of its
@@ -149,7 +163,7 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
$top_nesting = array_pop($this->stack);
$this->stack[] = $top_nesting;
// send error
// send error [TagClosedSuppress]
if ($e && !isset($top_nesting->armor['MakeWellFormed_TagClosedError'])) {
$e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag closed by document end', $top_nesting);
}
@@ -193,12 +207,12 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
$ok = false;
if ($type === 'empty' && $token instanceof HTMLPurifier_Token_Start) {
// claims to be a start tag but is empty
$token = new HTMLPurifier_Token_Empty($token->name, $token->attr);
$token = new HTMLPurifier_Token_Empty($token->name, $token->attr, $token->line, $token->col, $token->armor);
$ok = true;
} elseif ($type && $type !== 'empty' && $token instanceof HTMLPurifier_Token_Empty) {
// claims to be empty but really is a start tag
$this->swap(new HTMLPurifier_Token_End($token->name));
$this->insertBefore(new HTMLPurifier_Token_Start($token->name, $token->attr));
$this->insertBefore(new HTMLPurifier_Token_Start($token->name, $token->attr, $token->line, $token->col, $token->armor));
// punt (since we had to modify the input stream in a non-trivial way)
$reprocess = true;
continue;
@@ -211,6 +225,19 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
// ...unless they also have to close their parent
if (!empty($this->stack)) {
// Performance note: you might think that it's rather
// inefficient, recalculating the autoclose information
// for every tag that a token closes (since when we
// do an autoclose, we push a new token into the
// stream and then /process/ that, before
// re-processing this token.) But this is
// necessary, because an injector can make an
// arbitrary transformations to the autoclosing
// tokens we introduce, so things may have changed
// in the meantime. Also, doing the inefficient thing is
// "easy" to reason about (for certain perverse definitions
// of "easy")
$parent = array_pop($this->stack);
$this->stack[] = $parent;
@@ -243,24 +270,51 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
}
if ($autoclose) {
// errors need to be updated
$new_token = new HTMLPurifier_Token_End($parent->name);
$new_token->start = $parent;
if ($carryover) {
$element = clone $parent;
$element->armor['MakeWellFormed_TagClosedError'] = true;
$element->carryover = true;
$this->processToken(array($new_token, $token, $element));
} else {
$this->insertBefore($new_token);
}
if ($e && !isset($parent->armor['MakeWellFormed_TagClosedError'])) {
if (!$carryover) {
$e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag auto closed', $parent);
} else {
$e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag carryover', $parent);
// check if this autoclose is doomed to fail
// (this rechecks $parent, which his harmless)
$autoclose_ok = isset($global_parent_allowed_elements[$token->name]);
if (!$autoclose_ok) {
foreach ($this->stack as $ancestor) {
$elements = $definition->info[$ancestor->name]->child->getAllowedElements($config);
if (isset($elements[$token->name])) {
$autoclose_ok = true;
break;
}
if ($definition->info[$token->name]->wrap) {
$wrapname = $definition->info[$token->name]->wrap;
$wrapdef = $definition->info[$wrapname];
$wrap_elements = $wrapdef->child->getAllowedElements($config);
if (isset($wrap_elements[$token->name]) && isset($elements[$wrapname])) {
$autoclose_ok = true;
break;
}
}
}
}
if ($autoclose_ok) {
// errors need to be updated
$new_token = new HTMLPurifier_Token_End($parent->name);
$new_token->start = $parent;
if ($carryover) {
$element = clone $parent;
// [TagClosedAuto]
$element->armor['MakeWellFormed_TagClosedError'] = true;
$element->carryover = true;
$this->processToken(array($new_token, $token, $element));
} else {
$this->insertBefore($new_token);
}
// [TagClosedSuppress]
if ($e && !isset($parent->armor['MakeWellFormed_TagClosedError'])) {
if (!$carryover) {
$e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag auto closed', $parent);
} else {
$e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag carryover', $parent);
}
}
} else {
$this->remove();
}
$reprocess = true;
continue;
}
@@ -366,7 +420,7 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
if ($e) {
for ($j = $c - 1; $j > 0; $j--) {
// notice we exclude $j == 0, i.e. the current ending tag, from
// the errors...
// the errors... [TagClosedSuppress]
if (!isset($skipped_tags[$j]->armor['MakeWellFormed_TagClosedError'])) {
$e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag closed by element end', $skipped_tags[$j]);
}
@@ -381,6 +435,7 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
$new_token->start = $skipped_tags[$j];
array_unshift($replace, $new_token);
if (isset($definition->info[$new_token->name]) && $definition->info[$new_token->name]->formatting) {
// [TagClosedAuto]
$element = clone $skipped_tags[$j];
$element->carryover = true;
$element->armor['MakeWellFormed_TagClosedError'] = true;
@@ -398,7 +453,7 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
$context->destroy('CurrentToken');
unset($this->injectors, $this->stack, $this->tokens, $this->t);
return $tokens;
return $tokens->getArray();
}
/**
@@ -435,6 +490,7 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
// array(number nodes to delete, new node 1, new node 2, ...)
$delete = array_shift($token);
throw new Exception("unsupported");
$old = array_splice($this->tokens, $this->t, $delete, $token);
if ($injector > -1) {
@@ -449,22 +505,24 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
}
/**
* Inserts a token before the current token. Cursor now points to this token
* Inserts a token before the current token. Cursor now points to
* this token. You must reprocess after this.
*/
private function insertBefore($token) {
array_splice($this->tokens, $this->t, 0, array($token));
$this->tokens->insertBefore($this->t, $token);
}
/**
* Removes current token. Cursor now points to new token occupying previously
* occupied space.
* occupied space. You must reprocess after this.
*/
private function remove() {
array_splice($this->tokens, $this->t, 1);
$this->tokens->remove($this->t);
}
/**
* Swap current token with new token. Cursor points to new token (no change).
* Swap current token with new token. Cursor points to new token (no
* change). You must reprocess after this.
*/
private function swap($token) {
$this->tokens[$this->t] = $token;

View File

@@ -21,6 +21,9 @@ class HTMLPurifier_Strategy_RemoveForeignElements extends HTMLPurifier_Strategy
// currently only used to determine if comments should be kept
$trusted = $config->get('HTML.Trusted');
$comment_lookup = $config->get('HTML.AllowedComments');
$comment_regexp = $config->get('HTML.AllowedCommentsRegexp');
$check_comments = $comment_lookup !== array() || $comment_regexp !== null;
$remove_script_contents = $config->get('Core.RemoveScriptContents');
$hidden_elements = $config->get('Core.HiddenElements');
@@ -128,23 +131,37 @@ class HTMLPurifier_Strategy_RemoveForeignElements extends HTMLPurifier_Strategy
if ($textify_comments !== false) {
$data = $token->data;
$token = new HTMLPurifier_Token_Text($data);
} elseif ($trusted) {
// keep, but perform comment cleaning
} elseif ($trusted || $check_comments) {
// always cleanup comments
$trailing_hyphen = false;
if ($e) {
// perform check whether or not there's a trailing hyphen
if (substr($token->data, -1) == '-') {
$e->send(E_NOTICE, 'Strategy_RemoveForeignElements: Trailing hyphen in comment removed');
$trailing_hyphen = true;
}
}
$token->data = rtrim($token->data, '-');
$found_double_hyphen = false;
while (strpos($token->data, '--') !== false) {
if ($e && !$found_double_hyphen) {
$e->send(E_NOTICE, 'Strategy_RemoveForeignElements: Hyphens in comment collapsed');
}
$found_double_hyphen = true; // prevent double-erroring
$found_double_hyphen = true;
$token->data = str_replace('--', '-', $token->data);
}
if ($trusted || !empty($comment_lookup[trim($token->data)]) || ($comment_regexp !== NULL && preg_match($comment_regexp, trim($token->data)))) {
// OK good
if ($e) {
if ($trailing_hyphen) {
$e->send(E_NOTICE, 'Strategy_RemoveForeignElements: Trailing hyphen in comment removed');
}
if ($found_double_hyphen) {
$e->send(E_NOTICE, 'Strategy_RemoveForeignElements: Hyphens in comment collapsed');
}
}
} else {
if ($e) {
$e->send(E_NOTICE, 'Strategy_RemoveForeignElements: Comment removed');
}
continue;
}
} else {
// strip comments
if ($e) $e->send(E_NOTICE, 'Strategy_RemoveForeignElements: Comment removed');

View File

@@ -63,13 +63,15 @@ class HTMLPurifier_TagTransform_Font extends HTMLPurifier_TagTransform
// handle size transform
if (isset($attr['size'])) {
// normalize large numbers
if ($attr['size']{0} == '+' || $attr['size']{0} == '-') {
$size = (int) $attr['size'];
if ($size < -2) $attr['size'] = '-2';
if ($size > 4) $attr['size'] = '+4';
} else {
$size = (int) $attr['size'];
if ($size > 7) $attr['size'] = '7';
if ($attr['size'] !== '') {
if ($attr['size']{0} == '+' || $attr['size']{0} == '-') {
$size = (int) $attr['size'];
if ($size < -2) $attr['size'] = '-2';
if ($size > 4) $attr['size'] = '+4';
} else {
$size = (int) $attr['size'];
if ($size > 7) $attr['size'] = '7';
}
}
if (isset($this->_size_lookup[$attr['size']])) {
$prepend_style .= 'font-size:' .

Some files were not shown because too many files have changed in this diff Show More