1
0
mirror of https://github.com/ezyang/htmlpurifier.git synced 2025-08-04 21:28:06 +02:00

Compare commits

..

20 Commits

Author SHA1 Message Date
Edward Z. Yang
9db861e356 Release 2.1.3, merged in 1404 to HEAD.
git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/branches/strict@1444 48356398-32a2-884e-a903-53898d9a118a
2007-11-06 04:34:33 +00:00
Edward Z. Yang
b3f0e6c86c Release 2.1.2, merged in 1368 to HEAD.
git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/branches/strict@1404 48356398-32a2-884e-a903-53898d9a118a
2007-09-03 15:40:43 +00:00
Edward Z. Yang
80c60bb9b5 Release 2.1.0, merged in 1255 to HEAD.
git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/branches/strict@1368 48356398-32a2-884e-a903-53898d9a118a
2007-08-05 02:02:46 +00:00
Edward Z. Yang
503e76081b Revert to 2.0.1, it appears that the 2.1.0 merge was done improperly.
git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/branches/strict@1367 48356398-32a2-884e-a903-53898d9a118a
2007-08-05 01:42:52 +00:00
Edward Z. Yang
678a593e62 Release 2.1.0, merged in 1313 to HEAD.
git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/branches/strict@1352 48356398-32a2-884e-a903-53898d9a118a
2007-08-03 03:20:49 +00:00
Edward Z. Yang
495164e938 Release 2.0.1, merged in 1181 to HEAD.
git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/branches/strict@1255 48356398-32a2-884e-a903-53898d9a118a
2007-06-27 14:30:45 +00:00
Edward Z. Yang
42858ad594 Finish up with a few more files that didn't get updated. Hrmm..
git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/branches/strict@1181 48356398-32a2-884e-a903-53898d9a118a
2007-06-21 00:53:09 +00:00
Edward Z. Yang
5ecb11f19a Tack on missing basic smoketests.
git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/branches/strict@1180 48356398-32a2-884e-a903-53898d9a118a
2007-06-21 00:42:00 +00:00
Edward Z. Yang
0101311193 Release 2.0.0, merged in 1026 to HEAD.
git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/branches/strict@1179 48356398-32a2-884e-a903-53898d9a118a
2007-06-21 00:36:12 +00:00
Edward Z. Yang
c35eb3e95f Release 1.6.1, merged in 931 to HEAD.
git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/branches/strict@1026 48356398-32a2-884e-a903-53898d9a118a
2007-05-05 20:49:49 +00:00
Edward Z. Yang
b829e76bbf Release 1.6.0, merged in r875-930.
git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/branches/strict@931 48356398-32a2-884e-a903-53898d9a118a
2007-04-02 03:09:23 +00:00
Edward Z. Yang
e967680250 Really release 1.5.0.
git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/branches/strict@876 48356398-32a2-884e-a903-53898d9a118a
2007-03-24 02:19:11 +00:00
Edward Z. Yang
dd2fd06591 Release 1.5.0, merged in r688-867.
- LanguageFactory::instance() declared static
- HTMLModuleManagerTest pass by reference bug fixed, merge back into trunk scheduled

git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/branches/strict@869 48356398-32a2-884e-a903-53898d9a118a
2007-03-24 01:04:06 +00:00
Edward Z. Yang
cec7a1c087 Release 1.4.1, merged in 685-687.
git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/branches/strict@689 48356398-32a2-884e-a903-53898d9a118a
2007-01-21 21:54:03 +00:00
Edward Z. Yang
c2d3d5b859 Release 1.4.0.
git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/branches/strict@682 48356398-32a2-884e-a903-53898d9a118a
2007-01-21 17:45:33 +00:00
Edward Z. Yang
9a84e11f34 Merge in r657-674, prompted by near release of 1.4.0.
git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/branches/strict@675 48356398-32a2-884e-a903-53898d9a118a
2007-01-21 16:07:36 +00:00
Edward Z. Yang
37ea1673dd Merge in r649-656, prompted by changing two of Encoder's functions to static.
git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/branches/strict@657 48356398-32a2-884e-a903-53898d9a118a
2007-01-19 02:28:53 +00:00
Edward Z. Yang
5395d8b4bd Renamed remotely
git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/branches/strict@649 48356398-32a2-884e-a903-53898d9a118a
2007-01-16 22:03:54 +00:00
Edward Z. Yang
c980e76197 Moved remotely
git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/branches/trunk-strict@648 48356398-32a2-884e-a903-53898d9a118a
2007-01-16 22:03:00 +00:00
Edward Z. Yang
2bf912d528 Commit strict version of HTML Purifier.
git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk-strict@647 48356398-32a2-884e-a903-53898d9a118a
2007-01-16 21:59:29 +00:00
60 changed files with 5571 additions and 316 deletions

1100
Doxyfile

File diff suppressed because it is too large Load Diff

209
INSTALL
View File

@@ -2,62 +2,55 @@
Install Install
How to install HTML Purifier How to install HTML Purifier
HTML Purifier is designed to run out of the box, so actually using the library HTML Purifier is designed to run out of the box, so actually using the
is extremely easy. (Although, if you were looking for a step-by-step library is extremely easy. (Although... if you were looking for a
installation GUI, you've come to the wrong place!) The impatient can scroll step-by-step installation GUI, you've downloaded the wrong software!)
down to the bottom of this INSTALL document to see the code, but you really
should make sure a few things are properly done. While the impatient can get going immediately with some of the sample
code at the bottom of this document, it's well worth performing some
basic sanity checks to get the most out of this library.
---------------------------------------------------------------------------
1. Compatibility 1. Compatibility
HTML Purifier works in both PHP 4 and PHP 5, from PHP 4.3.2 and up. It has no HTML Purifier works in both PHP 4 and PHP 5, and is actively tested from
core dependencies with other libraries. PHP 4.3.7 and up (see tests/multitest.php for specific versions). It has
no core dependencies with other libraries. PHP 4 support will be
deprecated on December 31, 2007, at which time only essential security
fixes will be issued for the PHP 4 version until August 8, 2008.
Optional extensions are iconv (usually installed) and tidy (also common). These optional extensions can enhance the capabilities of HTML Purifier:
If you use UTF-8 and don't plan on pretty-printing HTML, you can get away with
not having either of these extensions. * iconv : Converts text to and from non-UTF-8 encodings
* tidy : Used for pretty-printing HTML
---------------------------------------------------------------------------
2. Reconnaissance
2. Including the library A big plus of HTML Purifier is its inerrant support of standards, so
your web-pages should be standards-compliant. (They should also use
semantic markup, but that's another issue altogether, one HTML Purifier
cannot fix without reading your mind.)
Simply use: HTML Purifier can process these doctypes:
require_once '/path/to/library/HTMLPurifier.auto.php';
...and you're good to go. Since HTML Purifier's codebase is fairly
large, I recommend only including HTML Purifier when you need it.
If you don't like your include_path to be fiddled around with, simply set
HTML Purifier's library/ directory to the include path yourself and then:
require_once 'HTMLPurifier.php';
Only the contents in the library/ folder are necessary, so you can remove
everything else when using HTML Purifier in a production environment.
3. Preparing the proper output environment
HTML Purifier is all about web-standards, so accordingly your webpages should
be standards compliant. HTML Purifier can deal with these doctypes:
* XHTML 1.0 Transitional (default) * XHTML 1.0 Transitional (default)
* XHTML 1.0 Strict * XHTML 1.0 Strict
* HTML 4.01 Transitional * HTML 4.01 Transitional
* HTML 4.01 Strict * HTML 4.01 Strict
* XHTML 1.1 (sans Ruby) * XHTML 1.1
...and these character encodings: ...and these character encodings:
* UTF-8 (default) * UTF-8 (default)
* Any encoding iconv supports (support is crippled for i18n though) * Any encoding iconv supports (with crippled internationalization support)
The defaults are there for a reason: they are best-practice choices that These defaults reflect what my choices would be if I were authoring an
should not be changed lightly. For those of you in the dark, you can determine HTML document, however, what you choose depends on the nature of your
the doctype from this code in your HTML documents: codebase. If you don't know what doctype you are using, you can determine
the doctype from this identifier at the top of your source code:
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"> "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
@@ -66,14 +59,34 @@ the doctype from this code in your HTML documents:
<meta http-equiv="Content-type" content="text/html;charset=ENCODING"> <meta http-equiv="Content-type" content="text/html;charset=ENCODING">
For legacy codebases these declarations may be missing. If that is the case, If the character encoding declaration is missing, STOP NOW, and
STOP, and read docs/enduser-utf8.html read 'docs/enduser-utf8.html' (web accessible at
http://htmlpurifier.org/docs/enduser-utf8.html). In fact, even if it is
You may currently be vulnerable to XSS and other security threats, and HTML present, read this document anyway, as most websites specify character
Purifier won't be able to fix that. encoding incorrectly.
---------------------------------------------------------------------------
3. Including the library
The procedure is quite simple:
require_once '/path/to/library/HTMLPurifier.auto.php';
I recommend only including HTML Purifier when you need it, because that
call represents the inclusion of a lot of PHP files which constitute
the bulk of HTML Purifier's memory usage.
If you don't like your include_path to be fiddled around with, simply set
HTML Purifier's library/ directory to the include path yourself and then:
require_once 'HTMLPurifier.php';
Only the contents in the library/ folder are necessary, so you can remove
everything else when using HTML Purifier in a production environment.
---------------------------------------------------------------------------
4. Configuration 4. Configuration
HTML Purifier is designed to run out-of-the-box, but occasionally HTML HTML Purifier is designed to run out-of-the-box, but occasionally HTML
@@ -90,7 +103,6 @@ object and read on:
$config = HTMLPurifier_Config::createDefault(); $config = HTMLPurifier_Config::createDefault();
4.1. Setting a different character encoding 4.1. Setting a different character encoding
You really shouldn't use any other encoding except UTF-8, especially if you You really shouldn't use any other encoding except UTF-8, especially if you
@@ -117,7 +129,6 @@ but please be cognizant of the issues the "solution" creates (for this
reason, I do not include the solution in this document). reason, I do not include the solution in this document).
4.2. Setting a different doctype 4.2. Setting a different doctype
For those of you using HTML 4.01 Transitional, you can disable For those of you using HTML 4.01 Transitional, you can disable
@@ -134,7 +145,6 @@ Other supported doctypes include:
* XHTML 1.1 * XHTML 1.1
4.3. Other settings 4.3. Other settings
There are more configuration directives which can be read about There are more configuration directives which can be read about
@@ -144,55 +154,24 @@ your code. Some of the more interesting ones are configurable at the
demo <http://htmlpurifier.org/demo.php> and are well worth looking into demo <http://htmlpurifier.org/demo.php> and are well worth looking into
for your own system. for your own system.
For example, you can fine tune allowed elements and attributes, convert
relative URLs to absolute ones, and even autoparagraph input text! These
are, respectively, %HTML.Allowed, %URI.MakeAbsolute and %URI.Base, and
%AutoFormat.AutoParagraph. The %Namespace.Directive naming convention
translates to:
$config->set('Namespace', 'Directive', $value);
E.g.
$config->set('HTML', 'Allowed', 'p,b,a[href],i');
$config->set('URI', 'Base', 'http://www.example.com');
$config->set('URI', 'MakeAbsolute', true);
$config->set('AutoFormat', 'AutoParagraph', true);
5. Using the code ---------------------------------------------------------------------------
5. Caching
The interface is mind-numbingly simple:
$purifier = new HTMLPurifier();
$clean_html = $purifier->purify( $dirty_html );
...or, if you're using the configuration object:
$purifier = new HTMLPurifier($config);
$clean_html = $purifier->purify( $dirty_html );
That's it! For more examples, check out docs/examples/ (they aren't very
different though). Also, docs/enduser-slow.html gives advice on what to
do if HTML Purifier is slowing down your application.
6. Quick install
First, make sure library/HTMLPurifier/DefinitionCache/Serializer is
writable by the webserver (see Section 7: Caching below for details).
If your website is in UTF-8 and XHTML Transitional, use this code:
<?php
require_once '/path/to/htmlpurifier/library/HTMLPurifier.auto.php';
$purifier = new HTMLPurifier();
$clean_html = $purifier->purify($dirty_html);
?>
If your website is in a different encoding or doctype, use this code:
<?php
require_once '/path/to/htmlpurifier/library/HTMLPurifier.auto.php';
$config = HTMLPurifier_Config::createDefault();
$config->set('Core', 'Encoding', 'ISO-8859-1'); // replace with your encoding
$config->set('HTML', 'Doctype', 'HTML 4.01 Transitional'); // replace with your doctype
$purifier = new HTMLPurifier($config);
$clean_html = $purifier->purify($dirty_html);
?>
7. Caching
HTML Purifier generates some cache files (generally one or two) to speed up HTML Purifier generates some cache files (generally one or two) to speed up
its execution. For maximum performance, make sure that its execution. For maximum performance, make sure that
@@ -228,3 +207,49 @@ Or move the cache directory somewhere else (no trailing slash):
$config->set('Cache', 'SerializerPath', '/home/user/absolute/path'); $config->set('Cache', 'SerializerPath', '/home/user/absolute/path');
---------------------------------------------------------------------------
6. Using the code
The interface is mind-numbingly simple:
$purifier = new HTMLPurifier();
$clean_html = $purifier->purify( $dirty_html );
...or, if you're using the configuration object:
$purifier = new HTMLPurifier($config);
$clean_html = $purifier->purify( $dirty_html );
That's it! For more examples, check out docs/examples/ (they aren't very
different though). Also, docs/enduser-slow.html gives advice on what to
do if HTML Purifier is slowing down your application.
---------------------------------------------------------------------------
7. Quick install
First, make sure library/HTMLPurifier/DefinitionCache/Serializer is
writable by the webserver (see Section 5: Caching above for details).
If your website is in UTF-8 and XHTML Transitional, use this code:
<?php
require_once '/path/to/htmlpurifier/library/HTMLPurifier.auto.php';
$purifier = new HTMLPurifier();
$clean_html = $purifier->purify($dirty_html);
?>
If your website is in a different encoding or doctype, use this code:
<?php
require_once '/path/to/htmlpurifier/library/HTMLPurifier.auto.php';
$config = HTMLPurifier_Config::createDefault();
$config->set('Core', 'Encoding', 'ISO-8859-1'); // replace with your encoding
$config->set('HTML', 'Doctype', 'HTML 4.01 Transitional'); // replace with your doctype
$purifier = new HTMLPurifier($config);
$clean_html = $purifier->purify($dirty_html);
?>

53
NEWS
View File

@@ -9,6 +9,55 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier
. Internal change . Internal change
========================== ==========================
2.1.3, released 2007-11-05
! tests/multitest.php allows you to test multiple versions by running
tests/index.php through multiple interpreters using `phpv` shell
script (you must provide this script!)
- Fixed poor include ordering for Email URI AttrDefs, causes fatal errors
on some systems.
- Injector algorithm further refined: off-by-one error regarding skip
counts for dormant injectors fixed
- Corrective blockquote definition now enabled for HTML 4.01 Strict
- Fatal error when <img> tag (or any other element with required attributes)
has 'id' attribute fixed, thanks NykO18 for reporting
- Fix warning emitted when a non-supported URI scheme is passed to the
MakeAbsolute URIFilter, thanks NykO18 (again)
- Further refine AutoParagraph injector. Behavior inside of elements
allowing paragraph tags clarified: only inline content delimited by
double newlines (not block elements) are paragraphed.
- Buggy treatment of end tags of elements that have required attributes
fixed (does not manifest on default tag-set)
- Spurious internal content reorganization error suppressed
- HTMLDefinition->addElement now returns a reference to the created
element object, as implied by the documentation
- Phorum mod's HTML Purifier help message expanded (unreleased elsewhere)
- Fix a theoretical class of infinite loops from DirectLex reported
by Nate Abele
- Work around unnecessary DOMElement type-cast in PH5P that caused errors
in PHP 5.1
- Work around PHP 4 SimpleTest lack-of-error complaining for one-time-only
HTMLDefinition errors, this may indicate problems with error-collecting
facilities in PHP 5
- Make ErrorCollectorEMock work in both PHP 4 and PHP 5
- Make PH5P work with PHP 5.0 by removing unnecessary array parameter typedef
. %Core.AcceptFullDocuments renamed to %Core.ConvertDocumentToFragment
to better communicate its purpose
. Error unit tests can now specify the expectation of no errors. Future
iterations of the harness will be extremely strict about what errors
are allowed
. Extend Injector hooks to allow for more powerful injector routines
. HTMLDefinition->addBlankElement created, as according to the HTMLModule
method
. Doxygen configuration file updated, with minor improvements
. Test runner now checks for similarly named files in conf/ directory too.
. Minor cosmetic change to flush-definition-cache.php: trailing newline is
outputted
. Maintenance script for generating PH5P patch added, original PH5P source
file also added under version control
. Full unit test runner script title made more descriptive with PHP version
. Updated INSTALL file to state that 4.3.7 is the earliest version we
are actively testing
2.1.2, released 2007-09-03 2.1.2, released 2007-09-03
! Implemented Object module for trusted users ! Implemented Object module for trusted users
! Implemented experimental HTML5 parsing mode using PH5P. To use, add ! Implemented experimental HTML5 parsing mode using PH5P. To use, add
@@ -249,6 +298,8 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier
%Attr.IDBlacklistRegexp %Attr.IDBlacklistRegexp
- Error messages are emitted when you attempt to "allow" elements or - Error messages are emitted when you attempt to "allow" elements or
attributes that HTML Purifier does not support attributes that HTML Purifier does not support
- Fix segfault in unit test. The problem is not very reproducible and - Fix segfault in unit test. The problem is not very reproducible and
I don't know what causes it, but a six line patch fixed it. I don't know what causes it, but a six line patch fixed it.
@@ -447,4 +498,4 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier
! First public release, most functionality implemented. Notable omissions are: ! First public release, most functionality implemented. Notable omissions are:
+ Shorthand CSS properties + Shorthand CSS properties
+ Table CSS properties + Table CSS properties
+ Deprecated attribute transformations + Deprecated attribute transformations

1
TODO
View File

@@ -1,4 +1,3 @@
TODO List TODO List
= KEY ==================== = KEY ====================

View File

@@ -1 +1 @@
2.1.2 2.1.3

View File

@@ -1,8 +1,6 @@
Version 2.1.2 is a mix of experimental features and stability updates. Stability release 2.1.3 fixes a slew of minor bugs found in HTML Purifier,
Among new features: an Object module for trusted users, support for the and also includes some internal code enhancements and refactorings.
CSS property 'border-spacing', and HTML 5 style parsing using PH5P. Notably, tests/multitest.php automates testing in multiple versions,
Bug fixes have resolved a few obscure issues including border-collapse:separate, fatal AttrDef_URI_Email error fixed, blockquote contents are more lenient
a DirectLex parsing error, broken HTML in printDefinition.php, and problems in HTML 4.01 Strict and fatal errors involving ID tags in img tags were
with the experimental standalone distribution. Also, there were large fixed.
amounts of behind-the-scenes refactoring and the removal of URIScheme
inclusion reflection.

View File

@@ -22,8 +22,8 @@
*/ */
/* /*
HTML Purifier 2.1.2 - Standards Compliant HTML Filtering HTML Purifier 2.1.3 - Standards Compliant HTML Filtering
Copyright (C) 2006 Edward Z. Yang Copyright (C) 2006-2007 Edward Z. Yang
This library is free software; you can redistribute it and/or This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public modify it under the terms of the GNU Lesser General Public
@@ -43,9 +43,8 @@
// constants are slow, but we'll make one exception // constants are slow, but we'll make one exception
define('HTMLPURIFIER_PREFIX', dirname(__FILE__)); define('HTMLPURIFIER_PREFIX', dirname(__FILE__));
// almost every class has an undocumented dependency to these, so make sure // every class has an undocumented dependency to these, must be included!
// they get included require_once 'HTMLPurifier/ConfigSchema.php'; // fatal errors if not included
require_once 'HTMLPurifier/ConfigSchema.php'; // important
require_once 'HTMLPurifier/Config.php'; require_once 'HTMLPurifier/Config.php';
require_once 'HTMLPurifier/Context.php'; require_once 'HTMLPurifier/Context.php';
@@ -60,16 +59,23 @@ require_once 'HTMLPurifier/LanguageFactory.php';
HTMLPurifier_ConfigSchema::define( HTMLPurifier_ConfigSchema::define(
'Core', 'CollectErrors', false, 'bool', ' 'Core', 'CollectErrors', false, 'bool', '
Whether or not to collect errors found while filtering the document. This Whether or not to collect errors found while filtering the document. This
is a useful way to give feedback to your users. CURRENTLY NOT IMPLEMENTED. is a useful way to give feedback to your users. <strong>Warning:</strong>
This directive has been available since 2.0.0. Currently this feature is very patchy and experimental, with lots of
possible error messages not yet implemented. It will not cause any problems,
but it may not help your users either. This directive has been available
since 2.0.0.
'); ');
/** /**
* Main library execution class. * Facade that coordinates HTML Purifier's subsystems in order to purify HTML.
* *
* Facade that performs calls to the HTMLPurifier_Lexer, * @note There are several points in which configuration can be specified
* HTMLPurifier_Strategy and HTMLPurifier_Generator subsystems in order to * for HTML Purifier. The precedence of these (from lowest to
* purify HTML. * highest) is as follows:
* -# Instance: new HTMLPurifier($config)
* -# Invocation: purify($html, $config)
* These configurations are entirely independent of each other and
* are *not* merged.
* *
* @todo We need an easier way to inject strategies, it'll probably end * @todo We need an easier way to inject strategies, it'll probably end
* up getting done through config though. * up getting done through config though.
@@ -77,15 +83,16 @@ This directive has been available since 2.0.0.
class HTMLPurifier class HTMLPurifier
{ {
var $version = '2.1.2'; var $version = '2.1.3';
var $config; var $config;
var $filters; var $filters = array();
var $strategy, $generator; var $strategy, $generator;
/** /**
* Final HTMLPurifier_Context of last run purification. Might be an array. * Resultant HTMLPurifier_Context of last run purification. Is an array
* of contexts if the last called method was purifyArray().
* @public * @public
*/ */
var $context; var $context;
@@ -150,6 +157,11 @@ class HTMLPurifier
$context->register('ErrorCollector', $error_collector); $context->register('ErrorCollector', $error_collector);
} }
// setup id_accumulator context, necessary due to the fact that
// AttrValidator can be called from many places
$id_accumulator = HTMLPurifier_IDAccumulator::build($config, $context);
$context->register('IDAccumulator', $id_accumulator);
$html = HTMLPurifier_Encoder::convertToUTF8($html, $config, $context); $html = HTMLPurifier_Encoder::convertToUTF8($html, $config, $context);
for ($i = 0, $size = count($this->filters); $i < $size; $i++) { for ($i = 0, $size = count($this->filters); $i < $size; $i++) {
@@ -198,11 +210,13 @@ class HTMLPurifier
/** /**
* Singleton for enforcing just one HTML Purifier in your system * Singleton for enforcing just one HTML Purifier in your system
* @param $prototype Optional prototype HTMLPurifier instance to
* overload singleton with.
*/ */
function &getInstance($prototype = null) { static function &getInstance($prototype = null) {
static $htmlpurifier; static $htmlpurifier;
if (!$htmlpurifier || $prototype) { if (!$htmlpurifier || $prototype) {
if (is_a($prototype, 'HTMLPurifier')) { if ($prototype instanceof HTMLPurifier) {
$htmlpurifier = $prototype; $htmlpurifier = $prototype;
} elseif ($prototype) { } elseif ($prototype) {
$htmlpurifier = new HTMLPurifier($prototype); $htmlpurifier = new HTMLPurifier($prototype);

View File

@@ -102,7 +102,7 @@ class HTMLPurifier_AttrDef_URI extends HTMLPurifier_AttrDef
$result = $uri->validate($config, $context); $result = $uri->validate($config, $context);
if (!$result) break; if (!$result) break;
// chained validation // chained filtering
$uri_def =& $config->getDefinition('URI'); $uri_def =& $config->getDefinition('URI');
$result = $uri_def->filter($uri, $config, $context); $result = $uri_def->filter($uri, $config, $context);
if (!$result) break; if (!$result) break;

View File

@@ -1,7 +1,6 @@
<?php <?php
require_once 'HTMLPurifier/AttrDef.php'; require_once 'HTMLPurifier/AttrDef.php';
require_once 'HTMLPurifier/AttrDef/URI/Email/SimpleCheck.php';
class HTMLPurifier_AttrDef_URI_Email extends HTMLPurifier_AttrDef class HTMLPurifier_AttrDef_URI_Email extends HTMLPurifier_AttrDef
{ {
@@ -15,3 +14,5 @@ class HTMLPurifier_AttrDef_URI_Email extends HTMLPurifier_AttrDef
} }
// sub-implementations
require_once 'HTMLPurifier/AttrDef/URI/Email/SimpleCheck.php';

View File

@@ -23,6 +23,13 @@ class HTMLPurifier_AttrValidator
$definition = $config->getHTMLDefinition(); $definition = $config->getHTMLDefinition();
$e =& $context->get('ErrorCollector', true); $e =& $context->get('ErrorCollector', true);
// initialize IDAccumulator if necessary
$ok =& $context->get('IDAccumulator', true);
if (!$ok) {
$id_accumulator = HTMLPurifier_IDAccumulator::build($config, $context);
$context->register('IDAccumulator', $id_accumulator);
}
// initialize CurrentToken if necessary // initialize CurrentToken if necessary
$current_token =& $context->get('CurrentToken', true); $current_token =& $context->get('CurrentToken', true);
if (!$current_token) $context->register('CurrentToken', $token); if (!$current_token) $context->register('CurrentToken', $token);

View File

@@ -15,7 +15,10 @@ class HTMLPurifier_ChildDef_Optional extends HTMLPurifier_ChildDef_Required
var $type = 'optional'; var $type = 'optional';
function validateChildren($tokens_of_children, $config, &$context) { function validateChildren($tokens_of_children, $config, &$context) {
$result = parent::validateChildren($tokens_of_children, $config, $context); $result = parent::validateChildren($tokens_of_children, $config, $context);
if ($result === false) return array(); if ($result === false) {
if (empty($tokens_of_children)) return true;
else return array();
}
return $result; return $result;
} }
} }

View File

@@ -42,7 +42,7 @@ class HTMLPurifier_Config
/** /**
* HTML Purifier's version * HTML Purifier's version
*/ */
var $version = '2.1.2'; var $version = '2.1.3';
/** /**
* Two-level associative array of configuration directives * Two-level associative array of configuration directives
@@ -99,11 +99,8 @@ class HTMLPurifier_Config
* or a string filename of an ini file. * or a string filename of an ini file.
* @return Configured HTMLPurifier_Config object * @return Configured HTMLPurifier_Config object
*/ */
function create($config) { static function create($config) {
if (is_a($config, 'HTMLPurifier_Config')) { if ($config instanceof HTMLPurifier_Config) return $config;
// pass-through
return $config;
}
$ret = HTMLPurifier_Config::createDefault(); $ret = HTMLPurifier_Config::createDefault();
if (is_string($config)) $ret->loadIni($config); if (is_string($config)) $ret->loadIni($config);
elseif (is_array($config)) $ret->loadArray($config); elseif (is_array($config)) $ret->loadArray($config);
@@ -115,7 +112,7 @@ class HTMLPurifier_Config
* @static * @static
* @return Default HTMLPurifier_Config object. * @return Default HTMLPurifier_Config object.
*/ */
function createDefault() { static function createDefault() {
$definition =& HTMLPurifier_ConfigSchema::instance(); $definition =& HTMLPurifier_ConfigSchema::instance();
$config = new HTMLPurifier_Config($definition); $config = new HTMLPurifier_Config($definition);
return $config; return $config;
@@ -368,7 +365,7 @@ class HTMLPurifier_Config
* @param $allowed List of allowed namespaces/directives * @param $allowed List of allowed namespaces/directives
* @static * @static
*/ */
function getAllowedDirectivesForForm($allowed) { static function getAllowedDirectivesForForm($allowed) {
$schema = HTMLPurifier_ConfigSchema::instance(); $schema = HTMLPurifier_ConfigSchema::instance();
if ($allowed !== true) { if ($allowed !== true) {
if (is_string($allowed)) $allowed = array($allowed); if (is_string($allowed)) $allowed = array($allowed);
@@ -413,7 +410,7 @@ class HTMLPurifier_Config
* @param $mq_fix Boolean whether or not to enable magic quotes fix * @param $mq_fix Boolean whether or not to enable magic quotes fix
* @static * @static
*/ */
function loadArrayFromForm($array, $index, $allowed = true, $mq_fix = true) { static function loadArrayFromForm($array, $index, $allowed = true, $mq_fix = true) {
$ret = HTMLPurifier_Config::prepareArrayFromForm($array, $index, $allowed, $mq_fix); $ret = HTMLPurifier_Config::prepareArrayFromForm($array, $index, $allowed, $mq_fix);
$config = HTMLPurifier_Config::create($ret); $config = HTMLPurifier_Config::create($ret);
return $config; return $config;
@@ -433,7 +430,7 @@ class HTMLPurifier_Config
* strict parts of HTMLPurifier_Config * strict parts of HTMLPurifier_Config
* @static * @static
*/ */
function prepareArrayFromForm($array, $index, $allowed = true, $mq_fix = true) { static function prepareArrayFromForm($array, $index, $allowed = true, $mq_fix = true) {
$array = (isset($array[$index]) && is_array($array[$index])) ? $array[$index] : array(); $array = (isset($array[$index]) && is_array($array[$index])) ? $array[$index] : array();
$mq = get_magic_quotes_gpc() && $mq_fix; $mq = get_magic_quotes_gpc() && $mq_fix;

View File

@@ -82,7 +82,7 @@ class HTMLPurifier_ConfigSchema {
* Retrieves an instance of the application-wide configuration definition. * Retrieves an instance of the application-wide configuration definition.
* @static * @static
*/ */
function &instance($prototype = null) { static function &instance($prototype = null) {
static $instance; static $instance;
if ($prototype !== null) { if ($prototype !== null) {
$instance = $prototype; $instance = $prototype;
@@ -104,7 +104,7 @@ class HTMLPurifier_ConfigSchema {
* HTMLPurifier_DirectiveDef::$type for allowed values * HTMLPurifier_DirectiveDef::$type for allowed values
* @param $description Description of directive for documentation * @param $description Description of directive for documentation
*/ */
function define($namespace, $name, $default, $type, $description) { static function define($namespace, $name, $default, $type, $description) {
$def =& HTMLPurifier_ConfigSchema::instance(); $def =& HTMLPurifier_ConfigSchema::instance();
// basic sanity checks // basic sanity checks
@@ -177,7 +177,7 @@ class HTMLPurifier_ConfigSchema {
* @param $namespace Namespace's name * @param $namespace Namespace's name
* @param $description Description of the namespace * @param $description Description of the namespace
*/ */
function defineNamespace($namespace, $description) { static function defineNamespace($namespace, $description) {
$def =& HTMLPurifier_ConfigSchema::instance(); $def =& HTMLPurifier_ConfigSchema::instance();
if (HTMLPURIFIER_SCHEMA_STRICT) { if (HTMLPURIFIER_SCHEMA_STRICT) {
if (isset($def->info[$namespace])) { if (isset($def->info[$namespace])) {
@@ -212,7 +212,7 @@ class HTMLPurifier_ConfigSchema {
* @param $alias Name of aliased value * @param $alias Name of aliased value
* @param $real Value aliased value will be converted into * @param $real Value aliased value will be converted into
*/ */
function defineValueAliases($namespace, $name, $aliases) { static function defineValueAliases($namespace, $name, $aliases) {
$def =& HTMLPurifier_ConfigSchema::instance(); $def =& HTMLPurifier_ConfigSchema::instance();
if (HTMLPURIFIER_SCHEMA_STRICT && !isset($def->info[$namespace][$name])) { if (HTMLPURIFIER_SCHEMA_STRICT && !isset($def->info[$namespace][$name])) {
trigger_error('Cannot set value alias for non-existant directive', trigger_error('Cannot set value alias for non-existant directive',
@@ -245,7 +245,7 @@ class HTMLPurifier_ConfigSchema {
* @param $name Name of directive * @param $name Name of directive
* @param $allowed_values Arraylist of allowed values * @param $allowed_values Arraylist of allowed values
*/ */
function defineAllowedValues($namespace, $name, $allowed_values) { static function defineAllowedValues($namespace, $name, $allowed_values) {
$def =& HTMLPurifier_ConfigSchema::instance(); $def =& HTMLPurifier_ConfigSchema::instance();
if (HTMLPURIFIER_SCHEMA_STRICT && !isset($def->info[$namespace][$name])) { if (HTMLPURIFIER_SCHEMA_STRICT && !isset($def->info[$namespace][$name])) {
trigger_error('Cannot define allowed values for undefined directive', trigger_error('Cannot define allowed values for undefined directive',
@@ -285,7 +285,7 @@ class HTMLPurifier_ConfigSchema {
* @param $new_namespace * @param $new_namespace
* @param $new_name Directive that the alias will be to * @param $new_name Directive that the alias will be to
*/ */
function defineAlias($namespace, $name, $new_namespace, $new_name) { static function defineAlias($namespace, $name, $new_namespace, $new_name) {
$def =& HTMLPurifier_ConfigSchema::instance(); $def =& HTMLPurifier_ConfigSchema::instance();
if (HTMLPURIFIER_SCHEMA_STRICT) { if (HTMLPURIFIER_SCHEMA_STRICT) {
if (!isset($def->info[$namespace])) { if (!isset($def->info[$namespace])) {
@@ -428,7 +428,7 @@ class HTMLPurifier_ConfigSchema {
*/ */
function isError($var) { function isError($var) {
if (!is_object($var)) return false; if (!is_object($var)) return false;
if (!is_a($var, 'HTMLPurifier_Error')) return false; if (!($var instanceof HTMLPurifier_Error)) return false;
return true; return true;
} }
} }

View File

@@ -40,7 +40,7 @@ class HTMLPurifier_DefinitionCacheFactory
* Retrieves an instance of global definition cache factory. * Retrieves an instance of global definition cache factory.
* @static * @static
*/ */
function &instance($prototype = null) { static function &instance($prototype = null) {
static $instance; static $instance;
if ($prototype !== null) { if ($prototype !== null) {
$instance = $prototype; $instance = $prototype;

View File

@@ -110,7 +110,7 @@ class HTMLPurifier_ElementDef
* Low-level factory constructor for creating new standalone element defs * Low-level factory constructor for creating new standalone element defs
* @static * @static
*/ */
function create($safe, $content_model, $content_model_type, $attr) { static function create($safe, $content_model, $content_model_type, $attr) {
$def = new HTMLPurifier_ElementDef(); $def = new HTMLPurifier_ElementDef();
$def->safe = (bool) $safe; $def->safe = (bool) $safe;
$def->content_model = $content_model; $def->content_model = $content_model;

View File

@@ -88,7 +88,7 @@ class HTMLPurifier_Encoder
* would need that, and I'm probably not going to implement them. * would need that, and I'm probably not going to implement them.
* Once again, PHP 6 should solve all our problems. * Once again, PHP 6 should solve all our problems.
*/ */
function cleanUTF8($str, $force_php = false) { static function cleanUTF8($str, $force_php = false) {
static $non_sgml_chars = array(); static $non_sgml_chars = array();
if (empty($non_sgml_chars)) { if (empty($non_sgml_chars)) {
@@ -271,7 +271,7 @@ class HTMLPurifier_Encoder
// | 00000000 | 00010000 | 11111111 | 11111111 | Defined upper limit of legal scalar codes // | 00000000 | 00010000 | 11111111 | 11111111 | Defined upper limit of legal scalar codes
// +----------+----------+----------+----------+ // +----------+----------+----------+----------+
function unichr($code) { static function unichr($code) {
if($code > 1114111 or $code < 0 or if($code > 1114111 or $code < 0 or
($code >= 55296 and $code <= 57343) ) { ($code >= 55296 and $code <= 57343) ) {
// bits are set outside the "valid" range as defined // bits are set outside the "valid" range as defined
@@ -312,7 +312,7 @@ class HTMLPurifier_Encoder
* Converts a string to UTF-8 based on configuration. * Converts a string to UTF-8 based on configuration.
* @static * @static
*/ */
function convertToUTF8($str, $config, &$context) { static function convertToUTF8($str, $config, &$context) {
static $iconv = null; static $iconv = null;
if ($iconv === null) $iconv = function_exists('iconv'); if ($iconv === null) $iconv = function_exists('iconv');
$encoding = $config->get('Core', 'Encoding'); $encoding = $config->get('Core', 'Encoding');
@@ -331,7 +331,7 @@ class HTMLPurifier_Encoder
* @note Currently, this is a lossy conversion, with unexpressable * @note Currently, this is a lossy conversion, with unexpressable
* characters being omitted. * characters being omitted.
*/ */
function convertFromUTF8($str, $config, &$context) { static function convertFromUTF8($str, $config, &$context) {
static $iconv = null; static $iconv = null;
if ($iconv === null) $iconv = function_exists('iconv'); if ($iconv === null) $iconv = function_exists('iconv');
$encoding = $config->get('Core', 'Encoding'); $encoding = $config->get('Core', 'Encoding');
@@ -364,7 +364,7 @@ class HTMLPurifier_Encoder
* @note Sort of with cleanUTF8() but it assumes that $str is * @note Sort of with cleanUTF8() but it assumes that $str is
* well-formed UTF-8 * well-formed UTF-8
*/ */
function convertToASCIIDumbLossless($str) { static function convertToASCIIDumbLossless($str) {
$bytesleft = 0; $bytesleft = 0;
$result = ''; $result = '';
$working = 0; $working = 0;

View File

@@ -29,7 +29,7 @@ class HTMLPurifier_EntityLookup {
* @static * @static
* @param Optional prototype of custom lookup table to overload with. * @param Optional prototype of custom lookup table to overload with.
*/ */
function instance($prototype = false) { static function instance($prototype = false) {
// no references, since PHP doesn't copy unless modified // no references, since PHP doesn't copy unless modified
static $instance = null; static $instance = null;
if ($prototype) { if ($prototype) {

View File

@@ -236,13 +236,26 @@ class HTMLPurifier_HTMLDefinition extends HTMLPurifier_Definition
/** /**
* Adds a custom element to your HTML definition * Adds a custom element to your HTML definition
* @note See HTMLPurifier_HTMLModule::addElement for detailed * @note See HTMLPurifier_HTMLModule::addElement for detailed
* parameter descriptions. * parameter and return value descriptions.
*/ */
function addElement($element_name, $type, $contents, $attr_collections, $attributes) { function &addElement($element_name, $type, $contents, $attr_collections, $attributes) {
$module =& $this->getAnonymousModule(); $module =& $this->getAnonymousModule();
// assume that if the user is calling this, the element // assume that if the user is calling this, the element
// is safe. This may not be a good idea // is safe. This may not be a good idea
$module->addElement($element_name, true, $type, $contents, $attr_collections, $attributes); $element =& $module->addElement($element_name, true, $type, $contents, $attr_collections, $attributes);
return $element;
}
/**
* Adds a blank element to your HTML definition, for overriding
* existing behavior
* @note See HTMLPurifier_HTMLModule::addBlankElement for detailed
* parameter and return value descriptions.
*/
function &addBlankElement($element_name) {
$module =& $this->getAnonymousModule();
$element =& $module->addBlankElement($element_name);
return $element;
} }
/** /**

View File

@@ -13,6 +13,8 @@ require_once 'HTMLPurifier/AttrTransform/Length.php';
require_once 'HTMLPurifier/AttrTransform/ImgSpace.php'; require_once 'HTMLPurifier/AttrTransform/ImgSpace.php';
require_once 'HTMLPurifier/AttrTransform/EnumToCSS.php'; require_once 'HTMLPurifier/AttrTransform/EnumToCSS.php';
require_once 'HTMLPurifier/ChildDef/StrictBlockquote.php';
class HTMLPurifier_HTMLModule_Tidy_XHTMLAndHTML4 extends class HTMLPurifier_HTMLModule_Tidy_XHTMLAndHTML4 extends
HTMLPurifier_HTMLModule_Tidy HTMLPurifier_HTMLModule_Tidy
{ {
@@ -188,5 +190,17 @@ class HTMLPurifier_HTMLModule_Tidy_Strict extends
{ {
var $name = 'Tidy_Strict'; var $name = 'Tidy_Strict';
var $defaultLevel = 'light'; var $defaultLevel = 'light';
/**
 * Builds this module's fix list: everything the parent class provides,
 * plus an override of blockquote's content model type.
 */
function makeFixes() {
    $fixes = parent::makeFixes();
    // route blockquote through the 'strictblockquote' content model type
    $fixes['blockquote#content_model_type'] = 'strictblockquote';
    return $fixes;
}
var $defines_child_def = true;
/**
 * Supplies the child definition for the 'strictblockquote' content model
 * type; every other type is delegated to the parent implementation.
 */
function getChildDef($def) {
    if ($def->content_model_type == 'strictblockquote') {
        return new HTMLPurifier_ChildDef_StrictBlockquote($def->content_model);
    }
    return parent::getChildDef($def);
}
} }

View File

@@ -1,26 +0,0 @@
<?php
require_once 'HTMLPurifier/HTMLModule/Tidy.php';
require_once 'HTMLPurifier/ChildDef/StrictBlockquote.php';
/**
 * Tidy module whose only fix switches blockquote to the
 * 'strictblockquote' content model type, backed by
 * HTMLPurifier_ChildDef_StrictBlockquote.
 */
class HTMLPurifier_HTMLModule_Tidy_XHTMLStrict extends
HTMLPurifier_HTMLModule_Tidy
{
    var $name = 'Tidy_XHTMLStrict';
    var $defaultLevel = 'light';
    // NOTE(review): presumably tells the module manager to consult
    // getChildDef() on this module -- confirm against the manager code
    var $defines_child_def = true;

    /**
     * Returns the single fix this module contributes.
     */
    function makeFixes() {
        return array('blockquote#content_model_type' => 'strictblockquote');
    }

    /**
     * Returns a strict blockquote child definition for the
     * 'strictblockquote' content model type, or false for any other type.
     */
    function getChildDef($def) {
        if ($def->content_model_type == 'strictblockquote') {
            return new HTMLPurifier_ChildDef_StrictBlockquote($def->content_model);
        }
        return false;
    }
}

View File

@@ -35,7 +35,6 @@ require_once 'HTMLPurifier/HTMLModule/Object.php';
require_once 'HTMLPurifier/HTMLModule/Tidy.php'; require_once 'HTMLPurifier/HTMLModule/Tidy.php';
require_once 'HTMLPurifier/HTMLModule/Tidy/XHTMLAndHTML4.php'; require_once 'HTMLPurifier/HTMLModule/Tidy/XHTMLAndHTML4.php';
require_once 'HTMLPurifier/HTMLModule/Tidy/XHTML.php'; require_once 'HTMLPurifier/HTMLModule/Tidy/XHTML.php';
require_once 'HTMLPurifier/HTMLModule/Tidy/XHTMLStrict.php';
require_once 'HTMLPurifier/HTMLModule/Tidy/Proprietary.php'; require_once 'HTMLPurifier/HTMLModule/Tidy/Proprietary.php';
HTMLPurifier_ConfigSchema::define( HTMLPurifier_ConfigSchema::define(
@@ -209,7 +208,7 @@ class HTMLPurifier_HTMLModuleManager
$this->doctypes->register( $this->doctypes->register(
'XHTML 1.0 Strict', true, 'XHTML 1.0 Strict', true,
array_merge($common, $xml, $non_xml), array_merge($common, $xml, $non_xml),
array('Tidy_Strict', 'Tidy_XHTML', 'Tidy_XHTMLStrict', 'Tidy_Proprietary'), array('Tidy_Strict', 'Tidy_XHTML', 'Tidy_Strict', 'Tidy_Proprietary'),
array(), array(),
'-//W3C//DTD XHTML 1.0 Strict//EN', '-//W3C//DTD XHTML 1.0 Strict//EN',
'http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd' 'http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd'
@@ -218,7 +217,7 @@ class HTMLPurifier_HTMLModuleManager
$this->doctypes->register( $this->doctypes->register(
'XHTML 1.1', true, 'XHTML 1.1', true,
array_merge($common, $xml, array('Ruby')), array_merge($common, $xml, array('Ruby')),
array('Tidy_Strict', 'Tidy_XHTML', 'Tidy_Proprietary', 'Tidy_XHTMLStrict'), // Tidy_XHTML1_1 array('Tidy_Strict', 'Tidy_XHTML', 'Tidy_Proprietary', 'Tidy_Strict'), // Tidy_XHTML1_1
array(), array(),
'-//W3C//DTD XHTML 1.1//EN', '-//W3C//DTD XHTML 1.1//EN',
'http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd' 'http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd'

View File

@@ -1,11 +1,15 @@
<?php <?php
HTMLPurifier_ConfigSchema::define(
'Attr', 'IDBlacklist', array(), 'list',
'Array of IDs not allowed in the document.'
);
/** /**
* Component of HTMLPurifier_AttrContext that accumulates IDs to prevent dupes * Component of HTMLPurifier_AttrContext that accumulates IDs to prevent dupes
* @note In Slashdot-speak, dupe means duplicate. * @note In Slashdot-speak, dupe means duplicate.
* @note This class does not accept $config or $context, thus, it is the * @note The default constructor does not accept $config or $context objects:
* burden of the callee to register the appropriate errors or * you must use the static build() factory method to perform initialization.
* configuration.
*/ */
class HTMLPurifier_IDAccumulator class HTMLPurifier_IDAccumulator
{ {
@@ -16,6 +20,19 @@ class HTMLPurifier_IDAccumulator
*/ */
var $ids = array(); var $ids = array();
/**
* Builds an IDAccumulator, also initializing the default blacklist
* @param $config Instance of HTMLPurifier_Config
* @param $context Instance of HTMLPurifier_Context
* @return Fully initialized HTMLPurifier_IDAccumulator
* @static
*/
static function build($config, &$context) {
    // $context is part of the signature but is not read in this method
    $accumulator = new HTMLPurifier_IDAccumulator();
    // hand the configured Attr.IDBlacklist value to load()
    $blacklist = $config->get('Attr', 'IDBlacklist');
    $accumulator->load($blacklist);
    return $accumulator;
}
/** /**
* Add an ID to the lookup table. * Add an ID to the lookup table.
* @param $id ID to be added. * @param $id ID to be added.

View File

@@ -4,6 +4,9 @@
* Injects tokens into the document while parsing for well-formedness. * Injects tokens into the document while parsing for well-formedness.
* This enables "formatter-like" functionality such as auto-paragraphing, * This enables "formatter-like" functionality such as auto-paragraphing,
* smiley-ification and linkification to take place. * smiley-ification and linkification to take place.
*
* @todo Allow injectors to request a re-run on their output. This
* would help if an operation is recursive.
*/ */
class HTMLPurifier_Injector class HTMLPurifier_Injector
{ {
@@ -107,5 +110,12 @@ class HTMLPurifier_Injector
*/ */
function handleElement(&$token) {} function handleElement(&$token) {}
/**
* Notifier that is called when an end token is processed
* @note This differs from handlers in that the token is read-only
*/
// No-op by default; $token is taken by value here, not by reference.
function notifyEnd($token) {}
} }

View File

@@ -6,20 +6,28 @@ HTMLPurifier_ConfigSchema::define(
'AutoFormat', 'AutoParagraph', false, 'bool', ' 'AutoFormat', 'AutoParagraph', false, 'bool', '
<p> <p>
This directive turns on auto-paragraphing, where double newlines are This directive turns on auto-paragraphing, where double newlines are
converted in to paragraphs whenever possible. Auto-paragraphing converted in to paragraphs whenever possible. Auto-paragraphing:
applies when:
</p> </p>
<ul> <ul>
<li>There are inline elements or text in the root node</li> <li>Always applies to inline elements or text in the root node,</li>
<li>There are inline elements or text with double newlines or <li>Applies to inline elements or text with double newlines in nodes
block elements in nodes that allow paragraph tags</li> that allow paragraph tags,</li>
<li>There are double newlines in paragraph tags</li> <li>Applies to double newlines in paragraph tags</li>
</ul> </ul>
<p> <p>
<code>p</code> tags must be allowed for this directive to take effect. <code>p</code> tags must be allowed for this directive to take effect.
We do not use <code>br</code> tags for paragraphing, as that is We do not use <code>br</code> tags for paragraphing, as that is
semantically incorrect. semantically incorrect.
</p> </p>
<p>
To prevent auto-paragraphing as a content-producer, refrain from using
double-newlines except to specify a new paragraph or in contexts where
it has special meaning (whitespace usually has no meaning except in
tags like <code>pre</code>, so this should not be difficult.) To prevent
the paragraphing of inline text adjacent to block elements, wrap them
in <code>div</code> tags (the behavior is slightly different outside of
the root node.)
</p>
<p> <p>
This directive has been available since 2.0.1. This directive has been available since 2.0.1.
</p> </p>
@@ -62,19 +70,27 @@ class HTMLPurifier_Injector_AutoParagraph extends HTMLPurifier_Injector
$ok = false; $ok = false;
// test if up-coming tokens are either block or have // test if up-coming tokens are either block or have
// a double newline in them // a double newline in them
$nesting = 0;
for ($i = $this->inputIndex + 1; isset($this->inputTokens[$i]); $i++) { for ($i = $this->inputIndex + 1; isset($this->inputTokens[$i]); $i++) {
if ($this->inputTokens[$i]->type == 'start'){ if ($this->inputTokens[$i]->type == 'start'){
if (!$this->_isInline($this->inputTokens[$i])) { if (!$this->_isInline($this->inputTokens[$i])) {
$ok = true; // we haven't found a double-newline, and
// we've hit a block element, so don't paragraph
$ok = false;
break;
} }
break; $nesting++;
}
if ($this->inputTokens[$i]->type == 'end') {
if ($nesting <= 0) break;
$nesting--;
} }
if ($this->inputTokens[$i]->type == 'end') break;
if ($this->inputTokens[$i]->type == 'text') { if ($this->inputTokens[$i]->type == 'text') {
// found it!
if (strpos($this->inputTokens[$i]->data, "\n\n") !== false) { if (strpos($this->inputTokens[$i]->data, "\n\n") !== false) {
$ok = true; $ok = true;
break;
} }
if (!$this->inputTokens[$i]->is_whitespace) break;
} }
} }
if ($ok) { if ($ok) {

View File

@@ -113,7 +113,7 @@ class HTMLPurifier_Language
$generator = false; $generator = false;
foreach ($args as $i => $value) { foreach ($args as $i => $value) {
if (is_object($value)) { if (is_object($value)) {
if (is_a($value, 'HTMLPurifier_Token')) { if ($value instanceof HTMLPurifier_Token) {
// factor this out some time // factor this out some time
if (!$generator) $generator = $this->context->get('Generator'); if (!$generator) $generator = $this->context->get('Generator');
if (isset($value->name)) $subst['$'.$i.'.Name'] = $value->name; if (isset($value->name)) $subst['$'.$i.'.Name'] = $value->name;

View File

@@ -65,7 +65,7 @@ class HTMLPurifier_LanguageFactory
* @param $prototype Optional prototype to overload sole instance with, * @param $prototype Optional prototype to overload sole instance with,
* or bool true to reset to default factory. * or bool true to reset to default factory.
*/ */
function &instance($prototype = null) { static function &instance($prototype = null) {
static $instance = null; static $instance = null;
if ($prototype !== null) { if ($prototype !== null) {
$instance = $prototype; $instance = $prototype;

View File

@@ -13,11 +13,14 @@ if (version_compare(PHP_VERSION, "5", ">=")) {
} }
HTMLPurifier_ConfigSchema::define( HTMLPurifier_ConfigSchema::define(
'Core', 'AcceptFullDocuments', true, 'bool', 'Core', 'ConvertDocumentToFragment', true, 'bool', '
'This parameter determines whether or not the filter should accept full '. This parameter determines whether or not the filter should convert
'HTML documents, not just HTML fragments. When on, it will '. input that is a full document with html and body tags to a fragment
'drop all sections except the content between body.' of just the contents of a body tag. This parameter is simply something
); HTML Purifier can do during an edge-case: for most inputs, this
processing is not necessary.
');
HTMLPurifier_ConfigSchema::defineAlias('Core', 'AcceptFullDocuments', 'Core', 'ConvertDocumentToFragment');
HTMLPurifier_ConfigSchema::define( HTMLPurifier_ConfigSchema::define(
'Core', 'LexerImpl', null, 'mixed/null', ' 'Core', 'LexerImpl', null, 'mixed/null', '
@@ -145,9 +148,9 @@ class HTMLPurifier_Lexer
* @param $prototype Optional prototype lexer or configuration object * @param $prototype Optional prototype lexer or configuration object
* @return Concrete lexer. * @return Concrete lexer.
*/ */
function create($config) { static function create($config) {
if (!is_a($config, 'HTMLPurifier_Config')) { if (!($config instanceof HTMLPurifier_Config)) {
$lexer = $config; $lexer = $config;
trigger_error("Passing a prototype to trigger_error("Passing a prototype to
HTMLPurifier_Lexer::create() is deprecated, please instead HTMLPurifier_Lexer::create() is deprecated, please instead
@@ -275,7 +278,7 @@ class HTMLPurifier_Lexer
* @param $string HTML string to process. * @param $string HTML string to process.
* @returns HTML with CDATA sections escaped. * @returns HTML with CDATA sections escaped.
*/ */
function escapeCDATA($string) { static function escapeCDATA($string) {
return preg_replace_callback( return preg_replace_callback(
'/<!\[CDATA\[(.+?)\]\]>/s', '/<!\[CDATA\[(.+?)\]\]>/s',
array('HTMLPurifier_Lexer', 'CDATACallback'), array('HTMLPurifier_Lexer', 'CDATACallback'),
@@ -304,7 +307,7 @@ class HTMLPurifier_Lexer
* and 1 the inside of the CDATA section. * and 1 the inside of the CDATA section.
* @returns Escaped internals of the CDATA section. * @returns Escaped internals of the CDATA section.
*/ */
function CDATACallback($matches) { static function CDATACallback($matches) {
// not exactly sure why the character set is needed, but whatever // not exactly sure why the character set is needed, but whatever
return htmlspecialchars($matches[1], ENT_COMPAT, 'UTF-8'); return htmlspecialchars($matches[1], ENT_COMPAT, 'UTF-8');
} }
@@ -316,7 +319,7 @@ class HTMLPurifier_Lexer
function normalize($html, $config, &$context) { function normalize($html, $config, &$context) {
// extract body from document if applicable // extract body from document if applicable
if ($config->get('Core', 'AcceptFullDocuments')) { if ($config->get('Core', 'ConvertDocumentToFragment')) {
$html = $this->extractBody($html); $html = $this->extractBody($html);
} }

View File

@@ -158,7 +158,7 @@ class HTMLPurifier_Lexer_DOMLex extends HTMLPurifier_Lexer
* Callback function for undoing escaping of stray angled brackets * Callback function for undoing escaping of stray angled brackets
* in comments * in comments
*/ */
function callbackUndoCommentSubst($matches) { static public function callbackUndoCommentSubst($matches) {
return '<!--' . strtr($matches[1], array('&amp;'=>'&','&lt;'=>'<')) . $matches[2]; return '<!--' . strtr($matches[1], array('&amp;'=>'&','&lt;'=>'<')) . $matches[2];
} }
@@ -166,7 +166,7 @@ class HTMLPurifier_Lexer_DOMLex extends HTMLPurifier_Lexer
* Callback function that entity-izes ampersands in comments so that * Callback function that entity-izes ampersands in comments so that
* callbackUndoCommentSubst doesn't clobber them * callbackUndoCommentSubst doesn't clobber them
*/ */
function callbackArmorCommentEntities($matches) { static public function callbackArmorCommentEntities($matches) {
return '<!--' . str_replace('&', '&amp;', $matches[1]) . $matches[2]; return '<!--' . str_replace('&', '&amp;', $matches[1]) . $matches[2];
} }

View File

@@ -40,7 +40,7 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer
* @param $matches, in form of array(opening tag, contents, closing tag) * @param $matches, in form of array(opening tag, contents, closing tag)
* @static * @static
*/ */
function scriptCallback($matches) { static function scriptCallback($matches) {
return $matches[1] . htmlspecialchars($matches[2], ENT_COMPAT, 'UTF-8') . $matches[3]; return $matches[1] . htmlspecialchars($matches[2], ENT_COMPAT, 'UTF-8') . $matches[3];
} }
@@ -160,9 +160,15 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer
$segment = substr($html, $cursor, $strlen_segment); $segment = substr($html, $cursor, $strlen_segment);
if ($segment === false) {
// somehow, we attempted to access beyond the end of
// the string, defense-in-depth, reported by Nate Abele
break;
}
// Check if it's a comment // Check if it's a comment
if ( if (
substr($segment, 0, 3) == '!--' substr($segment, 0, 3) === '!--'
) { ) {
// re-determine segment length, looking for --> // re-determine segment length, looking for -->
$position_comment_end = strpos($html, '-->', $cursor); $position_comment_end = strpos($html, '-->', $cursor);
@@ -237,7 +243,7 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer
// trailing slash. Remember, we could have a tag like <br>, so // trailing slash. Remember, we could have a tag like <br>, so
// any later token processing scripts must convert improperly // any later token processing scripts must convert improperly
// classified EmptyTags from StartTags. // classified EmptyTags from StartTags.
$is_self_closing= (strrpos($segment,'/') === $strlen_segment-1); $is_self_closing = (strrpos($segment,'/') === $strlen_segment-1);
if ($is_self_closing) { if ($is_self_closing) {
$strlen_segment--; $strlen_segment--;
$segment = substr($segment, 0, $strlen_segment); $segment = substr($segment, 0, $strlen_segment);

View File

@@ -26,8 +26,6 @@ class HTMLPurifier_Lexer_PH5P extends HTMLPurifier_Lexer_DOMLex {
} }
// begin PHP5P source code here
/* /*
Copyright 2007 Jeroen van der Meer <http://jero.net/> Copyright 2007 Jeroen van der Meer <http://jero.net/>
@@ -3722,7 +3720,7 @@ class HTML5TreeConstructer {
} }
} }
private function generateImpliedEndTags(array $exclude = array()) { private function generateImpliedEndTags($exclude = array()) {
/* When the steps below require the UA to generate implied end tags, /* When the steps below require the UA to generate implied end tags,
then, if the current node is a dd element, a dt element, an li element, then, if the current node is a dd element, a dt element, an li element,
a p element, a td element, a th element, or a tr element, the UA must a p element, a td element, a th element, or a tr element, the UA must
@@ -3736,7 +3734,8 @@ class HTML5TreeConstructer {
} }
} }
private function getElementCategory($name) { private function getElementCategory($node) {
$name = $node->tagName;
if(in_array($name, $this->special)) if(in_array($name, $this->special))
return self::SPECIAL; return self::SPECIAL;
@@ -3884,3 +3883,4 @@ class HTML5TreeConstructer {
return $this->dom; return $this->dom;
} }
} }
?>

View File

@@ -195,7 +195,7 @@ class HTMLPurifier_Strategy_FixNesting extends HTMLPurifier_Strategy
//################################################################// //################################################################//
// Process result by interpreting $result // Process result by interpreting $result
if ($result === true) { if ($result === true || $child_tokens === $result) {
// leave the node as is // leave the node as is
// register start token as a parental node start // register start token as a parental node start

View File

@@ -36,28 +36,23 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
$definition = $config->getHTMLDefinition(); $definition = $config->getHTMLDefinition();
// CurrentNesting // local variables
$this->currentNesting = array();
$context->register('CurrentNesting', $this->currentNesting);
// InputIndex
$this->inputIndex = false;
$context->register('InputIndex', $this->inputIndex);
// InputTokens
$context->register('InputTokens', $tokens);
$this->inputTokens =& $tokens;
// OutputTokens
$result = array(); $result = array();
$this->outputTokens =& $result;
// %Core.EscapeInvalidTags
$escape_invalid_tags = $config->get('Core', 'EscapeInvalidTags');
$generator = new HTMLPurifier_Generator(); $generator = new HTMLPurifier_Generator();
$escape_invalid_tags = $config->get('Core', 'EscapeInvalidTags');
$e =& $context->get('ErrorCollector', true); $e =& $context->get('ErrorCollector', true);
// member variables
$this->currentNesting = array();
$this->inputIndex = false;
$this->inputTokens =& $tokens;
$this->outputTokens =& $result;
// context variables
$context->register('CurrentNesting', $this->currentNesting);
$context->register('InputIndex', $this->inputIndex);
$context->register('InputTokens', $tokens);
// -- begin INJECTOR -- // -- begin INJECTOR --
$this->injectors = array(); $this->injectors = array();
@@ -95,6 +90,10 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
trigger_error("Cannot enable $name injector because $error is not allowed", E_USER_WARNING); trigger_error("Cannot enable $name injector because $error is not allowed", E_USER_WARNING);
} }
// warning: most foreach loops follow the convention $i => $x.
// be sure, for PHP4 compatibility, to only perform write operations
// directly referencing the object using $i: $x is only safe for reads
// -- end INJECTOR -- // -- end INJECTOR --
$token = false; $token = false;
@@ -105,6 +104,8 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
// if all goes well, this token will be passed through unharmed // if all goes well, this token will be passed through unharmed
$token = $tokens[$this->inputIndex]; $token = $tokens[$this->inputIndex];
//printTokens($tokens, $this->inputIndex);
foreach ($this->injectors as $i => $x) { foreach ($this->injectors as $i => $x) {
if ($x->skip > 0) $this->injectors[$i]->skip--; if ($x->skip > 0) $this->injectors[$i]->skip--;
} }
@@ -114,7 +115,7 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
if ($token->type === 'text') { if ($token->type === 'text') {
// injector handler code; duplicated for performance reasons // injector handler code; duplicated for performance reasons
foreach ($this->injectors as $i => $x) { foreach ($this->injectors as $i => $x) {
if (!$x->skip) $x->handleText($token); if (!$x->skip) $this->injectors[$i]->handleText($token);
if (is_array($token)) { if (is_array($token)) {
$this->currentInjector = $i; $this->currentInjector = $i;
break; break;
@@ -172,7 +173,7 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
// injector handler code; duplicated for performance reasons // injector handler code; duplicated for performance reasons
if ($ok) { if ($ok) {
foreach ($this->injectors as $i => $x) { foreach ($this->injectors as $i => $x) {
if (!$x->skip) $x->handleElement($token); if (!$x->skip) $this->injectors[$i]->handleElement($token);
if (is_array($token)) { if (is_array($token)) {
$this->currentInjector = $i; $this->currentInjector = $i;
break; break;
@@ -202,6 +203,9 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
$current_parent = array_pop($this->currentNesting); $current_parent = array_pop($this->currentNesting);
if ($current_parent->name == $token->name) { if ($current_parent->name == $token->name) {
$result[] = $token; $result[] = $token;
foreach ($this->injectors as $i => $x) {
$this->injectors[$i]->notifyEnd($token);
}
continue; continue;
} }
@@ -238,16 +242,16 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
// okay, we found it, close all the skipped tags // okay, we found it, close all the skipped tags
// note that skipped tags contains the element we need closed // note that skipped tags contains the element we need closed
$size = count($skipped_tags); for ($i = count($skipped_tags) - 1; $i >= 0; $i--) {
for ($i = $size - 1; $i > 0; $i--) { if ($i && $e && !isset($skipped_tags[$i]->armor['MakeWellFormed_TagClosedError'])) {
if ($e && !isset($skipped_tags[$i]->armor['MakeWellFormed_TagClosedError'])) {
$e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag closed by element end', $skipped_tags[$i]); $e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag closed by element end', $skipped_tags[$i]);
} }
$result[] = new HTMLPurifier_Token_End($skipped_tags[$i]->name); $result[] = $new_token = new HTMLPurifier_Token_End($skipped_tags[$i]->name);
foreach ($this->injectors as $j => $x) { // $j, not $i!!!
$this->injectors[$j]->notifyEnd($new_token);
}
} }
$result[] = new HTMLPurifier_Token_End($skipped_tags[$i]->name);
} }
$context->destroy('CurrentNesting'); $context->destroy('CurrentNesting');
@@ -255,17 +259,18 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
$context->destroy('InputIndex'); $context->destroy('InputIndex');
$context->destroy('CurrentToken'); $context->destroy('CurrentToken');
// we're at the end now, fix all still unclosed tags // we're at the end now, fix all still unclosed tags (this is
// not using processToken() because at this point we don't // duplicated from the end of the loop with some slight modifications)
// care about current nesting // not using $skipped_tags since it would invariably be all of them
if (!empty($this->currentNesting)) { if (!empty($this->currentNesting)) {
$size = count($this->currentNesting); for ($i = count($this->currentNesting) - 1; $i >= 0; $i--) {
for ($i = $size - 1; $i >= 0; $i--) {
if ($e && !isset($this->currentNesting[$i]->armor['MakeWellFormed_TagClosedError'])) { if ($e && !isset($this->currentNesting[$i]->armor['MakeWellFormed_TagClosedError'])) {
$e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag closed by document end', $this->currentNesting[$i]); $e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag closed by document end', $this->currentNesting[$i]);
} }
$result[] = $result[] = $new_token = new HTMLPurifier_Token_End($this->currentNesting[$i]->name);
new HTMLPurifier_Token_End($this->currentNesting[$i]->name); foreach ($this->injectors as $j => $x) { // $j, not $i!!!
$this->injectors[$j]->notifyEnd($new_token);
}
} }
} }
@@ -286,8 +291,14 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
// adjust the injector skips based on the array substitution // adjust the injector skips based on the array substitution
if ($this->injectors) { if ($this->injectors) {
$offset = count($token) + 1; $offset = count($token);
for ($i = 0; $i <= $this->currentInjector; $i++) { for ($i = 0; $i <= $this->currentInjector; $i++) {
// because of the skip back, we need to add one more
// for uninitialized injectors. I'm not exactly
// sure why this is the case, but I think it has to
// do with the fact that we're decrementing skips
// before re-checking text
if (!$this->injectors[$i]->skip) $this->injectors[$i]->skip++;
$this->injectors[$i]->skip += $offset; $this->injectors[$i]->skip += $offset;
} }
} }

View File

@@ -116,6 +116,7 @@ class HTMLPurifier_Strategy_RemoveForeignElements extends HTMLPurifier_Strategy
// mostly everything's good, but // mostly everything's good, but
// we need to make sure required attributes are in order // we need to make sure required attributes are in order
if ( if (
($token->type === 'start' || $token->type === 'empty') &&
$definition->info[$token->name]->required_attr && $definition->info[$token->name]->required_attr &&
($token->name != 'img' || $remove_invalid_img) // ensure config option still works ($token->name != 'img' || $remove_invalid_img) // ensure config option still works
) { ) {
@@ -134,7 +135,6 @@ class HTMLPurifier_Strategy_RemoveForeignElements extends HTMLPurifier_Strategy
$token->armor['ValidateAttributes'] = true; $token->armor['ValidateAttributes'] = true;
} }
// CAN BE GENERICIZED
if (isset($hidden_elements[$token->name]) && $token->type == 'start') { if (isset($hidden_elements[$token->name]) && $token->type == 'start') {
$textify_comments = $token->name; $textify_comments = $token->name;
} elseif ($token->name === $textify_comments && $token->type == 'end') { } elseif ($token->name === $textify_comments && $token->type == 'end') {

View File

@@ -6,10 +6,6 @@ require_once 'HTMLPurifier/IDAccumulator.php';
require_once 'HTMLPurifier/AttrValidator.php'; require_once 'HTMLPurifier/AttrValidator.php';
HTMLPurifier_ConfigSchema::define(
'Attr', 'IDBlacklist', array(), 'list',
'Array of IDs not allowed in the document.');
/** /**
* Validate all attributes in the tokens. * Validate all attributes in the tokens.
*/ */
@@ -19,11 +15,6 @@ class HTMLPurifier_Strategy_ValidateAttributes extends HTMLPurifier_Strategy
function execute($tokens, $config, &$context) { function execute($tokens, $config, &$context) {
// setup id_accumulator context
$id_accumulator = new HTMLPurifier_IDAccumulator();
$id_accumulator->load($config->get('Attr', 'IDBlacklist'));
$context->register('IDAccumulator', $id_accumulator);
// setup validator // setup validator
$validator = new HTMLPurifier_AttrValidator(); $validator = new HTMLPurifier_AttrValidator();
@@ -44,8 +35,6 @@ class HTMLPurifier_Strategy_ValidateAttributes extends HTMLPurifier_Strategy
$tokens[$key] = $token; // for PHP 4 $tokens[$key] = $token; // for PHP 4
} }
$context->destroy('IDAccumulator');
$context->destroy('CurrentToken'); $context->destroy('CurrentToken');
return $tokens; return $tokens;

View File

@@ -1,10 +1,22 @@
<?php <?php
/** /**
* Chainable filters for custom URI processing * Chainable filters for custom URI processing.
*
* These filters can perform custom actions on a URI filter object,
* including transformation or blacklisting.
*
* @warning This filter is called before scheme object validation occurs.
* Make sure, if you require a specific scheme object, you
* check that it exists. This allows filters to convert
* proprietary URI schemes into regular ones.
*/ */
class HTMLPurifier_URIFilter class HTMLPurifier_URIFilter
{ {
/**
* Unique identifier of filter
*/
var $name; var $name;
/** /**
@@ -17,8 +29,12 @@ class HTMLPurifier_URIFilter
* @param &$uri Reference to URI object * @param &$uri Reference to URI object
* @param $config Instance of HTMLPurifier_Config * @param $config Instance of HTMLPurifier_Config
* @param &$context Instance of HTMLPurifier_Context * @param &$context Instance of HTMLPurifier_Context
* @return bool Whether or not to continue processing: false indicates
* URL is no good, true indicates continue processing. Note that
* all changes are committed directly on the URI object
*/ */
function filter(&$uri, $config, &$context) { function filter(&$uri, $config, &$context) {
trigger_error('Cannot call abstract function', E_USER_ERROR); trigger_error('Cannot call abstract function', E_USER_ERROR);
} }
} }

View File

@@ -47,6 +47,10 @@ class HTMLPurifier_URIFilter_MakeAbsolute extends HTMLPurifier_URIFilter
// absolute URI already: don't change // absolute URI already: don't change
if (!is_null($uri->host)) return true; if (!is_null($uri->host)) return true;
$scheme_obj = $uri->getSchemeObj($config, $context); $scheme_obj = $uri->getSchemeObj($config, $context);
if (!$scheme_obj) {
// scheme not recognized
return false;
}
if (!$scheme_obj->hierarchical) { if (!$scheme_obj->hierarchical) {
// non-hierarchal URI with explicit scheme, don't change // non-hierarchal URI with explicit scheme, don't change
return true; return true;

View File

@@ -44,7 +44,7 @@ class HTMLPurifier_URISchemeRegistry
* @note Pass a registry object $prototype with a compatible interface and * @note Pass a registry object $prototype with a compatible interface and
* the function will copy it and return it all further times. * the function will copy it and return it all further times.
*/ */
function &instance($prototype = null) { static function &instance($prototype = null) {
static $instance = null; static $instance = null;
if ($prototype !== null) { if ($prototype !== null) {
$instance = $prototype; $instance = $prototype;

View File

@@ -1,5 +1,5 @@
--- old.php 2007-08-19 14:42:33.640625000 -0400 --- C:\Users\Edward\Webs\htmlpurifier\maintenance\PH5P.php 2007-11-04 23:41:49.074543700 -0500
+++ new.php 2007-08-19 14:41:51.609375000 -0400 +++ C:\Users\Edward\Webs\htmlpurifier\maintenance/PH5P.new.php 2007-11-05 00:23:52.839543700 -0500
@@ -211,7 +211,10 @@ @@ -211,7 +211,10 @@
// If nothing is returned, emit a U+0026 AMPERSAND character token. // If nothing is returned, emit a U+0026 AMPERSAND character token.
// Otherwise, emit the character token that was returned. // Otherwise, emit the character token that was returned.
@@ -43,3 +43,22 @@
$entity = $id; $entity = $id;
break; break;
} }
@@ -3659,7 +3668,7 @@
}
}
- private function generateImpliedEndTags(array $exclude = array()) {
+ private function generateImpliedEndTags($exclude = array()) {
/* When the steps below require the UA to generate implied end tags,
then, if the current node is a dd element, a dt element, an li element,
a p element, a td element, a th element, or a tr element, the UA must
@@ -3673,7 +3682,8 @@
}
}
- private function getElementCategory($name) {
+ private function getElementCategory($node) {
+ $name = $node->tagName;
if(in_array($name, $this->special))
return self::SPECIAL;

3824
maintenance/PH5P.php Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -32,5 +32,5 @@ foreach ($names as $name) {
$cache->flush($config); $cache->flush($config);
} }
echo 'Cache flushed successfully.'; echo "Cache flushed successfully.\n";

View File

@@ -0,0 +1,13 @@
<?php

/**
 * Regenerates PH5P.patch: a unified diff between the PH5P.php that sits
 * next to this script ($orig) and the copy shipped in
 * library/HTMLPurifier/Lexer/PH5P.php ($new).
 *
 * Everything in the library copy before the 'class HTML5 {' line is
 * dropped and replaced by a bare '<?php' line so both files share the
 * same header before being compared.
 *
 * NOTE: the patch is written to PH5P.patch relative to the current
 * working directory, not relative to this script.
 */

$orig = realpath(dirname(__FILE__) . '/PH5P.php');
$new  = realpath(dirname(__FILE__) . '/../library/HTMLPurifier/Lexer/PH5P.php');
$newt = dirname(__FILE__) . '/PH5P.new.php'; // temporary file

// realpath() returns false for a missing file; stop instead of handing
// an empty path to diff.
if ($orig === false || $new === false) {
    echo "Could not locate both copies of PH5P.php.\n";
    exit(1);
}

// minor text-processing of new file to get into same format as original
$new_src = file_get_contents($new);
if ($new_src === false) {
    echo "Could not read $new.\n";
    exit(1);
}

// strpos() returns false when the marker is absent; substr() would then
// treat it as offset 0 and silently keep the whole file.
$class_start = strpos($new_src, 'class HTML5 {');
if ($class_start === false) {
    echo "Could not find 'class HTML5 {' in $new.\n";
    exit(1);
}

$new_src = '<?php' . PHP_EOL . substr($new_src, $class_start);
if (file_put_contents($newt, $new_src) === false) {
    echo "Could not write temporary file $newt.\n";
    exit(1);
}

// escapeshellarg() keeps paths containing spaces or shell metacharacters
// from being reinterpreted by the shell.
shell_exec(
    'diff -u ' . escapeshellarg($orig) . ' ' . escapeshellarg($newt) .
    ' > PH5P.patch'
);
unlink($newt);

View File

@@ -261,12 +261,42 @@ function phorum_htmlpurifier_editor_after_subject() {
// don't show this message if it's a WYSIWYG editor, since it will // don't show this message if it's a WYSIWYG editor, since it will
// then be handled automatically // then be handled automatically
if (!empty($GLOBALS['PHORUM']['mod_htmlpurifier']['wysiwyg'])) return; if (!empty($GLOBALS['PHORUM']['mod_htmlpurifier']['wysiwyg'])) return;
?><tr><td colspan="2" style="padding:1em 0.3em;"> ?><tr><td colspan="2" style="padding:1em 0.3em;" class="htmlpurifier-help">
HTML input is <strong>on</strong>. Make sure you escape all HTML and <p>
angled-brackets with &amp;lt; and &amp;gt; (you can also use CDATA <strong>HTML input</strong> is enabled. Make sure you escape all HTML and
tags, simply wrap the suspect text with angled brackets with <code>&amp;lt;</code> and <code>&amp;gt;</code>.
&lt;![CDATA[<em>text</em>]]&gt;. Paragraphs will only be applied to </p><?php
double-spaces; single-spaces will not generate <tt>&lt;br&gt;</tt> tags. $purifier =& HTMLPurifier::getInstance();
$config = $purifier->config;
if ($config->get('AutoFormat', 'AutoParagraph')) {
?><p>
<strong>Auto-paragraphing</strong> is enabled. Double
newlines will be converted to paragraphs; for single
newlines, use the <code>pre</code> tag.
</p><?php
}
$html_definition = $config->getDefinition('HTML');
$allowed = array();
foreach ($html_definition->info as $name => $x) $allowed[] = "<code>$name</code>";
sort($allowed);
$allowed_text = implode(', ', $allowed);
?><p><strong>Allowed tags:</strong> <?php
echo $allowed_text;
?>.</p><?php
?>
</p>
<p>
For inputting literal code such as HTML and PHP for display, use
CDATA tags to auto-escape your angled brackets, and <code>pre</code>
to preserve newlines:
</p>
<pre>&lt;pre&gt;&lt;![CDATA[
<em>Place code here</em>
]]&gt;&lt;/pre&gt;</pre>
<p>
Power users, you can hide this notice with:
<pre>.htmlpurifier-help {display:none;}</pre>
</p>
</td></tr><?php </td></tr><?php
} }

View File

@@ -20,8 +20,10 @@ function phorum_htmlpurifier_migrate_sigs_check() {
function phorum_htmlpurifier_migrate_sigs($offset) { function phorum_htmlpurifier_migrate_sigs($offset) {
global $PHORUM; global $PHORUM;
if(!$offset) return; // bail out quick of $offset == 0 if(!$offset) return; // bail out quick if $offset == 0
// theoretically, we could get rid of this multi-request
// doo-hickery if safe mode is off
@set_time_limit(0); // attempt to let this run @set_time_limit(0); // attempt to let this run
$increment = $PHORUM['mod_htmlpurifier']['migrate-sigs-increment']; $increment = $PHORUM['mod_htmlpurifier']['migrate-sigs-increment'];
@@ -52,21 +54,19 @@ function phorum_htmlpurifier_migrate_sigs($offset) {
// query for highest ID in database // query for highest ID in database
$type = $PHORUM['DBCONFIG']['type']; $type = $PHORUM['DBCONFIG']['type'];
$sql = "select MAX(user_id) from {$PHORUM['user_table']}";
if ($type == 'mysql') { if ($type == 'mysql') {
$conn = phorum_db_mysql_connect(); $conn = phorum_db_mysql_connect();
$sql = "select MAX(user_id) from {$PHORUM['user_table']}";
$res = mysql_query($sql, $conn); $res = mysql_query($sql, $conn);
$row = mysql_fetch_row($res); $row = mysql_fetch_row($res);
$top_id = (int) $row[0];
} elseif ($type == 'mysqli') { } elseif ($type == 'mysqli') {
$conn = phorum_db_mysqli_connect(); $conn = phorum_db_mysqli_connect();
$sql = "select MAX(user_id) from {$PHORUM['user_table']}";
$res = mysqli_query($conn, $sql); $res = mysqli_query($conn, $sql);
$row = mysqli_fetch_row($res); $row = mysqli_fetch_row($res);
$top_id = (int) $row[0];
} else { } else {
exit('Unrecognized database!'); exit('Unrecognized database!');
} }
$top_id = (int) $row[0];
$offset += $increment; $offset += $increment;
if ($offset > $top_id) { // test for end condition if ($offset > $top_id) { // test for end condition

View File

@@ -7,7 +7,7 @@ if (!isset($_GET['standalone'])) {
} else { } else {
require_once '../library/HTMLPurifier.standalone.php'; require_once '../library/HTMLPurifier.standalone.php';
} }
error_reporting(E_ALL); error_reporting(E_ALL | E_STRICT);
function escapeHTML($string) { function escapeHTML($string) {
$string = HTMLPurifier_Encoder::cleanUTF8($string); $string = HTMLPurifier_Encoder::cleanUTF8($string);

View File

@@ -86,7 +86,7 @@ class Debugger
/** /**
* @static * @static
*/ */
function &instance() { static function &instance() {
static $soleInstance = false; static $soleInstance = false;
if (!$soleInstance) $soleInstance = new Debugger(); if (!$soleInstance) $soleInstance = new Debugger();
return $soleInstance; return $soleInstance;

View File

@@ -2,11 +2,12 @@
require_once 'HTMLPurifier/AttrCollections.php'; require_once 'HTMLPurifier/AttrCollections.php';
Mock::generatePartial( class HTMLPurifier_AttrCollectionsTest_NoConstructor extends HTMLPurifier_AttrCollections
'HTMLPurifier_AttrCollections', {
'HTMLPurifier_AttrCollections_TestForConstruct', function HTMLPurifier_AttrCollectionsTest_NoConstructor() {}
array('performInclusions', 'expandIdentifiers') function expandIdentifiers(&$a, $b) {}
); function performInclusions(&$a) {}
}
class HTMLPurifier_AttrCollectionsTest extends HTMLPurifier_Harness class HTMLPurifier_AttrCollectionsTest extends HTMLPurifier_Harness
{ {
@@ -15,7 +16,7 @@ class HTMLPurifier_AttrCollectionsTest extends HTMLPurifier_Harness
generate_mock_once('HTMLPurifier_AttrTypes'); generate_mock_once('HTMLPurifier_AttrTypes');
$collections = new HTMLPurifier_AttrCollections_TestForConstruct(); $collections = new HTMLPurifier_AttrCollectionsTest_NoConstructor();
$types = new HTMLPurifier_AttrTypesMock(); $types = new HTMLPurifier_AttrTypesMock();

View File

@@ -19,5 +19,9 @@ class HTMLPurifier_ChildDef_OptionalTest extends HTMLPurifier_ChildDefHarness
$this->assertResult('Not allowed text', ''); $this->assertResult('Not allowed text', '');
} }
function testEmpty() {
$this->assertResult('');
}
} }

View File

@@ -74,10 +74,11 @@ extends HTMLPurifier_ChildDefHarness
} }
function testError() { function testError() {
$this->expectError('Cannot use non-block element as block wrapper'); // $this->expectError('Cannot use non-block element as block wrapper');
$this->obj = new HTMLPurifier_ChildDef_StrictBlockquote('div | p'); $this->obj = new HTMLPurifier_ChildDef_StrictBlockquote('div | p');
$this->config->set('HTML', 'BlockWrapper', 'dav'); $this->config->set('HTML', 'BlockWrapper', 'dav');
$this->assertResult('Needs wrap', '<p>Needs wrap</p>'); $this->assertResult('Needs wrap', '<p>Needs wrap</p>');
$this->swallowErrors();
} }
} }

View File

@@ -25,13 +25,22 @@ class HTMLPurifier_ErrorCollectorEMock extends HTMLPurifier_ErrorCollectorMock
$this->_expected_context_at[$step][$key] = $value; $this->_expected_context_at[$step][$key] = $value;
} }
function send() { function send($severity, $msg) {
// test for context // test for context
$test = &$this->_getCurrentTestCase(); $context =& SimpleTest::getContext();
$test =& $context->getTest();
// compat
if (empty($this->_mock)) {
$mock =& $this;
} else {
$mock =& $this->_mock;
}
foreach ($this->_expected_context as $key => $value) { foreach ($this->_expected_context as $key => $value) {
$test->assertEqual($value, $this->_context->get($key)); $test->assertEqual($value, $this->_context->get($key));
} }
$step = $this->getCallCount('send'); $step = $mock->getCallCount('send');
if (isset($this->_expected_context_at[$step])) { if (isset($this->_expected_context_at[$step])) {
foreach ($this->_expected_context_at[$step] as $key => $value) { foreach ($this->_expected_context_at[$step] as $key => $value) {
$test->assertEqual($value, $this->_context->get($key)); $test->assertEqual($value, $this->_context->get($key));
@@ -39,7 +48,7 @@ class HTMLPurifier_ErrorCollectorEMock extends HTMLPurifier_ErrorCollectorMock
} }
// boilerplate mock code, does not have return value or references // boilerplate mock code, does not have return value or references
$args = func_get_args(); $args = func_get_args();
$this->_invoke('send', $args); $mock->_invoke('send', $args);
} }
} }

View File

@@ -3,11 +3,15 @@
require_once 'HTMLPurifier/ErrorCollectorEMock.php'; require_once 'HTMLPurifier/ErrorCollectorEMock.php';
require_once 'HTMLPurifier/Lexer/DirectLex.php'; require_once 'HTMLPurifier/Lexer/DirectLex.php';
/**
* @todo Make the callCount variable actually work, so we can precisely
* specify what errors we want: no more, no less
*/
class HTMLPurifier_ErrorsHarness extends HTMLPurifier_Harness class HTMLPurifier_ErrorsHarness extends HTMLPurifier_Harness
{ {
var $config, $context; var $config, $context;
var $collector, $generator; var $collector, $generator, $callCount;
function setup() { function setup() {
$this->config = HTMLPurifier_Config::create(array('Core.CollectErrors' => true)); $this->config = HTMLPurifier_Config::create(array('Core.CollectErrors' => true));
@@ -16,6 +20,11 @@ class HTMLPurifier_ErrorsHarness extends HTMLPurifier_Harness
$this->collector = new HTMLPurifier_ErrorCollectorEMock(); $this->collector = new HTMLPurifier_ErrorCollectorEMock();
$this->collector->prepare($this->context); $this->collector->prepare($this->context);
$this->context->register('ErrorCollector', $this->collector); $this->context->register('ErrorCollector', $this->collector);
$this->callCount = 0;
}
function expectNoErrorCollection() {
$this->collector->expectNever('send');
} }
function expectErrorCollection() { function expectErrorCollection() {

View File

@@ -30,5 +30,11 @@ class HTMLPurifier_IDAccumulatorTest extends HTMLPurifier_Harness
} }
function testBuild() {
$this->config->set('Attr', 'IDBlacklist', array('foo'));
$accumulator = HTMLPurifier_IDAccumulator::build($this->config, $this->context);
$this->assertTrue( isset($accumulator->ids['foo']) );
}
} }

View File

@@ -194,10 +194,7 @@ Bar</p></div>',
} }
function testNoParagraphSingleInlineNodeInBlockNode() { function testNoParagraphSingleInlineNodeInBlockNode() {
$this->assertResult( $this->assertResult( '<div><b>Foo</b></div>' );
'<div><b>Foo</b></div>',
'<div><b>Foo</b></div>'
);
} }
function testParagraphInBlockquote() { function testParagraphInBlockquote() {
@@ -277,9 +274,7 @@ Par1
function testBlockNodeTextDelimeterWithoutDoublespaceInBlockNode() { function testBlockNodeTextDelimeterWithoutDoublespaceInBlockNode() {
$this->assertResult( $this->assertResult(
'<div>Par1 '<div>Par1
<div>Par2</div></div>', <div>Par2</div></div>'
'<div><p>Par1
</p><div>Par2</div></div>'
); );
} }
@@ -351,6 +346,30 @@ Par2'
); );
} }
function testInlineAndBlockTagInDivNoParagraph() {
$this->assertResult(
'<div><code>bar</code> mmm <pre>asdf</pre></div>'
);
}
function testInlineAndBlockTagInDivNeedingParagraph() {
$this->assertResult(
'<div><code>bar</code> mmm
<pre>asdf</pre></div>',
'<div><p><code>bar</code> mmm</p><pre>asdf</pre></div>'
);
}
function testTextInlineNodeTextThenDoubleNewlineNeedsParagraph() {
$this->assertResult(
'<div>asdf <code>bar</code> mmm
<pre>asdf</pre></div>',
'<div><p>asdf <code>bar</code> mmm</p><pre>asdf</pre></div>'
);
}
function testErrorNeeded() { function testErrorNeeded() {
$this->config->set('HTML', 'Allowed', 'b'); $this->config->set('HTML', 'Allowed', 'b');
$this->expectError('Cannot enable AutoParagraph injector because p is not allowed'); $this->expectError('Cannot enable AutoParagraph injector because p is not allowed');

View File

@@ -109,8 +109,9 @@ class HTMLPurifier_Strategy_FixNestingTest extends HTMLPurifier_StrategyHarness
function testInvalidParentError() { function testInvalidParentError() {
// test fallback to div // test fallback to div
$this->config->set('HTML', 'Parent', 'obviously-impossible'); $this->config->set('HTML', 'Parent', 'obviously-impossible');
$this->expectError('Cannot use unrecognized element as parent'); // $this->expectError('Cannot use unrecognized element as parent');
$this->assertResult('<div>Accept</div>'); $this->assertResult('<div>Accept</div>');
$this->swallowErrors();
} }
function testCascadingRemovalOfNodesMissingRequiredChildren() { function testCascadingRemovalOfNodesMissingRequiredChildren() {
@@ -129,5 +130,10 @@ class HTMLPurifier_Strategy_FixNestingTest extends HTMLPurifier_StrategyHarness
$this->assertResult('<table></table><table></table>', ''); $this->assertResult('<table></table><table></table>', '');
} }
function testStrictBlockquoteInHTML401() {
$this->config->set('HTML', 'Doctype', 'HTML 4.01 Strict');
$this->assertResult('<blockquote>text</blockquote>', '<blockquote><p>text</p></blockquote>');
}
} }

View File

@@ -28,6 +28,11 @@ class HTMLPurifier_Strategy_FixNesting_ErrorsTest extends HTMLPurifier_Strategy_
$this->invoke("<span>Valid<div>Invalid</div></span>"); $this->invoke("<span>Valid<div>Invalid</div></span>");
} }
function testNoNodeReorganizedForEmptyNode() {
$this->expectNoErrorCollection();
$this->invoke("<span></span>");
}
function testNodeContentsRemoved() { function testNodeContentsRemoved() {
$this->expectErrorCollection(E_ERROR, 'Strategy_FixNesting: Node contents removed'); $this->expectErrorCollection(E_ERROR, 'Strategy_FixNesting: Node contents removed');
$this->expectContext('CurrentToken', new HTMLPurifier_Token_Start('span', array(), 1)); $this->expectContext('CurrentToken', new HTMLPurifier_Token_Start('span', array(), 1));

View File

@@ -11,6 +11,19 @@ class HTMLPurifier_Strategy_MakeWellFormed_InjectorTest extends HTMLPurifier_Str
$this->obj = new HTMLPurifier_Strategy_MakeWellFormed(); $this->obj = new HTMLPurifier_Strategy_MakeWellFormed();
$this->config->set('AutoFormat', 'AutoParagraph', true); $this->config->set('AutoFormat', 'AutoParagraph', true);
$this->config->set('AutoFormat', 'Linkify', true); $this->config->set('AutoFormat', 'Linkify', true);
generate_mock_once('HTMLPurifier_Injector');
}
function testEndNotification() {
$mock = new HTMLPurifier_InjectorMock();
$mock->skip = false;
$mock->expectAt(0, 'notifyEnd', array(new HTMLPurifier_Token_End('b')));
$mock->expectAt(1, 'notifyEnd', array(new HTMLPurifier_Token_End('i')));
$mock->expectCallCount('notifyEnd', 2);
$this->config->set('AutoFormat', 'AutoParagraph', false);
$this->config->set('AutoFormat', 'Linkify', false);
$this->config->set('AutoFormat', 'Custom', array($mock));
$this->assertResult('<i><b>asdf</b>', '<i><b>asdf</b></i>');
} }
function testOnlyAutoParagraph() { function testOnlyAutoParagraph() {
@@ -62,4 +75,11 @@ class HTMLPurifier_Strategy_MakeWellFormed_InjectorTest extends HTMLPurifier_Str
); );
} }
function testParagraphAfterLinkifiedURL() {
$this->assertResult(
"http://google.com\n\n<b>b</b>",
"<p><a href=\"http://google.com\">http://google.com</a></p><p><b>b</b></p>"
);
}
} }

View File

@@ -82,5 +82,14 @@ alert(&lt;b&gt;bold&lt;/b&gt;);
); );
} }
function testRequiredAttributesTestNotPerformedOnEndTag() {
$this->config->set('HTML', 'DefinitionID',
'HTMLPurifier_Strategy_RemoveForeignElementsTest'.
'->testRequiredAttributesTestNotPerformedOnEndTag');
$def =& $this->config->getHTMLDefinition(true);
$def->addElement('f', 'Block', 'Optional: #PCDATA', false, array('req*' => 'Text'));
$this->assertResult('<f req="text">Foo</f> Bar');
}
} }

View File

@@ -111,6 +111,12 @@ class HTMLPurifier_URIFilter_MakeAbsoluteTest extends HTMLPurifier_URIFilterHarn
$this->assertFiltering('.', '../'); $this->assertFiltering('.', '../');
} }
function testRemoveJavaScriptWithEmbeddedLink() {
// credits: NykO18
$this->setBase('http://www.example.com/');
$this->assertFiltering('javascript: window.location = \'http://www.example.com\';', false);
}
// error case // error case
function testErrorNoBase() { function testErrorNoBase() {

View File

@@ -94,6 +94,7 @@ class HTMLPurifierTest extends HTMLPurifier_Harness
$this->purifier = new HTMLPurifier(array('HTML.EnableAttrID' => true)); $this->purifier = new HTMLPurifier(array('HTML.EnableAttrID' => true));
$this->assertPurification('<span id="moon">foobar</span>'); $this->assertPurification('<span id="moon">foobar</span>');
$this->assertPurification('<img id="folly" src="folly.png" alt="Omigosh!" />');
} }

View File

@@ -3,7 +3,9 @@
// call one file using /?f=FileTest.php , see $test_files array for // call one file using /?f=FileTest.php , see $test_files array for
// valid values // valid values
error_reporting(E_ALL); if (version_compare(PHP_VERSION, '5.1', '>=')) error_reporting(E_ALL | E_STRICT);
else error_reporting(E_ALL);
define('HTMLPurifierTest', 1); define('HTMLPurifierTest', 1);
define('HTMLPURIFIER_SCHEMA_STRICT', true); // validate schemas define('HTMLPURIFIER_SCHEMA_STRICT', true); // validate schemas
@@ -17,6 +19,7 @@ $GLOBALS['HTMLPurifierTest']['PH5P'] = version_compare(PHP_VERSION, "5", ">=") &
$simpletest_location = 'simpletest/'; // reasonable guess $simpletest_location = 'simpletest/'; // reasonable guess
// load SimpleTest // load SimpleTest
if (file_exists('../conf/test-settings.php')) include '../conf/test-settings.php';
if (file_exists('../test-settings.php')) include '../test-settings.php'; if (file_exists('../test-settings.php')) include '../test-settings.php';
require_once $simpletest_location . 'unit_tester.php'; require_once $simpletest_location . 'unit_tester.php';
require_once $simpletest_location . 'reporter.php'; require_once $simpletest_location . 'reporter.php';
@@ -79,7 +82,7 @@ if ($test_file = $GLOBALS['HTMLPurifierTest']['File']) {
} else { } else {
$test = new GroupTest('All Tests'); $test = new GroupTest('All tests on PHP ' . PHP_VERSION);
foreach ($test_files as $test_file) { foreach ($test_files as $test_file) {
require_once $test_file; require_once $test_file;
$test->addTestClass(path2class($test_file)); $test->addTestClass(path2class($test_file));
@@ -91,5 +94,3 @@ if (SimpleReporter::inCli()) $reporter = new TextReporter();
else $reporter = new HTMLPurifier_SimpleTest_Reporter('UTF-8'); else $reporter = new HTMLPurifier_SimpleTest_Reporter('UTF-8');
$test->run($reporter); $test->run($reporter);

33
tests/multitest.php Normal file
View File

@@ -0,0 +1,33 @@
<?php

// Targets are processed in the order listed. Each entry is passed to the
// "phpv" command as a PHP version, except the special 'FLUSH' marker,
// which runs the definition cache flush script instead.
$versions_to_test = array(
    'FLUSH',
    '5.0.4',
    '5.0.5',
    '5.1.4',
    '5.1.6',
    '5.2.0',
    '5.2.1',
    '5.2.2',
    '5.2.3',
    '5.2.4',
    '5.2.5RC2-dev',
    '5.3.0-dev',
    // '6.0.0-dev',
);

// banner
echo str_repeat('-', 70), "\n",
     "HTML Purifier\n",
     "Multiple PHP Versions Test\n\n";

// run the merge-library maintenance script once, up front
passthru("php ../maintenance/merge-library.php");

foreach ($versions_to_test as $target) {
    if ($target !== 'FLUSH') {
        // plain run first, then the run with the "standalone" argument
        foreach (array('index.php', 'index.php standalone') as $invocation) {
            passthru("phpv $target $invocation");
        }
        echo "\n\n";
    } else {
        // output of the flush script is deliberately not echoed
        shell_exec('php ../maintenance/flush-definition-cache.php');
    }
}