mirror of
https://github.com/ezyang/htmlpurifier.git
synced 2025-08-06 22:26:31 +02:00
Compare commits
81 Commits
Author | SHA1 | Date | |
---|---|---|---|
|
f38e81785f | ||
|
2cc829a8cf | ||
|
e80a54a7c9 | ||
|
6f71e65661 | ||
|
6f25c39c3e | ||
|
b8b1ac283d | ||
|
450fc6649d | ||
|
369a69d533 | ||
|
72f5819ef6 | ||
|
3540ea7fce | ||
|
c03953f85e | ||
|
0d262b3a1d | ||
|
234cd2196f | ||
|
0dbe87bbc7 | ||
|
245b5bdb27 | ||
|
864cb9e136 | ||
|
487fcd55ea | ||
|
ec6b6821cf | ||
|
f26eb7551a | ||
|
a2aca4819d | ||
|
a75e4c6b7c | ||
|
e7fa8cbdd5 | ||
|
5fa575f8ac | ||
|
9f23bc005b | ||
|
957a840f54 | ||
|
a7762c5137 | ||
|
aca9d725ed | ||
|
4ce3deba26 | ||
|
d4da02ba95 | ||
|
97d3c8509c | ||
|
21c6803401 | ||
|
36badb06f6 | ||
|
4066416160 | ||
|
fad6aa45fa | ||
|
a7e6d85f6d | ||
|
c330860606 | ||
|
0ea53e5a3d | ||
|
68167176dc | ||
|
bb08f679f0 | ||
|
8cd1806ec8 | ||
|
1274cfed49 | ||
|
1ab47ba949 | ||
|
da95ee096a | ||
|
6d7250c309 | ||
|
df55df1083 | ||
|
1a8d864a42 | ||
|
552102f7f2 | ||
|
f5371bbad4 | ||
|
c8b020879d | ||
|
094b20f58f | ||
|
f2df669eec | ||
|
ca43df9fdd | ||
|
5f76796e14 | ||
|
1f9a6ba30e | ||
|
ccca8cc34f | ||
|
28c29656af | ||
|
88f4f57a47 | ||
|
43a98de909 | ||
|
b9d886d53b | ||
|
5b3c8c5534 | ||
|
dd40d41bc3 | ||
|
37a80f1295 | ||
|
fb367dc871 | ||
|
29c3c21b34 | ||
|
e45cc503a2 | ||
|
85cdea0120 | ||
|
c7676afb0d | ||
|
d75c695994 | ||
|
6f6fcbc354 | ||
|
c31d6ec80e | ||
|
cb92a57e4e | ||
|
423afedbf4 | ||
|
7827a95273 | ||
|
9881a34712 | ||
|
a19f30fdcf | ||
|
8f58c7f49e | ||
|
71301b36eb | ||
|
4f0d012dfa | ||
|
24a4dfdf83 | ||
|
f922285383 | ||
|
3af6457801 |
211
INSTALL
211
INSTALL
@@ -2,62 +2,57 @@
|
|||||||
Install
|
Install
|
||||||
How to install HTML Purifier
|
How to install HTML Purifier
|
||||||
|
|
||||||
HTML Purifier is designed to run out of the box, so actually using the library
|
HTML Purifier is designed to run out of the box, so actually using the
|
||||||
is extremely easy. (Although, if you were looking for a step-by-step
|
library is extremely easy. (Although... if you were looking for a
|
||||||
installation GUI, you've come to the wrong place!) The impatient can scroll
|
step-by-step installation GUI, you've downloaded the wrong software!)
|
||||||
down to the bottom of this INSTALL document to see the code, but you really
|
|
||||||
should make sure a few things are properly done.
|
While the impatient can get going immediately with some of the sample
|
||||||
|
code at the bottom of this document, it's well worth performing some
|
||||||
|
basic sanity checks to get the most out of this library.
|
||||||
|
|
||||||
|
|
||||||
|
---------------------------------------------------------------------------
|
||||||
1. Compatibility
|
1. Compatibility
|
||||||
|
|
||||||
HTML Purifier works in both PHP 4 and PHP 5, from PHP 4.3.2 and up. It has no
|
THIS IS A DEPRECATED PHP4 VERSION OF HTML PURIFIER.
|
||||||
core dependencies with other libraries.
|
|
||||||
|
|
||||||
Optional extensions are iconv (usually installed) and tidy (also common).
|
If you are running PHP5, please go to http://htmlpurifier.org to download
|
||||||
If you use UTF-8 and don't plan on pretty-printing HTML, you can get away with
|
the latest version. This version of HTML Purifier is only actively tested
|
||||||
not having either of these extensions.
|
from PHP 4.3.7 to PHP 5.0.5. Essential security
|
||||||
|
fixes will be issued for the PHP 4 version until August 8, 2008.
|
||||||
|
|
||||||
|
These optional extensions can enhance the capabilities of HTML Purifier:
|
||||||
|
|
||||||
|
* iconv : Converts text to and from non-UTF-8 encodings
|
||||||
|
* bcmath : Used for unit conversion and imagecrash protection
|
||||||
|
* tidy : Used for pretty-printing HTML
|
||||||
|
|
||||||
|
|
||||||
|
---------------------------------------------------------------------------
|
||||||
|
2. Reconnaissance
|
||||||
|
|
||||||
2. Including the library
|
A big plus of HTML Purifier is its inerrant support of standards, so
|
||||||
|
your web-pages should be standards-compliant. (They should also use
|
||||||
|
semantic markup, but that's another issue altogether, one HTML Purifier
|
||||||
|
cannot fix without reading your mind.)
|
||||||
|
|
||||||
Simply use:
|
HTML Purifier can process these doctypes:
|
||||||
|
|
||||||
require_once '/path/to/library/HTMLPurifier.auto.php';
|
|
||||||
|
|
||||||
...and you're good to go. Since HTML Purifier's codebase is fairly
|
|
||||||
large, I recommend only including HTML Purifier when you need it.
|
|
||||||
|
|
||||||
If you don't like your include_path to be fiddled around with, simply set
|
|
||||||
HTML Purifier's library/ directory to the include path yourself and then:
|
|
||||||
|
|
||||||
require_once 'HTMLPurifier.php';
|
|
||||||
|
|
||||||
Only the contents in the library/ folder are necessary, so you can remove
|
|
||||||
everything else when using HTML Purifier in a production environment.
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
3. Preparing the proper output environment
|
|
||||||
|
|
||||||
HTML Purifier is all about web-standards, so accordingly your webpages should
|
|
||||||
be standards compliant. HTML Purifier can deal with these doctypes:
|
|
||||||
|
|
||||||
* XHTML 1.0 Transitional (default)
|
* XHTML 1.0 Transitional (default)
|
||||||
* XHTML 1.0 Strict
|
* XHTML 1.0 Strict
|
||||||
* HTML 4.01 Transitional
|
* HTML 4.01 Transitional
|
||||||
* HTML 4.01 Strict
|
* HTML 4.01 Strict
|
||||||
* XHTML 1.1 (sans Ruby)
|
* XHTML 1.1
|
||||||
|
|
||||||
...and these character encodings:
|
...and these character encodings:
|
||||||
|
|
||||||
* UTF-8 (default)
|
* UTF-8 (default)
|
||||||
* Any encoding iconv supports (support is crippled for i18n though)
|
* Any encoding iconv supports (with crippled internationalization support)
|
||||||
|
|
||||||
The defaults are there for a reason: they are best-practice choices that
|
These defaults reflect what my choices would be if I were authoring an
|
||||||
should not be changed lightly. For those of you in the dark, you can determine
|
HTML document; however, what you choose depends on the nature of your
|
||||||
the doctype from this code in your HTML documents:
|
codebase. If you don't know what doctype you are using, you can determine
|
||||||
|
the doctype from this identifier at the top of your source code:
|
||||||
|
|
||||||
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
|
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
|
||||||
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
|
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
|
||||||
@@ -66,14 +61,34 @@ the doctype from this code in your HTML documents:
|
|||||||
|
|
||||||
<meta http-equiv="Content-type" content="text/html;charset=ENCODING">
|
<meta http-equiv="Content-type" content="text/html;charset=ENCODING">
|
||||||
|
|
||||||
For legacy codebases these declarations may be missing. If that is the case,
|
If the character encoding declaration is missing, STOP NOW, and
|
||||||
STOP, and read docs/enduser-utf8.html
|
read 'docs/enduser-utf8.html' (web accessible at
|
||||||
|
http://htmlpurifier.org/docs/enduser-utf8.html). In fact, even if it is
|
||||||
You may currently be vulnerable to XSS and other security threats, and HTML
|
present, read this document anyway, as most websites specify character
|
||||||
Purifier won't be able to fix that.
|
encoding incorrectly.
|
||||||
|
|
||||||
|
|
||||||
|
---------------------------------------------------------------------------
|
||||||
|
3. Including the library
|
||||||
|
|
||||||
|
The procedure is quite simple:
|
||||||
|
|
||||||
|
require_once '/path/to/library/HTMLPurifier.auto.php';
|
||||||
|
|
||||||
|
I recommend only including HTML Purifier when you need it, because that
|
||||||
|
call represents the inclusion of a lot of PHP files which constitute
|
||||||
|
the bulk of HTML Purifier's memory usage.
|
||||||
|
|
||||||
|
If you don't like your include_path to be fiddled around with, simply set
|
||||||
|
HTML Purifier's library/ directory to the include path yourself and then:
|
||||||
|
|
||||||
|
require_once 'HTMLPurifier.php';
|
||||||
|
|
||||||
|
Only the contents in the library/ folder are necessary, so you can remove
|
||||||
|
everything else when using HTML Purifier in a production environment.
|
||||||
|
|
||||||
|
|
||||||
|
---------------------------------------------------------------------------
|
||||||
4. Configuration
|
4. Configuration
|
||||||
|
|
||||||
HTML Purifier is designed to run out-of-the-box, but occasionally HTML
|
HTML Purifier is designed to run out-of-the-box, but occasionally HTML
|
||||||
@@ -90,7 +105,6 @@ object and read on:
|
|||||||
$config = HTMLPurifier_Config::createDefault();
|
$config = HTMLPurifier_Config::createDefault();
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
4.1. Setting a different character encoding
|
4.1. Setting a different character encoding
|
||||||
|
|
||||||
You really shouldn't use any other encoding except UTF-8, especially if you
|
You really shouldn't use any other encoding except UTF-8, especially if you
|
||||||
@@ -117,7 +131,6 @@ but please be cognizant of the issues the "solution" creates (for this
|
|||||||
reason, I do not include the solution in this document).
|
reason, I do not include the solution in this document).
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
4.2. Setting a different doctype
|
4.2. Setting a different doctype
|
||||||
|
|
||||||
For those of you using HTML 4.01 Transitional, you can disable
|
For those of you using HTML 4.01 Transitional, you can disable
|
||||||
@@ -134,7 +147,6 @@ Other supported doctypes include:
|
|||||||
* XHTML 1.1
|
* XHTML 1.1
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
4.3. Other settings
|
4.3. Other settings
|
||||||
|
|
||||||
There are more configuration directives which can be read about
|
There are more configuration directives which can be read about
|
||||||
@@ -144,55 +156,24 @@ your code. Some of the more interesting ones are configurable at the
|
|||||||
demo <http://htmlpurifier.org/demo.php> and are well worth looking into
|
demo <http://htmlpurifier.org/demo.php> and are well worth looking into
|
||||||
for your own system.
|
for your own system.
|
||||||
|
|
||||||
|
For example, you can fine tune allowed elements and attributes, convert
|
||||||
|
relative URLs to absolute ones, and even autoparagraph input text! These
|
||||||
|
are, respectively, %HTML.Allowed, %URI.MakeAbsolute and %URI.Base, and
|
||||||
|
%AutoFormat.AutoParagraph. The %Namespace.Directive naming convention
|
||||||
|
translates to:
|
||||||
|
|
||||||
|
$config->set('Namespace', 'Directive', $value);
|
||||||
|
|
||||||
|
E.g.
|
||||||
|
|
||||||
|
$config->set('HTML', 'Allowed', 'p,b,a[href],i');
|
||||||
|
$config->set('URI', 'Base', 'http://www.example.com');
|
||||||
|
$config->set('URI', 'MakeAbsolute', true);
|
||||||
|
$config->set('AutoFormat', 'AutoParagraph', true);
|
||||||
|
|
||||||
|
|
||||||
5. Using the code
|
---------------------------------------------------------------------------
|
||||||
|
5. Caching
|
||||||
The interface is mind-numbingly simple:
|
|
||||||
|
|
||||||
$purifier = new HTMLPurifier();
|
|
||||||
$clean_html = $purifier->purify( $dirty_html );
|
|
||||||
|
|
||||||
...or, if you're using the configuration object:
|
|
||||||
|
|
||||||
$purifier = new HTMLPurifier($config);
|
|
||||||
$clean_html = $purifier->purify( $dirty_html );
|
|
||||||
|
|
||||||
That's it! For more examples, check out docs/examples/ (they aren't very
|
|
||||||
different though). Also, docs/enduser-slow.html gives advice on what to
|
|
||||||
do if HTML Purifier is slowing down your application.
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
6. Quick install
|
|
||||||
|
|
||||||
First, make sure library/HTMLPurifier/DefinitionCache/Serializer is
|
|
||||||
writable by the webserver (see Section 7: Caching below for details).
|
|
||||||
If your website is in UTF-8 and XHTML Transitional, use this code:
|
|
||||||
|
|
||||||
<?php
|
|
||||||
require_once '/path/to/htmlpurifier/library/HTMLPurifier.auto.php';
|
|
||||||
|
|
||||||
$purifier = new HTMLPurifier();
|
|
||||||
$clean_html = $purifier->purify($dirty_html);
|
|
||||||
?>
|
|
||||||
|
|
||||||
If your website is in a different encoding or doctype, use this code:
|
|
||||||
|
|
||||||
<?php
|
|
||||||
require_once '/path/to/htmlpurifier/library/HTMLPurifier.auto.php';
|
|
||||||
|
|
||||||
$config = HTMLPurifier_Config::createDefault();
|
|
||||||
$config->set('Core', 'Encoding', 'ISO-8859-1'); // replace with your encoding
|
|
||||||
$config->set('HTML', 'Doctype', 'HTML 4.01 Transitional'); // replace with your doctype
|
|
||||||
$purifier = new HTMLPurifier($config);
|
|
||||||
|
|
||||||
$clean_html = $purifier->purify($dirty_html);
|
|
||||||
?>
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
7. Caching
|
|
||||||
|
|
||||||
HTML Purifier generates some cache files (generally one or two) to speed up
|
HTML Purifier generates some cache files (generally one or two) to speed up
|
||||||
its execution. For maximum performance, make sure that
|
its execution. For maximum performance, make sure that
|
||||||
@@ -228,3 +209,49 @@ Or move the cache directory somewhere else (no trailing slash):
|
|||||||
|
|
||||||
$config->set('Cache', 'SerializerPath', '/home/user/absolute/path');
|
$config->set('Cache', 'SerializerPath', '/home/user/absolute/path');
|
||||||
|
|
||||||
|
|
||||||
|
---------------------------------------------------------------------------
|
||||||
|
6. Using the code
|
||||||
|
|
||||||
|
The interface is mind-numbingly simple:
|
||||||
|
|
||||||
|
$purifier = new HTMLPurifier();
|
||||||
|
$clean_html = $purifier->purify( $dirty_html );
|
||||||
|
|
||||||
|
...or, if you're using the configuration object:
|
||||||
|
|
||||||
|
$purifier = new HTMLPurifier($config);
|
||||||
|
$clean_html = $purifier->purify( $dirty_html );
|
||||||
|
|
||||||
|
That's it! For more examples, check out docs/examples/ (they aren't very
|
||||||
|
different though). Also, docs/enduser-slow.html gives advice on what to
|
||||||
|
do if HTML Purifier is slowing down your application.
|
||||||
|
|
||||||
|
|
||||||
|
---------------------------------------------------------------------------
|
||||||
|
7. Quick install
|
||||||
|
|
||||||
|
First, make sure library/HTMLPurifier/DefinitionCache/Serializer is
|
||||||
|
writable by the webserver (see Section 5: Caching above for details).
|
||||||
|
If your website is in UTF-8 and XHTML Transitional, use this code:
|
||||||
|
|
||||||
|
<?php
|
||||||
|
require_once '/path/to/htmlpurifier/library/HTMLPurifier.auto.php';
|
||||||
|
|
||||||
|
$purifier = new HTMLPurifier();
|
||||||
|
$clean_html = $purifier->purify($dirty_html);
|
||||||
|
?>
|
||||||
|
|
||||||
|
If your website is in a different encoding or doctype, use this code:
|
||||||
|
|
||||||
|
<?php
|
||||||
|
require_once '/path/to/htmlpurifier/library/HTMLPurifier.auto.php';
|
||||||
|
|
||||||
|
$config = HTMLPurifier_Config::createDefault();
|
||||||
|
$config->set('Core', 'Encoding', 'ISO-8859-1'); // replace with your encoding
|
||||||
|
$config->set('HTML', 'Doctype', 'HTML 4.01 Transitional'); // replace with your doctype
|
||||||
|
$purifier = new HTMLPurifier($config);
|
||||||
|
|
||||||
|
$clean_html = $purifier->purify($dirty_html);
|
||||||
|
?>
|
||||||
|
|
||||||
|
135
NEWS
135
NEWS
@@ -9,6 +9,141 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier
|
|||||||
. Internal change
|
. Internal change
|
||||||
==========================
|
==========================
|
||||||
|
|
||||||
|
ERRATA
|
||||||
|
- PH5P is seriously broken here; it can result in fatal errors and exceptions.
|
||||||
|
If you desire to use it, please use it with the latest, PHP5-only version of
|
||||||
|
HTML Purifier.
|
||||||
|
|
||||||
|
2.1.5, released 2008-06-19
|
||||||
|
! More robust imagecrash protection with height/width CSS with %CSS.MaxImgLength,
|
||||||
|
and height/width HTML with %HTML.MaxImgLength.
|
||||||
|
- AttrValidator operations are now atomic; updates to attributes are not
|
||||||
|
manifest in token until end of operations. This prevents naughty internal
|
||||||
|
code from directly modifying CurrentToken when they're not supposed to.
|
||||||
|
- Percent encoding checks enabled for URI query and fragment
|
||||||
|
- Disable percent height/width attributes for img
|
||||||
|
- Fix stray backslashes in font-family; CSS Unicode character escapes are
|
||||||
|
now properly resolved (although *only* in font-family).
|
||||||
|
- Improve parseCDATA algorithm to take into account newline normalization
|
||||||
|
- Account for browser confusion between Yen character and backslash in
|
||||||
|
Shift_JIS encoding. This fix generalizes to any other encoding which is not
|
||||||
|
a strict superset of printable ASCII.
|
||||||
|
- Improved adherence to Unicode by checking for non-character codepoints.
|
||||||
|
Thanks Geoffrey Sneddon for reporting. This may result in degraded
|
||||||
|
performance for extremely large inputs.
|
||||||
|
- Allow CSS property-value pair ''text-decoration: none''
|
||||||
|
. Added HTMLPurifier_UnitConverter and HTMLPurifier_Length for convenient
|
||||||
|
handling of CSS-style lengths. HTMLPurifier_AttrDef_CSS_Length now uses
|
||||||
|
this class.
|
||||||
|
. API of HTMLPurifier_AttrDef_CSS_Length changed from __construct($disable_negative)
|
||||||
|
to __construct($min, $max). __construct(true) is equivalent to
|
||||||
|
__construct('0'). (replace __construct with HTMLPurifier_AttrDef_CSS_Length)
|
||||||
|
. Added HTMLPurifier_AttrDef_Switch class
|
||||||
|
. Rename HTMLPurifier_HTMLModule_Tidy->construct() to setup() and bubble method
|
||||||
|
up inheritance hierarchy to HTMLPurifier_HTMLModule. All HTMLModules
|
||||||
|
get this called with the configuration object. All modules now
|
||||||
|
use this rather than __construct(), although legacy code using constructors
|
||||||
|
will still work--the new format, however, lets modules access the
|
||||||
|
configuration object for HTML namespace dependent tweaks.
|
||||||
|
. AttrDef_HTML_Pixels now takes a single construction parameter, pixels.
|
||||||
|
|
||||||
|
2.1.4, released 2008-05-18
|
||||||
|
! DefinitionCacheFactory now can register new implementations
|
||||||
|
! CSS properties are now case-insensitive
|
||||||
|
! Encoder optimized with valid UTF-8 input
|
||||||
|
! HTML Purifier's URI handling is a lot more robust, with much stricter
|
||||||
|
validation checks and better percent encoding handling.
|
||||||
|
- Colors missing # but in hex form will be corrected
|
||||||
|
- CSS Number algorithm improved
|
||||||
|
- Autoclose now operates iteratively, i.e. <span><span><div> now has
|
||||||
|
both span tags closed.
|
||||||
|
- Fix bug with trusted script handling in libxml versions later than 2.6.28.
|
||||||
|
- Fix bug in comment parsing with DirectLex
|
||||||
|
- Fix bug with rgb(0, 1, 2) color syntax with spaces inside shorthand syntax
|
||||||
|
- HTMLPurifier_HTMLDefinition->addAttribute can now be called multiple times
|
||||||
|
on the same element without emitting errors.
|
||||||
|
- Iconv uses set_error_handler instead of shut-up operator
|
||||||
|
- Add protection against imagecrash attack with CSS height/width
|
||||||
|
- HTMLPurifier::getInstance() renamed to HTMLPurifier::instance() for consistency
|
||||||
|
- Fixed bug with fallback languages in LanguageFactory
|
||||||
|
|
||||||
|
2.1.3, released 2007-11-05
|
||||||
|
! tests/multitest.php allows you to test multiple versions by running
|
||||||
|
tests/index.php through multiple interpreters using `phpv` shell
|
||||||
|
script (you must provide this script!)
|
||||||
|
- Fixed poor include ordering for Email URI AttrDefs, causes fatal errors
|
||||||
|
on some systems.
|
||||||
|
- Injector algorithm further refined: off-by-one error regarding skip
|
||||||
|
counts for dormant injectors fixed
|
||||||
|
- Corrective blockquote definition now enabled for HTML 4.01 Strict
|
||||||
|
- Fatal error when <img> tag (or any other element with required attributes)
|
||||||
|
has 'id' attribute fixed, thanks NykO18 for reporting
|
||||||
|
- Fix warning emitted when a non-supported URI scheme is passed to the
|
||||||
|
MakeAbsolute URIFilter, thanks NykO18 (again)
|
||||||
|
- Further refine AutoParagraph injector. Behavior inside of elements
|
||||||
|
allowing paragraph tags clarified: only inline content delimited by
|
||||||
|
double newlines (not block elements) are paragraphed.
|
||||||
|
- Buggy treatment of end tags of elements that have required attributes
|
||||||
|
fixed (does not manifest on default tag-set)
|
||||||
|
- Spurious internal content reorganization error suppressed
|
||||||
|
- HTMLDefinition->addElement now returns a reference to the created
|
||||||
|
element object, as implied by the documentation
|
||||||
|
- Phorum mod's HTML Purifier help message expanded (unreleased elsewhere)
|
||||||
|
- Fix a theoretical class of infinite loops from DirectLex reported
|
||||||
|
by Nate Abele
|
||||||
|
- Work around unnecessary DOMElement type-cast in PH5P that caused errors
|
||||||
|
in PHP 5.1
|
||||||
|
- Work around PHP 4 SimpleTest lack-of-error complaining for one-time-only
|
||||||
|
HTMLDefinition errors, this may indicate problems with error-collecting
|
||||||
|
facilities in PHP 5
|
||||||
|
- Make ErrorCollectorEMock work in both PHP 4 and PHP 5
|
||||||
|
- Make PH5P work with PHP 5.0 by removing unnecessary array parameter typedef
|
||||||
|
. %Core.AcceptFullDocuments renamed to %Core.ConvertDocumentToFragment
|
||||||
|
to better communicate its purpose
|
||||||
|
. Error unit tests can now specify the expectation of no errors. Future
|
||||||
|
iterations of the harness will be extremely strict about what errors
|
||||||
|
are allowed
|
||||||
|
. Extend Injector hooks to allow for more powerful injector routines
|
||||||
|
. HTMLDefinition->addBlankElement created, as according to the HTMLModule
|
||||||
|
method
|
||||||
|
. Doxygen configuration file updated, with minor improvements
|
||||||
|
. Test runner now checks for similarly named files in conf/ directory too.
|
||||||
|
. Minor cosmetic change to flush-definition-cache.php: trailing newline is
|
||||||
|
outputted
|
||||||
|
. Maintenance script for generating PH5P patch added, original PH5P source
|
||||||
|
file also added under version control
|
||||||
|
. Full unit test runner script title made more descriptive with PHP version
|
||||||
|
. Updated INSTALL file to state that 4.3.7 is the earliest version we
|
||||||
|
are actively testing
|
||||||
|
|
||||||
|
2.1.2, released 2007-09-03
|
||||||
|
! Implemented Object module for trusted users
|
||||||
|
! Implemented experimental HTML5 parsing mode using PH5P. To use, add
|
||||||
|
this to your code:
|
||||||
|
require_once 'HTMLPurifier/Lexer/PH5P.php';
|
||||||
|
$config->set('Core', 'LexerImpl', 'PH5P');
|
||||||
|
Note that this Lexer introduces some classes not in the HTMLPurifier
|
||||||
|
namespace. Also, this is PHP5 only.
|
||||||
|
! CSS property border-spacing implemented
|
||||||
|
- Fix non-visible parsing error in DirectLex with empty tags that have
|
||||||
|
slashes inside attribute values.
|
||||||
|
- Fix typo in CSS definition: border-collapse:seperate; was incorrectly
|
||||||
|
accepted as valid CSS. Usually non-visible, because this styling is the
|
||||||
|
default for tables in most browsers. Thanks Brett Zamir for pointing
|
||||||
|
this out.
|
||||||
|
- Fix validation errors in configuration form
|
||||||
|
- Hammer out a bunch of edge-case bugs in the standalone distribution
|
||||||
|
- Inclusion reflection removed from URISchemeRegistry; you must manually
|
||||||
|
include any new schema files you wish to use
|
||||||
|
- Numerous typo fixes in documentation thanks to Brett Zamir
|
||||||
|
. Unit test refactoring for one logical test per test function
|
||||||
|
. Config and context parameters in ComplexHarness deprecated: instead, edit
|
||||||
|
the $config and $context member variables
|
||||||
|
. HTML wrapper in DOMLex now takes DTD identifiers into account; doesn't
|
||||||
|
really make a difference, but is good for completeness' sake
|
||||||
|
. merge-library.php script refactored for greater code reusability and
|
||||||
|
PHP4 compatibility
|
||||||
|
|
||||||
2.1.1, released 2007-08-04
|
2.1.1, released 2007-08-04
|
||||||
- Fix show-stopper bug in %URI.MakeAbsolute functionality
|
- Fix show-stopper bug in %URI.MakeAbsolute functionality
|
||||||
- Fix PHP4 syntax error in standalone version
|
- Fix PHP4 syntax error in standalone version
|
||||||
|
15
TODO
15
TODO
@@ -29,23 +29,22 @@ afraid to cast your vote for the next feature to be implemented!
|
|||||||
- Remove empty inline tags<i></i>
|
- Remove empty inline tags<i></i>
|
||||||
- Append something to duplicate IDs so they're still usable (impl. note: the
|
- Append something to duplicate IDs so they're still usable (impl. note: the
|
||||||
dupe detector would also need to detect the suffix as well)
|
dupe detector would also need to detect the suffix as well)
|
||||||
|
- Externalize inline CSS to promote clean HTML
|
||||||
|
|
||||||
2.4 release [It's All About Trust] (floating)
|
2.4 release [It's All About Trust] (floating)
|
||||||
# Implement untrusted, dangerous elements/attributes
|
# Implement untrusted, dangerous elements/attributes
|
||||||
# Implement IDREF support (harder than it seems, since you cannot have
|
# Implement IDREF support (harder than it seems, since you cannot have
|
||||||
IDREFs to non-existent IDs)
|
IDREFs to non-existent IDs)
|
||||||
|
# Frameset XHTML 1.0 and HTML 4.01 doctypes
|
||||||
|
|
||||||
3.0 release [Beyond HTML]
|
3.0 release [Beyond HTML]
|
||||||
# Legit token based CSS parsing (will require revamping almost every
|
# Legit token based CSS parsing (will require revamping almost every
|
||||||
AttrDef class)
|
AttrDef class). Probably will use CSSTidy class
|
||||||
# More control over allowed CSS properties (maybe modularize it in the
|
# More control over allowed CSS properties (maybe modularize it in the
|
||||||
same fashion!)
|
same fashion!)
|
||||||
# Formatters for plaintext
|
# Formatters for plaintext
|
||||||
- Smileys
|
- Smileys
|
||||||
- Standardize token armor for all areas of processing
|
- Standardize token armor for all areas of processing
|
||||||
- Fixes for Firefox's inability to handle COL alignment props (Bug 915)
|
|
||||||
- Automatically add non-breaking spaces to empty table cells when
|
|
||||||
empty-cells:show is applied to have compatibility with Internet Explorer
|
|
||||||
- Convert RTL/LTR override characters to <bdo> tags, or vice versa on demand.
|
- Convert RTL/LTR override characters to <bdo> tags, or vice versa on demand.
|
||||||
Also, enable disabling of directionality
|
Also, enable disabling of directionality
|
||||||
|
|
||||||
@@ -64,25 +63,27 @@ Ongoing
|
|||||||
- Complete basic smoketests
|
- Complete basic smoketests
|
||||||
|
|
||||||
Unknown release (on a scratch-an-itch basis)
|
Unknown release (on a scratch-an-itch basis)
|
||||||
? Semi-lossy dumb alternate character encoding transfor
|
# CHMOD install script for PEAR installs
|
||||||
? Have 'lang' attribute be checked against official lists, achieved by
|
? Have 'lang' attribute be checked against official lists, achieved by
|
||||||
encoding all characters that have string entity equivalents
|
encoding all characters that have string entity equivalents
|
||||||
- Abstract ChildDef_BlockQuote to work with all elements that only
|
- Abstract ChildDef_BlockQuote to work with all elements that only
|
||||||
allow blocks in them, required or optional
|
allow blocks in them, required or optional
|
||||||
- Reorganize Unit Tests
|
- Reorganize Unit Tests
|
||||||
- Refactor loop tests: Lexer
|
|
||||||
- Reorganize configuration directives (Create more namespaces! Get messy!)
|
- Reorganize configuration directives (Create more namespaces! Get messy!)
|
||||||
- Advanced URI filtering schemes (see docs/proposal-new-directives.txt)
|
- Advanced URI filtering schemes (see docs/proposal-new-directives.txt)
|
||||||
- Implement lenient <ruby> child validation
|
- Implement lenient <ruby> child validation
|
||||||
- Explain how to use HTML Purifier in non-PHP languages / create
|
- Explain how to use HTML Purifier in non-PHP languages / create
|
||||||
a simple command line stub (or complicated?)
|
a simple command line stub (or complicated?)
|
||||||
|
- Fixes for Firefox's inability to handle COL alignment props (Bug 915)
|
||||||
|
- Automatically add non-breaking spaces to empty table cells when
|
||||||
|
empty-cells:show is applied to have compatibility with Internet Explorer
|
||||||
|
|
||||||
Requested
|
Requested
|
||||||
|
|
||||||
Wontfix
|
Wontfix
|
||||||
- Non-lossy smart alternate character encoding transformations (unless
|
- Non-lossy smart alternate character encoding transformations (unless
|
||||||
patch provided)
|
patch provided)
|
||||||
- Pretty-printing HTML, users can use Tidy on the output on entire page
|
- Pretty-printing HTML: users can use Tidy on the output on entire page
|
||||||
- Native content compression, whitespace stripping (don't rely on Tidy, make
|
- Native content compression, whitespace stripping (don't rely on Tidy, make
|
||||||
sure we don't remove from <pre> or related tags): use gzip if this is
|
sure we don't remove from <pre> or related tags): use gzip if this is
|
||||||
really important
|
really important
|
||||||
|
17
WHATSNEW
17
WHATSNEW
@@ -1,10 +1,7 @@
|
|||||||
In version 2.1, HTML Purifier's URI validation and filtering handling
|
Security and bugfix release 2.1.5 is a backport that fixes two vulnerabilities
|
||||||
system has been revamped with a new, extensible URIFilter system. Also
|
related to CSS, one of which only occurs under Shift_JIS. It also improves
|
||||||
notable features include preservation of emoticons in PHP5 with
|
imagecrash protection (percent CSS width and height is now disabled for
|
||||||
%Core.AggressivelyFixLt, standalone and lite download versions,
|
images, and you can control the bounds with %CSS.MaxImgLength and
|
||||||
transforming relative URIs to absolute URIs, Ruby in XHTML 1.1, a Phorum
|
%HTML.MaxImgLength). Finally, there are a number of bug fixes, most notably
|
||||||
mod, and UTF-8 font names. Notable bug-fixes include refinement of
|
support for text-decoration: none, improved adherence to Unicode and increased
|
||||||
the auto-paragraphing algorithm (no longer experimental), better XHTML
|
percent encoding checks.
|
||||||
1.1 support and the removal of the contents of <style> elements. Version
|
|
||||||
2.1.1 amends a few bugs in some of newly introduced features, namely
|
|
||||||
running the standalone download version in PHP4 and %URI.MakeAbsolute.
|
|
||||||
|
@@ -39,7 +39,7 @@ thead th {text-align:left;padding:0.1em;background-color:#EEE;}
|
|||||||
<table cellspacing="0"><tbody>
|
<table cellspacing="0"><tbody>
|
||||||
<tr><td class="impl-yes">Implemented</td></tr>
|
<tr><td class="impl-yes">Implemented</td></tr>
|
||||||
<tr><td class="impl-partial">Partially implemented</td></tr>
|
<tr><td class="impl-partial">Partially implemented</td></tr>
|
||||||
<tr><td class="impl-no">Will not implement</td></tr>
|
<tr><td class="impl-no">Not a priority to implement</td></tr>
|
||||||
<tr><td class="danger">Dangerous attribute/property</td></tr>
|
<tr><td class="danger">Dangerous attribute/property</td></tr>
|
||||||
<tr><td class="css1">Present in CSS1</td></tr>
|
<tr><td class="css1">Present in CSS1</td></tr>
|
||||||
<tr><td class="feature">Feature, requires extra work</td></tr>
|
<tr><td class="feature">Feature, requires extra work</td></tr>
|
||||||
@@ -118,6 +118,7 @@ thead th {text-align:left;padding:0.1em;background-color:#EEE;}
|
|||||||
<tbody>
|
<tbody>
|
||||||
<tr><th colspan="2">Table</th></tr>
|
<tr><th colspan="2">Table</th></tr>
|
||||||
<tr class="impl-yes"><td>border-collapse</td><td>ENUM(collapse, separate)</td></tr>
|
<tr class="impl-yes"><td>border-collapse</td><td>ENUM(collapse, separate)</td></tr>
|
||||||
|
<tr class="impl-yes"><td>border-spacing</td><td>MULTIPLE</td></tr>
|
||||||
<tr class="impl-yes"><td>caption-side</td><td>ENUM(top, bottom)</td></tr>
|
<tr class="impl-yes"><td>caption-side</td><td>ENUM(top, bottom)</td></tr>
|
||||||
<tr class="feature"><td>empty-cells</td><td>ENUM(show, hide), No IE support makes this useless,
|
<tr class="feature"><td>empty-cells</td><td>ENUM(show, hide), No IE support makes this useless,
|
||||||
possible fix with &nbsp;? Unknown release milestone.</td></tr>
|
possible fix with &nbsp;? Unknown release milestone.</td></tr>
|
||||||
|
@@ -32,7 +32,7 @@
|
|||||||
Before we even write any code, it is paramount to consider whether or
|
Before we even write any code, it is paramount to consider whether or
|
||||||
not the code we're writing is necessary or not. HTML Purifier, by default,
|
not the code we're writing is necessary or not. HTML Purifier, by default,
|
||||||
contains a large set of elements and attributes: large enough so that
|
contains a large set of elements and attributes: large enough so that
|
||||||
<em>any</em> element or attribute in XHTML 1.0 (and its HTML variant)
|
<em>any</em> element or attribute in XHTML 1.0 or 1.1 (and its HTML variants)
|
||||||
that can be safely used by the general public is implemented.
|
that can be safely used by the general public is implemented.
|
||||||
</p>
|
</p>
|
||||||
|
|
||||||
@@ -76,11 +76,12 @@
|
|||||||
<h3>XHTML 1.1</h3>
|
<h3>XHTML 1.1</h3>
|
||||||
|
|
||||||
<p>
|
<p>
|
||||||
We have not implemented the
|
As of HTMLPurifier 2.1.0, we have implemented the
|
||||||
<a href="http://www.w3.org/TR/2001/REC-ruby-20010531/">Ruby module</a>,
|
<a href="http://www.w3.org/TR/2001/REC-ruby-20010531/">Ruby module</a>,
|
||||||
which defines a set of tags
|
which defines a set of tags
|
||||||
for publishing short annotations for text, used mostly in Japanese
|
for publishing short annotations for text, used mostly in Japanese
|
||||||
and Chinese school texts.
|
and Chinese school texts, but applicable for positioning any text (not
|
||||||
|
limited to translations) above or below other corresponding text.
|
||||||
</p>
|
</p>
|
||||||
|
|
||||||
<h3>XHTML 2.0</h3>
|
<h3>XHTML 2.0</h3>
|
||||||
@@ -492,10 +493,11 @@ $def =& $config->getHTMLDefinition(true);
|
|||||||
<p>
|
<p>
|
||||||
The <code>(%flow;)*</code> indicates the allowed children of the
|
The <code>(%flow;)*</code> indicates the allowed children of the
|
||||||
<code>li</code> tag: <code>li</code> allows any number of flow
|
<code>li</code> tag: <code>li</code> allows any number of flow
|
||||||
elements as its children. In HTML Purifier, we'd write it like
|
elements as its children. (The <code>- O</code> allows the closing tag to be
|
||||||
<code>Flow</code> (here's where the content sets we were
|
omitted, though in XML this is not allowed.) In HTML Purifier,
|
||||||
discussing earlier come into play). There are three shorthand content models you
|
we'd write it like <code>Flow</code> (here's where the content sets
|
||||||
can specify:
|
we were discussing earlier come into play). There are three shorthand
|
||||||
|
content models you can specify:
|
||||||
</p>
|
</p>
|
||||||
|
|
||||||
<table class="table">
|
<table class="table">
|
||||||
@@ -668,12 +670,22 @@ $def =& $config->getHTMLDefinition(true);
|
|||||||
Common is a combination of the above-mentioned collections.
|
Common is a combination of the above-mentioned collections.
|
||||||
</p>
|
</p>
|
||||||
|
|
||||||
|
<p class="aside">
|
||||||
|
Readers familiar with the modularization may have noticed that the Core
|
||||||
|
attribute collection differs from that specified by the <a
|
||||||
|
href="http://www.w3.org/TR/xhtml-modularization/abstract_modules.html#s_commonatts">abstract
|
||||||
|
modules of the XHTML Modularization 1.1</a>. We believe this section
|
||||||
|
to be in error, as <code>br</code> permits the use of the <code>style</code>
|
||||||
|
attribute even though it uses the <code>Core</code> collection, and
|
||||||
|
the DTD and XML Schemas supplied by W3C support our interpretation.
|
||||||
|
</p>
|
||||||
|
|
||||||
<h3>Attributes</h3>
|
<h3>Attributes</h3>
|
||||||
|
|
||||||
<p>
|
<p>
|
||||||
If you didn't read the <a href="#addAttribute">previous section on
|
If you didn't read the <a href="#addAttribute">earlier section on
|
||||||
adding attributes</a>, read it now. The last parameter is simply
|
adding attributes</a>, read it now. The last parameter is simply
|
||||||
array of attribute names to attribute implementations, in the exact
|
an array of attribute names to attribute implementations, in the exact
|
||||||
same format as <code>addAttribute()</code>.
|
same format as <code>addAttribute()</code>.
|
||||||
</p>
|
</p>
|
||||||
|
|
||||||
|
@@ -58,7 +58,7 @@ appear elsewhere on the document. The method is simple:</p>
|
|||||||
|
|
||||||
<pre>$config->set('HTML', 'EnableAttrID', true);
|
<pre>$config->set('HTML', 'EnableAttrID', true);
|
||||||
$config->set('Attr', 'IDBlacklist', array(
|
$config->set('Attr', 'IDBlacklist', array(
|
||||||
'list', 'of', 'attributes', 'that', 'are', 'forbidden'
|
'list', 'of', 'attribute', 'values', 'that', 'are', 'forbidden'
|
||||||
));</pre>
|
));</pre>
|
||||||
|
|
||||||
<p>That being said, there are some notable drawbacks. First of all, you have to
|
<p>That being said, there are some notable drawbacks. First of all, you have to
|
||||||
@@ -71,9 +71,9 @@ to possible standards-compliance issues.</p>
|
|||||||
<p>Furthermore, this position becomes untenable when a single web page must hold
|
<p>Furthermore, this position becomes untenable when a single web page must hold
|
||||||
multiple portions of user-submitted content. Since there's obviously no way
|
multiple portions of user-submitted content. Since there's obviously no way
|
||||||
to find out before-hand what IDs users will use, the blacklist is helpless.
|
to find out before-hand what IDs users will use, the blacklist is helpless.
|
||||||
And even since HTML Purifier validates each segment seperately, perhaps doing
|
And since HTML Purifier validates each segment separately, perhaps doing
|
||||||
so at different times, it would be extremely difficult to dynamically update
|
so at different times, it would be extremely difficult to dynamically update
|
||||||
the blacklist inbetween runs.</p>
|
the blacklist in between runs.</p>
|
||||||
|
|
||||||
<p>Finally, simply destroying the ID is extremely un-userfriendly behavior: after
|
<p>Finally, simply destroying the ID is extremely un-userfriendly behavior: after
|
||||||
all, they might have simply specified a duplicate ID by accident.</p>
|
all, they might have simply specified a duplicate ID by accident.</p>
|
||||||
|
@@ -22,7 +22,7 @@ out:</p>
|
|||||||
|
|
||||||
<p class="emphasis">This ain't HTML Tidy!</p>
|
<p class="emphasis">This ain't HTML Tidy!</p>
|
||||||
|
|
||||||
<p>Rather, Tidy stands for a cool set of Tidy-inspired in HTML Purifier
|
<p>Rather, Tidy stands for a cool set of Tidy-inspired features in HTML Purifier
|
||||||
that allows users to submit deprecated elements and attributes and get
|
that allows users to submit deprecated elements and attributes and get
|
||||||
valid strict markup back. For example:</p>
|
valid strict markup back. For example:</p>
|
||||||
|
|
||||||
@@ -33,8 +33,8 @@ valid strict markup back. For example:</p>
|
|||||||
<pre><div style="text-align:center;">Centered</div></pre>
|
<pre><div style="text-align:center;">Centered</div></pre>
|
||||||
|
|
||||||
<p>...when this particular fix is run on the HTML. This tutorial will give
|
<p>...when this particular fix is run on the HTML. This tutorial will give
|
||||||
you down the lowdown of what exactly HTML Purifier will do when Tidy
|
you the lowdown of what exactly HTML Purifier will do when Tidy
|
||||||
is on, and how to fine tune this behavior. Once again, <strong>you do
|
is on, and how to fine-tune this behavior. Once again, <strong>you do
|
||||||
not need Tidy installed on your PHP to use these features!</strong></p>
|
not need Tidy installed on your PHP to use these features!</strong></p>
|
||||||
|
|
||||||
<h2>What does it do?</h2>
|
<h2>What does it do?</h2>
|
||||||
@@ -221,7 +221,7 @@ general syntax:</p>
|
|||||||
|
|
||||||
<p>The lowdown is, quite frankly, HTML Purifier's default settings are
|
<p>The lowdown is, quite frankly, HTML Purifier's default settings are
|
||||||
probably good enough. The next step is to bump the level up to heavy,
|
probably good enough. The next step is to bump the level up to heavy,
|
||||||
and if that still doesn't satisfy your appetite, do some fine tuning.
|
and if that still doesn't satisfy your appetite, do some fine-tuning.
|
||||||
Other than that, don't worry about it: this all works silently and
|
Other than that, don't worry about it: this all works silently and
|
||||||
effectively in the background.</p>
|
effectively in the background.</p>
|
||||||
|
|
||||||
|
@@ -96,7 +96,7 @@ which can be a rewarding (but difficult) task.</p>
|
|||||||
<h2 id="findcharset">Finding the real encoding</h2>
|
<h2 id="findcharset">Finding the real encoding</h2>
|
||||||
|
|
||||||
<p>In the beginning, there was ASCII, and things were simple. But they
|
<p>In the beginning, there was ASCII, and things were simple. But they
|
||||||
weren't good, for no one could write in Cryllic or Thai. So there
|
weren't good, for no one could write in Cyrillic or Thai. So there
|
||||||
exploded a proliferation of character encodings to remedy the problem
|
exploded a proliferation of character encodings to remedy the problem
|
||||||
by extending the characters ASCII could express. This ridiculously
|
by extending the characters ASCII could express. This ridiculously
|
||||||
simplified version of the history of character encodings shows us that
|
simplified version of the history of character encodings shows us that
|
||||||
@@ -138,7 +138,7 @@ browser:</p>
|
|||||||
<dd>View > Encoding: bulleted item is unofficial name</dd>
|
<dd>View > Encoding: bulleted item is unofficial name</dd>
|
||||||
</dl>
|
</dl>
|
||||||
|
|
||||||
<p>Internet Explorer won't give you the mime (i.e. useful/real) name of the
|
<p>Internet Explorer won't give you the MIME (i.e. useful/real) name of the
|
||||||
character encoding, so you'll have to look it up using their description.
|
character encoding, so you'll have to look it up using their description.
|
||||||
Some common ones:</p>
|
Some common ones:</p>
|
||||||
|
|
||||||
@@ -216,6 +216,12 @@ if your <code>META</code> tag claims that either:</p>
|
|||||||
|
|
||||||
<h2 id="fixcharset">Fixing the encoding</h2>
|
<h2 id="fixcharset">Fixing the encoding</h2>
|
||||||
|
|
||||||
|
<p class="aside">The advice given here is for pages being served as
|
||||||
|
vanilla <code>text/html</code>. Different practices must be used
|
||||||
|
for <code>application/xml</code> or <code>application/xhtml+xml</code>, see
|
||||||
|
<a href="http://www.w3.org/TR/2002/NOTE-xhtml-media-types-20020430/">W3C's
|
||||||
|
document on XHTML media types</a> for more information.</p>
|
||||||
|
|
||||||
<p>If your <code>META</code> encoding and your real encoding match,
|
<p>If your <code>META</code> encoding and your real encoding match,
|
||||||
savvy! You can skip this section. If they don't...</p>
|
savvy! You can skip this section. If they don't...</p>
|
||||||
|
|
||||||
@@ -302,7 +308,8 @@ languages</a>. The appropriate code is:</p>
|
|||||||
|
|
||||||
<p>...replacing UTF-8 with whatever your embedded encoding is.
|
<p>...replacing UTF-8 with whatever your embedded encoding is.
|
||||||
This code must come before any output, so be careful about
|
This code must come before any output, so be careful about
|
||||||
stray whitespace in your application.</p>
|
stray whitespace in your application (i.e., any whitespace before
|
||||||
|
output excluding whitespace within <?php ?> tags).</p>
|
||||||
|
|
||||||
<h4 id="fixcharset-server-phpini">PHP ini directive</h4>
|
<h4 id="fixcharset-server-phpini">PHP ini directive</h4>
|
||||||
|
|
||||||
@@ -313,8 +320,8 @@ header call: <code><a href="http://php.net/ini.core#ini.default-charset">default
|
|||||||
|
|
||||||
<p>...will also do the trick. If PHP is running as an Apache module (and
|
<p>...will also do the trick. If PHP is running as an Apache module (and
|
||||||
not as FastCGI, consult
|
not as FastCGI, consult
|
||||||
<a href="http://php.net/phpinfo">phpinfo</a>() for details), you can even use htaccess do apply this property
|
<a href="http://php.net/phpinfo">phpinfo</a>() for details), you can even use htaccess to apply this property
|
||||||
globally:</p>
|
across many PHP files:</p>
|
||||||
|
|
||||||
<pre><a href="http://php.net/configuration.changes#configuration.changes.apache">php_value</a> default_charset "UTF-8"</pre>
|
<pre><a href="http://php.net/configuration.changes#configuration.changes.apache">php_value</a> default_charset "UTF-8"</pre>
|
||||||
|
|
||||||
@@ -360,10 +367,11 @@ to send anything at all:</p>
|
|||||||
|
|
||||||
<pre><a href="http://httpd.apache.org/docs/1.3/mod/core.html#adddefaultcharset">AddDefaultCharset</a> Off</pre>
|
<pre><a href="http://httpd.apache.org/docs/1.3/mod/core.html#adddefaultcharset">AddDefaultCharset</a> Off</pre>
|
||||||
|
|
||||||
<p>...making your <code>META</code> tags the sole source of
|
<p>...making your internal charset declaration (usually the <code>META</code> tags)
|
||||||
character encoding information. In these cases, it is
|
the sole source of character encoding
|
||||||
<em>especially</em> important to make sure you have valid <code>META</code>
|
information. In these cases, it is <em>especially</em> important to make
|
||||||
tags on your pages and all the text before them is ASCII.</p>
|
sure you have valid <code>META</code> tags on your pages and all the
|
||||||
|
text before them is ASCII.</p>
|
||||||
|
|
||||||
<blockquote class="aside"><p>These directives can also be
|
<blockquote class="aside"><p>These directives can also be
|
||||||
placed in httpd.conf file for Apache, but
|
placed in httpd.conf file for Apache, but
|
||||||
@@ -428,28 +436,30 @@ IIS to change character encodings, I'd be grateful.</p>
|
|||||||
|
|
||||||
<p><code>META</code> tags are the most common source of embedded
|
<p><code>META</code> tags are the most common source of embedded
|
||||||
encodings, but they can also come from somewhere else: XML
|
encodings, but they can also come from somewhere else: XML
|
||||||
processing instructions. They look like:</p>
|
Declarations. They look like:</p>
|
||||||
|
|
||||||
<pre><?xml version="1.0" encoding="UTF-8"?></pre>
|
<pre><?xml version="1.0" encoding="UTF-8"?></pre>
|
||||||
|
|
||||||
<p>...and are most often found in XML documents (including XHTML).</p>
|
<p>...and are most often found in XML documents (including XHTML).</p>
|
||||||
|
|
||||||
<p>For XHTML, this processing instruction theoretically
|
<p>For XHTML, this XML Declaration theoretically
|
||||||
overrides the <code>META</code> tag. In reality, this happens only when the
|
overrides the <code>META</code> tag. In reality, this happens only when the
|
||||||
XHTML is actually served as legit XML and not HTML, which is almost always
|
XHTML is actually served as legit XML and not HTML, which is almost always
|
||||||
never due to Internet Explorer's lack of support for
|
never due to Internet Explorer's lack of support for
|
||||||
<code>application/xhtml+xml</code> (even though doing so is often
|
<code>application/xhtml+xml</code> (even though doing so is often
|
||||||
argued to be <a href="http://www.hixie.ch/advocacy/xhtml">good practice</a>).</p>
|
argued to be <a href="http://www.hixie.ch/advocacy/xhtml">good
|
||||||
|
practice</a> and is required by the XHTML 1.1 specification).</p>
|
||||||
|
|
||||||
<p>For XML, however, this processing instruction is extremely important.
|
<p>For XML, however, this XML Declaration is extremely important.
|
||||||
Since most webservers are not configured to send charsets for .xml files,
|
Since most webservers are not configured to send charsets for .xml files,
|
||||||
this is the only thing a parser has to go on. Furthermore, the default
|
this is the only thing a parser has to go on. Furthermore, the default
|
||||||
for XML files is UTF-8, which often butts heads with more common
|
for XML files is UTF-8, which often butts heads with more common
|
||||||
ISO-8859-1 encoding (you see this in garbled RSS feeds).</p>
|
ISO-8859-1 encoding (you see this in garbled RSS feeds).</p>
|
||||||
|
|
||||||
<p>In short, if you use XHTML and have gone through the
|
<p>In short, if you use XHTML and have gone through the
|
||||||
trouble of adding the XML header, make sure it jives
|
trouble of adding the XML Declaration, make sure it jives
|
||||||
with your <code>META</code> tags and HTTP headers.</p>
|
with your <code>META</code> tags (which should only be present
|
||||||
|
if served in text/html) and HTTP headers.</p>
|
||||||
|
|
||||||
<h3 id="fixcharset-internals">Inside the process</h3>
|
<h3 id="fixcharset-internals">Inside the process</h3>
|
||||||
|
|
||||||
@@ -506,7 +516,7 @@ usage in one language sometimes requires the occasional special character
|
|||||||
that, without surprise, is not available in your character set. Sometimes
|
that, without surprise, is not available in your character set. Sometimes
|
||||||
developers get around this by adding support for multiple encodings: when
|
developers get around this by adding support for multiple encodings: when
|
||||||
using Chinese, use Big5, when using Japanese, use Shift-JIS, when
|
using Chinese, use Big5, when using Japanese, use Shift-JIS, when
|
||||||
using Greek, etc. Other times, they use character entities with great
|
using Greek, etc. Other times, they use character references with great
|
||||||
zeal.</p>
|
zeal.</p>
|
||||||
|
|
||||||
<p>UTF-8, however, obviates the need for any of these complicated
|
<p>UTF-8, however, obviates the need for any of these complicated
|
||||||
@@ -520,14 +530,14 @@ you don't have to use those user-unfriendly entities.</p>
|
|||||||
|
|
||||||
<p>Websites encoded in Latin-1 (ISO-8859-1) which occasionally need
|
<p>Websites encoded in Latin-1 (ISO-8859-1) which occasionally need
|
||||||
a special character outside of their scope often will use a character
|
a special character outside of their scope often will use a character
|
||||||
entity to achieve the desired effect. For instance, θ can be
|
entity reference to achieve the desired effect. For instance, θ can be
|
||||||
written <code>&theta;</code>, regardless of the character encoding's
|
written <code>&theta;</code>, regardless of the character encoding's
|
||||||
support of Greek letters.</p>
|
support of Greek letters.</p>
|
||||||
|
|
||||||
<p>This works nicely for limited use of special characters, but
|
<p>This works nicely for limited use of special characters, but
|
||||||
say you wanted this sentence of Chinese text: 激光,
|
say you wanted this sentence of Chinese text: 激光,
|
||||||
這兩個字是甚麼意思.
|
這兩個字是甚麼意思.
|
||||||
The entity-ized version would look like this:</p>
|
The ampersand encoded version would look like this:</p>
|
||||||
|
|
||||||
<pre>&#28608;&#20809;, &#36889;&#20841;&#20491;&#23383;&#26159;&#29978;&#40636;&#24847;&#24605;</pre>
|
<pre>&#28608;&#20809;, &#36889;&#20841;&#20491;&#23383;&#26159;&#29978;&#40636;&#24847;&#24605;</pre>
|
||||||
|
|
||||||
@@ -545,7 +555,7 @@ an application that originally used ISO-8859-1 but switched to UTF-8
|
|||||||
when it became far too cumbersome to support foreign languages. Bots
|
when it became far too cumbersome to support foreign languages. Bots
|
||||||
will now actually go through articles and convert character entities
|
will now actually go through articles and convert character entities
|
||||||
to their corresponding real characters for the sake of user-friendliness
|
to their corresponding real characters for the sake of user-friendliness
|
||||||
and searcheability. See
|
and searchability. See
|
||||||
<a href="http://meta.wikimedia.org/wiki/Help:Special_characters">Meta's
|
<a href="http://meta.wikimedia.org/wiki/Help:Special_characters">Meta's
|
||||||
page on special characters</a> for more details.
|
page on special characters</a> for more details.
|
||||||
</p></blockquote>
|
</p></blockquote>
|
||||||
@@ -593,7 +603,7 @@ browser you're using, they might:</p>
|
|||||||
<ul>
|
<ul>
|
||||||
<li>Replace the unsupported characters with useless question marks,</li>
|
<li>Replace the unsupported characters with useless question marks,</li>
|
||||||
<li>Attempt to fix the characters (example: smart quotes to regular quotes),</li>
|
<li>Attempt to fix the characters (example: smart quotes to regular quotes),</li>
|
||||||
<li>Replace the character with a character entity, or</li>
|
<li>Replace the character with a character entity reference, or</li>
|
||||||
<li>Send it anyway as a different character encoding mixed in
|
<li>Send it anyway as a different character encoding mixed in
|
||||||
with the original encoding (usually Windows-1252 rather than
|
with the original encoding (usually Windows-1252 rather than
|
||||||
iso-8859-1 or UTF-8 interspersed in 8-bit)</li>
|
iso-8859-1 or UTF-8 interspersed in 8-bit)</li>
|
||||||
@@ -609,7 +619,7 @@ since UTF-8 supports every character.</p>
|
|||||||
|
|
||||||
<h4 id="whyutf8-forms-multipart"><code>multipart/form-data</code></h4>
|
<h4 id="whyutf8-forms-multipart"><code>multipart/form-data</code></h4>
|
||||||
|
|
||||||
<p>Multipart form submission takes a way a lot of the ambiguity
|
<p>Multipart form submission takes away a lot of the ambiguity
|
||||||
that percent-encoding had: the server now can explicitly ask for
|
that percent-encoding had: the server now can explicitly ask for
|
||||||
certain encodings, and the client can explicitly tell the server
|
certain encodings, and the client can explicitly tell the server
|
||||||
during the form submission what encoding the fields are in.</p>
|
during the form submission what encoding the fields are in.</p>
|
||||||
@@ -622,9 +632,9 @@ Each method has deficiencies, especially the former.</p>
|
|||||||
<p>If you tell the browser to send the form in the same encoding as
|
<p>If you tell the browser to send the form in the same encoding as
|
||||||
the page, you still have the trouble of what to do with characters
|
the page, you still have the trouble of what to do with characters
|
||||||
that are outside of the character encoding's range. The behavior, once
|
that are outside of the character encoding's range. The behavior, once
|
||||||
again, varies: Firefox 2.0 entity-izes them while Internet Explorer
|
again, varies: Firefox 2.0 converts them to character entity references
|
||||||
7.0 mangles them beyond intelligibility. For serious internationalization purposes,
|
while Internet Explorer 7.0 mangles them beyond intelligibility. For
|
||||||
this is not an option.</p>
|
serious internationalization purposes, this is not an option.</p>
|
||||||
|
|
||||||
<p>The other possibility is to set Accept-Encoding to UTF-8, which
|
<p>The other possibility is to set Accept-Encoding to UTF-8, which
|
||||||
begs the question: Why aren't you using UTF-8 for everything then?
|
begs the question: Why aren't you using UTF-8 for everything then?
|
||||||
@@ -664,12 +674,12 @@ it up to the module iconv to do the dirty work.</p>
|
|||||||
<p>This approach, however, is not perfect. iconv is blithely unaware
|
<p>This approach, however, is not perfect. iconv is blithely unaware
|
||||||
of HTML character entities. HTML Purifier, in order to
|
of HTML character entities. HTML Purifier, in order to
|
||||||
protect against sophisticated escaping schemes, normalizes all character
|
protect against sophisticated escaping schemes, normalizes all character
|
||||||
and numeric entities before processing the text. This leads to
|
and numeric entity references before processing the text. This leads to
|
||||||
one important ramification:</p>
|
one important ramification:</p>
|
||||||
|
|
||||||
<p><strong>Any character that is not supported by the target character
|
<p><strong>Any character that is not supported by the target character
|
||||||
set, regardless of whether or not it is in the form of a character
|
set, regardless of whether or not it is in the form of a character
|
||||||
entity or a raw character, will be silently ignored.</strong></p>
|
entity reference or a raw character, will be silently ignored.</strong></p>
|
||||||
|
|
||||||
<p>Example of this principle at work: say you have <code>&theta;</code>
|
<p>Example of this principle at work: say you have <code>&theta;</code>
|
||||||
in your HTML, but the output is in Latin-1 (which, understandably,
|
in your HTML, but the output is in Latin-1 (which, understandably,
|
||||||
@@ -678,7 +688,7 @@ set the encoding correctly using %Core.Encoding):</p>
|
|||||||
|
|
||||||
<ul>
|
<ul>
|
||||||
<li>The <code>Encoder</code> will transform the text from ISO 8859-1 to UTF-8
|
<li>The <code>Encoder</code> will transform the text from ISO 8859-1 to UTF-8
|
||||||
(note that theta is preserved since it doesn't actually use
|
(note that theta is preserved here since it doesn't actually use
|
||||||
any non-ASCII characters): <code>&theta;</code></li>
|
any non-ASCII characters): <code>&theta;</code></li>
|
||||||
<li>The <code>EntityParser</code> will transform all named and numeric
|
<li>The <code>EntityParser</code> will transform all named and numeric
|
||||||
character entities to their corresponding raw UTF-8 equivalents:
|
character entities to their corresponding raw UTF-8 equivalents:
|
||||||
@@ -701,7 +711,7 @@ Purifier has provided a slightly more palatable workaround using
|
|||||||
<li>The <code>EntityParser</code> transforms entities: <code>θ</code></li>
|
<li>The <code>EntityParser</code> transforms entities: <code>θ</code></li>
|
||||||
<li>HTML Purifier processes the code: <code>θ</code></li>
|
<li>HTML Purifier processes the code: <code>θ</code></li>
|
||||||
<li>The <code>Encoder</code> replaces all non-ASCII characters
|
<li>The <code>Encoder</code> replaces all non-ASCII characters
|
||||||
with numeric entities: <code>&#952;</code></li>
|
with numeric entity references: <code>&#952;</code></li>
|
||||||
<li>For good measure, <code>Encoder</code> transforms encoding back to
|
<li>For good measure, <code>Encoder</code> transforms encoding back to
|
||||||
original (which is strictly unnecessary for 99% of encodings
|
original (which is strictly unnecessary for 99% of encodings
|
||||||
out there): <code>&#952;</code> (remember, it's all ASCII!)</li>
|
out there): <code>&#952;</code> (remember, it's all ASCII!)</li>
|
||||||
@@ -711,19 +721,19 @@ Purifier has provided a slightly more palatable workaround using
|
|||||||
the land of Unicode characters, and is totally unacceptable for Chinese
|
the land of Unicode characters, and is totally unacceptable for Chinese
|
||||||
or Japanese texts. The even bigger kicker is that, supposing the
|
or Japanese texts. The even bigger kicker is that, supposing the
|
||||||
input encoding was actually ISO-8859-7, which <em>does</em> support
|
input encoding was actually ISO-8859-7, which <em>does</em> support
|
||||||
theta, the character would get entity-ized anyway! (The Encoder does
|
theta, the character would get converted into a character entity reference
|
||||||
not discriminate).</p>
|
anyway! (The Encoder does not discriminate).</p>
|
||||||
|
|
||||||
<p>The current functionality is about where HTML Purifier will be for
|
<p>The current functionality is about where HTML Purifier will be for
|
||||||
the rest of eternity. HTML Purifier could attempt to preserve the original
|
the rest of eternity. HTML Purifier could attempt to preserve the original
|
||||||
form of the entities so that they could be substituted back in, only the
|
form of the character references so that they could be substituted back in, only the
|
||||||
DOM extension kills them off irreversibly. HTML Purifier could also attempt
|
DOM extension kills them off irreversibly. HTML Purifier could also attempt
|
||||||
to be smart and only convert non-ASCII characters that weren't supported
|
to be smart and only convert non-ASCII characters that weren't supported
|
||||||
by the target encoding, but that would require reimplementing iconv
|
by the target encoding, but that would require reimplementing iconv
|
||||||
with HTML awareness, something I will not do.</p>
|
with HTML awareness, something I will not do.</p>
|
||||||
|
|
||||||
<p>So there: either it's UTF-8 or crippled international support. Your pick! (and I'm
|
<p>So there: either it's UTF-8 or crippled international support. Your pick! (and I'm
|
||||||
not being sarcastic here: some people could care less about other languages)</p>
|
not being sarcastic here: some people could care less about other languages).</p>
|
||||||
|
|
||||||
<h2 id="migrate">Migrate to UTF-8</h2>
|
<h2 id="migrate">Migrate to UTF-8</h2>
|
||||||
|
|
||||||
@@ -985,7 +995,7 @@ and yes, it is variable width. Other traits:</p>
|
|||||||
in different ways. It is beyond the scope of this document to explain
|
in different ways. It is beyond the scope of this document to explain
|
||||||
what precisely these implications are. PHPWact provides
|
what precisely these implications are. PHPWact provides
|
||||||
a very good <a href="http://www.phpwact.org/php/i18n/utf-8">reference document</a>
|
a very good <a href="http://www.phpwact.org/php/i18n/utf-8">reference document</a>
|
||||||
on what to expect from each functions, although coverage is spotty in
|
on what to expect from each function, although coverage is spotty in
|
||||||
some areas. Their more general notes on
|
some areas. Their more general notes on
|
||||||
<a href="http://www.phpwact.org/php/i18n/charsets">character sets</a>
|
<a href="http://www.phpwact.org/php/i18n/charsets">character sets</a>
|
||||||
are also worth looking at for information on UTF-8. Some rules of thumb
|
are also worth looking at for information on UTF-8. Some rules of thumb
|
||||||
@@ -999,7 +1009,7 @@ when dealing with Unicode text:</p>
|
|||||||
<li>Think twice before using functions that:<ul>
|
<li>Think twice before using functions that:<ul>
|
||||||
<li>...count characters (strlen will return bytes, not characters;
|
<li>...count characters (strlen will return bytes, not characters;
|
||||||
str_split and word_wrap may corrupt)</li>
|
str_split and word_wrap may corrupt)</li>
|
||||||
<li>...entity-ize things (UTF-8 doesn't need entities)</li>
|
<li>...convert characters to entity references (UTF-8 doesn't need entities)</li>
|
||||||
<li>...do very complex string processing (*printf)</li>
|
<li>...do very complex string processing (*printf)</li>
|
||||||
</ul></li>
|
</ul></li>
|
||||||
</ul>
|
</ul>
|
||||||
|
@@ -22,8 +22,8 @@
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
/*
|
/*
|
||||||
HTML Purifier 2.1.1 - Standards Compliant HTML Filtering
|
HTML Purifier 2.1.5 - Standards Compliant HTML Filtering
|
||||||
Copyright (C) 2006 Edward Z. Yang
|
Copyright (C) 2006-2007 Edward Z. Yang
|
||||||
|
|
||||||
This library is free software; you can redistribute it and/or
|
This library is free software; you can redistribute it and/or
|
||||||
modify it under the terms of the GNU Lesser General Public
|
modify it under the terms of the GNU Lesser General Public
|
||||||
@@ -43,9 +43,8 @@
|
|||||||
// constants are slow, but we'll make one exception
|
// constants are slow, but we'll make one exception
|
||||||
define('HTMLPURIFIER_PREFIX', dirname(__FILE__));
|
define('HTMLPURIFIER_PREFIX', dirname(__FILE__));
|
||||||
|
|
||||||
// almost every class has an undocumented dependency to these, so make sure
|
// every class has an undocumented dependency on these, so they must be included!
|
||||||
// they get included
|
require_once 'HTMLPurifier/ConfigSchema.php'; // fatal errors if not included
|
||||||
require_once 'HTMLPurifier/ConfigSchema.php'; // important
|
|
||||||
require_once 'HTMLPurifier/Config.php';
|
require_once 'HTMLPurifier/Config.php';
|
||||||
require_once 'HTMLPurifier/Context.php';
|
require_once 'HTMLPurifier/Context.php';
|
||||||
|
|
||||||
@@ -60,16 +59,23 @@ require_once 'HTMLPurifier/LanguageFactory.php';
|
|||||||
HTMLPurifier_ConfigSchema::define(
|
HTMLPurifier_ConfigSchema::define(
|
||||||
'Core', 'CollectErrors', false, 'bool', '
|
'Core', 'CollectErrors', false, 'bool', '
|
||||||
Whether or not to collect errors found while filtering the document. This
|
Whether or not to collect errors found while filtering the document. This
|
||||||
is a useful way to give feedback to your users. CURRENTLY NOT IMPLEMENTED.
|
is a useful way to give feedback to your users. <strong>Warning:</strong>
|
||||||
This directive has been available since 2.0.0.
|
Currently this feature is very patchy and experimental, with lots of
|
||||||
|
possible error messages not yet implemented. It will not cause any problems,
|
||||||
|
but it may not help your users either. This directive has been available
|
||||||
|
since 2.0.0.
|
||||||
');
|
');
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Main library execution class.
|
* Facade that coordinates HTML Purifier's subsystems in order to purify HTML.
|
||||||
*
|
*
|
||||||
* Facade that performs calls to the HTMLPurifier_Lexer,
|
* @note There are several points in which configuration can be specified
|
||||||
* HTMLPurifier_Strategy and HTMLPurifier_Generator subsystems in order to
|
* for HTML Purifier. The precedence of these (from lowest to
|
||||||
* purify HTML.
|
* highest) is as follows:
|
||||||
|
* -# Instance: new HTMLPurifier($config)
|
||||||
|
* -# Invocation: purify($html, $config)
|
||||||
|
* These configurations are entirely independent of each other and
|
||||||
|
* are *not* merged.
|
||||||
*
|
*
|
||||||
* @todo We need an easier way to inject strategies, it'll probably end
|
* @todo We need an easier way to inject strategies, it'll probably end
|
||||||
* up getting done through config though.
|
* up getting done through config though.
|
||||||
@@ -77,15 +83,16 @@ This directive has been available since 2.0.0.
|
|||||||
class HTMLPurifier
|
class HTMLPurifier
|
||||||
{
|
{
|
||||||
|
|
||||||
var $version = '2.1.1';
|
var $version = '2.1.5';
|
||||||
|
|
||||||
var $config;
|
var $config;
|
||||||
var $filters;
|
var $filters = array();
|
||||||
|
|
||||||
var $strategy, $generator;
|
var $strategy, $generator;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Final HTMLPurifier_Context of last run purification. Might be an array.
|
* Resultant HTMLPurifier_Context of last run purification. Is an array
|
||||||
|
* of contexts if the last called method was purifyArray().
|
||||||
* @public
|
* @public
|
||||||
*/
|
*/
|
||||||
var $context;
|
var $context;
|
||||||
@@ -150,6 +157,11 @@ class HTMLPurifier
|
|||||||
$context->register('ErrorCollector', $error_collector);
|
$context->register('ErrorCollector', $error_collector);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// setup id_accumulator context, necessary due to the fact that
|
||||||
|
// AttrValidator can be called from many places
|
||||||
|
$id_accumulator = HTMLPurifier_IDAccumulator::build($config, $context);
|
||||||
|
$context->register('IDAccumulator', $id_accumulator);
|
||||||
|
|
||||||
$html = HTMLPurifier_Encoder::convertToUTF8($html, $config, $context);
|
$html = HTMLPurifier_Encoder::convertToUTF8($html, $config, $context);
|
||||||
|
|
||||||
for ($i = 0, $size = count($this->filters); $i < $size; $i++) {
|
for ($i = 0, $size = count($this->filters); $i < $size; $i++) {
|
||||||
@@ -198,8 +210,10 @@ class HTMLPurifier
|
|||||||
|
|
||||||
/**
|
/**
|
||||||
* Singleton for enforcing just one HTML Purifier in your system
|
* Singleton for enforcing just one HTML Purifier in your system
|
||||||
|
* @param $prototype Optional prototype HTMLPurifier instance to
|
||||||
|
* overload singleton with.
|
||||||
*/
|
*/
|
||||||
function &getInstance($prototype = null) {
|
function &instance($prototype = null) {
|
||||||
static $htmlpurifier;
|
static $htmlpurifier;
|
||||||
if (!$htmlpurifier || $prototype) {
|
if (!$htmlpurifier || $prototype) {
|
||||||
if (is_a($prototype, 'HTMLPurifier')) {
|
if (is_a($prototype, 'HTMLPurifier')) {
|
||||||
@@ -213,6 +227,9 @@ class HTMLPurifier
|
|||||||
return $htmlpurifier;
|
return $htmlpurifier;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
function &getInstance($prototype = null) {
|
||||||
|
return HTMLPurifier::instance($prototype);
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -54,18 +54,15 @@ class HTMLPurifier_AttrDef
|
|||||||
*
|
*
|
||||||
* @warning This processing is inconsistent with XML's whitespace handling
|
* @warning This processing is inconsistent with XML's whitespace handling
|
||||||
* as specified by section 3.3.3 and referenced XHTML 1.0 section
|
* as specified by section 3.3.3 and referenced XHTML 1.0 section
|
||||||
* 4.7. Compliant processing requires all line breaks normalized
|
* 4.7. However, note that we are NOT necessarily
|
||||||
* to "\n", so the fix is not as simple as fixing it in this
|
* parsing XML, thus, this behavior may still be correct. We
|
||||||
* function. Trim and whitespace collapsing are supposed to only
|
* assume that newlines have been normalized.
|
||||||
* occur in NMTOKENs. However, note that we are NOT necessarily
|
|
||||||
* parsing XML, thus, this behavior may still be correct.
|
|
||||||
*
|
*
|
||||||
* @public
|
* @public
|
||||||
*/
|
*/
|
||||||
function parseCDATA($string) {
|
function parseCDATA($string) {
|
||||||
$string = trim($string);
|
$string = trim($string);
|
||||||
$string = str_replace("\n", '', $string);
|
$string = str_replace(array("\n", "\t", "\r"), ' ', $string);
|
||||||
$string = str_replace(array("\r", "\t"), ' ', $string);
|
|
||||||
return $string;
|
return $string;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -82,5 +79,13 @@ class HTMLPurifier_AttrDef
|
|||||||
return $this;
|
return $this;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Removes spaces from rgb(0, 0, 0) so that shorthand CSS properties work
|
||||||
|
* properly. THIS IS A HACK!
|
||||||
|
*/
|
||||||
|
function mungeRgb($string) {
|
||||||
|
return preg_replace('/rgb\((\d+)\s*,\s*(\d+)\s*,\s*(\d+)\)/', 'rgb(\1,\2,\3)', $string);
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -38,7 +38,20 @@ class HTMLPurifier_AttrDef_CSS extends HTMLPurifier_AttrDef
|
|||||||
list($property, $value) = explode(':', $declaration, 2);
|
list($property, $value) = explode(':', $declaration, 2);
|
||||||
$property = trim($property);
|
$property = trim($property);
|
||||||
$value = trim($value);
|
$value = trim($value);
|
||||||
if (!isset($definition->info[$property])) continue;
|
$ok = false;
|
||||||
|
do {
|
||||||
|
if (isset($definition->info[$property])) {
|
||||||
|
$ok = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
if (ctype_lower($property)) break;
|
||||||
|
$property = strtolower($property);
|
||||||
|
if (isset($definition->info[$property])) {
|
||||||
|
$ok = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
} while(0);
|
||||||
|
if (!$ok) continue;
|
||||||
// inefficient call, since the validator will do this again
|
// inefficient call, since the validator will do this again
|
||||||
if (strtolower(trim($value)) !== 'inherit') {
|
if (strtolower(trim($value)) !== 'inherit') {
|
||||||
// inherit works for everything (but only on the base property)
|
// inherit works for everything (but only on the base property)
|
||||||
|
@@ -31,6 +31,9 @@ class HTMLPurifier_AttrDef_CSS_Background extends HTMLPurifier_AttrDef
|
|||||||
$string = $this->parseCDATA($string);
|
$string = $this->parseCDATA($string);
|
||||||
if ($string === '') return false;
|
if ($string === '') return false;
|
||||||
|
|
||||||
|
// munge rgb() decl if necessary
|
||||||
|
$string = $this->mungeRgb($string);
|
||||||
|
|
||||||
// assumes URI doesn't have spaces in it
|
// assumes URI doesn't have spaces in it
|
||||||
$bits = explode(' ', strtolower($string)); // bits to process
|
$bits = explode(' ', strtolower($string)); // bits to process
|
||||||
|
|
||||||
|
@@ -22,7 +22,7 @@ class HTMLPurifier_AttrDef_CSS_Border extends HTMLPurifier_AttrDef
|
|||||||
|
|
||||||
function validate($string, $config, &$context) {
|
function validate($string, $config, &$context) {
|
||||||
$string = $this->parseCDATA($string);
|
$string = $this->parseCDATA($string);
|
||||||
// we specifically will not support rgb() syntax with spaces
|
$string = $this->mungeRgb($string);
|
||||||
$bits = explode(' ', $string);
|
$bits = explode(' ', $string);
|
||||||
$done = array(); // segments we've finished
|
$done = array(); // segments we've finished
|
||||||
$ret = ''; // return value
|
$ret = ''; // return value
|
||||||
|
@@ -39,20 +39,13 @@ class HTMLPurifier_AttrDef_CSS_Color extends HTMLPurifier_AttrDef
|
|||||||
if ($colors === null) $colors = $config->get('Core', 'ColorKeywords');
|
if ($colors === null) $colors = $config->get('Core', 'ColorKeywords');
|
||||||
|
|
||||||
$color = trim($color);
|
$color = trim($color);
|
||||||
if (!$color) return false;
|
if ($color === '') return false;
|
||||||
|
|
||||||
$lower = strtolower($color);
|
$lower = strtolower($color);
|
||||||
if (isset($colors[$lower])) return $colors[$lower];
|
if (isset($colors[$lower])) return $colors[$lower];
|
||||||
|
|
||||||
if ($color[0] === '#') {
|
if (strpos($color, 'rgb(') !== false) {
|
||||||
// hexadecimal handling
|
|
||||||
$hex = substr($color, 1);
|
|
||||||
$length = strlen($hex);
|
|
||||||
if ($length !== 3 && $length !== 6) return false;
|
|
||||||
if (!ctype_xdigit($hex)) return false;
|
|
||||||
} else {
|
|
||||||
// rgb literal handling
|
// rgb literal handling
|
||||||
if (strpos($color, 'rgb(')) return false;
|
|
||||||
$length = strlen($color);
|
$length = strlen($color);
|
||||||
if (strpos($color, ')') !== $length - 1) return false;
|
if (strpos($color, ')') !== $length - 1) return false;
|
||||||
$triad = substr($color, 4, $length - 4 - 1);
|
$triad = substr($color, 4, $length - 4 - 1);
|
||||||
@@ -90,6 +83,17 @@ class HTMLPurifier_AttrDef_CSS_Color extends HTMLPurifier_AttrDef
|
|||||||
}
|
}
|
||||||
$new_triad = implode(',', $new_parts);
|
$new_triad = implode(',', $new_parts);
|
||||||
$color = "rgb($new_triad)";
|
$color = "rgb($new_triad)";
|
||||||
|
} else {
|
||||||
|
// hexadecimal handling
|
||||||
|
if ($color[0] === '#') {
|
||||||
|
$hex = substr($color, 1);
|
||||||
|
} else {
|
||||||
|
$hex = $color;
|
||||||
|
$color = '#' . $color;
|
||||||
|
}
|
||||||
|
$length = strlen($hex);
|
||||||
|
if ($length !== 3 && $length !== 6) return false;
|
||||||
|
if (!ctype_xdigit($hex)) return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
return $color;
|
return $color;
|
||||||
|
26
library/HTMLPurifier/AttrDef/CSS/DenyElementDecorator.php
Normal file
26
library/HTMLPurifier/AttrDef/CSS/DenyElementDecorator.php
Normal file
@@ -0,0 +1,26 @@
|
|||||||
|
<?php
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Decorator which enables CSS properties to be disabled for specific elements.
|
||||||
|
*/
|
||||||
|
class HTMLPurifier_AttrDef_CSS_DenyElementDecorator extends HTMLPurifier_AttrDef
|
||||||
|
{
|
||||||
|
var $def, $element;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @param $def Definition to wrap
|
||||||
|
* @param $element Element to deny
|
||||||
|
*/
|
||||||
|
function HTMLPurifier_AttrDef_CSS_DenyElementDecorator(&$def, $element) {
|
||||||
|
$this->def =& $def;
|
||||||
|
$this->element = $element;
|
||||||
|
}
|
||||||
|
/**
|
||||||
|
* Checks if CurrentToken is set and equal to $this->element
|
||||||
|
*/
|
||||||
|
function validate($string, $config, $context) {
|
||||||
|
$token = $context->get('CurrentToken', true);
|
||||||
|
if ($token && $token->name == $this->element) return false;
|
||||||
|
return $this->def->validate($string, $config, $context);
|
||||||
|
}
|
||||||
|
}
|
@@ -19,7 +19,6 @@ class HTMLPurifier_AttrDef_CSS_FontFamily extends HTMLPurifier_AttrDef
|
|||||||
'cursive' => true
|
'cursive' => true
|
||||||
);
|
);
|
||||||
|
|
||||||
$string = $this->parseCDATA($string);
|
|
||||||
// assume that no font names contain commas in them
|
// assume that no font names contain commas in them
|
||||||
$fonts = explode(',', $string);
|
$fonts = explode(',', $string);
|
||||||
$final = '';
|
$final = '';
|
||||||
@@ -38,13 +37,40 @@ class HTMLPurifier_AttrDef_CSS_FontFamily extends HTMLPurifier_AttrDef
|
|||||||
$quote = $font[0];
|
$quote = $font[0];
|
||||||
if ($font[$length - 1] !== $quote) continue;
|
if ($font[$length - 1] !== $quote) continue;
|
||||||
$font = substr($font, 1, $length - 2);
|
$font = substr($font, 1, $length - 2);
|
||||||
// double-backslash processing is buggy
|
|
||||||
$font = str_replace("\\$quote", $quote, $font); // de-escape quote
|
$new_font = '';
|
||||||
$font = str_replace("\\\n", "\n", $font); // de-escape newlines
|
for ($i = 0, $c = strlen($font); $i < $c; $i++) {
|
||||||
|
if ($font[$i] === '\\') {
|
||||||
|
$i++;
|
||||||
|
if ($i >= $c) {
|
||||||
|
$new_font .= '\\';
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
if (ctype_xdigit($font[$i])) {
|
||||||
|
$code = $font[$i];
|
||||||
|
for ($a = 1, $i++; $i < $c && $a < 6; $i++, $a++) {
|
||||||
|
if (!ctype_xdigit($font[$i])) break;
|
||||||
|
$code .= $font[$i];
|
||||||
|
}
|
||||||
|
// We have to be extremely careful when adding
|
||||||
|
// new characters, to make sure we're not breaking
|
||||||
|
// the encoding.
|
||||||
|
$char = HTMLPurifier_Encoder::unichr(hexdec($code));
|
||||||
|
if (HTMLPurifier_Encoder::cleanUTF8($char) === '') continue;
|
||||||
|
$new_font .= $char;
|
||||||
|
if ($i < $c && trim($font[$i]) !== '') $i--;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if ($font[$i] === "\n") continue;
|
||||||
|
}
|
||||||
|
$new_font .= $font[$i];
|
||||||
|
}
|
||||||
|
|
||||||
|
$font = $new_font;
|
||||||
}
|
}
|
||||||
// $font is a pure representation of the font name
|
// $font is a pure representation of the font name
|
||||||
|
|
||||||
if (ctype_alnum($font)) {
|
if (ctype_alnum($font) && $font !== '') {
|
||||||
// very simple font, allow it in unharmed
|
// very simple font, allow it in unharmed
|
||||||
$final .= $font . ', ';
|
$final .= $font . ', ';
|
||||||
continue;
|
continue;
|
||||||
@@ -53,8 +79,8 @@ class HTMLPurifier_AttrDef_CSS_FontFamily extends HTMLPurifier_AttrDef
|
|||||||
// complicated font, requires quoting
|
// complicated font, requires quoting
|
||||||
|
|
||||||
// armor single quotes and new lines
|
// armor single quotes and new lines
|
||||||
|
$font = str_replace("\\", "\\\\", $font);
|
||||||
$font = str_replace("'", "\\'", $font);
|
$font = str_replace("'", "\\'", $font);
|
||||||
$font = str_replace("\n", "\\\n", $font);
|
|
||||||
$final .= "'$font', ";
|
$final .= "'$font', ";
|
||||||
}
|
}
|
||||||
$final = rtrim($final, ', ');
|
$final = rtrim($final, ', ');
|
||||||
|
@@ -1,7 +1,7 @@
|
|||||||
<?php
|
<?php
|
||||||
|
|
||||||
require_once 'HTMLPurifier/AttrDef.php';
|
require_once 'HTMLPurifier/Length.php';
|
||||||
require_once 'HTMLPurifier/AttrDef/CSS/Number.php';
|
require_once 'HTMLPurifier/UnitConverter.php';
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Represents a Length as defined by CSS.
|
* Represents a Length as defined by CSS.
|
||||||
@@ -9,46 +9,40 @@ require_once 'HTMLPurifier/AttrDef/CSS/Number.php';
|
|||||||
class HTMLPurifier_AttrDef_CSS_Length extends HTMLPurifier_AttrDef
|
class HTMLPurifier_AttrDef_CSS_Length extends HTMLPurifier_AttrDef
|
||||||
{
|
{
|
||||||
|
|
||||||
/**
|
var $min, $max;
|
||||||
* Valid unit lookup table.
|
|
||||||
* @warning The code assumes all units are two characters long. Be careful
|
|
||||||
* if we have to change this behavior!
|
|
||||||
*/
|
|
||||||
var $units = array('em' => true, 'ex' => true, 'px' => true, 'in' => true,
|
|
||||||
'cm' => true, 'mm' => true, 'pt' => true, 'pc' => true);
|
|
||||||
/**
|
|
||||||
* Instance of HTMLPurifier_AttrDef_Number to defer number validation to
|
|
||||||
*/
|
|
||||||
var $number_def;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @param $non_negative Bool indication whether or not negative values are
|
* @param HTMLPurifier_Length $min Minimum length, or null for no bound. String is also acceptable.
|
||||||
* allowed.
|
* @param HTMLPurifier_Length $max Maximum length, or null for no bound. String is also acceptable.
|
||||||
*/
|
*/
|
||||||
function HTMLPurifier_AttrDef_CSS_Length($non_negative = false) {
|
function HTMLPurifier_AttrDef_CSS_Length($min = null, $max = null) {
|
||||||
$this->number_def = new HTMLPurifier_AttrDef_CSS_Number($non_negative);
|
$this->min = $min !== null ? HTMLPurifier_Length::make($min) : null;
|
||||||
|
$this->max = $max !== null ? HTMLPurifier_Length::make($max) : null;
|
||||||
}
|
}
|
||||||
|
|
||||||
function validate($length, $config, &$context) {
|
function validate($string, $config, $context) {
|
||||||
|
$string = $this->parseCDATA($string);
|
||||||
|
|
||||||
$length = $this->parseCDATA($length);
|
// Optimizations
|
||||||
if ($length === '') return false;
|
if ($string === '') return false;
|
||||||
if ($length === '0') return '0';
|
if ($string === '0') return '0';
|
||||||
$strlen = strlen($length);
|
if (strlen($string) === 1) return false;
|
||||||
if ($strlen === 1) return false; // impossible!
|
|
||||||
|
|
||||||
// we assume all units are two characters
|
$length = HTMLPurifier_Length::make($string);
|
||||||
$unit = substr($length, $strlen - 2);
|
if (!$length->isValid()) return false;
|
||||||
if (!ctype_lower($unit)) $unit = strtolower($unit);
|
|
||||||
$number = substr($length, 0, $strlen - 2);
|
|
||||||
|
|
||||||
if (!isset($this->units[$unit])) return false;
|
if ($this->min) {
|
||||||
|
$c = $length->compareTo($this->min);
|
||||||
$number = $this->number_def->validate($number, $config, $context);
|
if ($c === false) return false;
|
||||||
if ($number === false) return false;
|
if ($c < 0) return false;
|
||||||
|
}
|
||||||
return $number . $unit;
|
if ($this->max) {
|
||||||
|
$c = $length->compareTo($this->max);
|
||||||
|
if ($c === false) return false;
|
||||||
|
if ($c > 0) return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
return $length->toString();
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@@ -18,6 +18,11 @@ class HTMLPurifier_AttrDef_CSS_Number extends HTMLPurifier_AttrDef
|
|||||||
$this->non_negative = $non_negative;
|
$this->non_negative = $non_negative;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @warning Some contexts do not pass $config, $context. These
|
||||||
|
* variables should not be used without checking HTMLPurifier_Length.
|
||||||
|
* This might not work properly in PHP4.
|
||||||
|
*/
|
||||||
function validate($number, $config, &$context) {
|
function validate($number, $config, &$context) {
|
||||||
|
|
||||||
$number = $this->parseCDATA($number);
|
$number = $this->parseCDATA($number);
|
||||||
|
@@ -15,10 +15,13 @@ class HTMLPurifier_AttrDef_CSS_TextDecoration extends HTMLPurifier_AttrDef
|
|||||||
static $allowed_values = array(
|
static $allowed_values = array(
|
||||||
'line-through' => true,
|
'line-through' => true,
|
||||||
'overline' => true,
|
'overline' => true,
|
||||||
'underline' => true
|
'underline' => true,
|
||||||
);
|
);
|
||||||
|
|
||||||
$string = strtolower($this->parseCDATA($string));
|
$string = strtolower($this->parseCDATA($string));
|
||||||
|
|
||||||
|
if ($string === 'none') return $string;
|
||||||
|
|
||||||
$parts = explode(' ', $string);
|
$parts = explode(' ', $string);
|
||||||
$final = '';
|
$final = '';
|
||||||
foreach ($parts as $part) {
|
foreach ($parts as $part) {
|
||||||
|
@@ -8,6 +8,12 @@ require_once 'HTMLPurifier/AttrDef.php';
|
|||||||
class HTMLPurifier_AttrDef_HTML_Pixels extends HTMLPurifier_AttrDef
|
class HTMLPurifier_AttrDef_HTML_Pixels extends HTMLPurifier_AttrDef
|
||||||
{
|
{
|
||||||
|
|
||||||
|
var $max;
|
||||||
|
|
||||||
|
function HTMLPurifier_AttrDef_HTML_Pixels($max = null) {
|
||||||
|
$this->max = $max;
|
||||||
|
}
|
||||||
|
|
||||||
function validate($string, $config, &$context) {
|
function validate($string, $config, &$context) {
|
||||||
|
|
||||||
$string = trim($string);
|
$string = trim($string);
|
||||||
@@ -26,11 +32,18 @@ class HTMLPurifier_AttrDef_HTML_Pixels extends HTMLPurifier_AttrDef
|
|||||||
// crash operating systems, see <http://ha.ckers.org/imagecrash.html>
|
// crash operating systems, see <http://ha.ckers.org/imagecrash.html>
|
||||||
// WARNING, above link WILL crash you if you're using Windows
|
// WARNING, above link WILL crash you if you're using Windows
|
||||||
|
|
||||||
if ($int > 1200) return '1200';
|
if ($this->max !== null && $int > $this->max) return (string) $this->max;
|
||||||
|
|
||||||
return (string) $int;
|
return (string) $int;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
function make($string) {
|
||||||
|
if ($string === '') $max = null;
|
||||||
|
else $max = (int) $string;
|
||||||
|
$class = get_class($this);
|
||||||
|
return new $class($max);
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
32
library/HTMLPurifier/AttrDef/Switch.php
Normal file
32
library/HTMLPurifier/AttrDef/Switch.php
Normal file
@@ -0,0 +1,32 @@
|
|||||||
|
<?php
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Decorator that, depending on a token, switches between two definitions.
|
||||||
|
*/
|
||||||
|
class HTMLPurifier_AttrDef_Switch
|
||||||
|
{
|
||||||
|
|
||||||
|
var $tag;
|
||||||
|
var $withTag, $withoutTag;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @param string $tag Tag name to switch upon
|
||||||
|
* @param HTMLPurifier_AttrDef $with_tag Call if token matches tag
|
||||||
|
* @param HTMLPurifier_AttrDef $without_tag Call if token doesn't match, or there is no token
|
||||||
|
*/
|
||||||
|
function HTMLPurifier_AttrDef_Switch($tag, $with_tag, $without_tag) {
|
||||||
|
$this->tag = $tag;
|
||||||
|
$this->withTag = $with_tag;
|
||||||
|
$this->withoutTag = $without_tag;
|
||||||
|
}
|
||||||
|
|
||||||
|
function validate($string, $config, $context) {
|
||||||
|
$token = $context->get('CurrentToken', true);
|
||||||
|
if (!$token || $token->name !== $this->tag) {
|
||||||
|
return $this->withoutTag->validate($string, $config, $context);
|
||||||
|
} else {
|
||||||
|
return $this->withTag->validate($string, $config, $context);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
@@ -6,6 +6,7 @@ require_once 'HTMLPurifier/URIScheme.php';
|
|||||||
require_once 'HTMLPurifier/URISchemeRegistry.php';
|
require_once 'HTMLPurifier/URISchemeRegistry.php';
|
||||||
require_once 'HTMLPurifier/AttrDef/URI/Host.php';
|
require_once 'HTMLPurifier/AttrDef/URI/Host.php';
|
||||||
require_once 'HTMLPurifier/PercentEncoder.php';
|
require_once 'HTMLPurifier/PercentEncoder.php';
|
||||||
|
require_once 'HTMLPurifier/AttrDef/URI/Email.php';
|
||||||
|
|
||||||
// special case filtering directives
|
// special case filtering directives
|
||||||
|
|
||||||
@@ -67,7 +68,7 @@ HTMLPurifier_ConfigSchema::define(
|
|||||||
class HTMLPurifier_AttrDef_URI extends HTMLPurifier_AttrDef
|
class HTMLPurifier_AttrDef_URI extends HTMLPurifier_AttrDef
|
||||||
{
|
{
|
||||||
|
|
||||||
var $parser, $percentEncoder;
|
var $parser;
|
||||||
var $embedsResource;
|
var $embedsResource;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -75,7 +76,6 @@ class HTMLPurifier_AttrDef_URI extends HTMLPurifier_AttrDef
|
|||||||
*/
|
*/
|
||||||
function HTMLPurifier_AttrDef_URI($embeds_resource = false) {
|
function HTMLPurifier_AttrDef_URI($embeds_resource = false) {
|
||||||
$this->parser = new HTMLPurifier_URIParser();
|
$this->parser = new HTMLPurifier_URIParser();
|
||||||
$this->percentEncoder = new HTMLPurifier_PercentEncoder();
|
|
||||||
$this->embedsResource = (bool) $embeds_resource;
|
$this->embedsResource = (bool) $embeds_resource;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -83,9 +83,7 @@ class HTMLPurifier_AttrDef_URI extends HTMLPurifier_AttrDef
|
|||||||
|
|
||||||
if ($config->get('URI', 'Disable')) return false;
|
if ($config->get('URI', 'Disable')) return false;
|
||||||
|
|
||||||
// initial operations
|
|
||||||
$uri = $this->parseCDATA($uri);
|
$uri = $this->parseCDATA($uri);
|
||||||
$uri = $this->percentEncoder->normalize($uri);
|
|
||||||
|
|
||||||
// parse the URI
|
// parse the URI
|
||||||
$uri = $this->parser->parse($uri);
|
$uri = $this->parser->parse($uri);
|
||||||
@@ -101,7 +99,7 @@ class HTMLPurifier_AttrDef_URI extends HTMLPurifier_AttrDef
|
|||||||
$result = $uri->validate($config, $context);
|
$result = $uri->validate($config, $context);
|
||||||
if (!$result) break;
|
if (!$result) break;
|
||||||
|
|
||||||
// chained validation
|
// chained filtering
|
||||||
$uri_def =& $config->getDefinition('URI');
|
$uri_def =& $config->getDefinition('URI');
|
||||||
$result = $uri_def->filter($uri, $config, $context);
|
$result = $uri_def->filter($uri, $config, $context);
|
||||||
if (!$result) break;
|
if (!$result) break;
|
||||||
@@ -121,13 +119,6 @@ class HTMLPurifier_AttrDef_URI extends HTMLPurifier_AttrDef
|
|||||||
$context->destroy('EmbeddedURI');
|
$context->destroy('EmbeddedURI');
|
||||||
if (!$ok) return false;
|
if (!$ok) return false;
|
||||||
|
|
||||||
// munge scheme off if necessary (this must be last)
|
|
||||||
if (!is_null($uri->scheme) && is_null($uri->host)) {
|
|
||||||
if ($uri_def->defaultScheme == $uri->scheme) {
|
|
||||||
$uri->scheme = null;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// back to string
|
// back to string
|
||||||
$result = $uri->toString();
|
$result = $uri->toString();
|
||||||
|
|
||||||
|
@@ -14,3 +14,5 @@ class HTMLPurifier_AttrDef_URI_Email extends HTMLPurifier_AttrDef
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// sub-implementations
|
||||||
|
require_once 'HTMLPurifier/AttrDef/URI/Email/SimpleCheck.php';
|
||||||
|
@@ -40,11 +40,23 @@ class HTMLPurifier_AttrDef_URI_Host extends HTMLPurifier_AttrDef
|
|||||||
$ipv4 = $this->ipv4->validate($string, $config, $context);
|
$ipv4 = $this->ipv4->validate($string, $config, $context);
|
||||||
if ($ipv4 !== false) return $ipv4;
|
if ($ipv4 !== false) return $ipv4;
|
||||||
|
|
||||||
// validate a domain name here, do filtering, etc etc etc
|
// A regular domain name.
|
||||||
|
|
||||||
// We could use this, but it would break I18N domain names
|
// This breaks I18N domain names, but we don't have proper IRI support,
|
||||||
//$match = preg_match('/^[a-z0-9][\w\-\.]*[a-z0-9]$/i', $string);
|
// so force users to insert Punycode. If there's complaining we'll
|
||||||
//if (!$match) return false;
|
// try to fix things into an international friendly form.
|
||||||
|
|
||||||
|
// The productions describing this are:
|
||||||
|
$a = '[a-z]'; // alpha
|
||||||
|
$an = '[a-z0-9]'; // alphanum
|
||||||
|
$and = '[a-z0-9-]'; // alphanum | "-"
|
||||||
|
// domainlabel = alphanum | alphanum *( alphanum | "-" ) alphanum
|
||||||
|
$domainlabel = "$an($and*$an)?";
|
||||||
|
// toplabel = alpha | alpha *( alphanum | "-" ) alphanum
|
||||||
|
$toplabel = "$a($and*$an)?";
|
||||||
|
// hostname = *( domainlabel "." ) toplabel [ "." ]
|
||||||
|
$match = preg_match("/^($domainlabel\.)*$toplabel\.?$/i", $string);
|
||||||
|
if (!$match) return false;
|
||||||
|
|
||||||
return $string;
|
return $string;
|
||||||
}
|
}
|
||||||
|
@@ -44,6 +44,9 @@ class HTMLPurifier_AttrTypes
|
|||||||
$this->info['LanguageCode'] = new HTMLPurifier_AttrDef_Lang();
|
$this->info['LanguageCode'] = new HTMLPurifier_AttrDef_Lang();
|
||||||
$this->info['Color'] = new HTMLPurifier_AttrDef_HTML_Color();
|
$this->info['Color'] = new HTMLPurifier_AttrDef_HTML_Color();
|
||||||
|
|
||||||
|
// unimplemented aliases
|
||||||
|
$this->info['ContentType'] = new HTMLPurifier_AttrDef_Text();
|
||||||
|
|
||||||
// number is really a positive integer (one or more digits)
|
// number is really a positive integer (one or more digits)
|
||||||
// FIXME: ^^ not always, see start and value of list items
|
// FIXME: ^^ not always, see start and value of list items
|
||||||
$this->info['Number'] = new HTMLPurifier_AttrDef_Integer(false, false, true);
|
$this->info['Number'] = new HTMLPurifier_AttrDef_Integer(false, false, true);
|
||||||
|
@@ -23,6 +23,13 @@ class HTMLPurifier_AttrValidator
|
|||||||
$definition = $config->getHTMLDefinition();
|
$definition = $config->getHTMLDefinition();
|
||||||
$e =& $context->get('ErrorCollector', true);
|
$e =& $context->get('ErrorCollector', true);
|
||||||
|
|
||||||
|
// initialize IDAccumulator if necessary
|
||||||
|
$ok =& $context->get('IDAccumulator', true);
|
||||||
|
if (!$ok) {
|
||||||
|
$id_accumulator = HTMLPurifier_IDAccumulator::build($config, $context);
|
||||||
|
$context->register('IDAccumulator', $id_accumulator);
|
||||||
|
}
|
||||||
|
|
||||||
// initialize CurrentToken if necessary
|
// initialize CurrentToken if necessary
|
||||||
$current_token =& $context->get('CurrentToken', true);
|
$current_token =& $context->get('CurrentToken', true);
|
||||||
if (!$current_token) $context->register('CurrentToken', $token);
|
if (!$current_token) $context->register('CurrentToken', $token);
|
||||||
@@ -33,8 +40,8 @@ class HTMLPurifier_AttrValidator
|
|||||||
// DEFINITION CALL
|
// DEFINITION CALL
|
||||||
$d_defs = $definition->info_global_attr;
|
$d_defs = $definition->info_global_attr;
|
||||||
|
|
||||||
// reference attributes for easy manipulation
|
// don't update token until the very end, to ensure an atomic update
|
||||||
$attr =& $token->attr;
|
$attr = $token->attr;
|
||||||
|
|
||||||
// do global transformations (pre)
|
// do global transformations (pre)
|
||||||
// nothing currently utilizes this
|
// nothing currently utilizes this
|
||||||
@@ -129,6 +136,8 @@ class HTMLPurifier_AttrValidator
|
|||||||
if ($e && ($attr != $o)) $e->send(E_NOTICE, 'AttrValidator: Attributes transformed', $o, $attr);
|
if ($e && ($attr != $o)) $e->send(E_NOTICE, 'AttrValidator: Attributes transformed', $o, $attr);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
$token->attr = $attr;
|
||||||
|
|
||||||
// destroy CurrentToken if we made it ourselves
|
// destroy CurrentToken if we made it ourselves
|
||||||
if (!$current_token) $context->destroy('CurrentToken');
|
if (!$current_token) $context->destroy('CurrentToken');
|
||||||
|
|
||||||
|
@@ -7,6 +7,7 @@ require_once 'HTMLPurifier/AttrDef/CSS/BackgroundPosition.php';
|
|||||||
require_once 'HTMLPurifier/AttrDef/CSS/Border.php';
|
require_once 'HTMLPurifier/AttrDef/CSS/Border.php';
|
||||||
require_once 'HTMLPurifier/AttrDef/CSS/Color.php';
|
require_once 'HTMLPurifier/AttrDef/CSS/Color.php';
|
||||||
require_once 'HTMLPurifier/AttrDef/CSS/Composite.php';
|
require_once 'HTMLPurifier/AttrDef/CSS/Composite.php';
|
||||||
|
require_once 'HTMLPurifier/AttrDef/CSS/DenyElementDecorator.php';
|
||||||
require_once 'HTMLPurifier/AttrDef/CSS/Font.php';
|
require_once 'HTMLPurifier/AttrDef/CSS/Font.php';
|
||||||
require_once 'HTMLPurifier/AttrDef/CSS/FontFamily.php';
|
require_once 'HTMLPurifier/AttrDef/CSS/FontFamily.php';
|
||||||
require_once 'HTMLPurifier/AttrDef/CSS/Length.php';
|
require_once 'HTMLPurifier/AttrDef/CSS/Length.php';
|
||||||
@@ -16,6 +17,7 @@ require_once 'HTMLPurifier/AttrDef/CSS/Percentage.php';
|
|||||||
require_once 'HTMLPurifier/AttrDef/CSS/TextDecoration.php';
|
require_once 'HTMLPurifier/AttrDef/CSS/TextDecoration.php';
|
||||||
require_once 'HTMLPurifier/AttrDef/CSS/URI.php';
|
require_once 'HTMLPurifier/AttrDef/CSS/URI.php';
|
||||||
require_once 'HTMLPurifier/AttrDef/Enum.php';
|
require_once 'HTMLPurifier/AttrDef/Enum.php';
|
||||||
|
require_once 'HTMLPurifier/AttrDef/Switch.php';
|
||||||
|
|
||||||
HTMLPurifier_ConfigSchema::define(
|
HTMLPurifier_ConfigSchema::define(
|
||||||
'CSS', 'DefinitionRev', 1, 'int', '
|
'CSS', 'DefinitionRev', 1, 'int', '
|
||||||
@@ -26,6 +28,20 @@ HTMLPurifier_ConfigSchema::define(
|
|||||||
</p>
|
</p>
|
||||||
');
|
');
|
||||||
|
|
||||||
|
HTMLPurifier_ConfigSchema::define(
|
||||||
|
'CSS', 'MaxImgLength', '1200px', 'string/null', '
|
||||||
|
<p>
|
||||||
|
This parameter sets the maximum allowed length on <code>img</code> tags,
|
||||||
|
effectively the <code>width</code> and <code>height</code> properties.
|
||||||
|
Only absolute units of measurement (in, pt, pc, mm, cm) and pixels (px) are allowed. This is
|
||||||
|
in place to prevent imagecrash attacks, disable with null at your own risk.
|
||||||
|
This directive is similar to %HTML.MaxImgLength, and both should be
|
||||||
|
concurrently edited, although there are
|
||||||
|
subtle differences in the input format (the CSS max is a number with
|
||||||
|
a unit).
|
||||||
|
</p>
|
||||||
|
');
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Defines allowed CSS attributes and what their values are.
|
* Defines allowed CSS attributes and what their values are.
|
||||||
* @see HTMLPurifier_HTMLDefinition
|
* @see HTMLPurifier_HTMLDefinition
|
||||||
@@ -116,7 +132,7 @@ class HTMLPurifier_CSSDefinition extends HTMLPurifier_Definition
|
|||||||
$this->info['border-left-width'] =
|
$this->info['border-left-width'] =
|
||||||
$this->info['border-right-width'] = new HTMLPurifier_AttrDef_CSS_Composite(array(
|
$this->info['border-right-width'] = new HTMLPurifier_AttrDef_CSS_Composite(array(
|
||||||
new HTMLPurifier_AttrDef_Enum(array('thin', 'medium', 'thick')),
|
new HTMLPurifier_AttrDef_Enum(array('thin', 'medium', 'thick')),
|
||||||
new HTMLPurifier_AttrDef_CSS_Length(true) //disallow negative
|
new HTMLPurifier_AttrDef_CSS_Length('0') //disallow negative
|
||||||
));
|
));
|
||||||
|
|
||||||
$this->info['border-width'] = new HTMLPurifier_AttrDef_CSS_Multiple($border_width);
|
$this->info['border-width'] = new HTMLPurifier_AttrDef_CSS_Multiple($border_width);
|
||||||
@@ -142,7 +158,7 @@ class HTMLPurifier_CSSDefinition extends HTMLPurifier_Definition
|
|||||||
$this->info['line-height'] = new HTMLPurifier_AttrDef_CSS_Composite(array(
|
$this->info['line-height'] = new HTMLPurifier_AttrDef_CSS_Composite(array(
|
||||||
new HTMLPurifier_AttrDef_Enum(array('normal')),
|
new HTMLPurifier_AttrDef_Enum(array('normal')),
|
||||||
new HTMLPurifier_AttrDef_CSS_Number(true), // no negatives
|
new HTMLPurifier_AttrDef_CSS_Number(true), // no negatives
|
||||||
new HTMLPurifier_AttrDef_CSS_Length(true),
|
new HTMLPurifier_AttrDef_CSS_Length('0'),
|
||||||
new HTMLPurifier_AttrDef_CSS_Percentage(true)
|
new HTMLPurifier_AttrDef_CSS_Percentage(true)
|
||||||
));
|
));
|
||||||
|
|
||||||
@@ -164,7 +180,7 @@ class HTMLPurifier_CSSDefinition extends HTMLPurifier_Definition
|
|||||||
$this->info['padding-bottom'] =
|
$this->info['padding-bottom'] =
|
||||||
$this->info['padding-left'] =
|
$this->info['padding-left'] =
|
||||||
$this->info['padding-right'] = new HTMLPurifier_AttrDef_CSS_Composite(array(
|
$this->info['padding-right'] = new HTMLPurifier_AttrDef_CSS_Composite(array(
|
||||||
new HTMLPurifier_AttrDef_CSS_Length(true),
|
new HTMLPurifier_AttrDef_CSS_Length('0'),
|
||||||
new HTMLPurifier_AttrDef_CSS_Percentage(true)
|
new HTMLPurifier_AttrDef_CSS_Percentage(true)
|
||||||
));
|
));
|
||||||
|
|
||||||
@@ -175,13 +191,25 @@ class HTMLPurifier_CSSDefinition extends HTMLPurifier_Definition
|
|||||||
new HTMLPurifier_AttrDef_CSS_Percentage()
|
new HTMLPurifier_AttrDef_CSS_Percentage()
|
||||||
));
|
));
|
||||||
|
|
||||||
$this->info['width'] =
|
$trusted_wh = new HTMLPurifier_AttrDef_CSS_Composite(array(
|
||||||
$this->info['height'] =
|
new HTMLPurifier_AttrDef_CSS_Length('0'),
|
||||||
new HTMLPurifier_AttrDef_CSS_Composite(array(
|
|
||||||
new HTMLPurifier_AttrDef_CSS_Length(true),
|
|
||||||
new HTMLPurifier_AttrDef_CSS_Percentage(true),
|
new HTMLPurifier_AttrDef_CSS_Percentage(true),
|
||||||
new HTMLPurifier_AttrDef_Enum(array('auto'))
|
new HTMLPurifier_AttrDef_Enum(array('auto'))
|
||||||
));
|
));
|
||||||
|
$max = $config->get('CSS', 'MaxImgLength');
|
||||||
|
$this->info['width'] =
|
||||||
|
$this->info['height'] =
|
||||||
|
$max === null ?
|
||||||
|
$trusted_wh :
|
||||||
|
new HTMLPurifier_AttrDef_Switch('img',
|
||||||
|
// For img tags:
|
||||||
|
new HTMLPurifier_AttrDef_CSS_Composite(array(
|
||||||
|
new HTMLPurifier_AttrDef_CSS_Length('0', $max),
|
||||||
|
new HTMLPurifier_AttrDef_Enum(array('auto'))
|
||||||
|
)),
|
||||||
|
// For everyone else:
|
||||||
|
$trusted_wh
|
||||||
|
);
|
||||||
|
|
||||||
$this->info['text-decoration'] = new HTMLPurifier_AttrDef_CSS_TextDecoration();
|
$this->info['text-decoration'] = new HTMLPurifier_AttrDef_CSS_TextDecoration();
|
||||||
|
|
||||||
@@ -204,7 +232,7 @@ class HTMLPurifier_CSSDefinition extends HTMLPurifier_Definition
|
|||||||
$this->info['border-right'] = new HTMLPurifier_AttrDef_CSS_Border($config);
|
$this->info['border-right'] = new HTMLPurifier_AttrDef_CSS_Border($config);
|
||||||
|
|
||||||
$this->info['border-collapse'] = new HTMLPurifier_AttrDef_Enum(array(
|
$this->info['border-collapse'] = new HTMLPurifier_AttrDef_Enum(array(
|
||||||
'collapse', 'seperate'));
|
'collapse', 'separate'));
|
||||||
|
|
||||||
$this->info['caption-side'] = new HTMLPurifier_AttrDef_Enum(array(
|
$this->info['caption-side'] = new HTMLPurifier_AttrDef_Enum(array(
|
||||||
'top', 'bottom'));
|
'top', 'bottom'));
|
||||||
@@ -219,6 +247,8 @@ class HTMLPurifier_CSSDefinition extends HTMLPurifier_Definition
|
|||||||
new HTMLPurifier_AttrDef_CSS_Percentage()
|
new HTMLPurifier_AttrDef_CSS_Percentage()
|
||||||
));
|
));
|
||||||
|
|
||||||
|
$this->info['border-spacing'] = new HTMLPurifier_AttrDef_CSS_Multiple(new HTMLPurifier_AttrDef_CSS_Length(), 2);
|
||||||
|
|
||||||
// partial support
|
// partial support
|
||||||
$this->info['white-space'] = new HTMLPurifier_AttrDef_Enum(array('nowrap'));
|
$this->info['white-space'] = new HTMLPurifier_AttrDef_Enum(array('nowrap'));
|
||||||
|
|
||||||
|
@@ -15,7 +15,10 @@ class HTMLPurifier_ChildDef_Optional extends HTMLPurifier_ChildDef_Required
|
|||||||
var $type = 'optional';
|
var $type = 'optional';
|
||||||
function validateChildren($tokens_of_children, $config, &$context) {
    // Delegate to the Required child definition first; anything other than
    // an outright failure (false) is passed through untouched.
    $outcome = parent::validateChildren($tokens_of_children, $config, $context);
    if ($outcome !== false) {
        return $outcome;
    }
    // The parent rejected the children. With no children at all there is
    // nothing to change (true); otherwise replace them with an empty list.
    return empty($tokens_of_children) ? true : array();
}
|
||||||
}
|
}
|
||||||
|
@@ -42,7 +42,7 @@ class HTMLPurifier_Config
|
|||||||
/**
|
/**
|
||||||
* HTML Purifier's version
|
* HTML Purifier's version
|
||||||
*/
|
*/
|
||||||
var $version = '2.1.1';
|
var $version = '2.1.5';
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Two-level associative array of configuration directives
|
* Two-level associative array of configuration directives
|
||||||
|
@@ -120,6 +120,9 @@ class HTMLPurifier_DefinitionCache
|
|||||||
|
|
||||||
/**
|
/**
|
||||||
* Clears all expired (older version or revision) objects from cache
|
* Clears all expired (older version or revision) objects from cache
|
||||||
|
* @note Be careful implementing this method as flush. Flush must
|
||||||
|
* not interfere with other Definition types, and cleanup()
|
||||||
|
* should not be repeatedly called by userland code.
|
||||||
*/
|
*/
|
||||||
function cleanup($config) {
|
function cleanup($config) {
|
||||||
trigger_error('Cannot call abstract method', E_USER_ERROR);
|
trigger_error('Cannot call abstract method', E_USER_ERROR);
|
||||||
|
@@ -1,6 +1,7 @@
|
|||||||
<?php
|
<?php
|
||||||
|
|
||||||
require_once 'HTMLPurifier/DefinitionCache.php';
|
require_once 'HTMLPurifier/DefinitionCache.php';
|
||||||
|
require_once 'HTMLPurifier/DefinitionCache/Serializer.php';
|
||||||
|
|
||||||
HTMLPurifier_ConfigSchema::define(
|
HTMLPurifier_ConfigSchema::define(
|
||||||
'Cache', 'DefinitionImpl', 'Serializer', 'string/null', '
|
'Cache', 'DefinitionImpl', 'Serializer', 'string/null', '
|
||||||
@@ -10,10 +11,6 @@ to disable caching (not recommended, as you will see a definite
|
|||||||
performance degradation). This directive has been available since 2.0.0.
|
performance degradation). This directive has been available since 2.0.0.
|
||||||
');
|
');
|
||||||
|
|
||||||
HTMLPurifier_ConfigSchema::defineAllowedValues(
|
|
||||||
'Cache', 'DefinitionImpl', array('Serializer')
|
|
||||||
);
|
|
||||||
|
|
||||||
HTMLPurifier_ConfigSchema::defineAlias(
|
HTMLPurifier_ConfigSchema::defineAlias(
|
||||||
'Core', 'DefinitionCache',
|
'Core', 'DefinitionCache',
|
||||||
'Cache', 'DefinitionImpl'
|
'Cache', 'DefinitionImpl'
|
||||||
@@ -27,6 +24,7 @@ class HTMLPurifier_DefinitionCacheFactory
|
|||||||
{
|
{
|
||||||
|
|
||||||
var $caches = array('Serializer' => array());
|
var $caches = array('Serializer' => array());
|
||||||
|
var $implementations = array();
|
||||||
var $decorators = array();
|
var $decorators = array();
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -51,14 +49,21 @@ class HTMLPurifier_DefinitionCacheFactory
|
|||||||
return $instance;
|
return $instance;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Registers a new definition cache object
|
||||||
|
* @param $short Short name of cache object, for reference
|
||||||
|
* @param $long Full class name of cache object, for construction
|
||||||
|
*/
|
||||||
|
function register($short, $long) {
|
||||||
|
$this->implementations[$short] = $long;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Factory method that creates a cache object based on configuration
|
* Factory method that creates a cache object based on configuration
|
||||||
* @param $name Name of definitions handled by cache
|
* @param $name Name of definitions handled by cache
|
||||||
* @param $config Instance of HTMLPurifier_Config
|
* @param $config Instance of HTMLPurifier_Config
|
||||||
*/
|
*/
|
||||||
function &create($type, $config) {
|
function &create($type, $config) {
|
||||||
// only one implementation as for right now, $config will
|
|
||||||
// be used to determine implementation
|
|
||||||
$method = $config->get('Cache', 'DefinitionImpl');
|
$method = $config->get('Cache', 'DefinitionImpl');
|
||||||
if ($method === null) {
|
if ($method === null) {
|
||||||
$null = new HTMLPurifier_DefinitionCache_Null($type);
|
$null = new HTMLPurifier_DefinitionCache_Null($type);
|
||||||
@@ -67,7 +72,17 @@ class HTMLPurifier_DefinitionCacheFactory
|
|||||||
if (!empty($this->caches[$method][$type])) {
|
if (!empty($this->caches[$method][$type])) {
|
||||||
return $this->caches[$method][$type];
|
return $this->caches[$method][$type];
|
||||||
}
|
}
|
||||||
$cache = new HTMLPurifier_DefinitionCache_Serializer($type);
|
if (
|
||||||
|
isset($this->implementations[$method]) &&
|
||||||
|
class_exists($class = $this->implementations[$method])
|
||||||
|
) {
|
||||||
|
$cache = new $class($type);
|
||||||
|
} else {
|
||||||
|
if ($method != 'Serializer') {
|
||||||
|
trigger_error("Unrecognized DefinitionCache $method, using Serializer instead", E_USER_WARNING);
|
||||||
|
}
|
||||||
|
$cache = new HTMLPurifier_DefinitionCache_Serializer($type);
|
||||||
|
}
|
||||||
foreach ($this->decorators as $decorator) {
|
foreach ($this->decorators as $decorator) {
|
||||||
$new_cache = $decorator->decorate($cache);
|
$new_cache = $decorator->decorate($cache);
|
||||||
// prevent infinite recursion in PHP 4
|
// prevent infinite recursion in PHP 4
|
||||||
|
@@ -82,7 +82,7 @@ class HTMLPurifier_ElementDef
|
|||||||
|
|
||||||
/**
|
/**
|
||||||
* List of the names of required attributes this element has. Dynamically
|
* List of the names of required attributes this element has. Dynamically
|
||||||
* populated.
|
* populated by HTMLPurifier_HTMLDefinition::getElement
|
||||||
* @public
|
* @public
|
||||||
*/
|
*/
|
||||||
var $required_attr = array();
|
var $required_attr = array();
|
||||||
|
@@ -62,6 +62,12 @@ class HTMLPurifier_Encoder
|
|||||||
trigger_error('Cannot instantiate encoder, call methods statically', E_USER_ERROR);
|
trigger_error('Cannot instantiate encoder, call methods statically', E_USER_ERROR);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Error-handler that mutes errors, alternative to shut-up operator.
|
||||||
|
*/
|
||||||
|
function muteErrorHandler() {}
|
||||||
|
|
||||||
|
/**
|
||||||
/**
|
/**
|
||||||
* Cleans a UTF-8 string for well-formedness and SGML validity
|
* Cleans a UTF-8 string for well-formedness and SGML validity
|
||||||
*
|
*
|
||||||
@@ -90,26 +96,13 @@ class HTMLPurifier_Encoder
|
|||||||
*/
|
*/
|
||||||
function cleanUTF8($str, $force_php = false) {
|
function cleanUTF8($str, $force_php = false) {
|
||||||
|
|
||||||
static $non_sgml_chars = array();
|
// UTF-8 validity is checked since PHP 4.3.5
|
||||||
if (empty($non_sgml_chars)) {
|
// This is an optimization: if the string is already valid UTF-8, no
|
||||||
for ($i = 0; $i <= 31; $i++) {
|
// need to do PHP stuff. 99% of the time, this will be the case.
|
||||||
// non-SGML ASCII chars
|
// The regexp matches the XML char production, as well as excluding
|
||||||
// save \r, \t and \n
|
// non-SGML codepoints U+007F to U+009F
|
||||||
if ($i == 9 || $i == 13 || $i == 10) continue;
|
if (preg_match('/^[\x{9}\x{A}\x{D}\x{20}-\x{7E}\x{A0}-\x{D7FF}\x{E000}-\x{FFFD}\x{10000}-\x{10FFFF}]*$/Du', $str)) {
|
||||||
$non_sgml_chars[chr($i)] = '';
|
return $str;
|
||||||
}
|
|
||||||
for ($i = 127; $i <= 159; $i++) {
|
|
||||||
$non_sgml_chars[HTMLPurifier_Encoder::unichr($i)] = '';
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
static $iconv = null;
|
|
||||||
if ($iconv === null) $iconv = function_exists('iconv');
|
|
||||||
|
|
||||||
if ($iconv && !$force_php) {
|
|
||||||
// do the shortcut way
|
|
||||||
$str = @iconv('UTF-8', 'UTF-8//IGNORE', $str);
|
|
||||||
return strtr($str, $non_sgml_chars);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
$mState = 0; // cached expected number of octets after the current octet
|
$mState = 0; // cached expected number of octets after the current octet
|
||||||
@@ -220,7 +213,17 @@ class HTMLPurifier_Encoder
|
|||||||
) {
|
) {
|
||||||
|
|
||||||
} elseif (0xFEFF != $mUcs4 && // omit BOM
|
} elseif (0xFEFF != $mUcs4 && // omit BOM
|
||||||
!($mUcs4 >= 128 && $mUcs4 <= 159) // omit non-SGML
|
// check for valid Char unicode codepoints
|
||||||
|
(
|
||||||
|
0x9 == $mUcs4 ||
|
||||||
|
0xA == $mUcs4 ||
|
||||||
|
0xD == $mUcs4 ||
|
||||||
|
(0x20 <= $mUcs4 && 0x7E >= $mUcs4) ||
|
||||||
|
// 7F-9F is not strictly prohibited by XML,
|
||||||
|
// but it is non-SGML, and thus we don't allow it
|
||||||
|
(0xA0 <= $mUcs4 && 0xD7FF >= $mUcs4) ||
|
||||||
|
(0x10000 <= $mUcs4 && 0x10FFFF >= $mUcs4)
|
||||||
|
)
|
||||||
) {
|
) {
|
||||||
$out .= $char;
|
$out .= $char;
|
||||||
}
|
}
|
||||||
@@ -313,14 +316,23 @@ class HTMLPurifier_Encoder
|
|||||||
* @static
|
* @static
|
||||||
*/
|
*/
|
||||||
function convertToUTF8($str, $config, &$context) {
    // Converts $str from the encoding named by %Core.Encoding into UTF-8.
    // Returns the converted string, or raises E_USER_ERROR when neither
    // iconv nor the iso-8859-1 fallback can handle the configured encoding.
    $encoding = $config->get('Core', 'Encoding');
    if ($encoding === 'utf-8') return $str;
    static $iconv = null;
    if ($iconv === null) $iconv = function_exists('iconv');
    // Mute conversion notices/warnings instead of using the @ operator.
    // Every exit path below must restore the previous handler.
    set_error_handler(array('HTMLPurifier_Encoder', 'muteErrorHandler'));
    if ($iconv && !$config->get('Test', 'ForceNoIconv')) {
        $str = iconv($encoding, 'utf-8//IGNORE', $str);
        // If the string is bjorked by Shift_JIS or a similar encoding
        // that doesn't support all of ASCII, convert the naughty
        // characters to their true byte-wise ASCII/UTF-8 equivalents.
        $ascii_fix = HTMLPurifier_Encoder::testEncodingSupportsASCII($encoding);
        // testEncodingSupportsASCII() returns false when iconv rejects the
        // encoding; only a non-empty map is worth handing to strtr().
        if (is_array($ascii_fix) && !empty($ascii_fix)) {
            $str = strtr($str, $ascii_fix);
        }
        restore_error_handler();
        return $str;
    } elseif ($encoding === 'iso-8859-1') {
        $str = utf8_encode($str);
        restore_error_handler();
        return $str;
    }
    // Put the real handler back *before* raising the error: otherwise the
    // muting handler would swallow it and stay installed afterwards.
    restore_error_handler();
    trigger_error('Encoding not supported', E_USER_ERROR);
}
|
||||||
@@ -332,17 +344,31 @@ class HTMLPurifier_Encoder
|
|||||||
* characters being omitted.
|
* characters being omitted.
|
||||||
*/
|
*/
|
||||||
function convertFromUTF8($str, $config, &$context) {
    // Converts a UTF-8 string back into the encoding named by
    // %Core.Encoding. Returns the converted string, or raises E_USER_ERROR
    // when neither iconv nor the iso-8859-1 fallback can handle it.
    $encoding = $config->get('Core', 'Encoding');
    if ($encoding === 'utf-8') return $str;
    static $iconv = null;
    if ($iconv === null) $iconv = function_exists('iconv');
    if ($escape = $config->get('Core', 'EscapeNonASCIICharacters')) {
        $str = HTMLPurifier_Encoder::convertToASCIIDumbLossless($str);
    }
    // Mute conversion notices/warnings instead of using the @ operator.
    // Every exit path below must restore the previous handler.
    set_error_handler(array('HTMLPurifier_Encoder', 'muteErrorHandler'));
    if ($iconv && !$config->get('Test', 'ForceNoIconv')) {
        // Undo our previous fix in convertToUTF8, otherwise iconv will barf
        $ascii_fix = HTMLPurifier_Encoder::testEncodingSupportsASCII($encoding);
        // The helper returns false when iconv rejects the encoding; in that
        // case there is no map to apply and iconv() below reports failure.
        if (is_array($ascii_fix) && !empty($ascii_fix)) {
            if (!$escape) {
                // Drop the UTF-8 characters that occupy the ASCII slots in
                // the target encoding so they are not mistaken for ASCII.
                $clear_fix = array();
                foreach ($ascii_fix as $utf8 => $native) $clear_fix[$utf8] = '';
                $str = strtr($str, $clear_fix);
            }
            // Map the ASCII characters back to their UTF-8 stand-ins.
            $str = strtr($str, array_flip($ascii_fix));
        }
        // Normal stuff
        $str = iconv('utf-8', $encoding . '//IGNORE', $str);
        restore_error_handler();
        return $str;
    } elseif ($encoding === 'iso-8859-1') {
        $str = utf8_decode($str);
        restore_error_handler();
        return $str;
    }
    // Put the real handler back *before* raising the error: otherwise the
    // muting handler would swallow it and stay installed afterwards.
    restore_error_handler();
    trigger_error('Encoding not supported', E_USER_ERROR);
}
|
||||||
@@ -395,6 +421,47 @@ class HTMLPurifier_Encoder
|
|||||||
return $result;
|
return $result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * This expensive function tests whether or not a given character
 * encoding supports ASCII. 7/8-bit encodings like Shift_JIS will
 * fail this test, and require special processing. Variable width
 * encodings shouldn't ever fail.
 *
 * @param string $encoding Encoding name to test, as per iconv format
 * @param bool $bypass Whether or not to bypass the precompiled arrays
 *        (and the per-encoding result cache).
 * @return Array of UTF-8 characters to their corresponding ASCII,
 *         which can be used to "undo" any overzealous iconv action;
 *         an empty array when nothing needs fixing. Returns false when
 *         iconv cannot convert to $encoding at all.
 */
function testEncodingSupportsASCII($encoding, $bypass = false) {
    static $encodings = array();
    if (!$bypass) {
        if (isset($encodings[$encoding])) return $encodings[$encoding];
        $lenc = strtolower($encoding);
        // Precompiled answers for known encodings, avoiding the probe loop.
        switch ($lenc) {
            case 'shift_jis':
                return array("\xC2\xA5" => '\\', "\xE2\x80\xBE" => '~');
            case 'johab':
                return array("\xE2\x82\xA9" => '\\');
        }
        if (strpos($lenc, 'iso-8859-') === 0) return array();
    }
    $ret = array();
    // Mute iconv notices/warnings while probing.
    set_error_handler(array('HTMLPurifier_Encoder', 'muteErrorHandler'));
    if (iconv('UTF-8', $encoding, 'a') === false) {
        // Restore the previous handler on this early exit too; otherwise
        // the muting handler would stay installed.
        restore_error_handler();
        return false;
    }
    for ($i = 0x20; $i <= 0x7E; $i++) { // all printable ASCII chars
        $c = chr($i);
        if (iconv('UTF-8', "$encoding//IGNORE", $c) === '') {
            // Reverse engineer: what's the UTF-8 equiv of this byte
            // sequence? This assumes that there's no variable width
            // encoding that doesn't support ASCII.
            $ret[iconv($encoding, 'UTF-8//IGNORE', $c)] = $c;
        }
    }
    restore_error_handler();
    $encodings[$encoding] = $ret;
    return $ret;
}
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -222,6 +222,8 @@ class HTMLPurifier_HTMLDefinition extends HTMLPurifier_Definition
|
|||||||
|
|
||||||
/**
|
/**
|
||||||
* Adds a custom attribute to a pre-existing element
|
* Adds a custom attribute to a pre-existing element
|
||||||
|
* @note This is strictly convenience, and does not have a corresponding
|
||||||
|
* method in HTMLPurifier_HTMLModule
|
||||||
* @param $element_name String element name to add attribute to
|
* @param $element_name String element name to add attribute to
|
||||||
* @param $attr_name String name of attribute
|
* @param $attr_name String name of attribute
|
||||||
* @param $def Attribute definition, can be string or object, see
|
* @param $def Attribute definition, can be string or object, see
|
||||||
@@ -229,20 +231,37 @@ class HTMLPurifier_HTMLDefinition extends HTMLPurifier_Definition
|
|||||||
*/
|
*/
|
||||||
function addAttribute($element_name, $attr_name, $def) {
    $anonymous =& $this->getAnonymousModule();
    // Reuse the element definition if the anonymous module already has one
    // for this name; otherwise start from a blank element.
    if (isset($anonymous->info[$element_name])) {
        $target =& $anonymous->info[$element_name];
    } else {
        $target =& $anonymous->addBlankElement($element_name);
    }
    $target->attr[$attr_name] = $def;
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Adds a custom element to your HTML definition
|
* Adds a custom element to your HTML definition
|
||||||
* @note See HTMLPurifier_HTMLModule::addElement for detailed
|
* @note See HTMLPurifier_HTMLModule::addElement for detailed
|
||||||
* parameter descriptions.
|
* parameter and return value descriptions.
|
||||||
*/
|
*/
|
||||||
function addElement($element_name, $type, $contents, $attr_collections, $attributes) {
|
function &addElement($element_name, $type, $contents, $attr_collections, $attributes) {
|
||||||
$module =& $this->getAnonymousModule();
|
$module =& $this->getAnonymousModule();
|
||||||
// assume that if the user is calling this, the element
|
// assume that if the user is calling this, the element
|
||||||
// is safe. This may not be a good idea
|
// is safe. This may not be a good idea
|
||||||
$module->addElement($element_name, true, $type, $contents, $attr_collections, $attributes);
|
$element =& $module->addElement($element_name, true, $type, $contents, $attr_collections, $attributes);
|
||||||
|
return $element;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Adds a blank element to your HTML definition, for overriding
|
||||||
|
* existing behavior
|
||||||
|
* @note See HTMLPurifier_HTMLModule::addBlankElement for detailed
|
||||||
|
* parameter and return value descriptions.
|
||||||
|
*/
|
||||||
|
function &addBlankElement($element_name) {
|
||||||
|
$module =& $this->getAnonymousModule();
|
||||||
|
$element =& $module->addBlankElement($element_name);
|
||||||
|
return $element;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -330,7 +349,7 @@ class HTMLPurifier_HTMLDefinition extends HTMLPurifier_Definition
|
|||||||
if (isset($this->info_content_sets['Block'][$block_wrapper])) {
|
if (isset($this->info_content_sets['Block'][$block_wrapper])) {
|
||||||
$this->info_block_wrapper = $block_wrapper;
|
$this->info_block_wrapper = $block_wrapper;
|
||||||
} else {
|
} else {
|
||||||
trigger_error('Cannot use non-block element as block wrapper.',
|
trigger_error('Cannot use non-block element as block wrapper',
|
||||||
E_USER_ERROR);
|
E_USER_ERROR);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -340,7 +359,7 @@ class HTMLPurifier_HTMLDefinition extends HTMLPurifier_Definition
|
|||||||
$this->info_parent = $parent;
|
$this->info_parent = $parent;
|
||||||
$this->info_parent_def = $def;
|
$this->info_parent_def = $def;
|
||||||
} else {
|
} else {
|
||||||
trigger_error('Cannot use unrecognized element as parent.',
|
trigger_error('Cannot use unrecognized element as parent',
|
||||||
E_USER_ERROR);
|
E_USER_ERROR);
|
||||||
$this->info_parent_def = $this->manager->getElement($this->info_parent, true);
|
$this->info_parent_def = $this->manager->getElement($this->info_parent, true);
|
||||||
}
|
}
|
||||||
|
@@ -219,5 +219,14 @@ class HTMLPurifier_HTMLModule
|
|||||||
}
|
}
|
||||||
return $ret;
|
return $ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Lazy load construction of the module after determining whether
|
||||||
|
* or not it's needed, and also when a finalized configuration object
|
||||||
|
* is available.
|
||||||
|
* @param $config Instance of HTMLPurifier_Config
|
||||||
|
*/
|
||||||
|
function setup($config) {}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -15,7 +15,7 @@ class HTMLPurifier_HTMLModule_Bdo extends HTMLPurifier_HTMLModule
|
|||||||
'I18N' => array('dir' => false)
|
'I18N' => array('dir' => false)
|
||||||
);
|
);
|
||||||
|
|
||||||
function HTMLPurifier_HTMLModule_Bdo() {
|
function setup($config) {
|
||||||
$bdo =& $this->addElement(
|
$bdo =& $this->addElement(
|
||||||
'bdo', true, 'Inline', 'Inline', array('Core', 'Lang'),
|
'bdo', true, 'Inline', 'Inline', array('Core', 'Lang'),
|
||||||
array(
|
array(
|
||||||
|
@@ -12,7 +12,7 @@ class HTMLPurifier_HTMLModule_Edit extends HTMLPurifier_HTMLModule
|
|||||||
|
|
||||||
var $name = 'Edit';
|
var $name = 'Edit';
|
||||||
|
|
||||||
function HTMLPurifier_HTMLModule_Edit() {
|
function setup($config) {
|
||||||
$contents = 'Chameleon: #PCDATA | Inline ! #PCDATA | Flow';
|
$contents = 'Chameleon: #PCDATA | Inline ! #PCDATA | Flow';
|
||||||
$attr = array(
|
$attr = array(
|
||||||
'cite' => 'URI',
|
'cite' => 'URI',
|
||||||
|
@@ -11,7 +11,7 @@ class HTMLPurifier_HTMLModule_Hypertext extends HTMLPurifier_HTMLModule
|
|||||||
|
|
||||||
var $name = 'Hypertext';
|
var $name = 'Hypertext';
|
||||||
|
|
||||||
function HTMLPurifier_HTMLModule_Hypertext() {
|
function setup($config) {
|
||||||
$a =& $this->addElement(
|
$a =& $this->addElement(
|
||||||
'a', true, 'Inline', 'Inline', 'Common',
|
'a', true, 'Inline', 'Inline', 'Common',
|
||||||
array(
|
array(
|
||||||
|
@@ -5,6 +5,18 @@ require_once 'HTMLPurifier/HTMLModule.php';
|
|||||||
require_once 'HTMLPurifier/AttrDef/URI.php';
|
require_once 'HTMLPurifier/AttrDef/URI.php';
|
||||||
require_once 'HTMLPurifier/AttrTransform/ImgRequired.php';
|
require_once 'HTMLPurifier/AttrTransform/ImgRequired.php';
|
||||||
|
|
||||||
|
HTMLPurifier_ConfigSchema::define(
|
||||||
|
'HTML', 'MaxImgLength', 1200, 'int/null', '
|
||||||
|
<p>
|
||||||
|
This directive controls the maximum number of pixels in the width and
|
||||||
|
height attributes in <code>img</code> tags. This is
|
||||||
|
in place to prevent imagecrash attacks, disable with null at your own risk.
|
||||||
|
This directive is similar to %CSS.MaxImgLength, and both should be
|
||||||
|
concurrently edited, although there are
|
||||||
|
subtle differences in the input format (the HTML max is an integer).
|
||||||
|
</p>
|
||||||
|
');
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* XHTML 1.1 Image Module provides basic image embedding.
|
* XHTML 1.1 Image Module provides basic image embedding.
|
||||||
* @note There is specialized code for removing empty images in
|
* @note There is specialized code for removing empty images in
|
||||||
@@ -15,17 +27,26 @@ class HTMLPurifier_HTMLModule_Image extends HTMLPurifier_HTMLModule
|
|||||||
|
|
||||||
var $name = 'Image';
|
var $name = 'Image';
|
||||||
|
|
||||||
function HTMLPurifier_HTMLModule_Image() {
|
function setup($config) {
|
||||||
|
$max = $config->get('HTML', 'MaxImgLength');
|
||||||
$img =& $this->addElement(
|
$img =& $this->addElement(
|
||||||
'img', true, 'Inline', 'Empty', 'Common',
|
'img', true, 'Inline', 'Empty', 'Common',
|
||||||
array(
|
array(
|
||||||
'alt*' => 'Text',
|
'alt*' => 'Text',
|
||||||
'height' => 'Length',
|
// According to the spec, it's Length, but percents can
|
||||||
|
// be abused, so we allow only Pixels. A trusted module
|
||||||
|
// could overload this with the real value.
|
||||||
|
'height' => 'Pixels#' . $max,
|
||||||
|
'width' => 'Pixels#' . $max,
|
||||||
'longdesc' => 'URI',
|
'longdesc' => 'URI',
|
||||||
'src*' => new HTMLPurifier_AttrDef_URI(true), // embedded
|
'src*' => new HTMLPurifier_AttrDef_URI(true), // embedded
|
||||||
'width' => 'Length'
|
|
||||||
)
|
)
|
||||||
);
|
);
|
||||||
|
if ($max === null || $config->get('HTML', 'Trusted')) {
|
||||||
|
$img->attr['height'] =
|
||||||
|
$img->attr['width'] = 'Length';
|
||||||
|
}
|
||||||
|
|
||||||
// kind of strange, but splitting things up would be inefficient
|
// kind of strange, but splitting things up would be inefficient
|
||||||
$img->attr_transform_pre[] =
|
$img->attr_transform_pre[] =
|
||||||
$img->attr_transform_post[] =
|
$img->attr_transform_post[] =
|
||||||
|
@@ -25,7 +25,7 @@ class HTMLPurifier_HTMLModule_Legacy extends HTMLPurifier_HTMLModule
|
|||||||
|
|
||||||
var $name = 'Legacy';
|
var $name = 'Legacy';
|
||||||
|
|
||||||
function HTMLPurifier_HTMLModule_Legacy() {
|
function setup($config) {
|
||||||
|
|
||||||
$this->addElement('basefont', true, 'Inline', 'Empty', false, array(
|
$this->addElement('basefont', true, 'Inline', 'Empty', false, array(
|
||||||
'color' => 'Color',
|
'color' => 'Color',
|
||||||
|
@@ -21,7 +21,7 @@ class HTMLPurifier_HTMLModule_List extends HTMLPurifier_HTMLModule
|
|||||||
|
|
||||||
var $content_sets = array('Flow' => 'List');
|
var $content_sets = array('Flow' => 'List');
|
||||||
|
|
||||||
function HTMLPurifier_HTMLModule_List() {
|
function setup($config) {
|
||||||
$this->addElement('ol', true, 'List', 'Required: li', 'Common');
|
$this->addElement('ol', true, 'List', 'Required: li', 'Common');
|
||||||
$this->addElement('ul', true, 'List', 'Required: li', 'Common');
|
$this->addElement('ul', true, 'List', 'Required: li', 'Common');
|
||||||
$this->addElement('dl', true, 'List', 'Required: dt | dd', 'Common');
|
$this->addElement('dl', true, 'List', 'Required: dt | dd', 'Common');
|
||||||
|
47
library/HTMLPurifier/HTMLModule/Object.php
Normal file
47
library/HTMLPurifier/HTMLModule/Object.php
Normal file
@@ -0,0 +1,47 @@
|
|||||||
|
<?php
|
||||||
|
|
||||||
|
require_once 'HTMLPurifier/HTMLModule.php';
|
||||||
|
|
||||||
|
/**
 * XHTML 1.1 Object Module: registers the elements used for generic
 * object inclusion (object and param).
 * @warning Users will commonly use <embed> to cater to legacy browsers: this
 *          module does not allow this sort of behavior
 */
class HTMLPurifier_HTMLModule_Object extends HTMLPurifier_HTMLModule
{

    var $name = 'Object';

    /**
     * Registers the object and param elements with this module.
     * @param $config Instance of HTMLPurifier_Config (not read here)
     */
    function setup($config) {

        // Attributes accepted on <object>
        $object_attr = array(
            'archive'  => 'URI',
            'classid'  => 'URI',
            'codebase' => 'URI',
            'codetype' => 'Text',
            'data'     => 'URI',
            'declare'  => 'Bool#declare',
            'height'   => 'Length',
            'name'     => 'CDATA',
            'standby'  => 'Text',
            'tabindex' => 'Number',
            'type'     => 'ContentType',
            'width'    => 'Length'
        );

        // Attributes accepted on <param>; the trailing * is part of the key
        $param_attr = array(
            'id'        => 'ID',
            'name*'     => 'Text',
            'type'      => 'Text',
            'value'     => 'Text',
            'valuetype' => 'Enum#data,ref,object'
        );

        $this->addElement(
            'object', false, 'Inline',
            'Optional: #PCDATA | Flow | param', 'Common',
            $object_attr
        );

        $this->addElement('param', false, false, 'Empty', false, $param_attr);

    }

}
|
||||||
|
|
@@ -17,7 +17,7 @@ class HTMLPurifier_HTMLModule_Presentation extends HTMLPurifier_HTMLModule
|
|||||||
|
|
||||||
var $name = 'Presentation';
|
var $name = 'Presentation';
|
||||||
|
|
||||||
function HTMLPurifier_HTMLModule_Presentation() {
|
function setup($config) {
|
||||||
$this->addElement('b', true, 'Inline', 'Inline', 'Common');
|
$this->addElement('b', true, 'Inline', 'Inline', 'Common');
|
||||||
$this->addElement('big', true, 'Inline', 'Inline', 'Common');
|
$this->addElement('big', true, 'Inline', 'Inline', 'Common');
|
||||||
$this->addElement('hr', true, 'Block', 'Empty', 'Common');
|
$this->addElement('hr', true, 'Block', 'Empty', 'Common');
|
||||||
|
@@ -11,7 +11,7 @@ class HTMLPurifier_HTMLModule_Ruby extends HTMLPurifier_HTMLModule
|
|||||||
|
|
||||||
var $name = 'Ruby';
|
var $name = 'Ruby';
|
||||||
|
|
||||||
function HTMLPurifier_HTMLModule_Ruby() {
|
function setup($config) {
|
||||||
$this->addElement('ruby', true, 'Inline',
|
$this->addElement('ruby', true, 'Inline',
|
||||||
'Custom: ((rb, (rt | (rp, rt, rp))) | (rbc, rtc, rtc?))',
|
'Custom: ((rb, (rt | (rp, rt, rp))) | (rbc, rtc, rtc?))',
|
||||||
'Common');
|
'Common');
|
||||||
|
@@ -32,7 +32,7 @@ class HTMLPurifier_HTMLModule_Scripting extends HTMLPurifier_HTMLModule
|
|||||||
var $elements = array('script', 'noscript');
|
var $elements = array('script', 'noscript');
|
||||||
var $content_sets = array('Block' => 'script | noscript', 'Inline' => 'script | noscript');
|
var $content_sets = array('Block' => 'script | noscript', 'Inline' => 'script | noscript');
|
||||||
|
|
||||||
function HTMLPurifier_HTMLModule_Scripting() {
|
function setup($config) {
|
||||||
// TODO: create custom child-definition for noscript that
|
// TODO: create custom child-definition for noscript that
|
||||||
// auto-wraps stray #PCDATA in a similar manner to
|
// auto-wraps stray #PCDATA in a similar manner to
|
||||||
// blockquote's custom definition (we would use it but
|
// blockquote's custom definition (we would use it but
|
||||||
|
@@ -18,7 +18,7 @@ class HTMLPurifier_HTMLModule_StyleAttribute extends HTMLPurifier_HTMLModule
|
|||||||
'Core' => array(0 => array('Style'))
|
'Core' => array(0 => array('Style'))
|
||||||
);
|
);
|
||||||
|
|
||||||
function HTMLPurifier_HTMLModule_StyleAttribute() {
|
function setup($config) {
|
||||||
$this->attr_collections['Style']['style'] = new HTMLPurifier_AttrDef_CSS();
|
$this->attr_collections['Style']['style'] = new HTMLPurifier_AttrDef_CSS();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -11,7 +11,7 @@ class HTMLPurifier_HTMLModule_Tables extends HTMLPurifier_HTMLModule
|
|||||||
|
|
||||||
var $name = 'Tables';
|
var $name = 'Tables';
|
||||||
|
|
||||||
function HTMLPurifier_HTMLModule_Tables() {
|
function setup($config) {
|
||||||
|
|
||||||
$this->addElement('caption', true, false, 'Inline', 'Common');
|
$this->addElement('caption', true, false, 'Inline', 'Common');
|
||||||
|
|
||||||
|
@@ -10,7 +10,7 @@ class HTMLPurifier_HTMLModule_Target extends HTMLPurifier_HTMLModule
|
|||||||
|
|
||||||
var $name = 'Target';
|
var $name = 'Target';
|
||||||
|
|
||||||
function HTMLPurifier_HTMLModule_Target() {
|
function setup($config) {
|
||||||
$elements = array('a');
|
$elements = array('a');
|
||||||
foreach ($elements as $name) {
|
foreach ($elements as $name) {
|
||||||
$e =& $this->addBlankElement($name);
|
$e =& $this->addBlankElement($name);
|
||||||
|
@@ -22,7 +22,7 @@ class HTMLPurifier_HTMLModule_Text extends HTMLPurifier_HTMLModule
|
|||||||
'Flow' => 'Heading | Block | Inline'
|
'Flow' => 'Heading | Block | Inline'
|
||||||
);
|
);
|
||||||
|
|
||||||
function HTMLPurifier_HTMLModule_Text() {
|
function setup($config) {
|
||||||
|
|
||||||
// Inline Phrasal -------------------------------------------------
|
// Inline Phrasal -------------------------------------------------
|
||||||
$this->addElement('abbr', true, 'Inline', 'Inline', 'Common');
|
$this->addElement('abbr', true, 'Inline', 'Inline', 'Common');
|
||||||
|
@@ -70,7 +70,7 @@ class HTMLPurifier_HTMLModule_Tidy extends HTMLPurifier_HTMLModule
|
|||||||
* @todo Wildcard matching and error reporting when an added or
|
* @todo Wildcard matching and error reporting when an added or
|
||||||
* subtracted fix has no effect.
|
* subtracted fix has no effect.
|
||||||
*/
|
*/
|
||||||
function construct($config) {
|
function setup($config) {
|
||||||
|
|
||||||
// create fixes, initialize fixesForLevel
|
// create fixes, initialize fixesForLevel
|
||||||
$fixes = $this->makeFixes();
|
$fixes = $this->makeFixes();
|
||||||
|
@@ -13,6 +13,8 @@ require_once 'HTMLPurifier/AttrTransform/Length.php';
|
|||||||
require_once 'HTMLPurifier/AttrTransform/ImgSpace.php';
|
require_once 'HTMLPurifier/AttrTransform/ImgSpace.php';
|
||||||
require_once 'HTMLPurifier/AttrTransform/EnumToCSS.php';
|
require_once 'HTMLPurifier/AttrTransform/EnumToCSS.php';
|
||||||
|
|
||||||
|
require_once 'HTMLPurifier/ChildDef/StrictBlockquote.php';
|
||||||
|
|
||||||
class HTMLPurifier_HTMLModule_Tidy_XHTMLAndHTML4 extends
|
class HTMLPurifier_HTMLModule_Tidy_XHTMLAndHTML4 extends
|
||||||
HTMLPurifier_HTMLModule_Tidy
|
HTMLPurifier_HTMLModule_Tidy
|
||||||
{
|
{
|
||||||
@@ -188,5 +190,17 @@ class HTMLPurifier_HTMLModule_Tidy_Strict extends
|
|||||||
{
|
{
|
||||||
var $name = 'Tidy_Strict';
|
var $name = 'Tidy_Strict';
|
||||||
var $defaultLevel = 'light';
|
var $defaultLevel = 'light';
|
||||||
|
|
||||||
|
function makeFixes() {
|
||||||
|
$r = parent::makeFixes();
|
||||||
|
$r['blockquote#content_model_type'] = 'strictblockquote';
|
||||||
|
return $r;
|
||||||
|
}
|
||||||
|
|
||||||
|
var $defines_child_def = true;
|
||||||
|
function getChildDef($def) {
|
||||||
|
if ($def->content_model_type != 'strictblockquote') return parent::getChildDef($def);
|
||||||
|
return new HTMLPurifier_ChildDef_StrictBlockquote($def->content_model);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -1,26 +0,0 @@
|
|||||||
<?php
|
|
||||||
|
|
||||||
require_once 'HTMLPurifier/HTMLModule/Tidy.php';
|
|
||||||
require_once 'HTMLPurifier/ChildDef/StrictBlockquote.php';
|
|
||||||
|
|
||||||
class HTMLPurifier_HTMLModule_Tidy_XHTMLStrict extends
|
|
||||||
HTMLPurifier_HTMLModule_Tidy
|
|
||||||
{
|
|
||||||
|
|
||||||
var $name = 'Tidy_XHTMLStrict';
|
|
||||||
var $defaultLevel = 'light';
|
|
||||||
|
|
||||||
function makeFixes() {
|
|
||||||
$r = array();
|
|
||||||
$r['blockquote#content_model_type'] = 'strictblockquote';
|
|
||||||
return $r;
|
|
||||||
}
|
|
||||||
|
|
||||||
var $defines_child_def = true;
|
|
||||||
function getChildDef($def) {
|
|
||||||
if ($def->content_model_type != 'strictblockquote') return false;
|
|
||||||
return new HTMLPurifier_ChildDef_StrictBlockquote($def->content_model);
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
@@ -29,12 +29,12 @@ require_once 'HTMLPurifier/HTMLModule/Scripting.php';
|
|||||||
require_once 'HTMLPurifier/HTMLModule/XMLCommonAttributes.php';
|
require_once 'HTMLPurifier/HTMLModule/XMLCommonAttributes.php';
|
||||||
require_once 'HTMLPurifier/HTMLModule/NonXMLCommonAttributes.php';
|
require_once 'HTMLPurifier/HTMLModule/NonXMLCommonAttributes.php';
|
||||||
require_once 'HTMLPurifier/HTMLModule/Ruby.php';
|
require_once 'HTMLPurifier/HTMLModule/Ruby.php';
|
||||||
|
require_once 'HTMLPurifier/HTMLModule/Object.php';
|
||||||
|
|
||||||
// tidy modules
|
// tidy modules
|
||||||
require_once 'HTMLPurifier/HTMLModule/Tidy.php';
|
require_once 'HTMLPurifier/HTMLModule/Tidy.php';
|
||||||
require_once 'HTMLPurifier/HTMLModule/Tidy/XHTMLAndHTML4.php';
|
require_once 'HTMLPurifier/HTMLModule/Tidy/XHTMLAndHTML4.php';
|
||||||
require_once 'HTMLPurifier/HTMLModule/Tidy/XHTML.php';
|
require_once 'HTMLPurifier/HTMLModule/Tidy/XHTML.php';
|
||||||
require_once 'HTMLPurifier/HTMLModule/Tidy/XHTMLStrict.php';
|
|
||||||
require_once 'HTMLPurifier/HTMLModule/Tidy/Proprietary.php';
|
require_once 'HTMLPurifier/HTMLModule/Tidy/Proprietary.php';
|
||||||
|
|
||||||
HTMLPurifier_ConfigSchema::define(
|
HTMLPurifier_ConfigSchema::define(
|
||||||
@@ -172,7 +172,7 @@ class HTMLPurifier_HTMLModuleManager
|
|||||||
$common = array(
|
$common = array(
|
||||||
'CommonAttributes', 'Text', 'Hypertext', 'List',
|
'CommonAttributes', 'Text', 'Hypertext', 'List',
|
||||||
'Presentation', 'Edit', 'Bdo', 'Tables', 'Image',
|
'Presentation', 'Edit', 'Bdo', 'Tables', 'Image',
|
||||||
'StyleAttribute', 'Scripting'
|
'StyleAttribute', 'Scripting', 'Object'
|
||||||
);
|
);
|
||||||
$transitional = array('Legacy', 'Target');
|
$transitional = array('Legacy', 'Target');
|
||||||
$xml = array('XMLCommonAttributes');
|
$xml = array('XMLCommonAttributes');
|
||||||
@@ -208,7 +208,7 @@ class HTMLPurifier_HTMLModuleManager
|
|||||||
$this->doctypes->register(
|
$this->doctypes->register(
|
||||||
'XHTML 1.0 Strict', true,
|
'XHTML 1.0 Strict', true,
|
||||||
array_merge($common, $xml, $non_xml),
|
array_merge($common, $xml, $non_xml),
|
||||||
array('Tidy_Strict', 'Tidy_XHTML', 'Tidy_XHTMLStrict', 'Tidy_Proprietary'),
|
array('Tidy_Strict', 'Tidy_XHTML', 'Tidy_Strict', 'Tidy_Proprietary'),
|
||||||
array(),
|
array(),
|
||||||
'-//W3C//DTD XHTML 1.0 Strict//EN',
|
'-//W3C//DTD XHTML 1.0 Strict//EN',
|
||||||
'http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd'
|
'http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd'
|
||||||
@@ -217,7 +217,7 @@ class HTMLPurifier_HTMLModuleManager
|
|||||||
$this->doctypes->register(
|
$this->doctypes->register(
|
||||||
'XHTML 1.1', true,
|
'XHTML 1.1', true,
|
||||||
array_merge($common, $xml, array('Ruby')),
|
array_merge($common, $xml, array('Ruby')),
|
||||||
array('Tidy_Strict', 'Tidy_XHTML', 'Tidy_Proprietary', 'Tidy_XHTMLStrict'), // Tidy_XHTML1_1
|
array('Tidy_Strict', 'Tidy_XHTML', 'Tidy_Proprietary', 'Tidy_Strict'), // Tidy_XHTML1_1
|
||||||
array(),
|
array(),
|
||||||
'-//W3C//DTD XHTML 1.1//EN',
|
'-//W3C//DTD XHTML 1.1//EN',
|
||||||
'http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd'
|
'http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd'
|
||||||
@@ -342,13 +342,12 @@ class HTMLPurifier_HTMLModuleManager
|
|||||||
|
|
||||||
foreach ($modules as $module) {
|
foreach ($modules as $module) {
|
||||||
$this->processModule($module);
|
$this->processModule($module);
|
||||||
|
$this->modules[$module]->setup($config);
|
||||||
}
|
}
|
||||||
|
|
||||||
foreach ($this->doctype->tidyModules as $module) {
|
foreach ($this->doctype->tidyModules as $module) {
|
||||||
$this->processModule($module);
|
$this->processModule($module);
|
||||||
if (method_exists($this->modules[$module], 'construct')) {
|
$this->modules[$module]->setup($config);
|
||||||
$this->modules[$module]->construct($config);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// setup lookup table based on all valid modules
|
// setup lookup table based on all valid modules
|
||||||
|
@@ -1,11 +1,15 @@
|
|||||||
<?php
|
<?php
|
||||||
|
|
||||||
|
HTMLPurifier_ConfigSchema::define(
|
||||||
|
'Attr', 'IDBlacklist', array(), 'list',
|
||||||
|
'Array of IDs not allowed in the document.'
|
||||||
|
);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Component of HTMLPurifier_AttrContext that accumulates IDs to prevent dupes
|
* Component of HTMLPurifier_AttrContext that accumulates IDs to prevent dupes
|
||||||
* @note In Slashdot-speak, dupe means duplicate.
|
* @note In Slashdot-speak, dupe means duplicate.
|
||||||
* @note This class does not accept $config or $context, thus, it is the
|
* @note The default constructor does not accept $config or $context objects:
|
||||||
* burden of the callee to register the appropriate errors or
|
* you must use the static build() factory method to perform initialization.
|
||||||
* configuration.
|
|
||||||
*/
|
*/
|
||||||
class HTMLPurifier_IDAccumulator
|
class HTMLPurifier_IDAccumulator
|
||||||
{
|
{
|
||||||
@@ -16,6 +20,19 @@ class HTMLPurifier_IDAccumulator
|
|||||||
*/
|
*/
|
||||||
var $ids = array();
|
var $ids = array();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Builds an IDAccumulator, also initializing the default blacklist
|
||||||
|
* @param $config Instance of HTMLPurifier_Config
|
||||||
|
* @param $context Instance of HTMLPurifier_Context
|
||||||
|
* @return Fully initialized HTMLPurifier_IDAccumulator
|
||||||
|
* @static
|
||||||
|
*/
|
||||||
|
function build($config, &$context) {
|
||||||
|
$acc = new HTMLPurifier_IDAccumulator();
|
||||||
|
$acc->load($config->get('Attr', 'IDBlacklist'));
|
||||||
|
return $acc;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Add an ID to the lookup table.
|
* Add an ID to the lookup table.
|
||||||
* @param $id ID to be added.
|
* @param $id ID to be added.
|
||||||
|
@@ -4,6 +4,9 @@
|
|||||||
* Injects tokens into the document while parsing for well-formedness.
|
* Injects tokens into the document while parsing for well-formedness.
|
||||||
* This enables "formatter-like" functionality such as auto-paragraphing,
|
* This enables "formatter-like" functionality such as auto-paragraphing,
|
||||||
* smiley-ification and linkification to take place.
|
* smiley-ification and linkification to take place.
|
||||||
|
*
|
||||||
|
* @todo Allow injectors to request a re-run on their output. This
|
||||||
|
* would help if an operation is recursive.
|
||||||
*/
|
*/
|
||||||
class HTMLPurifier_Injector
|
class HTMLPurifier_Injector
|
||||||
{
|
{
|
||||||
@@ -107,5 +110,12 @@ class HTMLPurifier_Injector
|
|||||||
*/
|
*/
|
||||||
function handleElement(&$token) {}
|
function handleElement(&$token) {}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Notifier that is called when an end token is processed
|
||||||
|
* @note This differs from handlers in that the token is read-only
|
||||||
|
*/
|
||||||
|
function notifyEnd($token) {}
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -6,20 +6,28 @@ HTMLPurifier_ConfigSchema::define(
|
|||||||
'AutoFormat', 'AutoParagraph', false, 'bool', '
|
'AutoFormat', 'AutoParagraph', false, 'bool', '
|
||||||
<p>
|
<p>
|
||||||
This directive turns on auto-paragraphing, where double newlines are
|
This directive turns on auto-paragraphing, where double newlines are
|
||||||
converted in to paragraphs whenever possible. Auto-paragraphing
|
converted in to paragraphs whenever possible. Auto-paragraphing:
|
||||||
applies when:
|
|
||||||
</p>
|
</p>
|
||||||
<ul>
|
<ul>
|
||||||
<li>There are inline elements or text in the root node</li>
|
<li>Always applies to inline elements or text in the root node,</li>
|
||||||
<li>There are inline elements or text with double newlines or
|
<li>Applies to inline elements or text with double newlines in nodes
|
||||||
block elements in nodes that allow paragraph tags</li>
|
that allow paragraph tags,</li>
|
||||||
<li>There are double newlines in paragraph tags</li>
|
<li>Applies to double newlines in paragraph tags</li>
|
||||||
</ul>
|
</ul>
|
||||||
<p>
|
<p>
|
||||||
<code>p</code> tags must be allowed for this directive to take effect.
|
<code>p</code> tags must be allowed for this directive to take effect.
|
||||||
We do not use <code>br</code> tags for paragraphing, as that is
|
We do not use <code>br</code> tags for paragraphing, as that is
|
||||||
semantically incorrect.
|
semantically incorrect.
|
||||||
</p>
|
</p>
|
||||||
|
<p>
|
||||||
|
To prevent auto-paragraphing as a content-producer, refrain from using
|
||||||
|
double-newlines except to specify a new paragraph or in contexts where
|
||||||
|
it has special meaning (whitespace usually has no meaning except in
|
||||||
|
tags like <code>pre</code>, so this should not be difficult.) To prevent
|
||||||
|
the paragraphing of inline text adjacent to block elements, wrap them
|
||||||
|
in <code>div</code> tags (the behavior is slightly different outside of
|
||||||
|
the root node.)
|
||||||
|
</p>
|
||||||
<p>
|
<p>
|
||||||
This directive has been available since 2.0.1.
|
This directive has been available since 2.0.1.
|
||||||
</p>
|
</p>
|
||||||
@@ -62,19 +70,27 @@ class HTMLPurifier_Injector_AutoParagraph extends HTMLPurifier_Injector
|
|||||||
$ok = false;
|
$ok = false;
|
||||||
// test if up-coming tokens are either block or have
|
// test if up-coming tokens are either block or have
|
||||||
// a double newline in them
|
// a double newline in them
|
||||||
|
$nesting = 0;
|
||||||
for ($i = $this->inputIndex + 1; isset($this->inputTokens[$i]); $i++) {
|
for ($i = $this->inputIndex + 1; isset($this->inputTokens[$i]); $i++) {
|
||||||
if ($this->inputTokens[$i]->type == 'start'){
|
if ($this->inputTokens[$i]->type == 'start'){
|
||||||
if (!$this->_isInline($this->inputTokens[$i])) {
|
if (!$this->_isInline($this->inputTokens[$i])) {
|
||||||
$ok = true;
|
// we haven't found a double-newline, and
|
||||||
|
// we've hit a block element, so don't paragraph
|
||||||
|
$ok = false;
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
break;
|
$nesting++;
|
||||||
|
}
|
||||||
|
if ($this->inputTokens[$i]->type == 'end') {
|
||||||
|
if ($nesting <= 0) break;
|
||||||
|
$nesting--;
|
||||||
}
|
}
|
||||||
if ($this->inputTokens[$i]->type == 'end') break;
|
|
||||||
if ($this->inputTokens[$i]->type == 'text') {
|
if ($this->inputTokens[$i]->type == 'text') {
|
||||||
|
// found it!
|
||||||
if (strpos($this->inputTokens[$i]->data, "\n\n") !== false) {
|
if (strpos($this->inputTokens[$i]->data, "\n\n") !== false) {
|
||||||
$ok = true;
|
$ok = true;
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
if (!$this->inputTokens[$i]->is_whitespace) break;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if ($ok) {
|
if ($ok) {
|
||||||
|
@@ -25,6 +25,13 @@ class HTMLPurifier_Language
|
|||||||
*/
|
*/
|
||||||
var $errorNames = array();
|
var $errorNames = array();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* True if no message file was found for this language, so English
|
||||||
|
* is being used instead. Check this if you'd like to notify the
|
||||||
|
* user that they've used a non-supported language.
|
||||||
|
*/
|
||||||
|
var $error = false;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Has the language object been loaded yet?
|
* Has the language object been loaded yet?
|
||||||
* @private
|
* @private
|
||||||
|
11
library/HTMLPurifier/Language/messages/en-x-testmini.php
Normal file
11
library/HTMLPurifier/Language/messages/en-x-testmini.php
Normal file
@@ -0,0 +1,11 @@
|
|||||||
|
<?php
|
||||||
|
|
||||||
|
// private language message file for unit testing purposes
|
||||||
|
// this language file has no class associated with it
|
||||||
|
|
||||||
|
$fallback = 'en';
|
||||||
|
|
||||||
|
$messages = array(
|
||||||
|
'HTMLPurifier' => 'HTML Purifier XNone'
|
||||||
|
);
|
||||||
|
|
@@ -16,6 +16,7 @@ This directive has been available since 2.0.0.
|
|||||||
* caching and fallbacks.
|
* caching and fallbacks.
|
||||||
* @note Thanks to MediaWiki for the general logic, although this version
|
* @note Thanks to MediaWiki for the general logic, although this version
|
||||||
* has been entirely rewritten
|
* has been entirely rewritten
|
||||||
|
* @todo Serialized cache for languages
|
||||||
*/
|
*/
|
||||||
class HTMLPurifier_LanguageFactory
|
class HTMLPurifier_LanguageFactory
|
||||||
{
|
{
|
||||||
@@ -89,40 +90,42 @@ class HTMLPurifier_LanguageFactory
|
|||||||
* Creates a language object, handles class fallbacks
|
* Creates a language object, handles class fallbacks
|
||||||
* @param $config Instance of HTMLPurifier_Config
|
* @param $config Instance of HTMLPurifier_Config
|
||||||
* @param $context Instance of HTMLPurifier_Context
|
* @param $context Instance of HTMLPurifier_Context
|
||||||
|
* @param $code Code to override configuration with. Private parameter.
|
||||||
*/
|
*/
|
||||||
function create($config, &$context) {
|
function create($config, &$context, $code = false) {
|
||||||
|
|
||||||
// validate language code
|
// validate language code
|
||||||
$code = $this->validator->validate(
|
if ($code === false) {
|
||||||
$config->get('Core', 'Language'), $config, $context
|
$code = $this->validator->validate(
|
||||||
);
|
$config->get('Core', 'Language'), $config, $context
|
||||||
|
);
|
||||||
|
} else {
|
||||||
|
$code = $this->validator->validate($code, $config, $context);
|
||||||
|
}
|
||||||
if ($code === false) $code = 'en'; // malformed code becomes English
|
if ($code === false) $code = 'en'; // malformed code becomes English
|
||||||
|
|
||||||
$pcode = str_replace('-', '_', $code); // make valid PHP classname
|
$pcode = str_replace('-', '_', $code); // make valid PHP classname
|
||||||
static $depth = 0; // recursion protection
|
static $depth = 0; // recursion protection
|
||||||
|
|
||||||
if ($code == 'en') {
|
if ($code == 'en') {
|
||||||
$class = 'HTMLPurifier_Language';
|
$lang = new HTMLPurifier_Language($config, $context);
|
||||||
$file = $this->dir . '/Language.php';
|
|
||||||
} else {
|
} else {
|
||||||
$class = 'HTMLPurifier_Language_' . $pcode;
|
$class = 'HTMLPurifier_Language_' . $pcode;
|
||||||
$file = $this->dir . '/Language/classes/' . $code . '.php';
|
$file = $this->dir . '/Language/classes/' . $code . '.php';
|
||||||
// PHP5/APC deps bug workaround can go here
|
if (file_exists($file)) {
|
||||||
// you can bypass the conditional include by loading the
|
include $file;
|
||||||
// file yourself
|
$lang = new $class($config, $context);
|
||||||
if (file_exists($file) && !class_exists($class)) {
|
} else {
|
||||||
include_once $file;
|
// Go fallback
|
||||||
}
|
$raw_fallback = $this->getFallbackFor($code);
|
||||||
}
|
$fallback = $raw_fallback ? $raw_fallback : 'en';
|
||||||
|
$depth++;
|
||||||
if (!class_exists($class)) {
|
$lang = $this->create($config, $context, $fallback);
|
||||||
// go fallback
|
if (!$raw_fallback) {
|
||||||
$fallback = HTMLPurifier_LanguageFactory::getFallbackFor($code);
|
$lang->error = true;
|
||||||
$depth++;
|
}
|
||||||
$lang = HTMLPurifier_LanguageFactory::factory( $fallback );
|
$depth--;
|
||||||
$depth--;
|
}
|
||||||
} else {
|
|
||||||
$lang = new $class($config, $context);
|
|
||||||
}
|
}
|
||||||
$lang->code = $code;
|
$lang->code = $code;
|
||||||
|
|
||||||
|
111
library/HTMLPurifier/Length.php
Normal file
111
library/HTMLPurifier/Length.php
Normal file
@@ -0,0 +1,111 @@
|
|||||||
|
<?php
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Represents a measurable length, with a string numeric magnitude
|
||||||
|
* and a unit. This object is immutable.
|
||||||
|
*/
|
||||||
|
class HTMLPurifier_Length
|
||||||
|
{
|
||||||
|
|
||||||
|
/**
|
||||||
|
* String numeric magnitude.
|
||||||
|
*/
|
||||||
|
var $n;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* String unit. False is permitted if $n = 0.
|
||||||
|
*/
|
||||||
|
var $unit;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Whether or not this length is valid. Null if not calculated yet.
|
||||||
|
*/
|
||||||
|
var $isValid;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @param number $n Magnitude
|
||||||
|
* @param string $u Unit
|
||||||
|
*/
|
||||||
|
function HTMLPurifier_Length($n = '0', $u = false) {
|
||||||
|
$this->n = (string) $n;
|
||||||
|
$this->unit = $u !== false ? (string) $u : false;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @param string $s Unit string, like '2em' or '3.4in'
|
||||||
|
* @warning Does not perform validation.
|
||||||
|
*/
|
||||||
|
function make($s) {
|
||||||
|
if (is_a($s, 'HTMLPurifier_Length')) return $s;
|
||||||
|
$n_length = strspn($s, '1234567890.+-');
|
||||||
|
$n = substr($s, 0, $n_length);
|
||||||
|
$unit = substr($s, $n_length);
|
||||||
|
if ($unit === '') $unit = false;
|
||||||
|
return new HTMLPurifier_Length($n, $unit);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Validates the number and unit.
|
||||||
|
*/
|
||||||
|
function validate() {
|
||||||
|
// Special case:
|
||||||
|
|
||||||
|
static $allowedUnits = array(
|
||||||
|
'em' => true, 'ex' => true, 'px' => true, 'in' => true,
|
||||||
|
'cm' => true, 'mm' => true, 'pt' => true, 'pc' => true
|
||||||
|
);
|
||||||
|
if ($this->n === '+0' || $this->n === '-0') $this->n = '0';
|
||||||
|
if ($this->n === '0' && $this->unit === false) return true;
|
||||||
|
if (!ctype_lower($this->unit)) $this->unit = strtolower($this->unit);
|
||||||
|
if (!isset($allowedUnits[$this->unit])) return false;
|
||||||
|
// Hack:
|
||||||
|
$def = new HTMLPurifier_AttrDef_CSS_Number();
|
||||||
|
$a = false; // hack hack
|
||||||
|
$result = $def->validate($this->n, $a, $a);
|
||||||
|
if ($result === false) return false;
|
||||||
|
$this->n = $result;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns string representation of the length (magnitude followed by unit), or false if the length is invalid.
|
||||||
|
*/
|
||||||
|
function toString() {
|
||||||
|
if (!$this->isValid()) return false;
|
||||||
|
return $this->n . $this->unit;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Retrieves string numeric magnitude.
|
||||||
|
*/
|
||||||
|
function getN() {return $this->n;}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Retrieves string unit.
|
||||||
|
*/
|
||||||
|
function getUnit() {return $this->unit;}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns true if this length unit is valid.
|
||||||
|
*/
|
||||||
|
function isValid() {
|
||||||
|
if ($this->isValid === null) $this->isValid = $this->validate();
|
||||||
|
return $this->isValid;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Compares two lengths, and returns a positive number if greater, a negative number if less and 0 if equal (false if the lengths cannot be compared).
|
||||||
|
* @warning If both values are too large or small, this calculation will
|
||||||
|
* not work properly
|
||||||
|
*/
|
||||||
|
function compareTo($l) {
|
||||||
|
if ($l === false) return false;
|
||||||
|
if ($l->unit !== $this->unit) {
|
||||||
|
$converter = new HTMLPurifier_UnitConverter();
|
||||||
|
$l = $converter->convert($l, $this->unit);
|
||||||
|
if ($l === false) return false;
|
||||||
|
}
|
||||||
|
return $this->n - $l->n;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
@@ -13,11 +13,14 @@ if (version_compare(PHP_VERSION, "5", ">=")) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
HTMLPurifier_ConfigSchema::define(
|
HTMLPurifier_ConfigSchema::define(
|
||||||
'Core', 'AcceptFullDocuments', true, 'bool',
|
'Core', 'ConvertDocumentToFragment', true, 'bool', '
|
||||||
'This parameter determines whether or not the filter should accept full '.
|
This parameter determines whether or not the filter should convert
|
||||||
'HTML documents, not just HTML fragments. When on, it will '.
|
input that is a full document with html and body tags to a fragment
|
||||||
'drop all sections except the content between body.'
|
of just the contents of a body tag. This parameter is simply something
|
||||||
);
|
HTML Purifier can do during an edge-case: for most inputs, this
|
||||||
|
processing is not necessary.
|
||||||
|
');
|
||||||
|
HTMLPurifier_ConfigSchema::defineAlias('Core', 'AcceptFullDocuments', 'Core', 'ConvertDocumentToFragment');
|
||||||
|
|
||||||
HTMLPurifier_ConfigSchema::define(
|
HTMLPurifier_ConfigSchema::define(
|
||||||
'Core', 'LexerImpl', null, 'mixed/null', '
|
'Core', 'LexerImpl', null, 'mixed/null', '
|
||||||
@@ -189,6 +192,9 @@ class HTMLPurifier_Lexer
|
|||||||
return new HTMLPurifier_Lexer_DOMLex();
|
return new HTMLPurifier_Lexer_DOMLex();
|
||||||
case 'DirectLex':
|
case 'DirectLex':
|
||||||
return new HTMLPurifier_Lexer_DirectLex();
|
return new HTMLPurifier_Lexer_DirectLex();
|
||||||
|
case 'PH5P':
|
||||||
|
// experimental Lexer that must be manually included
|
||||||
|
return new HTMLPurifier_Lexer_PH5P();
|
||||||
default:
|
default:
|
||||||
trigger_error("Cannot instantiate unrecognized Lexer type " . htmlspecialchars($lexer), E_USER_ERROR);
|
trigger_error("Cannot instantiate unrecognized Lexer type " . htmlspecialchars($lexer), E_USER_ERROR);
|
||||||
}
|
}
|
||||||
@@ -313,7 +319,7 @@ class HTMLPurifier_Lexer
|
|||||||
function normalize($html, $config, &$context) {
|
function normalize($html, $config, &$context) {
|
||||||
|
|
||||||
// extract body from document if applicable
|
// extract body from document if applicable
|
||||||
if ($config->get('Core', 'AcceptFullDocuments')) {
|
if ($config->get('Core', 'ConvertDocumentToFragment')) {
|
||||||
$html = $this->extractBody($html);
|
$html = $this->extractBody($html);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -53,14 +53,7 @@ class HTMLPurifier_Lexer_DOMLex extends HTMLPurifier_Lexer
|
|||||||
}
|
}
|
||||||
|
|
||||||
// preprocess html, essential for UTF-8
|
// preprocess html, essential for UTF-8
|
||||||
$html =
|
$html = $this->wrapHTML($html, $config, $context);
|
||||||
'<!DOCTYPE html '.
|
|
||||||
'PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"'.
|
|
||||||
'"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">'.
|
|
||||||
'<html><head>'.
|
|
||||||
'<meta http-equiv="Content-Type" content="text/html;'.
|
|
||||||
' charset=utf-8" />'.
|
|
||||||
'</head><body><div>'.$html.'</div></body></html>';
|
|
||||||
|
|
||||||
$doc = new DOMDocument();
|
$doc = new DOMDocument();
|
||||||
$doc->encoding = 'UTF-8'; // theoretically, the above has this covered
|
$doc->encoding = 'UTF-8'; // theoretically, the above has this covered
|
||||||
@@ -97,10 +90,27 @@ class HTMLPurifier_Lexer_DOMLex extends HTMLPurifier_Lexer
|
|||||||
$tokens[] = $this->factory->createText($node->data);
|
$tokens[] = $this->factory->createText($node->data);
|
||||||
return;
|
return;
|
||||||
} elseif ($node->nodeType === XML_CDATA_SECTION_NODE) {
|
} elseif ($node->nodeType === XML_CDATA_SECTION_NODE) {
|
||||||
// undo DOM's special treatment of <script> tags
|
// undo libxml's special treatment of <script> and <style> tags
|
||||||
$tokens[] = $this->factory->createText($this->parseData($node->data));
|
$last = end($tokens);
|
||||||
|
$data = $node->data;
|
||||||
|
// (note $node->tagname is already normalized)
|
||||||
|
if ($last instanceof HTMLPurifier_Token_Start && $last->name == 'script') {
|
||||||
|
$new_data = trim($data);
|
||||||
|
if (substr($new_data, 0, 4) === '<!--') {
|
||||||
|
$data = substr($new_data, 4);
|
||||||
|
if (substr($data, -3) === '-->') {
|
||||||
|
$data = substr($data, 0, -3);
|
||||||
|
} else {
|
||||||
|
// Highly suspicious! Not sure what to do...
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
$tokens[] = $this->factory->createText($this->parseData($data));
|
||||||
return;
|
return;
|
||||||
} elseif ($node->nodeType === XML_COMMENT_NODE) {
|
} elseif ($node->nodeType === XML_COMMENT_NODE) {
|
||||||
|
// this code is only invoked for comments in script/style in versions
|
||||||
|
// of libxml pre-2.6.28 (regular comments, of course, are still
|
||||||
|
// handled regularly)
|
||||||
$tokens[] = $this->factory->createComment($node->data);
|
$tokens[] = $this->factory->createComment($node->data);
|
||||||
return;
|
return;
|
||||||
} elseif (
|
} elseif (
|
||||||
@@ -177,5 +187,25 @@ class HTMLPurifier_Lexer_DOMLex extends HTMLPurifier_Lexer
|
|||||||
return '<!--' . str_replace('&', '&', $matches[1]) . $matches[2];
|
return '<!--' . str_replace('&', '&', $matches[1]) . $matches[2];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Wraps an HTML fragment in the necessary HTML
|
||||||
|
*/
|
||||||
|
function wrapHTML($html, $config, &$context) {
|
||||||
|
$def = $config->getDefinition('HTML');
|
||||||
|
$ret = '';
|
||||||
|
|
||||||
|
if (!empty($def->doctype->dtdPublic) || !empty($def->doctype->dtdSystem)) {
|
||||||
|
$ret .= '<!DOCTYPE html ';
|
||||||
|
if (!empty($def->doctype->dtdPublic)) $ret .= 'PUBLIC "' . $def->doctype->dtdPublic . '" ';
|
||||||
|
if (!empty($def->doctype->dtdSystem)) $ret .= '"' . $def->doctype->dtdSystem . '" ';
|
||||||
|
$ret .= '>';
|
||||||
|
}
|
||||||
|
|
||||||
|
$ret .= '<html><head>';
|
||||||
|
$ret .= '<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />';
|
||||||
|
$ret .= '</head><body><div>'.$html.'</div></body></html>';
|
||||||
|
return $ret;
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -160,9 +160,15 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer
|
|||||||
|
|
||||||
$segment = substr($html, $cursor, $strlen_segment);
|
$segment = substr($html, $cursor, $strlen_segment);
|
||||||
|
|
||||||
|
if ($segment === false) {
|
||||||
|
// somehow, we attempted to access beyond the end of
|
||||||
|
// the string, defense-in-depth, reported by Nate Abele
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
// Check if it's a comment
|
// Check if it's a comment
|
||||||
if (
|
if (
|
||||||
substr($segment, 0, 3) == '!--'
|
strncmp('!--', $segment, 3) === 0
|
||||||
) {
|
) {
|
||||||
// re-determine segment length, looking for -->
|
// re-determine segment length, looking for -->
|
||||||
$position_comment_end = strpos($html, '-->', $cursor);
|
$position_comment_end = strpos($html, '-->', $cursor);
|
||||||
@@ -178,12 +184,7 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer
|
|||||||
}
|
}
|
||||||
$strlen_segment = $position_comment_end - $cursor;
|
$strlen_segment = $position_comment_end - $cursor;
|
||||||
$segment = substr($html, $cursor, $strlen_segment);
|
$segment = substr($html, $cursor, $strlen_segment);
|
||||||
$token = new
|
$token = new HTMLPurifier_Token_Comment(substr($segment, 3));
|
||||||
HTMLPurifier_Token_Comment(
|
|
||||||
substr(
|
|
||||||
$segment, 3, $strlen_segment - 3
|
|
||||||
)
|
|
||||||
);
|
|
||||||
if ($maintain_line_numbers) {
|
if ($maintain_line_numbers) {
|
||||||
$token->line = $current_line;
|
$token->line = $current_line;
|
||||||
$current_line += $this->substrCount($html, $nl, $cursor, $strlen_segment);
|
$current_line += $this->substrCount($html, $nl, $cursor, $strlen_segment);
|
||||||
@@ -237,7 +238,7 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer
|
|||||||
// trailing slash. Remember, we could have a tag like <br>, so
|
// trailing slash. Remember, we could have a tag like <br>, so
|
||||||
// any later token processing scripts must convert improperly
|
// any later token processing scripts must convert improperly
|
||||||
// classified EmptyTags from StartTags.
|
// classified EmptyTags from StartTags.
|
||||||
$is_self_closing= (strpos($segment,'/') === $strlen_segment-1);
|
$is_self_closing = (strrpos($segment,'/') === $strlen_segment-1);
|
||||||
if ($is_self_closing) {
|
if ($is_self_closing) {
|
||||||
$strlen_segment--;
|
$strlen_segment--;
|
||||||
$segment = substr($segment, 0, $strlen_segment);
|
$segment = substr($segment, 0, $strlen_segment);
|
||||||
|
3886
library/HTMLPurifier/Lexer/PH5P.php
Normal file
3886
library/HTMLPurifier/Lexer/PH5P.php
Normal file
File diff suppressed because it is too large
Load Diff
@@ -2,12 +2,68 @@
|
|||||||
|
|
||||||
/**
|
/**
|
||||||
* Class that handles operations involving percent-encoding in URIs.
|
* Class that handles operations involving percent-encoding in URIs.
|
||||||
|
*
|
||||||
|
* @warning
|
||||||
|
* Be careful when reusing instances of PercentEncoder. The object
|
||||||
|
* you use for normalize() SHOULD NOT be used for encode(), or
|
||||||
|
* vice-versa.
|
||||||
*/
|
*/
|
||||||
class HTMLPurifier_PercentEncoder
|
class HTMLPurifier_PercentEncoder
|
||||||
{
|
{
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Fix up percent-encoding by decoding unreserved characters and normalizing
|
* Reserved characters to preserve when using encode().
|
||||||
|
*/
|
||||||
|
var $preserve = array();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* String of characters that should be preserved while using encode().
|
||||||
|
*/
|
||||||
|
function HTMLPurifier_PercentEncoder($preserve = false) {
|
||||||
|
// unreserved letters, ought to const-ify
|
||||||
|
for ($i = 48; $i <= 57; $i++) $this->preserve[$i] = true; // digits
|
||||||
|
for ($i = 65; $i <= 90; $i++) $this->preserve[$i] = true; // upper-case
|
||||||
|
for ($i = 97; $i <= 122; $i++) $this->preserve[$i] = true; // lower-case
|
||||||
|
$this->preserve[45] = true; // Dash -
|
||||||
|
$this->preserve[46] = true; // Period .
|
||||||
|
$this->preserve[95] = true; // Underscore _
|
||||||
|
$this->preserve[126]= true; // Tilde ~
|
||||||
|
|
||||||
|
// extra letters not to escape
|
||||||
|
if ($preserve !== false) {
|
||||||
|
for ($i = 0, $c = strlen($preserve); $i < $c; $i++) {
|
||||||
|
$this->preserve[ord($preserve[$i])] = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Our replacement for urlencode: it encodes every character except the
|
||||||
|
* unreserved ones and any extra characters that were instructed to be preserved.
|
||||||
|
* @note
|
||||||
|
* Assumes that the string has already been normalized, making any
|
||||||
|
* and all percent escape sequences valid. Percents will not be
|
||||||
|
* re-escaped, regardless of their status in $preserve
|
||||||
|
* @param $string String to be encoded
|
||||||
|
* @return Encoded string.
|
||||||
|
*/
|
||||||
|
function encode($string) {
|
||||||
|
$ret = '';
|
||||||
|
for ($i = 0, $c = strlen($string); $i < $c; $i++) {
|
||||||
|
if ($string[$i] !== '%' && !isset($this->preserve[$int = ord($string[$i])]) ) {
|
||||||
|
$ret .= '%' . sprintf('%02X', $int);
|
||||||
|
} else {
|
||||||
|
$ret .= $string[$i];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return $ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Fix up percent-encoding by decoding unreserved characters and normalizing.
|
||||||
|
* @warning This function is affected by $preserve, even though the
|
||||||
|
* usual desired behavior is for this not to preserve those
|
||||||
|
* characters. Be careful when reusing instances of PercentEncoder!
|
||||||
* @param $string String to normalize
|
* @param $string String to normalize
|
||||||
*/
|
*/
|
||||||
function normalize($string) {
|
function normalize($string) {
|
||||||
@@ -27,12 +83,7 @@ class HTMLPurifier_PercentEncoder
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
$int = hexdec($encoding);
|
$int = hexdec($encoding);
|
||||||
if (
|
if (isset($this->preserve[$int])) {
|
||||||
($int >= 48 && $int <= 57) || // digits
|
|
||||||
($int >= 65 && $int <= 90) || // uppercase letters
|
|
||||||
($int >= 97 && $int <= 122) || // lowercase letters
|
|
||||||
$int == 126 || $int == 45 || $int == 46 || $int == 95 // ~-._
|
|
||||||
) {
|
|
||||||
$ret .= chr($int) . $text;
|
$ret .= chr($int) . $text;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
@@ -25,7 +25,9 @@ class HTMLPurifier_Printer_ConfigForm extends HTMLPurifier_Printer
|
|||||||
|
|
||||||
/**
|
/**
|
||||||
* Whether or not to compress directive names, clipping them off
|
* Whether or not to compress directive names, clipping them off
|
||||||
* after a certain amount of letters
|
* after a certain amount of letters. False to disable or integer letters
|
||||||
|
* before clipping.
|
||||||
|
* @protected
|
||||||
*/
|
*/
|
||||||
var $compress = false;
|
var $compress = false;
|
||||||
|
|
||||||
@@ -41,11 +43,13 @@ class HTMLPurifier_Printer_ConfigForm extends HTMLPurifier_Printer
|
|||||||
$this->docURL = $doc_url;
|
$this->docURL = $doc_url;
|
||||||
$this->name = $name;
|
$this->name = $name;
|
||||||
$this->compress = $compress;
|
$this->compress = $compress;
|
||||||
|
// initialize sub-printers
|
||||||
$this->fields['default'] = new HTMLPurifier_Printer_ConfigForm_default();
|
$this->fields['default'] = new HTMLPurifier_Printer_ConfigForm_default();
|
||||||
$this->fields['bool'] = new HTMLPurifier_Printer_ConfigForm_bool();
|
$this->fields['bool'] = new HTMLPurifier_Printer_ConfigForm_bool();
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
* Sets default column and row size for textareas in sub-printers
|
||||||
* @param $cols Integer columns of textarea, null to use default
|
* @param $cols Integer columns of textarea, null to use default
|
||||||
* @param $rows Integer rows of textarea, null to use default
|
* @param $rows Integer rows of textarea, null to use default
|
||||||
*/
|
*/
|
||||||
@@ -55,15 +59,14 @@ class HTMLPurifier_Printer_ConfigForm extends HTMLPurifier_Printer
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Retrieves styling, in case the directory it's in is not publically
|
* Retrieves styling, in case it is not accessible by webserver
|
||||||
* available
|
|
||||||
*/
|
*/
|
||||||
function getCSS() {
|
function getCSS() {
|
||||||
return file_get_contents(HTMLPURIFIER_PREFIX . '/HTMLPurifier/Printer/ConfigForm.css');
|
return file_get_contents(HTMLPURIFIER_PREFIX . '/HTMLPurifier/Printer/ConfigForm.css');
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Retrieves JavaScript, in case directory is not public
|
* Retrieves JavaScript, in case it is not accessible by webserver
|
||||||
*/
|
*/
|
||||||
function getJavaScript() {
|
function getJavaScript() {
|
||||||
return file_get_contents(HTMLPURIFIER_PREFIX . '/HTMLPurifier/Printer/ConfigForm.js');
|
return file_get_contents(HTMLPURIFIER_PREFIX . '/HTMLPurifier/Printer/ConfigForm.js');
|
||||||
@@ -97,14 +100,14 @@ class HTMLPurifier_Printer_ConfigForm extends HTMLPurifier_Printer
|
|||||||
$ret .= $this->renderNamespace($ns, $directives);
|
$ret .= $this->renderNamespace($ns, $directives);
|
||||||
}
|
}
|
||||||
if ($render_controls) {
|
if ($render_controls) {
|
||||||
$ret .= $this->start('tfoot');
|
$ret .= $this->start('tbody');
|
||||||
$ret .= $this->start('tr');
|
$ret .= $this->start('tr');
|
||||||
$ret .= $this->start('td', array('colspan' => 2, 'class' => 'controls'));
|
$ret .= $this->start('td', array('colspan' => 2, 'class' => 'controls'));
|
||||||
$ret .= $this->elementEmpty('input', array('type' => 'Submit', 'value' => 'Submit'));
|
$ret .= $this->elementEmpty('input', array('type' => 'submit', 'value' => 'Submit'));
|
||||||
$ret .= '[<a href="?">Reset</a>]';
|
$ret .= '[<a href="?">Reset</a>]';
|
||||||
$ret .= $this->end('td');
|
$ret .= $this->end('td');
|
||||||
$ret .= $this->end('tr');
|
$ret .= $this->end('tr');
|
||||||
$ret .= $this->end('tfoot');
|
$ret .= $this->end('tbody');
|
||||||
}
|
}
|
||||||
$ret .= $this->end('table');
|
$ret .= $this->end('table');
|
||||||
return $ret;
|
return $ret;
|
||||||
|
@@ -102,6 +102,7 @@ class HTMLPurifier_Printer_HTMLDefinition extends HTMLPurifier_Printer
|
|||||||
$ret .= $this->element('td', $this->listifyTagLookup($lookup));
|
$ret .= $this->element('td', $this->listifyTagLookup($lookup));
|
||||||
$ret .= $this->end('tr');
|
$ret .= $this->end('tr');
|
||||||
}
|
}
|
||||||
|
$ret .= $this->end('table');
|
||||||
return $ret;
|
return $ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -179,7 +180,8 @@ class HTMLPurifier_Printer_HTMLDefinition extends HTMLPurifier_Printer
|
|||||||
$def->validateChildren(array(), $this->config, $context);
|
$def->validateChildren(array(), $this->config, $context);
|
||||||
}
|
}
|
||||||
$elements = $def->elements;
|
$elements = $def->elements;
|
||||||
} elseif ($def->type == 'chameleon') {
|
}
|
||||||
|
if ($def->type == 'chameleon') {
|
||||||
$attr['rowspan'] = 2;
|
$attr['rowspan'] = 2;
|
||||||
} elseif ($def->type == 'empty') {
|
} elseif ($def->type == 'empty') {
|
||||||
$elements = array();
|
$elements = array();
|
||||||
|
@@ -195,7 +195,7 @@ class HTMLPurifier_Strategy_FixNesting extends HTMLPurifier_Strategy
|
|||||||
//################################################################//
|
//################################################################//
|
||||||
// Process result by interpreting $result
|
// Process result by interpreting $result
|
||||||
|
|
||||||
if ($result === true) {
|
if ($result === true || $child_tokens === $result) {
|
||||||
// leave the node as is
|
// leave the node as is
|
||||||
|
|
||||||
// register start token as a parental node start
|
// register start token as a parental node start
|
||||||
|
@@ -36,28 +36,23 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
|
|||||||
|
|
||||||
$definition = $config->getHTMLDefinition();
|
$definition = $config->getHTMLDefinition();
|
||||||
|
|
||||||
// CurrentNesting
|
// local variables
|
||||||
$this->currentNesting = array();
|
|
||||||
$context->register('CurrentNesting', $this->currentNesting);
|
|
||||||
|
|
||||||
// InputIndex
|
|
||||||
$this->inputIndex = false;
|
|
||||||
$context->register('InputIndex', $this->inputIndex);
|
|
||||||
|
|
||||||
// InputTokens
|
|
||||||
$context->register('InputTokens', $tokens);
|
|
||||||
$this->inputTokens =& $tokens;
|
|
||||||
|
|
||||||
// OutputTokens
|
|
||||||
$result = array();
|
$result = array();
|
||||||
$this->outputTokens =& $result;
|
|
||||||
|
|
||||||
// %Core.EscapeInvalidTags
|
|
||||||
$escape_invalid_tags = $config->get('Core', 'EscapeInvalidTags');
|
|
||||||
$generator = new HTMLPurifier_Generator();
|
$generator = new HTMLPurifier_Generator();
|
||||||
|
$escape_invalid_tags = $config->get('Core', 'EscapeInvalidTags');
|
||||||
$e =& $context->get('ErrorCollector', true);
|
$e =& $context->get('ErrorCollector', true);
|
||||||
|
|
||||||
|
// member variables
|
||||||
|
$this->currentNesting = array();
|
||||||
|
$this->inputIndex = false;
|
||||||
|
$this->inputTokens =& $tokens;
|
||||||
|
$this->outputTokens =& $result;
|
||||||
|
|
||||||
|
// context variables
|
||||||
|
$context->register('CurrentNesting', $this->currentNesting);
|
||||||
|
$context->register('InputIndex', $this->inputIndex);
|
||||||
|
$context->register('InputTokens', $tokens);
|
||||||
|
|
||||||
// -- begin INJECTOR --
|
// -- begin INJECTOR --
|
||||||
|
|
||||||
$this->injectors = array();
|
$this->injectors = array();
|
||||||
@@ -95,6 +90,10 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
|
|||||||
trigger_error("Cannot enable $name injector because $error is not allowed", E_USER_WARNING);
|
trigger_error("Cannot enable $name injector because $error is not allowed", E_USER_WARNING);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// warning: most foreach loops follow the convention $i => $x.
|
||||||
|
// be sure, for PHP4 compatibility, to only perform write operations
|
||||||
|
// directly referencing the object using $i: $x is only safe for reads
|
||||||
|
|
||||||
// -- end INJECTOR --
|
// -- end INJECTOR --
|
||||||
|
|
||||||
$token = false;
|
$token = false;
|
||||||
@@ -105,6 +104,8 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
|
|||||||
// if all goes well, this token will be passed through unharmed
|
// if all goes well, this token will be passed through unharmed
|
||||||
$token = $tokens[$this->inputIndex];
|
$token = $tokens[$this->inputIndex];
|
||||||
|
|
||||||
|
//printTokens($tokens, $this->inputIndex);
|
||||||
|
|
||||||
foreach ($this->injectors as $i => $x) {
|
foreach ($this->injectors as $i => $x) {
|
||||||
if ($x->skip > 0) $this->injectors[$i]->skip--;
|
if ($x->skip > 0) $this->injectors[$i]->skip--;
|
||||||
}
|
}
|
||||||
@@ -114,7 +115,7 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
|
|||||||
if ($token->type === 'text') {
|
if ($token->type === 'text') {
|
||||||
// injector handler code; duplicated for performance reasons
|
// injector handler code; duplicated for performance reasons
|
||||||
foreach ($this->injectors as $i => $x) {
|
foreach ($this->injectors as $i => $x) {
|
||||||
if (!$x->skip) $x->handleText($token);
|
if (!$x->skip) $this->injectors[$i]->handleText($token);
|
||||||
if (is_array($token)) {
|
if (is_array($token)) {
|
||||||
$this->currentInjector = $i;
|
$this->currentInjector = $i;
|
||||||
break;
|
break;
|
||||||
@@ -157,10 +158,9 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
|
|||||||
// the parent
|
// the parent
|
||||||
if (!isset($parent_info->child->elements[$token->name])) {
|
if (!isset($parent_info->child->elements[$token->name])) {
|
||||||
if ($e) $e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag auto closed', $parent);
|
if ($e) $e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag auto closed', $parent);
|
||||||
// close the parent, then append the token
|
// close the parent, then re-loop to reprocess token
|
||||||
$result[] = new HTMLPurifier_Token_End($parent->name);
|
$result[] = new HTMLPurifier_Token_End($parent->name);
|
||||||
$result[] = $token;
|
$this->inputIndex--;
|
||||||
$this->currentNesting[] = $token;
|
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -172,7 +172,7 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
|
|||||||
// injector handler code; duplicated for performance reasons
|
// injector handler code; duplicated for performance reasons
|
||||||
if ($ok) {
|
if ($ok) {
|
||||||
foreach ($this->injectors as $i => $x) {
|
foreach ($this->injectors as $i => $x) {
|
||||||
if (!$x->skip) $x->handleElement($token);
|
if (!$x->skip) $this->injectors[$i]->handleElement($token);
|
||||||
if (is_array($token)) {
|
if (is_array($token)) {
|
||||||
$this->currentInjector = $i;
|
$this->currentInjector = $i;
|
||||||
break;
|
break;
|
||||||
@@ -202,6 +202,9 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
|
|||||||
$current_parent = array_pop($this->currentNesting);
|
$current_parent = array_pop($this->currentNesting);
|
||||||
if ($current_parent->name == $token->name) {
|
if ($current_parent->name == $token->name) {
|
||||||
$result[] = $token;
|
$result[] = $token;
|
||||||
|
foreach ($this->injectors as $i => $x) {
|
||||||
|
$this->injectors[$i]->notifyEnd($token);
|
||||||
|
}
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -238,16 +241,16 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
|
|||||||
|
|
||||||
// okay, we found it, close all the skipped tags
|
// okay, we found it, close all the skipped tags
|
||||||
// note that skipped tags contains the element we need closed
|
// note that skipped tags contains the element we need closed
|
||||||
$size = count($skipped_tags);
|
for ($i = count($skipped_tags) - 1; $i >= 0; $i--) {
|
||||||
for ($i = $size - 1; $i > 0; $i--) {
|
if ($i && $e && !isset($skipped_tags[$i]->armor['MakeWellFormed_TagClosedError'])) {
|
||||||
if ($e && !isset($skipped_tags[$i]->armor['MakeWellFormed_TagClosedError'])) {
|
|
||||||
$e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag closed by element end', $skipped_tags[$i]);
|
$e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag closed by element end', $skipped_tags[$i]);
|
||||||
}
|
}
|
||||||
$result[] = new HTMLPurifier_Token_End($skipped_tags[$i]->name);
|
$result[] = $new_token = new HTMLPurifier_Token_End($skipped_tags[$i]->name);
|
||||||
|
foreach ($this->injectors as $j => $x) { // $j, not $i!!!
|
||||||
|
$this->injectors[$j]->notifyEnd($new_token);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
$result[] = new HTMLPurifier_Token_End($skipped_tags[$i]->name);
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
$context->destroy('CurrentNesting');
|
$context->destroy('CurrentNesting');
|
||||||
@@ -255,17 +258,18 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
|
|||||||
$context->destroy('InputIndex');
|
$context->destroy('InputIndex');
|
||||||
$context->destroy('CurrentToken');
|
$context->destroy('CurrentToken');
|
||||||
|
|
||||||
// we're at the end now, fix all still unclosed tags
|
// we're at the end now, fix all still unclosed tags (this is
|
||||||
// not using processToken() because at this point we don't
|
// duplicated from the end of the loop with some slight modifications)
|
||||||
// care about current nesting
|
// not using $skipped_tags since it would invariably be all of them
|
||||||
if (!empty($this->currentNesting)) {
|
if (!empty($this->currentNesting)) {
|
||||||
$size = count($this->currentNesting);
|
for ($i = count($this->currentNesting) - 1; $i >= 0; $i--) {
|
||||||
for ($i = $size - 1; $i >= 0; $i--) {
|
|
||||||
if ($e && !isset($this->currentNesting[$i]->armor['MakeWellFormed_TagClosedError'])) {
|
if ($e && !isset($this->currentNesting[$i]->armor['MakeWellFormed_TagClosedError'])) {
|
||||||
$e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag closed by document end', $this->currentNesting[$i]);
|
$e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag closed by document end', $this->currentNesting[$i]);
|
||||||
}
|
}
|
||||||
$result[] =
|
$result[] = $new_token = new HTMLPurifier_Token_End($this->currentNesting[$i]->name);
|
||||||
new HTMLPurifier_Token_End($this->currentNesting[$i]->name);
|
foreach ($this->injectors as $j => $x) { // $j, not $i!!!
|
||||||
|
$this->injectors[$j]->notifyEnd($new_token);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -286,8 +290,14 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
|
|||||||
|
|
||||||
// adjust the injector skips based on the array substitution
|
// adjust the injector skips based on the array substitution
|
||||||
if ($this->injectors) {
|
if ($this->injectors) {
|
||||||
$offset = count($token) + 1;
|
$offset = count($token);
|
||||||
for ($i = 0; $i <= $this->currentInjector; $i++) {
|
for ($i = 0; $i <= $this->currentInjector; $i++) {
|
||||||
|
// because of the skip back, we need to add one more
|
||||||
|
// for uninitialized injectors. I'm not exactly
|
||||||
|
// sure why this is the case, but I think it has to
|
||||||
|
// do with the fact that we're decrementing skips
|
||||||
|
// before re-checking text
|
||||||
|
if (!$this->injectors[$i]->skip) $this->injectors[$i]->skip++;
|
||||||
$this->injectors[$i]->skip += $offset;
|
$this->injectors[$i]->skip += $offset;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@@ -116,6 +116,7 @@ class HTMLPurifier_Strategy_RemoveForeignElements extends HTMLPurifier_Strategy
|
|||||||
// mostly everything's good, but
|
// mostly everything's good, but
|
||||||
// we need to make sure required attributes are in order
|
// we need to make sure required attributes are in order
|
||||||
if (
|
if (
|
||||||
|
($token->type === 'start' || $token->type === 'empty') &&
|
||||||
$definition->info[$token->name]->required_attr &&
|
$definition->info[$token->name]->required_attr &&
|
||||||
($token->name != 'img' || $remove_invalid_img) // ensure config option still works
|
($token->name != 'img' || $remove_invalid_img) // ensure config option still works
|
||||||
) {
|
) {
|
||||||
@@ -134,7 +135,6 @@ class HTMLPurifier_Strategy_RemoveForeignElements extends HTMLPurifier_Strategy
|
|||||||
$token->armor['ValidateAttributes'] = true;
|
$token->armor['ValidateAttributes'] = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
// CAN BE GENERICIZED
|
|
||||||
if (isset($hidden_elements[$token->name]) && $token->type == 'start') {
|
if (isset($hidden_elements[$token->name]) && $token->type == 'start') {
|
||||||
$textify_comments = $token->name;
|
$textify_comments = $token->name;
|
||||||
} elseif ($token->name === $textify_comments && $token->type == 'end') {
|
} elseif ($token->name === $textify_comments && $token->type == 'end') {
|
||||||
|
@@ -6,10 +6,6 @@ require_once 'HTMLPurifier/IDAccumulator.php';
|
|||||||
|
|
||||||
require_once 'HTMLPurifier/AttrValidator.php';
|
require_once 'HTMLPurifier/AttrValidator.php';
|
||||||
|
|
||||||
HTMLPurifier_ConfigSchema::define(
|
|
||||||
'Attr', 'IDBlacklist', array(), 'list',
|
|
||||||
'Array of IDs not allowed in the document.');
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Validate all attributes in the tokens.
|
* Validate all attributes in the tokens.
|
||||||
*/
|
*/
|
||||||
@@ -19,11 +15,6 @@ class HTMLPurifier_Strategy_ValidateAttributes extends HTMLPurifier_Strategy
|
|||||||
|
|
||||||
function execute($tokens, $config, &$context) {
|
function execute($tokens, $config, &$context) {
|
||||||
|
|
||||||
// setup id_accumulator context
|
|
||||||
$id_accumulator = new HTMLPurifier_IDAccumulator();
|
|
||||||
$id_accumulator->load($config->get('Attr', 'IDBlacklist'));
|
|
||||||
$context->register('IDAccumulator', $id_accumulator);
|
|
||||||
|
|
||||||
// setup validator
|
// setup validator
|
||||||
$validator = new HTMLPurifier_AttrValidator();
|
$validator = new HTMLPurifier_AttrValidator();
|
||||||
|
|
||||||
@@ -44,8 +35,6 @@ class HTMLPurifier_Strategy_ValidateAttributes extends HTMLPurifier_Strategy
|
|||||||
|
|
||||||
$tokens[$key] = $token; // for PHP 4
|
$tokens[$key] = $token; // for PHP 4
|
||||||
}
|
}
|
||||||
|
|
||||||
$context->destroy('IDAccumulator');
|
|
||||||
$context->destroy('CurrentToken');
|
$context->destroy('CurrentToken');
|
||||||
|
|
||||||
return $tokens;
|
return $tokens;
|
||||||
|
@@ -4,7 +4,12 @@ require_once 'HTMLPurifier/URIParser.php';
|
|||||||
require_once 'HTMLPurifier/URIFilter.php';
|
require_once 'HTMLPurifier/URIFilter.php';
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* HTML Purifier's internal representation of a URI
|
* HTML Purifier's internal representation of a URI.
|
||||||
|
* @note
|
||||||
|
* Internal data-structures are completely escaped. If the data needs
|
||||||
|
* to be used in a non-URI context (which is very unlikely), be sure
|
||||||
|
* to decode it first. The URI may not necessarily be well-formed until
|
||||||
|
* validate() is called.
|
||||||
*/
|
*/
|
||||||
class HTMLPurifier_URI
|
class HTMLPurifier_URI
|
||||||
{
|
{
|
||||||
@@ -52,13 +57,27 @@ class HTMLPurifier_URI
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Generic validation method applicable for all schemes
|
* Generic validation method applicable for all schemes. May modify
|
||||||
|
* this URI in order to get it into a compliant form.
|
||||||
* @param $config Instance of HTMLPurifier_Config
|
* @param $config Instance of HTMLPurifier_Config
|
||||||
* @param $context Instance of HTMLPurifier_Context
|
* @param $context Instance of HTMLPurifier_Context
|
||||||
* @return True if validation/filtering succeeds, false if failure
|
* @return True if validation/filtering succeeds, false if failure
|
||||||
*/
|
*/
|
||||||
function validate($config, &$context) {
|
function validate($config, &$context) {
|
||||||
|
|
||||||
|
// ABNF definitions from RFC 3986
|
||||||
|
$chars_sub_delims = '!$&\'()*+,;=';
|
||||||
|
$chars_gen_delims = ':/?#[]@';
|
||||||
|
$chars_pchar = $chars_sub_delims . ':@';
|
||||||
|
|
||||||
|
// validate scheme (MUST BE FIRST!)
|
||||||
|
if (!is_null($this->scheme) && is_null($this->host)) {
|
||||||
|
$def = $config->getDefinition('URI');
|
||||||
|
if ($def->defaultScheme === $this->scheme) {
|
||||||
|
$this->scheme = null;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// validate host
|
// validate host
|
||||||
if (!is_null($this->host)) {
|
if (!is_null($this->host)) {
|
||||||
$host_def = new HTMLPurifier_AttrDef_URI_Host();
|
$host_def = new HTMLPurifier_AttrDef_URI_Host();
|
||||||
@@ -66,18 +85,62 @@ class HTMLPurifier_URI
|
|||||||
if ($this->host === false) $this->host = null;
|
if ($this->host === false) $this->host = null;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// validate userinfo
|
||||||
|
if (!is_null($this->userinfo)) {
|
||||||
|
$encoder = new HTMLPurifier_PercentEncoder($chars_sub_delims . ':');
|
||||||
|
$this->userinfo = $encoder->encode($this->userinfo);
|
||||||
|
}
|
||||||
|
|
||||||
// validate port
|
// validate port
|
||||||
if (!is_null($this->port)) {
|
if (!is_null($this->port)) {
|
||||||
if ($this->port < 1 || $this->port > 65535) $this->port = null;
|
if ($this->port < 1 || $this->port > 65535) $this->port = null;
|
||||||
}
|
}
|
||||||
|
|
||||||
// query and fragment are quite simple in terms of definition:
|
// validate path
|
||||||
// *( pchar / "/" / "?" ), so define their validation routines
|
$path_parts = array();
|
||||||
// when we start fixing percent encoding
|
$segments_encoder = new HTMLPurifier_PercentEncoder($chars_pchar . '/');
|
||||||
|
if (!is_null($this->host)) {
|
||||||
|
// path-abempty (hier and relative)
|
||||||
|
$this->path = $segments_encoder->encode($this->path);
|
||||||
|
} elseif ($this->path !== '' && $this->path[0] === '/') {
|
||||||
|
// path-absolute (hier and relative)
|
||||||
|
if (strlen($this->path) >= 2 && $this->path[1] === '/') {
|
||||||
|
// This shouldn't ever happen!
|
||||||
|
$this->path = '';
|
||||||
|
} else {
|
||||||
|
$this->path = $segments_encoder->encode($this->path);
|
||||||
|
}
|
||||||
|
} elseif (!is_null($this->scheme) && $this->path !== '') {
|
||||||
|
// path-rootless (hier)
|
||||||
|
// Short circuit evaluation means we don't need to check nz
|
||||||
|
$this->path = $segments_encoder->encode($this->path);
|
||||||
|
} elseif (is_null($this->scheme) && $this->path !== '') {
|
||||||
|
// path-noscheme (relative)
|
||||||
|
// (once again, not checking nz)
|
||||||
|
$segment_nc_encoder = new HTMLPurifier_PercentEncoder($chars_sub_delims . '@');
|
||||||
|
$c = strpos($this->path, '/');
|
||||||
|
if ($c !== false) {
|
||||||
|
$this->path =
|
||||||
|
$segment_nc_encoder->encode(substr($this->path, 0, $c)) .
|
||||||
|
$segments_encoder->encode(substr($this->path, $c));
|
||||||
|
} else {
|
||||||
|
$this->path = $segment_nc_encoder->encode($this->path);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// path-empty (hier and relative)
|
||||||
|
$this->path = ''; // just to be safe
|
||||||
|
}
|
||||||
|
|
||||||
// path gets to be validated against a hodge-podge of rules depending
|
// qf = query and fragment
|
||||||
// on the status of authority and scheme, but it's not that important,
|
$qf_encoder = new HTMLPurifier_PercentEncoder($chars_pchar . '/?');
|
||||||
// esp. since it won't be applicable to everyone
|
|
||||||
|
if (!is_null($this->query)) {
|
||||||
|
$this->query = $qf_encoder->encode($this->query);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!is_null($this->fragment)) {
|
||||||
|
$this->fragment = $qf_encoder->encode($this->fragment);
|
||||||
|
}
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
|
|
||||||
|
@@ -1,10 +1,22 @@
|
|||||||
<?php
|
<?php
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Chainable filters for custom URI processing
|
* Chainable filters for custom URI processing.
|
||||||
|
*
|
||||||
|
* These filters can perform custom actions on a URI filter object,
|
||||||
|
* including transformation or blacklisting.
|
||||||
|
*
|
||||||
|
* @warning This filter is called before scheme object validation occurs.
|
||||||
|
* Make sure, if you require a specific scheme object, you
|
||||||
|
* check that it exists. This allows filters to convert
|
||||||
|
* proprietary URI schemes into regular ones.
|
||||||
*/
|
*/
|
||||||
class HTMLPurifier_URIFilter
|
class HTMLPurifier_URIFilter
|
||||||
{
|
{
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Unique identifier of filter
|
||||||
|
*/
|
||||||
var $name;
|
var $name;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -17,8 +29,12 @@ class HTMLPurifier_URIFilter
|
|||||||
* @param &$uri Reference to URI object
|
* @param &$uri Reference to URI object
|
||||||
* @param $config Instance of HTMLPurifier_Config
|
* @param $config Instance of HTMLPurifier_Config
|
||||||
* @param &$context Instance of HTMLPurifier_Context
|
* @param &$context Instance of HTMLPurifier_Context
|
||||||
|
* @return bool Whether or not to continue processing: false indicates
|
||||||
|
* URL is no good, true indicates continue processing. Note that
|
||||||
|
* all changes are committed directly on the URI object
|
||||||
*/
|
*/
|
||||||
function filter(&$uri, $config, &$context) {
|
function filter(&$uri, $config, &$context) {
|
||||||
trigger_error('Cannot call abstract function', E_USER_ERROR);
|
trigger_error('Cannot call abstract function', E_USER_ERROR);
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@@ -47,6 +47,10 @@ class HTMLPurifier_URIFilter_MakeAbsolute extends HTMLPurifier_URIFilter
|
|||||||
// absolute URI already: don't change
|
// absolute URI already: don't change
|
||||||
if (!is_null($uri->host)) return true;
|
if (!is_null($uri->host)) return true;
|
||||||
$scheme_obj = $uri->getSchemeObj($config, $context);
|
$scheme_obj = $uri->getSchemeObj($config, $context);
|
||||||
|
if (!$scheme_obj) {
|
||||||
|
// scheme not recognized
|
||||||
|
return false;
|
||||||
|
}
|
||||||
if (!$scheme_obj->hierarchical) {
|
if (!$scheme_obj->hierarchical) {
|
||||||
// non-hierarchical URI with explicit scheme, don't change
|
// non-hierarchical URI with explicit scheme, don't change
|
||||||
return true;
|
return true;
|
||||||
|
@@ -4,24 +4,39 @@ require_once 'HTMLPurifier/URI.php';
|
|||||||
|
|
||||||
/**
|
/**
|
||||||
* Parses a URI into the components and fragment identifier as specified
|
* Parses a URI into the components and fragment identifier as specified
|
||||||
* by RFC 2396.
|
* by RFC 3986.
|
||||||
* @todo Replace regexps with a native PHP parser
|
|
||||||
*/
|
*/
|
||||||
class HTMLPurifier_URIParser
|
class HTMLPurifier_URIParser
|
||||||
{
|
{
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Parses a URI
|
* Instance of HTMLPurifier_PercentEncoder to do normalization with.
|
||||||
|
*/
|
||||||
|
var $percentEncoder;
|
||||||
|
|
||||||
|
function HTMLPurifier_URIParser() {
|
||||||
|
$this->percentEncoder = new HTMLPurifier_PercentEncoder();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Parses a URI.
|
||||||
* @param $uri string URI to parse
|
* @param $uri string URI to parse
|
||||||
* @return HTMLPurifier_URI representation of URI
|
* @return HTMLPurifier_URI representation of URI. This representation has
|
||||||
|
* not been validated yet and may not conform to RFC.
|
||||||
*/
|
*/
|
||||||
function parse($uri) {
|
function parse($uri) {
|
||||||
|
|
||||||
|
$uri = $this->percentEncoder->normalize($uri);
|
||||||
|
|
||||||
|
// Regexp is as per Appendix B.
|
||||||
|
// Note that ["<>] are an addition to the RFC's recommended
|
||||||
|
// characters, because they represent external delimiters.
|
||||||
$r_URI = '!'.
|
$r_URI = '!'.
|
||||||
'(([^:/?#<>\'"]+):)?'. // 2. Scheme
|
'(([^:/?#"<>]+):)?'. // 2. Scheme
|
||||||
'(//([^/?#<>\'"]*))?'. // 4. Authority
|
'(//([^/?#"<>]*))?'. // 4. Authority
|
||||||
'([^?#<>\'"]*)'. // 5. Path
|
'([^?#"<>]*)'. // 5. Path
|
||||||
'(\?([^#<>\'"]*))?'. // 7. Query
|
'(\?([^#"<>]*))?'. // 7. Query
|
||||||
'(#([^<>\'"]*))?'. // 8. Fragment
|
'(#([^"<>]*))?'. // 8. Fragment
|
||||||
'!';
|
'!';
|
||||||
|
|
||||||
$matches = array();
|
$matches = array();
|
||||||
@@ -38,13 +53,7 @@ class HTMLPurifier_URIParser
|
|||||||
|
|
||||||
// further parse authority
|
// further parse authority
|
||||||
if ($authority !== null) {
|
if ($authority !== null) {
|
||||||
// ridiculously inefficient: it's a stacked regex!
|
$r_authority = "/^((.+?)@)?(\[[^\]]+\]|[^:]*)(:(\d*))?/";
|
||||||
$HEXDIG = '[A-Fa-f0-9]';
|
|
||||||
$unreserved = 'A-Za-z0-9-._~'; // make sure you wrap with []
|
|
||||||
$sub_delims = '!$&\'()'; // needs []
|
|
||||||
$pct_encoded = "%$HEXDIG$HEXDIG";
|
|
||||||
$r_userinfo = "(?:[$unreserved$sub_delims:]|$pct_encoded)*";
|
|
||||||
$r_authority = "/^(($r_userinfo)@)?(\[[^\]]+\]|[^:]*)(:(\d*))?/";
|
|
||||||
$matches = array();
|
$matches = array();
|
||||||
preg_match($r_authority, $authority, $matches);
|
preg_match($r_authority, $authority, $matches);
|
||||||
$userinfo = !empty($matches[1]) ? $matches[2] : null;
|
$userinfo = !empty($matches[1]) ? $matches[2] : null;
|
||||||
|
@@ -1,5 +1,12 @@
|
|||||||
<?php
|
<?php
|
||||||
|
|
||||||
|
require_once 'HTMLPurifier/URIScheme/http.php';
|
||||||
|
require_once 'HTMLPurifier/URIScheme/https.php';
|
||||||
|
require_once 'HTMLPurifier/URIScheme/mailto.php';
|
||||||
|
require_once 'HTMLPurifier/URIScheme/ftp.php';
|
||||||
|
require_once 'HTMLPurifier/URIScheme/nntp.php';
|
||||||
|
require_once 'HTMLPurifier/URIScheme/news.php';
|
||||||
|
|
||||||
HTMLPurifier_ConfigSchema::define(
|
HTMLPurifier_ConfigSchema::define(
|
||||||
'URI', 'AllowedSchemes', array(
|
'URI', 'AllowedSchemes', array(
|
||||||
'http' => true, // "Hypertext Transfer Protocol", nuf' said
|
'http' => true, // "Hypertext Transfer Protocol", nuf' said
|
||||||
@@ -7,7 +14,6 @@ HTMLPurifier_ConfigSchema::define(
|
|||||||
// quite useful, but not necessary
|
// quite useful, but not necessary
|
||||||
'mailto' => true,// Email
|
'mailto' => true,// Email
|
||||||
'ftp' => true, // "File Transfer Protocol"
|
'ftp' => true, // "File Transfer Protocol"
|
||||||
'irc' => true, // "Internet Relay Chat", usually needs another app
|
|
||||||
// for Usenet, these two are similar, but distinct
|
// for Usenet, these two are similar, but distinct
|
||||||
'nntp' => true, // individual Netnews articles
|
'nntp' => true, // individual Netnews articles
|
||||||
'news' => true // newsgroup or individual Netnews articles
|
'news' => true // newsgroup or individual Netnews articles
|
||||||
@@ -54,12 +60,6 @@ class HTMLPurifier_URISchemeRegistry
|
|||||||
*/
|
*/
|
||||||
var $schemes = array();
|
var $schemes = array();
|
||||||
|
|
||||||
/**
|
|
||||||
* Directory where scheme objects can be found
|
|
||||||
* @private
|
|
||||||
*/
|
|
||||||
var $_scheme_dir = null;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Retrieves a scheme validator object
|
* Retrieves a scheme validator object
|
||||||
* @param $scheme String scheme name like http or mailto
|
* @param $scheme String scheme name like http or mailto
|
||||||
@@ -79,21 +79,16 @@ class HTMLPurifier_URISchemeRegistry
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (isset($this->schemes[$scheme])) return $this->schemes[$scheme];
|
if (isset($this->schemes[$scheme])) return $this->schemes[$scheme];
|
||||||
if (empty($this->_dir)) $this->_dir = HTMLPURIFIER_PREFIX . '/HTMLPurifier/URIScheme/';
|
|
||||||
|
|
||||||
if (!isset($allowed_schemes[$scheme])) return $null;
|
if (!isset($allowed_schemes[$scheme])) return $null;
|
||||||
|
|
||||||
// this bit of reflection is not very efficient, and a bit
|
|
||||||
// hacky too
|
|
||||||
$class = 'HTMLPurifier_URIScheme_' . $scheme;
|
$class = 'HTMLPurifier_URIScheme_' . $scheme;
|
||||||
if (!class_exists($class)) include_once $this->_dir . $scheme . '.php';
|
|
||||||
if (!class_exists($class)) return $null;
|
if (!class_exists($class)) return $null;
|
||||||
$this->schemes[$scheme] = new $class();
|
$this->schemes[$scheme] = new $class();
|
||||||
return $this->schemes[$scheme];
|
return $this->schemes[$scheme];
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Registers a custom scheme to the cache.
|
* Registers a custom scheme to the cache, bypassing reflection.
|
||||||
* @param $scheme Scheme name
|
* @param $scheme Scheme name
|
||||||
* @param $scheme_obj HTMLPurifier_URIScheme object
|
* @param $scheme_obj HTMLPurifier_URIScheme object
|
||||||
*/
|
*/
|
||||||
|
241
library/HTMLPurifier/UnitConverter.php
Normal file
241
library/HTMLPurifier/UnitConverter.php
Normal file
@@ -0,0 +1,241 @@
|
|||||||
|
<?php
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Class for converting between different unit-lengths as specified by
|
||||||
|
* CSS.
|
||||||
|
*/
|
||||||
|
class HTMLPurifier_UnitConverter
|
||||||
|
{
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Minimum bcmath precision for output.
|
||||||
|
*/
|
||||||
|
var $outputPrecision;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Bcmath precision for internal calculations.
|
||||||
|
*/
|
||||||
|
var $internalPrecision;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Whether or not BCMath is available
|
||||||
|
*/
|
||||||
|
var $bcmath;
|
||||||
|
|
||||||
|
function HTMLPurifier_UnitConverter($output_precision = 4, $internal_precision = 10, $force_no_bcmath = false) {
|
||||||
|
$this->outputPrecision = $output_precision;
|
||||||
|
$this->internalPrecision = $internal_precision;
|
||||||
|
$this->bcmath = !$force_no_bcmath && function_exists('bcmul');
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Converts a length object of one unit into another unit.
|
||||||
|
* @param HTMLPurifier_Length $length
|
||||||
|
* Instance of HTMLPurifier_Length to convert. You must validate()
|
||||||
|
* it before passing it here!
|
||||||
|
* @param string $to_unit
|
||||||
|
* Unit to convert to.
|
||||||
|
* @note
|
||||||
|
* About precision: This conversion function pays very special
|
||||||
|
* attention to the incoming precision of values and attempts
|
||||||
|
* to maintain the number of significant figures. Results are
|
||||||
|
* fairly accurate up to nine digits. Some caveats:
|
||||||
|
* - If a number is zero-padded as a result of this significant
|
||||||
|
* figure tracking, the zeroes will be eliminated.
|
||||||
|
* - If a number contains less than four sigfigs ($outputPrecision)
|
||||||
|
* and this causes some decimals to be excluded, those
|
||||||
|
* decimals will be added on.
|
||||||
|
*/
|
||||||
|
function convert($length, $to_unit) {
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Units information array. Units are grouped into measuring systems
|
||||||
|
* (English, Metric), and are assigned an integer representing
|
||||||
|
* the conversion factor between that unit and the smallest unit in
|
||||||
|
* the system. Numeric indexes are actually magical constants that
|
||||||
|
* encode conversion data from one system to the next, with a O(n^2)
|
||||||
|
* constraint on memory (this is generally not a problem, since
|
||||||
|
* the number of measuring systems is small.)
|
||||||
|
*/
|
||||||
|
static $units = array(
|
||||||
|
1 => array(
|
||||||
|
'px' => 3, // This is as per CSS 2.1 and Firefox. Your mileage may vary
|
||||||
|
'pt' => 4,
|
||||||
|
'pc' => 48,
|
||||||
|
'in' => 288,
|
||||||
|
2 => array('pt', '0.352777778', 'mm'),
|
||||||
|
),
|
||||||
|
2 => array(
|
||||||
|
'mm' => 1,
|
||||||
|
'cm' => 10,
|
||||||
|
1 => array('mm', '2.83464567', 'pt'),
|
||||||
|
),
|
||||||
|
);
|
||||||
|
|
||||||
|
if (!$length->isValid()) return false;
|
||||||
|
|
||||||
|
$n = $length->getN();
|
||||||
|
$unit = $length->getUnit();
|
||||||
|
|
||||||
|
if ($n === '0' || $unit === false) {
|
||||||
|
return new HTMLPurifier_Length('0', false);
|
||||||
|
}
|
||||||
|
|
||||||
|
$state = $dest_state = false;
|
||||||
|
foreach ($units as $k => $x) {
|
||||||
|
if (isset($x[$unit])) $state = $k;
|
||||||
|
if (isset($x[$to_unit])) $dest_state = $k;
|
||||||
|
}
|
||||||
|
if (!$state || !$dest_state) return false;
|
||||||
|
|
||||||
|
// Some calculations about the initial precision of the number;
|
||||||
|
// this will be useful when we need to do final rounding.
|
||||||
|
$sigfigs = $this->getSigFigs($n);
|
||||||
|
if ($sigfigs < $this->outputPrecision) $sigfigs = $this->outputPrecision;
|
||||||
|
|
||||||
|
// Cleanup $n for PHP 4.3.9 and 4.3.10. See http://bugs.php.net/bug.php?id=30726
|
||||||
|
if (strncmp($n, '-.', 2) === 0) {
|
||||||
|
$n = '-0.' . substr($n, 2);
|
||||||
|
}
|
||||||
|
|
||||||
|
// BCMath's internal precision deals only with decimals. Use
|
||||||
|
// our default if the initial number has no decimals, or increase
|
||||||
|
// it by how ever many decimals, thus, the number of guard digits
|
||||||
|
// will always be greater than or equal to internalPrecision.
|
||||||
|
$log = (int) floor(log(abs($n), 10));
|
||||||
|
$cp = ($log < 0) ? $this->internalPrecision - $log : $this->internalPrecision; // internal precision
|
||||||
|
|
||||||
|
for ($i = 0; $i < 2; $i++) {
|
||||||
|
|
||||||
|
// Determine what unit IN THIS SYSTEM we need to convert to
|
||||||
|
if ($dest_state === $state) {
|
||||||
|
// Simple conversion
|
||||||
|
$dest_unit = $to_unit;
|
||||||
|
} else {
|
||||||
|
// Convert to the smallest unit, pending a system shift
|
||||||
|
$dest_unit = $units[$state][$dest_state][0];
|
||||||
|
}
|
||||||
|
|
||||||
|
// Do the conversion if necessary
|
||||||
|
if ($dest_unit !== $unit) {
|
||||||
|
$factor = $this->div($units[$state][$unit], $units[$state][$dest_unit], $cp);
|
||||||
|
$n = $this->mul($n, $factor, $cp);
|
||||||
|
$unit = $dest_unit;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Output was zero, so bail out early. Shouldn't ever happen.
|
||||||
|
if ($n === '') {
|
||||||
|
$n = '0';
|
||||||
|
$unit = $to_unit;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
// It was a simple conversion, so bail out
|
||||||
|
if ($dest_state === $state) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
if ($i !== 0) {
|
||||||
|
// Conversion failed! Apparently, the system we forwarded
|
||||||
|
// to didn't have this unit. This should never happen!
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Pre-condition: $i == 0
|
||||||
|
|
||||||
|
// Perform conversion to next system of units
|
||||||
|
$n = $this->mul($n, $units[$state][$dest_state][1], $cp);
|
||||||
|
$unit = $units[$state][$dest_state][2];
|
||||||
|
$state = $dest_state;
|
||||||
|
|
||||||
|
// One more loop around to convert the unit in the new system.
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
// Post-condition: $unit == $to_unit
|
||||||
|
if ($unit !== $to_unit) return false;
|
||||||
|
|
||||||
|
// Useful for debugging:
|
||||||
|
//echo "<pre>n";
|
||||||
|
//echo "$n\nsigfigs = $sigfigs\nnew_log = $new_log\nlog = $log\nrp = $rp\n</pre>\n";
|
||||||
|
|
||||||
|
$n = $this->round($n, $sigfigs);
|
||||||
|
if (strpos($n, '.') !== false) $n = rtrim($n, '0');
|
||||||
|
$n = rtrim($n, '.');
|
||||||
|
|
||||||
|
return new HTMLPurifier_Length($n, $unit);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns the number of significant figures in a string number.
|
||||||
|
* @param string $n Decimal number
|
||||||
|
* @return int number of sigfigs
|
||||||
|
*/
|
||||||
|
function getSigFigs($n) {
|
||||||
|
$n = ltrim($n, '0+-');
|
||||||
|
$dp = strpos($n, '.'); // decimal position
|
||||||
|
if ($dp === false) {
|
||||||
|
$sigfigs = strlen(rtrim($n, '0'));
|
||||||
|
} else {
|
||||||
|
$sigfigs = strlen(ltrim($n, '0.')); // eliminate extra decimal character
|
||||||
|
if ($dp !== 0) $sigfigs--;
|
||||||
|
}
|
||||||
|
return $sigfigs;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Adds two numbers, using arbitrary precision when available.
|
||||||
|
*/
|
||||||
|
function add($s1, $s2, $scale) {
|
||||||
|
if ($this->bcmath) return bcadd($s1, $s2, $scale);
|
||||||
|
else return $this->scale($s1 + $s2, $scale);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Multiplies two numbers, using arbitrary precision when available.
|
||||||
|
*/
|
||||||
|
function mul($s1, $s2, $scale) {
|
||||||
|
if ($this->bcmath) return bcmul($s1, $s2, $scale);
|
||||||
|
else return $this->scale($s1 * $s2, $scale);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Divides two numbers, using arbitrary precision when available.
|
||||||
|
*/
|
||||||
|
function div($s1, $s2, $scale) {
|
||||||
|
if ($this->bcmath) return bcdiv($s1, $s2, $scale);
|
||||||
|
else return $this->scale($s1 / $s2, $scale);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Rounds a number according to the number of sigfigs it should have,
|
||||||
|
* using arbitrary precision when available.
|
||||||
|
*/
|
||||||
|
function round($n, $sigfigs) {
|
||||||
|
$new_log = (int) floor(log(abs($n), 10)); // Number of digits left of decimal - 1
|
||||||
|
$rp = $sigfigs - $new_log - 1; // Number of decimal places needed
|
||||||
|
$neg = $n < 0 ? '-' : ''; // Negative sign
|
||||||
|
if ($this->bcmath) {
|
||||||
|
if ($rp >= 0) {
|
||||||
|
$n = bcadd($n, $neg . '0.' . str_repeat('0', $rp) . '5', $rp + 1);
|
||||||
|
$n = bcdiv($n, '1', $rp);
|
||||||
|
} else {
|
||||||
|
// This algorithm partially depends on the standardized
|
||||||
|
// form of numbers that comes out of bcmath.
|
||||||
|
$n = bcadd($n, $neg . '5' . str_repeat('0', $new_log - $sigfigs), 0);
|
||||||
|
$n = substr($n, 0, $sigfigs + strlen($neg)) . str_repeat('0', $new_log - $sigfigs + 1);
|
||||||
|
}
|
||||||
|
return $n;
|
||||||
|
} else {
|
||||||
|
return $this->scale(round($n, $sigfigs - $new_log - 1), $rp + 1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Scales a float to $scale digits right of decimal point, like BCMath.
|
||||||
|
*/
|
||||||
|
function scale($r, $scale) {
|
||||||
|
return sprintf('%.' . $scale . 'f', (float) $r);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
64
maintenance/PH5P.patch
Normal file
64
maintenance/PH5P.patch
Normal file
@@ -0,0 +1,64 @@
|
|||||||
|
--- C:\Users\Edward\Webs\htmlpurifier\maintenance\PH5P.php 2007-11-04 23:41:49.074543700 -0500
|
||||||
|
+++ C:\Users\Edward\Webs\htmlpurifier\maintenance/PH5P.new.php 2007-11-05 00:23:52.839543700 -0500
|
||||||
|
@@ -211,7 +211,10 @@
|
||||||
|
// If nothing is returned, emit a U+0026 AMPERSAND character token.
|
||||||
|
// Otherwise, emit the character token that was returned.
|
||||||
|
$char = (!$entity) ? '&' : $entity;
|
||||||
|
- $this->emitToken($char);
|
||||||
|
+ $this->emitToken(array(
|
||||||
|
+ 'type' => self::CHARACTR,
|
||||||
|
+ 'data' => $char
|
||||||
|
+ ));
|
||||||
|
|
||||||
|
// Finally, switch to the data state.
|
||||||
|
$this->state = 'data';
|
||||||
|
@@ -708,7 +711,7 @@
|
||||||
|
} elseif($char === '&') {
|
||||||
|
/* U+0026 AMPERSAND (&)
|
||||||
|
Switch to the entity in attribute value state. */
|
||||||
|
- $this->entityInAttributeValueState('non');
|
||||||
|
+ $this->entityInAttributeValueState();
|
||||||
|
|
||||||
|
} elseif($char === '>') {
|
||||||
|
/* U+003E GREATER-THAN SIGN (>)
|
||||||
|
@@ -738,7 +741,8 @@
|
||||||
|
? '&'
|
||||||
|
: $entity;
|
||||||
|
|
||||||
|
- $this->emitToken($char);
|
||||||
|
+ $last = count($this->token['attr']) - 1;
|
||||||
|
+ $this->token['attr'][$last]['value'] .= $char;
|
||||||
|
}
|
||||||
|
|
||||||
|
private function bogusCommentState() {
|
||||||
|
@@ -1066,6 +1070,11 @@
|
||||||
|
$this->char++;
|
||||||
|
|
||||||
|
if(in_array($id, $this->entities)) {
|
||||||
|
+ if ($e_name[$c-1] !== ';') {
|
||||||
|
+ if ($c < $len && $e_name[$c] == ';') {
|
||||||
|
+ $this->char++; // consume extra semicolon
|
||||||
|
+ }
|
||||||
|
+ }
|
||||||
|
$entity = $id;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
@@ -3659,7 +3668,7 @@
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
- private function generateImpliedEndTags(array $exclude = array()) {
|
||||||
|
+ private function generateImpliedEndTags($exclude = array()) {
|
||||||
|
/* When the steps below require the UA to generate implied end tags,
|
||||||
|
then, if the current node is a dd element, a dt element, an li element,
|
||||||
|
a p element, a td element, a th element, or a tr element, the UA must
|
||||||
|
@@ -3673,7 +3682,8 @@
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
- private function getElementCategory($name) {
|
||||||
|
+ private function getElementCategory($node) {
|
||||||
|
+ $name = $node->tagName;
|
||||||
|
if(in_array($name, $this->special))
|
||||||
|
return self::SPECIAL;
|
||||||
|
|
3824
maintenance/PH5P.php
Normal file
3824
maintenance/PH5P.php
Normal file
File diff suppressed because it is too large
Load Diff
@@ -1,5 +1,7 @@
|
|||||||
<?php
|
<?php
|
||||||
|
|
||||||
|
require_once 'compat-function-file-put-contents.php';
|
||||||
|
|
||||||
function assertCli() {
|
function assertCli() {
|
||||||
if (php_sapi_name() != 'cli' && !getenv('PHP_IS_CLI')) {
|
if (php_sapi_name() != 'cli' && !getenv('PHP_IS_CLI')) {
|
||||||
echo 'Script cannot be called from web-browser (if you are calling via cli,
|
echo 'Script cannot be called from web-browser (if you are calling via cli,
|
||||||
@@ -7,3 +9,135 @@ set environment variable PHP_IS_CLI to work around this).';
|
|||||||
exit;
|
exit;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Filesystem tools not provided by default; can recursively create, copy
|
||||||
|
* and delete folders. Some template methods are provided for extensibility.
|
||||||
|
* @note This class must be instantiated to be used, although it does
|
||||||
|
* not maintain state.
|
||||||
|
*/
|
||||||
|
class FSTools
|
||||||
|
{
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Recursively creates a directory
|
||||||
|
* @param string $folder Name of folder to create
|
||||||
|
* @note Adapted from the PHP manual comment 76612
|
||||||
|
*/
|
||||||
|
function mkdir($folder) {
|
||||||
|
$folders = preg_split("#[\\\\/]#", $folder);
|
||||||
|
$base = '';
|
||||||
|
for($i = 0, $c = count($folders); $i < $c; $i++) {
|
||||||
|
if(empty($folders[$i])) {
|
||||||
|
if (!$i) {
|
||||||
|
// special case for root level
|
||||||
|
$base .= DIRECTORY_SEPARATOR;
|
||||||
|
}
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
$base .= $folders[$i];
|
||||||
|
if(!is_dir($base)){
|
||||||
|
mkdir($base);
|
||||||
|
}
|
||||||
|
$base .= DIRECTORY_SEPARATOR;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Copy a file, or recursively copy a folder and its contents; modified
|
||||||
|
* so that copied files, if PHP, have includes removed
|
||||||
|
*
|
||||||
|
* @author Aidan Lister <aidan@php.net>
|
||||||
|
* @version 1.0.1-modified
|
||||||
|
* @link http://aidanlister.com/repos/v/function.copyr.php
|
||||||
|
* @param string $source Source path
|
||||||
|
* @param string $dest Destination path
|
||||||
|
* @return bool Returns TRUE on success, FALSE on failure
|
||||||
|
*/
|
||||||
|
function copyr($source, $dest) {
|
||||||
|
// Simple copy for a file
|
||||||
|
if (is_file($source)) {
|
||||||
|
return $this->copy($source, $dest);
|
||||||
|
}
|
||||||
|
// Make destination directory
|
||||||
|
if (!is_dir($dest)) {
|
||||||
|
mkdir($dest);
|
||||||
|
}
|
||||||
|
// Loop through the folder
|
||||||
|
$dir = dir($source);
|
||||||
|
while (false !== $entry = $dir->read()) {
|
||||||
|
// Skip pointers
|
||||||
|
if ($entry == '.' || $entry == '..') {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if (!$this->copyable($entry)) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
// Deep copy directories
|
||||||
|
if ($dest !== "$source/$entry") {
|
||||||
|
$this->copyr("$source/$entry", "$dest/$entry");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Clean up
|
||||||
|
$dir->close();
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Stub for PHP's built-in copy function, can be used to overload
|
||||||
|
* functionality
|
||||||
|
*/
|
||||||
|
function copy($source, $dest) {
|
||||||
|
return copy($source, $dest);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Overloadable function that tests a filename for copyability. By
|
||||||
|
* default, everything should be copied; you can restrict things to
|
||||||
|
* ignore hidden files, unreadable files, etc.
|
||||||
|
*/
|
||||||
|
function copyable($file) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Delete a file, or a folder and its contents
|
||||||
|
*
|
||||||
|
* @author Aidan Lister <aidan@php.net>
|
||||||
|
* @version 1.0.3
|
||||||
|
* @link http://aidanlister.com/repos/v/function.rmdirr.php
|
||||||
|
* @param string $dirname Directory to delete
|
||||||
|
* @return bool Returns TRUE on success, FALSE on failure
|
||||||
|
*/
|
||||||
|
function rmdirr($dirname)
|
||||||
|
{
|
||||||
|
// Sanity check
|
||||||
|
if (!file_exists($dirname)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Simple delete for a file
|
||||||
|
if (is_file($dirname) || is_link($dirname)) {
|
||||||
|
return unlink($dirname);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Loop through the folder
|
||||||
|
$dir = dir($dirname);
|
||||||
|
while (false !== $entry = $dir->read()) {
|
||||||
|
// Skip pointers
|
||||||
|
if ($entry == '.' || $entry == '..') {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
// Recurse
|
||||||
|
$this->rmdirr($dirname . DIRECTORY_SEPARATOR . $entry);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Clean up
|
||||||
|
$dir->close();
|
||||||
|
return rmdir($dirname);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
107
maintenance/compat-function-file-put-contents.php
Normal file
107
maintenance/compat-function-file-put-contents.php
Normal file
@@ -0,0 +1,107 @@
|
|||||||
|
<?php
|
||||||
|
// $Id: file_put_contents.php,v 1.27 2007/04/17 10:09:56 arpad Exp $
|
||||||
|
|
||||||
|
|
||||||
|
if (!defined('FILE_USE_INCLUDE_PATH')) {
|
||||||
|
define('FILE_USE_INCLUDE_PATH', 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!defined('LOCK_EX')) {
|
||||||
|
define('LOCK_EX', 2);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!defined('FILE_APPEND')) {
|
||||||
|
define('FILE_APPEND', 8);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Replace file_put_contents()
|
||||||
|
*
|
||||||
|
* @category PHP
|
||||||
|
* @package PHP_Compat
|
||||||
|
* @license LGPL - http://www.gnu.org/licenses/lgpl.html
|
||||||
|
* @copyright 2004-2007 Aidan Lister <aidan@php.net>, Arpad Ray <arpad@php.net>
|
||||||
|
* @link http://php.net/function.file_put_contents
|
||||||
|
* @author Aidan Lister <aidan@php.net>
|
||||||
|
* @version $Revision: 1.27 $
|
||||||
|
* @internal resource_context is not supported
|
||||||
|
* @since PHP 5
|
||||||
|
* @require PHP 4.0.0 (user_error)
|
||||||
|
*/
|
||||||
|
function php_compat_file_put_contents($filename, $content, $flags = null, $resource_context = null)
|
||||||
|
{
|
||||||
|
// If $content is an array, convert it to a string
|
||||||
|
if (is_array($content)) {
|
||||||
|
$content = implode('', $content);
|
||||||
|
}
|
||||||
|
|
||||||
|
// If we don't have a string, throw an error
|
||||||
|
if (!is_scalar($content)) {
|
||||||
|
user_error('file_put_contents() The 2nd parameter should be either a string or an array',
|
||||||
|
E_USER_WARNING);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Get the length of data to write
|
||||||
|
$length = strlen($content);
|
||||||
|
|
||||||
|
// Check what mode we are using
|
||||||
|
$mode = ($flags & FILE_APPEND) ?
|
||||||
|
'a' :
|
||||||
|
'wb';
|
||||||
|
|
||||||
|
// Check if we're using the include path
|
||||||
|
$use_inc_path = ($flags & FILE_USE_INCLUDE_PATH) ?
|
||||||
|
true :
|
||||||
|
false;
|
||||||
|
|
||||||
|
// Open the file for writing
|
||||||
|
if (($fh = @fopen($filename, $mode, $use_inc_path)) === false) {
|
||||||
|
user_error('file_put_contents() failed to open stream: Permission denied',
|
||||||
|
E_USER_WARNING);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Attempt to get an exclusive lock
|
||||||
|
$use_lock = ($flags & LOCK_EX) ? true : false ;
|
||||||
|
if ($use_lock === true) {
|
||||||
|
if (!flock($fh, LOCK_EX)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Write to the file
|
||||||
|
$bytes = 0;
|
||||||
|
if (($bytes = @fwrite($fh, $content)) === false) {
|
||||||
|
$errormsg = sprintf('file_put_contents() Failed to write %d bytes to %s',
|
||||||
|
$length,
|
||||||
|
$filename);
|
||||||
|
user_error($errormsg, E_USER_WARNING);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Close the handle
|
||||||
|
@fclose($fh);
|
||||||
|
|
||||||
|
// Check all the data was written
|
||||||
|
if ($bytes != $length) {
|
||||||
|
$errormsg = sprintf('file_put_contents() Only %d of %d bytes written, possibly out of free disk space.',
|
||||||
|
$bytes,
|
||||||
|
$length);
|
||||||
|
user_error($errormsg, E_USER_WARNING);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Return length
|
||||||
|
return $bytes;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// Define
|
||||||
|
if (!function_exists('file_put_contents')) {
|
||||||
|
function file_put_contents($filename, $content, $flags = null, $resource_context = null)
|
||||||
|
{
|
||||||
|
return php_compat_file_put_contents($filename, $content, $flags, $resource_context);
|
||||||
|
}
|
||||||
|
}
|
@@ -32,5 +32,5 @@ foreach ($names as $name) {
|
|||||||
$cache->flush($config);
|
$cache->flush($config);
|
||||||
}
|
}
|
||||||
|
|
||||||
echo 'Cache flushed successfully.';
|
echo "Cache flushed successfully.\n";
|
||||||
|
|
||||||
|
13
maintenance/generate-ph5p-patch.php
Normal file
13
maintenance/generate-ph5p-patch.php
Normal file
@@ -0,0 +1,13 @@
|
|||||||
|
<?php
|
||||||
|
|
||||||
|
$orig = realpath(dirname(__FILE__) . '/PH5P.php');
|
||||||
|
$new = realpath(dirname(__FILE__) . '/../library/HTMLPurifier/Lexer/PH5P.php');
|
||||||
|
$newt = dirname(__FILE__) . '/PH5P.new.php'; // temporary file
|
||||||
|
|
||||||
|
// minor text-processing of new file to get into same format as original
|
||||||
|
$new_src = file_get_contents($new);
|
||||||
|
$new_src = '<?php' . PHP_EOL . substr($new_src, strpos($new_src, 'class HTML5 {'));
|
||||||
|
|
||||||
|
file_put_contents($newt, $new_src);
|
||||||
|
shell_exec("diff -u \"$orig\" \"$newt\" > PH5P.patch");
|
||||||
|
unlink($newt);
|
@@ -6,20 +6,38 @@ assertCli();
|
|||||||
|
|
||||||
/**
|
/**
|
||||||
* Compiles all of HTML Purifier's library files into one big file
|
* Compiles all of HTML Purifier's library files into one big file
|
||||||
* named HTMLPurifier.standalone.php. Operates recursively, and will
|
* named HTMLPurifier.standalone.php.
|
||||||
* barf if there are conditional includes.
|
|
||||||
*
|
|
||||||
* Details: also creates blank "include" files in the test/blank directory
|
|
||||||
* in order to simulate require_once's inside the test files.
|
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Global array that tracks already loaded includes
|
* Global hash that tracks already loaded includes
|
||||||
*/
|
*/
|
||||||
$GLOBALS['loaded'] = array('HTMLPurifier.php' => true);
|
$GLOBALS['loaded'] = array('HTMLPurifier.php' => true);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @param $text Text to replace includes from
|
* Custom FSTools for this script that overloads some behavior
|
||||||
|
* @warning The overloading of copy() is not necessarily global for
|
||||||
|
* this script. Watch out!
|
||||||
|
*/
|
||||||
|
class MergeLibraryFSTools extends FSTools
|
||||||
|
{
|
||||||
|
function copyable($entry) {
|
||||||
|
// Skip hidden files
|
||||||
|
if ($entry[0] == '.') {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
function copy($source, $dest) {
|
||||||
|
copy_and_remove_includes($source, $dest);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
$FS = new MergeLibraryFSTools();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Replaces the includes inside PHP source code with the corresponding
|
||||||
|
* source.
|
||||||
|
* @param string $text PHP source code to replace includes from
|
||||||
*/
|
*/
|
||||||
function replace_includes($text) {
|
function replace_includes($text) {
|
||||||
return preg_replace_callback(
|
return preg_replace_callback(
|
||||||
@@ -32,6 +50,8 @@ function replace_includes($text) {
|
|||||||
/**
|
/**
|
||||||
* Removes leading PHP tags from included files. Assumes that there is
|
* Removes leading PHP tags from included files. Assumes that there is
|
||||||
* no trailing tag.
|
* no trailing tag.
|
||||||
|
* @note This is safe for files that have internal <?php
|
||||||
|
* @param string $text Text to remove the leading PHP tag from
|
||||||
*/
|
*/
|
||||||
function remove_php_tags($text) {
|
function remove_php_tags($text) {
|
||||||
return substr($text, 5);
|
return substr($text, 5);
|
||||||
@@ -40,125 +60,48 @@ function remove_php_tags($text) {
|
|||||||
/**
|
/**
|
||||||
* Creates an appropriate blank file, recursively generating directories
|
* Creates an appropriate blank file, recursively generating directories
|
||||||
* if necessary
|
* if necessary
|
||||||
|
* @param string $file Filename to create blank for
|
||||||
*/
|
*/
|
||||||
function create_blank($file) {
|
function create_blank($file) {
|
||||||
|
global $FS;
|
||||||
$dir = dirname($file);
|
$dir = dirname($file);
|
||||||
$base = realpath('../tests/blanks/') . DIRECTORY_SEPARATOR ;
|
$base = realpath('../tests/blanks/') . DIRECTORY_SEPARATOR ;
|
||||||
if ($dir != '.') mkdir_deep($base . $dir);
|
if ($dir != '.') {
|
||||||
|
$FS->mkdir($base . $dir);
|
||||||
|
}
|
||||||
file_put_contents($base . $file, '');
|
file_put_contents($base . $file, '');
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Recursively creates a directory
|
* Copies the contents of a directory to the standalone directory
|
||||||
* @note Adapted from the PHP manual comment 76612
|
* @param string $dir Directory to copy
|
||||||
*/
|
*/
|
||||||
function mkdir_deep($folder) {
|
function make_dir_standalone($dir) {
|
||||||
$folders = preg_split("#[\\\\/]#", $folder);
|
global $FS;
|
||||||
$base = '';
|
return $FS->copyr($dir, 'standalone/' . $dir);
|
||||||
for($i = 0, $c = count($folders); $i < $c; $i++) {
|
|
||||||
if(empty($folders[$i])) {
|
|
||||||
if (!$i) {
|
|
||||||
// special case for root level
|
|
||||||
$base .= DIRECTORY_SEPARATOR;
|
|
||||||
}
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
$base .= $folders[$i];
|
|
||||||
if(!is_dir($base)){
|
|
||||||
mkdir($base);
|
|
||||||
}
|
|
||||||
$base .= DIRECTORY_SEPARATOR;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Copy a file, or recursively copy a folder and its contents
|
* Copies the contents of a file to the standalone directory
|
||||||
*
|
* @param string $file File to copy
|
||||||
* @author Aidan Lister <aidan@php.net>
|
|
||||||
* @version 1.0.1
|
|
||||||
* @link http://aidanlister.com/repos/v/function.copyr.php
|
|
||||||
* @param string $source Source path
|
|
||||||
* @param string $dest Destination path
|
|
||||||
* @return bool Returns TRUE on success, FALSE on failure
|
|
||||||
*/
|
*/
|
||||||
function copyr($source, $dest) {
|
function make_file_standalone($file) {
|
||||||
// Simple copy for a file
|
global $FS;
|
||||||
if (is_file($source)) {
|
$FS->mkdir('standalone/' . dirname($file));
|
||||||
return copy($source, $dest);
|
copy_and_remove_includes($file, 'standalone/' . $file);
|
||||||
}
|
|
||||||
// Make destination directory
|
|
||||||
if (!is_dir($dest)) {
|
|
||||||
mkdir($dest);
|
|
||||||
}
|
|
||||||
// Loop through the folder
|
|
||||||
$dir = dir($source);
|
|
||||||
while (false !== $entry = $dir->read()) {
|
|
||||||
// Skip pointers
|
|
||||||
if ($entry == '.' || $entry == '..') {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
// Skip hidden files
|
|
||||||
if ($entry[0] == '.') {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
// Deep copy directories
|
|
||||||
if ($dest !== "$source/$entry") {
|
|
||||||
copyr("$source/$entry", "$dest/$entry");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
// Clean up
|
|
||||||
$dir->close();
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Delete a file, or a folder and its contents
|
* Copies a file to another location recursively, if it is a PHP file
|
||||||
*
|
* remove includes
|
||||||
* @author Aidan Lister <aidan@php.net>
|
* @param string $file Original file
|
||||||
* @version 1.0.3
|
* @param string $sfile New location of file
|
||||||
* @link http://aidanlister.com/repos/v/function.rmdirr.php
|
|
||||||
* @param string $dirname Directory to delete
|
|
||||||
* @return bool Returns TRUE on success, FALSE on failure
|
|
||||||
*/
|
*/
|
||||||
function rmdirr($dirname)
|
function copy_and_remove_includes($file, $sfile) {
|
||||||
{
|
$contents = file_get_contents($file);
|
||||||
// Sanity check
|
if (strrchr($file, '.') === '.php') $contents = replace_includes($contents);
|
||||||
if (!file_exists($dirname)) {
|
return file_put_contents($sfile, $contents);
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Simple delete for a file
|
|
||||||
if (is_file($dirname) || is_link($dirname)) {
|
|
||||||
return unlink($dirname);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Loop through the folder
|
|
||||||
$dir = dir($dirname);
|
|
||||||
while (false !== $entry = $dir->read()) {
|
|
||||||
// Skip pointers
|
|
||||||
if ($entry == '.' || $entry == '..') {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Recurse
|
|
||||||
rmdirr($dirname . DIRECTORY_SEPARATOR . $entry);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Clean up
|
|
||||||
$dir->close();
|
|
||||||
return rmdir($dirname);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Copies the contents of a directory to the standalone directory
|
|
||||||
*/
|
|
||||||
function make_dir_standalone($dir) {
|
|
||||||
return copyr($dir, 'standalone/' . $dir);
|
|
||||||
}
|
|
||||||
|
|
||||||
function make_file_standalone($file) {
|
|
||||||
mkdir_deep('standalone/' . dirname($file));
|
|
||||||
return copy($file, 'standalone/' . $file);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -167,8 +110,14 @@ function make_file_standalone($file) {
|
|||||||
*/
|
*/
|
||||||
function replace_includes_callback($matches) {
|
function replace_includes_callback($matches) {
|
||||||
$file = $matches[1];
|
$file = $matches[1];
|
||||||
// PHP 5 only file
|
$preserve = array(
|
||||||
if ($file == 'HTMLPurifier/Lexer/DOMLex.php') {
|
// PHP 5 only
|
||||||
|
'HTMLPurifier/Lexer/DOMLex.php' => 1,
|
||||||
|
'HTMLPurifier/Printer.php' => 1,
|
||||||
|
// PEAR (external)
|
||||||
|
'XML/HTMLSax3.php' => 1
|
||||||
|
);
|
||||||
|
if (isset($preserve[$file])) {
|
||||||
return $matches[0];
|
return $matches[0];
|
||||||
}
|
}
|
||||||
if (isset($GLOBALS['loaded'][$file])) return '';
|
if (isset($GLOBALS['loaded'][$file])) return '';
|
||||||
@@ -192,16 +141,22 @@ file_put_contents('HTMLPurifier.standalone.php', $contents);
|
|||||||
echo ' done!' . PHP_EOL;
|
echo ' done!' . PHP_EOL;
|
||||||
|
|
||||||
echo 'Creating standalone directory...';
|
echo 'Creating standalone directory...';
|
||||||
rmdirr('standalone'); // ensure a clean copy
|
$FS->rmdirr('standalone'); // ensure a clean copy
|
||||||
mkdir_deep('standalone/HTMLPurifier/DefinitionCache/Serializer');
|
|
||||||
make_dir_standalone('HTMLPurifier/EntityLookup');
|
|
||||||
make_dir_standalone('HTMLPurifier/Language');
|
|
||||||
make_file_standalone('HTMLPurifier/Printer/ConfigForm.js');
|
|
||||||
make_file_standalone('HTMLPurifier/Printer/ConfigForm.css');
|
|
||||||
make_dir_standalone('HTMLPurifier/URIScheme');
|
|
||||||
// PHP 5 only file
|
|
||||||
mkdir_deep('standalone/HTMLPurifier/Lexer');
|
|
||||||
make_file_standalone('HTMLPurifier/Lexer/DOMLex.php');
|
|
||||||
make_file_standalone('HTMLPurifier/TokenFactory.php');
|
|
||||||
echo ' done!' . PHP_EOL;
|
|
||||||
|
|
||||||
|
// data files
|
||||||
|
$FS->mkdir('standalone/HTMLPurifier/DefinitionCache/Serializer');
|
||||||
|
make_dir_standalone('HTMLPurifier/EntityLookup');
|
||||||
|
|
||||||
|
// non-standard inclusion setup
|
||||||
|
make_dir_standalone('HTMLPurifier/Language');
|
||||||
|
|
||||||
|
// optional components
|
||||||
|
make_file_standalone('HTMLPurifier/Printer.php');
|
||||||
|
make_dir_standalone('HTMLPurifier/Printer');
|
||||||
|
make_dir_standalone('HTMLPurifier/Filter');
|
||||||
|
make_file_standalone('HTMLPurifier/Lexer/PEARSax3.php');
|
||||||
|
|
||||||
|
// PHP 5 only files
|
||||||
|
make_file_standalone('HTMLPurifier/Lexer/DOMLex.php');
|
||||||
|
make_file_standalone('HTMLPurifier/Lexer/PH5P.php');
|
||||||
|
echo ' done!' . PHP_EOL;
|
||||||
|
@@ -10,11 +10,11 @@ $pkg->setOptions(
|
|||||||
array(
|
array(
|
||||||
'baseinstalldir' => '/',
|
'baseinstalldir' => '/',
|
||||||
'packagefile' => 'package2.xml',
|
'packagefile' => 'package2.xml',
|
||||||
'packagedirectory' => dirname(__FILE__) . '/library',
|
'packagedirectory' => realpath(dirname(__FILE__) . '/library'),
|
||||||
'filelistgenerator' => 'file',
|
'filelistgenerator' => 'file',
|
||||||
'include' => array('*'),
|
'include' => array('*'),
|
||||||
'dir_roles' => array('/' => 'php'), // hack to put .ser in the right place
|
'dir_roles' => array('/' => 'php'), // hack to put .ser in the right place
|
||||||
'ignore' => array('HTMLPurifier.auto.php'),
|
'ignore' => array('HTMLPurifier.auto.php', 'HTMLPurifier.standalone.php', 'standalone/'),
|
||||||
)
|
)
|
||||||
);
|
);
|
||||||
|
|
||||||
|
@@ -71,7 +71,7 @@ readmeinstallchangelog = README, INSTALL, NEWS, WYSIWYG, SLOW, LICENSE, CREDITS
|
|||||||
;; legal values: directory paths separated by commas
|
;; legal values: directory paths separated by commas
|
||||||
;directory = /path1,/path2,.,..,subdirectory
|
;directory = /path1,/path2,.,..,subdirectory
|
||||||
;directory = /home/jeichorn/cvs/pear
|
;directory = /home/jeichorn/cvs/pear
|
||||||
directory = ./
|
directory = .
|
||||||
|
|
||||||
;; template base directory (the equivalent directory of <installdir>/phpDocumentor)
|
;; template base directory (the equivalent directory of <installdir>/phpDocumentor)
|
||||||
;templatebase = /path/to/my/templates
|
;templatebase = /path/to/my/templates
|
||||||
@@ -82,7 +82,7 @@ directory = ./
|
|||||||
;; comma-separated list of files, directories or wildcards ? and * (any wildcard) to ignore
|
;; comma-separated list of files, directories or wildcards ? and * (any wildcard) to ignore
|
||||||
;; legal values: any wildcard strings separated by commas
|
;; legal values: any wildcard strings separated by commas
|
||||||
;ignore = /path/to/ignore*,*list.php,myfile.php,subdirectory/
|
;ignore = /path/to/ignore*,*list.php,myfile.php,subdirectory/
|
||||||
ignore = pear-*,templates/,Documentation/,test*.php,Lexer.inc
|
ignore = *tests*,*benchmarks*,*docs*,*test-settings.php,*configdoc*,*maintenance*,*smoketests*,*standalone*,*.svn*,*conf*
|
||||||
|
|
||||||
sourcecode = on
|
sourcecode = on
|
||||||
|
|
||||||
|
@@ -261,12 +261,42 @@ function phorum_htmlpurifier_editor_after_subject() {
|
|||||||
// don't show this message if it's a WYSIWYG editor, since it will
|
// don't show this message if it's a WYSIWYG editor, since it will
|
||||||
// then be handled automatically
|
// then be handled automatically
|
||||||
if (!empty($GLOBALS['PHORUM']['mod_htmlpurifier']['wysiwyg'])) return;
|
if (!empty($GLOBALS['PHORUM']['mod_htmlpurifier']['wysiwyg'])) return;
|
||||||
?><tr><td colspan="2" style="padding:1em 0.3em;">
|
?><tr><td colspan="2" style="padding:1em 0.3em;" class="htmlpurifier-help">
|
||||||
HTML input is <strong>on</strong>. Make sure you escape all HTML and
|
<p>
|
||||||
angled-brackets with &lt; and &gt; (you can also use CDATA
|
<strong>HTML input</strong> is enabled. Make sure you escape all HTML and
|
||||||
tags, simply wrap the suspect text with
|
angled brackets with <code>&lt;</code> and <code>&gt;</code>.
|
||||||
<![CDATA[<em>text</em>]]>. Paragraphs will only be applied to
|
</p><?php
|
||||||
double-spaces; single-spaces will not generate <tt><br></tt> tags.
|
$purifier =& HTMLPurifier::getInstance();
|
||||||
|
$config = $purifier->config;
|
||||||
|
if ($config->get('AutoFormat', 'AutoParagraph')) {
|
||||||
|
?><p>
|
||||||
|
<strong>Auto-paragraphing</strong> is enabled. Double
|
||||||
|
newlines will be converted to paragraphs; for single
|
||||||
|
newlines, use the <code>pre</code> tag.
|
||||||
|
</p><?php
|
||||||
|
}
|
||||||
|
$html_definition = $config->getDefinition('HTML');
|
||||||
|
$allowed = array();
|
||||||
|
foreach ($html_definition->info as $name => $x) $allowed[] = "<code>$name</code>";
|
||||||
|
sort($allowed);
|
||||||
|
$allowed_text = implode(', ', $allowed);
|
||||||
|
?><p><strong>Allowed tags:</strong> <?php
|
||||||
|
echo $allowed_text;
|
||||||
|
?>.</p><?php
|
||||||
|
?>
|
||||||
|
</p>
|
||||||
|
<p>
|
||||||
|
For inputting literal code such as HTML and PHP for display, use
|
||||||
|
CDATA tags to auto-escape your angled brackets, and <code>pre</code>
|
||||||
|
to preserve newlines:
|
||||||
|
</p>
|
||||||
|
<pre><pre><![CDATA[
|
||||||
|
<em>Place code here</em>
|
||||||
|
]]></pre></pre>
|
||||||
|
<p>
|
||||||
|
Power users, you can hide this notice with:
|
||||||
|
<pre>.htmlpurifier-help {display:none;}</pre>
|
||||||
|
</p>
|
||||||
</td></tr><?php
|
</td></tr><?php
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -20,8 +20,10 @@ function phorum_htmlpurifier_migrate_sigs_check() {
|
|||||||
function phorum_htmlpurifier_migrate_sigs($offset) {
|
function phorum_htmlpurifier_migrate_sigs($offset) {
|
||||||
global $PHORUM;
|
global $PHORUM;
|
||||||
|
|
||||||
if(!$offset) return; // bail out quick of $offset == 0
|
if(!$offset) return; // bail out quick if $offset == 0
|
||||||
|
|
||||||
|
// theoretically, we could get rid of this multi-request
|
||||||
|
// doo-hickery if safe mode is off
|
||||||
@set_time_limit(0); // attempt to let this run
|
@set_time_limit(0); // attempt to let this run
|
||||||
$increment = $PHORUM['mod_htmlpurifier']['migrate-sigs-increment'];
|
$increment = $PHORUM['mod_htmlpurifier']['migrate-sigs-increment'];
|
||||||
|
|
||||||
@@ -52,21 +54,19 @@ function phorum_htmlpurifier_migrate_sigs($offset) {
|
|||||||
|
|
||||||
// query for highest ID in database
|
// query for highest ID in database
|
||||||
$type = $PHORUM['DBCONFIG']['type'];
|
$type = $PHORUM['DBCONFIG']['type'];
|
||||||
|
$sql = "select MAX(user_id) from {$PHORUM['user_table']}";
|
||||||
if ($type == 'mysql') {
|
if ($type == 'mysql') {
|
||||||
$conn = phorum_db_mysql_connect();
|
$conn = phorum_db_mysql_connect();
|
||||||
$sql = "select MAX(user_id) from {$PHORUM['user_table']}";
|
|
||||||
$res = mysql_query($sql, $conn);
|
$res = mysql_query($sql, $conn);
|
||||||
$row = mysql_fetch_row($res);
|
$row = mysql_fetch_row($res);
|
||||||
$top_id = (int) $row[0];
|
|
||||||
} elseif ($type == 'mysqli') {
|
} elseif ($type == 'mysqli') {
|
||||||
$conn = phorum_db_mysqli_connect();
|
$conn = phorum_db_mysqli_connect();
|
||||||
$sql = "select MAX(user_id) from {$PHORUM['user_table']}";
|
|
||||||
$res = mysqli_query($conn, $sql);
|
$res = mysqli_query($conn, $sql);
|
||||||
$row = mysqli_fetch_row($res);
|
$row = mysqli_fetch_row($res);
|
||||||
$top_id = (int) $row[0];
|
|
||||||
} else {
|
} else {
|
||||||
exit('Unrecognized database!');
|
exit('Unrecognized database!');
|
||||||
}
|
}
|
||||||
|
$top_id = (int) $row[0];
|
||||||
|
|
||||||
$offset += $increment;
|
$offset += $increment;
|
||||||
if ($offset > $top_id) { // test for end condition
|
if ($offset > $top_id) { // test for end condition
|
||||||
|
@@ -1,30 +0,0 @@
|
|||||||
<?php
|
|
||||||
|
|
||||||
// Merges in changes from trunk to strict branch
|
|
||||||
// WORKING COPY MUST BE POINTED TO STRICT BRANCH
|
|
||||||
|
|
||||||
if (php_sapi_name() != 'cli') {
|
|
||||||
echo 'Release script cannot be called from web-browser.';
|
|
||||||
exit;
|
|
||||||
}
|
|
||||||
|
|
||||||
require 'svn.php';
|
|
||||||
|
|
||||||
$svn_info = svn_info('.');
|
|
||||||
|
|
||||||
$last_rev = (int) $svn_info['Last Changed Rev'];
|
|
||||||
$trunk_url = $svn_info['Repository Root'] . '/htmlpurifier/trunk';
|
|
||||||
echo "Last revision was $last_rev, merging from $last_rev to head.\n";
|
|
||||||
|
|
||||||
$merge_cmd = "svn merge -r $last_rev:HEAD $trunk_url .";
|
|
||||||
$out = explode("\n", shell_exec($merge_cmd));
|
|
||||||
|
|
||||||
echo "Conflicted files:\n";
|
|
||||||
foreach ($out as $line) {
|
|
||||||
if (empty($line)) continue;
|
|
||||||
if ($line{0} === 'C' || $line{1} === 'C') echo $line . "\n";
|
|
||||||
}
|
|
||||||
|
|
||||||
$version = trim(file_get_contents('VERSION'));
|
|
||||||
echo "Resolve conflicts and then commit as 'Release $version, merged in $last_rev to HEAD.'";
|
|
||||||
|
|
20
release2-tag.php
Normal file
20
release2-tag.php
Normal file
@@ -0,0 +1,20 @@
|
|||||||
|
<?php
|
||||||
|
|
||||||
|
// Tags releases
|
||||||
|
|
||||||
|
if (php_sapi_name() != 'cli') {
|
||||||
|
echo 'Release script cannot be called from web-browser.';
|
||||||
|
exit;
|
||||||
|
}
|
||||||
|
|
||||||
|
require 'svn.php';
|
||||||
|
|
||||||
|
$svn_info = my_svn_info('.');
|
||||||
|
|
||||||
|
$version = trim(file_get_contents('VERSION'));
|
||||||
|
|
||||||
|
$trunk_url = $svn_info['Repository Root'] . '/htmlpurifier/branches/php4';
|
||||||
|
$trunk_tag_url = $svn_info['Repository Root'] . '/htmlpurifier/tags/' . $version;
|
||||||
|
|
||||||
|
echo "Tagging php4 branch to tags/$version...";
|
||||||
|
passthru("svn copy --message \"Tag $version release.\" $trunk_url $trunk_tag_url");
|
@@ -1,25 +0,0 @@
|
|||||||
<?php
|
|
||||||
|
|
||||||
// Tags releases
|
|
||||||
|
|
||||||
if (php_sapi_name() != 'cli') {
|
|
||||||
echo 'Release script cannot be called from web-browser.';
|
|
||||||
exit;
|
|
||||||
}
|
|
||||||
|
|
||||||
require 'svn.php';
|
|
||||||
|
|
||||||
$svn_info = svn_info('.');
|
|
||||||
|
|
||||||
$version = trim(file_get_contents('VERSION'));
|
|
||||||
|
|
||||||
$trunk_url = $svn_info['Repository Root'] . '/htmlpurifier/trunk';
|
|
||||||
$strict_url = $svn_info['Repository Root'] . '/htmlpurifier/branches/strict';
|
|
||||||
$trunk_tag_url = $svn_info['Repository Root'] . '/htmlpurifier/tags/' . $version;
|
|
||||||
$strict_tag_url = $svn_info['Repository Root'] . '/htmlpurifier/tags/' . $version . '-strict';
|
|
||||||
|
|
||||||
echo "Tagging trunk to tags/$version...";
|
|
||||||
passthru("svn copy --message \"Tag $version release.\" $trunk_url $trunk_tag_url");
|
|
||||||
echo "Tagging strict to tags/$version-strict...";
|
|
||||||
passthru("svn copy --message \"Tag $version-strict release.\" $strict_url $strict_tag_url");
|
|
||||||
|
|
@@ -31,7 +31,7 @@ while (false !== ($filename = readdir($dh))) {
|
|||||||
if ($filename == 'all.php') continue;
|
if ($filename == 'all.php') continue;
|
||||||
if ($filename == 'testSchema.php') continue;
|
if ($filename == 'testSchema.php') continue;
|
||||||
?>
|
?>
|
||||||
<iframe src="<?php echo escapeHTML($filename); ?>"></iframe>
|
<iframe src="<?php echo escapeHTML($filename); if (isset($_GET['standalone'])) {echo '?standalone';} ?>"></iframe>
|
||||||
<?php
|
<?php
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -2,7 +2,11 @@
|
|||||||
|
|
||||||
header('Content-type: text/html; charset=UTF-8');
|
header('Content-type: text/html; charset=UTF-8');
|
||||||
|
|
||||||
require_once '../library/HTMLPurifier.auto.php';
|
if (!isset($_GET['standalone'])) {
|
||||||
|
require_once '../library/HTMLPurifier.auto.php';
|
||||||
|
} else {
|
||||||
|
require_once '../library/HTMLPurifier.standalone.php';
|
||||||
|
}
|
||||||
error_reporting(E_ALL);
|
error_reporting(E_ALL);
|
||||||
|
|
||||||
function escapeHTML($string) {
|
function escapeHTML($string) {
|
||||||
|
2
svn.php
2
svn.php
@@ -1,6 +1,6 @@
|
|||||||
<?php
|
<?php
|
||||||
|
|
||||||
function svn_info($dir) {
|
function my_svn_info($dir) {
|
||||||
$raw = explode("\n", shell_exec("svn info $dir"));
|
$raw = explode("\n", shell_exec("svn info $dir"));
|
||||||
$svn_info = array();
|
$svn_info = array();
|
||||||
foreach ($raw as $r) {
|
foreach ($raw as $r) {
|
||||||
|
@@ -54,14 +54,14 @@ function isInScopes($array = array()) {
|
|||||||
}
|
}
|
||||||
/**#@-*/
|
/**#@-*/
|
||||||
|
|
||||||
function printTokens($tokens, $index) {
|
function printTokens($tokens, $index = null) {
|
||||||
$string = '<pre>';
|
$string = '<pre>';
|
||||||
$generator = new HTMLPurifier_Generator();
|
$generator = new HTMLPurifier_Generator();
|
||||||
foreach ($tokens as $i => $token) {
|
foreach ($tokens as $i => $token) {
|
||||||
if ($index == $i) $string .= '[<strong>';
|
if ($index === $i) $string .= '[<strong>';
|
||||||
$string .= "<sup>$i</sup>";
|
$string .= "<sup>$i</sup>";
|
||||||
$string .= $generator->escape($generator->generateFromToken($token));
|
$string .= $generator->escape($generator->generateFromToken($token));
|
||||||
if ($index == $i) $string .= '</strong>]';
|
if ($index === $i) $string .= '</strong>]';
|
||||||
}
|
}
|
||||||
$string .= '</pre>';
|
$string .= '</pre>';
|
||||||
echo $string;
|
echo $string;
|
||||||
|
@@ -14,6 +14,10 @@ class HTMLPurifier_AttrDef_CSS_BackgroundTest extends HTMLPurifier_AttrDefHarnes
|
|||||||
$valid = '#333 url(chess.png) repeat fixed 50% top';
|
$valid = '#333 url(chess.png) repeat fixed 50% top';
|
||||||
$this->assertDef($valid);
|
$this->assertDef($valid);
|
||||||
$this->assertDef('url("chess.png") #333 50% top repeat fixed', $valid);
|
$this->assertDef('url("chess.png") #333 50% top repeat fixed', $valid);
|
||||||
|
$this->assertDef(
|
||||||
|
'rgb(34, 56, 33) url(chess.png) repeat fixed top',
|
||||||
|
'rgb(34,56,33) url(chess.png) repeat fixed top'
|
||||||
|
);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -14,6 +14,7 @@ class HTMLPurifier_AttrDef_CSS_BorderTest extends HTMLPurifier_AttrDefHarness
|
|||||||
$this->assertDef('thick solid');
|
$this->assertDef('thick solid');
|
||||||
$this->assertDef('solid red', 'solid #FF0000');
|
$this->assertDef('solid red', 'solid #FF0000');
|
||||||
$this->assertDef('1px solid #000');
|
$this->assertDef('1px solid #000');
|
||||||
|
$this->assertDef('1px solid rgb(0, 0, 0)', '1px solid rgb(0,0,0)');
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user