mirror of
https://github.com/ezyang/htmlpurifier.git
synced 2025-08-03 12:47:56 +02:00
Compare commits
172 Commits
Author | SHA1 | Date | |
---|---|---|---|
|
c768146e4d | ||
|
6e37ecd1c8 | ||
|
20eff0a3a0 | ||
|
d516e2f8de | ||
|
631021733b | ||
|
344e0640b6 | ||
|
62d2550e16 | ||
|
087145a71b | ||
|
a44187a5c1 | ||
|
c0ad68108a | ||
|
83a574491e | ||
|
3b537365a4 | ||
|
8a8b123d33 | ||
|
72db575446 | ||
|
d8bb73ce46 | ||
|
f90372f8ab | ||
|
f38fca32a9 | ||
|
5a23004652 | ||
|
6705140082 | ||
|
cb7162a995 | ||
|
2189a9430f | ||
|
7291f19347 | ||
|
9fcffd6533 | ||
|
31dce298ea | ||
|
8c9d461a62 | ||
|
7291a9647e | ||
|
17af0e4fc1 | ||
|
70028f83d6 | ||
|
5c5e3fe79f | ||
|
56a26cab14 | ||
|
1c7fedff5a | ||
|
9de0785448 | ||
|
974fe3f25e | ||
|
94468f3c24 | ||
|
e0354fecd9 | ||
|
1bbbc624dd | ||
|
49879d2cc6 | ||
|
5c9b5130c8 | ||
|
d2de8d976a | ||
|
4164b2eb2b | ||
|
1e5293d9fe | ||
|
6b643ede02 | ||
|
e41af46a8b | ||
|
3570c9985a | ||
|
8d572993b4 | ||
|
1bacbc0563 | ||
|
bfe2c10d07 | ||
|
9b10515fa4 | ||
|
1255d0f15d | ||
|
d45e11cc6b | ||
|
94c15d1f56 | ||
|
ce68cfe484 | ||
|
9f5f85952b | ||
|
dbb365155b | ||
|
32c0ffde0c | ||
|
856a5e5b89 | ||
|
820d6e9097 | ||
|
35b1fbce01 | ||
|
bcfbb8338c | ||
|
f51a6f7de9 | ||
|
f1439f0af5 | ||
|
0124605918 | ||
|
afb007d22f | ||
|
0dd9e4faf4 | ||
|
94ed3b1231 | ||
|
6a6c0ed5d7 | ||
|
e05b555448 | ||
|
ee9c70ab7f | ||
|
b4469f17aa | ||
|
e76f4b45d0 | ||
|
a32d5b52e1 | ||
|
a3d71fe606 | ||
|
77982bd61d | ||
|
78c4e62245 | ||
|
5803c06765 | ||
|
b63569ac22 | ||
|
f3d050c517 | ||
|
6dcc37cb55 | ||
|
cfc4ee1faf | ||
|
598c5b60c9 | ||
|
c9e7ffc172 | ||
|
feeffe6ed2 | ||
|
4754d407aa | ||
|
0b9db1f54b | ||
|
1d4a38d055 | ||
|
8c80349f9d | ||
|
d848c99b74 | ||
|
882ffed9ba | ||
|
86990a21f1 | ||
|
9573f0933d | ||
|
632bf2bbd4 | ||
|
ec86598446 | ||
|
b6c3f5e89b | ||
|
7c91104532 | ||
|
eac628f490 | ||
|
92913bc816 | ||
|
479d793562 | ||
|
e2c15f1c98 | ||
|
57ced3f361 | ||
|
c04a441b3e | ||
|
1bed8b6d5f | ||
|
33afd7d9e0 | ||
|
18e538317a | ||
|
96a4193fc9 | ||
|
00c66fa9cb | ||
|
d3abcb90e3 | ||
|
df3100b1b3 | ||
|
143e1ad718 | ||
|
875b0febde | ||
|
3166b8a10f | ||
|
1a70bffd5a | ||
|
f4c6e10ff7 | ||
|
c1cbd9e565 | ||
|
da94d3d6ac | ||
|
80793e925e | ||
|
8ef4fb22db | ||
|
70a7a3f5dd | ||
|
4d612d5a77 | ||
|
63a854ee5d | ||
|
0229458f8f | ||
|
baa477ac08 | ||
|
dc90e8e85b | ||
|
97125ed18b | ||
|
9a9036c689 | ||
|
aea7d02dfe | ||
|
b3ca1498c2 | ||
|
ac18672aba | ||
|
faf28682ad | ||
|
e2cd852bcf | ||
|
694583259c | ||
|
bde4de3c78 | ||
|
5b4e5c983e | ||
|
1ad8fd5ce9 | ||
|
6bdf161afd | ||
|
af45a6c191 | ||
|
2b72d0445f | ||
|
d7b3117678 | ||
|
53ff3e2744 | ||
|
6776efccdd | ||
|
ba9fd175d7 | ||
|
4d27906b02 | ||
|
8f573df3dc | ||
|
c7594487a2 | ||
|
733a5ce5c3 | ||
|
e8abd5953c | ||
|
1b8c8865b2 | ||
|
6e66dc9cad | ||
|
77b60a4206 | ||
|
5bf7ac4e9f | ||
|
a025203b18 | ||
|
809da84ae1 | ||
|
777781a95c | ||
|
4a87f732ca | ||
|
a2885181df | ||
|
84abae08f5 | ||
|
10e2d32a79 | ||
|
baf053b016 | ||
|
bf71c3f392 | ||
|
bfbe29d5a1 | ||
|
e194b8efc6 | ||
|
4214ac9d67 | ||
|
24f761d84a | ||
|
41c9226f3d | ||
|
e3c2063f69 | ||
|
398a02039e | ||
|
84e2e141fc | ||
|
47bbbad000 | ||
|
eaa906f8fc | ||
|
86ca784da3 | ||
|
b107eec452 | ||
|
fcbf724e6e | ||
|
92344cc83a |
6
.gitignore
vendored
6
.gitignore
vendored
@@ -1,5 +1,7 @@
|
||||
tags
|
||||
conf/
|
||||
test-settings.php
|
||||
config-schema.php
|
||||
library/HTMLPurifier/DefinitionCache/Serializer/*/
|
||||
library/standalone/
|
||||
library/HTMLPurifier.standalone.php
|
||||
@@ -16,3 +18,7 @@ docs/doxygen*
|
||||
*.phpt.php
|
||||
*.phpt.skip.php
|
||||
*.htmlt.ini
|
||||
*.patch
|
||||
/*.php
|
||||
vendor
|
||||
composer.lock
|
||||
|
2
Doxyfile
2
Doxyfile
@@ -31,7 +31,7 @@ PROJECT_NAME = HTMLPurifier
|
||||
# This could be handy for archiving the generated documentation or
|
||||
# if some version control system is used.
|
||||
|
||||
PROJECT_NUMBER = 3.3.0
|
||||
PROJECT_NUMBER = 4.5.0
|
||||
|
||||
# The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute)
|
||||
# base path where the generated documentation will be put.
|
||||
|
2
FOCUS
2
FOCUS
@@ -1,4 +1,4 @@
|
||||
7 - Major bugfixes
|
||||
4 - Minor feature enhancements
|
||||
|
||||
[ Appendix A: Release focus IDs ]
|
||||
0 - N/A
|
||||
|
37
INSTALL
37
INSTALL
@@ -18,6 +18,7 @@ with these contents.
|
||||
HTML Purifier is PHP 5 only, and is actively tested from PHP 5.0.5 and
|
||||
up. It has no core dependencies with other libraries. PHP
|
||||
4 support was deprecated on December 31, 2007 with HTML Purifier 3.0.0.
|
||||
HTML Purifier is not compatible with zend.ze1_compatibility_mode.
|
||||
|
||||
These optional extensions can enhance the capabilities of HTML Purifier:
|
||||
|
||||
@@ -25,6 +26,10 @@ These optional extensions can enhance the capabilities of HTML Purifier:
|
||||
* bcmath : Used for unit conversion and imagecrash protection
|
||||
* tidy : Used for pretty-printing HTML
|
||||
|
||||
These optional libraries can enhance the capabilities of HTML Purifier:
|
||||
|
||||
* CSSTidy : Clean CSS stylesheets using %Core.ExtractStyleBlocks
|
||||
* Net_IDNA2 (PEAR) : IRI support using %Core.EnableIDNA
|
||||
|
||||
---------------------------------------------------------------------------
|
||||
2. Reconnaissance
|
||||
@@ -231,12 +236,12 @@ HTML Purifier uses iconv to support other character encodings, as such,
|
||||
any encoding that iconv supports <http://www.gnu.org/software/libiconv/>
|
||||
HTML Purifier supports with this code:
|
||||
|
||||
$config->set('Core', 'Encoding', /* put your encoding here */);
|
||||
$config->set('Core.Encoding', /* put your encoding here */);
|
||||
|
||||
An example usage for Latin-1 websites (the most common encoding for English
|
||||
websites):
|
||||
|
||||
$config->set('Core', 'Encoding', 'ISO-8859-1');
|
||||
$config->set('Core.Encoding', 'ISO-8859-1');
|
||||
|
||||
Note that HTML Purifier's support for non-Unicode encodings is crippled by the
|
||||
fact that any character not supported by that encoding will be silently
|
||||
@@ -251,7 +256,7 @@ reason, I do not include the solution in this document).
|
||||
For those of you using HTML 4.01 Transitional, you can disable
|
||||
XHTML output like this:
|
||||
|
||||
$config->set('HTML', 'Doctype', 'HTML 4.01 Transitional');
|
||||
$config->set('HTML.Doctype', 'HTML 4.01 Transitional');
|
||||
|
||||
Other supported doctypes include:
|
||||
|
||||
@@ -277,14 +282,14 @@ are, respectively, %HTML.Allowed, %URI.MakeAbsolute and %URI.Base, and
|
||||
%AutoFormat.AutoParagraph. The %Namespace.Directive naming convention
|
||||
translates to:
|
||||
|
||||
$config->set('Namespace', 'Directive', $value);
|
||||
$config->set('Namespace.Directive', $value);
|
||||
|
||||
E.g.
|
||||
|
||||
$config->set('HTML', 'Allowed', 'p,b,a[href],i');
|
||||
$config->set('URI', 'Base', 'http://www.example.com');
|
||||
$config->set('URI', 'MakeAbsolute', true);
|
||||
$config->set('AutoFormat', 'AutoParagraph', true);
|
||||
$config->set('HTML.Allowed', 'p,b,a[href],i');
|
||||
$config->set('URI.Base', 'http://www.example.com');
|
||||
$config->set('URI.MakeAbsolute', true);
|
||||
$config->set('AutoFormat.AutoParagraph', true);
|
||||
|
||||
|
||||
---------------------------------------------------------------------------
|
||||
@@ -318,11 +323,11 @@ If you are unable or unwilling to give write permissions to the cache
|
||||
directory, you can either disable the cache (and suffer a performance
|
||||
hit):
|
||||
|
||||
$config->set('Core', 'DefinitionCache', null);
|
||||
$config->set('Core.DefinitionCache', null);
|
||||
|
||||
Or move the cache directory somewhere else (no trailing slash):
|
||||
|
||||
$config->set('Cache', 'SerializerPath', '/home/user/absolute/path');
|
||||
$config->set('Cache.SerializerPath', '/home/user/absolute/path');
|
||||
|
||||
|
||||
---------------------------------------------------------------------------
|
||||
@@ -330,11 +335,6 @@ Or move the cache directory somewhere else (no trailing slash):
|
||||
|
||||
The interface is mind-numbingly simple:
|
||||
|
||||
$purifier = new HTMLPurifier();
|
||||
$clean_html = $purifier->purify( $dirty_html );
|
||||
|
||||
...or, if you're using the configuration object:
|
||||
|
||||
$purifier = new HTMLPurifier($config);
|
||||
$clean_html = $purifier->purify( $dirty_html );
|
||||
|
||||
@@ -353,7 +353,8 @@ If your website is in UTF-8 and XHTML Transitional, use this code:
|
||||
<?php
|
||||
require_once '/path/to/htmlpurifier/library/HTMLPurifier.auto.php';
|
||||
|
||||
$purifier = new HTMLPurifier();
|
||||
$config = HTMLPurifier_Config::createDefault();
|
||||
$purifier = new HTMLPurifier($config);
|
||||
$clean_html = $purifier->purify($dirty_html);
|
||||
?>
|
||||
|
||||
@@ -363,8 +364,8 @@ If your website is in a different encoding or doctype, use this code:
|
||||
require_once '/path/to/htmlpurifier/library/HTMLPurifier.auto.php';
|
||||
|
||||
$config = HTMLPurifier_Config::createDefault();
|
||||
$config->set('Core', 'Encoding', 'ISO-8859-1'); // replace with your encoding
|
||||
$config->set('HTML', 'Doctype', 'HTML 4.01 Transitional'); // replace with your doctype
|
||||
$config->set('Core.Encoding', 'ISO-8859-1'); // replace with your encoding
|
||||
$config->set('HTML.Doctype', 'HTML 4.01 Transitional'); // replace with your doctype
|
||||
$purifier = new HTMLPurifier($config);
|
||||
|
||||
$clean_html = $purifier->purify($dirty_html);
|
||||
|
219
NEWS
219
NEWS
@@ -9,6 +9,225 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier
|
||||
. Internal change
|
||||
==========================
|
||||
|
||||
4.6.0, unknown release date
|
||||
# URI parsing algorithm was made more strict, so only prefixes which
|
||||
looks like schemes will actually be schemes. Thanks
|
||||
Michael Gusev <mgusev@sugarcrm.com> for fixing.
|
||||
|
||||
4.5.0, released 2013-02-17
|
||||
# Fix bug where stacked attribute transforms clobber each other;
|
||||
this also means it's no longer possible to override attribute
|
||||
transforms in later modules. No internal code was using this
|
||||
but this may break some clients.
|
||||
# We now use SHA-1 to identify cached definitions, instead of MD5.
|
||||
! Support display:inline-block
|
||||
! Support for more white-space CSS values.
|
||||
! Permit underscores in font families
|
||||
! Support for page-break-* CSS3 properties when proprietary properties
|
||||
are enabled.
|
||||
! New directive %Core.DisableExcludes; can be set to 'true' to turn off
|
||||
SGML excludes checking. If HTML Purifier is removing too much text
|
||||
and you don't care about full standards compliance, try setting this to
|
||||
'true'.
|
||||
- Use prepend for SPL autoloading on PHP 5.3 and later.
|
||||
- Fix bug with nofollow transform when pre-existing rel exists.
|
||||
- Fix bug where background:url() always gets lower-cased
|
||||
(but not background-image:url())
|
||||
- Fix bug with non lower-case color names in HTML
|
||||
- Fix bug where data URI validation doesn't remove temporary files.
|
||||
Thanks Javier Marín Ros <javiermarinros@gmail.com> for reporting.
|
||||
- Don't remove certain empty tags on RemoveEmpty.
|
||||
|
||||
4.4.0, released 2012-01-18
|
||||
# Removed PEARSax3 handler.
|
||||
# URI.Munge now munges URIs inside the same host that go from https
|
||||
to http. Reported by Neike Taika-Tessaro.
|
||||
# Core.EscapeNonASCIICharacters now always transforms entities to
|
||||
entities, even if target encoding is UTF-8.
|
||||
# Tighten up selector validation in ExtractStyleBlocks.
|
||||
Non-syntactically valid selectors are now rejected, along with
|
||||
some of the more obscure ones such as attribute selectors, the
|
||||
:lang pseudoselector, and anything not in CSS2.1. Furthermore,
|
||||
ID and class selectors now work properly with the relevant
|
||||
configuration attributes. Also, mute errors when parsing CSS
|
||||
with CSS Tidy. Reported by Mario Heiderich and Norman Hippert.
|
||||
! Added support for 'scope' attribute on tables.
|
||||
! Added %HTML.TargetBlank, which adds target="blank" to all outgoing links.
|
||||
! Properly handle sub-lists directly nested inside of lists in
|
||||
a standards compliant way, by moving them into the preceding <li>
|
||||
! Added %HTML.AllowedComments and %HTML.AllowedCommentsRegexp for
|
||||
limited allowed comments in untrusted situations.
|
||||
! Implement iframes, and allow them to be used in untrusted mode with
|
||||
%HTML.SafeIframe and %URI.SafeIframeRegexp. Thanks Bradley M. Froehle
|
||||
<brad.froehle@gmail.com> for submitting an initial version of the patch.
|
||||
! The Forms module now works properly for transitional doctypes.
|
||||
! Added support for internationalized domain names. You need the PEAR
|
||||
Net_IDNA2 module to be in your path; if it is installed, ensure the
|
||||
class can be loaded and then set %Core.EnableIDNA to true.
|
||||
- Color keywords are now case insensitive. Thanks Yzmir Ramirez
|
||||
<yramirez-htmlpurifier@adicio.com> for reporting.
|
||||
- Explicitly initialize anonModule variable to null.
|
||||
- Do not duplicate nofollow if already present. Thanks 178
|
||||
for reporting.
|
||||
- Do not add nofollow if hostname matches our current host. Thanks 178
|
||||
for reporting, and Neike Taika-Tessaro for helping diagnose.
|
||||
- Do not unset parser variable; this fixes intermittent serialization
|
||||
problems. Thanks Neike Taika-Tessaro for reporting, bill
|
||||
<10010tiger@gmail.com> for diagnosing.
|
||||
- Fix iconv truncation bug, where non-UTF-8 target encodings see
|
||||
output truncated after around 8000 characters. Thanks Jörg Ludwig
|
||||
<joerg.ludwig@iserv.eu> for reporting.
|
||||
- Fix broken table content model for XHTML1.1 (and also earlier
|
||||
versions, although the W3C validator doesn't catch those violations).
|
||||
Thanks GlitchMr <glitch.mr@gmail.com> for reporting.
|
||||
|
||||
4.3.0, released 2011-03-27
|
||||
# Fixed broken caching of customized raw definitions, but requires an
|
||||
API change. The old API still works but will emit a warning,
|
||||
see http://htmlpurifier.org/docs/enduser-customize.html#optimized
|
||||
for how to upgrade your code.
|
||||
# Protect against Internet Explorer innerHTML behavior by specially
|
||||
treating attributes with backticks but no angled brackets, quotes or
|
||||
spaces. This constitutes a slight semantic change, which can be
|
||||
reverted using %Output.FixInnerHTML. Reported by Neike Taika-Tessaro
|
||||
and Mario Heiderich.
|
||||
# Protect against cssText/innerHTML by restricting allowed characters
|
||||
used in fonts further than mandated by the specification and encoding
|
||||
some extra special characters in URLs. Reported by Neike
|
||||
Taika-Tessaro and Mario Heiderich.
|
||||
! Added %HTML.Nofollow to add rel="nofollow" to external links.
|
||||
! More types of SPL autoloaders allowed on later versions of PHP.
|
||||
! Implementations for position, top, left, right, bottom, z-index
|
||||
when %CSS.Trusted is on.
|
||||
! Add %Cache.SerializerPermissions option for custom serializer
|
||||
directory/file permissions
|
||||
! Fix longstanding bug in Flash support for non-IE browsers, and
|
||||
allow more wmode attributes.
|
||||
! Add %CSS.AllowedFonts to restrict permissible font names.
|
||||
- Switch to an iterative traversal of the DOM, which prevents us
|
||||
from running out of stack space for deeply nested documents.
|
||||
Thanks Maxim Krizhanovsky for contributing a patch.
|
||||
- Make removal of conditional IE comments ungreedy; thanks Bernd
|
||||
for reporting.
|
||||
- Escape CDATA before removing Internet Explorer comments.
|
||||
- Fix removal of id attributes under certain conditions by ensuring
|
||||
armor attributes are preserved when recreating tags.
|
||||
- Check if schema.ser was corrupted.
|
||||
- Check if zend.ze1_compatibility_mode is on, and error out if it is.
|
||||
This safety check is only done for HTMLPurifier.auto.php; if you
|
||||
are using standalone or the specialized includes files, you're
|
||||
expected to know what you're doing.
|
||||
- Stop repeatedly writing the cache file after I'm done customizing a
|
||||
raw definition. Reported by ajh.
|
||||
- Switch to using require_once in the Bootstrap to work around bad
|
||||
interaction with Zend Debugger and APC. Reported by Antonio Parraga.
|
||||
- Fix URI handling when hostname is missing but scheme is present.
|
||||
Reported by Neike Taika-Tessaro.
|
||||
- Fix missing numeric entities on DirectLex; thanks Neike Taika-Tessaro
|
||||
for reporting.
|
||||
- Fix harmless notice from indexing into empty string. Thanks Matthijs
|
||||
Kooijman <matthijs@stdin.nl> for reporting.
|
||||
- Don't autoclose no parent elements are able to support the element
|
||||
that triggered the autoclose. In particular fixes strange behavior
|
||||
of stray <li> tags. Thanks pkuliga@gmail.com for reporting and
|
||||
Neike Taika-Tessaro <pinkgothic@gmail.com> for debugging assistance.
|
||||
|
||||
4.2.0, released 2010-09-15
|
||||
! Added %Core.RemoveProcessingInstructions, which lets you remove
|
||||
<? ... ?> statements.
|
||||
! Added %URI.DisableResources functionality; the directive originally
|
||||
did nothing. Thanks David Rothstein for reporting.
|
||||
! Add documentation about configuration directive types.
|
||||
! Add %CSS.ForbiddenProperties configuration directive.
|
||||
! Add %HTML.FlashAllowFullScreen to permit embedded Flash objects
|
||||
to utilize full-screen mode.
|
||||
! Add optional support for the <code>file</code> URI scheme, enable
|
||||
by explicitly setting %URI.AllowedSchemes.
|
||||
! Add %Core.NormalizeNewlines options to allow turning off newline
|
||||
normalization.
|
||||
- Fix improper handling of Internet Explorer conditional comments
|
||||
by parser. Thanks zmonteca for reporting.
|
||||
- Fix missing attributes bug when running on Mac Snow Leopard and APC.
|
||||
Thanks sidepodcast for the fix.
|
||||
- Warn if an element is allowed, but an attribute it requires is
|
||||
not allowed.
|
||||
|
||||
4.1.1, released 2010-05-31
|
||||
- Fix undefined index warnings in maintenance scripts.
|
||||
- Fix bug in DirectLex for parsing elements with a single attribute
|
||||
with entities.
|
||||
- Rewrite CSS output logic for font-family and url(). Thanks Mario
|
||||
Heiderich <mario.heiderich@googlemail.com> for reporting and Takeshi
|
||||
Terada <t-terada@violet.plala.or.jp> for suggesting the fix.
|
||||
- Emit an error for CollectErrors if a body is extracted
|
||||
- Fix bug where in background-position for center keyword handling.
|
||||
- Fix infinite loop when a wrapper element is inserted in a context
|
||||
where it's not allowed. Thanks Lars <lars@renoz.dk> for reporting.
|
||||
- Remove +x bit and shebang from index.php; only supported mode is to
|
||||
explicitly call it with php.
|
||||
- Make test script less chatty when log_errors is on.
|
||||
|
||||
4.1.0, released 2010-04-26
|
||||
! Support proprietary height attribute on table element
|
||||
! Support YouTube slideshows that contain /cp/ in their URL.
|
||||
! Support for data: URI scheme; not enabled by default, add it using
|
||||
%URI.AllowedSchemes
|
||||
! Support flashvars when using %HTML.SafeObject and %HTML.SafeEmbed.
|
||||
! Support for Internet Explorer compatibility with %HTML.SafeObject
|
||||
using %Output.FlashCompat.
|
||||
! Handle <ol><ol> properly, by inserting the necessary <li> tag.
|
||||
- Always quote the insides of url(...) in CSS.
|
||||
|
||||
4.0.0, released 2009-07-07
|
||||
# APIs for ConfigSchema subsystem have substantially changed. See
|
||||
docs/dev-config-bcbreaks.txt for details; in essence, anything that
|
||||
had both namespace and directive now have a single unified key.
|
||||
# Some configuration directives were renamed, specifically:
|
||||
%AutoFormatParam.PurifierLinkifyDocURL -> %AutoFormat.PurifierLinkify.DocURL
|
||||
%FilterParam.ExtractStyleBlocksEscaping -> %Filter.ExtractStyleBlocks.Escaping
|
||||
%FilterParam.ExtractStyleBlocksScope -> %Filter.ExtractStyleBlocks.Scope
|
||||
%FilterParam.ExtractStyleBlocksTidyImpl -> %Filter.ExtractStyleBlocks.TidyImpl
|
||||
As usual, the old directive names will still work, but will throw E_NOTICE
|
||||
errors.
|
||||
# The allowed values for class have been relaxed to allow all of CDATA for
|
||||
doctypes that are not XHTML 1.1 or XHTML 2.0. For old behavior, set
|
||||
%Attr.ClassUseCDATA to false.
|
||||
# Instead of appending the content model to an old content model, a blank
|
||||
element will replace the old content model. You can use #SUPER to get
|
||||
the old content model.
|
||||
! More robust support for name="" and id=""
|
||||
! HTMLPurifier_Config::inherit($config) allows you to inherit one
|
||||
configuration, and have changes to that configuration be propagated
|
||||
to all of its children.
|
||||
! Implement %HTML.Attr.Name.UseCDATA, which relaxes validation rules on
|
||||
the name attribute when set. Use with care. Thanks Ian Cook for
|
||||
sponsoring.
|
||||
! Implement %AutoFormat.RemoveEmpty.RemoveNbsp, which removes empty
|
||||
tags that contain non-breaking spaces as well other whitespace. You
|
||||
can also modify which tags should have maintained with
|
||||
%AutoFormat.RemoveEmpty.RemoveNbsp.Exceptions.
|
||||
! Implement %Attr.AllowedClasses, which allows administrators to restrict
|
||||
classes users can use to a specified finite set of classes, and
|
||||
%Attr.ForbiddenClasses, which is the logical inverse.
|
||||
! You can now maintain your own configuration schema directories by
|
||||
creating a config-schema.php file or passing an extra argument. Check
|
||||
docs/dev-config-schema.html for more details.
|
||||
! Added HTMLPurifier_Config->serialize() method, which lets you save away
|
||||
your configuration in a compact serial file, which you can unserialize
|
||||
and use directly without having to go through the overhead of setup.
|
||||
- Fix bug where URIDefinition would not get cleared if it's directives got
|
||||
changed.
|
||||
- Fix fatal error in HTMLPurifier_Encoder on certain platforms (probably NetBSD 5.0)
|
||||
- Fix bug in Linkify autoformatter involving <a><span>http://foo</span></a>
|
||||
- Make %URI.Munge not apply to links that have the same host as your host.
|
||||
- Prevent stray </body> tag from truncating output, if a second </body>
|
||||
is present.
|
||||
. Created script maintenance/rename-config.php for renaming a configuration
|
||||
directive while maintaining its alias. This script does not change source code.
|
||||
. Implement namespace locking for definition construction, to prevent
|
||||
bugs where a directive is used for definition construction but is not
|
||||
used to construct the cache hash.
|
||||
|
||||
3.3.0, released 2009-02-16
|
||||
! Implement CSS property 'overflow' when %CSS.AllowTricky is true.
|
||||
! Implement generic property list classess
|
||||
|
107
TODO
107
TODO
@@ -11,54 +11,68 @@ If no interest is expressed for a feature that may require a considerable
|
||||
amount of effort to implement, it may get endlessly delayed. Do not be
|
||||
afraid to cast your vote for the next feature to be implemented!
|
||||
|
||||
- Investigate how early internal structures can be accessed; this would
|
||||
prevent structures from being parsed and serialized multiple times.
|
||||
- Built-in support for target="_blank" on all external links
|
||||
- Allow <a id="asdf" name="asdf">
|
||||
- Convert configuration to allow an arbitrary number of namespaces;
|
||||
then rename as appropriate.
|
||||
Things to do as soon as possible:
|
||||
|
||||
- http://htmlpurifier.org/phorum/read.php?3,5560,6307#msg-6307
|
||||
- Think about allowing explicit order of operations hooks for transforms
|
||||
- Fix "<.<" bug (trailing < is removed if not EOD)
|
||||
- Build in better internal state dumps and debugging tools for remote
|
||||
debugging
|
||||
- Allowed/Allowed* have strange interactions when both set
|
||||
? Transform lone embeds into object tags
|
||||
- Deprecated config options that emit warnings when you set them (with'
|
||||
a way of muting the warning if you really want to)
|
||||
- Make HTML.Trusted work with Output.FlashCompat
|
||||
- HTML.Trusted and HTML.SafeObject have funny interaction; general
|
||||
problem is what to do when a module "supersedes" another
|
||||
(see also tables and basic tables.) This is a little dicier
|
||||
because HTML.SafeObject has some extra functionality that
|
||||
trusted might find useful. See http://htmlpurifier.org/phorum/read.php?3,5762,6100
|
||||
|
||||
FUTURE VERSIONS
|
||||
---------------
|
||||
|
||||
4.1 release [It's All About Trust] (floating)
|
||||
4.6 release [OMG CONFIG PONIES]
|
||||
! Fix Printer. It's from the old days when we didn't have decent XML classes
|
||||
! Factor demo.php into a set of Printer classes, and then create a stub
|
||||
file for users here (inside the actual HTML Purifier library)
|
||||
- Fix error handling with form construction
|
||||
- Do encoding validation in Printers, or at least, where user data comes in
|
||||
- Config: Add examples to everything (make built-in which also automatically
|
||||
gives output)
|
||||
- Add "register" field to config schemas to eliminate dependence on
|
||||
naming conventions (try to remember why we ultimately decided on tihs)
|
||||
|
||||
5.0 release [HTML 5]
|
||||
# Swap out code to use html5lib tokenizer and tree-builder
|
||||
! Allow turning off of FixNesting and required attribute insertion
|
||||
|
||||
5.1 release [It's All About Trust] (floating)
|
||||
# Implement untrusted, dangerous elements/attributes
|
||||
# Implement IDREF support (harder than it seems, since you cannot have
|
||||
IDREFs to non-existent IDs)
|
||||
- Implement <area> (client and server side image maps are blocking
|
||||
on IDREF support)
|
||||
# Frameset XHTML 1.0 and HTML 4.01 doctypes
|
||||
- Implement <area>
|
||||
- Figure out how to simultaneously set %CSS.Trusted and %HTML.Trusted (?)
|
||||
|
||||
4.2 release [Error'ed]
|
||||
5.2 release [Error'ed]
|
||||
# Error logging for filtering/cleanup procedures
|
||||
- XSS-attempt detection--certain errors are flagged XSS-like
|
||||
|
||||
4.3 release [Do What I Mean, Not What I Say]
|
||||
# Additional support for poorly written HTML
|
||||
- Microsoft Word HTML cleaning (i.e. MsoNormal, but research essential!)
|
||||
- Friendly strict handling of <address> (block -> <br>)
|
||||
? Remove redundant tags, ex. <u><u>Underlined</u></u>. Implementation notes:
|
||||
1. Analyzing which tags to remove duplicants
|
||||
2. Ensure attributes are merged into the parent tag
|
||||
3. Extend the tag exclusion system to specify whether or not the
|
||||
contents should be dropped or not (currently, there's code that could do
|
||||
something like this if it didn't drop the inner text too.)
|
||||
- Remove <span> tags that don't do anything (no attributes)
|
||||
- XSS-attempt detection--certain errors are flagged XSS-like
|
||||
- Append something to duplicate IDs so they're still usable (impl. note: the
|
||||
dupe detector would also need to detect the suffix as well)
|
||||
- Externalize inline CSS to promote clean HTML, proposed by Sander Tekelenburg
|
||||
|
||||
5.0 release [Beyond HTML]
|
||||
6.0 release [Beyond HTML]
|
||||
# Legit token based CSS parsing (will require revamping almost every
|
||||
AttrDef class). Probably will use CSSTidy class?
|
||||
AttrDef class). Probably will use CSSTidy
|
||||
# More control over allowed CSS properties using a modularization
|
||||
# HTML 5 support
|
||||
# IRI support (this includes IDN)
|
||||
- Standardize token armor for all areas of processing
|
||||
- Convert RTL/LTR override characters to <bdo> tags, or vice versa on demand.
|
||||
Also, enable disabling of directionality
|
||||
|
||||
6.0 release [To XML and Beyond]
|
||||
7.0 release [To XML and Beyond]
|
||||
- Extended HTML capabilities based on namespacing and tag transforms (COMPLEX)
|
||||
- Hooks for adding custom processors to custom namespaced tags and
|
||||
attributes, offer default implementation
|
||||
@@ -69,27 +83,14 @@ Ongoing
|
||||
- Refactor unit tests into lots of test methods
|
||||
- Plugins for major CMSes (COMPLEX)
|
||||
- phpBB
|
||||
- Drupal needs loving!
|
||||
- Phorum need loving!
|
||||
- more! (look for ones that use WYSIWYGs)
|
||||
- Also, maybe a FAQ for extension writers with HTML Purifier
|
||||
- Also, a FAQ for extension writers with HTML Purifier
|
||||
|
||||
AutoFormat
|
||||
- Smileys
|
||||
- Syntax highlighting (with GeSHi) with <pre> and possibly <?php
|
||||
- Look at http://drupal.org/project/Modules/category/63 for ideas
|
||||
|
||||
Optimizations
|
||||
- Reduce size of internal data-structures (esp. HTMLDefinition)
|
||||
- Research memory usage of objects versus arrays
|
||||
- Combine multiple strategies into a single, single-pass strategy
|
||||
- Get PH5P working with the latest versions of DOM, which have much more
|
||||
stringent error checking procedures. Maybe convert straight to tokens.
|
||||
- Get rid of set_include_path(). Save this for another major release.
|
||||
|
||||
Neat feature related
|
||||
! Factor demo.php into a set of Printer classes, and then create a stub
|
||||
file for users here (inside the actual HTML Purifier library)
|
||||
! Support exporting configuration, so users can easily tweak settings
|
||||
in the demo, and then copy-paste into their own setup
|
||||
- Advanced URI filtering schemes (see docs/proposal-new-directives.txt)
|
||||
@@ -106,14 +107,32 @@ Neat feature related
|
||||
- Full set of color keywords. Also, a way to add onto them without
|
||||
finalizing the configuration object.
|
||||
- Write a var_export and memcached DefinitionCache - Denis
|
||||
- Allow restriction of allowed class values
|
||||
- Built-in support for target="_blank" on all external links
|
||||
- Convert RTL/LTR override characters to <bdo> tags, or vice versa on demand.
|
||||
Also, enable disabling of directionality
|
||||
? Externalize inline CSS to promote clean HTML, proposed by Sander Tekelenburg
|
||||
? Remove redundant tags, ex. <u><u>Underlined</u></u>. Implementation notes:
|
||||
1. Analyzing which tags to remove duplicants
|
||||
2. Ensure attributes are merged into the parent tag
|
||||
3. Extend the tag exclusion system to specify whether or not the
|
||||
contents should be dropped or not (currently, there's code that could do
|
||||
something like this if it didn't drop the inner text too.)
|
||||
? Make AutoParagraph also support paragraph-izing double <br> tags, and not
|
||||
just double newlines. This is kind of tough to do in the current framework,
|
||||
though, and might be reasonably approximated by search replacing double <br>s
|
||||
with newlines before running it through HTML Purifier.
|
||||
|
||||
Maintenance related (slightly boring)
|
||||
# CHMOD install script for PEAR installs
|
||||
! Factor out command line parser into its own class, and unit test it
|
||||
! Nested configuration namespaces
|
||||
- Distinguish between default settings and explicitly set settings, so
|
||||
configurations can be merged
|
||||
- Reduce size of internal data-structures (esp. HTMLDefinition)
|
||||
- Allow merging configurations. Thus,
|
||||
a -> b -> default
|
||||
c -> d -> default
|
||||
becomes
|
||||
a -> b -> c -> d -> default
|
||||
Maybe allow more fine-grained tuning of this behavior. Alternatively,
|
||||
encourage people to use short plist depths before building them up.
|
||||
- Time PHPT tests
|
||||
|
||||
ChildDef related (very boring)
|
||||
|
12
WHATSNEW
12
WHATSNEW
@@ -1,6 +1,6 @@
|
||||
HTML Purifier 3.3.0 is fixes a number of obscure bugs reported and fixed
|
||||
over a four month period. It is probably the last release in the 3.x
|
||||
series. Notable new features include support for the overflow CSS
|
||||
property; notable bugfixes include fixed YouTube rendering in certain
|
||||
versions of Firefox, CSSDefinition Printer, improved early PHP support
|
||||
and bugs in iconv.
|
||||
HTML Purifier 4.5.0 is a minor bugfix and feature release, containing an
|
||||
accumulation of changes over a year. CSS support has been extended to
|
||||
support display:inline-block, white-space, underscores in font families,
|
||||
page-break-* CSS3 properties (when proprietary is enabled.) We now use
|
||||
SHA-1 to identify cached definitions, and the semantics of stacked
|
||||
attribute transforms has changed slightly.
|
||||
|
@@ -52,4 +52,5 @@
|
||||
</body>
|
||||
</html>
|
||||
|
||||
<!-- vim: et sw=4 sts=4 -->
|
||||
<!-- vim: et sw=4 sts=4
|
||||
-->
|
||||
|
@@ -16,4 +16,5 @@ function qs(el) {if (window.RegExp && window.encodeURIComponent) {var ue=el.href
|
||||
// -->
|
||||
</script><table border=0 cellspacing=0 cellpadding=4><tr><td nowrap><font size=-1><b>Web</b> <a id=1a class=q href="/imghp?hl=en&tab=wi" onClick="return qs(this);">Images</a> <a id=2a class=q href="http://groups.google.com/grphp?hl=en&tab=wg" onClick="return qs(this);">Groups</a> <a id=4a class=q href="http://news.google.com/nwshp?hl=en&tab=wn" onClick="return qs(this);">News</a> <a id=5a class=q href="http://froogle.google.com/frghp?hl=en&tab=wf" onClick="return qs(this);">Froogle</a> <a id=8a class=q href="/lochp?hl=en&tab=wl" onClick="return qs(this);">Local</a> <b><a href="/intl/en/options/" class=q>more »</a></b></font></td></tr></table><table cellspacing=0 cellpadding=0><tr><td width=25%> </td><td align=center><input type=hidden name=hl value=en><input maxlength=2048 size=55 name=q value="" title="Google Search"><br><input type=submit value="Google Search" name=btnG><input type=submit value="I'm Feeling Lucky" name=btnI></td><td valign=top nowrap width=25%><font size=-2> <a href=/advanced_search?hl=en>Advanced Search</a><br> <a href=/preferences?hl=en>Preferences</a><br> <a href=/language_tools?hl=en>Language Tools</a></font></td></tr></table></form><br><br><font size=-1><a href="/ads/">Advertising Programs</a> - <a href=/services/>Business Solutions</a> - <a href=/about.html>About Google</a></font><p><font size=-2>©2006 Google</font></p></center></body></html>
|
||||
|
||||
<!-- vim: et sw=4 sts=4 -->
|
||||
<!-- vim: et sw=4 sts=4
|
||||
-->
|
||||
|
@@ -127,4 +127,5 @@ if (objAdMgr.isSlotAvailable("leaderboard")) {
|
||||
|
||||
</html>
|
||||
|
||||
<!-- vim: et sw=4 sts=4 -->
|
||||
<!-- vim: et sw=4 sts=4
|
||||
-->
|
||||
|
@@ -539,4 +539,5 @@ Retrieved from "<a href="http://en.wikipedia.org/wiki/Tai_Chi_Chuan">http://en.w
|
||||
<!-- Served by srv25 in 0.089 secs. -->
|
||||
</body></html>
|
||||
|
||||
<!-- vim: et sw=4 sts=4 -->
|
||||
<!-- vim: et sw=4 sts=4
|
||||
-->
|
||||
|
22
composer.json
Normal file
22
composer.json
Normal file
@@ -0,0 +1,22 @@
|
||||
{
|
||||
"name": "ezyang/htmlpurifier",
|
||||
"description": "Standards compliant HTML filter written in PHP",
|
||||
"type": "library",
|
||||
"keywords": ["html"],
|
||||
"homepage": "http://htmlpurifier.org/",
|
||||
"license": "LGPL",
|
||||
"authors": [
|
||||
{
|
||||
"name": "Edward Z. Yang",
|
||||
"email": "admin@htmlpurifier.org",
|
||||
"homepage": "http://ezyang.com"
|
||||
}
|
||||
],
|
||||
"require": {
|
||||
"php": ">=5.2"
|
||||
},
|
||||
"autoload": {
|
||||
"psr-0": { "HTMLPurifier": "library/" },
|
||||
"files": ["library/HTMLPurifier.composer.php"]
|
||||
}
|
||||
}
|
@@ -18,22 +18,24 @@ TODO:
|
||||
if (version_compare(PHP_VERSION, '5.2', '<')) exit('PHP 5.2+ required.');
|
||||
error_reporting(E_ALL | E_STRICT);
|
||||
|
||||
chdir(dirname(__FILE__));
|
||||
|
||||
// load dual-libraries
|
||||
require_once '../extras/HTMLPurifierExtras.auto.php';
|
||||
require_once '../library/HTMLPurifier.auto.php';
|
||||
require_once dirname(__FILE__) . '/../extras/HTMLPurifierExtras.auto.php';
|
||||
require_once dirname(__FILE__) . '/../library/HTMLPurifier.auto.php';
|
||||
|
||||
// setup HTML Purifier singleton
|
||||
HTMLPurifier::getInstance(array(
|
||||
'AutoFormat.PurifierLinkify' => true
|
||||
));
|
||||
|
||||
$interchange = HTMLPurifier_ConfigSchema_InterchangeBuilder::buildFromDirectory();
|
||||
$builder = new HTMLPurifier_ConfigSchema_InterchangeBuilder();
|
||||
$interchange = new HTMLPurifier_ConfigSchema_Interchange();
|
||||
$builder->buildDir($interchange);
|
||||
$loader = dirname(__FILE__) . '/../config-schema.php';
|
||||
if (file_exists($loader)) include $loader;
|
||||
$interchange->validate();
|
||||
|
||||
$style = 'plain'; // use $_GET in the future, careful to validate!
|
||||
$configdoc_xml = 'configdoc.xml';
|
||||
$configdoc_xml = dirname(__FILE__) . '/configdoc.xml';
|
||||
|
||||
$xml_builder = new HTMLPurifier_ConfigSchema_Builder_Xml();
|
||||
$xml_builder->openURI($configdoc_xml);
|
||||
@@ -50,13 +52,13 @@ if (!$output) {
|
||||
}
|
||||
|
||||
// write out
|
||||
file_put_contents("$style.html", $output);
|
||||
file_put_contents(dirname(__FILE__) . "/$style.html", $output);
|
||||
|
||||
if (php_sapi_name() != 'cli') {
|
||||
// output (instant feedback if it's a browser)
|
||||
echo $output;
|
||||
} else {
|
||||
echo 'Files generated successfully.';
|
||||
echo "Files generated successfully.\n";
|
||||
}
|
||||
|
||||
// vim: et sw=4 sts=4
|
||||
|
@@ -40,12 +40,26 @@
|
||||
</xsl:apply-templates>
|
||||
</ul>
|
||||
</div>
|
||||
<div id="typesContainer">
|
||||
<h2>Types</h2>
|
||||
<xsl:apply-templates select="$typeLookup" mode="types" />
|
||||
</div>
|
||||
<xsl:apply-templates />
|
||||
</div>
|
||||
</body>
|
||||
</html>
|
||||
</xsl:template>
|
||||
|
||||
<xsl:template match="type" mode="types">
|
||||
<div class="type-block">
|
||||
<xsl:attribute name="id">type-<xsl:value-of select="@id" /></xsl:attribute>
|
||||
<h3><code><xsl:value-of select="@id" /></code>: <xsl:value-of select="@name" /></h3>
|
||||
<div class="type-description">
|
||||
<xsl:copy-of xmlns:xhtml="http://www.w3.org/1999/xhtml" select="xhtml:div/node()" />
|
||||
</div>
|
||||
</div>
|
||||
</xsl:template>
|
||||
|
||||
<xsl:template match="title" mode="toc" />
|
||||
<xsl:template match="namespace" mode="toc">
|
||||
<xsl:param name="overflowNumber" />
|
||||
@@ -192,10 +206,13 @@
|
||||
<td>
|
||||
<xsl:variable name="type" select="text()" />
|
||||
<xsl:attribute name="class">type type-<xsl:value-of select="$type" /></xsl:attribute>
|
||||
<xsl:value-of select="$typeLookup/type[@id=$type]/text()" />
|
||||
<xsl:if test="@allow-null='yes'">
|
||||
(or null)
|
||||
</xsl:if>
|
||||
<a>
|
||||
<xsl:attribute name="href">#type-<xsl:value-of select="$type" /></xsl:attribute>
|
||||
<xsl:value-of select="$typeLookup/type[@id=$type]/@name" />
|
||||
<xsl:if test="@allow-null='yes'">
|
||||
(or null)
|
||||
</xsl:if>
|
||||
</a>
|
||||
</td>
|
||||
</tr>
|
||||
</xsl:template>
|
||||
@@ -232,4 +249,5 @@
|
||||
|
||||
</xsl:stylesheet>
|
||||
|
||||
<!-- vim: et sw=4 sts=4 -->
|
||||
<!-- vim: et sw=4 sts=4
|
||||
-->
|
||||
|
@@ -1,16 +1,69 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<types>
|
||||
<type id="string">String</type>
|
||||
<type id="istring">Case-insensitive string</type>
|
||||
<type id="text">Text</type>
|
||||
<type id="itext">Case-insensitive text</type>
|
||||
<type id="int">Integer</type>
|
||||
<type id="float">Float</type>
|
||||
<type id="bool">Boolean</type>
|
||||
<type id="lookup">Lookup array</type>
|
||||
<type id="list">Array list</type>
|
||||
<type id="hash">Associative array</type>
|
||||
<type id="mixed">Mixed</type>
|
||||
<type id="string" name="String"><div xmlns="http://www.w3.org/1999/xhtml">
|
||||
A <a
|
||||
href="http://docs.php.net/manual/en/language.types.string.php">sequence
|
||||
of characters</a>.
|
||||
</div></type>
|
||||
<type id="istring" name="Case-insensitive string"><div xmlns="http://www.w3.org/1999/xhtml">
|
||||
A series of case-insensitive characters. Internally, upper-case
|
||||
ASCII characters will be converted to lower-case.
|
||||
</div></type>
|
||||
<type id="text" name="Text"><div xmlns="http://www.w3.org/1999/xhtml">
|
||||
A series of characters that may contain newlines. Text tends to
|
||||
indicate human-oriented text, as opposed to a machine format.
|
||||
</div></type>
|
||||
<type id="itext" name="Case-insensitive text"><div xmlns="http://www.w3.org/1999/xhtml">
|
||||
A series of case-insensitive characters that may contain newlines.
|
||||
</div></type>
|
||||
<type id="int" name="Integer"><div xmlns="http://www.w3.org/1999/xhtml">
|
||||
An <a
|
||||
href="http://docs.php.net/manual/en/language.types.integer.php">
|
||||
integer</a>. You are alternatively permitted to pass a string of
|
||||
digits instead, which will be cast to an integer using
|
||||
<code>(int)</code>.
|
||||
</div></type>
|
||||
<type id="float" name="Float"><div xmlns="http://www.w3.org/1999/xhtml">
|
||||
A <a href="http://docs.php.net/manual/en/language.types.float.php">
|
||||
floating point number</a>. You are alternatively permitted to
|
||||
pass a numeric string (as defined by <code>is_numeric()</code>),
|
||||
which will be cast to a float using <code>(float)</code>.
|
||||
</div></type>
|
||||
<type id="bool" name="Boolean"><div xmlns="http://www.w3.org/1999/xhtml">
|
||||
A <a
|
||||
href="http://docs.php.net/manual/en/language.types.boolean.php">boolean</a>.
|
||||
You are alternatively permitted to pass an integer <code>0</code> or
|
||||
<code>1</code> (other integers are not permitted) or a string
|
||||
<code>"on"</code>, <code>"true"</code> or <code>"1"</code> for
|
||||
<code>true</code>, and <code>"off"</code>, <code>"false"</code> or
|
||||
<code>"0"</code> for <code>false</code>.
|
||||
</div></type>
|
||||
<type id="lookup" name="Lookup array"><div xmlns="http://www.w3.org/1999/xhtml">
|
||||
An array whose values are <code>true</code>, e.g. <code>array('key'
|
||||
=> true, 'key2' => true)</code>. You are alternatively permitted
|
||||
to pass an array list of the keys <code>array('key', 'key2')</code>
|
||||
or a comma-separated string of keys <code>"key, key2"</code>. If
|
||||
you pass an array list of values, ensure that your values are
|
||||
strictly numerically indexed: <code>array('key1', 2 =>
|
||||
'key2')</code> will not do what you expect and emits a warning.
|
||||
</div></type>
|
||||
<type id="list" name="Array list"><div xmlns="http://www.w3.org/1999/xhtml">
|
||||
An array which has consecutive integer indexes, e.g.
|
||||
<code>array('val1', 'val2')</code>. You are alternatively permitted
|
||||
to pass a comma-separated string of keys <code>"val1, val2"</code>.
|
||||
If your array is not in this form, <code>array_values</code> is run
|
||||
on the array and a warning is emitted.
|
||||
</div></type>
|
||||
<type id="hash" name="Associative array"><div xmlns="http://www.w3.org/1999/xhtml">
|
||||
An array which is a mapping of keys to values, e.g.
|
||||
<code>array('key1' => 'val1', 'key2' => 'val2')</code>. You are
|
||||
alternatively permitted to pass a comma-separated string of
|
||||
key-colon-value strings, e.g. <code>"key1: val1, key2: val2"</code>.
|
||||
</div></type>
|
||||
<type id="mixed" name="Mixed"><div xmlns="http://www.w3.org/1999/xhtml">
|
||||
An arbitrary PHP value of any type.
|
||||
</div></type>
|
||||
</types>
|
||||
|
||||
<!-- vim: et sw=4 sts=4 -->
|
||||
<!-- vim: et sw=4 sts=4
|
||||
-->
|
||||
|
@@ -6,6 +6,7 @@
|
||||
</file>
|
||||
<file name="HTMLPurifier/Lexer.php">
|
||||
<line>81</line>
|
||||
<line>284</line>
|
||||
</file>
|
||||
<file name="HTMLPurifier/Lexer/DirectLex.php">
|
||||
<line>53</line>
|
||||
@@ -13,7 +14,7 @@
|
||||
<line>348</line>
|
||||
</file>
|
||||
<file name="HTMLPurifier/Strategy/RemoveForeignElements.php">
|
||||
<line>47</line>
|
||||
<line>50</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="CSS.MaxImgLength">
|
||||
@@ -23,22 +24,32 @@
|
||||
</directive>
|
||||
<directive id="CSS.Proprietary">
|
||||
<file name="HTMLPurifier/CSSDefinition.php">
|
||||
<line>214</line>
|
||||
<line>215</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="CSS.AllowTricky">
|
||||
<file name="HTMLPurifier/CSSDefinition.php">
|
||||
<line>218</line>
|
||||
<line>219</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="CSS.Trusted">
|
||||
<file name="HTMLPurifier/CSSDefinition.php">
|
||||
<line>223</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="CSS.AllowImportant">
|
||||
<file name="HTMLPurifier/CSSDefinition.php">
|
||||
<line>222</line>
|
||||
<line>227</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="CSS.AllowedProperties">
|
||||
<file name="HTMLPurifier/CSSDefinition.php">
|
||||
<line>275</line>
|
||||
<line>302</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="CSS.ForbiddenProperties">
|
||||
<file name="HTMLPurifier/CSSDefinition.php">
|
||||
<line>316</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="Cache.DefinitionImpl">
|
||||
@@ -68,39 +79,57 @@
|
||||
</directive>
|
||||
<directive id="Core.Encoding">
|
||||
<file name="HTMLPurifier/Encoder.php">
|
||||
<line>267</line>
|
||||
<line>300</line>
|
||||
<line>337</line>
|
||||
<line>372</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="Test.ForceNoIconv">
|
||||
<file name="HTMLPurifier/Encoder.php">
|
||||
<line>272</line>
|
||||
<line>308</line>
|
||||
<line>341</line>
|
||||
<line>379</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="Core.EscapeNonASCIICharacters">
|
||||
<file name="HTMLPurifier/Encoder.php">
|
||||
<line>304</line>
|
||||
<line>373</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="Output.CommentScriptContents">
|
||||
<file name="HTMLPurifier/Generator.php">
|
||||
<line>45</line>
|
||||
<line>61</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="Output.FixInnerHTML">
|
||||
<file name="HTMLPurifier/Generator.php">
|
||||
<line>62</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="Output.SortAttr">
|
||||
<file name="HTMLPurifier/Generator.php">
|
||||
<line>46</line>
|
||||
<line>63</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="Output.FlashCompat">
|
||||
<file name="HTMLPurifier/Generator.php">
|
||||
<line>64</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="Output.TidyFormat">
|
||||
<file name="HTMLPurifier/Generator.php">
|
||||
<line>75</line>
|
||||
<line>93</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="Core.NormalizeNewlines">
|
||||
<file name="HTMLPurifier/Generator.php">
|
||||
<line>107</line>
|
||||
</file>
|
||||
<file name="HTMLPurifier/Lexer.php">
|
||||
<line>266</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="Output.Newline">
|
||||
<file name="HTMLPurifier/Generator.php">
|
||||
<line>89</line>
|
||||
<line>108</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="HTML.BlockWrapper">
|
||||
@@ -130,20 +159,20 @@
|
||||
</directive>
|
||||
<directive id="HTML.ForbiddenElements">
|
||||
<file name="HTMLPurifier/HTMLDefinition.php">
|
||||
<line>337</line>
|
||||
<line>342</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="HTML.ForbiddenAttributes">
|
||||
<file name="HTMLPurifier/HTMLDefinition.php">
|
||||
<line>338</line>
|
||||
<line>343</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="HTML.Trusted">
|
||||
<file name="HTMLPurifier/HTMLModuleManager.php">
|
||||
<line>202</line>
|
||||
<line>204</line>
|
||||
</file>
|
||||
<file name="HTMLPurifier/Lexer.php">
|
||||
<line>258</line>
|
||||
<line>271</line>
|
||||
</file>
|
||||
<file name="HTMLPurifier/HTMLModule/Image.php">
|
||||
<line>27</line>
|
||||
@@ -157,27 +186,45 @@
|
||||
</directive>
|
||||
<directive id="HTML.AllowedModules">
|
||||
<file name="HTMLPurifier/HTMLModuleManager.php">
|
||||
<line>209</line>
|
||||
<line>211</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="HTML.CoreModules">
|
||||
<file name="HTMLPurifier/HTMLModuleManager.php">
|
||||
<line>210</line>
|
||||
<line>212</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="HTML.Proprietary">
|
||||
<file name="HTMLPurifier/HTMLModuleManager.php">
|
||||
<line>221</line>
|
||||
<line>222</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="HTML.SafeObject">
|
||||
<file name="HTMLPurifier/HTMLModuleManager.php">
|
||||
<line>226</line>
|
||||
<line>225</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="HTML.SafeEmbed">
|
||||
<file name="HTMLPurifier/HTMLModuleManager.php">
|
||||
<line>229</line>
|
||||
<line>228</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="HTML.SafeScripting">
|
||||
<file name="HTMLPurifier/HTMLModuleManager.php">
|
||||
<line>231</line>
|
||||
</file>
|
||||
<file name="HTMLPurifier/HTMLModule/SafeScripting.php">
|
||||
<line>17</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="HTML.Nofollow">
|
||||
<file name="HTMLPurifier/HTMLModuleManager.php">
|
||||
<line>234</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="HTML.TargetBlank">
|
||||
<file name="HTMLPurifier/HTMLModuleManager.php">
|
||||
<line>237</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="Attr.IDBlacklist">
|
||||
@@ -205,32 +252,48 @@
|
||||
</directive>
|
||||
<directive id="Core.ConvertDocumentToFragment">
|
||||
<file name="HTMLPurifier/Lexer.php">
|
||||
<line>267</line>
|
||||
<line>282</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="Core.RemoveProcessingInstructions">
|
||||
<file name="HTMLPurifier/Lexer.php">
|
||||
<line>303</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="URI.">
|
||||
<file name="HTMLPurifier/URIDefinition.php">
|
||||
<line>60</line>
|
||||
</file>
|
||||
<file name="HTMLPurifier/URIFilter/Munge.php">
|
||||
<line>12</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="URI.Host">
|
||||
<file name="HTMLPurifier/URIDefinition.php">
|
||||
<line>64</line>
|
||||
<line>70</line>
|
||||
</file>
|
||||
<file name="HTMLPurifier/URIScheme.php">
|
||||
<line>81</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="URI.Base">
|
||||
<file name="HTMLPurifier/URIDefinition.php">
|
||||
<line>65</line>
|
||||
<line>71</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="URI.DefaultScheme">
|
||||
<file name="HTMLPurifier/URIDefinition.php">
|
||||
<line>72</line>
|
||||
<line>78</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="URI.AllowedSchemes">
|
||||
<file name="HTMLPurifier/URISchemeRegistry.php">
|
||||
<line>42</line>
|
||||
<line>41</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="URI.OverrideAllowedSchemes">
|
||||
<file name="HTMLPurifier/URISchemeRegistry.php">
|
||||
<line>43</line>
|
||||
<line>42</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="URI.Disable">
|
||||
@@ -246,6 +309,21 @@
|
||||
<line>12</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="CSS.AllowedFonts">
|
||||
<file name="HTMLPurifier/AttrDef/CSS/FontFamily.php">
|
||||
<line>50</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="Attr.AllowedClasses">
|
||||
<file name="HTMLPurifier/AttrDef/HTML/Class.php">
|
||||
<line>18</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="Attr.ForbiddenClasses">
|
||||
<file name="HTMLPurifier/AttrDef/HTML/Class.php">
|
||||
<line>19</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="Attr.AllowedFrameTargets">
|
||||
<file name="HTMLPurifier/AttrDef/HTML/FrameTarget.php">
|
||||
<line>15</line>
|
||||
@@ -253,23 +331,33 @@
|
||||
</directive>
|
||||
<directive id="Attr.EnableID">
|
||||
<file name="HTMLPurifier/AttrDef/HTML/ID.php">
|
||||
<line>20</line>
|
||||
<line>30</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="Attr.IDPrefix">
|
||||
<file name="HTMLPurifier/AttrDef/HTML/ID.php">
|
||||
<line>26</line>
|
||||
<line>36</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="Attr.IDPrefixLocal">
|
||||
<file name="HTMLPurifier/AttrDef/HTML/ID.php">
|
||||
<line>28</line>
|
||||
<line>31</line>
|
||||
<line>38</line>
|
||||
<line>41</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="Attr.IDBlacklistRegexp">
|
||||
<file name="HTMLPurifier/AttrDef/HTML/ID.php">
|
||||
<line>54</line>
|
||||
<line>64</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="Attr.">
|
||||
<file name="HTMLPurifier/AttrDef/HTML/LinkTypes.php">
|
||||
<line>30</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="Core.EnableIDNA">
|
||||
<file name="HTMLPurifier/AttrDef/URI/Host.php">
|
||||
<line>67</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="Attr.DefaultTextDir">
|
||||
@@ -297,7 +385,20 @@
|
||||
</directive>
|
||||
<directive id="Attr.DefaultInvalidImageAlt">
|
||||
<file name="HTMLPurifier/AttrTransform/ImgRequired.php">
|
||||
<line>32</line>
|
||||
<line>33</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="HTML.Attr.Name.UseCDATA">
|
||||
<file name="HTMLPurifier/AttrTransform/Name.php">
|
||||
<line>11</line>
|
||||
</file>
|
||||
<file name="HTMLPurifier/HTMLModule/Name.php">
|
||||
<line>13</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="HTML.FlashAllowFullScreen">
|
||||
<file name="HTMLPurifier/AttrTransform/SafeParam.php">
|
||||
<line>38</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="Core.EscapeInvalidChildren">
|
||||
@@ -310,19 +411,33 @@
|
||||
<line>91</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="FilterParam.ExtractStyleBlocksTidyImpl">
|
||||
<file name="HTMLPurifier/Filter/ExtractStyleBlocks.php">
|
||||
<line>41</line>
|
||||
<directive id="Cache.SerializerPermissions">
|
||||
<file name="HTMLPurifier/DefinitionCache/Serializer.php">
|
||||
<line>107</line>
|
||||
<line>124</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="FilterParam.ExtractStyleBlocksScope">
|
||||
<directive id="Filter.ExtractStyleBlocks.TidyImpl">
|
||||
<file name="HTMLPurifier/Filter/ExtractStyleBlocks.php">
|
||||
<line>65</line>
|
||||
<line>55</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="FilterParam.ExtractStyleBlocksEscaping">
|
||||
<directive id="Filter.ExtractStyleBlocks.Scope">
|
||||
<file name="HTMLPurifier/Filter/ExtractStyleBlocks.php">
|
||||
<line>123</line>
|
||||
<line>79</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="Filter.ExtractStyleBlocks.Escaping">
|
||||
<file name="HTMLPurifier/Filter/ExtractStyleBlocks.php">
|
||||
<line>277</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="HTML.SafeIframe">
|
||||
<file name="HTMLPurifier/HTMLModule/Iframe.php">
|
||||
<line>17</line>
|
||||
</file>
|
||||
<file name="HTMLPurifier/URIFilter/SafeIframe.php">
|
||||
<line>23</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="HTML.MaxImgLength">
|
||||
@@ -351,11 +466,21 @@
|
||||
<line>50</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="AutoFormatParam.PurifierLinkifyDocURL">
|
||||
<directive id="AutoFormat.PurifierLinkify.DocURL">
|
||||
<file name="HTMLPurifier/Injector/PurifierLinkify.php">
|
||||
<line>15</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="AutoFormat.RemoveEmpty.RemoveNbsp">
|
||||
<file name="HTMLPurifier/Injector/RemoveEmpty.php">
|
||||
<line>15</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="AutoFormat.RemoveEmpty.RemoveNbsp.Exceptions">
|
||||
<file name="HTMLPurifier/Injector/RemoveEmpty.php">
|
||||
<line>16</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="Core.AggressivelyFixLt">
|
||||
<file name="HTMLPurifier/Lexer/DOMLex.php">
|
||||
<line>44</line>
|
||||
@@ -366,27 +491,42 @@
|
||||
<line>70</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="Core.DisableExcludes">
|
||||
<file name="HTMLPurifier/Strategy/FixNesting.php">
|
||||
<line>57</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="Core.EscapeInvalidTags">
|
||||
<file name="HTMLPurifier/Strategy/MakeWellFormed.php">
|
||||
<line>45</line>
|
||||
<line>53</line>
|
||||
</file>
|
||||
<file name="HTMLPurifier/Strategy/RemoveForeignElements.php">
|
||||
<line>19</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="Core.RemoveScriptContents">
|
||||
<directive id="HTML.AllowedComments">
|
||||
<file name="HTMLPurifier/Strategy/RemoveForeignElements.php">
|
||||
<line>24</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="HTML.AllowedCommentsRegexp">
|
||||
<file name="HTMLPurifier/Strategy/RemoveForeignElements.php">
|
||||
<line>25</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="Core.RemoveScriptContents">
|
||||
<file name="HTMLPurifier/Strategy/RemoveForeignElements.php">
|
||||
<line>28</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="Core.HiddenElements">
|
||||
<file name="HTMLPurifier/Strategy/RemoveForeignElements.php">
|
||||
<line>26</line>
|
||||
<line>29</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="URI.HostBlacklist">
|
||||
<file name="HTMLPurifier/URIFilter/HostBlacklist.php">
|
||||
<line>8</line>
|
||||
<line>12</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="URI.MungeResources">
|
||||
@@ -399,4 +539,9 @@
|
||||
<line>15</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="URI.SafeIframeRegexp">
|
||||
<file name="HTMLPurifier/URIFilter/SafeIframe.php">
|
||||
<line>18</line>
|
||||
</file>
|
||||
</directive>
|
||||
</usage>
|
||||
|
@@ -17,202 +17,10 @@
|
||||
<div id="home"><a href="http://htmlpurifier.org/">HTML Purifier</a> End-User Documentation</div>
|
||||
|
||||
<p>
|
||||
<strong>Warning:</strong> This document may be out-of-date. When in doubt,
|
||||
consult the source code documentation.
|
||||
Please see <a href="enduser-customize.html">Customize!</a>
|
||||
</p>
|
||||
|
||||
<p>HTML Purifier currently natively supports only a subset of HTML's
|
||||
allowed elements, attributes, and behavior; specifically, this subset
|
||||
is the set of elements that are safe for untrusted users to use.
|
||||
However, HTML Purifier is often utilized to ensure standards-compliance
|
||||
from input that is trusted (making it a sort of Tidy substitute),
|
||||
and often users need to define new elements or attributes. The
|
||||
advanced API is oriented specifically for these use-cases.</p>
|
||||
|
||||
<p>Our goals are to let the user:</p>
|
||||
|
||||
<dl>
|
||||
<dt>Select</dt>
|
||||
<dd><ul>
|
||||
<li>Doctype</li>
|
||||
<!-- <li>Filterset</li> -->
|
||||
<li>Elements / Attributes / Modules</li>
|
||||
<li>Tidy</li>
|
||||
</ul></dd>
|
||||
<dt>Customize</dt>
|
||||
<dd><ul>
|
||||
<li>Attributes</li>
|
||||
<li>Elements</li>
|
||||
<!--<li>Doctypes</li>-->
|
||||
</ul></dd>
|
||||
</dl>
|
||||
|
||||
<h2>Select</h2>
|
||||
|
||||
<p>For basic use, the user will have to specify some basic parameters. This
|
||||
is not strictly necessary, as HTML Purifier's default setting will always
|
||||
output safe code, but is required for standards-compliant output.</p>
|
||||
|
||||
<h3>Selecting a Doctype</h3>
|
||||
|
||||
<p>The first thing to select is the <strong>doctype</strong>. This
|
||||
is essential for standards-compliant output.</p>
|
||||
|
||||
<p class="technical">This identifier is based
|
||||
on the name the W3C has given to the document type and <em>not</em>
|
||||
the DTD identifier.</p>
|
||||
|
||||
<p>This parameter is set via the configuration object:</p>
|
||||
|
||||
<pre>$config->set('HTML', 'Doctype', 'XHTML 1.0 Transitional');</pre>
|
||||
|
||||
<p>Due to historical reasons, the default doctype is XHTML 1.0
|
||||
Transitional, however, we really shouldn't be guessing what the user's
|
||||
doctype is. Fortunantely, people who can't be bothered to set this won't
|
||||
be bothered when their pages stop validating.</p>
|
||||
|
||||
<h3>Selecting Elements / Attributes / Modules</h3>
|
||||
|
||||
<p>HTML Purifier will, by default, allow as many elements and attributes
|
||||
as possible. However, a user may decide to roll their own filterset by
|
||||
selecting modules, elements and attributes to allow for their own
|
||||
specific use-case. This can be done using %HTML.Allowed:</p>
|
||||
|
||||
<pre>$config->set('HTML', 'Allowed', 'a[href|title],em,p,blockquote');</pre>
|
||||
|
||||
<p class="technical">The directive %HTML.Allowed is a convenience feature
|
||||
that may be fully expressed with the legacy interface.</p>
|
||||
|
||||
<p>We currently support another interface from older versions:</p>
|
||||
|
||||
<pre>$config->set('HTML', 'AllowedElements', 'a,em,p,blockquote');
|
||||
$config->set('HTML', 'AllowedAttributes', 'a.href,a.title');</pre>
|
||||
|
||||
<p>A user may also choose to allow modules using a specialized
|
||||
directive:</p>
|
||||
|
||||
<pre>$config->set('HTML', 'AllowedModules', 'Hypertext,Text,Lists');</pre>
|
||||
|
||||
<p>But it is not expected that this feature will be widely used.</p>
|
||||
|
||||
<p class="technical">Module selection will work slightly differently
|
||||
from the other AllowedElements and AllowedAttributes directives by
|
||||
directly modifying the doctype you are operating in, in the spirit of
|
||||
XHTML 1.1's modularization. We stop users from shooting themselves in the
|
||||
foot by mandating the modules in %HTML.CoreModules be used.</p>
|
||||
|
||||
<p class="technical">Modules are distinguished from regular elements by the
|
||||
case of their first letter. While XML distinguishes between and allows
|
||||
lower and uppercase letters in element names, XHTML uses only lower-case
|
||||
element names for sake of consistency.</p>
|
||||
|
||||
<h3>Selecting Tidy</h3>
|
||||
|
||||
<p>The name of this segment of functionality is inspired off of Dave
|
||||
Ragget's program HTML Tidy, which purported to help clean up HTML. In
|
||||
HTML Purifier, Tidy functionality involves turning unsupported and
|
||||
deprecated elements into standards-compliant ones, maintaining
|
||||
backwards compatibility, and enforcing best practices.</p>
|
||||
|
||||
<p>This is a complicated feature, and is explained more in depth at
|
||||
<a href="enduser-tidy.html">the Tidy documentation page</a>.</p>
|
||||
|
||||
<!--
|
||||
<h3>Unified selector</h3>
|
||||
|
||||
<p>Because selecting each and every one of these configuration options
|
||||
is a chore, we may wish to offer a specialized configuration method
|
||||
for selecting a filterset. Possibility:</p>
|
||||
|
||||
<pre>function selectFilter($doctype, $filterset, $tidy)</pre>
|
||||
|
||||
<p>...which is simply a light wrapper over the individual configuration
|
||||
calls. A custom config file format or text format could also be adopted.</p>
|
||||
-->
|
||||
|
||||
<h2>Customize</h2>
|
||||
|
||||
<p>By reviewing topic posts in the support forum, we determined that
|
||||
there were two primarily demanded customization features people wanted:
|
||||
to add an attribute to an existing element, and to add an element.
|
||||
Thus, we'll want to create convenience functions for these common
|
||||
use-cases.</p>
|
||||
|
||||
<p>Note that the functions described here are only available if
|
||||
a raw copy of <code>HTMLPurifier_HTMLDefinition</code> was retrieved.
|
||||
Furthermore, caching may prevent your changes from immediately
|
||||
being seen: consult <a href="enduser-customize.html">enduser-customize.html</a> on how
|
||||
to work around this.</p>
|
||||
|
||||
<h3>Attributes</h3>
|
||||
|
||||
<p>An attribute is bound to an element by a name and has a specific
|
||||
<code>AttrDef</code> that validates it. The interface is therefore:</p>
|
||||
|
||||
<pre>function addAttribute($element, $attribute, $attribute_def);</pre>
|
||||
|
||||
<p>Example of the functionality in action:</p>
|
||||
|
||||
<pre>$def->addAttribute('a', 'rel', 'Enum#nofollow');</pre>
|
||||
|
||||
<p>The <code>$attribute_def</code> value is flexible,
|
||||
to make things simpler. It can be a literal object or:</p>
|
||||
|
||||
<ul>
|
||||
<!--<li>Class name: We'll instantiate it for you</li>
|
||||
<li>Function name: We'll create an <code>HTMLPurifier_AttrDef_Anonymous</code>
|
||||
class with that function registered as a callback.</li>-->
|
||||
<li>String attribute type: We'll use <code>HTMLPurifier_AttrTypes</code>
|
||||
to resolve it for you. Any data that follows a hash mark (#) will
|
||||
be used to customize the attribute type: in the example above,
|
||||
we specify which values for Enum to allow.</li>
|
||||
</ul>
|
||||
|
||||
<h3>Elements</h3>
|
||||
|
||||
<p>An element requires certain information as specified by
|
||||
<code>HTMLPurifier_ElementDef</code>. However, not all of it is necessary,
|
||||
the usual things required are:</p>
|
||||
|
||||
<ul>
|
||||
<li>Attributes</li>
|
||||
<li>Content model/type</li>
|
||||
<li>Registration in a content set</li>
|
||||
</ul>
|
||||
|
||||
<p>This suggests an API like this:</p>
|
||||
|
||||
<pre>function addElement($element, $type, $contents,
|
||||
$attr_collections = array(); $attributes = array());</pre>
|
||||
|
||||
<p>Each parameter explained in depth:</p>
|
||||
|
||||
<dl>
|
||||
<dt><code>$element</code></dt>
|
||||
<dd>Element name, ex. 'label'</dd>
|
||||
<dt><code>$type</code></dt>
|
||||
<dd>Content set to register in, ex. 'Inline' or 'Flow'</dd>
|
||||
<dt><code>$contents</code></dt>
|
||||
<dd>Description of allowed children. This is a merged form of
|
||||
<code>HTMLPurifier_ElementDef</code>'s member variables
|
||||
<code>$content_model</code> and <code>$content_model_type</code>,
|
||||
where the form is <q>Type: Model</q>, ex. 'Optional: Inline'.
|
||||
There are also a number of predefined templates one may use.</dd>
|
||||
<dt><code>$attr_collections</code></dt>
|
||||
<dd>Array (or string if only one) of attribute collection(s) to
|
||||
merge into the attributes array.</dd>
|
||||
<dt><code>$attributes</code></dt>
|
||||
<dd>Array of attribute names to attribute definitions, much like
|
||||
the above-described attribute customization.</dd>
|
||||
</dl>
|
||||
|
||||
<p>A possible usage:</p>
|
||||
|
||||
<pre>$def->addElement('font', 'Inline', 'Optional: Inline', 'Common',
|
||||
array('color' => 'Color'));</pre>
|
||||
|
||||
<p>See <code>HTMLPurifier/HTMLModule.php</code> for details.</p>
|
||||
|
||||
</body></html>
|
||||
|
||||
<!-- vim: et sw=4 sts=4 -->
|
||||
<!-- vim: et sw=4 sts=4
|
||||
-->
|
||||
|
79
docs/dev-config-bcbreaks.txt
Normal file
79
docs/dev-config-bcbreaks.txt
Normal file
@@ -0,0 +1,79 @@
|
||||
|
||||
Configuration Backwards-Compatibility Breaks
|
||||
|
||||
In version 4.0.0, the configuration subsystem (composed of the outwards
|
||||
facing Config class, as well as the ConfigSchema and ConfigSchema_Interchange
|
||||
subsystems), was significantly revamped to make use of property lists.
|
||||
While most of the changes are internal, some internal APIs were changed for the
|
||||
sake of clarity. HTMLPurifier_Config was kept completely backwards compatible,
|
||||
although some of the functions were retrofitted with an unambiguous alternate
|
||||
syntax. Both of these changes are discussed in this document.
|
||||
|
||||
|
||||
|
||||
1. Outwards Facing Changes
|
||||
--------------------------------------------------------------------------------
|
||||
|
||||
The HTMLPurifier_Config class now takes an alternate syntax. The general rule
|
||||
is:
|
||||
|
||||
If you passed $namespace, $directive, pass "$namespace.$directive"
|
||||
instead.
|
||||
|
||||
An example:
|
||||
|
||||
$config->set('HTML', 'Allowed', 'p');
|
||||
|
||||
becomes:
|
||||
|
||||
$config->set('HTML.Allowed', 'p');
|
||||
|
||||
New configuration options may have more than one namespace, they might
|
||||
look something like %Filter.YouTube.Blacklist. While you could technically
|
||||
set it with ('HTML', 'YouTube.Blacklist'), the logical extension
|
||||
('HTML', 'YouTube', 'Blacklist') does not work.
|
||||
|
||||
The old API will still work, but will emit E_USER_NOTICEs.
|
||||
|
||||
|
||||
|
||||
2. Internal API Changes
|
||||
--------------------------------------------------------------------------------
|
||||
|
||||
Some overarching notes: we've completely eliminated the notion of namespace;
|
||||
it's now an informal construct for organizing related configuration directives.
|
||||
|
||||
Also, the validation routines for keys (formerly "$namespace.$directive")
|
||||
have been completely relaxed. I don't think it really should be necessary.
|
||||
|
||||
2.1 HTMLPurifier_ConfigSchema
|
||||
|
||||
First off, if you're interfacing with this class, you really shouldn't.
|
||||
HTMLPurifier_ConfigSchema_Builder_ConfigSchema is really the only class that
|
||||
should ever be creating HTMLPurifier_ConfigSchema, and HTMLPurifier_Config the
|
||||
only class that should be reading it.
|
||||
|
||||
All namespace related methods were removed; they are completely unnecessary
|
||||
now. Any $namespace, $name arguments must be replaced with $key (where
|
||||
$key == "$namespace.$name"), including for addAlias().
|
||||
|
||||
The $info and $defaults member variables are no longer indexed as
|
||||
[$namespace][$name]; they are now indexed as ["$namespace.$name"].
|
||||
|
||||
All deprecated methods were finally removed, after having yelled at you as
|
||||
an E_USER_NOTICE for a while now.
|
||||
|
||||
2.2 HTMLPurifier_ConfigSchema_Interchange
|
||||
|
||||
Member variable $namespaces was removed.
|
||||
|
||||
2.3 HTMLPurifier_ConfigSchema_Interchange_Id
|
||||
|
||||
Member variable $namespace and $directive removed; member variable $key added.
|
||||
Any method that took $namespace, $directive now takes $key.
|
||||
|
||||
2.4 HTMLPurifier_ConfigSchema_Interchange_Namespace
|
||||
|
||||
Removed.
|
||||
|
||||
vim: et sw=4 sts=4
|
164
docs/dev-config-naming.txt
Normal file
164
docs/dev-config-naming.txt
Normal file
@@ -0,0 +1,164 @@
|
||||
Configuration naming
|
||||
|
||||
HTML Purifier 4.0.0 features a new configuration naming system that
|
||||
allows arbitrary nesting of namespaces. While there are certain cases
|
||||
in which using two namespaces is obviously better (the canonical example
|
||||
is where we were using AutoFormatParam to contain directives for AutoFormat
|
||||
parameters), it is unclear whether or not a general migration to highly
|
||||
namespaced directives is a good idea or not.
|
||||
|
||||
== Case studies ==
|
||||
|
||||
=== Attr.* ===
|
||||
|
||||
We have a dead duck HTML.Attr.Name.UseCDATA which migrated before we decided
|
||||
to think this out thoroughly.
|
||||
|
||||
We currently have a large number of directives in the Attr.* namespace.
|
||||
These directives tweak the behavior of some HTML attributes. They have
|
||||
the properties:
|
||||
|
||||
* While they apply to only one attribute at a time, the attribute can
|
||||
span over multiple elements (not necessarily all attributes, either).
|
||||
The information of which elements it impacts is either omitted or
|
||||
informally stated (EnableID applies to all elements, DefaultImageAlt
|
||||
applies to <img> tags, AllowedRev doesn't say but only applies to a tags).
|
||||
|
||||
* There is a certain degree of clustering that could be applied, especially
|
||||
to the ID directives. The clustering could be done with respect to
|
||||
what element/attribute was used, i.e.
|
||||
|
||||
*.id -> EnableID, IDBlacklistRegexp, IDBlacklist, IDPrefixLocal, IDPrefix
|
||||
img.src -> DefaultInvalidImage
|
||||
img.alt -> DefaultImageAlt, DefaultInvalidImageAlt
|
||||
bdo.dir -> DefaultTextDir
|
||||
a.rel -> AllowedRel
|
||||
a.rev -> AllowedRev
|
||||
a.target -> AllowedFrameTargets
|
||||
a.name -> Name.UseCDATA
|
||||
|
||||
* The directives often reference generic attribute types that were specified
|
||||
in the DTD/specification. However, some of the behavior specifically relies
|
||||
on the fact that other use cases of the attribute are not, at current,
|
||||
supported by HTML Purifier.
|
||||
|
||||
AllowedRel, AllowedRev -> heavily <a> specific; if <link> ends up being
|
||||
allowed, we will also have to give users specificity there (we also
|
||||
want to preserve generality) DTD %Linktypes, HTML5 distinguishes
|
||||
between <link> and <a>/<area>
|
||||
AllowedFrameTargets -> heavily <a> specific, but also used by <area>
|
||||
and <form>. Transitional DTD %FrameTarget, not present in strict,
|
||||
HTML5 calls them "browsing contexts"
|
||||
Default*Image* -> as a default parameter, is almost entirely exlcusive
|
||||
to <img>
|
||||
EnableID -> global attribute
|
||||
Name.UseCDATA -> heavily <a> specific, but has heavy other usage by
|
||||
many things
|
||||
|
||||
== AutoFormat.* ==
|
||||
|
||||
These have the fairly normal pluggable architecture that lends itself to
|
||||
large amounts of namespaces (pluggability may be the key to figuring
|
||||
out when gratuitous namespacing is good.) Properties:
|
||||
|
||||
* Boolean directives are fair game for being namespaced: for example,
|
||||
RemoveEmpty.RemoveNbsp triggers RemoveEmpty.RemoveNbsp.Exceptions,
|
||||
the latter of which only makes sense when RemoveEmpty.RemoveNbsp
|
||||
is set to true. (The same applies to RemoveNbsp too)
|
||||
|
||||
The AutoFormat string is a bit long, but is the only bit of repeated
|
||||
context.
|
||||
|
||||
== Core.* ==
|
||||
|
||||
Core is the potpourri of directives, mostly regarding some minor behavioral
|
||||
tweaks for HTML handling abilities.
|
||||
|
||||
AggressivelyFixLt
|
||||
ConvertDocumentToFragment
|
||||
DirectLexLineNumberSyncInterval
|
||||
LexerImpl
|
||||
MaintainLineNumbers
|
||||
Lexer
|
||||
CollectErrors
|
||||
Language
|
||||
Error handling (Language is ostensibly a little more general, but
|
||||
it's only used for error handling right now)
|
||||
ColorKeywords
|
||||
CSS and HTML
|
||||
Encoding
|
||||
EscapeNonASCIICharacters
|
||||
Character encoding
|
||||
EscapeInvalidChildren
|
||||
EscapeInvalidTags
|
||||
HiddenElements
|
||||
RemoveInvalidImg
|
||||
Lexing/Output
|
||||
RemoveScriptContents
|
||||
Deprecated
|
||||
|
||||
== HTML.* ==
|
||||
|
||||
AllowedAttributes
|
||||
AllowedElements
|
||||
AllowedModules
|
||||
Allowed
|
||||
ForbiddenAttributes
|
||||
ForbiddenElements
|
||||
Element set tuning
|
||||
BlockWrapper
|
||||
Child def advanced twiddle
|
||||
CoreModules
|
||||
CustomDoctype
|
||||
Advanced HTMLModuleManager twiddles
|
||||
DefinitionID
|
||||
DefinitionRev
|
||||
Caching
|
||||
Doctype
|
||||
Parent
|
||||
Strict
|
||||
XHTML
|
||||
Global environment
|
||||
MaxImgLength
|
||||
Attribute twiddle? (applies to two attributes)
|
||||
Proprietary
|
||||
SafeEmbed
|
||||
SafeObject
|
||||
Trusted
|
||||
Extra functionality/tagsets
|
||||
TidyAdd
|
||||
TidyLevel
|
||||
TidyRemove
|
||||
Tidy
|
||||
|
||||
== Output.* ==
|
||||
|
||||
These directly affect the output of Generator. These are all advanced
|
||||
twiddles.
|
||||
|
||||
== URI.* ==
|
||||
|
||||
AllowedSchemes
|
||||
OverrideAllowedSchemes
|
||||
Scheme tuning
|
||||
Base
|
||||
DefaultScheme
|
||||
Host
|
||||
Global environment
|
||||
DefinitionID
|
||||
DefinitionRev
|
||||
Caching
|
||||
DisableExternalResources
|
||||
DisableExternal
|
||||
DisableResources
|
||||
Disable
|
||||
Contextual/authority tuning
|
||||
HostBlacklist
|
||||
Authority tuning
|
||||
MakeAbsolute
|
||||
MungeResources
|
||||
MungeSecretKey
|
||||
Munge
|
||||
Transformation behavior (munge can be grouped)
|
||||
|
||||
|
@@ -114,7 +114,7 @@ Test.Example</pre>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>VALUE-ALIASES</td>
|
||||
<td>'baz' => 'bar'</td>
|
||||
<td>'baz' => 'bar'</td>
|
||||
<td><em>Optional</em>. Mapping of one value to another, and
|
||||
should be a comma separated list of keypair duples. This
|
||||
is only allowed string, istring, text and itext TYPEs.</td>
|
||||
@@ -213,7 +213,7 @@ Test.Example</pre>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>lookup</td>
|
||||
<td>array('key' => true)</td>
|
||||
<td>array('key' => true)</td>
|
||||
<td>Lookup array, used with <code>isset($var[$key])</code></td>
|
||||
</tr>
|
||||
<tr>
|
||||
@@ -223,7 +223,7 @@ Test.Example</pre>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>hash</td>
|
||||
<td>array('key' => 'val')</td>
|
||||
<td>array('key' => 'val')</td>
|
||||
<td>Associative array of keys to values</td>
|
||||
</tr>
|
||||
<tr>
|
||||
@@ -267,6 +267,41 @@ Test.Example</pre>
|
||||
If you ever make changes to your configuration directives, you
|
||||
will need to run this script again.
|
||||
</p>
|
||||
<h2>Adding in-house schema definitions</h2>
|
||||
|
||||
<p>
|
||||
Placing stuff directly in HTML Purifier's source tree is generally not a
|
||||
good idea, so HTML Purifier 4.0.0+ has some facilities in place to make your
|
||||
life easier.
|
||||
</p>
|
||||
|
||||
<p>
|
||||
The first is to pass an extra parameter to <code>maintenance/generate-schema-cache.php</code>
|
||||
with the location of your directory (relative or absolute path will do). For example,
|
||||
if I'm storing my custom definitions in <em>/var/htmlpurifier/myschema</em>, run:
|
||||
<code>php maintenance/generate-schema-cache.php /var/htmlpurifier/myschema</code>.
|
||||
</p>
|
||||
|
||||
<p>
|
||||
Alternatively, you can create a small loader PHP file in the HTML Purifier base
|
||||
directory named <code>config-schema.php</code> (this is the same directory
|
||||
you would place a <code>test-settings.php</code> file). In this file, add
|
||||
the following line for each directory you want to load:
|
||||
</p>
|
||||
|
||||
<pre>$builder->buildDir($interchange, '/var/htmlpurifier/myschema');</pre>
|
||||
|
||||
<p>You can even load a single file using:</p>
|
||||
|
||||
<pre>$builder->buildFile($interchange, '/var/htmlpurifier/myschema/MyApp.Directive.txt');</pre>
|
||||
|
||||
<p>Storing custom definitions that you don't plan on sending back upstream in
|
||||
a separate directory is <em>definitely</em> a good idea! Additionally, picking
|
||||
a good namespace can go a long way to saving you grief if you want to use
|
||||
someone else's change, but they picked the same name, or if HTML Purifier
|
||||
decides to add support for a configuration directive that has the same name.</p>
|
||||
|
||||
<!-- TODO: how to name directives that rely on naming conventions -->
|
||||
|
||||
<h2>Errors</h2>
|
||||
|
||||
@@ -373,4 +408,5 @@ Test.Example</pre>
|
||||
</body>
|
||||
</html>
|
||||
|
||||
<!-- vim: et sw=4 sts=4 -->
|
||||
<!-- vim: et sw=4 sts=4
|
||||
-->
|
||||
|
@@ -64,4 +64,5 @@
|
||||
|
||||
</body></html>
|
||||
|
||||
<!-- vim: et sw=4 sts=4 -->
|
||||
<!-- vim: et sw=4 sts=4
|
||||
-->
|
||||
|
@@ -79,4 +79,5 @@ help you find the correct functionality more quickly. Here they are:</p>
|
||||
|
||||
</body></html>
|
||||
|
||||
<!-- vim: et sw=4 sts=4 -->
|
||||
<!-- vim: et sw=4 sts=4
|
||||
-->
|
||||
|
@@ -29,4 +29,5 @@ that itch, put it here!</p>
|
||||
|
||||
</body></html>
|
||||
|
||||
<!-- vim: et sw=4 sts=4 -->
|
||||
<!-- vim: et sw=4 sts=4
|
||||
-->
|
||||
|
@@ -255,7 +255,7 @@ Mozilla on inside and needs -moz-outline, no IE support.</td></tr>
|
||||
<tr class="feature"><td>axis</td><td>TD, TH</td><td>W3C only: No browser implementation</td></tr>
|
||||
<tr class="feature"><td>char</td><td>COL, COLGROUP, TBODY, TD, TFOOT, TH, THEAD, TR</td><td>W3C only: No browser implementation</td></tr>
|
||||
<tr class="feature"><td>headers</td><td>TD, TH</td><td>W3C only: No browser implementation</td></tr>
|
||||
<tr class="feature"><td>scope</td><td>TD, TH</td><td>W3C only: No browser implementation</td></tr>
|
||||
<tr class="impl-yes"><td>scope</td><td>TD, TH</td><td>W3C only: No browser implementation</td></tr>
|
||||
</tbody>
|
||||
|
||||
<tbody class="impl-yes">
|
||||
@@ -305,4 +305,5 @@ Mozilla on inside and needs -moz-outline, no IE support.</td></tr>
|
||||
|
||||
</body></html>
|
||||
|
||||
<!-- vim: et sw=4 sts=4 -->
|
||||
<!-- vim: et sw=4 sts=4
|
||||
-->
|
||||
|
@@ -18,12 +18,11 @@
|
||||
<div id="home"><a href="http://htmlpurifier.org/">HTML Purifier</a> End-User Documentation</div>
|
||||
|
||||
<p>
|
||||
You may have heard of the <a href="dev-advanced-api.html">Advanced API</a>.
|
||||
If you're interested in reading dry prose and boring functional
|
||||
specifications, feel free to click that link to get a no-nonsense overview
|
||||
on the Advanced API. For the rest of us, there's this tutorial. By the time
|
||||
you're finished reading this, you should have a pretty good idea on
|
||||
how to implement custom tags and attributes that HTML Purifier may not have.
|
||||
HTML Purifier has this quirk where if you try to allow certain elements or
|
||||
attributes, HTML Purifier will tell you that it's not supported, and that
|
||||
you should go to the forums to find out how to implement it. Well, this
|
||||
document is how to implement elements and attributes which HTML Purifier
|
||||
doesn't support out of the box.
|
||||
</p>
|
||||
|
||||
<h2>Is it necessary?</h2>
|
||||
@@ -84,17 +83,6 @@
|
||||
limited to translations) above or below other corresponding text.
|
||||
</p>
|
||||
|
||||
<h3>XHTML 2.0</h3>
|
||||
|
||||
<p>
|
||||
<a href="http://www.w3.org/TR/xhtml2/">XHTML 2.0</a> is still a
|
||||
working draft, so any elements introduced in the
|
||||
specification have not been implemented and will not be implemented
|
||||
until we get a recommendation or proposal. Because XHTML 2.0 is
|
||||
an entirely new markup language, implementing rules for it will be
|
||||
no easy task.
|
||||
</p>
|
||||
|
||||
<h3>HTML 5</h3>
|
||||
|
||||
<p>
|
||||
@@ -156,9 +144,11 @@
|
||||
</p>
|
||||
|
||||
<pre>$config = HTMLPurifier_Config::createDefault();
|
||||
$config->set('HTML', 'DefinitionID', 'enduser-customize.html tutorial');
|
||||
$config->set('HTML', 'DefinitionRev', 1);
|
||||
$def = $config->getHTMLDefinition(true);</pre>
|
||||
$config->set('HTML.DefinitionID', 'enduser-customize.html tutorial');
|
||||
$config->set('HTML.DefinitionRev', 1);
|
||||
if ($def = $config->maybeGetRawHTMLDefinition()) {
|
||||
// our code will go here
|
||||
}</pre>
|
||||
|
||||
<p>
|
||||
Assuming that HTML Purifier has already been properly loaded (hint:
|
||||
@@ -186,23 +176,15 @@ $def = $config->getHTMLDefinition(true);</pre>
|
||||
</li>
|
||||
<li>
|
||||
The fourth line retrieves a raw <code>HTMLPurifier_HTMLDefinition</code>
|
||||
object that we will be tweaking. If the parameter was removed, we
|
||||
would be retrieving a fully formed definition object, which is somewhat
|
||||
useless for customization purposes.
|
||||
object that we will be tweaking. Interestingly enough, we have
|
||||
placed it in an if block: this is because
|
||||
<code>maybeGetRawHTMLDefinition</code>, as its name suggests, may
|
||||
return a NULL, in which case we should skip doing any
|
||||
initialization. This, in fact, will correspond to when our fully
|
||||
customized object is already in the cache.
|
||||
</li>
|
||||
</ul>
|
||||
|
||||
<h3>Broken backwards-compatibility</h3>
|
||||
|
||||
<p>
|
||||
Those of you who have already been twiddling around with the raw
|
||||
HTML definition object, you'll be noticing that you're getting an error
|
||||
when you attempt to retrieve the raw definition object without specifying
|
||||
a DefinitionID. It is vital to caching (see below) that you make a unique
|
||||
name for your customized definition, so make up something right now and
|
||||
things will operate again.
|
||||
</p>
|
||||
|
||||
<h2>Turn off caching</h2>
|
||||
|
||||
<p>
|
||||
@@ -211,10 +193,10 @@ $def = $config->getHTMLDefinition(true);</pre>
|
||||
</p>
|
||||
|
||||
<pre>$config = HTMLPurifier_Config::createDefault();
|
||||
$config->set('HTML', 'DefinitionID', 'enduser-customize.html tutorial');
|
||||
$config->set('HTML', 'DefinitionRev', 1);
|
||||
<strong>$config->set('Cache', 'DefinitionImpl', null); // remove this later!</strong>
|
||||
$def = $config->getHTMLDefinition(true);</pre>
|
||||
$config->set('HTML.DefinitionID', 'enduser-customize.html tutorial');
|
||||
$config->set('HTML.DefinitionRev', 1);
|
||||
<strong>$config->set('Cache.DefinitionImpl', null); // TODO: remove this later!</strong>
|
||||
$def = $config->getHTMLDefinition(true);</pre>
|
||||
|
||||
<p>
|
||||
A few things should be mentioned about the caching mechanism before
|
||||
@@ -267,10 +249,10 @@ $def = $config->getHTMLDefinition(true);</pre>
|
||||
</p>
|
||||
|
||||
<pre>$config = HTMLPurifier_Config::createDefault();
|
||||
$config->set('HTML', 'DefinitionID', 'enduser-customize.html tutorial');
|
||||
$config->set('HTML', 'DefinitionRev', 1);
|
||||
$config->set('Cache', 'DefinitionImpl', null); // remove this later!
|
||||
$def = $config->getHTMLDefinition(true);
|
||||
$config->set('HTML.DefinitionID', 'enduser-customize.html tutorial');
|
||||
$config->set('HTML.DefinitionRev', 1);
|
||||
$config->set('Cache.DefinitionImpl', null); // remove this later!
|
||||
$def = $config->getHTMLDefinition(true);
|
||||
<strong>$def->addAttribute('a', 'target', 'Enum#_blank,_self,_target,_top');</strong></pre>
|
||||
|
||||
<p>
|
||||
@@ -385,11 +367,11 @@ $def = $config->getHTMLDefinition(true);
|
||||
</p>
|
||||
|
||||
<pre>$config = HTMLPurifier_Config::createDefault();
|
||||
$config->set('HTML', 'DefinitionID', 'enduser-customize.html tutorial');
|
||||
$config->set('HTML', 'DefinitionRev', 1);
|
||||
$config->set('Cache', 'DefinitionImpl', null); // remove this later!
|
||||
$def = $config->getHTMLDefinition(true);
|
||||
<strong>$def->addAttribute('a', 'target', new HTMLPurifier_AttrDef_Enum(
|
||||
$config->set('HTML.DefinitionID', 'enduser-customize.html tutorial');
|
||||
$config->set('HTML.DefinitionRev', 1);
|
||||
$config->set('Cache.DefinitionImpl', null); // remove this later!
|
||||
$def = $config->getHTMLDefinition(true);
|
||||
<strong>$def->addAttribute('a', 'target', new HTMLPurifier_AttrDef_Enum(
|
||||
array('_blank','_self','_target','_top')
|
||||
));</strong></pre>
|
||||
|
||||
@@ -724,7 +706,7 @@ $def = $config->getHTMLDefinition(true);
|
||||
or more flow elements, but no nested <code>form</code>s</strong></li>
|
||||
<li>What attributes does the element allow that are general? <strong>Common</strong></li>
|
||||
<li>What attributes does the element allow that are specific to this element? <strong>A whole bunch, see ATTLIST;
|
||||
we're going to the vital ones: <code>action</code>, <code>method</code> and <code>name</code></strong></li>
|
||||
we're going to do the vital ones: <code>action</code>, <code>method</code> and <code>name</code></strong></li>
|
||||
</ol>
|
||||
|
||||
<p>
|
||||
@@ -732,14 +714,14 @@ $def = $config->getHTMLDefinition(true);
|
||||
</p>
|
||||
|
||||
<pre>$config = HTMLPurifier_Config::createDefault();
|
||||
$config->set('HTML', 'DefinitionID', 'enduser-customize.html tutorial');
|
||||
$config->set('HTML', 'DefinitionRev', 1);
|
||||
$config->set('Cache', 'DefinitionImpl', null); // remove this later!
|
||||
$def = $config->getHTMLDefinition(true);
|
||||
$def->addAttribute('a', 'target', new HTMLPurifier_AttrDef_Enum(
|
||||
$config->set('HTML.DefinitionID', 'enduser-customize.html tutorial');
|
||||
$config->set('HTML.DefinitionRev', 1);
|
||||
$config->set('Cache.DefinitionImpl', null); // remove this later!
|
||||
$def = $config->getHTMLDefinition(true);
|
||||
$def->addAttribute('a', 'target', new HTMLPurifier_AttrDef_Enum(
|
||||
array('_blank','_self','_target','_top')
|
||||
));
|
||||
<strong>$form = $def->addElement(
|
||||
<strong>$form = $def->addElement(
|
||||
'form', // name
|
||||
'Block', // content set
|
||||
'Flow', // allowed children
|
||||
@@ -750,7 +732,7 @@ $def->addAttribute('a', 'target', new HTMLPurifier_AttrDef_Enum(
|
||||
'name' => 'ID'
|
||||
)
|
||||
);
|
||||
$form->excludes = array('form' => true);</strong></pre>
|
||||
$form->excludes = array('form' => true);</strong></pre>
|
||||
|
||||
<p>
|
||||
Each of the parameters corresponds to one of the questions we asked.
|
||||
@@ -793,6 +775,76 @@ $form->excludes = array('form' => true);</strong></pre>
|
||||
<li><a href="http://repo.or.cz/w/htmlpurifier.git?a=blob;hb=HEAD;f=library/HTMLPurifier/ElementDef.php"><code>library/HTMLPurifier/ElementDef.php</code></a></li>
|
||||
</ul>
|
||||
|
||||
<h2 id="optimized">Notes for HTML Purifier 4.2.0 and earlier</h3>
|
||||
|
||||
<p>
|
||||
Previously, this tutorial gave some incorrect template code for
|
||||
editing raw definitions, and that template code will now produce the
|
||||
error <q>Due to a documentation error in previous version of HTML
|
||||
Purifier...</q> Here is how to mechanically transform old-style
|
||||
code into new-style code.
|
||||
</p>
|
||||
|
||||
<p>
|
||||
First, identify all code that edits the raw definition object, and
|
||||
put it together. Ensure none of this code must be run on every
|
||||
request; if some sub-part needs to always be run, move it outside
|
||||
this block. Here is an example below, with the raw definition
|
||||
object code bolded.
|
||||
</p>
|
||||
|
||||
<pre>$config = HTMLPurifier_Config::createDefault();
|
||||
$config->set('HTML.DefinitionID', 'enduser-customize.html tutorial');
|
||||
$config->set('HTML.DefinitionRev', 1);
|
||||
$def = $config->getHTMLDefinition(true);
|
||||
<strong>$def->addAttribute('a', 'target', 'Enum#_blank,_self,_target,_top');</strong>
|
||||
$purifier = new HTMLPurifier($config);</pre>
|
||||
|
||||
<p>
|
||||
Next, replace the raw definition retrieval with a
|
||||
maybeGetRawHTMLDefinition method call inside an if conditional, and
|
||||
place the editing code inside that if block.
|
||||
</p>
|
||||
|
||||
<pre>$config = HTMLPurifier_Config::createDefault();
|
||||
$config->set('HTML.DefinitionID', 'enduser-customize.html tutorial');
|
||||
$config->set('HTML.DefinitionRev', 1);
|
||||
<strong>if ($def = $config->maybeGetRawHTMLDefinition()) {
|
||||
$def->addAttribute('a', 'target', 'Enum#_blank,_self,_target,_top');
|
||||
}</strong>
|
||||
$purifier = new HTMLPurifier($config);</pre>
|
||||
|
||||
<p>
|
||||
And you're done! Alternatively, if you're OK with not ever caching
|
||||
your code, the following will still work and not emit warnings.
|
||||
</p>
|
||||
|
||||
<pre>$config = HTMLPurifier_Config::createDefault();
|
||||
$def = $config->getHTMLDefinition(true);
|
||||
$def->addAttribute('a', 'target', 'Enum#_blank,_self,_target,_top');
|
||||
$purifier = new HTMLPurifier($config);</pre>
|
||||
|
||||
<p>
|
||||
A slightly less efficient version of this was what was going on with
|
||||
old versions of HTML Purifier.
|
||||
</p>
|
||||
|
||||
<p>
|
||||
<em>Technical notes:</em> ajh pointed out on <a
|
||||
href="http://htmlpurifier.org/phorum/read.php?5,5164,5169#msg-5169">in a forum topic</a> that
|
||||
HTML Purifier appeared to be repeatedly writing to the cache even
|
||||
when a cache entry already existed. Investigation lead to the
|
||||
discovery of the following infelicity: caching of customized
|
||||
definitions didn't actually work! The problem was that even though
|
||||
a cache file would be written out at the end of the process, there
|
||||
was no way for HTML Purifier to say, <q>Actually, I've already got a
|
||||
copy of your work, no need to reconfigure your
|
||||
customizations</q>. This required the API to change: placing
|
||||
all of the customizations to the raw definition object in a
|
||||
conditional which could be skipped.
|
||||
</p>
|
||||
|
||||
</body></html>
|
||||
|
||||
<!-- vim: et sw=4 sts=4 -->
|
||||
<!-- vim: et sw=4 sts=4
|
||||
-->
|
||||
|
@@ -31,7 +31,7 @@ by default.</p>
|
||||
|
||||
<p>IDs, however, are quite useful functionality to have, so if users start
|
||||
complaining about broken anchors you'll probably want to turn them back on
|
||||
with %HTML.EnableAttrID. But before you go mucking around with the config
|
||||
with %Attr.EnableID. But before you go mucking around with the config
|
||||
object, it's probably worth to take some precautions to keep your page
|
||||
validating. Why?</p>
|
||||
|
||||
@@ -56,8 +56,8 @@ validating. Why?</p>
|
||||
deal with the most obvious solution: preventing users from using any IDs that
|
||||
appear elsewhere on the document. The method is simple:</p>
|
||||
|
||||
<pre>$config->set('HTML', 'EnableAttrID', true);
|
||||
$config->set('Attr', 'IDBlacklist' array(
|
||||
<pre>$config->set('Attr.EnableID', true);
|
||||
$config->set('Attr.IDBlacklist' array(
|
||||
'list', 'of', 'attribute', 'values', 'that', 'are', 'forbidden'
|
||||
));</pre>
|
||||
|
||||
@@ -88,8 +88,8 @@ all, they might have simply specified a duplicate ID by accident.</p>
|
||||
<p>This method, too, is quite simple: add a prefix to all user IDs. With this
|
||||
code:</p>
|
||||
|
||||
<pre>$config->set('HTML', 'EnableAttrID', true);
|
||||
$config->set('Attr', 'IDPrefix', 'user_');</pre>
|
||||
<pre>$config->set('Attr.EnableID', true);
|
||||
$config->set('Attr.IDPrefix', 'user_');</pre>
|
||||
|
||||
<p>...this:</p>
|
||||
|
||||
@@ -109,7 +109,7 @@ user_ to the beginning."</p>
|
||||
nothing about multiple HTML Purifier outputs on one page. Thus, we have
|
||||
a second configuration value to piggy-back off of: %Attr.IDPrefixLocal:</p>
|
||||
|
||||
<pre>$config->set('Attr', 'IDPrefixLocal', 'comment' . $id . '_');</pre>
|
||||
<pre>$config->set('Attr.IDPrefixLocal', 'comment' . $id . '_');</pre>
|
||||
|
||||
<p>This new attributes does nothing but append on to regular IDPrefix, but is
|
||||
special in that it is volatile: it's value is determined at run-time and
|
||||
@@ -137,11 +137,12 @@ anchors is beyond me.</p>
|
||||
|
||||
<p>To revert back to pre-1.2.0 behavior, simply:</p>
|
||||
|
||||
<pre>$config->set('HTML', 'EnableAttrID', true);</pre>
|
||||
<pre>$config->set('Attr.EnableID', true);</pre>
|
||||
|
||||
<p>Don't come crying to me when your page mysteriously stops validating, though.</p>
|
||||
|
||||
</body>
|
||||
</html>
|
||||
|
||||
<!-- vim: et sw=4 sts=4 -->
|
||||
<!-- vim: et sw=4 sts=4
|
||||
-->
|
||||
|
@@ -116,4 +116,5 @@ if you decide to do that! Especially if you port HTML Purifier to C++.
|
||||
</body>
|
||||
</html>
|
||||
|
||||
<!-- vim: et sw=4 sts=4 -->
|
||||
<!-- vim: et sw=4 sts=4
|
||||
-->
|
||||
|
@@ -76,7 +76,7 @@ associated with it, although it may change depending on your doctype.</p>
|
||||
change the level of cleaning by setting the %HTML.TidyLevel configuration
|
||||
directive:</p>
|
||||
|
||||
<pre>$config->set('HTML', 'TidyLevel', 'heavy'); // burn baby burn!</pre>
|
||||
<pre>$config->set('HTML.TidyLevel', 'heavy'); // burn baby burn!</pre>
|
||||
|
||||
<h2>Is the light level really light?</h2>
|
||||
|
||||
@@ -165,17 +165,17 @@ smoketest</a>.</p>
|
||||
so happy about the br@clear implementation. That's perfectly fine!
|
||||
HTML Purifier will make accomodations:</p>
|
||||
|
||||
<pre>$config->set('HTML', 'Doctype', 'XHTML 1.0 Transitional');
|
||||
$config->set('HTML', 'TidyLevel', 'heavy'); // all changes, minus...
|
||||
<strong>$config->set('HTML', 'TidyRemove', 'br@clear');</strong></pre>
|
||||
<pre>$config->set('HTML.Doctype', 'XHTML 1.0 Transitional');
|
||||
$config->set('HTML.TidyLevel', 'heavy'); // all changes, minus...
|
||||
<strong>$config->set('HTML.TidyRemove', 'br@clear');</strong></pre>
|
||||
|
||||
<p>That third line does the magic, removing the br@clear fix
|
||||
from the module, ensuring that <code><br clear="both" /></code>
|
||||
will pass through unharmed. The reverse is possible too:</p>
|
||||
|
||||
<pre>$config->set('HTML', 'Doctype', 'XHTML 1.0 Transitional');
|
||||
$config->set('HTML', 'TidyLevel', 'none'); // no changes, plus...
|
||||
<strong>$config->set('HTML', 'TidyAdd', 'p@align');</strong></pre>
|
||||
<pre>$config->set('HTML.Doctype', 'XHTML 1.0 Transitional');
|
||||
$config->set('HTML.TidyLevel', 'none'); // no changes, plus...
|
||||
<strong>$config->set('HTML.TidyAdd', 'p@align');</strong></pre>
|
||||
|
||||
<p>In this case, all transformations are shut off, except for the p@align
|
||||
one, which you found handy.</p>
|
||||
@@ -227,4 +227,5 @@ effectively in the background.</p>
|
||||
|
||||
</body></html>
|
||||
|
||||
<!-- vim: et sw=4 sts=4 -->
|
||||
<!-- vim: et sw=4 sts=4
|
||||
-->
|
||||
|
@@ -160,27 +160,14 @@
|
||||
</p>
|
||||
|
||||
<pre>$uri = $config->getDefinition('URI');
|
||||
$uri->addFilter(new HTMLPurifier_URIFilter_<strong>NameOfFilter</strong>());</pre>
|
||||
$uri->addFilter(new HTMLPurifier_URIFilter_<strong>NameOfFilter</strong>(), $config);</pre>
|
||||
|
||||
<p>
|
||||
If you want to be really fancy, you can define a configuration directive
|
||||
for your filter and have HTML Purifier automatically manage whether or
|
||||
not your filter gets loaded or not (this is how internal filters manage
|
||||
things):
|
||||
After adding a filter, you won't be able to set configuration directives.
|
||||
Structure your code accordingly.
|
||||
</p>
|
||||
|
||||
<pre>HTMLPurifier_ConfigSchema::define(
|
||||
'URI', '<strong>NameOfFilter</strong>', false, 'bool',
|
||||
'<strong>What your filter does.</strong>'
|
||||
);
|
||||
$uri = $config->getDefinition('URI', true);
|
||||
$uri->registerFilter(new HTMLPurifier_URIFilter_<strong>NameOfFilter</strong>());
|
||||
</pre>
|
||||
|
||||
<p>
|
||||
Now, your filter will only be called when %URI.<strong>NameOfFilter</strong>
|
||||
is set to true.
|
||||
</p>
|
||||
<!-- XXX: link to new documentation system -->
|
||||
|
||||
<h2>Post-filter</h2>
|
||||
|
||||
@@ -213,4 +200,5 @@ $uri->registerFilter(new HTMLPurifier_URIFilter_<strong>NameOfFilter</strong>())
|
||||
|
||||
</body></html>
|
||||
|
||||
<!-- vim: et sw=4 sts=4 -->
|
||||
<!-- vim: et sw=4 sts=4
|
||||
-->
|
||||
|
@@ -561,7 +561,7 @@ page on special characters</a> for more details.
|
||||
<h3 id="whyutf8-forms">Forms</h3>
|
||||
|
||||
<p>While we're on the tack of users, how do non-UTF-8 web forms deal
|
||||
with characters that our outside of their character set? Rather than
|
||||
with characters that are outside of their character set? Rather than
|
||||
discuss what UTF-8 does right, we're going to show what could go wrong
|
||||
if you didn't use UTF-8 and people tried to use characters outside
|
||||
of your character encoding.</p>
|
||||
@@ -1056,4 +1056,5 @@ a more in-depth look into character sets and encodings.</p>
|
||||
</body>
|
||||
</html>
|
||||
|
||||
<!-- vim: et sw=4 sts=4 -->
|
||||
<!-- vim: et sw=4 sts=4
|
||||
-->
|
||||
|
@@ -67,7 +67,7 @@ into your documents. YouTube's code goes like this:</p>
|
||||
</ol>
|
||||
|
||||
<p>What point 2 means is that if we have code like <code><span
|
||||
class="embed-youtube">AyPzM5WK8ys</span></code> your
|
||||
class="youtube-embed">AyPzM5WK8ys</span></code> your
|
||||
application can reconstruct the full object from this small snippet that
|
||||
passes through HTML Purifier <em>unharmed</em>.
|
||||
<a href="http://repo.or.cz/w/htmlpurifier.git?a=blob;hb=HEAD;f=library/HTMLPurifier/Filter/YouTube.php">Show me the code!</a></p>
|
||||
@@ -75,7 +75,7 @@ passes through HTML Purifier <em>unharmed</em>.
|
||||
<p>And the corresponding usage:</p>
|
||||
|
||||
<pre><?php
|
||||
$config->set('Filter', 'YouTube', true);
|
||||
$config->set('Filter.YouTube', true);
|
||||
?></pre>
|
||||
|
||||
<p>There is a bit going in the two code snippets, so let's explain.</p>
|
||||
@@ -149,4 +149,5 @@ with the core!</p>
|
||||
</body>
|
||||
</html>
|
||||
|
||||
<!-- vim: et sw=4 sts=4 -->
|
||||
<!-- vim: et sw=4 sts=4
|
||||
-->
|
||||
|
@@ -8,8 +8,8 @@ require_once '../../library/HTMLPurifier.auto.php';
|
||||
$config = HTMLPurifier_Config::createDefault();
|
||||
|
||||
// configuration goes here:
|
||||
$config->set('Core', 'Encoding', 'UTF-8'); // replace with your encoding
|
||||
$config->set('HTML', 'Doctype', 'XHTML 1.0 Transitional'); // replace with your doctype
|
||||
$config->set('Core.Encoding', 'UTF-8'); // replace with your encoding
|
||||
$config->set('HTML.Doctype', 'XHTML 1.0 Transitional'); // replace with your doctype
|
||||
|
||||
$purifier = new HTMLPurifier($config);
|
||||
|
||||
|
@@ -5,4 +5,5 @@ function init() {
|
||||
}
|
||||
</script>
|
||||
|
||||
<!-- vim: et sw=4 sts=4 -->
|
||||
<!-- vim: et sw=4 sts=4
|
||||
-->
|
||||
|
@@ -117,6 +117,12 @@ the code. They may be upgraded to HTML files or stay as TXT scratchpads.</p>
|
||||
<td>Common security issues that may still arise (half-baked).</td>
|
||||
</tr>
|
||||
|
||||
<tr>
|
||||
<td>Development</td>
|
||||
<td><a href="dev-config-bcbreaks.txt">Config BC Breaks</a></td>
|
||||
<td>Backwards-incompatible changes in HTML Purifier 4.0.0</td>
|
||||
</tr>
|
||||
|
||||
<tr>
|
||||
<td>Development</td>
|
||||
<td><a href="dev-code-quality.txt">Code Quality Issues</a></td>
|
||||
@@ -178,4 +184,5 @@ the code. They may be upgraded to HTML files or stay as TXT scratchpads.</p>
|
||||
</body>
|
||||
</html>
|
||||
|
||||
<!-- vim: et sw=4 sts=4 -->
|
||||
<!-- vim: et sw=4 sts=4
|
||||
-->
|
||||
|
@@ -45,4 +45,5 @@ something like that?</li>
|
||||
</body>
|
||||
</html>
|
||||
|
||||
<!-- vim: et sw=4 sts=4 -->
|
||||
<!-- vim: et sw=4 sts=4
|
||||
-->
|
||||
|
218
docs/proposal-plists.txt
Normal file
218
docs/proposal-plists.txt
Normal file
@@ -0,0 +1,218 @@
|
||||
THE UNIVERSAL DESIGN PATTERN: PROPERTIES
|
||||
Steve Yegge
|
||||
|
||||
Implementation:
|
||||
get(name)
|
||||
put(name, value)
|
||||
has(name)
|
||||
remove(name)
|
||||
iteration, with filtering [this will be our namespaces]
|
||||
parent
|
||||
|
||||
Representations:
|
||||
- Keys are strings
|
||||
- It's nice to not need to quote keys (if we formulate our own language,
|
||||
consider this)
|
||||
- Property not present representation (key missing)
|
||||
- Frequent removal/re-add may have null help. If null is valid, use
|
||||
another value. (PHP semantics are weird here)
|
||||
|
||||
Data structures:
|
||||
- LinkedHashMap is wonderful (O(1) access and maintains order)
|
||||
- Using a special property that points to the parent is usual
|
||||
- Multiple inheritance possible, need rules for which to lookup first
|
||||
- Iterative inheritance is best
|
||||
- Consider performance!
|
||||
|
||||
Deletion
|
||||
- Tricky problem with inheritance
|
||||
- Distinguish between "not found" and "look in my parent for the property"
|
||||
[Maybe HTML Purifier won't allow deletion]
|
||||
|
||||
Read/write asymmetry (it's correct!)
|
||||
|
||||
Read-only plists
|
||||
- Allow ability to freeze [this is what we have already]
|
||||
- Don't overuse it
|
||||
|
||||
Performance:
|
||||
- Intern strings (PHP does this already)
|
||||
- Don't be case-insensitive
|
||||
- If all properties in a plist are known a-priori, you can use a "perfect"
|
||||
hash function. Often overkill.
|
||||
- Copy-on-read caching "plundering" reduces lookup, but uses memory and can
|
||||
grow stale. Use as last resort.
|
||||
- Refactoring to fields. Watch for API compatibility, system complexity,
|
||||
and lack of flexibility.
|
||||
- Refrigerator: external data-structure to hold plists
|
||||
|
||||
Transient properties:
|
||||
[Don't need to worry about this]
|
||||
- Use a separate plist for transient properties
|
||||
- Non-numeric override; numeric should ADD
|
||||
- Deletion: removeTransientProperty() and transientlyRemoveProperty()
|
||||
|
||||
Persistence:
|
||||
- XML/JSON are good
|
||||
- Text-based is good for readability, maintainability and bootstrapping
|
||||
- Compressed binary format for network transport [not necessary]
|
||||
- RDBMS or XML database
|
||||
|
||||
Querying: [not relevant]
|
||||
- XML database is nice for XPath/XQuery
|
||||
- jQuery for JSON
|
||||
- Just load it all into a program
|
||||
|
||||
Backfills/Data integrity:
|
||||
- Use usual methods
|
||||
- Lazy backfill is a nice hack
|
||||
|
||||
Type systems:
|
||||
- Flags: ReadOnly, Permanent, DontEnum
|
||||
- Typed properties isn't that useful [It's also Not-PHP]
|
||||
- Seperate meta-list of directive properties IS useful
|
||||
- Duck typing is useful for systems designed fully around properties pattern
|
||||
|
||||
Trade-off:
|
||||
+ Flexibility
|
||||
+ Extensibility
|
||||
+ Unit-testing/prototype-speed
|
||||
- Performance
|
||||
- Data integrity
|
||||
- Navagability/Query-ability
|
||||
- Reversability (hard to go back)
|
||||
|
||||
HTML Purifier
|
||||
|
||||
We are not happy with our current system of defining configuration directives,
|
||||
because it has become clear that things will get a lot nicer if we allow
|
||||
multiple namespaces, and there are some features that naturally lend themselves
|
||||
to inheritance, which we do not really support well.
|
||||
|
||||
One of the considered implementation changes would be to go from a structure
|
||||
like:
|
||||
|
||||
array(
|
||||
'Namespace' => array(
|
||||
'Directive' => 'val1',
|
||||
'Directive2' => 'val2',
|
||||
)
|
||||
)
|
||||
|
||||
to:
|
||||
|
||||
array(
|
||||
'Namespace.Directive' => 'val1',
|
||||
'Namespace.Directive2' => 'val2',
|
||||
)
|
||||
|
||||
The below implementation takes more memory, however, and it makes it a bit
|
||||
complicated to grab all values from a namespace.
|
||||
|
||||
The alternate implementation choice is to allow nested plists. This keeps
|
||||
iteration easy, but is problematic for inheritance (it would be difficult
|
||||
to distinguish a plist from an array) and retrieval (when specifying multiple
|
||||
namespaces we would need some multiple de-referencing).
|
||||
|
||||
----
|
||||
|
||||
We can bite the performance hit, and just do iteration with filter
|
||||
(the strncmp call should be relatively cheap). Then, users should be able
|
||||
to optimize doing something like:
|
||||
|
||||
$config = HTMLPurifier_Config::createDefault();
|
||||
if (!file_exists('config.php')) {
|
||||
// set up $config
|
||||
$config->save('config.php');
|
||||
} else {
|
||||
$config->load('config.php');
|
||||
}
|
||||
|
||||
Or maybe memcache, or something. This means that "// set up $config" must
|
||||
not have any dynamic parts, or the user has to invalidate the cache when
|
||||
they do update it. We have to think about this a little more carefully; the
|
||||
file call might be more expensive.
|
||||
|
||||
----
|
||||
|
||||
This might get expensive, however, when we actually care about iterating
|
||||
over the configuration and want the actual values. So what about nesting the
|
||||
lists?
|
||||
|
||||
"ns.sub.directive" => values['ns']['sub']['directive']
|
||||
|
||||
We can distinguish between plists and arrays by using ArrayObjects for the
|
||||
plists, and regular arrays for the arrays? Alternatively, use ArrayObjects
|
||||
for the arrays, and regular arrays for the plists.
|
||||
|
||||
----
|
||||
|
||||
Implementation demands, and what has caused them:
|
||||
|
||||
1. DefinitionCache, the HTML, CSS and URI namespaces have caches attached to them
|
||||
Results:
|
||||
- getBatchSerial()
|
||||
- getBatch() : in general, the ability to traverse just a namespace
|
||||
|
||||
2. AutoFormat/Filter, this is a plugin architecture, directives not hard-coded
|
||||
- getBatch()
|
||||
|
||||
3. Configuration form
|
||||
- Namespaces used to organize directives
|
||||
|
||||
Other than that, we have a pure plist. PERHAPS we should maintain separate things
|
||||
for these different demands.
|
||||
|
||||
Issue 2: Directives for configuring the plugins are regular plists, but
|
||||
when enabling them, while it's "plist-ish", what you're really doing is adding
|
||||
them to an array of "autoformatters"/"filters" to enable. We can setup
|
||||
magic BC as well as in the new interface, but there should also be an
|
||||
add('AutoFormat', 'AutoParagraph'); which does the right thing.
|
||||
|
||||
One thing to consider is whether or not inheritance rules will apply to these.
|
||||
I'd say yes. That means that they're still plisty, in fact, the underlying
|
||||
implementation will probably be a plist. However, they will get their OWN
|
||||
plists, and will NOT support nesting.
|
||||
|
||||
Issue 1: Our current implementation is generally not efficient; md5(serialize($foo))
|
||||
is pretty expensive. So, I don't think there will be any problems if it
|
||||
gets "less" efficient, as long as we give users a properly fast alternative;
|
||||
DefinitionRev gives us a way to do this, by simply telling the user they must
|
||||
update it whenever they update Configuration directives as well. (There are
|
||||
obvious BC concerns here).
|
||||
|
||||
In such a case, we simply iterate over our plist (performing full retrievals
|
||||
for each value), grab the entries we care about, and then serialize and hash.
|
||||
It's going to be slow either way, due to the ability of plists to inherit.
|
||||
If we ksort(), we don't have to traverse the entire array, however, the
|
||||
cost of a ksort() call may not be worth it.
|
||||
|
||||
At this point, last time, I started worrying about the performance implications
|
||||
of allowing inheritance, and wondering whether or not I wanted to squash
|
||||
the plist. At first blush, our code might be under the assumption that
|
||||
accessing properties is cheap; but actually we prefer to copy out the value
|
||||
into a member variable if it's going to be used many times. With this is mind
|
||||
I don't think CPU consumption from a few nested function calls is going to
|
||||
be a problem. We *are* going to enforce a function only interface.
|
||||
|
||||
The next issue at hand is how we're going to manage the "special" plists,
|
||||
which should still be able to be inherited. Basically, it means that multiple
|
||||
plists would be attached to the configuration object, which is not the
|
||||
best for memory performance. The alternative is to keep them all in one
|
||||
big plist, and then eat the one-time cost of traversing the entire plist
|
||||
to grab the appropriate values.
|
||||
|
||||
I think at this point we can write the generic interface, and then set up separate
|
||||
plists if that ends up being necessary for performance (it probably won't.) Now
|
||||
lets code our generic plist implementation.
|
||||
|
||||
----
|
||||
|
||||
Iterating over the plist presents some problems. The way we've chosen to solve
|
||||
this is to squash all of the parents.
|
||||
|
||||
----
|
||||
|
||||
But I don't need iteration.
|
||||
|
||||
vim: et sw=4 sts=4
|
@@ -43,4 +43,5 @@ the development of this library in these forum threads:</p>
|
||||
</body>
|
||||
</html>
|
||||
|
||||
<!-- vim: et sw=4 sts=4 -->
|
||||
<!-- vim: et sw=4 sts=4
|
||||
-->
|
||||
|
@@ -163,5 +163,3 @@ div.segment {width:250px; float:left; margin-top:1em;}
|
||||
|
||||
</body>
|
||||
</html>
|
||||
|
||||
<!-- vim: et sw=4 sts=4 -->
|
||||
|
@@ -127,5 +127,3 @@ style='color:black'>www.example.com/disclaimer</span></a><o:p></o:p></span></p>
|
||||
</body>
|
||||
|
||||
</html>
|
||||
|
||||
<!-- vim: et sw=4 sts=4 -->
|
||||
|
@@ -72,5 +72,3 @@ title="Join Windows Live to share photos using Windows Live Photo E-mail.">Onlin
|
||||
pictures are available for 30 days. <A style="COLOR: #0088e4"
|
||||
href="http://g.msn.com/5meen_us/175">Get Windows Live Mail desktop to create
|
||||
your own photo e-mails. </A></SPAN></NOBR></DIV></BODY></HTML>
|
||||
|
||||
<!-- vim: et sw=4 sts=4 -->
|
||||
|
@@ -3,6 +3,7 @@
|
||||
/**
|
||||
* @file
|
||||
* Convenience file that registers autoload handler for HTML Purifier.
|
||||
* It also does some sanity checks.
|
||||
*/
|
||||
|
||||
if (function_exists('spl_autoload_register') && function_exists('spl_autoload_unregister')) {
|
||||
@@ -18,4 +19,8 @@ if (function_exists('spl_autoload_register') && function_exists('spl_autoload_un
|
||||
}
|
||||
}
|
||||
|
||||
if (ini_get('zend.ze1_compatibility_mode')) {
|
||||
trigger_error("HTML Purifier is not compatible with zend.ze1_compatibility_mode; please turn it off", E_USER_ERROR);
|
||||
}
|
||||
|
||||
// vim: et sw=4 sts=4
|
||||
|
4
library/HTMLPurifier.composer.php
Normal file
4
library/HTMLPurifier.composer.php
Normal file
@@ -0,0 +1,4 @@
|
||||
<?php
|
||||
if (!defined('HTMLPURIFIER_PREFIX')) {
|
||||
define('HTMLPURIFIER_PREFIX', __DIR__);
|
||||
}
|
@@ -7,7 +7,7 @@
|
||||
* primary concern and you are using an opcode cache. PLEASE DO NOT EDIT THIS
|
||||
* FILE, changes will be overwritten the next time the script is run.
|
||||
*
|
||||
* @version 3.3.0
|
||||
* @version 4.5.0
|
||||
*
|
||||
* @warning
|
||||
* You must *not* include any other HTML Purifier files before this file,
|
||||
@@ -19,6 +19,8 @@
|
||||
*/
|
||||
|
||||
require 'HTMLPurifier.php';
|
||||
require 'HTMLPurifier/Array.php';
|
||||
require 'HTMLPurifier/ArrayNode.php';
|
||||
require 'HTMLPurifier/AttrCollections.php';
|
||||
require 'HTMLPurifier/AttrDef.php';
|
||||
require 'HTMLPurifier/AttrTransform.php';
|
||||
@@ -36,6 +38,7 @@ require 'HTMLPurifier/DefinitionCache.php';
|
||||
require 'HTMLPurifier/DefinitionCacheFactory.php';
|
||||
require 'HTMLPurifier/Doctype.php';
|
||||
require 'HTMLPurifier/DoctypeRegistry.php';
|
||||
require 'HTMLPurifier/DoublyLinkedList.php';
|
||||
require 'HTMLPurifier/ElementDef.php';
|
||||
require 'HTMLPurifier/Encoder.php';
|
||||
require 'HTMLPurifier/EntityLookup.php';
|
||||
@@ -73,6 +76,7 @@ require 'HTMLPurifier/UnitConverter.php';
|
||||
require 'HTMLPurifier/VarParser.php';
|
||||
require 'HTMLPurifier/VarParserException.php';
|
||||
require 'HTMLPurifier/AttrDef/CSS.php';
|
||||
require 'HTMLPurifier/AttrDef/Clone.php';
|
||||
require 'HTMLPurifier/AttrDef/Enum.php';
|
||||
require 'HTMLPurifier/AttrDef/Integer.php';
|
||||
require 'HTMLPurifier/AttrDef/Lang.php';
|
||||
@@ -90,6 +94,7 @@ require 'HTMLPurifier/AttrDef/CSS/DenyElementDecorator.php';
|
||||
require 'HTMLPurifier/AttrDef/CSS/Filter.php';
|
||||
require 'HTMLPurifier/AttrDef/CSS/Font.php';
|
||||
require 'HTMLPurifier/AttrDef/CSS/FontFamily.php';
|
||||
require 'HTMLPurifier/AttrDef/CSS/Ident.php';
|
||||
require 'HTMLPurifier/AttrDef/CSS/ImportantDecorator.php';
|
||||
require 'HTMLPurifier/AttrDef/CSS/Length.php';
|
||||
require 'HTMLPurifier/AttrDef/CSS/ListStyle.php';
|
||||
@@ -98,6 +103,8 @@ require 'HTMLPurifier/AttrDef/CSS/Percentage.php';
|
||||
require 'HTMLPurifier/AttrDef/CSS/TextDecoration.php';
|
||||
require 'HTMLPurifier/AttrDef/CSS/URI.php';
|
||||
require 'HTMLPurifier/AttrDef/HTML/Bool.php';
|
||||
require 'HTMLPurifier/AttrDef/HTML/Nmtokens.php';
|
||||
require 'HTMLPurifier/AttrDef/HTML/Class.php';
|
||||
require 'HTMLPurifier/AttrDef/HTML/Color.php';
|
||||
require 'HTMLPurifier/AttrDef/HTML/FrameTarget.php';
|
||||
require 'HTMLPurifier/AttrDef/HTML/ID.php';
|
||||
@@ -105,7 +112,6 @@ require 'HTMLPurifier/AttrDef/HTML/Pixels.php';
|
||||
require 'HTMLPurifier/AttrDef/HTML/Length.php';
|
||||
require 'HTMLPurifier/AttrDef/HTML/LinkTypes.php';
|
||||
require 'HTMLPurifier/AttrDef/HTML/MultiLength.php';
|
||||
require 'HTMLPurifier/AttrDef/HTML/Nmtokens.php';
|
||||
require 'HTMLPurifier/AttrDef/URI/Email.php';
|
||||
require 'HTMLPurifier/AttrDef/URI/Host.php';
|
||||
require 'HTMLPurifier/AttrDef/URI/IPv4.php';
|
||||
@@ -123,14 +129,18 @@ require 'HTMLPurifier/AttrTransform/Input.php';
|
||||
require 'HTMLPurifier/AttrTransform/Lang.php';
|
||||
require 'HTMLPurifier/AttrTransform/Length.php';
|
||||
require 'HTMLPurifier/AttrTransform/Name.php';
|
||||
require 'HTMLPurifier/AttrTransform/NameSync.php';
|
||||
require 'HTMLPurifier/AttrTransform/Nofollow.php';
|
||||
require 'HTMLPurifier/AttrTransform/SafeEmbed.php';
|
||||
require 'HTMLPurifier/AttrTransform/SafeObject.php';
|
||||
require 'HTMLPurifier/AttrTransform/SafeParam.php';
|
||||
require 'HTMLPurifier/AttrTransform/ScriptRequired.php';
|
||||
require 'HTMLPurifier/AttrTransform/TargetBlank.php';
|
||||
require 'HTMLPurifier/AttrTransform/Textarea.php';
|
||||
require 'HTMLPurifier/ChildDef/Chameleon.php';
|
||||
require 'HTMLPurifier/ChildDef/Custom.php';
|
||||
require 'HTMLPurifier/ChildDef/Empty.php';
|
||||
require 'HTMLPurifier/ChildDef/List.php';
|
||||
require 'HTMLPurifier/ChildDef/Required.php';
|
||||
require 'HTMLPurifier/ChildDef/Optional.php';
|
||||
require 'HTMLPurifier/ChildDef/StrictBlockquote.php';
|
||||
@@ -145,10 +155,12 @@ require 'HTMLPurifier/HTMLModule/CommonAttributes.php';
|
||||
require 'HTMLPurifier/HTMLModule/Edit.php';
|
||||
require 'HTMLPurifier/HTMLModule/Forms.php';
|
||||
require 'HTMLPurifier/HTMLModule/Hypertext.php';
|
||||
require 'HTMLPurifier/HTMLModule/Iframe.php';
|
||||
require 'HTMLPurifier/HTMLModule/Image.php';
|
||||
require 'HTMLPurifier/HTMLModule/Legacy.php';
|
||||
require 'HTMLPurifier/HTMLModule/List.php';
|
||||
require 'HTMLPurifier/HTMLModule/Name.php';
|
||||
require 'HTMLPurifier/HTMLModule/Nofollow.php';
|
||||
require 'HTMLPurifier/HTMLModule/NonXMLCommonAttributes.php';
|
||||
require 'HTMLPurifier/HTMLModule/Object.php';
|
||||
require 'HTMLPurifier/HTMLModule/Presentation.php';
|
||||
@@ -156,10 +168,12 @@ require 'HTMLPurifier/HTMLModule/Proprietary.php';
|
||||
require 'HTMLPurifier/HTMLModule/Ruby.php';
|
||||
require 'HTMLPurifier/HTMLModule/SafeEmbed.php';
|
||||
require 'HTMLPurifier/HTMLModule/SafeObject.php';
|
||||
require 'HTMLPurifier/HTMLModule/SafeScripting.php';
|
||||
require 'HTMLPurifier/HTMLModule/Scripting.php';
|
||||
require 'HTMLPurifier/HTMLModule/StyleAttribute.php';
|
||||
require 'HTMLPurifier/HTMLModule/Tables.php';
|
||||
require 'HTMLPurifier/HTMLModule/Target.php';
|
||||
require 'HTMLPurifier/HTMLModule/TargetBlank.php';
|
||||
require 'HTMLPurifier/HTMLModule/Text.php';
|
||||
require 'HTMLPurifier/HTMLModule/Tidy.php';
|
||||
require 'HTMLPurifier/HTMLModule/XMLCommonAttributes.php';
|
||||
@@ -174,6 +188,7 @@ require 'HTMLPurifier/Injector/DisplayLinkURI.php';
|
||||
require 'HTMLPurifier/Injector/Linkify.php';
|
||||
require 'HTMLPurifier/Injector/PurifierLinkify.php';
|
||||
require 'HTMLPurifier/Injector/RemoveEmpty.php';
|
||||
require 'HTMLPurifier/Injector/RemoveSpansWithoutAttributes.php';
|
||||
require 'HTMLPurifier/Injector/SafeObject.php';
|
||||
require 'HTMLPurifier/Lexer/DOMLex.php';
|
||||
require 'HTMLPurifier/Lexer/DirectLex.php';
|
||||
@@ -193,9 +208,13 @@ require 'HTMLPurifier/Token/Start.php';
|
||||
require 'HTMLPurifier/Token/Text.php';
|
||||
require 'HTMLPurifier/URIFilter/DisableExternal.php';
|
||||
require 'HTMLPurifier/URIFilter/DisableExternalResources.php';
|
||||
require 'HTMLPurifier/URIFilter/DisableResources.php';
|
||||
require 'HTMLPurifier/URIFilter/HostBlacklist.php';
|
||||
require 'HTMLPurifier/URIFilter/MakeAbsolute.php';
|
||||
require 'HTMLPurifier/URIFilter/Munge.php';
|
||||
require 'HTMLPurifier/URIFilter/SafeIframe.php';
|
||||
require 'HTMLPurifier/URIScheme/data.php';
|
||||
require 'HTMLPurifier/URIScheme/file.php';
|
||||
require 'HTMLPurifier/URIScheme/ftp.php';
|
||||
require 'HTMLPurifier/URIScheme/http.php';
|
||||
require 'HTMLPurifier/URIScheme/https.php';
|
||||
|
@@ -17,11 +17,11 @@ function kses($string, $allowed_html, $allowed_protocols = null) {
|
||||
$allowed_attributes["$element.$attribute"] = true;
|
||||
}
|
||||
}
|
||||
$config->set('HTML', 'AllowedElements', $allowed_elements);
|
||||
$config->set('HTML', 'AllowedAttributes', $allowed_attributes);
|
||||
$config->set('HTML.AllowedElements', $allowed_elements);
|
||||
$config->set('HTML.AllowedAttributes', $allowed_attributes);
|
||||
$allowed_schemes = array();
|
||||
if ($allowed_protocols !== null) {
|
||||
$config->set('URI', 'AllowedSchemes', $allowed_protocols);
|
||||
$config->set('URI.AllowedSchemes', $allowed_protocols);
|
||||
}
|
||||
$purifier = new HTMLPurifier($config);
|
||||
return $purifier->purify($string);
|
||||
|
@@ -19,7 +19,7 @@
|
||||
*/
|
||||
|
||||
/*
|
||||
HTML Purifier 3.3.0 - Standards Compliant HTML Filtering
|
||||
HTML Purifier 4.5.0 - Standards Compliant HTML Filtering
|
||||
Copyright (C) 2006-2008 Edward Z. Yang
|
||||
|
||||
This library is free software; you can redistribute it and/or
|
||||
@@ -55,10 +55,10 @@ class HTMLPurifier
|
||||
{
|
||||
|
||||
/** Version of HTML Purifier */
|
||||
public $version = '3.3.0';
|
||||
public $version = '4.5.0';
|
||||
|
||||
/** Constant with version of HTML Purifier */
|
||||
const VERSION = '3.3.0';
|
||||
const VERSION = '4.5.0';
|
||||
|
||||
/** Global configuration object */
|
||||
public $config;
|
||||
@@ -128,7 +128,7 @@ class HTMLPurifier
|
||||
$context->register('Generator', $this->generator);
|
||||
|
||||
// set up global context variables
|
||||
if ($config->get('Core', 'CollectErrors')) {
|
||||
if ($config->get('Core.CollectErrors')) {
|
||||
// may get moved out if other facilities use it
|
||||
$language_factory = HTMLPurifier_LanguageFactory::instance();
|
||||
$language = $language_factory->create($config, $context);
|
||||
@@ -152,6 +152,7 @@ class HTMLPurifier
|
||||
$filters = array();
|
||||
foreach ($filter_flags as $filter => $flag) {
|
||||
if (!$flag) continue;
|
||||
if (strpos($filter, '.') !== false) continue;
|
||||
$class = "HTMLPurifier_Filter_$filter";
|
||||
$filters[] = new $class;
|
||||
}
|
||||
|
@@ -13,6 +13,8 @@
|
||||
$__dir = dirname(__FILE__);
|
||||
|
||||
require_once $__dir . '/HTMLPurifier.php';
|
||||
require_once $__dir . '/HTMLPurifier/Array.php';
|
||||
require_once $__dir . '/HTMLPurifier/ArrayNode.php';
|
||||
require_once $__dir . '/HTMLPurifier/AttrCollections.php';
|
||||
require_once $__dir . '/HTMLPurifier/AttrDef.php';
|
||||
require_once $__dir . '/HTMLPurifier/AttrTransform.php';
|
||||
@@ -30,6 +32,7 @@ require_once $__dir . '/HTMLPurifier/DefinitionCache.php';
|
||||
require_once $__dir . '/HTMLPurifier/DefinitionCacheFactory.php';
|
||||
require_once $__dir . '/HTMLPurifier/Doctype.php';
|
||||
require_once $__dir . '/HTMLPurifier/DoctypeRegistry.php';
|
||||
require_once $__dir . '/HTMLPurifier/DoublyLinkedList.php';
|
||||
require_once $__dir . '/HTMLPurifier/ElementDef.php';
|
||||
require_once $__dir . '/HTMLPurifier/Encoder.php';
|
||||
require_once $__dir . '/HTMLPurifier/EntityLookup.php';
|
||||
@@ -67,6 +70,7 @@ require_once $__dir . '/HTMLPurifier/UnitConverter.php';
|
||||
require_once $__dir . '/HTMLPurifier/VarParser.php';
|
||||
require_once $__dir . '/HTMLPurifier/VarParserException.php';
|
||||
require_once $__dir . '/HTMLPurifier/AttrDef/CSS.php';
|
||||
require_once $__dir . '/HTMLPurifier/AttrDef/Clone.php';
|
||||
require_once $__dir . '/HTMLPurifier/AttrDef/Enum.php';
|
||||
require_once $__dir . '/HTMLPurifier/AttrDef/Integer.php';
|
||||
require_once $__dir . '/HTMLPurifier/AttrDef/Lang.php';
|
||||
@@ -84,6 +88,7 @@ require_once $__dir . '/HTMLPurifier/AttrDef/CSS/DenyElementDecorator.php';
|
||||
require_once $__dir . '/HTMLPurifier/AttrDef/CSS/Filter.php';
|
||||
require_once $__dir . '/HTMLPurifier/AttrDef/CSS/Font.php';
|
||||
require_once $__dir . '/HTMLPurifier/AttrDef/CSS/FontFamily.php';
|
||||
require_once $__dir . '/HTMLPurifier/AttrDef/CSS/Ident.php';
|
||||
require_once $__dir . '/HTMLPurifier/AttrDef/CSS/ImportantDecorator.php';
|
||||
require_once $__dir . '/HTMLPurifier/AttrDef/CSS/Length.php';
|
||||
require_once $__dir . '/HTMLPurifier/AttrDef/CSS/ListStyle.php';
|
||||
@@ -92,6 +97,8 @@ require_once $__dir . '/HTMLPurifier/AttrDef/CSS/Percentage.php';
|
||||
require_once $__dir . '/HTMLPurifier/AttrDef/CSS/TextDecoration.php';
|
||||
require_once $__dir . '/HTMLPurifier/AttrDef/CSS/URI.php';
|
||||
require_once $__dir . '/HTMLPurifier/AttrDef/HTML/Bool.php';
|
||||
require_once $__dir . '/HTMLPurifier/AttrDef/HTML/Nmtokens.php';
|
||||
require_once $__dir . '/HTMLPurifier/AttrDef/HTML/Class.php';
|
||||
require_once $__dir . '/HTMLPurifier/AttrDef/HTML/Color.php';
|
||||
require_once $__dir . '/HTMLPurifier/AttrDef/HTML/FrameTarget.php';
|
||||
require_once $__dir . '/HTMLPurifier/AttrDef/HTML/ID.php';
|
||||
@@ -99,7 +106,6 @@ require_once $__dir . '/HTMLPurifier/AttrDef/HTML/Pixels.php';
|
||||
require_once $__dir . '/HTMLPurifier/AttrDef/HTML/Length.php';
|
||||
require_once $__dir . '/HTMLPurifier/AttrDef/HTML/LinkTypes.php';
|
||||
require_once $__dir . '/HTMLPurifier/AttrDef/HTML/MultiLength.php';
|
||||
require_once $__dir . '/HTMLPurifier/AttrDef/HTML/Nmtokens.php';
|
||||
require_once $__dir . '/HTMLPurifier/AttrDef/URI/Email.php';
|
||||
require_once $__dir . '/HTMLPurifier/AttrDef/URI/Host.php';
|
||||
require_once $__dir . '/HTMLPurifier/AttrDef/URI/IPv4.php';
|
||||
@@ -117,14 +123,18 @@ require_once $__dir . '/HTMLPurifier/AttrTransform/Input.php';
|
||||
require_once $__dir . '/HTMLPurifier/AttrTransform/Lang.php';
|
||||
require_once $__dir . '/HTMLPurifier/AttrTransform/Length.php';
|
||||
require_once $__dir . '/HTMLPurifier/AttrTransform/Name.php';
|
||||
require_once $__dir . '/HTMLPurifier/AttrTransform/NameSync.php';
|
||||
require_once $__dir . '/HTMLPurifier/AttrTransform/Nofollow.php';
|
||||
require_once $__dir . '/HTMLPurifier/AttrTransform/SafeEmbed.php';
|
||||
require_once $__dir . '/HTMLPurifier/AttrTransform/SafeObject.php';
|
||||
require_once $__dir . '/HTMLPurifier/AttrTransform/SafeParam.php';
|
||||
require_once $__dir . '/HTMLPurifier/AttrTransform/ScriptRequired.php';
|
||||
require_once $__dir . '/HTMLPurifier/AttrTransform/TargetBlank.php';
|
||||
require_once $__dir . '/HTMLPurifier/AttrTransform/Textarea.php';
|
||||
require_once $__dir . '/HTMLPurifier/ChildDef/Chameleon.php';
|
||||
require_once $__dir . '/HTMLPurifier/ChildDef/Custom.php';
|
||||
require_once $__dir . '/HTMLPurifier/ChildDef/Empty.php';
|
||||
require_once $__dir . '/HTMLPurifier/ChildDef/List.php';
|
||||
require_once $__dir . '/HTMLPurifier/ChildDef/Required.php';
|
||||
require_once $__dir . '/HTMLPurifier/ChildDef/Optional.php';
|
||||
require_once $__dir . '/HTMLPurifier/ChildDef/StrictBlockquote.php';
|
||||
@@ -139,10 +149,12 @@ require_once $__dir . '/HTMLPurifier/HTMLModule/CommonAttributes.php';
|
||||
require_once $__dir . '/HTMLPurifier/HTMLModule/Edit.php';
|
||||
require_once $__dir . '/HTMLPurifier/HTMLModule/Forms.php';
|
||||
require_once $__dir . '/HTMLPurifier/HTMLModule/Hypertext.php';
|
||||
require_once $__dir . '/HTMLPurifier/HTMLModule/Iframe.php';
|
||||
require_once $__dir . '/HTMLPurifier/HTMLModule/Image.php';
|
||||
require_once $__dir . '/HTMLPurifier/HTMLModule/Legacy.php';
|
||||
require_once $__dir . '/HTMLPurifier/HTMLModule/List.php';
|
||||
require_once $__dir . '/HTMLPurifier/HTMLModule/Name.php';
|
||||
require_once $__dir . '/HTMLPurifier/HTMLModule/Nofollow.php';
|
||||
require_once $__dir . '/HTMLPurifier/HTMLModule/NonXMLCommonAttributes.php';
|
||||
require_once $__dir . '/HTMLPurifier/HTMLModule/Object.php';
|
||||
require_once $__dir . '/HTMLPurifier/HTMLModule/Presentation.php';
|
||||
@@ -150,10 +162,12 @@ require_once $__dir . '/HTMLPurifier/HTMLModule/Proprietary.php';
|
||||
require_once $__dir . '/HTMLPurifier/HTMLModule/Ruby.php';
|
||||
require_once $__dir . '/HTMLPurifier/HTMLModule/SafeEmbed.php';
|
||||
require_once $__dir . '/HTMLPurifier/HTMLModule/SafeObject.php';
|
||||
require_once $__dir . '/HTMLPurifier/HTMLModule/SafeScripting.php';
|
||||
require_once $__dir . '/HTMLPurifier/HTMLModule/Scripting.php';
|
||||
require_once $__dir . '/HTMLPurifier/HTMLModule/StyleAttribute.php';
|
||||
require_once $__dir . '/HTMLPurifier/HTMLModule/Tables.php';
|
||||
require_once $__dir . '/HTMLPurifier/HTMLModule/Target.php';
|
||||
require_once $__dir . '/HTMLPurifier/HTMLModule/TargetBlank.php';
|
||||
require_once $__dir . '/HTMLPurifier/HTMLModule/Text.php';
|
||||
require_once $__dir . '/HTMLPurifier/HTMLModule/Tidy.php';
|
||||
require_once $__dir . '/HTMLPurifier/HTMLModule/XMLCommonAttributes.php';
|
||||
@@ -168,6 +182,7 @@ require_once $__dir . '/HTMLPurifier/Injector/DisplayLinkURI.php';
|
||||
require_once $__dir . '/HTMLPurifier/Injector/Linkify.php';
|
||||
require_once $__dir . '/HTMLPurifier/Injector/PurifierLinkify.php';
|
||||
require_once $__dir . '/HTMLPurifier/Injector/RemoveEmpty.php';
|
||||
require_once $__dir . '/HTMLPurifier/Injector/RemoveSpansWithoutAttributes.php';
|
||||
require_once $__dir . '/HTMLPurifier/Injector/SafeObject.php';
|
||||
require_once $__dir . '/HTMLPurifier/Lexer/DOMLex.php';
|
||||
require_once $__dir . '/HTMLPurifier/Lexer/DirectLex.php';
|
||||
@@ -187,9 +202,13 @@ require_once $__dir . '/HTMLPurifier/Token/Start.php';
|
||||
require_once $__dir . '/HTMLPurifier/Token/Text.php';
|
||||
require_once $__dir . '/HTMLPurifier/URIFilter/DisableExternal.php';
|
||||
require_once $__dir . '/HTMLPurifier/URIFilter/DisableExternalResources.php';
|
||||
require_once $__dir . '/HTMLPurifier/URIFilter/DisableResources.php';
|
||||
require_once $__dir . '/HTMLPurifier/URIFilter/HostBlacklist.php';
|
||||
require_once $__dir . '/HTMLPurifier/URIFilter/MakeAbsolute.php';
|
||||
require_once $__dir . '/HTMLPurifier/URIFilter/Munge.php';
|
||||
require_once $__dir . '/HTMLPurifier/URIFilter/SafeIframe.php';
|
||||
require_once $__dir . '/HTMLPurifier/URIScheme/data.php';
|
||||
require_once $__dir . '/HTMLPurifier/URIScheme/file.php';
|
||||
require_once $__dir . '/HTMLPurifier/URIScheme/ftp.php';
|
||||
require_once $__dir . '/HTMLPurifier/URIScheme/http.php';
|
||||
require_once $__dir . '/HTMLPurifier/URIScheme/https.php';
|
||||
|
184
library/HTMLPurifier/Array.php
Normal file
184
library/HTMLPurifier/Array.php
Normal file
@@ -0,0 +1,184 @@
|
||||
<?php
|
||||
|
||||
class HTMLPurifier_Array implements ArrayAccess
|
||||
{
|
||||
/**
|
||||
* @param HTMLPurifier_ArrayNode
|
||||
*/
|
||||
public $head = null;
|
||||
|
||||
/**
|
||||
* @var int
|
||||
*/
|
||||
protected $count = 0;
|
||||
|
||||
/**
|
||||
* @var int
|
||||
*/
|
||||
protected $offset = 0;
|
||||
|
||||
/**
|
||||
* @var HTMLPurifier_ArrayNode
|
||||
*/
|
||||
protected $offsetItem = null;
|
||||
|
||||
|
||||
public function __construct(array $array = array())
|
||||
{
|
||||
/**
|
||||
* @var HTMLPurifier_ArrayNode $temp
|
||||
*/
|
||||
$temp = null;
|
||||
$i = 0;
|
||||
|
||||
foreach ($array as &$v) {
|
||||
$item = new HTMLPurifier_ArrayNode($v);
|
||||
|
||||
if ($this->head == null) {
|
||||
$this->head = &$item;
|
||||
}
|
||||
if ($temp instanceof HTMLPurifier_ArrayNode) {
|
||||
$item->prev = &$temp;
|
||||
$temp->next = &$item;
|
||||
}
|
||||
unset($temp);
|
||||
$temp = &$item;
|
||||
|
||||
$i ++;
|
||||
|
||||
unset($item, $v);
|
||||
}
|
||||
$this->count = $i;
|
||||
$this->offset = 0;
|
||||
$this->offsetItem = &$this->head;
|
||||
}
|
||||
|
||||
protected function findIndex($offset)
|
||||
{
|
||||
if ($this->head == null) {
|
||||
return array(
|
||||
'correct' => false,
|
||||
'value' => null
|
||||
);
|
||||
}
|
||||
|
||||
$current = &$this->head;
|
||||
$index = 0;
|
||||
|
||||
if ($this->offset <= $offset && $this->offsetItem instanceof HTMLPurifier_ArrayNode) {
|
||||
$current = &$this->offsetItem;
|
||||
$index = $this->offset;
|
||||
}
|
||||
|
||||
while ($current->next instanceof HTMLPurifier_ArrayNode && $index != $offset) {
|
||||
$current = &$current->next;
|
||||
$index ++;
|
||||
}
|
||||
|
||||
if ($index == $offset) {
|
||||
$this->offset = $offset;
|
||||
$this->offsetItem = &$current;
|
||||
return array(
|
||||
'correct' => true,
|
||||
'value' => &$current
|
||||
);
|
||||
}
|
||||
|
||||
return array(
|
||||
'correct' => false,
|
||||
'value' => &$current
|
||||
);
|
||||
}
|
||||
|
||||
public function insertBefore($offset, $value)
|
||||
{
|
||||
$result = $this->findIndex($offset);
|
||||
|
||||
$this->count ++;
|
||||
$item = new HTMLPurifier_ArrayNode($value);
|
||||
if ($result['correct'] == false) {
|
||||
if ($result['value'] instanceof HTMLPurifier_ArrayNode) {
|
||||
$result['value']->next = &$item;
|
||||
$item->prev = &$result['value'];
|
||||
}
|
||||
} else {
|
||||
if ($result['value'] instanceof HTMLPurifier_ArrayNode) {
|
||||
$item->prev = &$result['value']->prev;
|
||||
$item->next = &$result['value'];
|
||||
}
|
||||
if ($item->prev instanceof HTMLPurifier_ArrayNode) {
|
||||
$item->prev->next = &$item;
|
||||
}
|
||||
if ($result['value'] instanceof HTMLPurifier_ArrayNode) {
|
||||
$result['value']->prev = &$item;
|
||||
}
|
||||
}
|
||||
if ($offset == 0) {
|
||||
$this->head = &$item;
|
||||
}
|
||||
if ($offset <= $this->offset && $this->offsetItem instanceof HTMLPurifier_ArrayNode) {
|
||||
$this->offsetItem = &$this->offsetItem->prev;
|
||||
}
|
||||
}
|
||||
|
||||
public function remove($offset)
|
||||
{
|
||||
$result = $this->findIndex($offset);
|
||||
|
||||
if ($result['correct']) {
|
||||
$this->count --;
|
||||
$item = $result['value'];
|
||||
$item->prev->next = &$result['value']->next;
|
||||
$item->next->prev = &$result['value']->prev;
|
||||
if ($offset == 0) {
|
||||
$this->head = &$item->next;
|
||||
}
|
||||
if ($offset < $this->offset) {
|
||||
$this->offset --;
|
||||
} elseif ($offset == $this->offset) {
|
||||
$this->offsetItem = &$item->next;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public function getArray()
|
||||
{
|
||||
$return = array();
|
||||
$head = $this->head;
|
||||
|
||||
while ($head instanceof HTMLPurifier_ArrayNode) {
|
||||
$return[] = $head->value;
|
||||
$head = &$head->next;
|
||||
}
|
||||
|
||||
return $return;
|
||||
}
|
||||
|
||||
public function offsetExists($offset)
|
||||
{
|
||||
return $offset >= 0 && $offset < $this->count;
|
||||
}
|
||||
|
||||
public function offsetGet($offset)
|
||||
{
|
||||
$result = $this->findIndex($offset);
|
||||
if ($result['correct']) {
|
||||
return $result['value']->value;
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
public function offsetSet($offset, $value)
|
||||
{
|
||||
$result = $this->findIndex($offset);
|
||||
if ($result['correct']) {
|
||||
$result['value']->value = &$value;
|
||||
}
|
||||
}
|
||||
|
||||
public function offsetUnset($offset)
|
||||
{
|
||||
$this->remove($offset);
|
||||
}
|
||||
}
|
24
library/HTMLPurifier/ArrayNode.php
Normal file
24
library/HTMLPurifier/ArrayNode.php
Normal file
@@ -0,0 +1,24 @@
|
||||
<?php
|
||||
|
||||
class HTMLPurifier_ArrayNode
|
||||
{
|
||||
public function __construct(&$value)
|
||||
{
|
||||
$this->value = &$value;
|
||||
}
|
||||
|
||||
/**
|
||||
* @var HTMLPurifier_ArrayNode
|
||||
*/
|
||||
public $prev = null;
|
||||
|
||||
/**
|
||||
* @var HTMLPurifier_ArrayNode
|
||||
*/
|
||||
public $next = null;
|
||||
|
||||
/**
|
||||
* @var mixed
|
||||
*/
|
||||
public $value = null;
|
||||
}
|
@@ -82,6 +82,42 @@ abstract class HTMLPurifier_AttrDef
|
||||
return preg_replace('/rgb\((\d+)\s*,\s*(\d+)\s*,\s*(\d+)\)/', 'rgb(\1,\2,\3)', $string);
|
||||
}
|
||||
|
||||
/**
|
||||
* Parses a possibly escaped CSS string and returns the "pure"
|
||||
* version of it.
|
||||
*/
|
||||
protected function expandCSSEscape($string) {
|
||||
// flexibly parse it
|
||||
$ret = '';
|
||||
for ($i = 0, $c = strlen($string); $i < $c; $i++) {
|
||||
if ($string[$i] === '\\') {
|
||||
$i++;
|
||||
if ($i >= $c) {
|
||||
$ret .= '\\';
|
||||
break;
|
||||
}
|
||||
if (ctype_xdigit($string[$i])) {
|
||||
$code = $string[$i];
|
||||
for ($a = 1, $i++; $i < $c && $a < 6; $i++, $a++) {
|
||||
if (!ctype_xdigit($string[$i])) break;
|
||||
$code .= $string[$i];
|
||||
}
|
||||
// We have to be extremely careful when adding
|
||||
// new characters, to make sure we're not breaking
|
||||
// the encoding.
|
||||
$char = HTMLPurifier_Encoder::unichr(hexdec($code));
|
||||
if (HTMLPurifier_Encoder::cleanUTF8($char) === '') continue;
|
||||
$ret .= $char;
|
||||
if ($i < $c && trim($string[$i]) !== '') $i--;
|
||||
continue;
|
||||
}
|
||||
if ($string[$i] === "\n") continue;
|
||||
}
|
||||
$ret .= $string[$i];
|
||||
}
|
||||
return $ret;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// vim: et sw=4 sts=4
|
||||
|
@@ -32,7 +32,7 @@ class HTMLPurifier_AttrDef_CSS_Background extends HTMLPurifier_AttrDef
|
||||
$string = $this->mungeRgb($string);
|
||||
|
||||
// assumes URI doesn't have spaces in it
|
||||
$bits = explode(' ', strtolower($string)); // bits to process
|
||||
$bits = explode(' ', $string); // bits to process
|
||||
|
||||
$caught = array();
|
||||
$caught['color'] = false;
|
||||
|
@@ -59,7 +59,8 @@ class HTMLPurifier_AttrDef_CSS_BackgroundPosition extends HTMLPurifier_AttrDef
|
||||
$keywords = array();
|
||||
$keywords['h'] = false; // left, right
|
||||
$keywords['v'] = false; // top, bottom
|
||||
$keywords['c'] = false; // center
|
||||
$keywords['ch'] = false; // center (first word)
|
||||
$keywords['cv'] = false; // center (second word)
|
||||
$measures = array();
|
||||
|
||||
$i = 0;
|
||||
@@ -79,6 +80,13 @@ class HTMLPurifier_AttrDef_CSS_BackgroundPosition extends HTMLPurifier_AttrDef
|
||||
$lbit = ctype_lower($bit) ? $bit : strtolower($bit);
|
||||
if (isset($lookup[$lbit])) {
|
||||
$status = $lookup[$lbit];
|
||||
if ($status == 'c') {
|
||||
if ($i == 0) {
|
||||
$status = 'ch';
|
||||
} else {
|
||||
$status = 'cv';
|
||||
}
|
||||
}
|
||||
$keywords[$status] = $lbit;
|
||||
$i++;
|
||||
}
|
||||
@@ -101,20 +109,19 @@ class HTMLPurifier_AttrDef_CSS_BackgroundPosition extends HTMLPurifier_AttrDef
|
||||
|
||||
if (!$i) return false; // no valid values were caught
|
||||
|
||||
|
||||
$ret = array();
|
||||
|
||||
// first keyword
|
||||
if ($keywords['h']) $ret[] = $keywords['h'];
|
||||
elseif (count($measures)) $ret[] = array_shift($measures);
|
||||
elseif ($keywords['c']) {
|
||||
$ret[] = $keywords['c'];
|
||||
$keywords['c'] = false; // prevent re-use: center = center center
|
||||
elseif ($keywords['ch']) {
|
||||
$ret[] = $keywords['ch'];
|
||||
$keywords['cv'] = false; // prevent re-use: center = center center
|
||||
}
|
||||
elseif (count($measures)) $ret[] = array_shift($measures);
|
||||
|
||||
if ($keywords['v']) $ret[] = $keywords['v'];
|
||||
elseif ($keywords['cv']) $ret[] = $keywords['cv'];
|
||||
elseif (count($measures)) $ret[] = array_shift($measures);
|
||||
elseif ($keywords['c']) $ret[] = $keywords['c'];
|
||||
|
||||
if (empty($ret)) return false;
|
||||
return implode(' ', $ret);
|
||||
|
@@ -9,7 +9,7 @@ class HTMLPurifier_AttrDef_CSS_Color extends HTMLPurifier_AttrDef
|
||||
public function validate($color, $config, $context) {
|
||||
|
||||
static $colors = null;
|
||||
if ($colors === null) $colors = $config->get('Core', 'ColorKeywords');
|
||||
if ($colors === null) $colors = $config->get('Core.ColorKeywords');
|
||||
|
||||
$color = trim($color);
|
||||
if ($color === '') return false;
|
||||
|
@@ -2,11 +2,43 @@
|
||||
|
||||
/**
|
||||
* Validates a font family list according to CSS spec
|
||||
* @todo whitelisting allowed fonts would be nice
|
||||
*/
|
||||
class HTMLPurifier_AttrDef_CSS_FontFamily extends HTMLPurifier_AttrDef
|
||||
{
|
||||
|
||||
protected $mask = null;
|
||||
|
||||
public function __construct() {
|
||||
$this->mask = '_- ';
|
||||
for ($c = 'a'; $c <= 'z'; $c++) $this->mask .= $c;
|
||||
for ($c = 'A'; $c <= 'Z'; $c++) $this->mask .= $c;
|
||||
for ($c = '0'; $c <= '9'; $c++) $this->mask .= $c; // cast-y, but should be fine
|
||||
// special bytes used by UTF-8
|
||||
for ($i = 0x80; $i <= 0xFF; $i++) {
|
||||
// We don't bother excluding invalid bytes in this range,
|
||||
// because the our restriction of well-formed UTF-8 will
|
||||
// prevent these from ever occurring.
|
||||
$this->mask .= chr($i);
|
||||
}
|
||||
|
||||
/*
|
||||
PHP's internal strcspn implementation is
|
||||
O(length of string * length of mask), making it inefficient
|
||||
for large masks. However, it's still faster than
|
||||
preg_match 8)
|
||||
for (p = s1;;) {
|
||||
spanp = s2;
|
||||
do {
|
||||
if (*spanp == c || p == s1_end) {
|
||||
return p - s1;
|
||||
}
|
||||
} while (spanp++ < (s2_end - 1));
|
||||
c = *++p;
|
||||
}
|
||||
*/
|
||||
// possible optimization: invert the mask.
|
||||
}
|
||||
|
||||
public function validate($string, $config, $context) {
|
||||
static $generic_names = array(
|
||||
'serif' => true,
|
||||
@@ -15,6 +47,7 @@ class HTMLPurifier_AttrDef_CSS_FontFamily extends HTMLPurifier_AttrDef
|
||||
'fantasy' => true,
|
||||
'cursive' => true
|
||||
);
|
||||
$allowed_fonts = $config->get('CSS.AllowedFonts');
|
||||
|
||||
// assume that no font names contain commas in them
|
||||
$fonts = explode(',', $string);
|
||||
@@ -24,7 +57,9 @@ class HTMLPurifier_AttrDef_CSS_FontFamily extends HTMLPurifier_AttrDef
|
||||
if ($font === '') continue;
|
||||
// match a generic name
|
||||
if (isset($generic_names[$font])) {
|
||||
$final .= $font . ', ';
|
||||
if ($allowed_fonts === null || isset($allowed_fonts[$font])) {
|
||||
$final .= $font . ', ';
|
||||
}
|
||||
continue;
|
||||
}
|
||||
// match a quoted name
|
||||
@@ -34,50 +69,122 @@ class HTMLPurifier_AttrDef_CSS_FontFamily extends HTMLPurifier_AttrDef
|
||||
$quote = $font[0];
|
||||
if ($font[$length - 1] !== $quote) continue;
|
||||
$font = substr($font, 1, $length - 2);
|
||||
|
||||
$new_font = '';
|
||||
for ($i = 0, $c = strlen($font); $i < $c; $i++) {
|
||||
if ($font[$i] === '\\') {
|
||||
$i++;
|
||||
if ($i >= $c) {
|
||||
$new_font .= '\\';
|
||||
break;
|
||||
}
|
||||
if (ctype_xdigit($font[$i])) {
|
||||
$code = $font[$i];
|
||||
for ($a = 1, $i++; $i < $c && $a < 6; $i++, $a++) {
|
||||
if (!ctype_xdigit($font[$i])) break;
|
||||
$code .= $font[$i];
|
||||
}
|
||||
// We have to be extremely careful when adding
|
||||
// new characters, to make sure we're not breaking
|
||||
// the encoding.
|
||||
$char = HTMLPurifier_Encoder::unichr(hexdec($code));
|
||||
if (HTMLPurifier_Encoder::cleanUTF8($char) === '') continue;
|
||||
$new_font .= $char;
|
||||
if ($i < $c && trim($font[$i]) !== '') $i--;
|
||||
continue;
|
||||
}
|
||||
if ($font[$i] === "\n") continue;
|
||||
}
|
||||
$new_font .= $font[$i];
|
||||
}
|
||||
|
||||
$font = $new_font;
|
||||
}
|
||||
|
||||
$font = $this->expandCSSEscape($font);
|
||||
|
||||
// $font is a pure representation of the font name
|
||||
|
||||
if ($allowed_fonts !== null && !isset($allowed_fonts[$font])) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (ctype_alnum($font) && $font !== '') {
|
||||
// very simple font, allow it in unharmed
|
||||
$final .= $font . ', ';
|
||||
continue;
|
||||
}
|
||||
|
||||
// complicated font, requires quoting
|
||||
// bugger out on whitespace. form feed (0C) really
|
||||
// shouldn't show up regardless
|
||||
$font = str_replace(array("\n", "\t", "\r", "\x0C"), ' ', $font);
|
||||
|
||||
// armor single quotes and new lines
|
||||
$font = str_replace("\\", "\\\\", $font);
|
||||
$font = str_replace("'", "\\'", $font);
|
||||
// Here, there are various classes of characters which need
|
||||
// to be treated differently:
|
||||
// - Alphanumeric characters are essentially safe. We
|
||||
// handled these above.
|
||||
// - Spaces require quoting, though most parsers will do
|
||||
// the right thing if there aren't any characters that
|
||||
// can be misinterpreted
|
||||
// - Dashes rarely occur, but they fairly unproblematic
|
||||
// for parsing/rendering purposes.
|
||||
// The above characters cover the majority of Western font
|
||||
// names.
|
||||
// - Arbitrary Unicode characters not in ASCII. Because
|
||||
// most parsers give little thought to Unicode, treatment
|
||||
// of these codepoints is basically uniform, even for
|
||||
// punctuation-like codepoints. These characters can
|
||||
// show up in non-Western pages and are supported by most
|
||||
// major browsers, for example: "MS 明朝" is a
|
||||
// legitimate font-name
|
||||
// <http://ja.wikipedia.org/wiki/MS_明朝>. See
|
||||
// the CSS3 spec for more examples:
|
||||
// <http://www.w3.org/TR/2011/WD-css3-fonts-20110324/localizedfamilynames.png>
|
||||
// You can see live samples of these on the Internet:
|
||||
// <http://www.google.co.jp/search?q=font-family+MS+明朝|ゴシック>
|
||||
// However, most of these fonts have ASCII equivalents:
|
||||
// for example, 'MS Mincho', and it's considered
|
||||
// professional to use ASCII font names instead of
|
||||
// Unicode font names. Thanks Takeshi Terada for
|
||||
// providing this information.
|
||||
// The following characters, to my knowledge, have not been
|
||||
// used to name font names.
|
||||
// - Single quote. While theoretically you might find a
|
||||
// font name that has a single quote in its name (serving
|
||||
// as an apostrophe, e.g. Dave's Scribble), I haven't
|
||||
// been able to find any actual examples of this.
|
||||
// Internet Explorer's cssText translation (which I
|
||||
// believe is invoked by innerHTML) normalizes any
|
||||
// quoting to single quotes, and fails to escape single
|
||||
// quotes. (Note that this is not IE's behavior for all
|
||||
// CSS properties, just some sort of special casing for
|
||||
// font-family). So a single quote *cannot* be used
|
||||
// safely in the font-family context if there will be an
|
||||
// innerHTML/cssText translation. Note that Firefox 3.x
|
||||
// does this too.
|
||||
// - Double quote. In IE, these get normalized to
|
||||
// single-quotes, no matter what the encoding. (Fun
|
||||
// fact, in IE8, the 'content' CSS property gained
|
||||
// support, where they special cased to preserve encoded
|
||||
// double quotes, but still translate unadorned double
|
||||
// quotes into single quotes.) So, because their
|
||||
// fixpoint behavior is identical to single quotes, they
|
||||
// cannot be allowed either. Firefox 3.x displays
|
||||
// single-quote style behavior.
|
||||
// - Backslashes are reduced by one (so \\ -> \) every
|
||||
// iteration, so they cannot be used safely. This shows
|
||||
// up in IE7, IE8 and FF3
|
||||
// - Semicolons, commas and backticks are handled properly.
|
||||
// - The rest of the ASCII punctuation is handled properly.
|
||||
// We haven't checked what browsers do to unadorned
|
||||
// versions, but this is not important as long as the
|
||||
// browser doesn't /remove/ surrounding quotes (as IE does
|
||||
// for HTML).
|
||||
//
|
||||
// With these results in hand, we conclude that there are
|
||||
// various levels of safety:
|
||||
// - Paranoid: alphanumeric, spaces and dashes(?)
|
||||
// - International: Paranoid + non-ASCII Unicode
|
||||
// - Edgy: Everything except quotes, backslashes
|
||||
// - NoJS: Standards compliance, e.g. sod IE. Note that
|
||||
// with some judicious character escaping (since certain
|
||||
// types of escaping doesn't work) this is theoretically
|
||||
// OK as long as innerHTML/cssText is not called.
|
||||
// We believe that international is a reasonable default
|
||||
// (that we will implement now), and once we do more
|
||||
// extensive research, we may feel comfortable with dropping
|
||||
// it down to edgy.
|
||||
|
||||
// Edgy: alphanumeric, spaces, dashes, underscores and Unicode. Use of
|
||||
// str(c)spn assumes that the string was already well formed
|
||||
// Unicode (which of course it is).
|
||||
if (strspn($font, $this->mask) !== strlen($font)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Historical:
|
||||
// In the absence of innerHTML/cssText, these ugly
|
||||
// transforms don't pose a security risk (as \\ and \"
|
||||
// might--these escapes are not supported by most browsers).
|
||||
// We could try to be clever and use single-quote wrapping
|
||||
// when there is a double quote present, but I have choosen
|
||||
// not to implement that. (NOTE: you can reduce the amount
|
||||
// of escapes by one depending on what quoting style you use)
|
||||
// $font = str_replace('\\', '\\5C ', $font);
|
||||
// $font = str_replace('"', '\\22 ', $font);
|
||||
// $font = str_replace("'", '\\27 ', $font);
|
||||
|
||||
// font possibly with spaces, requires quoting
|
||||
$final .= "'$font', ";
|
||||
}
|
||||
$final = rtrim($final, ', ');
|
||||
|
24
library/HTMLPurifier/AttrDef/CSS/Ident.php
Normal file
24
library/HTMLPurifier/AttrDef/CSS/Ident.php
Normal file
@@ -0,0 +1,24 @@
|
||||
<?php
|
||||
|
||||
/**
|
||||
* Validates based on {ident} CSS grammar production
|
||||
*/
|
||||
class HTMLPurifier_AttrDef_CSS_Ident extends HTMLPurifier_AttrDef
|
||||
{
|
||||
|
||||
public function validate($string, $config, $context) {
|
||||
|
||||
$string = trim($string);
|
||||
|
||||
// early abort: '' and '0' (strings that convert to false) are invalid
|
||||
if (!$string) return false;
|
||||
|
||||
$pattern = '/^(-?[A-Za-z_][A-Za-z_\-0-9]*)$/';
|
||||
if (!preg_match($pattern, $string)) return false;
|
||||
return $string;
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// vim: et sw=4 sts=4
|
@@ -34,20 +34,25 @@ class HTMLPurifier_AttrDef_CSS_URI extends HTMLPurifier_AttrDef_URI
|
||||
$uri = substr($uri, 1, $new_length - 1);
|
||||
}
|
||||
|
||||
$keys = array( '(', ')', ',', ' ', '"', "'");
|
||||
$values = array('\\(', '\\)', '\\,', '\\ ', '\\"', "\\'");
|
||||
$uri = str_replace($values, $keys, $uri);
|
||||
$uri = $this->expandCSSEscape($uri);
|
||||
|
||||
$result = parent::validate($uri, $config, $context);
|
||||
|
||||
if ($result === false) return false;
|
||||
|
||||
// escape necessary characters according to CSS spec
|
||||
// except for the comma, none of these should appear in the
|
||||
// URI at all
|
||||
$result = str_replace($keys, $values, $result);
|
||||
// extra sanity check; should have been done by URI
|
||||
$result = str_replace(array('"', "\\", "\n", "\x0c", "\r"), "", $result);
|
||||
|
||||
return "url($result)";
|
||||
// suspicious characters are ()'; we're going to percent encode
|
||||
// them for safety.
|
||||
$result = str_replace(array('(', ')', "'"), array('%28', '%29', '%27'), $result);
|
||||
|
||||
// there's an extra bug where ampersands lose their escaping on
|
||||
// an innerHTML cycle, so a very unlucky query parameter could
|
||||
// then change the meaning of the URL. Unfortunately, there's
|
||||
// not much we can do about that...
|
||||
|
||||
return "url(\"$result\")";
|
||||
|
||||
}
|
||||
|
||||
|
28
library/HTMLPurifier/AttrDef/Clone.php
Normal file
28
library/HTMLPurifier/AttrDef/Clone.php
Normal file
@@ -0,0 +1,28 @@
|
||||
<?php
|
||||
|
||||
/**
|
||||
* Dummy AttrDef that mimics another AttrDef, BUT it generates clones
|
||||
* with make.
|
||||
*/
|
||||
class HTMLPurifier_AttrDef_Clone extends HTMLPurifier_AttrDef
|
||||
{
|
||||
/**
|
||||
* What we're cloning
|
||||
*/
|
||||
protected $clone;
|
||||
|
||||
public function __construct($clone) {
|
||||
$this->clone = $clone;
|
||||
}
|
||||
|
||||
public function validate($v, $config, $context) {
|
||||
return $this->clone->validate($v, $config, $context);
|
||||
}
|
||||
|
||||
public function make($string) {
|
||||
return clone $this->clone;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// vim: et sw=4 sts=4
|
34
library/HTMLPurifier/AttrDef/HTML/Class.php
Normal file
34
library/HTMLPurifier/AttrDef/HTML/Class.php
Normal file
@@ -0,0 +1,34 @@
|
||||
<?php
|
||||
|
||||
/**
|
||||
* Implements special behavior for class attribute (normally NMTOKENS)
|
||||
*/
|
||||
class HTMLPurifier_AttrDef_HTML_Class extends HTMLPurifier_AttrDef_HTML_Nmtokens
|
||||
{
|
||||
protected function split($string, $config, $context) {
|
||||
// really, this twiddle should be lazy loaded
|
||||
$name = $config->getDefinition('HTML')->doctype->name;
|
||||
if ($name == "XHTML 1.1" || $name == "XHTML 2.0") {
|
||||
return parent::split($string, $config, $context);
|
||||
} else {
|
||||
return preg_split('/\s+/', $string);
|
||||
}
|
||||
}
|
||||
protected function filter($tokens, $config, $context) {
|
||||
$allowed = $config->get('Attr.AllowedClasses');
|
||||
$forbidden = $config->get('Attr.ForbiddenClasses');
|
||||
$ret = array();
|
||||
foreach ($tokens as $token) {
|
||||
if (
|
||||
($allowed === null || isset($allowed[$token])) &&
|
||||
!isset($forbidden[$token]) &&
|
||||
// We need this O(n) check because of PHP's array
|
||||
// implementation that casts -0 to 0.
|
||||
!in_array($token, $ret, true)
|
||||
) {
|
||||
$ret[] = $token;
|
||||
}
|
||||
}
|
||||
return $ret;
|
||||
}
|
||||
}
|
@@ -9,12 +9,13 @@ class HTMLPurifier_AttrDef_HTML_Color extends HTMLPurifier_AttrDef
|
||||
public function validate($string, $config, $context) {
|
||||
|
||||
static $colors = null;
|
||||
if ($colors === null) $colors = $config->get('Core', 'ColorKeywords');
|
||||
if ($colors === null) $colors = $config->get('Core.ColorKeywords');
|
||||
|
||||
$string = trim($string);
|
||||
|
||||
if (empty($string)) return false;
|
||||
if (isset($colors[$string])) return $colors[$string];
|
||||
$lower = strtolower($string);
|
||||
if (isset($colors[$lower])) return $colors[$lower];
|
||||
if ($string[0] === '#') $hex = substr($string, 1);
|
||||
else $hex = $string;
|
||||
|
||||
|
@@ -12,7 +12,7 @@ class HTMLPurifier_AttrDef_HTML_FrameTarget extends HTMLPurifier_AttrDef_Enum
|
||||
public function __construct() {}
|
||||
|
||||
public function validate($string, $config, $context) {
|
||||
if ($this->valid_values === false) $this->valid_values = $config->get('Attr', 'AllowedFrameTargets');
|
||||
if ($this->valid_values === false) $this->valid_values = $config->get('Attr.AllowedFrameTargets');
|
||||
return parent::validate($string, $config, $context);
|
||||
}
|
||||
|
||||
|
@@ -12,31 +12,41 @@
|
||||
class HTMLPurifier_AttrDef_HTML_ID extends HTMLPurifier_AttrDef
|
||||
{
|
||||
|
||||
// ref functionality disabled, since we also have to verify
|
||||
// whether or not the ID it refers to exists
|
||||
// selector is NOT a valid thing to use for IDREFs, because IDREFs
|
||||
// *must* target IDs that exist, whereas selector #ids do not.
|
||||
|
||||
/**
|
||||
* Determines whether or not we're validating an ID in a CSS
|
||||
* selector context.
|
||||
*/
|
||||
protected $selector;
|
||||
|
||||
public function __construct($selector = false) {
|
||||
$this->selector = $selector;
|
||||
}
|
||||
|
||||
public function validate($id, $config, $context) {
|
||||
|
||||
if (!$config->get('Attr', 'EnableID')) return false;
|
||||
if (!$this->selector && !$config->get('Attr.EnableID')) return false;
|
||||
|
||||
$id = trim($id); // trim it first
|
||||
|
||||
if ($id === '') return false;
|
||||
|
||||
$prefix = $config->get('Attr', 'IDPrefix');
|
||||
$prefix = $config->get('Attr.IDPrefix');
|
||||
if ($prefix !== '') {
|
||||
$prefix .= $config->get('Attr', 'IDPrefixLocal');
|
||||
$prefix .= $config->get('Attr.IDPrefixLocal');
|
||||
// prevent re-appending the prefix
|
||||
if (strpos($id, $prefix) !== 0) $id = $prefix . $id;
|
||||
} elseif ($config->get('Attr', 'IDPrefixLocal') !== '') {
|
||||
} elseif ($config->get('Attr.IDPrefixLocal') !== '') {
|
||||
trigger_error('%Attr.IDPrefixLocal cannot be used unless '.
|
||||
'%Attr.IDPrefix is set', E_USER_WARNING);
|
||||
}
|
||||
|
||||
//if (!$this->ref) {
|
||||
if (!$this->selector) {
|
||||
$id_accumulator =& $context->get('IDAccumulator');
|
||||
if (isset($id_accumulator->ids[$id])) return false;
|
||||
//}
|
||||
}
|
||||
|
||||
// we purposely avoid using regex, hopefully this is faster
|
||||
|
||||
@@ -51,12 +61,12 @@ class HTMLPurifier_AttrDef_HTML_ID extends HTMLPurifier_AttrDef
|
||||
$result = ($trim === '');
|
||||
}
|
||||
|
||||
$regexp = $config->get('Attr', 'IDBlacklistRegexp');
|
||||
$regexp = $config->get('Attr.IDBlacklistRegexp');
|
||||
if ($regexp && preg_match($regexp, $id)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (/*!$this->ref && */$result) $id_accumulator->add($id);
|
||||
if (!$this->selector && $result) $id_accumulator->add($id);
|
||||
|
||||
// if no change was made to the ID, return the result
|
||||
// else, return the new id if stripping whitespace made it
|
||||
|
@@ -27,7 +27,7 @@ class HTMLPurifier_AttrDef_HTML_LinkTypes extends HTMLPurifier_AttrDef
|
||||
|
||||
public function validate($string, $config, $context) {
|
||||
|
||||
$allowed = $config->get('Attr', $this->name);
|
||||
$allowed = $config->get('Attr.' . $this->name);
|
||||
if (empty($allowed)) return false;
|
||||
|
||||
$string = $this->parseCDATA($string);
|
||||
|
@@ -2,10 +2,6 @@
|
||||
|
||||
/**
|
||||
* Validates contents based on NMTOKENS attribute type.
|
||||
* @note The only current use for this is the class attribute in HTML
|
||||
* @note Could have some functionality factored out into Nmtoken class
|
||||
* @warning We cannot assume this class will be used only for 'class'
|
||||
* attributes. Not sure how to hook in magic behavior, then.
|
||||
*/
|
||||
class HTMLPurifier_AttrDef_HTML_Nmtokens extends HTMLPurifier_AttrDef
|
||||
{
|
||||
@@ -17,6 +13,17 @@ class HTMLPurifier_AttrDef_HTML_Nmtokens extends HTMLPurifier_AttrDef
|
||||
// early abort: '' and '0' (strings that convert to false) are invalid
|
||||
if (!$string) return false;
|
||||
|
||||
$tokens = $this->split($string, $config, $context);
|
||||
$tokens = $this->filter($tokens, $config, $context);
|
||||
if (empty($tokens)) return false;
|
||||
return implode(' ', $tokens);
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* Splits a space separated list of tokens into its constituent parts.
|
||||
*/
|
||||
protected function split($string, $config, $context) {
|
||||
// OPTIMIZABLE!
|
||||
// do the preg_match, capture all subpatterns for reformulation
|
||||
|
||||
@@ -24,23 +31,20 @@ class HTMLPurifier_AttrDef_HTML_Nmtokens extends HTMLPurifier_AttrDef
|
||||
// escaping because I don't know how to do that with regexps
|
||||
// and plus it would complicate optimization efforts (you never
|
||||
// see that anyway).
|
||||
$matches = array();
|
||||
$pattern = '/(?:(?<=\s)|\A)'. // look behind for space or string start
|
||||
'((?:--|-?[A-Za-z_])[A-Za-z_\-0-9]*)'.
|
||||
'(?:(?=\s)|\z)/'; // look ahead for space or string end
|
||||
preg_match_all($pattern, $string, $matches);
|
||||
return $matches[1];
|
||||
}
|
||||
|
||||
if (empty($matches[1])) return false;
|
||||
|
||||
// reconstruct string
|
||||
$new_string = '';
|
||||
foreach ($matches[1] as $token) {
|
||||
$new_string .= $token . ' ';
|
||||
}
|
||||
$new_string = rtrim($new_string);
|
||||
|
||||
return $new_string;
|
||||
|
||||
/**
|
||||
* Template method for removing certain tokens based on arbitrary criteria.
|
||||
* @note If we wanted to be really functional, we'd do an array_filter
|
||||
* with a callback. But... we're not.
|
||||
*/
|
||||
protected function filter($tokens, $config, $context) {
|
||||
return $tokens;
|
||||
}
|
||||
|
||||
}
|
||||
|
@@ -19,13 +19,13 @@ class HTMLPurifier_AttrDef_URI extends HTMLPurifier_AttrDef
|
||||
}
|
||||
|
||||
public function make($string) {
|
||||
$embeds = (bool) $string;
|
||||
$embeds = ($string === 'embedded');
|
||||
return new HTMLPurifier_AttrDef_URI($embeds);
|
||||
}
|
||||
|
||||
public function validate($uri, $config, $context) {
|
||||
|
||||
if ($config->get('URI', 'Disable')) return false;
|
||||
if ($config->get('URI.Disable')) return false;
|
||||
|
||||
$uri = $this->parseCDATA($uri);
|
||||
|
||||
|
@@ -23,6 +23,12 @@ class HTMLPurifier_AttrDef_URI_Host extends HTMLPurifier_AttrDef
|
||||
|
||||
public function validate($string, $config, $context) {
|
||||
$length = strlen($string);
|
||||
// empty hostname is OK; it's usually semantically equivalent:
|
||||
// the default host as defined by a URI scheme is used:
|
||||
//
|
||||
// If the URI scheme defines a default for host, then that
|
||||
// default applies when the host subcomponent is undefined
|
||||
// or when the registered name is empty (zero length).
|
||||
if ($string === '') return '';
|
||||
if ($length > 1 && $string[0] === '[' && $string[$length-1] === ']') {
|
||||
//IPv6
|
||||
@@ -38,9 +44,8 @@ class HTMLPurifier_AttrDef_URI_Host extends HTMLPurifier_AttrDef
|
||||
|
||||
// A regular domain name.
|
||||
|
||||
// This breaks I18N domain names, but we don't have proper IRI support,
|
||||
// so force users to insert Punycode. If there's complaining we'll
|
||||
// try to fix things into an international friendly form.
|
||||
// This doesn't match I18N domain names, but we don't have proper IRI support,
|
||||
// so force users to insert Punycode.
|
||||
|
||||
// The productions describing this are:
|
||||
$a = '[a-z]'; // alpha
|
||||
@@ -51,10 +56,44 @@ class HTMLPurifier_AttrDef_URI_Host extends HTMLPurifier_AttrDef
|
||||
// toplabel = alpha | alpha *( alphanum | "-" ) alphanum
|
||||
$toplabel = "$a($and*$an)?";
|
||||
// hostname = *( domainlabel "." ) toplabel [ "." ]
|
||||
$match = preg_match("/^($domainlabel\.)*$toplabel\.?$/i", $string);
|
||||
if (!$match) return false;
|
||||
if (preg_match("/^($domainlabel\.)*$toplabel\.?$/i", $string)) {
|
||||
return $string;
|
||||
}
|
||||
|
||||
return $string;
|
||||
// If we have Net_IDNA2 support, we can support IRIs by
|
||||
// punycoding them. (This is the most portable thing to do,
|
||||
// since otherwise we have to assume browsers support
|
||||
|
||||
if ($config->get('Core.EnableIDNA')) {
|
||||
$idna = new Net_IDNA2(array('encoding' => 'utf8', 'overlong' => false, 'strict' => true));
|
||||
// we need to encode each period separately
|
||||
$parts = explode('.', $string);
|
||||
try {
|
||||
$new_parts = array();
|
||||
foreach ($parts as $part) {
|
||||
$encodable = false;
|
||||
for ($i = 0, $c = strlen($part); $i < $c; $i++) {
|
||||
if (ord($part[$i]) > 0x7a) {
|
||||
$encodable = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!$encodable) {
|
||||
$new_parts[] = $part;
|
||||
} else {
|
||||
$new_parts[] = $idna->encode($part);
|
||||
}
|
||||
}
|
||||
$string = implode('.', $new_parts);
|
||||
if (preg_match("/^($domainlabel\.)*$toplabel\.?$/i", $string)) {
|
||||
return $string;
|
||||
}
|
||||
} catch (Exception $e) {
|
||||
// XXX error reporting
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
}
|
||||
|
@@ -10,7 +10,7 @@ class HTMLPurifier_AttrTransform_BdoDir extends HTMLPurifier_AttrTransform
|
||||
|
||||
public function transform($attr, $config, $context) {
|
||||
if (isset($attr['dir'])) return $attr;
|
||||
$attr['dir'] = $config->get('Attr', 'DefaultTextDir');
|
||||
$attr['dir'] = $config->get('Attr.DefaultTextDir');
|
||||
return $attr;
|
||||
}
|
||||
|
||||
|
@@ -15,21 +15,22 @@ class HTMLPurifier_AttrTransform_ImgRequired extends HTMLPurifier_AttrTransform
|
||||
|
||||
$src = true;
|
||||
if (!isset($attr['src'])) {
|
||||
if ($config->get('Core', 'RemoveInvalidImg')) return $attr;
|
||||
$attr['src'] = $config->get('Attr', 'DefaultInvalidImage');
|
||||
if ($config->get('Core.RemoveInvalidImg')) return $attr;
|
||||
$attr['src'] = $config->get('Attr.DefaultInvalidImage');
|
||||
$src = false;
|
||||
}
|
||||
|
||||
if (!isset($attr['alt'])) {
|
||||
if ($src) {
|
||||
$alt = $config->get('Attr', 'DefaultImageAlt');
|
||||
$alt = $config->get('Attr.DefaultImageAlt');
|
||||
if ($alt === null) {
|
||||
$attr['alt'] = basename($attr['src']);
|
||||
// truncate if the alt is too long
|
||||
$attr['alt'] = substr(basename($attr['src']),0,40);
|
||||
} else {
|
||||
$attr['alt'] = $alt;
|
||||
}
|
||||
} else {
|
||||
$attr['alt'] = $config->get('Attr', 'DefaultInvalidImageAlt');
|
||||
$attr['alt'] = $config->get('Attr.DefaultInvalidImageAlt');
|
||||
}
|
||||
}
|
||||
|
||||
|
@@ -7,6 +7,8 @@ class HTMLPurifier_AttrTransform_Name extends HTMLPurifier_AttrTransform
|
||||
{
|
||||
|
||||
public function transform($attr, $config, $context) {
|
||||
// Abort early if we're using relaxed definition of name
|
||||
if ($config->get('HTML.Attr.Name.UseCDATA')) return $attr;
|
||||
if (!isset($attr['name'])) return $attr;
|
||||
$id = $this->confiscateAttr($attr, 'name');
|
||||
if ( isset($attr['id'])) return $attr;
|
||||
|
27
library/HTMLPurifier/AttrTransform/NameSync.php
Normal file
27
library/HTMLPurifier/AttrTransform/NameSync.php
Normal file
@@ -0,0 +1,27 @@
|
||||
<?php
|
||||
|
||||
/**
|
||||
* Post-transform that performs validation to the name attribute; if
|
||||
* it is present with an equivalent id attribute, it is passed through;
|
||||
* otherwise validation is performed.
|
||||
*/
|
||||
class HTMLPurifier_AttrTransform_NameSync extends HTMLPurifier_AttrTransform
|
||||
{
|
||||
|
||||
public function __construct() {
|
||||
$this->idDef = new HTMLPurifier_AttrDef_HTML_ID();
|
||||
}
|
||||
|
||||
public function transform($attr, $config, $context) {
|
||||
if (!isset($attr['name'])) return $attr;
|
||||
$name = $attr['name'];
|
||||
if (isset($attr['id']) && $attr['id'] === $name) return $attr;
|
||||
$result = $this->idDef->validate($name, $config, $context);
|
||||
if ($result === false) unset($attr['name']);
|
||||
else $attr['name'] = $result;
|
||||
return $attr;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// vim: et sw=4 sts=4
|
45
library/HTMLPurifier/AttrTransform/Nofollow.php
Normal file
45
library/HTMLPurifier/AttrTransform/Nofollow.php
Normal file
@@ -0,0 +1,45 @@
|
||||
<?php
|
||||
|
||||
// must be called POST validation
|
||||
|
||||
/**
|
||||
* Adds rel="nofollow" to all outbound links. This transform is
|
||||
* only attached if Attr.Nofollow is TRUE.
|
||||
*/
|
||||
class HTMLPurifier_AttrTransform_Nofollow extends HTMLPurifier_AttrTransform
|
||||
{
|
||||
private $parser;
|
||||
|
||||
public function __construct() {
|
||||
$this->parser = new HTMLPurifier_URIParser();
|
||||
}
|
||||
|
||||
public function transform($attr, $config, $context) {
|
||||
|
||||
if (!isset($attr['href'])) {
|
||||
return $attr;
|
||||
}
|
||||
|
||||
// XXX Kind of inefficient
|
||||
$url = $this->parser->parse($attr['href']);
|
||||
$scheme = $url->getSchemeObj($config, $context);
|
||||
|
||||
if ($scheme->browsable && !$url->isLocal($config, $context)) {
|
||||
if (isset($attr['rel'])) {
|
||||
$rels = explode(' ', $attr['rel']);
|
||||
if (!in_array('nofollow', $rels)) {
|
||||
$rels[] = 'nofollow';
|
||||
}
|
||||
$attr['rel'] = implode(' ', $rels);
|
||||
} else {
|
||||
$attr['rel'] = 'nofollow';
|
||||
}
|
||||
}
|
||||
|
||||
return $attr;
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// vim: et sw=4 sts=4
|
@@ -19,6 +19,7 @@ class HTMLPurifier_AttrTransform_SafeParam extends HTMLPurifier_AttrTransform
|
||||
|
||||
public function __construct() {
|
||||
$this->uri = new HTMLPurifier_AttrDef_URI(true); // embedded
|
||||
$this->wmode = new HTMLPurifier_AttrDef_Enum(array('window', 'opaque', 'transparent'));
|
||||
}
|
||||
|
||||
public function transform($attr, $config, $context) {
|
||||
@@ -33,12 +34,25 @@ class HTMLPurifier_AttrTransform_SafeParam extends HTMLPurifier_AttrTransform
|
||||
case 'allowNetworking':
|
||||
$attr['value'] = 'internal';
|
||||
break;
|
||||
case 'allowFullScreen':
|
||||
if ($config->get('HTML.FlashAllowFullScreen')) {
|
||||
$attr['value'] = ($attr['value'] == 'true') ? 'true' : 'false';
|
||||
} else {
|
||||
$attr['value'] = 'false';
|
||||
}
|
||||
break;
|
||||
case 'wmode':
|
||||
$attr['value'] = 'window';
|
||||
$attr['value'] = $this->wmode->validate($attr['value'], $config, $context);
|
||||
break;
|
||||
case 'movie':
|
||||
case 'src':
|
||||
$attr['name'] = "movie";
|
||||
$attr['value'] = $this->uri->validate($attr['value'], $config, $context);
|
||||
break;
|
||||
case 'flashvars':
|
||||
// we're going to allow arbitrary inputs to the SWF, on
|
||||
// the reasoning that it could only hack the SWF, not us.
|
||||
break;
|
||||
// add other cases to support other param name/value pairs
|
||||
default:
|
||||
$attr['name'] = $attr['value'] = null;
|
||||
|
38
library/HTMLPurifier/AttrTransform/TargetBlank.php
Normal file
38
library/HTMLPurifier/AttrTransform/TargetBlank.php
Normal file
@@ -0,0 +1,38 @@
|
||||
<?php
|
||||
|
||||
// must be called POST validation
|
||||
|
||||
/**
|
||||
* Adds target="blank" to all outbound links. This transform is
|
||||
* only attached if Attr.TargetBlank is TRUE. This works regardless
|
||||
* of whether or not Attr.AllowedFrameTargets
|
||||
*/
|
||||
class HTMLPurifier_AttrTransform_TargetBlank extends HTMLPurifier_AttrTransform
|
||||
{
|
||||
private $parser;
|
||||
|
||||
public function __construct() {
|
||||
$this->parser = new HTMLPurifier_URIParser();
|
||||
}
|
||||
|
||||
public function transform($attr, $config, $context) {
|
||||
|
||||
if (!isset($attr['href'])) {
|
||||
return $attr;
|
||||
}
|
||||
|
||||
// XXX Kind of inefficient
|
||||
$url = $this->parser->parse($attr['href']);
|
||||
$scheme = $url->getSchemeObj($config, $context);
|
||||
|
||||
if ($scheme->browsable && !$url->isBenign($config, $context)) {
|
||||
$attr['target'] = '_blank';
|
||||
}
|
||||
|
||||
return $attr;
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// vim: et sw=4 sts=4
|
@@ -15,6 +15,13 @@ class HTMLPurifier_AttrTypes
|
||||
* types.
|
||||
*/
|
||||
public function __construct() {
|
||||
// XXX This is kind of poor, since we don't actually /clone/
|
||||
// instances; instead, we use the supplied make() attribute. So,
|
||||
// the underlying class must know how to deal with arguments.
|
||||
// With the old implementation of Enum, that ignored its
|
||||
// arguments when handling a make dispatch, the IAlign
|
||||
// definition wouldn't work.
|
||||
|
||||
// pseudo-types, must be instantiated via shorthand
|
||||
$this->info['Enum'] = new HTMLPurifier_AttrDef_Enum();
|
||||
$this->info['Bool'] = new HTMLPurifier_AttrDef_HTML_Bool();
|
||||
@@ -29,6 +36,9 @@ class HTMLPurifier_AttrTypes
|
||||
$this->info['URI'] = new HTMLPurifier_AttrDef_URI();
|
||||
$this->info['LanguageCode'] = new HTMLPurifier_AttrDef_Lang();
|
||||
$this->info['Color'] = new HTMLPurifier_AttrDef_HTML_Color();
|
||||
$this->info['IAlign'] = self::makeEnum('top,middle,bottom,left,right');
|
||||
$this->info['LAlign'] = self::makeEnum('top,bottom,left,right');
|
||||
$this->info['FrameTarget'] = new HTMLPurifier_AttrDef_HTML_FrameTarget();
|
||||
|
||||
// unimplemented aliases
|
||||
$this->info['ContentType'] = new HTMLPurifier_AttrDef_Text();
|
||||
@@ -36,11 +46,18 @@ class HTMLPurifier_AttrTypes
|
||||
$this->info['Charsets'] = new HTMLPurifier_AttrDef_Text();
|
||||
$this->info['Character'] = new HTMLPurifier_AttrDef_Text();
|
||||
|
||||
// "proprietary" types
|
||||
$this->info['Class'] = new HTMLPurifier_AttrDef_HTML_Class();
|
||||
|
||||
// number is really a positive integer (one or more digits)
|
||||
// FIXME: ^^ not always, see start and value of list items
|
||||
$this->info['Number'] = new HTMLPurifier_AttrDef_Integer(false, false, true);
|
||||
}
|
||||
|
||||
private static function makeEnum($in) {
|
||||
return new HTMLPurifier_AttrDef_Clone(new HTMLPurifier_AttrDef_Enum(explode(',', $in)));
|
||||
}
|
||||
|
||||
/**
|
||||
* Retrieves a type
|
||||
* @param $type String type name
|
||||
|
@@ -37,7 +37,12 @@ class HTMLPurifier_Bootstrap
|
||||
public static function autoload($class) {
|
||||
$file = HTMLPurifier_Bootstrap::getPath($class);
|
||||
if (!$file) return false;
|
||||
require HTMLPURIFIER_PREFIX . '/' . $file;
|
||||
// Technically speaking, it should be ok and more efficient to
|
||||
// just do 'require', but Antonio Parraga reports that with
|
||||
// Zend extensions such as Zend debugger and APC, this invariant
|
||||
// may be broken. Since we have efficient alternatives, pay
|
||||
// the cost here and avoid the bug.
|
||||
require_once HTMLPURIFIER_PREFIX . '/' . $file;
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -65,31 +70,37 @@ class HTMLPurifier_Bootstrap
|
||||
if ( ($funcs = spl_autoload_functions()) === false ) {
|
||||
spl_autoload_register($autoload);
|
||||
} elseif (function_exists('spl_autoload_unregister')) {
|
||||
$compat = version_compare(PHP_VERSION, '5.1.2', '<=') &&
|
||||
version_compare(PHP_VERSION, '5.1.0', '>=');
|
||||
foreach ($funcs as $func) {
|
||||
if (is_array($func)) {
|
||||
// :TRICKY: There are some compatibility issues and some
|
||||
// places where we need to error out
|
||||
$reflector = new ReflectionMethod($func[0], $func[1]);
|
||||
if (!$reflector->isStatic()) {
|
||||
throw new Exception('
|
||||
HTML Purifier autoloader registrar is not compatible
|
||||
with non-static object methods due to PHP Bug #44144;
|
||||
Please do not use HTMLPurifier.autoload.php (or any
|
||||
file that includes this file); instead, place the code:
|
||||
spl_autoload_register(array(\'HTMLPurifier_Bootstrap\', \'autoload\'))
|
||||
after your own autoloaders.
|
||||
');
|
||||
if (version_compare(PHP_VERSION, '5.3.0', '>=')) {
|
||||
// prepend flag exists, no need for shenanigans
|
||||
spl_autoload_register($autoload, true, true);
|
||||
} else {
|
||||
$buggy = version_compare(PHP_VERSION, '5.2.11', '<');
|
||||
$compat = version_compare(PHP_VERSION, '5.1.2', '<=') &&
|
||||
version_compare(PHP_VERSION, '5.1.0', '>=');
|
||||
foreach ($funcs as $func) {
|
||||
if ($buggy && is_array($func)) {
|
||||
// :TRICKY: There are some compatibility issues and some
|
||||
// places where we need to error out
|
||||
$reflector = new ReflectionMethod($func[0], $func[1]);
|
||||
if (!$reflector->isStatic()) {
|
||||
throw new Exception('
|
||||
HTML Purifier autoloader registrar is not compatible
|
||||
with non-static object methods due to PHP Bug #44144;
|
||||
Please do not use HTMLPurifier.autoload.php (or any
|
||||
file that includes this file); instead, place the code:
|
||||
spl_autoload_register(array(\'HTMLPurifier_Bootstrap\', \'autoload\'))
|
||||
after your own autoloaders.
|
||||
');
|
||||
}
|
||||
// Suprisingly, spl_autoload_register supports the
|
||||
// Class::staticMethod callback format, although call_user_func doesn't
|
||||
if ($compat) $func = implode('::', $func);
|
||||
}
|
||||
// Suprisingly, spl_autoload_register supports the
|
||||
// Class::staticMethod callback format, although call_user_func doesn't
|
||||
if ($compat) $func = implode('::', $func);
|
||||
spl_autoload_unregister($func);
|
||||
}
|
||||
spl_autoload_unregister($func);
|
||||
spl_autoload_register($autoload);
|
||||
foreach ($funcs as $func) spl_autoload_register($func);
|
||||
}
|
||||
spl_autoload_register($autoload);
|
||||
foreach ($funcs as $func) spl_autoload_register($func);
|
||||
}
|
||||
}
|
||||
|
||||
|
@@ -154,7 +154,7 @@ class HTMLPurifier_CSSDefinition extends HTMLPurifier_Definition
|
||||
new HTMLPurifier_AttrDef_CSS_Percentage(true),
|
||||
new HTMLPurifier_AttrDef_Enum(array('auto'))
|
||||
));
|
||||
$max = $config->get('CSS', 'MaxImgLength');
|
||||
$max = $config->get('CSS.MaxImgLength');
|
||||
|
||||
$this->info['width'] =
|
||||
$this->info['height'] =
|
||||
@@ -208,18 +208,23 @@ class HTMLPurifier_CSSDefinition extends HTMLPurifier_Definition
|
||||
|
||||
$this->info['border-spacing'] = new HTMLPurifier_AttrDef_CSS_Multiple(new HTMLPurifier_AttrDef_CSS_Length(), 2);
|
||||
|
||||
// partial support
|
||||
$this->info['white-space'] = new HTMLPurifier_AttrDef_Enum(array('nowrap'));
|
||||
// These CSS properties don't work on many browsers, but we live
|
||||
// in THE FUTURE!
|
||||
$this->info['white-space'] = new HTMLPurifier_AttrDef_Enum(array('nowrap', 'normal', 'pre', 'pre-wrap', 'pre-line'));
|
||||
|
||||
if ($config->get('CSS', 'Proprietary')) {
|
||||
if ($config->get('CSS.Proprietary')) {
|
||||
$this->doSetupProprietary($config);
|
||||
}
|
||||
|
||||
if ($config->get('CSS', 'AllowTricky')) {
|
||||
if ($config->get('CSS.AllowTricky')) {
|
||||
$this->doSetupTricky($config);
|
||||
}
|
||||
|
||||
$allow_important = $config->get('CSS', 'AllowImportant');
|
||||
if ($config->get('CSS.Trusted')) {
|
||||
$this->doSetupTrusted($config);
|
||||
}
|
||||
|
||||
$allow_important = $config->get('CSS.AllowImportant');
|
||||
// wrap all attr-defs with decorator that handles !important
|
||||
foreach ($this->info as $k => $v) {
|
||||
$this->info[$k] = new HTMLPurifier_AttrDef_CSS_ImportantDecorator($v, $allow_important);
|
||||
@@ -245,12 +250,17 @@ class HTMLPurifier_CSSDefinition extends HTMLPurifier_Definition
|
||||
// only opacity, for now
|
||||
$this->info['filter'] = new HTMLPurifier_AttrDef_CSS_Filter();
|
||||
|
||||
// more CSS3
|
||||
$this->info['page-break-after'] =
|
||||
$this->info['page-break-before'] = new HTMLPurifier_AttrDef_Enum(array('auto','always','avoid','left','right'));
|
||||
$this->info['page-break-inside'] = new HTMLPurifier_AttrDef_Enum(array('auto','avoid'));
|
||||
|
||||
}
|
||||
|
||||
protected function doSetupTricky($config) {
|
||||
$this->info['display'] = new HTMLPurifier_AttrDef_Enum(array(
|
||||
'inline', 'block', 'list-item', 'run-in', 'compact',
|
||||
'marker', 'table', 'inline-table', 'table-row-group',
|
||||
'marker', 'table', 'inline-block', 'inline-table', 'table-row-group',
|
||||
'table-header-group', 'table-footer-group', 'table-row',
|
||||
'table-column-group', 'table-column', 'table-cell', 'table-caption', 'none'
|
||||
));
|
||||
@@ -260,6 +270,23 @@ class HTMLPurifier_CSSDefinition extends HTMLPurifier_Definition
|
||||
$this->info['overflow'] = new HTMLPurifier_AttrDef_Enum(array('visible', 'hidden', 'auto', 'scroll'));
|
||||
}
|
||||
|
||||
protected function doSetupTrusted($config) {
|
||||
$this->info['position'] = new HTMLPurifier_AttrDef_Enum(array(
|
||||
'static', 'relative', 'absolute', 'fixed'
|
||||
));
|
||||
$this->info['top'] =
|
||||
$this->info['left'] =
|
||||
$this->info['right'] =
|
||||
$this->info['bottom'] = new HTMLPurifier_AttrDef_CSS_Composite(array(
|
||||
new HTMLPurifier_AttrDef_CSS_Length(),
|
||||
new HTMLPurifier_AttrDef_CSS_Percentage(),
|
||||
new HTMLPurifier_AttrDef_Enum(array('auto')),
|
||||
));
|
||||
$this->info['z-index'] = new HTMLPurifier_AttrDef_CSS_Composite(array(
|
||||
new HTMLPurifier_AttrDef_Integer(),
|
||||
new HTMLPurifier_AttrDef_Enum(array('auto')),
|
||||
));
|
||||
}
|
||||
|
||||
/**
|
||||
* Performs extra config-based processing. Based off of
|
||||
@@ -272,20 +299,29 @@ class HTMLPurifier_CSSDefinition extends HTMLPurifier_Definition
|
||||
// setup allowed elements
|
||||
$support = "(for information on implementing this, see the ".
|
||||
"support forums) ";
|
||||
$allowed_attributes = $config->get('CSS', 'AllowedProperties');
|
||||
if ($allowed_attributes !== null) {
|
||||
$allowed_properties = $config->get('CSS.AllowedProperties');
|
||||
if ($allowed_properties !== null) {
|
||||
foreach ($this->info as $name => $d) {
|
||||
if(!isset($allowed_attributes[$name])) unset($this->info[$name]);
|
||||
unset($allowed_attributes[$name]);
|
||||
if(!isset($allowed_properties[$name])) unset($this->info[$name]);
|
||||
unset($allowed_properties[$name]);
|
||||
}
|
||||
// emit errors
|
||||
foreach ($allowed_attributes as $name => $d) {
|
||||
foreach ($allowed_properties as $name => $d) {
|
||||
// :TODO: Is this htmlspecialchars() call really necessary?
|
||||
$name = htmlspecialchars($name);
|
||||
trigger_error("Style attribute '$name' is not supported $support", E_USER_WARNING);
|
||||
}
|
||||
}
|
||||
|
||||
$forbidden_properties = $config->get('CSS.ForbiddenProperties');
|
||||
if ($forbidden_properties !== null) {
|
||||
foreach ($this->info as $name => $d) {
|
||||
if (isset($forbidden_properties[$name])) {
|
||||
unset($this->info[$name]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
|
120
library/HTMLPurifier/ChildDef/List.php
Normal file
120
library/HTMLPurifier/ChildDef/List.php
Normal file
@@ -0,0 +1,120 @@
|
||||
<?php
|
||||
|
||||
/**
|
||||
* Definition for list containers ul and ol.
|
||||
*/
|
||||
class HTMLPurifier_ChildDef_List extends HTMLPurifier_ChildDef
|
||||
{
|
||||
public $type = 'list';
|
||||
// lying a little bit, so that we can handle ul and ol ourselves
|
||||
// XXX: This whole business with 'wrap' is all a bit unsatisfactory
|
||||
public $elements = array('li' => true, 'ul' => true, 'ol' => true);
|
||||
public function validateChildren($tokens_of_children, $config, $context) {
|
||||
// Flag for subclasses
|
||||
$this->whitespace = false;
|
||||
|
||||
// if there are no tokens, delete parent node
|
||||
if (empty($tokens_of_children)) return false;
|
||||
|
||||
// the new set of children
|
||||
$result = array();
|
||||
|
||||
// current depth into the nest
|
||||
$nesting = 0;
|
||||
|
||||
// a little sanity check to make sure it's not ALL whitespace
|
||||
$all_whitespace = true;
|
||||
|
||||
$seen_li = false;
|
||||
$need_close_li = false;
|
||||
|
||||
foreach ($tokens_of_children as $token) {
|
||||
if (!empty($token->is_whitespace)) {
|
||||
$result[] = $token;
|
||||
continue;
|
||||
}
|
||||
$all_whitespace = false; // phew, we're not talking about whitespace
|
||||
|
||||
if ($nesting == 1 && $need_close_li) {
|
||||
$result[] = new HTMLPurifier_Token_End('li');
|
||||
$nesting--;
|
||||
$need_close_li = false;
|
||||
}
|
||||
|
||||
$is_child = ($nesting == 0);
|
||||
|
||||
if ($token instanceof HTMLPurifier_Token_Start) {
|
||||
$nesting++;
|
||||
} elseif ($token instanceof HTMLPurifier_Token_End) {
|
||||
$nesting--;
|
||||
}
|
||||
|
||||
if ($is_child) {
|
||||
if ($token->name === 'li') {
|
||||
// good
|
||||
$seen_li = true;
|
||||
} elseif ($token->name === 'ul' || $token->name === 'ol') {
|
||||
// we want to tuck this into the previous li
|
||||
$need_close_li = true;
|
||||
$nesting++;
|
||||
if (!$seen_li) {
|
||||
// create a new li element
|
||||
$result[] = new HTMLPurifier_Token_Start('li');
|
||||
} else {
|
||||
// backtrack until </li> found
|
||||
while(true) {
|
||||
$t = array_pop($result);
|
||||
if ($t instanceof HTMLPurifier_Token_End) {
|
||||
// XXX actually, these invariants could very plausibly be violated
|
||||
// if we are doing silly things with modifying the set of allowed elements.
|
||||
// FORTUNATELY, it doesn't make a difference, since the allowed
|
||||
// elements are hard-coded here!
|
||||
if ($t->name !== 'li') {
|
||||
trigger_error("Only li present invariant violated in List ChildDef", E_USER_ERROR);
|
||||
return false;
|
||||
}
|
||||
break;
|
||||
} elseif ($t instanceof HTMLPurifier_Token_Empty) { // bleagh
|
||||
if ($t->name !== 'li') {
|
||||
trigger_error("Only li present invariant violated in List ChildDef", E_USER_ERROR);
|
||||
return false;
|
||||
}
|
||||
// XXX this should have a helper for it...
|
||||
$result[] = new HTMLPurifier_Token_Start('li', $t->attr, $t->line, $t->col, $t->armor);
|
||||
break;
|
||||
} else {
|
||||
if (!$t->is_whitespace) {
|
||||
trigger_error("Only whitespace present invariant violated in List ChildDef", E_USER_ERROR);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// start wrapping (this doesn't precisely mimic
|
||||
// browser behavior, but what browsers do is kind of
|
||||
// hard to mimic in a standards compliant way
|
||||
// XXX Actually, this has no impact in practice,
|
||||
// because this gets handled earlier. Arguably,
|
||||
// we should rip out all of that processing
|
||||
$result[] = new HTMLPurifier_Token_Start('li');
|
||||
$nesting++;
|
||||
$seen_li = true;
|
||||
$need_close_li = true;
|
||||
}
|
||||
}
|
||||
$result[] = $token;
|
||||
}
|
||||
if ($need_close_li) {
|
||||
$result[] = new HTMLPurifier_Token_End('li');
|
||||
}
|
||||
if (empty($result)) return false;
|
||||
if ($all_whitespace) {
|
||||
return false;
|
||||
}
|
||||
if ($tokens_of_children == $result) return true;
|
||||
return $result;
|
||||
}
|
||||
}
|
||||
|
||||
// vim: et sw=4 sts=4
|
@@ -59,7 +59,7 @@ class HTMLPurifier_ChildDef_Required extends HTMLPurifier_ChildDef
|
||||
$all_whitespace = true;
|
||||
|
||||
// some configuration
|
||||
$escape_invalid_children = $config->get('Core', 'EscapeInvalidChildren');
|
||||
$escape_invalid_children = $config->get('Core.EscapeInvalidChildren');
|
||||
|
||||
// generator
|
||||
$gen = new HTMLPurifier_Generator($config, $context);
|
||||
|
@@ -1,7 +1,33 @@
|
||||
<?php
|
||||
|
||||
/**
|
||||
* Definition for tables
|
||||
* Definition for tables. The general idea is to extract out all of the
|
||||
* essential bits, and then reconstruct it later.
|
||||
*
|
||||
* This is a bit confusing, because the DTDs and the W3C
|
||||
* validators seem to disagree on the appropriate definition. The
|
||||
* DTD claims:
|
||||
*
|
||||
* (CAPTION?, (COL*|COLGROUP*), THEAD?, TFOOT?, TBODY+)
|
||||
*
|
||||
* But actually, the HTML4 spec then has this to say:
|
||||
*
|
||||
* The TBODY start tag is always required except when the table
|
||||
* contains only one table body and no table head or foot sections.
|
||||
* The TBODY end tag may always be safely omitted.
|
||||
*
|
||||
* So the DTD is kind of wrong. The validator is, unfortunately, kind
|
||||
* of on crack.
|
||||
*
|
||||
* The definition changed again in XHTML1.1; and in my opinion, this
|
||||
* formulation makes the most sense.
|
||||
*
|
||||
* caption?, ( col* | colgroup* ), (( thead?, tfoot?, tbody+ ) | ( tr+ ))
|
||||
*
|
||||
* Essentially, we have two modes: thead/tfoot/tbody mode, and tr mode.
|
||||
* If we encounter a thead, tfoot or tbody, we are placed in the former
|
||||
* mode, and we *must* wrap any stray tr segments with a tbody. But if
|
||||
* we don't run into any of them, just have tr tags is OK.
|
||||
*/
|
||||
class HTMLPurifier_ChildDef_Table extends HTMLPurifier_ChildDef
|
||||
{
|
||||
@@ -33,6 +59,8 @@ class HTMLPurifier_ChildDef_Table extends HTMLPurifier_ChildDef
|
||||
$collection = array(); // collected nodes
|
||||
$tag_index = 0; // the first node might be whitespace,
|
||||
// so this tells us where the start tag is
|
||||
$tbody_mode = false; // if true, then we need to wrap any stray
|
||||
// <tr>s with a <tbody>.
|
||||
|
||||
foreach ($tokens_of_children as $token) {
|
||||
$is_child = ($nesting == 0);
|
||||
@@ -51,8 +79,9 @@ class HTMLPurifier_ChildDef_Table extends HTMLPurifier_ChildDef
|
||||
// okay, let's stash the tokens away
|
||||
// first token tells us the type of the collection
|
||||
switch ($collection[$tag_index]->name) {
|
||||
case 'tr':
|
||||
case 'tbody':
|
||||
$tbody_mode = true;
|
||||
case 'tr':
|
||||
$content[] = $collection;
|
||||
break;
|
||||
case 'caption':
|
||||
@@ -61,13 +90,28 @@ class HTMLPurifier_ChildDef_Table extends HTMLPurifier_ChildDef
|
||||
break;
|
||||
case 'thead':
|
||||
case 'tfoot':
|
||||
$tbody_mode = true;
|
||||
// XXX This breaks rendering properties with
|
||||
// Firefox, which never floats a <thead> to
|
||||
// the top. Ever. (Our scheme will float the
|
||||
// first <thead> to the top.) So maybe
|
||||
// <thead>s that are not first should be
|
||||
// turned into <tbody>? Very tricky, indeed.
|
||||
|
||||
// access the appropriate variable, $thead or $tfoot
|
||||
$var = $collection[$tag_index]->name;
|
||||
if ($$var === false) {
|
||||
$$var = $collection;
|
||||
} else {
|
||||
// transmutate the first and less entries into
|
||||
// tbody tags, and then put into content
|
||||
// Oops, there's a second one! What
|
||||
// should we do? Current behavior is to
|
||||
// transmutate the first and last entries into
|
||||
// tbody tags, and then put into content.
|
||||
// Maybe a better idea is to *attach
|
||||
// it* to the existing thead or tfoot?
|
||||
// We don't do this, because Firefox
|
||||
// doesn't float an extra tfoot to the
|
||||
// bottom like it does for the first one.
|
||||
$collection[$tag_index]->name = 'tbody';
|
||||
$collection[count($collection)-1]->name = 'tbody';
|
||||
$content[] = $collection;
|
||||
@@ -126,7 +170,48 @@ class HTMLPurifier_ChildDef_Table extends HTMLPurifier_ChildDef
|
||||
if ($cols !== false) foreach ($cols as $token_array) $ret = array_merge($ret, $token_array);
|
||||
if ($thead !== false) $ret = array_merge($ret, $thead);
|
||||
if ($tfoot !== false) $ret = array_merge($ret, $tfoot);
|
||||
foreach ($content as $token_array) $ret = array_merge($ret, $token_array);
|
||||
|
||||
if ($tbody_mode) {
|
||||
// a little tricky, since the start of the collection may be
|
||||
// whitespace
|
||||
$inside_tbody = false;
|
||||
foreach ($content as $token_array) {
|
||||
// find the starting token
|
||||
foreach ($token_array as $t) {
|
||||
if ($t->name === 'tr' || $t->name === 'tbody') {
|
||||
break;
|
||||
}
|
||||
} // iterator variable carries over
|
||||
if ($t->name === 'tr') {
|
||||
if ($inside_tbody) {
|
||||
$ret = array_merge($ret, $token_array);
|
||||
} else {
|
||||
$ret[] = new HTMLPurifier_Token_Start('tbody');
|
||||
$ret = array_merge($ret, $token_array);
|
||||
$inside_tbody = true;
|
||||
}
|
||||
} elseif ($t->name === 'tbody') {
|
||||
if ($inside_tbody) {
|
||||
$ret[] = new HTMLPurifier_Token_End('tbody');
|
||||
$inside_tbody = false;
|
||||
$ret = array_merge($ret, $token_array);
|
||||
} else {
|
||||
$ret = array_merge($ret, $token_array);
|
||||
}
|
||||
} else {
|
||||
trigger_error("tr/tbody in content invariant failed in Table ChildDef", E_USER_ERROR);
|
||||
}
|
||||
}
|
||||
if ($inside_tbody) {
|
||||
$ret[] = new HTMLPurifier_Token_End('tbody');
|
||||
}
|
||||
} else {
|
||||
foreach ($content as $token_array) {
|
||||
// invariant: everything in here is <tr>s
|
||||
$ret = array_merge($ret, $token_array);
|
||||
}
|
||||
}
|
||||
|
||||
if (!empty($collection) && $is_collecting == false){
|
||||
// grab the trailing space
|
||||
$ret = array_merge($ret, $collection);
|
||||
|
@@ -20,7 +20,7 @@ class HTMLPurifier_Config
|
||||
/**
|
||||
* HTML Purifier's version
|
||||
*/
|
||||
public $version = '3.3.0';
|
||||
public $version = '4.5.0';
|
||||
|
||||
/**
|
||||
* Bool indicator whether or not to automatically finalize
|
||||
@@ -44,7 +44,7 @@ class HTMLPurifier_Config
|
||||
/**
|
||||
* Parser for variables
|
||||
*/
|
||||
protected $parser;
|
||||
protected $parser = null;
|
||||
|
||||
/**
|
||||
* Reference HTMLPurifier_ConfigSchema for value checking
|
||||
@@ -68,12 +68,31 @@ class HTMLPurifier_Config
|
||||
*/
|
||||
protected $plist;
|
||||
|
||||
/**
|
||||
* Whether or not a set is taking place due to an
|
||||
* alias lookup.
|
||||
*/
|
||||
private $aliasMode;
|
||||
|
||||
/**
|
||||
* Set to false if you do not want line and file numbers in errors
|
||||
* (useful when unit testing). This will also compress some errors
|
||||
* and exceptions.
|
||||
*/
|
||||
public $chatty = true;
|
||||
|
||||
/**
|
||||
* Current lock; only gets to this namespace are allowed.
|
||||
*/
|
||||
private $lock;
|
||||
|
||||
/**
|
||||
* @param $definition HTMLPurifier_ConfigSchema that defines what directives
|
||||
* are allowed.
|
||||
*/
|
||||
public function __construct($definition) {
|
||||
$this->plist = new HTMLPurifier_PropertyList($definition->defaultPlist);
|
||||
public function __construct($definition, $parent = null) {
|
||||
$parent = $parent ? $parent : $definition->defaultPlist;
|
||||
$this->plist = new HTMLPurifier_PropertyList($parent);
|
||||
$this->def = $definition; // keep a copy around for checking
|
||||
$this->parser = new HTMLPurifier_VarParser_Flexible();
|
||||
}
|
||||
@@ -102,6 +121,16 @@ class HTMLPurifier_Config
|
||||
return $ret;
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a new config object that inherits from a previous one.
|
||||
* @param HTMLPurifier_Config $config Configuration object to inherit
|
||||
* from.
|
||||
* @return HTMLPurifier_Config object with $config as its parent.
|
||||
*/
|
||||
public static function inherit(HTMLPurifier_Config $config) {
|
||||
return new HTMLPurifier_Config($config->def, $config->plist);
|
||||
}
|
||||
|
||||
/**
|
||||
* Convenience constructor that creates a default configuration object.
|
||||
* @return Default HTMLPurifier_Config object.
|
||||
@@ -114,24 +143,34 @@ class HTMLPurifier_Config
|
||||
|
||||
/**
|
||||
* Retreives a value from the configuration.
|
||||
* @param $namespace String namespace
|
||||
* @param $key String key
|
||||
*/
|
||||
public function get($namespace, $key) {
|
||||
if (!$this->finalized) $this->autoFinalize ? $this->finalize() : $this->plist->squash(true);
|
||||
if (!isset($this->def->info[$namespace][$key])) {
|
||||
public function get($key, $a = null) {
|
||||
if ($a !== null) {
|
||||
$this->triggerError("Using deprecated API: use \$config->get('$key.$a') instead", E_USER_WARNING);
|
||||
$key = "$key.$a";
|
||||
}
|
||||
if (!$this->finalized) $this->autoFinalize();
|
||||
if (!isset($this->def->info[$key])) {
|
||||
// can't add % due to SimpleTest bug
|
||||
trigger_error('Cannot retrieve value of undefined directive ' . htmlspecialchars("$namespace.$key"),
|
||||
$this->triggerError('Cannot retrieve value of undefined directive ' . htmlspecialchars($key),
|
||||
E_USER_WARNING);
|
||||
return;
|
||||
}
|
||||
if (isset($this->def->info[$namespace][$key]->isAlias)) {
|
||||
$d = $this->def->info[$namespace][$key];
|
||||
trigger_error('Cannot get value from aliased directive, use real name ' . $d->namespace . '.' . $d->name,
|
||||
if (isset($this->def->info[$key]->isAlias)) {
|
||||
$d = $this->def->info[$key];
|
||||
$this->triggerError('Cannot get value from aliased directive, use real name ' . $d->key,
|
||||
E_USER_ERROR);
|
||||
return;
|
||||
}
|
||||
return $this->plist->get("$namespace.$key");
|
||||
if ($this->lock) {
|
||||
list($ns) = explode('.', $key);
|
||||
if ($ns !== $this->lock) {
|
||||
$this->triggerError('Cannot get value of namespace ' . $ns . ' when lock for ' . $this->lock . ' is active, this probably indicates a Definition setup method is accessing directives that are not within its namespace', E_USER_ERROR);
|
||||
return;
|
||||
}
|
||||
}
|
||||
return $this->plist->get($key);
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -139,18 +178,18 @@ class HTMLPurifier_Config
|
||||
* @param $namespace String namespace
|
||||
*/
|
||||
public function getBatch($namespace) {
|
||||
if (!$this->finalized) $this->autoFinalize ? $this->finalize() : $this->plist->squash(true);
|
||||
if (!isset($this->def->info[$namespace])) {
|
||||
trigger_error('Cannot retrieve undefined namespace ' . htmlspecialchars($namespace),
|
||||
if (!$this->finalized) $this->autoFinalize();
|
||||
$full = $this->getAll();
|
||||
if (!isset($full[$namespace])) {
|
||||
$this->triggerError('Cannot retrieve undefined namespace ' . htmlspecialchars($namespace),
|
||||
E_USER_WARNING);
|
||||
return;
|
||||
}
|
||||
$full = $this->getAll();
|
||||
return $full[$namespace];
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a md5 signature of a segment of the configuration object
|
||||
* Returns a SHA-1 signature of a segment of the configuration object
|
||||
* that uniquely identifies that particular configuration
|
||||
* @note Revision is handled specially and is removed from the batch
|
||||
* before processing!
|
||||
@@ -160,27 +199,28 @@ class HTMLPurifier_Config
|
||||
if (empty($this->serials[$namespace])) {
|
||||
$batch = $this->getBatch($namespace);
|
||||
unset($batch['DefinitionRev']);
|
||||
$this->serials[$namespace] = md5(serialize($batch));
|
||||
$this->serials[$namespace] = sha1(serialize($batch));
|
||||
}
|
||||
return $this->serials[$namespace];
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a md5 signature for the entire configuration object
|
||||
* Returns a SHA-1 signature for the entire configuration object
|
||||
* that uniquely identifies that particular configuration
|
||||
*/
|
||||
public function getSerial() {
|
||||
if (empty($this->serial)) {
|
||||
$this->serial = md5(serialize($this->getAll()));
|
||||
$this->serial = sha1(serialize($this->getAll()));
|
||||
}
|
||||
return $this->serial;
|
||||
}
|
||||
|
||||
/**
|
||||
* Retrieves all directives, organized by namespace
|
||||
* @warning This is a pretty inefficient function, avoid if you can
|
||||
*/
|
||||
public function getAll() {
|
||||
if (!$this->finalized) $this->autoFinalize ? $this->finalize() : $this->plist->squash(true);
|
||||
if (!$this->finalized) $this->autoFinalize();
|
||||
$ret = array();
|
||||
foreach ($this->plist->squash() as $name => $value) {
|
||||
list($ns, $key) = explode('.', $name, 2);
|
||||
@@ -191,29 +231,37 @@ class HTMLPurifier_Config
|
||||
|
||||
/**
|
||||
* Sets a value to configuration.
|
||||
* @param $namespace String namespace
|
||||
* @param $key String key
|
||||
* @param $value Mixed value
|
||||
*/
|
||||
public function set($namespace, $key, $value, $from_alias = false) {
|
||||
public function set($key, $value, $a = null) {
|
||||
if (strpos($key, '.') === false) {
|
||||
$namespace = $key;
|
||||
$directive = $value;
|
||||
$value = $a;
|
||||
$key = "$key.$directive";
|
||||
$this->triggerError("Using deprecated API: use \$config->set('$key', ...) instead", E_USER_NOTICE);
|
||||
} else {
|
||||
list($namespace) = explode('.', $key);
|
||||
}
|
||||
if ($this->isFinalized('Cannot set directive after finalization')) return;
|
||||
if (!isset($this->def->info[$namespace][$key])) {
|
||||
trigger_error('Cannot set undefined directive ' . htmlspecialchars("$namespace.$key") . ' to value',
|
||||
if (!isset($this->def->info[$key])) {
|
||||
$this->triggerError('Cannot set undefined directive ' . htmlspecialchars($key) . ' to value',
|
||||
E_USER_WARNING);
|
||||
return;
|
||||
}
|
||||
$def = $this->def->info[$namespace][$key];
|
||||
$def = $this->def->info[$key];
|
||||
|
||||
if (isset($def->isAlias)) {
|
||||
if ($from_alias) {
|
||||
trigger_error('Double-aliases not allowed, please fix '.
|
||||
'ConfigSchema bug with' . "$namespace.$key", E_USER_ERROR);
|
||||
if ($this->aliasMode) {
|
||||
$this->triggerError('Double-aliases not allowed, please fix '.
|
||||
'ConfigSchema bug with' . $key, E_USER_ERROR);
|
||||
return;
|
||||
}
|
||||
$this->set($new_ns = $def->namespace,
|
||||
$new_dir = $def->name,
|
||||
$value, true);
|
||||
trigger_error("$namespace.$key is an alias, preferred directive name is $new_ns.$new_dir", E_USER_NOTICE);
|
||||
$this->aliasMode = true;
|
||||
$this->set($def->key, $value);
|
||||
$this->aliasMode = false;
|
||||
$this->triggerError("$key is an alias, preferred directive name is {$def->key}", E_USER_NOTICE);
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -231,7 +279,7 @@ class HTMLPurifier_Config
|
||||
try {
|
||||
$value = $this->parser->parse($value, $type, $allow_null);
|
||||
} catch (HTMLPurifier_VarParserException $e) {
|
||||
trigger_error('Value for ' . "$namespace.$key" . ' is of invalid type, should be ' . HTMLPurifier_VarParser::getTypeName($type), E_USER_WARNING);
|
||||
$this->triggerError('Value for ' . $key . ' is of invalid type, should be ' . HTMLPurifier_VarParser::getTypeName($type), E_USER_WARNING);
|
||||
return;
|
||||
}
|
||||
if (is_string($value) && is_object($def)) {
|
||||
@@ -241,17 +289,17 @@ class HTMLPurifier_Config
|
||||
}
|
||||
// check to see if the value is allowed
|
||||
if (isset($def->allowed) && !isset($def->allowed[$value])) {
|
||||
trigger_error('Value not supported, valid values are: ' .
|
||||
$this->triggerError('Value not supported, valid values are: ' .
|
||||
$this->_listify($def->allowed), E_USER_WARNING);
|
||||
return;
|
||||
}
|
||||
}
|
||||
$this->plist->set("$namespace.$key", $value);
|
||||
$this->plist->set($key, $value);
|
||||
|
||||
// reset definitions if the directives they depend on changed
|
||||
// this is a very costly process, so it's discouraged
|
||||
// with finalization
|
||||
if ($namespace == 'HTML' || $namespace == 'CSS') {
|
||||
if ($namespace == 'HTML' || $namespace == 'CSS' || $namespace == 'URI') {
|
||||
$this->definitions[$namespace] = null;
|
||||
}
|
||||
|
||||
@@ -271,74 +319,203 @@ class HTMLPurifier_Config
|
||||
* Retrieves object reference to the HTML definition.
|
||||
* @param $raw Return a copy that has not been setup yet. Must be
|
||||
* called before it's been setup, otherwise won't work.
|
||||
* @param $optimized If true, this method may return null, to
|
||||
* indicate that a cached version of the modified
|
||||
* definition object is available and no further edits
|
||||
* are necessary. Consider using
|
||||
* maybeGetRawHTMLDefinition, which is more explicitly
|
||||
* named, instead.
|
||||
*/
|
||||
public function getHTMLDefinition($raw = false) {
|
||||
return $this->getDefinition('HTML', $raw);
|
||||
public function getHTMLDefinition($raw = false, $optimized = false) {
|
||||
return $this->getDefinition('HTML', $raw, $optimized);
|
||||
}
|
||||
|
||||
/**
|
||||
* Retrieves object reference to the CSS definition
|
||||
* @param $raw Return a copy that has not been setup yet. Must be
|
||||
* called before it's been setup, otherwise won't work.
|
||||
* @param $optimized If true, this method may return null, to
|
||||
* indicate that a cached version of the modified
|
||||
* definition object is available and no further edits
|
||||
* are necessary. Consider using
|
||||
* maybeGetRawCSSDefinition, which is more explicitly
|
||||
* named, instead.
|
||||
*/
|
||||
public function getCSSDefinition($raw = false) {
|
||||
return $this->getDefinition('CSS', $raw);
|
||||
public function getCSSDefinition($raw = false, $optimized = false) {
|
||||
return $this->getDefinition('CSS', $raw, $optimized);
|
||||
}
|
||||
|
||||
/**
|
||||
* Retrieves object reference to the URI definition
|
||||
* @param $raw Return a copy that has not been setup yet. Must be
|
||||
* called before it's been setup, otherwise won't work.
|
||||
* @param $optimized If true, this method may return null, to
|
||||
* indicate that a cached version of the modified
|
||||
* definition object is available and no further edits
|
||||
* are necessary. Consider using
|
||||
* maybeGetRawURIDefinition, which is more explicitly
|
||||
* named, instead.
|
||||
*/
|
||||
public function getURIDefinition($raw = false, $optimized = false) {
|
||||
return $this->getDefinition('URI', $raw, $optimized);
|
||||
}
|
||||
|
||||
/**
|
||||
* Retrieves a definition
|
||||
* @param $type Type of definition: HTML, CSS, etc
|
||||
* @param $raw Whether or not definition should be returned raw
|
||||
* @param $optimized Only has an effect when $raw is true. Whether
|
||||
* or not to return null if the result is already present in
|
||||
* the cache. This is off by default for backwards
|
||||
* compatibility reasons, but you need to do things this
|
||||
* way in order to ensure that caching is done properly.
|
||||
* Check out enduser-customize.html for more details.
|
||||
* We probably won't ever change this default, as much as the
|
||||
* maybe semantics is the "right thing to do."
|
||||
*/
|
||||
public function getDefinition($type, $raw = false) {
|
||||
if (!$this->finalized) $this->autoFinalize ? $this->finalize() : $this->plist->squash(true);
|
||||
public function getDefinition($type, $raw = false, $optimized = false) {
|
||||
if ($optimized && !$raw) {
|
||||
throw new HTMLPurifier_Exception("Cannot set optimized = true when raw = false");
|
||||
}
|
||||
if (!$this->finalized) $this->autoFinalize();
|
||||
// temporarily suspend locks, so we can handle recursive definition calls
|
||||
$lock = $this->lock;
|
||||
$this->lock = null;
|
||||
$factory = HTMLPurifier_DefinitionCacheFactory::instance();
|
||||
$cache = $factory->create($type, $this);
|
||||
$this->lock = $lock;
|
||||
if (!$raw) {
|
||||
// see if we can quickly supply a definition
|
||||
// full definition
|
||||
// ---------------
|
||||
// check if definition is in memory
|
||||
if (!empty($this->definitions[$type])) {
|
||||
if (!$this->definitions[$type]->setup) {
|
||||
$this->definitions[$type]->setup($this);
|
||||
$cache->set($this->definitions[$type], $this);
|
||||
$def = $this->definitions[$type];
|
||||
// check if the definition is setup
|
||||
if ($def->setup) {
|
||||
return $def;
|
||||
} else {
|
||||
$def->setup($this);
|
||||
if ($def->optimized) $cache->add($def, $this);
|
||||
return $def;
|
||||
}
|
||||
return $this->definitions[$type];
|
||||
}
|
||||
// memory check missed, try cache
|
||||
$this->definitions[$type] = $cache->get($this);
|
||||
if ($this->definitions[$type]) {
|
||||
// definition in cache, return it
|
||||
return $this->definitions[$type];
|
||||
// check if definition is in cache
|
||||
$def = $cache->get($this);
|
||||
if ($def) {
|
||||
// definition in cache, save to memory and return it
|
||||
$this->definitions[$type] = $def;
|
||||
return $def;
|
||||
}
|
||||
} elseif (
|
||||
!empty($this->definitions[$type]) &&
|
||||
!$this->definitions[$type]->setup
|
||||
) {
|
||||
// raw requested, raw in memory, quick return
|
||||
return $this->definitions[$type];
|
||||
// initialize it
|
||||
$def = $this->initDefinition($type);
|
||||
// set it up
|
||||
$this->lock = $type;
|
||||
$def->setup($this);
|
||||
$this->lock = null;
|
||||
// save in cache
|
||||
$cache->add($def, $this);
|
||||
// return it
|
||||
return $def;
|
||||
} else {
|
||||
// raw definition
|
||||
// --------------
|
||||
// check preconditions
|
||||
$def = null;
|
||||
if ($optimized) {
|
||||
if (is_null($this->get($type . '.DefinitionID'))) {
|
||||
// fatally error out if definition ID not set
|
||||
throw new HTMLPurifier_Exception("Cannot retrieve raw version without specifying %$type.DefinitionID");
|
||||
}
|
||||
}
|
||||
if (!empty($this->definitions[$type])) {
|
||||
$def = $this->definitions[$type];
|
||||
if ($def->setup && !$optimized) {
|
||||
$extra = $this->chatty ? " (try moving this code block earlier in your initialization)" : "";
|
||||
throw new HTMLPurifier_Exception("Cannot retrieve raw definition after it has already been setup" . $extra);
|
||||
}
|
||||
if ($def->optimized === null) {
|
||||
$extra = $this->chatty ? " (try flushing your cache)" : "";
|
||||
throw new HTMLPurifier_Exception("Optimization status of definition is unknown" . $extra);
|
||||
}
|
||||
if ($def->optimized !== $optimized) {
|
||||
$msg = $optimized ? "optimized" : "unoptimized";
|
||||
$extra = $this->chatty ? " (this backtrace is for the first inconsistent call, which was for a $msg raw definition)" : "";
|
||||
throw new HTMLPurifier_Exception("Inconsistent use of optimized and unoptimized raw definition retrievals" . $extra);
|
||||
}
|
||||
}
|
||||
// check if definition was in memory
|
||||
if ($def) {
|
||||
if ($def->setup) {
|
||||
// invariant: $optimized === true (checked above)
|
||||
return null;
|
||||
} else {
|
||||
return $def;
|
||||
}
|
||||
}
|
||||
// if optimized, check if definition was in cache
|
||||
// (because we do the memory check first, this formulation
|
||||
// is prone to cache slamming, but I think
|
||||
// guaranteeing that either /all/ of the raw
|
||||
// setup code or /none/ of it is run is more important.)
|
||||
if ($optimized) {
|
||||
// This code path only gets run once; once we put
|
||||
// something in $definitions (which is guaranteed by the
|
||||
// trailing code), we always short-circuit above.
|
||||
$def = $cache->get($this);
|
||||
if ($def) {
|
||||
// save the full definition for later, but don't
|
||||
// return it yet
|
||||
$this->definitions[$type] = $def;
|
||||
return null;
|
||||
}
|
||||
}
|
||||
// check invariants for creation
|
||||
if (!$optimized) {
|
||||
if (!is_null($this->get($type . '.DefinitionID'))) {
|
||||
if ($this->chatty) {
|
||||
$this->triggerError("Due to a documentation error in previous version of HTML Purifier, your definitions are not being cached. If this is OK, you can remove the %$type.DefinitionRev and %$type.DefinitionID declaration. Otherwise, modify your code to use maybeGetRawDefinition, and test if the returned value is null before making any edits (if it is null, that means that a cached version is available, and no raw operations are necessary). See <a href='http://htmlpurifier.org/docs/enduser-customize.html#optimized'>Customize</a> for more details", E_USER_WARNING);
|
||||
} else {
|
||||
$this->triggerError("Useless DefinitionID declaration", E_USER_WARNING);
|
||||
}
|
||||
}
|
||||
}
|
||||
// initialize it
|
||||
$def = $this->initDefinition($type);
|
||||
$def->optimized = $optimized;
|
||||
return $def;
|
||||
}
|
||||
throw new HTMLPurifier_Exception("The impossible happened!");
|
||||
}
|
||||
|
||||
private function initDefinition($type) {
|
||||
// quick checks failed, let's create the object
|
||||
if ($type == 'HTML') {
|
||||
$this->definitions[$type] = new HTMLPurifier_HTMLDefinition();
|
||||
$def = new HTMLPurifier_HTMLDefinition();
|
||||
} elseif ($type == 'CSS') {
|
||||
$this->definitions[$type] = new HTMLPurifier_CSSDefinition();
|
||||
$def = new HTMLPurifier_CSSDefinition();
|
||||
} elseif ($type == 'URI') {
|
||||
$this->definitions[$type] = new HTMLPurifier_URIDefinition();
|
||||
$def = new HTMLPurifier_URIDefinition();
|
||||
} else {
|
||||
throw new HTMLPurifier_Exception("Definition of $type type not supported");
|
||||
}
|
||||
// quick abort if raw
|
||||
if ($raw) {
|
||||
if (is_null($this->get($type, 'DefinitionID'))) {
|
||||
// fatally error out if definition ID not set
|
||||
throw new HTMLPurifier_Exception("Cannot retrieve raw version without specifying %$type.DefinitionID");
|
||||
}
|
||||
return $this->definitions[$type];
|
||||
}
|
||||
// set it up
|
||||
$this->definitions[$type]->setup($this);
|
||||
// save in cache
|
||||
$cache->set($this->definitions[$type], $this);
|
||||
return $this->definitions[$type];
|
||||
$this->definitions[$type] = $def;
|
||||
return $def;
|
||||
}
|
||||
|
||||
public function maybeGetRawDefinition($name) {
|
||||
return $this->getDefinition($name, true, true);
|
||||
}
|
||||
|
||||
public function maybeGetRawHTMLDefinition() {
|
||||
return $this->getDefinition('HTML', true, true);
|
||||
}
|
||||
|
||||
public function maybeGetRawCSSDefinition() {
|
||||
return $this->getDefinition('CSS', true, true);
|
||||
}
|
||||
|
||||
public function maybeGetRawURIDefinition() {
|
||||
return $this->getDefinition('URI', true, true);
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -351,14 +528,12 @@ class HTMLPurifier_Config
|
||||
foreach ($config_array as $key => $value) {
|
||||
$key = str_replace('_', '.', $key);
|
||||
if (strpos($key, '.') !== false) {
|
||||
// condensed form
|
||||
list($namespace, $directive) = explode('.', $key);
|
||||
$this->set($namespace, $directive, $value);
|
||||
$this->set($key, $value);
|
||||
} else {
|
||||
$namespace = $key;
|
||||
$namespace_values = $value;
|
||||
foreach ($namespace_values as $directive => $value) {
|
||||
$this->set($namespace, $directive, $value);
|
||||
$this->set($namespace .'.'. $directive, $value);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -394,16 +569,15 @@ class HTMLPurifier_Config
|
||||
}
|
||||
}
|
||||
$ret = array();
|
||||
foreach ($schema->info as $ns => $keypairs) {
|
||||
foreach ($keypairs as $directive => $def) {
|
||||
if ($allowed !== true) {
|
||||
if (isset($blacklisted_directives["$ns.$directive"])) continue;
|
||||
if (!isset($allowed_directives["$ns.$directive"]) && !isset($allowed_ns[$ns])) continue;
|
||||
}
|
||||
if (isset($def->isAlias)) continue;
|
||||
if ($directive == 'DefinitionID' || $directive == 'DefinitionRev') continue;
|
||||
$ret[] = array($ns, $directive);
|
||||
foreach ($schema->info as $key => $def) {
|
||||
list($ns, $directive) = explode('.', $key, 2);
|
||||
if ($allowed !== true) {
|
||||
if (isset($blacklisted_directives["$ns.$directive"])) continue;
|
||||
if (!isset($allowed_directives["$ns.$directive"]) && !isset($allowed_ns[$ns])) continue;
|
||||
}
|
||||
if (isset($def->isAlias)) continue;
|
||||
if ($directive == 'DefinitionID' || $directive == 'DefinitionRev') continue;
|
||||
$ret[] = array($ns, $directive);
|
||||
}
|
||||
return $ret;
|
||||
}
|
||||
@@ -472,7 +646,7 @@ class HTMLPurifier_Config
|
||||
*/
|
||||
public function isFinalized($error = false) {
|
||||
if ($this->finalized && $error) {
|
||||
trigger_error($error, E_USER_ERROR);
|
||||
$this->triggerError($error, E_USER_ERROR);
|
||||
}
|
||||
return $this->finalized;
|
||||
}
|
||||
@@ -482,7 +656,11 @@ class HTMLPurifier_Config
|
||||
* already finalized
|
||||
*/
|
||||
public function autoFinalize() {
|
||||
if (!$this->finalized && $this->autoFinalize) $this->finalize();
|
||||
if ($this->autoFinalize) {
|
||||
$this->finalize();
|
||||
} else {
|
||||
$this->plist->squash(true);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -490,6 +668,41 @@ class HTMLPurifier_Config
|
||||
*/
|
||||
public function finalize() {
|
||||
$this->finalized = true;
|
||||
$this->parser = null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Produces a nicely formatted error message by supplying the
|
||||
* stack frame information OUTSIDE of HTMLPurifier_Config.
|
||||
*/
|
||||
protected function triggerError($msg, $no) {
|
||||
// determine previous stack frame
|
||||
$extra = '';
|
||||
if ($this->chatty) {
|
||||
$trace = debug_backtrace();
|
||||
// zip(tail(trace), trace) -- but PHP is not Haskell har har
|
||||
for ($i = 0, $c = count($trace); $i < $c - 1; $i++) {
|
||||
// XXX this is not correct on some versions of HTML Purifier
|
||||
if ($trace[$i + 1]['class'] === 'HTMLPurifier_Config') {
|
||||
continue;
|
||||
}
|
||||
$frame = $trace[$i];
|
||||
$extra = " invoked on line {$frame['line']} in file {$frame['file']}";
|
||||
break;
|
||||
}
|
||||
}
|
||||
trigger_error($msg . $extra, $no);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a serialized form of the configuration object that can
|
||||
* be reconstituted.
|
||||
*/
|
||||
public function serialize() {
|
||||
$this->getDefinition('HTML');
|
||||
$this->getDefinition('CSS');
|
||||
$this->getDefinition('URI');
|
||||
return serialize($this);
|
||||
}
|
||||
|
||||
}
|
||||
|
@@ -60,7 +60,13 @@ class HTMLPurifier_ConfigSchema {
|
||||
* Unserializes the default ConfigSchema.
|
||||
*/
|
||||
public static function makeFromSerial() {
|
||||
return unserialize(file_get_contents(HTMLPURIFIER_PREFIX . '/HTMLPurifier/ConfigSchema/schema.ser'));
|
||||
$contents = file_get_contents(HTMLPURIFIER_PREFIX . '/HTMLPurifier/ConfigSchema/schema.ser');
|
||||
$r = unserialize($contents);
|
||||
if (!$r) {
|
||||
$hash = sha1($contents);
|
||||
trigger_error("Unserialization of configuration schema failed, sha1 of file was $hash", E_USER_ERROR);
|
||||
}
|
||||
return $r;
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -87,24 +93,13 @@ class HTMLPurifier_ConfigSchema {
|
||||
* HTMLPurifier_DirectiveDef::$type for allowed values
|
||||
* @param $allow_null Whether or not to allow null values
|
||||
*/
|
||||
public function add($namespace, $name, $default, $type, $allow_null) {
|
||||
public function add($key, $default, $type, $allow_null) {
|
||||
$obj = new stdclass();
|
||||
$obj->type = is_int($type) ? $type : HTMLPurifier_VarParser::$types[$type];
|
||||
if ($allow_null) $obj->allow_null = true;
|
||||
$this->info[$namespace][$name] = $obj;
|
||||
$this->defaults[$namespace][$name] = $default;
|
||||
$this->defaultPlist->set("$namespace.$name", $default);
|
||||
}
|
||||
|
||||
/**
|
||||
* Defines a namespace for directives to be put into.
|
||||
* @warning This is slightly different from the corresponding static
|
||||
* method.
|
||||
* @param $namespace Namespace's name
|
||||
*/
|
||||
public function addNamespace($namespace) {
|
||||
$this->info[$namespace] = array();
|
||||
$this->defaults[$namespace] = array();
|
||||
$this->info[$key] = $obj;
|
||||
$this->defaults[$key] = $default;
|
||||
$this->defaultPlist->set($key, $default);
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -116,12 +111,12 @@ class HTMLPurifier_ConfigSchema {
|
||||
* @param $name Name of Directive
|
||||
* @param $aliases Hash of aliased values to the real alias
|
||||
*/
|
||||
public function addValueAliases($namespace, $name, $aliases) {
|
||||
if (!isset($this->info[$namespace][$name]->aliases)) {
|
||||
$this->info[$namespace][$name]->aliases = array();
|
||||
public function addValueAliases($key, $aliases) {
|
||||
if (!isset($this->info[$key]->aliases)) {
|
||||
$this->info[$key]->aliases = array();
|
||||
}
|
||||
foreach ($aliases as $alias => $real) {
|
||||
$this->info[$namespace][$name]->aliases[$alias] = $real;
|
||||
$this->info[$key]->aliases[$alias] = $real;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -133,8 +128,8 @@ class HTMLPurifier_ConfigSchema {
|
||||
* @param $name Name of directive
|
||||
* @param $allowed Lookup array of allowed values
|
||||
*/
|
||||
public function addAllowedValues($namespace, $name, $allowed) {
|
||||
$this->info[$namespace][$name]->allowed = $allowed;
|
||||
public function addAllowedValues($key, $allowed) {
|
||||
$this->info[$key]->allowed = $allowed;
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -144,88 +139,26 @@ class HTMLPurifier_ConfigSchema {
|
||||
* @param $new_namespace
|
||||
* @param $new_name Directive that the alias will be to
|
||||
*/
|
||||
public function addAlias($namespace, $name, $new_namespace, $new_name) {
|
||||
public function addAlias($key, $new_key) {
|
||||
$obj = new stdclass;
|
||||
$obj->namespace = $new_namespace;
|
||||
$obj->name = $new_name;
|
||||
$obj->key = $new_key;
|
||||
$obj->isAlias = true;
|
||||
$this->info[$namespace][$name] = $obj;
|
||||
$this->info[$key] = $obj;
|
||||
}
|
||||
|
||||
/**
|
||||
* Replaces any stdclass that only has the type property with type integer.
|
||||
*/
|
||||
public function postProcess() {
|
||||
foreach ($this->info as $namespace => $info) {
|
||||
foreach ($info as $directive => $v) {
|
||||
if (count((array) $v) == 1) {
|
||||
$this->info[$namespace][$directive] = $v->type;
|
||||
} elseif (count((array) $v) == 2 && isset($v->allow_null)) {
|
||||
$this->info[$namespace][$directive] = -$v->type;
|
||||
}
|
||||
foreach ($this->info as $key => $v) {
|
||||
if (count((array) $v) == 1) {
|
||||
$this->info[$key] = $v->type;
|
||||
} elseif (count((array) $v) == 2 && isset($v->allow_null)) {
|
||||
$this->info[$key] = -$v->type;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// DEPRECATED METHODS
|
||||
|
||||
/** @see HTMLPurifier_ConfigSchema->set() */
|
||||
public static function define($namespace, $name, $default, $type, $description) {
|
||||
HTMLPurifier_ConfigSchema::deprecated(__METHOD__);
|
||||
$type_values = explode('/', $type, 2);
|
||||
$type = $type_values[0];
|
||||
$modifier = isset($type_values[1]) ? $type_values[1] : false;
|
||||
$allow_null = ($modifier === 'null');
|
||||
$def = HTMLPurifier_ConfigSchema::instance();
|
||||
$def->add($namespace, $name, $default, $type, $allow_null);
|
||||
}
|
||||
|
||||
/** @see HTMLPurifier_ConfigSchema->addNamespace() */
|
||||
public static function defineNamespace($namespace, $description) {
|
||||
HTMLPurifier_ConfigSchema::deprecated(__METHOD__);
|
||||
$def = HTMLPurifier_ConfigSchema::instance();
|
||||
$def->addNamespace($namespace);
|
||||
}
|
||||
|
||||
/** @see HTMLPurifier_ConfigSchema->addValueAliases() */
|
||||
public static function defineValueAliases($namespace, $name, $aliases) {
|
||||
HTMLPurifier_ConfigSchema::deprecated(__METHOD__);
|
||||
$def = HTMLPurifier_ConfigSchema::instance();
|
||||
$def->addValueAliases($namespace, $name, $aliases);
|
||||
}
|
||||
|
||||
/** @see HTMLPurifier_ConfigSchema->addAllowedValues() */
|
||||
public static function defineAllowedValues($namespace, $name, $allowed_values) {
|
||||
HTMLPurifier_ConfigSchema::deprecated(__METHOD__);
|
||||
$allowed = array();
|
||||
foreach ($allowed_values as $value) {
|
||||
$allowed[$value] = true;
|
||||
}
|
||||
$def = HTMLPurifier_ConfigSchema::instance();
|
||||
$def->addAllowedValues($namespace, $name, $allowed);
|
||||
}
|
||||
|
||||
/** @see HTMLPurifier_ConfigSchema->addAlias() */
|
||||
public static function defineAlias($namespace, $name, $new_namespace, $new_name) {
|
||||
HTMLPurifier_ConfigSchema::deprecated(__METHOD__);
|
||||
$def = HTMLPurifier_ConfigSchema::instance();
|
||||
$def->addAlias($namespace, $name, $new_namespace, $new_name);
|
||||
}
|
||||
|
||||
/** @deprecated, use HTMLPurifier_VarParser->parse() */
|
||||
public function validate($a, $b, $c = false) {
|
||||
trigger_error("HTMLPurifier_ConfigSchema->validate deprecated, use HTMLPurifier_VarParser->parse instead", E_USER_NOTICE);
|
||||
$parser = new HTMLPurifier_VarParser();
|
||||
return $parser->parse($a, $b, $c);
|
||||
}
|
||||
|
||||
/**
|
||||
* Throws an E_USER_NOTICE stating that a method is deprecated.
|
||||
*/
|
||||
private static function deprecated($method) {
|
||||
trigger_error("Static HTMLPurifier_ConfigSchema::$method deprecated, use add*() method instead", E_USER_NOTICE);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// vim: et sw=4 sts=4
|
||||
|
@@ -9,36 +9,28 @@ class HTMLPurifier_ConfigSchema_Builder_ConfigSchema
|
||||
|
||||
public function build($interchange) {
|
||||
$schema = new HTMLPurifier_ConfigSchema();
|
||||
foreach ($interchange->namespaces as $n) {
|
||||
$schema->addNamespace($n->namespace);
|
||||
}
|
||||
foreach ($interchange->directives as $d) {
|
||||
$schema->add(
|
||||
$d->id->namespace,
|
||||
$d->id->directive,
|
||||
$d->id->key,
|
||||
$d->default,
|
||||
$d->type,
|
||||
$d->typeAllowsNull
|
||||
);
|
||||
if ($d->allowed !== null) {
|
||||
$schema->addAllowedValues(
|
||||
$d->id->namespace,
|
||||
$d->id->directive,
|
||||
$d->id->key,
|
||||
$d->allowed
|
||||
);
|
||||
}
|
||||
foreach ($d->aliases as $alias) {
|
||||
$schema->addAlias(
|
||||
$alias->namespace,
|
||||
$alias->directive,
|
||||
$d->id->namespace,
|
||||
$d->id->directive
|
||||
$alias->key,
|
||||
$d->id->key
|
||||
);
|
||||
}
|
||||
if ($d->valueAliases !== null) {
|
||||
$schema->addValueAliases(
|
||||
$d->id->namespace,
|
||||
$d->id->directive,
|
||||
$d->id->key,
|
||||
$d->valueAliases
|
||||
);
|
||||
}
|
||||
|
@@ -8,6 +8,7 @@ class HTMLPurifier_ConfigSchema_Builder_Xml extends XMLWriter
|
||||
{
|
||||
|
||||
protected $interchange;
|
||||
private $namespace;
|
||||
|
||||
protected function writeHTMLDiv($html) {
|
||||
$this->startElement('div');
|
||||
@@ -34,36 +35,33 @@ class HTMLPurifier_ConfigSchema_Builder_Xml extends XMLWriter
|
||||
$this->startElement('configdoc');
|
||||
$this->writeElement('title', $interchange->name);
|
||||
|
||||
foreach ($interchange->namespaces as $namespace) {
|
||||
$this->buildNamespace($namespace);
|
||||
foreach ($interchange->directives as $directive) {
|
||||
$this->buildDirective($directive);
|
||||
}
|
||||
|
||||
if ($this->namespace) $this->endElement(); // namespace
|
||||
|
||||
$this->endElement(); // configdoc
|
||||
$this->flush();
|
||||
}
|
||||
|
||||
public function buildNamespace($namespace) {
|
||||
$this->startElement('namespace');
|
||||
$this->writeAttribute('id', $namespace->namespace);
|
||||
public function buildDirective($directive) {
|
||||
|
||||
$this->writeElement('name', $namespace->namespace);
|
||||
$this->startElement('description');
|
||||
$this->writeHTMLDiv($namespace->description);
|
||||
$this->endElement(); // description
|
||||
|
||||
foreach ($this->interchange->directives as $directive) {
|
||||
if ($directive->id->namespace !== $namespace->namespace) continue;
|
||||
$this->buildDirective($directive);
|
||||
// Kludge, although I suppose having a notion of a "root namespace"
|
||||
// certainly makes things look nicer when documentation is built.
|
||||
// Depends on things being sorted.
|
||||
if (!$this->namespace || $this->namespace !== $directive->id->getRootNamespace()) {
|
||||
if ($this->namespace) $this->endElement(); // namespace
|
||||
$this->namespace = $directive->id->getRootNamespace();
|
||||
$this->startElement('namespace');
|
||||
$this->writeAttribute('id', $this->namespace);
|
||||
$this->writeElement('name', $this->namespace);
|
||||
}
|
||||
|
||||
$this->endElement(); // namespace
|
||||
}
|
||||
|
||||
public function buildDirective($directive) {
|
||||
$this->startElement('directive');
|
||||
$this->writeAttribute('id', $directive->id->toString());
|
||||
|
||||
$this->writeElement('name', $directive->id->directive);
|
||||
$this->writeElement('name', $directive->id->getDirective());
|
||||
|
||||
$this->startElement('aliases');
|
||||
foreach ($directive->aliases as $alias) $this->writeElement('alias', $alias->toString());
|
||||
|
@@ -13,26 +13,11 @@ class HTMLPurifier_ConfigSchema_Interchange
|
||||
*/
|
||||
public $name;
|
||||
|
||||
/**
|
||||
* Array of Namespace ID => array(namespace info)
|
||||
*/
|
||||
public $namespaces = array();
|
||||
|
||||
/**
|
||||
* Array of Directive ID => array(directive info)
|
||||
*/
|
||||
public $directives = array();
|
||||
|
||||
/**
|
||||
* Adds a namespace array to $namespaces
|
||||
*/
|
||||
public function addNamespace($namespace) {
|
||||
if (isset($this->namespaces[$i = $namespace->namespace])) {
|
||||
throw new HTMLPurifier_ConfigSchema_Exception("Cannot redefine namespace '$i'");
|
||||
}
|
||||
$this->namespaces[$i] = $namespace;
|
||||
}
|
||||
|
||||
/**
|
||||
* Adds a directive array to $directives
|
||||
*/
|
||||
|
@@ -6,11 +6,10 @@
|
||||
class HTMLPurifier_ConfigSchema_Interchange_Id
|
||||
{
|
||||
|
||||
public $namespace, $directive;
|
||||
public $key;
|
||||
|
||||
public function __construct($namespace, $directive) {
|
||||
$this->namespace = $namespace;
|
||||
$this->directive = $directive;
|
||||
public function __construct($key) {
|
||||
$this->key = $key;
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -18,12 +17,19 @@ class HTMLPurifier_ConfigSchema_Interchange_Id
|
||||
* cause problems for PHP 5.0 support.
|
||||
*/
|
||||
public function toString() {
|
||||
return $this->namespace . '.' . $this->directive;
|
||||
return $this->key;
|
||||
}
|
||||
|
||||
public function getRootNamespace() {
|
||||
return substr($this->key, 0, strpos($this->key, "."));
|
||||
}
|
||||
|
||||
public function getDirective() {
|
||||
return substr($this->key, strpos($this->key, ".") + 1);
|
||||
}
|
||||
|
||||
public static function make($id) {
|
||||
list($namespace, $directive) = explode('.', $id);
|
||||
return new HTMLPurifier_ConfigSchema_Interchange_Id($namespace, $directive);
|
||||
return new HTMLPurifier_ConfigSchema_Interchange_Id($id);
|
||||
}
|
||||
|
||||
}
|
||||
|
@@ -1,21 +0,0 @@
|
||||
<?php
|
||||
|
||||
/**
|
||||
* Interchange component class describing namespaces.
|
||||
*/
|
||||
class HTMLPurifier_ConfigSchema_Interchange_Namespace
|
||||
{
|
||||
|
||||
/**
|
||||
* Name of namespace defined.
|
||||
*/
|
||||
public $namespace;
|
||||
|
||||
/**
|
||||
* HTML description.
|
||||
*/
|
||||
public $description;
|
||||
|
||||
}
|
||||
|
||||
// vim: et sw=4 sts=4
|
@@ -13,13 +13,17 @@ class HTMLPurifier_ConfigSchema_InterchangeBuilder
|
||||
}
|
||||
|
||||
public static function buildFromDirectory($dir = null) {
|
||||
$parser = new HTMLPurifier_StringHashParser();
|
||||
$builder = new HTMLPurifier_ConfigSchema_InterchangeBuilder();
|
||||
$interchange = new HTMLPurifier_ConfigSchema_Interchange();
|
||||
return $builder->buildDir($interchange, $dir);
|
||||
}
|
||||
|
||||
if (!$dir) $dir = HTMLPURIFIER_PREFIX . '/HTMLPurifier/ConfigSchema/schema/';
|
||||
$info = parse_ini_file($dir . 'info.ini');
|
||||
$interchange->name = $info['name'];
|
||||
public function buildDir($interchange, $dir = null) {
|
||||
if (!$dir) $dir = HTMLPURIFIER_PREFIX . '/HTMLPurifier/ConfigSchema/schema';
|
||||
if (file_exists($dir . '/info.ini')) {
|
||||
$info = parse_ini_file($dir . '/info.ini');
|
||||
$interchange->name = $info['name'];
|
||||
}
|
||||
|
||||
$files = array();
|
||||
$dh = opendir($dir);
|
||||
@@ -33,15 +37,20 @@ class HTMLPurifier_ConfigSchema_InterchangeBuilder
|
||||
|
||||
sort($files);
|
||||
foreach ($files as $file) {
|
||||
$builder->build(
|
||||
$interchange,
|
||||
new HTMLPurifier_StringHash( $parser->parseFile($dir . $file) )
|
||||
);
|
||||
$this->buildFile($interchange, $dir . '/' . $file);
|
||||
}
|
||||
|
||||
return $interchange;
|
||||
}
|
||||
|
||||
public function buildFile($interchange, $file) {
|
||||
$parser = new HTMLPurifier_StringHashParser();
|
||||
$this->build(
|
||||
$interchange,
|
||||
new HTMLPurifier_StringHash( $parser->parseFile($file) )
|
||||
);
|
||||
}
|
||||
|
||||
/**
|
||||
* Builds an interchange object based on a hash.
|
||||
* @param $interchange HTMLPurifier_ConfigSchema_Interchange object to build
|
||||
@@ -55,22 +64,17 @@ class HTMLPurifier_ConfigSchema_InterchangeBuilder
|
||||
throw new HTMLPurifier_ConfigSchema_Exception('Hash does not have any ID');
|
||||
}
|
||||
if (strpos($hash['ID'], '.') === false) {
|
||||
$this->buildNamespace($interchange, $hash);
|
||||
if (count($hash) == 2 && isset($hash['DESCRIPTION'])) {
|
||||
$hash->offsetGet('DESCRIPTION'); // prevent complaining
|
||||
} else {
|
||||
throw new HTMLPurifier_ConfigSchema_Exception('All directives must have a namespace');
|
||||
}
|
||||
} else {
|
||||
$this->buildDirective($interchange, $hash);
|
||||
}
|
||||
$this->_findUnused($hash);
|
||||
}
|
||||
|
||||
public function buildNamespace($interchange, $hash) {
|
||||
$namespace = new HTMLPurifier_ConfigSchema_Interchange_Namespace();
|
||||
$namespace->namespace = $hash->offsetGet('ID');
|
||||
if (isset($hash['DESCRIPTION'])) {
|
||||
$namespace->description = $hash->offsetGet('DESCRIPTION');
|
||||
}
|
||||
$interchange->addNamespace($namespace);
|
||||
}
|
||||
|
||||
public function buildDirective($interchange, $hash) {
|
||||
$directive = new HTMLPurifier_ConfigSchema_Interchange_Directive();
|
||||
|
||||
|
@@ -39,10 +39,6 @@ class HTMLPurifier_ConfigSchema_Validator
|
||||
$this->aliases = array();
|
||||
// PHP is a bit lax with integer <=> string conversions in
|
||||
// arrays, so we don't use the identical !== comparison
|
||||
foreach ($interchange->namespaces as $i => $namespace) {
|
||||
if ($i != $namespace->namespace) $this->error(false, "Integrity violation: key '$i' does not match internal id '{$namespace->namespace}'");
|
||||
$this->validateNamespace($namespace);
|
||||
}
|
||||
foreach ($interchange->directives as $i => $directive) {
|
||||
$id = $directive->id->toString();
|
||||
if ($i != $id) $this->error(false, "Integrity violation: key '$i' does not match internal id '$id'");
|
||||
@@ -51,20 +47,6 @@ class HTMLPurifier_ConfigSchema_Validator
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Validates a HTMLPurifier_ConfigSchema_Interchange_Namespace object.
|
||||
*/
|
||||
public function validateNamespace($n) {
|
||||
$this->context[] = "namespace '{$n->namespace}'";
|
||||
$this->with($n, 'namespace')
|
||||
->assertNotEmpty()
|
||||
->assertAlnum(); // implicit assertIsString handled by InterchangeBuilder
|
||||
$this->with($n, 'description')
|
||||
->assertNotEmpty()
|
||||
->assertIsString(); // handled by InterchangeBuilder
|
||||
array_pop($this->context);
|
||||
}
|
||||
|
||||
/**
|
||||
* Validates a HTMLPurifier_ConfigSchema_Interchange_Id object.
|
||||
*/
|
||||
@@ -75,12 +57,11 @@ class HTMLPurifier_ConfigSchema_Validator
|
||||
// handled by InterchangeBuilder
|
||||
$this->error(false, 'is not an instance of HTMLPurifier_ConfigSchema_Interchange_Id');
|
||||
}
|
||||
if (!isset($this->interchange->namespaces[$id->namespace])) {
|
||||
$this->error('namespace', 'does not exist'); // assumes that the namespace was validated already
|
||||
}
|
||||
$this->with($id, 'directive')
|
||||
// keys are now unconstrained (we might want to narrow down to A-Za-z0-9.)
|
||||
// we probably should check that it has at least one namespace
|
||||
$this->with($id, 'key')
|
||||
->assertNotEmpty()
|
||||
->assertAlnum(); // implicit assertIsString handled by InterchangeBuilder
|
||||
->assertIsString(); // implicit assertIsString handled by InterchangeBuilder
|
||||
array_pop($this->context);
|
||||
}
|
||||
|
||||
|
Binary file not shown.
@@ -0,0 +1,8 @@
|
||||
Attr.AllowedClasses
|
||||
TYPE: lookup/null
|
||||
VERSION: 4.0.0
|
||||
DEFAULT: null
|
||||
--DESCRIPTION--
|
||||
List of allowed class values in the class attribute. By default, this is null,
|
||||
which means all classes are allowed.
|
||||
--# vim: et sw=4 sts=4
|
@@ -0,0 +1,19 @@
|
||||
Attr.ClassUseCDATA
|
||||
TYPE: bool/null
|
||||
DEFAULT: null
|
||||
VERSION: 4.0.0
|
||||
--DESCRIPTION--
|
||||
If null, class will auto-detect the doctype and, if matching XHTML 1.1 or
|
||||
XHTML 2.0, will use the restrictive NMTOKENS specification of class. Otherwise,
|
||||
it will use a relaxed CDATA definition. If true, the relaxed CDATA definition
|
||||
is forced; if false, the NMTOKENS definition is forced. To get behavior
|
||||
of HTML Purifier prior to 4.0.0, set this directive to false.
|
||||
|
||||
Some rational behind the auto-detection:
|
||||
in previous versions of HTML Purifier, it was assumed that the form of
|
||||
class was NMTOKENS, as specified by the XHTML Modularization (representing
|
||||
XHTML 1.1 and XHTML 2.0). The DTDs for HTML 4.01 and XHTML 1.0, however
|
||||
specify class as CDATA. HTML 5 effectively defines it as CDATA, but
|
||||
with the additional constraint that each name should be unique (this is not
|
||||
explicitly outlined in previous specifications).
|
||||
--# vim: et sw=4 sts=4
|
@@ -0,0 +1,8 @@
|
||||
Attr.ForbiddenClasses
|
||||
TYPE: lookup
|
||||
VERSION: 4.0.0
|
||||
DEFAULT: array()
|
||||
--DESCRIPTION--
|
||||
List of forbidden class values in the class attribute. By default, this is
|
||||
empty, which means that no classes are forbidden. See also %Attr.AllowedClasses.
|
||||
--# vim: et sw=4 sts=4
|
@@ -1,3 +0,0 @@
|
||||
Attr
|
||||
DESCRIPTION: Features regarding attribute validation.
|
||||
--# vim: et sw=4 sts=4
|
@@ -1,9 +1,9 @@
|
||||
AutoFormatParam.PurifierLinkifyDocURL
|
||||
AutoFormat.PurifierLinkify.DocURL
|
||||
TYPE: string
|
||||
VERSION: 2.0.1
|
||||
DEFAULT: '#%s'
|
||||
ALIASES: AutoFormatParam.PurifierLinkifyDocURL
|
||||
--DESCRIPTION--
|
||||
|
||||
<p>
|
||||
Location of configuration documentation to link to, let %s substitute
|
||||
into the configuration's namespace and directive names sans the percent
|
@@ -0,0 +1,11 @@
|
||||
AutoFormat.RemoveEmpty.RemoveNbsp.Exceptions
|
||||
TYPE: lookup
|
||||
VERSION: 4.0.0
|
||||
DEFAULT: array('td' => true, 'th' => true)
|
||||
--DESCRIPTION--
|
||||
<p>
|
||||
When %AutoFormat.RemoveEmpty and %AutoFormat.RemoveEmpty.RemoveNbsp
|
||||
are enabled, this directive defines what HTML elements should not be
|
||||
removede if they have only a non-breaking space in them.
|
||||
</p>
|
||||
--# vim: et sw=4 sts=4
|
@@ -0,0 +1,15 @@
|
||||
AutoFormat.RemoveEmpty.RemoveNbsp
|
||||
TYPE: bool
|
||||
VERSION: 4.0.0
|
||||
DEFAULT: false
|
||||
--DESCRIPTION--
|
||||
<p>
|
||||
When enabled, HTML Purifier will treat any elements that contain only
|
||||
non-breaking spaces as well as regular whitespace as empty, and remove
|
||||
them when %AutoForamt.RemoveEmpty is enabled.
|
||||
</p>
|
||||
<p>
|
||||
See %AutoFormat.RemoveEmpty.RemoveNbsp.Exceptions for a list of elements
|
||||
that don't have this behavior applied to them.
|
||||
</p>
|
||||
--# vim: et sw=4 sts=4
|
@@ -31,7 +31,8 @@ DEFAULT: false
|
||||
</p>
|
||||
<p>
|
||||
Elements that contain only whitespace will be treated as empty. Non-breaking
|
||||
spaces, however, do not count as whitespace.
|
||||
spaces, however, do not count as whitespace. See
|
||||
%AutoFormat.RemoveEmpty.RemoveNbsp for alternate behavior.
|
||||
</p>
|
||||
<p>
|
||||
This algorithm is not perfect; you may still notice some empty tags,
|
||||
@@ -39,7 +40,7 @@ DEFAULT: false
|
||||
because they were not permitted in that context, or tags that, after
|
||||
being auto-closed by another tag, where empty. This is for safety reasons
|
||||
to prevent clever code from breaking validation. The general rule of thumb:
|
||||
if a tag looked empty on the way end, it will get removed; if HTML Purifier
|
||||
if a tag looked empty on the way in, it will get removed; if HTML Purifier
|
||||
made it empty, it will stay.
|
||||
</p>
|
||||
--# vim: et sw=4 sts=4
|
||||
|
@@ -0,0 +1,11 @@
|
||||
AutoFormat.RemoveSpansWithoutAttributes
|
||||
TYPE: bool
|
||||
VERSION: 4.0.1
|
||||
DEFAULT: false
|
||||
--DESCRIPTION--
|
||||
<p>
|
||||
This directive causes <code>span</code> tags without any attributes
|
||||
to be removed. It will also remove spans that had all attributes
|
||||
removed during processing.
|
||||
</p>
|
||||
--# vim: et sw=4 sts=4
|
@@ -1,3 +0,0 @@
|
||||
AutoFormat
|
||||
DESCRIPTION: Configuration for activating auto-formatting functionality (also known as <code>Injector</code>s)
|
||||
--# vim: et sw=4 sts=4
|
@@ -1,3 +0,0 @@
|
||||
AutoFormatParam
|
||||
DESCRIPTION: Configuration for customizing auto-formatting functionality
|
||||
--# vim: et sw=4 sts=4
|
@@ -0,0 +1,12 @@
|
||||
CSS.AllowedFonts
|
||||
TYPE: lookup/null
|
||||
VERSION: 4.3.0
|
||||
DEFAULT: NULL
|
||||
--DESCRIPTION--
|
||||
<p>
|
||||
Allows you to manually specify a set of allowed fonts. If
|
||||
<code>NULL</code>, all fonts are allowed. This directive
|
||||
affects generic names (serif, sans-serif, monospace, cursive,
|
||||
fantasy) as well as specific font families.
|
||||
</p>
|
||||
--# vim: et sw=4 sts=4
|
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user