mirror of
https://github.com/ezyang/htmlpurifier.git
synced 2025-08-03 12:47:56 +02:00
Compare commits
102 Commits
Author | SHA1 | Date | |
---|---|---|---|
|
c768146e4d | ||
|
6e37ecd1c8 | ||
|
20eff0a3a0 | ||
|
d516e2f8de | ||
|
631021733b | ||
|
344e0640b6 | ||
|
62d2550e16 | ||
|
087145a71b | ||
|
a44187a5c1 | ||
|
c0ad68108a | ||
|
83a574491e | ||
|
3b537365a4 | ||
|
8a8b123d33 | ||
|
72db575446 | ||
|
d8bb73ce46 | ||
|
f90372f8ab | ||
|
f38fca32a9 | ||
|
5a23004652 | ||
|
6705140082 | ||
|
cb7162a995 | ||
|
2189a9430f | ||
|
7291f19347 | ||
|
9fcffd6533 | ||
|
31dce298ea | ||
|
8c9d461a62 | ||
|
7291a9647e | ||
|
17af0e4fc1 | ||
|
70028f83d6 | ||
|
5c5e3fe79f | ||
|
56a26cab14 | ||
|
1c7fedff5a | ||
|
9de0785448 | ||
|
974fe3f25e | ||
|
94468f3c24 | ||
|
e0354fecd9 | ||
|
1bbbc624dd | ||
|
49879d2cc6 | ||
|
5c9b5130c8 | ||
|
d2de8d976a | ||
|
4164b2eb2b | ||
|
1e5293d9fe | ||
|
6b643ede02 | ||
|
e41af46a8b | ||
|
3570c9985a | ||
|
8d572993b4 | ||
|
1bacbc0563 | ||
|
bfe2c10d07 | ||
|
9b10515fa4 | ||
|
1255d0f15d | ||
|
d45e11cc6b | ||
|
94c15d1f56 | ||
|
ce68cfe484 | ||
|
9f5f85952b | ||
|
dbb365155b | ||
|
32c0ffde0c | ||
|
856a5e5b89 | ||
|
820d6e9097 | ||
|
35b1fbce01 | ||
|
bcfbb8338c | ||
|
f51a6f7de9 | ||
|
f1439f0af5 | ||
|
0124605918 | ||
|
afb007d22f | ||
|
0dd9e4faf4 | ||
|
94ed3b1231 | ||
|
6a6c0ed5d7 | ||
|
e05b555448 | ||
|
ee9c70ab7f | ||
|
b4469f17aa | ||
|
e76f4b45d0 | ||
|
a32d5b52e1 | ||
|
a3d71fe606 | ||
|
77982bd61d | ||
|
78c4e62245 | ||
|
5803c06765 | ||
|
b63569ac22 | ||
|
f3d050c517 | ||
|
6dcc37cb55 | ||
|
cfc4ee1faf | ||
|
598c5b60c9 | ||
|
c9e7ffc172 | ||
|
feeffe6ed2 | ||
|
4754d407aa | ||
|
0b9db1f54b | ||
|
1d4a38d055 | ||
|
8c80349f9d | ||
|
d848c99b74 | ||
|
882ffed9ba | ||
|
86990a21f1 | ||
|
9573f0933d | ||
|
632bf2bbd4 | ||
|
ec86598446 | ||
|
b6c3f5e89b | ||
|
7c91104532 | ||
|
eac628f490 | ||
|
92913bc816 | ||
|
479d793562 | ||
|
e2c15f1c98 | ||
|
57ced3f361 | ||
|
c04a441b3e | ||
|
1bed8b6d5f | ||
|
33afd7d9e0 |
4
.gitignore
vendored
4
.gitignore
vendored
@@ -18,3 +18,7 @@ docs/doxygen*
|
||||
*.phpt.php
|
||||
*.phpt.skip.php
|
||||
*.htmlt.ini
|
||||
*.patch
|
||||
/*.php
|
||||
vendor
|
||||
composer.lock
|
||||
|
2
Doxyfile
2
Doxyfile
@@ -31,7 +31,7 @@ PROJECT_NAME = HTMLPurifier
|
||||
# This could be handy for archiving the generated documentation or
|
||||
# if some version control system is used.
|
||||
|
||||
PROJECT_NUMBER = 4.1.1
|
||||
PROJECT_NUMBER = 4.5.0
|
||||
|
||||
# The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute)
|
||||
# base path where the generated documentation will be put.
|
||||
|
2
FOCUS
2
FOCUS
@@ -1,4 +1,4 @@
|
||||
9 - Major security fixes
|
||||
4 - Minor feature enhancements
|
||||
|
||||
[ Appendix A: Release focus IDs ]
|
||||
0 - N/A
|
||||
|
13
INSTALL
13
INSTALL
@@ -18,6 +18,7 @@ with these contents.
|
||||
HTML Purifier is PHP 5 only, and is actively tested from PHP 5.0.5 and
|
||||
up. It has no core dependencies with other libraries. PHP
|
||||
4 support was deprecated on December 31, 2007 with HTML Purifier 3.0.0.
|
||||
HTML Purifier is not compatible with zend.ze1_compatibility_mode.
|
||||
|
||||
These optional extensions can enhance the capabilities of HTML Purifier:
|
||||
|
||||
@@ -25,6 +26,10 @@ These optional extensions can enhance the capabilities of HTML Purifier:
|
||||
* bcmath : Used for unit conversion and imagecrash protection
|
||||
* tidy : Used for pretty-printing HTML
|
||||
|
||||
These optional libraries can enhance the capabilities of HTML Purifier:
|
||||
|
||||
* CSSTidy : Clean CSS stylesheets using %Core.ExtractStyleBlocks
|
||||
* Net_IDNA2 (PEAR) : IRI support using %Core.EnableIDNA
|
||||
|
||||
---------------------------------------------------------------------------
|
||||
2. Reconnaissance
|
||||
@@ -330,11 +335,6 @@ Or move the cache directory somewhere else (no trailing slash):
|
||||
|
||||
The interface is mind-numbingly simple:
|
||||
|
||||
$purifier = new HTMLPurifier();
|
||||
$clean_html = $purifier->purify( $dirty_html );
|
||||
|
||||
...or, if you're using the configuration object:
|
||||
|
||||
$purifier = new HTMLPurifier($config);
|
||||
$clean_html = $purifier->purify( $dirty_html );
|
||||
|
||||
@@ -353,7 +353,8 @@ If your website is in UTF-8 and XHTML Transitional, use this code:
|
||||
<?php
|
||||
require_once '/path/to/htmlpurifier/library/HTMLPurifier.auto.php';
|
||||
|
||||
$purifier = new HTMLPurifier();
|
||||
$config = HTMLPurifier_Config::createDefault();
|
||||
$purifier = new HTMLPurifier($config);
|
||||
$clean_html = $purifier->purify($dirty_html);
|
||||
?>
|
||||
|
||||
|
143
NEWS
143
NEWS
@@ -9,6 +9,149 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier
|
||||
. Internal change
|
||||
==========================
|
||||
|
||||
4.6.0, unknown release date
|
||||
# URI parsing algorithm was made more strict, so only prefixes which
|
||||
looks like schemes will actually be schemes. Thanks
|
||||
Michael Gusev <mgusev@sugarcrm.com> for fixing.
|
||||
|
||||
4.5.0, released 2013-02-17
|
||||
# Fix bug where stacked attribute transforms clobber each other;
|
||||
this also means it's no longer possible to override attribute
|
||||
transforms in later modules. No internal code was using this
|
||||
but this may break some clients.
|
||||
# We now use SHA-1 to identify cached definitions, instead of MD5.
|
||||
! Support display:inline-block
|
||||
! Support for more white-space CSS values.
|
||||
! Permit underscores in font families
|
||||
! Support for page-break-* CSS3 properties when proprietary properties
|
||||
are enabled.
|
||||
! New directive %Core.DisableExcludes; can be set to 'true' to turn off
|
||||
SGML excludes checking. If HTML Purifier is removing too much text
|
||||
and you don't care about full standards compliance, try setting this to
|
||||
'true'.
|
||||
- Use prepend for SPL autoloading on PHP 5.3 and later.
|
||||
- Fix bug with nofollow transform when pre-existing rel exists.
|
||||
- Fix bug where background:url() always gets lower-cased
|
||||
(but not background-image:url())
|
||||
- Fix bug with non lower-case color names in HTML
|
||||
- Fix bug where data URI validation doesn't remove temporary files.
|
||||
Thanks Javier Marín Ros <javiermarinros@gmail.com> for reporting.
|
||||
- Don't remove certain empty tags on RemoveEmpty.
|
||||
|
||||
4.4.0, released 2012-01-18
|
||||
# Removed PEARSax3 handler.
|
||||
# URI.Munge now munges URIs inside the same host that go from https
|
||||
to http. Reported by Neike Taika-Tessaro.
|
||||
# Core.EscapeNonASCIICharacters now always transforms entities to
|
||||
entities, even if target encoding is UTF-8.
|
||||
# Tighten up selector validation in ExtractStyleBlocks.
|
||||
Non-syntactically valid selectors are now rejected, along with
|
||||
some of the more obscure ones such as attribute selectors, the
|
||||
:lang pseudoselector, and anything not in CSS2.1. Furthermore,
|
||||
ID and class selectors now work properly with the relevant
|
||||
configuration attributes. Also, mute errors when parsing CSS
|
||||
with CSS Tidy. Reported by Mario Heiderich and Norman Hippert.
|
||||
! Added support for 'scope' attribute on tables.
|
||||
! Added %HTML.TargetBlank, which adds target="blank" to all outgoing links.
|
||||
! Properly handle sub-lists directly nested inside of lists in
|
||||
a standards compliant way, by moving them into the preceding <li>
|
||||
! Added %HTML.AllowedComments and %HTML.AllowedCommentsRegexp for
|
||||
limited allowed comments in untrusted situations.
|
||||
! Implement iframes, and allow them to be used in untrusted mode with
|
||||
%HTML.SafeIframe and %URI.SafeIframeRegexp. Thanks Bradley M. Froehle
|
||||
<brad.froehle@gmail.com> for submitting an initial version of the patch.
|
||||
! The Forms module now works properly for transitional doctypes.
|
||||
! Added support for internationalized domain names. You need the PEAR
|
||||
Net_IDNA2 module to be in your path; if it is installed, ensure the
|
||||
class can be loaded and then set %Core.EnableIDNA to true.
|
||||
- Color keywords are now case insensitive. Thanks Yzmir Ramirez
|
||||
<yramirez-htmlpurifier@adicio.com> for reporting.
|
||||
- Explicitly initialize anonModule variable to null.
|
||||
- Do not duplicate nofollow if already present. Thanks 178
|
||||
for reporting.
|
||||
- Do not add nofollow if hostname matches our current host. Thanks 178
|
||||
for reporting, and Neike Taika-Tessaro for helping diagnose.
|
||||
- Do not unset parser variable; this fixes intermittent serialization
|
||||
problems. Thanks Neike Taika-Tessaro for reporting, bill
|
||||
<10010tiger@gmail.com> for diagnosing.
|
||||
- Fix iconv truncation bug, where non-UTF-8 target encodings see
|
||||
output truncated after around 8000 characters. Thanks Jörg Ludwig
|
||||
<joerg.ludwig@iserv.eu> for reporting.
|
||||
- Fix broken table content model for XHTML1.1 (and also earlier
|
||||
versions, although the W3C validator doesn't catch those violations).
|
||||
Thanks GlitchMr <glitch.mr@gmail.com> for reporting.
|
||||
|
||||
4.3.0, released 2011-03-27
|
||||
# Fixed broken caching of customized raw definitions, but requires an
|
||||
API change. The old API still works but will emit a warning,
|
||||
see http://htmlpurifier.org/docs/enduser-customize.html#optimized
|
||||
for how to upgrade your code.
|
||||
# Protect against Internet Explorer innerHTML behavior by specially
|
||||
treating attributes with backticks but no angled brackets, quotes or
|
||||
spaces. This constitutes a slight semantic change, which can be
|
||||
reverted using %Output.FixInnerHTML. Reported by Neike Taika-Tessaro
|
||||
and Mario Heiderich.
|
||||
# Protect against cssText/innerHTML by restricting allowed characters
|
||||
used in fonts further than mandated by the specification and encoding
|
||||
some extra special characters in URLs. Reported by Neike
|
||||
Taika-Tessaro and Mario Heiderich.
|
||||
! Added %HTML.Nofollow to add rel="nofollow" to external links.
|
||||
! More types of SPL autoloaders allowed on later versions of PHP.
|
||||
! Implementations for position, top, left, right, bottom, z-index
|
||||
when %CSS.Trusted is on.
|
||||
! Add %Cache.SerializerPermissions option for custom serializer
|
||||
directory/file permissions
|
||||
! Fix longstanding bug in Flash support for non-IE browsers, and
|
||||
allow more wmode attributes.
|
||||
! Add %CSS.AllowedFonts to restrict permissible font names.
|
||||
- Switch to an iterative traversal of the DOM, which prevents us
|
||||
from running out of stack space for deeply nested documents.
|
||||
Thanks Maxim Krizhanovsky for contributing a patch.
|
||||
- Make removal of conditional IE comments ungreedy; thanks Bernd
|
||||
for reporting.
|
||||
- Escape CDATA before removing Internet Explorer comments.
|
||||
- Fix removal of id attributes under certain conditions by ensuring
|
||||
armor attributes are preserved when recreating tags.
|
||||
- Check if schema.ser was corrupted.
|
||||
- Check if zend.ze1_compatibility_mode is on, and error out if it is.
|
||||
This safety check is only done for HTMLPurifier.auto.php; if you
|
||||
are using standalone or the specialized includes files, you're
|
||||
expected to know what you're doing.
|
||||
- Stop repeatedly writing the cache file after I'm done customizing a
|
||||
raw definition. Reported by ajh.
|
||||
- Switch to using require_once in the Bootstrap to work around bad
|
||||
interaction with Zend Debugger and APC. Reported by Antonio Parraga.
|
||||
- Fix URI handling when hostname is missing but scheme is present.
|
||||
Reported by Neike Taika-Tessaro.
|
||||
- Fix missing numeric entities on DirectLex; thanks Neike Taika-Tessaro
|
||||
for reporting.
|
||||
- Fix harmless notice from indexing into empty string. Thanks Matthijs
|
||||
Kooijman <matthijs@stdin.nl> for reporting.
|
||||
- Don't autoclose no parent elements are able to support the element
|
||||
that triggered the autoclose. In particular fixes strange behavior
|
||||
of stray <li> tags. Thanks pkuliga@gmail.com for reporting and
|
||||
Neike Taika-Tessaro <pinkgothic@gmail.com> for debugging assistance.
|
||||
|
||||
4.2.0, released 2010-09-15
|
||||
! Added %Core.RemoveProcessingInstructions, which lets you remove
|
||||
<? ... ?> statements.
|
||||
! Added %URI.DisableResources functionality; the directive originally
|
||||
did nothing. Thanks David Rothstein for reporting.
|
||||
! Add documentation about configuration directive types.
|
||||
! Add %CSS.ForbiddenProperties configuration directive.
|
||||
! Add %HTML.FlashAllowFullScreen to permit embedded Flash objects
|
||||
to utilize full-screen mode.
|
||||
! Add optional support for the <code>file</code> URI scheme, enable
|
||||
by explicitly setting %URI.AllowedSchemes.
|
||||
! Add %Core.NormalizeNewlines options to allow turning off newline
|
||||
normalization.
|
||||
- Fix improper handling of Internet Explorer conditional comments
|
||||
by parser. Thanks zmonteca for reporting.
|
||||
- Fix missing attributes bug when running on Mac Snow Leopard and APC.
|
||||
Thanks sidepodcast for the fix.
|
||||
- Warn if an element is allowed, but an attribute it requires is
|
||||
not allowed.
|
||||
|
||||
4.1.1, released 2010-05-31
|
||||
- Fix undefined index warnings in maintenance scripts.
|
||||
- Fix bug in DirectLex for parsing elements with a single attribute
|
||||
|
18
TODO
18
TODO
@@ -13,18 +13,26 @@ afraid to cast your vote for the next feature to be implemented!
|
||||
|
||||
Things to do as soon as possible:
|
||||
|
||||
- http://htmlpurifier.org/phorum/read.php?3,5560,6307#msg-6307
|
||||
- Think about allowing explicit order of operations hooks for transforms
|
||||
- Inputs don't do the right thing with submit
|
||||
- Fix "<.<" bug (trailing < is removed if not EOD)
|
||||
- Build in better internal state dumps and debugging tools for remote
|
||||
debugging
|
||||
- Allowed/Allowed* have strange interactions when both set
|
||||
- Transform lone embeds into object tags
|
||||
? Transform lone embeds into object tags
|
||||
- Deprecated config options that emit warnings when you set them (with'
|
||||
a way of muting the warning if you really want to)
|
||||
- Make HTML.Trusted work with Output.FlashCompat
|
||||
- HTML.Trusted and HTML.SafeObject have funny interaction; general
|
||||
problem is what to do when a module "supersedes" another
|
||||
(see also tables and basic tables.) This is a little dicier
|
||||
because HTML.SafeObject has some extra functionality that
|
||||
trusted might find useful. See http://htmlpurifier.org/phorum/read.php?3,5762,6100
|
||||
|
||||
FUTURE VERSIONS
|
||||
---------------
|
||||
|
||||
4.2 release [OMG CONFIG PONIES]
|
||||
4.6 release [OMG CONFIG PONIES]
|
||||
! Fix Printer. It's from the old days when we didn't have decent XML classes
|
||||
! Factor demo.php into a set of Printer classes, and then create a stub
|
||||
file for users here (inside the actual HTML Purifier library)
|
||||
@@ -109,6 +117,10 @@ Neat feature related
|
||||
3. Extend the tag exclusion system to specify whether or not the
|
||||
contents should be dropped or not (currently, there's code that could do
|
||||
something like this if it didn't drop the inner text too.)
|
||||
? Make AutoParagraph also support paragraph-izing double <br> tags, and not
|
||||
just double newlines. This is kind of tough to do in the current framework,
|
||||
though, and might be reasonably approximated by search replacing double <br>s
|
||||
with newlines before running it through HTML Purifier.
|
||||
|
||||
Maintenance related (slightly boring)
|
||||
# CHMOD install script for PEAR installs
|
||||
|
11
WHATSNEW
11
WHATSNEW
@@ -1,5 +1,6 @@
|
||||
HTML Purifier 4.1.1 is a major security and bugfix release that
|
||||
improves on 4.1's fix for an XSS vulnerability exploitable on Internet
|
||||
Explorer. It also contains a number of important bugfixes, including
|
||||
the removal of improper logic that could result in infinite loops and
|
||||
fixed parsing for single-attributes with entities with DirectLex.
|
||||
HTML Purifier 4.5.0 is a minor bugfix and feature release, containing an
|
||||
accumulation of changes over a year. CSS support has been extended to
|
||||
support display:inline-block, white-space, underscores in font families,
|
||||
page-break-* CSS3 properties (when proprietary is enabled.) We now use
|
||||
SHA-1 to identify cached definitions, and the semantics of stacked
|
||||
attribute transforms has changed slightly.
|
||||
|
22
composer.json
Normal file
22
composer.json
Normal file
@@ -0,0 +1,22 @@
|
||||
{
|
||||
"name": "ezyang/htmlpurifier",
|
||||
"description": "Standards compliant HTML filter written in PHP",
|
||||
"type": "library",
|
||||
"keywords": ["html"],
|
||||
"homepage": "http://htmlpurifier.org/",
|
||||
"license": "LGPL",
|
||||
"authors": [
|
||||
{
|
||||
"name": "Edward Z. Yang",
|
||||
"email": "admin@htmlpurifier.org",
|
||||
"homepage": "http://ezyang.com"
|
||||
}
|
||||
],
|
||||
"require": {
|
||||
"php": ">=5.2"
|
||||
},
|
||||
"autoload": {
|
||||
"psr-0": { "HTMLPurifier": "library/" },
|
||||
"files": ["library/HTMLPurifier.composer.php"]
|
||||
}
|
||||
}
|
@@ -40,12 +40,26 @@
|
||||
</xsl:apply-templates>
|
||||
</ul>
|
||||
</div>
|
||||
<div id="typesContainer">
|
||||
<h2>Types</h2>
|
||||
<xsl:apply-templates select="$typeLookup" mode="types" />
|
||||
</div>
|
||||
<xsl:apply-templates />
|
||||
</div>
|
||||
</body>
|
||||
</html>
|
||||
</xsl:template>
|
||||
|
||||
<xsl:template match="type" mode="types">
|
||||
<div class="type-block">
|
||||
<xsl:attribute name="id">type-<xsl:value-of select="@id" /></xsl:attribute>
|
||||
<h3><code><xsl:value-of select="@id" /></code>: <xsl:value-of select="@name" /></h3>
|
||||
<div class="type-description">
|
||||
<xsl:copy-of xmlns:xhtml="http://www.w3.org/1999/xhtml" select="xhtml:div/node()" />
|
||||
</div>
|
||||
</div>
|
||||
</xsl:template>
|
||||
|
||||
<xsl:template match="title" mode="toc" />
|
||||
<xsl:template match="namespace" mode="toc">
|
||||
<xsl:param name="overflowNumber" />
|
||||
@@ -192,10 +206,13 @@
|
||||
<td>
|
||||
<xsl:variable name="type" select="text()" />
|
||||
<xsl:attribute name="class">type type-<xsl:value-of select="$type" /></xsl:attribute>
|
||||
<xsl:value-of select="$typeLookup/type[@id=$type]/text()" />
|
||||
<xsl:if test="@allow-null='yes'">
|
||||
(or null)
|
||||
</xsl:if>
|
||||
<a>
|
||||
<xsl:attribute name="href">#type-<xsl:value-of select="$type" /></xsl:attribute>
|
||||
<xsl:value-of select="$typeLookup/type[@id=$type]/@name" />
|
||||
<xsl:if test="@allow-null='yes'">
|
||||
(or null)
|
||||
</xsl:if>
|
||||
</a>
|
||||
</td>
|
||||
</tr>
|
||||
</xsl:template>
|
||||
|
@@ -1,16 +1,68 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<types>
|
||||
<type id="string">String</type>
|
||||
<type id="istring">Case-insensitive string</type>
|
||||
<type id="text">Text</type>
|
||||
<type id="itext">Case-insensitive text</type>
|
||||
<type id="int">Integer</type>
|
||||
<type id="float">Float</type>
|
||||
<type id="bool">Boolean</type>
|
||||
<type id="lookup">Lookup array</type>
|
||||
<type id="list">Array list</type>
|
||||
<type id="hash">Associative array</type>
|
||||
<type id="mixed">Mixed</type>
|
||||
<type id="string" name="String"><div xmlns="http://www.w3.org/1999/xhtml">
|
||||
A <a
|
||||
href="http://docs.php.net/manual/en/language.types.string.php">sequence
|
||||
of characters</a>.
|
||||
</div></type>
|
||||
<type id="istring" name="Case-insensitive string"><div xmlns="http://www.w3.org/1999/xhtml">
|
||||
A series of case-insensitive characters. Internally, upper-case
|
||||
ASCII characters will be converted to lower-case.
|
||||
</div></type>
|
||||
<type id="text" name="Text"><div xmlns="http://www.w3.org/1999/xhtml">
|
||||
A series of characters that may contain newlines. Text tends to
|
||||
indicate human-oriented text, as opposed to a machine format.
|
||||
</div></type>
|
||||
<type id="itext" name="Case-insensitive text"><div xmlns="http://www.w3.org/1999/xhtml">
|
||||
A series of case-insensitive characters that may contain newlines.
|
||||
</div></type>
|
||||
<type id="int" name="Integer"><div xmlns="http://www.w3.org/1999/xhtml">
|
||||
An <a
|
||||
href="http://docs.php.net/manual/en/language.types.integer.php">
|
||||
integer</a>. You are alternatively permitted to pass a string of
|
||||
digits instead, which will be cast to an integer using
|
||||
<code>(int)</code>.
|
||||
</div></type>
|
||||
<type id="float" name="Float"><div xmlns="http://www.w3.org/1999/xhtml">
|
||||
A <a href="http://docs.php.net/manual/en/language.types.float.php">
|
||||
floating point number</a>. You are alternatively permitted to
|
||||
pass a numeric string (as defined by <code>is_numeric()</code>),
|
||||
which will be cast to a float using <code>(float)</code>.
|
||||
</div></type>
|
||||
<type id="bool" name="Boolean"><div xmlns="http://www.w3.org/1999/xhtml">
|
||||
A <a
|
||||
href="http://docs.php.net/manual/en/language.types.boolean.php">boolean</a>.
|
||||
You are alternatively permitted to pass an integer <code>0</code> or
|
||||
<code>1</code> (other integers are not permitted) or a string
|
||||
<code>"on"</code>, <code>"true"</code> or <code>"1"</code> for
|
||||
<code>true</code>, and <code>"off"</code>, <code>"false"</code> or
|
||||
<code>"0"</code> for <code>false</code>.
|
||||
</div></type>
|
||||
<type id="lookup" name="Lookup array"><div xmlns="http://www.w3.org/1999/xhtml">
|
||||
An array whose values are <code>true</code>, e.g. <code>array('key'
|
||||
=> true, 'key2' => true)</code>. You are alternatively permitted
|
||||
to pass an array list of the keys <code>array('key', 'key2')</code>
|
||||
or a comma-separated string of keys <code>"key, key2"</code>. If
|
||||
you pass an array list of values, ensure that your values are
|
||||
strictly numerically indexed: <code>array('key1', 2 =>
|
||||
'key2')</code> will not do what you expect and emits a warning.
|
||||
</div></type>
|
||||
<type id="list" name="Array list"><div xmlns="http://www.w3.org/1999/xhtml">
|
||||
An array which has consecutive integer indexes, e.g.
|
||||
<code>array('val1', 'val2')</code>. You are alternatively permitted
|
||||
to pass a comma-separated string of keys <code>"val1, val2"</code>.
|
||||
If your array is not in this form, <code>array_values</code> is run
|
||||
on the array and a warning is emitted.
|
||||
</div></type>
|
||||
<type id="hash" name="Associative array"><div xmlns="http://www.w3.org/1999/xhtml">
|
||||
An array which is a mapping of keys to values, e.g.
|
||||
<code>array('key1' => 'val1', 'key2' => 'val2')</code>. You are
|
||||
alternatively permitted to pass a comma-separated string of
|
||||
key-colon-value strings, e.g. <code>"key1: val1, key2: val2"</code>.
|
||||
</div></type>
|
||||
<type id="mixed" name="Mixed"><div xmlns="http://www.w3.org/1999/xhtml">
|
||||
An arbitrary PHP value of any type.
|
||||
</div></type>
|
||||
</types>
|
||||
|
||||
<!-- vim: et sw=4 sts=4
|
||||
|
@@ -6,7 +6,7 @@
|
||||
</file>
|
||||
<file name="HTMLPurifier/Lexer.php">
|
||||
<line>81</line>
|
||||
<line>269</line>
|
||||
<line>284</line>
|
||||
</file>
|
||||
<file name="HTMLPurifier/Lexer/DirectLex.php">
|
||||
<line>53</line>
|
||||
@@ -14,7 +14,7 @@
|
||||
<line>348</line>
|
||||
</file>
|
||||
<file name="HTMLPurifier/Strategy/RemoveForeignElements.php">
|
||||
<line>47</line>
|
||||
<line>50</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="CSS.MaxImgLength">
|
||||
@@ -24,22 +24,32 @@
|
||||
</directive>
|
||||
<directive id="CSS.Proprietary">
|
||||
<file name="HTMLPurifier/CSSDefinition.php">
|
||||
<line>214</line>
|
||||
<line>215</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="CSS.AllowTricky">
|
||||
<file name="HTMLPurifier/CSSDefinition.php">
|
||||
<line>218</line>
|
||||
<line>219</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="CSS.Trusted">
|
||||
<file name="HTMLPurifier/CSSDefinition.php">
|
||||
<line>223</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="CSS.AllowImportant">
|
||||
<file name="HTMLPurifier/CSSDefinition.php">
|
||||
<line>222</line>
|
||||
<line>227</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="CSS.AllowedProperties">
|
||||
<file name="HTMLPurifier/CSSDefinition.php">
|
||||
<line>275</line>
|
||||
<line>302</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="CSS.ForbiddenProperties">
|
||||
<file name="HTMLPurifier/CSSDefinition.php">
|
||||
<line>316</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="Cache.DefinitionImpl">
|
||||
@@ -69,44 +79,57 @@
|
||||
</directive>
|
||||
<directive id="Core.Encoding">
|
||||
<file name="HTMLPurifier/Encoder.php">
|
||||
<line>267</line>
|
||||
<line>300</line>
|
||||
<line>337</line>
|
||||
<line>372</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="Test.ForceNoIconv">
|
||||
<file name="HTMLPurifier/Encoder.php">
|
||||
<line>272</line>
|
||||
<line>308</line>
|
||||
<line>341</line>
|
||||
<line>379</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="Core.EscapeNonASCIICharacters">
|
||||
<file name="HTMLPurifier/Encoder.php">
|
||||
<line>304</line>
|
||||
<line>373</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="Output.CommentScriptContents">
|
||||
<file name="HTMLPurifier/Generator.php">
|
||||
<line>56</line>
|
||||
<line>61</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="Output.FixInnerHTML">
|
||||
<file name="HTMLPurifier/Generator.php">
|
||||
<line>62</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="Output.SortAttr">
|
||||
<file name="HTMLPurifier/Generator.php">
|
||||
<line>57</line>
|
||||
<line>63</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="Output.FlashCompat">
|
||||
<file name="HTMLPurifier/Generator.php">
|
||||
<line>58</line>
|
||||
<line>64</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="Output.TidyFormat">
|
||||
<file name="HTMLPurifier/Generator.php">
|
||||
<line>87</line>
|
||||
<line>93</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="Core.NormalizeNewlines">
|
||||
<file name="HTMLPurifier/Generator.php">
|
||||
<line>107</line>
|
||||
</file>
|
||||
<file name="HTMLPurifier/Lexer.php">
|
||||
<line>266</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="Output.Newline">
|
||||
<file name="HTMLPurifier/Generator.php">
|
||||
<line>101</line>
|
||||
<line>108</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="HTML.BlockWrapper">
|
||||
@@ -136,20 +159,20 @@
|
||||
</directive>
|
||||
<directive id="HTML.ForbiddenElements">
|
||||
<file name="HTMLPurifier/HTMLDefinition.php">
|
||||
<line>337</line>
|
||||
<line>342</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="HTML.ForbiddenAttributes">
|
||||
<file name="HTMLPurifier/HTMLDefinition.php">
|
||||
<line>338</line>
|
||||
<line>343</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="HTML.Trusted">
|
||||
<file name="HTMLPurifier/HTMLModuleManager.php">
|
||||
<line>202</line>
|
||||
<line>204</line>
|
||||
</file>
|
||||
<file name="HTMLPurifier/Lexer.php">
|
||||
<line>258</line>
|
||||
<line>271</line>
|
||||
</file>
|
||||
<file name="HTMLPurifier/HTMLModule/Image.php">
|
||||
<line>27</line>
|
||||
@@ -163,27 +186,45 @@
|
||||
</directive>
|
||||
<directive id="HTML.AllowedModules">
|
||||
<file name="HTMLPurifier/HTMLModuleManager.php">
|
||||
<line>209</line>
|
||||
<line>211</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="HTML.CoreModules">
|
||||
<file name="HTMLPurifier/HTMLModuleManager.php">
|
||||
<line>210</line>
|
||||
<line>212</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="HTML.Proprietary">
|
||||
<file name="HTMLPurifier/HTMLModuleManager.php">
|
||||
<line>221</line>
|
||||
<line>222</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="HTML.SafeObject">
|
||||
<file name="HTMLPurifier/HTMLModuleManager.php">
|
||||
<line>226</line>
|
||||
<line>225</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="HTML.SafeEmbed">
|
||||
<file name="HTMLPurifier/HTMLModuleManager.php">
|
||||
<line>229</line>
|
||||
<line>228</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="HTML.SafeScripting">
|
||||
<file name="HTMLPurifier/HTMLModuleManager.php">
|
||||
<line>231</line>
|
||||
</file>
|
||||
<file name="HTMLPurifier/HTMLModule/SafeScripting.php">
|
||||
<line>17</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="HTML.Nofollow">
|
||||
<file name="HTMLPurifier/HTMLModuleManager.php">
|
||||
<line>234</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="HTML.TargetBlank">
|
||||
<file name="HTMLPurifier/HTMLModuleManager.php">
|
||||
<line>237</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="Attr.IDBlacklist">
|
||||
@@ -211,12 +252,17 @@
|
||||
</directive>
|
||||
<directive id="Core.ConvertDocumentToFragment">
|
||||
<file name="HTMLPurifier/Lexer.php">
|
||||
<line>267</line>
|
||||
<line>282</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="Core.RemoveProcessingInstructions">
|
||||
<file name="HTMLPurifier/Lexer.php">
|
||||
<line>303</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="URI.">
|
||||
<file name="HTMLPurifier/URIDefinition.php">
|
||||
<line>55</line>
|
||||
<line>60</line>
|
||||
</file>
|
||||
<file name="HTMLPurifier/URIFilter/Munge.php">
|
||||
<line>12</line>
|
||||
@@ -224,17 +270,20 @@
|
||||
</directive>
|
||||
<directive id="URI.Host">
|
||||
<file name="HTMLPurifier/URIDefinition.php">
|
||||
<line>64</line>
|
||||
<line>70</line>
|
||||
</file>
|
||||
<file name="HTMLPurifier/URIScheme.php">
|
||||
<line>81</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="URI.Base">
|
||||
<file name="HTMLPurifier/URIDefinition.php">
|
||||
<line>65</line>
|
||||
<line>71</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="URI.DefaultScheme">
|
||||
<file name="HTMLPurifier/URIDefinition.php">
|
||||
<line>72</line>
|
||||
<line>78</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="URI.AllowedSchemes">
|
||||
@@ -260,6 +309,11 @@
|
||||
<line>12</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="CSS.AllowedFonts">
|
||||
<file name="HTMLPurifier/AttrDef/CSS/FontFamily.php">
|
||||
<line>50</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="Attr.AllowedClasses">
|
||||
<file name="HTMLPurifier/AttrDef/HTML/Class.php">
|
||||
<line>18</line>
|
||||
@@ -277,23 +331,23 @@
|
||||
</directive>
|
||||
<directive id="Attr.EnableID">
|
||||
<file name="HTMLPurifier/AttrDef/HTML/ID.php">
|
||||
<line>20</line>
|
||||
<line>30</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="Attr.IDPrefix">
|
||||
<file name="HTMLPurifier/AttrDef/HTML/ID.php">
|
||||
<line>26</line>
|
||||
<line>36</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="Attr.IDPrefixLocal">
|
||||
<file name="HTMLPurifier/AttrDef/HTML/ID.php">
|
||||
<line>28</line>
|
||||
<line>31</line>
|
||||
<line>38</line>
|
||||
<line>41</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="Attr.IDBlacklistRegexp">
|
||||
<file name="HTMLPurifier/AttrDef/HTML/ID.php">
|
||||
<line>54</line>
|
||||
<line>64</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="Attr.">
|
||||
@@ -301,6 +355,11 @@
|
||||
<line>30</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="Core.EnableIDNA">
|
||||
<file name="HTMLPurifier/AttrDef/URI/Host.php">
|
||||
<line>67</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="Attr.DefaultTextDir">
|
||||
<file name="HTMLPurifier/AttrTransform/BdoDir.php">
|
||||
<line>13</line>
|
||||
@@ -337,6 +396,11 @@
|
||||
<line>13</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="HTML.FlashAllowFullScreen">
|
||||
<file name="HTMLPurifier/AttrTransform/SafeParam.php">
|
||||
<line>38</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="Core.EscapeInvalidChildren">
|
||||
<file name="HTMLPurifier/ChildDef/Required.php">
|
||||
<line>62</line>
|
||||
@@ -347,19 +411,33 @@
|
||||
<line>91</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="Cache.SerializerPermissions">
|
||||
<file name="HTMLPurifier/DefinitionCache/Serializer.php">
|
||||
<line>107</line>
|
||||
<line>124</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="Filter.ExtractStyleBlocks.TidyImpl">
|
||||
<file name="HTMLPurifier/Filter/ExtractStyleBlocks.php">
|
||||
<line>41</line>
|
||||
<line>55</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="Filter.ExtractStyleBlocks.Scope">
|
||||
<file name="HTMLPurifier/Filter/ExtractStyleBlocks.php">
|
||||
<line>65</line>
|
||||
<line>79</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="Filter.ExtractStyleBlocks.Escaping">
|
||||
<file name="HTMLPurifier/Filter/ExtractStyleBlocks.php">
|
||||
<line>123</line>
|
||||
<line>277</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="HTML.SafeIframe">
|
||||
<file name="HTMLPurifier/HTMLModule/Iframe.php">
|
||||
<line>17</line>
|
||||
</file>
|
||||
<file name="HTMLPurifier/URIFilter/SafeIframe.php">
|
||||
<line>23</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="HTML.MaxImgLength">
|
||||
@@ -395,12 +473,12 @@
|
||||
</directive>
|
||||
<directive id="AutoFormat.RemoveEmpty.RemoveNbsp">
|
||||
<file name="HTMLPurifier/Injector/RemoveEmpty.php">
|
||||
<line>12</line>
|
||||
<line>15</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="AutoFormat.RemoveEmpty.RemoveNbsp.Exceptions">
|
||||
<file name="HTMLPurifier/Injector/RemoveEmpty.php">
|
||||
<line>13</line>
|
||||
<line>16</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="Core.AggressivelyFixLt">
|
||||
@@ -413,27 +491,42 @@
|
||||
<line>70</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="Core.DisableExcludes">
|
||||
<file name="HTMLPurifier/Strategy/FixNesting.php">
|
||||
<line>57</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="Core.EscapeInvalidTags">
|
||||
<file name="HTMLPurifier/Strategy/MakeWellFormed.php">
|
||||
<line>45</line>
|
||||
<line>53</line>
|
||||
</file>
|
||||
<file name="HTMLPurifier/Strategy/RemoveForeignElements.php">
|
||||
<line>19</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="Core.RemoveScriptContents">
|
||||
<directive id="HTML.AllowedComments">
|
||||
<file name="HTMLPurifier/Strategy/RemoveForeignElements.php">
|
||||
<line>24</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="HTML.AllowedCommentsRegexp">
|
||||
<file name="HTMLPurifier/Strategy/RemoveForeignElements.php">
|
||||
<line>25</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="Core.RemoveScriptContents">
|
||||
<file name="HTMLPurifier/Strategy/RemoveForeignElements.php">
|
||||
<line>28</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="Core.HiddenElements">
|
||||
<file name="HTMLPurifier/Strategy/RemoveForeignElements.php">
|
||||
<line>26</line>
|
||||
<line>29</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="URI.HostBlacklist">
|
||||
<file name="HTMLPurifier/URIFilter/HostBlacklist.php">
|
||||
<line>8</line>
|
||||
<line>12</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="URI.MungeResources">
|
||||
@@ -446,4 +539,9 @@
|
||||
<line>15</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="URI.SafeIframeRegexp">
|
||||
<file name="HTMLPurifier/URIFilter/SafeIframe.php">
|
||||
<line>18</line>
|
||||
</file>
|
||||
</directive>
|
||||
</usage>
|
||||
|
@@ -255,7 +255,7 @@ Mozilla on inside and needs -moz-outline, no IE support.</td></tr>
|
||||
<tr class="feature"><td>axis</td><td>TD, TH</td><td>W3C only: No browser implementation</td></tr>
|
||||
<tr class="feature"><td>char</td><td>COL, COLGROUP, TBODY, TD, TFOOT, TH, THEAD, TR</td><td>W3C only: No browser implementation</td></tr>
|
||||
<tr class="feature"><td>headers</td><td>TD, TH</td><td>W3C only: No browser implementation</td></tr>
|
||||
<tr class="feature"><td>scope</td><td>TD, TH</td><td>W3C only: No browser implementation</td></tr>
|
||||
<tr class="impl-yes"><td>scope</td><td>TD, TH</td><td>W3C only: No browser implementation</td></tr>
|
||||
</tbody>
|
||||
|
||||
<tbody class="impl-yes">
|
||||
|
@@ -146,7 +146,9 @@
|
||||
<pre>$config = HTMLPurifier_Config::createDefault();
|
||||
$config->set('HTML.DefinitionID', 'enduser-customize.html tutorial');
|
||||
$config->set('HTML.DefinitionRev', 1);
|
||||
$def = $config->getHTMLDefinition(true);</pre>
|
||||
if ($def = $config->maybeGetRawHTMLDefinition()) {
|
||||
// our code will go here
|
||||
}</pre>
|
||||
|
||||
<p>
|
||||
Assuming that HTML Purifier has already been properly loaded (hint:
|
||||
@@ -174,23 +176,15 @@ $def = $config->getHTMLDefinition(true);</pre>
|
||||
</li>
|
||||
<li>
|
||||
The fourth line retrieves a raw <code>HTMLPurifier_HTMLDefinition</code>
|
||||
object that we will be tweaking. If the parameter was removed, we
|
||||
would be retrieving a fully formed definition object, which is somewhat
|
||||
useless for customization purposes.
|
||||
object that we will be tweaking. Interestingly enough, we have
|
||||
placed it in an if block: this is because
|
||||
<code>maybeGetRawHTMLDefinition</code>, as its name suggests, may
|
||||
return a NULL, in which case we should skip doing any
|
||||
initialization. This, in fact, will correspond to when our fully
|
||||
customized object is already in the cache.
|
||||
</li>
|
||||
</ul>
|
||||
|
||||
<h3>Broken backwards-compatibility</h3>
|
||||
|
||||
<p>
|
||||
Those of you who have already been twiddling around with the raw
|
||||
HTML definition object, you'll be noticing that you're getting an error
|
||||
when you attempt to retrieve the raw definition object without specifying
|
||||
a DefinitionID. It is vital to caching (see below) that you make a unique
|
||||
name for your customized definition, so make up something right now and
|
||||
things will operate again.
|
||||
</p>
|
||||
|
||||
<h2>Turn off caching</h2>
|
||||
|
||||
<p>
|
||||
@@ -781,6 +775,75 @@ $form->excludes = array('form' => true);</strong></pre>
|
||||
<li><a href="http://repo.or.cz/w/htmlpurifier.git?a=blob;hb=HEAD;f=library/HTMLPurifier/ElementDef.php"><code>library/HTMLPurifier/ElementDef.php</code></a></li>
|
||||
</ul>
|
||||
|
||||
<h2 id="optimized">Notes for HTML Purifier 4.2.0 and earlier</h3>
|
||||
|
||||
<p>
|
||||
Previously, this tutorial gave some incorrect template code for
|
||||
editing raw definitions, and that template code will now produce the
|
||||
error <q>Due to a documentation error in previous version of HTML
|
||||
Purifier...</q> Here is how to mechanically transform old-style
|
||||
code into new-style code.
|
||||
</p>
|
||||
|
||||
<p>
|
||||
First, identify all code that edits the raw definition object, and
|
||||
put it together. Ensure none of this code must be run on every
|
||||
request; if some sub-part needs to always be run, move it outside
|
||||
this block. Here is an example below, with the raw definition
|
||||
object code bolded.
|
||||
</p>
|
||||
|
||||
<pre>$config = HTMLPurifier_Config::createDefault();
|
||||
$config->set('HTML.DefinitionID', 'enduser-customize.html tutorial');
|
||||
$config->set('HTML.DefinitionRev', 1);
|
||||
$def = $config->getHTMLDefinition(true);
|
||||
<strong>$def->addAttribute('a', 'target', 'Enum#_blank,_self,_target,_top');</strong>
|
||||
$purifier = new HTMLPurifier($config);</pre>
|
||||
|
||||
<p>
|
||||
Next, replace the raw definition retrieval with a
|
||||
maybeGetRawHTMLDefinition method call inside an if conditional, and
|
||||
place the editing code inside that if block.
|
||||
</p>
|
||||
|
||||
<pre>$config = HTMLPurifier_Config::createDefault();
|
||||
$config->set('HTML.DefinitionID', 'enduser-customize.html tutorial');
|
||||
$config->set('HTML.DefinitionRev', 1);
|
||||
<strong>if ($def = $config->maybeGetRawHTMLDefinition()) {
|
||||
$def->addAttribute('a', 'target', 'Enum#_blank,_self,_target,_top');
|
||||
}</strong>
|
||||
$purifier = new HTMLPurifier($config);</pre>
|
||||
|
||||
<p>
|
||||
And you're done! Alternatively, if you're OK with not ever caching
|
||||
your code, the following will still work and not emit warnings.
|
||||
</p>
|
||||
|
||||
<pre>$config = HTMLPurifier_Config::createDefault();
|
||||
$def = $config->getHTMLDefinition(true);
|
||||
$def->addAttribute('a', 'target', 'Enum#_blank,_self,_target,_top');
|
||||
$purifier = new HTMLPurifier($config);</pre>
|
||||
|
||||
<p>
|
||||
A slightly less efficient version of this was what was going on with
|
||||
old versions of HTML Purifier.
|
||||
</p>
|
||||
|
||||
<p>
|
||||
<em>Technical notes:</em> ajh pointed out on <a
|
||||
href="http://htmlpurifier.org/phorum/read.php?5,5164,5169#msg-5169">in a forum topic</a> that
|
||||
HTML Purifier appeared to be repeatedly writing to the cache even
|
||||
when a cache entry already existed. Investigation lead to the
|
||||
discovery of the following infelicity: caching of customized
|
||||
definitions didn't actually work! The problem was that even though
|
||||
a cache file would be written out at the end of the process, there
|
||||
was no way for HTML Purifier to say, <q>Actually, I've already got a
|
||||
copy of your work, no need to reconfigure your
|
||||
customizations</q>. This required the API to change: placing
|
||||
all of the customizations to the raw definition object in a
|
||||
conditional which could be skipped.
|
||||
</p>
|
||||
|
||||
</body></html>
|
||||
|
||||
<!-- vim: et sw=4 sts=4
|
||||
|
@@ -561,7 +561,7 @@ page on special characters</a> for more details.
|
||||
<h3 id="whyutf8-forms">Forms</h3>
|
||||
|
||||
<p>While we're on the tack of users, how do non-UTF-8 web forms deal
|
||||
with characters that our outside of their character set? Rather than
|
||||
with characters that are outside of their character set? Rather than
|
||||
discuss what UTF-8 does right, we're going to show what could go wrong
|
||||
if you didn't use UTF-8 and people tried to use characters outside
|
||||
of your character encoding.</p>
|
||||
|
@@ -3,6 +3,7 @@
|
||||
/**
|
||||
* @file
|
||||
* Convenience file that registers autoload handler for HTML Purifier.
|
||||
* It also does some sanity checks.
|
||||
*/
|
||||
|
||||
if (function_exists('spl_autoload_register') && function_exists('spl_autoload_unregister')) {
|
||||
@@ -18,4 +19,8 @@ if (function_exists('spl_autoload_register') && function_exists('spl_autoload_un
|
||||
}
|
||||
}
|
||||
|
||||
if (ini_get('zend.ze1_compatibility_mode')) {
|
||||
trigger_error("HTML Purifier is not compatible with zend.ze1_compatibility_mode; please turn it off", E_USER_ERROR);
|
||||
}
|
||||
|
||||
// vim: et sw=4 sts=4
|
||||
|
4
library/HTMLPurifier.composer.php
Normal file
4
library/HTMLPurifier.composer.php
Normal file
@@ -0,0 +1,4 @@
|
||||
<?php
|
||||
if (!defined('HTMLPURIFIER_PREFIX')) {
|
||||
define('HTMLPURIFIER_PREFIX', __DIR__);
|
||||
}
|
@@ -7,7 +7,7 @@
|
||||
* primary concern and you are using an opcode cache. PLEASE DO NOT EDIT THIS
|
||||
* FILE, changes will be overwritten the next time the script is run.
|
||||
*
|
||||
* @version 4.1.1
|
||||
* @version 4.5.0
|
||||
*
|
||||
* @warning
|
||||
* You must *not* include any other HTML Purifier files before this file,
|
||||
@@ -19,6 +19,8 @@
|
||||
*/
|
||||
|
||||
require 'HTMLPurifier.php';
|
||||
require 'HTMLPurifier/Array.php';
|
||||
require 'HTMLPurifier/ArrayNode.php';
|
||||
require 'HTMLPurifier/AttrCollections.php';
|
||||
require 'HTMLPurifier/AttrDef.php';
|
||||
require 'HTMLPurifier/AttrTransform.php';
|
||||
@@ -36,6 +38,7 @@ require 'HTMLPurifier/DefinitionCache.php';
|
||||
require 'HTMLPurifier/DefinitionCacheFactory.php';
|
||||
require 'HTMLPurifier/Doctype.php';
|
||||
require 'HTMLPurifier/DoctypeRegistry.php';
|
||||
require 'HTMLPurifier/DoublyLinkedList.php';
|
||||
require 'HTMLPurifier/ElementDef.php';
|
||||
require 'HTMLPurifier/Encoder.php';
|
||||
require 'HTMLPurifier/EntityLookup.php';
|
||||
@@ -73,6 +76,7 @@ require 'HTMLPurifier/UnitConverter.php';
|
||||
require 'HTMLPurifier/VarParser.php';
|
||||
require 'HTMLPurifier/VarParserException.php';
|
||||
require 'HTMLPurifier/AttrDef/CSS.php';
|
||||
require 'HTMLPurifier/AttrDef/Clone.php';
|
||||
require 'HTMLPurifier/AttrDef/Enum.php';
|
||||
require 'HTMLPurifier/AttrDef/Integer.php';
|
||||
require 'HTMLPurifier/AttrDef/Lang.php';
|
||||
@@ -90,6 +94,7 @@ require 'HTMLPurifier/AttrDef/CSS/DenyElementDecorator.php';
|
||||
require 'HTMLPurifier/AttrDef/CSS/Filter.php';
|
||||
require 'HTMLPurifier/AttrDef/CSS/Font.php';
|
||||
require 'HTMLPurifier/AttrDef/CSS/FontFamily.php';
|
||||
require 'HTMLPurifier/AttrDef/CSS/Ident.php';
|
||||
require 'HTMLPurifier/AttrDef/CSS/ImportantDecorator.php';
|
||||
require 'HTMLPurifier/AttrDef/CSS/Length.php';
|
||||
require 'HTMLPurifier/AttrDef/CSS/ListStyle.php';
|
||||
@@ -125,14 +130,17 @@ require 'HTMLPurifier/AttrTransform/Lang.php';
|
||||
require 'HTMLPurifier/AttrTransform/Length.php';
|
||||
require 'HTMLPurifier/AttrTransform/Name.php';
|
||||
require 'HTMLPurifier/AttrTransform/NameSync.php';
|
||||
require 'HTMLPurifier/AttrTransform/Nofollow.php';
|
||||
require 'HTMLPurifier/AttrTransform/SafeEmbed.php';
|
||||
require 'HTMLPurifier/AttrTransform/SafeObject.php';
|
||||
require 'HTMLPurifier/AttrTransform/SafeParam.php';
|
||||
require 'HTMLPurifier/AttrTransform/ScriptRequired.php';
|
||||
require 'HTMLPurifier/AttrTransform/TargetBlank.php';
|
||||
require 'HTMLPurifier/AttrTransform/Textarea.php';
|
||||
require 'HTMLPurifier/ChildDef/Chameleon.php';
|
||||
require 'HTMLPurifier/ChildDef/Custom.php';
|
||||
require 'HTMLPurifier/ChildDef/Empty.php';
|
||||
require 'HTMLPurifier/ChildDef/List.php';
|
||||
require 'HTMLPurifier/ChildDef/Required.php';
|
||||
require 'HTMLPurifier/ChildDef/Optional.php';
|
||||
require 'HTMLPurifier/ChildDef/StrictBlockquote.php';
|
||||
@@ -147,10 +155,12 @@ require 'HTMLPurifier/HTMLModule/CommonAttributes.php';
|
||||
require 'HTMLPurifier/HTMLModule/Edit.php';
|
||||
require 'HTMLPurifier/HTMLModule/Forms.php';
|
||||
require 'HTMLPurifier/HTMLModule/Hypertext.php';
|
||||
require 'HTMLPurifier/HTMLModule/Iframe.php';
|
||||
require 'HTMLPurifier/HTMLModule/Image.php';
|
||||
require 'HTMLPurifier/HTMLModule/Legacy.php';
|
||||
require 'HTMLPurifier/HTMLModule/List.php';
|
||||
require 'HTMLPurifier/HTMLModule/Name.php';
|
||||
require 'HTMLPurifier/HTMLModule/Nofollow.php';
|
||||
require 'HTMLPurifier/HTMLModule/NonXMLCommonAttributes.php';
|
||||
require 'HTMLPurifier/HTMLModule/Object.php';
|
||||
require 'HTMLPurifier/HTMLModule/Presentation.php';
|
||||
@@ -158,10 +168,12 @@ require 'HTMLPurifier/HTMLModule/Proprietary.php';
|
||||
require 'HTMLPurifier/HTMLModule/Ruby.php';
|
||||
require 'HTMLPurifier/HTMLModule/SafeEmbed.php';
|
||||
require 'HTMLPurifier/HTMLModule/SafeObject.php';
|
||||
require 'HTMLPurifier/HTMLModule/SafeScripting.php';
|
||||
require 'HTMLPurifier/HTMLModule/Scripting.php';
|
||||
require 'HTMLPurifier/HTMLModule/StyleAttribute.php';
|
||||
require 'HTMLPurifier/HTMLModule/Tables.php';
|
||||
require 'HTMLPurifier/HTMLModule/Target.php';
|
||||
require 'HTMLPurifier/HTMLModule/TargetBlank.php';
|
||||
require 'HTMLPurifier/HTMLModule/Text.php';
|
||||
require 'HTMLPurifier/HTMLModule/Tidy.php';
|
||||
require 'HTMLPurifier/HTMLModule/XMLCommonAttributes.php';
|
||||
@@ -196,10 +208,13 @@ require 'HTMLPurifier/Token/Start.php';
|
||||
require 'HTMLPurifier/Token/Text.php';
|
||||
require 'HTMLPurifier/URIFilter/DisableExternal.php';
|
||||
require 'HTMLPurifier/URIFilter/DisableExternalResources.php';
|
||||
require 'HTMLPurifier/URIFilter/DisableResources.php';
|
||||
require 'HTMLPurifier/URIFilter/HostBlacklist.php';
|
||||
require 'HTMLPurifier/URIFilter/MakeAbsolute.php';
|
||||
require 'HTMLPurifier/URIFilter/Munge.php';
|
||||
require 'HTMLPurifier/URIFilter/SafeIframe.php';
|
||||
require 'HTMLPurifier/URIScheme/data.php';
|
||||
require 'HTMLPurifier/URIScheme/file.php';
|
||||
require 'HTMLPurifier/URIScheme/ftp.php';
|
||||
require 'HTMLPurifier/URIScheme/http.php';
|
||||
require 'HTMLPurifier/URIScheme/https.php';
|
||||
|
@@ -19,7 +19,7 @@
|
||||
*/
|
||||
|
||||
/*
|
||||
HTML Purifier 4.1.1 - Standards Compliant HTML Filtering
|
||||
HTML Purifier 4.5.0 - Standards Compliant HTML Filtering
|
||||
Copyright (C) 2006-2008 Edward Z. Yang
|
||||
|
||||
This library is free software; you can redistribute it and/or
|
||||
@@ -55,10 +55,10 @@ class HTMLPurifier
|
||||
{
|
||||
|
||||
/** Version of HTML Purifier */
|
||||
public $version = '4.1.1';
|
||||
public $version = '4.5.0';
|
||||
|
||||
/** Constant with version of HTML Purifier */
|
||||
const VERSION = '4.1.1';
|
||||
const VERSION = '4.5.0';
|
||||
|
||||
/** Global configuration object */
|
||||
public $config;
|
||||
|
@@ -13,6 +13,8 @@
|
||||
$__dir = dirname(__FILE__);
|
||||
|
||||
require_once $__dir . '/HTMLPurifier.php';
|
||||
require_once $__dir . '/HTMLPurifier/Array.php';
|
||||
require_once $__dir . '/HTMLPurifier/ArrayNode.php';
|
||||
require_once $__dir . '/HTMLPurifier/AttrCollections.php';
|
||||
require_once $__dir . '/HTMLPurifier/AttrDef.php';
|
||||
require_once $__dir . '/HTMLPurifier/AttrTransform.php';
|
||||
@@ -30,6 +32,7 @@ require_once $__dir . '/HTMLPurifier/DefinitionCache.php';
|
||||
require_once $__dir . '/HTMLPurifier/DefinitionCacheFactory.php';
|
||||
require_once $__dir . '/HTMLPurifier/Doctype.php';
|
||||
require_once $__dir . '/HTMLPurifier/DoctypeRegistry.php';
|
||||
require_once $__dir . '/HTMLPurifier/DoublyLinkedList.php';
|
||||
require_once $__dir . '/HTMLPurifier/ElementDef.php';
|
||||
require_once $__dir . '/HTMLPurifier/Encoder.php';
|
||||
require_once $__dir . '/HTMLPurifier/EntityLookup.php';
|
||||
@@ -67,6 +70,7 @@ require_once $__dir . '/HTMLPurifier/UnitConverter.php';
|
||||
require_once $__dir . '/HTMLPurifier/VarParser.php';
|
||||
require_once $__dir . '/HTMLPurifier/VarParserException.php';
|
||||
require_once $__dir . '/HTMLPurifier/AttrDef/CSS.php';
|
||||
require_once $__dir . '/HTMLPurifier/AttrDef/Clone.php';
|
||||
require_once $__dir . '/HTMLPurifier/AttrDef/Enum.php';
|
||||
require_once $__dir . '/HTMLPurifier/AttrDef/Integer.php';
|
||||
require_once $__dir . '/HTMLPurifier/AttrDef/Lang.php';
|
||||
@@ -84,6 +88,7 @@ require_once $__dir . '/HTMLPurifier/AttrDef/CSS/DenyElementDecorator.php';
|
||||
require_once $__dir . '/HTMLPurifier/AttrDef/CSS/Filter.php';
|
||||
require_once $__dir . '/HTMLPurifier/AttrDef/CSS/Font.php';
|
||||
require_once $__dir . '/HTMLPurifier/AttrDef/CSS/FontFamily.php';
|
||||
require_once $__dir . '/HTMLPurifier/AttrDef/CSS/Ident.php';
|
||||
require_once $__dir . '/HTMLPurifier/AttrDef/CSS/ImportantDecorator.php';
|
||||
require_once $__dir . '/HTMLPurifier/AttrDef/CSS/Length.php';
|
||||
require_once $__dir . '/HTMLPurifier/AttrDef/CSS/ListStyle.php';
|
||||
@@ -119,14 +124,17 @@ require_once $__dir . '/HTMLPurifier/AttrTransform/Lang.php';
|
||||
require_once $__dir . '/HTMLPurifier/AttrTransform/Length.php';
|
||||
require_once $__dir . '/HTMLPurifier/AttrTransform/Name.php';
|
||||
require_once $__dir . '/HTMLPurifier/AttrTransform/NameSync.php';
|
||||
require_once $__dir . '/HTMLPurifier/AttrTransform/Nofollow.php';
|
||||
require_once $__dir . '/HTMLPurifier/AttrTransform/SafeEmbed.php';
|
||||
require_once $__dir . '/HTMLPurifier/AttrTransform/SafeObject.php';
|
||||
require_once $__dir . '/HTMLPurifier/AttrTransform/SafeParam.php';
|
||||
require_once $__dir . '/HTMLPurifier/AttrTransform/ScriptRequired.php';
|
||||
require_once $__dir . '/HTMLPurifier/AttrTransform/TargetBlank.php';
|
||||
require_once $__dir . '/HTMLPurifier/AttrTransform/Textarea.php';
|
||||
require_once $__dir . '/HTMLPurifier/ChildDef/Chameleon.php';
|
||||
require_once $__dir . '/HTMLPurifier/ChildDef/Custom.php';
|
||||
require_once $__dir . '/HTMLPurifier/ChildDef/Empty.php';
|
||||
require_once $__dir . '/HTMLPurifier/ChildDef/List.php';
|
||||
require_once $__dir . '/HTMLPurifier/ChildDef/Required.php';
|
||||
require_once $__dir . '/HTMLPurifier/ChildDef/Optional.php';
|
||||
require_once $__dir . '/HTMLPurifier/ChildDef/StrictBlockquote.php';
|
||||
@@ -141,10 +149,12 @@ require_once $__dir . '/HTMLPurifier/HTMLModule/CommonAttributes.php';
|
||||
require_once $__dir . '/HTMLPurifier/HTMLModule/Edit.php';
|
||||
require_once $__dir . '/HTMLPurifier/HTMLModule/Forms.php';
|
||||
require_once $__dir . '/HTMLPurifier/HTMLModule/Hypertext.php';
|
||||
require_once $__dir . '/HTMLPurifier/HTMLModule/Iframe.php';
|
||||
require_once $__dir . '/HTMLPurifier/HTMLModule/Image.php';
|
||||
require_once $__dir . '/HTMLPurifier/HTMLModule/Legacy.php';
|
||||
require_once $__dir . '/HTMLPurifier/HTMLModule/List.php';
|
||||
require_once $__dir . '/HTMLPurifier/HTMLModule/Name.php';
|
||||
require_once $__dir . '/HTMLPurifier/HTMLModule/Nofollow.php';
|
||||
require_once $__dir . '/HTMLPurifier/HTMLModule/NonXMLCommonAttributes.php';
|
||||
require_once $__dir . '/HTMLPurifier/HTMLModule/Object.php';
|
||||
require_once $__dir . '/HTMLPurifier/HTMLModule/Presentation.php';
|
||||
@@ -152,10 +162,12 @@ require_once $__dir . '/HTMLPurifier/HTMLModule/Proprietary.php';
|
||||
require_once $__dir . '/HTMLPurifier/HTMLModule/Ruby.php';
|
||||
require_once $__dir . '/HTMLPurifier/HTMLModule/SafeEmbed.php';
|
||||
require_once $__dir . '/HTMLPurifier/HTMLModule/SafeObject.php';
|
||||
require_once $__dir . '/HTMLPurifier/HTMLModule/SafeScripting.php';
|
||||
require_once $__dir . '/HTMLPurifier/HTMLModule/Scripting.php';
|
||||
require_once $__dir . '/HTMLPurifier/HTMLModule/StyleAttribute.php';
|
||||
require_once $__dir . '/HTMLPurifier/HTMLModule/Tables.php';
|
||||
require_once $__dir . '/HTMLPurifier/HTMLModule/Target.php';
|
||||
require_once $__dir . '/HTMLPurifier/HTMLModule/TargetBlank.php';
|
||||
require_once $__dir . '/HTMLPurifier/HTMLModule/Text.php';
|
||||
require_once $__dir . '/HTMLPurifier/HTMLModule/Tidy.php';
|
||||
require_once $__dir . '/HTMLPurifier/HTMLModule/XMLCommonAttributes.php';
|
||||
@@ -190,10 +202,13 @@ require_once $__dir . '/HTMLPurifier/Token/Start.php';
|
||||
require_once $__dir . '/HTMLPurifier/Token/Text.php';
|
||||
require_once $__dir . '/HTMLPurifier/URIFilter/DisableExternal.php';
|
||||
require_once $__dir . '/HTMLPurifier/URIFilter/DisableExternalResources.php';
|
||||
require_once $__dir . '/HTMLPurifier/URIFilter/DisableResources.php';
|
||||
require_once $__dir . '/HTMLPurifier/URIFilter/HostBlacklist.php';
|
||||
require_once $__dir . '/HTMLPurifier/URIFilter/MakeAbsolute.php';
|
||||
require_once $__dir . '/HTMLPurifier/URIFilter/Munge.php';
|
||||
require_once $__dir . '/HTMLPurifier/URIFilter/SafeIframe.php';
|
||||
require_once $__dir . '/HTMLPurifier/URIScheme/data.php';
|
||||
require_once $__dir . '/HTMLPurifier/URIScheme/file.php';
|
||||
require_once $__dir . '/HTMLPurifier/URIScheme/ftp.php';
|
||||
require_once $__dir . '/HTMLPurifier/URIScheme/http.php';
|
||||
require_once $__dir . '/HTMLPurifier/URIScheme/https.php';
|
||||
|
184
library/HTMLPurifier/Array.php
Normal file
184
library/HTMLPurifier/Array.php
Normal file
@@ -0,0 +1,184 @@
|
||||
<?php
|
||||
|
||||
class HTMLPurifier_Array implements ArrayAccess
|
||||
{
|
||||
/**
|
||||
* @param HTMLPurifier_ArrayNode
|
||||
*/
|
||||
public $head = null;
|
||||
|
||||
/**
|
||||
* @var int
|
||||
*/
|
||||
protected $count = 0;
|
||||
|
||||
/**
|
||||
* @var int
|
||||
*/
|
||||
protected $offset = 0;
|
||||
|
||||
/**
|
||||
* @var HTMLPurifier_ArrayNode
|
||||
*/
|
||||
protected $offsetItem = null;
|
||||
|
||||
|
||||
public function __construct(array $array = array())
|
||||
{
|
||||
/**
|
||||
* @var HTMLPurifier_ArrayNode $temp
|
||||
*/
|
||||
$temp = null;
|
||||
$i = 0;
|
||||
|
||||
foreach ($array as &$v) {
|
||||
$item = new HTMLPurifier_ArrayNode($v);
|
||||
|
||||
if ($this->head == null) {
|
||||
$this->head = &$item;
|
||||
}
|
||||
if ($temp instanceof HTMLPurifier_ArrayNode) {
|
||||
$item->prev = &$temp;
|
||||
$temp->next = &$item;
|
||||
}
|
||||
unset($temp);
|
||||
$temp = &$item;
|
||||
|
||||
$i ++;
|
||||
|
||||
unset($item, $v);
|
||||
}
|
||||
$this->count = $i;
|
||||
$this->offset = 0;
|
||||
$this->offsetItem = &$this->head;
|
||||
}
|
||||
|
||||
protected function findIndex($offset)
|
||||
{
|
||||
if ($this->head == null) {
|
||||
return array(
|
||||
'correct' => false,
|
||||
'value' => null
|
||||
);
|
||||
}
|
||||
|
||||
$current = &$this->head;
|
||||
$index = 0;
|
||||
|
||||
if ($this->offset <= $offset && $this->offsetItem instanceof HTMLPurifier_ArrayNode) {
|
||||
$current = &$this->offsetItem;
|
||||
$index = $this->offset;
|
||||
}
|
||||
|
||||
while ($current->next instanceof HTMLPurifier_ArrayNode && $index != $offset) {
|
||||
$current = &$current->next;
|
||||
$index ++;
|
||||
}
|
||||
|
||||
if ($index == $offset) {
|
||||
$this->offset = $offset;
|
||||
$this->offsetItem = &$current;
|
||||
return array(
|
||||
'correct' => true,
|
||||
'value' => &$current
|
||||
);
|
||||
}
|
||||
|
||||
return array(
|
||||
'correct' => false,
|
||||
'value' => &$current
|
||||
);
|
||||
}
|
||||
|
||||
public function insertBefore($offset, $value)
|
||||
{
|
||||
$result = $this->findIndex($offset);
|
||||
|
||||
$this->count ++;
|
||||
$item = new HTMLPurifier_ArrayNode($value);
|
||||
if ($result['correct'] == false) {
|
||||
if ($result['value'] instanceof HTMLPurifier_ArrayNode) {
|
||||
$result['value']->next = &$item;
|
||||
$item->prev = &$result['value'];
|
||||
}
|
||||
} else {
|
||||
if ($result['value'] instanceof HTMLPurifier_ArrayNode) {
|
||||
$item->prev = &$result['value']->prev;
|
||||
$item->next = &$result['value'];
|
||||
}
|
||||
if ($item->prev instanceof HTMLPurifier_ArrayNode) {
|
||||
$item->prev->next = &$item;
|
||||
}
|
||||
if ($result['value'] instanceof HTMLPurifier_ArrayNode) {
|
||||
$result['value']->prev = &$item;
|
||||
}
|
||||
}
|
||||
if ($offset == 0) {
|
||||
$this->head = &$item;
|
||||
}
|
||||
if ($offset <= $this->offset && $this->offsetItem instanceof HTMLPurifier_ArrayNode) {
|
||||
$this->offsetItem = &$this->offsetItem->prev;
|
||||
}
|
||||
}
|
||||
|
||||
public function remove($offset)
|
||||
{
|
||||
$result = $this->findIndex($offset);
|
||||
|
||||
if ($result['correct']) {
|
||||
$this->count --;
|
||||
$item = $result['value'];
|
||||
$item->prev->next = &$result['value']->next;
|
||||
$item->next->prev = &$result['value']->prev;
|
||||
if ($offset == 0) {
|
||||
$this->head = &$item->next;
|
||||
}
|
||||
if ($offset < $this->offset) {
|
||||
$this->offset --;
|
||||
} elseif ($offset == $this->offset) {
|
||||
$this->offsetItem = &$item->next;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public function getArray()
|
||||
{
|
||||
$return = array();
|
||||
$head = $this->head;
|
||||
|
||||
while ($head instanceof HTMLPurifier_ArrayNode) {
|
||||
$return[] = $head->value;
|
||||
$head = &$head->next;
|
||||
}
|
||||
|
||||
return $return;
|
||||
}
|
||||
|
||||
public function offsetExists($offset)
|
||||
{
|
||||
return $offset >= 0 && $offset < $this->count;
|
||||
}
|
||||
|
||||
public function offsetGet($offset)
|
||||
{
|
||||
$result = $this->findIndex($offset);
|
||||
if ($result['correct']) {
|
||||
return $result['value']->value;
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
public function offsetSet($offset, $value)
|
||||
{
|
||||
$result = $this->findIndex($offset);
|
||||
if ($result['correct']) {
|
||||
$result['value']->value = &$value;
|
||||
}
|
||||
}
|
||||
|
||||
public function offsetUnset($offset)
|
||||
{
|
||||
$this->remove($offset);
|
||||
}
|
||||
}
|
24
library/HTMLPurifier/ArrayNode.php
Normal file
24
library/HTMLPurifier/ArrayNode.php
Normal file
@@ -0,0 +1,24 @@
|
||||
<?php
|
||||
|
||||
class HTMLPurifier_ArrayNode
|
||||
{
|
||||
public function __construct(&$value)
|
||||
{
|
||||
$this->value = &$value;
|
||||
}
|
||||
|
||||
/**
|
||||
* @var HTMLPurifier_ArrayNode
|
||||
*/
|
||||
public $prev = null;
|
||||
|
||||
/**
|
||||
* @var HTMLPurifier_ArrayNode
|
||||
*/
|
||||
public $next = null;
|
||||
|
||||
/**
|
||||
* @var mixed
|
||||
*/
|
||||
public $value = null;
|
||||
}
|
@@ -32,7 +32,7 @@ class HTMLPurifier_AttrDef_CSS_Background extends HTMLPurifier_AttrDef
|
||||
$string = $this->mungeRgb($string);
|
||||
|
||||
// assumes URI doesn't have spaces in it
|
||||
$bits = explode(' ', strtolower($string)); // bits to process
|
||||
$bits = explode(' ', $string); // bits to process
|
||||
|
||||
$caught = array();
|
||||
$caught['color'] = false;
|
||||
|
@@ -2,11 +2,43 @@
|
||||
|
||||
/**
|
||||
* Validates a font family list according to CSS spec
|
||||
* @todo whitelisting allowed fonts would be nice
|
||||
*/
|
||||
class HTMLPurifier_AttrDef_CSS_FontFamily extends HTMLPurifier_AttrDef
|
||||
{
|
||||
|
||||
protected $mask = null;
|
||||
|
||||
public function __construct() {
|
||||
$this->mask = '_- ';
|
||||
for ($c = 'a'; $c <= 'z'; $c++) $this->mask .= $c;
|
||||
for ($c = 'A'; $c <= 'Z'; $c++) $this->mask .= $c;
|
||||
for ($c = '0'; $c <= '9'; $c++) $this->mask .= $c; // cast-y, but should be fine
|
||||
// special bytes used by UTF-8
|
||||
for ($i = 0x80; $i <= 0xFF; $i++) {
|
||||
// We don't bother excluding invalid bytes in this range,
|
||||
// because the our restriction of well-formed UTF-8 will
|
||||
// prevent these from ever occurring.
|
||||
$this->mask .= chr($i);
|
||||
}
|
||||
|
||||
/*
|
||||
PHP's internal strcspn implementation is
|
||||
O(length of string * length of mask), making it inefficient
|
||||
for large masks. However, it's still faster than
|
||||
preg_match 8)
|
||||
for (p = s1;;) {
|
||||
spanp = s2;
|
||||
do {
|
||||
if (*spanp == c || p == s1_end) {
|
||||
return p - s1;
|
||||
}
|
||||
} while (spanp++ < (s2_end - 1));
|
||||
c = *++p;
|
||||
}
|
||||
*/
|
||||
// possible optimization: invert the mask.
|
||||
}
|
||||
|
||||
public function validate($string, $config, $context) {
|
||||
static $generic_names = array(
|
||||
'serif' => true,
|
||||
@@ -15,6 +47,7 @@ class HTMLPurifier_AttrDef_CSS_FontFamily extends HTMLPurifier_AttrDef
|
||||
'fantasy' => true,
|
||||
'cursive' => true
|
||||
);
|
||||
$allowed_fonts = $config->get('CSS.AllowedFonts');
|
||||
|
||||
// assume that no font names contain commas in them
|
||||
$fonts = explode(',', $string);
|
||||
@@ -24,7 +57,9 @@ class HTMLPurifier_AttrDef_CSS_FontFamily extends HTMLPurifier_AttrDef
|
||||
if ($font === '') continue;
|
||||
// match a generic name
|
||||
if (isset($generic_names[$font])) {
|
||||
$final .= $font . ', ';
|
||||
if ($allowed_fonts === null || isset($allowed_fonts[$font])) {
|
||||
$final .= $font . ', ';
|
||||
}
|
||||
continue;
|
||||
}
|
||||
// match a quoted name
|
||||
@@ -40,6 +75,10 @@ class HTMLPurifier_AttrDef_CSS_FontFamily extends HTMLPurifier_AttrDef
|
||||
|
||||
// $font is a pure representation of the font name
|
||||
|
||||
if ($allowed_fonts !== null && !isset($allowed_fonts[$font])) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (ctype_alnum($font) && $font !== '') {
|
||||
// very simple font, allow it in unharmed
|
||||
$final .= $font . ', ';
|
||||
@@ -50,17 +89,103 @@ class HTMLPurifier_AttrDef_CSS_FontFamily extends HTMLPurifier_AttrDef
|
||||
// shouldn't show up regardless
|
||||
$font = str_replace(array("\n", "\t", "\r", "\x0C"), ' ', $font);
|
||||
|
||||
// These ugly transforms don't pose a security
|
||||
// risk (as \\ and \" might). We could try to be clever and
|
||||
// use single-quote wrapping when there is a double quote
|
||||
// present, but I have choosen not to implement that.
|
||||
// (warning: this code relies on the selection of quotation
|
||||
// mark below)
|
||||
$font = str_replace('\\', '\\5C ', $font);
|
||||
$font = str_replace('"', '\\22 ', $font);
|
||||
// Here, there are various classes of characters which need
|
||||
// to be treated differently:
|
||||
// - Alphanumeric characters are essentially safe. We
|
||||
// handled these above.
|
||||
// - Spaces require quoting, though most parsers will do
|
||||
// the right thing if there aren't any characters that
|
||||
// can be misinterpreted
|
||||
// - Dashes rarely occur, but they fairly unproblematic
|
||||
// for parsing/rendering purposes.
|
||||
// The above characters cover the majority of Western font
|
||||
// names.
|
||||
// - Arbitrary Unicode characters not in ASCII. Because
|
||||
// most parsers give little thought to Unicode, treatment
|
||||
// of these codepoints is basically uniform, even for
|
||||
// punctuation-like codepoints. These characters can
|
||||
// show up in non-Western pages and are supported by most
|
||||
// major browsers, for example: "MS 明朝" is a
|
||||
// legitimate font-name
|
||||
// <http://ja.wikipedia.org/wiki/MS_明朝>. See
|
||||
// the CSS3 spec for more examples:
|
||||
// <http://www.w3.org/TR/2011/WD-css3-fonts-20110324/localizedfamilynames.png>
|
||||
// You can see live samples of these on the Internet:
|
||||
// <http://www.google.co.jp/search?q=font-family+MS+明朝|ゴシック>
|
||||
// However, most of these fonts have ASCII equivalents:
|
||||
// for example, 'MS Mincho', and it's considered
|
||||
// professional to use ASCII font names instead of
|
||||
// Unicode font names. Thanks Takeshi Terada for
|
||||
// providing this information.
|
||||
// The following characters, to my knowledge, have not been
|
||||
// used to name font names.
|
||||
// - Single quote. While theoretically you might find a
|
||||
// font name that has a single quote in its name (serving
|
||||
// as an apostrophe, e.g. Dave's Scribble), I haven't
|
||||
// been able to find any actual examples of this.
|
||||
// Internet Explorer's cssText translation (which I
|
||||
// believe is invoked by innerHTML) normalizes any
|
||||
// quoting to single quotes, and fails to escape single
|
||||
// quotes. (Note that this is not IE's behavior for all
|
||||
// CSS properties, just some sort of special casing for
|
||||
// font-family). So a single quote *cannot* be used
|
||||
// safely in the font-family context if there will be an
|
||||
// innerHTML/cssText translation. Note that Firefox 3.x
|
||||
// does this too.
|
||||
// - Double quote. In IE, these get normalized to
|
||||
// single-quotes, no matter what the encoding. (Fun
|
||||
// fact, in IE8, the 'content' CSS property gained
|
||||
// support, where they special cased to preserve encoded
|
||||
// double quotes, but still translate unadorned double
|
||||
// quotes into single quotes.) So, because their
|
||||
// fixpoint behavior is identical to single quotes, they
|
||||
// cannot be allowed either. Firefox 3.x displays
|
||||
// single-quote style behavior.
|
||||
// - Backslashes are reduced by one (so \\ -> \) every
|
||||
// iteration, so they cannot be used safely. This shows
|
||||
// up in IE7, IE8 and FF3
|
||||
// - Semicolons, commas and backticks are handled properly.
|
||||
// - The rest of the ASCII punctuation is handled properly.
|
||||
// We haven't checked what browsers do to unadorned
|
||||
// versions, but this is not important as long as the
|
||||
// browser doesn't /remove/ surrounding quotes (as IE does
|
||||
// for HTML).
|
||||
//
|
||||
// With these results in hand, we conclude that there are
|
||||
// various levels of safety:
|
||||
// - Paranoid: alphanumeric, spaces and dashes(?)
|
||||
// - International: Paranoid + non-ASCII Unicode
|
||||
// - Edgy: Everything except quotes, backslashes
|
||||
// - NoJS: Standards compliance, e.g. sod IE. Note that
|
||||
// with some judicious character escaping (since certain
|
||||
// types of escaping doesn't work) this is theoretically
|
||||
// OK as long as innerHTML/cssText is not called.
|
||||
// We believe that international is a reasonable default
|
||||
// (that we will implement now), and once we do more
|
||||
// extensive research, we may feel comfortable with dropping
|
||||
// it down to edgy.
|
||||
|
||||
// complicated font, requires quoting
|
||||
$final .= "\"$font\", "; // note that this will later get turned into "
|
||||
// Edgy: alphanumeric, spaces, dashes, underscores and Unicode. Use of
|
||||
// str(c)spn assumes that the string was already well formed
|
||||
// Unicode (which of course it is).
|
||||
if (strspn($font, $this->mask) !== strlen($font)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Historical:
|
||||
// In the absence of innerHTML/cssText, these ugly
|
||||
// transforms don't pose a security risk (as \\ and \"
|
||||
// might--these escapes are not supported by most browsers).
|
||||
// We could try to be clever and use single-quote wrapping
|
||||
// when there is a double quote present, but I have choosen
|
||||
// not to implement that. (NOTE: you can reduce the amount
|
||||
// of escapes by one depending on what quoting style you use)
|
||||
// $font = str_replace('\\', '\\5C ', $font);
|
||||
// $font = str_replace('"', '\\22 ', $font);
|
||||
// $font = str_replace("'", '\\27 ', $font);
|
||||
|
||||
// font possibly with spaces, requires quoting
|
||||
$final .= "'$font', ";
|
||||
}
|
||||
$final = rtrim($final, ', ');
|
||||
if ($final === '') return false;
|
||||
|
24
library/HTMLPurifier/AttrDef/CSS/Ident.php
Normal file
24
library/HTMLPurifier/AttrDef/CSS/Ident.php
Normal file
@@ -0,0 +1,24 @@
|
||||
<?php
|
||||
|
||||
/**
|
||||
* Validates based on {ident} CSS grammar production
|
||||
*/
|
||||
class HTMLPurifier_AttrDef_CSS_Ident extends HTMLPurifier_AttrDef
|
||||
{
|
||||
|
||||
public function validate($string, $config, $context) {
|
||||
|
||||
$string = trim($string);
|
||||
|
||||
// early abort: '' and '0' (strings that convert to false) are invalid
|
||||
if (!$string) return false;
|
||||
|
||||
$pattern = '/^(-?[A-Za-z_][A-Za-z_\-0-9]*)$/';
|
||||
if (!preg_match($pattern, $string)) return false;
|
||||
return $string;
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// vim: et sw=4 sts=4
|
@@ -43,6 +43,15 @@ class HTMLPurifier_AttrDef_CSS_URI extends HTMLPurifier_AttrDef_URI
|
||||
// extra sanity check; should have been done by URI
|
||||
$result = str_replace(array('"', "\\", "\n", "\x0c", "\r"), "", $result);
|
||||
|
||||
// suspicious characters are ()'; we're going to percent encode
|
||||
// them for safety.
|
||||
$result = str_replace(array('(', ')', "'"), array('%28', '%29', '%27'), $result);
|
||||
|
||||
// there's an extra bug where ampersands lose their escaping on
|
||||
// an innerHTML cycle, so a very unlucky query parameter could
|
||||
// then change the meaning of the URL. Unfortunately, there's
|
||||
// not much we can do about that...
|
||||
|
||||
return "url(\"$result\")";
|
||||
|
||||
}
|
||||
|
28
library/HTMLPurifier/AttrDef/Clone.php
Normal file
28
library/HTMLPurifier/AttrDef/Clone.php
Normal file
@@ -0,0 +1,28 @@
|
||||
<?php
|
||||
|
||||
/**
|
||||
* Dummy AttrDef that mimics another AttrDef, BUT it generates clones
|
||||
* with make.
|
||||
*/
|
||||
class HTMLPurifier_AttrDef_Clone extends HTMLPurifier_AttrDef
|
||||
{
|
||||
/**
|
||||
* What we're cloning
|
||||
*/
|
||||
protected $clone;
|
||||
|
||||
public function __construct($clone) {
|
||||
$this->clone = $clone;
|
||||
}
|
||||
|
||||
public function validate($v, $config, $context) {
|
||||
return $this->clone->validate($v, $config, $context);
|
||||
}
|
||||
|
||||
public function make($string) {
|
||||
return clone $this->clone;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// vim: et sw=4 sts=4
|
@@ -14,7 +14,8 @@ class HTMLPurifier_AttrDef_HTML_Color extends HTMLPurifier_AttrDef
|
||||
$string = trim($string);
|
||||
|
||||
if (empty($string)) return false;
|
||||
if (isset($colors[$string])) return $colors[$string];
|
||||
$lower = strtolower($string);
|
||||
if (isset($colors[$lower])) return $colors[$lower];
|
||||
if ($string[0] === '#') $hex = substr($string, 1);
|
||||
else $hex = $string;
|
||||
|
||||
|
@@ -12,12 +12,22 @@
|
||||
class HTMLPurifier_AttrDef_HTML_ID extends HTMLPurifier_AttrDef
|
||||
{
|
||||
|
||||
// ref functionality disabled, since we also have to verify
|
||||
// whether or not the ID it refers to exists
|
||||
// selector is NOT a valid thing to use for IDREFs, because IDREFs
|
||||
// *must* target IDs that exist, whereas selector #ids do not.
|
||||
|
||||
/**
|
||||
* Determines whether or not we're validating an ID in a CSS
|
||||
* selector context.
|
||||
*/
|
||||
protected $selector;
|
||||
|
||||
public function __construct($selector = false) {
|
||||
$this->selector = $selector;
|
||||
}
|
||||
|
||||
public function validate($id, $config, $context) {
|
||||
|
||||
if (!$config->get('Attr.EnableID')) return false;
|
||||
if (!$this->selector && !$config->get('Attr.EnableID')) return false;
|
||||
|
||||
$id = trim($id); // trim it first
|
||||
|
||||
@@ -33,10 +43,10 @@ class HTMLPurifier_AttrDef_HTML_ID extends HTMLPurifier_AttrDef
|
||||
'%Attr.IDPrefix is set', E_USER_WARNING);
|
||||
}
|
||||
|
||||
//if (!$this->ref) {
|
||||
if (!$this->selector) {
|
||||
$id_accumulator =& $context->get('IDAccumulator');
|
||||
if (isset($id_accumulator->ids[$id])) return false;
|
||||
//}
|
||||
}
|
||||
|
||||
// we purposely avoid using regex, hopefully this is faster
|
||||
|
||||
@@ -56,7 +66,7 @@ class HTMLPurifier_AttrDef_HTML_ID extends HTMLPurifier_AttrDef
|
||||
return false;
|
||||
}
|
||||
|
||||
if (/*!$this->ref && */$result) $id_accumulator->add($id);
|
||||
if (!$this->selector && $result) $id_accumulator->add($id);
|
||||
|
||||
// if no change was made to the ID, return the result
|
||||
// else, return the new id if stripping whitespace made it
|
||||
|
@@ -19,7 +19,7 @@ class HTMLPurifier_AttrDef_URI extends HTMLPurifier_AttrDef
|
||||
}
|
||||
|
||||
public function make($string) {
|
||||
$embeds = (bool) $string;
|
||||
$embeds = ($string === 'embedded');
|
||||
return new HTMLPurifier_AttrDef_URI($embeds);
|
||||
}
|
||||
|
||||
|
@@ -23,6 +23,12 @@ class HTMLPurifier_AttrDef_URI_Host extends HTMLPurifier_AttrDef
|
||||
|
||||
public function validate($string, $config, $context) {
|
||||
$length = strlen($string);
|
||||
// empty hostname is OK; it's usually semantically equivalent:
|
||||
// the default host as defined by a URI scheme is used:
|
||||
//
|
||||
// If the URI scheme defines a default for host, then that
|
||||
// default applies when the host subcomponent is undefined
|
||||
// or when the registered name is empty (zero length).
|
||||
if ($string === '') return '';
|
||||
if ($length > 1 && $string[0] === '[' && $string[$length-1] === ']') {
|
||||
//IPv6
|
||||
@@ -38,9 +44,8 @@ class HTMLPurifier_AttrDef_URI_Host extends HTMLPurifier_AttrDef
|
||||
|
||||
// A regular domain name.
|
||||
|
||||
// This breaks I18N domain names, but we don't have proper IRI support,
|
||||
// so force users to insert Punycode. If there's complaining we'll
|
||||
// try to fix things into an international friendly form.
|
||||
// This doesn't match I18N domain names, but we don't have proper IRI support,
|
||||
// so force users to insert Punycode.
|
||||
|
||||
// The productions describing this are:
|
||||
$a = '[a-z]'; // alpha
|
||||
@@ -51,10 +56,44 @@ class HTMLPurifier_AttrDef_URI_Host extends HTMLPurifier_AttrDef
|
||||
// toplabel = alpha | alpha *( alphanum | "-" ) alphanum
|
||||
$toplabel = "$a($and*$an)?";
|
||||
// hostname = *( domainlabel "." ) toplabel [ "." ]
|
||||
$match = preg_match("/^($domainlabel\.)*$toplabel\.?$/i", $string);
|
||||
if (!$match) return false;
|
||||
if (preg_match("/^($domainlabel\.)*$toplabel\.?$/i", $string)) {
|
||||
return $string;
|
||||
}
|
||||
|
||||
return $string;
|
||||
// If we have Net_IDNA2 support, we can support IRIs by
|
||||
// punycoding them. (This is the most portable thing to do,
|
||||
// since otherwise we have to assume browsers support
|
||||
|
||||
if ($config->get('Core.EnableIDNA')) {
|
||||
$idna = new Net_IDNA2(array('encoding' => 'utf8', 'overlong' => false, 'strict' => true));
|
||||
// we need to encode each period separately
|
||||
$parts = explode('.', $string);
|
||||
try {
|
||||
$new_parts = array();
|
||||
foreach ($parts as $part) {
|
||||
$encodable = false;
|
||||
for ($i = 0, $c = strlen($part); $i < $c; $i++) {
|
||||
if (ord($part[$i]) > 0x7a) {
|
||||
$encodable = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!$encodable) {
|
||||
$new_parts[] = $part;
|
||||
} else {
|
||||
$new_parts[] = $idna->encode($part);
|
||||
}
|
||||
}
|
||||
$string = implode('.', $new_parts);
|
||||
if (preg_match("/^($domainlabel\.)*$toplabel\.?$/i", $string)) {
|
||||
return $string;
|
||||
}
|
||||
} catch (Exception $e) {
|
||||
// XXX error reporting
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
}
|
||||
|
45
library/HTMLPurifier/AttrTransform/Nofollow.php
Normal file
45
library/HTMLPurifier/AttrTransform/Nofollow.php
Normal file
@@ -0,0 +1,45 @@
|
||||
<?php
|
||||
|
||||
// must be called POST validation
|
||||
|
||||
/**
|
||||
* Adds rel="nofollow" to all outbound links. This transform is
|
||||
* only attached if Attr.Nofollow is TRUE.
|
||||
*/
|
||||
class HTMLPurifier_AttrTransform_Nofollow extends HTMLPurifier_AttrTransform
|
||||
{
|
||||
private $parser;
|
||||
|
||||
public function __construct() {
|
||||
$this->parser = new HTMLPurifier_URIParser();
|
||||
}
|
||||
|
||||
public function transform($attr, $config, $context) {
|
||||
|
||||
if (!isset($attr['href'])) {
|
||||
return $attr;
|
||||
}
|
||||
|
||||
// XXX Kind of inefficient
|
||||
$url = $this->parser->parse($attr['href']);
|
||||
$scheme = $url->getSchemeObj($config, $context);
|
||||
|
||||
if ($scheme->browsable && !$url->isLocal($config, $context)) {
|
||||
if (isset($attr['rel'])) {
|
||||
$rels = explode(' ', $attr['rel']);
|
||||
if (!in_array('nofollow', $rels)) {
|
||||
$rels[] = 'nofollow';
|
||||
}
|
||||
$attr['rel'] = implode(' ', $rels);
|
||||
} else {
|
||||
$attr['rel'] = 'nofollow';
|
||||
}
|
||||
}
|
||||
|
||||
return $attr;
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// vim: et sw=4 sts=4
|
@@ -19,6 +19,7 @@ class HTMLPurifier_AttrTransform_SafeParam extends HTMLPurifier_AttrTransform
|
||||
|
||||
public function __construct() {
|
||||
$this->uri = new HTMLPurifier_AttrDef_URI(true); // embedded
|
||||
$this->wmode = new HTMLPurifier_AttrDef_Enum(array('window', 'opaque', 'transparent'));
|
||||
}
|
||||
|
||||
public function transform($attr, $config, $context) {
|
||||
@@ -33,8 +34,15 @@ class HTMLPurifier_AttrTransform_SafeParam extends HTMLPurifier_AttrTransform
|
||||
case 'allowNetworking':
|
||||
$attr['value'] = 'internal';
|
||||
break;
|
||||
case 'allowFullScreen':
|
||||
if ($config->get('HTML.FlashAllowFullScreen')) {
|
||||
$attr['value'] = ($attr['value'] == 'true') ? 'true' : 'false';
|
||||
} else {
|
||||
$attr['value'] = 'false';
|
||||
}
|
||||
break;
|
||||
case 'wmode':
|
||||
$attr['value'] = 'window';
|
||||
$attr['value'] = $this->wmode->validate($attr['value'], $config, $context);
|
||||
break;
|
||||
case 'movie':
|
||||
case 'src':
|
||||
|
38
library/HTMLPurifier/AttrTransform/TargetBlank.php
Normal file
38
library/HTMLPurifier/AttrTransform/TargetBlank.php
Normal file
@@ -0,0 +1,38 @@
|
||||
<?php
|
||||
|
||||
// must be called POST validation
|
||||
|
||||
/**
|
||||
* Adds target="blank" to all outbound links. This transform is
|
||||
* only attached if Attr.TargetBlank is TRUE. This works regardless
|
||||
* of whether or not Attr.AllowedFrameTargets
|
||||
*/
|
||||
class HTMLPurifier_AttrTransform_TargetBlank extends HTMLPurifier_AttrTransform
|
||||
{
|
||||
private $parser;
|
||||
|
||||
public function __construct() {
|
||||
$this->parser = new HTMLPurifier_URIParser();
|
||||
}
|
||||
|
||||
public function transform($attr, $config, $context) {
|
||||
|
||||
if (!isset($attr['href'])) {
|
||||
return $attr;
|
||||
}
|
||||
|
||||
// XXX Kind of inefficient
|
||||
$url = $this->parser->parse($attr['href']);
|
||||
$scheme = $url->getSchemeObj($config, $context);
|
||||
|
||||
if ($scheme->browsable && !$url->isBenign($config, $context)) {
|
||||
$attr['target'] = '_blank';
|
||||
}
|
||||
|
||||
return $attr;
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// vim: et sw=4 sts=4
|
@@ -15,6 +15,13 @@ class HTMLPurifier_AttrTypes
|
||||
* types.
|
||||
*/
|
||||
public function __construct() {
|
||||
// XXX This is kind of poor, since we don't actually /clone/
|
||||
// instances; instead, we use the supplied make() attribute. So,
|
||||
// the underlying class must know how to deal with arguments.
|
||||
// With the old implementation of Enum, that ignored its
|
||||
// arguments when handling a make dispatch, the IAlign
|
||||
// definition wouldn't work.
|
||||
|
||||
// pseudo-types, must be instantiated via shorthand
|
||||
$this->info['Enum'] = new HTMLPurifier_AttrDef_Enum();
|
||||
$this->info['Bool'] = new HTMLPurifier_AttrDef_HTML_Bool();
|
||||
@@ -29,6 +36,9 @@ class HTMLPurifier_AttrTypes
|
||||
$this->info['URI'] = new HTMLPurifier_AttrDef_URI();
|
||||
$this->info['LanguageCode'] = new HTMLPurifier_AttrDef_Lang();
|
||||
$this->info['Color'] = new HTMLPurifier_AttrDef_HTML_Color();
|
||||
$this->info['IAlign'] = self::makeEnum('top,middle,bottom,left,right');
|
||||
$this->info['LAlign'] = self::makeEnum('top,bottom,left,right');
|
||||
$this->info['FrameTarget'] = new HTMLPurifier_AttrDef_HTML_FrameTarget();
|
||||
|
||||
// unimplemented aliases
|
||||
$this->info['ContentType'] = new HTMLPurifier_AttrDef_Text();
|
||||
@@ -44,6 +54,10 @@ class HTMLPurifier_AttrTypes
|
||||
$this->info['Number'] = new HTMLPurifier_AttrDef_Integer(false, false, true);
|
||||
}
|
||||
|
||||
private static function makeEnum($in) {
|
||||
return new HTMLPurifier_AttrDef_Clone(new HTMLPurifier_AttrDef_Enum(explode(',', $in)));
|
||||
}
|
||||
|
||||
/**
|
||||
* Retrieves a type
|
||||
* @param $type String type name
|
||||
|
@@ -37,7 +37,12 @@ class HTMLPurifier_Bootstrap
|
||||
public static function autoload($class) {
|
||||
$file = HTMLPurifier_Bootstrap::getPath($class);
|
||||
if (!$file) return false;
|
||||
require HTMLPURIFIER_PREFIX . '/' . $file;
|
||||
// Technically speaking, it should be ok and more efficient to
|
||||
// just do 'require', but Antonio Parraga reports that with
|
||||
// Zend extensions such as Zend debugger and APC, this invariant
|
||||
// may be broken. Since we have efficient alternatives, pay
|
||||
// the cost here and avoid the bug.
|
||||
require_once HTMLPURIFIER_PREFIX . '/' . $file;
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -65,31 +70,37 @@ class HTMLPurifier_Bootstrap
|
||||
if ( ($funcs = spl_autoload_functions()) === false ) {
|
||||
spl_autoload_register($autoload);
|
||||
} elseif (function_exists('spl_autoload_unregister')) {
|
||||
$compat = version_compare(PHP_VERSION, '5.1.2', '<=') &&
|
||||
version_compare(PHP_VERSION, '5.1.0', '>=');
|
||||
foreach ($funcs as $func) {
|
||||
if (is_array($func)) {
|
||||
// :TRICKY: There are some compatibility issues and some
|
||||
// places where we need to error out
|
||||
$reflector = new ReflectionMethod($func[0], $func[1]);
|
||||
if (!$reflector->isStatic()) {
|
||||
throw new Exception('
|
||||
HTML Purifier autoloader registrar is not compatible
|
||||
with non-static object methods due to PHP Bug #44144;
|
||||
Please do not use HTMLPurifier.autoload.php (or any
|
||||
file that includes this file); instead, place the code:
|
||||
spl_autoload_register(array(\'HTMLPurifier_Bootstrap\', \'autoload\'))
|
||||
after your own autoloaders.
|
||||
');
|
||||
if (version_compare(PHP_VERSION, '5.3.0', '>=')) {
|
||||
// prepend flag exists, no need for shenanigans
|
||||
spl_autoload_register($autoload, true, true);
|
||||
} else {
|
||||
$buggy = version_compare(PHP_VERSION, '5.2.11', '<');
|
||||
$compat = version_compare(PHP_VERSION, '5.1.2', '<=') &&
|
||||
version_compare(PHP_VERSION, '5.1.0', '>=');
|
||||
foreach ($funcs as $func) {
|
||||
if ($buggy && is_array($func)) {
|
||||
// :TRICKY: There are some compatibility issues and some
|
||||
// places where we need to error out
|
||||
$reflector = new ReflectionMethod($func[0], $func[1]);
|
||||
if (!$reflector->isStatic()) {
|
||||
throw new Exception('
|
||||
HTML Purifier autoloader registrar is not compatible
|
||||
with non-static object methods due to PHP Bug #44144;
|
||||
Please do not use HTMLPurifier.autoload.php (or any
|
||||
file that includes this file); instead, place the code:
|
||||
spl_autoload_register(array(\'HTMLPurifier_Bootstrap\', \'autoload\'))
|
||||
after your own autoloaders.
|
||||
');
|
||||
}
|
||||
// Suprisingly, spl_autoload_register supports the
|
||||
// Class::staticMethod callback format, although call_user_func doesn't
|
||||
if ($compat) $func = implode('::', $func);
|
||||
}
|
||||
// Suprisingly, spl_autoload_register supports the
|
||||
// Class::staticMethod callback format, although call_user_func doesn't
|
||||
if ($compat) $func = implode('::', $func);
|
||||
spl_autoload_unregister($func);
|
||||
}
|
||||
spl_autoload_unregister($func);
|
||||
spl_autoload_register($autoload);
|
||||
foreach ($funcs as $func) spl_autoload_register($func);
|
||||
}
|
||||
spl_autoload_register($autoload);
|
||||
foreach ($funcs as $func) spl_autoload_register($func);
|
||||
}
|
||||
}
|
||||
|
||||
|
@@ -208,8 +208,9 @@ class HTMLPurifier_CSSDefinition extends HTMLPurifier_Definition
|
||||
|
||||
$this->info['border-spacing'] = new HTMLPurifier_AttrDef_CSS_Multiple(new HTMLPurifier_AttrDef_CSS_Length(), 2);
|
||||
|
||||
// partial support
|
||||
$this->info['white-space'] = new HTMLPurifier_AttrDef_Enum(array('nowrap'));
|
||||
// These CSS properties don't work on many browsers, but we live
|
||||
// in THE FUTURE!
|
||||
$this->info['white-space'] = new HTMLPurifier_AttrDef_Enum(array('nowrap', 'normal', 'pre', 'pre-wrap', 'pre-line'));
|
||||
|
||||
if ($config->get('CSS.Proprietary')) {
|
||||
$this->doSetupProprietary($config);
|
||||
@@ -219,6 +220,10 @@ class HTMLPurifier_CSSDefinition extends HTMLPurifier_Definition
|
||||
$this->doSetupTricky($config);
|
||||
}
|
||||
|
||||
if ($config->get('CSS.Trusted')) {
|
||||
$this->doSetupTrusted($config);
|
||||
}
|
||||
|
||||
$allow_important = $config->get('CSS.AllowImportant');
|
||||
// wrap all attr-defs with decorator that handles !important
|
||||
foreach ($this->info as $k => $v) {
|
||||
@@ -245,12 +250,17 @@ class HTMLPurifier_CSSDefinition extends HTMLPurifier_Definition
|
||||
// only opacity, for now
|
||||
$this->info['filter'] = new HTMLPurifier_AttrDef_CSS_Filter();
|
||||
|
||||
// more CSS3
|
||||
$this->info['page-break-after'] =
|
||||
$this->info['page-break-before'] = new HTMLPurifier_AttrDef_Enum(array('auto','always','avoid','left','right'));
|
||||
$this->info['page-break-inside'] = new HTMLPurifier_AttrDef_Enum(array('auto','avoid'));
|
||||
|
||||
}
|
||||
|
||||
protected function doSetupTricky($config) {
|
||||
$this->info['display'] = new HTMLPurifier_AttrDef_Enum(array(
|
||||
'inline', 'block', 'list-item', 'run-in', 'compact',
|
||||
'marker', 'table', 'inline-table', 'table-row-group',
|
||||
'marker', 'table', 'inline-block', 'inline-table', 'table-row-group',
|
||||
'table-header-group', 'table-footer-group', 'table-row',
|
||||
'table-column-group', 'table-column', 'table-cell', 'table-caption', 'none'
|
||||
));
|
||||
@@ -260,6 +270,23 @@ class HTMLPurifier_CSSDefinition extends HTMLPurifier_Definition
|
||||
$this->info['overflow'] = new HTMLPurifier_AttrDef_Enum(array('visible', 'hidden', 'auto', 'scroll'));
|
||||
}
|
||||
|
||||
protected function doSetupTrusted($config) {
|
||||
$this->info['position'] = new HTMLPurifier_AttrDef_Enum(array(
|
||||
'static', 'relative', 'absolute', 'fixed'
|
||||
));
|
||||
$this->info['top'] =
|
||||
$this->info['left'] =
|
||||
$this->info['right'] =
|
||||
$this->info['bottom'] = new HTMLPurifier_AttrDef_CSS_Composite(array(
|
||||
new HTMLPurifier_AttrDef_CSS_Length(),
|
||||
new HTMLPurifier_AttrDef_CSS_Percentage(),
|
||||
new HTMLPurifier_AttrDef_Enum(array('auto')),
|
||||
));
|
||||
$this->info['z-index'] = new HTMLPurifier_AttrDef_CSS_Composite(array(
|
||||
new HTMLPurifier_AttrDef_Integer(),
|
||||
new HTMLPurifier_AttrDef_Enum(array('auto')),
|
||||
));
|
||||
}
|
||||
|
||||
/**
|
||||
* Performs extra config-based processing. Based off of
|
||||
@@ -272,20 +299,29 @@ class HTMLPurifier_CSSDefinition extends HTMLPurifier_Definition
|
||||
// setup allowed elements
|
||||
$support = "(for information on implementing this, see the ".
|
||||
"support forums) ";
|
||||
$allowed_attributes = $config->get('CSS.AllowedProperties');
|
||||
if ($allowed_attributes !== null) {
|
||||
$allowed_properties = $config->get('CSS.AllowedProperties');
|
||||
if ($allowed_properties !== null) {
|
||||
foreach ($this->info as $name => $d) {
|
||||
if(!isset($allowed_attributes[$name])) unset($this->info[$name]);
|
||||
unset($allowed_attributes[$name]);
|
||||
if(!isset($allowed_properties[$name])) unset($this->info[$name]);
|
||||
unset($allowed_properties[$name]);
|
||||
}
|
||||
// emit errors
|
||||
foreach ($allowed_attributes as $name => $d) {
|
||||
foreach ($allowed_properties as $name => $d) {
|
||||
// :TODO: Is this htmlspecialchars() call really necessary?
|
||||
$name = htmlspecialchars($name);
|
||||
trigger_error("Style attribute '$name' is not supported $support", E_USER_WARNING);
|
||||
}
|
||||
}
|
||||
|
||||
$forbidden_properties = $config->get('CSS.ForbiddenProperties');
|
||||
if ($forbidden_properties !== null) {
|
||||
foreach ($this->info as $name => $d) {
|
||||
if (isset($forbidden_properties[$name])) {
|
||||
unset($this->info[$name]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
|
120
library/HTMLPurifier/ChildDef/List.php
Normal file
120
library/HTMLPurifier/ChildDef/List.php
Normal file
@@ -0,0 +1,120 @@
|
||||
<?php
|
||||
|
||||
/**
|
||||
* Definition for list containers ul and ol.
|
||||
*/
|
||||
class HTMLPurifier_ChildDef_List extends HTMLPurifier_ChildDef
|
||||
{
|
||||
public $type = 'list';
|
||||
// lying a little bit, so that we can handle ul and ol ourselves
|
||||
// XXX: This whole business with 'wrap' is all a bit unsatisfactory
|
||||
public $elements = array('li' => true, 'ul' => true, 'ol' => true);
|
||||
public function validateChildren($tokens_of_children, $config, $context) {
|
||||
// Flag for subclasses
|
||||
$this->whitespace = false;
|
||||
|
||||
// if there are no tokens, delete parent node
|
||||
if (empty($tokens_of_children)) return false;
|
||||
|
||||
// the new set of children
|
||||
$result = array();
|
||||
|
||||
// current depth into the nest
|
||||
$nesting = 0;
|
||||
|
||||
// a little sanity check to make sure it's not ALL whitespace
|
||||
$all_whitespace = true;
|
||||
|
||||
$seen_li = false;
|
||||
$need_close_li = false;
|
||||
|
||||
foreach ($tokens_of_children as $token) {
|
||||
if (!empty($token->is_whitespace)) {
|
||||
$result[] = $token;
|
||||
continue;
|
||||
}
|
||||
$all_whitespace = false; // phew, we're not talking about whitespace
|
||||
|
||||
if ($nesting == 1 && $need_close_li) {
|
||||
$result[] = new HTMLPurifier_Token_End('li');
|
||||
$nesting--;
|
||||
$need_close_li = false;
|
||||
}
|
||||
|
||||
$is_child = ($nesting == 0);
|
||||
|
||||
if ($token instanceof HTMLPurifier_Token_Start) {
|
||||
$nesting++;
|
||||
} elseif ($token instanceof HTMLPurifier_Token_End) {
|
||||
$nesting--;
|
||||
}
|
||||
|
||||
if ($is_child) {
|
||||
if ($token->name === 'li') {
|
||||
// good
|
||||
$seen_li = true;
|
||||
} elseif ($token->name === 'ul' || $token->name === 'ol') {
|
||||
// we want to tuck this into the previous li
|
||||
$need_close_li = true;
|
||||
$nesting++;
|
||||
if (!$seen_li) {
|
||||
// create a new li element
|
||||
$result[] = new HTMLPurifier_Token_Start('li');
|
||||
} else {
|
||||
// backtrack until </li> found
|
||||
while(true) {
|
||||
$t = array_pop($result);
|
||||
if ($t instanceof HTMLPurifier_Token_End) {
|
||||
// XXX actually, these invariants could very plausibly be violated
|
||||
// if we are doing silly things with modifying the set of allowed elements.
|
||||
// FORTUNATELY, it doesn't make a difference, since the allowed
|
||||
// elements are hard-coded here!
|
||||
if ($t->name !== 'li') {
|
||||
trigger_error("Only li present invariant violated in List ChildDef", E_USER_ERROR);
|
||||
return false;
|
||||
}
|
||||
break;
|
||||
} elseif ($t instanceof HTMLPurifier_Token_Empty) { // bleagh
|
||||
if ($t->name !== 'li') {
|
||||
trigger_error("Only li present invariant violated in List ChildDef", E_USER_ERROR);
|
||||
return false;
|
||||
}
|
||||
// XXX this should have a helper for it...
|
||||
$result[] = new HTMLPurifier_Token_Start('li', $t->attr, $t->line, $t->col, $t->armor);
|
||||
break;
|
||||
} else {
|
||||
if (!$t->is_whitespace) {
|
||||
trigger_error("Only whitespace present invariant violated in List ChildDef", E_USER_ERROR);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// start wrapping (this doesn't precisely mimic
|
||||
// browser behavior, but what browsers do is kind of
|
||||
// hard to mimic in a standards compliant way
|
||||
// XXX Actually, this has no impact in practice,
|
||||
// because this gets handled earlier. Arguably,
|
||||
// we should rip out all of that processing
|
||||
$result[] = new HTMLPurifier_Token_Start('li');
|
||||
$nesting++;
|
||||
$seen_li = true;
|
||||
$need_close_li = true;
|
||||
}
|
||||
}
|
||||
$result[] = $token;
|
||||
}
|
||||
if ($need_close_li) {
|
||||
$result[] = new HTMLPurifier_Token_End('li');
|
||||
}
|
||||
if (empty($result)) return false;
|
||||
if ($all_whitespace) {
|
||||
return false;
|
||||
}
|
||||
if ($tokens_of_children == $result) return true;
|
||||
return $result;
|
||||
}
|
||||
}
|
||||
|
||||
// vim: et sw=4 sts=4
|
@@ -1,7 +1,33 @@
|
||||
<?php
|
||||
|
||||
/**
|
||||
* Definition for tables
|
||||
* Definition for tables. The general idea is to extract out all of the
|
||||
* essential bits, and then reconstruct it later.
|
||||
*
|
||||
* This is a bit confusing, because the DTDs and the W3C
|
||||
* validators seem to disagree on the appropriate definition. The
|
||||
* DTD claims:
|
||||
*
|
||||
* (CAPTION?, (COL*|COLGROUP*), THEAD?, TFOOT?, TBODY+)
|
||||
*
|
||||
* But actually, the HTML4 spec then has this to say:
|
||||
*
|
||||
* The TBODY start tag is always required except when the table
|
||||
* contains only one table body and no table head or foot sections.
|
||||
* The TBODY end tag may always be safely omitted.
|
||||
*
|
||||
* So the DTD is kind of wrong. The validator is, unfortunately, kind
|
||||
* of on crack.
|
||||
*
|
||||
* The definition changed again in XHTML1.1; and in my opinion, this
|
||||
* formulation makes the most sense.
|
||||
*
|
||||
* caption?, ( col* | colgroup* ), (( thead?, tfoot?, tbody+ ) | ( tr+ ))
|
||||
*
|
||||
* Essentially, we have two modes: thead/tfoot/tbody mode, and tr mode.
|
||||
* If we encounter a thead, tfoot or tbody, we are placed in the former
|
||||
* mode, and we *must* wrap any stray tr segments with a tbody. But if
|
||||
* we don't run into any of them, just have tr tags is OK.
|
||||
*/
|
||||
class HTMLPurifier_ChildDef_Table extends HTMLPurifier_ChildDef
|
||||
{
|
||||
@@ -33,6 +59,8 @@ class HTMLPurifier_ChildDef_Table extends HTMLPurifier_ChildDef
|
||||
$collection = array(); // collected nodes
|
||||
$tag_index = 0; // the first node might be whitespace,
|
||||
// so this tells us where the start tag is
|
||||
$tbody_mode = false; // if true, then we need to wrap any stray
|
||||
// <tr>s with a <tbody>.
|
||||
|
||||
foreach ($tokens_of_children as $token) {
|
||||
$is_child = ($nesting == 0);
|
||||
@@ -51,8 +79,9 @@ class HTMLPurifier_ChildDef_Table extends HTMLPurifier_ChildDef
|
||||
// okay, let's stash the tokens away
|
||||
// first token tells us the type of the collection
|
||||
switch ($collection[$tag_index]->name) {
|
||||
case 'tr':
|
||||
case 'tbody':
|
||||
$tbody_mode = true;
|
||||
case 'tr':
|
||||
$content[] = $collection;
|
||||
break;
|
||||
case 'caption':
|
||||
@@ -61,13 +90,28 @@ class HTMLPurifier_ChildDef_Table extends HTMLPurifier_ChildDef
|
||||
break;
|
||||
case 'thead':
|
||||
case 'tfoot':
|
||||
$tbody_mode = true;
|
||||
// XXX This breaks rendering properties with
|
||||
// Firefox, which never floats a <thead> to
|
||||
// the top. Ever. (Our scheme will float the
|
||||
// first <thead> to the top.) So maybe
|
||||
// <thead>s that are not first should be
|
||||
// turned into <tbody>? Very tricky, indeed.
|
||||
|
||||
// access the appropriate variable, $thead or $tfoot
|
||||
$var = $collection[$tag_index]->name;
|
||||
if ($$var === false) {
|
||||
$$var = $collection;
|
||||
} else {
|
||||
// transmutate the first and less entries into
|
||||
// tbody tags, and then put into content
|
||||
// Oops, there's a second one! What
|
||||
// should we do? Current behavior is to
|
||||
// transmutate the first and last entries into
|
||||
// tbody tags, and then put into content.
|
||||
// Maybe a better idea is to *attach
|
||||
// it* to the existing thead or tfoot?
|
||||
// We don't do this, because Firefox
|
||||
// doesn't float an extra tfoot to the
|
||||
// bottom like it does for the first one.
|
||||
$collection[$tag_index]->name = 'tbody';
|
||||
$collection[count($collection)-1]->name = 'tbody';
|
||||
$content[] = $collection;
|
||||
@@ -126,7 +170,48 @@ class HTMLPurifier_ChildDef_Table extends HTMLPurifier_ChildDef
|
||||
if ($cols !== false) foreach ($cols as $token_array) $ret = array_merge($ret, $token_array);
|
||||
if ($thead !== false) $ret = array_merge($ret, $thead);
|
||||
if ($tfoot !== false) $ret = array_merge($ret, $tfoot);
|
||||
foreach ($content as $token_array) $ret = array_merge($ret, $token_array);
|
||||
|
||||
if ($tbody_mode) {
|
||||
// a little tricky, since the start of the collection may be
|
||||
// whitespace
|
||||
$inside_tbody = false;
|
||||
foreach ($content as $token_array) {
|
||||
// find the starting token
|
||||
foreach ($token_array as $t) {
|
||||
if ($t->name === 'tr' || $t->name === 'tbody') {
|
||||
break;
|
||||
}
|
||||
} // iterator variable carries over
|
||||
if ($t->name === 'tr') {
|
||||
if ($inside_tbody) {
|
||||
$ret = array_merge($ret, $token_array);
|
||||
} else {
|
||||
$ret[] = new HTMLPurifier_Token_Start('tbody');
|
||||
$ret = array_merge($ret, $token_array);
|
||||
$inside_tbody = true;
|
||||
}
|
||||
} elseif ($t->name === 'tbody') {
|
||||
if ($inside_tbody) {
|
||||
$ret[] = new HTMLPurifier_Token_End('tbody');
|
||||
$inside_tbody = false;
|
||||
$ret = array_merge($ret, $token_array);
|
||||
} else {
|
||||
$ret = array_merge($ret, $token_array);
|
||||
}
|
||||
} else {
|
||||
trigger_error("tr/tbody in content invariant failed in Table ChildDef", E_USER_ERROR);
|
||||
}
|
||||
}
|
||||
if ($inside_tbody) {
|
||||
$ret[] = new HTMLPurifier_Token_End('tbody');
|
||||
}
|
||||
} else {
|
||||
foreach ($content as $token_array) {
|
||||
// invariant: everything in here is <tr>s
|
||||
$ret = array_merge($ret, $token_array);
|
||||
}
|
||||
}
|
||||
|
||||
if (!empty($collection) && $is_collecting == false){
|
||||
// grab the trailing space
|
||||
$ret = array_merge($ret, $collection);
|
||||
|
@@ -20,7 +20,7 @@ class HTMLPurifier_Config
|
||||
/**
|
||||
* HTML Purifier's version
|
||||
*/
|
||||
public $version = '4.1.1';
|
||||
public $version = '4.5.0';
|
||||
|
||||
/**
|
||||
* Bool indicator whether or not to automatically finalize
|
||||
@@ -44,7 +44,7 @@ class HTMLPurifier_Config
|
||||
/**
|
||||
* Parser for variables
|
||||
*/
|
||||
protected $parser;
|
||||
protected $parser = null;
|
||||
|
||||
/**
|
||||
* Reference HTMLPurifier_ConfigSchema for value checking
|
||||
@@ -76,7 +76,8 @@ class HTMLPurifier_Config
|
||||
|
||||
/**
|
||||
* Set to false if you do not want line and file numbers in errors
|
||||
* (useful when unit testing)
|
||||
* (useful when unit testing). This will also compress some errors
|
||||
* and exceptions.
|
||||
*/
|
||||
public $chatty = true;
|
||||
|
||||
@@ -188,7 +189,7 @@ class HTMLPurifier_Config
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a md5 signature of a segment of the configuration object
|
||||
* Returns a SHA-1 signature of a segment of the configuration object
|
||||
* that uniquely identifies that particular configuration
|
||||
* @note Revision is handled specially and is removed from the batch
|
||||
* before processing!
|
||||
@@ -198,18 +199,18 @@ class HTMLPurifier_Config
|
||||
if (empty($this->serials[$namespace])) {
|
||||
$batch = $this->getBatch($namespace);
|
||||
unset($batch['DefinitionRev']);
|
||||
$this->serials[$namespace] = md5(serialize($batch));
|
||||
$this->serials[$namespace] = sha1(serialize($batch));
|
||||
}
|
||||
return $this->serials[$namespace];
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a md5 signature for the entire configuration object
|
||||
* Returns a SHA-1 signature for the entire configuration object
|
||||
* that uniquely identifies that particular configuration
|
||||
*/
|
||||
public function getSerial() {
|
||||
if (empty($this->serial)) {
|
||||
$this->serial = md5(serialize($this->getAll()));
|
||||
$this->serial = sha1(serialize($this->getAll()));
|
||||
}
|
||||
return $this->serial;
|
||||
}
|
||||
@@ -318,26 +319,64 @@ class HTMLPurifier_Config
|
||||
* Retrieves object reference to the HTML definition.
|
||||
* @param $raw Return a copy that has not been setup yet. Must be
|
||||
* called before it's been setup, otherwise won't work.
|
||||
* @param $optimized If true, this method may return null, to
|
||||
* indicate that a cached version of the modified
|
||||
* definition object is available and no further edits
|
||||
* are necessary. Consider using
|
||||
* maybeGetRawHTMLDefinition, which is more explicitly
|
||||
* named, instead.
|
||||
*/
|
||||
public function getHTMLDefinition($raw = false) {
|
||||
return $this->getDefinition('HTML', $raw);
|
||||
public function getHTMLDefinition($raw = false, $optimized = false) {
|
||||
return $this->getDefinition('HTML', $raw, $optimized);
|
||||
}
|
||||
|
||||
/**
|
||||
* Retrieves object reference to the CSS definition
|
||||
* @param $raw Return a copy that has not been setup yet. Must be
|
||||
* called before it's been setup, otherwise won't work.
|
||||
* @param $optimized If true, this method may return null, to
|
||||
* indicate that a cached version of the modified
|
||||
* definition object is available and no further edits
|
||||
* are necessary. Consider using
|
||||
* maybeGetRawCSSDefinition, which is more explicitly
|
||||
* named, instead.
|
||||
*/
|
||||
public function getCSSDefinition($raw = false) {
|
||||
return $this->getDefinition('CSS', $raw);
|
||||
public function getCSSDefinition($raw = false, $optimized = false) {
|
||||
return $this->getDefinition('CSS', $raw, $optimized);
|
||||
}
|
||||
|
||||
/**
|
||||
* Retrieves object reference to the URI definition
|
||||
* @param $raw Return a copy that has not been setup yet. Must be
|
||||
* called before it's been setup, otherwise won't work.
|
||||
* @param $optimized If true, this method may return null, to
|
||||
* indicate that a cached version of the modified
|
||||
* definition object is available and no further edits
|
||||
* are necessary. Consider using
|
||||
* maybeGetRawURIDefinition, which is more explicitly
|
||||
* named, instead.
|
||||
*/
|
||||
public function getURIDefinition($raw = false, $optimized = false) {
|
||||
return $this->getDefinition('URI', $raw, $optimized);
|
||||
}
|
||||
|
||||
/**
|
||||
* Retrieves a definition
|
||||
* @param $type Type of definition: HTML, CSS, etc
|
||||
* @param $raw Whether or not definition should be returned raw
|
||||
* @param $optimized Only has an effect when $raw is true. Whether
|
||||
* or not to return null if the result is already present in
|
||||
* the cache. This is off by default for backwards
|
||||
* compatibility reasons, but you need to do things this
|
||||
* way in order to ensure that caching is done properly.
|
||||
* Check out enduser-customize.html for more details.
|
||||
* We probably won't ever change this default, as much as the
|
||||
* maybe semantics is the "right thing to do."
|
||||
*/
|
||||
public function getDefinition($type, $raw = false) {
|
||||
public function getDefinition($type, $raw = false, $optimized = false) {
|
||||
if ($optimized && !$raw) {
|
||||
throw new HTMLPurifier_Exception("Cannot set optimized = true when raw = false");
|
||||
}
|
||||
if (!$this->finalized) $this->autoFinalize();
|
||||
// temporarily suspend locks, so we can handle recursive definition calls
|
||||
$lock = $this->lock;
|
||||
@@ -346,52 +385,137 @@ class HTMLPurifier_Config
|
||||
$cache = $factory->create($type, $this);
|
||||
$this->lock = $lock;
|
||||
if (!$raw) {
|
||||
// see if we can quickly supply a definition
|
||||
// full definition
|
||||
// ---------------
|
||||
// check if definition is in memory
|
||||
if (!empty($this->definitions[$type])) {
|
||||
if (!$this->definitions[$type]->setup) {
|
||||
$this->definitions[$type]->setup($this);
|
||||
$cache->set($this->definitions[$type], $this);
|
||||
$def = $this->definitions[$type];
|
||||
// check if the definition is setup
|
||||
if ($def->setup) {
|
||||
return $def;
|
||||
} else {
|
||||
$def->setup($this);
|
||||
if ($def->optimized) $cache->add($def, $this);
|
||||
return $def;
|
||||
}
|
||||
return $this->definitions[$type];
|
||||
}
|
||||
// memory check missed, try cache
|
||||
$this->definitions[$type] = $cache->get($this);
|
||||
if ($this->definitions[$type]) {
|
||||
// definition in cache, return it
|
||||
return $this->definitions[$type];
|
||||
// check if definition is in cache
|
||||
$def = $cache->get($this);
|
||||
if ($def) {
|
||||
// definition in cache, save to memory and return it
|
||||
$this->definitions[$type] = $def;
|
||||
return $def;
|
||||
}
|
||||
} elseif (
|
||||
!empty($this->definitions[$type]) &&
|
||||
!$this->definitions[$type]->setup
|
||||
) {
|
||||
// raw requested, raw in memory, quick return
|
||||
return $this->definitions[$type];
|
||||
// initialize it
|
||||
$def = $this->initDefinition($type);
|
||||
// set it up
|
||||
$this->lock = $type;
|
||||
$def->setup($this);
|
||||
$this->lock = null;
|
||||
// save in cache
|
||||
$cache->add($def, $this);
|
||||
// return it
|
||||
return $def;
|
||||
} else {
|
||||
// raw definition
|
||||
// --------------
|
||||
// check preconditions
|
||||
$def = null;
|
||||
if ($optimized) {
|
||||
if (is_null($this->get($type . '.DefinitionID'))) {
|
||||
// fatally error out if definition ID not set
|
||||
throw new HTMLPurifier_Exception("Cannot retrieve raw version without specifying %$type.DefinitionID");
|
||||
}
|
||||
}
|
||||
if (!empty($this->definitions[$type])) {
|
||||
$def = $this->definitions[$type];
|
||||
if ($def->setup && !$optimized) {
|
||||
$extra = $this->chatty ? " (try moving this code block earlier in your initialization)" : "";
|
||||
throw new HTMLPurifier_Exception("Cannot retrieve raw definition after it has already been setup" . $extra);
|
||||
}
|
||||
if ($def->optimized === null) {
|
||||
$extra = $this->chatty ? " (try flushing your cache)" : "";
|
||||
throw new HTMLPurifier_Exception("Optimization status of definition is unknown" . $extra);
|
||||
}
|
||||
if ($def->optimized !== $optimized) {
|
||||
$msg = $optimized ? "optimized" : "unoptimized";
|
||||
$extra = $this->chatty ? " (this backtrace is for the first inconsistent call, which was for a $msg raw definition)" : "";
|
||||
throw new HTMLPurifier_Exception("Inconsistent use of optimized and unoptimized raw definition retrievals" . $extra);
|
||||
}
|
||||
}
|
||||
// check if definition was in memory
|
||||
if ($def) {
|
||||
if ($def->setup) {
|
||||
// invariant: $optimized === true (checked above)
|
||||
return null;
|
||||
} else {
|
||||
return $def;
|
||||
}
|
||||
}
|
||||
// if optimized, check if definition was in cache
|
||||
// (because we do the memory check first, this formulation
|
||||
// is prone to cache slamming, but I think
|
||||
// guaranteeing that either /all/ of the raw
|
||||
// setup code or /none/ of it is run is more important.)
|
||||
if ($optimized) {
|
||||
// This code path only gets run once; once we put
|
||||
// something in $definitions (which is guaranteed by the
|
||||
// trailing code), we always short-circuit above.
|
||||
$def = $cache->get($this);
|
||||
if ($def) {
|
||||
// save the full definition for later, but don't
|
||||
// return it yet
|
||||
$this->definitions[$type] = $def;
|
||||
return null;
|
||||
}
|
||||
}
|
||||
// check invariants for creation
|
||||
if (!$optimized) {
|
||||
if (!is_null($this->get($type . '.DefinitionID'))) {
|
||||
if ($this->chatty) {
|
||||
$this->triggerError("Due to a documentation error in previous version of HTML Purifier, your definitions are not being cached. If this is OK, you can remove the %$type.DefinitionRev and %$type.DefinitionID declaration. Otherwise, modify your code to use maybeGetRawDefinition, and test if the returned value is null before making any edits (if it is null, that means that a cached version is available, and no raw operations are necessary). See <a href='http://htmlpurifier.org/docs/enduser-customize.html#optimized'>Customize</a> for more details", E_USER_WARNING);
|
||||
} else {
|
||||
$this->triggerError("Useless DefinitionID declaration", E_USER_WARNING);
|
||||
}
|
||||
}
|
||||
}
|
||||
// initialize it
|
||||
$def = $this->initDefinition($type);
|
||||
$def->optimized = $optimized;
|
||||
return $def;
|
||||
}
|
||||
throw new HTMLPurifier_Exception("The impossible happened!");
|
||||
}
|
||||
|
||||
private function initDefinition($type) {
|
||||
// quick checks failed, let's create the object
|
||||
if ($type == 'HTML') {
|
||||
$this->definitions[$type] = new HTMLPurifier_HTMLDefinition();
|
||||
$def = new HTMLPurifier_HTMLDefinition();
|
||||
} elseif ($type == 'CSS') {
|
||||
$this->definitions[$type] = new HTMLPurifier_CSSDefinition();
|
||||
$def = new HTMLPurifier_CSSDefinition();
|
||||
} elseif ($type == 'URI') {
|
||||
$this->definitions[$type] = new HTMLPurifier_URIDefinition();
|
||||
$def = new HTMLPurifier_URIDefinition();
|
||||
} else {
|
||||
throw new HTMLPurifier_Exception("Definition of $type type not supported");
|
||||
}
|
||||
// quick abort if raw
|
||||
if ($raw) {
|
||||
if (is_null($this->get($type . '.DefinitionID'))) {
|
||||
// fatally error out if definition ID not set
|
||||
throw new HTMLPurifier_Exception("Cannot retrieve raw version without specifying %$type.DefinitionID");
|
||||
}
|
||||
return $this->definitions[$type];
|
||||
}
|
||||
// set it up
|
||||
$this->lock = $type;
|
||||
$this->definitions[$type]->setup($this);
|
||||
$this->lock = null;
|
||||
// save in cache
|
||||
$cache->set($this->definitions[$type], $this);
|
||||
return $this->definitions[$type];
|
||||
$this->definitions[$type] = $def;
|
||||
return $def;
|
||||
}
|
||||
|
||||
public function maybeGetRawDefinition($name) {
|
||||
return $this->getDefinition($name, true, true);
|
||||
}
|
||||
|
||||
public function maybeGetRawHTMLDefinition() {
|
||||
return $this->getDefinition('HTML', true, true);
|
||||
}
|
||||
|
||||
public function maybeGetRawCSSDefinition() {
|
||||
return $this->getDefinition('CSS', true, true);
|
||||
}
|
||||
|
||||
public function maybeGetRawURIDefinition() {
|
||||
return $this->getDefinition('URI', true, true);
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -544,22 +668,28 @@ class HTMLPurifier_Config
|
||||
*/
|
||||
public function finalize() {
|
||||
$this->finalized = true;
|
||||
unset($this->parser);
|
||||
$this->parser = null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Produces a nicely formatted error message by supplying the
|
||||
* stack frame information from two levels up and OUTSIDE of
|
||||
* HTMLPurifier_Config.
|
||||
* stack frame information OUTSIDE of HTMLPurifier_Config.
|
||||
*/
|
||||
protected function triggerError($msg, $no) {
|
||||
// determine previous stack frame
|
||||
$backtrace = debug_backtrace();
|
||||
if ($this->chatty && isset($backtrace[1])) {
|
||||
$frame = $backtrace[1];
|
||||
$extra = " on line {$frame['line']} in file {$frame['file']}";
|
||||
} else {
|
||||
$extra = '';
|
||||
$extra = '';
|
||||
if ($this->chatty) {
|
||||
$trace = debug_backtrace();
|
||||
// zip(tail(trace), trace) -- but PHP is not Haskell har har
|
||||
for ($i = 0, $c = count($trace); $i < $c - 1; $i++) {
|
||||
// XXX this is not correct on some versions of HTML Purifier
|
||||
if ($trace[$i + 1]['class'] === 'HTMLPurifier_Config') {
|
||||
continue;
|
||||
}
|
||||
$frame = $trace[$i];
|
||||
$extra = " invoked on line {$frame['line']} in file {$frame['file']}";
|
||||
break;
|
||||
}
|
||||
}
|
||||
trigger_error($msg . $extra, $no);
|
||||
}
|
||||
|
@@ -60,7 +60,13 @@ class HTMLPurifier_ConfigSchema {
|
||||
* Unserializes the default ConfigSchema.
|
||||
*/
|
||||
public static function makeFromSerial() {
|
||||
return unserialize(file_get_contents(HTMLPURIFIER_PREFIX . '/HTMLPurifier/ConfigSchema/schema.ser'));
|
||||
$contents = file_get_contents(HTMLPURIFIER_PREFIX . '/HTMLPurifier/ConfigSchema/schema.ser');
|
||||
$r = unserialize($contents);
|
||||
if (!$r) {
|
||||
$hash = sha1($contents);
|
||||
trigger_error("Unserialization of configuration schema failed, sha1 of file was $hash", E_USER_ERROR);
|
||||
}
|
||||
return $r;
|
||||
}
|
||||
|
||||
/**
|
||||
|
Binary file not shown.
0
library/HTMLPurifier/ConfigSchema/schema/AutoFormat.RemoveSpansWithoutAttributes.txt
Executable file → Normal file
0
library/HTMLPurifier/ConfigSchema/schema/AutoFormat.RemoveSpansWithoutAttributes.txt
Executable file → Normal file
@@ -0,0 +1,12 @@
|
||||
CSS.AllowedFonts
|
||||
TYPE: lookup/null
|
||||
VERSION: 4.3.0
|
||||
DEFAULT: NULL
|
||||
--DESCRIPTION--
|
||||
<p>
|
||||
Allows you to manually specify a set of allowed fonts. If
|
||||
<code>NULL</code>, all fonts are allowed. This directive
|
||||
affects generic names (serif, sans-serif, monospace, cursive,
|
||||
fantasy) as well as specific font families.
|
||||
</p>
|
||||
--# vim: et sw=4 sts=4
|
@@ -0,0 +1,13 @@
|
||||
CSS.ForbiddenProperties
|
||||
TYPE: lookup
|
||||
VERSION: 4.2.0
|
||||
DEFAULT: array()
|
||||
--DESCRIPTION--
|
||||
<p>
|
||||
This is the logical inverse of %CSS.AllowedProperties, and it will
|
||||
override that directive or any other directive. If possible,
|
||||
%CSS.AllowedProperties is recommended over this directive,
|
||||
because it can sometimes be difficult to tell whether or not you've
|
||||
forbidden all of the CSS properties you truly would like to disallow.
|
||||
</p>
|
||||
--# vim: et sw=4 sts=4
|
9
library/HTMLPurifier/ConfigSchema/schema/CSS.Trusted.txt
Normal file
9
library/HTMLPurifier/ConfigSchema/schema/CSS.Trusted.txt
Normal file
@@ -0,0 +1,9 @@
|
||||
CSS.Trusted
|
||||
TYPE: bool
|
||||
VERSION: 4.2.1
|
||||
DEFAULT: false
|
||||
--DESCRIPTION--
|
||||
Indicates whether or not the user's CSS input is trusted or not. If the
|
||||
input is trusted, a more expansive set of allowed properties. See
|
||||
also %HTML.Trusted.
|
||||
--# vim: et sw=4 sts=4
|
@@ -0,0 +1,11 @@
|
||||
Cache.SerializerPermissions
|
||||
TYPE: int
|
||||
VERSION: 4.3.0
|
||||
DEFAULT: 0755
|
||||
--DESCRIPTION--
|
||||
|
||||
<p>
|
||||
Directory permissions of the files and directories created inside
|
||||
the DefinitionCache/Serializer or other custom serializer path.
|
||||
</p>
|
||||
--# vim: et sw=4 sts=4
|
@@ -24,5 +24,6 @@ array (
|
||||
--DESCRIPTION--
|
||||
|
||||
Lookup array of color names to six digit hexadecimal number corresponding
|
||||
to color, with preceding hash mark. Used when parsing colors.
|
||||
to color, with preceding hash mark. Used when parsing colors. The lookup
|
||||
is done in a case-insensitive manner.
|
||||
--# vim: et sw=4 sts=4
|
||||
|
@@ -0,0 +1,14 @@
|
||||
Core.DisableExcludes
|
||||
TYPE: bool
|
||||
DEFAULT: false
|
||||
VERSION: 4.5.0
|
||||
--DESCRIPTION--
|
||||
<p>
|
||||
This directive disables SGML-style exclusions, e.g. the exclusion of
|
||||
<code><object></code> in any descendant of a
|
||||
<code><pre></code> tag. Disabling excludes will allow some
|
||||
invalid documents to pass through HTML Purifier, but HTML Purifier
|
||||
will also be less likely to accidentally remove large documents during
|
||||
processing.
|
||||
</p>
|
||||
--# vim: et sw=4 sts=4
|
@@ -0,0 +1,9 @@
|
||||
Core.EnableIDNA
|
||||
TYPE: bool
|
||||
DEFAULT: false
|
||||
VERSION: 4.4.0
|
||||
--DESCRIPTION--
|
||||
Allows international domain names in URLs. This configuration option
|
||||
requires the PEAR Net_IDNA2 module to be installed. It operates by
|
||||
punycoding any internationalized host names for maximum portability.
|
||||
--# vim: et sw=4 sts=4
|
@@ -0,0 +1,11 @@
|
||||
Core.NormalizeNewlines
|
||||
TYPE: bool
|
||||
VERSION: 4.2.0
|
||||
DEFAULT: true
|
||||
--DESCRIPTION--
|
||||
<p>
|
||||
Whether or not to normalize newlines to the operating
|
||||
system default. When <code>false</code>, HTML Purifier
|
||||
will attempt to preserve mixed newline files.
|
||||
</p>
|
||||
--# vim: et sw=4 sts=4
|
@@ -0,0 +1,11 @@
|
||||
Core.RemoveProcessingInstructions
|
||||
TYPE: bool
|
||||
VERSION: 4.2.0
|
||||
DEFAULT: false
|
||||
--DESCRIPTION--
|
||||
Instead of escaping processing instructions in the form <code><? ...
|
||||
?></code>, remove it out-right. This may be useful if the HTML
|
||||
you are validating contains XML processing instruction gunk, however,
|
||||
it can also be user-unfriendly for people attempting to post PHP
|
||||
snippets.
|
||||
--# vim: et sw=4 sts=4
|
@@ -3,6 +3,11 @@ TYPE: bool
|
||||
VERSION: 3.1.0
|
||||
DEFAULT: false
|
||||
--DESCRIPTION--
|
||||
<p>
|
||||
<strong>Warning:</strong> Deprecated in favor of %HTML.SafeObject and
|
||||
%Output.FlashCompat (turn both on to allow YouTube videos and other
|
||||
Flash content).
|
||||
</p>
|
||||
<p>
|
||||
This directive enables YouTube video embedding in HTML Purifier. Check
|
||||
<a href="http://htmlpurifier.org/docs/enduser-youtube.html">this document
|
||||
|
@@ -5,11 +5,14 @@ DEFAULT: NULL
|
||||
--DESCRIPTION--
|
||||
|
||||
<p>
|
||||
This is a convenience directive that rolls the functionality of
|
||||
%HTML.AllowedElements and %HTML.AllowedAttributes into one directive.
|
||||
This is a preferred convenience directive that combines
|
||||
%HTML.AllowedElements and %HTML.AllowedAttributes.
|
||||
Specify elements and attributes that are allowed using:
|
||||
<code>element1[attr1|attr2],element2...</code>. You can also use
|
||||
newlines instead of commas to separate elements.
|
||||
<code>element1[attr1|attr2],element2...</code>. For example,
|
||||
if you would like to only allow paragraphs and links, specify
|
||||
<code>a[href],p</code>. You can specify attributes that apply
|
||||
to all elements using an asterisk, e.g. <code>*[lang]</code>.
|
||||
You can also use newlines instead of commas to separate elements.
|
||||
</p>
|
||||
<p>
|
||||
<strong>Warning</strong>:
|
||||
|
@@ -0,0 +1,10 @@
|
||||
HTML.AllowedComments
|
||||
TYPE: lookup
|
||||
VERSION: 4.4.0
|
||||
DEFAULT: array()
|
||||
--DESCRIPTION--
|
||||
A whitelist which indicates what explicit comment bodies should be
|
||||
allowed, modulo leading and trailing whitespace. See also %HTML.AllowedCommentsRegexp
|
||||
(these directives are union'ed together, so a comment is considered
|
||||
valid if any directive deems it valid.)
|
||||
--# vim: et sw=4 sts=4
|
@@ -0,0 +1,15 @@
|
||||
HTML.AllowedCommentsRegexp
|
||||
TYPE: string/null
|
||||
VERSION: 4.4.0
|
||||
DEFAULT: NULL
|
||||
--DESCRIPTION--
|
||||
A regexp, which if it matches the body of a comment, indicates that
|
||||
it should be allowed. Trailing and leading spaces are removed prior
|
||||
to running this regular expression.
|
||||
<strong>Warning:</strong> Make sure you specify
|
||||
correct anchor metacharacters <code>^regex$</code>, otherwise you may accept
|
||||
comments that you did not mean to! In particular, the regex <code>/foo|bar/</code>
|
||||
is probably not sufficiently strict, since it also allows <code>foobar</code>.
|
||||
See also %HTML.AllowedComments (these directives are union'ed together,
|
||||
so a comment is considered valid if any directive deems it valid.)
|
||||
--# vim: et sw=4 sts=4
|
@@ -4,12 +4,17 @@ VERSION: 1.3.0
|
||||
DEFAULT: NULL
|
||||
--DESCRIPTION--
|
||||
<p>
|
||||
If HTML Purifier's tag set is unsatisfactory for your needs, you
|
||||
can overload it with your own list of tags to allow. Note that this
|
||||
method is subtractive: it does its job by taking away from HTML Purifier
|
||||
usual feature set, so you cannot add a tag that HTML Purifier never
|
||||
supported in the first place (like embed, form or head). If you
|
||||
change this, you probably also want to change %HTML.AllowedAttributes.
|
||||
If HTML Purifier's tag set is unsatisfactory for your needs, you can
|
||||
overload it with your own list of tags to allow. If you change
|
||||
this, you probably also want to change %HTML.AllowedAttributes; see
|
||||
also %HTML.Allowed which lets you set allowed elements and
|
||||
attributes at the same time.
|
||||
</p>
|
||||
<p>
|
||||
If you attempt to allow an element that HTML Purifier does not know
|
||||
about, HTML Purifier will raise an error. You will need to manually
|
||||
tell HTML Purifier about this element by using the
|
||||
<a href="http://htmlpurifier.org/docs/enduser-customize.html">advanced customization features.</a>
|
||||
</p>
|
||||
<p>
|
||||
<strong>Warning:</strong> If another directive conflicts with the
|
||||
|
@@ -0,0 +1,11 @@
|
||||
HTML.FlashAllowFullScreen
|
||||
TYPE: bool
|
||||
VERSION: 4.2.0
|
||||
DEFAULT: false
|
||||
--DESCRIPTION--
|
||||
<p>
|
||||
Whether or not to permit embedded Flash content from
|
||||
%HTML.SafeObject to expand to the full screen. Corresponds to
|
||||
the <code>allowFullScreen</code> parameter.
|
||||
</p>
|
||||
--# vim: et sw=4 sts=4
|
@@ -0,0 +1,7 @@
|
||||
HTML.Nofollow
|
||||
TYPE: bool
|
||||
VERSION: 4.3.0
|
||||
DEFAULT: FALSE
|
||||
--DESCRIPTION--
|
||||
If enabled, nofollow rel attributes are added to all outgoing links.
|
||||
--# vim: et sw=4 sts=4
|
13
library/HTMLPurifier/ConfigSchema/schema/HTML.SafeIframe.txt
Normal file
13
library/HTMLPurifier/ConfigSchema/schema/HTML.SafeIframe.txt
Normal file
@@ -0,0 +1,13 @@
|
||||
HTML.SafeIframe
|
||||
TYPE: bool
|
||||
VERSION: 4.4.0
|
||||
DEFAULT: false
|
||||
--DESCRIPTION--
|
||||
<p>
|
||||
Whether or not to permit iframe tags in untrusted documents. This
|
||||
directive must be accompanied by a whitelist of permitted iframes,
|
||||
such as %URI.SafeIframeRegexp, otherwise it will fatally error.
|
||||
This directive has no effect on strict doctypes, as iframes are not
|
||||
valid.
|
||||
</p>
|
||||
--# vim: et sw=4 sts=4
|
@@ -0,0 +1,10 @@
|
||||
HTML.SafeScripting
|
||||
TYPE: lookup
|
||||
VERSION: 4.5.0
|
||||
DEFAULT: array()
|
||||
--DESCRIPTION--
|
||||
<p>
|
||||
Whether or not to permit script tags to external scripts in documents.
|
||||
Inline scripting is not allowed, and the script must match an explicit whitelist.
|
||||
</p>
|
||||
--# vim: et sw=4 sts=4
|
@@ -0,0 +1,8 @@
|
||||
HTML.TargetBlank
|
||||
TYPE: bool
|
||||
VERSION: 4.4.0
|
||||
DEFAULT: FALSE
|
||||
--DESCRIPTION--
|
||||
If enabled, <code>target=blank</code> attributes are added to all outgoing links.
|
||||
(This includes links from an HTTPS version of a page to an HTTP version.)
|
||||
--# vim: et sw=4 sts=4
|
@@ -5,4 +5,5 @@ DEFAULT: false
|
||||
--DESCRIPTION--
|
||||
Indicates whether or not the user input is trusted or not. If the input is
|
||||
trusted, a more expansive set of allowed tags and attributes will be used.
|
||||
See also %CSS.Trusted.
|
||||
--# vim: et sw=4 sts=4
|
||||
|
@@ -0,0 +1,15 @@
|
||||
Output.FixInnerHTML
|
||||
TYPE: bool
|
||||
VERSION: 4.3.0
|
||||
DEFAULT: true
|
||||
--DESCRIPTION--
|
||||
<p>
|
||||
If true, HTML Purifier will protect against Internet Explorer's
|
||||
mishandling of the <code>innerHTML</code> attribute by appending
|
||||
a space to any attribute that does not contain angled brackets, spaces
|
||||
or quotes, but contains a backtick. This slightly changes the
|
||||
semantics of any given attribute, so if this is unacceptable and
|
||||
you do not use <code>innerHTML</code> on any of your pages, you can
|
||||
turn this directive off.
|
||||
</p>
|
||||
--# vim: et sw=4 sts=4
|
@@ -12,6 +12,6 @@ array (
|
||||
--DESCRIPTION--
|
||||
Whitelist that defines the schemes that a URI is allowed to have. This
|
||||
prevents XSS attacks from using pseudo-schemes like javascript or mocha.
|
||||
There is also support for the <code>data</code> URI scheme, but it is not
|
||||
enabled by default.
|
||||
There is also support for the <code>data</code> and <code>file</code>
|
||||
URI schemes, but they are not enabled by default.
|
||||
--# vim: et sw=4 sts=4
|
||||
|
@@ -1,12 +1,15 @@
|
||||
URI.DisableResources
|
||||
TYPE: bool
|
||||
VERSION: 1.3.0
|
||||
VERSION: 4.2.0
|
||||
DEFAULT: false
|
||||
--DESCRIPTION--
|
||||
|
||||
<p>
|
||||
Disables embedding resources, essentially meaning no pictures. You can
|
||||
still link to them though. See %URI.DisableExternalResources for why
|
||||
this might be a good idea.
|
||||
</p>
|
||||
<p>
|
||||
<em>Note:</em> While this directive has been available since 1.3.0,
|
||||
it didn't actually start doing anything until 4.2.0.
|
||||
</p>
|
||||
--# vim: et sw=4 sts=4
|
||||
|
@@ -0,0 +1,22 @@
|
||||
URI.SafeIframeRegexp
|
||||
TYPE: string/null
|
||||
VERSION: 4.4.0
|
||||
DEFAULT: NULL
|
||||
--DESCRIPTION--
|
||||
<p>
|
||||
A PCRE regular expression that will be matched against an iframe URI. This is
|
||||
a relatively inflexible scheme, but works well enough for the most common
|
||||
use-case of iframes: embedded video. This directive only has an effect if
|
||||
%HTML.SafeIframe is enabled. Here are some example values:
|
||||
</p>
|
||||
<ul>
|
||||
<li><code>%^http://www.youtube.com/embed/%</code> - Allow YouTube videos</li>
|
||||
<li><code>%^http://player.vimeo.com/video/%</code> - Allow Vimeo videos</li>
|
||||
<li><code>%^http://(www.youtube.com/embed/|player.vimeo.com/video/)%</code> - Allow both</li>
|
||||
</ul>
|
||||
<p>
|
||||
Note that this directive does not give you enough granularity to, say, disable
|
||||
all <code>autoplay</code> videos. Pipe up on the HTML Purifier forums if this
|
||||
is a capability you want.
|
||||
</p>
|
||||
--# vim: et sw=4 sts=4
|
@@ -12,6 +12,17 @@ abstract class HTMLPurifier_Definition
|
||||
*/
|
||||
public $setup = false;
|
||||
|
||||
/**
|
||||
* If true, write out the final definition object to the cache after
|
||||
* setup. This will be true only if all invocations to get a raw
|
||||
* definition object are also optimized. This does not cause file
|
||||
* system thrashing because on subsequent calls the cached object
|
||||
* is used and any writes to the raw definition object are short
|
||||
* circuited. See enduser-customize.html for the high-level
|
||||
* picture.
|
||||
*/
|
||||
public $optimized = null;
|
||||
|
||||
/**
|
||||
* What type of definition is it?
|
||||
*/
|
||||
|
@@ -9,14 +9,14 @@ class HTMLPurifier_DefinitionCache_Serializer extends
|
||||
$file = $this->generateFilePath($config);
|
||||
if (file_exists($file)) return false;
|
||||
if (!$this->_prepareDir($config)) return false;
|
||||
return $this->_write($file, serialize($def));
|
||||
return $this->_write($file, serialize($def), $config);
|
||||
}
|
||||
|
||||
public function set($def, $config) {
|
||||
if (!$this->checkDefType($def)) return;
|
||||
$file = $this->generateFilePath($config);
|
||||
if (!$this->_prepareDir($config)) return false;
|
||||
return $this->_write($file, serialize($def));
|
||||
return $this->_write($file, serialize($def), $config);
|
||||
}
|
||||
|
||||
public function replace($def, $config) {
|
||||
@@ -24,7 +24,7 @@ class HTMLPurifier_DefinitionCache_Serializer extends
|
||||
$file = $this->generateFilePath($config);
|
||||
if (!file_exists($file)) return false;
|
||||
if (!$this->_prepareDir($config)) return false;
|
||||
return $this->_write($file, serialize($def));
|
||||
return $this->_write($file, serialize($def), $config);
|
||||
}
|
||||
|
||||
public function get($config) {
|
||||
@@ -97,18 +97,34 @@ class HTMLPurifier_DefinitionCache_Serializer extends
|
||||
* Convenience wrapper function for file_put_contents
|
||||
* @param $file File name to write to
|
||||
* @param $data Data to write into file
|
||||
* @param $config Config object
|
||||
* @return Number of bytes written if success, or false if failure.
|
||||
*/
|
||||
private function _write($file, $data) {
|
||||
return file_put_contents($file, $data);
|
||||
private function _write($file, $data, $config) {
|
||||
$result = file_put_contents($file, $data);
|
||||
if ($result !== false) {
|
||||
// set permissions of the new file (no execute)
|
||||
$chmod = $config->get('Cache.SerializerPermissions');
|
||||
if (!$chmod) {
|
||||
$chmod = 0644; // invalid config or simpletest
|
||||
}
|
||||
$chmod = $chmod & 0666;
|
||||
chmod($file, $chmod);
|
||||
}
|
||||
return $result;
|
||||
}
|
||||
|
||||
/**
|
||||
* Prepares the directory that this type stores the serials in
|
||||
* @param $config Config object
|
||||
* @return True if successful
|
||||
*/
|
||||
private function _prepareDir($config) {
|
||||
$directory = $this->generateDirectoryPath($config);
|
||||
$chmod = $config->get('Cache.SerializerPermissions');
|
||||
if (!$chmod) {
|
||||
$chmod = 0755; // invalid config or simpletest
|
||||
}
|
||||
if (!is_dir($directory)) {
|
||||
$base = $this->generateBaseDirectoryPath($config);
|
||||
if (!is_dir($base)) {
|
||||
@@ -116,13 +132,13 @@ class HTMLPurifier_DefinitionCache_Serializer extends
|
||||
please create or change using %Cache.SerializerPath',
|
||||
E_USER_WARNING);
|
||||
return false;
|
||||
} elseif (!$this->_testPermissions($base)) {
|
||||
} elseif (!$this->_testPermissions($base, $chmod)) {
|
||||
return false;
|
||||
}
|
||||
$old = umask(0022); // disable group and world writes
|
||||
mkdir($directory);
|
||||
$old = umask(0000);
|
||||
mkdir($directory, $chmod);
|
||||
umask($old);
|
||||
} elseif (!$this->_testPermissions($directory)) {
|
||||
} elseif (!$this->_testPermissions($directory, $chmod)) {
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
@@ -131,8 +147,11 @@ class HTMLPurifier_DefinitionCache_Serializer extends
|
||||
/**
|
||||
* Tests permissions on a directory and throws out friendly
|
||||
* error messages and attempts to chmod it itself if possible
|
||||
* @param $dir Directory path
|
||||
* @param $chmod Permissions
|
||||
* @return True if directory writable
|
||||
*/
|
||||
private function _testPermissions($dir) {
|
||||
private function _testPermissions($dir, $chmod) {
|
||||
// early abort, if it is writable, everything is hunky-dory
|
||||
if (is_writable($dir)) return true;
|
||||
if (!is_dir($dir)) {
|
||||
@@ -146,17 +165,17 @@ class HTMLPurifier_DefinitionCache_Serializer extends
|
||||
// POSIX system, we can give more specific advice
|
||||
if (fileowner($dir) === posix_getuid()) {
|
||||
// we can chmod it ourselves
|
||||
chmod($dir, 0755);
|
||||
return true;
|
||||
$chmod = $chmod | 0700;
|
||||
if (chmod($dir, $chmod)) return true;
|
||||
} elseif (filegroup($dir) === posix_getgid()) {
|
||||
$chmod = '775';
|
||||
$chmod = $chmod | 0070;
|
||||
} else {
|
||||
// PHP's probably running as nobody, so we'll
|
||||
// need to give global permissions
|
||||
$chmod = '777';
|
||||
$chmod = $chmod | 0777;
|
||||
}
|
||||
trigger_error('Directory '.$dir.' not writable, '.
|
||||
'please chmod to ' . $chmod,
|
||||
'please chmod to ' . decoct($chmod),
|
||||
E_USER_WARNING);
|
||||
} else {
|
||||
// generic error message
|
||||
|
@@ -30,13 +30,25 @@ class HTMLPurifier_ElementDef
|
||||
*/
|
||||
public $attr = array();
|
||||
|
||||
// XXX: Design note: currently, it's not possible to override
|
||||
// previously defined AttrTransforms without messing around with
|
||||
// the final generated config. This is by design; a previous version
|
||||
// used an associated list of attr_transform, but it was extremely
|
||||
// easy to accidentally override other attribute transforms by
|
||||
// forgetting to specify an index (and just using 0.) While we
|
||||
// could check this by checking the index number and complaining,
|
||||
// there is a second problem which is that it is not at all easy to
|
||||
// tell when something is getting overridden. Combine this with a
|
||||
// codebase where this isn't really being used, and it's perfect for
|
||||
// nuking.
|
||||
|
||||
/**
|
||||
* Indexed list of tag's HTMLPurifier_AttrTransform to be done before validation
|
||||
* List of tags HTMLPurifier_AttrTransform to be done before validation
|
||||
*/
|
||||
public $attr_transform_pre = array();
|
||||
|
||||
/**
|
||||
* Indexed list of tag's HTMLPurifier_AttrTransform to be done after validation
|
||||
* List of tags HTMLPurifier_AttrTransform to be done after validation
|
||||
*/
|
||||
public $attr_transform_post = array();
|
||||
|
||||
@@ -144,9 +156,9 @@ class HTMLPurifier_ElementDef
|
||||
}
|
||||
$this->attr[$k] = $v;
|
||||
}
|
||||
$this->_mergeAssocArray($this->attr_transform_pre, $def->attr_transform_pre);
|
||||
$this->_mergeAssocArray($this->attr_transform_post, $def->attr_transform_post);
|
||||
$this->_mergeAssocArray($this->excludes, $def->excludes);
|
||||
$this->attr_transform_pre = array_merge($this->attr_transform_pre, $def->attr_transform_pre);
|
||||
$this->attr_transform_post = array_merge($this->attr_transform_post, $def->attr_transform_post);
|
||||
|
||||
if(!empty($def->content_model)) {
|
||||
$this->content_model =
|
||||
|
@@ -19,6 +19,68 @@ class HTMLPurifier_Encoder
|
||||
*/
|
||||
public static function muteErrorHandler() {}
|
||||
|
||||
/**
|
||||
* iconv wrapper which mutes errors, but doesn't work around bugs.
|
||||
*/
|
||||
public static function unsafeIconv($in, $out, $text) {
|
||||
set_error_handler(array('HTMLPurifier_Encoder', 'muteErrorHandler'));
|
||||
$r = iconv($in, $out, $text);
|
||||
restore_error_handler();
|
||||
return $r;
|
||||
}
|
||||
|
||||
/**
|
||||
* iconv wrapper which mutes errors and works around bugs.
|
||||
*/
|
||||
public static function iconv($in, $out, $text, $max_chunk_size = 8000) {
|
||||
$code = self::testIconvTruncateBug();
|
||||
if ($code == self::ICONV_OK) {
|
||||
return self::unsafeIconv($in, $out, $text);
|
||||
} elseif ($code == self::ICONV_TRUNCATES) {
|
||||
// we can only work around this if the input character set
|
||||
// is utf-8
|
||||
if ($in == 'utf-8') {
|
||||
if ($max_chunk_size < 4) {
|
||||
trigger_error('max_chunk_size is too small', E_USER_WARNING);
|
||||
return false;
|
||||
}
|
||||
// split into 8000 byte chunks, but be careful to handle
|
||||
// multibyte boundaries properly
|
||||
if (($c = strlen($text)) <= $max_chunk_size) {
|
||||
return self::unsafeIconv($in, $out, $text);
|
||||
}
|
||||
$r = '';
|
||||
$i = 0;
|
||||
while (true) {
|
||||
if ($i + $max_chunk_size >= $c) {
|
||||
$r .= self::unsafeIconv($in, $out, substr($text, $i));
|
||||
break;
|
||||
}
|
||||
// wibble the boundary
|
||||
if (0x80 != (0xC0 & ord($text[$i + $max_chunk_size]))) {
|
||||
$chunk_size = $max_chunk_size;
|
||||
} elseif (0x80 != (0xC0 & ord($text[$i + $max_chunk_size - 1]))) {
|
||||
$chunk_size = $max_chunk_size - 1;
|
||||
} elseif (0x80 != (0xC0 & ord($text[$i + $max_chunk_size - 2]))) {
|
||||
$chunk_size = $max_chunk_size - 2;
|
||||
} elseif (0x80 != (0xC0 & ord($text[$i + $max_chunk_size - 3]))) {
|
||||
$chunk_size = $max_chunk_size - 3;
|
||||
} else {
|
||||
return false; // rather confusing UTF-8...
|
||||
}
|
||||
$chunk = substr($text, $i, $chunk_size); // substr doesn't mind overlong lengths
|
||||
$r .= self::unsafeIconv($in, $out, $chunk);
|
||||
$i += $chunk_size;
|
||||
}
|
||||
return $r;
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Cleans a UTF-8 string for well-formedness and SGML validity
|
||||
*
|
||||
@@ -260,6 +322,14 @@ class HTMLPurifier_Encoder
|
||||
return $ret;
|
||||
}
|
||||
|
||||
public static function iconvAvailable() {
|
||||
static $iconv = null;
|
||||
if ($iconv === null) {
|
||||
$iconv = function_exists('iconv') && self::testIconvTruncateBug() != self::ICONV_UNUSABLE;
|
||||
}
|
||||
return $iconv;
|
||||
}
|
||||
|
||||
/**
|
||||
* Converts a string to UTF-8 based on configuration.
|
||||
*/
|
||||
@@ -267,28 +337,30 @@ class HTMLPurifier_Encoder
|
||||
$encoding = $config->get('Core.Encoding');
|
||||
if ($encoding === 'utf-8') return $str;
|
||||
static $iconv = null;
|
||||
if ($iconv === null) $iconv = function_exists('iconv');
|
||||
set_error_handler(array('HTMLPurifier_Encoder', 'muteErrorHandler'));
|
||||
if ($iconv === null) $iconv = self::iconvAvailable();
|
||||
if ($iconv && !$config->get('Test.ForceNoIconv')) {
|
||||
$str = iconv($encoding, 'utf-8//IGNORE', $str);
|
||||
// unaffected by bugs, since UTF-8 support all characters
|
||||
$str = self::unsafeIconv($encoding, 'utf-8//IGNORE', $str);
|
||||
if ($str === false) {
|
||||
// $encoding is not a valid encoding
|
||||
restore_error_handler();
|
||||
trigger_error('Invalid encoding ' . $encoding, E_USER_ERROR);
|
||||
return '';
|
||||
}
|
||||
// If the string is bjorked by Shift_JIS or a similar encoding
|
||||
// that doesn't support all of ASCII, convert the naughty
|
||||
// characters to their true byte-wise ASCII/UTF-8 equivalents.
|
||||
$str = strtr($str, HTMLPurifier_Encoder::testEncodingSupportsASCII($encoding));
|
||||
restore_error_handler();
|
||||
$str = strtr($str, self::testEncodingSupportsASCII($encoding));
|
||||
return $str;
|
||||
} elseif ($encoding === 'iso-8859-1') {
|
||||
$str = utf8_encode($str);
|
||||
restore_error_handler();
|
||||
return $str;
|
||||
}
|
||||
trigger_error('Encoding not supported, please install iconv', E_USER_ERROR);
|
||||
$bug = HTMLPurifier_Encoder::testIconvTruncateBug();
|
||||
if ($bug == self::ICONV_OK) {
|
||||
trigger_error('Encoding not supported, please install iconv', E_USER_ERROR);
|
||||
} else {
|
||||
trigger_error('You have a buggy version of iconv, see https://bugs.php.net/bug.php?id=48147 and http://sourceware.org/bugzilla/show_bug.cgi?id=13541', E_USER_ERROR);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -298,16 +370,15 @@ class HTMLPurifier_Encoder
|
||||
*/
|
||||
public static function convertFromUTF8($str, $config, $context) {
|
||||
$encoding = $config->get('Core.Encoding');
|
||||
if ($escape = $config->get('Core.EscapeNonASCIICharacters')) {
|
||||
$str = self::convertToASCIIDumbLossless($str);
|
||||
}
|
||||
if ($encoding === 'utf-8') return $str;
|
||||
static $iconv = null;
|
||||
if ($iconv === null) $iconv = function_exists('iconv');
|
||||
if ($escape = $config->get('Core.EscapeNonASCIICharacters')) {
|
||||
$str = HTMLPurifier_Encoder::convertToASCIIDumbLossless($str);
|
||||
}
|
||||
set_error_handler(array('HTMLPurifier_Encoder', 'muteErrorHandler'));
|
||||
if ($iconv === null) $iconv = self::iconvAvailable();
|
||||
if ($iconv && !$config->get('Test.ForceNoIconv')) {
|
||||
// Undo our previous fix in convertToUTF8, otherwise iconv will barf
|
||||
$ascii_fix = HTMLPurifier_Encoder::testEncodingSupportsASCII($encoding);
|
||||
$ascii_fix = self::testEncodingSupportsASCII($encoding);
|
||||
if (!$escape && !empty($ascii_fix)) {
|
||||
$clear_fix = array();
|
||||
foreach ($ascii_fix as $utf8 => $native) $clear_fix[$utf8] = '';
|
||||
@@ -315,15 +386,17 @@ class HTMLPurifier_Encoder
|
||||
}
|
||||
$str = strtr($str, array_flip($ascii_fix));
|
||||
// Normal stuff
|
||||
$str = iconv('utf-8', $encoding . '//IGNORE', $str);
|
||||
restore_error_handler();
|
||||
$str = self::iconv('utf-8', $encoding . '//IGNORE', $str);
|
||||
return $str;
|
||||
} elseif ($encoding === 'iso-8859-1') {
|
||||
$str = utf8_decode($str);
|
||||
restore_error_handler();
|
||||
return $str;
|
||||
}
|
||||
trigger_error('Encoding not supported', E_USER_ERROR);
|
||||
// You might be tempted to assume that the ASCII representation
|
||||
// might be OK, however, this is *not* universally true over all
|
||||
// encodings. So we take the conservative route here, rather
|
||||
// than forcibly turn on %Core.EscapeNonASCIICharacters
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -373,6 +446,49 @@ class HTMLPurifier_Encoder
|
||||
return $result;
|
||||
}
|
||||
|
||||
/** No bugs detected in iconv. */
|
||||
const ICONV_OK = 0;
|
||||
|
||||
/** Iconv truncates output if converting from UTF-8 to another
|
||||
* character set with //IGNORE, and a non-encodable character is found */
|
||||
const ICONV_TRUNCATES = 1;
|
||||
|
||||
/** Iconv does not support //IGNORE, making it unusable for
|
||||
* transcoding purposes */
|
||||
const ICONV_UNUSABLE = 2;
|
||||
|
||||
/**
|
||||
* glibc iconv has a known bug where it doesn't handle the magic
|
||||
* //IGNORE stanza correctly. In particular, rather than ignore
|
||||
* characters, it will return an EILSEQ after consuming some number
|
||||
* of characters, and expect you to restart iconv as if it were
|
||||
* an E2BIG. Old versions of PHP did not respect the errno, and
|
||||
* returned the fragment, so as a result you would see iconv
|
||||
* mysteriously truncating output. We can work around this by
|
||||
* manually chopping our input into segments of about 8000
|
||||
* characters, as long as PHP ignores the error code. If PHP starts
|
||||
* paying attention to the error code, iconv becomes unusable.
|
||||
*
|
||||
* @returns Error code indicating severity of bug.
|
||||
*/
|
||||
public static function testIconvTruncateBug() {
|
||||
static $code = null;
|
||||
if ($code === null) {
|
||||
// better not use iconv, otherwise infinite loop!
|
||||
$r = self::unsafeIconv('utf-8', 'ascii//IGNORE', "\xCE\xB1" . str_repeat('a', 9000));
|
||||
if ($r === false) {
|
||||
$code = self::ICONV_UNUSABLE;
|
||||
} elseif (($c = strlen($r)) < 9000) {
|
||||
$code = self::ICONV_TRUNCATES;
|
||||
} elseif ($c > 9000) {
|
||||
trigger_error('Your copy of iconv is extremely buggy. Please notify HTML Purifier maintainers: include your iconv version as per phpversion()', E_USER_ERROR);
|
||||
} else {
|
||||
$code = self::ICONV_OK;
|
||||
}
|
||||
}
|
||||
return $code;
|
||||
}
|
||||
|
||||
/**
|
||||
* This expensive function tests whether or not a given character
|
||||
* encoding supports ASCII. 7/8-bit encodings like Shift_JIS will
|
||||
@@ -385,6 +501,11 @@ class HTMLPurifier_Encoder
|
||||
* which can be used to "undo" any overzealous iconv action.
|
||||
*/
|
||||
public static function testEncodingSupportsASCII($encoding, $bypass = false) {
|
||||
// All calls to iconv here are unsafe, proof by case analysis:
|
||||
// If ICONV_OK, no difference.
|
||||
// If ICONV_TRUNCATE, all calls involve one character inputs,
|
||||
// so bug is not triggered.
|
||||
// If ICONV_UNUSABLE, this call is irrelevant
|
||||
static $encodings = array();
|
||||
if (!$bypass) {
|
||||
if (isset($encodings[$encoding])) return $encodings[$encoding];
|
||||
@@ -398,24 +519,22 @@ class HTMLPurifier_Encoder
|
||||
if (strpos($lenc, 'iso-8859-') === 0) return array();
|
||||
}
|
||||
$ret = array();
|
||||
set_error_handler(array('HTMLPurifier_Encoder', 'muteErrorHandler'));
|
||||
if (iconv('UTF-8', $encoding, 'a') === false) return false;
|
||||
if (self::unsafeIconv('UTF-8', $encoding, 'a') === false) return false;
|
||||
for ($i = 0x20; $i <= 0x7E; $i++) { // all printable ASCII chars
|
||||
$c = chr($i); // UTF-8 char
|
||||
$r = iconv('UTF-8', "$encoding//IGNORE", $c); // initial conversion
|
||||
$r = self::unsafeIconv('UTF-8', "$encoding//IGNORE", $c); // initial conversion
|
||||
if (
|
||||
$r === '' ||
|
||||
// This line is needed for iconv implementations that do not
|
||||
// omit characters that do not exist in the target character set
|
||||
($r === $c && iconv($encoding, 'UTF-8//IGNORE', $r) !== $c)
|
||||
($r === $c && self::unsafeIconv($encoding, 'UTF-8//IGNORE', $r) !== $c)
|
||||
) {
|
||||
// Reverse engineer: what's the UTF-8 equiv of this byte
|
||||
// sequence? This assumes that there's no variable width
|
||||
// encoding that doesn't support ASCII.
|
||||
$ret[iconv($encoding, 'UTF-8//IGNORE', $c)] = $c;
|
||||
$ret[self::unsafeIconv($encoding, 'UTF-8//IGNORE', $c)] = $c;
|
||||
}
|
||||
}
|
||||
restore_error_handler();
|
||||
$encodings[$encoding] = $ret;
|
||||
return $ret;
|
||||
}
|
||||
|
File diff suppressed because one or more lines are too long
@@ -1,5 +1,11 @@
|
||||
<?php
|
||||
|
||||
// why is this a top level function? Because PHP 5.2.0 doesn't seem to
|
||||
// understand how to interpret this filter if it's a static method.
|
||||
// It's all really silly, but if we go this route it might be reasonable
|
||||
// to coalesce all of these methods into one.
|
||||
function htmlpurifier_filter_extractstyleblocks_muteerrorhandler() {}
|
||||
|
||||
/**
|
||||
* This filter extracts <style> blocks from input HTML, cleans them up
|
||||
* using CSSTidy, and then places them in $purifier->context->get('StyleBlocks')
|
||||
@@ -21,8 +27,16 @@ class HTMLPurifier_Filter_ExtractStyleBlocks extends HTMLPurifier_Filter
|
||||
private $_styleMatches = array();
|
||||
private $_tidy;
|
||||
|
||||
private $_id_attrdef;
|
||||
private $_class_attrdef;
|
||||
private $_enum_attrdef;
|
||||
|
||||
public function __construct() {
|
||||
$this->_tidy = new csstidy();
|
||||
$this->_tidy->set_cfg('lowercase_s', false);
|
||||
$this->_id_attrdef = new HTMLPurifier_AttrDef_HTML_ID(true);
|
||||
$this->_class_attrdef = new HTMLPurifier_AttrDef_CSS_Ident();
|
||||
$this->_enum_attrdef = new HTMLPurifier_AttrDef_Enum(array('first-child', 'link', 'visited', 'active', 'hover', 'focus'));
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -77,27 +91,166 @@ class HTMLPurifier_Filter_ExtractStyleBlocks extends HTMLPurifier_Filter
|
||||
$css = substr($css, 0, -3);
|
||||
}
|
||||
$css = trim($css);
|
||||
set_error_handler('htmlpurifier_filter_extractstyleblocks_muteerrorhandler');
|
||||
$this->_tidy->parse($css);
|
||||
restore_error_handler();
|
||||
$css_definition = $config->getDefinition('CSS');
|
||||
$html_definition = $config->getDefinition('HTML');
|
||||
$new_css = array();
|
||||
foreach ($this->_tidy->css as $k => $decls) {
|
||||
// $decls are all CSS declarations inside an @ selector
|
||||
$new_decls = array();
|
||||
foreach ($decls as $selector => $style) {
|
||||
$selector = trim($selector);
|
||||
if ($selector === '') continue; // should not happen
|
||||
if ($selector[0] === '+') {
|
||||
if ($selector !== '' && $selector[0] === '+') continue;
|
||||
}
|
||||
if (!empty($scopes)) {
|
||||
$new_selector = array(); // because multiple ones are possible
|
||||
$selectors = array_map('trim', explode(',', $selector));
|
||||
foreach ($scopes as $s1) {
|
||||
foreach ($selectors as $s2) {
|
||||
$new_selector[] = "$s1 $s2";
|
||||
// Parse the selector
|
||||
// Here is the relevant part of the CSS grammar:
|
||||
//
|
||||
// ruleset
|
||||
// : selector [ ',' S* selector ]* '{' ...
|
||||
// selector
|
||||
// : simple_selector [ combinator selector | S+ [ combinator? selector ]? ]?
|
||||
// combinator
|
||||
// : '+' S*
|
||||
// : '>' S*
|
||||
// simple_selector
|
||||
// : element_name [ HASH | class | attrib | pseudo ]*
|
||||
// | [ HASH | class | attrib | pseudo ]+
|
||||
// element_name
|
||||
// : IDENT | '*'
|
||||
// ;
|
||||
// class
|
||||
// : '.' IDENT
|
||||
// ;
|
||||
// attrib
|
||||
// : '[' S* IDENT S* [ [ '=' | INCLUDES | DASHMATCH ] S*
|
||||
// [ IDENT | STRING ] S* ]? ']'
|
||||
// ;
|
||||
// pseudo
|
||||
// : ':' [ IDENT | FUNCTION S* [IDENT S*]? ')' ]
|
||||
// ;
|
||||
//
|
||||
// For reference, here are the relevant tokens:
|
||||
//
|
||||
// HASH #{name}
|
||||
// IDENT {ident}
|
||||
// INCLUDES ==
|
||||
// DASHMATCH |=
|
||||
// STRING {string}
|
||||
// FUNCTION {ident}\(
|
||||
//
|
||||
// And the lexical scanner tokens
|
||||
//
|
||||
// name {nmchar}+
|
||||
// nmchar [_a-z0-9-]|{nonascii}|{escape}
|
||||
// nonascii [\240-\377]
|
||||
// escape {unicode}|\\[^\r\n\f0-9a-f]
|
||||
// unicode \\{h}}{1,6}(\r\n|[ \t\r\n\f])?
|
||||
// ident -?{nmstart}{nmchar*}
|
||||
// nmstart [_a-z]|{nonascii}|{escape}
|
||||
// string {string1}|{string2}
|
||||
// string1 \"([^\n\r\f\\"]|\\{nl}|{escape})*\"
|
||||
// string2 \'([^\n\r\f\\"]|\\{nl}|{escape})*\'
|
||||
//
|
||||
// We'll implement a subset (in order to reduce attack
|
||||
// surface); in particular:
|
||||
//
|
||||
// - No Unicode support
|
||||
// - No escapes support
|
||||
// - No string support (by proxy no attrib support)
|
||||
// - element_name is matched against allowed
|
||||
// elements (some people might find this
|
||||
// annoying...)
|
||||
// - Pseudo-elements one of :first-child, :link,
|
||||
// :visited, :active, :hover, :focus
|
||||
|
||||
// handle ruleset
|
||||
$selectors = array_map('trim', explode(',', $selector));
|
||||
$new_selectors = array();
|
||||
foreach ($selectors as $sel) {
|
||||
// split on +, > and spaces
|
||||
$basic_selectors = preg_split('/\s*([+> ])\s*/', $sel, -1, PREG_SPLIT_DELIM_CAPTURE);
|
||||
// even indices are chunks, odd indices are
|
||||
// delimiters
|
||||
$nsel = null;
|
||||
$delim = null; // guaranteed to be non-null after
|
||||
// two loop iterations
|
||||
for ($i = 0, $c = count($basic_selectors); $i < $c; $i++) {
|
||||
$x = $basic_selectors[$i];
|
||||
if ($i % 2) {
|
||||
// delimiter
|
||||
if ($x === ' ') {
|
||||
$delim = ' ';
|
||||
} else {
|
||||
$delim = ' ' . $x . ' ';
|
||||
}
|
||||
} else {
|
||||
// simple selector
|
||||
$components = preg_split('/([#.:])/', $x, -1, PREG_SPLIT_DELIM_CAPTURE);
|
||||
$sdelim = null;
|
||||
$nx = null;
|
||||
for ($j = 0, $cc = count($components); $j < $cc; $j ++) {
|
||||
$y = $components[$j];
|
||||
if ($j === 0) {
|
||||
if ($y === '*' || isset($html_definition->info[$y = strtolower($y)])) {
|
||||
$nx = $y;
|
||||
} else {
|
||||
// $nx stays null; this matters
|
||||
// if we don't manage to find
|
||||
// any valid selector content,
|
||||
// in which case we ignore the
|
||||
// outer $delim
|
||||
}
|
||||
} elseif ($j % 2) {
|
||||
// set delimiter
|
||||
$sdelim = $y;
|
||||
} else {
|
||||
$attrdef = null;
|
||||
if ($sdelim === '#') {
|
||||
$attrdef = $this->_id_attrdef;
|
||||
} elseif ($sdelim === '.') {
|
||||
$attrdef = $this->_class_attrdef;
|
||||
} elseif ($sdelim === ':') {
|
||||
$attrdef = $this->_enum_attrdef;
|
||||
} else {
|
||||
throw new HTMLPurifier_Exception('broken invariant sdelim and preg_split');
|
||||
}
|
||||
$r = $attrdef->validate($y, $config, $context);
|
||||
if ($r !== false) {
|
||||
if ($r !== true) {
|
||||
$y = $r;
|
||||
}
|
||||
if ($nx === null) {
|
||||
$nx = '';
|
||||
}
|
||||
$nx .= $sdelim . $y;
|
||||
}
|
||||
}
|
||||
}
|
||||
if ($nx !== null) {
|
||||
if ($nsel === null) {
|
||||
$nsel = $nx;
|
||||
} else {
|
||||
$nsel .= $delim . $nx;
|
||||
}
|
||||
} else {
|
||||
// delimiters to the left of invalid
|
||||
// basic selector ignored
|
||||
}
|
||||
}
|
||||
}
|
||||
if ($nsel !== null) {
|
||||
if (!empty($scopes)) {
|
||||
foreach ($scopes as $s) {
|
||||
$new_selectors[] = "$s $nsel";
|
||||
}
|
||||
} else {
|
||||
$new_selectors[] = $nsel;
|
||||
}
|
||||
}
|
||||
$selector = implode(', ', $new_selector); // now it's a string
|
||||
}
|
||||
if (empty($new_selectors)) continue;
|
||||
$selector = implode(', ', $new_selectors);
|
||||
foreach ($style as $name => $value) {
|
||||
if (!isset($css_definition->info[$name])) {
|
||||
unset($style[$name]);
|
||||
@@ -110,10 +263,11 @@ class HTMLPurifier_Filter_ExtractStyleBlocks extends HTMLPurifier_Filter
|
||||
}
|
||||
$new_decls[$selector] = $style;
|
||||
}
|
||||
$this->_tidy->css[$k] = $new_decls;
|
||||
$new_css[$k] = $new_decls;
|
||||
}
|
||||
// remove stuff that shouldn't be used, could be reenabled
|
||||
// after security risks are analyzed
|
||||
$this->_tidy->css = $new_css;
|
||||
$this->_tidy->import = array();
|
||||
$this->_tidy->charset = null;
|
||||
$this->_tidy->namespace = null;
|
||||
|
@@ -36,6 +36,11 @@ class HTMLPurifier_Generator
|
||||
*/
|
||||
private $_flashCompat;
|
||||
|
||||
/**
|
||||
* Cache of %Output.FixInnerHTML
|
||||
*/
|
||||
private $_innerHTMLFix;
|
||||
|
||||
/**
|
||||
* Stack for keeping track of object information when outputting IE
|
||||
* compatibility code.
|
||||
@@ -54,6 +59,7 @@ class HTMLPurifier_Generator
|
||||
public function __construct($config, $context) {
|
||||
$this->config = $config;
|
||||
$this->_scriptFix = $config->get('Output.CommentScriptContents');
|
||||
$this->_innerHTMLFix = $config->get('Output.FixInnerHTML');
|
||||
$this->_sortAttr = $config->get('Output.SortAttr');
|
||||
$this->_flashCompat = $config->get('Output.FlashCompat');
|
||||
$this->_def = $config->getHTMLDefinition();
|
||||
@@ -98,9 +104,11 @@ class HTMLPurifier_Generator
|
||||
}
|
||||
|
||||
// Normalize newlines to system defined value
|
||||
$nl = $this->config->get('Output.Newline');
|
||||
if ($nl === null) $nl = PHP_EOL;
|
||||
if ($nl !== "\n") $html = str_replace("\n", $nl, $html);
|
||||
if ($this->config->get('Core.NormalizeNewlines')) {
|
||||
$nl = $this->config->get('Output.Newline');
|
||||
if ($nl === null) $nl = PHP_EOL;
|
||||
if ($nl !== "\n") $html = str_replace("\n", $nl, $html);
|
||||
}
|
||||
return $html;
|
||||
}
|
||||
|
||||
@@ -130,19 +138,7 @@ class HTMLPurifier_Generator
|
||||
$_extra = '';
|
||||
if ($this->_flashCompat) {
|
||||
if ($token->name == "object" && !empty($this->_flashStack)) {
|
||||
$flash = array_pop($this->_flashStack);
|
||||
$compat_token = new HTMLPurifier_Token_Empty("embed");
|
||||
foreach ($flash->attr as $name => $val) {
|
||||
if ($name == "classid") continue;
|
||||
if ($name == "type") continue;
|
||||
if ($name == "data") $name = "src";
|
||||
$compat_token->attr[$name] = $val;
|
||||
}
|
||||
foreach ($flash->param as $name => $val) {
|
||||
if ($name == "movie") $name = "src";
|
||||
$compat_token->attr[$name] = $val;
|
||||
}
|
||||
$_extra = "<!--[if IE]>".$this->generateFromToken($compat_token)."<![endif]-->";
|
||||
// doesn't do anything for now
|
||||
}
|
||||
}
|
||||
return $_extra . '</' . $token->name . '>';
|
||||
@@ -200,6 +196,37 @@ class HTMLPurifier_Generator
|
||||
continue;
|
||||
}
|
||||
}
|
||||
// Workaround for Internet Explorer innerHTML bug.
|
||||
// Essentially, Internet Explorer, when calculating
|
||||
// innerHTML, omits quotes if there are no instances of
|
||||
// angled brackets, quotes or spaces. However, when parsing
|
||||
// HTML (for example, when you assign to innerHTML), it
|
||||
// treats backticks as quotes. Thus,
|
||||
// <img alt="``" />
|
||||
// becomes
|
||||
// <img alt=`` />
|
||||
// becomes
|
||||
// <img alt='' />
|
||||
// Fortunately, all we need to do is trigger an appropriate
|
||||
// quoting style, which we do by adding an extra space.
|
||||
// This also is consistent with the W3C spec, which states
|
||||
// that user agents may ignore leading or trailing
|
||||
// whitespace (in fact, most don't, at least for attributes
|
||||
// like alt, but an extra space at the end is barely
|
||||
// noticeable). Still, we have a configuration knob for
|
||||
// this, since this transformation is not necesary if you
|
||||
// don't process user input with innerHTML or you don't plan
|
||||
// on supporting Internet Explorer.
|
||||
if ($this->_innerHTMLFix) {
|
||||
if (strpos($value, '`') !== false) {
|
||||
// check if correct quoting style would not already be
|
||||
// triggered
|
||||
if (strcspn($value, '"\' <>') === strlen($value)) {
|
||||
// protect!
|
||||
$value .= ' ';
|
||||
}
|
||||
}
|
||||
}
|
||||
$html .= $key.'="'.$this->escape($value).'" ';
|
||||
}
|
||||
return rtrim($html);
|
||||
@@ -215,7 +242,10 @@ class HTMLPurifier_Generator
|
||||
* permissible for non-attribute output.
|
||||
* @return String escaped data.
|
||||
*/
|
||||
public function escape($string, $quote = ENT_COMPAT) {
|
||||
public function escape($string, $quote = null) {
|
||||
// Workaround for APC bug on Mac Leopard reported by sidepodcast
|
||||
// http://htmlpurifier.org/phorum/read.php?3,4823,4846
|
||||
if ($quote === null) $quote = ENT_COMPAT;
|
||||
return htmlspecialchars($string, $quote, 'UTF-8');
|
||||
}
|
||||
|
||||
|
@@ -147,7 +147,7 @@ class HTMLPurifier_HTMLDefinition extends HTMLPurifier_Definition
|
||||
return $this->_anonModule;
|
||||
}
|
||||
|
||||
private $_anonModule;
|
||||
private $_anonModule = null;
|
||||
|
||||
|
||||
// PUBLIC BUT INTERNAL VARIABLES --------------------------------------
|
||||
@@ -300,7 +300,12 @@ class HTMLPurifier_HTMLDefinition extends HTMLPurifier_Definition
|
||||
unset($allowed_attributes_mutable[$key]);
|
||||
}
|
||||
}
|
||||
if ($delete) unset($this->info[$tag]->attr[$attr]);
|
||||
if ($delete) {
|
||||
if ($this->info[$tag]->attr[$attr]->required) {
|
||||
trigger_error("Required attribute '$attr' in element '$tag' was not allowed, which means '$tag' will not be allowed either", E_USER_WARNING);
|
||||
}
|
||||
unset($this->info[$tag]->attr[$attr]);
|
||||
}
|
||||
}
|
||||
}
|
||||
// emit errors
|
||||
|
@@ -21,7 +21,7 @@ class HTMLPurifier_HTMLModule_Bdo extends HTMLPurifier_HTMLModule
|
||||
// inclusions wrong for bdo: bdo allows Lang
|
||||
)
|
||||
);
|
||||
$bdo->attr_transform_post['required-dir'] = new HTMLPurifier_AttrTransform_BdoDir();
|
||||
$bdo->attr_transform_post[] = new HTMLPurifier_AttrTransform_BdoDir();
|
||||
|
||||
$this->attr_collections['I18N']['dir'] = 'Enum#ltr,rtl';
|
||||
}
|
||||
|
@@ -35,7 +35,7 @@ class HTMLPurifier_HTMLModule_Forms extends HTMLPurifier_HTMLModule
|
||||
'name' => 'CDATA',
|
||||
'readonly' => 'Bool#readonly',
|
||||
'size' => 'Number',
|
||||
'src' => 'URI#embeds',
|
||||
'src' => 'URI#embedded',
|
||||
'tabindex' => 'Number',
|
||||
'type' => 'Enum#text,password,checkbox,button,radio,submit,reset,file,hidden,image',
|
||||
'value' => 'CDATA',
|
||||
@@ -84,7 +84,8 @@ class HTMLPurifier_HTMLModule_Forms extends HTMLPurifier_HTMLModule
|
||||
$button->excludes = $this->makeLookup(
|
||||
'form', 'fieldset', // Form
|
||||
'input', 'select', 'textarea', 'label', 'button', // Formctrl
|
||||
'a' // as per HTML 4.01 spec, this is omitted by modularization
|
||||
'a', // as per HTML 4.01 spec, this is omitted by modularization
|
||||
'isindex', 'iframe' // legacy items
|
||||
);
|
||||
|
||||
// Extra exclusion: img usemap="" is not permitted within this element.
|
||||
|
38
library/HTMLPurifier/HTMLModule/Iframe.php
Normal file
38
library/HTMLPurifier/HTMLModule/Iframe.php
Normal file
@@ -0,0 +1,38 @@
|
||||
<?php
|
||||
|
||||
/**
|
||||
* XHTML 1.1 Iframe Module provides inline frames.
|
||||
*
|
||||
* @note This module is not considered safe unless an Iframe
|
||||
* whitelisting mechanism is specified. Currently, the only
|
||||
* such mechanism is %URL.SafeIframeRegexp
|
||||
*/
|
||||
class HTMLPurifier_HTMLModule_Iframe extends HTMLPurifier_HTMLModule
|
||||
{
|
||||
|
||||
public $name = 'Iframe';
|
||||
public $safe = false;
|
||||
|
||||
public function setup($config) {
|
||||
if ($config->get('HTML.SafeIframe')) {
|
||||
$this->safe = true;
|
||||
}
|
||||
$this->addElement(
|
||||
'iframe', 'Inline', 'Flow', 'Common',
|
||||
array(
|
||||
'src' => 'URI#embedded',
|
||||
'width' => 'Length',
|
||||
'height' => 'Length',
|
||||
'name' => 'ID',
|
||||
'scrolling' => 'Enum#yes,no,auto',
|
||||
'frameborder' => 'Enum#0,1',
|
||||
'longdesc' => 'URI',
|
||||
'marginheight' => 'Pixels',
|
||||
'marginwidth' => 'Pixels',
|
||||
)
|
||||
);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// vim: et sw=4 sts=4
|
@@ -89,7 +89,7 @@ class HTMLPurifier_HTMLModule_Legacy extends HTMLPurifier_HTMLModule
|
||||
$hr->attr['width'] = 'Length';
|
||||
|
||||
$img = $this->addBlankElement('img');
|
||||
$img->attr['align'] = 'Enum#top,middle,bottom,left,right';
|
||||
$img->attr['align'] = 'IAlign';
|
||||
$img->attr['border'] = 'Pixels';
|
||||
$img->attr['hspace'] = 'Pixels';
|
||||
$img->attr['vspace'] = 'Pixels';
|
||||
@@ -136,6 +136,22 @@ class HTMLPurifier_HTMLModule_Legacy extends HTMLPurifier_HTMLModule
|
||||
$ul->attr['compact'] = 'Bool#compact';
|
||||
$ul->attr['type'] = 'Enum#square,disc,circle';
|
||||
|
||||
// "safe" modifications to "unsafe" elements
|
||||
// WARNING: If you want to add support for an unsafe, legacy
|
||||
// attribute, make a new TrustedLegacy module with the trusted
|
||||
// bit set appropriately
|
||||
|
||||
$form = $this->addBlankElement('form');
|
||||
$form->content_model = 'Flow | #PCDATA';
|
||||
$form->content_model_type = 'optional';
|
||||
$form->attr['target'] = 'FrameTarget';
|
||||
|
||||
$input = $this->addBlankElement('input');
|
||||
$input->attr['align'] = 'IAlign';
|
||||
|
||||
$legend = $this->addBlankElement('legend');
|
||||
$legend->attr['align'] = 'LAlign';
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
@@ -20,10 +20,16 @@ class HTMLPurifier_HTMLModule_List extends HTMLPurifier_HTMLModule
|
||||
public $content_sets = array('Flow' => 'List');
|
||||
|
||||
public function setup($config) {
|
||||
$ol = $this->addElement('ol', 'List', 'Required: li', 'Common');
|
||||
$ol->wrap = "li";
|
||||
$ul = $this->addElement('ul', 'List', 'Required: li', 'Common');
|
||||
$ul->wrap = "li";
|
||||
$ol = $this->addElement('ol', 'List', new HTMLPurifier_ChildDef_List(), 'Common');
|
||||
$ul = $this->addElement('ul', 'List', new HTMLPurifier_ChildDef_List(), 'Common');
|
||||
// XXX The wrap attribute is handled by MakeWellFormed. This is all
|
||||
// quite unsatisfactory, because we generated this
|
||||
// *specifically* for lists, and now a big chunk of the handling
|
||||
// is done properly by the List ChildDef. So actually, we just
|
||||
// want enough information to make autoclosing work properly,
|
||||
// and then hand off the tricky stuff to the ChildDef.
|
||||
$ol->wrap = 'li';
|
||||
$ul->wrap = 'li';
|
||||
$this->addElement('dl', 'List', 'Required: dt | dd', 'Common');
|
||||
|
||||
$this->addElement('li', false, 'Flow', 'Common');
|
||||
|
@@ -11,7 +11,7 @@ class HTMLPurifier_HTMLModule_Name extends HTMLPurifier_HTMLModule
|
||||
$element = $this->addBlankElement($name);
|
||||
$element->attr['name'] = 'CDATA';
|
||||
if (!$config->get('HTML.Attr.Name.UseCDATA')) {
|
||||
$element->attr_transform_post['NameSync'] = new HTMLPurifier_AttrTransform_NameSync();
|
||||
$element->attr_transform_post[] = new HTMLPurifier_AttrTransform_NameSync();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
19
library/HTMLPurifier/HTMLModule/Nofollow.php
Normal file
19
library/HTMLPurifier/HTMLModule/Nofollow.php
Normal file
@@ -0,0 +1,19 @@
|
||||
<?php
|
||||
|
||||
/**
|
||||
* Module adds the nofollow attribute transformation to a tags. It
|
||||
* is enabled by HTML.Nofollow
|
||||
*/
|
||||
class HTMLPurifier_HTMLModule_Nofollow extends HTMLPurifier_HTMLModule
|
||||
{
|
||||
|
||||
public $name = 'Nofollow';
|
||||
|
||||
public function setup($config) {
|
||||
$a = $this->addBlankElement('a');
|
||||
$a->attr_transform_post[] = new HTMLPurifier_AttrTransform_Nofollow();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// vim: et sw=4 sts=4
|
@@ -21,7 +21,7 @@ class HTMLPurifier_HTMLModule_SafeEmbed extends HTMLPurifier_HTMLModule
|
||||
'allowscriptaccess' => 'Enum#never',
|
||||
'allownetworking' => 'Enum#internal',
|
||||
'flashvars' => 'Text',
|
||||
'wmode' => 'Enum#window',
|
||||
'wmode' => 'Enum#window,transparent,opaque',
|
||||
'name' => 'ID',
|
||||
)
|
||||
);
|
||||
|
@@ -29,7 +29,6 @@ class HTMLPurifier_HTMLModule_SafeObject extends HTMLPurifier_HTMLModule
|
||||
'width' => 'Pixels#' . $max,
|
||||
'height' => 'Pixels#' . $max,
|
||||
'data' => 'URI#embedded',
|
||||
'classid' => 'Enum#clsid:d27cdb6e-ae6d-11cf-96b8-444553540000',
|
||||
'codebase' => new HTMLPurifier_AttrDef_Enum(array(
|
||||
'http://download.macromedia.com/pub/shockwave/cabs/flash/swflash.cab#version=6,0,40,0')),
|
||||
)
|
||||
|
37
library/HTMLPurifier/HTMLModule/SafeScripting.php
Normal file
37
library/HTMLPurifier/HTMLModule/SafeScripting.php
Normal file
@@ -0,0 +1,37 @@
|
||||
<?php
|
||||
|
||||
/**
|
||||
* A "safe" script module. No inline JS is allowed, and pointed to JS
|
||||
* files must match whitelist.
|
||||
*/
|
||||
class HTMLPurifier_HTMLModule_SafeScripting extends HTMLPurifier_HTMLModule
|
||||
{
|
||||
|
||||
public $name = 'SafeScripting';
|
||||
|
||||
public function setup($config) {
|
||||
|
||||
// These definitions are not intrinsically safe: the attribute transforms
|
||||
// are a vital part of ensuring safety.
|
||||
|
||||
$allowed = $config->get('HTML.SafeScripting');
|
||||
$script = $this->addElement(
|
||||
'script',
|
||||
'Inline',
|
||||
'Empty',
|
||||
null,
|
||||
array(
|
||||
// While technically not required by the spec, we're forcing
|
||||
// it to this value.
|
||||
'type' => 'Enum#text/javascript',
|
||||
'src*' => new HTMLPurifier_AttrDef_Enum(array_keys($allowed))
|
||||
)
|
||||
);
|
||||
$script->attr_transform_pre[] =
|
||||
$script->attr_transform_post[] = new HTMLPurifier_AttrTransform_ScriptRequired();
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// vim: et sw=4 sts=4
|
@@ -45,8 +45,8 @@ class HTMLPurifier_HTMLModule_Scripting extends HTMLPurifier_HTMLModule
|
||||
);
|
||||
$this->info['script']->content_model = '#PCDATA';
|
||||
$this->info['script']->content_model_type = 'optional';
|
||||
$this->info['script']->attr_transform_pre['type'] =
|
||||
$this->info['script']->attr_transform_post['type'] =
|
||||
$this->info['script']->attr_transform_pre[] =
|
||||
$this->info['script']->attr_transform_post[] =
|
||||
new HTMLPurifier_AttrTransform_ScriptRequired();
|
||||
}
|
||||
}
|
||||
|
@@ -37,6 +37,9 @@ class HTMLPurifier_HTMLModule_Tables extends HTMLPurifier_HTMLModule
|
||||
'abbr' => 'Text',
|
||||
'colspan' => 'Number',
|
||||
'rowspan' => 'Number',
|
||||
// Apparently, as of HTML5 this attribute only applies
|
||||
// to 'th' elements.
|
||||
'scope' => 'Enum#row,col,rowgroup,colgroup',
|
||||
),
|
||||
$cell_align
|
||||
);
|
||||
|
19
library/HTMLPurifier/HTMLModule/TargetBlank.php
Normal file
19
library/HTMLPurifier/HTMLModule/TargetBlank.php
Normal file
@@ -0,0 +1,19 @@
|
||||
<?php
|
||||
|
||||
/**
|
||||
* Module adds the target=blank attribute transformation to a tags. It
|
||||
* is enabled by HTML.TargetBlank
|
||||
*/
|
||||
class HTMLPurifier_HTMLModule_TargetBlank extends HTMLPurifier_HTMLModule
|
||||
{
|
||||
|
||||
public $name = 'TargetBlank';
|
||||
|
||||
public function setup($config) {
|
||||
$a = $this->addBlankElement('a');
|
||||
$a->attr_transform_post[] = new HTMLPurifier_AttrTransform_TargetBlank();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// vim: et sw=4 sts=4
|
@@ -65,11 +65,11 @@ class HTMLPurifier_HTMLModuleManager
|
||||
'Presentation', 'Edit', 'Bdo', 'Tables', 'Image',
|
||||
'StyleAttribute',
|
||||
// Unsafe:
|
||||
'Scripting', 'Object', 'Forms',
|
||||
'Scripting', 'Object', 'Forms',
|
||||
// Sorta legacy, but present in strict:
|
||||
'Name',
|
||||
);
|
||||
$transitional = array('Legacy', 'Target');
|
||||
$transitional = array('Legacy', 'Target', 'Iframe');
|
||||
$xml = array('XMLCommonAttributes');
|
||||
$non_xml = array('NonXMLCommonAttributes');
|
||||
|
||||
@@ -112,7 +112,9 @@ class HTMLPurifier_HTMLModuleManager
|
||||
|
||||
$this->doctypes->register(
|
||||
'XHTML 1.1', true,
|
||||
array_merge($common, $xml, array('Ruby')),
|
||||
// Iframe is a real XHTML 1.1 module, despite being
|
||||
// "transitional"!
|
||||
array_merge($common, $xml, array('Ruby', 'Iframe')),
|
||||
array('Tidy_Strict', 'Tidy_XHTML', 'Tidy_Proprietary', 'Tidy_Strict', 'Tidy_Name'), // Tidy_XHTML1_1
|
||||
array(),
|
||||
'-//W3C//DTD XHTML 1.1//EN',
|
||||
@@ -216,19 +218,25 @@ class HTMLPurifier_HTMLModuleManager
|
||||
}
|
||||
}
|
||||
|
||||
// add proprietary module (this gets special treatment because
|
||||
// it is completely removed from doctypes, etc.)
|
||||
// custom modules
|
||||
if ($config->get('HTML.Proprietary')) {
|
||||
$modules[] = 'Proprietary';
|
||||
}
|
||||
|
||||
// add SafeObject/Safeembed modules
|
||||
if ($config->get('HTML.SafeObject')) {
|
||||
$modules[] = 'SafeObject';
|
||||
}
|
||||
if ($config->get('HTML.SafeEmbed')) {
|
||||
$modules[] = 'SafeEmbed';
|
||||
}
|
||||
if ($config->get('HTML.SafeScripting') !== array()) {
|
||||
$modules[] = 'SafeScripting';
|
||||
}
|
||||
if ($config->get('HTML.Nofollow')) {
|
||||
$modules[] = 'Nofollow';
|
||||
}
|
||||
if ($config->get('HTML.TargetBlank')) {
|
||||
$modules[] = 'TargetBlank';
|
||||
}
|
||||
|
||||
// merge in custom modules
|
||||
$modules = array_merge($modules, $this->userModules);
|
||||
@@ -364,6 +372,13 @@ class HTMLPurifier_HTMLModuleManager
|
||||
// :TODO:
|
||||
// non-standalone definitions that don't have a standalone
|
||||
// to merge into could be deferred to the end
|
||||
// HOWEVER, it is perfectly valid for a non-standalone
|
||||
// definition to lack a standalone definition, even
|
||||
// after all processing: this allows us to safely
|
||||
// specify extra attributes for elements that may not be
|
||||
// enabled all in one place. In particular, this might
|
||||
// be the case for trusted elements. WARNING: care must
|
||||
// be taken that the /extra/ definitions are all safe.
|
||||
continue;
|
||||
}
|
||||
|
||||
|
@@ -5,6 +5,9 @@ class HTMLPurifier_Injector_RemoveEmpty extends HTMLPurifier_Injector
|
||||
|
||||
private $context, $config, $attrValidator, $removeNbsp, $removeNbspExceptions;
|
||||
|
||||
// TODO: make me configurable
|
||||
private $_exclude = array('colgroup' => 1, 'th' => 1, 'td' => 1, 'iframe' => 1);
|
||||
|
||||
public function prepare($config, $context) {
|
||||
parent::prepare($config, $context);
|
||||
$this->config = $config;
|
||||
@@ -30,7 +33,7 @@ class HTMLPurifier_Injector_RemoveEmpty extends HTMLPurifier_Injector
|
||||
break;
|
||||
}
|
||||
if (!$next || ($next instanceof HTMLPurifier_Token_End && $next->name == $token->name)) {
|
||||
if ($token->name == 'colgroup') return;
|
||||
if (isset($this->_exclude[$token->name])) return;
|
||||
$this->attrValidator->validateToken($token, $this->config, $this->context);
|
||||
$token->armor['ValidateAttributes'] = true;
|
||||
if (isset($token->attr['id']) || isset($token->attr['name'])) return;
|
||||
|
@@ -22,6 +22,7 @@ class HTMLPurifier_Injector_SafeObject extends HTMLPurifier_Injector
|
||||
'movie' => true,
|
||||
'flashvars' => true,
|
||||
'src' => true,
|
||||
'allowFullScreen' => true, // if omitted, assume to be 'false'
|
||||
);
|
||||
|
||||
public function prepare($config, $context) {
|
||||
|
@@ -230,6 +230,17 @@ class HTMLPurifier_Lexer
|
||||
);
|
||||
}
|
||||
|
||||
/**
|
||||
* Special Internet Explorer conditional comments should be removed.
|
||||
*/
|
||||
protected static function removeIEConditional($string) {
|
||||
return preg_replace(
|
||||
'#<!--\[if [^>]+\]>.*?<!\[endif\]-->#si', // probably should generalize for all strings
|
||||
'',
|
||||
$string
|
||||
);
|
||||
}
|
||||
|
||||
/**
|
||||
* Callback function for escapeCDATA() that does the work.
|
||||
*
|
||||
@@ -252,8 +263,10 @@ class HTMLPurifier_Lexer
|
||||
public function normalize($html, $config, $context) {
|
||||
|
||||
// normalize newlines to \n
|
||||
$html = str_replace("\r\n", "\n", $html);
|
||||
$html = str_replace("\r", "\n", $html);
|
||||
if ($config->get('Core.NormalizeNewlines')) {
|
||||
$html = str_replace("\r\n", "\n", $html);
|
||||
$html = str_replace("\r", "\n", $html);
|
||||
}
|
||||
|
||||
if ($config->get('HTML.Trusted')) {
|
||||
// escape convoluted CDATA
|
||||
@@ -263,6 +276,8 @@ class HTMLPurifier_Lexer
|
||||
// escape CDATA
|
||||
$html = $this->escapeCDATA($html);
|
||||
|
||||
$html = $this->removeIEConditional($html);
|
||||
|
||||
// extract body from document if applicable
|
||||
if ($config->get('Core.ConvertDocumentToFragment')) {
|
||||
$e = false;
|
||||
@@ -284,6 +299,11 @@ class HTMLPurifier_Lexer
|
||||
// represent non-SGML characters (horror, horror!)
|
||||
$html = HTMLPurifier_Encoder::cleanUTF8($html);
|
||||
|
||||
// if processing instructions are to removed, remove them now
|
||||
if ($config->get('Core.RemoveProcessingInstructions')) {
|
||||
$html = preg_replace('#<\?.+?\?>#s', '', $html);
|
||||
}
|
||||
|
||||
return $html;
|
||||
}
|
||||
|
||||
|
@@ -72,23 +72,57 @@ class HTMLPurifier_Lexer_DOMLex extends HTMLPurifier_Lexer
|
||||
}
|
||||
|
||||
/**
|
||||
* Recursive function that tokenizes a node, putting it into an accumulator.
|
||||
*
|
||||
* Iterative function that tokenizes a node, putting it into an accumulator.
|
||||
* To iterate is human, to recurse divine - L. Peter Deutsch
|
||||
* @param $node DOMNode to be tokenized.
|
||||
* @param $tokens Array-list of already tokenized tokens.
|
||||
* @param $collect Says whether or start and close are collected, set to
|
||||
* false at first recursion because it's the implicit DIV
|
||||
* tag you're dealing with.
|
||||
* @returns Tokens of node appended to previously passed tokens.
|
||||
*/
|
||||
protected function tokenizeDOM($node, &$tokens, $collect = false) {
|
||||
protected function tokenizeDOM($node, &$tokens) {
|
||||
|
||||
$level = 0;
|
||||
$nodes = array($level => array($node));
|
||||
$closingNodes = array();
|
||||
do {
|
||||
while (!empty($nodes[$level])) {
|
||||
$node = array_shift($nodes[$level]); // FIFO
|
||||
$collect = $level > 0 ? true : false;
|
||||
$needEndingTag = $this->createStartNode($node, $tokens, $collect);
|
||||
if ($needEndingTag) {
|
||||
$closingNodes[$level][] = $node;
|
||||
}
|
||||
if ($node->childNodes && $node->childNodes->length) {
|
||||
$level++;
|
||||
$nodes[$level] = array();
|
||||
foreach ($node->childNodes as $childNode) {
|
||||
array_push($nodes[$level], $childNode);
|
||||
}
|
||||
}
|
||||
}
|
||||
$level--;
|
||||
if ($level && isset($closingNodes[$level])) {
|
||||
while($node = array_pop($closingNodes[$level])) {
|
||||
$this->createEndNode($node, $tokens);
|
||||
}
|
||||
}
|
||||
} while ($level > 0);
|
||||
}
|
||||
|
||||
/**
|
||||
* @param $node DOMNode to be tokenized.
|
||||
* @param $tokens Array-list of already tokenized tokens.
|
||||
* @param $collect Says whether or start and close are collected, set to
|
||||
* false at first recursion because it's the implicit DIV
|
||||
* tag you're dealing with.
|
||||
* @returns bool if the token needs an endtoken
|
||||
*/
|
||||
protected function createStartNode($node, &$tokens, $collect) {
|
||||
// intercept non element nodes. WE MUST catch all of them,
|
||||
// but we're not getting the character reference nodes because
|
||||
// those should have been preprocessed
|
||||
if ($node->nodeType === XML_TEXT_NODE) {
|
||||
$tokens[] = $this->factory->createText($node->data);
|
||||
return;
|
||||
return false;
|
||||
} elseif ($node->nodeType === XML_CDATA_SECTION_NODE) {
|
||||
// undo libxml's special treatment of <script> and <style> tags
|
||||
$last = end($tokens);
|
||||
@@ -106,48 +140,44 @@ class HTMLPurifier_Lexer_DOMLex extends HTMLPurifier_Lexer
|
||||
}
|
||||
}
|
||||
$tokens[] = $this->factory->createText($this->parseData($data));
|
||||
return;
|
||||
return false;
|
||||
} elseif ($node->nodeType === XML_COMMENT_NODE) {
|
||||
// this is code is only invoked for comments in script/style in versions
|
||||
// of libxml pre-2.6.28 (regular comments, of course, are still
|
||||
// handled regularly)
|
||||
$tokens[] = $this->factory->createComment($node->data);
|
||||
return;
|
||||
return false;
|
||||
} elseif (
|
||||
// not-well tested: there may be other nodes we have to grab
|
||||
$node->nodeType !== XML_ELEMENT_NODE
|
||||
) {
|
||||
return;
|
||||
return false;
|
||||
}
|
||||
|
||||
$attr = $node->hasAttributes() ?
|
||||
$this->transformAttrToAssoc($node->attributes) :
|
||||
array();
|
||||
$attr = $node->hasAttributes() ? $this->transformAttrToAssoc($node->attributes) : array();
|
||||
|
||||
// We still have to make sure that the element actually IS empty
|
||||
if (!$node->childNodes->length) {
|
||||
if ($collect) {
|
||||
$tokens[] = $this->factory->createEmpty($node->tagName, $attr);
|
||||
}
|
||||
return false;
|
||||
} else {
|
||||
if ($collect) { // don't wrap on first iteration
|
||||
if ($collect) {
|
||||
$tokens[] = $this->factory->createStart(
|
||||
$tag_name = $node->tagName, // somehow, it get's dropped
|
||||
$attr
|
||||
);
|
||||
}
|
||||
foreach ($node->childNodes as $node) {
|
||||
// remember, it's an accumulator. Otherwise, we'd have
|
||||
// to use array_merge
|
||||
$this->tokenizeDOM($node, $tokens, true);
|
||||
}
|
||||
if ($collect) {
|
||||
$tokens[] = $this->factory->createEnd($tag_name);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
protected function createEndNode($node, &$tokens) {
|
||||
$tokens[] = $this->factory->createEnd($node->tagName);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Converts a DOMNamedNodeMap of DOMAttr objects into an assoc array.
|
||||
*
|
||||
|
@@ -1,139 +0,0 @@
|
||||
<?php
|
||||
|
||||
/**
|
||||
* Proof-of-concept lexer that uses the PEAR package XML_HTMLSax3 to parse HTML.
|
||||
*
|
||||
* PEAR, not suprisingly, also has a SAX parser for HTML. I don't know
|
||||
* very much about implementation, but it's fairly well written. However, that
|
||||
* abstraction comes at a price: performance. You need to have it installed,
|
||||
* and if the API changes, it might break our adapter. Not sure whether or not
|
||||
* it's UTF-8 aware, but it has some entity parsing trouble (in all areas,
|
||||
* text and attributes).
|
||||
*
|
||||
* Quite personally, I don't recommend using the PEAR class, and the defaults
|
||||
* don't use it. The unit tests do perform the tests on the SAX parser too, but
|
||||
* whatever it does for poorly formed HTML is up to it.
|
||||
*
|
||||
* @todo Generalize so that XML_HTMLSax is also supported.
|
||||
*
|
||||
* @warning Entity-resolution inside attributes is broken.
|
||||
*/
|
||||
|
||||
class HTMLPurifier_Lexer_PEARSax3 extends HTMLPurifier_Lexer
|
||||
{
|
||||
|
||||
/**
|
||||
* Internal accumulator array for SAX parsers.
|
||||
*/
|
||||
protected $tokens = array();
|
||||
protected $last_token_was_empty;
|
||||
|
||||
private $parent_handler;
|
||||
private $stack = array();
|
||||
|
||||
public function tokenizeHTML($string, $config, $context) {
|
||||
|
||||
$this->tokens = array();
|
||||
$this->last_token_was_empty = false;
|
||||
|
||||
$string = $this->normalize($string, $config, $context);
|
||||
|
||||
$this->parent_handler = set_error_handler(array($this, 'muteStrictErrorHandler'));
|
||||
|
||||
$parser = new XML_HTMLSax3();
|
||||
$parser->set_object($this);
|
||||
$parser->set_element_handler('openHandler','closeHandler');
|
||||
$parser->set_data_handler('dataHandler');
|
||||
$parser->set_escape_handler('escapeHandler');
|
||||
|
||||
// doesn't seem to work correctly for attributes
|
||||
$parser->set_option('XML_OPTION_ENTITIES_PARSED', 1);
|
||||
|
||||
$parser->parse($string);
|
||||
|
||||
restore_error_handler();
|
||||
|
||||
return $this->tokens;
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* Open tag event handler, interface is defined by PEAR package.
|
||||
*/
|
||||
public function openHandler(&$parser, $name, $attrs, $closed) {
|
||||
// entities are not resolved in attrs
|
||||
foreach ($attrs as $key => $attr) {
|
||||
$attrs[$key] = $this->parseData($attr);
|
||||
}
|
||||
if ($closed) {
|
||||
$this->tokens[] = new HTMLPurifier_Token_Empty($name, $attrs);
|
||||
$this->last_token_was_empty = true;
|
||||
} else {
|
||||
$this->tokens[] = new HTMLPurifier_Token_Start($name, $attrs);
|
||||
}
|
||||
$this->stack[] = $name;
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Close tag event handler, interface is defined by PEAR package.
|
||||
*/
|
||||
public function closeHandler(&$parser, $name) {
|
||||
// HTMLSax3 seems to always send empty tags an extra close tag
|
||||
// check and ignore if you see it:
|
||||
// [TESTME] to make sure it doesn't overreach
|
||||
if ($this->last_token_was_empty) {
|
||||
$this->last_token_was_empty = false;
|
||||
return true;
|
||||
}
|
||||
$this->tokens[] = new HTMLPurifier_Token_End($name);
|
||||
if (!empty($this->stack)) array_pop($this->stack);
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Data event handler, interface is defined by PEAR package.
|
||||
*/
|
||||
public function dataHandler(&$parser, $data) {
|
||||
$this->last_token_was_empty = false;
|
||||
$this->tokens[] = new HTMLPurifier_Token_Text($data);
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Escaped text handler, interface is defined by PEAR package.
|
||||
*/
|
||||
public function escapeHandler(&$parser, $data) {
|
||||
if (strpos($data, '--') === 0) {
|
||||
// remove trailing and leading double-dashes
|
||||
$data = substr($data, 2);
|
||||
if (strlen($data) >= 2 && substr($data, -2) == "--") {
|
||||
$data = substr($data, 0, -2);
|
||||
}
|
||||
if (isset($this->stack[sizeof($this->stack) - 1]) &&
|
||||
$this->stack[sizeof($this->stack) - 1] == "style") {
|
||||
$this->tokens[] = new HTMLPurifier_Token_Text($data);
|
||||
} else {
|
||||
$this->tokens[] = new HTMLPurifier_Token_Comment($data);
|
||||
}
|
||||
$this->last_token_was_empty = false;
|
||||
}
|
||||
// CDATA is handled elsewhere, but if it was handled here:
|
||||
//if (strpos($data, '[CDATA[') === 0) {
|
||||
// $this->tokens[] = new HTMLPurifier_Token_Text(
|
||||
// substr($data, 7, strlen($data) - 9) );
|
||||
//}
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* An error handler that mutes strict errors
|
||||
*/
|
||||
public function muteStrictErrorHandler($errno, $errstr, $errfile=null, $errline=null, $errcontext=null) {
|
||||
if ($errno == E_STRICT) return;
|
||||
return call_user_func($this->parent_handler, $errno, $errstr, $errfile, $errline, $errcontext);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// vim: et sw=4 sts=4
|
@@ -125,8 +125,6 @@ class HTML5 {
|
||||
const EOF = 5;
|
||||
|
||||
public function __construct($data) {
|
||||
$data = str_replace("\r\n", "\n", $data);
|
||||
$data = str_replace("\r", null, $data);
|
||||
|
||||
$this->data = $data;
|
||||
$this->char = -1;
|
||||
|
@@ -11,8 +11,6 @@ abstract class HTMLPurifier_Strategy_Composite extends HTMLPurifier_Strategy
|
||||
*/
|
||||
protected $strategies = array();
|
||||
|
||||
abstract public function __construct();
|
||||
|
||||
public function execute($tokens, $config, $context) {
|
||||
foreach ($this->strategies as $strategy) {
|
||||
$tokens = $strategy->execute($tokens, $config, $context);
|
||||
|
@@ -26,6 +26,22 @@
|
||||
* translated into text depends on the child definitions.
|
||||
*
|
||||
* @todo Enable nodes to be bubbled out of the structure.
|
||||
*
|
||||
* @warning This algorithm (though it may be hard to see) proceeds from
|
||||
* a top-down fashion. Thus, parents are processed before
|
||||
* children. This is easy to implement and has a nice effiency
|
||||
* benefit, in that if a node is removed, we never waste any
|
||||
* time processing it, but it also means that if a child
|
||||
* changes in a non-encapsulated way (e.g. it is removed), we
|
||||
* need to go back and reprocess the parent to see if those
|
||||
* changes resulted in problems for the parent. See
|
||||
* [BACKTRACK] for an example of this. In the current
|
||||
* implementation, this backtracking can only be triggered when
|
||||
* a node is removed and if that node was the sole node, the
|
||||
* parent would need to be removed. As such, it is easy to see
|
||||
* that backtracking only incurs constant overhead. If more
|
||||
* sophisticated backtracking is implemented, care must be
|
||||
* taken to avoid nontermination or exponential blowup.
|
||||
*/
|
||||
|
||||
class HTMLPurifier_Strategy_FixNesting extends HTMLPurifier_Strategy
|
||||
@@ -38,6 +54,8 @@ class HTMLPurifier_Strategy_FixNesting extends HTMLPurifier_Strategy
|
||||
// get a copy of the HTML definition
|
||||
$definition = $config->getHTMLDefinition();
|
||||
|
||||
$excludes_enabled = !$config->get('Core.DisableExcludes');
|
||||
|
||||
// insert implicit "parent" node, will be removed at end.
|
||||
// DEFINITION CALL
|
||||
$parent_name = $definition->info_parent;
|
||||
@@ -147,7 +165,7 @@ class HTMLPurifier_Strategy_FixNesting extends HTMLPurifier_Strategy
|
||||
// parent exclusions. The array should not be very large, two
|
||||
// elements at most.
|
||||
$excluded = false;
|
||||
if (!empty($exclude_stack)) {
|
||||
if (!empty($exclude_stack) && $excludes_enabled) {
|
||||
foreach ($exclude_stack as $lookup) {
|
||||
if (isset($lookup[$tokens[$i]->name])) {
|
||||
$excluded = true;
|
||||
@@ -235,7 +253,7 @@ class HTMLPurifier_Strategy_FixNesting extends HTMLPurifier_Strategy
|
||||
// our current implementation claims that that case would
|
||||
// not allow empty, even if it did
|
||||
if (!$parent_def->child->allow_empty) {
|
||||
// we need to do a double-check
|
||||
// we need to do a double-check [BACKTRACK]
|
||||
$i = $parent_index;
|
||||
array_pop($stack);
|
||||
}
|
||||
|
@@ -2,6 +2,14 @@
|
||||
|
||||
/**
|
||||
* Takes tokens makes them well-formed (balance end tags, etc.)
|
||||
*
|
||||
* Specification of the armor attributes this strategy uses:
|
||||
*
|
||||
* - MakeWellFormed_TagClosedError: This armor field is used to
|
||||
* suppress tag closed errors for certain tokens [TagClosedSuppress],
|
||||
* in particular, if a tag was generated automatically by HTML
|
||||
* Purifier, we may rely on our infrastructure to close it for us
|
||||
* and shouldn't report an error to the user [TagClosedAuto].
|
||||
*/
|
||||
class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
|
||||
{
|
||||
@@ -37,12 +45,18 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
|
||||
protected $context;
|
||||
|
||||
public function execute($tokens, $config, $context) {
|
||||
|
||||
$tokens = new HTMLPurifier_Array($tokens);
|
||||
$definition = $config->getHTMLDefinition();
|
||||
|
||||
// local variables
|
||||
$generator = new HTMLPurifier_Generator($config, $context);
|
||||
$escape_invalid_tags = $config->get('Core.EscapeInvalidTags');
|
||||
// used for autoclose early abortion
|
||||
$global_parent_allowed_elements = array();
|
||||
if (isset($definition->info[$definition->info_parent])) {
|
||||
// may be unset under testing circumstances
|
||||
$global_parent_allowed_elements = $definition->info[$definition->info_parent]->child->getAllowedElements($config);
|
||||
}
|
||||
$e = $context->get('ErrorCollector', true);
|
||||
$t = false; // token index
|
||||
$i = false; // injector index
|
||||
@@ -102,7 +116,7 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
|
||||
|
||||
// -- end INJECTOR --
|
||||
|
||||
// a note on punting:
|
||||
// a note on reprocessing:
|
||||
// In order to reduce code duplication, whenever some code needs
|
||||
// to make HTML changes in order to make things "correct", the
|
||||
// new HTML gets sent through the purifier, regardless of its
|
||||
@@ -149,7 +163,7 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
|
||||
$top_nesting = array_pop($this->stack);
|
||||
$this->stack[] = $top_nesting;
|
||||
|
||||
// send error
|
||||
// send error [TagClosedSuppress]
|
||||
if ($e && !isset($top_nesting->armor['MakeWellFormed_TagClosedError'])) {
|
||||
$e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag closed by document end', $top_nesting);
|
||||
}
|
||||
@@ -193,12 +207,12 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
|
||||
$ok = false;
|
||||
if ($type === 'empty' && $token instanceof HTMLPurifier_Token_Start) {
|
||||
// claims to be a start tag but is empty
|
||||
$token = new HTMLPurifier_Token_Empty($token->name, $token->attr);
|
||||
$token = new HTMLPurifier_Token_Empty($token->name, $token->attr, $token->line, $token->col, $token->armor);
|
||||
$ok = true;
|
||||
} elseif ($type && $type !== 'empty' && $token instanceof HTMLPurifier_Token_Empty) {
|
||||
// claims to be empty but really is a start tag
|
||||
$this->swap(new HTMLPurifier_Token_End($token->name));
|
||||
$this->insertBefore(new HTMLPurifier_Token_Start($token->name, $token->attr));
|
||||
$this->insertBefore(new HTMLPurifier_Token_Start($token->name, $token->attr, $token->line, $token->col, $token->armor));
|
||||
// punt (since we had to modify the input stream in a non-trivial way)
|
||||
$reprocess = true;
|
||||
continue;
|
||||
@@ -211,6 +225,19 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
|
||||
// ...unless they also have to close their parent
|
||||
if (!empty($this->stack)) {
|
||||
|
||||
// Performance note: you might think that it's rather
|
||||
// inefficient, recalculating the autoclose information
|
||||
// for every tag that a token closes (since when we
|
||||
// do an autoclose, we push a new token into the
|
||||
// stream and then /process/ that, before
|
||||
// re-processing this token.) But this is
|
||||
// necessary, because an injector can make an
|
||||
// arbitrary transformations to the autoclosing
|
||||
// tokens we introduce, so things may have changed
|
||||
// in the meantime. Also, doing the inefficient thing is
|
||||
// "easy" to reason about (for certain perverse definitions
|
||||
// of "easy")
|
||||
|
||||
$parent = array_pop($this->stack);
|
||||
$this->stack[] = $parent;
|
||||
|
||||
@@ -243,24 +270,51 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
|
||||
}
|
||||
|
||||
if ($autoclose) {
|
||||
// errors need to be updated
|
||||
$new_token = new HTMLPurifier_Token_End($parent->name);
|
||||
$new_token->start = $parent;
|
||||
if ($carryover) {
|
||||
$element = clone $parent;
|
||||
$element->armor['MakeWellFormed_TagClosedError'] = true;
|
||||
$element->carryover = true;
|
||||
$this->processToken(array($new_token, $token, $element));
|
||||
} else {
|
||||
$this->insertBefore($new_token);
|
||||
}
|
||||
if ($e && !isset($parent->armor['MakeWellFormed_TagClosedError'])) {
|
||||
if (!$carryover) {
|
||||
$e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag auto closed', $parent);
|
||||
} else {
|
||||
$e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag carryover', $parent);
|
||||
// check if this autoclose is doomed to fail
|
||||
// (this rechecks $parent, which his harmless)
|
||||
$autoclose_ok = isset($global_parent_allowed_elements[$token->name]);
|
||||
if (!$autoclose_ok) {
|
||||
foreach ($this->stack as $ancestor) {
|
||||
$elements = $definition->info[$ancestor->name]->child->getAllowedElements($config);
|
||||
if (isset($elements[$token->name])) {
|
||||
$autoclose_ok = true;
|
||||
break;
|
||||
}
|
||||
if ($definition->info[$token->name]->wrap) {
|
||||
$wrapname = $definition->info[$token->name]->wrap;
|
||||
$wrapdef = $definition->info[$wrapname];
|
||||
$wrap_elements = $wrapdef->child->getAllowedElements($config);
|
||||
if (isset($wrap_elements[$token->name]) && isset($elements[$wrapname])) {
|
||||
$autoclose_ok = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if ($autoclose_ok) {
|
||||
// errors need to be updated
|
||||
$new_token = new HTMLPurifier_Token_End($parent->name);
|
||||
$new_token->start = $parent;
|
||||
if ($carryover) {
|
||||
$element = clone $parent;
|
||||
// [TagClosedAuto]
|
||||
$element->armor['MakeWellFormed_TagClosedError'] = true;
|
||||
$element->carryover = true;
|
||||
$this->processToken(array($new_token, $token, $element));
|
||||
} else {
|
||||
$this->insertBefore($new_token);
|
||||
}
|
||||
// [TagClosedSuppress]
|
||||
if ($e && !isset($parent->armor['MakeWellFormed_TagClosedError'])) {
|
||||
if (!$carryover) {
|
||||
$e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag auto closed', $parent);
|
||||
} else {
|
||||
$e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag carryover', $parent);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
$this->remove();
|
||||
}
|
||||
$reprocess = true;
|
||||
continue;
|
||||
}
|
||||
@@ -366,7 +420,7 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
|
||||
if ($e) {
|
||||
for ($j = $c - 1; $j > 0; $j--) {
|
||||
// notice we exclude $j == 0, i.e. the current ending tag, from
|
||||
// the errors...
|
||||
// the errors... [TagClosedSuppress]
|
||||
if (!isset($skipped_tags[$j]->armor['MakeWellFormed_TagClosedError'])) {
|
||||
$e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag closed by element end', $skipped_tags[$j]);
|
||||
}
|
||||
@@ -381,6 +435,7 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
|
||||
$new_token->start = $skipped_tags[$j];
|
||||
array_unshift($replace, $new_token);
|
||||
if (isset($definition->info[$new_token->name]) && $definition->info[$new_token->name]->formatting) {
|
||||
// [TagClosedAuto]
|
||||
$element = clone $skipped_tags[$j];
|
||||
$element->carryover = true;
|
||||
$element->armor['MakeWellFormed_TagClosedError'] = true;
|
||||
@@ -398,7 +453,7 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
|
||||
$context->destroy('CurrentToken');
|
||||
|
||||
unset($this->injectors, $this->stack, $this->tokens, $this->t);
|
||||
return $tokens;
|
||||
return $tokens->getArray();
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -435,6 +490,7 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
|
||||
// array(number nodes to delete, new node 1, new node 2, ...)
|
||||
|
||||
$delete = array_shift($token);
|
||||
throw new Exception("unsupported");
|
||||
$old = array_splice($this->tokens, $this->t, $delete, $token);
|
||||
|
||||
if ($injector > -1) {
|
||||
@@ -449,22 +505,24 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
|
||||
}
|
||||
|
||||
/**
|
||||
* Inserts a token before the current token. Cursor now points to this token
|
||||
* Inserts a token before the current token. Cursor now points to
|
||||
* this token. You must reprocess after this.
|
||||
*/
|
||||
private function insertBefore($token) {
|
||||
array_splice($this->tokens, $this->t, 0, array($token));
|
||||
$this->tokens->insertBefore($this->t, $token);
|
||||
}
|
||||
|
||||
/**
|
||||
* Removes current token. Cursor now points to new token occupying previously
|
||||
* occupied space.
|
||||
* occupied space. You must reprocess after this.
|
||||
*/
|
||||
private function remove() {
|
||||
array_splice($this->tokens, $this->t, 1);
|
||||
$this->tokens->remove($this->t);
|
||||
}
|
||||
|
||||
/**
|
||||
* Swap current token with new token. Cursor points to new token (no change).
|
||||
* Swap current token with new token. Cursor points to new token (no
|
||||
* change). You must reprocess after this.
|
||||
*/
|
||||
private function swap($token) {
|
||||
$this->tokens[$this->t] = $token;
|
||||
|
@@ -21,6 +21,9 @@ class HTMLPurifier_Strategy_RemoveForeignElements extends HTMLPurifier_Strategy
|
||||
|
||||
// currently only used to determine if comments should be kept
|
||||
$trusted = $config->get('HTML.Trusted');
|
||||
$comment_lookup = $config->get('HTML.AllowedComments');
|
||||
$comment_regexp = $config->get('HTML.AllowedCommentsRegexp');
|
||||
$check_comments = $comment_lookup !== array() || $comment_regexp !== null;
|
||||
|
||||
$remove_script_contents = $config->get('Core.RemoveScriptContents');
|
||||
$hidden_elements = $config->get('Core.HiddenElements');
|
||||
@@ -128,23 +131,37 @@ class HTMLPurifier_Strategy_RemoveForeignElements extends HTMLPurifier_Strategy
|
||||
if ($textify_comments !== false) {
|
||||
$data = $token->data;
|
||||
$token = new HTMLPurifier_Token_Text($data);
|
||||
} elseif ($trusted) {
|
||||
// keep, but perform comment cleaning
|
||||
} elseif ($trusted || $check_comments) {
|
||||
// always cleanup comments
|
||||
$trailing_hyphen = false;
|
||||
if ($e) {
|
||||
// perform check whether or not there's a trailing hyphen
|
||||
if (substr($token->data, -1) == '-') {
|
||||
$e->send(E_NOTICE, 'Strategy_RemoveForeignElements: Trailing hyphen in comment removed');
|
||||
$trailing_hyphen = true;
|
||||
}
|
||||
}
|
||||
$token->data = rtrim($token->data, '-');
|
||||
$found_double_hyphen = false;
|
||||
while (strpos($token->data, '--') !== false) {
|
||||
if ($e && !$found_double_hyphen) {
|
||||
$e->send(E_NOTICE, 'Strategy_RemoveForeignElements: Hyphens in comment collapsed');
|
||||
}
|
||||
$found_double_hyphen = true; // prevent double-erroring
|
||||
$found_double_hyphen = true;
|
||||
$token->data = str_replace('--', '-', $token->data);
|
||||
}
|
||||
if ($trusted || !empty($comment_lookup[trim($token->data)]) || ($comment_regexp !== NULL && preg_match($comment_regexp, trim($token->data)))) {
|
||||
// OK good
|
||||
if ($e) {
|
||||
if ($trailing_hyphen) {
|
||||
$e->send(E_NOTICE, 'Strategy_RemoveForeignElements: Trailing hyphen in comment removed');
|
||||
}
|
||||
if ($found_double_hyphen) {
|
||||
$e->send(E_NOTICE, 'Strategy_RemoveForeignElements: Hyphens in comment collapsed');
|
||||
}
|
||||
}
|
||||
} else {
|
||||
if ($e) {
|
||||
$e->send(E_NOTICE, 'Strategy_RemoveForeignElements: Comment removed');
|
||||
}
|
||||
continue;
|
||||
}
|
||||
} else {
|
||||
// strip comments
|
||||
if ($e) $e->send(E_NOTICE, 'Strategy_RemoveForeignElements: Comment removed');
|
||||
|
@@ -63,13 +63,15 @@ class HTMLPurifier_TagTransform_Font extends HTMLPurifier_TagTransform
|
||||
// handle size transform
|
||||
if (isset($attr['size'])) {
|
||||
// normalize large numbers
|
||||
if ($attr['size']{0} == '+' || $attr['size']{0} == '-') {
|
||||
$size = (int) $attr['size'];
|
||||
if ($size < -2) $attr['size'] = '-2';
|
||||
if ($size > 4) $attr['size'] = '+4';
|
||||
} else {
|
||||
$size = (int) $attr['size'];
|
||||
if ($size > 7) $attr['size'] = '7';
|
||||
if ($attr['size'] !== '') {
|
||||
if ($attr['size']{0} == '+' || $attr['size']{0} == '-') {
|
||||
$size = (int) $attr['size'];
|
||||
if ($size < -2) $attr['size'] = '-2';
|
||||
if ($size > 4) $attr['size'] = '+4';
|
||||
} else {
|
||||
$size = (int) $attr['size'];
|
||||
if ($size > 7) $attr['size'] = '7';
|
||||
}
|
||||
}
|
||||
if (isset($this->_size_lookup[$attr['size']])) {
|
||||
$prepend_style .= 'font-size:' .
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user