mirror of
https://github.com/ezyang/htmlpurifier.git
synced 2025-08-04 21:28:06 +02:00
Compare commits
2 Commits
v2.0.0-str
...
v2.1.0-str
Author | SHA1 | Date | |
---|---|---|---|
|
678a593e62 | ||
|
495164e938 |
2
Doxyfile
2
Doxyfile
@@ -4,7 +4,7 @@
|
||||
# Project related configuration options
|
||||
#---------------------------------------------------------------------------
|
||||
PROJECT_NAME = HTML Purifier
|
||||
PROJECT_NUMBER = 2.0.0
|
||||
PROJECT_NUMBER = 2.1.0
|
||||
OUTPUT_DIRECTORY = "C:/Documents and Settings/Edward/My Documents/My Webs/htmlpurifier/docs/doxygen"
|
||||
CREATE_SUBDIRS = NO
|
||||
OUTPUT_LANGUAGE = English
|
||||
|
53
INSTALL
53
INSTALL
@@ -49,7 +49,7 @@ be standards compliant. HTML Purifier can deal with these doctypes:
|
||||
* XHTML 1.0 Strict
|
||||
* HTML 4.01 Transitional
|
||||
* HTML 4.01 Strict
|
||||
* XHTML 1.1 sans Ruby
|
||||
* XHTML 1.1 (sans Ruby)
|
||||
|
||||
...and these character encodings:
|
||||
|
||||
@@ -117,8 +117,9 @@ websites):
|
||||
Note that HTML Purifier's support for non-Unicode encodings is crippled by the
|
||||
fact that any character not supported by that encoding will be silently
|
||||
dropped, EVEN if it is ampersand escaped. If you want to work around
|
||||
this, you are welcome to read docs/enduser-utf8.html for a workaround,
|
||||
but please be cognizant of the issues the "solution" creates.
|
||||
this, you are welcome to read docs/enduser-utf8.html for a fix,
|
||||
but please be cognizant of the issues the "solution" creates (for this
|
||||
reason, I do not include the solution in this document).
|
||||
|
||||
|
||||
|
||||
@@ -148,7 +149,9 @@ Other supported doctypes include:
|
||||
There are more configuration directives which can be read about
|
||||
here: <http://htmlpurifier.org/live/configdoc/plain.html> They're a bit boring,
|
||||
but they can help out for those of you who like to exert maximum control over
|
||||
your code.
|
||||
your code. Some of the more interesting ones are configurable at the
|
||||
demo <http://htmlpurifier.org/demo.php> and are well worth looking into
|
||||
for your own system.
|
||||
|
||||
|
||||
|
||||
@@ -165,13 +168,15 @@ The interface is mind-numbingly simple:
|
||||
$clean_html = $purifier->purify( $dirty_html );
|
||||
|
||||
That's it! For more examples, check out docs/examples/ (they aren't very
|
||||
different though). Also, SLOW gives advice on what to do if HTML Purifier
|
||||
is slowing down your application.
|
||||
different though). Also, docs/enduser-slow.html gives advice on what to
|
||||
do if HTML Purifier is slowing down your application.
|
||||
|
||||
|
||||
|
||||
6. Quick install
|
||||
|
||||
First, make sure library/HTMLPurifier/DefinitionCache/Serializer is
|
||||
writable by the webserver (see Section 7: Caching below for details).
|
||||
If your website is in UTF-8 and XHTML Transitional, use this code:
|
||||
|
||||
<?php
|
||||
@@ -198,6 +203,36 @@ If your website is in a different encoding or doctype, use this code:
|
||||
|
||||
7. Caching
|
||||
|
||||
HTML Purifier generates some cache files to speed up its execution. For
|
||||
maximum performance, make sure that library/HTMLPurifier/DefinitionCache/Serializer
|
||||
is writeable by the webserver.
|
||||
HTML Purifier generates some cache files (generally one or two) to speed up
|
||||
its execution. For maximum performance, make sure that
|
||||
library/HTMLPurifier/DefinitionCache/Serializer is writeable by the webserver.
|
||||
|
||||
If you are in the library/ folder of HTML Purifier, you can set the
|
||||
appropriate permissions using:
|
||||
|
||||
chmod -R 0755 HTMLPurifier/DefinitionCache/Serializer
|
||||
|
||||
If the above command doesn't work, you may need to assign write permissions
|
||||
to all. This may be necessary if your webserver runs as nobody, but is
|
||||
not recommended since it means any other user can write files in the
|
||||
directory. Use:
|
||||
|
||||
chmod -R 0777 HTMLPurifier/DefinitionCache/Serializer
|
||||
|
||||
You can also chmod files via your FTP client; this option
|
||||
is usually accessible by right clicking the corresponding directory and
|
||||
then selecting "chmod" or "file permissions".
|
||||
|
||||
Starting with 2.0.1, HTML Purifier will generate friendly error messages
|
||||
that will tell you exactly what you have to chmod the directory to; if in doubt,
|
||||
follow its advice.
|
||||
|
||||
If you are unable or unwilling to give write permissions to the cache
|
||||
directory, you can either disable the cache (and suffer a performance
|
||||
hit):
|
||||
|
||||
$config->set('Core', 'DefinitionCache', null);
|
||||
|
||||
Or move the cache directory somewhere else (no trailing slash):
|
||||
|
||||
$config->set('Cache', 'SerializerPath', '/home/user/absolute/path');
|
||||
|
44
NEWS
44
NEWS
@@ -9,6 +9,50 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier
|
||||
. Internal change
|
||||
==========================
|
||||
|
||||
2.0.1, released 2007-06-27
|
||||
! Tag auto-closing now based on a ChildDef heuristic rather than a
|
||||
manually set auto_close array; some behavior may change
|
||||
! Experimental AutoFormat functionality added: auto-paragraph and
|
||||
linkify your HTML input by setting %AutoFormat.AutoParagraph and
|
||||
%AutoFormat.Linkify to true
|
||||
! Newlines normalized internally, and then converted back to the
|
||||
value of PHP_EOL. If this is not desired, set your newline format
|
||||
using %Output.Newline.
|
||||
! Beta error collection, messages are implemented for the most generic
|
||||
cases involving Lexing or Strategies
|
||||
- Clean up special case code for <script> tags
|
||||
- Reorder includes for DefinitionCache decorators, fixes a possible
|
||||
missing class error
|
||||
- Fixed bug where manually modified definitions were not saved via cache
|
||||
(mostly harmless, except for the fact that it would be a little slower)
|
||||
- Configuration objects with different serials do not clobber each
|
||||
others when revision numbers are unequal
|
||||
- Improve Serializer DefinitionCache directory permissions checks
|
||||
- DefinitionCache no longer throws errors when it encounters old
|
||||
serial files that do not conform to the current style
|
||||
- Stray xmlns attributes removed from configuration documentation
|
||||
- configForm.php smoketest no longer has XSS vulnerability due to
|
||||
unescaped print_r output
|
||||
- Printer adheres to configuration's directives on output format
|
||||
- Fix improperly named form field in ConfigForm printer
|
||||
. Rewire some test-cases to swallow errors rather than expect them
|
||||
. HTMLDefinition printer updated with some of the new attributes
|
||||
. DefinitionCache keys reordered to reflect precedence: version number,
|
||||
hash, then revision number
|
||||
. %Core.DefinitionCache renamed to %Cache.DefinitionImpl
|
||||
. Interlinking in configuration documentation added using
|
||||
Injector_PurifierLinkify
|
||||
. Directives now keep track of aliases to themselves
|
||||
. Error collector now requires a severity to be passed, use PHP's internal
|
||||
error constants for this
|
||||
. HTMLPurifier_Config::getAllowedDirectivesForForm implemented, allows
|
||||
much easier selective embedding of configuration values
|
||||
. Doctype objects now accept public and system DTD identifiers
|
||||
. %HTML.Doctype is now constrained by specific values, to specify a custom
|
||||
doctype use new %HTML.CustomDoctype
|
||||
. ConfigForm truncates long directives to keep the form small, and does
|
||||
not re-output namespaces
|
||||
|
||||
2.0.0, released 2007-06-20
|
||||
# Completely refactored HTMLModuleManager, decentralizing safety
|
||||
information
|
||||
|
36
TODO
36
TODO
@@ -7,31 +7,17 @@ TODO List
|
||||
==========================
|
||||
|
||||
2.1 release [Refactor, refactor!]
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
# URI validation routines tighter (see docs/dev-code-quality.html) (COMPLEX)
|
||||
# Advanced URI filtering schemes (see docs/proposal-new-directives.txt)
|
||||
# Ruby support
|
||||
- Configuration profiles: predefined directives set with one func call
|
||||
- Implement IDREF support (harder than it seems, since you cannot have
|
||||
IDREFs to non-existent IDs)
|
||||
- Allow non-ASCII characters in font names
|
||||
- Genericize special cases in RemoveForeignElements
|
||||
|
||||
2.2 release [Error'ed]
|
||||
# Error logging for filtering/cleanup procedures
|
||||
- Requires I18N facilities to be created first (COMPLEX)
|
||||
- XSS-attempt detection
|
||||
- More fine-grained control over escaping behavior
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
2.3 release [Do What I Mean, Not What I Say]
|
||||
# Additional support for poorly written HTML
|
||||
@@ -56,14 +42,9 @@ TODO List
|
||||
AttrDef class)
|
||||
# More control over allowed CSS properties (maybe modularize it in the
|
||||
same fashion!)
|
||||
# Formatters for plaintext (COMPLEX)
|
||||
- Auto-paragraphing (be sure to leverage fact that we know when things
|
||||
shouldn't be paragraphed, such as lists and tables).
|
||||
- Linkify URLs
|
||||
# Formatters for plaintext
|
||||
- Smileys
|
||||
- Linkification for HTML Purifier docs: notably configuration and classes
|
||||
- Allow tags to be "armored", an internal flag that protects them
|
||||
from validation and passes them out unharmed
|
||||
- Standardize token armor for all areas of processing
|
||||
- Fixes for Firefox's inability to handle COL alignment props (Bug 915)
|
||||
- Automatically add non-breaking spaces to empty table cells when
|
||||
empty-cells:show is applied to have compatibility with Internet Explorer
|
||||
@@ -75,12 +56,13 @@ TODO List
|
||||
- Hooks for adding custom processors to custom namespaced tags and
|
||||
attributes, offer default implementation
|
||||
- Lots of documentation and samples
|
||||
- XHTML 1.1 support
|
||||
|
||||
Ongoing
|
||||
- Lots of profiling, make it faster!
|
||||
- Plugins for major CMSes (COMPLEX)
|
||||
- WordPress (mostly written, needs beta-testing)
|
||||
- phpBB
|
||||
- Phorum
|
||||
- eFiction
|
||||
- more! (look for ones that use WYSIWYGs)
|
||||
- Complete basic smoketests
|
||||
@@ -89,7 +71,8 @@ Unknown release (on a scratch-an-itch basis)
|
||||
? Semi-lossy dumb alternate character encoding transformations, achieved by
|
||||
encoding all characters that have string entity equivalents
|
||||
? Have 'lang' attribute be checked against official lists
|
||||
- Explain how to use HTML Purifier in non-PHP languages
|
||||
- Explain how to use HTML Purifier in non-PHP languages / create
|
||||
a simple command line stub
|
||||
- Abstract ChildDef_BlockQuote to work with all elements that only
|
||||
allow blocks in them, required or optional
|
||||
- Reorganize Unit Tests
|
||||
@@ -97,10 +80,11 @@ Unknown release (on a scratch-an-itch basis)
|
||||
- Reorganize configuration directives (Create more namespaces! Get messy!)
|
||||
|
||||
Requested
|
||||
? Native content compression, whitespace stripping (don't rely on Tidy, make
|
||||
sure we don't remove from <pre> or related tags)
|
||||
|
||||
Wontfix
|
||||
- Non-lossy smart alternate character encoding transformations (unless
|
||||
patch provided)
|
||||
- Pretty-printing HTML, users can use Tidy on the output on entire page
|
||||
- Native content compression, whitespace stripping (don't rely on Tidy, make
|
||||
sure we don't remove from <pre> or related tags): use gzip if this is
|
||||
really important
|
||||
|
15
WHATSNEW
15
WHATSNEW
@@ -1,7 +1,8 @@
|
||||
HTML Purifier 2.0 is the culmination of two major architectural changes.
|
||||
The first is Tidy, which enables HTML Purifier to both natively support
|
||||
deprecated elements and also convert them to standards-compliant
|
||||
alternatives. The second is the Advanced API, which enables users to
|
||||
create new elements and attributes with ease. Keeping in line with a
|
||||
commitment to high quality, there are also five esoteric bug-fixes and a
|
||||
plethora of subtle improvements that enhance the library.
|
||||
In version 2.1, HTML Purifier's URI validation and filtering handling
|
||||
system has been revamped with a new, extensible URIFilter system. Other
|
||||
notable features include preservation of emoticons in PHP5 with
|
||||
%Core.AggressivelyFixLt, standalone and lite download versions,
|
||||
transforming relative URIs to absolute URIs, Ruby in XHTML 1.1, a Phorum
|
||||
mod, and UTF-8 font names. Notable bug-fixes include refinement of
|
||||
the auto-paragraphing algorithm (no longer experimental), better XHTML
|
||||
1.1 support and the removal of the contents of <style> elements.
|
||||
|
BIN
art/100cases.png
Normal file
BIN
art/100cases.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 2.7 KiB |
@@ -15,5 +15,3 @@ $context = new HTMLPurifier_Context();
|
||||
for ($i = 0; $i < 10; $i++) {
|
||||
$tokens = $lexer->tokenizeHTML($input, $config, $context);
|
||||
}
|
||||
|
||||
?>
|
@@ -18,10 +18,16 @@ TODO:
|
||||
if (version_compare('5', PHP_VERSION, '>')) exit('Requires PHP 5 or higher.');
|
||||
error_reporting(E_ALL); // probably not possible to use E_STRICT
|
||||
|
||||
define('HTMLPURIFIER_SCHEMA_STRICT', true); // description data needs to be collected
|
||||
|
||||
// load dual-libraries
|
||||
require_once '../library/HTMLPurifier.auto.php';
|
||||
require_once 'library/ConfigDoc.auto.php';
|
||||
|
||||
$purifier = HTMLPurifier::getInstance(array(
|
||||
'AutoFormat.PurifierLinkify' => true
|
||||
));
|
||||
|
||||
$schema = HTMLPurifier_ConfigSchema::instance();
|
||||
$style = 'plain'; // use $_GET in the future
|
||||
$configdoc = new ConfigDoc();
|
||||
@@ -37,4 +43,3 @@ if (php_sapi_name() != 'cli') {
|
||||
echo 'Files generated successfully.';
|
||||
}
|
||||
|
||||
?>
|
@@ -7,4 +7,3 @@
|
||||
set_include_path(dirname(__FILE__) . PATH_SEPARATOR . get_include_path() );
|
||||
require_once 'ConfigDoc.php';
|
||||
|
||||
?>
|
@@ -36,4 +36,3 @@ class ConfigDoc
|
||||
|
||||
}
|
||||
|
||||
?>
|
@@ -36,6 +36,7 @@ class ConfigDoc_HTMLXSLTProcessor
|
||||
// fudges for HTML backwards compatibility
|
||||
$out = str_replace('/>', ' />', $out); // <br /> not <br/>
|
||||
$out = str_replace(' xmlns=""', '', $out); // rm unnecessary xmlns
|
||||
$out = str_replace(' xmlns="http://www.w3.org/1999/xhtml"', '', $out); // rm unnecessary xmlns
|
||||
if (class_exists('Tidy')) {
|
||||
// cleanup output
|
||||
$config = array(
|
||||
@@ -59,4 +60,3 @@ class ConfigDoc_HTMLXSLTProcessor
|
||||
|
||||
}
|
||||
|
||||
?>
|
@@ -23,4 +23,3 @@ class ConfigDoc_XMLSerializer
|
||||
|
||||
}
|
||||
|
||||
?>
|
@@ -50,6 +50,12 @@ class ConfigDoc_XMLSerializer_ConfigSchema extends ConfigDoc_XMLSerializer
|
||||
$dom_document->createElement('name', $name)
|
||||
);
|
||||
|
||||
$dom_aliases = $dom_document->createElement('aliases');
|
||||
$dom_directive->appendChild($dom_aliases);
|
||||
foreach ($info->directiveAliases as $alias) {
|
||||
$dom_aliases->appendChild($dom_document->createElement('alias', $alias));
|
||||
}
|
||||
|
||||
$dom_constraints = $dom_document->createElement('constraints');
|
||||
$dom_directive->appendChild($dom_constraints);
|
||||
|
||||
@@ -115,4 +121,3 @@ class ConfigDoc_XMLSerializer_ConfigSchema extends ConfigDoc_XMLSerializer
|
||||
|
||||
}
|
||||
|
||||
?>
|
@@ -24,4 +24,3 @@ class ConfigDoc_XMLSerializer_Types extends ConfigDoc_XMLSerializer
|
||||
|
||||
}
|
||||
|
||||
?>
|
@@ -72,8 +72,16 @@
|
||||
<xsl:apply-templates />
|
||||
</xsl:template>
|
||||
<xsl:template match="directive/name">
|
||||
<xsl:apply-templates select="../aliases/alias" mode="anchor" />
|
||||
<h3 id="{../@id}"><xsl:value-of select="../@id" /></h3>
|
||||
</xsl:template>
|
||||
<xsl:template match="alias" mode="anchor">
|
||||
<a id="{.}"></a>
|
||||
</xsl:template>
|
||||
|
||||
<!-- Do not pass through -->
|
||||
<xsl:template match="alias"></xsl:template>
|
||||
|
||||
<xsl:template match="directive/constraints">
|
||||
<table class="constraints">
|
||||
<xsl:apply-templates />
|
||||
@@ -89,8 +97,20 @@
|
||||
</td>
|
||||
</tr>
|
||||
</xsl:if>
|
||||
<xsl:if test="../aliases/alias">
|
||||
<xsl:apply-templates select="../aliases" mode="constraints" />
|
||||
</xsl:if>
|
||||
</table>
|
||||
</xsl:template>
|
||||
<xsl:template match="directive/aliases" mode="constraints">
|
||||
<th>Aliases:</th>
|
||||
<td>
|
||||
<xsl:for-each select="alias">
|
||||
<xsl:if test="position()>1">, </xsl:if>
|
||||
<xsl:value-of select="." />
|
||||
</xsl:for-each>
|
||||
</td>
|
||||
</xsl:template>
|
||||
<xsl:template match="directive//description">
|
||||
<div class="description">
|
||||
<xsl:copy-of select="div/node()" />
|
||||
|
@@ -11,8 +11,7 @@ docs/examples/demo.php - ad hoc HTML/PHP soup to the extreme
|
||||
|
||||
AttrDef - a lot of duplication, more generic classes need to be created;
|
||||
a lot of strtolower() calls, no legit casing
|
||||
Class - doesn't support Unicode characters (fringe); uses regular
|
||||
expressions
|
||||
Class - doesn't support Unicode characters (fringe); uses regular expressions
|
||||
Lang - code duplication; premature optimization
|
||||
Length - easily mistaken for CSSLength
|
||||
URI - multiple regular expressions; missing validation for parts (?)
|
||||
@@ -22,9 +21,6 @@ ConfigSchema - redefinition is a mess
|
||||
Strategy
|
||||
FixNesting - cannot bubble nodes out of structures, duplicated checks
|
||||
for special-case parent node
|
||||
MakeWellFormed - insufficient automatic closing definitions (check HTML
|
||||
spec for optional end tags, also, closing based on type (block/inline)
|
||||
might be efficient).
|
||||
RemoveForeignElements - should be run in parallel with MakeWellFormed
|
||||
URIScheme - needs to have callable generic checks
|
||||
mailto - doesn't validate emails, doesn't validate querystring
|
||||
|
@@ -17,11 +17,6 @@
|
||||
<div id="index">Return to the <a href="index.html">index</a>.</div>
|
||||
<div id="home"><a href="http://htmlpurifier.org/">HTML Purifier</a> End-User Documentation</div>
|
||||
|
||||
<div id="applicability">
|
||||
This document covers currently unreleased functionality and
|
||||
only applies to recent SVN checkouts.
|
||||
</div>
|
||||
|
||||
<p>
|
||||
You may have heard of the <a href="dev-advanced-api.html">Advanced API</a>.
|
||||
If you're interested in reading dry prose and boring functional
|
||||
|
@@ -10,9 +10,7 @@ to be effective. Things to remember:
|
||||
|
||||
2. IDs: see enduser-id.html for more info
|
||||
|
||||
3. Links: document pending feature completion
|
||||
Rudimentary blacklisting, we should also allow only relative URIs. We
|
||||
need a doc to explain the stuff.
|
||||
3. URIs: see enduser-uri-filter.html
|
||||
|
||||
4. CSS: document pending
|
||||
Explain which CSS styles we blocked and why.
|
||||
|
@@ -16,11 +16,6 @@
|
||||
<div id="index">Return to the <a href="index.html">index</a>.</div>
|
||||
<div id="home"><a href="http://htmlpurifier.org/">HTML Purifier</a> End-User Documentation</div>
|
||||
|
||||
<div id="applicability">
|
||||
This document covers currently unreleased functionality and
|
||||
only applies to recent SVN checkouts.
|
||||
</div>
|
||||
|
||||
<p>You've probably heard of HTML Tidy, Dave Raggett's little piece
|
||||
of software that cleans up poorly written HTML. Let me say it straight
|
||||
out:</p>
|
||||
|
201
docs/enduser-uri-filter.html
Normal file
201
docs/enduser-uri-filter.html
Normal file
@@ -0,0 +1,201 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
|
||||
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
|
||||
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"><head>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
|
||||
<meta name="description" content="Tutorial for creating custom URI filters." />
|
||||
<link rel="stylesheet" type="text/css" href="style.css" />
|
||||
|
||||
<title>URI Filters - HTML Purifier</title>
|
||||
|
||||
</head><body>
|
||||
|
||||
<h1>URI Filters</h1>
|
||||
|
||||
<div id="filing">Filed under End-User</div>
|
||||
<div id="index">Return to the <a href="index.html">index</a>.</div>
|
||||
<div id="home"><a href="http://htmlpurifier.org/">HTML Purifier</a> End-User Documentation</div>
|
||||
|
||||
<p>
|
||||
This is a quick and dirty document to get you on your way to writing
|
||||
custom URI filters for your own URL filtering needs. Why would you
|
||||
want to write a URI filter? If you need URIs your users put into
|
||||
HTML to magically change into a different URI, this is
|
||||
exactly what you need!
|
||||
</p>
|
||||
|
||||
<h2>Creating the class</h2>
|
||||
|
||||
<p>
|
||||
Any URI filter you make will be a subclass of <code>HTMLPurifier_URIFilter</code>.
|
||||
The scaffolding is thus:
|
||||
</p>
|
||||
|
||||
<pre>class HTMLPurifier_URIFilter_<strong>NameOfFilter</strong> extends HTMLPurifier_URIFilter
|
||||
{
|
||||
var $name = '<strong>NameOfFilter</strong>';
|
||||
function prepare($config) {}
|
||||
function filter(&$uri, $config, &$context) {}
|
||||
}</pre>
|
||||
|
||||
<p>
|
||||
Fill in the variable <code>$name</code> with the name of your filter, and
|
||||
take a look at the two methods. <code>prepare()</code> is an initialization
|
||||
method that is called only once, before any filtering has been done of the
|
||||
HTML. Use it to perform any costly setup work that only needs to be done
|
||||
once. <code>filter()</code> is the guts and innards of our filter:
|
||||
it takes the URI and does whatever needs to be done to it.
|
||||
</p>
|
||||
|
||||
<p>
|
||||
If you've worked with HTML Purifier, you'll recognize the <code>$config</code>
|
||||
and <code>$context</code> parameters. On the other hand, <code>$uri</code>
|
||||
is something unique to this section of the application: it's a
|
||||
<code>HTMLPurifier_URI</code> object. The interface is thus:
|
||||
</p>
|
||||
|
||||
<pre>class HTMLPurifier_URI
|
||||
{
|
||||
var $scheme, $userinfo, $host, $port, $path, $query, $fragment;
|
||||
function HTMLPurifier_URI($scheme, $userinfo, $host, $port, $path, $query, $fragment);
|
||||
function toString();
|
||||
function copy();
|
||||
function getSchemeObj($config, &$context);
|
||||
function validate($config, &$context);
|
||||
}</pre>
|
||||
|
||||
<p>
|
||||
The first three methods are fairly self-explanatory: you have a constructor,
|
||||
a serializer, and a cloner. Generally, you won't be using them when
|
||||
you are manipulating the URI objects themselves.
|
||||
<code>getSchemeObj()</code> is a special purpose method that returns
|
||||
a <code>HTMLPurifier_URIScheme</code> object corresponding to the specific
|
||||
URI at hand. <code>validate()</code> performs general-purpose validation
|
||||
on the internal components of a URI. Once again, you don't need to
|
||||
worry about these: they've already been handled for you.
|
||||
</p>
|
||||
|
||||
<h2>URI format</h2>
|
||||
|
||||
<p>
|
||||
As a URIFilter, we're interested in the member variables of the URI object.
|
||||
</p>
|
||||
|
||||
<table class="quick"><tbody>
|
||||
<tr><th>Scheme</th> <td>The protocol for identifying (and possibly locating) a resource (http, ftp, https)</td></tr>
|
||||
<tr><th>Userinfo</th> <td>User information such as a username (bob)</td></tr>
|
||||
<tr><th>Host</th> <td>Domain name or IP address of the server (example.com, 127.0.0.1)</td></tr>
|
||||
<tr><th>Port</th> <td>Network port number for the server (80, 12345)</td></tr>
|
||||
<tr><th>Path</th> <td>Data that identifies the resource, possibly hierarchical (/path/to, ed@example.com)</td></tr>
|
||||
<tr><th>Query</th> <td>String of information to be interpreted by the resource (?q=search-term)</td></tr>
|
||||
<tr><th>Fragment</th> <td>Additional information for the resource after retrieval (#bookmark)</td></tr>
|
||||
</tbody></table>
|
||||
|
||||
<p>
|
||||
Because the URI is presented to us in this form, and not
|
||||
<code>http://bob@example.com:8080/foo.php?q=string#hash</code>, it saves us
|
||||
a lot of trouble in having to parse the URI every time we want to filter
|
||||
it. For the record, the above URI has the following components:
|
||||
</p>
|
||||
|
||||
<table class="quick"><tbody>
|
||||
<tr><th>Scheme</th> <td>http</td></tr>
|
||||
<tr><th>Userinfo</th> <td>bob</td></tr>
|
||||
<tr><th>Host</th> <td>example.com</td></tr>
|
||||
<tr><th>Port</th> <td>8080</td></tr>
|
||||
<tr><th>Path</th> <td>/foo.php</td></tr>
|
||||
<tr><th>Query</th> <td>q=string</td></tr>
|
||||
<tr><th>Fragment</th> <td>hash</td></tr>
|
||||
</tbody></table>
|
||||
|
||||
<p>
|
||||
Note that there is no question mark or octothorpe in the query or
|
||||
fragment: these get removed during parsing.
|
||||
</p>
|
||||
|
||||
<p>
|
||||
With this information, you can get straight to implementing your
|
||||
<code>filter()</code> method. But one more thing...
|
||||
</p>
|
||||
|
||||
<h2>Return value: Boolean, not URI</h2>
|
||||
|
||||
<p>
|
||||
You may have noticed that the URI is being passed in by reference.
|
||||
This means that whatever changes you make to it, those changes will
|
||||
be reflected in the URI object the caller had. <strong>Do not
|
||||
return the URI object: it is unnecessary and will cause bugs.</strong>
|
||||
Instead, return a boolean value, true if the filtering was successful,
|
||||
or false if the URI is beyond repair and needs to be axed.
|
||||
</p>
|
||||
|
||||
<p>
|
||||
Let's suppose I wanted to write a filter that de-internationalized domain
|
||||
names by converting them to <a href="http://en.wikipedia.org/wiki/Punycode">Punycode</a>.
|
||||
Assuming that <code>punycode_encode($input)</code> converts <code>$input</code> to
|
||||
Punycode and returns <code>false</code> on failure:
|
||||
</p>
|
||||
|
||||
<pre>class HTMLPurifier_URIFilter_ConvertIDNToPunycode extends HTMLPurifier_URIFilter
|
||||
{
|
||||
var $name = 'ConvertIDNToPunycode';
|
||||
function filter(&$uri, $config, &$context) {
|
||||
if (is_null($uri->host)) return true;
|
||||
if ($uri->host == utf8_decode($uri->host)) {
|
||||
// is ASCII, abort
|
||||
return true;
|
||||
}
|
||||
$host = punycode_encode($uri->host);
|
||||
if ($host === false) return false;
|
||||
$uri->host = $host;
|
||||
return true;
|
||||
}
|
||||
}</pre>
|
||||
|
||||
<p>
|
||||
Notice I did not <code>return $uri;</code>.
|
||||
</p>
|
||||
|
||||
<h2>Activating your filter</h2>
|
||||
|
||||
<p>
|
||||
Having a filter is all well and good, but you need to tell HTML Purifier
|
||||
to use it. Fortunately, this part's simple:
|
||||
</p>
|
||||
|
||||
<pre>$uri =& $config->getDefinition('URI');
|
||||
$uri->addFilter(new HTMLPurifier_URIFilter_<strong>NameOfFilter</strong>());</pre>
|
||||
|
||||
<p>
|
||||
If you want to be really fancy, you can define a configuration directive
|
||||
for your filter and have HTML Purifier automatically manage whether or
|
||||
not your filter gets loaded (this is how internal filters manage
|
||||
things):
|
||||
</p>
|
||||
|
||||
<pre>HTMLPurifier_ConfigSchema::define(
|
||||
'URI', '<strong>NameOfFilter</strong>', false, 'bool',
|
||||
'<strong>What your filter does.</strong>'
|
||||
);
|
||||
$uri =& $config->getDefinition('URI', true);
|
||||
$uri->registerFilter(new HTMLPurifier_URIFilter_<strong>NameOfFilter</strong>());
|
||||
</pre>
|
||||
|
||||
<p>
|
||||
Now, your filter will only be called when %URI.<strong>NameOfFilter</strong>
|
||||
is set to true.
|
||||
</p>
|
||||
|
||||
<h2>Examples</h2>
|
||||
|
||||
<p>
|
||||
Check the
|
||||
<a href="http://htmlpurifier.org/svnroot/htmlpurifier/trunk/library/HTMLPurifier/URIFilter/">URIFilter</a>
|
||||
directory for more implementation examples, and see <a href="http://htmlpurifier.org/svnroot/htmlpurifier/trunk/docs/proposal-new-directives.txt">the
|
||||
new directives proposal document</a> for ideas on what could be implemented
|
||||
as a filter.
|
||||
</p>
|
||||
|
||||
<div id="version">$Id$</div>
|
||||
|
||||
</body></html>
|
@@ -20,4 +20,3 @@ $pure_html = $purifier->purify($html);
|
||||
|
||||
echo '<pre>' . htmlspecialchars($pure_html) . '</pre>';
|
||||
|
||||
?>
|
@@ -40,6 +40,9 @@ information for casual developers using HTML Purifier.</p>
|
||||
<dt><a href="enduser-customize.html">Customize</a></dt>
|
||||
<dd>Tutorial for customizing HTML Purifier's tag and attribute sets.</dd>
|
||||
|
||||
<dt><a href="enduser-uri-filter.html">URI Filters</a></dt>
|
||||
<dd>Tutorial for creating custom URI filters.</dd>
|
||||
|
||||
</dl>
|
||||
|
||||
<h2>Development</h2>
|
||||
|
@@ -32,7 +32,7 @@ Here are some fuzzy levels you could set:
|
||||
|
||||
One final note: when you start axing tags that are more commonly used, you
|
||||
run the risk of accidentally destroying user data, especially if the data
|
||||
is incoming from a WYSIWYG eidtor that hasn't been synced accordingly. This may
|
||||
is incoming from a WYSIWYG editor that hasn't been synced accordingly. This may
|
||||
make forbidden element to text transformations desirable (for example, images).
|
||||
|
||||
|
||||
|
@@ -2,7 +2,8 @@
|
||||
Configuration Ideas
|
||||
|
||||
Here are some theoretical configuration ideas that we could implement some
|
||||
time. Note the naming convention: %Namespace.Directive
|
||||
time. Note the naming convention: %Namespace.Directive. If you want one
|
||||
implemented, give us a ring, and we'll move it up the priority chain.
|
||||
|
||||
%Attr.RewriteFragments - if there's %Attr.IDPrefix we may want to transparently
|
||||
rewrite the URLs we parse too. However, we can only do it when it's a pure
|
||||
@@ -22,8 +23,6 @@ time. Note the naming convention: %Namespace.Directive
|
||||
%URI.AddRelNofollow - will add rel="nofollow" to all links, preventing the
|
||||
spread of ill-gotten pagerank
|
||||
|
||||
%URI.RelativeToAbsolute - transforms all relative URIs to absolute form
|
||||
|
||||
%URI.HostBlacklistRegex - regexes that if matching the host are disallowed
|
||||
%URI.HostWhitelist - domain names that are excluded from the host blacklist
|
||||
%URI.HostPolicy - determines whether or not it is reject all and then whitelist
|
||||
|
@@ -33,6 +33,9 @@ blockquote .label {font-weight:bold; font-size:1em; margin:0 0 .1em;
|
||||
.table thead th:first-child {-moz-border-radius-topleft:1em;}
|
||||
.table tbody td {border-bottom:1px solid #CCC; padding-right:0.6em;padding-left:0.6em;}
|
||||
|
||||
/* A quick table*/
|
||||
table.quick tbody th {text-align:right; padding-right:1em;}
|
||||
|
||||
/* Category of the file */
|
||||
#filing {font-weight:bold; font-size:smaller; }
|
||||
|
||||
|
@@ -7,4 +7,3 @@
|
||||
set_include_path(dirname(__FILE__) . PATH_SEPARATOR . get_include_path() );
|
||||
require_once 'HTMLPurifier.php';
|
||||
|
||||
?>
|
@@ -18,4 +18,3 @@ function HTMLPurifier($html, $config = null) {
|
||||
return $purifier->purify($html, $config);
|
||||
}
|
||||
|
||||
?>
|
@@ -22,7 +22,7 @@
|
||||
*/
|
||||
|
||||
/*
|
||||
HTML Purifier 2.0.0 - Standards Compliant HTML Filtering
|
||||
HTML Purifier 2.0.1 - Standards Compliant HTML Filtering
|
||||
Copyright (C) 2006 Edward Z. Yang
|
||||
|
||||
This library is free software; you can redistribute it and/or
|
||||
@@ -51,16 +51,9 @@ require_once 'HTMLPurifier/Generator.php';
|
||||
require_once 'HTMLPurifier/Strategy/Core.php';
|
||||
require_once 'HTMLPurifier/Encoder.php';
|
||||
|
||||
require_once 'HTMLPurifier/ErrorCollector.php';
|
||||
require_once 'HTMLPurifier/LanguageFactory.php';
|
||||
|
||||
HTMLPurifier_ConfigSchema::define(
|
||||
'Core', 'Language', 'en', 'string', '
|
||||
ISO 639 language code for localizable things in HTML Purifier to use,
|
||||
which is mainly error reporting. There is currently only an English (en)
|
||||
translation, so this directive is currently useless.
|
||||
This directive has been available since 2.0.0.
|
||||
');
|
||||
|
||||
HTMLPurifier_ConfigSchema::define(
|
||||
'Core', 'CollectErrors', false, 'bool', '
|
||||
Whether or not to collect errors found while filtering the document. This
|
||||
@@ -81,7 +74,7 @@ This directive has been available since 2.0.0.
|
||||
class HTMLPurifier
|
||||
{
|
||||
|
||||
var $version = '2.0.0';
|
||||
var $version = '2.0.1';
|
||||
|
||||
var $config;
|
||||
var $filters;
|
||||
@@ -139,15 +132,19 @@ class HTMLPurifier
|
||||
|
||||
$context = new HTMLPurifier_Context();
|
||||
|
||||
// our friendly neighborhood generator, all primed with configuration too!
|
||||
$this->generator->generateFromTokens(array(), $config, $context);
|
||||
$context->register('Generator', $this->generator);
|
||||
|
||||
// set up global context variables
|
||||
if ($config->get('Core', 'CollectErrors')) {
|
||||
// may get moved out if other facilities use it
|
||||
$language_factory = HTMLPurifier_LanguageFactory::instance();
|
||||
$language = $language_factory->create($config->get('Core', 'Language'));
|
||||
$language = $language_factory->create($config, $context);
|
||||
$context->register('Locale', $language);
|
||||
|
||||
$error_collector = new HTMLPurifier_ErrorCollector();
|
||||
$context->register('ErrorCollector', $language);
|
||||
$error_collector = new HTMLPurifier_ErrorCollector($context);
|
||||
$context->register('ErrorCollector', $error_collector);
|
||||
}
|
||||
|
||||
$html = HTMLPurifier_Encoder::convertToUTF8($html, $config, $context);
|
||||
@@ -216,4 +213,3 @@ class HTMLPurifier
|
||||
|
||||
}
|
||||
|
||||
?>
|
@@ -127,4 +127,3 @@ class HTMLPurifier_AttrCollections
|
||||
|
||||
}
|
||||
|
||||
?>
|
@@ -84,4 +84,3 @@ class HTMLPurifier_AttrDef
|
||||
|
||||
}
|
||||
|
||||
?>
|
@@ -66,4 +66,3 @@ class HTMLPurifier_AttrDef_CSS extends HTMLPurifier_AttrDef
|
||||
|
||||
}
|
||||
|
||||
?>
|
@@ -84,4 +84,3 @@ class HTMLPurifier_AttrDef_CSS_Background extends HTMLPurifier_AttrDef
|
||||
|
||||
}
|
||||
|
||||
?>
|
@@ -127,4 +127,3 @@ class HTMLPurifier_AttrDef_CSS_BackgroundPosition extends HTMLPurifier_AttrDef
|
||||
|
||||
}
|
||||
|
||||
?>
|
@@ -42,4 +42,3 @@ class HTMLPurifier_AttrDef_CSS_Border extends HTMLPurifier_AttrDef
|
||||
|
||||
}
|
||||
|
||||
?>
|
@@ -98,4 +98,3 @@ class HTMLPurifier_AttrDef_CSS_Color extends HTMLPurifier_AttrDef
|
||||
|
||||
}
|
||||
|
||||
?>
|
@@ -35,4 +35,3 @@ class HTMLPurifier_AttrDef_CSS_Composite extends HTMLPurifier_AttrDef
|
||||
|
||||
}
|
||||
|
||||
?>
|
@@ -148,4 +148,3 @@ class HTMLPurifier_AttrDef_CSS_Font extends HTMLPurifier_AttrDef
|
||||
|
||||
}
|
||||
|
||||
?>
|
@@ -59,4 +59,3 @@ class HTMLPurifier_AttrDef_CSS_FontFamily extends HTMLPurifier_AttrDef
|
||||
|
||||
}
|
||||
|
||||
?>
|
@@ -53,4 +53,3 @@ class HTMLPurifier_AttrDef_CSS_Length extends HTMLPurifier_AttrDef
|
||||
|
||||
}
|
||||
|
||||
?>
|
@@ -77,4 +77,3 @@ class HTMLPurifier_AttrDef_CSS_ListStyle extends HTMLPurifier_AttrDef
|
||||
|
||||
}
|
||||
|
||||
?>
|
@@ -55,4 +55,3 @@ class HTMLPurifier_AttrDef_CSS_Multiple extends HTMLPurifier_AttrDef
|
||||
|
||||
}
|
||||
|
||||
?>
|
@@ -58,4 +58,3 @@ class HTMLPurifier_AttrDef_CSS_Number extends HTMLPurifier_AttrDef
|
||||
|
||||
}
|
||||
|
||||
?>
|
@@ -40,4 +40,3 @@ class HTMLPurifier_AttrDef_CSS_Percentage extends HTMLPurifier_AttrDef
|
||||
|
||||
}
|
||||
|
||||
?>
|
@@ -34,4 +34,3 @@ class HTMLPurifier_AttrDef_CSS_TextDecoration extends HTMLPurifier_AttrDef
|
||||
|
||||
}
|
||||
|
||||
?>
|
@@ -55,4 +55,3 @@ class HTMLPurifier_AttrDef_CSS_URI extends HTMLPurifier_AttrDef_URI
|
||||
|
||||
}
|
||||
|
||||
?>
|
@@ -63,4 +63,3 @@ class HTMLPurifier_AttrDef_Enum extends HTMLPurifier_AttrDef
|
||||
|
||||
}
|
||||
|
||||
?>
|
@@ -27,4 +27,3 @@ class HTMLPurifier_AttrDef_HTML_Bool extends HTMLPurifier_AttrDef
|
||||
|
||||
}
|
||||
|
||||
?>
|
@@ -32,4 +32,3 @@ class HTMLPurifier_AttrDef_HTML_Color extends HTMLPurifier_AttrDef
|
||||
|
||||
}
|
||||
|
||||
?>
|
@@ -31,4 +31,3 @@ class HTMLPurifier_AttrDef_HTML_FrameTarget extends HTMLPurifier_AttrDef_Enum
|
||||
|
||||
}
|
||||
|
||||
?>
|
@@ -118,4 +118,3 @@ class HTMLPurifier_AttrDef_HTML_ID extends HTMLPurifier_AttrDef
|
||||
|
||||
}
|
||||
|
||||
?>
|
@@ -41,4 +41,3 @@ class HTMLPurifier_AttrDef_HTML_Length extends HTMLPurifier_AttrDef_HTML_Pixels
|
||||
|
||||
}
|
||||
|
||||
?>
|
@@ -70,4 +70,3 @@ class HTMLPurifier_AttrDef_HTML_LinkTypes extends HTMLPurifier_AttrDef
|
||||
|
||||
}
|
||||
|
||||
?>
|
@@ -41,4 +41,3 @@ class HTMLPurifier_AttrDef_HTML_MultiLength extends HTMLPurifier_AttrDef_HTML_Le
|
||||
|
||||
}
|
||||
|
||||
?>
|
@@ -48,4 +48,3 @@ class HTMLPurifier_AttrDef_HTML_Nmtokens extends HTMLPurifier_AttrDef
|
||||
|
||||
}
|
||||
|
||||
?>
|
@@ -34,4 +34,3 @@ class HTMLPurifier_AttrDef_HTML_Pixels extends HTMLPurifier_AttrDef
|
||||
|
||||
}
|
||||
|
||||
?>
|
@@ -72,4 +72,3 @@ class HTMLPurifier_AttrDef_Integer extends HTMLPurifier_AttrDef
|
||||
|
||||
}
|
||||
|
||||
?>
|
@@ -72,4 +72,3 @@ class HTMLPurifier_AttrDef_Lang extends HTMLPurifier_AttrDef
|
||||
|
||||
}
|
||||
|
||||
?>
|
@@ -14,4 +14,3 @@ class HTMLPurifier_AttrDef_Text extends HTMLPurifier_AttrDef
|
||||
|
||||
}
|
||||
|
||||
?>
|
@@ -302,4 +302,4 @@ class HTMLPurifier_AttrDef_URI extends HTMLPurifier_AttrDef
|
||||
|
||||
}
|
||||
|
||||
?>
|
||||
|
||||
|
@@ -14,4 +14,3 @@ class HTMLPurifier_AttrDef_URI_Email extends HTMLPurifier_AttrDef
|
||||
|
||||
}
|
||||
|
||||
?>
|
@@ -20,4 +20,3 @@ class HTMLPurifier_AttrDef_URI_Email_SimpleCheck extends HTMLPurifier_AttrDef_UR
|
||||
|
||||
}
|
||||
|
||||
?>
|
@@ -51,4 +51,3 @@ class HTMLPurifier_AttrDef_URI_Host extends HTMLPurifier_AttrDef
|
||||
|
||||
}
|
||||
|
||||
?>
|
@@ -39,4 +39,3 @@ class HTMLPurifier_AttrDef_URI_IPv4 extends HTMLPurifier_AttrDef
|
||||
|
||||
}
|
||||
|
||||
?>
|
@@ -98,4 +98,3 @@ class HTMLPurifier_AttrDef_URI_IPv6 extends HTMLPurifier_AttrDef_URI_IPv4
|
||||
|
||||
}
|
||||
|
||||
?>
|
@@ -55,4 +55,3 @@ class HTMLPurifier_AttrTransform
|
||||
|
||||
}
|
||||
|
||||
?>
|
@@ -28,4 +28,3 @@ class HTMLPurifier_AttrTransform_BdoDir extends HTMLPurifier_AttrTransform
|
||||
|
||||
}
|
||||
|
||||
?>
|
@@ -23,4 +23,3 @@ extends HTMLPurifier_AttrTransform {
|
||||
|
||||
}
|
||||
|
||||
?>
|
@@ -36,4 +36,3 @@ extends HTMLPurifier_AttrTransform {
|
||||
|
||||
}
|
||||
|
||||
?>
|
@@ -17,4 +17,3 @@ class HTMLPurifier_AttrTransform_Border extends HTMLPurifier_AttrTransform {
|
||||
|
||||
}
|
||||
|
||||
?>
|
@@ -57,4 +57,3 @@ class HTMLPurifier_AttrTransform_EnumToCSS extends HTMLPurifier_AttrTransform {
|
||||
|
||||
}
|
||||
|
||||
?>
|
@@ -51,4 +51,3 @@ class HTMLPurifier_AttrTransform_ImgRequired extends HTMLPurifier_AttrTransform
|
||||
|
||||
}
|
||||
|
||||
?>
|
@@ -44,4 +44,3 @@ extends HTMLPurifier_AttrTransform {
|
||||
|
||||
}
|
||||
|
||||
?>
|
@@ -27,4 +27,3 @@ class HTMLPurifier_AttrTransform_Lang extends HTMLPurifier_AttrTransform
|
||||
|
||||
}
|
||||
|
||||
?>
|
@@ -26,4 +26,3 @@ class HTMLPurifier_AttrTransform_Length extends HTMLPurifier_AttrTransform
|
||||
|
||||
}
|
||||
|
||||
?>
|
@@ -18,4 +18,3 @@ class HTMLPurifier_AttrTransform_Name extends HTMLPurifier_AttrTransform
|
||||
|
||||
}
|
||||
|
||||
?>
|
@@ -79,4 +79,4 @@ class HTMLPurifier_AttrTypes
|
||||
}
|
||||
}
|
||||
|
||||
?>
|
||||
|
||||
|
@@ -1,32 +1,53 @@
|
||||
<?php
|
||||
|
||||
/**
|
||||
* Validates the attributes of a token. Doesn't manage required attributes
|
||||
* very well. The only reason we factored this out was because RemoveForeignElements
|
||||
* also needed it besides ValidateAttributes.
|
||||
*/
|
||||
class HTMLPurifier_AttrValidator
|
||||
{
|
||||
|
||||
|
||||
function validateToken($token, &$config, &$context) {
|
||||
/**
|
||||
* Validates the attributes of a token, returning a modified token
|
||||
* that has valid attributes
|
||||
* @param $token Reference to token to validate. We require a reference
|
||||
* because the operations this class performs on the token are
|
||||
* not atomic, so the context CurrentToken needs to be updated
|
||||
* throughout
|
||||
* @param $config Instance of HTMLPurifier_Config
|
||||
* @param $context Instance of HTMLPurifier_Context
|
||||
*/
|
||||
function validateToken(&$token, &$config, &$context) {
|
||||
|
||||
$definition = $config->getHTMLDefinition();
|
||||
$e =& $context->get('ErrorCollector', true);
|
||||
|
||||
// initialize CurrentToken if necessary
|
||||
$current_token =& $context->get('CurrentToken', true);
|
||||
if (!$current_token) $context->register('CurrentToken', $token);
|
||||
|
||||
if ($token->type !== 'start' && $token->type !== 'empty') return $token;
|
||||
|
||||
// create alias to global definition array, see also $defs
|
||||
// DEFINITION CALL
|
||||
$d_defs = $definition->info_global_attr;
|
||||
|
||||
// copy out attributes for easy manipulation
|
||||
$attr = $token->attr;
|
||||
// reference attributes for easy manipulation
|
||||
$attr =& $token->attr;
|
||||
|
||||
// do global transformations (pre)
|
||||
// nothing currently utilizes this
|
||||
foreach ($definition->info_attr_transform_pre as $transform) {
|
||||
$attr = $transform->transform($attr, $config, $context);
|
||||
$attr = $transform->transform($o = $attr, $config, $context);
|
||||
if ($e && ($attr != $o)) $e->send(E_NOTICE, 'AttrValidator: Attributes transformed', $o, $attr);
|
||||
}
|
||||
|
||||
// do local transformations only applicable to this element (pre)
|
||||
// ex. <p align="right"> to <p style="text-align:right;">
|
||||
foreach ($definition->info[$token->name]->attr_transform_pre
|
||||
as $transform
|
||||
) {
|
||||
$attr = $transform->transform($attr, $config, $context);
|
||||
foreach ($definition->info[$token->name]->attr_transform_pre as $transform) {
|
||||
$attr = $transform->transform($o = $attr, $config, $context);
|
||||
if ($e && ($attr != $o)) $e->send(E_NOTICE, 'AttrValidator: Attributes transformed', $o, $attr);
|
||||
}
|
||||
|
||||
// create alias to this element's attribute definition array, see
|
||||
@@ -34,6 +55,9 @@ class HTMLPurifier_AttrValidator
|
||||
// DEFINITION CALL
|
||||
$defs = $definition->info[$token->name]->attr;
|
||||
|
||||
$attr_key = false;
|
||||
$context->register('CurrentAttr', $attr_key);
|
||||
|
||||
// iterate through all the attribute keypairs
|
||||
// Watch out for name collisions: $key has previously been used
|
||||
foreach ($attr as $attr_key => $value) {
|
||||
@@ -67,9 +91,17 @@ class HTMLPurifier_AttrValidator
|
||||
|
||||
// put the results into effect
|
||||
if ($result === false || $result === null) {
|
||||
// this is a generic error message that should be replaced
|
||||
// with more specific ones when possible
|
||||
if ($e) $e->send(E_ERROR, 'AttrValidator: Attribute removed');
|
||||
|
||||
// remove the attribute
|
||||
unset($attr[$attr_key]);
|
||||
} elseif (is_string($result)) {
|
||||
// generally, if a substitution is happening, there
|
||||
// was some sort of implicit correction going on. We'll
|
||||
// delegate it to the attribute classes to say exactly what.
|
||||
|
||||
// simple substitution
|
||||
$attr[$attr_key] = $result;
|
||||
}
|
||||
@@ -81,25 +113,27 @@ class HTMLPurifier_AttrValidator
|
||||
// others would prepend themselves).
|
||||
}
|
||||
|
||||
$context->destroy('CurrentAttr');
|
||||
|
||||
// post transforms
|
||||
|
||||
// ex. <x lang="fr"> to <x lang="fr" xml:lang="fr">
|
||||
// global (error reporting untested)
|
||||
foreach ($definition->info_attr_transform_post as $transform) {
|
||||
$attr = $transform->transform($attr, $config, $context);
|
||||
$attr = $transform->transform($o = $attr, $config, $context);
|
||||
if ($e && ($attr != $o)) $e->send(E_NOTICE, 'AttrValidator: Attributes transformed', $o, $attr);
|
||||
}
|
||||
|
||||
// ex. <bdo> to <bdo dir="ltr">
|
||||
// local (error reporting untested)
|
||||
foreach ($definition->info[$token->name]->attr_transform_post as $transform) {
|
||||
$attr = $transform->transform($attr, $config, $context);
|
||||
$attr = $transform->transform($o = $attr, $config, $context);
|
||||
if ($e && ($attr != $o)) $e->send(E_NOTICE, 'AttrValidator: Attributes transformed', $o, $attr);
|
||||
}
|
||||
|
||||
// commit changes
|
||||
$token->attr = $attr;
|
||||
return $token;
|
||||
// destroy CurrentToken if we made it ourselves
|
||||
if (!$current_token) $context->destroy('CurrentToken');
|
||||
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
?>
|
@@ -226,4 +226,3 @@ class HTMLPurifier_CSSDefinition extends HTMLPurifier_Definition
|
||||
|
||||
}
|
||||
|
||||
?>
|
@@ -36,6 +36,11 @@ class HTMLPurifier_ChildDef
|
||||
*/
|
||||
var $allow_empty;
|
||||
|
||||
/**
|
||||
* Lookup array of all elements that this definition could possibly allow
|
||||
*/
|
||||
var $elements = array();
|
||||
|
||||
/**
|
||||
* Validates nodes according to definition and returns modification.
|
||||
*
|
||||
@@ -52,4 +57,4 @@ class HTMLPurifier_ChildDef
|
||||
}
|
||||
}
|
||||
|
||||
?>
|
||||
|
||||
|
@@ -35,6 +35,7 @@ class HTMLPurifier_ChildDef_Chameleon extends HTMLPurifier_ChildDef
|
||||
function HTMLPurifier_ChildDef_Chameleon($inline, $block) {
|
||||
$this->inline = new HTMLPurifier_ChildDef_Optional($inline);
|
||||
$this->block = new HTMLPurifier_ChildDef_Optional($block);
|
||||
$this->elements = $this->block->elements;
|
||||
}
|
||||
|
||||
function validateChildren($tokens_of_children, $config, &$context) {
|
||||
@@ -48,4 +49,3 @@ class HTMLPurifier_ChildDef_Chameleon extends HTMLPurifier_ChildDef
|
||||
}
|
||||
}
|
||||
|
||||
?>
|
@@ -44,6 +44,12 @@ class HTMLPurifier_ChildDef_Custom extends HTMLPurifier_ChildDef
|
||||
// COMPLICATED! AND MIGHT BE BUGGY! I HAVE NO CLUE WHAT I'M
|
||||
// DOING! Seriously: if there's problems, please report them.
|
||||
|
||||
// collect all elements into the $elements array
|
||||
preg_match_all("/$el/", $reg, $matches);
|
||||
foreach ($matches[0] as $match) {
|
||||
$this->elements[$match] = true;
|
||||
}
|
||||
|
||||
// setup all elements as parentheticals with leading commas
|
||||
$reg = preg_replace("/$el/", '(,\\0)', $reg);
|
||||
|
||||
@@ -85,4 +91,3 @@ class HTMLPurifier_ChildDef_Custom extends HTMLPurifier_ChildDef
|
||||
}
|
||||
}
|
||||
|
||||
?>
|
@@ -19,4 +19,3 @@ class HTMLPurifier_ChildDef_Empty extends HTMLPurifier_ChildDef
|
||||
}
|
||||
}
|
||||
|
||||
?>
|
@@ -20,4 +20,3 @@ class HTMLPurifier_ChildDef_Optional extends HTMLPurifier_ChildDef_Required
|
||||
}
|
||||
}
|
||||
|
||||
?>
|
@@ -25,7 +25,7 @@ class HTMLPurifier_ChildDef_Required extends HTMLPurifier_ChildDef
|
||||
$elements = array_flip($elements);
|
||||
foreach ($elements as $i => $x) {
|
||||
$elements[$i] = true;
|
||||
if (empty($i)) unset($elements[$i]);
|
||||
if (empty($i)) unset($elements[$i]); // remove blank
|
||||
}
|
||||
}
|
||||
$this->elements = $elements;
|
||||
@@ -109,4 +109,3 @@ class HTMLPurifier_ChildDef_Required extends HTMLPurifier_ChildDef
|
||||
}
|
||||
}
|
||||
|
||||
?>
|
@@ -73,4 +73,3 @@ extends HTMLPurifier_ChildDef_Required
|
||||
}
|
||||
}
|
||||
|
||||
?>
|
@@ -9,6 +9,8 @@ class HTMLPurifier_ChildDef_Table extends HTMLPurifier_ChildDef
|
||||
{
|
||||
var $allow_empty = false;
|
||||
var $type = 'table';
|
||||
var $elements = array('tr' => true, 'tbody' => true, 'thead' => true,
|
||||
'tfoot' => true, 'caption' => true, 'colgroup' => true, 'col' => true);
|
||||
function HTMLPurifier_ChildDef_Table() {}
|
||||
function validateChildren($tokens_of_children, $config, &$context) {
|
||||
if (empty($tokens_of_children)) return false;
|
||||
@@ -139,4 +141,3 @@ class HTMLPurifier_ChildDef_Table extends HTMLPurifier_ChildDef
|
||||
}
|
||||
}
|
||||
|
||||
?>
|
@@ -41,7 +41,7 @@ class HTMLPurifier_Config
|
||||
/**
|
||||
* HTML Purifier's version
|
||||
*/
|
||||
var $version = '2.0.0';
|
||||
var $version = '2.0.1';
|
||||
|
||||
/**
|
||||
* Two-level associative array of configuration directives
|
||||
@@ -152,11 +152,15 @@ class HTMLPurifier_Config
|
||||
/**
|
||||
* Returns a md5 signature of a segment of the configuration object
|
||||
* that uniquely identifies that particular configuration
|
||||
* @note Revision is handled specially and is removed from the batch
|
||||
* before processing!
|
||||
* @param $namespace Namespace to get serial for
|
||||
*/
|
||||
function getBatchSerial($namespace) {
|
||||
if (empty($this->serials[$namespace])) {
|
||||
$this->serials[$namespace] = md5(serialize($this->getBatch($namespace)));
|
||||
$batch = $this->getBatch($namespace);
|
||||
unset($batch['DefinitionRev']);
|
||||
$this->serials[$namespace] = md5(serialize($batch));
|
||||
}
|
||||
return $this->serials[$namespace];
|
||||
}
|
||||
@@ -243,14 +247,16 @@ class HTMLPurifier_Config
|
||||
* called before it's been setup, otherwise won't work.
|
||||
*/
|
||||
function &getHTMLDefinition($raw = false) {
|
||||
return $this->getDefinition('HTML', $raw);
|
||||
$def =& $this->getDefinition('HTML', $raw);
|
||||
return $def; // prevent PHP 4.4.0 from complaining
|
||||
}
|
||||
|
||||
/**
|
||||
* Retrieves reference to the CSS definition
|
||||
*/
|
||||
function &getCSSDefinition($raw = false) {
|
||||
return $this->getDefinition('CSS', $raw);
|
||||
$def =& $this->getDefinition('CSS', $raw);
|
||||
return $def;
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -267,6 +273,7 @@ class HTMLPurifier_Config
|
||||
if (!empty($this->definitions[$type])) {
|
||||
if (!$this->definitions[$type]->setup) {
|
||||
$this->definitions[$type]->setup($this);
|
||||
$cache->set($this->definitions[$type], $this);
|
||||
}
|
||||
return $this->definitions[$type];
|
||||
}
|
||||
@@ -298,7 +305,7 @@ class HTMLPurifier_Config
|
||||
if (is_null($this->get($type, 'DefinitionID'))) {
|
||||
// fatally error out if definition ID not set
|
||||
trigger_error("Cannot retrieve raw version without specifying %$type.DefinitionID", E_USER_ERROR);
|
||||
$false = false;
|
||||
$false = new HTMLPurifier_Error();
|
||||
return $false;
|
||||
}
|
||||
return $this->definitions[$type];
|
||||
@@ -333,25 +340,78 @@ class HTMLPurifier_Config
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a list of array(namespace, directive) for all directives
|
||||
* that are allowed in a web-form context as per an allowed
|
||||
* namespaces/directives list.
|
||||
* @param $allowed List of allowed namespaces/directives
|
||||
* @static
|
||||
*/
|
||||
static function getAllowedDirectivesForForm($allowed) {
|
||||
$schema = HTMLPurifier_ConfigSchema::instance();
|
||||
if ($allowed !== true) {
|
||||
if (is_string($allowed)) $allowed = array($allowed);
|
||||
$allowed_ns = array();
|
||||
$allowed_directives = array();
|
||||
$blacklisted_directives = array();
|
||||
foreach ($allowed as $ns_or_directive) {
|
||||
if (strpos($ns_or_directive, '.') !== false) {
|
||||
// directive
|
||||
if ($ns_or_directive[0] == '-') {
|
||||
$blacklisted_directives[substr($ns_or_directive, 1)] = true;
|
||||
} else {
|
||||
$allowed_directives[$ns_or_directive] = true;
|
||||
}
|
||||
} else {
|
||||
// namespace
|
||||
$allowed_ns[$ns_or_directive] = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
$ret = array();
|
||||
foreach ($schema->info as $ns => $keypairs) {
|
||||
foreach ($keypairs as $directive => $def) {
|
||||
if ($allowed !== true) {
|
||||
if (isset($blacklisted_directives["$ns.$directive"])) continue;
|
||||
if (!isset($allowed_directives["$ns.$directive"]) && !isset($allowed_ns[$ns])) continue;
|
||||
}
|
||||
if ($def->class == 'alias') continue;
|
||||
if ($directive == 'DefinitionID' || $directive == 'DefinitionRev') continue;
|
||||
$ret[] = array($ns, $directive);
|
||||
}
|
||||
}
|
||||
return $ret;
|
||||
}
|
||||
|
||||
/**
|
||||
* Loads configuration values from $_GET/$_POST that were posted
|
||||
* via ConfigForm
|
||||
* @param $array $_GET or $_POST array to import
|
||||
* @param $index Index/name that the config variables are in
|
||||
* @param $allowed List of allowed namespaces/directives
|
||||
* @param $mq_fix Boolean whether or not to enable magic quotes fix
|
||||
* @static
|
||||
*/
|
||||
function loadArrayFromForm($array, $index, $mq_fix = true) {
|
||||
static function loadArrayFromForm($array, $index, $allowed = true, $mq_fix = true) {
|
||||
$array = (isset($array[$index]) && is_array($array[$index])) ? $array[$index] : array();
|
||||
$mq = get_magic_quotes_gpc() && $mq_fix;
|
||||
foreach ($array as $key => $value) {
|
||||
if (!strncmp($key, 'Null_', 5) && !empty($value)) {
|
||||
unset($array[substr($key, 5)]);
|
||||
unset($array[$key]);
|
||||
|
||||
$allowed = HTMLPurifier_Config::getAllowedDirectivesForForm($allowed);
|
||||
$ret = array();
|
||||
foreach ($allowed as $key) {
|
||||
list($ns, $directive) = $key;
|
||||
$skey = "$ns.$directive";
|
||||
if (!empty($array["Null_$skey"])) {
|
||||
$ret[$ns][$directive] = null;
|
||||
continue;
|
||||
}
|
||||
if ($mq) $array[$key] = stripslashes($value);
|
||||
if (!isset($array[$skey])) continue;
|
||||
$value = $mq ? stripslashes($array[$skey]) : $array[$skey];
|
||||
$ret[$ns][$directive] = $value;
|
||||
}
|
||||
return @HTMLPurifier_Config::create($array);
|
||||
|
||||
$config = HTMLPurifier_Config::create($ret);
|
||||
return $config;
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -392,4 +452,4 @@ class HTMLPurifier_Config
|
||||
|
||||
}
|
||||
|
||||
?>
|
||||
|
||||
|
@@ -7,4 +7,3 @@ class HTMLPurifier_ConfigDef {
|
||||
var $class = false;
|
||||
}
|
||||
|
||||
?>
|
@@ -61,6 +61,12 @@ class HTMLPurifier_ConfigDef_Directive extends HTMLPurifier_ConfigDef
|
||||
*/
|
||||
var $aliases = array();
|
||||
|
||||
/**
|
||||
* Advisory list of directive aliases, i.e. other directives that
|
||||
* redirect here
|
||||
*/
|
||||
var $directiveAliases = array();
|
||||
|
||||
/**
|
||||
* Adds a description to the array
|
||||
*/
|
||||
@@ -71,4 +77,3 @@ class HTMLPurifier_ConfigDef_Directive extends HTMLPurifier_ConfigDef
|
||||
|
||||
}
|
||||
|
||||
?>
|
@@ -24,4 +24,3 @@ class HTMLPurifier_ConfigDef_DirectiveAlias extends HTMLPurifier_ConfigDef
|
||||
}
|
||||
}
|
||||
|
||||
?>
|
@@ -20,4 +20,3 @@ class HTMLPurifier_ConfigDef_Namespace extends HTMLPurifier_ConfigDef {
|
||||
|
||||
}
|
||||
|
||||
?>
|
@@ -67,6 +67,8 @@ class HTMLPurifier_ConfigSchema {
|
||||
$this->defineNamespace('URI', 'Features regarding Uniform Resource Identifiers.');
|
||||
$this->defineNamespace('HTML', 'Configuration regarding allowed HTML.');
|
||||
$this->defineNamespace('CSS', 'Configuration regarding allowed CSS.');
|
||||
$this->defineNamespace('AutoFormat', 'Configuration for activating auto-formatting functionality (also known as <code>Injector</code>s)');
|
||||
$this->defineNamespace('AutoFormatParam', 'Configuration for customizing auto-formatting functionality');
|
||||
$this->defineNamespace('Output', 'Configuration relating to the generation of (X)HTML.');
|
||||
$this->defineNamespace('Cache', 'Configuration for DefinitionCache and related subclasses.');
|
||||
$this->defineNamespace('Test', 'Developer testing configuration for our unit tests.');
|
||||
@@ -293,6 +295,7 @@ class HTMLPurifier_ConfigSchema {
|
||||
$def->info[$namespace][$name] =
|
||||
new HTMLPurifier_ConfigDef_DirectiveAlias(
|
||||
$new_namespace, $new_name);
|
||||
$def->info[$new_namespace][$new_name]->directiveAliases[] = "$namespace.$name";
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -401,4 +404,4 @@ class HTMLPurifier_ConfigSchema {
|
||||
}
|
||||
}
|
||||
|
||||
?>
|
||||
|
||||
|
@@ -147,4 +147,3 @@ class HTMLPurifier_ContentSets
|
||||
|
||||
}
|
||||
|
||||
?>
|
@@ -2,6 +2,8 @@
|
||||
|
||||
/**
|
||||
* Registry object that contains information about the current context.
|
||||
* @warning Is a bit buggy when variables are set to null: it thinks
|
||||
* they don't exist! So use false instead, please.
|
||||
*/
|
||||
class HTMLPurifier_Context
|
||||
{
|
||||
@@ -19,7 +21,7 @@ class HTMLPurifier_Context
|
||||
*/
|
||||
function register($name, &$ref) {
|
||||
if (isset($this->_storage[$name])) {
|
||||
trigger_error('Name collision, cannot re-register',
|
||||
trigger_error("Name $name produces collision, cannot re-register",
|
||||
E_USER_ERROR);
|
||||
return;
|
||||
}
|
||||
@@ -29,11 +31,14 @@ class HTMLPurifier_Context
|
||||
/**
|
||||
* Retrieves a variable reference from the context.
|
||||
* @param $name String name
|
||||
* @param $ignore_error Boolean whether or not to ignore error
|
||||
*/
|
||||
function &get($name) {
|
||||
function &get($name, $ignore_error = false) {
|
||||
if (!isset($this->_storage[$name])) {
|
||||
trigger_error('Attempted to retrieve non-existent variable',
|
||||
if (!$ignore_error) {
|
||||
trigger_error("Attempted to retrieve non-existent variable $name",
|
||||
E_USER_ERROR);
|
||||
}
|
||||
$var = null; // so we can return by reference
|
||||
return $var;
|
||||
}
|
||||
@@ -46,7 +51,7 @@ class HTMLPurifier_Context
|
||||
*/
|
||||
function destroy($name) {
|
||||
if (!isset($this->_storage[$name])) {
|
||||
trigger_error('Attempted to destroy non-existent variable',
|
||||
trigger_error("Attempted to destroy non-existent variable $name",
|
||||
E_USER_ERROR);
|
||||
return;
|
||||
}
|
||||
@@ -73,4 +78,3 @@ class HTMLPurifier_Context
|
||||
|
||||
}
|
||||
|
||||
?>
|
@@ -38,4 +38,3 @@ class HTMLPurifier_Definition
|
||||
|
||||
}
|
||||
|
||||
?>
|
@@ -4,6 +4,8 @@ require_once 'HTMLPurifier/DefinitionCache/Serializer.php';
|
||||
require_once 'HTMLPurifier/DefinitionCache/Null.php';
|
||||
|
||||
require_once 'HTMLPurifier/DefinitionCache/Decorator.php';
|
||||
require_once 'HTMLPurifier/DefinitionCache/Decorator/Memory.php';
|
||||
require_once 'HTMLPurifier/DefinitionCache/Decorator/Cleanup.php';
|
||||
|
||||
/**
|
||||
* Abstract class representing Definition cache managers that implements
|
||||
@@ -36,8 +38,8 @@ class HTMLPurifier_DefinitionCache
|
||||
*/
|
||||
function generateKey($config) {
|
||||
return $config->version . '-' . // possibly replace with function calls
|
||||
$config->get($this->type, 'DefinitionRev') . '-' .
|
||||
$config->getBatchSerial($this->type);
|
||||
$config->getBatchSerial($this->type) . '-' .
|
||||
$config->get($this->type, 'DefinitionRev');
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -47,11 +49,17 @@ class HTMLPurifier_DefinitionCache
|
||||
* @param $config Instance of HTMLPurifier_Config to test against
|
||||
*/
|
||||
function isOld($key, $config) {
|
||||
list($version, $revision, $hash) = explode('-', $key, 3);
|
||||
if (substr_count($key, '-') < 2) return true;
|
||||
list($version, $hash, $revision) = explode('-', $key, 3);
|
||||
$compare = version_compare($version, $config->version);
|
||||
if ($compare > 0) return false;
|
||||
if ($compare == 0 && $revision >= $config->get($this->type, 'DefinitionRev')) return false;
|
||||
return true;
|
||||
// version mismatch, is always old
|
||||
if ($compare != 0) return true;
|
||||
// versions match, ids match, check revision number
|
||||
if (
|
||||
$hash == $config->getBatchSerial($this->type) &&
|
||||
$revision < $config->get($this->type, 'DefinitionRev')
|
||||
) return true;
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -118,4 +126,3 @@ class HTMLPurifier_DefinitionCache
|
||||
}
|
||||
}
|
||||
|
||||
?>
|
@@ -2,9 +2,6 @@
|
||||
|
||||
require_once 'HTMLPurifier/DefinitionCache.php';
|
||||
|
||||
require_once 'HTMLPurifier/DefinitionCache/Decorator/Memory.php';
|
||||
require_once 'HTMLPurifier/DefinitionCache/Decorator/Cleanup.php';
|
||||
|
||||
class HTMLPurifier_DefinitionCache_Decorator extends HTMLPurifier_DefinitionCache
|
||||
{
|
||||
|
||||
@@ -60,4 +57,3 @@ class HTMLPurifier_DefinitionCache_Decorator extends HTMLPurifier_DefinitionCach
|
||||
|
||||
}
|
||||
|
||||
?>
|
@@ -42,4 +42,3 @@ class HTMLPurifier_DefinitionCache_Decorator_Cleanup extends
|
||||
|
||||
}
|
||||
|
||||
?>
|
@@ -45,4 +45,3 @@ class HTMLPurifier_DefinitionCache_Decorator_Memory extends
|
||||
|
||||
}
|
||||
|
||||
?>
|
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user