mirror of
https://github.com/ezyang/htmlpurifier.git
synced 2025-08-04 05:07:55 +02:00
Compare commits
7 Commits
Author | SHA1 | Date | |
---|---|---|---|
|
48ce521572 | ||
|
728e6c5b44 | ||
|
8104145580 | ||
|
6ef8abd04f | ||
|
bc5871f389 | ||
|
30d75c999d | ||
|
64d8ca9831 |
6
CREDITS
6
CREDITS
@@ -2,6 +2,6 @@
|
||||
CREDITS
|
||||
|
||||
Almost everything written by Edward Z. Yang (Ambush Commander). Lots of thanks
|
||||
to the DevNetwork Community for their help (see docs/ref-devnetwork.html for
|
||||
more details), Feyd especially (namely IPv6 and optimization). Thanks to RSnake
|
||||
for letting me package his fantastic XSS cheatsheet for a smoketest.
|
||||
to the DevNetwork Community for their help (see docs/devnetwork.html for more
|
||||
details), Feyd especially (namely IPv6 and optimization). Thanks to RSnake for
|
||||
letting me package his fantastic XSS cheatsheet for a smoketest.
|
||||
|
2
Doxyfile
2
Doxyfile
@@ -4,7 +4,7 @@
|
||||
# Project related configuration options
|
||||
#---------------------------------------------------------------------------
|
||||
PROJECT_NAME = HTML Purifier
|
||||
PROJECT_NUMBER = 1.2.0
|
||||
PROJECT_NUMBER = 1.1.2
|
||||
OUTPUT_DIRECTORY = "C:/Documents and Settings/Edward/My Documents/My Webs/htmlpurifier/docs/doxygen"
|
||||
CREATE_SUBDIRS = NO
|
||||
OUTPUT_LANGUAGE = English
|
||||
|
4
INSTALL
4
INSTALL
@@ -25,7 +25,7 @@ not having either of these extensions.
|
||||
|
||||
Simply use:
|
||||
|
||||
require_once '/path/to/library/HTMLPurifier.auto.php';
|
||||
require_once '/path/to/library/HTMLPurifier.auto.php';
|
||||
|
||||
...and you're good to go. Since HTML Purifier's codebase is fairly
|
||||
large, I recommend only including HTML Purifier when you need it.
|
||||
@@ -33,7 +33,7 @@ large, I recommend only including HTML Purifier when you need it.
|
||||
If you don't like your include_path to be fiddled around with, simply set
|
||||
HTML Purifier's library/ directory to the include path yourself and then:
|
||||
|
||||
require_once 'HTMLPurifier.php';
|
||||
require_once 'HTMLPurifier.php';
|
||||
|
||||
Only the contents in the library/ folder are necessary, so you can remove
|
||||
everything else when using HTML Purifier in a production environment.
|
||||
|
35
NEWS
35
NEWS
@@ -2,47 +2,12 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier
|
||||
|||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
||||
|
||||
= KEY ====================
|
||||
# Breaks back-compat
|
||||
! Feature
|
||||
- Bugfix
|
||||
+ Sub-comment
|
||||
. Internal change
|
||||
==========================
|
||||
|
||||
1.2.0, released 2006-11-19
|
||||
# ID attributes now disabled by default. New directives:
|
||||
+ %HTML.EnableAttrID - restores old behavior by allowing IDs
|
||||
+ %Attr.IDPrefix - %Attr.IDBlacklist alternative that munges all user IDs
|
||||
so that they don't collide with your IDs
|
||||
+ %Attr.IDPrefixLocal - Same as above, but for when there are multiple
|
||||
instances of user content on the page
|
||||
+ Profuse documentation on how to use these available in docs/enduser-id.txt
|
||||
! Added MODx plugin <http://modxcms.com/forums/index.php/topic,6604.0.html>
|
||||
! Added percent encoding normalization
|
||||
! XSS attacks smoketest given facelift
|
||||
! Configuration documentation now has table of contents
|
||||
! Added %URI.DisableExternal, which prevents links to external websites. You
|
||||
can also use %URI.Host to permit absolute linking to subdomains
|
||||
! Non-accessible resources (ex. mailto) blocked from embedded URIs (img src)
|
||||
- Type variable in HTMLDefinition was not being set properly, fixed
|
||||
- Documentation updated
|
||||
+ TODO added request Phalanger
|
||||
+ TODO added request Native compression
|
||||
+ TODO added request Remove redundant tags
|
||||
+ TODO added possible plaintext formatter for HTML Purifier documentation
|
||||
+ Updated ConfigDoc TODO
|
||||
+ Improved inline comments in AttrDef/Class.php, AttrDef/CSS.php
|
||||
and AttrDef/Host.php
|
||||
+ Revamped documentation into HTML, along with misc updates
|
||||
- HTMLPurifier_Context doesn't throw a variable reference error if you attempt
|
||||
to retrieve a non-existent variable
|
||||
. Switched to purify()-wide Context object registry
|
||||
. Refactored unit tests to minimize duplication
|
||||
. XSS attack sheet updated
|
||||
. configdoc.xml now has xml:space attached to default value nodes
|
||||
. Allow configuration directives to permit null values
|
||||
. Cleaned up test-cases to remove unnecessary swallowErrors()
|
||||
|
||||
1.1.2, released 2006-09-30
|
||||
! Add HTMLPurifier.auto.php stub file that configures include_path
|
||||
- Documentation updated
|
||||
|
9
SLOW
9
SLOW
@@ -2,13 +2,13 @@
|
||||
SLOW
|
||||
also known as the HELP ME LIBRARY IS TOO SLOW MY PAGE TAKE TOO LONG LOAD page
|
||||
|
||||
HTML Purifier is a very powerful library. But with power comes great
|
||||
HTMLPurifier is a very powerful library. But with power comes great
|
||||
responsibility, or, at least, longer execution times. Remember, this
|
||||
library isn't lightly grazing over submitted HTML: it's deconstructing
|
||||
the whole thing, rigorously checking the parts, and then putting it
|
||||
back together.
|
||||
|
||||
So, if it so turns out that HTML Purifier is kinda too slow for outbound
|
||||
So, if it so turns out that HTMLPurifier is kinda too slow for outbound
|
||||
filtering, you've got a few options:
|
||||
|
||||
1. Inbound filtering - perform filtering of HTML when it's submitted by the
|
||||
@@ -19,7 +19,7 @@ it directly from your database/filesystem. The trouble with this method is
|
||||
that your user loses the original text, and when doing edits, will be
|
||||
handling the filtered text. While this may be a good thing, especially if
|
||||
you're using a WYSIWYG editor, it can also result in data-loss if a user
|
||||
makes a typo.
|
||||
expects certain markup to be available but it isn't.
|
||||
|
||||
2. Caching the filtered output - accept the submitted text and put it
|
||||
unaltered into the database, but then also generate a filtered version and
|
||||
@@ -36,5 +36,4 @@ it has some drawbacks which cannot be fixed unless you save both the original
|
||||
and the filtered versions.
|
||||
|
||||
There is a third option: profile and optimize HTMLPurifier yourself. Be sure
|
||||
to report back your results if you decide to do that! Especially if you
|
||||
port HTML Purifier to C++. ;-)
|
||||
to tell me if you decide to do that! ;-)
|
||||
|
57
TODO
57
TODO
@@ -1,23 +1,23 @@
|
||||
|
||||
TODO List
|
||||
|
||||
1.3 release
|
||||
- Make URI validation routines tighter (especially mailto)
|
||||
- More extensive URI filtering schemes (see docs/proposal-new-directives.txt)
|
||||
- Allow for background-image and list-style-image (see above)
|
||||
- Error logging for filtering/cleanup procedures
|
||||
- Rich set* methods and config file loaders for HTMLPurifier_Config
|
||||
Ongoing
|
||||
- Lots of profiling, make it faster!
|
||||
- Plugins for major CMSes (very tricky issue)
|
||||
|
||||
1.4 release
|
||||
1.2 release
|
||||
- Make URI validation routines tighter (especially mailto)
|
||||
- More extensive URI filtering schemes
|
||||
- Allow for background-image and list-style-image (see above)
|
||||
- Distinguish between different types of URIs, for instance, a mailto URI
|
||||
in IMG SRC is nonsensical
|
||||
- Error logging for filtering/cleanup procedures
|
||||
|
||||
1.3 release
|
||||
- Add various "levels" of cleaning
|
||||
- Related: Allow strict (X)HTML
|
||||
- More fine-grained control over escaping behavior
|
||||
- Silently drop content in between SCRIPT tags (can be generalized to allow
|
||||
specification of elements that, when detected as foreign, trigger removal
|
||||
of children, although unbalanced tags could wreak havoc (or at least
|
||||
delete the rest of the document)).
|
||||
|
||||
1.5 release
|
||||
1.4 release
|
||||
- Additional support for poorly written HTML
|
||||
- Implement all non-essential attribute transforms
|
||||
- Microsoft Word HTML cleaning (i.e. MsoNormal)
|
||||
@@ -28,8 +28,6 @@ TODO List
|
||||
shouldn't be paragraphed, such as lists and tables).
|
||||
- Linkify URLs
|
||||
- Smileys
|
||||
- Linkification for HTML Purifier docs: notably configuration and
|
||||
class names
|
||||
|
||||
3.0 release
|
||||
- Extended HTML capabilities based on namespacing and tag transforms
|
||||
@@ -37,38 +35,19 @@ TODO List
|
||||
attributes, offer default implementation
|
||||
- Lots of documentation and samples
|
||||
|
||||
Ongoing
|
||||
- Lots of profiling, make it faster!
|
||||
- Plugins for major CMSes (very tricky issue)
|
||||
|
||||
Unknown release (on a scratch-an-itch basis)
|
||||
- Silently drop content in between SCRIPT tags (can be generalized to allow
|
||||
specification of elements that, when detected as foreign, trigger removal
|
||||
of children, although unbalanced tags could wreak havoc (or at least delete
|
||||
the rest of the document)).
|
||||
- Fixes for Firefox's inability to handle COL alignment props (Bug 915)
|
||||
- Automatically add non-breaking spaces to empty table cells when
|
||||
empty-cells:show is applied to have compatibility with Internet Explorer
|
||||
- Convert RTL/LTR override characters to <bdo> tags, or vice versa on demand.
|
||||
Also, enable disabling of directionality
|
||||
- Append something to duplicate IDs so they're still usable (impl. note: the
|
||||
dupe detector would also need to detect the suffix as well)
|
||||
- Have 'lang' attribute be checked against official lists
|
||||
|
||||
Encoding workarounds
|
||||
- Non-lossy dumb alternate character encoding transformations, achieved by
|
||||
numerically encoding all non-ASCII characters
|
||||
- Semi-lossy dumb alternate character encoding transformations, achieved by
|
||||
encoding all characters that have string entity equivalents
|
||||
|
||||
Requested
|
||||
- Native content compression, whitespace stripping (don't rely on Tidy, make
|
||||
sure we don't remove from <pre> or related tags)
|
||||
- Win32 Phalanger C# binaries (?)
|
||||
- Remove redundant tags, ex. <u><u>Underlined</u></u>. Implementation notes:
|
||||
1. Analyzing which tags to remove duplicates of
|
||||
2. Ensure attributes are merged into the parent tag
|
||||
3. Extend the tag exclusion system to specify whether or not the
|
||||
contents should be dropped or not (currently, there's code that could do
|
||||
something like this if it didn't drop the inner text too.)
|
||||
|
||||
Wontfix
|
||||
- Non-lossy smart alternate character encoding transformations (unless
|
||||
patch provided)
|
||||
- Non-lossy smart alternate character encoding transformations
|
||||
- Pretty-printing HTML, users can use Tidy on the output on entire page
|
||||
|
@@ -12,8 +12,10 @@ TODO:
|
||||
- multipage documentation
|
||||
- determine how to multilingualize
|
||||
- factor out code into classes
|
||||
- generate a table of contents
|
||||
*/
|
||||
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Check and configure environment
|
||||
|
||||
@@ -80,6 +82,9 @@ $dom_root->appendChild($dom_document->createElement('title', 'HTML Purifier'));
|
||||
|
||||
/*
|
||||
TODO for XML format:
|
||||
- namespace descriptions
|
||||
- enumerated values
|
||||
- default values
|
||||
- create a definition (DTD or other) once interface stabilizes
|
||||
*/
|
||||
|
||||
@@ -110,12 +115,9 @@ foreach($schema->info as $namespace_name => $namespace_info) {
|
||||
$dom_constraints = $dom_document->createElement('constraints');
|
||||
$dom_directive->appendChild($dom_constraints);
|
||||
|
||||
$dom_type = $dom_document->createElement('type', $info->type);
|
||||
if ($info->allow_null) {
|
||||
$dom_type->setAttribute('allow-null', 'yes');
|
||||
}
|
||||
$dom_constraints->appendChild($dom_type);
|
||||
|
||||
$dom_constraints->appendChild(
|
||||
$dom_document->createElement('type', $info->type)
|
||||
);
|
||||
if ($info->allowed !== true) {
|
||||
$dom_allowed = $dom_document->createElement('allowed');
|
||||
$dom_constraints->appendChild($dom_allowed);
|
||||
@@ -131,20 +133,14 @@ foreach($schema->info as $namespace_name => $namespace_info) {
|
||||
$default = $raw_default ? 'true' : 'false';
|
||||
} elseif (is_string($raw_default)) {
|
||||
$default = "\"$raw_default\"";
|
||||
} elseif (is_null($raw_default)) {
|
||||
$default = 'null';
|
||||
} else {
|
||||
$default = print_r(
|
||||
$schema->defaults[$namespace_name][$name], true
|
||||
);
|
||||
}
|
||||
|
||||
$dom_default = $dom_document->createElement('default', $default);
|
||||
|
||||
// remove this once we get a DTD
|
||||
$dom_default->setAttribute('xml:space', 'preserve');
|
||||
|
||||
$dom_constraints->appendChild($dom_default);
|
||||
$dom_constraints->appendChild(
|
||||
$dom_document->createElement('default', $default)
|
||||
);
|
||||
|
||||
$dom_descriptions = $dom_document->createElement('descriptions');
|
||||
$dom_directive->appendChild($dom_descriptions);
|
||||
|
@@ -5,6 +5,3 @@ table.constraints {margin:0 0 1em;}
|
||||
table.constraints th {text-align:left;padding-left:0.4em;}
|
||||
table.constraints td {padding-right:0.4em;}
|
||||
table.constraints td pre {margin:0;}
|
||||
|
||||
#toc {list-style-type:none; font-weight:bold;}
|
||||
#toc ul {list-style-type:disc; font-weight:normal;}
|
||||
|
@@ -23,41 +23,23 @@
|
||||
<link rel="stylesheet" type="text/css" href="styles/plain.css" />
|
||||
</head>
|
||||
<body>
|
||||
<h1><xsl:value-of select="/configdoc/title" /> Configuration Documentation</h1>
|
||||
<h2>Table of Contents</h2>
|
||||
<ul id="toc">
|
||||
<xsl:apply-templates mode="toc" />
|
||||
</ul>
|
||||
<xsl:apply-templates />
|
||||
</body>
|
||||
</html>
|
||||
</xsl:template>
|
||||
|
||||
<xsl:template match="title" mode="toc" />
|
||||
<xsl:template match="namespace" mode="toc">
|
||||
<xsl:if test="count(directive)>0">
|
||||
<li>
|
||||
<a href="#{@id}"><xsl:value-of select="name" /></a>
|
||||
<ul>
|
||||
<xsl:apply-templates select="directive" mode="toc" />
|
||||
</ul>
|
||||
</li>
|
||||
</xsl:if>
|
||||
<xsl:template match="title">
|
||||
<h1><xsl:value-of select="/configdoc/title" /> Configuration Documentation</h1>
|
||||
</xsl:template>
|
||||
<xsl:template match="directive" mode="toc">
|
||||
<li><a href="#{@id}"><xsl:value-of select="name" /></a></li>
|
||||
</xsl:template>
|
||||
|
||||
<xsl:template match="title" />
|
||||
|
||||
<xsl:template match="namespace">
|
||||
<xsl:apply-templates />
|
||||
<xsl:if test="count(directive)=0">
|
||||
<xsl:if test="count(child::directive)=0">
|
||||
<p>No configuration directives defined for this namespace.</p>
|
||||
</xsl:if>
|
||||
</xsl:template>
|
||||
<xsl:template match="namespace/name">
|
||||
<h2 id="{../@id}"><xsl:value-of select="." /></h2>
|
||||
<h2 id="{../@id}"><xsl:value-of select="text()" /></h2>
|
||||
</xsl:template>
|
||||
<xsl:template match="namespace/description">
|
||||
<div class="description">
|
||||
@@ -69,7 +51,7 @@
|
||||
<xsl:apply-templates />
|
||||
</xsl:template>
|
||||
<xsl:template match="directive/name">
|
||||
<h3 id="{../@id}"><xsl:value-of select="../@id" /></h3>
|
||||
<h3 id="{../@id}"><xsl:value-of select="text()" /></h3>
|
||||
</xsl:template>
|
||||
<xsl:template match="directive/constraints">
|
||||
<table class="constraints">
|
||||
@@ -99,9 +81,6 @@
|
||||
<xsl:variable name="type" select="text()" />
|
||||
<xsl:attribute name="class">type type-<xsl:value-of select="$type" /></xsl:attribute>
|
||||
<xsl:value-of select="$typeLookup/types/type[@id=$type]/text()" />
|
||||
<xsl:if test="@allow-null='yes'">
|
||||
(or null)
|
||||
</xsl:if>
|
||||
</td>
|
||||
</tr>
|
||||
</xsl:template>
|
||||
|
39
docs/code-quality.txt
Normal file
39
docs/code-quality.txt
Normal file
@@ -0,0 +1,39 @@
|
||||
|
||||
Code Quality Issues
|
||||
|
||||
Okay, face it. Programmers can get lazy, cut corners, or make mistakes. They
|
||||
also can do quick prototypes, and then forget to rewrite them later. Well,
|
||||
while I can't list mistakes in here, I can list prototype-like segments
|
||||
of code that should be aggressively refactored after the beta is released.
|
||||
This does not list optimization issues; that needs to be done after intense
|
||||
profiling.
|
||||
|
||||
Here we go:
|
||||
|
||||
AttrDef
|
||||
Class - doesn't support Unicode characters (fringe); uses regular
|
||||
expressions
|
||||
Lang - code duplication; premature optimization; doesn't consult official
|
||||
lists (fringe)
|
||||
Length - easily mistaken for CSSLength
|
||||
URI - multiple regular expressions; needs host validation routines factored
|
||||
out for mailto scheme; missing validation for query, fragment and path;
|
||||
no percent-encode fixing
|
||||
CSS - parser doesn't accept advanced CSS (fringe)
|
||||
Number - constructor interface is inconsistent with Integer
|
||||
AttrTransform - doesn't accept AttrContext
|
||||
Config - "load configuration" hooks missing, rich set* accessors missing
|
||||
ConfigSchema - redefinition is a mess
|
||||
Strategy
|
||||
FixNesting - cannot bubble nodes out of structures
|
||||
MakeWellFormed - insufficient automatic closing definitions (check HTML
|
||||
spec for optional end tags, also, closing based on type (block/inline)
|
||||
might be efficient).
|
||||
RemoveForeignElements - should be run in parallel with MakeWellFormed
|
||||
URIScheme - needs to have callable generic checks
|
||||
ftp - missing typecode check
|
||||
mailto - doesn't validate emails
|
||||
news - doesn't validate opaque path
|
||||
nntp - doesn't constrain path
|
||||
EOL
|
||||
|
23
docs/colors.txt
Normal file
23
docs/colors.txt
Normal file
@@ -0,0 +1,23 @@
|
||||
|
||||
Colors
|
||||
Hammering some sense into those content-makers
|
||||
|
||||
Your website probably has a color-scheme. Green on white, purple on yellow,
|
||||
whatever. When you give users the ability to style their content, you may
|
||||
want them to keep in line with your styling. If your website is all
|
||||
about light colors, you don't want a user to come in and vandalize your
|
||||
page with a deep maroon.
|
||||
|
||||
This is an extremely silly feature proposal, but I'm writing it down anyway.
|
||||
|
||||
What if the user could constrain the colors specified in inline styles? You
|
||||
are only allowed to use these shades of dark green for text and these shades
|
||||
of light yellow for the background. At the very least, you could ensure
|
||||
that we did not have pale yellow on white text.
|
||||
|
||||
Implementation issues:
|
||||
1. Requires the color attribute definition to know, currently, what the text
|
||||
and background colors are. This becomes difficult when classes are thrown
|
||||
into the mix.
|
||||
2. The user still has to define the permissible colors, how does one do
|
||||
something like that?
|
46
docs/config-ideas.txt
Normal file
46
docs/config-ideas.txt
Normal file
@@ -0,0 +1,46 @@
|
||||
|
||||
Configuration Ideas
|
||||
|
||||
Here are some theoretical configuration ideas that we could implement some
|
||||
time. Note the naming convention: %Namespace.Directive
|
||||
|
||||
%Attr.IDPrefix - prefix all ids with this
|
||||
|
||||
%Attr.RewriteFragments - if there's %Attr.IDPrefix we may want to transparently
|
||||
rewrite the URLs we parse too. However, we can only do it when it's a pure
|
||||
anchor link, so it's not foolproof
|
||||
|
||||
%Attr.ClassBlacklist,
|
||||
%Attr.ClassWhitelist,
|
||||
%Attr.ClassListMode - determines what classes are allowed. When
|
||||
%Attr.ClassListMode is set to Blacklist, only allow those not in
|
||||
%Attr.ClassBlacklist. When it's Whitelist, only allow those in
|
||||
%Attr.ClassWhitelist.
|
||||
|
||||
%Attr.LangAlphaOnly - designate whether or not to allow numerals in language
|
||||
code subtags
|
||||
* RFC 1766, the current standard referenced by XML, does not permit
|
||||
numbers, but,
|
||||
* RFC 3066, the superseding best practice standard since January 2001,
|
||||
permits them.
|
||||
We allow numbers by default, but you generally never see them
|
||||
at all, which makes this a little more sane.
|
||||
|
||||
%Attr.MaxWidth,
|
||||
%Attr.MaxHeight - caps for width and height related checks.
|
||||
(a hack in Pixels for an image crashing attack could be replaced by this)
|
||||
|
||||
%URI.Munge - will munge all URIs to a different URI, which should redirect
|
||||
the user to the applicable page. A urlencoded version of the URI
|
||||
will replace any instances of %s in the string. One possible
|
||||
string is 'http://www.google.com/url?q=%s'. Useful for preventing
|
||||
pagerank from being sent to other sites
|
||||
|
||||
%URI.AddRelNofollow - will add rel="nofollow" to all links, preventing the
|
||||
spread of ill-gotten pagerank
|
||||
|
||||
%URI.Host - host of website, for external link checks
|
||||
|
||||
%URI.RelativeToAbsolute - transforms all relative URIs to absolute form
|
||||
|
||||
%URI.DisableExternal - disable external links
|
@@ -10,9 +10,12 @@ Directives are divided into namespaces, indicating the major portion of
|
||||
functionality they cover (although there may be overlaps). Please consult
|
||||
the documentation in ConfigDef for more information on these namespaces.
|
||||
|
||||
Since configuration is dependant on context, internal classes require a
|
||||
configuration object to be passed as a parameter. (They also require a
|
||||
Context object).
|
||||
Since configuration is dependent on context, most of the internal classes
|
||||
require a configuration object to be passed as a parameter. However, a few
|
||||
make this optional: they will supply a default configuration object if none
|
||||
are passed. These classes are: HTMLPurifier::*, Generator::generateFromTokens
|
||||
and Lexer::tokenizeHTML. However, whenever a valid configuration object
|
||||
is defined, that object should be used.
|
||||
|
||||
In relation to HTMLDefinition and CSSDefinition, there is a special class
|
||||
of directives that influence the *construction* of the Definition object.
|
@@ -1,48 +0,0 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
|
||||
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
|
||||
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"><head>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
|
||||
<meta name="description" content="Discusses code quality issues and places that need to be refactored in HTML Purifier." />
|
||||
<link rel="stylesheet" type="text/css" href="./style.css" />
|
||||
|
||||
<title>Code Quality Issues - HTML Purifier</title>
|
||||
|
||||
</head><body>
|
||||
|
||||
<h1>Code Quality Issues</h1>
|
||||
|
||||
<div id="filing">Filed under Development</div>
|
||||
<div id="index">Return to the <a href="index.html">index</a>.</div>
|
||||
|
||||
<p>Okay, face it. Programmers can get lazy, cut corners, or make mistakes. They
|
||||
also can do quick prototypes, and then forget to rewrite them later. Well,
|
||||
while I can't list mistakes in here, I can list prototype-like segments
|
||||
of code that should be aggressively refactored. This does not list
|
||||
optimization issues; that needs to be done after intense profiling.</p>
|
||||
|
||||
<pre>
|
||||
AttrDef
|
||||
Class - doesn't support Unicode characters (fringe); uses regular
|
||||
expressions
|
||||
Lang - code duplication; premature optimization
|
||||
Length - easily mistaken for CSSLength
|
||||
URI - multiple regular expressions; missing validation for parts (?)
|
||||
CSS - parser doesn't accept advanced CSS (fringe)
|
||||
Number - constructor interface inconsistent with Integer
|
||||
ConfigSchema - redefinition is a mess
|
||||
Strategy
|
||||
FixNesting - cannot bubble nodes out of structures
|
||||
MakeWellFormed - insufficient automatic closing definitions (check HTML
|
||||
spec for optional end tags, also, closing based on type (block/inline)
|
||||
might be efficient).
|
||||
RemoveForeignElements - should be run in parallel with MakeWellFormed
|
||||
URIScheme - needs to have callable generic checks
|
||||
mailto - doesn't validate emails, doesn't validate querystring
|
||||
news - doesn't validate opaque path
|
||||
nntp - doesn't constrain path
|
||||
</pre>
|
||||
|
||||
<div id="version">$Id$</div>
|
||||
|
||||
</body></html>
|
@@ -1,80 +0,0 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
|
||||
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
|
||||
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"><head>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
|
||||
<meta name="description" content="Defines class naming conventions in HTML Purifier." />
|
||||
<link rel="stylesheet" type="text/css" href="./style.css" />
|
||||
|
||||
<title>Naming Conventions - HTML Purifier</title>
|
||||
|
||||
</head><body>
|
||||
|
||||
<h1>Naming Conventions</h1>
|
||||
|
||||
<div id="filing">Filed under Development</div>
|
||||
<div id="index">Return to the <a href="index.html">index</a>.</div>
|
||||
|
||||
<p>The classes in this library follow a few naming conventions, which may
|
||||
help you find the correct functionality more quickly. Here they are:</p>
|
||||
|
||||
<dl>
|
||||
|
||||
<dt>All classes occupy the HTMLPurifier pseudo-namespace.</dt>
|
||||
<dd>This means that all classes are prefixed with HTMLPurifier_. As such, all
|
||||
names under HTMLPurifier_ are reserved. I recommend that you use the name
|
||||
HTMLPurifierX_YourName_ClassName, especially if you want to take advantage
|
||||
of HTMLPurifier_ConfigDef.</dd>
|
||||
|
||||
<dt>All classes correspond to their path if library/ was in the include path</dt>
|
||||
<dd>HTMLPurifier_AttrDef is located at HTMLPurifier/AttrDef.php; replace
|
||||
underscores with slashes and append .php and you'll have the location of
|
||||
the class.</dd>
|
||||
|
||||
<dt>Harness and Test are reserved class names for unit tests</dt>
|
||||
<dd>The suffix <code>Test</code> indicates that the class is a subclass of UnitTestCase
|
||||
(of the Simpletest library) and is testable. "Harness" indicates a subclass
|
||||
of UnitTestCase that is not meant to be run but to be extended into
|
||||
concrete test cases and contains custom test methods (i.e. assert*())</dd>
|
||||
|
||||
<dt>Class names do not necessarily represent inheritance hierarchies</dt>
|
||||
<dd>While we try to reflect inheritance in naming to some extent, it is not
|
||||
guaranteed (for instance, none of the classes inherit from HTMLPurifier,
|
||||
the base class). However, all class files have the require_once
|
||||
declarations to whichever classes they are tightly coupled to.</dd>
|
||||
|
||||
<dt>Strategy has a meaning different from the Gang of Four pattern</dt>
|
||||
<dd>In Design Patterns, the Gang of Four describes a Strategy object as
|
||||
encapsulating an algorithm so that they can be switched at run-time. While
|
||||
our strategies are indeed algorithms, they are not meant to be substituted:
|
||||
all must be present in order for proper functioning.</dd>
|
||||
|
||||
<dt>Abbreviations are avoided</dt>
|
||||
<dd>We try to avoid abbreviations as much as possible, but in some cases,
|
||||
the abbreviated version is more readable than the full version. Here, we
|
||||
list common abbreviations:
|
||||
<ul>
|
||||
<li>Attr(s) to Attribute(s)</li>
|
||||
<li>Def to Definition</li>
|
||||
</ul>
|
||||
</dd>
|
||||
|
||||
<dt>Ambiguity concerning the definition of Def/Definition</dt>
|
||||
<dd>While a definition normally defines the structure/acceptable values of
|
||||
an entity, most of the definitions in this application also attempt
|
||||
to validate and fix the value. I am unsure of a better name, as
|
||||
"Validator" would exclude fixing the value, "Fixer" doesn't invoke
|
||||
the proper image of "fixing" something, and "ValidatorFixer" is too long!
|
||||
Some other suggestions were "Handler", "Reference", "Check", "Fix",
|
||||
"Repair" and "Heal".</dd>
|
||||
|
||||
<dt>Transform not Transformer</dt>
|
||||
<dd>Transform is both a noun and a verb, and thus we define a "Transform" as
|
||||
something that "transforms," leaving "Transformer" (which sounds like an
|
||||
electrical device/robot toy).</dd>
|
||||
|
||||
</dl>
|
||||
|
||||
<div id="version">$Id$</div>
|
||||
|
||||
</body></html>
|
@@ -1,32 +0,0 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
|
||||
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
|
||||
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"><head>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
|
||||
<meta name="description" content="Discusses possible methods of optimizing HTML Purifier." />
|
||||
<link rel="stylesheet" type="text/css" href="./style.css" />
|
||||
|
||||
<title>Optimization - HTML Purifier</title>
|
||||
|
||||
</head><body>
|
||||
|
||||
<h1>Optimization</h1>
|
||||
|
||||
<div id="filing">Filed under Development</div>
|
||||
<div id="index">Return to the <a href="index.html">index</a>.</div>
|
||||
|
||||
<p>Here are some possible optimization techniques we can apply to code sections if
|
||||
they turn out to be slow. Be sure not to prematurely optimize: if you get
|
||||
that itch, put it here!</p>
|
||||
|
||||
<ul>
|
||||
<li>Make Tokens Flyweights (may prove problematic, probably not worth it)</li>
|
||||
<li>Rewrite regexps into PHP code</li>
|
||||
<li>Serialize the Definition object</li>
|
||||
<li>Batch regexp validation (do as many per function call as possible)</li>
|
||||
<li>Parallelize strategies</li>
|
||||
</ul>
|
||||
|
||||
<div id="version">$Id$</div>
|
||||
|
||||
</body></html>
|
@@ -1,44 +1,31 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
|
||||
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
|
||||
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"><head>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
|
||||
<meta name="description" content="Credits and links to DevNetwork forum topics on HTML Purifier." />
|
||||
<link rel="stylesheet" type="text/css" href="./style.css" />
|
||||
|
||||
<title>DevNetwork Credits - HTML Purifier</title>
|
||||
|
||||
</head>
|
||||
<body>
|
||||
|
||||
<h1>DevNetwork Credits</h1>
|
||||
|
||||
<div id="filing">Filed under Reference</div>
|
||||
<div id="index">Return to the <a href="index.html">index</a>.</div>
|
||||
|
||||
<p>Many thanks to the DevNetwork community for answering questions,
|
||||
theorizing about design, and offering encouragement during
|
||||
the development of this library in these forum threads:</p>
|
||||
|
||||
<ul>
|
||||
<li><a href="http://forums.devnetwork.net/viewtopic.php?t=52905">HTMLPurifier PHP Library hompeage</a></li>
|
||||
<li><a href="http://forums.devnetwork.net/viewtopic.php?t=53056">How much of CSS to implement?</a></li>
|
||||
<li><a href="http://forums.devnetwork.net/viewtopic.php?t=53083">Parsing URL only according to URI : Security Risk?</a></li>
|
||||
<li><a href="http://forums.devnetwork.net/viewtopic.php?t=53096">Gimme a name : URI and friends</a></li>
|
||||
<li><a href="http://forums.devnetwork.net/viewtopic.php?t=53415">How to document configuration directives</a></li>
|
||||
<li><a href="http://forums.devnetwork.net/viewtopic.php?t=53479">IPv6</a></li>
|
||||
<li><a href="http://forums.devnetwork.net/viewtopic.php?t=53539">http and ftp versus news and mailto</a></li>
|
||||
<li><a href="http://forums.devnetwork.net/viewtopic.php?t=53579">HTMLPurifier - Take your best shot</a></li>
|
||||
<li><a href="http://forums.devnetwork.net/viewtopic.php?t=53664">Need help optimizing a block of code</a></li>
|
||||
<li><a href="http://forums.devnetwork.net/viewtopic.php?t=53861">Non-SGML characters</a></li>
|
||||
<li><a href="http://forums.devnetwork.net/viewtopic.php?t=54283">Wordpress makes me cry</a></li>
|
||||
<li><a href="http://forums.devnetwork.net/viewtopic.php?t=54478">Parameter Object vs. Parameter Array vs. Parameter Functions</a></li>
|
||||
<li><a href="http://forums.devnetwork.net/viewtopic.php?t=54521">Convert encoding where output cannot represent characters</a></li>
|
||||
<li><a href="http://forums.devnetwork.net/viewtopic.php?t=56411">Reporting errors in a document without line numbers</a></li>
|
||||
</ul>
|
||||
|
||||
<p>...as well as any I may have forgotten.</p>
|
||||
|
||||
<div id="version">$Id$</div>
|
||||
</body>
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
|
||||
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
|
||||
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"><head>
|
||||
|
||||
<title>DevNetwork Forums</title>
|
||||
|
||||
</head>
|
||||
<body>
|
||||
|
||||
<p>Many thanks to the DevNetwork community for answering questions,
|
||||
theorizing about design, and offering encouragement during
|
||||
the development of this library in these forum threads:</p>
|
||||
|
||||
<ul>
|
||||
<li><a href="http://forums.devnetwork.net/viewtopic.php?t=52905">HTMLPurifier PHP Library hompeage</a></li>
|
||||
<li><a href="http://forums.devnetwork.net/viewtopic.php?t=53056">How much of CSS to implement?</a></li>
|
||||
<li><a href="http://forums.devnetwork.net/viewtopic.php?t=53083">Parsing URL only according to URI : Security Risk?</a></li>
|
||||
<li><a href="http://forums.devnetwork.net/viewtopic.php?t=53096">Gimme a name : URI and friends</a></li>
|
||||
<li><a href="http://forums.devnetwork.net/viewtopic.php?t=53415">How to document configuration directives</a></li>
|
||||
<li><a href="http://forums.devnetwork.net/viewtopic.php?t=53479">IPv6</a></li>
|
||||
<li><a href="http://forums.devnetwork.net/viewtopic.php?t=53539">http and ftp versus news and mailto</a></li>
|
||||
<li><a href="http://forums.devnetwork.net/viewtopic.php?t=53579">HTMLPurifier - Take your best shot</a></li>
|
||||
<li><a href="http://forums.devnetwork.net/viewtopic.php?t=53664">Need help optimizing a block of code</a></li>
|
||||
<li><a href="http://forums.devnetwork.net/viewtopic.php?t=53861">Non-SGML characters</a></li>
|
||||
<li><a href="http://forums.devnetwork.net/viewtopic.php?t=54283">Wordpress makes me cry</a></li>
|
||||
<li><a href="http://forums.devnetwork.net/viewtopic.php?t=54478">Parameter Object vs. Parameter Array vs. Parameter Functions</a></li>
|
||||
<li><a href="http://forums.devnetwork.net/viewtopic.php?t=54521">Convert encoding where output cannot represent characters</a></li>
|
||||
</ul>
|
||||
</body>
|
||||
</html>
|
@@ -1,146 +0,0 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
|
||||
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
|
||||
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"><head>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
|
||||
<meta name="description" content="Explains various methods for allowing IDs in documents safely in HTML Purifier." />
|
||||
<link rel="stylesheet" type="text/css" href="./style.css" />
|
||||
|
||||
<title>IDs - HTML Purifier</title>
|
||||
|
||||
</head><body>
|
||||
|
||||
<h1 class="subtitled">IDs</h1>
|
||||
<div class="subtitle">What they are, why you should(n't) wear them, and how to deal with it</div>
|
||||
|
||||
<div id="filing">Filed under End-User</div>
|
||||
<div id="index">Return to the <a href="index.html">index</a>.</div>
|
||||
|
||||
<p>Prior to HTML Purifier 1.2.0, this library blithely accepted user input that
|
||||
looked like this:</p>
|
||||
|
||||
<pre><a id="fragment">Anchor</a></pre>
|
||||
|
||||
<p>...presenting an attractive vector for those that would destroy standards
|
||||
compliance: simply set the ID to one that is already used elsewhere in the
|
||||
document and voila: validation breaks. There was a half-hearted attempt to
|
||||
prevent this by allowing users to blacklist IDs, but I suspect that no one
|
||||
really bothered, and thus, with the release of 1.2.0, IDs are now <em>removed</em>
|
||||
by default.</p>
|
||||
|
||||
<p>IDs, however, are quite useful functionality to have, so if users start
|
||||
complaining about broken anchors you'll probably want to turn them back on
|
||||
with %HTML.EnableAttrID. But before you go mucking around with the config
|
||||
object, it's probably worth taking some precautions to keep your page
|
||||
validating. Why?</p>
|
||||
|
||||
<ol>
|
||||
<li>Standards-compliant pages are good</li>
|
||||
<li>Duplicated IDs interfere with anchors. If there are two id="foobar"s in a
|
||||
document, which spot does a browser presented with the fragment #foobar go
|
||||
to? Most browsers opt for the first appearing ID, making it impossible
|
||||
to reference the second section. Similarly, duplicated IDs can hijack
|
||||
client-side scripting that relies on the IDs of elements.</li>
|
||||
</ol>
|
||||
|
||||
<p>You have (currently) four ways of dealing with the problem.</p>
|
||||
|
||||
|
||||
|
||||
<h2 class="subtitled">Blacklisting IDs</h2>
|
||||
<div class="subsubtitle">Good for pages with single content source and stable templates</div>
|
||||
|
||||
<p>In keeping with the
|
||||
<acronym title="Keep It Simple, Stupid">KISS</acronym> principle, let us
|
||||
deal with the most obvious solution: preventing users from using any IDs that
|
||||
appear elsewhere on the document. The method is simple:</p>
|
||||
|
||||
<pre>$config->set('HTML', 'EnableAttrID', true);
|
||||
$config->set('Attr', 'IDBlacklist', array(
|
||||
'list', 'of', 'attributes', 'that', 'are', 'forbidden'
|
||||
));</pre>
|
||||
|
||||
<p>That being said, there are some notable drawbacks. First of all, you have to
|
||||
know precisely which IDs are being used by the HTML surrounding the user code.
|
||||
This is easier said than done: quite often the page designer and the system
|
||||
coder work separately, so the designer has to constantly be talking with the
|
||||
coder whenever he decides to add a new anchor. Miss one and you open yourself
|
||||
to possible standards-compliance issues.</p>
|
||||
|
||||
<p>Furthermore, this position becomes untenable when a single web page must hold
|
||||
multiple portions of user-submitted content. Since there's obviously no way
|
||||
to find out before-hand what IDs users will use, the blacklist is helpless.
|
||||
And since HTML Purifier validates each segment separately, perhaps doing
|
||||
so at different times, it would be extremely difficult to dynamically update
|
||||
the blacklist in between runs.</p>
|
||||
|
||||
<p>Finally, simply destroying the ID is extremely un-userfriendly behavior: after
|
||||
all, they might have simply specified a duplicate ID by accident.</p>
|
||||
|
||||
<p>Thus, we get to our second method.</p>
|
||||
|
||||
|
||||
|
||||
<h2 class="subtitled">Namespacing IDs</h2>
|
||||
<div class="subsubtitle">Lazy developer's way, but needs user education</div>
|
||||
|
||||
<p>This method, too, is quite simple: add a prefix to all user IDs. With this
|
||||
code:</p>
|
||||
|
||||
<pre>$config->set('HTML', 'EnableAttrID', true);
|
||||
$config->set('Attr', 'IDPrefix', 'user_');</pre>
|
||||
|
||||
<p>...this:</p>
|
||||
|
||||
<pre><a id="foobar">Anchor!</a></pre>
|
||||
|
||||
<p>...turns into:</p>
|
||||
|
||||
<pre><a id="user_foobar">Anchor!</a></pre>
|
||||
|
||||
<p>As long as you don't have any IDs that start with user_, collisions are
|
||||
guaranteed not to happen. The drawback is obvious: if a user submits
|
||||
id="foobar", they probably expect to be able to reference their page with
|
||||
#foobar. You'll have to tell them, "No, that doesn't work, you have to add
|
||||
user_ to the beginning."</p>
|
||||
|
||||
<p>And yes, things get hairier. Even with a nice prefix, we still have done
|
||||
nothing about multiple HTML Purifier outputs on one page. Thus, we have
|
||||
a second configuration value to piggy-back off of: %Attr.IDPrefixLocal:</p>
|
||||
|
||||
<pre>$config->set('Attr', 'IDPrefixLocal', 'comment' . $id . '_');</pre>
|
||||
|
||||
<p>This new attribute does nothing but append on to the regular IDPrefix, but is
|
||||
special in that it is volatile: its value is determined at run-time and
|
||||
cannot possibly be cordoned into, say, a .ini config file. As for what to
|
||||
put into the directive, that is up to you, but I would recommend the ID number
|
||||
the text has been assigned in the database. Whatever you pick, however, it
|
||||
has to be unique and stable for the text you are validating. Note, however,
|
||||
that we require that %Attr.IDPrefix be set before you use this directive.</p>
|
||||
|
||||
<p>And also remember: the user has to know what this prefix is too!</p>
|
||||
|
||||
|
||||
|
||||
<h2>Abstinence</h2>
|
||||
|
||||
<p>You may not want to bother. That's okay too, just don't enable IDs.</p>
|
||||
|
||||
<p>Personally, I would take this road whenever user-submitted content would
|
||||
possibly be shown together on one page. Why a blog comment would need to use
|
||||
anchors is beyond me.</p>
|
||||
|
||||
|
||||
|
||||
<h2>Denial</h2>
|
||||
|
||||
<p>To revert back to pre-1.2.0 behavior, simply:</p>
|
||||
|
||||
<pre>$config->set('HTML', 'EnableAttrID', true);</pre>
|
||||
|
||||
<p>Don't come crying to me when your page mysteriously stops validating, though.</p>
|
||||
|
||||
<div id="version">$Id$</div>
|
||||
|
||||
</body>
|
||||
</html>
|
@@ -126,5 +126,5 @@ Colored - background-color (background), border-color (border), color
|
||||
Dramatic - border, list-style-position (list-style), margin, padding,
|
||||
text-align, text-indent, text-transform, vertical-align, line-height
|
||||
|
||||
Dramatic elements substantially change the look of text in ways that should
|
||||
Dramatic elements substantially change the look of text in ways that should
|
||||
probably have been reserved to other areas.
|
@@ -1,70 +0,0 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
|
||||
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
|
||||
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"><head>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
|
||||
<meta name="description" content="Index to all HTML Purifier documentation." />
|
||||
<link rel="stylesheet" type="text/css" href="./style.css" />
|
||||
|
||||
<title>Documentation - HTML Purifier</title>
|
||||
|
||||
</head>
|
||||
<body>
|
||||
|
||||
<h1>Documentation</h1>
|
||||
|
||||
<p><strong>HTML Purifier</strong> has documentation for all types of people.
|
||||
Here is an index of all of them.</p>
|
||||
|
||||
<h2>End-user</h2>
|
||||
<p>End-user documentation that contains articles, tutorials and useful
|
||||
information for casual developers using HTML Purifier.</p>
|
||||
|
||||
<dl>
|
||||
|
||||
<dt><a href="enduser-id.html">IDs</a></dt>
|
||||
<dd>Explains various methods for allowing IDs in documents safely in HTML Purifier.</dd>
|
||||
|
||||
</dl>
|
||||
|
||||
<h2>Development</h2>
|
||||
<p>Developer documentation detailing code issues, roadmaps and project
|
||||
conventions.</p>
|
||||
|
||||
<dl>
|
||||
|
||||
<dt><a href="dev-code-quality.html">Code Quality Issues</a></dt>
|
||||
<dd>Discusses code quality issues and places that need to be refactored.</dd>
|
||||
|
||||
<dt><a href="dev-progress.html">Implementation Progress</a></dt>
|
||||
<dd>Tables detailing HTML element and CSS property implementation coverage.</dd>
|
||||
|
||||
<dt><a href="dev-naming.html">Naming Conventions</a></dt>
|
||||
<dd>Defines class naming conventions.</dd>
|
||||
|
||||
<dt><a href="dev-optimization.html">Optimization</a></dt>
|
||||
<dd>Discusses possible methods of optimizing HTML Purifier.</dd>
|
||||
|
||||
</dl>
|
||||
|
||||
<h2>Proposals</h2>
|
||||
<p>Proposed features, as well as the associated rambling to get a clear
|
||||
objective in place before attempted implementation.</p>
|
||||
|
||||
<dl>
|
||||
<dt><a href="proposal-colors.html">Colors</a></dt>
|
||||
<dd>Proposal to allow for color constraints.</dd>
|
||||
</dl>
|
||||
|
||||
<h2>Reference</h2>
|
||||
<p>Miscellaneous essays, research pieces and other reference type material
|
||||
that may not directly discuss HTML Purifier.</p>
|
||||
|
||||
<dl>
|
||||
<dt><a href="ref-devnetwork.html">DevNetwork Credits</a></dt>
|
||||
<dd>Credits and links to DevNetwork forum topics.</dd>
|
||||
</dl>
|
||||
|
||||
<div id="version">$Id$</div>
|
||||
</body>
|
||||
</html>
|
56
docs/naming.txt
Normal file
56
docs/naming.txt
Normal file
@@ -0,0 +1,56 @@
|
||||
|
||||
Naming
|
||||
|
||||
The classes in this library follow a few naming conventions, which may
|
||||
help you find the correct functionality more quickly. Here they are:
|
||||
|
||||
All classes occupy the HTMLPurifier pseudo-namespace.
|
||||
This means that all classes are prefixed with HTMLPurifier_. As such, all
|
||||
names under HTMLPurifier_ are reserved. I recommend that you use the name
|
||||
HTMLPurifierX_YourName_ClassName, especially if you want to take advantage
|
||||
of HTMLPurifier_ConfigDef.
|
||||
|
||||
All classes correspond to their path if library/ was in the include path
|
||||
HTMLPurifier_AttrDef is located at HTMLPurifier/AttrDef.php; replace
|
||||
underscores with slashes and append .php and you'll have the location of
|
||||
the class.
|
||||
|
||||
Harness and Test are reserved class names for unit tests
|
||||
The suffix "Test" indicates that the class is a subclass of UnitTestCase
|
||||
(of the Simpletest library) and is testable. "Harness" indicates a subclass
|
||||
of UnitTestCase that is not meant to be run but to be extended into
|
||||
concrete test cases and contains custom test methods (i.e. assert*())
|
||||
|
||||
Class names do not necessarily represent inheritance hierarchies
|
||||
While we try to reflect inheritance in naming to some extent, it is not
|
||||
guaranteed (for instance, none of the classes inherit from HTMLPurifier,
|
||||
the base class). However, all class files have the require_once
|
||||
declarations to whichever classes they are tightly coupled to.
|
||||
|
||||
Strategy has a meaning different from the Gang of Four pattern
|
||||
In Design Patterns, the Gang of Four describes a Strategy object as
|
||||
encapsulating an algorithm so that they can be switched at run-time. While
|
||||
our strategies are indeed algorithms, they are not meant to be substituted:
|
||||
all must be present in order for proper functioning.
|
||||
|
||||
Abbreviations are avoided
|
||||
We try to avoid abbreviations as much as possible, but in some cases,
|
||||
the abbreviated version is more readable than the full version. Here, we
|
||||
list common abbreviations:
|
||||
Attr(s) -> Attribute(s)
|
||||
Def -> Definition
|
||||
|
||||
Ambiguity concerning the definition of Def/Definition
|
||||
While a definition normally defines the structure/acceptable values of
|
||||
an entity, most of the definitions in this application also attempt
|
||||
to validate and fix the value. I am unsure of a better name, as
|
||||
"Validator" would exclude fixing the value, "Fixer" doesn't evoke
|
||||
the proper image of "fixing" something, and "ValidatorFixer" is too long!
|
||||
Some other suggestions were "Handler", "Reference", "Check", "Fix",
|
||||
"Repair" and "Heal".
|
||||
|
||||
Transform not Transformer
|
||||
Transform is both a noun and a verb, and thus we define a "Transform" as
|
||||
something that "transforms," leaving "Transformer" (which sounds like an
|
||||
electrical device/robot toy).
|
||||
|
12
docs/optimization.txt
Normal file
12
docs/optimization.txt
Normal file
@@ -0,0 +1,12 @@
|
||||
|
||||
Optimization
|
||||
|
||||
Here are some possible optimization techniques we can apply to code sections if
|
||||
they turn out to be slow. Be sure not to prematurely optimize: if you get
|
||||
that itch, put it here!
|
||||
|
||||
- Make Tokens Flyweights (may prove problematic, probably not worth it)
|
||||
- Rewrite regexps into PHP code
|
||||
- Serialize the Definition object
|
||||
- Batch regexp validation (do as many per function call as possible)
|
||||
- Parallelize strategies
|
@@ -1,300 +1,292 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
|
||||
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
|
||||
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"><head>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
|
||||
<meta name="description" content="Tables detailing HTML element and CSS property implementation coverage in HTML Purifier." />
|
||||
<link rel="stylesheet" type="text/css" href="./style.css" />
|
||||
|
||||
<title>Implementation Progress - HTML Purifier</title>
|
||||
|
||||
<style type="text/css">
|
||||
|
||||
td {padding-right:1em;border-bottom:1px solid #000;padding-left:0.5em;}
|
||||
th {text-align:left;padding-top:1.4em;font-size:13pt;
|
||||
border-bottom:2px solid #000;background:#FFF;}
|
||||
thead th {text-align:left;padding:0.1em;background-color:#EEE;}
|
||||
|
||||
.impl-yes {background:#9D9;}
|
||||
.impl-partial {background:#FFA;}
|
||||
.impl-no {background:#CCC;}
|
||||
|
||||
.danger {color:#600;}
|
||||
.css1 {color:#060;}
|
||||
.required {font-weight:bold;}
|
||||
.feature {color:#999;}
|
||||
|
||||
</style>
|
||||
|
||||
</head><body>
|
||||
|
||||
<h1>Implementation Progress</h1>
|
||||
|
||||
<div id="filing">Filed under Development</div>
|
||||
<div id="index">Return to the <a href="index.html">index</a>.</div>
|
||||
|
||||
<h2>Key</h2>
|
||||
|
||||
<table cellspacing="0"><tbody>
|
||||
<tr><td class="impl-yes">Implemented</td></tr>
|
||||
<tr><td class="impl-partial">Partially implemented</td></tr>
|
||||
<tr><td class="impl-no">Will not implement</td></tr>
|
||||
<tr><td class="danger">Dangerous attribute/property</td></tr>
|
||||
<tr><td class="css1">Present in CSS1</td></tr>
|
||||
<tr><td class="feature">Feature, requires extra work</td></tr>
|
||||
</tbody></table>
|
||||
|
||||
<h2>CSS</h2>
|
||||
|
||||
<table cellspacing="0">
|
||||
|
||||
<thead>
|
||||
<tr><th>Name</th><th>Notes</th></tr>
|
||||
</thead>
|
||||
|
||||
<!--
|
||||
<tr><td>-</td><td>-</td></tr>
|
||||
-->
|
||||
|
||||
<tbody>
|
||||
<tr><th colspan="2">Standard</th></tr>
|
||||
<tr class="css1 impl-yes"><td>background-color</td><td>COMPOSITE(<color>, transparent)</td></tr>
|
||||
<tr class="css1 impl-yes"><td>background</td><td>SHORTHAND, only for color, see below for info on background-image and friends</td></tr>
|
||||
<tr class="css1 impl-yes"><td>border</td><td>SHORTHAND, MULTIPLE</td></tr>
|
||||
<tr class="css1 impl-yes"><td>border-color</td><td>MULTIPLE</td></tr>
|
||||
<tr class="css1 impl-yes"><td>border-style</td><td>MULTIPLE</td></tr>
|
||||
<tr class="css1 impl-yes"><td>border-width</td><td>MULTIPLE</td></tr>
|
||||
<tr class="css1 impl-yes"><td>border-*</td><td>SHORTHAND</td></tr>
|
||||
<tr class="impl-yes"><td>border-*-color</td><td>COMPOSITE(<color>, transparent)</td></tr>
|
||||
<tr class="impl-yes"><td>border-*-style</td><td>ENUM(none, hidden, dotted, dashed,
|
||||
solid, double, groove, ridge, inset, outset)</td></tr>
|
||||
<tr class="css1 impl-yes"><td>border-*-width</td><td>COMPOSITE(<length>, thin, medium, thick)</td></tr>
|
||||
<tr class="css1 impl-yes"><td>clear</td><td>ENUM(none, left, right, both)</td></tr>
|
||||
<tr class="css1 impl-yes"><td>color</td><td><color></td></tr>
|
||||
<tr class="css1 impl-yes"><td>float</td><td>ENUM(left, right, none), May require layout
|
||||
precautions with clear</td></tr>
|
||||
<tr class="css1 impl-yes"><td>font</td><td>SHORTHAND</td></tr>
|
||||
<tr class="css1 impl-yes"><td>font-family</td><td>CSS validator may complain if fallback font
|
||||
family not specified</td></tr>
|
||||
<tr class="css1 impl-yes"><td>font-size</td><td>COMPOSITE(<absolute-size>,
|
||||
<relative-size>, <length>, <percentage>)</td></tr>
|
||||
<tr class="css1 impl-yes"><td>font-style</td><td>ENUM(normal, italic, oblique)</td></tr>
|
||||
<tr class="css1 impl-yes"><td>font-variant</td><td>ENUM(normal, small-caps)</td></tr>
|
||||
<tr class="css1 impl-yes"><td>font-weight</td><td>ENUM(normal, bold, bolder, lighter,
|
||||
100, 200, 300, 400, 500, 600, 700, 800, 900), maybe special code for
|
||||
in-between integers</td></tr>
|
||||
<tr class="css1 impl-yes"><td>letter-spacing</td><td>COMPOSITE(<length>, normal)</td></tr>
|
||||
<tr class="css1 impl-yes"><td>line-height</td><td>COMPOSITE(<number>,
|
||||
<length>, <percentage>, normal)</td></tr>
|
||||
<tr class="css1 impl-yes"><td>list-style-position</td><td>ENUM(inside, outside),
|
||||
Strange behavior in browsers</td></tr>
|
||||
<tr class="css1 impl-yes"><td>list-style-type</td><td>ENUM(...),
|
||||
Well-supported values are: disc, circle, square,
|
||||
decimal, lower-roman, upper-roman, lower-alpha and upper-alpha. See also
|
||||
CSS 3. Mostly IE lack of support.</td></tr>
|
||||
<tr class="css1 impl-yes"><td>list-style</td><td>SHORTHAND</td></tr>
|
||||
<tr class="css1 impl-yes"><td>margin</td><td>MULTIPLE</td></tr>
|
||||
<tr class="css1 impl-yes"><td>margin-*</td><td>COMPOSITE(<length>,
|
||||
<percentage>, auto)</td></tr>
|
||||
<tr class="css1 impl-yes"><td>padding</td><td>MULTIPLE</td></tr>
|
||||
<tr class="css1 impl-yes"><td>padding-*</td><td>COMPOSITE(<length>(positive),
|
||||
<percentage>(positive))</td></tr>
|
||||
<tr class="css1 impl-yes"><td>text-align</td><td>ENUM(left, right,
|
||||
center, justify)</td></tr>
|
||||
<tr class="css1 impl-yes"><td>text-decoration</td><td>No blink (argh my eyes), not
|
||||
enum, can be combined (composite sorta): underline, overline,
|
||||
line-through</td></tr>
|
||||
<tr class="css1 impl-yes"><td>text-indent</td><td>COMPOSITE(<length>,
|
||||
<percentage>)</td></tr>
|
||||
<tr class="css1 impl-yes"><td>text-transform</td><td>ENUM(capitalize, uppercase,
|
||||
lowercase, none)</td></tr>
|
||||
<tr class="css1 impl-yes"><td>width</td><td>COMPOSITE(<length>,
|
||||
<percentage>, auto), Interesting</td></tr>
|
||||
<tr class="css1 impl-yes"><td>word-spacing</td><td>COMPOSITE(<length>, normal),
|
||||
IE 5 no support</td></tr>
|
||||
</tbody>
|
||||
|
||||
<tbody>
|
||||
<tr><th colspan="2">Table</th></tr>
|
||||
<tr class="impl-yes"><td>border-collapse</td><td>ENUM(collapse, separate)</td></tr>
|
||||
<tr class="impl-yes"><td>caption-side</td><td>ENUM(top, bottom)</td></tr>
|
||||
<tr class="feature"><td>empty-cells</td><td>ENUM(show, hide), No IE support makes this useless,
|
||||
possible fix with &nbsp;? Unknown release milestone.</td></tr>
|
||||
<tr class="impl-yes"><td>table-layout</td><td>ENUM(auto, fixed)</td></tr>
|
||||
<tr class="impl-yes css1"><td>vertical-align</td><td>COMPOSITE(ENUM(baseline, sub,
|
||||
super, top, text-top, middle, bottom, text-bottom), <percentage>,
|
||||
<length>) Also applies to others with explicit height</td></tr>
|
||||
</tbody>
|
||||
|
||||
<tbody>
|
||||
<tr><th colspan="2">Absolute positioning, unknown release milestone</th></tr>
|
||||
<tr class="danger"><td>bottom</td><td rowspan="4">Dangerous, must be non-negative</td></tr>
|
||||
<tr class="danger"><td>left</td></tr>
|
||||
<tr class="danger"><td>right</td></tr>
|
||||
<tr class="danger"><td>top</td></tr>
|
||||
<tr><td>clip</td><td>-</td></tr>
|
||||
<tr class="danger"><td>position</td><td>ENUM(static, relative, absolute, fixed), permit
|
||||
relative not absolute?</td></tr>
|
||||
<tr class="danger"><td>z-index</td><td>Dangerous</td></tr>
|
||||
</tbody>
|
||||
|
||||
<tbody>
|
||||
<tr><th colspan="2">Unknown</th></tr>
|
||||
<tr class="danger css1"><td>background-image</td><td>Dangerous, target milestone 1.2</td></tr>
|
||||
<tr class="css1"><td>background-attachment</td><td>ENUM(scroll, fixed),
|
||||
Depends on background-image</td></tr>
|
||||
<tr class="css1"><td>background-position</td><td>Depends on background-image</td></tr>
|
||||
<tr class="danger impl-no"><td>cursor</td><td>Dangerous but fluffy</td></tr>
|
||||
<tr class="danger css1"><td>display</td><td>ENUM(...), Dangerous but interesting;
|
||||
will not implement list-item, run-in (Opera only) or table (no IE);
|
||||
inline-block has incomplete IE6 support and requires -moz-inline-box
|
||||
for Mozilla. Unknown target milestone.</td></tr>
|
||||
<tr><td class="css1">height</td><td>Interesting, why use it? Unknown target milestone.</td></tr>
|
||||
<tr class="danger css1"><td>list-style-image</td><td>Dangerous? Target milestone 1.2</td></tr>
|
||||
<tr class="impl-no"><td>max-height</td><td rowspan="4">No IE 5/6</td></tr>
|
||||
<tr class="impl-no"><td>min-height</td></tr>
|
||||
<tr class="impl-no"><td>max-width</td></tr>
|
||||
<tr class="impl-no"><td>min-width</td></tr>
|
||||
<tr class="impl-no"><td>orphans</td><td>No IE support</td></tr>
|
||||
<tr class="impl-no"><td>widows</td><td>No IE support</td></tr>
|
||||
<tr><td>overflow</td><td>ENUM, IE 5/6 almost (remove visible if set). Unknown target milestone.</td></tr>
|
||||
<tr><td>page-break-after</td><td>ENUM(auto, always, avoid, left, right),
|
||||
IE 5.5/6 and Opera. Unknown target milestone.</td></tr>
|
||||
<tr><td>page-break-before</td><td>ENUM(auto, always, avoid, left, right),
|
||||
Mostly supported. Unknown target milestone.</td></tr>
|
||||
<tr><td>page-break-inside</td><td>ENUM(avoid, auto), Opera only. Unknown target milestone.</td></tr>
|
||||
<tr class="impl-no"><td>quotes</td><td>May be dropped from CSS2, fairly useless for inline context</td></tr>
|
||||
<tr class="impl-no"><td>visibility</td><td>ENUM(visible, hidden, collapse),
|
||||
Dangerous</td></tr>
|
||||
<tr class="css1 feature"><td>white-space</td><td>ENUM(normal, pre, nowrap, pre-wrap,
|
||||
pre-line), Spotty implementation:
|
||||
pre (no IE 5/6), nowrap (no IE 5),
|
||||
pre-wrap (only Opera), pre-line (no support). Fixable? Unknown target milestone.</td></tr>
|
||||
</tbody>
|
||||
|
||||
<tbody class="impl-no">
|
||||
<tr><th colspan="2">Aural</th></tr>
|
||||
<tr><td>azimuth</td><td>-</td></tr>
|
||||
<tr><td>cue</td><td>-</td></tr>
|
||||
<tr><td>cue-after</td><td>-</td></tr>
|
||||
<tr><td>cue-before</td><td>-</td></tr>
|
||||
<tr><td>elevation</td><td>-</td></tr>
|
||||
<tr><td>pause-after</td><td>-</td></tr>
|
||||
<tr><td>pause-before</td><td>-</td></tr>
|
||||
<tr><td>pause</td><td>-</td></tr>
|
||||
<tr><td>pitch-range</td><td>-</td></tr>
|
||||
<tr><td>pitch</td><td>-</td></tr>
|
||||
<tr><td>play-during</td><td>-</td></tr>
|
||||
<tr><td>richness</td><td>-</td></tr>
|
||||
<tr><td>speak-header</td><td>Table related</td></tr>
|
||||
<tr><td>speak-numeral</td><td>-</td></tr>
|
||||
<tr><td>speak-punctuation</td><td>-</td></tr>
|
||||
<tr><td>speak</td><td>-</td></tr>
|
||||
<tr><td>speech-rate</td><td>-</td></tr>
|
||||
<tr><td>stress</td><td>-</td></tr>
|
||||
<tr><td>voice-family</td><td>-</td></tr>
|
||||
<tr><td>volume</td><td>-</td></tr>
|
||||
</tbody>
|
||||
|
||||
<tbody class="impl-no">
|
||||
<tr><th colspan="2">Will not implement</th></tr>
|
||||
<tr><td>content</td><td>Not applicable for inline styles</td></tr>
|
||||
<tr><td>counter-increment</td><td>Needs content, Opera only</td></tr>
|
||||
<tr><td>counter-reset</td><td>Needs content, Opera only</td></tr>
|
||||
<tr><td>direction</td><td>No support</td></tr>
|
||||
<tr><td>outline-color</td><td rowspan="4">IE Mac and Opera on outside,
|
||||
Mozilla on inside and needs -moz-outline, no IE support.</td></tr>
|
||||
<tr><td>outline-style</td></tr>
|
||||
<tr><td>outline-width</td></tr>
|
||||
<tr><td>outline</td></tr>
|
||||
<tr><td>unicode-bidi</td><td>No support</td></tr>
|
||||
</tbody>
|
||||
|
||||
</table>
|
||||
|
||||
<h2>Interesting Attributes</h2>
|
||||
|
||||
<table cellspacing="0">
|
||||
|
||||
<thead>
|
||||
<tr><th>Attribute</th><th>Tags</th><th>Notes</th></tr>
|
||||
</thead>
|
||||
|
||||
<!--
|
||||
<tr><th></th></tr>
|
||||
<tbody>
|
||||
<tr><td>-</td><td>-</td><td>-</td></tr>
|
||||
</tbody>
|
||||
-->
|
||||
|
||||
<tbody>
|
||||
<tr><th colspan="3">CSS</th></tr>
|
||||
<tr class="impl-yes"><td>style</td><td>All</td><td>Not all properties may be implemented, parser is good though.</td></tr>
|
||||
</tbody>
|
||||
|
||||
<tbody>
|
||||
<tr><th colspan="3">Questionable</th></tr>
|
||||
<tr class="impl-no"><td>accesskey</td><td>A</td><td>May interfere with main interface</td></tr>
|
||||
<tr class="impl-no"><td>tabindex</td><td>A</td><td>May interfere with main interface</td></tr>
|
||||
<tr><td>target</td><td>A</td><td>Config enabled, only useful for frame layouts</td></tr>
|
||||
</tbody>
|
||||
|
||||
<tbody>
|
||||
<tr><th colspan="3">Miscellaneous</th></tr>
|
||||
<tr><td>datetime</td><td>DEL, INS</td><td>No visible effect, ISO format</td></tr>
|
||||
<tr><td>rel</td><td>A</td><td>Largely user-defined: nofollow, tag (see microformats)</td></tr>
|
||||
<tr><td>rev</td><td>A</td><td>Largely user-defined: vote-*</td></tr>
|
||||
<tr class="feature"><td>axis</td><td>TD, TH</td><td>W3C only: No browser implementation</td></tr>
|
||||
<tr class="feature"><td>char</td><td>COL, COLGROUP, TBODY, TD, TFOOT, TH, THEAD, TR</td><td>W3C only: No browser implementation</td></tr>
|
||||
<tr class="feature"><td>headers</td><td>TD, TH</td><td>W3C only: No browser implementation</td></tr>
|
||||
<tr class="feature"><td>scope</td><td>TD, TH</td><td>W3C only: No browser implementation</td></tr>
|
||||
</tbody>
|
||||
|
||||
<tbody class="impl-yes">
|
||||
<tr><th colspan="3">URI</th></tr>
|
||||
<tr><td rowspan="2">cite</td><td>BLOCKQUOTE, Q</td><td>For attribution</td></tr>
|
||||
<tr><td>DEL, INS</td><td>Link to explanation why it changed</td></tr>
|
||||
<tr><td>href</td><td>A</td><td>-</td></tr>
|
||||
<tr><td>longdesc</td><td>IMG</td><td>-</td></tr>
|
||||
<tr class="required"><td>src</td><td>IMG</td><td>Required</td></tr>
|
||||
</tbody>
|
||||
|
||||
<tbody>
|
||||
<tr><th colspan="3">Transform, target milestone 1.4</th></tr>
|
||||
<tr><td rowspan="5">align</td><td>CAPTION</td><td>Near-equiv style 'caption-side', drop left and right</td></tr>
|
||||
<tr><td>IMG</td><td rowspan="2">Margin-left and margin-right = auto or parent div</td></tr>
|
||||
<tr><td>TABLE</td></tr>
|
||||
<tr><td>HR</td><td>Equivalent style 'text-align' (IE tested)</td></tr>
|
||||
<tr class="impl-yes"><td>H1, H2, H3, H4, H5, H6, P</td><td>Equivalent style 'text-align'</td></tr>
|
||||
<tr class="required impl-yes"><td>alt</td><td>IMG</td><td>Required, insert image filename if src is present or default invalid image text</td></tr>
|
||||
<tr><td rowspan="3">bgcolor</td><td>TABLE</td><td>Equivalent style 'background-color' (IE tested)</td></tr>
|
||||
<tr><td>TR</td><td>Equivalent style 'background-color' (IE tested)</td></tr>
|
||||
<tr><td>TD, TH</td><td>Equivalent style 'background-color'</td></tr>
|
||||
<tr><td>border</td><td>IMG</td><td>Equivalent style 'border-width', only applies when link present</td></tr>
|
||||
<tr><td>clear</td><td>BR</td><td>Near-equiv style 'clear', transform 'all' into 'both'</td></tr>
|
||||
<tr class="impl-no"><td>compact</td><td>DL, OL, UL</td><td>Boolean, needs custom CSS class; rarely used anyway</td></tr>
|
||||
<tr class="required impl-yes"><td>dir</td><td>BDO</td><td>Required, insert ltr (or configuration value) if none</td></tr>
|
||||
<tr><td>height</td><td>TD, TH</td><td>Near-equiv style 'height', needs px suffix if original was in pixels</td></tr>
|
||||
<tr><td>hspace</td><td>IMG</td><td>Near-equiv styles 'margin-left' and 'margin-right', needs px suffix</td></tr>
|
||||
<tr class="impl-yes"><td>lang</td><td>*</td><td>Copy value to xml:lang</td></tr>
|
||||
<tr><td rowspan="2">name</td><td>IMG</td><td>Turn into ID</td></tr>
|
||||
<tr><td>A</td><td>Turn into ID? (not deprecated, though in which specs?)</td></tr>
|
||||
<tr><td>noshade</td><td>HR</td><td>Boolean, style 'border-style:solid;'</td></tr>
|
||||
<tr><td>nowrap</td><td>TD, TH</td><td>Boolean, style 'white-space:nowrap;' (not compat with IE5)</td></tr>
|
||||
<tr><td>size</td><td>HR</td><td>Near-equiv 'width', needs px suffix if original was pixels</td></tr>
|
||||
<tr class="required impl-yes"><td>src</td><td>IMG</td><td>Required, insert blank or default img if not set</td></tr>
|
||||
<tr><td>start</td><td>OL</td><td>Poorly supported 'counter-reset', transform may not be desirable</td></tr>
|
||||
<tr><td rowspan="3">type</td><td>LI</td><td rowspan="3">Equivalent style 'list-style-type', different allowed values though. (needs testing)</td></tr>
|
||||
<tr><td>OL</td></tr>
|
||||
<tr><td>UL</td></tr>
|
||||
<tr><td>value</td><td>LI</td><td>Poorly supported 'counter-reset', transform may not be desirable, see ol.start. Configurable.</td></tr>
|
||||
<tr><td>vspace</td><td>IMG</td><td>Near-equiv styles 'margin-top' and 'margin-bottom', needs px suffix, see hspace</td></tr>
|
||||
<tr><td rowspan="2">width</td><td>HR</td><td rowspan="2">Near-equiv style 'width', needs px suffix if original was pixels</td></tr>
|
||||
<tr><td>TD, TH</td></tr>
|
||||
</tbody>
|
||||
|
||||
</table>
|
||||
|
||||
<div id="version">$Id$</div>
|
||||
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
|
||||
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
|
||||
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"><head>
|
||||
|
||||
<title>HTMLPurifier Progress</title>
|
||||
|
||||
<style type="text/css">
|
||||
|
||||
td {padding-right:1em;border-bottom:1px solid #000;padding-left:0.5em;}
|
||||
th {text-align:left;padding-top:1.4em;font-size:13pt;
|
||||
border-bottom:2px solid #000;background:#FFF;}
|
||||
thead th {text-align:left;padding:0.1em;background-color:#EEE;}
|
||||
|
||||
.impl-yes {background:#9D9;}
|
||||
.impl-partial {background:#FFA;}
|
||||
.impl-no {background:#CCC;}
|
||||
|
||||
.danger {color:#600;}
|
||||
.css1 {color:#060;}
|
||||
.required {font-weight:bold;}
|
||||
.feature {color:#999;}
|
||||
|
||||
</style>
|
||||
|
||||
</head><body>
|
||||
|
||||
<h1>HTMLPurifier Progress</h1>
|
||||
|
||||
<h2>Key</h2>
|
||||
|
||||
<table cellspacing="0"><tbody>
|
||||
<tr><td class="impl-yes">Implemented</td></tr>
|
||||
<tr><td class="impl-partial">Partially implemented</td></tr>
|
||||
<tr><td class="impl-no">Will not implement</td></tr>
|
||||
<tr><td class="danger">Dangerous attribute/property</td></tr>
|
||||
<tr><td class="css1">Present in CSS1</td></tr>
|
||||
<tr><td class="feature">Feature, requires extra work</td></tr>
|
||||
</tbody></table>
|
||||
|
||||
<h3>CSS</h3>
|
||||
|
||||
<table cellspacing="0">
|
||||
|
||||
<thead>
|
||||
<tr><th>Name</th><th>Notes</th></tr>
|
||||
</thead>
|
||||
|
||||
<!--
|
||||
<tr><td>-</td><td>-</td></tr>
|
||||
-->
|
||||
|
||||
<tbody>
|
||||
<tr><th colspan="2">Standard</th></tr>
|
||||
<tr class="css1 impl-yes"><td>background-color</td><td>COMPOSITE(<color>, transparent)</td></tr>
|
||||
<tr class="css1 impl-yes"><td>background</td><td>SHORTHAND, only for color, see below for info on background-image and friends</td></tr>
|
||||
<tr class="css1 impl-yes"><td>border</td><td>SHORTHAND, MULTIPLE</td></tr>
|
||||
<tr class="css1 impl-yes"><td>border-color</td><td>MULTIPLE</td></tr>
|
||||
<tr class="css1 impl-yes"><td>border-style</td><td>MULTIPLE</td></tr>
|
||||
<tr class="css1 impl-yes"><td>border-width</td><td>MULTIPLE</td></tr>
|
||||
<tr class="css1 impl-yes"><td>border-*</td><td>SHORTHAND</td></tr>
|
||||
<tr class="impl-yes"><td>border-*-color</td><td>COMPOSITE(<color>, transparent)</td></tr>
|
||||
<tr class="impl-yes"><td>border-*-style</td><td>ENUM(none, hidden, dotted, dashed,
|
||||
solid, double, groove, ridge, inset, outset)</td></tr>
|
||||
<tr class="css1 impl-yes"><td>border-*-width</td><td>COMPOSITE(<length>, thin, medium, thick)</td></tr>
|
||||
<tr class="css1 impl-yes"><td>clear</td><td>ENUM(none, left, right, both)</td></tr>
|
||||
<tr class="css1 impl-yes"><td>color</td><td><color></td></tr>
|
||||
<tr class="css1 impl-yes"><td>float</td><td>ENUM(left, right, none), May require layout
|
||||
precautions with clear</td></tr>
|
||||
<tr class="css1 impl-yes"><td>font</td><td>SHORTHAND</td></tr>
|
||||
<tr class="css1 impl-yes"><td>font-family</td><td>CSS validator may complain if fallback font
|
||||
family not specified</td></tr>
|
||||
<tr class="css1 impl-yes"><td>font-size</td><td>COMPOSITE(<absolute-size>,
|
||||
<relative-size>, <length>, <percentage>)</td></tr>
|
||||
<tr class="css1 impl-yes"><td>font-style</td><td>ENUM(normal, italic, oblique)</td></tr>
|
||||
<tr class="css1 impl-yes"><td>font-variant</td><td>ENUM(normal, small-caps)</td></tr>
|
||||
<tr class="css1 impl-yes"><td>font-weight</td><td>ENUM(normal, bold, bolder, lighter,
|
||||
100, 200, 300, 400, 500, 600, 700, 800, 900), maybe special code for
|
||||
in-between integers</td></tr>
|
||||
<tr class="css1 impl-yes"><td>letter-spacing</td><td>COMPOSITE(<length>, normal)</td></tr>
|
||||
<tr class="css1 impl-yes"><td>line-height</td><td>COMPOSITE(<number>,
|
||||
<length>, <percentage>, normal)</td></tr>
|
||||
<tr class="css1 impl-yes"><td>list-style-position</td><td>ENUM(inside, outside),
|
||||
Strange behavior in browsers</td></tr>
|
||||
<tr class="css1 impl-yes"><td>list-style-type</td><td>ENUM(...),
|
||||
Well-supported values are: disc, circle, square,
|
||||
decimal, lower-roman, upper-roman, lower-alpha and upper-alpha. See also
|
||||
CSS 3. Mostly IE lack of support.</td></tr>
|
||||
<tr class="css1 impl-yes"><td>list-style</td><td>SHORTHAND</td></tr>
|
||||
<tr class="css1 impl-yes"><td>margin</td><td>MULTIPLE</td></tr>
|
||||
<tr class="css1 impl-yes"><td>margin-*</td><td>COMPOSITE(<length>,
|
||||
<percentage>, auto)</td></tr>
|
||||
<tr class="css1 impl-yes"><td>padding</td><td>MULTIPLE</td></tr>
|
||||
<tr class="css1 impl-yes"><td>padding-*</td><td>COMPOSITE(<length>(positive),
|
||||
<percentage>(positive))</td></tr>
|
||||
<tr class="css1 impl-yes"><td>text-align</td><td>ENUM(left, right,
|
||||
center, justify)</td></tr>
|
||||
<tr class="css1 impl-yes"><td>text-decoration</td><td>No blink (argh my eyes), not
|
||||
enum, can be combined (composite sorta): underline, overline,
|
||||
line-through</td></tr>
|
||||
<tr class="css1 impl-yes"><td>text-indent</td><td>COMPOSITE(<length>,
|
||||
<percentage>)</td></tr>
|
||||
<tr class="css1 impl-yes"><td>text-transform</td><td>ENUM(capitalize, uppercase,
|
||||
lowercase, none)</td></tr>
|
||||
<tr class="css1 impl-yes"><td>width</td><td>COMPOSITE(<length>,
|
||||
<percentage>, auto), Interesting</td></tr>
|
||||
<tr class="css1 impl-yes"><td>word-spacing</td><td>COMPOSITE(<length>, auto),
|
||||
IE 5 no support</td></tr>
|
||||
</tbody>
|
||||
|
||||
<tbody>
|
||||
<tr><th colspan="2">Table</th></tr>
|
||||
<tr class="impl-yes"><td>border-collapse</td><td>ENUM(collapse, separate)</td></tr>
|
||||
<tr class="impl-yes"><td>caption-side</td><td>ENUM(top, bottom)</td></tr>
|
||||
<tr class="feature"><td>empty-cells</td><td>ENUM(show, hide), No IE support makes this useless,
|
||||
possible fix with &nbsp;? Unknown release milestone.</td></tr>
|
||||
<tr class="impl-yes"><td>table-layout</td><td>ENUM(auto, fixed)</td></tr>
|
||||
<tr class="impl-yes css1"><td>vertical-align</td><td>COMPOSITE(ENUM(baseline, sub,
|
||||
super, top, text-top, middle, bottom, text-bottom), <percentage>,
|
||||
<length>) Also applies to others with explicit height</td></tr>
|
||||
</tbody>
|
||||
|
||||
<tbody>
|
||||
<tr><th colspan="2">Absolute positioning, unknown release milestone</th></tr>
|
||||
<tr class="danger"><td>bottom</td><td rowspan="4">Dangerous, must be non-negative</td></tr>
|
||||
<tr class="danger"><td>left</td></tr>
|
||||
<tr class="danger"><td>right</td></tr>
|
||||
<tr class="danger"><td>top</td></tr>
|
||||
<tr><td>clip</td><td>-</td></tr>
|
||||
<tr class="danger"><td>position</td><td>ENUM(static, relative, absolute, fixed), permit
|
||||
relative not absolute?</td></tr>
|
||||
<tr class="danger"><td>z-index</td><td>Dangerous</td></tr>
|
||||
</tbody>
|
||||
|
||||
<tbody>
|
||||
<tr><th colspan="2">Unknown</th></tr>
|
||||
<tr class="danger css1"><td>background-image</td><td>Dangerous, target milestone 1.2</td></tr>
|
||||
<tr class="css1"><td>background-attachment</td><td>ENUM(scroll, fixed),
|
||||
Depends on background-image</td></tr>
|
||||
<tr class="css1"><td>background-position</td><td>Depends on background-image</td></tr>
|
||||
<tr class="danger impl-no"><td>cursor</td><td>Dangerous but fluffy</td></tr>
|
||||
<tr class="danger css1"><td>display</td><td>ENUM(...), Dangerous but interesting;
|
||||
will not implement list-item, run-in (Opera only) or table (no IE);
|
||||
inline-block has incomplete IE6 support and requires -moz-inline-box
|
||||
for Mozilla. Unknown target milestone.</td></tr>
|
||||
<tr><td class="css1">height</td><td>Interesting, why use it? Unknown target milestone.</td></tr>
|
||||
<tr class="danger css1"><td>list-style-image</td><td>Dangerous? Target milestone 1.2</td></tr>
|
||||
<tr class="impl-no"><td>max-height</td><td rowspan="4">No IE 5/6</td></tr>
|
||||
<tr class="impl-no"><td>min-height</td></tr>
|
||||
<tr class="impl-no"><td>max-width</td></tr>
|
||||
<tr class="impl-no"><td>min-width</td></tr>
|
||||
<tr class="impl-no"><td>orphans</td><td>No IE support</td></tr>
|
||||
<tr class="impl-no"><td>widows</td><td>No IE support</td></tr>
|
||||
<tr><td>overflow</td><td>ENUM, IE 5/6 almost (remove visible if set). Unknown target milestone.</td></tr>
|
||||
<tr><td>page-break-after</td><td>ENUM(auto, always, avoid, left, right),
|
||||
IE 5.5/6 and Opera. Unknown target milestone.</td></tr>
|
||||
<tr><td>page-break-before</td><td>ENUM(auto, always, avoid, left, right),
|
||||
Mostly supported. Unknown target milestone.</td></tr>
|
||||
<tr><td>page-break-inside</td><td>ENUM(avoid, auto), Opera only. Unknown target milestone.</td></tr>
|
||||
<tr class="impl-no"><td>quotes</td><td>May be dropped from CSS2, fairly useless for inline context</td></tr>
|
||||
<tr class="impl-no"><td>visibility</td><td>ENUM(visible, hidden, collapse),
|
||||
Dangerous</td></tr>
|
||||
<tr class="css1 feature"><td>white-space</td><td>ENUM(normal, pre, nowrap, pre-wrap,
|
||||
pre-line), Spotty implementation:
|
||||
pre (no IE 5/6), nowrap (no IE 5),
|
||||
pre-wrap (only Opera), pre-line (no support). Fixable? Unknown target milestone.</td></tr>
|
||||
</tbody>
|
||||
|
||||
<tbody class="impl-no">
|
||||
<tr><th colspan="2">Aural</th></tr>
|
||||
<tr><td>azimuth</td><td>-</td></tr>
|
||||
<tr><td>cue</td><td>-</td></tr>
|
||||
<tr><td>cue-after</td><td>-</td></tr>
|
||||
<tr><td>cue-before</td><td>-</td></tr>
|
||||
<tr><td>elevation</td><td>-</td></tr>
|
||||
<tr><td>pause-after</td><td>-</td></tr>
|
||||
<tr><td>pause-before</td><td>-</td></tr>
|
||||
<tr><td>pause</td><td>-</td></tr>
|
||||
<tr><td>pitch-range</td><td>-</td></tr>
|
||||
<tr><td>pitch</td><td>-</td></tr>
|
||||
<tr><td>play-during</td><td>-</td></tr>
|
||||
<tr><td>richness</td><td>-</td></tr>
|
||||
<tr><td>speak-header</td><td>Table related</td></tr>
|
||||
<tr><td>speak-numeral</td><td>-</td></tr>
|
||||
<tr><td>speak-punctuation</td><td>-</td></tr>
|
||||
<tr><td>speak</td><td>-</td></tr>
|
||||
<tr><td>speech-rate</td><td>-</td></tr>
|
||||
<tr><td>stress</td><td>-</td></tr>
|
||||
<tr><td>voice-family</td><td>-</td></tr>
|
||||
<tr><td>volume</td><td>-</td></tr>
|
||||
</tbody>
|
||||
|
||||
<tbody class="impl-no">
|
||||
<tr><th colspan="2">Will not implement</th></tr>
|
||||
<tr><td>content</td><td>Not applicable for inline styles</td></tr>
|
||||
<tr><td>counter-increment</td><td>Needs content, Opera only</td></tr>
|
||||
<tr><td>counter-reset</td><td>Needs content, Opera only</td></tr>
|
||||
<tr><td>direction</td><td>No support</td></tr>
|
||||
<tr><td>outline-color</td><td rowspan="4">IE Mac and Opera on outside,
|
||||
Mozilla on inside and needs -moz-outline, no IE support.</td></tr>
|
||||
<tr><td>outline-style</td></tr>
|
||||
<tr><td>outline-width</td></tr>
|
||||
<tr><td>outline</td></tr>
|
||||
<tr><td>unicode-bidi</td><td>No support</td></tr>
|
||||
</tbody>
|
||||
|
||||
</table>
|
||||
|
||||
<h2>Interesting Attributes</h2>
|
||||
|
||||
<table cellspacing="0">
|
||||
|
||||
<thead>
|
||||
<tr><th>Attribute</th><th>Tags</th><th>Notes</th></tr>
|
||||
</thead>
|
||||
|
||||
<!--
|
||||
<tr><th></th></tr>
|
||||
<tbody>
|
||||
<tr><td>-</td><td>-</td><td>-</td></tr>
|
||||
</tbody>
|
||||
-->
|
||||
|
||||
<tbody>
|
||||
<tr><th colspan="3">CSS</th></tr>
|
||||
<tr class="impl-yes"><td>style</td><td>All</td><td>Not all properties may be implemented, parser is good though.</td></tr>
|
||||
</tbody>
|
||||
|
||||
<tbody>
|
||||
<tr><th colspan="3">Questionable</th></tr>
|
||||
<tr class="impl-no"><td>accesskey</td><td>A</td><td>May interfere with main interface</td></tr>
|
||||
<tr class="impl-no"><td>tabindex</td><td>A</td><td>May interfere with main interface</td></tr>
|
||||
<tr><td>target</td><td>A</td><td>Config enabled, only useful for frame layouts</td></tr>
|
||||
</tbody>
|
||||
|
||||
<tbody>
|
||||
<tr><th colspan="3">Miscellaneous</th></tr>
|
||||
<tr><td>datetime</td><td>DEL, INS</td><td>No visible effect, ISO format</td></tr>
|
||||
<tr><td>rel</td><td>A</td><td>Largely user-defined: nofollow, tag (see microformats)</td></tr>
|
||||
<tr><td>rev</td><td>A</td><td>Largely user-defined: vote-*</td></tr>
|
||||
<tr class="feature"><td>axis</td><td>TD, TH</td><td>W3C only: No browser implementation</td></tr>
|
||||
<tr class="feature"><td>char</td><td>COL, COLGROUP, TBODY, TD, TFOOT, TH, THEAD, TR</td><td>W3C only: No browser implementation</td></tr>
|
||||
<tr class="feature"><td>headers</td><td>TD, TH</td><td>W3C only: No browser implementation</td></tr>
|
||||
<tr class="feature"><td>scope</td><td>TD, TH</td><td>W3C only: No browser implementation</td></tr>
|
||||
</tbody>
|
||||
|
||||
<tbody class="impl-yes">
|
||||
<tr><th colspan="3">URI</th></tr>
|
||||
<tr><td rowspan="2">cite</td><td>BLOCKQUOTE, Q</td><td>For attribution</td></tr>
|
||||
<tr><td>DEL, INS</td><td>Link to explanation why it changed</td></tr>
|
||||
<tr><td>href</td><td>A</td><td>-</td></tr>
|
||||
<tr><td>longdesc</td><td>IMG</td><td>-</td></tr>
|
||||
<tr class="required"><td>src</td><td>IMG</td><td>Required</td></tr>
|
||||
</tbody>
|
||||
|
||||
<tbody>
|
||||
<tr><th colspan="3">Transform, target milestone 1.4</th></tr>
|
||||
<tr><td rowspan="5">align</td><td>CAPTION</td><td>Near-equiv style 'caption-side', drop left and right</td></tr>
|
||||
<tr><td>IMG</td><td rowspan="2">Margin-left and margin-right = auto or parent div</td></tr>
|
||||
<tr><td>TABLE</td></tr>
|
||||
<tr><td>HR</td><td>Equivalent style 'text-align' (IE tested)</td></tr>
|
||||
<tr class="impl-yes"><td>H1, H2, H3, H4, H5, H6, P</td><td>Equivalent style 'text-align'</td></tr>
|
||||
<tr class="required impl-yes"><td>alt</td><td>IMG</td><td>Required, insert image filename if src is present or default invalid image text</td></tr>
|
||||
<tr><td rowspan="3">bgcolor</td><td>TABLE</td><td>Equivalent style 'background-color' (IE tested)</td></tr>
|
||||
<tr><td>TR</td><td>Equivalent style 'background-color' (IE tested)</td></tr>
|
||||
<tr><td>TD, TH</td><td>Equivalent style 'background-color'</td></tr>
|
||||
<tr><td>border</td><td>IMG</td><td>Equivalent style 'border-width', only applies when link present</td></tr>
|
||||
<tr><td>clear</td><td>BR</td><td>Near-equiv style 'clear', transform 'all' into 'both'</td></tr>
|
||||
<tr class="impl-no"><td>compact</td><td>DL, OL, UL</td><td>Boolean, needs custom CSS class; rarely used anyway</td></tr>
|
||||
<tr class="required impl-yes"><td>dir</td><td>BDO</td><td>Required, insert ltr (or configuration value) if none</td></tr>
|
||||
<tr><td>height</td><td>TD, TH</td><td>Near-equiv style 'height', needs px suffix if original was in pixels</td></tr>
|
||||
<tr><td>hspace</td><td>IMG</td><td>Near-equiv styles 'margin-left' and 'margin-right', needs px suffix</td></tr>
|
||||
<tr class="impl-yes"><td>lang</td><td>*</td><td>Copy value to xml:lang</td></tr>
|
||||
<tr><td rowspan="2">name</td><td>IMG</td><td>Turn into ID</td></tr>
|
||||
<tr><td>A</td><td>Turn into ID? (not deprecated, though in which specs?)</td></tr>
|
||||
<tr><td>noshade</td><td>HR</td><td>Boolean, style 'border-style:solid;'</td></tr>
|
||||
<tr><td>nowrap</td><td>TD, TH</td><td>Boolean, style 'white-space:nowrap;' (not compat with IE5)</td></tr>
|
||||
<tr><td>size</td><td>HR</td><td>Near-equiv 'width', needs px suffix if original was pixels</td></tr>
|
||||
<tr class="required impl-yes"><td>src</td><td>IMG</td><td>Required, insert blank or default img if not set</td></tr>
|
||||
<tr><td>start</td><td>OL</td><td>Poorly supported 'counter-reset', transform may not be desirable</td></tr>
|
||||
<tr><td rowspan="3">type</td><td>LI</td><td rowspan="3">Equivalent style 'list-style-type', different allowed values though. (needs testing)</td></tr>
|
||||
<tr><td>OL</td></tr>
|
||||
<tr><td>UL</td></tr>
|
||||
<tr><td>value</td><td>LI</td><td>Poorly supported 'counter-reset', transform may not be desirable, see ol.start. Configurable.</td></tr>
|
||||
<tr><td>vspace</td><td>IMG</td><td>Near-equiv styles 'margin-top' and 'margin-bottom', needs px suffix, see hspace</td></tr>
|
||||
<tr><td rowspan="2">width</td><td>HR</td><td rowspan="2">Near-equiv style 'width', needs px suffix if original was pixels</td></tr>
|
||||
<tr><td>TD, TH</td></tr>
|
||||
</tbody>
|
||||
|
||||
</table>
|
||||
|
||||
</body></html>
|
@@ -1,47 +0,0 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
|
||||
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
|
||||
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"><head>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
|
||||
<meta name="description" content="Proposal to allow for color constraints in HTML Purifier." />
|
||||
<link rel="stylesheet" type="text/css" href="./style.css" />
|
||||
|
||||
<title>Proposal: Colors - HTML Purifier</title>
|
||||
|
||||
</head><body>
|
||||
|
||||
<h1 class="subtitled">Colors</h1>
|
||||
<div class="subtitle">Hammering some sense into those color-blind newbies</div>
|
||||
|
||||
<div id="filing">Filed under Proposals</div>
|
||||
<div id="index">Return to the <a href="index.html">index</a>.</div>
|
||||
|
||||
<p>Your website probably has a color-scheme.
|
||||
<span style="color:#090; background:#FFF;">Green on white</span>,
|
||||
<span style="color:#A0F; background:#FF0;">purple on yellow</span>,
|
||||
whatever. When you give users the ability to style their content, you may
|
||||
want them to keep in line with your styling. If your website is all
|
||||
about light colors, you don't want a user to come in and vandalize your
|
||||
page with a deep maroon.</p>
|
||||
|
||||
<p>This is an extremely silly feature proposal, but I'm writing it down anyway.</p>
|
||||
|
||||
<p>What if the user could constrain the colors specified in inline styles? You
|
||||
are only allowed to use these shades of dark green for text and these shades
|
||||
of light yellow for the background. At the very least, you could ensure
|
||||
that we did not have pale yellow text on a white background.</p>
|
||||
|
||||
<h2>Implementation issues</h2>
|
||||
|
||||
<ol>
|
||||
<li>Requires the color attribute definition to know, currently, what the text
|
||||
and background colors are. This becomes difficult when classes are thrown
|
||||
into the mix.</li>
|
||||
<li>The user still has to define the permissible colors, how does one do
|
||||
something like that?</li>
|
||||
</ol>
|
||||
|
||||
<div id="version">$Id$</div>
|
||||
|
||||
</body>
|
||||
</html>
|
@@ -1,98 +0,0 @@
|
||||
We are going to model our I18N/L10N off of MediaWiki's system. Theirs is
|
||||
obviously quite complicated, so we're going to simplify it a bit for our needs.
|
||||
|
||||
== Structure ==
|
||||
|
||||
First, you have a Language object. This object contains all the localisable
|
||||
message strings, as well as other important language-specific settings and
|
||||
custom behavior (uppercasing, lowercasing, printing dates, formatting
|
||||
numbers, etc.)
|
||||
|
||||
The object is constructed from two sources: subclassed versions of itself
|
||||
(classes) and Message files (messages).
|
||||
|
||||
== General use ==
|
||||
|
||||
You load a language object by calling the Language::factory() function.
|
||||
This function loads the class file for the object (taking in account fallback
|
||||
languages by using the fallback language's object but overloading the
|
||||
language key) and returns that object. Nothing else happens.
|
||||
|
||||
When a message/etc is requested, a lazy load initializer is called. Now the
|
||||
real work starts. We're first going to take the scenario that the language
|
||||
is not cached. The system loads the Messages file by:
|
||||
|
||||
require( $filename );
|
||||
$cache = compact( self::$mLocalisationKeys );
|
||||
|
||||
...where self::$mLocalisationKeys is the name of variables that could be used
|
||||
in the localization file. This lets you use things like:
|
||||
|
||||
$fallback = false;
|
||||
$rtl = false;
|
||||
|
||||
...and easily siphon them into arrays.
|
||||
|
||||
Then, we load the $fallback language (if not set, English) to fill in the gaps in
|
||||
the messages. There is specialized behavior for certain keys, as they can be
|
||||
mergeable maps, lists or alias lists (not sure what the last one is).
|
||||
|
||||
== Caching ==
|
||||
|
||||
MediaWiki has lots of caching mechanisms built in, which make the code somewhat
|
||||
more difficult to understand. Before doing any loading, MediaWiki will check
|
||||
the following places to see if we can be lazy:
|
||||
|
||||
1. $mLocalisationCache[$code] - just a variable where it may have been stashed
|
||||
2. serialized/$code.ser - compiled serialized language file
|
||||
3. Memcached version of file (with expiration checking)
|
||||
|
||||
Expiration checking consists of ensuring all dependencies have filemtimes
|
||||
that match the ones bundled with the cached copy. Similar checking could be
|
||||
implemented for serialized versions, as it seems that they are not updated
|
||||
until manually recompiled.
|
||||
|
||||
== Behavior ==
|
||||
|
||||
Things that are localizable:
|
||||
|
||||
- Weekdays (and abbrev)
|
||||
- Months (and abbrev)
|
||||
- Bookstores
|
||||
- Skin names
|
||||
- Date preferences / Custom date format
|
||||
- Default date format
|
||||
- Default user option overrides
|
||||
-+ Language names
|
||||
- Timezones
|
||||
-+ Character encoding conversion via iconv
|
||||
- UpperLowerCase first (needs casemaps for some)
|
||||
- UpperLowerCase
|
||||
- Uppercase words
|
||||
- Uppercase word breaks
|
||||
- Case folding
|
||||
- Strip punctuation for MySQL search
|
||||
- Get first character
|
||||
-+ Alternate encoding
|
||||
-+ Recoding for edit (and then recode input)
|
||||
-+ RTL
|
||||
-+ Direction mark character depending on RTL
|
||||
-? Arrow depending on RTL
|
||||
- Languages where italics cannot be used
|
||||
-+ Number formatting (commafy, transform digits, transform separators)
|
||||
- Truncate (multibyte)
|
||||
- Grammar conversions for inflected languages
|
||||
- Plural transformations
|
||||
- Formatting expiry times
|
||||
- Segmenting for diffs (Chinese)
|
||||
- Convert to variants of language
|
||||
- Language specific user preference options
|
||||
- Link trails [[foo]]bar
|
||||
-+ Language code (RFC 3066)
|
||||
|
||||
Neat functionality:
|
||||
|
||||
- I18N sprintfDate
|
||||
- Roman numeral formatting
|
||||
|
||||
Items marked with a + likely need to be addressed by HTML Purifier
|
@@ -1,59 +0,0 @@
|
||||
|
||||
Configuration Ideas
|
||||
|
||||
Here are some theoretical configuration ideas that we could implement some
|
||||
time. Note the naming convention: %Namespace.Directive
|
||||
|
||||
%Attr.IDPrefix - prefix all ids with this
|
||||
|
||||
%Attr.RewriteFragments - if there's %Attr.IDPrefix we may want to transparently
|
||||
rewrite the URLs we parse too. However, we can only do it when it's a pure
|
||||
anchor link, so it's not foolproof
|
||||
|
||||
%Attr.ClassBlacklist,
|
||||
%Attr.ClassWhitelist,
|
||||
%Attr.ClassPolicy - determines what classes are allowed. When
|
||||
%Attr.ClassPolicy is set to Blacklist, only allow those not in
|
||||
%Attr.ClassBlacklist. When it's Whitelist, only allow those in
|
||||
%Attr.ClassWhitelist.
|
||||
|
||||
%Attr.MaxWidth,
|
||||
%Attr.MaxHeight - caps for width and height related checks.
|
||||
(the hack in Pixels for an image crashing attack could be replaced by this)
|
||||
|
||||
%URI.Munge - will munge all external URIs to a different URI, which redirects
|
||||
the user to the applicable page. A urlencoded version of the URI
|
||||
will replace any instances of %s in the string. One possible
|
||||
string is 'http://www.google.com/url?q=%s'. Useful for preventing
|
||||
pagerank from being sent to other sites, but can also be used to
|
||||
redirect to a splash page notifying user that they are leaving your
|
||||
website.
|
||||
|
||||
%URI.AddRelNofollow - will add rel="nofollow" to all links, preventing the
|
||||
spread of ill-gotten pagerank
|
||||
|
||||
%URI.RelativeToAbsolute - transforms all relative URIs to absolute form
|
||||
|
||||
%URI.HostBlacklist - strings that if found in the host of a URI are disallowed
|
||||
%URI.HostBlacklistRegex - regexes that if matching the host are disallowed
|
||||
%URI.HostWhitelist - domain names that are excluded from the host blacklist
|
||||
%URI.HostPolicy - determines whether or not it's reject all and then whitelist
|
||||
or allow all in then do specific blacklists with whitelist intervening.
|
||||
'DenyAll' or 'AllowAll' (default)
|
||||
|
||||
%URI.DisableIPHosts - URIs that have IP addresses for hosts are disallowed.
|
||||
Be sure to also grab unusual encodings (dword, hex and octal), which may
|
||||
currently be caught by regular DNS
|
||||
%URI.DisableIDN - Disallow raw internationalized domain names. Punycode
|
||||
will still be permitted.
|
||||
|
||||
%URI.ConvertUnusualIPHosts - transform dword/hex/octal IP addresses to the
|
||||
regular form
|
||||
%URI.ConvertAbsoluteDNS - Remove extra dots after host names that trigger
|
||||
absolute DNS. While this is actually the preferred method according to
|
||||
the RFC, most people opt to use a relative domain name relative to . (root).
|
||||
|
||||
%URI.DisableExternalResources - disallow resource links (i.e. URIs that result
|
||||
in immediate requests, such as src in IMG) to external websites
|
||||
|
||||
%HTML.DisableImg - disables all images
|
@@ -1,40 +0,0 @@
|
||||
html {font-size:1em; font-family:serif; }
|
||||
body {margin-left:4em; margin-right:4em; }
|
||||
|
||||
dt {font-weight:bold; }
|
||||
pre {margin-left:2em; }
|
||||
pre, code, tt {font-family:monospace; font-size:1em; }
|
||||
|
||||
h1 {text-align:center; font-family:Garamond, serif;
|
||||
font-variant:small-caps;}
|
||||
h2 {border-bottom:1px solid #CCC; font-family:sans-serif; font-weight:normal;
|
||||
font-size:1.3em;}
|
||||
h3 {font-family:sans-serif; font-size:1.1em; font-weight:bold; }
|
||||
h4 {font-family:sans-serif; font-size:0.9em; font-weight:bold; }
|
||||
|
||||
/* For witty quips */
|
||||
.subtitled {margin-bottom:0em;}
|
||||
.subtitle , .subsubtitle {font-size:.8em; margin-bottom:1em;
|
||||
font-style:italic; margin-top:-.2em;text-align:center;}
|
||||
.subsubtitle {text-align:left;margin-left:2em;}
|
||||
|
||||
/* Used for special "See also" links. */
|
||||
.reference {font-style:italic;margin-left:2em;}
|
||||
|
||||
/* Marks off asides, discussions on why something is the way it is */
|
||||
.aside {margin-left:2em; font-family:sans-serif; font-size:0.9em; }
|
||||
|
||||
/* A regular table */
|
||||
.table {border-collapse:collapse; border-bottom:2px solid #888; margin-left:2em; }
|
||||
.table thead th {margin:0; background:#888; color:#FFF; }
|
||||
.table thead th:first-child {-moz-border-radius-topleft:1em;}
|
||||
.table tbody td {border-bottom:1px solid #CCC; padding-right:0.6em;padding-left:0.6em;}
|
||||
|
||||
/* Category of the file */
|
||||
#filing {font-weight:bold; font-size:smaller; }
|
||||
|
||||
/* Contains, without exception, Return to index. */
|
||||
#index {font-size:smaller; }
|
||||
|
||||
/* Contains, without exception, $Id$, for SVN version info. */
|
||||
#version {text-align:right; font-style:italic; margin:2em 0;}
|
@@ -22,7 +22,7 @@
|
||||
*/
|
||||
|
||||
/*
|
||||
HTML Purifier 1.2.0 - Standards Compliant HTML Filtering
|
||||
HTML Purifier - Standards Compliant HTML Filtering
|
||||
Copyright (C) 2006 Edward Z. Yang
|
||||
|
||||
This library is free software; you can redistribute it and/or
|
||||
@@ -44,7 +44,6 @@
|
||||
// they get included
|
||||
require_once 'HTMLPurifier/ConfigSchema.php';
|
||||
require_once 'HTMLPurifier/Config.php';
|
||||
require_once 'HTMLPurifier/Context.php';
|
||||
|
||||
require_once 'HTMLPurifier/Lexer.php';
|
||||
require_once 'HTMLPurifier/Generator.php';
|
||||
@@ -96,17 +95,16 @@ class HTMLPurifier
|
||||
*/
|
||||
function purify($html, $config = null) {
|
||||
$config = $config ? $config : $this->config;
|
||||
$context =& new HTMLPurifier_Context();
|
||||
$html = $this->encoder->convertToUTF8($html, $config, $context);
|
||||
$html = $this->encoder->convertToUTF8($html, $config);
|
||||
$html =
|
||||
$this->generator->generateFromTokens(
|
||||
$this->strategy->execute(
|
||||
$this->lexer->tokenizeHTML($html, $config, $context),
|
||||
$config, $context
|
||||
$this->lexer->tokenizeHTML($html, $config),
|
||||
$config
|
||||
),
|
||||
$config, $context
|
||||
$config
|
||||
);
|
||||
$html = $this->encoder->convertFromUTF8($html, $config, $context);
|
||||
$html = $this->encoder->convertFromUTF8($html, $config);
|
||||
return $html;
|
||||
}
|
||||
|
||||
|
26
library/HTMLPurifier/AttrContext.php
Normal file
26
library/HTMLPurifier/AttrContext.php
Normal file
@@ -0,0 +1,26 @@
|
||||
<?php
|
||||
|
||||
/**
|
||||
* Internal data-structure used in attribute validation to accumulate state.
|
||||
*
|
||||
* This is a data-structure that holds objects that accumulate state, like
|
||||
* HTMLPurifier_IDAccumulator. It's better than using globals!
|
||||
*
|
||||
* @note Many functions that accept this object have it as a mandatory
|
||||
* parameter, even when there is no use for it. Though this is
|
||||
* for the same reasons as why HTMLPurifier_Config is a mandatory
|
||||
* parameter, it is also because you cannot assign a default value
|
||||
* to a parameter passed by reference (passing by reference is essential
|
||||
* for context to work in PHP 4).
|
||||
*/
|
||||
|
||||
class HTMLPurifier_AttrContext
|
||||
{
|
||||
/**
|
||||
* Contains an HTMLPurifier_IDAccumulator, which keeps track of used IDs.
|
||||
* @public
|
||||
*/
|
||||
var $id_accumulator;
|
||||
}
|
||||
|
||||
?>
|
@@ -1,5 +1,7 @@
|
||||
<?php
|
||||
|
||||
require_once 'HTMLPurifier/AttrContext.php';
|
||||
|
||||
/**
|
||||
* Base class for all validating attribute definitions.
|
||||
*
|
||||
@@ -20,7 +22,10 @@ class HTMLPurifier_AttrDef
|
||||
var $minimized = false;
|
||||
|
||||
/**
|
||||
* Validates and cleans passed string according to a definition.
|
||||
* Abstract function defined for functions that validate and clean strings.
|
||||
*
|
||||
* This function forms the basis for all the subclasses: they must
|
||||
* define this method.
|
||||
*
|
||||
* @public
|
||||
* @param $string String to be validated and cleaned.
|
||||
|
@@ -43,7 +43,6 @@ class HTMLPurifier_AttrDef_CSS extends HTMLPurifier_AttrDef
|
||||
$propvalues[$property] = $result;
|
||||
}
|
||||
|
||||
// procedure does not write the new CSS simultaneously, so it's
|
||||
// slightly inefficient, but it's the only way of getting rid of
|
||||
// duplicates. Perhaps config to optimize it, but not now.
|
||||
|
||||
|
@@ -24,14 +24,13 @@ class HTMLPurifier_AttrDef_Class extends HTMLPurifier_AttrDef
|
||||
// and plus it would complicate optimization efforts (you never
|
||||
// see that anyway).
|
||||
$matches = array();
|
||||
$pattern = '/(?:(?<=\s)|\A)'. // look behind for space or string start
|
||||
$pattern = '/(?:(?<=\s)|\A)'.
|
||||
'((?:--|-?[A-Za-z_])[A-Za-z_\-0-9]*)'.
|
||||
'(?:(?=\s)|\z)/'; // look ahead for space or string end
|
||||
'(?:(?=\s)|\z)/';
|
||||
preg_match_all($pattern, $string, $matches);
|
||||
|
||||
if (empty($matches[1])) return false;
|
||||
|
||||
// reconstruct class string
|
||||
$new_string = '';
|
||||
foreach ($matches[1] as $class_names) {
|
||||
$new_string .= $class_names . ' ';
|
||||
|
@@ -1,17 +0,0 @@
|
||||
<?php
|
||||
|
||||
require_once 'HTMLPurifier/AttrDef.php';
|
||||
|
||||
class HTMLPurifier_AttrDef_Email extends HTMLPurifier_AttrDef
|
||||
{
|
||||
|
||||
/**
|
||||
* Unpacks a mailbox into its display-name and address
|
||||
*/
|
||||
function unpack($string) {
|
||||
// needs to be implemented
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
?>
|
@@ -1,23 +0,0 @@
|
||||
<?php
|
||||
|
||||
require_once 'HTMLPurifier/AttrDef/Email.php';
|
||||
|
||||
/**
|
||||
* Primitive email validation class based on the regexp found at
|
||||
* http://www.regular-expressions.info/email.html
|
||||
*/
|
||||
class HTMLPurifier_AttrDef_Email_SimpleCheck extends HTMLPurifier_AttrDef_Email
|
||||
{
|
||||
|
||||
function validate($string, $config, &$context) {
|
||||
// no support for named mailboxes i.e. "Bob <bob@example.com>"
|
||||
// that needs more percent encoding to be done
|
||||
if ($string == '') return false;
|
||||
$string = trim($string);
|
||||
$result = preg_match('/^[A-Z0-9._%-]+@[A-Z0-9.-]+\.[A-Z]{2,4}$/i', $string);
|
||||
return $result ? $string : false;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
?>
|
@@ -5,7 +5,7 @@ require_once 'HTMLPurifier/AttrDef/IPv4.php';
|
||||
require_once 'HTMLPurifier/AttrDef/IPv6.php';
|
||||
|
||||
/**
|
||||
* Validates a host according to the IPv4, IPv6 and DNS (future) specifications.
|
||||
* Validates a host according to the IPv4, IPv6 and DNS specifications.
|
||||
*/
|
||||
class HTMLPurifier_AttrDef_Host extends HTMLPurifier_AttrDef
|
||||
{
|
||||
@@ -35,8 +35,6 @@ class HTMLPurifier_AttrDef_Host extends HTMLPurifier_AttrDef
|
||||
if ($valid === false) return false;
|
||||
return '['. $valid . ']';
|
||||
}
|
||||
|
||||
// need to do checks on unusual encodings too
|
||||
$ipv4 = $this->ipv4->validate($string, $config, $context);
|
||||
if ($ipv4 !== false) return $ipv4;
|
||||
|
||||
|
@@ -3,30 +3,6 @@
|
||||
require_once 'HTMLPurifier/AttrDef.php';
|
||||
require_once 'HTMLPurifier/IDAccumulator.php';
|
||||
|
||||
HTMLPurifier_ConfigSchema::define(
|
||||
'Attr', 'IDPrefix', '', 'string',
|
||||
'String to prefix to IDs. If you have no idea what IDs your pages '.
|
||||
'may use, you may opt to simply add a prefix to all user-submitted ID '.
|
||||
'attributes so that they are still usable, but will not conflict with '.
|
||||
'core page IDs. Example: setting the directive to \'user_\' will result in '.
|
||||
'a user submitted \'foo\' to become \'user_foo\' Be sure to set '.
|
||||
'%HTML.EnableAttrID to true before using '.
|
||||
'this. This directive was available since 1.2.0.'
|
||||
);
|
||||
|
||||
HTMLPurifier_ConfigSchema::define(
|
||||
'Attr', 'IDPrefixLocal', '', 'string',
|
||||
'Temporary prefix for IDs used in conjunction with %Attr.IDPrefix. If '.
|
||||
'you need to allow multiple sets of '.
|
||||
'user content on web page, you may need to have a seperate prefix that '.
|
||||
'changes with each iteration. This way, seperately submitted user content '.
|
||||
'displayed on the same page doesn\'t clobber each other. Ideal values '.
|
||||
'are unique identifiers for the content it represents (i.e. the id of '.
|
||||
'the row in the database). Be sure to add a seperator (like an underscore) '.
|
||||
'at the end. Warning: this directive will not work unless %Attr.IDPrefix '.
|
||||
'is set to a non-empty value! This directive was available since 1.2.0.'
|
||||
);
|
||||
|
||||
/**
|
||||
* Validates the HTML attribute ID.
|
||||
* @warning Even though this is the id processor, it
|
||||
@@ -44,19 +20,7 @@ class HTMLPurifier_AttrDef_ID extends HTMLPurifier_AttrDef
|
||||
$id = trim($id); // trim it first
|
||||
|
||||
if ($id === '') return false;
|
||||
|
||||
$prefix = $config->get('Attr', 'IDPrefix');
|
||||
if ($prefix !== '') {
|
||||
$prefix .= $config->get('Attr', 'IDPrefixLocal');
|
||||
// prevent re-appending the prefix
|
||||
if (strpos($id, $prefix) !== 0) $id = $prefix . $id;
|
||||
} elseif ($config->get('Attr', 'IDPrefixLocal') !== '') {
|
||||
trigger_error('%Attr.IDPrefixLocal cannot be used unless '.
|
||||
'%Attr.IDPrefix is set', E_USER_WARNING);
|
||||
}
|
||||
|
||||
$id_accumulator =& $context->get('IDAccumulator');
|
||||
if (isset($id_accumulator->ids[$id])) return false;
|
||||
if (isset($context->id_accumulator->ids[$id])) return false;
|
||||
|
||||
// we purposely avoid using regex, hopefully this is faster
|
||||
|
||||
@@ -71,7 +35,7 @@ class HTMLPurifier_AttrDef_ID extends HTMLPurifier_AttrDef
|
||||
$result = ($trim === '');
|
||||
}
|
||||
|
||||
if ($result) $id_accumulator->add($id);
|
||||
if ($result) $context->id_accumulator->add($id);
|
||||
|
||||
// if no change was made to the ID, return the result
|
||||
// else, return the new id if stripping whitespace made it
|
||||
|
@@ -4,7 +4,6 @@ require_once 'HTMLPurifier/AttrDef.php';
|
||||
require_once 'HTMLPurifier/URIScheme.php';
|
||||
require_once 'HTMLPurifier/URISchemeRegistry.php';
|
||||
require_once 'HTMLPurifier/AttrDef/Host.php';
|
||||
require_once 'HTMLPurifier/PercentEncoder.php';
|
||||
|
||||
HTMLPurifier_ConfigSchema::define(
|
||||
'URI', 'DefaultScheme', 'http', 'string',
|
||||
@@ -12,28 +11,6 @@ HTMLPurifier_ConfigSchema::define(
|
||||
'select the proper object validator when no scheme information is present.'
|
||||
);
|
||||
|
||||
HTMLPurifier_ConfigSchema::define(
|
||||
'URI', 'Host', null, 'string/null',
|
||||
'Defines the domain name of the server, so we can determine whether or '.
|
||||
'an absolute URI is from your website or not. Not strictly necessary, '.
|
||||
'as users should be using relative URIs to reference resources on your '.
|
||||
'website. It will, however, let you use absolute URIs to link to '.
|
||||
'subdomains of the domain you post here: i.e. example.com will allow '.
|
||||
'sub.example.com. However, higher up domains will still be excluded: '.
|
||||
'if you set %URI.Host to sub.example.com, example.com will be blocked. '.
|
||||
'This directive has been available since 1.2.0.'
|
||||
);
|
||||
|
||||
HTMLPurifier_ConfigSchema::Define(
|
||||
'URI', 'DisableExternal', false, 'bool',
|
||||
'Disables links to external websites. This is a highly effective '.
|
||||
'anti-spam and anti-pagerank-leech measure, but comes at a hefty price: no'.
|
||||
'links or images outside of your domain will be allowed. Non-linkified '.
|
||||
'URIs will still be preserved. If you want to be able to link to '.
|
||||
'subdomains or use absolute URIs, specify %URI.Host for your website. '.
|
||||
'This directive has been available since 1.2.0.'
|
||||
);
|
||||
|
||||
/**
|
||||
* Validates a URI as defined by RFC 3986.
|
||||
* @note Scheme-specific mechanics deferred to HTMLPurifier_URIScheme
|
||||
@@ -42,16 +19,9 @@ class HTMLPurifier_AttrDef_URI extends HTMLPurifier_AttrDef
|
||||
{
|
||||
|
||||
var $host;
|
||||
var $PercentEncoder;
|
||||
var $embeds;
|
||||
|
||||
/**
|
||||
* @param $embeds Does the URI here result in an extra HTTP request?
|
||||
*/
|
||||
function HTMLPurifier_AttrDef_URI($embeds = false) {
|
||||
function HTMLPurifier_AttrDef_URI() {
|
||||
$this->host = new HTMLPurifier_AttrDef_Host();
|
||||
$this->PercentEncoder = new HTMLPurifier_PercentEncoder();
|
||||
$this->embeds = (bool) $embeds;
|
||||
}
|
||||
|
||||
function validate($uri, $config, &$context) {
|
||||
@@ -62,9 +32,6 @@ class HTMLPurifier_AttrDef_URI extends HTMLPurifier_AttrDef
|
||||
// parse as CDATA
|
||||
$uri = $this->parseCDATA($uri);
|
||||
|
||||
// fix up percent-encoding
|
||||
$uri = $this->PercentEncoder->normalize($uri);
|
||||
|
||||
// while it would be nice to use parse_url(), that's specifically
|
||||
// for HTTP and thus won't work for our generic URI parsing
|
||||
|
||||
@@ -96,31 +63,18 @@ class HTMLPurifier_AttrDef_URI extends HTMLPurifier_AttrDef
|
||||
// no need to validate the scheme's fmt since we do that when we
|
||||
// retrieve the specific scheme object from the registry
|
||||
$scheme = ctype_lower($scheme) ? $scheme : strtolower($scheme);
|
||||
$scheme_obj =& $registry->getScheme($scheme, $config, $context);
|
||||
$scheme_obj =& $registry->getScheme($scheme, $config);
|
||||
if (!$scheme_obj) return false; // invalid scheme, clean it out
|
||||
} else {
|
||||
$scheme_obj =& $registry->getScheme(
|
||||
$config->get('URI', 'DefaultScheme'), $config, $context
|
||||
$config->get('URI', 'DefaultScheme'), $config
|
||||
);
|
||||
}
|
||||
|
||||
|
||||
// the URI we're processing embeds a resource in the page, but the URI
|
||||
// it references cannot be located
|
||||
if ($this->embeds && !$scheme_obj->browsable) {
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
if ($authority !== null) {
|
||||
|
||||
// remove URI if it's absolute and we disallow externals
|
||||
unset($our_host);
|
||||
if ($config->get('URI', 'DisableExternal')) {
|
||||
$our_host = $config->get('URI', 'Host');
|
||||
if ($our_host === null) return false;
|
||||
}
|
||||
|
||||
$HEXDIG = '[A-Fa-f0-9]';
|
||||
$unreserved = 'A-Za-z0-9-._~'; // make sure you wrap with []
|
||||
$sub_delims = '!$&\'()'; // needs []
|
||||
@@ -143,17 +97,6 @@ class HTMLPurifier_AttrDef_URI extends HTMLPurifier_AttrDef
|
||||
$host = $this->host->validate($host, $config, $context);
|
||||
if ($host === false) $host = null;
|
||||
|
||||
// more lenient absolute checking
|
||||
if (isset($our_host)) {
|
||||
$host_parts = array_reverse(explode('.', $host));
|
||||
// could be cached
|
||||
$our_host_parts = array_reverse(explode('.', $our_host));
|
||||
foreach ($our_host_parts as $i => $discard) {
|
||||
if (!isset($host_parts[$i])) return false;
|
||||
if ($host_parts[$i] != $our_host_parts[$i]) return false;
|
||||
}
|
||||
}
|
||||
|
||||
// userinfo and host are validated within the regexp
|
||||
|
||||
} else {
|
||||
@@ -177,7 +120,7 @@ class HTMLPurifier_AttrDef_URI extends HTMLPurifier_AttrDef
|
||||
// note that $fragment is omitted
|
||||
list($userinfo, $host, $port, $path, $query) =
|
||||
$scheme_obj->validateComponents(
|
||||
$userinfo, $host, $port, $path, $query, $config, $context
|
||||
$userinfo, $host, $port, $path, $query, $config
|
||||
);
|
||||
|
||||
|
||||
|
@@ -23,10 +23,9 @@ class HTMLPurifier_AttrTransform
|
||||
* @param $attr Assoc array of attributes, usually from
|
||||
* HTMLPurifier_Token_Tag::$attributes
|
||||
* @param $config Mandatory HTMLPurifier_Config object.
|
||||
* @param $context Mandatory HTMLPurifier_Context object
|
||||
* @returns Processed attribute array.
|
||||
*/
|
||||
function transform($attr, $config, &$context) {
|
||||
function transform($attr, $config) {
|
||||
trigger_error('Cannot call abstract function', E_USER_ERROR);
|
||||
}
|
||||
}
|
||||
|
@@ -20,7 +20,7 @@ HTMLPurifier_ConfigSchema::defineAllowedValues(
|
||||
class HTMLPurifier_AttrTransform_BdoDir extends HTMLPurifier_AttrTransform
|
||||
{
|
||||
|
||||
function transform($attr, $config, $context) {
|
||||
function transform($attr, $config) {
|
||||
if (isset($attr['dir'])) return $attr;
|
||||
$attr['dir'] = $config->get('Attr', 'DefaultTextDir');
|
||||
return $attr;
|
||||
|
@@ -25,7 +25,7 @@ HTMLPurifier_ConfigSchema::define(
|
||||
class HTMLPurifier_AttrTransform_ImgRequired extends HTMLPurifier_AttrTransform
|
||||
{
|
||||
|
||||
function transform($attr, $config, $context) {
|
||||
function transform($attr, $config) {
|
||||
|
||||
$src = true;
|
||||
if (!isset($attr['src'])) {
|
||||
|
@@ -10,7 +10,7 @@ require_once 'HTMLPurifier/AttrTransform.php';
|
||||
class HTMLPurifier_AttrTransform_Lang extends HTMLPurifier_AttrTransform
|
||||
{
|
||||
|
||||
function transform($attr, $config, $context) {
|
||||
function transform($attr, $config) {
|
||||
|
||||
$lang = isset($attr['lang']) ? $attr['lang'] : false;
|
||||
$xml_lang = isset($attr['xml:lang']) ? $attr['xml:lang'] : false;
|
||||
|
@@ -8,7 +8,7 @@ require_once 'HTMLPurifier/AttrTransform.php';
|
||||
class HTMLPurifier_AttrTransform_TextAlign
|
||||
extends HTMLPurifier_AttrTransform {
|
||||
|
||||
function transform($attr, $config, $context) {
|
||||
function transform($attr, $config) {
|
||||
|
||||
if (!isset($attr['align'])) return $attr;
|
||||
|
||||
|
@@ -38,14 +38,15 @@ class HTMLPurifier_ChildDef
|
||||
/**
|
||||
* Validates nodes according to definition and returns modification.
|
||||
*
|
||||
* @warning $context is NOT HTMLPurifier_AttrContext
|
||||
* @param $tokens_of_children Array of HTMLPurifier_Token
|
||||
* @param $config HTMLPurifier_Config object
|
||||
* @param $context HTMLPurifier_Context object
|
||||
* @param $context String context indicating inline, block or unknown
|
||||
* @return bool true to leave nodes as is
|
||||
* @return bool false to remove parent node
|
||||
* @return array of replacement child tokens
|
||||
*/
|
||||
function validateChildren($tokens_of_children, $config, &$context) {
|
||||
function validateChildren($tokens_of_children, $config, $context) {
|
||||
trigger_error('Call to abstract function', E_USER_ERROR);
|
||||
}
|
||||
}
|
||||
@@ -90,7 +91,7 @@ class HTMLPurifier_ChildDef_Custom extends HTMLPurifier_ChildDef
|
||||
$reg = preg_replace('/([#a-zA-Z0-9_.-]+)/', '(,?\\0)', $reg);
|
||||
$this->_pcre_regex = $reg;
|
||||
}
|
||||
function validateChildren($tokens_of_children, $config, &$context) {
|
||||
function validateChildren($tokens_of_children, $config, $context) {
|
||||
$list_of_children = '';
|
||||
$nesting = 0; // depth into the nest
|
||||
foreach ($tokens_of_children as $token) {
|
||||
@@ -144,7 +145,7 @@ class HTMLPurifier_ChildDef_Required extends HTMLPurifier_ChildDef
|
||||
}
|
||||
var $allow_empty = false;
|
||||
var $type = 'required';
|
||||
function validateChildren($tokens_of_children, $config, &$context) {
|
||||
function validateChildren($tokens_of_children, $config, $context) {
|
||||
// if there are no tokens, delete parent node
|
||||
if (empty($tokens_of_children)) return false;
|
||||
|
||||
@@ -226,7 +227,7 @@ class HTMLPurifier_ChildDef_Optional extends HTMLPurifier_ChildDef_Required
|
||||
{
|
||||
var $allow_empty = true;
|
||||
var $type = 'optional';
|
||||
function validateChildren($tokens_of_children, $config, &$context) {
|
||||
function validateChildren($tokens_of_children, $config, $context) {
|
||||
$result = parent::validateChildren($tokens_of_children, $config, $context);
|
||||
if ($result === false) return array();
|
||||
return $result;
|
||||
@@ -245,7 +246,7 @@ class HTMLPurifier_ChildDef_Empty extends HTMLPurifier_ChildDef
|
||||
var $allow_empty = true;
|
||||
var $type = 'empty';
|
||||
function HTMLPurifier_ChildDef_Empty() {}
|
||||
function validateChildren($tokens_of_children, $config, &$context) {
|
||||
function validateChildren($tokens_of_children, $config, $context) {
|
||||
return array();
|
||||
}
|
||||
}
|
||||
@@ -280,9 +281,8 @@ class HTMLPurifier_ChildDef_Chameleon extends HTMLPurifier_ChildDef
|
||||
$this->block = new HTMLPurifier_ChildDef_Optional($block);
|
||||
}
|
||||
|
||||
function validateChildren($tokens_of_children, $config, &$context) {
|
||||
$parent_type = $context->get('ParentType');
|
||||
switch ($parent_type) {
|
||||
function validateChildren($tokens_of_children, $config, $context) {
|
||||
switch ($context) {
|
||||
case 'unknown':
|
||||
case 'inline':
|
||||
$result = $this->inline->validateChildren(
|
||||
@@ -308,7 +308,7 @@ class HTMLPurifier_ChildDef_Table extends HTMLPurifier_ChildDef
|
||||
var $allow_empty = false;
|
||||
var $type = 'table';
|
||||
function HTMLPurifier_ChildDef_Table() {}
|
||||
function validateChildren($tokens_of_children, $config, &$context) {
|
||||
function validateChildren($tokens_of_children, $config, $context) {
|
||||
if (empty($tokens_of_children)) return false;
|
||||
|
||||
// this ensures that the loop gets run one last time before closing
|
||||
|
@@ -60,7 +60,7 @@ class HTMLPurifier_Config
|
||||
* @param $key String key
|
||||
*/
|
||||
function get($namespace, $key) {
|
||||
if (!isset($this->def->info[$namespace][$key])) {
|
||||
if (!isset($this->conf[$namespace][$key])) {
|
||||
trigger_error('Cannot retrieve value of undefined directive',
|
||||
E_USER_WARNING);
|
||||
return;
|
||||
@@ -75,16 +75,13 @@ class HTMLPurifier_Config
|
||||
* @param $value Mixed value
|
||||
*/
|
||||
function set($namespace, $key, $value) {
|
||||
if (!isset($this->def->info[$namespace][$key])) {
|
||||
if (!isset($this->conf[$namespace][$key])) {
|
||||
trigger_error('Cannot set undefined directive to value',
|
||||
E_USER_WARNING);
|
||||
return;
|
||||
}
|
||||
$value = $this->def->validate(
|
||||
$value,
|
||||
$this->def->info[$namespace][$key]->type,
|
||||
$this->def->info[$namespace][$key]->allow_null
|
||||
);
|
||||
$value = $this->def->validate($value,
|
||||
$this->def->info[$namespace][$key]->type);
|
||||
if (is_string($value)) {
|
||||
// resolve value alias if defined
|
||||
if (isset($this->def->info[$namespace][$key]->aliases[$value])) {
|
||||
@@ -98,7 +95,7 @@ class HTMLPurifier_Config
|
||||
}
|
||||
}
|
||||
}
|
||||
if ($this->def->isError($value)) {
|
||||
if ($value === null) {
|
||||
trigger_error('Value is of invalid type', E_USER_WARNING);
|
||||
return;
|
||||
}
|
||||
@@ -127,27 +124,6 @@ class HTMLPurifier_Config
|
||||
return $this->css_definition;
|
||||
}
|
||||
|
||||
/**
|
||||
* Loads configuration values from an array with the following structure:
|
||||
* Namespace.Directive => Value
|
||||
* @param $config_array Configuration associative array
|
||||
*/
|
||||
function loadArray($config_array) {
|
||||
foreach ($config_array as $key => $value) {
|
||||
if (strpos($key, '.') !== false) {
|
||||
// condensed form
|
||||
list($namespace, $directive) = explode('.', $key);
|
||||
$this->set($namespace, $directive, $value);
|
||||
} else {
|
||||
$namespace = $key;
|
||||
$namespace_values = $value;
|
||||
foreach ($namespace_values as $directive => $value) {
|
||||
$this->set($namespace, $directive, $value);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
?>
|
@@ -1,7 +1,5 @@
|
||||
<?php
|
||||
|
||||
require_once 'HTMLPurifier/Error.php';
|
||||
|
||||
/**
|
||||
* Configuration definition, defines directives and their defaults.
|
||||
* @todo The ability to define things multiple times is confusing and should
|
||||
@@ -113,19 +111,12 @@ class HTMLPurifier_ConfigSchema {
|
||||
return;
|
||||
}
|
||||
} else {
|
||||
// process modifiers
|
||||
$type_values = explode('/', $type, 2);
|
||||
$type = $type_values[0];
|
||||
$modifier = isset($type_values[1]) ? $type_values[1] : false;
|
||||
$allow_null = ($modifier === 'null');
|
||||
|
||||
if (!isset($def->types[$type])) {
|
||||
trigger_error('Invalid type for configuration directive',
|
||||
E_USER_ERROR);
|
||||
return;
|
||||
}
|
||||
$default = $def->validate($default, $type, $allow_null);
|
||||
if ($def->isError($default)) {
|
||||
if ($def->validate($default, $type) === null) {
|
||||
trigger_error('Default value does not match directive type',
|
||||
E_USER_ERROR);
|
||||
return;
|
||||
@@ -133,7 +124,6 @@ class HTMLPurifier_ConfigSchema {
|
||||
$def->info[$namespace][$name] =
|
||||
new HTMLPurifier_ConfigEntity_Directive();
|
||||
$def->info[$namespace][$name]->type = $type;
|
||||
$def->info[$namespace][$name]->allow_null = $allow_null;
|
||||
$def->defaults[$namespace][$name] = $default;
|
||||
}
|
||||
$backtrace = debug_backtrace();
|
||||
@@ -222,37 +212,36 @@ class HTMLPurifier_ConfigSchema {
|
||||
/**
|
||||
* Validate a variable according to type. Return null if invalid.
|
||||
*/
|
||||
function validate($var, $type, $allow_null = false) {
|
||||
function validate($var, $type) {
|
||||
if (!isset($this->types[$type])) {
|
||||
trigger_error('Invalid type', E_USER_ERROR);
|
||||
return;
|
||||
}
|
||||
if ($allow_null && $var === null) return null;
|
||||
switch ($type) {
|
||||
case 'mixed':
|
||||
return $var;
|
||||
case 'istring':
|
||||
case 'string':
|
||||
if (!is_string($var)) break;
|
||||
if (!is_string($var)) return;
|
||||
if ($type === 'istring') $var = strtolower($var);
|
||||
return $var;
|
||||
case 'int':
|
||||
if (is_string($var) && ctype_digit($var)) $var = (int) $var;
|
||||
elseif (!is_int($var)) break;
|
||||
elseif (!is_int($var)) return;
|
||||
return $var;
|
||||
case 'float':
|
||||
if (is_string($var) && is_numeric($var)) $var = (float) $var;
|
||||
elseif (!is_float($var)) break;
|
||||
elseif (!is_float($var)) return;
|
||||
return $var;
|
||||
case 'bool':
|
||||
if (is_int($var) && ($var === 0 || $var === 1)) {
|
||||
$var = (bool) $var;
|
||||
} elseif (!is_bool($var)) break;
|
||||
} elseif (!is_bool($var)) return;
|
||||
return $var;
|
||||
case 'list':
|
||||
case 'hash':
|
||||
case 'lookup':
|
||||
if (!is_array($var)) break;
|
||||
if (!is_array($var)) return;
|
||||
$keys = array_keys($var);
|
||||
if ($keys === array_keys($keys)) {
|
||||
if ($type == 'list') return $var;
|
||||
@@ -262,7 +251,7 @@ class HTMLPurifier_ConfigSchema {
|
||||
$new[$key] = true;
|
||||
}
|
||||
return $new;
|
||||
} else break;
|
||||
} else return;
|
||||
}
|
||||
if ($type === 'lookup') {
|
||||
foreach ($var as $key => $value) {
|
||||
@@ -271,13 +260,8 @@ class HTMLPurifier_ConfigSchema {
|
||||
}
|
||||
return $var;
|
||||
}
|
||||
$error = new HTMLPurifier_Error();
|
||||
return $error;
|
||||
}
|
||||
|
||||
/**
|
||||
* Takes an absolute path and munges it into a more manageable relative path
|
||||
*/
|
||||
function mungeFilename($filename) {
|
||||
$offset = strrpos($filename, 'HTMLPurifier');
|
||||
$filename = substr($filename, $offset);
|
||||
@@ -285,14 +269,6 @@ class HTMLPurifier_ConfigSchema {
|
||||
return $filename;
|
||||
}
|
||||
|
||||
/**
|
||||
* Checks if var is an HTMLPurifier_Error object
|
||||
*/
|
||||
function isError($var) {
|
||||
if (!is_object($var)) return false;
|
||||
if (!is_a($var, 'HTMLPurifier_Error')) return false;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -342,13 +318,6 @@ class HTMLPurifier_ConfigEntity_Directive extends HTMLPurifier_ConfigEntity
|
||||
* - mixed (anything goes)
|
||||
*/
|
||||
var $type = 'mixed';
|
||||
|
||||
/**
|
||||
* Is null allowed? Has no effect for mixed type.
|
||||
* @bool
|
||||
*/
|
||||
var $allow_null = false;
|
||||
|
||||
/**
|
||||
* Plaintext descriptions of what the configuration entity is. Organized by
|
||||
* file and line number, so multiple descriptions are allowed.
|
||||
|
@@ -1,76 +0,0 @@
|
||||
<?php
|
||||
|
||||
/**
|
||||
* Registry object that contains information about the current context.
|
||||
*/
|
||||
class HTMLPurifier_Context
|
||||
{
|
||||
|
||||
/**
|
||||
* Private array that stores the references.
|
||||
* @private
|
||||
*/
|
||||
var $_storage = array();
|
||||
|
||||
/**
|
||||
* Registers a variable into the context.
|
||||
* @param $name String name
|
||||
* @param $ref Variable to be registered
|
||||
*/
|
||||
function register($name, &$ref) {
|
||||
if (isset($this->_storage[$name])) {
|
||||
trigger_error('Name collision, cannot re-register',
|
||||
E_USER_ERROR);
|
||||
return;
|
||||
}
|
||||
$this->_storage[$name] =& $ref;
|
||||
}
|
||||
|
||||
/**
|
||||
* Retrieves a variable reference from the context.
|
||||
* @param $name String name
|
||||
*/
|
||||
function &get($name) {
|
||||
if (!isset($this->_storage[$name])) {
|
||||
trigger_error('Attempted to retrieve non-existent variable',
|
||||
E_USER_ERROR);
|
||||
$var = null; // so we can return by reference
|
||||
return $var;
|
||||
}
|
||||
return $this->_storage[$name];
|
||||
}
|
||||
|
||||
/**
|
||||
* Destroys a variable in the context.
|
||||
* @param $name String name
|
||||
*/
|
||||
function destroy($name) {
|
||||
if (!isset($this->_storage[$name])) {
|
||||
trigger_error('Attempted to destroy non-existent variable',
|
||||
E_USER_ERROR);
|
||||
return;
|
||||
}
|
||||
unset($this->_storage[$name]);
|
||||
}
|
||||
|
||||
/**
|
||||
* Checks whether or not the variable exists.
|
||||
* @param $name String name
|
||||
*/
|
||||
function exists($name) {
|
||||
return isset($this->_storage[$name]);
|
||||
}
|
||||
|
||||
/**
|
||||
* Loads a series of variables from an associative array
|
||||
* @param $context_array Assoc array of variables to load
|
||||
*/
|
||||
function loadArray(&$context_array) {
|
||||
foreach ($context_array as $key => $discard) {
|
||||
$this->register($key, $context_array[$key]);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
?>
|
@@ -225,30 +225,7 @@ class HTMLPurifier_Encoder
|
||||
|
||||
/**
|
||||
* Translates a Unicode codepoint into its corresponding UTF-8 character.
|
||||
* @note Based on Feyd's function at
|
||||
* <http://forums.devnetwork.net/viewtopic.php?p=191404#191404>,
|
||||
* which is in public domain.
|
||||
* @note While we're going to do code point parsing anyway, a good
|
||||
* optimization would be to refuse to translate code points that
|
||||
* are non-SGML characters. However, this could lead to duplication.
|
||||
* @note This is very similar to the unichr function in
|
||||
* maintenance/generate-entity-file.php (although this is superior,
|
||||
* due to its sanity checks).
|
||||
*/
|
||||
|
||||
// +----------+----------+----------+----------+
|
||||
// | 33222222 | 22221111 | 111111 | |
|
||||
// | 10987654 | 32109876 | 54321098 | 76543210 | bit
|
||||
// +----------+----------+----------+----------+
|
||||
// | | | | 0xxxxxxx | 1 byte 0x00000000..0x0000007F
|
||||
// | | | 110yyyyy | 10xxxxxx | 2 byte 0x00000080..0x000007FF
|
||||
// | | 1110zzzz | 10yyyyyy | 10xxxxxx | 3 byte 0x00000800..0x0000FFFF
|
||||
// | 11110www | 10wwzzzz | 10yyyyyy | 10xxxxxx | 4 byte 0x00010000..0x0010FFFF
|
||||
// +----------+----------+----------+----------+
|
||||
// | 00000000 | 00011111 | 11111111 | 11111111 | Theoretical upper limit of legal scalars: 2097151 (0x001FFFFF)
|
||||
// | 00000000 | 00010000 | 11111111 | 11111111 | Defined upper limit of legal scalar codes
|
||||
// +----------+----------+----------+----------+
|
||||
|
||||
function unichr($code) {
|
||||
if($code > 1114111 or $code < 0 or
|
||||
($code >= 55296 and $code <= 57343) ) {
|
||||
@@ -289,7 +266,7 @@ class HTMLPurifier_Encoder
|
||||
/**
|
||||
* Converts a string to UTF-8 based on configuration.
|
||||
*/
|
||||
function convertToUTF8($str, $config, &$context) {
|
||||
function convertToUTF8($str, $config) {
|
||||
static $iconv = null;
|
||||
if ($iconv === null) $iconv = function_exists('iconv');
|
||||
$encoding = $config->get('Core', 'Encoding');
|
||||
@@ -306,7 +283,7 @@ class HTMLPurifier_Encoder
|
||||
* @note Currently, this is a lossy conversion, with inexpressible
|
||||
* characters being omitted.
|
||||
*/
|
||||
function convertFromUTF8($str, $config, &$context) {
|
||||
function convertFromUTF8($str, $config) {
|
||||
static $iconv = null;
|
||||
if ($iconv === null) $iconv = function_exists('iconv');
|
||||
$encoding = $config->get('Core', 'Encoding');
|
||||
|
@@ -19,7 +19,7 @@ class HTMLPurifier_EntityLookup {
|
||||
*/
|
||||
function setup($file = false) {
|
||||
if (!$file) {
|
||||
$file = dirname(__FILE__) . '/EntityLookup/entities.ser';
|
||||
$file = dirname(__FILE__) . '/EntityLookup/data.txt';
|
||||
}
|
||||
$this->table = unserialize(file_get_contents($file));
|
||||
}
|
||||
|
@@ -3,10 +3,6 @@
|
||||
require_once 'HTMLPurifier/EntityLookup.php';
|
||||
require_once 'HTMLPurifier/Encoder.php';
|
||||
|
||||
// if we want to implement error collecting here, we'll need to use some sort
|
||||
// of global data (probably trigger_error) because it's impossible to pass
|
||||
// $config or $context to the callback functions.
|
||||
|
||||
/**
|
||||
* Handles referencing and dereferencing character entities
|
||||
*/
|
||||
@@ -76,12 +72,37 @@ class HTMLPurifier_EntityParser
|
||||
*
|
||||
* @warning Though this is public in order to let the callback happen,
|
||||
* calling it directly is not recommended.
|
||||
* @note Based on Feyd's function at
|
||||
* <http://forums.devnetwork.net/viewtopic.php?p=191404#191404>,
|
||||
* which is in public domain.
|
||||
* @note While we're going to do code point parsing anyway, a good
|
||||
* optimization would be to refuse to translate code points that
|
||||
* are non-SGML characters. However, this could lead to duplication.
|
||||
* @note This function is heavily intimate with the inner workings of
|
||||
* UTF-8 and would also be well suited in the Encoder class (or at
|
||||
* least deferring some processing to it). This is also very
|
||||
* similar to the unichr function in
|
||||
* maintenance/generate-entity-file.php (although this is superior,
|
||||
* due to its sanity checks).
|
||||
* @param $matches PCRE matches array, with 0 the entire match, and
|
||||
* either index 1, 2 or 3 set with a hex value, dec value,
|
||||
* or string (respectively).
|
||||
* @returns Replacement string.
|
||||
*/
|
||||
|
||||
// +----------+----------+----------+----------+
|
||||
// | 33222222 | 22221111 | 111111 | |
|
||||
// | 10987654 | 32109876 | 54321098 | 76543210 | bit
|
||||
// +----------+----------+----------+----------+
|
||||
// | | | | 0xxxxxxx | 1 byte 0x00000000..0x0000007F
|
||||
// | | | 110yyyyy | 10xxxxxx | 2 byte 0x00000080..0x000007FF
|
||||
// | | 1110zzzz | 10yyyyyy | 10xxxxxx | 3 byte 0x00000800..0x0000FFFF
|
||||
// | 11110www | 10wwzzzz | 10yyyyyy | 10xxxxxx | 4 byte 0x00010000..0x0010FFFF
|
||||
// +----------+----------+----------+----------+
|
||||
// | 00000000 | 00011111 | 11111111 | 11111111 | Theoretical upper limit of legal scalars: 2097151 (0x001FFFFF)
|
||||
// | 00000000 | 00010000 | 11111111 | 11111111 | Defined upper limit of legal scalar codes
|
||||
// +----------+----------+----------+----------+
|
||||
|
||||
function nonSpecialEntityCallback($matches) {
|
||||
// replaces all but big five
|
||||
$entity = $matches[0];
|
||||
|
@@ -1,8 +0,0 @@
|
||||
<?php
|
||||
|
||||
/**
|
||||
* Return object from functions that signifies error when null doesn't cut it
|
||||
*/
|
||||
class HTMLPurifier_Error {}
|
||||
|
||||
?>
|
@@ -1,5 +1,7 @@
|
||||
<?php
|
||||
|
||||
// pretty-printing with indentation would be pretty cool
|
||||
|
||||
require_once 'HTMLPurifier/Lexer.php';
|
||||
|
||||
HTMLPurifier_ConfigSchema::define(
|
||||
@@ -50,7 +52,6 @@ class HTMLPurifier_Generator
|
||||
|
||||
/**
|
||||
* Bool cache of %Core.XHTML
|
||||
* @private
|
||||
*/
|
||||
var $_xhtml = true;
|
||||
|
||||
@@ -59,8 +60,9 @@ class HTMLPurifier_Generator
|
||||
* @param $tokens Array of HTMLPurifier_Token
|
||||
* @param $config HTMLPurifier_Config object
|
||||
* @return Generated HTML
|
||||
* @note Only unit tests may omit configuration: internals MUST pass config
|
||||
*/
|
||||
function generateFromTokens($tokens, $config, &$context) {
|
||||
function generateFromTokens($tokens, $config = null) {
|
||||
$html = '';
|
||||
if (!$config) $config = HTMLPurifier_Config::createDefault();
|
||||
$this->_clean_utf8 = $config->get('Core', 'CleanUTF8DuringGeneration');
|
||||
|
@@ -22,19 +22,6 @@ require_once 'HTMLPurifier/Generator.php';
|
||||
require_once 'HTMLPurifier/Token.php';
|
||||
require_once 'HTMLPurifier/TagTransform.php';
|
||||
|
||||
HTMLPurifier_ConfigSchema::define(
|
||||
'HTML', 'EnableAttrID', false, 'bool',
|
||||
'Allows the ID attribute in HTML. This is disabled by default '.
|
||||
'due to the fact that without proper configuration user input can '.
|
||||
'easily break the validation of a webpage by specifying an ID that is '.
|
||||
'already on the surrounding HTML. If you don\'t mind throwing caution to '.
|
||||
'the wind, enable this directive, but I strongly recommend you also '.
|
||||
'consider blacklisting IDs you use (%Attr.IDBlacklist) or prefixing all '.
|
||||
'user supplied IDs (%Attr.IDPrefix). This directive has been available '.
|
||||
'since 1.2.0, and when set to true reverts to the behavior of pre-1.2.0 '.
|
||||
'versions.'
|
||||
);
|
||||
|
||||
/**
|
||||
* Defines the purified HTML type with large amounts of objects.
|
||||
*
|
||||
@@ -249,13 +236,13 @@ class HTMLPurifier_HTMLDefinition
|
||||
// info[]->type : defines the type of the element (block or inline)
|
||||
|
||||
// reuses $e_Inline and $e_Block
|
||||
foreach ($e_Inline->elements as $name => $bool) {
|
||||
if ($name == '#PCDATA' || $name == '') continue;
|
||||
|
||||
foreach ($e_Inline->elements as $name) {
|
||||
$this->info[$name]->type = 'inline';
|
||||
}
|
||||
|
||||
$e_Block = new HTMLPurifier_ChildDef_Optional($e_block);
|
||||
foreach ($e_Block->elements as $name => $bool) {
|
||||
foreach ($e_Block->elements as $name) {
|
||||
$this->info[$name]->type = 'block';
|
||||
}
|
||||
|
||||
@@ -284,6 +271,7 @@ class HTMLPurifier_HTMLDefinition
|
||||
// which manually override these in their local definitions
|
||||
$this->info_global_attr = array(
|
||||
// core attrs
|
||||
'id' => new HTMLPurifier_AttrDef_ID(),
|
||||
'class' => new HTMLPurifier_AttrDef_Class(),
|
||||
'title' => $e_Text,
|
||||
'style' => new HTMLPurifier_AttrDef_CSS(),
|
||||
@@ -293,10 +281,6 @@ class HTMLPurifier_HTMLDefinition
|
||||
'xml:lang' => new HTMLPurifier_AttrDef_Lang(),
|
||||
);
|
||||
|
||||
if ($config->get('HTML', 'EnableAttrID')) {
|
||||
$this->info_global_attr['id'] = new HTMLPurifier_AttrDef_ID();
|
||||
}
|
||||
|
||||
// required attribute stipulation handled in attribute transformation
|
||||
$this->info['bdo']->attr = array(); // nothing else
|
||||
|
||||
@@ -351,14 +335,12 @@ class HTMLPurifier_HTMLDefinition
|
||||
$e_URI = new HTMLPurifier_AttrDef_URI();
|
||||
$this->info['a']->attr['href'] =
|
||||
$this->info['img']->attr['longdesc'] =
|
||||
$this->info['img']->attr['src'] =
|
||||
$this->info['del']->attr['cite'] =
|
||||
$this->info['ins']->attr['cite'] =
|
||||
$this->info['blockquote']->attr['cite'] =
|
||||
$this->info['q']->attr['cite'] = $e_URI;
|
||||
|
||||
// URI that causes HTTP request
|
||||
$this->info['img']->attr['src'] = new HTMLPurifier_AttrDef_URI(true);
|
||||
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
// info_tag_transform : transformations of tags
|
||||
|
||||
@@ -415,13 +397,6 @@ class HTMLPurifier_HTMLDefinition
|
||||
|
||||
$this->info_attr_transform_post[] = new HTMLPurifier_AttrTransform_Lang();
|
||||
|
||||
// protect against stdclasses floating around
|
||||
foreach ($this->info as $key => $obj) {
|
||||
if (is_a($obj, 'stdclass')) {
|
||||
unset($this->info[$key]);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
function setAttrForTableElements($attr, $def) {
|
||||
|
@@ -3,9 +3,6 @@
|
||||
/**
|
||||
* Component of HTMLPurifier_AttrContext that accumulates IDs to prevent dupes
|
||||
* @note In Slashdot-speak, dupe means duplicate.
|
||||
* @note This class does not accept $config or $context, thus, it is the
|
||||
* burden of the caller to register the appropriate errors or
|
||||
* configuration.
|
||||
*/
|
||||
class HTMLPurifier_IDAccumulator
|
||||
{
|
||||
|
@@ -122,7 +122,7 @@ class HTMLPurifier_Lexer
|
||||
* @param $string String HTML.
|
||||
* @return HTMLPurifier_Token array representation of HTML.
|
||||
*/
|
||||
function tokenizeHTML($string, $config, &$context) {
|
||||
function tokenizeHTML($string, $config = null) {
|
||||
trigger_error('Call to abstract class', E_USER_ERROR);
|
||||
}
|
||||
|
||||
@@ -196,7 +196,7 @@ class HTMLPurifier_Lexer
|
||||
* Takes a piece of HTML and normalizes it by converting entities, fixing
|
||||
* encoding, extracting bits, and other good stuff.
|
||||
*/
|
||||
function normalize($html, $config, &$context) {
|
||||
function normalize($html, $config) {
|
||||
|
||||
// extract body from document if applicable
|
||||
if ($config->get('Core', 'AcceptFullDocuments')) {
|
||||
|
@@ -38,9 +38,10 @@ class HTMLPurifier_Lexer_DOMLex extends HTMLPurifier_Lexer
|
||||
$this->factory = new HTMLPurifier_TokenFactory();
|
||||
}
|
||||
|
||||
public function tokenizeHTML($string, $config, &$context) {
|
||||
public function tokenizeHTML($string, $config = null) {
|
||||
if (!$config) $config = HTMLPurifier_Config::createDefault();
|
||||
|
||||
$string = $this->normalize($string, $config, $context);
|
||||
$string = $this->normalize($string, $config);
|
||||
|
||||
// preprocess string, essential for UTF-8
|
||||
$string =
|
||||
|
@@ -24,9 +24,11 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer
|
||||
*/
|
||||
var $_whitespace = "\x20\x09\x0D\x0A";
|
||||
|
||||
function tokenizeHTML($html, $config, &$context) {
|
||||
function tokenizeHTML($html, $config = null) {
|
||||
|
||||
$html = $this->normalize($html, $config, $context);
|
||||
if (!$config) $config = HTMLPurifier_Config::createDefault();
|
||||
|
||||
$html = $this->normalize($html, $config);
|
||||
|
||||
$cursor = 0; // our location in the text
|
||||
$inside_tag = false; // whether or not we're parsing the inside of a tag
|
||||
@@ -145,7 +147,6 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer
|
||||
if ($attribute_string) {
|
||||
$attributes = $this->parseAttributeString(
|
||||
$attribute_string
|
||||
, $config, $context
|
||||
);
|
||||
} else {
|
||||
$attributes = array();
|
||||
@@ -180,7 +181,7 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer
|
||||
* @param $string Inside of tag excluding name.
|
||||
* @returns Assoc array of attributes.
|
||||
*/
|
||||
function parseAttributeString($string, $config, &$context) {
|
||||
function parseAttributeString($string) {
|
||||
$string = (string) $string; // quick typecast
|
||||
|
||||
if ($string == '') return array(); // no attributes
|
||||
|
@@ -31,11 +31,12 @@ class HTMLPurifier_Lexer_PEARSax3 extends HTMLPurifier_Lexer
|
||||
*/
|
||||
var $tokens = array();
|
||||
|
||||
function tokenizeHTML($string, $config, &$context) {
|
||||
function tokenizeHTML($string, $config = null) {
|
||||
|
||||
$this->tokens = array();
|
||||
|
||||
$string = $this->normalize($string, $config, $context);
|
||||
if (!$config) $config = HTMLPurifier_Config::createDefault();
|
||||
$string = $this->normalize($string, $config);
|
||||
|
||||
$parser=& new XML_HTMLSax3();
|
||||
$parser->set_object($this);
|
||||
|
@@ -1,47 +0,0 @@
|
||||
<?php
|
||||
|
||||
/**
|
||||
* Class that handles operations involving percent-encoding in URIs.
|
||||
*/
|
||||
class HTMLPurifier_PercentEncoder
|
||||
{
|
||||
|
||||
/**
|
||||
* Fix up percent-encoding by decoding unreserved characters and normalizing
|
||||
* @param $string String to normalize
|
||||
*/
|
||||
function normalize($string) {
|
||||
if ($string == '') return '';
|
||||
$parts = explode('%', $string);
|
||||
$ret = array_shift($parts);
|
||||
foreach ($parts as $part) {
|
||||
$length = strlen($part);
|
||||
if ($length < 2) {
|
||||
$ret .= '%25' . $part;
|
||||
continue;
|
||||
}
|
||||
$encoding = substr($part, 0, 2);
|
||||
$text = substr($part, 2);
|
||||
if (!ctype_xdigit($encoding)) {
|
||||
$ret .= '%25' . $part;
|
||||
continue;
|
||||
}
|
||||
$int = hexdec($encoding);
|
||||
if (
|
||||
($int >= 48 && $int <= 57) || // digits
|
||||
($int >= 65 && $int <= 90) || // uppercase letters
|
||||
($int >= 97 && $int <= 122) || // lowercase letters
|
||||
$int == 126 || $int == 45 || $int == 46 || $int == 95 // ~-._
|
||||
) {
|
||||
$ret .= chr($int) . $text;
|
||||
continue;
|
||||
}
|
||||
$encoding = strtoupper($encoding);
|
||||
$ret .= '%' . $encoding . $text;
|
||||
}
|
||||
return $ret;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
?>
|
@@ -24,7 +24,7 @@ class HTMLPurifier_Strategy
|
||||
* @param $config Configuration options
|
||||
* @returns Processed array of token objects.
|
||||
*/
|
||||
function execute($tokens, $config, &$context) {
|
||||
function execute($tokens, $config = null) {
|
||||
trigger_error('Cannot call abstract function', E_USER_ERROR);
|
||||
}
|
||||
|
||||
|
@@ -18,9 +18,9 @@ class HTMLPurifier_Strategy_Composite extends HTMLPurifier_Strategy
|
||||
trigger_error('Attempt to instantiate abstract object', E_USER_ERROR);
|
||||
}
|
||||
|
||||
function execute($tokens, $config, &$context) {
|
||||
function execute($tokens, $config) {
|
||||
foreach ($this->strategies as $strategy) {
|
||||
$tokens = $strategy->execute($tokens, $config, $context);
|
||||
$tokens = $strategy->execute($tokens, $config);
|
||||
}
|
||||
return $tokens;
|
||||
}
|
||||
|
@@ -34,7 +34,8 @@ require_once 'HTMLPurifier/HTMLDefinition.php';
|
||||
class HTMLPurifier_Strategy_FixNesting extends HTMLPurifier_Strategy
|
||||
{
|
||||
|
||||
function execute($tokens, $config, &$context) {
|
||||
function execute($tokens, $config) {
|
||||
|
||||
//####################################################################//
|
||||
// Pre-processing
|
||||
|
||||
@@ -48,10 +49,6 @@ class HTMLPurifier_Strategy_FixNesting extends HTMLPurifier_Strategy
|
||||
array_unshift($tokens, new HTMLPurifier_Token_Start($parent_name));
|
||||
$tokens[] = new HTMLPurifier_Token_End($parent_name);
|
||||
|
||||
// setup the context variables
|
||||
$parent_type = 'unknown'; // reference var that we alter
|
||||
$context->register('ParentType', $parent_type);
|
||||
|
||||
//####################################################################//
|
||||
// Loop initialization
|
||||
|
||||
@@ -112,10 +109,10 @@ class HTMLPurifier_Strategy_FixNesting extends HTMLPurifier_Strategy
|
||||
|
||||
// calculate context
|
||||
if (isset($parent_def)) {
|
||||
$parent_type = $parent_def->type;
|
||||
$context = $parent_def->type;
|
||||
} else {
|
||||
// generally found in specialized elements like UL
|
||||
$parent_type = 'unknown';
|
||||
$context = 'unknown';
|
||||
}
|
||||
|
||||
//################################################################//
|
||||
@@ -148,7 +145,7 @@ class HTMLPurifier_Strategy_FixNesting extends HTMLPurifier_Strategy
|
||||
|
||||
// have DTD child def validate children
|
||||
$result = $child_def->validateChildren(
|
||||
$child_tokens, $config, $context);
|
||||
$child_tokens, $config,$context);
|
||||
|
||||
// determine whether or not this element has any exclusions
|
||||
$excludes = $def->excludes;
|
||||
@@ -250,9 +247,6 @@ class HTMLPurifier_Strategy_FixNesting extends HTMLPurifier_Strategy
|
||||
array_shift($tokens);
|
||||
array_pop($tokens);
|
||||
|
||||
// remove context variables
|
||||
$context->destroy('ParentType');
|
||||
|
||||
//####################################################################//
|
||||
// Return
|
||||
|
||||
|
@@ -10,7 +10,7 @@ require_once 'HTMLPurifier/Generator.php';
|
||||
class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
|
||||
{
|
||||
|
||||
function execute($tokens, $config, &$context) {
|
||||
function execute($tokens, $config) {
|
||||
$definition = $config->getHTMLDefinition();
|
||||
$generator = new HTMLPurifier_Generator();
|
||||
$result = array();
|
||||
@@ -86,7 +86,7 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
|
||||
if (empty($current_nesting)) {
|
||||
if ($escape_invalid_tags) {
|
||||
$result[] = new HTMLPurifier_Token_Text(
|
||||
$generator->generateFromToken($token, $config, $context)
|
||||
$generator->generateFromToken($token, $config)
|
||||
);
|
||||
}
|
||||
continue;
|
||||
@@ -123,7 +123,7 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
|
||||
if ($skipped_tags === false) {
|
||||
if ($escape_invalid_tags) {
|
||||
$result[] = new HTMLPurifier_Token_Text(
|
||||
$generator->generateFromToken($token, $config, $context)
|
||||
$generator->generateFromToken($token, $config)
|
||||
);
|
||||
}
|
||||
continue;
|
||||
|
@@ -16,7 +16,7 @@ require_once 'HTMLPurifier/TagTransform.php';
|
||||
class HTMLPurifier_Strategy_RemoveForeignElements extends HTMLPurifier_Strategy
|
||||
{
|
||||
|
||||
function execute($tokens, $config, &$context) {
|
||||
function execute($tokens, $config) {
|
||||
$definition = $config->getHTMLDefinition();
|
||||
$generator = new HTMLPurifier_Generator();
|
||||
$result = array();
|
||||
@@ -33,11 +33,11 @@ class HTMLPurifier_Strategy_RemoveForeignElements extends HTMLPurifier_Strategy
|
||||
// DEFINITION CALL
|
||||
$token = $definition->
|
||||
info_tag_transform[$token->name]->
|
||||
transform($token, $config, $context);
|
||||
transform($token);
|
||||
} elseif ($escape_invalid_tags) {
|
||||
// invalid tag, generate HTML and insert in
|
||||
$token = new HTMLPurifier_Token_Text(
|
||||
$generator->generateFromToken($token, $config, $context)
|
||||
$generator->generateFromToken($token, $config)
|
||||
);
|
||||
} else {
|
||||
continue;
|
||||
|
@@ -3,6 +3,8 @@
|
||||
require_once 'HTMLPurifier/Strategy.php';
|
||||
require_once 'HTMLPurifier/HTMLDefinition.php';
|
||||
require_once 'HTMLPurifier/IDAccumulator.php';
|
||||
require_once 'HTMLPurifier/ConfigSchema.php';
|
||||
require_once 'HTMLPurifier/AttrContext.php';
|
||||
|
||||
HTMLPurifier_ConfigSchema::define(
|
||||
'Attr', 'IDBlacklist', array(), 'list',
|
||||
@@ -15,14 +17,18 @@ HTMLPurifier_ConfigSchema::define(
|
||||
class HTMLPurifier_Strategy_ValidateAttributes extends HTMLPurifier_Strategy
|
||||
{
|
||||
|
||||
function execute($tokens, $config, &$context) {
|
||||
function execute($tokens, $config) {
|
||||
|
||||
$definition = $config->getHTMLDefinition();
|
||||
|
||||
// setup id_accumulator context
|
||||
$id_accumulator = new HTMLPurifier_IDAccumulator();
|
||||
$id_accumulator->load($config->get('Attr', 'IDBlacklist'));
|
||||
$context->register('IDAccumulator', $id_accumulator);
|
||||
// setup StrategyContext
|
||||
$context = new HTMLPurifier_AttrContext();
|
||||
|
||||
// setup ID accumulator and load it with blacklisted IDs
|
||||
// eventually, we'll have a dedicated context object to hold
|
||||
// all these accumulators and caches. For now, just an IDAccumulator
|
||||
$context->id_accumulator = new HTMLPurifier_IDAccumulator();
|
||||
$context->id_accumulator->load($config->get('Attr', 'IDBlacklist'));
|
||||
|
||||
// create alias to global definition array, see also $defs
|
||||
// DEFINITION CALL
|
||||
@@ -38,17 +44,19 @@ class HTMLPurifier_Strategy_ValidateAttributes extends HTMLPurifier_Strategy
|
||||
$attr = $token->attributes;
|
||||
|
||||
// do global transformations (pre)
|
||||
// nothing currently utilizes this
|
||||
// ex. <ELEMENT lang="fr"> to <ELEMENT lang="fr" xml:lang="fr">
|
||||
// DEFINITION CALL
|
||||
foreach ($definition->info_attr_transform_pre as $transform) {
|
||||
$attr = $transform->transform($attr, $config, $context);
|
||||
$attr = $transform->transform($attr, $config);
|
||||
}
|
||||
|
||||
// do local transformations only applicable to this element (pre)
|
||||
// ex. <p align="right"> to <p style="text-align:right;">
|
||||
// DEFINITION CALL
|
||||
foreach ($definition->info[$token->name]->attr_transform_pre
|
||||
as $transform
|
||||
) {
|
||||
$attr = $transform->transform($attr, $config, $context);
|
||||
$attr = $transform->transform($attr, $config);
|
||||
}
|
||||
|
||||
// create alias to this element's attribute definition array, see
|
||||
@@ -104,23 +112,17 @@ class HTMLPurifier_Strategy_ValidateAttributes extends HTMLPurifier_Strategy
|
||||
}
|
||||
|
||||
// post transforms
|
||||
|
||||
// ex. <x lang="fr"> to <x lang="fr" xml:lang="fr">
|
||||
foreach ($definition->info_attr_transform_post as $transform) {
|
||||
$attr = $transform->transform($attr, $config, $context);
|
||||
$attr = $transform->transform($attr, $config);
|
||||
}
|
||||
|
||||
// ex. <bdo> to <bdo dir="ltr">
|
||||
foreach ($definition->info[$token->name]->attr_transform_post as $transform) {
|
||||
$attr = $transform->transform($attr, $config, $context);
|
||||
$attr = $transform->transform($attr, $config);
|
||||
}
|
||||
|
||||
// commit changes
|
||||
// could interfere with flyweight implementation
|
||||
$tokens[$key]->attributes = $attr;
|
||||
}
|
||||
$context->destroy('IDAccumulator');
|
||||
|
||||
return $tokens;
|
||||
}
|
||||
|
||||
|
@@ -17,10 +17,8 @@ class HTMLPurifier_TagTransform
|
||||
/**
|
||||
* Transforms the obsolete tag into the valid tag.
|
||||
* @param $tag Tag to be transformed.
|
||||
* @param $config Mandatory HTMLPurifier_Config object
|
||||
* @param $context Mandatory HTMLPurifier_Context object
|
||||
*/
|
||||
function transform($tag, $config, &$context) {
|
||||
function transform($tag) {
|
||||
trigger_error('Call to abstract function', E_USER_ERROR);
|
||||
}
|
||||
|
||||
@@ -39,7 +37,7 @@ class HTMLPurifier_TagTransform_Simple extends HTMLPurifier_TagTransform
|
||||
$this->transform_to = $transform_to;
|
||||
}
|
||||
|
||||
function transform($tag, $config, &$context) {
|
||||
function transform($tag) {
|
||||
$new_tag = $tag->copy();
|
||||
$new_tag->name = $this->transform_to;
|
||||
return $new_tag;
|
||||
@@ -57,7 +55,7 @@ class HTMLPurifier_TagTransform_Center extends HTMLPurifier_TagTransform
|
||||
{
|
||||
var $transform_to = 'div';
|
||||
|
||||
function transform($tag, $config, &$context) {
|
||||
function transform($tag) {
|
||||
if ($tag->type == 'end') {
|
||||
$new_tag = new HTMLPurifier_Token_End($this->transform_to);
|
||||
return $new_tag;
|
||||
@@ -108,7 +106,7 @@ class HTMLPurifier_TagTransform_Font extends HTMLPurifier_TagTransform
|
||||
'+4' => '300%'
|
||||
);
|
||||
|
||||
function transform($tag, $config, &$context) {
|
||||
function transform($tag) {
|
||||
|
||||
if ($tag->type == 'end') {
|
||||
$new_tag = new HTMLPurifier_Token_End($this->transform_to);
|
||||
|
@@ -12,13 +12,6 @@ class HTMLPurifier_URIScheme
|
||||
*/
|
||||
var $default_port = null;
|
||||
|
||||
/**
|
||||
* Whether or not URIs of this scheme are locatable by a browser
|
||||
* http and ftp are accessible, while mailto and news are not.
|
||||
* @public
|
||||
*/
|
||||
var $browsable = false;
|
||||
|
||||
/**
|
||||
* Validates the components of a URI
|
||||
* @note This implementation should be called by children if they define
|
||||
@@ -30,10 +23,9 @@ class HTMLPurifier_URIScheme
|
||||
* @param $path Path of URI
|
||||
* @param $query Query of URI, found after question mark
|
||||
* @param $config HTMLPurifier_Config object
|
||||
* @param $context HTMLPurifier_Context object
|
||||
*/
|
||||
function validateComponents(
|
||||
$userinfo, $host, $port, $path, $query, $config, &$context
|
||||
$userinfo, $host, $port, $path, $query, $config
|
||||
) {
|
||||
if ($this->default_port == $port) $port = null;
|
||||
return array($userinfo, $host, $port, $path, $query);
|
||||
|
@@ -8,14 +8,13 @@ require_once 'HTMLPurifier/URIScheme.php';
|
||||
class HTMLPurifier_URIScheme_ftp extends HTMLPurifier_URIScheme {
|
||||
|
||||
var $default_port = 21;
|
||||
var $browsable = true; // usually
|
||||
|
||||
function validateComponents(
|
||||
$userinfo, $host, $port, $path, $query, $config, &$context
|
||||
$userinfo, $host, $port, $path, $query, $config
|
||||
) {
|
||||
list($userinfo, $host, $port, $path, $query) =
|
||||
parent::validateComponents(
|
||||
$userinfo, $host, $port, $path, $query, $config, $context );
|
||||
$userinfo, $host, $port, $path, $query, $config );
|
||||
$semicolon_pos = strrpos($path, ';'); // reverse
|
||||
if ($semicolon_pos !== false) {
|
||||
// typecode check
|
||||
|
@@ -8,14 +8,13 @@ require_once 'HTMLPurifier/URIScheme.php';
|
||||
class HTMLPurifier_URIScheme_http extends HTMLPurifier_URIScheme {
|
||||
|
||||
var $default_port = 80;
|
||||
var $browsable = true;
|
||||
|
||||
function validateComponents(
|
||||
$userinfo, $host, $port, $path, $query, $config, &$context
|
||||
$userinfo, $host, $port, $path, $query, $config
|
||||
) {
|
||||
list($userinfo, $host, $port, $path, $query) =
|
||||
parent::validateComponents(
|
||||
$userinfo, $host, $port, $path, $query, $config, $context );
|
||||
$userinfo, $host, $port, $path, $query, $config );
|
||||
return array(null, $host, $port, $path, $query);
|
||||
}
|
||||
|
||||
|
@@ -13,14 +13,12 @@ require_once 'HTMLPurifier/URIScheme.php';
|
||||
|
||||
class HTMLPurifier_URIScheme_mailto extends HTMLPurifier_URIScheme {
|
||||
|
||||
var $browsable = false;
|
||||
|
||||
function validateComponents(
|
||||
$userinfo, $host, $port, $path, $query, $config, &$context
|
||||
$userinfo, $host, $port, $path, $query, $config
|
||||
) {
|
||||
list($userinfo, $host, $port, $path, $query) =
|
||||
parent::validateComponents(
|
||||
$userinfo, $host, $port, $path, $query, $config, $context );
|
||||
$userinfo, $host, $port, $path, $query, $config );
|
||||
// we need to validate path against RFC 2368's addr-spec
|
||||
return array(null, null, null, $path, $query);
|
||||
}
|
||||
|
@@ -7,14 +7,12 @@ require_once 'HTMLPurifier/URIScheme.php';
|
||||
*/
|
||||
class HTMLPurifier_URIScheme_news extends HTMLPurifier_URIScheme {
|
||||
|
||||
var $browsable = false;
|
||||
|
||||
function validateComponents(
|
||||
$userinfo, $host, $port, $path, $query, $config, &$context
|
||||
$userinfo, $host, $port, $path, $query, $config
|
||||
) {
|
||||
list($userinfo, $host, $port, $path, $query) =
|
||||
parent::validateComponents(
|
||||
$userinfo, $host, $port, $path, $query, $config, $context );
|
||||
$userinfo, $host, $port, $path, $query, $config );
|
||||
// typecode check needed on path
|
||||
return array(null, null, null, $path, null);
|
||||
}
|
||||
|
@@ -8,14 +8,13 @@ require_once 'HTMLPurifier/URIScheme.php';
|
||||
class HTMLPurifier_URIScheme_nntp extends HTMLPurifier_URIScheme {
|
||||
|
||||
var $default_port = 119;
|
||||
var $browsable = false;
|
||||
|
||||
function validateComponents(
|
||||
$userinfo, $host, $port, $path, $query, $config, &$context
|
||||
$userinfo, $host, $port, $path, $query, $config
|
||||
) {
|
||||
list($userinfo, $host, $port, $path, $query) =
|
||||
parent::validateComponents(
|
||||
$userinfo, $host, $port, $path, $query, $config, $context );
|
||||
$userinfo, $host, $port, $path, $query, $config );
|
||||
return array(null, $host, $port, $path, null);
|
||||
}
|
||||
|
||||
|
@@ -63,9 +63,8 @@ class HTMLPurifier_URISchemeRegistry
|
||||
* Retrieves a scheme validator object
|
||||
* @param $scheme String scheme name like http or mailto
|
||||
* @param $config HTMLPurifier_Config object
|
||||
* @param $context HTMLPurifier_Context object
|
||||
*/
|
||||
function &getScheme($scheme, $config, &$context) {
|
||||
function &getScheme($scheme, $config = null) {
|
||||
if (!$config) $config = HTMLPurifier_Config::createDefault();
|
||||
$null = null; // for the sake of passing by reference
|
||||
|
||||
|
@@ -13,7 +13,7 @@ chdir( dirname(__FILE__) );
|
||||
$entity_dir = '../docs/entities/';
|
||||
|
||||
// defines the output file for the serialized content.
|
||||
$output_file = '../library/HTMLPurifier/EntityLookup/entities.ser';
|
||||
$output_file = '../library/HTMLPurifier/EntityLookup/data.txt';
|
||||
|
||||
// courtesy of a PHP manual comment
|
||||
function unichr($dec) {
|
||||
|
@@ -1,91 +0,0 @@
|
||||
|
||||
MODx Plugin
|
||||
|
||||
MODx <http://www.modxcms.com/> is an open source PHP application framework.
|
||||
I first came across them in my referrer logs when tillda asked if anyone
|
||||
could implement an HTML Purifier plugin. This forum thread
|
||||
<http://modxcms.com/forums/index.php/topic,6604.0.html> eventually resulted
|
||||
in the fruition of this plugin that davidm says, "is on top of my favorite
|
||||
list." HTML Purifier goes great with WYSIWYG editors!
|
||||
|
||||
|
||||
|
||||
1. Credits
|
||||
|
||||
PaulGregory wrote the overall structure of the code. I added the
|
||||
slashes hack.
|
||||
|
||||
|
||||
|
||||
2. Install
|
||||
|
||||
First, you need to place the HTML Purifier library somewhere. The code here
|
||||
assumes that you've placed it in MODx's assets/plugins/htmlpurifier (no version
|
||||
number).
|
||||
|
||||
Log into the manager, and navigate:
|
||||
|
||||
Resources > Manage Resources > Plugins tab > New Plugin
|
||||
|
||||
Type in a name (probably HTML Purifier), and copy paste this code into the
|
||||
textarea:
|
||||
|
||||
--------------------------------------------------------------------------------
|
||||
$e = &$modx->Event;
|
||||
if ($e->name == 'OnBeforeDocFormSave') {
|
||||
global $content;
|
||||
|
||||
set_include_path('../assets/plugins/htmlpurifier/library/'
|
||||
. PATH_SEPARATOR . get_include_path());
|
||||
include_once 'HTMLPurifier.php';
|
||||
$purifier = new HTMLPurifier();
|
||||
|
||||
static $magic_quotes = null;
|
||||
if ($magic_quotes === null) {
|
||||
// this is an ugly hack because this hook hasn't
|
||||
// had the backslashes removed yet when magic_quotes_gpc is on,
|
||||
// but HTMLPurifier must not have the quotes slashed.
|
||||
$magic_quotes = get_magic_quotes_gpc();
|
||||
}
|
||||
|
||||
if ($magic_quotes) $content = stripslashes($content);
|
||||
$content = $purifier->purify($content);
|
||||
if ($magic_quotes) $content = addslashes($content);
|
||||
}
|
||||
--------------------------------------------------------------------------------
|
||||
|
||||
Then navigate to the System Events tab and check "OnBeforeDocFormSave".
|
||||
Save the plugin. HTML Purifier now is integrated!
|
||||
|
||||
|
||||
|
||||
3. Making sure it works
|
||||
|
||||
You can test HTML Purifier by deliberately putting in crappy HTML and seeing
|
||||
whether or not it gets fixed. A better way is to put in something like this:
|
||||
|
||||
<p lang="fr">Il est bon</p>
|
||||
|
||||
...and seeing whether or not the content comes out as:
|
||||
|
||||
<p lang="fr" xml:lang="fr">Il est bon</p>
|
||||
|
||||
(lang to xml:lang synchronization is one of the many features HTML Purifier
|
||||
has).
|
||||
|
||||
|
||||
|
||||
4. Caveat Emptor
|
||||
|
||||
This code does not intercept save requests from the QuickEdit plugin; this may
|
||||
be added in a later version. It also modifies things on save, so there's a
|
||||
slight chance that HTML Purifier may make a boo-boo and accidentally mess things
|
||||
up (the original version is not saved).
|
||||
|
||||
Finally, make sure that MODx is using UTF-8. If you are using, say, a French
|
||||
localisation, you may be using Latin-1; if that's the case, configure
|
||||
HTML Purifier properly like this:
|
||||
|
||||
$config = HTMLPurifier_Config::createDefault();
|
||||
$config->set('Core', 'Encoding', 'ISO-8859-1'); // or whatever encoding
|
||||
$purifier = new HTMLPurifier($config);
|
@@ -2,19 +2,6 @@
|
||||
|
||||
require_once('common.php');
|
||||
|
||||
function formatCode($string) {
|
||||
return
|
||||
str_replace(
|
||||
array("\t", '»', '\0(null)'),
|
||||
array('<strong>\t</strong>', '<span class="linebreak">»</span>', '<strong>\0</strong>'),
|
||||
escapeHTML(
|
||||
str_replace("\0", '\0(null)',
|
||||
wordwrap($string, 28, " »\n", true)
|
||||
)
|
||||
)
|
||||
);
|
||||
}
|
||||
|
||||
?><!DOCTYPE html
|
||||
PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
|
||||
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
|
||||
@@ -22,26 +9,15 @@ function formatCode($string) {
|
||||
<head>
|
||||
<title>HTMLPurifier XSS Attacks Smoketest</title>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
|
||||
<style type="text/css">
|
||||
.scroll {overflow:auto; width:100%;}
|
||||
.even {background:#EAEAEA;}
|
||||
thead th {border-bottom:1px solid #000;}
|
||||
pre strong {color:#00C;}
|
||||
pre .linebreak {color:#AAA;font-weight:100;}
|
||||
</style>
|
||||
</head>
|
||||
<body>
|
||||
<h1>HTMLPurifier XSS Attacks Smoketest</h1>
|
||||
<p>XSS attacks are from
|
||||
<a href="http://ha.ckers.org/xss.html">http://ha.ckers.org/xss.html</a>.</p>
|
||||
<p><strong>Caveats:</strong>
|
||||
The last segment of tests regarding blacklisted websites is not
|
||||
<p>The last segment of tests regarding blacklisted websites is not
|
||||
applicable at the moment, but when we add that functionality they'll be
|
||||
relevant. Most XSS broadcasts its presence by spawning an alert dialogue.
|
||||
The displayed code is not strictly correct, as linebreaks have been forced for
|
||||
readability. Linewraps have been marked with <tt>»</tt>. Some tests are
|
||||
omitted for your convenience. Not all control characters are displayed.</p>
|
||||
|
||||
relevant.</p>
|
||||
<p>Most of the XSS broadcasts its presence by spawning an alert dialogue.</p>
|
||||
<h2>Test</h2>
|
||||
<?php
|
||||
|
||||
@@ -51,35 +27,24 @@ $xml = simplexml_load_file('xssAttacks.xml');
|
||||
$purifier = new HTMLPurifier();
|
||||
|
||||
?>
|
||||
<table cellspacing="0" cellpadding="2">
|
||||
<!-- form is used so that we can use textareas and stay valid -->
|
||||
<form method="post" action="xssAttacks.php">
|
||||
<table>
|
||||
<thead><tr><th>Name</th><th width="30%">Raw</th><th>Output</th><th>Render</th></tr></thead>
|
||||
<tbody>
|
||||
<?php
|
||||
|
||||
$i = 0;
|
||||
foreach ($xml->attack as $attack) {
|
||||
$code = $attack->code;
|
||||
|
||||
// custom code for null byte injection tests
|
||||
if (substr($code, 0, 7) == 'perl -e') {
|
||||
$code = substr($code, $i=strpos($code, '"')+1, strrpos($code, '"') - $i);
|
||||
$code = str_replace('\0', "\0", $code);
|
||||
}
|
||||
|
||||
// disable vectors we cannot test in any meaningful way
|
||||
if ($code == 'See Below') continue; // event handlers, whitelist defeats
|
||||
if ($attack->name == 'OBJECT w/Flash 2') continue; // requires ActionScript
|
||||
if ($attack->name == 'IMG Embedded commands 2') continue; // is an HTTP response
|
||||
|
||||
// custom code for US-ASCII, which couldn't be expressed in XML without encoding
|
||||
if ($attack->name == 'US-ASCII encoding') $code = urldecode($code);
|
||||
?>
|
||||
<tr<?php if ($i++ % 2) {echo ' class="even"';} ?>>
|
||||
<tr>
|
||||
<td><?php echo escapeHTML($attack->name); ?></td>
|
||||
<td><pre><?php echo formatCode($code); ?></pre></td>
|
||||
<td><textarea readonly="readonly" cols="20" rows="2"><?php echo escapeHTML($code); ?></textarea></td>
|
||||
<?php $pure_html = $purifier->purify($code); ?>
|
||||
<td><pre><?php echo formatCode($pure_html); ?></pre></td>
|
||||
<td><div class="scroll"><?php echo $pure_html ?></div></td>
|
||||
<td><textarea readonly="readonly" cols="20" rows="2"><?php echo escapeHTML($pure_html); ?></textarea></td>
|
||||
<td><?php echo $pure_html ?></td>
|
||||
</tr>
|
||||
<?php
|
||||
}
|
||||
@@ -87,5 +52,6 @@ foreach ($xml->attack as $attack) {
|
||||
?>
|
||||
</tbody>
|
||||
</table>
|
||||
</form>
|
||||
</body>
|
||||
</html>
|
File diff suppressed because it is too large
Load Diff
@@ -23,7 +23,7 @@ class HTMLPurifier_AttrDef_CompositeTest extends HTMLPurifier_AttrDefHarness
|
||||
generate_mock_once('HTMLPurifier_AttrDef');
|
||||
|
||||
$config = HTMLPurifier_Config::createDefault();
|
||||
$context = new HTMLPurifier_Context();
|
||||
$context = new HTMLPurifier_AttrContext();
|
||||
|
||||
// first test: value properly validates on first definition
|
||||
// so second def is never called
|
||||
|
@@ -1,16 +0,0 @@
|
||||
<?php
|
||||
|
||||
require_once 'HTMLPurifier/AttrDef/Email/SimpleCheck.php';
|
||||
require_once 'HTMLPurifier/AttrDef/EmailHarness.php';
|
||||
|
||||
class HTMLPurifier_AttrDef_Email_SimpleCheckTest
|
||||
extends HTMLPurifier_AttrDef_EmailHarness
|
||||
{
|
||||
|
||||
function setUp() {
|
||||
$this->def = new HTMLPurifier_AttrDef_Email_SimpleCheck();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
?>
|
@@ -1,33 +0,0 @@
|
||||
<?php
|
||||
|
||||
require_once 'HTMLPurifier/AttrDef/Email.php';
|
||||
|
||||
class HTMLPurifier_AttrDef_EmailHarness extends HTMLPurifier_AttrDefHarness
|
||||
{
|
||||
|
||||
/**
|
||||
* Tests common email strings that are obviously pass/fail
|
||||
*/
|
||||
function testCore() {
|
||||
$this->assertDef('bob@example.com');
|
||||
$this->assertDef(' bob@example.com ', 'bob@example.com');
|
||||
$this->assertDef('bob.thebuilder@example.net');
|
||||
$this->assertDef('Bob_the_Builder-the-2nd@example.org');
|
||||
$this->assertDef('Bob%20the%20Builder@white-space.test');
|
||||
|
||||
// extended format, with real name
|
||||
//$this->assertDef('Bob%20Builder%20%3Cbobby.bob.bob@it.is.example.com%3E');
|
||||
//$this->assertDef('Bob Builder <bobby.bob.bob@it.is.example.com>');
|
||||
|
||||
// time to fail
|
||||
$this->assertDef('bob', false);
|
||||
$this->assertDef('bob@home@work', false);
|
||||
$this->assertDef('@example.com', false);
|
||||
$this->assertDef('bob@', false);
|
||||
$this->assertDef('', false);
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
?>
|
@@ -7,17 +7,12 @@ require_once 'HTMLPurifier/IDAccumulator.php';
|
||||
class HTMLPurifier_AttrDef_IDTest extends HTMLPurifier_AttrDefHarness
|
||||
{
|
||||
|
||||
function setUp() {
|
||||
parent::setUp();
|
||||
|
||||
$id_accumulator = new HTMLPurifier_IDAccumulator();
|
||||
$this->context->register('IDAccumulator', $id_accumulator);
|
||||
$this->def = new HTMLPurifier_AttrDef_ID();
|
||||
|
||||
}
|
||||
|
||||
function test() {
|
||||
|
||||
$this->context = new HTMLPurifier_AttrContext();
|
||||
$this->context->id_accumulator = new HTMLPurifier_IDAccumulator();
|
||||
$this->def = new HTMLPurifier_AttrDef_ID();
|
||||
|
||||
// valid ID names
|
||||
$this->assertDef('alpha');
|
||||
$this->assertDef('al_ha');
|
||||
@@ -30,52 +25,13 @@ class HTMLPurifier_AttrDef_IDTest extends HTMLPurifier_AttrDefHarness
|
||||
$this->assertDef('.asa', false);
|
||||
|
||||
// test duplicate detection
|
||||
$this->assertDef('once');
|
||||
$this->assertDef('once', false);
|
||||
$this->assertDef('a', false);
|
||||
|
||||
// valid once whitespace stripped, but needs to be amended
|
||||
$this->assertDef(' whee ', 'whee');
|
||||
|
||||
}
|
||||
|
||||
function testPrefix() {
|
||||
|
||||
$this->config->set('Attr', 'IDPrefix', 'user_');
|
||||
|
||||
$this->assertDef('alpha', 'user_alpha');
|
||||
$this->assertDef('<asa', false);
|
||||
$this->assertDef('once', 'user_once');
|
||||
$this->assertDef('once', false);
|
||||
|
||||
// if already prefixed, leave alone
|
||||
$this->assertDef('user_alas');
|
||||
$this->assertDef('user_user_alas'); // how to bypass
|
||||
|
||||
}
|
||||
|
||||
function testTwoPrefixes() {
|
||||
|
||||
$this->config->set('Attr', 'IDPrefix', 'user_');
|
||||
$this->config->set('Attr', 'IDPrefixLocal', 'story95_');
|
||||
|
||||
$this->assertDef('alpha', 'user_story95_alpha');
|
||||
$this->assertDef('<asa', false);
|
||||
$this->assertDef('once', 'user_story95_once');
|
||||
$this->assertDef('once', false);
|
||||
|
||||
$this->assertDef('user_story95_alas');
|
||||
$this->assertDef('user_alas', 'user_story95_user_alas'); // !
|
||||
|
||||
$this->config->set('Attr', 'IDPrefix', '');
|
||||
$this->assertDef('amherst'); // no affect when IDPrefix isn't set
|
||||
$this->assertError('%Attr.IDPrefixLocal cannot be used unless '.
|
||||
'%Attr.IDPrefix is set');
|
||||
// SimpleTest has a bug and throws a sprintf error
|
||||
// $this->assertNoErrors();
|
||||
$this->swallowErrors();
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
?>
|
@@ -4,6 +4,7 @@ require_once 'HTMLPurifier/AttrDefHarness.php';
|
||||
require_once 'HTMLPurifier/AttrDef/URI.php';
|
||||
|
||||
// WARNING: INCOMPLETE UNIT TESTS!
|
||||
// we are currently abstaining percent-encode fixing unit tests
|
||||
// we also need to test all the configuration directives defined by this class
|
||||
|
||||
// http: is returned quite often when a URL is invalid. We have to change
|
||||
@@ -82,11 +83,10 @@ class HTMLPurifier_AttrDef_URITest extends HTMLPurifier_AttrDefHarness
|
||||
// %5 - prematurely terminated, encode %
|
||||
// %FC - u with umlaut, correct
|
||||
// note that Apache doesn't do such fixing, rather, it just claims
|
||||
// that the browser sent a "Bad Request". See PercentEncoder.php
|
||||
// for more details
|
||||
$uri[6] = 'http://www.example.com/%56%fc%GJ%5%FC';
|
||||
$components[6] = array(null, 'www.example.com', null, '/V%FC%25GJ%255%FC', null);
|
||||
$expect_uri[6] = 'http://www.example.com/V%FC%25GJ%255%FC';
|
||||
// that the browser sent a "Bad Request".
|
||||
//$uri[6] = 'http://www.example.com/%56%fc%GJ%5%FC';
|
||||
//$components[6] = array('www.example.com', '/V%FC%25GJ%255%FC', null, null);
|
||||
//$expect_uri[6] = 'http://www.example.com/V%FC%25GJ%255%FC';
|
||||
|
||||
// test IPv4 address (behavior may vary with configuration)
|
||||
$uri[7] = 'http://192.0.34.166/';
|
||||
@@ -152,7 +152,9 @@ class HTMLPurifier_AttrDef_URITest extends HTMLPurifier_AttrDefHarness
|
||||
$uri[17] = 'javascript:alert("moo");';
|
||||
$expect_uri[17] = false;
|
||||
|
||||
// relative URIs - basic case
|
||||
// relative URIs
|
||||
|
||||
// test basic case
|
||||
$uri[18] = '/a/b';
|
||||
$components[18] = array(null, null, null, '/a/b', null);
|
||||
|
||||
@@ -168,10 +170,6 @@ class HTMLPurifier_AttrDef_URITest extends HTMLPurifier_AttrDefHarness
|
||||
|
||||
foreach ($uri as $i => $value) {
|
||||
|
||||
// the read in values
|
||||
$this->config = isset($config[$i]) ? $config[$i] : HTMLPurifier_Config::createDefault();
|
||||
$this->context = isset($context[$i]) ? $context[$i] : new HTMLPurifier_Context();
|
||||
|
||||
// setUpAssertDef
|
||||
if ( isset($components[$i]) ) {
|
||||
$this->components = $components[$i];
|
||||
@@ -189,6 +187,10 @@ class HTMLPurifier_AttrDef_URITest extends HTMLPurifier_AttrDefHarness
|
||||
$expect_uri[$i] = $value; // untouched
|
||||
}
|
||||
|
||||
// the read in values
|
||||
$this->config = isset($config[$i]) ? $config[$i] : null;
|
||||
$this->context = isset($context[$i]) ? $context[$i] : null;
|
||||
|
||||
$this->assertDef($value, $expect_uri[$i], true, "Test $i: %s");
|
||||
|
||||
}
|
||||
@@ -205,21 +207,20 @@ class HTMLPurifier_AttrDef_URITest extends HTMLPurifier_AttrDefHarness
|
||||
$fake_registry = new HTMLPurifier_URISchemeRegistryMock($this);
|
||||
$registry =& HTMLPurifier_URISchemeRegistry::instance($fake_registry);
|
||||
|
||||
// now, let's add a pseudo-scheme to the registry
|
||||
// now, let's at a pseudo-scheme to the registry
|
||||
$this->scheme =& new HTMLPurifier_URISchemeMock($this);
|
||||
|
||||
// here are the schemes we will support with overloaded mocks
|
||||
$registry->setReturnReference('getScheme', $this->scheme, array('http', $this->config, $this->context));
|
||||
$registry->setReturnReference('getScheme', $this->scheme, array('mailto', $this->config, $this->context));
|
||||
$registry->setReturnReference('getScheme', $this->scheme, array('http', $this->config));
|
||||
$registry->setReturnReference('getScheme', $this->scheme, array('mailto', $this->config));
|
||||
|
||||
// default return value is false (meaning no scheme defined: reject)
|
||||
$registry->setReturnValue('getScheme', false, array('*', $this->config, $this->context));
|
||||
$registry->setReturnValue('getScheme', false, array('*', $this->config));
|
||||
|
||||
if ($this->components === false) {
|
||||
$this->scheme->expectNever('validateComponents');
|
||||
} else {
|
||||
$this->components[] = $this->config; // append the configuration
|
||||
$this->components[] =& $this->context; // append context
|
||||
$this->scheme->setReturnValue(
|
||||
'validateComponents', $this->return_components, $this->components);
|
||||
$this->scheme->expectOnce('validateComponents', $this->components);
|
||||
@@ -233,6 +234,7 @@ class HTMLPurifier_AttrDef_URITest extends HTMLPurifier_AttrDefHarness
|
||||
function testIntegration() {
|
||||
|
||||
$this->def = new HTMLPurifier_AttrDef_URI();
|
||||
$this->config = $this->context = null;
|
||||
|
||||
$this->assertDef('http://www.google.com/');
|
||||
$this->assertDef('javascript:bad_stuff();', false);
|
||||
@@ -243,34 +245,6 @@ class HTMLPurifier_AttrDef_URITest extends HTMLPurifier_AttrDefHarness
|
||||
|
||||
}
|
||||
|
||||
function testDisableExternal() {
|
||||
|
||||
$this->def = new HTMLPurifier_AttrDef_URI();
|
||||
$this->config->set('URI', 'DisableExternal', true);
|
||||
|
||||
$this->assertDef('/foobar.txt');
|
||||
$this->assertDef('http://google.com/', false);
|
||||
$this->assertDef('http://sub.example.com/alas?foo=asd', false);
|
||||
|
||||
$this->config->set('URI', 'Host', 'sub.example.com');
|
||||
|
||||
$this->assertDef('http://sub.example.com/alas?foo=asd');
|
||||
$this->assertDef('http://example.com/teehee', false);
|
||||
$this->assertDef('http://www.example.com/#man', false);
|
||||
$this->assertDef('http://go.sub.example.com/perhaps?p=foo');
|
||||
|
||||
}
|
||||
|
||||
function testEmbeds() {
|
||||
|
||||
// embedded URI
|
||||
$this->def = new HTMLPurifier_AttrDef_URI(true);
|
||||
|
||||
$this->assertDef('http://sub.example.com/alas?foo=asd');
|
||||
$this->assertDef('mailto:foo@example.com', false);
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
?>
|
@@ -7,14 +7,11 @@ class HTMLPurifier_AttrDefHarness extends UnitTestCase
|
||||
var $context;
|
||||
var $config;
|
||||
|
||||
function setUp() {
|
||||
$this->config = HTMLPurifier_Config::createDefault();
|
||||
$this->context = new HTMLPurifier_Context();
|
||||
}
|
||||
|
||||
// cannot be used for accumulator
|
||||
function assertDef($string, $expect = true, $ini = false, $message = '%s') {
|
||||
// $expect can be a string or bool
|
||||
if (!$this->config) $this->config = HTMLPurifier_Config::createDefault();
|
||||
if (!$this->context) $this->context = new HTMLPurifier_AttrContext();
|
||||
if ($ini) $this->setUpAssertDef();
|
||||
$result = $this->def->validate($string, $this->config, $this->context);
|
||||
if ($expect === true) {
|
||||
|
@@ -5,24 +5,29 @@ require_once 'HTMLPurifier/AttrTransform/BdoDir.php';
|
||||
class HTMLPurifier_AttrTransform_BdoDirTest extends HTMLPurifier_AttrTransformHarness
|
||||
{
|
||||
|
||||
function setUp() {
|
||||
parent::setUp();
|
||||
$this->obj = new HTMLPurifier_AttrTransform_BdoDir();
|
||||
}
|
||||
|
||||
function test() {
|
||||
|
||||
$this->assertResult( array(), array('dir' => 'ltr') );
|
||||
$this->transform = new HTMLPurifier_AttrTransform_BdoDir();
|
||||
|
||||
$inputs = array();
|
||||
$expect = array();
|
||||
$config = array();
|
||||
|
||||
// add dir
|
||||
$inputs[0] = array();
|
||||
$expect[0] = array('dir' => 'ltr');
|
||||
|
||||
// leave existing dir alone
|
||||
$this->assertResult( array('dir' => 'rtl') );
|
||||
$inputs[1] = array('dir' => 'rtl');
|
||||
$expect[1] = array('dir' => 'rtl');
|
||||
|
||||
// use a different default
|
||||
$this->assertResult(
|
||||
array(),
|
||||
array('dir' => 'rtl'),
|
||||
array('Attr.DefaultTextDir' => 'rtl')
|
||||
);
|
||||
$config_rtl = HTMLPurifier_Config::createDefault();
|
||||
$config_rtl->set('Attr', 'DefaultTextDir', 'rtl');
|
||||
$inputs[2] = array();
|
||||
$expect[2] = array('dir' => 'rtl');
|
||||
$config[2] = $config_rtl;
|
||||
|
||||
$this->assertTransform($inputs, $expect, $config);
|
||||
|
||||
}
|
||||
|
||||
|
@@ -5,36 +5,28 @@ require_once 'HTMLPurifier/AttrTransform/ImgRequired.php';
|
||||
class HTMLPurifier_AttrTransform_ImgRequiredTest extends HTMLPurifier_AttrTransformHarness
|
||||
{
|
||||
|
||||
function setUp() {
|
||||
parent::setUp();
|
||||
$this->obj = new HTMLPurifier_AttrTransform_ImgRequired();
|
||||
}
|
||||
|
||||
function test() {
|
||||
|
||||
$this->assertResult(
|
||||
array(),
|
||||
array('src' => '', 'alt' => 'Invalid image')
|
||||
);
|
||||
$this->transform = new HTMLPurifier_AttrTransform_ImgRequired();
|
||||
|
||||
$this->assertResult(
|
||||
array(),
|
||||
array('src' => 'blank.png', 'alt' => 'Pawned!'),
|
||||
array(
|
||||
'Attr.DefaultInvalidImage' => 'blank.png',
|
||||
'Attr.DefaultInvalidImageAlt' => 'Pawned!'
|
||||
)
|
||||
);
|
||||
$inputs = $expect = $config = array();
|
||||
|
||||
$this->assertResult(
|
||||
array('src' => '/path/to/foobar.png'),
|
||||
array('src' => '/path/to/foobar.png', 'alt' => 'foobar.png')
|
||||
);
|
||||
$inputs[0] = array();
|
||||
$expect[0] = array('src' => '', 'alt' => 'Invalid image');
|
||||
|
||||
$this->assertResult(
|
||||
array('alt' => 'intrigue'),
|
||||
array('src' => '', 'alt' => 'intrigue')
|
||||
);
|
||||
$inputs[1] = array();
|
||||
$expect[1] = array('src' => 'blank.png', 'alt' => 'Pawned!');
|
||||
$config[1] = HTMLPurifier_Config::createDefault();
|
||||
$config[1]->set('Attr', 'DefaultInvalidImage', 'blank.png');
|
||||
$config[1]->set('Attr', 'DefaultInvalidImageAlt', 'Pawned!');
|
||||
|
||||
$inputs[2] = array('src' => '/path/to/foobar.png');
|
||||
$expect[2] = array('src' => '/path/to/foobar.png', 'alt' => 'foobar.png');
|
||||
|
||||
$inputs[3] = array('alt' => 'intrigue');
|
||||
$expect[3] = array('src' => '', 'alt' => 'intrigue');
|
||||
|
||||
$this->assertTransform($inputs, $expect, $config);
|
||||
|
||||
}
|
||||
|
||||
|
@@ -8,39 +8,34 @@ class HTMLPurifier_AttrTransform_LangTest
|
||||
extends HTMLPurifier_AttrTransformHarness
|
||||
{
|
||||
|
||||
function setUp() {
|
||||
parent::setUp();
|
||||
$this->obj = new HTMLPurifier_AttrTransform_Lang();
|
||||
}
|
||||
|
||||
function test() {
|
||||
|
||||
$this->transform = new HTMLPurifier_AttrTransform_Lang();
|
||||
|
||||
$inputs = array();
|
||||
$expect = array();
|
||||
|
||||
// leave non-lang'ed elements alone
|
||||
$this->assertResult(array(), true);
|
||||
$inputs[0] = array();
|
||||
$expect[0] = true;
|
||||
|
||||
// copy lang to xml:lang
|
||||
$this->assertResult(
|
||||
array('lang' => 'en'),
|
||||
array('lang' => 'en', 'xml:lang' => 'en')
|
||||
);
|
||||
$inputs[1] = array('lang' => 'en');
|
||||
$expect[1] = array('lang' => 'en', 'xml:lang' => 'en');
|
||||
|
||||
// preserve attributes
|
||||
$this->assertResult(
|
||||
array('src' => 'vert.png', 'lang' => 'fr'),
|
||||
array('src' => 'vert.png', 'lang' => 'fr', 'xml:lang' => 'fr')
|
||||
);
|
||||
$inputs[2] = array('src' => 'vert.png', 'lang' => 'fr');
|
||||
$expect[2] = array('src' => 'vert.png', 'lang' => 'fr', 'xml:lang' => 'fr');
|
||||
|
||||
// copy xml:lang to lang
|
||||
$this->assertResult(
|
||||
array('xml:lang' => 'en'),
|
||||
array('lang' => 'en', 'xml:lang' => 'en')
|
||||
);
|
||||
$inputs[3] = array('xml:lang' => 'en');
|
||||
$expect[3] = array('lang' => 'en', 'xml:lang' => 'en');
|
||||
|
||||
// both set, override lang with xml:lang
|
||||
$this->assertResult(
|
||||
array('lang' => 'fr', 'xml:lang' => 'de'),
|
||||
array('lang' => 'de', 'xml:lang' => 'de')
|
||||
);
|
||||
$inputs[4] = array('lang' => 'fr', 'xml:lang' => 'de');
|
||||
$expect[4] = array('lang' => 'de', 'xml:lang' => 'de');
|
||||
|
||||
$this->assertTransform($inputs, $expect);
|
||||
|
||||
}
|
||||
|
||||
|
@@ -5,58 +5,48 @@ require_once 'HTMLPurifier/AttrTransform/TextAlign.php';
|
||||
class HTMLPurifier_AttrTransform_TextAlignTest extends HTMLPurifier_AttrTransformHarness
|
||||
{
|
||||
|
||||
function setUp() {
|
||||
parent::setUp();
|
||||
$this->obj = new HTMLPurifier_AttrTransform_TextAlign();
|
||||
}
|
||||
|
||||
function test() {
|
||||
|
||||
$this->transform = new HTMLPurifier_AttrTransform_TextAlign();
|
||||
|
||||
$inputs = array();
|
||||
$expect = array();
|
||||
|
||||
// leave empty arrays alone
|
||||
$this->assertResult( array() );
|
||||
$inputs[0] = array();
|
||||
$expect[0] = true;
|
||||
|
||||
// leave arrays without interesting stuff alone
|
||||
$this->assertResult( array('style' => 'font-weight:bold;') );
|
||||
$inputs[1] = array('style' => 'font-weight:bold;');
|
||||
$expect[1] = true;
|
||||
|
||||
// test each of the conversions
|
||||
|
||||
$this->assertResult(
|
||||
array('align' => 'left'),
|
||||
array('style' => 'text-align:left;')
|
||||
);
|
||||
$inputs[2] = array('align' => 'left');
|
||||
$expect[2] = array('style' => 'text-align:left;');
|
||||
|
||||
$this->assertResult(
|
||||
array('align' => 'right'),
|
||||
array('style' => 'text-align:right;')
|
||||
);
|
||||
$inputs[3] = array('align' => 'right');
|
||||
$expect[3] = array('style' => 'text-align:right;');
|
||||
|
||||
$this->assertResult(
|
||||
array('align' => 'center'),
|
||||
array('style' => 'text-align:center;')
|
||||
);
|
||||
$inputs[4] = array('align' => 'center');
|
||||
$expect[4] = array('style' => 'text-align:center;');
|
||||
|
||||
$this->assertResult(
|
||||
array('align' => 'justify'),
|
||||
array('style' => 'text-align:justify;')
|
||||
);
|
||||
$inputs[5] = array('align' => 'justify');
|
||||
$expect[5] = array('style' => 'text-align:justify;');
|
||||
|
||||
// drop garbage value
|
||||
$this->assertResult(
|
||||
array('align' => 'invalid'),
|
||||
array()
|
||||
);
|
||||
$inputs[6] = array('align' => 'invalid');
|
||||
$expect[6] = array();
|
||||
|
||||
// test CSS munging
|
||||
$this->assertResult(
|
||||
array('align' => 'left', 'style' => 'font-weight:bold;'),
|
||||
array('style' => 'text-align:left;font-weight:bold;')
|
||||
);
|
||||
$inputs[7] = array('align' => 'left', 'style' => 'font-weight:bold;');
|
||||
$expect[7] = array('style' => 'text-align:left;font-weight:bold;');
|
||||
|
||||
// test case insensitivity
|
||||
$this->assertResult(
|
||||
array('align' => 'CENTER'),
|
||||
array('style' => 'text-align:center;')
|
||||
);
|
||||
$inputs[8] = array('align' => 'CENTER');
|
||||
$expect[8] = array('style' => 'text-align:center;');
|
||||
|
||||
$this->assertTransform($inputs, $expect);
|
||||
|
||||
}
|
||||
|
||||
|
@@ -1,12 +1,18 @@
|
||||
<?php
|
||||
|
||||
require_once 'HTMLPurifier/Harness.php';
|
||||
|
||||
class HTMLPurifier_AttrTransformHarness extends HTMLPurifier_Harness
|
||||
class HTMLPurifier_AttrTransformHarness extends UnitTestCase
|
||||
{
|
||||
|
||||
function setUp() {
|
||||
$this->func = 'transform';
|
||||
var $transform;
|
||||
|
||||
function assertTransform($inputs, $expect, $config = array()) {
|
||||
$default_config = HTMLPurifier_Config::createDefault();
|
||||
foreach ($inputs as $i => $input) {
|
||||
if (!isset($config[$i])) $config[$i] = $default_config;
|
||||
$result = $this->transform->transform($input, $config[$i]);
|
||||
if ($expect[$i] === true) $expect[$i] = $input;
|
||||
$this->assertEqual($expect[$i], $result, "Test $i: %s");
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
@@ -1,78 +1,122 @@
|
||||
<?php
|
||||
|
||||
require_once 'HTMLPurifier/Harness.php';
|
||||
|
||||
require_once 'HTMLPurifier/ChildDef.php';
|
||||
require_once 'HTMLPurifier/Lexer/DirectLex.php';
|
||||
require_once 'HTMLPurifier/Generator.php';
|
||||
|
||||
class HTMLPurifier_ChildDefTest extends HTMLPurifier_Harness
|
||||
class HTMLPurifier_ChildDefTest extends UnitTestCase
|
||||
{
|
||||
|
||||
function setUp() {
|
||||
$this->obj = null;
|
||||
$this->func = 'validateChildren';
|
||||
$this->to_tokens = true;
|
||||
$this->to_html = true;
|
||||
var $def;
|
||||
var $lex;
|
||||
var $gen;
|
||||
|
||||
function HTMLPurifier_ChildDefTest() {
|
||||
// it is vital that the tags be treated as literally as possible
|
||||
$this->lex = new HTMLPurifier_Lexer_DirectLex();
|
||||
$this->gen = new HTMLPurifier_Generator();
|
||||
parent::UnitTestCase();
|
||||
}
|
||||
|
||||
function assertSeries($inputs, $expect, $config, $context = array()) {
|
||||
foreach ($inputs as $i => $input) {
|
||||
|
||||
if (!isset($context[$i])) {
|
||||
$context[$i] = null;
|
||||
}
|
||||
if (!isset($config[$i])) {
|
||||
$config[$i] = HTMLPurifier_Config::createDefault();
|
||||
}
|
||||
|
||||
$tokens = $this->lex->tokenizeHTML($input, $config[$i]);
|
||||
$result = $this->def->validateChildren($tokens, $config[$i], $context[$i]);
|
||||
|
||||
if (is_bool($expect[$i])) {
|
||||
$this->assertIdentical($expect[$i], $result, "Test $i: %s");
|
||||
} else {
|
||||
$result_html = $this->gen->generateFromTokens($result, $config[$i]);
|
||||
$this->assertIdentical($expect[$i], $result_html, "Test $i: %s");
|
||||
paintIf($result_html, $result_html != $expect[$i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
function test_custom() {
|
||||
|
||||
$this->obj = new HTMLPurifier_ChildDef_Custom('(a,b?,c*,d+,(a,b)*)');
|
||||
$this->def = new HTMLPurifier_ChildDef_Custom(
|
||||
'(a, b?, c*, d+, (a, b)*)');
|
||||
|
||||
$this->assertResult('', false);
|
||||
$this->assertResult('<a /><a />', false);
|
||||
$inputs = array();
|
||||
$expect = array();
|
||||
$config = array();
|
||||
|
||||
$this->assertResult('<a /><b /><c /><d /><a /><b />');
|
||||
$this->assertResult('<a /><d>Dob</d><a /><b>foo</b>'.
|
||||
'<a href="moo" /><b>foo</b>');
|
||||
$inputs[0] = '';
|
||||
$expect[0] = false;
|
||||
|
||||
$inputs[1] = '<a /><b /><c /><d /><a /><b />';
|
||||
$expect[1] = true;
|
||||
|
||||
$inputs[2] = '<a /><d>Dob</d><a /><b>foo</b><a href="moo" /><b>foo</b>';
|
||||
$expect[2] = true;
|
||||
|
||||
$inputs[3] = '<a /><a />';
|
||||
$expect[3] = false;
|
||||
|
||||
$this->assertSeries($inputs, $expect, $config);
|
||||
}
|
||||
|
||||
function test_table() {
|
||||
|
||||
// currently inactive, awaiting augmentation
|
||||
|
||||
// the table definition
|
||||
$this->obj = new HTMLPurifier_ChildDef_Table();
|
||||
$this->def = new HTMLPurifier_ChildDef_Table();
|
||||
|
||||
$inputs = $expect = $config = array();
|
||||
|
||||
$this->assertResult('', false);
|
||||
$inputs[0] = '';
|
||||
$expect[0] = false;
|
||||
|
||||
// we're using empty tags to compact the tests: under real circumstances
|
||||
// there would be contents in them
|
||||
|
||||
$this->assertResult('<tr />');
|
||||
$this->assertResult('<caption /><col /><thead /><tfoot /><tbody>'.
|
||||
'<tr><td>asdf</td></tr></tbody>');
|
||||
$this->assertResult('<col /><col /><col /><tr />');
|
||||
$inputs[1] = '<tr />';
|
||||
$expect[1] = true;
|
||||
|
||||
$inputs[2] = '<caption /><col /><thead /><tfoot /><tbody>'.
|
||||
'<tr><td>asdf</td></tr></tbody>';
|
||||
$expect[2] = true;
|
||||
|
||||
$inputs[3] = '<col /><col /><col /><tr />';
|
||||
$expect[3] = true;
|
||||
|
||||
// mixed up order
|
||||
$this->assertResult(
|
||||
'<col /><colgroup /><tbody /><tfoot /><thead /><tr>1</tr><caption /><tr />',
|
||||
'<caption /><col /><colgroup /><thead /><tfoot /><tbody /><tr>1</tr><tr />');
|
||||
$inputs[4] = '<col /><colgroup /><tbody /><tfoot /><thead /><tr>1</tr><caption /><tr />';
|
||||
$expect[4] = '<caption /><col /><colgroup /><thead /><tfoot /><tbody /><tr>1</tr><tr />';
|
||||
|
||||
// duplicates of singles
|
||||
// - first caption serves
|
||||
// - trailing tfoots/theads get turned into tbodys
|
||||
$this->assertResult(
|
||||
'<caption>1</caption><caption /><tbody /><tbody /><tfoot>1</tfoot><tfoot />',
|
||||
'<caption>1</caption><tfoot>1</tfoot><tbody /><tbody /><tbody />'
|
||||
);
|
||||
$inputs[5] = '<caption>1</caption><caption /><tbody /><tbody /><tfoot>1</tfoot><tfoot />';
|
||||
$expect[5] = '<caption>1</caption><tfoot>1</tfoot><tbody /><tbody /><tbody />';
|
||||
|
||||
// errant text dropped (until bubbling is implemented)
|
||||
$this->assertResult('foo', false);
|
||||
$inputs[6] = 'foo';
|
||||
$expect[6] = false;
|
||||
|
||||
// whitespace sticks to the previous element, last whitespace is
|
||||
// stationary
|
||||
$this->assertResult("\n <tr />\n <tr />\n ");
|
||||
$this->assertResult(
|
||||
"\n\t<tbody />\n\t\t<tfoot />\n\t\t\t",
|
||||
"\n\t\t<tfoot />\n\t<tbody />\n\t\t\t"
|
||||
);
|
||||
$inputs[7] = "\n <tr />\n <tr />\n ";
|
||||
$expect[7] = true;
|
||||
|
||||
$inputs[8] = "\n\t<tbody />\n\t\t<tfoot />\n\t\t\t";
|
||||
$expect[8] = "\n\t\t<tfoot />\n\t<tbody />\n\t\t\t";
|
||||
|
||||
$this->assertSeries($inputs, $expect, $config);
|
||||
|
||||
}
|
||||
|
||||
function testParsing() {
|
||||
function test_parsing() {
|
||||
|
||||
$def = new HTMLPurifier_ChildDef_Required('foobar | bang |gizmo');
|
||||
$this->assertEqual($def->elements,
|
||||
@@ -88,78 +132,92 @@ class HTMLPurifier_ChildDefTest extends HTMLPurifier_Harness
|
||||
'href' => true
|
||||
,'src' => true
|
||||
));
|
||||
|
||||
}
|
||||
|
||||
function test_required_pcdata_forbidden() {
|
||||
|
||||
$this->obj = new HTMLPurifier_ChildDef_Required('dt | dd');
|
||||
$this->def = new HTMLPurifier_ChildDef_Required('dt | dd');
|
||||
$inputs = $expect = $config = array();
|
||||
|
||||
$this->assertResult('', false);
|
||||
$this->assertResult(
|
||||
'<dt>Term</dt>Text in an illegal location'.
|
||||
'<dd>Definition</dd><b>Illegal tag</b>',
|
||||
'<dt>Term</dt><dd>Definition</dd>');
|
||||
$this->assertResult('How do you do!', false);
|
||||
$inputs[0] = '';
|
||||
$expect[0] = false;
|
||||
|
||||
$inputs[1] = '<dt>Term</dt>Text in an illegal location'.
|
||||
'<dd>Definition</dd><b>Illegal tag</b>';
|
||||
|
||||
$expect[1] = '<dt>Term</dt><dd>Definition</dd>';
|
||||
|
||||
$inputs[2] = 'How do you do!';
|
||||
$expect[2] = false;
|
||||
|
||||
// whitespace shouldn't trigger it
|
||||
$this->assertResult("\n<dd>Definition</dd> ");
|
||||
$inputs[3] = "\n<dd>Definition</dd> ";
|
||||
$expect[3] = true;
|
||||
|
||||
$this->assertResult(
|
||||
'<dd>Definition</dd> <b></b> ',
|
||||
'<dd>Definition</dd> '
|
||||
);
|
||||
$this->assertResult("\t ", false);
|
||||
$inputs[4] ='<dd>Definition</dd> <b></b> ';
|
||||
$expect[4] = '<dd>Definition</dd> ';
|
||||
|
||||
$inputs[5] = "\t ";
|
||||
$expect[5] = false;
|
||||
|
||||
$this->assertSeries($inputs, $expect, $config);
|
||||
|
||||
}
|
||||
|
||||
function test_required_pcdata_allowed() {
|
||||
$this->def = new HTMLPurifier_ChildDef_Required('#PCDATA | b');
|
||||
|
||||
$this->obj = new HTMLPurifier_ChildDef_Required('#PCDATA | b');
|
||||
$inputs = $expect = $config = array();
|
||||
|
||||
$this->assertResult('<b>Bold text</b><img />', '<b>Bold text</b>');
|
||||
$inputs[0] = '<b>Bold text</b><img />';
|
||||
$expect[0] = '<b>Bold text</b>';
|
||||
|
||||
// with child escaping on
|
||||
$this->assertResult(
|
||||
'<b>Bold text</b><img />',
|
||||
'<b>Bold text</b><img />',
|
||||
array(
|
||||
'Core.EscapeInvalidChildren' => true
|
||||
)
|
||||
);
|
||||
$inputs[1] = '<b>Bold text</b><img />';
|
||||
$expect[1] = '<b>Bold text</b><img />';
|
||||
$config[1] = HTMLPurifier_Config::createDefault();
|
||||
$config[1]->set('Core', 'EscapeInvalidChildren', true);
|
||||
|
||||
$this->assertSeries($inputs, $expect, $config);
|
||||
}
|
||||
|
||||
function test_optional() {
|
||||
$this->def = new HTMLPurifier_ChildDef_Optional('b | i');
|
||||
|
||||
$this->obj = new HTMLPurifier_ChildDef_Optional('b | i');
|
||||
$inputs = $expect = $config = array();
|
||||
|
||||
$this->assertResult('<b>Bold text</b><img />', '<b>Bold text</b>');
|
||||
$this->assertResult('Not allowed text', '');
|
||||
$inputs[0] = '<b>Bold text</b><img />';
|
||||
$expect[0] = '<b>Bold text</b>';
|
||||
|
||||
$inputs[1] = 'Not allowed text';
|
||||
$expect[1] = '';
|
||||
|
||||
$this->assertSeries($inputs, $expect, $config);
|
||||
}
|
||||
|
||||
function test_chameleon() {
|
||||
|
||||
$this->obj = new HTMLPurifier_ChildDef_Chameleon(
|
||||
'b | i', // allowed only when in inline context
|
||||
$this->def = new HTMLPurifier_ChildDef_Chameleon(
|
||||
'b | i', // allowed only when in inline context
|
||||
'b | i | div' // allowed only when in block context
|
||||
);
|
||||
|
||||
$this->assertResult(
|
||||
'<b>Allowed.</b>', true,
|
||||
array(), array('ParentType' => 'inline')
|
||||
);
|
||||
$inputs = $expect = $config = array();
|
||||
$context = array();
|
||||
|
||||
$this->assertResult(
|
||||
'<div>Not allowed.</div>', '',
|
||||
array(), array('ParentType' => 'inline')
|
||||
);
|
||||
$inputs[0] = '<b>Allowed.</b>';
|
||||
$expect[0] = true;
|
||||
$context[0] = 'inline';
|
||||
|
||||
$this->assertResult(
|
||||
'<div>Allowed.</div>', true,
|
||||
array(), array('ParentType' => 'block')
|
||||
);
|
||||
$inputs[1] = '<div>Not allowed.</div>';
|
||||
$expect[1] = '';
|
||||
$context[1] = 'inline';
|
||||
|
||||
$inputs[2] = '<div>Allowed.</div>';
|
||||
$expect[2] = true;
|
||||
$context[2] = 'block';
|
||||
|
||||
$this->assertSeries($inputs, $expect, $config, $context);
|
||||
|
||||
}
|
||||
|
||||
|
@@ -77,6 +77,7 @@ class HTMLPurifier_ConfigSchemaTest extends UnitTestCase
|
||||
);
|
||||
$this->assertError('Cannot define directive for undefined namespace');
|
||||
$this->assertNoErrors();
|
||||
$this->swallowErrors();
|
||||
|
||||
|
||||
|
||||
@@ -103,6 +104,7 @@ class HTMLPurifier_ConfigSchemaTest extends UnitTestCase
|
||||
);
|
||||
$this->assertError('Inconsistent default or type, cannot redefine');
|
||||
$this->assertNoErrors();
|
||||
$this->swallowErrors();
|
||||
|
||||
|
||||
|
||||
@@ -148,6 +150,7 @@ class HTMLPurifier_ConfigSchemaTest extends UnitTestCase
|
||||
);
|
||||
$this->assertError('Cannot define allowed values for undefined directive');
|
||||
$this->assertNoErrors();
|
||||
$this->swallowErrors();
|
||||
|
||||
|
||||
|
||||
@@ -189,6 +192,7 @@ class HTMLPurifier_ConfigSchemaTest extends UnitTestCase
|
||||
);
|
||||
$this->assertError('Cannot define alias to value that is not allowed');
|
||||
$this->assertNoErrors();
|
||||
$this->swallowErrors();
|
||||
|
||||
|
||||
|
||||
@@ -200,6 +204,7 @@ class HTMLPurifier_ConfigSchemaTest extends UnitTestCase
|
||||
);
|
||||
$this->assertError('Cannot define alias over allowed value');
|
||||
$this->assertNoErrors();
|
||||
$this->swallowErrors();
|
||||
|
||||
|
||||
|
||||
@@ -211,6 +216,7 @@ class HTMLPurifier_ConfigSchemaTest extends UnitTestCase
|
||||
|
||||
$this->assertError('Invalid type for configuration directive');
|
||||
$this->assertNoErrors();
|
||||
$this->swallowErrors();
|
||||
|
||||
|
||||
|
||||
@@ -222,16 +228,7 @@ class HTMLPurifier_ConfigSchemaTest extends UnitTestCase
|
||||
|
||||
$this->assertError('Default value does not match directive type');
|
||||
$this->assertNoErrors();
|
||||
|
||||
|
||||
|
||||
// define a directive that allows null
|
||||
HTMLPurifier_ConfigSchema::define(
|
||||
'Core', 'Foobaz', null, 'string/null',
|
||||
'Nulls are allowed if you add on /null, cool huh?'
|
||||
);
|
||||
|
||||
$this->assertNoErrors();
|
||||
$this->swallowErrors();
|
||||
|
||||
|
||||
// define a directive with bad characters
|
||||
@@ -242,6 +239,7 @@ class HTMLPurifier_ConfigSchemaTest extends UnitTestCase
|
||||
|
||||
$this->assertError('Directive name must be alphanumeric');
|
||||
$this->assertNoErrors();
|
||||
$this->swallowErrors();
|
||||
|
||||
// define a namespace with bad characters
|
||||
HTMLPurifier_ConfigSchema::defineNamespace(
|
||||
@@ -250,6 +248,7 @@ class HTMLPurifier_ConfigSchemaTest extends UnitTestCase
|
||||
|
||||
$this->assertError('Namespace name must be alphanumeric');
|
||||
$this->assertNoErrors();
|
||||
$this->swallowErrors();
|
||||
|
||||
}
|
||||
|
||||
@@ -259,11 +258,7 @@ class HTMLPurifier_ConfigSchemaTest extends UnitTestCase
|
||||
}
|
||||
|
||||
function assertInvalid($var, $type) {
|
||||
$this->assertTrue(
|
||||
$this->our_copy->isError(
|
||||
$this->our_copy->validate($var, $type)
|
||||
)
|
||||
);
|
||||
$this->assertIdentical($this->our_copy->validate($var, $type), null);
|
||||
}
|
||||
|
||||
function testValidate() {
|
||||
@@ -276,7 +271,6 @@ class HTMLPurifier_ConfigSchemaTest extends UnitTestCase
|
||||
$this->assertValid(0, 'bool', false);
|
||||
$this->assertValid(1, 'bool', true);
|
||||
$this->assertInvalid(34, 'bool');
|
||||
$this->assertInvalid(null, 'bool');
|
||||
$this->assertValid(array('1', '2', '3'), 'list');
|
||||
$this->assertValid(array('1' => true, '2' => true), 'lookup');
|
||||
$this->assertValid(array('1', '2'), 'lookup', array('1' => true, '2' => true));
|
||||
@@ -287,22 +281,6 @@ class HTMLPurifier_ConfigSchemaTest extends UnitTestCase
|
||||
|
||||
}
|
||||
|
||||
function testValidate_null() {
|
||||
|
||||
$this->assertTrue(
|
||||
$this->our_copy->isError(
|
||||
$this->our_copy->validate(null, 'string', false)
|
||||
)
|
||||
);
|
||||
|
||||
$this->assertFalse(
|
||||
$this->our_copy->isError(
|
||||
$this->our_copy->validate(null, 'string', true)
|
||||
)
|
||||
);
|
||||
|
||||
}
|
||||
|
||||
function assertMungeFilename($oldname, $newname) {
|
||||
$this->assertIdentical(
|
||||
$this->our_copy->mungeFilename($oldname),
|
||||
|
@@ -37,10 +37,6 @@ class HTMLPurifier_ConfigTest extends UnitTestCase
|
||||
'Core', 'Encoding', 'utf-8', 'istring', 'Case insensitivity!'
|
||||
);
|
||||
|
||||
HTMLPurifier_ConfigSchema::define(
|
||||
'Extension', 'CanBeNull', null, 'string/null', 'Null or string!'
|
||||
);
|
||||
|
||||
HTMLPurifier_ConfigSchema::defineAllowedValues(
|
||||
'Extension', 'Pert', array('foo', 'moo')
|
||||
);
|
||||
@@ -66,21 +62,25 @@ class HTMLPurifier_ConfigTest extends UnitTestCase
|
||||
$config->get('Core', 'NotDefined');
|
||||
$this->assertError('Cannot retrieve value of undefined directive');
|
||||
$this->assertNoErrors();
|
||||
$this->swallowErrors();
|
||||
|
||||
// try to set undefined value
|
||||
$config->set('Foobar', 'Key', 'foobar');
|
||||
$this->assertError('Cannot set undefined directive to value');
|
||||
$this->assertNoErrors();
|
||||
$this->swallowErrors();
|
||||
|
||||
// try to set not allowed value
|
||||
$config->set('Extension', 'Pert', 'wizard');
|
||||
$this->assertError('Value not supported');
|
||||
$this->assertNoErrors();
|
||||
$this->swallowErrors();
|
||||
|
||||
// try to set not allowed value
|
||||
$config->set('Extension', 'Pert', 34);
|
||||
$this->assertError('Value is of invalid type');
|
||||
$this->assertNoErrors();
|
||||
$this->swallowErrors();
|
||||
|
||||
// set aliased value
|
||||
$config->set('Extension', 'Pert', 'cow');
|
||||
@@ -92,29 +92,10 @@ class HTMLPurifier_ConfigTest extends UnitTestCase
|
||||
$this->assertNoErrors();
|
||||
$this->assertIdentical($config->get('Core', 'Encoding'), 'iso-8859-1');
|
||||
|
||||
// set null to directive that allows null
|
||||
$config->set('Extension', 'CanBeNull', null);
|
||||
$this->assertNoErrors();
|
||||
$this->assertIdentical($config->get('Extension', 'CanBeNull'), null);
|
||||
|
||||
$config->set('Extension', 'CanBeNull', 'foobar');
|
||||
$this->assertNoErrors();
|
||||
$this->assertIdentical($config->get('Extension', 'CanBeNull'), 'foobar');
|
||||
|
||||
// set null to directive that doesn't allow null
|
||||
$config->set('Extension', 'Pert', null);
|
||||
$this->assertError('Value is of invalid type');
|
||||
$this->assertNoErrors();
|
||||
|
||||
}
|
||||
|
||||
function test_getDefinition() {
|
||||
|
||||
// we actually want to use the old copy, because the definition
|
||||
// generation routines have dependencies on configuration values
|
||||
|
||||
$this->old_copy = HTMLPurifier_ConfigSchema::instance($this->old_copy);
|
||||
|
||||
$config = HTMLPurifier_Config::createDefault();
|
||||
$def = $config->getHTMLDefinition();
|
||||
$this->assertIsA($def, 'HTMLPurifier_HTMLDefinition');
|
||||
@@ -124,48 +105,6 @@ class HTMLPurifier_ConfigTest extends UnitTestCase
|
||||
|
||||
}
|
||||
|
||||
function test_loadArray() {
|
||||
// setup a few dummy namespaces/directives for our testing
|
||||
HTMLPurifier_ConfigSchema::defineNamespace('Zoo', 'Animals we have.');
|
||||
HTMLPurifier_ConfigSchema::define('Zoo', 'Aadvark', 0, 'int', 'Have?');
|
||||
HTMLPurifier_ConfigSchema::define('Zoo', 'Boar', 0, 'int', 'Have?');
|
||||
HTMLPurifier_ConfigSchema::define('Zoo', 'Camel', 0, 'int', 'Have?');
|
||||
HTMLPurifier_ConfigSchema::define(
|
||||
'Zoo', 'Others', array(), 'list', 'Other animals we have one of.'
|
||||
);
|
||||
|
||||
$config_manual = HTMLPurifier_Config::createDefault();
|
||||
$config_loadabbr = HTMLPurifier_Config::createDefault();
|
||||
$config_loadfull = HTMLPurifier_Config::createDefault();
|
||||
|
||||
$config_manual->set('Zoo', 'Aadvark', 3);
|
||||
$config_manual->set('Zoo', 'Boar', 5);
|
||||
$config_manual->set('Zoo', 'Camel', 2000); // that's a lotta camels!
|
||||
$config_manual->set('Zoo', 'Others', array('Peacock', 'Dodo')); // wtf!
|
||||
|
||||
// condensed form
|
||||
$config_loadabbr->loadArray(array(
|
||||
'Zoo.Aadvark' => 3,
|
||||
'Zoo.Boar' => 5,
|
||||
'Zoo.Camel' => 2000,
|
||||
'Zoo.Others' => array('Peacock', 'Dodo')
|
||||
));
|
||||
|
||||
// fully expanded form
|
||||
$config_loadfull->loadArray(array(
|
||||
'Zoo' => array(
|
||||
'Aadvark' => 3,
|
||||
'Boar' => 5,
|
||||
'Camel' => 2000,
|
||||
'Others' => array('Peacock', 'Dodo')
|
||||
)
|
||||
));
|
||||
|
||||
$this->assertEqual($config_manual, $config_loadabbr);
|
||||
$this->assertEqual($config_manual, $config_loadfull);
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
?>
|
@@ -1,89 +0,0 @@
|
||||
<?php
|
||||
|
||||
require_once 'HTMLPurifier/Context.php';
|
||||
|
||||
// mocks
|
||||
require_once 'HTMLPurifier/IDAccumulator.php';
|
||||
|
||||
class HTMLPurifier_ContextTest extends UnitTestCase
|
||||
{
|
||||
|
||||
var $context;
|
||||
|
||||
function setUp() {
|
||||
$this->context = new HTMLPurifier_Context();
|
||||
}
|
||||
|
||||
function testStandardUsage() {
|
||||
|
||||
generate_mock_once('HTMLPurifier_IDAccumulator');
|
||||
|
||||
$this->assertFalse($this->context->exists('IDAccumulator'));
|
||||
|
||||
$accumulator =& new HTMLPurifier_IDAccumulatorMock($this);
|
||||
$this->context->register('IDAccumulator', $accumulator);
|
||||
$this->assertTrue($this->context->exists('IDAccumulator'));
|
||||
|
||||
$accumulator_2 =& $this->context->get('IDAccumulator');
|
||||
$this->assertReference($accumulator, $accumulator_2);
|
||||
|
||||
$this->context->destroy('IDAccumulator');
|
||||
$this->assertFalse($this->context->exists('IDAccumulator'));
|
||||
$accumulator_3 =& $this->context->get('IDAccumulator');
|
||||
$this->assertError('Attempted to retrieve non-existent variable');
|
||||
$this->assertNull($accumulator_3);
|
||||
|
||||
$this->context->destroy('IDAccumulator');
|
||||
$this->assertError('Attempted to destroy non-existent variable');
|
||||
|
||||
}
|
||||
|
||||
function testReRegister() {
|
||||
|
||||
$var = true;
|
||||
$this->context->register('OnceOnly', $var);
|
||||
$this->assertNoErrors();
|
||||
|
||||
$this->context->register('OnceOnly', $var);
|
||||
$this->assertError('Name collision, cannot re-register');
|
||||
|
||||
// destroy it, now registration is okay
|
||||
$this->context->destroy('OnceOnly');
|
||||
$this->context->register('OnceOnly', $var);
|
||||
$this->assertNoErrors();
|
||||
|
||||
}
|
||||
|
||||
function test_loadArray() {
|
||||
|
||||
// references can be *really* wonky!
|
||||
|
||||
$context_manual = new HTMLPurifier_Context();
|
||||
$context_load = new HTMLPurifier_Context();
|
||||
|
||||
$var1 = 1;
|
||||
$var2 = 2;
|
||||
|
||||
$context_manual->register('var1', $var1);
|
||||
$context_manual->register('var2', $var2);
|
||||
|
||||
// you MUST set up the references when constructing the array,
|
||||
// otherwise the registered version will be a copy
|
||||
$array = array(
|
||||
'var1' => &$var1,
|
||||
'var2' => &$var2
|
||||
);
|
||||
|
||||
$context_load->loadArray($array);
|
||||
$this->assertIdentical($context_manual, $context_load);
|
||||
|
||||
$var1 = 10;
|
||||
$var2 = 20;
|
||||
|
||||
$this->assertIdentical($context_manual, $context_load);
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
?>
|
@@ -31,11 +31,10 @@ class HTMLPurifier_EncoderTest extends UnitTestCase
|
||||
|
||||
function test_convertToUTF8() {
|
||||
$config = HTMLPurifier_Config::createDefault();
|
||||
$context = new HTMLPurifier_Context();
|
||||
|
||||
// UTF-8 means that we don't touch it
|
||||
$this->assertIdentical(
|
||||
$this->Encoder->convertToUTF8("\xF6", $config, $context),
|
||||
$this->Encoder->convertToUTF8("\xF6", $config),
|
||||
"\xF6" // this is invalid
|
||||
);
|
||||
$this->assertNoErrors();
|
||||
@@ -44,14 +43,14 @@ class HTMLPurifier_EncoderTest extends UnitTestCase
|
||||
|
||||
// Now it gets converted
|
||||
$this->assertIdentical(
|
||||
$this->Encoder->convertToUTF8("\xF6", $config, $context),
|
||||
$this->Encoder->convertToUTF8("\xF6", $config),
|
||||
"\xC3\xB6"
|
||||
);
|
||||
|
||||
$config->set('Test', 'ForceNoIconv', true);
|
||||
|
||||
$this->assertIdentical(
|
||||
$this->Encoder->convertToUTF8("\xF6", $config, $context),
|
||||
$this->Encoder->convertToUTF8("\xF6", $config),
|
||||
"\xC3\xB6"
|
||||
);
|
||||
|
||||
@@ -59,11 +58,10 @@ class HTMLPurifier_EncoderTest extends UnitTestCase
|
||||
|
||||
function test_convertFromUTF8() {
|
||||
$config = HTMLPurifier_Config::createDefault();
|
||||
$context = new HTMLPurifier_Context();
|
||||
|
||||
// UTF-8 means that we don't touch it
|
||||
$this->assertIdentical(
|
||||
$this->Encoder->convertFromUTF8("\xC3\xB6", $config, $context),
|
||||
$this->Encoder->convertFromUTF8("\xC3\xB6", $config),
|
||||
"\xC3\xB6"
|
||||
);
|
||||
|
||||
@@ -71,14 +69,14 @@ class HTMLPurifier_EncoderTest extends UnitTestCase
|
||||
|
||||
// Now it gets converted
|
||||
$this->assertIdentical(
|
||||
$this->Encoder->convertFromUTF8("\xC3\xB6", $config, $context),
|
||||
$this->Encoder->convertFromUTF8("\xC3\xB6", $config),
|
||||
"\xF6"
|
||||
);
|
||||
|
||||
$config->set('Test', 'ForceNoIconv', true);
|
||||
|
||||
$this->assertIdentical(
|
||||
$this->Encoder->convertFromUTF8("\xC3\xB6", $config, $context),
|
||||
$this->Encoder->convertFromUTF8("\xC3\xB6", $config),
|
||||
"\xF6"
|
||||
);
|
||||
|
||||
|
@@ -3,9 +3,7 @@
|
||||
require_once 'HTMLPurifier/Generator.php';
|
||||
require_once 'HTMLPurifier/EntityLookup.php';
|
||||
|
||||
require_once 'HTMLPurifier/Harness.php';
|
||||
|
||||
class HTMLPurifier_GeneratorTest extends HTMLPurifier_Harness
|
||||
class HTMLPurifier_GeneratorTest extends UnitTestCase
|
||||
{
|
||||
|
||||
var $gen;
|
||||
@@ -17,16 +15,11 @@ class HTMLPurifier_GeneratorTest extends HTMLPurifier_Harness
|
||||
$this->_entity_lookup = HTMLPurifier_EntityLookup::instance();
|
||||
}
|
||||
|
||||
function setUp() {
|
||||
$this->obj = new HTMLPurifier_Generator();
|
||||
$this->func = null;
|
||||
$this->to_tokens = false;
|
||||
$this->to_html = false;
|
||||
}
|
||||
|
||||
function test_generateFromToken() {
|
||||
|
||||
$inputs = $expect = array();
|
||||
$inputs = array();
|
||||
$expect = array();
|
||||
$config = array();
|
||||
|
||||
$inputs[0] = new HTMLPurifier_Token_Text('Foobar.<>');
|
||||
$expect[0] = 'Foobar.<>';
|
||||
@@ -60,7 +53,7 @@ class HTMLPurifier_GeneratorTest extends HTMLPurifier_Harness
|
||||
$expect[7] = $theta_char;
|
||||
|
||||
foreach ($inputs as $i => $input) {
|
||||
$result = $this->obj->generateFromToken($input);
|
||||
$result = $this->gen->generateFromToken($input);
|
||||
$this->assertEqual($result, $expect[$i]);
|
||||
paintIf($result, $result != $expect[$i]);
|
||||
}
|
||||
@@ -69,7 +62,9 @@ class HTMLPurifier_GeneratorTest extends HTMLPurifier_Harness
|
||||
|
||||
function test_generateAttributes() {
|
||||
|
||||
$inputs = $expect = array();
|
||||
$inputs = array();
|
||||
$expect = array();
|
||||
$config = array();
|
||||
|
||||
$inputs[0] = array();
|
||||
$expect[0] = '';
|
||||
@@ -88,8 +83,10 @@ class HTMLPurifier_GeneratorTest extends HTMLPurifier_Harness
|
||||
$inputs[4] = array('title' => 'Theta is ' . $theta_char);
|
||||
$expect[4] = 'title="Theta is ' . $theta_char . '"';
|
||||
|
||||
$default_config = HTMLPurifier_Config::createDefault();
|
||||
foreach ($inputs as $i => $input) {
|
||||
$result = $this->obj->generateAttributes($input);
|
||||
if (!isset($config[$i])) $config[$i] = $default_config;
|
||||
$result = $this->gen->generateAttributes($input, $config[$i]);
|
||||
$this->assertEqual($result, $expect[$i]);
|
||||
paintIf($result, $result != $expect[$i]);
|
||||
}
|
||||
@@ -98,26 +95,34 @@ class HTMLPurifier_GeneratorTest extends HTMLPurifier_Harness
|
||||
|
||||
function test_generateFromTokens() {
|
||||
|
||||
$this->func = 'generateFromTokens';
|
||||
$inputs = array();
|
||||
$expect = array();
|
||||
$config = array();
|
||||
|
||||
$this->assertResult(
|
||||
array(
|
||||
new HTMLPurifier_Token_Start('b'),
|
||||
new HTMLPurifier_Token_Text('Foobar!'),
|
||||
new HTMLPurifier_Token_End('b')
|
||||
),
|
||||
'<b>Foobar!</b>'
|
||||
);
|
||||
$inputs[0] = array(
|
||||
new HTMLPurifier_Token_Start('b'),
|
||||
new HTMLPurifier_Token_Text('Foobar!'),
|
||||
new HTMLPurifier_Token_End('b')
|
||||
);
|
||||
$expect[0] = '<b>Foobar!</b>';
|
||||
|
||||
$inputs[1] = array();
|
||||
$expect[1] = '';
|
||||
|
||||
$default_config = HTMLPurifier_Config::createDefault();
|
||||
foreach ($inputs as $i => $input) {
|
||||
if (!isset($config[$i])) $config[$i] = $default_config;
|
||||
$result = $this->gen->generateFromTokens($input, $config[$i]);
|
||||
$this->assertEqual($expect[$i], $result);
|
||||
paintIf($result, $result != $expect[$i]);
|
||||
}
|
||||
|
||||
$this->assertResult(array(), '');
|
||||
|
||||
}
|
||||
|
||||
var $config;
|
||||
function assertGeneration($tokens, $expect) {
|
||||
$context = new HTMLPurifier_Context();
|
||||
$result = $this->gen->generateFromTokens(
|
||||
$tokens, $this->config, $context);
|
||||
$result = $this->gen->generateFromTokens($tokens, $this->config);
|
||||
// normalized newlines, this probably should be put somewhere else
|
||||
$result = str_replace("\r\n", "\n", $result);
|
||||
$result = str_replace("\r", "\n", $result);
|
||||
|
@@ -1,106 +0,0 @@
|
||||
<?php
|
||||
|
||||
require_once 'HTMLPurifier/Lexer/DirectLex.php';
|
||||
|
||||
/**
|
||||
* General-purpose test-harness that makes testing functions that require
|
||||
* configuration and context objects easier when those two parameters are
|
||||
* meaningless. See HTMLPurifier_ChildDefTest for a good example of usage.
|
||||
*/
|
||||
class HTMLPurifier_Harness extends UnitTestCase
|
||||
{
|
||||
|
||||
/**
|
||||
* Instance of the object that will execute the method
|
||||
*/
|
||||
var $obj;
|
||||
|
||||
/**
|
||||
* Name of the function to be executed
|
||||
*/
|
||||
var $func;
|
||||
|
||||
/**
|
||||
* Whether or not the method deals in tokens. If set to true, assertResult()
|
||||
* will transparently convert HTML to and back from tokens.
|
||||
*/
|
||||
var $to_tokens = false;
|
||||
|
||||
/**
|
||||
* Whether or not to convert tokens back into HTML before performing
|
||||
* equality check, has no effect on bools.
|
||||
*/
|
||||
var $to_html = false;
|
||||
|
||||
/**
|
||||
* Instance of an HTMLPurifier_Lexer implementation.
|
||||
*/
|
||||
var $lexer;
|
||||
|
||||
/**
|
||||
* Instance of HTMLPurifier_Generator
|
||||
*/
|
||||
var $generator;
|
||||
|
||||
function HTMLPurifier_Harness() {
|
||||
$this->lexer = new HTMLPurifier_Lexer_DirectLex();
|
||||
$this->generator = new HTMLPurifier_Generator();
|
||||
parent::UnitTestCase();
|
||||
}
|
||||
|
||||
/**
|
||||
* Asserts a specific result from a one parameter + config/context function
|
||||
* @param $input Input parameter
|
||||
* @param $expect Expectation
|
||||
* @param $config_array Configuration array in form of
|
||||
* Namespace.Directive => Value or an actual config
|
||||
* object.
|
||||
* @param $context_array Context array in form of Key => Value or an actual
|
||||
* context object.
|
||||
*/
|
||||
function assertResult($input, $expect = true,
|
||||
$config_array = array(), $context_array = array()
|
||||
) {
|
||||
|
||||
// setup config object
|
||||
$config = HTMLPurifier_Config::createDefault();
|
||||
$config->loadArray($config_array);
|
||||
|
||||
// setup context object. Note that we are operating on a copy of it!
|
||||
// We will extend the test harness to allow you to do post-tests
|
||||
// on the context object
|
||||
$context = new HTMLPurifier_Context();
|
||||
$context->loadArray($context_array);
|
||||
|
||||
if ($this->to_tokens && is_string($input)) {
|
||||
$input = $this->lexer->tokenizeHTML($input, $config, $context);
|
||||
}
|
||||
|
||||
// call the function
|
||||
$func = $this->func;
|
||||
$result = $this->obj->$func($input, $config, $context);
|
||||
|
||||
// test a bool result
|
||||
if (is_bool($result)) {
|
||||
$this->assertIdentical($expect, $result);
|
||||
return;
|
||||
} elseif (is_bool($expect)) {
|
||||
$expect = $input;
|
||||
}
|
||||
|
||||
if ($this->to_html) {
|
||||
$result = $this->generator->
|
||||
generateFromTokens($result, $config, $context);
|
||||
if (is_array($expect)) {
|
||||
$expect = $this->generator->
|
||||
generateFromTokens($expect, $config, $context);
|
||||
}
|
||||
}
|
||||
|
||||
$this->assertEqual($expect, $result);
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
?>
|
@@ -53,11 +53,9 @@ class HTMLPurifier_Lexer_DirectLexTest extends UnitTestCase
|
||||
$input[10] = 'name="input" selected';
|
||||
$expect[10] = array('name' => 'input', 'selected' => 'selected');
|
||||
|
||||
$config = HTMLPurifier_Config::createDefault();
|
||||
$context = new HTMLPurifier_Context();
|
||||
$size = count($input);
|
||||
for($i = 0; $i < $size; $i++) {
|
||||
$result = $this->DirectLex->parseAttributeString($input[$i], $config, $context);
|
||||
$result = $this->DirectLex->parseAttributeString($input[$i]);
|
||||
$this->assertEqual($expect[$i], $result, 'Test ' . $i . ': %s');
|
||||
paintIf($result, $expect[$i] != $result);
|
||||
}
|
||||
|
@@ -279,17 +279,16 @@ class HTMLPurifier_LexerTest extends UnitTestCase
|
||||
$expect[18] = array( new HTMLPurifier_Token_Empty('br', array('test' => 'x < 6')) );
|
||||
|
||||
$default_config = HTMLPurifier_Config::createDefault();
|
||||
$default_context = new HTMLPurifier_Context();
|
||||
foreach($input as $i => $discard) {
|
||||
if (!isset($config[$i])) $config[$i] = $default_config;
|
||||
|
||||
$result = $this->DirectLex->tokenizeHTML($input[$i], $config[$i], $default_context);
|
||||
$result = $this->DirectLex->tokenizeHTML($input[$i], $config[$i]);
|
||||
$this->assertEqual($expect[$i], $result, 'DirectLexTest '.$i.': %s');
|
||||
paintIf($result, $expect[$i] != $result);
|
||||
|
||||
if ($this->_has_pear) {
|
||||
// assert unless I say otherwise
|
||||
$sax_result = $this->PEARSax3->tokenizeHTML($input[$i], $config[$i], $default_context);
|
||||
$sax_result = $this->PEARSax3->tokenizeHTML($input[$i], $config[$i]);
|
||||
if (!isset($sax_expect[$i])) {
|
||||
// by default, assert with normal result
|
||||
$this->assertEqual($expect[$i], $sax_result, 'PEARSax3Test '.$i.': %s');
|
||||
@@ -305,7 +304,7 @@ class HTMLPurifier_LexerTest extends UnitTestCase
|
||||
}
|
||||
|
||||
if ($this->_has_dom) {
|
||||
$dom_result = $this->DOMLex->tokenizeHTML($input[$i], $config[$i], $default_context);
|
||||
$dom_result = $this->DOMLex->tokenizeHTML($input[$i], $config[$i]);
|
||||
// same structure as SAX
|
||||
if (!isset($dom_expect[$i])) {
|
||||
$this->assertEqual($expect[$i], $dom_result, 'DOMLexTest '.$i.': %s');
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user