mirror of
https://github.com/ezyang/htmlpurifier.git
synced 2025-08-03 12:47:56 +02:00
Compare commits
5 Commits
v1.4.0-str
...
v1.6.1-str
Author | SHA1 | Date | |
---|---|---|---|
|
c35eb3e95f | ||
|
b829e76bbf | ||
|
e967680250 | ||
|
dd2fd06591 | ||
|
cec7a1c087 |
2
Doxyfile
2
Doxyfile
@@ -4,7 +4,7 @@
|
||||
# Project related configuration options
|
||||
#---------------------------------------------------------------------------
|
||||
PROJECT_NAME = HTML Purifier
|
||||
PROJECT_NUMBER = 1.4.0
|
||||
PROJECT_NUMBER = 1.6.1
|
||||
OUTPUT_DIRECTORY = "C:/Documents and Settings/Edward/My Documents/My Webs/htmlpurifier/docs/doxygen"
|
||||
CREATE_SUBDIRS = NO
|
||||
OUTPUT_LANGUAGE = English
|
||||
|
6
INSTALL
6
INSTALL
@@ -47,7 +47,9 @@ HTML Purifier is all about web-standards, so accordingly your webpages should
|
||||
be standards compliant. HTML Purifier can deal with these doctypes:
|
||||
|
||||
* XHTML 1.0 Transitional (default)
|
||||
* XHTML 1.0 Strict
|
||||
* HTML 4.01 Transitional
|
||||
* HTML 4.01 Strict
|
||||
|
||||
...and these character encodings:
|
||||
|
||||
@@ -87,7 +89,7 @@ into configuring things just for the heck of it, skip to 4.3).
|
||||
* Am I using UTF-8?
|
||||
* Am I using XHTML 1.0 Transitional?
|
||||
|
||||
If you answered yes to any of these questions, instantiate a configuration
|
||||
If you answered no to any of these questions, instantiate a configuration
|
||||
object and read on:
|
||||
|
||||
$config = HTMLPurifier_Config::createDefault();
|
||||
@@ -141,7 +143,7 @@ versions will also allow strict-compliant output.
|
||||
4.3. Other settings
|
||||
|
||||
There are more configuration directives which can be read about
|
||||
here: <http://hp.jpsband.org/live/configdoc/plain.html> They're a bit boring,
|
||||
here: <http://htmlpurifier.org/live/configdoc/plain.html> They're a bit boring,
|
||||
but they can help out for those of you who like to exert maximum control over
|
||||
your code.
|
||||
|
||||
|
71
INSTALL.fr.utf8
Normal file
71
INSTALL.fr.utf8
Normal file
@@ -0,0 +1,71 @@
|
||||
|
||||
Installation
|
||||
Comment installer HTML Purifier
|
||||
|
||||
Attention: Ce document a encode en UTF-8. Si les lettres avec les accents
|
||||
est essoreuse, prenez un mieux editeur de texte.
|
||||
|
||||
À L'Aide: Je ne suis pas un diseur natif de français. Si vous trouvez une
|
||||
erreur dans ce document, racontez-moi! Merci.
|
||||
|
||||
|
||||
L'installation de HTML Purifier est trés simple, parce qu'il ne doit pas
|
||||
la configuration. Dans le pied de de document, les utilisateurs
|
||||
impatient peuvent trouver le code, mais je recommande que vous lisez
|
||||
ce document pour quelques choses.
|
||||
|
||||
|
||||
1. Compatibilité
|
||||
|
||||
HTML Purifier fonctionne dans PHP 4 et PHP 5. PHP 4.3.9 est le dernier
|
||||
version que je le testais. Il ne dépend de les autre librairies.
|
||||
|
||||
Les extensions optionnel est iconv (en général déjà installer) et
|
||||
tidy (répandu aussi). Si vous utilisez UTF-8 et ne voulez pas
|
||||
l'indentation, vous pouvez utiliser HTML Purifier sans ces extensions.
|
||||
|
||||
|
||||
2. Inclure la librarie
|
||||
|
||||
Utilisez:
|
||||
|
||||
require_once '/path/to/library/HTMLPurifier.auto.php';
|
||||
|
||||
...quand vous devez utiliser HTML Purifier (ne inclure pas quand vous
|
||||
ne devez pas, parce que HTML Purifier est trés grand.)
|
||||
|
||||
Si vous n'aime pas que HTML Purifier change vos include_path, on peut
|
||||
change vos include_path, et:
|
||||
|
||||
require_once 'HTMLPurifier.php';
|
||||
|
||||
Seuleument les contents dans library/ est essentiel; vous peut enlever
|
||||
les autre fichiers quand vous est dans une atmosphère professionnel.
|
||||
|
||||
|
||||
[En cours de construction]
|
||||
|
||||
|
||||
6. Installation vite
|
||||
|
||||
Si votre site web est en UTF-8 et XHTML Transitional, utilisez:
|
||||
|
||||
<?php
|
||||
require_once '/path/to/htmlpurifier/library/HTMLPurifier.auto.php';
|
||||
|
||||
$purificateur = new HTMLPurifier();
|
||||
$html_propre = $purificateur->purify($html_salle);
|
||||
?>
|
||||
|
||||
Sinon, utilisez:
|
||||
|
||||
<?php
|
||||
require_once '/path/to/htmlpurifier/library/HTMLPurifier.auto.php';
|
||||
|
||||
$config = HTMLPurifier_Config::createDefault();
|
||||
$config->set('Core', 'Encoding', 'ISO-8859-1'); //remplacez avec votre encoding
|
||||
$config->set('Core', 'XHTML', true); //remplacez avec false si HTML 4.01
|
||||
$purificateur = new HTMLPurifier($config);
|
||||
|
||||
$html_propre = $purificateur->purify($html_salle);
|
||||
?>
|
90
NEWS
90
NEWS
@@ -9,6 +9,96 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier
|
||||
. Internal change
|
||||
==========================
|
||||
|
||||
1.7.0, unknown release date
|
||||
|
||||
1.6.1, released 2007-05-05
|
||||
! Support for more deprecated attributes via transformations:
|
||||
+ hspace and vspace in img
|
||||
+ size and noshade in hr
|
||||
+ nowrap in td
|
||||
+ clear in br
|
||||
+ align in caption, table, img and hr
|
||||
+ type in ul, ol and li
|
||||
! DirectLex now preserves text in which a < bracket is followed by
|
||||
a non-alphanumeric character. This means that certain emoticons
|
||||
are now preserved.
|
||||
! %Core.RemoveInvalidImg is now operational, when set to false invalid
|
||||
images will hang around with an empty src
|
||||
! target attribute in a tag supported, use %Attr.AllowedFrameTargets
|
||||
to enable
|
||||
! CSS property white-space now allows nowrap (supported in all modern
|
||||
browsers) but not others (which have spotty browser implementations)
|
||||
! XHTML 1.1 mode now sort-of works without any fatal errors, and
|
||||
lang is now moved over to xml:lang.
|
||||
! Attribute transformation smoketest available at smoketests/attrTransform.php
|
||||
! Transformation of font's size attribute now handles super-large numbers
|
||||
- Possibly fatal bug with __autoload() fixed in module manager
|
||||
- Invert HTMLModuleManager->addModule() processing order to check
|
||||
prefixes first and then the literal module
|
||||
- Empty strings get converted to empty arrays instead of arrays with
|
||||
an empty string in them.
|
||||
- Merging in attribute lists now works.
|
||||
. Demo script removed: it has been added to the website's repository
|
||||
. Basic.php script modified to work out of the box
|
||||
. Refactor AttrTransform classes to reduce duplication
|
||||
. AttrTransform_TextAlign axed in favor of a more general
|
||||
AttrTransform_EnumToCSS, refer to HTMLModule/TransformToStrict.php to
|
||||
see how the new equivalent is implemented
|
||||
. Unit tests now use exclusively assertIdentical
|
||||
|
||||
1.6.0, released 2007-04-01
|
||||
! Support for most common deprecated attributes via transformations:
|
||||
+ bgcolor in td, th, tr and table
|
||||
+ border in img
|
||||
+ name in a and img
|
||||
+ width in td, th and hr
|
||||
+ height in td, th
|
||||
! Support for CSS attribute 'height' added
|
||||
! Support for rel and rev attributes in a tags added, use %Attr.AllowedRel
|
||||
and %Attr.AllowedRev to activate
|
||||
- You can define ID blacklists using regular expressions via
|
||||
%Attr.IDBlacklistRegexp
|
||||
- Error messages are emitted when you attempt to "allow" elements or
|
||||
attributes that HTML Purifier does not support
|
||||
|
||||
1.5.1, unknown release date
|
||||
- Fix segfault in unit test. The problem is not very reproduceable and
|
||||
I don't know what causes it, but a six line patch fixed it.
|
||||
|
||||
1.5.0, released 2007-03-23
|
||||
! Added a rudimentary I18N and L10N system modeled off MediaWiki. It
|
||||
doesn't actually do anything yet, but keep your eyes peeled.
|
||||
! docs/enduser-utf8.html explains how to use UTF-8 and HTML Purifier
|
||||
! Newly structured HTMLDefinition modeled off of XHTML 1.1 modules.
|
||||
I am loathe to release beta quality APIs, but this is exactly that;
|
||||
don't use the internal interfaces if you're not willing to do migration
|
||||
later on.
|
||||
- Allow 'x' subtag in language codes
|
||||
- Fixed buggy chameleon-support for ins and del
|
||||
. Added support for IDREF attributes (i.e. for)
|
||||
. Renamed HTMLPurifier_AttrDef_Class to HTMLPurifier_AttrDef_Nmtokens
|
||||
. Removed context variable ParentType, replaced with IsInline, which
|
||||
is false when you're not inline and an integer of the parent that
|
||||
caused you to become inline when you are (so possibly zero)
|
||||
. Removed ElementDef->type in favor of ElementDef->descendants_are_inline
|
||||
and HTMLDefinition->content_sets
|
||||
. StrictBlockquote now reports what elements its supposed to allow,
|
||||
rather than what it does allow
|
||||
. Removed HTMLDefinition->info_flow_elements in favor of
|
||||
HTMLDefinition->content_sets['Flow']
|
||||
. Removed redundant "exclusionary" definitions from DTD roster
|
||||
. StrictBlockquote now requires a construction parameter as if it
|
||||
were an Required ChildDef, this is the "real" set of allowed elements
|
||||
. AttrDef partitioned into HTML, CSS and URI segments
|
||||
. Modify Youtube filter regexp to be multiline
|
||||
. Require both PHP5 and DOM extension in order to use DOMLex, fixes
|
||||
some edge cases where a DOMDocument class exists in a PHP4 environment
|
||||
due to DOM XML extension.
|
||||
|
||||
1.4.1, released 2007-01-21
|
||||
! docs/enduser-youtube.html updated according to new functionality
|
||||
- YouTube IDs can have underscores and dashes
|
||||
|
||||
1.4.0, released 2007-01-21
|
||||
! Implemented list-style-image, URIs now allowed in list-style
|
||||
! Implemented background-image, background-repeat, background-attachment
|
||||
|
2
README
2
README
@@ -19,4 +19,4 @@ Places to go:
|
||||
an in-depth installation guide.
|
||||
* See WYSIWYG for information on editors like TinyMCE and FCKeditor
|
||||
|
||||
HTML Purifier can be found on the web at: http://hp.jpsband.org/
|
||||
HTML Purifier can be found on the web at: http://htmlpurifier.org/
|
||||
|
54
TODO
54
TODO
@@ -4,32 +4,36 @@ TODO List
|
||||
= KEY ====================
|
||||
# Flagship
|
||||
- Regular
|
||||
? At-risk
|
||||
? Maybe I'll Do It
|
||||
==========================
|
||||
|
||||
1.5 release
|
||||
# Implement all non-essential attribute transforms, configurable
|
||||
1.7 release [Advanced API]
|
||||
# Complete advanced API, and fully document it
|
||||
# Implement all edge-case attribute transforms
|
||||
# Implement all deprecated tags and attributes
|
||||
- Parse TinyMCE-style whitelist into our %HTML.Allow* whitelists (possibly
|
||||
do this earlier)
|
||||
? HTML interface for tweaking configuration to see changes
|
||||
|
||||
1.8 release [Refactor, refactor!]
|
||||
# URI validation routines tighter (see docs/dev-code-quality.html) (COMPLEX)
|
||||
# Advanced URI filtering schemes (see docs/proposal-new-directives.txt)
|
||||
- Configuration profiles: predefined directives set with one func call
|
||||
- Implement IDREF support (harder than it seems, since you cannot have
|
||||
IDREFs to non-existent IDs)
|
||||
- Allow non-ASCII characters in font names
|
||||
|
||||
1.9 release [Error'ed]
|
||||
# Error logging for filtering/cleanup procedures
|
||||
- Requires I18N facilities to be created first (COMPLEX)
|
||||
? Configuration profiles: sets of directives that get set with one func call
|
||||
- XSS-attempt detection
|
||||
|
||||
1.6 release
|
||||
# Add pre-packaged "levels" of cleaning (custom behavior already done)
|
||||
- More fine-grained control over escaping behavior
|
||||
- Silently drop content inbetween SCRIPT tags (can be generalized to allow
|
||||
specification of elements that, when detected as foreign, trigger removal
|
||||
of children, although unbalanced tags could wreck havoc (or at least
|
||||
delete the rest of the document)).
|
||||
- Allow specifying global attributes on a tag-by-tag basis in
|
||||
%HTML.AllowAttributes
|
||||
? More user-friendly warnings when %HTML.Allow* attempts to specify a
|
||||
tag or attribute that is not supported
|
||||
- Parse TinyMCE whitelist into our %HTML.Allow* whitelists
|
||||
|
||||
1.7 release
|
||||
1.10 release [Do What I Mean, Not What I Say]
|
||||
# Additional support for poorly written HTML
|
||||
- Microsoft Word HTML cleaning (i.e. MsoNormal, but research essential!)
|
||||
- Friendly strict handling of <address> (block -> <br>)
|
||||
@@ -44,7 +48,7 @@ TODO List
|
||||
- Append something to duplicate IDs so they're still usable (impl. note: the
|
||||
dupe detector would also need to detect the suffix as well)
|
||||
|
||||
2.0 release
|
||||
2.0 release [Beyond HTML]
|
||||
# Legit token based CSS parsing (will require revamping almost every
|
||||
AttrDef class)
|
||||
# Formatters for plaintext (COMPLEX)
|
||||
@@ -53,33 +57,33 @@ TODO List
|
||||
- Linkify URLs
|
||||
- Smileys
|
||||
- Linkification for HTML Purifier docs: notably configuration and classes
|
||||
|
||||
3.0 release
|
||||
- Extended HTML capabilities based on namespacing and tag transforms (COMPLEX)
|
||||
- Hooks for adding custom processors to custom namespaced tags and
|
||||
attributes, offer default implementation
|
||||
- Lots of documentation and samples
|
||||
- Allow tags to be "armored", an internal flag that protects them
|
||||
from validation and passes them out unharmed
|
||||
- XHTML 1.1 support
|
||||
- Fixes for Firefox's inability to handle COL alignment props (Bug 915)
|
||||
- Automatically add non-breaking spaces to empty table cells when
|
||||
empty-cells:show is applied to have compatibility with Internet Explorer
|
||||
- Convert RTL/LTR override characters to <bdo> tags, or vice versa on demand.
|
||||
Also, enable disabling of directionality
|
||||
|
||||
3.0 release [To XML and Beyond]
|
||||
- Extended HTML capabilities based on namespacing and tag transforms (COMPLEX)
|
||||
- Hooks for adding custom processors to custom namespaced tags and
|
||||
attributes, offer default implementation
|
||||
- Lots of documentation and samples
|
||||
- XHTML 1.1 support
|
||||
|
||||
Ongoing
|
||||
- Lots of profiling, make it faster!
|
||||
- Plugins for major CMSes (COMPLEX)
|
||||
- WordPress
|
||||
- WordPress (mostly written, needs beta-testing)
|
||||
- eFiction
|
||||
- more! (look for ones that use WYSIWYGs)
|
||||
|
||||
Unknown release (on a scratch-an-itch basis)
|
||||
- Upgrade SimpleTest testing code to newest versions
|
||||
- Have 'lang' attribute be checked against official lists
|
||||
? Semi-lossy dumb alternate character encoding transformations, achieved by
|
||||
? Semi-lossy dumb alternate character encoding transfor
|
||||
? Have 'lang' attribute be checked against official lists, achieved by
|
||||
encoding all characters that have string entity equivalents
|
||||
- Explain how to use HTML Purifier in non-PHP languages
|
||||
|
||||
Requested
|
||||
? Native content compression, whitespace stripping (don't rely on Tidy, make
|
||||
|
7
WHATSNEW
Normal file
7
WHATSNEW
Normal file
@@ -0,0 +1,7 @@
|
||||
The 1.6.1 release, code-named 'Ach! We missed something! Run!', completes
|
||||
HTML Purifier's roster of attribute transformations. It also implements
|
||||
a number of minor features (such as better font transformations, smarter
|
||||
HTML parsing, the CSS property 'white-space' and XHTML 1.1), a few bug
|
||||
fixes (most notably fixed __autoload compatibility issues) and a ton
|
||||
of refactoring. 1.6 was for things that absolutely could not wait: this
|
||||
release, developed in a more leisurely pace, fills in the gaps.
|
@@ -7,6 +7,7 @@ set_include_path(get_include_path() . PATH_SEPARATOR . '../library/');
|
||||
|
||||
require_once 'HTMLPurifier/ConfigSchema.php';
|
||||
require_once 'HTMLPurifier/Config.php';
|
||||
require_once 'HTMLPurifier/Context.php';
|
||||
|
||||
$LEXERS = array();
|
||||
$RUNS = isset($GLOBALS['HTMLPurifierTest']['Runs'])
|
||||
@@ -93,11 +94,14 @@ function print_lexers() {
|
||||
function do_benchmark($name, $document) {
|
||||
global $LEXERS, $RUNS;
|
||||
|
||||
$config = HTMLPurifier_Config::createDefault();
|
||||
$context = new HTMLPurifier_Context();
|
||||
|
||||
$timer = new RowTimer($name);
|
||||
$timer->start();
|
||||
|
||||
foreach($LEXERS as $key => $lexer) {
|
||||
for ($i=0; $i<$RUNS; $i++) $tokens = $lexer->tokenizeHTML($document);
|
||||
for ($i=0; $i<$RUNS; $i++) $tokens = $lexer->tokenizeHTML($document, $config, $context);
|
||||
$timer->setMarker($key);
|
||||
}
|
||||
|
||||
|
@@ -5,12 +5,15 @@ set_include_path(get_include_path() . PATH_SEPARATOR . '../library/');
|
||||
require_once 'HTMLPurifier/ConfigSchema.php';
|
||||
require_once 'HTMLPurifier/Config.php';
|
||||
require_once 'HTMLPurifier/Lexer/DirectLex.php';
|
||||
require_once 'HTMLPurifier/Context.php';
|
||||
|
||||
$input = file_get_contents('samples/Lexer/4.html');
|
||||
$lexer = new HTMLPurifier_Lexer_DirectLex();
|
||||
$config = HTMLPurifier_Config::createDefault();
|
||||
$context = new HTMLPurifier_Context();
|
||||
|
||||
for ($i = 0; $i < 10; $i++) {
|
||||
$tokens = $lexer->tokenizeHTML($input);
|
||||
$tokens = $lexer->tokenizeHTML($input, $config, $context);
|
||||
}
|
||||
|
||||
?>
|
@@ -24,8 +24,7 @@ error_reporting(E_ALL);
|
||||
// ---------------------------------------------------------------------------
|
||||
// Include HTML Purifier library
|
||||
|
||||
set_include_path('../library' . PATH_SEPARATOR . get_include_path());
|
||||
require_once 'HTMLPurifier.php';
|
||||
require_once '../library/HTMLPurifier.auto.php';
|
||||
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
@@ -188,7 +187,7 @@ $xsl_processor->importStylesheet($xsl_dom_stylesheet);
|
||||
$html_output = $xsl_processor->transformToXML($dom_document);
|
||||
|
||||
// some slight fudges to preserve backwards compatibility
|
||||
$html_output = str_replace('/>', ' />', $html_output); // <br /> not <br>
|
||||
$html_output = str_replace('/>', ' />', $html_output); // <br /> not <br/>
|
||||
$html_output = str_replace(' xmlns=""', '', $html_output); // rm unnecessary xmlns
|
||||
|
||||
if (class_exists('Tidy')) {
|
||||
|
287
docs/dev-advanced-api.html
Normal file
287
docs/dev-advanced-api.html
Normal file
@@ -0,0 +1,287 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
|
||||
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
|
||||
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"><head>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
|
||||
<meta name="description" content="Functional specification for HTML Purifier's advanced API for defining custom filtering behavior." />
|
||||
<link rel="stylesheet" type="text/css" href="style.css" />
|
||||
|
||||
<title>Advanced API - HTML Purifier</title>
|
||||
|
||||
</head><body>
|
||||
|
||||
<h1>Advanced API</h1>
|
||||
|
||||
<div id="filing">Filed under Development</div>
|
||||
<div id="index">Return to the <a href="index.html">index</a>.</div>
|
||||
<div id="home"><a href="http://htmlpurifier.org/">HTML Purifier</a> End-User Documentation</div>
|
||||
|
||||
<p>HTML Purifier currently natively supports only a subset of HTML's
|
||||
allowed elements, attributes, and behavior. This is by design,
|
||||
but as the user is always right, they'll need some method to overload
|
||||
these behaviors.</p>
|
||||
|
||||
<p>Our goals are to let the user:</p>
|
||||
|
||||
<dl>
|
||||
<dt>Select</dt>
|
||||
<dd><ul>
|
||||
<li>Doctype</li>
|
||||
<li>Mode: Lenient / Correctional</li>
|
||||
<li>Elements / Attributes / Modules</li>
|
||||
<li>Filterset</li>
|
||||
</ul></dd>
|
||||
<dt>Customize</dt>
|
||||
<dd><ul>
|
||||
<li>Attributes</li>
|
||||
<li>Elements</li>
|
||||
</ul></dd>
|
||||
<dt>Internals</dt>
|
||||
<dd><ul>
|
||||
<li>Modules / Elements / Attributes / Attribute Types</li>
|
||||
<li>Filtersets</li>
|
||||
<li>Doctype</li>
|
||||
</ul></dd>
|
||||
</dl>
|
||||
|
||||
<h2>Select</h2>
|
||||
|
||||
<p>For basic use, the user will have to specify some basic parameters. This
|
||||
is not strictly necessary, as HTML Purifier's default setting will always
|
||||
output safe code, but is required for standards-compliant output.</p>
|
||||
|
||||
<h3>Selecting a Doctype</h3>
|
||||
|
||||
<p>The first thing to select is the <strong>doctype</strong>. This
|
||||
is essential for standards-compliant output.</p>
|
||||
|
||||
<p class="technical">This identifier is based
|
||||
on the name the W3C has given to the document type and <em>not</em>
|
||||
the DTD identifier.</p>
|
||||
|
||||
<p>This parameter is set via the configuration object:</p>
|
||||
|
||||
<pre>$config->set('HTML', 'Doctype', 'XHTML 1.0 Transitional');</pre>
|
||||
|
||||
<p>Due to historical reasons, the default doctype is XHTML 1.0
|
||||
Transitional, however, we really shouldn't be guessing what the user's
|
||||
doctype is. Fortunantely, people who can't be bothered to set this won't
|
||||
be bothered when their pages stop validating.</p>
|
||||
|
||||
<h3>Selecting Mode</h3>
|
||||
|
||||
<p>Within doctypes, there are various <strong>modes</strong> of operation.
|
||||
These indicate variant behaviors that, while not strictly changing the
|
||||
allowed set of elements and attributes, definitely affect the output.
|
||||
Currently, we have two modes, which may be used together:</p>
|
||||
|
||||
<dl>
|
||||
<dt>Lenient</dt>
|
||||
<dd>
|
||||
<p>Deprecated elements and attributes will be transformed into
|
||||
standards-compliant alternatives when explicitly disallowed.</p>
|
||||
<p>For example, in the XHTML 1.0 Strict doctype, a <code>center</code>
|
||||
element would be turned into a <code>div</code> with the CSS property
|
||||
<code>text-align:center;</code>, but in XHTML 1.0 Transitional
|
||||
the element would be preserved.</p>
|
||||
<p>This mode is on by default.</p>
|
||||
</dd>
|
||||
<dt>Correctional[items to correct]</dt>
|
||||
<dd>
|
||||
<p>Deprecated elements and attributes will be transformed into
|
||||
standards-compliant alternatives whenever possible.
|
||||
It may have various levels of operation.</p>
|
||||
<p>Referring back to the previous example, the <code>center</code> element would
|
||||
be transformed in both cases. However, elements without a
|
||||
reasonable standards-compliant alternative will be preserved
|
||||
in their form.</p>
|
||||
<p>A user may want to correct certain deprecated attributes, but
|
||||
not others. For example, the <code>bgcolor</code> attribute may be
|
||||
acceptable, but the <code>center</code> element not; also, possibly,
|
||||
an HTML Purifier transformation may be buggy, so the user wants
|
||||
to forgo it. Thus, correctional accepts an array defining which
|
||||
elements and attributes to cleanup, or no parameter at all, which
|
||||
means everything gets corrected. This also means that each
|
||||
correction needs to be given a unique ID that can be referenced
|
||||
in this manner. (We may also allow globbing, like *.name or a.*
|
||||
for mass-enabling correction, and subtractive mode, where things
|
||||
specified stop correction.) This array gets passed into the
|
||||
constructor of the mode's module.</p>
|
||||
<p>This mode is on by default.</p>
|
||||
</dd>
|
||||
</dl>
|
||||
|
||||
<p>A possible call to select modes would be:</p>
|
||||
|
||||
<pre>$config->set('HTML', 'Mode', array('correctional', 'lenient'));</pre>
|
||||
|
||||
<p>If modes have extra parameters, a hash is necessary:</p>
|
||||
|
||||
<pre>$config->set('HTML', 'Mode', array(
|
||||
'correctional' => 'center,a.name',
|
||||
'lenient' => true // this one's just boolean
|
||||
));</pre>
|
||||
|
||||
<p>Modes may be specified along with the doctype declaration (we may want
|
||||
to get a better set of separator characters):</p>
|
||||
|
||||
<pre>$config->setDoctype('XHTML Transitional 1.0', '+correctional[center,a.name] -lenient');</pre>
|
||||
|
||||
<p>
|
||||
With regards to the various levels of operation conjectured in the
|
||||
Correctional mode, this is prompted by the fact that a user may want to
|
||||
correct certain problems but not others, for example, fix the <code>center</code>
|
||||
element but not the <code>u</code> element, both of which are deprecated.
|
||||
Having an integer <q>level</q> will not work very well for such fine
|
||||
grained tweaking, but an array of specific settings might.</p>
|
||||
|
||||
<h3>Selecting Elements / Attributes / Modules</h3>
|
||||
|
||||
<p></p>
|
||||
|
||||
<p>If this cookie cutter approach doesn't appeal to a user, they may
|
||||
decide to roll their own filterset by selecting modules, elements and
|
||||
attributes to allow.</p>
|
||||
|
||||
<p class="technical">This would make use of the same facilities
|
||||
as a filterset author would use, except that it would go under an
|
||||
<q>anonymous</q> filterset that would be auto-selected if any of the
|
||||
relevant module/elements/attribute selection configuration directives were
|
||||
non-null.</p>
|
||||
|
||||
<p>In practice, this is the most commonly demanded feature. Most users are
|
||||
perfectly happy defining a filterset that looks like:</p>
|
||||
|
||||
<pre>$config->setAllowedHTML('a[href,title];em;p;blockquote');</pre>
|
||||
|
||||
<p class="technical">The directive %HTML.Allowed is a convenience function
|
||||
that may be fully expressed with the legacy interface, and thus is
|
||||
given its own setter.</p>
|
||||
|
||||
<p>We currently support a separated interface, which also must be preserved:</p>
|
||||
|
||||
<pre>$config->set('HTML', 'AllowedElements', 'a,em,p,blockquote');
|
||||
$config->set('HTML', 'AllowedAttributes', 'a.href,a.title');</pre>
|
||||
|
||||
<p>A user may also choose to allow modules:</p>
|
||||
|
||||
<pre>$config->set('HTML', 'AllowedModules', 'Hypertext,Text,Lists'); // or
|
||||
$config->setAllowedHTML('Hypertext,Text,Lists');</pre>
|
||||
|
||||
<p>But it is not expected that this feature will be widely used.</p>
|
||||
|
||||
<p class="fixme">The granularity of these modules is too coarse for
|
||||
the average user (for example, the core module loads everything from
|
||||
the essential <code>p</code> element to the not-so-safe <code>h1</code>
|
||||
element). How do we make this still a viable solution? Possible answers
|
||||
may be sub-modules or module parameters. This may not even be a problem,
|
||||
considering that most people won't be selecting modules.</p>
|
||||
|
||||
<p class="technical">Modules are distinguished from regular elements by the
|
||||
case of their first letter. While XML distinguishes between and allows
|
||||
lower and uppercase letters in element names, most well-known XML
|
||||
languages use only lower-case
|
||||
element names for sake of consistency.</p>
|
||||
|
||||
<p class="technical">Considering that, internally speaking, as mandated by
|
||||
the XHTML 1.1 Modularization specification, we have organized our
|
||||
elements around modules, considerable gymnastics will be needed to
|
||||
get this sort of functionality working.</p>
|
||||
|
||||
<h3>Unified selector</h3>
|
||||
|
||||
<p>Because selecting each and every one of these configuration options
|
||||
is a chore, we may wish to offer a specialized configuration method
|
||||
for selecting a filterset. Possibility:</p>
|
||||
|
||||
<pre>function selectFilter($doctype, $filterset, $mode)</pre>
|
||||
|
||||
<p>...which is simply a light wrapper over the individual configuration
|
||||
calls. A custom config file format or text format could also be adopted.</p>
|
||||
|
||||
<h2>Customize</h2>
|
||||
|
||||
<p>By reviewing topic posts in the support forum, we determined that
|
||||
there were two primarily demanded customization features people wanted:
|
||||
to add an attribute to an existing element, and to add an element.
|
||||
Thus, we'll want to create convenience functions for these common
|
||||
use-cases.</p>
|
||||
|
||||
<p>Note that the functions described here are only available if
|
||||
a raw copy of <code>HTMLPurifier_HTMLDefinition</code> was retrieved.
|
||||
<code>addAttribute</code> may work on a processed copy, but for
|
||||
consistency's sake we will mandate this for everything.</p>
|
||||
|
||||
<h3>Attributes</h3>
|
||||
|
||||
<p>An attribute is bound to an element by a name and has a specific
|
||||
<code>AttrDef</code> that validates it. Thus, the interface should
|
||||
be:</p>
|
||||
|
||||
<pre>function addAttribute($element, $attribute, $attribute_def);</pre>
|
||||
|
||||
<p>With a use-case that looks like:</p>
|
||||
|
||||
<pre>$def->addAttribute('a', 'rel', new HTMLPurifier_AttrDef_Enum(array('nofollow')));</pre>
|
||||
|
||||
<p>The <code>$attribute_def</code> value can be a little flexible,
|
||||
to make things simpler. We'll let it also be:</p>
|
||||
|
||||
<ul>
|
||||
<li>Class name: We'll instantiate it for you</li>
|
||||
<li>Function name: We'll create an <code>HTMLPurifier_AttrDef_Anonymous</code>
|
||||
class with that function registered as a callback.</li>
|
||||
<li>String attribute type: We'll use <code>HTMLPurifier_AttrTypes</code>
|
||||
</li>
|
||||
<li>String starting with <code>enum(</code>: We'll explode it and stuff it in an
|
||||
<code>HTMLPurifier_AttrDef_Enum</code> for you.</li>
|
||||
</ul>
|
||||
|
||||
<p>Making the previous example written as:</p>
|
||||
|
||||
<pre>$def->addAttribute('a', 'rel', 'enum(nofollow)');</pre>
|
||||
|
||||
<h3>Elements</h3>
|
||||
|
||||
<p>An element requires certain information as specified by
|
||||
<code>HTMLPurifier_ElementDef</code>. However, not all of it is necessary,
|
||||
the usual things required are:</p>
|
||||
|
||||
<ul>
|
||||
<li>Attributes</li>
|
||||
<li>Content model/type</li>
|
||||
<li>Registration in a content set</li>
|
||||
</ul>
|
||||
|
||||
<p>This suggests an API like this:</p>
|
||||
|
||||
<pre>function addElement($element, $type, $content_model, $attributes = array());</pre>
|
||||
|
||||
<p>Each parameter explained in depth:</p>
|
||||
|
||||
<dl>
|
||||
<dt><code>$element</code></dt>
|
||||
<dd>Element name, ex. 'label'</dd>
|
||||
<dt><code>$type</code></dt>
|
||||
<dd>Content set to register in, ex. 'Inline' or 'Flow'</dd>
|
||||
<dt><code>$content_model</code></dt>
|
||||
<dd>Description of allowed children. This is a merged form of
|
||||
<code>HTMLPurifier_ElementDef</code>'s member variables
|
||||
<code>$content_model</code> and <code>$content_model_type</code>,
|
||||
where the form is <q>Type: Model</q>, ex. 'Optional: Inline'.</dd>
|
||||
<dt><code>$attributes</code></dt>
|
||||
<dd>Array of attribute names to attribute definitions, much like
|
||||
the above-described attribute customization.</dd>
|
||||
</dl>
|
||||
|
||||
<p>A possible usage:</p>
|
||||
|
||||
<pre>$def->addElement('font', 'Inline', 'Optional: Inline',
|
||||
array(0 => array('Common'), 'color' => 'Color'));</pre>
|
||||
|
||||
<p>We may want to Common attribute collection inclusion to be added
|
||||
by default.</p>
|
||||
|
||||
<div id="version">$Id$</div>
|
||||
|
||||
</body></html>
|
@@ -1,31 +1,16 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
|
||||
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
|
||||
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"><head>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
|
||||
<meta name="description" content="Discusses code quality issues and places that need to be refactored in HTML Purifier." />
|
||||
<link rel="stylesheet" type="text/css" href="./style.css" />
|
||||
|
||||
<title>Code Quality Issues - HTML Purifier</title>
|
||||
Code Quality Issues
|
||||
|
||||
</head><body>
|
||||
|
||||
<h1>Code Quality Issues</h1>
|
||||
|
||||
<div id="filing">Filed under Development</div>
|
||||
<div id="index">Return to the <a href="index.html">index</a>.</div>
|
||||
<div id="home"><a href="http://hp.jpsband.org/">HTML Purifier</a> End-User Documentation</div>
|
||||
|
||||
<p>Okay, face it. Programmers can get lazy, cut corners, or make mistakes. They
|
||||
Okay, face it. Programmers can get lazy, cut corners, or make mistakes. They
|
||||
also can do quick prototypes, and then forget to rewrite them later. Well,
|
||||
while I can't list mistakes in here, I can list prototype-like segments
|
||||
of code that should be aggressively refactored. This does not list
|
||||
optimization issues, that needs to be done after intense profiling.</p>
|
||||
optimization issues, that needs to be done after intense profiling.
|
||||
|
||||
<pre>
|
||||
docs/examples/demo.php - ad hoc HTML/PHP soup to the extreme
|
||||
|
||||
AttrDef
|
||||
AttrDef - a lot of duplication, more generic classes need to be created;
|
||||
a lot of strtolower() calls, no legit casing
|
||||
Class - doesn't support Unicode characters (fringe); uses regular
|
||||
expressions
|
||||
Lang - code duplication; premature optimization
|
||||
@@ -45,8 +30,3 @@ URIScheme - needs to have callable generic checks
|
||||
mailto - doesn't validate emails, doesn't validate querystring
|
||||
news - doesn't validate opaque path
|
||||
nntp - doesn't constrain path
|
||||
</pre>
|
||||
|
||||
<div id="version">$Id$</div>
|
||||
|
||||
</body></html>
|
@@ -14,7 +14,7 @@
|
||||
|
||||
<div id="filing">Filed under Development</div>
|
||||
<div id="index">Return to the <a href="index.html">index</a>.</div>
|
||||
<div id="home"><a href="http://hp.jpsband.org/">HTML Purifier</a> End-User Documentation</div>
|
||||
<div id="home"><a href="http://htmlpurifier.org/">HTML Purifier</a> End-User Documentation</div>
|
||||
|
||||
<p>The classes in this library follow a few naming conventions, which may
|
||||
help you find the correct functionality more quickly. Here they are:</p>
|
||||
|
@@ -14,7 +14,7 @@
|
||||
|
||||
<div id="filing">Filed under Development</div>
|
||||
<div id="index">Return to the <a href="index.html">index</a>.</div>
|
||||
<div id="home"><a href="http://hp.jpsband.org/">HTML Purifier</a> End-User Documentation</div>
|
||||
<div id="home"><a href="http://htmlpurifier.org/">HTML Purifier</a> End-User Documentation</div>
|
||||
|
||||
<p>Here are some possible optimization techniques we can apply to code sections if
|
||||
they turn out to be slow. Be sure not to prematurely optimize: if you get
|
||||
|
@@ -32,7 +32,7 @@ thead th {text-align:left;padding:0.1em;background-color:#EEE;}
|
||||
|
||||
<div id="filing">Filed under Development</div>
|
||||
<div id="index">Return to the <a href="index.html">index</a>.</div>
|
||||
<div id="home"><a href="http://hp.jpsband.org/">HTML Purifier</a> End-User Documentation</div>
|
||||
<div id="home"><a href="http://htmlpurifier.org/">HTML Purifier</a> End-User Documentation</div>
|
||||
|
||||
<h2>Key</h2>
|
||||
|
||||
@@ -142,7 +142,7 @@ thead th {text-align:left;padding:0.1em;background-color:#EEE;}
|
||||
|
||||
<tbody>
|
||||
<tr><th colspan="2">Unknown</th></tr>
|
||||
<tr class="danger css1 impl-yes"><td>background-image</td><td>Dangerous, target milestone 1.3</td></tr>
|
||||
<tr class="danger css1 impl-yes"><td>background-image</td><td>Dangerous</td></tr>
|
||||
<tr class="css1 impl-yes"><td>background-attachment</td><td>ENUM(scroll, fixed),
|
||||
Depends on background-image</td></tr>
|
||||
<tr class="css1 impl-yes"><td>background-position</td><td>Depends on background-image</td></tr>
|
||||
@@ -151,7 +151,7 @@ thead th {text-align:left;padding:0.1em;background-color:#EEE;}
|
||||
will not implement list-item, run-in (Opera only) or table (no IE);
|
||||
inline-block has incomplete IE6 support and requires -moz-inline-box
|
||||
for Mozilla. Unknown target milestone.</td></tr>
|
||||
<tr class="css1"><td>height</td><td>Interesting, why use it? Unknown target milestone.</td></tr>
|
||||
<tr class="css1 impl-yes"><td>height</td><td>Interesting, why use it? Unknown target milestone.</td></tr>
|
||||
<tr class="danger css1 impl-yes"><td>list-style-image</td><td>Dangerous?</td></tr>
|
||||
<tr class="impl-no"><td>max-height</td><td rowspan="4">No IE 5/6</td></tr>
|
||||
<tr class="impl-no"><td>min-height</td></tr>
|
||||
@@ -168,9 +168,9 @@ thead th {text-align:left;padding:0.1em;background-color:#EEE;}
|
||||
<tr class="impl-no"><td>quotes</td><td>May be dropped from CSS2, fairly useless for inline context</td></tr>
|
||||
<tr class="impl-no"><td>visibility</td><td>ENUM(visible, hidden, collapse),
|
||||
Dangerous</td></tr>
|
||||
<tr class="css1 feature"><td>white-space</td><td>ENUM(normal, pre, nowrap, pre-wrap,
|
||||
<tr class="css1 feature impl-partial"><td>white-space</td><td>ENUM(normal, pre, nowrap, pre-wrap,
|
||||
pre-line), Spotty implementation:
|
||||
pre (no IE 5/6), nowrap (no IE 5),
|
||||
pre (no IE 5/6), <em>nowrap</em> (no IE 5, supported),
|
||||
pre-wrap (only Opera), pre-line (no support). Fixable? Unknown target milestone.</td></tr>
|
||||
</tbody>
|
||||
|
||||
@@ -238,14 +238,14 @@ Mozilla on inside and needs -moz-outline, no IE support.</td></tr>
|
||||
<tr><th colspan="3">Questionable</th></tr>
|
||||
<tr class="impl-no"><td>accesskey</td><td>A</td><td>May interfere with main interface</td></tr>
|
||||
<tr class="impl-no"><td>tabindex</td><td>A</td><td>May interfere with main interface</td></tr>
|
||||
<tr><td>target</td><td>A</td><td>Config enabled, only useful for frame layouts, disallowed in strict</td></tr>
|
||||
<tr class="impl-yes"><td>target</td><td>A</td><td>Config enabled, only useful for frame layouts, disallowed in strict</td></tr>
|
||||
</tbody>
|
||||
|
||||
<tbody>
|
||||
<tr><th colspan="3">Miscellaneous</th></tr>
|
||||
<tr><td>datetime</td><td>DEL, INS</td><td>No visible effect, ISO format</td></tr>
|
||||
<tr><td>rel</td><td>A</td><td>Largely user-defined: nofollow, tag (see microformats)</td></tr>
|
||||
<tr><td>rev</td><td>A</td><td>Largely user-defined: vote-*</td></tr>
|
||||
<tr class="impl-yes"><td>rel</td><td>A</td><td>Largely user-defined: nofollow, tag (see microformats)</td></tr>
|
||||
<tr class="impl-yes"><td>rev</td><td>A</td><td>Largely user-defined: vote-*</td></tr>
|
||||
<tr class="feature"><td>axis</td><td>TD, TH</td><td>W3C only: No browser implementation</td></tr>
|
||||
<tr class="feature"><td>char</td><td>COL, COLGROUP, TBODY, TD, TFOOT, TH, THEAD, TR</td><td>W3C only: No browser implementation</td></tr>
|
||||
<tr class="feature"><td>headers</td><td>TD, TH</td><td>W3C only: No browser implementation</td></tr>
|
||||
@@ -262,37 +262,37 @@ Mozilla on inside and needs -moz-outline, no IE support.</td></tr>
|
||||
</tbody>
|
||||
|
||||
<tbody>
|
||||
<tr><th colspan="3">Transform, target milestone 1.4</th></tr>
|
||||
<tr><td rowspan="5">align</td><td>CAPTION</td><td>Near-equiv style 'caption-side', drop left and right</td></tr>
|
||||
<tr><td>IMG</td><td rowspan="2">Margin-left and margin-right = auto or parent div</td></tr>
|
||||
<tr><td>TABLE</td></tr>
|
||||
<tr><td>HR</td><td>Near-equivalent style 'text-align' (Works for IE and Opera, but not Firefox). Also try <code>margin-right:auto; margin-left:0;</code> for left or <code>margin-right:0; margin-left:auto;</code> for right (optionally replacing 0 with the original margin for that side)</td></tr>
|
||||
<tr><th colspan="3">Transform</th></tr>
|
||||
<tr class="impl-yes"><td rowspan="5">align</td><td>CAPTION</td><td>'caption-side' for top/bottom, 'text-align' for left/right</td></tr>
|
||||
<tr class="impl-yes"><td>IMG</td><td rowspan="3">See specimens/html-align-to-css.html</td></tr>
|
||||
<tr class="impl-yes"><td>TABLE</td></tr>
|
||||
<tr class="impl-yes"><td>HR</td></tr>
|
||||
<tr class="impl-yes"><td>H1, H2, H3, H4, H5, H6, P</td><td>Equivalent style 'text-align'</td></tr>
|
||||
<tr class="required impl-yes"><td>alt</td><td>IMG</td><td>Required, insert image filename if src is present or default invalid image text</td></tr>
|
||||
<tr><td rowspan="3">bgcolor</td><td>TABLE</td><td>Equivalent style 'background-color'</td></tr>
|
||||
<tr><td>TR</td><td>Equivalent style 'background-color'</td></tr>
|
||||
<tr><td>TD, TH</td><td>Equivalent style 'background-color'</td></tr>
|
||||
<tr><td>border</td><td>IMG</td><td>Near equivalent style 'border-width', as it only applies when link present</td></tr>
|
||||
<tr><td>clear</td><td>BR</td><td>Near-equiv style 'clear', transform 'all' into 'both'</td></tr>
|
||||
<tr class="impl-yes"><td rowspan="3">bgcolor</td><td>TABLE</td><td>Superset style 'background-color'</td></tr>
|
||||
<tr class="impl-yes"><td>TR</td><td>Superset style 'background-color'</td></tr>
|
||||
<tr class="impl-yes"><td>TD, TH</td><td>Superset style 'background-color'</td></tr>
|
||||
<tr class="impl-yes"><td>border</td><td>IMG</td><td>Equivalent style <code>border:[number]px solid</code></td></tr>
|
||||
<tr class="impl-yes"><td>clear</td><td>BR</td><td>Near-equiv style 'clear', transform 'all' into 'both'</td></tr>
|
||||
<tr class="impl-no"><td>compact</td><td>DL, OL, UL</td><td>Boolean, needs custom CSS class; rarely used anyway</td></tr>
|
||||
<tr class="required impl-yes"><td>dir</td><td>BDO</td><td>Required, insert ltr (or configuration value) if none</td></tr>
|
||||
<tr><td>height</td><td>TD, TH</td><td>Near-equiv style 'height', needs px suffix if original was in pixels</td></tr>
|
||||
<tr><td>hspace</td><td>IMG</td><td>Near-equiv styles 'margin-top' and 'margin-bottom', needs px suffix</td></tr>
|
||||
<tr class="impl-yes"><td>height</td><td>TD, TH</td><td>Near-equiv style 'height', needs px suffix if original was in pixels</td></tr>
|
||||
<tr class="impl-yes"><td>hspace</td><td>IMG</td><td>Near-equiv styles 'margin-top' and 'margin-bottom', needs px suffix</td></tr>
|
||||
<tr class="impl-yes"><td>lang</td><td>*</td><td>Copy value to xml:lang</td></tr>
|
||||
<tr><td rowspan="2">name</td><td>IMG</td><td>Turn into ID</td></tr>
|
||||
<tr><td>A</td><td>Turn into ID? (not deprecated, though in which specs?)</td></tr>
|
||||
<tr><td>noshade</td><td>HR</td><td>Boolean, style 'border-style:solid;'</td></tr>
|
||||
<tr><td>nowrap</td><td>TD, TH</td><td>Boolean, style 'white-space:nowrap;' (not compat with IE5)</td></tr>
|
||||
<tr><td>size</td><td>HR</td><td>Near-equiv 'width', needs px suffix if original was pixels</td></tr>
|
||||
<tr class="impl-yes"><td rowspan="2">name</td><td>IMG</td><td>Turn into ID</td></tr>
|
||||
<tr class="impl-yes"><td>A</td><td>Turn into ID</td></tr>
|
||||
<tr class="impl-yes"><td>noshade</td><td>HR</td><td>Boolean, style 'border-style:solid;'</td></tr>
|
||||
<tr class="impl-yes"><td>nowrap</td><td>TD, TH</td><td>Boolean, style 'white-space:nowrap;' (not compat with IE5)</td></tr>
|
||||
<tr class="impl-yes"><td>size</td><td>HR</td><td>Near-equiv 'height', needs px suffix if original was pixels</td></tr>
|
||||
<tr class="required impl-yes"><td>src</td><td>IMG</td><td>Required, insert blank or default img if not set</td></tr>
|
||||
<tr class="impl-yes"><td>start</td><td>OL</td><td>Poorly supported 'counter-reset', allowed in loose, dropped in strict</td></tr>
|
||||
<tr><td rowspan="3">type</td><td>LI</td><td rowspan="3">Equivalent style 'list-style-type', different allowed values though. (needs testing)</td></tr>
|
||||
<tr><td>OL</td></tr>
|
||||
<tr><td>UL</td></tr>
|
||||
<tr class="impl-yes"><td rowspan="3">type</td><td>LI</td><td rowspan="3">Equivalent style 'list-style-type', different allowed values though. (needs testing)</td></tr>
|
||||
<tr class="impl-yes"><td>OL</td></tr>
|
||||
<tr class="impl-yes"><td>UL</td></tr>
|
||||
<tr class="impl-yes"><td>value</td><td>LI</td><td>Poorly supported 'counter-reset', allowed in loose, dropped in strict</td></tr>
|
||||
<tr><td>vspace</td><td>IMG</td><td>Near-equiv styles 'margin-left' and 'margin-right', needs px suffix, see hspace</td></tr>
|
||||
<tr><td rowspan="2">width</td><td>HR</td><td rowspan="2">Near-equiv style 'width', needs px suffix if original was pixels</td></tr>
|
||||
<tr><td>TD, TH</td></tr>
|
||||
<tr class="impl-yes"><td>vspace</td><td>IMG</td><td>Near-equiv styles 'margin-left' and 'margin-right', needs px suffix, see hspace</td></tr>
|
||||
<tr class="impl-yes"><td rowspan="2">width</td><td>HR</td><td rowspan="2">Near-equiv style 'width', needs px suffix if original was pixels</td></tr>
|
||||
<tr class="impl-yes"><td>TD, TH</td></tr>
|
||||
</tbody>
|
||||
|
||||
</table>
|
||||
|
@@ -15,7 +15,7 @@
|
||||
|
||||
<div id="filing">Filed under End-User</div>
|
||||
<div id="index">Return to the <a href="index.html">index</a>.</div>
|
||||
<div id="home"><a href="http://hp.jpsband.org/">HTML Purifier</a> End-User Documentation</div>
|
||||
<div id="home"><a href="http://htmlpurifier.org/">HTML Purifier</a> End-User Documentation</div>
|
||||
|
||||
<p>Prior to HTML Purifier 1.2.0, this library blithely accepted user input that
|
||||
looked like this:</p>
|
||||
|
@@ -36,7 +36,7 @@ forgiving lexer. You may also be interested in the unit tests located in the
|
||||
tests/ folder, which provide a living document on how exactly the filter deals
|
||||
with malformed input.
|
||||
|
||||
In summary:
|
||||
In summary (see corresponding classes for more details):
|
||||
|
||||
1. Parse document into an array of tag and text tokens (Lexer)
|
||||
2. Remove all elements not on whitelist and transform certain other elements
|
||||
|
@@ -6,45 +6,17 @@ through negligence of people. This class will do its job: no more, no less,
|
||||
and it's up to you to provide it the proper information and proper context
|
||||
to be effective. Things to remember:
|
||||
|
||||
1. Character Encoding: UTF-8.
|
||||
This segment will soon be obsoleted by enduser-utf8.html
|
||||
Currently, the parser runs under the assumption that it is dealing
|
||||
with UTF-8. Not ISO-8859-1 or Windows-1252, UTF-8. And definitely not "no
|
||||
character encoding explicitly stated" or UTF-7. If you're not using UTF-8 as
|
||||
your character encoding, make sure you configure HTML Purifier or switch
|
||||
to UTF-8. Now. Also, make sure any input is properly converted to UTF-8, or
|
||||
the parser will mangle it badly (though it won't be a security risk if you're
|
||||
outputting it as UTF-8 though). Character encoding is, in general, a knotty
|
||||
issue, but do yourself a favor and learn about it:
|
||||
<http://www.joelonsoftware.com/articles/Unicode.html>
|
||||
1. Character Encoding: see enduser-utf8.html for more info.
|
||||
|
||||
2. Doctype: XHTML 1.0 Transitional
|
||||
This is what the parser is outputting. For the most
|
||||
part, it's compatible with HTML 4.01, but XHTML enforces some very nice things
|
||||
that all web developers should use. Regardless, NO DOCTYPE is a NO. Quirks mode
|
||||
has waaaay too many quirks for a little parser to handle. We did not select
|
||||
strict in order to prevent ourselves from being too draconic on users, but
|
||||
this may be configurable in the future. Do you want standards compliance?
|
||||
The doctype is a good place to start.
|
||||
2. Doctype: document pending feature completion
|
||||
Not strictly necessary, actually. More in-depth discussion once we figure
|
||||
out how to get strict loose mode working.
|
||||
|
||||
3. IDs
|
||||
This segment is obsoleted by enduser-id.html
|
||||
They need to be unique, but without some knowledge of the
|
||||
rest of the document, it's difficult to know what's unique. %Attr.IDBlacklist
|
||||
needs to be set: we may want to consider disallowing IDs by default to
|
||||
save lazy programmers.
|
||||
3. IDs: see enduser-id.html for more info
|
||||
|
||||
4. [PROJECTED] Links
|
||||
We're not going to try for spam protection (although
|
||||
some hooks for such a module might be nice) but we may offer the ability to
|
||||
only accept relative URLs. Pick the one that's right for you.
|
||||
4. Links: document pending feature completion
|
||||
Rudimentary blacklisting, we should also allow only relative URIs. We
|
||||
need a doc to explain the stuff.
|
||||
|
||||
5. CSS
|
||||
While we can prevent the most flagrant cases from affecting your
|
||||
layout (such as absolutely positioned elements), no amount of code is going
|
||||
to protect your pages from being attacked by garish colors and plain old
|
||||
bad taste. A neat feature would be the ability to define acceptable colors
|
||||
in a document, but that's not likely to be implemented for a while. In the
|
||||
meantime, be sure to make sure that floated elements (permitted, since they
|
||||
can be quite useful) can't mess up your layout. Once again, we may want to
|
||||
disable this by default to protect lazy developers.
|
||||
5. CSS: document pending
|
||||
Explain which CSS styles we blocked and why.
|
||||
|
@@ -15,7 +15,7 @@
|
||||
|
||||
<div id="filing">Filed under End-User</div>
|
||||
<div id="index">Return to the <a href="index.html">index</a>.</div>
|
||||
<div id="home"><a href="http://hp.jpsband.org/">HTML Purifier</a> End-User Documentation</div>
|
||||
<div id="home"><a href="http://htmlpurifier.org/">HTML Purifier</a> End-User Documentation</div>
|
||||
|
||||
<p>HTML Purifier is a very powerful library. But with power comes great
|
||||
responsibility, in the form of longer execution times. Remember, this
|
||||
|
@@ -10,7 +10,7 @@
|
||||
.minor td {font-style:italic;}
|
||||
</style>
|
||||
|
||||
<title>UTF-8 - HTML Purifier</title>
|
||||
<title>UTF-8: The Secret of Character Encoding - HTML Purifier</title>
|
||||
|
||||
<!-- Note to users: this document, though professing to be UTF-8, attempts
|
||||
to use only ASCII characters, because most webservers are configured
|
||||
@@ -19,21 +19,27 @@ own advice for sake of portability. -->
|
||||
|
||||
</head><body>
|
||||
|
||||
<h1>UTF-8</h1>
|
||||
<h1>UTF-8: The Secret of Character Encoding</h1>
|
||||
|
||||
<div id="filing">Filed under End-User</div>
|
||||
<div id="index">Return to the <a href="index.html">index</a>.</div>
|
||||
<div id="home"><a href="http://hp.jpsband.org/">HTML Purifier</a> End-User Documentation</div>
|
||||
<div id="home"><a href="http://htmlpurifier.org/">HTML Purifier</a> End-User Documentation</div>
|
||||
|
||||
<p>Character encoding and character sets, in truth, are not that
|
||||
difficult to understand. But if you don't understand them, you are going
|
||||
to be caught by surprise by some of HTML Purifier's behavior, namely
|
||||
the fact that it operates UTF-8 or the limitations of the character
|
||||
encoding transformations it does. This document will walk you through
|
||||
<p>Character encoding and character sets are not that
|
||||
difficult to understand, but so many people blithely stumble
|
||||
through the worlds of programming without knowing what to actually
|
||||
do about it, or say "Ah, it's a job for those <em>internationalization</em>
|
||||
experts." No, it is not! This document will walk you through
|
||||
determining the encoding of your system and how you should handle
|
||||
this information. It will stay away from excessive discussion on
|
||||
the internals of character encoding, but offer the information in
|
||||
asides that can easily be skipped.</p>
|
||||
the internals of character encoding.</p>
|
||||
|
||||
<p>This document is not designed to be read in its entirety: it will
|
||||
slowly introduce concepts that build on each other: you need not get to
|
||||
the bottom to have learned something new. However, I strongly
|
||||
recommend you read all the way to <strong>Why UTF-8?</strong>, because at least
|
||||
at that point you'd have made a conscious decision not to migrate,
|
||||
which can be a rewarding (but difficult) task.</p>
|
||||
|
||||
<blockquote class="aside">
|
||||
<div class="label">Asides</div>
|
||||
@@ -43,6 +49,50 @@ asides that can easily be skipped.</p>
|
||||
with a greater understanding of the underlying issues.</p>
|
||||
</blockquote>
|
||||
|
||||
<h2>Table of Contents</h2>
|
||||
|
||||
<ol id="toc">
|
||||
<li><a href="#findcharset">Finding the real encoding</a></li>
|
||||
<li><a href="#findmetacharset">Finding the embedded encoding</a></li>
|
||||
<li><a href="#fixcharset">Fixing the encoding</a><ol>
|
||||
<li><a href="#fixcharset-none">No embedded encoding</a></li>
|
||||
<li><a href="#fixcharset-diff">Embedded encoding disagrees</a></li>
|
||||
<li><a href="#fixcharset-server">Changing the server encoding</a><ol>
|
||||
<li><a href="#fixcharset-server-php">PHP header() function</a></li>
|
||||
<li><a href="#fixcharset-server-phpini">PHP ini directive</a></li>
|
||||
<li><a href="#fixcharset-server-nophp">Non-PHP</a></li>
|
||||
<li><a href="#fixcharset-server-htaccess">.htaccess</a></li>
|
||||
<li><a href="#fixcharset-server-ext">File extensions</a></li>
|
||||
</ol></li>
|
||||
<li><a href="#fixcharset-xml">XML</a></li>
|
||||
<li><a href="#fixcharset-internals">Inside the process</a></li>
|
||||
</ol></li>
|
||||
<li><a href="#whyutf8">Why UTF-8?</a><ol>
|
||||
<li><a href="#whyutf8-i18n">Internationalization</a></li>
|
||||
<li><a href="#whyutf8-user">User-friendly</a></li>
|
||||
<li><a href="#whyutf8-forms">Forms</a><ol>
|
||||
<li><a href="#whyutf8-forms-urlencoded">application/x-www-form-urlencoded</a></li>
|
||||
<li><a href="#whyutf8-forms-multipart">multipart/form-data</a></li>
|
||||
</ol></li>
|
||||
<li><a href="#whyutf8-support">Well supported</a></li>
|
||||
<li><a href="#whyutf8-htmlpurifier">HTML Purifiers</a></li>
|
||||
</ol></li>
|
||||
<li><a href="#migrate">Migrate to UTF-8</a><ol>
|
||||
<li><a href="#migrate-db">Configuring your database</a><ol>
|
||||
<li><a href="#migrate-db-legit">Legit method</a></li>
|
||||
<li><a href="#migrate-db-binary">Binary</a></li>
|
||||
</ol></li>
|
||||
<li><a href="#migrate-editor">Text editor</a></li>
|
||||
<li><a href="#migrate-bom">Byte Order Mark (headers already sent!)</a></li>
|
||||
<li><a href="#migrate-fonts">Fonts</a><ol>
|
||||
<li><a href="#migrate-fonts-obscure">Obscure scripts</a></li>
|
||||
<li><a href="#migrate-fonts-occasional">Occasional use</a></li>
|
||||
</ol></li>
|
||||
<li><a href="#migrate-variablewidth">Dealing with variable width in functions</a></li>
|
||||
</ol></li>
|
||||
<li><a href="#externallinks">Further Reading</a></li>
|
||||
</ol>
|
||||
|
||||
<h2 id="findcharset">Finding the real encoding</h2>
|
||||
|
||||
<p>In the beginning, there was ASCII, and things were simple. But they
|
||||
@@ -275,7 +325,7 @@ your own php.ini file, ask your support for details. Use:</p>
|
||||
|
||||
<h4 id="fixcharset-server-nophp">Non-PHP</h4>
|
||||
|
||||
<p>You may, for whatever reason, may need to set the character encoding
|
||||
<p>You may, for whatever reason, need to set the character encoding
|
||||
on non-PHP files, usually plain ol' HTML files. Doing this
|
||||
is more of a hit-or-miss process: depending on the software being
|
||||
used as a webserver and the configuration of that software, certain
|
||||
@@ -386,8 +436,8 @@ processing instructions. They look like:</p>
|
||||
|
||||
<p>For XHTML, this processing instruction theoretically
|
||||
overrides the <code>META</code> tag. In reality, this happens only when the
|
||||
XHTML is actually served as legit XML and not HTML, which is almost
|
||||
always never due to Internet Explorer's lack of support for
|
||||
XHTML is actually served as legit XML and not HTML, which is almost always
|
||||
never due to Internet Explorer's lack of support for
|
||||
<code>application/xhtml+xml</code> (even though doing so is often
|
||||
argued to be <a href="http://www.hixie.ch/advocacy/xhtml">good practice</a>).</p>
|
||||
|
||||
@@ -398,10 +448,10 @@ for XML files is UTF-8, which often butts heads with more common
|
||||
ISO-8859-1 encoding (you see this in garbled RSS feeds).</p>
|
||||
|
||||
<p>In short, if you use XHTML and have gone through the
|
||||
trouble of adding the XML header, be sure to make sure it jives
|
||||
trouble of adding the XML header, make sure it jives
|
||||
with your <code>META</code> tags and HTTP headers.</p>
|
||||
|
||||
<h3>Inside the process</h3>
|
||||
<h3 id="fixcharset-internals">Inside the process</h3>
|
||||
|
||||
<p>This section is not required reading,
|
||||
but may answer some of your questions on what's going on in all
|
||||
@@ -572,7 +622,7 @@ Each method has deficiencies, especially the former.</p>
|
||||
the page, you still have the trouble of what to do with characters
|
||||
that are outside of the character encoding's range. The behavior, once
|
||||
again, varies: Firefox 2.0 entity-izes them while Internet Explorer
|
||||
7.0 mangles them beyond intelligibility. For serious I18N purposes,
|
||||
7.0 mangles them beyond intelligibility. For serious internationalization purposes,
|
||||
this is not an option.</p>
|
||||
|
||||
<p>The other possibility is to set Accept-Encoding to UTF-8, which
|
||||
@@ -604,22 +654,378 @@ hounding you about broken pages.</p>
|
||||
|
||||
<h3 id="whyutf8-htmlpurifier">HTML Purifier</h3>
|
||||
|
||||
<p>And finally, we get to HTML Purifier.</p>
|
||||
<p>And finally, we get to HTML Purifier. HTML Purifier is built to
|
||||
deal with UTF-8: any indications otherwise are the result of an
|
||||
encoder that converts text from your preferred encoding to UTF-8, and
|
||||
back again. HTML Purifier never touches anything else, and leaves
|
||||
it up to the module iconv to do the dirty work.</p>
|
||||
|
||||
<p>This approach, however, is not perfect. iconv is blithely unaware
|
||||
of HTML character entities. HTML Purifier, in order to
|
||||
protect against sophisticated escaping schemes, normalizes all character
|
||||
and numeric entities before processing the text. This leads to
|
||||
one important ramification:</p>
|
||||
|
||||
<p><strong>Any character that is not supported by the target character
|
||||
set, regardless of whether or not it is in the form of a character
|
||||
entity or a raw character, will be silently ignored.</strong></p>
|
||||
|
||||
<p>Example of this principle at work: say you have <code>&theta;</code>
|
||||
in your HTML, but the output is in Latin-1 (which, understandably,
|
||||
does not understand Greek), the following process will occur (assuming you've
|
||||
set the encoding correctly using %Core.Encoding):</p>
|
||||
|
||||
<ul>
|
||||
<li>The <code>Encoder</code> will transform the text from ISO 8859-1 to UTF-8
|
||||
(note that theta is preserved since it doesn't actually use
|
||||
any non-ASCII characters): <code>&theta;</code></li>
|
||||
<li>The <code>EntityParser</code> will transform all named and numeric
|
||||
character entities to their corresponding raw UTF-8 equivalents:
|
||||
<code>θ</code></li>
|
||||
<li>HTML Purifier processes the code: <code>θ</code></li>
|
||||
<li>The <code>Encoder</code> now transforms the text back from UTF-8
|
||||
to ISO 8859-1. Since Greek is not supported by ISO 8859-1, it
|
||||
will be either ignored or replaced with a question mark:
|
||||
<code>?</code></li>
|
||||
</ul>
|
||||
|
||||
<p>This behaviour is quite unsatisfactory. It is a deal-breaker for
|
||||
international applications, and it can be mildly annoying for the provincial
|
||||
soul who occasionally needs a special character. Since 1.4.0, HTML
|
||||
Purifier has provided a slightly more palatable workaround using
|
||||
%Core.EscapeNonASCIICharacters. The process now looks like:</p>
|
||||
|
||||
<ul>
|
||||
<li>The <code>Encoder</code> transforms encoding to UTF-8: <code>&theta;</code></li>
|
||||
<li>The <code>EntityParser</code> transforms entities: <code>θ</code></li>
|
||||
<li>HTML Purifier processes the code: <code>θ</code></li>
|
||||
<li>The <code>Encoder</code> replaces all non-ASCII characters
|
||||
with numeric entities: <code>&#952;</code></li>
|
||||
<li>For good measure, <code>Encoder</code> transforms encoding back to
|
||||
original (which is strictly unnecessary for 99% of encodings
|
||||
out there): <code>&#952;</code> (remember, it's all ASCII!)</li>
|
||||
</ul>
|
||||
|
||||
<p>...which means that this is only good for an occasional foray into
|
||||
the land of Unicode characters, and is totally unacceptable for Chinese
|
||||
or Japanese texts. The even bigger kicker is that, supposing the
|
||||
input encoding was actually ISO-8859-7, which <em>does</em> support
|
||||
theta, the character would get entity-ized anyway! (The Encoder does
|
||||
not discriminate).</p>
|
||||
|
||||
<p>The current functionality is about where HTML Purifier will be for
|
||||
the rest of eternity. HTML Purifier could attempt to preserve the original
|
||||
form of the entities so that they could be substituted back in, only the
|
||||
DOM extension kills them off irreversibly. HTML Purifier could also attempt
|
||||
to be smart and only convert non-ASCII characters that weren't supported
|
||||
by the target encoding, but that would require reimplementing iconv
|
||||
with HTML awareness, something I will not do.</p>
|
||||
|
||||
<p>So there: either it's UTF-8 or crippled international support. Your pick! (and I'm
|
||||
not being sarcastic here: some people could care less about other languages)</p>
|
||||
|
||||
<h2 id="migrate">Migrate to UTF-8</h2>
|
||||
|
||||
<h3 id="migrate-editor">Text editor</h3>
|
||||
<p>So, you've decided to bite the bullet, and want to migrate to UTF-8.
|
||||
Note that this is not for the faint-hearted, and you should expect
|
||||
the process to take longer than you think it will take.</p>
|
||||
|
||||
<p>The general idea is that you convert all existing text to UTF-8,
|
||||
and then you set all the headers and META tags we discussed earlier
|
||||
to UTF-8. There are many ways going about doing this: you could
|
||||
write a conversion script that runs through the database and re-encodes
|
||||
everything as UTF-8 or you could do the conversion on the fly when someone
|
||||
reads the page. The details depend on your system, but I will cover
|
||||
some of the more subtle points of migration that may trip you up.</p>
|
||||
|
||||
<h3 id="migrate-db">Configuring your database</h3>
|
||||
|
||||
<h3 id="migrate-convert">Convert old text</h3>
|
||||
<p>Most modern databases, the most prominent open-source ones being MySQL
|
||||
4.1+ and PostgreSQL, support character encodings. If you're switching
|
||||
to UTF-8, logically speaking, you'd want to make sure your database
|
||||
knows about the change too. There are some caveats though:</p>
|
||||
|
||||
<h4 id="migrate-db-legit">Legit method</h4>
|
||||
|
||||
<p>Standardization in terms of SQL syntax for specifying character
|
||||
encodings is notoriously spotty. Refer to your respective database's
|
||||
documentation on how to do this properly.</p>
|
||||
|
||||
<p>For <a href="http://dev.mysql.com/doc/refman/5.0/en/charset-conversion.html">MySQL</a>, <code>ALTER</code> will magically perform the
|
||||
character encoding conversion for you. However, you have
|
||||
to make sure that the text inside the column is what is says it is:
|
||||
if you had put Shift-JIS in an ISO 8859-1 column, MySQL will irreversibly mangle
|
||||
the text when you try to convert it to UTF-8. You'll have to convert
|
||||
it to a binary field, convert it to a Shift-JIS field (the real encoding),
|
||||
and then finally to UTF-8. Many a website had pages irreversibly mangled
|
||||
because they didn't realize that they'd been deluding themselves about
|
||||
the character encoding all along, don't become the next victim.</p>
|
||||
|
||||
<p>For <a href="http://www.postgresql.org/docs/8.2/static/multibyte.html">PostgreSQL</a>, there appears to be no direct way to change the
|
||||
encoding of a database (as of 8.2). You will have to dump the data, and then reimport
|
||||
it into a new table. Make sure that your client encoding is set properly:
|
||||
this is how PostgreSQL knows to perform an encoding conversion.</p>
|
||||
|
||||
<p>Many times, you will be also asked about the "collation" of
|
||||
the new column. Collation is how a DBMS sorts text, like ordering
|
||||
B, C and A into A, B and C (the problem gets surprisingly complicated
|
||||
when you get to languages like Thai and Japanese). If in doubt,
|
||||
going with the default setting is usually a safe bet.</p>
|
||||
|
||||
<p>Once the conversion is all said and done, you still have to remember
|
||||
to set the client encoding (your encoding) properly on each database
|
||||
connection using <code>SET NAMES</code> (which is standard SQL and is
|
||||
usually supported).</p>
|
||||
|
||||
<h4 id="migrate-db-binary">Binary</h4>
|
||||
|
||||
<p>Due to the abovementioned compatibility issues, a more interoperable
|
||||
way of storing UTF-8 text is to stuff it in a binary datatype.
|
||||
<code>CHAR</code> becomes <code>BINARY</code>, <code>VARCHAR</code> becomes
|
||||
<code>VARBINARY</code> and <code>TEXT</code> becomes <code>BLOB</code>.
|
||||
Doing so can save you some huge headaches:</p>
|
||||
|
||||
<ul>
|
||||
<li>The syntax for binary data types is very portable,</li>
|
||||
<li>MySQL 4.0 has <em>no</em> support for character encodings, so
|
||||
if you want to support it you <em>have</em> to use binary,</li>
|
||||
<li>MySQL, as of 5.1, has no support for four byte UTF-8 characters,
|
||||
which represent characters beyond the basic multilingual
|
||||
plane, and</li>
|
||||
<li>You will never have to worry about your DBMS being too smart
|
||||
and attempting to convert your text when you don't want it to.</li>
|
||||
</ul>
|
||||
|
||||
<p>MediaWiki, a very prominent international application, uses binary fields
|
||||
for storing their data because of point three.</p>
|
||||
|
||||
<p>There are drawbacks, of course:</p>
|
||||
|
||||
<ul>
|
||||
<li>Database tools like PHPMyAdmin won't be able to offer you inline
|
||||
text editing, since it is declared as binary,</li>
|
||||
<li>It's not semantically correct: it's really text not binary
|
||||
(lying to the database),</li>
|
||||
<li>Unless you use the not-very-portable wizardry mentioned above,
|
||||
you have to change the encoding yourself (usually, you'd do
|
||||
it on the fly), and</li>
|
||||
<li>You will not have collation.</li>
|
||||
</ul>
|
||||
|
||||
<p>Choose based on your circumstances.</p>
|
||||
|
||||
<h3 id="migrate-editor">Text editor</h3>
|
||||
|
||||
<p>For more flat-file oriented systems, you will often be tasked with
|
||||
converting reams of existing text and HTML files into UTF-8, as well as
|
||||
making sure that all new files uploaded are properly encoded. Once again,
|
||||
I can only point vaguely in the right direction for converting your
|
||||
existing files: make sure you backup, make sure you use
|
||||
<a href="http://php.net/ref.iconv">iconv</a>(), and
|
||||
make sure you know what the original character encoding of the files
|
||||
is (or are, depending on the tidiness of your system).</p>
|
||||
|
||||
<p>However, I can proffer more specific advice on the subject of
|
||||
text editors. Many text editors have notoriously spotty Unicode support.
|
||||
To find out how your editor is doing, you can check out <a
|
||||
href="http://www.alanwood.net/unicode/utilities_editors.html">this list</a>
|
||||
or <a href="http://en.wikipedia.org/wiki/Comparison_of_text_editors#Encoding_support">Wikipedia's list.</a>
|
||||
I personally use Notepad++, which works like a charm when it comes to UTF-8.
|
||||
Usually, you will have to <strong>explicitly</strong> tell the editor through some dialogue
|
||||
(usually Save as or Format) what encoding you want it to use. An editor
|
||||
will often offer "Unicode" as a method of saving, which is
|
||||
ambiguous. Make sure you know whether or not they really mean UTF-8
|
||||
or UTF-16 (which is another flavor of Unicode).</p>
|
||||
|
||||
<p>The two things to look out for are whether or not the editor
|
||||
supports <strong>font mixing</strong> (multiple
|
||||
fonts in one document) and whether or not it adds a <strong>BOM</strong>.
|
||||
Font mixing is important because fonts rarely have support for every
|
||||
language known to mankind: in order to be flexible, an editor must
|
||||
be able to take a little from here and a little from there, otherwise
|
||||
all your Chinese characters will come as nice boxes. We'll discuss
|
||||
BOM below.</p>
|
||||
|
||||
<h3 id="migrate-bom">Byte Order Mark (headers already sent!)</h3>
|
||||
|
||||
<p>The BOM, or <a href="http://en.wikipedia.org/wiki/Byte_Order_Mark">Byte
|
||||
Order Mark</a>, is a magical, invisible character placed at
|
||||
the beginning of UTF-8 files to tell people what the encoding is and
|
||||
what the endianness of the text is. It is also unnecessary.</p>
|
||||
|
||||
<p>Because it's invisible, it often
|
||||
catches people by surprise when it starts doing things it shouldn't
|
||||
be doing. For example, this PHP file:</p>
|
||||
|
||||
<pre><strong>BOM</strong><?php
|
||||
header('Location: index.php');
|
||||
?></pre>
|
||||
|
||||
<p>...will fail with the all too familiar <strong>Headers already sent</strong>
|
||||
PHP error. And because the BOM is invisible, this culprit will go unnoticed.
|
||||
My suggestion is to only use ASCII in PHP pages, but if you must, make
|
||||
sure the page is saved WITHOUT the BOM.</p>
|
||||
|
||||
<blockquote class="aside">
|
||||
<p>The headers the error is referring to are <strong>HTTP headers</strong>,
|
||||
which are sent to the browser before any HTML to tell it various
|
||||
information. The moment any regular text (and yes, a BOM counts as
|
||||
ordinary text) is output, the headers must be sent, and you are
|
||||
not allowed to send anymore. Thus, the error.</p>
|
||||
</blockquote>
|
||||
|
||||
<p>If you are reading in text files to insert into the middle of another
|
||||
page, it is strongly advised (but not strictly necessary) that you replace out the UTF-8 byte
|
||||
sequence for BOM <code>"\xEF\xBB\xBF"</code> before inserting it in,
|
||||
via:</p>
|
||||
|
||||
<pre>$text = str_replace("\xEF\xBB\xBF", '', $text);</pre>
|
||||
|
||||
<h3 id="migrate-fonts">Fonts</h3>
|
||||
|
||||
<p>Generally speaking, people who are having trouble with fonts fall
|
||||
into two categories:</p>
|
||||
|
||||
<ul>
|
||||
<li>Those who want to
|
||||
use an extremely obscure language for which there is very little
|
||||
support even among native speakers of the language, and</li>
|
||||
<li>Those where the primary language of the text is
|
||||
well-supported but there are occasional characters
|
||||
that aren't supported.</li>
|
||||
</ul>
|
||||
|
||||
<p>Yes, there's always a chance where an English user happens across
|
||||
a Sinhalese website and doesn't have the right font. But an English user
|
||||
who happens not to have the right fonts probably has no business reading Sinhalese
|
||||
anyway. So we'll deal with the other two edge cases.</p>
|
||||
|
||||
<h4 id="migrate-fonts-obscure">Obscure scripts</h4>
|
||||
|
||||
<p>If you run a Bengali website, you may get comments from users who
|
||||
would like to read your website but get heaps of question marks or
|
||||
other meaningless characters. Fixing this problem requires the
|
||||
installation of a font or language pack which is often highly
|
||||
dependent on what the language is. <a href="http://bn.wikipedia.org/wiki/%E0%A6%89%E0%A6%87%E0%A6%95%E0%A6%BF%E0%A6%AA%E0%A7%87%E0%A6%A1%E0%A6%BF%E0%A6%AF%E0%A6%BC%E0%A6%BE:Bangla_script_display_help">Here is an example</a>
|
||||
of such a help file for the Bengali language, I am sure there are
|
||||
others out there too. You just have to point users to the appropriate
|
||||
help file.</p>
|
||||
|
||||
<h4 id="migrate-fonts-occasional">Occasional use</h4>
|
||||
|
||||
<p>A prime example of when you'll see some very obscure Unicode
|
||||
characters embedded in what otherwise would be very bland ASCII are
|
||||
letters of the
|
||||
<a href="http://en.wikipedia.org/wiki/International_Phonetic_Alphabet">International
|
||||
Phonetic Alphabet (IPA)</a>, use to designate pronounciations in a very standard
|
||||
manner (you probably see them all the time in your dictionary). Your
|
||||
average font probably won't have support for all of the IPA characters
|
||||
like ʘ (bilabial click) or ʒ (voiced postalveolar fricative).
|
||||
So what's a poor browser to do? Font mix! Smart browsers like Mozilla Firefox
|
||||
and Internet Explorer 7 will borrow glyphs from other fonts in order
|
||||
to make sure that all the characters display properly.</p>
|
||||
|
||||
<p>But what happens when the browser isn't smart and happens to be the
|
||||
most widely used browser in the entire world? Microsoft IE 6
|
||||
is not smart enough to borrow from other fonts when a character isn't
|
||||
present, so more often than not you'll be slapped with a nice big �.
|
||||
To get things to work, MSIE 6 needs a little nudge. You could configure it
|
||||
to use a different font to render the text, but you can acheive the same
|
||||
effect by selectively changing the font for blocks of special characters
|
||||
to known good Unicode fonts.</p>
|
||||
|
||||
<p>Fortunantely, the folks over at Wikipedia have already done all the
|
||||
heavy lifting for you. Get the CSS from the horses mouth here:
|
||||
<a href="http://en.wikipedia.org/wiki/MediaWiki:Common.css">Common.css</a>,
|
||||
and search for ".IPA" There are also a smattering of
|
||||
other classes you can use for other purposes, check out
|
||||
<a href="http://meta.wikimedia.org/wiki/Help:Special_characters#Displaying_Special_Characters">this page</a>
|
||||
for more details. For you lazy ones, this should work:</p>
|
||||
|
||||
<pre>.Unicode {
|
||||
font-family: Code2000, "TITUS Cyberbit Basic", "Doulos SIL",
|
||||
"Chrysanthi Unicode", "Bitstream Cyberbit",
|
||||
"Bitstream CyberBase", Thryomanes, Gentium, GentiumAlt,
|
||||
"Lucida Grande", "Arial Unicode MS", "Microsoft Sans Serif",
|
||||
"Lucida Sans Unicode";
|
||||
font-family /**/:inherit; /* resets fonts for everyone but IE6 */
|
||||
}</pre>
|
||||
|
||||
<p>The standard usage goes along the lines of <code><span class="Unicode">Crazy
|
||||
Unicode stuff here</span></code>. Characters in the
|
||||
<a href="http://en.wikipedia.org/wiki/Windows_Glyph_List_4">Windows Glyph List</a>
|
||||
usually don't need to be fixed, but for anything else you probably
|
||||
want to play it safe. Unless, of course, you don't care about IE6
|
||||
users.</p>
|
||||
|
||||
<h3 id="migrate-variablewidth">Dealing with variable width in functions</h3>
|
||||
|
||||
<p>When people claim that PHP6 will solve all our Unicode problems, they're
|
||||
misinformed. It will not fix any of the abovementioned troubles. It will,
|
||||
however, fix the problem we are about to discuss: processing UTF-8 text
|
||||
in PHP.</p>
|
||||
|
||||
<p>PHP (as of PHP5) is blithely unaware of the existence of UTF-8 (with a few
|
||||
notable exceptions). Sometimes, this will cause problems, other times,
|
||||
this won't. So far, we've avoided discussing the architecture of
|
||||
UTF-8, so, we must first ask, what is UTF-8? Yes, it supports Unicode,
|
||||
and yes, it is variable width. Other traits:</p>
|
||||
|
||||
<ul>
|
||||
<li>Every character's byte sequence is unique and will never be found
|
||||
inside the byte sequence of another character,</li>
|
||||
<li>UTF-8 may use up to four bytes to encode a character,</li>
|
||||
<li>UTF-8 text must be checked for well-formedness,</li>
|
||||
<li>Pure ASCII is also valid UTF-8, and</li>
|
||||
<li>Binary sorting will sort UTF-8 in the same order as Unicode.</li>
|
||||
</ul>
|
||||
|
||||
<p>Each of these traits affect different domains of text processing
|
||||
in different ways. It is beyond the scope of this document to explain
|
||||
what precisely these implications are. PHPWact provides
|
||||
a very good <a href="http://www.phpwact.org/php/i18n/utf-8">reference document</a>
|
||||
on what to expect from each functions, although coverage is spotty in
|
||||
some areas. Their more general notes on
|
||||
<a href="http://www.phpwact.org/php/i18n/charsets">character sets</a>
|
||||
are also worth looking at for information on UTF-8. Some rules of thumb
|
||||
when dealing with Unicode text:</p>
|
||||
|
||||
<ul>
|
||||
<li>Do not EVER use functions that:<ul>
|
||||
<li>...convert case (strtolower, strtoupper, ucfirst, ucwords)</li>
|
||||
<li>...claim to be case-insensitive (str_ireplace, stristr, strcasecmp)</li>
|
||||
</ul></li>
|
||||
<li>Think twice before using functions that:<ul>
|
||||
<li>...count characters (strlen will return bytes, not characters;
|
||||
str_split and word_wrap may corrupt)</li>
|
||||
<li>...entity-ize things (UTF-8 doesn't need entities)</li>
|
||||
<li>...do very complex string processing (*printf)</li>
|
||||
</ul></li>
|
||||
</ul>
|
||||
|
||||
<p>Note: this list applies to UTF-8 encoded text only: if you have
|
||||
a string that you are 100% sure is ASCII, be my guest and use
|
||||
<code>strtolower</code> (HTML Purifier uses this function.)</p>
|
||||
|
||||
<p>Regardless, always think in bytes, not characters. If you use strpos()
|
||||
to find the position of a character, it will be in bytes, but this
|
||||
usually won't matter since substr() also operates with byte indices!</p>
|
||||
|
||||
<p>You'll also need to make sure your UTF-8 is well-formed and will
|
||||
probably need replacements for some of these functions. I recommend
|
||||
using Harry Fuecks' <a href="http://phputf8.sourceforge.net/">PHP
|
||||
UTF-8</a> library, rather than use mb_string directly. HTML Purifier
|
||||
also defines a few useful UTF-8 compatible functions: check out
|
||||
<code>Encoder.php</code> in the <code>/library/HTMLPurifier/</code>
|
||||
directory.</p>
|
||||
|
||||
<h2 id="externallinks">Further Reading</h2>
|
||||
|
||||
<p>Well, that's it. Hopefully this document has served as a very
|
||||
practical springboard into knowledge of how UTF-8 works. You may have
|
||||
decided that you don't want to migrate yet: that's fine, just know
|
||||
what will happen to your output and what bug reports you may recieve.</p>
|
||||
|
||||
<p>Many other developers have already discussed the subject of Unicode,
|
||||
UTF-8 and internationalization, and I would like to defer to them for
|
||||
a more in-depth look into character sets and encodings.</p>
|
||||
|
@@ -15,7 +15,7 @@
|
||||
|
||||
<div id="filing">Filed under End-User</div>
|
||||
<div id="index">Return to the <a href="index.html">index</a>.</div>
|
||||
<div id="home"><a href="http://hp.jpsband.org/">HTML Purifier</a> End-User Documentation</div>
|
||||
<div id="home"><a href="http://htmlpurifier.org/">HTML Purifier</a> End-User Documentation</div>
|
||||
|
||||
<p>Clients like their YouTube videos. It gives them a warm fuzzy feeling when
|
||||
they see a neat little embedded video player on their websites that can play
|
||||
@@ -37,7 +37,7 @@ from a specific website, it probably is okay. If no amount of pleading will
|
||||
convince the people upstairs that they should just settle with just linking
|
||||
to their movies, you may find this technique very useful.</p>
|
||||
|
||||
<h2>Sample</h2>
|
||||
<h2>Looking in</h2>
|
||||
|
||||
<p>Below is custom code that allows users to embed
|
||||
YouTube videos. This is not favoritism: this trick can easily be adapted for
|
||||
@@ -69,55 +69,27 @@ into your documents. YouTube's code goes like this:</p>
|
||||
<p>What point 2 means is that if we have code like <code><span
|
||||
class="embed-youtube">AyPzM5WK8ys</span></code> your
|
||||
application can reconstruct the full object from this small snippet that
|
||||
passes through HTML Purifier <em>unharmed</em>.</p>
|
||||
passes through HTML Purifier <em>unharmed</em>.
|
||||
<a href="http://htmlpurifier.org/svnroot/htmlpurifier/trunk/library/HTMLPurifier/Filter/YouTube.php">Show me the code!</a></p>
|
||||
|
||||
<pre>
|
||||
<?php
|
||||
<p>And the corresponding usage:</p>
|
||||
|
||||
class HTMLPurifierX_PreserveYouTube extends HTMLPurifier
|
||||
{
|
||||
function purify($html, $config = null) {
|
||||
$pre_regex = '#<object[^>]+>.+?'.
|
||||
'http://www.youtube.com/v/([A-Za-z0-9]+).+?</object>#';
|
||||
$pre_replace = '<span class="youtube-embed">\1</span>';
|
||||
$html = preg_replace($pre_regex, $pre_replace, $html);
|
||||
$html = parent::purify($html, $config);
|
||||
$post_regex = '#<span class="youtube-embed">([A-Za-z0-9]+)</span>#';
|
||||
$post_replace = '<object width="425" height="350" '.
|
||||
'data="http://www.youtube.com/v/\1">'.
|
||||
'<param name="movie" value="http://www.youtube.com/v/\1"></param>'.
|
||||
'<param name="wmode" value="transparent"></param>'.
|
||||
'<!--[if IE]>'.
|
||||
'<embed src="http://www.youtube.com/v/\1"'.
|
||||
'type="application/x-shockwave-flash"'.
|
||||
'wmode="transparent" width="425" height="350" />'.
|
||||
'<![endif]-->'.
|
||||
'</object>';
|
||||
$html = preg_replace($post_regex, $post_replace, $html);
|
||||
return $html;
|
||||
}
|
||||
}
|
||||
<pre><?php
|
||||
// assuming $purifier is an instance of HTMLPurifier
|
||||
require_once 'HTMLPurifier/Filter/YouTube.php';
|
||||
$purifier->addFilter(new HTMLPurifier_Filter_YouTube());
|
||||
?></pre>
|
||||
|
||||
$purifier = new HTMLPurifierX_PreserveYouTube();
|
||||
$html_still_with_youtube = $purifier->purify($html_with_youtube);
|
||||
|
||||
?>
|
||||
</pre>
|
||||
|
||||
<p>There is a bit going on here, so let's explain.</p>
|
||||
<p>There is a bit going in the two code snippets, so let's explain.</p>
|
||||
|
||||
<ol>
|
||||
<li>The class uses the prefix <code>HTMLPurifierX</code> because it's
|
||||
userspace code. Don't use <code>HTMLPurifier</code> in front of your
|
||||
class, since it might clobber another class in the library.</li>
|
||||
<li>In order to keep the interface compatible, we've extended HTMLPurifier
|
||||
into a new class that preserves the YouTube videos. This means that
|
||||
all you have to do is replace all instances of
|
||||
<code>new HTMLPurifier</code> to <code>new
|
||||
HTMLPurifierX_PreserveYouTube</code>. There's other ways to go about
|
||||
doing this: if you were calling a function that wrapped HTML Purifier,
|
||||
you could paste the PHP right there. If you wanted to be really
|
||||
fancy, you could make a decorator for HTMLPurifier.</li>
|
||||
<li>This is a Filter object, which intercepts the HTML that is
|
||||
coming into and out of the purifier. You can add as many
|
||||
filter objects as you like. <code>preFilter()</code>
|
||||
processes the code before it gets purified, and <code>postFilter()</code>
|
||||
processes the code afterwards. So, we'll use <code>preFilter()</code> to
|
||||
replace the object tag with a <code>span</code>, and <code>postFilter()</code>
|
||||
to restore it.</li>
|
||||
<li>The first preg_replace call replaces any YouTube code users may have
|
||||
embedded into the benign span tag. Span is used because it is inline,
|
||||
and objects are inline too. We are very careful to be extremely
|
||||
@@ -165,17 +137,16 @@ it is important that you are cognizant of the risk.</p>
|
||||
|
||||
<p>This should go without saying, but if you're going to adapt this code
|
||||
for Google Video or the like, make sure you do it <em>right</em>. It's
|
||||
extremely easy to allow a character too many in the final section and
|
||||
extremely easy to allow a character too many in <code>postFilter()</code> and
|
||||
suddenly you're introducing XSS into HTML Purifier's XSS free output. HTML
|
||||
Purifier may be well written, but it cannot guard against vulnerabilities
|
||||
introduced after it has finished.</p>
|
||||
|
||||
<h2>Future plans</h2>
|
||||
<h2>Help out!</h2>
|
||||
|
||||
<p>This functionality is part of the core library, using the
|
||||
HTMLPurifier_Filter class to acheive the desired effect. Our implementation
|
||||
is slightly different, and this page will be updated to reflect that
|
||||
once 1.4.0 is released.</p>
|
||||
<p>If you write a filter for your favorite video destination (or anything
|
||||
like that, for that matter), send it over and it might get included
|
||||
with the core!</p>
|
||||
|
||||
</body>
|
||||
</html>
|
@@ -1,14 +1,23 @@
|
||||
<?php exit;
|
||||
<?php
|
||||
|
||||
// This file demonstrates basic usage of HTMLPurifier.
|
||||
|
||||
require_once '/path/to/htmlpurifier/library/HTMLPurifier.auto.php';
|
||||
// replace this with the path to the HTML Purifier library
|
||||
require_once '../../library/HTMLPurifier.auto.php';
|
||||
|
||||
$purifier = new HTMLPurifier();
|
||||
$config = HTMLPurifier_Config::createDefault();
|
||||
|
||||
// configuration goes here:
|
||||
$config->set('Core', 'Encoding', 'ISO-8859-1'); //replace with your encoding
|
||||
$config->set('Core', 'XHTML', true); // set to false if HTML 4.01
|
||||
|
||||
$purifier = new HTMLPurifier($config);
|
||||
|
||||
// untrusted input HTML
|
||||
$html = '<b>Simple and short';
|
||||
|
||||
$pure_html = $purifier->purify($html);
|
||||
|
||||
echo $pure_html;
|
||||
echo '<pre>' . htmlspecialchars($pure_html) . '</pre>';
|
||||
|
||||
?>
|
@@ -1,136 +0,0 @@
|
||||
<?php
|
||||
|
||||
// using _REQUEST because we accept GET and POST requests
|
||||
|
||||
$content = empty($_REQUEST['xml']) ? 'text/html' : 'application/xhtml+xml';
|
||||
header("Content-type:$content;charset=UTF-8");
|
||||
|
||||
// prevent PHP versions with shorttags from barfing
|
||||
echo '<?xml version="1.0" encoding="UTF-8" ?>
|
||||
';
|
||||
|
||||
function getFormMethod() {
|
||||
return (isset($_REQUEST['post'])) ? 'post' : 'get';
|
||||
}
|
||||
|
||||
if (empty($_REQUEST['strict'])) {
|
||||
?><!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
|
||||
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
|
||||
<?php
|
||||
} else {
|
||||
?>
|
||||
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
|
||||
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
|
||||
<?php
|
||||
}
|
||||
?>
|
||||
<html xmlns="http://www.w3.org/1999/xhtml" lang="en" xml:lang="en">
|
||||
<head>
|
||||
<title>HTML Purifier Live Demo</title>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
|
||||
</head>
|
||||
<body>
|
||||
<h1>HTML Purifier Live Demo</h1>
|
||||
<?php
|
||||
|
||||
require_once '../../library/HTMLPurifier.auto.php';
|
||||
|
||||
if (!empty($_REQUEST['html'])) { // start result
|
||||
|
||||
if (strlen($_REQUEST['html']) > 50000) {
|
||||
?>
|
||||
<p>Request exceeds maximum allowed text size of 50kb.</p>
|
||||
<?php
|
||||
} else { // start main processing
|
||||
|
||||
$html = get_magic_quotes_gpc() ? stripslashes($_REQUEST['html']) : $_REQUEST['html'];
|
||||
|
||||
$config = HTMLPurifier_Config::createDefault();
|
||||
$config->set('Core', 'TidyFormat', !empty($_REQUEST['tidy']));
|
||||
$config->set('HTML', 'Strict', !empty($_REQUEST['strict']));
|
||||
$purifier = new HTMLPurifier($config);
|
||||
$pure_html = $purifier->purify($html);
|
||||
|
||||
?>
|
||||
<p>Here is your purified HTML:</p>
|
||||
<div style="border:5px solid #CCC;margin:0 10%;padding:1em;">
|
||||
<?php if(getFormMethod() == 'get') { ?>
|
||||
<div style="float:right;">
|
||||
<a href="http://validator.w3.org/check?uri=referer"><img
|
||||
src="http://www.w3.org/Icons/valid-xhtml10"
|
||||
alt="Valid XHTML 1.0 Transitional" height="31" width="88" style="border:0;" /></a>
|
||||
</div>
|
||||
<?php } ?>
|
||||
<?php
|
||||
|
||||
echo $pure_html;
|
||||
|
||||
?>
|
||||
<div style="clear:both;"></div>
|
||||
</div>
|
||||
<p>Here is the source code of the purified HTML:</p>
|
||||
<pre><?php
|
||||
|
||||
echo htmlspecialchars($pure_html, ENT_COMPAT, 'UTF-8');
|
||||
|
||||
?></pre>
|
||||
<?php
|
||||
if (getFormMethod() == 'post') { // start POST validation notice
|
||||
?>
|
||||
<p>If you would like to validate the code with
|
||||
<a href="http://validator.w3.org/#validate-by-input">W3C's
|
||||
validator</a>, copy and paste the <em>entire</em> demo page's source.</p>
|
||||
<?php
|
||||
} // end POST validation notice
|
||||
|
||||
} // end main processing
|
||||
|
||||
// end result
|
||||
} else {
|
||||
|
||||
?>
|
||||
<p>Welcome to the live demo. Enter some HTML and see how HTML Purifier
|
||||
will filter it.</p>
|
||||
<?php
|
||||
|
||||
}
|
||||
|
||||
?>
|
||||
<form id="filter" action="demo.php<?php
|
||||
echo '?' . getFormMethod();
|
||||
if (isset($_REQUEST['profile']) || isset($_REQUEST['XDEBUG_PROFILE'])) {
|
||||
echo '&XDEBUG_PROFILE=1';
|
||||
} ?>" method="<?php echo getFormMethod(); ?>">
|
||||
<fieldset>
|
||||
<legend>HTML Purifier Input (<?php echo getFormMethod(); ?>)</legend>
|
||||
<textarea name="html" cols="60" rows="15"><?php
|
||||
|
||||
if (isset($html)) {
|
||||
echo htmlspecialchars(
|
||||
HTMLPurifier_Encoder::cleanUTF8($html), ENT_COMPAT, 'UTF-8');
|
||||
}
|
||||
?></textarea>
|
||||
<?php if (getFormMethod() == 'get') { ?>
|
||||
<p><strong>Warning:</strong> GET request method can only hold
|
||||
8129 characters (probably less depending on your browser).
|
||||
If you need to test anything
|
||||
larger than that, try the <a href="demo.php?post">POST form</a>.</p>
|
||||
<?php } ?>
|
||||
<?php if (extension_loaded('tidy')) { ?>
|
||||
<div>Nicely format output with Tidy? <input type="checkbox" value="1"
|
||||
name="tidy"<?php if (!empty($_REQUEST['tidy'])) echo ' checked="checked"'; ?> /></div>
|
||||
<?php } ?>
|
||||
<div>XHTML 1.0 Strict output? <input type="checkbox" value="1"
|
||||
name="strict"<?php if (!empty($_REQUEST['strict'])) echo ' checked="checked"'; ?> /></div>
|
||||
<div>Serve as application/xhtml+xml? (not for IE) <input type="checkbox" value="1"
|
||||
name="xml"<?php if (!empty($_REQUEST['xml'])) echo ' checked="checked"'; ?> /></div>
|
||||
<div>
|
||||
<input type="submit" value="Submit" name="submit" class="button" />
|
||||
</div>
|
||||
</fieldset>
|
||||
</form>
|
||||
<p>Return to <a href="http://hp.jpsband.org/">HTML Purifier's home page</a>.
|
||||
Try the form in <a href="demo.php?get">GET</a> and <a href="demo.php?post">POST</a> request
|
||||
flavors (GET is easy to validate with W3C, but POST allows larger inputs).</p>
|
||||
</body>
|
||||
</html>
|
6
docs/fixquotes.htc
Normal file
6
docs/fixquotes.htc
Normal file
@@ -0,0 +1,6 @@
|
||||
<public:attach event="oncontentready" onevent="init();" />
|
||||
<script>
|
||||
function init() {
|
||||
element.innerHTML = '“'+element.innerHTML+'”';
|
||||
}
|
||||
</script>
|
@@ -13,7 +13,7 @@
|
||||
|
||||
<h1>Documentation</h1>
|
||||
|
||||
<p><strong><a href="http://hp.jpsband.org/">HTML Purifier</a></strong> has documentation for all types of people.
|
||||
<p><strong><a href="http://htmlpurifier.org/">HTML Purifier</a></strong> has documentation for all types of people.
|
||||
Here is an index of all of them.</p>
|
||||
|
||||
<h2>End-user</h2>
|
||||
@@ -31,7 +31,7 @@ information for casual developers using HTML Purifier.</p>
|
||||
<dt><a href="enduser-slow.html">Speeding up HTML Purifier</a></dt>
|
||||
<dd>Explains how to speed up HTML Purifier through caching or inbound filtering.</dd>
|
||||
|
||||
<dt><a href="enduser-utf8.html">UTF-8</a></dt>
|
||||
<dt><a href="enduser-utf8.html">UTF-8: The Secret of Character Encoding</a></dt>
|
||||
<dd>Describes the rationale for using UTF-8, the ramifications otherwise, and how to make the switch.</dd>
|
||||
|
||||
</dl>
|
||||
@@ -42,9 +42,6 @@ conventions.</p>
|
||||
|
||||
<dl>
|
||||
|
||||
<dt><a href="dev-code-quality.html">Code Quality Issues</a></dt>
|
||||
<dd>Discusses code quality issues and places that need to be refactored.</dd>
|
||||
|
||||
<dt><a href="dev-progress.html">Implementation Progress</a></dt>
|
||||
<dd>Tables detailing HTML element and CSS property implementation coverage.</dd>
|
||||
|
||||
@@ -54,6 +51,10 @@ conventions.</p>
|
||||
<dt><a href="dev-optimization.html">Optimization</a></dt>
|
||||
<dd>Discusses possible methods of optimizing HTML Purifier.</dd>
|
||||
|
||||
<dt><a href="dev-advanced-api.html">Advanced API</a></dt>
|
||||
<dd>Functional specification for HTML Purifier's advanced API for defining
|
||||
custom filtering behavior.</dd>
|
||||
|
||||
</dl>
|
||||
|
||||
<h2>Proposals</h2>
|
||||
@@ -101,6 +102,12 @@ the code. They may be upgraded to HTML files or stay as TXT scratchpads.</p>
|
||||
<td>Common security issues that may still arise (half-baked).</td>
|
||||
</tr>
|
||||
|
||||
<tr>
|
||||
<td>Development</td>
|
||||
<td><a href="enduser-code-quality.txt">Code Quality Issues</a></td>
|
||||
<td>Enumerates code quality issues and places that need to be refactored.</td>
|
||||
</tr>
|
||||
|
||||
<tr>
|
||||
<td>Proposal</td>
|
||||
<td><a href="proposal-filter-levels.txt">Filter levels</a></td>
|
||||
|
@@ -15,7 +15,7 @@
|
||||
|
||||
<div id="filing">Filed under Proposals</div>
|
||||
<div id="index">Return to the <a href="index.html">index</a>.</div>
|
||||
<div id="home"><a href="http://hp.jpsband.org/">HTML Purifier</a> End-User Documentation</div>
|
||||
<div id="home"><a href="http://htmlpurifier.org/">HTML Purifier</a> End-User Documentation</div>
|
||||
|
||||
<p>Your website probably has a color-scheme.
|
||||
<span style="color:#090; background:#FFF;">Green on white</span>,
|
||||
|
@@ -7,7 +7,7 @@ value is used for. This means decentralized configuration declarations that
|
||||
are nevertheless error checking and a centralized configuration object.
|
||||
|
||||
Directives are divided into namespaces, indicating the major portion of
|
||||
functionality they cover (although there may be overlaps. Please consult
|
||||
functionality they cover (although there may be overlaps). Please consult
|
||||
the documentation in ConfigDef for more information on these namespaces.
|
||||
|
||||
Since configuration is dependant on context, internal classes require a
|
||||
@@ -36,4 +36,5 @@ the definition, you'd have to force reconstruction.
|
||||
|
||||
In practice, the pulling directives from the config object are
|
||||
solely need-based, and the flex points are littered throughout the
|
||||
setup() function. Some sort of refactoring is likely in order.
|
||||
setup() function. Some sort of refactoring is likely in order. See
|
||||
ref-xhtml-1.1.txt for more info.
|
||||
|
@@ -1,42 +1,6 @@
|
||||
We are going to model our I18N/L10N off of MediaWiki's system. Their's is
|
||||
obviously quite complicated, so we're going to simplify it a bit for our needs.
|
||||
|
||||
== Structure ==
|
||||
|
||||
First, you have a Language object. This object contains all the localisable
|
||||
message strings, as well as other important language-specific settings and
|
||||
custom behavior (uppercasing, lowercasing, printing dates, formatting
|
||||
numbers, etc.)
|
||||
|
||||
The object is constructed from two sources: subclassed versions of itself
|
||||
(classes) and Message files (messages).
|
||||
|
||||
== General use ==
|
||||
|
||||
You load a language object by calling the Language::factory() function.
|
||||
This function the class file for the object (taking in account fallback
|
||||
languages by using the fallback langauge's object but overloading the
|
||||
language key) and returns that object. Nothing else happens.
|
||||
|
||||
When a message/etc is requested, a lazy load initializor is called. Now the
|
||||
real work starts. We're first going to take the scenario that the language
|
||||
is not cached. The system loads the Messages file by:
|
||||
|
||||
require( $filename );
|
||||
$cache = compact( self::$mLocalisationKeys );
|
||||
|
||||
...where self::$mLocalisationKeys is the name of variables that could be used
|
||||
in the localization file. This lets you use things like:
|
||||
|
||||
$fallback = false;
|
||||
$rtl = false;
|
||||
|
||||
...and easily siphon them into arrays.
|
||||
|
||||
Then, we load the $fallback language (if not set, English) to fill in the gaps in
|
||||
the messages. There is specialized behavior for certain keys, as they can be
|
||||
mergeable maps, lists or alias lists (not sure what the last one is).
|
||||
|
||||
== Caching ==
|
||||
|
||||
MediaWiki has lots of caching mechanisms built in, which make the code somewhat
|
||||
|
@@ -15,7 +15,7 @@
|
||||
|
||||
<div id="filing">Filed under Reference</div>
|
||||
<div id="index">Return to the <a href="index.html">index</a>.</div>
|
||||
<div id="home"><a href="http://hp.jpsband.org/">HTML Purifier</a> End-User Documentation</div>
|
||||
<div id="home"><a href="http://htmlpurifier.org/">HTML Purifier</a> End-User Documentation</div>
|
||||
|
||||
<p>Many thanks to the DevNetwork community for answering questions,
|
||||
theorizing about design, and offering encouragement during
|
||||
|
@@ -32,6 +32,6 @@ A tag's attribute 'target' (for selecting frames) cut
|
||||
current behavior: no substitute, just delete when in strict, allow in loose
|
||||
Attribute 'name' deprecated in favor of 'id'
|
||||
current behavior: dropped silently
|
||||
projected behavior: create proper AttrTransform (currently not allowed at all)
|
||||
projected behavior: create proper AttrTransform
|
||||
[done] PRE tag allows SUB/SUP? (strict dtd comment vs syntax, loose disallows)
|
||||
current behavior: disallow as usual
|
||||
|
@@ -1,21 +1,187 @@
|
||||
|
||||
Getting XHTML 1.1 Working
|
||||
|
||||
It's quite simple, according to <http://www.w3.org/TR/xhtml11/changes.html>
|
||||
XHTML 1.1 and HTML Purifier
|
||||
|
||||
Todo for XHTML 1.1 support <http://www.w3.org/TR/xhtml11/changes.html>
|
||||
1. Scratch lang entirely in favor of xml:lang
|
||||
2. Scratch name entirely in favor of id (partially-done)
|
||||
3. Support Ruby <http://www.w3.org/TR/2001/REC-ruby-20010531/>
|
||||
|
||||
...but that's only an informative section. More things to do:
|
||||
HTML Purifier uses the modularization of XHTML
|
||||
<http://www.w3.org/TR/xhtml-modularization/> to organize the internals
|
||||
of HTMLDefinition into a more manageable and extensible fashion. Rather
|
||||
than have one super-object, HTMLDefinition is split into HTMLModules,
|
||||
each of which are responsible for defining elements, their attributes,
|
||||
and other properties (for a more indepth coverage, see
|
||||
/library/HTMLPurifier/HTMLModule.php's docblock comments).
|
||||
|
||||
1. Scratch style attribute (it's deprecated)
|
||||
2. Be module-aware (this might entail intelligent grouping in the definition
|
||||
and allowing users to specifically remove certain modules (see 5))
|
||||
3. Cross-reference minimal content models with existing DTDs and determine
|
||||
changes (todo)
|
||||
4. Watch out for the Legacy Module
|
||||
<http://www.w3.org/TR/2001/REC-xhtml-modularization-20010410/abstract_modules.html#s_legacymodule>
|
||||
5. Let users specify their own custom modules
|
||||
6. Study Modularization document
|
||||
<http://www.w3.org/TR/2001/REC-xhtml-modularization-20010410/>
|
||||
The modules that W3C defines and we support are:
|
||||
|
||||
* 5.1. Attribute Collections (technically not a module
|
||||
* 5.2. Core Modules
|
||||
o 5.2.2. Text Module
|
||||
o 5.2.3. Hypertext Module
|
||||
o 5.2.4. List Module
|
||||
* 5.4. Text Extension Modules
|
||||
o 5.4.1. Presentation Module
|
||||
o 5.4.2. Edit Module
|
||||
o 5.4.3. Bi-directional Text Module
|
||||
* 5.6. Table Modules
|
||||
o 5.6.2. Tables Module
|
||||
* 5.7. Image Module
|
||||
* 5.18. Style Attribute Module
|
||||
|
||||
Modules that we don't support but coul support are:
|
||||
|
||||
* 5.6. Table Modules
|
||||
o 5.6.1. Basic Tables Module [?]
|
||||
* 5.8. Client-side Image Map Module [?]
|
||||
* 5.9. Server-side Image Map Module [?]
|
||||
* 5.12. Target Module [?]
|
||||
* 5.21. Name Identification Module [deprecated]
|
||||
* 5.22. Legacy Module [deprecated]
|
||||
|
||||
These modules will not be implemented due to their dangerousness or
|
||||
inapplicability as an XHTML fragment:
|
||||
|
||||
* 5.2. Core Modules
|
||||
o 5.2.1. Structure Module
|
||||
* 5.3. Applet Module
|
||||
* 5.5. Forms Modules
|
||||
o 5.5.1. Basic Forms Module
|
||||
o 5.5.2. Forms Module
|
||||
* 5.10. Object Module
|
||||
* 5.11. Frames Module
|
||||
* 5.13. Iframe Module
|
||||
* 5.14. Intrinsic Events Module
|
||||
* 5.15. Metainformation Module
|
||||
* 5.16. Scripting Module
|
||||
* 5.17. Style Sheet Module
|
||||
* 5.19. Link Module
|
||||
* 5.20. Base Module
|
||||
|
||||
We will not be using W3C's XML Schemas or DTDs directly due to the lack
|
||||
of robust tools for handling them (the main problem is that all the
|
||||
current parsers are usually PHP 5 only and solely-validating, not
|
||||
correcting).
|
||||
|
||||
The abstraction of the HTMLDefinition creation process will also
|
||||
contribute to a need for a caching system. Cache invalidation would be
|
||||
difficult, but could be done by comparing the HTML and Attr config
|
||||
namespaces with a copy that was packaged along with the serialized
|
||||
HTMLDefinition object.
|
||||
|
||||
== General Use-Case ==
|
||||
|
||||
The outwards API of HTMLDefinition has been largely preserved, not
|
||||
only for backwards-compatibility but also by design. Instead,
|
||||
HTMLDefinition can be retrieved "raw", in which it loads a structure
|
||||
that closely resembles the modules of XHTML 1.1. This structure is very
|
||||
dynamic, making it easy to make cascading changes to global content
|
||||
sets or remove elements in bulk.
|
||||
|
||||
However, once HTML Purifier needs the actual definition, it retrieves
|
||||
a finalized version of HTMLDefinition. The finalized definition involves
|
||||
processing the modules into a form that it is optimized for multiple
|
||||
calls. This final version is immutable and, even if editable, would
|
||||
be extremely hard to change.
|
||||
|
||||
So, some code taking advantage of the XHTML modularization may look
|
||||
like this:
|
||||
|
||||
<?php
|
||||
$config = HTMLPurifier_Config::createDefault();
|
||||
$def =& $config->getHTMLDefinition(true); // reference to raw
|
||||
unset($def->modules['Hypertext']); // rm ''a'' link
|
||||
$purifier = new HTMLPurifier($config);
|
||||
$purifier->purify($html); // now the definition is finalized
|
||||
?>
|
||||
|
||||
== Inclusions ==
|
||||
|
||||
One of the nice features of HTMLDefinition is that piggy-backing off
|
||||
of global attribute and content sets is extremely easy to do.
|
||||
|
||||
=== Attributes ===
|
||||
|
||||
HTMLModule->elements[$element]->attr stores attribute information for the
|
||||
specific attributes of $element. This is quite close to the final
|
||||
API that HTML Purifier interfaces with, but there's an important
|
||||
extra feature: attr may also contain a array with a member index zero.
|
||||
|
||||
<?php
|
||||
HTMLModule->elements[$element]->attr[0] = array('AttrSet');
|
||||
?>
|
||||
|
||||
Rather than map the attribute key 0 to an array (which should be
|
||||
an AttrDef), it defines a number of attribute collections that should
|
||||
be merged into this elements attribute array.
|
||||
|
||||
Furthermore, the value of an attribute key, attribute value pair need
|
||||
not be a fully fledged AttrDef object. They can also be a string, which
|
||||
signifies a AttrDef that is looked up from a centralized registry
|
||||
AttrTypes. This allows more concise attribute definitions that look
|
||||
more like W3C's declarations, as well as offering a centralized point
|
||||
for modifying the behavior of one attribute type. And, of course, the
|
||||
old method of manually instantiating an AttrDef still works.
|
||||
|
||||
=== Attribute Collections ===
|
||||
|
||||
Attribute collections are stored and processed in the AttrCollections
|
||||
object, which is responsible for performing the inclusions signified
|
||||
by the 0 index. These attribute collections, too, are mutable, by
|
||||
using HTMLModule->attr_collections. You may add new attributes
|
||||
to a collection or define an entirely new collection for your module's
|
||||
use. Inclusions can also be cumulative.
|
||||
|
||||
Attribute collections allow us to get rid of so called "global attributes"
|
||||
(which actually aren't so global).
|
||||
|
||||
=== Content Models and ChildDef ===
|
||||
|
||||
An implementation of the above-mentioned attributes and attribute
|
||||
collections was applied to the ChildDef system. HTML Purifier uses
|
||||
a proprietary system called ChildDef for performance and flexibility
|
||||
reasons, but this does not line up very well with W3C's notion of
|
||||
regexps for defining the allowed children of an element.
|
||||
|
||||
HTMLPurifier->elements[$element]->content_model and
|
||||
HTMLPurifier->elements[$element]->content_model_type store information
|
||||
about the final ChildDef that will be stored in
|
||||
HTMLPurifier->elements[$element]->child (we use a different variable
|
||||
because the two forms are sufficiently different).
|
||||
|
||||
$content_model is an abstract, string representation of the internal
|
||||
state of ChildDef, while $content_model_type is a string identifier
|
||||
of which ChildDef subclass to instantiate. $content_model is processed
|
||||
by substituting all content set identifiers (capitalized element names)
|
||||
with their contents. It is then parsed and passed into the appropriate
|
||||
ChildDef class, as defined by the ContentSets->getChildDef() or the
|
||||
custom fallback HTMLModule->getChildDef() for custom child definitions
|
||||
not in the core.
|
||||
|
||||
You'll need to use these facilities if you plan on referencing a content
|
||||
set like "Inline" or "Block", and using them is recommended even if you're
|
||||
not due to their conciseness.
|
||||
|
||||
A few notes on $content_model: it's structure can be as complicated
|
||||
as you want, but the pipe symbol (|) is reserved for defining possible
|
||||
choices, due to the content sets implementation. For example, a content
|
||||
model that looks like:
|
||||
|
||||
"Inline -> Block -> a"
|
||||
|
||||
...when the Inline content set is defined as "span | b" and the Block
|
||||
content set is defined as "div | blockquote", will expand into:
|
||||
|
||||
"span | b -> div | blockquote -> a"
|
||||
|
||||
The custom HTMLModule->getChildDef() function will need to be able to
|
||||
then feed this information to ChildDef in a usable manner.
|
||||
|
||||
=== Content Sets ===
|
||||
|
||||
Content sets can be altered using HTMLModule->content_sets, an associative
|
||||
array of content set names to content set contents. If the content set
|
||||
already exists, your values are appended on to it (great for, say,
|
||||
registering the font tag as an inline element), otherwise it is
|
||||
created. They are substituted into content_model.
|
8
docs/specimens/LICENSE
Normal file
8
docs/specimens/LICENSE
Normal file
@@ -0,0 +1,8 @@
|
||||
Licensing of Specimens
|
||||
|
||||
Some files in this directory have different licenses:
|
||||
|
||||
windows-live-mail-desktop-beta.html - donated by laacz, public domain
|
||||
img.png - LGPL, from <http://commons.wikimedia.org/wiki/Image:Pastille_chrome.png>
|
||||
|
||||
All other files are by me, and are licensed under LGPL.
|
165
docs/specimens/html-align-to-css.html
Normal file
165
docs/specimens/html-align-to-css.html
Normal file
@@ -0,0 +1,165 @@
|
||||
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
|
||||
"http://www.w3.org/TR/html4/loose.dtd">
|
||||
<html>
|
||||
<head>
|
||||
<title>HTML align attribute to CSS - HTML Purifier Specimen</title>
|
||||
<style type="text/css">
|
||||
div.container {position:relative;height:110px;}
|
||||
div.container.legend .test {text-align:center;line-height:100px;}
|
||||
div.test {width:100px;height:100px;border:1px solid black;
|
||||
position:absolute;top:10px;}
|
||||
div.test.html {left:10px;}
|
||||
div.test.css {left:140px;}
|
||||
table {background:#F00;}
|
||||
img {border:1px solid #000;}
|
||||
hr {width:50px;}
|
||||
div.segment {width:250px; float:left; margin-top:1em;}
|
||||
</style>
|
||||
</head>
|
||||
<body>
|
||||
|
||||
<h1>HTML align attribute to CSS</h1>
|
||||
|
||||
<p>Inspect source for methodology.</p>
|
||||
|
||||
<div class="container legend">
|
||||
<div class="test html">
|
||||
HTML
|
||||
</div>
|
||||
<div class="test css">
|
||||
CSS
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="segment">
|
||||
|
||||
<h2>table.align</h2>
|
||||
|
||||
<h3>left</h3>
|
||||
<div class="container">
|
||||
<div class="test html">
|
||||
a<table align="left"><tr><td>O</td></tr></table>a
|
||||
</div>
|
||||
<div class="test css">
|
||||
a<table style="float:left;"><tr><td>O</td></tr></table>a
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<h3>center</h3>
|
||||
<div class="container">
|
||||
<div class="test html">
|
||||
a<table align="center"><tr><td>O</td></tr></table>a
|
||||
</div>
|
||||
<div class="test css">
|
||||
a<table style="margin-left:auto; margin-right:auto;"><tr><td>O</td></tr></table>a
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<h3>right</h3>
|
||||
<div class="container">
|
||||
<div class="test html">
|
||||
a<table align="right"><tr><td>O</td></tr></table>a
|
||||
</div>
|
||||
<div class="test css">
|
||||
a<table style="float:right;"><tr><td>O</td></tr></table>a
|
||||
</div>
|
||||
</div>
|
||||
|
||||
</div>
|
||||
|
||||
<!-- ################################################################## -->
|
||||
|
||||
<div class="segment">
|
||||
<h2>img.align</h2>
|
||||
<h3>left</h3>
|
||||
<div class="container">
|
||||
<div class="test html">
|
||||
a<img src="img.png" align="left">a
|
||||
</div>
|
||||
<div class="test css">
|
||||
a<img src="img.png" style="float:left;">a
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<h3>right</h3>
|
||||
<div class="container">
|
||||
<div class="test html">
|
||||
a<img src="img.png" align="right">a
|
||||
</div>
|
||||
<div class="test css">
|
||||
a<img src="img.png" style="float:right;">a
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<h3>bottom</h3>
|
||||
<div class="container">
|
||||
<div class="test html">
|
||||
a<img src="img.png" align="bottom">a
|
||||
</div>
|
||||
<div class="test css">
|
||||
a<img src="img.png" style="vertical-align:baseline;">a
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<h3>middle</h3>
|
||||
<div class="container">
|
||||
<div class="test html">
|
||||
a<img src="img.png" align="middle">a
|
||||
</div>
|
||||
<div class="test css">
|
||||
a<img src="img.png" style="vertical-align:middle;">a
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<h3>top</h3>
|
||||
<div class="container">
|
||||
<div class="test html">
|
||||
a<img src="img.png" align="top">a
|
||||
</div>
|
||||
<div class="test css">
|
||||
a<img src="img.png" style="vertical-align:top;">a
|
||||
</div>
|
||||
</div>
|
||||
|
||||
</div>
|
||||
|
||||
<!-- ################################################################## -->
|
||||
|
||||
<div class="segment">
|
||||
|
||||
<h2>hr.align</h2>
|
||||
|
||||
<h3>left</h3>
|
||||
<div class="container">
|
||||
<div class="test html">
|
||||
<hr align="left" />
|
||||
</div>
|
||||
<div class="test css">
|
||||
<hr style="margin-right:auto; margin-left:0; text-align:left;" />
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<h3>center</h3>
|
||||
<div class="container">
|
||||
<div class="test html">
|
||||
<hr align="center" />
|
||||
</div>
|
||||
<div class="test css">
|
||||
<hr style="margin-right:auto; margin-left:auto; text-align:center;" />
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<h3>right</h3>
|
||||
<div class="container">
|
||||
<div class="test html">
|
||||
<hr align="right" />
|
||||
</div>
|
||||
<div class="test css">
|
||||
<hr style="margin-right:0; margin-left:auto; text-align:right;" />
|
||||
</div>
|
||||
</div>
|
||||
|
||||
</div>
|
||||
|
||||
</body>
|
||||
</html>
|
BIN
docs/specimens/img.png
Normal file
BIN
docs/specimens/img.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 2.1 KiB |
74
docs/specimens/windows-live-mail-desktop-beta.html
Normal file
74
docs/specimens/windows-live-mail-desktop-beta.html
Normal file
@@ -0,0 +1,74 @@
|
||||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
|
||||
<HTML ChildAreas="4" xmlns:canvas><HEAD>
|
||||
<META http-equiv=Content-Type content=text/html;charset=windows-1257>
|
||||
<STYLE></STYLE>
|
||||
|
||||
<META content="MSHTML 6.00.6000.16414" name=GENERATOR></HEAD>
|
||||
<BODY id=MailContainerBody
|
||||
style="PADDING-RIGHT: 10px; PADDING-LEFT: 10px; FONT-SIZE: 10pt; COLOR: #000000; PADDING-TOP: 15px; FONT-FAMILY: Arial"
|
||||
bgColor=#ff6600 leftMargin=0 background="" topMargin=0
|
||||
name="Compose message area" acc_role="text" CanvasTabStop="false">
|
||||
<DIV
|
||||
style="BORDER-TOP: #dddddd 1px solid; FONT-SIZE: 10pt; WIDTH: 100%; MARGIN-RIGHT: 10px; PADDING-TOP: 5px; BORDER-BOTTOM: #dddddd 1px solid; FONT-FAMILY: Verdana; HEIGHT: 25px; BACKGROUND-COLOR: #ffffff"><NOBR><SPAN
|
||||
title="View a slideshow of the pictures in this e-mail message."
|
||||
style="PADDING-RIGHT: 20px"><A style="COLOR: #0088e4"
|
||||
href="http://g.msn.com/5meen_us/171?path=/photomail/{6fc0065f-ffdd-4ca6-9a4c-cc5a93dc122f}&image=47D7B182CFEFB10!127&imagehi=47D7B182CFEFB10!125&CID=323550092004883216">Play
|
||||
slideshow </A></SPAN><SPAN style="COLOR: #909090"><SPAN>|</SPAN><SPAN
|
||||
style="PADDING-LEFT: 20px"> Download the highest quality version of a picture by
|
||||
clicking the + above it </SPAN></SPAN></NOBR></DIV>
|
||||
<DIV
|
||||
style="PADDING-RIGHT: 5px; PADDING-LEFT: 7px; PADDING-BOTTOM: 2px; WIDTH: 100%; PADDING-TOP: 2px">
|
||||
<OL>
|
||||
<LI><IMG title="Angry smile emoticon"
|
||||
style="FLOAT: none; MARGIN: 0px; POSITION: static" tabIndex=-1
|
||||
alt="Angry smile emoticon" src="cid:49F0C856199E4D688D2D740680733D74@wc"
|
||||
MSNNonUserImageOrEmoticon="true">Un ka <FONT style="BACKGROUND-COLOR: #800000"
|
||||
color=#cc99ff><STRONG>Tev</STRONG></FONT> iet, un ko tu dari?
|
||||
<LI>Aha!</LI></OL>
|
||||
|
||||
<UL>
|
||||
<LI>Buletets
|
||||
<LI>
|
||||
<DIV align=justify><A title=http://laacz.lv/blog/
|
||||
href="http://laacz.lv/blog/">http://laacz.lv/blog/</A> un <A
|
||||
title=http://google.com/ href="http://google.com/">gugle</A></DIV>
|
||||
<LI>Sarakstucitis</LI></UL></DIV><SPAN><SPAN xmlns:canvas="canvas-namespace-id"
|
||||
layoutEmptyTextWellFont="Tahoma"><SPAN
|
||||
style="MARGIN-BOTTOM: 15px; OVERFLOW: visible; HEIGHT: 16px"></SPAN><SPAN
|
||||
style="MARGIN-BOTTOM: 25px; VERTICAL-ALIGN: top; OVERFLOW: visible; MARGIN-RIGHT: 25px; HEIGHT: 234px">
|
||||
<TABLE style="DISPLAY: inline">
|
||||
<TBODY>
|
||||
<TR>
|
||||
|
||||
<TD>
|
||||
<DIV
|
||||
style="FONT-WEIGHT: bold; FONT-SIZE: 12pt; FONT-FAMILY: arial; TEXT-ALIGN: center"><A
|
||||
id=HiresARef
|
||||
title="Click here to view or download a high resolution version of this picture"
|
||||
style="COLOR: #0088e4; TEXT-DECORATION: none"
|
||||
href="http://byfiles.storage.msn.com/x1pMvt0I80jTgT6DuaCpEMbprX3nk3jNv_vjigxV_EYVSMyM_PKgEvDEUtuNhQC-F-23mTTcKyqx6eGaeK2e_wMJ0ikwpDdFntk4SY7pfJUv2g2Ck6R2S2vAA?download">+</A></DIV>
|
||||
<DIV
|
||||
title="Click here to view the full image using the online photo viewer."
|
||||
style="DISPLAY: inline; OVERFLOW: hidden; WIDTH: 140px; HEIGHT: 140px"><A
|
||||
href="http://g.msn.com/5meen_us/171?path=/photomail/{6fc0065f-ffdd-4ca6-9a4c-cc5a93dc122f}&image=47D7B182CFEFB10!127&imagehi=47D7B182CFEFB10!125&CID=323550092004883216"
|
||||
border="0"><IMG
|
||||
style="MARGIN-TOP: 15px; DISPLAY: inline-block; MARGIN-LEFT: 0px"
|
||||
height=109 src="cid:006A71303B80404E9FB6184E55D6A446@wc" width=140
|
||||
border=0></A></DIV></TD></TR>
|
||||
<TR>
|
||||
<TD>
|
||||
<DIV
|
||||
style="FONT-SIZE: 10pt; WIDTH: 140px; FONT-FAMILY: verdana; TEXT-ALIGN: center"><EM><STRONG>This
|
||||
<U>is </U></STRONG><U>tit</U>le</EM> fo<STRONG>r <FONT
|
||||
face="Arial Black">t<FONT color=#800000 size=7>h<U>i</U></FONT>s
|
||||
</FONT>picture</STRONG></DIV></TD></TR></TBODY></TABLE></SPAN></SPAN></SPAN>
|
||||
|
||||
<DIV
|
||||
style="PADDING-RIGHT: 5px; PADDING-LEFT: 7px; PADDING-BOTTOM: 2px; WIDTH: 100%; PADDING-TOP: 2px; HEIGHT: 50px">
|
||||
<DIV> </DIV></DIV>
|
||||
<DIV
|
||||
style="BORDER-TOP: #dddddd 1px solid; FONT-SIZE: 10pt; MARGIN-BOTTOM: 10px; WIDTH: 100%; COLOR: #909090; MARGIN-RIGHT: 10px; PADDING-TOP: 9px; FONT-FAMILY: Verdana; HEIGHT: 42px; BACKGROUND-COLOR: #ffffff"><NOBR><SPAN
|
||||
title="Join Windows Live to share photos using Windows Live Photo E-mail.">Online
|
||||
pictures are available for 30 days. <A style="COLOR: #0088e4"
|
||||
href="http://g.msn.com/5meen_us/175">Get Windows Live Mail desktop to create
|
||||
your own photo e-mails. </A></SPAN></NOBR></DIV></BODY></HTML>
|
@@ -42,3 +42,27 @@ blockquote .label {font-weight:bold; font-size:1em; margin:0 0 .1em;
|
||||
|
||||
/* Contains, without exception, $Id$, for SVN version info. */
|
||||
#version {text-align:right; font-style:italic; margin:2em 0;}
|
||||
|
||||
#toc ol ol {list-style-type:lower-roman;}
|
||||
#toc ol {list-style-type:decimal;}
|
||||
#toc {list-style-type:upper-alpha;}
|
||||
|
||||
q {
|
||||
behavior: url(fixquotes.htc); /* IE fix */
|
||||
quotes: '\201C' '\201D' '\2018' '\2019';
|
||||
}
|
||||
q:before {
|
||||
content: open-quote;
|
||||
}
|
||||
q:after {
|
||||
content: close-quote;
|
||||
}
|
||||
|
||||
/* Marks off implementation details interesting only to the person writing
|
||||
the class described in the spec. */
|
||||
.technical {margin-left:2em; }
|
||||
.technical:before {content:"Technical note: "; font-weight:bold; color:#061; }
|
||||
|
||||
/* Marks off sections that are lacking. */
|
||||
.fixme {margin-left:2em; }
|
||||
.fixme:before {content:"Fix me: "; font-weight:bold; color:#C00; }
|
||||
|
@@ -22,7 +22,7 @@
|
||||
*/
|
||||
|
||||
/*
|
||||
HTML Purifier 1.4.0 - Standards Compliant HTML Filtering
|
||||
HTML Purifier 1.6.1 - Standards Compliant HTML Filtering
|
||||
Copyright (C) 2006 Edward Z. Yang
|
||||
|
||||
This library is free software; you can redistribute it and/or
|
||||
@@ -64,7 +64,7 @@ require_once 'HTMLPurifier/Encoder.php';
|
||||
class HTMLPurifier
|
||||
{
|
||||
|
||||
var $version = '1.4.0';
|
||||
var $version = '1.6.1';
|
||||
|
||||
var $config;
|
||||
var $filters;
|
||||
|
100
library/HTMLPurifier/AttrCollections.php
Normal file
100
library/HTMLPurifier/AttrCollections.php
Normal file
@@ -0,0 +1,100 @@
|
||||
<?php
|
||||
|
||||
require_once 'HTMLPurifier/AttrTypes.php';
|
||||
require_once 'HTMLPurifier/AttrDef/Lang.php';
|
||||
|
||||
/**
|
||||
* Defines common attribute collections that modules reference
|
||||
*/
|
||||
|
||||
class HTMLPurifier_AttrCollections
|
||||
{
|
||||
|
||||
/**
|
||||
* Associative array of attribute collections, indexed by name
|
||||
* @note Technically, the composition of these is more complicated,
|
||||
* but we bypass it using our own excludes property
|
||||
*/
|
||||
var $info = array();
|
||||
|
||||
/**
|
||||
* Performs all expansions on internal data for use by other inclusions
|
||||
* It also collects all attribute collection extensions from
|
||||
* modules
|
||||
* @param $attr_types HTMLPurifier_AttrTypes instance
|
||||
* @param $modules Hash array of HTMLPurifier_HTMLModule members
|
||||
*/
|
||||
function HTMLPurifier_AttrCollections($attr_types, $modules) {
|
||||
$info =& $this->info;
|
||||
// load extensions from the modules
|
||||
foreach ($modules as $module) {
|
||||
foreach ($module->attr_collections as $coll_i => $coll) {
|
||||
foreach ($coll as $attr_i => $attr) {
|
||||
if ($attr_i === 0 && isset($info[$coll_i][$attr_i])) {
|
||||
// merge in includes
|
||||
$info[$coll_i][$attr_i] = array_merge(
|
||||
$info[$coll_i][$attr_i], $attr);
|
||||
continue;
|
||||
}
|
||||
$info[$coll_i][$attr_i] = $attr;
|
||||
}
|
||||
}
|
||||
}
|
||||
// perform internal expansions and inclusions
|
||||
foreach ($info as $name => $attr) {
|
||||
// merge attribute collections that include others
|
||||
$this->performInclusions($info[$name]);
|
||||
// replace string identifiers with actual attribute objects
|
||||
$this->expandIdentifiers($info[$name], $attr_types);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Takes a reference to an attribute associative array and performs
|
||||
* all inclusions specified by the zero index.
|
||||
* @param &$attr Reference to attribute array
|
||||
*/
|
||||
function performInclusions(&$attr) {
|
||||
if (!isset($attr[0])) return;
|
||||
$merge = $attr[0];
|
||||
// loop through all the inclusions
|
||||
for ($i = 0; isset($merge[$i]); $i++) {
|
||||
// foreach attribute of the inclusion, copy it over
|
||||
foreach ($this->info[$merge[$i]] as $key => $value) {
|
||||
if (isset($attr[$key])) continue; // also catches more inclusions
|
||||
$attr[$key] = $value;
|
||||
}
|
||||
if (isset($info[$merge[$i]][0])) {
|
||||
// recursion
|
||||
$merge = array_merge($merge, isset($info[$merge[$i]][0]));
|
||||
}
|
||||
}
|
||||
unset($attr[0]);
|
||||
}
|
||||
|
||||
/**
|
||||
* Expands all string identifiers in an attribute array by replacing
|
||||
* them with the appropriate values inside HTMLPurifier_AttrTypes
|
||||
* @param &$attr Reference to attribute array
|
||||
* @param $attr_types HTMLPurifier_AttrTypes instance
|
||||
*/
|
||||
function expandIdentifiers(&$attr, $attr_types) {
|
||||
foreach ($attr as $def_i => $def) {
|
||||
if ($def_i === 0) continue;
|
||||
if (!is_string($def)) continue;
|
||||
if ($def === false) {
|
||||
unset($attr[$def_i]);
|
||||
continue;
|
||||
}
|
||||
if (isset($attr_types->info[$def])) {
|
||||
$attr[$def_i] = $attr_types->info[$def];
|
||||
} else {
|
||||
trigger_error('Attempted to reference undefined attribute type', E_USER_ERROR);
|
||||
unset($attr[$def_i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
?>
|
@@ -7,7 +7,7 @@ require_once 'HTMLPurifier/CSSDefinition.php';
|
||||
* Validates shorthand CSS property background.
|
||||
* @warning Does not support url tokens that have internal spaces.
|
||||
*/
|
||||
class HTMLPurifier_AttrDef_Background extends HTMLPurifier_AttrDef
|
||||
class HTMLPurifier_AttrDef_CSS_Background extends HTMLPurifier_AttrDef
|
||||
{
|
||||
|
||||
/**
|
||||
@@ -16,7 +16,7 @@ class HTMLPurifier_AttrDef_Background extends HTMLPurifier_AttrDef
|
||||
*/
|
||||
var $info;
|
||||
|
||||
function HTMLPurifier_AttrDef_Background($config) {
|
||||
function HTMLPurifier_AttrDef_CSS_Background($config) {
|
||||
$def = $config->getCSSDefinition();
|
||||
$this->info['background-color'] = $def->info['background-color'];
|
||||
$this->info['background-image'] = $def->info['background-image'];
|
@@ -1,8 +1,8 @@
|
||||
<?php
|
||||
|
||||
require_once 'HTMLPurifier/AttrDef.php';
|
||||
require_once 'HTMLPurifier/AttrDef/CSSLength.php';
|
||||
require_once 'HTMLPurifier/AttrDef/Percentage.php';
|
||||
require_once 'HTMLPurifier/AttrDef/CSS/Length.php';
|
||||
require_once 'HTMLPurifier/AttrDef/CSS/Percentage.php';
|
||||
|
||||
/* W3C says:
|
||||
[ // adjective and number must be in correct order, even if
|
||||
@@ -45,15 +45,15 @@ require_once 'HTMLPurifier/AttrDef/Percentage.php';
|
||||
/**
|
||||
* Validates the value of background-position.
|
||||
*/
|
||||
class HTMLPurifier_AttrDef_BackgroundPosition extends HTMLPurifier_AttrDef
|
||||
class HTMLPurifier_AttrDef_CSS_BackgroundPosition extends HTMLPurifier_AttrDef
|
||||
{
|
||||
|
||||
var $length;
|
||||
var $percentage;
|
||||
|
||||
function HTMLPurifier_AttrDef_BackgroundPosition() {
|
||||
$this->length = new HTMLPurifier_AttrDef_CSSLength();
|
||||
$this->percentage = new HTMLPurifier_AttrDef_Percentage();
|
||||
function HTMLPurifier_AttrDef_CSS_BackgroundPosition() {
|
||||
$this->length = new HTMLPurifier_AttrDef_CSS_Length();
|
||||
$this->percentage = new HTMLPurifier_AttrDef_CSS_Percentage();
|
||||
}
|
||||
|
||||
function validate($string, $config, &$context) {
|
@@ -5,7 +5,7 @@ require_once 'HTMLPurifier/AttrDef.php';
|
||||
/**
|
||||
* Validates the border property as defined by CSS.
|
||||
*/
|
||||
class HTMLPurifier_AttrDef_Border extends HTMLPurifier_AttrDef
|
||||
class HTMLPurifier_AttrDef_CSS_Border extends HTMLPurifier_AttrDef
|
||||
{
|
||||
|
||||
/**
|
||||
@@ -13,7 +13,7 @@ class HTMLPurifier_AttrDef_Border extends HTMLPurifier_AttrDef
|
||||
*/
|
||||
var $info = array();
|
||||
|
||||
function HTMLPurifier_AttrDef_Border($config) {
|
||||
function HTMLPurifier_AttrDef_CSS_Border($config) {
|
||||
$def = $config->getCSSDefinition();
|
||||
$this->info['border-width'] = $def->info['border-width'];
|
||||
$this->info['border-style'] = $def->info['border-style'];
|
@@ -5,7 +5,7 @@ require_once 'HTMLPurifier/AttrDef.php';
|
||||
/**
|
||||
* Validates Color as defined by CSS.
|
||||
*/
|
||||
class HTMLPurifier_AttrDef_Color extends HTMLPurifier_AttrDef
|
||||
class HTMLPurifier_AttrDef_CSS_Color extends HTMLPurifier_AttrDef
|
||||
{
|
||||
|
||||
/**
|
@@ -9,7 +9,7 @@
|
||||
* especially useful for CSS values, which often are a choice between
|
||||
* an enumerated set of predefined values or a flexible data type.
|
||||
*/
|
||||
class HTMLPurifier_AttrDef_Composite extends HTMLPurifier_AttrDef
|
||||
class HTMLPurifier_AttrDef_CSS_Composite extends HTMLPurifier_AttrDef
|
||||
{
|
||||
|
||||
/**
|
||||
@@ -21,7 +21,7 @@ class HTMLPurifier_AttrDef_Composite extends HTMLPurifier_AttrDef
|
||||
/**
|
||||
* @param $defs List of HTMLPurifier_AttrDef objects
|
||||
*/
|
||||
function HTMLPurifier_AttrDef_Composite($defs) {
|
||||
function HTMLPurifier_AttrDef_CSS_Composite($defs) {
|
||||
$this->defs = $defs;
|
||||
}
|
||||
|
@@ -5,7 +5,7 @@ require_once 'HTMLPurifier/AttrDef.php';
|
||||
/**
|
||||
* Validates shorthand CSS property font.
|
||||
*/
|
||||
class HTMLPurifier_AttrDef_Font extends HTMLPurifier_AttrDef
|
||||
class HTMLPurifier_AttrDef_CSS_Font extends HTMLPurifier_AttrDef
|
||||
{
|
||||
|
||||
/**
|
||||
@@ -30,7 +30,7 @@ class HTMLPurifier_AttrDef_Font extends HTMLPurifier_AttrDef
|
||||
'status-bar' => true
|
||||
);
|
||||
|
||||
function HTMLPurifier_AttrDef_Font($config) {
|
||||
function HTMLPurifier_AttrDef_CSS_Font($config) {
|
||||
$def = $config->getCSSDefinition();
|
||||
$this->info['font-style'] = $def->info['font-style'];
|
||||
$this->info['font-variant'] = $def->info['font-variant'];
|
@@ -7,7 +7,7 @@ require_once 'HTMLPurifier/AttrDef.php';
|
||||
/**
|
||||
* Validates a font family list according to CSS spec
|
||||
*/
|
||||
class HTMLPurifier_AttrDef_FontFamily extends HTMLPurifier_AttrDef
|
||||
class HTMLPurifier_AttrDef_CSS_FontFamily extends HTMLPurifier_AttrDef
|
||||
{
|
||||
|
||||
/**
|
@@ -1,13 +1,12 @@
|
||||
<?php
|
||||
|
||||
require_once 'HTMLPurifier/AttrDef.php';
|
||||
require_once 'HTMLPurifier/AttrDef/Number.php';
|
||||
require_once 'HTMLPurifier/AttrDef/CSS/Number.php';
|
||||
|
||||
/**
|
||||
* Represents a Length as defined by CSS.
|
||||
* @warning Be sure not to confuse this with HTMLPurifier_AttrDef_Length!
|
||||
*/
|
||||
class HTMLPurifier_AttrDef_CSSLength extends HTMLPurifier_AttrDef
|
||||
class HTMLPurifier_AttrDef_CSS_Length extends HTMLPurifier_AttrDef
|
||||
{
|
||||
|
||||
/**
|
||||
@@ -26,8 +25,8 @@ class HTMLPurifier_AttrDef_CSSLength extends HTMLPurifier_AttrDef
|
||||
* @param $non_negative Bool indication whether or not negative values are
|
||||
* allowed.
|
||||
*/
|
||||
function HTMLPurifier_AttrDef_CSSLength($non_negative = false) {
|
||||
$this->number_def = new HTMLPurifier_AttrDef_Number($non_negative);
|
||||
function HTMLPurifier_AttrDef_CSS_Length($non_negative = false) {
|
||||
$this->number_def = new HTMLPurifier_AttrDef_CSS_Number($non_negative);
|
||||
}
|
||||
|
||||
function validate($length, $config, &$context) {
|
@@ -6,16 +6,16 @@ require_once 'HTMLPurifier/AttrDef.php';
|
||||
* Validates shorthand CSS property list-style.
|
||||
* @warning Does not support url tokens that have internal spaces.
|
||||
*/
|
||||
class HTMLPurifier_AttrDef_ListStyle extends HTMLPurifier_AttrDef
|
||||
class HTMLPurifier_AttrDef_CSS_ListStyle extends HTMLPurifier_AttrDef
|
||||
{
|
||||
|
||||
/**
|
||||
* Local copy of component validators.
|
||||
* @note See HTMLPurifier_AttrDef_Font::$info for a similar impl.
|
||||
* @note See HTMLPurifier_AttrDef_CSS_Font::$info for a similar impl.
|
||||
*/
|
||||
var $info;
|
||||
|
||||
function HTMLPurifier_AttrDef_ListStyle($config) {
|
||||
function HTMLPurifier_AttrDef_CSS_ListStyle($config) {
|
||||
$def = $config->getCSSDefinition();
|
||||
$this->info['list-style-type'] = $def->info['list-style-type'];
|
||||
$this->info['list-style-position'] = $def->info['list-style-position'];
|
@@ -13,7 +13,7 @@ require_once 'HTMLPurifier/AttrDef.php';
|
||||
* can only be used alone: it will never manifest as part of a multi
|
||||
* shorthand declaration. Thus, this class does not allow inherit.
|
||||
*/
|
||||
class HTMLPurifier_AttrDef_Multiple extends HTMLPurifier_AttrDef
|
||||
class HTMLPurifier_AttrDef_CSS_Multiple extends HTMLPurifier_AttrDef
|
||||
{
|
||||
|
||||
/**
|
||||
@@ -30,7 +30,7 @@ class HTMLPurifier_AttrDef_Multiple extends HTMLPurifier_AttrDef
|
||||
* @param $single HTMLPurifier_AttrDef to multiply
|
||||
* @param $max Max number of values allowed (usually four)
|
||||
*/
|
||||
function HTMLPurifier_AttrDef_Multiple($single, $max = 4) {
|
||||
function HTMLPurifier_AttrDef_CSS_Multiple($single, $max = 4) {
|
||||
$this->single = $single;
|
||||
$this->max = $max;
|
||||
}
|
@@ -3,7 +3,7 @@
|
||||
/**
|
||||
* Validates a number as defined by the CSS spec.
|
||||
*/
|
||||
class HTMLPurifier_AttrDef_Number extends HTMLPurifier_AttrDef
|
||||
class HTMLPurifier_AttrDef_CSS_Number extends HTMLPurifier_AttrDef
|
||||
{
|
||||
|
||||
/**
|
||||
@@ -14,7 +14,7 @@ class HTMLPurifier_AttrDef_Number extends HTMLPurifier_AttrDef
|
||||
/**
|
||||
* @param $non_negative Bool indicating whether negatives are forbidden
|
||||
*/
|
||||
function HTMLPurifier_AttrDef_Number($non_negative = false) {
|
||||
function HTMLPurifier_AttrDef_CSS_Number($non_negative = false) {
|
||||
$this->non_negative = $non_negative;
|
||||
}
|
||||
|
@@ -1,24 +1,24 @@
|
||||
<?php
|
||||
|
||||
require_once 'HTMLPurifier/AttrDef.php';
|
||||
require_once 'HTMLPurifier/AttrDef/Number.php';
|
||||
require_once 'HTMLPurifier/AttrDef/CSS/Number.php';
|
||||
|
||||
/**
|
||||
* Validates a Percentage as defined by the CSS spec.
|
||||
*/
|
||||
class HTMLPurifier_AttrDef_Percentage extends HTMLPurifier_AttrDef
|
||||
class HTMLPurifier_AttrDef_CSS_Percentage extends HTMLPurifier_AttrDef
|
||||
{
|
||||
|
||||
/**
|
||||
* Instance of HTMLPurifier_AttrDef_Number to defer number validation
|
||||
* Instance of HTMLPurifier_AttrDef_CSS_Number to defer number validation
|
||||
*/
|
||||
var $number_def;
|
||||
|
||||
/**
|
||||
* @param Bool indicating whether to forbid negative values
|
||||
*/
|
||||
function HTMLPurifier_AttrDef_Percentage($non_negative = false) {
|
||||
$this->number_def = new HTMLPurifier_AttrDef_Number($non_negative);
|
||||
function HTMLPurifier_AttrDef_CSS_Percentage($non_negative = false) {
|
||||
$this->number_def = new HTMLPurifier_AttrDef_CSS_Number($non_negative);
|
||||
}
|
||||
|
||||
function validate($string, $config, &$context) {
|
@@ -7,7 +7,7 @@ require_once 'HTMLPurifier/AttrDef.php';
|
||||
* @note This class could be generalized into a version that acts sort of
|
||||
* like Enum except you can compound the allowed values.
|
||||
*/
|
||||
class HTMLPurifier_AttrDef_TextDecoration extends HTMLPurifier_AttrDef
|
||||
class HTMLPurifier_AttrDef_CSS_TextDecoration extends HTMLPurifier_AttrDef
|
||||
{
|
||||
|
||||
/**
|
@@ -4,17 +4,17 @@ require_once 'HTMLPurifier/AttrDef/URI.php';
|
||||
|
||||
/**
|
||||
* Validates a URI in CSS syntax, which uses url('http://example.com')
|
||||
* @note While theoretically speaking we a URI in a CSS document could
|
||||
* @note While theoretically speaking a URI in a CSS document could
|
||||
* be non-embedded, as of CSS2 there is no such usage so we're
|
||||
* generalizing it. This may need to be changed in the future.
|
||||
* @warning Since HTMLPurifier_AttrDef_CSS blindly uses semicolons as
|
||||
* the separator, you cannot put a literal semicolon in
|
||||
* in the URI. Try percent encoding it, in that case.
|
||||
*/
|
||||
class HTMLPurifier_AttrDef_CSSURI extends HTMLPurifier_AttrDef_URI
|
||||
class HTMLPurifier_AttrDef_CSS_URI extends HTMLPurifier_AttrDef_URI
|
||||
{
|
||||
|
||||
function HTMLPurifier_AttrDef_CSSURI() {
|
||||
function HTMLPurifier_AttrDef_CSS_URI() {
|
||||
$this->HTMLPurifier_AttrDef_URI(true); // always embedded
|
||||
}
|
||||
|
@@ -5,6 +5,9 @@ require_once 'HTMLPurifier/AttrDef.php';
|
||||
// Enum = Enumerated
|
||||
/**
|
||||
* Validates a keyword against a list of valid values.
|
||||
* @warning The case-insensitive compare of this function uses PHP's
|
||||
* built-in strtolower and ctype_lower functions, which may
|
||||
* cause problems with international comparisons
|
||||
*/
|
||||
class HTMLPurifier_AttrDef_Enum extends HTMLPurifier_AttrDef
|
||||
{
|
||||
@@ -25,8 +28,8 @@ class HTMLPurifier_AttrDef_Enum extends HTMLPurifier_AttrDef
|
||||
* @param $case_sensitive Bool indicating whether or not case sensitive
|
||||
*/
|
||||
function HTMLPurifier_AttrDef_Enum(
|
||||
$valid_values = array(), $case_sensitive = false) {
|
||||
|
||||
$valid_values = array(), $case_sensitive = false
|
||||
) {
|
||||
$this->valid_values = array_flip($valid_values);
|
||||
$this->case_sensitive = $case_sensitive;
|
||||
}
|
||||
@@ -34,6 +37,7 @@ class HTMLPurifier_AttrDef_Enum extends HTMLPurifier_AttrDef
|
||||
function validate($string, $config, &$context) {
|
||||
$string = trim($string);
|
||||
if (!$this->case_sensitive) {
|
||||
// we may want to do full case-insensitive libraries
|
||||
$string = ctype_lower($string) ? $string : strtolower($string);
|
||||
}
|
||||
$result = isset($this->valid_values[$string]);
|
||||
|
34
library/HTMLPurifier/AttrDef/HTML/FrameTarget.php
Normal file
34
library/HTMLPurifier/AttrDef/HTML/FrameTarget.php
Normal file
@@ -0,0 +1,34 @@
|
||||
<?php
|
||||
|
||||
HTMLPurifier_ConfigSchema::define(
|
||||
'Attr', 'AllowedFrameTargets', array(), 'lookup',
|
||||
'Lookup table of all allowed link frame targets. Some commonly used '.
|
||||
'link targets include _blank, _self, _parent and _top. Values should '.
|
||||
'be lowercase, as validation will be done in a case-sensitive manner '.
|
||||
'despite W3C\'s recommendation. XHTML 1.0 Strict does not permit '.
|
||||
'the target attribute so this directive will have no effect in that '.
|
||||
'doctype. XHTML 1.1 does not enable the Target module by default, you '.
|
||||
'will have to manually enable it (see the module documentation for more details.)'
|
||||
);
|
||||
|
||||
require_once 'HTMLPurifier/AttrDef/Enum.php';
|
||||
|
||||
/**
|
||||
* Special-case enum attribute definition that lazy loads allowed frame targets
|
||||
*/
|
||||
class HTMLPurifier_AttrDef_HTML_FrameTarget extends HTMLPurifier_AttrDef_Enum
|
||||
{
|
||||
|
||||
var $valid_values = false; // uninitialized value
|
||||
var $case_sensitive = false;
|
||||
|
||||
function HTMLPurifier_AttrDef_HTML_FrameTarget() {}
|
||||
|
||||
function validate($string, $config, &$context) {
|
||||
if ($this->valid_values === false) $this->valid_values = $config->get('Attr', 'AllowedFrameTargets');
|
||||
return parent::validate($string, $config, $context);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
?>
|
@@ -3,6 +3,22 @@
|
||||
require_once 'HTMLPurifier/AttrDef.php';
|
||||
require_once 'HTMLPurifier/IDAccumulator.php';
|
||||
|
||||
HTMLPurifier_ConfigSchema::define(
|
||||
'Attr', 'EnableID', false, 'bool',
|
||||
'Allows the ID attribute in HTML. This is disabled by default '.
|
||||
'due to the fact that without proper configuration user input can '.
|
||||
'easily break the validation of a webpage by specifying an ID that is '.
|
||||
'already on the surrounding HTML. If you don\'t mind throwing caution to '.
|
||||
'the wind, enable this directive, but I strongly recommend you also '.
|
||||
'consider blacklisting IDs you use (%Attr.IDBlacklist) or prefixing all '.
|
||||
'user supplied IDs (%Attr.IDPrefix). This directive has been available '.
|
||||
'since 1.2.0, and when set to true reverts to the behavior of pre-1.2.0 '.
|
||||
'versions.'
|
||||
);
|
||||
HTMLPurifier_ConfigSchema::defineAlias(
|
||||
'HTML', 'EnableAttrID', 'Attr', 'EnableID'
|
||||
);
|
||||
|
||||
HTMLPurifier_ConfigSchema::define(
|
||||
'Attr', 'IDPrefix', '', 'string',
|
||||
'String to prefix to IDs. If you have no idea what IDs your pages '.
|
||||
@@ -27,6 +43,14 @@ HTMLPurifier_ConfigSchema::define(
|
||||
'is set to a non-empty value! This directive was available since 1.2.0.'
|
||||
);
|
||||
|
||||
HTMLPurifier_ConfigSchema::define(
|
||||
'Attr', 'IDBlacklistRegexp', null, 'string/null',
|
||||
'PCRE regular expression to be matched against all IDs. If the expression '.
|
||||
'is matches, the ID is rejected. Use this with care: may cause '.
|
||||
'significant degradation. ID matching is done after all other '.
|
||||
'validation. This directive was available since 1.6.0.'
|
||||
);
|
||||
|
||||
/**
|
||||
* Validates the HTML attribute ID.
|
||||
* @warning Even though this is the id processor, it
|
||||
@@ -36,11 +60,16 @@ HTMLPurifier_ConfigSchema::define(
|
||||
* blacklist. If you're hacking around, make sure you use load()!
|
||||
*/
|
||||
|
||||
class HTMLPurifier_AttrDef_ID extends HTMLPurifier_AttrDef
|
||||
class HTMLPurifier_AttrDef_HTML_ID extends HTMLPurifier_AttrDef
|
||||
{
|
||||
|
||||
// ref functionality disabled, since we also have to verify
|
||||
// whether or not the ID it refers to exists
|
||||
|
||||
function validate($id, $config, &$context) {
|
||||
|
||||
if (!$config->get('Attr', 'EnableID')) return false;
|
||||
|
||||
$id = trim($id); // trim it first
|
||||
|
||||
if ($id === '') return false;
|
||||
@@ -55,8 +84,10 @@ class HTMLPurifier_AttrDef_ID extends HTMLPurifier_AttrDef
|
||||
'%Attr.IDPrefix is set', E_USER_WARNING);
|
||||
}
|
||||
|
||||
$id_accumulator =& $context->get('IDAccumulator');
|
||||
if (isset($id_accumulator->ids[$id])) return false;
|
||||
//if (!$this->ref) {
|
||||
$id_accumulator =& $context->get('IDAccumulator');
|
||||
if (isset($id_accumulator->ids[$id])) return false;
|
||||
//}
|
||||
|
||||
// we purposely avoid using regex, hopefully this is faster
|
||||
|
||||
@@ -71,7 +102,12 @@ class HTMLPurifier_AttrDef_ID extends HTMLPurifier_AttrDef
|
||||
$result = ($trim === '');
|
||||
}
|
||||
|
||||
if ($result) $id_accumulator->add($id);
|
||||
$regexp = $config->get('Attr', 'IDBlacklistRegexp');
|
||||
if ($regexp && preg_match($regexp, $id)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (/*!$this->ref && */$result) $id_accumulator->add($id);
|
||||
|
||||
// if no change was made to the ID, return the result
|
||||
// else, return the new id if stripping whitespace made it
|
@@ -1,18 +1,16 @@
|
||||
<?php
|
||||
|
||||
require_once 'HTMLPurifier/AttrDef.php';
|
||||
require_once 'HTMLPurifier/AttrDef/Pixels.php';
|
||||
require_once 'HTMLPurifier/AttrDef/HTML/Pixels.php';
|
||||
|
||||
/**
|
||||
* Validates the HTML type length (not to be confused with CSS's length).
|
||||
*
|
||||
* This accepts integer pixels or percentages as lengths for certain
|
||||
* HTML attributes. Don't use this for CSS: that's
|
||||
* HTMLPurifier_AttrDef_CSSLength which requires prefixes and allows a lot
|
||||
* more different types.
|
||||
* HTML attributes.
|
||||
*/
|
||||
|
||||
class HTMLPurifier_AttrDef_Length extends HTMLPurifier_AttrDef_Pixels
|
||||
class HTMLPurifier_AttrDef_HTML_Length extends HTMLPurifier_AttrDef_HTML_Pixels
|
||||
{
|
||||
|
||||
function validate($string, $config, &$context) {
|
75
library/HTMLPurifier/AttrDef/HTML/LinkTypes.php
Normal file
75
library/HTMLPurifier/AttrDef/HTML/LinkTypes.php
Normal file
@@ -0,0 +1,75 @@
|
||||
<?php
|
||||
|
||||
require_once 'HTMLPurifier/AttrDef.php';
|
||||
|
||||
HTMLPurifier_ConfigSchema::define(
|
||||
'Attr', 'AllowedRel', array(), 'lookup',
|
||||
'List of allowed forward document relationships in the rel attribute. '.
|
||||
'Common values may be nofollow or print. By default, this is empty, '.
|
||||
'meaning that no document relationships are allowed. This directive '.
|
||||
'was available since 1.6.0.'
|
||||
);
|
||||
|
||||
HTMLPurifier_ConfigSchema::define(
|
||||
'Attr', 'AllowedRev', array(), 'lookup',
|
||||
'List of allowed reverse document relationships in the rev attribute. '.
|
||||
'This attribute is a bit of an edge-case; if you don\'t know what it '.
|
||||
'is for, stay away. This directive was available since 1.6.0.'
|
||||
);
|
||||
|
||||
/**
|
||||
* Validates a rel/rev link attribute against a directive of allowed values
|
||||
* @note We cannot use Enum because link types allow multiple
|
||||
* values.
|
||||
* @note Assumes link types are ASCII text
|
||||
*/
|
||||
class HTMLPurifier_AttrDef_HTML_LinkTypes extends HTMLPurifier_AttrDef
|
||||
{
|
||||
|
||||
/** Lookup array of attribute names to configuration name */
|
||||
var $configLookup = array(
|
||||
'rel' => 'AllowedRel',
|
||||
'rev' => 'AllowedRev'
|
||||
);
|
||||
|
||||
/** Name config attribute to pull. */
|
||||
var $name;
|
||||
|
||||
function HTMLPurifier_AttrDef_HTML_LinkTypes($name) {
|
||||
if (!isset($this->configLookup[$name])) {
|
||||
trigger_error('Unrecognized attribute name for link '.
|
||||
'relationship.', E_USER_ERROR);
|
||||
return;
|
||||
}
|
||||
$this->name = $this->configLookup[$name];
|
||||
}
|
||||
|
||||
function validate($string, $config, &$context) {
|
||||
|
||||
$allowed = $config->get('Attr', $this->name);
|
||||
if (empty($allowed)) return false;
|
||||
|
||||
$string = $this->parseCDATA($string);
|
||||
$parts = explode(' ', $string);
|
||||
|
||||
// lookup to prevent duplicates
|
||||
$ret_lookup = array();
|
||||
foreach ($parts as $part) {
|
||||
$part = strtolower(trim($part));
|
||||
if (!isset($allowed[$part])) continue;
|
||||
$ret_lookup[$part] = true;
|
||||
}
|
||||
|
||||
if (empty($ret_lookup)) return false;
|
||||
|
||||
$ret_array = array();
|
||||
foreach ($ret_lookup as $part => $bool) $ret_array[] = $part;
|
||||
$string = implode(' ', $ret_array);
|
||||
|
||||
return $string;
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
?>
|
@@ -1,7 +1,7 @@
|
||||
<?php
|
||||
|
||||
require_once 'HTMLPurifier/AttrDef.php';
|
||||
require_once 'HTMLPurifier/AttrDef/Length.php';
|
||||
require_once 'HTMLPurifier/AttrDef/HTML/Length.php';
|
||||
|
||||
/**
|
||||
* Validates a MultiLength as defined by the HTML spec.
|
||||
@@ -9,7 +9,7 @@ require_once 'HTMLPurifier/AttrDef/Length.php';
|
||||
* A multilength is either a integer (pixel count), a percentage, or
|
||||
* a relative number.
|
||||
*/
|
||||
class HTMLPurifier_AttrDef_MultiLength extends HTMLPurifier_AttrDef_Length
|
||||
class HTMLPurifier_AttrDef_HTML_MultiLength extends HTMLPurifier_AttrDef_HTML_Length
|
||||
{
|
||||
|
||||
function validate($string, $config, &$context) {
|
||||
@@ -27,12 +27,14 @@ class HTMLPurifier_AttrDef_MultiLength extends HTMLPurifier_AttrDef_Length
|
||||
|
||||
$int = substr($string, 0, $length - 1);
|
||||
|
||||
if ($int == '') return '*';
|
||||
if (!is_numeric($int)) return false;
|
||||
|
||||
$int = (int) $int;
|
||||
|
||||
if ($int < 0) return '0*';
|
||||
|
||||
if ($int < 0) return false;
|
||||
if ($int == 0) return '0';
|
||||
if ($int == 1) return '*';
|
||||
return ((string) $int) . '*';
|
||||
|
||||
}
|
@@ -4,9 +4,13 @@ require_once 'HTMLPurifier/AttrDef.php';
|
||||
require_once 'HTMLPurifier/Config.php';
|
||||
|
||||
/**
|
||||
* Validates the contents of the global HTML attribute class.
|
||||
* Validates contents based on NMTOKENS attribute type.
|
||||
* @note The only current use for this is the class attribute in HTML
|
||||
* @note Could have some functionality factored out into Nmtoken class
|
||||
* @warning We cannot assume this class will be used only for 'class'
|
||||
* attributes. Not sure how to hook in magic behavior, then.
|
||||
*/
|
||||
class HTMLPurifier_AttrDef_Class extends HTMLPurifier_AttrDef
|
||||
class HTMLPurifier_AttrDef_HTML_Nmtokens extends HTMLPurifier_AttrDef
|
||||
{
|
||||
|
||||
function validate($string, $config, &$context) {
|
||||
@@ -31,10 +35,10 @@ class HTMLPurifier_AttrDef_Class extends HTMLPurifier_AttrDef
|
||||
|
||||
if (empty($matches[1])) return false;
|
||||
|
||||
// reconstruct class string
|
||||
// reconstruct string
|
||||
$new_string = '';
|
||||
foreach ($matches[1] as $class_names) {
|
||||
$new_string .= $class_names . ' ';
|
||||
foreach ($matches[1] as $token) {
|
||||
$new_string .= $token . ' ';
|
||||
}
|
||||
$new_string = rtrim($new_string);
|
||||
|
@@ -5,7 +5,7 @@ require_once 'HTMLPurifier/AttrDef.php';
|
||||
/**
|
||||
* Validates an integer representation of pixels according to the HTML spec.
|
||||
*/
|
||||
class HTMLPurifier_AttrDef_Pixels extends HTMLPurifier_AttrDef
|
||||
class HTMLPurifier_AttrDef_HTML_Pixels extends HTMLPurifier_AttrDef
|
||||
{
|
||||
|
||||
function validate($string, $config, &$context) {
|
@@ -46,7 +46,7 @@ class HTMLPurifier_AttrDef_Lang extends HTMLPurifier_AttrDef
|
||||
|
||||
// process second subtag : $subtags[1]
|
||||
$length = strlen($subtags[1]);
|
||||
if ($length == 0 || $length == 1 || $length > 8 || !ctype_alnum($subtags[1])) {
|
||||
if ($length == 0 || ($length == 1 && $subtags[1] != 'x') || $length > 8 || !ctype_alnum($subtags[1])) {
|
||||
return $new_string;
|
||||
}
|
||||
if (!ctype_lower($subtags[1])) $subtags[1] = strtolower($subtags[1]);
|
||||
|
@@ -3,7 +3,7 @@
|
||||
require_once 'HTMLPurifier/AttrDef.php';
|
||||
require_once 'HTMLPurifier/URIScheme.php';
|
||||
require_once 'HTMLPurifier/URISchemeRegistry.php';
|
||||
require_once 'HTMLPurifier/AttrDef/Host.php';
|
||||
require_once 'HTMLPurifier/AttrDef/URI/Host.php';
|
||||
require_once 'HTMLPurifier/PercentEncoder.php';
|
||||
|
||||
HTMLPurifier_ConfigSchema::define(
|
||||
@@ -77,6 +77,14 @@ HTMLPurifier_ConfigSchema::define(
|
||||
'This directive has been available since 1.3.0.'
|
||||
);
|
||||
|
||||
HTMLPurifier_ConfigSchema::define(
|
||||
'URI', 'Disable', false, 'bool',
|
||||
'Disables all URIs in all forms. Not sure why you\'d want to do that '.
|
||||
'(after all, the Internet\'s founded on the notion of a hyperlink). '.
|
||||
'This directive has been available since 1.3.0.'
|
||||
);
|
||||
HTMLPurifier_ConfigSchema::defineAlias('Attr', 'DisableURI', 'URI', 'Disable');
|
||||
|
||||
/**
|
||||
* Validates a URI as defined by RFC 3986.
|
||||
* @note Scheme-specific mechanics deferred to HTMLPurifier_URIScheme
|
||||
@@ -92,7 +100,7 @@ class HTMLPurifier_AttrDef_URI extends HTMLPurifier_AttrDef
|
||||
* @param $embeds_resource_resource Does the URI here result in an extra HTTP request?
|
||||
*/
|
||||
function HTMLPurifier_AttrDef_URI($embeds_resource = false) {
|
||||
$this->host = new HTMLPurifier_AttrDef_Host();
|
||||
$this->host = new HTMLPurifier_AttrDef_URI_Host();
|
||||
$this->PercentEncoder = new HTMLPurifier_PercentEncoder();
|
||||
$this->embeds_resource = (bool) $embeds_resource;
|
||||
}
|
||||
@@ -102,6 +110,8 @@ class HTMLPurifier_AttrDef_URI extends HTMLPurifier_AttrDef
|
||||
// We'll write stack-based parsers later, for now, use regexps to
|
||||
// get things working as fast as possible (irony)
|
||||
|
||||
if ($config->get('URI', 'Disable')) return false;
|
||||
|
||||
// parse as CDATA
|
||||
$uri = $this->parseCDATA($uri);
|
||||
|
||||
|
@@ -2,7 +2,7 @@
|
||||
|
||||
require_once 'HTMLPurifier/AttrDef.php';
|
||||
|
||||
class HTMLPurifier_AttrDef_Email extends HTMLPurifier_AttrDef
|
||||
class HTMLPurifier_AttrDef_URI_Email extends HTMLPurifier_AttrDef
|
||||
{
|
||||
|
||||
/**
|
@@ -1,12 +1,12 @@
|
||||
<?php
|
||||
|
||||
require_once 'HTMLPurifier/AttrDef/Email.php';
|
||||
require_once 'HTMLPurifier/AttrDef/URI/Email.php';
|
||||
|
||||
/**
|
||||
* Primitive email validation class based on the regexp found at
|
||||
* http://www.regular-expressions.info/email.html
|
||||
*/
|
||||
class HTMLPurifier_AttrDef_Email_SimpleCheck extends HTMLPurifier_AttrDef_Email
|
||||
class HTMLPurifier_AttrDef_URI_Email_SimpleCheck extends HTMLPurifier_AttrDef_URI_Email
|
||||
{
|
||||
|
||||
function validate($string, $config, &$context) {
|
@@ -1,28 +1,28 @@
|
||||
<?php
|
||||
|
||||
require_once 'HTMLPurifier/AttrDef.php';
|
||||
require_once 'HTMLPurifier/AttrDef/IPv4.php';
|
||||
require_once 'HTMLPurifier/AttrDef/IPv6.php';
|
||||
require_once 'HTMLPurifier/AttrDef/URI/IPv4.php';
|
||||
require_once 'HTMLPurifier/AttrDef/URI/IPv6.php';
|
||||
|
||||
/**
|
||||
* Validates a host according to the IPv4, IPv6 and DNS (future) specifications.
|
||||
*/
|
||||
class HTMLPurifier_AttrDef_Host extends HTMLPurifier_AttrDef
|
||||
class HTMLPurifier_AttrDef_URI_Host extends HTMLPurifier_AttrDef
|
||||
{
|
||||
|
||||
/**
|
||||
* Instance of HTMLPurifier_AttrDef_IPv4 sub-validator
|
||||
* Instance of HTMLPurifier_AttrDef_URI_IPv4 sub-validator
|
||||
*/
|
||||
var $ipv4;
|
||||
|
||||
/**
|
||||
* Instance of HTMLPurifier_AttrDef_IPv6 sub-validator
|
||||
* Instance of HTMLPurifier_AttrDef_URI_IPv6 sub-validator
|
||||
*/
|
||||
var $ipv6;
|
||||
|
||||
function HTMLPurifier_AttrDef_Host() {
|
||||
$this->ipv4 = new HTMLPurifier_AttrDef_IPv4();
|
||||
$this->ipv6 = new HTMLPurifier_AttrDef_IPv6();
|
||||
function HTMLPurifier_AttrDef_URI_Host() {
|
||||
$this->ipv4 = new HTMLPurifier_AttrDef_URI_IPv4();
|
||||
$this->ipv6 = new HTMLPurifier_AttrDef_URI_IPv6();
|
||||
}
|
||||
|
||||
function validate($string, $config, &$context) {
|
@@ -6,7 +6,7 @@ require_once 'HTMLPurifier/AttrDef.php';
|
||||
* Validates an IPv4 address
|
||||
* @author Feyd @ forums.devnetwork.net (public domain)
|
||||
*/
|
||||
class HTMLPurifier_AttrDef_IPv4 extends HTMLPurifier_AttrDef
|
||||
class HTMLPurifier_AttrDef_URI_IPv4 extends HTMLPurifier_AttrDef
|
||||
{
|
||||
|
||||
/**
|
||||
@@ -15,7 +15,7 @@ class HTMLPurifier_AttrDef_IPv4 extends HTMLPurifier_AttrDef
|
||||
*/
|
||||
var $ip4;
|
||||
|
||||
function HTMLPurifier_AttrDef_IPv4() {
|
||||
function HTMLPurifier_AttrDef_URI_IPv4() {
|
||||
$oct = '(?:25[0-5]|2[0-4][0-9]|1[0-9]{2}|[1-9][0-9]|[0-9])'; // 0-255
|
||||
$this->ip4 = "(?:{$oct}\\.{$oct}\\.{$oct}\\.{$oct})";
|
||||
}
|
@@ -1,6 +1,6 @@
|
||||
<?php
|
||||
|
||||
require_once 'HTMLPurifier/AttrDef/IPv4.php';
|
||||
require_once 'HTMLPurifier/AttrDef/URI/IPv4.php';
|
||||
|
||||
/**
|
||||
* Validates an IPv6 address.
|
||||
@@ -8,7 +8,7 @@ require_once 'HTMLPurifier/AttrDef/IPv4.php';
|
||||
* @note This function requires brackets to have been removed from address
|
||||
* in URI.
|
||||
*/
|
||||
class HTMLPurifier_AttrDef_IPv6 extends HTMLPurifier_AttrDef_IPv4
|
||||
class HTMLPurifier_AttrDef_URI_IPv6 extends HTMLPurifier_AttrDef_URI_IPv4
|
||||
{
|
||||
|
||||
function validate($aIP, $config, &$context) {
|
@@ -29,6 +29,30 @@ class HTMLPurifier_AttrTransform
|
||||
function transform($attr, $config, &$context) {
|
||||
trigger_error('Cannot call abstract function', E_USER_ERROR);
|
||||
}
|
||||
|
||||
/**
|
||||
* Prepends CSS properties to the style attribute, creating the
|
||||
* attribute if it doesn't exist.
|
||||
* @param $attr Attribute array to process (passed by reference)
|
||||
* @param $css CSS to prepend
|
||||
*/
|
||||
function prependCSS(&$attr, $css) {
|
||||
$attr['style'] = isset($attr['style']) ? $attr['style'] : '';
|
||||
$attr['style'] = $css . $attr['style'];
|
||||
}
|
||||
|
||||
/**
|
||||
* Retrieves and removes an attribute
|
||||
* @param $attr Attribute array to process (passed by reference)
|
||||
* @param $key Key of attribute to confiscate
|
||||
*/
|
||||
function confiscateAttr(&$attr, $key) {
|
||||
if (!isset($attr[$key])) return null;
|
||||
$value = $attr[$key];
|
||||
unset($attr[$key]);
|
||||
return $value;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
?>
|
26
library/HTMLPurifier/AttrTransform/BgColor.php
Normal file
26
library/HTMLPurifier/AttrTransform/BgColor.php
Normal file
@@ -0,0 +1,26 @@
|
||||
<?php
|
||||
|
||||
require_once 'HTMLPurifier/AttrTransform.php';
|
||||
|
||||
/**
|
||||
* Pre-transform that changes deprecated bgcolor attribute to CSS.
|
||||
*/
|
||||
class HTMLPurifier_AttrTransform_BgColor
|
||||
extends HTMLPurifier_AttrTransform {
|
||||
|
||||
function transform($attr, $config, &$context) {
|
||||
|
||||
if (!isset($attr['bgcolor'])) return $attr;
|
||||
|
||||
$bgcolor = $this->confiscateAttr($attr, 'bgcolor');
|
||||
// some validation should happen here
|
||||
|
||||
$this->prependCSS($attr, "background-color:$bgcolor;");
|
||||
|
||||
return $attr;
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
?>
|
39
library/HTMLPurifier/AttrTransform/BoolToCSS.php
Normal file
39
library/HTMLPurifier/AttrTransform/BoolToCSS.php
Normal file
@@ -0,0 +1,39 @@
|
||||
<?php
|
||||
|
||||
require_once 'HTMLPurifier/AttrTransform.php';
|
||||
|
||||
/**
|
||||
* Pre-transform that changes converts a boolean attribute to fixed CSS
|
||||
*/
|
||||
class HTMLPurifier_AttrTransform_BoolToCSS
|
||||
extends HTMLPurifier_AttrTransform {
|
||||
|
||||
/**
|
||||
* Name of boolean attribute that is trigger
|
||||
*/
|
||||
var $attr;
|
||||
|
||||
/**
|
||||
* CSS declarations to add to style, needs trailing semicolon
|
||||
*/
|
||||
var $css;
|
||||
|
||||
/**
|
||||
* @param $attr string attribute name to convert from
|
||||
* @param $css string CSS declarations to add to style (needs semicolon)
|
||||
*/
|
||||
function HTMLPurifier_AttrTransform_BoolToCSS($attr, $css) {
|
||||
$this->attr = $attr;
|
||||
$this->css = $css;
|
||||
}
|
||||
|
||||
function transform($attr, $config, &$context) {
|
||||
if (!isset($attr[$this->attr])) return $attr;
|
||||
unset($attr[$this->attr]);
|
||||
$this->prependCSS($attr, $this->css);
|
||||
return $attr;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
?>
|
20
library/HTMLPurifier/AttrTransform/Border.php
Normal file
20
library/HTMLPurifier/AttrTransform/Border.php
Normal file
@@ -0,0 +1,20 @@
|
||||
<?php
|
||||
|
||||
require_once 'HTMLPurifier/AttrTransform.php';
|
||||
|
||||
/**
|
||||
* Pre-transform that changes deprecated border attribute to CSS.
|
||||
*/
|
||||
class HTMLPurifier_AttrTransform_Border extends HTMLPurifier_AttrTransform {
|
||||
|
||||
function transform($attr, $config, &$context) {
|
||||
if (!isset($attr['border'])) return $attr;
|
||||
$border_width = $this->confiscateAttr($attr, 'border');
|
||||
// some validation should happen here
|
||||
$this->prependCSS($attr, "border:{$border_width}px solid;");
|
||||
return $attr;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
?>
|
60
library/HTMLPurifier/AttrTransform/EnumToCSS.php
Normal file
60
library/HTMLPurifier/AttrTransform/EnumToCSS.php
Normal file
@@ -0,0 +1,60 @@
|
||||
<?php
|
||||
|
||||
require_once 'HTMLPurifier/AttrTransform.php';
|
||||
|
||||
/**
|
||||
* Generic pre-transform that converts an attribute with a fixed number of
|
||||
* values (enumerated) to CSS.
|
||||
*/
|
||||
class HTMLPurifier_AttrTransform_EnumToCSS extends HTMLPurifier_AttrTransform {
|
||||
|
||||
/**
|
||||
* Name of attribute to transform from
|
||||
*/
|
||||
var $attr;
|
||||
|
||||
/**
|
||||
* Lookup array of attribute values to CSS
|
||||
*/
|
||||
var $enumToCSS = array();
|
||||
|
||||
/**
|
||||
* Case sensitivity of the matching
|
||||
* @warning Currently can only be guaranteed to work with ASCII
|
||||
* values.
|
||||
*/
|
||||
var $caseSensitive = false;
|
||||
|
||||
/**
|
||||
* @param $attr String attribute name to transform from
|
||||
* @param $enumToCSS Lookup array of attribute values to CSS
|
||||
* @param $case_sensitive Boolean case sensitivity indicator, default false
|
||||
*/
|
||||
function HTMLPurifier_AttrTransform_EnumToCSS($attr, $enum_to_css, $case_sensitive = false) {
|
||||
$this->attr = $attr;
|
||||
$this->enumToCSS = $enum_to_css;
|
||||
$this->caseSensitive = (bool) $case_sensitive;
|
||||
}
|
||||
|
||||
function transform($attr, $config, &$context) {
|
||||
|
||||
if (!isset($attr[$this->attr])) return $attr;
|
||||
|
||||
$value = trim($attr[$this->attr]);
|
||||
unset($attr[$this->attr]);
|
||||
|
||||
if (!$this->caseSensitive) $value = strtolower($value);
|
||||
|
||||
if (!isset($this->enumToCSS[$value])) {
|
||||
return $attr;
|
||||
}
|
||||
|
||||
$this->prependCSS($attr, $this->enumToCSS[$value]);
|
||||
|
||||
return $attr;
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
?>
|
47
library/HTMLPurifier/AttrTransform/ImgSpace.php
Normal file
47
library/HTMLPurifier/AttrTransform/ImgSpace.php
Normal file
@@ -0,0 +1,47 @@
|
||||
<?php
|
||||
|
||||
require_once 'HTMLPurifier/AttrTransform.php';
|
||||
|
||||
/**
|
||||
* Pre-transform that changes deprecated hspace and vspace attributes to CSS
|
||||
*/
|
||||
class HTMLPurifier_AttrTransform_ImgSpace
|
||||
extends HTMLPurifier_AttrTransform {
|
||||
|
||||
var $attr;
|
||||
var $css = array(
|
||||
'hspace' => array('left', 'right'),
|
||||
'vspace' => array('top', 'bottom')
|
||||
);
|
||||
|
||||
function HTMLPurifier_AttrTransform_ImgSpace($attr) {
|
||||
$this->attr = $attr;
|
||||
if (!isset($this->css[$attr])) {
|
||||
trigger_error(htmlspecialchars($attr) . ' is not valid space attribute');
|
||||
}
|
||||
}
|
||||
|
||||
function transform($attr, $config, &$context) {
|
||||
|
||||
if (!isset($attr[$this->attr])) return $attr;
|
||||
|
||||
$width = $this->confiscateAttr($attr, $this->attr);
|
||||
// some validation could happen here
|
||||
|
||||
if (!isset($this->css[$this->attr])) return $attr;
|
||||
|
||||
$style = '';
|
||||
foreach ($this->css[$this->attr] as $suffix) {
|
||||
$property = "margin-$suffix";
|
||||
$style .= "$property:{$width}px;";
|
||||
}
|
||||
|
||||
$this->prependCSS($attr, $style);
|
||||
|
||||
return $attr;
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
?>
|
29
library/HTMLPurifier/AttrTransform/Length.php
Normal file
29
library/HTMLPurifier/AttrTransform/Length.php
Normal file
@@ -0,0 +1,29 @@
|
||||
<?php
|
||||
|
||||
require_once 'HTMLPurifier/AttrTransform.php';
|
||||
|
||||
/**
|
||||
* Class for handling width/height length attribute transformations to CSS
|
||||
*/
|
||||
class HTMLPurifier_AttrTransform_Length extends HTMLPurifier_AttrTransform
|
||||
{
|
||||
|
||||
var $name;
|
||||
var $cssName;
|
||||
|
||||
function HTMLPurifier_AttrTransform_Length($name, $css_name = null) {
|
||||
$this->name = $name;
|
||||
$this->cssName = $css_name ? $css_name : $name;
|
||||
}
|
||||
|
||||
function transform($attr, $config, &$context) {
|
||||
if (!isset($attr[$this->name])) return $attr;
|
||||
$length = $this->confiscateAttr($attr, $this->name);
|
||||
if(ctype_digit($length)) $length .= 'px';
|
||||
$this->prependCSS($attr, $this->cssName . ":$length;");
|
||||
return $attr;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
?>
|
21
library/HTMLPurifier/AttrTransform/Name.php
Normal file
21
library/HTMLPurifier/AttrTransform/Name.php
Normal file
@@ -0,0 +1,21 @@
|
||||
<?php
|
||||
|
||||
require_once 'HTMLPurifier/AttrTransform.php';
|
||||
|
||||
/**
|
||||
* Pre-transform that changes deprecated name attribute to ID if necessary
|
||||
*/
|
||||
class HTMLPurifier_AttrTransform_Name extends HTMLPurifier_AttrTransform
|
||||
{
|
||||
|
||||
function transform($attr, $config, &$context) {
|
||||
if (!isset($attr['name'])) return $attr;
|
||||
$id = $this->confiscateAttr($attr, 'name');
|
||||
if ( isset($attr['id'])) return $attr;
|
||||
$attr['id'] = $id;
|
||||
return $attr;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
?>
|
@@ -1,36 +0,0 @@
|
||||
<?php
|
||||
|
||||
require_once 'HTMLPurifier/AttrTransform.php';
|
||||
|
||||
/**
|
||||
* Pre-transform that changes deprecated align attribute to text-align.
|
||||
*/
|
||||
class HTMLPurifier_AttrTransform_TextAlign
|
||||
extends HTMLPurifier_AttrTransform {
|
||||
|
||||
function transform($attr, $config, &$context) {
|
||||
|
||||
if (!isset($attr['align'])) return $attr;
|
||||
|
||||
$align = strtolower(trim($attr['align']));
|
||||
unset($attr['align']);
|
||||
|
||||
$values = array('left' => 1,
|
||||
'right' => 1,
|
||||
'center' => 1,
|
||||
'justify' => 1);
|
||||
|
||||
if (!isset($values[$align])) {
|
||||
return $attr;
|
||||
}
|
||||
|
||||
$attr['style'] = isset($attr['style']) ? $attr['style'] : '';
|
||||
$attr['style'] = "text-align:$align;" . $attr['style'];
|
||||
|
||||
return $attr;
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
?>
|
41
library/HTMLPurifier/AttrTypes.php
Normal file
41
library/HTMLPurifier/AttrTypes.php
Normal file
@@ -0,0 +1,41 @@
|
||||
<?php
|
||||
|
||||
require_once 'HTMLPurifier/AttrDef/HTML/ID.php';
|
||||
require_once 'HTMLPurifier/AttrDef/HTML/Length.php';
|
||||
require_once 'HTMLPurifier/AttrDef/HTML/MultiLength.php';
|
||||
require_once 'HTMLPurifier/AttrDef/HTML/Nmtokens.php';
|
||||
require_once 'HTMLPurifier/AttrDef/HTML/Pixels.php';
|
||||
require_once 'HTMLPurifier/AttrDef/Integer.php';
|
||||
require_once 'HTMLPurifier/AttrDef/Text.php';
|
||||
require_once 'HTMLPurifier/AttrDef/URI.php';
|
||||
|
||||
/**
|
||||
* Provides lookup array of attribute types to HTMLPurifier_AttrDef objects
|
||||
*/
|
||||
class HTMLPurifier_AttrTypes
|
||||
{
|
||||
/**
|
||||
* Lookup array of attribute string identifiers to concrete implementations
|
||||
* @public
|
||||
*/
|
||||
var $info = array();
|
||||
|
||||
/**
|
||||
* Constructs the info array
|
||||
*/
|
||||
function HTMLPurifier_AttrTypes() {
|
||||
$this->info['CDATA'] = new HTMLPurifier_AttrDef_Text();
|
||||
$this->info['ID'] = new HTMLPurifier_AttrDef_HTML_ID();
|
||||
$this->info['Length'] = new HTMLPurifier_AttrDef_HTML_Length();
|
||||
$this->info['MultiLength'] = new HTMLPurifier_AttrDef_HTML_MultiLength();
|
||||
$this->info['NMTOKENS'] = new HTMLPurifier_AttrDef_HTML_Nmtokens();
|
||||
$this->info['Pixels'] = new HTMLPurifier_AttrDef_HTML_Pixels();
|
||||
$this->info['Text'] = new HTMLPurifier_AttrDef_Text();
|
||||
$this->info['URI'] = new HTMLPurifier_AttrDef_URI();
|
||||
|
||||
// number is really a positive integer (one or more digits)
|
||||
$this->info['Number'] = new HTMLPurifier_AttrDef_Integer(false, false, true);
|
||||
}
|
||||
}
|
||||
|
||||
?>
|
@@ -1,19 +1,19 @@
|
||||
<?php
|
||||
|
||||
require_once 'HTMLPurifier/AttrDef/CSS/Background.php';
|
||||
require_once 'HTMLPurifier/AttrDef/CSS/BackgroundPosition.php';
|
||||
require_once 'HTMLPurifier/AttrDef/CSS/Border.php';
|
||||
require_once 'HTMLPurifier/AttrDef/CSS/Color.php';
|
||||
require_once 'HTMLPurifier/AttrDef/CSS/Composite.php';
|
||||
require_once 'HTMLPurifier/AttrDef/CSS/Font.php';
|
||||
require_once 'HTMLPurifier/AttrDef/CSS/FontFamily.php';
|
||||
require_once 'HTMLPurifier/AttrDef/CSS/Length.php';
|
||||
require_once 'HTMLPurifier/AttrDef/CSS/ListStyle.php';
|
||||
require_once 'HTMLPurifier/AttrDef/CSS/Multiple.php';
|
||||
require_once 'HTMLPurifier/AttrDef/CSS/Percentage.php';
|
||||
require_once 'HTMLPurifier/AttrDef/CSS/TextDecoration.php';
|
||||
require_once 'HTMLPurifier/AttrDef/CSS/URI.php';
|
||||
require_once 'HTMLPurifier/AttrDef/Enum.php';
|
||||
require_once 'HTMLPurifier/AttrDef/Color.php';
|
||||
require_once 'HTMLPurifier/AttrDef/Composite.php';
|
||||
require_once 'HTMLPurifier/AttrDef/CSSLength.php';
|
||||
require_once 'HTMLPurifier/AttrDef/Percentage.php';
|
||||
require_once 'HTMLPurifier/AttrDef/Multiple.php';
|
||||
require_once 'HTMLPurifier/AttrDef/TextDecoration.php';
|
||||
require_once 'HTMLPurifier/AttrDef/FontFamily.php';
|
||||
require_once 'HTMLPurifier/AttrDef/Font.php';
|
||||
require_once 'HTMLPurifier/AttrDef/Border.php';
|
||||
require_once 'HTMLPurifier/AttrDef/ListStyle.php';
|
||||
require_once 'HTMLPurifier/AttrDef/CSSURI.php';
|
||||
require_once 'HTMLPurifier/AttrDef/BackgroundPosition.php';
|
||||
require_once 'HTMLPurifier/AttrDef/Background.php';
|
||||
|
||||
/**
|
||||
* Defines allowed CSS attributes and what their values are.
|
||||
@@ -43,7 +43,7 @@ class HTMLPurifier_CSSDefinition
|
||||
array('none', 'hidden', 'dotted', 'dashed', 'solid', 'double',
|
||||
'groove', 'ridge', 'inset', 'outset'), false);
|
||||
|
||||
$this->info['border-style'] = new HTMLPurifier_AttrDef_Multiple($border_style);
|
||||
$this->info['border-style'] = new HTMLPurifier_AttrDef_CSS_Multiple($border_style);
|
||||
|
||||
$this->info['clear'] = new HTMLPurifier_AttrDef_Enum(
|
||||
array('none', 'left', 'right', 'both'), false);
|
||||
@@ -54,10 +54,10 @@ class HTMLPurifier_CSSDefinition
|
||||
$this->info['font-variant'] = new HTMLPurifier_AttrDef_Enum(
|
||||
array('normal', 'small-caps'), false);
|
||||
|
||||
$uri_or_none = new HTMLPurifier_AttrDef_Composite(
|
||||
$uri_or_none = new HTMLPurifier_AttrDef_CSS_Composite(
|
||||
array(
|
||||
new HTMLPurifier_AttrDef_Enum(array('none')),
|
||||
new HTMLPurifier_AttrDef_CSSURI()
|
||||
new HTMLPurifier_AttrDef_CSS_URI()
|
||||
)
|
||||
);
|
||||
|
||||
@@ -68,11 +68,11 @@ class HTMLPurifier_CSSDefinition
|
||||
'upper-roman', 'lower-alpha', 'upper-alpha', 'none'), false);
|
||||
$this->info['list-style-image'] = $uri_or_none;
|
||||
|
||||
$this->info['list-style'] = new HTMLPurifier_AttrDef_ListStyle($config);
|
||||
$this->info['list-style'] = new HTMLPurifier_AttrDef_CSS_ListStyle($config);
|
||||
|
||||
$this->info['text-transform'] = new HTMLPurifier_AttrDef_Enum(
|
||||
array('capitalize', 'uppercase', 'lowercase', 'none'), false);
|
||||
$this->info['color'] = new HTMLPurifier_AttrDef_Color();
|
||||
$this->info['color'] = new HTMLPurifier_AttrDef_CSS_Color();
|
||||
|
||||
$this->info['background-image'] = $uri_or_none;
|
||||
$this->info['background-repeat'] = new HTMLPurifier_AttrDef_Enum(
|
||||
@@ -81,96 +81,98 @@ class HTMLPurifier_CSSDefinition
|
||||
$this->info['background-attachment'] = new HTMLPurifier_AttrDef_Enum(
|
||||
array('scroll', 'fixed')
|
||||
);
|
||||
$this->info['background-position'] = new HTMLPurifier_AttrDef_BackgroundPosition();
|
||||
$this->info['background-position'] = new HTMLPurifier_AttrDef_CSS_BackgroundPosition();
|
||||
|
||||
$border_color =
|
||||
$this->info['border-top-color'] =
|
||||
$this->info['border-bottom-color'] =
|
||||
$this->info['border-left-color'] =
|
||||
$this->info['border-right-color'] =
|
||||
$this->info['background-color'] = new HTMLPurifier_AttrDef_Composite(array(
|
||||
$this->info['background-color'] = new HTMLPurifier_AttrDef_CSS_Composite(array(
|
||||
new HTMLPurifier_AttrDef_Enum(array('transparent')),
|
||||
new HTMLPurifier_AttrDef_Color()
|
||||
new HTMLPurifier_AttrDef_CSS_Color()
|
||||
));
|
||||
|
||||
$this->info['background'] = new HTMLPurifier_AttrDef_Background($config);
|
||||
$this->info['background'] = new HTMLPurifier_AttrDef_CSS_Background($config);
|
||||
|
||||
$this->info['border-color'] = new HTMLPurifier_AttrDef_Multiple($border_color);
|
||||
$this->info['border-color'] = new HTMLPurifier_AttrDef_CSS_Multiple($border_color);
|
||||
|
||||
$border_width =
|
||||
$this->info['border-top-width'] =
|
||||
$this->info['border-bottom-width'] =
|
||||
$this->info['border-left-width'] =
|
||||
$this->info['border-right-width'] = new HTMLPurifier_AttrDef_Composite(array(
|
||||
$this->info['border-right-width'] = new HTMLPurifier_AttrDef_CSS_Composite(array(
|
||||
new HTMLPurifier_AttrDef_Enum(array('thin', 'medium', 'thick')),
|
||||
new HTMLPurifier_AttrDef_CSSLength(true) //disallow negative
|
||||
new HTMLPurifier_AttrDef_CSS_Length(true) //disallow negative
|
||||
));
|
||||
|
||||
$this->info['border-width'] = new HTMLPurifier_AttrDef_Multiple($border_width);
|
||||
$this->info['border-width'] = new HTMLPurifier_AttrDef_CSS_Multiple($border_width);
|
||||
|
||||
$this->info['letter-spacing'] = new HTMLPurifier_AttrDef_Composite(array(
|
||||
$this->info['letter-spacing'] = new HTMLPurifier_AttrDef_CSS_Composite(array(
|
||||
new HTMLPurifier_AttrDef_Enum(array('normal')),
|
||||
new HTMLPurifier_AttrDef_CSSLength()
|
||||
new HTMLPurifier_AttrDef_CSS_Length()
|
||||
));
|
||||
|
||||
$this->info['word-spacing'] = new HTMLPurifier_AttrDef_Composite(array(
|
||||
$this->info['word-spacing'] = new HTMLPurifier_AttrDef_CSS_Composite(array(
|
||||
new HTMLPurifier_AttrDef_Enum(array('normal')),
|
||||
new HTMLPurifier_AttrDef_CSSLength()
|
||||
new HTMLPurifier_AttrDef_CSS_Length()
|
||||
));
|
||||
|
||||
$this->info['font-size'] = new HTMLPurifier_AttrDef_Composite(array(
|
||||
$this->info['font-size'] = new HTMLPurifier_AttrDef_CSS_Composite(array(
|
||||
new HTMLPurifier_AttrDef_Enum(array('xx-small', 'x-small',
|
||||
'small', 'medium', 'large', 'x-large', 'xx-large',
|
||||
'larger', 'smaller')),
|
||||
new HTMLPurifier_AttrDef_Percentage(),
|
||||
new HTMLPurifier_AttrDef_CSSLength()
|
||||
new HTMLPurifier_AttrDef_CSS_Percentage(),
|
||||
new HTMLPurifier_AttrDef_CSS_Length()
|
||||
));
|
||||
|
||||
$this->info['line-height'] = new HTMLPurifier_AttrDef_Composite(array(
|
||||
$this->info['line-height'] = new HTMLPurifier_AttrDef_CSS_Composite(array(
|
||||
new HTMLPurifier_AttrDef_Enum(array('normal')),
|
||||
new HTMLPurifier_AttrDef_Number(true), // no negatives
|
||||
new HTMLPurifier_AttrDef_CSSLength(true),
|
||||
new HTMLPurifier_AttrDef_Percentage(true)
|
||||
new HTMLPurifier_AttrDef_CSS_Number(true), // no negatives
|
||||
new HTMLPurifier_AttrDef_CSS_Length(true),
|
||||
new HTMLPurifier_AttrDef_CSS_Percentage(true)
|
||||
));
|
||||
|
||||
$margin =
|
||||
$this->info['margin-top'] =
|
||||
$this->info['margin-bottom'] =
|
||||
$this->info['margin-left'] =
|
||||
$this->info['margin-right'] = new HTMLPurifier_AttrDef_Composite(array(
|
||||
new HTMLPurifier_AttrDef_CSSLength(),
|
||||
new HTMLPurifier_AttrDef_Percentage(),
|
||||
$this->info['margin-right'] = new HTMLPurifier_AttrDef_CSS_Composite(array(
|
||||
new HTMLPurifier_AttrDef_CSS_Length(),
|
||||
new HTMLPurifier_AttrDef_CSS_Percentage(),
|
||||
new HTMLPurifier_AttrDef_Enum(array('auto'))
|
||||
));
|
||||
|
||||
$this->info['margin'] = new HTMLPurifier_AttrDef_Multiple($margin);
|
||||
$this->info['margin'] = new HTMLPurifier_AttrDef_CSS_Multiple($margin);
|
||||
|
||||
// non-negative
|
||||
$padding =
|
||||
$this->info['padding-top'] =
|
||||
$this->info['padding-bottom'] =
|
||||
$this->info['padding-left'] =
|
||||
$this->info['padding-right'] = new HTMLPurifier_AttrDef_Composite(array(
|
||||
new HTMLPurifier_AttrDef_CSSLength(true),
|
||||
new HTMLPurifier_AttrDef_Percentage(true)
|
||||
$this->info['padding-right'] = new HTMLPurifier_AttrDef_CSS_Composite(array(
|
||||
new HTMLPurifier_AttrDef_CSS_Length(true),
|
||||
new HTMLPurifier_AttrDef_CSS_Percentage(true)
|
||||
));
|
||||
|
||||
$this->info['padding'] = new HTMLPurifier_AttrDef_Multiple($padding);
|
||||
$this->info['padding'] = new HTMLPurifier_AttrDef_CSS_Multiple($padding);
|
||||
|
||||
$this->info['text-indent'] = new HTMLPurifier_AttrDef_Composite(array(
|
||||
new HTMLPurifier_AttrDef_CSSLength(),
|
||||
new HTMLPurifier_AttrDef_Percentage()
|
||||
$this->info['text-indent'] = new HTMLPurifier_AttrDef_CSS_Composite(array(
|
||||
new HTMLPurifier_AttrDef_CSS_Length(),
|
||||
new HTMLPurifier_AttrDef_CSS_Percentage()
|
||||
));
|
||||
|
||||
$this->info['width'] = new HTMLPurifier_AttrDef_Composite(array(
|
||||
new HTMLPurifier_AttrDef_CSSLength(true),
|
||||
new HTMLPurifier_AttrDef_Percentage(true),
|
||||
$this->info['width'] =
|
||||
$this->info['height'] =
|
||||
new HTMLPurifier_AttrDef_CSS_Composite(array(
|
||||
new HTMLPurifier_AttrDef_CSS_Length(true),
|
||||
new HTMLPurifier_AttrDef_CSS_Percentage(true),
|
||||
new HTMLPurifier_AttrDef_Enum(array('auto'))
|
||||
));
|
||||
|
||||
$this->info['text-decoration'] = new HTMLPurifier_AttrDef_TextDecoration();
|
||||
$this->info['text-decoration'] = new HTMLPurifier_AttrDef_CSS_TextDecoration();
|
||||
|
||||
$this->info['font-family'] = new HTMLPurifier_AttrDef_FontFamily();
|
||||
$this->info['font-family'] = new HTMLPurifier_AttrDef_CSS_FontFamily();
|
||||
|
||||
// this could use specialized code
|
||||
$this->info['font-weight'] = new HTMLPurifier_AttrDef_Enum(
|
||||
@@ -179,14 +181,14 @@ class HTMLPurifier_CSSDefinition
|
||||
|
||||
// MUST be called after other font properties, as it references
|
||||
// a CSSDefinition object
|
||||
$this->info['font'] = new HTMLPurifier_AttrDef_Font($config);
|
||||
$this->info['font'] = new HTMLPurifier_AttrDef_CSS_Font($config);
|
||||
|
||||
// same here
|
||||
$this->info['border'] =
|
||||
$this->info['border-bottom'] =
|
||||
$this->info['border-top'] =
|
||||
$this->info['border-left'] =
|
||||
$this->info['border-right'] = new HTMLPurifier_AttrDef_Border($config);
|
||||
$this->info['border-right'] = new HTMLPurifier_AttrDef_CSS_Border($config);
|
||||
|
||||
$this->info['border-collapse'] = new HTMLPurifier_AttrDef_Enum(array(
|
||||
'collapse', 'seperate'));
|
||||
@@ -197,13 +199,16 @@ class HTMLPurifier_CSSDefinition
|
||||
$this->info['table-layout'] = new HTMLPurifier_AttrDef_Enum(array(
|
||||
'auto', 'fixed'));
|
||||
|
||||
$this->info['vertical-align'] = new HTMLPurifier_AttrDef_Composite(array(
|
||||
$this->info['vertical-align'] = new HTMLPurifier_AttrDef_CSS_Composite(array(
|
||||
new HTMLPurifier_AttrDef_Enum(array('baseline', 'sub', 'super',
|
||||
'top', 'text-top', 'middle', 'bottom', 'text-bottom')),
|
||||
new HTMLPurifier_AttrDef_CSSLength(),
|
||||
new HTMLPurifier_AttrDef_Percentage()
|
||||
new HTMLPurifier_AttrDef_CSS_Length(),
|
||||
new HTMLPurifier_AttrDef_CSS_Percentage()
|
||||
));
|
||||
|
||||
// partial support
|
||||
$this->info['white-space'] = new HTMLPurifier_AttrDef_Enum(array('nowrap'));
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
@@ -38,22 +38,13 @@ class HTMLPurifier_ChildDef_Chameleon extends HTMLPurifier_ChildDef
|
||||
}
|
||||
|
||||
function validateChildren($tokens_of_children, $config, &$context) {
|
||||
$parent_type = $context->get('ParentType');
|
||||
switch ($parent_type) {
|
||||
case 'unknown':
|
||||
case 'inline':
|
||||
$result = $this->inline->validateChildren(
|
||||
$tokens_of_children, $config, $context);
|
||||
break;
|
||||
case 'block':
|
||||
$result = $this->block->validateChildren(
|
||||
$tokens_of_children, $config, $context);
|
||||
break;
|
||||
default:
|
||||
trigger_error('Invalid context', E_USER_ERROR);
|
||||
return false;
|
||||
if ($context->get('IsInline') === false) {
|
||||
return $this->block->validateChildren(
|
||||
$tokens_of_children, $config, $context);
|
||||
} else {
|
||||
return $this->inline->validateChildren(
|
||||
$tokens_of_children, $config, $context);
|
||||
}
|
||||
return $result;
|
||||
}
|
||||
}
|
||||
|
||||
|
@@ -20,10 +20,13 @@ class HTMLPurifier_ChildDef_Required extends HTMLPurifier_ChildDef
|
||||
$elements = str_replace(' ', '', $elements);
|
||||
$elements = explode('|', $elements);
|
||||
}
|
||||
$elements = array_flip($elements);
|
||||
foreach ($elements as $i => $x) {
|
||||
$elements[$i] = true;
|
||||
if (empty($i)) unset($elements[$i]);
|
||||
$keys = array_keys($elements);
|
||||
if ($keys == array_keys($keys)) {
|
||||
$elements = array_flip($elements);
|
||||
foreach ($elements as $i => $x) {
|
||||
$elements[$i] = true;
|
||||
if (empty($i)) unset($elements[$i]);
|
||||
}
|
||||
}
|
||||
$this->elements = $elements;
|
||||
$this->gen = new HTMLPurifier_Generator();
|
||||
|
@@ -4,27 +4,31 @@ require_once 'HTMLPurifier/ChildDef/Required.php';
|
||||
|
||||
/**
|
||||
* Takes the contents of blockquote when in strict and reformats for validation.
|
||||
*
|
||||
* From XHTML 1.0 Transitional to Strict, there is a notable change where
|
||||
*/
|
||||
class HTMLPurifier_ChildDef_StrictBlockquote
|
||||
extends HTMLPurifier_ChildDef_Required
|
||||
{
|
||||
var $real_elements;
|
||||
var $fake_elements;
|
||||
var $allow_empty = true;
|
||||
var $type = 'strictblockquote';
|
||||
var $init = false;
|
||||
function HTMLPurifier_ChildDef_StrictBlockquote() {}
|
||||
function validateChildren($tokens_of_children, $config, &$context) {
|
||||
|
||||
$def = $config->getHTMLDefinition();
|
||||
if (!$this->init) {
|
||||
// allow all inline elements
|
||||
$this->elements = $def->info_flow_elements;
|
||||
$this->elements['#PCDATA'] = true;
|
||||
$this->real_elements = $this->elements;
|
||||
$this->fake_elements = $def->info_content_sets['Flow'];
|
||||
$this->fake_elements['#PCDATA'] = true;
|
||||
$this->init = true;
|
||||
}
|
||||
|
||||
// trick the parent class into thinking it allows more
|
||||
$this->elements = $this->fake_elements;
|
||||
$result = parent::validateChildren($tokens_of_children, $config, $context);
|
||||
$this->elements = $this->real_elements;
|
||||
|
||||
if ($result === false) return array();
|
||||
if ($result === true) $result = $tokens_of_children;
|
||||
|
||||
@@ -40,8 +44,10 @@ extends HTMLPurifier_ChildDef_Required
|
||||
// ifs are nested for readability
|
||||
if (!$is_inline) {
|
||||
if (!$depth) {
|
||||
if (($token->type == 'text') ||
|
||||
($def->info[$token->name]->type == 'inline')) {
|
||||
if (
|
||||
$token->type == 'text' ||
|
||||
!isset($this->elements[$token->name])
|
||||
) {
|
||||
$is_inline = true;
|
||||
$ret[] = $block_wrap_start;
|
||||
}
|
||||
@@ -50,7 +56,7 @@ extends HTMLPurifier_ChildDef_Required
|
||||
if (!$depth) {
|
||||
// starting tokens have been inline text / empty
|
||||
if ($token->type == 'start' || $token->type == 'empty') {
|
||||
if ($def->info[$token->name]->type == 'block') {
|
||||
if (isset($this->elements[$token->name])) {
|
||||
// ended
|
||||
$ret[] = $block_wrap_end;
|
||||
$is_inline = false;
|
||||
|
@@ -149,23 +149,36 @@ class HTMLPurifier_Config
|
||||
return;
|
||||
}
|
||||
$this->conf[$namespace][$key] = $value;
|
||||
if ($namespace == 'HTML' || $namespace == 'Attr') {
|
||||
// reset HTML definition if relevant attributes changed
|
||||
$this->html_definition = null;
|
||||
}
|
||||
if ($namespace == 'CSS') {
|
||||
$this->css_definition = null;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Retrieves a copy of the HTML definition.
|
||||
* Retrieves reference to the HTML definition.
|
||||
* @param $raw Return a copy that has not been setup yet. Must be
|
||||
* called before it's been setup, otherwise won't work.
|
||||
*/
|
||||
function getHTMLDefinition() {
|
||||
if ($this->html_definition === null) {
|
||||
$this->html_definition = new HTMLPurifier_HTMLDefinition();
|
||||
$this->html_definition->setup($this);
|
||||
function &getHTMLDefinition($raw = false) {
|
||||
if (
|
||||
empty($this->html_definition) || // hasn't ever been setup
|
||||
($raw && $this->html_definition->setup) // requesting new one
|
||||
) {
|
||||
$this->html_definition = new HTMLPurifier_HTMLDefinition($this);
|
||||
if ($raw) return $this->html_definition; // no setup!
|
||||
}
|
||||
if (!$this->html_definition->setup) $this->html_definition->setup();
|
||||
return $this->html_definition;
|
||||
}
|
||||
|
||||
/**
|
||||
* Retrieves a copy of the CSS definition
|
||||
* Retrieves reference to the CSS definition
|
||||
*/
|
||||
function getCSSDefinition() {
|
||||
function &getCSSDefinition() {
|
||||
if ($this->css_definition === null) {
|
||||
$this->css_definition = new HTMLPurifier_CSSDefinition();
|
||||
$this->css_definition->setup($this);
|
||||
|
10
library/HTMLPurifier/ConfigDef.php
Normal file
10
library/HTMLPurifier/ConfigDef.php
Normal file
@@ -0,0 +1,10 @@
|
||||
<?php
|
||||
|
||||
/**
|
||||
* Base class for configuration entity
|
||||
*/
|
||||
class HTMLPurifier_ConfigDef {
|
||||
var $class = false;
|
||||
}
|
||||
|
||||
?>
|
74
library/HTMLPurifier/ConfigDef/Directive.php
Normal file
74
library/HTMLPurifier/ConfigDef/Directive.php
Normal file
@@ -0,0 +1,74 @@
|
||||
<?php
|
||||
|
||||
require_once 'HTMLPurifier/ConfigDef.php';
|
||||
|
||||
/**
|
||||
* Structure object containing definition of a directive.
|
||||
* @note This structure does not contain default values
|
||||
*/
|
||||
class HTMLPurifier_ConfigDef_Directive extends HTMLPurifier_ConfigDef
|
||||
{
|
||||
|
||||
var $class = 'directive';
|
||||
|
||||
function HTMLPurifier_ConfigDef_Directive(
|
||||
$type = null,
|
||||
$descriptions = null,
|
||||
$allow_null = null,
|
||||
$allowed = null,
|
||||
$aliases = null
|
||||
) {
|
||||
if ( $type !== null) $this->type = $type;
|
||||
if ($descriptions !== null) $this->descriptions = $descriptions;
|
||||
if ( $allow_null !== null) $this->allow_null = $allow_null;
|
||||
if ( $allowed !== null) $this->allowed = $allowed;
|
||||
if ( $aliases !== null) $this->aliases = $aliases;
|
||||
}
|
||||
|
||||
/**
|
||||
* Allowed type of the directive. Values are:
|
||||
* - string
|
||||
* - istring (case insensitive string)
|
||||
* - int
|
||||
* - float
|
||||
* - bool
|
||||
* - lookup (array of value => true)
|
||||
* - list (regular numbered index array)
|
||||
* - hash (array of key => value)
|
||||
* - mixed (anything goes)
|
||||
*/
|
||||
var $type = 'mixed';
|
||||
|
||||
/**
|
||||
* Plaintext descriptions of the configuration entity is. Organized by
|
||||
* file and line number, so multiple descriptions are allowed.
|
||||
*/
|
||||
var $descriptions = array();
|
||||
|
||||
/**
|
||||
* Is null allowed? Has no effect for mixed type.
|
||||
* @bool
|
||||
*/
|
||||
var $allow_null = false;
|
||||
|
||||
/**
|
||||
* Lookup table of allowed values of the element, bool true if all allowed.
|
||||
*/
|
||||
var $allowed = true;
|
||||
|
||||
/**
|
||||
* Hash of value aliases, i.e. values that are equivalent.
|
||||
*/
|
||||
var $aliases = array();
|
||||
|
||||
/**
|
||||
* Adds a description to the array
|
||||
*/
|
||||
function addDescription($file, $line, $description) {
|
||||
if (!isset($this->descriptions[$file])) $this->descriptions[$file] = array();
|
||||
$this->descriptions[$file][$line] = $description;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
?>
|
27
library/HTMLPurifier/ConfigDef/DirectiveAlias.php
Normal file
27
library/HTMLPurifier/ConfigDef/DirectiveAlias.php
Normal file
@@ -0,0 +1,27 @@
|
||||
<?php
|
||||
|
||||
require_once 'HTMLPurifier/ConfigDef.php';
|
||||
|
||||
/**
|
||||
* Structure object describing a directive alias
|
||||
*/
|
||||
class HTMLPurifier_ConfigDef_DirectiveAlias extends HTMLPurifier_ConfigDef
|
||||
{
|
||||
var $class = 'alias';
|
||||
|
||||
/**
|
||||
* Namespace being aliased to
|
||||
*/
|
||||
var $namespace;
|
||||
/**
|
||||
* Directive being aliased to
|
||||
*/
|
||||
var $name;
|
||||
|
||||
function HTMLPurifier_ConfigDef_DirectiveAlias($namespace, $name) {
|
||||
$this->namespace = $namespace;
|
||||
$this->name = $name;
|
||||
}
|
||||
}
|
||||
|
||||
?>
|
23
library/HTMLPurifier/ConfigDef/Namespace.php
Normal file
23
library/HTMLPurifier/ConfigDef/Namespace.php
Normal file
@@ -0,0 +1,23 @@
|
||||
<?php
|
||||
|
||||
require_once 'HTMLPurifier/ConfigDef.php';
|
||||
|
||||
/**
|
||||
* Structure object describing of a namespace
|
||||
*/
|
||||
class HTMLPurifier_ConfigDef_Namespace extends HTMLPurifier_ConfigDef {
|
||||
|
||||
function HTMLPurifier_ConfigDef_Namespace($description = null) {
|
||||
$this->description = $description;
|
||||
}
|
||||
|
||||
var $class = 'namespace';
|
||||
|
||||
/**
|
||||
* String description of what kinds of directives go in this namespace.
|
||||
*/
|
||||
var $description;
|
||||
|
||||
}
|
||||
|
||||
?>
|
@@ -1,6 +1,10 @@
|
||||
<?php
|
||||
|
||||
require_once 'HTMLPurifier/Error.php';
|
||||
require_once 'HTMLPurifier/ConfigDef.php';
|
||||
require_once 'HTMLPurifier/ConfigDef/Namespace.php';
|
||||
require_once 'HTMLPurifier/ConfigDef/Directive.php';
|
||||
require_once 'HTMLPurifier/ConfigDef/DirectiveAlias.php';
|
||||
|
||||
/**
|
||||
* Configuration definition, defines directives and their defaults.
|
||||
@@ -138,7 +142,7 @@ class HTMLPurifier_ConfigSchema {
|
||||
return;
|
||||
}
|
||||
$def->info[$namespace][$name] =
|
||||
new HTMLPurifier_ConfigEntity_Directive();
|
||||
new HTMLPurifier_ConfigDef_Directive();
|
||||
$def->info[$namespace][$name]->type = $type;
|
||||
$def->info[$namespace][$name]->allow_null = $allow_null;
|
||||
$def->defaults[$namespace][$name] = $default;
|
||||
@@ -172,7 +176,7 @@ class HTMLPurifier_ConfigSchema {
|
||||
return;
|
||||
}
|
||||
$def->info[$namespace] = array();
|
||||
$def->info_namespace[$namespace] = new HTMLPurifier_ConfigEntity_Namespace();
|
||||
$def->info_namespace[$namespace] = new HTMLPurifier_ConfigDef_Namespace();
|
||||
$def->info_namespace[$namespace]->description = $description;
|
||||
$def->defaults[$namespace] = array();
|
||||
}
|
||||
@@ -284,7 +288,7 @@ class HTMLPurifier_ConfigSchema {
|
||||
return;
|
||||
}
|
||||
$def->info[$namespace][$name] =
|
||||
new HTMLPurifier_ConfigEntity_DirectiveAlias(
|
||||
new HTMLPurifier_ConfigDef_DirectiveAlias(
|
||||
$new_namespace, $new_name);
|
||||
}
|
||||
|
||||
@@ -330,6 +334,10 @@ class HTMLPurifier_ConfigSchema {
|
||||
case 'hash':
|
||||
case 'lookup':
|
||||
if (is_string($var)) {
|
||||
// special case: technically, this is an array with
|
||||
// a single empty string item, but having an empty
|
||||
// array is more intuitive
|
||||
if ($var == '') return array();
|
||||
// simplistic string to array method that only works
|
||||
// for simple lists of tag names or alphanumeric characters
|
||||
$var = explode(',',$var);
|
||||
@@ -379,120 +387,4 @@ class HTMLPurifier_ConfigSchema {
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Base class for configuration entity
|
||||
*/
|
||||
class HTMLPurifier_ConfigEntity {
|
||||
var $class = false;
|
||||
}
|
||||
|
||||
/**
|
||||
* Structure object describing of a namespace
|
||||
*/
|
||||
class HTMLPurifier_ConfigEntity_Namespace extends HTMLPurifier_ConfigEntity {
|
||||
|
||||
function HTMLPurifier_ConfigEntity_Namespace($description = null) {
|
||||
$this->description = $description;
|
||||
}
|
||||
|
||||
var $class = 'namespace';
|
||||
|
||||
/**
|
||||
* String description of what kinds of directives go in this namespace.
|
||||
*/
|
||||
var $description;
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* Structure object containing definition of a directive.
|
||||
* @note This structure does not contain default values
|
||||
*/
|
||||
class HTMLPurifier_ConfigEntity_Directive extends HTMLPurifier_ConfigEntity
|
||||
{
|
||||
|
||||
var $class = 'directive';
|
||||
|
||||
function HTMLPurifier_ConfigEntity_Directive(
|
||||
$type = null,
|
||||
$descriptions = null,
|
||||
$allow_null = null,
|
||||
$allowed = null,
|
||||
$aliases = null
|
||||
) {
|
||||
if ( $type !== null) $this->type = $type;
|
||||
if ($descriptions !== null) $this->descriptions = $descriptions;
|
||||
if ( $allow_null !== null) $this->allow_null = $allow_null;
|
||||
if ( $allowed !== null) $this->allowed = $allowed;
|
||||
if ( $aliases !== null) $this->aliases = $aliases;
|
||||
}
|
||||
|
||||
/**
|
||||
* Allowed type of the directive. Values are:
|
||||
* - string
|
||||
* - istring (case insensitive string)
|
||||
* - int
|
||||
* - float
|
||||
* - bool
|
||||
* - lookup (array of value => true)
|
||||
* - list (regular numbered index array)
|
||||
* - hash (array of key => value)
|
||||
* - mixed (anything goes)
|
||||
*/
|
||||
var $type = 'mixed';
|
||||
|
||||
/**
|
||||
* Plaintext descriptions of the configuration entity is. Organized by
|
||||
* file and line number, so multiple descriptions are allowed.
|
||||
*/
|
||||
var $descriptions = array();
|
||||
|
||||
/**
|
||||
* Is null allowed? Has no effect for mixed type.
|
||||
* @bool
|
||||
*/
|
||||
var $allow_null = false;
|
||||
|
||||
/**
|
||||
* Lookup table of allowed values of the element, bool true if all allowed.
|
||||
*/
|
||||
var $allowed = true;
|
||||
|
||||
/**
|
||||
* Hash of value aliases, i.e. values that are equivalent.
|
||||
*/
|
||||
var $aliases = array();
|
||||
|
||||
/**
|
||||
* Adds a description to the array
|
||||
*/
|
||||
function addDescription($file, $line, $description) {
|
||||
if (!isset($this->descriptions[$file])) $this->descriptions[$file] = array();
|
||||
$this->descriptions[$file][$line] = $description;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* Structure object describing a directive alias
|
||||
*/
|
||||
class HTMLPurifier_ConfigEntity_DirectiveAlias extends HTMLPurifier_ConfigEntity
|
||||
{
|
||||
var $class = 'alias';
|
||||
|
||||
/**
|
||||
* Namespace being aliased to
|
||||
*/
|
||||
var $namespace;
|
||||
/**
|
||||
* Directive being aliased to
|
||||
*/
|
||||
var $name;
|
||||
|
||||
function HTMLPurifier_ConfigEntity_DirectiveAlias($namespace, $name) {
|
||||
$this->namespace = $namespace;
|
||||
$this->name = $name;
|
||||
}
|
||||
}
|
||||
|
||||
?>
|
||||
|
148
library/HTMLPurifier/ContentSets.php
Normal file
148
library/HTMLPurifier/ContentSets.php
Normal file
@@ -0,0 +1,148 @@
|
||||
<?php
|
||||
|
||||
// common defs that we'll support by default
|
||||
require_once 'HTMLPurifier/ChildDef.php';
|
||||
require_once 'HTMLPurifier/ChildDef/Empty.php';
|
||||
require_once 'HTMLPurifier/ChildDef/Required.php';
|
||||
require_once 'HTMLPurifier/ChildDef/Optional.php';
|
||||
|
||||
class HTMLPurifier_ContentSets
|
||||
{
|
||||
|
||||
/**
|
||||
* List of content set strings (pipe seperators) indexed by name.
|
||||
* @public
|
||||
*/
|
||||
var $info = array();
|
||||
|
||||
/**
|
||||
* List of content set lookups (element => true) indexed by name.
|
||||
* @note This is in HTMLPurifier_HTMLDefinition->info_content_sets
|
||||
* @public
|
||||
*/
|
||||
var $lookup = array();
|
||||
|
||||
/**
|
||||
* Synchronized list of defined content sets (keys of info)
|
||||
*/
|
||||
var $keys = array();
|
||||
/**
|
||||
* Synchronized list of defined content values (values of info)
|
||||
*/
|
||||
var $values = array();
|
||||
|
||||
/**
|
||||
* Merges in module's content sets, expands identifiers in the content
|
||||
* sets and populates the keys, values and lookup member variables.
|
||||
* @param $modules List of HTMLPurifier_HTMLModule
|
||||
*/
|
||||
function HTMLPurifier_ContentSets($modules) {
|
||||
if (!is_array($modules)) $modules = array($modules);
|
||||
// populate content_sets based on module hints
|
||||
// sorry, no way of overloading
|
||||
foreach ($modules as $module_i => $module) {
|
||||
foreach ($module->content_sets as $key => $value) {
|
||||
if (isset($this->info[$key])) {
|
||||
// add it into the existing content set
|
||||
$this->info[$key] = $this->info[$key] . ' | ' . $value;
|
||||
} else {
|
||||
$this->info[$key] = $value;
|
||||
}
|
||||
}
|
||||
}
|
||||
// perform content_set expansions
|
||||
$this->keys = array_keys($this->info);
|
||||
foreach ($this->info as $i => $set) {
|
||||
// only performed once, so infinite recursion is not
|
||||
// a problem
|
||||
$this->info[$i] =
|
||||
str_replace(
|
||||
$this->keys,
|
||||
// must be recalculated each time due to
|
||||
// changing substitutions
|
||||
array_values($this->info),
|
||||
$set);
|
||||
}
|
||||
$this->values = array_values($this->info);
|
||||
|
||||
// generate lookup tables
|
||||
foreach ($this->info as $name => $set) {
|
||||
$this->lookup[$name] = $this->convertToLookup($set);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Accepts a definition; generates and assigns a ChildDef for it
|
||||
* @param $def HTMLPurifier_ElementDef reference
|
||||
* @param $module Module that defined the ElementDef
|
||||
*/
|
||||
function generateChildDef(&$def, $module) {
|
||||
if (!empty($def->child)) return; // already done!
|
||||
$content_model = $def->content_model;
|
||||
if (is_string($content_model)) {
|
||||
$def->content_model = str_replace(
|
||||
$this->keys, $this->values, $content_model);
|
||||
}
|
||||
$def->child = $this->getChildDef($def, $module);
|
||||
}
|
||||
|
||||
/**
|
||||
* Instantiates a ChildDef based on content_model and content_model_type
|
||||
* member variables in HTMLPurifier_ElementDef
|
||||
* @note This will also defer to modules for custom HTMLPurifier_ChildDef
|
||||
* subclasses that need content set expansion
|
||||
* @param $def HTMLPurifier_ElementDef to have ChildDef extracted
|
||||
* @return HTMLPurifier_ChildDef corresponding to ElementDef
|
||||
*/
|
||||
function getChildDef($def, $module) {
|
||||
$value = $def->content_model;
|
||||
if (is_object($value)) {
|
||||
trigger_error(
|
||||
'Literal object child definitions should be stored in '.
|
||||
'ElementDef->child not ElementDef->content_model',
|
||||
E_USER_NOTICE
|
||||
);
|
||||
return $value;
|
||||
}
|
||||
switch ($def->content_model_type) {
|
||||
case 'required':
|
||||
return new HTMLPurifier_ChildDef_Required($value);
|
||||
case 'optional':
|
||||
return new HTMLPurifier_ChildDef_Optional($value);
|
||||
case 'empty':
|
||||
return new HTMLPurifier_ChildDef_Empty();
|
||||
case 'custom':
|
||||
return new HTMLPurifier_ChildDef_Custom($value);
|
||||
}
|
||||
// defer to its module
|
||||
$return = false;
|
||||
if ($module->defines_child_def) { // save a func call
|
||||
$return = $module->getChildDef($def);
|
||||
}
|
||||
if ($return !== false) return $return;
|
||||
// error-out
|
||||
trigger_error(
|
||||
'Could not determine which ChildDef class to instantiate',
|
||||
E_USER_ERROR
|
||||
);
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* Converts a string list of elements separated by pipes into
|
||||
* a lookup array.
|
||||
* @param $string List of elements
|
||||
* @return Lookup array of elements
|
||||
*/
|
||||
function convertToLookup($string) {
|
||||
$array = explode('|', str_replace(' ', '', $string));
|
||||
$ret = array();
|
||||
foreach ($array as $i => $k) {
|
||||
$ret[$k] = true;
|
||||
}
|
||||
return $ret;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
?>
|
122
library/HTMLPurifier/ElementDef.php
Normal file
122
library/HTMLPurifier/ElementDef.php
Normal file
@@ -0,0 +1,122 @@
|
||||
<?php
|
||||
|
||||
/**
|
||||
* Structure that stores an HTML element definition. Used by
|
||||
* HTMLPurifier_HTMLDefinition and HTMLPurifier_HTMLModule.
|
||||
*/
|
||||
class HTMLPurifier_ElementDef
|
||||
{
|
||||
|
||||
/**
|
||||
* Does the definition work by itself, or is it created solely
|
||||
* for the purpose of merging into another definition?
|
||||
*/
|
||||
var $standalone = true;
|
||||
|
||||
/**
|
||||
* Associative array of attribute name to HTMLPurifier_AttrDef
|
||||
* @note Before being processed by HTMLPurifier_AttrCollections
|
||||
* when modules are finalized during
|
||||
* HTMLPurifier_HTMLDefinition->setup(), this array may also
|
||||
* contain an array at index 0 that indicates which attribute
|
||||
* collections to load into the full array. It may also
|
||||
* contain string indentifiers in lieu of HTMLPurifier_AttrDef,
|
||||
* see HTMLPurifier_AttrTypes on how they are expanded during
|
||||
* HTMLPurifier_HTMLDefinition->setup() processing.
|
||||
* @public
|
||||
*/
|
||||
var $attr = array();
|
||||
|
||||
/**
|
||||
* Indexed list of tag's HTMLPurifier_AttrTransform to be done before validation
|
||||
* @public
|
||||
*/
|
||||
var $attr_transform_pre = array();
|
||||
|
||||
/**
|
||||
* Indexed list of tag's HTMLPurifier_AttrTransform to be done after validation
|
||||
* @public
|
||||
*/
|
||||
var $attr_transform_post = array();
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* HTMLPurifier_ChildDef of this tag.
|
||||
* @public
|
||||
*/
|
||||
var $child;
|
||||
|
||||
/**
|
||||
* Abstract string representation of internal ChildDef rules. See
|
||||
* HTMLPurifier_ContentSets for how this is parsed and then transformed
|
||||
* into an HTMLPurifier_ChildDef.
|
||||
* @public
|
||||
*/
|
||||
var $content_model;
|
||||
|
||||
/**
|
||||
* Value of $child->type, used to determine which ChildDef to use,
|
||||
* used in combination with $content_model.
|
||||
* @public
|
||||
*/
|
||||
var $content_model_type;
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* Lookup table of tags that close this tag. Used during parsing
|
||||
* to make sure we don't attempt to nest unclosed tags.
|
||||
* @public
|
||||
*/
|
||||
var $auto_close = array();
|
||||
|
||||
/**
|
||||
* Does the element have a content model (#PCDATA | Inline)*? This
|
||||
* is important for chameleon ins and del processing in
|
||||
* HTMLPurifier_ChildDef_Chameleon. Dynamically set: modules don't
|
||||
* have to worry about this one.
|
||||
* @public
|
||||
*/
|
||||
var $descendants_are_inline;
|
||||
|
||||
/**
|
||||
* Lookup table of tags excluded from all descendants of this tag.
|
||||
* @public
|
||||
*/
|
||||
var $excludes = array();
|
||||
|
||||
/**
|
||||
* Merges the values of another element definition into this one.
|
||||
* Values from the new element def take precedence if a value is
|
||||
* not mergeable.
|
||||
*/
|
||||
function mergeIn($def) {
|
||||
|
||||
// later keys takes precedence
|
||||
foreach($def->attr as $k => $v) {
|
||||
if ($k === 0) {
|
||||
// merge in the includes
|
||||
// sorry, no way to override an include
|
||||
foreach ($v as $v2) {
|
||||
$def->attr[0][] = $v2;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
$this->attr[$k] = $v;
|
||||
}
|
||||
foreach($def->attr_transform_pre as $k => $v) $this->attr_transform_pre[$k] = $v;
|
||||
foreach($def->attr_transform_post as $k => $v) $this->attr_transform_post[$k] = $v;
|
||||
foreach($def->auto_close as $k => $v) $this->auto_close[$k] = $v;
|
||||
foreach($def->excludes as $k => $v) $this->excludes[$k] = $v;
|
||||
|
||||
if(!is_null($def->child)) $this->child = $def->child;
|
||||
if(!empty($def->content_model)) $this->content_model .= ' | ' . $def->content_model;
|
||||
if(!empty($def->content_model_type)) $this->content_model_type = $def->content_model_type;
|
||||
if(!is_null($def->descendants_are_inline)) $this->descendants_are_inline = $def->descendants_are_inline;
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
?>
|
@@ -9,13 +9,13 @@ class HTMLPurifier_Filter_YouTube extends HTMLPurifier_Filter
|
||||
|
||||
function preFilter($html, $config, &$context) {
|
||||
$pre_regex = '#<object[^>]+>.+?'.
|
||||
'http://www.youtube.com/v/([A-Za-z0-9]+).+?</object>#';
|
||||
'http://www.youtube.com/v/([A-Za-z0-9\-_]+).+?</object>#s';
|
||||
$pre_replace = '<span class="youtube-embed">\1</span>';
|
||||
return preg_replace($pre_regex, $pre_replace, $html);
|
||||
}
|
||||
|
||||
function postFilter($html, $config, &$context) {
|
||||
$post_regex = '#<span class="youtube-embed">([A-Za-z0-9]+)</span>#';
|
||||
$post_regex = '#<span class="youtube-embed">([A-Za-z0-9\-_]+)</span>#';
|
||||
$post_replace = '<object width="425" height="350" '.
|
||||
'data="http://www.youtube.com/v/\1">'.
|
||||
'<param name="movie" value="http://www.youtube.com/v/\1"></param>'.
|
||||
|
@@ -1,46 +1,12 @@
|
||||
<?php
|
||||
|
||||
require_once 'HTMLPurifier/AttrDef.php';
|
||||
require_once 'HTMLPurifier/AttrDef/Enum.php';
|
||||
require_once 'HTMLPurifier/AttrDef/ID.php';
|
||||
require_once 'HTMLPurifier/AttrDef/Class.php';
|
||||
require_once 'HTMLPurifier/AttrDef/Text.php';
|
||||
require_once 'HTMLPurifier/AttrDef/Lang.php';
|
||||
require_once 'HTMLPurifier/AttrDef/Pixels.php';
|
||||
require_once 'HTMLPurifier/AttrDef/Length.php';
|
||||
require_once 'HTMLPurifier/AttrDef/MultiLength.php';
|
||||
require_once 'HTMLPurifier/AttrDef/Integer.php';
|
||||
require_once 'HTMLPurifier/AttrDef/URI.php';
|
||||
require_once 'HTMLPurifier/AttrDef/CSS.php';
|
||||
require_once 'HTMLPurifier/AttrTransform.php';
|
||||
require_once 'HTMLPurifier/AttrTransform/Lang.php';
|
||||
require_once 'HTMLPurifier/AttrTransform/TextAlign.php';
|
||||
require_once 'HTMLPurifier/AttrTransform/BdoDir.php';
|
||||
require_once 'HTMLPurifier/AttrTransform/ImgRequired.php';
|
||||
require_once 'HTMLPurifier/ChildDef.php';
|
||||
require_once 'HTMLPurifier/ChildDef/Chameleon.php';
|
||||
require_once 'HTMLPurifier/ChildDef/Empty.php';
|
||||
require_once 'HTMLPurifier/ChildDef/Required.php';
|
||||
require_once 'HTMLPurifier/ChildDef/Optional.php';
|
||||
require_once 'HTMLPurifier/ChildDef/Table.php';
|
||||
require_once 'HTMLPurifier/ChildDef/StrictBlockquote.php';
|
||||
require_once 'HTMLPurifier/Generator.php';
|
||||
require_once 'HTMLPurifier/Token.php';
|
||||
require_once 'HTMLPurifier/TagTransform.php';
|
||||
// components
|
||||
require_once 'HTMLPurifier/HTMLModuleManager.php';
|
||||
|
||||
HTMLPurifier_ConfigSchema::define(
|
||||
'HTML', 'EnableAttrID', false, 'bool',
|
||||
'Allows the ID attribute in HTML. This is disabled by default '.
|
||||
'due to the fact that without proper configuration user input can '.
|
||||
'easily break the validation of a webpage by specifying an ID that is '.
|
||||
'already on the surrounding HTML. If you don\'t mind throwing caution to '.
|
||||
'the wind, enable this directive, but I strongly recommend you also '.
|
||||
'consider blacklisting IDs you use (%Attr.IDBlacklist) or prefixing all '.
|
||||
'user supplied IDs (%Attr.IDPrefix). This directive has been available '.
|
||||
'since 1.2.0, and when set to true reverts to the behavior of pre-1.2.0 '.
|
||||
'versions.'
|
||||
);
|
||||
// this definition and its modules MUST NOT define configuration directives
|
||||
// outside of the HTML or Attr namespaces
|
||||
|
||||
// will be superceded by more accurate doctype declaration schemes
|
||||
HTMLPurifier_ConfigSchema::define(
|
||||
'HTML', 'Strict', false, 'bool',
|
||||
'Determines whether or not to use Transitional (loose) or Strict rulesets. '.
|
||||
@@ -91,33 +57,31 @@ HTMLPurifier_ConfigSchema::define(
|
||||
'IDs at all. This directive has been available since 1.3.0.'
|
||||
);
|
||||
|
||||
HTMLPurifier_ConfigSchema::define(
|
||||
'Attr', 'DisableURI', false, 'bool',
|
||||
'Disables all URIs in all forms. Not sure why you\'d want to do that '.
|
||||
'(after all, the Internet\'s founded on the notion of a hyperlink). '.
|
||||
'This directive has been available since 1.3.0.'
|
||||
);
|
||||
|
||||
/**
|
||||
* Defines the purified HTML type with large amounts of objects.
|
||||
* Definition of the purified HTML that describes allowed children,
|
||||
* attributes, and many other things.
|
||||
*
|
||||
* The main function of this object is its $info array, which is an
|
||||
* associative array of all the child and attribute definitions for
|
||||
* each allowed element. It also contains special use information (always
|
||||
* prefixed by info) for intelligent tag closing and global attributes.
|
||||
* Conventions:
|
||||
*
|
||||
* For optimization, the definition generation may be moved to
|
||||
* a maintenance script and stipulate that definition be created
|
||||
* by a factory method that unserializes a serialized version of Definition.
|
||||
* Customization would entail copying the maintenance script, making the
|
||||
* necessary changes, generating the serialized object, and then hooking it
|
||||
* in via the factory method. We would also offer a LiveDefinition for
|
||||
* automatic recompilation, suggesting that we would have a DefinitionGenerator.
|
||||
* All member variables that are prefixed with info
|
||||
* (including the main $info array) are used by HTML Purifier internals
|
||||
* and should not be directly edited when customizing the HTMLDefinition.
|
||||
* They can usually be set via configuration directives or custom
|
||||
* modules.
|
||||
*
|
||||
* On the other hand, member variables without the info prefix are used
|
||||
* internally by the HTMLDefinition and MUST NOT be used by other HTML
|
||||
* Purifier internals. Many of them, however, are public, and may be
|
||||
* edited by userspace code to tweak the behavior of HTMLDefinition.
|
||||
*
|
||||
* HTMLPurifier_Printer_HTMLDefinition is a notable exception to this
|
||||
* rule: in the interest of comprehensiveness, it will sniff everything.
|
||||
*/
|
||||
|
||||
class HTMLPurifier_HTMLDefinition
|
||||
{
|
||||
|
||||
/** FULLY-PUBLIC VARIABLES */
|
||||
|
||||
/**
|
||||
* Associative array of element names to HTMLPurifier_ElementDef
|
||||
* @public
|
||||
@@ -157,499 +121,169 @@ class HTMLPurifier_HTMLDefinition
|
||||
var $info_tag_transform = array();
|
||||
|
||||
/**
|
||||
* List of HTMLPurifier_AttrTransform to be performed before validation.
|
||||
* Indexed list of HTMLPurifier_AttrTransform to be performed before validation.
|
||||
* @public
|
||||
*/
|
||||
var $info_attr_transform_pre = array();
|
||||
|
||||
/**
|
||||
* List of HTMLPurifier_AttrTransform to be performed after validation/
|
||||
* Indexed list of HTMLPurifier_AttrTransform to be performed after validation.
|
||||
* @public
|
||||
*/
|
||||
var $info_attr_transform_post = array();
|
||||
|
||||
/**
|
||||
* Lookup table of flow elements
|
||||
* Nested lookup array of content set name (Block, Inline) to
|
||||
* element name to whether or not it belongs in that content set.
|
||||
* @public
|
||||
*/
|
||||
var $info_flow_elements = array();
|
||||
var $info_content_sets = array();
|
||||
|
||||
|
||||
|
||||
/** PUBLIC BUT INTERNAL VARIABLES */
|
||||
|
||||
var $setup = false; /**< Has setup() been called yet? */
|
||||
var $config; /**< Temporary instance of HTMLPurifier_Config */
|
||||
|
||||
var $manager; /**< Instance of HTMLPurifier_HTMLModuleManager */
|
||||
|
||||
/**
|
||||
* Boolean is a strict definition?
|
||||
* @public
|
||||
* Performs low-cost, preliminary initialization.
|
||||
* @param $config Instance of HTMLPurifier_Config
|
||||
*/
|
||||
var $strict;
|
||||
function HTMLPurifier_HTMLDefinition(&$config) {
|
||||
$this->config =& $config;
|
||||
$this->manager = new HTMLPurifier_HTMLModuleManager();
|
||||
}
|
||||
|
||||
/**
|
||||
* Initializes the definition, the meat of the class.
|
||||
* Processes internals into form usable by HTMLPurifier internals.
|
||||
* Modifying the definition after calling this function should not
|
||||
* be done.
|
||||
*/
|
||||
function setup($config) {
|
||||
|
||||
// some cached config values
|
||||
$this->strict = $config->get('HTML', 'Strict');
|
||||
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
// info[] : initializes the definition objects
|
||||
|
||||
// if you attempt to define rules later on for a tag not in this array
|
||||
// PHP will create an stdclass
|
||||
|
||||
$allowed_tags =
|
||||
array(
|
||||
'ins', 'del', 'blockquote', 'dd', 'li', 'div', 'em', 'strong',
|
||||
'dfn', 'code', 'samp', 'kbd', 'var', 'cite', 'abbr', 'acronym',
|
||||
'q', 'sub', 'tt', 'sup', 'i', 'b', 'big', 'small',
|
||||
'bdo', 'span', 'dt', 'p', 'h1', 'h2', 'h3', 'h4',
|
||||
'h5', 'h6', 'ol', 'ul', 'dl', 'address', 'img', 'br', 'hr',
|
||||
'pre', 'a', 'table', 'caption', 'thead', 'tfoot', 'tbody',
|
||||
'colgroup', 'col', 'td', 'th', 'tr'
|
||||
);
|
||||
|
||||
if (!$this->strict) {
|
||||
$allowed_tags[] = 'u';
|
||||
$allowed_tags[] = 's';
|
||||
$allowed_tags[] = 'strike';
|
||||
}
|
||||
|
||||
foreach ($allowed_tags as $tag) {
|
||||
$this->info[$tag] = new HTMLPurifier_ElementDef();
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
// info[]->child : defines allowed children for elements
|
||||
|
||||
// emulates the structure of the DTD
|
||||
// however, these are condensed, with bad stuff taken out
|
||||
// screening process was done by hand
|
||||
|
||||
// entities: prefixed with e_ and _ replaces . from DTD
|
||||
// double underlines are entities we made up
|
||||
|
||||
// we don't use an array because that complicates interpolation
|
||||
// strings are used instead of arrays because if you use arrays,
|
||||
// you have to do some hideous manipulation with array_merge()
|
||||
|
||||
// todo: determine whether or not having allowed children
|
||||
// that aren't allowed globally affects security (it shouldn't)
|
||||
// if above works out, extend children definitions to include all
|
||||
// possible elements (allowed elements will dictate which ones
|
||||
// get dropped
|
||||
|
||||
$e_special_extra = 'img';
|
||||
$e_special_basic = 'br | span | bdo';
|
||||
$e_special = "$e_special_basic | $e_special_extra";
|
||||
$e_fontstyle_extra = 'big | small';
|
||||
$e_fontstyle_basic = 'tt | i | b | u | s | strike';
|
||||
$e_fontstyle = "$e_fontstyle_basic | $e_fontstyle_extra";
|
||||
$e_phrase_extra = 'sub | sup';
|
||||
$e_phrase_basic = 'em | strong | dfn | code | q | samp | kbd | var'.
|
||||
' | cite | abbr | acronym';
|
||||
$e_phrase = "$e_phrase_basic | $e_phrase_extra";
|
||||
$e_misc_inline = 'ins | del';
|
||||
$e_misc = "$e_misc_inline";
|
||||
$e_inline = "a | $e_special | $e_fontstyle | $e_phrase";
|
||||
// pseudo-property we created for convenience, see later on
|
||||
$e__inline = "#PCDATA | $e_inline | $e_misc_inline";
|
||||
// note the casing
|
||||
$e_Inline = new HTMLPurifier_ChildDef_Optional($e__inline);
|
||||
$e_heading = 'h1|h2|h3|h4|h5|h6';
|
||||
$e_lists = 'ul | ol | dl';
|
||||
$e_blocktext = 'pre | hr | blockquote | address';
|
||||
$e_block = "p | $e_heading | div | $e_lists | $e_blocktext | table";
|
||||
$e_Block = new HTMLPurifier_ChildDef_Optional($e_block);
|
||||
$e__flow = "#PCDATA | $e_block | $e_inline | $e_misc";
|
||||
$e_Flow = new HTMLPurifier_ChildDef_Optional($e__flow);
|
||||
$e_a_content = new HTMLPurifier_ChildDef_Optional("#PCDATA".
|
||||
" | $e_special | $e_fontstyle | $e_phrase | $e_misc_inline");
|
||||
$e_pre_content = new HTMLPurifier_ChildDef_Optional("#PCDATA | a".
|
||||
" | $e_special_basic | $e_fontstyle_basic | $e_phrase_basic".
|
||||
" | $e_misc_inline");
|
||||
$e_form_content = new HTMLPurifier_ChildDef_Optional('');//unused
|
||||
$e_form_button_content = new HTMLPurifier_ChildDef_Optional('');//unused
|
||||
|
||||
$this->info['ins']->child =
|
||||
$this->info['del']->child =
|
||||
new HTMLPurifier_ChildDef_Chameleon($e__inline, $e__flow);
|
||||
|
||||
$this->info['dd']->child =
|
||||
$this->info['li']->child =
|
||||
$this->info['div']->child = $e_Flow;
|
||||
|
||||
if ($this->strict) {
|
||||
$this->info['blockquote']->child = new HTMLPurifier_ChildDef_StrictBlockquote();
|
||||
} else {
|
||||
$this->info['blockquote']->child = $e_Flow;
|
||||
}
|
||||
|
||||
$this->info['caption']->child =
|
||||
$this->info['em']->child =
|
||||
$this->info['strong']->child =
|
||||
$this->info['dfn']->child =
|
||||
$this->info['code']->child =
|
||||
$this->info['samp']->child =
|
||||
$this->info['kbd']->child =
|
||||
$this->info['var']->child =
|
||||
$this->info['cite']->child =
|
||||
$this->info['abbr']->child =
|
||||
$this->info['acronym']->child =
|
||||
$this->info['q']->child =
|
||||
$this->info['sub']->child =
|
||||
$this->info['tt']->child =
|
||||
$this->info['sup']->child =
|
||||
$this->info['i']->child =
|
||||
$this->info['b']->child =
|
||||
$this->info['big']->child =
|
||||
$this->info['small']->child=
|
||||
$this->info['bdo']->child =
|
||||
$this->info['span']->child =
|
||||
$this->info['dt']->child =
|
||||
$this->info['p']->child =
|
||||
$this->info['h1']->child =
|
||||
$this->info['h2']->child =
|
||||
$this->info['h3']->child =
|
||||
$this->info['h4']->child =
|
||||
$this->info['h5']->child =
|
||||
$this->info['h6']->child = $e_Inline;
|
||||
|
||||
if (!$this->strict) {
|
||||
$this->info['u']->child =
|
||||
$this->info['s']->child =
|
||||
$this->info['strike']->child = $e_Inline;
|
||||
}
|
||||
|
||||
// the only three required definitions, besides custom table code
|
||||
$this->info['ol']->child =
|
||||
$this->info['ul']->child = new HTMLPurifier_ChildDef_Required('li');
|
||||
|
||||
$this->info['dl']->child = new HTMLPurifier_ChildDef_Required('dt|dd');
|
||||
|
||||
if ($this->strict) {
|
||||
$this->info['address']->child = $e_Inline;
|
||||
} else {
|
||||
$this->info['address']->child =
|
||||
new HTMLPurifier_ChildDef_Optional("#PCDATA | p | $e_inline".
|
||||
" | $e_misc_inline");
|
||||
}
|
||||
|
||||
$this->info['img']->child =
|
||||
$this->info['br']->child =
|
||||
$this->info['hr']->child = new HTMLPurifier_ChildDef_Empty();
|
||||
|
||||
$this->info['pre']->child = $e_pre_content;
|
||||
|
||||
$this->info['a']->child = $e_a_content;
|
||||
|
||||
$this->info['table']->child = new HTMLPurifier_ChildDef_Table();
|
||||
|
||||
// not a real entity, watch the double underscore
|
||||
$e__row = new HTMLPurifier_ChildDef_Required('tr');
|
||||
$this->info['thead']->child = $e__row;
|
||||
$this->info['tfoot']->child = $e__row;
|
||||
$this->info['tbody']->child = $e__row;
|
||||
$this->info['colgroup']->child = new HTMLPurifier_ChildDef_Optional('col');
|
||||
$this->info['col']->child = new HTMLPurifier_ChildDef_Empty();
|
||||
$this->info['tr']->child = new HTMLPurifier_ChildDef_Required('th | td');
|
||||
$this->info['th']->child = $e_Flow;
|
||||
$this->info['td']->child = $e_Flow;
|
||||
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
// info[]->type : defines the type of the element (block or inline)
|
||||
|
||||
// reuses $e_Inline and $e_Block
|
||||
foreach ($e_Inline->elements as $name => $bool) {
|
||||
if ($name == '#PCDATA') continue;
|
||||
if (!isset($this->info[$name])) continue;
|
||||
$this->info[$name]->type = 'inline';
|
||||
}
|
||||
|
||||
foreach ($e_Block->elements as $name => $bool) {
|
||||
if (!isset($this->info[$name])) continue;
|
||||
$this->info[$name]->type = 'block';
|
||||
}
|
||||
|
||||
foreach ($e_Flow->elements as $name => $bool) {
|
||||
$this->info_flow_elements[$name] = true;
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
// info[]->excludes : defines elements that aren't allowed in here
|
||||
|
||||
// make sure you test using isset() and not !empty()
|
||||
|
||||
$this->info['a']->excludes = array('a' => true);
|
||||
$this->info['pre']->excludes = array_flip(array('img', 'big', 'small',
|
||||
// technically useless, but good to be indepth
|
||||
'object', 'applet', 'font', 'basefont'));
|
||||
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
// info[]->attr : defines allowed attributes for elements
|
||||
|
||||
// this doesn't include REQUIRED declarations, those are handled
|
||||
// by the transform classes. It will, however, do simple and slightly
|
||||
// complex attribute value substitution
|
||||
|
||||
// the question of varying allowed attributes is more entangling.
|
||||
|
||||
$e_Text = new HTMLPurifier_AttrDef_Text();
|
||||
|
||||
// attrs, included in almost every single one except for a few,
|
||||
// which manually override these in their local definitions
|
||||
$this->info_global_attr = array(
|
||||
// core attrs
|
||||
'class' => new HTMLPurifier_AttrDef_Class(),
|
||||
'title' => $e_Text,
|
||||
'style' => new HTMLPurifier_AttrDef_CSS(),
|
||||
// i18n
|
||||
'dir' => new HTMLPurifier_AttrDef_Enum(array('ltr','rtl'), false),
|
||||
'lang' => new HTMLPurifier_AttrDef_Lang(),
|
||||
'xml:lang' => new HTMLPurifier_AttrDef_Lang(),
|
||||
);
|
||||
|
||||
if ($config->get('HTML', 'EnableAttrID')) {
|
||||
$this->info_global_attr['id'] = new HTMLPurifier_AttrDef_ID();
|
||||
}
|
||||
|
||||
// required attribute stipulation handled in attribute transformation
|
||||
$this->info['bdo']->attr = array(); // nothing else
|
||||
|
||||
$this->info['br']->attr['dir'] = false;
|
||||
$this->info['br']->attr['lang'] = false;
|
||||
$this->info['br']->attr['xml:lang'] = false;
|
||||
|
||||
$this->info['td']->attr['abbr'] = $e_Text;
|
||||
$this->info['th']->attr['abbr'] = $e_Text;
|
||||
|
||||
$this->setAttrForTableElements('align', new HTMLPurifier_AttrDef_Enum(
|
||||
array('left', 'center', 'right', 'justify', 'char'), false));
|
||||
|
||||
$this->setAttrForTableElements('valign', new HTMLPurifier_AttrDef_Enum(
|
||||
array('top', 'middle', 'bottom', 'baseline'), false));
|
||||
|
||||
$this->info['img']->attr['alt'] = $e_Text;
|
||||
|
||||
$e_TFrame = new HTMLPurifier_AttrDef_Enum(array('void', 'above',
|
||||
'below', 'hsides', 'lhs', 'rhs', 'vsides', 'box', 'border'), false);
|
||||
$this->info['table']->attr['frame'] = $e_TFrame;
|
||||
|
||||
$e_TRules = new HTMLPurifier_AttrDef_Enum(array('none', 'groups',
|
||||
'rows', 'cols', 'all'), false);
|
||||
$this->info['table']->attr['rules'] = $e_TRules;
|
||||
|
||||
$this->info['table']->attr['summary'] = $e_Text;
|
||||
|
||||
$this->info['table']->attr['border'] =
|
||||
new HTMLPurifier_AttrDef_Pixels();
|
||||
|
||||
$e_Length = new HTMLPurifier_AttrDef_Length();
|
||||
$this->info['table']->attr['cellpadding'] =
|
||||
$this->info['table']->attr['cellspacing'] =
|
||||
$this->info['table']->attr['width'] =
|
||||
$this->info['img']->attr['height'] =
|
||||
$this->info['img']->attr['width'] = $e_Length;
|
||||
$this->setAttrForTableElements('charoff', $e_Length);
|
||||
|
||||
$e_MultiLength = new HTMLPurifier_AttrDef_MultiLength();
|
||||
$this->info['col']->attr['width'] =
|
||||
$this->info['colgroup']->attr['width'] = $e_MultiLength;
|
||||
|
||||
$e__NumberSpan = new HTMLPurifier_AttrDef_Integer(false, false, true);
|
||||
$this->info['colgroup']->attr['span'] =
|
||||
$this->info['col']->attr['span'] =
|
||||
$this->info['td']->attr['rowspan'] =
|
||||
$this->info['th']->attr['rowspan'] =
|
||||
$this->info['td']->attr['colspan'] =
|
||||
$this->info['th']->attr['colspan'] = $e__NumberSpan;
|
||||
|
||||
if (!$config->get('Attr', 'DisableURI')) {
|
||||
$e_URI = new HTMLPurifier_AttrDef_URI();
|
||||
$this->info['a']->attr['href'] =
|
||||
$this->info['img']->attr['longdesc'] =
|
||||
$this->info['del']->attr['cite'] =
|
||||
$this->info['ins']->attr['cite'] =
|
||||
$this->info['blockquote']->attr['cite'] =
|
||||
$this->info['q']->attr['cite'] = $e_URI;
|
||||
|
||||
// URI that causes HTTP request
|
||||
$this->info['img']->attr['src'] = new HTMLPurifier_AttrDef_URI(true);
|
||||
}
|
||||
|
||||
if (!$this->strict) {
|
||||
$this->info['li']->attr['value'] = new HTMLPurifier_AttrDef_Integer();
|
||||
$this->info['ol']->attr['start'] = new HTMLPurifier_AttrDef_Integer();
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
// info_tag_transform : transformations of tags
|
||||
|
||||
$this->info_tag_transform['font'] = new HTMLPurifier_TagTransform_Font();
|
||||
$this->info_tag_transform['menu'] = new HTMLPurifier_TagTransform_Simple('ul');
|
||||
$this->info_tag_transform['dir'] = new HTMLPurifier_TagTransform_Simple('ul');
|
||||
$this->info_tag_transform['center'] = new HTMLPurifier_TagTransform_Center();
|
||||
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
// info[]->auto_close : tags that automatically close another
|
||||
|
||||
// todo: determine whether or not SGML-like modeling based on
|
||||
// mandatory/optional end tags would be a better policy
|
||||
|
||||
// make sure you test using isset() not !empty()
|
||||
|
||||
// these are all block elements: blocks aren't allowed in P
|
||||
$this->info['p']->auto_close = array_flip(array(
|
||||
'address', 'blockquote', 'dd', 'dir', 'div', 'dl', 'dt',
|
||||
'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'hr', 'ol', 'p', 'pre',
|
||||
'table', 'ul'
|
||||
));
|
||||
|
||||
$this->info['li']->auto_close = array('li' => true);
|
||||
|
||||
// we need TABLE and heading mismatch code
|
||||
// we may need to make this more flexible for heading mismatch,
|
||||
// or we can just create another info
|
||||
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
// info[]->attr_transform_* : attribute transformations in elements
|
||||
// pre is applied before any validation is done, post is done after
|
||||
|
||||
$this->info['h1']->attr_transform_pre[] =
|
||||
$this->info['h2']->attr_transform_pre[] =
|
||||
$this->info['h3']->attr_transform_pre[] =
|
||||
$this->info['h4']->attr_transform_pre[] =
|
||||
$this->info['h5']->attr_transform_pre[] =
|
||||
$this->info['h6']->attr_transform_pre[] =
|
||||
$this->info['p'] ->attr_transform_pre[] =
|
||||
new HTMLPurifier_AttrTransform_TextAlign();
|
||||
|
||||
$this->info['bdo']->attr_transform_post[] =
|
||||
new HTMLPurifier_AttrTransform_BdoDir();
|
||||
|
||||
$this->info['img']->attr_transform_post[] =
|
||||
new HTMLPurifier_AttrTransform_ImgRequired();
|
||||
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
// info_attr_transform_* : global attribute transformation that is
|
||||
// unconditionally called. Good for transformations that have complex
|
||||
// start conditions
|
||||
// pre is applied before any validation is done, post is done after
|
||||
|
||||
$this->info_attr_transform_post[] = new HTMLPurifier_AttrTransform_Lang();
|
||||
|
||||
// protect against stdclasses floating around
|
||||
foreach ($this->info as $key => $obj) {
|
||||
if ($obj instanceof stdClass) {
|
||||
unset($this->info[$key]);
|
||||
function setup() {
|
||||
|
||||
// multiple call guard
|
||||
if ($this->setup) {return;} else {$this->setup = true;}
|
||||
|
||||
$this->processModules();
|
||||
$this->setupConfigStuff();
|
||||
|
||||
unset($this->config);
|
||||
unset($this->manager);
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* Extract out the information from the manager
|
||||
*/
|
||||
function processModules() {
|
||||
|
||||
$this->manager->setup($this->config);
|
||||
|
||||
foreach ($this->manager->activeModules as $module) {
|
||||
foreach($module->info_tag_transform as $k => $v) {
|
||||
if ($v === false) unset($this->info_tag_transform[$k]);
|
||||
else $this->info_tag_transform[$k] = $v;
|
||||
}
|
||||
foreach($module->info_attr_transform_pre as $k => $v) {
|
||||
if ($v === false) unset($this->info_attr_transform_pre[$k]);
|
||||
else $this->info_attr_transform_pre[$k] = $v;
|
||||
}
|
||||
foreach($module->info_attr_transform_post as $k => $v) {
|
||||
if ($v === false) unset($this->info_attr_transform_post[$k]);
|
||||
else $this->info_attr_transform_post[$k] = $v;
|
||||
}
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
// info_block_wrapper : wraps inline elements in block context
|
||||
$this->info = $this->manager->getElements($this->config);
|
||||
$this->info_content_sets = $this->manager->contentSets->lookup;
|
||||
|
||||
$block_wrapper = $config->get('HTML', 'BlockWrapper');
|
||||
if (isset($e_Block->elements[$block_wrapper])) {
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets up stuff based on config. We need a better way of doing this.
|
||||
*/
|
||||
function setupConfigStuff() {
|
||||
|
||||
$block_wrapper = $this->config->get('HTML', 'BlockWrapper');
|
||||
if (isset($this->info_content_sets['Block'][$block_wrapper])) {
|
||||
$this->info_block_wrapper = $block_wrapper;
|
||||
} else {
|
||||
trigger_error('Cannot use non-block element as block wrapper.',
|
||||
E_USER_ERROR);
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
// info_parent : parent element of the HTML fragment
|
||||
|
||||
$parent = $config->get('HTML', 'Parent');
|
||||
if (isset($this->info[$parent])) {
|
||||
$parent = $this->config->get('HTML', 'Parent');
|
||||
$def = $this->manager->getElement($parent, $this->config);
|
||||
if ($def) {
|
||||
$this->info_parent = $parent;
|
||||
$this->info_parent_def = $def;
|
||||
} else {
|
||||
trigger_error('Cannot use unrecognized element as parent.',
|
||||
E_USER_ERROR);
|
||||
$this->info_parent_def = $this->manager->getElement(
|
||||
$this->info_parent, $this->config);
|
||||
}
|
||||
$this->info_parent_def = $this->info[$this->info_parent];
|
||||
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
// %HTML.Allowed(Elements|Attributes) : cut non-allowed elements
|
||||
// support template text
|
||||
$support = "(for information on implementing this, see the ".
|
||||
"support forums) ";
|
||||
|
||||
$allowed_elements = $config->get('HTML', 'AllowedElements');
|
||||
// setup allowed elements, SubtractiveWhitelist module
|
||||
$allowed_elements = $this->config->get('HTML', 'AllowedElements');
|
||||
if (is_array($allowed_elements)) {
|
||||
foreach ($this->info as $name => $d) {
|
||||
if(!isset($allowed_elements[$name])) unset($this->info[$name]);
|
||||
unset($allowed_elements[$name]);
|
||||
}
|
||||
// emit errors
|
||||
foreach ($allowed_elements as $element => $d) {
|
||||
trigger_error("Element '$element' is not supported $support", E_USER_WARNING);
|
||||
}
|
||||
}
|
||||
$allowed_attributes = $config->get('HTML', 'AllowedAttributes');
|
||||
|
||||
$allowed_attributes = $this->config->get('HTML', 'AllowedAttributes');
|
||||
$allowed_attributes_mutable = $allowed_attributes; // by copy!
|
||||
if (is_array($allowed_attributes)) {
|
||||
foreach ($this->info_global_attr as $attr_key => $info) {
|
||||
if (!isset($allowed_attributes["*.$attr_key"])) {
|
||||
unset($this->info_global_attr[$attr_key]);
|
||||
} elseif (isset($allowed_attributes_mutable["*.$attr_key"])) {
|
||||
unset($allowed_attributes_mutable["*.$attr_key"]);
|
||||
}
|
||||
}
|
||||
foreach ($this->info as $tag => $info) {
|
||||
foreach ($info->attr as $attr => $attr_info) {
|
||||
if (!isset($allowed_attributes["$tag.$attr"])) {
|
||||
if (!isset($allowed_attributes["$tag.$attr"]) &&
|
||||
!isset($allowed_attributes["*.$attr"])) {
|
||||
unset($this->info[$tag]->attr[$attr]);
|
||||
} else {
|
||||
if (isset($allowed_attributes_mutable["$tag.$attr"])) {
|
||||
unset($allowed_attributes_mutable["$tag.$attr"]);
|
||||
} elseif (isset($allowed_attributes_mutable["*.$attr"])) {
|
||||
unset($allowed_attributes_mutable["*.$attr"]);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
// emit errors
|
||||
foreach ($allowed_attributes_mutable as $elattr => $d) {
|
||||
list($element, $attribute) = explode('.', $elattr);
|
||||
if ($element == '*') {
|
||||
trigger_error("Global attribute '$attribute' is not ".
|
||||
"supported in any elements $support",
|
||||
E_USER_WARNING);
|
||||
} else {
|
||||
trigger_error("Attribute '$attribute' in element '$element' not supported $support",
|
||||
E_USER_WARNING);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
function setAttrForTableElements($attr, $def) {
|
||||
$this->info['col']->attr[$attr] =
|
||||
$this->info['colgroup']->attr[$attr] =
|
||||
$this->info['tbody']->attr[$attr] =
|
||||
$this->info['td']->attr[$attr] =
|
||||
$this->info['tfoot']->attr[$attr] =
|
||||
$this->info['th']->attr[$attr] =
|
||||
$this->info['thead']->attr[$attr] =
|
||||
$this->info['tr']->attr[$attr] = $def;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* Structure that stores an element definition.
|
||||
*/
|
||||
class HTMLPurifier_ElementDef
|
||||
{
|
||||
|
||||
/**
|
||||
* Associative array of attribute name to HTMLPurifier_AttrDef
|
||||
* @public
|
||||
*/
|
||||
var $attr = array();
|
||||
|
||||
/**
|
||||
* List of tag's HTMLPurifier_AttrTransform to be done before validation
|
||||
* @public
|
||||
*/
|
||||
var $attr_transform_pre = array();
|
||||
|
||||
/**
|
||||
* List of tag's HTMLPurifier_AttrTransform to be done after validation
|
||||
* @public
|
||||
*/
|
||||
var $attr_transform_post = array();
|
||||
|
||||
/**
|
||||
* Lookup table of tags that close this tag.
|
||||
* @public
|
||||
*/
|
||||
var $auto_close = array();
|
||||
|
||||
/**
|
||||
* HTMLPurifier_ChildDef of this tag.
|
||||
* @public
|
||||
*/
|
||||
var $child;
|
||||
|
||||
/**
|
||||
* Type of the tag: inline or block or unknown?
|
||||
* @public
|
||||
*/
|
||||
var $type = 'unknown';
|
||||
|
||||
/**
|
||||
* Lookup table of tags excluded from all descendants of this tag.
|
||||
* @public
|
||||
*/
|
||||
var $excludes = array();
|
||||
|
||||
}
|
||||
|
||||
|
125
library/HTMLPurifier/HTMLModule.php
Normal file
125
library/HTMLPurifier/HTMLModule.php
Normal file
@@ -0,0 +1,125 @@
|
||||
<?php
|
||||
|
||||
/**
|
||||
* Represents an XHTML 1.1 module, with information on elements, tags
|
||||
* and attributes.
|
||||
* @note Even though this is technically XHTML 1.1, it is also used for
|
||||
* regular HTML parsing. We are using modulization as a convenient
|
||||
* way to represent the internals of HTMLDefinition, and our
|
||||
* implementation is by no means conforming and does not directly
|
||||
* use the normative DTDs or XML schemas.
|
||||
* @note The public variables in a module should almost directly
|
||||
* correspond to the variables in HTMLPurifier_HTMLDefinition.
|
||||
* However, the prefix info carries no special meaning in these
|
||||
* objects (include it anyway if that's the correspondence though).
|
||||
*/
|
||||
|
||||
class HTMLPurifier_HTMLModule
|
||||
{
|
||||
/**
|
||||
* Short unique string identifier of the module
|
||||
*/
|
||||
var $name;
|
||||
|
||||
/**
|
||||
* Dynamically set integer that specifies when the module was loaded in.
|
||||
*/
|
||||
var $order;
|
||||
|
||||
/**
|
||||
* Informally, a list of elements this module changes. Not used in
|
||||
* any significant way.
|
||||
* @protected
|
||||
*/
|
||||
var $elements = array();
|
||||
|
||||
/**
|
||||
* Associative array of element names to element definitions.
|
||||
* Some definitions may be incomplete, to be merged in later
|
||||
* with the full definition.
|
||||
* @public
|
||||
*/
|
||||
var $info = array();
|
||||
|
||||
/**
|
||||
* Associative array of content set names to content set additions.
|
||||
* This is commonly used to, say, add an A element to the Inline
|
||||
* content set. This corresponds to an internal variable $content_sets
|
||||
* and NOT info_content_sets member variable of HTMLDefinition.
|
||||
* @public
|
||||
*/
|
||||
var $content_sets = array();
|
||||
|
||||
/**
|
||||
* Associative array of attribute collection names to attribute
|
||||
* collection additions. More rarely used for adding attributes to
|
||||
* the global collections. Example is the StyleAttribute module adding
|
||||
* the style attribute to the Core. Corresponds to HTMLDefinition's
|
||||
* attr_collections->info, since the object's data is only info,
|
||||
* with extra behavior associated with it.
|
||||
* @public
|
||||
*/
|
||||
var $attr_collections = array();
|
||||
|
||||
/**
|
||||
* Associative array of deprecated tag name to HTMLPurifier_TagTransform
|
||||
* @public
|
||||
*/
|
||||
var $info_tag_transform = array();
|
||||
|
||||
/**
|
||||
* List of HTMLPurifier_AttrTransform to be performed before validation.
|
||||
* @public
|
||||
*/
|
||||
var $info_attr_transform_pre = array();
|
||||
|
||||
/**
|
||||
* List of HTMLPurifier_AttrTransform to be performed after validation.
|
||||
* @public
|
||||
*/
|
||||
var $info_attr_transform_post = array();
|
||||
|
||||
/**
|
||||
* Boolean flag that indicates whether or not getChildDef is implemented.
|
||||
* For optimization reasons: may save a call to a function. Be sure
|
||||
* to set it if you do implement getChildDef(), otherwise it will have
|
||||
* no effect!
|
||||
* @public
|
||||
*/
|
||||
var $defines_child_def = false;
|
||||
|
||||
/**
|
||||
* Retrieves a proper HTMLPurifier_ChildDef subclass based on
|
||||
* content_model and content_model_type member variables of
|
||||
* the HTMLPurifier_ElementDef class. There is a similar function
|
||||
* in HTMLPurifier_HTMLDefinition.
|
||||
* @param $def HTMLPurifier_ElementDef instance
|
||||
* @return HTMLPurifier_ChildDef subclass
|
||||
* @public
|
||||
*/
|
||||
function getChildDef($def) {return false;}
|
||||
|
||||
/**
|
||||
* Hook method that lets module perform arbitrary operations on
|
||||
* HTMLPurifier_HTMLDefinition before the module gets processed.
|
||||
* @param $definition Reference to HTMLDefinition being setup
|
||||
*/
|
||||
function preProcess(&$definition) {}
|
||||
|
||||
/**
|
||||
* Hook method that lets module perform arbitrary operations
|
||||
* on HTMLPurifier_HTMLDefinition after the module gets processed.
|
||||
* @param $definition Reference to HTMLDefinition being setup
|
||||
*/
|
||||
function postProcess(&$definition) {}
|
||||
|
||||
/**
|
||||
* Hook method that is called when a module gets registered to
|
||||
* the definition.
|
||||
* @param $definition Reference to HTMLDefinition being setup
|
||||
*/
|
||||
function setup(&$definition) {}
|
||||
|
||||
}
|
||||
|
||||
?>
|
42
library/HTMLPurifier/HTMLModule/Bdo.php
Normal file
42
library/HTMLPurifier/HTMLModule/Bdo.php
Normal file
@@ -0,0 +1,42 @@
|
||||
<?php
|
||||
|
||||
require_once 'HTMLPurifier/HTMLModule.php';
|
||||
require_once 'HTMLPurifier/AttrTransform/BdoDir.php';
|
||||
|
||||
/**
|
||||
* XHTML 1.1 Bi-directional Text Module, defines elements that
|
||||
* declare directionality of content. Text Extension Module.
|
||||
*/
|
||||
class HTMLPurifier_HTMLModule_Bdo extends HTMLPurifier_HTMLModule
|
||||
{
|
||||
|
||||
var $name = 'Bdo';
|
||||
var $elements = array('bdo');
|
||||
var $content_sets = array('Inline' => 'bdo');
|
||||
var $attr_collections = array(
|
||||
'I18N' => array('dir' => false)
|
||||
);
|
||||
|
||||
function HTMLPurifier_HTMLModule_Bdo() {
|
||||
$dir = new HTMLPurifier_AttrDef_Enum(array('ltr','rtl'), false);
|
||||
$this->attr_collections['I18N']['dir'] = $dir;
|
||||
$this->info['bdo'] = new HTMLPurifier_ElementDef();
|
||||
$this->info['bdo']->attr = array(
|
||||
0 => array('Core', 'Lang'),
|
||||
'dir' => $dir, // required
|
||||
// The Abstract Module specification has the attribute
|
||||
// inclusions wrong for bdo: bdo allows
|
||||
// xml:lang too (and we'll toss in lang for good measure,
|
||||
// though it is not allowed for XHTML 1.1, this will
|
||||
// be managed with a global attribute transform)
|
||||
);
|
||||
$this->info['bdo']->content_model = '#PCDATA | Inline';
|
||||
$this->info['bdo']->content_model_type = 'optional';
|
||||
// provides fallback behavior if dir's missing (dir is required)
|
||||
$this->info['bdo']->attr_transform_post['required-dir'] =
|
||||
new HTMLPurifier_AttrTransform_BdoDir();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
?>
|
31
library/HTMLPurifier/HTMLModule/CommonAttributes.php
Normal file
31
library/HTMLPurifier/HTMLModule/CommonAttributes.php
Normal file
@@ -0,0 +1,31 @@
|
||||
<?php
|
||||
|
||||
class HTMLPurifier_HTMLModule_CommonAttributes extends HTMLPurifier_HTMLModule
|
||||
{
|
||||
var $name = 'CommonAttributes';
|
||||
|
||||
var $attr_collections = array(
|
||||
'Core' => array(
|
||||
0 => array('Style'),
|
||||
// 'xml:space' => false,
|
||||
'class' => 'NMTOKENS',
|
||||
'id' => 'ID',
|
||||
'title' => 'CDATA',
|
||||
),
|
||||
'Lang' => array(
|
||||
'xml:lang' => false, // see constructor
|
||||
),
|
||||
'I18N' => array(
|
||||
0 => array('Lang'), // proprietary, for xml:lang/lang
|
||||
),
|
||||
'Common' => array(
|
||||
0 => array('Core', 'I18N')
|
||||
)
|
||||
);
|
||||
|
||||
function HTMLPurifier_HTMLModule_CommonAttributes() {
|
||||
$this->attr_collections['Lang']['xml:lang'] = new HTMLPurifier_AttrDef_Lang();
|
||||
}
|
||||
}
|
||||
|
||||
?>
|
45
library/HTMLPurifier/HTMLModule/Edit.php
Normal file
45
library/HTMLPurifier/HTMLModule/Edit.php
Normal file
@@ -0,0 +1,45 @@
|
||||
<?php
|
||||
|
||||
require_once 'HTMLPurifier/HTMLModule.php';
|
||||
require_once 'HTMLPurifier/ChildDef/Chameleon.php';
|
||||
|
||||
/**
|
||||
* XHTML 1.1 Edit Module, defines editing-related elements. Text Extension
|
||||
* Module.
|
||||
*/
|
||||
class HTMLPurifier_HTMLModule_Edit extends HTMLPurifier_HTMLModule
|
||||
{
|
||||
|
||||
var $name = 'Edit';
|
||||
var $elements = array('del', 'ins');
|
||||
var $content_sets = array('Inline' => 'del | ins');
|
||||
|
||||
function HTMLPurifier_HTMLModule_Edit() {
|
||||
foreach ($this->elements as $element) {
|
||||
$this->info[$element] = new HTMLPurifier_ElementDef();
|
||||
$this->info[$element]->attr = array(
|
||||
0 => array('Common'),
|
||||
'cite' => 'URI',
|
||||
// 'datetime' => 'Datetime' // Datetime not implemented
|
||||
);
|
||||
// Inline context ! Block context (exclamation mark is
|
||||
// separator, see getChildDef for parsing)
|
||||
$this->info[$element]->content_model =
|
||||
'#PCDATA | Inline ! #PCDATA | Flow';
|
||||
// HTML 4.01 specifies that ins/del must not contain block
|
||||
// elements when used in an inline context, chameleon is
|
||||
// a complicated workaround to acheive this effect
|
||||
$this->info[$element]->content_model_type = 'chameleon';
|
||||
}
|
||||
}
|
||||
|
||||
var $defines_child_def = true;
|
||||
function getChildDef($def) {
|
||||
if ($def->content_model_type != 'chameleon') return false;
|
||||
$value = explode('!', $def->content_model);
|
||||
return new HTMLPurifier_ChildDef_Chameleon($value[0], $value[1]);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
?>
|
36
library/HTMLPurifier/HTMLModule/Hypertext.php
Normal file
36
library/HTMLPurifier/HTMLModule/Hypertext.php
Normal file
@@ -0,0 +1,36 @@
|
||||
<?php
|
||||
|
||||
require_once 'HTMLPurifier/HTMLModule.php';
|
||||
require_once 'HTMLPurifier/AttrDef/HTML/LinkTypes.php';
|
||||
|
||||
/**
|
||||
* XHTML 1.1 Hypertext Module, defines hypertext links. Core Module.
|
||||
*/
|
||||
class HTMLPurifier_HTMLModule_Hypertext extends HTMLPurifier_HTMLModule
|
||||
{
|
||||
|
||||
var $name = 'Hypertext';
|
||||
var $elements = array('a');
|
||||
var $content_sets = array('Inline' => 'a');
|
||||
|
||||
function HTMLPurifier_HTMLModule_Hypertext() {
|
||||
$this->info['a'] = new HTMLPurifier_ElementDef();
|
||||
$this->info['a']->attr = array(
|
||||
0 => array('Common'),
|
||||
// 'accesskey' => 'Character',
|
||||
// 'charset' => 'Charset',
|
||||
'href' => 'URI',
|
||||
//'hreflang' => 'LanguageCode',
|
||||
'rel' => new HTMLPurifier_AttrDef_HTML_LinkTypes('rel'),
|
||||
'rev' => new HTMLPurifier_AttrDef_HTML_LinkTypes('rev'),
|
||||
//'tabindex' => 'Number',
|
||||
//'type' => 'ContentType',
|
||||
);
|
||||
$this->info['a']->content_model = '#PCDATA | Inline';
|
||||
$this->info['a']->content_model_type = 'optional';
|
||||
$this->info['a']->excludes = array('a' => true);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
?>
|
37
library/HTMLPurifier/HTMLModule/Image.php
Normal file
37
library/HTMLPurifier/HTMLModule/Image.php
Normal file
@@ -0,0 +1,37 @@
|
||||
<?php
|
||||
|
||||
require_once 'HTMLPurifier/HTMLModule.php';
|
||||
|
||||
require_once 'HTMLPurifier/AttrDef/URI.php';
|
||||
require_once 'HTMLPurifier/AttrTransform/ImgRequired.php';
|
||||
|
||||
/**
|
||||
* XHTML 1.1 Image Module provides basic image embedding.
|
||||
* @note There is specialized code for removing empty images in
|
||||
* HTMLPurifier_Strategy_RemoveForeignElements
|
||||
*/
|
||||
class HTMLPurifier_HTMLModule_Image extends HTMLPurifier_HTMLModule
|
||||
{
|
||||
|
||||
var $name = 'Image';
|
||||
var $elements = array('img');
|
||||
var $content_sets = array('Inline' => 'img');
|
||||
|
||||
function HTMLPurifier_HTMLModule_Image() {
|
||||
$this->info['img'] = new HTMLPurifier_ElementDef();
|
||||
$this->info['img']->attr = array(
|
||||
0 => array('Common'),
|
||||
'alt' => 'Text',
|
||||
'height' => 'Length',
|
||||
'longdesc' => 'URI',
|
||||
'src' => new HTMLPurifier_AttrDef_URI(true), // embedded
|
||||
'width' => 'Length'
|
||||
);
|
||||
$this->info['img']->content_model_type = 'empty';
|
||||
$this->info['img']->attr_transform_post[] =
|
||||
new HTMLPurifier_AttrTransform_ImgRequired();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
?>
|
60
library/HTMLPurifier/HTMLModule/Legacy.php
Normal file
60
library/HTMLPurifier/HTMLModule/Legacy.php
Normal file
@@ -0,0 +1,60 @@
|
||||
<?php
|
||||
|
||||
/**
|
||||
* XHTML 1.1 Legacy module defines elements that were previously
|
||||
* deprecated.
|
||||
*
|
||||
* @note Not all legacy elements have been implemented yet, which
|
||||
* is a bit of a reverse problem as compared to browsers! In
|
||||
* addition, this legacy module may implement a bit more than
|
||||
* mandated by XHTML 1.1.
|
||||
*
|
||||
* This module can be used in combination with TransformToStrict in order
|
||||
* to transform as many deprecated elements as possible, but retain
|
||||
* questionably deprecated elements that do not have good alternatives
|
||||
* as well as transform elements that don't have an implementation.
|
||||
* See docs/ref-strictness.txt for more details.
|
||||
*/
|
||||
|
||||
class HTMLPurifier_HTMLModule_Legacy extends HTMLPurifier_HTMLModule
|
||||
{
|
||||
|
||||
// incomplete
|
||||
|
||||
var $name = 'Legacy';
|
||||
var $elements = array('u', 's', 'strike');
|
||||
var $non_standalone_elements = array('li', 'ol', 'address', 'blockquote');
|
||||
|
||||
function HTMLPurifier_HTMLModule_Legacy() {
|
||||
// setup new elements
|
||||
foreach ($this->elements as $name) {
|
||||
$this->info[$name] = new HTMLPurifier_ElementDef();
|
||||
// for u, s, strike, as more elements get added, add
|
||||
// conditionals as necessary
|
||||
$this->info[$name]->content_model = 'Inline | #PCDATA';
|
||||
$this->info[$name]->content_model_type = 'optional';
|
||||
$this->info[$name]->attr[0] = array('Common');
|
||||
}
|
||||
|
||||
// setup modifications to old elements
|
||||
foreach ($this->non_standalone_elements as $name) {
|
||||
$this->info[$name] = new HTMLPurifier_ElementDef();
|
||||
$this->info[$name]->standalone = false;
|
||||
}
|
||||
|
||||
$this->info['li']->attr['value'] = new HTMLPurifier_AttrDef_Integer();
|
||||
$this->info['ol']->attr['start'] = new HTMLPurifier_AttrDef_Integer();
|
||||
|
||||
$this->info['address']->content_model = 'Inline | #PCDATA | p';
|
||||
$this->info['address']->content_model_type = 'optional';
|
||||
$this->info['address']->child = false;
|
||||
|
||||
$this->info['blockquote']->content_model = 'Flow | #PCDATA';
|
||||
$this->info['blockquote']->content_model_type = 'optional';
|
||||
$this->info['blockquote']->child = false;
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
?>
|
46
library/HTMLPurifier/HTMLModule/List.php
Normal file
46
library/HTMLPurifier/HTMLModule/List.php
Normal file
@@ -0,0 +1,46 @@
|
||||
<?php
|
||||
|
||||
require_once 'HTMLPurifier/HTMLModule.php';
|
||||
|
||||
/**
|
||||
* XHTML 1.1 List Module, defines list-oriented elements. Core Module.
|
||||
*/
|
||||
class HTMLPurifier_HTMLModule_List extends HTMLPurifier_HTMLModule
|
||||
{
|
||||
|
||||
var $name = 'List';
|
||||
var $elements = array('dl', 'dt', 'dd', 'ol', 'ul', 'li');
|
||||
|
||||
// According to the abstract schema, the List content set is a fully formed
|
||||
// one or more expr, but it invariably occurs in an optional declaration
|
||||
// so we're not going to do that subtlety. It might cause trouble
|
||||
// if a user defines "List" and expects that multiple lists are
|
||||
// allowed to be specified, but then again, that's not very intuitive.
|
||||
// Furthermore, the actual XML Schema may disagree. Regardless,
|
||||
// we don't have support for such nested expressions without using
|
||||
// the incredibly inefficient and draconic Custom ChildDef.
|
||||
var $content_sets = array('List' => 'dl | ol | ul', 'Flow' => 'List');
|
||||
|
||||
function HTMLPurifier_HTMLModule_List() {
|
||||
foreach ($this->elements as $element) {
|
||||
$this->info[$element] = new HTMLPurifier_ElementDef();
|
||||
$this->info[$element]->attr = array(0 => array('Common'));
|
||||
if ($element == 'li' || $element == 'dd') {
|
||||
$this->info[$element]->content_model = '#PCDATA | Flow';
|
||||
$this->info[$element]->content_model_type = 'optional';
|
||||
} elseif ($element == 'ol' || $element == 'ul') {
|
||||
$this->info[$element]->content_model = 'li';
|
||||
$this->info[$element]->content_model_type = 'required';
|
||||
}
|
||||
}
|
||||
$this->info['dt']->content_model = '#PCDATA | Inline';
|
||||
$this->info['dt']->content_model_type = 'optional';
|
||||
$this->info['dl']->content_model = 'dt | dd';
|
||||
$this->info['dl']->content_model_type = 'required';
|
||||
// this could be a LOT more robust
|
||||
$this->info['li']->auto_close = array('li' => true);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
?>
|
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user