mirror of
https://github.com/ezyang/htmlpurifier.git
synced 2025-08-04 05:07:55 +02:00
Compare commits
168 Commits
Author | SHA1 | Date | |
---|---|---|---|
|
7e8c7d8c64 | ||
|
17d32bac7f | ||
|
e2babe5308 | ||
|
5f1a6b883f | ||
|
c5e3796202 | ||
|
72f1984229 | ||
|
918081b372 | ||
|
6c56dd070f | ||
|
299f93f8f0 | ||
|
4169846c57 | ||
|
aff4957531 | ||
|
e4bdf472a6 | ||
|
9a99750474 | ||
|
7eb751b5f5 | ||
|
0d0173eb6e | ||
|
556ed4ea90 | ||
|
cf445a6107 | ||
|
243ad45e59 | ||
|
31d0c621f5 | ||
|
0870974a25 | ||
|
5c4a0a6785 | ||
|
e55babdc53 | ||
|
6e1b540d99 | ||
|
edf20018f0 | ||
|
c09432e171 | ||
|
9c031b5c1e | ||
|
a827cbc3ba | ||
|
c05eebee15 | ||
|
93a69d020a | ||
|
f3fa9c01ba | ||
|
bae5b0c022 | ||
|
67befbc8a8 | ||
|
cac22f01cf | ||
|
94d2dbaa74 | ||
|
6add828bc8 | ||
|
800b67ed65 | ||
|
71e4ddd222 | ||
|
54a68a1713 | ||
|
bd544ad038 | ||
|
d5491da77f | ||
|
591fc0ae28 | ||
|
dac7ac1eae | ||
|
64ee756b7a | ||
|
e2103ce0f2 | ||
|
219902ebff | ||
|
21116373a7 | ||
|
5ed88809f3 | ||
|
bb8b38b1e0 | ||
|
236159242f | ||
|
9d8f839bf2 | ||
|
882148f9ad | ||
|
a863f62489 | ||
|
6478c7c2df | ||
|
129a4ea506 | ||
|
a122243a89 | ||
|
315c55eeb1 | ||
|
cfe50ff8ae | ||
|
d0018a2696 | ||
|
77d9e05a07 | ||
|
80243f377c | ||
|
43b157cf4d | ||
|
f6b50d4bfd | ||
|
806901cfd2 | ||
|
f90eef7f1f | ||
|
06867e14b6 | ||
|
bda2615b30 | ||
|
e1a5d10e75 | ||
|
98fd6b7d82 | ||
|
be264a4b20 | ||
|
01c85b71d2 | ||
|
2d22c0aa55 | ||
|
6e061f5184 | ||
|
44b988f1f6 | ||
|
0ead9558b4 | ||
|
159a1cced1 | ||
|
6871a54d64 | ||
|
96ac7e8797 | ||
|
2d49299621 | ||
|
ab5c782c77 | ||
|
8893b87e04 | ||
|
aeef746060 | ||
|
da13c6ac87 | ||
|
ccae73c25a | ||
|
8d6bfa4037 | ||
|
712d81ebea | ||
|
f7f6fed86a | ||
|
2293c67eec | ||
|
108df87824 | ||
|
5e366b25f8 | ||
|
2e16c4a968 | ||
|
a8db22dfff | ||
|
fbe2c25f8a | ||
|
158be61def | ||
|
d693c4ea09 | ||
|
c24916e1d6 | ||
|
a68b6afda1 | ||
|
78cf7db82e | ||
|
9b375fdfb8 | ||
|
0dd866cc15 | ||
|
ad1169c711 | ||
|
2816ae535f | ||
|
462d3ab72f | ||
|
cf1d868782 | ||
|
c705e17a58 | ||
|
1cce367950 | ||
|
61f852d429 | ||
|
3a73c2cf04 | ||
|
e75b676656 | ||
|
b53370efbf | ||
|
d60f345cab | ||
|
aefda60696 | ||
|
2ffa5d3135 | ||
|
23d3490d49 | ||
|
582ffc4143 | ||
|
d52189a19d | ||
|
02006d6e64 | ||
|
dcaa374dae | ||
|
e2cc37724b | ||
|
3ad6239dc3 | ||
|
663fb4e1b2 | ||
|
a909632d2d | ||
|
18f7c85ebe | ||
|
4c54283642 | ||
|
688b1833f5 | ||
|
ceb1b9ccdb | ||
|
52fb35b0bb | ||
|
b6e222cbc2 | ||
|
dcfd8f5641 | ||
|
48da08ab78 | ||
|
360f984f63 | ||
|
41a25cb6b8 | ||
|
a0fd6a9f5c | ||
|
66e1d2732a | ||
|
b73b5100fd | ||
|
d886ed59fd | ||
|
cbb492c52c | ||
|
4f8f022eac | ||
|
301b2585ae | ||
|
8e733a52fb | ||
|
2a01cf786e | ||
|
825b0671b5 | ||
|
4bdc0446de | ||
|
45a70e8ae4 | ||
|
1fe60c9b9d | ||
|
dc0e2c6b3e | ||
|
9bbbb87ffa | ||
|
b63b0be21f | ||
|
73a1e31fad | ||
|
775763c583 | ||
|
49cb2a4a7c | ||
|
61b6ee7183 | ||
|
d7ce6b4587 | ||
|
f67ee19f31 | ||
|
92b3f0e817 | ||
|
3c4da9666f | ||
|
925a07b828 | ||
|
94db380271 | ||
|
b9e7ba6a2f | ||
|
b1b3377b9c | ||
|
d8673539ab | ||
|
3b26e5dc5b | ||
|
c5ea987069 | ||
|
b152448608 | ||
|
b0575cb888 | ||
|
224ef774f7 | ||
|
18a83acc5d | ||
|
f9090e45c0 | ||
|
450523a9ca |
2
Doxyfile
2
Doxyfile
@@ -4,7 +4,7 @@
|
||||
# Project related configuration options
|
||||
#---------------------------------------------------------------------------
|
||||
PROJECT_NAME = HTML Purifier
|
||||
PROJECT_NUMBER = 1.2.0
|
||||
PROJECT_NUMBER = 1.5.0
|
||||
OUTPUT_DIRECTORY = "C:/Documents and Settings/Edward/My Documents/My Webs/htmlpurifier/docs/doxygen"
|
||||
CREATE_SUBDIRS = NO
|
||||
OUTPUT_LANGUAGE = English
|
||||
|
1
INSTALL
1
INSTALL
@@ -8,6 +8,7 @@ installation GUI, you've come to the wrong place!) The impatient can scroll
|
||||
down to the bottom of this INSTALL document to see the code, but you really
|
||||
should make sure a few things are properly done.
|
||||
|
||||
Todo: Convert to using the array syntax for configuration.
|
||||
|
||||
|
||||
1. Compatibility
|
||||
|
105
NEWS
105
NEWS
@@ -9,6 +9,111 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier
|
||||
. Internal change
|
||||
==========================
|
||||
|
||||
1.5.0, released 2007-03-23
|
||||
! Added a rudimentary I18N and L10N system modeled off MediaWiki. It
|
||||
doesn't actually do anything yet, but keep your eyes peeled.
|
||||
! docs/enduser-utf8.html explains how to use UTF-8 and HTML Purifier
|
||||
! Newly structured HTMLDefinition modeled off of XHTML 1.1 modules.
|
||||
I am loathe to release beta quality APIs, but this is exactly that;
|
||||
don't use the internal interfaces if you're not willing to do migration
|
||||
later on.
|
||||
- Allow 'x' subtag in language codes
|
||||
- Fixed buggy chameleon-support for ins and del
|
||||
. Added support for IDREF attributes (i.e. for)
|
||||
. Renamed HTMLPurifier_AttrDef_Class to HTMLPurifier_AttrDef_Nmtokens
|
||||
. Removed context variable ParentType, replaced with IsInline, which
|
||||
is false when you're not inline and an integer of the parent that
|
||||
caused you to become inline when you are (so possibly zero)
|
||||
. Removed ElementDef->type in favor of ElementDef->descendants_are_inline
|
||||
and HTMLDefinition->content_sets
|
||||
. StrictBlockquote now reports what elements its supposed to allow,
|
||||
rather than what it does allow
|
||||
. Removed HTMLDefinition->info_flow_elements in favor of
|
||||
HTMLDefinition->content_sets['Flow']
|
||||
. Removed redundant "exclusionary" definitions from DTD roster
|
||||
. StrictBlockquote now requires a construction parameter as if it
|
||||
were an Required ChildDef, this is the "real" set of allowed elements
|
||||
. AttrDef partitioned into HTML, CSS and URI segments
|
||||
. Modify Youtube filter regexp to be multiline
|
||||
. Require both PHP5 and DOM extension in order to use DOMLex, fixes
|
||||
some edge cases where a DOMDocument class exists in a PHP4 environment
|
||||
due to DOM XML extension.
|
||||
|
||||
1.4.1, released 2007-01-21
|
||||
! docs/enduser-youtube.html updated according to new functionality
|
||||
- YouTube IDs can have underscores and dashes
|
||||
|
||||
1.4.0, released 2007-01-21
|
||||
! Implemented list-style-image, URIs now allowed in list-style
|
||||
! Implemented background-image, background-repeat, background-attachment
|
||||
and background-position CSS properties. Shorthand property background
|
||||
supports all of these properties.
|
||||
! Configuration documentation looks nicer
|
||||
! Added %Core.EscapeNonASCIICharacters to workaround loss of Unicode
|
||||
characters while %Core.Encoding is set to a non-UTF-8 encoding.
|
||||
! Support for configuration directive aliases added
|
||||
! Config object can now be instantiated from ini files
|
||||
! YouTube preservation code added to the core, with two lines of code
|
||||
you can add it as a filter to your code. See smoketests/preserveYouTube.php
|
||||
for sample code.
|
||||
! Moved SLOW to docs/enduser-slow.html and added code examples
|
||||
- Replaced version check with functionality check for DOM (thanks Stephen
|
||||
Khoo)
|
||||
. Added smoketest 'all.php', which loads all other smoketests via frames
|
||||
. Implemented AttrDef_CSSURI for url(http://google.com) style declarations
|
||||
. Added convenient single test selector form on test runner
|
||||
|
||||
1.3.2, released 2006-12-25
|
||||
! HTMLPurifier object now accepts configuration arrays, no need to manually
|
||||
instantiate a configuration object
|
||||
! Context object now accessible to outside
|
||||
! Added enduser-youtube.html, explains how to embed YouTube videos. See
|
||||
also corresponding smoketest preserveYouTube.php.
|
||||
! Added purifyArray(), which takes a list of HTML and purifies it all
|
||||
! Added static member variable $version to HTML Purifier with PHP-compatible
|
||||
version number string.
|
||||
- Fixed fatal error thrown by upper-cased language attributes
|
||||
- printDefinition.php: added labels, added better clarification
|
||||
. HTMLPurifier_Config::create() added, takes mixed variable and converts into
|
||||
a HTMLPurifier_Config object.
|
||||
|
||||
1.3.1, released 2006-12-06
|
||||
! Added HTMLPurifier.func.php stub for a convenient function to call the library
|
||||
- Fixed bug in RemoveInvalidImg code that caused all images to be dropped
|
||||
(thanks to .mario for reporting this)
|
||||
. Standardized all attribute handling variables to attr, made it plural
|
||||
|
||||
1.3.0, released 2006-11-26
|
||||
# Invalid images are now removed, rather than replaced with a dud
|
||||
<img src="" alt="Invalid image" />. Previous behavior can be restored
|
||||
with new directive %Core.RemoveInvalidImg set to false.
|
||||
! (X)HTML Strict now supported
|
||||
+ Transparently handles inline elements in block context (blockquote)
|
||||
! Added GET method to demo for easier validation, added 50kb max input size
|
||||
! New directive %HTML.BlockWrapper, for block-ifying inline elements
|
||||
! New directive %HTML.Parent, allows you to only allow inline content
|
||||
! New directives %HTML.AllowedElements and %HTML.AllowedAttributes to let
|
||||
users narrow the set of allowed tags
|
||||
! <li value="4"> and <ul start="2"> now allowed in loose mode
|
||||
! New directives %URI.DisableExternalResources and %URI.DisableResources
|
||||
! New directive %Attr.DisableURI, which eliminates all hyperlinking
|
||||
! New directive %URI.Munge, munges URI so you can use some sort of redirector
|
||||
service to avoid PageRank leaks or warn users that they are exiting your site.
|
||||
! Added spiffy new smoketest printDefinition.php, which lets you twiddle with
|
||||
the configuration settings and see how the internal rules are affected.
|
||||
! New directive %URI.HostBlacklist for blocking links to bad hosts.
|
||||
xssAttacks.php smoketest updated accordingly.
|
||||
- Added missing type to ChildDef_Chameleon
|
||||
- Remove Tidy option from demo if there is not Tidy available
|
||||
. ChildDef_Required guards against empty tags
|
||||
. Lookup table HTMLDefinition->info_flow_elements added
|
||||
. Added peace-of-mind variable initialization to Strategy_FixNesting
|
||||
. Added HTMLPurifier->info_parent_def, parent child processing made special
|
||||
. Added internal documents briefly summarizing future progression of HTML
|
||||
. HTMLPurifier_Config->getBatch($namespace) added
|
||||
. More lenient casting to bool from string in HTMLPurifier_ConfigSchema
|
||||
. Refactored ChildDef classes into their own files
|
||||
|
||||
1.2.0, released 2006-11-19
|
||||
# ID attributes now disabled by default. New directives:
|
||||
+ %HTML.EnableAttrID - restores old behavior by allowing IDs
|
||||
|
25
README
25
README
@@ -1,13 +1,22 @@
|
||||
|
||||
README
|
||||
All about HTMLPurifier
|
||||
All about HTML Purifier
|
||||
|
||||
HTMLPurifier is an HTML filtering solution. It uses a unique combination of
|
||||
robust whitelists and agressive parsing to ensure that not only are XSS
|
||||
attacks thwarted, but the resulting HTML is standards compliant.
|
||||
HTML Purifier is an HTML filtering solution that uses a unique combination
|
||||
of robust whitelists and agressive parsing to ensure that not only are
|
||||
XSS attacks thwarted, but the resulting HTML is standards compliant.
|
||||
|
||||
See INSTALL on how to use the library. See docs/ for more developer-oriented
|
||||
documentation as well as some code examples. Users of TinyMCE or FCKeditor
|
||||
may be especially interested in WYSIWYG.
|
||||
HTML Purifier is oriented towards richly formatted documents from
|
||||
untrusted sources that require CSS and a full tag-set. This library can
|
||||
be configured to accept a more restrictive set of tags, but it won't be
|
||||
as efficient as more bare-bones parsers. It will, however, do the job
|
||||
right, which may be more important.
|
||||
|
||||
HTMLPurifier can be found on the web at: http://hp.jpsband.org/
|
||||
Places to go:
|
||||
|
||||
* See INSTALL for a quick installation guide
|
||||
* See docs/ for developer-oriented documentation, code examples and
|
||||
an in-depth installation guide.
|
||||
* See WYSIWYG for information on editors like TinyMCE and FCKeditor
|
||||
|
||||
HTML Purifier can be found on the web at: http://hp.jpsband.org/
|
||||
|
40
SLOW
40
SLOW
@@ -1,40 +0,0 @@
|
||||
|
||||
SLOW
|
||||
also known as the HELP ME LIBRARY IS TOO SLOW MY PAGE TAKE TOO LONG LOAD page
|
||||
|
||||
HTML Purifier is a very powerful library. But with power comes great
|
||||
responsibility, or, at least, longer execution times. Remember, this
|
||||
library isn't lightly grazing over submitted HTML: it's deconstructing
|
||||
the whole thing, rigorously checking the parts, and then putting it
|
||||
back together.
|
||||
|
||||
So, if it so turns out that HTML Purifier is kinda too slow for outbound
|
||||
filtering, you've got a few options:
|
||||
|
||||
1. Inbound filtering - perform filtering of HTML when it's submitted by the
|
||||
user. Since the user is already submitting something, an extra half a
|
||||
second tacked on to the load time probably isn't going to be that huge of
|
||||
a problem. Then, displaying the content is a simple a manner of outputting
|
||||
it directly from your database/filesystem. The trouble with this method is
|
||||
that your user loses the original text, and when doing edits, will be
|
||||
handling the filtered text. While this may be a good thing, especially if
|
||||
you're using a WYSIWYG editor, it can also result in data-loss if a user
|
||||
makes a typo.
|
||||
|
||||
2. Caching the filtered output - accept the submitted text and put it
|
||||
unaltered into the database, but then also generate a filtered version and
|
||||
stash that in the database. Serve the filtered version to readers, and the
|
||||
unaltered version to editors. If need be, you can invalidate the cache and
|
||||
have the cached filtered version be regenerated on the first page view. Pros?
|
||||
Full data retention. Cons? It's more complicated, and opens other editors
|
||||
up to XSS if they are using a WYSIWYG editor (to fix that, they'd have to
|
||||
be able to get their hands on the *really* original text served in plaintext
|
||||
mode).
|
||||
|
||||
In short, inbound filtering is almost as simple as outbound filtering, but
|
||||
it has some drawbacks which cannot be fixed unless you save both the original
|
||||
and the filtered versions.
|
||||
|
||||
There is a third option: profile and optimize HTMLPurifier yourself. Be sure
|
||||
to report back your results if you decide to do that! Especially if you
|
||||
port HTML Purifier to C++. ;-)
|
123
TODO
123
TODO
@@ -1,72 +1,89 @@
|
||||
|
||||
TODO List
|
||||
|
||||
1.2 release
|
||||
- Make URI validation routines tighter (especially mailto)
|
||||
- More extensive URI filtering schemes (see docs/proposal-new-directives.txt)
|
||||
- Allow for background-image and list-style-image (see above)
|
||||
- Error logging for filtering/cleanup procedures
|
||||
- Rich set* methods and config file loaders for HTMLPurifier_Config
|
||||
= KEY ====================
|
||||
# Flagship
|
||||
- Regular
|
||||
? At-risk
|
||||
==========================
|
||||
|
||||
1.3 release
|
||||
- Add various "levels" of cleaning
|
||||
- Related: Allow strict (X)HTML
|
||||
1.6 release
|
||||
# Implement all non-essential attribute transforms, configurable
|
||||
# URI validation routines tighter (see docs/dev-code-quality.html) (COMPLEX)
|
||||
# Advanced URI filtering schemes (see docs/proposal-new-directives.txt)
|
||||
# Error logging for filtering/cleanup procedures
|
||||
- Requires I18N facilities to be created first (COMPLEX)
|
||||
? Configuration profiles: sets of directives that get set with one func call
|
||||
- XSS-attempt detection
|
||||
- Implement IDREF support
|
||||
|
||||
1.7 release
|
||||
# Add pre-packaged "levels" of cleaning (custom behavior already done)
|
||||
- More fine-grained control over escaping behavior
|
||||
- Silently drop content inbetween SCRIPT tags (can be generalized to allow
|
||||
specification of elements that, when detected as foreign, trigger removal
|
||||
of children, although unbalanced tags could wreck havoc (or at least
|
||||
delete the rest of the document)).
|
||||
- Allow specifying global attributes on a tag-by-tag basis in
|
||||
%HTML.AllowAttributes
|
||||
? More user-friendly warnings when %HTML.Allow* attempts to specify a
|
||||
tag or attribute that is not supported
|
||||
- Parse TinyMCE whitelist into our %HTML.Allow* whitelists
|
||||
|
||||
1.4 release
|
||||
- Additional support for poorly written HTML
|
||||
- Implement all non-essential attribute transforms
|
||||
- Microsoft Word HTML cleaning (i.e. MsoNormal)
|
||||
|
||||
2.0 release
|
||||
- Formatters for plaintext
|
||||
- Auto-paragraphing (be sure to leverage fact that we know when things
|
||||
shouldn't be paragraphed, such as lists and tables).
|
||||
- Linkify URLs
|
||||
- Smileys
|
||||
- Linkification for HTML Purifier docs: notably configuration and
|
||||
class names
|
||||
|
||||
3.0 release
|
||||
- Extended HTML capabilities based on namespacing and tag transforms
|
||||
- Hooks for adding custom processors to custom namespaced tags and
|
||||
attributes, offer default implementation
|
||||
- Lots of documentation and samples
|
||||
|
||||
Ongoing
|
||||
- Lots of profiling, make it faster!
|
||||
- Plugins for major CMSes (very tricky issue)
|
||||
|
||||
Unknown release (on a scratch-an-itch basis)
|
||||
- Fixes for Firefox's inability to handle COL alignment props (Bug 915)
|
||||
- Automatically add non-breaking spaces to empty table cells when
|
||||
empty-cells:show is applied to have compatibility with Internet Explorer
|
||||
- Convert RTL/LTR override characters to <bdo> tags, or vice versa on demand.
|
||||
Also, enable disabling of directionality
|
||||
- Append something to duplicate IDs so they're still usable (impl. note: the
|
||||
dupe detector would also need to detect the suffix as well)
|
||||
- Have 'lang' attribute be checked against official lists
|
||||
|
||||
Encoding workarounds
|
||||
- Non-lossy dumb alternate character encoding transformations, achieved by
|
||||
numerically encoding all non-ASCII characters
|
||||
- Semi-lossy dumb alternate character encoding transformations, achieved by
|
||||
encoding all characters that have string entity equivalents
|
||||
|
||||
Requested
|
||||
- Native content compression, whitespace stripping (don't rely on Tidy, make
|
||||
sure we don't remove from <pre> or related tags)
|
||||
- Win32 Phalanger C# binaries (?)
|
||||
1.8 release
|
||||
# Additional support for poorly written HTML
|
||||
- Microsoft Word HTML cleaning (i.e. MsoNormal, but research essential!)
|
||||
- Friendly strict handling of <address> (block -> <br>)
|
||||
- Remove redundant tags, ex. <u><u>Underlined</u></u>. Implementation notes:
|
||||
1. Analyzing which tags to remove duplicants
|
||||
2. Ensure attributes are merged into the parent tag
|
||||
3. Extend the tag exclusion system to specify whether or not the
|
||||
contents should be dropped or not (currently, there's code that could do
|
||||
something like this if it didn't drop the inner text too.)
|
||||
- Remove <span> tags that don't do anything (no attributes)
|
||||
- Remove empty inline tags<i></i>
|
||||
- Append something to duplicate IDs so they're still usable (impl. note: the
|
||||
dupe detector would also need to detect the suffix as well)
|
||||
|
||||
2.0 release
|
||||
# Legit token based CSS parsing (will require revamping almost every
|
||||
AttrDef class)
|
||||
# Formatters for plaintext (COMPLEX)
|
||||
- Auto-paragraphing (be sure to leverage fact that we know when things
|
||||
shouldn't be paragraphed, such as lists and tables).
|
||||
- Linkify URLs
|
||||
- Smileys
|
||||
- Linkification for HTML Purifier docs: notably configuration and classes
|
||||
|
||||
3.0 release
|
||||
- Extended HTML capabilities based on namespacing and tag transforms (COMPLEX)
|
||||
- Hooks for adding custom processors to custom namespaced tags and
|
||||
attributes, offer default implementation
|
||||
- Lots of documentation and samples
|
||||
- Allow tags to be "armored", an internal flag that protects them
|
||||
from validation and passes them out unharmed
|
||||
- XHTML 1.1 support
|
||||
- Fixes for Firefox's inability to handle COL alignment props (Bug 915)
|
||||
- Automatically add non-breaking spaces to empty table cells when
|
||||
empty-cells:show is applied to have compatibility with Internet Explorer
|
||||
- Convert RTL/LTR override characters to <bdo> tags, or vice versa on demand.
|
||||
Also, enable disabling of directionality
|
||||
|
||||
Ongoing
|
||||
- Lots of profiling, make it faster!
|
||||
- Plugins for major CMSes (COMPLEX)
|
||||
- WordPress
|
||||
- eFiction
|
||||
- more! (look for ones that use WYSIWYGs)
|
||||
|
||||
Unknown release (on a scratch-an-itch basis)
|
||||
- Have 'lang' attribute be checked against official lists
|
||||
? Semi-lossy dumb alternate character encoding transformations, achieved by
|
||||
encoding all characters that have string entity equivalents
|
||||
|
||||
Requested
|
||||
? Native content compression, whitespace stripping (don't rely on Tidy, make
|
||||
sure we don't remove from <pre> or related tags)
|
||||
|
||||
Wontfix
|
||||
- Non-lossy smart alternate character encoding transformations (unless
|
||||
|
3
WYSIWYG
3
WYSIWYG
@@ -18,4 +18,5 @@ HTML Purifier is perfect for filtering pure-HTML input from WYSIWYG editors.
|
||||
Enough said.
|
||||
|
||||
There is a proof-of-concept integration of HTML Purifier with the Mantis
|
||||
bugtracker at http://hp.jpsband.org/mantis/
|
||||
bugtracker at http://hp.jpsband.org/mantis/ You can see notes on how
|
||||
this integration was acheived at http://hp.jpsband.org/mantis_notes.txt
|
||||
|
BIN
art/1000passes.png
Normal file
BIN
art/1000passes.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 3.4 KiB |
@@ -7,6 +7,7 @@ set_include_path(get_include_path() . PATH_SEPARATOR . '../library/');
|
||||
|
||||
require_once 'HTMLPurifier/ConfigSchema.php';
|
||||
require_once 'HTMLPurifier/Config.php';
|
||||
require_once 'HTMLPurifier/Context.php';
|
||||
|
||||
$LEXERS = array();
|
||||
$RUNS = isset($GLOBALS['HTMLPurifierTest']['Runs'])
|
||||
@@ -93,11 +94,14 @@ function print_lexers() {
|
||||
function do_benchmark($name, $document) {
|
||||
global $LEXERS, $RUNS;
|
||||
|
||||
$config = HTMLPurifier_Config::createDefault();
|
||||
$context = new HTMLPurifier_Context();
|
||||
|
||||
$timer = new RowTimer($name);
|
||||
$timer->start();
|
||||
|
||||
foreach($LEXERS as $key => $lexer) {
|
||||
for ($i=0; $i<$RUNS; $i++) $tokens = $lexer->tokenizeHTML($document);
|
||||
for ($i=0; $i<$RUNS; $i++) $tokens = $lexer->tokenizeHTML($document, $config, $context);
|
||||
$timer->setMarker($key);
|
||||
}
|
||||
|
||||
|
@@ -5,12 +5,15 @@ set_include_path(get_include_path() . PATH_SEPARATOR . '../library/');
|
||||
require_once 'HTMLPurifier/ConfigSchema.php';
|
||||
require_once 'HTMLPurifier/Config.php';
|
||||
require_once 'HTMLPurifier/Lexer/DirectLex.php';
|
||||
require_once 'HTMLPurifier/Context.php';
|
||||
|
||||
$input = file_get_contents('samples/Lexer/4.html');
|
||||
$lexer = new HTMLPurifier_Lexer_DirectLex();
|
||||
$config = HTMLPurifier_Config::createDefault();
|
||||
$context = new HTMLPurifier_Context();
|
||||
|
||||
for ($i = 0; $i < 10; $i++) {
|
||||
$tokens = $lexer->tokenizeHTML($input);
|
||||
$tokens = $lexer->tokenizeHTML($input, $config, $context);
|
||||
}
|
||||
|
||||
?>
|
@@ -99,6 +99,8 @@ foreach($schema->info as $namespace_name => $namespace_info) {
|
||||
|
||||
foreach ($namespace_info as $name => $info) {
|
||||
|
||||
if ($info->class == 'alias') continue;
|
||||
|
||||
$dom_directive = $dom_document->createElement('directive');
|
||||
$dom_namespace->appendChild($dom_directive);
|
||||
|
||||
@@ -186,7 +188,7 @@ $xsl_processor->importStylesheet($xsl_dom_stylesheet);
|
||||
$html_output = $xsl_processor->transformToXML($dom_document);
|
||||
|
||||
// some slight fudges to preserve backwards compatibility
|
||||
$html_output = str_replace('/>', ' />', $html_output); // <br /> not <br>
|
||||
$html_output = str_replace('/>', ' />', $html_output); // <br /> not <br/>
|
||||
$html_output = str_replace(' xmlns=""', '', $html_output); // rm unnecessary xmlns
|
||||
|
||||
if (class_exists('Tidy')) {
|
||||
|
@@ -1,3 +1,6 @@
|
||||
|
||||
body {margin:1em 4em;}
|
||||
|
||||
table {border-collapse:collapse;}
|
||||
table td, table th {padding:0.2em;}
|
||||
|
||||
@@ -8,3 +11,14 @@ table.constraints td pre {margin:0;}
|
||||
|
||||
#toc {list-style-type:none; font-weight:bold;}
|
||||
#toc ul {list-style-type:disc; font-weight:normal;}
|
||||
|
||||
.description p {margin-top:0;margin-bottom:1em;}
|
||||
|
||||
#library, h1 {text-align:center; font-family:Garamond, serif;
|
||||
font-variant:small-caps;}
|
||||
#library {font-size:1em;}
|
||||
h1 {margin-top:0;}
|
||||
h2 {border-bottom:1px solid #CCC; font-family:sans-serif; font-weight:normal;
|
||||
font-size:1.3em;}
|
||||
h3 {font-family:sans-serif; font-size:1.1em; font-weight:bold; }
|
||||
h4 {font-family:sans-serif; font-size:0.9em; font-weight:bold; }
|
||||
|
@@ -18,12 +18,13 @@
|
||||
<xsl:template match="/">
|
||||
<html lang="en" xml:lang="en">
|
||||
<head>
|
||||
<title><xsl:value-of select="/configdoc/title" /> Configuration Documentation</title>
|
||||
<title>Configuration Documentation - <xsl:value-of select="/configdoc/title" /></title>
|
||||
<meta http-equiv="Content-Type" content="text/html;charset=UTF-8" />
|
||||
<link rel="stylesheet" type="text/css" href="styles/plain.css" />
|
||||
</head>
|
||||
<body>
|
||||
<h1><xsl:value-of select="/configdoc/title" /> Configuration Documentation</h1>
|
||||
<div id="library"><xsl:value-of select="/configdoc/title" /></div>
|
||||
<h1>Configuration Documentation</h1>
|
||||
<h2>Table of Contents</h2>
|
||||
<ul id="toc">
|
||||
<xsl:apply-templates mode="toc" />
|
||||
|
188
docs/dev-advanced-api.html
Normal file
188
docs/dev-advanced-api.html
Normal file
@@ -0,0 +1,188 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
|
||||
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
|
||||
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"><head>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
|
||||
<meta name="description" content="Functional specification for HTML Purifier's advanced API for defining custom filtering behavior." />
|
||||
<link rel="stylesheet" type="text/css" href="style.css" />
|
||||
|
||||
<title>Advanced API - HTML Purifier</title>
|
||||
|
||||
</head><body>
|
||||
|
||||
<h1>Advanced API</h1>
|
||||
|
||||
<div id="filing">Filed under Development</div>
|
||||
<div id="index">Return to the <a href="index.html">index</a>.</div>
|
||||
<div id="home"><a href="http://hp.jpsband.org/">HTML Purifier</a> End-User Documentation</div>
|
||||
|
||||
<p>It makes no sense to adopt a <q>one-size-fits-all</q> approach to
|
||||
filtersets: therefore, users must be able to define their own sets of
|
||||
<q>allowed</q> elements, as well as switch in-between doctypes of HTML.</p>
|
||||
|
||||
<p>Our goals are to let the user:</p>
|
||||
|
||||
<dl>
|
||||
<dt>Select</dt>
|
||||
<dd><ul>
|
||||
<li>Doctype</li>
|
||||
<li>Filtersets: Rich / Plain / Full ...</li>
|
||||
<li>Mode: Lenient / Correctional</li>
|
||||
<li>Collections (?): Safe / Unsafe</li>
|
||||
<li>Modules / Tags / Attributes</li>
|
||||
</ul></dd>
|
||||
<dt>Customize</dt>
|
||||
<dd><ul>
|
||||
<li>Tags / Attributes / Attribute Types</li>
|
||||
<li>Filtersets</li>
|
||||
<li>Root Node</li>
|
||||
</ul></dd>
|
||||
<dt>Create</dt>
|
||||
<dd><ul>
|
||||
<li>Modules / Tags / Attributes / Attribute Types</li>
|
||||
<li>Filtersets</li>
|
||||
<li>Doctype</li>
|
||||
</ul></dd>
|
||||
</dl>
|
||||
|
||||
<h2>Select</h2>
|
||||
|
||||
<h3>Selecting a Doctype</h3>
|
||||
|
||||
<p>By default, users will use a doctype-based, permissive but secure
|
||||
whitelist. They must define a <strong>doctype</strong>, and this serves
|
||||
as the first method of determining a filterset.</p>
|
||||
|
||||
<p class="technical">This identifier is based
|
||||
on the name the W3C has given to the document type and <em>not</em>
|
||||
the DTD identifier.</p>
|
||||
|
||||
<p>This parameter is set via the configuration object:</p>
|
||||
|
||||
<pre>$config->set('HTML', 'Doctype', 'XHTML 1.0 Transitional');</pre>
|
||||
|
||||
<h3>Selecting a Filterset</h3>
|
||||
|
||||
<p>However, selecting this doctype doesn't mean much, because if we
|
||||
adhered exactly to the definition we would be letting XSS and other
|
||||
nasties through. HTML Purifier must, in its filterset, allow a subset
|
||||
of the doctype, which we shall call a <strong>filterset</strong>.</p>
|
||||
|
||||
<p>By default, HTML Purifier will use the <strong>Rich</strong>
|
||||
filterset, which allows as many elements as possible with untrusted
|
||||
sources. Other possible filtersets could be:</p>
|
||||
|
||||
<dl>
|
||||
<dt>Full</dt>
|
||||
<dd>Allows the full span of elements in the doctype, good if you want
|
||||
HTML Purifier to work as a Tidy substitute but not to strip
|
||||
anything out.</dd>
|
||||
<dt>Plain</dt>
|
||||
<dd>Provides a minimum set of tags for semantic markup of things
|
||||
like blog comments.</dd>
|
||||
</dl>
|
||||
|
||||
<p>Extension-authors would be able to define custom filtersets for
|
||||
other users to use.</p>
|
||||
|
||||
<p>A possible call to select a filterset would be:</p>
|
||||
|
||||
<pre>$config->set('HTML', 'Filterset', 'Rich');</pre>
|
||||
|
||||
<h3>Selecting Mode</h3>
|
||||
|
||||
<p>Within filtersets, there are various <strong>modes</strong> of operation.
|
||||
These indicate variant behaviors that, while not strictly changing the
|
||||
allowed set of elements and attributes, will definitely affect the output.
|
||||
Currently, we have two modes, which may be used together:</p>
|
||||
|
||||
<dl>
|
||||
<dt>Lenient</dt>
|
||||
<dd>Deprecated elements and attributes will be transformed into
|
||||
standards-compliant alternatives when explicitly disallowed. For
|
||||
example, in the XHTML 1.0 Strict doctype, a <code>center</code>
|
||||
tag would be turned into a <code>div</code> with the CSS property
|
||||
<code>text-align:center;</code>, but in XHTML 1.0 Transitional
|
||||
the tag would be preserved. This mode is on by default.</dd>
|
||||
<dt>Correctional</dt>
|
||||
<dd>Deprecated elements and attributes will be transformed into
|
||||
standards-compliant alternatives whenever possible. Referring
|
||||
back to the previous example, the <code>center</code> tag would
|
||||
be transformed in both cases. However, tags without a
|
||||
reasonable standards-compliant alternative will be preserved
|
||||
in their form. This mode is on by default. It may have
|
||||
various levels of operation.</dd>
|
||||
</dl>
|
||||
|
||||
<p>A possible call to select modes would be:</p>
|
||||
|
||||
<pre>$config->set('HTML', 'Mode', array('correctional', 'lenient'));</pre>
|
||||
|
||||
<p>If modes have extra parameters, a hash might work well:</p>
|
||||
|
||||
<pre>$config->set('HTML', 'Mode', array(
|
||||
'correctional' => 9, // strongest level
|
||||
'lenient' => true // this one's just boolean
|
||||
));</pre>
|
||||
|
||||
<p>Modes may possibly be wrapped up with the filterset declaration:</p>
|
||||
|
||||
<pre>$config->set('HTML', 'Filterset', 'Rich: correctional, lenient');</pre>
|
||||
|
||||
<p>Further investigation in this field is necessary.</p>
|
||||
|
||||
<h3>Selecting Modules / Tags / Attributes</h3>
|
||||
|
||||
<p>If this cookie cutter approach doesn't appeal to a user, they may
|
||||
decide to roll their own filterset by selecting modules, tags and
|
||||
attributes to allow.</p>
|
||||
|
||||
<p class="technical">This would make use of the same facilities
|
||||
as a filterset author would use, except that it would go under an
|
||||
<q>anonymous</q> filterset that would be auto-selected if any of the
|
||||
relevant module/tag/attribute selection configuration directives were
|
||||
non-null.</p>
|
||||
|
||||
<p>On the highest level, a user will usually be most interested in
|
||||
directly specifying which elements and attributes are desired. For
|
||||
example:</p>
|
||||
|
||||
<pre>$config->set('HTML', 'AllowedElements', 'a,b,em,p,blockquote,code,i');</pre>
|
||||
|
||||
<p>Attribute declarations could be merged into this declaration as such:</p>
|
||||
|
||||
<pre>$config->set('HTML', 'Allowed', 'a[href,title],b,em,p[class],blockquote[cite],code,i');</pre>
|
||||
|
||||
<p>...or be kept separate:</p>
|
||||
|
||||
<pre>$config->set('HTML', 'AllowedAttributes', 'a.href,a.title,p.class,blockquote.cite');</pre>
|
||||
|
||||
<p class="technical">Considering that, internally speaking, as mandated by
|
||||
the XHTML 1.1 Modularization specification, we have organized our
|
||||
elements around modules, considerable gymnastics will be needed to
|
||||
get this sort of functionality working.</p>
|
||||
|
||||
<p>A user may also specify a module to load a class of elements and attributes
|
||||
into their filterest:</p>
|
||||
|
||||
<pre>$config->set('HTML', 'Allowed', 'Hypertext,Core');</pre>
|
||||
|
||||
<p class="fixme">The granularity of these modules is too coarse for
|
||||
the average user (for example, the core module loads everything from
|
||||
the essential <code>p</code> tag to the not-so-safe <code>h1</code>
|
||||
tag). How do we make this still a viable solution?</p>
|
||||
|
||||
<h3>Unified selector</h3>
|
||||
|
||||
<p>Because selecting each and every one of these configuration options
|
||||
is a chore, we may wish to offer a specialized configuration method
|
||||
for selecting a filterset. Possibility:</p>
|
||||
|
||||
<pre>function selectFilter($doctype, $filterset, $mode)</pre>
|
||||
|
||||
<p>...which is simply a light wrapper over the individual configuration
|
||||
calls. A custom config file format or text format could also be adopted.</p>
|
||||
|
||||
<div id="version">$Id$</div>
|
||||
|
||||
</body></html>
|
@@ -14,6 +14,7 @@
|
||||
|
||||
<div id="filing">Filed under Development</div>
|
||||
<div id="index">Return to the <a href="index.html">index</a>.</div>
|
||||
<div id="home"><a href="http://hp.jpsband.org/">HTML Purifier</a> End-User Documentation</div>
|
||||
|
||||
<p>Okay, face it. Programmers can get lazy, cut corners, or make mistakes. They
|
||||
also can do quick prototypes, and then forget to rewrite them later. Well,
|
||||
@@ -22,6 +23,8 @@ of code that should be aggressively refactored. This does not list
|
||||
optimization issues, that needs to be done after intense profiling.</p>
|
||||
|
||||
<pre>
|
||||
docs/examples/demo.php - ad hoc HTML/PHP soup to the extreme
|
||||
|
||||
AttrDef
|
||||
Class - doesn't support Unicode characters (fringe); uses regular
|
||||
expressions
|
||||
@@ -32,7 +35,8 @@ AttrDef
|
||||
Number - constructor interface inconsistent with Integer
|
||||
ConfigSchema - redefinition is a mess
|
||||
Strategy
|
||||
FixNesting - cannot bubble nodes out of structures
|
||||
FixNesting - cannot bubble nodes out of structures, duplicated checks
|
||||
for special-case parent node
|
||||
MakeWellFormed - insufficient automatic closing definitions (check HTML
|
||||
spec for optional end tags, also, closing based on type (block/inline)
|
||||
might be efficient).
|
||||
|
@@ -14,6 +14,7 @@
|
||||
|
||||
<div id="filing">Filed under Development</div>
|
||||
<div id="index">Return to the <a href="index.html">index</a>.</div>
|
||||
<div id="home"><a href="http://hp.jpsband.org/">HTML Purifier</a> End-User Documentation</div>
|
||||
|
||||
<p>The classes in this library follow a few naming conventions, which may
|
||||
help you find the correct functionality more quickly. Here they are:</p>
|
||||
@@ -54,8 +55,9 @@ help you find the correct functionality more quickly. Here they are:</p>
|
||||
abbreviated version is more readable than the full version. Here, we
|
||||
list common abbreviations:
|
||||
<ul>
|
||||
<li>Attr(s) to Attribute(s)</li>
|
||||
<li>Attr to Attributes (note that it is plural, i.e. <code>$attr = array()</code>)</li>
|
||||
<li>Def to Definition</li>
|
||||
<li><code>$ret</code> is the value to be returned in a function</li>
|
||||
</ul>
|
||||
</dd>
|
||||
|
||||
|
@@ -14,6 +14,7 @@
|
||||
|
||||
<div id="filing">Filed under Development</div>
|
||||
<div id="index">Return to the <a href="index.html">index</a>.</div>
|
||||
<div id="home"><a href="http://hp.jpsband.org/">HTML Purifier</a> End-User Documentation</div>
|
||||
|
||||
<p>Here are some possible optimization techniques we can apply to code sections if
|
||||
they turn out to be slow. Be sure not to prematurely optimize: if you get
|
||||
|
@@ -32,6 +32,7 @@ thead th {text-align:left;padding:0.1em;background-color:#EEE;}
|
||||
|
||||
<div id="filing">Filed under Development</div>
|
||||
<div id="index">Return to the <a href="index.html">index</a>.</div>
|
||||
<div id="home"><a href="http://hp.jpsband.org/">HTML Purifier</a> End-User Documentation</div>
|
||||
|
||||
<h2>Key</h2>
|
||||
|
||||
@@ -59,7 +60,7 @@ thead th {text-align:left;padding:0.1em;background-color:#EEE;}
|
||||
<tbody>
|
||||
<tr><th colspan="2">Standard</th></tr>
|
||||
<tr class="css1 impl-yes"><td>background-color</td><td>COMPOSITE(<color>, transparent)</td></tr>
|
||||
<tr class="css1 impl-yes"><td>background</td><td>SHORTHAND, only for color, see below for info on background-image and friends</td></tr>
|
||||
<tr class="css1 impl-yes"><td>background</td><td>SHORTHAND, currently alias for background-color</td></tr>
|
||||
<tr class="css1 impl-yes"><td>border</td><td>SHORTHAND, MULTIPLE</td></tr>
|
||||
<tr class="css1 impl-yes"><td>border-color</td><td>MULTIPLE</td></tr>
|
||||
<tr class="css1 impl-yes"><td>border-style</td><td>MULTIPLE</td></tr>
|
||||
@@ -128,29 +129,30 @@ thead th {text-align:left;padding:0.1em;background-color:#EEE;}
|
||||
|
||||
<tbody>
|
||||
<tr><th colspan="2">Absolute positioning, unknown release milestone</th></tr>
|
||||
<tr class="danger"><td>bottom</td><td rowspan="4">Dangerous, must be non-negative</td></tr>
|
||||
<tr class="danger"><td>left</td></tr>
|
||||
<tr class="danger"><td>right</td></tr>
|
||||
<tr class="danger"><td>top</td></tr>
|
||||
<tr><td>clip</td><td>-</td></tr>
|
||||
<tr class="danger"><td>position</td><td>ENUM(static, relative, absolute, fixed), permit
|
||||
<tr class="danger impl-no"><td>bottom</td><td rowspan="4">Dangerous, must be non-negative to even be considered,
|
||||
but it's still possible to arbitrarily position by running over.</td></tr>
|
||||
<tr class="danger impl-no"><td>left</td></tr>
|
||||
<tr class="danger impl-no"><td>right</td></tr>
|
||||
<tr class="danger impl-no"><td>top</td></tr>
|
||||
<tr class="impl-no"><td>clip</td><td>-</td></tr>
|
||||
<tr class="danger impl-no"><td>position</td><td>ENUM(static, relative, absolute, fixed)
|
||||
relative not absolute?</td></tr>
|
||||
<tr class="danger"><td>z-index</td><td>Dangerous</td></tr>
|
||||
<tr class="danger impl-no"><td>z-index</td><td>Dangerous</td></tr>
|
||||
</tbody>
|
||||
|
||||
<tbody>
|
||||
<tr><th colspan="2">Unknown</th></tr>
|
||||
<tr class="danger css1"><td>background-image</td><td>Dangerous, target milestone 1.2</td></tr>
|
||||
<tr class="css1"><td>background-attachment</td><td>ENUM(scroll, fixed),
|
||||
<tr class="danger css1 impl-yes"><td>background-image</td><td>Dangerous, target milestone 1.3</td></tr>
|
||||
<tr class="css1 impl-yes"><td>background-attachment</td><td>ENUM(scroll, fixed),
|
||||
Depends on background-image</td></tr>
|
||||
<tr class="css1"><td>background-position</td><td>Depends on background-image</td></tr>
|
||||
<tr class="css1 impl-yes"><td>background-position</td><td>Depends on background-image</td></tr>
|
||||
<tr class="danger impl-no"><td>cursor</td><td>Dangerous but fluffy</td></tr>
|
||||
<tr class="danger css1"><td>display</td><td>ENUM(...), Dangerous but interesting;
|
||||
will not implement list-item, run-in (Opera only) or table (no IE);
|
||||
inline-block has incomplete IE6 support and requires -moz-inline-box
|
||||
for Mozilla. Unknown target milestone.</td></tr>
|
||||
<tr><td class="css1">height</td><td>Interesting, why use it? Unknown target milestone.</td></tr>
|
||||
<tr class="danger css1"><td>list-style-image</td><td>Dangerous? Target milestone 1.2</td></tr>
|
||||
<tr class="css1"><td>height</td><td>Interesting, why use it? Unknown target milestone.</td></tr>
|
||||
<tr class="danger css1 impl-yes"><td>list-style-image</td><td>Dangerous?</td></tr>
|
||||
<tr class="impl-no"><td>max-height</td><td rowspan="4">No IE 5/6</td></tr>
|
||||
<tr class="impl-no"><td>min-height</td></tr>
|
||||
<tr class="impl-no"><td>max-width</td></tr>
|
||||
@@ -229,14 +231,14 @@ Mozilla on inside and needs -moz-outline, no IE support.</td></tr>
|
||||
|
||||
<tbody>
|
||||
<tr><th colspan="3">CSS</th></tr>
|
||||
<tr class="impl-yes"><td>style</td><td>All</td><td>Not all properties may be implemented, parser is good though.</td></tr>
|
||||
<tr class="impl-yes"><td>style</td><td>All</td><td>Parser is reasonably functional. Status here doesn't count individual properties.</td></tr>
|
||||
</tbody>
|
||||
|
||||
<tbody>
|
||||
<tr><th colspan="3">Questionable</th></tr>
|
||||
<tr class="impl-no"><td>accesskey</td><td>A</td><td>May interfere with main interface</td></tr>
|
||||
<tr class="impl-no"><td>tabindex</td><td>A</td><td>May interfere with main interface</td></tr>
|
||||
<tr><td>target</td><td>A</td><td>Config enabled, only useful for frame layouts</td></tr>
|
||||
<tr><td>target</td><td>A</td><td>Config enabled, only useful for frame layouts, disallowed in strict</td></tr>
|
||||
</tbody>
|
||||
|
||||
<tbody>
|
||||
@@ -264,13 +266,13 @@ Mozilla on inside and needs -moz-outline, no IE support.</td></tr>
|
||||
<tr><td rowspan="5">align</td><td>CAPTION</td><td>Near-equiv style 'caption-side', drop left and right</td></tr>
|
||||
<tr><td>IMG</td><td rowspan="2">Margin-left and margin-right = auto or parent div</td></tr>
|
||||
<tr><td>TABLE</td></tr>
|
||||
<tr><td>HR</td><td>Equivalent style 'text-align' (IE tested)</td></tr>
|
||||
<tr><td>HR</td><td>Near-equivalent style 'text-align' (Works for IE and Opera, but not Firefox). Also try <code>margin-right:auto; margin-left:0;</code> for left or <code>margin-right:0; margin-left:auto;</code> for right (optionally replacing 0 with the original margin for that side)</td></tr>
|
||||
<tr class="impl-yes"><td>H1, H2, H3, H4, H5, H6, P</td><td>Equivalent style 'text-align'</td></tr>
|
||||
<tr class="required impl-yes"><td>alt</td><td>IMG</td><td>Required, insert image filename if src is present or default invalid image text</td></tr>
|
||||
<tr><td rowspan="3">bgcolor</td><td>TABLE</td><td>Equivalent style 'background-color' (IE tested)</td></tr>
|
||||
<tr><td>TR</td><td>Equivalent style 'background-color' (IE tested)</td></tr>
|
||||
<tr><td rowspan="3">bgcolor</td><td>TABLE</td><td>Equivalent style 'background-color'</td></tr>
|
||||
<tr><td>TR</td><td>Equivalent style 'background-color'</td></tr>
|
||||
<tr><td>TD, TH</td><td>Equivalent style 'background-color'</td></tr>
|
||||
<tr><td>border</td><td>IMG</td><td>Equivalent style 'border-width', only applies when link present</td></tr>
|
||||
<tr><td>border</td><td>IMG</td><td>Near equivalent style 'border-width', as it only applies when link present</td></tr>
|
||||
<tr><td>clear</td><td>BR</td><td>Near-equiv style 'clear', transform 'all' into 'both'</td></tr>
|
||||
<tr class="impl-no"><td>compact</td><td>DL, OL, UL</td><td>Boolean, needs custom CSS class; rarely used anyway</td></tr>
|
||||
<tr class="required impl-yes"><td>dir</td><td>BDO</td><td>Required, insert ltr (or configuration value) if none</td></tr>
|
||||
@@ -283,11 +285,11 @@ Mozilla on inside and needs -moz-outline, no IE support.</td></tr>
|
||||
<tr><td>nowrap</td><td>TD, TH</td><td>Boolean, style 'white-space:nowrap;' (not compat with IE5)</td></tr>
|
||||
<tr><td>size</td><td>HR</td><td>Near-equiv 'width', needs px suffix if original was pixels</td></tr>
|
||||
<tr class="required impl-yes"><td>src</td><td>IMG</td><td>Required, insert blank or default img if not set</td></tr>
|
||||
<tr><td>start</td><td>OL</td><td>Poorly supported 'counter-reset', transform may not be desirable</td></tr>
|
||||
<tr class="impl-yes"><td>start</td><td>OL</td><td>Poorly supported 'counter-reset', allowed in loose, dropped in strict</td></tr>
|
||||
<tr><td rowspan="3">type</td><td>LI</td><td rowspan="3">Equivalent style 'list-style-type', different allowed values though. (needs testing)</td></tr>
|
||||
<tr><td>OL</td></tr>
|
||||
<tr><td>UL</td></tr>
|
||||
<tr><td>value</td><td>LI</td><td>Poorly supported 'counter-reset', transform may not be desirable, see ol.start. Configurable.</td></tr>
|
||||
<tr class="impl-yes"><td>value</td><td>LI</td><td>Poorly supported 'counter-reset', allowed in loose, dropped in strict</td></tr>
|
||||
<tr><td>vspace</td><td>IMG</td><td>Near-equiv styles 'margin-left' and 'margin-right', needs px suffix, see hspace</td></tr>
|
||||
<tr><td rowspan="2">width</td><td>HR</td><td rowspan="2">Near-equiv style 'width', needs px suffix if original was pixels</td></tr>
|
||||
<tr><td>TD, TH</td></tr>
|
||||
|
@@ -15,6 +15,7 @@
|
||||
|
||||
<div id="filing">Filed under End-User</div>
|
||||
<div id="index">Return to the <a href="index.html">index</a>.</div>
|
||||
<div id="home"><a href="http://hp.jpsband.org/">HTML Purifier</a> End-User Documentation</div>
|
||||
|
||||
<p>Prior to HTML Purifier 1.2.0, this library blithely accepted user input that
|
||||
looked like this:</p>
|
||||
|
@@ -36,7 +36,7 @@ forgiving lexer. You may also be interested in the unit tests located in the
|
||||
tests/ folder, which provide a living document on how exactly the filter deals
|
||||
with malformed input.
|
||||
|
||||
In summary:
|
||||
In summary (see corresponding classes for more details):
|
||||
|
||||
1. Parse document into an array of tag and text tokens (Lexer)
|
||||
2. Remove all elements not on whitelist and transform certain other elements
|
||||
|
@@ -6,43 +6,17 @@ through negligence of people. This class will do its job: no more, no less,
|
||||
and it's up to you to provide it the proper information and proper context
|
||||
to be effective. Things to remember:
|
||||
|
||||
1. Character Encoding: UTF-8.
|
||||
Currently, the parser runs under the assumption that it is dealing
|
||||
with UTF-8. Not ISO-8859-1 or Windows-1252, UTF-8. And definitely not "no
|
||||
character encoding explicitly stated" or UTF-7. If you're not using UTF-8 as
|
||||
your character encoding, make sure you configure HTML Purifier or switch
|
||||
to UTF-8. Now. Also, make sure any input is properly converted to UTF-8, or
|
||||
the parser will mangle it badly (though it won't be a security risk if you're
|
||||
outputting it as UTF-8 though). Character encoding is, in general, a knotty
|
||||
issue, but do yourself a favor and learn about it:
|
||||
<http://www.joelonsoftware.com/articles/Unicode.html>
|
||||
1. Character Encoding: see enduser-utf8.html for more info.
|
||||
|
||||
2. Doctype: XHTML 1.0 Transitional
|
||||
This is what the parser is outputting. For the most
|
||||
part, it's compatible with HTML 4.01, but XHTML enforces some very nice things
|
||||
that all web developers should use. Regardless, NO DOCTYPE is a NO. Quirks mode
|
||||
has waaaay too many quirks for a little parser to handle. We did not select
|
||||
strict in order to prevent ourselves from being too draconic on users, but
|
||||
this may be configurable in the future. Do you want standards compliance?
|
||||
The doctype is a good place to start.
|
||||
2. Doctype: document pending feature completion
|
||||
Not strictly necessary, actually. More in-depth discussion once we figure
|
||||
out how to get strict loose mode working.
|
||||
|
||||
3. IDs
|
||||
They need to be unique, but without some knowledge of the
|
||||
rest of the document, it's difficult to know what's unique. %Attr.IDBlacklist
|
||||
needs to be set: we may want to consider disallowing IDs by default to
|
||||
save lazy programmers.
|
||||
3. IDs: see enduser-id.html for more info
|
||||
|
||||
4. [PROJECTED] Links
|
||||
We're not going to try for spam protection (although
|
||||
some hooks for such a module might be nice) but we may offer the ability to
|
||||
only accept relative URLs. Pick the one that's right for you.
|
||||
4. Links: document pending feature completion
|
||||
Rudimentary blacklisting, we should also allow only relative URIs. We
|
||||
need a doc to explain the stuff.
|
||||
|
||||
5. CSS
|
||||
While we can prevent the most flagrant cases from affecting your
|
||||
layout (such as absolutely positioned elements), no amount of code is going
|
||||
to protect your pages from being attacked by garish colors and plain old
|
||||
bad taste. A neat feature would be the ability to define acceptable colors
|
||||
in a document, but that's not likely to be implemented for a while. In the
|
||||
meantime, be sure to make sure that floated elements (permitted, since they
|
||||
can be quite useful) can't mess up your layout. Once again, we may want to
|
||||
disable this by default to protect lazy developers.
|
||||
5. CSS: document pending
|
||||
Explain which CSS styles we blocked and why.
|
||||
|
117
docs/enduser-slow.html
Normal file
117
docs/enduser-slow.html
Normal file
@@ -0,0 +1,117 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
|
||||
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
|
||||
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"><head>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
|
||||
<meta name="description" content="Explains how to speed up HTML Purifier through caching or inbound filtering." />
|
||||
<link rel="stylesheet" type="text/css" href="./style.css" />
|
||||
|
||||
<title>Speeding up HTML Purifier - HTML Purifier</title>
|
||||
|
||||
</head><body>
|
||||
|
||||
<h1 class="subtitled">Speeding up HTML Purifier</h1>
|
||||
<div class="subtitle">...also known as the HELP ME LIBRARY IS TOO SLOW MY PAGE TAKE TOO LONG page</div>
|
||||
|
||||
<div id="filing">Filed under End-User</div>
|
||||
<div id="index">Return to the <a href="index.html">index</a>.</div>
|
||||
<div id="home"><a href="http://hp.jpsband.org/">HTML Purifier</a> End-User Documentation</div>
|
||||
|
||||
<p>HTML Purifier is a very powerful library. But with power comes great
|
||||
responsibility, in the form of longer execution times. Remember, this
|
||||
library isn't lightly grazing over submitted HTML: it's deconstructing
|
||||
the whole thing, rigorously checking the parts, and then putting it back
|
||||
together. </p>
|
||||
|
||||
<p>So, if it so turns out that HTML Purifier is kinda too slow for outbound
|
||||
filtering, you've got a few options: </p>
|
||||
|
||||
<h2>Inbound filtering</h2>
|
||||
|
||||
<p>Perform filtering of HTML when it's submitted by the user. Since the
|
||||
user is already submitting something, an extra half a second tacked on
|
||||
to the load time probably isn't going to be that huge of a problem.
|
||||
Then, displaying the content is a simple a manner of outputting it
|
||||
directly from your database/filesystem. The trouble with this method is
|
||||
that your user loses the original text, and when doing edits, will be
|
||||
handling the filtered text. While this may be a good thing, especially
|
||||
if you're using a WYSIWYG editor, it can also result in data-loss if a
|
||||
user makes a typo. </p>
|
||||
|
||||
<p>Example (non-functional):</p>
|
||||
|
||||
<pre><?php
|
||||
/**
|
||||
* FORM SUBMISSION PAGE
|
||||
* display_error($message) : displays nice error page with message
|
||||
* display_success() : displays a nice success page
|
||||
* display_form() : displays the HTML submission form
|
||||
* database_insert($html) : inserts data into database as new row
|
||||
*/
|
||||
if (!empty($_POST)) {
|
||||
require_once '/path/to/library/HTMLPurifier.auto.php';
|
||||
require_once 'HTMLPurifier.func.php';
|
||||
$dirty_html = isset($_POST['html']) ? $_POST['html'] : false;
|
||||
if (!$dirty_html) {
|
||||
display_error('You must write some HTML!');
|
||||
}
|
||||
$html = HTMLPurifier($dirty_html);
|
||||
database_insert($html);
|
||||
display_success();
|
||||
// notice that $dirty_html is *not* saved
|
||||
} else {
|
||||
display_form();
|
||||
}
|
||||
?></pre>
|
||||
|
||||
<h2>Caching the filtered output</h2>
|
||||
|
||||
<p>Accept the submitted text and put it unaltered into the database, but
|
||||
then also generate a filtered version and stash that in the database.
|
||||
Serve the filtered version to readers, and the unaltered version to
|
||||
editors. If need be, you can invalidate the cache and have the cached
|
||||
filtered version be regenerated on the first page view. Pros? Full data
|
||||
retention. Cons? It's more complicated, and opens other editors up to
|
||||
XSS if they are using a WYSIWYG editor (to fix that, they'd have to be
|
||||
able to get their hands on the *really* original text served in
|
||||
plaintext mode). </p>
|
||||
|
||||
<p>Example (non-functional):</p>
|
||||
|
||||
<pre><?php
|
||||
/**
|
||||
* VIEW PAGE
|
||||
* display_error($message) : displays nice error page with message
|
||||
* cache_get($id) : retrieves HTML from fast cache (db or file)
|
||||
* cache_insert($id, $html) : inserts good HTML into cache system
|
||||
* database_get($id) : retrieves raw HTML from database
|
||||
*/
|
||||
$id = isset($_GET['id']) ? (int) $_GET['id'] : false;
|
||||
if (!$id) {
|
||||
display_error('Must specify ID.');
|
||||
exit;
|
||||
}
|
||||
$html = cache_get($id); // filesystem or database
|
||||
if ($html === false) {
|
||||
// cache didn't have the HTML, generate it
|
||||
$raw_html = database_get($id);
|
||||
require_once '/path/to/library/HTMLPurifier.auto.php';
|
||||
require_once 'HTMLPurifier.func.php';
|
||||
$html = HTMLPurifier($raw_html);
|
||||
cache_insert($id, $html);
|
||||
}
|
||||
echo $html;
|
||||
?></pre>
|
||||
|
||||
<h2>Summary</h2>
|
||||
|
||||
<p>In short, inbound filtering is the simple option and caching is the
|
||||
robust option (albeit with bigger storage requirements). </p>
|
||||
|
||||
<p>There is a third option, independent of the two we've discussed: profile
|
||||
and optimize HTMLPurifier yourself. Be sure to report back your results
|
||||
if you decide to do that! Especially if you port HTML Purifier to C++.
|
||||
<tt>;-)</tt></p>
|
||||
|
||||
</body>
|
||||
</html>
|
1042
docs/enduser-utf8.html
Normal file
1042
docs/enduser-utf8.html
Normal file
File diff suppressed because it is too large
Load Diff
152
docs/enduser-youtube.html
Normal file
152
docs/enduser-youtube.html
Normal file
@@ -0,0 +1,152 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
|
||||
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
|
||||
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"><head>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
|
||||
<meta name="description" content="Explains how to safely allow the embedding of flash from trusted sites in HTML Purifier." />
|
||||
<link rel="stylesheet" type="text/css" href="./style.css" />
|
||||
|
||||
<title>Embedding YouTube Videos - HTML Purifier</title>
|
||||
|
||||
</head><body>
|
||||
|
||||
<h1 class="subtitled">Embedding YouTube Videos</h1>
|
||||
<div class="subtitle">...as well as other dangerous active content</div>
|
||||
|
||||
<div id="filing">Filed under End-User</div>
|
||||
<div id="index">Return to the <a href="index.html">index</a>.</div>
|
||||
<div id="home"><a href="http://hp.jpsband.org/">HTML Purifier</a> End-User Documentation</div>
|
||||
|
||||
<p>Clients like their YouTube videos. It gives them a warm fuzzy feeling when
|
||||
they see a neat little embedded video player on their websites that can play
|
||||
the latest clips from their documentary "Fido and the Bones of Spring".
|
||||
All joking aside, the ability to embed YouTube videos or other active
|
||||
content in their pages is something that a lot of people like.</p>
|
||||
|
||||
<p>This is a <em>bad</em> idea. The moment you embed anything untrusted,
|
||||
you will definitely be slammed by a manner of nasties that can be
|
||||
embedded in things from your run of the mill Flash movie to
|
||||
<a href="http://blog.spywareguide.com/2006/12/myspace_phish_attack_leads_use.html">Quicktime movies</a>.
|
||||
Even <code>img</code> tags, which HTML Purifier allows by default, can be
|
||||
dangerous. Be distrustful of anything that tells a browser to load content
|
||||
from another website automatically.</p>
|
||||
|
||||
<p>Luckily for us, however, whitelisting saves the day. Sure, letting users
|
||||
include any old random flash file could be dangerous, but if it's
|
||||
from a specific website, it probably is okay. If no amount of pleading will
|
||||
convince the people upstairs that they should just settle with just linking
|
||||
to their movies, you may find this technique very useful.</p>
|
||||
|
||||
<h2>Looking in</h2>
|
||||
|
||||
<p>Below is custom code that allows users to embed
|
||||
YouTube videos. This is not favoritism: this trick can easily be adapted for
|
||||
other forms of embeddable content.</p>
|
||||
|
||||
<p>Usually, websites like YouTube give us boilerplate code that you can insert
|
||||
into your documents. YouTube's code goes like this:</p>
|
||||
|
||||
<pre>
|
||||
<object width="425" height="350">
|
||||
<param name="movie" value="http://www.youtube.com/v/AyPzM5WK8ys" />
|
||||
<param name="wmode" value="transparent" />
|
||||
<embed src="http://www.youtube.com/v/AyPzM5WK8ys"
|
||||
type="application/x-shockwave-flash"
|
||||
wmode="transparent" width="425" height="350" />
|
||||
</object>
|
||||
</pre>
|
||||
|
||||
<p>There are two things to note about this code:</p>
|
||||
|
||||
<ol>
|
||||
<li><code><embed></code> is not recognized by W3C, so if you want
|
||||
standards-compliant code, you'll have to get rid of it.</li>
|
||||
<li>The code is exactly the same for all instances, except for the
|
||||
identifier <tt>AyPzM5WK8ys</tt> which tells us which movie file
|
||||
to retrieve.</li>
|
||||
</ol>
|
||||
|
||||
<p>What point 2 means is that if we have code like <code><span
|
||||
class="embed-youtube">AyPzM5WK8ys</span></code> your
|
||||
application can reconstruct the full object from this small snippet that
|
||||
passes through HTML Purifier <em>unharmed</em>.
|
||||
<a href="http://hp.jpsband.org/svnroot/htmlpurifier/trunk/library/HTMLPurifier/Filter/YouTube.php">Show me the code!</a></p>
|
||||
|
||||
<p>And the corresponding usage:</p>
|
||||
|
||||
<pre><?php
|
||||
// assuming $purifier is an instance of HTMLPurifier
|
||||
require_once 'HTMLPurifier/Filter/YouTube.php';
|
||||
$purifier->addFilter(new HTMLPurifier_Filter_YouTube());
|
||||
?></pre>
|
||||
|
||||
<p>There is a bit going in the two code snippets, so let's explain.</p>
|
||||
|
||||
<ol>
|
||||
<li>This is a Filter object, which intercepts the HTML that is
|
||||
coming into and out of the purifier. You can add as many
|
||||
filter objects as you like. <code>preFilter()</code>
|
||||
processes the code before it gets purified, and <code>postFilter()</code>
|
||||
processes the code afterwards. So, we'll use <code>preFilter()</code> to
|
||||
replace the object tag with a <code>span</code>, and <code>postFilter()</code>
|
||||
to restore it.</li>
|
||||
<li>The first preg_replace call replaces any YouTube code users may have
|
||||
embedded into the benign span tag. Span is used because it is inline,
|
||||
and objects are inline too. We are very careful to be extremely
|
||||
restrictive on what goes inside the span tag, as if an errant code
|
||||
gets in there it could get messy.</li>
|
||||
<li>The HTML is then purified as usual.</li>
|
||||
<li>Then, another preg_replace replaces the span tag with a fully fledged
|
||||
object. Note that the embed is removed, and, in its place, a data
|
||||
attribute was added to the object. This makes the tag standards
|
||||
compliant! It also breaks Internet Explorer, so we add in a bit of
|
||||
conditional comments with the old embed code to make it work again.
|
||||
It's all quite convoluted but works.</li>
|
||||
</ol>
|
||||
|
||||
<h2>Warning</h2>
|
||||
|
||||
<p>There are a number of possible problems with the code above, depending
|
||||
on how you look at it.</p>
|
||||
|
||||
<h3>Cannot change width and height</h3>
|
||||
|
||||
<p>The width and height of the final YouTube movie cannot be adjusted. This
|
||||
is because I am lazy. If you really insist on letting users change the size
|
||||
of the movie, what you need to do is package up the attributes inside the
|
||||
span tag (along with the movie ID). It gets complicated though: a malicious
|
||||
user can specify an outrageously large height and width and attempt to crash
|
||||
the user's operating system/browser. You need to either cap it by limiting
|
||||
the amount of digits allowed in the regex or using a callback to check the
|
||||
number.</p>
|
||||
|
||||
<h3>Trusts media's host's security</h3>
|
||||
|
||||
<p>By allowing this code onto our website, we are trusting that YouTube has
|
||||
tech-savvy enough people not to allow their users to inject malicious
|
||||
code into the Flash files. An exploit on YouTube means an exploit on your
|
||||
site. Even though YouTube is run by the reputable Google, it
|
||||
<a href="http://ha.ckers.org/blog/20061213/google-xss-vuln/">doesn't</a>
|
||||
mean they are
|
||||
<a href="http://ha.ckers.org/blog/20061208/xss-in-googles-orkut/">invulnerable.</a>
|
||||
You're putting a certain measure of the job on an external provider (just as
|
||||
you have by entrusting your user input to HTML Purifier), and
|
||||
it is important that you are cognizant of the risk.</p>
|
||||
|
||||
<h3>Poorly written adaptations compromise security</h3>
|
||||
|
||||
<p>This should go without saying, but if you're going to adapt this code
|
||||
for Google Video or the like, make sure you do it <em>right</em>. It's
|
||||
extremely easy to allow a character too many in <code>postFilter()</code> and
|
||||
suddenly you're introducing XSS into HTML Purifier's XSS free output. HTML
|
||||
Purifier may be well written, but it cannot guard against vulnerabilities
|
||||
introduced after it has finished.</p>
|
||||
|
||||
<h2>Help out!</h2>
|
||||
|
||||
<p>If you write a filter for your favorite video destination (or anything
|
||||
like that, for that matter), send it over and it might get included
|
||||
with the core!</p>
|
||||
|
||||
</body>
|
||||
</html>
|
@@ -1,15 +1,14 @@
|
||||
<?php
|
||||
<?php exit;
|
||||
|
||||
// This file demonstrates basic usage of HTMLPurifier.
|
||||
|
||||
exit; // not to be called directly, it will fail fantastically!
|
||||
|
||||
set_include_path('/path/to/htmlpurifier/library' . PATH_SEPARATOR . get_include_path());
|
||||
require_once 'HTMLPurifier.php';
|
||||
require_once '/path/to/htmlpurifier/library/HTMLPurifier.auto.php';
|
||||
|
||||
$purifier = new HTMLPurifier();
|
||||
$html = '<b>Simple and short';
|
||||
|
||||
$pure_html = $purifier->purify($html);
|
||||
|
||||
echo $pure_html;
|
||||
|
||||
?>
|
@@ -1,34 +1,66 @@
|
||||
<?php
|
||||
|
||||
header('Content-type:text/html;charset=UTF-8');
|
||||
// using _REQUEST because we accept GET and POST requests
|
||||
|
||||
?><!DOCTYPE html
|
||||
PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
|
||||
$content = empty($_REQUEST['xml']) ? 'text/html' : 'application/xhtml+xml';
|
||||
header("Content-type:$content;charset=UTF-8");
|
||||
|
||||
// prevent PHP versions with shorttags from barfing
|
||||
echo '<?xml version="1.0" encoding="UTF-8" ?>
|
||||
';
|
||||
|
||||
function getFormMethod() {
|
||||
return (isset($_REQUEST['post'])) ? 'post' : 'get';
|
||||
}
|
||||
|
||||
if (empty($_REQUEST['strict'])) {
|
||||
?><!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
|
||||
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
|
||||
<html>
|
||||
<?php
|
||||
} else {
|
||||
?>
|
||||
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
|
||||
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
|
||||
<?php
|
||||
}
|
||||
?>
|
||||
<html xmlns="http://www.w3.org/1999/xhtml" lang="en" xml:lang="en">
|
||||
<head>
|
||||
<title>HTMLPurifier Live Demo</title>
|
||||
<title>HTML Purifier Live Demo</title>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
|
||||
</head>
|
||||
<body>
|
||||
<h1>HTMLPurifier Live Demo</h1>
|
||||
<h1>HTML Purifier Live Demo</h1>
|
||||
<?php
|
||||
|
||||
set_include_path('../../library' . PATH_SEPARATOR . get_include_path());
|
||||
require_once 'HTMLPurifier.php';
|
||||
require_once '../../library/HTMLPurifier.auto.php';
|
||||
|
||||
if (!empty($_POST['html'])) {
|
||||
if (!empty($_REQUEST['html'])) { // start result
|
||||
|
||||
$html = get_magic_quotes_gpc() ? stripslashes($_POST['html']) : $_POST['html'];
|
||||
if (strlen($_REQUEST['html']) > 50000) {
|
||||
?>
|
||||
<p>Request exceeds maximum allowed text size of 50kb.</p>
|
||||
<?php
|
||||
} else { // start main processing
|
||||
|
||||
$html = get_magic_quotes_gpc() ? stripslashes($_REQUEST['html']) : $_REQUEST['html'];
|
||||
|
||||
$config = HTMLPurifier_Config::createDefault();
|
||||
$config->set('Core', 'TidyFormat', !empty($_POST['tidy']));
|
||||
$config->set('Core', 'TidyFormat', !empty($_REQUEST['tidy']));
|
||||
$config->set('HTML', 'Strict', !empty($_REQUEST['strict']));
|
||||
$purifier = new HTMLPurifier($config);
|
||||
$pure_html = $purifier->purify($html);
|
||||
|
||||
?>
|
||||
<p>Here is your purified HTML:</p>
|
||||
<div style="border:5px solid #CCC;margin:0 10%;padding:1em;">
|
||||
<?php if(getFormMethod() == 'get') { ?>
|
||||
<div style="float:right;">
|
||||
<a href="http://validator.w3.org/check?uri=referer"><img
|
||||
src="http://www.w3.org/Icons/valid-xhtml10"
|
||||
alt="Valid XHTML 1.0 Transitional" height="31" width="88" style="border:0;" /></a>
|
||||
</div>
|
||||
<?php } ?>
|
||||
<?php
|
||||
|
||||
echo $pure_html;
|
||||
@@ -43,23 +75,34 @@ echo htmlspecialchars($pure_html, ENT_COMPAT, 'UTF-8');
|
||||
|
||||
?></pre>
|
||||
<?php
|
||||
|
||||
if (getFormMethod() == 'post') { // start POST validation notice
|
||||
?>
|
||||
<p>If you would like to validate the code with
|
||||
<a href="http://validator.w3.org/#validate-by-input">W3C's
|
||||
validator</a>, copy and paste the <em>entire</em> demo page's source.</p>
|
||||
<?php
|
||||
} // end POST validation notice
|
||||
|
||||
} // end main processing
|
||||
|
||||
// end result
|
||||
} else {
|
||||
|
||||
?>
|
||||
<p>Welcome to the live demo. Enter some HTML and see how HTMLPurifier
|
||||
<p>Welcome to the live demo. Enter some HTML and see how HTML Purifier
|
||||
will filter it.</p>
|
||||
<?php
|
||||
|
||||
}
|
||||
|
||||
?>
|
||||
<form name="filter" action="demo.php<?php
|
||||
if (isset($_GET['profile']) || isset($_GET['XDEBUG_PROFILE'])) {
|
||||
echo '?XDEBUG_PROFILE=1';
|
||||
} ?>" method="post">
|
||||
<form id="filter" action="demo.php<?php
|
||||
echo '?' . getFormMethod();
|
||||
if (isset($_REQUEST['profile']) || isset($_REQUEST['XDEBUG_PROFILE'])) {
|
||||
echo '&XDEBUG_PROFILE=1';
|
||||
} ?>" method="<?php echo getFormMethod(); ?>">
|
||||
<fieldset>
|
||||
<legend>HTML</legend>
|
||||
<legend>HTML Purifier Input (<?php echo getFormMethod(); ?>)</legend>
|
||||
<textarea name="html" cols="60" rows="15"><?php
|
||||
|
||||
if (isset($html)) {
|
||||
@@ -67,13 +110,27 @@ if (isset($html)) {
|
||||
HTMLPurifier_Encoder::cleanUTF8($html), ENT_COMPAT, 'UTF-8');
|
||||
}
|
||||
?></textarea>
|
||||
<div>Nicely format output with Tidy? <input type="checkbox" value="1"
|
||||
name="tidy"<?php if (!empty($_POST['tidy'])) echo ' checked="checked"'; ?> /></div>
|
||||
<?php if (getFormMethod() == 'get') { ?>
|
||||
<p><strong>Warning:</strong> GET request method can only hold
|
||||
8129 characters (probably less depending on your browser).
|
||||
If you need to test anything
|
||||
larger than that, try the <a href="demo.php?post">POST form</a>.</p>
|
||||
<?php } ?>
|
||||
<?php if (extension_loaded('tidy')) { ?>
|
||||
<div>Nicely format output with Tidy? <input type="checkbox" value="1"
|
||||
name="tidy"<?php if (!empty($_REQUEST['tidy'])) echo ' checked="checked"'; ?> /></div>
|
||||
<?php } ?>
|
||||
<div>XHTML 1.0 Strict output? <input type="checkbox" value="1"
|
||||
name="strict"<?php if (!empty($_REQUEST['strict'])) echo ' checked="checked"'; ?> /></div>
|
||||
<div>Serve as application/xhtml+xml? (not for IE) <input type="checkbox" value="1"
|
||||
name="xml"<?php if (!empty($_REQUEST['xml'])) echo ' checked="checked"'; ?> /></div>
|
||||
<div>
|
||||
<input type="submit" value="Submit" name="submit" class="button" />
|
||||
</div>
|
||||
</fieldset>
|
||||
</form>
|
||||
<p>Return to <a href="http://hp.jpsband.org/">HTMLPurifier's home page</a>.</p>
|
||||
<p>Return to <a href="http://hp.jpsband.org/">HTML Purifier's home page</a>.
|
||||
Try the form in <a href="demo.php?get">GET</a> and <a href="demo.php?post">POST</a> request
|
||||
flavors (GET is easy to validate with W3C, but POST allows larger inputs).</p>
|
||||
</body>
|
||||
</html>
|
6
docs/fixquotes.htc
Normal file
6
docs/fixquotes.htc
Normal file
@@ -0,0 +1,6 @@
|
||||
<public:attach event="oncontentready" onevent="init();" />
|
||||
<script>
|
||||
function init() {
|
||||
element.innerHTML = '“'+element.innerHTML+'”';
|
||||
}
|
||||
</script>
|
@@ -13,7 +13,7 @@
|
||||
|
||||
<h1>Documentation</h1>
|
||||
|
||||
<p><strong>HTML Purifier</strong> has documentation for all types of people.
|
||||
<p><strong><a href="http://hp.jpsband.org/">HTML Purifier</a></strong> has documentation for all types of people.
|
||||
Here is an index of all of them.</p>
|
||||
|
||||
<h2>End-user</h2>
|
||||
@@ -23,7 +23,16 @@ information for casual developers using HTML Purifier.</p>
|
||||
<dl>
|
||||
|
||||
<dt><a href="enduser-id.html">IDs</a></dt>
|
||||
<dd>Explains various methods for allowing IDs in documents safely in HTML Purifier.</dd>
|
||||
<dd>Explains various methods for allowing IDs in documents safely.</dd>
|
||||
|
||||
<dt><a href="enduser-youtube.html">Embedding YouTube videos</a></dt>
|
||||
<dd>Explains how to safely allow the embedding of flash from trusted sites.</dd>
|
||||
|
||||
<dt><a href="enduser-slow.html">Speeding up HTML Purifier</a></dt>
|
||||
<dd>Explains how to speed up HTML Purifier through caching or inbound filtering.</dd>
|
||||
|
||||
<dt><a href="enduser-utf8.html">UTF-8: The Secret of Character Encoding</a></dt>
|
||||
<dd>Describes the rationale for using UTF-8, the ramifications otherwise, and how to make the switch.</dd>
|
||||
|
||||
</dl>
|
||||
|
||||
@@ -45,6 +54,10 @@ conventions.</p>
|
||||
<dt><a href="dev-optimization.html">Optimization</a></dt>
|
||||
<dd>Discusses possible methods of optimizing HTML Purifier.</dd>
|
||||
|
||||
<dt><a href="dev-advanced-api.html">Advanced API</a></dt>
|
||||
<dd>Functional specification for HTML Purifier's advanced API for defining
|
||||
custom filtering behavior.</dd>
|
||||
|
||||
</dl>
|
||||
|
||||
<h2>Proposals</h2>
|
||||
@@ -65,6 +78,85 @@ that may not directly discuss HTML Purifier.</p>
|
||||
<dd>Credits and links to DevNetwork forum topics.</dd>
|
||||
</dl>
|
||||
|
||||
<h2>Internal memos</h2>
|
||||
|
||||
<p>Plaintext documents that are more for use by active developers of
|
||||
the code. They may be upgraded to HTML files or stay as TXT scratchpads.</p>
|
||||
|
||||
<table class="table">
|
||||
|
||||
<thead><tr>
|
||||
<th width="10%">Type</th>
|
||||
<th width="20%">Name</th>
|
||||
<th>Description</th>
|
||||
</tr></thead>
|
||||
|
||||
<tbody>
|
||||
|
||||
<tr>
|
||||
<td>End-user</td>
|
||||
<td><a href="enduser-overview.txt">Overview</a></td>
|
||||
<td>High level overview of the general control flow (mostly obsolete).</td>
|
||||
</tr>
|
||||
|
||||
<tr>
|
||||
<td>End-user</td>
|
||||
<td><a href="enduser-security.txt">Security</a></td>
|
||||
<td>Common security issues that may still arise (half-baked).</td>
|
||||
</tr>
|
||||
|
||||
<tr>
|
||||
<td>Proposal</td>
|
||||
<td><a href="proposal-filter-levels.txt">Filter levels</a></td>
|
||||
<td>Outlines details of projected configurable level of filtering.</td>
|
||||
</tr>
|
||||
|
||||
<tr>
|
||||
<td>Proposal</td>
|
||||
<td><a href="proposal-language.txt">Language</a></td>
|
||||
<td>Specification of I18N for error messages derived from MediaWiki (half-baked).</td>
|
||||
</tr>
|
||||
|
||||
<tr>
|
||||
<td>Proposal</td>
|
||||
<td><a href="proposal-new-directives.txt">New directives</a></td>
|
||||
<td>Assorted configuration options that could be implemented.</td>
|
||||
</tr>
|
||||
|
||||
<tr>
|
||||
<td>Reference</td>
|
||||
<td><a href="ref-loose-vs-strict.txt">Loose vs.Strict</a></td>
|
||||
<td>Differences between HTML Strict and Transitional versions.</td>
|
||||
</tr>
|
||||
|
||||
<tr>
|
||||
<td>Reference</td>
|
||||
<td><a href="ref-proprietary-tags.txt">Proprietary tags</a></td>
|
||||
<td>List of vendor-specific tags we may want to transform to W3C compliant markup.</td>
|
||||
</tr>
|
||||
|
||||
<tr>
|
||||
<td>Reference</td>
|
||||
<td><a href="ref-strictness.txt">Strictness</a></td>
|
||||
<td>Short essay on how loose definition isn't really loose.</td>
|
||||
</tr>
|
||||
|
||||
<tr>
|
||||
<td>Reference</td>
|
||||
<td><a href="ref-xhtml-1.1.txt">XHTML 1.1</a></td>
|
||||
<td>What we'd have to do to support XHTML 1.1.</td>
|
||||
</tr>
|
||||
|
||||
<tr>
|
||||
<td>Reference</td>
|
||||
<td><a href="ref-whatwg.txt">WHATWG</a></td>
|
||||
<td>How WHATWG plays into what we need to do.</td>
|
||||
</tr>
|
||||
|
||||
</tbody>
|
||||
|
||||
</table>
|
||||
|
||||
<div id="version">$Id$</div>
|
||||
</body>
|
||||
</html>
|
@@ -15,6 +15,7 @@
|
||||
|
||||
<div id="filing">Filed under Proposals</div>
|
||||
<div id="index">Return to the <a href="index.html">index</a>.</div>
|
||||
<div id="home"><a href="http://hp.jpsband.org/">HTML Purifier</a> End-User Documentation</div>
|
||||
|
||||
<p>Your website probably has a color-scheme.
|
||||
<span style="color:#090; background:#FFF;">Green on white</span>,
|
||||
|
@@ -7,22 +7,22 @@ value is used for. This means decentralized configuration declarations that
|
||||
are nevertheless error checking and a centralized configuration object.
|
||||
|
||||
Directives are divided into namespaces, indicating the major portion of
|
||||
functionality they cover (although there may be overlaps. Please consult
|
||||
functionality they cover (although there may be overlaps). Please consult
|
||||
the documentation in ConfigDef for more information on these namespaces.
|
||||
|
||||
Since configuration is dependant on context, internal classes require a
|
||||
configuration object to be passed as a parameter. (They also require a
|
||||
Context object).
|
||||
|
||||
In relation to HTMLDefinition and CSSDefinition, there is a special class
|
||||
In relation to HTMLDefinition and CSSDefinition, there could be a special class
|
||||
of directives that influence the *construction* of the Definition object.
|
||||
A standard call pattern would look like:
|
||||
A theoretical call pattern would look like:
|
||||
|
||||
1. Client calls Config->getHTMLDefinition()
|
||||
2. Config calls HTMLDefinition->createNew(this)
|
||||
3. HTMLDefinition constructs itself with base configuration
|
||||
4. HTMLDefinition calls Config->get('HTMLDefinition')
|
||||
5. Config returns array of directives that later construction
|
||||
4. HTMLDefinition calls Config->get('HTML')
|
||||
5. Config returns array of directives
|
||||
6. HTMLDefinition performs operations and changes specified by directives
|
||||
7. HTMLPurifier returns constructed definition
|
||||
8. Config caches definition so it doesn't have to be generated again
|
||||
@@ -33,3 +33,8 @@ custom copy, which OVERRIDES all directives. Only the base, vanilla copy
|
||||
is the Singleton, the object actually interfaced with is a operated-upon
|
||||
clone of that object. Also, if an update to the directives would update
|
||||
the definition, you'd have to force reconstruction.
|
||||
|
||||
In practice, the pulling directives from the config object are
|
||||
solely need-based, and the flex points are littered throughout the
|
||||
setup() function. Some sort of refactoring is likely in order. See
|
||||
ref-xhtml-1.1.txt for more info.
|
||||
|
@@ -8,14 +8,17 @@ could go into this definition: the set of HTML good for blog entries is
|
||||
definitely too large for HTML that would be allowed in blog comments. Going
|
||||
from Transitional to Strict requires changes to the definition.
|
||||
|
||||
However, allowing users to specify their own whitelists was an idea I
|
||||
rejected from the start. Simply put, the typical programmer is too lazy
|
||||
to actually go through the trouble of investigating which tags, attributes
|
||||
and properties to allow. HTMLDefinition makes a big part of what HTMLPurifier
|
||||
is.
|
||||
Allowing users to specify their own whitelists is one step (implemented, btw),
|
||||
but I have doubts on only doing this. Simply put, the typical programmer is too
|
||||
lazy to actually go through the trouble of investigating which tags, attributes
|
||||
and properties to allow. HTMLDefinition makes a big part of what HTMLPurifier
|
||||
is.
|
||||
|
||||
The idea, then, is to setup fundamentally different set of definitions, which
|
||||
can further be customized using simpler configuration options.
|
||||
can further be customized using simpler configuration options. Alternatively,
|
||||
they could be implemented as configuration profiles, which simply load
|
||||
a set of recommended directives to acheive a desired affect (no simpler
|
||||
config options though).
|
||||
|
||||
Here are some fuzzy levels you could set:
|
||||
|
||||
@@ -28,7 +31,7 @@ Here are some fuzzy levels you could set:
|
||||
to be useful)
|
||||
3. Pages - As permissive as possible without allowing XSS. No protection
|
||||
against bad design sense, unfortunantely. Suitable for wiki and page
|
||||
environments.
|
||||
environments. (probably what we have now)
|
||||
4. Lint - Accept everything in the spec, a Tidy wannabe. (This probably won't
|
||||
get implemented as it would require routines for things like <object>
|
||||
and friends to be implemented, which is a lot of work for not a lot of
|
||||
|
@@ -1,42 +1,6 @@
|
||||
We are going to model our I18N/L10N off of MediaWiki's system. Their's is
|
||||
obviously quite complicated, so we're going to simplify it a bit for our needs.
|
||||
|
||||
== Structure ==
|
||||
|
||||
First, you have a Language object. This object contains all the localisable
|
||||
message strings, as well as other important language-specific settings and
|
||||
custom behavior (uppercasing, lowercasing, printing dates, formatting
|
||||
numbers, etc.)
|
||||
|
||||
The object is constructed from two sources: subclassed versions of itself
|
||||
(classes) and Message files (messages).
|
||||
|
||||
== General use ==
|
||||
|
||||
You load a language object by calling the Language::factory() function.
|
||||
This function the class file for the object (taking in account fallback
|
||||
languages by using the fallback langauge's object but overloading the
|
||||
language key) and returns that object. Nothing else happens.
|
||||
|
||||
When a message/etc is requested, a lazy load initializor is called. Now the
|
||||
real work starts. We're first going to take the scenario that the language
|
||||
is not cached. The system loads the Messages file by:
|
||||
|
||||
require( $filename );
|
||||
$cache = compact( self::$mLocalisationKeys );
|
||||
|
||||
...where self::$mLocalisationKeys is the name of variables that could be used
|
||||
in the localization file. This lets you use things like:
|
||||
|
||||
$fallback = false;
|
||||
$rtl = false;
|
||||
|
||||
...and easily siphon them into arrays.
|
||||
|
||||
Then, we load the $fallback language (if not set, English) to fill in the gaps in
|
||||
the messages. There is specialized behavior for certain keys, as they can be
|
||||
mergeable maps, lists or alias lists (not sure what the last one is).
|
||||
|
||||
== Caching ==
|
||||
|
||||
MediaWiki has lots of caching mechanisms built in, which make the code somewhat
|
||||
|
@@ -4,8 +4,6 @@ Configuration Ideas
|
||||
Here are some theoretical configuration ideas that we could implement some
|
||||
time. Note the naming convention: %Namespace.Directive
|
||||
|
||||
%Attr.IDPrefix - prefix all ids with this
|
||||
|
||||
%Attr.RewriteFragments - if there's %Attr.IDPrefix we may want to transparently
|
||||
rewrite the URLs we parse too. However, we can only do it when it's a pure
|
||||
anchor link, so it's not foolproof
|
||||
@@ -21,20 +19,11 @@ time. Note the naming convention: %Namespace.Directive
|
||||
%Attr.MaxHeight - caps for width and height related checks.
|
||||
(the hack in Pixels for an image crashing attack could be replaced by this)
|
||||
|
||||
%URI.Munge - will munge all external URIs to a different URI, which redirects
|
||||
the user to the applicable page. A urlencoded version of the URI
|
||||
will replace any instances of %s in the string. One possible
|
||||
string is 'http://www.google.com/url?q=%s'. Useful for preventing
|
||||
pagerank from being sent to other sites, but can also be used to
|
||||
redirect to a splash page notifying user that they are leaving your
|
||||
website.
|
||||
|
||||
%URI.AddRelNofollow - will add rel="nofollow" to all links, preventing the
|
||||
spread of ill-gotten pagerank
|
||||
|
||||
%URI.RelativeToAbsolute - transforms all relative URIs to absolute form
|
||||
|
||||
%URI.HostBlacklist - strings that if found in the host of a URI are disallowed
|
||||
%URI.HostBlacklistRegex - regexes that if matching the host are disallowed
|
||||
%URI.HostWhitelist - domain names that are excluded from the host blacklist
|
||||
%URI.HostPolicy - determines whether or not its reject all and then whitelist
|
||||
@@ -53,7 +42,3 @@ time. Note the naming convention: %Namespace.Directive
|
||||
absolute DNS. While this is actually the preferred method according to
|
||||
the RFC, most people opt to use a relative domain name relative to . (root).
|
||||
|
||||
%URI.DisableExternalResources - disallow resource links (i.e. URIs that result
|
||||
in immediate requests, such as src in IMG) to external websites
|
||||
|
||||
%HTML.DisableImg - disables all images
|
||||
|
@@ -15,6 +15,7 @@
|
||||
|
||||
<div id="filing">Filed under Reference</div>
|
||||
<div id="index">Return to the <a href="index.html">index</a>.</div>
|
||||
<div id="home"><a href="http://hp.jpsband.org/">HTML Purifier</a> End-User Documentation</div>
|
||||
|
||||
<p>Many thanks to the DevNetwork community for answering questions,
|
||||
theorizing about design, and offering encouragement during
|
||||
|
37
docs/ref-loose-vs-strict.txt
Normal file
37
docs/ref-loose-vs-strict.txt
Normal file
@@ -0,0 +1,37 @@
|
||||
|
||||
Loose versus Strict
|
||||
Changes from one doctype to another
|
||||
|
||||
There are changes. Wow, how insightful. Not everything changed is relevant
|
||||
to HTML Purifier, though, so let's take a look:
|
||||
|
||||
== Major incompatibilities ==
|
||||
|
||||
[done] BLOCKQUOTE changes from 'flow' to 'block'
|
||||
current behavior: inline inner contents should not be nuked, block-ify as necessary
|
||||
[partially-done] U, S, STRIKE cut
|
||||
current behavior: removed completely
|
||||
projected behavior: replace with appropriate inline span + CSS
|
||||
[done] ADDRESS from potpourri to Inline (removes p tags)
|
||||
current behavior: block tags silently dropped
|
||||
ideal behavior: replace tags with something like <br>. (not high priority)
|
||||
|
||||
== Things we can loosen up ==
|
||||
|
||||
Tags DIR, MENU, CENTER, ISINDEX, FONT, BASEFONT? allowed in loose
|
||||
current behavior: transform to strict-valid forms
|
||||
Attributes allowed in loose (see attribute transforms in 'dev-progress.html')
|
||||
current behavior: projected to transform into strict-valid forms
|
||||
|
||||
== Periphery issues ==
|
||||
|
||||
A tag's attribute 'target' (for selecting frames) cut
|
||||
current behavior: not allowed at all
|
||||
projected behavior: use loose doctype if needed, needs valid values
|
||||
[done] OL/LI tag's attribute 'start'/'value' (for renumbering lists) cut
|
||||
current behavior: no substitute, just delete when in strict, allow in loose
|
||||
Attribute 'name' deprecated in favor of 'id'
|
||||
current behavior: dropped silently
|
||||
projected behavior: create proper AttrTransform
|
||||
[done] PRE tag allows SUB/SUP? (strict dtd comment vs syntax, loose disallows)
|
||||
current behavior: disallow as usual
|
22
docs/ref-proprietary-tags.txt
Normal file
22
docs/ref-proprietary-tags.txt
Normal file
@@ -0,0 +1,22 @@
|
||||
|
||||
Proprietary Tags
|
||||
<nobr> and friends
|
||||
|
||||
Here are some proprietary tags that W3C does not define but occasionally show
|
||||
up in the wild. We have only included tags that would make sense in an
|
||||
HTML Purifier context.
|
||||
|
||||
<align>, block element that aligns (extremely rare)
|
||||
<blackface>, inline that double-bolds text (extremely rare)
|
||||
<comment>, hidden comment for IE and WebTV
|
||||
<multicol cols=number gutter=pixels width=pixels>, multiple columns
|
||||
<nobr>, no linebreaks
|
||||
<spacer align=* type="vertical|horizontal|block">, whitespace in doc,
|
||||
use width/height for block and size for vertical/horizontal (attributes)
|
||||
(extremely rare)
|
||||
<wbr>, potential word break point: allows linebreaks. Only works in <nobr>
|
||||
|
||||
<listing>, monospace pre-variant (extremely rare)
|
||||
<plaintext>, escapes all tags to the end of document
|
||||
<ruby> and friends, (more research needed, appears to be XHTML 1.1 markup)
|
||||
<xmp>, monospace, replace with pre
|
@@ -2,8 +2,8 @@
|
||||
Is HTML Purifier Strict or Transitional?
|
||||
A little bit of helpful guidance
|
||||
|
||||
Despite the fact that HTML Purifier professes only to support transitional
|
||||
HTML, it rejects a lot of attributes and elements that are actually, indeed,
|
||||
Despite the fact that HTML Purifier professes to support both transitional and
|
||||
strict HTML, it rejects a lot of attributes and elements that are actually, indeed,
|
||||
valid. You can investigate progress.html to find out precisely what we
|
||||
are doing to these *deprecated* attributes.
|
||||
|
||||
@@ -11,8 +11,8 @@ However, users have found that Strict HTML imposes some quite unreasonable
|
||||
restrictions on certain things. The start and value attributes in ol and
|
||||
li (respectively) perhaps are the most contested. There's is currently no
|
||||
widely supported browser method short of JavaScript that can replace these
|
||||
two deprecated elements. HTML Purifier does not currently support them, but
|
||||
it might behoove us to do so while our output is still transitional.
|
||||
two deprecated elements. It behooves us to allow these deprecated
|
||||
attributes when the output is transitional.
|
||||
|
||||
Fortunantely, that's the only real bugger case. The others have near-perfect
|
||||
CSS equivalents, and were presentational anyway. However, the other question
|
||||
@@ -22,4 +22,16 @@ whole point about CSS is to seperate styling from content, so inline styling
|
||||
doesn't solve that problem.
|
||||
|
||||
It's an icky question, and we'll have to deal with it as more and more
|
||||
transforms get implemented.
|
||||
transforms get implemented. As of right now, however, we currently support
|
||||
these loose-only constructs in loose mode:
|
||||
|
||||
- <ul start="1">, <li value="1"> attributes
|
||||
- <u>, <strike>, <s> tags
|
||||
- flow children in <blockquote>
|
||||
- mixed children in <address>
|
||||
|
||||
The changed child definitions as well as the ul.start li.value are the most
|
||||
compelling reasons why loose should be used. We may want offer disabling <u>,
|
||||
<strike> and <s> by themselves. We may also want to offer no pre-emptive
|
||||
deprecated conversions. This all must be unified.
|
||||
|
||||
|
9
docs/ref-whatwg.txt
Normal file
9
docs/ref-whatwg.txt
Normal file
@@ -0,0 +1,9 @@
|
||||
|
||||
Web Hypertext Application Technology Working Group
|
||||
WHATWG
|
||||
|
||||
I don't think we need to worry about them. Untrusted users shouldn't be
|
||||
submitting applications, eh? But if some interesting attribute pops up in
|
||||
their spec, and might be worth supporting, stick it here.
|
||||
|
||||
(none so far, as you can see)
|
187
docs/ref-xhtml-1.1.txt
Normal file
187
docs/ref-xhtml-1.1.txt
Normal file
@@ -0,0 +1,187 @@
|
||||
|
||||
XHTML 1.1 and HTML Purifier
|
||||
|
||||
Todo for XHTML 1.1 support <http://www.w3.org/TR/xhtml11/changes.html>
|
||||
1. Scratch lang entirely in favor of xml:lang
|
||||
2. Scratch name entirely in favor of id (partially-done)
|
||||
3. Support Ruby <http://www.w3.org/TR/2001/REC-ruby-20010531/>
|
||||
|
||||
HTML Purifier uses the modularization of XHTML
|
||||
<http://www.w3.org/TR/xhtml-modularization/> to organize the internals
|
||||
of HTMLDefinition into a more manageable and extensible fashion. Rather
|
||||
than have one super-object, HTMLDefinition is split into HTMLModules,
|
||||
each of which are responsible for defining elements, their attributes,
|
||||
and other properties (for a more indepth coverage, see
|
||||
/library/HTMLPurifier/HTMLModule.php's docblock comments).
|
||||
|
||||
The modules that W3C defines and we support are:
|
||||
|
||||
* 5.1. Attribute Collections (technically not a module
|
||||
* 5.2. Core Modules
|
||||
o 5.2.2. Text Module
|
||||
o 5.2.3. Hypertext Module
|
||||
o 5.2.4. List Module
|
||||
* 5.4. Text Extension Modules
|
||||
o 5.4.1. Presentation Module
|
||||
o 5.4.2. Edit Module
|
||||
o 5.4.3. Bi-directional Text Module
|
||||
* 5.6. Table Modules
|
||||
o 5.6.2. Tables Module
|
||||
* 5.7. Image Module
|
||||
* 5.18. Style Attribute Module
|
||||
|
||||
Modules that we don't support but coul support are:
|
||||
|
||||
* 5.6. Table Modules
|
||||
o 5.6.1. Basic Tables Module [?]
|
||||
* 5.8. Client-side Image Map Module [?]
|
||||
* 5.9. Server-side Image Map Module [?]
|
||||
* 5.12. Target Module [?]
|
||||
* 5.21. Name Identification Module [deprecated]
|
||||
* 5.22. Legacy Module [deprecated]
|
||||
|
||||
These modules will not be implemented due to their dangerousness or
|
||||
inapplicability as an XHTML fragment:
|
||||
|
||||
* 5.2. Core Modules
|
||||
o 5.2.1. Structure Module
|
||||
* 5.3. Applet Module
|
||||
* 5.5. Forms Modules
|
||||
o 5.5.1. Basic Forms Module
|
||||
o 5.5.2. Forms Module
|
||||
* 5.10. Object Module
|
||||
* 5.11. Frames Module
|
||||
* 5.13. Iframe Module
|
||||
* 5.14. Intrinsic Events Module
|
||||
* 5.15. Metainformation Module
|
||||
* 5.16. Scripting Module
|
||||
* 5.17. Style Sheet Module
|
||||
* 5.19. Link Module
|
||||
* 5.20. Base Module
|
||||
|
||||
We will not be using W3C's XML Schemas or DTDs directly due to the lack
|
||||
of robust tools for handling them (the main problem is that all the
|
||||
current parsers are usually PHP 5 only and solely-validating, not
|
||||
correcting).
|
||||
|
||||
The abstraction of the HTMLDefinition creation process will also
|
||||
contribute to a need for a caching system. Cache invalidation would be
|
||||
difficult, but could be done by comparing the HTML and Attr config
|
||||
namespaces with a copy that was packaged along with the serialized
|
||||
HTMLDefinition object.
|
||||
|
||||
== General Use-Case ==
|
||||
|
||||
The outwards API of HTMLDefinition has been largely preserved, not
|
||||
only for backwards-compatibility but also by design. Instead,
|
||||
HTMLDefinition can be retrieved "raw", in which it loads a structure
|
||||
that closely resembles the modules of XHTML 1.1. This structure is very
|
||||
dynamic, making it easy to make cascading changes to global content
|
||||
sets or remove elements in bulk.
|
||||
|
||||
However, once HTML Purifier needs the actual definition, it retrieves
|
||||
a finalized version of HTMLDefinition. The finalized definition involves
|
||||
processing the modules into a form that it is optimized for multiple
|
||||
calls. This final version is immutable and, even if editable, would
|
||||
be extremely hard to change.
|
||||
|
||||
So, some code taking advantage of the XHTML modularization may look
|
||||
like this:
|
||||
|
||||
<?php
|
||||
$config = HTMLPurifier_Config::createDefault();
|
||||
$def =& $config->getHTMLDefinition(true); // reference to raw
|
||||
unset($def->modules['Hypertext']); // rm ''a'' link
|
||||
$purifier = new HTMLPurifier($config);
|
||||
$purifier->purify($html); // now the definition is finalized
|
||||
?>
|
||||
|
||||
== Inclusions ==
|
||||
|
||||
One of the nice features of HTMLDefinition is that piggy-backing off
|
||||
of global attribute and content sets is extremely easy to do.
|
||||
|
||||
=== Attributes ===
|
||||
|
||||
HTMLModule->elements[$element]->attr stores attribute information for the
|
||||
specific attributes of $element. This is quite close to the final
|
||||
API that HTML Purifier interfaces with, but there's an important
|
||||
extra feature: attr may also contain a array with a member index zero.
|
||||
|
||||
<?php
|
||||
HTMLModule->elements[$element]->attr[0] = array('AttrSet');
|
||||
?>
|
||||
|
||||
Rather than map the attribute key 0 to an array (which should be
|
||||
an AttrDef), it defines a number of attribute collections that should
|
||||
be merged into this elements attribute array.
|
||||
|
||||
Furthermore, the value of an attribute key, attribute value pair need
|
||||
not be a fully fledged AttrDef object. They can also be a string, which
|
||||
signifies a AttrDef that is looked up from a centralized registry
|
||||
AttrTypes. This allows more concise attribute definitions that look
|
||||
more like W3C's declarations, as well as offering a centralized point
|
||||
for modifying the behavior of one attribute type. And, of course, the
|
||||
old method of manually instantiating an AttrDef still works.
|
||||
|
||||
=== Attribute Collections ===
|
||||
|
||||
Attribute collections are stored and processed in the AttrCollections
|
||||
object, which is responsible for performing the inclusions signified
|
||||
by the 0 index. These attribute collections, too, are mutable, by
|
||||
using HTMLModule->attr_collections. You may add new attributes
|
||||
to a collection or define an entirely new collection for your module's
|
||||
use. Inclusions can also be cumulative.
|
||||
|
||||
Attribute collections allow us to get rid of so called "global attributes"
|
||||
(which actually aren't so global).
|
||||
|
||||
=== Content Models and ChildDef ===
|
||||
|
||||
An implementation of the above-mentioned attributes and attribute
|
||||
collections was applied to the ChildDef system. HTML Purifier uses
|
||||
a proprietary system called ChildDef for performance and flexibility
|
||||
reasons, but this does not line up very well with W3C's notion of
|
||||
regexps for defining the allowed children of an element.
|
||||
|
||||
HTMLPurifier->elements[$element]->content_model and
|
||||
HTMLPurifier->elements[$element]->content_model_type store information
|
||||
about the final ChildDef that will be stored in
|
||||
HTMLPurifier->elements[$element]->child (we use a different variable
|
||||
because the two forms are sufficiently different).
|
||||
|
||||
$content_model is an abstract, string representation of the internal
|
||||
state of ChildDef, while $content_model_type is a string identifier
|
||||
of which ChildDef subclass to instantiate. $content_model is processed
|
||||
by substituting all content set identifiers (capitalized element names)
|
||||
with their contents. It is then parsed and passed into the appropriate
|
||||
ChildDef class, as defined by the ContentSets->getChildDef() or the
|
||||
custom fallback HTMLModule->getChildDef() for custom child definitions
|
||||
not in the core.
|
||||
|
||||
You'll need to use these facilities if you plan on referencing a content
|
||||
set like "Inline" or "Block", and using them is recommended even if you're
|
||||
not due to their conciseness.
|
||||
|
||||
A few notes on $content_model: it's structure can be as complicated
|
||||
as you want, but the pipe symbol (|) is reserved for defining possible
|
||||
choices, due to the content sets implementation. For example, a content
|
||||
model that looks like:
|
||||
|
||||
"Inline -> Block -> a"
|
||||
|
||||
...when the Inline content set is defined as "span | b" and the Block
|
||||
content set is defined as "div | blockquote", will expand into:
|
||||
|
||||
"span | b -> div | blockquote -> a"
|
||||
|
||||
The custom HTMLModule->getChildDef() function will need to be able to
|
||||
then feed this information to ChildDef in a usable manner.
|
||||
|
||||
=== Content Sets ===
|
||||
|
||||
Content sets can be altered using HTMLModule->content_sets, an associative
|
||||
array of content set names to content set contents. If the content set
|
||||
already exists, your values are appended on to it (great for, say,
|
||||
registering the font tag as an inline element), otherwise it is
|
||||
created. They are substituted into content_model.
|
@@ -23,6 +23,8 @@ h4 {font-family:sans-serif; font-size:0.9em; font-weight:bold; }
|
||||
|
||||
/* Marks off asides, discussions on why something is the way it is */
|
||||
.aside {margin-left:2em; font-family:sans-serif; font-size:0.9em; }
|
||||
blockquote .label {font-weight:bold; font-size:1em; margin:0 0 .1em;
|
||||
border-bottom:1px solid #CCC;}
|
||||
|
||||
/* A regular table */
|
||||
.table {border-collapse:collapse; border-bottom:2px solid #888; margin-left:2em; }
|
||||
@@ -36,5 +38,31 @@ h4 {font-family:sans-serif; font-size:0.9em; font-weight:bold; }
|
||||
/* Contains, without exception, Return to index. */
|
||||
#index {font-size:smaller; }
|
||||
|
||||
#home {font-size:smaller;}
|
||||
|
||||
/* Contains, without exception, $Id$, for SVN version info. */
|
||||
#version {text-align:right; font-style:italic; margin:2em 0;}
|
||||
#version {text-align:right; font-style:italic; margin:2em 0;}
|
||||
|
||||
#toc ol ol {list-style-type:lower-roman;}
|
||||
#toc ol {list-style-type:decimal;}
|
||||
#toc {list-style-type:upper-alpha;}
|
||||
|
||||
q {
|
||||
behavior: url(fixquotes.htc); /* IE fix */
|
||||
quotes: '\201C' '\201D' '\2018' '\2019';
|
||||
}
|
||||
q:before {
|
||||
content: open-quote;
|
||||
}
|
||||
q:after {
|
||||
content: close-quote;
|
||||
}
|
||||
|
||||
/* Marks off implementation details interesting only to the person writing
|
||||
the class described in the spec. */
|
||||
.technical {margin-left:2em; }
|
||||
.technical:before {content:"Technical note: "; font-weight:bold; color:#061; }
|
||||
|
||||
/* Marks off sections that are lacking. */
|
||||
.fixme {margin-left:2em; }
|
||||
.fixme:before {content:"Fix me: "; font-weight:bold; color:#C00; }
|
||||
|
21
library/HTMLPurifier.func.php
Normal file
21
library/HTMLPurifier.func.php
Normal file
@@ -0,0 +1,21 @@
|
||||
<?php
|
||||
|
||||
/**
|
||||
* Function wrapper for HTML Purifier for quick use.
|
||||
* @note This function only includes the library when it is called. While
|
||||
* this is efficient for instances when you only use HTML Purifier
|
||||
* on a few of your pages, it murders bytecode caching. You still
|
||||
* need to add HTML Purifier to your path.
|
||||
* @note ''HTMLPurifier()'' is NOT the same as ''new HTMLPurifier()''
|
||||
*/
|
||||
|
||||
function HTMLPurifier($html, $config = null) {
|
||||
static $purifier = false;
|
||||
if (!$purifier) {
|
||||
require_once 'HTMLPurifier.php';
|
||||
$purifier = new HTMLPurifier();
|
||||
}
|
||||
return $purifier->purify($html, $config);
|
||||
}
|
||||
|
||||
?>
|
@@ -22,7 +22,7 @@
|
||||
*/
|
||||
|
||||
/*
|
||||
HTML Purifier 1.2.0 - Standards Compliant HTML Filtering
|
||||
HTML Purifier 1.4.1 - Standards Compliant HTML Filtering
|
||||
Copyright (C) 2006 Edward Z. Yang
|
||||
|
||||
This library is free software; you can redistribute it and/or
|
||||
@@ -64,52 +64,107 @@ require_once 'HTMLPurifier/Encoder.php';
|
||||
class HTMLPurifier
|
||||
{
|
||||
|
||||
var $version = '1.4.1';
|
||||
|
||||
var $config;
|
||||
var $filters;
|
||||
|
||||
var $lexer, $strategy, $generator;
|
||||
|
||||
/**
|
||||
* Final HTMLPurifier_Context of last run purification. Might be an array.
|
||||
* @public
|
||||
*/
|
||||
var $context;
|
||||
|
||||
/**
|
||||
* Initializes the purifier.
|
||||
* @param $config Optional HTMLPurifier_Config object for all instances of
|
||||
* the purifier, if omitted, a default configuration is
|
||||
* supplied (which can be overridden on a per-use basis).
|
||||
* The parameter can also be any type that
|
||||
* HTMLPurifier_Config::create() supports.
|
||||
*/
|
||||
function HTMLPurifier($config = null) {
|
||||
|
||||
$this->config = $config ? $config : HTMLPurifier_Config::createDefault();
|
||||
$this->config = HTMLPurifier_Config::create($config);
|
||||
|
||||
$this->lexer = HTMLPurifier_Lexer::create();
|
||||
$this->strategy = new HTMLPurifier_Strategy_Core();
|
||||
$this->generator = new HTMLPurifier_Generator();
|
||||
$this->encoder = new HTMLPurifier_Encoder();
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* Adds a filter to process the output. First come first serve
|
||||
* @param $filter HTMLPurifier_Filter object
|
||||
*/
|
||||
function addFilter($filter) {
|
||||
$this->filters[] = $filter;
|
||||
}
|
||||
|
||||
/**
|
||||
* Filters an HTML snippet/document to be XSS-free and standards-compliant.
|
||||
*
|
||||
* @param $html String of HTML to purify
|
||||
* @param $config HTMLPurifier_Config object for this operation, if omitted,
|
||||
* defaults to the config object specified during this
|
||||
* object's construction.
|
||||
* object's construction. The parameter can also be any type
|
||||
* that HTMLPurifier_Config::create() supports.
|
||||
* @return Purified HTML
|
||||
*/
|
||||
function purify($html, $config = null) {
|
||||
$config = $config ? $config : $this->config;
|
||||
$context =& new HTMLPurifier_Context();
|
||||
$html = $this->encoder->convertToUTF8($html, $config, $context);
|
||||
|
||||
$config = $config ? HTMLPurifier_Config::create($config) : $this->config;
|
||||
|
||||
$context = new HTMLPurifier_Context();
|
||||
$html = HTMLPurifier_Encoder::convertToUTF8($html, $config, $context);
|
||||
|
||||
for ($i = 0, $size = count($this->filters); $i < $size; $i++) {
|
||||
$html = $this->filters[$i]->preFilter($html, $config, $context);
|
||||
}
|
||||
|
||||
// purified HTML
|
||||
$html =
|
||||
$this->generator->generateFromTokens(
|
||||
// list of tokens
|
||||
$this->strategy->execute(
|
||||
$this->lexer->tokenizeHTML($html, $config, $context),
|
||||
// list of un-purified tokens
|
||||
$this->lexer->tokenizeHTML(
|
||||
// un-purified HTML
|
||||
$html, $config, $context
|
||||
),
|
||||
$config, $context
|
||||
),
|
||||
$config, $context
|
||||
);
|
||||
$html = $this->encoder->convertFromUTF8($html, $config, $context);
|
||||
|
||||
for ($i = $size - 1; $i >= 0; $i--) {
|
||||
$html = $this->filters[$i]->postFilter($html, $config, $context);
|
||||
}
|
||||
|
||||
$html = HTMLPurifier_Encoder::convertFromUTF8($html, $config, $context);
|
||||
$this->context =& $context;
|
||||
return $html;
|
||||
}
|
||||
|
||||
/**
|
||||
* Filters an array of HTML snippets
|
||||
* @param $config Optional HTMLPurifier_Config object for this operation.
|
||||
* See HTMLPurifier::purify() for more details.
|
||||
* @return Array of purified HTML
|
||||
*/
|
||||
function purifyArray($array_of_html, $config = null) {
|
||||
$context_array = array();
|
||||
foreach ($array_of_html as $key => $html) {
|
||||
$array_of_html[$key] = $this->purify($html, $config);
|
||||
$context_array[$key] = $this->context;
|
||||
}
|
||||
$this->context = $context_array;
|
||||
return $array_of_html;
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
?>
|
100
library/HTMLPurifier/AttrCollections.php
Normal file
100
library/HTMLPurifier/AttrCollections.php
Normal file
@@ -0,0 +1,100 @@
|
||||
<?php
|
||||
|
||||
require_once 'HTMLPurifier/AttrTypes.php';
|
||||
require_once 'HTMLPurifier/AttrDef/Lang.php';
|
||||
|
||||
/**
|
||||
* Defines common attribute collections that modules reference
|
||||
*/
|
||||
|
||||
class HTMLPurifier_AttrCollections
|
||||
{
|
||||
|
||||
/**
|
||||
* Associative array of attribute collections, indexed by name
|
||||
* @note Technically, the composition of these is more complicated,
|
||||
* but we bypass it using our own excludes property
|
||||
*/
|
||||
var $info = array();
|
||||
|
||||
/**
|
||||
* Performs all expansions on internal data for use by other inclusions
|
||||
* It also collects all attribute collection extensions from
|
||||
* modules
|
||||
* @param $attr_types HTMLPurifier_AttrTypes instance
|
||||
* @param $modules Hash array of HTMLPurifier_HTMLModule members
|
||||
*/
|
||||
function HTMLPurifier_AttrCollections($attr_types, $modules) {
|
||||
$info =& $this->info;
|
||||
// load extensions from the modules
|
||||
foreach ($modules as $module) {
|
||||
foreach ($module->attr_collections as $coll_i => $coll) {
|
||||
foreach ($coll as $attr_i => $attr) {
|
||||
if ($attr_i === 0 && isset($info[$coll_i][$attr_i])) {
|
||||
// merge in includes
|
||||
$info[$coll_i][$attr_i] = array_merge(
|
||||
$info[$coll_i][$attr_i], $attr);
|
||||
continue;
|
||||
}
|
||||
$info[$coll_i][$attr_i] = $attr;
|
||||
}
|
||||
}
|
||||
}
|
||||
// perform internal expansions and inclusions
|
||||
foreach ($info as $name => $attr) {
|
||||
// merge attribute collections that include others
|
||||
$this->performInclusions($info[$name]);
|
||||
// replace string identifiers with actual attribute objects
|
||||
$this->expandIdentifiers($info[$name], $attr_types);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Takes a reference to an attribute associative array and performs
|
||||
* all inclusions specified by the zero index.
|
||||
* @param &$attr Reference to attribute array
|
||||
*/
|
||||
function performInclusions(&$attr) {
|
||||
if (!isset($attr[0])) return;
|
||||
$merge = $attr[0];
|
||||
// loop through all the inclusions
|
||||
for ($i = 0; isset($merge[$i]); $i++) {
|
||||
// foreach attribute of the inclusion, copy it over
|
||||
foreach ($this->info[$merge[$i]] as $key => $value) {
|
||||
if (isset($attr[$key])) continue; // also catches more inclusions
|
||||
$attr[$key] = $value;
|
||||
}
|
||||
if (isset($info[$merge[$i]][0])) {
|
||||
// recursion
|
||||
$merge = array_merge($merge, isset($info[$merge[$i]][0]));
|
||||
}
|
||||
}
|
||||
unset($attr[0]);
|
||||
}
|
||||
|
||||
/**
|
||||
* Expands all string identifiers in an attribute array by replacing
|
||||
* them with the appropriate values inside HTMLPurifier_AttrTypes
|
||||
* @param &$attr Reference to attribute array
|
||||
* @param $attr_types HTMLPurifier_AttrTypes instance
|
||||
*/
|
||||
function expandIdentifiers(&$attr, $attr_types) {
|
||||
foreach ($attr as $def_i => $def) {
|
||||
if ($def_i === 0) continue;
|
||||
if (!is_string($def)) continue;
|
||||
if ($def === false) {
|
||||
unset($attr[$def_i]);
|
||||
continue;
|
||||
}
|
||||
if (isset($attr_types->info[$def])) {
|
||||
$attr[$def_i] = $attr_types->info[$def];
|
||||
} else {
|
||||
trigger_error('Attempted to reference undefined attribute type', E_USER_ERROR);
|
||||
unset($attr[$def_i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
?>
|
@@ -8,6 +8,11 @@ require_once 'HTMLPurifier/CSSDefinition.php';
|
||||
* @note We don't implement the whole CSS specification, so it might be
|
||||
* difficult to reuse this component in the context of validating
|
||||
* actual stylesheet declarations.
|
||||
* @note If we were really serious about validating the CSS, we would
|
||||
* tokenize the styles and then parse the tokens. Obviously, we
|
||||
* are not doing that. Doing that could seriously harm performance,
|
||||
* but would make these components a lot more viable for a CSS
|
||||
* filtering solution.
|
||||
*/
|
||||
class HTMLPurifier_AttrDef_CSS extends HTMLPurifier_AttrDef
|
||||
{
|
||||
@@ -20,6 +25,9 @@ class HTMLPurifier_AttrDef_CSS extends HTMLPurifier_AttrDef
|
||||
|
||||
// we're going to break the spec and explode by semicolons.
|
||||
// This is because semicolon rarely appears in escaped form
|
||||
// Doing this is generally flaky but fast
|
||||
// IT MIGHT APPEAR IN URIs, see HTMLPurifier_AttrDef_CSSURI
|
||||
// for details
|
||||
|
||||
$declarations = explode(';', $css);
|
||||
$propvalues = array();
|
||||
|
87
library/HTMLPurifier/AttrDef/CSS/Background.php
Normal file
87
library/HTMLPurifier/AttrDef/CSS/Background.php
Normal file
@@ -0,0 +1,87 @@
|
||||
<?php
|
||||
|
||||
require_once 'HTMLPurifier/AttrDef.php';
|
||||
require_once 'HTMLPurifier/CSSDefinition.php';
|
||||
|
||||
/**
|
||||
* Validates shorthand CSS property background.
|
||||
* @warning Does not support url tokens that have internal spaces.
|
||||
*/
|
||||
class HTMLPurifier_AttrDef_CSS_Background extends HTMLPurifier_AttrDef
|
||||
{
|
||||
|
||||
/**
|
||||
* Local copy of component validators.
|
||||
* @note See HTMLPurifier_AttrDef_Font::$info for a similar impl.
|
||||
*/
|
||||
var $info;
|
||||
|
||||
function HTMLPurifier_AttrDef_CSS_Background($config) {
|
||||
$def = $config->getCSSDefinition();
|
||||
$this->info['background-color'] = $def->info['background-color'];
|
||||
$this->info['background-image'] = $def->info['background-image'];
|
||||
$this->info['background-repeat'] = $def->info['background-repeat'];
|
||||
$this->info['background-attachment'] = $def->info['background-attachment'];
|
||||
$this->info['background-position'] = $def->info['background-position'];
|
||||
}
|
||||
|
||||
function validate($string, $config, &$context) {
|
||||
|
||||
// regular pre-processing
|
||||
$string = $this->parseCDATA($string);
|
||||
if ($string === '') return false;
|
||||
|
||||
// assumes URI doesn't have spaces in it
|
||||
$bits = explode(' ', strtolower($string)); // bits to process
|
||||
|
||||
$caught = array();
|
||||
$caught['color'] = false;
|
||||
$caught['image'] = false;
|
||||
$caught['repeat'] = false;
|
||||
$caught['attachment'] = false;
|
||||
$caught['position'] = false;
|
||||
|
||||
$i = 0; // number of catches
|
||||
$none = false;
|
||||
|
||||
foreach ($bits as $bit) {
|
||||
if ($bit === '') continue;
|
||||
foreach ($caught as $key => $status) {
|
||||
if ($key != 'position') {
|
||||
if ($status !== false) continue;
|
||||
$r = $this->info['background-' . $key]->validate($bit, $config, $context);
|
||||
} else {
|
||||
$r = $bit;
|
||||
}
|
||||
if ($r === false) continue;
|
||||
if ($key == 'position') {
|
||||
if ($caught[$key] === false) $caught[$key] = '';
|
||||
$caught[$key] .= $r . ' ';
|
||||
} else {
|
||||
$caught[$key] = $r;
|
||||
}
|
||||
$i++;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (!$i) return false;
|
||||
if ($caught['position'] !== false) {
|
||||
$caught['position'] = $this->info['background-position']->
|
||||
validate($caught['position'], $config, $context);
|
||||
}
|
||||
|
||||
$ret = array();
|
||||
foreach ($caught as $value) {
|
||||
if ($value === false) continue;
|
||||
$ret[] = $value;
|
||||
}
|
||||
|
||||
if (empty($ret)) return false;
|
||||
return implode(' ', $ret);
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
?>
|
130
library/HTMLPurifier/AttrDef/CSS/BackgroundPosition.php
Normal file
130
library/HTMLPurifier/AttrDef/CSS/BackgroundPosition.php
Normal file
@@ -0,0 +1,130 @@
|
||||
<?php
|
||||
|
||||
require_once 'HTMLPurifier/AttrDef.php';
|
||||
require_once 'HTMLPurifier/AttrDef/CSS/Length.php';
|
||||
require_once 'HTMLPurifier/AttrDef/CSS/Percentage.php';
|
||||
|
||||
/* W3C says:
|
||||
[ // adjective and number must be in correct order, even if
|
||||
// you could switch them without introducing ambiguity.
|
||||
// some browsers support that syntax
|
||||
[
|
||||
<percentage> | <length> | left | center | right
|
||||
]
|
||||
[
|
||||
<percentage> | <length> | top | center | bottom
|
||||
]?
|
||||
] |
|
||||
[ // this signifies that the vertical and horizontal adjectives
|
||||
// can be arbitrarily ordered, however, there can only be two,
|
||||
// one of each, or none at all
|
||||
[
|
||||
left | center | right
|
||||
] ||
|
||||
[
|
||||
top | center | bottom
|
||||
]
|
||||
]
|
||||
top, left = 0%
|
||||
center, (none) = 50%
|
||||
bottom, right = 100%
|
||||
*/
|
||||
|
||||
/* QuirksMode says:
|
||||
keyword + length/percentage must be ordered correctly, as per W3C
|
||||
|
||||
Internet Explorer and Opera, however, support arbitrary ordering. We
|
||||
should fix it up.
|
||||
|
||||
Minor issue though, not strictly necessary.
|
||||
*/
|
||||
|
||||
// control freaks may appreciate the ability to convert these to
|
||||
// percentages or something, but it's not necessary
|
||||
|
||||
/**
|
||||
* Validates the value of background-position.
|
||||
*/
|
||||
class HTMLPurifier_AttrDef_CSS_BackgroundPosition extends HTMLPurifier_AttrDef
|
||||
{
|
||||
|
||||
var $length;
|
||||
var $percentage;
|
||||
|
||||
function HTMLPurifier_AttrDef_CSS_BackgroundPosition() {
|
||||
$this->length = new HTMLPurifier_AttrDef_CSS_Length();
|
||||
$this->percentage = new HTMLPurifier_AttrDef_CSS_Percentage();
|
||||
}
|
||||
|
||||
function validate($string, $config, &$context) {
|
||||
$string = $this->parseCDATA($string);
|
||||
$bits = explode(' ', $string);
|
||||
|
||||
$keywords = array();
|
||||
$keywords['h'] = false; // left, right
|
||||
$keywords['v'] = false; // top, bottom
|
||||
$keywords['c'] = false; // center
|
||||
$measures = array();
|
||||
|
||||
$i = 0;
|
||||
|
||||
$lookup = array(
|
||||
'top' => 'v',
|
||||
'bottom' => 'v',
|
||||
'left' => 'h',
|
||||
'right' => 'h',
|
||||
'center' => 'c'
|
||||
);
|
||||
|
||||
foreach ($bits as $bit) {
|
||||
if ($bit === '') continue;
|
||||
|
||||
// test for keyword
|
||||
$lbit = ctype_lower($bit) ? $bit : strtolower($bit);
|
||||
if (isset($lookup[$lbit])) {
|
||||
$status = $lookup[$lbit];
|
||||
$keywords[$status] = $lbit;
|
||||
$i++;
|
||||
}
|
||||
|
||||
// test for length
|
||||
$r = $this->length->validate($bit, $config, $context);
|
||||
if ($r !== false) {
|
||||
$measures[] = $r;
|
||||
$i++;
|
||||
}
|
||||
|
||||
// test for percentage
|
||||
$r = $this->percentage->validate($bit, $config, $context);
|
||||
if ($r !== false) {
|
||||
$measures[] = $r;
|
||||
$i++;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
if (!$i) return false; // no valid values were caught
|
||||
|
||||
|
||||
$ret = array();
|
||||
|
||||
// first keyword
|
||||
if ($keywords['h']) $ret[] = $keywords['h'];
|
||||
elseif (count($measures)) $ret[] = array_shift($measures);
|
||||
elseif ($keywords['c']) {
|
||||
$ret[] = $keywords['c'];
|
||||
$keywords['c'] = false; // prevent re-use: center = center center
|
||||
}
|
||||
|
||||
if ($keywords['v']) $ret[] = $keywords['v'];
|
||||
elseif (count($measures)) $ret[] = array_shift($measures);
|
||||
elseif ($keywords['c']) $ret[] = $keywords['c'];
|
||||
|
||||
if (empty($ret)) return false;
|
||||
return implode(' ', $ret);
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
?>
|
@@ -5,7 +5,7 @@ require_once 'HTMLPurifier/AttrDef.php';
|
||||
/**
|
||||
* Validates the border property as defined by CSS.
|
||||
*/
|
||||
class HTMLPurifier_AttrDef_Border extends HTMLPurifier_AttrDef
|
||||
class HTMLPurifier_AttrDef_CSS_Border extends HTMLPurifier_AttrDef
|
||||
{
|
||||
|
||||
/**
|
||||
@@ -13,7 +13,7 @@ class HTMLPurifier_AttrDef_Border extends HTMLPurifier_AttrDef
|
||||
*/
|
||||
var $info = array();
|
||||
|
||||
function HTMLPurifier_AttrDef_Border($config) {
|
||||
function HTMLPurifier_AttrDef_CSS_Border($config) {
|
||||
$def = $config->getCSSDefinition();
|
||||
$this->info['border-width'] = $def->info['border-width'];
|
||||
$this->info['border-style'] = $def->info['border-style'];
|
@@ -5,7 +5,7 @@ require_once 'HTMLPurifier/AttrDef.php';
|
||||
/**
|
||||
* Validates Color as defined by CSS.
|
||||
*/
|
||||
class HTMLPurifier_AttrDef_Color extends HTMLPurifier_AttrDef
|
||||
class HTMLPurifier_AttrDef_CSS_Color extends HTMLPurifier_AttrDef
|
||||
{
|
||||
|
||||
/**
|
@@ -9,7 +9,7 @@
|
||||
* especially useful for CSS values, which often are a choice between
|
||||
* an enumerated set of predefined values or a flexible data type.
|
||||
*/
|
||||
class HTMLPurifier_AttrDef_Composite extends HTMLPurifier_AttrDef
|
||||
class HTMLPurifier_AttrDef_CSS_Composite extends HTMLPurifier_AttrDef
|
||||
{
|
||||
|
||||
/**
|
||||
@@ -21,7 +21,7 @@ class HTMLPurifier_AttrDef_Composite extends HTMLPurifier_AttrDef
|
||||
/**
|
||||
* @param $defs List of HTMLPurifier_AttrDef objects
|
||||
*/
|
||||
function HTMLPurifier_AttrDef_Composite($defs) {
|
||||
function HTMLPurifier_AttrDef_CSS_Composite($defs) {
|
||||
$this->defs = $defs;
|
||||
}
|
||||
|
@@ -5,7 +5,7 @@ require_once 'HTMLPurifier/AttrDef.php';
|
||||
/**
|
||||
* Validates shorthand CSS property font.
|
||||
*/
|
||||
class HTMLPurifier_AttrDef_Font extends HTMLPurifier_AttrDef
|
||||
class HTMLPurifier_AttrDef_CSS_Font extends HTMLPurifier_AttrDef
|
||||
{
|
||||
|
||||
/**
|
||||
@@ -30,7 +30,7 @@ class HTMLPurifier_AttrDef_Font extends HTMLPurifier_AttrDef
|
||||
'status-bar' => true
|
||||
);
|
||||
|
||||
function HTMLPurifier_AttrDef_Font($config) {
|
||||
function HTMLPurifier_AttrDef_CSS_Font($config) {
|
||||
$def = $config->getCSSDefinition();
|
||||
$this->info['font-style'] = $def->info['font-style'];
|
||||
$this->info['font-variant'] = $def->info['font-variant'];
|
@@ -7,7 +7,7 @@ require_once 'HTMLPurifier/AttrDef.php';
|
||||
/**
|
||||
* Validates a font family list according to CSS spec
|
||||
*/
|
||||
class HTMLPurifier_AttrDef_FontFamily extends HTMLPurifier_AttrDef
|
||||
class HTMLPurifier_AttrDef_CSS_FontFamily extends HTMLPurifier_AttrDef
|
||||
{
|
||||
|
||||
/**
|
@@ -1,13 +1,12 @@
|
||||
<?php
|
||||
|
||||
require_once 'HTMLPurifier/AttrDef.php';
|
||||
require_once 'HTMLPurifier/AttrDef/Number.php';
|
||||
require_once 'HTMLPurifier/AttrDef/CSS/Number.php';
|
||||
|
||||
/**
|
||||
* Represents a Length as defined by CSS.
|
||||
* @warning Be sure not to confuse this with HTMLPurifier_AttrDef_Length!
|
||||
*/
|
||||
class HTMLPurifier_AttrDef_CSSLength extends HTMLPurifier_AttrDef
|
||||
class HTMLPurifier_AttrDef_CSS_Length extends HTMLPurifier_AttrDef
|
||||
{
|
||||
|
||||
/**
|
||||
@@ -26,8 +25,8 @@ class HTMLPurifier_AttrDef_CSSLength extends HTMLPurifier_AttrDef
|
||||
* @param $non_negative Bool indication whether or not negative values are
|
||||
* allowed.
|
||||
*/
|
||||
function HTMLPurifier_AttrDef_CSSLength($non_negative = false) {
|
||||
$this->number_def = new HTMLPurifier_AttrDef_Number($non_negative);
|
||||
function HTMLPurifier_AttrDef_CSS_Length($non_negative = false) {
|
||||
$this->number_def = new HTMLPurifier_AttrDef_CSS_Number($non_negative);
|
||||
}
|
||||
|
||||
function validate($length, $config, &$context) {
|
||||
@@ -40,6 +39,7 @@ class HTMLPurifier_AttrDef_CSSLength extends HTMLPurifier_AttrDef
|
||||
|
||||
// we assume all units are two characters
|
||||
$unit = substr($length, $strlen - 2);
|
||||
if (!ctype_lower($unit)) $unit = strtolower($unit);
|
||||
$number = substr($length, 0, $strlen - 2);
|
||||
|
||||
if (!isset($this->units[$unit])) return false;
|
80
library/HTMLPurifier/AttrDef/CSS/ListStyle.php
Normal file
80
library/HTMLPurifier/AttrDef/CSS/ListStyle.php
Normal file
@@ -0,0 +1,80 @@
|
||||
<?php
|
||||
|
||||
require_once 'HTMLPurifier/AttrDef.php';
|
||||
|
||||
/**
|
||||
* Validates shorthand CSS property list-style.
|
||||
* @warning Does not support url tokens that have internal spaces.
|
||||
*/
|
||||
class HTMLPurifier_AttrDef_CSS_ListStyle extends HTMLPurifier_AttrDef
|
||||
{
|
||||
|
||||
/**
|
||||
* Local copy of component validators.
|
||||
* @note See HTMLPurifier_AttrDef_CSS_Font::$info for a similar impl.
|
||||
*/
|
||||
var $info;
|
||||
|
||||
function HTMLPurifier_AttrDef_CSS_ListStyle($config) {
|
||||
$def = $config->getCSSDefinition();
|
||||
$this->info['list-style-type'] = $def->info['list-style-type'];
|
||||
$this->info['list-style-position'] = $def->info['list-style-position'];
|
||||
$this->info['list-style-image'] = $def->info['list-style-image'];
|
||||
}
|
||||
|
||||
function validate($string, $config, &$context) {
|
||||
|
||||
// regular pre-processing
|
||||
$string = $this->parseCDATA($string);
|
||||
if ($string === '') return false;
|
||||
|
||||
// assumes URI doesn't have spaces in it
|
||||
$bits = explode(' ', strtolower($string)); // bits to process
|
||||
|
||||
$caught = array();
|
||||
$caught['type'] = false;
|
||||
$caught['position'] = false;
|
||||
$caught['image'] = false;
|
||||
|
||||
$i = 0; // number of catches
|
||||
$none = false;
|
||||
|
||||
foreach ($bits as $bit) {
|
||||
if ($i >= 3) return; // optimization bit
|
||||
if ($bit === '') continue;
|
||||
foreach ($caught as $key => $status) {
|
||||
if ($status !== false) continue;
|
||||
$r = $this->info['list-style-' . $key]->validate($bit, $config, $context);
|
||||
if ($r === false) continue;
|
||||
if ($r === 'none') {
|
||||
if ($none) continue;
|
||||
else $none = true;
|
||||
if ($key == 'image') continue;
|
||||
}
|
||||
$caught[$key] = $r;
|
||||
$i++;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (!$i) return false;
|
||||
|
||||
$ret = array();
|
||||
|
||||
// construct type
|
||||
if ($caught['type']) $ret[] = $caught['type'];
|
||||
|
||||
// construct image
|
||||
if ($caught['image']) $ret[] = $caught['image'];
|
||||
|
||||
// construct position
|
||||
if ($caught['position']) $ret[] = $caught['position'];
|
||||
|
||||
if (empty($ret)) return false;
|
||||
return implode(' ', $ret);
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
?>
|
@@ -13,7 +13,7 @@ require_once 'HTMLPurifier/AttrDef.php';
|
||||
* can only be used alone: it will never manifest as part of a multi
|
||||
* shorthand declaration. Thus, this class does not allow inherit.
|
||||
*/
|
||||
class HTMLPurifier_AttrDef_Multiple extends HTMLPurifier_AttrDef
|
||||
class HTMLPurifier_AttrDef_CSS_Multiple extends HTMLPurifier_AttrDef
|
||||
{
|
||||
|
||||
/**
|
||||
@@ -30,7 +30,7 @@ class HTMLPurifier_AttrDef_Multiple extends HTMLPurifier_AttrDef
|
||||
* @param $single HTMLPurifier_AttrDef to multiply
|
||||
* @param $max Max number of values allowed (usually four)
|
||||
*/
|
||||
function HTMLPurifier_AttrDef_Multiple($single, $max = 4) {
|
||||
function HTMLPurifier_AttrDef_CSS_Multiple($single, $max = 4) {
|
||||
$this->single = $single;
|
||||
$this->max = $max;
|
||||
}
|
@@ -3,7 +3,7 @@
|
||||
/**
|
||||
* Validates a number as defined by the CSS spec.
|
||||
*/
|
||||
class HTMLPurifier_AttrDef_Number extends HTMLPurifier_AttrDef
|
||||
class HTMLPurifier_AttrDef_CSS_Number extends HTMLPurifier_AttrDef
|
||||
{
|
||||
|
||||
/**
|
||||
@@ -14,7 +14,7 @@ class HTMLPurifier_AttrDef_Number extends HTMLPurifier_AttrDef
|
||||
/**
|
||||
* @param $non_negative Bool indicating whether negatives are forbidden
|
||||
*/
|
||||
function HTMLPurifier_AttrDef_Number($non_negative = false) {
|
||||
function HTMLPurifier_AttrDef_CSS_Number($non_negative = false) {
|
||||
$this->non_negative = $non_negative;
|
||||
}
|
||||
|
@@ -1,25 +1,24 @@
|
||||
<?php
|
||||
|
||||
require_once 'HTMLPurifier/AttrDef.php';
|
||||
require_once 'HTMLPurifier/AttrDef/Number.php';
|
||||
require_once 'HTMLPurifier/AttrDef/CSS/Number.php';
|
||||
|
||||
/**
|
||||
* Validates a Percentage as defined by the HTML spec.
|
||||
* @note This also allows integer pixel values.
|
||||
* Validates a Percentage as defined by the CSS spec.
|
||||
*/
|
||||
class HTMLPurifier_AttrDef_Percentage extends HTMLPurifier_AttrDef
|
||||
class HTMLPurifier_AttrDef_CSS_Percentage extends HTMLPurifier_AttrDef
|
||||
{
|
||||
|
||||
/**
|
||||
* Instance of HTMLPurifier_AttrDef_Number to defer pixel validation
|
||||
* Instance of HTMLPurifier_AttrDef_CSS_Number to defer number validation
|
||||
*/
|
||||
var $number_def;
|
||||
|
||||
/**
|
||||
* @param Bool indicating whether to forbid negative values
|
||||
*/
|
||||
function HTMLPurifier_AttrDef_Percentage($non_negative = false) {
|
||||
$this->number_def = new HTMLPurifier_AttrDef_Number($non_negative);
|
||||
function HTMLPurifier_AttrDef_CSS_Percentage($non_negative = false) {
|
||||
$this->number_def = new HTMLPurifier_AttrDef_CSS_Number($non_negative);
|
||||
}
|
||||
|
||||
function validate($string, $config, &$context) {
|
@@ -7,7 +7,7 @@ require_once 'HTMLPurifier/AttrDef.php';
|
||||
* @note This class could be generalized into a version that acts sort of
|
||||
* like Enum except you can compound the allowed values.
|
||||
*/
|
||||
class HTMLPurifier_AttrDef_TextDecoration extends HTMLPurifier_AttrDef
|
||||
class HTMLPurifier_AttrDef_CSS_TextDecoration extends HTMLPurifier_AttrDef
|
||||
{
|
||||
|
||||
/**
|
58
library/HTMLPurifier/AttrDef/CSS/URI.php
Normal file
58
library/HTMLPurifier/AttrDef/CSS/URI.php
Normal file
@@ -0,0 +1,58 @@
|
||||
<?php
|
||||
|
||||
require_once 'HTMLPurifier/AttrDef/URI.php';
|
||||
|
||||
/**
|
||||
* Validates a URI in CSS syntax, which uses url('http://example.com')
|
||||
* @note While theoretically speaking a URI in a CSS document could
|
||||
* be non-embedded, as of CSS2 there is no such usage so we're
|
||||
* generalizing it. This may need to be changed in the future.
|
||||
* @warning Since HTMLPurifier_AttrDef_CSS blindly uses semicolons as
|
||||
* the separator, you cannot put a literal semicolon in
|
||||
* in the URI. Try percent encoding it, in that case.
|
||||
*/
|
||||
class HTMLPurifier_AttrDef_CSS_URI extends HTMLPurifier_AttrDef_URI
|
||||
{
|
||||
|
||||
function HTMLPurifier_AttrDef_CSS_URI() {
|
||||
$this->HTMLPurifier_AttrDef_URI(true); // always embedded
|
||||
}
|
||||
|
||||
function validate($uri_string, $config, &$context) {
|
||||
// parse the URI out of the string and then pass it onto
|
||||
// the parent object
|
||||
|
||||
$uri_string = $this->parseCDATA($uri_string);
|
||||
if (strpos($uri_string, 'url(') !== 0) return false;
|
||||
$uri_string = substr($uri_string, 4);
|
||||
$new_length = strlen($uri_string) - 1;
|
||||
if ($uri_string[$new_length] != ')') return false;
|
||||
$uri = trim(substr($uri_string, 0, $new_length));
|
||||
|
||||
if (isset($uri[0]) && ($uri[0] == "'" || $uri[0] == '"')) {
|
||||
$quote = $uri[0];
|
||||
$new_length = strlen($uri) - 1;
|
||||
if ($uri[$new_length] !== $quote) return false;
|
||||
$uri = substr($uri, 1, $new_length - 1);
|
||||
}
|
||||
|
||||
$keys = array( '(', ')', ',', ' ', '"', "'");
|
||||
$values = array('\\(', '\\)', '\\,', '\\ ', '\\"', "\\'");
|
||||
$uri = str_replace($values, $keys, $uri);
|
||||
|
||||
$result = parent::validate($uri, $config, $context);
|
||||
|
||||
if ($result === false) return false;
|
||||
|
||||
// escape necessary characters according to CSS spec
|
||||
// except for the comma, none of these should appear in the
|
||||
// URI at all
|
||||
$result = str_replace($keys, $values, $result);
|
||||
|
||||
return "url($result)";
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
?>
|
@@ -25,8 +25,8 @@ class HTMLPurifier_AttrDef_Enum extends HTMLPurifier_AttrDef
|
||||
* @param $case_sensitive Bool indicating whether or not case sensitive
|
||||
*/
|
||||
function HTMLPurifier_AttrDef_Enum(
|
||||
$valid_values = array(), $case_sensitive = false) {
|
||||
|
||||
$valid_values = array(), $case_sensitive = false
|
||||
) {
|
||||
$this->valid_values = array_flip($valid_values);
|
||||
$this->case_sensitive = $case_sensitive;
|
||||
}
|
||||
|
@@ -3,6 +3,22 @@
|
||||
require_once 'HTMLPurifier/AttrDef.php';
|
||||
require_once 'HTMLPurifier/IDAccumulator.php';
|
||||
|
||||
HTMLPurifier_ConfigSchema::define(
|
||||
'Attr', 'EnableID', false, 'bool',
|
||||
'Allows the ID attribute in HTML. This is disabled by default '.
|
||||
'due to the fact that without proper configuration user input can '.
|
||||
'easily break the validation of a webpage by specifying an ID that is '.
|
||||
'already on the surrounding HTML. If you don\'t mind throwing caution to '.
|
||||
'the wind, enable this directive, but I strongly recommend you also '.
|
||||
'consider blacklisting IDs you use (%Attr.IDBlacklist) or prefixing all '.
|
||||
'user supplied IDs (%Attr.IDPrefix). This directive has been available '.
|
||||
'since 1.2.0, and when set to true reverts to the behavior of pre-1.2.0 '.
|
||||
'versions.'
|
||||
);
|
||||
HTMLPurifier_ConfigSchema::defineAlias(
|
||||
'HTML', 'EnableAttrID', 'Attr', 'EnableID'
|
||||
);
|
||||
|
||||
HTMLPurifier_ConfigSchema::define(
|
||||
'Attr', 'IDPrefix', '', 'string',
|
||||
'String to prefix to IDs. If you have no idea what IDs your pages '.
|
||||
@@ -36,11 +52,16 @@ HTMLPurifier_ConfigSchema::define(
|
||||
* blacklist. If you're hacking around, make sure you use load()!
|
||||
*/
|
||||
|
||||
class HTMLPurifier_AttrDef_ID extends HTMLPurifier_AttrDef
|
||||
class HTMLPurifier_AttrDef_HTML_ID extends HTMLPurifier_AttrDef
|
||||
{
|
||||
|
||||
// ref functionality disabled, since we also have to verify
|
||||
// whether or not the ID it refers to exists
|
||||
|
||||
function validate($id, $config, &$context) {
|
||||
|
||||
if (!$config->get('Attr', 'EnableID')) return false;
|
||||
|
||||
$id = trim($id); // trim it first
|
||||
|
||||
if ($id === '') return false;
|
||||
@@ -55,8 +76,10 @@ class HTMLPurifier_AttrDef_ID extends HTMLPurifier_AttrDef
|
||||
'%Attr.IDPrefix is set', E_USER_WARNING);
|
||||
}
|
||||
|
||||
$id_accumulator =& $context->get('IDAccumulator');
|
||||
if (isset($id_accumulator->ids[$id])) return false;
|
||||
//if (!$this->ref) {
|
||||
$id_accumulator =& $context->get('IDAccumulator');
|
||||
if (isset($id_accumulator->ids[$id])) return false;
|
||||
//}
|
||||
|
||||
// we purposely avoid using regex, hopefully this is faster
|
||||
|
||||
@@ -71,7 +94,7 @@ class HTMLPurifier_AttrDef_ID extends HTMLPurifier_AttrDef
|
||||
$result = ($trim === '');
|
||||
}
|
||||
|
||||
if ($result) $id_accumulator->add($id);
|
||||
if (/*!$this->ref && */$result) $id_accumulator->add($id);
|
||||
|
||||
// if no change was made to the ID, return the result
|
||||
// else, return the new id if stripping whitespace made it
|
@@ -1,18 +1,16 @@
|
||||
<?php
|
||||
|
||||
require_once 'HTMLPurifier/AttrDef.php';
|
||||
require_once 'HTMLPurifier/AttrDef/Pixels.php';
|
||||
require_once 'HTMLPurifier/AttrDef/HTML/Pixels.php';
|
||||
|
||||
/**
|
||||
* Validates the HTML type length (not to be confused with CSS's length).
|
||||
*
|
||||
* This accepts integer pixels or percentages as lengths for certain
|
||||
* HTML attributes. Don't use this for CSS: that's
|
||||
* HTMLPurifier_AttrDef_CSSLength which requires prefixes and allows a lot
|
||||
* more different types.
|
||||
* HTML attributes.
|
||||
*/
|
||||
|
||||
class HTMLPurifier_AttrDef_Length extends HTMLPurifier_AttrDef_Pixels
|
||||
class HTMLPurifier_AttrDef_HTML_Length extends HTMLPurifier_AttrDef_HTML_Pixels
|
||||
{
|
||||
|
||||
function validate($string, $config, &$context) {
|
@@ -1,7 +1,7 @@
|
||||
<?php
|
||||
|
||||
require_once 'HTMLPurifier/AttrDef.php';
|
||||
require_once 'HTMLPurifier/AttrDef/Length.php';
|
||||
require_once 'HTMLPurifier/AttrDef/HTML/Length.php';
|
||||
|
||||
/**
|
||||
* Validates a MultiLength as defined by the HTML spec.
|
||||
@@ -9,7 +9,7 @@ require_once 'HTMLPurifier/AttrDef/Length.php';
|
||||
* A multilength is either a integer (pixel count), a percentage, or
|
||||
* a relative number.
|
||||
*/
|
||||
class HTMLPurifier_AttrDef_MultiLength extends HTMLPurifier_AttrDef_Length
|
||||
class HTMLPurifier_AttrDef_HTML_MultiLength extends HTMLPurifier_AttrDef_HTML_Length
|
||||
{
|
||||
|
||||
function validate($string, $config, &$context) {
|
||||
@@ -27,12 +27,14 @@ class HTMLPurifier_AttrDef_MultiLength extends HTMLPurifier_AttrDef_Length
|
||||
|
||||
$int = substr($string, 0, $length - 1);
|
||||
|
||||
if ($int == '') return '*';
|
||||
if (!is_numeric($int)) return false;
|
||||
|
||||
$int = (int) $int;
|
||||
|
||||
if ($int < 0) return '0*';
|
||||
|
||||
if ($int < 0) return false;
|
||||
if ($int == 0) return '0';
|
||||
if ($int == 1) return '*';
|
||||
return ((string) $int) . '*';
|
||||
|
||||
}
|
@@ -4,9 +4,13 @@ require_once 'HTMLPurifier/AttrDef.php';
|
||||
require_once 'HTMLPurifier/Config.php';
|
||||
|
||||
/**
|
||||
* Validates the contents of the global HTML attribute class.
|
||||
* Validates contents based on NMTOKENS attribute type.
|
||||
* @note The only current use for this is the class attribute in HTML
|
||||
* @note Could have some functionality factored out into Nmtoken class
|
||||
* @warning We cannot assume this class will be used only for 'class'
|
||||
* attributes. Not sure how to hook in magic behavior, then.
|
||||
*/
|
||||
class HTMLPurifier_AttrDef_Class extends HTMLPurifier_AttrDef
|
||||
class HTMLPurifier_AttrDef_HTML_Nmtokens extends HTMLPurifier_AttrDef
|
||||
{
|
||||
|
||||
function validate($string, $config, &$context) {
|
||||
@@ -31,10 +35,10 @@ class HTMLPurifier_AttrDef_Class extends HTMLPurifier_AttrDef
|
||||
|
||||
if (empty($matches[1])) return false;
|
||||
|
||||
// reconstruct class string
|
||||
// reconstruct string
|
||||
$new_string = '';
|
||||
foreach ($matches[1] as $class_names) {
|
||||
$new_string .= $class_names . ' ';
|
||||
foreach ($matches[1] as $token) {
|
||||
$new_string .= $token . ' ';
|
||||
}
|
||||
$new_string = rtrim($new_string);
|
||||
|
@@ -5,7 +5,7 @@ require_once 'HTMLPurifier/AttrDef.php';
|
||||
/**
|
||||
* Validates an integer representation of pixels according to the HTML spec.
|
||||
*/
|
||||
class HTMLPurifier_AttrDef_Pixels extends HTMLPurifier_AttrDef
|
||||
class HTMLPurifier_AttrDef_HTML_Pixels extends HTMLPurifier_AttrDef
|
||||
{
|
||||
|
||||
function validate($string, $config, &$context) {
|
@@ -46,10 +46,10 @@ class HTMLPurifier_AttrDef_Lang extends HTMLPurifier_AttrDef
|
||||
|
||||
// process second subtag : $subtags[1]
|
||||
$length = strlen($subtags[1]);
|
||||
if ($length == 0 || $length == 1 || $length > 8 || !ctype_alnum($subtags[1])) {
|
||||
if ($length == 0 || ($length == 1 && $subtags[1] != 'x') || $length > 8 || !ctype_alnum($subtags[1])) {
|
||||
return $new_string;
|
||||
}
|
||||
if (!ctype_lower($subtags[1])) $subtags[1] = strotolower($subtags[1]);
|
||||
if (!ctype_lower($subtags[1])) $subtags[1] = strtolower($subtags[1]);
|
||||
|
||||
$new_string .= '-' . $subtags[1];
|
||||
if ($num_subtags == 2) return $new_string;
|
||||
@@ -61,7 +61,7 @@ class HTMLPurifier_AttrDef_Lang extends HTMLPurifier_AttrDef
|
||||
return $new_string;
|
||||
}
|
||||
if (!ctype_lower($subtags[$i])) {
|
||||
$subtags[$i] = strotolower($subtags[$i]);
|
||||
$subtags[$i] = strtolower($subtags[$i]);
|
||||
}
|
||||
$new_string .= '-' . $subtags[$i];
|
||||
}
|
||||
|
@@ -1,78 +0,0 @@
|
||||
<?php
|
||||
|
||||
require_once 'HTMLPurifier/AttrDef.php';
|
||||
|
||||
/**
|
||||
* Validates shorthand CSS property list-style.
|
||||
* @note This currently does not support list-style-image, as that functionality
|
||||
* is not implemented yet elsewhere.
|
||||
*/
|
||||
class HTMLPurifier_AttrDef_ListStyle extends HTMLPurifier_AttrDef
|
||||
{
|
||||
|
||||
/**
|
||||
* Local copy of component validators.
|
||||
* @note See HTMLPurifier_AttrDef_Font::$info for a similar impl.
|
||||
*/
|
||||
var $info;
|
||||
|
||||
function HTMLPurifier_AttrDef_ListStyle($config) {
|
||||
$def = $config->getCSSDefinition();
|
||||
$this->info['list-style-type'] = $def->info['list-style-type'];
|
||||
$this->info['list-style-position'] = $def->info['list-style-position'];
|
||||
}
|
||||
|
||||
function validate($string, $config, &$context) {
|
||||
|
||||
// regular pre-processing
|
||||
$string = $this->parseCDATA($string);
|
||||
if ($string === '') return false;
|
||||
|
||||
$bits = explode(' ', strtolower($string)); // bits to process
|
||||
|
||||
$caught_type = false;
|
||||
$caught_position = false;
|
||||
$caught_none = false; // as in keyword none, which is in all of them
|
||||
|
||||
$ret = '';
|
||||
|
||||
foreach ($bits as $bit) {
|
||||
if ($caught_none && ($caught_type || $caught_position)) break;
|
||||
if ($caught_type && $caught_position) break;
|
||||
|
||||
if ($bit === '') continue;
|
||||
|
||||
if ($bit === 'none') {
|
||||
if ($caught_none) continue;
|
||||
$caught_none = true;
|
||||
$ret .= 'none ';
|
||||
continue;
|
||||
}
|
||||
|
||||
// if we add anymore, roll it into a loop
|
||||
|
||||
$r = $this->info['list-style-type']->validate($bit, $config, $context);
|
||||
if ($r !== false) {
|
||||
if ($caught_type) continue;
|
||||
$caught_type = true;
|
||||
$ret .= $r . ' ';
|
||||
continue;
|
||||
}
|
||||
|
||||
$r = $this->info['list-style-position']->validate($bit, $config, $context);
|
||||
if ($r !== false) {
|
||||
if ($caught_position) continue;
|
||||
$caught_position = true;
|
||||
$ret .= $r . ' ';
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
$ret = rtrim($ret);
|
||||
return $ret ? $ret : false;
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
?>
|
@@ -3,7 +3,7 @@
|
||||
require_once 'HTMLPurifier/AttrDef.php';
|
||||
require_once 'HTMLPurifier/URIScheme.php';
|
||||
require_once 'HTMLPurifier/URISchemeRegistry.php';
|
||||
require_once 'HTMLPurifier/AttrDef/Host.php';
|
||||
require_once 'HTMLPurifier/AttrDef/URI/Host.php';
|
||||
require_once 'HTMLPurifier/PercentEncoder.php';
|
||||
|
||||
HTMLPurifier_ConfigSchema::define(
|
||||
@@ -24,7 +24,7 @@ HTMLPurifier_ConfigSchema::define(
|
||||
'This directive has been available since 1.2.0.'
|
||||
);
|
||||
|
||||
HTMLPurifier_ConfigSchema::Define(
|
||||
HTMLPurifier_ConfigSchema::define(
|
||||
'URI', 'DisableExternal', false, 'bool',
|
||||
'Disables links to external websites. This is a highly effective '.
|
||||
'anti-spam and anti-pagerank-leech measure, but comes at a hefty price: no'.
|
||||
@@ -34,6 +34,57 @@ HTMLPurifier_ConfigSchema::Define(
|
||||
'This directive has been available since 1.2.0.'
|
||||
);
|
||||
|
||||
HTMLPurifier_ConfigSchema::define(
|
||||
'URI', 'DisableExternalResources', false, 'bool',
|
||||
'Disables the embedding of external resources, preventing users from '.
|
||||
'embedding things like images from other hosts. This prevents '.
|
||||
'access tracking (good for email viewers), bandwidth leeching, '.
|
||||
'cross-site request forging, goatse.cx posting, and '.
|
||||
'other nasties, but also results in '.
|
||||
'a loss of end-user functionality (they can\'t directly post a pic '.
|
||||
'they posted from Flickr anymore). Use it if you don\'t have a '.
|
||||
'robust user-content moderation team. This directive has been '.
|
||||
'available since 1.3.0.'
|
||||
);
|
||||
|
||||
HTMLPurifier_ConfigSchema::define(
|
||||
'URI', 'DisableResources', false, 'bool',
|
||||
'Disables embedding resources, essentially meaning no pictures. You can '.
|
||||
'still link to them though. See %URI.DisableExternalResources for why '.
|
||||
'this might be a good idea. This directive has been available since 1.3.0.'
|
||||
);
|
||||
|
||||
HTMLPurifier_ConfigSchema::define(
|
||||
'URI', 'Munge', null, 'string/null',
|
||||
'Munges all browsable (usually http, https and ftp) URI\'s into some URL '.
|
||||
'redirection service. Pass this directive a URI, with %s inserted where '.
|
||||
'the url-encoded original URI should be inserted (sample: '.
|
||||
'<code>http://www.google.com/url?q=%s</code>). '.
|
||||
'This prevents PageRank leaks, while being as transparent as possible '.
|
||||
'to users (you may also want to add some client side JavaScript to '.
|
||||
'override the text in the statusbar). Warning: many security experts '.
|
||||
'believe that this form of protection does not deter spam-bots. '.
|
||||
'You can also use this directive to redirect users to a splash page '.
|
||||
'telling them they are leaving your website. '.
|
||||
'This directive has been available since 1.3.0.'
|
||||
);
|
||||
|
||||
HTMLPurifier_ConfigSchema::define(
|
||||
'URI', 'HostBlacklist', array(), 'list',
|
||||
'List of strings that are forbidden in the host of any URI. Use it to '.
|
||||
'kill domain names of spam, etc. Note that it will catch anything in '.
|
||||
'the domain, so <tt>moo.com</tt> will catch <tt>moo.com.example.com</tt>. '.
|
||||
'This directive has been available since 1.3.0.'
|
||||
);
|
||||
|
||||
HTMLPurifier_ConfigSchema::define(
|
||||
'URI', 'Disable', false, 'bool',
|
||||
'Disables all URIs in all forms. Not sure why you\'d want to do that '.
|
||||
'(after all, the Internet\'s founded on the notion of a hyperlink). '.
|
||||
'This directive has been available since 1.3.0.'
|
||||
);
|
||||
HTMLPurifier_ConfigSchema::defineAlias('Attr', 'DisableURI', 'URI', 'Disable');
|
||||
|
||||
/**
|
||||
* Validates a URI as defined by RFC 3986.
|
||||
* @note Scheme-specific mechanics deferred to HTMLPurifier_URIScheme
|
||||
@@ -43,15 +94,15 @@ class HTMLPurifier_AttrDef_URI extends HTMLPurifier_AttrDef
|
||||
|
||||
var $host;
|
||||
var $PercentEncoder;
|
||||
var $embeds;
|
||||
var $embeds_resource;
|
||||
|
||||
/**
|
||||
* @param $embeds Does the URI here result in an extra HTTP request?
|
||||
* @param $embeds_resource_resource Does the URI here result in an extra HTTP request?
|
||||
*/
|
||||
function HTMLPurifier_AttrDef_URI($embeds = false) {
|
||||
$this->host = new HTMLPurifier_AttrDef_Host();
|
||||
function HTMLPurifier_AttrDef_URI($embeds_resource = false) {
|
||||
$this->host = new HTMLPurifier_AttrDef_URI_Host();
|
||||
$this->PercentEncoder = new HTMLPurifier_PercentEncoder();
|
||||
$this->embeds = (bool) $embeds;
|
||||
$this->embeds_resource = (bool) $embeds_resource;
|
||||
}
|
||||
|
||||
function validate($uri, $config, &$context) {
|
||||
@@ -59,6 +110,8 @@ class HTMLPurifier_AttrDef_URI extends HTMLPurifier_AttrDef
|
||||
// We'll write stack-based parsers later, for now, use regexps to
|
||||
// get things working as fast as possible (irony)
|
||||
|
||||
if ($config->get('URI', 'Disable')) return false;
|
||||
|
||||
// parse as CDATA
|
||||
$uri = $this->parseCDATA($uri);
|
||||
|
||||
@@ -96,27 +149,34 @@ class HTMLPurifier_AttrDef_URI extends HTMLPurifier_AttrDef
|
||||
// no need to validate the scheme's fmt since we do that when we
|
||||
// retrieve the specific scheme object from the registry
|
||||
$scheme = ctype_lower($scheme) ? $scheme : strtolower($scheme);
|
||||
$scheme_obj =& $registry->getScheme($scheme, $config, $context);
|
||||
$scheme_obj = $registry->getScheme($scheme, $config, $context);
|
||||
if (!$scheme_obj) return false; // invalid scheme, clean it out
|
||||
} else {
|
||||
$scheme_obj =& $registry->getScheme(
|
||||
$scheme_obj = $registry->getScheme(
|
||||
$config->get('URI', 'DefaultScheme'), $config, $context
|
||||
);
|
||||
}
|
||||
|
||||
|
||||
// the URI we're processing embeds a resource in the page, but the URI
|
||||
// the URI we're processing embeds_resource a resource in the page, but the URI
|
||||
// it references cannot be located
|
||||
if ($this->embeds && !$scheme_obj->browsable) {
|
||||
if ($this->embeds_resource && !$scheme_obj->browsable) {
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
if ($authority !== null) {
|
||||
|
||||
// remove URI if it's absolute and we disallow externals
|
||||
// remove URI if it's absolute and we disabled externals or
|
||||
// if it's absolute and embedded and we disabled external resources
|
||||
unset($our_host);
|
||||
if ($config->get('URI', 'DisableExternal')) {
|
||||
if (
|
||||
$config->get('URI', 'DisableExternal') ||
|
||||
(
|
||||
$config->get('URI', 'DisableExternalResources') &&
|
||||
$this->embeds_resource
|
||||
)
|
||||
) {
|
||||
$our_host = $config->get('URI', 'Host');
|
||||
if ($our_host === null) return false;
|
||||
}
|
||||
@@ -143,6 +203,8 @@ class HTMLPurifier_AttrDef_URI extends HTMLPurifier_AttrDef
|
||||
$host = $this->host->validate($host, $config, $context);
|
||||
if ($host === false) $host = null;
|
||||
|
||||
if ($this->checkBlacklist($host, $config, $context)) return false;
|
||||
|
||||
// more lenient absolute checking
|
||||
if (isset($our_host)) {
|
||||
$host_parts = array_reverse(explode('.', $host));
|
||||
@@ -198,10 +260,37 @@ class HTMLPurifier_AttrDef_URI extends HTMLPurifier_AttrDef
|
||||
if ($query !== null) $result .= "?$query";
|
||||
if ($fragment !== null) $result .= "#$fragment";
|
||||
|
||||
// munge if necessary
|
||||
$munge = $config->get('URI', 'Munge');
|
||||
if (!empty($scheme_obj->browsable) && $munge !== null) {
|
||||
if ($authority !== null) {
|
||||
$result = str_replace('%s', rawurlencode($result), $munge);
|
||||
}
|
||||
}
|
||||
|
||||
return $result;
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* Checks a host against an array blacklist
|
||||
* @param $host Host to check
|
||||
* @param $config HTMLPurifier_Config instance
|
||||
* @param $context HTMLPurifier_Context instance
|
||||
* @return bool Is spam?
|
||||
*/
|
||||
function checkBlacklist($host, &$config, &$context) {
|
||||
$blacklist = $config->get('URI', 'HostBlacklist');
|
||||
if (!empty($blacklist)) {
|
||||
foreach($blacklist as $blacklisted_host_fragment) {
|
||||
if (strpos($host, $blacklisted_host_fragment) !== false) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
?>
|
||||
|
@@ -2,7 +2,7 @@
|
||||
|
||||
require_once 'HTMLPurifier/AttrDef.php';
|
||||
|
||||
class HTMLPurifier_AttrDef_Email extends HTMLPurifier_AttrDef
|
||||
class HTMLPurifier_AttrDef_URI_Email extends HTMLPurifier_AttrDef
|
||||
{
|
||||
|
||||
/**
|
@@ -1,12 +1,12 @@
|
||||
<?php
|
||||
|
||||
require_once 'HTMLPurifier/AttrDef/Email.php';
|
||||
require_once 'HTMLPurifier/AttrDef/URI/Email.php';
|
||||
|
||||
/**
|
||||
* Primitive email validation class based on the regexp found at
|
||||
* http://www.regular-expressions.info/email.html
|
||||
*/
|
||||
class HTMLPurifier_AttrDef_Email_SimpleCheck extends HTMLPurifier_AttrDef_Email
|
||||
class HTMLPurifier_AttrDef_URI_Email_SimpleCheck extends HTMLPurifier_AttrDef_URI_Email
|
||||
{
|
||||
|
||||
function validate($string, $config, &$context) {
|
@@ -1,28 +1,28 @@
|
||||
<?php
|
||||
|
||||
require_once 'HTMLPurifier/AttrDef.php';
|
||||
require_once 'HTMLPurifier/AttrDef/IPv4.php';
|
||||
require_once 'HTMLPurifier/AttrDef/IPv6.php';
|
||||
require_once 'HTMLPurifier/AttrDef/URI/IPv4.php';
|
||||
require_once 'HTMLPurifier/AttrDef/URI/IPv6.php';
|
||||
|
||||
/**
|
||||
* Validates a host according to the IPv4, IPv6 and DNS (future) specifications.
|
||||
*/
|
||||
class HTMLPurifier_AttrDef_Host extends HTMLPurifier_AttrDef
|
||||
class HTMLPurifier_AttrDef_URI_Host extends HTMLPurifier_AttrDef
|
||||
{
|
||||
|
||||
/**
|
||||
* Instance of HTMLPurifier_AttrDef_IPv4 sub-validator
|
||||
* Instance of HTMLPurifier_AttrDef_URI_IPv4 sub-validator
|
||||
*/
|
||||
var $ipv4;
|
||||
|
||||
/**
|
||||
* Instance of HTMLPurifier_AttrDef_IPv6 sub-validator
|
||||
* Instance of HTMLPurifier_AttrDef_URI_IPv6 sub-validator
|
||||
*/
|
||||
var $ipv6;
|
||||
|
||||
function HTMLPurifier_AttrDef_Host() {
|
||||
$this->ipv4 = new HTMLPurifier_AttrDef_IPv4();
|
||||
$this->ipv6 = new HTMLPurifier_AttrDef_IPv6();
|
||||
function HTMLPurifier_AttrDef_URI_Host() {
|
||||
$this->ipv4 = new HTMLPurifier_AttrDef_URI_IPv4();
|
||||
$this->ipv6 = new HTMLPurifier_AttrDef_URI_IPv6();
|
||||
}
|
||||
|
||||
function validate($string, $config, &$context) {
|
@@ -6,7 +6,7 @@ require_once 'HTMLPurifier/AttrDef.php';
|
||||
* Validates an IPv4 address
|
||||
* @author Feyd @ forums.devnetwork.net (public domain)
|
||||
*/
|
||||
class HTMLPurifier_AttrDef_IPv4 extends HTMLPurifier_AttrDef
|
||||
class HTMLPurifier_AttrDef_URI_IPv4 extends HTMLPurifier_AttrDef
|
||||
{
|
||||
|
||||
/**
|
||||
@@ -15,7 +15,7 @@ class HTMLPurifier_AttrDef_IPv4 extends HTMLPurifier_AttrDef
|
||||
*/
|
||||
var $ip4;
|
||||
|
||||
function HTMLPurifier_AttrDef_IPv4() {
|
||||
function HTMLPurifier_AttrDef_URI_IPv4() {
|
||||
$oct = '(?:25[0-5]|2[0-4][0-9]|1[0-9]{2}|[1-9][0-9]|[0-9])'; // 0-255
|
||||
$this->ip4 = "(?:{$oct}\\.{$oct}\\.{$oct}\\.{$oct})";
|
||||
}
|
@@ -1,6 +1,6 @@
|
||||
<?php
|
||||
|
||||
require_once 'HTMLPurifier/AttrDef/IPv4.php';
|
||||
require_once 'HTMLPurifier/AttrDef/URI/IPv4.php';
|
||||
|
||||
/**
|
||||
* Validates an IPv6 address.
|
||||
@@ -8,7 +8,7 @@ require_once 'HTMLPurifier/AttrDef/IPv4.php';
|
||||
* @note This function requires brackets to have been removed from address
|
||||
* in URI.
|
||||
*/
|
||||
class HTMLPurifier_AttrDef_IPv6 extends HTMLPurifier_AttrDef_IPv4
|
||||
class HTMLPurifier_AttrDef_URI_IPv6 extends HTMLPurifier_AttrDef_URI_IPv4
|
||||
{
|
||||
|
||||
function validate($aIP, $config, &$context) {
|
@@ -21,7 +21,7 @@ class HTMLPurifier_AttrTransform
|
||||
* Abstract: makes changes to the attributes dependent on multiple values.
|
||||
*
|
||||
* @param $attr Assoc array of attributes, usually from
|
||||
* HTMLPurifier_Token_Tag::$attributes
|
||||
* HTMLPurifier_Token_Tag::$attr
|
||||
* @param $config Mandatory HTMLPurifier_Config object.
|
||||
* @param $context Mandatory HTMLPurifier_Context object
|
||||
* @returns Processed attribute array.
|
||||
|
@@ -20,7 +20,7 @@ HTMLPurifier_ConfigSchema::defineAllowedValues(
|
||||
class HTMLPurifier_AttrTransform_BdoDir extends HTMLPurifier_AttrTransform
|
||||
{
|
||||
|
||||
function transform($attr, $config, $context) {
|
||||
function transform($attr, $config, &$context) {
|
||||
if (isset($attr['dir'])) return $attr;
|
||||
$attr['dir'] = $config->get('Attr', 'DefaultTextDir');
|
||||
return $attr;
|
||||
|
@@ -25,7 +25,7 @@ HTMLPurifier_ConfigSchema::define(
|
||||
class HTMLPurifier_AttrTransform_ImgRequired extends HTMLPurifier_AttrTransform
|
||||
{
|
||||
|
||||
function transform($attr, $config, $context) {
|
||||
function transform($attr, $config, &$context) {
|
||||
|
||||
$src = true;
|
||||
if (!isset($attr['src'])) {
|
||||
|
@@ -10,7 +10,7 @@ require_once 'HTMLPurifier/AttrTransform.php';
|
||||
class HTMLPurifier_AttrTransform_Lang extends HTMLPurifier_AttrTransform
|
||||
{
|
||||
|
||||
function transform($attr, $config, $context) {
|
||||
function transform($attr, $config, &$context) {
|
||||
|
||||
$lang = isset($attr['lang']) ? $attr['lang'] : false;
|
||||
$xml_lang = isset($attr['xml:lang']) ? $attr['xml:lang'] : false;
|
||||
|
@@ -8,7 +8,7 @@ require_once 'HTMLPurifier/AttrTransform.php';
|
||||
class HTMLPurifier_AttrTransform_TextAlign
|
||||
extends HTMLPurifier_AttrTransform {
|
||||
|
||||
function transform($attr, $config, $context) {
|
||||
function transform($attr, $config, &$context) {
|
||||
|
||||
if (!isset($attr['align'])) return $attr;
|
||||
|
||||
|
41
library/HTMLPurifier/AttrTypes.php
Normal file
41
library/HTMLPurifier/AttrTypes.php
Normal file
@@ -0,0 +1,41 @@
|
||||
<?php
|
||||
|
||||
require_once 'HTMLPurifier/AttrDef/HTML/ID.php';
|
||||
require_once 'HTMLPurifier/AttrDef/HTML/Length.php';
|
||||
require_once 'HTMLPurifier/AttrDef/HTML/MultiLength.php';
|
||||
require_once 'HTMLPurifier/AttrDef/HTML/Nmtokens.php';
|
||||
require_once 'HTMLPurifier/AttrDef/HTML/Pixels.php';
|
||||
require_once 'HTMLPurifier/AttrDef/Integer.php';
|
||||
require_once 'HTMLPurifier/AttrDef/Text.php';
|
||||
require_once 'HTMLPurifier/AttrDef/URI.php';
|
||||
|
||||
/**
|
||||
* Provides lookup array of attribute types to HTMLPurifier_AttrDef objects
|
||||
*/
|
||||
class HTMLPurifier_AttrTypes
|
||||
{
|
||||
/**
|
||||
* Lookup array of attribute string identifiers to concrete implementations
|
||||
* @public
|
||||
*/
|
||||
var $info = array();
|
||||
|
||||
/**
|
||||
* Constructs the info array
|
||||
*/
|
||||
function HTMLPurifier_AttrTypes() {
|
||||
$this->info['CDATA'] = new HTMLPurifier_AttrDef_Text();
|
||||
$this->info['ID'] = new HTMLPurifier_AttrDef_HTML_ID();
|
||||
$this->info['Length'] = new HTMLPurifier_AttrDef_HTML_Length();
|
||||
$this->info['MultiLength'] = new HTMLPurifier_AttrDef_HTML_MultiLength();
|
||||
$this->info['NMTOKENS'] = new HTMLPurifier_AttrDef_HTML_Nmtokens();
|
||||
$this->info['Pixels'] = new HTMLPurifier_AttrDef_HTML_Pixels();
|
||||
$this->info['Text'] = new HTMLPurifier_AttrDef_Text();
|
||||
$this->info['URI'] = new HTMLPurifier_AttrDef_URI();
|
||||
|
||||
// number is really a positive integer (one or more digits)
|
||||
$this->info['Number'] = new HTMLPurifier_AttrDef_Integer(false, false, true);
|
||||
}
|
||||
}
|
||||
|
||||
?>
|
@@ -1,16 +1,19 @@
|
||||
<?php
|
||||
|
||||
require_once 'HTMLPurifier/AttrDef/CSS/Background.php';
|
||||
require_once 'HTMLPurifier/AttrDef/CSS/BackgroundPosition.php';
|
||||
require_once 'HTMLPurifier/AttrDef/CSS/Border.php';
|
||||
require_once 'HTMLPurifier/AttrDef/CSS/Color.php';
|
||||
require_once 'HTMLPurifier/AttrDef/CSS/Composite.php';
|
||||
require_once 'HTMLPurifier/AttrDef/CSS/Font.php';
|
||||
require_once 'HTMLPurifier/AttrDef/CSS/FontFamily.php';
|
||||
require_once 'HTMLPurifier/AttrDef/CSS/Length.php';
|
||||
require_once 'HTMLPurifier/AttrDef/CSS/ListStyle.php';
|
||||
require_once 'HTMLPurifier/AttrDef/CSS/Multiple.php';
|
||||
require_once 'HTMLPurifier/AttrDef/CSS/Percentage.php';
|
||||
require_once 'HTMLPurifier/AttrDef/CSS/TextDecoration.php';
|
||||
require_once 'HTMLPurifier/AttrDef/CSS/URI.php';
|
||||
require_once 'HTMLPurifier/AttrDef/Enum.php';
|
||||
require_once 'HTMLPurifier/AttrDef/Color.php';
|
||||
require_once 'HTMLPurifier/AttrDef/Composite.php';
|
||||
require_once 'HTMLPurifier/AttrDef/CSSLength.php';
|
||||
require_once 'HTMLPurifier/AttrDef/Percentage.php';
|
||||
require_once 'HTMLPurifier/AttrDef/Multiple.php';
|
||||
require_once 'HTMLPurifier/AttrDef/TextDecoration.php';
|
||||
require_once 'HTMLPurifier/AttrDef/FontFamily.php';
|
||||
require_once 'HTMLPurifier/AttrDef/Font.php';
|
||||
require_once 'HTMLPurifier/AttrDef/Border.php';
|
||||
require_once 'HTMLPurifier/AttrDef/ListStyle.php';
|
||||
|
||||
/**
|
||||
* Defines allowed CSS attributes and what their values are.
|
||||
@@ -40,7 +43,7 @@ class HTMLPurifier_CSSDefinition
|
||||
array('none', 'hidden', 'dotted', 'dashed', 'solid', 'double',
|
||||
'groove', 'ridge', 'inset', 'outset'), false);
|
||||
|
||||
$this->info['border-style'] = new HTMLPurifier_AttrDef_Multiple($border_style);
|
||||
$this->info['border-style'] = new HTMLPurifier_AttrDef_CSS_Multiple($border_style);
|
||||
|
||||
$this->info['clear'] = new HTMLPurifier_AttrDef_Enum(
|
||||
array('none', 'left', 'right', 'both'), false);
|
||||
@@ -51,113 +54,123 @@ class HTMLPurifier_CSSDefinition
|
||||
$this->info['font-variant'] = new HTMLPurifier_AttrDef_Enum(
|
||||
array('normal', 'small-caps'), false);
|
||||
|
||||
$uri_or_none = new HTMLPurifier_AttrDef_CSS_Composite(
|
||||
array(
|
||||
new HTMLPurifier_AttrDef_Enum(array('none')),
|
||||
new HTMLPurifier_AttrDef_CSS_URI()
|
||||
)
|
||||
);
|
||||
|
||||
$this->info['list-style-position'] = new HTMLPurifier_AttrDef_Enum(
|
||||
array('inside', 'outside'), false);
|
||||
$this->info['list-style-type'] = new HTMLPurifier_AttrDef_Enum(
|
||||
array('disc', 'circle', 'square', 'decimal', 'lower-roman',
|
||||
'upper-roman', 'lower-alpha', 'upper-alpha'), false);
|
||||
'upper-roman', 'lower-alpha', 'upper-alpha', 'none'), false);
|
||||
$this->info['list-style-image'] = $uri_or_none;
|
||||
|
||||
$this->info['list-style'] = new HTMLPurifier_AttrDef_ListStyle($config);
|
||||
$this->info['list-style'] = new HTMLPurifier_AttrDef_CSS_ListStyle($config);
|
||||
|
||||
$this->info['text-transform'] = new HTMLPurifier_AttrDef_Enum(
|
||||
array('capitalize', 'uppercase', 'lowercase', 'none'), false);
|
||||
$this->info['color'] = new HTMLPurifier_AttrDef_Color();
|
||||
$this->info['color'] = new HTMLPurifier_AttrDef_CSS_Color();
|
||||
|
||||
// technically speaking, this one should get its own validator, but
|
||||
// since we don't support background images, it effectively is
|
||||
// equivalent to color. The only trouble is that if the author
|
||||
// specifies an image and a color, they'll both end up getting dropped,
|
||||
// even though we ought to implement it and just discard the image
|
||||
// info. This will be fixed in a later version (see TODO) when
|
||||
// better URI filtering is implemented.
|
||||
$this->info['background'] =
|
||||
$this->info['background-image'] = $uri_or_none;
|
||||
$this->info['background-repeat'] = new HTMLPurifier_AttrDef_Enum(
|
||||
array('repeat', 'repeat-x', 'repeat-y', 'no-repeat')
|
||||
);
|
||||
$this->info['background-attachment'] = new HTMLPurifier_AttrDef_Enum(
|
||||
array('scroll', 'fixed')
|
||||
);
|
||||
$this->info['background-position'] = new HTMLPurifier_AttrDef_CSS_BackgroundPosition();
|
||||
|
||||
$border_color =
|
||||
$this->info['border-top-color'] =
|
||||
$this->info['border-bottom-color'] =
|
||||
$this->info['border-left-color'] =
|
||||
$this->info['border-right-color'] =
|
||||
$this->info['background-color'] = new HTMLPurifier_AttrDef_Composite(array(
|
||||
$this->info['background-color'] = new HTMLPurifier_AttrDef_CSS_Composite(array(
|
||||
new HTMLPurifier_AttrDef_Enum(array('transparent')),
|
||||
new HTMLPurifier_AttrDef_Color()
|
||||
new HTMLPurifier_AttrDef_CSS_Color()
|
||||
));
|
||||
|
||||
$this->info['border-color'] = new HTMLPurifier_AttrDef_Multiple($border_color);
|
||||
$this->info['background'] = new HTMLPurifier_AttrDef_CSS_Background($config);
|
||||
|
||||
$this->info['border-color'] = new HTMLPurifier_AttrDef_CSS_Multiple($border_color);
|
||||
|
||||
$border_width =
|
||||
$this->info['border-top-width'] =
|
||||
$this->info['border-bottom-width'] =
|
||||
$this->info['border-left-width'] =
|
||||
$this->info['border-right-width'] = new HTMLPurifier_AttrDef_Composite(array(
|
||||
$this->info['border-right-width'] = new HTMLPurifier_AttrDef_CSS_Composite(array(
|
||||
new HTMLPurifier_AttrDef_Enum(array('thin', 'medium', 'thick')),
|
||||
new HTMLPurifier_AttrDef_CSSLength(true) //disallow negative
|
||||
new HTMLPurifier_AttrDef_CSS_Length(true) //disallow negative
|
||||
));
|
||||
|
||||
$this->info['border-width'] = new HTMLPurifier_AttrDef_Multiple($border_width);
|
||||
$this->info['border-width'] = new HTMLPurifier_AttrDef_CSS_Multiple($border_width);
|
||||
|
||||
$this->info['letter-spacing'] = new HTMLPurifier_AttrDef_Composite(array(
|
||||
$this->info['letter-spacing'] = new HTMLPurifier_AttrDef_CSS_Composite(array(
|
||||
new HTMLPurifier_AttrDef_Enum(array('normal')),
|
||||
new HTMLPurifier_AttrDef_CSSLength()
|
||||
new HTMLPurifier_AttrDef_CSS_Length()
|
||||
));
|
||||
|
||||
$this->info['word-spacing'] = new HTMLPurifier_AttrDef_Composite(array(
|
||||
$this->info['word-spacing'] = new HTMLPurifier_AttrDef_CSS_Composite(array(
|
||||
new HTMLPurifier_AttrDef_Enum(array('normal')),
|
||||
new HTMLPurifier_AttrDef_CSSLength()
|
||||
new HTMLPurifier_AttrDef_CSS_Length()
|
||||
));
|
||||
|
||||
$this->info['font-size'] = new HTMLPurifier_AttrDef_Composite(array(
|
||||
$this->info['font-size'] = new HTMLPurifier_AttrDef_CSS_Composite(array(
|
||||
new HTMLPurifier_AttrDef_Enum(array('xx-small', 'x-small',
|
||||
'small', 'medium', 'large', 'x-large', 'xx-large',
|
||||
'larger', 'smaller')),
|
||||
new HTMLPurifier_AttrDef_Percentage(),
|
||||
new HTMLPurifier_AttrDef_CSSLength()
|
||||
new HTMLPurifier_AttrDef_CSS_Percentage(),
|
||||
new HTMLPurifier_AttrDef_CSS_Length()
|
||||
));
|
||||
|
||||
$this->info['line-height'] = new HTMLPurifier_AttrDef_Composite(array(
|
||||
$this->info['line-height'] = new HTMLPurifier_AttrDef_CSS_Composite(array(
|
||||
new HTMLPurifier_AttrDef_Enum(array('normal')),
|
||||
new HTMLPurifier_AttrDef_Number(true), // no negatives
|
||||
new HTMLPurifier_AttrDef_CSSLength(true),
|
||||
new HTMLPurifier_AttrDef_Percentage(true)
|
||||
new HTMLPurifier_AttrDef_CSS_Number(true), // no negatives
|
||||
new HTMLPurifier_AttrDef_CSS_Length(true),
|
||||
new HTMLPurifier_AttrDef_CSS_Percentage(true)
|
||||
));
|
||||
|
||||
$margin =
|
||||
$this->info['margin-top'] =
|
||||
$this->info['margin-bottom'] =
|
||||
$this->info['margin-left'] =
|
||||
$this->info['margin-right'] = new HTMLPurifier_AttrDef_Composite(array(
|
||||
new HTMLPurifier_AttrDef_CSSLength(),
|
||||
new HTMLPurifier_AttrDef_Percentage(),
|
||||
$this->info['margin-right'] = new HTMLPurifier_AttrDef_CSS_Composite(array(
|
||||
new HTMLPurifier_AttrDef_CSS_Length(),
|
||||
new HTMLPurifier_AttrDef_CSS_Percentage(),
|
||||
new HTMLPurifier_AttrDef_Enum(array('auto'))
|
||||
));
|
||||
|
||||
$this->info['margin'] = new HTMLPurifier_AttrDef_Multiple($margin);
|
||||
$this->info['margin'] = new HTMLPurifier_AttrDef_CSS_Multiple($margin);
|
||||
|
||||
// non-negative
|
||||
$padding =
|
||||
$this->info['padding-top'] =
|
||||
$this->info['padding-bottom'] =
|
||||
$this->info['padding-left'] =
|
||||
$this->info['padding-right'] = new HTMLPurifier_AttrDef_Composite(array(
|
||||
new HTMLPurifier_AttrDef_CSSLength(true),
|
||||
new HTMLPurifier_AttrDef_Percentage(true)
|
||||
$this->info['padding-right'] = new HTMLPurifier_AttrDef_CSS_Composite(array(
|
||||
new HTMLPurifier_AttrDef_CSS_Length(true),
|
||||
new HTMLPurifier_AttrDef_CSS_Percentage(true)
|
||||
));
|
||||
|
||||
$this->info['padding'] = new HTMLPurifier_AttrDef_Multiple($padding);
|
||||
$this->info['padding'] = new HTMLPurifier_AttrDef_CSS_Multiple($padding);
|
||||
|
||||
$this->info['text-indent'] = new HTMLPurifier_AttrDef_Composite(array(
|
||||
new HTMLPurifier_AttrDef_CSSLength(),
|
||||
new HTMLPurifier_AttrDef_Percentage()
|
||||
$this->info['text-indent'] = new HTMLPurifier_AttrDef_CSS_Composite(array(
|
||||
new HTMLPurifier_AttrDef_CSS_Length(),
|
||||
new HTMLPurifier_AttrDef_CSS_Percentage()
|
||||
));
|
||||
|
||||
$this->info['width'] = new HTMLPurifier_AttrDef_Composite(array(
|
||||
new HTMLPurifier_AttrDef_CSSLength(true),
|
||||
new HTMLPurifier_AttrDef_Percentage(true),
|
||||
$this->info['width'] = new HTMLPurifier_AttrDef_CSS_Composite(array(
|
||||
new HTMLPurifier_AttrDef_CSS_Length(true),
|
||||
new HTMLPurifier_AttrDef_CSS_Percentage(true),
|
||||
new HTMLPurifier_AttrDef_Enum(array('auto'))
|
||||
));
|
||||
|
||||
$this->info['text-decoration'] = new HTMLPurifier_AttrDef_TextDecoration();
|
||||
$this->info['text-decoration'] = new HTMLPurifier_AttrDef_CSS_TextDecoration();
|
||||
|
||||
$this->info['font-family'] = new HTMLPurifier_AttrDef_FontFamily();
|
||||
$this->info['font-family'] = new HTMLPurifier_AttrDef_CSS_FontFamily();
|
||||
|
||||
// this could use specialized code
|
||||
$this->info['font-weight'] = new HTMLPurifier_AttrDef_Enum(
|
||||
@@ -166,14 +179,14 @@ class HTMLPurifier_CSSDefinition
|
||||
|
||||
// MUST be called after other font properties, as it references
|
||||
// a CSSDefinition object
|
||||
$this->info['font'] = new HTMLPurifier_AttrDef_Font($config);
|
||||
$this->info['font'] = new HTMLPurifier_AttrDef_CSS_Font($config);
|
||||
|
||||
// same here
|
||||
$this->info['border'] =
|
||||
$this->info['border-bottom'] =
|
||||
$this->info['border-top'] =
|
||||
$this->info['border-left'] =
|
||||
$this->info['border-right'] = new HTMLPurifier_AttrDef_Border($config);
|
||||
$this->info['border-right'] = new HTMLPurifier_AttrDef_CSS_Border($config);
|
||||
|
||||
$this->info['border-collapse'] = new HTMLPurifier_AttrDef_Enum(array(
|
||||
'collapse', 'seperate'));
|
||||
@@ -184,11 +197,11 @@ class HTMLPurifier_CSSDefinition
|
||||
$this->info['table-layout'] = new HTMLPurifier_AttrDef_Enum(array(
|
||||
'auto', 'fixed'));
|
||||
|
||||
$this->info['vertical-align'] = new HTMLPurifier_AttrDef_Composite(array(
|
||||
$this->info['vertical-align'] = new HTMLPurifier_AttrDef_CSS_Composite(array(
|
||||
new HTMLPurifier_AttrDef_Enum(array('baseline', 'sub', 'super',
|
||||
'top', 'text-top', 'middle', 'bottom', 'text-bottom')),
|
||||
new HTMLPurifier_AttrDef_CSSLength(),
|
||||
new HTMLPurifier_AttrDef_Percentage()
|
||||
new HTMLPurifier_AttrDef_CSS_Length(),
|
||||
new HTMLPurifier_AttrDef_CSS_Percentage()
|
||||
));
|
||||
|
||||
}
|
||||
|
@@ -20,10 +20,9 @@ HTMLPurifier_ConfigSchema::define(
|
||||
class HTMLPurifier_ChildDef
|
||||
{
|
||||
/**
|
||||
* Type of child definition, usually right-most part of class name lowercase
|
||||
*
|
||||
* Used occasionally in terms of context. Possible values include
|
||||
* custom, required, optional and empty.
|
||||
* Type of child definition, usually right-most part of class name lowercase.
|
||||
* Used occasionally in terms of context.
|
||||
* @public
|
||||
*/
|
||||
var $type;
|
||||
|
||||
@@ -32,12 +31,15 @@ class HTMLPurifier_ChildDef
|
||||
*
|
||||
* This is necessary for redundant checking when changes affecting
|
||||
* a child node may cause a parent node to now be disallowed.
|
||||
*
|
||||
* @public
|
||||
*/
|
||||
var $allow_empty;
|
||||
|
||||
/**
|
||||
* Validates nodes according to definition and returns modification.
|
||||
*
|
||||
* @public
|
||||
* @param $tokens_of_children Array of HTMLPurifier_Token
|
||||
* @param $config HTMLPurifier_Config object
|
||||
* @param $context HTMLPurifier_Context object
|
||||
@@ -50,391 +52,4 @@ class HTMLPurifier_ChildDef
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Custom validation class, accepts DTD child definitions
|
||||
*
|
||||
* @warning Currently this class is an all or nothing proposition, that is,
|
||||
* it will only give a bool return value.
|
||||
* @note This class is currently not used by any code, although it is unit
|
||||
* tested.
|
||||
*/
|
||||
class HTMLPurifier_ChildDef_Custom extends HTMLPurifier_ChildDef
|
||||
{
|
||||
var $type = 'custom';
|
||||
var $allow_empty = false;
|
||||
/**
|
||||
* Allowed child pattern as defined by the DTD
|
||||
*/
|
||||
var $dtd_regex;
|
||||
/**
|
||||
* PCRE regex derived from $dtd_regex
|
||||
* @private
|
||||
*/
|
||||
var $_pcre_regex;
|
||||
/**
|
||||
* @param $dtd_regex Allowed child pattern from the DTD
|
||||
*/
|
||||
function HTMLPurifier_ChildDef_Custom($dtd_regex) {
|
||||
$this->dtd_regex = $dtd_regex;
|
||||
$this->_compileRegex();
|
||||
}
|
||||
/**
|
||||
* Compiles the PCRE regex from a DTD regex ($dtd_regex to $_pcre_regex)
|
||||
*/
|
||||
function _compileRegex() {
|
||||
$raw = str_replace(' ', '', $this->dtd_regex);
|
||||
if ($raw{0} != '(') {
|
||||
$raw = "($raw)";
|
||||
}
|
||||
$reg = str_replace(',', ',?', $raw);
|
||||
$reg = preg_replace('/([#a-zA-Z0-9_.-]+)/', '(,?\\0)', $reg);
|
||||
$this->_pcre_regex = $reg;
|
||||
}
|
||||
function validateChildren($tokens_of_children, $config, &$context) {
|
||||
$list_of_children = '';
|
||||
$nesting = 0; // depth into the nest
|
||||
foreach ($tokens_of_children as $token) {
|
||||
if (!empty($token->is_whitespace)) continue;
|
||||
|
||||
$is_child = ($nesting == 0); // direct
|
||||
|
||||
if ($token->type == 'start') {
|
||||
$nesting++;
|
||||
} elseif ($token->type == 'end') {
|
||||
$nesting--;
|
||||
}
|
||||
|
||||
if ($is_child) {
|
||||
$list_of_children .= $token->name . ',';
|
||||
}
|
||||
}
|
||||
$list_of_children = rtrim($list_of_children, ',');
|
||||
|
||||
$okay =
|
||||
preg_match(
|
||||
'/^'.$this->_pcre_regex.'$/',
|
||||
$list_of_children
|
||||
);
|
||||
|
||||
return (bool) $okay;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Definition that allows a set of elements, but disallows empty children.
|
||||
*/
|
||||
class HTMLPurifier_ChildDef_Required extends HTMLPurifier_ChildDef
|
||||
{
|
||||
/**
|
||||
* Lookup table of allowed elements.
|
||||
*/
|
||||
var $elements = array();
|
||||
/**
|
||||
* @param $elements List of allowed element names (lowercase).
|
||||
*/
|
||||
function HTMLPurifier_ChildDef_Required($elements) {
|
||||
if (is_string($elements)) {
|
||||
$elements = str_replace(' ', '', $elements);
|
||||
$elements = explode('|', $elements);
|
||||
}
|
||||
$elements = array_flip($elements);
|
||||
foreach ($elements as $i => $x) $elements[$i] = true;
|
||||
$this->elements = $elements;
|
||||
$this->gen = new HTMLPurifier_Generator();
|
||||
}
|
||||
var $allow_empty = false;
|
||||
var $type = 'required';
|
||||
function validateChildren($tokens_of_children, $config, &$context) {
|
||||
// if there are no tokens, delete parent node
|
||||
if (empty($tokens_of_children)) return false;
|
||||
|
||||
// the new set of children
|
||||
$result = array();
|
||||
|
||||
// current depth into the nest
|
||||
$nesting = 0;
|
||||
|
||||
// whether or not we're deleting a node
|
||||
$is_deleting = false;
|
||||
|
||||
// whether or not parsed character data is allowed
|
||||
// this controls whether or not we silently drop a tag
|
||||
// or generate escaped HTML from it
|
||||
$pcdata_allowed = isset($this->elements['#PCDATA']);
|
||||
|
||||
// a little sanity check to make sure it's not ALL whitespace
|
||||
$all_whitespace = true;
|
||||
|
||||
// some configuration
|
||||
$escape_invalid_children = $config->get('Core', 'EscapeInvalidChildren');
|
||||
|
||||
foreach ($tokens_of_children as $token) {
|
||||
if (!empty($token->is_whitespace)) {
|
||||
$result[] = $token;
|
||||
continue;
|
||||
}
|
||||
$all_whitespace = false; // phew, we're not talking about whitespace
|
||||
|
||||
$is_child = ($nesting == 0);
|
||||
|
||||
if ($token->type == 'start') {
|
||||
$nesting++;
|
||||
} elseif ($token->type == 'end') {
|
||||
$nesting--;
|
||||
}
|
||||
|
||||
if ($is_child) {
|
||||
$is_deleting = false;
|
||||
if (!isset($this->elements[$token->name])) {
|
||||
$is_deleting = true;
|
||||
if ($pcdata_allowed && $token->type == 'text') {
|
||||
$result[] = $token;
|
||||
} elseif ($pcdata_allowed && $escape_invalid_children) {
|
||||
$result[] = new HTMLPurifier_Token_Text(
|
||||
$this->gen->generateFromToken($token, $config)
|
||||
);
|
||||
}
|
||||
continue;
|
||||
}
|
||||
}
|
||||
if (!$is_deleting || ($pcdata_allowed && $token->type == 'text')) {
|
||||
$result[] = $token;
|
||||
} elseif ($pcdata_allowed && $escape_invalid_children) {
|
||||
$result[] =
|
||||
new HTMLPurifier_Token_Text(
|
||||
$this->gen->generateFromToken( $token, $config )
|
||||
);
|
||||
} else {
|
||||
// drop silently
|
||||
}
|
||||
}
|
||||
if (empty($result)) return false;
|
||||
if ($all_whitespace) return false;
|
||||
if ($tokens_of_children == $result) return true;
|
||||
return $result;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Definition that allows a set of elements, and allows no children.
|
||||
* @note This is a hack to reuse code from HTMLPurifier_ChildDef_Required,
|
||||
* really, one shouldn't inherit from the other. Only altered behavior
|
||||
* is to overload a returned false with an array. Thus, it will never
|
||||
* return false.
|
||||
*/
|
||||
class HTMLPurifier_ChildDef_Optional extends HTMLPurifier_ChildDef_Required
|
||||
{
|
||||
var $allow_empty = true;
|
||||
var $type = 'optional';
|
||||
function validateChildren($tokens_of_children, $config, &$context) {
|
||||
$result = parent::validateChildren($tokens_of_children, $config, $context);
|
||||
if ($result === false) return array();
|
||||
return $result;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Definition that disallows all elements.
|
||||
* @warning validateChildren() in this class is actually never called, because
|
||||
* empty elements are corrected in HTMLPurifier_Strategy_MakeWellFormed
|
||||
* before child definitions are parsed in earnest by
|
||||
* HTMLPurifier_Strategy_FixNesting.
|
||||
*/
|
||||
class HTMLPurifier_ChildDef_Empty extends HTMLPurifier_ChildDef
|
||||
{
|
||||
var $allow_empty = true;
|
||||
var $type = 'empty';
|
||||
function HTMLPurifier_ChildDef_Empty() {}
|
||||
function validateChildren($tokens_of_children, $config, &$context) {
|
||||
return array();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Definition that uses different definitions depending on context.
|
||||
*
|
||||
* The del and ins tags are notable because they allow different types of
|
||||
* elements depending on whether or not they're in a block or inline context.
|
||||
* Chameleon allows this behavior to happen by using two different
|
||||
* definitions depending on context. While this somewhat generalized,
|
||||
* it is specifically intended for those two tags.
|
||||
*/
|
||||
class HTMLPurifier_ChildDef_Chameleon extends HTMLPurifier_ChildDef
|
||||
{
|
||||
|
||||
/**
|
||||
* Instance of the definition object to use when inline. Usually stricter.
|
||||
*/
|
||||
var $inline;
|
||||
/**
|
||||
* Instance of the definition object to use when block.
|
||||
*/
|
||||
var $block;
|
||||
|
||||
/**
|
||||
* @param $inline List of elements to allow when inline.
|
||||
* @param $block List of elements to allow when block.
|
||||
*/
|
||||
function HTMLPurifier_ChildDef_Chameleon($inline, $block) {
|
||||
$this->inline = new HTMLPurifier_ChildDef_Optional($inline);
|
||||
$this->block = new HTMLPurifier_ChildDef_Optional($block);
|
||||
}
|
||||
|
||||
function validateChildren($tokens_of_children, $config, &$context) {
|
||||
$parent_type = $context->get('ParentType');
|
||||
switch ($parent_type) {
|
||||
case 'unknown':
|
||||
case 'inline':
|
||||
$result = $this->inline->validateChildren(
|
||||
$tokens_of_children, $config, $context);
|
||||
break;
|
||||
case 'block':
|
||||
$result = $this->block->validateChildren(
|
||||
$tokens_of_children, $config, $context);
|
||||
break;
|
||||
default:
|
||||
trigger_error('Invalid context', E_USER_ERROR);
|
||||
return false;
|
||||
}
|
||||
return $result;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Definition for tables
|
||||
*/
|
||||
class HTMLPurifier_ChildDef_Table extends HTMLPurifier_ChildDef
|
||||
{
|
||||
var $allow_empty = false;
|
||||
var $type = 'table';
|
||||
function HTMLPurifier_ChildDef_Table() {}
|
||||
function validateChildren($tokens_of_children, $config, &$context) {
|
||||
if (empty($tokens_of_children)) return false;
|
||||
|
||||
// this ensures that the loop gets run one last time before closing
|
||||
// up. It's a little bit of a hack, but it works! Just make sure you
|
||||
// get rid of the token later.
|
||||
$tokens_of_children[] = false;
|
||||
|
||||
// only one of these elements is allowed in a table
|
||||
$caption = false;
|
||||
$thead = false;
|
||||
$tfoot = false;
|
||||
|
||||
// as many of these as you want
|
||||
$cols = array();
|
||||
$content = array();
|
||||
|
||||
$nesting = 0; // current depth so we can determine nodes
|
||||
$is_collecting = false; // are we globbing together tokens to package
|
||||
// into one of the collectors?
|
||||
$collection = array(); // collected nodes
|
||||
$tag_index = 0; // the first node might be whitespace,
|
||||
// so this tells us where the start tag is
|
||||
|
||||
foreach ($tokens_of_children as $token) {
|
||||
$is_child = ($nesting == 0);
|
||||
|
||||
if ($token === false) {
|
||||
// terminating sequence started
|
||||
} elseif ($token->type == 'start') {
|
||||
$nesting++;
|
||||
} elseif ($token->type == 'end') {
|
||||
$nesting--;
|
||||
}
|
||||
|
||||
// handle node collection
|
||||
if ($is_collecting) {
|
||||
if ($is_child) {
|
||||
// okay, let's stash the tokens away
|
||||
// first token tells us the type of the collection
|
||||
switch ($collection[$tag_index]->name) {
|
||||
case 'tr':
|
||||
case 'tbody':
|
||||
$content[] = $collection;
|
||||
break;
|
||||
case 'caption':
|
||||
if ($caption !== false) break;
|
||||
$caption = $collection;
|
||||
break;
|
||||
case 'thead':
|
||||
case 'tfoot':
|
||||
// access the appropriate variable, $thead or $tfoot
|
||||
$var = $collection[$tag_index]->name;
|
||||
if ($$var === false) {
|
||||
$$var = $collection;
|
||||
} else {
|
||||
// transmutate the first and less entries into
|
||||
// tbody tags, and then put into content
|
||||
$collection[$tag_index]->name = 'tbody';
|
||||
$collection[count($collection)-1]->name = 'tbody';
|
||||
$content[] = $collection;
|
||||
}
|
||||
break;
|
||||
case 'colgroup':
|
||||
$cols[] = $collection;
|
||||
break;
|
||||
}
|
||||
$collection = array();
|
||||
$is_collecting = false;
|
||||
$tag_index = 0;
|
||||
} else {
|
||||
// add the node to the collection
|
||||
$collection[] = $token;
|
||||
}
|
||||
}
|
||||
|
||||
// terminate
|
||||
if ($token === false) break;
|
||||
|
||||
if ($is_child) {
|
||||
// determine what we're dealing with
|
||||
if ($token->name == 'col') {
|
||||
// the only empty tag in the possie, we can handle it
|
||||
// immediately
|
||||
$cols[] = array_merge($collection, array($token));
|
||||
$collection = array();
|
||||
$tag_index = 0;
|
||||
continue;
|
||||
}
|
||||
switch($token->name) {
|
||||
case 'caption':
|
||||
case 'colgroup':
|
||||
case 'thead':
|
||||
case 'tfoot':
|
||||
case 'tbody':
|
||||
case 'tr':
|
||||
$is_collecting = true;
|
||||
$collection[] = $token;
|
||||
continue;
|
||||
default:
|
||||
if ($token->type == 'text' && $token->is_whitespace) {
|
||||
$collection[] = $token;
|
||||
$tag_index++;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (empty($content)) return false;
|
||||
|
||||
$ret = array();
|
||||
if ($caption !== false) $ret = array_merge($ret, $caption);
|
||||
if ($cols !== false) foreach ($cols as $token_array) $ret = array_merge($ret, $token_array);
|
||||
if ($thead !== false) $ret = array_merge($ret, $thead);
|
||||
if ($tfoot !== false) $ret = array_merge($ret, $tfoot);
|
||||
foreach ($content as $token_array) $ret = array_merge($ret, $token_array);
|
||||
if (!empty($collection) && $is_collecting == false){
|
||||
// grab the trailing space
|
||||
$ret = array_merge($ret, $collection);
|
||||
}
|
||||
|
||||
array_pop($tokens_of_children); // remove phantom token
|
||||
|
||||
return ($ret === $tokens_of_children) ? true : $ret;
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
?>
|
||||
|
51
library/HTMLPurifier/ChildDef/Chameleon.php
Normal file
51
library/HTMLPurifier/ChildDef/Chameleon.php
Normal file
@@ -0,0 +1,51 @@
|
||||
<?php
|
||||
|
||||
require_once 'HTMLPurifier/ChildDef.php';
|
||||
|
||||
/**
|
||||
* Definition that uses different definitions depending on context.
|
||||
*
|
||||
* The del and ins tags are notable because they allow different types of
|
||||
* elements depending on whether or not they're in a block or inline context.
|
||||
* Chameleon allows this behavior to happen by using two different
|
||||
* definitions depending on context. While this somewhat generalized,
|
||||
* it is specifically intended for those two tags.
|
||||
*/
|
||||
class HTMLPurifier_ChildDef_Chameleon extends HTMLPurifier_ChildDef
|
||||
{
|
||||
|
||||
/**
|
||||
* Instance of the definition object to use when inline. Usually stricter.
|
||||
* @public
|
||||
*/
|
||||
var $inline;
|
||||
|
||||
/**
|
||||
* Instance of the definition object to use when block.
|
||||
* @public
|
||||
*/
|
||||
var $block;
|
||||
|
||||
var $type = 'chameleon';
|
||||
|
||||
/**
|
||||
* @param $inline List of elements to allow when inline.
|
||||
* @param $block List of elements to allow when block.
|
||||
*/
|
||||
function HTMLPurifier_ChildDef_Chameleon($inline, $block) {
|
||||
$this->inline = new HTMLPurifier_ChildDef_Optional($inline);
|
||||
$this->block = new HTMLPurifier_ChildDef_Optional($block);
|
||||
}
|
||||
|
||||
function validateChildren($tokens_of_children, $config, &$context) {
|
||||
if ($context->get('IsInline') === false) {
|
||||
return $this->block->validateChildren(
|
||||
$tokens_of_children, $config, $context);
|
||||
} else {
|
||||
return $this->inline->validateChildren(
|
||||
$tokens_of_children, $config, $context);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
?>
|
75
library/HTMLPurifier/ChildDef/Custom.php
Normal file
75
library/HTMLPurifier/ChildDef/Custom.php
Normal file
@@ -0,0 +1,75 @@
|
||||
<?php
|
||||
|
||||
require_once 'HTMLPurifier/ChildDef.php';
|
||||
|
||||
/**
|
||||
* Custom validation class, accepts DTD child definitions
|
||||
*
|
||||
* @warning Currently this class is an all or nothing proposition, that is,
|
||||
* it will only give a bool return value.
|
||||
* @note This class is currently not used by any code, although it is unit
|
||||
* tested.
|
||||
*/
|
||||
class HTMLPurifier_ChildDef_Custom extends HTMLPurifier_ChildDef
|
||||
{
|
||||
var $type = 'custom';
|
||||
var $allow_empty = false;
|
||||
/**
|
||||
* Allowed child pattern as defined by the DTD
|
||||
*/
|
||||
var $dtd_regex;
|
||||
/**
|
||||
* PCRE regex derived from $dtd_regex
|
||||
* @private
|
||||
*/
|
||||
var $_pcre_regex;
|
||||
/**
|
||||
* @param $dtd_regex Allowed child pattern from the DTD
|
||||
*/
|
||||
function HTMLPurifier_ChildDef_Custom($dtd_regex) {
|
||||
$this->dtd_regex = $dtd_regex;
|
||||
$this->_compileRegex();
|
||||
}
|
||||
/**
|
||||
* Compiles the PCRE regex from a DTD regex ($dtd_regex to $_pcre_regex)
|
||||
*/
|
||||
function _compileRegex() {
|
||||
$raw = str_replace(' ', '', $this->dtd_regex);
|
||||
if ($raw{0} != '(') {
|
||||
$raw = "($raw)";
|
||||
}
|
||||
$reg = str_replace(',', ',?', $raw);
|
||||
$reg = preg_replace('/([#a-zA-Z0-9_.-]+)/', '(,?\\0)', $reg);
|
||||
$this->_pcre_regex = $reg;
|
||||
}
|
||||
function validateChildren($tokens_of_children, $config, &$context) {
|
||||
$list_of_children = '';
|
||||
$nesting = 0; // depth into the nest
|
||||
foreach ($tokens_of_children as $token) {
|
||||
if (!empty($token->is_whitespace)) continue;
|
||||
|
||||
$is_child = ($nesting == 0); // direct
|
||||
|
||||
if ($token->type == 'start') {
|
||||
$nesting++;
|
||||
} elseif ($token->type == 'end') {
|
||||
$nesting--;
|
||||
}
|
||||
|
||||
if ($is_child) {
|
||||
$list_of_children .= $token->name . ',';
|
||||
}
|
||||
}
|
||||
$list_of_children = rtrim($list_of_children, ',');
|
||||
|
||||
$okay =
|
||||
preg_match(
|
||||
'/^'.$this->_pcre_regex.'$/',
|
||||
$list_of_children
|
||||
);
|
||||
|
||||
return (bool) $okay;
|
||||
}
|
||||
}
|
||||
|
||||
?>
|
22
library/HTMLPurifier/ChildDef/Empty.php
Normal file
22
library/HTMLPurifier/ChildDef/Empty.php
Normal file
@@ -0,0 +1,22 @@
|
||||
<?php
|
||||
|
||||
require_once 'HTMLPurifier/ChildDef.php';
|
||||
|
||||
/**
|
||||
* Definition that disallows all elements.
|
||||
* @warning validateChildren() in this class is actually never called, because
|
||||
* empty elements are corrected in HTMLPurifier_Strategy_MakeWellFormed
|
||||
* before child definitions are parsed in earnest by
|
||||
* HTMLPurifier_Strategy_FixNesting.
|
||||
*/
|
||||
class HTMLPurifier_ChildDef_Empty extends HTMLPurifier_ChildDef
|
||||
{
|
||||
var $allow_empty = true;
|
||||
var $type = 'empty';
|
||||
function HTMLPurifier_ChildDef_Empty() {}
|
||||
function validateChildren($tokens_of_children, $config, &$context) {
|
||||
return array();
|
||||
}
|
||||
}
|
||||
|
||||
?>
|
23
library/HTMLPurifier/ChildDef/Optional.php
Normal file
23
library/HTMLPurifier/ChildDef/Optional.php
Normal file
@@ -0,0 +1,23 @@
|
||||
<?php
|
||||
|
||||
require_once 'HTMLPurifier/ChildDef/Required.php';
|
||||
|
||||
/**
|
||||
* Definition that allows a set of elements, and allows no children.
|
||||
* @note This is a hack to reuse code from HTMLPurifier_ChildDef_Required,
|
||||
* really, one shouldn't inherit from the other. Only altered behavior
|
||||
* is to overload a returned false with an array. Thus, it will never
|
||||
* return false.
|
||||
*/
|
||||
class HTMLPurifier_ChildDef_Optional extends HTMLPurifier_ChildDef_Required
|
||||
{
|
||||
var $allow_empty = true;
|
||||
var $type = 'optional';
|
||||
function validateChildren($tokens_of_children, $config, &$context) {
|
||||
$result = parent::validateChildren($tokens_of_children, $config, $context);
|
||||
if ($result === false) return array();
|
||||
return $result;
|
||||
}
|
||||
}
|
||||
|
||||
?>
|
107
library/HTMLPurifier/ChildDef/Required.php
Normal file
107
library/HTMLPurifier/ChildDef/Required.php
Normal file
@@ -0,0 +1,107 @@
|
||||
<?php
|
||||
|
||||
require_once 'HTMLPurifier/ChildDef.php';
|
||||
|
||||
/**
|
||||
* Definition that allows a set of elements, but disallows empty children.
|
||||
*/
|
||||
class HTMLPurifier_ChildDef_Required extends HTMLPurifier_ChildDef
|
||||
{
|
||||
/**
|
||||
* Lookup table of allowed elements.
|
||||
* @public
|
||||
*/
|
||||
var $elements = array();
|
||||
/**
|
||||
* @param $elements List of allowed element names (lowercase).
|
||||
*/
|
||||
function HTMLPurifier_ChildDef_Required($elements) {
|
||||
if (is_string($elements)) {
|
||||
$elements = str_replace(' ', '', $elements);
|
||||
$elements = explode('|', $elements);
|
||||
}
|
||||
$keys = array_keys($elements);
|
||||
if ($keys == array_keys($keys)) {
|
||||
$elements = array_flip($elements);
|
||||
foreach ($elements as $i => $x) {
|
||||
$elements[$i] = true;
|
||||
if (empty($i)) unset($elements[$i]);
|
||||
}
|
||||
}
|
||||
$this->elements = $elements;
|
||||
$this->gen = new HTMLPurifier_Generator();
|
||||
}
|
||||
var $allow_empty = false;
|
||||
var $type = 'required';
|
||||
function validateChildren($tokens_of_children, $config, &$context) {
|
||||
// if there are no tokens, delete parent node
|
||||
if (empty($tokens_of_children)) return false;
|
||||
|
||||
// the new set of children
|
||||
$result = array();
|
||||
|
||||
// current depth into the nest
|
||||
$nesting = 0;
|
||||
|
||||
// whether or not we're deleting a node
|
||||
$is_deleting = false;
|
||||
|
||||
// whether or not parsed character data is allowed
|
||||
// this controls whether or not we silently drop a tag
|
||||
// or generate escaped HTML from it
|
||||
$pcdata_allowed = isset($this->elements['#PCDATA']);
|
||||
|
||||
// a little sanity check to make sure it's not ALL whitespace
|
||||
$all_whitespace = true;
|
||||
|
||||
// some configuration
|
||||
$escape_invalid_children = $config->get('Core', 'EscapeInvalidChildren');
|
||||
|
||||
foreach ($tokens_of_children as $token) {
|
||||
if (!empty($token->is_whitespace)) {
|
||||
$result[] = $token;
|
||||
continue;
|
||||
}
|
||||
$all_whitespace = false; // phew, we're not talking about whitespace
|
||||
|
||||
$is_child = ($nesting == 0);
|
||||
|
||||
if ($token->type == 'start') {
|
||||
$nesting++;
|
||||
} elseif ($token->type == 'end') {
|
||||
$nesting--;
|
||||
}
|
||||
|
||||
if ($is_child) {
|
||||
$is_deleting = false;
|
||||
if (!isset($this->elements[$token->name])) {
|
||||
$is_deleting = true;
|
||||
if ($pcdata_allowed && $token->type == 'text') {
|
||||
$result[] = $token;
|
||||
} elseif ($pcdata_allowed && $escape_invalid_children) {
|
||||
$result[] = new HTMLPurifier_Token_Text(
|
||||
$this->gen->generateFromToken($token, $config)
|
||||
);
|
||||
}
|
||||
continue;
|
||||
}
|
||||
}
|
||||
if (!$is_deleting || ($pcdata_allowed && $token->type == 'text')) {
|
||||
$result[] = $token;
|
||||
} elseif ($pcdata_allowed && $escape_invalid_children) {
|
||||
$result[] =
|
||||
new HTMLPurifier_Token_Text(
|
||||
$this->gen->generateFromToken( $token, $config )
|
||||
);
|
||||
} else {
|
||||
// drop silently
|
||||
}
|
||||
}
|
||||
if (empty($result)) return false;
|
||||
if ($all_whitespace) return false;
|
||||
if ($tokens_of_children == $result) return true;
|
||||
return $result;
|
||||
}
|
||||
}
|
||||
|
||||
?>
|
76
library/HTMLPurifier/ChildDef/StrictBlockquote.php
Normal file
76
library/HTMLPurifier/ChildDef/StrictBlockquote.php
Normal file
@@ -0,0 +1,76 @@
|
||||
<?php
|
||||
|
||||
require_once 'HTMLPurifier/ChildDef/Required.php';
|
||||
|
||||
/**
|
||||
* Takes the contents of blockquote when in strict and reformats for validation.
|
||||
*/
|
||||
class HTMLPurifier_ChildDef_StrictBlockquote
|
||||
extends HTMLPurifier_ChildDef_Required
|
||||
{
|
||||
var $real_elements;
|
||||
var $fake_elements;
|
||||
var $allow_empty = true;
|
||||
var $type = 'strictblockquote';
|
||||
var $init = false;
|
||||
function validateChildren($tokens_of_children, $config, &$context) {
|
||||
|
||||
$def = $config->getHTMLDefinition();
|
||||
if (!$this->init) {
|
||||
// allow all inline elements
|
||||
$this->real_elements = $this->elements;
|
||||
$this->fake_elements = $def->info_content_sets['Flow'];
|
||||
$this->fake_elements['#PCDATA'] = true;
|
||||
$this->init = true;
|
||||
}
|
||||
|
||||
// trick the parent class into thinking it allows more
|
||||
$this->elements = $this->fake_elements;
|
||||
$result = parent::validateChildren($tokens_of_children, $config, $context);
|
||||
$this->elements = $this->real_elements;
|
||||
|
||||
if ($result === false) return array();
|
||||
if ($result === true) $result = $tokens_of_children;
|
||||
|
||||
$block_wrap_start = new HTMLPurifier_Token_Start($def->info_block_wrapper);
|
||||
$block_wrap_end = new HTMLPurifier_Token_End( $def->info_block_wrapper);
|
||||
$is_inline = false;
|
||||
$depth = 0;
|
||||
$ret = array();
|
||||
|
||||
// assuming that there are no comment tokens
|
||||
foreach ($result as $i => $token) {
|
||||
$token = $result[$i];
|
||||
// ifs are nested for readability
|
||||
if (!$is_inline) {
|
||||
if (!$depth) {
|
||||
if (
|
||||
$token->type == 'text' ||
|
||||
!isset($this->elements[$token->name])
|
||||
) {
|
||||
$is_inline = true;
|
||||
$ret[] = $block_wrap_start;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
if (!$depth) {
|
||||
// starting tokens have been inline text / empty
|
||||
if ($token->type == 'start' || $token->type == 'empty') {
|
||||
if (isset($this->elements[$token->name])) {
|
||||
// ended
|
||||
$ret[] = $block_wrap_end;
|
||||
$is_inline = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
$ret[] = $token;
|
||||
if ($token->type == 'start') $depth++;
|
||||
if ($token->type == 'end') $depth--;
|
||||
}
|
||||
if ($is_inline) $ret[] = $block_wrap_end;
|
||||
return $ret;
|
||||
}
|
||||
}
|
||||
|
||||
?>
|
142
library/HTMLPurifier/ChildDef/Table.php
Normal file
142
library/HTMLPurifier/ChildDef/Table.php
Normal file
@@ -0,0 +1,142 @@
|
||||
<?php
|
||||
|
||||
require_once 'HTMLPurifier/ChildDef.php';
|
||||
|
||||
/**
|
||||
* Definition for tables
|
||||
*/
|
||||
class HTMLPurifier_ChildDef_Table extends HTMLPurifier_ChildDef
|
||||
{
|
||||
var $allow_empty = false;
|
||||
var $type = 'table';
|
||||
function HTMLPurifier_ChildDef_Table() {}
|
||||
function validateChildren($tokens_of_children, $config, &$context) {
|
||||
if (empty($tokens_of_children)) return false;
|
||||
|
||||
// this ensures that the loop gets run one last time before closing
|
||||
// up. It's a little bit of a hack, but it works! Just make sure you
|
||||
// get rid of the token later.
|
||||
$tokens_of_children[] = false;
|
||||
|
||||
// only one of these elements is allowed in a table
|
||||
$caption = false;
|
||||
$thead = false;
|
||||
$tfoot = false;
|
||||
|
||||
// as many of these as you want
|
||||
$cols = array();
|
||||
$content = array();
|
||||
|
||||
$nesting = 0; // current depth so we can determine nodes
|
||||
$is_collecting = false; // are we globbing together tokens to package
|
||||
// into one of the collectors?
|
||||
$collection = array(); // collected nodes
|
||||
$tag_index = 0; // the first node might be whitespace,
|
||||
// so this tells us where the start tag is
|
||||
|
||||
foreach ($tokens_of_children as $token) {
|
||||
$is_child = ($nesting == 0);
|
||||
|
||||
if ($token === false) {
|
||||
// terminating sequence started
|
||||
} elseif ($token->type == 'start') {
|
||||
$nesting++;
|
||||
} elseif ($token->type == 'end') {
|
||||
$nesting--;
|
||||
}
|
||||
|
||||
// handle node collection
|
||||
if ($is_collecting) {
|
||||
if ($is_child) {
|
||||
// okay, let's stash the tokens away
|
||||
// first token tells us the type of the collection
|
||||
switch ($collection[$tag_index]->name) {
|
||||
case 'tr':
|
||||
case 'tbody':
|
||||
$content[] = $collection;
|
||||
break;
|
||||
case 'caption':
|
||||
if ($caption !== false) break;
|
||||
$caption = $collection;
|
||||
break;
|
||||
case 'thead':
|
||||
case 'tfoot':
|
||||
// access the appropriate variable, $thead or $tfoot
|
||||
$var = $collection[$tag_index]->name;
|
||||
if ($$var === false) {
|
||||
$$var = $collection;
|
||||
} else {
|
||||
// transmutate the first and less entries into
|
||||
// tbody tags, and then put into content
|
||||
$collection[$tag_index]->name = 'tbody';
|
||||
$collection[count($collection)-1]->name = 'tbody';
|
||||
$content[] = $collection;
|
||||
}
|
||||
break;
|
||||
case 'colgroup':
|
||||
$cols[] = $collection;
|
||||
break;
|
||||
}
|
||||
$collection = array();
|
||||
$is_collecting = false;
|
||||
$tag_index = 0;
|
||||
} else {
|
||||
// add the node to the collection
|
||||
$collection[] = $token;
|
||||
}
|
||||
}
|
||||
|
||||
// terminate
|
||||
if ($token === false) break;
|
||||
|
||||
if ($is_child) {
|
||||
// determine what we're dealing with
|
||||
if ($token->name == 'col') {
|
||||
// the only empty tag in the possie, we can handle it
|
||||
// immediately
|
||||
$cols[] = array_merge($collection, array($token));
|
||||
$collection = array();
|
||||
$tag_index = 0;
|
||||
continue;
|
||||
}
|
||||
switch($token->name) {
|
||||
case 'caption':
|
||||
case 'colgroup':
|
||||
case 'thead':
|
||||
case 'tfoot':
|
||||
case 'tbody':
|
||||
case 'tr':
|
||||
$is_collecting = true;
|
||||
$collection[] = $token;
|
||||
continue;
|
||||
default:
|
||||
if ($token->type == 'text' && $token->is_whitespace) {
|
||||
$collection[] = $token;
|
||||
$tag_index++;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (empty($content)) return false;
|
||||
|
||||
$ret = array();
|
||||
if ($caption !== false) $ret = array_merge($ret, $caption);
|
||||
if ($cols !== false) foreach ($cols as $token_array) $ret = array_merge($ret, $token_array);
|
||||
if ($thead !== false) $ret = array_merge($ret, $thead);
|
||||
if ($tfoot !== false) $ret = array_merge($ret, $tfoot);
|
||||
foreach ($content as $token_array) $ret = array_merge($ret, $token_array);
|
||||
if (!empty($collection) && $is_collecting == false){
|
||||
// grab the trailing space
|
||||
$ret = array_merge($ret, $collection);
|
||||
}
|
||||
|
||||
array_pop($tokens_of_children); // remove phantom token
|
||||
|
||||
return ($ret === $tokens_of_children) ? true : $ret;
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
?>
|
@@ -44,8 +44,26 @@ class HTMLPurifier_Config
|
||||
$this->def = $definition; // keep a copy around for checking
|
||||
}
|
||||
|
||||
/**
|
||||
* Convenience constructor that creates a config object based on a mixed var
|
||||
* @static
|
||||
* @param mixed $config Variable that defines the state of the config
|
||||
* object. Can be: a HTMLPurifier_Config() object,
|
||||
* an array of directives based on loadArray(),
|
||||
* or a string filename of an ini file.
|
||||
* @return Configured HTMLPurifier_Config object
|
||||
*/
|
||||
function create($config) {
|
||||
if (is_a($config, 'HTMLPurifier_Config')) return $config;
|
||||
$ret = HTMLPurifier_Config::createDefault();
|
||||
if (is_string($config)) $ret->loadIni($config);
|
||||
elseif (is_array($config)) $ret->loadArray($config);
|
||||
return $ret;
|
||||
}
|
||||
|
||||
/**
|
||||
* Convenience constructor that creates a default configuration object.
|
||||
* @static
|
||||
* @return Default HTMLPurifier_Config object.
|
||||
*/
|
||||
function createDefault() {
|
||||
@@ -59,27 +77,55 @@ class HTMLPurifier_Config
|
||||
* @param $namespace String namespace
|
||||
* @param $key String key
|
||||
*/
|
||||
function get($namespace, $key) {
|
||||
function get($namespace, $key, $from_alias = false) {
|
||||
if (!isset($this->def->info[$namespace][$key])) {
|
||||
trigger_error('Cannot retrieve value of undefined directive',
|
||||
E_USER_WARNING);
|
||||
return;
|
||||
}
|
||||
if ($this->def->info[$namespace][$key]->class == 'alias') {
|
||||
trigger_error('Cannot get value from aliased directive, use real name',
|
||||
E_USER_ERROR);
|
||||
return;
|
||||
}
|
||||
return $this->conf[$namespace][$key];
|
||||
}
|
||||
|
||||
/**
|
||||
* Retreives an array of directives to values from a given namespace
|
||||
* @param $namespace String namespace
|
||||
*/
|
||||
function getBatch($namespace) {
|
||||
if (!isset($this->def->info[$namespace])) {
|
||||
trigger_error('Cannot retrieve undefined namespace',
|
||||
E_USER_WARNING);
|
||||
return;
|
||||
}
|
||||
return $this->conf[$namespace];
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets a value to configuration.
|
||||
* @param $namespace String namespace
|
||||
* @param $key String key
|
||||
* @param $value Mixed value
|
||||
*/
|
||||
function set($namespace, $key, $value) {
|
||||
function set($namespace, $key, $value, $from_alias = false) {
|
||||
if (!isset($this->def->info[$namespace][$key])) {
|
||||
trigger_error('Cannot set undefined directive to value',
|
||||
E_USER_WARNING);
|
||||
return;
|
||||
}
|
||||
if ($this->def->info[$namespace][$key]->class == 'alias') {
|
||||
if ($from_alias) {
|
||||
trigger_error('Double-aliases not allowed, please fix '.
|
||||
'ConfigSchema bug');
|
||||
}
|
||||
$this->set($this->def->info[$namespace][$key]->namespace,
|
||||
$this->def->info[$namespace][$key]->name,
|
||||
$value, true);
|
||||
return;
|
||||
}
|
||||
$value = $this->def->validate(
|
||||
$value,
|
||||
$this->def->info[$namespace][$key]->type,
|
||||
@@ -103,23 +149,36 @@ class HTMLPurifier_Config
|
||||
return;
|
||||
}
|
||||
$this->conf[$namespace][$key] = $value;
|
||||
if ($namespace == 'HTML' || $namespace == 'Attr') {
|
||||
// reset HTML definition if relevant attributes changed
|
||||
$this->html_definition = null;
|
||||
}
|
||||
if ($namespace == 'CSS') {
|
||||
$this->css_definition = null;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Retrieves a copy of the HTML definition.
|
||||
* Retrieves reference to the HTML definition.
|
||||
* @param $raw Return a copy that has not been setup yet. Must be
|
||||
* called before it's been setup, otherwise won't work.
|
||||
*/
|
||||
function getHTMLDefinition() {
|
||||
if ($this->html_definition === null) {
|
||||
$this->html_definition = new HTMLPurifier_HTMLDefinition();
|
||||
$this->html_definition->setup($this);
|
||||
function &getHTMLDefinition($raw = false) {
|
||||
if (
|
||||
empty($this->html_definition) || // hasn't ever been setup
|
||||
($raw && $this->html_definition->setup) // requesting new one
|
||||
) {
|
||||
$this->html_definition = new HTMLPurifier_HTMLDefinition($this);
|
||||
if ($raw) return $this->html_definition; // no setup!
|
||||
}
|
||||
if (!$this->html_definition->setup) $this->html_definition->setup();
|
||||
return $this->html_definition;
|
||||
}
|
||||
|
||||
/**
|
||||
* Retrieves a copy of the CSS definition
|
||||
* Retrieves reference to the CSS definition
|
||||
*/
|
||||
function getCSSDefinition() {
|
||||
function &getCSSDefinition() {
|
||||
if ($this->css_definition === null) {
|
||||
$this->css_definition = new HTMLPurifier_CSSDefinition();
|
||||
$this->css_definition->setup($this);
|
||||
@@ -134,6 +193,7 @@ class HTMLPurifier_Config
|
||||
*/
|
||||
function loadArray($config_array) {
|
||||
foreach ($config_array as $key => $value) {
|
||||
$key = str_replace('_', '.', $key);
|
||||
if (strpos($key, '.') !== false) {
|
||||
// condensed form
|
||||
list($namespace, $directive) = explode('.', $key);
|
||||
@@ -148,6 +208,15 @@ class HTMLPurifier_Config
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Loads configuration values from an ini file
|
||||
* @param $filename Name of ini file
|
||||
*/
|
||||
function loadIni($filename) {
|
||||
$array = parse_ini_file($filename, true);
|
||||
$this->loadArray($array);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
?>
|
||||
?>
|
||||
|
10
library/HTMLPurifier/ConfigDef.php
Normal file
10
library/HTMLPurifier/ConfigDef.php
Normal file
@@ -0,0 +1,10 @@
|
||||
<?php
|
||||
|
||||
/**
|
||||
* Base class for configuration entity
|
||||
*/
|
||||
class HTMLPurifier_ConfigDef {
|
||||
var $class = false;
|
||||
}
|
||||
|
||||
?>
|
74
library/HTMLPurifier/ConfigDef/Directive.php
Normal file
74
library/HTMLPurifier/ConfigDef/Directive.php
Normal file
@@ -0,0 +1,74 @@
|
||||
<?php
|
||||
|
||||
require_once 'HTMLPurifier/ConfigDef.php';
|
||||
|
||||
/**
|
||||
* Structure object containing definition of a directive.
|
||||
* @note This structure does not contain default values
|
||||
*/
|
||||
class HTMLPurifier_ConfigDef_Directive extends HTMLPurifier_ConfigDef
|
||||
{
|
||||
|
||||
var $class = 'directive';
|
||||
|
||||
function HTMLPurifier_ConfigDef_Directive(
|
||||
$type = null,
|
||||
$descriptions = null,
|
||||
$allow_null = null,
|
||||
$allowed = null,
|
||||
$aliases = null
|
||||
) {
|
||||
if ( $type !== null) $this->type = $type;
|
||||
if ($descriptions !== null) $this->descriptions = $descriptions;
|
||||
if ( $allow_null !== null) $this->allow_null = $allow_null;
|
||||
if ( $allowed !== null) $this->allowed = $allowed;
|
||||
if ( $aliases !== null) $this->aliases = $aliases;
|
||||
}
|
||||
|
||||
/**
|
||||
* Allowed type of the directive. Values are:
|
||||
* - string
|
||||
* - istring (case insensitive string)
|
||||
* - int
|
||||
* - float
|
||||
* - bool
|
||||
* - lookup (array of value => true)
|
||||
* - list (regular numbered index array)
|
||||
* - hash (array of key => value)
|
||||
* - mixed (anything goes)
|
||||
*/
|
||||
var $type = 'mixed';
|
||||
|
||||
/**
|
||||
* Plaintext descriptions of the configuration entity is. Organized by
|
||||
* file and line number, so multiple descriptions are allowed.
|
||||
*/
|
||||
var $descriptions = array();
|
||||
|
||||
/**
|
||||
* Is null allowed? Has no effect for mixed type.
|
||||
* @bool
|
||||
*/
|
||||
var $allow_null = false;
|
||||
|
||||
/**
|
||||
* Lookup table of allowed values of the element, bool true if all allowed.
|
||||
*/
|
||||
var $allowed = true;
|
||||
|
||||
/**
|
||||
* Hash of value aliases, i.e. values that are equivalent.
|
||||
*/
|
||||
var $aliases = array();
|
||||
|
||||
/**
|
||||
* Adds a description to the array
|
||||
*/
|
||||
function addDescription($file, $line, $description) {
|
||||
if (!isset($this->descriptions[$file])) $this->descriptions[$file] = array();
|
||||
$this->descriptions[$file][$line] = $description;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
?>
|
27
library/HTMLPurifier/ConfigDef/DirectiveAlias.php
Normal file
27
library/HTMLPurifier/ConfigDef/DirectiveAlias.php
Normal file
@@ -0,0 +1,27 @@
|
||||
<?php
|
||||
|
||||
require_once 'HTMLPurifier/ConfigDef.php';
|
||||
|
||||
/**
|
||||
* Structure object describing a directive alias
|
||||
*/
|
||||
class HTMLPurifier_ConfigDef_DirectiveAlias extends HTMLPurifier_ConfigDef
|
||||
{
|
||||
var $class = 'alias';
|
||||
|
||||
/**
|
||||
* Namespace being aliased to
|
||||
*/
|
||||
var $namespace;
|
||||
/**
|
||||
* Directive being aliased to
|
||||
*/
|
||||
var $name;
|
||||
|
||||
function HTMLPurifier_ConfigDef_DirectiveAlias($namespace, $name) {
|
||||
$this->namespace = $namespace;
|
||||
$this->name = $name;
|
||||
}
|
||||
}
|
||||
|
||||
?>
|
23
library/HTMLPurifier/ConfigDef/Namespace.php
Normal file
23
library/HTMLPurifier/ConfigDef/Namespace.php
Normal file
@@ -0,0 +1,23 @@
|
||||
<?php
|
||||
|
||||
require_once 'HTMLPurifier/ConfigDef.php';
|
||||
|
||||
/**
|
||||
* Structure object describing of a namespace
|
||||
*/
|
||||
class HTMLPurifier_ConfigDef_Namespace extends HTMLPurifier_ConfigDef {
|
||||
|
||||
function HTMLPurifier_ConfigDef_Namespace($description = null) {
|
||||
$this->description = $description;
|
||||
}
|
||||
|
||||
var $class = 'namespace';
|
||||
|
||||
/**
|
||||
* String description of what kinds of directives go in this namespace.
|
||||
*/
|
||||
var $description;
|
||||
|
||||
}
|
||||
|
||||
?>
|
@@ -1,6 +1,10 @@
|
||||
<?php
|
||||
|
||||
require_once 'HTMLPurifier/Error.php';
|
||||
require_once 'HTMLPurifier/ConfigDef.php';
|
||||
require_once 'HTMLPurifier/ConfigDef/Namespace.php';
|
||||
require_once 'HTMLPurifier/ConfigDef/Directive.php';
|
||||
require_once 'HTMLPurifier/ConfigDef/DirectiveAlias.php';
|
||||
|
||||
/**
|
||||
* Configuration definition, defines directives and their defaults.
|
||||
@@ -67,6 +71,7 @@ class HTMLPurifier_ConfigSchema {
|
||||
|
||||
/**
|
||||
* Retrieves an instance of the application-wide configuration definition.
|
||||
* @static
|
||||
*/
|
||||
function &instance($prototype = null) {
|
||||
static $instance;
|
||||
@@ -81,6 +86,7 @@ class HTMLPurifier_ConfigSchema {
|
||||
|
||||
/**
|
||||
* Defines a directive for configuration
|
||||
* @static
|
||||
* @warning Will fail of directive's namespace is defined
|
||||
* @param $namespace Namespace the directive is in
|
||||
* @param $name Key of directive
|
||||
@@ -104,6 +110,11 @@ class HTMLPurifier_ConfigSchema {
|
||||
E_USER_ERROR);
|
||||
return;
|
||||
}
|
||||
if (empty($description)) {
|
||||
trigger_error('Description must be non-empty',
|
||||
E_USER_ERROR);
|
||||
return;
|
||||
}
|
||||
if (isset($def->info[$namespace][$name])) {
|
||||
if (
|
||||
$def->info[$namespace][$name]->type !== $type ||
|
||||
@@ -131,7 +142,7 @@ class HTMLPurifier_ConfigSchema {
|
||||
return;
|
||||
}
|
||||
$def->info[$namespace][$name] =
|
||||
new HTMLPurifier_ConfigEntity_Directive();
|
||||
new HTMLPurifier_ConfigDef_Directive();
|
||||
$def->info[$namespace][$name]->type = $type;
|
||||
$def->info[$namespace][$name]->allow_null = $allow_null;
|
||||
$def->defaults[$namespace][$name] = $default;
|
||||
@@ -144,6 +155,7 @@ class HTMLPurifier_ConfigSchema {
|
||||
|
||||
/**
|
||||
* Defines a namespace for directives to be put into.
|
||||
* @static
|
||||
* @param $namespace Namespace's name
|
||||
* @param $description Description of the namespace
|
||||
*/
|
||||
@@ -158,8 +170,13 @@ class HTMLPurifier_ConfigSchema {
|
||||
E_USER_ERROR);
|
||||
return;
|
||||
}
|
||||
if (empty($description)) {
|
||||
trigger_error('Description must be non-empty',
|
||||
E_USER_ERROR);
|
||||
return;
|
||||
}
|
||||
$def->info[$namespace] = array();
|
||||
$def->info_namespace[$namespace] = new HTMLPurifier_ConfigEntity_Namespace();
|
||||
$def->info_namespace[$namespace] = new HTMLPurifier_ConfigDef_Namespace();
|
||||
$def->info_namespace[$namespace]->description = $description;
|
||||
$def->defaults[$namespace] = array();
|
||||
}
|
||||
@@ -169,6 +186,7 @@ class HTMLPurifier_ConfigSchema {
|
||||
*
|
||||
* Directive value aliases are convenient for developers because it lets
|
||||
* them set a directive to several values and get the same result.
|
||||
* @static
|
||||
* @param $namespace Directive's namespace
|
||||
* @param $name Name of Directive
|
||||
* @param $alias Name of aliased value
|
||||
@@ -200,6 +218,7 @@ class HTMLPurifier_ConfigSchema {
|
||||
|
||||
/**
|
||||
* Defines a set of allowed values for a directive.
|
||||
* @static
|
||||
* @param $namespace Namespace of directive
|
||||
* @param $name Name of directive
|
||||
* @param $allowed_values Arraylist of allowed values
|
||||
@@ -211,12 +230,66 @@ class HTMLPurifier_ConfigSchema {
|
||||
E_USER_ERROR);
|
||||
return;
|
||||
}
|
||||
if ($def->info[$namespace][$name]->allowed === true) {
|
||||
$def->info[$namespace][$name]->allowed = array();
|
||||
$directive =& $def->info[$namespace][$name];
|
||||
$type = $directive->type;
|
||||
if ($type != 'string' && $type != 'istring') {
|
||||
trigger_error('Cannot define allowed values for directive whose type is not string',
|
||||
E_USER_ERROR);
|
||||
return;
|
||||
}
|
||||
if ($directive->allowed === true) {
|
||||
$directive->allowed = array();
|
||||
}
|
||||
foreach ($allowed_values as $value) {
|
||||
$def->info[$namespace][$name]->allowed[$value] = true;
|
||||
$directive->allowed[$value] = true;
|
||||
}
|
||||
if ($def->defaults[$namespace][$name] !== null &&
|
||||
!isset($directive->allowed[$def->defaults[$namespace][$name]])) {
|
||||
trigger_error('Default value must be in allowed range of variables',
|
||||
E_USER_ERROR);
|
||||
$directive->allowed = true; // undo undo!
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Defines a directive alias for backwards compatibility
|
||||
* @static
|
||||
* @param $namespace
|
||||
* @param $name Directive that will be aliased
|
||||
* @param $new_namespace
|
||||
* @param $new_name Directive that the alias will be to
|
||||
*/
|
||||
function defineAlias($namespace, $name, $new_namespace, $new_name) {
|
||||
$def =& HTMLPurifier_ConfigSchema::instance();
|
||||
if (!isset($def->info[$namespace])) {
|
||||
trigger_error('Cannot define directive alias in undefined namespace',
|
||||
E_USER_ERROR);
|
||||
return;
|
||||
}
|
||||
if (!ctype_alnum($name)) {
|
||||
trigger_error('Directive name must be alphanumeric',
|
||||
E_USER_ERROR);
|
||||
return;
|
||||
}
|
||||
if (isset($def->info[$namespace][$name])) {
|
||||
trigger_error('Cannot define alias over directive',
|
||||
E_USER_ERROR);
|
||||
return;
|
||||
}
|
||||
if (!isset($def->info[$new_namespace][$new_name])) {
|
||||
trigger_error('Cannot define alias to undefined directive',
|
||||
E_USER_ERROR);
|
||||
return;
|
||||
}
|
||||
if ($def->info[$new_namespace][$new_name]->class == 'alias') {
|
||||
trigger_error('Cannot define alias to alias',
|
||||
E_USER_ERROR);
|
||||
return;
|
||||
}
|
||||
$def->info[$namespace][$name] =
|
||||
new HTMLPurifier_ConfigDef_DirectiveAlias(
|
||||
$new_namespace, $new_name);
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -247,11 +320,26 @@ class HTMLPurifier_ConfigSchema {
|
||||
case 'bool':
|
||||
if (is_int($var) && ($var === 0 || $var === 1)) {
|
||||
$var = (bool) $var;
|
||||
} elseif (is_string($var)) {
|
||||
if ($var == 'on' || $var == 'true' || $var == '1') {
|
||||
$var = true;
|
||||
} elseif ($var == 'off' || $var == 'false' || $var == '0') {
|
||||
$var = false;
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
} elseif (!is_bool($var)) break;
|
||||
return $var;
|
||||
case 'list':
|
||||
case 'hash':
|
||||
case 'lookup':
|
||||
if (is_string($var)) {
|
||||
// simplistic string to array method that only works
|
||||
// for simple lists of tag names or alphanumeric characters
|
||||
$var = explode(',',$var);
|
||||
// remove spaces
|
||||
foreach ($var as $i => $j) $var[$i] = trim($j);
|
||||
}
|
||||
if (!is_array($var)) break;
|
||||
$keys = array_keys($var);
|
||||
if ($keys === array_keys($keys)) {
|
||||
@@ -295,74 +383,4 @@ class HTMLPurifier_ConfigSchema {
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Base class for configuration entity
|
||||
*/
|
||||
class HTMLPurifier_ConfigEntity {}
|
||||
|
||||
/**
|
||||
* Structure object describing of a namespace
|
||||
*/
|
||||
class HTMLPurifier_ConfigEntity_Namespace extends HTMLPurifier_ConfigEntity {
|
||||
|
||||
/**
|
||||
* String description of what kinds of directives go in this namespace.
|
||||
*/
|
||||
var $description;
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* Structure object containing definition of a directive.
|
||||
* @note This structure does not contain default values
|
||||
*/
|
||||
class HTMLPurifier_ConfigEntity_Directive extends HTMLPurifier_ConfigEntity
|
||||
{
|
||||
|
||||
/**
|
||||
* Hash of value aliases, i.e. values that are equivalent.
|
||||
*/
|
||||
var $aliases = array();
|
||||
|
||||
/**
|
||||
* Lookup table of allowed values of the element, bool true if all allowed.
|
||||
*/
|
||||
var $allowed = true;
|
||||
|
||||
/**
|
||||
* Allowed type of the directive. Values are:
|
||||
* - string
|
||||
* - istring (case insensitive string)
|
||||
* - int
|
||||
* - float
|
||||
* - bool
|
||||
* - lookup (array of value => true)
|
||||
* - list (regular numbered index array)
|
||||
* - hash (array of key => value)
|
||||
* - mixed (anything goes)
|
||||
*/
|
||||
var $type = 'mixed';
|
||||
|
||||
/**
|
||||
* Is null allowed? Has no affect for mixed type.
|
||||
* @bool
|
||||
*/
|
||||
var $allow_null = false;
|
||||
|
||||
/**
|
||||
* Plaintext descriptions of the configuration entity is. Organized by
|
||||
* file and line number, so multiple descriptions are allowed.
|
||||
*/
|
||||
var $descriptions = array();
|
||||
|
||||
/**
|
||||
* Adds a description to the array
|
||||
*/
|
||||
function addDescription($file, $line, $description) {
|
||||
if (!isset($this->descriptions[$file])) $this->descriptions[$file] = array();
|
||||
$this->descriptions[$file][$line] = $description;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
?>
|
||||
?>
|
||||
|
148
library/HTMLPurifier/ContentSets.php
Normal file
148
library/HTMLPurifier/ContentSets.php
Normal file
@@ -0,0 +1,148 @@
|
||||
<?php
|
||||
|
||||
// common defs that we'll support by default
|
||||
require_once 'HTMLPurifier/ChildDef.php';
|
||||
require_once 'HTMLPurifier/ChildDef/Empty.php';
|
||||
require_once 'HTMLPurifier/ChildDef/Required.php';
|
||||
require_once 'HTMLPurifier/ChildDef/Optional.php';
|
||||
|
||||
class HTMLPurifier_ContentSets
|
||||
{
|
||||
|
||||
/**
|
||||
* List of content set strings (pipe seperators) indexed by name.
|
||||
* @public
|
||||
*/
|
||||
var $info = array();
|
||||
|
||||
/**
|
||||
* List of content set lookups (element => true) indexed by name.
|
||||
* @note This is in HTMLPurifier_HTMLDefinition->info_content_sets
|
||||
* @public
|
||||
*/
|
||||
var $lookup = array();
|
||||
|
||||
/**
|
||||
* Synchronized list of defined content sets (keys of info)
|
||||
*/
|
||||
var $keys = array();
|
||||
/**
|
||||
* Synchronized list of defined content values (values of info)
|
||||
*/
|
||||
var $values = array();
|
||||
|
||||
/**
|
||||
* Merges in module's content sets, expands identifiers in the content
|
||||
* sets and populates the keys, values and lookup member variables.
|
||||
* @param $modules List of HTMLPurifier_HTMLModule
|
||||
*/
|
||||
function HTMLPurifier_ContentSets($modules) {
|
||||
if (!is_array($modules)) $modules = array($modules);
|
||||
// populate content_sets based on module hints
|
||||
// sorry, no way of overloading
|
||||
foreach ($modules as $module_i => $module) {
|
||||
foreach ($module->content_sets as $key => $value) {
|
||||
if (isset($this->info[$key])) {
|
||||
// add it into the existing content set
|
||||
$this->info[$key] = $this->info[$key] . ' | ' . $value;
|
||||
} else {
|
||||
$this->info[$key] = $value;
|
||||
}
|
||||
}
|
||||
}
|
||||
// perform content_set expansions
|
||||
$this->keys = array_keys($this->info);
|
||||
foreach ($this->info as $i => $set) {
|
||||
// only performed once, so infinite recursion is not
|
||||
// a problem
|
||||
$this->info[$i] =
|
||||
str_replace(
|
||||
$this->keys,
|
||||
// must be recalculated each time due to
|
||||
// changing substitutions
|
||||
array_values($this->info),
|
||||
$set);
|
||||
}
|
||||
$this->values = array_values($this->info);
|
||||
|
||||
// generate lookup tables
|
||||
foreach ($this->info as $name => $set) {
|
||||
$this->lookup[$name] = $this->convertToLookup($set);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Accepts a definition; generates and assigns a ChildDef for it
|
||||
* @param $def HTMLPurifier_ElementDef reference
|
||||
* @param $module Module that defined the ElementDef
|
||||
*/
|
||||
function generateChildDef(&$def, $module) {
|
||||
if (!empty($def->child)) return; // already done!
|
||||
$content_model = $def->content_model;
|
||||
if (is_string($content_model)) {
|
||||
$def->content_model = str_replace(
|
||||
$this->keys, $this->values, $content_model);
|
||||
}
|
||||
$def->child = $this->getChildDef($def, $module);
|
||||
}
|
||||
|
||||
/**
|
||||
* Instantiates a ChildDef based on content_model and content_model_type
|
||||
* member variables in HTMLPurifier_ElementDef
|
||||
* @note This will also defer to modules for custom HTMLPurifier_ChildDef
|
||||
* subclasses that need content set expansion
|
||||
* @param $def HTMLPurifier_ElementDef to have ChildDef extracted
|
||||
* @return HTMLPurifier_ChildDef corresponding to ElementDef
|
||||
*/
|
||||
function getChildDef($def, $module) {
|
||||
$value = $def->content_model;
|
||||
if (is_object($value)) {
|
||||
trigger_error(
|
||||
'Literal object child definitions should be stored in '.
|
||||
'ElementDef->child not ElementDef->content_model',
|
||||
E_USER_NOTICE
|
||||
);
|
||||
return $value;
|
||||
}
|
||||
switch ($def->content_model_type) {
|
||||
case 'required':
|
||||
return new HTMLPurifier_ChildDef_Required($value);
|
||||
case 'optional':
|
||||
return new HTMLPurifier_ChildDef_Optional($value);
|
||||
case 'empty':
|
||||
return new HTMLPurifier_ChildDef_Empty();
|
||||
case 'custom':
|
||||
return new HTMLPurifier_ChildDef_Custom($value);
|
||||
}
|
||||
// defer to its module
|
||||
$return = false;
|
||||
if ($module->defines_child_def) { // save a func call
|
||||
$return = $module->getChildDef($def);
|
||||
}
|
||||
if ($return !== false) return $return;
|
||||
// error-out
|
||||
trigger_error(
|
||||
'Could not determine which ChildDef class to instantiate',
|
||||
E_USER_ERROR
|
||||
);
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* Converts a string list of elements separated by pipes into
|
||||
* a lookup array.
|
||||
* @param $string List of elements
|
||||
* @return Lookup array of elements
|
||||
*/
|
||||
function convertToLookup($string) {
|
||||
$array = explode('|', str_replace(' ', '', $string));
|
||||
$ret = array();
|
||||
foreach ($array as $i => $k) {
|
||||
$ret[$k] = true;
|
||||
}
|
||||
return $ret;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
?>
|
122
library/HTMLPurifier/ElementDef.php
Normal file
122
library/HTMLPurifier/ElementDef.php
Normal file
@@ -0,0 +1,122 @@
|
||||
<?php
|
||||
|
||||
/**
|
||||
* Structure that stores an HTML element definition. Used by
|
||||
* HTMLPurifier_HTMLDefinition and HTMLPurifier_HTMLModule.
|
||||
*/
|
||||
class HTMLPurifier_ElementDef
|
||||
{
|
||||
|
||||
/**
|
||||
* Does the definition work by itself, or is it created solely
|
||||
* for the purpose of merging into another definition?
|
||||
*/
|
||||
var $standalone = true;
|
||||
|
||||
/**
|
||||
* Associative array of attribute name to HTMLPurifier_AttrDef
|
||||
* @note Before being processed by HTMLPurifier_AttrCollections
|
||||
* when modules are finalized during
|
||||
* HTMLPurifier_HTMLDefinition->setup(), this array may also
|
||||
* contain an array at index 0 that indicates which attribute
|
||||
* collections to load into the full array. It may also
|
||||
* contain string indentifiers in lieu of HTMLPurifier_AttrDef,
|
||||
* see HTMLPurifier_AttrTypes on how they are expanded during
|
||||
* HTMLPurifier_HTMLDefinition->setup() processing.
|
||||
* @public
|
||||
*/
|
||||
var $attr = array();
|
||||
|
||||
/**
|
||||
* Indexed list of tag's HTMLPurifier_AttrTransform to be done before validation
|
||||
* @public
|
||||
*/
|
||||
var $attr_transform_pre = array();
|
||||
|
||||
/**
|
||||
* Indexed list of tag's HTMLPurifier_AttrTransform to be done after validation
|
||||
* @public
|
||||
*/
|
||||
var $attr_transform_post = array();
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* HTMLPurifier_ChildDef of this tag.
|
||||
* @public
|
||||
*/
|
||||
var $child;
|
||||
|
||||
/**
|
||||
* Abstract string representation of internal ChildDef rules. See
|
||||
* HTMLPurifier_ContentSets for how this is parsed and then transformed
|
||||
* into an HTMLPurifier_ChildDef.
|
||||
* @public
|
||||
*/
|
||||
var $content_model;
|
||||
|
||||
/**
|
||||
* Value of $child->type, used to determine which ChildDef to use,
|
||||
* used in combination with $content_model.
|
||||
* @public
|
||||
*/
|
||||
var $content_model_type;
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* Lookup table of tags that close this tag. Used during parsing
|
||||
* to make sure we don't attempt to nest unclosed tags.
|
||||
* @public
|
||||
*/
|
||||
var $auto_close = array();
|
||||
|
||||
/**
|
||||
* Does the element have a content model (#PCDATA | Inline)*? This
|
||||
* is important for chameleon ins and del processing in
|
||||
* HTMLPurifier_ChildDef_Chameleon. Dynamically set: modules don't
|
||||
* have to worry about this one.
|
||||
* @public
|
||||
*/
|
||||
var $descendants_are_inline;
|
||||
|
||||
/**
|
||||
* Lookup table of tags excluded from all descendants of this tag.
|
||||
* @public
|
||||
*/
|
||||
var $excludes = array();
|
||||
|
||||
/**
|
||||
* Merges the values of another element definition into this one.
|
||||
* Values from the new element def take precedence if a value is
|
||||
* not mergeable.
|
||||
*/
|
||||
function mergeIn($def) {
|
||||
|
||||
// later keys takes precedence
|
||||
foreach($def->attr as $k => $v) {
|
||||
if ($k == 0) {
|
||||
// merge in the includes
|
||||
// sorry, no way to override an include
|
||||
foreach ($v as $v2) {
|
||||
$def->attr[0][] = $v2;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
$this->attr[$k] = $v;
|
||||
}
|
||||
foreach($def->attr_transform_pre as $k => $v) $this->attr_transform_pre[$k] = $v;
|
||||
foreach($def->attr_transform_post as $k => $v) $this->attr_transform_post[$k] = $v;
|
||||
foreach($def->auto_close as $k => $v) $this->auto_close[$k] = $v;
|
||||
foreach($def->excludes as $k => $v) $this->excludes[$k] = $v;
|
||||
|
||||
if(!is_null($def->child)) $this->child = $def->child;
|
||||
if(!empty($def->content_model)) $this->content_model .= ' | ' . $def->content_model;
|
||||
if(!empty($def->content_model_type)) $this->content_model_type = $def->content_model_type;
|
||||
if(!is_null($def->descendants_are_inline)) $this->descendants_are_inline = $def->descendants_are_inline;
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
?>
|
@@ -6,15 +6,29 @@ HTMLPurifier_ConfigSchema::define(
|
||||
'Core', 'Encoding', 'utf-8', 'istring',
|
||||
'If for some reason you are unable to convert all webpages to UTF-8, '.
|
||||
'you can use this directive as a stop-gap compatibility change to '.
|
||||
'let HTMLPurifier deal with non UTF-8 input. This technique has '.
|
||||
'let HTML Purifier deal with non UTF-8 input. This technique has '.
|
||||
'notable deficiencies: absolutely no characters outside of the selected '.
|
||||
'character encoding will be preserved, not even the ones that have '.
|
||||
'been ampersand escaped (this is due to a UTF-8 specific <em>feature</em> '.
|
||||
'that automatically resolves all entities), making it pretty useless '.
|
||||
'for anything except the most I18N-blind applications. This directive '.
|
||||
'for anything except the most I18N-blind applications, although '.
|
||||
'%Core.EscapeNonASCIICharacters offers fixes this trouble with '.
|
||||
'another tradeoff. This directive '.
|
||||
'only accepts ISO-8859-1 if iconv is not enabled.'
|
||||
);
|
||||
|
||||
HTMLPurifier_ConfigSchema::define(
|
||||
'Core', 'EscapeNonASCIICharacters', false, 'bool',
|
||||
'This directive overcomes a deficiency in %Core.Encoding by blindly '.
|
||||
'converting all non-ASCII characters into decimal numeric entities before '.
|
||||
'converting it to its native encoding. This means that even '.
|
||||
'characters that can be expressed in the non-UTF-8 encoding will '.
|
||||
'be entity-ized, which can be a real downer for encodings like Big5. '.
|
||||
'It also assumes that the ASCII repetoire is available, although '.
|
||||
'this is the case for almost all encodings. Anyway, use UTF-8! This '.
|
||||
'directive has been available since 1.4.0.'
|
||||
);
|
||||
|
||||
if ( !function_exists('iconv') ) {
|
||||
// only encodings with native PHP support
|
||||
HTMLPurifier_ConfigSchema::defineAllowedValues(
|
||||
@@ -38,16 +52,25 @@ HTMLPurifier_ConfigSchema::define(
|
||||
|
||||
/**
|
||||
* A UTF-8 specific character encoder that handles cleaning and transforming.
|
||||
* @note All functions in this class should be static.
|
||||
*/
|
||||
class HTMLPurifier_Encoder
|
||||
{
|
||||
|
||||
/**
|
||||
* Constructor throws fatal error if you attempt to instantiate class
|
||||
*/
|
||||
function HTMLPurifier_Encoder() {
|
||||
trigger_error('Cannot instantiate encoder, call methods statically', E_USER_ERROR);
|
||||
}
|
||||
|
||||
/**
|
||||
* Cleans a UTF-8 string for well-formedness and SGML validity
|
||||
*
|
||||
* It will parse according to UTF-8 and return a valid UTF8 string, with
|
||||
* non-SGML codepoints excluded.
|
||||
*
|
||||
* @static
|
||||
* @note Just for reference, the non-SGML code points are 0 to 31 and
|
||||
* 127 to 159, inclusive. However, we allow code points 9, 10
|
||||
* and 13, which are the tab, line feed and carriage return
|
||||
@@ -225,6 +248,7 @@ class HTMLPurifier_Encoder
|
||||
|
||||
/**
|
||||
* Translates a Unicode codepoint into its corresponding UTF-8 character.
|
||||
* @static
|
||||
* @note Based on Feyd's function at
|
||||
* <http://forums.devnetwork.net/viewtopic.php?p=191404#191404>,
|
||||
* which is in public domain.
|
||||
@@ -288,6 +312,7 @@ class HTMLPurifier_Encoder
|
||||
|
||||
/**
|
||||
* Converts a string to UTF-8 based on configuration.
|
||||
* @static
|
||||
*/
|
||||
function convertToUTF8($str, $config, &$context) {
|
||||
static $iconv = null;
|
||||
@@ -299,10 +324,12 @@ class HTMLPurifier_Encoder
|
||||
} elseif ($encoding === 'iso-8859-1') {
|
||||
return @utf8_encode($str);
|
||||
}
|
||||
trigger_error('Encoding not supported', E_USER_ERROR);
|
||||
}
|
||||
|
||||
/**
|
||||
* Converts a string from UTF-8 based on configuration.
|
||||
* @static
|
||||
* @note Currently, this is a lossy conversion, with unexpressable
|
||||
* characters being omitted.
|
||||
*/
|
||||
@@ -311,11 +338,63 @@ class HTMLPurifier_Encoder
|
||||
if ($iconv === null) $iconv = function_exists('iconv');
|
||||
$encoding = $config->get('Core', 'Encoding');
|
||||
if ($encoding === 'utf-8') return $str;
|
||||
if ($config->get('Core', 'EscapeNonASCIICharacters')) {
|
||||
$str = HTMLPurifier_Encoder::convertToASCIIDumbLossless($str);
|
||||
}
|
||||
if ($iconv && !$config->get('Test', 'ForceNoIconv')) {
|
||||
return @iconv('utf-8', $encoding . '//IGNORE', $str);
|
||||
} elseif ($encoding === 'iso-8859-1') {
|
||||
return @utf8_decode($str);
|
||||
}
|
||||
trigger_error('Encoding not supported', E_USER_ERROR);
|
||||
}
|
||||
|
||||
/**
|
||||
* Lossless (character-wise) conversion of HTML to ASCII
|
||||
* @static
|
||||
* @param $str UTF-8 string to be converted to ASCII
|
||||
* @returns ASCII encoded string with non-ASCII character entity-ized
|
||||
* @warning Adapted from MediaWiki, claiming fair use: this is a common
|
||||
* algorithm. If you disagree with this license fudgery,
|
||||
* implement it yourself.
|
||||
* @note Uses decimal numeric entities since they are best supported.
|
||||
* @note This is a DUMB function: it has no concept of keeping
|
||||
* character entities that the projected character encoding
|
||||
* can allow. We could possibly implement a smart version
|
||||
* but that would require it to also know which Unicode
|
||||
* codepoints the charset supported (not an easy task).
|
||||
* @note Sort of with cleanUTF8() but it assumes that $str is
|
||||
* well-formed UTF-8
|
||||
*/
|
||||
function convertToASCIIDumbLossless($str) {
|
||||
$bytesleft = 0;
|
||||
$result = '';
|
||||
$working = 0;
|
||||
$len = strlen($str);
|
||||
for( $i = 0; $i < $len; $i++ ) {
|
||||
$bytevalue = ord( $str[$i] );
|
||||
if( $bytevalue <= 0x7F ) { //0xxx xxxx
|
||||
$result .= chr( $bytevalue );
|
||||
$bytesleft = 0;
|
||||
} elseif( $bytevalue <= 0xBF ) { //10xx xxxx
|
||||
$working = $working << 6;
|
||||
$working += ($bytevalue & 0x3F);
|
||||
$bytesleft--;
|
||||
if( $bytesleft <= 0 ) {
|
||||
$result .= "&#" . $working . ";";
|
||||
}
|
||||
} elseif( $bytevalue <= 0xDF ) { //110x xxxx
|
||||
$working = $bytevalue & 0x1F;
|
||||
$bytesleft = 1;
|
||||
} elseif( $bytevalue <= 0xEF ) { //1110 xxxx
|
||||
$working = $bytevalue & 0x0F;
|
||||
$bytesleft = 2;
|
||||
} else { //1111 0xxx
|
||||
$working = $bytevalue & 0x07;
|
||||
$bytesleft = 3;
|
||||
}
|
||||
}
|
||||
return $result;
|
||||
}
|
||||
|
||||
|
||||
|
@@ -26,6 +26,7 @@ class HTMLPurifier_EntityLookup {
|
||||
|
||||
/**
|
||||
* Retrieves sole instance of the object.
|
||||
* @static
|
||||
* @param Optional prototype of custom lookup table to overload with.
|
||||
*/
|
||||
function instance($prototype = false) {
|
||||
|
39
library/HTMLPurifier/Filter.php
Normal file
39
library/HTMLPurifier/Filter.php
Normal file
@@ -0,0 +1,39 @@
|
||||
<?php
|
||||
|
||||
/**
|
||||
* Represents a pre or post processing filter on HTML Purifier's output
|
||||
*
|
||||
* Sometimes, a little ad-hoc fixing of HTML has to be done before
|
||||
* it gets sent through HTML Purifier: you can use filters to acheive
|
||||
* this effect. For instance, YouTube videos can be preserved using
|
||||
* this manner. You could have used a decorator for this task, but
|
||||
* PHP's support for them is not terribly robust, so we're going
|
||||
* to just loop through the filters.
|
||||
*
|
||||
* Filters should be exited first in, last out. If there are three filters,
|
||||
* named 1, 2 and 3, the order of execution should go 1->preFilter,
|
||||
* 2->preFilter, 3->preFilter, purify, 3->postFilter, 2->postFilter,
|
||||
* 1->postFilter.
|
||||
*/
|
||||
|
||||
class HTMLPurifier_Filter
|
||||
{
|
||||
|
||||
/**
|
||||
* Name of the filter for identification purposes
|
||||
*/
|
||||
var $name;
|
||||
|
||||
/**
|
||||
* Pre-processor function, handles HTML before HTML Purifier
|
||||
*/
|
||||
function preFilter($html, $config, &$context) {}
|
||||
|
||||
/**
|
||||
* Post-processor function, handles HTML after HTML Purifier
|
||||
*/
|
||||
function postFilter($html, $config, &$context) {}
|
||||
|
||||
}
|
||||
|
||||
?>
|
34
library/HTMLPurifier/Filter/YouTube.php
Normal file
34
library/HTMLPurifier/Filter/YouTube.php
Normal file
@@ -0,0 +1,34 @@
|
||||
<?php
|
||||
|
||||
require_once 'HTMLPurifier/Filter.php';
|
||||
|
||||
class HTMLPurifier_Filter_YouTube extends HTMLPurifier_Filter
|
||||
{
|
||||
|
||||
var $name = 'YouTube preservation';
|
||||
|
||||
function preFilter($html, $config, &$context) {
|
||||
$pre_regex = '#<object[^>]+>.+?'.
|
||||
'http://www.youtube.com/v/([A-Za-z0-9\-_]+).+?</object>#s';
|
||||
$pre_replace = '<span class="youtube-embed">\1</span>';
|
||||
return preg_replace($pre_regex, $pre_replace, $html);
|
||||
}
|
||||
|
||||
function postFilter($html, $config, &$context) {
|
||||
$post_regex = '#<span class="youtube-embed">([A-Za-z0-9\-_]+)</span>#';
|
||||
$post_replace = '<object width="425" height="350" '.
|
||||
'data="http://www.youtube.com/v/\1">'.
|
||||
'<param name="movie" value="http://www.youtube.com/v/\1"></param>'.
|
||||
'<param name="wmode" value="transparent"></param>'.
|
||||
'<!--[if IE]>'.
|
||||
'<embed src="http://www.youtube.com/v/\1"'.
|
||||
'type="application/x-shockwave-flash"'.
|
||||
'wmode="transparent" width="425" height="350" />'.
|
||||
'<![endif]-->'.
|
||||
'</object>';
|
||||
return preg_replace($post_regex, $post_replace, $html);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
?>
|
@@ -104,14 +104,14 @@ class HTMLPurifier_Generator
|
||||
function generateFromToken($token) {
|
||||
if (!isset($token->type)) return '';
|
||||
if ($token->type == 'start') {
|
||||
$attr = $this->generateAttributes($token->attributes);
|
||||
$attr = $this->generateAttributes($token->attr);
|
||||
return '<' . $token->name . ($attr ? ' ' : '') . $attr . '>';
|
||||
|
||||
} elseif ($token->type == 'end') {
|
||||
return '</' . $token->name . '>';
|
||||
|
||||
} elseif ($token->type == 'empty') {
|
||||
$attr = $this->generateAttributes($token->attributes);
|
||||
$attr = $this->generateAttributes($token->attr);
|
||||
return '<' . $token->name . ($attr ? ' ' : '') . $attr .
|
||||
( $this->_xhtml ? ' /': '' )
|
||||
. '>';
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user