mirror of
https://github.com/ezyang/htmlpurifier.git
synced 2025-08-04 13:18:00 +02:00
Compare commits
111 Commits
Author | SHA1 | Date | |
---|---|---|---|
|
3b979ee846 | ||
|
d151ffd9e6 | ||
|
2a01cf786e | ||
|
825b0671b5 | ||
|
4bdc0446de | ||
|
45a70e8ae4 | ||
|
1fe60c9b9d | ||
|
dc0e2c6b3e | ||
|
9bbbb87ffa | ||
|
b63b0be21f | ||
|
73a1e31fad | ||
|
775763c583 | ||
|
49cb2a4a7c | ||
|
61b6ee7183 | ||
|
d7ce6b4587 | ||
|
f67ee19f31 | ||
|
92b3f0e817 | ||
|
3c4da9666f | ||
|
925a07b828 | ||
|
94db380271 | ||
|
b9e7ba6a2f | ||
|
b1b3377b9c | ||
|
d8673539ab | ||
|
3b26e5dc5b | ||
|
c5ea987069 | ||
|
b152448608 | ||
|
b0575cb888 | ||
|
224ef774f7 | ||
|
18a83acc5d | ||
|
f9090e45c0 | ||
|
450523a9ca | ||
|
1955527a11 | ||
|
a5751c7f20 | ||
|
0960cf6ace | ||
|
83ed9e0fe1 | ||
|
fe9238af3a | ||
|
f0fe829af4 | ||
|
a3968a1ec7 | ||
|
a8298172e1 | ||
|
90dd7f13ae | ||
|
780c7fd309 | ||
|
dec6c52695 | ||
|
1ea3c1e968 | ||
|
bdab77b59e | ||
|
82afd890c4 | ||
|
b0df2f292f | ||
|
7a4c7b3777 | ||
|
2dc8e9c3d5 | ||
|
d48f9b6b21 | ||
|
2df5896324 | ||
|
f38fe431ed | ||
|
926b94bdd3 | ||
|
ad934540da | ||
|
afee1ea9bf | ||
|
a6bbe60e7c | ||
|
d2fd193bc4 | ||
|
e1b29d7c25 | ||
|
9668ac1e38 | ||
|
eb6950d7d0 | ||
|
4a724d0230 | ||
|
504203c0f3 | ||
|
e998b034d1 | ||
|
84e3a28001 | ||
|
4ee1bf94e3 | ||
|
24f2771304 | ||
|
74ba9b8629 | ||
|
b9caa35bf4 | ||
|
6ff78d2f79 | ||
|
8256ca4376 | ||
|
7d2fe4c5d7 | ||
|
f3646a3a06 | ||
|
29716bf8f4 | ||
|
fb38b02135 | ||
|
13790c6db2 | ||
|
2d6bf12fe0 | ||
|
8f515b9cda | ||
|
58be73fcf7 | ||
|
f432a40f50 | ||
|
d660b9018b | ||
|
4d96433c23 | ||
|
a78f0f5f80 | ||
|
d941d30cfa | ||
|
9af9c505e1 | ||
|
7e6a3fc990 | ||
|
c7e798080c | ||
|
32c5b5080b | ||
|
cbdd48811d | ||
|
37def0104b | ||
|
d9bb97cc26 | ||
|
8bff97ec08 | ||
|
fab2b363d0 | ||
|
8e1cfb362d | ||
|
1fa5101511 | ||
|
24663d65ed | ||
|
6adbaf0e5c | ||
|
81cd9b1ee8 | ||
|
f5ff8acbb0 | ||
|
ad8310c1f5 | ||
|
4b5198c5bc | ||
|
a251ec590f | ||
|
2bfdfaa02c | ||
|
4abf83af62 | ||
|
1ad55e0ed5 | ||
|
6c04bbdac1 | ||
|
c046da638a | ||
|
801dbcafb7 | ||
|
4f8d83506d | ||
|
00fce29467 | ||
|
686824262e | ||
|
b93892a3b6 | ||
|
7a6de55f76 |
6
CREDITS
6
CREDITS
@@ -2,6 +2,6 @@
|
|||||||
CREDITS
|
CREDITS
|
||||||
|
|
||||||
Almost everything written by Edward Z. Yang (Ambush Commander). Lots of thanks
|
Almost everything written by Edward Z. Yang (Ambush Commander). Lots of thanks
|
||||||
to the DevNetwork Community for their help (see docs/devnetwork.html for more
|
to the DevNetwork Community for their help (see docs/ref-devnetwork.html for
|
||||||
details), Feyd especially (namely IPv6 and optimization). Thanks to RSnake for
|
more details), Feyd especially (namely IPv6 and optimization). Thanks to RSnake
|
||||||
letting me package his fantastic XSS cheatsheet for a smoketest.
|
for letting me package his fantastic XSS cheatsheet for a smoketest.
|
||||||
|
11
Doxyfile
11
Doxyfile
@@ -4,7 +4,7 @@
|
|||||||
# Project related configuration options
|
# Project related configuration options
|
||||||
#---------------------------------------------------------------------------
|
#---------------------------------------------------------------------------
|
||||||
PROJECT_NAME = HTML Purifier
|
PROJECT_NAME = HTML Purifier
|
||||||
PROJECT_NUMBER = 1.0.0
|
PROJECT_NUMBER = 1.3.0
|
||||||
OUTPUT_DIRECTORY = "C:/Documents and Settings/Edward/My Documents/My Webs/htmlpurifier/docs/doxygen"
|
OUTPUT_DIRECTORY = "C:/Documents and Settings/Edward/My Documents/My Webs/htmlpurifier/docs/doxygen"
|
||||||
CREATE_SUBDIRS = NO
|
CREATE_SUBDIRS = NO
|
||||||
OUTPUT_LANGUAGE = English
|
OUTPUT_LANGUAGE = English
|
||||||
@@ -89,9 +89,12 @@ EXCLUDE =
|
|||||||
EXCLUDE_SYMLINKS = NO
|
EXCLUDE_SYMLINKS = NO
|
||||||
EXCLUDE_PATTERNS = */tests/* \
|
EXCLUDE_PATTERNS = */tests/* \
|
||||||
*/benchmarks/* \
|
*/benchmarks/* \
|
||||||
*/docs/phpdoc/* \
|
*/docs/* \
|
||||||
*/docs/doxygen/* \
|
*/test-settings.php \
|
||||||
*/test-settings.php
|
*/configdoc/* \
|
||||||
|
*/test-settings.php \
|
||||||
|
*/maintenance/* \
|
||||||
|
*/smoketests/*
|
||||||
EXAMPLE_PATH =
|
EXAMPLE_PATH =
|
||||||
EXAMPLE_PATTERNS = *
|
EXAMPLE_PATTERNS = *
|
||||||
EXAMPLE_RECURSIVE = NO
|
EXAMPLE_RECURSIVE = NO
|
||||||
|
180
INSTALL
180
INSTALL
@@ -2,141 +2,183 @@
|
|||||||
Install
|
Install
|
||||||
How to install HTML Purifier
|
How to install HTML Purifier
|
||||||
|
|
||||||
Being a library, there's no fancy GUI that will take you step-by-step through
|
HTML Purifier is designed to run out of the box, so actually using the library
|
||||||
configuring database credentials and other mumbo-jumbo. HTML Purifier is
|
is extremely easy. (Although, if you were looking for a step-by-step
|
||||||
designed to run "out of the box." Regardless, there are still a couple of
|
installation GUI, you've come to the wrong place!) The impatient can scroll
|
||||||
things you should be mindful of.
|
down to the bottom of this INSTALL document to see the code, but you really
|
||||||
|
should make sure a few things are properly done.
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
0. Compatibility
|
1. Compatibility
|
||||||
|
|
||||||
HTML Purifier works in both PHP 4 and PHP 5. I have run the test suite on
|
HTML Purifier works in both PHP 4 and PHP 5, from PHP 4.3.9 and up. It has no
|
||||||
these versions:
|
core dependencies with other libraries. (Whoopee!)
|
||||||
|
|
||||||
- 4.3.9, 4.3.11
|
Optional extensions are iconv (usually installed) and tidy (also common).
|
||||||
- 4.4.0, 4.4.4
|
If you use UTF-8 and don't plan on pretty-printing HTML, you can get away with
|
||||||
- 5.0.0, 5.0.4
|
not having either of these extensions.
|
||||||
- 5.1.0, 5.1.6
|
|
||||||
|
|
||||||
And can confidently say that HTML Purifier should work in all versions
|
|
||||||
between and afterwards. HTML Purifier definitely does not support PHP 4.2,
|
|
||||||
and PHP 4.3 branch support may go further back than that, but I haven't tested
|
|
||||||
any earlier versions.
|
|
||||||
|
|
||||||
I have been unable to get PHP 5.0.5 working on my computer, so if someone
|
|
||||||
wants to test that, be my guest. All tests were done on Windows XP Home,
|
|
||||||
but operating system should not be a major factor in the library.
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
1. Including the proper files
|
2. Including the library
|
||||||
|
|
||||||
The library/ directory must be added to your path: HTML Purifier will not be
|
Simply use:
|
||||||
able to find the necessary includes otherwise. This is as simple as:
|
|
||||||
|
|
||||||
set_include_path('/path/to/htmlpurifier/library' . PATH_SEPARATOR .
|
require_once '/path/to/library/HTMLPurifier.auto.php';
|
||||||
get_include_path() );
|
|
||||||
|
|
||||||
...replacing /path/to/htmlpurifier with the actual location of the folder. Don't
|
...and you're good to go. Since HTML Purifier's codebase is fairly
|
||||||
worry, HTML Purifier is namespaced so unless you have another file named
|
large, I recommend only including HTML Purifier when you need it.
|
||||||
HTMLPurifier.php, the files won't collide with any of your includes.
|
|
||||||
|
|
||||||
Then, it's a simple matter of including the base file:
|
If you don't like your include_path to be fiddled around with, simply set
|
||||||
|
HTML Purifier's library/ directory to the include path yourself and then:
|
||||||
|
|
||||||
require_once 'HTMLPurifier.php';
|
require_once 'HTMLPurifier.php';
|
||||||
|
|
||||||
...and you're good to go.
|
Only the contents in the library/ folder are necessary, so you can remove
|
||||||
|
everything else when using HTML Purifier in a production environment.
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
2. Preparing the proper environment
|
3. Preparing the proper output environment
|
||||||
|
|
||||||
While no configuration is necessary, you first should take precautions regarding
|
HTML Purifier is all about web-standards, so accordingly your webpages should
|
||||||
the other output HTML that the filtered content will be going along with. Here
|
be standards compliant. HTML Purifier can deal with these doctypes:
|
||||||
is a (short) checklist:
|
|
||||||
|
|
||||||
* Have I specified XHTML 1.0 Transitional as the doctype?
|
* XHTML 1.0 Transitional (default)
|
||||||
* Have I specified UTF-8 as the character encoding?
|
* HTML 4.01 Transitional
|
||||||
|
|
||||||
|
...and these character encodings:
|
||||||
|
|
||||||
|
* UTF-8 (default)
|
||||||
|
* Any encoding iconv supports (support is crippled for i18n though)
|
||||||
|
|
||||||
|
The defaults are there for a reason: they are best-practice choices that
|
||||||
|
should not be changed lightly. For those of you in the dark, you can determine
|
||||||
|
the doctype from this code in your HTML documents:
|
||||||
|
|
||||||
To find out what these are, browse to your website and view its source code.
|
|
||||||
You can figure out the doctype from a declaration that looks like
|
|
||||||
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
|
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
|
||||||
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
|
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
|
||||||
or no doctype. You can figure out the character encoding by looking for
|
|
||||||
|
...and the character encoding from this code:
|
||||||
|
|
||||||
<meta http-equiv="Content-type" content="text/html;charset=ENCODING">
|
<meta http-equiv="Content-type" content="text/html;charset=ENCODING">
|
||||||
|
|
||||||
I cannot stress the importance of these two bullets enough. Omitting either
|
For legacy codebases these declarations may be missing. If that is the case,
|
||||||
of them could have dire consequences not only for security but for plain
|
STOP, and read up on character encodings and doctypes (in that order). Here
|
||||||
old usability. You can find a more in-depth discussion of why this is needed
|
are some links:
|
||||||
in docs/security.txt, in the meantime, try to change your output so this is
|
|
||||||
the case. If you can't, well, we might be able to accommodate you (read
|
* http://www.joelonsoftware.com/articles/Unicode.html
|
||||||
section 3).
|
* http://alistapart.com/stories/doctype/
|
||||||
|
|
||||||
|
You may currently be vulnerable to XSS and other security threats, and HTML
|
||||||
|
Purifier won't be able to fix that.
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
3. Configuring HTML Purifier
|
4. Configuration
|
||||||
|
|
||||||
HTML Purifier is designed to run out-of-the-box, but occasionally HTML
|
HTML Purifier is designed to run out-of-the-box, but occasionally HTML
|
||||||
Purifier needs to be told what to do.
|
Purifier needs to be told what to do. If you answered no to any of these
|
||||||
|
questions, read on, otherwise, you can skip to the next section (or, if you're
|
||||||
|
into configuring things just for the heck of it, skip to 4.3).
|
||||||
|
|
||||||
If, for some reason, you are unable to switch to UTF-8 immediately, you can
|
* Am I using UTF-8?
|
||||||
switch HTML Purifier's encoding. Note that the availability of encodings is
|
* Am I using XHTML 1.0 Transitional?
|
||||||
dependent on iconv, and you'll be missing characters if the charset you
|
|
||||||
choose doesn't have them.
|
If you answered no to any of these questions, instantiate a configuration
|
||||||
|
object and read on:
|
||||||
|
|
||||||
|
$config = HTMLPurifier_Config::createDefault();
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
4.1. Setting a different character encoding
|
||||||
|
|
||||||
|
You really shouldn't use any other encoding except UTF-8, especially if you
|
||||||
|
plan to support multilingual websites (read section three for more details).
|
||||||
|
However, switching to UTF-8 is not always immediately feasible, so we can
|
||||||
|
adapt.
|
||||||
|
|
||||||
|
HTML Purifier uses iconv to support other character encodings, as such,
|
||||||
|
any encoding that iconv supports <http://www.gnu.org/software/libiconv/>
|
||||||
|
HTML Purifier supports with this code:
|
||||||
|
|
||||||
$config->set('Core', 'Encoding', /* put your encoding here */);
|
$config->set('Core', 'Encoding', /* put your encoding here */);
|
||||||
|
|
||||||
An example usage for Latin-1 websites:
|
An example usage for Latin-1 websites (the most common encoding for English
|
||||||
|
websites):
|
||||||
|
|
||||||
$config->set('Core', 'Encoding', 'ISO-8859-1');
|
$config->set('Core', 'Encoding', 'ISO-8859-1');
|
||||||
|
|
||||||
|
Note that HTML Purifier's support for non-Unicode encodings is crippled by the
|
||||||
|
fact that any character not supported by that encoding will be silently
|
||||||
|
dropped, EVEN if it is ampersand escaped. This is a current limitation of
|
||||||
|
HTML Purifier that we are NOT actively working to fix. Patches are welcome,
|
||||||
|
but there are so many other gotchas and problems in I18N for non-Unicode
|
||||||
|
encodings that this functionality is low priority. See
|
||||||
|
<http://ppewww.ph.gla.ac.uk/~flavell/charset/form-i18n.html> for a more
|
||||||
|
detailed lowdown on the topic.
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
4.2. Setting a different doctype
|
||||||
|
|
||||||
For those of you stuck using HTML 4.01 Transitional, you can disable
|
For those of you stuck using HTML 4.01 Transitional, you can disable
|
||||||
XHTML output like this:
|
XHTML output like this:
|
||||||
|
|
||||||
$config->set('Core', 'XHTML', false);
|
$config->set('Core', 'XHTML', false);
|
||||||
|
|
||||||
However, I strongly recommend that you use XHTML. Currently, we can only
|
I recommend that you use XHTML, although not as much as I recommend UTF-8. If
|
||||||
guarantee transitional-compliant output, future versions will also allow strict
|
your HTML 4.01 page validates, good for you!
|
||||||
output.
|
|
||||||
|
Currently, we can only guarantee transitional-compliant output, future
|
||||||
|
versions will also allow strict-compliant output.
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
3. Using the code
|
4.3. Other settings
|
||||||
|
|
||||||
|
There are more configuration directives which can be read about
|
||||||
|
here: <http://hp.jpsband.org/live/configdoc/plain.html> They're a bit boring,
|
||||||
|
but they can help out for those of you who like to exert maximum control over
|
||||||
|
your code.
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
5. Using the code
|
||||||
|
|
||||||
The interface is mind-numbingly simple:
|
The interface is mind-numbingly simple:
|
||||||
|
|
||||||
$purifier = new HTMLPurifier();
|
$purifier = new HTMLPurifier();
|
||||||
$clean_html = $purifier->purify($dirty_html);
|
$clean_html = $purifier->purify( $dirty_html );
|
||||||
|
|
||||||
Or, if you're using the configuration object:
|
...or, if you're using the configuration object:
|
||||||
|
|
||||||
$purifier = new HTMLPurifier($config);
|
$purifier = new HTMLPurifier($config);
|
||||||
$clean_html = $purifier->purify($dirty_html);
|
$clean_html = $purifier->purify( $dirty_html );
|
||||||
|
|
||||||
That's it. For more examples, check out docs/examples/. Also, SLOW gives
|
That's it! For more examples, check out docs/examples/ (they aren't very
|
||||||
advice on what to do if HTML Purifier is slowing down your application.
|
different though). Also, SLOW gives advice on what to do if HTML Purifier
|
||||||
|
is slowing down your application.
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
4. Quick install
|
6. Quick install
|
||||||
|
|
||||||
If your website is in UTF-8 and XHTML Transitional, use this code:
|
If your website is in UTF-8 and XHTML Transitional, use this code:
|
||||||
|
|
||||||
<?php
|
<?php
|
||||||
set_include_path('/path/to/htmlpurifier/library'
|
require_once '/path/to/htmlpurifier/library/HTMLPurifier.auto.php';
|
||||||
. PATH_SEPARATOR . get_include_path() );
|
|
||||||
require_once 'HTMLPurifier.php';
|
|
||||||
$purifier = new HTMLPurifier();
|
|
||||||
|
|
||||||
|
$purifier = new HTMLPurifier();
|
||||||
$clean_html = $purifier->purify($dirty_html);
|
$clean_html = $purifier->purify($dirty_html);
|
||||||
|
?>
|
||||||
|
|
||||||
If your website is in a different encoding or doctype, use this code:
|
If your website is in a different encoding or doctype, use this code:
|
||||||
|
|
||||||
<?php
|
<?php
|
||||||
set_include_path('/path/to/htmlpurifier/library'
|
require_once '/path/to/htmlpurifier/library/HTMLPurifier.auto.php';
|
||||||
. PATH_SEPARATOR . get_include_path() );
|
|
||||||
require_once 'HTMLPurifier.php';
|
|
||||||
|
|
||||||
$config = HTMLPurifier_Config::createDefault();
|
$config = HTMLPurifier_Config::createDefault();
|
||||||
$config->set('Core', 'Encoding', 'ISO-8859-1'); //replace with your encoding
|
$config->set('Core', 'Encoding', 'ISO-8859-1'); //replace with your encoding
|
||||||
|
133
NEWS
133
NEWS
@@ -1,20 +1,119 @@
|
|||||||
NEWS ( CHANGELOG and HISTORY ) HTMLPurifier
|
NEWS ( CHANGELOG and HISTORY ) HTMLPurifier
|
||||||
|||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
||||||
|
|
||||||
1.2.0, unknown projected release date
|
= KEY ====================
|
||||||
(major feature release)
|
# Breaks back-compat
|
||||||
|
! Feature
|
||||||
|
- Bugfix
|
||||||
|
+ Sub-comment
|
||||||
|
. Internal change
|
||||||
|
==========================
|
||||||
|
|
||||||
1.1.1, unknown projected release date
|
1.3.1, released 2006-12-06
|
||||||
(bugfix release)
|
! Added HTMLPurifier.func.php stub for a convenient function to call the library
|
||||||
|
- Fixed bug in RemoveInvalidImg code that caused all images to be dropped
|
||||||
|
(thanks to .mario for reporting this)
|
||||||
|
. Standardized all attribute handling variables to attr, made it plural
|
||||||
|
|
||||||
|
1.3.0, released 2006-11-26
|
||||||
|
# Invalid images are now removed, rather than replaced with a dud
|
||||||
|
<img src="" alt="Invalid image" />. Previous behavior can be restored
|
||||||
|
with new directive %Core.RemoveInvalidImg set to false.
|
||||||
|
! (X)HTML Strict now supported
|
||||||
|
+ Transparently handles inline elements in block context (blockquote)
|
||||||
|
! Added GET method to demo for easier validation, added 50kb max input size
|
||||||
|
! New directive %HTML.BlockWrapper, for block-ifying inline elements
|
||||||
|
! New directive %HTML.Parent, allows you to only allow inline content
|
||||||
|
! New directives %HTML.AllowedElements and %HTML.AllowedAttributes to let
|
||||||
|
users narrow the set of allowed tags
|
||||||
|
! <li value="4"> and <ul start="2"> now allowed in loose mode
|
||||||
|
! New directives %URI.DisableExternalResources and %URI.DisableResources
|
||||||
|
! New directive %Attr.DisableURI, which eliminates all hyperlinking
|
||||||
|
! New directive %URI.Munge, munges URI so you can use some sort of redirector
|
||||||
|
service to avoid PageRank leaks or warn users that they are exiting your site.
|
||||||
|
! Added spiffy new smoketest printDefinition.php, which lets you twiddle with
|
||||||
|
the configuration settings and see how the internal rules are affected.
|
||||||
|
! New directive %URI.HostBlacklist for blocking links to bad hosts.
|
||||||
|
xssAttacks.php smoketest updated accordingly.
|
||||||
|
- Added missing type to ChildDef_Chameleon
|
||||||
|
- Remove Tidy option from demo if Tidy is not available
|
||||||
|
. ChildDef_Required guards against empty tags
|
||||||
|
. Lookup table HTMLDefinition->info_flow_elements added
|
||||||
|
. Added peace-of-mind variable initialization to Strategy_FixNesting
|
||||||
|
. Added HTMLPurifier->info_parent_def, parent child processing made special
|
||||||
|
. Added internal documents briefly summarizing future progression of HTML
|
||||||
|
. HTMLPurifier_Config->getBatch($namespace) added
|
||||||
|
. More lenient casting to bool from string in HTMLPurifier_ConfigSchema
|
||||||
|
. Refactored ChildDef classes into their own files
|
||||||
|
|
||||||
|
1.2.0, released 2006-11-19
|
||||||
|
# ID attributes now disabled by default. New directives:
|
||||||
|
+ %HTML.EnableAttrID - restores old behavior by allowing IDs
|
||||||
|
+ %Attr.IDPrefix - %Attr.IDBlacklist alternative that munges all user IDs
|
||||||
|
so that they don't collide with your IDs
|
||||||
|
+ %Attr.IDPrefixLocal - Same as above, but for when there are multiple
|
||||||
|
instances of user content on the page
|
||||||
|
+ Profuse documentation on how to use these available in docs/enduser-id.txt
|
||||||
|
! Added MODx plugin <http://modxcms.com/forums/index.php/topic,6604.0.html>
|
||||||
|
! Added percent encoding normalization
|
||||||
|
! XSS attacks smoketest given facelift
|
||||||
|
! Configuration documentation now has table of contents
|
||||||
|
! Added %URI.DisableExternal, which prevents links to external websites. You
|
||||||
|
can also use %URI.Host to permit absolute linking to subdomains
|
||||||
|
! Non-accessible resources (ex. mailto) blocked from embedded URIs (img src)
|
||||||
|
- Type variable in HTMLDefinition was not being set properly, fixed
|
||||||
|
- Documentation updated
|
||||||
|
+ TODO added request Phalanger
|
||||||
|
+ TODO added request Native compression
|
||||||
|
+ TODO added request Remove redundant tags
|
||||||
|
+ TODO added possible plaintext formatter for HTML Purifier documentation
|
||||||
|
+ Updated ConfigDoc TODO
|
||||||
|
+ Improved inline comments in AttrDef/Class.php, AttrDef/CSS.php
|
||||||
|
and AttrDef/Host.php
|
||||||
|
+ Revamped documentation into HTML, along with misc updates
|
||||||
|
- HTMLPurifier_Context doesn't throw a variable reference error if you attempt
|
||||||
|
to retrieve a non-existent variable
|
||||||
|
. Switched to purify()-wide Context object registry
|
||||||
|
. Refactored unit tests to minimize duplication
|
||||||
|
. XSS attack sheet updated
|
||||||
|
. configdoc.xml now has xml:space attached to default value nodes
|
||||||
|
. Allow configuration directives to permit null values
|
||||||
|
. Cleaned up test-cases to remove unnecessary swallowErrors()
|
||||||
|
|
||||||
|
1.1.2, released 2006-09-30
|
||||||
|
! Add HTMLPurifier.auto.php stub file that configures include_path
|
||||||
|
- Documentation updated
|
||||||
|
+ INSTALL document rewritten
|
||||||
|
+ TODO added semi-lossy conversion
|
||||||
|
+ API Doxygen docs' file exclusions updated
|
||||||
|
+ Added notes on HTML versus XML attribute whitespace handling
|
||||||
|
+ Noted that HTMLPurifier_ChildDef_Custom isn't being used
|
||||||
|
+ Noted that config object's definitions are cached versions
|
||||||
|
- Fixed lack of attribute parsing in HTMLPurifier_Lexer_PEARSax3
|
||||||
|
- ftp:// URIs now have their typecodes checked
|
||||||
|
- Hooked up HTMLPurifier_ChildDef_Custom's unit tests (they weren't being run)
|
||||||
|
. Line endings standardized throughout project (svn:eol-style standardized)
|
||||||
|
. Refactored parseData() to general Lexer class
|
||||||
|
. Tester named "HTML Purifier" not "HTMLPurifier"
|
||||||
|
|
||||||
|
1.1.1, released 2006-09-24
|
||||||
|
! Configuration option to optionally Tidy up output for indentation to make up
|
||||||
|
for dropped whitespace by DOMLex (pretty-printing for the entire application
|
||||||
|
should be done by a page-wide Tidy)
|
||||||
|
- Various documentation updates
|
||||||
|
- Fixed parse error in configuration documentation script
|
||||||
|
- Fixed fatal error in benchmark scripts, slightly augmented
|
||||||
|
- As far as possible, whitespace is preserved in-between table children
|
||||||
|
- Sample test-settings.php file included
|
||||||
|
|
||||||
1.1.0, released 2006-09-16
|
1.1.0, released 2006-09-16
|
||||||
|
! Directive documentation generation using XSLT
|
||||||
|
! XHTML can now be turned off, output becomes <br>
|
||||||
- Made URI validator more forgiving: will ignore leading and trailing
|
- Made URI validator more forgiving: will ignore leading and trailing
|
||||||
quotes, apostrophes and less than or greater than signs.
|
quotes, apostrophes and less than or greater than signs.
|
||||||
- Enforce alphanumeric namespace and directive names for configuration.
|
- Enforce alphanumeric namespace and directive names for configuration.
|
||||||
- Directive documentation generation using XSLT
|
|
||||||
- Table child definition made more flexible, will fix up poorly ordered elements
|
- Table child definition made more flexible, will fix up poorly ordered elements
|
||||||
- XHTML generation can now be turned off, allowing things like <br>
|
. Renamed ConfigDef to ConfigSchema
|
||||||
- Renamed ConfigDef to ConfigSchema
|
|
||||||
|
|
||||||
1.0.1, released 2006-09-04
|
1.0.1, released 2006-09-04
|
||||||
- Fixed slight bug in DOMLex attribute parsing
|
- Fixed slight bug in DOMLex attribute parsing
|
||||||
@@ -24,17 +123,17 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier
|
|||||||
space in them. This manifested in TinyMCE.
|
space in them. This manifested in TinyMCE.
|
||||||
|
|
||||||
1.0.0, released 2006-09-01
|
1.0.0, released 2006-09-01
|
||||||
|
! Shorthand CSS properties implemented: font, border, background, list-style
|
||||||
|
! Basic color keywords translated into hexadecimal values
|
||||||
|
! Table CSS properties implemented
|
||||||
|
! Support for charsets other than UTF-8 (defined by iconv)
|
||||||
|
! Malformed UTF-8 and non-SGML character detection and cleaning implemented
|
||||||
- Fixed broken numeric entity conversion
|
- Fixed broken numeric entity conversion
|
||||||
- Malformed UTF-8 and non-SGML character detection and cleaning implemented
|
|
||||||
- API documentation completed
|
- API documentation completed
|
||||||
- Shorthand CSS properties implemented: font, border, background, list-style
|
. (HTML|CSS)Definition de-singleton-ized
|
||||||
- Basic color keywords translated into hexadecimal values
|
|
||||||
- Table CSS properties implemented
|
|
||||||
- (HTML|CSS)Definition de-singleton-ized
|
|
||||||
- Support for charsets other than UTF-8 (defined by iconv)
|
|
||||||
|
|
||||||
1.0.0beta, released 2006-08-16
|
1.0.0beta, released 2006-08-16
|
||||||
- First public release, most functionality implemented. Notable omissions are:
|
! First public release, most functionality implemented. Notable omissions are:
|
||||||
. Shorthand CSS properties
|
+ Shorthand CSS properties
|
||||||
. Table CSS properties
|
+ Table CSS properties
|
||||||
. Deprecated attribute transformations
|
+ Deprecated attribute transformations
|
||||||
|
24
README
24
README
@@ -1,13 +1,13 @@
|
|||||||
|
|
||||||
README
|
|
||||||
All about HTMLPurifier
|
|
||||||
|
|
||||||
HTMLPurifier is an HTML filtering solution. It uses a unique combination of
|
README
|
||||||
robust whitelists and aggressive parsing to ensure that not only are XSS
|
All about HTMLPurifier
|
||||||
attacks thwarted, but the resulting HTML is standards compliant.
|
|
||||||
|
HTMLPurifier is an HTML filtering solution. It uses a unique combination of
|
||||||
See INSTALL on how to use the library. See docs/ for more developer-oriented
|
robust whitelists and aggressive parsing to ensure that not only are XSS
|
||||||
documentation as well as some code examples. Users of TinyMCE or FCKeditor
|
attacks thwarted, but the resulting HTML is standards compliant.
|
||||||
may be especially interested in WYSIWYG.
|
|
||||||
|
See INSTALL on how to use the library. See docs/ for more developer-oriented
|
||||||
HTMLPurifier can be found on the web at: http://hp.jpsband.org/
|
documentation as well as some code examples. Users of TinyMCE or FCKeditor
|
||||||
|
may be especially interested in WYSIWYG.
|
||||||
|
|
||||||
|
HTMLPurifier can be found on the web at: http://hp.jpsband.org/
|
||||||
|
18
SLOW
18
SLOW
@@ -2,13 +2,13 @@
|
|||||||
SLOW
|
SLOW
|
||||||
also known as the HELP ME LIBRARY IS TOO SLOW MY PAGE TAKE TOO LONG LOAD page
|
also known as the HELP ME LIBRARY IS TOO SLOW MY PAGE TAKE TOO LONG LOAD page
|
||||||
|
|
||||||
HTMLPurifier is a very powerful library. But with power comes great
|
HTML Purifier is a very powerful library. But with power comes great
|
||||||
responsibility, or, at least, longer execution times. Remember, this
|
responsibility, or, at least, longer execution times. Remember, this
|
||||||
library isn't lightly grazing over submitted HTML: it's deconstructing
|
library isn't lightly grazing over submitted HTML: it's deconstructing
|
||||||
the whole thing, rigorously checking the parts, and then putting it
|
the whole thing, rigorously checking the parts, and then putting it
|
||||||
back together.
|
back together.
|
||||||
|
|
||||||
So, if it so turns out that HTMLPurifier is kinda too slow for outbound
|
So, if it so turns out that HTML Purifier is kinda too slow for outbound
|
||||||
filtering, you've got a few options:
|
filtering, you've got a few options:
|
||||||
|
|
||||||
1. Inbound filtering - perform filtering of HTML when it's submitted by the
|
1. Inbound filtering - perform filtering of HTML when it's submitted by the
|
||||||
@@ -17,18 +17,24 @@ second tacked on to the load time probably isn't going to be that huge of
|
|||||||
a problem. Then, displaying the content is simply a matter of outputting
|
a problem. Then, displaying the content is simply a matter of outputting
|
||||||
it directly from your database/filesystem. The trouble with this method is
|
it directly from your database/filesystem. The trouble with this method is
|
||||||
that your user loses the original text, and when doing edits, will be
|
that your user loses the original text, and when doing edits, will be
|
||||||
handling the filtered text. Of course, maybe that's a good thing. If you
|
handling the filtered text. While this may be a good thing, especially if
|
||||||
don't mind a little extra complexity, you can try...
|
you're using a WYSIWYG editor, it can also result in data-loss if a user
|
||||||
|
makes a typo.
|
||||||
|
|
||||||
2. Caching the filtered output - accept the submitted text and put it
|
2. Caching the filtered output - accept the submitted text and put it
|
||||||
unaltered into the database, but then also generate a filtered version and
|
unaltered into the database, but then also generate a filtered version and
|
||||||
stash that in the database. Serve the filtered version to readers, and the
|
stash that in the database. Serve the filtered version to readers, and the
|
||||||
unaltered version to editors. If need be, you can invalidate the cache and
|
unaltered version to editors. If need be, you can invalidate the cache and
|
||||||
have the cached filtered version be regenerated on the first page view. Pros?
|
have the cached filtered version be regenerated on the first page view. Pros?
|
||||||
Full data retention. Cons? It's more complicated.
|
Full data retention. Cons? It's more complicated, and opens other editors
|
||||||
|
up to XSS if they are using a WYSIWYG editor (to fix that, they'd have to
|
||||||
|
be able to get their hands on the *really* original text served in plaintext
|
||||||
|
mode).
|
||||||
|
|
||||||
In short, inbound filtering is almost as simple as outbound filtering, but
|
In short, inbound filtering is almost as simple as outbound filtering, but
|
||||||
it has some drawbacks which cannot be fixed unless you save both the original
|
it has some drawbacks which cannot be fixed unless you save both the original
|
||||||
and the filtered versions.
|
and the filtered versions.
|
||||||
|
|
||||||
There is a third option: profile and optimize HTMLPurifier yourself. ;-)
|
There is a third option: profile and optimize HTMLPurifier yourself. Be sure
|
||||||
|
to report back your results if you decide to do that! Especially if you
|
||||||
|
port HTML Purifier to C++. ;-)
|
||||||
|
96
TODO
96
TODO
@@ -1,46 +1,92 @@
|
|||||||
|
|
||||||
TODO List
|
TODO List
|
||||||
|
|
||||||
Ongoing
|
= KEY ====================
|
||||||
- Lots of profiling, make it faster!
|
# Flagship
|
||||||
- Plugins for major CMSes (very tricky issue)
|
- Regular
|
||||||
|
? At-risk
|
||||||
|
==========================
|
||||||
|
|
||||||
1.2 release
|
1.4 release
|
||||||
- Additional support for poorly written HTML
|
# More extensive URI filtering schemes (see docs/proposal-new-directives.txt)
|
||||||
- Implement all non-essential attribute transforms
|
# Allow for background-image and list-style-image (intrinsically tied to above)
|
||||||
- Microsoft Word HTML cleaning (i.e. MsoNormal)
|
- Aggressive caching
|
||||||
|
? Rich set* methods and config file loaders for HTMLPurifier_Config
|
||||||
|
? Configuration profiles: sets of directives that get set with one func call
|
||||||
|
? ConfigSchema directive aliases (so we can rename some of them)
|
||||||
|
? URI validation routines tighter (see docs/dev-code-quality.html) (COMPLEX)
|
||||||
|
|
||||||
1.3 release
|
1.5 release
|
||||||
- Formatters for plaintext
|
# Error logging for filtering/cleanup procedures
|
||||||
- Auto-paragraphing (be sure to leverage fact that we know when things
|
- Requires I18N facilities to be created first (COMPLEX)
|
||||||
shouldn't be paragraphed, such as lists and tables).
|
|
||||||
- Make URI validation routines tighter (especially mailto)
|
1.6 release
|
||||||
- More extensive URI filtering schemes
|
# Add pre-packaged "levels" of cleaning (custom behavior already done)
|
||||||
- Allow for background-image and list-style-image (see above)
|
- More fine-grained control over escaping behavior
|
||||||
- Distinguish between different types of URIs, for instance, a mailto URI
|
- Silently drop content in between SCRIPT tags (can be generalized to allow
|
||||||
in IMG SRC is nonsensical
|
specification of elements that, when detected as foreign, trigger removal
|
||||||
|
of children, although unbalanced tags could wreak havoc (or at least
|
||||||
|
delete the rest of the document)).
|
||||||
|
|
||||||
|
1.7 release
|
||||||
|
# Additional support for poorly written HTML
|
||||||
|
- Implement all non-essential attribute transforms (BIG!)
|
||||||
|
- Microsoft Word HTML cleaning (i.e. MsoNormal, but research essential!)
|
||||||
|
- Friendly strict handling of <address> (block -> <br>)
|
||||||
|
|
||||||
2.0 release
|
2.0 release
|
||||||
- Add various "levels" of cleaning
|
# Formatters for plaintext (COMPLEX)
|
||||||
- Related: Allow strict (X)HTML
|
- Auto-paragraphing (be sure to leverage fact that we know when things
|
||||||
|
shouldn't be paragraphed, such as lists and tables).
|
||||||
|
- Linkify URLs
|
||||||
|
- Smileys
|
||||||
|
- Linkification for HTML Purifier docs: notably configuration and classes
|
||||||
|
|
||||||
3.0 release
|
3.0 release
|
||||||
- Extended HTML capabilities based on namespacing and tag transforms
|
- Extended HTML capabilities based on namespacing and tag transforms (COMPLEX)
|
||||||
- Hooks for adding custom processors to custom namespaced tags and
|
- Hooks for adding custom processors to custom namespaced tags and
|
||||||
attributes, offer default implementation
|
attributes, offer default implementation
|
||||||
- Lots of documentation and samples
|
- Lots of documentation and samples
|
||||||
|
- XHTML 1.1 support
|
||||||
|
|
||||||
|
Ongoing
|
||||||
|
- Lots of profiling, make it faster!
|
||||||
|
- Plugins for major CMSes (COMPLEX)
|
||||||
|
- Drupal
|
||||||
|
- WordPress
|
||||||
|
- eFiction
|
||||||
|
- more! (look for ones that use WYSIWYGs)
|
||||||
|
|
||||||
Unknown release (on a scratch-an-itch basis)
|
Unknown release (on a scratch-an-itch basis)
|
||||||
- Silently drop content in between SCRIPT tags (can be generalized to allow
|
|
||||||
specification of elements that, when detected as foreign, trigger removal
|
|
||||||
of children, although unbalanced tags could wreak havoc (or at least delete
|
|
||||||
the rest of the document)).
|
|
||||||
- Fixes for Firefox's inability to handle COL alignment props (Bug 915)
|
- Fixes for Firefox's inability to handle COL alignment props (Bug 915)
|
||||||
- Automatically add non-breaking spaces to empty table cells when
|
- Automatically add non-breaking spaces to empty table cells when
|
||||||
empty-cells:show is applied to have compatibility with Internet Explorer
|
empty-cells:show is applied to have compatibility with Internet Explorer
|
||||||
- Pretty-printing HTML (adds dependency of Generator to HTMLDefinition)
|
- Convert RTL/LTR override characters to <bdo> tags, or vice versa on demand.
|
||||||
|
Also, enable disabling of directionality
|
||||||
|
- Append something to duplicate IDs so they're still usable (impl. note: the
|
||||||
|
dupe detector would also need to detect the suffix as well)
|
||||||
|
- Have 'lang' attribute be checked against official lists
|
||||||
|
- Docs on how to embed YouTube videos (and friends) without patches
|
||||||
|
|
||||||
|
Encoding workarounds
|
||||||
- Non-lossy dumb alternate character encoding transformations, achieved by
|
- Non-lossy dumb alternate character encoding transformations, achieved by
|
||||||
numerically encoding all non-ASCII characters
|
numerically encoding all non-ASCII characters
|
||||||
|
- Semi-lossy dumb alternate character encoding transformations, achieved by
|
||||||
|
encoding all characters that have string entity equivalents
|
||||||
|
|
||||||
|
Requested
|
||||||
|
- Native content compression, whitespace stripping (don't rely on Tidy, make
|
||||||
|
sure we don't remove from <pre> or related tags)
|
||||||
|
- Win32 Phalanger C# binaries (?)
|
||||||
|
- Remove redundant tags, ex. <u><u>Underlined</u></u>. Implementation notes:
|
||||||
|
1. Analyzing which tags to remove duplicates of
|
||||||
|
2. Ensure attributes are merged into the parent tag
|
||||||
|
3. Extend the tag exclusion system to specify whether the
|
||||||
|
contents should be dropped or not (currently, there's code that could do
|
||||||
|
something like this if it didn't drop the inner text too.)
|
||||||
|
- Accept array input, by iterating and purifying all of the items
|
||||||
|
|
||||||
Wontfix
|
Wontfix
|
||||||
- Non-lossy smart alternate character encoding transformations
|
- Non-lossy smart alternate character encoding transformations (unless
|
||||||
|
patch provided)
|
||||||
|
- Pretty-printing HTML, users can use Tidy on the output on entire page
|
||||||
|
@@ -3,15 +3,24 @@
|
|||||||
// emulates inserting a dir called HTMLPurifier into your class dir
|
// emulates inserting a dir called HTMLPurifier into your class dir
|
||||||
set_include_path(get_include_path() . PATH_SEPARATOR . '../library/');
|
set_include_path(get_include_path() . PATH_SEPARATOR . '../library/');
|
||||||
|
|
||||||
require_once 'HTMLPurifier/ConfigDef.php';
|
@include_once '../test-settings.php';
|
||||||
require_once 'HTMLPurifier/Config.php';
|
|
||||||
require_once 'HTMLPurifier/Lexer/DirectLex.php';
|
|
||||||
require_once 'HTMLPurifier/Lexer/PEARSax3.php';
|
|
||||||
|
|
||||||
$LEXERS = array(
|
require_once 'HTMLPurifier/ConfigSchema.php';
|
||||||
'DirectLex' => new HTMLPurifier_Lexer_DirectLex(),
|
require_once 'HTMLPurifier/Config.php';
|
||||||
'PEARSax3' => new HTMLPurifier_Lexer_PEARSax3()
|
|
||||||
);
|
$LEXERS = array();
|
||||||
|
$RUNS = isset($GLOBALS['HTMLPurifierTest']['Runs'])
|
||||||
|
? $GLOBALS['HTMLPurifierTest']['Runs'] : 2;
|
||||||
|
|
||||||
|
require_once 'HTMLPurifier/Lexer/DirectLex.php';
|
||||||
|
$LEXERS['DirectLex'] = new HTMLPurifier_Lexer_DirectLex();
|
||||||
|
|
||||||
|
if (!empty($GLOBALS['HTMLPurifierTest']['PEAR'])) {
|
||||||
|
require_once 'HTMLPurifier/Lexer/PEARSax3.php';
|
||||||
|
$LEXERS['PEARSax3'] = new HTMLPurifier_Lexer_PEARSax3();
|
||||||
|
} else {
|
||||||
|
exit('PEAR required to perform benchmark.');
|
||||||
|
}
|
||||||
|
|
||||||
if (version_compare(PHP_VERSION, '5', '>=')) {
|
if (version_compare(PHP_VERSION, '5', '>=')) {
|
||||||
require_once 'HTMLPurifier/Lexer/DOMLex.php';
|
require_once 'HTMLPurifier/Lexer/DOMLex.php';
|
||||||
@@ -56,9 +65,12 @@ class RowTimer extends Benchmark_Timer
|
|||||||
if ($standard == false) $standard = $v['diff'];
|
if ($standard == false) $standard = $v['diff'];
|
||||||
|
|
||||||
$perc = $v['diff'] * 100 / $standard;
|
$perc = $v['diff'] * 100 / $standard;
|
||||||
|
$bad_run = ($v['diff'] < 0);
|
||||||
|
|
||||||
$out .= '<td align="right">' . number_format($perc, 2, '.', '') .
|
$out .= '<td align="right"'.
|
||||||
'%</td>';
|
($bad_run ? ' style="color:#AAA;"' : '').
|
||||||
|
'>' . number_format($perc, 2, '.', '') .
|
||||||
|
'%</td><td>'.number_format($v['diff'],4,'.','').'</td>';
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -79,13 +91,13 @@ function print_lexers() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
function do_benchmark($name, $document) {
|
function do_benchmark($name, $document) {
|
||||||
global $LEXERS;
|
global $LEXERS, $RUNS;
|
||||||
|
|
||||||
$timer = new RowTimer($name);
|
$timer = new RowTimer($name);
|
||||||
$timer->start();
|
$timer->start();
|
||||||
|
|
||||||
foreach($LEXERS as $key => $lexer) {
|
foreach($LEXERS as $key => $lexer) {
|
||||||
$tokens = $lexer->tokenizeHTML($document);
|
for ($i=0; $i<$RUNS; $i++) $tokens = $lexer->tokenizeHTML($document);
|
||||||
$timer->setMarker($key);
|
$timer->setMarker($key);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -103,7 +115,7 @@ function do_benchmark($name, $document) {
|
|||||||
<table border="1">
|
<table border="1">
|
||||||
<tr><th>Case</th><?php
|
<tr><th>Case</th><?php
|
||||||
foreach ($LEXERS as $key => $value) {
|
foreach ($LEXERS as $key => $value) {
|
||||||
echo '<th>' . htmlspecialchars($key) . '</th>';
|
echo '<th colspan="2">' . htmlspecialchars($key) . '</th>';
|
||||||
}
|
}
|
||||||
?></tr>
|
?></tr>
|
||||||
<?php
|
<?php
|
||||||
|
@@ -2,7 +2,7 @@
|
|||||||
|
|
||||||
set_include_path(get_include_path() . PATH_SEPARATOR . '../library/');
|
set_include_path(get_include_path() . PATH_SEPARATOR . '../library/');
|
||||||
|
|
||||||
require_once 'HTMLPurifier/ConfigDef.php';
|
require_once 'HTMLPurifier/ConfigSchema.php';
|
||||||
require_once 'HTMLPurifier/Config.php';
|
require_once 'HTMLPurifier/Config.php';
|
||||||
require_once 'HTMLPurifier/Lexer/DirectLex.php';
|
require_once 'HTMLPurifier/Lexer/DirectLex.php';
|
||||||
|
|
||||||
|
@@ -12,10 +12,8 @@ TODO:
|
|||||||
- multipage documentation
|
- multipage documentation
|
||||||
- determine how to multilingualize
|
- determine how to multilingualize
|
||||||
- factor out code into classes
|
- factor out code into classes
|
||||||
- generate a table of contents
|
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
|
||||||
// ---------------------------------------------------------------------------
|
// ---------------------------------------------------------------------------
|
||||||
// Check and configure environment
|
// Check and configure environment
|
||||||
|
|
||||||
@@ -50,7 +48,7 @@ function appendHTMLDiv($document, $node, $html) {
|
|||||||
// ---------------------------------------------------------------------------
|
// ---------------------------------------------------------------------------
|
||||||
// Load copies of HTMLPurifier_ConfigDef and HTMLPurifier
|
// Load copies of HTMLPurifier_ConfigSchema and HTMLPurifier
|
||||||
|
|
||||||
$definition = HTMLPurifier_ConfigDef::instance();
|
$schema = HTMLPurifier_ConfigSchema::instance();
|
||||||
$purifier = new HTMLPurifier();
|
$purifier = new HTMLPurifier();
|
||||||
|
|
||||||
|
|
||||||
@@ -61,7 +59,7 @@ $types_document = new DOMDocument('1.0', 'UTF-8');
|
|||||||
$types_root = $types_document->createElement('types');
|
$types_root = $types_document->createElement('types');
|
||||||
$types_document->appendChild($types_root);
|
$types_document->appendChild($types_root);
|
||||||
$types_document->formatOutput = true;
|
$types_document->formatOutput = true;
|
||||||
foreach ($definition->types as $name => $expanded_name) {
|
foreach ($schema->types as $name => $expanded_name) {
|
||||||
$types_type = $types_document->createElement('type', $expanded_name);
|
$types_type = $types_document->createElement('type', $expanded_name);
|
||||||
$types_type->setAttribute('id', $name);
|
$types_type->setAttribute('id', $name);
|
||||||
$types_root->appendChild($types_type);
|
$types_root->appendChild($types_type);
|
||||||
@@ -82,13 +80,10 @@ $dom_root->appendChild($dom_document->createElement('title', 'HTML Purifier'));
|
|||||||
|
|
||||||
/*
|
/*
|
||||||
TODO for XML format:
|
TODO for XML format:
|
||||||
- namespace descriptions
|
|
||||||
- enumerated values
|
|
||||||
- default values
|
|
||||||
- create a definition (DTD or other) once interface stabilizes
|
- create a definition (DTD or other) once interface stabilizes
|
||||||
*/
|
*/
|
||||||
|
|
||||||
foreach($definition->info as $namespace_name => $namespace_info) {
|
foreach($schema->info as $namespace_name => $namespace_info) {
|
||||||
|
|
||||||
$dom_namespace = $dom_document->createElement('namespace');
|
$dom_namespace = $dom_document->createElement('namespace');
|
||||||
$dom_root->appendChild($dom_namespace);
|
$dom_root->appendChild($dom_namespace);
|
||||||
@@ -100,7 +95,7 @@ foreach($definition->info as $namespace_name => $namespace_info) {
|
|||||||
$dom_namespace_description = $dom_document->createElement('description');
|
$dom_namespace_description = $dom_document->createElement('description');
|
||||||
$dom_namespace->appendChild($dom_namespace_description);
|
$dom_namespace->appendChild($dom_namespace_description);
|
||||||
appendHTMLDiv($dom_document, $dom_namespace_description,
|
appendHTMLDiv($dom_document, $dom_namespace_description,
|
||||||
$definition->info_namespace[$namespace_name]->description);
|
$schema->info_namespace[$namespace_name]->description);
|
||||||
|
|
||||||
foreach ($namespace_info as $name => $info) {
|
foreach ($namespace_info as $name => $info) {
|
||||||
|
|
||||||
@@ -115,9 +110,12 @@ foreach($definition->info as $namespace_name => $namespace_info) {
|
|||||||
$dom_constraints = $dom_document->createElement('constraints');
|
$dom_constraints = $dom_document->createElement('constraints');
|
||||||
$dom_directive->appendChild($dom_constraints);
|
$dom_directive->appendChild($dom_constraints);
|
||||||
|
|
||||||
$dom_constraints->appendChild(
|
$dom_type = $dom_document->createElement('type', $info->type);
|
||||||
$dom_document->createElement('type', $info->type)
|
if ($info->allow_null) {
|
||||||
);
|
$dom_type->setAttribute('allow-null', 'yes');
|
||||||
|
}
|
||||||
|
$dom_constraints->appendChild($dom_type);
|
||||||
|
|
||||||
if ($info->allowed !== true) {
|
if ($info->allowed !== true) {
|
||||||
$dom_allowed = $dom_document->createElement('allowed');
|
$dom_allowed = $dom_document->createElement('allowed');
|
||||||
$dom_constraints->appendChild($dom_allowed);
|
$dom_constraints->appendChild($dom_allowed);
|
||||||
@@ -128,19 +126,25 @@ foreach($definition->info as $namespace_name => $namespace_info) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
$raw_default = $definition->defaults[$namespace_name][$name];
|
$raw_default = $schema->defaults[$namespace_name][$name];
|
||||||
if (is_bool($raw_default)) {
|
if (is_bool($raw_default)) {
|
||||||
$default = $raw_default ? 'true' : 'false';
|
$default = $raw_default ? 'true' : 'false';
|
||||||
} elseif (is_string($raw_default)) {
|
} elseif (is_string($raw_default)) {
|
||||||
$default = "\"$raw_default\"";
|
$default = "\"$raw_default\"";
|
||||||
|
} elseif (is_null($raw_default)) {
|
||||||
|
$default = 'null';
|
||||||
} else {
|
} else {
|
||||||
$default = print_r(
|
$default = print_r(
|
||||||
$definition->defaults[$namespace_name][$name], true
|
$schema->defaults[$namespace_name][$name], true
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
$dom_constraints->appendChild(
|
|
||||||
$dom_document->createElement('default', $default)
|
$dom_default = $dom_document->createElement('default', $default);
|
||||||
);
|
|
||||||
|
// remove this once we get a DTD
|
||||||
|
$dom_default->setAttribute('xml:space', 'preserve');
|
||||||
|
|
||||||
|
$dom_constraints->appendChild($dom_default);
|
||||||
|
|
||||||
$dom_descriptions = $dom_document->createElement('descriptions');
|
$dom_descriptions = $dom_document->createElement('descriptions');
|
||||||
$dom_directive->appendChild($dom_descriptions);
|
$dom_directive->appendChild($dom_descriptions);
|
||||||
|
@@ -1,7 +1,10 @@
|
|||||||
table {border-collapse:collapse;}
|
table {border-collapse:collapse;}
|
||||||
table td, table th {padding:0.2em;}
|
table td, table th {padding:0.2em;}
|
||||||
|
|
||||||
table.constraints {margin:0 0 1em;}
|
table.constraints {margin:0 0 1em;}
|
||||||
table.constraints th {text-align:left;padding-left:0.4em;}
|
table.constraints th {text-align:left;padding-left:0.4em;}
|
||||||
table.constraints td {padding-right:0.4em;}
|
table.constraints td {padding-right:0.4em;}
|
||||||
table.constraints td pre {margin:0;}
|
table.constraints td pre {margin:0;}
|
||||||
|
|
||||||
|
#toc {list-style-type:none; font-weight:bold;}
|
||||||
|
#toc ul {list-style-type:disc; font-weight:normal;}
|
||||||
|
@@ -1,105 +1,126 @@
|
|||||||
<?xml version="1.0" encoding="UTF-8"?>
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
<xsl:stylesheet
|
<xsl:stylesheet
|
||||||
version = "1.0"
|
version = "1.0"
|
||||||
xmlns = "http://www.w3.org/1999/xhtml"
|
xmlns = "http://www.w3.org/1999/xhtml"
|
||||||
xmlns:xsl = "http://www.w3.org/1999/XSL/Transform"
|
xmlns:xsl = "http://www.w3.org/1999/XSL/Transform"
|
||||||
>
|
>
|
||||||
<xsl:output
|
<xsl:output
|
||||||
method = "xml"
|
method = "xml"
|
||||||
encoding = "UTF-8"
|
encoding = "UTF-8"
|
||||||
doctype-public = "-//W3C//DTD XHTML 1.0 Transitional//EN"
|
doctype-public = "-//W3C//DTD XHTML 1.0 Transitional//EN"
|
||||||
doctype-system = "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"
|
doctype-system = "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"
|
||||||
indent = "no"
|
indent = "no"
|
||||||
media-type = "text/html"
|
media-type = "text/html"
|
||||||
/>
|
/>
|
||||||
|
|
||||||
<xsl:variable name="typeLookup" select="document('../types.xml')" />
|
<xsl:variable name="typeLookup" select="document('../types.xml')" />
|
||||||
|
|
||||||
<xsl:template match="/">
|
<xsl:template match="/">
|
||||||
<html lang="en" xml:lang="en">
|
<html lang="en" xml:lang="en">
|
||||||
<head>
|
<head>
|
||||||
<title><xsl:value-of select="/configdoc/title" /> Configuration Documentation</title>
|
<title><xsl:value-of select="/configdoc/title" /> Configuration Documentation</title>
|
||||||
<meta http-equiv="Content-Type" content="text/html;charset=UTF-8" />
|
<meta http-equiv="Content-Type" content="text/html;charset=UTF-8" />
|
||||||
<link rel="stylesheet" type="text/css" href="styles/plain.css" />
|
<link rel="stylesheet" type="text/css" href="styles/plain.css" />
|
||||||
</head>
|
</head>
|
||||||
<body>
|
<body>
|
||||||
<xsl:apply-templates />
|
<h1><xsl:value-of select="/configdoc/title" /> Configuration Documentation</h1>
|
||||||
</body>
|
<h2>Table of Contents</h2>
|
||||||
</html>
|
<ul id="toc">
|
||||||
</xsl:template>
|
<xsl:apply-templates mode="toc" />
|
||||||
|
</ul>
|
||||||
<xsl:template match="title">
|
<xsl:apply-templates />
|
||||||
<h1><xsl:value-of select="/configdoc/title" /> Configuration Documentation</h1>
|
</body>
|
||||||
</xsl:template>
|
</html>
|
||||||
|
</xsl:template>
|
||||||
<xsl:template match="namespace">
|
|
||||||
<xsl:apply-templates />
|
<xsl:template match="title" mode="toc" />
|
||||||
<xsl:if test="count(child::directive)=0">
|
<xsl:template match="namespace" mode="toc">
|
||||||
<p>No configuration directives defined for this namespace.</p>
|
<xsl:if test="count(directive)>0">
|
||||||
</xsl:if>
|
<li>
|
||||||
</xsl:template>
|
<a href="#{@id}"><xsl:value-of select="name" /></a>
|
||||||
<xsl:template match="namespace/name">
|
<ul>
|
||||||
<h2 id="{../@id}"><xsl:value-of select="text()" /></h2>
|
<xsl:apply-templates select="directive" mode="toc" />
|
||||||
</xsl:template>
|
</ul>
|
||||||
<xsl:template match="namespace/description">
|
</li>
|
||||||
<div class="description">
|
</xsl:if>
|
||||||
<xsl:copy-of select="div/node()" />
|
</xsl:template>
|
||||||
</div>
|
<xsl:template match="directive" mode="toc">
|
||||||
</xsl:template>
|
<li><a href="#{@id}"><xsl:value-of select="name" /></a></li>
|
||||||
|
</xsl:template>
|
||||||
<xsl:template match="directive">
|
|
||||||
<xsl:apply-templates />
|
<xsl:template match="title" />
|
||||||
</xsl:template>
|
|
||||||
<xsl:template match="directive/name">
|
<xsl:template match="namespace">
|
||||||
<h3 id="{../@id}"><xsl:value-of select="text()" /></h3>
|
<xsl:apply-templates />
|
||||||
</xsl:template>
|
<xsl:if test="count(directive)=0">
|
||||||
<xsl:template match="directive/constraints">
|
<p>No configuration directives defined for this namespace.</p>
|
||||||
<table class="constraints">
|
</xsl:if>
|
||||||
<xsl:apply-templates />
|
</xsl:template>
|
||||||
<!-- Calculated other values -->
|
<xsl:template match="namespace/name">
|
||||||
<tr>
|
<h2 id="{../@id}"><xsl:value-of select="." /></h2>
|
||||||
<th>Used by:</th>
|
</xsl:template>
|
||||||
<td>
|
<xsl:template match="namespace/description">
|
||||||
<xsl:for-each select="../descriptions/description">
|
<div class="description">
|
||||||
<xsl:if test="position()>1">, </xsl:if>
|
<xsl:copy-of select="div/node()" />
|
||||||
<xsl:value-of select="@file" />
|
</div>
|
||||||
</xsl:for-each>
|
</xsl:template>
|
||||||
</td>
|
|
||||||
</tr>
|
<xsl:template match="directive">
|
||||||
</table>
|
<xsl:apply-templates />
|
||||||
</xsl:template>
|
</xsl:template>
|
||||||
<xsl:template match="directive//description">
|
<xsl:template match="directive/name">
|
||||||
<div class="description">
|
<h3 id="{../@id}"><xsl:value-of select="../@id" /></h3>
|
||||||
<xsl:copy-of select="div/node()" />
|
</xsl:template>
|
||||||
</div>
|
<xsl:template match="directive/constraints">
|
||||||
</xsl:template>
|
<table class="constraints">
|
||||||
|
<xsl:apply-templates />
|
||||||
<xsl:template match="constraints/type">
|
<!-- Calculated other values -->
|
||||||
<tr>
|
<tr>
|
||||||
<th>Type:</th>
|
<th>Used by:</th>
|
||||||
<td>
|
<td>
|
||||||
<xsl:variable name="type" select="text()" />
|
<xsl:for-each select="../descriptions/description">
|
||||||
<xsl:attribute name="class">type type-<xsl:value-of select="$type" /></xsl:attribute>
|
<xsl:if test="position()>1">, </xsl:if>
|
||||||
<xsl:value-of select="$typeLookup/types/type[@id=$type]/text()" />
|
<xsl:value-of select="@file" />
|
||||||
</td>
|
</xsl:for-each>
|
||||||
</tr>
|
</td>
|
||||||
</xsl:template>
|
</tr>
|
||||||
<xsl:template match="constraints/allowed">
|
</table>
|
||||||
<tr>
|
</xsl:template>
|
||||||
<th>Allowed values:</th>
|
<xsl:template match="directive//description">
|
||||||
<td>
|
<div class="description">
|
||||||
<xsl:for-each select="value"><!--
|
<xsl:copy-of select="div/node()" />
|
||||||
--><xsl:if test="position()>1">, </xsl:if>
|
</div>
|
||||||
"<xsl:value-of select="." />"<!--
|
</xsl:template>
|
||||||
--></xsl:for-each>
|
|
||||||
</td>
|
<xsl:template match="constraints/type">
|
||||||
</tr>
|
<tr>
|
||||||
</xsl:template>
|
<th>Type:</th>
|
||||||
<xsl:template match="constraints/default">
|
<td>
|
||||||
<tr>
|
<xsl:variable name="type" select="text()" />
|
||||||
<th>Default:</th>
|
<xsl:attribute name="class">type type-<xsl:value-of select="$type" /></xsl:attribute>
|
||||||
<td><pre><xsl:value-of select="." xml:space="preserve" /></pre></td>
|
<xsl:value-of select="$typeLookup/types/type[@id=$type]/text()" />
|
||||||
</tr>
|
<xsl:if test="@allow-null='yes'">
|
||||||
</xsl:template>
|
(or null)
|
||||||
|
</xsl:if>
|
||||||
|
</td>
|
||||||
|
</tr>
|
||||||
|
</xsl:template>
|
||||||
|
<xsl:template match="constraints/allowed">
|
||||||
|
<tr>
|
||||||
|
<th>Allowed values:</th>
|
||||||
|
<td>
|
||||||
|
<xsl:for-each select="value"><!--
|
||||||
|
--><xsl:if test="position()>1">, </xsl:if>
|
||||||
|
"<xsl:value-of select="." />"<!--
|
||||||
|
--></xsl:for-each>
|
||||||
|
</td>
|
||||||
|
</tr>
|
||||||
|
</xsl:template>
|
||||||
|
<xsl:template match="constraints/default">
|
||||||
|
<tr>
|
||||||
|
<th>Default:</th>
|
||||||
|
<td><pre><xsl:value-of select="." xml:space="preserve" /></pre></td>
|
||||||
|
</tr>
|
||||||
|
</xsl:template>
|
||||||
|
|
||||||
</xsl:stylesheet>
|
</xsl:stylesheet>
|
@@ -1,39 +0,0 @@
|
|||||||
|
|
||||||
Code Quality Issues
|
|
||||||
|
|
||||||
Okay, face it. Programmers can get lazy, cut corners, or make mistakes. They
|
|
||||||
also can do quick prototypes, and then forget to rewrite them later. Well,
|
|
||||||
while I can't list mistakes in here, I can list prototype-like segments
|
|
||||||
of code that should be aggressively refactored after the beta is released.
|
|
||||||
This does not list optimization issues, that needs to be done after intense
|
|
||||||
profiling.
|
|
||||||
|
|
||||||
Here we go:
|
|
||||||
|
|
||||||
AttrDef
|
|
||||||
Class - doesn't support Unicode characters, uses regular expressions
|
|
||||||
Lang - code duplication, premature optimization, doesn't consult official
|
|
||||||
lists
|
|
||||||
Pixels/Length/MultiLength - implemented according to HTML spec (excludes
|
|
||||||
code reuse in CSS)
|
|
||||||
URI - multiple regular expressions, needs host validation routines factored
|
|
||||||
out for mailto scheme, IPv6 validation is broken (fringe), unintuitive
|
|
||||||
variable overwriting, missing validation for query, fragment and path,
|
|
||||||
no percent-encode fixing
|
|
||||||
CSS - parser doesn't accept advanced CSS (fringe)
|
|
||||||
Number - constructor interface is inconsistent with Integer
|
|
||||||
AttrTransform - doesn't accept AttrContext, non-validating
|
|
||||||
ChildDef - not-allowed nodes translated to text, likely invalid handling
|
|
||||||
Config - "load configuration" hooks missing, rich set* accessors missing
|
|
||||||
Strategy
|
|
||||||
FixNesting - cannot bubble nodes out of structures
|
|
||||||
MakeWellFormed - insufficient automatic closing definitions (check HTML
|
|
||||||
spec for optional end tags).
|
|
||||||
RemoveForeignElements - should be run in parallel with MakeWellFormed
|
|
||||||
URIScheme - needs to have callable generic checks
|
|
||||||
ftp - missing typecode check
|
|
||||||
mailto - doesn't validate emails
|
|
||||||
news - doesn't validate opaque path
|
|
||||||
nntp - doesn't constrain path
|
|
||||||
EOL
|
|
||||||
|
|
@@ -1,46 +0,0 @@
|
|||||||
|
|
||||||
Configuration Ideas
|
|
||||||
|
|
||||||
Here are some theoretical configuration ideas that we could implement some
|
|
||||||
time. Note the naming convention: %Namespace.Directive
|
|
||||||
|
|
||||||
%Attr.IDPrefix - prefix all ids with this
|
|
||||||
|
|
||||||
%Attr.RewriteFragments - if there's %Attr.IDPrefix we may want to transparently
|
|
||||||
rewrite the URLs we parse too. However, we can only do it when it's a pure
|
|
||||||
anchor link, so it's not foolproof
|
|
||||||
|
|
||||||
%Attr.ClassBlacklist,
|
|
||||||
%Attr.ClassWhitelist,
|
|
||||||
%Attr.ClassListMode - determines what classes are allowed. When
|
|
||||||
%Attr.ClassListMode is set to Blacklist, only allow those not in
|
|
||||||
%Attr.ClassBlacklist. When it's Whitelist, only allow those in
|
|
||||||
%Attr.ClassWhitelist.
|
|
||||||
|
|
||||||
%Attr.LangAlphaOnly - designate whether or not to allow numerals in language
|
|
||||||
code subtags
|
|
||||||
* RFC 1766, the current standard referenced by XML, does not permit
|
|
||||||
numbers, but,
|
|
||||||
* RFC 3066, the superseding best practice standard since January 2001,
|
|
||||||
permits them.
|
|
||||||
We allow numbers by default, but you generally never see them
|
|
||||||
at all, which makes this a little more sane.
|
|
||||||
|
|
||||||
%Attr.MaxWidth,
|
|
||||||
%Attr.MaxHeight - caps for width and height related checks.
|
|
||||||
(a hack in Pixels for an image crashing attack could be replaced by this)
|
|
||||||
|
|
||||||
%URI.Munge - will munge all URIs to a different URI, which should redirect
|
|
||||||
the user to the applicable page. A urlencoded version of the URI
|
|
||||||
will replace any instances of %s in the string. One possible
|
|
||||||
string is 'http://www.google.com/url?q=%s'. Useful for preventing
|
|
||||||
pagerank from being sent to other sites
|
|
||||||
|
|
||||||
%URI.AddRelNofollow - will add rel="nofollow" to all links, preventing the
|
|
||||||
spread of ill-gotten pagerank
|
|
||||||
|
|
||||||
%URI.Host - host of website, for external link checks
|
|
||||||
|
|
||||||
%URI.RelativeToAbsolute - transforms all relative URIs to absolute form
|
|
||||||
|
|
||||||
%URI.DisableExternal - disable external links
|
|
51
docs/dev-code-quality.html
Normal file
51
docs/dev-code-quality.html
Normal file
@@ -0,0 +1,51 @@
|
|||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
|
||||||
|
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
|
||||||
|
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"><head>
|
||||||
|
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
|
||||||
|
<meta name="description" content="Discusses code quality issues and places that need to be refactored in HTML Purifier." />
|
||||||
|
<link rel="stylesheet" type="text/css" href="./style.css" />
|
||||||
|
|
||||||
|
<title>Code Quality Issues - HTML Purifier</title>
|
||||||
|
|
||||||
|
</head><body>
|
||||||
|
|
||||||
|
<h1>Code Quality Issues</h1>
|
||||||
|
|
||||||
|
<div id="filing">Filed under Development</div>
|
||||||
|
<div id="index">Return to the <a href="index.html">index</a>.</div>
|
||||||
|
|
||||||
|
<p>Okay, face it. Programmers can get lazy, cut corners, or make mistakes. They
|
||||||
|
also can do quick prototypes, and then forget to rewrite them later. Well,
|
||||||
|
while I can't list mistakes in here, I can list prototype-like segments
|
||||||
|
of code that should be aggressively refactored. This does not list
|
||||||
|
optimization issues; those need to be addressed after intense profiling.</p>
|
||||||
|
|
||||||
|
<pre>
|
||||||
|
docs/examples/demo.php - ad hoc HTML/PHP soup to the extreme
|
||||||
|
|
||||||
|
AttrDef
|
||||||
|
Class - doesn't support Unicode characters (fringe); uses regular
|
||||||
|
expressions
|
||||||
|
Lang - code duplication; premature optimization
|
||||||
|
Length - easily mistaken for CSSLength
|
||||||
|
URI - multiple regular expressions; missing validation for parts (?)
|
||||||
|
CSS - parser doesn't accept advanced CSS (fringe)
|
||||||
|
Number - constructor interface inconsistent with Integer
|
||||||
|
ConfigSchema - redefinition is a mess
|
||||||
|
Strategy
|
||||||
|
FixNesting - cannot bubble nodes out of structures, duplicated checks
|
||||||
|
for special-case parent node
|
||||||
|
MakeWellFormed - insufficient automatic closing definitions (check HTML
|
||||||
|
spec for optional end tags, also, closing based on type (block/inline)
|
||||||
|
might be efficient).
|
||||||
|
RemoveForeignElements - should be run in parallel with MakeWellFormed
|
||||||
|
URIScheme - needs to have callable generic checks
|
||||||
|
mailto - doesn't validate emails, doesn't validate querystring
|
||||||
|
news - doesn't validate opaque path
|
||||||
|
nntp - doesn't constrain path
|
||||||
|
</pre>
|
||||||
|
|
||||||
|
<div id="version">$Id$</div>
|
||||||
|
|
||||||
|
</body></html>
|
81
docs/dev-naming.html
Normal file
81
docs/dev-naming.html
Normal file
@@ -0,0 +1,81 @@
|
|||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
|
||||||
|
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
|
||||||
|
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"><head>
|
||||||
|
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
|
||||||
|
<meta name="description" content="Defines class naming conventions in HTML Purifier." />
|
||||||
|
<link rel="stylesheet" type="text/css" href="./style.css" />
|
||||||
|
|
||||||
|
<title>Naming Conventions - HTML Purifier</title>
|
||||||
|
|
||||||
|
</head><body>
|
||||||
|
|
||||||
|
<h1>Naming Conventions</h1>
|
||||||
|
|
||||||
|
<div id="filing">Filed under Development</div>
|
||||||
|
<div id="index">Return to the <a href="index.html">index</a>.</div>
|
||||||
|
|
||||||
|
<p>The classes in this library follow a few naming conventions, which may
|
||||||
|
help you find the correct functionality more quickly. Here they are:</p>
|
||||||
|
|
||||||
|
<dl>
|
||||||
|
|
||||||
|
<dt>All classes occupy the HTMLPurifier pseudo-namespace.</dt>
|
||||||
|
<dd>This means that all classes are prefixed with HTMLPurifier_. As such, all
|
||||||
|
names under HTMLPurifier_ are reserved. I recommend that you use the name
|
||||||
|
HTMLPurifierX_YourName_ClassName, especially if you want to take advantage
|
||||||
|
of HTMLPurifier_ConfigDef.</dd>
|
||||||
|
|
||||||
|
<dt>All classes correspond to their path if library/ was in the include path</dt>
|
||||||
|
<dd>HTMLPurifier_AttrDef is located at HTMLPurifier/AttrDef.php; replace
|
||||||
|
underscores with slashes and append .php and you'll have the location of
|
||||||
|
the class.</dd>
|
||||||
|
|
||||||
|
<dt>Harness and Test are reserved class names for unit tests</dt>
|
||||||
|
<dd>The suffix <code>Test</code> indicates that the class is a subclass of UnitTestCase
|
||||||
|
(of the Simpletest library) and is testable. "Harness" indicates a subclass
|
||||||
|
of UnitTestCase that is not meant to be run but to be extended into
|
||||||
|
concrete test cases and contains custom test methods (i.e. assert*()).</dd>
|
||||||
|
|
||||||
|
<dt>Class names do not necessarily represent inheritance hierarchies</dt>
|
||||||
|
<dd>While we try to reflect inheritance in naming to some extent, it is not
|
||||||
|
guaranteed (for instance, none of the classes inherit from HTMLPurifier,
|
||||||
|
the base class). However, all class files have the require_once
|
||||||
|
declarations to whichever classes they are tightly coupled to.</dd>
|
||||||
|
|
||||||
|
<dt>Strategy has a meaning different from the Gang of Four pattern</dt>
|
||||||
|
<dd>In Design Patterns, the Gang of Four describes a Strategy object as
|
||||||
|
encapsulating an algorithm so that it can be switched at run-time. While
|
||||||
|
our strategies are indeed algorithms, they are not meant to be substituted:
|
||||||
|
all must be present in order for proper functioning.</dd>
|
||||||
|
|
||||||
|
<dt>Abbreviations are avoided</dt>
|
||||||
|
<dd>We try to avoid abbreviations as much as possible, but in some cases,
|
||||||
|
the abbreviated version is more readable than the full version. Here, we
|
||||||
|
list common abbreviations:
|
||||||
|
<ul>
|
||||||
|
<li>Attr to Attributes (note that it is plural, i.e. <code>$attr = array()</code>)</li>
|
||||||
|
<li>Def to Definition</li>
|
||||||
|
<li><code>$ret</code> is the value to be returned in a function</li>
|
||||||
|
</ul>
|
||||||
|
</dd>
|
||||||
|
|
||||||
|
<dt>Ambiguity concerning the definition of Def/Definition</dt>
|
||||||
|
<dd>While a definition normally defines the structure/acceptable values of
|
||||||
|
an entity, most of the definitions in this application also attempt
|
||||||
|
to validate and fix the value. I am unsure of a better name, as
|
||||||
|
"Validator" would exclude fixing the value, "Fixer" doesn't evoke
|
||||||
|
the proper image of "fixing" something, and "ValidatorFixer" is too long!
|
||||||
|
Some other suggestions were "Handler", "Reference", "Check", "Fix",
|
||||||
|
"Repair" and "Heal".</dd>
|
||||||
|
|
||||||
|
<dt>Transform not Transformer</dt>
|
||||||
|
<dd>Transform is both a noun and a verb, and thus we define a "Transform" as
|
||||||
|
something that "transforms," leaving out "Transformer" (which sounds like an
|
||||||
|
electrical device/robot toy).</dd>
|
||||||
|
|
||||||
|
</dl>
|
||||||
|
|
||||||
|
<div id="version">$Id$</div>
|
||||||
|
|
||||||
|
</body></html>
|
32
docs/dev-optimization.html
Normal file
32
docs/dev-optimization.html
Normal file
@@ -0,0 +1,32 @@
|
|||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
|
||||||
|
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
|
||||||
|
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"><head>
|
||||||
|
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
|
||||||
|
<meta name="description" content="Discusses possible methods of optimizing HTML Purifier." />
|
||||||
|
<link rel="stylesheet" type="text/css" href="./style.css" />
|
||||||
|
|
||||||
|
<title>Optimization - HTML Purifier</title>
|
||||||
|
|
||||||
|
</head><body>
|
||||||
|
|
||||||
|
<h1>Optimization</h1>
|
||||||
|
|
||||||
|
<div id="filing">Filed under Development</div>
|
||||||
|
<div id="index">Return to the <a href="index.html">index</a>.</div>
|
||||||
|
|
||||||
|
<p>Here are some possible optimization techniques we can apply to code sections if
|
||||||
|
they turn out to be slow. Be sure not to prematurely optimize: if you get
|
||||||
|
that itch, put it here!</p>
|
||||||
|
|
||||||
|
<ul>
|
||||||
|
<li>Make Tokens Flyweights (may prove problematic, probably not worth it)</li>
|
||||||
|
<li>Rewrite regexps into PHP code</li>
|
||||||
|
<li>Serialize the Definition object</li>
|
||||||
|
<li>Batch regexp validation (do as many per function call as possible)</li>
|
||||||
|
<li>Parallelize strategies</li>
|
||||||
|
</ul>
|
||||||
|
|
||||||
|
<div id="version">$Id$</div>
|
||||||
|
|
||||||
|
</body></html>
|
@@ -1,292 +1,301 @@
|
|||||||
<?xml version="1.0" encoding="UTF-8"?>
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
|
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
|
||||||
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
|
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
|
||||||
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"><head>
|
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"><head>
|
||||||
|
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
|
||||||
<title>HTMLPurifier Progress</title>
|
<meta name="description" content="Tables detailing HTML element and CSS property implementation coverage in HTML Purifier." />
|
||||||
|
<link rel="stylesheet" type="text/css" href="./style.css" />
|
||||||
<style type="text/css">
|
|
||||||
|
<title>Implementation Progress - HTML Purifier</title>
|
||||||
td {padding-right:1em;border-bottom:1px solid #000;padding-left:0.5em;}
|
|
||||||
th {text-align:left;padding-top:1.4em;font-size:13pt;
|
<style type="text/css">
|
||||||
border-bottom:2px solid #000;background:#FFF;}
|
|
||||||
thead th {text-align:left;padding:0.1em;background-color:#EEE;}
|
td {padding-right:1em;border-bottom:1px solid #000;padding-left:0.5em;}
|
||||||
|
th {text-align:left;padding-top:1.4em;font-size:13pt;
|
||||||
.impl-yes {background:#9D9;}
|
border-bottom:2px solid #000;background:#FFF;}
|
||||||
.impl-partial {background:#FFA;}
|
thead th {text-align:left;padding:0.1em;background-color:#EEE;}
|
||||||
.impl-no {background:#CCC;}
|
|
||||||
|
.impl-yes {background:#9D9;}
|
||||||
.danger {color:#600;}
|
.impl-partial {background:#FFA;}
|
||||||
.css1 {color:#060;}
|
.impl-no {background:#CCC;}
|
||||||
.required {font-weight:bold;}
|
|
||||||
.feature {color:#999;}
|
.danger {color:#600;}
|
||||||
|
.css1 {color:#060;}
|
||||||
</style>
|
.required {font-weight:bold;}
|
||||||
|
.feature {color:#999;}
|
||||||
</head><body>
|
|
||||||
|
</style>
|
||||||
<h1>HTMLPurifier Progress</h1>
|
|
||||||
|
</head><body>
|
||||||
<h2>Key</h2>
|
|
||||||
|
<h1>Implementation Progress</h1>
|
||||||
<table cellspacing="0"><tbody>
|
|
||||||
<tr><td class="impl-yes">Implemented</td></tr>
|
<div id="filing">Filed under Development</div>
|
||||||
<tr><td class="impl-partial">Partially implemented</td></tr>
|
<div id="index">Return to the <a href="index.html">index</a>.</div>
|
||||||
<tr><td class="impl-no">Will not implement</td></tr>
|
|
||||||
<tr><td class="danger">Dangerous attribute/property</td></tr>
|
<h2>Key</h2>
|
||||||
<tr><td class="css1">Present in CSS1</td></tr>
|
|
||||||
<tr><td class="feature">Feature, requires extra work</td></tr>
|
<table cellspacing="0"><tbody>
|
||||||
</tbody></table>
|
<tr><td class="impl-yes">Implemented</td></tr>
|
||||||
|
<tr><td class="impl-partial">Partially implemented</td></tr>
|
||||||
<h3>CSS</h3>
|
<tr><td class="impl-no">Will not implement</td></tr>
|
||||||
|
<tr><td class="danger">Dangerous attribute/property</td></tr>
|
||||||
<table cellspacing="0">
|
<tr><td class="css1">Present in CSS1</td></tr>
|
||||||
|
<tr><td class="feature">Feature, requires extra work</td></tr>
|
||||||
<thead>
|
</tbody></table>
|
||||||
<tr><th>Name</th><th>Notes</th></tr>
|
|
||||||
</thead>
|
<h2>CSS</h2>
|
||||||
|
|
||||||
<!--
|
<table cellspacing="0">
|
||||||
<tr><td>-</td><td>-</td></tr>
|
|
||||||
-->
|
<thead>
|
||||||
|
<tr><th>Name</th><th>Notes</th></tr>
|
||||||
<tbody>
|
</thead>
|
||||||
<tr><th colspan="2">Standard</th></tr>
|
|
||||||
<tr class="css1 impl-yes"><td>background-color</td><td>COMPOSITE(<color>, transparent)</td></tr>
|
<!--
|
||||||
<tr class="css1 impl-yes"><td>background</td><td>SHORTHAND, only for color, see below for info on background-image and friends</td></tr>
|
<tr><td>-</td><td>-</td></tr>
|
||||||
<tr class="css1 impl-yes"><td>border</td><td>SHORTHAND, MULTIPLE</td></tr>
|
-->
|
||||||
<tr class="css1 impl-yes"><td>border-color</td><td>MULTIPLE</td></tr>
|
|
||||||
<tr class="css1 impl-yes"><td>border-style</td><td>MULTIPLE</td></tr>
|
<tbody>
|
||||||
<tr class="css1 impl-yes"><td>border-width</td><td>MULTIPLE</td></tr>
|
<tr><th colspan="2">Standard</th></tr>
|
||||||
<tr class="css1 impl-yes"><td>border-*</td><td>SHORTHAND</td></tr>
|
<tr class="css1 impl-yes"><td>background-color</td><td>COMPOSITE(<color>, transparent)</td></tr>
|
||||||
<tr class="impl-yes"><td>border-*-color</td><td>COMPOSITE(<color>, transparent)</td></tr>
|
<tr class="css1 impl-yes"><td>background</td><td>SHORTHAND, only for color, see below for info on background-image and friends</td></tr>
|
||||||
<tr class="impl-yes"><td>border-*-style</td><td>ENUM(none, hidden, dotted, dashed,
|
<tr class="css1 impl-yes"><td>border</td><td>SHORTHAND, MULTIPLE</td></tr>
|
||||||
solid, double, groove, ridge, inset, outset)</td></tr>
|
<tr class="css1 impl-yes"><td>border-color</td><td>MULTIPLE</td></tr>
|
||||||
<tr class="css1 impl-yes"><td>border-*-width</td><td>COMPOSITE(<length>, thin, medium, thick)</td></tr>
|
<tr class="css1 impl-yes"><td>border-style</td><td>MULTIPLE</td></tr>
|
||||||
<tr class="css1 impl-yes"><td>clear</td><td>ENUM(none, left, right, both)</td></tr>
|
<tr class="css1 impl-yes"><td>border-width</td><td>MULTIPLE</td></tr>
|
||||||
<tr class="css1 impl-yes"><td>color</td><td><color></td></tr>
|
<tr class="css1 impl-yes"><td>border-*</td><td>SHORTHAND</td></tr>
|
||||||
<tr class="css1 impl-yes"><td>float</td><td>ENUM(left, right, none), May require layout
|
<tr class="impl-yes"><td>border-*-color</td><td>COMPOSITE(<color>, transparent)</td></tr>
|
||||||
precautions with clear</td></tr>
|
<tr class="impl-yes"><td>border-*-style</td><td>ENUM(none, hidden, dotted, dashed,
|
||||||
<tr class="css1 impl-yes"><td>font</td><td>SHORTHAND</td></tr>
|
solid, double, groove, ridge, inset, outset)</td></tr>
|
||||||
<tr class="css1 impl-yes"><td>font-family</td><td>CSS validator may complain if fallback font
|
<tr class="css1 impl-yes"><td>border-*-width</td><td>COMPOSITE(<length>, thin, medium, thick)</td></tr>
|
||||||
family not specified</td></tr>
|
<tr class="css1 impl-yes"><td>clear</td><td>ENUM(none, left, right, both)</td></tr>
|
||||||
<tr class="css1 impl-yes"><td>font-size</td><td>COMPOSITE(<absolute-size>,
|
<tr class="css1 impl-yes"><td>color</td><td><color></td></tr>
|
||||||
<relative-size>, <length>, <percentage>)</td></tr>
|
<tr class="css1 impl-yes"><td>float</td><td>ENUM(left, right, none), May require layout
|
||||||
<tr class="css1 impl-yes"><td>font-style</td><td>ENUM(normal, italic, oblique)</td></tr>
|
precautions with clear</td></tr>
|
||||||
<tr class="css1 impl-yes"><td>font-variant</td><td>ENUM(normal, small-caps)</td></tr>
|
<tr class="css1 impl-yes"><td>font</td><td>SHORTHAND</td></tr>
|
||||||
<tr class="css1 impl-yes"><td>font-weight</td><td>ENUM(normal, bold, bolder, lighter,
|
<tr class="css1 impl-yes"><td>font-family</td><td>CSS validator may complain if fallback font
|
||||||
100, 200, 300, 400, 500, 600, 700, 800, 900), maybe special code for
|
family not specified</td></tr>
|
||||||
in-between integers</td></tr>
|
<tr class="css1 impl-yes"><td>font-size</td><td>COMPOSITE(<absolute-size>,
|
||||||
<tr class="css1 impl-yes"><td>letter-spacing</td><td>COMPOSITE(<length>, normal)</td></tr>
|
<relative-size>, <length>, <percentage>)</td></tr>
|
||||||
<tr class="css1 impl-yes"><td>line-height</td><td>COMPOSITE(<number>,
|
<tr class="css1 impl-yes"><td>font-style</td><td>ENUM(normal, italic, oblique)</td></tr>
|
||||||
<length>, <percentage>, normal)</td></tr>
|
<tr class="css1 impl-yes"><td>font-variant</td><td>ENUM(normal, small-caps)</td></tr>
|
||||||
<tr class="css1 impl-yes"><td>list-style-position</td><td>ENUM(inside, outside),
|
<tr class="css1 impl-yes"><td>font-weight</td><td>ENUM(normal, bold, bolder, lighter,
|
||||||
Strange behavior in browsers</td></tr>
|
100, 200, 300, 400, 500, 600, 700, 800, 900), maybe special code for
|
||||||
<tr class="css1 impl-yes"><td>list-style-type</td><td>ENUM(...),
|
in-between integers</td></tr>
|
||||||
Well-supported values are: disc, circle, square,
|
<tr class="css1 impl-yes"><td>letter-spacing</td><td>COMPOSITE(<length>, normal)</td></tr>
|
||||||
decimal, lower-roman, upper-roman, lower-alpha and upper-alpha. See also
|
<tr class="css1 impl-yes"><td>line-height</td><td>COMPOSITE(<number>,
|
||||||
CSS 3. Mostly IE lack of support.</td></tr>
|
<length>, <percentage>, normal)</td></tr>
|
||||||
<tr class="css1 impl-yes"><td>list-style</td><td>SHORTHAND, target milestone 1.0</td></tr>
|
<tr class="css1 impl-yes"><td>list-style-position</td><td>ENUM(inside, outside),
|
||||||
<tr class="css1 impl-yes"><td>margin</td><td>MULTIPLE</td></tr>
|
Strange behavior in browsers</td></tr>
|
||||||
<tr class="css1 impl-yes"><td>margin-*</td><td>COMPOSITE(<length>,
|
<tr class="css1 impl-yes"><td>list-style-type</td><td>ENUM(...),
|
||||||
<percentage>, auto)</td></tr>
|
Well-supported values are: disc, circle, square,
|
||||||
<tr class="css1 impl-yes"><td>padding</td><td>MULTIPLE</td></tr>
|
decimal, lower-roman, upper-roman, lower-alpha and upper-alpha. See also
|
||||||
<tr class="css1 impl-yes"><td>padding-*</td><td>COMPOSITE(<length>(positive),
|
CSS 3. Mostly IE lack of support.</td></tr>
|
||||||
<percentage>(positive))</td></tr>
|
<tr class="css1 impl-yes"><td>list-style</td><td>SHORTHAND</td></tr>
|
||||||
<tr class="css1 impl-yes"><td>text-align</td><td>ENUM(left, right,
|
<tr class="css1 impl-yes"><td>margin</td><td>MULTIPLE</td></tr>
|
||||||
center, justify)</td></tr>
|
<tr class="css1 impl-yes"><td>margin-*</td><td>COMPOSITE(<length>,
|
||||||
<tr class="css1 impl-yes"><td>text-decoration</td><td>No blink (argh my eyes), not
|
<percentage>, auto)</td></tr>
|
||||||
enum, can be combined (composite sorta): underline, overline,
|
<tr class="css1 impl-yes"><td>padding</td><td>MULTIPLE</td></tr>
|
||||||
line-through</td></tr>
|
<tr class="css1 impl-yes"><td>padding-*</td><td>COMPOSITE(<length>(positive),
|
||||||
<tr class="css1 impl-yes"><td>text-indent</td><td>COMPOSITE(<length>,
|
<percentage>(positive))</td></tr>
|
||||||
<percentage>)</td></tr>
|
<tr class="css1 impl-yes"><td>text-align</td><td>ENUM(left, right,
|
||||||
<tr class="css1 impl-yes"><td>text-transform</td><td>ENUM(capitalize, uppercase,
|
center, justify)</td></tr>
|
||||||
lowercase, none)</td></tr>
|
<tr class="css1 impl-yes"><td>text-decoration</td><td>No blink (argh my eyes), not
|
||||||
<tr class="css1 impl-yes"><td>width</td><td>COMPOSITE(<length>,
|
enum, can be combined (composite sorta): underline, overline,
|
||||||
<percentage>, auto), Interesting</td></tr>
|
line-through</td></tr>
|
||||||
<tr class="css1 impl-yes"><td>word-spacing</td><td>COMPOSITE(<length>, auto),
|
<tr class="css1 impl-yes"><td>text-indent</td><td>COMPOSITE(<length>,
|
||||||
IE 5 no support</td></tr>
|
<percentage>)</td></tr>
|
||||||
</tbody>
|
<tr class="css1 impl-yes"><td>text-transform</td><td>ENUM(capitalize, uppercase,
|
||||||
|
lowercase, none)</td></tr>
|
||||||
<tbody>
|
<tr class="css1 impl-yes"><td>width</td><td>COMPOSITE(<length>,
|
||||||
<tr><th colspan="2">Table</th></tr>
|
<percentage>, auto), Interesting</td></tr>
|
||||||
<tr class="impl-yes"><td>border-collapse</td><td>ENUM(collapse, separate)</td></tr>
|
<tr class="css1 impl-yes"><td>word-spacing</td><td>COMPOSITE(<length>, auto),
|
||||||
<tr class="impl-yes"><td>caption-side</td><td>ENUM(top, bottom)</td></tr>
|
IE 5 no support</td></tr>
|
||||||
<tr class="feature"><td>empty-cells</td><td>ENUM(show, hide), No IE support makes this useless,
|
</tbody>
|
||||||
possible fix with &nbsp;? Unknown release milestone.</td></tr>
|
|
||||||
<tr class="impl-yes"><td>table-layout</td><td>ENUM(auto, fixed)</td></tr>
|
<tbody>
|
||||||
<tr class="impl-yes css1"><td>vertical-align</td><td>COMPOSITE(ENUM(baseline, sub,
|
<tr><th colspan="2">Table</th></tr>
|
||||||
super, top, text-top, middle, bottom, text-bottom), <percentage>,
|
<tr class="impl-yes"><td>border-collapse</td><td>ENUM(collapse, separate)</td></tr>
|
||||||
<length>) Also applies to others with explicit height</td></tr>
|
<tr class="impl-yes"><td>caption-side</td><td>ENUM(top, bottom)</td></tr>
|
||||||
</tbody>
|
<tr class="feature"><td>empty-cells</td><td>ENUM(show, hide), No IE support makes this useless,
|
||||||
|
possible fix with &nbsp;? Unknown release milestone.</td></tr>
|
||||||
<tbody>
|
<tr class="impl-yes"><td>table-layout</td><td>ENUM(auto, fixed)</td></tr>
|
||||||
<tr><th colspan="2">Absolute positioning, unknown release milestone</th></tr>
|
<tr class="impl-yes css1"><td>vertical-align</td><td>COMPOSITE(ENUM(baseline, sub,
|
||||||
<tr class="danger"><td>bottom</td><td rowspan="4">Dangerous, must be non-negative</td></tr>
|
super, top, text-top, middle, bottom, text-bottom), <percentage>,
|
||||||
<tr class="danger"><td>left</td></tr>
|
<length>) Also applies to others with explicit height</td></tr>
|
||||||
<tr class="danger"><td>right</td></tr>
|
</tbody>
|
||||||
<tr class="danger"><td>top</td></tr>
|
|
||||||
<tr><td>clip</td><td>-</td></tr>
|
<tbody>
|
||||||
<tr class="danger"><td>position</td><td>ENUM(static, relative, absolute, fixed), permit
|
<tr><th colspan="2">Absolute positioning, unknown release milestone</th></tr>
|
||||||
relative not absolute?</td></tr>
|
<tr class="danger impl-no"><td>bottom</td><td rowspan="4">Dangerous, must be non-negative to even be considered,
|
||||||
<tr class="danger"><td>z-index</td><td>Dangerous</td></tr>
|
but it's still possible to arbitrarily position by running over.</td></tr>
|
||||||
</tbody>
|
<tr class="danger impl-no"><td>left</td></tr>
|
||||||
|
<tr class="danger impl-no"><td>right</td></tr>
|
||||||
<tbody>
|
<tr class="danger impl-no"><td>top</td></tr>
|
||||||
<tr><th colspan="2">Unknown</th></tr>
|
<tr class="impl-no"><td>clip</td><td>-</td></tr>
|
||||||
<tr class="danger css1"><td>background-image</td><td>Dangerous, target milestone 1.3</td></tr>
|
<tr class="danger impl-no"><td>position</td><td>ENUM(static, relative, absolute, fixed), permit
|
||||||
<tr class="css1"><td>background-attachment</td><td>ENUM(scroll, fixed),
|
relative not absolute?</td></tr>
|
||||||
Depends on background-image</td></tr>
|
<tr class="danger impl-no"><td>z-index</td><td>Dangerous</td></tr>
|
||||||
<tr class="css1"><td>background-position</td><td>Depends on background-image</td></tr>
|
</tbody>
|
||||||
<tr class="danger impl-no"><td>cursor</td><td>Dangerous but fluffy</td></tr>
|
|
||||||
<tr class="danger css1"><td>display</td><td>ENUM(...), Dangerous but interesting;
|
<tbody>
|
||||||
will not implement list-item, run-in (Opera only) or table (no IE);
|
<tr><th colspan="2">Unknown</th></tr>
|
||||||
inline-block has incomplete IE6 support and requires -moz-inline-box
|
<tr class="danger css1"><td>background-image</td><td>Dangerous, target milestone 1.3</td></tr>
|
||||||
for Mozilla. Unknown target milestone.</td></tr>
|
<tr class="css1"><td>background-attachment</td><td>ENUM(scroll, fixed),
|
||||||
<tr><td class="css1">height</td><td>Interesting, why use it? Unknown target milestone.</td></tr>
|
Depends on background-image</td></tr>
|
||||||
<tr class="danger css1"><td>list-style-image</td><td>Dangerous? Target milestone 1.3</td></tr>
|
<tr class="css1"><td>background-position</td><td>Depends on background-image</td></tr>
|
||||||
<tr class="impl-no"><td>max-height</td><td rowspan="4">No IE 5/6</td></tr>
|
<tr class="danger impl-no"><td>cursor</td><td>Dangerous but fluffy</td></tr>
|
||||||
<tr class="impl-no"><td>min-height</td></tr>
|
<tr class="danger css1"><td>display</td><td>ENUM(...), Dangerous but interesting;
|
||||||
<tr class="impl-no"><td>max-width</td></tr>
|
will not implement list-item, run-in (Opera only) or table (no IE);
|
||||||
<tr class="impl-no"><td>min-width</td></tr>
|
inline-block has incomplete IE6 support and requires -moz-inline-box
|
||||||
<tr class="impl-no"><td>orphans</td><td>No IE support</td></tr>
|
for Mozilla. Unknown target milestone.</td></tr>
|
||||||
<tr class="impl-no"><td>widows</td><td>No IE support</td></tr>
|
<tr><td class="css1">height</td><td>Interesting, why use it? Unknown target milestone.</td></tr>
|
||||||
<tr><td>overflow</td><td>ENUM, IE 5/6 almost (remove visible if set). Unknown target milestone.</td></tr>
|
<tr class="danger css1"><td>list-style-image</td><td>Dangerous? Target milestone 1.3</td></tr>
|
||||||
<tr><td>page-break-after</td><td>ENUM(auto, always, avoid, left, right),
|
<tr class="impl-no"><td>max-height</td><td rowspan="4">No IE 5/6</td></tr>
|
||||||
IE 5.5/6 and Opera. Unknown target milestone.</td></tr>
|
<tr class="impl-no"><td>min-height</td></tr>
|
||||||
<tr><td>page-break-before</td><td>ENUM(auto, always, avoid, left, right),
|
<tr class="impl-no"><td>max-width</td></tr>
|
||||||
Mostly supported. Unknown target milestone.</td></tr>
|
<tr class="impl-no"><td>min-width</td></tr>
|
||||||
<tr><td>page-break-inside</td><td>ENUM(avoid, auto), Opera only. Unknown target milestone.</td></tr>
|
<tr class="impl-no"><td>orphans</td><td>No IE support</td></tr>
|
||||||
<tr class="impl-no"><td>quotes</td><td>May be dropped from CSS2, fairly useless for inline context</td></tr>
|
<tr class="impl-no"><td>widows</td><td>No IE support</td></tr>
|
||||||
<tr class="impl-no"><td>visibility</td><td>ENUM(visible, hidden, collapse),
|
<tr><td>overflow</td><td>ENUM, IE 5/6 almost (remove visible if set). Unknown target milestone.</td></tr>
|
||||||
Dangerous</td></tr>
|
<tr><td>page-break-after</td><td>ENUM(auto, always, avoid, left, right),
|
||||||
<tr class="css1 feature"><td>white-space</td><td>ENUM(normal, pre, nowrap, pre-wrap,
|
IE 5.5/6 and Opera. Unknown target milestone.</td></tr>
|
||||||
pre-line), Spotty implementation:
|
<tr><td>page-break-before</td><td>ENUM(auto, always, avoid, left, right),
|
||||||
pre (no IE 5/6), nowrap (no IE 5),
|
Mostly supported. Unknown target milestone.</td></tr>
|
||||||
pre-wrap (only Opera), pre-line (no support). Fixable? Unknown target milestone.</td></tr>
|
<tr><td>page-break-inside</td><td>ENUM(avoid, auto), Opera only. Unknown target milestone.</td></tr>
|
||||||
</tbody>
|
<tr class="impl-no"><td>quotes</td><td>May be dropped from CSS2, fairly useless for inline context</td></tr>
|
||||||
|
<tr class="impl-no"><td>visibility</td><td>ENUM(visible, hidden, collapse),
|
||||||
<tbody class="impl-no">
|
Dangerous</td></tr>
|
||||||
<tr><th colspan="2">Aural</th></tr>
|
<tr class="css1 feature"><td>white-space</td><td>ENUM(normal, pre, nowrap, pre-wrap,
|
||||||
<tr><td>azimuth</td><td>-</td></tr>
|
pre-line), Spotty implementation:
|
||||||
<tr><td>cue</td><td>-</td></tr>
|
pre (no IE 5/6), nowrap (no IE 5),
|
||||||
<tr><td>cue-after</td><td>-</td></tr>
|
pre-wrap (only Opera), pre-line (no support). Fixable? Unknown target milestone.</td></tr>
|
||||||
<tr><td>cue-before</td><td>-</td></tr>
|
</tbody>
|
||||||
<tr><td>elevation</td><td>-</td></tr>
|
|
||||||
<tr><td>pause-after</td><td>-</td></tr>
|
<tbody class="impl-no">
|
||||||
<tr><td>pause-before</td><td>-</td></tr>
|
<tr><th colspan="2">Aural</th></tr>
|
||||||
<tr><td>pause</td><td>-</td></tr>
|
<tr><td>azimuth</td><td>-</td></tr>
|
||||||
<tr><td>pitch-range</td><td>-</td></tr>
|
<tr><td>cue</td><td>-</td></tr>
|
||||||
<tr><td>pitch</td><td>-</td></tr>
|
<tr><td>cue-after</td><td>-</td></tr>
|
||||||
<tr><td>play-during</td><td>-</td></tr>
|
<tr><td>cue-before</td><td>-</td></tr>
|
||||||
<tr><td>richness</td><td>-</td></tr>
|
<tr><td>elevation</td><td>-</td></tr>
|
||||||
<tr><td>speak-header</td><td>Table related</td></tr>
|
<tr><td>pause-after</td><td>-</td></tr>
|
||||||
<tr><td>speak-numeral</td><td>-</td></tr>
|
<tr><td>pause-before</td><td>-</td></tr>
|
||||||
<tr><td>speak-punctuation</td><td>-</td></tr>
|
<tr><td>pause</td><td>-</td></tr>
|
||||||
<tr><td>speak</td><td>-</td></tr>
|
<tr><td>pitch-range</td><td>-</td></tr>
|
||||||
<tr><td>speech-rate</td><td>-</td></tr>
|
<tr><td>pitch</td><td>-</td></tr>
|
||||||
<tr><td>stress</td><td>-</td></tr>
|
<tr><td>play-during</td><td>-</td></tr>
|
||||||
<tr><td>voice-family</td><td>-</td></tr>
|
<tr><td>richness</td><td>-</td></tr>
|
||||||
<tr><td>volume</td><td>-</td></tr>
|
<tr><td>speak-header</td><td>Table related</td></tr>
|
||||||
</tbody>
|
<tr><td>speak-numeral</td><td>-</td></tr>
|
||||||
|
<tr><td>speak-punctuation</td><td>-</td></tr>
|
||||||
<tbody class="impl-no">
|
<tr><td>speak</td><td>-</td></tr>
|
||||||
<tr><th colspan="2">Will not implement</th></tr>
|
<tr><td>speech-rate</td><td>-</td></tr>
|
||||||
<tr><td>content</td><td>Not applicable for inline styles</td></tr>
|
<tr><td>stress</td><td>-</td></tr>
|
||||||
<tr><td>counter-increment</td><td>Needs content, Opera only</td></tr>
|
<tr><td>voice-family</td><td>-</td></tr>
|
||||||
<tr><td>counter-reset</td><td>Needs content, Opera only</td></tr>
|
<tr><td>volume</td><td>-</td></tr>
|
||||||
<tr><td>direction</td><td>No support</td></tr>
|
</tbody>
|
||||||
<tr><td>outline-color</td><td rowspan="4">IE Mac and Opera on outside,
|
|
||||||
Mozilla on inside and needs -moz-outline, no IE support.</td></tr>
|
<tbody class="impl-no">
|
||||||
<tr><td>outline-style</td></tr>
|
<tr><th colspan="2">Will not implement</th></tr>
|
||||||
<tr><td>outline-width</td></tr>
|
<tr><td>content</td><td>Not applicable for inline styles</td></tr>
|
||||||
<tr><td>outline</td></tr>
|
<tr><td>counter-increment</td><td>Needs content, Opera only</td></tr>
|
||||||
<tr><td>unicode-bidi</td><td>No support</td></tr>
|
<tr><td>counter-reset</td><td>Needs content, Opera only</td></tr>
|
||||||
</tbody>
|
<tr><td>direction</td><td>No support</td></tr>
|
||||||
|
<tr><td>outline-color</td><td rowspan="4">IE Mac and Opera on outside,
|
||||||
</table>
|
Mozilla on inside and needs -moz-outline, no IE support.</td></tr>
|
||||||
|
<tr><td>outline-style</td></tr>
|
||||||
<h2>Interesting Attributes</h2>
|
<tr><td>outline-width</td></tr>
|
||||||
|
<tr><td>outline</td></tr>
|
||||||
<table cellspacing="0">
|
<tr><td>unicode-bidi</td><td>No support</td></tr>
|
||||||
|
</tbody>
|
||||||
<thead>
|
|
||||||
<tr><th>Attribute</th><th>Tags</th><th>Notes</th></tr>
|
</table>
|
||||||
</thead>
|
|
||||||
|
<h2>Interesting Attributes</h2>
|
||||||
<!--
|
|
||||||
<tr><th></th></tr>
|
<table cellspacing="0">
|
||||||
<tbody>
|
|
||||||
<tr><td>-</td><td>-</td><td>-</td></tr>
|
<thead>
|
||||||
</tbody>
|
<tr><th>Attribute</th><th>Tags</th><th>Notes</th></tr>
|
||||||
-->
|
</thead>
|
||||||
|
|
||||||
<tbody>
|
<!--
|
||||||
<tr><th colspan="3">CSS</th></tr>
|
<tr><th></th></tr>
|
||||||
<tr class="impl-yes"><td>style</td><td>All</td><td>Not all properties may be implemented, parser is good though.</td></tr>
|
<tbody>
|
||||||
</tbody>
|
<tr><td>-</td><td>-</td><td>-</td></tr>
|
||||||
|
</tbody>
|
||||||
<tbody>
|
-->
|
||||||
<tr><th colspan="3">Questionable</th></tr>
|
|
||||||
<tr class="impl-no"><td>accesskey</td><td>A</td><td>May interfere with main interface</td></tr>
|
<tbody>
|
||||||
<tr class="impl-no"><td>tabindex</td><td>A</td><td>May interfere with main interface</td></tr>
|
<tr><th colspan="3">CSS</th></tr>
|
||||||
<tr><td>target</td><td>A</td><td>Config enabled, only useful for frame layouts</td></tr>
|
<tr class="impl-yes"><td>style</td><td>All</td><td>Not all properties may be implemented, parser is good though.</td></tr>
|
||||||
</tbody>
|
</tbody>
|
||||||
|
|
||||||
<tbody>
|
<tbody>
|
||||||
<tr><th colspan="3">Miscellaneous</th></tr>
|
<tr><th colspan="3">Questionable</th></tr>
|
||||||
<tr><td>datetime</td><td>DEL, INS</td><td>No visible effect, ISO format</td></tr>
|
<tr class="impl-no"><td>accesskey</td><td>A</td><td>May interfere with main interface</td></tr>
|
||||||
<tr><td>rel</td><td>A</td><td>Largely user-defined: nofollow, tag (see microformats)</td></tr>
|
<tr class="impl-no"><td>tabindex</td><td>A</td><td>May interfere with main interface</td></tr>
|
||||||
<tr><td>rev</td><td>A</td><td>Largely user-defined: vote-*</td></tr>
|
<tr><td>target</td><td>A</td><td>Config enabled, only useful for frame layouts, disallowed in strict</td></tr>
|
||||||
<tr class="feature"><td>axis</td><td>TD, TH</td><td>W3C only: No browser implementation</td></tr>
|
</tbody>
|
||||||
<tr class="feature"><td>char</td><td>COL, COLGROUP, TBODY, TD, TFOOT, TH, THEAD, TR</td><td>W3C only: No browser implementation</td></tr>
|
|
||||||
<tr class="feature"><td>headers</td><td>TD, TH</td><td>W3C only: No browser implementation</td></tr>
|
<tbody>
|
||||||
<tr class="feature"><td>scope</td><td>TD, TH</td><td>W3C only: No browser implementation</td></tr>
|
<tr><th colspan="3">Miscellaneous</th></tr>
|
||||||
</tbody>
|
<tr><td>datetime</td><td>DEL, INS</td><td>No visible effect, ISO format</td></tr>
|
||||||
|
<tr><td>rel</td><td>A</td><td>Largely user-defined: nofollow, tag (see microformats)</td></tr>
|
||||||
<tbody class="impl-yes">
|
<tr><td>rev</td><td>A</td><td>Largely user-defined: vote-*</td></tr>
|
||||||
<tr><th colspan="3">URI</th></tr>
|
<tr class="feature"><td>axis</td><td>TD, TH</td><td>W3C only: No browser implementation</td></tr>
|
||||||
<tr><td rowspan="2">cite</td><td>BLOCKQUOTE, Q</td><td>For attribution</td></tr>
|
<tr class="feature"><td>char</td><td>COL, COLGROUP, TBODY, TD, TFOOT, TH, THEAD, TR</td><td>W3C only: No browser implementation</td></tr>
|
||||||
<tr><td>DEL, INS</td><td>Link to explanation why it changed</td></tr>
|
<tr class="feature"><td>headers</td><td>TD, TH</td><td>W3C only: No browser implementation</td></tr>
|
||||||
<tr><td>href</td><td>A</td><td>-</td></tr>
|
<tr class="feature"><td>scope</td><td>TD, TH</td><td>W3C only: No browser implementation</td></tr>
|
||||||
<tr><td>longdesc</td><td>IMG</td><td>-</td></tr>
|
</tbody>
|
||||||
<tr class="required"><td>src</td><td>IMG</td><td>Required</td></tr>
|
|
||||||
</tbody>
|
<tbody class="impl-yes">
|
||||||
|
<tr><th colspan="3">URI</th></tr>
|
||||||
<tbody>
|
<tr><td rowspan="2">cite</td><td>BLOCKQUOTE, Q</td><td>For attribution</td></tr>
|
||||||
<tr><th colspan="3">Transform, target milestone 1.2</th></tr>
|
<tr><td>DEL, INS</td><td>Link to explanation why it changed</td></tr>
|
||||||
<tr><td rowspan="5">align</td><td>CAPTION</td><td>Near-equiv style 'caption-side', drop left and right</td></tr>
|
<tr><td>href</td><td>A</td><td>-</td></tr>
|
||||||
<tr><td>IMG</td><td rowspan="2">Margin-left and margin-right = auto or parent div</td></tr>
|
<tr><td>longdesc</td><td>IMG</td><td>-</td></tr>
|
||||||
<tr><td>TABLE</td></tr>
|
<tr class="required"><td>src</td><td>IMG</td><td>Required</td></tr>
|
||||||
<tr><td>HR</td><td>Equivalent style 'text-align' (IE tested)</td></tr>
|
</tbody>
|
||||||
<tr class="impl-yes"><td>H1, H2, H3, H4, H5, H6, P</td><td>Equivalent style 'text-align'</td></tr>
|
|
||||||
<tr class="required impl-yes"><td>alt</td><td>IMG</td><td>Required, insert image filename if src is present or default invalid image text</td></tr>
|
<tbody>
|
||||||
<tr><td rowspan="3">bgcolor</td><td>TABLE</td><td>Equivalent style 'background-color' (IE tested)</td></tr>
|
<tr><th colspan="3">Transform, target milestone 1.4</th></tr>
|
||||||
<tr><td>TR</td><td>Equivalent style 'background-color' (IE tested)</td></tr>
|
<tr><td rowspan="5">align</td><td>CAPTION</td><td>Near-equiv style 'caption-side', drop left and right</td></tr>
|
||||||
<tr><td>TD, TH</td><td>Equivalent style 'background-color'</td></tr>
|
<tr><td>IMG</td><td rowspan="2">Margin-left and margin-right = auto or parent div</td></tr>
|
||||||
<tr><td>border</td><td>IMG</td><td>Equivalent style 'border-width', only applies when link present</td></tr>
|
<tr><td>TABLE</td></tr>
|
||||||
<tr><td>clear</td><td>BR</td><td>Near-equiv style 'clear', transform 'all' into 'both'</td></tr>
|
<tr><td>HR</td><td>Equivalent style 'text-align' (IE tested)</td></tr>
|
||||||
<tr class="impl-no"><td>compact</td><td>DL, OL, UL</td><td>Boolean, needs custom CSS class; rarely used anyway</td></tr>
|
<tr class="impl-yes"><td>H1, H2, H3, H4, H5, H6, P</td><td>Equivalent style 'text-align'</td></tr>
|
||||||
<tr class="required impl-yes"><td>dir</td><td>BDO</td><td>Required, insert ltr (or configuration value) if none</td></tr>
|
<tr class="required impl-yes"><td>alt</td><td>IMG</td><td>Required, insert image filename if src is present or default invalid image text</td></tr>
|
||||||
<tr><td>height</td><td>TD, TH</td><td>Near-equiv style 'height', needs px suffix if original was in pixels</td></tr>
|
<tr><td rowspan="3">bgcolor</td><td>TABLE</td><td>Equivalent style 'background-color' (IE tested)</td></tr>
|
||||||
<tr><td>hspace</td><td>IMG</td><td>Near-equiv styles 'margin-left' and 'margin-right', needs px suffix</td></tr>
|
<tr><td>TR</td><td>Equivalent style 'background-color' (IE tested)</td></tr>
|
||||||
<tr class="impl-yes"><td>lang</td><td>*</td><td>Copy value to xml:lang</td></tr>
|
<tr><td>TD, TH</td><td>Equivalent style 'background-color'</td></tr>
|
||||||
<tr><td rowspan="2">name</td><td>IMG</td><td>Turn into ID</td></tr>
|
<tr><td>border</td><td>IMG</td><td>Equivalent style 'border-width', only applies when link present</td></tr>
|
||||||
<tr><td>A</td><td>Turn into ID? (not deprecated, though in which specs?)</td></tr>
|
<tr><td>clear</td><td>BR</td><td>Near-equiv style 'clear', transform 'all' into 'both'</td></tr>
|
||||||
<tr><td>noshade</td><td>HR</td><td>Boolean, style 'border-style:solid;'</td></tr>
|
<tr class="impl-no"><td>compact</td><td>DL, OL, UL</td><td>Boolean, needs custom CSS class; rarely used anyway</td></tr>
|
||||||
<tr><td>nowrap</td><td>TD, TH</td><td>Boolean, style 'white-space:nowrap;' (not compat with IE5)</td></tr>
|
<tr class="required impl-yes"><td>dir</td><td>BDO</td><td>Required, insert ltr (or configuration value) if none</td></tr>
|
||||||
<tr><td>size</td><td>HR</td><td>Near-equiv 'width', needs px suffix if original was pixels</td></tr>
|
<tr><td>height</td><td>TD, TH</td><td>Near-equiv style 'height', needs px suffix if original was in pixels</td></tr>
|
||||||
<tr class="required impl-yes"><td>src</td><td>IMG</td><td>Required, insert blank or default img if not set</td></tr>
|
<tr><td>hspace</td><td>IMG</td><td>Near-equiv styles 'margin-left' and 'margin-right', needs px suffix</td></tr>
|
||||||
<tr><td>start</td><td>OL</td><td>Poorly supported 'counter-reset', transform may not be desirable</td></tr>
|
<tr class="impl-yes"><td>lang</td><td>*</td><td>Copy value to xml:lang</td></tr>
|
||||||
<tr><td rowspan="3">type</td><td>LI</td><td rowspan="3">Equivalent style 'list-style-type', different allowed values though. (needs testing)</td></tr>
|
<tr><td rowspan="2">name</td><td>IMG</td><td>Turn into ID</td></tr>
|
||||||
<tr><td>OL</td></tr>
|
<tr><td>A</td><td>Turn into ID? (not deprecated, though in which specs?)</td></tr>
|
||||||
<tr><td>UL</td></tr>
|
<tr><td>noshade</td><td>HR</td><td>Boolean, style 'border-style:solid;'</td></tr>
|
||||||
<tr><td>value</td><td>LI</td><td>Poorly supported 'counter-reset', transform may not be desirable, see ol.start. Configurable.</td></tr>
|
<tr><td>nowrap</td><td>TD, TH</td><td>Boolean, style 'white-space:nowrap;' (not compat with IE5)</td></tr>
|
||||||
<tr><td>vspace</td><td>IMG</td><td>Near-equiv styles 'margin-top' and 'margin-bottom', needs px suffix, see hspace</td></tr>
|
<tr><td>size</td><td>HR</td><td>Near-equiv 'width', needs px suffix if original was pixels</td></tr>
|
||||||
<tr><td rowspan="2">width</td><td>HR</td><td rowspan="2">Near-equiv style 'width', needs px suffix if original was pixels</td></tr>
|
<tr class="required impl-yes"><td>src</td><td>IMG</td><td>Required, insert blank or default img if not set</td></tr>
|
||||||
<tr><td>TD, TH</td></tr>
|
<tr class="impl-yes"><td>start</td><td>OL</td><td>Poorly supported 'counter-reset', allowed in loose, dropped in strict</td></tr>
|
||||||
</tbody>
|
<tr><td rowspan="3">type</td><td>LI</td><td rowspan="3">Equivalent style 'list-style-type', different allowed values though. (needs testing)</td></tr>
|
||||||
|
<tr><td>OL</td></tr>
|
||||||
</table>
|
<tr><td>UL</td></tr>
|
||||||
|
<tr class="impl-yes"><td>value</td><td>LI</td><td>Poorly supported 'counter-reset', allowed in loose, dropped in strict</td></tr>
|
||||||
|
<tr><td>vspace</td><td>IMG</td><td>Near-equiv styles 'margin-top' and 'margin-bottom', needs px suffix, see hspace</td></tr>
|
||||||
|
<tr><td rowspan="2">width</td><td>HR</td><td rowspan="2">Near-equiv style 'width', needs px suffix if original was pixels</td></tr>
|
||||||
|
<tr><td>TD, TH</td></tr>
|
||||||
|
</tbody>
|
||||||
|
|
||||||
|
</table>
|
||||||
|
|
||||||
|
<div id="version">$Id$</div>
|
||||||
|
|
||||||
</body></html>
|
</body></html>
|
146
docs/enduser-id.html
Normal file
146
docs/enduser-id.html
Normal file
@@ -0,0 +1,146 @@
|
|||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
|
||||||
|
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
|
||||||
|
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"><head>
|
||||||
|
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
|
||||||
|
<meta name="description" content="Explains various methods for allowing IDs in documents safely in HTML Purifier." />
|
||||||
|
<link rel="stylesheet" type="text/css" href="./style.css" />
|
||||||
|
|
||||||
|
<title>IDs - HTML Purifier</title>
|
||||||
|
|
||||||
|
</head><body>
|
||||||
|
|
||||||
|
<h1 class="subtitled">IDs</h1>
|
||||||
|
<div class="subtitle">What they are, why you should(n't) wear them, and how to deal with it</div>
|
||||||
|
|
||||||
|
<div id="filing">Filed under End-User</div>
|
||||||
|
<div id="index">Return to the <a href="index.html">index</a>.</div>
|
||||||
|
|
||||||
|
<p>Prior to HTML Purifier 1.2.0, this library blithely accepted user input that
|
||||||
|
looked like this:</p>
|
||||||
|
|
||||||
|
<pre><a id="fragment">Anchor</a></pre>
|
||||||
|
|
||||||
|
<p>...presenting an attractive vector for those that would destroy standards
|
||||||
|
compliance: simply set the ID to one that is already used elsewhere in the
|
||||||
|
document and voila: validation breaks. There was a half-hearted attempt to
|
||||||
|
prevent this by allowing users to blacklist IDs, but I suspect that no one
|
||||||
|
really bothered, and thus, with the release of 1.2.0, IDs are now <em>removed</em>
|
||||||
|
by default.</p>
|
||||||
|
|
||||||
|
<p>IDs, however, are quite useful functionality to have, so if users start
|
||||||
|
complaining about broken anchors you'll probably want to turn them back on
|
||||||
|
with %HTML.EnableAttrID. But before you go mucking around with the config
|
||||||
|
object, it's probably worth taking some precautions to keep your page
|
||||||
|
validating. Why?</p>
|
||||||
|
|
||||||
|
<ol>
|
||||||
|
<li>Standards-compliant pages are good</li>
|
||||||
|
<li>Duplicated IDs interfere with anchors. If there are two id="foobar"s in a
|
||||||
|
document, which spot does a browser presented with the fragment #foobar go
|
||||||
|
to? Most browsers opt for the first appearing ID, making it impossible
|
||||||
|
to reference the second section. Similarly, duplicated IDs can hijack
|
||||||
|
client-side scripting that relies on the IDs of elements.</li>
|
||||||
|
</ol>
|
||||||
|
|
||||||
|
<p>You have (currently) four ways of dealing with the problem.</p>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
<h2 class="subtitled">Blacklisting IDs</h2>
|
||||||
|
<div class="subsubtitle">Good for pages with single content source and stable templates</div>
|
||||||
|
|
||||||
|
<p>In keeping with the
|
||||||
|
<acronym title="Keep It Simple, Stupid">KISS</acronym> principle, let us
|
||||||
|
deal with the most obvious solution: preventing users from using any IDs that
|
||||||
|
appear elsewhere on the document. The method is simple:</p>
|
||||||
|
|
||||||
|
<pre>$config->set('HTML', 'EnableAttrID', true);
|
||||||
|
$config->set('Attr', 'IDBlacklist', array(
|
||||||
|
'list', 'of', 'attributes', 'that', 'are', 'forbidden'
|
||||||
|
));</pre>
|
||||||
|
|
||||||
|
<p>That being said, there are some notable drawbacks. First of all, you have to
|
||||||
|
know precisely which IDs are being used by the HTML surrounding the user code.
|
||||||
|
This is easier said than done: quite often the page designer and the system
|
||||||
|
coder work separately, so the designer has to constantly be talking with the
|
||||||
|
coder whenever he decides to add a new anchor. Miss one and you open yourself
|
||||||
|
to possible standards-compliance issues.</p>
|
||||||
|
|
||||||
|
<p>Furthermore, this position becomes untenable when a single web page must hold
|
||||||
|
multiple portions of user-submitted content. Since there's obviously no way
|
||||||
|
to find out before-hand what IDs users will use, the blacklist is helpless.
|
||||||
|
And since HTML Purifier validates each segment separately, perhaps doing
|
||||||
|
so at different times, it would be extremely difficult to dynamically update
|
||||||
|
the blacklist in between runs.</p>
|
||||||
|
|
||||||
|
<p>Finally, simply destroying the ID is extremely un-userfriendly behavior: after
|
||||||
|
all, they might have simply specified a duplicate ID by accident.</p>
|
||||||
|
|
||||||
|
<p>Thus, we get to our second method.</p>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
<h2 class="subtitled">Namespacing IDs</h2>
|
||||||
|
<div class="subsubtitle">Lazy developer's way, but needs user education</div>
|
||||||
|
|
||||||
|
<p>This method, too, is quite simple: add a prefix to all user IDs. With this
|
||||||
|
code:</p>
|
||||||
|
|
||||||
|
<pre>$config->set('HTML', 'EnableAttrID', true);
|
||||||
|
$config->set('Attr', 'IDPrefix', 'user_');</pre>
|
||||||
|
|
||||||
|
<p>...this:</p>
|
||||||
|
|
||||||
|
<pre><a id="foobar">Anchor!</a></pre>
|
||||||
|
|
||||||
|
<p>...turns into:</p>
|
||||||
|
|
||||||
|
<pre><a id="user_foobar">Anchor!</a></pre>
|
||||||
|
|
||||||
|
<p>As long as you don't have any IDs that start with user_, collisions are
|
||||||
|
guaranteed not to happen. The drawback is obvious: if a user submits
|
||||||
|
id="foobar", they probably expect to be able to reference their page with
|
||||||
|
#foobar. You'll have to tell them, "No, that doesn't work, you have to add
|
||||||
|
user_ to the beginning."</p>
|
||||||
|
|
||||||
|
<p>And yes, things get hairier. Even with a nice prefix, we still have done
|
||||||
|
nothing about multiple HTML Purifier outputs on one page. Thus, we have
|
||||||
|
a second configuration value to piggy-back off of: %Attr.IDPrefixLocal:</p>
|
||||||
|
|
||||||
|
<pre>$config->set('Attr', 'IDPrefixLocal', 'comment' . $id . '_');</pre>
|
||||||
|
|
||||||
|
<p>This new directive does nothing but append on to regular IDPrefix, but is
|
||||||
|
special in that it is volatile: its value is determined at run-time and
|
||||||
|
cannot possibly be cordoned into, say, a .ini config file. As for what to
|
||||||
|
put into the directive, that is up to you, but I would recommend the ID number
|
||||||
|
the text has been assigned in the database. Whatever you pick, however, it
|
||||||
|
has to be unique and stable for the text you are validating. Note, however,
|
||||||
|
that we require that %Attr.IDPrefix be set before you use this directive.</p>
|
||||||
|
|
||||||
|
<p>And also remember: the user has to know what this prefix is too!</p>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
<h2>Abstinence</h2>
|
||||||
|
|
||||||
|
<p>You may not want to bother. That's okay too, just don't enable IDs.</p>
|
||||||
|
|
||||||
|
<p>Personally, I would take this road whenever user-submitted content would be
|
||||||
|
possibly shown together on one page. Why a blog comment would need to use
|
||||||
|
anchors is beyond me.</p>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
<h2>Denial</h2>
|
||||||
|
|
||||||
|
<p>To revert back to pre-1.2.0 behavior, simply:</p>
|
||||||
|
|
||||||
|
<pre>$config->set('HTML', 'EnableAttrID', true);</pre>
|
||||||
|
|
||||||
|
<p>Don't come crying to me when your page mysteriously stops validating, though.</p>
|
||||||
|
|
||||||
|
<div id="version">$Id$</div>
|
||||||
|
|
||||||
|
</body>
|
||||||
|
</html>
|
@@ -6,30 +6,39 @@ through negligence of people. This class will do its job: no more, no less,
|
|||||||
and it's up to you to provide it the proper information and proper context
|
and it's up to you to provide it the proper information and proper context
|
||||||
to be effective. Things to remember:
|
to be effective. Things to remember:
|
||||||
|
|
||||||
1. UTF-8. Currently, the parser runs under the assumption that it is dealing
|
1. Character Encoding: UTF-8.
|
||||||
|
Currently, the parser runs under the assumption that it is dealing
|
||||||
with UTF-8. Not ISO-8859-1 or Windows-1252, UTF-8. And definitely not "no
|
with UTF-8. Not ISO-8859-1 or Windows-1252, UTF-8. And definitely not "no
|
||||||
character encoding explicitly stated" or UTF-7. If you're not using UTF-8 as
|
character encoding explicitly stated" or UTF-7. If you're not using UTF-8 as
|
||||||
your character encoding, you should switch. Now. Make sure any input is
|
your character encoding, make sure you configure HTML Purifier or switch
|
||||||
properly converted to UTF-8, or the parser will mangle it badly
|
to UTF-8. Now. Also, make sure any input is properly converted to UTF-8, or
|
||||||
(though it won't be a security risk if you're outputting it as UTF-8).
|
the parser will mangle it badly (though it won't be a security risk if you're
|
||||||
|
outputting it as UTF-8). Character encoding is, in general, a knotty
|
||||||
|
issue, but do yourself a favor and learn about it:
|
||||||
|
<http://www.joelonsoftware.com/articles/Unicode.html>
|
||||||
|
|
||||||
2. XHTML 1.0 Transitional. This is what the parser is outputting. For the most
|
2. Doctype: XHTML 1.0 Transitional
|
||||||
|
This is what the parser is outputting. For the most
|
||||||
part, it's compatible with HTML 4.01, but XHTML enforces some very nice things
|
part, it's compatible with HTML 4.01, but XHTML enforces some very nice things
|
||||||
that all web developers should use. Regardless, NO DOCTYPE is a NO. Quirks mode
|
that all web developers should use. Regardless, NO DOCTYPE is a NO. Quirks mode
|
||||||
has waaaay too many quirks for a little parser to handle. We did not select
|
has waaaay too many quirks for a little parser to handle. We did not select
|
||||||
strict in order to prevent ourselves from being too draconic on users, but
|
strict in order to prevent ourselves from being too draconic on users, but
|
||||||
this may be configurable in the future.
|
this may be configurable in the future. Do you want standards compliance?
|
||||||
|
The doctype is a good place to start.
|
||||||
|
|
||||||
3. IDs. They need to be unique, but without some knowledge of the
|
3. IDs
|
||||||
|
They need to be unique, but without some knowledge of the
|
||||||
rest of the document, it's difficult to know what's unique. %Attr.IDBlacklist
|
rest of the document, it's difficult to know what's unique. %Attr.IDBlacklist
|
||||||
needs to be set: we may want to consider disallowing IDs by default to
|
needs to be set: we may want to consider disallowing IDs by default to
|
||||||
save lazy programmers.
|
save lazy programmers.
|
||||||
|
|
||||||
4. [PROJECTED] Links. We're not going to try for spam protection (although
|
4. [PROJECTED] Links
|
||||||
|
We're not going to try for spam protection (although
|
||||||
some hooks for such a module might be nice) but we may offer the ability to
|
some hooks for such a module might be nice) but we may offer the ability to
|
||||||
only accept relative URLs. Pick the one that's right for you.
|
only accept relative URLs. Pick the one that's right for you.
|
||||||
|
|
||||||
5. CSS. While we can prevent the most flagrant cases from affecting your
|
5. CSS
|
||||||
|
While we can prevent the most flagrant cases from affecting your
|
||||||
layout (such as absolutely positioned elements), no amount of code is going
|
layout (such as absolutely positioned elements), no amount of code is going
|
||||||
to protect your pages from being attacked by garish colors and plain old
|
to protect your pages from being attacked by garish colors and plain old
|
||||||
bad taste. A neat feature would be the ability to define acceptable colors
|
bad taste. A neat feature would be the ability to define acceptable colors
|
@@ -1,32 +1,66 @@
|
|||||||
<?php
|
<?php
|
||||||
|
|
||||||
header('Content-type:text/html;charset=UTF-8');
|
// using _REQUEST because we accept GET and POST requests
|
||||||
|
|
||||||
?><!DOCTYPE html
|
$content = empty($_REQUEST['xml']) ? 'text/html' : 'application/xhtml+xml';
|
||||||
PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
|
header("Content-type:$content;charset=UTF-8");
|
||||||
|
|
||||||
|
// prevent PHP versions with shorttags from barfing
|
||||||
|
echo '<?xml version="1.0" encoding="UTF-8" ?>
|
||||||
|
';
|
||||||
|
|
||||||
|
function getFormMethod() {
|
||||||
|
return (isset($_REQUEST['post'])) ? 'post' : 'get';
|
||||||
|
}
|
||||||
|
|
||||||
|
if (empty($_REQUEST['strict'])) {
|
||||||
|
?><!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
|
||||||
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
|
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
|
||||||
<html>
|
<?php
|
||||||
|
} else {
|
||||||
|
?>
|
||||||
|
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
|
||||||
|
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
|
||||||
|
<?php
|
||||||
|
}
|
||||||
|
?>
|
||||||
|
<html xmlns="http://www.w3.org/1999/xhtml" lang="en" xml:lang="en">
|
||||||
<head>
|
<head>
|
||||||
<title>HTMLPurifier Live Demo</title>
|
<title>HTML Purifier Live Demo</title>
|
||||||
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
|
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
|
||||||
</head>
|
</head>
|
||||||
<body>
|
<body>
|
||||||
<h1>HTMLPurifier Live Demo</h1>
|
<h1>HTML Purifier Live Demo</h1>
|
||||||
<?php
|
<?php
|
||||||
|
|
||||||
set_include_path('../../library' . PATH_SEPARATOR . get_include_path());
|
require_once '../../library/HTMLPurifier.auto.php';
|
||||||
require_once 'HTMLPurifier.php';
|
|
||||||
|
|
||||||
if (!empty($_POST['html'])) {
|
if (!empty($_REQUEST['html'])) { // start result
|
||||||
|
|
||||||
$html = get_magic_quotes_gpc() ? stripslashes($_POST['html']) : $_POST['html'];
|
if (strlen($_REQUEST['html']) > 50000) {
|
||||||
|
?>
|
||||||
|
<p>Request exceeds maximum allowed text size of 50kb.</p>
|
||||||
|
<?php
|
||||||
|
} else { // start main processing
|
||||||
|
|
||||||
$purifier = new HTMLPurifier();
|
$html = get_magic_quotes_gpc() ? stripslashes($_REQUEST['html']) : $_REQUEST['html'];
|
||||||
|
|
||||||
|
$config = HTMLPurifier_Config::createDefault();
|
||||||
|
$config->set('Core', 'TidyFormat', !empty($_REQUEST['tidy']));
|
||||||
|
$config->set('HTML', 'Strict', !empty($_REQUEST['strict']));
|
||||||
|
$purifier = new HTMLPurifier($config);
|
||||||
$pure_html = $purifier->purify($html);
|
$pure_html = $purifier->purify($html);
|
||||||
|
|
||||||
?>
|
?>
|
||||||
<p>Here is your purified HTML:</p>
|
<p>Here is your purified HTML:</p>
|
||||||
<div style="border:5px solid #CCC;margin:0 10%;padding:1em;">
|
<div style="border:5px solid #CCC;margin:0 10%;padding:1em;">
|
||||||
|
<?php if(getFormMethod() == 'get') { ?>
|
||||||
|
<div style="float:right;">
|
||||||
|
<a href="http://validator.w3.org/check?uri=referer"><img
|
||||||
|
src="http://www.w3.org/Icons/valid-xhtml10"
|
||||||
|
alt="Valid XHTML 1.0 Transitional" height="31" width="88" style="border:0;" /></a>
|
||||||
|
</div>
|
||||||
|
<?php } ?>
|
||||||
<?php
|
<?php
|
||||||
|
|
||||||
echo $pure_html;
|
echo $pure_html;
|
||||||
@@ -41,23 +75,34 @@ echo htmlspecialchars($pure_html, ENT_COMPAT, 'UTF-8');
|
|||||||
|
|
||||||
?></pre>
|
?></pre>
|
||||||
<?php
|
<?php
|
||||||
|
if (getFormMethod() == 'post') { // start POST validation notice
|
||||||
|
?>
|
||||||
|
<p>If you would like to validate the code with
|
||||||
|
<a href="http://validator.w3.org/#validate-by-input">W3C's
|
||||||
|
validator</a>, copy and paste the <em>entire</em> demo page's source.</p>
|
||||||
|
<?php
|
||||||
|
} // end POST validation notice
|
||||||
|
|
||||||
|
} // end main processing
|
||||||
|
|
||||||
|
// end result
|
||||||
} else {
|
} else {
|
||||||
|
|
||||||
?>
|
?>
|
||||||
<p>Welcome to the live demo. Enter some HTML and see how HTMLPurifier
|
<p>Welcome to the live demo. Enter some HTML and see how HTML Purifier
|
||||||
will filter it.</p>
|
will filter it.</p>
|
||||||
<?php
|
<?php
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
?>
|
?>
|
||||||
<form name="filter" action="demo.php<?php
|
<form id="filter" action="demo.php<?php
|
||||||
if (isset($_GET['profile']) || isset($_GET['XDEBUG_PROFILE'])) {
|
echo '?' . getFormMethod();
|
||||||
echo '?XDEBUG_PROFILE=1';
|
if (isset($_REQUEST['profile']) || isset($_REQUEST['XDEBUG_PROFILE'])) {
|
||||||
} ?>" method="post">
|
echo '&XDEBUG_PROFILE=1';
|
||||||
|
} ?>" method="<?php echo getFormMethod(); ?>">
|
||||||
<fieldset>
|
<fieldset>
|
||||||
<legend>HTML</legend>
|
<legend>HTML Purifier Input (<?php echo getFormMethod(); ?>)</legend>
|
||||||
<textarea name="html" cols="60" rows="15"><?php
|
<textarea name="html" cols="60" rows="15"><?php
|
||||||
|
|
||||||
if (isset($html)) {
|
if (isset($html)) {
|
||||||
@@ -65,11 +110,27 @@ if (isset($html)) {
|
|||||||
HTMLPurifier_Encoder::cleanUTF8($html), ENT_COMPAT, 'UTF-8');
|
HTMLPurifier_Encoder::cleanUTF8($html), ENT_COMPAT, 'UTF-8');
|
||||||
}
|
}
|
||||||
?></textarea>
|
?></textarea>
|
||||||
|
<?php if (getFormMethod() == 'get') { ?>
|
||||||
|
<p><strong>Warning:</strong> GET request method can only hold
|
||||||
|
8129 characters (probably less depending on your browser).
|
||||||
|
If you need to test anything
|
||||||
|
larger than that, try the <a href="demo.php?post">POST form</a>.</p>
|
||||||
|
<?php } ?>
|
||||||
|
<?php if (extension_loaded('tidy')) { ?>
|
||||||
|
<div>Nicely format output with Tidy? <input type="checkbox" value="1"
|
||||||
|
name="tidy"<?php if (!empty($_REQUEST['tidy'])) echo ' checked="checked"'; ?> /></div>
|
||||||
|
<?php } ?>
|
||||||
|
<div>XHTML 1.0 Strict output? <input type="checkbox" value="1"
|
||||||
|
name="strict"<?php if (!empty($_REQUEST['strict'])) echo ' checked="checked"'; ?> /></div>
|
||||||
|
<div>Serve as application/xhtml+xml? (not for IE) <input type="checkbox" value="1"
|
||||||
|
name="xml"<?php if (!empty($_REQUEST['xml'])) echo ' checked="checked"'; ?> /></div>
|
||||||
<div>
|
<div>
|
||||||
<input type="submit" value="Submit" name="submit" class="button" />
|
<input type="submit" value="Submit" name="submit" class="button" />
|
||||||
</div>
|
</div>
|
||||||
</fieldset>
|
</fieldset>
|
||||||
</form>
|
</form>
|
||||||
<p>Return to <a href="http://hp.jpsband.org/">HTMLPurifier's home page</a>.</p>
|
<p>Return to <a href="http://hp.jpsband.org/">HTML Purifier's home page</a>.
|
||||||
|
Try the form in <a href="demo.php?get">GET</a> and <a href="demo.php?post">POST</a> request
|
||||||
|
flavors (GET is easy to validate with W3C, but POST allows larger inputs).</p>
|
||||||
</body>
|
</body>
|
||||||
</html>
|
</html>
|
@@ -1,67 +0,0 @@
|
|||||||
|
|
||||||
Filter Levels
|
|
||||||
When one size *does not* fit all
|
|
||||||
|
|
||||||
The more I think about it, the less sense it makes for maintaining one huge
|
|
||||||
monolithic HTMLDefinition class. There's simply so much variation that
|
|
||||||
could go into this definition: the set of HTML good for blog entries is
|
|
||||||
definitely too large for HTML that would be allowed in blog comments. Going
|
|
||||||
from Transitional to Strict requires changes to the definition.
|
|
||||||
|
|
||||||
However, allowing users to specify their own whitelists was an idea I
|
|
||||||
rejected from the start. Simply put, the typical programmer is too lazy
|
|
||||||
to actually go through the trouble of investigating which tags, attributes
|
|
||||||
and properties to allow. HTMLDefinition makes a big part of what HTMLPurifier
|
|
||||||
is.
|
|
||||||
|
|
||||||
The idea, then, is to setup fundamentally different set of definitions, which
|
|
||||||
can further be customized using simpler configuration options.
|
|
||||||
|
|
||||||
Here are some fuzzy levels you could set:
|
|
||||||
|
|
||||||
1. Comments - Wordpress recommends a, abbr, acronym, b, blockquote, cite,
|
|
||||||
code, em, i, strike, strong; however, you could get away with only a, b and
|
|
||||||
i; also having p and pre tags would be helpful.
|
|
||||||
2. Pages - As permissive as possible without allowing XSS. No protection
|
|
||||||
against bad design sense, unfortunately. Suitable for wiki and page
|
|
||||||
environments.
|
|
||||||
3. Lint - Accept everything in the spec, a Tidy wannabe.
|
|
||||||
|
|
||||||
I've also decomposed tags into risk levels. An asterisk indicates that no one
|
|
||||||
really uses that tag, tilde indicates it's deprecated.
|
|
||||||
|
|
||||||
1 - blockquote, code, em, i, p, tt / strong, sub, sup
|
|
||||||
1* - abbr, acronym, bdo, cite, dfn, kbd, q, samp
|
|
||||||
2 - b, br, del, div, pre, span / ins, s, strike ~ u
|
|
||||||
3 - h2, h3, h4, h5, h6 ~ center
|
|
||||||
4 - h1, big ~ font
|
|
||||||
5 - a
|
|
||||||
7 - area, map
|
|
||||||
|
|
||||||
Lists - dd, dl, dt, li, ol, ul ~ menu, dir
|
|
||||||
Tables - caption, table, td, th, tr / col, colgroup, tbody, tfoot, thead
|
|
||||||
Forms - fieldset, form, input, label, legend, optgroup, option, select, textarea
|
|
||||||
XSS - noscript, object, script ~ applet
|
|
||||||
|
|
||||||
Meta - base, basefont, body, head, html, link, meta, style, title
|
|
||||||
Frames - frame, frameset, iframe
|
|
||||||
|
|
||||||
And tag specific notes:
|
|
||||||
|
|
||||||
a - general problems involving linkspam
|
|
||||||
b - too much bold is bad, typographically speaking bold is discouraged
|
|
||||||
br - often misused
|
|
||||||
center - CSS, usually no legit use
|
|
||||||
del - only useful in editing context
|
|
||||||
div - little meaning in certain contexts i.e. blog comment
|
|
||||||
h1 - usually no legit use, as header is already set by application
|
|
||||||
h* - not needed in blog comments
|
|
||||||
hr - usually not necessary in blog comments
|
|
||||||
img - could be extremely undesirable if linking to external pics
|
|
||||||
pre - could use formatting, only useful in code contexts
|
|
||||||
q - very little support
|
|
||||||
s - transform into span with styling or del?
|
|
||||||
small - technically presentational
|
|
||||||
span - depends on attribute allowances
|
|
||||||
sub, sup - specialized
|
|
||||||
u - little legit use, prefer class with text-decoration
|
|
149
docs/index.html
Normal file
149
docs/index.html
Normal file
@@ -0,0 +1,149 @@
|
|||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
|
||||||
|
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
|
||||||
|
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"><head>
|
||||||
|
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
|
||||||
|
<meta name="description" content="Index to all HTML Purifier documentation." />
|
||||||
|
<link rel="stylesheet" type="text/css" href="./style.css" />
|
||||||
|
|
||||||
|
<title>Documentation - HTML Purifier</title>
|
||||||
|
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
|
||||||
|
<h1>Documentation</h1>
|
||||||
|
|
||||||
|
<p><strong>HTML Purifier</strong> has documentation for all types of people.
|
||||||
|
Here is an index of all of them.</p>
|
||||||
|
|
||||||
|
<h2>End-user</h2>
|
||||||
|
<p>End-user documentation that contains articles, tutorials and useful
|
||||||
|
information for casual developers using HTML Purifier.</p>
|
||||||
|
|
||||||
|
<dl>
|
||||||
|
|
||||||
|
<dt><a href="enduser-id.html">IDs</a></dt>
|
||||||
|
<dd>Explains various methods for allowing IDs in documents safely in HTML Purifier.</dd>
|
||||||
|
|
||||||
|
</dl>
|
||||||
|
|
||||||
|
<h2>Development</h2>
|
||||||
|
<p>Developer documentation detailing code issues, roadmaps and project
|
||||||
|
conventions.</p>
|
||||||
|
|
||||||
|
<dl>
|
||||||
|
|
||||||
|
<dt><a href="dev-code-quality.html">Code Quality Issues</a></dt>
|
||||||
|
<dd>Discusses code quality issues and places that need to be refactored.</dd>
|
||||||
|
|
||||||
|
<dt><a href="dev-progress.html">Implementation Progress</a></dt>
|
||||||
|
<dd>Tables detailing HTML element and CSS property implementation coverage.</dd>
|
||||||
|
|
||||||
|
<dt><a href="dev-naming.html">Naming Conventions</a></dt>
|
||||||
|
<dd>Defines class naming conventions.</dd>
|
||||||
|
|
||||||
|
<dt><a href="dev-optimization.html">Optimization</a></dt>
|
||||||
|
<dd>Discusses possible methods of optimizing HTML Purifier.</dd>
|
||||||
|
|
||||||
|
</dl>
|
||||||
|
|
||||||
|
<h2>Proposals</h2>
|
||||||
|
<p>Proposed features, as well as the associated rambling to get a clear
|
||||||
|
objective in place before attempted implementation.</p>
|
||||||
|
|
||||||
|
<dl>
|
||||||
|
<dt><a href="proposal-colors.html">Colors</a></dt>
|
||||||
|
<dd>Proposal to allow for color constraints.</dd>
|
||||||
|
</dl>
|
||||||
|
|
||||||
|
<h2>Reference</h2>
|
||||||
|
<p>Miscellaneous essays, research pieces and other reference type material
|
||||||
|
that may not directly discuss HTML Purifier.</p>
|
||||||
|
|
||||||
|
<dl>
|
||||||
|
<dt><a href="ref-devnetwork.html">DevNetwork Credits</a></dt>
|
||||||
|
<dd>Credits and links to DevNetwork forum topics.</dd>
|
||||||
|
</dl>
|
||||||
|
|
||||||
|
<h2>Internal memos</h2>
|
||||||
|
|
||||||
|
<p>Plaintext documents that are more for use by active developers of
|
||||||
|
the code. They may be upgraded to HTML files or stay as TXT scratchpads.</p>
|
||||||
|
|
||||||
|
<table class="table">
|
||||||
|
|
||||||
|
<thead><tr>
|
||||||
|
<th width="10%">Type</th>
|
||||||
|
<th width="20%">Name</th>
|
||||||
|
<th>Description</th>
|
||||||
|
</tr></thead>
|
||||||
|
|
||||||
|
<tbody>
|
||||||
|
|
||||||
|
<tr>
|
||||||
|
<td>End-user</td>
|
||||||
|
<td><a href="enduser-overview.txt">Overview</a></td>
|
||||||
|
<td>High level overview of the general control flow (mostly obsolete).</td>
|
||||||
|
</tr>
|
||||||
|
|
||||||
|
<tr>
|
||||||
|
<td>End-user</td>
|
||||||
|
<td><a href="enduser-security.txt">Security</a></td>
|
||||||
|
<td>Common security issues that may still arise (half-baked).</td>
|
||||||
|
</tr>
|
||||||
|
|
||||||
|
<tr>
|
||||||
|
<td>Proposal</td>
|
||||||
|
<td><a href="proposal-filter-levels.txt">Filter levels</a></td>
|
||||||
|
<td>Outlines details of projected configurable level of filtering.</td>
|
||||||
|
</tr>
|
||||||
|
|
||||||
|
<tr>
|
||||||
|
<td>Proposal</td>
|
||||||
|
<td><a href="proposal-language.txt">Language</a></td>
|
||||||
|
<td>Specification of I18N for error messages derived from MediaWiki (half-baked).</td>
|
||||||
|
</tr>
|
||||||
|
|
||||||
|
<tr>
|
||||||
|
<td>Proposal</td>
|
||||||
|
<td><a href="proposal-new-directives.txt">New directives</a></td>
|
||||||
|
<td>Assorted configuration options that could be implemented.</td>
|
||||||
|
</tr>
|
||||||
|
|
||||||
|
<tr>
|
||||||
|
<td>Reference</td>
|
||||||
|
<td><a href="ref-loose-vs-strict.txt">Loose vs. Strict</a></td>
|
||||||
|
<td>Differences between HTML Strict and Transitional versions.</td>
|
||||||
|
</tr>
|
||||||
|
|
||||||
|
<tr>
|
||||||
|
<td>Reference</td>
|
||||||
|
<td><a href="ref-proprietary-tags.txt">Proprietary tags</a></td>
|
||||||
|
<td>List of vendor-specific tags we may want to transform to W3C compliant markup.</td>
|
||||||
|
</tr>
|
||||||
|
|
||||||
|
<tr>
|
||||||
|
<td>Reference</td>
|
||||||
|
<td><a href="ref-strictness.txt">Strictness</a></td>
|
||||||
|
<td>Short essay on how loose definition isn't really loose.</td>
|
||||||
|
</tr>
|
||||||
|
|
||||||
|
<tr>
|
||||||
|
<td>Reference</td>
|
||||||
|
<td><a href="ref-xhtml-1.1.txt">XHTML 1.1</a></td>
|
||||||
|
<td>What we'd have to do to support XHTML 1.1.</td>
|
||||||
|
</tr>
|
||||||
|
|
||||||
|
<tr>
|
||||||
|
<td>Reference</td>
|
||||||
|
<td><a href="ref-whatwg.txt">WHATWG</a></td>
|
||||||
|
<td>How WHATWG plays into what we need to do.</td>
|
||||||
|
</tr>
|
||||||
|
|
||||||
|
</tbody>
|
||||||
|
|
||||||
|
</table>
|
||||||
|
|
||||||
|
<div id="version">$Id$</div>
|
||||||
|
</body>
|
||||||
|
</html>
|
@@ -1,56 +0,0 @@
|
|||||||
|
|
||||||
Naming
|
|
||||||
|
|
||||||
The classes in this library follow a few naming conventions, which may
|
|
||||||
help you find the correct functionality more quickly. Here they are:
|
|
||||||
|
|
||||||
All classes occupy the HTMLPurifier pseudo-namespace.
|
|
||||||
This means that all classes are prefixed with HTMLPurifier_. As such, all
|
|
||||||
names under HTMLPurifier_ are reserved. I recommend that you use the name
|
|
||||||
HTMLPurifierX_YourName_ClassName, especially if you want to take advantage
|
|
||||||
of HTMLPurifier_ConfigDef.
|
|
||||||
|
|
||||||
All classes correspond to their path if library/ was in the include path
|
|
||||||
HTMLPurifier_AttrDef is located at HTMLPurifier/AttrDef.php; replace
|
|
||||||
underscores with slashes and append .php and you'll have the location of
|
|
||||||
the class.
|
|
||||||
|
|
||||||
Harness and Test are reserved class names for unit tests
|
|
||||||
The suffix "Test" indicates that the class is a subclass of UnitTestCase
|
|
||||||
(of the Simpletest library) and is testable. "Harness" indicates a subclass
|
|
||||||
of UnitTestCase that is not meant to be run but to be extended into
|
|
||||||
concrete test cases and contains custom test methods (i.e. assert*())
|
|
||||||
|
|
||||||
Class names do not necessarily represent inheritance hierarchies
|
|
||||||
While we try to reflect inheritance in naming to some extent, it is not
|
|
||||||
guaranteed (for instance, none of the classes inherit from HTMLPurifier,
|
|
||||||
the base class). However, all class files have the require_once
|
|
||||||
declarations to whichever classes they are tightly coupled to.
|
|
||||||
|
|
||||||
Strategy has a meaning different from the Gang of Four pattern
|
|
||||||
In Design Patterns, the Gang of Four describes a Strategy object as
|
|
||||||
encapsulating an algorithm so that they can be switched at run-time. While
|
|
||||||
our strategies are indeed algorithms, they are not meant to be substituted:
|
|
||||||
all must be present in order for proper functioning.
|
|
||||||
|
|
||||||
Abbreviations are avoided
|
|
||||||
We try to avoid abbreviations as much as possible, but in some cases,
|
|
||||||
abbreviated version is more readable than the full version. Here, we
|
|
||||||
list common abbreviations:
|
|
||||||
Attr(s) -> Attribute(s)
|
|
||||||
Def -> Definition
|
|
||||||
|
|
||||||
Ambiguity concerning the definition of Def/Definition
|
|
||||||
While a definition normally defines the structure/acceptable values of
|
|
||||||
an entity, most of the definitions in this application also attempt
|
|
||||||
to validate and fix the value. I am unsure of a better name, as
|
|
||||||
"Validator" would exclude fixing the value, "Fixer" doesn't invoke
|
|
||||||
the proper image of "fixing" something, and "ValidatorFixer" is too long!
|
|
||||||
Some other suggestions were "Handler", "Reference", "Check", "Fix",
|
|
||||||
"Repair" and "Heal".
|
|
||||||
|
|
||||||
Transform not Transformer
|
|
||||||
Transform is both a noun and a verb, and thus we define a "Transform" as
|
|
||||||
something that "transforms," leaving "Transformer" (which sounds like an
|
|
||||||
electrical device/robot toy).
|
|
||||||
|
|
@@ -1,11 +0,0 @@
|
|||||||
|
|
||||||
Optimization
|
|
||||||
|
|
||||||
Here are some possible optimization techniques we can apply to code sections if
|
|
||||||
they turn out to be slow. Be sure not to prematurely optimize though!
|
|
||||||
|
|
||||||
- Make Tokens Flyweights (may prove problematic, probably not worth it)
|
|
||||||
- Rewrite regexps into PHP code
|
|
||||||
- Serialize the Definition object
|
|
||||||
- Batch regexp validation (do as many per function call as possible)
|
|
||||||
- Parallelize strategies
|
|
47
docs/proposal-colors.html
Normal file
47
docs/proposal-colors.html
Normal file
@@ -0,0 +1,47 @@
|
|||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
|
||||||
|
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
|
||||||
|
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"><head>
|
||||||
|
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
|
||||||
|
<meta name="description" content="Proposal to allow for color constraints in HTML Purifier." />
|
||||||
|
<link rel="stylesheet" type="text/css" href="./style.css" />
|
||||||
|
|
||||||
|
<title>Proposal: Colors - HTML Purifier</title>
|
||||||
|
|
||||||
|
</head><body>
|
||||||
|
|
||||||
|
<h1 class="subtitled">Colors</h1>
|
||||||
|
<div class="subtitle">Hammering some sense into those color-blind newbies</div>
|
||||||
|
|
||||||
|
<div id="filing">Filed under Proposals</div>
|
||||||
|
<div id="index">Return to the <a href="index.html">index</a>.</div>
|
||||||
|
|
||||||
|
<p>Your website probably has a color-scheme.
|
||||||
|
<span style="color:#090; background:#FFF;">Green on white</span>,
|
||||||
|
<span style="color:#A0F; background:#FF0;">purple on yellow</span>,
|
||||||
|
whatever. When you give users the ability to style their content, you may
|
||||||
|
want them to keep in line with your styling. If your website is all
|
||||||
|
about light colors, you don't want a user to come in and vandalize your
|
||||||
|
page with a deep maroon.</p>
|
||||||
|
|
||||||
|
<p>This is an extremely silly feature proposal, but I'm writing it down anyway.</p>
|
||||||
|
|
||||||
|
<p>What if the user could constrain the colors specified in inline styles? You
|
||||||
|
are only allowed to use these shades of dark green for text and these shades
|
||||||
|
of light yellow for the background. At the very least, you could ensure
|
||||||
|
that we did not have pale yellow text on a white background.</p>
|
||||||
|
|
||||||
|
<h2>Implementation issues</h2>
|
||||||
|
|
||||||
|
<ol>
|
||||||
|
<li>Requires the color attribute definition to know, currently, what the text
|
||||||
|
and background colors are. This becomes difficult when classes are thrown
|
||||||
|
into the mix.</li>
|
||||||
|
<li>The user still has to define the permissible colors, how does one do
|
||||||
|
something like that?</li>
|
||||||
|
</ol>
|
||||||
|
|
||||||
|
<div id="version">$Id$</div>
|
||||||
|
|
||||||
|
</body>
|
||||||
|
</html>
|
@@ -10,12 +10,9 @@ Directives are divided into namespaces, indicating the major portion of
|
|||||||
functionality they cover (although there may be overlaps). Please consult
|
functionality they cover (although there may be overlaps). Please consult
|
||||||
the documentation in ConfigDef for more information on these namespaces.
|
the documentation in ConfigDef for more information on these namespaces.
|
||||||
|
|
||||||
Since configuration is dependent on context, most of the internal classes
|
Since configuration is dependent on context, internal classes require a
|
||||||
require a configuration object to be passed as a parameter. However, a few
|
configuration object to be passed as a parameter. (They also require a
|
||||||
make this optional: they will supply a default configuration object if none
|
Context object).
|
||||||
are passed. These classes are: HTMLPurifier::*, Generator::generateFromTokens
|
|
||||||
and Lexer::tokenizeHTML. However, whenever a valid configuration object
|
|
||||||
is defined, that object should be used.
|
|
||||||
|
|
||||||
In relation to HTMLDefinition and CSSDefinition, there is a special class
|
In relation to HTMLDefinition and CSSDefinition, there is a special class
|
||||||
of directives that influence the *construction* of the Definition object.
|
of directives that influence the *construction* of the Definition object.
|
130
docs/proposal-filter-levels.txt
Normal file
130
docs/proposal-filter-levels.txt
Normal file
@@ -0,0 +1,130 @@
|
|||||||
|
|
||||||
|
Filter Levels
|
||||||
|
When one size *does not* fit all
|
||||||
|
|
||||||
|
The more I think about it, the less sense it makes for maintaining one huge
|
||||||
|
monolithic HTMLDefinition class. There's simply so much variation that
|
||||||
|
could go into this definition: the set of HTML good for blog entries is
|
||||||
|
definitely too large for HTML that would be allowed in blog comments. Going
|
||||||
|
from Transitional to Strict requires changes to the definition.
|
||||||
|
|
||||||
|
Allowing users to specify their own whitelists is one step (implemented, btw),
|
||||||
|
but I have doubts on only doing this. Simply put, the typical programmer is too
|
||||||
|
lazy to actually go through the trouble of investigating which tags, attributes
|
||||||
|
and properties to allow. HTMLDefinition makes a big part of what HTMLPurifier
|
||||||
|
is.
|
||||||
|
|
||||||
|
The idea, then, is to set up a fundamentally different set of definitions, which
|
||||||
|
can further be customized using simpler configuration options.
|
||||||
|
|
||||||
|
Here are some fuzzy levels you could set:
|
||||||
|
|
||||||
|
1. Comments - Wordpress recommends a, abbr, acronym, b, blockquote, cite,
|
||||||
|
code, em, i, strike, strong; however, you could get away with only a, em and
|
||||||
|
p; also having blockquote and pre tags would be helpful.
|
||||||
|
2. BBCode - Emulate the usual tagset for forums: b, i, img, a, blockquote,
|
||||||
|
pre, div, span and h[2-6] (the last three are for specially formatted
|
||||||
|
posts, div and span require associated classes or inline styling enabled
|
||||||
|
to be useful)
|
||||||
|
3. Pages - As permissive as possible without allowing XSS. No protection
|
||||||
|
against bad design sense, unfortunately. Suitable for wiki and page
|
||||||
|
environments. (probably what we have now)
|
||||||
|
4. Lint - Accept everything in the spec, a Tidy wannabe. (This probably won't
|
||||||
|
get implemented as it would require routines for things like <object>
|
||||||
|
and friends to be implemented, which is a lot of work for not a lot of
|
||||||
|
benefit)
|
||||||
|
|
||||||
|
One final note: when you start axing tags that are more commonly used, you
|
||||||
|
run the risk of accidentally destroying user data, especially if the data
|
||||||
|
is incoming from a WYSIWYG editor that hasn't been synced accordingly. This may
|
||||||
|
make forbidden element to text transformations desirable (for example, images).
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
== Element Risk Analysis ==
|
||||||
|
|
||||||
|
Legend:
|
||||||
|
[danger level] - regular tags / uncommon tags ~ deprecated tags
|
||||||
|
[danger level]* - rare tags
|
||||||
|
|
||||||
|
1 - blockquote, code, em, i, p, tt / strong, sub, sup
|
||||||
|
1* - abbr, acronym, bdo, cite, dfn, kbd, q, samp
|
||||||
|
2 - b, br, del, div, pre, span / ins, s, strike ~ u
|
||||||
|
3 - h2, h3, h4, h5, h6 ~ center
|
||||||
|
4 - h1, big ~ font
|
||||||
|
5 - a
|
||||||
|
7 - area, map
|
||||||
|
|
||||||
|
These are special use tags, they should be enabled on a blanket basis.
|
||||||
|
|
||||||
|
Lists - dd, dl, dt, li, ol, ul ~ menu, dir
|
||||||
|
Tables - caption, table, td, th, tr / col, colgroup, tbody, tfoot, thead
|
||||||
|
|
||||||
|
Forms - fieldset, form, input, label, legend, optgroup, option, select, textarea
|
||||||
|
XSS - noscript, object, script ~ applet
|
||||||
|
Meta - base, basefont, body, head, html, link, meta, style, title
|
||||||
|
Frames - frame, frameset, iframe
|
||||||
|
|
||||||
|
And tag specific notes:
|
||||||
|
|
||||||
|
a - general problems involving linkspam
|
||||||
|
b - too much bold is bad, typographically speaking bold is discouraged
|
||||||
|
br - often misused
|
||||||
|
center - CSS, usually no legit use
|
||||||
|
del - only useful in editing context
|
||||||
|
div - little meaning in certain contexts i.e. blog comment
|
||||||
|
h1 - usually no legit use, as header is already set by application
|
||||||
|
h* - not needed in blog comments
|
||||||
|
hr - usually not necessary in blog comments
|
||||||
|
img - could be extremely undesirable if linking to external pics (CSRF, goatse)
|
||||||
|
pre - could use formatting, only useful in code contexts
|
||||||
|
q - very little support
|
||||||
|
s - transform into span with styling or del?
|
||||||
|
small - technically presentational
|
||||||
|
span - depends on attribute allowances
|
||||||
|
sub, sup - specialized
|
||||||
|
u - little legit use, prefer class with text-decoration
|
||||||
|
|
||||||
|
Based on the riskiness of the items, we may want to offer %HTML.DisableImages
|
||||||
|
attribute and put URI filtering higher up on the priority list.
|
||||||
|
|
||||||
|
|
||||||
|
== Attribute Risk Analysis ==
|
||||||
|
|
||||||
|
We actually have a surprisingly small assortment of allowed attributes (the
|
||||||
|
rest are deprecated in strict, and thus we opted not to allow them, even
|
||||||
|
though our output is XHTML Transitional by default.)
|
||||||
|
|
||||||
|
Required URI - img.alt, img.src, a.href
|
||||||
|
Medium risk - *.class, *.dir
|
||||||
|
High risk - img.height, img.width, *.id, *.style
|
||||||
|
|
||||||
|
Table - colgroup/col.span, td/th.rowspan, td/th.colspan
|
||||||
|
Uncommon - *.title, *.lang, *.xml:lang
|
||||||
|
Rare - td/th.abbr, table.summary, {table}.charoff
|
||||||
|
Rare URI - del.cite, ins.cite, blockquote.cite, q.cite, img.longdesc
|
||||||
|
Presentational - {table}.align, {table}.valign, table.frame, table.rules,
|
||||||
|
table.border
|
||||||
|
Partially presentational - table.cellpadding, table.cellspacing,
|
||||||
|
table.width, col.width, colgroup.width
|
||||||
|
|
||||||
|
|
||||||
|
== CSS Risk Analysis ==
|
||||||
|
|
||||||
|
There are certain CSS elements that are extremely useful inline, but then
|
||||||
|
as you get to more presentation oriented styling it may not always be
|
||||||
|
appropriate to inline them.
|
||||||
|
|
||||||
|
Useful - clear, float, border-collapse, caption-side
|
||||||
|
|
||||||
|
These CSS properties can break layouts if used improperly. We have excluded
|
||||||
|
any CSS properties that are not currently implemented (such as position).
|
||||||
|
|
||||||
|
Dangerous, can go outside container - float
|
||||||
|
Easy to abuse - font-size, font-family (font), width
|
||||||
|
Colored - background-color (background), border-color (border), color
|
||||||
|
Dramatic - border, list-style-position (list-style), margin, padding,
|
||||||
|
text-align, text-indent, text-transform, vertical-align, line-height
|
||||||
|
|
||||||
|
Dramatic elements substantially change the look of text in ways that should
|
||||||
|
probably have been reserved to other areas.
|
98
docs/proposal-language.txt
Normal file
98
docs/proposal-language.txt
Normal file
@@ -0,0 +1,98 @@
|
|||||||
|
We are going to model our I18N/L10N off of MediaWiki's system. Theirs is
|
||||||
|
obviously quite complicated, so we're going to simplify it a bit for our needs.
|
||||||
|
|
||||||
|
== Structure ==
|
||||||
|
|
||||||
|
First, you have a Language object. This object contains all the localisable
|
||||||
|
message strings, as well as other important language-specific settings and
|
||||||
|
custom behavior (uppercasing, lowercasing, printing dates, formatting
|
||||||
|
numbers, etc.)
|
||||||
|
|
||||||
|
The object is constructed from two sources: subclassed versions of itself
|
||||||
|
(classes) and Message files (messages).
|
||||||
|
|
||||||
|
== General use ==
|
||||||
|
|
||||||
|
You load a language object by calling the Language::factory() function.
|
||||||
|
This function loads the class file for the object (taking into account fallback
|
||||||
|
languages by using the fallback language's object but overloading the
|
||||||
|
language key) and returns that object. Nothing else happens.
|
||||||
|
|
||||||
|
When a message/etc is requested, a lazy load initializer is called. Now the
|
||||||
|
real work starts. We're first going to take the scenario that the language
|
||||||
|
is not cached. The system loads the Messages file by:
|
||||||
|
|
||||||
|
require( $filename );
|
||||||
|
$cache = compact( self::$mLocalisationKeys );
|
||||||
|
|
||||||
|
...where self::$mLocalisationKeys is the name of variables that could be used
|
||||||
|
in the localization file. This lets you use things like:
|
||||||
|
|
||||||
|
$fallback = false;
|
||||||
|
$rtl = false;
|
||||||
|
|
||||||
|
...and easily siphon them into arrays.
|
||||||
|
|
||||||
|
Then, we load the $fallback language (if not set, English) to fill in the gaps in
|
||||||
|
the messages. There is specialized behavior for certain keys, as they can be
|
||||||
|
mergeable maps, lists or alias lists (not sure what the last one is).
|
||||||
|
|
||||||
|
== Caching ==
|
||||||
|
|
||||||
|
MediaWiki has lots of caching mechanisms built in, which make the code somewhat
|
||||||
|
more difficult to understand. Before doing any loading, MediaWiki will check
|
||||||
|
the following places to see if we can be lazy:
|
||||||
|
|
||||||
|
1. $mLocalisationCache[$code] - just a variable where it may have been stashed
|
||||||
|
2. serialized/$code.ser - compiled serialized language file
|
||||||
|
3. Memcached version of file (with expiration checking)
|
||||||
|
|
||||||
|
Expiration checking consists of ensuring all dependencies have filemtime
|
||||||
|
that match the ones bundled with the cached copy. Similar checking could be
|
||||||
|
implemented for serialized versions, as it seems that they are not updated
|
||||||
|
until manually recompiled.
|
||||||
|
|
||||||
|
== Behavior ==
|
||||||
|
|
||||||
|
Things that are localizable:
|
||||||
|
|
||||||
|
- Weekdays (and abbrev)
|
||||||
|
- Months (and abbrev)
|
||||||
|
- Bookstores
|
||||||
|
- Skin names
|
||||||
|
- Date preferences / Custom date format
|
||||||
|
- Default date format
|
||||||
|
- Default user option overrides
|
||||||
|
-+ Language names
|
||||||
|
- Timezones
|
||||||
|
-+ Character encoding conversion via iconv
|
||||||
|
- UpperLowerCase first (needs casemaps for some)
|
||||||
|
- UpperLowerCase
|
||||||
|
- Uppercase words
|
||||||
|
- Uppercase word breaks
|
||||||
|
- Case folding
|
||||||
|
- Strip punctuation for MySQL search
|
||||||
|
- Get first character
|
||||||
|
-+ Alternate encoding
|
||||||
|
-+ Recoding for edit (and then recode input)
|
||||||
|
-+ RTL
|
||||||
|
-+ Direction mark character depending on RTL
|
||||||
|
-? Arrow depending on RTL
|
||||||
|
- Languages where italics cannot be used
|
||||||
|
-+ Number formatting (commafy, transform digits, transform separators)
|
||||||
|
- Truncate (multibyte)
|
||||||
|
- Grammar conversions for inflected languages
|
||||||
|
- Plural transformations
|
||||||
|
- Formatting expiry times
|
||||||
|
- Segmenting for diffs (Chinese)
|
||||||
|
- Convert to variants of language
|
||||||
|
- Language specific user preference options
|
||||||
|
- Link trails [[foo]]bar
|
||||||
|
-+ Language code (RFC 3066)
|
||||||
|
|
||||||
|
Neat functionality:
|
||||||
|
|
||||||
|
- I18N sprintfDate
|
||||||
|
- Roman numeral formatting
|
||||||
|
|
||||||
|
Items marked with a + likely need to be addressed by HTML Purifier
|
46
docs/proposal-new-directives.txt
Normal file
46
docs/proposal-new-directives.txt
Normal file
@@ -0,0 +1,46 @@
|
|||||||
|
|
||||||
|
Configuration Ideas
|
||||||
|
|
||||||
|
Here are some theoretical configuration ideas that we could implement some
|
||||||
|
time. Note the naming convention: %Namespace.Directive
|
||||||
|
|
||||||
|
%Attr.IDPrefix - prefix all ids with this
|
||||||
|
|
||||||
|
%Attr.RewriteFragments - if there's %Attr.IDPrefix we may want to transparently
|
||||||
|
rewrite the URLs we parse too. However, we can only do it when it's a pure
|
||||||
|
anchor link, so it's not foolproof
|
||||||
|
|
||||||
|
%Attr.ClassBlacklist,
|
||||||
|
%Attr.ClassWhitelist,
|
||||||
|
%Attr.ClassPolicy - determines what classes are allowed. When
|
||||||
|
%Attr.ClassPolicy is set to Blacklist, only allow those not in
|
||||||
|
%Attr.ClassBlacklist. When it's Whitelist, only allow those in
|
||||||
|
%Attr.ClassWhitelist.
|
||||||
|
|
||||||
|
%Attr.MaxWidth,
|
||||||
|
%Attr.MaxHeight - caps for width and height related checks.
|
||||||
|
(the hack in Pixels for an image crashing attack could be replaced by this)
|
||||||
|
|
||||||
|
%URI.AddRelNofollow - will add rel="nofollow" to all links, preventing the
|
||||||
|
spread of ill-gotten pagerank
|
||||||
|
|
||||||
|
%URI.RelativeToAbsolute - transforms all relative URIs to absolute form
|
||||||
|
|
||||||
|
%URI.HostBlacklistRegex - regexes that if matching the host are disallowed
|
||||||
|
%URI.HostWhitelist - domain names that are excluded from the host blacklist
|
||||||
|
%URI.HostPolicy - determines whether or not it's reject all and then whitelist
|
||||||
|
or allow all in then do specific blacklists with whitelist intervening.
|
||||||
|
'DenyAll' or 'AllowAll' (default)
|
||||||
|
|
||||||
|
%URI.DisableIPHosts - URIs that have IP addresses for hosts are disallowed.
|
||||||
|
Be sure to also grab unusual encodings (dword, hex and octal), which may
|
||||||
|
currently be caught by regular DNS
|
||||||
|
%URI.DisableIDN - Disallow raw internationalized domain names. Punycode
|
||||||
|
will still be permitted.
|
||||||
|
|
||||||
|
%URI.ConvertUnusualIPHosts - transform dword/hex/octal IP addresses to the
|
||||||
|
regular form
|
||||||
|
%URI.ConvertAbsoluteDNS - Remove extra dots after host names that trigger
|
||||||
|
absolute DNS. While this is actually the preferred method according to
|
||||||
|
the RFC, most people opt to use a relative domain name relative to . (root).
|
||||||
|
|
@@ -1,31 +1,44 @@
|
|||||||
<?xml version="1.0" encoding="UTF-8"?>
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
|
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
|
||||||
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
|
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
|
||||||
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"><head>
|
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"><head>
|
||||||
|
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
|
||||||
<title>DevNetwork Forums</title>
|
<meta name="description" content="Credits and links to DevNetwork forum topics on HTML Purifier." />
|
||||||
|
<link rel="stylesheet" type="text/css" href="./style.css" />
|
||||||
</head>
|
|
||||||
<body>
|
<title>DevNetwork Credits - HTML Purifier</title>
|
||||||
|
|
||||||
<p>Many thanks to the DevNetwork community for answering questions,
|
</head>
|
||||||
theorizing about design, and offering encouragement during
|
<body>
|
||||||
the development of this library in these forum threads:</p>
|
|
||||||
|
<h1>DevNetwork Credits</h1>
|
||||||
<ul>
|
|
||||||
<li><a href="http://forums.devnetwork.net/viewtopic.php?t=52905">HTMLPurifier PHP Library hompeage</a></li>
|
<div id="filing">Filed under Reference</div>
|
||||||
<li><a href="http://forums.devnetwork.net/viewtopic.php?t=53056">How much of CSS to implement?</a></li>
|
<div id="index">Return to the <a href="index.html">index</a>.</div>
|
||||||
<li><a href="http://forums.devnetwork.net/viewtopic.php?t=53083">Parsing URL only according to URI : Security Risk?</a></li>
|
|
||||||
<li><a href="http://forums.devnetwork.net/viewtopic.php?t=53096">Gimme a name : URI and friends</a></li>
|
<p>Many thanks to the DevNetwork community for answering questions,
|
||||||
<li><a href="http://forums.devnetwork.net/viewtopic.php?t=53415">How to document configuration directives</a></li>
|
theorizing about design, and offering encouragement during
|
||||||
<li><a href="http://forums.devnetwork.net/viewtopic.php?t=53479">IPv6</a></li>
|
the development of this library in these forum threads:</p>
|
||||||
<li><a href="http://forums.devnetwork.net/viewtopic.php?t=53539">http and ftp versus news and mailto</a></li>
|
|
||||||
<li><a href="http://forums.devnetwork.net/viewtopic.php?t=53579">HTMLPurifier - Take your best shot</a></li>
|
<ul>
|
||||||
<li><a href="http://forums.devnetwork.net/viewtopic.php?t=53664">Need help optimizing a block of code</a>
|
<li><a href="http://forums.devnetwork.net/viewtopic.php?t=52905">HTMLPurifier PHP Library hompeage</a></li>
|
||||||
<li><a href="http://forums.devnetwork.net/viewtopic.php?t=53861">Non-SGML characters</a>
|
<li><a href="http://forums.devnetwork.net/viewtopic.php?t=53056">How much of CSS to implement?</a></li>
|
||||||
<li><a href="http://forums.devnetwork.net/viewtopic.php?t=54283">Wordpress makes me cry</a>
|
<li><a href="http://forums.devnetwork.net/viewtopic.php?t=53083">Parsing URL only according to URI : Security Risk?</a></li>
|
||||||
<li><a href="http://forums.devnetwork.net/viewtopic.php?t=54478">Parameter Object vs. Parameter Array vs. Parameter Functions</a>
|
<li><a href="http://forums.devnetwork.net/viewtopic.php?t=53096">Gimme a name : URI and friends</a></li>
|
||||||
<li><a href="http://forums.devnetwork.net/viewtopic.php?t=54521">Convert encoding where output cannot represent characters</a>
|
<li><a href="http://forums.devnetwork.net/viewtopic.php?t=53415">How to document configuration directives</a></li>
|
||||||
</ul>
|
<li><a href="http://forums.devnetwork.net/viewtopic.php?t=53479">IPv6</a></li>
|
||||||
</body>
|
<li><a href="http://forums.devnetwork.net/viewtopic.php?t=53539">http and ftp versus news and mailto</a></li>
|
||||||
|
<li><a href="http://forums.devnetwork.net/viewtopic.php?t=53579">HTMLPurifier - Take your best shot</a></li>
|
||||||
|
<li><a href="http://forums.devnetwork.net/viewtopic.php?t=53664">Need help optimizing a block of code</a></li>
|
||||||
|
<li><a href="http://forums.devnetwork.net/viewtopic.php?t=53861">Non-SGML characters</a></li>
|
||||||
|
<li><a href="http://forums.devnetwork.net/viewtopic.php?t=54283">Wordpress makes me cry</a></li>
|
||||||
|
<li><a href="http://forums.devnetwork.net/viewtopic.php?t=54478">Parameter Object vs. Parameter Array vs. Parameter Functions</a></li>
|
||||||
|
<li><a href="http://forums.devnetwork.net/viewtopic.php?t=54521">Convert encoding where output cannot represent characters</a></li>
|
||||||
|
<li><a href="http://forums.devnetwork.net/viewtopic.php?t=56411">Reporting errors in a document without line numbers</a></li>
|
||||||
|
</ul>
|
||||||
|
|
||||||
|
<p>...as well as any I may have forgotten.</p>
|
||||||
|
|
||||||
|
<div id="version">$Id$</div>
|
||||||
|
</body>
|
||||||
</html>
|
</html>
|
37
docs/ref-loose-vs-strict.txt
Normal file
37
docs/ref-loose-vs-strict.txt
Normal file
@@ -0,0 +1,37 @@
|
|||||||
|
|
||||||
|
Loose versus Strict
|
||||||
|
Changes from one doctype to another
|
||||||
|
|
||||||
|
There are changes. Wow, how insightful. Not everything changed is relevant
|
||||||
|
to HTML Purifier, though, so let's take a look:
|
||||||
|
|
||||||
|
== Major incompatibilities ==
|
||||||
|
|
||||||
|
[done] BLOCKQUOTE changes from 'flow' to 'block'
|
||||||
|
current behavior: inline inner contents should not be nuked, block-ify as necessary
|
||||||
|
[partially-done] U, S, STRIKE cut
|
||||||
|
current behavior: removed completely
|
||||||
|
projected behavior: replace with appropriate inline span + CSS
|
||||||
|
[done] ADDRESS from potpourri to Inline (removes p tags)
|
||||||
|
current behavior: block tags silently dropped
|
||||||
|
ideal behavior: replace tags with something like <br>. (not high priority)
|
||||||
|
|
||||||
|
== Things we can loosen up ==
|
||||||
|
|
||||||
|
Tags DIR, MENU, CENTER, ISINDEX, FONT, BASEFONT? allowed in loose
|
||||||
|
current behavior: transform to strict-valid forms
|
||||||
|
Attributes allowed in loose (see attribute transforms in 'dev-progress.html')
|
||||||
|
current behavior: projected to transform into strict-valid forms
|
||||||
|
|
||||||
|
== Periphery issues ==
|
||||||
|
|
||||||
|
A tag's attribute 'target' (for selecting frames) cut
|
||||||
|
current behavior: not allowed at all
|
||||||
|
projected behavior: use loose doctype if needed, needs valid values
|
||||||
|
[done] OL/LI tag's attribute 'start'/'value' (for renumbering lists) cut
|
||||||
|
current behavior: no substitute, just delete when in strict, allow in loose
|
||||||
|
Attribute 'name' deprecated in favor of 'id'
|
||||||
|
current behavior: dropped silently
|
||||||
|
projected behavior: create proper AttrTransform (currently not allowed at all)
|
||||||
|
[done] PRE tag allows SUB/SUP? (strict dtd comment vs syntax, loose disallows)
|
||||||
|
current behavior: disallow as usual
|
22
docs/ref-proprietary-tags.txt
Normal file
22
docs/ref-proprietary-tags.txt
Normal file
@@ -0,0 +1,22 @@
|
|||||||
|
|
||||||
|
Proprietary Tags
|
||||||
|
<nobr> and friends
|
||||||
|
|
||||||
|
Here are some proprietary tags that W3C does not define but occasionally show
|
||||||
|
up in the wild. We have only included tags that would make sense in an
|
||||||
|
HTML Purifier context.
|
||||||
|
|
||||||
|
<align>, block element that aligns (extremely rare)
|
||||||
|
<blackface>, inline that double-bolds text (extremely rare)
|
||||||
|
<comment>, hidden comment for IE and WebTV
|
||||||
|
<multicol cols=number gutter=pixels width=pixels>, multiple columns
|
||||||
|
<nobr>, no linebreaks
|
||||||
|
<spacer align=* type="vertical|horizontal|block">, whitespace in doc,
|
||||||
|
use width/height for block and size for vertical/horizontal (attributes)
|
||||||
|
(extremely rare)
|
||||||
|
<wbr>, potential word break point: allows linebreaks. Only works in <nobr>
|
||||||
|
|
||||||
|
<listing>, monospace pre-variant (extremely rare)
|
||||||
|
<plaintext>, escapes all tags to the end of document
|
||||||
|
<ruby> and friends, (more research needed, appears to be XHTML 1.1 markup)
|
||||||
|
<xmp>, monospace, replace with pre
|
36
docs/ref-strictness.txt
Normal file
36
docs/ref-strictness.txt
Normal file
@@ -0,0 +1,36 @@
|
|||||||
|
|
||||||
|
Is HTML Purifier Strict or Transitional?
|
||||||
|
A little bit of helpful guidance
|
||||||
|
|
||||||
|
Despite the fact that HTML Purifier professes only to support transitional
|
||||||
|
HTML, it rejects a lot of attributes and elements that are actually, indeed,
|
||||||
|
valid. You can investigate progress.html to find out precisely what we
|
||||||
|
are doing to these *deprecated* attributes.
|
||||||
|
|
||||||
|
However, users have found that Strict HTML imposes some quite unreasonable
|
||||||
|
restrictions on certain things. The start and value attributes in ol and
|
||||||
|
li (respectively) perhaps are the most contested. There is currently no
|
||||||
|
widely supported browser method short of JavaScript that can replace these
|
||||||
|
two deprecated attributes. HTML Purifier does not currently support them, but
|
||||||
|
it might behoove us to do so while our output is still transitional.
|
||||||
|
|
||||||
|
Fortunately, that's the only real bugger case. The others have near-perfect
|
||||||
|
CSS equivalents, and were presentational anyway. However, the other question
|
||||||
|
pops up: should we always convert these to the CSS forms when 1. the spec
|
||||||
|
allows them anyway and 2. older browsers support them better? After all, the
|
||||||
|
whole point about CSS is to separate styling from content, so inline styling
|
||||||
|
doesn't solve that problem.
|
||||||
|
|
||||||
|
It's an icky question, and we'll have to deal with it as more and more
|
||||||
|
transforms get implemented. As of right now, however, we currently support
|
||||||
|
these loose-only constructs in loose mode:
|
||||||
|
|
||||||
|
- <ol start="1">, <li value="1"> attributes
|
||||||
|
- <u>, <strike>, <s> tags
|
||||||
|
- flow children in <blockquote>
|
||||||
|
- mixed children in <address>
|
||||||
|
|
||||||
|
The changed child definitions as well as the ol.start and li.value are the most
|
||||||
|
compelling reasons why loose should be used. We may want to offer disabling <u>,
|
||||||
|
<strike> and <s> by themselves.
|
||||||
|
|
9
docs/ref-whatwg.txt
Normal file
9
docs/ref-whatwg.txt
Normal file
@@ -0,0 +1,9 @@
|
|||||||
|
|
||||||
|
Web Hypertext Application Technology Working Group
|
||||||
|
WHATWG
|
||||||
|
|
||||||
|
I don't think we need to worry about them. Untrusted users shouldn't be
|
||||||
|
submitting applications, eh? But if some interesting attribute pops up in
|
||||||
|
their spec, and might be worth supporting, stick it here.
|
||||||
|
|
||||||
|
(none so far, as you can see)
|
20
docs/ref-xhtml-1.1.txt
Normal file
20
docs/ref-xhtml-1.1.txt
Normal file
@@ -0,0 +1,20 @@
|
|||||||
|
|
||||||
|
Getting XHTML 1.1 Working
|
||||||
|
|
||||||
|
It's quite simple, according to <http://www.w3.org/TR/xhtml11/changes.html>
|
||||||
|
|
||||||
|
1. Scratch lang entirely in favor of xml:lang
|
||||||
|
2. Scratch name entirely in favor of id (partially-done)
|
||||||
|
3. Support Ruby <http://www.w3.org/TR/2001/REC-ruby-20010531/>
|
||||||
|
|
||||||
|
...but that's only an informative section. More things to do:
|
||||||
|
|
||||||
|
1. Scratch style attribute (it's deprecated)
|
||||||
|
2. Be module-aware
|
||||||
|
3. Cross-reference minimal content models with existing DTDs and determine
|
||||||
|
changes (todo)
|
||||||
|
4. Watch out for the Legacy Module
|
||||||
|
<http://www.w3.org/TR/2001/REC-xhtml-modularization-20010410/abstract_modules.html#s_legacymodule>
|
||||||
|
5. Let users specify their own custom modules
|
||||||
|
6. Study Modularization document
|
||||||
|
<http://www.w3.org/TR/2001/REC-xhtml-modularization-20010410/>
|
40
docs/style.css
Normal file
40
docs/style.css
Normal file
@@ -0,0 +1,40 @@
|
|||||||
|
html {font-size:1em; font-family:serif; }
|
||||||
|
body {margin-left:4em; margin-right:4em; }
|
||||||
|
|
||||||
|
dt {font-weight:bold; }
|
||||||
|
pre {margin-left:2em; }
|
||||||
|
pre, code, tt {font-family:monospace; font-size:1em; }
|
||||||
|
|
||||||
|
h1 {text-align:center; font-family:Garamond, serif;
|
||||||
|
font-variant:small-caps;}
|
||||||
|
h2 {border-bottom:1px solid #CCC; font-family:sans-serif; font-weight:normal;
|
||||||
|
font-size:1.3em;}
|
||||||
|
h3 {font-family:sans-serif; font-size:1.1em; font-weight:bold; }
|
||||||
|
h4 {font-family:sans-serif; font-size:0.9em; font-weight:bold; }
|
||||||
|
|
||||||
|
/* For witty quips */
|
||||||
|
.subtitled {margin-bottom:0em;}
|
||||||
|
.subtitle , .subsubtitle {font-size:.8em; margin-bottom:1em;
|
||||||
|
font-style:italic; margin-top:-.2em;text-align:center;}
|
||||||
|
.subsubtitle {text-align:left;margin-left:2em;}
|
||||||
|
|
||||||
|
/* Used for special "See also" links. */
|
||||||
|
.reference {font-style:italic;margin-left:2em;}
|
||||||
|
|
||||||
|
/* Marks off asides, discussions on why something is the way it is */
|
||||||
|
.aside {margin-left:2em; font-family:sans-serif; font-size:0.9em; }
|
||||||
|
|
||||||
|
/* A regular table */
|
||||||
|
.table {border-collapse:collapse; border-bottom:2px solid #888; margin-left:2em; }
|
||||||
|
.table thead th {margin:0; background:#888; color:#FFF; }
|
||||||
|
.table thead th:first-child {-moz-border-radius-topleft:1em;}
|
||||||
|
.table tbody td {border-bottom:1px solid #CCC; padding-right:0.6em;padding-left:0.6em;}
|
||||||
|
|
||||||
|
/* Category of the file */
|
||||||
|
#filing {font-weight:bold; font-size:smaller; }
|
||||||
|
|
||||||
|
/* Contains, without exception, Return to index. */
|
||||||
|
#index {font-size:smaller; }
|
||||||
|
|
||||||
|
/* Contains, without exception, $Id$, for SVN version info. */
|
||||||
|
#version {text-align:right; font-style:italic; margin:2em 0;}
|
10
library/HTMLPurifier.auto.php
Normal file
10
library/HTMLPurifier.auto.php
Normal file
@@ -0,0 +1,10 @@
|
|||||||
|
<?php
|
||||||
|
|
||||||
|
/**
|
||||||
|
* This is a stub include that automatically configures the include path.
|
||||||
|
*/
|
||||||
|
|
||||||
|
set_include_path(dirname(__FILE__) . PATH_SEPARATOR . get_include_path() );
|
||||||
|
require_once 'HTMLPurifier.php';
|
||||||
|
|
||||||
|
?>
|
21
library/HTMLPurifier.func.php
Normal file
21
library/HTMLPurifier.func.php
Normal file
@@ -0,0 +1,21 @@
|
|||||||
|
<?php
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Function wrapper for HTML Purifier for quick use.
|
||||||
|
* @note This function only includes the library when it is called. While
|
||||||
|
* this is efficient for instances when you only use HTML Purifier
|
||||||
|
* on a few of your pages, it murders bytecode caching. You still
|
||||||
|
* need to add HTML Purifier to your path.
|
||||||
|
*/
|
||||||
|
|
||||||
|
function HTMLPurifier($html, $config = null) {
|
||||||
|
static $purifier = false;
|
||||||
|
if (!$purifier) {
|
||||||
|
$init = true;
|
||||||
|
require_once 'HTMLPurifier.php';
|
||||||
|
$purifier = new HTMLPurifier();
|
||||||
|
}
|
||||||
|
return $purifier->purify($html, $config);
|
||||||
|
}
|
||||||
|
|
||||||
|
?>
|
@@ -3,7 +3,7 @@
|
|||||||
/*!
|
/*!
|
||||||
* @mainpage
|
* @mainpage
|
||||||
*
|
*
|
||||||
* HTMLPurifier is an HTML filter that will take an arbitrary snippet of
|
* HTML Purifier is an HTML filter that will take an arbitrary snippet of
|
||||||
* HTML and rigorously test, validate and filter it into a version that
|
* HTML and rigorously test, validate and filter it into a version that
|
||||||
* is safe for output onto webpages. It achieves this by:
|
* is safe for output onto webpages. It achieves this by:
|
||||||
*
|
*
|
||||||
@@ -22,7 +22,7 @@
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
/*
|
/*
|
||||||
HTMLPurifier - Standards Compliant HTML Filtering
|
HTML Purifier 1.3.1 - Standards Compliant HTML Filtering
|
||||||
Copyright (C) 2006 Edward Z. Yang
|
Copyright (C) 2006 Edward Z. Yang
|
||||||
|
|
||||||
This library is free software; you can redistribute it and/or
|
This library is free software; you can redistribute it and/or
|
||||||
@@ -44,6 +44,7 @@
|
|||||||
// they get included
|
// they get included
|
||||||
require_once 'HTMLPurifier/ConfigSchema.php';
|
require_once 'HTMLPurifier/ConfigSchema.php';
|
||||||
require_once 'HTMLPurifier/Config.php';
|
require_once 'HTMLPurifier/Config.php';
|
||||||
|
require_once 'HTMLPurifier/Context.php';
|
||||||
|
|
||||||
require_once 'HTMLPurifier/Lexer.php';
|
require_once 'HTMLPurifier/Lexer.php';
|
||||||
require_once 'HTMLPurifier/Generator.php';
|
require_once 'HTMLPurifier/Generator.php';
|
||||||
@@ -95,16 +96,17 @@ class HTMLPurifier
|
|||||||
*/
|
*/
|
||||||
function purify($html, $config = null) {
|
function purify($html, $config = null) {
|
||||||
$config = $config ? $config : $this->config;
|
$config = $config ? $config : $this->config;
|
||||||
$html = $this->encoder->convertToUTF8($html, $config);
|
$context =& new HTMLPurifier_Context();
|
||||||
|
$html = $this->encoder->convertToUTF8($html, $config, $context);
|
||||||
$html =
|
$html =
|
||||||
$this->generator->generateFromTokens(
|
$this->generator->generateFromTokens(
|
||||||
$this->strategy->execute(
|
$this->strategy->execute(
|
||||||
$this->lexer->tokenizeHTML($html, $config),
|
$this->lexer->tokenizeHTML($html, $config, $context),
|
||||||
$config
|
$config, $context
|
||||||
),
|
),
|
||||||
$config
|
$config, $context
|
||||||
);
|
);
|
||||||
$html = $this->encoder->convertFromUTF8($html, $config);
|
$html = $this->encoder->convertFromUTF8($html, $config, $context);
|
||||||
return $html;
|
return $html;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -1,26 +0,0 @@
|
|||||||
<?php
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Internal data-structure used in attribute validation to accumulate state.
|
|
||||||
*
|
|
||||||
* All it is is a data-structure that holds objects that accumulate state, like
|
|
||||||
* HTMLPurifier_IDAccumulator.
|
|
||||||
*
|
|
||||||
* @param Many functions that accept this object have it as a mandatory
|
|
||||||
* parameter, even when there is no use for it. Though this is
|
|
||||||
* for the same reasons as why HTMLPurifier_Config is a mandatory
|
|
||||||
* parameter, it is also because you cannot assign a default value
|
|
||||||
* to a parameter passed by reference (passing by reference is essential
|
|
||||||
* for context to work in PHP 4).
|
|
||||||
*/
|
|
||||||
|
|
||||||
class HTMLPurifier_AttrContext
|
|
||||||
{
|
|
||||||
/**
|
|
||||||
* Contains an HTMLPurifier_IDAccumulator, which keeps track of used IDs.
|
|
||||||
* @public
|
|
||||||
*/
|
|
||||||
var $id_accumulator;
|
|
||||||
}
|
|
||||||
|
|
||||||
?>
|
|
@@ -1,7 +1,5 @@
|
|||||||
<?php
|
<?php
|
||||||
|
|
||||||
require_once 'HTMLPurifier/AttrContext.php';
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Base class for all validating attribute definitions.
|
* Base class for all validating attribute definitions.
|
||||||
*
|
*
|
||||||
@@ -22,10 +20,7 @@ class HTMLPurifier_AttrDef
|
|||||||
var $minimized = false;
|
var $minimized = false;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Abstract function defined for functions that validate and clean strings.
|
* Validates and cleans passed string according to a definition.
|
||||||
*
|
|
||||||
* This function forms the basis for all the subclasses: they must
|
|
||||||
* define this method.
|
|
||||||
*
|
*
|
||||||
* @public
|
* @public
|
||||||
* @param $string String to be validated and cleaned.
|
* @param $string String to be validated and cleaned.
|
||||||
@@ -48,7 +43,16 @@ class HTMLPurifier_AttrDef
|
|||||||
*
|
*
|
||||||
* @note This method is not entirely standards compliant, as trim() removes
|
* @note This method is not entirely standards compliant, as trim() removes
|
||||||
* more types of whitespace than specified in the spec. In practice,
|
* more types of whitespace than specified in the spec. In practice,
|
||||||
* this is rarely a problem.
|
* this is rarely a problem, as those extra characters usually have
|
||||||
|
* already been removed by HTMLPurifier_Encoder.
|
||||||
|
*
|
||||||
|
* @warning This processing is inconsistent with XML's whitespace handling
|
||||||
|
* as specified by section 3.3.3 and referenced XHTML 1.0 section
|
||||||
|
* 4.7. Compliant processing requires all line breaks normalized
|
||||||
|
* to "\n", so the fix is not as simple as fixing it in this
|
||||||
|
* function. Trim and whitespace collapsing are supposed to only
|
||||||
|
* occur in NMTOKENs. However, note that we are NOT necessarily
|
||||||
|
* parsing XML, thus, this behavior may still be correct.
|
||||||
*
|
*
|
||||||
* @public
|
* @public
|
||||||
*/
|
*/
|
||||||
|
@@ -43,6 +43,7 @@ class HTMLPurifier_AttrDef_CSS extends HTMLPurifier_AttrDef
|
|||||||
$propvalues[$property] = $result;
|
$propvalues[$property] = $result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// procedure does not write the new CSS simultaneously, so it's
|
||||||
// slightly inefficient, but it's the only way of getting rid of
|
// slightly inefficient, but it's the only way of getting rid of
|
||||||
// duplicates. Perhaps config to optimize it, but not now.
|
// duplicates. Perhaps config to optimize it, but not now.
|
||||||
|
|
||||||
|
@@ -24,13 +24,14 @@ class HTMLPurifier_AttrDef_Class extends HTMLPurifier_AttrDef
|
|||||||
// and plus it would complicate optimization efforts (you never
|
// and plus it would complicate optimization efforts (you never
|
||||||
// see that anyway).
|
// see that anyway).
|
||||||
$matches = array();
|
$matches = array();
|
||||||
$pattern = '/(?:(?<=\s)|\A)'.
|
$pattern = '/(?:(?<=\s)|\A)'. // look behind for space or string start
|
||||||
'((?:--|-?[A-Za-z_])[A-Za-z_\-0-9]*)'.
|
'((?:--|-?[A-Za-z_])[A-Za-z_\-0-9]*)'.
|
||||||
'(?:(?=\s)|\z)/';
|
'(?:(?=\s)|\z)/'; // look ahead for space or string end
|
||||||
preg_match_all($pattern, $string, $matches);
|
preg_match_all($pattern, $string, $matches);
|
||||||
|
|
||||||
if (empty($matches[1])) return false;
|
if (empty($matches[1])) return false;
|
||||||
|
|
||||||
|
// reconstruct class string
|
||||||
$new_string = '';
|
$new_string = '';
|
||||||
foreach ($matches[1] as $class_names) {
|
foreach ($matches[1] as $class_names) {
|
||||||
$new_string .= $class_names . ' ';
|
$new_string .= $class_names . ' ';
|
||||||
|
17
library/HTMLPurifier/AttrDef/Email.php
Normal file
17
library/HTMLPurifier/AttrDef/Email.php
Normal file
@@ -0,0 +1,17 @@
|
|||||||
|
<?php
|
||||||
|
|
||||||
|
require_once 'HTMLPurifier/AttrDef.php';
|
||||||
|
|
||||||
|
class HTMLPurifier_AttrDef_Email extends HTMLPurifier_AttrDef
|
||||||
|
{
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Unpacks a mailbox into its display-name and address
|
||||||
|
*/
|
||||||
|
function unpack($string) {
|
||||||
|
// needs to be implemented
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
?>
|
23
library/HTMLPurifier/AttrDef/Email/SimpleCheck.php
Normal file
23
library/HTMLPurifier/AttrDef/Email/SimpleCheck.php
Normal file
@@ -0,0 +1,23 @@
|
|||||||
|
<?php
|
||||||
|
|
||||||
|
require_once 'HTMLPurifier/AttrDef/Email.php';
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Primitive email validation class based on the regexp found at
|
||||||
|
* http://www.regular-expressions.info/email.html
|
||||||
|
*/
|
||||||
|
class HTMLPurifier_AttrDef_Email_SimpleCheck extends HTMLPurifier_AttrDef_Email
|
||||||
|
{
|
||||||
|
|
||||||
|
function validate($string, $config, &$context) {
|
||||||
|
// no support for named mailboxes i.e. "Bob <bob@example.com>"
|
||||||
|
// that needs more percent encoding to be done
|
||||||
|
if ($string == '') return false;
|
||||||
|
$string = trim($string);
|
||||||
|
$result = preg_match('/^[A-Z0-9._%-]+@[A-Z0-9.-]+\.[A-Z]{2,4}$/i', $string);
|
||||||
|
return $result ? $string : false;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
?>
|
@@ -5,15 +5,20 @@ require_once 'HTMLPurifier/AttrDef/IPv4.php';
|
|||||||
require_once 'HTMLPurifier/AttrDef/IPv6.php';
|
require_once 'HTMLPurifier/AttrDef/IPv6.php';
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Validates a host according to the IPv4, IPv6 and DNS specifications.
|
* Validates a host according to the IPv4, IPv6 and DNS (future) specifications.
|
||||||
*/
|
*/
|
||||||
class HTMLPurifier_AttrDef_Host extends HTMLPurifier_AttrDef
|
class HTMLPurifier_AttrDef_Host extends HTMLPurifier_AttrDef
|
||||||
{
|
{
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Instances of HTMLPurifier_AttrDef_IPv4 and HTMLPurifier_AttrDef_IPv6
|
* Instance of HTMLPurifier_AttrDef_IPv4 sub-validator
|
||||||
*/
|
*/
|
||||||
var $ipv4, $ipv6;
|
var $ipv4;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Instance of HTMLPurifier_AttrDef_IPv6 sub-validator
|
||||||
|
*/
|
||||||
|
var $ipv6;
|
||||||
|
|
||||||
function HTMLPurifier_AttrDef_Host() {
|
function HTMLPurifier_AttrDef_Host() {
|
||||||
$this->ipv4 = new HTMLPurifier_AttrDef_IPv4();
|
$this->ipv4 = new HTMLPurifier_AttrDef_IPv4();
|
||||||
@@ -30,6 +35,8 @@ class HTMLPurifier_AttrDef_Host extends HTMLPurifier_AttrDef
|
|||||||
if ($valid === false) return false;
|
if ($valid === false) return false;
|
||||||
return '['. $valid . ']';
|
return '['. $valid . ']';
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// need to do checks on unusual encodings too
|
||||||
$ipv4 = $this->ipv4->validate($string, $config, $context);
|
$ipv4 = $this->ipv4->validate($string, $config, $context);
|
||||||
if ($ipv4 !== false) return $ipv4;
|
if ($ipv4 !== false) return $ipv4;
|
||||||
|
|
||||||
|
@@ -3,6 +3,30 @@
|
|||||||
require_once 'HTMLPurifier/AttrDef.php';
|
require_once 'HTMLPurifier/AttrDef.php';
|
||||||
require_once 'HTMLPurifier/IDAccumulator.php';
|
require_once 'HTMLPurifier/IDAccumulator.php';
|
||||||
|
|
||||||
|
HTMLPurifier_ConfigSchema::define(
|
||||||
|
'Attr', 'IDPrefix', '', 'string',
|
||||||
|
'String to prefix to IDs. If you have no idea what IDs your pages '.
|
||||||
|
'may use, you may opt to simply add a prefix to all user-submitted ID '.
|
||||||
|
'attributes so that they are still usable, but will not conflict with '.
|
||||||
|
'core page IDs. Example: setting the directive to \'user_\' will result in '.
|
||||||
|
'a user submitted \'foo\' to become \'user_foo\' Be sure to set '.
|
||||||
|
'%HTML.EnableAttrID to true before using '.
|
||||||
|
'this. This directive was available since 1.2.0.'
|
||||||
|
);
|
||||||
|
|
||||||
|
HTMLPurifier_ConfigSchema::define(
|
||||||
|
'Attr', 'IDPrefixLocal', '', 'string',
|
||||||
|
'Temporary prefix for IDs used in conjunction with %Attr.IDPrefix. If '.
|
||||||
|
'you need to allow multiple sets of '.
|
||||||
|
'user content on web page, you may need to have a seperate prefix that '.
|
||||||
|
'changes with each iteration. This way, seperately submitted user content '.
|
||||||
|
'displayed on the same page doesn\'t clobber each other. Ideal values '.
|
||||||
|
'are unique identifiers for the content it represents (i.e. the id of '.
|
||||||
|
'the row in the database). Be sure to add a seperator (like an underscore) '.
|
||||||
|
'at the end. Warning: this directive will not work unless %Attr.IDPrefix '.
|
||||||
|
'is set to a non-empty value! This directive was available since 1.2.0.'
|
||||||
|
);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Validates the HTML attribute ID.
|
* Validates the HTML attribute ID.
|
||||||
* @warning Even though this is the id processor, it
|
* @warning Even though this is the id processor, it
|
||||||
@@ -20,7 +44,19 @@ class HTMLPurifier_AttrDef_ID extends HTMLPurifier_AttrDef
|
|||||||
$id = trim($id); // trim it first
|
$id = trim($id); // trim it first
|
||||||
|
|
||||||
if ($id === '') return false;
|
if ($id === '') return false;
|
||||||
if (isset($context->id_accumulator->ids[$id])) return false;
|
|
||||||
|
$prefix = $config->get('Attr', 'IDPrefix');
|
||||||
|
if ($prefix !== '') {
|
||||||
|
$prefix .= $config->get('Attr', 'IDPrefixLocal');
|
||||||
|
// prevent re-appending the prefix
|
||||||
|
if (strpos($id, $prefix) !== 0) $id = $prefix . $id;
|
||||||
|
} elseif ($config->get('Attr', 'IDPrefixLocal') !== '') {
|
||||||
|
trigger_error('%Attr.IDPrefixLocal cannot be used unless '.
|
||||||
|
'%Attr.IDPrefix is set', E_USER_WARNING);
|
||||||
|
}
|
||||||
|
|
||||||
|
$id_accumulator =& $context->get('IDAccumulator');
|
||||||
|
if (isset($id_accumulator->ids[$id])) return false;
|
||||||
|
|
||||||
// we purposely avoid using regex, hopefully this is faster
|
// we purposely avoid using regex, hopefully this is faster
|
||||||
|
|
||||||
@@ -35,7 +71,7 @@ class HTMLPurifier_AttrDef_ID extends HTMLPurifier_AttrDef
|
|||||||
$result = ($trim === '');
|
$result = ($trim === '');
|
||||||
}
|
}
|
||||||
|
|
||||||
if ($result) $context->id_accumulator->add($id);
|
if ($result) $id_accumulator->add($id);
|
||||||
|
|
||||||
// if no change was made to the ID, return the result
|
// if no change was made to the ID, return the result
|
||||||
// else, return the new id if stripping whitespace made it
|
// else, return the new id if stripping whitespace made it
|
||||||
|
@@ -4,6 +4,7 @@ require_once 'HTMLPurifier/AttrDef.php';
|
|||||||
require_once 'HTMLPurifier/URIScheme.php';
|
require_once 'HTMLPurifier/URIScheme.php';
|
||||||
require_once 'HTMLPurifier/URISchemeRegistry.php';
|
require_once 'HTMLPurifier/URISchemeRegistry.php';
|
||||||
require_once 'HTMLPurifier/AttrDef/Host.php';
|
require_once 'HTMLPurifier/AttrDef/Host.php';
|
||||||
|
require_once 'HTMLPurifier/PercentEncoder.php';
|
||||||
|
|
||||||
HTMLPurifier_ConfigSchema::define(
|
HTMLPurifier_ConfigSchema::define(
|
||||||
'URI', 'DefaultScheme', 'http', 'string',
|
'URI', 'DefaultScheme', 'http', 'string',
|
||||||
@@ -11,6 +12,71 @@ HTMLPurifier_ConfigSchema::define(
|
|||||||
'select the proper object validator when no scheme information is present.'
|
'select the proper object validator when no scheme information is present.'
|
||||||
);
|
);
|
||||||
|
|
||||||
|
HTMLPurifier_ConfigSchema::define(
|
||||||
|
'URI', 'Host', null, 'string/null',
|
||||||
|
'Defines the domain name of the server, so we can determine whether or '.
|
||||||
|
'an absolute URI is from your website or not. Not strictly necessary, '.
|
||||||
|
'as users should be using relative URIs to reference resources on your '.
|
||||||
|
'website. It will, however, let you use absolute URIs to link to '.
|
||||||
|
'subdomains of the domain you post here: i.e. example.com will allow '.
|
||||||
|
'sub.example.com. However, higher up domains will still be excluded: '.
|
||||||
|
'if you set %URI.Host to sub.example.com, example.com will be blocked. '.
|
||||||
|
'This directive has been available since 1.2.0.'
|
||||||
|
);
|
||||||
|
|
||||||
|
HTMLPurifier_ConfigSchema::define(
|
||||||
|
'URI', 'DisableExternal', false, 'bool',
|
||||||
|
'Disables links to external websites. This is a highly effective '.
|
||||||
|
'anti-spam and anti-pagerank-leech measure, but comes at a hefty price: no'.
|
||||||
|
'links or images outside of your domain will be allowed. Non-linkified '.
|
||||||
|
'URIs will still be preserved. If you want to be able to link to '.
|
||||||
|
'subdomains or use absolute URIs, specify %URI.Host for your website. '.
|
||||||
|
'This directive has been available since 1.2.0.'
|
||||||
|
);
|
||||||
|
|
||||||
|
HTMLPurifier_ConfigSchema::define(
|
||||||
|
'URI', 'DisableExternalResources', false, 'bool',
|
||||||
|
'Disables the embedding of external resources, preventing users from '.
|
||||||
|
'embedding things like images from other hosts. This prevents '.
|
||||||
|
'access tracking (good for email viewers), bandwidth leeching, '.
|
||||||
|
'cross-site request forging, goatse.cx posting, and '.
|
||||||
|
'other nasties, but also results in '.
|
||||||
|
'a loss of end-user functionality (they can\'t directly post a pic '.
|
||||||
|
'they posted from Flickr anymore). Use it if you don\'t have a '.
|
||||||
|
'robust user-content moderation team. This directive has been '.
|
||||||
|
'available since 1.3.0.'
|
||||||
|
);
|
||||||
|
|
||||||
|
HTMLPurifier_ConfigSchema::define(
|
||||||
|
'URI', 'DisableResources', false, 'bool',
|
||||||
|
'Disables embedding resources, essentially meaning no pictures. You can '.
|
||||||
|
'still link to them though. See %URI.DisableExternalResources for why '.
|
||||||
|
'this might be a good idea. This directive has been available since 1.3.0.'
|
||||||
|
);
|
||||||
|
|
||||||
|
HTMLPurifier_ConfigSchema::define(
|
||||||
|
'URI', 'Munge', null, 'string/null',
|
||||||
|
'Munges all browsable (usually http, https and ftp) URI\'s into some URL '.
|
||||||
|
'redirection service. Pass this directive a URI, with %s inserted where '.
|
||||||
|
'the url-encoded original URI should be inserted (sample: '.
|
||||||
|
'<code>http://www.google.com/url?q=%s</code>). '.
|
||||||
|
'This prevents PageRank leaks, while being as transparent as possible '.
|
||||||
|
'to users (you may also want to add some client side JavaScript to '.
|
||||||
|
'override the text in the statusbar). Warning: many security experts '.
|
||||||
|
'believe that this form of protection does not deter spam-bots. '.
|
||||||
|
'You can also use this directive to redirect users to a splash page '.
|
||||||
|
'telling them they are leaving your website. '.
|
||||||
|
'This directive has been available since 1.3.0.'
|
||||||
|
);
|
||||||
|
|
||||||
|
HTMLPurifier_ConfigSchema::define(
|
||||||
|
'URI', 'HostBlacklist', array(), 'list',
|
||||||
|
'List of strings that are forbidden in the host of any URI. Use it to '.
|
||||||
|
'kill domain names of spam, etc. Note that it will catch anything in '.
|
||||||
|
'the domain, so <tt>moo.com</tt> will catch <tt>moo.com.example.com</tt>. '.
|
||||||
|
'This directive has been available since 1.3.0.'
|
||||||
|
);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Validates a URI as defined by RFC 3986.
|
* Validates a URI as defined by RFC 3986.
|
||||||
* @note Scheme-specific mechanics deferred to HTMLPurifier_URIScheme
|
* @note Scheme-specific mechanics deferred to HTMLPurifier_URIScheme
|
||||||
@@ -19,9 +85,16 @@ class HTMLPurifier_AttrDef_URI extends HTMLPurifier_AttrDef
|
|||||||
{
|
{
|
||||||
|
|
||||||
var $host;
|
var $host;
|
||||||
|
var $PercentEncoder;
|
||||||
|
var $embeds_resource;
|
||||||
|
|
||||||
function HTMLPurifier_AttrDef_URI() {
|
/**
|
||||||
|
* @param $embeds_resource Does the URI here result in an extra HTTP request?
|
||||||
|
*/
|
||||||
|
function HTMLPurifier_AttrDef_URI($embeds_resource = false) {
|
||||||
$this->host = new HTMLPurifier_AttrDef_Host();
|
$this->host = new HTMLPurifier_AttrDef_Host();
|
||||||
|
$this->PercentEncoder = new HTMLPurifier_PercentEncoder();
|
||||||
|
$this->embeds_resource = (bool) $embeds_resource;
|
||||||
}
|
}
|
||||||
|
|
||||||
function validate($uri, $config, &$context) {
|
function validate($uri, $config, &$context) {
|
||||||
@@ -32,6 +105,9 @@ class HTMLPurifier_AttrDef_URI extends HTMLPurifier_AttrDef
|
|||||||
// parse as CDATA
|
// parse as CDATA
|
||||||
$uri = $this->parseCDATA($uri);
|
$uri = $this->parseCDATA($uri);
|
||||||
|
|
||||||
|
// fix up percent-encoding
|
||||||
|
$uri = $this->PercentEncoder->normalize($uri);
|
||||||
|
|
||||||
// while it would be nice to use parse_url(), that's specifically
|
// while it would be nice to use parse_url(), that's specifically
|
||||||
// for HTTP and thus won't work for our generic URI parsing
|
// for HTTP and thus won't work for our generic URI parsing
|
||||||
|
|
||||||
@@ -63,18 +139,38 @@ class HTMLPurifier_AttrDef_URI extends HTMLPurifier_AttrDef
|
|||||||
// no need to validate the scheme's fmt since we do that when we
|
// no need to validate the scheme's fmt since we do that when we
|
||||||
// retrieve the specific scheme object from the registry
|
// retrieve the specific scheme object from the registry
|
||||||
$scheme = ctype_lower($scheme) ? $scheme : strtolower($scheme);
|
$scheme = ctype_lower($scheme) ? $scheme : strtolower($scheme);
|
||||||
$scheme_obj =& $registry->getScheme($scheme, $config);
|
$scheme_obj =& $registry->getScheme($scheme, $config, $context);
|
||||||
if (!$scheme_obj) return false; // invalid scheme, clean it out
|
if (!$scheme_obj) return false; // invalid scheme, clean it out
|
||||||
} else {
|
} else {
|
||||||
$scheme_obj =& $registry->getScheme(
|
$scheme_obj =& $registry->getScheme(
|
||||||
$config->get('URI', 'DefaultScheme'), $config
|
$config->get('URI', 'DefaultScheme'), $config, $context
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// the URI we're processing embeds a resource in the page, but the URI
|
||||||
|
// it references cannot be located
|
||||||
|
if ($this->embeds_resource && !$scheme_obj->browsable) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
if ($authority !== null) {
|
if ($authority !== null) {
|
||||||
|
|
||||||
|
// remove URI if it's absolute and we disabled externals or
|
||||||
|
// if it's absolute and embedded and we disabled external resources
|
||||||
|
unset($our_host);
|
||||||
|
if (
|
||||||
|
$config->get('URI', 'DisableExternal') ||
|
||||||
|
(
|
||||||
|
$config->get('URI', 'DisableExternalResources') &&
|
||||||
|
$this->embeds_resource
|
||||||
|
)
|
||||||
|
) {
|
||||||
|
$our_host = $config->get('URI', 'Host');
|
||||||
|
if ($our_host === null) return false;
|
||||||
|
}
|
||||||
|
|
||||||
$HEXDIG = '[A-Fa-f0-9]';
|
$HEXDIG = '[A-Fa-f0-9]';
|
||||||
$unreserved = 'A-Za-z0-9-._~'; // make sure you wrap with []
|
$unreserved = 'A-Za-z0-9-._~'; // make sure you wrap with []
|
||||||
$sub_delims = '!$&\'()'; // needs []
|
$sub_delims = '!$&\'()'; // needs []
|
||||||
@@ -97,6 +193,19 @@ class HTMLPurifier_AttrDef_URI extends HTMLPurifier_AttrDef
|
|||||||
$host = $this->host->validate($host, $config, $context);
|
$host = $this->host->validate($host, $config, $context);
|
||||||
if ($host === false) $host = null;
|
if ($host === false) $host = null;
|
||||||
|
|
||||||
|
if ($this->checkBlacklist($host, $config, $context)) return false;
|
||||||
|
|
||||||
|
// more lenient absolute checking
|
||||||
|
if (isset($our_host)) {
|
||||||
|
$host_parts = array_reverse(explode('.', $host));
|
||||||
|
// could be cached
|
||||||
|
$our_host_parts = array_reverse(explode('.', $our_host));
|
||||||
|
foreach ($our_host_parts as $i => $discard) {
|
||||||
|
if (!isset($host_parts[$i])) return false;
|
||||||
|
if ($host_parts[$i] != $our_host_parts[$i]) return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// userinfo and host are validated within the regexp
|
// userinfo and host are validated within the regexp
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
@@ -120,7 +229,7 @@ class HTMLPurifier_AttrDef_URI extends HTMLPurifier_AttrDef
|
|||||||
// note that $fragment is omitted
|
// note that $fragment is omitted
|
||||||
list($userinfo, $host, $port, $path, $query) =
|
list($userinfo, $host, $port, $path, $query) =
|
||||||
$scheme_obj->validateComponents(
|
$scheme_obj->validateComponents(
|
||||||
$userinfo, $host, $port, $path, $query, $config
|
$userinfo, $host, $port, $path, $query, $config, $context
|
||||||
);
|
);
|
||||||
|
|
||||||
|
|
||||||
@@ -141,10 +250,37 @@ class HTMLPurifier_AttrDef_URI extends HTMLPurifier_AttrDef
|
|||||||
if ($query !== null) $result .= "?$query";
|
if ($query !== null) $result .= "?$query";
|
||||||
if ($fragment !== null) $result .= "#$fragment";
|
if ($fragment !== null) $result .= "#$fragment";
|
||||||
|
|
||||||
|
// munge if necessary
|
||||||
|
$munge = $config->get('URI', 'Munge');
|
||||||
|
if (!empty($scheme_obj->browsable) && $munge !== null) {
|
||||||
|
if ($authority !== null) {
|
||||||
|
$result = str_replace('%s', rawurlencode($result), $munge);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
return $result;
|
return $result;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Checks a host against an array blacklist
|
||||||
|
* @param $host Host to check
|
||||||
|
* @param $config HTMLPurifier_Config instance
|
||||||
|
* @param $context HTMLPurifier_Context instance
|
||||||
|
* @return bool Is spam?
|
||||||
|
*/
|
||||||
|
function checkBlacklist($host, &$config, &$context) {
|
||||||
|
$blacklist = $config->get('URI', 'HostBlacklist');
|
||||||
|
if (!empty($blacklist)) {
|
||||||
|
foreach($blacklist as $blacklisted_host_fragment) {
|
||||||
|
if (strpos($host, $blacklisted_host_fragment) !== false) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
?>
|
?>
|
||||||
|
@@ -21,11 +21,12 @@ class HTMLPurifier_AttrTransform
|
|||||||
* Abstract: makes changes to the attributes dependent on multiple values.
|
* Abstract: makes changes to the attributes dependent on multiple values.
|
||||||
*
|
*
|
||||||
* @param $attr Assoc array of attributes, usually from
|
* @param $attr Assoc array of attributes, usually from
|
||||||
* HTMLPurifier_Token_Tag::$attributes
|
* HTMLPurifier_Token_Tag::$attr
|
||||||
* @param $config Mandatory HTMLPurifier_Config object.
|
* @param $config Mandatory HTMLPurifier_Config object.
|
||||||
|
* @param $context Mandatory HTMLPurifier_Context object
|
||||||
* @returns Processed attribute array.
|
* @returns Processed attribute array.
|
||||||
*/
|
*/
|
||||||
function transform($attr, $config) {
|
function transform($attr, $config, &$context) {
|
||||||
trigger_error('Cannot call abstract function', E_USER_ERROR);
|
trigger_error('Cannot call abstract function', E_USER_ERROR);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@@ -20,7 +20,7 @@ HTMLPurifier_ConfigSchema::defineAllowedValues(
|
|||||||
class HTMLPurifier_AttrTransform_BdoDir extends HTMLPurifier_AttrTransform
|
class HTMLPurifier_AttrTransform_BdoDir extends HTMLPurifier_AttrTransform
|
||||||
{
|
{
|
||||||
|
|
||||||
function transform($attr, $config) {
|
function transform($attr, $config, $context) {
|
||||||
if (isset($attr['dir'])) return $attr;
|
if (isset($attr['dir'])) return $attr;
|
||||||
$attr['dir'] = $config->get('Attr', 'DefaultTextDir');
|
$attr['dir'] = $config->get('Attr', 'DefaultTextDir');
|
||||||
return $attr;
|
return $attr;
|
||||||
|
@@ -25,7 +25,7 @@ HTMLPurifier_ConfigSchema::define(
|
|||||||
class HTMLPurifier_AttrTransform_ImgRequired extends HTMLPurifier_AttrTransform
|
class HTMLPurifier_AttrTransform_ImgRequired extends HTMLPurifier_AttrTransform
|
||||||
{
|
{
|
||||||
|
|
||||||
function transform($attr, $config) {
|
function transform($attr, $config, $context) {
|
||||||
|
|
||||||
$src = true;
|
$src = true;
|
||||||
if (!isset($attr['src'])) {
|
if (!isset($attr['src'])) {
|
||||||
|
@@ -10,7 +10,7 @@ require_once 'HTMLPurifier/AttrTransform.php';
|
|||||||
class HTMLPurifier_AttrTransform_Lang extends HTMLPurifier_AttrTransform
|
class HTMLPurifier_AttrTransform_Lang extends HTMLPurifier_AttrTransform
|
||||||
{
|
{
|
||||||
|
|
||||||
function transform($attr, $config) {
|
function transform($attr, $config, $context) {
|
||||||
|
|
||||||
$lang = isset($attr['lang']) ? $attr['lang'] : false;
|
$lang = isset($attr['lang']) ? $attr['lang'] : false;
|
||||||
$xml_lang = isset($attr['xml:lang']) ? $attr['xml:lang'] : false;
|
$xml_lang = isset($attr['xml:lang']) ? $attr['xml:lang'] : false;
|
||||||
|
@@ -8,7 +8,7 @@ require_once 'HTMLPurifier/AttrTransform.php';
|
|||||||
class HTMLPurifier_AttrTransform_TextAlign
|
class HTMLPurifier_AttrTransform_TextAlign
|
||||||
extends HTMLPurifier_AttrTransform {
|
extends HTMLPurifier_AttrTransform {
|
||||||
|
|
||||||
function transform($attr, $config) {
|
function transform($attr, $config, $context) {
|
||||||
|
|
||||||
if (!isset($attr['align'])) return $attr;
|
if (!isset($attr['align'])) return $attr;
|
||||||
|
|
||||||
|
@@ -20,10 +20,9 @@ HTMLPurifier_ConfigSchema::define(
|
|||||||
class HTMLPurifier_ChildDef
|
class HTMLPurifier_ChildDef
|
||||||
{
|
{
|
||||||
/**
|
/**
|
||||||
* Type of child definition, usually right-most part of class name lowercase
|
* Type of child definition, usually right-most part of class name lowercase.
|
||||||
*
|
* Used occasionally in terms of context.
|
||||||
* Used occasionally in terms of context. Possible values include
|
* @public
|
||||||
* custom, required, optional and empty.
|
|
||||||
*/
|
*/
|
||||||
var $type;
|
var $type;
|
||||||
|
|
||||||
@@ -32,395 +31,25 @@ class HTMLPurifier_ChildDef
|
|||||||
*
|
*
|
||||||
* This is necessary for redundant checking when changes affecting
|
* This is necessary for redundant checking when changes affecting
|
||||||
* a child node may cause a parent node to now be disallowed.
|
* a child node may cause a parent node to now be disallowed.
|
||||||
|
*
|
||||||
|
* @public
|
||||||
*/
|
*/
|
||||||
var $allow_empty;
|
var $allow_empty;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Validates nodes according to definition and returns modification.
|
* Validates nodes according to definition and returns modification.
|
||||||
*
|
*
|
||||||
* @warning $context is NOT HTMLPurifier_AttrContext
|
* @public
|
||||||
* @param $tokens_of_children Array of HTMLPurifier_Token
|
* @param $tokens_of_children Array of HTMLPurifier_Token
|
||||||
* @param $config HTMLPurifier_Config object
|
* @param $config HTMLPurifier_Config object
|
||||||
* @param $context String context indicating inline, block or unknown
|
* @param $context HTMLPurifier_Context object
|
||||||
* @return bool true to leave nodes as is
|
* @return bool true to leave nodes as is
|
||||||
* @return bool false to remove parent node
|
* @return bool false to remove parent node
|
||||||
* @return array of replacement child tokens
|
* @return array of replacement child tokens
|
||||||
*/
|
*/
|
||||||
function validateChildren($tokens_of_children, $config, $context) {
|
function validateChildren($tokens_of_children, $config, &$context) {
|
||||||
trigger_error('Call to abstract function', E_USER_ERROR);
|
trigger_error('Call to abstract function', E_USER_ERROR);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
?>
|
||||||
* Custom validation class, accepts DTD child definitions
|
|
||||||
*
|
|
||||||
* @warning Currently this class is an all or nothing proposition, that is,
|
|
||||||
* it will only give a bool return value.
|
|
||||||
*/
|
|
||||||
class HTMLPurifier_ChildDef_Custom extends HTMLPurifier_ChildDef
|
|
||||||
{
|
|
||||||
var $type = 'custom';
|
|
||||||
var $allow_empty = false;
|
|
||||||
/**
|
|
||||||
* Allowed child pattern as defined by the DTD
|
|
||||||
*/
|
|
||||||
var $dtd_regex;
|
|
||||||
/**
|
|
||||||
* PCRE regex derived from $dtd_regex
|
|
||||||
* @private
|
|
||||||
*/
|
|
||||||
var $_pcre_regex;
|
|
||||||
/**
|
|
||||||
* @param $dtd_regex Allowed child pattern from the DTD
|
|
||||||
*/
|
|
||||||
function HTMLPurifier_ChildDef_Custom($dtd_regex) {
|
|
||||||
$this->dtd_regex = $dtd_regex;
|
|
||||||
$this->_compileRegex();
|
|
||||||
}
|
|
||||||
/**
|
|
||||||
* Compiles the PCRE regex from a DTD regex ($dtd_regex to $_pcre_regex)
|
|
||||||
*/
|
|
||||||
function _compileRegex() {
|
|
||||||
$raw = str_replace(' ', '', $this->dtd_regex);
|
|
||||||
if ($raw{0} != '(') {
|
|
||||||
$raw = "($raw)";
|
|
||||||
}
|
|
||||||
$reg = str_replace(',', ',?', $raw);
|
|
||||||
$reg = preg_replace('/([#a-zA-Z0-9_.-]+)/', '(,?\\0)', $reg);
|
|
||||||
$this->_pcre_regex = $reg;
|
|
||||||
}
|
|
||||||
function validateChildren($tokens_of_children, $config, $context) {
|
|
||||||
$list_of_children = '';
|
|
||||||
$nesting = 0; // depth into the nest
|
|
||||||
foreach ($tokens_of_children as $token) {
|
|
||||||
if (!empty($token->is_whitespace)) continue;
|
|
||||||
|
|
||||||
$is_child = ($nesting == 0); // direct
|
|
||||||
|
|
||||||
if ($token->type == 'start') {
|
|
||||||
$nesting++;
|
|
||||||
} elseif ($token->type == 'end') {
|
|
||||||
$nesting--;
|
|
||||||
}
|
|
||||||
|
|
||||||
if ($is_child) {
|
|
||||||
$list_of_children .= $token->name . ',';
|
|
||||||
}
|
|
||||||
}
|
|
||||||
$list_of_children = rtrim($list_of_children, ',');
|
|
||||||
|
|
||||||
$okay =
|
|
||||||
preg_match(
|
|
||||||
'/^'.$this->_pcre_regex.'$/',
|
|
||||||
$list_of_children
|
|
||||||
);
|
|
||||||
|
|
||||||
return (bool) $okay;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Definition that allows a set of elements, but disallows empty children.
|
|
||||||
*/
|
|
||||||
class HTMLPurifier_ChildDef_Required extends HTMLPurifier_ChildDef
|
|
||||||
{
|
|
||||||
/**
|
|
||||||
* Lookup table of allowed elements.
|
|
||||||
*/
|
|
||||||
var $elements = array();
|
|
||||||
/**
|
|
||||||
* @param $elements List of allowed element names (lowercase).
|
|
||||||
*/
|
|
||||||
function HTMLPurifier_ChildDef_Required($elements) {
|
|
||||||
if (is_string($elements)) {
|
|
||||||
$elements = str_replace(' ', '', $elements);
|
|
||||||
$elements = explode('|', $elements);
|
|
||||||
}
|
|
||||||
$elements = array_flip($elements);
|
|
||||||
foreach ($elements as $i => $x) $elements[$i] = true;
|
|
||||||
$this->elements = $elements;
|
|
||||||
$this->gen = new HTMLPurifier_Generator();
|
|
||||||
}
|
|
||||||
var $allow_empty = false;
|
|
||||||
var $type = 'required';
|
|
||||||
function validateChildren($tokens_of_children, $config, $context) {
|
|
||||||
// if there are no tokens, delete parent node
|
|
||||||
if (empty($tokens_of_children)) return false;
|
|
||||||
|
|
||||||
// the new set of children
|
|
||||||
$result = array();
|
|
||||||
|
|
||||||
// current depth into the nest
|
|
||||||
$nesting = 0;
|
|
||||||
|
|
||||||
// whether or not we're deleting a node
|
|
||||||
$is_deleting = false;
|
|
||||||
|
|
||||||
// whether or not parsed character data is allowed
|
|
||||||
// this controls whether or not we silently drop a tag
|
|
||||||
// or generate escaped HTML from it
|
|
||||||
$pcdata_allowed = isset($this->elements['#PCDATA']);
|
|
||||||
|
|
||||||
// a little sanity check to make sure it's not ALL whitespace
|
|
||||||
$all_whitespace = true;
|
|
||||||
|
|
||||||
// some configuration
|
|
||||||
$escape_invalid_children = $config->get('Core', 'EscapeInvalidChildren');
|
|
||||||
|
|
||||||
foreach ($tokens_of_children as $token) {
|
|
||||||
if (!empty($token->is_whitespace)) {
|
|
||||||
$result[] = $token;
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
$all_whitespace = false; // phew, we're not talking about whitespace
|
|
||||||
|
|
||||||
$is_child = ($nesting == 0);
|
|
||||||
|
|
||||||
if ($token->type == 'start') {
|
|
||||||
$nesting++;
|
|
||||||
} elseif ($token->type == 'end') {
|
|
||||||
$nesting--;
|
|
||||||
}
|
|
||||||
|
|
||||||
if ($is_child) {
|
|
||||||
$is_deleting = false;
|
|
||||||
if (!isset($this->elements[$token->name])) {
|
|
||||||
$is_deleting = true;
|
|
||||||
if ($pcdata_allowed && $token->type == 'text') {
|
|
||||||
$result[] = $token;
|
|
||||||
} elseif ($pcdata_allowed && $escape_invalid_children) {
|
|
||||||
$result[] = new HTMLPurifier_Token_Text(
|
|
||||||
$this->gen->generateFromToken($token, $config)
|
|
||||||
);
|
|
||||||
}
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (!$is_deleting || ($pcdata_allowed && $token->type == 'text')) {
|
|
||||||
$result[] = $token;
|
|
||||||
} elseif ($pcdata_allowed && $escape_invalid_children) {
|
|
||||||
$result[] =
|
|
||||||
new HTMLPurifier_Token_Text(
|
|
||||||
$this->gen->generateFromToken( $token, $config )
|
|
||||||
);
|
|
||||||
} else {
|
|
||||||
// drop silently
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (empty($result)) return false;
|
|
||||||
if ($all_whitespace) return false;
|
|
||||||
if ($tokens_of_children == $result) return true;
|
|
||||||
return $result;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Definition that allows a set of elements, and allows no children.
|
|
||||||
* @note This is a hack to reuse code from HTMLPurifier_ChildDef_Required,
|
|
||||||
* really, one shouldn't inherit from the other. Only altered behavior
|
|
||||||
* is to overload a returned false with an array. Thus, it will never
|
|
||||||
* return false.
|
|
||||||
*/
|
|
||||||
class HTMLPurifier_ChildDef_Optional extends HTMLPurifier_ChildDef_Required
|
|
||||||
{
|
|
||||||
var $allow_empty = true;
|
|
||||||
var $type = 'optional';
|
|
||||||
function validateChildren($tokens_of_children, $config, $context) {
|
|
||||||
$result = parent::validateChildren($tokens_of_children, $config, $context);
|
|
||||||
if ($result === false) return array();
|
|
||||||
return $result;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Definition that disallows all elements.
|
|
||||||
* @warning validateChildren() in this class is actually never called, because
|
|
||||||
* empty elements are corrected in HTMLPurifier_Strategy_MakeWellFormed
|
|
||||||
* before child definitions are parsed in earnest by
|
|
||||||
* HTMLPurifier_Strategy_FixNesting.
|
|
||||||
*/
|
|
||||||
class HTMLPurifier_ChildDef_Empty extends HTMLPurifier_ChildDef
|
|
||||||
{
|
|
||||||
var $allow_empty = true;
|
|
||||||
var $type = 'empty';
|
|
||||||
function HTMLPurifier_ChildDef_Empty() {}
|
|
||||||
function validateChildren($tokens_of_children, $config, $context) {
|
|
||||||
return array();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Definition that uses different definitions depending on context.
|
|
||||||
*
|
|
||||||
* The del and ins tags are notable because they allow different types of
|
|
||||||
* elements depending on whether or not they're in a block or inline context.
|
|
||||||
* Chameleon allows this behavior to happen by using two different
|
|
||||||
* definitions depending on context. While this somewhat generalized,
|
|
||||||
* it is specifically intended for those two tags.
|
|
||||||
*/
|
|
||||||
class HTMLPurifier_ChildDef_Chameleon extends HTMLPurifier_ChildDef
|
|
||||||
{
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Instance of the definition object to use when inline. Usually stricter.
|
|
||||||
*/
|
|
||||||
var $inline;
|
|
||||||
/**
|
|
||||||
* Instance of the definition object to use when block.
|
|
||||||
*/
|
|
||||||
var $block;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @param $inline List of elements to allow when inline.
|
|
||||||
* @param $block List of elements to allow when block.
|
|
||||||
*/
|
|
||||||
function HTMLPurifier_ChildDef_Chameleon($inline, $block) {
|
|
||||||
$this->inline = new HTMLPurifier_ChildDef_Optional($inline);
|
|
||||||
$this->block = new HTMLPurifier_ChildDef_Optional($block);
|
|
||||||
}
|
|
||||||
|
|
||||||
function validateChildren($tokens_of_children, $config, $context) {
|
|
||||||
switch ($context) {
|
|
||||||
case 'unknown':
|
|
||||||
case 'inline':
|
|
||||||
$result = $this->inline->validateChildren(
|
|
||||||
$tokens_of_children, $config, $context);
|
|
||||||
break;
|
|
||||||
case 'block':
|
|
||||||
$result = $this->block->validateChildren(
|
|
||||||
$tokens_of_children, $config, $context);
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
trigger_error('Invalid context', E_USER_ERROR);
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
return $result;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Definition for tables
|
|
||||||
*/
|
|
||||||
class HTMLPurifier_ChildDef_Table extends HTMLPurifier_ChildDef
|
|
||||||
{
|
|
||||||
var $allow_empty = false;
|
|
||||||
var $type = 'table';
|
|
||||||
function HTMLPurifier_ChildDef_Table() {}
|
|
||||||
function validateChildren($tokens_of_children, $config, $context) {
|
|
||||||
if (empty($tokens_of_children)) return false;
|
|
||||||
|
|
||||||
// this ensures that the loop gets run one last time before closing
|
|
||||||
// up. It's a little bit of a hack, but it works! Just make sure you
|
|
||||||
// get rid of the token later.
|
|
||||||
$tokens_of_children[] = false;
|
|
||||||
|
|
||||||
// only one of these elements is allowed in a table
|
|
||||||
$caption = false;
|
|
||||||
$thead = false;
|
|
||||||
$tfoot = false;
|
|
||||||
|
|
||||||
// as many of these as you want
|
|
||||||
$cols = array();
|
|
||||||
$content = array();
|
|
||||||
|
|
||||||
$nesting = 0; // current depth so we can determine nodes
|
|
||||||
$is_collecting = false; // are we globbing together tokens to package
|
|
||||||
// into one of the collectors?
|
|
||||||
$collection = array(); // collected nodes
|
|
||||||
|
|
||||||
foreach ($tokens_of_children as $token) {
|
|
||||||
$is_child = ($nesting == 0);
|
|
||||||
|
|
||||||
if ($token === false) {
|
|
||||||
// terminating sequence started
|
|
||||||
} elseif ($token->type == 'start') {
|
|
||||||
$nesting++;
|
|
||||||
} elseif ($token->type == 'end') {
|
|
||||||
$nesting--;
|
|
||||||
}
|
|
||||||
|
|
||||||
// handle node collection
|
|
||||||
if ($is_collecting) {
|
|
||||||
if ($is_child) {
|
|
||||||
// okay, let's stash the tokens away
|
|
||||||
// first token tells us the type of the collection
|
|
||||||
switch ($collection[0]->name) {
|
|
||||||
case 'tr':
|
|
||||||
case 'tbody':
|
|
||||||
$content[] = $collection;
|
|
||||||
break;
|
|
||||||
case 'caption':
|
|
||||||
if ($caption !== false) break;
|
|
||||||
$caption = $collection;
|
|
||||||
break;
|
|
||||||
case 'thead':
|
|
||||||
case 'tfoot':
|
|
||||||
// access the appropriate variable, $thead or $tfoot
|
|
||||||
$var = $collection[0]->name;
|
|
||||||
if ($$var === false) {
|
|
||||||
$$var = $collection;
|
|
||||||
} else {
|
|
||||||
// transmutate the first and less entries into
|
|
||||||
// tbody tags, and then put into content
|
|
||||||
$collection[0]->name = 'tbody';
|
|
||||||
$collection[count($collection)-1]->name = 'tbody';
|
|
||||||
$content[] = $collection;
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
case 'colgroup':
|
|
||||||
$cols[] = $collection;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
$collection = array();
|
|
||||||
$is_collecting = false;
|
|
||||||
} else {
|
|
||||||
// add the node to the collection
|
|
||||||
$collection[] = $token;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// terminate
|
|
||||||
if ($token === false) break;
|
|
||||||
|
|
||||||
if ($is_child) {
|
|
||||||
// determine what we're dealing with
|
|
||||||
if ($token->name == 'col') {
|
|
||||||
// the only empty tag in the possie, we can handle it
|
|
||||||
// immediately
|
|
||||||
$cols[] = array($token);
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
switch($token->name) {
|
|
||||||
case 'caption':
|
|
||||||
case 'colgroup':
|
|
||||||
case 'thead':
|
|
||||||
case 'tfoot':
|
|
||||||
case 'tbody':
|
|
||||||
case 'tr':
|
|
||||||
$is_collecting = true;
|
|
||||||
$collection[] = $token;
|
|
||||||
continue;
|
|
||||||
default:
|
|
||||||
// unrecognized, drop silently
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (empty($content)) return false;
|
|
||||||
|
|
||||||
$ret = array();
|
|
||||||
if ($caption !== false) $ret = array_merge($ret, $caption);
|
|
||||||
if ($cols !== false) foreach ($cols as $token_array) $ret = array_merge($ret, $token_array);
|
|
||||||
if ($thead !== false) $ret = array_merge($ret, $thead);
|
|
||||||
if ($tfoot !== false) $ret = array_merge($ret, $tfoot);
|
|
||||||
foreach ($content as $token_array) $ret = array_merge($ret, $token_array);
|
|
||||||
|
|
||||||
array_pop($tokens_of_children); // remove phantom token
|
|
||||||
|
|
||||||
return ($ret === $tokens_of_children) ? true : $ret;
|
|
||||||
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
?>
|
|
||||||
|
60
library/HTMLPurifier/ChildDef/Chameleon.php
Normal file
60
library/HTMLPurifier/ChildDef/Chameleon.php
Normal file
@@ -0,0 +1,60 @@
|
|||||||
|
<?php
|
||||||
|
|
||||||
|
require_once 'HTMLPurifier/ChildDef.php';
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Definition that uses different definitions depending on context.
|
||||||
|
*
|
||||||
|
* The del and ins tags are notable because they allow different types of
|
||||||
|
* elements depending on whether or not they're in a block or inline context.
|
||||||
|
* Chameleon allows this behavior to happen by using two different
|
||||||
|
* definitions depending on context. While this somewhat generalized,
|
||||||
|
* it is specifically intended for those two tags.
|
||||||
|
*/
|
||||||
|
class HTMLPurifier_ChildDef_Chameleon extends HTMLPurifier_ChildDef
|
||||||
|
{
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Instance of the definition object to use when inline. Usually stricter.
|
||||||
|
* @public
|
||||||
|
*/
|
||||||
|
var $inline;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Instance of the definition object to use when block.
|
||||||
|
* @public
|
||||||
|
*/
|
||||||
|
var $block;
|
||||||
|
|
||||||
|
var $type = 'chameleon';
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @param $inline List of elements to allow when inline.
|
||||||
|
* @param $block List of elements to allow when block.
|
||||||
|
*/
|
||||||
|
function HTMLPurifier_ChildDef_Chameleon($inline, $block) {
|
||||||
|
$this->inline = new HTMLPurifier_ChildDef_Optional($inline);
|
||||||
|
$this->block = new HTMLPurifier_ChildDef_Optional($block);
|
||||||
|
}
|
||||||
|
|
||||||
|
function validateChildren($tokens_of_children, $config, &$context) {
|
||||||
|
$parent_type = $context->get('ParentType');
|
||||||
|
switch ($parent_type) {
|
||||||
|
case 'unknown':
|
||||||
|
case 'inline':
|
||||||
|
$result = $this->inline->validateChildren(
|
||||||
|
$tokens_of_children, $config, $context);
|
||||||
|
break;
|
||||||
|
case 'block':
|
||||||
|
$result = $this->block->validateChildren(
|
||||||
|
$tokens_of_children, $config, $context);
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
trigger_error('Invalid context', E_USER_ERROR);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
return $result;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
?>
|
75
library/HTMLPurifier/ChildDef/Custom.php
Normal file
75
library/HTMLPurifier/ChildDef/Custom.php
Normal file
@@ -0,0 +1,75 @@
|
|||||||
|
<?php
|
||||||
|
|
||||||
|
require_once 'HTMLPurifier/ChildDef.php';
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Custom validation class, accepts DTD child definitions
|
||||||
|
*
|
||||||
|
* @warning Currently this class is an all or nothing proposition, that is,
|
||||||
|
* it will only give a bool return value.
|
||||||
|
* @note This class is currently not used by any code, although it is unit
|
||||||
|
* tested.
|
||||||
|
*/
|
||||||
|
class HTMLPurifier_ChildDef_Custom extends HTMLPurifier_ChildDef
|
||||||
|
{
|
||||||
|
var $type = 'custom';
|
||||||
|
var $allow_empty = false;
|
||||||
|
/**
|
||||||
|
* Allowed child pattern as defined by the DTD
|
||||||
|
*/
|
||||||
|
var $dtd_regex;
|
||||||
|
/**
|
||||||
|
* PCRE regex derived from $dtd_regex
|
||||||
|
* @private
|
||||||
|
*/
|
||||||
|
var $_pcre_regex;
|
||||||
|
/**
|
||||||
|
* @param $dtd_regex Allowed child pattern from the DTD
|
||||||
|
*/
|
||||||
|
function HTMLPurifier_ChildDef_Custom($dtd_regex) {
|
||||||
|
$this->dtd_regex = $dtd_regex;
|
||||||
|
$this->_compileRegex();
|
||||||
|
}
|
||||||
|
/**
|
||||||
|
* Compiles the PCRE regex from a DTD regex ($dtd_regex to $_pcre_regex)
|
||||||
|
*/
|
||||||
|
function _compileRegex() {
|
||||||
|
$raw = str_replace(' ', '', $this->dtd_regex);
|
||||||
|
if ($raw{0} != '(') {
|
||||||
|
$raw = "($raw)";
|
||||||
|
}
|
||||||
|
$reg = str_replace(',', ',?', $raw);
|
||||||
|
$reg = preg_replace('/([#a-zA-Z0-9_.-]+)/', '(,?\\0)', $reg);
|
||||||
|
$this->_pcre_regex = $reg;
|
||||||
|
}
|
||||||
|
function validateChildren($tokens_of_children, $config, &$context) {
|
||||||
|
$list_of_children = '';
|
||||||
|
$nesting = 0; // depth into the nest
|
||||||
|
foreach ($tokens_of_children as $token) {
|
||||||
|
if (!empty($token->is_whitespace)) continue;
|
||||||
|
|
||||||
|
$is_child = ($nesting == 0); // direct
|
||||||
|
|
||||||
|
if ($token->type == 'start') {
|
||||||
|
$nesting++;
|
||||||
|
} elseif ($token->type == 'end') {
|
||||||
|
$nesting--;
|
||||||
|
}
|
||||||
|
|
||||||
|
if ($is_child) {
|
||||||
|
$list_of_children .= $token->name . ',';
|
||||||
|
}
|
||||||
|
}
|
||||||
|
$list_of_children = rtrim($list_of_children, ',');
|
||||||
|
|
||||||
|
$okay =
|
||||||
|
preg_match(
|
||||||
|
'/^'.$this->_pcre_regex.'$/',
|
||||||
|
$list_of_children
|
||||||
|
);
|
||||||
|
|
||||||
|
return (bool) $okay;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
?>
|
22
library/HTMLPurifier/ChildDef/Empty.php
Normal file
22
library/HTMLPurifier/ChildDef/Empty.php
Normal file
@@ -0,0 +1,22 @@
|
|||||||
|
<?php
|
||||||
|
|
||||||
|
require_once 'HTMLPurifier/ChildDef.php';
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Definition that disallows all elements.
|
||||||
|
* @warning validateChildren() in this class is actually never called, because
|
||||||
|
* empty elements are corrected in HTMLPurifier_Strategy_MakeWellFormed
|
||||||
|
* before child definitions are parsed in earnest by
|
||||||
|
* HTMLPurifier_Strategy_FixNesting.
|
||||||
|
*/
|
||||||
|
class HTMLPurifier_ChildDef_Empty extends HTMLPurifier_ChildDef
|
||||||
|
{
|
||||||
|
var $allow_empty = true;
|
||||||
|
var $type = 'empty';
|
||||||
|
function HTMLPurifier_ChildDef_Empty() {}
|
||||||
|
function validateChildren($tokens_of_children, $config, &$context) {
|
||||||
|
return array();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
?>
|
23
library/HTMLPurifier/ChildDef/Optional.php
Normal file
23
library/HTMLPurifier/ChildDef/Optional.php
Normal file
@@ -0,0 +1,23 @@
|
|||||||
|
<?php
|
||||||
|
|
||||||
|
require_once 'HTMLPurifier/ChildDef/Required.php';
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Definition that allows a set of elements, and allows no children.
|
||||||
|
* @note This is a hack to reuse code from HTMLPurifier_ChildDef_Required,
|
||||||
|
* really, one shouldn't inherit from the other. Only altered behavior
|
||||||
|
* is to overload a returned false with an array. Thus, it will never
|
||||||
|
* return false.
|
||||||
|
*/
|
||||||
|
class HTMLPurifier_ChildDef_Optional extends HTMLPurifier_ChildDef_Required
|
||||||
|
{
|
||||||
|
var $allow_empty = true;
|
||||||
|
var $type = 'optional';
|
||||||
|
function validateChildren($tokens_of_children, $config, &$context) {
|
||||||
|
$result = parent::validateChildren($tokens_of_children, $config, $context);
|
||||||
|
if ($result === false) return array();
|
||||||
|
return $result;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
?>
|
104
library/HTMLPurifier/ChildDef/Required.php
Normal file
104
library/HTMLPurifier/ChildDef/Required.php
Normal file
@@ -0,0 +1,104 @@
|
|||||||
|
<?php
|
||||||
|
|
||||||
|
require_once 'HTMLPurifier/ChildDef.php';
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Definition that allows a set of elements, but disallows empty children.
|
||||||
|
*/
|
||||||
|
class HTMLPurifier_ChildDef_Required extends HTMLPurifier_ChildDef
|
||||||
|
{
|
||||||
|
/**
|
||||||
|
* Lookup table of allowed elements.
|
||||||
|
* @public
|
||||||
|
*/
|
||||||
|
var $elements = array();
|
||||||
|
/**
|
||||||
|
* @param $elements List of allowed element names (lowercase).
|
||||||
|
*/
|
||||||
|
function HTMLPurifier_ChildDef_Required($elements) {
|
||||||
|
if (is_string($elements)) {
|
||||||
|
$elements = str_replace(' ', '', $elements);
|
||||||
|
$elements = explode('|', $elements);
|
||||||
|
}
|
||||||
|
$elements = array_flip($elements);
|
||||||
|
foreach ($elements as $i => $x) {
|
||||||
|
$elements[$i] = true;
|
||||||
|
if (empty($i)) unset($elements[$i]);
|
||||||
|
}
|
||||||
|
$this->elements = $elements;
|
||||||
|
$this->gen = new HTMLPurifier_Generator();
|
||||||
|
}
|
||||||
|
var $allow_empty = false;
|
||||||
|
var $type = 'required';
|
||||||
|
function validateChildren($tokens_of_children, $config, &$context) {
|
||||||
|
// if there are no tokens, delete parent node
|
||||||
|
if (empty($tokens_of_children)) return false;
|
||||||
|
|
||||||
|
// the new set of children
|
||||||
|
$result = array();
|
||||||
|
|
||||||
|
// current depth into the nest
|
||||||
|
$nesting = 0;
|
||||||
|
|
||||||
|
// whether or not we're deleting a node
|
||||||
|
$is_deleting = false;
|
||||||
|
|
||||||
|
// whether or not parsed character data is allowed
|
||||||
|
// this controls whether or not we silently drop a tag
|
||||||
|
// or generate escaped HTML from it
|
||||||
|
$pcdata_allowed = isset($this->elements['#PCDATA']);
|
||||||
|
|
||||||
|
// a little sanity check to make sure it's not ALL whitespace
|
||||||
|
$all_whitespace = true;
|
||||||
|
|
||||||
|
// some configuration
|
||||||
|
$escape_invalid_children = $config->get('Core', 'EscapeInvalidChildren');
|
||||||
|
|
||||||
|
foreach ($tokens_of_children as $token) {
|
||||||
|
if (!empty($token->is_whitespace)) {
|
||||||
|
$result[] = $token;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
$all_whitespace = false; // phew, we're not talking about whitespace
|
||||||
|
|
||||||
|
$is_child = ($nesting == 0);
|
||||||
|
|
||||||
|
if ($token->type == 'start') {
|
||||||
|
$nesting++;
|
||||||
|
} elseif ($token->type == 'end') {
|
||||||
|
$nesting--;
|
||||||
|
}
|
||||||
|
|
||||||
|
if ($is_child) {
|
||||||
|
$is_deleting = false;
|
||||||
|
if (!isset($this->elements[$token->name])) {
|
||||||
|
$is_deleting = true;
|
||||||
|
if ($pcdata_allowed && $token->type == 'text') {
|
||||||
|
$result[] = $token;
|
||||||
|
} elseif ($pcdata_allowed && $escape_invalid_children) {
|
||||||
|
$result[] = new HTMLPurifier_Token_Text(
|
||||||
|
$this->gen->generateFromToken($token, $config)
|
||||||
|
);
|
||||||
|
}
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (!$is_deleting || ($pcdata_allowed && $token->type == 'text')) {
|
||||||
|
$result[] = $token;
|
||||||
|
} elseif ($pcdata_allowed && $escape_invalid_children) {
|
||||||
|
$result[] =
|
||||||
|
new HTMLPurifier_Token_Text(
|
||||||
|
$this->gen->generateFromToken( $token, $config )
|
||||||
|
);
|
||||||
|
} else {
|
||||||
|
// drop silently
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (empty($result)) return false;
|
||||||
|
if ($all_whitespace) return false;
|
||||||
|
if ($tokens_of_children == $result) return true;
|
||||||
|
return $result;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
?>
|
70
library/HTMLPurifier/ChildDef/StrictBlockquote.php
Normal file
70
library/HTMLPurifier/ChildDef/StrictBlockquote.php
Normal file
@@ -0,0 +1,70 @@
|
|||||||
|
<?php
|
||||||
|
|
||||||
|
require_once 'HTMLPurifier/ChildDef/Required.php';
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Takes the contents of blockquote when in strict and reformats for validation.
|
||||||
|
*
|
||||||
|
* From XHTML 1.0 Transitional to Strict, there is a notable change where
|
||||||
|
*/
|
||||||
|
class HTMLPurifier_ChildDef_StrictBlockquote
|
||||||
|
extends HTMLPurifier_ChildDef_Required
|
||||||
|
{
|
||||||
|
var $allow_empty = true;
|
||||||
|
var $type = 'strictblockquote';
|
||||||
|
var $init = false;
|
||||||
|
function HTMLPurifier_ChildDef_StrictBlockquote() {}
|
||||||
|
function validateChildren($tokens_of_children, $config, &$context) {
|
||||||
|
|
||||||
|
$def = $config->getHTMLDefinition();
|
||||||
|
if (!$this->init) {
|
||||||
|
// allow all inline elements
|
||||||
|
$this->elements = $def->info_flow_elements;
|
||||||
|
$this->elements['#PCDATA'] = true;
|
||||||
|
$this->init = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
$result = parent::validateChildren($tokens_of_children, $config, $context);
|
||||||
|
if ($result === false) return array();
|
||||||
|
if ($result === true) $result = $tokens_of_children;
|
||||||
|
|
||||||
|
$block_wrap_start = new HTMLPurifier_Token_Start($def->info_block_wrapper);
|
||||||
|
$block_wrap_end = new HTMLPurifier_Token_End( $def->info_block_wrapper);
|
||||||
|
$is_inline = false;
|
||||||
|
$depth = 0;
|
||||||
|
$ret = array();
|
||||||
|
|
||||||
|
// assuming that there are no comment tokens
|
||||||
|
foreach ($result as $i => $token) {
|
||||||
|
$token = $result[$i];
|
||||||
|
// ifs are nested for readability
|
||||||
|
if (!$is_inline) {
|
||||||
|
if (!$depth) {
|
||||||
|
if (($token->type == 'text') ||
|
||||||
|
($def->info[$token->name]->type == 'inline')) {
|
||||||
|
$is_inline = true;
|
||||||
|
$ret[] = $block_wrap_start;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if (!$depth) {
|
||||||
|
// starting tokens have been inline text / empty
|
||||||
|
if ($token->type == 'start' || $token->type == 'empty') {
|
||||||
|
if ($def->info[$token->name]->type == 'block') {
|
||||||
|
// ended
|
||||||
|
$ret[] = $block_wrap_end;
|
||||||
|
$is_inline = false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
$ret[] = $token;
|
||||||
|
if ($token->type == 'start') $depth++;
|
||||||
|
if ($token->type == 'end') $depth--;
|
||||||
|
}
|
||||||
|
if ($is_inline) $ret[] = $block_wrap_end;
|
||||||
|
return $ret;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
?>
|
142
library/HTMLPurifier/ChildDef/Table.php
Normal file
142
library/HTMLPurifier/ChildDef/Table.php
Normal file
@@ -0,0 +1,142 @@
|
|||||||
|
<?php
|
||||||
|
|
||||||
|
require_once 'HTMLPurifier/ChildDef.php';
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Definition for tables
|
||||||
|
*/
|
||||||
|
class HTMLPurifier_ChildDef_Table extends HTMLPurifier_ChildDef
|
||||||
|
{
|
||||||
|
var $allow_empty = false;
|
||||||
|
var $type = 'table';
|
||||||
|
function HTMLPurifier_ChildDef_Table() {}
|
||||||
|
function validateChildren($tokens_of_children, $config, &$context) {
|
||||||
|
if (empty($tokens_of_children)) return false;
|
||||||
|
|
||||||
|
// this ensures that the loop gets run one last time before closing
|
||||||
|
// up. It's a little bit of a hack, but it works! Just make sure you
|
||||||
|
// get rid of the token later.
|
||||||
|
$tokens_of_children[] = false;
|
||||||
|
|
||||||
|
// only one of these elements is allowed in a table
|
||||||
|
$caption = false;
|
||||||
|
$thead = false;
|
||||||
|
$tfoot = false;
|
||||||
|
|
||||||
|
// as many of these as you want
|
||||||
|
$cols = array();
|
||||||
|
$content = array();
|
||||||
|
|
||||||
|
$nesting = 0; // current depth so we can determine nodes
|
||||||
|
$is_collecting = false; // are we globbing together tokens to package
|
||||||
|
// into one of the collectors?
|
||||||
|
$collection = array(); // collected nodes
|
||||||
|
$tag_index = 0; // the first node might be whitespace,
|
||||||
|
// so this tells us where the start tag is
|
||||||
|
|
||||||
|
foreach ($tokens_of_children as $token) {
|
||||||
|
$is_child = ($nesting == 0);
|
||||||
|
|
||||||
|
if ($token === false) {
|
||||||
|
// terminating sequence started
|
||||||
|
} elseif ($token->type == 'start') {
|
||||||
|
$nesting++;
|
||||||
|
} elseif ($token->type == 'end') {
|
||||||
|
$nesting--;
|
||||||
|
}
|
||||||
|
|
||||||
|
// handle node collection
|
||||||
|
if ($is_collecting) {
|
||||||
|
if ($is_child) {
|
||||||
|
// okay, let's stash the tokens away
|
||||||
|
// first token tells us the type of the collection
|
||||||
|
switch ($collection[$tag_index]->name) {
|
||||||
|
case 'tr':
|
||||||
|
case 'tbody':
|
||||||
|
$content[] = $collection;
|
||||||
|
break;
|
||||||
|
case 'caption':
|
||||||
|
if ($caption !== false) break;
|
||||||
|
$caption = $collection;
|
||||||
|
break;
|
||||||
|
case 'thead':
|
||||||
|
case 'tfoot':
|
||||||
|
// access the appropriate variable, $thead or $tfoot
|
||||||
|
$var = $collection[$tag_index]->name;
|
||||||
|
if ($$var === false) {
|
||||||
|
$$var = $collection;
|
||||||
|
} else {
|
||||||
|
// transmutate the first and less entries into
|
||||||
|
// tbody tags, and then put into content
|
||||||
|
$collection[$tag_index]->name = 'tbody';
|
||||||
|
$collection[count($collection)-1]->name = 'tbody';
|
||||||
|
$content[] = $collection;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
case 'colgroup':
|
||||||
|
$cols[] = $collection;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
$collection = array();
|
||||||
|
$is_collecting = false;
|
||||||
|
$tag_index = 0;
|
||||||
|
} else {
|
||||||
|
// add the node to the collection
|
||||||
|
$collection[] = $token;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// terminate
|
||||||
|
if ($token === false) break;
|
||||||
|
|
||||||
|
if ($is_child) {
|
||||||
|
// determine what we're dealing with
|
||||||
|
if ($token->name == 'col') {
|
||||||
|
// the only empty tag in the possie, we can handle it
|
||||||
|
// immediately
|
||||||
|
$cols[] = array_merge($collection, array($token));
|
||||||
|
$collection = array();
|
||||||
|
$tag_index = 0;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
switch($token->name) {
|
||||||
|
case 'caption':
|
||||||
|
case 'colgroup':
|
||||||
|
case 'thead':
|
||||||
|
case 'tfoot':
|
||||||
|
case 'tbody':
|
||||||
|
case 'tr':
|
||||||
|
$is_collecting = true;
|
||||||
|
$collection[] = $token;
|
||||||
|
continue;
|
||||||
|
default:
|
||||||
|
if ($token->type == 'text' && $token->is_whitespace) {
|
||||||
|
$collection[] = $token;
|
||||||
|
$tag_index++;
|
||||||
|
}
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (empty($content)) return false;
|
||||||
|
|
||||||
|
$ret = array();
|
||||||
|
if ($caption !== false) $ret = array_merge($ret, $caption);
|
||||||
|
if ($cols !== false) foreach ($cols as $token_array) $ret = array_merge($ret, $token_array);
|
||||||
|
if ($thead !== false) $ret = array_merge($ret, $thead);
|
||||||
|
if ($tfoot !== false) $ret = array_merge($ret, $tfoot);
|
||||||
|
foreach ($content as $token_array) $ret = array_merge($ret, $token_array);
|
||||||
|
if (!empty($collection) && $is_collecting == false){
|
||||||
|
// grab the trailing space
|
||||||
|
$ret = array_merge($ret, $collection);
|
||||||
|
}
|
||||||
|
|
||||||
|
array_pop($tokens_of_children); // remove phantom token
|
||||||
|
|
||||||
|
return ($ret === $tokens_of_children) ? true : $ret;
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
?>
|
@@ -26,12 +26,12 @@ class HTMLPurifier_Config
|
|||||||
var $def;
|
var $def;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Instance of HTMLPurifier_HTMLDefinition
|
* Cached instance of HTMLPurifier_HTMLDefinition
|
||||||
*/
|
*/
|
||||||
var $html_definition;
|
var $html_definition;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Instance of HTMLPurifier_CSSDefinition
|
* Cached instance of HTMLPurifier_CSSDefinition
|
||||||
*/
|
*/
|
||||||
var $css_definition;
|
var $css_definition;
|
||||||
|
|
||||||
@@ -60,7 +60,7 @@ class HTMLPurifier_Config
|
|||||||
* @param $key String key
|
* @param $key String key
|
||||||
*/
|
*/
|
||||||
function get($namespace, $key) {
|
function get($namespace, $key) {
|
||||||
if (!isset($this->conf[$namespace][$key])) {
|
if (!isset($this->def->info[$namespace][$key])) {
|
||||||
trigger_error('Cannot retrieve value of undefined directive',
|
trigger_error('Cannot retrieve value of undefined directive',
|
||||||
E_USER_WARNING);
|
E_USER_WARNING);
|
||||||
return;
|
return;
|
||||||
@@ -68,6 +68,19 @@ class HTMLPurifier_Config
|
|||||||
return $this->conf[$namespace][$key];
|
return $this->conf[$namespace][$key];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Retreives an array of directives to values from a given namespace
|
||||||
|
* @param $namespace String namespace
|
||||||
|
*/
|
||||||
|
function getBatch($namespace) {
|
||||||
|
if (!isset($this->def->info[$namespace])) {
|
||||||
|
trigger_error('Cannot retrieve undefined namespace',
|
||||||
|
E_USER_WARNING);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
return $this->conf[$namespace];
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Sets a value to configuration.
|
* Sets a value to configuration.
|
||||||
* @param $namespace String namespace
|
* @param $namespace String namespace
|
||||||
@@ -75,13 +88,16 @@ class HTMLPurifier_Config
|
|||||||
* @param $value Mixed value
|
* @param $value Mixed value
|
||||||
*/
|
*/
|
||||||
function set($namespace, $key, $value) {
|
function set($namespace, $key, $value) {
|
||||||
if (!isset($this->conf[$namespace][$key])) {
|
if (!isset($this->def->info[$namespace][$key])) {
|
||||||
trigger_error('Cannot set undefined directive to value',
|
trigger_error('Cannot set undefined directive to value',
|
||||||
E_USER_WARNING);
|
E_USER_WARNING);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
$value = $this->def->validate($value,
|
$value = $this->def->validate(
|
||||||
$this->def->info[$namespace][$key]->type);
|
$value,
|
||||||
|
$this->def->info[$namespace][$key]->type,
|
||||||
|
$this->def->info[$namespace][$key]->allow_null
|
||||||
|
);
|
||||||
if (is_string($value)) {
|
if (is_string($value)) {
|
||||||
// resolve value alias if defined
|
// resolve value alias if defined
|
||||||
if (isset($this->def->info[$namespace][$key]->aliases[$value])) {
|
if (isset($this->def->info[$namespace][$key]->aliases[$value])) {
|
||||||
@@ -95,7 +111,7 @@ class HTMLPurifier_Config
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if ($value === null) {
|
if ($this->def->isError($value)) {
|
||||||
trigger_error('Value is of invalid type', E_USER_WARNING);
|
trigger_error('Value is of invalid type', E_USER_WARNING);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
@@ -124,6 +140,28 @@ class HTMLPurifier_Config
|
|||||||
return $this->css_definition;
|
return $this->css_definition;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Loads configuration values from an array with the following structure:
|
||||||
|
* Namespace.Directive => Value
|
||||||
|
* @param $config_array Configuration associative array
|
||||||
|
*/
|
||||||
|
function loadArray($config_array) {
|
||||||
|
foreach ($config_array as $key => $value) {
|
||||||
|
$key = str_replace('_', '.', $key);
|
||||||
|
if (strpos($key, '.') !== false) {
|
||||||
|
// condensed form
|
||||||
|
list($namespace, $directive) = explode('.', $key);
|
||||||
|
$this->set($namespace, $directive, $value);
|
||||||
|
} else {
|
||||||
|
$namespace = $key;
|
||||||
|
$namespace_values = $value;
|
||||||
|
foreach ($namespace_values as $directive => $value) {
|
||||||
|
$this->set($namespace, $directive, $value);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
?>
|
?>
|
@@ -1,8 +1,9 @@
|
|||||||
<?php
|
<?php
|
||||||
|
|
||||||
|
require_once 'HTMLPurifier/Error.php';
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Configuration definition, defines directives and their defaults.
|
* Configuration definition, defines directives and their defaults.
|
||||||
* @todo Build documentation generation capabilities.
|
|
||||||
* @todo The ability to define things multiple times is confusing and should
|
* @todo The ability to define things multiple times is confusing and should
|
||||||
* be factored out to its own function named registerDependency() or
|
* be factored out to its own function named registerDependency() or
|
||||||
* addNote(), where only the namespace.name and an extra descriptions
|
* addNote(), where only the namespace.name and an extra descriptions
|
||||||
@@ -39,7 +40,6 @@ class HTMLPurifier_ConfigSchema {
|
|||||||
|
|
||||||
/**
|
/**
|
||||||
* Lookup table of allowed types.
|
* Lookup table of allowed types.
|
||||||
* @todo Add descriptions
|
|
||||||
*/
|
*/
|
||||||
var $types = array(
|
var $types = array(
|
||||||
'string' => 'String',
|
'string' => 'String',
|
||||||
@@ -82,9 +82,6 @@ class HTMLPurifier_ConfigSchema {
|
|||||||
/**
|
/**
|
||||||
* Defines a directive for configuration
|
* Defines a directive for configuration
|
||||||
* @warning Will fail of directive's namespace is defined
|
* @warning Will fail of directive's namespace is defined
|
||||||
* @todo Collect information on description and allow redefinition
|
|
||||||
* so that multiple files can register a dependency on a
|
|
||||||
* configuration directive.
|
|
||||||
* @param $namespace Namespace the directive is in
|
* @param $namespace Namespace the directive is in
|
||||||
* @param $name Key of directive
|
* @param $name Key of directive
|
||||||
* @param $default Default value of directive
|
* @param $default Default value of directive
|
||||||
@@ -116,12 +113,19 @@ class HTMLPurifier_ConfigSchema {
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
|
// process modifiers
|
||||||
|
$type_values = explode('/', $type, 2);
|
||||||
|
$type = $type_values[0];
|
||||||
|
$modifier = isset($type_values[1]) ? $type_values[1] : false;
|
||||||
|
$allow_null = ($modifier === 'null');
|
||||||
|
|
||||||
if (!isset($def->types[$type])) {
|
if (!isset($def->types[$type])) {
|
||||||
trigger_error('Invalid type for configuration directive',
|
trigger_error('Invalid type for configuration directive',
|
||||||
E_USER_ERROR);
|
E_USER_ERROR);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
if ($def->validate($default, $type) === null) {
|
$default = $def->validate($default, $type, $allow_null);
|
||||||
|
if ($def->isError($default)) {
|
||||||
trigger_error('Default value does not match directive type',
|
trigger_error('Default value does not match directive type',
|
||||||
E_USER_ERROR);
|
E_USER_ERROR);
|
||||||
return;
|
return;
|
||||||
@@ -129,6 +133,7 @@ class HTMLPurifier_ConfigSchema {
|
|||||||
$def->info[$namespace][$name] =
|
$def->info[$namespace][$name] =
|
||||||
new HTMLPurifier_ConfigEntity_Directive();
|
new HTMLPurifier_ConfigEntity_Directive();
|
||||||
$def->info[$namespace][$name]->type = $type;
|
$def->info[$namespace][$name]->type = $type;
|
||||||
|
$def->info[$namespace][$name]->allow_null = $allow_null;
|
||||||
$def->defaults[$namespace][$name] = $default;
|
$def->defaults[$namespace][$name] = $default;
|
||||||
}
|
}
|
||||||
$backtrace = debug_backtrace();
|
$backtrace = debug_backtrace();
|
||||||
@@ -217,36 +222,52 @@ class HTMLPurifier_ConfigSchema {
|
|||||||
/**
|
/**
|
||||||
* Validate a variable according to type. Return null if invalid.
|
 * Validate a variable according to type. Returns an HTMLPurifier_Error object if invalid.
|
||||||
*/
|
*/
|
||||||
function validate($var, $type) {
|
function validate($var, $type, $allow_null = false) {
|
||||||
if (!isset($this->types[$type])) {
|
if (!isset($this->types[$type])) {
|
||||||
trigger_error('Invalid type', E_USER_ERROR);
|
trigger_error('Invalid type', E_USER_ERROR);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
if ($allow_null && $var === null) return null;
|
||||||
switch ($type) {
|
switch ($type) {
|
||||||
case 'mixed':
|
case 'mixed':
|
||||||
return $var;
|
return $var;
|
||||||
case 'istring':
|
case 'istring':
|
||||||
case 'string':
|
case 'string':
|
||||||
if (!is_string($var)) return;
|
if (!is_string($var)) break;
|
||||||
if ($type === 'istring') $var = strtolower($var);
|
if ($type === 'istring') $var = strtolower($var);
|
||||||
return $var;
|
return $var;
|
||||||
case 'int':
|
case 'int':
|
||||||
if (is_string($var) && ctype_digit($var)) $var = (int) $var;
|
if (is_string($var) && ctype_digit($var)) $var = (int) $var;
|
||||||
elseif (!is_int($var)) return;
|
elseif (!is_int($var)) break;
|
||||||
return $var;
|
return $var;
|
||||||
case 'float':
|
case 'float':
|
||||||
if (is_string($var) && is_numeric($var)) $var = (float) $var;
|
if (is_string($var) && is_numeric($var)) $var = (float) $var;
|
||||||
elseif (!is_float($var)) return;
|
elseif (!is_float($var)) break;
|
||||||
return $var;
|
return $var;
|
||||||
case 'bool':
|
case 'bool':
|
||||||
if (is_int($var) && ($var === 0 || $var === 1)) {
|
if (is_int($var) && ($var === 0 || $var === 1)) {
|
||||||
$var = (bool) $var;
|
$var = (bool) $var;
|
||||||
} elseif (!is_bool($var)) return;
|
} elseif (is_string($var)) {
|
||||||
|
if ($var == 'on' || $var == 'true' || $var == '1') {
|
||||||
|
$var = true;
|
||||||
|
} elseif ($var == 'off' || $var == 'false' || $var == '0') {
|
||||||
|
$var = false;
|
||||||
|
} else {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
} elseif (!is_bool($var)) break;
|
||||||
return $var;
|
return $var;
|
||||||
case 'list':
|
case 'list':
|
||||||
case 'hash':
|
case 'hash':
|
||||||
case 'lookup':
|
case 'lookup':
|
||||||
if (!is_array($var)) return;
|
if (is_string($var)) {
|
||||||
|
// simplistic string to array method that only works
|
||||||
|
// for simple lists of tag names or alphanumeric characters
|
||||||
|
$var = explode(',',$var);
|
||||||
|
// remove spaces
|
||||||
|
foreach ($var as $i => $j) $var[$i] = trim($j);
|
||||||
|
}
|
||||||
|
if (!is_array($var)) break;
|
||||||
$keys = array_keys($var);
|
$keys = array_keys($var);
|
||||||
if ($keys === array_keys($keys)) {
|
if ($keys === array_keys($keys)) {
|
||||||
if ($type == 'list') return $var;
|
if ($type == 'list') return $var;
|
||||||
@@ -256,7 +277,7 @@ class HTMLPurifier_ConfigSchema {
|
|||||||
$new[$key] = true;
|
$new[$key] = true;
|
||||||
}
|
}
|
||||||
return $new;
|
return $new;
|
||||||
} else return;
|
} else break;
|
||||||
}
|
}
|
||||||
if ($type === 'lookup') {
|
if ($type === 'lookup') {
|
||||||
foreach ($var as $key => $value) {
|
foreach ($var as $key => $value) {
|
||||||
@@ -265,8 +286,13 @@ class HTMLPurifier_ConfigSchema {
|
|||||||
}
|
}
|
||||||
return $var;
|
return $var;
|
||||||
}
|
}
|
||||||
|
$error = new HTMLPurifier_Error();
|
||||||
|
return $error;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Takes an absolute path and munges it into a more manageable relative path
|
||||||
|
*/
|
||||||
function mungeFilename($filename) {
|
function mungeFilename($filename) {
|
||||||
$offset = strrpos($filename, 'HTMLPurifier');
|
$offset = strrpos($filename, 'HTMLPurifier');
|
||||||
$filename = substr($filename, $offset);
|
$filename = substr($filename, $offset);
|
||||||
@@ -274,6 +300,14 @@ class HTMLPurifier_ConfigSchema {
|
|||||||
return $filename;
|
return $filename;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Checks if var is an HTMLPurifier_Error object
|
||||||
|
*/
|
||||||
|
function isError($var) {
|
||||||
|
if (!is_object($var)) return false;
|
||||||
|
if (!is_a($var, 'HTMLPurifier_Error')) return false;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -323,6 +357,13 @@ class HTMLPurifier_ConfigEntity_Directive extends HTMLPurifier_ConfigEntity
|
|||||||
* - mixed (anything goes)
|
* - mixed (anything goes)
|
||||||
*/
|
*/
|
||||||
var $type = 'mixed';
|
var $type = 'mixed';
|
||||||
|
|
||||||
|
/**
|
||||||
|
 * Is null allowed? Has no effect for mixed type.
|
||||||
|
* @bool
|
||||||
|
*/
|
||||||
|
var $allow_null = false;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Plaintext descriptions of the configuration entity is. Organized by
|
* Plaintext descriptions of the configuration entity is. Organized by
|
||||||
* file and line number, so multiple descriptions are allowed.
|
* file and line number, so multiple descriptions are allowed.
|
||||||
|
76
library/HTMLPurifier/Context.php
Normal file
76
library/HTMLPurifier/Context.php
Normal file
@@ -0,0 +1,76 @@
|
|||||||
|
<?php
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Registry object that contains information about the current context.
|
||||||
|
*/
|
||||||
|
class HTMLPurifier_Context
|
||||||
|
{
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Private array that stores the references.
|
||||||
|
* @private
|
||||||
|
*/
|
||||||
|
var $_storage = array();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Registers a variable into the context.
|
||||||
|
* @param $name String name
|
||||||
|
* @param $ref Variable to be registered
|
||||||
|
*/
|
||||||
|
function register($name, &$ref) {
|
||||||
|
if (isset($this->_storage[$name])) {
|
||||||
|
trigger_error('Name collision, cannot re-register',
|
||||||
|
E_USER_ERROR);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
$this->_storage[$name] =& $ref;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Retrieves a variable reference from the context.
|
||||||
|
* @param $name String name
|
||||||
|
*/
|
||||||
|
function &get($name) {
|
||||||
|
if (!isset($this->_storage[$name])) {
|
||||||
|
trigger_error('Attempted to retrieve non-existent variable',
|
||||||
|
E_USER_ERROR);
|
||||||
|
$var = null; // so we can return by reference
|
||||||
|
return $var;
|
||||||
|
}
|
||||||
|
return $this->_storage[$name];
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Destorys a variable in the context.
|
||||||
|
* @param $name String name
|
||||||
|
*/
|
||||||
|
function destroy($name) {
|
||||||
|
if (!isset($this->_storage[$name])) {
|
||||||
|
trigger_error('Attempted to destroy non-existent variable',
|
||||||
|
E_USER_ERROR);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
unset($this->_storage[$name]);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Checks whether or not the variable exists.
|
||||||
|
* @param $name String name
|
||||||
|
*/
|
||||||
|
function exists($name) {
|
||||||
|
return isset($this->_storage[$name]);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Loads a series of variables from an associative array
|
||||||
|
* @param $context_array Assoc array of variables to load
|
||||||
|
*/
|
||||||
|
function loadArray(&$context_array) {
|
||||||
|
foreach ($context_array as $key => $discard) {
|
||||||
|
$this->register($key, $context_array[$key]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
?>
|
@@ -88,7 +88,7 @@ class HTMLPurifier_Encoder
|
|||||||
if ($iconv && !$force_php) {
|
if ($iconv && !$force_php) {
|
||||||
// do the shortcut way
|
// do the shortcut way
|
||||||
$str = @iconv('UTF-8', 'UTF-8//IGNORE', $str);
|
$str = @iconv('UTF-8', 'UTF-8//IGNORE', $str);
|
||||||
return strtr($str, $non_sgml_chars);;
|
return strtr($str, $non_sgml_chars);
|
||||||
}
|
}
|
||||||
|
|
||||||
$mState = 0; // cached expected number of octets after the current octet
|
$mState = 0; // cached expected number of octets after the current octet
|
||||||
@@ -225,7 +225,30 @@ class HTMLPurifier_Encoder
|
|||||||
|
|
||||||
/**
|
/**
|
||||||
* Translates a Unicode codepoint into its corresponding UTF-8 character.
|
* Translates a Unicode codepoint into its corresponding UTF-8 character.
|
||||||
|
* @note Based on Feyd's function at
|
||||||
|
* <http://forums.devnetwork.net/viewtopic.php?p=191404#191404>,
|
||||||
|
* which is in public domain.
|
||||||
|
* @note While we're going to do code point parsing anyway, a good
|
||||||
|
* optimization would be to refuse to translate code points that
|
||||||
|
* are non-SGML characters. However, this could lead to duplication.
|
||||||
|
* @note This is very similar to the unichr function in
|
||||||
|
* maintenance/generate-entity-file.php (although this is superior,
|
||||||
|
* due to its sanity checks).
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
// +----------+----------+----------+----------+
|
||||||
|
// | 33222222 | 22221111 | 111111 | |
|
||||||
|
// | 10987654 | 32109876 | 54321098 | 76543210 | bit
|
||||||
|
// +----------+----------+----------+----------+
|
||||||
|
// | | | | 0xxxxxxx | 1 byte 0x00000000..0x0000007F
|
||||||
|
// | | | 110yyyyy | 10xxxxxx | 2 byte 0x00000080..0x000007FF
|
||||||
|
// | | 1110zzzz | 10yyyyyy | 10xxxxxx | 3 byte 0x00000800..0x0000FFFF
|
||||||
|
// | 11110www | 10wwzzzz | 10yyyyyy | 10xxxxxx | 4 byte 0x00010000..0x0010FFFF
|
||||||
|
// +----------+----------+----------+----------+
|
||||||
|
// | 00000000 | 00011111 | 11111111 | 11111111 | Theoretical upper limit of legal scalars: 2097151 (0x001FFFFF)
|
||||||
|
// | 00000000 | 00010000 | 11111111 | 11111111 | Defined upper limit of legal scalar codes
|
||||||
|
// +----------+----------+----------+----------+
|
||||||
|
|
||||||
function unichr($code) {
|
function unichr($code) {
|
||||||
if($code > 1114111 or $code < 0 or
|
if($code > 1114111 or $code < 0 or
|
||||||
($code >= 55296 and $code <= 57343) ) {
|
($code >= 55296 and $code <= 57343) ) {
|
||||||
@@ -266,7 +289,7 @@ class HTMLPurifier_Encoder
|
|||||||
/**
|
/**
|
||||||
* Converts a string to UTF-8 based on configuration.
|
* Converts a string to UTF-8 based on configuration.
|
||||||
*/
|
*/
|
||||||
function convertToUTF8($str, $config) {
|
function convertToUTF8($str, $config, &$context) {
|
||||||
static $iconv = null;
|
static $iconv = null;
|
||||||
if ($iconv === null) $iconv = function_exists('iconv');
|
if ($iconv === null) $iconv = function_exists('iconv');
|
||||||
$encoding = $config->get('Core', 'Encoding');
|
$encoding = $config->get('Core', 'Encoding');
|
||||||
@@ -283,7 +306,7 @@ class HTMLPurifier_Encoder
|
|||||||
* @note Currently, this is a lossy conversion, with unexpressable
|
* @note Currently, this is a lossy conversion, with unexpressable
|
||||||
* characters being omitted.
|
* characters being omitted.
|
||||||
*/
|
*/
|
||||||
function convertFromUTF8($str, $config) {
|
function convertFromUTF8($str, $config, &$context) {
|
||||||
static $iconv = null;
|
static $iconv = null;
|
||||||
if ($iconv === null) $iconv = function_exists('iconv');
|
if ($iconv === null) $iconv = function_exists('iconv');
|
||||||
$encoding = $config->get('Core', 'Encoding');
|
$encoding = $config->get('Core', 'Encoding');
|
||||||
|
@@ -19,7 +19,7 @@ class HTMLPurifier_EntityLookup {
|
|||||||
*/
|
*/
|
||||||
function setup($file = false) {
|
function setup($file = false) {
|
||||||
if (!$file) {
|
if (!$file) {
|
||||||
$file = dirname(__FILE__) . '/EntityLookup/data.txt';
|
$file = dirname(__FILE__) . '/EntityLookup/entities.ser';
|
||||||
}
|
}
|
||||||
$this->table = unserialize(file_get_contents($file));
|
$this->table = unserialize(file_get_contents($file));
|
||||||
}
|
}
|
||||||
|
@@ -3,6 +3,10 @@
|
|||||||
require_once 'HTMLPurifier/EntityLookup.php';
|
require_once 'HTMLPurifier/EntityLookup.php';
|
||||||
require_once 'HTMLPurifier/Encoder.php';
|
require_once 'HTMLPurifier/Encoder.php';
|
||||||
|
|
||||||
|
// if want to implement error collecting here, we'll need to use some sort
|
||||||
|
// of global data (probably trigger_error) because it's impossible to pass
|
||||||
|
// $config or $context to the callback functions.
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Handles referencing and derefencing character entities
|
* Handles referencing and derefencing character entities
|
||||||
*/
|
*/
|
||||||
@@ -72,38 +76,12 @@ class HTMLPurifier_EntityParser
|
|||||||
*
|
*
|
||||||
* @warning Though this is public in order to let the callback happen,
|
* @warning Though this is public in order to let the callback happen,
|
||||||
* calling it directly is not recommended.
|
* calling it directly is not recommended.
|
||||||
* @note Based on Feyd's function at
|
|
||||||
* <http://forums.devnetwork.net/viewtopic.php?p=191404#191404>,
|
|
||||||
* which is in public domain.
|
|
||||||
* @note While we're going to do code point parsing anyway, a good
|
|
||||||
* optimization would be to refuse to translate code points that
|
|
||||||
* are non-SGML characters. However, this could lead to duplication.
|
|
||||||
* @note This function is heavily intimate with the inner workings of
|
|
||||||
* UTF-8 and would also be well suited in the Encoder class (or at
|
|
||||||
* least deferring some processing to it). This is also very
|
|
||||||
* similar to the unichr function in
|
|
||||||
* maintenance/generate-entity-file.php (although this is superior,
|
|
||||||
* due to its sanity checks).
|
|
||||||
* @param $matches PCRE matches array, with 0 the entire match, and
|
* @param $matches PCRE matches array, with 0 the entire match, and
|
||||||
* either index 1, 2 or 3 set with a hex value, dec value,
|
* either index 1, 2 or 3 set with a hex value, dec value,
|
||||||
* or string (respectively).
|
* or string (respectively).
|
||||||
* @returns Replacement string.
|
* @returns Replacement string.
|
||||||
* @todo Implement string translations
|
|
||||||
*/
|
*/
|
||||||
|
|
||||||
// +----------+----------+----------+----------+
|
|
||||||
// | 33222222 | 22221111 | 111111 | |
|
|
||||||
// | 10987654 | 32109876 | 54321098 | 76543210 | bit
|
|
||||||
// +----------+----------+----------+----------+
|
|
||||||
// | | | | 0xxxxxxx | 1 byte 0x00000000..0x0000007F
|
|
||||||
// | | | 110yyyyy | 10xxxxxx | 2 byte 0x00000080..0x000007FF
|
|
||||||
// | | 1110zzzz | 10yyyyyy | 10xxxxxx | 3 byte 0x00000800..0x0000FFFF
|
|
||||||
// | 11110www | 10wwzzzz | 10yyyyyy | 10xxxxxx | 4 byte 0x00010000..0x0010FFFF
|
|
||||||
// +----------+----------+----------+----------+
|
|
||||||
// | 00000000 | 00011111 | 11111111 | 11111111 | Theoretical upper limit of legal scalars: 2097151 (0x001FFFFF)
|
|
||||||
// | 00000000 | 00010000 | 11111111 | 11111111 | Defined upper limit of legal scalar codes
|
|
||||||
// +----------+----------+----------+----------+
|
|
||||||
|
|
||||||
function nonSpecialEntityCallback($matches) {
|
function nonSpecialEntityCallback($matches) {
|
||||||
// replaces all but big five
|
// replaces all but big five
|
||||||
$entity = $matches[0];
|
$entity = $matches[0];
|
||||||
|
8
library/HTMLPurifier/Error.php
Normal file
8
library/HTMLPurifier/Error.php
Normal file
@@ -0,0 +1,8 @@
|
|||||||
|
<?php
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Return object from functions that signifies error when null doesn't cut it
|
||||||
|
*/
|
||||||
|
class HTMLPurifier_Error {}
|
||||||
|
|
||||||
|
?>
|
@@ -1,7 +1,5 @@
|
|||||||
<?php
|
<?php
|
||||||
|
|
||||||
// pretty-printing with indentation would be pretty cool
|
|
||||||
|
|
||||||
require_once 'HTMLPurifier/Lexer.php';
|
require_once 'HTMLPurifier/Lexer.php';
|
||||||
|
|
||||||
HTMLPurifier_ConfigSchema::define(
|
HTMLPurifier_ConfigSchema::define(
|
||||||
@@ -23,6 +21,21 @@ HTMLPurifier_ConfigSchema::define(
|
|||||||
'This directive was available since 1.1.'
|
'This directive was available since 1.1.'
|
||||||
);
|
);
|
||||||
|
|
||||||
|
// extension constraints could be factored into ConfigSchema
|
||||||
|
HTMLPurifier_ConfigSchema::define(
|
||||||
|
'Core', 'TidyFormat', false, 'bool',
|
||||||
|
'<p>Determines whether or not to run Tidy on the final output for pretty '.
|
||||||
|
'formatting reasons, such as indentation and wrap.</p><p>This can greatly '.
|
||||||
|
'improve readability for editors who are hand-editing the HTML, but is '.
|
||||||
|
'by no means necessary as HTML Purifier has already fixed all major '.
|
||||||
|
'errors the HTML may have had. Tidy is a non-default extension, and this directive '.
|
||||||
|
'will silently fail if Tidy is not available.</p><p>If you are looking to make '.
|
||||||
|
'the overall look of your page\'s source better, I recommend running Tidy '.
|
||||||
|
'on the entire page rather than just user-content (after all, the '.
|
||||||
|
'indentation relative to the containing blocks will be incorrect).</p><p>This '.
|
||||||
|
'directive was available since 1.1.1.</p>'
|
||||||
|
);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Generates HTML from tokens.
|
* Generates HTML from tokens.
|
||||||
*/
|
*/
|
||||||
@@ -37,6 +50,7 @@ class HTMLPurifier_Generator
|
|||||||
|
|
||||||
/**
|
/**
|
||||||
* Bool cache of %Core.XHTML
|
* Bool cache of %Core.XHTML
|
||||||
|
* @private
|
||||||
*/
|
*/
|
||||||
var $_xhtml = true;
|
var $_xhtml = true;
|
||||||
|
|
||||||
@@ -45,9 +59,8 @@ class HTMLPurifier_Generator
|
|||||||
* @param $tokens Array of HTMLPurifier_Token
|
* @param $tokens Array of HTMLPurifier_Token
|
||||||
* @param $config HTMLPurifier_Config object
|
* @param $config HTMLPurifier_Config object
|
||||||
* @return Generated HTML
|
* @return Generated HTML
|
||||||
* @note Only unit tests may omit configuration: internals MUST pass config
|
|
||||||
*/
|
*/
|
||||||
function generateFromTokens($tokens, $config = null) {
|
function generateFromTokens($tokens, $config, &$context) {
|
||||||
$html = '';
|
$html = '';
|
||||||
if (!$config) $config = HTMLPurifier_Config::createDefault();
|
if (!$config) $config = HTMLPurifier_Config::createDefault();
|
||||||
$this->_clean_utf8 = $config->get('Core', 'CleanUTF8DuringGeneration');
|
$this->_clean_utf8 = $config->get('Core', 'CleanUTF8DuringGeneration');
|
||||||
@@ -56,6 +69,30 @@ class HTMLPurifier_Generator
|
|||||||
foreach ($tokens as $token) {
|
foreach ($tokens as $token) {
|
||||||
$html .= $this->generateFromToken($token);
|
$html .= $this->generateFromToken($token);
|
||||||
}
|
}
|
||||||
|
if ($config->get('Core', 'TidyFormat') && extension_loaded('tidy')) {
|
||||||
|
|
||||||
|
$tidy_options = array(
|
||||||
|
'indent'=> true,
|
||||||
|
'output-xhtml' => $this->_xhtml,
|
||||||
|
'show-body-only' => true,
|
||||||
|
'indent-spaces' => 2,
|
||||||
|
'wrap' => 68,
|
||||||
|
);
|
||||||
|
if (version_compare(PHP_VERSION, '5', '<')) {
|
||||||
|
tidy_set_encoding('utf8');
|
||||||
|
foreach ($tidy_options as $key => $value) {
|
||||||
|
tidy_setopt($key, $value);
|
||||||
|
}
|
||||||
|
tidy_parse_string($html);
|
||||||
|
tidy_clean_repair();
|
||||||
|
$html = tidy_get_output();
|
||||||
|
} else {
|
||||||
|
$tidy = new Tidy;
|
||||||
|
$tidy->parseString($html, $tidy_options, 'utf8');
|
||||||
|
$tidy->cleanRepair();
|
||||||
|
$html = (string) $tidy;
|
||||||
|
}
|
||||||
|
}
|
||||||
return $html;
|
return $html;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -67,14 +104,14 @@ class HTMLPurifier_Generator
|
|||||||
function generateFromToken($token) {
|
function generateFromToken($token) {
|
||||||
if (!isset($token->type)) return '';
|
if (!isset($token->type)) return '';
|
||||||
if ($token->type == 'start') {
|
if ($token->type == 'start') {
|
||||||
$attr = $this->generateAttributes($token->attributes);
|
$attr = $this->generateAttributes($token->attr);
|
||||||
return '<' . $token->name . ($attr ? ' ' : '') . $attr . '>';
|
return '<' . $token->name . ($attr ? ' ' : '') . $attr . '>';
|
||||||
|
|
||||||
} elseif ($token->type == 'end') {
|
} elseif ($token->type == 'end') {
|
||||||
return '</' . $token->name . '>';
|
return '</' . $token->name . '>';
|
||||||
|
|
||||||
} elseif ($token->type == 'empty') {
|
} elseif ($token->type == 'empty') {
|
||||||
$attr = $this->generateAttributes($token->attributes);
|
$attr = $this->generateAttributes($token->attr);
|
||||||
return '<' . $token->name . ($attr ? ' ' : '') . $attr .
|
return '<' . $token->name . ($attr ? ' ' : '') . $attr .
|
||||||
( $this->_xhtml ? ' /': '' )
|
( $this->_xhtml ? ' /': '' )
|
||||||
. '>';
|
. '>';
|
||||||
|
@@ -18,10 +18,86 @@ require_once 'HTMLPurifier/AttrTransform.php';
|
|||||||
require_once 'HTMLPurifier/AttrTransform/BdoDir.php';
|
require_once 'HTMLPurifier/AttrTransform/BdoDir.php';
|
||||||
require_once 'HTMLPurifier/AttrTransform/ImgRequired.php';
|
require_once 'HTMLPurifier/AttrTransform/ImgRequired.php';
|
||||||
require_once 'HTMLPurifier/ChildDef.php';
|
require_once 'HTMLPurifier/ChildDef.php';
|
||||||
|
require_once 'HTMLPurifier/ChildDef/Chameleon.php';
|
||||||
|
require_once 'HTMLPurifier/ChildDef/Empty.php';
|
||||||
|
require_once 'HTMLPurifier/ChildDef/Required.php';
|
||||||
|
require_once 'HTMLPurifier/ChildDef/Optional.php';
|
||||||
|
require_once 'HTMLPurifier/ChildDef/Table.php';
|
||||||
|
require_once 'HTMLPurifier/ChildDef/StrictBlockquote.php';
|
||||||
require_once 'HTMLPurifier/Generator.php';
|
require_once 'HTMLPurifier/Generator.php';
|
||||||
require_once 'HTMLPurifier/Token.php';
|
require_once 'HTMLPurifier/Token.php';
|
||||||
require_once 'HTMLPurifier/TagTransform.php';
|
require_once 'HTMLPurifier/TagTransform.php';
|
||||||
|
|
||||||
|
HTMLPurifier_ConfigSchema::define(
|
||||||
|
'HTML', 'EnableAttrID', false, 'bool',
|
||||||
|
'Allows the ID attribute in HTML. This is disabled by default '.
|
||||||
|
'due to the fact that without proper configuration user input can '.
|
||||||
|
'easily break the validation of a webpage by specifying an ID that is '.
|
||||||
|
'already on the surrounding HTML. If you don\'t mind throwing caution to '.
|
||||||
|
'the wind, enable this directive, but I strongly recommend you also '.
|
||||||
|
'consider blacklisting IDs you use (%Attr.IDBlacklist) or prefixing all '.
|
||||||
|
'user supplied IDs (%Attr.IDPrefix). This directive has been available '.
|
||||||
|
'since 1.2.0, and when set to true reverts to the behavior of pre-1.2.0 '.
|
||||||
|
'versions.'
|
||||||
|
);
|
||||||
|
|
||||||
|
HTMLPurifier_ConfigSchema::define(
|
||||||
|
'HTML', 'Strict', false, 'bool',
|
||||||
|
'Determines whether or not to use Transitional (loose) or Strict rulesets. '.
|
||||||
|
'This directive has been available since 1.3.0.'
|
||||||
|
);
|
||||||
|
|
||||||
|
HTMLPurifier_ConfigSchema::define(
|
||||||
|
'HTML', 'BlockWrapper', 'p', 'string',
|
||||||
|
'String name of element to wrap inline elements that are inside a block '.
|
||||||
|
'context. This only occurs in the children of blockquote in strict mode. '.
|
||||||
|
'Example: by default value, <code>&lt;blockquote&gt;Foo&lt;/blockquote&gt;</code> '.
|
||||||
|
'would become <code>&lt;blockquote&gt;&lt;p&gt;Foo&lt;/p&gt;&lt;/blockquote&gt;</code>. The '.
|
||||||
|
'<code>&lt;p&gt;</code> tags can be replaced '.
|
||||||
|
'with whatever you desire, as long as it is a block level element. '.
|
||||||
|
'This directive has been available since 1.3.0.'
|
||||||
|
);
|
||||||
|
|
||||||
|
HTMLPurifier_ConfigSchema::define(
|
||||||
|
'HTML', 'Parent', 'div', 'string',
|
||||||
|
'String name of element that HTML fragment passed to library will be '.
|
||||||
|
'inserted in. An interesting variation would be using span as the '.
|
||||||
|
'parent element, meaning that only inline tags would be allowed. '.
|
||||||
|
'This directive has been available since 1.3.0.'
|
||||||
|
);
|
||||||
|
|
||||||
|
HTMLPurifier_ConfigSchema::define(
|
||||||
|
'HTML', 'AllowedElements', null, 'lookup/null',
|
||||||
|
'If HTML Purifier\'s tag set is unsatisfactory for your needs, you '.
|
||||||
|
'can overload it with your own list of tags to allow. Note that this '.
|
||||||
|
'method is subtractive: it does its job by taking away from HTML Purifier '.
|
||||||
|
'usual feature set, so you cannot add a tag that HTML Purifier never '.
|
||||||
|
'supported in the first place (like embed, form or head). If you change this, you '.
|
||||||
|
'probably also want to change %HTML.AllowedAttributes. '.
|
||||||
|
'<strong>Warning:</strong> If another directive conflicts with the '.
|
||||||
|
'elements here, <em>that</em> directive will win and override. '.
|
||||||
|
'This directive has been available since 1.3.0.'
|
||||||
|
);
|
||||||
|
|
||||||
|
HTMLPurifier_ConfigSchema::define(
|
||||||
|
'HTML', 'AllowedAttributes', null, 'lookup/null',
|
||||||
|
'IF HTML Purifier\'s attribute set is unsatisfactory, overload it! '.
|
||||||
|
'The syntax is \'tag.attr\' or \'*.attr\' for the global attributes '.
|
||||||
|
'(style, id, class, dir, lang, xml:lang).'.
|
||||||
|
'<strong>Warning:</strong> If another directive conflicts with the '.
|
||||||
|
'elements here, <em>that</em> directive will win and override. For '.
|
||||||
|
'example, %HTML.EnableAttrID will take precedence over *.id in this '.
|
||||||
|
'directive. You must set that directive to true before you can use '.
|
||||||
|
'IDs at all. This directive has been available since 1.3.0.'
|
||||||
|
);
|
||||||
|
|
||||||
|
HTMLPurifier_ConfigSchema::define(
|
||||||
|
'Attr', 'DisableURI', false, 'bool',
|
||||||
|
'Disables all URIs in all forms. Not sure why you\'d want to do that '.
|
||||||
|
'(after all, the Internet\'s founded on the notion of a hyperlink). '.
|
||||||
|
'This directive has been available since 1.3.0.'
|
||||||
|
);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Defines the purified HTML type with large amounts of objects.
|
* Defines the purified HTML type with large amounts of objects.
|
||||||
*
|
*
|
||||||
@@ -60,6 +136,20 @@ class HTMLPurifier_HTMLDefinition
|
|||||||
*/
|
*/
|
||||||
var $info_parent = 'div';
|
var $info_parent = 'div';
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Definition for parent element, allows parent element to be a
|
||||||
|
* tag that's not allowed inside the HTML fragment.
|
||||||
|
* @public
|
||||||
|
*/
|
||||||
|
var $info_parent_def;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* String name of element used to wrap inline elements in block context
|
||||||
|
* @note This is rarely used except for BLOCKQUOTEs in strict mode
|
||||||
|
* @public
|
||||||
|
*/
|
||||||
|
var $info_block_wrapper = 'p';
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Associative array of deprecated tag name to HTMLPurifier_TagTransform
|
* Associative array of deprecated tag name to HTMLPurifier_TagTransform
|
||||||
* @public
|
* @public
|
||||||
@@ -78,14 +168,25 @@ class HTMLPurifier_HTMLDefinition
|
|||||||
*/
|
*/
|
||||||
var $info_attr_transform_post = array();
|
var $info_attr_transform_post = array();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Lookup table of flow elements
|
||||||
|
* @public
|
||||||
|
*/
|
||||||
|
var $info_flow_elements = array();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Boolean is a strict definition?
|
||||||
|
* @public
|
||||||
|
*/
|
||||||
|
var $strict;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Initializes the definition, the meat of the class.
|
* Initializes the definition, the meat of the class.
|
||||||
*/
|
*/
|
||||||
function setup($config) {
|
function setup($config) {
|
||||||
|
|
||||||
// emulates the structure of the DTD
|
// some cached config values
|
||||||
// these are condensed, however, with bad stuff taken out
|
$this->strict = $config->get('HTML', 'Strict');
|
||||||
// screening process was done by hand
|
|
||||||
|
|
||||||
//////////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////////
|
||||||
// info[] : initializes the definition objects
|
// info[] : initializes the definition objects
|
||||||
@@ -97,13 +198,19 @@ class HTMLPurifier_HTMLDefinition
|
|||||||
array(
|
array(
|
||||||
'ins', 'del', 'blockquote', 'dd', 'li', 'div', 'em', 'strong',
|
'ins', 'del', 'blockquote', 'dd', 'li', 'div', 'em', 'strong',
|
||||||
'dfn', 'code', 'samp', 'kbd', 'var', 'cite', 'abbr', 'acronym',
|
'dfn', 'code', 'samp', 'kbd', 'var', 'cite', 'abbr', 'acronym',
|
||||||
'q', 'sub', 'tt', 'sup', 'i', 'b', 'big', 'small', 'u', 's',
|
'q', 'sub', 'tt', 'sup', 'i', 'b', 'big', 'small',
|
||||||
'strike', 'bdo', 'span', 'dt', 'p', 'h1', 'h2', 'h3', 'h4',
|
'bdo', 'span', 'dt', 'p', 'h1', 'h2', 'h3', 'h4',
|
||||||
'h5', 'h6', 'ol', 'ul', 'dl', 'address', 'img', 'br', 'hr',
|
'h5', 'h6', 'ol', 'ul', 'dl', 'address', 'img', 'br', 'hr',
|
||||||
'pre', 'a', 'table', 'caption', 'thead', 'tfoot', 'tbody',
|
'pre', 'a', 'table', 'caption', 'thead', 'tfoot', 'tbody',
|
||||||
'colgroup', 'col', 'td', 'th', 'tr'
|
'colgroup', 'col', 'td', 'th', 'tr'
|
||||||
);
|
);
|
||||||
|
|
||||||
|
if (!$this->strict) {
|
||||||
|
$allowed_tags[] = 'u';
|
||||||
|
$allowed_tags[] = 's';
|
||||||
|
$allowed_tags[] = 'strike';
|
||||||
|
}
|
||||||
|
|
||||||
foreach ($allowed_tags as $tag) {
|
foreach ($allowed_tags as $tag) {
|
||||||
$this->info[$tag] = new HTMLPurifier_ElementDef();
|
$this->info[$tag] = new HTMLPurifier_ElementDef();
|
||||||
}
|
}
|
||||||
@@ -111,12 +218,23 @@ class HTMLPurifier_HTMLDefinition
|
|||||||
//////////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////////
|
||||||
// info[]->child : defines allowed children for elements
|
// info[]->child : defines allowed children for elements
|
||||||
|
|
||||||
// entities: prefixed with e_ and _ replaces .
|
// emulates the structure of the DTD
|
||||||
|
// however, these are condensed, with bad stuff taken out
|
||||||
|
// screening process was done by hand
|
||||||
|
|
||||||
|
// entities: prefixed with e_ and _ replaces . from DTD
|
||||||
|
// double underlines are entities we made up
|
||||||
|
|
||||||
// we don't use an array because that complicates interpolation
|
// we don't use an array because that complicates interpolation
|
||||||
// strings are used instead of arrays because if you use arrays,
|
// strings are used instead of arrays because if you use arrays,
|
||||||
// you have to do some hideous manipulation with array_merge()
|
// you have to do some hideous manipulation with array_merge()
|
||||||
|
|
||||||
|
// todo: determine whether or not having allowed children
|
||||||
|
// that aren't allowed globally affects security (it shouldn't)
|
||||||
|
// if above works out, extend children definitions to include all
|
||||||
|
// possible elements (allowed elements will dictate which ones
|
||||||
|
// get dropped
|
||||||
|
|
||||||
$e_special_extra = 'img';
|
$e_special_extra = 'img';
|
||||||
$e_special_basic = 'br | span | bdo';
|
$e_special_basic = 'br | span | bdo';
|
||||||
$e_special = "$e_special_basic | $e_special_extra";
|
$e_special = "$e_special_basic | $e_special_extra";
|
||||||
@@ -127,11 +245,9 @@ class HTMLPurifier_HTMLDefinition
|
|||||||
$e_phrase_basic = 'em | strong | dfn | code | q | samp | kbd | var'.
|
$e_phrase_basic = 'em | strong | dfn | code | q | samp | kbd | var'.
|
||||||
' | cite | abbr | acronym';
|
' | cite | abbr | acronym';
|
||||||
$e_phrase = "$e_phrase_basic | $e_phrase_extra";
|
$e_phrase = "$e_phrase_basic | $e_phrase_extra";
|
||||||
$e_inline_forms = ''; // humor the dtd
|
|
||||||
$e_misc_inline = 'ins | del';
|
$e_misc_inline = 'ins | del';
|
||||||
$e_misc = "$e_misc_inline";
|
$e_misc = "$e_misc_inline";
|
||||||
$e_inline = "a | $e_special | $e_fontstyle | $e_phrase".
|
$e_inline = "a | $e_special | $e_fontstyle | $e_phrase";
|
||||||
" | $e_inline_forms";
|
|
||||||
// pseudo-property we created for convenience, see later on
|
// pseudo-property we created for convenience, see later on
|
||||||
$e__inline = "#PCDATA | $e_inline | $e_misc_inline";
|
$e__inline = "#PCDATA | $e_inline | $e_misc_inline";
|
||||||
// note the casing
|
// note the casing
|
||||||
@@ -140,24 +256,31 @@ class HTMLPurifier_HTMLDefinition
|
|||||||
$e_lists = 'ul | ol | dl';
|
$e_lists = 'ul | ol | dl';
|
||||||
$e_blocktext = 'pre | hr | blockquote | address';
|
$e_blocktext = 'pre | hr | blockquote | address';
|
||||||
$e_block = "p | $e_heading | div | $e_lists | $e_blocktext | table";
|
$e_block = "p | $e_heading | div | $e_lists | $e_blocktext | table";
|
||||||
|
$e_Block = new HTMLPurifier_ChildDef_Optional($e_block);
|
||||||
$e__flow = "#PCDATA | $e_block | $e_inline | $e_misc";
|
$e__flow = "#PCDATA | $e_block | $e_inline | $e_misc";
|
||||||
$e_Flow = new HTMLPurifier_ChildDef_Optional($e__flow);
|
$e_Flow = new HTMLPurifier_ChildDef_Optional($e__flow);
|
||||||
$e_a_content = new HTMLPurifier_ChildDef_Optional("#PCDATA | $e_special".
|
$e_a_content = new HTMLPurifier_ChildDef_Optional("#PCDATA".
|
||||||
" | $e_fontstyle | $e_phrase | $e_inline_forms | $e_misc_inline");
|
" | $e_special | $e_fontstyle | $e_phrase | $e_misc_inline");
|
||||||
$e_pre_content = new HTMLPurifier_ChildDef_Optional("#PCDATA | a".
|
$e_pre_content = new HTMLPurifier_ChildDef_Optional("#PCDATA | a".
|
||||||
" | $e_special_basic | $e_fontstyle_basic | $e_phrase_basic".
|
" | $e_special_basic | $e_fontstyle_basic | $e_phrase_basic".
|
||||||
" | $e_inline_forms | $e_misc_inline");
|
" | $e_misc_inline");
|
||||||
$e_form_content = new HTMLPurifier_ChildDef_Optional(''); //unused
|
$e_form_content = new HTMLPurifier_ChildDef_Optional('');//unused
|
||||||
$e_form_button_content = new HTMLPurifier_ChildDef_Optional(''); // unused
|
$e_form_button_content = new HTMLPurifier_ChildDef_Optional('');//unused
|
||||||
|
|
||||||
$this->info['ins']->child =
|
$this->info['ins']->child =
|
||||||
$this->info['del']->child = new HTMLPurifier_ChildDef_Chameleon($e__inline, $e__flow);
|
$this->info['del']->child =
|
||||||
|
new HTMLPurifier_ChildDef_Chameleon($e__inline, $e__flow);
|
||||||
|
|
||||||
$this->info['blockquote']->child=
|
|
||||||
$this->info['dd']->child =
|
$this->info['dd']->child =
|
||||||
$this->info['li']->child =
|
$this->info['li']->child =
|
||||||
$this->info['div']->child = $e_Flow;
|
$this->info['div']->child = $e_Flow;
|
||||||
|
|
||||||
|
if ($this->strict) {
|
||||||
|
$this->info['blockquote']->child = new HTMLPurifier_ChildDef_StrictBlockquote();
|
||||||
|
} else {
|
||||||
|
$this->info['blockquote']->child = $e_Flow;
|
||||||
|
}
|
||||||
|
|
||||||
$this->info['caption']->child =
|
$this->info['caption']->child =
|
||||||
$this->info['em']->child =
|
$this->info['em']->child =
|
||||||
$this->info['strong']->child =
|
$this->info['strong']->child =
|
||||||
@@ -197,9 +320,13 @@ class HTMLPurifier_HTMLDefinition
|
|||||||
|
|
||||||
$this->info['dl']->child = new HTMLPurifier_ChildDef_Required('dt|dd');
|
$this->info['dl']->child = new HTMLPurifier_ChildDef_Required('dt|dd');
|
||||||
|
|
||||||
$this->info['address']->child =
|
if ($this->strict) {
|
||||||
new HTMLPurifier_ChildDef_Optional("#PCDATA | p | $e_inline".
|
$this->info['address']->child = $e_Inline;
|
||||||
" | $e_misc_inline");
|
} else {
|
||||||
|
$this->info['address']->child =
|
||||||
|
new HTMLPurifier_ChildDef_Optional("#PCDATA | p | $e_inline".
|
||||||
|
" | $e_misc_inline");
|
||||||
|
}
|
||||||
|
|
||||||
$this->info['img']->child =
|
$this->info['img']->child =
|
||||||
$this->info['br']->child =
|
$this->info['br']->child =
|
||||||
@@ -225,17 +352,20 @@ class HTMLPurifier_HTMLDefinition
|
|||||||
//////////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////////
|
||||||
// info[]->type : defines the type of the element (block or inline)
|
// info[]->type : defines the type of the element (block or inline)
|
||||||
|
|
||||||
// reuses $e_Inline and $e_block
|
// reuses $e_Inline and $e_Block
|
||||||
|
foreach ($e_Inline->elements as $name => $bool) {
|
||||||
foreach ($e_Inline->elements as $name) {
|
if ($name == '#PCDATA') continue;
|
||||||
$this->info[$name]->type = 'inline';
|
$this->info[$name]->type = 'inline';
|
||||||
}
|
}
|
||||||
|
|
||||||
$e_Block = new HTMLPurifier_ChildDef_Optional($e_block);
|
foreach ($e_Block->elements as $name => $bool) {
|
||||||
foreach ($e_Block->elements as $name) {
|
|
||||||
$this->info[$name]->type = 'block';
|
$this->info[$name]->type = 'block';
|
||||||
}
|
}
|
||||||
|
|
||||||
|
foreach ($e_Flow->elements as $name => $bool) {
|
||||||
|
$this->info_flow_elements[$name] = true;
|
||||||
|
}
|
||||||
|
|
||||||
//////////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////////
|
||||||
// info[]->excludes : defines elements that aren't allowed in here
|
// info[]->excludes : defines elements that aren't allowed in here
|
||||||
|
|
||||||
@@ -243,7 +373,7 @@ class HTMLPurifier_HTMLDefinition
|
|||||||
|
|
||||||
$this->info['a']->excludes = array('a' => true);
|
$this->info['a']->excludes = array('a' => true);
|
||||||
$this->info['pre']->excludes = array_flip(array('img', 'big', 'small',
|
$this->info['pre']->excludes = array_flip(array('img', 'big', 'small',
|
||||||
// technically in spec, but we don't allow em anyway
|
// technically useless, but good to be indepth
|
||||||
'object', 'applet', 'font', 'basefont'));
|
'object', 'applet', 'font', 'basefont'));
|
||||||
|
|
||||||
//////////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////////
|
||||||
@@ -253,13 +383,14 @@ class HTMLPurifier_HTMLDefinition
|
|||||||
// by the transform classes. It will, however, do simple and slightly
|
// by the transform classes. It will, however, do simple and slightly
|
||||||
// complex attribute value substitution
|
// complex attribute value substitution
|
||||||
|
|
||||||
|
// the question of varying allowed attributes is more entangling.
|
||||||
|
|
||||||
$e_Text = new HTMLPurifier_AttrDef_Text();
|
$e_Text = new HTMLPurifier_AttrDef_Text();
|
||||||
|
|
||||||
// attrs, included in almost every single one except for a few,
|
// attrs, included in almost every single one except for a few,
|
||||||
// which manually override these in their local definitions
|
// which manually override these in their local definitions
|
||||||
$this->info_global_attr = array(
|
$this->info_global_attr = array(
|
||||||
// core attrs
|
// core attrs
|
||||||
'id' => new HTMLPurifier_AttrDef_ID(),
|
|
||||||
'class' => new HTMLPurifier_AttrDef_Class(),
|
'class' => new HTMLPurifier_AttrDef_Class(),
|
||||||
'title' => $e_Text,
|
'title' => $e_Text,
|
||||||
'style' => new HTMLPurifier_AttrDef_CSS(),
|
'style' => new HTMLPurifier_AttrDef_CSS(),
|
||||||
@@ -269,6 +400,10 @@ class HTMLPurifier_HTMLDefinition
|
|||||||
'xml:lang' => new HTMLPurifier_AttrDef_Lang(),
|
'xml:lang' => new HTMLPurifier_AttrDef_Lang(),
|
||||||
);
|
);
|
||||||
|
|
||||||
|
if ($config->get('HTML', 'EnableAttrID')) {
|
||||||
|
$this->info_global_attr['id'] = new HTMLPurifier_AttrDef_ID();
|
||||||
|
}
|
||||||
|
|
||||||
// required attribute stipulation handled in attribute transformation
|
// required attribute stipulation handled in attribute transformation
|
||||||
$this->info['bdo']->attr = array(); // nothing else
|
$this->info['bdo']->attr = array(); // nothing else
|
||||||
|
|
||||||
@@ -297,7 +432,8 @@ class HTMLPurifier_HTMLDefinition
|
|||||||
|
|
||||||
$this->info['table']->attr['summary'] = $e_Text;
|
$this->info['table']->attr['summary'] = $e_Text;
|
||||||
|
|
||||||
$this->info['table']->attr['border'] = new HTMLPurifier_AttrDef_Pixels();
|
$this->info['table']->attr['border'] =
|
||||||
|
new HTMLPurifier_AttrDef_Pixels();
|
||||||
|
|
||||||
$e_Length = new HTMLPurifier_AttrDef_Length();
|
$e_Length = new HTMLPurifier_AttrDef_Length();
|
||||||
$this->info['table']->attr['cellpadding'] =
|
$this->info['table']->attr['cellpadding'] =
|
||||||
@@ -319,17 +455,26 @@ class HTMLPurifier_HTMLDefinition
|
|||||||
$this->info['td']->attr['colspan'] =
|
$this->info['td']->attr['colspan'] =
|
||||||
$this->info['th']->attr['colspan'] = $e__NumberSpan;
|
$this->info['th']->attr['colspan'] = $e__NumberSpan;
|
||||||
|
|
||||||
$e_URI = new HTMLPurifier_AttrDef_URI();
|
if (!$config->get('Attr', 'DisableURI')) {
|
||||||
$this->info['a']->attr['href'] =
|
$e_URI = new HTMLPurifier_AttrDef_URI();
|
||||||
$this->info['img']->attr['longdesc'] =
|
$this->info['a']->attr['href'] =
|
||||||
$this->info['img']->attr['src'] =
|
$this->info['img']->attr['longdesc'] =
|
||||||
$this->info['del']->attr['cite'] =
|
$this->info['del']->attr['cite'] =
|
||||||
$this->info['ins']->attr['cite'] =
|
$this->info['ins']->attr['cite'] =
|
||||||
$this->info['blockquote']->attr['cite'] =
|
$this->info['blockquote']->attr['cite'] =
|
||||||
$this->info['q']->attr['cite'] = $e_URI;
|
$this->info['q']->attr['cite'] = $e_URI;
|
||||||
|
|
||||||
|
// URI that causes HTTP request
|
||||||
|
$this->info['img']->attr['src'] = new HTMLPurifier_AttrDef_URI(true);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!$this->strict) {
|
||||||
|
$this->info['li']->attr['value'] = new HTMLPurifier_AttrDef_Integer();
|
||||||
|
$this->info['ol']->attr['start'] = new HTMLPurifier_AttrDef_Integer();
|
||||||
|
}
|
||||||
|
|
||||||
//////////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////////
|
||||||
// UNIMP : info_tag_transform : transformations of tags
|
// info_tag_transform : transformations of tags
|
||||||
|
|
||||||
$this->info_tag_transform['font'] = new HTMLPurifier_TagTransform_Font();
|
$this->info_tag_transform['font'] = new HTMLPurifier_TagTransform_Font();
|
||||||
$this->info_tag_transform['menu'] = new HTMLPurifier_TagTransform_Simple('ul');
|
$this->info_tag_transform['menu'] = new HTMLPurifier_TagTransform_Simple('ul');
|
||||||
@@ -339,6 +484,9 @@ class HTMLPurifier_HTMLDefinition
|
|||||||
//////////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////////
|
||||||
// info[]->auto_close : tags that automatically close another
|
// info[]->auto_close : tags that automatically close another
|
||||||
|
|
||||||
|
// todo: determine whether or not SGML-like modeling based on
|
||||||
|
// mandatory/optional end tags would be a better policy
|
||||||
|
|
||||||
// make sure you test using isset() not !empty()
|
// make sure you test using isset() not !empty()
|
||||||
|
|
||||||
// these are all block elements: blocks aren't allowed in P
|
// these are all block elements: blocks aren't allowed in P
|
||||||
@@ -381,6 +529,60 @@ class HTMLPurifier_HTMLDefinition
|
|||||||
|
|
||||||
$this->info_attr_transform_post[] = new HTMLPurifier_AttrTransform_Lang();
|
$this->info_attr_transform_post[] = new HTMLPurifier_AttrTransform_Lang();
|
||||||
|
|
||||||
|
// protect against stdclasses floating around
|
||||||
|
foreach ($this->info as $key => $obj) {
|
||||||
|
if (is_a($obj, 'stdclass')) {
|
||||||
|
unset($this->info[$key]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
//////////////////////////////////////////////////////////////////////
|
||||||
|
// info_block_wrapper : wraps inline elements in block context
|
||||||
|
|
||||||
|
$block_wrapper = $config->get('HTML', 'BlockWrapper');
|
||||||
|
if (isset($e_Block->elements[$block_wrapper])) {
|
||||||
|
$this->info_block_wrapper = $block_wrapper;
|
||||||
|
} else {
|
||||||
|
trigger_error('Cannot use non-block element as block wrapper.',
|
||||||
|
E_USER_ERROR);
|
||||||
|
}
|
||||||
|
|
||||||
|
//////////////////////////////////////////////////////////////////////
|
||||||
|
// info_parent : parent element of the HTML fragment
|
||||||
|
|
||||||
|
$parent = $config->get('HTML', 'Parent');
|
||||||
|
if (isset($this->info[$parent])) {
|
||||||
|
$this->info_parent = $parent;
|
||||||
|
} else {
|
||||||
|
trigger_error('Cannot use unrecognized element as parent.',
|
||||||
|
E_USER_ERROR);
|
||||||
|
}
|
||||||
|
$this->info_parent_def = $this->info[$this->info_parent];
|
||||||
|
|
||||||
|
//////////////////////////////////////////////////////////////////////
|
||||||
|
// %HTML.Allowed(Elements|Attributes) : cut non-allowed elements
|
||||||
|
|
||||||
|
$allowed_elements = $config->get('HTML', 'AllowedElements');
|
||||||
|
if (is_array($allowed_elements)) {
|
||||||
|
foreach ($this->info as $name => $d) {
|
||||||
|
if(!isset($allowed_elements[$name])) unset($this->info[$name]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
$allowed_attributes = $config->get('HTML', 'AllowedAttributes');
|
||||||
|
if (is_array($allowed_attributes)) {
|
||||||
|
foreach ($this->info_global_attr as $attr_key => $info) {
|
||||||
|
if (!isset($allowed_attributes["*.$attr_key"])) {
|
||||||
|
unset($this->info_global_attr[$attr_key]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
foreach ($this->info as $tag => $info) {
|
||||||
|
foreach ($info->attr as $attr => $attr_info) {
|
||||||
|
if (!isset($allowed_attributes["$tag.$attr"])) {
|
||||||
|
unset($this->info[$tag]->attr[$attr]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
function setAttrForTableElements($attr, $def) {
|
function setAttrForTableElements($attr, $def) {
|
||||||
|
@@ -3,6 +3,9 @@
|
|||||||
/**
|
/**
|
||||||
* Component of HTMLPurifier_AttrContext that accumulates IDs to prevent dupes
|
* Component of HTMLPurifier_AttrContext that accumulates IDs to prevent dupes
|
||||||
* @note In Slashdot-speak, dupe means duplicate.
|
* @note In Slashdot-speak, dupe means duplicate.
|
||||||
|
* @note This class does not accept $config or $context, thus, it is the
|
||||||
|
* burden of the callee to register the appropriate errors or
|
||||||
|
* configuration.
|
||||||
*/
|
*/
|
||||||
class HTMLPurifier_IDAccumulator
|
class HTMLPurifier_IDAccumulator
|
||||||
{
|
{
|
||||||
|
@@ -60,6 +60,60 @@ class HTMLPurifier_Lexer
|
|||||||
$this->_entity_parser = new HTMLPurifier_EntityParser();
|
$this->_entity_parser = new HTMLPurifier_EntityParser();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Most common entity to raw value conversion table for special entities.
|
||||||
|
* @protected
|
||||||
|
*/
|
||||||
|
var $_special_entity2str =
|
||||||
|
array(
|
||||||
|
'&quot;' => '"',
|
||||||
|
'&amp;' => '&',
|
||||||
|
'&lt;' => '<',
|
||||||
|
'&gt;' => '>',
|
||||||
|
'&#39;' => "'",
|
||||||
|
'&#039;' => "'",
|
||||||
|
'&#x27;' => "'"
|
||||||
|
);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Parses special entities into the proper characters.
|
||||||
|
*
|
||||||
|
* This string will translate escaped versions of the special characters
|
||||||
|
* into the correct ones.
|
||||||
|
*
|
||||||
|
* @warning
|
||||||
|
* You should be able to treat the output of this function as
|
||||||
|
* completely parsed, but that's only because all other entities should
|
||||||
|
* have been handled previously in substituteNonSpecialEntities()
|
||||||
|
*
|
||||||
|
* @param $string String character data to be parsed.
|
||||||
|
* @returns Parsed character data.
|
||||||
|
*/
|
||||||
|
function parseData($string) {

    // the last-character lookups below require at least one character
    if ($string === '') return '';

    // count ampersands that could start an entity: an ampersand followed
    // by a space, or sitting at the very end of the string, cannot
    $num_amp = substr_count($string, '&') - substr_count($string, '& ') -
        ($string[strlen($string)-1] === '&' ? 1 : 0);

    if (!$num_amp) return $string; // abort if no entities

    // each escaped ampersand turns into a literal "&" after translation,
    // so remember how many of those to expect in the second count
    $num_esc_amp = substr_count($string, '&amp;');
    $string = strtr($string, $this->_special_entity2str);

    // code duplication for sake of optimization, see above
    $num_amp_2 = substr_count($string, '&') - substr_count($string, '& ') -
        ($string[strlen($string)-1] === '&' ? 1 : 0);

    // every remaining candidate is accounted for by a decoded "&amp;",
    // so nothing is left for the slower entity parser to do
    if ($num_amp_2 <= $num_esc_amp) return $string;

    // hmm... now we have some uncommon entities. Use the callback.
    $string = $this->_entity_parser->substituteSpecialEntities($string);
    return $string;
}
|
||||||
|
|
||||||
var $_encoder;
|
var $_encoder;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -68,7 +122,7 @@ class HTMLPurifier_Lexer
|
|||||||
* @param $string String HTML.
|
* @param $string String HTML.
|
||||||
* @return HTMLPurifier_Token array representation of HTML.
|
* @return HTMLPurifier_Token array representation of HTML.
|
||||||
*/
|
*/
|
||||||
function tokenizeHTML($string, $config = null) {
|
function tokenizeHTML($string, $config, &$context) {
|
||||||
trigger_error('Call to abstract class', E_USER_ERROR);
|
trigger_error('Call to abstract class', E_USER_ERROR);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -142,7 +196,7 @@ class HTMLPurifier_Lexer
|
|||||||
* Takes a piece of HTML and normalizes it by converting entities, fixing
|
* Takes a piece of HTML and normalizes it by converting entities, fixing
|
||||||
* encoding, extracting bits, and other good stuff.
|
* encoding, extracting bits, and other good stuff.
|
||||||
*/
|
*/
|
||||||
function normalize($html, $config) {
|
function normalize($html, $config, &$context) {
|
||||||
|
|
||||||
// extract body from document if applicable
|
// extract body from document if applicable
|
||||||
if ($config->get('Core', 'AcceptFullDocuments')) {
|
if ($config->get('Core', 'AcceptFullDocuments')) {
|
||||||
|
@@ -12,15 +12,19 @@ require_once 'HTMLPurifier/TokenFactory.php';
|
|||||||
* documents, it performs twenty times faster than
|
* documents, it performs twenty times faster than
|
||||||
* HTMLPurifier_Lexer_DirectLex,and is the default choice for PHP 5.
|
* HTMLPurifier_Lexer_DirectLex,and is the default choice for PHP 5.
|
||||||
*
|
*
|
||||||
* @notice
|
* @note Any empty elements will have empty tokens associated with them, even if
|
||||||
* Any empty elements will have empty tokens associated with them, even if
|
|
||||||
* this is prohibited by the spec. This cannot be fixed until the spec
|
* this is prohibited by the spec. This cannot be fixed until the spec
|
||||||
* comes into play.
|
* comes into play.
|
||||||
*
|
*
|
||||||
* @todo Determine DOM's entity parsing behavior, point to local entity files
|
* @note PHP's DOM extension does not actually parse any entities, we use
|
||||||
* if necessary.
|
* our own function to do that.
|
||||||
* @todo Make div access less fragile, and refrain from preprocessing when
|
*
|
||||||
* HTML tag and friends are already present.
|
* @warning DOM tends to drop whitespace, which may wreak havoc on indenting.
|
||||||
|
* If this is a huge problem, due to the fact that HTML is hand
|
||||||
|
* edited and you are unable to get a parser cache that caches the
|
||||||
|
* output of HTML Purifier while keeping the original HTML lying
|
||||||
|
* around, you may want to run Tidy on the resulting output or use
|
||||||
|
* HTMLPurifier_Lexer_DirectLex
|
||||||
*/
|
*/
|
||||||
|
|
||||||
class HTMLPurifier_Lexer_DOMLex extends HTMLPurifier_Lexer
|
class HTMLPurifier_Lexer_DOMLex extends HTMLPurifier_Lexer
|
||||||
@@ -34,10 +38,9 @@ class HTMLPurifier_Lexer_DOMLex extends HTMLPurifier_Lexer
|
|||||||
$this->factory = new HTMLPurifier_TokenFactory();
|
$this->factory = new HTMLPurifier_TokenFactory();
|
||||||
}
|
}
|
||||||
|
|
||||||
public function tokenizeHTML($string, $config = null) {
|
public function tokenizeHTML($string, $config, &$context) {
|
||||||
if (!$config) $config = HTMLPurifier_Config::createDefault();
|
|
||||||
|
|
||||||
$string = $this->normalize($string, $config);
|
$string = $this->normalize($string, $config, $context);
|
||||||
|
|
||||||
// preprocess string, essential for UTF-8
|
// preprocess string, essential for UTF-8
|
||||||
$string =
|
$string =
|
||||||
|
@@ -12,75 +12,21 @@ require_once 'HTMLPurifier/Lexer.php';
|
|||||||
* completely eventually.
|
* completely eventually.
|
||||||
*
|
*
|
||||||
* @todo Reread XML spec and document differences.
|
* @todo Reread XML spec and document differences.
|
||||||
* @todo Add support for CDATA sections.
|
*
|
||||||
* @todo Determine correct behavior in outputting comment data. (preserve dashes?)
|
* @todo Determine correct behavior in transforming comment data. (preserve dashes?)
|
||||||
* @todo Optimize main function tokenizeHTML().
|
|
||||||
* @todo Less than sign (<) being prohibited (even as entity) in attr-values?
|
|
||||||
*/
|
*/
|
||||||
class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer
|
class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer
|
||||||
{
|
{
|
||||||
|
|
||||||
/**
|
|
||||||
* Most common entity to raw value conversion table for special entities.
|
|
||||||
* @protected
|
|
||||||
*/
|
|
||||||
var $_special_entity2str =
|
|
||||||
array(
|
|
||||||
'"' => '"',
|
|
||||||
'&' => '&',
|
|
||||||
'<' => '<',
|
|
||||||
'>' => '>',
|
|
||||||
''' => "'",
|
|
||||||
''' => "'",
|
|
||||||
''' => "'"
|
|
||||||
);
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Parses special entities into the proper characters.
|
|
||||||
*
|
|
||||||
* This string will translate escaped versions of the special characters
|
|
||||||
* into the correct ones.
|
|
||||||
*
|
|
||||||
* @warning
|
|
||||||
* You should be able to treat the output of this function as
|
|
||||||
* completely parsed, but that's only because all other entities should
|
|
||||||
* have been handled previously in substituteNonSpecialEntities()
|
|
||||||
*
|
|
||||||
* @param $string String character data to be parsed.
|
|
||||||
* @returns Parsed character data.
|
|
||||||
*/
|
|
||||||
function parseData($string) {
|
|
||||||
|
|
||||||
// subtracts amps that cannot possibly be escaped
|
|
||||||
$num_amp = substr_count($string, '&') - substr_count($string, '& ') -
|
|
||||||
($string[strlen($string)-1] === '&' ? 1 : 0);
|
|
||||||
|
|
||||||
if (!$num_amp) return $string; // abort if no entities
|
|
||||||
$num_esc_amp = substr_count($string, '&');
|
|
||||||
$string = strtr($string, $this->_special_entity2str);
|
|
||||||
|
|
||||||
// code duplication for sake of optimization, see above
|
|
||||||
$num_amp_2 = substr_count($string, '&') - substr_count($string, '& ') -
|
|
||||||
($string[strlen($string)-1] === '&' ? 1 : 0);
|
|
||||||
|
|
||||||
if ($num_amp_2 <= $num_esc_amp) return $string;
|
|
||||||
|
|
||||||
// hmm... now we have some uncommon entities. Use the callback.
|
|
||||||
$string = $this->_entity_parser->substituteSpecialEntities($string);
|
|
||||||
return $string;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Whitespace characters for str(c)spn.
|
* Whitespace characters for str(c)spn.
|
||||||
* @protected
|
* @protected
|
||||||
*/
|
*/
|
||||||
var $_whitespace = "\x20\x09\x0D\x0A";
|
var $_whitespace = "\x20\x09\x0D\x0A";
|
||||||
|
|
||||||
function tokenizeHTML($html, $config = null) {
|
function tokenizeHTML($html, $config, &$context) {
|
||||||
|
|
||||||
if (!$config) $config = HTMLPurifier_Config::createDefault();
|
$html = $this->normalize($html, $config, $context);
|
||||||
|
|
||||||
$html = $this->normalize($html, $config);
|
|
||||||
|
|
||||||
$cursor = 0; // our location in the text
|
$cursor = 0; // our location in the text
|
||||||
$inside_tag = false; // whether or not we're parsing the inside of a tag
|
$inside_tag = false; // whether or not we're parsing the inside of a tag
|
||||||
@@ -197,17 +143,18 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer
|
|||||||
)
|
)
|
||||||
);
|
);
|
||||||
if ($attribute_string) {
|
if ($attribute_string) {
|
||||||
$attributes = $this->parseAttributeString(
|
$attr = $this->parseAttributeString(
|
||||||
$attribute_string
|
$attribute_string
|
||||||
);
|
, $config, $context
|
||||||
|
);
|
||||||
} else {
|
} else {
|
||||||
$attributes = array();
|
$attr = array();
|
||||||
}
|
}
|
||||||
|
|
||||||
if ($is_self_closing) {
|
if ($is_self_closing) {
|
||||||
$array[] = new HTMLPurifier_Token_Empty($type, $attributes);
|
$array[] = new HTMLPurifier_Token_Empty($type, $attr);
|
||||||
} else {
|
} else {
|
||||||
$array[] = new HTMLPurifier_Token_Start($type, $attributes);
|
$array[] = new HTMLPurifier_Token_Start($type, $attr);
|
||||||
}
|
}
|
||||||
$cursor = $position_next_gt + 1;
|
$cursor = $position_next_gt + 1;
|
||||||
$inside_tag = false;
|
$inside_tag = false;
|
||||||
@@ -233,7 +180,7 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer
|
|||||||
* @param $string Inside of tag excluding name.
|
* @param $string Inside of tag excluding name.
|
||||||
* @returns Assoc array of attributes.
|
* @returns Assoc array of attributes.
|
||||||
*/
|
*/
|
||||||
function parseAttributeString($string) {
|
function parseAttributeString($string, $config, &$context) {
|
||||||
$string = (string) $string; // quick typecast
|
$string = (string) $string; // quick typecast
|
||||||
|
|
||||||
if ($string == '') return array(); // no attributes
|
if ($string == '') return array(); // no attributes
|
||||||
|
@@ -18,6 +18,8 @@ require_once 'HTMLPurifier/Lexer.php';
|
|||||||
* whatever it does for poorly formed HTML is up to it.
|
* whatever it does for poorly formed HTML is up to it.
|
||||||
*
|
*
|
||||||
* @todo Generalize so that XML_HTMLSax is also supported.
|
* @todo Generalize so that XML_HTMLSax is also supported.
|
||||||
|
*
|
||||||
|
* @warning Entity-resolution inside attributes is broken.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
class HTMLPurifier_Lexer_PEARSax3 extends HTMLPurifier_Lexer
|
class HTMLPurifier_Lexer_PEARSax3 extends HTMLPurifier_Lexer
|
||||||
@@ -29,18 +31,19 @@ class HTMLPurifier_Lexer_PEARSax3 extends HTMLPurifier_Lexer
|
|||||||
*/
|
*/
|
||||||
var $tokens = array();
|
var $tokens = array();
|
||||||
|
|
||||||
function tokenizeHTML($string, $config = null) {
|
function tokenizeHTML($string, $config, &$context) {
|
||||||
|
|
||||||
$this->tokens = array();
|
$this->tokens = array();
|
||||||
|
|
||||||
if (!$config) $config = HTMLPurifier_Config::createDefault();
|
$string = $this->normalize($string, $config, $context);
|
||||||
$string = $this->normalize($string, $config);
|
|
||||||
|
|
||||||
$parser=& new XML_HTMLSax3();
|
$parser=& new XML_HTMLSax3();
|
||||||
$parser->set_object($this);
|
$parser->set_object($this);
|
||||||
$parser->set_element_handler('openHandler','closeHandler');
|
$parser->set_element_handler('openHandler','closeHandler');
|
||||||
$parser->set_data_handler('dataHandler');
|
$parser->set_data_handler('dataHandler');
|
||||||
$parser->set_escape_handler('escapeHandler');
|
$parser->set_escape_handler('escapeHandler');
|
||||||
|
|
||||||
|
// doesn't seem to work correctly for attributes
|
||||||
$parser->set_option('XML_OPTION_ENTITIES_PARSED', 1);
|
$parser->set_option('XML_OPTION_ENTITIES_PARSED', 1);
|
||||||
|
|
||||||
$parser->parse($string);
|
$parser->parse($string);
|
||||||
@@ -53,6 +56,10 @@ class HTMLPurifier_Lexer_PEARSax3 extends HTMLPurifier_Lexer
|
|||||||
* Open tag event handler, interface is defined by PEAR package.
|
* Open tag event handler, interface is defined by PEAR package.
|
||||||
*/
|
*/
|
||||||
function openHandler(&$parser, $name, $attrs, $closed) {
|
function openHandler(&$parser, $name, $attrs, $closed) {
|
||||||
|
// entities are not resolved in attrs
|
||||||
|
foreach ($attrs as $key => $attr) {
|
||||||
|
$attrs[$key] = $this->parseData($attr);
|
||||||
|
}
|
||||||
if ($closed) {
|
if ($closed) {
|
||||||
$this->tokens[] = new HTMLPurifier_Token_Empty($name, $attrs);
|
$this->tokens[] = new HTMLPurifier_Token_Empty($name, $attrs);
|
||||||
} else {
|
} else {
|
||||||
|
47
library/HTMLPurifier/PercentEncoder.php
Normal file
47
library/HTMLPurifier/PercentEncoder.php
Normal file
@@ -0,0 +1,47 @@
|
|||||||
|
<?php
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Class that handles operations involving percent-encoding in URIs.
|
||||||
|
*/
|
||||||
|
class HTMLPurifier_PercentEncoder
{

    /**
     * Normalizes the percent-encoding found in a string.
     *
     * Escapes that encode an unreserved character (ASCII letters, digits
     * and ~ - . _) are decoded, all other valid escapes have their hex
     * digits uppercased, and any percent sign that is not followed by two
     * hex digits is itself escaped as %25.
     *
     * @param $string String to normalize
     * @return Normalized string
     */
    function normalize($string) {
        if ($string == '') return '';

        // everything before the first percent sign is copied verbatim;
        // every later segment starts right after a percent sign
        $segments = explode('%', $string);
        $result = array_shift($segments);

        foreach ($segments as $segment) {
            $hex = substr($segment, 0, 2);

            // stray percent sign: too short, or not hexadecimal
            if (strlen($segment) < 2 || !ctype_xdigit($hex)) {
                $result .= '%25' . $segment;
                continue;
            }

            $tail = substr($segment, 2);
            $code = hexdec($hex);

            $is_digit = ($code >= 48 && $code <= 57);
            $is_upper = ($code >= 65 && $code <= 90);
            $is_lower = ($code >= 97 && $code <= 122);
            // ~ - . _
            $is_mark  = ($code == 126 || $code == 45 || $code == 46 || $code == 95);

            if ($is_digit || $is_upper || $is_lower || $is_mark) {
                // unreserved characters never need to be escaped
                $result .= chr($code) . $tail;
            } else {
                $result .= '%' . strtoupper($hex) . $tail;
            }
        }

        return $result;
    }

}
|
||||||
|
|
||||||
|
?>
|
149
library/HTMLPurifier/Printer.php
Normal file
149
library/HTMLPurifier/Printer.php
Normal file
@@ -0,0 +1,149 @@
|
|||||||
|
<?php
|
||||||
|
|
||||||
|
require_once 'HTMLPurifier/Generator.php';
|
||||||
|
require_once 'HTMLPurifier/Token.php';
|
||||||
|
require_once 'HTMLPurifier/Encoder.php';
|
||||||
|
|
||||||
|
class HTMLPurifier_Printer
{

    /**
     * Instance of HTMLPurifier_Generator for HTML generation convenience funcs
     */
    var $generator;

    /**
     * Instance of HTMLPurifier_Config, for easy access
     */
    var $config;

    /**
     * Initialize $generator.
     */
    function HTMLPurifier_Printer() {
        $this->generator = new HTMLPurifier_Generator();
    }

    /**
     * Main function that renders object or aspect of that object.
     * This base implementation is empty; subclasses override it.
     * @param $config Configuration object
     */
    function render($config) {}

    /**
     * Returns a start tag
     * @param $tag Tag name
     * @param $attr Attribute array; any falsy value is treated as no attributes
     */
    function start($tag, $attr = array()) {
        return $this->generator->generateFromToken(
                    new HTMLPurifier_Token_Start($tag, $attr ? $attr : array())
               );
    }

    /**
     * Returns an end tag
     * @param $tag Tag name
     */
    function end($tag) {
        return $this->generator->generateFromToken(
                    new HTMLPurifier_Token_End($tag)
               );
    }

    /**
     * Prints a complete element with content inside
     * @param $tag Tag name
     * @param $contents Element contents
     * @param $attr Tag attributes
     * @param $escape Bool whether or not to escape contents
     */
    function element($tag, $contents, $attr = array(), $escape = true) {
        return $this->start($tag, $attr) .
               ($escape ? $this->escape($contents) : $contents) .
               $this->end($tag);
    }

    /**
     * Prints a simple key/value row in a table.
     * Boolean values are displayed as 'On' or 'Off'.
     * @param $name Key
     * @param $value Value
     */
    function row($name, $value) {
        if (is_bool($value)) $value = $value ? 'On' : 'Off';
        return
            $this->start('tr') . "\n" .
                $this->element('th', $name) . "\n" .
                $this->element('td', $value) . "\n" .
            $this->end('tr')
        ;
    }

    /**
     * Escapes a string for HTML output.
     * The string is first passed through HTMLPurifier_Encoder::cleanUTF8().
     * @param $string String to escape
     */
    function escape($string) {
        $string = HTMLPurifier_Encoder::cleanUTF8($string);
        $string = htmlspecialchars($string, ENT_COMPAT, 'UTF-8');
        return $string;
    }

    /**
     * Takes a list of strings and turns them into a single list
     * @param $array List of strings
     * @param $polite Bool whether or not to join the last two items with
     *                " and " instead of a comma
     * @return Joined string, or 'None' when the list is empty
     */
    function listify($array, $polite = false) {
        if (empty($array)) return 'None';
        $ret = '';
        $i = count($array); // number of items still to be appended
        foreach ($array as $value) {
            $i--;
            $ret .= $value;
            if ($i > 0) {
                // exactly one item left: polite mode swaps the comma for
                // " and ", with spaces on both sides so the words do not
                // run together
                $ret .= ($polite && $i == 1) ? ' and ' : ', ';
            }
        }
        return $ret;
    }

    /**
     * Retrieves the class of an object without prefixes, as well as metadata
     *
     * The result is the stripped class name followed by parentheses; for
     * the enum, composite and multiple classes the parentheses hold the
     * valid values, the nested definitions, or the single definition and
     * maximum respectively.
     *
     * @param $obj Object to determine class of
     * @param $sec_prefix Further prefix to remove
     */
    function getClass($obj, $sec_prefix = '') {
        static $five = null;
        if ($five === null) $five = version_compare(PHP_VERSION, '5', '>=');
        $prefix = 'HTMLPurifier_' . $sec_prefix;
        // before PHP 5 the prefix is compared in lowercase
        if (!$five) $prefix = strtolower($prefix);
        $class = str_replace($prefix, '', get_class($obj));
        $lclass = strtolower($class);
        $class .= '(';
        switch ($lclass) {
            case 'enum':
                $values = array();
                foreach ($obj->valid_values as $value => $bool) {
                    $values[] = $value;
                }
                $class .= implode(', ', $values);
                break;
            case 'composite':
                $values = array();
                foreach ($obj->defs as $def) {
                    $values[] = $this->getClass($def, $sec_prefix);
                }
                $class .= implode(', ', $values);
                break;
            case 'multiple':
                $class .= $this->getClass($obj->single, $sec_prefix) . ', ';
                $class .= $obj->max;
                break;
        }
        $class .= ')';
        return $class;
    }

}
|
||||||
|
|
||||||
|
?>
|
40
library/HTMLPurifier/Printer/CSSDefinition.php
Normal file
40
library/HTMLPurifier/Printer/CSSDefinition.php
Normal file
@@ -0,0 +1,40 @@
|
|||||||
|
<?php
|
||||||
|
|
||||||
|
require_once 'HTMLPurifier/Printer.php';
|
||||||
|
|
||||||
|
class HTMLPurifier_Printer_CSSDefinition extends HTMLPurifier_Printer
{

    /**
     * CSS definition obtained from the configuration during render()
     */
    var $def;

    /**
     * Renders the CSS definition as a table with one row per property,
     * pairing the property name with a description of its definition class.
     * @param $config Configuration object
     * @return Generated HTML
     */
    function render($config) {
        $this->def = $config->getCSSDefinition();

        // wrapper, caption and heading row
        $html  = $this->start('div', array('class' => 'HTMLPurifier_Printer'));
        $html .= $this->start('table');
        $html .= $this->element('caption', 'Properties ($info)');
        $html .= $this->start('thead') . $this->start('tr');
        $html .= $this->element('th', 'Property', array('class' => 'heavy'));
        $html .= $this->element('th', 'Definition', array('class' => 'heavy', 'style' => 'width:auto;'));
        $html .= $this->end('tr') . $this->end('thead');

        // one row per property, ordered by property name
        ksort($this->def->info);
        foreach ($this->def->info as $property => $attr_def) {
            $html .= $this->row($property, $this->getClass($attr_def, 'AttrDef_'));
        }

        return $html . $this->end('table') . $this->end('div');
    }

}
|
||||||
|
|
||||||
|
?>
|
206
library/HTMLPurifier/Printer/HTMLDefinition.php
Normal file
206
library/HTMLPurifier/Printer/HTMLDefinition.php
Normal file
@@ -0,0 +1,206 @@
|
|||||||
|
<?php
|
||||||
|
|
||||||
|
require_once 'HTMLPurifier/Printer.php';
|
||||||
|
|
||||||
|
class HTMLPurifier_Printer_HTMLDefinition extends HTMLPurifier_Printer
{

    /**
     * Instance of HTMLPurifier_HTMLDefinition, for easy access
     */
    var $def;

    /**
     * Renders the HTML definition: an "Environment" table followed by the
     * per-element table from renderInfo(), wrapped in a div.
     * @param $config Configuration object, stored in $this->config
     * @return Generated HTML
     */
    function render(&$config) {
        $ret = '';
        $this->config =& $config;
        $this->def =& $config->getHTMLDefinition();
        $def =& $this->def;

        $ret .= $this->start('div', array('class' => 'HTMLPurifier_Printer'));
        $ret .= $this->start('table');
        $ret .= $this->element('caption', 'Environment');

        $ret .= $this->row('Parent of fragment', $def->info_parent);
        $ret .= $this->row('Strict mode', $def->strict);
        if ($def->strict) $ret .= $this->row('Block wrap name', $def->info_block_wrapper);

        $ret .= $this->start('tr');
            $ret .= $this->element('th', 'Global attributes');
            // listifyAttr() output contains markup, so escaping is disabled
            $ret .= $this->element('td', $this->listifyAttr($def->info_global_attr),0,0);
        $ret .= $this->end('tr');

        $ret .= $this->renderChildren($def->info_parent_def->child);

        $ret .= $this->start('tr');
            $ret .= $this->element('th', 'Tag transforms');
            $list = array();
            foreach ($def->info_tag_transform as $old => $new) {
                $new = $this->getClass($new, 'TagTransform_');
                $list[] = "<$old> with $new";
            }
            $ret .= $this->element('td', $this->listify($list));
        $ret .= $this->end('tr');

        $ret .= $this->start('tr');
            $ret .= $this->element('th', 'Pre-AttrTransform');
            $ret .= $this->element('td', $this->listifyObjectList($def->info_attr_transform_pre));
        $ret .= $this->end('tr');

        $ret .= $this->start('tr');
            $ret .= $this->element('th', 'Post-AttrTransform');
            $ret .= $this->element('td', $this->listifyObjectList($def->info_attr_transform_post));
        $ret .= $this->end('tr');

        $ret .= $this->end('table');

        $ret .= $this->renderInfo();

        $ret .= $this->end('div');

        return $ret;
    }

    /**
     * Renders the Elements ($info) table
     *
     * The table opens with the list of allowed tags, then gives each
     * element its type, exclusions, attribute transforms, auto-close
     * list, allowed attributes and allowed children. Optional rows are
     * omitted when the corresponding property is empty.
     * @return Generated HTML
     */
    function renderInfo() {
        $ret = '';
        $ret .= $this->start('table');
        $ret .= $this->element('caption', 'Elements ($info)');
        ksort($this->def->info);
        $ret .= $this->start('tr');
            $ret .= $this->element('th', 'Allowed tags', array('colspan' => 2, 'class' => 'heavy'));
        $ret .= $this->end('tr');
        $ret .= $this->start('tr');
            $ret .= $this->element('td', $this->listifyTagLookup($this->def->info), array('colspan' => 2));
        $ret .= $this->end('tr');
        foreach ($this->def->info as $name => $def) {
            $ret .= $this->start('tr');
                $ret .= $this->element('th', "<$name>", array('class'=>'heavy', 'colspan' => 2));
            $ret .= $this->end('tr');
            $ret .= $this->start('tr');
                $ret .= $this->element('th', 'Type');
                $ret .= $this->element('td', ucfirst($def->type));
            $ret .= $this->end('tr');
            if (!empty($def->excludes)) {
                $ret .= $this->start('tr');
                    $ret .= $this->element('th', 'Excludes');
                    $ret .= $this->element('td', $this->listifyTagLookup($def->excludes));
                $ret .= $this->end('tr');
            }
            if (!empty($def->attr_transform_pre)) {
                $ret .= $this->start('tr');
                    $ret .= $this->element('th', 'Pre-AttrTransform');
                    $ret .= $this->element('td', $this->listifyObjectList($def->attr_transform_pre));
                $ret .= $this->end('tr');
            }
            if (!empty($def->attr_transform_post)) {
                $ret .= $this->start('tr');
                    $ret .= $this->element('th', 'Post-AttrTransform');
                    $ret .= $this->element('td', $this->listifyObjectList($def->attr_transform_post));
                $ret .= $this->end('tr');
            }
            if (!empty($def->auto_close)) {
                $ret .= $this->start('tr');
                    $ret .= $this->element('th', 'Auto closed by');
                    $ret .= $this->element('td', $this->listifyTagLookup($def->auto_close));
                $ret .= $this->end('tr');
            }
            $ret .= $this->start('tr');
                $ret .= $this->element('th', 'Allowed attributes');
                $ret .= $this->element('td',$this->listifyAttr($def->attr),0,0);
            $ret .= $this->end('tr');

            $ret .= $this->renderChildren($def->child);
        }
        $ret .= $this->end('table');
        return $ret;
    }

    /**
     * Renders a row describing the allowed children of an element
     *
     * Chameleon definitions without an elements property produce two
     * rows, one for block context and one for inline context.
     * @param $def HTMLPurifier_ChildDef of pertinent element
     * @return Generated HTML
     */
    function renderChildren($def) {
        $context = new HTMLPurifier_Context();
        $ret = '';
        $ret .= $this->start('tr');
            $elements = array();
            $attr = array();
            if (isset($def->elements)) {
                // NOTE(review): presumably this call is what populates
                // $def->elements for strictblockquote; confirm in that class
                if ($def->type == 'strictblockquote') $def->validateChildren(array(), $this->config, $context);
                $elements = $def->elements;
            } elseif ($def->type == 'chameleon') {
                // the heading cell spans the block and inline rows
                $attr['rowspan'] = 2;
            } elseif ($def->type == 'empty') {
                $elements = array();
            } elseif ($def->type == 'table') {
                // listifyTagLookup() reads the element names from the
                // array KEYS, so this has to be a lookup table; a plain
                // list would expose only numeric keys and print as 'None'
                $elements = array(
                    'col'      => true,
                    'caption'  => true,
                    'colgroup' => true,
                    'thead'    => true,
                    'tfoot'    => true,
                    'tbody'    => true,
                    'tr'       => true
                );
            }
            $ret .= $this->element('th', 'Allowed children', $attr);

            if ($def->type == 'chameleon') {

                $ret .= $this->element('td',
                    '<em>Block</em>: ' .
                    $this->escape($this->listifyTagLookup($def->block->elements)),0,0);
                $ret .= $this->end('tr');
                $ret .= $this->start('tr');
                $ret .= $this->element('td',
                    '<em>Inline</em>: ' .
                    $this->escape($this->listifyTagLookup($def->inline->elements)),0,0);

            } else {
                $ret .= $this->element('td',
                    '<em>'.ucfirst($def->type).'</em>: ' .
                    $this->escape($this->listifyTagLookup($elements)),0,0);
            }
        $ret .= $this->end('tr');
        return $ret;
    }

    /**
     * Listifies a tag lookup table.
     * Names other than #PCDATA that are missing from $this->def->info
     * are left out of the list.
     * @param $array Tag lookup array in form of array('tagname' => true)
     */
    function listifyTagLookup($array) {
        $list = array();
        foreach ($array as $name => $discard) {
            if ($name !== '#PCDATA' && !isset($this->def->info[$name])) continue;
            $list[] = $name;
        }
        return $this->listify($list);
    }

    /**
     * Listifies a list of objects by retrieving class names and internal state
     * @param $array List of objects
     * @todo Also add information about internal state
     */
    function listifyObjectList($array) {
        $list = array();
        foreach ($array as $discard => $obj) {
            $list[] = $this->getClass($obj, 'AttrTransform_');
        }
        return $this->listify($list);
    }

    /**
     * Listifies a hash of attributes to AttrDef classes
     * Entries whose value is exactly false are skipped. The result
     * contains <i> markup and is meant to be output without escaping.
     * @param $array Array hash in form of array('attrname' => HTMLPurifier_AttrDef)
     */
    function listifyAttr($array) {
        $list = array();
        foreach ($array as $name => $obj) {
            if ($obj === false) continue;
            $list[] = "$name = <i>" . $this->getClass($obj, 'AttrDef_') . '</i>';
        }
        return $this->listify($list);
    }

}
|
||||||
|
|
||||||
|
?>
|
@@ -24,7 +24,7 @@ class HTMLPurifier_Strategy
|
|||||||
* @param $config Configuration options
|
* @param $config Configuration options
|
||||||
* @returns Processed array of token objects.
|
* @returns Processed array of token objects.
|
||||||
*/
|
*/
|
||||||
function execute($tokens, $config = null) {
|
function execute($tokens, $config, &$context) {
|
||||||
trigger_error('Cannot call abstract function', E_USER_ERROR);
|
trigger_error('Cannot call abstract function', E_USER_ERROR);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -18,9 +18,9 @@ class HTMLPurifier_Strategy_Composite extends HTMLPurifier_Strategy
|
|||||||
trigger_error('Attempt to instantiate abstract object', E_USER_ERROR);
|
trigger_error('Attempt to instantiate abstract object', E_USER_ERROR);
|
||||||
}
|
}
|
||||||
|
|
||||||
function execute($tokens, $config) {
|
function execute($tokens, $config, &$context) {
|
||||||
foreach ($this->strategies as $strategy) {
|
foreach ($this->strategies as $strategy) {
|
||||||
$tokens = $strategy->execute($tokens, $config);
|
$tokens = $strategy->execute($tokens, $config, $context);
|
||||||
}
|
}
|
||||||
return $tokens;
|
return $tokens;
|
||||||
}
|
}
|
||||||
|
@@ -34,8 +34,7 @@ require_once 'HTMLPurifier/HTMLDefinition.php';
|
|||||||
class HTMLPurifier_Strategy_FixNesting extends HTMLPurifier_Strategy
|
class HTMLPurifier_Strategy_FixNesting extends HTMLPurifier_Strategy
|
||||||
{
|
{
|
||||||
|
|
||||||
function execute($tokens, $config) {
|
function execute($tokens, $config, &$context) {
|
||||||
|
|
||||||
//####################################################################//
|
//####################################################################//
|
||||||
// Pre-processing
|
// Pre-processing
|
||||||
|
|
||||||
@@ -49,6 +48,10 @@ class HTMLPurifier_Strategy_FixNesting extends HTMLPurifier_Strategy
|
|||||||
array_unshift($tokens, new HTMLPurifier_Token_Start($parent_name));
|
array_unshift($tokens, new HTMLPurifier_Token_Start($parent_name));
|
||||||
$tokens[] = new HTMLPurifier_Token_End($parent_name);
|
$tokens[] = new HTMLPurifier_Token_End($parent_name);
|
||||||
|
|
||||||
|
// setup the context variables
|
||||||
|
$parent_type = 'unknown'; // reference var that we alter
|
||||||
|
$context->register('ParentType', $parent_type);
|
||||||
|
|
||||||
//####################################################################//
|
//####################################################################//
|
||||||
// Loop initialization
|
// Loop initialization
|
||||||
|
|
||||||
@@ -101,7 +104,11 @@ class HTMLPurifier_Strategy_FixNesting extends HTMLPurifier_Strategy
|
|||||||
if ($count = count($stack)) {
|
if ($count = count($stack)) {
|
||||||
$parent_index = $stack[$count-1];
|
$parent_index = $stack[$count-1];
|
||||||
$parent_name = $tokens[$parent_index]->name;
|
$parent_name = $tokens[$parent_index]->name;
|
||||||
$parent_def = $definition->info[$parent_name];
|
if ($parent_index == 0) {
|
||||||
|
$parent_def = $definition->info_parent_def;
|
||||||
|
} else {
|
||||||
|
$parent_def = $definition->info[$parent_name];
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
// unknown info, it won't be used anyway
|
// unknown info, it won't be used anyway
|
||||||
$parent_index = $parent_name = $parent_def = null;
|
$parent_index = $parent_name = $parent_def = null;
|
||||||
@@ -109,10 +116,10 @@ class HTMLPurifier_Strategy_FixNesting extends HTMLPurifier_Strategy
|
|||||||
|
|
||||||
// calculate context
|
// calculate context
|
||||||
if (isset($parent_def)) {
|
if (isset($parent_def)) {
|
||||||
$context = $parent_def->type;
|
$parent_type = $parent_def->type;
|
||||||
} else {
|
} else {
|
||||||
// generally found in specialized elements like UL
|
// generally found in specialized elements like UL
|
||||||
$context = 'unknown';
|
$parent_type = 'unknown';
|
||||||
}
|
}
|
||||||
|
|
||||||
//################################################################//
|
//################################################################//
|
||||||
@@ -138,14 +145,25 @@ class HTMLPurifier_Strategy_FixNesting extends HTMLPurifier_Strategy
|
|||||||
if ($excluded) {
|
if ($excluded) {
|
||||||
// there is an exclusion, remove the entire node
|
// there is an exclusion, remove the entire node
|
||||||
$result = false;
|
$result = false;
|
||||||
|
$excludes = array(); // not used, but good to initialize anyway
|
||||||
} else {
|
} else {
|
||||||
// DEFINITION CALL
|
// DEFINITION CALL
|
||||||
$def = $definition->info[$tokens[$i]->name];
|
if ($i === 0) {
|
||||||
$child_def = $def->child;
|
// special processing for the first node
|
||||||
|
$def = $definition->info_parent_def;
|
||||||
|
} else {
|
||||||
|
$def = $definition->info[$tokens[$i]->name];
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
// have DTD child def validate children
|
if (!empty($def->child)) {
|
||||||
$result = $child_def->validateChildren(
|
// have DTD child def validate children
|
||||||
$child_tokens, $config,$context);
|
$result = $def->child->validateChildren(
|
||||||
|
$child_tokens, $config, $context);
|
||||||
|
} else {
|
||||||
|
// weird, no child definition, get rid of everything
|
||||||
|
$result = false;
|
||||||
|
}
|
||||||
|
|
||||||
// determine whether or not this element has any exclusions
|
// determine whether or not this element has any exclusions
|
||||||
$excludes = $def->excludes;
|
$excludes = $def->excludes;
|
||||||
@@ -225,13 +243,20 @@ class HTMLPurifier_Strategy_FixNesting extends HTMLPurifier_Strategy
|
|||||||
|
|
||||||
// Test if the token indeed is a start tag, if not, move forward
|
// Test if the token indeed is a start tag, if not, move forward
|
||||||
// and test again.
|
// and test again.
|
||||||
|
$size = count($tokens);
|
||||||
while ($i < $size and $tokens[$i]->type != 'start') {
|
while ($i < $size and $tokens[$i]->type != 'start') {
|
||||||
if ($tokens[$i]->type == 'end') {
|
if ($tokens[$i]->type == 'end') {
|
||||||
// pop a token index off the stack if we ended a node
|
// pop a token index off the stack if we ended a node
|
||||||
array_pop($stack);
|
array_pop($stack);
|
||||||
// pop an exclusion lookup off exclusion stack if
|
// pop an exclusion lookup off exclusion stack if
|
||||||
// we ended node and that node had exclusions
|
// we ended node and that node had exclusions
|
||||||
if ($definition->info[$tokens[$i]->name]->excludes) {
|
if ($i == 0 || $i == $size - 1) {
|
||||||
|
// use specialized var if it's the super-parent
|
||||||
|
$s_excludes = $definition->info_parent_def->excludes;
|
||||||
|
} else {
|
||||||
|
$s_excludes = $definition->info[$tokens[$i]->name]->excludes;
|
||||||
|
}
|
||||||
|
if ($s_excludes) {
|
||||||
array_pop($exclude_stack);
|
array_pop($exclude_stack);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -247,6 +272,9 @@ class HTMLPurifier_Strategy_FixNesting extends HTMLPurifier_Strategy
|
|||||||
array_shift($tokens);
|
array_shift($tokens);
|
||||||
array_pop($tokens);
|
array_pop($tokens);
|
||||||
|
|
||||||
|
// remove context variables
|
||||||
|
$context->destroy('ParentType');
|
||||||
|
|
||||||
//####################################################################//
|
//####################################################################//
|
||||||
// Return
|
// Return
|
||||||
|
|
||||||
|
@@ -10,7 +10,7 @@ require_once 'HTMLPurifier/Generator.php';
|
|||||||
class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
|
class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
|
||||||
{
|
{
|
||||||
|
|
||||||
function execute($tokens, $config) {
|
function execute($tokens, $config, &$context) {
|
||||||
$definition = $config->getHTMLDefinition();
|
$definition = $config->getHTMLDefinition();
|
||||||
$generator = new HTMLPurifier_Generator();
|
$generator = new HTMLPurifier_Generator();
|
||||||
$result = array();
|
$result = array();
|
||||||
@@ -30,7 +30,7 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
|
|||||||
$token->type == 'start' ) {
|
$token->type == 'start' ) {
|
||||||
|
|
||||||
$result[] = new HTMLPurifier_Token_Empty($token->name,
|
$result[] = new HTMLPurifier_Token_Empty($token->name,
|
||||||
$token->attributes);
|
$token->attr);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -39,7 +39,7 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
|
|||||||
$token->type == 'empty' ) {
|
$token->type == 'empty' ) {
|
||||||
|
|
||||||
$result[] = new HTMLPurifier_Token_Start($token->name,
|
$result[] = new HTMLPurifier_Token_Start($token->name,
|
||||||
$token->attributes);
|
$token->attr);
|
||||||
$result[] = new HTMLPurifier_Token_End($token->name);
|
$result[] = new HTMLPurifier_Token_End($token->name);
|
||||||
|
|
||||||
continue;
|
continue;
|
||||||
@@ -86,7 +86,7 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
|
|||||||
if (empty($current_nesting)) {
|
if (empty($current_nesting)) {
|
||||||
if ($escape_invalid_tags) {
|
if ($escape_invalid_tags) {
|
||||||
$result[] = new HTMLPurifier_Token_Text(
|
$result[] = new HTMLPurifier_Token_Text(
|
||||||
$generator->generateFromToken($token, $config)
|
$generator->generateFromToken($token, $config, $context)
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
continue;
|
continue;
|
||||||
@@ -123,7 +123,7 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
|
|||||||
if ($skipped_tags === false) {
|
if ($skipped_tags === false) {
|
||||||
if ($escape_invalid_tags) {
|
if ($escape_invalid_tags) {
|
||||||
$result[] = new HTMLPurifier_Token_Text(
|
$result[] = new HTMLPurifier_Token_Text(
|
||||||
$generator->generateFromToken($token, $config)
|
$generator->generateFromToken($token, $config, $context)
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
continue;
|
continue;
|
||||||
|
@@ -5,6 +5,14 @@ require_once 'HTMLPurifier/HTMLDefinition.php';
|
|||||||
require_once 'HTMLPurifier/Generator.php';
|
require_once 'HTMLPurifier/Generator.php';
|
||||||
require_once 'HTMLPurifier/TagTransform.php';
|
require_once 'HTMLPurifier/TagTransform.php';
|
||||||
|
|
||||||
|
HTMLPurifier_ConfigSchema::define(
|
||||||
|
'Core', 'RemoveInvalidImg', true, 'bool',
|
||||||
|
'This directive enables pre-emptive URI checking in <code>img</code> '.
|
||||||
|
'tags, as the attribute validation strategy is not authorized to '.
|
||||||
|
'remove elements from the document. This directive has been available '.
|
||||||
|
'since 1.3.0, revert to pre-1.3.0 behavior by setting to false.'
|
||||||
|
);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Removes all unrecognized tags from the list of tokens.
|
* Removes all unrecognized tags from the list of tokens.
|
||||||
*
|
*
|
||||||
@@ -16,7 +24,7 @@ require_once 'HTMLPurifier/TagTransform.php';
|
|||||||
class HTMLPurifier_Strategy_RemoveForeignElements extends HTMLPurifier_Strategy
|
class HTMLPurifier_Strategy_RemoveForeignElements extends HTMLPurifier_Strategy
|
||||||
{
|
{
|
||||||
|
|
||||||
function execute($tokens, $config) {
|
function execute($tokens, $config, &$context) {
|
||||||
$definition = $config->getHTMLDefinition();
|
$definition = $config->getHTMLDefinition();
|
||||||
$generator = new HTMLPurifier_Generator();
|
$generator = new HTMLPurifier_Generator();
|
||||||
$result = array();
|
$result = array();
|
||||||
@@ -25,7 +33,26 @@ class HTMLPurifier_Strategy_RemoveForeignElements extends HTMLPurifier_Strategy
|
|||||||
if (!empty( $token->is_tag )) {
|
if (!empty( $token->is_tag )) {
|
||||||
// DEFINITION CALL
|
// DEFINITION CALL
|
||||||
if (isset($definition->info[$token->name])) {
|
if (isset($definition->info[$token->name])) {
|
||||||
// leave untouched
|
// leave untouched, except for a few special cases:
|
||||||
|
|
||||||
|
// hard-coded image special case, pre-emptively drop
|
||||||
|
// if not available. Probably not abstract-able
|
||||||
|
if ( $token->name == 'img' ) {
|
||||||
|
if (!isset($token->attr['src'])) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if (!isset($definition->info['img']->attr['src'])) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
$token->attr['src'] =
|
||||||
|
$definition->
|
||||||
|
info['img']->
|
||||||
|
attr['src']->
|
||||||
|
validate($token->attr['src'],
|
||||||
|
$config, $context);
|
||||||
|
if ($token->attr['src'] === false) continue;
|
||||||
|
}
|
||||||
|
|
||||||
} elseif (
|
} elseif (
|
||||||
isset($definition->info_tag_transform[$token->name])
|
isset($definition->info_tag_transform[$token->name])
|
||||||
) {
|
) {
|
||||||
@@ -33,11 +60,11 @@ class HTMLPurifier_Strategy_RemoveForeignElements extends HTMLPurifier_Strategy
|
|||||||
// DEFINITION CALL
|
// DEFINITION CALL
|
||||||
$token = $definition->
|
$token = $definition->
|
||||||
info_tag_transform[$token->name]->
|
info_tag_transform[$token->name]->
|
||||||
transform($token);
|
transform($token, $config, $context);
|
||||||
} elseif ($escape_invalid_tags) {
|
} elseif ($escape_invalid_tags) {
|
||||||
// invalid tag, generate HTML and insert in
|
// invalid tag, generate HTML and insert in
|
||||||
$token = new HTMLPurifier_Token_Text(
|
$token = new HTMLPurifier_Token_Text(
|
||||||
$generator->generateFromToken($token, $config)
|
$generator->generateFromToken($token, $config, $context)
|
||||||
);
|
);
|
||||||
} else {
|
} else {
|
||||||
continue;
|
continue;
|
||||||
|
@@ -3,8 +3,6 @@
|
|||||||
require_once 'HTMLPurifier/Strategy.php';
|
require_once 'HTMLPurifier/Strategy.php';
|
||||||
require_once 'HTMLPurifier/HTMLDefinition.php';
|
require_once 'HTMLPurifier/HTMLDefinition.php';
|
||||||
require_once 'HTMLPurifier/IDAccumulator.php';
|
require_once 'HTMLPurifier/IDAccumulator.php';
|
||||||
require_once 'HTMLPurifier/ConfigSchema.php';
|
|
||||||
require_once 'HTMLPurifier/AttrContext.php';
|
|
||||||
|
|
||||||
HTMLPurifier_ConfigSchema::define(
|
HTMLPurifier_ConfigSchema::define(
|
||||||
'Attr', 'IDBlacklist', array(), 'list',
|
'Attr', 'IDBlacklist', array(), 'list',
|
||||||
@@ -17,18 +15,14 @@ HTMLPurifier_ConfigSchema::define(
|
|||||||
class HTMLPurifier_Strategy_ValidateAttributes extends HTMLPurifier_Strategy
|
class HTMLPurifier_Strategy_ValidateAttributes extends HTMLPurifier_Strategy
|
||||||
{
|
{
|
||||||
|
|
||||||
function execute($tokens, $config) {
|
function execute($tokens, $config, &$context) {
|
||||||
|
|
||||||
$definition = $config->getHTMLDefinition();
|
$definition = $config->getHTMLDefinition();
|
||||||
|
|
||||||
// setup StrategyContext
|
// setup id_accumulator context
|
||||||
$context = new HTMLPurifier_AttrContext();
|
$id_accumulator = new HTMLPurifier_IDAccumulator();
|
||||||
|
$id_accumulator->load($config->get('Attr', 'IDBlacklist'));
|
||||||
// setup ID accumulator and load it with blacklisted IDs
|
$context->register('IDAccumulator', $id_accumulator);
|
||||||
// eventually, we'll have a dedicated context object to hold
|
|
||||||
// all these accumulators and caches. For now, just an IDAccumulator
|
|
||||||
$context->id_accumulator = new HTMLPurifier_IDAccumulator();
|
|
||||||
$context->id_accumulator->load($config->get('Attr', 'IDBlacklist'));
|
|
||||||
|
|
||||||
// create alias to global definition array, see also $defs
|
// create alias to global definition array, see also $defs
|
||||||
// DEFINITION CALL
|
// DEFINITION CALL
|
||||||
@@ -41,22 +35,20 @@ class HTMLPurifier_Strategy_ValidateAttributes extends HTMLPurifier_Strategy
|
|||||||
if ($token->type !== 'start' && $token->type !== 'empty') continue;
|
if ($token->type !== 'start' && $token->type !== 'empty') continue;
|
||||||
|
|
||||||
// copy out attributes for easy manipulation
|
// copy out attributes for easy manipulation
|
||||||
$attr = $token->attributes;
|
$attr = $token->attr;
|
||||||
|
|
||||||
// do global transformations (pre)
|
// do global transformations (pre)
|
||||||
// ex. <ELEMENT lang="fr"> to <ELEMENT lang="fr" xml:lang="fr">
|
// nothing currently utilizes this
|
||||||
// DEFINITION CALL
|
|
||||||
foreach ($definition->info_attr_transform_pre as $transform) {
|
foreach ($definition->info_attr_transform_pre as $transform) {
|
||||||
$attr = $transform->transform($attr, $config);
|
$attr = $transform->transform($attr, $config, $context);
|
||||||
}
|
}
|
||||||
|
|
||||||
// do local transformations only applicable to this element (pre)
|
// do local transformations only applicable to this element (pre)
|
||||||
// ex. <p align="right"> to <p style="text-align:right;">
|
// ex. <p align="right"> to <p style="text-align:right;">
|
||||||
// DEFINITION CALL
|
|
||||||
foreach ($definition->info[$token->name]->attr_transform_pre
|
foreach ($definition->info[$token->name]->attr_transform_pre
|
||||||
as $transform
|
as $transform
|
||||||
) {
|
) {
|
||||||
$attr = $transform->transform($attr, $config);
|
$attr = $transform->transform($attr, $config, $context);
|
||||||
}
|
}
|
||||||
|
|
||||||
// create alias to this element's attribute definition array, see
|
// create alias to this element's attribute definition array, see
|
||||||
@@ -112,17 +104,23 @@ class HTMLPurifier_Strategy_ValidateAttributes extends HTMLPurifier_Strategy
|
|||||||
}
|
}
|
||||||
|
|
||||||
// post transforms
|
// post transforms
|
||||||
|
|
||||||
|
// ex. <x lang="fr"> to <x lang="fr" xml:lang="fr">
|
||||||
foreach ($definition->info_attr_transform_post as $transform) {
|
foreach ($definition->info_attr_transform_post as $transform) {
|
||||||
$attr = $transform->transform($attr, $config);
|
$attr = $transform->transform($attr, $config, $context);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ex. <bdo> to <bdo dir="ltr">
|
||||||
foreach ($definition->info[$token->name]->attr_transform_post as $transform) {
|
foreach ($definition->info[$token->name]->attr_transform_post as $transform) {
|
||||||
$attr = $transform->transform($attr, $config);
|
$attr = $transform->transform($attr, $config, $context);
|
||||||
}
|
}
|
||||||
|
|
||||||
// commit changes
|
// commit changes
|
||||||
// could interfere with flyweight implementation
|
// could interfere with flyweight implementation
|
||||||
$tokens[$key]->attributes = $attr;
|
$tokens[$key]->attr = $attr;
|
||||||
}
|
}
|
||||||
|
$context->destroy('IDAccumulator');
|
||||||
|
|
||||||
return $tokens;
|
return $tokens;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -17,8 +17,10 @@ class HTMLPurifier_TagTransform
|
|||||||
/**
|
/**
|
||||||
* Transforms the obsolete tag into the valid tag.
|
* Transforms the obsolete tag into the valid tag.
|
||||||
* @param $tag Tag to be transformed.
|
* @param $tag Tag to be transformed.
|
||||||
|
* @param $config Mandatory HTMLPurifier_Config object
|
||||||
|
* @param $context Mandatory HTMLPurifier_Context object
|
||||||
*/
|
*/
|
||||||
function transform($tag) {
|
function transform($tag, $config, &$context) {
|
||||||
trigger_error('Call to abstract function', E_USER_ERROR);
|
trigger_error('Call to abstract function', E_USER_ERROR);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -37,7 +39,7 @@ class HTMLPurifier_TagTransform_Simple extends HTMLPurifier_TagTransform
|
|||||||
$this->transform_to = $transform_to;
|
$this->transform_to = $transform_to;
|
||||||
}
|
}
|
||||||
|
|
||||||
function transform($tag) {
|
function transform($tag, $config, &$context) {
|
||||||
$new_tag = $tag->copy();
|
$new_tag = $tag->copy();
|
||||||
$new_tag->name = $this->transform_to;
|
$new_tag->name = $this->transform_to;
|
||||||
return $new_tag;
|
return $new_tag;
|
||||||
@@ -55,21 +57,21 @@ class HTMLPurifier_TagTransform_Center extends HTMLPurifier_TagTransform
|
|||||||
{
|
{
|
||||||
var $transform_to = 'div';
|
var $transform_to = 'div';
|
||||||
|
|
||||||
function transform($tag) {
|
function transform($tag, $config, &$context) {
|
||||||
if ($tag->type == 'end') {
|
if ($tag->type == 'end') {
|
||||||
$new_tag = new HTMLPurifier_Token_End($this->transform_to);
|
$new_tag = new HTMLPurifier_Token_End($this->transform_to);
|
||||||
return $new_tag;
|
return $new_tag;
|
||||||
}
|
}
|
||||||
$attributes = $tag->attributes;
|
$attr = $tag->attr;
|
||||||
$prepend_css = 'text-align:center;';
|
$prepend_css = 'text-align:center;';
|
||||||
if (isset($attributes['style'])) {
|
if (isset($attr['style'])) {
|
||||||
$attributes['style'] = $prepend_css . $attributes['style'];
|
$attr['style'] = $prepend_css . $attr['style'];
|
||||||
} else {
|
} else {
|
||||||
$attributes['style'] = $prepend_css;
|
$attr['style'] = $prepend_css;
|
||||||
}
|
}
|
||||||
$new_tag = $tag->copy();
|
$new_tag = $tag->copy();
|
||||||
$new_tag->name = $this->transform_to;
|
$new_tag->name = $this->transform_to;
|
||||||
$new_tag->attributes = $attributes;
|
$new_tag->attr = $attr;
|
||||||
return $new_tag;
|
return $new_tag;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -106,46 +108,46 @@ class HTMLPurifier_TagTransform_Font extends HTMLPurifier_TagTransform
|
|||||||
'+4' => '300%'
|
'+4' => '300%'
|
||||||
);
|
);
|
||||||
|
|
||||||
function transform($tag) {
|
function transform($tag, $config, &$context) {
|
||||||
|
|
||||||
if ($tag->type == 'end') {
|
if ($tag->type == 'end') {
|
||||||
$new_tag = new HTMLPurifier_Token_End($this->transform_to);
|
$new_tag = new HTMLPurifier_Token_End($this->transform_to);
|
||||||
return $new_tag;
|
return $new_tag;
|
||||||
}
|
}
|
||||||
|
|
||||||
$attributes = $tag->attributes;
|
$attr = $tag->attr;
|
||||||
$prepend_style = '';
|
$prepend_style = '';
|
||||||
|
|
||||||
// handle color transform
|
// handle color transform
|
||||||
if (isset($attributes['color'])) {
|
if (isset($attr['color'])) {
|
||||||
$prepend_style .= 'color:' . $attributes['color'] . ';';
|
$prepend_style .= 'color:' . $attr['color'] . ';';
|
||||||
unset($attributes['color']);
|
unset($attr['color']);
|
||||||
}
|
}
|
||||||
|
|
||||||
// handle face transform
|
// handle face transform
|
||||||
if (isset($attributes['face'])) {
|
if (isset($attr['face'])) {
|
||||||
$prepend_style .= 'font-family:' . $attributes['face'] . ';';
|
$prepend_style .= 'font-family:' . $attr['face'] . ';';
|
||||||
unset($attributes['face']);
|
unset($attr['face']);
|
||||||
}
|
}
|
||||||
|
|
||||||
// handle size transform
|
// handle size transform
|
||||||
if (isset($attributes['size'])) {
|
if (isset($attr['size'])) {
|
||||||
if (isset($this->_size_lookup[$attributes['size']])) {
|
if (isset($this->_size_lookup[$attr['size']])) {
|
||||||
$prepend_style .= 'font-size:' .
|
$prepend_style .= 'font-size:' .
|
||||||
$this->_size_lookup[$attributes['size']] . ';';
|
$this->_size_lookup[$attr['size']] . ';';
|
||||||
}
|
}
|
||||||
unset($attributes['size']);
|
unset($attr['size']);
|
||||||
}
|
}
|
||||||
|
|
||||||
if ($prepend_style) {
|
if ($prepend_style) {
|
||||||
$attributes['style'] = isset($attributes['style']) ?
|
$attr['style'] = isset($attr['style']) ?
|
||||||
$prepend_style . $attributes['style'] :
|
$prepend_style . $attr['style'] :
|
||||||
$prepend_style;
|
$prepend_style;
|
||||||
}
|
}
|
||||||
|
|
||||||
$new_tag = $tag->copy();
|
$new_tag = $tag->copy();
|
||||||
$new_tag->name = $this->transform_to;
|
$new_tag->name = $this->transform_to;
|
||||||
$new_tag->attributes = $attributes;
|
$new_tag->attr = $attr;
|
||||||
|
|
||||||
return $new_tag;
|
return $new_tag;
|
||||||
|
|
||||||
|
@@ -50,30 +50,29 @@ class HTMLPurifier_Token_Tag extends HTMLPurifier_Token // abstract
|
|||||||
/**
|
/**
|
||||||
* Associative array of the tag's attributes.
|
* Associative array of the tag's attributes.
|
||||||
*/
|
*/
|
||||||
var $attributes = array();
|
var $attr = array();
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Non-overloaded constructor, which lower-cases passed tag name.
|
* Non-overloaded constructor, which lower-cases passed tag name.
|
||||||
*
|
*
|
||||||
* @param $name String name.
|
* @param $name String name.
|
||||||
* @param $attributes Associative array of attributes.
|
* @param $attr Associative array of attributes.
|
||||||
*/
|
*/
|
||||||
function HTMLPurifier_Token_Tag($name, $attributes = array()) {
|
function HTMLPurifier_Token_Tag($name, $attr = array()) {
|
||||||
//if ($attributes === null) var_dump(debug_backtrace());
|
|
||||||
$this->name = ctype_lower($name) ? $name : strtolower($name);
|
$this->name = ctype_lower($name) ? $name : strtolower($name);
|
||||||
foreach ($attributes as $key => $value) {
|
foreach ($attr as $key => $value) {
|
||||||
// normalization only necessary when key is not lowercase
|
// normalization only necessary when key is not lowercase
|
||||||
if (!ctype_lower($key)) {
|
if (!ctype_lower($key)) {
|
||||||
$new_key = strtolower($key);
|
$new_key = strtolower($key);
|
||||||
if (!isset($attributes[$new_key])) {
|
if (!isset($attr[$new_key])) {
|
||||||
$attributes[$new_key] = $attributes[$key];
|
$attr[$new_key] = $attr[$key];
|
||||||
}
|
}
|
||||||
if ($new_key !== $key) {
|
if ($new_key !== $key) {
|
||||||
unset($attributes[$key]);
|
unset($attr[$key]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
$this->attributes = $attributes;
|
$this->attr = $attr;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -84,7 +83,7 @@ class HTMLPurifier_Token_Start extends HTMLPurifier_Token_Tag
|
|||||||
{
|
{
|
||||||
var $type = 'start';
|
var $type = 'start';
|
||||||
function copy() {
|
function copy() {
|
||||||
return new HTMLPurifier_Token_Start($this->name, $this->attributes);
|
return new HTMLPurifier_Token_Start($this->name, $this->attr);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -95,7 +94,7 @@ class HTMLPurifier_Token_Empty extends HTMLPurifier_Token_Tag
|
|||||||
{
|
{
|
||||||
var $type = 'empty';
|
var $type = 'empty';
|
||||||
function copy() {
|
function copy() {
|
||||||
return new HTMLPurifier_Token_Empty($this->name, $this->attributes);
|
return new HTMLPurifier_Token_Empty($this->name, $this->attr);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -37,12 +37,12 @@ class HTMLPurifier_TokenFactory
|
|||||||
/**
|
/**
|
||||||
* Creates a HTMLPurifier_Token_Start.
|
* Creates a HTMLPurifier_Token_Start.
|
||||||
* @param $name Tag name
|
* @param $name Tag name
|
||||||
* @param $attribute Associative array of attributes
|
* @param $attr Associative array of attributes
|
||||||
* @return Generated HTMLPurifier_Token_Start
|
* @return Generated HTMLPurifier_Token_Start
|
||||||
*/
|
*/
|
||||||
public function createStart($name, $attributes = array()) {
|
public function createStart($name, $attr = array()) {
|
||||||
$p = clone $this->p_start;
|
$p = clone $this->p_start;
|
||||||
$p->HTMLPurifier_Token_Tag($name, $attributes);
|
$p->HTMLPurifier_Token_Tag($name, $attr);
|
||||||
return $p;
|
return $p;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -60,12 +60,12 @@ class HTMLPurifier_TokenFactory
|
|||||||
/**
|
/**
|
||||||
* Creates a HTMLPurifier_Token_Empty.
|
* Creates a HTMLPurifier_Token_Empty.
|
||||||
* @param $name Tag name
|
* @param $name Tag name
|
||||||
* @param $attribute Associative array of attributes
|
* @param $attr Associative array of attributes
|
||||||
* @return Generated HTMLPurifier_Token_Empty
|
* @return Generated HTMLPurifier_Token_Empty
|
||||||
*/
|
*/
|
||||||
public function createEmpty($name, $attributes = array()) {
|
public function createEmpty($name, $attr = array()) {
|
||||||
$p = clone $this->p_empty;
|
$p = clone $this->p_empty;
|
||||||
$p->HTMLPurifier_Token_Tag($name, $attributes);
|
$p->HTMLPurifier_Token_Tag($name, $attr);
|
||||||
return $p;
|
return $p;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -12,6 +12,13 @@ class HTMLPurifier_URIScheme
|
|||||||
*/
|
*/
|
||||||
var $default_port = null;
|
var $default_port = null;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Whether or not URIs of this scheme are locatable by a browser.
|
||||||
|
* http and ftp are accessible, while mailto and news are not.
|
||||||
|
* @public
|
||||||
|
*/
|
||||||
|
var $browsable = false;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Validates the components of a URI
|
* Validates the components of a URI
|
||||||
* @note This implementation should be called by children if they define
|
* @note This implementation should be called by children if they define
|
||||||
@@ -23,9 +30,10 @@ class HTMLPurifier_URIScheme
|
|||||||
* @param $path Path of URI
|
* @param $path Path of URI
|
||||||
* @param $query Query of URI, found after question mark
|
* @param $query Query of URI, found after question mark
|
||||||
* @param $config HTMLPurifier_Config object
|
* @param $config HTMLPurifier_Config object
|
||||||
|
* @param $context HTMLPurifier_Context object
|
||||||
*/
|
*/
|
||||||
function validateComponents(
|
function validateComponents(
|
||||||
$userinfo, $host, $port, $path, $query, $config
|
$userinfo, $host, $port, $path, $query, $config, &$context
|
||||||
) {
|
) {
|
||||||
if ($this->default_port == $port) $port = null;
|
if ($this->default_port == $port) $port = null;
|
||||||
return array($userinfo, $host, $port, $path, $query);
|
return array($userinfo, $host, $port, $path, $query);
|
||||||
|
@@ -4,19 +4,39 @@ require_once 'HTMLPurifier/URIScheme.php';
|
|||||||
|
|
||||||
/**
|
/**
|
||||||
* Validates ftp (File Transfer Protocol) URIs as defined by generic RFC 1738.
|
* Validates ftp (File Transfer Protocol) URIs as defined by generic RFC 1738.
|
||||||
* @todo Typecode check on path
|
|
||||||
*/
|
*/
|
||||||
class HTMLPurifier_URIScheme_ftp extends HTMLPurifier_URIScheme {
|
class HTMLPurifier_URIScheme_ftp extends HTMLPurifier_URIScheme {
|
||||||
|
|
||||||
var $default_port = 21;
|
var $default_port = 21;
|
||||||
|
var $browsable = true; // usually
|
||||||
|
|
||||||
function validateComponents(
|
function validateComponents(
|
||||||
$userinfo, $host, $port, $path, $query, $config
|
$userinfo, $host, $port, $path, $query, $config, &$context
|
||||||
) {
|
) {
|
||||||
list($userinfo, $host, $port, $path, $query) =
|
list($userinfo, $host, $port, $path, $query) =
|
||||||
parent::validateComponents(
|
parent::validateComponents(
|
||||||
$userinfo, $host, $port, $path, $query, $config );
|
$userinfo, $host, $port, $path, $query, $config, $context );
|
||||||
// typecode check needed on path
|
$semicolon_pos = strrpos($path, ';'); // reverse
|
||||||
|
if ($semicolon_pos !== false) {
|
||||||
|
// typecode check
|
||||||
|
$type = substr($path, $semicolon_pos + 1); // no semicolon
|
||||||
|
$path = substr($path, 0, $semicolon_pos);
|
||||||
|
$type_ret = '';
|
||||||
|
if (strpos($type, '=') !== false) {
|
||||||
|
// figure out whether or not the declaration is correct
|
||||||
|
list($key, $typecode) = explode('=', $type, 2);
|
||||||
|
if ($key !== 'type') {
|
||||||
|
// invalid key, tack it back on encoded
|
||||||
|
$path .= '%3B' . $type;
|
||||||
|
} elseif ($typecode === 'a' || $typecode === 'i' || $typecode === 'd') {
|
||||||
|
$type_ret = ";type=$typecode";
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
$path .= '%3B' . $type;
|
||||||
|
}
|
||||||
|
$path = str_replace(';', '%3B', $path);
|
||||||
|
$path .= $type_ret;
|
||||||
|
}
|
||||||
return array($userinfo, $host, $port, $path, null);
|
return array($userinfo, $host, $port, $path, null);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -8,13 +8,14 @@ require_once 'HTMLPurifier/URIScheme.php';
|
|||||||
class HTMLPurifier_URIScheme_http extends HTMLPurifier_URIScheme {
|
class HTMLPurifier_URIScheme_http extends HTMLPurifier_URIScheme {
|
||||||
|
|
||||||
var $default_port = 80;
|
var $default_port = 80;
|
||||||
|
var $browsable = true;
|
||||||
|
|
||||||
function validateComponents(
|
function validateComponents(
|
||||||
$userinfo, $host, $port, $path, $query, $config
|
$userinfo, $host, $port, $path, $query, $config, &$context
|
||||||
) {
|
) {
|
||||||
list($userinfo, $host, $port, $path, $query) =
|
list($userinfo, $host, $port, $path, $query) =
|
||||||
parent::validateComponents(
|
parent::validateComponents(
|
||||||
$userinfo, $host, $port, $path, $query, $config );
|
$userinfo, $host, $port, $path, $query, $config, $context );
|
||||||
return array(null, $host, $port, $path, $query);
|
return array(null, $host, $port, $path, $query);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -13,12 +13,14 @@ require_once 'HTMLPurifier/URIScheme.php';
|
|||||||
|
|
||||||
class HTMLPurifier_URIScheme_mailto extends HTMLPurifier_URIScheme {
|
class HTMLPurifier_URIScheme_mailto extends HTMLPurifier_URIScheme {
|
||||||
|
|
||||||
|
var $browsable = false;
|
||||||
|
|
||||||
function validateComponents(
|
function validateComponents(
|
||||||
$userinfo, $host, $port, $path, $query, $config
|
$userinfo, $host, $port, $path, $query, $config, &$context
|
||||||
) {
|
) {
|
||||||
list($userinfo, $host, $port, $path, $query) =
|
list($userinfo, $host, $port, $path, $query) =
|
||||||
parent::validateComponents(
|
parent::validateComponents(
|
||||||
$userinfo, $host, $port, $path, $query, $config );
|
$userinfo, $host, $port, $path, $query, $config, $context );
|
||||||
// we need to validate path against RFC 2368's addr-spec
|
// we need to validate path against RFC 2368's addr-spec
|
||||||
return array(null, null, null, $path, $query);
|
return array(null, null, null, $path, $query);
|
||||||
}
|
}
|
||||||
|
@@ -7,12 +7,14 @@ require_once 'HTMLPurifier/URIScheme.php';
|
|||||||
*/
|
*/
|
||||||
class HTMLPurifier_URIScheme_news extends HTMLPurifier_URIScheme {
|
class HTMLPurifier_URIScheme_news extends HTMLPurifier_URIScheme {
|
||||||
|
|
||||||
|
var $browsable = false;
|
||||||
|
|
||||||
function validateComponents(
|
function validateComponents(
|
||||||
$userinfo, $host, $port, $path, $query, $config
|
$userinfo, $host, $port, $path, $query, $config, &$context
|
||||||
) {
|
) {
|
||||||
list($userinfo, $host, $port, $path, $query) =
|
list($userinfo, $host, $port, $path, $query) =
|
||||||
parent::validateComponents(
|
parent::validateComponents(
|
||||||
$userinfo, $host, $port, $path, $query, $config );
|
$userinfo, $host, $port, $path, $query, $config, $context );
|
||||||
// typecode check needed on path
|
// typecode check needed on path
|
||||||
return array(null, null, null, $path, null);
|
return array(null, null, null, $path, null);
|
||||||
}
|
}
|
||||||
|
@@ -8,13 +8,14 @@ require_once 'HTMLPurifier/URIScheme.php';
|
|||||||
class HTMLPurifier_URIScheme_nntp extends HTMLPurifier_URIScheme {
|
class HTMLPurifier_URIScheme_nntp extends HTMLPurifier_URIScheme {
|
||||||
|
|
||||||
var $default_port = 119;
|
var $default_port = 119;
|
||||||
|
var $browsable = false;
|
||||||
|
|
||||||
function validateComponents(
|
function validateComponents(
|
||||||
$userinfo, $host, $port, $path, $query, $config
|
$userinfo, $host, $port, $path, $query, $config, &$context
|
||||||
) {
|
) {
|
||||||
list($userinfo, $host, $port, $path, $query) =
|
list($userinfo, $host, $port, $path, $query) =
|
||||||
parent::validateComponents(
|
parent::validateComponents(
|
||||||
$userinfo, $host, $port, $path, $query, $config );
|
$userinfo, $host, $port, $path, $query, $config, $context );
|
||||||
return array(null, $host, $port, $path, null);
|
return array(null, $host, $port, $path, null);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -63,8 +63,9 @@ class HTMLPurifier_URISchemeRegistry
|
|||||||
* Retrieves a scheme validator object
|
* Retrieves a scheme validator object
|
||||||
* @param $scheme String scheme name like http or mailto
|
* @param $scheme String scheme name like http or mailto
|
||||||
* @param $config HTMLPurifier_Config object
|
* @param $config HTMLPurifier_Config object
|
||||||
|
* @param $context HTMLPurifier_Context object
|
||||||
*/
|
*/
|
||||||
function &getScheme($scheme, $config = null) {
|
function &getScheme($scheme, $config, &$context) {
|
||||||
if (!$config) $config = HTMLPurifier_Config::createDefault();
|
if (!$config) $config = HTMLPurifier_Config::createDefault();
|
||||||
$null = null; // for the sake of passing by reference
|
$null = null; // for the sake of passing by reference
|
||||||
|
|
||||||
|
@@ -1 +1 @@
|
|||||||
Deny from all
|
Deny from all
|
||||||
|
@@ -13,7 +13,7 @@ chdir( dirname(__FILE__) );
|
|||||||
$entity_dir = '../docs/entities/';
|
$entity_dir = '../docs/entities/';
|
||||||
|
|
||||||
// defines the output file for the serialized content.
|
// defines the output file for the serialized content.
|
||||||
$output_file = '../library/HTMLPurifier/EntityLookup/data.txt';
|
$output_file = '../library/HTMLPurifier/EntityLookup/entities.ser';
|
||||||
|
|
||||||
// courtesy of a PHP manual comment
|
// courtesy of a PHP manual comment
|
||||||
function unichr($dec) {
|
function unichr($dec) {
|
||||||
|
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user