mirror of
https://github.com/ezyang/htmlpurifier.git
synced 2025-08-04 13:18:00 +02:00
Compare commits
47 Commits
Author | SHA1 | Date | |
---|---|---|---|
|
48ce521572 | ||
|
728e6c5b44 | ||
|
8104145580 | ||
|
6ef8abd04f | ||
|
bc5871f389 | ||
|
30d75c999d | ||
|
64d8ca9831 | ||
|
d7642b8c70 | ||
|
3b30c2ca5b | ||
|
f43616f72d | ||
|
6740ba61af | ||
|
6a33945499 | ||
|
4660791682 | ||
|
b5c69d8ca5 | ||
|
e440f25bce | ||
|
665e80d223 | ||
|
69747ede8a | ||
|
49b3832ebf | ||
|
a365d4c688 | ||
|
7038fad788 | ||
|
50b272d75e | ||
|
bfb642d32c | ||
|
edb39601c7 | ||
|
694139d3bb | ||
|
81721ded5c | ||
|
371fb7c3d2 | ||
|
9e6953e619 | ||
|
2299f0c831 | ||
|
9dd4dcb27a | ||
|
aa0838492e | ||
|
df075c96e0 | ||
|
fbaa909d25 | ||
|
967f40fc11 | ||
|
5ee6ffe20f | ||
|
10d41d7130 | ||
|
65a628bcb7 | ||
|
a5b4ed2126 | ||
|
d20bbd8db3 | ||
|
b99573223d | ||
|
c6cfb68713 | ||
|
2259bfa40e | ||
|
de3b2b70fb | ||
|
4f0a5c0e22 | ||
|
fdd583253c | ||
|
a4be6ffe4d | ||
|
6de42d8d1d | ||
|
e9a519e589 |
11
Doxyfile
11
Doxyfile
@@ -4,7 +4,7 @@
|
|||||||
# Project related configuration options
|
# Project related configuration options
|
||||||
#---------------------------------------------------------------------------
|
#---------------------------------------------------------------------------
|
||||||
PROJECT_NAME = HTML Purifier
|
PROJECT_NAME = HTML Purifier
|
||||||
PROJECT_NUMBER = 1.0.0
|
PROJECT_NUMBER = 1.1.2
|
||||||
OUTPUT_DIRECTORY = "C:/Documents and Settings/Edward/My Documents/My Webs/htmlpurifier/docs/doxygen"
|
OUTPUT_DIRECTORY = "C:/Documents and Settings/Edward/My Documents/My Webs/htmlpurifier/docs/doxygen"
|
||||||
CREATE_SUBDIRS = NO
|
CREATE_SUBDIRS = NO
|
||||||
OUTPUT_LANGUAGE = English
|
OUTPUT_LANGUAGE = English
|
||||||
@@ -89,9 +89,12 @@ EXCLUDE =
|
|||||||
EXCLUDE_SYMLINKS = NO
|
EXCLUDE_SYMLINKS = NO
|
||||||
EXCLUDE_PATTERNS = */tests/* \
|
EXCLUDE_PATTERNS = */tests/* \
|
||||||
*/benchmarks/* \
|
*/benchmarks/* \
|
||||||
*/docs/phpdoc/* \
|
*/docs/* \
|
||||||
*/docs/doxygen/* \
|
*/test-settings.php \
|
||||||
*/test-settings.php
|
*/configdoc/* \
|
||||||
|
*/test-settings.php \
|
||||||
|
*/maintenance/* \
|
||||||
|
*/smoketests/*
|
||||||
EXAMPLE_PATH =
|
EXAMPLE_PATH =
|
||||||
EXAMPLE_PATTERNS = *
|
EXAMPLE_PATTERNS = *
|
||||||
EXAMPLE_RECURSIVE = NO
|
EXAMPLE_RECURSIVE = NO
|
||||||
|
203
INSTALL
203
INSTALL
@@ -2,89 +2,188 @@
|
|||||||
Install
|
Install
|
||||||
How to install HTML Purifier
|
How to install HTML Purifier
|
||||||
|
|
||||||
Being a library, there's no fancy GUI that will take you step-by-step through
|
HTML Purifier is designed to run out of the box, so actually using the library
|
||||||
configuring database credentials and other mumbo-jumbo. HTML Purifier is
|
is extremely easy. (Although, if you were looking for a step-by-step
|
||||||
designed to run "out of the box." Regardless, there are still a couple of
|
installation GUI, you've come to the wrong place!) The impatient can scroll
|
||||||
things you should be mindful of.
|
down to the bottom of this INSTALL document to see the code, but you really
|
||||||
|
should make sure a few things are properly done.
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
0. Compatibility
|
1. Compatibility
|
||||||
|
|
||||||
HTML Purifier works in both PHP 4 and PHP 5. I have run the test suite on
|
HTML Purifier works in both PHP 4 and PHP 5, from PHP 4.3.9 and up. It has no
|
||||||
these versions:
|
core dependencies with other libraries. (Whoopee!)
|
||||||
|
|
||||||
- 4.3.9, 4.3.11
|
Optional extensions are iconv (usually installed) and tidy (also common).
|
||||||
- 4.4.0, 4.4.4
|
If you use UTF-8 and don't plan on pretty-printing HTML, you can get away with
|
||||||
- 5.0.0, 5.0.4
|
not having either of these extensions.
|
||||||
- 5.1.0, 5.1.6
|
|
||||||
|
|
||||||
And can confidently say that HTML Purifier should work in all versions
|
|
||||||
between and afterwards. HTML Purifier definitely does not support PHP 4.2,
|
|
||||||
and PHP 4.3 branch support may go further back than that, but I haven't tested
|
|
||||||
any earlier versions.
|
|
||||||
|
|
||||||
I have been unable to get PHP 5.0.5 working on my computer, so if someone
|
|
||||||
wants to test that, be my guest. All tests were done on Windows XP Home,
|
|
||||||
but operating system is quite irrelevant in this particular case.
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
1. Including the proper files
|
2. Including the library
|
||||||
|
|
||||||
The library/ directory must be added to your path: HTML Purifier will not be
|
Simply use:
|
||||||
able to find the necessary includes otherwise. This is as simple as:
|
|
||||||
|
|
||||||
set_include_path('/path/to/htmlpurifier/library' . PATH_SEPARATOR . get_include_path());
|
require_once '/path/to/library/HTMLPurifier.auto.php';
|
||||||
|
|
||||||
...replacing /path/to/htmlpurifier with the actual location of the folder. Don't
|
...and you're good to go. Since HTML Purifier's codebase is fairly
|
||||||
worry, HTML Purifier is namespaced so unless you have another file named
|
large, I recommend only including HTML Purifier when you need it.
|
||||||
HTMLPurifier.php, the files won't collide with any of your includes.
|
|
||||||
|
|
||||||
Then, it's a simple matter of including the base file:
|
If you don't like your include_path to be fiddled around with, simply set
|
||||||
|
HTML Purifier's library/ directory to the include path yourself and then:
|
||||||
|
|
||||||
require_once 'HTMLPurifier.php';
|
require_once 'HTMLPurifier.php';
|
||||||
|
|
||||||
...and you're good to go.
|
Only the contents in the library/ folder are necessary, so you can remove
|
||||||
|
everything else when using HTML Purifier in a production environment.
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
2. Preparing the proper environment
|
3. Preparing the proper output environment
|
||||||
|
|
||||||
While no configuration is necessary, you first should take precautions regarding
|
HTML Purifier is all about web-standards, so accordingly your webpages should
|
||||||
the other output HTML that the filtered content will be going along with. Here
|
be standards compliant. HTML Purifier can deal with these doctypes:
|
||||||
is a (short) checklist:
|
|
||||||
|
|
||||||
* Have I specified XHTML 1.0 Transitional as the doctype?
|
* XHTML 1.0 Transitional (default)
|
||||||
* Have I specified UTF-8 as the character encoding?
|
* HTML 4.01 Transitional
|
||||||
|
|
||||||
I cannot stress the importance of these two bullets enough. Omitting either
|
...and these character encodings:
|
||||||
of them could have dire consequences not only for security but for plain
|
|
||||||
old usability. You can find a more in-depth discussion of why this is needed
|
|
||||||
in docs/security.txt, in the meantime, try to change your output so this is
|
|
||||||
the case.
|
|
||||||
|
|
||||||
If, for some reason, you are unable to switch to UTF-8 immediately, you can
|
* UTF-8 (default)
|
||||||
switch HTML Purifier's encoding. Note that the availability of encodings is
|
* Any encoding iconv supports (support is crippled for i18n though)
|
||||||
dependent on iconv, and you'll be missing characters if the charset you
|
|
||||||
choose doesn't have them.
|
The defaults are there for a reason: they are best-practice choices that
|
||||||
|
should not be changed lightly. For those of you in the dark, you can determine
|
||||||
|
the doctype from this code in your HTML documents:
|
||||||
|
|
||||||
|
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
|
||||||
|
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
|
||||||
|
|
||||||
|
...and the character encoding from this code:
|
||||||
|
|
||||||
|
<meta http-equiv="Content-type" content="text/html;charset=ENCODING">
|
||||||
|
|
||||||
|
For legacy codebases these declarations may be missing. If that is the case,
|
||||||
|
STOP, and read up on character encodings and doctypes (in that order). Here
|
||||||
|
are some links:
|
||||||
|
|
||||||
|
* http://www.joelonsoftware.com/articles/Unicode.html
|
||||||
|
* http://alistapart.com/stories/doctype/
|
||||||
|
|
||||||
|
You may currently be vulnerable to XSS and other security threats, and HTML
|
||||||
|
Purifier won't be able to fix that.
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
4. Configuration
|
||||||
|
|
||||||
|
HTML Purifier is designed to run out-of-the-box, but occasionally HTML
|
||||||
|
Purifier needs to be told what to do. If you answered no to any of these
|
||||||
|
questions, read on, otherwise, you can skip to the next section (or, if you're
|
||||||
|
into configuring things just for the heck of it, skip to 4.3).
|
||||||
|
|
||||||
|
* Am I using UTF-8?
|
||||||
|
* Am I using XHTML 1.0 Transitional?
|
||||||
|
|
||||||
|
If you answered no to any of these questions, instantiate a configuration
|
||||||
|
object and read on:
|
||||||
|
|
||||||
$config = HTMLPurifier_Config::createDefault();
|
$config = HTMLPurifier_Config::createDefault();
|
||||||
$config->set('Core', 'Encoding', $encoding);
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
3. Using the code
|
4.1. Setting a different character encoding
|
||||||
|
|
||||||
|
You really shouldn't use any other encoding except UTF-8, especially if you
|
||||||
|
plan to support multilingual websites (read section three for more details).
|
||||||
|
However, switching to UTF-8 is not always immediately feasible, so we can
|
||||||
|
adapt.
|
||||||
|
|
||||||
|
HTML Purifier uses iconv to support other character encodings; as such,
|
||||||
|
any encoding that iconv supports <http://www.gnu.org/software/libiconv/>
|
||||||
|
is supported by HTML Purifier with this code:
|
||||||
|
|
||||||
|
$config->set('Core', 'Encoding', /* put your encoding here */);
|
||||||
|
|
||||||
|
An example usage for Latin-1 websites (the most common encoding for English
|
||||||
|
websites):
|
||||||
|
|
||||||
|
$config->set('Core', 'Encoding', 'ISO-8859-1');
|
||||||
|
|
||||||
|
Note that HTML Purifier's support for non-Unicode encodings is crippled by the
|
||||||
|
fact that any character not supported by that encoding will be silently
|
||||||
|
dropped, EVEN if it is ampersand escaped. This is a current limitation of
|
||||||
|
HTML Purifier that we are NOT actively working to fix. Patches are welcome,
|
||||||
|
but there are so many other gotchas and problems in I18N for non-Unicode
|
||||||
|
encodings that this functionality is low priority. See
|
||||||
|
<http://ppewww.ph.gla.ac.uk/~flavell/charset/form-i18n.html> for a more
|
||||||
|
detailed lowdown on the topic.
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
4.2. Setting a different doctype
|
||||||
|
|
||||||
|
For those of you stuck using HTML 4.01 Transitional, you can disable
|
||||||
|
XHTML output like this:
|
||||||
|
|
||||||
|
$config->set('Core', 'XHTML', false);
|
||||||
|
|
||||||
|
I recommend that you use XHTML, although not as much as I recommend UTF-8. If
|
||||||
|
your HTML 4.01 page validates, good for you!
|
||||||
|
|
||||||
|
Currently, we can only guarantee transitional-compliant output; future
|
||||||
|
versions will also allow strict-compliant output.
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
4.3. Other settings
|
||||||
|
|
||||||
|
There are more configuration directives which can be read about
|
||||||
|
here: <http://hp.jpsband.org/live/configdoc/plain.html> They're a bit boring,
|
||||||
|
but they can help out for those of you who like to exert maximum control over
|
||||||
|
your code.
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
5. Using the code
|
||||||
|
|
||||||
The interface is mind-numbingly simple:
|
The interface is mind-numbingly simple:
|
||||||
|
|
||||||
$purifier = new HTMLPurifier();
|
$purifier = new HTMLPurifier();
|
||||||
$clean_html = $purifier->purify($dirty_html);
|
$clean_html = $purifier->purify( $dirty_html );
|
||||||
|
|
||||||
Or, if you're using the configuration object:
|
...or, if you're using the configuration object:
|
||||||
|
|
||||||
$purifier = new HTMLPurifier($config);
|
$purifier = new HTMLPurifier($config);
|
||||||
$clean_html = $purifier->purify($dirty_html);
|
$clean_html = $purifier->purify( $dirty_html );
|
||||||
|
|
||||||
That's it. For more examples, check out docs/examples/. Also, SLOW gives
|
That's it! For more examples, check out docs/examples/ (they aren't very
|
||||||
advice on what to do if HTML Purifier is slowing down your application.
|
different though). Also, SLOW gives advice on what to do if HTML Purifier
|
||||||
|
is slowing down your application.
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
6. Quick install
|
||||||
|
|
||||||
|
If your website is in UTF-8 and XHTML Transitional, use this code:
|
||||||
|
|
||||||
|
<?php
|
||||||
|
require_once '/path/to/htmlpurifier/library/HTMLPurifier.auto.php';
|
||||||
|
|
||||||
|
$purifier = new HTMLPurifier();
|
||||||
|
$clean_html = $purifier->purify($dirty_html);
|
||||||
|
?>
|
||||||
|
|
||||||
|
If your website is in a different encoding or doctype, use this code:
|
||||||
|
|
||||||
|
<?php
|
||||||
|
require_once '/path/to/htmlpurifier/library/HTMLPurifier.auto.php';
|
||||||
|
|
||||||
|
$config = HTMLPurifier_Config::createDefault();
|
||||||
|
$config->set('Core', 'Encoding', 'ISO-8859-1'); //replace with your encoding
|
||||||
|
$config->set('Core', 'XHTML', true); //replace with false if HTML 4.01
|
||||||
|
$purifier = new HTMLPurifier($config);
|
||||||
|
|
||||||
|
$clean_html = $purifier->purify($dirty_html);
|
||||||
|
?>
|
64
NEWS
64
NEWS
@@ -1,7 +1,49 @@
|
|||||||
NEWS ( CHANGELOG and HISTORY ) HTMLPurifier
|
NEWS ( CHANGELOG and HISTORY ) HTMLPurifier
|
||||||
|||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
||||||
|
|
||||||
1.0.1, unknown release date
|
= KEY ====================
|
||||||
|
! Feature
|
||||||
|
- Bugfix
|
||||||
|
+ Sub-comment
|
||||||
|
. Internal change
|
||||||
|
==========================
|
||||||
|
|
||||||
|
1.1.2, released 2006-09-30
|
||||||
|
! Add HTMLPurifier.auto.php stub file that configures include_path
|
||||||
|
- Documentation updated
|
||||||
|
+ INSTALL document rewritten
|
||||||
|
+ TODO added semi-lossy conversion
|
||||||
|
+ API Doxygen docs' file exclusions updated
|
||||||
|
+ Added notes on HTML versus XML attribute whitespace handling
|
||||||
|
+ Noted that HTMLPurifier_ChildDef_Custom isn't being used
|
||||||
|
+ Noted that config object's definitions are cached versions
|
||||||
|
- Fixed lack of attribute parsing in HTMLPurifier_Lexer_PEARSax3
|
||||||
|
- ftp:// URIs now have their typecodes checked
|
||||||
|
- Hooked up HTMLPurifier_ChildDef_Custom's unit tests (they weren't being run)
|
||||||
|
. Line endings standardized throughout project (svn:eol-style standardized)
|
||||||
|
. Refactored parseData() to general Lexer class
|
||||||
|
. Tester named "HTML Purifier" not "HTMLPurifier"
|
||||||
|
|
||||||
|
1.1.1, released 2006-09-24
|
||||||
|
! Configuration option to optionally Tidy up output for indentation to make up
|
||||||
|
for dropped whitespace by DOMLex (pretty-printing for the entire application
|
||||||
|
should be done by a page-wide Tidy)
|
||||||
|
- Various documentation updates
|
||||||
|
- Fixed parse error in configuration documentation script
|
||||||
|
- Fixed fatal error in benchmark scripts, slightly augmented
|
||||||
|
- As far as possible, whitespace is preserved in-between table children
|
||||||
|
- Sample test-settings.php file included
|
||||||
|
|
||||||
|
1.1.0, released 2006-09-16
|
||||||
|
! Directive documentation generation using XSLT
|
||||||
|
! XHTML can now be turned off, output becomes <br>
|
||||||
|
- Made URI validator more forgiving: will ignore leading and trailing
|
||||||
|
quotes, apostrophes and less than or greater than signs.
|
||||||
|
- Enforce alphanumeric namespace and directive names for configuration.
|
||||||
|
- Table child definition made more flexible, will fix up poorly ordered elements
|
||||||
|
. Renamed ConfigDef to ConfigSchema
|
||||||
|
|
||||||
|
1.0.1, released 2006-09-04
|
||||||
- Fixed slight bug in DOMLex attribute parsing
|
- Fixed slight bug in DOMLex attribute parsing
|
||||||
- Fixed rejection of case-insensitive configuration values when there is a
|
- Fixed rejection of case-insensitive configuration values when there is a
|
||||||
set of allowed values. This manifested in %Core.Encoding.
|
set of allowed values. This manifested in %Core.Encoding.
|
||||||
@@ -9,17 +51,17 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier
|
|||||||
space in them. This manifested in TinyMCE.
|
space in them. This manifested in TinyMCE.
|
||||||
|
|
||||||
1.0.0, released 2006-09-01
|
1.0.0, released 2006-09-01
|
||||||
|
! Shorthand CSS properties implemented: font, border, background, list-style
|
||||||
|
! Basic color keywords translated into hexadecimal values
|
||||||
|
! Table CSS properties implemented
|
||||||
|
! Support for charsets other than UTF-8 (defined by iconv)
|
||||||
|
! Malformed UTF-8 and non-SGML character detection and cleaning implemented
|
||||||
- Fixed broken numeric entity conversion
|
- Fixed broken numeric entity conversion
|
||||||
- Malformed UTF-8 and non-SGML character detection and cleaning implemented
|
|
||||||
- API documentation completed
|
- API documentation completed
|
||||||
- Shorthand CSS properties implemented: font, border, background, list-style
|
. (HTML|CSS)Definition de-singleton-ized
|
||||||
- Basic color keywords translated into hexadecimal values
|
|
||||||
- Table CSS properties implemented
|
|
||||||
- (HTML|CSS)Definition de-singleton-ized
|
|
||||||
- Support for charsets other than UTF-8 (defined by iconv)
|
|
||||||
|
|
||||||
1.0.0beta, released 2006-08-16
|
1.0.0beta, released 2006-08-16
|
||||||
- First public release, most functionality implemented. Notable omissions are:
|
! First public release, most functionality implemented. Notable omissions are:
|
||||||
. Shorthand CSS properties
|
+ Shorthand CSS properties
|
||||||
. Table CSS properties
|
+ Table CSS properties
|
||||||
. Deprecated attribute transformations
|
+ Deprecated attribute transformations
|
||||||
|
24
README
24
README
@@ -1,13 +1,13 @@
|
|||||||
|
|
||||||
README
|
|
||||||
All about HTMLPurifier
|
|
||||||
|
|
||||||
HTMLPurifier is an HTML filtering solution. It uses a unique combination of
|
README
|
||||||
robust whitelists and aggressive parsing to ensure that not only are XSS
|
All about HTMLPurifier
|
||||||
attacks thwarted, but the resulting HTML is standards compliant.
|
|
||||||
|
HTMLPurifier is an HTML filtering solution. It uses a unique combination of
|
||||||
See INSTALL on how to use the library. See docs/ for more developer-oriented
|
robust whitelists and aggressive parsing to ensure that not only are XSS
|
||||||
documentation as well as some code examples. Users of TinyMCE or FCKeditor
|
attacks thwarted, but the resulting HTML is standards compliant.
|
||||||
may be especially interested in WYSIWYG.
|
|
||||||
|
See INSTALL on how to use the library. See docs/ for more developer-oriented
|
||||||
HTMLPurifier can be found on the web at: http://hp.jpsband.org/
|
documentation as well as some code examples. Users of TinyMCE or FCKeditor
|
||||||
|
may be especially interested in WYSIWYG.
|
||||||
|
|
||||||
|
HTMLPurifier can be found on the web at: http://hp.jpsband.org/
|
||||||
|
13
SLOW
13
SLOW
@@ -17,18 +17,23 @@ second tacked on to the load time probably isn't going to be that huge of
|
|||||||
a problem. Then, displaying the content is a simple matter of outputting
|
a problem. Then, displaying the content is a simple matter of outputting
|
||||||
it directly from your database/filesystem. The trouble with this method is
|
it directly from your database/filesystem. The trouble with this method is
|
||||||
that your user loses the original text, and when doing edits, will be
|
that your user loses the original text, and when doing edits, will be
|
||||||
handling the filtered text. Of course, maybe that's a good thing. If you
|
handling the filtered text. While this may be a good thing, especially if
|
||||||
don't mind a little extra complexity, you can try...
|
you're using a WYSIWYG editor, it can also result in data-loss if a user
|
||||||
|
expects certain markup to be available but it isn't.
|
||||||
|
|
||||||
2. Caching the filtered output - accept the submitted text and put it
|
2. Caching the filtered output - accept the submitted text and put it
|
||||||
unaltered into the database, but then also generate a filtered version and
|
unaltered into the database, but then also generate a filtered version and
|
||||||
stash that in the database. Serve the filtered version to readers, and the
|
stash that in the database. Serve the filtered version to readers, and the
|
||||||
unaltered version to editors. If need be, you can invalidate the cache and
|
unaltered version to editors. If need be, you can invalidate the cache and
|
||||||
have the cached filtered version be regenerated on the first page view. Pros?
|
have the cached filtered version be regenerated on the first page view. Pros?
|
||||||
Full data retention. Cons? It's more complicated.
|
Full data retention. Cons? It's more complicated, and opens other editors
|
||||||
|
up to XSS if they are using a WYSIWYG editor (to fix that, they'd have to
|
||||||
|
be able to get their hands on the *really* original text served in plaintext
|
||||||
|
mode).
|
||||||
|
|
||||||
In short, inbound filtering is almost as simple as outbound filtering, but
|
In short, inbound filtering is almost as simple as outbound filtering, but
|
||||||
it has some drawbacks which cannot be fixed unless you save both the original
|
it has some drawbacks which cannot be fixed unless you save both the original
|
||||||
and the filtered versions.
|
and the filtered versions.
|
||||||
|
|
||||||
There is a third option: profile and optimize HTMLPurifier yourself. ;-)
|
There is a third option: profile and optimize HTMLPurifier yourself. Be sure
|
||||||
|
to tell me if you decide to do that! ;-)
|
||||||
|
31
TODO
31
TODO
@@ -5,33 +5,34 @@ Ongoing
|
|||||||
- Lots of profiling, make it faster!
|
- Lots of profiling, make it faster!
|
||||||
- Plugins for major CMSes (very tricky issue)
|
- Plugins for major CMSes (very tricky issue)
|
||||||
|
|
||||||
1.1 release
|
|
||||||
- Directive documentation generation
|
|
||||||
- Rewrite table's child definition to be faster, smart, and regexp free
|
|
||||||
- Allow HTML 4.01 output (cosmetic changes to the generator)
|
|
||||||
|
|
||||||
1.2 release
|
1.2 release
|
||||||
- Additional support for poorly written HTML
|
|
||||||
- Implement all non-essential attribute transforms
|
|
||||||
- Microsoft Word HTML cleaning (i.e. MsoNormal)
|
|
||||||
|
|
||||||
1.3 release
|
|
||||||
- Make URI validation routines tighter (especially mailto)
|
- Make URI validation routines tighter (especially mailto)
|
||||||
- More extensive URI filtering schemes
|
- More extensive URI filtering schemes
|
||||||
- Allow for background-image and list-style-image (see above)
|
- Allow for background-image and list-style-image (see above)
|
||||||
- Distinguish between different types of URIs, for instance, a mailto URI
|
- Distinguish between different types of URIs, for instance, a mailto URI
|
||||||
in IMG SRC is nonsensical
|
in IMG SRC is nonsensical
|
||||||
|
- Error logging for filtering/cleanup procedures
|
||||||
|
|
||||||
2.0 release
|
1.3 release
|
||||||
- Add various "levels" of cleaning
|
- Add various "levels" of cleaning
|
||||||
- Related: Allow strict (X)HTML
|
- Related: Allow strict (X)HTML
|
||||||
|
|
||||||
|
1.4 release
|
||||||
|
- Additional support for poorly written HTML
|
||||||
|
- Implement all non-essential attribute transforms
|
||||||
|
- Microsoft Word HTML cleaning (i.e. MsoNormal)
|
||||||
|
|
||||||
|
2.0 release
|
||||||
|
- Formatters for plaintext
|
||||||
|
- Auto-paragraphing (be sure to leverage fact that we know when things
|
||||||
|
shouldn't be paragraphed, such as lists and tables).
|
||||||
|
- Linkify URLs
|
||||||
|
- Smileys
|
||||||
|
|
||||||
3.0 release
|
3.0 release
|
||||||
- Extended HTML capabilities based on namespacing and tag transforms
|
- Extended HTML capabilities based on namespacing and tag transforms
|
||||||
- Hooks for adding custom processors to custom namespaced tags and
|
- Hooks for adding custom processors to custom namespaced tags and
|
||||||
attributes, offer default implementation
|
attributes, offer default implementation
|
||||||
- Auto-paragraphing (be sure to leverage fact that we know when things
|
|
||||||
shouldn't be paragraphed, such as lists and tables).
|
|
||||||
- Lots of documentation and samples
|
- Lots of documentation and samples
|
||||||
|
|
||||||
Unknown release (on a scratch-an-itch basis)
|
Unknown release (on a scratch-an-itch basis)
|
||||||
@@ -42,9 +43,11 @@ Unknown release (on a scratch-an-itch basis)
|
|||||||
- Fixes for Firefox's inability to handle COL alignment props (Bug 915)
|
- Fixes for Firefox's inability to handle COL alignment props (Bug 915)
|
||||||
- Automatically add non-breaking spaces to empty table cells when
|
- Automatically add non-breaking spaces to empty table cells when
|
||||||
empty-cells:show is applied to have compatibility with Internet Explorer
|
empty-cells:show is applied to have compatibility with Internet Explorer
|
||||||
- Pretty-printing HTML (adds dependency of Generator to HTMLDefinition)
|
|
||||||
- Non-lossy dumb alternate character encoding transformations, achieved by
|
- Non-lossy dumb alternate character encoding transformations, achieved by
|
||||||
numerically encoding all non-ASCII characters
|
numerically encoding all non-ASCII characters
|
||||||
|
- Semi-lossy dumb alternate character encoding transformations, achieved by
|
||||||
|
encoding all characters that have string entity equivalents
|
||||||
|
|
||||||
Wontfix
|
Wontfix
|
||||||
- Non-lossy smart alternate character encoding transformations
|
- Non-lossy smart alternate character encoding transformations
|
||||||
|
- Pretty-printing HTML, users can use Tidy on the output on entire page
|
||||||
|
7
WYSIWYG
7
WYSIWYG
@@ -1,6 +1,6 @@
|
|||||||
|
|
||||||
WYSIWYG - What You See Is What You Get
|
WYSIWYG - What You See Is What You Get
|
||||||
HTMLPurifier: A Pretty Good Fit for TinyMCE and FCKeditor
|
HTML Purifier: A Pretty Good Fit for TinyMCE and FCKeditor
|
||||||
|
|
||||||
Javascript-based WYSIWYG editors, simply stated, are quite amazing. But I've
|
Javascript-based WYSIWYG editors, simply stated, are quite amazing. But I've
|
||||||
always been wary about using them due to security issues: they handle the
|
always been wary about using them due to security issues: they handle the
|
||||||
@@ -13,6 +13,9 @@ other markup languages still reign supreme. Put simply: filtering HTML is
|
|||||||
hard work, and these WYSIWYG authors don't offer anything to alleviate that
|
hard work, and these WYSIWYG authors don't offer anything to alleviate that
|
||||||
trouble. Therein lies the solution:
|
trouble. Therein lies the solution:
|
||||||
|
|
||||||
HTMLPurifier is perfect for filtering pure-HTML input from WYSIWYG editors.
|
HTML Purifier is perfect for filtering pure-HTML input from WYSIWYG editors.
|
||||||
|
|
||||||
Enough said.
|
Enough said.
|
||||||
|
|
||||||
|
There is a proof-of-concept integration of HTML Purifier with the Mantis
|
||||||
|
bugtracker at http://hp.jpsband.org/mantis/
|
||||||
|
@@ -3,15 +3,24 @@
|
|||||||
// emulates inserting a dir called HTMLPurifier into your class dir
|
// emulates inserting a dir called HTMLPurifier into your class dir
|
||||||
set_include_path(get_include_path() . PATH_SEPARATOR . '../library/');
|
set_include_path(get_include_path() . PATH_SEPARATOR . '../library/');
|
||||||
|
|
||||||
require_once 'HTMLPurifier/ConfigDef.php';
|
@include_once '../test-settings.php';
|
||||||
require_once 'HTMLPurifier/Config.php';
|
|
||||||
require_once 'HTMLPurifier/Lexer/DirectLex.php';
|
|
||||||
require_once 'HTMLPurifier/Lexer/PEARSax3.php';
|
|
||||||
|
|
||||||
$LEXERS = array(
|
require_once 'HTMLPurifier/ConfigSchema.php';
|
||||||
'DirectLex' => new HTMLPurifier_Lexer_DirectLex(),
|
require_once 'HTMLPurifier/Config.php';
|
||||||
'PEARSax3' => new HTMLPurifier_Lexer_PEARSax3()
|
|
||||||
);
|
$LEXERS = array();
|
||||||
|
$RUNS = isset($GLOBALS['HTMLPurifierTest']['Runs'])
|
||||||
|
? $GLOBALS['HTMLPurifierTest']['Runs'] : 2;
|
||||||
|
|
||||||
|
require_once 'HTMLPurifier/Lexer/DirectLex.php';
|
||||||
|
$LEXERS['DirectLex'] = new HTMLPurifier_Lexer_DirectLex();
|
||||||
|
|
||||||
|
if (!empty($GLOBALS['HTMLPurifierTest']['PEAR'])) {
|
||||||
|
require_once 'HTMLPurifier/Lexer/PEARSax3.php';
|
||||||
|
$LEXERS['PEARSax3'] = new HTMLPurifier_Lexer_PEARSax3();
|
||||||
|
} else {
|
||||||
|
exit('PEAR required to perform benchmark.');
|
||||||
|
}
|
||||||
|
|
||||||
if (version_compare(PHP_VERSION, '5', '>=')) {
|
if (version_compare(PHP_VERSION, '5', '>=')) {
|
||||||
require_once 'HTMLPurifier/Lexer/DOMLex.php';
|
require_once 'HTMLPurifier/Lexer/DOMLex.php';
|
||||||
@@ -56,9 +65,12 @@ class RowTimer extends Benchmark_Timer
|
|||||||
if ($standard == false) $standard = $v['diff'];
|
if ($standard == false) $standard = $v['diff'];
|
||||||
|
|
||||||
$perc = $v['diff'] * 100 / $standard;
|
$perc = $v['diff'] * 100 / $standard;
|
||||||
|
$bad_run = ($v['diff'] < 0);
|
||||||
|
|
||||||
$out .= '<td align="right">' . number_format($perc, 2, '.', '') .
|
$out .= '<td align="right"'.
|
||||||
'%</td>';
|
($bad_run ? ' style="color:#AAA;"' : '').
|
||||||
|
'>' . number_format($perc, 2, '.', '') .
|
||||||
|
'%</td><td>'.number_format($v['diff'],4,'.','').'</td>';
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -79,13 +91,13 @@ function print_lexers() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
function do_benchmark($name, $document) {
|
function do_benchmark($name, $document) {
|
||||||
global $LEXERS;
|
global $LEXERS, $RUNS;
|
||||||
|
|
||||||
$timer = new RowTimer($name);
|
$timer = new RowTimer($name);
|
||||||
$timer->start();
|
$timer->start();
|
||||||
|
|
||||||
foreach($LEXERS as $key => $lexer) {
|
foreach($LEXERS as $key => $lexer) {
|
||||||
$tokens = $lexer->tokenizeHTML($document);
|
for ($i=0; $i<$RUNS; $i++) $tokens = $lexer->tokenizeHTML($document);
|
||||||
$timer->setMarker($key);
|
$timer->setMarker($key);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -103,7 +115,7 @@ function do_benchmark($name, $document) {
|
|||||||
<table border="1">
|
<table border="1">
|
||||||
<tr><th>Case</th><?php
|
<tr><th>Case</th><?php
|
||||||
foreach ($LEXERS as $key => $value) {
|
foreach ($LEXERS as $key => $value) {
|
||||||
echo '<th>' . htmlspecialchars($key) . '</th>';
|
echo '<th colspan="2">' . htmlspecialchars($key) . '</th>';
|
||||||
}
|
}
|
||||||
?></tr>
|
?></tr>
|
||||||
<?php
|
<?php
|
||||||
|
@@ -2,7 +2,7 @@
|
|||||||
|
|
||||||
set_include_path(get_include_path() . PATH_SEPARATOR . '../library/');
|
set_include_path(get_include_path() . PATH_SEPARATOR . '../library/');
|
||||||
|
|
||||||
require_once 'HTMLPurifier/ConfigDef.php';
|
require_once 'HTMLPurifier/ConfigSchema.php';
|
||||||
require_once 'HTMLPurifier/Config.php';
|
require_once 'HTMLPurifier/Config.php';
|
||||||
require_once 'HTMLPurifier/Lexer/DirectLex.php';
|
require_once 'HTMLPurifier/Lexer/DirectLex.php';
|
||||||
|
|
||||||
|
214
configdoc/generate.php
Normal file
214
configdoc/generate.php
Normal file
@@ -0,0 +1,214 @@
|
|||||||
|
<?php
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Generates XML and HTML documents describing configuration.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/*
|
||||||
|
TODO:
|
||||||
|
- make XML format richer (see below)
|
||||||
|
- extend XSLT transformation (see the corresponding XSLT file)
|
||||||
|
- allow generation of packaged docs that can be easily moved
|
||||||
|
- multipage documentation
|
||||||
|
- determine how to multilingualize
|
||||||
|
- factor out code into classes
|
||||||
|
- generate a table of contents
|
||||||
|
*/
|
||||||
|
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// Check and configure environment
|
||||||
|
|
||||||
|
if (version_compare('5', PHP_VERSION, '>')) exit('Requires PHP 5 or higher.');
|
||||||
|
error_reporting(E_ALL);
|
||||||
|
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// Include HTML Purifier library
|
||||||
|
|
||||||
|
set_include_path('../library' . PATH_SEPARATOR . get_include_path());
|
||||||
|
require_once 'HTMLPurifier.php';
|
||||||
|
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// Setup convenience functions
|
||||||
|
|
||||||
|
function appendHTMLDiv($document, $node, $html) {
|
||||||
|
global $purifier;
|
||||||
|
$html = $purifier->purify($html);
|
||||||
|
$dom_html = $document->createDocumentFragment();
|
||||||
|
$dom_html->appendXML($html);
|
||||||
|
|
||||||
|
$dom_div = $document->createElement('div');
|
||||||
|
$dom_div->setAttribute('xmlns', 'http://www.w3.org/1999/xhtml');
|
||||||
|
$dom_div->appendChild($dom_html);
|
||||||
|
|
||||||
|
$node->appendChild($dom_div);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// Load copies of HTMLPurifier_ConfigSchema and HTMLPurifier
|
||||||
|
|
||||||
|
$schema = HTMLPurifier_ConfigSchema::instance();
|
||||||
|
$purifier = new HTMLPurifier();
|
||||||
|
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// Generate types.xml, a document describing the constraint "type"
|
||||||
|
|
||||||
|
$types_document = new DOMDocument('1.0', 'UTF-8');
|
||||||
|
$types_root = $types_document->createElement('types');
|
||||||
|
$types_document->appendChild($types_root);
|
||||||
|
$types_document->formatOutput = true;
|
||||||
|
foreach ($schema->types as $name => $expanded_name) {
|
||||||
|
$types_type = $types_document->createElement('type', $expanded_name);
|
||||||
|
$types_type->setAttribute('id', $name);
|
||||||
|
$types_root->appendChild($types_type);
|
||||||
|
}
|
||||||
|
$types_document->save('types.xml');
|
||||||
|
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// Generate configdoc.xml, a document documenting configuration directives
|
||||||
|
|
||||||
|
$dom_document = new DOMDocument('1.0', 'UTF-8');
|
||||||
|
$dom_root = $dom_document->createElement('configdoc');
|
||||||
|
$dom_document->appendChild($dom_root);
|
||||||
|
$dom_document->formatOutput = true;
|
||||||
|
|
||||||
|
// add the name of the application
|
||||||
|
$dom_root->appendChild($dom_document->createElement('title', 'HTML Purifier'));
|
||||||
|
|
||||||
|
/*
|
||||||
|
TODO for XML format:
|
||||||
|
- namespace descriptions
|
||||||
|
- enumerated values
|
||||||
|
- default values
|
||||||
|
- create a definition (DTD or other) once interface stabilizes
|
||||||
|
*/
|
||||||
|
|
||||||
|
foreach($schema->info as $namespace_name => $namespace_info) {
|
||||||
|
|
||||||
|
$dom_namespace = $dom_document->createElement('namespace');
|
||||||
|
$dom_root->appendChild($dom_namespace);
|
||||||
|
|
||||||
|
$dom_namespace->setAttribute('id', $namespace_name);
|
||||||
|
$dom_namespace->appendChild(
|
||||||
|
$dom_document->createElement('name', $namespace_name)
|
||||||
|
);
|
||||||
|
$dom_namespace_description = $dom_document->createElement('description');
|
||||||
|
$dom_namespace->appendChild($dom_namespace_description);
|
||||||
|
appendHTMLDiv($dom_document, $dom_namespace_description,
|
||||||
|
$schema->info_namespace[$namespace_name]->description);
|
||||||
|
|
||||||
|
foreach ($namespace_info as $name => $info) {
|
||||||
|
|
||||||
|
$dom_directive = $dom_document->createElement('directive');
|
||||||
|
$dom_namespace->appendChild($dom_directive);
|
||||||
|
|
||||||
|
$dom_directive->setAttribute('id', $namespace_name . '.' . $name);
|
||||||
|
$dom_directive->appendChild(
|
||||||
|
$dom_document->createElement('name', $name)
|
||||||
|
);
|
||||||
|
|
||||||
|
$dom_constraints = $dom_document->createElement('constraints');
|
||||||
|
$dom_directive->appendChild($dom_constraints);
|
||||||
|
|
||||||
|
$dom_constraints->appendChild(
|
||||||
|
$dom_document->createElement('type', $info->type)
|
||||||
|
);
|
||||||
|
if ($info->allowed !== true) {
|
||||||
|
$dom_allowed = $dom_document->createElement('allowed');
|
||||||
|
$dom_constraints->appendChild($dom_allowed);
|
||||||
|
foreach ($info->allowed as $allowed => $bool) {
|
||||||
|
$dom_allowed->appendChild(
|
||||||
|
$dom_document->createElement('value', $allowed)
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
$raw_default = $schema->defaults[$namespace_name][$name];
|
||||||
|
if (is_bool($raw_default)) {
|
||||||
|
$default = $raw_default ? 'true' : 'false';
|
||||||
|
} elseif (is_string($raw_default)) {
|
||||||
|
$default = "\"$raw_default\"";
|
||||||
|
} else {
|
||||||
|
$default = print_r(
|
||||||
|
$schema->defaults[$namespace_name][$name], true
|
||||||
|
);
|
||||||
|
}
|
||||||
|
$dom_constraints->appendChild(
|
||||||
|
$dom_document->createElement('default', $default)
|
||||||
|
);
|
||||||
|
|
||||||
|
$dom_descriptions = $dom_document->createElement('descriptions');
|
||||||
|
$dom_directive->appendChild($dom_descriptions);
|
||||||
|
|
||||||
|
foreach ($info->descriptions as $file => $file_descriptions) {
|
||||||
|
foreach ($file_descriptions as $line => $description) {
|
||||||
|
$dom_description = $dom_document->createElement('description');
|
||||||
|
$dom_description->setAttribute('file', $file);
|
||||||
|
$dom_description->setAttribute('line', $line);
|
||||||
|
appendHTMLDiv($dom_document, $dom_description, $description);
|
||||||
|
$dom_descriptions->appendChild($dom_description);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
// print_r($dom_document->saveXML());
|
||||||
|
|
||||||
|
// save a copy of the raw XML
|
||||||
|
$dom_document->save('configdoc.xml');
|
||||||
|
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// Generate final output using XSLT
|
||||||
|
|
||||||
|
// load the stylesheet
|
||||||
|
$xsl_stylesheet_name = 'plain';
|
||||||
|
$xsl_stylesheet = "styles/$xsl_stylesheet_name.xsl";
|
||||||
|
$xsl_dom_stylesheet = new DOMDocument();
|
||||||
|
$xsl_dom_stylesheet->load($xsl_stylesheet);
|
||||||
|
|
||||||
|
// setup the XSLT processor
|
||||||
|
$xsl_processor = new XSLTProcessor();
|
||||||
|
|
||||||
|
// perform the transformation
|
||||||
|
$xsl_processor->importStylesheet($xsl_dom_stylesheet);
|
||||||
|
$html_output = $xsl_processor->transformToXML($dom_document);
|
||||||
|
|
||||||
|
// some slight fudges to preserve backwards compatibility
|
||||||
|
$html_output = str_replace('/>', ' />', $html_output); // <br /> not <br>
|
||||||
|
$html_output = str_replace(' xmlns=""', '', $html_output); // rm unnecessary xmlns
|
||||||
|
|
||||||
|
if (class_exists('Tidy')) {
|
||||||
|
// cleanup output
|
||||||
|
$config = array(
|
||||||
|
'indent' => true,
|
||||||
|
'output-xhtml' => true,
|
||||||
|
'wrap' => 80
|
||||||
|
);
|
||||||
|
$tidy = new Tidy;
|
||||||
|
$tidy->parseString($html_output, $config, 'utf8');
|
||||||
|
$tidy->cleanRepair();
|
||||||
|
$html_output = (string) $tidy;
|
||||||
|
}
|
||||||
|
|
||||||
|
// write it to a file (todo: parse into separate pages)
|
||||||
|
file_put_contents("$xsl_stylesheet_name.html", $html_output);
|
||||||
|
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// Output for instant feedback
|
||||||
|
|
||||||
|
if (php_sapi_name() != 'cli') {
|
||||||
|
echo $html_output;
|
||||||
|
} else {
|
||||||
|
echo 'Files generated successfully.';
|
||||||
|
}
|
||||||
|
|
||||||
|
?>
|
7
configdoc/styles/plain.css
Normal file
7
configdoc/styles/plain.css
Normal file
@@ -0,0 +1,7 @@
|
|||||||
|
table {border-collapse:collapse;}
|
||||||
|
table td, table th {padding:0.2em;}
|
||||||
|
|
||||||
|
table.constraints {margin:0 0 1em;}
|
||||||
|
table.constraints th {text-align:left;padding-left:0.4em;}
|
||||||
|
table.constraints td {padding-right:0.4em;}
|
||||||
|
table.constraints td pre {margin:0;}
|
105
configdoc/styles/plain.xsl
Normal file
105
configdoc/styles/plain.xsl
Normal file
@@ -0,0 +1,105 @@
|
|||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<xsl:stylesheet
|
||||||
|
version = "1.0"
|
||||||
|
xmlns = "http://www.w3.org/1999/xhtml"
|
||||||
|
xmlns:xsl = "http://www.w3.org/1999/XSL/Transform"
|
||||||
|
>
|
||||||
|
<xsl:output
|
||||||
|
method = "xml"
|
||||||
|
encoding = "UTF-8"
|
||||||
|
doctype-public = "-//W3C//DTD XHTML 1.0 Transitional//EN"
|
||||||
|
doctype-system = "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"
|
||||||
|
indent = "no"
|
||||||
|
media-type = "text/html"
|
||||||
|
/>
|
||||||
|
|
||||||
|
<xsl:variable name="typeLookup" select="document('../types.xml')" />
|
||||||
|
|
||||||
|
<xsl:template match="/">
|
||||||
|
<html lang="en" xml:lang="en">
|
||||||
|
<head>
|
||||||
|
<title><xsl:value-of select="/configdoc/title" /> Configuration Documentation</title>
|
||||||
|
<meta http-equiv="Content-Type" content="text/html;charset=UTF-8" />
|
||||||
|
<link rel="stylesheet" type="text/css" href="styles/plain.css" />
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
<xsl:apply-templates />
|
||||||
|
</body>
|
||||||
|
</html>
|
||||||
|
</xsl:template>
|
||||||
|
|
||||||
|
<xsl:template match="title">
|
||||||
|
<h1><xsl:value-of select="/configdoc/title" /> Configuration Documentation</h1>
|
||||||
|
</xsl:template>
|
||||||
|
|
||||||
|
<xsl:template match="namespace">
|
||||||
|
<xsl:apply-templates />
|
||||||
|
<xsl:if test="count(child::directive)=0">
|
||||||
|
<p>No configuration directives defined for this namespace.</p>
|
||||||
|
</xsl:if>
|
||||||
|
</xsl:template>
|
||||||
|
<xsl:template match="namespace/name">
|
||||||
|
<h2 id="{../@id}"><xsl:value-of select="text()" /></h2>
|
||||||
|
</xsl:template>
|
||||||
|
<xsl:template match="namespace/description">
|
||||||
|
<div class="description">
|
||||||
|
<xsl:copy-of select="div/node()" />
|
||||||
|
</div>
|
||||||
|
</xsl:template>
|
||||||
|
|
||||||
|
<xsl:template match="directive">
|
||||||
|
<xsl:apply-templates />
|
||||||
|
</xsl:template>
|
||||||
|
<xsl:template match="directive/name">
|
||||||
|
<h3 id="{../@id}"><xsl:value-of select="text()" /></h3>
|
||||||
|
</xsl:template>
|
||||||
|
<xsl:template match="directive/constraints">
|
||||||
|
<table class="constraints">
|
||||||
|
<xsl:apply-templates />
|
||||||
|
<!-- Calculated other values -->
|
||||||
|
<tr>
|
||||||
|
<th>Used by:</th>
|
||||||
|
<td>
|
||||||
|
<xsl:for-each select="../descriptions/description">
|
||||||
|
<xsl:if test="position()>1">, </xsl:if>
|
||||||
|
<xsl:value-of select="@file" />
|
||||||
|
</xsl:for-each>
|
||||||
|
</td>
|
||||||
|
</tr>
|
||||||
|
</table>
|
||||||
|
</xsl:template>
|
||||||
|
<xsl:template match="directive//description">
|
||||||
|
<div class="description">
|
||||||
|
<xsl:copy-of select="div/node()" />
|
||||||
|
</div>
|
||||||
|
</xsl:template>
|
||||||
|
|
||||||
|
<xsl:template match="constraints/type">
|
||||||
|
<tr>
|
||||||
|
<th>Type:</th>
|
||||||
|
<td>
|
||||||
|
<xsl:variable name="type" select="text()" />
|
||||||
|
<xsl:attribute name="class">type type-<xsl:value-of select="$type" /></xsl:attribute>
|
||||||
|
<xsl:value-of select="$typeLookup/types/type[@id=$type]/text()" />
|
||||||
|
</td>
|
||||||
|
</tr>
|
||||||
|
</xsl:template>
|
||||||
|
<xsl:template match="constraints/allowed">
|
||||||
|
<tr>
|
||||||
|
<th>Allowed values:</th>
|
||||||
|
<td>
|
||||||
|
<xsl:for-each select="value"><!--
|
||||||
|
--><xsl:if test="position()>1">, </xsl:if>
|
||||||
|
"<xsl:value-of select="." />"<!--
|
||||||
|
--></xsl:for-each>
|
||||||
|
</td>
|
||||||
|
</tr>
|
||||||
|
</xsl:template>
|
||||||
|
<xsl:template match="constraints/default">
|
||||||
|
<tr>
|
||||||
|
<th>Default:</th>
|
||||||
|
<td><pre><xsl:value-of select="." xml:space="preserve" /></pre></td>
|
||||||
|
</tr>
|
||||||
|
</xsl:template>
|
||||||
|
|
||||||
|
</xsl:stylesheet>
|
@@ -11,25 +11,24 @@ profiling.
|
|||||||
Here we go:
|
Here we go:
|
||||||
|
|
||||||
AttrDef
|
AttrDef
|
||||||
Class - doesn't support Unicode characters, uses regular expressions
|
Class - doesn't support Unicode characters (fringe); uses regular
|
||||||
Lang - code duplication, premature optimization, doesn't consult official
|
expressions
|
||||||
lists
|
Lang - code duplication; premature optimization; doesn't consult official
|
||||||
Pixels/Length/MultiLength - implemented according to HTML spec (excludes
|
lists (fringe)
|
||||||
code reuse in CSS)
|
Length - easily mistaken for CSSLength
|
||||||
URI - multiple regular expressions, needs host validation routines factored
|
URI - multiple regular expressions; needs host validation routines factored
|
||||||
out for mailto scheme, IPv6 validation is broken (fringe), unintuitive
|
out for mailto scheme; missing validation for query, fragment and path;
|
||||||
variable overwriting, missing validation for query, fragment and path,
|
|
||||||
no percent-encode fixing
|
no percent-encode fixing
|
||||||
CSS - parser doesn't accept advanced CSS (fringe)
|
CSS - parser doesn't accept advanced CSS (fringe)
|
||||||
Number - constructor interface is inconsistent with Integer
|
Number - constructor interface is inconsistent with Integer
|
||||||
AttrTransform - doesn't accept AttrContext, non-validating
|
AttrTransform - doesn't accept AttrContext
|
||||||
ChildDef - not-allowed nodes translated to text, likely invalid handling
|
Config - "load configuration" hooks missing, rich set* accessors missing
|
||||||
Config - "load configuration" hooks missing, rich set* accessors missing,
|
ConfigSchema - redefinition is a mess
|
||||||
needs redefined relationship with the definitions
|
|
||||||
Strategy
|
Strategy
|
||||||
FixNesting - cannot bubble nodes out of structures
|
FixNesting - cannot bubble nodes out of structures
|
||||||
MakeWellFormed - insufficient automatic closing definitions (check HTML
|
MakeWellFormed - insufficient automatic closing definitions (check HTML
|
||||||
spec for optional end tags).
|
spec for optional end tags, also, closing based on type (block/inline)
|
||||||
|
might be efficient).
|
||||||
RemoveForeignElements - should be run in parallel with MakeWellFormed
|
RemoveForeignElements - should be run in parallel with MakeWellFormed
|
||||||
URIScheme - needs to have callable generic checks
|
URIScheme - needs to have callable generic checks
|
||||||
ftp - missing typecode check
|
ftp - missing typecode check
|
||||||
|
23
docs/colors.txt
Normal file
23
docs/colors.txt
Normal file
@@ -0,0 +1,23 @@
|
|||||||
|
|
||||||
|
Colors
|
||||||
|
Hammering some sense into those content-makers
|
||||||
|
|
||||||
|
Your website probably has a color-scheme. Green on white, purple on yellow,
|
||||||
|
whatever. When you give users the ability to style their content, you may
|
||||||
|
want them to keep in line with your styling. If your website is all
|
||||||
|
about light colors, you don't want a user to come in and vandalize your
|
||||||
|
page with a deep maroon.
|
||||||
|
|
||||||
|
This is an extremely silly feature proposal, but I'm writing it down anyway.
|
||||||
|
|
||||||
|
What if the user could constrain the colors specified in inline styles? You
|
||||||
|
are only allowed to use these shades of dark green for text and these shades
|
||||||
|
of light yellow for the background. At the very least, you could ensure
|
||||||
|
that we did not have pale yellow text on a white background.
|
||||||
|
|
||||||
|
Implementation issues:
|
||||||
|
1. Requires the color attribute definition to know, currently, what the text
|
||||||
|
and background colors are. This becomes difficult when classes are thrown
|
||||||
|
into the mix.
|
||||||
|
2. The user still has to define the permissible colors, how does one do
|
||||||
|
something like that?
|
@@ -17,18 +17,9 @@ are passed. These classes are: HTMLPurifier::*, Generator::generateFromTokens
|
|||||||
and Lexer::tokenizeHTML. However, whenever a valid configuration object
|
and Lexer::tokenizeHTML. However, whenever a valid configuration object
|
||||||
is defined, that object should be used.
|
is defined, that object should be used.
|
||||||
|
|
||||||
-- the following is projected changes to the configuration system --
|
In relation to HTMLDefinition and CSSDefinition, there is a special class
|
||||||
|
of directives that influence the *construction* of the Definition object.
|
||||||
In relation to HTMLDefinition and CSSDefinition, there are going to be some
|
A standard call pattern would look like:
|
||||||
major structural changes to enable the easy configuration of these objects.
|
|
||||||
Due to the intricacy of these objects, it's not feasible to ask an average
|
|
||||||
user to twiddle around with an element and its 20 other dependencies. However,
|
|
||||||
these objects are the only possible point where change could occur in the
|
|
||||||
context of configuration.
|
|
||||||
|
|
||||||
The solution is to introduce a special class of directives that influence the
|
|
||||||
*construction* of the Definition object. A standard call pattern would look
|
|
||||||
like:
|
|
||||||
|
|
||||||
1. Client calls Config->getHTMLDefinition()
|
1. Client calls Config->getHTMLDefinition()
|
||||||
2. Config calls HTMLDefinition->createNew(this)
|
2. Config calls HTMLDefinition->createNew(this)
|
||||||
|
@@ -1,272 +0,0 @@
|
|||||||
<!-- Transform %TextAlign to align:value in style -->
|
|
||||||
|
|
||||||
<!-- text alignment for p, div, h1-h6. The default is
|
|
||||||
align="left" for ltr headings, "right" for rtl
|
|
||||||
|
|
||||||
Move to style! -->
|
|
||||||
<!ENTITY % TextAlign "DEPRECATED align (left|center|right|justify) #IMPLIED">
|
|
||||||
|
|
||||||
<!-- type and start should have CSS equivalents, but they'll need to
|
|
||||||
be translated intelligently -->
|
|
||||||
<!ENTITY % ULStyle "(disc|square|circle)">
|
|
||||||
<!-- Ordered list numbering style
|
|
||||||
|
|
||||||
1 arabic numbers 1, 2, 3, ...
|
|
||||||
a lower alpha a, b, c, ...
|
|
||||||
A upper alpha A, B, C, ...
|
|
||||||
i lower roman i, ii, iii, ...
|
|
||||||
I upper roman I, II, III, ...
|
|
||||||
|
|
||||||
The style is applied to the sequence number which by default
|
|
||||||
is reset to 1 for the first list item in an ordered list.
|
|
||||||
-->
|
|
||||||
<!ENTITY % OLStyle "CDATA">
|
|
||||||
<!-- LIStyle is constrained to: "(%ULStyle;|%OLStyle;)" -->
|
|
||||||
<!ENTITY % LIStyle "CDATA">
|
|
||||||
|
|
||||||
<!ATTLIST ol
|
|
||||||
%attrs;
|
|
||||||
DEPRECATED type %OLStyle; #IMPLIED
|
|
||||||
DEPRECATED start %Number; #IMPLIED
|
|
||||||
>
|
|
||||||
|
|
||||||
<!ATTLIST li
|
|
||||||
%attrs;
|
|
||||||
DEPRECATED type %LIStyle; #IMPLIED
|
|
||||||
DEPRECATED value %Number; #IMPLIED
|
|
||||||
>
|
|
||||||
|
|
||||||
<!ATTLIST hr
|
|
||||||
%attrs;
|
|
||||||
DEPRECATED align (left|center|right) #IMPLIED
|
|
||||||
DEPRECATED size %Pixels; #IMPLIED
|
|
||||||
DEPRECATED width %Length; #IMPLIED
|
|
||||||
>
|
|
||||||
|
|
||||||
<!ATTLIST pre
|
|
||||||
%attrs;
|
|
||||||
DEPRECATED width %Number; #IMPLIED
|
|
||||||
>
|
|
||||||
|
|
||||||
<!ATTLIST blockquote
|
|
||||||
%attrs;
|
|
||||||
cite %URI; #IMPLIED
|
|
||||||
>
|
|
||||||
|
|
||||||
<!ATTLIST ins
|
|
||||||
%attrs;
|
|
||||||
cite %URI; #IMPLIED
|
|
||||||
datetime %Datetime; #IMPLIED
|
|
||||||
>
|
|
||||||
<!ATTLIST del
|
|
||||||
%attrs;
|
|
||||||
cite %URI; #IMPLIED
|
|
||||||
datetime %Datetime; #IMPLIED
|
|
||||||
>
|
|
||||||
|
|
||||||
<!ATTLIST a
|
|
||||||
%attrs;
|
|
||||||
name NMTOKEN #IMPLIED // ID
|
|
||||||
href %URI; #IMPLIED
|
|
||||||
rel %LinkTypes; #IMPLIED // needs policing
|
|
||||||
rev %LinkTypes; #IMPLIED // see rel
|
|
||||||
target %FrameTarget; #IMPLIED // usually not used, but might be
|
|
||||||
>
|
|
||||||
|
|
||||||
<!ATTLIST bdo
|
|
||||||
%coreattrs; // !#!
|
|
||||||
lang %LanguageCode; #IMPLIED
|
|
||||||
xml:lang %LanguageCode; #IMPLIED
|
|
||||||
dir (ltr|rtl) #REQUIRED
|
|
||||||
>
|
|
||||||
|
|
||||||
<!ATTLIST br
|
|
||||||
%coreattrs; // !#!
|
|
||||||
DEPRECATED clear (left|all|right|none) "none"
|
|
||||||
>
|
|
||||||
|
|
||||||
<!ELEMENT q %Inline;> <!-- inlined quote -->
|
|
||||||
<!ATTLIST q
|
|
||||||
%attrs;
|
|
||||||
cite %URI; #IMPLIED
|
|
||||||
>
|
|
||||||
|
|
||||||
<!ATTLIST img
|
|
||||||
%attrs;
|
|
||||||
src %URI; #REQUIRED
|
|
||||||
alt %Text; #REQUIRED
|
|
||||||
DEPRECATED name NMTOKEN #IMPLIED // ID
|
|
||||||
longdesc %URI; #IMPLIED
|
|
||||||
height %Length; #IMPLIED // dubious, but we'll allow
|
|
||||||
width %Length; #IMPLIED //
|
|
||||||
DEPRECATED align %ImgAlign; #IMPLIED
|
|
||||||
DEPRECATED border %Length; #IMPLIED
|
|
||||||
DEPRECATED hspace %Pixels; #IMPLIED // left/right margin
|
|
||||||
DEPRECATED vspace %Pixels; #IMPLIED // up/down margin
|
|
||||||
>
|
|
||||||
|
|
||||||
<!--
|
|
||||||
The border attribute sets the thickness of the frame around the
|
|
||||||
table. The default units are screen pixels.
|
|
||||||
|
|
||||||
The frame attribute specifies which parts of the frame around
|
|
||||||
the table should be rendered. The values are not the same as
|
|
||||||
CALS to avoid a name clash with the valign attribute.
|
|
||||||
-->
|
|
||||||
<!ENTITY % TFrame "(void|above|below|hsides|lhs|rhs|vsides|box|border)">
|
|
||||||
|
|
||||||
<!--
|
|
||||||
The rules attribute defines which rules to draw between cells:
|
|
||||||
|
|
||||||
If rules is absent then assume:
|
|
||||||
"none" if border is absent or border="0" otherwise "all"
|
|
||||||
-->
|
|
||||||
|
|
||||||
<!ENTITY % TRules "(none | groups | rows | cols | all)">
|
|
||||||
|
|
||||||
<!-- horizontal placement of table relative to document -->
|
|
||||||
<!ENTITY % TAlign "(left|center|right)">
|
|
||||||
|
|
||||||
<!-- horizontal alignment attributes for cell contents
|
|
||||||
|
|
||||||
char alignment char, e.g. char=':'
|
|
||||||
charoff offset for alignment char
|
|
||||||
-->
|
|
||||||
<!ENTITY % cellhalign
|
|
||||||
"align (left|center|right|justify|char) #IMPLIED
|
|
||||||
char %Character; #IMPLIED
|
|
||||||
charoff %Length; #IMPLIED"
|
|
||||||
>
|
|
||||||
|
|
||||||
<!-- vertical alignment attributes for cell contents -->
|
|
||||||
<!ENTITY % cellvalign
|
|
||||||
"valign (top|middle|bottom|baseline) #IMPLIED"
|
|
||||||
>
|
|
||||||
|
|
||||||
<!-- we may want to convert some of these nonetheless -->
|
|
||||||
<!ATTLIST table
|
|
||||||
%attrs;
|
|
||||||
summary %Text; #IMPLIED
|
|
||||||
width %Length; #IMPLIED
|
|
||||||
border %Pixels; #IMPLIED
|
|
||||||
frame %TFrame; #IMPLIED
|
|
||||||
rules %TRules; #IMPLIED
|
|
||||||
cellspacing %Length; #IMPLIED
|
|
||||||
cellpadding %Length; #IMPLIED
|
|
||||||
DEPRECATED align %TAlign; #IMPLIED
|
|
||||||
DEPRECATED bgcolor %Color; #IMPLIED
|
|
||||||
>
|
|
||||||
|
|
||||||
<!ENTITY % CAlign "(top|bottom|left|right)">
|
|
||||||
|
|
||||||
<!ATTLIST caption
|
|
||||||
%attrs;
|
|
||||||
DEPRECATED align %CAlign; #IMPLIED // watch, it's a special set
|
|
||||||
>
|
|
||||||
|
|
||||||
<!--
|
|
||||||
colgroup groups a set of col elements. It allows you to group
|
|
||||||
several semantically related columns together.
|
|
||||||
-->
|
|
||||||
<!ATTLIST colgroup
|
|
||||||
%attrs;
|
|
||||||
span %Number; "1"
|
|
||||||
width %MultiLength; #IMPLIED
|
|
||||||
%cellhalign; // very interesting
|
|
||||||
%cellvalign;
|
|
||||||
>
|
|
||||||
|
|
||||||
<!--
|
|
||||||
col elements define the alignment properties for cells in
|
|
||||||
one or more columns.
|
|
||||||
|
|
||||||
The width attribute specifies the width of the columns, e.g.
|
|
||||||
|
|
||||||
width=64 width in screen pixels
|
|
||||||
width=0.5* relative width of 0.5
|
|
||||||
|
|
||||||
The span attribute causes the attributes of one
|
|
||||||
col element to apply to more than one column.
|
|
||||||
-->
|
|
||||||
<!ATTLIST col
|
|
||||||
%attrs;
|
|
||||||
span %Number; "1"
|
|
||||||
width %MultiLength; #IMPLIED
|
|
||||||
%cellhalign;
|
|
||||||
%cellvalign;
|
|
||||||
>
|
|
||||||
|
|
||||||
<!--
|
|
||||||
Use thead to duplicate headers when breaking table
|
|
||||||
across page boundaries, or for static headers when
|
|
||||||
tbody sections are rendered in scrolling panel.
|
|
||||||
|
|
||||||
Use tfoot to duplicate footers when breaking table
|
|
||||||
across page boundaries, or for static footers when
|
|
||||||
tbody sections are rendered in scrolling panel.
|
|
||||||
|
|
||||||
Use multiple tbody sections when rules are needed
|
|
||||||
between groups of table rows.
|
|
||||||
-->
|
|
||||||
<!ATTLIST thead
|
|
||||||
%attrs;
|
|
||||||
%cellhalign;
|
|
||||||
%cellvalign;
|
|
||||||
>
|
|
||||||
|
|
||||||
<!ATTLIST tfoot
|
|
||||||
%attrs;
|
|
||||||
%cellhalign;
|
|
||||||
%cellvalign;
|
|
||||||
>
|
|
||||||
|
|
||||||
<!ATTLIST tbody
|
|
||||||
%attrs;
|
|
||||||
%cellhalign;
|
|
||||||
%cellvalign;
|
|
||||||
>
|
|
||||||
|
|
||||||
<!ATTLIST tr
|
|
||||||
%attrs;
|
|
||||||
%cellhalign;
|
|
||||||
%cellvalign;
|
|
||||||
DEPRECATED bgcolor %Color; #IMPLIED
|
|
||||||
>
|
|
||||||
|
|
||||||
<!-- Scope is simpler than headers attribute for common tables -->
|
|
||||||
<!ENTITY % Scope "(row|col|rowgroup|colgroup)">
|
|
||||||
|
|
||||||
<!-- th is for headers, td for data and for cells acting as both -->
|
|
||||||
|
|
||||||
<!ATTLIST th
|
|
||||||
%attrs;
|
|
||||||
abbr %Text; #IMPLIED
|
|
||||||
axis CDATA #IMPLIED
|
|
||||||
headers IDREFS #IMPLIED
|
|
||||||
scope %Scope; #IMPLIED
|
|
||||||
rowspan %Number; "1"
|
|
||||||
colspan %Number; "1"
|
|
||||||
%cellhalign;
|
|
||||||
%cellvalign;
|
|
||||||
DEPRECATED nowrap (nowrap) #IMPLIED
|
|
||||||
DEPRECATED bgcolor %Color; #IMPLIED
|
|
||||||
DEPRECATED width %Length; #IMPLIED
|
|
||||||
DEPRECATED height %Length; #IMPLIED
|
|
||||||
>
|
|
||||||
|
|
||||||
<!ATTLIST td
|
|
||||||
%attrs;
|
|
||||||
abbr %Text; #IMPLIED
|
|
||||||
axis CDATA #IMPLIED
|
|
||||||
headers IDREFS #IMPLIED
|
|
||||||
scope %Scope; #IMPLIED
|
|
||||||
rowspan %Number; "1"
|
|
||||||
colspan %Number; "1"
|
|
||||||
%cellhalign;
|
|
||||||
%cellvalign;
|
|
||||||
DEPRECATED nowrap (nowrap) #IMPLIED
|
|
||||||
DEPRECATED bgcolor %Color; #IMPLIED
|
|
||||||
DEPRECATED width %Length; #IMPLIED
|
|
||||||
DEPRECATED height %Length; #IMPLIED
|
|
||||||
>
|
|
||||||
|
|
@@ -21,7 +21,9 @@ if (!empty($_POST['html'])) {
|
|||||||
|
|
||||||
$html = get_magic_quotes_gpc() ? stripslashes($_POST['html']) : $_POST['html'];
|
$html = get_magic_quotes_gpc() ? stripslashes($_POST['html']) : $_POST['html'];
|
||||||
|
|
||||||
$purifier = new HTMLPurifier();
|
$config = HTMLPurifier_Config::createDefault();
|
||||||
|
$config->set('Core', 'TidyFormat', !empty($_POST['tidy']));
|
||||||
|
$purifier = new HTMLPurifier($config);
|
||||||
$pure_html = $purifier->purify($html);
|
$pure_html = $purifier->purify($html);
|
||||||
|
|
||||||
?>
|
?>
|
||||||
@@ -65,6 +67,8 @@ if (isset($html)) {
|
|||||||
HTMLPurifier_Encoder::cleanUTF8($html), ENT_COMPAT, 'UTF-8');
|
HTMLPurifier_Encoder::cleanUTF8($html), ENT_COMPAT, 'UTF-8');
|
||||||
}
|
}
|
||||||
?></textarea>
|
?></textarea>
|
||||||
|
<div>Nicely format output with Tidy? <input type="checkbox" value="1"
|
||||||
|
name="tidy"<?php if (!empty($_POST['tidy'])) echo ' checked="checked"'; ?> /></div>
|
||||||
<div>
|
<div>
|
||||||
<input type="submit" value="Submit" name="submit" class="button" />
|
<input type="submit" value="Submit" name="submit" class="button" />
|
||||||
</div>
|
</div>
|
||||||
|
@@ -20,15 +20,32 @@ can further be customized using simpler configuration options.
|
|||||||
Here are some fuzzy levels you could set:
|
Here are some fuzzy levels you could set:
|
||||||
|
|
||||||
1. Comments - Wordpress recommends a, abbr, acronym, b, blockquote, cite,
|
1. Comments - Wordpress recommends a, abbr, acronym, b, blockquote, cite,
|
||||||
code, em, i, strike, strong; however, you could get away with only a, b and
|
code, em, i, strike, strong; however, you could get away with only a, em and
|
||||||
i; also having p and pre tags would be helpful.
|
p; also having blockquote and pre tags would be helpful.
|
||||||
2. Pages - As permissive as possible without allowing XSS. No protection
|
2. BBCode - Emulate the usual tagset for forums: b, i, img, a, blockquote,
|
||||||
|
pre, div, span and h[2-6] (the last three are for specially formatted
|
||||||
|
posts, div and span require associated classes or inline styling enabled
|
||||||
|
to be useful)
|
||||||
|
3. Pages - As permissive as possible without allowing XSS. No protection
|
||||||
against bad design sense, unfortunately. Suitable for wiki and page
|
against bad design sense, unfortunately. Suitable for wiki and page
|
||||||
environments.
|
environments.
|
||||||
3. Lint - Accept everything in the spec, a Tidy wannabe.
|
4. Lint - Accept everything in the spec, a Tidy wannabe. (This probably won't
|
||||||
|
get implemented as it would require routines for things like <object>
|
||||||
|
and friends to be implemented, which is a lot of work for not a lot of
|
||||||
|
benefit)
|
||||||
|
|
||||||
I've also decomposed tags into risk levels. An asterisk indicates that no one
|
One final note: when you start axing tags that are more commonly used, you
|
||||||
really uses that tag, tilde indicates it's deprecated.
|
run the risk of accidentally destroying user data, especially if the data
|
||||||
|
is incoming from a WYSIWYG editor that hasn't been synced accordingly. This may
|
||||||
|
make forbidden element to text transformations desirable (for example, images).
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
== Element Risk Analysis ==
|
||||||
|
|
||||||
|
Legend:
|
||||||
|
[danger level] - regular tags / uncommon tags ~ deprecated tags
|
||||||
|
[danger level]* - rare tags
|
||||||
|
|
||||||
1 - blockquote, code, em, i, p, tt / strong, sub, sup
|
1 - blockquote, code, em, i, p, tt / strong, sub, sup
|
||||||
1* - abbr, acronym, bdo, cite, dfn, kbd, q, samp
|
1* - abbr, acronym, bdo, cite, dfn, kbd, q, samp
|
||||||
@@ -38,30 +55,76 @@ really uses that tag, tilde indicates it's deprecated.
|
|||||||
5 - a
|
5 - a
|
||||||
7 - area, map
|
7 - area, map
|
||||||
|
|
||||||
|
These are special use tags, they should be enabled on a blanket basis.
|
||||||
|
|
||||||
Lists - dd, dl, dt, li, ol, ul ~ menu, dir
|
Lists - dd, dl, dt, li, ol, ul ~ menu, dir
|
||||||
Tables - caption, table, td, th, tr / col, colgroup, tbody, tfoot, thead
|
Tables - caption, table, td, th, tr / col, colgroup, tbody, tfoot, thead
|
||||||
|
|
||||||
Forms - fieldset, form, input, label, legend, optgroup, option, select, textarea
|
Forms - fieldset, form, input, label, legend, optgroup, option, select, textarea
|
||||||
XSS - noscript, object, script ~ applet
|
XSS - noscript, object, script ~ applet
|
||||||
|
|
||||||
Meta - base, basefont, body, head, html, link, meta, style, title
|
Meta - base, basefont, body, head, html, link, meta, style, title
|
||||||
Frames - frame, frameset, iframe
|
Frames - frame, frameset, iframe
|
||||||
|
|
||||||
And tag specific notes:
|
And tag specific notes:
|
||||||
|
|
||||||
a - general problems involving linkspam
|
a - general problems involving linkspam
|
||||||
b - too much bold is bad, typographically speaking bold is discouraged
|
b - too much bold is bad, typographically speaking bold is discouraged
|
||||||
br - often misused
|
br - often misused
|
||||||
center - CSS, usually no legit use
|
center - CSS, usually no legit use
|
||||||
del - only useful in editing context
|
del - only useful in editing context
|
||||||
div - little meaning in certain contexts i.e. blog comment
|
div - little meaning in certain contexts i.e. blog comment
|
||||||
h1 - usually no legit use, as header is already set by application
|
h1 - usually no legit use, as header is already set by application
|
||||||
h* - not needed in blog comments
|
h* - not needed in blog comments
|
||||||
hr - usually not necessary in blog comments
|
hr - usually not necessary in blog comments
|
||||||
img - could be extremely undesirable if linking to external pics
|
img - could be extremely undesirable if linking to external pics (CSRF, goatse)
|
||||||
pre - could use formatting, only useful in code contexts
|
pre - could use formatting, only useful in code contexts
|
||||||
q - very little support
|
q - very little support
|
||||||
s - transform into span with styling or del?
|
s - transform into span with styling or del?
|
||||||
small - technically presentational
|
small - technically presentational
|
||||||
span - depends on attribute allowances
|
span - depends on attribute allowances
|
||||||
sub, sup - specialized
|
sub, sup - specialized
|
||||||
u - little legit use, prefer class with text-decoration
|
u - little legit use, prefer class with text-decoration
|
||||||
|
|
||||||
|
Based on the riskiness of the items, we may want to offer %HTML.DisableImages
|
||||||
|
attribute and put URI filtering higher up on the priority list.
|
||||||
|
|
||||||
|
|
||||||
|
== Attribute Risk Analysis ==
|
||||||
|
|
||||||
|
We actually have a surprisingly small assortment of allowed attributes (the
|
||||||
|
rest are deprecated in strict, and thus we opted not to allow them, even
|
||||||
|
though our output is XHTML Transitional by default.)
|
||||||
|
|
||||||
|
Required URI - img.alt, img.src, a.href
|
||||||
|
Medium risk - *.class, *.dir
|
||||||
|
High risk - img.height, img.width, *.id, *.style
|
||||||
|
|
||||||
|
Table - colgroup/col.span, td/th.rowspan, td/th.colspan
|
||||||
|
Uncommon - *.title, *.lang, *.xml:lang
|
||||||
|
Rare - td/th.abbr, table.summary, {table}.charoff
|
||||||
|
Rare URI - del.cite, ins.cite, blockquote.cite, q.cite, img.longdesc
|
||||||
|
Presentational - {table}.align, {table}.valign, table.frame, table.rules,
|
||||||
|
table.border
|
||||||
|
Partially presentational - table.cellpadding, table.cellspacing,
|
||||||
|
table.width, col.width, colgroup.width
|
||||||
|
|
||||||
|
|
||||||
|
== CSS Risk Analysis ==
|
||||||
|
|
||||||
|
There are certain CSS elements that are extremely useful inline, but then
|
||||||
|
as you get to more presentation oriented styling it may not always be
|
||||||
|
appropriate to inline them.
|
||||||
|
|
||||||
|
Useful - clear, float, border-collapse, caption-side
|
||||||
|
|
||||||
|
These CSS properties can break layouts if used improperly. We have excluded
|
||||||
|
any CSS properties that are not currently implemented (such as position).
|
||||||
|
|
||||||
|
Dangerous, can go outside container - float
|
||||||
|
Easy to abuse - font-size, font-family (font), width
|
||||||
|
Colored - background-color (background), border-color (border), color
|
||||||
|
Dramatic - border, list-style-position (list-style), margin, padding,
|
||||||
|
text-align, text-indent, text-transform, vertical-align, line-height
|
||||||
|
|
||||||
|
Dramatic elements substantially change the look of text in ways that should
|
||||||
|
probably have been reserved to other areas.
|
||||||
|
@@ -2,9 +2,10 @@
|
|||||||
Optimization
|
Optimization
|
||||||
|
|
||||||
Here are some possible optimization techniques we can apply to code sections if
|
Here are some possible optimization techniques we can apply to code sections if
|
||||||
they turn out to be slow. Be sure not to prematurely optimize though!
|
they turn out to be slow. Be sure not to prematurely optimize: if you get
|
||||||
|
that itch, put it here!
|
||||||
|
|
||||||
- Make Tokens Flyweights
|
- Make Tokens Flyweights (may prove problematic, probably not worth it)
|
||||||
- Rewrite regexps into PHP code
|
- Rewrite regexps into PHP code
|
||||||
- Serialize the Definition object
|
- Serialize the Definition object
|
||||||
- Batch regexp validation (do as many per function call as possible)
|
- Batch regexp validation (do as many per function call as possible)
|
||||||
|
@@ -86,7 +86,7 @@ thead th {text-align:left;padding:0.1em;background-color:#EEE;}
|
|||||||
Well-supported values are: disc, circle, square,
|
Well-supported values are: disc, circle, square,
|
||||||
decimal, lower-roman, upper-roman, lower-alpha and upper-alpha. See also
|
decimal, lower-roman, upper-roman, lower-alpha and upper-alpha. See also
|
||||||
CSS 3. Mostly IE lack of support.</td></tr>
|
CSS 3. Mostly IE lack of support.</td></tr>
|
||||||
<tr class="css1 impl-yes"><td>list-style</td><td>SHORTHAND, target milestone 1.0</td></tr>
|
<tr class="css1 impl-yes"><td>list-style</td><td>SHORTHAND</td></tr>
|
||||||
<tr class="css1 impl-yes"><td>margin</td><td>MULTIPLE</td></tr>
|
<tr class="css1 impl-yes"><td>margin</td><td>MULTIPLE</td></tr>
|
||||||
<tr class="css1 impl-yes"><td>margin-*</td><td>COMPOSITE(<length>,
|
<tr class="css1 impl-yes"><td>margin-*</td><td>COMPOSITE(<length>,
|
||||||
<percentage>, auto)</td></tr>
|
<percentage>, auto)</td></tr>
|
||||||
@@ -134,7 +134,7 @@ thead th {text-align:left;padding:0.1em;background-color:#EEE;}
|
|||||||
|
|
||||||
<tbody>
|
<tbody>
|
||||||
<tr><th colspan="2">Unknown</th></tr>
|
<tr><th colspan="2">Unknown</th></tr>
|
||||||
<tr class="danger css1"><td>background-image</td><td>Dangerous, target milestone 1.3</td></tr>
|
<tr class="danger css1"><td>background-image</td><td>Dangerous, target milestone 1.2</td></tr>
|
||||||
<tr class="css1"><td>background-attachment</td><td>ENUM(scroll, fixed),
|
<tr class="css1"><td>background-attachment</td><td>ENUM(scroll, fixed),
|
||||||
Depends on background-image</td></tr>
|
Depends on background-image</td></tr>
|
||||||
<tr class="css1"><td>background-position</td><td>Depends on background-image</td></tr>
|
<tr class="css1"><td>background-position</td><td>Depends on background-image</td></tr>
|
||||||
@@ -144,7 +144,7 @@ thead th {text-align:left;padding:0.1em;background-color:#EEE;}
|
|||||||
inline-block has incomplete IE6 support and requires -moz-inline-box
|
inline-block has incomplete IE6 support and requires -moz-inline-box
|
||||||
for Mozilla. Unknown target milestone.</td></tr>
|
for Mozilla. Unknown target milestone.</td></tr>
|
||||||
<tr><td class="css1">height</td><td>Interesting, why use it? Unknown target milestone.</td></tr>
|
<tr><td class="css1">height</td><td>Interesting, why use it? Unknown target milestone.</td></tr>
|
||||||
<tr class="danger css1"><td>list-style-image</td><td>Dangerous? Target milestone 1.3</td></tr>
|
<tr class="danger css1"><td>list-style-image</td><td>Dangerous? Target milestone 1.2</td></tr>
|
||||||
<tr class="impl-no"><td>max-height</td><td rowspan="4">No IE 5/6</td></tr>
|
<tr class="impl-no"><td>max-height</td><td rowspan="4">No IE 5/6</td></tr>
|
||||||
<tr class="impl-no"><td>min-height</td></tr>
|
<tr class="impl-no"><td>min-height</td></tr>
|
||||||
<tr class="impl-no"><td>max-width</td></tr>
|
<tr class="impl-no"><td>max-width</td></tr>
|
||||||
@@ -254,7 +254,7 @@ Mozilla on inside and needs -moz-outline, no IE support.</td></tr>
|
|||||||
</tbody>
|
</tbody>
|
||||||
|
|
||||||
<tbody>
|
<tbody>
|
||||||
<tr><th colspan="3">Transform, target milestone 1.2</th></tr>
|
<tr><th colspan="3">Transform, target milestone 1.4</th></tr>
|
||||||
<tr><td rowspan="5">align</td><td>CAPTION</td><td>Near-equiv style 'caption-side', drop left and right</td></tr>
|
<tr><td rowspan="5">align</td><td>CAPTION</td><td>Near-equiv style 'caption-side', drop left and right</td></tr>
|
||||||
<tr><td>IMG</td><td rowspan="2">Margin-left and margin-right = auto or parent div</td></tr>
|
<tr><td>IMG</td><td rowspan="2">Margin-left and margin-right = auto or parent div</td></tr>
|
||||||
<tr><td>TABLE</td></tr>
|
<tr><td>TABLE</td></tr>
|
||||||
|
@@ -6,35 +6,43 @@ through negligence of people. This class will do its job: no more, no less,
|
|||||||
and it's up to you to provide it the proper information and proper context
|
and it's up to you to provide it the proper information and proper context
|
||||||
to be effective. Things to remember:
|
to be effective. Things to remember:
|
||||||
|
|
||||||
1. UTF-8. Currently, the parser runs under the assumption that it is dealing
|
1. Character Encoding: UTF-8.
|
||||||
|
Currently, the parser runs under the assumption that it is dealing
|
||||||
with UTF-8. Not ISO-8859-1 or Windows-1252, UTF-8. And definitely not "no
|
with UTF-8. Not ISO-8859-1 or Windows-1252, UTF-8. And definitely not "no
|
||||||
character encoding explicitly stated" or UTF-7. If you're not using UTF-8 as
|
character encoding explicitly stated" or UTF-7. If you're not using UTF-8 as
|
||||||
your character encoding, you should switch. Now. Make sure any input is
|
your character encoding, make sure you configure HTML Purifier or switch
|
||||||
properly converted to UTF-8, or the parser will mangle it badly
|
to UTF-8. Now. Also, make sure any input is properly converted to UTF-8, or
|
||||||
(though it won't be a security risk if you're outputting it as UTF-8 though).
|
the parser will mangle it badly (though it won't be a security risk if you're
|
||||||
We will be adding out-of-the-box support for the other major character
|
outputting it as UTF-8). Character encoding is, in general, a knotty
|
||||||
encodings shortly.
|
issue, but do yourself a favor and learn about it:
|
||||||
|
<http://www.joelonsoftware.com/articles/Unicode.html>
|
||||||
|
|
||||||
2. XHTML 1.0 Transitional. This is what the parser is outputting. For the most
|
2. Doctype: XHTML 1.0 Transitional
|
||||||
|
This is what the parser is outputting. For the most
|
||||||
part, it's compatible with HTML 4.01, but XHTML enforces some very nice things
|
part, it's compatible with HTML 4.01, but XHTML enforces some very nice things
|
||||||
that all web developers should use. Regardless, NO DOCTYPE is a NO. Quirks mode
|
that all web developers should use. Regardless, NO DOCTYPE is a NO. Quirks mode
|
||||||
has waaaay too many quirks for a little parser to handle. We did not select
|
has waaaay too many quirks for a little parser to handle. We did not select
|
||||||
strict in order to prevent ourselves from being too draconic on users, but
|
strict in order to prevent ourselves from being too draconic on users, but
|
||||||
this may be configurable in the future.
|
this may be configurable in the future. Do you want standards compliance?
|
||||||
|
The doctype is a good place to start.
|
||||||
|
|
||||||
3. IDs. They need to be unique, but without some knowledge of the
|
3. IDs
|
||||||
|
They need to be unique, but without some knowledge of the
|
||||||
rest of the document, it's difficult to know what's unique. %Attr.IDBlacklist
|
rest of the document, it's difficult to know what's unique. %Attr.IDBlacklist
|
||||||
needs to be set: we may want to consider disallowing IDs by default to
|
needs to be set: we may want to consider disallowing IDs by default to
|
||||||
save lazy programmers.
|
save lazy programmers.
|
||||||
|
|
||||||
4. [PROJECTED] Links. We're not going to try for spam protection (although
|
4. [PROJECTED] Links
|
||||||
|
We're not going to try for spam protection (although
|
||||||
some hooks for such a module might be nice) but we may offer the ability to
|
some hooks for such a module might be nice) but we may offer the ability to
|
||||||
only accept relative URLs. Pick the one that's right for you.
|
only accept relative URLs. Pick the one that's right for you.
|
||||||
|
|
||||||
5. CSS. While we can prevent the most flagrant cases from affecting your
|
5. CSS
|
||||||
|
While we can prevent the most flagrant cases from affecting your
|
||||||
layout (such as absolutely positioned elements), no amount of code is going
|
layout (such as absolutely positioned elements), no amount of code is going
|
||||||
to protect your pages from being attacked by garish colors and plain old
|
to protect your pages from being attacked by garish colors and plain old
|
||||||
bad taste. A neat feature would be the ability to define acceptable colors
|
bad taste. A neat feature would be the ability to define acceptable colors
|
||||||
in a document, but that's not likely to be implemented for a while. In the
|
in a document, but that's not likely to be implemented for a while. In the
|
||||||
meantime, be sure to make sure that floated elements (permitted, since they
|
meantime, make sure that floated elements (permitted, since they
|
||||||
can be quite useful) can't mess up your layout.
|
can be quite useful) can't mess up your layout. Once again, we may want to
|
||||||
|
disable this by default to protect lazy developers.
|
||||||
|
@@ -54,4 +54,4 @@ HTML Purifier is best suited for documents that require a rich array of
|
|||||||
HTML tags. Things like blog comments are, in all likelihood, most appropriately
|
HTML tags. Things like blog comments are, in all likelihood, most appropriately
|
||||||
written in an extremely restrictive set of markup that doesn't require
|
written in an extremely restrictive set of markup that doesn't require
|
||||||
all this functionality (or not written in HTML at all), although this may
|
all this functionality (or not written in HTML at all), although this may
|
||||||
be changing in the future.
|
be changing in the future with the addition of levels of filtering.
|
||||||
|
25
docs/strictness.txt
Normal file
25
docs/strictness.txt
Normal file
@@ -0,0 +1,25 @@
|
|||||||
|
|
||||||
|
Is HTML Purifier Strict or Transitional?
|
||||||
|
A little bit of helpful guidance
|
||||||
|
|
||||||
|
Despite the fact that HTML Purifier professes only to support transitional
|
||||||
|
HTML, it rejects a lot of attributes and elements that are actually, indeed,
|
||||||
|
valid. You can investigate progress.html to find out precisely what we
|
||||||
|
are doing to these *deprecated* attributes.
|
||||||
|
|
||||||
|
However, users have found that Strict HTML imposes some quite unreasonable
|
||||||
|
restrictions on certain things. The start and value attributes in ol and
|
||||||
|
li (respectively) perhaps are the most contested. There is currently no
|
||||||
|
widely supported browser method short of JavaScript that can replace these
|
||||||
|
two deprecated attributes. HTML Purifier does not currently support them, but
|
||||||
|
it might behoove us to do so while our output is still transitional.
|
||||||
|
|
||||||
|
Fortunately, that's the only real bugger case. The others have near-perfect
|
||||||
|
CSS equivalents, and were presentational anyway. However, the other question
|
||||||
|
pops up: should we always convert these to the CSS forms when 1. the spec
|
||||||
|
allows them anyway and 2. older browsers support them better? After all, the
|
||||||
|
whole point about CSS is to separate styling from content, so inline styling
|
||||||
|
doesn't solve that problem.
|
||||||
|
|
||||||
|
It's an icky question, and we'll have to deal with it as more and more
|
||||||
|
transforms get implemented.
|
10
library/HTMLPurifier.auto.php
Normal file
10
library/HTMLPurifier.auto.php
Normal file
@@ -0,0 +1,10 @@
|
|||||||
|
<?php
|
||||||
|
|
||||||
|
/**
|
||||||
|
* This is a stub include that automatically configures the include path.
|
||||||
|
*/
|
||||||
|
|
||||||
|
set_include_path(dirname(__FILE__) . PATH_SEPARATOR . get_include_path() );
|
||||||
|
require_once 'HTMLPurifier.php';
|
||||||
|
|
||||||
|
?>
|
@@ -3,7 +3,7 @@
|
|||||||
/*!
|
/*!
|
||||||
* @mainpage
|
* @mainpage
|
||||||
*
|
*
|
||||||
* HTMLPurifier is an HTML filter that will take an arbitrary snippet of
|
* HTML Purifier is an HTML filter that will take an arbitrary snippet of
|
||||||
* HTML and rigorously test, validate and filter it into a version that
|
* HTML and rigorously test, validate and filter it into a version that
|
||||||
* is safe for output onto webpages. It achieves this by:
|
* is safe for output onto webpages. It achieves this by:
|
||||||
*
|
*
|
||||||
@@ -18,11 +18,11 @@
|
|||||||
* However, most users will only need to interface with the HTMLPurifier
|
* However, most users will only need to interface with the HTMLPurifier
|
||||||
* class, so this massive amount of infrastructure is usually concealed.
|
* class, so this massive amount of infrastructure is usually concealed.
|
||||||
* If you plan on working with the internals, be sure to include
|
* If you plan on working with the internals, be sure to include
|
||||||
* HTMLPurifier_ConfigDef and HTMLPurifier_Config.
|
* HTMLPurifier_ConfigSchema and HTMLPurifier_Config.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/*
|
/*
|
||||||
HTMLPurifier - Standards Compliant HTML Filtering
|
HTML Purifier - Standards Compliant HTML Filtering
|
||||||
Copyright (C) 2006 Edward Z. Yang
|
Copyright (C) 2006 Edward Z. Yang
|
||||||
|
|
||||||
This library is free software; you can redistribute it and/or
|
This library is free software; you can redistribute it and/or
|
||||||
@@ -39,11 +39,13 @@
|
|||||||
License along with this library; if not, write to the Free Software
|
License along with this library; if not, write to the Free Software
|
||||||
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||||
*/
|
*/
|
||||||
|
|
||||||
require_once 'HTMLPurifier/ConfigDef.php';
|
// almost every class has an undocumented dependency to these, so make sure
|
||||||
|
// they get included
|
||||||
|
require_once 'HTMLPurifier/ConfigSchema.php';
|
||||||
require_once 'HTMLPurifier/Config.php';
|
require_once 'HTMLPurifier/Config.php';
|
||||||
|
|
||||||
require_once 'HTMLPurifier/Lexer.php';
|
require_once 'HTMLPurifier/Lexer.php';
|
||||||
require_once 'HTMLPurifier/HTMLDefinition.php';
|
|
||||||
require_once 'HTMLPurifier/Generator.php';
|
require_once 'HTMLPurifier/Generator.php';
|
||||||
require_once 'HTMLPurifier/Strategy/Core.php';
|
require_once 'HTMLPurifier/Strategy/Core.php';
|
||||||
require_once 'HTMLPurifier/Encoder.php';
|
require_once 'HTMLPurifier/Encoder.php';
|
||||||
|
@@ -3,15 +3,15 @@
|
|||||||
/**
|
/**
|
||||||
* Internal data-structure used in attribute validation to accumulate state.
|
* Internal data-structure used in attribute validation to accumulate state.
|
||||||
*
|
*
|
||||||
* All it is is a data-structure that holds objects that accumulate state, like
|
* This is a data-structure that holds objects that accumulate state, like
|
||||||
* HTMLPurifier_IDAccumulator.
|
* HTMLPurifier_IDAccumulator. It's better than using globals!
|
||||||
*
|
*
|
||||||
* @param Many functions that accept this object have it as a mandatory
|
* @note Many functions that accept this object have it as a mandatory
|
||||||
* parameter, even when there is no use for it. Though this is
|
* parameter, even when there is no use for it. Though this is
|
||||||
* for the same reasons as why HTMLPurifier_Config is a mandatory
|
* for the same reasons as why HTMLPurifier_Config is a mandatory
|
||||||
* parameter, it is also because you cannot assign a default value
|
* parameter, it is also because you cannot assign a default value
|
||||||
* to a parameter passed by reference (passing by reference is essential
|
* to a parameter passed by reference (passing by reference is essential
|
||||||
* for context to work in PHP 4).
|
* for context to work in PHP 4).
|
||||||
*/
|
*/
|
||||||
|
|
||||||
class HTMLPurifier_AttrContext
|
class HTMLPurifier_AttrContext
|
||||||
|
@@ -15,6 +15,12 @@ require_once 'HTMLPurifier/AttrContext.php';
|
|||||||
class HTMLPurifier_AttrDef
|
class HTMLPurifier_AttrDef
|
||||||
{
|
{
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Tells us whether or not an HTML attribute is minimized. Only the
|
||||||
|
* boolean attribute vapourware would use this.
|
||||||
|
*/
|
||||||
|
var $minimized = false;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Abstract function defined for functions that validate and clean strings.
|
* Abstract function defined for functions that validate and clean strings.
|
||||||
*
|
*
|
||||||
@@ -42,7 +48,16 @@ class HTMLPurifier_AttrDef
|
|||||||
*
|
*
|
||||||
* @note This method is not entirely standards compliant, as trim() removes
|
* @note This method is not entirely standards compliant, as trim() removes
|
||||||
* more types of whitespace than specified in the spec. In practice,
|
* more types of whitespace than specified in the spec. In practice,
|
||||||
* this is rarely a problem.
|
* this is rarely a problem, as those extra characters usually have
|
||||||
|
* already been removed by HTMLPurifier_Encoder.
|
||||||
|
*
|
||||||
|
* @warning This processing is inconsistent with XML's whitespace handling
|
||||||
|
* as specified by section 3.3.3 and referenced XHTML 1.0 section
|
||||||
|
* 4.7. Compliant processing requires all line breaks normalized
|
||||||
|
* to "\n", so the fix is not as simple as fixing it in this
|
||||||
|
* function. Trim and whitespace collapsing are supposed to only
|
||||||
|
* occur in NMTOKENs. However, note that we are NOT necessarily
|
||||||
|
* parsing XML, thus, this behavior may still be correct.
|
||||||
*
|
*
|
||||||
* @public
|
* @public
|
||||||
*/
|
*/
|
||||||
|
@@ -11,9 +11,14 @@ class HTMLPurifier_AttrDef_Host extends HTMLPurifier_AttrDef
|
|||||||
{
|
{
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Instances of HTMLPurifier_AttrDef_IPv4 and HTMLPurifier_AttrDef_IPv6
|
* Instance of HTMLPurifier_AttrDef_IPv4 sub-validator
|
||||||
*/
|
*/
|
||||||
var $ipv4, $ipv6;
|
var $ipv4;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Instance of HTMLPurifier_AttrDef_IPv6 sub-validator
|
||||||
|
*/
|
||||||
|
var $ipv6;
|
||||||
|
|
||||||
function HTMLPurifier_AttrDef_Host() {
|
function HTMLPurifier_AttrDef_Host() {
|
||||||
$this->ipv4 = new HTMLPurifier_AttrDef_IPv4();
|
$this->ipv4 = new HTMLPurifier_AttrDef_IPv4();
|
||||||
|
@@ -5,7 +5,7 @@ require_once 'HTMLPurifier/URIScheme.php';
|
|||||||
require_once 'HTMLPurifier/URISchemeRegistry.php';
|
require_once 'HTMLPurifier/URISchemeRegistry.php';
|
||||||
require_once 'HTMLPurifier/AttrDef/Host.php';
|
require_once 'HTMLPurifier/AttrDef/Host.php';
|
||||||
|
|
||||||
HTMLPurifier_ConfigDef::define(
|
HTMLPurifier_ConfigSchema::define(
|
||||||
'URI', 'DefaultScheme', 'http', 'string',
|
'URI', 'DefaultScheme', 'http', 'string',
|
||||||
'Defines through what scheme the output will be served, in order to '.
|
'Defines through what scheme the output will be served, in order to '.
|
||||||
'select the proper object validator when no scheme information is present.'
|
'select the proper object validator when no scheme information is present.'
|
||||||
@@ -36,13 +36,13 @@ class HTMLPurifier_AttrDef_URI extends HTMLPurifier_AttrDef
|
|||||||
// for HTTP and thus won't work for our generic URI parsing
|
// for HTTP and thus won't work for our generic URI parsing
|
||||||
|
|
||||||
// according to the RFC... (but this cuts corners, i.e. non-validating)
|
// according to the RFC... (but this cuts corners, i.e. non-validating)
|
||||||
$r_URI = '!^'.
|
$r_URI = '!'.
|
||||||
'(([^:/?#<>]+):)?'. // 2. Scheme
|
'(([^:/?#<>\'"]+):)?'. // 2. Scheme
|
||||||
'(//([^/?#<>]*))?'. // 4. Authority
|
'(//([^/?#<>\'"]*))?'. // 4. Authority
|
||||||
'([^?#<>]*)'. // 5. Path
|
'([^?#<>\'"]*)'. // 5. Path
|
||||||
'(\?([^#<>]*))?'. // 7. Query
|
'(\?([^#<>\'"]*))?'. // 7. Query
|
||||||
'(#([^<>]*))?'. // 8. Fragment
|
'(#([^<>\'"]*))?'. // 8. Fragment
|
||||||
'$!';
|
'!';
|
||||||
|
|
||||||
$matches = array();
|
$matches = array();
|
||||||
$result = preg_match($r_URI, $uri, $matches);
|
$result = preg_match($r_URI, $uri, $matches);
|
||||||
|
@@ -4,13 +4,13 @@ require_once 'HTMLPurifier/AttrTransform.php';
|
|||||||
|
|
||||||
// this MUST be placed in post, as it assumes that any value in dir is valid
|
// this MUST be placed in post, as it assumes that any value in dir is valid
|
||||||
|
|
||||||
HTMLPurifier_ConfigDef::define(
|
HTMLPurifier_ConfigSchema::define(
|
||||||
'Attr', 'DefaultTextDir', 'ltr', 'string',
|
'Attr', 'DefaultTextDir', 'ltr', 'string',
|
||||||
'Defines the default text direction (ltr or rtl) of the document '.
|
'Defines the default text direction (ltr or rtl) of the document '.
|
||||||
'being parsed. This generally is the same as the value of the dir '.
|
'being parsed. This generally is the same as the value of the dir '.
|
||||||
'attribute in HTML, or ltr if that is not specified.'
|
'attribute in HTML, or ltr if that is not specified.'
|
||||||
);
|
);
|
||||||
HTMLPurifier_ConfigDef::defineAllowedValues(
|
HTMLPurifier_ConfigSchema::defineAllowedValues(
|
||||||
'Attr', 'DefaultTextDir', array( 'ltr', 'rtl' )
|
'Attr', 'DefaultTextDir', array( 'ltr', 'rtl' )
|
||||||
);
|
);
|
||||||
|
|
||||||
|
@@ -4,7 +4,7 @@ require_once 'HTMLPurifier/AttrTransform.php';
|
|||||||
|
|
||||||
// must be called POST validation
|
// must be called POST validation
|
||||||
|
|
||||||
HTMLPurifier_ConfigDef::define(
|
HTMLPurifier_ConfigSchema::define(
|
||||||
'Attr', 'DefaultInvalidImage', '', 'string',
|
'Attr', 'DefaultInvalidImage', '', 'string',
|
||||||
'This is the default image an img tag will be pointed to if it does '.
|
'This is the default image an img tag will be pointed to if it does '.
|
||||||
'not have a valid src attribute. In future versions, we may allow the '.
|
'not have a valid src attribute. In future versions, we may allow the '.
|
||||||
@@ -12,7 +12,7 @@ HTMLPurifier_ConfigDef::define(
|
|||||||
'not possible right now.'
|
'not possible right now.'
|
||||||
);
|
);
|
||||||
|
|
||||||
HTMLPurifier_ConfigDef::define(
|
HTMLPurifier_ConfigSchema::define(
|
||||||
'Attr', 'DefaultInvalidImageAlt', 'Invalid image', 'string',
|
'Attr', 'DefaultInvalidImageAlt', 'Invalid image', 'string',
|
||||||
'This is the content of the alt tag of an invalid image if the user '.
|
'This is the content of the alt tag of an invalid image if the user '.
|
||||||
'had not previously specified an alt attribute. It has no effect when the '.
|
'had not previously specified an alt attribute. It has no effect when the '.
|
||||||
|
@@ -5,14 +5,7 @@
|
|||||||
// false = delete parent node and all children
|
// false = delete parent node and all children
|
||||||
// array(...) = replace children nodes with these
|
// array(...) = replace children nodes with these
|
||||||
|
|
||||||
// this is the hardest one to implement. We'll use fancy regexp tricks
|
HTMLPurifier_ConfigSchema::define(
|
||||||
// right now, we only expect it to return TRUE or FALSE (it won't attempt
|
|
||||||
// to fix the tree)
|
|
||||||
|
|
||||||
// we may end up writing custom code for each HTML case
|
|
||||||
// in order to make it self correcting
|
|
||||||
|
|
||||||
HTMLPurifier_ConfigDef::define(
|
|
||||||
'Core', 'EscapeInvalidChildren', false, 'bool',
|
'Core', 'EscapeInvalidChildren', false, 'bool',
|
||||||
'When true, a child is found that is not allowed in the context of the '.
|
'When true, a child is found that is not allowed in the context of the '.
|
||||||
'parent element will be transformed into text as if it were ASCII. When '.
|
'parent element will be transformed into text as if it were ASCII. When '.
|
||||||
@@ -62,9 +55,9 @@ class HTMLPurifier_ChildDef
|
|||||||
* Custom validation class, accepts DTD child definitions
|
* Custom validation class, accepts DTD child definitions
|
||||||
*
|
*
|
||||||
* @warning Currently this class is an all or nothing proposition, that is,
|
* @warning Currently this class is an all or nothing proposition, that is,
|
||||||
* it will only give a bool return value. Table is the only
|
* it will only give a bool return value.
|
||||||
* child definition that uses this class, and we ought to give
|
* @note This class is currently not used by any code, although it is unit
|
||||||
* it a dedicated one.
|
* tested.
|
||||||
*/
|
*/
|
||||||
class HTMLPurifier_ChildDef_Custom extends HTMLPurifier_ChildDef
|
class HTMLPurifier_ChildDef_Custom extends HTMLPurifier_ChildDef
|
||||||
{
|
{
|
||||||
@@ -307,4 +300,141 @@ class HTMLPurifier_ChildDef_Chameleon extends HTMLPurifier_ChildDef
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
?>
|
/**
 * Definition for tables.
 *
 * Regroups the direct children of a table into a fixed order: caption,
 * col/colgroup, thead, tfoot, and finally the tbody/tr content. Anything
 * else found at child level (other than whitespace text) is discarded.
 */
class HTMLPurifier_ChildDef_Table extends HTMLPurifier_ChildDef
{
    var $allow_empty = false;
    var $type = 'table';

    function HTMLPurifier_ChildDef_Table() {}

    /**
     * Validates and reorders the children of a table element.
     *
     * @param $tokens_of_children Array of tokens making up the table's children
     * @param $config  Not used by this implementation
     * @param $context Not used by this implementation
     * @return false if there are no children or no tbody/tr content was
     *         found, true if the regrouped token list is identical to the
     *         input, otherwise the regrouped array of tokens.
     */
    function validateChildren($tokens_of_children, $config, $context) {
        if (empty($tokens_of_children)) return false;

        // this ensures that the loop gets run one last time before closing
        // up, so that a node still being collected gets stashed. It's a
        // little bit of a hack, but it works! The phantom token is removed
        // again before the final comparison.
        $tokens_of_children[] = false;

        // only one of these elements is allowed in a table
        $caption = false;
        $thead   = false;
        $tfoot   = false;

        // as many of these as you want
        $cols    = array();
        $content = array();

        $nesting = 0; // current depth so we can determine nodes
        $is_collecting = false; // are we globbing together tokens to package
                                // into one of the collectors?
        $collection = array(); // collected nodes
        $tag_index = 0; // the first node might be whitespace,
                        // so this tells us where the start tag is

        foreach ($tokens_of_children as $token) {
            // must be decided before $nesting is adjusted for this token:
            // a start tag seen at depth zero is itself a direct child
            $is_child = ($nesting == 0);

            if ($token === false) {
                // terminating sequence started
            } elseif ($token->type == 'start') {
                $nesting++;
            } elseif ($token->type == 'end') {
                $nesting--;
            }

            // handle node collection
            if ($is_collecting) {
                if ($is_child) {
                    // we are back at child level, so the node being
                    // collected is complete: stash the tokens away.
                    // The tag at $tag_index tells us the type of the
                    // collection.
                    switch ($collection[$tag_index]->name) {
                        case 'tr':
                        case 'tbody':
                            $content[] = $collection;
                            break;
                        case 'caption':
                            // only the first caption is kept
                            if ($caption === false) $caption = $collection;
                            break;
                        case 'thead':
                        case 'tfoot':
                            // access the appropriate variable, $thead or $tfoot
                            $var = $collection[$tag_index]->name;
                            if ($$var === false) {
                                $$var = $collection;
                            } else {
                                // transmutate the first and last entries
                                // (the start and end tags) into tbody tags,
                                // and then put into content
                                // NOTE(review): this renames the token
                                // objects themselves; confirm callers do not
                                // rely on them being left untouched.
                                $collection[$tag_index]->name = 'tbody';
                                $collection[count($collection)-1]->name = 'tbody';
                                $content[] = $collection;
                            }
                            break;
                        case 'colgroup':
                            $cols[] = $collection;
                            break;
                    }
                    $collection = array();
                    $is_collecting = false;
                    $tag_index = 0;
                } else {
                    // add the node to the collection
                    $collection[] = $token;
                }
            }

            // terminate
            if ($token === false) break;

            if ($is_child) {
                // determine what we're dealing with
                if ($token->name == 'col') {
                    // the only empty tag in the posse, we can handle it
                    // immediately (together with any leading whitespace)
                    $cols[] = array_merge($collection, array($token));
                    $collection = array();
                    $tag_index = 0;
                    continue;
                }
                switch ($token->name) {
                    case 'caption':
                    case 'colgroup':
                    case 'thead':
                    case 'tfoot':
                    case 'tbody':
                    case 'tr':
                        $is_collecting = true;
                        $collection[] = $token;
                        // "break", not "continue": inside a switch PHP
                        // treats a bare continue as break anyway, and
                        // newer versions warn about it
                        break;
                    default:
                        // whitespace is kept so it can travel with the
                        // next node; everything else is dropped
                        if ($token->type == 'text' && $token->is_whitespace) {
                            $collection[] = $token;
                            $tag_index++;
                        }
                        break;
                }
            }
        }

        if (empty($content)) return false;

        // reassemble the groups in their fixed order
        $ret = array();
        if ($caption !== false) $ret = array_merge($ret, $caption);
        foreach ($cols as $token_array) $ret = array_merge($ret, $token_array);
        if ($thead !== false) $ret = array_merge($ret, $thead);
        if ($tfoot !== false) $ret = array_merge($ret, $tfoot);
        foreach ($content as $token_array) $ret = array_merge($ret, $token_array);
        if (!empty($collection) && !$is_collecting) {
            // grab the trailing space
            $ret = array_merge($ret, $collection);
        }

        array_pop($tokens_of_children); // remove phantom token

        return ($ret === $tokens_of_children) ? true : $ret;

    }
}
|
||||||
|
|
||||||
|
?>
|
||||||
|
@@ -21,22 +21,22 @@ class HTMLPurifier_Config
|
|||||||
var $conf;
|
var $conf;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Reference HTMLPurifier_ConfigDef for value checking
|
* Reference HTMLPurifier_ConfigSchema for value checking
|
||||||
*/
|
*/
|
||||||
var $def;
|
var $def;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Instance of HTMLPurifier_HTMLDefinition
|
* Cached instance of HTMLPurifier_HTMLDefinition
|
||||||
*/
|
*/
|
||||||
var $html_definition;
|
var $html_definition;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Instance of HTMLPurifier_CSSDefinition
|
* Cached instance of HTMLPurifier_CSSDefinition
|
||||||
*/
|
*/
|
||||||
var $css_definition;
|
var $css_definition;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @param $definition HTMLPurifier_ConfigDef that defines what directives
|
* @param $definition HTMLPurifier_ConfigSchema that defines what directives
|
||||||
* are allowed.
|
* are allowed.
|
||||||
*/
|
*/
|
||||||
function HTMLPurifier_Config(&$definition) {
|
function HTMLPurifier_Config(&$definition) {
|
||||||
@@ -49,7 +49,7 @@ class HTMLPurifier_Config
|
|||||||
* @return Default HTMLPurifier_Config object.
|
* @return Default HTMLPurifier_Config object.
|
||||||
*/
|
*/
|
||||||
function createDefault() {
|
function createDefault() {
|
||||||
$definition =& HTMLPurifier_ConfigDef::instance();
|
$definition =& HTMLPurifier_ConfigSchema::instance();
|
||||||
$config = new HTMLPurifier_Config($definition);
|
$config = new HTMLPurifier_Config($definition);
|
||||||
return $config;
|
return $config;
|
||||||
}
|
}
|
||||||
|
@@ -2,9 +2,23 @@
|
|||||||
|
|
||||||
/**
|
/**
|
||||||
* Configuration definition, defines directives and their defaults.
|
* Configuration definition, defines directives and their defaults.
|
||||||
* @todo Build documentation generation capabilities.
|
* @todo The ability to define things multiple times is confusing and should
|
||||||
|
* be factored out to its own function named registerDependency() or
|
||||||
|
* addNote(), where only the namespace.name and an extra descriptions
|
||||||
|
* documenting the nature of the dependency are needed. Since it's
|
||||||
|
* possible that the dependency is registered before the configuration
|
||||||
|
* is defined, deferring it to some sort of cache until it actually
|
||||||
|
* gets defined would be wise, keeping it opaque until it does get
|
||||||
|
* defined. We could add a finalize() method which would cause it to
|
||||||
|
* error out if we get a dangling dependency. It's difficult, however,
|
||||||
|
* to know whether or not it's a dependency, or a codependency, that is
|
||||||
|
* neither of them fully depends on it. Where does the configuration go
|
||||||
|
* then? This could be partially resolved by allowing blanket definitions
|
||||||
|
* and then splitting them up into finer-grained versions, however, there
|
||||||
|
* might be implementation difficulties in ini files regarding order of
|
||||||
|
* execution.
|
||||||
*/
|
*/
|
||||||
class HTMLPurifier_ConfigDef {
|
class HTMLPurifier_ConfigSchema {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Defaults of the directives and namespaces.
|
* Defaults of the directives and namespaces.
|
||||||
@@ -26,15 +40,15 @@ class HTMLPurifier_ConfigDef {
|
|||||||
* Lookup table of allowed types.
|
* Lookup table of allowed types.
|
||||||
*/
|
*/
|
||||||
var $types = array(
|
var $types = array(
|
||||||
'string' => true,
|
'string' => 'String',
|
||||||
'istring' => true,
|
'istring' => 'Case-insensitive string',
|
||||||
'int' => true,
|
'int' => 'Integer',
|
||||||
'float' => true,
|
'float' => 'Float',
|
||||||
'bool' => true,
|
'bool' => 'Boolean',
|
||||||
'lookup' => true,
|
'lookup' => 'Lookup array',
|
||||||
'list' => true,
|
'list' => 'Array list',
|
||||||
'hash' => true,
|
'hash' => 'Associative array',
|
||||||
'mixed' => true
|
'mixed' => 'Mixed'
|
||||||
);
|
);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -46,7 +60,7 @@ class HTMLPurifier_ConfigDef {
|
|||||||
$this->defineNamespace('URI', 'Features regarding Uniform Resource Identifiers.');
|
$this->defineNamespace('URI', 'Features regarding Uniform Resource Identifiers.');
|
||||||
$this->defineNamespace('HTML', 'Configuration regarding allowed HTML.');
|
$this->defineNamespace('HTML', 'Configuration regarding allowed HTML.');
|
||||||
$this->defineNamespace('CSS', 'Configuration regarding allowed CSS.');
|
$this->defineNamespace('CSS', 'Configuration regarding allowed CSS.');
|
||||||
$this->defineNamespace('Test', 'Testing configuration for our unit tests.');
|
$this->defineNamespace('Test', 'Developer testing configuration for our unit tests.');
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -57,7 +71,7 @@ class HTMLPurifier_ConfigDef {
|
|||||||
if ($prototype !== null) {
|
if ($prototype !== null) {
|
||||||
$instance = $prototype;
|
$instance = $prototype;
|
||||||
} elseif ($instance === null || $prototype === true) {
|
} elseif ($instance === null || $prototype === true) {
|
||||||
$instance = new HTMLPurifier_ConfigDef();
|
$instance = new HTMLPurifier_ConfigSchema();
|
||||||
$instance->initialize();
|
$instance->initialize();
|
||||||
}
|
}
|
||||||
return $instance;
|
return $instance;
|
||||||
@@ -66,9 +80,6 @@ class HTMLPurifier_ConfigDef {
|
|||||||
/**
|
/**
|
||||||
* Defines a directive for configuration
|
* Defines a directive for configuration
|
||||||
* @warning Will fail if directive's namespace is not defined
|
* @warning Will fail if directive's namespace is not defined
|
||||||
* @todo Collect information on description and allow redefinition
|
|
||||||
* so that multiple files can register a dependency on a
|
|
||||||
* configuration directive.
|
|
||||||
* @param $namespace Namespace the directive is in
|
* @param $namespace Namespace the directive is in
|
||||||
* @param $name Key of directive
|
* @param $name Key of directive
|
||||||
* @param $default Default value of directive
|
* @param $default Default value of directive
|
||||||
@@ -80,12 +91,17 @@ class HTMLPurifier_ConfigDef {
|
|||||||
$namespace, $name, $default, $type,
|
$namespace, $name, $default, $type,
|
||||||
$description
|
$description
|
||||||
) {
|
) {
|
||||||
$def =& HTMLPurifier_ConfigDef::instance();
|
$def =& HTMLPurifier_ConfigSchema::instance();
|
||||||
if (!isset($def->info[$namespace])) {
|
if (!isset($def->info[$namespace])) {
|
||||||
trigger_error('Cannot define directive for undefined namespace',
|
trigger_error('Cannot define directive for undefined namespace',
|
||||||
E_USER_ERROR);
|
E_USER_ERROR);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
if (!ctype_alnum($name)) {
|
||||||
|
trigger_error('Directive name must be alphanumeric',
|
||||||
|
E_USER_ERROR);
|
||||||
|
return;
|
||||||
|
}
|
||||||
if (isset($def->info[$namespace][$name])) {
|
if (isset($def->info[$namespace][$name])) {
|
||||||
if (
|
if (
|
||||||
$def->info[$namespace][$name]->type !== $type ||
|
$def->info[$namespace][$name]->type !== $type ||
|
||||||
@@ -122,17 +138,19 @@ class HTMLPurifier_ConfigDef {
|
|||||||
* @param $description Description of the namespace
|
* @param $description Description of the namespace
|
||||||
*/
|
*/
|
||||||
function defineNamespace($namespace, $description) {
|
function defineNamespace($namespace, $description) {
|
||||||
$def =& HTMLPurifier_ConfigDef::instance();
|
$def =& HTMLPurifier_ConfigSchema::instance();
|
||||||
if (isset($def->info[$namespace])) {
|
if (isset($def->info[$namespace])) {
|
||||||
trigger_error('Cannot redefine namespace', E_USER_ERROR);
|
trigger_error('Cannot redefine namespace', E_USER_ERROR);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
if (!ctype_alnum($namespace)) {
|
||||||
|
trigger_error('Namespace name must be alphanumeric',
|
||||||
|
E_USER_ERROR);
|
||||||
|
return;
|
||||||
|
}
|
||||||
$def->info[$namespace] = array();
|
$def->info[$namespace] = array();
|
||||||
$def->info_namespace[$namespace] = new HTMLPurifier_ConfigEntity_Namespace();
|
$def->info_namespace[$namespace] = new HTMLPurifier_ConfigEntity_Namespace();
|
||||||
$backtrace = debug_backtrace();
|
$def->info_namespace[$namespace]->description = $description;
|
||||||
$file = $def->mungeFilename($backtrace[0]['file']);
|
|
||||||
$line = $backtrace[0]['line'];
|
|
||||||
$def->info_namespace[$namespace]->addDescription($file,$line,$description);
|
|
||||||
$def->defaults[$namespace] = array();
|
$def->defaults[$namespace] = array();
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -147,7 +165,7 @@ class HTMLPurifier_ConfigDef {
|
|||||||
* @param $real Value aliased value will be converted into
|
* @param $real Value aliased value will be converted into
|
||||||
*/
|
*/
|
||||||
function defineValueAliases($namespace, $name, $aliases) {
|
function defineValueAliases($namespace, $name, $aliases) {
|
||||||
$def =& HTMLPurifier_ConfigDef::instance();
|
$def =& HTMLPurifier_ConfigSchema::instance();
|
||||||
if (!isset($def->info[$namespace][$name])) {
|
if (!isset($def->info[$namespace][$name])) {
|
||||||
trigger_error('Cannot set value alias for non-existant directive',
|
trigger_error('Cannot set value alias for non-existant directive',
|
||||||
E_USER_ERROR);
|
E_USER_ERROR);
|
||||||
@@ -177,7 +195,7 @@ class HTMLPurifier_ConfigDef {
|
|||||||
* @param $allowed_values Arraylist of allowed values
|
* @param $allowed_values Arraylist of allowed values
|
||||||
*/
|
*/
|
||||||
function defineAllowedValues($namespace, $name, $allowed_values) {
|
function defineAllowedValues($namespace, $name, $allowed_values) {
|
||||||
$def =& HTMLPurifier_ConfigDef::instance();
|
$def =& HTMLPurifier_ConfigSchema::instance();
|
||||||
if (!isset($def->info[$namespace][$name])) {
|
if (!isset($def->info[$namespace][$name])) {
|
||||||
trigger_error('Cannot define allowed values for undefined directive',
|
trigger_error('Cannot define allowed values for undefined directive',
|
||||||
E_USER_ERROR);
|
E_USER_ERROR);
|
||||||
@@ -256,27 +274,19 @@ class HTMLPurifier_ConfigDef {
|
|||||||
/**
|
/**
|
||||||
* Base class for configuration entity
|
* Base class for configuration entity
|
||||||
*/
|
*/
|
||||||
class HTMLPurifier_ConfigEntity
|
class HTMLPurifier_ConfigEntity {}
|
||||||
{
|
|
||||||
/**
|
|
||||||
* Plaintext descriptions of the configuration entity is. Organized by
|
|
||||||
* file and line number, so multiple descriptions are allowed.
|
|
||||||
*/
|
|
||||||
var $descriptions = array();
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Adds a description to the array
|
|
||||||
*/
|
|
||||||
function addDescription($file, $line, $description) {
|
|
||||||
if (!isset($this->descriptions[$file])) $this->descriptions[$file] = array();
|
|
||||||
$this->descriptions[$file][$line] = $description;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Structure object describing a namespace
|
* Structure object describing a namespace
|
||||||
*/
|
*/
|
||||||
class HTMLPurifier_ConfigEntity_Namespace extends HTMLPurifier_ConfigEntity {}
|
class HTMLPurifier_ConfigEntity_Namespace extends HTMLPurifier_ConfigEntity {
|
||||||
|
|
||||||
|
/**
|
||||||
|
* String description of what kinds of directives go in this namespace.
|
||||||
|
*/
|
||||||
|
var $description;
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Structure object containing definition of a directive.
|
* Structure object containing definition of a directive.
|
||||||
@@ -308,6 +318,19 @@ class HTMLPurifier_ConfigEntity_Directive extends HTMLPurifier_ConfigEntity
|
|||||||
* - mixed (anything goes)
|
* - mixed (anything goes)
|
||||||
*/
|
*/
|
||||||
var $type = 'mixed';
|
var $type = 'mixed';
|
||||||
|
/**
|
||||||
|
* Plaintext descriptions of the configuration entity is. Organized by
|
||||||
|
* file and line number, so multiple descriptions are allowed.
|
||||||
|
*/
|
||||||
|
var $descriptions = array();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Adds a description to the array
|
||||||
|
*/
|
||||||
|
function addDescription($file, $line, $description) {
|
||||||
|
if (!isset($this->descriptions[$file])) $this->descriptions[$file] = array();
|
||||||
|
$this->descriptions[$file][$line] = $description;
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
@@ -2,7 +2,7 @@
|
|||||||
|
|
||||||
require_once 'HTMLPurifier/EntityLookup.php';
|
require_once 'HTMLPurifier/EntityLookup.php';
|
||||||
|
|
||||||
HTMLPurifier_ConfigDef::define(
|
HTMLPurifier_ConfigSchema::define(
|
||||||
'Core', 'Encoding', 'utf-8', 'istring',
|
'Core', 'Encoding', 'utf-8', 'istring',
|
||||||
'If for some reason you are unable to convert all webpages to UTF-8, '.
|
'If for some reason you are unable to convert all webpages to UTF-8, '.
|
||||||
'you can use this directive as a stop-gap compatibility change to '.
|
'you can use this directive as a stop-gap compatibility change to '.
|
||||||
@@ -17,20 +17,20 @@ HTMLPurifier_ConfigDef::define(
|
|||||||
|
|
||||||
if ( !function_exists('iconv') ) {
|
if ( !function_exists('iconv') ) {
|
||||||
// only encodings with native PHP support
|
// only encodings with native PHP support
|
||||||
HTMLPurifier_ConfigDef::defineAllowedValues(
|
HTMLPurifier_ConfigSchema::defineAllowedValues(
|
||||||
'Core', 'Encoding', array(
|
'Core', 'Encoding', array(
|
||||||
'utf-8',
|
'utf-8',
|
||||||
'iso-8859-1'
|
'iso-8859-1'
|
||||||
)
|
)
|
||||||
);
|
);
|
||||||
HTMLPurifier_ConfigDef::defineValueAliases(
|
HTMLPurifier_ConfigSchema::defineValueAliases(
|
||||||
'Core', 'Encoding', array(
|
'Core', 'Encoding', array(
|
||||||
'iso8859-1' => 'iso-8859-1'
|
'iso8859-1' => 'iso-8859-1'
|
||||||
)
|
)
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
HTMLPurifier_ConfigDef::define(
|
HTMLPurifier_ConfigSchema::define(
|
||||||
'Test', 'ForceNoIconv', false, 'bool',
|
'Test', 'ForceNoIconv', false, 'bool',
|
||||||
'When set to true, HTMLPurifier_Encoder will act as if iconv does not '.
|
'When set to true, HTMLPurifier_Encoder will act as if iconv does not '.
|
||||||
'exist and use only pure PHP implementations.'
|
'exist and use only pure PHP implementations.'
|
||||||
@@ -88,7 +88,7 @@ class HTMLPurifier_Encoder
|
|||||||
if ($iconv && !$force_php) {
|
if ($iconv && !$force_php) {
|
||||||
// do the shortcut way
|
// do the shortcut way
|
||||||
$str = @iconv('UTF-8', 'UTF-8//IGNORE', $str);
|
$str = @iconv('UTF-8', 'UTF-8//IGNORE', $str);
|
||||||
return strtr($str, $non_sgml_chars);;
|
return strtr($str, $non_sgml_chars);
|
||||||
}
|
}
|
||||||
|
|
||||||
$mState = 0; // cached expected number of octets after the current octet
|
$mState = 0; // cached expected number of octets after the current octet
|
||||||
|
@@ -88,7 +88,6 @@ class HTMLPurifier_EntityParser
|
|||||||
* either index 1, 2 or 3 set with a hex value, dec value,
|
* either index 1, 2 or 3 set with a hex value, dec value,
|
||||||
* or string (respectively).
|
* or string (respectively).
|
||||||
* @returns Replacement string.
|
* @returns Replacement string.
|
||||||
* @todo Implement string translations
|
|
||||||
*/
|
*/
|
||||||
|
|
||||||
// +----------+----------+----------+----------+
|
// +----------+----------+----------+----------+
|
||||||
|
@@ -4,7 +4,7 @@
|
|||||||
|
|
||||||
require_once 'HTMLPurifier/Lexer.php';
|
require_once 'HTMLPurifier/Lexer.php';
|
||||||
|
|
||||||
HTMLPurifier_ConfigDef::define(
|
HTMLPurifier_ConfigSchema::define(
|
||||||
'Core', 'CleanUTF8DuringGeneration', false, 'bool',
|
'Core', 'CleanUTF8DuringGeneration', false, 'bool',
|
||||||
'When true, HTMLPurifier_Generator will also check all strings it '.
|
'When true, HTMLPurifier_Generator will also check all strings it '.
|
||||||
'escapes for UTF-8 well-formedness as a defense in depth measure. '.
|
'escapes for UTF-8 well-formedness as a defense in depth measure. '.
|
||||||
@@ -15,6 +15,29 @@ HTMLPurifier_ConfigDef::define(
|
|||||||
'generateFromTokens.'
|
'generateFromTokens.'
|
||||||
);
|
);
|
||||||
|
|
||||||
|
HTMLPurifier_ConfigSchema::define(
|
||||||
|
'Core', 'XHTML', true, 'bool',
|
||||||
|
'Determines whether or not output is XHTML or not. When disabled, HTML '.
|
||||||
|
'Purifier goes into HTML 4.01 removes XHTML-specific markup constructs, '.
|
||||||
|
'such as boolean attribute expansion and trailing slashes in empty tags. '.
|
||||||
|
'This directive was available since 1.1.'
|
||||||
|
);
|
||||||
|
|
||||||
|
// extension constraints could be factored into ConfigSchema
|
||||||
|
HTMLPurifier_ConfigSchema::define(
|
||||||
|
'Core', 'TidyFormat', false, 'bool',
|
||||||
|
'<p>Determines whether or not to run Tidy on the final output for pretty '.
|
||||||
|
'formatting reasons, such as indentation and wrap.</p><p>This can greatly '.
|
||||||
|
'improve readability for editors who are hand-editing the HTML, but is '.
|
||||||
|
'by no means necessary as HTML Purifier has already fixed all major '.
|
||||||
|
'errors the HTML may have had. Tidy is a non-default extension, and this directive '.
|
||||||
|
'will silently fail if Tidy is not available.</p><p>If you are looking to make '.
|
||||||
|
'the overall look of your page\'s source better, I recommend running Tidy '.
|
||||||
|
'on the entire page rather than just user-content (after all, the '.
|
||||||
|
'indentation relative to the containing blocks will be incorrect).</p><p>This '.
|
||||||
|
'directive was available since 1.1.1.</p>'
|
||||||
|
);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Generates HTML from tokens.
|
* Generates HTML from tokens.
|
||||||
*/
|
*/
|
||||||
@@ -22,11 +45,16 @@ class HTMLPurifier_Generator
|
|||||||
{
|
{
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Bool cache of the CleanUTF8DuringGeneration directive.
|
* Bool cache of %Core.CleanUTF8DuringGeneration
|
||||||
* @private
|
* @private
|
||||||
*/
|
*/
|
||||||
var $_clean_utf8 = false;
|
var $_clean_utf8 = false;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Bool cache of %Core.XHTML
|
||||||
|
*/
|
||||||
|
var $_xhtml = true;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Generates HTML from an array of tokens.
|
* Generates HTML from an array of tokens.
|
||||||
* @param $tokens Array of HTMLPurifier_Token
|
* @param $tokens Array of HTMLPurifier_Token
|
||||||
@@ -38,10 +66,35 @@ class HTMLPurifier_Generator
|
|||||||
$html = '';
|
$html = '';
|
||||||
if (!$config) $config = HTMLPurifier_Config::createDefault();
|
if (!$config) $config = HTMLPurifier_Config::createDefault();
|
||||||
$this->_clean_utf8 = $config->get('Core', 'CleanUTF8DuringGeneration');
|
$this->_clean_utf8 = $config->get('Core', 'CleanUTF8DuringGeneration');
|
||||||
|
$this->_xhtml = $config->get('Core', 'XHTML');
|
||||||
if (!$tokens) return '';
|
if (!$tokens) return '';
|
||||||
foreach ($tokens as $token) {
|
foreach ($tokens as $token) {
|
||||||
$html .= $this->generateFromToken($token);
|
$html .= $this->generateFromToken($token);
|
||||||
}
|
}
|
||||||
|
if ($config->get('Core', 'TidyFormat') && extension_loaded('tidy')) {
|
||||||
|
|
||||||
|
$tidy_options = array(
|
||||||
|
'indent'=> true,
|
||||||
|
'output-xhtml' => $this->_xhtml,
|
||||||
|
'show-body-only' => true,
|
||||||
|
'indent-spaces' => 2,
|
||||||
|
'wrap' => 68,
|
||||||
|
);
|
||||||
|
if (version_compare(PHP_VERSION, '5', '<')) {
|
||||||
|
tidy_set_encoding('utf8');
|
||||||
|
foreach ($tidy_options as $key => $value) {
|
||||||
|
tidy_setopt($key, $value);
|
||||||
|
}
|
||||||
|
tidy_parse_string($html);
|
||||||
|
tidy_clean_repair();
|
||||||
|
$html = tidy_get_output();
|
||||||
|
} else {
|
||||||
|
$tidy = new Tidy;
|
||||||
|
$tidy->parseString($html, $tidy_options, 'utf8');
|
||||||
|
$tidy->cleanRepair();
|
||||||
|
$html = (string) $tidy;
|
||||||
|
}
|
||||||
|
}
|
||||||
return $html;
|
return $html;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -61,7 +114,9 @@ class HTMLPurifier_Generator
|
|||||||
|
|
||||||
} elseif ($token->type == 'empty') {
|
} elseif ($token->type == 'empty') {
|
||||||
$attr = $this->generateAttributes($token->attributes);
|
$attr = $this->generateAttributes($token->attributes);
|
||||||
return '<' . $token->name . ($attr ? ' ' : '') . $attr . ' />';
|
return '<' . $token->name . ($attr ? ' ' : '') . $attr .
|
||||||
|
( $this->_xhtml ? ' /': '' )
|
||||||
|
. '>';
|
||||||
|
|
||||||
} elseif ($token->type == 'text') {
|
} elseif ($token->type == 'text') {
|
||||||
return $this->escape($token->data);
|
return $this->escape($token->data);
|
||||||
@@ -80,6 +135,11 @@ class HTMLPurifier_Generator
|
|||||||
function generateAttributes($assoc_array_of_attributes) {
|
function generateAttributes($assoc_array_of_attributes) {
|
||||||
$html = '';
|
$html = '';
|
||||||
foreach ($assoc_array_of_attributes as $key => $value) {
|
foreach ($assoc_array_of_attributes as $key => $value) {
|
||||||
|
if (!$this->_xhtml) {
|
||||||
|
// remove namespaced attributes
|
||||||
|
if (strpos($key, ':') !== false) continue;
|
||||||
|
// also needed: check for attribute minimization
|
||||||
|
}
|
||||||
$html .= $key.'="'.$this->escape($value).'" ';
|
$html .= $key.'="'.$this->escape($value).'" ';
|
||||||
}
|
}
|
||||||
return rtrim($html);
|
return rtrim($html);
|
||||||
|
@@ -56,6 +56,7 @@ class HTMLPurifier_HTMLDefinition
|
|||||||
|
|
||||||
/**
|
/**
|
||||||
* String name of parent element HTML will be going into.
|
* String name of parent element HTML will be going into.
|
||||||
|
* @todo Allow this to be overloaded by user config
|
||||||
* @public
|
* @public
|
||||||
*/
|
*/
|
||||||
var $info_parent = 'div';
|
var $info_parent = 'div';
|
||||||
@@ -111,12 +112,19 @@ class HTMLPurifier_HTMLDefinition
|
|||||||
//////////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////////
|
||||||
// info[]->child : defines allowed children for elements
|
// info[]->child : defines allowed children for elements
|
||||||
|
|
||||||
// entities: prefixed with e_ and _ replaces .
|
// entities: prefixed with e_ and _ replaces . from DTD
|
||||||
|
// double underlines are entities we made up
|
||||||
|
|
||||||
// we don't use an array because that complicates interpolation
|
// we don't use an array because that complicates interpolation
|
||||||
// strings are used instead of arrays because if you use arrays,
|
// strings are used instead of arrays because if you use arrays,
|
||||||
// you have to do some hideous manipulation with array_merge()
|
// you have to do some hideous manipulation with array_merge()
|
||||||
|
|
||||||
|
// todo: determine whether or not having allowed children
|
||||||
|
// that aren't allowed globally affects security (it shouldn't)
|
||||||
|
// if above works out, extend children definitions to include all
|
||||||
|
// possible elements (allowed elements will dictate which ones
|
||||||
|
// get dropped
|
||||||
|
|
||||||
$e_special_extra = 'img';
|
$e_special_extra = 'img';
|
||||||
$e_special_basic = 'br | span | bdo';
|
$e_special_basic = 'br | span | bdo';
|
||||||
$e_special = "$e_special_basic | $e_special_extra";
|
$e_special = "$e_special_basic | $e_special_extra";
|
||||||
@@ -142,16 +150,18 @@ class HTMLPurifier_HTMLDefinition
|
|||||||
$e_block = "p | $e_heading | div | $e_lists | $e_blocktext | table";
|
$e_block = "p | $e_heading | div | $e_lists | $e_blocktext | table";
|
||||||
$e__flow = "#PCDATA | $e_block | $e_inline | $e_misc";
|
$e__flow = "#PCDATA | $e_block | $e_inline | $e_misc";
|
||||||
$e_Flow = new HTMLPurifier_ChildDef_Optional($e__flow);
|
$e_Flow = new HTMLPurifier_ChildDef_Optional($e__flow);
|
||||||
$e_a_content = new HTMLPurifier_ChildDef_Optional("#PCDATA | $e_special".
|
$e_a_content = new HTMLPurifier_ChildDef_Optional("#PCDATA".
|
||||||
" | $e_fontstyle | $e_phrase | $e_inline_forms | $e_misc_inline");
|
" | $e_special | $e_fontstyle | $e_phrase | $e_inline_forms".
|
||||||
|
" | $e_misc_inline");
|
||||||
$e_pre_content = new HTMLPurifier_ChildDef_Optional("#PCDATA | a".
|
$e_pre_content = new HTMLPurifier_ChildDef_Optional("#PCDATA | a".
|
||||||
" | $e_special_basic | $e_fontstyle_basic | $e_phrase_basic".
|
" | $e_special_basic | $e_fontstyle_basic | $e_phrase_basic".
|
||||||
" | $e_inline_forms | $e_misc_inline");
|
" | $e_inline_forms | $e_misc_inline");
|
||||||
$e_form_content = new HTMLPurifier_ChildDef_Optional(''); //unused
|
$e_form_content = new HTMLPurifier_ChildDef_Optional('');//unused
|
||||||
$e_form_button_content = new HTMLPurifier_ChildDef_Optional(''); // unused
|
$e_form_button_content = new HTMLPurifier_ChildDef_Optional('');//unused
|
||||||
|
|
||||||
$this->info['ins']->child =
|
$this->info['ins']->child =
|
||||||
$this->info['del']->child = new HTMLPurifier_ChildDef_Chameleon($e__inline, $e__flow);
|
$this->info['del']->child =
|
||||||
|
new HTMLPurifier_ChildDef_Chameleon($e__inline, $e__flow);
|
||||||
|
|
||||||
$this->info['blockquote']->child=
|
$this->info['blockquote']->child=
|
||||||
$this->info['dd']->child =
|
$this->info['dd']->child =
|
||||||
@@ -209,8 +219,7 @@ class HTMLPurifier_HTMLDefinition
|
|||||||
|
|
||||||
$this->info['a']->child = $e_a_content;
|
$this->info['a']->child = $e_a_content;
|
||||||
|
|
||||||
$this->info['table']->child = new HTMLPurifier_ChildDef_Custom(
|
$this->info['table']->child = new HTMLPurifier_ChildDef_Table();
|
||||||
'(caption?, (col*|colgroup*), thead?, tfoot?, (tbody+|tr+))');
|
|
||||||
|
|
||||||
// not a real entity, watch the double underscore
|
// not a real entity, watch the double underscore
|
||||||
$e__row = new HTMLPurifier_ChildDef_Required('tr');
|
$e__row = new HTMLPurifier_ChildDef_Required('tr');
|
||||||
@@ -226,7 +235,7 @@ class HTMLPurifier_HTMLDefinition
|
|||||||
//////////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////////
|
||||||
// info[]->type : defines the type of the element (block or inline)
|
// info[]->type : defines the type of the element (block or inline)
|
||||||
|
|
||||||
// reuses $e_Inline and $e_block
|
// reuses $e_Inline and $e_Block
|
||||||
|
|
||||||
foreach ($e_Inline->elements as $name) {
|
foreach ($e_Inline->elements as $name) {
|
||||||
$this->info[$name]->type = 'inline';
|
$this->info[$name]->type = 'inline';
|
||||||
@@ -244,7 +253,7 @@ class HTMLPurifier_HTMLDefinition
|
|||||||
|
|
||||||
$this->info['a']->excludes = array('a' => true);
|
$this->info['a']->excludes = array('a' => true);
|
||||||
$this->info['pre']->excludes = array_flip(array('img', 'big', 'small',
|
$this->info['pre']->excludes = array_flip(array('img', 'big', 'small',
|
||||||
// technically in spec, but we don't allow em anyway
|
// technically useless, but good to be indepth
|
||||||
'object', 'applet', 'font', 'basefont'));
|
'object', 'applet', 'font', 'basefont'));
|
||||||
|
|
||||||
//////////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////////
|
||||||
@@ -254,6 +263,8 @@ class HTMLPurifier_HTMLDefinition
|
|||||||
// by the transform classes. It will, however, do simple and slightly
|
// by the transform classes. It will, however, do simple and slightly
|
||||||
// complex attribute value substitution
|
// complex attribute value substitution
|
||||||
|
|
||||||
|
// the question of varying allowed attributes is more entangling.
|
||||||
|
|
||||||
$e_Text = new HTMLPurifier_AttrDef_Text();
|
$e_Text = new HTMLPurifier_AttrDef_Text();
|
||||||
|
|
||||||
// attrs, included in almost every single one except for a few,
|
// attrs, included in almost every single one except for a few,
|
||||||
@@ -298,7 +309,8 @@ class HTMLPurifier_HTMLDefinition
|
|||||||
|
|
||||||
$this->info['table']->attr['summary'] = $e_Text;
|
$this->info['table']->attr['summary'] = $e_Text;
|
||||||
|
|
||||||
$this->info['table']->attr['border'] = new HTMLPurifier_AttrDef_Pixels();
|
$this->info['table']->attr['border'] =
|
||||||
|
new HTMLPurifier_AttrDef_Pixels();
|
||||||
|
|
||||||
$e_Length = new HTMLPurifier_AttrDef_Length();
|
$e_Length = new HTMLPurifier_AttrDef_Length();
|
||||||
$this->info['table']->attr['cellpadding'] =
|
$this->info['table']->attr['cellpadding'] =
|
||||||
@@ -330,7 +342,7 @@ class HTMLPurifier_HTMLDefinition
|
|||||||
$this->info['q']->attr['cite'] = $e_URI;
|
$this->info['q']->attr['cite'] = $e_URI;
|
||||||
|
|
||||||
//////////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////////
|
||||||
// UNIMP : info_tag_transform : transformations of tags
|
// info_tag_transform : transformations of tags
|
||||||
|
|
||||||
$this->info_tag_transform['font'] = new HTMLPurifier_TagTransform_Font();
|
$this->info_tag_transform['font'] = new HTMLPurifier_TagTransform_Font();
|
||||||
$this->info_tag_transform['menu'] = new HTMLPurifier_TagTransform_Simple('ul');
|
$this->info_tag_transform['menu'] = new HTMLPurifier_TagTransform_Simple('ul');
|
||||||
@@ -340,6 +352,9 @@ class HTMLPurifier_HTMLDefinition
|
|||||||
//////////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////////
|
||||||
// info[]->auto_close : tags that automatically close another
|
// info[]->auto_close : tags that automatically close another
|
||||||
|
|
||||||
|
// todo: determine whether or not SGML-like modeling based on
|
||||||
|
// mandatory/optional end tags would be a better policy
|
||||||
|
|
||||||
// make sure you test using isset() not !empty()
|
// make sure you test using isset() not !empty()
|
||||||
|
|
||||||
// these are all block elements: blocks aren't allowed in P
|
// these are all block elements: blocks aren't allowed in P
|
||||||
|
@@ -4,12 +4,11 @@ require_once 'HTMLPurifier/Token.php';
|
|||||||
require_once 'HTMLPurifier/Encoder.php';
|
require_once 'HTMLPurifier/Encoder.php';
|
||||||
require_once 'HTMLPurifier/EntityParser.php';
|
require_once 'HTMLPurifier/EntityParser.php';
|
||||||
|
|
||||||
HTMLPurifier_ConfigDef::define(
|
HTMLPurifier_ConfigSchema::define(
|
||||||
'Core', 'AcceptFullDocuments', true, 'bool',
|
'Core', 'AcceptFullDocuments', true, 'bool',
|
||||||
'This parameter determines whether or not the filter should accept full '.
|
'This parameter determines whether or not the filter should accept full '.
|
||||||
'HTML documents, not just HTML fragments. When on, it will '.
|
'HTML documents, not just HTML fragments. When on, it will '.
|
||||||
'drop all sections except the content between body. Depending on '.
|
'drop all sections except the content between body.'
|
||||||
'the implementation in use, this may speed up document parse times.'
|
|
||||||
);
|
);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -61,6 +60,60 @@ class HTMLPurifier_Lexer
|
|||||||
$this->_entity_parser = new HTMLPurifier_EntityParser();
|
$this->_entity_parser = new HTMLPurifier_EntityParser();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Most common entity to raw value conversion table for special entities.
|
||||||
|
* @protected
|
||||||
|
*/
|
||||||
|
var $_special_entity2str =
|
||||||
|
array(
|
||||||
|
'"' => '"',
|
||||||
|
'&' => '&',
|
||||||
|
'<' => '<',
|
||||||
|
'>' => '>',
|
||||||
|
''' => "'",
|
||||||
|
''' => "'",
|
||||||
|
''' => "'"
|
||||||
|
);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Parses special entities into the proper characters.
|
||||||
|
*
|
||||||
|
* This string will translate escaped versions of the special characters
|
||||||
|
* into the correct ones.
|
||||||
|
*
|
||||||
|
* @warning
|
||||||
|
* You should be able to treat the output of this function as
|
||||||
|
* completely parsed, but that's only because all other entities should
|
||||||
|
* have been handled previously in substituteNonSpecialEntities()
|
||||||
|
*
|
||||||
|
* @param $string String character data to be parsed.
|
||||||
|
* @returns Parsed character data.
|
||||||
|
*/
|
||||||
|
function parseData($string) {
|
||||||
|
|
||||||
|
// following functions require at least one character
|
||||||
|
if ($string === '') return '';
|
||||||
|
|
||||||
|
// subtracts amps that cannot possibly be escaped
|
||||||
|
$num_amp = substr_count($string, '&') - substr_count($string, '& ') -
|
||||||
|
($string[strlen($string)-1] === '&' ? 1 : 0);
|
||||||
|
|
||||||
|
if (!$num_amp) return $string; // abort if no entities
|
||||||
|
$num_esc_amp = substr_count($string, '&');
|
||||||
|
$string = strtr($string, $this->_special_entity2str);
|
||||||
|
|
||||||
|
// code duplication for sake of optimization, see above
|
||||||
|
$num_amp_2 = substr_count($string, '&') - substr_count($string, '& ') -
|
||||||
|
($string[strlen($string)-1] === '&' ? 1 : 0);
|
||||||
|
|
||||||
|
if ($num_amp_2 <= $num_esc_amp) return $string;
|
||||||
|
|
||||||
|
// hmm... now we have some uncommon entities. Use the callback.
|
||||||
|
$string = $this->_entity_parser->substituteSpecialEntities($string);
|
||||||
|
return $string;
|
||||||
|
}
|
||||||
|
|
||||||
var $_encoder;
|
var $_encoder;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@@ -12,15 +12,19 @@ require_once 'HTMLPurifier/TokenFactory.php';
|
|||||||
* documents, it performs twenty times faster than
|
* documents, it performs twenty times faster than
|
||||||
* HTMLPurifier_Lexer_DirectLex, and is the default choice for PHP 5.
|
* HTMLPurifier_Lexer_DirectLex, and is the default choice for PHP 5.
|
||||||
*
|
*
|
||||||
* @notice
|
* @note Any empty elements will have empty tokens associated with them, even if
|
||||||
* Any empty elements will have empty tokens associated with them, even if
|
|
||||||
* this is prohibited by the spec. This cannot be fixed until the spec
|
* this is prohibited by the spec. This cannot be fixed until the spec
|
||||||
* comes into play.
|
* comes into play.
|
||||||
*
|
*
|
||||||
* @todo Determine DOM's entity parsing behavior, point to local entity files
|
* @note PHP's DOM extension does not actually parse any entities, we use
|
||||||
* if necessary.
|
* our own function to do that.
|
||||||
* @todo Make div access less fragile, and refrain from preprocessing when
|
*
|
||||||
* HTML tag and friends are already present.
|
* @warning DOM tends to drop whitespace, which may wreak havoc on indenting.
|
||||||
|
* If this is a huge problem, due to the fact that HTML is hand
|
||||||
|
* edited and you are unable to get a parser cache that caches the
|
||||||
|
* output of HTML Purifier while keeping the original HTML lying
|
||||||
|
* around, you may want to run Tidy on the resulting output or use
|
||||||
|
* HTMLPurifier_Lexer_DirectLex
|
||||||
*/
|
*/
|
||||||
|
|
||||||
class HTMLPurifier_Lexer_DOMLex extends HTMLPurifier_Lexer
|
class HTMLPurifier_Lexer_DOMLex extends HTMLPurifier_Lexer
|
||||||
|
@@ -12,64 +12,12 @@ require_once 'HTMLPurifier/Lexer.php';
|
|||||||
* completely eventually.
|
* completely eventually.
|
||||||
*
|
*
|
||||||
* @todo Reread XML spec and document differences.
|
* @todo Reread XML spec and document differences.
|
||||||
* @todo Add support for CDATA sections.
|
*
|
||||||
* @todo Determine correct behavior in outputting comment data. (preserve dashes?)
|
* @todo Determine correct behavior in transforming comment data. (preserve dashes?)
|
||||||
* @todo Optimize main function tokenizeHTML().
|
|
||||||
* @todo Less than sign (<) being prohibited (even as entity) in attr-values?
|
|
||||||
*/
|
*/
|
||||||
class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer
|
class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer
|
||||||
{
|
{
|
||||||
|
|
||||||
/**
|
|
||||||
* Most common entity to raw value conversion table for special entities.
|
|
||||||
* @protected
|
|
||||||
*/
|
|
||||||
var $_special_entity2str =
|
|
||||||
array(
|
|
||||||
'"' => '"',
|
|
||||||
'&' => '&',
|
|
||||||
'<' => '<',
|
|
||||||
'>' => '>',
|
|
||||||
''' => "'",
|
|
||||||
''' => "'",
|
|
||||||
''' => "'"
|
|
||||||
);
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Parses special entities into the proper characters.
|
|
||||||
*
|
|
||||||
* This string will translate escaped versions of the special characters
|
|
||||||
* into the correct ones.
|
|
||||||
*
|
|
||||||
* @warning
|
|
||||||
* You should be able to treat the output of this function as
|
|
||||||
* completely parsed, but that's only because all other entities should
|
|
||||||
* have been handled previously in substituteNonSpecialEntities()
|
|
||||||
*
|
|
||||||
* @param $string String character data to be parsed.
|
|
||||||
* @returns Parsed character data.
|
|
||||||
*/
|
|
||||||
function parseData($string) {
|
|
||||||
|
|
||||||
// subtracts amps that cannot possibly be escaped
|
|
||||||
$num_amp = substr_count($string, '&') - substr_count($string, '& ') -
|
|
||||||
($string[strlen($string)-1] === '&' ? 1 : 0);
|
|
||||||
|
|
||||||
if (!$num_amp) return $string; // abort if no entities
|
|
||||||
$num_esc_amp = substr_count($string, '&');
|
|
||||||
$string = strtr($string, $this->_special_entity2str);
|
|
||||||
|
|
||||||
// code duplication for sake of optimization, see above
|
|
||||||
$num_amp_2 = substr_count($string, '&') - substr_count($string, '& ') -
|
|
||||||
($string[strlen($string)-1] === '&' ? 1 : 0);
|
|
||||||
|
|
||||||
if ($num_amp_2 <= $num_esc_amp) return $string;
|
|
||||||
|
|
||||||
// hmm... now we have some uncommon entities. Use the callback.
|
|
||||||
$string = $this->_entity_parser->substituteSpecialEntities($string);
|
|
||||||
return $string;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Whitespace characters for str(c)spn.
|
* Whitespace characters for str(c)spn.
|
||||||
* @protected
|
* @protected
|
||||||
|
@@ -18,6 +18,8 @@ require_once 'HTMLPurifier/Lexer.php';
|
|||||||
* whatever it does for poorly formed HTML is up to it.
|
* whatever it does for poorly formed HTML is up to it.
|
||||||
*
|
*
|
||||||
* @todo Generalize so that XML_HTMLSax is also supported.
|
* @todo Generalize so that XML_HTMLSax is also supported.
|
||||||
|
*
|
||||||
|
* @warning Entity-resolution inside attributes is broken.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
class HTMLPurifier_Lexer_PEARSax3 extends HTMLPurifier_Lexer
|
class HTMLPurifier_Lexer_PEARSax3 extends HTMLPurifier_Lexer
|
||||||
@@ -41,6 +43,8 @@ class HTMLPurifier_Lexer_PEARSax3 extends HTMLPurifier_Lexer
|
|||||||
$parser->set_element_handler('openHandler','closeHandler');
|
$parser->set_element_handler('openHandler','closeHandler');
|
||||||
$parser->set_data_handler('dataHandler');
|
$parser->set_data_handler('dataHandler');
|
||||||
$parser->set_escape_handler('escapeHandler');
|
$parser->set_escape_handler('escapeHandler');
|
||||||
|
|
||||||
|
// doesn't seem to work correctly for attributes
|
||||||
$parser->set_option('XML_OPTION_ENTITIES_PARSED', 1);
|
$parser->set_option('XML_OPTION_ENTITIES_PARSED', 1);
|
||||||
|
|
||||||
$parser->parse($string);
|
$parser->parse($string);
|
||||||
@@ -53,6 +57,10 @@ class HTMLPurifier_Lexer_PEARSax3 extends HTMLPurifier_Lexer
|
|||||||
* Open tag event handler, interface is defined by PEAR package.
|
* Open tag event handler, interface is defined by PEAR package.
|
||||||
*/
|
*/
|
||||||
function openHandler(&$parser, $name, $attrs, $closed) {
|
function openHandler(&$parser, $name, $attrs, $closed) {
|
||||||
|
// entities are not resolved in attrs
|
||||||
|
foreach ($attrs as $key => $attr) {
|
||||||
|
$attrs[$key] = $this->parseData($attr);
|
||||||
|
}
|
||||||
if ($closed) {
|
if ($closed) {
|
||||||
$this->tokens[] = new HTMLPurifier_Token_Empty($name, $attrs);
|
$this->tokens[] = new HTMLPurifier_Token_Empty($name, $attrs);
|
||||||
} else {
|
} else {
|
||||||
|
@@ -8,7 +8,7 @@
|
|||||||
* features, such as custom tags, custom parsing of text, etc.
|
* features, such as custom tags, custom parsing of text, etc.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
HTMLPurifier_ConfigDef::define(
|
HTMLPurifier_ConfigSchema::define(
|
||||||
'Core', 'EscapeInvalidTags', false, 'bool',
|
'Core', 'EscapeInvalidTags', false, 'bool',
|
||||||
'When true, invalid tags will be written back to the document as plain '.
|
'When true, invalid tags will be written back to the document as plain '.
|
||||||
'text. Otherwise, they are silently dropped.'
|
'text. Otherwise, they are silently dropped.'
|
||||||
|
@@ -187,6 +187,7 @@ class HTMLPurifier_Strategy_FixNesting extends HTMLPurifier_Strategy
|
|||||||
if (!$parent_def->child->allow_empty) {
|
if (!$parent_def->child->allow_empty) {
|
||||||
// we need to do a double-check
|
// we need to do a double-check
|
||||||
$i = $parent_index;
|
$i = $parent_index;
|
||||||
|
array_pop($stack);
|
||||||
}
|
}
|
||||||
|
|
||||||
// PROJECTED OPTIMIZATION: Process all children elements before
|
// PROJECTED OPTIMIZATION: Process all children elements before
|
||||||
@@ -255,4 +256,4 @@ class HTMLPurifier_Strategy_FixNesting extends HTMLPurifier_Strategy
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
?>
|
?>
|
||||||
|
@@ -3,10 +3,10 @@
|
|||||||
require_once 'HTMLPurifier/Strategy.php';
|
require_once 'HTMLPurifier/Strategy.php';
|
||||||
require_once 'HTMLPurifier/HTMLDefinition.php';
|
require_once 'HTMLPurifier/HTMLDefinition.php';
|
||||||
require_once 'HTMLPurifier/IDAccumulator.php';
|
require_once 'HTMLPurifier/IDAccumulator.php';
|
||||||
require_once 'HTMLPurifier/ConfigDef.php';
|
require_once 'HTMLPurifier/ConfigSchema.php';
|
||||||
require_once 'HTMLPurifier/AttrContext.php';
|
require_once 'HTMLPurifier/AttrContext.php';
|
||||||
|
|
||||||
HTMLPurifier_ConfigDef::define(
|
HTMLPurifier_ConfigSchema::define(
|
||||||
'Attr', 'IDBlacklist', array(), 'list',
|
'Attr', 'IDBlacklist', array(), 'list',
|
||||||
'Array of IDs not allowed in the document.');
|
'Array of IDs not allowed in the document.');
|
||||||
|
|
||||||
|
@@ -4,7 +4,6 @@ require_once 'HTMLPurifier/URIScheme.php';
|
|||||||
|
|
||||||
/**
|
/**
|
||||||
* Validates ftp (File Transfer Protocol) URIs as defined by generic RFC 1738.
|
* Validates ftp (File Transfer Protocol) URIs as defined by generic RFC 1738.
|
||||||
* @todo Typecode check on path
|
|
||||||
*/
|
*/
|
||||||
class HTMLPurifier_URIScheme_ftp extends HTMLPurifier_URIScheme {
|
class HTMLPurifier_URIScheme_ftp extends HTMLPurifier_URIScheme {
|
||||||
|
|
||||||
@@ -16,7 +15,27 @@ class HTMLPurifier_URIScheme_ftp extends HTMLPurifier_URIScheme {
|
|||||||
list($userinfo, $host, $port, $path, $query) =
|
list($userinfo, $host, $port, $path, $query) =
|
||||||
parent::validateComponents(
|
parent::validateComponents(
|
||||||
$userinfo, $host, $port, $path, $query, $config );
|
$userinfo, $host, $port, $path, $query, $config );
|
||||||
// typecode check needed on path
|
$semicolon_pos = strrpos($path, ';'); // reverse
|
||||||
|
if ($semicolon_pos !== false) {
|
||||||
|
// typecode check
|
||||||
|
$type = substr($path, $semicolon_pos + 1); // no semicolon
|
||||||
|
$path = substr($path, 0, $semicolon_pos);
|
||||||
|
$type_ret = '';
|
||||||
|
if (strpos($type, '=') !== false) {
|
||||||
|
// figure out whether or not the declaration is correct
|
||||||
|
list($key, $typecode) = explode('=', $type, 2);
|
||||||
|
if ($key !== 'type') {
|
||||||
|
// invalid key, tack it back on encoded
|
||||||
|
$path .= '%3B' . $type;
|
||||||
|
} elseif ($typecode === 'a' || $typecode === 'i' || $typecode === 'd') {
|
||||||
|
$type_ret = ";type=$typecode";
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
$path .= '%3B' . $type;
|
||||||
|
}
|
||||||
|
$path = str_replace(';', '%3B', $path);
|
||||||
|
$path .= $type_ret;
|
||||||
|
}
|
||||||
return array($userinfo, $host, $port, $path, null);
|
return array($userinfo, $host, $port, $path, null);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -1,6 +1,6 @@
|
|||||||
<?php
|
<?php
|
||||||
|
|
||||||
HTMLPurifier_ConfigDef::define(
|
HTMLPurifier_ConfigSchema::define(
|
||||||
'URI', 'AllowedSchemes', array(
|
'URI', 'AllowedSchemes', array(
|
||||||
'http' => true, // "Hypertext Transfer Protocol", 'nuff said
|
'http' => true, // "Hypertext Transfer Protocol", 'nuff said
|
||||||
'https' => true, // HTTP over SSL (Secure Socket Layer)
|
'https' => true, // HTTP over SSL (Secure Socket Layer)
|
||||||
@@ -16,7 +16,7 @@ HTMLPurifier_ConfigDef::define(
|
|||||||
'prevents XSS attacks from using pseudo-schemes like javascript or mocha.'
|
'prevents XSS attacks from using pseudo-schemes like javascript or mocha.'
|
||||||
);
|
);
|
||||||
|
|
||||||
HTMLPurifier_ConfigDef::define(
|
HTMLPurifier_ConfigSchema::define(
|
||||||
'URI', 'OverrideAllowedSchemes', true, 'bool',
|
'URI', 'OverrideAllowedSchemes', true, 'bool',
|
||||||
'If this is set to true (which it is by default), you can override '.
|
'If this is set to true (which it is by default), you can override '.
|
||||||
'%URI.AllowedSchemes by simply registering a HTMLPurifier_URIScheme '.
|
'%URI.AllowedSchemes by simply registering a HTMLPurifier_URIScheme '.
|
||||||
|
@@ -1 +1 @@
|
|||||||
Deny from all
|
Deny from all
|
||||||
|
198
phpdoc.ini
198
phpdoc.ini
@@ -1,100 +1,100 @@
|
|||||||
;; phpDocumentor parse configuration file
|
;; phpDocumentor parse configuration file
|
||||||
;;
|
;;
|
||||||
;; This file is designed to cut down on repetitive typing on the command-line or web interface
|
;; This file is designed to cut down on repetitive typing on the command-line or web interface
|
||||||
;; You can copy this file to create a number of configuration files that can be used with the
|
;; You can copy this file to create a number of configuration files that can be used with the
|
||||||
;; command-line switch -c, as in phpdoc -c default.ini or phpdoc -c myini.ini. The web
|
;; command-line switch -c, as in phpdoc -c default.ini or phpdoc -c myini.ini. The web
|
||||||
;; interface will automatically generate a list of .ini files that can be used.
|
;; interface will automatically generate a list of .ini files that can be used.
|
||||||
;;
|
;;
|
||||||
;; default.ini is used to generate the online manual at http://www.phpdoc.org/docs
|
;; default.ini is used to generate the online manual at http://www.phpdoc.org/docs
|
||||||
;;
|
;;
|
||||||
;; ALL .ini files must be in the user subdirectory of phpDocumentor with an extension of .ini
|
;; ALL .ini files must be in the user subdirectory of phpDocumentor with an extension of .ini
|
||||||
;;
|
;;
|
||||||
;; Copyright 2002, Greg Beaver <cellog@users.sourceforge.net>
|
;; Copyright 2002, Greg Beaver <cellog@users.sourceforge.net>
|
||||||
;;
|
;;
|
||||||
;; WARNING: do not change the name of any command-line parameters, phpDocumentor will ignore them
|
;; WARNING: do not change the name of any command-line parameters, phpDocumentor will ignore them
|
||||||
|
|
||||||
[Parse Data]
|
[Parse Data]
|
||||||
;; title of all the documentation
|
;; title of all the documentation
|
||||||
;; legal values: any string
|
;; legal values: any string
|
||||||
title = HTML Purifier API Documentation
|
title = HTML Purifier API Documentation
|
||||||
|
|
||||||
;; parse files that start with a . like .bash_profile
|
;; parse files that start with a . like .bash_profile
|
||||||
;; legal values: true, false
|
;; legal values: true, false
|
||||||
hidden = false
|
hidden = false
|
||||||
|
|
||||||
;; show elements marked @access private in documentation by setting this to on
|
;; show elements marked @access private in documentation by setting this to on
|
||||||
;; legal values: on, off
|
;; legal values: on, off
|
||||||
parseprivate = off
|
parseprivate = off
|
||||||
|
|
||||||
;; parse with javadoc-like description (first sentence is always the short description)
|
;; parse with javadoc-like description (first sentence is always the short description)
|
||||||
;; legal values: on, off
|
;; legal values: on, off
|
||||||
javadocdesc = on
|
javadocdesc = on
|
||||||
|
|
||||||
;; add any custom @tags separated by commas here
|
;; add any custom @tags separated by commas here
|
||||||
;; legal values: any legal tagname separated by commas.
|
;; legal values: any legal tagname separated by commas.
|
||||||
;customtags = mytag1,mytag2
|
;customtags = mytag1,mytag2
|
||||||
|
|
||||||
;; This is only used by the XML:DocBook/peardoc2 converter
|
;; This is only used by the XML:DocBook/peardoc2 converter
|
||||||
defaultcategoryname = Documentation
|
defaultcategoryname = Documentation
|
||||||
|
|
||||||
;; what is the main package?
|
;; what is the main package?
|
||||||
;; legal values: alphanumeric string plus - and _
|
;; legal values: alphanumeric string plus - and _
|
||||||
defaultpackagename = HTMLPurifier
|
defaultpackagename = HTMLPurifier
|
||||||
|
|
||||||
;; output any parsing information? set to on for cron jobs
|
;; output any parsing information? set to on for cron jobs
|
||||||
;; legal values: on
|
;; legal values: on
|
||||||
;quiet = on
|
;quiet = on
|
||||||
|
|
||||||
;; parse a PEAR-style repository. Do not turn this on if your project does
|
;; parse a PEAR-style repository. Do not turn this on if your project does
|
||||||
;; not have a parent directory named "pear"
|
;; not have a parent directory named "pear"
|
||||||
;; legal values: on/off
|
;; legal values: on/off
|
||||||
;pear = on
|
;pear = on
|
||||||
|
|
||||||
;; where should the documentation be written?
|
;; where should the documentation be written?
|
||||||
;; legal values: a legal path
|
;; legal values: a legal path
|
||||||
target = docs/phpdoc
|
target = docs/phpdoc
|
||||||
|
|
||||||
;; Which files should be parsed out as special documentation files, such as README,
|
;; Which files should be parsed out as special documentation files, such as README,
|
||||||
;; INSTALL and CHANGELOG? This overrides the default files found in
|
;; INSTALL and CHANGELOG? This overrides the default files found in
|
||||||
;; phpDocumentor.ini (this file is not a user .ini file, but the global file)
|
;; phpDocumentor.ini (this file is not a user .ini file, but the global file)
|
||||||
readmeinstallchangelog = README, INSTALL, NEWS, WYSIWYG, SLOW, LICENSE, CREDITS
|
readmeinstallchangelog = README, INSTALL, NEWS, WYSIWYG, SLOW, LICENSE, CREDITS
|
||||||
|
|
||||||
;; limit output to the specified packages, even if others are parsed
|
;; limit output to the specified packages, even if others are parsed
|
||||||
;; legal values: package names separated by commas
|
;; legal values: package names separated by commas
|
||||||
;packageoutput = package1,package2
|
;packageoutput = package1,package2
|
||||||
|
|
||||||
;; comma-separated list of files to parse
|
;; comma-separated list of files to parse
|
||||||
;; legal values: paths separated by commas
|
;; legal values: paths separated by commas
|
||||||
;filename = /path/to/file1,/path/to/file2,fileincurrentdirectory
|
;filename = /path/to/file1,/path/to/file2,fileincurrentdirectory
|
||||||
|
|
||||||
;; comma-separated list of directories to parse
|
;; comma-separated list of directories to parse
|
||||||
;; legal values: directory paths separated by commas
|
;; legal values: directory paths separated by commas
|
||||||
;directory = /path1,/path2,.,..,subdirectory
|
;directory = /path1,/path2,.,..,subdirectory
|
||||||
;directory = /home/jeichorn/cvs/pear
|
;directory = /home/jeichorn/cvs/pear
|
||||||
directory = ./
|
directory = ./
|
||||||
|
|
||||||
;; template base directory (the equivalent directory of <installdir>/phpDocumentor)
|
;; template base directory (the equivalent directory of <installdir>/phpDocumentor)
|
||||||
;templatebase = /path/to/my/templates
|
;templatebase = /path/to/my/templates
|
||||||
|
|
||||||
;; directory to find any example files in through @example and {@example} tags
|
;; directory to find any example files in through @example and {@example} tags
|
||||||
;examplesdir = /path/to/my/templates
|
;examplesdir = /path/to/my/templates
|
||||||
|
|
||||||
;; comma-separated list of files, directories or wildcards ? and * (any wildcard) to ignore
|
;; comma-separated list of files, directories or wildcards ? and * (any wildcard) to ignore
|
||||||
;; legal values: any wildcard strings separated by commas
|
;; legal values: any wildcard strings separated by commas
|
||||||
;ignore = /path/to/ignore*,*list.php,myfile.php,subdirectory/
|
;ignore = /path/to/ignore*,*list.php,myfile.php,subdirectory/
|
||||||
ignore = pear-*,templates/,Documentation/,test*.php,Lexer.inc
|
ignore = pear-*,templates/,Documentation/,test*.php,Lexer.inc
|
||||||
|
|
||||||
sourcecode = on
|
sourcecode = on
|
||||||
|
|
||||||
;; comma-separated list of Converters to use in outputformat:Convertername:templatedirectory format
|
;; comma-separated list of Converters to use in outputformat:Convertername:templatedirectory format
|
||||||
;; legal values: HTML:frames:default,HTML:frames:l0l33t,HTML:frames:phpdoc.de,HTML:frames:phphtmllib,
|
;; legal values: HTML:frames:default,HTML:frames:l0l33t,HTML:frames:phpdoc.de,HTML:frames:phphtmllib,
|
||||||
;; HTML:frames:earthli,
|
;; HTML:frames:earthli,
|
||||||
;; HTML:frames:DOM/default,HTML:frames:DOM/l0l33t,HTML:frames:DOM/phpdoc.de,
|
;; HTML:frames:DOM/default,HTML:frames:DOM/l0l33t,HTML:frames:DOM/phpdoc.de,
|
||||||
;; HTML:frames:DOM/phphtmllib,HTML:frames:DOM/earthli
|
;; HTML:frames:DOM/phphtmllib,HTML:frames:DOM/earthli
|
||||||
;; HTML:Smarty:default,HTML:Smarty:PHP,HTML:Smarty:HandS
|
;; HTML:Smarty:default,HTML:Smarty:PHP,HTML:Smarty:HandS
|
||||||
;; PDF:default:default,CHM:default:default,XML:DocBook/peardoc2:default
|
;; PDF:default:default,CHM:default:default,XML:DocBook/peardoc2:default
|
||||||
output=HTML:frames:default
|
output=HTML:frames:default
|
||||||
|
|
||||||
;; turn this option on if you want highlighted source code for every file
|
;; turn this option on if you want highlighted source code for every file
|
||||||
;; legal values: on/off
|
;; legal values: on/off
|
||||||
sourcecode = on
|
sourcecode = on
|
17
test-settings.sample.php
Normal file
17
test-settings.sample.php
Normal file
@@ -0,0 +1,17 @@
|
|||||||
|
<?php
|
||||||
|
|
||||||
|
// This file is necessary to run the unit tests and profiling
|
||||||
|
// scripts.
|
||||||
|
|
||||||
|
// Is PEAR available on your system? If it isn't, set to false. If PEAR
|
||||||
|
// is not part of the default include_path, add it.
|
||||||
|
$GLOBALS['HTMLPurifierTest']['PEAR'] = true;
|
||||||
|
|
||||||
|
// How many times should profiling scripts iterate over the function? More runs
|
||||||
|
// means more accurate results, but they'll take longer to perform.
|
||||||
|
$GLOBALS['HTMLPurifierTest']['Runs'] = 2;
|
||||||
|
|
||||||
|
// Where is SimpleTest located?
|
||||||
|
$simpletest_location = '/path/to/simpletest/';
|
||||||
|
|
||||||
|
?>
|
@@ -158,9 +158,15 @@ class HTMLPurifier_AttrDef_URITest extends HTMLPurifier_AttrDefHarness
|
|||||||
$uri[18] = '/a/b';
|
$uri[18] = '/a/b';
|
||||||
$components[18] = array(null, null, null, '/a/b', null);
|
$components[18] = array(null, null, null, '/a/b', null);
|
||||||
|
|
||||||
// it's not allowed, so generic URI should get it
|
// result of malformed tag, gracefully handle error
|
||||||
$uri[19] = '<';
|
$uri[19] = 'http://www.google.com/\'>"';
|
||||||
$expect_uri[19] = false;
|
$components[19] = array(null, 'www.google.com', null, '/', null);
|
||||||
|
$expect_uri[19] = 'http://www.google.com/';
|
||||||
|
|
||||||
|
// test empty
|
||||||
|
$uri[20] = '';
|
||||||
|
$components[20] = array(null, null, null, '', null);
|
||||||
|
$expect_uri[20] = '';
|
||||||
|
|
||||||
foreach ($uri as $i => $value) {
|
foreach ($uri as $i => $value) {
|
||||||
|
|
||||||
|
@@ -1,7 +1,7 @@
|
|||||||
<?php
|
<?php
|
||||||
|
|
||||||
require_once 'HTMLPurifier/ChildDef.php';
|
require_once 'HTMLPurifier/ChildDef.php';
|
||||||
require_once 'HTMLPurifier/Lexer.php';
|
require_once 'HTMLPurifier/Lexer/DirectLex.php';
|
||||||
require_once 'HTMLPurifier/Generator.php';
|
require_once 'HTMLPurifier/Generator.php';
|
||||||
|
|
||||||
class HTMLPurifier_ChildDefTest extends UnitTestCase
|
class HTMLPurifier_ChildDefTest extends UnitTestCase
|
||||||
@@ -12,7 +12,8 @@ class HTMLPurifier_ChildDefTest extends UnitTestCase
|
|||||||
var $gen;
|
var $gen;
|
||||||
|
|
||||||
function HTMLPurifier_ChildDefTest() {
|
function HTMLPurifier_ChildDefTest() {
|
||||||
$this->lex = HTMLPurifier_Lexer::create();
|
// it is vital that the tags be treated as literally as possible
|
||||||
|
$this->lex = new HTMLPurifier_Lexer_DirectLex();
|
||||||
$this->gen = new HTMLPurifier_Generator();
|
$this->gen = new HTMLPurifier_Generator();
|
||||||
parent::UnitTestCase();
|
parent::UnitTestCase();
|
||||||
}
|
}
|
||||||
@@ -42,28 +43,75 @@ class HTMLPurifier_ChildDefTest extends UnitTestCase
|
|||||||
|
|
||||||
function test_custom() {
|
function test_custom() {
|
||||||
|
|
||||||
// the table definition
|
|
||||||
$this->def = new HTMLPurifier_ChildDef_Custom(
|
$this->def = new HTMLPurifier_ChildDef_Custom(
|
||||||
'(caption?, (col*|colgroup*), thead?, tfoot?, (tbody+|tr+))');
|
'(a, b?, c*, d+, (a, b)*)');
|
||||||
|
|
||||||
|
$inputs = array();
|
||||||
|
$expect = array();
|
||||||
|
$config = array();
|
||||||
|
|
||||||
|
$inputs[0] = '';
|
||||||
|
$expect[0] = false;
|
||||||
|
|
||||||
|
$inputs[1] = '<a /><b /><c /><d /><a /><b />';
|
||||||
|
$expect[1] = true;
|
||||||
|
|
||||||
|
$inputs[2] = '<a /><d>Dob</d><a /><b>foo</b><a href="moo" /><b>foo</b>';
|
||||||
|
$expect[2] = true;
|
||||||
|
|
||||||
|
$inputs[3] = '<a /><a />';
|
||||||
|
$expect[3] = false;
|
||||||
|
|
||||||
|
$this->assertSeries($inputs, $expect, $config);
|
||||||
|
}
|
||||||
|
|
||||||
|
function test_table() {
|
||||||
|
|
||||||
|
// currently inactive, awaiting augmentation
|
||||||
|
|
||||||
|
// the table definition
|
||||||
|
$this->def = new HTMLPurifier_ChildDef_Table();
|
||||||
|
|
||||||
$inputs = $expect = $config = array();
|
$inputs = $expect = $config = array();
|
||||||
|
|
||||||
$inputs[0] = '';
|
$inputs[0] = '';
|
||||||
$expect[0] = false;
|
$expect[0] = false;
|
||||||
|
|
||||||
// we really don't care what's inside, because if it turns out
|
// we're using empty tags to compact the tests: under real circumstances
|
||||||
// this tr is illegal, we'll end up re-evaluating the parent node
|
// there would be contents in them
|
||||||
// anyway.
|
|
||||||
$inputs[1] = '<tr></tr>';
|
$inputs[1] = '<tr />';
|
||||||
$expect[1] = true;
|
$expect[1] = true;
|
||||||
|
|
||||||
$inputs[2] = '<caption></caption><col></col><thead></thead>' .
|
$inputs[2] = '<caption /><col /><thead /><tfoot /><tbody>'.
|
||||||
'<tfoot></tfoot><tbody></tbody>';
|
'<tr><td>asdf</td></tr></tbody>';
|
||||||
$expect[2] = true;
|
$expect[2] = true;
|
||||||
|
|
||||||
$inputs[3] = '<col></col><col></col><col></col><tr></tr>';
|
$inputs[3] = '<col /><col /><col /><tr />';
|
||||||
$expect[3] = true;
|
$expect[3] = true;
|
||||||
|
|
||||||
|
// mixed up order
|
||||||
|
$inputs[4] = '<col /><colgroup /><tbody /><tfoot /><thead /><tr>1</tr><caption /><tr />';
|
||||||
|
$expect[4] = '<caption /><col /><colgroup /><thead /><tfoot /><tbody /><tr>1</tr><tr />';
|
||||||
|
|
||||||
|
// duplicates of singles
|
||||||
|
// - first caption serves
|
||||||
|
// - trailing tfoots/theads get turned into tbodys
|
||||||
|
$inputs[5] = '<caption>1</caption><caption /><tbody /><tbody /><tfoot>1</tfoot><tfoot />';
|
||||||
|
$expect[5] = '<caption>1</caption><tfoot>1</tfoot><tbody /><tbody /><tbody />';
|
||||||
|
|
||||||
|
// errant text dropped (until bubbling is implemented)
|
||||||
|
$inputs[6] = 'foo';
|
||||||
|
$expect[6] = false;
|
||||||
|
|
||||||
|
// whitespace sticks to the previous element, last whitespace is
|
||||||
|
// stationary
|
||||||
|
$inputs[7] = "\n <tr />\n <tr />\n ";
|
||||||
|
$expect[7] = true;
|
||||||
|
|
||||||
|
$inputs[8] = "\n\t<tbody />\n\t\t<tfoot />\n\t\t\t";
|
||||||
|
$expect[8] = "\n\t\t<tfoot />\n\t<tbody />\n\t\t\t";
|
||||||
|
|
||||||
$this->assertSeries($inputs, $expect, $config);
|
$this->assertSeries($inputs, $expect, $config);
|
||||||
|
|
||||||
}
|
}
|
||||||
@@ -175,4 +223,4 @@ class HTMLPurifier_ChildDefTest extends UnitTestCase
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
?>
|
?>
|
||||||
|
@@ -1,8 +1,8 @@
|
|||||||
<?php
|
<?php
|
||||||
|
|
||||||
require_once 'HTMLPurifier/ConfigDef.php';
|
require_once 'HTMLPurifier/ConfigSchema.php';
|
||||||
|
|
||||||
class HTMLPurifier_ConfigDefTest extends UnitTestCase
|
class HTMLPurifier_ConfigSchemaTest extends UnitTestCase
|
||||||
{
|
{
|
||||||
|
|
||||||
var $old_copy;
|
var $old_copy;
|
||||||
@@ -13,16 +13,16 @@ class HTMLPurifier_ConfigDefTest extends UnitTestCase
|
|||||||
// you pay for using Singletons. Good thing we can overload it.
|
// you pay for using Singletons. Good thing we can overload it.
|
||||||
|
|
||||||
// first, let's get a clean copy to do tests
|
// first, let's get a clean copy to do tests
|
||||||
$our_copy = new HTMLPurifier_ConfigDef();
|
$our_copy = new HTMLPurifier_ConfigSchema();
|
||||||
// get the old copy
|
// get the old copy
|
||||||
$this->old_copy = HTMLPurifier_ConfigDef::instance();
|
$this->old_copy = HTMLPurifier_ConfigSchema::instance();
|
||||||
// put in our copy, and reassign to the REAL reference
|
// put in our copy, and reassign to the REAL reference
|
||||||
$this->our_copy =& HTMLPurifier_ConfigDef::instance($our_copy);
|
$this->our_copy =& HTMLPurifier_ConfigSchema::instance($our_copy);
|
||||||
}
|
}
|
||||||
|
|
||||||
function tearDown() {
|
function tearDown() {
|
||||||
// testing is done, restore the old copy
|
// testing is done, restore the old copy
|
||||||
HTMLPurifier_ConfigDef::instance($this->old_copy);
|
HTMLPurifier_ConfigSchema::instance($this->old_copy);
|
||||||
}
|
}
|
||||||
|
|
||||||
function testNormal() {
|
function testNormal() {
|
||||||
@@ -31,9 +31,9 @@ class HTMLPurifier_ConfigDefTest extends UnitTestCase
|
|||||||
|
|
||||||
// define a namespace
|
// define a namespace
|
||||||
$description = 'Configuration that is always available.';
|
$description = 'Configuration that is always available.';
|
||||||
HTMLPurifier_ConfigDef::defineNamespace(
|
HTMLPurifier_ConfigSchema::defineNamespace(
|
||||||
'Core', $description
|
'Core', $description
|
||||||
); $line = __LINE__;
|
);
|
||||||
$this->assertIdentical($this->our_copy->defaults, array(
|
$this->assertIdentical($this->our_copy->defaults, array(
|
||||||
'Core' => array()
|
'Core' => array()
|
||||||
));
|
));
|
||||||
@@ -41,7 +41,7 @@ class HTMLPurifier_ConfigDefTest extends UnitTestCase
|
|||||||
'Core' => array()
|
'Core' => array()
|
||||||
));
|
));
|
||||||
$namespace = new HTMLPurifier_ConfigEntity_Namespace();
|
$namespace = new HTMLPurifier_ConfigEntity_Namespace();
|
||||||
$namespace->addDescription($file, $line, $description);
|
$namespace->description = $description;
|
||||||
$this->assertIdentical($this->our_copy->info_namespace, array(
|
$this->assertIdentical($this->our_copy->info_namespace, array(
|
||||||
'Core' => $namespace
|
'Core' => $namespace
|
||||||
));
|
));
|
||||||
@@ -50,7 +50,7 @@ class HTMLPurifier_ConfigDefTest extends UnitTestCase
|
|||||||
|
|
||||||
// define a directive
|
// define a directive
|
||||||
$description = 'This is a description of the directive.';
|
$description = 'This is a description of the directive.';
|
||||||
HTMLPurifier_ConfigDef::define(
|
HTMLPurifier_ConfigSchema::define(
|
||||||
'Core', 'Name', 'default value', 'string',
|
'Core', 'Name', 'default value', 'string',
|
||||||
$description
|
$description
|
||||||
); $line = __LINE__;
|
); $line = __LINE__;
|
||||||
@@ -71,7 +71,7 @@ class HTMLPurifier_ConfigDefTest extends UnitTestCase
|
|||||||
|
|
||||||
|
|
||||||
// define a directive in an undefined namespace
|
// define a directive in an undefined namespace
|
||||||
HTMLPurifier_ConfigDef::define(
|
HTMLPurifier_ConfigSchema::define(
|
||||||
'Extension', 'Name', false, 'bool',
|
'Extension', 'Name', false, 'bool',
|
||||||
'This is for an extension, but we have not defined its namespace!'
|
'This is for an extension, but we have not defined its namespace!'
|
||||||
);
|
);
|
||||||
@@ -83,7 +83,7 @@ class HTMLPurifier_ConfigDefTest extends UnitTestCase
|
|||||||
|
|
||||||
// redefine a value in a valid manner
|
// redefine a value in a valid manner
|
||||||
$description = 'Alternative configuration definition';
|
$description = 'Alternative configuration definition';
|
||||||
HTMLPurifier_ConfigDef::define(
|
HTMLPurifier_ConfigSchema::define(
|
||||||
'Core', 'Name', 'default value', 'string',
|
'Core', 'Name', 'default value', 'string',
|
||||||
$description
|
$description
|
||||||
); $line = __LINE__;
|
); $line = __LINE__;
|
||||||
@@ -98,7 +98,7 @@ class HTMLPurifier_ConfigDefTest extends UnitTestCase
|
|||||||
|
|
||||||
|
|
||||||
// redefine a directive in an invalid manner
|
// redefine a directive in an invalid manner
|
||||||
HTMLPurifier_ConfigDef::define(
|
HTMLPurifier_ConfigSchema::define(
|
||||||
'Core', 'Name', 'different default', 'string',
|
'Core', 'Name', 'different default', 'string',
|
||||||
'Inconsistent default or type, cannot redefine'
|
'Inconsistent default or type, cannot redefine'
|
||||||
);
|
);
|
||||||
@@ -109,7 +109,7 @@ class HTMLPurifier_ConfigDefTest extends UnitTestCase
|
|||||||
|
|
||||||
|
|
||||||
// make an enumeration
|
// make an enumeration
|
||||||
HTMLPurifier_ConfigDef::defineAllowedValues(
|
HTMLPurifier_ConfigSchema::defineAllowedValues(
|
||||||
'Core', 'Name', array(
|
'Core', 'Name', array(
|
||||||
'Real Value',
|
'Real Value',
|
||||||
'Real Value 2'
|
'Real Value 2'
|
||||||
@@ -128,7 +128,7 @@ class HTMLPurifier_ConfigDefTest extends UnitTestCase
|
|||||||
|
|
||||||
|
|
||||||
// redefinition of enumeration is cumulative
|
// redefinition of enumeration is cumulative
|
||||||
HTMLPurifier_ConfigDef::defineAllowedValues(
|
HTMLPurifier_ConfigSchema::defineAllowedValues(
|
||||||
'Core', 'Name', array(
|
'Core', 'Name', array(
|
||||||
'Real Value 3',
|
'Real Value 3',
|
||||||
)
|
)
|
||||||
@@ -143,7 +143,7 @@ class HTMLPurifier_ConfigDefTest extends UnitTestCase
|
|||||||
|
|
||||||
|
|
||||||
// cannot define enumeration for undefined directive
|
// cannot define enumeration for undefined directive
|
||||||
HTMLPurifier_ConfigDef::defineAllowedValues(
|
HTMLPurifier_ConfigSchema::defineAllowedValues(
|
||||||
'Core', 'Foobar', array(
|
'Core', 'Foobar', array(
|
||||||
'Real Value 9',
|
'Real Value 9',
|
||||||
)
|
)
|
||||||
@@ -155,7 +155,7 @@ class HTMLPurifier_ConfigDefTest extends UnitTestCase
|
|||||||
|
|
||||||
|
|
||||||
// test defining value aliases for an enumerated value
|
// test defining value aliases for an enumerated value
|
||||||
HTMLPurifier_ConfigDef::defineValueAliases(
|
HTMLPurifier_ConfigSchema::defineValueAliases(
|
||||||
'Core', 'Name', array(
|
'Core', 'Name', array(
|
||||||
'Aliased Value' => 'Real Value'
|
'Aliased Value' => 'Real Value'
|
||||||
)
|
)
|
||||||
@@ -170,7 +170,7 @@ class HTMLPurifier_ConfigDefTest extends UnitTestCase
|
|||||||
|
|
||||||
|
|
||||||
// redefine should be cumulative
|
// redefine should be cumulative
|
||||||
HTMLPurifier_ConfigDef::defineValueAliases(
|
HTMLPurifier_ConfigSchema::defineValueAliases(
|
||||||
'Core', 'Name', array(
|
'Core', 'Name', array(
|
||||||
'Aliased Value 2' => 'Real Value 2'
|
'Aliased Value 2' => 'Real Value 2'
|
||||||
)
|
)
|
||||||
@@ -185,7 +185,7 @@ class HTMLPurifier_ConfigDefTest extends UnitTestCase
|
|||||||
|
|
||||||
|
|
||||||
// cannot create alias to not-allowed value
|
// cannot create alias to not-allowed value
|
||||||
HTMLPurifier_ConfigDef::defineValueAliases(
|
HTMLPurifier_ConfigSchema::defineValueAliases(
|
||||||
'Core', 'Name', array(
|
'Core', 'Name', array(
|
||||||
'Aliased Value 3' => 'Invalid Value'
|
'Aliased Value 3' => 'Invalid Value'
|
||||||
)
|
)
|
||||||
@@ -197,7 +197,7 @@ class HTMLPurifier_ConfigDefTest extends UnitTestCase
|
|||||||
|
|
||||||
|
|
||||||
// cannot create alias for already allowed value
|
// cannot create alias for already allowed value
|
||||||
HTMLPurifier_ConfigDef::defineValueAliases(
|
HTMLPurifier_ConfigSchema::defineValueAliases(
|
||||||
'Core', 'Name', array(
|
'Core', 'Name', array(
|
||||||
'Real Value' => 'Real Value 2'
|
'Real Value' => 'Real Value 2'
|
||||||
)
|
)
|
||||||
@@ -209,7 +209,7 @@ class HTMLPurifier_ConfigDefTest extends UnitTestCase
|
|||||||
|
|
||||||
|
|
||||||
// define a directive with an invalid type
|
// define a directive with an invalid type
|
||||||
HTMLPurifier_ConfigDef::define(
|
HTMLPurifier_ConfigSchema::define(
|
||||||
'Core', 'Foobar', false, 'omen',
|
'Core', 'Foobar', false, 'omen',
|
||||||
'Omen is not a valid type, so we reject this.'
|
'Omen is not a valid type, so we reject this.'
|
||||||
);
|
);
|
||||||
@@ -221,7 +221,7 @@ class HTMLPurifier_ConfigDefTest extends UnitTestCase
|
|||||||
|
|
||||||
|
|
||||||
// define a directive with inconsistent type
|
// define a directive with inconsistent type
|
||||||
HTMLPurifier_ConfigDef::define(
|
HTMLPurifier_ConfigSchema::define(
|
||||||
'Core', 'Foobaz', 10, 'string',
|
'Core', 'Foobaz', 10, 'string',
|
||||||
'If we say string, we should mean it, not integer 10.'
|
'If we say string, we should mean it, not integer 10.'
|
||||||
);
|
);
|
||||||
@@ -231,6 +231,24 @@ class HTMLPurifier_ConfigDefTest extends UnitTestCase
|
|||||||
$this->swallowErrors();
|
$this->swallowErrors();
|
||||||
|
|
||||||
|
|
||||||
|
// define a directive with bad characters
|
||||||
|
HTMLPurifier_ConfigSchema::define(
|
||||||
|
'Core', 'Core.Attr', 10, 'int',
|
||||||
|
'No periods! >:-('
|
||||||
|
);
|
||||||
|
|
||||||
|
$this->assertError('Directive name must be alphanumeric');
|
||||||
|
$this->assertNoErrors();
|
||||||
|
$this->swallowErrors();
|
||||||
|
|
||||||
|
// define a namespace with bad characters
|
||||||
|
HTMLPurifier_ConfigSchema::defineNamespace(
|
||||||
|
'Foobar&Gromit', $description
|
||||||
|
);
|
||||||
|
|
||||||
|
$this->assertError('Namespace name must be alphanumeric');
|
||||||
|
$this->assertNoErrors();
|
||||||
|
$this->swallowErrors();
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
@@ -8,41 +8,42 @@ class HTMLPurifier_ConfigTest extends UnitTestCase
|
|||||||
var $our_copy, $old_copy;
|
var $our_copy, $old_copy;
|
||||||
|
|
||||||
function setUp() {
|
function setUp() {
|
||||||
$our_copy = new HTMLPurifier_ConfigDef();
|
// set up a dummy schema object for testing
|
||||||
$this->old_copy = HTMLPurifier_ConfigDef::instance();
|
$our_copy = new HTMLPurifier_ConfigSchema();
|
||||||
$this->our_copy =& HTMLPurifier_ConfigDef::instance($our_copy);
|
$this->old_copy = HTMLPurifier_ConfigSchema::instance();
|
||||||
|
$this->our_copy =& HTMLPurifier_ConfigSchema::instance($our_copy);
|
||||||
}
|
}
|
||||||
|
|
||||||
function tearDown() {
|
function tearDown() {
|
||||||
HTMLPurifier_ConfigDef::instance($this->old_copy);
|
HTMLPurifier_ConfigSchema::instance($this->old_copy);
|
||||||
}
|
}
|
||||||
|
|
||||||
function test() {
|
function test() {
|
||||||
|
|
||||||
HTMLPurifier_ConfigDef::defineNamespace('Core', 'Corestuff');
|
HTMLPurifier_ConfigSchema::defineNamespace('Core', 'Corestuff');
|
||||||
HTMLPurifier_ConfigDef::defineNamespace('Attr', 'Attributes');
|
HTMLPurifier_ConfigSchema::defineNamespace('Attr', 'Attributes');
|
||||||
HTMLPurifier_ConfigDef::defineNamespace('Extension', 'Extensible');
|
HTMLPurifier_ConfigSchema::defineNamespace('Extension', 'Extensible');
|
||||||
|
|
||||||
HTMLPurifier_ConfigDef::define(
|
HTMLPurifier_ConfigSchema::define(
|
||||||
'Core', 'Key', false, 'bool', 'A boolean directive.'
|
'Core', 'Key', false, 'bool', 'A boolean directive.'
|
||||||
);
|
);
|
||||||
HTMLPurifier_ConfigDef::define(
|
HTMLPurifier_ConfigSchema::define(
|
||||||
'Attr', 'Key', 42, 'int', 'An integer directive.'
|
'Attr', 'Key', 42, 'int', 'An integer directive.'
|
||||||
);
|
);
|
||||||
HTMLPurifier_ConfigDef::define(
|
HTMLPurifier_ConfigSchema::define(
|
||||||
'Extension', 'Pert', 'foo', 'string', 'A string directive.'
|
'Extension', 'Pert', 'foo', 'string', 'A string directive.'
|
||||||
);
|
);
|
||||||
HTMLPurifier_ConfigDef::define(
|
HTMLPurifier_ConfigSchema::define(
|
||||||
'Core', 'Encoding', 'utf-8', 'istring', 'Case insensitivity!'
|
'Core', 'Encoding', 'utf-8', 'istring', 'Case insensitivity!'
|
||||||
);
|
);
|
||||||
|
|
||||||
HTMLPurifier_ConfigDef::defineAllowedValues(
|
HTMLPurifier_ConfigSchema::defineAllowedValues(
|
||||||
'Extension', 'Pert', array('foo', 'moo')
|
'Extension', 'Pert', array('foo', 'moo')
|
||||||
);
|
);
|
||||||
HTMLPurifier_ConfigDef::defineValueAliases(
|
HTMLPurifier_ConfigSchema::defineValueAliases(
|
||||||
'Extension', 'Pert', array('cow' => 'moo')
|
'Extension', 'Pert', array('cow' => 'moo')
|
||||||
);
|
);
|
||||||
HTMLPurifier_ConfigDef::defineAllowedValues(
|
HTMLPurifier_ConfigSchema::defineAllowedValues(
|
||||||
'Core', 'Encoding', array('utf-8', 'iso-8859-1')
|
'Core', 'Encoding', array('utf-8', 'iso-8859-1')
|
||||||
);
|
);
|
||||||
|
|
||||||
@@ -93,6 +94,17 @@ class HTMLPurifier_ConfigTest extends UnitTestCase
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
function test_getDefinition() {
|
||||||
|
|
||||||
|
$config = HTMLPurifier_Config::createDefault();
|
||||||
|
$def = $config->getHTMLDefinition();
|
||||||
|
$this->assertIsA($def, 'HTMLPurifier_HTMLDefinition');
|
||||||
|
|
||||||
|
$def = $config->getCSSDefinition();
|
||||||
|
$this->assertIsA($def, 'HTMLPurifier_CSSDefinition');
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
?>
|
?>
|
@@ -52,10 +52,8 @@ class HTMLPurifier_GeneratorTest extends UnitTestCase
|
|||||||
$inputs[7] = new HTMLPurifier_Token_Text($theta_char);
|
$inputs[7] = new HTMLPurifier_Token_Text($theta_char);
|
||||||
$expect[7] = $theta_char;
|
$expect[7] = $theta_char;
|
||||||
|
|
||||||
$default_config = HTMLPurifier_Config::createDefault();
|
|
||||||
foreach ($inputs as $i => $input) {
|
foreach ($inputs as $i => $input) {
|
||||||
if (!isset($config[$i])) $config[$i] = $default_config;
|
$result = $this->gen->generateFromToken($input);
|
||||||
$result = $this->gen->generateFromToken($input, $config[$i]);
|
|
||||||
$this->assertEqual($result, $expect[$i]);
|
$this->assertEqual($result, $expect[$i]);
|
||||||
paintIf($result, $result != $expect[$i]);
|
paintIf($result, $result != $expect[$i]);
|
||||||
}
|
}
|
||||||
@@ -122,6 +120,56 @@ class HTMLPurifier_GeneratorTest extends UnitTestCase
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
var $config;
|
||||||
|
function assertGeneration($tokens, $expect) {
|
||||||
|
$result = $this->gen->generateFromTokens($tokens, $this->config);
|
||||||
|
// normalized newlines, this probably should be put somewhere else
|
||||||
|
$result = str_replace("\r\n", "\n", $result);
|
||||||
|
$result = str_replace("\r", "\n", $result);
|
||||||
|
$this->assertEqual($expect, $result);
|
||||||
|
}
|
||||||
|
|
||||||
|
function test_generateFromTokens_XHTMLoff() {
|
||||||
|
$this->config = HTMLPurifier_Config::createDefault();
|
||||||
|
$this->config->set('Core', 'XHTML', false);
|
||||||
|
|
||||||
|
// omit trailing slash
|
||||||
|
$this->assertGeneration(
|
||||||
|
array( new HTMLPurifier_Token_Empty('br') ),
|
||||||
|
'<br>'
|
||||||
|
);
|
||||||
|
|
||||||
|
// there should be a test for attribute minimization, but it is
|
||||||
|
// impossible for something like that to happen due to our current
|
||||||
|
// definitions! fix it later
|
||||||
|
|
||||||
|
// namespaced attributes must be dropped
|
||||||
|
$this->assertGeneration(
|
||||||
|
array( new HTMLPurifier_Token_Start('p', array('xml:lang'=>'fr')) ),
|
||||||
|
'<p>'
|
||||||
|
);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
function test_generateFromTokens_TidyFormat() {
|
||||||
|
// abort test if tidy isn't loaded
|
||||||
|
if (!extension_loaded('tidy')) return;
|
||||||
|
|
||||||
|
$this->config = HTMLPurifier_Config::createDefault();
|
||||||
|
$this->config->set('Core', 'TidyFormat', true);
|
||||||
|
|
||||||
|
// nice wrapping please
|
||||||
|
$this->assertGeneration(
|
||||||
|
array(
|
||||||
|
new HTMLPurifier_Token_Start('div'),
|
||||||
|
new HTMLPurifier_Token_Text('Text'),
|
||||||
|
new HTMLPurifier_Token_End('div')
|
||||||
|
),
|
||||||
|
"<div>\n Text\n</div>\n"
|
||||||
|
);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
?>
|
?>
|
@@ -11,24 +11,6 @@ class HTMLPurifier_Lexer_DirectLexTest extends UnitTestCase
|
|||||||
$this->DirectLex = new HTMLPurifier_Lexer_DirectLex();
|
$this->DirectLex = new HTMLPurifier_Lexer_DirectLex();
|
||||||
}
|
}
|
||||||
|
|
||||||
function test_parseData() {
|
|
||||||
$HP =& $this->DirectLex;
|
|
||||||
|
|
||||||
$this->assertIdentical('asdf', $HP->parseData('asdf'));
|
|
||||||
$this->assertIdentical('&', $HP->parseData('&'));
|
|
||||||
$this->assertIdentical('"', $HP->parseData('"'));
|
|
||||||
$this->assertIdentical("'", $HP->parseData('''));
|
|
||||||
$this->assertIdentical("'", $HP->parseData('''));
|
|
||||||
$this->assertIdentical('&&&', $HP->parseData('&&&'));
|
|
||||||
$this->assertIdentical('&&', $HP->parseData('&&')); // [INVALID]
|
|
||||||
$this->assertIdentical('Procter & Gamble',
|
|
||||||
$HP->parseData('Procter & Gamble')); // [INVALID]
|
|
||||||
|
|
||||||
// This is not special, thus not converted. Test of fault tolerance,
|
|
||||||
// realistically speaking, this should never happen
|
|
||||||
$this->assertIdentical('-', $HP->parseData('-'));
|
|
||||||
}
|
|
||||||
|
|
||||||
// internals testing
|
// internals testing
|
||||||
function test_parseAttributeString() {
|
function test_parseAttributeString() {
|
||||||
|
|
||||||
|
@@ -38,6 +38,25 @@ class HTMLPurifier_LexerTest extends UnitTestCase
|
|||||||
$this->assertIdentical($extract, $result);
|
$this->assertIdentical($extract, $result);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
function test_parseData() {
|
||||||
|
$HP =& $this->Lexer;
|
||||||
|
|
||||||
|
$this->assertIdentical('asdf', $HP->parseData('asdf'));
|
||||||
|
$this->assertIdentical('&', $HP->parseData('&'));
|
||||||
|
$this->assertIdentical('"', $HP->parseData('"'));
|
||||||
|
$this->assertIdentical("'", $HP->parseData('''));
|
||||||
|
$this->assertIdentical("'", $HP->parseData('''));
|
||||||
|
$this->assertIdentical('&&&', $HP->parseData('&&&'));
|
||||||
|
$this->assertIdentical('&&', $HP->parseData('&&')); // [INVALID]
|
||||||
|
$this->assertIdentical('Procter & Gamble',
|
||||||
|
$HP->parseData('Procter & Gamble')); // [INVALID]
|
||||||
|
|
||||||
|
// This is not special, thus not converted. Test of fault tolerance,
|
||||||
|
// realistically speaking, this should never happen
|
||||||
|
$this->assertIdentical('-', $HP->parseData('-'));
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
function test_extractBody() {
|
function test_extractBody() {
|
||||||
$this->assertExtractBody('<b>Bold</b>');
|
$this->assertExtractBody('<b>Bold</b>');
|
||||||
$this->assertExtractBody('<html><body><b>Bold</b></body></html>', '<b>Bold</b>');
|
$this->assertExtractBody('<html><body><b>Bold</b></body></html>', '<b>Bold</b>');
|
||||||
@@ -249,13 +268,16 @@ class HTMLPurifier_LexerTest extends UnitTestCase
|
|||||||
,new HTMLPurifier_Token_Text('Link')
|
,new HTMLPurifier_Token_Text('Link')
|
||||||
,new HTMLPurifier_Token_End('a')
|
,new HTMLPurifier_Token_End('a')
|
||||||
);
|
);
|
||||||
$sax_expect[16] = false; // PEARSax doesn't support it!
|
|
||||||
|
|
||||||
// test that UTF-8 is preserved
|
// test that UTF-8 is preserved
|
||||||
$char_hearts = $this->_entity_lookup->table['hearts'];
|
$char_hearts = $this->_entity_lookup->table['hearts'];
|
||||||
$input[17] = $char_hearts;
|
$input[17] = $char_hearts;
|
||||||
$expect[17] = array( new HTMLPurifier_Token_Text($char_hearts) );
|
$expect[17] = array( new HTMLPurifier_Token_Text($char_hearts) );
|
||||||
|
|
||||||
|
// test weird characters in attributes
|
||||||
|
$input[18] = '<br test="x < 6" />';
|
||||||
|
$expect[18] = array( new HTMLPurifier_Token_Empty('br', array('test' => 'x < 6')) );
|
||||||
|
|
||||||
$default_config = HTMLPurifier_Config::createDefault();
|
$default_config = HTMLPurifier_Config::createDefault();
|
||||||
foreach($input as $i => $discard) {
|
foreach($input as $i => $discard) {
|
||||||
if (!isset($config[$i])) $config[$i] = $default_config;
|
if (!isset($config[$i])) $config[$i] = $default_config;
|
||||||
|
@@ -54,12 +54,34 @@ class HTMLPurifier_URISchemeTest extends UnitTestCase
|
|||||||
|
|
||||||
$scheme = new HTMLPurifier_URIScheme_ftp();
|
$scheme = new HTMLPurifier_URIScheme_ftp();
|
||||||
$config = HTMLPurifier_Config::createDefault();
|
$config = HTMLPurifier_Config::createDefault();
|
||||||
|
|
||||||
$this->assertIdentical(
|
$this->assertIdentical(
|
||||||
$scheme->validateComponents(
|
$scheme->validateComponents(
|
||||||
'user', 'www.example.com', 21, '/', 's=foobar', $config),
|
'user', 'www.example.com', 21, '/', 's=foobar', $config),
|
||||||
array('user', 'www.example.com', null, '/', null)
|
array('user', 'www.example.com', null, '/', null)
|
||||||
);
|
);
|
||||||
|
|
||||||
|
// valid typecode
|
||||||
|
$this->assertIdentical(
|
||||||
|
$scheme->validateComponents(
|
||||||
|
null, 'www.example.com', null, '/file.txt;type=a', null, $config),
|
||||||
|
array(null, 'www.example.com', null, '/file.txt;type=a', null)
|
||||||
|
);
|
||||||
|
|
||||||
|
// remove invalid typecode
|
||||||
|
$this->assertIdentical(
|
||||||
|
$scheme->validateComponents(
|
||||||
|
null, 'www.example.com', null, '/file.txt;type=z', null, $config),
|
||||||
|
array(null, 'www.example.com', null, '/file.txt', null)
|
||||||
|
);
|
||||||
|
|
||||||
|
// encode errant semicolons
|
||||||
|
$this->assertIdentical(
|
||||||
|
$scheme->validateComponents(
|
||||||
|
null, 'www.example.com', null, '/too;many;semicolons=1', null, $config),
|
||||||
|
array(null, 'www.example.com', null, '/too%3Bmany%3Bsemicolons=1', null)
|
||||||
|
);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
function test_news() {
|
function test_news() {
|
||||||
|
@@ -40,7 +40,7 @@ require_once 'HTMLPurifier.php';
|
|||||||
// define callable test files
|
// define callable test files
|
||||||
$test_files = array();
|
$test_files = array();
|
||||||
$test_files[] = 'ConfigTest.php';
|
$test_files[] = 'ConfigTest.php';
|
||||||
$test_files[] = 'ConfigDefTest.php';
|
$test_files[] = 'ConfigSchemaTest.php';
|
||||||
$test_files[] = 'LexerTest.php';
|
$test_files[] = 'LexerTest.php';
|
||||||
$test_files[] = 'Lexer/DirectLexTest.php';
|
$test_files[] = 'Lexer/DirectLexTest.php';
|
||||||
$test_files[] = 'TokenTest.php';
|
$test_files[] = 'TokenTest.php';
|
||||||
@@ -114,14 +114,14 @@ if (isset($_GET['file']) && isset($test_file_lookup[$_GET['file']])) {
|
|||||||
// execute only one test
|
// execute only one test
|
||||||
$test_file = $_GET['file'];
|
$test_file = $_GET['file'];
|
||||||
|
|
||||||
$test = new GroupTest('HTMLPurifier - ' . $test_file);
|
$test = new GroupTest('HTML Purifier - ' . $test_file);
|
||||||
$path = 'HTMLPurifier/' . $test_file;
|
$path = 'HTMLPurifier/' . $test_file;
|
||||||
require_once $path;
|
require_once $path;
|
||||||
$test->addTestClass(htmlpurifier_path2class($path));
|
$test->addTestClass(htmlpurifier_path2class($path));
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
|
|
||||||
$test = new GroupTest('HTMLPurifier');
|
$test = new GroupTest('HTML Purifier');
|
||||||
|
|
||||||
foreach ($test_files as $test_file) {
|
foreach ($test_files as $test_file) {
|
||||||
$path = 'HTMLPurifier/' . $test_file;
|
$path = 'HTMLPurifier/' . $test_file;
|
||||||
|
Reference in New Issue
Block a user