mirror of
https://github.com/ezyang/htmlpurifier.git
synced 2025-08-10 16:14:08 +02:00
Compare commits
44 Commits
Author | SHA1 | Date | |
---|---|---|---|
|
6ef8abd04f | ||
|
bc5871f389 | ||
|
30d75c999d | ||
|
64d8ca9831 | ||
|
d7642b8c70 | ||
|
3b30c2ca5b | ||
|
f43616f72d | ||
|
6740ba61af | ||
|
6a33945499 | ||
|
4660791682 | ||
|
b5c69d8ca5 | ||
|
e440f25bce | ||
|
665e80d223 | ||
|
69747ede8a | ||
|
49b3832ebf | ||
|
a365d4c688 | ||
|
7038fad788 | ||
|
50b272d75e | ||
|
bfb642d32c | ||
|
edb39601c7 | ||
|
694139d3bb | ||
|
81721ded5c | ||
|
371fb7c3d2 | ||
|
9e6953e619 | ||
|
2299f0c831 | ||
|
9dd4dcb27a | ||
|
aa0838492e | ||
|
df075c96e0 | ||
|
fbaa909d25 | ||
|
967f40fc11 | ||
|
5ee6ffe20f | ||
|
10d41d7130 | ||
|
65a628bcb7 | ||
|
a5b4ed2126 | ||
|
d20bbd8db3 | ||
|
b99573223d | ||
|
c6cfb68713 | ||
|
2259bfa40e | ||
|
de3b2b70fb | ||
|
4f0a5c0e22 | ||
|
fdd583253c | ||
|
a4be6ffe4d | ||
|
6de42d8d1d | ||
|
e9a519e589 |
2
Doxyfile
2
Doxyfile
@@ -4,7 +4,7 @@
|
|||||||
# Project related configuration options
|
# Project related configuration options
|
||||||
#---------------------------------------------------------------------------
|
#---------------------------------------------------------------------------
|
||||||
PROJECT_NAME = HTML Purifier
|
PROJECT_NAME = HTML Purifier
|
||||||
PROJECT_NUMBER = trunk
|
PROJECT_NUMBER = 1.0.0
|
||||||
OUTPUT_DIRECTORY = "C:/Documents and Settings/Edward/My Documents/My Webs/htmlpurifier/docs/doxygen"
|
OUTPUT_DIRECTORY = "C:/Documents and Settings/Edward/My Documents/My Webs/htmlpurifier/docs/doxygen"
|
||||||
CREATE_SUBDIRS = NO
|
CREATE_SUBDIRS = NO
|
||||||
OUTPUT_LANGUAGE = English
|
OUTPUT_LANGUAGE = English
|
||||||
|
75
INSTALL
75
INSTALL
@@ -17,7 +17,7 @@ these versions:
|
|||||||
- 4.3.9, 4.3.11
|
- 4.3.9, 4.3.11
|
||||||
- 4.4.0, 4.4.4
|
- 4.4.0, 4.4.4
|
||||||
- 5.0.0, 5.0.4
|
- 5.0.0, 5.0.4
|
||||||
- 5.1.0, 5.1.5
|
- 5.1.0, 5.1.6
|
||||||
|
|
||||||
And can confidently say that HTML Purifier should work in all versions
|
And can confidently say that HTML Purifier should work in all versions
|
||||||
between and afterwards. HTML Purifier definitely does not support PHP 4.2,
|
between and afterwards. HTML Purifier definitely does not support PHP 4.2,
|
||||||
@@ -26,7 +26,7 @@ any earlier versions.
|
|||||||
|
|
||||||
I have been unable to get PHP 5.0.5 working on my computer, so if someone
|
I have been unable to get PHP 5.0.5 working on my computer, so if someone
|
||||||
wants to test that, be my guest. All tests were done on Windows XP Home,
|
wants to test that, be my guest. All tests were done on Windows XP Home,
|
||||||
but operating system is quite irrelevant in this particular case.
|
but operating system should not be a major factor in the library.
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@@ -35,7 +35,8 @@ but operating system is quite irrelevant in this particular case.
|
|||||||
The library/ directory must be added to your path: HTML Purifier will not be
|
The library/ directory must be added to your path: HTML Purifier will not be
|
||||||
able to find the necessary includes otherwise. This is as simple as:
|
able to find the necessary includes otherwise. This is as simple as:
|
||||||
|
|
||||||
set_include_path('/path/to/htmlpurifier/library' . PATH_SEPARATOR . get_include_path());
|
set_include_path('/path/to/htmlpurifier/library' . PATH_SEPARATOR .
|
||||||
|
get_include_path() );
|
||||||
|
|
||||||
...replacing /path/to/htmlpurifier with the actual location of the folder. Don't
|
...replacing /path/to/htmlpurifier with the actual location of the folder. Don't
|
||||||
worry, HTML Purifier is namespaced so unless you have another file named
|
worry, HTML Purifier is namespaced so unless you have another file named
|
||||||
@@ -45,7 +46,9 @@ Then, it's a simple matter of including the base file:
|
|||||||
|
|
||||||
require_once 'HTMLPurifier.php';
|
require_once 'HTMLPurifier.php';
|
||||||
|
|
||||||
...and you're good to go.
|
...and you're good to go. The library/ folder contains all the files you need,
|
||||||
|
so you can get rid of most of everything else when using the library in a
|
||||||
|
production environment.
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@@ -58,19 +61,47 @@ is a (short) checklist:
|
|||||||
* Have I specified XHTML 1.0 Transitional as the doctype?
|
* Have I specified XHTML 1.0 Transitional as the doctype?
|
||||||
* Have I specified UTF-8 as the character encoding?
|
* Have I specified UTF-8 as the character encoding?
|
||||||
|
|
||||||
|
To find out what these are, browse to your website and view its source code.
|
||||||
|
You can figure out the doctype from a declaration that looks like
|
||||||
|
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
|
||||||
|
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
|
||||||
|
or no doctype. You can figure out the character encoding by looking for
|
||||||
|
<meta http-equiv="Content-type" content="text/html;charset=ENCODING">
|
||||||
|
|
||||||
I cannot stress the importance of these two bullets enough. Omitting either
|
I cannot stress the importance of these two bullets enough. Omitting either
|
||||||
of them could have dire consequences not only for security but for plain
|
of them could have dire consequences not only for security but for plain
|
||||||
old usability. You can find a more in-depth discussion of why this is needed
|
old usability. You can find a more in-depth discussion of why this is needed
|
||||||
in docs/security.txt, in the meantime, try to change your output so this is
|
in docs/security.txt, in the meantime, try to change your output so this is
|
||||||
the case.
|
the case. If you can't, well, we might be able to accommodate you (read
|
||||||
|
section 3).
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
3. Configuring HTML Purifier
|
||||||
|
|
||||||
|
HTML Purifier is designed to run out-of-the-box, but occasionally HTML
|
||||||
|
Purifier needs to be told what to do.
|
||||||
|
|
||||||
If, for some reason, you are unable to switch to UTF-8 immediately, you can
|
If, for some reason, you are unable to switch to UTF-8 immediately, you can
|
||||||
switch HTML Purifier's encoding. Note that the availability of encodings is
|
switch HTML Purifier's encoding. Note that the availability of encodings is
|
||||||
dependent on iconv, and you'll be missing characters if the charset you
|
dependent on iconv, and you'll be missing characters if the charset you
|
||||||
choose doesn't have them.
|
choose doesn't have them.
|
||||||
|
|
||||||
$config = HTMLPurifier_Config::createDefault();
|
$config->set('Core', 'Encoding', /* put your encoding here */);
|
||||||
$config->set('Core', 'Encoding', $encoding);
|
|
||||||
|
An example usage for Latin-1 websites:
|
||||||
|
|
||||||
|
$config->set('Core', 'Encoding', 'ISO-8859-1');
|
||||||
|
|
||||||
|
For those of you stuck using HTML 4.01 Transitional, you can disable
|
||||||
|
XHTML output like this:
|
||||||
|
|
||||||
|
$config->set('Core', 'XHTML', false);
|
||||||
|
|
||||||
|
However, I strongly recommend that you use XHTML. Currently, we can only
|
||||||
|
guarantee transitional-compliant output, future versions will also allow strict
|
||||||
|
output. There are more configuration directives which can be read about
|
||||||
|
here: http://hp.jpsband.org/live/configdoc/plain.html
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@@ -88,3 +119,33 @@ Or, if you're using the configuration object:
|
|||||||
|
|
||||||
That's it. For more examples, check out docs/examples/. Also, SLOW gives
|
That's it. For more examples, check out docs/examples/. Also, SLOW gives
|
||||||
advice on what to do if HTML Purifier is slowing down your application.
|
advice on what to do if HTML Purifier is slowing down your application.
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
4. Quick install
|
||||||
|
|
||||||
|
If your website is in UTF-8 and XHTML Transitional, use this code:
|
||||||
|
|
||||||
|
<?php
|
||||||
|
set_include_path('/path/to/htmlpurifier/library'
|
||||||
|
. PATH_SEPARATOR . get_include_path() );
|
||||||
|
require_once 'HTMLPurifier.php';
|
||||||
|
$purifier = new HTMLPurifier();
|
||||||
|
|
||||||
|
$clean_html = $purifier->purify($dirty_html);
|
||||||
|
?>
|
||||||
|
|
||||||
|
If your website is in a different encoding or doctype, use this code:
|
||||||
|
|
||||||
|
<?php
|
||||||
|
set_include_path('/path/to/htmlpurifier/library'
|
||||||
|
. PATH_SEPARATOR . get_include_path() );
|
||||||
|
require_once 'HTMLPurifier.php';
|
||||||
|
|
||||||
|
$config = HTMLPurifier_Config::createDefault();
|
||||||
|
$config->set('Core', 'Encoding', 'ISO-8859-1'); //replace with your encoding
|
||||||
|
$config->set('Core', 'XHTML', true); //replace with false if HTML 4.01
|
||||||
|
$purifier = new HTMLPurifier($config);
|
||||||
|
|
||||||
|
$clean_html = $purifier->purify($dirty_html);
|
||||||
|
?>
|
26
NEWS
26
NEWS
@@ -1,6 +1,32 @@
|
|||||||
NEWS ( CHANGELOG and HISTORY ) HTMLPurifier
|
NEWS ( CHANGELOG and HISTORY ) HTMLPurifier
|
||||||
|||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
||||||
|
|
||||||
|
1.1.1, released 2006-09-24
|
||||||
|
- Various documentation updates
|
||||||
|
- Fixed parse error in configuration documentation script
|
||||||
|
- Fixed fatal error in benchmark scripts, slightly augmented
|
||||||
|
- As far as possible, whitespace is preserved in-between table children
|
||||||
|
- Configuration option to optionally Tidy up output for indentation to make up
|
||||||
|
for dropped whitespace by DOMLex (pretty-printing for the entire application
|
||||||
|
should be done by a page-wide Tidy)
|
||||||
|
- Sample test-settings.php file included
|
||||||
|
|
||||||
|
1.1.0, released 2006-09-16
|
||||||
|
- Made URI validator more forgiving: will ignore leading and trailing
|
||||||
|
quotes, apostrophes and less than or greater than signs.
|
||||||
|
- Enforce alphanumeric namespace and directive names for configuration.
|
||||||
|
- Directive documentation generation using XSLT
|
||||||
|
- Table child definition made more flexible, will fix up poorly ordered elements
|
||||||
|
- XHTML generation can now be turned off, allowing things like <br>
|
||||||
|
- Renamed ConfigDef to ConfigSchema
|
||||||
|
|
||||||
|
1.0.1, released 2006-09-04
|
||||||
|
- Fixed slight bug in DOMLex attribute parsing
|
||||||
|
- Fixed rejection of case-insensitive configuration values when there is a
|
||||||
|
set of allowed values. This manifested in %Core.Encoding.
|
||||||
|
- Fixed rejection of inline style declarations that had lots of extra
|
||||||
|
space in them. This manifested in TinyMCE.
|
||||||
|
|
||||||
1.0.0, released 2006-09-01
|
1.0.0, released 2006-09-01
|
||||||
- Fixed broken numeric entity conversion
|
- Fixed broken numeric entity conversion
|
||||||
- Malformed UTF-8 and non-SGML character detection and cleaning implemented
|
- Malformed UTF-8 and non-SGML character detection and cleaning implemented
|
||||||
|
13
SLOW
13
SLOW
@@ -17,18 +17,23 @@ second tacked on to the load time probably isn't going to be that huge of
|
|||||||
a problem. Then, displaying the content is a simple a manner of outputting
|
a problem. Then, displaying the content is a simple a manner of outputting
|
||||||
it directly from your database/filesystem. The trouble with this method is
|
it directly from your database/filesystem. The trouble with this method is
|
||||||
that your user loses the original text, and when doing edits, will be
|
that your user loses the original text, and when doing edits, will be
|
||||||
handling the filtered text. Of course, maybe that's a good thing. If you
|
handling the filtered text. While this may be a good thing, especially if
|
||||||
don't mind a little extra complexity, you can try...
|
you're using a WYSIWYG editor, it can also result in data-loss if a user
|
||||||
|
expects certain content to be available but it isn't.
|
||||||
|
|
||||||
2. Caching the filtered output - accept the submitted text and put it
|
2. Caching the filtered output - accept the submitted text and put it
|
||||||
unaltered into the database, but then also generate a filtered version and
|
unaltered into the database, but then also generate a filtered version and
|
||||||
stash that in the database. Serve the filtered version to readers, and the
|
stash that in the database. Serve the filtered version to readers, and the
|
||||||
unaltered version to editors. If need be, you can invalidate the cache and
|
unaltered version to editors. If need be, you can invalidate the cache and
|
||||||
have the cached filtered version be regenerated on the first page view. Pros?
|
have the cached filtered version be regenerated on the first page view. Pros?
|
||||||
Full data retention. Cons? It's more complicated.
|
Full data retention. Cons? It's more complicated, and opens other editors
|
||||||
|
up to XSS if they are using a WYSIWYG editor (to fix that, they'd have to
|
||||||
|
be able to get their hands on the *really* original text served in plaintext
|
||||||
|
mode).
|
||||||
|
|
||||||
In short, inbound filtering is almost as simple as outbound filtering, but
|
In short, inbound filtering is almost as simple as outbound filtering, but
|
||||||
it has some drawbacks which cannot be fixed unless you save both the original
|
it has some drawbacks which cannot be fixed unless you save both the original
|
||||||
and the filtered versions.
|
and the filtered versions.
|
||||||
|
|
||||||
There is a third option: profile and optimize HTMLPurifier yourself. ;-)
|
There is a third option: profile and optimize HTMLPurifier yourself. Be sure
|
||||||
|
to tell me if you decide to do that! ;-)
|
||||||
|
29
TODO
29
TODO
@@ -5,33 +5,34 @@ Ongoing
|
|||||||
- Lots of profiling, make it faster!
|
- Lots of profiling, make it faster!
|
||||||
- Plugins for major CMSes (very tricky issue)
|
- Plugins for major CMSes (very tricky issue)
|
||||||
|
|
||||||
1.1 release
|
|
||||||
- Directive documentation generation
|
|
||||||
- Rewrite table's child definition to be faster, smart, and regexp free
|
|
||||||
- Allow HTML 4.01 output (cosmetic changes to the generator)
|
|
||||||
|
|
||||||
1.2 release
|
1.2 release
|
||||||
- Additional support for poorly written HTML
|
|
||||||
- Implement all non-essential attribute transforms
|
|
||||||
- Microsoft Word HTML cleaning (i.e. MsoNormal)
|
|
||||||
|
|
||||||
1.3 release
|
|
||||||
- Make URI validation routines tighter (especially mailto)
|
- Make URI validation routines tighter (especially mailto)
|
||||||
- More extensive URI filtering schemes
|
- More extensive URI filtering schemes
|
||||||
- Allow for background-image and list-style-image (see above)
|
- Allow for background-image and list-style-image (see above)
|
||||||
- Distinguish between different types of URIs, for instance, a mailto URI
|
- Distinguish between different types of URIs, for instance, a mailto URI
|
||||||
in IMG SRC is nonsensical
|
in IMG SRC is nonsensical
|
||||||
|
- Error logging for filtering/cleanup procedures
|
||||||
|
|
||||||
2.0 release
|
1.3 release
|
||||||
- Add various "levels" of cleaning
|
- Add various "levels" of cleaning
|
||||||
- Related: Allow strict (X)HTML
|
- Related: Allow strict (X)HTML
|
||||||
|
|
||||||
|
1.4 release
|
||||||
|
- Additional support for poorly written HTML
|
||||||
|
- Implement all non-essential attribute transforms
|
||||||
|
- Microsoft Word HTML cleaning (i.e. MsoNormal)
|
||||||
|
|
||||||
|
2.0 release
|
||||||
|
- Formatters for plaintext
|
||||||
|
- Auto-paragraphing (be sure to leverage fact that we know when things
|
||||||
|
shouldn't be paragraphed, such as lists and tables).
|
||||||
|
- Linkify URLs
|
||||||
|
- Smileys
|
||||||
|
|
||||||
3.0 release
|
3.0 release
|
||||||
- Extended HTML capabilities based on namespacing and tag transforms
|
- Extended HTML capabilities based on namespacing and tag transforms
|
||||||
- Hooks for adding custom processors to custom namespaced tags and
|
- Hooks for adding custom processors to custom namespaced tags and
|
||||||
attributes, offer default implementation
|
attributes, offer default implementation
|
||||||
- Auto-paragraphing (be sure to leverage fact that we know when things
|
|
||||||
shouldn't be paragraphed, such as lists and tables).
|
|
||||||
- Lots of documentation and samples
|
- Lots of documentation and samples
|
||||||
|
|
||||||
Unknown release (on a scratch-an-itch basis)
|
Unknown release (on a scratch-an-itch basis)
|
||||||
@@ -42,9 +43,9 @@ Unknown release (on a scratch-an-itch basis)
|
|||||||
- Fixes for Firefox's inability to handle COL alignment props (Bug 915)
|
- Fixes for Firefox's inability to handle COL alignment props (Bug 915)
|
||||||
- Automatically add non-breaking spaces to empty table cells when
|
- Automatically add non-breaking spaces to empty table cells when
|
||||||
empty-cells:show is applied to have compatibility with Internet Explorer
|
empty-cells:show is applied to have compatibility with Internet Explorer
|
||||||
- Pretty-printing HTML (adds dependency of Generator to HTMLDefinition)
|
|
||||||
- Non-lossy dumb alternate character encoding transformations, achieved by
|
- Non-lossy dumb alternate character encoding transformations, achieved by
|
||||||
numerically encoding all non-ASCII characters
|
numerically encoding all non-ASCII characters
|
||||||
|
|
||||||
Wontfix
|
Wontfix
|
||||||
- Non-lossy smart alternate character encoding transformations
|
- Non-lossy smart alternate character encoding transformations
|
||||||
|
- Pretty-printing HTML, users can use Tidy on the output on entire page
|
||||||
|
3
WYSIWYG
3
WYSIWYG
@@ -16,3 +16,6 @@ trouble. Therein lies the solution:
|
|||||||
HTML Purifier is perfect for filtering pure-HTML input from WYSIWYG editors.
|
HTML Purifier is perfect for filtering pure-HTML input from WYSIWYG editors.
|
||||||
|
|
||||||
Enough said.
|
Enough said.
|
||||||
|
|
||||||
|
There is a proof-of-concept integration of HTML Purifier with the Mantis
|
||||||
|
bugtracker at http://hp.jpsband.org/mantis/
|
||||||
|
@@ -3,15 +3,24 @@
|
|||||||
// emulates inserting a dir called HTMLPurifier into your class dir
|
// emulates inserting a dir called HTMLPurifier into your class dir
|
||||||
set_include_path(get_include_path() . PATH_SEPARATOR . '../library/');
|
set_include_path(get_include_path() . PATH_SEPARATOR . '../library/');
|
||||||
|
|
||||||
require_once 'HTMLPurifier/ConfigDef.php';
|
@include_once '../test-settings.php';
|
||||||
require_once 'HTMLPurifier/Config.php';
|
|
||||||
require_once 'HTMLPurifier/Lexer/DirectLex.php';
|
|
||||||
require_once 'HTMLPurifier/Lexer/PEARSax3.php';
|
|
||||||
|
|
||||||
$LEXERS = array(
|
require_once 'HTMLPurifier/ConfigSchema.php';
|
||||||
'DirectLex' => new HTMLPurifier_Lexer_DirectLex(),
|
require_once 'HTMLPurifier/Config.php';
|
||||||
'PEARSax3' => new HTMLPurifier_Lexer_PEARSax3()
|
|
||||||
);
|
$LEXERS = array();
|
||||||
|
$RUNS = isset($GLOBALS['HTMLPurifierTest']['Runs'])
|
||||||
|
? $GLOBALS['HTMLPurifierTest']['Runs'] : 2;
|
||||||
|
|
||||||
|
require_once 'HTMLPurifier/Lexer/DirectLex.php';
|
||||||
|
$LEXERS['DirectLex'] = new HTMLPurifier_Lexer_DirectLex();
|
||||||
|
|
||||||
|
if (!empty($GLOBALS['HTMLPurifierTest']['PEAR'])) {
|
||||||
|
require_once 'HTMLPurifier/Lexer/PEARSax3.php';
|
||||||
|
$LEXERS['PEARSax3'] = new HTMLPurifier_Lexer_PEARSax3();
|
||||||
|
} else {
|
||||||
|
exit('PEAR required to perform benchmark.');
|
||||||
|
}
|
||||||
|
|
||||||
if (version_compare(PHP_VERSION, '5', '>=')) {
|
if (version_compare(PHP_VERSION, '5', '>=')) {
|
||||||
require_once 'HTMLPurifier/Lexer/DOMLex.php';
|
require_once 'HTMLPurifier/Lexer/DOMLex.php';
|
||||||
@@ -56,9 +65,12 @@ class RowTimer extends Benchmark_Timer
|
|||||||
if ($standard == false) $standard = $v['diff'];
|
if ($standard == false) $standard = $v['diff'];
|
||||||
|
|
||||||
$perc = $v['diff'] * 100 / $standard;
|
$perc = $v['diff'] * 100 / $standard;
|
||||||
|
$bad_run = ($v['diff'] < 0);
|
||||||
|
|
||||||
$out .= '<td align="right">' . number_format($perc, 2, '.', '') .
|
$out .= '<td align="right"'.
|
||||||
'%</td>';
|
($bad_run ? ' style="color:#AAA;"' : '').
|
||||||
|
'>' . number_format($perc, 2, '.', '') .
|
||||||
|
'%</td><td>'.number_format($v['diff'],4,'.','').'</td>';
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -79,13 +91,13 @@ function print_lexers() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
function do_benchmark($name, $document) {
|
function do_benchmark($name, $document) {
|
||||||
global $LEXERS;
|
global $LEXERS, $RUNS;
|
||||||
|
|
||||||
$timer = new RowTimer($name);
|
$timer = new RowTimer($name);
|
||||||
$timer->start();
|
$timer->start();
|
||||||
|
|
||||||
foreach($LEXERS as $key => $lexer) {
|
foreach($LEXERS as $key => $lexer) {
|
||||||
$tokens = $lexer->tokenizeHTML($document);
|
for ($i=0; $i<$RUNS; $i++) $tokens = $lexer->tokenizeHTML($document);
|
||||||
$timer->setMarker($key);
|
$timer->setMarker($key);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -103,7 +115,7 @@ function do_benchmark($name, $document) {
|
|||||||
<table border="1">
|
<table border="1">
|
||||||
<tr><th>Case</th><?php
|
<tr><th>Case</th><?php
|
||||||
foreach ($LEXERS as $key => $value) {
|
foreach ($LEXERS as $key => $value) {
|
||||||
echo '<th>' . htmlspecialchars($key) . '</th>';
|
echo '<th colspan="2">' . htmlspecialchars($key) . '</th>';
|
||||||
}
|
}
|
||||||
?></tr>
|
?></tr>
|
||||||
<?php
|
<?php
|
||||||
|
@@ -2,7 +2,7 @@
|
|||||||
|
|
||||||
set_include_path(get_include_path() . PATH_SEPARATOR . '../library/');
|
set_include_path(get_include_path() . PATH_SEPARATOR . '../library/');
|
||||||
|
|
||||||
require_once 'HTMLPurifier/ConfigDef.php';
|
require_once 'HTMLPurifier/ConfigSchema.php';
|
||||||
require_once 'HTMLPurifier/Config.php';
|
require_once 'HTMLPurifier/Config.php';
|
||||||
require_once 'HTMLPurifier/Lexer/DirectLex.php';
|
require_once 'HTMLPurifier/Lexer/DirectLex.php';
|
||||||
|
|
||||||
|
214
configdoc/generate.php
Normal file
214
configdoc/generate.php
Normal file
@@ -0,0 +1,214 @@
|
|||||||
|
<?php
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Generates XML and HTML documents describing configuration.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/*
|
||||||
|
TODO:
|
||||||
|
- make XML format richer (see below)
|
||||||
|
- extend XSLT transformation (see the corresponding XSLT file)
|
||||||
|
- allow generation of packaged docs that can be easily moved
|
||||||
|
- multipage documentation
|
||||||
|
- determine how to multilingualize
|
||||||
|
- factor out code into classes
|
||||||
|
- generate a table of contents
|
||||||
|
*/
|
||||||
|
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// Check and configure environment
|
||||||
|
|
||||||
|
if (version_compare('5', PHP_VERSION, '>')) exit('Requires PHP 5 or higher.');
|
||||||
|
error_reporting(E_ALL);
|
||||||
|
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// Include HTML Purifier library
|
||||||
|
|
||||||
|
set_include_path('../library' . PATH_SEPARATOR . get_include_path());
|
||||||
|
require_once 'HTMLPurifier.php';
|
||||||
|
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// Setup convenience functions
|
||||||
|
|
||||||
|
function appendHTMLDiv($document, $node, $html) {
|
||||||
|
global $purifier;
|
||||||
|
$html = $purifier->purify($html);
|
||||||
|
$dom_html = $document->createDocumentFragment();
|
||||||
|
$dom_html->appendXML($html);
|
||||||
|
|
||||||
|
$dom_div = $document->createElement('div');
|
||||||
|
$dom_div->setAttribute('xmlns', 'http://www.w3.org/1999/xhtml');
|
||||||
|
$dom_div->appendChild($dom_html);
|
||||||
|
|
||||||
|
$node->appendChild($dom_div);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// Load copies of HTMLPurifier_ConfigSchema and HTMLPurifier
|
||||||
|
|
||||||
|
$schema = HTMLPurifier_ConfigSchema::instance();
|
||||||
|
$purifier = new HTMLPurifier();
|
||||||
|
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// Generate types.xml, a document describing the constraint "type"
|
||||||
|
|
||||||
|
$types_document = new DOMDocument('1.0', 'UTF-8');
|
||||||
|
$types_root = $types_document->createElement('types');
|
||||||
|
$types_document->appendChild($types_root);
|
||||||
|
$types_document->formatOutput = true;
|
||||||
|
foreach ($schema->types as $name => $expanded_name) {
|
||||||
|
$types_type = $types_document->createElement('type', $expanded_name);
|
||||||
|
$types_type->setAttribute('id', $name);
|
||||||
|
$types_root->appendChild($types_type);
|
||||||
|
}
|
||||||
|
$types_document->save('types.xml');
|
||||||
|
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// Generate configdoc.xml, a document documenting configuration directives
|
||||||
|
|
||||||
|
$dom_document = new DOMDocument('1.0', 'UTF-8');
|
||||||
|
$dom_root = $dom_document->createElement('configdoc');
|
||||||
|
$dom_document->appendChild($dom_root);
|
||||||
|
$dom_document->formatOutput = true;
|
||||||
|
|
||||||
|
// add the name of the application
|
||||||
|
$dom_root->appendChild($dom_document->createElement('title', 'HTML Purifier'));
|
||||||
|
|
||||||
|
/*
|
||||||
|
TODO for XML format:
|
||||||
|
- namespace descriptions
|
||||||
|
- enumerated values
|
||||||
|
- default values
|
||||||
|
- create a definition (DTD or other) once interface stabilizes
|
||||||
|
*/
|
||||||
|
|
||||||
|
foreach($schema->info as $namespace_name => $namespace_info) {
|
||||||
|
|
||||||
|
$dom_namespace = $dom_document->createElement('namespace');
|
||||||
|
$dom_root->appendChild($dom_namespace);
|
||||||
|
|
||||||
|
$dom_namespace->setAttribute('id', $namespace_name);
|
||||||
|
$dom_namespace->appendChild(
|
||||||
|
$dom_document->createElement('name', $namespace_name)
|
||||||
|
);
|
||||||
|
$dom_namespace_description = $dom_document->createElement('description');
|
||||||
|
$dom_namespace->appendChild($dom_namespace_description);
|
||||||
|
appendHTMLDiv($dom_document, $dom_namespace_description,
|
||||||
|
$schema->info_namespace[$namespace_name]->description);
|
||||||
|
|
||||||
|
foreach ($namespace_info as $name => $info) {
|
||||||
|
|
||||||
|
$dom_directive = $dom_document->createElement('directive');
|
||||||
|
$dom_namespace->appendChild($dom_directive);
|
||||||
|
|
||||||
|
$dom_directive->setAttribute('id', $namespace_name . '.' . $name);
|
||||||
|
$dom_directive->appendChild(
|
||||||
|
$dom_document->createElement('name', $name)
|
||||||
|
);
|
||||||
|
|
||||||
|
$dom_constraints = $dom_document->createElement('constraints');
|
||||||
|
$dom_directive->appendChild($dom_constraints);
|
||||||
|
|
||||||
|
$dom_constraints->appendChild(
|
||||||
|
$dom_document->createElement('type', $info->type)
|
||||||
|
);
|
||||||
|
if ($info->allowed !== true) {
|
||||||
|
$dom_allowed = $dom_document->createElement('allowed');
|
||||||
|
$dom_constraints->appendChild($dom_allowed);
|
||||||
|
foreach ($info->allowed as $allowed => $bool) {
|
||||||
|
$dom_allowed->appendChild(
|
||||||
|
$dom_document->createElement('value', $allowed)
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
$raw_default = $schema->defaults[$namespace_name][$name];
|
||||||
|
if (is_bool($raw_default)) {
|
||||||
|
$default = $raw_default ? 'true' : 'false';
|
||||||
|
} elseif (is_string($raw_default)) {
|
||||||
|
$default = "\"$raw_default\"";
|
||||||
|
} else {
|
||||||
|
$default = print_r(
|
||||||
|
$schema->defaults[$namespace_name][$name], true
|
||||||
|
);
|
||||||
|
}
|
||||||
|
$dom_constraints->appendChild(
|
||||||
|
$dom_document->createElement('default', $default)
|
||||||
|
);
|
||||||
|
|
||||||
|
$dom_descriptions = $dom_document->createElement('descriptions');
|
||||||
|
$dom_directive->appendChild($dom_descriptions);
|
||||||
|
|
||||||
|
foreach ($info->descriptions as $file => $file_descriptions) {
|
||||||
|
foreach ($file_descriptions as $line => $description) {
|
||||||
|
$dom_description = $dom_document->createElement('description');
|
||||||
|
$dom_description->setAttribute('file', $file);
|
||||||
|
$dom_description->setAttribute('line', $line);
|
||||||
|
appendHTMLDiv($dom_document, $dom_description, $description);
|
||||||
|
$dom_descriptions->appendChild($dom_description);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
// print_r($dom_document->saveXML());
|
||||||
|
|
||||||
|
// save a copy of the raw XML
|
||||||
|
$dom_document->save('configdoc.xml');
|
||||||
|
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// Generate final output using XSLT
|
||||||
|
|
||||||
|
// load the stylesheet
|
||||||
|
$xsl_stylesheet_name = 'plain';
|
||||||
|
$xsl_stylesheet = "styles/$xsl_stylesheet_name.xsl";
|
||||||
|
$xsl_dom_stylesheet = new DOMDocument();
|
||||||
|
$xsl_dom_stylesheet->load($xsl_stylesheet);
|
||||||
|
|
||||||
|
// setup the XSLT processor
|
||||||
|
$xsl_processor = new XSLTProcessor();
|
||||||
|
|
||||||
|
// perform the transformation
|
||||||
|
$xsl_processor->importStylesheet($xsl_dom_stylesheet);
|
||||||
|
$html_output = $xsl_processor->transformToXML($dom_document);
|
||||||
|
|
||||||
|
// some slight fudges to preserve backwards compatibility
|
||||||
|
$html_output = str_replace('/>', ' />', $html_output); // <br /> not <br>
|
||||||
|
$html_output = str_replace(' xmlns=""', '', $html_output); // rm unnecessary xmlns
|
||||||
|
|
||||||
|
if (class_exists('Tidy')) {
|
||||||
|
// cleanup output
|
||||||
|
$config = array(
|
||||||
|
'indent' => true,
|
||||||
|
'output-xhtml' => true,
|
||||||
|
'wrap' => 80
|
||||||
|
);
|
||||||
|
$tidy = new Tidy;
|
||||||
|
$tidy->parseString($html_output, $config, 'utf8');
|
||||||
|
$tidy->cleanRepair();
|
||||||
|
$html_output = (string) $tidy;
|
||||||
|
}
|
||||||
|
|
||||||
|
// write it to a file (todo: parse into separate pages)
|
||||||
|
file_put_contents("$xsl_stylesheet_name.html", $html_output);
|
||||||
|
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// Output for instant feedback
|
||||||
|
|
||||||
|
if (php_sapi_name() != 'cli') {
|
||||||
|
echo $html_output;
|
||||||
|
} else {
|
||||||
|
echo 'Files generated successfully.';
|
||||||
|
}
|
||||||
|
|
||||||
|
?>
|
7
configdoc/styles/plain.css
Normal file
7
configdoc/styles/plain.css
Normal file
@@ -0,0 +1,7 @@
|
|||||||
|
table {border-collapse:collapse;}
|
||||||
|
table td, table th {padding:0.2em;}
|
||||||
|
|
||||||
|
table.constraints {margin:0 0 1em;}
|
||||||
|
table.constraints th {text-align:left;padding-left:0.4em;}
|
||||||
|
table.constraints td {padding-right:0.4em;}
|
||||||
|
table.constraints td pre {margin:0;}
|
105
configdoc/styles/plain.xsl
Normal file
105
configdoc/styles/plain.xsl
Normal file
@@ -0,0 +1,105 @@
|
|||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<xsl:stylesheet
|
||||||
|
version = "1.0"
|
||||||
|
xmlns = "http://www.w3.org/1999/xhtml"
|
||||||
|
xmlns:xsl = "http://www.w3.org/1999/XSL/Transform"
|
||||||
|
>
|
||||||
|
<xsl:output
|
||||||
|
method = "xml"
|
||||||
|
encoding = "UTF-8"
|
||||||
|
doctype-public = "-//W3C//DTD XHTML 1.0 Transitional//EN"
|
||||||
|
doctype-system = "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"
|
||||||
|
indent = "no"
|
||||||
|
media-type = "text/html"
|
||||||
|
/>
|
||||||
|
|
||||||
|
<xsl:variable name="typeLookup" select="document('../types.xml')" />
|
||||||
|
|
||||||
|
<xsl:template match="/">
|
||||||
|
<html lang="en" xml:lang="en">
|
||||||
|
<head>
|
||||||
|
<title><xsl:value-of select="/configdoc/title" /> Configuration Documentation</title>
|
||||||
|
<meta http-equiv="Content-Type" content="text/html;charset=UTF-8" />
|
||||||
|
<link rel="stylesheet" type="text/css" href="styles/plain.css" />
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
<xsl:apply-templates />
|
||||||
|
</body>
|
||||||
|
</html>
|
||||||
|
</xsl:template>
|
||||||
|
|
||||||
|
<xsl:template match="title">
|
||||||
|
<h1><xsl:value-of select="/configdoc/title" /> Configuration Documentation</h1>
|
||||||
|
</xsl:template>
|
||||||
|
|
||||||
|
<xsl:template match="namespace">
|
||||||
|
<xsl:apply-templates />
|
||||||
|
<xsl:if test="count(child::directive)=0">
|
||||||
|
<p>No configuration directives defined for this namespace.</p>
|
||||||
|
</xsl:if>
|
||||||
|
</xsl:template>
|
||||||
|
<xsl:template match="namespace/name">
|
||||||
|
<h2 id="{../@id}"><xsl:value-of select="text()" /></h2>
|
||||||
|
</xsl:template>
|
||||||
|
<xsl:template match="namespace/description">
|
||||||
|
<div class="description">
|
||||||
|
<xsl:copy-of select="div/node()" />
|
||||||
|
</div>
|
||||||
|
</xsl:template>
|
||||||
|
|
||||||
|
<xsl:template match="directive">
|
||||||
|
<xsl:apply-templates />
|
||||||
|
</xsl:template>
|
||||||
|
<xsl:template match="directive/name">
|
||||||
|
<h3 id="{../@id}"><xsl:value-of select="text()" /></h3>
|
||||||
|
</xsl:template>
|
||||||
|
<xsl:template match="directive/constraints">
|
||||||
|
<table class="constraints">
|
||||||
|
<xsl:apply-templates />
|
||||||
|
<!-- Calculated other values -->
|
||||||
|
<tr>
|
||||||
|
<th>Used by:</th>
|
||||||
|
<td>
|
||||||
|
<xsl:for-each select="../descriptions/description">
|
||||||
|
<xsl:if test="position()>1">, </xsl:if>
|
||||||
|
<xsl:value-of select="@file" />
|
||||||
|
</xsl:for-each>
|
||||||
|
</td>
|
||||||
|
</tr>
|
||||||
|
</table>
|
||||||
|
</xsl:template>
|
||||||
|
<xsl:template match="directive//description">
|
||||||
|
<div class="description">
|
||||||
|
<xsl:copy-of select="div/node()" />
|
||||||
|
</div>
|
||||||
|
</xsl:template>
|
||||||
|
|
||||||
|
<xsl:template match="constraints/type">
|
||||||
|
<tr>
|
||||||
|
<th>Type:</th>
|
||||||
|
<td>
|
||||||
|
<xsl:variable name="type" select="text()" />
|
||||||
|
<xsl:attribute name="class">type type-<xsl:value-of select="$type" /></xsl:attribute>
|
||||||
|
<xsl:value-of select="$typeLookup/types/type[@id=$type]/text()" />
|
||||||
|
</td>
|
||||||
|
</tr>
|
||||||
|
</xsl:template>
|
||||||
|
<xsl:template match="constraints/allowed">
|
||||||
|
<tr>
|
||||||
|
<th>Allowed values:</th>
|
||||||
|
<td>
|
||||||
|
<xsl:for-each select="value"><!--
|
||||||
|
--><xsl:if test="position()>1">, </xsl:if>
|
||||||
|
"<xsl:value-of select="." />"<!--
|
||||||
|
--></xsl:for-each>
|
||||||
|
</td>
|
||||||
|
</tr>
|
||||||
|
</xsl:template>
|
||||||
|
<xsl:template match="constraints/default">
|
||||||
|
<tr>
|
||||||
|
<th>Default:</th>
|
||||||
|
<td><pre><xsl:value-of select="." xml:space="preserve" /></pre></td>
|
||||||
|
</tr>
|
||||||
|
</xsl:template>
|
||||||
|
|
||||||
|
</xsl:stylesheet>
|
@@ -11,25 +11,24 @@ profiling.
|
|||||||
Here we go:
|
Here we go:
|
||||||
|
|
||||||
AttrDef
|
AttrDef
|
||||||
Class - doesn't support Unicode characters, uses regular expressions
|
Class - doesn't support Unicode characters (fringe); uses regular
|
||||||
Lang - code duplication, premature optimization, doesn't consult official
|
expressions
|
||||||
lists
|
Lang - code duplication; premature optimization; doesn't consult official
|
||||||
Pixels/Length/MultiLength - implemented according to HTML spec (excludes
|
lists (fringe)
|
||||||
code reuse in CSS)
|
Length - easily mistaken for CSSLength
|
||||||
URI - multiple regular expressions, needs host validation routines factored
|
URI - multiple regular expressions; needs host validation routines factored
|
||||||
out for mailto scheme, IPv6 validation is broken (fringe), unintuitive
|
out for mailto scheme; missing validation for query, fragment and path;
|
||||||
variable overwriting, missing validation for query, fragment and path,
|
|
||||||
no percent-encode fixing
|
no percent-encode fixing
|
||||||
CSS - parser doesn't accept advanced CSS (fringe)
|
CSS - parser doesn't accept advanced CSS (fringe)
|
||||||
Number - constructor interface is inconsistent with Integer
|
Number - constructor interface is inconsistent with Integer
|
||||||
AttrTransform - doesn't accept AttrContext, non-validating
|
AttrTransform - doesn't accept AttrContext
|
||||||
ChildDef - not-allowed nodes translated to text, likely invalid handling
|
Config - "load configuration" hooks missing, rich set* accessors missing
|
||||||
Config - "load configuration" hooks missing, rich set* accessors missing,
|
ConfigSchema - redefinition is a mess
|
||||||
needs redefined relationship with the definitions
|
|
||||||
Strategy
|
Strategy
|
||||||
FixNesting - cannot bubble nodes out of structures
|
FixNesting - cannot bubble nodes out of structures
|
||||||
MakeWellFormed - insufficient automatic closing definitions (check HTML
|
MakeWellFormed - insufficient automatic closing definitions (check HTML
|
||||||
spec for optional end tags).
|
spec for optional end tags; also, closing based on type (block/inline)
|
||||||
|
might be efficient).
|
||||||
RemoveForeignElements - should be run in parallel with MakeWellFormed
|
RemoveForeignElements - should be run in parallel with MakeWellFormed
|
||||||
URIScheme - needs to have callable generic checks
|
URIScheme - needs to have callable generic checks
|
||||||
ftp - missing typecode check
|
ftp - missing typecode check
|
||||||
|
@@ -17,18 +17,9 @@ are passed. These classes are: HTMLPurifier::*, Generator::generateFromTokens
|
|||||||
and Lexer::tokenizeHTML. However, whenever a valid configuration object
|
and Lexer::tokenizeHTML. However, whenever a valid configuration object
|
||||||
is defined, that object should be used.
|
is defined, that object should be used.
|
||||||
|
|
||||||
-- the following is projected changes to the configuration system --
|
In relation to HTMLDefinition and CSSDefinition, there is a special class
|
||||||
|
of directives that influence the *construction* of the Definition object.
|
||||||
In relation to HTMLDefinition and CSSDefinition, there are going to be some
|
A standard call pattern would look like:
|
||||||
major structural changes to enable the easy configuration of these objects.
|
|
||||||
Due to the intricacy of these objects, it's not feasible to ask an average
|
|
||||||
user to twiddle around with an element and its 20 other dependencies. However,
|
|
||||||
these objects are the only possible point where change could occur in the
|
|
||||||
context of configuration.
|
|
||||||
|
|
||||||
The solution is to introduce a special class of directives that influence the
|
|
||||||
*construction* of the Definition object. A standard call pattern would look
|
|
||||||
like:
|
|
||||||
|
|
||||||
1. Client calls Config->getHTMLDefinition()
|
1. Client calls Config->getHTMLDefinition()
|
||||||
2. Config calls HTMLDefinition->createNew(this)
|
2. Config calls HTMLDefinition->createNew(this)
|
||||||
|
@@ -1,272 +0,0 @@
|
|||||||
<!-- Transform %TextAlign to align:value in style -->
|
|
||||||
|
|
||||||
<!-- text alignment for p, div, h1-h6. The default is
|
|
||||||
align="left" for ltr headings, "right" for rtl
|
|
||||||
|
|
||||||
Move to style! -->
|
|
||||||
<!ENTITY % TextAlign "DEPRECATED align (left|center|right|justify) #IMPLIED">
|
|
||||||
|
|
||||||
<!-- type and start should have CSS equivalents, but they'll need to
|
|
||||||
be translated intelligently -->
|
|
||||||
<!ENTITY % ULStyle "(disc|square|circle)">
|
|
||||||
<!-- Ordered list numbering style
|
|
||||||
|
|
||||||
1 arabic numbers 1, 2, 3, ...
|
|
||||||
a lower alpha a, b, c, ...
|
|
||||||
A upper alpha A, B, C, ...
|
|
||||||
i lower roman i, ii, iii, ...
|
|
||||||
I upper roman I, II, III, ...
|
|
||||||
|
|
||||||
The style is applied to the sequence number which by default
|
|
||||||
is reset to 1 for the first list item in an ordered list.
|
|
||||||
-->
|
|
||||||
<!ENTITY % OLStyle "CDATA">
|
|
||||||
<!-- LIStyle is constrained to: "(%ULStyle;|%OLStyle;)" -->
|
|
||||||
<!ENTITY % LIStyle "CDATA">
|
|
||||||
|
|
||||||
<!ATTLIST ol
|
|
||||||
%attrs;
|
|
||||||
DEPRECATED type %OLStyle; #IMPLIED
|
|
||||||
DEPRECATED start %Number; #IMPLIED
|
|
||||||
>
|
|
||||||
|
|
||||||
<!ATTLIST li
|
|
||||||
%attrs;
|
|
||||||
DEPRECATED type %LIStyle; #IMPLIED
|
|
||||||
DEPRECATED value %Number; #IMPLIED
|
|
||||||
>
|
|
||||||
|
|
||||||
<!ATTLIST hr
|
|
||||||
%attrs;
|
|
||||||
DEPRECATED align (left|center|right) #IMPLIED
|
|
||||||
DEPRECATED size %Pixels; #IMPLIED
|
|
||||||
DEPRECATED width %Length; #IMPLIED
|
|
||||||
>
|
|
||||||
|
|
||||||
<!ATTLIST pre
|
|
||||||
%attrs;
|
|
||||||
DEPRECATED width %Number; #IMPLIED
|
|
||||||
>
|
|
||||||
|
|
||||||
<!ATTLIST blockquote
|
|
||||||
%attrs;
|
|
||||||
cite %URI; #IMPLIED
|
|
||||||
>
|
|
||||||
|
|
||||||
<!ATTLIST ins
|
|
||||||
%attrs;
|
|
||||||
cite %URI; #IMPLIED
|
|
||||||
datetime %Datetime; #IMPLIED
|
|
||||||
>
|
|
||||||
<!ATTLIST del
|
|
||||||
%attrs;
|
|
||||||
cite %URI; #IMPLIED
|
|
||||||
datetime %Datetime; #IMPLIED
|
|
||||||
>
|
|
||||||
|
|
||||||
<!ATTLIST a
|
|
||||||
%attrs;
|
|
||||||
name NMTOKEN #IMPLIED // ID
|
|
||||||
href %URI; #IMPLIED
|
|
||||||
rel %LinkTypes; #IMPLIED // needs policing
|
|
||||||
rev %LinkTypes; #IMPLIED // see rel
|
|
||||||
target %FrameTarget; #IMPLIED // usually not used, but might be
|
|
||||||
>
|
|
||||||
|
|
||||||
<!ATTLIST bdo
|
|
||||||
%coreattrs; // !#!
|
|
||||||
lang %LanguageCode; #IMPLIED
|
|
||||||
xml:lang %LanguageCode; #IMPLIED
|
|
||||||
dir (ltr|rtl) #REQUIRED
|
|
||||||
>
|
|
||||||
|
|
||||||
<!ATTLIST br
|
|
||||||
%coreattrs; // !#!
|
|
||||||
DEPRECATED clear (left|all|right|none) "none"
|
|
||||||
>
|
|
||||||
|
|
||||||
<!ELEMENT q %Inline;> <!-- inlined quote -->
|
|
||||||
<!ATTLIST q
|
|
||||||
%attrs;
|
|
||||||
cite %URI; #IMPLIED
|
|
||||||
>
|
|
||||||
|
|
||||||
<!ATTLIST img
|
|
||||||
%attrs;
|
|
||||||
src %URI; #REQUIRED
|
|
||||||
alt %Text; #REQUIRED
|
|
||||||
DEPRECATED name NMTOKEN #IMPLIED // ID
|
|
||||||
longdesc %URI; #IMPLIED
|
|
||||||
height %Length; #IMPLIED // dubious, but we'll allow
|
|
||||||
width %Length; #IMPLIED //
|
|
||||||
DEPRECATED align %ImgAlign; #IMPLIED
|
|
||||||
DEPRECATED border %Length; #IMPLIED
|
|
||||||
DEPRECATED hspace %Pixels; #IMPLIED // left/right margin
|
|
||||||
DEPRECATED vspace %Pixels; #IMPLIED // up/down margin
|
|
||||||
>
|
|
||||||
|
|
||||||
<!--
|
|
||||||
The border attribute sets the thickness of the frame around the
|
|
||||||
table. The default units are screen pixels.
|
|
||||||
|
|
||||||
The frame attribute specifies which parts of the frame around
|
|
||||||
the table should be rendered. The values are not the same as
|
|
||||||
CALS to avoid a name clash with the valign attribute.
|
|
||||||
-->
|
|
||||||
<!ENTITY % TFrame "(void|above|below|hsides|lhs|rhs|vsides|box|border)">
|
|
||||||
|
|
||||||
<!--
|
|
||||||
The rules attribute defines which rules to draw between cells:
|
|
||||||
|
|
||||||
If rules is absent then assume:
|
|
||||||
"none" if border is absent or border="0" otherwise "all"
|
|
||||||
-->
|
|
||||||
|
|
||||||
<!ENTITY % TRules "(none | groups | rows | cols | all)">
|
|
||||||
|
|
||||||
<!-- horizontal placement of table relative to document -->
|
|
||||||
<!ENTITY % TAlign "(left|center|right)">
|
|
||||||
|
|
||||||
<!-- horizontal alignment attributes for cell contents
|
|
||||||
|
|
||||||
char alignment char, e.g. char=':'
|
|
||||||
charoff offset for alignment char
|
|
||||||
-->
|
|
||||||
<!ENTITY % cellhalign
|
|
||||||
"align (left|center|right|justify|char) #IMPLIED
|
|
||||||
char %Character; #IMPLIED
|
|
||||||
charoff %Length; #IMPLIED"
|
|
||||||
>
|
|
||||||
|
|
||||||
<!-- vertical alignment attributes for cell contents -->
|
|
||||||
<!ENTITY % cellvalign
|
|
||||||
"valign (top|middle|bottom|baseline) #IMPLIED"
|
|
||||||
>
|
|
||||||
|
|
||||||
<!-- we may want to convert some of these nonetheless -->
|
|
||||||
<!ATTLIST table
|
|
||||||
%attrs;
|
|
||||||
summary %Text; #IMPLIED
|
|
||||||
width %Length; #IMPLIED
|
|
||||||
border %Pixels; #IMPLIED
|
|
||||||
frame %TFrame; #IMPLIED
|
|
||||||
rules %TRules; #IMPLIED
|
|
||||||
cellspacing %Length; #IMPLIED
|
|
||||||
cellpadding %Length; #IMPLIED
|
|
||||||
DEPRECATED align %TAlign; #IMPLIED
|
|
||||||
DEPRECATED bgcolor %Color; #IMPLIED
|
|
||||||
>
|
|
||||||
|
|
||||||
<!ENTITY % CAlign "(top|bottom|left|right)">
|
|
||||||
|
|
||||||
<!ATTLIST caption
|
|
||||||
%attrs;
|
|
||||||
DEPRECATED align %CAlign; #IMPLIED // watch, it's a special set
|
|
||||||
>
|
|
||||||
|
|
||||||
<!--
|
|
||||||
colgroup groups a set of col elements. It allows you to group
|
|
||||||
several semantically related columns together.
|
|
||||||
-->
|
|
||||||
<!ATTLIST colgroup
|
|
||||||
%attrs;
|
|
||||||
span %Number; "1"
|
|
||||||
width %MultiLength; #IMPLIED
|
|
||||||
%cellhalign; // very interesting
|
|
||||||
%cellvalign;
|
|
||||||
>
|
|
||||||
|
|
||||||
<!--
|
|
||||||
col elements define the alignment properties for cells in
|
|
||||||
one or more columns.
|
|
||||||
|
|
||||||
The width attribute specifies the width of the columns, e.g.
|
|
||||||
|
|
||||||
width=64 width in screen pixels
|
|
||||||
width=0.5* relative width of 0.5
|
|
||||||
|
|
||||||
The span attribute causes the attributes of one
|
|
||||||
col element to apply to more than one column.
|
|
||||||
-->
|
|
||||||
<!ATTLIST col
|
|
||||||
%attrs;
|
|
||||||
span %Number; "1"
|
|
||||||
width %MultiLength; #IMPLIED
|
|
||||||
%cellhalign;
|
|
||||||
%cellvalign;
|
|
||||||
>
|
|
||||||
|
|
||||||
<!--
|
|
||||||
Use thead to duplicate headers when breaking table
|
|
||||||
across page boundaries, or for static headers when
|
|
||||||
tbody sections are rendered in scrolling panel.
|
|
||||||
|
|
||||||
Use tfoot to duplicate footers when breaking table
|
|
||||||
across page boundaries, or for static footers when
|
|
||||||
tbody sections are rendered in scrolling panel.
|
|
||||||
|
|
||||||
Use multiple tbody sections when rules are needed
|
|
||||||
between groups of table rows.
|
|
||||||
-->
|
|
||||||
<!ATTLIST thead
|
|
||||||
%attrs;
|
|
||||||
%cellhalign;
|
|
||||||
%cellvalign;
|
|
||||||
>
|
|
||||||
|
|
||||||
<!ATTLIST tfoot
|
|
||||||
%attrs;
|
|
||||||
%cellhalign;
|
|
||||||
%cellvalign;
|
|
||||||
>
|
|
||||||
|
|
||||||
<!ATTLIST tbody
|
|
||||||
%attrs;
|
|
||||||
%cellhalign;
|
|
||||||
%cellvalign;
|
|
||||||
>
|
|
||||||
|
|
||||||
<!ATTLIST tr
|
|
||||||
%attrs;
|
|
||||||
%cellhalign;
|
|
||||||
%cellvalign;
|
|
||||||
DEPRECATED bgcolor %Color; #IMPLIED
|
|
||||||
>
|
|
||||||
|
|
||||||
<!-- Scope is simpler than headers attribute for common tables -->
|
|
||||||
<!ENTITY % Scope "(row|col|rowgroup|colgroup)">
|
|
||||||
|
|
||||||
<!-- th is for headers, td for data and for cells acting as both -->
|
|
||||||
|
|
||||||
<!ATTLIST th
|
|
||||||
%attrs;
|
|
||||||
abbr %Text; #IMPLIED
|
|
||||||
axis CDATA #IMPLIED
|
|
||||||
headers IDREFS #IMPLIED
|
|
||||||
scope %Scope; #IMPLIED
|
|
||||||
rowspan %Number; "1"
|
|
||||||
colspan %Number; "1"
|
|
||||||
%cellhalign;
|
|
||||||
%cellvalign;
|
|
||||||
DEPRECATED nowrap (nowrap) #IMPLIED
|
|
||||||
DEPRECATED bgcolor %Color; #IMPLIED
|
|
||||||
DEPRECATED width %Length; #IMPLIED
|
|
||||||
DEPRECATED height %Length; #IMPLIED
|
|
||||||
>
|
|
||||||
|
|
||||||
<!ATTLIST td
|
|
||||||
%attrs;
|
|
||||||
abbr %Text; #IMPLIED
|
|
||||||
axis CDATA #IMPLIED
|
|
||||||
headers IDREFS #IMPLIED
|
|
||||||
scope %Scope; #IMPLIED
|
|
||||||
rowspan %Number; "1"
|
|
||||||
colspan %Number; "1"
|
|
||||||
%cellhalign;
|
|
||||||
%cellvalign;
|
|
||||||
DEPRECATED nowrap (nowrap) #IMPLIED
|
|
||||||
DEPRECATED bgcolor %Color; #IMPLIED
|
|
||||||
DEPRECATED width %Length; #IMPLIED
|
|
||||||
DEPRECATED height %Length; #IMPLIED
|
|
||||||
>
|
|
||||||
|
|
@@ -21,7 +21,9 @@ if (!empty($_POST['html'])) {
|
|||||||
|
|
||||||
$html = get_magic_quotes_gpc() ? stripslashes($_POST['html']) : $_POST['html'];
|
$html = get_magic_quotes_gpc() ? stripslashes($_POST['html']) : $_POST['html'];
|
||||||
|
|
||||||
$purifier = new HTMLPurifier();
|
$config = HTMLPurifier_Config::createDefault();
|
||||||
|
$config->set('Core', 'TidyFormat', !empty($_POST['tidy']));
|
||||||
|
$purifier = new HTMLPurifier($config);
|
||||||
$pure_html = $purifier->purify($html);
|
$pure_html = $purifier->purify($html);
|
||||||
|
|
||||||
?>
|
?>
|
||||||
@@ -65,6 +67,8 @@ if (isset($html)) {
|
|||||||
HTMLPurifier_Encoder::cleanUTF8($html), ENT_COMPAT, 'UTF-8');
|
HTMLPurifier_Encoder::cleanUTF8($html), ENT_COMPAT, 'UTF-8');
|
||||||
}
|
}
|
||||||
?></textarea>
|
?></textarea>
|
||||||
|
<div>Nicely format output with Tidy? <input type="checkbox" value="1"
|
||||||
|
name="tidy"<?php if (!empty($_POST['tidy'])) echo ' checked="checked"'; ?> /></div>
|
||||||
<div>
|
<div>
|
||||||
<input type="submit" value="Submit" name="submit" class="button" />
|
<input type="submit" value="Submit" name="submit" class="button" />
|
||||||
</div>
|
</div>
|
||||||
|
@@ -2,9 +2,10 @@
|
|||||||
Optimization
|
Optimization
|
||||||
|
|
||||||
Here are some possible optimization techniques we can apply to code sections if
|
Here are some possible optimization techniques we can apply to code sections if
|
||||||
they turn out to be slow. Be sure not to prematurely optimize though!
|
they turn out to be slow. Be sure not to prematurely optimize: if you get
|
||||||
|
that itch, put it here!
|
||||||
|
|
||||||
- Make Tokens Flyweights
|
- Make Tokens Flyweights (may prove problematic, probably not worth it)
|
||||||
- Rewrite regexps into PHP code
|
- Rewrite regexps into PHP code
|
||||||
- Serialize the Definition object
|
- Serialize the Definition object
|
||||||
- Batch regexp validation (do as many per function call as possible)
|
- Batch regexp validation (do as many per function call as possible)
|
||||||
|
@@ -86,7 +86,7 @@ thead th {text-align:left;padding:0.1em;background-color:#EEE;}
|
|||||||
Well-supported values are: disc, circle, square,
|
Well-supported values are: disc, circle, square,
|
||||||
decimal, lower-roman, upper-roman, lower-alpha and upper-alpha. See also
|
decimal, lower-roman, upper-roman, lower-alpha and upper-alpha. See also
|
||||||
CSS 3. Mostly IE lack of support.</td></tr>
|
CSS 3. Mostly IE lack of support.</td></tr>
|
||||||
<tr class="css1 impl-yes"><td>list-style</td><td>SHORTHAND, target milestone 1.0</td></tr>
|
<tr class="css1 impl-yes"><td>list-style</td><td>SHORTHAND</td></tr>
|
||||||
<tr class="css1 impl-yes"><td>margin</td><td>MULTIPLE</td></tr>
|
<tr class="css1 impl-yes"><td>margin</td><td>MULTIPLE</td></tr>
|
||||||
<tr class="css1 impl-yes"><td>margin-*</td><td>COMPOSITE(<length>,
|
<tr class="css1 impl-yes"><td>margin-*</td><td>COMPOSITE(<length>,
|
||||||
<percentage>, auto)</td></tr>
|
<percentage>, auto)</td></tr>
|
||||||
@@ -134,7 +134,7 @@ thead th {text-align:left;padding:0.1em;background-color:#EEE;}
|
|||||||
|
|
||||||
<tbody>
|
<tbody>
|
||||||
<tr><th colspan="2">Unknown</th></tr>
|
<tr><th colspan="2">Unknown</th></tr>
|
||||||
<tr class="danger css1"><td>background-image</td><td>Dangerous, target milestone 1.3</td></tr>
|
<tr class="danger css1"><td>background-image</td><td>Dangerous, target milestone 1.2</td></tr>
|
||||||
<tr class="css1"><td>background-attachment</td><td>ENUM(scroll, fixed),
|
<tr class="css1"><td>background-attachment</td><td>ENUM(scroll, fixed),
|
||||||
Depends on background-image</td></tr>
|
Depends on background-image</td></tr>
|
||||||
<tr class="css1"><td>background-position</td><td>Depends on background-image</td></tr>
|
<tr class="css1"><td>background-position</td><td>Depends on background-image</td></tr>
|
||||||
@@ -144,7 +144,7 @@ thead th {text-align:left;padding:0.1em;background-color:#EEE;}
|
|||||||
inline-block has incomplete IE6 support and requires -moz-inline-box
|
inline-block has incomplete IE6 support and requires -moz-inline-box
|
||||||
for Mozilla. Unknown target milestone.</td></tr>
|
for Mozilla. Unknown target milestone.</td></tr>
|
||||||
<tr><td class="css1">height</td><td>Interesting, why use it? Unknown target milestone.</td></tr>
|
<tr><td class="css1">height</td><td>Interesting, why use it? Unknown target milestone.</td></tr>
|
||||||
<tr class="danger css1"><td>list-style-image</td><td>Dangerous? Target milestone 1.3</td></tr>
|
<tr class="danger css1"><td>list-style-image</td><td>Dangerous? Target milestone 1.2</td></tr>
|
||||||
<tr class="impl-no"><td>max-height</td><td rowspan="4">No IE 5/6</td></tr>
|
<tr class="impl-no"><td>max-height</td><td rowspan="4">No IE 5/6</td></tr>
|
||||||
<tr class="impl-no"><td>min-height</td></tr>
|
<tr class="impl-no"><td>min-height</td></tr>
|
||||||
<tr class="impl-no"><td>max-width</td></tr>
|
<tr class="impl-no"><td>max-width</td></tr>
|
||||||
@@ -254,7 +254,7 @@ Mozilla on inside and needs -moz-outline, no IE support.</td></tr>
|
|||||||
</tbody>
|
</tbody>
|
||||||
|
|
||||||
<tbody>
|
<tbody>
|
||||||
<tr><th colspan="3">Transform, target milestone 1.2</th></tr>
|
<tr><th colspan="3">Transform, target milestone 1.4</th></tr>
|
||||||
<tr><td rowspan="5">align</td><td>CAPTION</td><td>Near-equiv style 'caption-side', drop left and right</td></tr>
|
<tr><td rowspan="5">align</td><td>CAPTION</td><td>Near-equiv style 'caption-side', drop left and right</td></tr>
|
||||||
<tr><td>IMG</td><td rowspan="2">Margin-left and margin-right = auto or parent div</td></tr>
|
<tr><td>IMG</td><td rowspan="2">Margin-left and margin-right = auto or parent div</td></tr>
|
||||||
<tr><td>TABLE</td></tr>
|
<tr><td>TABLE</td></tr>
|
||||||
|
@@ -6,35 +6,43 @@ through negligence of people. This class will do its job: no more, no less,
|
|||||||
and it's up to you to provide it the proper information and proper context
|
and it's up to you to provide it the proper information and proper context
|
||||||
to be effective. Things to remember:
|
to be effective. Things to remember:
|
||||||
|
|
||||||
1. UTF-8. Currently, the parser runs under the assumption that it is dealing
|
1. Character Encoding: UTF-8.
|
||||||
|
Currently, the parser runs under the assumption that it is dealing
|
||||||
with UTF-8. Not ISO-8859-1 or Windows-1252, UTF-8. And definitely not "no
|
with UTF-8. Not ISO-8859-1 or Windows-1252, UTF-8. And definitely not "no
|
||||||
character encoding explicitly stated" or UTF-7. If you're not using UTF-8 as
|
character encoding explicitly stated" or UTF-7. If you're not using UTF-8 as
|
||||||
your character encoding, you should switch. Now. Make sure any input is
|
your character encoding, make sure you configure HTML Purifier or switch
|
||||||
properly converted to UTF-8, or the parser will mangle it badly
|
to UTF-8. Now. Also, make sure any input is properly converted to UTF-8, or
|
||||||
(though it won't be a security risk if you're outputting it as UTF-8 though).
|
the parser will mangle it badly (though it won't be a security risk if you're
|
||||||
We will be adding out-of-the-box support for the other major character
|
outputting it as UTF-8). Character encoding is, in general, a knotty
|
||||||
encodings shortly.
|
issue, but do yourself a favor and learn about it:
|
||||||
|
<http://www.joelonsoftware.com/articles/Unicode.html>
|
||||||
|
|
||||||
2. XHTML 1.0 Transitional. This is what the parser is outputting. For the most
|
2. Doctype: XHTML 1.0 Transitional
|
||||||
|
This is what the parser is outputting. For the most
|
||||||
part, it's compatible with HTML 4.01, but XHTML enforces some very nice things
|
part, it's compatible with HTML 4.01, but XHTML enforces some very nice things
|
||||||
that all web developers should use. Regardless, NO DOCTYPE is a NO. Quirks mode
|
that all web developers should use. Regardless, NO DOCTYPE is a NO. Quirks mode
|
||||||
has waaaay too many quirks for a little parser to handle. We did not select
|
has waaaay too many quirks for a little parser to handle. We did not select
|
||||||
strict in order to prevent ourselves from being too draconic on users, but
|
strict in order to prevent ourselves from being too draconic on users, but
|
||||||
this may be configurable in the future.
|
this may be configurable in the future. Do you want standards compliance?
|
||||||
|
The doctype is a good place to start.
|
||||||
|
|
||||||
3. IDs. They need to be unique, but without some knowledge of the
|
3. IDs
|
||||||
|
They need to be unique, but without some knowledge of the
|
||||||
rest of the document, it's difficult to know what's unique. %Attr.IDBlacklist
|
rest of the document, it's difficult to know what's unique. %Attr.IDBlacklist
|
||||||
needs to be set: we may want to consider disallowing IDs by default to
|
needs to be set: we may want to consider disallowing IDs by default to
|
||||||
save lazy programmers.
|
save lazy programmers.
|
||||||
|
|
||||||
4. [PROJECTED] Links. We're not going to try for spam protection (although
|
4. [PROJECTED] Links
|
||||||
|
We're not going to try for spam protection (although
|
||||||
some hooks for such a module might be nice) but we may offer the ability to
|
some hooks for such a module might be nice) but we may offer the ability to
|
||||||
only accept relative URLs. Pick the one that's right for you.
|
only accept relative URLs. Pick the one that's right for you.
|
||||||
|
|
||||||
5. CSS. While we can prevent the most flagrant cases from affecting your
|
5. CSS
|
||||||
|
While we can prevent the most flagrant cases from affecting your
|
||||||
layout (such as absolutely positioned elements), no amount of code is going
|
layout (such as absolutely positioned elements), no amount of code is going
|
||||||
to protect your pages from being attacked by garish colors and plain old
|
to protect your pages from being attacked by garish colors and plain old
|
||||||
bad taste. A neat feature would be the ability to define acceptable colors
|
bad taste. A neat feature would be the ability to define acceptable colors
|
||||||
in a document, but that's not likely to be implemented for a while. In the
|
in a document, but that's not likely to be implemented for a while. In the
|
||||||
meantime, be sure to make sure that floated elements (permitted, since they
|
meantime, be sure to make sure that floated elements (permitted, since they
|
||||||
can be quite useful) can't mess up your layout.
|
can be quite useful) can't mess up your layout. Once again, we may want to
|
||||||
|
disable this by default to protect lazy developers.
|
||||||
|
@@ -54,4 +54,4 @@ HTML Purifier is best suited for documents that require a rich array of
|
|||||||
HTML tags. Things like blog comments are, in all likelihood, most appropriately
|
HTML tags. Things like blog comments are, in all likelihood, most appropriately
|
||||||
written in an extremely restrictive set of markup that doesn't require
|
written in an extremely restrictive set of markup that doesn't require
|
||||||
all this functionality (or not written in HTML at all), although this may
|
all this functionality (or not written in HTML at all), although this may
|
||||||
be changing in the future.
|
be changing in the future with the addition of levels of filtering.
|
||||||
|
@@ -18,7 +18,7 @@
|
|||||||
* However, most users will only need to interface with the HTMLPurifier
|
* However, most users will only need to interface with the HTMLPurifier
|
||||||
* class, so this massive amount of infrastructure is usually concealed.
|
* class, so this massive amount of infrastructure is usually concealed.
|
||||||
* If you plan on working with the internals, be sure to include
|
* If you plan on working with the internals, be sure to include
|
||||||
* HTMLPurifier_ConfigDef and HTMLPurifier_Config.
|
* HTMLPurifier_ConfigSchema and HTMLPurifier_Config.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@@ -40,10 +40,12 @@
|
|||||||
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||||
*/
|
*/
|
||||||
|
|
||||||
require_once 'HTMLPurifier/ConfigDef.php';
|
// almost every class has an undocumented dependency on these, so make sure
|
||||||
|
// they get included
|
||||||
|
require_once 'HTMLPurifier/ConfigSchema.php';
|
||||||
require_once 'HTMLPurifier/Config.php';
|
require_once 'HTMLPurifier/Config.php';
|
||||||
|
|
||||||
require_once 'HTMLPurifier/Lexer.php';
|
require_once 'HTMLPurifier/Lexer.php';
|
||||||
require_once 'HTMLPurifier/HTMLDefinition.php';
|
|
||||||
require_once 'HTMLPurifier/Generator.php';
|
require_once 'HTMLPurifier/Generator.php';
|
||||||
require_once 'HTMLPurifier/Strategy/Core.php';
|
require_once 'HTMLPurifier/Strategy/Core.php';
|
||||||
require_once 'HTMLPurifier/Encoder.php';
|
require_once 'HTMLPurifier/Encoder.php';
|
||||||
|
@@ -3,10 +3,10 @@
|
|||||||
/**
|
/**
|
||||||
* Internal data-structure used in attribute validation to accumulate state.
|
* Internal data-structure used in attribute validation to accumulate state.
|
||||||
*
|
*
|
||||||
* All it is is a data-structure that holds objects that accumulate state, like
|
* This is a data-structure that holds objects that accumulate state, like
|
||||||
* HTMLPurifier_IDAccumulator.
|
* HTMLPurifier_IDAccumulator. It's better than using globals!
|
||||||
*
|
*
|
||||||
* @param Many functions that accept this object have it as a mandatory
|
* @note Many functions that accept this object have it as a mandatory
|
||||||
* parameter, even when there is no use for it. Though this is
|
* parameter, even when there is no use for it. Though this is
|
||||||
* for the same reasons as why HTMLPurifier_Config is a mandatory
|
* for the same reasons as why HTMLPurifier_Config is a mandatory
|
||||||
* parameter, it is also because you cannot assign a default value
|
* parameter, it is also because you cannot assign a default value
|
||||||
|
@@ -15,6 +15,12 @@ require_once 'HTMLPurifier/AttrContext.php';
|
|||||||
class HTMLPurifier_AttrDef
|
class HTMLPurifier_AttrDef
|
||||||
{
|
{
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Tells us whether or not an HTML attribute is minimized. Only the
|
||||||
|
* boolean attribute vapourware would use this.
|
||||||
|
*/
|
||||||
|
var $minimized = false;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Abstract function defined for functions that validate and clean strings.
|
* Abstract function defined for functions that validate and clean strings.
|
||||||
*
|
*
|
||||||
|
@@ -28,6 +28,8 @@ class HTMLPurifier_AttrDef_CSS extends HTMLPurifier_AttrDef
|
|||||||
if (!$declaration) continue;
|
if (!$declaration) continue;
|
||||||
if (!strpos($declaration, ':')) continue;
|
if (!strpos($declaration, ':')) continue;
|
||||||
list($property, $value) = explode(':', $declaration, 2);
|
list($property, $value) = explode(':', $declaration, 2);
|
||||||
|
$property = trim($property);
|
||||||
|
$value = trim($value);
|
||||||
if (!isset($definition->info[$property])) continue;
|
if (!isset($definition->info[$property])) continue;
|
||||||
// inefficient call, since the validator will do this again
|
// inefficient call, since the validator will do this again
|
||||||
if (strtolower(trim($value)) !== 'inherit') {
|
if (strtolower(trim($value)) !== 'inherit') {
|
||||||
|
@@ -11,9 +11,14 @@ class HTMLPurifier_AttrDef_Host extends HTMLPurifier_AttrDef
|
|||||||
{
|
{
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Instances of HTMLPurifier_AttrDef_IPv4 and HTMLPurifier_AttrDef_IPv6
|
* Instance of HTMLPurifier_AttrDef_IPv4 sub-validator
|
||||||
*/
|
*/
|
||||||
var $ipv4, $ipv6;
|
var $ipv4;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Instance of HTMLPurifier_AttrDef_IPv6 sub-validator
|
||||||
|
*/
|
||||||
|
var $ipv6;
|
||||||
|
|
||||||
function HTMLPurifier_AttrDef_Host() {
|
function HTMLPurifier_AttrDef_Host() {
|
||||||
$this->ipv4 = new HTMLPurifier_AttrDef_IPv4();
|
$this->ipv4 = new HTMLPurifier_AttrDef_IPv4();
|
||||||
|
@@ -5,7 +5,7 @@ require_once 'HTMLPurifier/URIScheme.php';
|
|||||||
require_once 'HTMLPurifier/URISchemeRegistry.php';
|
require_once 'HTMLPurifier/URISchemeRegistry.php';
|
||||||
require_once 'HTMLPurifier/AttrDef/Host.php';
|
require_once 'HTMLPurifier/AttrDef/Host.php';
|
||||||
|
|
||||||
HTMLPurifier_ConfigDef::define(
|
HTMLPurifier_ConfigSchema::define(
|
||||||
'URI', 'DefaultScheme', 'http', 'string',
|
'URI', 'DefaultScheme', 'http', 'string',
|
||||||
'Defines through what scheme the output will be served, in order to '.
|
'Defines through what scheme the output will be served, in order to '.
|
||||||
'select the proper object validator when no scheme information is present.'
|
'select the proper object validator when no scheme information is present.'
|
||||||
@@ -36,13 +36,13 @@ class HTMLPurifier_AttrDef_URI extends HTMLPurifier_AttrDef
|
|||||||
// for HTTP and thus won't work for our generic URI parsing
|
// for HTTP and thus won't work for our generic URI parsing
|
||||||
|
|
||||||
// according to the RFC... (but this cuts corners, i.e. non-validating)
|
// according to the RFC... (but this cuts corners, i.e. non-validating)
|
||||||
$r_URI = '!^'.
|
$r_URI = '!'.
|
||||||
'(([^:/?#<>]+):)?'. // 2. Scheme
|
'(([^:/?#<>\'"]+):)?'. // 2. Scheme
|
||||||
'(//([^/?#<>]*))?'. // 4. Authority
|
'(//([^/?#<>\'"]*))?'. // 4. Authority
|
||||||
'([^?#<>]*)'. // 5. Path
|
'([^?#<>\'"]*)'. // 5. Path
|
||||||
'(\?([^#<>]*))?'. // 7. Query
|
'(\?([^#<>\'"]*))?'. // 7. Query
|
||||||
'(#([^<>]*))?'. // 8. Fragment
|
'(#([^<>\'"]*))?'. // 8. Fragment
|
||||||
'$!';
|
'!';
|
||||||
|
|
||||||
$matches = array();
|
$matches = array();
|
||||||
$result = preg_match($r_URI, $uri, $matches);
|
$result = preg_match($r_URI, $uri, $matches);
|
||||||
|
@@ -4,13 +4,13 @@ require_once 'HTMLPurifier/AttrTransform.php';
|
|||||||
|
|
||||||
// this MUST be placed in post, as it assumes that any value in dir is valid
|
// this MUST be placed in post, as it assumes that any value in dir is valid
|
||||||
|
|
||||||
HTMLPurifier_ConfigDef::define(
|
HTMLPurifier_ConfigSchema::define(
|
||||||
'Attr', 'DefaultTextDir', 'ltr', 'string',
|
'Attr', 'DefaultTextDir', 'ltr', 'string',
|
||||||
'Defines the default text direction (ltr or rtl) of the document '.
|
'Defines the default text direction (ltr or rtl) of the document '.
|
||||||
'being parsed. This generally is the same as the value of the dir '.
|
'being parsed. This generally is the same as the value of the dir '.
|
||||||
'attribute in HTML, or ltr if that is not specified.'
|
'attribute in HTML, or ltr if that is not specified.'
|
||||||
);
|
);
|
||||||
HTMLPurifier_ConfigDef::defineAllowedValues(
|
HTMLPurifier_ConfigSchema::defineAllowedValues(
|
||||||
'Attr', 'DefaultTextDir', array( 'ltr', 'rtl' )
|
'Attr', 'DefaultTextDir', array( 'ltr', 'rtl' )
|
||||||
);
|
);
|
||||||
|
|
||||||
|
@@ -4,7 +4,7 @@ require_once 'HTMLPurifier/AttrTransform.php';
|
|||||||
|
|
||||||
// must be called POST validation
|
// must be called POST validation
|
||||||
|
|
||||||
HTMLPurifier_ConfigDef::define(
|
HTMLPurifier_ConfigSchema::define(
|
||||||
'Attr', 'DefaultInvalidImage', '', 'string',
|
'Attr', 'DefaultInvalidImage', '', 'string',
|
||||||
'This is the default image an img tag will be pointed to if it does '.
|
'This is the default image an img tag will be pointed to if it does '.
|
||||||
'not have a valid src attribute. In future versions, we may allow the '.
|
'not have a valid src attribute. In future versions, we may allow the '.
|
||||||
@@ -12,7 +12,7 @@ HTMLPurifier_ConfigDef::define(
|
|||||||
'not possible right now.'
|
'not possible right now.'
|
||||||
);
|
);
|
||||||
|
|
||||||
HTMLPurifier_ConfigDef::define(
|
HTMLPurifier_ConfigSchema::define(
|
||||||
'Attr', 'DefaultInvalidImageAlt', 'Invalid image', 'string',
|
'Attr', 'DefaultInvalidImageAlt', 'Invalid image', 'string',
|
||||||
'This is the content of the alt tag of an invalid image if the user '.
|
'This is the content of the alt tag of an invalid image if the user '.
|
||||||
'had not previously specified an alt attribute. It has no effect when the '.
|
'had not previously specified an alt attribute. It has no effect when the '.
|
||||||
|
@@ -5,14 +5,7 @@
|
|||||||
// false = delete parent node and all children
|
// false = delete parent node and all children
|
||||||
// array(...) = replace children nodes with these
|
// array(...) = replace children nodes with these
|
||||||
|
|
||||||
// this is the hardest one to implement. We'll use fancy regexp tricks
|
HTMLPurifier_ConfigSchema::define(
|
||||||
// right now, we only expect it to return TRUE or FALSE (it won't attempt
|
|
||||||
// to fix the tree)
|
|
||||||
|
|
||||||
// we may end up writing custom code for each HTML case
|
|
||||||
// in order to make it self correcting
|
|
||||||
|
|
||||||
HTMLPurifier_ConfigDef::define(
|
|
||||||
'Core', 'EscapeInvalidChildren', false, 'bool',
|
'Core', 'EscapeInvalidChildren', false, 'bool',
|
||||||
'When true, a child is found that is not allowed in the context of the '.
|
'When true, a child is found that is not allowed in the context of the '.
|
||||||
'parent element will be transformed into text as if it were ASCII. When '.
|
'parent element will be transformed into text as if it were ASCII. When '.
|
||||||
@@ -62,9 +55,7 @@ class HTMLPurifier_ChildDef
|
|||||||
* Custom validation class, accepts DTD child definitions
|
* Custom validation class, accepts DTD child definitions
|
||||||
*
|
*
|
||||||
* @warning Currently this class is an all or nothing proposition, that is,
|
* @warning Currently this class is an all or nothing proposition, that is,
|
||||||
* it will only give a bool return value. Table is the only
|
* it will only give a bool return value.
|
||||||
* child definition that uses this class, and we ought to give
|
|
||||||
* it a dedicated one.
|
|
||||||
*/
|
*/
|
||||||
class HTMLPurifier_ChildDef_Custom extends HTMLPurifier_ChildDef
|
class HTMLPurifier_ChildDef_Custom extends HTMLPurifier_ChildDef
|
||||||
{
|
{
|
||||||
@@ -307,4 +298,141 @@ class HTMLPurifier_ChildDef_Chameleon extends HTMLPurifier_ChildDef
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Definition for tables
|
||||||
|
*/
|
||||||
|
class HTMLPurifier_ChildDef_Table extends HTMLPurifier_ChildDef
|
||||||
|
{
|
||||||
|
var $allow_empty = false;
|
||||||
|
var $type = 'table';
|
||||||
|
function HTMLPurifier_ChildDef_Table() {}
|
||||||
|
function validateChildren($tokens_of_children, $config, $context) {
|
||||||
|
if (empty($tokens_of_children)) return false;
|
||||||
|
|
||||||
|
// this ensures that the loop gets run one last time before closing
|
||||||
|
// up. It's a little bit of a hack, but it works! Just make sure you
|
||||||
|
// get rid of the token later.
|
||||||
|
$tokens_of_children[] = false;
|
||||||
|
|
||||||
|
// only one of these elements is allowed in a table
|
||||||
|
$caption = false;
|
||||||
|
$thead = false;
|
||||||
|
$tfoot = false;
|
||||||
|
|
||||||
|
// as many of these as you want
|
||||||
|
$cols = array();
|
||||||
|
$content = array();
|
||||||
|
|
||||||
|
$nesting = 0; // current depth so we can determine nodes
|
||||||
|
$is_collecting = false; // are we globbing together tokens to package
|
||||||
|
// into one of the collectors?
|
||||||
|
$collection = array(); // collected nodes
|
||||||
|
$tag_index = 0; // the first node might be whitespace,
|
||||||
|
// so this tells us where the start tag is
|
||||||
|
|
||||||
|
foreach ($tokens_of_children as $token) {
|
||||||
|
$is_child = ($nesting == 0);
|
||||||
|
|
||||||
|
if ($token === false) {
|
||||||
|
// terminating sequence started
|
||||||
|
} elseif ($token->type == 'start') {
|
||||||
|
$nesting++;
|
||||||
|
} elseif ($token->type == 'end') {
|
||||||
|
$nesting--;
|
||||||
|
}
|
||||||
|
|
||||||
|
// handle node collection
|
||||||
|
if ($is_collecting) {
|
||||||
|
if ($is_child) {
|
||||||
|
// okay, let's stash the tokens away
|
||||||
|
// first token tells us the type of the collection
|
||||||
|
switch ($collection[$tag_index]->name) {
|
||||||
|
case 'tr':
|
||||||
|
case 'tbody':
|
||||||
|
$content[] = $collection;
|
||||||
|
break;
|
||||||
|
case 'caption':
|
||||||
|
if ($caption !== false) break;
|
||||||
|
$caption = $collection;
|
||||||
|
break;
|
||||||
|
case 'thead':
|
||||||
|
case 'tfoot':
|
||||||
|
// access the appropriate variable, $thead or $tfoot
|
||||||
|
$var = $collection[$tag_index]->name;
|
||||||
|
if ($$var === false) {
|
||||||
|
$$var = $collection;
|
||||||
|
} else {
|
||||||
|
// transmutate the first and less entries into
|
||||||
|
// tbody tags, and then put into content
|
||||||
|
$collection[$tag_index]->name = 'tbody';
|
||||||
|
$collection[count($collection)-1]->name = 'tbody';
|
||||||
|
$content[] = $collection;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
case 'colgroup':
|
||||||
|
$cols[] = $collection;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
$collection = array();
|
||||||
|
$is_collecting = false;
|
||||||
|
$tag_index = 0;
|
||||||
|
} else {
|
||||||
|
// add the node to the collection
|
||||||
|
$collection[] = $token;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// terminate
|
||||||
|
if ($token === false) break;
|
||||||
|
|
||||||
|
if ($is_child) {
|
||||||
|
// determine what we're dealing with
|
||||||
|
if ($token->name == 'col') {
|
||||||
|
// the only empty tag in the possie, we can handle it
|
||||||
|
// immediately
|
||||||
|
$cols[] = array_merge($collection, array($token));
|
||||||
|
$collection = array();
|
||||||
|
$tag_index = 0;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
switch($token->name) {
|
||||||
|
case 'caption':
|
||||||
|
case 'colgroup':
|
||||||
|
case 'thead':
|
||||||
|
case 'tfoot':
|
||||||
|
case 'tbody':
|
||||||
|
case 'tr':
|
||||||
|
$is_collecting = true;
|
||||||
|
$collection[] = $token;
|
||||||
|
continue;
|
||||||
|
default:
|
||||||
|
if ($token->type == 'text' && $token->is_whitespace) {
|
||||||
|
$collection[] = $token;
|
||||||
|
$tag_index++;
|
||||||
|
}
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (empty($content)) return false;
|
||||||
|
|
||||||
|
$ret = array();
|
||||||
|
if ($caption !== false) $ret = array_merge($ret, $caption);
|
||||||
|
if ($cols !== false) foreach ($cols as $token_array) $ret = array_merge($ret, $token_array);
|
||||||
|
if ($thead !== false) $ret = array_merge($ret, $thead);
|
||||||
|
if ($tfoot !== false) $ret = array_merge($ret, $tfoot);
|
||||||
|
foreach ($content as $token_array) $ret = array_merge($ret, $token_array);
|
||||||
|
if (!empty($collection) && $is_collecting == false){
|
||||||
|
// grab the trailing space
|
||||||
|
$ret = array_merge($ret, $collection);
|
||||||
|
}
|
||||||
|
|
||||||
|
array_pop($tokens_of_children); // remove phantom token
|
||||||
|
|
||||||
|
return ($ret === $tokens_of_children) ? true : $ret;
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
?>
|
?>
|
@@ -21,7 +21,7 @@ class HTMLPurifier_Config
|
|||||||
var $conf;
|
var $conf;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Reference HTMLPurifier_ConfigDef for value checking
|
* Reference HTMLPurifier_ConfigSchema for value checking
|
||||||
*/
|
*/
|
||||||
var $def;
|
var $def;
|
||||||
|
|
||||||
@@ -36,7 +36,7 @@ class HTMLPurifier_Config
|
|||||||
var $css_definition;
|
var $css_definition;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @param $definition HTMLPurifier_ConfigDef that defines what directives
|
* @param $definition HTMLPurifier_ConfigSchema that defines what directives
|
||||||
* are allowed.
|
* are allowed.
|
||||||
*/
|
*/
|
||||||
function HTMLPurifier_Config(&$definition) {
|
function HTMLPurifier_Config(&$definition) {
|
||||||
@@ -49,7 +49,7 @@ class HTMLPurifier_Config
|
|||||||
* @return Default HTMLPurifier_Config object.
|
* @return Default HTMLPurifier_Config object.
|
||||||
*/
|
*/
|
||||||
function createDefault() {
|
function createDefault() {
|
||||||
$definition =& HTMLPurifier_ConfigDef::instance();
|
$definition =& HTMLPurifier_ConfigSchema::instance();
|
||||||
$config = new HTMLPurifier_Config($definition);
|
$config = new HTMLPurifier_Config($definition);
|
||||||
return $config;
|
return $config;
|
||||||
}
|
}
|
||||||
@@ -80,6 +80,8 @@ class HTMLPurifier_Config
|
|||||||
E_USER_WARNING);
|
E_USER_WARNING);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
$value = $this->def->validate($value,
|
||||||
|
$this->def->info[$namespace][$key]->type);
|
||||||
if (is_string($value)) {
|
if (is_string($value)) {
|
||||||
// resolve value alias if defined
|
// resolve value alias if defined
|
||||||
if (isset($this->def->info[$namespace][$key]->aliases[$value])) {
|
if (isset($this->def->info[$namespace][$key]->aliases[$value])) {
|
||||||
@@ -93,8 +95,6 @@ class HTMLPurifier_Config
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
$value = $this->def->validate($value,
|
|
||||||
$this->def->info[$namespace][$key]->type);
|
|
||||||
if ($value === null) {
|
if ($value === null) {
|
||||||
trigger_error('Value is of invalid type', E_USER_WARNING);
|
trigger_error('Value is of invalid type', E_USER_WARNING);
|
||||||
return;
|
return;
|
||||||
|
@@ -2,9 +2,23 @@
|
|||||||
|
|
||||||
/**
|
/**
|
||||||
* Configuration definition, defines directives and their defaults.
|
* Configuration definition, defines directives and their defaults.
|
||||||
* @todo Build documentation generation capabilities.
|
* @todo The ability to define things multiple times is confusing and should
|
||||||
|
* be factored out to its own function named registerDependency() or
|
||||||
|
* addNote(), where only the namespace.name and an extra descriptions
|
||||||
|
* documenting the nature of the dependency are needed. Since it's
|
||||||
|
* possible that the dependency is registered before the configuration
|
||||||
|
* is defined, deferring it to some sort of cache until it actually
|
||||||
|
* gets defined would be wise, keeping it opaque until it does get
|
||||||
|
* defined. We could add a finalize() method which would cause it to
|
||||||
|
* error out if we get a dangling dependency. It's difficult, however,
|
||||||
|
* to know whether or not it's a dependency, or a codependency, that is
|
||||||
|
* neither of them fully depends on it. Where does the configuration go
|
||||||
|
* then? This could be partially resolved by allowing blanket definitions
|
||||||
|
* and then splitting them up into finer-grained versions, however, there
|
||||||
|
* might be implementation difficulties in ini files regarding order of
|
||||||
|
* execution.
|
||||||
*/
|
*/
|
||||||
class HTMLPurifier_ConfigDef {
|
class HTMLPurifier_ConfigSchema {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Defaults of the directives and namespaces.
|
* Defaults of the directives and namespaces.
|
||||||
@@ -26,15 +40,15 @@ class HTMLPurifier_ConfigDef {
|
|||||||
* Lookup table of allowed types.
|
* Lookup table of allowed types.
|
||||||
*/
|
*/
|
||||||
var $types = array(
|
var $types = array(
|
||||||
'string' => true,
|
'string' => 'String',
|
||||||
'istring' => true,
|
'istring' => 'Case-insensitive string',
|
||||||
'int' => true,
|
'int' => 'Integer',
|
||||||
'float' => true,
|
'float' => 'Float',
|
||||||
'bool' => true,
|
'bool' => 'Boolean',
|
||||||
'lookup' => true,
|
'lookup' => 'Lookup array',
|
||||||
'list' => true,
|
'list' => 'Array list',
|
||||||
'hash' => true,
|
'hash' => 'Associative array',
|
||||||
'mixed' => true
|
'mixed' => 'Mixed'
|
||||||
);
|
);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -46,6 +60,7 @@ class HTMLPurifier_ConfigDef {
|
|||||||
$this->defineNamespace('URI', 'Features regarding Uniform Resource Identifiers.');
|
$this->defineNamespace('URI', 'Features regarding Uniform Resource Identifiers.');
|
||||||
$this->defineNamespace('HTML', 'Configuration regarding allowed HTML.');
|
$this->defineNamespace('HTML', 'Configuration regarding allowed HTML.');
|
||||||
$this->defineNamespace('CSS', 'Configuration regarding allowed CSS.');
|
$this->defineNamespace('CSS', 'Configuration regarding allowed CSS.');
|
||||||
|
$this->defineNamespace('Test', 'Developer testing configuration for our unit tests.');
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -56,7 +71,7 @@ class HTMLPurifier_ConfigDef {
|
|||||||
if ($prototype !== null) {
|
if ($prototype !== null) {
|
||||||
$instance = $prototype;
|
$instance = $prototype;
|
||||||
} elseif ($instance === null || $prototype === true) {
|
} elseif ($instance === null || $prototype === true) {
|
||||||
$instance = new HTMLPurifier_ConfigDef();
|
$instance = new HTMLPurifier_ConfigSchema();
|
||||||
$instance->initialize();
|
$instance->initialize();
|
||||||
}
|
}
|
||||||
return $instance;
|
return $instance;
|
||||||
@@ -65,9 +80,6 @@ class HTMLPurifier_ConfigDef {
|
|||||||
/**
|
/**
|
||||||
* Defines a directive for configuration
|
* Defines a directive for configuration
|
||||||
* @warning Will fail of directive's namespace is defined
|
* @warning Will fail of directive's namespace is defined
|
||||||
* @todo Collect information on description and allow redefinition
|
|
||||||
* so that multiple files can register a dependency on a
|
|
||||||
* configuration directive.
|
|
||||||
* @param $namespace Namespace the directive is in
|
* @param $namespace Namespace the directive is in
|
||||||
* @param $name Key of directive
|
* @param $name Key of directive
|
||||||
* @param $default Default value of directive
|
* @param $default Default value of directive
|
||||||
@@ -79,12 +91,17 @@ class HTMLPurifier_ConfigDef {
|
|||||||
$namespace, $name, $default, $type,
|
$namespace, $name, $default, $type,
|
||||||
$description
|
$description
|
||||||
) {
|
) {
|
||||||
$def =& HTMLPurifier_ConfigDef::instance();
|
$def =& HTMLPurifier_ConfigSchema::instance();
|
||||||
if (!isset($def->info[$namespace])) {
|
if (!isset($def->info[$namespace])) {
|
||||||
trigger_error('Cannot define directive for undefined namespace',
|
trigger_error('Cannot define directive for undefined namespace',
|
||||||
E_USER_ERROR);
|
E_USER_ERROR);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
if (!ctype_alnum($name)) {
|
||||||
|
trigger_error('Directive name must be alphanumeric',
|
||||||
|
E_USER_ERROR);
|
||||||
|
return;
|
||||||
|
}
|
||||||
if (isset($def->info[$namespace][$name])) {
|
if (isset($def->info[$namespace][$name])) {
|
||||||
if (
|
if (
|
||||||
$def->info[$namespace][$name]->type !== $type ||
|
$def->info[$namespace][$name]->type !== $type ||
|
||||||
@@ -121,17 +138,19 @@ class HTMLPurifier_ConfigDef {
|
|||||||
* @param $description Description of the namespace
|
* @param $description Description of the namespace
|
||||||
*/
|
*/
|
||||||
function defineNamespace($namespace, $description) {
|
function defineNamespace($namespace, $description) {
|
||||||
$def =& HTMLPurifier_ConfigDef::instance();
|
$def =& HTMLPurifier_ConfigSchema::instance();
|
||||||
if (isset($def->info[$namespace])) {
|
if (isset($def->info[$namespace])) {
|
||||||
trigger_error('Cannot redefine namespace', E_USER_ERROR);
|
trigger_error('Cannot redefine namespace', E_USER_ERROR);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
if (!ctype_alnum($namespace)) {
|
||||||
|
trigger_error('Namespace name must be alphanumeric',
|
||||||
|
E_USER_ERROR);
|
||||||
|
return;
|
||||||
|
}
|
||||||
$def->info[$namespace] = array();
|
$def->info[$namespace] = array();
|
||||||
$def->info_namespace[$namespace] = new HTMLPurifier_ConfigEntity_Namespace();
|
$def->info_namespace[$namespace] = new HTMLPurifier_ConfigEntity_Namespace();
|
||||||
$backtrace = debug_backtrace();
|
$def->info_namespace[$namespace]->description = $description;
|
||||||
$file = $def->mungeFilename($backtrace[0]['file']);
|
|
||||||
$line = $backtrace[0]['line'];
|
|
||||||
$def->info_namespace[$namespace]->addDescription($file,$line,$description);
|
|
||||||
$def->defaults[$namespace] = array();
|
$def->defaults[$namespace] = array();
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -146,7 +165,7 @@ class HTMLPurifier_ConfigDef {
|
|||||||
* @param $real Value aliased value will be converted into
|
* @param $real Value aliased value will be converted into
|
||||||
*/
|
*/
|
||||||
function defineValueAliases($namespace, $name, $aliases) {
|
function defineValueAliases($namespace, $name, $aliases) {
|
||||||
$def =& HTMLPurifier_ConfigDef::instance();
|
$def =& HTMLPurifier_ConfigSchema::instance();
|
||||||
if (!isset($def->info[$namespace][$name])) {
|
if (!isset($def->info[$namespace][$name])) {
|
||||||
trigger_error('Cannot set value alias for non-existant directive',
|
trigger_error('Cannot set value alias for non-existant directive',
|
||||||
E_USER_ERROR);
|
E_USER_ERROR);
|
||||||
@@ -176,7 +195,7 @@ class HTMLPurifier_ConfigDef {
|
|||||||
* @param $allowed_values Arraylist of allowed values
|
* @param $allowed_values Arraylist of allowed values
|
||||||
*/
|
*/
|
||||||
function defineAllowedValues($namespace, $name, $allowed_values) {
|
function defineAllowedValues($namespace, $name, $allowed_values) {
|
||||||
$def =& HTMLPurifier_ConfigDef::instance();
|
$def =& HTMLPurifier_ConfigSchema::instance();
|
||||||
if (!isset($def->info[$namespace][$name])) {
|
if (!isset($def->info[$namespace][$name])) {
|
||||||
trigger_error('Cannot define allowed values for undefined directive',
|
trigger_error('Cannot define allowed values for undefined directive',
|
||||||
E_USER_ERROR);
|
E_USER_ERROR);
|
||||||
@@ -255,27 +274,19 @@ class HTMLPurifier_ConfigDef {
|
|||||||
/**
|
/**
|
||||||
* Base class for configuration entity
|
* Base class for configuration entity
|
||||||
*/
|
*/
|
||||||
class HTMLPurifier_ConfigEntity
|
class HTMLPurifier_ConfigEntity {}
|
||||||
{
|
|
||||||
/**
|
|
||||||
* Plaintext descriptions of the configuration entity is. Organized by
|
|
||||||
* file and line number, so multiple descriptions are allowed.
|
|
||||||
*/
|
|
||||||
var $descriptions = array();
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Adds a description to the array
|
|
||||||
*/
|
|
||||||
function addDescription($file, $line, $description) {
|
|
||||||
if (!isset($this->descriptions[$file])) $this->descriptions[$file] = array();
|
|
||||||
$this->descriptions[$file][$line] = $description;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Structure object describing of a namespace
|
* Structure object describing of a namespace
|
||||||
*/
|
*/
|
||||||
class HTMLPurifier_ConfigEntity_Namespace extends HTMLPurifier_ConfigEntity {}
|
class HTMLPurifier_ConfigEntity_Namespace extends HTMLPurifier_ConfigEntity {
|
||||||
|
|
||||||
|
/**
|
||||||
|
* String description of what kinds of directives go in this namespace.
|
||||||
|
*/
|
||||||
|
var $description;
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Structure object containing definition of a directive.
|
* Structure object containing definition of a directive.
|
||||||
@@ -307,6 +318,19 @@ class HTMLPurifier_ConfigEntity_Directive extends HTMLPurifier_ConfigEntity
|
|||||||
* - mixed (anything goes)
|
* - mixed (anything goes)
|
||||||
*/
|
*/
|
||||||
var $type = 'mixed';
|
var $type = 'mixed';
|
||||||
|
/**
|
||||||
|
* Plaintext descriptions of the configuration entity is. Organized by
|
||||||
|
* file and line number, so multiple descriptions are allowed.
|
||||||
|
*/
|
||||||
|
var $descriptions = array();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Adds a description to the array
|
||||||
|
*/
|
||||||
|
function addDescription($file, $line, $description) {
|
||||||
|
if (!isset($this->descriptions[$file])) $this->descriptions[$file] = array();
|
||||||
|
$this->descriptions[$file][$line] = $description;
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
@@ -2,7 +2,7 @@
|
|||||||
|
|
||||||
require_once 'HTMLPurifier/EntityLookup.php';
|
require_once 'HTMLPurifier/EntityLookup.php';
|
||||||
|
|
||||||
HTMLPurifier_ConfigDef::define(
|
HTMLPurifier_ConfigSchema::define(
|
||||||
'Core', 'Encoding', 'utf-8', 'istring',
|
'Core', 'Encoding', 'utf-8', 'istring',
|
||||||
'If for some reason you are unable to convert all webpages to UTF-8, '.
|
'If for some reason you are unable to convert all webpages to UTF-8, '.
|
||||||
'you can use this directive as a stop-gap compatibility change to '.
|
'you can use this directive as a stop-gap compatibility change to '.
|
||||||
@@ -17,14 +17,25 @@ HTMLPurifier_ConfigDef::define(
|
|||||||
|
|
||||||
if ( !function_exists('iconv') ) {
|
if ( !function_exists('iconv') ) {
|
||||||
// only encodings with native PHP support
|
// only encodings with native PHP support
|
||||||
HTMLPurifier_ConfigDef::defineAllowedValues(
|
HTMLPurifier_ConfigSchema::defineAllowedValues(
|
||||||
'Core', 'Encoding', array(
|
'Core', 'Encoding', array(
|
||||||
'utf-8',
|
'utf-8',
|
||||||
'iso-8859-1'
|
'iso-8859-1'
|
||||||
)
|
)
|
||||||
);
|
);
|
||||||
|
HTMLPurifier_ConfigSchema::defineValueAliases(
|
||||||
|
'Core', 'Encoding', array(
|
||||||
|
'iso8859-1' => 'iso-8859-1'
|
||||||
|
)
|
||||||
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
HTMLPurifier_ConfigSchema::define(
|
||||||
|
'Test', 'ForceNoIconv', false, 'bool',
|
||||||
|
'When set to true, HTMLPurifier_Encoder will act as if iconv does not '.
|
||||||
|
'exist and use only pure PHP implementations.'
|
||||||
|
);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* A UTF-8 specific character encoder that handles cleaning and transforming.
|
* A UTF-8 specific character encoder that handles cleaning and transforming.
|
||||||
*/
|
*/
|
||||||
@@ -77,7 +88,7 @@ class HTMLPurifier_Encoder
|
|||||||
if ($iconv && !$force_php) {
|
if ($iconv && !$force_php) {
|
||||||
// do the shortcut way
|
// do the shortcut way
|
||||||
$str = @iconv('UTF-8', 'UTF-8//IGNORE', $str);
|
$str = @iconv('UTF-8', 'UTF-8//IGNORE', $str);
|
||||||
return strtr($str, $non_sgml_chars);;
|
return strtr($str, $non_sgml_chars);
|
||||||
}
|
}
|
||||||
|
|
||||||
$mState = 0; // cached expected number of octets after the current octet
|
$mState = 0; // cached expected number of octets after the current octet
|
||||||
@@ -260,9 +271,9 @@ class HTMLPurifier_Encoder
|
|||||||
if ($iconv === null) $iconv = function_exists('iconv');
|
if ($iconv === null) $iconv = function_exists('iconv');
|
||||||
$encoding = $config->get('Core', 'Encoding');
|
$encoding = $config->get('Core', 'Encoding');
|
||||||
if ($encoding === 'utf-8') return $str;
|
if ($encoding === 'utf-8') return $str;
|
||||||
if ($iconv) {
|
if ($iconv && !$config->get('Test', 'ForceNoIconv')) {
|
||||||
return @iconv($encoding, 'utf-8//IGNORE', $str);
|
return @iconv($encoding, 'utf-8//IGNORE', $str);
|
||||||
} elseif ($encoding === 'iso-8895-1') {
|
} elseif ($encoding === 'iso-8859-1') {
|
||||||
return @utf8_encode($str);
|
return @utf8_encode($str);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -277,10 +288,10 @@ class HTMLPurifier_Encoder
|
|||||||
if ($iconv === null) $iconv = function_exists('iconv');
|
if ($iconv === null) $iconv = function_exists('iconv');
|
||||||
$encoding = $config->get('Core', 'Encoding');
|
$encoding = $config->get('Core', 'Encoding');
|
||||||
if ($encoding === 'utf-8') return $str;
|
if ($encoding === 'utf-8') return $str;
|
||||||
if ($iconv) {
|
if ($iconv && !$config->get('Test', 'ForceNoIconv')) {
|
||||||
return @iconv('utf-8', $encoding . '//IGNORE', $str);
|
return @iconv('utf-8', $encoding . '//IGNORE', $str);
|
||||||
} elseif ($encoding === 'iso-8895-1') {
|
} elseif ($encoding === 'iso-8859-1') {
|
||||||
return @utf8_encode($str);
|
return @utf8_decode($str);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -88,7 +88,6 @@ class HTMLPurifier_EntityParser
|
|||||||
* either index 1, 2 or 3 set with a hex value, dec value,
|
* either index 1, 2 or 3 set with a hex value, dec value,
|
||||||
* or string (respectively).
|
* or string (respectively).
|
||||||
* @returns Replacement string.
|
* @returns Replacement string.
|
||||||
* @todo Implement string translations
|
|
||||||
*/
|
*/
|
||||||
|
|
||||||
// +----------+----------+----------+----------+
|
// +----------+----------+----------+----------+
|
||||||
|
@@ -4,7 +4,7 @@
|
|||||||
|
|
||||||
require_once 'HTMLPurifier/Lexer.php';
|
require_once 'HTMLPurifier/Lexer.php';
|
||||||
|
|
||||||
HTMLPurifier_ConfigDef::define(
|
HTMLPurifier_ConfigSchema::define(
|
||||||
'Core', 'CleanUTF8DuringGeneration', false, 'bool',
|
'Core', 'CleanUTF8DuringGeneration', false, 'bool',
|
||||||
'When true, HTMLPurifier_Generator will also check all strings it '.
|
'When true, HTMLPurifier_Generator will also check all strings it '.
|
||||||
'escapes for UTF-8 well-formedness as a defense in depth measure. '.
|
'escapes for UTF-8 well-formedness as a defense in depth measure. '.
|
||||||
@@ -15,6 +15,29 @@ HTMLPurifier_ConfigDef::define(
|
|||||||
'generateFromTokens.'
|
'generateFromTokens.'
|
||||||
);
|
);
|
||||||
|
|
||||||
|
HTMLPurifier_ConfigSchema::define(
|
||||||
|
'Core', 'XHTML', true, 'bool',
|
||||||
|
'Determines whether or not output is XHTML or not. When disabled, HTML '.
|
||||||
|
'Purifier goes into HTML 4.01 removes XHTML-specific markup constructs, '.
|
||||||
|
'such as boolean attribute expansion and trailing slashes in empty tags. '.
|
||||||
|
'This directive was available since 1.1.'
|
||||||
|
);
|
||||||
|
|
||||||
|
// extension constraints could be factored into ConfigSchema
|
||||||
|
HTMLPurifier_ConfigSchema::define(
|
||||||
|
'Core', 'TidyFormat', false, 'bool',
|
||||||
|
'<p>Determines whether or not to run Tidy on the final output for pretty '.
|
||||||
|
'formatting reasons, such as indentation and wrap.</p><p>This can greatly '.
|
||||||
|
'improve readability for editors who are hand-editing the HTML, but is '.
|
||||||
|
'by no means necessary as HTML Purifier has already fixed all major '.
|
||||||
|
'errors the HTML may have had. Tidy is a non-default extension, and this directive '.
|
||||||
|
'will silently fail if Tidy is not available.</p><p>If you are looking to make '.
|
||||||
|
'the overall look of your page\'s source better, I recommend running Tidy '.
|
||||||
|
'on the entire page rather than just user-content (after all, the '.
|
||||||
|
'indentation relative to the containing blocks will be incorrect).</p><p>This '.
|
||||||
|
'directive was available since 1.1.1.</p>'
|
||||||
|
);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Generates HTML from tokens.
|
* Generates HTML from tokens.
|
||||||
*/
|
*/
|
||||||
@@ -22,11 +45,16 @@ class HTMLPurifier_Generator
|
|||||||
{
|
{
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Bool cache of the CleanUTF8DuringGeneration directive.
|
* Bool cache of %Core.CleanUTF8DuringGeneration
|
||||||
* @private
|
* @private
|
||||||
*/
|
*/
|
||||||
var $_clean_utf8 = false;
|
var $_clean_utf8 = false;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Bool cache of %Core.XHTML
|
||||||
|
*/
|
||||||
|
var $_xhtml = true;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Generates HTML from an array of tokens.
|
* Generates HTML from an array of tokens.
|
||||||
* @param $tokens Array of HTMLPurifier_Token
|
* @param $tokens Array of HTMLPurifier_Token
|
||||||
@@ -38,10 +66,35 @@ class HTMLPurifier_Generator
|
|||||||
$html = '';
|
$html = '';
|
||||||
if (!$config) $config = HTMLPurifier_Config::createDefault();
|
if (!$config) $config = HTMLPurifier_Config::createDefault();
|
||||||
$this->_clean_utf8 = $config->get('Core', 'CleanUTF8DuringGeneration');
|
$this->_clean_utf8 = $config->get('Core', 'CleanUTF8DuringGeneration');
|
||||||
|
$this->_xhtml = $config->get('Core', 'XHTML');
|
||||||
if (!$tokens) return '';
|
if (!$tokens) return '';
|
||||||
foreach ($tokens as $token) {
|
foreach ($tokens as $token) {
|
||||||
$html .= $this->generateFromToken($token);
|
$html .= $this->generateFromToken($token);
|
||||||
}
|
}
|
||||||
|
if ($config->get('Core', 'TidyFormat') && extension_loaded('tidy')) {
|
||||||
|
|
||||||
|
$tidy_options = array(
|
||||||
|
'indent'=> true,
|
||||||
|
'output-xhtml' => $this->_xhtml,
|
||||||
|
'show-body-only' => true,
|
||||||
|
'indent-spaces' => 2,
|
||||||
|
'wrap' => 68,
|
||||||
|
);
|
||||||
|
if (version_compare(PHP_VERSION, '5', '<')) {
|
||||||
|
tidy_set_encoding('utf8');
|
||||||
|
foreach ($tidy_options as $key => $value) {
|
||||||
|
tidy_setopt($key, $value);
|
||||||
|
}
|
||||||
|
tidy_parse_string($html);
|
||||||
|
tidy_clean_repair();
|
||||||
|
$html = tidy_get_output();
|
||||||
|
} else {
|
||||||
|
$tidy = new Tidy;
|
||||||
|
$tidy->parseString($html, $tidy_options, 'utf8');
|
||||||
|
$tidy->cleanRepair();
|
||||||
|
$html = (string) $tidy;
|
||||||
|
}
|
||||||
|
}
|
||||||
return $html;
|
return $html;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -61,7 +114,9 @@ class HTMLPurifier_Generator
|
|||||||
|
|
||||||
} elseif ($token->type == 'empty') {
|
} elseif ($token->type == 'empty') {
|
||||||
$attr = $this->generateAttributes($token->attributes);
|
$attr = $this->generateAttributes($token->attributes);
|
||||||
return '<' . $token->name . ($attr ? ' ' : '') . $attr . ' />';
|
return '<' . $token->name . ($attr ? ' ' : '') . $attr .
|
||||||
|
( $this->_xhtml ? ' /': '' )
|
||||||
|
. '>';
|
||||||
|
|
||||||
} elseif ($token->type == 'text') {
|
} elseif ($token->type == 'text') {
|
||||||
return $this->escape($token->data);
|
return $this->escape($token->data);
|
||||||
@@ -80,6 +135,11 @@ class HTMLPurifier_Generator
|
|||||||
function generateAttributes($assoc_array_of_attributes) {
|
function generateAttributes($assoc_array_of_attributes) {
|
||||||
$html = '';
|
$html = '';
|
||||||
foreach ($assoc_array_of_attributes as $key => $value) {
|
foreach ($assoc_array_of_attributes as $key => $value) {
|
||||||
|
if (!$this->_xhtml) {
|
||||||
|
// remove namespaced attributes
|
||||||
|
if (strpos($key, ':') !== false) continue;
|
||||||
|
// also needed: check for attribute minimization
|
||||||
|
}
|
||||||
$html .= $key.'="'.$this->escape($value).'" ';
|
$html .= $key.'="'.$this->escape($value).'" ';
|
||||||
}
|
}
|
||||||
return rtrim($html);
|
return rtrim($html);
|
||||||
|
@@ -209,8 +209,7 @@ class HTMLPurifier_HTMLDefinition
|
|||||||
|
|
||||||
$this->info['a']->child = $e_a_content;
|
$this->info['a']->child = $e_a_content;
|
||||||
|
|
||||||
$this->info['table']->child = new HTMLPurifier_ChildDef_Custom(
|
$this->info['table']->child = new HTMLPurifier_ChildDef_Table();
|
||||||
'(caption?, (col*|colgroup*), thead?, tfoot?, (tbody+|tr+))');
|
|
||||||
|
|
||||||
// not a real entity, watch the double underscore
|
// not a real entity, watch the double underscore
|
||||||
$e__row = new HTMLPurifier_ChildDef_Required('tr');
|
$e__row = new HTMLPurifier_ChildDef_Required('tr');
|
||||||
|
@@ -4,12 +4,11 @@ require_once 'HTMLPurifier/Token.php';
|
|||||||
require_once 'HTMLPurifier/Encoder.php';
|
require_once 'HTMLPurifier/Encoder.php';
|
||||||
require_once 'HTMLPurifier/EntityParser.php';
|
require_once 'HTMLPurifier/EntityParser.php';
|
||||||
|
|
||||||
HTMLPurifier_ConfigDef::define(
|
HTMLPurifier_ConfigSchema::define(
|
||||||
'Core', 'AcceptFullDocuments', true, 'bool',
|
'Core', 'AcceptFullDocuments', true, 'bool',
|
||||||
'This parameter determines whether or not the filter should accept full '.
|
'This parameter determines whether or not the filter should accept full '.
|
||||||
'HTML documents, not just HTML fragments. When on, it will '.
|
'HTML documents, not just HTML fragments. When on, it will '.
|
||||||
'drop all sections except the content between body. Depending on '.
|
'drop all sections except the content between body.'
|
||||||
'the implementation in use, this may speed up document parse times.'
|
|
||||||
);
|
);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@@ -12,15 +12,19 @@ require_once 'HTMLPurifier/TokenFactory.php';
|
|||||||
* documents, it performs twenty times faster than
|
* documents, it performs twenty times faster than
|
||||||
* HTMLPurifier_Lexer_DirectLex,and is the default choice for PHP 5.
|
* HTMLPurifier_Lexer_DirectLex,and is the default choice for PHP 5.
|
||||||
*
|
*
|
||||||
* @notice
|
* @note Any empty elements will have empty tokens associated with them, even if
|
||||||
* Any empty elements will have empty tokens associated with them, even if
|
|
||||||
* this is prohibited by the spec. This is cannot be fixed until the spec
|
* this is prohibited by the spec. This is cannot be fixed until the spec
|
||||||
* comes into play.
|
* comes into play.
|
||||||
*
|
*
|
||||||
* @todo Determine DOM's entity parsing behavior, point to local entity files
|
* @note PHP's DOM extension does not actually parse any entities, we use
|
||||||
* if necessary.
|
* our own function to do that.
|
||||||
* @todo Make div access less fragile, and refrain from preprocessing when
|
*
|
||||||
* HTML tag and friends are already present.
|
* @warning DOM tends to drop whitespace, which may wreak havoc on indenting.
|
||||||
|
* If this is a huge problem, due to the fact that HTML is hand
|
||||||
|
* edited and you are unable to get a parser cache that caches
|
||||||
|
* the output of HTML Purifier while keeping the original HTML lying
|
||||||
|
* around, you may want to run Tidy on the resulting output or use
|
||||||
|
* HTMLPurifier_Lexer_DirectLex
|
||||||
*/
|
*/
|
||||||
|
|
||||||
class HTMLPurifier_Lexer_DOMLex extends HTMLPurifier_Lexer
|
class HTMLPurifier_Lexer_DOMLex extends HTMLPurifier_Lexer
|
||||||
@@ -87,19 +91,20 @@ class HTMLPurifier_Lexer_DOMLex extends HTMLPurifier_Lexer
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
$attr = $node->hasAttributes() ?
|
||||||
|
$this->transformAttrToAssoc($node->attributes) :
|
||||||
|
array();
|
||||||
|
|
||||||
// We still have to make sure that the element actually IS empty
|
// We still have to make sure that the element actually IS empty
|
||||||
if (!$node->childNodes->length) {
|
if (!$node->childNodes->length) {
|
||||||
if ($collect) {
|
if ($collect) {
|
||||||
$tokens[] = $this->factory->createEmpty(
|
$tokens[] = $this->factory->createEmpty($node->tagName, $attr);
|
||||||
$node->tagName,
|
|
||||||
$this->transformAttrToAssoc($node->attributes)
|
|
||||||
);
|
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
if ($collect) { // don't wrap on first iteration
|
if ($collect) { // don't wrap on first iteration
|
||||||
$tokens[] = $this->factory->createStart(
|
$tokens[] = $this->factory->createStart(
|
||||||
$tag_name = $node->tagName, // somehow, it get's dropped
|
$tag_name = $node->tagName, // somehow, it get's dropped
|
||||||
$this->transformAttrToAssoc($node->attributes)
|
$attr
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
foreach ($node->childNodes as $node) {
|
foreach ($node->childNodes as $node) {
|
||||||
|
@@ -8,7 +8,7 @@
|
|||||||
* features, such as custom tags, custom parsing of text, etc.
|
* features, such as custom tags, custom parsing of text, etc.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
HTMLPurifier_ConfigDef::define(
|
HTMLPurifier_ConfigSchema::define(
|
||||||
'Core', 'EscapeInvalidTags', false, 'bool',
|
'Core', 'EscapeInvalidTags', false, 'bool',
|
||||||
'When true, invalid tags will be written back to the document as plain '.
|
'When true, invalid tags will be written back to the document as plain '.
|
||||||
'text. Otherwise, they are silently dropped.'
|
'text. Otherwise, they are silently dropped.'
|
||||||
|
@@ -187,6 +187,7 @@ class HTMLPurifier_Strategy_FixNesting extends HTMLPurifier_Strategy
|
|||||||
if (!$parent_def->child->allow_empty) {
|
if (!$parent_def->child->allow_empty) {
|
||||||
// we need to do a double-check
|
// we need to do a double-check
|
||||||
$i = $parent_index;
|
$i = $parent_index;
|
||||||
|
array_pop($stack);
|
||||||
}
|
}
|
||||||
|
|
||||||
// PROJECTED OPTIMIZATION: Process all children elements before
|
// PROJECTED OPTIMIZATION: Process all children elements before
|
||||||
|
@@ -3,10 +3,10 @@
|
|||||||
require_once 'HTMLPurifier/Strategy.php';
|
require_once 'HTMLPurifier/Strategy.php';
|
||||||
require_once 'HTMLPurifier/HTMLDefinition.php';
|
require_once 'HTMLPurifier/HTMLDefinition.php';
|
||||||
require_once 'HTMLPurifier/IDAccumulator.php';
|
require_once 'HTMLPurifier/IDAccumulator.php';
|
||||||
require_once 'HTMLPurifier/ConfigDef.php';
|
require_once 'HTMLPurifier/ConfigSchema.php';
|
||||||
require_once 'HTMLPurifier/AttrContext.php';
|
require_once 'HTMLPurifier/AttrContext.php';
|
||||||
|
|
||||||
HTMLPurifier_ConfigDef::define(
|
HTMLPurifier_ConfigSchema::define(
|
||||||
'Attr', 'IDBlacklist', array(), 'list',
|
'Attr', 'IDBlacklist', array(), 'list',
|
||||||
'Array of IDs not allowed in the document.');
|
'Array of IDs not allowed in the document.');
|
||||||
|
|
||||||
|
@@ -1,6 +1,6 @@
|
|||||||
<?php
|
<?php
|
||||||
|
|
||||||
HTMLPurifier_ConfigDef::define(
|
HTMLPurifier_ConfigSchema::define(
|
||||||
'URI', 'AllowedSchemes', array(
|
'URI', 'AllowedSchemes', array(
|
||||||
'http' => true, // "Hypertext Transfer Protocol", nuf' said
|
'http' => true, // "Hypertext Transfer Protocol", nuf' said
|
||||||
'https' => true, // HTTP over SSL (Secure Socket Layer)
|
'https' => true, // HTTP over SSL (Secure Socket Layer)
|
||||||
@@ -16,7 +16,7 @@ HTMLPurifier_ConfigDef::define(
|
|||||||
'prevents XSS attacks from using pseudo-schemes like javascript or mocha.'
|
'prevents XSS attacks from using pseudo-schemes like javascript or mocha.'
|
||||||
);
|
);
|
||||||
|
|
||||||
HTMLPurifier_ConfigDef::define(
|
HTMLPurifier_ConfigSchema::define(
|
||||||
'URI', 'OverrideAllowedSchemes', true, 'bool',
|
'URI', 'OverrideAllowedSchemes', true, 'bool',
|
||||||
'If this is set to true (which it is by default), you can override '.
|
'If this is set to true (which it is by default), you can override '.
|
||||||
'%URI.AllowedSchemes by simply registering a HTMLPurifier_URIScheme '.
|
'%URI.AllowedSchemes by simply registering a HTMLPurifier_URIScheme '.
|
||||||
|
17
test-settings.sample.php
Normal file
17
test-settings.sample.php
Normal file
@@ -0,0 +1,17 @@
|
|||||||
|
<?php
|
||||||
|
|
||||||
|
// This file is necessary to run the unit tests and profiling
|
||||||
|
// scripts.
|
||||||
|
|
||||||
|
// Is PEAR available on your system? If it isn't, set to false. If PEAR
|
||||||
|
// is not part of the default include_path, add it.
|
||||||
|
$GLOBALS['HTMLPurifierTest']['PEAR'] = true;
|
||||||
|
|
||||||
|
// How many times should profiling scripts iterate over the function? More runs
|
||||||
|
// means more accurate results, but they'll take longer to perform.
|
||||||
|
$GLOBALS['HTMLPurifierTest']['Runs'] = 2;
|
||||||
|
|
||||||
|
// Where is SimpleTest located?
|
||||||
|
$simpletest_location = '/path/to/simpletest/';
|
||||||
|
|
||||||
|
?>
|
@@ -92,6 +92,10 @@ class HTMLPurifier_AttrDef_CSSTest extends HTMLPurifier_AttrDefHarness
|
|||||||
$this->assertDef('position:absolute;', false);
|
$this->assertDef('position:absolute;', false);
|
||||||
$this->assertDef('background-image:url(javascript:alert\(\));', false);
|
$this->assertDef('background-image:url(javascript:alert\(\));', false);
|
||||||
|
|
||||||
|
// airy input
|
||||||
|
$this->assertDef(' font-weight : bold; color : #ff0000',
|
||||||
|
'font-weight:bold;color:#ff0000;');
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@@ -158,9 +158,15 @@ class HTMLPurifier_AttrDef_URITest extends HTMLPurifier_AttrDefHarness
|
|||||||
$uri[18] = '/a/b';
|
$uri[18] = '/a/b';
|
||||||
$components[18] = array(null, null, null, '/a/b', null);
|
$components[18] = array(null, null, null, '/a/b', null);
|
||||||
|
|
||||||
// it's not allowed, so generic URI should get it
|
// result of malformed tag, gracefully handle error
|
||||||
$uri[19] = '<';
|
$uri[19] = 'http://www.google.com/\'>"';
|
||||||
$expect_uri[19] = false;
|
$components[19] = array(null, 'www.google.com', null, '/', null);
|
||||||
|
$expect_uri[19] = 'http://www.google.com/';
|
||||||
|
|
||||||
|
// test empty
|
||||||
|
$uri[20] = '';
|
||||||
|
$components[20] = array(null, null, null, '', null);
|
||||||
|
$expect_uri[20] = '';
|
||||||
|
|
||||||
foreach ($uri as $i => $value) {
|
foreach ($uri as $i => $value) {
|
||||||
|
|
||||||
|
@@ -1,7 +1,7 @@
|
|||||||
<?php
|
<?php
|
||||||
|
|
||||||
require_once 'HTMLPurifier/ChildDef.php';
|
require_once 'HTMLPurifier/ChildDef.php';
|
||||||
require_once 'HTMLPurifier/Lexer.php';
|
require_once 'HTMLPurifier/Lexer/DirectLex.php';
|
||||||
require_once 'HTMLPurifier/Generator.php';
|
require_once 'HTMLPurifier/Generator.php';
|
||||||
|
|
||||||
class HTMLPurifier_ChildDefTest extends UnitTestCase
|
class HTMLPurifier_ChildDefTest extends UnitTestCase
|
||||||
@@ -12,7 +12,8 @@ class HTMLPurifier_ChildDefTest extends UnitTestCase
|
|||||||
var $gen;
|
var $gen;
|
||||||
|
|
||||||
function HTMLPurifier_ChildDefTest() {
|
function HTMLPurifier_ChildDefTest() {
|
||||||
$this->lex = HTMLPurifier_Lexer::create();
|
// it is vital that the tags be treated as literally as possible
|
||||||
|
$this->lex = new HTMLPurifier_Lexer_DirectLex();
|
||||||
$this->gen = new HTMLPurifier_Generator();
|
$this->gen = new HTMLPurifier_Generator();
|
||||||
parent::UnitTestCase();
|
parent::UnitTestCase();
|
||||||
}
|
}
|
||||||
@@ -42,28 +43,70 @@ class HTMLPurifier_ChildDefTest extends UnitTestCase
|
|||||||
|
|
||||||
function test_custom() {
|
function test_custom() {
|
||||||
|
|
||||||
// the table definition
|
|
||||||
$this->def = new HTMLPurifier_ChildDef_Custom(
|
$this->def = new HTMLPurifier_ChildDef_Custom(
|
||||||
'(caption?, (col*|colgroup*), thead?, tfoot?, (tbody+|tr+))');
|
'(a, b?, c*, d+, (a, b)*)');
|
||||||
|
|
||||||
|
$inputs[0] = '';
|
||||||
|
$expect[0] = false;
|
||||||
|
|
||||||
|
$inputs[1] = '<a /><b /><c /><d /><a /><b />';
|
||||||
|
$expect[1] = true;
|
||||||
|
|
||||||
|
$inputs[2] = '<a /><d>Dob</d><a /><b>foo</b><a href="moo"><b>foo</b>';
|
||||||
|
$expect[2] = true;
|
||||||
|
|
||||||
|
$inputs[3] = '<a /><a />';
|
||||||
|
$expect[3] = false;
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
function test_table() {
|
||||||
|
|
||||||
|
// currently inactive, awaiting augmentation
|
||||||
|
|
||||||
|
// the table definition
|
||||||
|
$this->def = new HTMLPurifier_ChildDef_Table();
|
||||||
|
|
||||||
$inputs = $expect = $config = array();
|
$inputs = $expect = $config = array();
|
||||||
|
|
||||||
$inputs[0] = '';
|
$inputs[0] = '';
|
||||||
$expect[0] = false;
|
$expect[0] = false;
|
||||||
|
|
||||||
// we really don't care what's inside, because if it turns out
|
// we're using empty tags to compact the tests: under real circumstances
|
||||||
// this tr is illegal, we'll end up re-evaluating the parent node
|
// there would be contents in them
|
||||||
// anyway.
|
|
||||||
$inputs[1] = '<tr></tr>';
|
$inputs[1] = '<tr />';
|
||||||
$expect[1] = true;
|
$expect[1] = true;
|
||||||
|
|
||||||
$inputs[2] = '<caption></caption><col></col><thead></thead>' .
|
$inputs[2] = '<caption /><col /><thead /><tfoot /><tbody>'.
|
||||||
'<tfoot></tfoot><tbody></tbody>';
|
'<tr><td>asdf</td></tr></tbody>';
|
||||||
$expect[2] = true;
|
$expect[2] = true;
|
||||||
|
|
||||||
$inputs[3] = '<col></col><col></col><col></col><tr></tr>';
|
$inputs[3] = '<col /><col /><col /><tr />';
|
||||||
$expect[3] = true;
|
$expect[3] = true;
|
||||||
|
|
||||||
|
// mixed up order
|
||||||
|
$inputs[4] = '<col /><colgroup /><tbody /><tfoot /><thead /><tr>1</tr><caption /><tr />';
|
||||||
|
$expect[4] = '<caption /><col /><colgroup /><thead /><tfoot /><tbody /><tr>1</tr><tr />';
|
||||||
|
|
||||||
|
// duplicates of singles
|
||||||
|
// - first caption serves
|
||||||
|
// - trailing tfoots/theads get turned into tbodys
|
||||||
|
$inputs[5] = '<caption>1</caption><caption /><tbody /><tbody /><tfoot>1</tfoot><tfoot />';
|
||||||
|
$expect[5] = '<caption>1</caption><tfoot>1</tfoot><tbody /><tbody /><tbody />';
|
||||||
|
|
||||||
|
// errant text dropped (until bubbling is implemented)
|
||||||
|
$inputs[6] = 'foo';
|
||||||
|
$expect[6] = false;
|
||||||
|
|
||||||
|
// whitespace sticks to the previous element, last whitespace is
|
||||||
|
// stationary
|
||||||
|
$inputs[7] = "\n <tr />\n <tr />\n ";
|
||||||
|
$expect[7] = true;
|
||||||
|
|
||||||
|
$inputs[8] = "\n\t<tbody />\n\t\t<tfoot />\n\t\t\t";
|
||||||
|
$expect[8] = "\n\t\t<tfoot />\n\t<tbody />\n\t\t\t";
|
||||||
|
|
||||||
$this->assertSeries($inputs, $expect, $config);
|
$this->assertSeries($inputs, $expect, $config);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@@ -1,8 +1,8 @@
|
|||||||
<?php
|
<?php
|
||||||
|
|
||||||
require_once 'HTMLPurifier/ConfigDef.php';
|
require_once 'HTMLPurifier/ConfigSchema.php';
|
||||||
|
|
||||||
class HTMLPurifier_ConfigDefTest extends UnitTestCase
|
class HTMLPurifier_ConfigSchemaTest extends UnitTestCase
|
||||||
{
|
{
|
||||||
|
|
||||||
var $old_copy;
|
var $old_copy;
|
||||||
@@ -13,16 +13,16 @@ class HTMLPurifier_ConfigDefTest extends UnitTestCase
|
|||||||
// you pay for using Singletons. Good thing we can overload it.
|
// you pay for using Singletons. Good thing we can overload it.
|
||||||
|
|
||||||
// first, let's get a clean copy to do tests
|
// first, let's get a clean copy to do tests
|
||||||
$our_copy = new HTMLPurifier_ConfigDef();
|
$our_copy = new HTMLPurifier_ConfigSchema();
|
||||||
// get the old copy
|
// get the old copy
|
||||||
$this->old_copy = HTMLPurifier_ConfigDef::instance();
|
$this->old_copy = HTMLPurifier_ConfigSchema::instance();
|
||||||
// put in our copy, and reassign to the REAL reference
|
// put in our copy, and reassign to the REAL reference
|
||||||
$this->our_copy =& HTMLPurifier_ConfigDef::instance($our_copy);
|
$this->our_copy =& HTMLPurifier_ConfigSchema::instance($our_copy);
|
||||||
}
|
}
|
||||||
|
|
||||||
function tearDown() {
|
function tearDown() {
|
||||||
// testing is done, restore the old copy
|
// testing is done, restore the old copy
|
||||||
HTMLPurifier_ConfigDef::instance($this->old_copy);
|
HTMLPurifier_ConfigSchema::instance($this->old_copy);
|
||||||
}
|
}
|
||||||
|
|
||||||
function testNormal() {
|
function testNormal() {
|
||||||
@@ -31,9 +31,9 @@ class HTMLPurifier_ConfigDefTest extends UnitTestCase
|
|||||||
|
|
||||||
// define a namespace
|
// define a namespace
|
||||||
$description = 'Configuration that is always available.';
|
$description = 'Configuration that is always available.';
|
||||||
HTMLPurifier_ConfigDef::defineNamespace(
|
HTMLPurifier_ConfigSchema::defineNamespace(
|
||||||
'Core', $description
|
'Core', $description
|
||||||
); $line = __LINE__;
|
);
|
||||||
$this->assertIdentical($this->our_copy->defaults, array(
|
$this->assertIdentical($this->our_copy->defaults, array(
|
||||||
'Core' => array()
|
'Core' => array()
|
||||||
));
|
));
|
||||||
@@ -41,7 +41,7 @@ class HTMLPurifier_ConfigDefTest extends UnitTestCase
|
|||||||
'Core' => array()
|
'Core' => array()
|
||||||
));
|
));
|
||||||
$namespace = new HTMLPurifier_ConfigEntity_Namespace();
|
$namespace = new HTMLPurifier_ConfigEntity_Namespace();
|
||||||
$namespace->addDescription($file, $line, $description);
|
$namespace->description = $description;
|
||||||
$this->assertIdentical($this->our_copy->info_namespace, array(
|
$this->assertIdentical($this->our_copy->info_namespace, array(
|
||||||
'Core' => $namespace
|
'Core' => $namespace
|
||||||
));
|
));
|
||||||
@@ -50,7 +50,7 @@ class HTMLPurifier_ConfigDefTest extends UnitTestCase
|
|||||||
|
|
||||||
// define a directive
|
// define a directive
|
||||||
$description = 'This is a description of the directive.';
|
$description = 'This is a description of the directive.';
|
||||||
HTMLPurifier_ConfigDef::define(
|
HTMLPurifier_ConfigSchema::define(
|
||||||
'Core', 'Name', 'default value', 'string',
|
'Core', 'Name', 'default value', 'string',
|
||||||
$description
|
$description
|
||||||
); $line = __LINE__;
|
); $line = __LINE__;
|
||||||
@@ -71,7 +71,7 @@ class HTMLPurifier_ConfigDefTest extends UnitTestCase
|
|||||||
|
|
||||||
|
|
||||||
// define a directive in an undefined namespace
|
// define a directive in an undefined namespace
|
||||||
HTMLPurifier_ConfigDef::define(
|
HTMLPurifier_ConfigSchema::define(
|
||||||
'Extension', 'Name', false, 'bool',
|
'Extension', 'Name', false, 'bool',
|
||||||
'This is for an extension, but we have not defined its namespace!'
|
'This is for an extension, but we have not defined its namespace!'
|
||||||
);
|
);
|
||||||
@@ -83,7 +83,7 @@ class HTMLPurifier_ConfigDefTest extends UnitTestCase
|
|||||||
|
|
||||||
// redefine a value in a valid manner
|
// redefine a value in a valid manner
|
||||||
$description = 'Alternative configuration definition';
|
$description = 'Alternative configuration definition';
|
||||||
HTMLPurifier_ConfigDef::define(
|
HTMLPurifier_ConfigSchema::define(
|
||||||
'Core', 'Name', 'default value', 'string',
|
'Core', 'Name', 'default value', 'string',
|
||||||
$description
|
$description
|
||||||
); $line = __LINE__;
|
); $line = __LINE__;
|
||||||
@@ -98,7 +98,7 @@ class HTMLPurifier_ConfigDefTest extends UnitTestCase
|
|||||||
|
|
||||||
|
|
||||||
// redefine a directive in an invalid manner
|
// redefine a directive in an invalid manner
|
||||||
HTMLPurifier_ConfigDef::define(
|
HTMLPurifier_ConfigSchema::define(
|
||||||
'Core', 'Name', 'different default', 'string',
|
'Core', 'Name', 'different default', 'string',
|
||||||
'Inconsistent default or type, cannot redefine'
|
'Inconsistent default or type, cannot redefine'
|
||||||
);
|
);
|
||||||
@@ -109,7 +109,7 @@ class HTMLPurifier_ConfigDefTest extends UnitTestCase
|
|||||||
|
|
||||||
|
|
||||||
// make an enumeration
|
// make an enumeration
|
||||||
HTMLPurifier_ConfigDef::defineAllowedValues(
|
HTMLPurifier_ConfigSchema::defineAllowedValues(
|
||||||
'Core', 'Name', array(
|
'Core', 'Name', array(
|
||||||
'Real Value',
|
'Real Value',
|
||||||
'Real Value 2'
|
'Real Value 2'
|
||||||
@@ -128,7 +128,7 @@ class HTMLPurifier_ConfigDefTest extends UnitTestCase
|
|||||||
|
|
||||||
|
|
||||||
// redefinition of enumeration is cumulative
|
// redefinition of enumeration is cumulative
|
||||||
HTMLPurifier_ConfigDef::defineAllowedValues(
|
HTMLPurifier_ConfigSchema::defineAllowedValues(
|
||||||
'Core', 'Name', array(
|
'Core', 'Name', array(
|
||||||
'Real Value 3',
|
'Real Value 3',
|
||||||
)
|
)
|
||||||
@@ -143,7 +143,7 @@ class HTMLPurifier_ConfigDefTest extends UnitTestCase
|
|||||||
|
|
||||||
|
|
||||||
// cannot define enumeration for undefined directive
|
// cannot define enumeration for undefined directive
|
||||||
HTMLPurifier_ConfigDef::defineAllowedValues(
|
HTMLPurifier_ConfigSchema::defineAllowedValues(
|
||||||
'Core', 'Foobar', array(
|
'Core', 'Foobar', array(
|
||||||
'Real Value 9',
|
'Real Value 9',
|
||||||
)
|
)
|
||||||
@@ -155,7 +155,7 @@ class HTMLPurifier_ConfigDefTest extends UnitTestCase
|
|||||||
|
|
||||||
|
|
||||||
// test defining value aliases for an enumerated value
|
// test defining value aliases for an enumerated value
|
||||||
HTMLPurifier_ConfigDef::defineValueAliases(
|
HTMLPurifier_ConfigSchema::defineValueAliases(
|
||||||
'Core', 'Name', array(
|
'Core', 'Name', array(
|
||||||
'Aliased Value' => 'Real Value'
|
'Aliased Value' => 'Real Value'
|
||||||
)
|
)
|
||||||
@@ -170,7 +170,7 @@ class HTMLPurifier_ConfigDefTest extends UnitTestCase
|
|||||||
|
|
||||||
|
|
||||||
// redefine should be cumulative
|
// redefine should be cumulative
|
||||||
HTMLPurifier_ConfigDef::defineValueAliases(
|
HTMLPurifier_ConfigSchema::defineValueAliases(
|
||||||
'Core', 'Name', array(
|
'Core', 'Name', array(
|
||||||
'Aliased Value 2' => 'Real Value 2'
|
'Aliased Value 2' => 'Real Value 2'
|
||||||
)
|
)
|
||||||
@@ -185,7 +185,7 @@ class HTMLPurifier_ConfigDefTest extends UnitTestCase
|
|||||||
|
|
||||||
|
|
||||||
// cannot create alias to not-allowed value
|
// cannot create alias to not-allowed value
|
||||||
HTMLPurifier_ConfigDef::defineValueAliases(
|
HTMLPurifier_ConfigSchema::defineValueAliases(
|
||||||
'Core', 'Name', array(
|
'Core', 'Name', array(
|
||||||
'Aliased Value 3' => 'Invalid Value'
|
'Aliased Value 3' => 'Invalid Value'
|
||||||
)
|
)
|
||||||
@@ -197,7 +197,7 @@ class HTMLPurifier_ConfigDefTest extends UnitTestCase
|
|||||||
|
|
||||||
|
|
||||||
// cannot create alias for already allowed value
|
// cannot create alias for already allowed value
|
||||||
HTMLPurifier_ConfigDef::defineValueAliases(
|
HTMLPurifier_ConfigSchema::defineValueAliases(
|
||||||
'Core', 'Name', array(
|
'Core', 'Name', array(
|
||||||
'Real Value' => 'Real Value 2'
|
'Real Value' => 'Real Value 2'
|
||||||
)
|
)
|
||||||
@@ -209,7 +209,7 @@ class HTMLPurifier_ConfigDefTest extends UnitTestCase
|
|||||||
|
|
||||||
|
|
||||||
// define a directive with an invalid type
|
// define a directive with an invalid type
|
||||||
HTMLPurifier_ConfigDef::define(
|
HTMLPurifier_ConfigSchema::define(
|
||||||
'Core', 'Foobar', false, 'omen',
|
'Core', 'Foobar', false, 'omen',
|
||||||
'Omen is not a valid type, so we reject this.'
|
'Omen is not a valid type, so we reject this.'
|
||||||
);
|
);
|
||||||
@@ -221,7 +221,7 @@ class HTMLPurifier_ConfigDefTest extends UnitTestCase
|
|||||||
|
|
||||||
|
|
||||||
// define a directive with inconsistent type
|
// define a directive with inconsistent type
|
||||||
HTMLPurifier_ConfigDef::define(
|
HTMLPurifier_ConfigSchema::define(
|
||||||
'Core', 'Foobaz', 10, 'string',
|
'Core', 'Foobaz', 10, 'string',
|
||||||
'If we say string, we should mean it, not integer 10.'
|
'If we say string, we should mean it, not integer 10.'
|
||||||
);
|
);
|
||||||
@@ -231,6 +231,24 @@ class HTMLPurifier_ConfigDefTest extends UnitTestCase
|
|||||||
$this->swallowErrors();
|
$this->swallowErrors();
|
||||||
|
|
||||||
|
|
||||||
|
// define a directive with bad characters
|
||||||
|
HTMLPurifier_ConfigSchema::define(
|
||||||
|
'Core', 'Core.Attr', 10, 'int',
|
||||||
|
'No periods! >:-('
|
||||||
|
);
|
||||||
|
|
||||||
|
$this->assertError('Directive name must be alphanumeric');
|
||||||
|
$this->assertNoErrors();
|
||||||
|
$this->swallowErrors();
|
||||||
|
|
||||||
|
// define a namespace with bad characters
|
||||||
|
HTMLPurifier_ConfigSchema::defineNamespace(
|
||||||
|
'Foobar&Gromit', $description
|
||||||
|
);
|
||||||
|
|
||||||
|
$this->assertError('Namespace name must be alphanumeric');
|
||||||
|
$this->assertNoErrors();
|
||||||
|
$this->swallowErrors();
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
@@ -8,37 +8,43 @@ class HTMLPurifier_ConfigTest extends UnitTestCase
|
|||||||
var $our_copy, $old_copy;
|
var $our_copy, $old_copy;
|
||||||
|
|
||||||
function setUp() {
|
function setUp() {
|
||||||
$our_copy = new HTMLPurifier_ConfigDef();
|
$our_copy = new HTMLPurifier_ConfigSchema();
|
||||||
$this->old_copy = HTMLPurifier_ConfigDef::instance();
|
$this->old_copy = HTMLPurifier_ConfigSchema::instance();
|
||||||
$this->our_copy =& HTMLPurifier_ConfigDef::instance($our_copy);
|
$this->our_copy =& HTMLPurifier_ConfigSchema::instance($our_copy);
|
||||||
}
|
}
|
||||||
|
|
||||||
function tearDown() {
|
function tearDown() {
|
||||||
HTMLPurifier_ConfigDef::instance($this->old_copy);
|
HTMLPurifier_ConfigSchema::instance($this->old_copy);
|
||||||
}
|
}
|
||||||
|
|
||||||
function test() {
|
function test() {
|
||||||
|
|
||||||
HTMLPurifier_ConfigDef::defineNamespace('Core', 'Corestuff');
|
HTMLPurifier_ConfigSchema::defineNamespace('Core', 'Corestuff');
|
||||||
HTMLPurifier_ConfigDef::defineNamespace('Attr', 'Attributes');
|
HTMLPurifier_ConfigSchema::defineNamespace('Attr', 'Attributes');
|
||||||
HTMLPurifier_ConfigDef::defineNamespace('Extension', 'Extensible');
|
HTMLPurifier_ConfigSchema::defineNamespace('Extension', 'Extensible');
|
||||||
|
|
||||||
HTMLPurifier_ConfigDef::define(
|
HTMLPurifier_ConfigSchema::define(
|
||||||
'Core', 'Key', false, 'bool', 'A boolean directive.'
|
'Core', 'Key', false, 'bool', 'A boolean directive.'
|
||||||
);
|
);
|
||||||
HTMLPurifier_ConfigDef::define(
|
HTMLPurifier_ConfigSchema::define(
|
||||||
'Attr', 'Key', 42, 'int', 'An integer directive.'
|
'Attr', 'Key', 42, 'int', 'An integer directive.'
|
||||||
);
|
);
|
||||||
HTMLPurifier_ConfigDef::define(
|
HTMLPurifier_ConfigSchema::define(
|
||||||
'Extension', 'Pert', 'foo', 'string', 'A string directive.'
|
'Extension', 'Pert', 'foo', 'string', 'A string directive.'
|
||||||
);
|
);
|
||||||
|
HTMLPurifier_ConfigSchema::define(
|
||||||
|
'Core', 'Encoding', 'utf-8', 'istring', 'Case insensitivity!'
|
||||||
|
);
|
||||||
|
|
||||||
HTMLPurifier_ConfigDef::defineAllowedValues(
|
HTMLPurifier_ConfigSchema::defineAllowedValues(
|
||||||
'Extension', 'Pert', array('foo', 'moo')
|
'Extension', 'Pert', array('foo', 'moo')
|
||||||
);
|
);
|
||||||
HTMLPurifier_ConfigDef::defineValueAliases(
|
HTMLPurifier_ConfigSchema::defineValueAliases(
|
||||||
'Extension', 'Pert', array('cow' => 'moo')
|
'Extension', 'Pert', array('cow' => 'moo')
|
||||||
);
|
);
|
||||||
|
HTMLPurifier_ConfigSchema::defineAllowedValues(
|
||||||
|
'Core', 'Encoding', array('utf-8', 'iso-8859-1')
|
||||||
|
);
|
||||||
|
|
||||||
$config = HTMLPurifier_Config::createDefault();
|
$config = HTMLPurifier_Config::createDefault();
|
||||||
|
|
||||||
@@ -80,6 +86,11 @@ class HTMLPurifier_ConfigTest extends UnitTestCase
|
|||||||
$this->assertNoErrors();
|
$this->assertNoErrors();
|
||||||
$this->assertIdentical($config->get('Extension', 'Pert'), 'moo');
|
$this->assertIdentical($config->get('Extension', 'Pert'), 'moo');
|
||||||
|
|
||||||
|
// case-insensitive attempt to set value that is allowed
|
||||||
|
$config->set('Core', 'Encoding', 'ISO-8859-1');
|
||||||
|
$this->assertNoErrors();
|
||||||
|
$this->assertIdentical($config->get('Core', 'Encoding'), 'iso-8859-1');
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@@ -14,8 +14,8 @@ class HTMLPurifier_EncoderTest extends UnitTestCase
|
|||||||
|
|
||||||
function assertCleanUTF8($string, $expect = null) {
|
function assertCleanUTF8($string, $expect = null) {
|
||||||
if ($expect === null) $expect = $string;
|
if ($expect === null) $expect = $string;
|
||||||
$this->assertIdentical($this->Encoder->cleanUTF8($string), $expect);
|
$this->assertIdentical($this->Encoder->cleanUTF8($string), $expect, 'iconv: %s');
|
||||||
$this->assertIdentical($this->Encoder->cleanUTF8($string, true), $expect);
|
$this->assertIdentical($this->Encoder->cleanUTF8($string, true), $expect, 'PHP: %s');
|
||||||
}
|
}
|
||||||
|
|
||||||
function test_cleanUTF8() {
|
function test_cleanUTF8() {
|
||||||
@@ -46,6 +46,14 @@ class HTMLPurifier_EncoderTest extends UnitTestCase
|
|||||||
$this->Encoder->convertToUTF8("\xF6", $config),
|
$this->Encoder->convertToUTF8("\xF6", $config),
|
||||||
"\xC3\xB6"
|
"\xC3\xB6"
|
||||||
);
|
);
|
||||||
|
|
||||||
|
$config->set('Test', 'ForceNoIconv', true);
|
||||||
|
|
||||||
|
$this->assertIdentical(
|
||||||
|
$this->Encoder->convertToUTF8("\xF6", $config),
|
||||||
|
"\xC3\xB6"
|
||||||
|
);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
function test_convertFromUTF8() {
|
function test_convertFromUTF8() {
|
||||||
@@ -64,6 +72,14 @@ class HTMLPurifier_EncoderTest extends UnitTestCase
|
|||||||
$this->Encoder->convertFromUTF8("\xC3\xB6", $config),
|
$this->Encoder->convertFromUTF8("\xC3\xB6", $config),
|
||||||
"\xF6"
|
"\xF6"
|
||||||
);
|
);
|
||||||
|
|
||||||
|
$config->set('Test', 'ForceNoIconv', true);
|
||||||
|
|
||||||
|
$this->assertIdentical(
|
||||||
|
$this->Encoder->convertFromUTF8("\xC3\xB6", $config),
|
||||||
|
"\xF6"
|
||||||
|
);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@@ -52,10 +52,8 @@ class HTMLPurifier_GeneratorTest extends UnitTestCase
|
|||||||
$inputs[7] = new HTMLPurifier_Token_Text($theta_char);
|
$inputs[7] = new HTMLPurifier_Token_Text($theta_char);
|
||||||
$expect[7] = $theta_char;
|
$expect[7] = $theta_char;
|
||||||
|
|
||||||
$default_config = HTMLPurifier_Config::createDefault();
|
|
||||||
foreach ($inputs as $i => $input) {
|
foreach ($inputs as $i => $input) {
|
||||||
if (!isset($config[$i])) $config[$i] = $default_config;
|
$result = $this->gen->generateFromToken($input);
|
||||||
$result = $this->gen->generateFromToken($input, $config[$i]);
|
|
||||||
$this->assertEqual($result, $expect[$i]);
|
$this->assertEqual($result, $expect[$i]);
|
||||||
paintIf($result, $result != $expect[$i]);
|
paintIf($result, $result != $expect[$i]);
|
||||||
}
|
}
|
||||||
@@ -122,6 +120,56 @@ class HTMLPurifier_GeneratorTest extends UnitTestCase
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
var $config;
|
||||||
|
function assertGeneration($tokens, $expect) {
|
||||||
|
$result = $this->gen->generateFromTokens($tokens, $this->config);
|
||||||
|
// normalized newlines, this probably should be put somewhere else
|
||||||
|
$result = str_replace("\r\n", "\n", $result);
|
||||||
|
$result = str_replace("\r", "\n", $result);
|
||||||
|
$this->assertEqual($expect, $result);
|
||||||
|
}
|
||||||
|
|
||||||
|
function test_generateFromTokens_XHTMLoff() {
|
||||||
|
$this->config = HTMLPurifier_Config::createDefault();
|
||||||
|
$this->config->set('Core', 'XHTML', false);
|
||||||
|
|
||||||
|
// omit trailing slash
|
||||||
|
$this->assertGeneration(
|
||||||
|
array( new HTMLPurifier_Token_Empty('br') ),
|
||||||
|
'<br>'
|
||||||
|
);
|
||||||
|
|
||||||
|
// there should be a test for attribute minimization, but it is
|
||||||
|
// impossible for something like that to happen due to our current
|
||||||
|
// definitions! fix it later
|
||||||
|
|
||||||
|
// namespaced attributes must be dropped
|
||||||
|
$this->assertGeneration(
|
||||||
|
array( new HTMLPurifier_Token_Start('p', array('xml:lang'=>'fr')) ),
|
||||||
|
'<p>'
|
||||||
|
);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
function test_generateFromTokens_TidyFormat() {
|
||||||
|
// abort test if tidy isn't loaded
|
||||||
|
if (!extension_loaded('tidy')) return;
|
||||||
|
|
||||||
|
$this->config = HTMLPurifier_Config::createDefault();
|
||||||
|
$this->config->set('Core', 'TidyFormat', true);
|
||||||
|
|
||||||
|
// nice wrapping please
|
||||||
|
$this->assertGeneration(
|
||||||
|
array(
|
||||||
|
new HTMLPurifier_Token_Start('div'),
|
||||||
|
new HTMLPurifier_Token_Text('Text'),
|
||||||
|
new HTMLPurifier_Token_End('div')
|
||||||
|
),
|
||||||
|
"<div>\n Text\n</div>\n"
|
||||||
|
);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
?>
|
?>
|
24
tests/HTMLPurifier/Test.php
Normal file
24
tests/HTMLPurifier/Test.php
Normal file
@@ -0,0 +1,24 @@
|
|||||||
|
<?php
|
||||||
|
|
||||||
|
require_once 'HTMLPurifier.php';
|
||||||
|
|
||||||
|
// integration test
|
||||||
|
|
||||||
|
class HTMLPurifier_Test extends UnitTestCase
|
||||||
|
{
|
||||||
|
var $purifier;
|
||||||
|
|
||||||
|
function assertPurification($input, $expect = null) {
|
||||||
|
if ($expect === null) $expect = $input;
|
||||||
|
$result = $this->purifier->purify($input);
|
||||||
|
$this->assertIdentical($expect, $result);
|
||||||
|
}
|
||||||
|
|
||||||
|
function test() {
|
||||||
|
$config = HTMLPurifier_Config::createDefault();
|
||||||
|
$this->purifier = new HTMLPurifier($config);
|
||||||
|
$this->assertPurification("Null byte\0", "Null byte");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
?>
|
@@ -40,7 +40,7 @@ require_once 'HTMLPurifier.php';
|
|||||||
// define callable test files
|
// define callable test files
|
||||||
$test_files = array();
|
$test_files = array();
|
||||||
$test_files[] = 'ConfigTest.php';
|
$test_files[] = 'ConfigTest.php';
|
||||||
$test_files[] = 'ConfigDefTest.php';
|
$test_files[] = 'ConfigSchemaTest.php';
|
||||||
$test_files[] = 'LexerTest.php';
|
$test_files[] = 'LexerTest.php';
|
||||||
$test_files[] = 'Lexer/DirectLexTest.php';
|
$test_files[] = 'Lexer/DirectLexTest.php';
|
||||||
$test_files[] = 'TokenTest.php';
|
$test_files[] = 'TokenTest.php';
|
||||||
@@ -88,6 +88,7 @@ $test_files[] = 'URISchemeRegistryTest.php';
|
|||||||
$test_files[] = 'URISchemeTest.php';
|
$test_files[] = 'URISchemeTest.php';
|
||||||
$test_files[] = 'EncoderTest.php';
|
$test_files[] = 'EncoderTest.php';
|
||||||
$test_files[] = 'EntityParserTest.php';
|
$test_files[] = 'EntityParserTest.php';
|
||||||
|
$test_files[] = 'Test.php';
|
||||||
|
|
||||||
if (version_compare(PHP_VERSION, '5', '>=')) {
|
if (version_compare(PHP_VERSION, '5', '>=')) {
|
||||||
$test_files[] = 'TokenFactoryTest.php';
|
$test_files[] = 'TokenFactoryTest.php';
|
||||||
|
Reference in New Issue
Block a user