mirror of
https://github.com/ezyang/htmlpurifier.git
synced 2025-08-06 14:16:32 +02:00
Compare commits
134 Commits
Author | SHA1 | Date | |
---|---|---|---|
|
b9d886d53b | ||
|
5b3c8c5534 | ||
|
dd40d41bc3 | ||
|
37a80f1295 | ||
|
fb367dc871 | ||
|
29c3c21b34 | ||
|
e45cc503a2 | ||
|
85cdea0120 | ||
|
c7676afb0d | ||
|
d75c695994 | ||
|
6f6fcbc354 | ||
|
c31d6ec80e | ||
|
cb92a57e4e | ||
|
423afedbf4 | ||
|
7827a95273 | ||
|
9881a34712 | ||
|
a19f30fdcf | ||
|
8f58c7f49e | ||
|
71301b36eb | ||
|
4f0d012dfa | ||
|
24a4dfdf83 | ||
|
f922285383 | ||
|
3af6457801 | ||
|
d51d3c127b | ||
|
4f92c0377f | ||
|
c3efafb07d | ||
|
79c18eb781 | ||
|
7b64bc37e2 | ||
|
b3aa5fa0dc | ||
|
350d8301dd | ||
|
a40e16dd2e | ||
|
ee388e86c0 | ||
|
79df79b2fd | ||
|
f5b72c623c | ||
|
7bccc24977 | ||
|
25fe416ab2 | ||
|
a9012f4387 | ||
|
82f8561123 | ||
|
0b743fb2db | ||
|
08e32597df | ||
|
2b82fbacad | ||
|
710820cbe9 | ||
|
22ef52a7f6 | ||
|
4919187fc6 | ||
|
797b899305 | ||
|
8c9dbe142d | ||
|
2a002857ce | ||
|
9d98b45dea | ||
|
b0f3116b9e | ||
|
b03a44abff | ||
|
cf257cabde | ||
|
ab950a1909 | ||
|
a12ea4bb3b | ||
|
f80de908bd | ||
|
349c4de75b | ||
|
89622c964e | ||
|
732fe5cad7 | ||
|
e7e81c0a5b | ||
|
626b2a13c8 | ||
|
35487c02ae | ||
|
4bc1761b12 | ||
|
63f5414f2e | ||
|
88d014706b | ||
|
f6de73d7e7 | ||
|
733868a76d | ||
|
fab6a212c8 | ||
|
ea1362ce5c | ||
|
cff498ef67 | ||
|
1765a7537a | ||
|
d7157d0ccd | ||
|
ed44b5c5ba | ||
|
5e5c0f3aa4 | ||
|
b2ed0aff01 | ||
|
148681d1b0 | ||
|
2e7e411491 | ||
|
02051e465c | ||
|
a96b5bf612 | ||
|
9dd7c8c7dd | ||
|
0c59db1da3 | ||
|
584a1abd15 | ||
|
a6ede3804e | ||
|
4476745003 | ||
|
45748500ec | ||
|
e99520ab96 | ||
|
1e2abb7f8f | ||
|
362c802191 | ||
|
3a1d505b3d | ||
|
a005da8a4c | ||
|
9a66394abb | ||
|
62c0575468 | ||
|
6a95d91a1a | ||
|
275932ec05 | ||
|
ae90bb919d | ||
|
3c734b4c72 | ||
|
3d02a2a7d4 | ||
|
0bfa42f9b7 | ||
|
7a8edc88f9 | ||
|
98b4e70a93 | ||
|
6f5592ae60 | ||
|
9f996b125a | ||
|
96b571d236 | ||
|
0e9904a9ba | ||
|
e66a98c396 | ||
|
728088f2ba | ||
|
8ae2604440 | ||
|
7b087c7bbe | ||
|
58064592ff | ||
|
b19fc32a5a | ||
|
b15cbbb42a | ||
|
5f0663cad7 | ||
|
75e52a12a6 | ||
|
269268b843 | ||
|
62c6d93b6d | ||
|
31704c92f6 | ||
|
291fa4cb29 | ||
|
389fcc9a5d | ||
|
e5191b3ada | ||
|
5d0a992579 | ||
|
ae83bebc98 | ||
|
9191877740 | ||
|
3066ca357a | ||
|
53fd096641 | ||
|
2166246b7e | ||
|
49bb6ec35d | ||
|
401612dc3a | ||
|
dc0fb7d2b4 | ||
|
eee45fed37 | ||
|
03657ad51a | ||
|
dda4038446 | ||
|
996ccdbdda | ||
|
008348db21 | ||
|
b10a380ff4 | ||
|
bf0d659c47 | ||
|
e55551ecdd |
2
Doxyfile
2
Doxyfile
@@ -4,7 +4,7 @@
|
|||||||
# Project related configuration options
|
# Project related configuration options
|
||||||
#---------------------------------------------------------------------------
|
#---------------------------------------------------------------------------
|
||||||
PROJECT_NAME = HTML Purifier
|
PROJECT_NAME = HTML Purifier
|
||||||
PROJECT_NUMBER = 2.0.0
|
PROJECT_NUMBER = 2.1.2
|
||||||
OUTPUT_DIRECTORY = "C:/Documents and Settings/Edward/My Documents/My Webs/htmlpurifier/docs/doxygen"
|
OUTPUT_DIRECTORY = "C:/Documents and Settings/Edward/My Documents/My Webs/htmlpurifier/docs/doxygen"
|
||||||
CREATE_SUBDIRS = NO
|
CREATE_SUBDIRS = NO
|
||||||
OUTPUT_LANGUAGE = English
|
OUTPUT_LANGUAGE = English
|
||||||
|
54
INSTALL
54
INSTALL
@@ -48,7 +48,7 @@ be standards compliant. HTML Purifier can deal with these doctypes:
|
|||||||
* XHTML 1.0 Strict
|
* XHTML 1.0 Strict
|
||||||
* HTML 4.01 Transitional
|
* HTML 4.01 Transitional
|
||||||
* HTML 4.01 Strict
|
* HTML 4.01 Strict
|
||||||
* XHTML 1.1 sans Ruby
|
* XHTML 1.1 (sans Ruby)
|
||||||
|
|
||||||
...and these character encodings:
|
...and these character encodings:
|
||||||
|
|
||||||
@@ -112,8 +112,9 @@ websites):
|
|||||||
Note that HTML Purifier's support for non-Unicode encodings is crippled by the
|
Note that HTML Purifier's support for non-Unicode encodings is crippled by the
|
||||||
fact that any character not supported by that encoding will be silently
|
fact that any character not supported by that encoding will be silently
|
||||||
dropped, EVEN if it is ampersand escaped. If you want to work around
|
dropped, EVEN if it is ampersand escaped. If you want to work around
|
||||||
this, you are welcome to read docs/enduser-utf8.html for a workaround,
|
this, you are welcome to read docs/enduser-utf8.html for a fix,
|
||||||
but please be cognizant of the issues the "solution" creates.
|
but please be cognizant of the issues the "solution" creates (for this
|
||||||
|
reason, I do not include the solution in this document).
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@@ -139,7 +140,9 @@ Other supported doctypes include:
|
|||||||
There are more configuration directives which can be read about
|
There are more configuration directives which can be read about
|
||||||
here: <http://htmlpurifier.org/live/configdoc/plain.html> They're a bit boring,
|
here: <http://htmlpurifier.org/live/configdoc/plain.html> They're a bit boring,
|
||||||
but they can help out for those of you who like to exert maximum control over
|
but they can help out for those of you who like to exert maximum control over
|
||||||
your code.
|
your code. Some of the more interesting ones are configurable at the
|
||||||
|
demo <http://htmlpurifier.org/demo.php> and are well worth looking into
|
||||||
|
for your own system.
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@@ -156,13 +159,15 @@ The interface is mind-numbingly simple:
|
|||||||
$clean_html = $purifier->purify( $dirty_html );
|
$clean_html = $purifier->purify( $dirty_html );
|
||||||
|
|
||||||
That's it! For more examples, check out docs/examples/ (they aren't very
|
That's it! For more examples, check out docs/examples/ (they aren't very
|
||||||
different though). Also, SLOW gives advice on what to do if HTML Purifier
|
different though). Also, docs/enduser-slow.html gives advice on what to
|
||||||
is slowing down your application.
|
do if HTML Purifier is slowing down your application.
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
6. Quick install
|
6. Quick install
|
||||||
|
|
||||||
|
First, make sure library/HTMLPurifier/DefinitionCache/Serializer is
|
||||||
|
writable by the webserver (see Section 7: Caching below for details).
|
||||||
If your website is in UTF-8 and XHTML Transitional, use this code:
|
If your website is in UTF-8 and XHTML Transitional, use this code:
|
||||||
|
|
||||||
<?php
|
<?php
|
||||||
@@ -189,6 +194,37 @@ If your website is in a different encoding or doctype, use this code:
|
|||||||
|
|
||||||
7. Caching
|
7. Caching
|
||||||
|
|
||||||
HTML Purifier generates some cache files to speed up its execution. For
|
HTML Purifier generates some cache files (generally one or two) to speed up
|
||||||
maximum performance, make sure that library/HTMLPurifier/DefinitionCache/Serializer
|
its execution. For maximum performance, make sure that
|
||||||
is writeable by the webserver.
|
library/HTMLPurifier/DefinitionCache/Serializer is writeable by the webserver.
|
||||||
|
|
||||||
|
If you are in the library/ folder of HTML Purifier, you can set the
|
||||||
|
appropriate permissions using:
|
||||||
|
|
||||||
|
chmod -R 0755 HTMLPurifier/DefinitionCache/Serializer
|
||||||
|
|
||||||
|
If the above command doesn't work, you may need to assign write permissions
|
||||||
|
to all. This may be necessary if your webserver runs as nobody, but is
|
||||||
|
not recommended since it means any other user can write files in the
|
||||||
|
directory. Use:
|
||||||
|
|
||||||
|
chmod -R 0777 HTMLPurifier/DefinitionCache/Serializer
|
||||||
|
|
||||||
|
You can also chmod files via your FTP client; this option
|
||||||
|
is usually accessible by right clicking the corresponding directory and
|
||||||
|
then selecting "chmod" or "file permissions".
|
||||||
|
|
||||||
|
Starting with 2.0.1, HTML Purifier will generate friendly error messages
|
||||||
|
that will tell you exactly what you have to chmod the directory to; if in doubt,
|
||||||
|
follow its advice.
|
||||||
|
|
||||||
|
If you are unable or unwilling to give write permissions to the cache
|
||||||
|
directory, you can either disable the cache (and suffer a performance
|
||||||
|
hit):
|
||||||
|
|
||||||
|
$config->set('Core', 'DefinitionCache', null);
|
||||||
|
|
||||||
|
Or move the cache directory somewhere else (no trailing slash):
|
||||||
|
|
||||||
|
$config->set('Cache', 'SerializerPath', '/home/user/absolute/path');
|
||||||
|
|
||||||
|
134
NEWS
134
NEWS
@@ -9,6 +9,140 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier
|
|||||||
. Internal change
|
. Internal change
|
||||||
==========================
|
==========================
|
||||||
|
|
||||||
|
2.1.2, released 2007-09-03
|
||||||
|
! Implemented Object module for trusted users
|
||||||
|
! Implemented experimental HTML5 parsing mode using PH5P. To use, add
|
||||||
|
this to your code:
|
||||||
|
require_once 'HTMLPurifier/Lexer/PH5P.php';
|
||||||
|
$config->set('Core', 'LexerImpl', 'PH5P');
|
||||||
|
Note that this Lexer introduces some classes not in the HTMLPurifier
|
||||||
|
namespace. Also, this is PHP5 only.
|
||||||
|
! CSS property border-spacing implemented
|
||||||
|
- Fix non-visible parsing error in DirectLex with empty tags that have
|
||||||
|
slashes inside attribute values.
|
||||||
|
- Fix typo in CSS definition: border-collapse:seperate; was incorrectly
|
||||||
|
accepted as valid CSS. Usually non-visible, because this styling is the
|
||||||
|
default for tables in most browsers. Thanks Brett Zamir for pointing
|
||||||
|
this out.
|
||||||
|
- Fix validation errors in configuration form
|
||||||
|
- Hammer out a bunch of edge-case bugs in the standalone distribution
|
||||||
|
- Inclusion reflection removed from URISchemeRegistry; you must manually
|
||||||
|
include any new schema files you wish to use
|
||||||
|
- Numerous typo fixes in documentation thanks to Brett Zamir
|
||||||
|
. Unit test refactoring for one logical test per test function
|
||||||
|
. Config and context parameters in ComplexHarness deprecated: instead, edit
|
||||||
|
the $config and $context member variables
|
||||||
|
. HTML wrapper in DOMLex now takes DTD identifiers into account; doesn't
|
||||||
|
really make a difference, but is good for completeness' sake
|
||||||
|
. merge-library.php script refactored for greater code reusability and
|
||||||
|
PHP4 compatibility
|
||||||
|
|
||||||
|
2.1.1, released 2007-08-04
|
||||||
|
- Fix show-stopper bug in %URI.MakeAbsolute functionality
|
||||||
|
- Fix PHP4 syntax error in standalone version
|
||||||
|
. Add prefix directory to include path for standalone, this prevents
|
||||||
|
other installations from clobbering the standalone's URI schemes
|
||||||
|
. Single test methods can be invoked by prefixing with __only
|
||||||
|
|
||||||
|
2.1.0, released 2007-08-02
|
||||||
|
# flush-htmldefinition-cache.php superseded in favor of a generic
|
||||||
|
flush-definition-cache.php script, you can clear a specific cache
|
||||||
|
by passing its name as a parameter to the script
|
||||||
|
! Phorum mod implemented for HTML Purifier
|
||||||
|
! With %Core.AggressivelyFixLt, <3 and similar emoticons no longer
|
||||||
|
trigger HTML removal in PHP5 (DOMLex). This directive is not necessary
|
||||||
|
for PHP4 (DirectLex).
|
||||||
|
! Standalone file now available, which greatly reduces the amount of
|
||||||
|
includes (although there are still a few files that reside in the
|
||||||
|
standalone folder)
|
||||||
|
! Relative URIs can now be transformed into their absolute equivalents
|
||||||
|
using %URI.Base and %URI.MakeAbsolute
|
||||||
|
! Ruby implemented for XHTML 1.1
|
||||||
|
! You can now define custom URI filtering behavior, see enduser-uri-filter.html
|
||||||
|
for more details
|
||||||
|
! UTF-8 font names now supported in CSS
|
||||||
|
- AutoFormatters emit friendly error messages if tags or attributes they
|
||||||
|
need are not allowed
|
||||||
|
- ConfigForm's compactification of directive names is now configurable
|
||||||
|
- AutoParagraph autoformatter algorithm refined after field-testing
|
||||||
|
- XHTML 1.1 now applies XHTML 1.0 Strict cleanup routines, namely
|
||||||
|
blockquote wrapping
|
||||||
|
- Contents of <style> tags removed by default when tags are removed
|
||||||
|
. HTMLPurifier_Config->getSerial() implemented, this is extremely useful
|
||||||
|
for output cache invalidation
|
||||||
|
. ConfigForm printer now can retrieve CSS and JS files as strings, in
|
||||||
|
case HTML Purifier's directory is not publicly accessible
|
||||||
|
. Introduce new text/itext configuration directive values: these represent
|
||||||
|
longer strings that would be more appropriately edited with a textarea
|
||||||
|
. Allow newlines to act as separators for lists, hashes, lookups and
|
||||||
|
%HTML.Allowed
|
||||||
|
. ConfigForm generates textareas instead of text inputs for lists, hashes,
|
||||||
|
lookups, text and itext fields
|
||||||
|
. Hidden element content removal genericized: %Core.HiddenElements can
|
||||||
|
be used to customize this behavior, by default <script> and <style> are
|
||||||
|
hidden
|
||||||
|
. Added HTMLPURIFIER_PREFIX constant, should be used instead of dirname(__FILE__)
|
||||||
|
. Custom ChildDef added to default include list
|
||||||
|
. URIScheme reflection improved: will not attempt to include file if class
|
||||||
|
already exists. May clobber autoload, so I need to keep an eye on it
|
||||||
|
. ConfigSchema heavily optimized, will only collect information and validate
|
||||||
|
definitions when HTMLPURIFIER_SCHEMA_STRICT is true.
|
||||||
|
. AttrDef_URI unit tests and implementation refactored
|
||||||
|
. benchmarks/ directory now protected from public view with .htaccess file;
|
||||||
|
run the tests via command line
|
||||||
|
. URI scheme is munged off if there is no authority and the scheme is the
|
||||||
|
default one
|
||||||
|
. All unit tests inherit from HTMLPurifier_Harness, not UnitTestCase
|
||||||
|
. Interface for URIScheme changed
|
||||||
|
. Generic URI object to hold components of URI added, most systems involved
|
||||||
|
in URI validation have been migrated to use it
|
||||||
|
. Custom filtering for URIs factored out to URIDefinition interface for
|
||||||
|
maximum extensibility
|
||||||
|
|
||||||
|
2.0.1, released 2007-06-27
|
||||||
|
! Tag auto-closing now based on a ChildDef heuristic rather than a
|
||||||
|
manually set auto_close array; some behavior may change
|
||||||
|
! Experimental AutoFormat functionality added: auto-paragraph and
|
||||||
|
linkify your HTML input by setting %AutoFormat.AutoParagraph and
|
||||||
|
%AutoFormat.Linkify to true
|
||||||
|
! Newlines normalized internally, and then converted back to the
|
||||||
|
value of PHP_EOL. If this is not desired, set your newline format
|
||||||
|
using %Output.Newline.
|
||||||
|
! Beta error collection, messages are implemented for the most generic
|
||||||
|
cases involving Lexing or Strategies
|
||||||
|
- Clean up special case code for <script> tags
|
||||||
|
- Reorder includes for DefinitionCache decorators, fixes a possible
|
||||||
|
missing class error
|
||||||
|
- Fixed bug where manually modified definitions were not saved via cache
|
||||||
|
(mostly harmless, except for the fact that it would be a little slower)
|
||||||
|
- Configuration objects with different serials do not clobber each
|
||||||
|
other when revision numbers are unequal
|
||||||
|
- Improve Serializer DefinitionCache directory permissions checks
|
||||||
|
- DefinitionCache no longer throws errors when it encounters old
|
||||||
|
serial files that do not conform to the current style
|
||||||
|
- Stray xmlns attributes removed from configuration documentation
|
||||||
|
- configForm.php smoketest no longer has XSS vulnerability due to
|
||||||
|
unescaped print_r output
|
||||||
|
- Printer adheres to configuration's directives on output format
|
||||||
|
- Fix improperly named form field in ConfigForm printer
|
||||||
|
. Rewire some test-cases to swallow errors rather than expect them
|
||||||
|
. HTMLDefinition printer updated with some of the new attributes
|
||||||
|
. DefinitionCache keys reordered to reflect precedence: version number,
|
||||||
|
hash, then revision number
|
||||||
|
. %Core.DefinitionCache renamed to %Cache.DefinitionImpl
|
||||||
|
. Interlinking in configuration documentation added using
|
||||||
|
Injector_PurifierLinkify
|
||||||
|
. Directives now keep track of aliases to themselves
|
||||||
|
. Error collector now requires a severity to be passed, use PHP's internal
|
||||||
|
error constants for this
|
||||||
|
. HTMLPurifier_Config::getAllowedDirectivesForForm implemented, allows
|
||||||
|
much easier selective embedding of configuration values
|
||||||
|
. Doctype objects now accept public and system DTD identifiers
|
||||||
|
. %HTML.Doctype is now constrained by specific values, to specify a custom
|
||||||
|
doctype use new %HTML.CustomDoctype
|
||||||
|
. ConfigForm truncates long directives to keep the form small, and does
|
||||||
|
not re-output namespaces
|
||||||
|
|
||||||
2.0.0, released 2007-06-20
|
2.0.0, released 2007-06-20
|
||||||
# Completely refactored HTMLModuleManager, decentralizing safety
|
# Completely refactored HTMLModuleManager, decentralizing safety
|
||||||
information
|
information
|
||||||
|
52
TODO
52
TODO
@@ -7,20 +7,13 @@ TODO List
|
|||||||
? Maybe I'll Do It
|
? Maybe I'll Do It
|
||||||
==========================
|
==========================
|
||||||
|
|
||||||
2.1 release [Refactor, refactor!]
|
If no interest is expressed for a feature that may require a considerable
|
||||||
# URI validation routines tighter (see docs/dev-code-quality.html) (COMPLEX)
|
amount of effort to implement, it may get endlessly delayed. Do not be
|
||||||
# Advanced URI filtering schemes (see docs/proposal-new-directives.txt)
|
afraid to cast your vote for the next feature to be implemented!
|
||||||
- Configuration profiles: predefined directives set with one func call
|
|
||||||
- Implement IDREF support (harder than it seems, since you cannot have
|
|
||||||
IDREFs to non-existent IDs)
|
|
||||||
- Allow non-ASCII characters in font names
|
|
||||||
- Genericize special cases in RemoveForeignElements
|
|
||||||
|
|
||||||
2.2 release [Error'ed]
|
2.2 release [Error'ed]
|
||||||
# Error logging for filtering/cleanup procedures
|
# Error logging for filtering/cleanup procedures
|
||||||
- Requires I18N facilities to be created first (COMPLEX)
|
|
||||||
- XSS-attempt detection
|
- XSS-attempt detection
|
||||||
- More fine-grained control over escaping behavior
|
|
||||||
|
|
||||||
2.3 release [Do What I Mean, Not What I Say]
|
2.3 release [Do What I Mean, Not What I Say]
|
||||||
# Additional support for poorly written HTML
|
# Additional support for poorly written HTML
|
||||||
@@ -36,26 +29,22 @@ TODO List
|
|||||||
- Remove empty inline tags<i></i>
|
- Remove empty inline tags<i></i>
|
||||||
- Append something to duplicate IDs so they're still usable (impl. note: the
|
- Append something to duplicate IDs so they're still usable (impl. note: the
|
||||||
dupe detector would also need to detect the suffix as well)
|
dupe detector would also need to detect the suffix as well)
|
||||||
|
- Externalize inline CSS to promote clean HTML
|
||||||
|
|
||||||
2.4 release [It's All About Trust] (floating)
|
2.4 release [It's All About Trust] (floating)
|
||||||
# Implement untrusted, dangerous elements/attributes
|
# Implement untrusted, dangerous elements/attributes
|
||||||
|
# Implement IDREF support (harder than it seems, since you cannot have
|
||||||
|
IDREFs to non-existent IDs)
|
||||||
|
# Frameset XHTML 1.0 and HTML 4.01 doctypes
|
||||||
|
|
||||||
3.0 release [Beyond HTML]
|
3.0 release [Beyond HTML]
|
||||||
# Legit token based CSS parsing (will require revamping almost every
|
# Legit token based CSS parsing (will require revamping almost every
|
||||||
AttrDef class)
|
AttrDef class). Probably will use CSSTidy class
|
||||||
# More control over allowed CSS properties (maybe modularize it in the
|
# More control over allowed CSS properties (maybe modularize it in the
|
||||||
same fashion!)
|
same fashion!)
|
||||||
# Formatters for plaintext (COMPLEX)
|
# Formatters for plaintext
|
||||||
- Auto-paragraphing (be sure to leverage fact that we know when things
|
|
||||||
shouldn't be paragraphed, such as lists and tables).
|
|
||||||
- Linkify URLs
|
|
||||||
- Smileys
|
- Smileys
|
||||||
- Linkification for HTML Purifier docs: notably configuration and classes
|
- Standardize token armor for all areas of processing
|
||||||
- Allow tags to be "armored", an internal flag that protects them
|
|
||||||
from validation and passes them out unharmed
|
|
||||||
- Fixes for Firefox's inability to handle COL alignment props (Bug 915)
|
|
||||||
- Automatically add non-breaking spaces to empty table cells when
|
|
||||||
empty-cells:show is applied to have compatibility with Internet Explorer
|
|
||||||
- Convert RTL/LTR override characters to <bdo> tags, or vice versa on demand.
|
- Convert RTL/LTR override characters to <bdo> tags, or vice versa on demand.
|
||||||
Also, enable disabling of directionality
|
Also, enable disabling of directionality
|
||||||
|
|
||||||
@@ -64,32 +53,37 @@ TODO List
|
|||||||
- Hooks for adding custom processors to custom namespaced tags and
|
- Hooks for adding custom processors to custom namespaced tags and
|
||||||
attributes, offer default implementation
|
attributes, offer default implementation
|
||||||
- Lots of documentation and samples
|
- Lots of documentation and samples
|
||||||
- XHTML 1.1 support
|
|
||||||
|
|
||||||
Ongoing
|
Ongoing
|
||||||
- Lots of profiling, make it faster!
|
- Lots of profiling, make it faster!
|
||||||
- Plugins for major CMSes (COMPLEX)
|
- Plugins for major CMSes (COMPLEX)
|
||||||
- WordPress (mostly written, needs beta-testing)
|
- phpBB
|
||||||
- eFiction
|
- eFiction
|
||||||
- more! (look for ones that use WYSIWYGs)
|
- more! (look for ones that use WYSIWYGs)
|
||||||
- Complete basic smoketests
|
- Complete basic smoketests
|
||||||
|
|
||||||
Unknown release (on a scratch-an-itch basis)
|
Unknown release (on a scratch-an-itch basis)
|
||||||
? Semi-lossy dumb alternate character encoding transformations
|
# CHMOD install script for PEAR installs
|
||||||
? Have 'lang' attribute be checked against official lists, achieved by
|
? Have 'lang' attribute be checked against official lists, achieved by
|
||||||
encoding all characters that have string entity equivalents
|
encoding all characters that have string entity equivalents
|
||||||
- Explain how to use HTML Purifier in non-PHP languages
|
|
||||||
- Abstract ChildDef_BlockQuote to work with all elements that only
|
- Abstract ChildDef_BlockQuote to work with all elements that only
|
||||||
allow blocks in them, required or optional
|
allow blocks in them, required or optional
|
||||||
- Reorganize Unit Tests
|
- Reorganize Unit Tests
|
||||||
- Refactor loop tests (esp. AttrDef_URI)
|
|
||||||
- Reorganize configuration directives (Create more namespaces! Get messy!)
|
- Reorganize configuration directives (Create more namespaces! Get messy!)
|
||||||
|
- Advanced URI filtering schemes (see docs/proposal-new-directives.txt)
|
||||||
|
- Implement lenient <ruby> child validation
|
||||||
|
- Explain how to use HTML Purifier in non-PHP languages / create
|
||||||
|
a simple command line stub (or complicated?)
|
||||||
|
- Fixes for Firefox's inability to handle COL alignment props (Bug 915)
|
||||||
|
- Automatically add non-breaking spaces to empty table cells when
|
||||||
|
empty-cells:show is applied to have compatibility with Internet Explorer
|
||||||
|
|
||||||
Requested
|
Requested
|
||||||
? Native content compression, whitespace stripping (don't rely on Tidy, make
|
|
||||||
sure we don't remove from <pre> or related tags)
|
|
||||||
|
|
||||||
Wontfix
|
Wontfix
|
||||||
- Non-lossy smart alternate character encoding transformations (unless
|
- Non-lossy smart alternate character encoding transformations (unless
|
||||||
patch provided)
|
patch provided)
|
||||||
- Pretty-printing HTML, users can use Tidy on the output on entire page
|
- Pretty-printing HTML: users can use Tidy on the output on entire page
|
||||||
|
- Native content compression, whitespace stripping (don't rely on Tidy, make
|
||||||
|
sure we don't remove from <pre> or related tags): use gzip if this is
|
||||||
|
really important
|
||||||
|
15
WHATSNEW
15
WHATSNEW
@@ -1,7 +1,8 @@
|
|||||||
HTML Purifier 2.0 is the culmination of two major architectural changes.
|
Version 2.1.2 is a mix of experimental features and stability updates.
|
||||||
The first is Tidy, which enables HTML Purifier to both natively support
|
Among new features: an Object module for trusted users, support for the
|
||||||
deprecated elements and also convert them to standards-compliant
|
CSS property 'border-spacing', and HTML 5 style parsing using PH5P.
|
||||||
alternatives. The second is the Advanced API, which enables users to
|
Bug fixes have resolved a few obscure issues including border-collapse:seperate,
|
||||||
create new elements and attributes with ease. Keeping in line with a
|
a DirectLex parsing error, broken HTML in printDefinition.php, and problems
|
||||||
commitment to high quality, there are also five esoteric bug-fixes and a
|
with the experimental standalone distribution. Also, there were large
|
||||||
plethora of subtle improvements that enhance the library.
|
amounts of behind-the-scenes refactoring and the removal of URIScheme
|
||||||
|
inclusion reflection.
|
||||||
|
BIN
art/100cases.png
Normal file
BIN
art/100cases.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 2.7 KiB |
1
benchmarks/.htaccess
Normal file
1
benchmarks/.htaccess
Normal file
@@ -0,0 +1 @@
|
|||||||
|
Deny from all
|
@@ -15,5 +15,3 @@ $context = new HTMLPurifier_Context();
|
|||||||
for ($i = 0; $i < 10; $i++) {
|
for ($i = 0; $i < 10; $i++) {
|
||||||
$tokens = $lexer->tokenizeHTML($input, $config, $context);
|
$tokens = $lexer->tokenizeHTML($input, $config, $context);
|
||||||
}
|
}
|
||||||
|
|
||||||
?>
|
|
12
benchmarks/Trace.php
Normal file
12
benchmarks/Trace.php
Normal file
@@ -0,0 +1,12 @@
|
|||||||
|
<?php
|
||||||
|
|
||||||
|
ini_set('xdebug.trace_format', 1);
|
||||||
|
ini_set('xdebug.show_mem_delta', true);
|
||||||
|
|
||||||
|
xdebug_start_trace(dirname(__FILE__) . '/Trace');
|
||||||
|
require_once '../library/HTMLPurifier.auto.php';
|
||||||
|
|
||||||
|
$purifier = new HTMLPurifier();
|
||||||
|
|
||||||
|
$data = $purifier->purify(file_get_contents('samples/Lexer/4.html'));
|
||||||
|
xdebug_stop_trace();
|
@@ -18,10 +18,16 @@ TODO:
|
|||||||
if (version_compare('5', PHP_VERSION, '>')) exit('Requires PHP 5 or higher.');
|
if (version_compare('5', PHP_VERSION, '>')) exit('Requires PHP 5 or higher.');
|
||||||
error_reporting(E_ALL); // probably not possible to use E_STRICT
|
error_reporting(E_ALL); // probably not possible to use E_STRICT
|
||||||
|
|
||||||
|
define('HTMLPURIFIER_SCHEMA_STRICT', true); // description data needs to be collected
|
||||||
|
|
||||||
// load dual-libraries
|
// load dual-libraries
|
||||||
require_once '../library/HTMLPurifier.auto.php';
|
require_once '../library/HTMLPurifier.auto.php';
|
||||||
require_once 'library/ConfigDoc.auto.php';
|
require_once 'library/ConfigDoc.auto.php';
|
||||||
|
|
||||||
|
$purifier = HTMLPurifier::getInstance(array(
|
||||||
|
'AutoFormat.PurifierLinkify' => true
|
||||||
|
));
|
||||||
|
|
||||||
$schema = HTMLPurifier_ConfigSchema::instance();
|
$schema = HTMLPurifier_ConfigSchema::instance();
|
||||||
$style = 'plain'; // use $_GET in the future
|
$style = 'plain'; // use $_GET in the future
|
||||||
$configdoc = new ConfigDoc();
|
$configdoc = new ConfigDoc();
|
||||||
@@ -37,4 +43,3 @@ if (php_sapi_name() != 'cli') {
|
|||||||
echo 'Files generated successfully.';
|
echo 'Files generated successfully.';
|
||||||
}
|
}
|
||||||
|
|
||||||
?>
|
|
@@ -7,4 +7,3 @@
|
|||||||
set_include_path(dirname(__FILE__) . PATH_SEPARATOR . get_include_path() );
|
set_include_path(dirname(__FILE__) . PATH_SEPARATOR . get_include_path() );
|
||||||
require_once 'ConfigDoc.php';
|
require_once 'ConfigDoc.php';
|
||||||
|
|
||||||
?>
|
|
@@ -36,4 +36,3 @@ class ConfigDoc
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
?>
|
|
@@ -36,6 +36,7 @@ class ConfigDoc_HTMLXSLTProcessor
|
|||||||
// fudges for HTML backwards compatibility
|
// fudges for HTML backwards compatibility
|
||||||
$out = str_replace('/>', ' />', $out); // <br /> not <br/>
|
$out = str_replace('/>', ' />', $out); // <br /> not <br/>
|
||||||
$out = str_replace(' xmlns=""', '', $out); // rm unnecessary xmlns
|
$out = str_replace(' xmlns=""', '', $out); // rm unnecessary xmlns
|
||||||
|
$out = str_replace(' xmlns="http://www.w3.org/1999/xhtml"', '', $out); // rm unnecessary xmlns
|
||||||
if (class_exists('Tidy')) {
|
if (class_exists('Tidy')) {
|
||||||
// cleanup output
|
// cleanup output
|
||||||
$config = array(
|
$config = array(
|
||||||
@@ -59,4 +60,3 @@ class ConfigDoc_HTMLXSLTProcessor
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
?>
|
|
@@ -23,4 +23,3 @@ class ConfigDoc_XMLSerializer
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
?>
|
|
@@ -50,6 +50,12 @@ class ConfigDoc_XMLSerializer_ConfigSchema extends ConfigDoc_XMLSerializer
|
|||||||
$dom_document->createElement('name', $name)
|
$dom_document->createElement('name', $name)
|
||||||
);
|
);
|
||||||
|
|
||||||
|
$dom_aliases = $dom_document->createElement('aliases');
|
||||||
|
$dom_directive->appendChild($dom_aliases);
|
||||||
|
foreach ($info->directiveAliases as $alias) {
|
||||||
|
$dom_aliases->appendChild($dom_document->createElement('alias', $alias));
|
||||||
|
}
|
||||||
|
|
||||||
$dom_constraints = $dom_document->createElement('constraints');
|
$dom_constraints = $dom_document->createElement('constraints');
|
||||||
$dom_directive->appendChild($dom_constraints);
|
$dom_directive->appendChild($dom_constraints);
|
||||||
|
|
||||||
@@ -115,4 +121,3 @@ class ConfigDoc_XMLSerializer_ConfigSchema extends ConfigDoc_XMLSerializer
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
?>
|
|
@@ -24,4 +24,3 @@ class ConfigDoc_XMLSerializer_Types extends ConfigDoc_XMLSerializer
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
?>
|
|
@@ -72,8 +72,16 @@
|
|||||||
<xsl:apply-templates />
|
<xsl:apply-templates />
|
||||||
</xsl:template>
|
</xsl:template>
|
||||||
<xsl:template match="directive/name">
|
<xsl:template match="directive/name">
|
||||||
|
<xsl:apply-templates select="../aliases/alias" mode="anchor" />
|
||||||
<h3 id="{../@id}"><xsl:value-of select="../@id" /></h3>
|
<h3 id="{../@id}"><xsl:value-of select="../@id" /></h3>
|
||||||
</xsl:template>
|
</xsl:template>
|
||||||
|
<xsl:template match="alias" mode="anchor">
|
||||||
|
<a id="{.}"></a>
|
||||||
|
</xsl:template>
|
||||||
|
|
||||||
|
<!-- Do not pass through -->
|
||||||
|
<xsl:template match="alias"></xsl:template>
|
||||||
|
|
||||||
<xsl:template match="directive/constraints">
|
<xsl:template match="directive/constraints">
|
||||||
<table class="constraints">
|
<table class="constraints">
|
||||||
<xsl:apply-templates />
|
<xsl:apply-templates />
|
||||||
@@ -89,8 +97,20 @@
|
|||||||
</td>
|
</td>
|
||||||
</tr>
|
</tr>
|
||||||
</xsl:if>
|
</xsl:if>
|
||||||
|
<xsl:if test="../aliases/alias">
|
||||||
|
<xsl:apply-templates select="../aliases" mode="constraints" />
|
||||||
|
</xsl:if>
|
||||||
</table>
|
</table>
|
||||||
</xsl:template>
|
</xsl:template>
|
||||||
|
<xsl:template match="directive/aliases" mode="constraints">
|
||||||
|
<th>Aliases:</th>
|
||||||
|
<td>
|
||||||
|
<xsl:for-each select="alias">
|
||||||
|
<xsl:if test="position()>1">, </xsl:if>
|
||||||
|
<xsl:value-of select="." />
|
||||||
|
</xsl:for-each>
|
||||||
|
</td>
|
||||||
|
</xsl:template>
|
||||||
<xsl:template match="directive//description">
|
<xsl:template match="directive//description">
|
||||||
<div class="description">
|
<div class="description">
|
||||||
<xsl:copy-of select="div/node()" />
|
<xsl:copy-of select="div/node()" />
|
||||||
|
@@ -11,8 +11,7 @@ docs/examples/demo.php - ad hoc HTML/PHP soup to the extreme
|
|||||||
|
|
||||||
AttrDef - a lot of duplication, more generic classes need to be created;
|
AttrDef - a lot of duplication, more generic classes need to be created;
|
||||||
a lot of strtolower() calls, no legit casing
|
a lot of strtolower() calls, no legit casing
|
||||||
Class - doesn't support Unicode characters (fringe); uses regular
|
Class - doesn't support Unicode characters (fringe); uses regular expressions
|
||||||
expressions
|
|
||||||
Lang - code duplication; premature optimization
|
Lang - code duplication; premature optimization
|
||||||
Length - easily mistaken for CSSLength
|
Length - easily mistaken for CSSLength
|
||||||
URI - multiple regular expressions; missing validation for parts (?)
|
URI - multiple regular expressions; missing validation for parts (?)
|
||||||
@@ -22,9 +21,6 @@ ConfigSchema - redefinition is a mess
|
|||||||
Strategy
|
Strategy
|
||||||
FixNesting - cannot bubble nodes out of structures, duplicated checks
|
FixNesting - cannot bubble nodes out of structures, duplicated checks
|
||||||
for special-case parent node
|
for special-case parent node
|
||||||
MakeWellFormed - insufficient automatic closing definitions (check HTML
|
|
||||||
spec for optional end tags, also, closing based on type (block/inline)
|
|
||||||
might be efficient).
|
|
||||||
RemoveForeignElements - should be run in parallel with MakeWellFormed
|
RemoveForeignElements - should be run in parallel with MakeWellFormed
|
||||||
URIScheme - needs to have callable generic checks
|
URIScheme - needs to have callable generic checks
|
||||||
mailto - doesn't validate emails, doesn't validate querystring
|
mailto - doesn't validate emails, doesn't validate querystring
|
||||||
|
@@ -39,7 +39,7 @@ thead th {text-align:left;padding:0.1em;background-color:#EEE;}
|
|||||||
<table cellspacing="0"><tbody>
|
<table cellspacing="0"><tbody>
|
||||||
<tr><td class="impl-yes">Implemented</td></tr>
|
<tr><td class="impl-yes">Implemented</td></tr>
|
||||||
<tr><td class="impl-partial">Partially implemented</td></tr>
|
<tr><td class="impl-partial">Partially implemented</td></tr>
|
||||||
<tr><td class="impl-no">Will not implement</td></tr>
|
<tr><td class="impl-no">Not a priority to implement</td></tr>
|
||||||
<tr><td class="danger">Dangerous attribute/property</td></tr>
|
<tr><td class="danger">Dangerous attribute/property</td></tr>
|
||||||
<tr><td class="css1">Present in CSS1</td></tr>
|
<tr><td class="css1">Present in CSS1</td></tr>
|
||||||
<tr><td class="feature">Feature, requires extra work</td></tr>
|
<tr><td class="feature">Feature, requires extra work</td></tr>
|
||||||
@@ -118,6 +118,7 @@ thead th {text-align:left;padding:0.1em;background-color:#EEE;}
|
|||||||
<tbody>
|
<tbody>
|
||||||
<tr><th colspan="2">Table</th></tr>
|
<tr><th colspan="2">Table</th></tr>
|
||||||
<tr class="impl-yes"><td>border-collapse</td><td>ENUM(collapse, separate)</td></tr>
|
<tr class="impl-yes"><td>border-collapse</td><td>ENUM(collapse, separate)</td></tr>
|
||||||
|
<tr class="impl-yes"><td>border-spacing</td><td>MULTIPLE</td></tr>
|
||||||
<tr class="impl-yes"><td>caption-side</td><td>ENUM(top, bottom)</td></tr>
|
<tr class="impl-yes"><td>caption-side</td><td>ENUM(top, bottom)</td></tr>
|
||||||
<tr class="feature"><td>empty-cells</td><td>ENUM(show, hide), No IE support makes this useless,
|
<tr class="feature"><td>empty-cells</td><td>ENUM(show, hide), No IE support makes this useless,
|
||||||
possible fix with &nbsp;? Unknown release milestone.</td></tr>
|
possible fix with &nbsp;? Unknown release milestone.</td></tr>
|
||||||
|
@@ -17,11 +17,6 @@
|
|||||||
<div id="index">Return to the <a href="index.html">index</a>.</div>
|
<div id="index">Return to the <a href="index.html">index</a>.</div>
|
||||||
<div id="home"><a href="http://htmlpurifier.org/">HTML Purifier</a> End-User Documentation</div>
|
<div id="home"><a href="http://htmlpurifier.org/">HTML Purifier</a> End-User Documentation</div>
|
||||||
|
|
||||||
<div id="applicability">
|
|
||||||
This document covers currently unreleased functionality and
|
|
||||||
only applies to recent SVN checkouts.
|
|
||||||
</div>
|
|
||||||
|
|
||||||
<p>
|
<p>
|
||||||
You may have heard of the <a href="dev-advanced-api.html">Advanced API</a>.
|
You may have heard of the <a href="dev-advanced-api.html">Advanced API</a>.
|
||||||
If you're interested in reading dry prose and boring functional
|
If you're interested in reading dry prose and boring functional
|
||||||
@@ -37,7 +32,7 @@
|
|||||||
Before we even write any code, it is paramount to consider whether or
|
Before we even write any code, it is paramount to consider whether or
|
||||||
not the code we're writing is necessary or not. HTML Purifier, by default,
|
not the code we're writing is necessary or not. HTML Purifier, by default,
|
||||||
contains a large set of elements and attributes: large enough so that
|
contains a large set of elements and attributes: large enough so that
|
||||||
<em>any</em> element or attribute in XHTML 1.0 (and its HTML variant)
|
<em>any</em> element or attribute in XHTML 1.0 or 1.1 (and its HTML variants)
|
||||||
that can be safely used by the general public is implemented.
|
that can be safely used by the general public is implemented.
|
||||||
</p>
|
</p>
|
||||||
|
|
||||||
@@ -81,11 +76,12 @@
|
|||||||
<h3>XHTML 1.1</h3>
|
<h3>XHTML 1.1</h3>
|
||||||
|
|
||||||
<p>
|
<p>
|
||||||
We have not implemented the
|
As of HTMLPurifier 2.1.0, we have implemented the
|
||||||
<a href="http://www.w3.org/TR/2001/REC-ruby-20010531/">Ruby module</a>,
|
<a href="http://www.w3.org/TR/2001/REC-ruby-20010531/">Ruby module</a>,
|
||||||
which defines a set of tags
|
which defines a set of tags
|
||||||
for publishing short annotations for text, used mostly in Japanese
|
for publishing short annotations for text, used mostly in Japanese
|
||||||
and Chinese school texts.
|
and Chinese school texts, but applicable for positioning any text (not
|
||||||
|
limited to translations) above or below other corresponding text.
|
||||||
</p>
|
</p>
|
||||||
|
|
||||||
<h3>XHTML 2.0</h3>
|
<h3>XHTML 2.0</h3>
|
||||||
@@ -497,10 +493,11 @@ $def =& $config->getHTMLDefinition(true);
|
|||||||
<p>
|
<p>
|
||||||
The <code>(%flow;)*</code> indicates the allowed children of the
|
The <code>(%flow;)*</code> indicates the allowed children of the
|
||||||
<code>li</code> tag: <code>li</code> allows any number of flow
|
<code>li</code> tag: <code>li</code> allows any number of flow
|
||||||
elements as its children. In HTML Purifier, we'd write it like
|
elements as its children. (The <code>- O</code> allows the closing tag to be
|
||||||
<code>Flow</code> (here's where the content sets we were
|
omitted, though in XML this is not allowed.) In HTML Purifier,
|
||||||
discussing earlier come into play). There are three shorthand content models you
|
we'd write it like <code>Flow</code> (here's where the content sets
|
||||||
can specify:
|
we were discussing earlier come into play). There are three shorthand
|
||||||
|
content models you can specify:
|
||||||
</p>
|
</p>
|
||||||
|
|
||||||
<table class="table">
|
<table class="table">
|
||||||
@@ -673,12 +670,22 @@ $def =& $config->getHTMLDefinition(true);
|
|||||||
Common is a combination of the above-mentioned collections.
|
Common is a combination of the above-mentioned collections.
|
||||||
</p>
|
</p>
|
||||||
|
|
||||||
|
<p class="aside">
|
||||||
|
Readers familiar with the modularization may have noticed that the Core
|
||||||
|
attribute collection differs from that specified by the <a
|
||||||
|
href="http://www.w3.org/TR/xhtml-modularization/abstract_modules.html#s_commonatts">abstract
|
||||||
|
modules of the XHTML Modularization 1.1</a>. We believe this section
|
||||||
|
to be in error, as <code>br</code> permits the use of the <code>style</code>
|
||||||
|
attribute even though it uses the <code>Core</code> collection, and
|
||||||
|
the DTD and XML Schemas supplied by W3C support our interpretation.
|
||||||
|
</p>
|
||||||
|
|
||||||
<h3>Attributes</h3>
|
<h3>Attributes</h3>
|
||||||
|
|
||||||
<p>
|
<p>
|
||||||
If you didn't read the <a href="#addAttribute">previous section on
|
If you didn't read the <a href="#addAttribute">earlier section on
|
||||||
adding attributes</a>, read it now. The last parameter is simply
|
adding attributes</a>, read it now. The last parameter is simply
|
||||||
array of attribute names to attribute implementations, in the exact
|
an array of attribute names to attribute implementations, in the exact
|
||||||
same format as <code>addAttribute()</code>.
|
same format as <code>addAttribute()</code>.
|
||||||
</p>
|
</p>
|
||||||
|
|
||||||
|
@@ -58,7 +58,7 @@ appear elsewhere on the document. The method is simple:</p>
|
|||||||
|
|
||||||
<pre>$config->set('HTML', 'EnableAttrID', true);
|
<pre>$config->set('HTML', 'EnableAttrID', true);
|
||||||
$config->set('Attr', 'IDBlacklist', array(
|
$config->set('Attr', 'IDBlacklist', array(
|
||||||
'list', 'of', 'attributes', 'that', 'are', 'forbidden'
|
'list', 'of', 'attribute', 'values', 'that', 'are', 'forbidden'
|
||||||
));</pre>
|
));</pre>
|
||||||
|
|
||||||
<p>That being said, there are some notable drawbacks. First of all, you have to
|
<p>That being said, there are some notable drawbacks. First of all, you have to
|
||||||
@@ -71,7 +71,7 @@ to possible standards-compliance issues.</p>
|
|||||||
<p>Furthermore, this position becomes untenable when a single web page must hold
|
<p>Furthermore, this position becomes untenable when a single web page must hold
|
||||||
multiple portions of user-submitted content. Since there's obviously no way
|
multiple portions of user-submitted content. Since there's obviously no way
|
||||||
to find out before-hand what IDs users will use, the blacklist is helpless.
|
to find out before-hand what IDs users will use, the blacklist is helpless.
|
||||||
And even since HTML Purifier validates each segment seperately, perhaps doing
|
And since HTML Purifier validates each segment separately, perhaps doing
|
||||||
so at different times, it would be extremely difficult to dynamically update
|
so at different times, it would be extremely difficult to dynamically update
|
||||||
the blacklist in between runs.</p>
|
the blacklist in between runs.</p>
|
||||||
|
|
||||||
|
@@ -10,9 +10,7 @@ to be effective. Things to remember:
|
|||||||
|
|
||||||
2. IDs: see enduser-id.html for more info
|
2. IDs: see enduser-id.html for more info
|
||||||
|
|
||||||
3. Links: document pending feature completion
|
3. URIs: see enduser-uri-filter.html
|
||||||
Rudimentary blacklisting, we should also allow only relative URIs. We
|
|
||||||
need a doc to explain the stuff.
|
|
||||||
|
|
||||||
4. CSS: document pending
|
4. CSS: document pending
|
||||||
Explain which CSS styles we blocked and why.
|
Explain which CSS styles we blocked and why.
|
||||||
|
@@ -16,18 +16,13 @@
|
|||||||
<div id="index">Return to the <a href="index.html">index</a>.</div>
|
<div id="index">Return to the <a href="index.html">index</a>.</div>
|
||||||
<div id="home"><a href="http://htmlpurifier.org/">HTML Purifier</a> End-User Documentation</div>
|
<div id="home"><a href="http://htmlpurifier.org/">HTML Purifier</a> End-User Documentation</div>
|
||||||
|
|
||||||
<div id="applicability">
|
|
||||||
This document covers currently unreleased functionality and
|
|
||||||
only applies to recent SVN checkouts.
|
|
||||||
</div>
|
|
||||||
|
|
||||||
<p>You've probably heard of HTML Tidy, Dave Raggett's little piece
|
<p>You've probably heard of HTML Tidy, Dave Raggett's little piece
|
||||||
of software that cleans up poorly written HTML. Let me say it straight
|
of software that cleans up poorly written HTML. Let me say it straight
|
||||||
out:</p>
|
out:</p>
|
||||||
|
|
||||||
<p class="emphasis">This ain't HTML Tidy!</p>
|
<p class="emphasis">This ain't HTML Tidy!</p>
|
||||||
|
|
||||||
<p>Rather, Tidy stands for a cool set of Tidy-inspired in HTML Purifier
|
<p>Rather, Tidy stands for a cool set of Tidy-inspired features in HTML Purifier
|
||||||
that allows users to submit deprecated elements and attributes and get
|
that allows users to submit deprecated elements and attributes and get
|
||||||
valid strict markup back. For example:</p>
|
valid strict markup back. For example:</p>
|
||||||
|
|
||||||
@@ -38,8 +33,8 @@ valid strict markup back. For example:</p>
|
|||||||
<pre><div style="text-align:center;">Centered</div></pre>
|
<pre><div style="text-align:center;">Centered</div></pre>
|
||||||
|
|
||||||
<p>...when this particular fix is run on the HTML. This tutorial will give
|
<p>...when this particular fix is run on the HTML. This tutorial will give
|
||||||
you down the lowdown of what exactly HTML Purifier will do when Tidy
|
you the lowdown of what exactly HTML Purifier will do when Tidy
|
||||||
is on, and how to fine tune this behavior. Once again, <strong>you do
|
is on, and how to fine-tune this behavior. Once again, <strong>you do
|
||||||
not need Tidy installed on your PHP to use these features!</strong></p>
|
not need Tidy installed on your PHP to use these features!</strong></p>
|
||||||
|
|
||||||
<h2>What does it do?</h2>
|
<h2>What does it do?</h2>
|
||||||
@@ -226,7 +221,7 @@ general syntax:</p>
|
|||||||
|
|
||||||
<p>The lowdown is, quite frankly, HTML Purifier's default settings are
|
<p>The lowdown is, quite frankly, HTML Purifier's default settings are
|
||||||
probably good enough. The next step is to bump the level up to heavy,
|
probably good enough. The next step is to bump the level up to heavy,
|
||||||
and if that still doesn't satisfy your appetite, do some fine tuning.
|
and if that still doesn't satisfy your appetite, do some fine-tuning.
|
||||||
Other than that, don't worry about it: this all works silently and
|
Other than that, don't worry about it: this all works silently and
|
||||||
effectively in the background.</p>
|
effectively in the background.</p>
|
||||||
|
|
||||||
|
201
docs/enduser-uri-filter.html
Normal file
201
docs/enduser-uri-filter.html
Normal file
@@ -0,0 +1,201 @@
|
|||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
|
||||||
|
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
|
||||||
|
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"><head>
|
||||||
|
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
|
||||||
|
<meta name="description" content="Tutorial for creating custom URI filters." />
|
||||||
|
<link rel="stylesheet" type="text/css" href="style.css" />
|
||||||
|
|
||||||
|
<title>URI Filters - HTML Purifier</title>
|
||||||
|
|
||||||
|
</head><body>
|
||||||
|
|
||||||
|
<h1>URI Filters</h1>
|
||||||
|
|
||||||
|
<div id="filing">Filed under End-User</div>
|
||||||
|
<div id="index">Return to the <a href="index.html">index</a>.</div>
|
||||||
|
<div id="home"><a href="http://htmlpurifier.org/">HTML Purifier</a> End-User Documentation</div>
|
||||||
|
|
||||||
|
<p>
|
||||||
|
This is a quick and dirty document to get you on your way to writing
|
||||||
|
custom URI filters for your own URL filtering needs. Why would you
|
||||||
|
want to write a URI filter? If you need URIs your users put into
|
||||||
|
HTML to magically change into a different URI, this is
|
||||||
|
exactly what you need!
|
||||||
|
</p>
|
||||||
|
|
||||||
|
<h2>Creating the class</h2>
|
||||||
|
|
||||||
|
<p>
|
||||||
|
Any URI filter you make will be a subclass of <code>HTMLPurifier_URIFilter</code>.
|
||||||
|
The scaffolding is thus:
|
||||||
|
</p>
|
||||||
|
|
||||||
|
<pre>class HTMLPurifier_URIFilter_<strong>NameOfFilter</strong> extends HTMLPurifier_URIFilter
|
||||||
|
{
|
||||||
|
var $name = '<strong>NameOfFilter</strong>';
|
||||||
|
function prepare($config) {}
|
||||||
|
function filter(&$uri, $config, &$context) {}
|
||||||
|
}</pre>
|
||||||
|
|
||||||
|
<p>
|
||||||
|
Fill in the variable <code>$name</code> with the name of your filter, and
|
||||||
|
take a look at the two methods. <code>prepare()</code> is an initialization
|
||||||
|
method that is called only once, before any filtering has been done of the
|
||||||
|
HTML. Use it to perform any costly setup work that only needs to be done
|
||||||
|
once. <code>filter()</code> is the guts and innards of our filter:
|
||||||
|
it takes the URI and does whatever needs to be done to it.
|
||||||
|
</p>
|
||||||
|
|
||||||
|
<p>
|
||||||
|
If you've worked with HTML Purifier, you'll recognize the <code>$config</code>
|
||||||
|
and <code>$context</code> parameters. On the other hand, <code>$uri</code>
|
||||||
|
is something unique to this section of the application: it's a
|
||||||
|
<code>HTMLPurifier_URI</code> object. The interface is thus:
|
||||||
|
</p>
|
||||||
|
|
||||||
|
<pre>class HTMLPurifier_URI
|
||||||
|
{
|
||||||
|
var $scheme, $userinfo, $host, $port, $path, $query, $fragment;
|
||||||
|
function HTMLPurifier_URI($scheme, $userinfo, $host, $port, $path, $query, $fragment);
|
||||||
|
function toString();
|
||||||
|
function copy();
|
||||||
|
function getSchemeObj($config, &$context);
|
||||||
|
function validate($config, &$context);
|
||||||
|
}</pre>
|
||||||
|
|
||||||
|
<p>
|
||||||
|
The first three methods are fairly self-explanatory: you have a constructor,
|
||||||
|
a serializer, and a cloner. Generally, you won't be using them when
|
||||||
|
you are manipulating the URI objects themselves.
|
||||||
|
<code>getSchemeObj()</code> is a special purpose method that returns
|
||||||
|
a <code>HTMLPurifier_URIScheme</code> object corresponding to the specific
|
||||||
|
URI at hand. <code>validate()</code> performs general-purpose validation
|
||||||
|
on the internal components of a URI. Once again, you don't need to
|
||||||
|
worry about these: they've already been handled for you.
|
||||||
|
</p>
|
||||||
|
|
||||||
|
<h2>URI format</h2>
|
||||||
|
|
||||||
|
<p>
|
||||||
|
As a URIFilter, we're interested in the member variables of the URI object.
|
||||||
|
</p>
|
||||||
|
|
||||||
|
<table class="quick"><tbody>
|
||||||
|
<tr><th>Scheme</th> <td>The protocol for identifying (and possibly locating) a resource (http, ftp, https)</td></tr>
|
||||||
|
<tr><th>Userinfo</th> <td>User information such as a username (bob)</td></tr>
|
||||||
|
<tr><th>Host</th> <td>Domain name or IP address of the server (example.com, 127.0.0.1)</td></tr>
|
||||||
|
<tr><th>Port</th> <td>Network port number for the server (80, 12345)</td></tr>
|
||||||
|
<tr><th>Path</th> <td>Data that identifies the resource, possibly hierarchical (/path/to, ed@example.com)</td></tr>
|
||||||
|
<tr><th>Query</th> <td>String of information to be interpreted by the resource (?q=search-term)</td></tr>
|
||||||
|
<tr><th>Fragment</th> <td>Additional information for the resource after retrieval (#bookmark)</td></tr>
|
||||||
|
</tbody></table>
|
||||||
|
|
||||||
|
<p>
|
||||||
|
Because the URI is presented to us in this form, and not
|
||||||
|
<code>http://bob@example.com:8080/foo.php?q=string#hash</code>, it saves us
|
||||||
|
a lot of trouble in having to parse the URI every time we want to filter
|
||||||
|
it. For the record, the above URI has the following components:
|
||||||
|
</p>
|
||||||
|
|
||||||
|
<table class="quick"><tbody>
|
||||||
|
<tr><th>Scheme</th> <td>http</td></tr>
|
||||||
|
<tr><th>Userinfo</th> <td>bob</td></tr>
|
||||||
|
<tr><th>Host</th> <td>example.com</td></tr>
|
||||||
|
<tr><th>Port</th> <td>8080</td></tr>
|
||||||
|
<tr><th>Path</th> <td>/foo.php</td></tr>
|
||||||
|
<tr><th>Query</th> <td>q=string</td></tr>
|
||||||
|
<tr><th>Fragment</th> <td>hash</td></tr>
|
||||||
|
</tbody></table>
|
||||||
|
|
||||||
|
<p>
|
||||||
|
Note that there is no question mark or octothorpe in the query or
|
||||||
|
fragment: these get removed during parsing.
|
||||||
|
</p>
|
||||||
|
|
||||||
|
<p>
|
||||||
|
With this information, you can get straight to implementing your
|
||||||
|
<code>filter()</code> method. But one more thing...
|
||||||
|
</p>
|
||||||
|
|
||||||
|
<h2>Return value: Boolean, not URI</h2>
|
||||||
|
|
||||||
|
<p>
|
||||||
|
You may have noticed that the URI is being passed in by reference.
|
||||||
|
This means that whatever changes you make to it, those changes will
|
||||||
|
be reflected in the URI object the caller had. <strong>Do not
|
||||||
|
return the URI object: it is unnecessary and will cause bugs.</strong>
|
||||||
|
Instead, return a boolean value, true if the filtering was successful,
|
||||||
|
or false if the URI is beyond repair and needs to be axed.
|
||||||
|
</p>
|
||||||
|
|
||||||
|
<p>
|
||||||
|
Let's suppose I wanted to write a filter that de-internationalized domain
|
||||||
|
names by converting them to <a href="http://en.wikipedia.org/wiki/Punycode">Punycode</a>.
|
||||||
|
Assuming that <code>punycode_encode($input)</code> converts <code>$input</code> to
|
||||||
|
Punycode and returns <code>false</code> on failure:
|
||||||
|
</p>
|
||||||
|
|
||||||
|
<pre>class HTMLPurifier_URIFilter_ConvertIDNToPunycode extends HTMLPurifier_URIFilter
|
||||||
|
{
|
||||||
|
var $name = 'ConvertIDNToPunycode';
|
||||||
|
function filter(&$uri, $config, &$context) {
|
||||||
|
if (is_null($uri->host)) return true;
|
||||||
|
if ($uri->host == utf8_decode($uri->host)) {
|
||||||
|
// is ASCII, abort
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
$host = punycode_encode($uri->host);
|
||||||
|
if ($host === false) return false;
|
||||||
|
$uri->host = $host;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}</pre>
|
||||||
|
|
||||||
|
<p>
|
||||||
|
Notice I did not <code>return $uri;</code>.
|
||||||
|
</p>
|
||||||
|
|
||||||
|
<h2>Activating your filter</h2>
|
||||||
|
|
||||||
|
<p>
|
||||||
|
Having a filter is all well and good, but you need to tell HTML Purifier
|
||||||
|
to use it. Fortunately, this part's simple:
|
||||||
|
</p>
|
||||||
|
|
||||||
|
<pre>$uri =& $config->getDefinition('URI');
|
||||||
|
$uri->addFilter(new HTMLPurifier_URIFilter_<strong>NameOfFilter</strong>());</pre>
|
||||||
|
|
||||||
|
<p>
|
||||||
|
If you want to be really fancy, you can define a configuration directive
|
||||||
|
for your filter and have HTML Purifier automatically manage whether or
|
||||||
|
not your filter gets loaded (this is how internal filters manage
|
||||||
|
things):
|
||||||
|
</p>
|
||||||
|
|
||||||
|
<pre>HTMLPurifier_ConfigSchema::define(
|
||||||
|
'URI', '<strong>NameOfFilter</strong>', false, 'bool',
|
||||||
|
'<strong>What your filter does.</strong>'
|
||||||
|
);
|
||||||
|
$uri =& $config->getDefinition('URI', true);
|
||||||
|
$uri->registerFilter(new HTMLPurifier_URIFilter_<strong>NameOfFilter</strong>());
|
||||||
|
</pre>
|
||||||
|
|
||||||
|
<p>
|
||||||
|
Now, your filter will only be called when %URI.<strong>NameOfFilter</strong>
|
||||||
|
is set to true.
|
||||||
|
</p>
|
||||||
|
|
||||||
|
<h2>Examples</h2>
|
||||||
|
|
||||||
|
<p>
|
||||||
|
Check the
|
||||||
|
<a href="http://htmlpurifier.org/svnroot/htmlpurifier/trunk/library/HTMLPurifier/URIFilter/">URIFilter</a>
|
||||||
|
directory for more implementation examples, and see <a href="http://htmlpurifier.org/svnroot/htmlpurifier/trunk/docs/proposal-new-directives.txt">the
|
||||||
|
new directives proposal document</a> for ideas on what could be implemented
|
||||||
|
as a filter.
|
||||||
|
</p>
|
||||||
|
|
||||||
|
<div id="version">$Id$</div>
|
||||||
|
|
||||||
|
</body></html>
|
@@ -96,7 +96,7 @@ which can be a rewarding (but difficult) task.</p>
|
|||||||
<h2 id="findcharset">Finding the real encoding</h2>
|
<h2 id="findcharset">Finding the real encoding</h2>
|
||||||
|
|
||||||
<p>In the beginning, there was ASCII, and things were simple. But they
|
<p>In the beginning, there was ASCII, and things were simple. But they
|
||||||
weren't good, for no one could write in Cryllic or Thai. So there
|
weren't good, for no one could write in Cyrillic or Thai. So there
|
||||||
exploded a proliferation of character encodings to remedy the problem
|
exploded a proliferation of character encodings to remedy the problem
|
||||||
by extending the characters ASCII could express. This ridiculously
|
by extending the characters ASCII could express. This ridiculously
|
||||||
simplified version of the history of character encodings shows us that
|
simplified version of the history of character encodings shows us that
|
||||||
@@ -138,7 +138,7 @@ browser:</p>
|
|||||||
<dd>View > Encoding: bulleted item is unofficial name</dd>
|
<dd>View > Encoding: bulleted item is unofficial name</dd>
|
||||||
</dl>
|
</dl>
|
||||||
|
|
||||||
<p>Internet Explorer won't give you the mime (i.e. useful/real) name of the
|
<p>Internet Explorer won't give you the MIME (i.e. useful/real) name of the
|
||||||
character encoding, so you'll have to look it up using their description.
|
character encoding, so you'll have to look it up using their description.
|
||||||
Some common ones:</p>
|
Some common ones:</p>
|
||||||
|
|
||||||
@@ -216,6 +216,12 @@ if your <code>META</code> tag claims that either:</p>
|
|||||||
|
|
||||||
<h2 id="fixcharset">Fixing the encoding</h2>
|
<h2 id="fixcharset">Fixing the encoding</h2>
|
||||||
|
|
||||||
|
<p class="aside">The advice given here is for pages being served as
|
||||||
|
vanilla <code>text/html</code>. Different practices must be used
|
||||||
|
for <code>application/xml</code> or <code>application/xhtml+xml</code>, see
|
||||||
|
<a href="http://www.w3.org/TR/2002/NOTE-xhtml-media-types-20020430/">W3C's
|
||||||
|
document on XHTML media types</a> for more information.</p>
|
||||||
|
|
||||||
<p>If your <code>META</code> encoding and your real encoding match,
|
<p>If your <code>META</code> encoding and your real encoding match,
|
||||||
savvy! You can skip this section. If they don't...</p>
|
savvy! You can skip this section. If they don't...</p>
|
||||||
|
|
||||||
@@ -231,7 +237,7 @@ of your real encoding.</p>
|
|||||||
why the character encoding should be explicitly stated. When the
|
why the character encoding should be explicitly stated. When the
|
||||||
browser isn't told what the character encoding of a text is, it
|
browser isn't told what the character encoding of a text is, it
|
||||||
has to guess: and sometimes the guess is wrong. Hackers can manipulate
|
has to guess: and sometimes the guess is wrong. Hackers can manipulate
|
||||||
this guess in order to slip XSS pass filters and then fool the
|
this guess in order to slip XSS past filters and then fool the
|
||||||
browser into executing it as active code. A great example of this
|
browser into executing it as active code. A great example of this
|
||||||
is the <a href="http://shiflett.org/archive/177">Google UTF-7
|
is the <a href="http://shiflett.org/archive/177">Google UTF-7
|
||||||
exploit</a>.</p>
|
exploit</a>.</p>
|
||||||
@@ -302,7 +308,8 @@ languages</a>. The appropriate code is:</p>
|
|||||||
|
|
||||||
<p>...replacing UTF-8 with whatever your embedded encoding is.
|
<p>...replacing UTF-8 with whatever your embedded encoding is.
|
||||||
This code must come before any output, so be careful about
|
This code must come before any output, so be careful about
|
||||||
stray whitespace in your application.</p>
|
stray whitespace in your application (i.e., any whitespace before
|
||||||
|
output excluding whitespace within <?php ?> tags).</p>
|
||||||
|
|
||||||
<h4 id="fixcharset-server-phpini">PHP ini directive</h4>
|
<h4 id="fixcharset-server-phpini">PHP ini directive</h4>
|
||||||
|
|
||||||
@@ -313,8 +320,8 @@ header call: <code><a href="http://php.net/ini.core#ini.default-charset">default
|
|||||||
|
|
||||||
<p>...will also do the trick. If PHP is running as an Apache module (and
|
<p>...will also do the trick. If PHP is running as an Apache module (and
|
||||||
not as FastCGI, consult
|
not as FastCGI, consult
|
||||||
<a href="http://php.net/phpinfo">phpinfo</a>() for details), you can even use htaccess do apply this property
|
<a href="http://php.net/phpinfo">phpinfo</a>() for details), you can even use htaccess to apply this property
|
||||||
globally:</p>
|
across many PHP files:</p>
|
||||||
|
|
||||||
<pre><a href="http://php.net/configuration.changes#configuration.changes.apache">php_value</a> default_charset "UTF-8"</pre>
|
<pre><a href="http://php.net/configuration.changes#configuration.changes.apache">php_value</a> default_charset "UTF-8"</pre>
|
||||||
|
|
||||||
@@ -360,10 +367,11 @@ to send anything at all:</p>
|
|||||||
|
|
||||||
<pre><a href="http://httpd.apache.org/docs/1.3/mod/core.html#adddefaultcharset">AddDefaultCharset</a> Off</pre>
|
<pre><a href="http://httpd.apache.org/docs/1.3/mod/core.html#adddefaultcharset">AddDefaultCharset</a> Off</pre>
|
||||||
|
|
||||||
<p>...making your <code>META</code> tags the sole source of
|
<p>...making your internal charset declaration (usually the <code>META</code> tags)
|
||||||
character encoding information. In these cases, it is
|
the sole source of character encoding
|
||||||
<em>especially</em> important to make sure you have valid <code>META</code>
|
information. In these cases, it is <em>especially</em> important to make
|
||||||
tags on your pages and all the text before them is ASCII.</p>
|
sure you have valid <code>META</code> tags on your pages and all the
|
||||||
|
text before them is ASCII.</p>
|
||||||
|
|
||||||
<blockquote class="aside"><p>These directives can also be
|
<blockquote class="aside"><p>These directives can also be
|
||||||
placed in httpd.conf file for Apache, but
|
placed in httpd.conf file for Apache, but
|
||||||
@@ -428,28 +436,30 @@ IIS to change character encodings, I'd be grateful.</p>
|
|||||||
|
|
||||||
<p><code>META</code> tags are the most common source of embedded
|
<p><code>META</code> tags are the most common source of embedded
|
||||||
encodings, but they can also come from somewhere else: XML
|
encodings, but they can also come from somewhere else: XML
|
||||||
processing instructions. They look like:</p>
|
Declarations. They look like:</p>
|
||||||
|
|
||||||
<pre><?xml version="1.0" encoding="UTF-8"?></pre>
|
<pre><?xml version="1.0" encoding="UTF-8"?></pre>
|
||||||
|
|
||||||
<p>...and are most often found in XML documents (including XHTML).</p>
|
<p>...and are most often found in XML documents (including XHTML).</p>
|
||||||
|
|
||||||
<p>For XHTML, this processing instruction theoretically
|
<p>For XHTML, this XML Declaration theoretically
|
||||||
overrides the <code>META</code> tag. In reality, this happens only when the
|
overrides the <code>META</code> tag. In reality, this happens only when the
|
||||||
XHTML is actually served as legit XML and not HTML, which is almost always
|
XHTML is actually served as legit XML and not HTML, which is almost always
|
||||||
never due to Internet Explorer's lack of support for
|
never due to Internet Explorer's lack of support for
|
||||||
<code>application/xhtml+xml</code> (even though doing so is often
|
<code>application/xhtml+xml</code> (even though doing so is often
|
||||||
argued to be <a href="http://www.hixie.ch/advocacy/xhtml">good practice</a>).</p>
|
argued to be <a href="http://www.hixie.ch/advocacy/xhtml">good
|
||||||
|
practice</a> and is required by the XHTML 1.1 specification).</p>
|
||||||
|
|
||||||
<p>For XML, however, this processing instruction is extremely important.
|
<p>For XML, however, this XML Declaration is extremely important.
|
||||||
Since most webservers are not configured to send charsets for .xml files,
|
Since most webservers are not configured to send charsets for .xml files,
|
||||||
this is the only thing a parser has to go on. Furthermore, the default
|
this is the only thing a parser has to go on. Furthermore, the default
|
||||||
for XML files is UTF-8, which often butts heads with more common
|
for XML files is UTF-8, which often butts heads with more common
|
||||||
ISO-8859-1 encoding (you see this in garbled RSS feeds).</p>
|
ISO-8859-1 encoding (you see this in garbled RSS feeds).</p>
|
||||||
|
|
||||||
<p>In short, if you use XHTML and have gone through the
|
<p>In short, if you use XHTML and have gone through the
|
||||||
trouble of adding the XML header, make sure it jives
|
trouble of adding the XML Declaration, make sure it jibes
|
||||||
with your <code>META</code> tags and HTTP headers.</p>
|
with your <code>META</code> tags (which should only be present
|
||||||
|
if served in text/html) and HTTP headers.</p>
|
||||||
|
|
||||||
<h3 id="fixcharset-internals">Inside the process</h3>
|
<h3 id="fixcharset-internals">Inside the process</h3>
|
||||||
|
|
||||||
@@ -506,7 +516,7 @@ usage in one language sometimes requires the occasional special character
|
|||||||
that, without surprise, is not available in your character set. Sometimes
|
that, without surprise, is not available in your character set. Sometimes
|
||||||
developers get around this by adding support for multiple encodings: when
|
developers get around this by adding support for multiple encodings: when
|
||||||
using Chinese, use Big5, when using Japanese, use Shift-JIS, when
|
using Chinese, use Big5, when using Japanese, use Shift-JIS, when
|
||||||
using Greek, etc. Other times, they use character entities with great
|
using Greek, etc. Other times, they use character references with great
|
||||||
zeal.</p>
|
zeal.</p>
|
||||||
|
|
||||||
<p>UTF-8, however, obviates the need for any of these complicated
|
<p>UTF-8, however, obviates the need for any of these complicated
|
||||||
@@ -520,14 +530,14 @@ you don't have to use those user-unfriendly entities.</p>
|
|||||||
|
|
||||||
<p>Websites encoded in Latin-1 (ISO-8859-1) which occasionally need
|
<p>Websites encoded in Latin-1 (ISO-8859-1) which occasionally need
|
||||||
a special character outside of their scope often will use a character
|
a special character outside of their scope often will use a character
|
||||||
entity to achieve the desired effect. For instance, θ can be
|
entity reference to achieve the desired effect. For instance, θ can be
|
||||||
written <code>&theta;</code>, regardless of the character encoding's
|
written <code>&theta;</code>, regardless of the character encoding's
|
||||||
support of Greek letters.</p>
|
support of Greek letters.</p>
|
||||||
|
|
||||||
<p>This works nicely for limited use of special characters, but
|
<p>This works nicely for limited use of special characters, but
|
||||||
say you wanted this sentence of Chinese text: 激光,
|
say you wanted this sentence of Chinese text: 激光,
|
||||||
這兩個字是甚麼意思.
|
這兩個字是甚麼意思.
|
||||||
The entity-ized version would look like this:</p>
|
The ampersand encoded version would look like this:</p>
|
||||||
|
|
||||||
<pre>&#28608;&#20809;, &#36889;&#20841;&#20491;&#23383;&#26159;&#29978;&#40636;&#24847;&#24605;</pre>
|
<pre>&#28608;&#20809;, &#36889;&#20841;&#20491;&#23383;&#26159;&#29978;&#40636;&#24847;&#24605;</pre>
|
||||||
|
|
||||||
@@ -545,7 +555,7 @@ an application that originally used ISO-8859-1 but switched to UTF-8
|
|||||||
when it became far too cumbersome to support foreign languages. Bots
|
when it became far too cumbersome to support foreign languages. Bots
|
||||||
will now actually go through articles and convert character entities
|
will now actually go through articles and convert character entities
|
||||||
to their corresponding real characters for the sake of user-friendliness
|
to their corresponding real characters for the sake of user-friendliness
|
||||||
and searcheability. See
|
and searchability. See
|
||||||
<a href="http://meta.wikimedia.org/wiki/Help:Special_characters">Meta's
|
<a href="http://meta.wikimedia.org/wiki/Help:Special_characters">Meta's
|
||||||
page on special characters</a> for more details.
|
page on special characters</a> for more details.
|
||||||
</p></blockquote>
|
</p></blockquote>
|
||||||
@@ -567,10 +577,11 @@ which may be used by POST, and is required when you want to upload
|
|||||||
files.</p>
|
files.</p>
|
||||||
|
|
||||||
<p>The following is a summarization of notes from
|
<p>The following is a summarization of notes from
|
||||||
<a href="http://ppewww.physics.gla.ac.uk/~flavell/charset/form-i18n.html">
|
<a href="http://web.archive.org/web/20060427015200/ppewww.ph.gla.ac.uk/~flavell/charset/form-i18n.html">
|
||||||
<code>FORM</code> submission and i18n</a>. That document contains lots
|
<code>FORM</code> submission and i18n</a>. That document contains lots
|
||||||
of useful information, but is written in a rambly manner, so
|
of useful information, but is written in a rambly manner, so
|
||||||
here I try to get right to the point.</p>
|
here I try to get right to the point. (Note: the original has
|
||||||
|
disappeared off the web, so I am linking to the Web Archive copy.)</p>
|
||||||
|
|
||||||
<h4 id="whyutf8-forms-urlencoded"><code>application/x-www-form-urlencoded</code></h4>
|
<h4 id="whyutf8-forms-urlencoded"><code>application/x-www-form-urlencoded</code></h4>
|
||||||
|
|
||||||
@@ -592,7 +603,7 @@ browser you're using, they might:</p>
|
|||||||
<ul>
|
<ul>
|
||||||
<li>Replace the unsupported characters with useless question marks,</li>
|
<li>Replace the unsupported characters with useless question marks,</li>
|
||||||
<li>Attempt to fix the characters (example: smart quotes to regular quotes),</li>
|
<li>Attempt to fix the characters (example: smart quotes to regular quotes),</li>
|
||||||
<li>Replace the character with a character entity, or</li>
|
<li>Replace the character with a character entity reference, or</li>
|
||||||
<li>Send it anyway as a different character encoding mixed in
|
<li>Send it anyway as a different character encoding mixed in
|
||||||
with the original encoding (usually Windows-1252 rather than
|
with the original encoding (usually Windows-1252 rather than
|
||||||
iso-8859-1 or UTF-8 interspersed in 8-bit)</li>
|
iso-8859-1 or UTF-8 interspersed in 8-bit)</li>
|
||||||
@@ -621,9 +632,9 @@ Each method has deficiencies, especially the former.</p>
|
|||||||
<p>If you tell the browser to send the form in the same encoding as
|
<p>If you tell the browser to send the form in the same encoding as
|
||||||
the page, you still have the trouble of what to do with characters
|
the page, you still have the trouble of what to do with characters
|
||||||
that are outside of the character encoding's range. The behavior, once
|
that are outside of the character encoding's range. The behavior, once
|
||||||
again, varies: Firefox 2.0 entity-izes them while Internet Explorer
|
again, varies: Firefox 2.0 converts them to character entity references
|
||||||
7.0 mangles them beyond intelligibility. For serious internationalization purposes,
|
while Internet Explorer 7.0 mangles them beyond intelligibility. For
|
||||||
this is not an option.</p>
|
serious internationalization purposes, this is not an option.</p>
|
||||||
|
|
||||||
<p>The other possibility is to set Accept-Encoding to UTF-8, which
|
<p>The other possibility is to set Accept-Encoding to UTF-8, which
|
||||||
begs the question: Why aren't you using UTF-8 for everything then?
|
begs the question: Why aren't you using UTF-8 for everything then?
|
||||||
@@ -663,12 +674,12 @@ it up to the module iconv to do the dirty work.</p>
|
|||||||
<p>This approach, however, is not perfect. iconv is blithely unaware
|
<p>This approach, however, is not perfect. iconv is blithely unaware
|
||||||
of HTML character entities. HTML Purifier, in order to
|
of HTML character entities. HTML Purifier, in order to
|
||||||
protect against sophisticated escaping schemes, normalizes all character
|
protect against sophisticated escaping schemes, normalizes all character
|
||||||
and numeric entities before processing the text. This leads to
|
and numeric entity references before processing the text. This leads to
|
||||||
one important ramification:</p>
|
one important ramification:</p>
|
||||||
|
|
||||||
<p><strong>Any character that is not supported by the target character
|
<p><strong>Any character that is not supported by the target character
|
||||||
set, regardless of whether or not it is in the form of a character
|
set, regardless of whether or not it is in the form of a character
|
||||||
entity or a raw character, will be silently ignored.</strong></p>
|
entity reference or a raw character, will be silently ignored.</strong></p>
|
||||||
|
|
||||||
<p>Example of this principle at work: say you have <code>&theta;</code>
|
<p>Example of this principle at work: say you have <code>&theta;</code>
|
||||||
in your HTML, but the output is in Latin-1 (which, understandably,
|
in your HTML, but the output is in Latin-1 (which, understandably,
|
||||||
@@ -677,7 +688,7 @@ set the encoding correctly using %Core.Encoding):</p>
|
|||||||
|
|
||||||
<ul>
|
<ul>
|
||||||
<li>The <code>Encoder</code> will transform the text from ISO 8859-1 to UTF-8
|
<li>The <code>Encoder</code> will transform the text from ISO 8859-1 to UTF-8
|
||||||
(note that theta is preserved since it doesn't actually use
|
(note that theta is preserved here since it doesn't actually use
|
||||||
any non-ASCII characters): <code>&theta;</code></li>
|
any non-ASCII characters): <code>&theta;</code></li>
|
||||||
<li>The <code>EntityParser</code> will transform all named and numeric
|
<li>The <code>EntityParser</code> will transform all named and numeric
|
||||||
character entities to their corresponding raw UTF-8 equivalents:
|
character entities to their corresponding raw UTF-8 equivalents:
|
||||||
@@ -700,7 +711,7 @@ Purifier has provided a slightly more palatable workaround using
|
|||||||
<li>The <code>EntityParser</code> transforms entities: <code>θ</code></li>
|
<li>The <code>EntityParser</code> transforms entities: <code>θ</code></li>
|
||||||
<li>HTML Purifier processes the code: <code>θ</code></li>
|
<li>HTML Purifier processes the code: <code>θ</code></li>
|
||||||
<li>The <code>Encoder</code> replaces all non-ASCII characters
|
<li>The <code>Encoder</code> replaces all non-ASCII characters
|
||||||
with numeric entities: <code>&#952;</code></li>
|
with numeric entity references: <code>&#952;</code></li>
|
||||||
<li>For good measure, <code>Encoder</code> transforms encoding back to
|
<li>For good measure, <code>Encoder</code> transforms encoding back to
|
||||||
original (which is strictly unnecessary for 99% of encodings
|
original (which is strictly unnecessary for 99% of encodings
|
||||||
out there): <code>&#952;</code> (remember, it's all ASCII!)</li>
|
out there): <code>&#952;</code> (remember, it's all ASCII!)</li>
|
||||||
@@ -710,19 +721,19 @@ Purifier has provided a slightly more palatable workaround using
|
|||||||
the land of Unicode characters, and is totally unacceptable for Chinese
|
the land of Unicode characters, and is totally unacceptable for Chinese
|
||||||
or Japanese texts. The even bigger kicker is that, supposing the
|
or Japanese texts. The even bigger kicker is that, supposing the
|
||||||
input encoding was actually ISO-8859-7, which <em>does</em> support
|
input encoding was actually ISO-8859-7, which <em>does</em> support
|
||||||
theta, the character would get entity-ized anyway! (The Encoder does
|
theta, the character would get converted into a character entity reference
|
||||||
not discriminate).</p>
|
anyway! (The Encoder does not discriminate).</p>
|
||||||
|
|
||||||
<p>The current functionality is about where HTML Purifier will be for
|
<p>The current functionality is about where HTML Purifier will be for
|
||||||
the rest of eternity. HTML Purifier could attempt to preserve the original
|
the rest of eternity. HTML Purifier could attempt to preserve the original
|
||||||
form of the entities so that they could be substituted back in, only the
|
form of the character references so that they could be substituted back in, only the
|
||||||
DOM extension kills them off irreversibly. HTML Purifier could also attempt
|
DOM extension kills them off irreversibly. HTML Purifier could also attempt
|
||||||
to be smart and only convert non-ASCII characters that weren't supported
|
to be smart and only convert non-ASCII characters that weren't supported
|
||||||
by the target encoding, but that would require reimplementing iconv
|
by the target encoding, but that would require reimplementing iconv
|
||||||
with HTML awareness, something I will not do.</p>
|
with HTML awareness, something I will not do.</p>
|
||||||
|
|
||||||
<p>So there: either it's UTF-8 or crippled international support. Your pick! (and I'm
|
<p>So there: either it's UTF-8 or crippled international support. Your pick! (and I'm
|
||||||
not being sarcastic here: some people could care less about other languages)</p>
|
not being sarcastic here: some people could care less about other languages).</p>
|
||||||
|
|
||||||
<h2 id="migrate">Migrate to UTF-8</h2>
|
<h2 id="migrate">Migrate to UTF-8</h2>
|
||||||
|
|
||||||
@@ -984,7 +995,7 @@ and yes, it is variable width. Other traits:</p>
|
|||||||
in different ways. It is beyond the scope of this document to explain
|
in different ways. It is beyond the scope of this document to explain
|
||||||
what precisely these implications are. PHPWact provides
|
what precisely these implications are. PHPWact provides
|
||||||
a very good <a href="http://www.phpwact.org/php/i18n/utf-8">reference document</a>
|
a very good <a href="http://www.phpwact.org/php/i18n/utf-8">reference document</a>
|
||||||
on what to expect from each functions, although coverage is spotty in
|
on what to expect from each function, although coverage is spotty in
|
||||||
some areas. Their more general notes on
|
some areas. Their more general notes on
|
||||||
<a href="http://www.phpwact.org/php/i18n/charsets">character sets</a>
|
<a href="http://www.phpwact.org/php/i18n/charsets">character sets</a>
|
||||||
are also worth looking at for information on UTF-8. Some rules of thumb
|
are also worth looking at for information on UTF-8. Some rules of thumb
|
||||||
@@ -998,7 +1009,7 @@ when dealing with Unicode text:</p>
|
|||||||
<li>Think twice before using functions that:<ul>
|
<li>Think twice before using functions that:<ul>
|
||||||
<li>...count characters (strlen will return bytes, not characters;
|
<li>...count characters (strlen will return bytes, not characters;
|
||||||
str_split and wordwrap may corrupt)</li>
|
str_split and wordwrap may corrupt)</li>
|
||||||
<li>...entity-ize things (UTF-8 doesn't need entities)</li>
|
<li>...convert characters to entity references (UTF-8 doesn't need entities)</li>
|
||||||
<li>...do very complex string processing (*printf)</li>
|
<li>...do very complex string processing (*printf)</li>
|
||||||
</ul></li>
|
</ul></li>
|
||||||
</ul>
|
</ul>
|
||||||
|
@@ -20,4 +20,3 @@ $pure_html = $purifier->purify($html);
|
|||||||
|
|
||||||
echo '<pre>' . htmlspecialchars($pure_html) . '</pre>';
|
echo '<pre>' . htmlspecialchars($pure_html) . '</pre>';
|
||||||
|
|
||||||
?>
|
|
@@ -40,6 +40,9 @@ information for casual developers using HTML Purifier.</p>
|
|||||||
<dt><a href="enduser-customize.html">Customize</a></dt>
|
<dt><a href="enduser-customize.html">Customize</a></dt>
|
||||||
<dd>Tutorial for customizing HTML Purifier's tag and attribute sets.</dd>
|
<dd>Tutorial for customizing HTML Purifier's tag and attribute sets.</dd>
|
||||||
|
|
||||||
|
<dt><a href="enduser-uri-filter.html">URI Filters</a></dt>
|
||||||
|
<dd>Tutorial for creating custom URI filters.</dd>
|
||||||
|
|
||||||
</dl>
|
</dl>
|
||||||
|
|
||||||
<h2>Development</h2>
|
<h2>Development</h2>
|
||||||
|
@@ -32,7 +32,7 @@ Here are some fuzzy levels you could set:
|
|||||||
|
|
||||||
One final note: when you start axing tags that are more commonly used, you
|
One final note: when you start axing tags that are more commonly used, you
|
||||||
run the risk of accidentally destroying user data, especially if the data
|
run the risk of accidentally destroying user data, especially if the data
|
||||||
is incoming from a WYSIWYG eidtor that hasn't been synced accordingly. This may
|
is incoming from a WYSIWYG editor that hasn't been synced accordingly. This may
|
||||||
make forbidden element to text transformations desirable (for example, images).
|
make forbidden element to text transformations desirable (for example, images).
|
||||||
|
|
||||||
|
|
||||||
|
@@ -2,7 +2,8 @@
|
|||||||
Configuration Ideas
|
Configuration Ideas
|
||||||
|
|
||||||
Here are some theoretical configuration ideas that we could implement some
|
Here are some theoretical configuration ideas that we could implement some
|
||||||
time. Note the naming convention: %Namespace.Directive
|
time. Note the naming convention: %Namespace.Directive. If you want one
|
||||||
|
implemented, give us a ring, and we'll move it up the priority chain.
|
||||||
|
|
||||||
%Attr.RewriteFragments - if there's %Attr.IDPrefix we may want to transparently
|
%Attr.RewriteFragments - if there's %Attr.IDPrefix we may want to transparently
|
||||||
rewrite the URLs we parse too. However, we can only do it when it's a pure
|
rewrite the URLs we parse too. However, we can only do it when it's a pure
|
||||||
@@ -22,8 +23,6 @@ time. Note the naming convention: %Namespace.Directive
|
|||||||
%URI.AddRelNofollow - will add rel="nofollow" to all links, preventing the
|
%URI.AddRelNofollow - will add rel="nofollow" to all links, preventing the
|
||||||
spread of ill-gotten pagerank
|
spread of ill-gotten pagerank
|
||||||
|
|
||||||
%URI.RelativeToAbsolute - transforms all relative URIs to absolute form
|
|
||||||
|
|
||||||
%URI.HostBlacklistRegex - regexes that if matching the host are disallowed
|
%URI.HostBlacklistRegex - regexes that if matching the host are disallowed
|
||||||
%URI.HostWhitelist - domain names that are excluded from the host blacklist
|
%URI.HostWhitelist - domain names that are excluded from the host blacklist
|
||||||
%URI.HostPolicy - determines whether or not it's reject all and then whitelist
|
%URI.HostPolicy - determines whether or not it's reject all and then whitelist
|
||||||
|
28
docs/ref-css-length.txt
Normal file
28
docs/ref-css-length.txt
Normal file
@@ -0,0 +1,28 @@
|
|||||||
|
|
||||||
|
CSS Length Reference
|
||||||
|
To bound, or not to bound, that is the question
|
||||||
|
|
||||||
|
It's quite a reasonable request, really, and it's already been implemented
|
||||||
|
for HTML. That is, length bounding. It makes little sense to let users
|
||||||
|
define text blocks that have a font-size of 63,360 inches (that's a mile,
|
||||||
|
by the way) or a width of forty-fold the parent container.
|
||||||
|
|
||||||
|
But it's a little more complicated than that. There are multiple units
|
||||||
|
one can use, and we have to do a little unit conversion to get things working.
|
||||||
|
Here's what we have:
|
||||||
|
|
||||||
|
Absolute:
|
||||||
|
1 in ~= 2.54 cm
|
||||||
|
1 cm = 10 mm
|
||||||
|
1 pt = 1/72 in
|
||||||
|
1 pc = 12 pt
|
||||||
|
|
||||||
|
Relative:
|
||||||
|
1 em ~= 10.0667 px
|
||||||
|
1 ex ~= 0.5 em, though Mozilla Firefox says 1 ex = 6px
|
||||||
|
1 px ~= 1 pt
|
||||||
|
|
||||||
|
Watch out: font-sizes can also be nested to get successively larger
|
||||||
|
(although I do not relish having to keep track of context font-sizes,
|
||||||
|
this may be necessary, especially for some of the more advanced features
|
||||||
|
for preventing things like white on white).
|
@@ -33,6 +33,9 @@ blockquote .label {font-weight:bold; font-size:1em; margin:0 0 .1em;
|
|||||||
.table thead th:first-child {-moz-border-radius-topleft:1em;}
|
.table thead th:first-child {-moz-border-radius-topleft:1em;}
|
||||||
.table tbody td {border-bottom:1px solid #CCC; padding-right:0.6em;padding-left:0.6em;}
|
.table tbody td {border-bottom:1px solid #CCC; padding-right:0.6em;padding-left:0.6em;}
|
||||||
|
|
||||||
|
/* A quick table*/
|
||||||
|
table.quick tbody th {text-align:right; padding-right:1em;}
|
||||||
|
|
||||||
/* Category of the file */
|
/* Category of the file */
|
||||||
#filing {font-weight:bold; font-size:smaller; }
|
#filing {font-weight:bold; font-size:smaller; }
|
||||||
|
|
||||||
|
@@ -7,4 +7,3 @@
|
|||||||
set_include_path(dirname(__FILE__) . PATH_SEPARATOR . get_include_path() );
|
set_include_path(dirname(__FILE__) . PATH_SEPARATOR . get_include_path() );
|
||||||
require_once 'HTMLPurifier.php';
|
require_once 'HTMLPurifier.php';
|
||||||
|
|
||||||
?>
|
|
@@ -18,4 +18,3 @@ function HTMLPurifier($html, $config = null) {
|
|||||||
return $purifier->purify($html, $config);
|
return $purifier->purify($html, $config);
|
||||||
}
|
}
|
||||||
|
|
||||||
?>
|
|
@@ -22,7 +22,7 @@
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
/*
|
/*
|
||||||
HTML Purifier 2.0.0 - Standards Compliant HTML Filtering
|
HTML Purifier 2.1.2 - Standards Compliant HTML Filtering
|
||||||
Copyright (C) 2006 Edward Z. Yang
|
Copyright (C) 2006 Edward Z. Yang
|
||||||
|
|
||||||
This library is free software; you can redistribute it and/or
|
This library is free software; you can redistribute it and/or
|
||||||
@@ -40,6 +40,9 @@
|
|||||||
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
// constants are slow, but we'll make one exception
|
||||||
|
define('HTMLPURIFIER_PREFIX', dirname(__FILE__));
|
||||||
|
|
||||||
// almost every class has an undocumented dependency to these, so make sure
|
// almost every class has an undocumented dependency to these, so make sure
|
||||||
// they get included
|
// they get included
|
||||||
require_once 'HTMLPurifier/ConfigSchema.php'; // important
|
require_once 'HTMLPurifier/ConfigSchema.php'; // important
|
||||||
@@ -51,16 +54,9 @@ require_once 'HTMLPurifier/Generator.php';
|
|||||||
require_once 'HTMLPurifier/Strategy/Core.php';
|
require_once 'HTMLPurifier/Strategy/Core.php';
|
||||||
require_once 'HTMLPurifier/Encoder.php';
|
require_once 'HTMLPurifier/Encoder.php';
|
||||||
|
|
||||||
|
require_once 'HTMLPurifier/ErrorCollector.php';
|
||||||
require_once 'HTMLPurifier/LanguageFactory.php';
|
require_once 'HTMLPurifier/LanguageFactory.php';
|
||||||
|
|
||||||
HTMLPurifier_ConfigSchema::define(
|
|
||||||
'Core', 'Language', 'en', 'string', '
|
|
||||||
ISO 639 language code for localizable things in HTML Purifier to use,
|
|
||||||
which is mainly error reporting. There is currently only an English (en)
|
|
||||||
translation, so this directive is currently useless.
|
|
||||||
This directive has been available since 2.0.0.
|
|
||||||
');
|
|
||||||
|
|
||||||
HTMLPurifier_ConfigSchema::define(
|
HTMLPurifier_ConfigSchema::define(
|
||||||
'Core', 'CollectErrors', false, 'bool', '
|
'Core', 'CollectErrors', false, 'bool', '
|
||||||
Whether or not to collect errors found while filtering the document. This
|
Whether or not to collect errors found while filtering the document. This
|
||||||
@@ -81,7 +77,7 @@ This directive has been available since 2.0.0.
|
|||||||
class HTMLPurifier
|
class HTMLPurifier
|
||||||
{
|
{
|
||||||
|
|
||||||
var $version = '2.0.0';
|
var $version = '2.1.2';
|
||||||
|
|
||||||
var $config;
|
var $config;
|
||||||
var $filters;
|
var $filters;
|
||||||
@@ -139,15 +135,19 @@ class HTMLPurifier
|
|||||||
|
|
||||||
$context = new HTMLPurifier_Context();
|
$context = new HTMLPurifier_Context();
|
||||||
|
|
||||||
|
// our friendly neighborhood generator, all primed with configuration too!
|
||||||
|
$this->generator->generateFromTokens(array(), $config, $context);
|
||||||
|
$context->register('Generator', $this->generator);
|
||||||
|
|
||||||
// set up global context variables
|
// set up global context variables
|
||||||
if ($config->get('Core', 'CollectErrors')) {
|
if ($config->get('Core', 'CollectErrors')) {
|
||||||
// may get moved out if other facilities use it
|
// may get moved out if other facilities use it
|
||||||
$language_factory = HTMLPurifier_LanguageFactory::instance();
|
$language_factory = HTMLPurifier_LanguageFactory::instance();
|
||||||
$language = $language_factory->create($config->get('Core', 'Language'));
|
$language = $language_factory->create($config, $context);
|
||||||
$context->register('Locale', $language);
|
$context->register('Locale', $language);
|
||||||
|
|
||||||
$error_collector = new HTMLPurifier_ErrorCollector();
|
$error_collector = new HTMLPurifier_ErrorCollector($context);
|
||||||
$context->register('ErrorCollector', $language);
|
$context->register('ErrorCollector', $error_collector);
|
||||||
}
|
}
|
||||||
|
|
||||||
$html = HTMLPurifier_Encoder::convertToUTF8($html, $config, $context);
|
$html = HTMLPurifier_Encoder::convertToUTF8($html, $config, $context);
|
||||||
@@ -205,7 +205,7 @@ class HTMLPurifier
|
|||||||
if (is_a($prototype, 'HTMLPurifier')) {
|
if (is_a($prototype, 'HTMLPurifier')) {
|
||||||
$htmlpurifier = $prototype;
|
$htmlpurifier = $prototype;
|
||||||
} elseif ($prototype) {
|
} elseif ($prototype) {
|
||||||
$htmlpurifier = new HTMLPurifier(HTMLPurifier_Config::create($prototype));
|
$htmlpurifier = new HTMLPurifier($prototype);
|
||||||
} else {
|
} else {
|
||||||
$htmlpurifier = new HTMLPurifier();
|
$htmlpurifier = new HTMLPurifier();
|
||||||
}
|
}
|
||||||
@@ -216,4 +216,3 @@ class HTMLPurifier
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
?>
|
|
@@ -127,4 +127,3 @@ class HTMLPurifier_AttrCollections
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
?>
|
|
@@ -84,4 +84,3 @@ class HTMLPurifier_AttrDef
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
?>
|
|
@@ -66,4 +66,3 @@ class HTMLPurifier_AttrDef_CSS extends HTMLPurifier_AttrDef
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
?>
|
|
@@ -84,4 +84,3 @@ class HTMLPurifier_AttrDef_CSS_Background extends HTMLPurifier_AttrDef
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
?>
|
|
@@ -127,4 +127,3 @@ class HTMLPurifier_AttrDef_CSS_BackgroundPosition extends HTMLPurifier_AttrDef
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
?>
|
|
@@ -42,4 +42,3 @@ class HTMLPurifier_AttrDef_CSS_Border extends HTMLPurifier_AttrDef
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
?>
|
|
@@ -98,4 +98,3 @@ class HTMLPurifier_AttrDef_CSS_Color extends HTMLPurifier_AttrDef
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
?>
|
|
@@ -35,4 +35,3 @@ class HTMLPurifier_AttrDef_CSS_Composite extends HTMLPurifier_AttrDef
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
?>
|
|
@@ -148,4 +148,3 @@ class HTMLPurifier_AttrDef_CSS_Font extends HTMLPurifier_AttrDef
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
?>
|
|
@@ -38,19 +38,24 @@ class HTMLPurifier_AttrDef_CSS_FontFamily extends HTMLPurifier_AttrDef
|
|||||||
$quote = $font[0];
|
$quote = $font[0];
|
||||||
if ($font[$length - 1] !== $quote) continue;
|
if ($font[$length - 1] !== $quote) continue;
|
||||||
$font = substr($font, 1, $length - 2);
|
$font = substr($font, 1, $length - 2);
|
||||||
|
// double-backslash processing is buggy
|
||||||
|
$font = str_replace("\\$quote", $quote, $font); // de-escape quote
|
||||||
|
$font = str_replace("\\\n", "\n", $font); // de-escape newlines
|
||||||
}
|
}
|
||||||
// process font
|
// $font is a pure representation of the font name
|
||||||
|
|
||||||
if (ctype_alnum($font)) {
|
if (ctype_alnum($font)) {
|
||||||
// very simple font, allow it in unharmed
|
// very simple font, allow it in unharmed
|
||||||
$final .= $font . ', ';
|
$final .= $font . ', ';
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
$nospace = str_replace(array(' ', '.', '!'), '', $font);
|
|
||||||
if (ctype_alnum($nospace)) {
|
// complicated font, requires quoting
|
||||||
// font with spaces in it
|
|
||||||
|
// armor single quotes and new lines
|
||||||
|
$font = str_replace("'", "\\'", $font);
|
||||||
|
$font = str_replace("\n", "\\\n", $font);
|
||||||
$final .= "'$font', ";
|
$final .= "'$font', ";
|
||||||
continue;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
$final = rtrim($final, ', ');
|
$final = rtrim($final, ', ');
|
||||||
if ($final === '') return false;
|
if ($final === '') return false;
|
||||||
@@ -59,4 +64,3 @@ class HTMLPurifier_AttrDef_CSS_FontFamily extends HTMLPurifier_AttrDef
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
?>
|
|
@@ -53,4 +53,3 @@ class HTMLPurifier_AttrDef_CSS_Length extends HTMLPurifier_AttrDef
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
?>
|
|
@@ -77,4 +77,3 @@ class HTMLPurifier_AttrDef_CSS_ListStyle extends HTMLPurifier_AttrDef
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
?>
|
|
@@ -55,4 +55,3 @@ class HTMLPurifier_AttrDef_CSS_Multiple extends HTMLPurifier_AttrDef
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
?>
|
|
@@ -58,4 +58,3 @@ class HTMLPurifier_AttrDef_CSS_Number extends HTMLPurifier_AttrDef
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
?>
|
|
@@ -40,4 +40,3 @@ class HTMLPurifier_AttrDef_CSS_Percentage extends HTMLPurifier_AttrDef
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
?>
|
|
@@ -34,4 +34,3 @@ class HTMLPurifier_AttrDef_CSS_TextDecoration extends HTMLPurifier_AttrDef
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
?>
|
|
@@ -15,7 +15,7 @@ class HTMLPurifier_AttrDef_CSS_URI extends HTMLPurifier_AttrDef_URI
|
|||||||
{
|
{
|
||||||
|
|
||||||
function HTMLPurifier_AttrDef_CSS_URI() {
|
function HTMLPurifier_AttrDef_CSS_URI() {
|
||||||
$this->HTMLPurifier_AttrDef_URI(true); // always embedded
|
parent::HTMLPurifier_AttrDef_URI(true); // always embedded
|
||||||
}
|
}
|
||||||
|
|
||||||
function validate($uri_string, $config, &$context) {
|
function validate($uri_string, $config, &$context) {
|
||||||
@@ -55,4 +55,3 @@ class HTMLPurifier_AttrDef_CSS_URI extends HTMLPurifier_AttrDef_URI
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
?>
|
|
@@ -63,4 +63,3 @@ class HTMLPurifier_AttrDef_Enum extends HTMLPurifier_AttrDef
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
?>
|
|
@@ -27,4 +27,3 @@ class HTMLPurifier_AttrDef_HTML_Bool extends HTMLPurifier_AttrDef
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
?>
|
|
@@ -32,4 +32,3 @@ class HTMLPurifier_AttrDef_HTML_Color extends HTMLPurifier_AttrDef
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
?>
|
|
@@ -31,4 +31,3 @@ class HTMLPurifier_AttrDef_HTML_FrameTarget extends HTMLPurifier_AttrDef_Enum
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
?>
|
|
@@ -118,4 +118,3 @@ class HTMLPurifier_AttrDef_HTML_ID extends HTMLPurifier_AttrDef
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
?>
|
|
@@ -41,4 +41,3 @@ class HTMLPurifier_AttrDef_HTML_Length extends HTMLPurifier_AttrDef_HTML_Pixels
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
?>
|
|
@@ -70,4 +70,3 @@ class HTMLPurifier_AttrDef_HTML_LinkTypes extends HTMLPurifier_AttrDef
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
?>
|
|
@@ -41,4 +41,3 @@ class HTMLPurifier_AttrDef_HTML_MultiLength extends HTMLPurifier_AttrDef_HTML_Le
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
?>
|
|
@@ -48,4 +48,3 @@ class HTMLPurifier_AttrDef_HTML_Nmtokens extends HTMLPurifier_AttrDef
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
?>
|
|
@@ -34,4 +34,3 @@ class HTMLPurifier_AttrDef_HTML_Pixels extends HTMLPurifier_AttrDef
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
?>
|
|
@@ -72,4 +72,3 @@ class HTMLPurifier_AttrDef_Integer extends HTMLPurifier_AttrDef
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
?>
|
|
@@ -72,4 +72,3 @@ class HTMLPurifier_AttrDef_Lang extends HTMLPurifier_AttrDef
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
?>
|
|
@@ -14,4 +14,3 @@ class HTMLPurifier_AttrDef_Text extends HTMLPurifier_AttrDef
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
?>
|
|
@@ -1,90 +1,66 @@
|
|||||||
<?php
|
<?php
|
||||||
|
|
||||||
require_once 'HTMLPurifier/AttrDef.php';
|
require_once 'HTMLPurifier/AttrDef.php';
|
||||||
|
require_once 'HTMLPurifier/URIParser.php';
|
||||||
require_once 'HTMLPurifier/URIScheme.php';
|
require_once 'HTMLPurifier/URIScheme.php';
|
||||||
require_once 'HTMLPurifier/URISchemeRegistry.php';
|
require_once 'HTMLPurifier/URISchemeRegistry.php';
|
||||||
require_once 'HTMLPurifier/AttrDef/URI/Host.php';
|
require_once 'HTMLPurifier/AttrDef/URI/Host.php';
|
||||||
require_once 'HTMLPurifier/PercentEncoder.php';
|
require_once 'HTMLPurifier/PercentEncoder.php';
|
||||||
|
require_once 'HTMLPurifier/AttrDef/URI/Email.php';
|
||||||
|
|
||||||
|
// special case filtering directives
|
||||||
|
|
||||||
HTMLPurifier_ConfigSchema::define(
|
HTMLPurifier_ConfigSchema::define(
|
||||||
'URI', 'DefaultScheme', 'http', 'string',
|
'URI', 'Munge', null, 'string/null', '
|
||||||
'Defines through what scheme the output will be served, in order to '.
|
<p>
|
||||||
'select the proper object validator when no scheme information is present.'
|
Munges all browsable (usually http, https and ftp)
|
||||||
);
|
absolute URI\'s into another URI, usually a URI redirection service.
|
||||||
|
This directive accepts a URI, formatted with a <code>%s</code> where
|
||||||
|
the url-encoded original URI should be inserted (sample:
|
||||||
|
<code>http://www.google.com/url?q=%s</code>).
|
||||||
|
</p>
|
||||||
|
<p>
|
||||||
|
Uses for this directive:
|
||||||
|
</p>
|
||||||
|
<ul>
|
||||||
|
<li>
|
||||||
|
Prevent PageRank leaks, while being fairly transparent
|
||||||
|
to users (you may also want to add some client side JavaScript to
|
||||||
|
override the text in the statusbar). <strong>Notice</strong>:
|
||||||
|
Many security experts believe that this form of protection does not deter spam-bots.
|
||||||
|
</li>
|
||||||
|
<li>
|
||||||
|
Redirect users to a splash page telling them they are leaving your
|
||||||
|
website. While this is poor usability practice, it is often mandated
|
||||||
|
in corporate environments.
|
||||||
|
</li>
|
||||||
|
</ul>
|
||||||
|
<p>
|
||||||
|
This directive has been available since 1.3.0.
|
||||||
|
</p>
|
||||||
|
');
|
||||||
|
|
||||||
|
// disabling directives
|
||||||
|
|
||||||
HTMLPurifier_ConfigSchema::define(
|
HTMLPurifier_ConfigSchema::define(
|
||||||
'URI', 'Host', null, 'string/null',
|
'URI', 'Disable', false, 'bool', '
|
||||||
'Defines the domain name of the server, so we can determine whether or '.
|
<p>
|
||||||
'an absolute URI is from your website or not. Not strictly necessary, '.
|
Disables all URIs in all forms. Not sure why you\'d want to do that
|
||||||
'as users should be using relative URIs to reference resources on your '.
|
(after all, the Internet\'s founded on the notion of a hyperlink).
|
||||||
'website. It will, however, let you use absolute URIs to link to '.
|
This directive has been available since 1.3.0.
|
||||||
'subdomains of the domain you post here: i.e. example.com will allow '.
|
</p>
|
||||||
'sub.example.com. However, higher up domains will still be excluded: '.
|
');
|
||||||
'if you set %URI.Host to sub.example.com, example.com will be blocked. '.
|
|
||||||
'This directive has been available since 1.2.0.'
|
|
||||||
);
|
|
||||||
|
|
||||||
HTMLPurifier_ConfigSchema::define(
|
|
||||||
'URI', 'DisableExternal', false, 'bool',
|
|
||||||
'Disables links to external websites. This is a highly effective '.
|
|
||||||
'anti-spam and anti-pagerank-leech measure, but comes at a hefty price: no'.
|
|
||||||
'links or images outside of your domain will be allowed. Non-linkified '.
|
|
||||||
'URIs will still be preserved. If you want to be able to link to '.
|
|
||||||
'subdomains or use absolute URIs, specify %URI.Host for your website. '.
|
|
||||||
'This directive has been available since 1.2.0.'
|
|
||||||
);
|
|
||||||
|
|
||||||
HTMLPurifier_ConfigSchema::define(
|
|
||||||
'URI', 'DisableExternalResources', false, 'bool',
|
|
||||||
'Disables the embedding of external resources, preventing users from '.
|
|
||||||
'embedding things like images from other hosts. This prevents '.
|
|
||||||
'access tracking (good for email viewers), bandwidth leeching, '.
|
|
||||||
'cross-site request forging, goatse.cx posting, and '.
|
|
||||||
'other nasties, but also results in '.
|
|
||||||
'a loss of end-user functionality (they can\'t directly post a pic '.
|
|
||||||
'they posted from Flickr anymore). Use it if you don\'t have a '.
|
|
||||||
'robust user-content moderation team. This directive has been '.
|
|
||||||
'available since 1.3.0.'
|
|
||||||
);
|
|
||||||
|
|
||||||
HTMLPurifier_ConfigSchema::define(
|
|
||||||
'URI', 'DisableResources', false, 'bool',
|
|
||||||
'Disables embedding resources, essentially meaning no pictures. You can '.
|
|
||||||
'still link to them though. See %URI.DisableExternalResources for why '.
|
|
||||||
'this might be a good idea. This directive has been available since 1.3.0.'
|
|
||||||
);
|
|
||||||
|
|
||||||
HTMLPurifier_ConfigSchema::define(
|
|
||||||
'URI', 'Munge', null, 'string/null',
|
|
||||||
'Munges all browsable (usually http, https and ftp) URI\'s into some URL '.
|
|
||||||
'redirection service. Pass this directive a URI, with %s inserted where '.
|
|
||||||
'the url-encoded original URI should be inserted (sample: '.
|
|
||||||
'<code>http://www.google.com/url?q=%s</code>). '.
|
|
||||||
'This prevents PageRank leaks, while being as transparent as possible '.
|
|
||||||
'to users (you may also want to add some client side JavaScript to '.
|
|
||||||
'override the text in the statusbar). Warning: many security experts '.
|
|
||||||
'believe that this form of protection does not deter spam-bots. '.
|
|
||||||
'You can also use this directive to redirect users to a splash page '.
|
|
||||||
'telling them they are leaving your website. '.
|
|
||||||
'This directive has been available since 1.3.0.'
|
|
||||||
);
|
|
||||||
|
|
||||||
HTMLPurifier_ConfigSchema::define(
|
|
||||||
'URI', 'HostBlacklist', array(), 'list',
|
|
||||||
'List of strings that are forbidden in the host of any URI. Use it to '.
|
|
||||||
'kill domain names of spam, etc. Note that it will catch anything in '.
|
|
||||||
'the domain, so <tt>moo.com</tt> will catch <tt>moo.com.example.com</tt>. '.
|
|
||||||
'This directive has been available since 1.3.0.'
|
|
||||||
);
|
|
||||||
|
|
||||||
HTMLPurifier_ConfigSchema::define(
|
|
||||||
'URI', 'Disable', false, 'bool',
|
|
||||||
'Disables all URIs in all forms. Not sure why you\'d want to do that '.
|
|
||||||
'(after all, the Internet\'s founded on the notion of a hyperlink). '.
|
|
||||||
'This directive has been available since 1.3.0.'
|
|
||||||
);
|
|
||||||
HTMLPurifier_ConfigSchema::defineAlias('Attr', 'DisableURI', 'URI', 'Disable');
|
HTMLPurifier_ConfigSchema::defineAlias('Attr', 'DisableURI', 'URI', 'Disable');
|
||||||
|
|
||||||
|
HTMLPurifier_ConfigSchema::define(
|
||||||
|
'URI', 'DisableResources', false, 'bool', '
|
||||||
|
<p>
|
||||||
|
Disables embedding resources, essentially meaning no pictures. You can
|
||||||
|
still link to them though. See %URI.DisableExternalResources for why
|
||||||
|
this might be a good idea. This directive has been available since 1.3.0.
|
||||||
|
</p>
|
||||||
|
');
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Validates a URI as defined by RFC 3986.
|
* Validates a URI as defined by RFC 3986.
|
||||||
* @note Scheme-specific mechanics deferred to HTMLPurifier_URIScheme
|
* @note Scheme-specific mechanics deferred to HTMLPurifier_URIScheme
|
||||||
@@ -92,214 +68,83 @@ HTMLPurifier_ConfigSchema::defineAlias('Attr', 'DisableURI', 'URI', 'Disable');
|
|||||||
class HTMLPurifier_AttrDef_URI extends HTMLPurifier_AttrDef
|
class HTMLPurifier_AttrDef_URI extends HTMLPurifier_AttrDef
|
||||||
{
|
{
|
||||||
|
|
||||||
var $host;
|
var $parser, $percentEncoder;
|
||||||
var $embeds_resource;
|
var $embedsResource;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @param $embeds_resource_resource Does the URI here result in an extra HTTP request?
|
* @param $embeds_resource_resource Does the URI here result in an extra HTTP request?
|
||||||
*/
|
*/
|
||||||
function HTMLPurifier_AttrDef_URI($embeds_resource = false) {
|
function HTMLPurifier_AttrDef_URI($embeds_resource = false) {
|
||||||
$this->host = new HTMLPurifier_AttrDef_URI_Host();
|
$this->parser = new HTMLPurifier_URIParser();
|
||||||
$this->embeds_resource = (bool) $embeds_resource;
|
$this->percentEncoder = new HTMLPurifier_PercentEncoder();
|
||||||
|
$this->embedsResource = (bool) $embeds_resource;
|
||||||
}
|
}
|
||||||
|
|
||||||
function validate($uri, $config, &$context) {
|
function validate($uri, $config, &$context) {
|
||||||
|
|
||||||
static $PercentEncoder = null;
|
|
||||||
if ($PercentEncoder === null) $PercentEncoder = new HTMLPurifier_PercentEncoder();
|
|
||||||
|
|
||||||
// We'll write stack-based parsers later, for now, use regexps to
|
|
||||||
// get things working as fast as possible (irony)
|
|
||||||
|
|
||||||
if ($config->get('URI', 'Disable')) return false;
|
if ($config->get('URI', 'Disable')) return false;
|
||||||
|
|
||||||
// parse as CDATA
|
// initial operations
|
||||||
$uri = $this->parseCDATA($uri);
|
$uri = $this->parseCDATA($uri);
|
||||||
|
$uri = $this->percentEncoder->normalize($uri);
|
||||||
|
|
||||||
// fix up percent-encoding
|
// parse the URI
|
||||||
$uri = $PercentEncoder->normalize($uri);
|
$uri = $this->parser->parse($uri);
|
||||||
|
if ($uri === false) return false;
|
||||||
|
|
||||||
// while it would be nice to use parse_url(), that's specifically
|
// add embedded flag to context for validators
|
||||||
// for HTTP and thus won't work for our generic URI parsing
|
$context->register('EmbeddedURI', $this->embedsResource);
|
||||||
|
|
||||||
// according to the RFC... (but this cuts corners, i.e. non-validating)
|
$ok = false;
|
||||||
$r_URI = '!'.
|
do {
|
||||||
'(([^:/?#<>\'"]+):)?'. // 2. Scheme
|
|
||||||
'(//([^/?#<>\'"]*))?'. // 4. Authority
|
|
||||||
'([^?#<>\'"]*)'. // 5. Path
|
|
||||||
'(\?([^#<>\'"]*))?'. // 7. Query
|
|
||||||
'(#([^<>\'"]*))?'. // 8. Fragment
|
|
||||||
'!';
|
|
||||||
|
|
||||||
$matches = array();
|
// generic validation
|
||||||
$result = preg_match($r_URI, $uri, $matches);
|
$result = $uri->validate($config, $context);
|
||||||
|
if (!$result) break;
|
||||||
|
|
||||||
if (!$result) return false; // invalid URI
|
// chained validation
|
||||||
|
$uri_def =& $config->getDefinition('URI');
|
||||||
|
$result = $uri_def->filter($uri, $config, $context);
|
||||||
|
if (!$result) break;
|
||||||
|
|
||||||
// seperate out parts
|
// scheme-specific validation
|
||||||
$scheme = !empty($matches[1]) ? $matches[2] : null;
|
$scheme_obj = $uri->getSchemeObj($config, $context);
|
||||||
$authority = !empty($matches[3]) ? $matches[4] : null;
|
if (!$scheme_obj) break;
|
||||||
$path = $matches[5]; // always present, can be empty
|
if ($this->embedsResource && !$scheme_obj->browsable) break;
|
||||||
$query = !empty($matches[6]) ? $matches[7] : null;
|
$result = $scheme_obj->validate($uri, $config, $context);
|
||||||
$fragment = !empty($matches[8]) ? $matches[9] : null;
|
if (!$result) break;
|
||||||
|
|
||||||
|
// survived gauntlet
|
||||||
|
$ok = true;
|
||||||
|
|
||||||
|
} while (false);
|
||||||
|
|
||||||
$registry =& HTMLPurifier_URISchemeRegistry::instance();
|
$context->destroy('EmbeddedURI');
|
||||||
if ($scheme !== null) {
|
if (!$ok) return false;
|
||||||
// no need to validate the scheme's fmt since we do that when we
|
|
||||||
// retrieve the specific scheme object from the registry
|
// munge scheme off if necessary (this must be last)
|
||||||
$scheme = ctype_lower($scheme) ? $scheme : strtolower($scheme);
|
if (!is_null($uri->scheme) && is_null($uri->host)) {
|
||||||
$scheme_obj = $registry->getScheme($scheme, $config, $context);
|
if ($uri_def->defaultScheme == $uri->scheme) {
|
||||||
if (!$scheme_obj) return false; // invalid scheme, clean it out
|
$uri->scheme = null;
|
||||||
} else {
|
}
|
||||||
$scheme_obj = $registry->getScheme(
|
|
||||||
$config->get('URI', 'DefaultScheme'), $config, $context
|
|
||||||
);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// something funky weird happened in the registry, abort!
|
// back to string
|
||||||
if (!$scheme_obj) {
|
$result = $uri->toString();
|
||||||
trigger_error(
|
|
||||||
'Default scheme object "' . $config->get('URI', 'DefaultScheme') . '" was not readable',
|
|
||||||
E_USER_WARNING
|
|
||||||
);
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
// the URI we're processing embeds_resource a resource in the page, but the URI
|
// munge entire URI if necessary
|
||||||
// it references cannot be located
|
|
||||||
if ($this->embeds_resource && !$scheme_obj->browsable) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
if ($authority !== null) {
|
|
||||||
|
|
||||||
// remove URI if it's absolute and we disabled externals or
|
|
||||||
// if it's absolute and embedded and we disabled external resources
|
|
||||||
unset($our_host);
|
|
||||||
if (
|
if (
|
||||||
$config->get('URI', 'DisableExternal') ||
|
!is_null($uri->host) && // indicator for authority
|
||||||
(
|
!empty($scheme_obj->browsable) &&
|
||||||
$config->get('URI', 'DisableExternalResources') &&
|
!is_null($munge = $config->get('URI', 'Munge'))
|
||||||
$this->embeds_resource
|
|
||||||
)
|
|
||||||
) {
|
) {
|
||||||
$our_host = $config->get('URI', 'Host');
|
|
||||||
if ($our_host === null) return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
$HEXDIG = '[A-Fa-f0-9]';
|
|
||||||
$unreserved = 'A-Za-z0-9-._~'; // make sure you wrap with []
|
|
||||||
$sub_delims = '!$&\'()'; // needs []
|
|
||||||
$pct_encoded = "%$HEXDIG$HEXDIG";
|
|
||||||
$r_userinfo = "(?:[$unreserved$sub_delims:]|$pct_encoded)*";
|
|
||||||
$r_authority = "/^(($r_userinfo)@)?(\[[^\]]+\]|[^:]*)(:(\d*))?/";
|
|
||||||
$matches = array();
|
|
||||||
preg_match($r_authority, $authority, $matches);
|
|
||||||
// overloads regexp!
|
|
||||||
$userinfo = !empty($matches[1]) ? $matches[2] : null;
|
|
||||||
$host = !empty($matches[3]) ? $matches[3] : null;
|
|
||||||
$port = !empty($matches[4]) ? $matches[5] : null;
|
|
||||||
|
|
||||||
// validate port
|
|
||||||
if ($port !== null) {
|
|
||||||
$port = (int) $port;
|
|
||||||
if ($port < 1 || $port > 65535) $port = null;
|
|
||||||
}
|
|
||||||
|
|
||||||
$host = $this->host->validate($host, $config, $context);
|
|
||||||
if ($host === false) $host = null;
|
|
||||||
|
|
||||||
if ($this->checkBlacklist($host, $config, $context)) return false;
|
|
||||||
|
|
||||||
// more lenient absolute checking
|
|
||||||
if (isset($our_host)) {
|
|
||||||
$host_parts = array_reverse(explode('.', $host));
|
|
||||||
// could be cached
|
|
||||||
$our_host_parts = array_reverse(explode('.', $our_host));
|
|
||||||
foreach ($our_host_parts as $i => $discard) {
|
|
||||||
if (!isset($host_parts[$i])) return false;
|
|
||||||
if ($host_parts[$i] != $our_host_parts[$i]) return false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// userinfo and host are validated within the regexp
|
|
||||||
|
|
||||||
} else {
|
|
||||||
$port = $host = $userinfo = null;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
// query and fragment are quite simple in terms of definition:
|
|
||||||
// *( pchar / "/" / "?" ), so define their validation routines
|
|
||||||
// when we start fixing percent encoding
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
// path gets to be validated against a hodge-podge of rules depending
|
|
||||||
// on the status of authority and scheme, but it's not that important,
|
|
||||||
// esp. since it won't be applicable to everyone
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
// okay, now we defer execution to the subobject for more processing
|
|
||||||
// note that $fragment is omitted
|
|
||||||
list($userinfo, $host, $port, $path, $query) =
|
|
||||||
$scheme_obj->validateComponents(
|
|
||||||
$userinfo, $host, $port, $path, $query, $config, $context
|
|
||||||
);
|
|
||||||
|
|
||||||
|
|
||||||
// reconstruct authority
|
|
||||||
$authority = null;
|
|
||||||
if (!is_null($userinfo) || !is_null($host) || !is_null($port)) {
|
|
||||||
$authority = '';
|
|
||||||
if($userinfo !== null) $authority .= $userinfo . '@';
|
|
||||||
$authority .= $host;
|
|
||||||
if($port !== null) $authority .= ':' . $port;
|
|
||||||
}
|
|
||||||
|
|
||||||
// reconstruct the result
|
|
||||||
$result = '';
|
|
||||||
if ($scheme !== null) $result .= "$scheme:";
|
|
||||||
if ($authority !== null) $result .= "//$authority";
|
|
||||||
$result .= $path;
|
|
||||||
if ($query !== null) $result .= "?$query";
|
|
||||||
if ($fragment !== null) $result .= "#$fragment";
|
|
||||||
|
|
||||||
// munge if necessary
|
|
||||||
$munge = $config->get('URI', 'Munge');
|
|
||||||
if (!empty($scheme_obj->browsable) && $munge !== null) {
|
|
||||||
if ($authority !== null) {
|
|
||||||
$result = str_replace('%s', rawurlencode($result), $munge);
|
$result = str_replace('%s', rawurlencode($result), $munge);
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
return $result;
|
return $result;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Checks a host against an array blacklist
|
|
||||||
* @param $host Host to check
|
|
||||||
* @param $config HTMLPurifier_Config instance
|
|
||||||
* @param $context HTMLPurifier_Context instance
|
|
||||||
* @return bool Is spam?
|
|
||||||
*/
|
|
||||||
function checkBlacklist($host, &$config, &$context) {
|
|
||||||
$blacklist = $config->get('URI', 'HostBlacklist');
|
|
||||||
if (!empty($blacklist)) {
|
|
||||||
foreach($blacklist as $blacklisted_host_fragment) {
|
|
||||||
if (strpos($host, $blacklisted_host_fragment) !== false) {
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return false;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
?>
|
|
||||||
|
@@ -1,6 +1,7 @@
|
|||||||
<?php
|
<?php
|
||||||
|
|
||||||
require_once 'HTMLPurifier/AttrDef.php';
|
require_once 'HTMLPurifier/AttrDef.php';
|
||||||
|
require_once 'HTMLPurifier/AttrDef/URI/Email/SimpleCheck.php';
|
||||||
|
|
||||||
class HTMLPurifier_AttrDef_URI_Email extends HTMLPurifier_AttrDef
|
class HTMLPurifier_AttrDef_URI_Email extends HTMLPurifier_AttrDef
|
||||||
{
|
{
|
||||||
@@ -14,4 +15,3 @@ class HTMLPurifier_AttrDef_URI_Email extends HTMLPurifier_AttrDef
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
?>
|
|
@@ -20,4 +20,3 @@ class HTMLPurifier_AttrDef_URI_Email_SimpleCheck extends HTMLPurifier_AttrDef_UR
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
?>
|
|
@@ -51,4 +51,3 @@ class HTMLPurifier_AttrDef_URI_Host extends HTMLPurifier_AttrDef
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
?>
|
|
@@ -39,4 +39,3 @@ class HTMLPurifier_AttrDef_URI_IPv4 extends HTMLPurifier_AttrDef
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
?>
|
|
@@ -98,4 +98,3 @@ class HTMLPurifier_AttrDef_URI_IPv6 extends HTMLPurifier_AttrDef_URI_IPv4
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
?>
|
|
@@ -55,4 +55,3 @@ class HTMLPurifier_AttrTransform
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
?>
|
|
@@ -28,4 +28,3 @@ class HTMLPurifier_AttrTransform_BdoDir extends HTMLPurifier_AttrTransform
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
?>
|
|
@@ -23,4 +23,3 @@ extends HTMLPurifier_AttrTransform {
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
?>
|
|
@@ -36,4 +36,3 @@ extends HTMLPurifier_AttrTransform {
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
?>
|
|
@@ -17,4 +17,3 @@ class HTMLPurifier_AttrTransform_Border extends HTMLPurifier_AttrTransform {
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
?>
|
|
@@ -57,4 +57,3 @@ class HTMLPurifier_AttrTransform_EnumToCSS extends HTMLPurifier_AttrTransform {
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
?>
|
|
@@ -51,4 +51,3 @@ class HTMLPurifier_AttrTransform_ImgRequired extends HTMLPurifier_AttrTransform
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
?>
|
|
@@ -44,4 +44,3 @@ extends HTMLPurifier_AttrTransform {
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
?>
|
|
@@ -27,4 +27,3 @@ class HTMLPurifier_AttrTransform_Lang extends HTMLPurifier_AttrTransform
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
?>
|
|
@@ -26,4 +26,3 @@ class HTMLPurifier_AttrTransform_Length extends HTMLPurifier_AttrTransform
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
?>
|
|
@@ -18,4 +18,3 @@ class HTMLPurifier_AttrTransform_Name extends HTMLPurifier_AttrTransform
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
?>
|
|
@@ -44,6 +44,9 @@ class HTMLPurifier_AttrTypes
|
|||||||
$this->info['LanguageCode'] = new HTMLPurifier_AttrDef_Lang();
|
$this->info['LanguageCode'] = new HTMLPurifier_AttrDef_Lang();
|
||||||
$this->info['Color'] = new HTMLPurifier_AttrDef_HTML_Color();
|
$this->info['Color'] = new HTMLPurifier_AttrDef_HTML_Color();
|
||||||
|
|
||||||
|
// unimplemented aliases
|
||||||
|
$this->info['ContentType'] = new HTMLPurifier_AttrDef_Text();
|
||||||
|
|
||||||
// number is really a positive integer (one or more digits)
|
// number is really a positive integer (one or more digits)
|
||||||
// FIXME: ^^ not always, see start and value of list items
|
// FIXME: ^^ not always, see start and value of list items
|
||||||
$this->info['Number'] = new HTMLPurifier_AttrDef_Integer(false, false, true);
|
$this->info['Number'] = new HTMLPurifier_AttrDef_Integer(false, false, true);
|
||||||
@@ -79,4 +82,4 @@ class HTMLPurifier_AttrTypes
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
?>
|
|
||||||
|
@@ -1,32 +1,53 @@
|
|||||||
<?php
|
<?php
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Validates the attributes of a token. Doesn't manage required attributes
|
||||||
|
* very well. The only reason we factored this out was because RemoveForeignElements
|
||||||
|
* also needed it besides ValidateAttributes.
|
||||||
|
*/
|
||||||
class HTMLPurifier_AttrValidator
|
class HTMLPurifier_AttrValidator
|
||||||
{
|
{
|
||||||
|
|
||||||
|
/**
|
||||||
function validateToken($token, &$config, &$context) {
|
* Validates the attributes of a token, returning a modified token
|
||||||
|
* that has valid tokens
|
||||||
|
* @param $token Reference to token to validate. We require a reference
|
||||||
|
* because the operation this class performs on the token are
|
||||||
|
* not atomic, so the context CurrentToken to be updated
|
||||||
|
* throughout
|
||||||
|
* @param $config Instance of HTMLPurifier_Config
|
||||||
|
* @param $context Instance of HTMLPurifier_Context
|
||||||
|
*/
|
||||||
|
function validateToken(&$token, &$config, &$context) {
|
||||||
|
|
||||||
$definition = $config->getHTMLDefinition();
|
$definition = $config->getHTMLDefinition();
|
||||||
|
$e =& $context->get('ErrorCollector', true);
|
||||||
|
|
||||||
|
// initialize CurrentToken if necessary
|
||||||
|
$current_token =& $context->get('CurrentToken', true);
|
||||||
|
if (!$current_token) $context->register('CurrentToken', $token);
|
||||||
|
|
||||||
|
if ($token->type !== 'start' && $token->type !== 'empty') return $token;
|
||||||
|
|
||||||
// create alias to global definition array, see also $defs
|
// create alias to global definition array, see also $defs
|
||||||
// DEFINITION CALL
|
// DEFINITION CALL
|
||||||
$d_defs = $definition->info_global_attr;
|
$d_defs = $definition->info_global_attr;
|
||||||
|
|
||||||
// copy out attributes for easy manipulation
|
// reference attributes for easy manipulation
|
||||||
$attr = $token->attr;
|
$attr =& $token->attr;
|
||||||
|
|
||||||
// do global transformations (pre)
|
// do global transformations (pre)
|
||||||
// nothing currently utilizes this
|
// nothing currently utilizes this
|
||||||
foreach ($definition->info_attr_transform_pre as $transform) {
|
foreach ($definition->info_attr_transform_pre as $transform) {
|
||||||
$attr = $transform->transform($attr, $config, $context);
|
$attr = $transform->transform($o = $attr, $config, $context);
|
||||||
|
if ($e && ($attr != $o)) $e->send(E_NOTICE, 'AttrValidator: Attributes transformed', $o, $attr);
|
||||||
}
|
}
|
||||||
|
|
||||||
// do local transformations only applicable to this element (pre)
|
// do local transformations only applicable to this element (pre)
|
||||||
// ex. <p align="right"> to <p style="text-align:right;">
|
// ex. <p align="right"> to <p style="text-align:right;">
|
||||||
foreach ($definition->info[$token->name]->attr_transform_pre
|
foreach ($definition->info[$token->name]->attr_transform_pre as $transform) {
|
||||||
as $transform
|
$attr = $transform->transform($o = $attr, $config, $context);
|
||||||
) {
|
if ($e && ($attr != $o)) $e->send(E_NOTICE, 'AttrValidator: Attributes transformed', $o, $attr);
|
||||||
$attr = $transform->transform($attr, $config, $context);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// create alias to this element's attribute definition array, see
|
// create alias to this element's attribute definition array, see
|
||||||
@@ -34,6 +55,9 @@ class HTMLPurifier_AttrValidator
|
|||||||
// DEFINITION CALL
|
// DEFINITION CALL
|
||||||
$defs = $definition->info[$token->name]->attr;
|
$defs = $definition->info[$token->name]->attr;
|
||||||
|
|
||||||
|
$attr_key = false;
|
||||||
|
$context->register('CurrentAttr', $attr_key);
|
||||||
|
|
||||||
// iterate through all the attribute keypairs
|
// iterate through all the attribute keypairs
|
||||||
// Watch out for name collisions: $key has previously been used
|
// Watch out for name collisions: $key has previously been used
|
||||||
foreach ($attr as $attr_key => $value) {
|
foreach ($attr as $attr_key => $value) {
|
||||||
@@ -67,9 +91,17 @@ class HTMLPurifier_AttrValidator
|
|||||||
|
|
||||||
// put the results into effect
|
// put the results into effect
|
||||||
if ($result === false || $result === null) {
|
if ($result === false || $result === null) {
|
||||||
|
// this is a generic error message that should replaced
|
||||||
|
// with more specific ones when possible
|
||||||
|
if ($e) $e->send(E_ERROR, 'AttrValidator: Attribute removed');
|
||||||
|
|
||||||
// remove the attribute
|
// remove the attribute
|
||||||
unset($attr[$attr_key]);
|
unset($attr[$attr_key]);
|
||||||
} elseif (is_string($result)) {
|
} elseif (is_string($result)) {
|
||||||
|
// generally, if a substitution is happening, there
|
||||||
|
// was some sort of implicit correction going on. We'll
|
||||||
|
// delegate it to the attribute classes to say exactly what.
|
||||||
|
|
||||||
// simple substitution
|
// simple substitution
|
||||||
$attr[$attr_key] = $result;
|
$attr[$attr_key] = $result;
|
||||||
}
|
}
|
||||||
@@ -81,25 +113,27 @@ class HTMLPurifier_AttrValidator
|
|||||||
// others would prepend themselves).
|
// others would prepend themselves).
|
||||||
}
|
}
|
||||||
|
|
||||||
|
$context->destroy('CurrentAttr');
|
||||||
|
|
||||||
// post transforms
|
// post transforms
|
||||||
|
|
||||||
// ex. <x lang="fr"> to <x lang="fr" xml:lang="fr">
|
// global (error reporting untested)
|
||||||
foreach ($definition->info_attr_transform_post as $transform) {
|
foreach ($definition->info_attr_transform_post as $transform) {
|
||||||
$attr = $transform->transform($attr, $config, $context);
|
$attr = $transform->transform($o = $attr, $config, $context);
|
||||||
|
if ($e && ($attr != $o)) $e->send(E_NOTICE, 'AttrValidator: Attributes transformed', $o, $attr);
|
||||||
}
|
}
|
||||||
|
|
||||||
// ex. <bdo> to <bdo dir="ltr">
|
// local (error reporting untested)
|
||||||
foreach ($definition->info[$token->name]->attr_transform_post as $transform) {
|
foreach ($definition->info[$token->name]->attr_transform_post as $transform) {
|
||||||
$attr = $transform->transform($attr, $config, $context);
|
$attr = $transform->transform($o = $attr, $config, $context);
|
||||||
|
if ($e && ($attr != $o)) $e->send(E_NOTICE, 'AttrValidator: Attributes transformed', $o, $attr);
|
||||||
}
|
}
|
||||||
|
|
||||||
// commit changes
|
// destroy CurrentToken if we made it ourselves
|
||||||
$token->attr = $attr;
|
if (!$current_token) $context->destroy('CurrentToken');
|
||||||
return $token;
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
?>
|
|
@@ -204,7 +204,7 @@ class HTMLPurifier_CSSDefinition extends HTMLPurifier_Definition
|
|||||||
$this->info['border-right'] = new HTMLPurifier_AttrDef_CSS_Border($config);
|
$this->info['border-right'] = new HTMLPurifier_AttrDef_CSS_Border($config);
|
||||||
|
|
||||||
$this->info['border-collapse'] = new HTMLPurifier_AttrDef_Enum(array(
|
$this->info['border-collapse'] = new HTMLPurifier_AttrDef_Enum(array(
|
||||||
'collapse', 'seperate'));
|
'collapse', 'separate'));
|
||||||
|
|
||||||
$this->info['caption-side'] = new HTMLPurifier_AttrDef_Enum(array(
|
$this->info['caption-side'] = new HTMLPurifier_AttrDef_Enum(array(
|
||||||
'top', 'bottom'));
|
'top', 'bottom'));
|
||||||
@@ -219,6 +219,8 @@ class HTMLPurifier_CSSDefinition extends HTMLPurifier_Definition
|
|||||||
new HTMLPurifier_AttrDef_CSS_Percentage()
|
new HTMLPurifier_AttrDef_CSS_Percentage()
|
||||||
));
|
));
|
||||||
|
|
||||||
|
$this->info['border-spacing'] = new HTMLPurifier_AttrDef_CSS_Multiple(new HTMLPurifier_AttrDef_CSS_Length(), 2);
|
||||||
|
|
||||||
// partial support
|
// partial support
|
||||||
$this->info['white-space'] = new HTMLPurifier_AttrDef_Enum(array('nowrap'));
|
$this->info['white-space'] = new HTMLPurifier_AttrDef_Enum(array('nowrap'));
|
||||||
|
|
||||||
@@ -226,4 +228,3 @@ class HTMLPurifier_CSSDefinition extends HTMLPurifier_Definition
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
?>
|
|
@@ -36,6 +36,11 @@ class HTMLPurifier_ChildDef
|
|||||||
*/
|
*/
|
||||||
var $allow_empty;
|
var $allow_empty;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Lookup array of all elements that this definition could possibly allow
|
||||||
|
*/
|
||||||
|
var $elements = array();
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Validates nodes according to definition and returns modification.
|
* Validates nodes according to definition and returns modification.
|
||||||
*
|
*
|
||||||
@@ -52,4 +57,4 @@ class HTMLPurifier_ChildDef
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
?>
|
|
||||||
|
@@ -35,6 +35,7 @@ class HTMLPurifier_ChildDef_Chameleon extends HTMLPurifier_ChildDef
|
|||||||
function HTMLPurifier_ChildDef_Chameleon($inline, $block) {
|
function HTMLPurifier_ChildDef_Chameleon($inline, $block) {
|
||||||
$this->inline = new HTMLPurifier_ChildDef_Optional($inline);
|
$this->inline = new HTMLPurifier_ChildDef_Optional($inline);
|
||||||
$this->block = new HTMLPurifier_ChildDef_Optional($block);
|
$this->block = new HTMLPurifier_ChildDef_Optional($block);
|
||||||
|
$this->elements = $this->block->elements;
|
||||||
}
|
}
|
||||||
|
|
||||||
function validateChildren($tokens_of_children, $config, &$context) {
|
function validateChildren($tokens_of_children, $config, &$context) {
|
||||||
@@ -48,4 +49,3 @@ class HTMLPurifier_ChildDef_Chameleon extends HTMLPurifier_ChildDef
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
?>
|
|
@@ -44,6 +44,12 @@ class HTMLPurifier_ChildDef_Custom extends HTMLPurifier_ChildDef
|
|||||||
// COMPLICATED! AND MIGHT BE BUGGY! I HAVE NO CLUE WHAT I'M
|
// COMPLICATED! AND MIGHT BE BUGGY! I HAVE NO CLUE WHAT I'M
|
||||||
// DOING! Seriously: if there's problems, please report them.
|
// DOING! Seriously: if there's problems, please report them.
|
||||||
|
|
||||||
|
// collect all elements into the $elements array
|
||||||
|
preg_match_all("/$el/", $reg, $matches);
|
||||||
|
foreach ($matches[0] as $match) {
|
||||||
|
$this->elements[$match] = true;
|
||||||
|
}
|
||||||
|
|
||||||
// setup all elements as parentheticals with leading commas
|
// setup all elements as parentheticals with leading commas
|
||||||
$reg = preg_replace("/$el/", '(,\\0)', $reg);
|
$reg = preg_replace("/$el/", '(,\\0)', $reg);
|
||||||
|
|
||||||
@@ -85,4 +91,3 @@ class HTMLPurifier_ChildDef_Custom extends HTMLPurifier_ChildDef
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
?>
|
|
@@ -19,4 +19,3 @@ class HTMLPurifier_ChildDef_Empty extends HTMLPurifier_ChildDef
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
?>
|
|
@@ -20,4 +20,3 @@ class HTMLPurifier_ChildDef_Optional extends HTMLPurifier_ChildDef_Required
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
?>
|
|
@@ -25,7 +25,7 @@ class HTMLPurifier_ChildDef_Required extends HTMLPurifier_ChildDef
|
|||||||
$elements = array_flip($elements);
|
$elements = array_flip($elements);
|
||||||
foreach ($elements as $i => $x) {
|
foreach ($elements as $i => $x) {
|
||||||
$elements[$i] = true;
|
$elements[$i] = true;
|
||||||
if (empty($i)) unset($elements[$i]);
|
if (empty($i)) unset($elements[$i]); // remove blank
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
$this->elements = $elements;
|
$this->elements = $elements;
|
||||||
@@ -109,4 +109,3 @@ class HTMLPurifier_ChildDef_Required extends HTMLPurifier_ChildDef
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
?>
|
|
@@ -73,4 +73,3 @@ extends HTMLPurifier_ChildDef_Required
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
?>
|
|
@@ -9,6 +9,8 @@ class HTMLPurifier_ChildDef_Table extends HTMLPurifier_ChildDef
|
|||||||
{
|
{
|
||||||
var $allow_empty = false;
|
var $allow_empty = false;
|
||||||
var $type = 'table';
|
var $type = 'table';
|
||||||
|
var $elements = array('tr' => true, 'tbody' => true, 'thead' => true,
|
||||||
|
'tfoot' => true, 'caption' => true, 'colgroup' => true, 'col' => true);
|
||||||
function HTMLPurifier_ChildDef_Table() {}
|
function HTMLPurifier_ChildDef_Table() {}
|
||||||
function validateChildren($tokens_of_children, $config, &$context) {
|
function validateChildren($tokens_of_children, $config, &$context) {
|
||||||
if (empty($tokens_of_children)) return false;
|
if (empty($tokens_of_children)) return false;
|
||||||
@@ -139,4 +141,3 @@ class HTMLPurifier_ChildDef_Table extends HTMLPurifier_ChildDef
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
?>
|
|
@@ -5,6 +5,7 @@ require_once 'HTMLPurifier/ConfigSchema.php';
|
|||||||
// member variables
|
// member variables
|
||||||
require_once 'HTMLPurifier/HTMLDefinition.php';
|
require_once 'HTMLPurifier/HTMLDefinition.php';
|
||||||
require_once 'HTMLPurifier/CSSDefinition.php';
|
require_once 'HTMLPurifier/CSSDefinition.php';
|
||||||
|
require_once 'HTMLPurifier/URIDefinition.php';
|
||||||
require_once 'HTMLPurifier/Doctype.php';
|
require_once 'HTMLPurifier/Doctype.php';
|
||||||
require_once 'HTMLPurifier/DefinitionCacheFactory.php';
|
require_once 'HTMLPurifier/DefinitionCacheFactory.php';
|
||||||
|
|
||||||
@@ -41,7 +42,7 @@ class HTMLPurifier_Config
|
|||||||
/**
|
/**
|
||||||
* HTML Purifier's version
|
* HTML Purifier's version
|
||||||
*/
|
*/
|
||||||
var $version = '2.0.0';
|
var $version = '2.1.2';
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Two-level associative array of configuration directives
|
* Two-level associative array of configuration directives
|
||||||
@@ -75,6 +76,11 @@ class HTMLPurifier_Config
|
|||||||
*/
|
*/
|
||||||
var $serials = array();
|
var $serials = array();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Serial for entire configuration object
|
||||||
|
*/
|
||||||
|
var $serial;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @param $definition HTMLPurifier_ConfigSchema that defines what directives
|
* @param $definition HTMLPurifier_ConfigSchema that defines what directives
|
||||||
* are allowed.
|
* are allowed.
|
||||||
@@ -101,7 +107,6 @@ class HTMLPurifier_Config
|
|||||||
$ret = HTMLPurifier_Config::createDefault();
|
$ret = HTMLPurifier_Config::createDefault();
|
||||||
if (is_string($config)) $ret->loadIni($config);
|
if (is_string($config)) $ret->loadIni($config);
|
||||||
elseif (is_array($config)) $ret->loadArray($config);
|
elseif (is_array($config)) $ret->loadArray($config);
|
||||||
if (isset($revision)) $ret->revision = $revision;
|
|
||||||
return $ret;
|
return $ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -155,15 +160,30 @@ class HTMLPurifier_Config
|
|||||||
/**
|
/**
|
||||||
* Returns a md5 signature of a segment of the configuration object
|
* Returns a md5 signature of a segment of the configuration object
|
||||||
* that uniquely identifies that particular configuration
|
* that uniquely identifies that particular configuration
|
||||||
|
* @note Revision is handled specially and is removed from the batch
|
||||||
|
* before processing!
|
||||||
* @param $namespace Namespace to get serial for
|
* @param $namespace Namespace to get serial for
|
||||||
*/
|
*/
|
||||||
function getBatchSerial($namespace) {
|
function getBatchSerial($namespace) {
|
||||||
if (empty($this->serials[$namespace])) {
|
if (empty($this->serials[$namespace])) {
|
||||||
$this->serials[$namespace] = md5(serialize($this->getBatch($namespace)));
|
$batch = $this->getBatch($namespace);
|
||||||
|
unset($batch['DefinitionRev']);
|
||||||
|
$this->serials[$namespace] = md5(serialize($batch));
|
||||||
}
|
}
|
||||||
return $this->serials[$namespace];
|
return $this->serials[$namespace];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns a md5 signature for the entire configuration object
|
||||||
|
* that uniquely identifies that particular configuration
|
||||||
|
*/
|
||||||
|
function getSerial() {
|
||||||
|
if (empty($this->serial)) {
|
||||||
|
$this->serial = md5(serialize($this->getAll()));
|
||||||
|
}
|
||||||
|
return $this->serial;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Retrieves all directives, organized by namespace
|
* Retrieves all directives, organized by namespace
|
||||||
*/
|
*/
|
||||||
@@ -246,14 +266,16 @@ class HTMLPurifier_Config
|
|||||||
* called before it's been setup, otherwise won't work.
|
* called before it's been setup, otherwise won't work.
|
||||||
*/
|
*/
|
||||||
function &getHTMLDefinition($raw = false) {
|
function &getHTMLDefinition($raw = false) {
|
||||||
return $this->getDefinition('HTML', $raw);
|
$def =& $this->getDefinition('HTML', $raw);
|
||||||
|
return $def; // prevent PHP 4.4.0 from complaining
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Retrieves reference to the CSS definition
|
* Retrieves reference to the CSS definition
|
||||||
*/
|
*/
|
||||||
function &getCSSDefinition($raw = false) {
|
function &getCSSDefinition($raw = false) {
|
||||||
return $this->getDefinition('CSS', $raw);
|
$def =& $this->getDefinition('CSS', $raw);
|
||||||
|
return $def;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -270,6 +292,7 @@ class HTMLPurifier_Config
|
|||||||
if (!empty($this->definitions[$type])) {
|
if (!empty($this->definitions[$type])) {
|
||||||
if (!$this->definitions[$type]->setup) {
|
if (!$this->definitions[$type]->setup) {
|
||||||
$this->definitions[$type]->setup($this);
|
$this->definitions[$type]->setup($this);
|
||||||
|
$cache->set($this->definitions[$type], $this);
|
||||||
}
|
}
|
||||||
return $this->definitions[$type];
|
return $this->definitions[$type];
|
||||||
}
|
}
|
||||||
@@ -291,6 +314,8 @@ class HTMLPurifier_Config
|
|||||||
$this->definitions[$type] = new HTMLPurifier_HTMLDefinition();
|
$this->definitions[$type] = new HTMLPurifier_HTMLDefinition();
|
||||||
} elseif ($type == 'CSS') {
|
} elseif ($type == 'CSS') {
|
||||||
$this->definitions[$type] = new HTMLPurifier_CSSDefinition();
|
$this->definitions[$type] = new HTMLPurifier_CSSDefinition();
|
||||||
|
} elseif ($type == 'URI') {
|
||||||
|
$this->definitions[$type] = new HTMLPurifier_URIDefinition();
|
||||||
} else {
|
} else {
|
||||||
trigger_error("Definition of $type type not supported");
|
trigger_error("Definition of $type type not supported");
|
||||||
$false = false;
|
$false = false;
|
||||||
@@ -301,7 +326,7 @@ class HTMLPurifier_Config
|
|||||||
if (is_null($this->get($type, 'DefinitionID'))) {
|
if (is_null($this->get($type, 'DefinitionID'))) {
|
||||||
// fatally error out if definition ID not set
|
// fatally error out if definition ID not set
|
||||||
trigger_error("Cannot retrieve raw version without specifying %$type.DefinitionID", E_USER_ERROR);
|
trigger_error("Cannot retrieve raw version without specifying %$type.DefinitionID", E_USER_ERROR);
|
||||||
$false = false;
|
$false = new HTMLPurifier_Error();
|
||||||
return $false;
|
return $false;
|
||||||
}
|
}
|
||||||
return $this->definitions[$type];
|
return $this->definitions[$type];
|
||||||
@@ -336,25 +361,96 @@ class HTMLPurifier_Config
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns a list of array(namespace, directive) for all directives
|
||||||
|
* that are allowed in a web-form context as per an allowed
|
||||||
|
* namespaces/directives list.
|
||||||
|
* @param $allowed List of allowed namespaces/directives
|
||||||
|
* @static
|
||||||
|
*/
|
||||||
|
function getAllowedDirectivesForForm($allowed) {
|
||||||
|
$schema = HTMLPurifier_ConfigSchema::instance();
|
||||||
|
if ($allowed !== true) {
|
||||||
|
if (is_string($allowed)) $allowed = array($allowed);
|
||||||
|
$allowed_ns = array();
|
||||||
|
$allowed_directives = array();
|
||||||
|
$blacklisted_directives = array();
|
||||||
|
foreach ($allowed as $ns_or_directive) {
|
||||||
|
if (strpos($ns_or_directive, '.') !== false) {
|
||||||
|
// directive
|
||||||
|
if ($ns_or_directive[0] == '-') {
|
||||||
|
$blacklisted_directives[substr($ns_or_directive, 1)] = true;
|
||||||
|
} else {
|
||||||
|
$allowed_directives[$ns_or_directive] = true;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// namespace
|
||||||
|
$allowed_ns[$ns_or_directive] = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
$ret = array();
|
||||||
|
foreach ($schema->info as $ns => $keypairs) {
|
||||||
|
foreach ($keypairs as $directive => $def) {
|
||||||
|
if ($allowed !== true) {
|
||||||
|
if (isset($blacklisted_directives["$ns.$directive"])) continue;
|
||||||
|
if (!isset($allowed_directives["$ns.$directive"]) && !isset($allowed_ns[$ns])) continue;
|
||||||
|
}
|
||||||
|
if ($def->class == 'alias') continue;
|
||||||
|
if ($directive == 'DefinitionID' || $directive == 'DefinitionRev') continue;
|
||||||
|
$ret[] = array($ns, $directive);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return $ret;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Loads configuration values from $_GET/$_POST that were posted
|
* Loads configuration values from $_GET/$_POST that were posted
|
||||||
* via ConfigForm
|
* via ConfigForm
|
||||||
* @param $array $_GET or $_POST array to import
|
* @param $array $_GET or $_POST array to import
|
||||||
* @param $index Index/name that the config variables are in
|
* @param $index Index/name that the config variables are in
|
||||||
|
* @param $allowed List of allowed namespaces/directives
|
||||||
* @param $mq_fix Boolean whether or not to enable magic quotes fix
|
* @param $mq_fix Boolean whether or not to enable magic quotes fix
|
||||||
* @static
|
* @static
|
||||||
*/
|
*/
|
||||||
function loadArrayFromForm($array, $index, $mq_fix = true) {
|
function loadArrayFromForm($array, $index, $allowed = true, $mq_fix = true) {
|
||||||
|
$ret = HTMLPurifier_Config::prepareArrayFromForm($array, $index, $allowed, $mq_fix);
|
||||||
|
$config = HTMLPurifier_Config::create($ret);
|
||||||
|
return $config;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Merges in configuration values from $_GET/$_POST to object. NOT STATIC.
|
||||||
|
* @note Same parameters as loadArrayFromForm
|
||||||
|
*/
|
||||||
|
function mergeArrayFromForm($array, $index, $allowed = true, $mq_fix = true) {
|
||||||
|
$ret = HTMLPurifier_Config::prepareArrayFromForm($array, $index, $allowed, $mq_fix);
|
||||||
|
$this->loadArray($ret);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Prepares an array from a form into something usable for the more
|
||||||
|
* strict parts of HTMLPurifier_Config
|
||||||
|
* @static
|
||||||
|
*/
|
||||||
|
function prepareArrayFromForm($array, $index, $allowed = true, $mq_fix = true) {
|
||||||
$array = (isset($array[$index]) && is_array($array[$index])) ? $array[$index] : array();
|
$array = (isset($array[$index]) && is_array($array[$index])) ? $array[$index] : array();
|
||||||
$mq = get_magic_quotes_gpc() && $mq_fix;
|
$mq = get_magic_quotes_gpc() && $mq_fix;
|
||||||
foreach ($array as $key => $value) {
|
|
||||||
if (!strncmp($key, 'Null_', 5) && !empty($value)) {
|
$allowed = HTMLPurifier_Config::getAllowedDirectivesForForm($allowed);
|
||||||
unset($array[substr($key, 5)]);
|
$ret = array();
|
||||||
unset($array[$key]);
|
foreach ($allowed as $key) {
|
||||||
|
list($ns, $directive) = $key;
|
||||||
|
$skey = "$ns.$directive";
|
||||||
|
if (!empty($array["Null_$skey"])) {
|
||||||
|
$ret[$ns][$directive] = null;
|
||||||
|
continue;
|
||||||
}
|
}
|
||||||
if ($mq) $array[$key] = stripslashes($value);
|
if (!isset($array[$skey])) continue;
|
||||||
|
$value = $mq ? stripslashes($array[$skey]) : $array[$skey];
|
||||||
|
$ret[$ns][$directive] = $value;
|
||||||
}
|
}
|
||||||
return @HTMLPurifier_Config::create($array);
|
return $ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -395,4 +491,4 @@ class HTMLPurifier_Config
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
?>
|
|
||||||
|
@@ -7,4 +7,3 @@ class HTMLPurifier_ConfigDef {
|
|||||||
var $class = false;
|
var $class = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
?>
|
|
@@ -61,6 +61,12 @@ class HTMLPurifier_ConfigDef_Directive extends HTMLPurifier_ConfigDef
|
|||||||
*/
|
*/
|
||||||
var $aliases = array();
|
var $aliases = array();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Advisory list of directive aliases, i.e. other directives that
|
||||||
|
* redirect here
|
||||||
|
*/
|
||||||
|
var $directiveAliases = array();
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Adds a description to the array
|
* Adds a description to the array
|
||||||
*/
|
*/
|
||||||
@@ -71,4 +77,3 @@ class HTMLPurifier_ConfigDef_Directive extends HTMLPurifier_ConfigDef
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
?>
|
|
@@ -24,4 +24,3 @@ class HTMLPurifier_ConfigDef_DirectiveAlias extends HTMLPurifier_ConfigDef
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
?>
|
|
@@ -20,4 +20,3 @@ class HTMLPurifier_ConfigDef_Namespace extends HTMLPurifier_ConfigDef {
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
?>
|
|
@@ -6,6 +6,8 @@ require_once 'HTMLPurifier/ConfigDef/Namespace.php';
|
|||||||
require_once 'HTMLPurifier/ConfigDef/Directive.php';
|
require_once 'HTMLPurifier/ConfigDef/Directive.php';
|
||||||
require_once 'HTMLPurifier/ConfigDef/DirectiveAlias.php';
|
require_once 'HTMLPurifier/ConfigDef/DirectiveAlias.php';
|
||||||
|
|
||||||
|
if (!defined('HTMLPURIFIER_SCHEMA_STRICT')) define('HTMLPURIFIER_SCHEMA_STRICT', false);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Configuration definition, defines directives and their defaults.
|
* Configuration definition, defines directives and their defaults.
|
||||||
* @note If you update this, please update Printer_ConfigForm
|
* @note If you update this, please update Printer_ConfigForm
|
||||||
@@ -49,6 +51,8 @@ class HTMLPurifier_ConfigSchema {
|
|||||||
var $types = array(
|
var $types = array(
|
||||||
'string' => 'String',
|
'string' => 'String',
|
||||||
'istring' => 'Case-insensitive string',
|
'istring' => 'Case-insensitive string',
|
||||||
|
'text' => 'Text',
|
||||||
|
'itext' => 'Case-insensitive text',
|
||||||
'int' => 'Integer',
|
'int' => 'Integer',
|
||||||
'float' => 'Float',
|
'float' => 'Float',
|
||||||
'bool' => 'Boolean',
|
'bool' => 'Boolean',
|
||||||
@@ -67,6 +71,8 @@ class HTMLPurifier_ConfigSchema {
|
|||||||
$this->defineNamespace('URI', 'Features regarding Uniform Resource Identifiers.');
|
$this->defineNamespace('URI', 'Features regarding Uniform Resource Identifiers.');
|
||||||
$this->defineNamespace('HTML', 'Configuration regarding allowed HTML.');
|
$this->defineNamespace('HTML', 'Configuration regarding allowed HTML.');
|
||||||
$this->defineNamespace('CSS', 'Configuration regarding allowed CSS.');
|
$this->defineNamespace('CSS', 'Configuration regarding allowed CSS.');
|
||||||
|
$this->defineNamespace('AutoFormat', 'Configuration for activating auto-formatting functionality (also known as <code>Injector</code>s)');
|
||||||
|
$this->defineNamespace('AutoFormatParam', 'Configuration for customizing auto-formatting functionality');
|
||||||
$this->defineNamespace('Output', 'Configuration relating to the generation of (X)HTML.');
|
$this->defineNamespace('Output', 'Configuration relating to the generation of (X)HTML.');
|
||||||
$this->defineNamespace('Cache', 'Configuration for DefinitionCache and related subclasses.');
|
$this->defineNamespace('Cache', 'Configuration for DefinitionCache and related subclasses.');
|
||||||
$this->defineNamespace('Test', 'Developer testing configuration for our unit tests.');
|
$this->defineNamespace('Test', 'Developer testing configuration for our unit tests.');
|
||||||
@@ -98,11 +104,11 @@ class HTMLPurifier_ConfigSchema {
|
|||||||
* HTMLPurifier_DirectiveDef::$type for allowed values
|
* HTMLPurifier_DirectiveDef::$type for allowed values
|
||||||
* @param $description Description of directive for documentation
|
* @param $description Description of directive for documentation
|
||||||
*/
|
*/
|
||||||
function define(
|
function define($namespace, $name, $default, $type, $description) {
|
||||||
$namespace, $name, $default, $type,
|
|
||||||
$description
|
|
||||||
) {
|
|
||||||
$def =& HTMLPurifier_ConfigSchema::instance();
|
$def =& HTMLPurifier_ConfigSchema::instance();
|
||||||
|
|
||||||
|
// basic sanity checks
|
||||||
|
if (HTMLPURIFIER_SCHEMA_STRICT) {
|
||||||
if (!isset($def->info[$namespace])) {
|
if (!isset($def->info[$namespace])) {
|
||||||
trigger_error('Cannot define directive for undefined namespace',
|
trigger_error('Cannot define directive for undefined namespace',
|
||||||
E_USER_ERROR);
|
E_USER_ERROR);
|
||||||
@@ -118,7 +124,10 @@ class HTMLPurifier_ConfigSchema {
|
|||||||
E_USER_ERROR);
|
E_USER_ERROR);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if (isset($def->info[$namespace][$name])) {
|
if (isset($def->info[$namespace][$name])) {
|
||||||
|
// already defined
|
||||||
if (
|
if (
|
||||||
$def->info[$namespace][$name]->type !== $type ||
|
$def->info[$namespace][$name]->type !== $type ||
|
||||||
$def->defaults[$namespace][$name] !== $default
|
$def->defaults[$namespace][$name] !== $default
|
||||||
@@ -127,12 +136,15 @@ class HTMLPurifier_ConfigSchema {
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
// process modifiers
|
// needs defining
|
||||||
|
|
||||||
|
// process modifiers (OPTIMIZE!)
|
||||||
$type_values = explode('/', $type, 2);
|
$type_values = explode('/', $type, 2);
|
||||||
$type = $type_values[0];
|
$type = $type_values[0];
|
||||||
$modifier = isset($type_values[1]) ? $type_values[1] : false;
|
$modifier = isset($type_values[1]) ? $type_values[1] : false;
|
||||||
$allow_null = ($modifier === 'null');
|
$allow_null = ($modifier === 'null');
|
||||||
|
|
||||||
|
if (HTMLPURIFIER_SCHEMA_STRICT) {
|
||||||
if (!isset($def->types[$type])) {
|
if (!isset($def->types[$type])) {
|
||||||
trigger_error('Invalid type for configuration directive',
|
trigger_error('Invalid type for configuration directive',
|
||||||
E_USER_ERROR);
|
E_USER_ERROR);
|
||||||
@@ -144,12 +156,15 @@ class HTMLPurifier_ConfigSchema {
|
|||||||
E_USER_ERROR);
|
E_USER_ERROR);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
$def->info[$namespace][$name] =
|
$def->info[$namespace][$name] =
|
||||||
new HTMLPurifier_ConfigDef_Directive();
|
new HTMLPurifier_ConfigDef_Directive();
|
||||||
$def->info[$namespace][$name]->type = $type;
|
$def->info[$namespace][$name]->type = $type;
|
||||||
$def->info[$namespace][$name]->allow_null = $allow_null;
|
$def->info[$namespace][$name]->allow_null = $allow_null;
|
||||||
$def->defaults[$namespace][$name] = $default;
|
$def->defaults[$namespace][$name] = $default;
|
||||||
}
|
}
|
||||||
|
if (!HTMLPURIFIER_SCHEMA_STRICT) return;
|
||||||
$backtrace = debug_backtrace();
|
$backtrace = debug_backtrace();
|
||||||
$file = $def->mungeFilename($backtrace[0]['file']);
|
$file = $def->mungeFilename($backtrace[0]['file']);
|
||||||
$line = $backtrace[0]['line'];
|
$line = $backtrace[0]['line'];
|
||||||
@@ -164,6 +179,7 @@ class HTMLPurifier_ConfigSchema {
|
|||||||
*/
|
*/
|
||||||
function defineNamespace($namespace, $description) {
|
function defineNamespace($namespace, $description) {
|
||||||
$def =& HTMLPurifier_ConfigSchema::instance();
|
$def =& HTMLPurifier_ConfigSchema::instance();
|
||||||
|
if (HTMLPURIFIER_SCHEMA_STRICT) {
|
||||||
if (isset($def->info[$namespace])) {
|
if (isset($def->info[$namespace])) {
|
||||||
trigger_error('Cannot redefine namespace', E_USER_ERROR);
|
trigger_error('Cannot redefine namespace', E_USER_ERROR);
|
||||||
return;
|
return;
|
||||||
@@ -178,6 +194,7 @@ class HTMLPurifier_ConfigSchema {
|
|||||||
E_USER_ERROR);
|
E_USER_ERROR);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
$def->info[$namespace] = array();
|
$def->info[$namespace] = array();
|
||||||
$def->info_namespace[$namespace] = new HTMLPurifier_ConfigDef_Namespace();
|
$def->info_namespace[$namespace] = new HTMLPurifier_ConfigDef_Namespace();
|
||||||
$def->info_namespace[$namespace]->description = $description;
|
$def->info_namespace[$namespace]->description = $description;
|
||||||
@@ -197,12 +214,13 @@ class HTMLPurifier_ConfigSchema {
|
|||||||
*/
|
*/
|
||||||
function defineValueAliases($namespace, $name, $aliases) {
|
function defineValueAliases($namespace, $name, $aliases) {
|
||||||
$def =& HTMLPurifier_ConfigSchema::instance();
|
$def =& HTMLPurifier_ConfigSchema::instance();
|
||||||
if (!isset($def->info[$namespace][$name])) {
|
if (HTMLPURIFIER_SCHEMA_STRICT && !isset($def->info[$namespace][$name])) {
|
||||||
trigger_error('Cannot set value alias for non-existant directive',
|
trigger_error('Cannot set value alias for non-existant directive',
|
||||||
E_USER_ERROR);
|
E_USER_ERROR);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
foreach ($aliases as $alias => $real) {
|
foreach ($aliases as $alias => $real) {
|
||||||
|
if (HTMLPURIFIER_SCHEMA_STRICT) {
|
||||||
if (!$def->info[$namespace][$name] !== true &&
|
if (!$def->info[$namespace][$name] !== true &&
|
||||||
!isset($def->info[$namespace][$name]->allowed[$real])
|
!isset($def->info[$namespace][$name]->allowed[$real])
|
||||||
) {
|
) {
|
||||||
@@ -215,6 +233,7 @@ class HTMLPurifier_ConfigSchema {
|
|||||||
E_USER_ERROR);
|
E_USER_ERROR);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
$def->info[$namespace][$name]->aliases[$alias] = $real;
|
$def->info[$namespace][$name]->aliases[$alias] = $real;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -228,14 +247,14 @@ class HTMLPurifier_ConfigSchema {
|
|||||||
*/
|
*/
|
||||||
function defineAllowedValues($namespace, $name, $allowed_values) {
|
function defineAllowedValues($namespace, $name, $allowed_values) {
|
||||||
$def =& HTMLPurifier_ConfigSchema::instance();
|
$def =& HTMLPurifier_ConfigSchema::instance();
|
||||||
if (!isset($def->info[$namespace][$name])) {
|
if (HTMLPURIFIER_SCHEMA_STRICT && !isset($def->info[$namespace][$name])) {
|
||||||
trigger_error('Cannot define allowed values for undefined directive',
|
trigger_error('Cannot define allowed values for undefined directive',
|
||||||
E_USER_ERROR);
|
E_USER_ERROR);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
$directive =& $def->info[$namespace][$name];
|
$directive =& $def->info[$namespace][$name];
|
||||||
$type = $directive->type;
|
$type = $directive->type;
|
||||||
if ($type != 'string' && $type != 'istring') {
|
if (HTMLPURIFIER_SCHEMA_STRICT && $type != 'string' && $type != 'istring') {
|
||||||
trigger_error('Cannot define allowed values for directive whose type is not string',
|
trigger_error('Cannot define allowed values for directive whose type is not string',
|
||||||
E_USER_ERROR);
|
E_USER_ERROR);
|
||||||
return;
|
return;
|
||||||
@@ -246,8 +265,11 @@ class HTMLPurifier_ConfigSchema {
|
|||||||
foreach ($allowed_values as $value) {
|
foreach ($allowed_values as $value) {
|
||||||
$directive->allowed[$value] = true;
|
$directive->allowed[$value] = true;
|
||||||
}
|
}
|
||||||
if ($def->defaults[$namespace][$name] !== null &&
|
if (
|
||||||
!isset($directive->allowed[$def->defaults[$namespace][$name]])) {
|
HTMLPURIFIER_SCHEMA_STRICT &&
|
||||||
|
$def->defaults[$namespace][$name] !== null &&
|
||||||
|
!isset($directive->allowed[$def->defaults[$namespace][$name]])
|
||||||
|
) {
|
||||||
trigger_error('Default value must be in allowed range of variables',
|
trigger_error('Default value must be in allowed range of variables',
|
||||||
E_USER_ERROR);
|
E_USER_ERROR);
|
||||||
$directive->allowed = true; // undo undo!
|
$directive->allowed = true; // undo undo!
|
||||||
@@ -265,6 +287,7 @@ class HTMLPurifier_ConfigSchema {
|
|||||||
*/
|
*/
|
||||||
function defineAlias($namespace, $name, $new_namespace, $new_name) {
|
function defineAlias($namespace, $name, $new_namespace, $new_name) {
|
||||||
$def =& HTMLPurifier_ConfigSchema::instance();
|
$def =& HTMLPurifier_ConfigSchema::instance();
|
||||||
|
if (HTMLPURIFIER_SCHEMA_STRICT) {
|
||||||
if (!isset($def->info[$namespace])) {
|
if (!isset($def->info[$namespace])) {
|
||||||
trigger_error('Cannot define directive alias in undefined namespace',
|
trigger_error('Cannot define directive alias in undefined namespace',
|
||||||
E_USER_ERROR);
|
E_USER_ERROR);
|
||||||
@@ -290,9 +313,11 @@ class HTMLPurifier_ConfigSchema {
|
|||||||
E_USER_ERROR);
|
E_USER_ERROR);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
$def->info[$namespace][$name] =
|
$def->info[$namespace][$name] =
|
||||||
new HTMLPurifier_ConfigDef_DirectiveAlias(
|
new HTMLPurifier_ConfigDef_DirectiveAlias(
|
||||||
$new_namespace, $new_name);
|
$new_namespace, $new_name);
|
||||||
|
$def->info[$new_namespace][$new_name]->directiveAliases[] = "$namespace.$name";
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -310,8 +335,10 @@ class HTMLPurifier_ConfigSchema {
|
|||||||
return $var;
|
return $var;
|
||||||
case 'istring':
|
case 'istring':
|
||||||
case 'string':
|
case 'string':
|
||||||
|
case 'text': // no difference, just is longer/multiple line string
|
||||||
|
case 'itext':
|
||||||
if (!is_string($var)) break;
|
if (!is_string($var)) break;
|
||||||
if ($type === 'istring') $var = strtolower($var);
|
if ($type === 'istring' || $type === 'itext') $var = strtolower($var);
|
||||||
return $var;
|
return $var;
|
||||||
case 'int':
|
case 'int':
|
||||||
if (is_string($var) && ctype_digit($var)) $var = (int) $var;
|
if (is_string($var) && ctype_digit($var)) $var = (int) $var;
|
||||||
@@ -342,9 +369,13 @@ class HTMLPurifier_ConfigSchema {
|
|||||||
// a single empty string item, but having an empty
|
// a single empty string item, but having an empty
|
||||||
// array is more intuitive
|
// array is more intuitive
|
||||||
if ($var == '') return array();
|
if ($var == '') return array();
|
||||||
|
if (strpos($var, "\n") === false && strpos($var, "\r") === false) {
|
||||||
// simplistic string to array method that only works
|
// simplistic string to array method that only works
|
||||||
// for simple lists of tag names or alphanumeric characters
|
// for simple lists of tag names or alphanumeric characters
|
||||||
$var = explode(',',$var);
|
$var = explode(',',$var);
|
||||||
|
} else {
|
||||||
|
$var = preg_split('/(,|[\n\r]+)/', $var);
|
||||||
|
}
|
||||||
// remove spaces
|
// remove spaces
|
||||||
foreach ($var as $i => $j) $var[$i] = trim($j);
|
foreach ($var as $i => $j) $var[$i] = trim($j);
|
||||||
if ($type === 'hash') {
|
if ($type === 'hash') {
|
||||||
@@ -385,6 +416,7 @@ class HTMLPurifier_ConfigSchema {
|
|||||||
* Takes an absolute path and munges it into a more manageable relative path
|
* Takes an absolute path and munges it into a more manageable relative path
|
||||||
*/
|
*/
|
||||||
function mungeFilename($filename) {
|
function mungeFilename($filename) {
|
||||||
|
if (!HTMLPURIFIER_SCHEMA_STRICT) return $filename;
|
||||||
$offset = strrpos($filename, 'HTMLPurifier');
|
$offset = strrpos($filename, 'HTMLPurifier');
|
||||||
$filename = substr($filename, $offset);
|
$filename = substr($filename, $offset);
|
||||||
$filename = str_replace('\\', '/', $filename);
|
$filename = str_replace('\\', '/', $filename);
|
||||||
@@ -401,4 +433,4 @@ class HTMLPurifier_ConfigSchema {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
?>
|
|
||||||
|
@@ -5,6 +5,7 @@ require_once 'HTMLPurifier/ChildDef.php';
|
|||||||
require_once 'HTMLPurifier/ChildDef/Empty.php';
|
require_once 'HTMLPurifier/ChildDef/Empty.php';
|
||||||
require_once 'HTMLPurifier/ChildDef/Required.php';
|
require_once 'HTMLPurifier/ChildDef/Required.php';
|
||||||
require_once 'HTMLPurifier/ChildDef/Optional.php';
|
require_once 'HTMLPurifier/ChildDef/Optional.php';
|
||||||
|
require_once 'HTMLPurifier/ChildDef/Custom.php';
|
||||||
|
|
||||||
// NOT UNIT TESTED!!!
|
// NOT UNIT TESTED!!!
|
||||||
|
|
||||||
@@ -147,4 +148,3 @@ class HTMLPurifier_ContentSets
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
?>
|
|
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user