mirror of
https://github.com/ezyang/htmlpurifier.git
synced 2025-08-04 13:18:00 +02:00
Compare commits
9 Commits
v4.18.0
...
v1.5.0-str
Author | SHA1 | Date | |
---|---|---|---|
|
6bab867afb | ||
|
dd2fd06591 | ||
|
cec7a1c087 | ||
|
c2d3d5b859 | ||
|
9a84e11f34 | ||
|
37ea1673dd | ||
|
5395d8b4bd | ||
|
c980e76197 | ||
|
2bf912d528 |
23
.gitattributes
vendored
23
.gitattributes
vendored
@@ -1,23 +0,0 @@
|
|||||||
/.gitattributes export-ignore
|
|
||||||
/.github export-ignore
|
|
||||||
/.gitignore export-ignore
|
|
||||||
/art export-ignore
|
|
||||||
/benchmarks export-ignore
|
|
||||||
/configdoc export-ignore
|
|
||||||
/configdoc/usage.xml -crlf
|
|
||||||
/docs export-ignore
|
|
||||||
/Doxyfile export-ignore
|
|
||||||
/extras export-ignore
|
|
||||||
/INSTALL* export-ignore
|
|
||||||
/maintenance export-ignore
|
|
||||||
/NEWS export-ignore
|
|
||||||
/package.php export-ignore
|
|
||||||
/plugins export-ignore
|
|
||||||
/phpdoc.ini export-ignore
|
|
||||||
/smoketests export-ignore
|
|
||||||
/test-* export-ignore
|
|
||||||
/tests export-ignore
|
|
||||||
/TODO export-ignore
|
|
||||||
/update-for-release export-ignore
|
|
||||||
/WYSIWYG export-ignore
|
|
||||||
/release.config.js export-ignore
|
|
36
.github/workflows/ci.yml
vendored
36
.github/workflows/ci.yml
vendored
@@ -1,36 +0,0 @@
|
|||||||
name: ci
|
|
||||||
|
|
||||||
on:
|
|
||||||
push:
|
|
||||||
pull_request:
|
|
||||||
|
|
||||||
jobs:
|
|
||||||
linux_tests:
|
|
||||||
runs-on: ubuntu-latest
|
|
||||||
strategy:
|
|
||||||
fail-fast: true
|
|
||||||
matrix:
|
|
||||||
php: ['5.6', '7.0', '7.1', '7.2', '7.3', '7.4', '8.0', '8.1', '8.2', '8.3', '8.4']
|
|
||||||
|
|
||||||
name: PHP ${{ matrix.php }}
|
|
||||||
|
|
||||||
steps:
|
|
||||||
- name: Checkout code
|
|
||||||
uses: actions/checkout@v3
|
|
||||||
|
|
||||||
- name: Setup PHP
|
|
||||||
uses: shivammathur/setup-php@v2
|
|
||||||
with:
|
|
||||||
php-version: ${{ matrix.php }}
|
|
||||||
tools: composer:v2
|
|
||||||
ini-values: error_reporting=E_ALL
|
|
||||||
extensions: iconv, bcmath, tidy, mbstring, intl
|
|
||||||
|
|
||||||
- name: Install dependencies
|
|
||||||
run: composer install
|
|
||||||
|
|
||||||
- name: Configure simpletest
|
|
||||||
run: cp test-settings.sample.php test-settings.php
|
|
||||||
|
|
||||||
- name: Execute Unit tests
|
|
||||||
run: php tests/index.php
|
|
19
.github/workflows/lint-pr.yml
vendored
19
.github/workflows/lint-pr.yml
vendored
@@ -1,19 +0,0 @@
|
|||||||
name: "Lint PR"
|
|
||||||
|
|
||||||
on:
|
|
||||||
pull_request_target:
|
|
||||||
types:
|
|
||||||
- opened
|
|
||||||
- edited
|
|
||||||
- synchronize
|
|
||||||
|
|
||||||
jobs:
|
|
||||||
main:
|
|
||||||
name: Validate PR title
|
|
||||||
|
|
||||||
runs-on: ubuntu-latest
|
|
||||||
|
|
||||||
steps:
|
|
||||||
- uses: amannn/action-semantic-pull-request@v4
|
|
||||||
env:
|
|
||||||
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
|
29
.github/workflows/release.yml
vendored
29
.github/workflows/release.yml
vendored
@@ -1,29 +0,0 @@
|
|||||||
name: release
|
|
||||||
|
|
||||||
on:
|
|
||||||
workflow_dispatch:
|
|
||||||
|
|
||||||
jobs:
|
|
||||||
release:
|
|
||||||
runs-on: ubuntu-latest
|
|
||||||
|
|
||||||
name: Release
|
|
||||||
|
|
||||||
steps:
|
|
||||||
- name: Checkout code
|
|
||||||
uses: actions/checkout@v2
|
|
||||||
|
|
||||||
- name: Setup PHP
|
|
||||||
uses: shivammathur/setup-php@v2
|
|
||||||
with:
|
|
||||||
php-version: 8.2
|
|
||||||
|
|
||||||
- name: Run automated release process with semantic-release
|
|
||||||
uses: cycjimmy/semantic-release-action@v4
|
|
||||||
with:
|
|
||||||
extra_plugins: |
|
|
||||||
@semantic-release/changelog
|
|
||||||
@semantic-release/git
|
|
||||||
@semantic-release/exec
|
|
||||||
env:
|
|
||||||
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
|
28
.gitignore
vendored
28
.gitignore
vendored
@@ -1,28 +0,0 @@
|
|||||||
tags
|
|
||||||
conf/
|
|
||||||
test-settings.php
|
|
||||||
config-schema.php
|
|
||||||
library/HTMLPurifier/DefinitionCache/Serializer/*/
|
|
||||||
library/standalone/
|
|
||||||
library/HTMLPurifier.standalone.php
|
|
||||||
library/HTMLPurifier*.tgz
|
|
||||||
library/package*.xml
|
|
||||||
smoketests/test-schema.html
|
|
||||||
configdoc/*.html
|
|
||||||
configdoc/configdoc.xml
|
|
||||||
docs/doxygen*
|
|
||||||
*.phpt.diff
|
|
||||||
*.phpt.exp
|
|
||||||
*.phpt.log
|
|
||||||
*.phpt.out
|
|
||||||
*.phpt.php
|
|
||||||
*.phpt.skip.php
|
|
||||||
*.htmlt.ini
|
|
||||||
*.patch
|
|
||||||
/*.php
|
|
||||||
vendor
|
|
||||||
composer.lock
|
|
||||||
*.rej
|
|
||||||
*.orig
|
|
||||||
*.bak
|
|
||||||
core
|
|
2
CREDITS
2
CREDITS
@@ -5,5 +5,3 @@ Almost everything written by Edward Z. Yang (Ambush Commander). Lots of thanks
|
|||||||
to the DevNetwork Community for their help (see docs/ref-devnetwork.html for
|
to the DevNetwork Community for their help (see docs/ref-devnetwork.html for
|
||||||
more details), Feyd especially (namely IPv6 and optimization). Thanks to RSnake
|
more details), Feyd especially (namely IPv6 and optimization). Thanks to RSnake
|
||||||
for letting me package his fantastic XSS cheatsheet for a smoketest.
|
for letting me package his fantastic XSS cheatsheet for a smoketest.
|
||||||
|
|
||||||
vim: et sw=4 sts=4
|
|
||||||
|
309
INSTALL
309
INSTALL
@@ -2,61 +2,61 @@
|
|||||||
Install
|
Install
|
||||||
How to install HTML Purifier
|
How to install HTML Purifier
|
||||||
|
|
||||||
HTML Purifier is designed to run out of the box, so actually using the
|
HTML Purifier is designed to run out of the box, so actually using the library
|
||||||
library is extremely easy. (Although... if you were looking for a
|
is extremely easy. (Although, if you were looking for a step-by-step
|
||||||
step-by-step installation GUI, you've downloaded the wrong software!)
|
installation GUI, you've come to the wrong place!) The impatient can scroll
|
||||||
|
down to the bottom of this INSTALL document to see the code, but you really
|
||||||
|
should make sure a few things are properly done.
|
||||||
|
|
||||||
While the impatient can get going immediately with some of the sample
|
Todo: Convert to using the array syntax for configuration.
|
||||||
code at the bottom of this library, it's well worth reading this entire
|
|
||||||
document--most of the other documentation assumes that you are familiar
|
|
||||||
with these contents.
|
|
||||||
|
|
||||||
|
|
||||||
---------------------------------------------------------------------------
|
|
||||||
1. Compatibility
|
1. Compatibility
|
||||||
|
|
||||||
HTML Purifier supports PHP 5 and PHP 7, and is actively tested from PHP 5.3
|
HTML Purifier works in both PHP 4 and PHP 5, from PHP 4.3.9 and up. It has no
|
||||||
and up. It has no core dependencies with other libraries.
|
core dependencies with other libraries. (Whoopee!)
|
||||||
|
|
||||||
These optional extensions can enhance the capabilities of HTML Purifier:
|
Optional extensions are iconv (usually installed) and tidy (also common).
|
||||||
|
If you use UTF-8 and don't plan on pretty-printing HTML, you can get away with
|
||||||
|
not having either of these extensions.
|
||||||
|
|
||||||
* iconv : Converts text to and from non-UTF-8 encodings
|
|
||||||
* bcmath : Used for unit conversion and imagecrash protection
|
|
||||||
* tidy : Used for pretty-printing HTML
|
|
||||||
|
|
||||||
These optional libraries can enhance the capabilities of HTML Purifier:
|
|
||||||
|
|
||||||
* CSSTidy : Clean CSS stylesheets using %Core.ExtractStyleBlocks
|
2. Including the library
|
||||||
Note: You should use the modernized fork of CSSTidy available
|
|
||||||
at https://github.com/Cerdic/CSSTidy
|
|
||||||
* Net_IDNA2 (PEAR) : IRI support using %Core.EnableIDNA
|
|
||||||
Note: This is not necessary for PHP 5.3 or later
|
|
||||||
|
|
||||||
---------------------------------------------------------------------------
|
Simply use:
|
||||||
2. Reconnaissance
|
|
||||||
|
|
||||||
A big plus of HTML Purifier is its inerrant support of standards, so
|
require_once '/path/to/library/HTMLPurifier.auto.php';
|
||||||
your web-pages should be standards-compliant. (They should also use
|
|
||||||
semantic markup, but that's another issue altogether, one HTML Purifier
|
|
||||||
cannot fix without reading your mind.)
|
|
||||||
|
|
||||||
HTML Purifier can process these doctypes:
|
...and you're good to go. Since HTML Purifier's codebase is fairly
|
||||||
|
large, I recommend only including HTML Purifier when you need it.
|
||||||
|
|
||||||
|
If you don't like your include_path to be fiddled around with, simply set
|
||||||
|
HTML Purifier's library/ directory to the include path yourself and then:
|
||||||
|
|
||||||
|
require_once 'HTMLPurifier.php';
|
||||||
|
|
||||||
|
Only the contents in the library/ folder are necessary, so you can remove
|
||||||
|
everything else when using HTML Purifier in a production environment.
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
3. Preparing the proper output environment
|
||||||
|
|
||||||
|
HTML Purifier is all about web-standards, so accordingly your webpages should
|
||||||
|
be standards compliant. HTML Purifier can deal with these doctypes:
|
||||||
|
|
||||||
* XHTML 1.0 Transitional (default)
|
* XHTML 1.0 Transitional (default)
|
||||||
* XHTML 1.0 Strict
|
|
||||||
* HTML 4.01 Transitional
|
* HTML 4.01 Transitional
|
||||||
* HTML 4.01 Strict
|
|
||||||
* XHTML 1.1
|
|
||||||
|
|
||||||
...and these character encodings:
|
...and these character encodings:
|
||||||
|
|
||||||
* UTF-8 (default)
|
* UTF-8 (default)
|
||||||
* Any encoding iconv supports (with crippled internationalization support)
|
* Any encoding iconv supports (support is crippled for i18n though)
|
||||||
|
|
||||||
These defaults reflect what my choices would be if I were authoring an
|
The defaults are there for a reason: they are best-practice choices that
|
||||||
HTML document, however, what you choose depends on the nature of your
|
should not be changed lightly. For those of you in the dark, you can determine
|
||||||
codebase. If you don't know what doctype you are using, you can determine
|
the doctype from this code in your HTML documents:
|
||||||
the doctype from this identifier at the top of your source code:
|
|
||||||
|
|
||||||
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
|
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
|
||||||
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
|
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
|
||||||
@@ -65,135 +65,35 @@ the doctype from this identifier at the top of your source code:
|
|||||||
|
|
||||||
<meta http-equiv="Content-type" content="text/html;charset=ENCODING">
|
<meta http-equiv="Content-type" content="text/html;charset=ENCODING">
|
||||||
|
|
||||||
If the character encoding declaration is missing, STOP NOW, and
|
For legacy codebases these declarations may be missing. If that is the case,
|
||||||
read 'docs/enduser-utf8.html' (web accessible at
|
STOP, and read up on character encodings and doctypes (in that order). Here
|
||||||
http://htmlpurifier.org/docs/enduser-utf8.html). In fact, even if it is
|
are some links:
|
||||||
present, read this document anyway, as many websites specify their
|
|
||||||
document's character encoding incorrectly.
|
* http://www.joelonsoftware.com/articles/Unicode.html
|
||||||
|
* http://alistapart.com/stories/doctype/
|
||||||
|
|
||||||
|
You may currently be vulnerable to XSS and other security threats, and HTML
|
||||||
|
Purifier won't be able to fix that.
|
||||||
|
|
||||||
|
|
||||||
---------------------------------------------------------------------------
|
|
||||||
3. Including the library
|
|
||||||
|
|
||||||
The procedure is quite simple:
|
|
||||||
|
|
||||||
require_once '/path/to/library/HTMLPurifier.auto.php';
|
|
||||||
|
|
||||||
This will setup an autoloader, so the library's files are only included
|
|
||||||
when you use them.
|
|
||||||
|
|
||||||
Only the contents in the library/ folder are necessary, so you can remove
|
|
||||||
everything else when using HTML Purifier in a production environment.
|
|
||||||
|
|
||||||
If you installed HTML Purifier via PEAR, all you need to do is:
|
|
||||||
|
|
||||||
require_once 'HTMLPurifier.auto.php';
|
|
||||||
|
|
||||||
Please note that the usual PEAR practice of including just the classes you
|
|
||||||
want will not work with HTML Purifier's autoloading scheme.
|
|
||||||
|
|
||||||
Advanced users, read on; other users can skip to section 4.
|
|
||||||
|
|
||||||
Autoload compatibility
|
|
||||||
----------------------
|
|
||||||
|
|
||||||
HTML Purifier attempts to be as smart as possible when registering an
|
|
||||||
autoloader, but there are some cases where you will need to change
|
|
||||||
your own code to accommodate HTML Purifier. These are those cases:
|
|
||||||
|
|
||||||
AN __autoload FUNCTION IS DECLARED AFTER OUR AUTOLOADER IS REGISTERED
|
|
||||||
spl_autoload_register() has the curious behavior of disabling
|
|
||||||
the existing __autoload() handler. Users need to explicitly
|
|
||||||
spl_autoload_register('__autoload'). Because we use SPL when it
|
|
||||||
is available, __autoload() will ALWAYS be disabled. If __autoload()
|
|
||||||
is declared before HTML Purifier is loaded, this is not a problem:
|
|
||||||
HTML Purifier will register the function for you. But if it is
|
|
||||||
declared afterwards, it will mysteriously not work. This
|
|
||||||
snippet of code (after your autoloader is defined) will fix it:
|
|
||||||
|
|
||||||
spl_autoload_register('__autoload')
|
|
||||||
|
|
||||||
|
|
||||||
For better performance
|
|
||||||
----------------------
|
|
||||||
|
|
||||||
Opcode caches, which greatly speed up PHP initialization for scripts
|
|
||||||
with large amounts of code (HTML Purifier included), don't like
|
|
||||||
autoloaders. We offer an include file that includes all of HTML Purifier's
|
|
||||||
files in one go in an opcode cache friendly manner:
|
|
||||||
|
|
||||||
// If /path/to/library isn't already in your include path, uncomment
|
|
||||||
// the below line:
|
|
||||||
// require '/path/to/library/HTMLPurifier.path.php';
|
|
||||||
|
|
||||||
require 'HTMLPurifier.includes.php';
|
|
||||||
|
|
||||||
Optional components still need to be included--you'll know if you try to
|
|
||||||
use a feature and you get a "class doesn't exist" error! The autoloader
|
|
||||||
can be used in conjunction with this approach to catch classes that are
|
|
||||||
missing. Simply add this afterwards:
|
|
||||||
|
|
||||||
require 'HTMLPurifier.autoload.php';
|
|
||||||
|
|
||||||
Standalone version
|
|
||||||
------------------
|
|
||||||
|
|
||||||
HTML Purifier has a standalone distribution; you can also generate
|
|
||||||
a standalone file from the full version by running the script
|
|
||||||
maintenance/generate-standalone.php . The standalone version has the
|
|
||||||
benefit of having most of its code in one file, so parsing is much
|
|
||||||
faster and the library is easier to manage.
|
|
||||||
|
|
||||||
If HTMLPurifier.standalone.php exists in the library directory, you
|
|
||||||
can use it like this:
|
|
||||||
|
|
||||||
require '/path/to/HTMLPurifier.standalone.php';
|
|
||||||
|
|
||||||
This is equivalent to including HTMLPurifier.includes.php, except that
|
|
||||||
the contents of standalone/ will be added to your path. To override this
|
|
||||||
behavior, specify a new HTMLPURIFIER_PREFIX where standalone files can
|
|
||||||
be found (usually, this will be one directory up, the "true" library
|
|
||||||
directory in full distributions). Don't forget to set your path too!
|
|
||||||
|
|
||||||
The autoloader can be added to the end to ensure the classes are
|
|
||||||
loaded when necessary; otherwise you can manually include them.
|
|
||||||
To use the autoloader, use this:
|
|
||||||
|
|
||||||
require 'HTMLPurifier.autoload.php';
|
|
||||||
|
|
||||||
For advanced users
|
|
||||||
------------------
|
|
||||||
|
|
||||||
HTMLPurifier.auto.php performs a number of operations that can be done
|
|
||||||
individually. These are:
|
|
||||||
|
|
||||||
HTMLPurifier.path.php
|
|
||||||
Puts /path/to/library in the include path. For high performance,
|
|
||||||
this should be done in php.ini.
|
|
||||||
|
|
||||||
HTMLPurifier.autoload.php
|
|
||||||
Registers our autoload handler HTMLPurifier_Bootstrap::autoload($class).
|
|
||||||
|
|
||||||
You can do these operations by yourself, if you like.
|
|
||||||
|
|
||||||
|
|
||||||
---------------------------------------------------------------------------
|
|
||||||
4. Configuration
|
4. Configuration
|
||||||
|
|
||||||
HTML Purifier is designed to run out-of-the-box, but occasionally HTML
|
HTML Purifier is designed to run out-of-the-box, but occasionally HTML
|
||||||
Purifier needs to be told what to do. If you answer no to any of these
|
Purifier needs to be told what to do. If you answered no to any of these
|
||||||
questions, read on; otherwise, you can skip to the next section (or, if you're
|
questions, read on, otherwise, you can skip to the next section (or, if you're
|
||||||
into configuring things just for the heck of it, skip to 4.3).
|
into configuring things just for the heck of it, skip to 4.3).
|
||||||
|
|
||||||
* Am I using UTF-8?
|
* Am I using UTF-8?
|
||||||
* Am I using XHTML 1.0 Transitional?
|
* Am I using XHTML 1.0 Transitional?
|
||||||
|
|
||||||
If you answered no to any of these questions, instantiate a configuration
|
If you answered no to any of these questions, instantiate a configuration
|
||||||
object and read on:
|
object and read on:
|
||||||
|
|
||||||
$config = HTMLPurifier_Config::createDefault();
|
$config = HTMLPurifier_Config::createDefault();
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
4.1. Setting a different character encoding
|
4.1. Setting a different character encoding
|
||||||
|
|
||||||
You really shouldn't use any other encoding except UTF-8, especially if you
|
You really shouldn't use any other encoding except UTF-8, especially if you
|
||||||
@@ -205,123 +105,74 @@ HTML Purifier uses iconv to support other character encodings, as such,
|
|||||||
any encoding that iconv supports <http://www.gnu.org/software/libiconv/>
|
any encoding that iconv supports <http://www.gnu.org/software/libiconv/>
|
||||||
HTML Purifier supports with this code:
|
HTML Purifier supports with this code:
|
||||||
|
|
||||||
$config->set('Core.Encoding', /* put your encoding here */);
|
$config->set('Core', 'Encoding', /* put your encoding here */);
|
||||||
|
|
||||||
An example usage for Latin-1 websites (the most common encoding for English
|
An example usage for Latin-1 websites (the most common encoding for English
|
||||||
websites):
|
websites):
|
||||||
|
|
||||||
$config->set('Core.Encoding', 'ISO-8859-1');
|
$config->set('Core', 'Encoding', 'ISO-8859-1');
|
||||||
|
|
||||||
Note that HTML Purifier's support for non-Unicode encodings is crippled by the
|
Note that HTML Purifier's support for non-Unicode encodings is crippled by the
|
||||||
fact that any character not supported by that encoding will be silently
|
fact that any character not supported by that encoding will be silently
|
||||||
dropped, EVEN if it is ampersand escaped. If you want to work around
|
dropped, EVEN if it is ampersand escaped. This is a current limitation of
|
||||||
this, you are welcome to read docs/enduser-utf8.html for a fix,
|
HTML Purifier that we are NOT actively working to fix. Patches are welcome,
|
||||||
but please be cognizant of the issues the "solution" creates (for this
|
but there are so many other gotchas and problems in I18N for non-Unicode
|
||||||
reason, I do not include the solution in this document).
|
encodings that this functionality is low priority. See
|
||||||
|
<http://ppewww.ph.gla.ac.uk/~flavell/charset/form-i18n.html> for a more
|
||||||
|
detailed lowdown on the topic.
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
4.2. Setting a different doctype
|
4.2. Setting a different doctype
|
||||||
|
|
||||||
For those of you using HTML 4.01 Transitional, you can disable
|
For those of you stuck using HTML 4.01 Transitional, you can disable
|
||||||
XHTML output like this:
|
XHTML output like this:
|
||||||
|
|
||||||
$config->set('HTML.Doctype', 'HTML 4.01 Transitional');
|
$config->set('Core', 'XHTML', false);
|
||||||
|
|
||||||
Other supported doctypes include:
|
I recommend that you use XHTML, although not as much as I recommend UTF-8. If
|
||||||
|
your HTML 4.01 page validates, good for you!
|
||||||
|
|
||||||
|
Currently, we can only guarantee transitional-compliant output, future
|
||||||
|
versions will also allow strict-compliant output.
|
||||||
|
|
||||||
* HTML 4.01 Strict
|
|
||||||
* HTML 4.01 Transitional
|
|
||||||
* XHTML 1.0 Strict
|
|
||||||
* XHTML 1.0 Transitional
|
|
||||||
* XHTML 1.1
|
|
||||||
|
|
||||||
|
|
||||||
4.3. Other settings
|
4.3. Other settings
|
||||||
|
|
||||||
There are more configuration directives which can be read about
|
There are more configuration directives which can be read about
|
||||||
here: <http://htmlpurifier.org/live/configdoc/plain.html> They're a bit boring,
|
here: <http://hp.jpsband.org/live/configdoc/plain.html> They're a bit boring,
|
||||||
but they can help out for those of you who like to exert maximum control over
|
but they can help out for those of you who like to exert maximum control over
|
||||||
your code. Some of the more interesting ones are configurable at the
|
your code.
|
||||||
demo <http://htmlpurifier.org/demo.php> and are well worth looking into
|
|
||||||
for your own system.
|
|
||||||
|
|
||||||
For example, you can fine tune allowed elements and attributes, convert
|
|
||||||
relative URLs to absolute ones, and even autoparagraph input text! These
|
|
||||||
are, respectively, %HTML.Allowed, %URI.MakeAbsolute and %URI.Base, and
|
|
||||||
%AutoFormat.AutoParagraph. The %Namespace.Directive naming convention
|
|
||||||
translates to:
|
|
||||||
|
|
||||||
$config->set('Namespace.Directive', $value);
|
|
||||||
|
|
||||||
E.g.
|
|
||||||
|
|
||||||
$config->set('HTML.Allowed', 'p,b,a[href],i');
|
|
||||||
$config->set('URI.Base', 'http://www.example.com');
|
|
||||||
$config->set('URI.MakeAbsolute', true);
|
|
||||||
$config->set('AutoFormat.AutoParagraph', true);
|
|
||||||
|
|
||||||
|
|
||||||
---------------------------------------------------------------------------
|
|
||||||
5. Caching
|
|
||||||
|
|
||||||
HTML Purifier generates some cache files (generally one or two) to speed up
|
5. Using the code
|
||||||
its execution. For maximum performance, make sure that
|
|
||||||
library/HTMLPurifier/DefinitionCache/Serializer is writeable by the webserver.
|
|
||||||
|
|
||||||
If you are in the library/ folder of HTML Purifier, you can set the
|
|
||||||
appropriate permissions using:
|
|
||||||
|
|
||||||
chmod -R 0755 HTMLPurifier/DefinitionCache/Serializer
|
|
||||||
|
|
||||||
If the above command doesn't work, you may need to assign write permissions
|
|
||||||
to group:
|
|
||||||
|
|
||||||
chmod -R 0775 HTMLPurifier/DefinitionCache/Serializer
|
|
||||||
|
|
||||||
You can also chmod files via your FTP client; this option
|
|
||||||
is usually accessible by right clicking the corresponding directory and
|
|
||||||
then selecting "chmod" or "file permissions".
|
|
||||||
|
|
||||||
Starting with 2.0.1, HTML Purifier will generate friendly error messages
|
|
||||||
that will tell you exactly what you have to chmod the directory to, if in doubt,
|
|
||||||
follow its advice.
|
|
||||||
|
|
||||||
If you are unable or unwilling to give write permissions to the cache
|
|
||||||
directory, you can either disable the cache (and suffer a performance
|
|
||||||
hit):
|
|
||||||
|
|
||||||
$config->set('Core.DefinitionCache', null);
|
|
||||||
|
|
||||||
Or move the cache directory somewhere else (no trailing slash):
|
|
||||||
|
|
||||||
$config->set('Cache.SerializerPath', '/home/user/absolute/path');
|
|
||||||
|
|
||||||
|
|
||||||
---------------------------------------------------------------------------
|
|
||||||
6. Using the code
|
|
||||||
|
|
||||||
The interface is mind-numbingly simple:
|
The interface is mind-numbingly simple:
|
||||||
|
|
||||||
|
$purifier = new HTMLPurifier();
|
||||||
|
$clean_html = $purifier->purify( $dirty_html );
|
||||||
|
|
||||||
|
...or, if you're using the configuration object:
|
||||||
|
|
||||||
$purifier = new HTMLPurifier($config);
|
$purifier = new HTMLPurifier($config);
|
||||||
$clean_html = $purifier->purify( $dirty_html );
|
$clean_html = $purifier->purify( $dirty_html );
|
||||||
|
|
||||||
That's it! For more examples, check out docs/examples/ (they aren't very
|
That's it! For more examples, check out docs/examples/ (they aren't very
|
||||||
different though). Also, docs/enduser-slow.html gives advice on what to
|
different though). Also, SLOW gives advice on what to do if HTML Purifier
|
||||||
do if HTML Purifier is slowing down your application.
|
is slowing down your application.
|
||||||
|
|
||||||
|
|
||||||
---------------------------------------------------------------------------
|
|
||||||
7. Quick install
|
|
||||||
|
|
||||||
First, make sure library/HTMLPurifier/DefinitionCache/Serializer is
|
6. Quick install
|
||||||
writable by the webserver (see Section 5: Caching above for details).
|
|
||||||
If your website is in UTF-8 and XHTML Transitional, use this code:
|
If your website is in UTF-8 and XHTML Transitional, use this code:
|
||||||
|
|
||||||
<?php
|
<?php
|
||||||
require_once '/path/to/htmlpurifier/library/HTMLPurifier.auto.php';
|
require_once '/path/to/htmlpurifier/library/HTMLPurifier.auto.php';
|
||||||
|
|
||||||
$config = HTMLPurifier_Config::createDefault();
|
$purifier = new HTMLPurifier();
|
||||||
$purifier = new HTMLPurifier($config);
|
|
||||||
$clean_html = $purifier->purify($dirty_html);
|
$clean_html = $purifier->purify($dirty_html);
|
||||||
?>
|
?>
|
||||||
|
|
||||||
@@ -331,11 +182,9 @@ If your website is in a different encoding or doctype, use this code:
|
|||||||
require_once '/path/to/htmlpurifier/library/HTMLPurifier.auto.php';
|
require_once '/path/to/htmlpurifier/library/HTMLPurifier.auto.php';
|
||||||
|
|
||||||
$config = HTMLPurifier_Config::createDefault();
|
$config = HTMLPurifier_Config::createDefault();
|
||||||
$config->set('Core.Encoding', 'ISO-8859-1'); // replace with your encoding
|
$config->set('Core', 'Encoding', 'ISO-8859-1'); //replace with your encoding
|
||||||
$config->set('HTML.Doctype', 'HTML 4.01 Transitional'); // replace with your doctype
|
$config->set('Core', 'XHTML', true); //replace with false if HTML 4.01
|
||||||
$purifier = new HTMLPurifier($config);
|
$purifier = new HTMLPurifier($config);
|
||||||
|
|
||||||
$clean_html = $purifier->purify($dirty_html);
|
$clean_html = $purifier->purify($dirty_html);
|
||||||
?>
|
?>
|
||||||
|
|
||||||
vim: et sw=4 sts=4
|
|
||||||
|
@@ -1,60 +0,0 @@
|
|||||||
|
|
||||||
Installation
|
|
||||||
Comment installer HTML Purifier
|
|
||||||
|
|
||||||
Attention : Ce document est encodé en UTF-8, si les lettres avec des accents
|
|
||||||
ne s'affichent pas, prenez un meilleur éditeur de texte.
|
|
||||||
|
|
||||||
L'installation de HTML Purifier est très simple, parce qu'il n'a pas besoin
|
|
||||||
de configuration. Pour les utilisateurs impatients, le code se trouve dans le
|
|
||||||
pied de page, mais je recommande de lire le document.
|
|
||||||
|
|
||||||
1. Compatibilité
|
|
||||||
|
|
||||||
HTML Purifier fonctionne avec PHP 5. PHP 5.3 est la dernière version testée.
|
|
||||||
Il ne dépend pas d'autres librairies.
|
|
||||||
|
|
||||||
Les extensions optionnelles sont iconv (généralement déjà installée) et tidy
|
|
||||||
(répandue aussi). Si vous utilisez UTF-8 et que vous ne voulez pas l'indentation,
|
|
||||||
vous pouvez utiliser HTML Purifier sans ces extensions.
|
|
||||||
|
|
||||||
|
|
||||||
2. Inclure la librairie
|
|
||||||
|
|
||||||
Quand vous devez l'utiliser, incluez-le :
|
|
||||||
|
|
||||||
require_once('/path/to/library/HTMLPurifier.auto.php');
|
|
||||||
|
|
||||||
Ne pas l'inclure si ce n'est pas nécessaire, car HTML Purifier est lourd.
|
|
||||||
|
|
||||||
HTML Purifier utilise "autoload". Si vous avez défini la fonction __autoload,
|
|
||||||
vous devez ajouter cette fonction :
|
|
||||||
|
|
||||||
spl_autoload_register('__autoload')
|
|
||||||
|
|
||||||
Plus d'informations dans le document "INSTALL".
|
|
||||||
|
|
||||||
3. Installation rapide
|
|
||||||
|
|
||||||
Si votre site Web est en UTF-8 et XHTML Transitional, utilisez :
|
|
||||||
|
|
||||||
<?php
|
|
||||||
require_once('/path/to/htmlpurifier/library/HTMLPurifier.auto.php');
|
|
||||||
$purificateur = new HTMLPurifier();
|
|
||||||
$html_propre = $purificateur->purify($html_a_purifier);
|
|
||||||
?>
|
|
||||||
|
|
||||||
Sinon, utilisez :
|
|
||||||
|
|
||||||
<?php
|
|
||||||
require_once('/path/to/html/purifier/library/HTMLPurifier.auto.load');
|
|
||||||
$config = $HTMLPurifier_Config::createDefault();
|
|
||||||
$config->set('Core', 'Encoding', 'ISO-8859-1'); //Remplacez par votre
|
|
||||||
encodage
|
|
||||||
$config->set('Core', 'XHTML', true); //Remplacer par false si HTML 4.01
|
|
||||||
$purificateur = new HTMLPurifier($config);
|
|
||||||
$html_propre = $purificateur->purify($html_a_purifier);
|
|
||||||
?>
|
|
||||||
|
|
||||||
|
|
||||||
vim: et sw=4 sts=4
|
|
2
LICENSE
2
LICENSE
@@ -501,4 +501,4 @@ necessary. Here is a sample; alter the names:
|
|||||||
|
|
||||||
That's all there is to it!
|
That's all there is to it!
|
||||||
|
|
||||||
vim: et sw=4 sts=4
|
|
||||||
|
22
README
Normal file
22
README
Normal file
@@ -0,0 +1,22 @@
|
|||||||
|
|
||||||
|
README
|
||||||
|
All about HTML Purifier
|
||||||
|
|
||||||
|
HTML Purifier is an HTML filtering solution that uses a unique combination
|
||||||
|
of robust whitelists and aggressive parsing to ensure that not only are
|
||||||
|
XSS attacks thwarted, but the resulting HTML is standards compliant.
|
||||||
|
|
||||||
|
HTML Purifier is oriented towards richly formatted documents from
|
||||||
|
untrusted sources that require CSS and a full tag-set. This library can
|
||||||
|
be configured to accept a more restrictive set of tags, but it won't be
|
||||||
|
as efficient as more bare-bones parsers. It will, however, do the job
|
||||||
|
right, which may be more important.
|
||||||
|
|
||||||
|
Places to go:
|
||||||
|
|
||||||
|
* See INSTALL for a quick installation guide
|
||||||
|
* See docs/ for developer-oriented documentation, code examples and
|
||||||
|
an in-depth installation guide.
|
||||||
|
* See WYSIWYG for information on editors like TinyMCE and FCKeditor
|
||||||
|
|
||||||
|
HTML Purifier can be found on the web at: http://hp.jpsband.org/
|
29
README.md
29
README.md
@@ -1,29 +0,0 @@
|
|||||||
HTML Purifier [](https://github.com/ezyang/htmlpurifier/actions/workflows/ci.yml)
|
|
||||||
=============
|
|
||||||
|
|
||||||
HTML Purifier is an HTML filtering solution that uses a unique combination
|
|
||||||
of robust whitelists and aggressive parsing to ensure that not only are
|
|
||||||
XSS attacks thwarted, but the resulting HTML is standards compliant.
|
|
||||||
|
|
||||||
HTML Purifier is oriented towards richly formatted documents from
|
|
||||||
untrusted sources that require CSS and a full tag-set. This library can
|
|
||||||
be configured to accept a more restrictive set of tags, but it won't be
|
|
||||||
as efficient as more bare-bones parsers. It will, however, do the job
|
|
||||||
right, which may be more important.
|
|
||||||
|
|
||||||
Places to go:
|
|
||||||
|
|
||||||
* See INSTALL for a quick installation guide
|
|
||||||
* See docs/ for developer-oriented documentation, code examples and
|
|
||||||
an in-depth installation guide.
|
|
||||||
* See WYSIWYG for information on editors like TinyMCE and FCKeditor
|
|
||||||
|
|
||||||
HTML Purifier can be found on the web at: [http://htmlpurifier.org/](http://htmlpurifier.org/)
|
|
||||||
|
|
||||||
## Installation
|
|
||||||
|
|
||||||
Package available on [Composer](https://packagist.org/packages/ezyang/htmlpurifier).
|
|
||||||
|
|
||||||
If you're using Composer to manage dependencies, you can use
|
|
||||||
|
|
||||||
$ composer require ezyang/htmlpurifier
|
|
195
TODO
195
TODO
@@ -4,147 +4,88 @@ TODO List
|
|||||||
= KEY ====================
|
= KEY ====================
|
||||||
# Flagship
|
# Flagship
|
||||||
- Regular
|
- Regular
|
||||||
? Maybe I'll Do It
|
? At-risk
|
||||||
==========================
|
==========================
|
||||||
|
|
||||||
If no interest is expressed for a feature that may require a considerable
|
1.6 release
|
||||||
amount of effort to implement, it may get endlessly delayed. Do not be
|
# Implement all non-essential attribute transforms, configurable
|
||||||
afraid to cast your vote for the next feature to be implemented!
|
# URI validation routines tighter (see docs/dev-code-quality.html) (COMPLEX)
|
||||||
|
# Advanced URI filtering schemes (see docs/proposal-new-directives.txt)
|
||||||
Things to do as soon as possible:
|
|
||||||
|
|
||||||
- http://htmlpurifier.org/phorum/read.php?3,5560,6307#msg-6307
|
|
||||||
- Think about allowing explicit order of operations hooks for transforms
|
|
||||||
- Fix "<.<" bug (trailing < is removed if not EOD)
|
|
||||||
- Build in better internal state dumps and debugging tools for remote
|
|
||||||
debugging
|
|
||||||
- Allowed/Allowed* have strange interactions when both set
|
|
||||||
? Transform lone embeds into object tags
|
|
||||||
- Deprecated config options that emit warnings when you set them (with
|
|
||||||
a way of muting the warning if you really want to)
|
|
||||||
- Make HTML.Trusted work with Output.FlashCompat
|
|
||||||
- HTML.Trusted and HTML.SafeObject have funny interaction; general
|
|
||||||
problem is what to do when a module "supersedes" another
|
|
||||||
(see also tables and basic tables.) This is a little dicier
|
|
||||||
because HTML.SafeObject has some extra functionality that
|
|
||||||
trusted might find useful. See http://htmlpurifier.org/phorum/read.php?3,5762,6100
|
|
||||||
|
|
||||||
FUTURE VERSIONS
|
|
||||||
---------------
|
|
||||||
|
|
||||||
4.9 release [OMG CONFIG PONIES]
|
|
||||||
! Fix Printer. It's from the old days when we didn't have decent XML classes
|
|
||||||
! Factor demo.php into a set of Printer classes, and then create a stub
|
|
||||||
file for users here (inside the actual HTML Purifier library)
|
|
||||||
- Fix error handling with form construction
|
|
||||||
- Do encoding validation in Printers, or at least, where user data comes in
|
|
||||||
- Config: Add examples to everything (make built-in which also automatically
|
|
||||||
gives output)
|
|
||||||
- Add "register" field to config schemas to eliminate dependence on
|
|
||||||
naming conventions (try to remember why we ultimately decided on this)
|
|
||||||
|
|
||||||
5.0 release [HTML 5]
|
|
||||||
# Swap out code to use html5lib tokenizer and tree-builder
|
|
||||||
! Allow turning off of FixNesting and required attribute insertion
|
|
||||||
|
|
||||||
5.1 release [It's All About Trust] (floating)
|
|
||||||
# Implement untrusted, dangerous elements/attributes
|
|
||||||
# Implement IDREF support (harder than it seems, since you cannot have
|
|
||||||
IDREFs to non-existent IDs)
|
|
||||||
- Implement <area> (client and server side image maps are blocking
|
|
||||||
on IDREF support)
|
|
||||||
# Frameset XHTML 1.0 and HTML 4.01 doctypes
|
|
||||||
- Figure out how to simultaneously set %CSS.Trusted and %HTML.Trusted (?)
|
|
||||||
|
|
||||||
5.2 release [Error'ed]
|
|
||||||
# Error logging for filtering/cleanup procedures
|
# Error logging for filtering/cleanup procedures
|
||||||
|
- Requires I18N facilities to be created first (COMPLEX)
|
||||||
|
? Configuration profiles: sets of directives that get set with one func call
|
||||||
|
- XSS-attempt detection
|
||||||
|
- Implement IDREF support
|
||||||
|
|
||||||
|
1.7 release
|
||||||
|
# Add pre-packaged "levels" of cleaning (custom behavior already done)
|
||||||
|
- More fine-grained control over escaping behavior
|
||||||
|
- Silently drop content in between SCRIPT tags (can be generalized to allow
|
||||||
|
specification of elements that, when detected as foreign, trigger removal
|
||||||
|
of children, although unbalanced tags could wreak havoc (or at least
|
||||||
|
delete the rest of the document)).
|
||||||
|
- Allow specifying global attributes on a tag-by-tag basis in
|
||||||
|
%HTML.AllowAttributes
|
||||||
|
? More user-friendly warnings when %HTML.Allow* attempts to specify a
|
||||||
|
tag or attribute that is not supported
|
||||||
|
- Parse TinyMCE whitelist into our %HTML.Allow* whitelists
|
||||||
|
|
||||||
|
1.8 release
|
||||||
# Additional support for poorly written HTML
|
# Additional support for poorly written HTML
|
||||||
- Microsoft Word HTML cleaning (i.e. MsoNormal, but research essential!)
|
- Microsoft Word HTML cleaning (i.e. MsoNormal, but research essential!)
|
||||||
- Friendly strict handling of <address> (block -> <br>)
|
- Friendly strict handling of <address> (block -> <br>)
|
||||||
- XSS-attempt detection--certain errors are flagged XSS-like
|
- Remove redundant tags, ex. <u><u>Underlined</u></u>. Implementation notes:
|
||||||
- Append something to duplicate IDs so they're still usable (impl. note: the
|
|
||||||
dupe detector would also need to detect the suffix as well)
|
|
||||||
|
|
||||||
6.0 release [Beyond HTML]
|
|
||||||
# Legit token based CSS parsing (will require revamping almost every
|
|
||||||
AttrDef class). Probably will use CSSTidy
|
|
||||||
# More control over allowed CSS properties using a modularization
|
|
||||||
# IRI support (this includes IDN)
|
|
||||||
- Standardize token armor for all areas of processing
|
|
||||||
|
|
||||||
7.0 release [To XML and Beyond]
|
|
||||||
- Extended HTML capabilities based on namespacing and tag transforms (COMPLEX)
|
|
||||||
- Hooks for adding custom processors to custom namespaced tags and
|
|
||||||
attributes, offer default implementation
|
|
||||||
- Lots of documentation and samples
|
|
||||||
|
|
||||||
Ongoing
|
|
||||||
- More refactoring to take advantage of PHP5's facilities
|
|
||||||
- Refactor unit tests into lots of test methods
|
|
||||||
- Plugins for major CMSes (COMPLEX)
|
|
||||||
- phpBB
|
|
||||||
- Also, a FAQ for extension writers with HTML Purifier
|
|
||||||
|
|
||||||
AutoFormat
|
|
||||||
- Smileys
|
|
||||||
- Syntax highlighting (with GeSHi) with <pre> and possibly <?php
|
|
||||||
- Look at http://drupal.org/project/Modules/category/63 for ideas
|
|
||||||
|
|
||||||
Neat feature related
|
|
||||||
! Support exporting configuration, so users can easily tweak settings
|
|
||||||
in the demo, and then copy-paste into their own setup
|
|
||||||
- Advanced URI filtering schemes (see docs/proposal-new-directives.txt)
|
|
||||||
- Allow scoped="scoped" attribute in <style> tags; may be troublesome
|
|
||||||
because regular CSS has no way of uniquely identifying nodes, so we'd
|
|
||||||
have to generate IDs
|
|
||||||
- Explain how to use HTML Purifier in non-PHP languages / create
|
|
||||||
a simple command line stub (or complicated?)
|
|
||||||
- Fixes for Firefox's inability to handle COL alignment props (Bug 915)
|
|
||||||
- Automatically add non-breaking spaces to empty table cells when
|
|
||||||
empty-cells:show is applied to have compatibility with Internet Explorer
|
|
||||||
- Table of Contents generation (XHTML Compiler might be reusable). May also
|
|
||||||
be out-of-band information.
|
|
||||||
- Full set of color keywords. Also, a way to add onto them without
|
|
||||||
finalizing the configuration object.
|
|
||||||
- Write a var_export and memcached DefinitionCache - Denis
|
|
||||||
- Built-in support for target="_blank" on all external links
|
|
||||||
- Convert RTL/LTR override characters to <bdo> tags, or vice versa on demand.
|
|
||||||
Also, enable disabling of directionality
|
|
||||||
? Externalize inline CSS to promote clean HTML, proposed by Sander Tekelenburg
|
|
||||||
? Remove redundant tags, ex. <u><u>Underlined</u></u>. Implementation notes:
|
|
||||||
1. Analyzing which tags to remove duplicates
|
1. Analyzing which tags to remove duplicates
|
||||||
2. Ensure attributes are merged into the parent tag
|
2. Ensure attributes are merged into the parent tag
|
||||||
3. Extend the tag exclusion system to specify whether or not the
|
3. Extend the tag exclusion system to specify whether or not the
|
||||||
contents should be dropped or not (currently, there's code that could do
|
contents should be dropped or not (currently, there's code that could do
|
||||||
something like this if it didn't drop the inner text too.)
|
something like this if it didn't drop the inner text too.)
|
||||||
? Make AutoParagraph also support paragraph-izing double <br> tags, and not
|
- Remove <span> tags that don't do anything (no attributes)
|
||||||
just double newlines. This is kind of tough to do in the current framework,
|
- Remove empty inline tags<i></i>
|
||||||
though, and might be reasonably approximated by search replacing double <br>s
|
- Append something to duplicate IDs so they're still usable (impl. note: the
|
||||||
with newlines before running it through HTML Purifier.
|
dupe detector would also need to detect the suffix as well)
|
||||||
|
|
||||||
Maintenance related (slightly boring)
|
2.0 release
|
||||||
# CHMOD install script for PEAR installs
|
# Legit token based CSS parsing (will require revamping almost every
|
||||||
! Factor out command line parser into its own class, and unit test it
|
AttrDef class)
|
||||||
- Reduce size of internal data-structures (esp. HTMLDefinition)
|
# Formatters for plaintext (COMPLEX)
|
||||||
- Allow merging configurations. Thus,
|
- Auto-paragraphing (be sure to leverage fact that we know when things
|
||||||
a -> b -> default
|
shouldn't be paragraphed, such as lists and tables).
|
||||||
c -> d -> default
|
- Linkify URLs
|
||||||
becomes
|
- Smileys
|
||||||
a -> b -> c -> d -> default
|
- Linkification for HTML Purifier docs: notably configuration and classes
|
||||||
Maybe allow more fine-grained tuning of this behavior. Alternatively,
|
|
||||||
encourage people to use short plist depths before building them up.
|
|
||||||
- Time PHPT tests
|
|
||||||
|
|
||||||
ChildDef related (very boring)
|
3.0 release
|
||||||
- Abstract ChildDef_BlockQuote to work with all elements that only
|
- Extended HTML capabilities based on namespacing and tag transforms (COMPLEX)
|
||||||
allow blocks in them, required or optional
|
- Hooks for adding custom processors to custom namespaced tags and
|
||||||
- Implement lenient <ruby> child validation
|
attributes, offer default implementation
|
||||||
|
- Lots of documentation and samples
|
||||||
|
- Allow tags to be "armored", an internal flag that protects them
|
||||||
|
from validation and passes them out unharmed
|
||||||
|
- XHTML 1.1 support
|
||||||
|
- Fixes for Firefox's inability to handle COL alignment props (Bug 915)
|
||||||
|
- Automatically add non-breaking spaces to empty table cells when
|
||||||
|
empty-cells:show is applied to have compatibility with Internet Explorer
|
||||||
|
- Convert RTL/LTR override characters to <bdo> tags, or vice versa on demand.
|
||||||
|
Also, enable disabling of directionality
|
||||||
|
|
||||||
|
Ongoing
|
||||||
|
- Lots of profiling, make it faster!
|
||||||
|
- Plugins for major CMSes (COMPLEX)
|
||||||
|
- WordPress
|
||||||
|
- eFiction
|
||||||
|
- more! (look for ones that use WYSIWYGs)
|
||||||
|
|
||||||
|
Unknown release (on a scratch-an-itch basis)
|
||||||
|
- Have 'lang' attribute be checked against official lists
|
||||||
|
? Semi-lossy dumb alternate character encoding transformations, achieved by
|
||||||
|
encoding all characters that have string entity equivalents
|
||||||
|
|
||||||
|
Requested
|
||||||
|
? Native content compression, whitespace stripping (don't rely on Tidy, make
|
||||||
|
sure we don't remove from <pre> or related tags)
|
||||||
|
|
||||||
Wontfix
|
Wontfix
|
||||||
- Non-lossy smart alternate character encoding transformations (unless
|
- Non-lossy smart alternate character encoding transformations (unless
|
||||||
patch provided)
|
patch provided)
|
||||||
- Pretty-printing HTML: users can use Tidy on the output on entire page
|
- Pretty-printing HTML, users can use Tidy on the output on entire page
|
||||||
- Native content compression, whitespace stripping: use gzip if this is
|
|
||||||
really important
|
|
||||||
|
|
||||||
vim: et sw=4 sts=4
|
|
||||||
|
4
WYSIWYG
4
WYSIWYG
@@ -17,4 +17,6 @@ HTML Purifier is perfect for filtering pure-HTML input from WYSIWYG editors.
|
|||||||
|
|
||||||
Enough said.
|
Enough said.
|
||||||
|
|
||||||
vim: et sw=4 sts=4
|
There is a proof-of-concept integration of HTML Purifier with the Mantis
|
||||||
|
bugtracker at http://hp.jpsband.org/mantis/ You can see notes on how
|
||||||
|
this integration was achieved at http://hp.jpsband.org/mantis_notes.txt
|
||||||
|
BIN
art/100cases.png
BIN
art/100cases.png
Binary file not shown.
Before Width: | Height: | Size: 2.7 KiB |
@@ -1,7 +0,0 @@
|
|||||||
<IfModule mod_authz_core.c>
|
|
||||||
Require all denied
|
|
||||||
</IfModule>
|
|
||||||
|
|
||||||
<IfModule !mod_authz_core.c>
|
|
||||||
Deny from all
|
|
||||||
</ifModule>
|
|
@@ -1,16 +0,0 @@
|
|||||||
<?php
|
|
||||||
|
|
||||||
chdir(dirname(__FILE__));
|
|
||||||
|
|
||||||
//require_once '../library/HTMLPurifier.path.php';
|
|
||||||
shell_exec('php ../maintenance/generate-schema-cache.php');
|
|
||||||
require_once '../library/HTMLPurifier.path.php';
|
|
||||||
require_once 'HTMLPurifier.includes.php';
|
|
||||||
|
|
||||||
$begin = xdebug_memory_usage();
|
|
||||||
|
|
||||||
$schema = HTMLPurifier_ConfigSchema::makeFromSerial();
|
|
||||||
|
|
||||||
echo xdebug_memory_usage() - $begin;
|
|
||||||
|
|
||||||
// vim: et sw=4 sts=4
|
|
@@ -1,11 +1,13 @@
|
|||||||
<?php
|
<?php
|
||||||
|
|
||||||
require_once '../library/HTMLPurifier.auto.php';
|
// emulates inserting a dir called HTMLPurifier into your class dir
|
||||||
|
set_include_path(get_include_path() . PATH_SEPARATOR . '../library/');
|
||||||
|
|
||||||
@include_once '../test-settings.php';
|
@include_once '../test-settings.php';
|
||||||
|
|
||||||
// PEAR
|
require_once 'HTMLPurifier/ConfigSchema.php';
|
||||||
require_once 'Benchmark/Timer.php'; // to do the timing
|
require_once 'HTMLPurifier/Config.php';
|
||||||
require_once 'Text/Password.php'; // for generating random input
|
require_once 'HTMLPurifier/Context.php';
|
||||||
|
|
||||||
$LEXERS = array();
|
$LEXERS = array();
|
||||||
$RUNS = isset($GLOBALS['HTMLPurifierTest']['Runs'])
|
$RUNS = isset($GLOBALS['HTMLPurifierTest']['Runs'])
|
||||||
@@ -14,25 +16,35 @@ $RUNS = isset($GLOBALS['HTMLPurifierTest']['Runs'])
|
|||||||
require_once 'HTMLPurifier/Lexer/DirectLex.php';
|
require_once 'HTMLPurifier/Lexer/DirectLex.php';
|
||||||
$LEXERS['DirectLex'] = new HTMLPurifier_Lexer_DirectLex();
|
$LEXERS['DirectLex'] = new HTMLPurifier_Lexer_DirectLex();
|
||||||
|
|
||||||
|
if (!empty($GLOBALS['HTMLPurifierTest']['PEAR'])) {
|
||||||
|
require_once 'HTMLPurifier/Lexer/PEARSax3.php';
|
||||||
|
$LEXERS['PEARSax3'] = new HTMLPurifier_Lexer_PEARSax3();
|
||||||
|
} else {
|
||||||
|
exit('PEAR required to perform benchmark.');
|
||||||
|
}
|
||||||
|
|
||||||
if (version_compare(PHP_VERSION, '5', '>=')) {
|
if (version_compare(PHP_VERSION, '5', '>=')) {
|
||||||
require_once 'HTMLPurifier/Lexer/DOMLex.php';
|
require_once 'HTMLPurifier/Lexer/DOMLex.php';
|
||||||
$LEXERS['DOMLex'] = new HTMLPurifier_Lexer_DOMLex();
|
$LEXERS['DOMLex'] = new HTMLPurifier_Lexer_DOMLex();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// PEAR
|
||||||
|
require_once 'Benchmark/Timer.php'; // to do the timing
|
||||||
|
require_once 'Text/Password.php'; // for generating random input
|
||||||
|
|
||||||
// custom class to aid unit testing
|
// custom class to aid unit testing
|
||||||
class RowTimer extends Benchmark_Timer
|
class RowTimer extends Benchmark_Timer
|
||||||
{
|
{
|
||||||
|
|
||||||
public $name;
|
var $name;
|
||||||
|
|
||||||
public function __construct($name, $auto = false)
|
function RowTimer($name, $auto = false) {
|
||||||
{
|
|
||||||
$this->name = htmlentities($name);
|
$this->name = htmlentities($name);
|
||||||
$this->Benchmark_Timer($auto);
|
$this->Benchmark_Timer($auto);
|
||||||
}
|
}
|
||||||
|
|
||||||
public function getOutput()
|
function getOutput() {
|
||||||
{
|
|
||||||
$total = $this->TimeElapsed();
|
$total = $this->TimeElapsed();
|
||||||
$result = $this->getProfiling();
|
$result = $this->getProfiling();
|
||||||
$dashes = '';
|
$dashes = '';
|
||||||
@@ -69,8 +81,7 @@ class RowTimer extends Benchmark_Timer
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
function print_lexers()
|
function print_lexers() {
|
||||||
{
|
|
||||||
global $LEXERS;
|
global $LEXERS;
|
||||||
$first = true;
|
$first = true;
|
||||||
foreach ($LEXERS as $key => $value) {
|
foreach ($LEXERS as $key => $value) {
|
||||||
@@ -80,8 +91,7 @@ function print_lexers()
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
function do_benchmark($name, $document)
|
function do_benchmark($name, $document) {
|
||||||
{
|
|
||||||
global $LEXERS, $RUNS;
|
global $LEXERS, $RUNS;
|
||||||
|
|
||||||
$config = HTMLPurifier_Config::createDefault();
|
$config = HTMLPurifier_Config::createDefault();
|
||||||
@@ -156,6 +166,3 @@ echo '<div>Random input was: ' .
|
|||||||
|
|
||||||
|
|
||||||
</body></html>
|
</body></html>
|
||||||
<?php
|
|
||||||
|
|
||||||
// vim: et sw=4 sts=4
|
|
||||||
|
19
benchmarks/ProfileDirectLex.php
Normal file
19
benchmarks/ProfileDirectLex.php
Normal file
@@ -0,0 +1,19 @@
|
|||||||
|
<?php
|
||||||
|
|
||||||
|
set_include_path(get_include_path() . PATH_SEPARATOR . '../library/');
|
||||||
|
|
||||||
|
require_once 'HTMLPurifier/ConfigSchema.php';
|
||||||
|
require_once 'HTMLPurifier/Config.php';
|
||||||
|
require_once 'HTMLPurifier/Lexer/DirectLex.php';
|
||||||
|
require_once 'HTMLPurifier/Context.php';
|
||||||
|
|
||||||
|
$input = file_get_contents('samples/Lexer/4.html');
|
||||||
|
$lexer = new HTMLPurifier_Lexer_DirectLex();
|
||||||
|
$config = HTMLPurifier_Config::createDefault();
|
||||||
|
$context = new HTMLPurifier_Context();
|
||||||
|
|
||||||
|
for ($i = 0; $i < 10; $i++) {
|
||||||
|
$tokens = $lexer->tokenizeHTML($input, $config, $context);
|
||||||
|
}
|
||||||
|
|
||||||
|
?>
|
@@ -1,21 +0,0 @@
|
|||||||
<?php
|
|
||||||
|
|
||||||
ini_set('xdebug.trace_format', 1);
|
|
||||||
ini_set('xdebug.show_mem_delta', true);
|
|
||||||
|
|
||||||
if (file_exists('Trace.xt')) {
|
|
||||||
echo "Previous trace Trace.xt must be removed before this script can be run.";
|
|
||||||
exit;
|
|
||||||
}
|
|
||||||
|
|
||||||
xdebug_start_trace(dirname(__FILE__) . '/Trace');
|
|
||||||
require_once '../library/HTMLPurifier.auto.php';
|
|
||||||
|
|
||||||
$purifier = new HTMLPurifier();
|
|
||||||
|
|
||||||
$data = $purifier->purify(file_get_contents('samples/Lexer/4.html'));
|
|
||||||
xdebug_stop_trace();
|
|
||||||
|
|
||||||
echo "Trace finished.";
|
|
||||||
|
|
||||||
// vim: et sw=4 sts=4
|
|
@@ -51,6 +51,3 @@
|
|||||||
<div style="text-align:center;">Click on photo to see HR version</div></div>
|
<div style="text-align:center;">Click on photo to see HR version</div></div>
|
||||||
</body>
|
</body>
|
||||||
</html>
|
</html>
|
||||||
|
|
||||||
<!-- vim: et sw=4 sts=4
|
|
||||||
-->
|
|
||||||
|
@@ -15,6 +15,3 @@ function rwt(el,ct,cd,sg){var e = window.encodeURIComponent ? encodeURIComponent
|
|||||||
function qs(el) {if (window.RegExp && window.encodeURIComponent) {var ue=el.href;var qe=encodeURIComponent(document.f.q.value);if(ue.indexOf("q=")!=-1){el.href=ue.replace(new RegExp("q=[^&$]*"),"q="+qe);}else{el.href=ue+"&q="+qe;}}return 1;}
|
function qs(el) {if (window.RegExp && window.encodeURIComponent) {var ue=el.href;var qe=encodeURIComponent(document.f.q.value);if(ue.indexOf("q=")!=-1){el.href=ue.replace(new RegExp("q=[^&$]*"),"q="+qe);}else{el.href=ue+"&q="+qe;}}return 1;}
|
||||||
// -->
|
// -->
|
||||||
</script><table border=0 cellspacing=0 cellpadding=4><tr><td nowrap><font size=-1><b>Web</b> <a id=1a class=q href="/imghp?hl=en&tab=wi" onClick="return qs(this);">Images</a> <a id=2a class=q href="http://groups.google.com/grphp?hl=en&tab=wg" onClick="return qs(this);">Groups</a> <a id=4a class=q href="http://news.google.com/nwshp?hl=en&tab=wn" onClick="return qs(this);">News</a> <a id=5a class=q href="http://froogle.google.com/frghp?hl=en&tab=wf" onClick="return qs(this);">Froogle</a> <a id=8a class=q href="/lochp?hl=en&tab=wl" onClick="return qs(this);">Local</a> <b><a href="/intl/en/options/" class=q>more »</a></b></font></td></tr></table><table cellspacing=0 cellpadding=0><tr><td width=25%> </td><td align=center><input type=hidden name=hl value=en><input maxlength=2048 size=55 name=q value="" title="Google Search"><br><input type=submit value="Google Search" name=btnG><input type=submit value="I'm Feeling Lucky" name=btnI></td><td valign=top nowrap width=25%><font size=-2> <a href=/advanced_search?hl=en>Advanced Search</a><br> <a href=/preferences?hl=en>Preferences</a><br> <a href=/language_tools?hl=en>Language Tools</a></font></td></tr></table></form><br><br><font size=-1><a href="/ads/">Advertising Programs</a> - <a href=/services/>Business Solutions</a> - <a href=/about.html>About Google</a></font><p><font size=-2>©2006 Google</font></p></center></body></html>
|
</script><table border=0 cellspacing=0 cellpadding=4><tr><td nowrap><font size=-1><b>Web</b> <a id=1a class=q href="/imghp?hl=en&tab=wi" onClick="return qs(this);">Images</a> <a id=2a class=q href="http://groups.google.com/grphp?hl=en&tab=wg" onClick="return qs(this);">Groups</a> <a id=4a class=q href="http://news.google.com/nwshp?hl=en&tab=wn" onClick="return qs(this);">News</a> <a id=5a class=q href="http://froogle.google.com/frghp?hl=en&tab=wf" onClick="return qs(this);">Froogle</a> <a id=8a class=q href="/lochp?hl=en&tab=wl" onClick="return qs(this);">Local</a> <b><a href="/intl/en/options/" class=q>more »</a></b></font></td></tr></table><table cellspacing=0 cellpadding=0><tr><td width=25%> </td><td align=center><input type=hidden name=hl value=en><input maxlength=2048 size=55 name=q value="" title="Google Search"><br><input type=submit value="Google Search" name=btnG><input type=submit value="I'm Feeling Lucky" name=btnI></td><td valign=top nowrap width=25%><font size=-2> <a href=/advanced_search?hl=en>Advanced Search</a><br> <a href=/preferences?hl=en>Preferences</a><br> <a href=/language_tools?hl=en>Language Tools</a></font></td></tr></table></form><br><br><font size=-1><a href="/ads/">Advertising Programs</a> - <a href=/services/>Business Solutions</a> - <a href=/about.html>About Google</a></font><p><font size=-2>©2006 Google</font></p></center></body></html>
|
||||||
|
|
||||||
<!-- vim: et sw=4 sts=4
|
|
||||||
-->
|
|
||||||
|
@@ -126,6 +126,3 @@ if (objAdMgr.isSlotAvailable("leaderboard")) {
|
|||||||
</body>
|
</body>
|
||||||
|
|
||||||
</html>
|
</html>
|
||||||
|
|
||||||
<!-- vim: et sw=4 sts=4
|
|
||||||
-->
|
|
||||||
|
@@ -538,6 +538,3 @@ Retrieved from "<a href="http://en.wikipedia.org/wiki/Tai_Chi_Chuan">http://en.w
|
|||||||
|
|
||||||
<!-- Served by srv25 in 0.089 secs. -->
|
<!-- Served by srv25 in 0.089 secs. -->
|
||||||
</body></html>
|
</body></html>
|
||||||
|
|
||||||
<!-- vim: et sw=4 sts=4
|
|
||||||
-->
|
|
||||||
|
@@ -3,5 +3,3 @@ Disclaimer:
|
|||||||
The HTML used in these samples is taken from random websites. I claim
|
The HTML used in these samples is taken from random websites. I claim
|
||||||
no copyright over these and assert that I may use them like this under
|
no copyright over these and assert that I may use them like this under
|
||||||
fair use.
|
fair use.
|
||||||
|
|
||||||
vim: et sw=4 sts=4
|
|
||||||
|
@@ -1,45 +0,0 @@
|
|||||||
{
|
|
||||||
"name": "ezyang/htmlpurifier",
|
|
||||||
"description": "Standards compliant HTML filter written in PHP",
|
|
||||||
"type": "library",
|
|
||||||
"keywords": ["html"],
|
|
||||||
"homepage": "http://htmlpurifier.org/",
|
|
||||||
"license": "LGPL-2.1-or-later",
|
|
||||||
"authors": [
|
|
||||||
{
|
|
||||||
"name": "Edward Z. Yang",
|
|
||||||
"email": "admin@htmlpurifier.org",
|
|
||||||
"homepage": "http://ezyang.com"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"require": {
|
|
||||||
"php": "~5.6.0 || ~7.0.0 || ~7.1.0 || ~7.2.0 || ~7.3.0 || ~7.4.0 || ~8.0.0 || ~8.1.0 || ~8.2.0 || ~8.3.0 || ~8.4.0"
|
|
||||||
},
|
|
||||||
"require-dev": {
|
|
||||||
"cerdic/css-tidy": "^1.7 || ^2.0",
|
|
||||||
"simpletest/simpletest": "dev-master"
|
|
||||||
},
|
|
||||||
"autoload": {
|
|
||||||
"psr-0": { "HTMLPurifier": "library/" },
|
|
||||||
"files": ["library/HTMLPurifier.composer.php"],
|
|
||||||
"exclude-from-classmap": [
|
|
||||||
"/library/HTMLPurifier/Language/"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"suggest": {
|
|
||||||
"cerdic/css-tidy": "If you want to use the filter 'Filter.ExtractStyleBlocks'.",
|
|
||||||
"ext-iconv": "Converts text to and from non-UTF-8 encodings",
|
|
||||||
"ext-bcmath": "Used for unit conversion and imagecrash protection",
|
|
||||||
"ext-tidy": "Used for pretty-printing HTML"
|
|
||||||
},
|
|
||||||
"config": {
|
|
||||||
"sort-packages": true
|
|
||||||
},
|
|
||||||
"repositories": [
|
|
||||||
{
|
|
||||||
"type": "vcs",
|
|
||||||
"url": "https://github.com/ezyang/simpletest.git",
|
|
||||||
"no-api": true
|
|
||||||
}
|
|
||||||
]
|
|
||||||
}
|
|
@@ -2,63 +2,219 @@
|
|||||||
|
|
||||||
/**
|
/**
|
||||||
* Generates XML and HTML documents describing configuration.
|
* Generates XML and HTML documents describing configuration.
|
||||||
* @note PHP 5.2+ only!
|
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/*
|
/*
|
||||||
TODO:
|
TODO:
|
||||||
- make XML format richer
|
- make XML format richer (see below)
|
||||||
- extend XSLT transformation (see the corresponding XSLT file)
|
- extend XSLT transformation (see the corresponding XSLT file)
|
||||||
- allow generation of packaged docs that can be easily moved
|
- allow generation of packaged docs that can be easily moved
|
||||||
- multipage documentation
|
- multipage documentation
|
||||||
- determine how to multilingualize
|
- determine how to multilingualize
|
||||||
- add blurbs to ToC
|
- factor out code into classes
|
||||||
*/
|
*/
|
||||||
|
|
||||||
if (version_compare(PHP_VERSION, '5.2', '<')) exit('PHP 5.2+ required.');
|
// ---------------------------------------------------------------------------
|
||||||
error_reporting(E_ALL | E_STRICT);
|
// Check and configure environment
|
||||||
|
|
||||||
// load dual-libraries
|
if (version_compare('5', PHP_VERSION, '>')) exit('Requires PHP 5 or higher.');
|
||||||
require_once dirname(__FILE__) . '/../extras/HTMLPurifierExtras.auto.php';
|
error_reporting(E_ALL);
|
||||||
require_once dirname(__FILE__) . '/../library/HTMLPurifier.auto.php';
|
|
||||||
|
|
||||||
// setup HTML Purifier singleton
|
|
||||||
HTMLPurifier::getInstance(array(
|
|
||||||
'AutoFormat.PurifierLinkify' => true
|
|
||||||
));
|
|
||||||
|
|
||||||
$builder = new HTMLPurifier_ConfigSchema_InterchangeBuilder();
|
// ---------------------------------------------------------------------------
|
||||||
$interchange = new HTMLPurifier_ConfigSchema_Interchange();
|
// Include HTML Purifier library
|
||||||
$builder->buildDir($interchange);
|
|
||||||
$loader = dirname(__FILE__) . '/../config-schema.php';
|
|
||||||
if (file_exists($loader)) include $loader;
|
|
||||||
$interchange->validate();
|
|
||||||
|
|
||||||
$style = 'plain'; // use $_GET in the future, careful to validate!
|
set_include_path('../library' . PATH_SEPARATOR . get_include_path());
|
||||||
$configdoc_xml = dirname(__FILE__) . '/configdoc.xml';
|
require_once 'HTMLPurifier.php';
|
||||||
|
|
||||||
$xml_builder = new HTMLPurifier_ConfigSchema_Builder_Xml();
|
|
||||||
$xml_builder->openURI($configdoc_xml);
|
|
||||||
$xml_builder->build($interchange);
|
|
||||||
unset($xml_builder); // free handle
|
|
||||||
|
|
||||||
$xslt = new ConfigDoc_HTMLXSLTProcessor();
|
// ---------------------------------------------------------------------------
|
||||||
$xslt->importStylesheet(dirname(__FILE__) . "/styles/$style.xsl");
|
// Setup convenience functions
|
||||||
$output = $xslt->transformToHTML($configdoc_xml);
|
|
||||||
|
|
||||||
if (!$output) {
|
function appendHTMLDiv($document, $node, $html) {
|
||||||
echo "Error in generating files\n";
|
global $purifier;
|
||||||
exit(1);
|
$html = $purifier->purify($html);
|
||||||
|
$dom_html = $document->createDocumentFragment();
|
||||||
|
$dom_html->appendXML($html);
|
||||||
|
|
||||||
|
$dom_div = $document->createElement('div');
|
||||||
|
$dom_div->setAttribute('xmlns', 'http://www.w3.org/1999/xhtml');
|
||||||
|
$dom_div->appendChild($dom_html);
|
||||||
|
|
||||||
|
$node->appendChild($dom_div);
|
||||||
}
|
}
|
||||||
|
|
||||||
// write out
|
|
||||||
file_put_contents(dirname(__FILE__) . "/$style.html", $output);
|
// ---------------------------------------------------------------------------
|
||||||
|
// Load copies of HTMLPurifier_ConfigDef and HTMLPurifier
|
||||||
|
|
||||||
|
$schema = HTMLPurifier_ConfigSchema::instance();
|
||||||
|
$purifier = new HTMLPurifier();
|
||||||
|
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// Generate types.xml, a document describing the constraint "type"
|
||||||
|
|
||||||
|
$types_document = new DOMDocument('1.0', 'UTF-8');
|
||||||
|
$types_root = $types_document->createElement('types');
|
||||||
|
$types_document->appendChild($types_root);
|
||||||
|
$types_document->formatOutput = true;
|
||||||
|
foreach ($schema->types as $name => $expanded_name) {
|
||||||
|
$types_type = $types_document->createElement('type', $expanded_name);
|
||||||
|
$types_type->setAttribute('id', $name);
|
||||||
|
$types_root->appendChild($types_type);
|
||||||
|
}
|
||||||
|
$types_document->save('types.xml');
|
||||||
|
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// Generate configdoc.xml, a document documenting configuration directives
|
||||||
|
|
||||||
|
$dom_document = new DOMDocument('1.0', 'UTF-8');
|
||||||
|
$dom_root = $dom_document->createElement('configdoc');
|
||||||
|
$dom_document->appendChild($dom_root);
|
||||||
|
$dom_document->formatOutput = true;
|
||||||
|
|
||||||
|
// add the name of the application
|
||||||
|
$dom_root->appendChild($dom_document->createElement('title', 'HTML Purifier'));
|
||||||
|
|
||||||
|
/*
|
||||||
|
TODO for XML format:
|
||||||
|
- create a definition (DTD or other) once interface stabilizes
|
||||||
|
*/
|
||||||
|
|
||||||
|
foreach($schema->info as $namespace_name => $namespace_info) {
|
||||||
|
|
||||||
|
$dom_namespace = $dom_document->createElement('namespace');
|
||||||
|
$dom_root->appendChild($dom_namespace);
|
||||||
|
|
||||||
|
$dom_namespace->setAttribute('id', $namespace_name);
|
||||||
|
$dom_namespace->appendChild(
|
||||||
|
$dom_document->createElement('name', $namespace_name)
|
||||||
|
);
|
||||||
|
$dom_namespace_description = $dom_document->createElement('description');
|
||||||
|
$dom_namespace->appendChild($dom_namespace_description);
|
||||||
|
appendHTMLDiv($dom_document, $dom_namespace_description,
|
||||||
|
$schema->info_namespace[$namespace_name]->description);
|
||||||
|
|
||||||
|
foreach ($namespace_info as $name => $info) {
|
||||||
|
|
||||||
|
if ($info->class == 'alias') continue;
|
||||||
|
|
||||||
|
$dom_directive = $dom_document->createElement('directive');
|
||||||
|
$dom_namespace->appendChild($dom_directive);
|
||||||
|
|
||||||
|
$dom_directive->setAttribute('id', $namespace_name . '.' . $name);
|
||||||
|
$dom_directive->appendChild(
|
||||||
|
$dom_document->createElement('name', $name)
|
||||||
|
);
|
||||||
|
|
||||||
|
$dom_constraints = $dom_document->createElement('constraints');
|
||||||
|
$dom_directive->appendChild($dom_constraints);
|
||||||
|
|
||||||
|
$dom_type = $dom_document->createElement('type', $info->type);
|
||||||
|
if ($info->allow_null) {
|
||||||
|
$dom_type->setAttribute('allow-null', 'yes');
|
||||||
|
}
|
||||||
|
$dom_constraints->appendChild($dom_type);
|
||||||
|
|
||||||
|
if ($info->allowed !== true) {
|
||||||
|
$dom_allowed = $dom_document->createElement('allowed');
|
||||||
|
$dom_constraints->appendChild($dom_allowed);
|
||||||
|
foreach ($info->allowed as $allowed => $bool) {
|
||||||
|
$dom_allowed->appendChild(
|
||||||
|
$dom_document->createElement('value', $allowed)
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
$raw_default = $schema->defaults[$namespace_name][$name];
|
||||||
|
if (is_bool($raw_default)) {
|
||||||
|
$default = $raw_default ? 'true' : 'false';
|
||||||
|
} elseif (is_string($raw_default)) {
|
||||||
|
$default = "\"$raw_default\"";
|
||||||
|
} elseif (is_null($raw_default)) {
|
||||||
|
$default = 'null';
|
||||||
|
} else {
|
||||||
|
$default = print_r(
|
||||||
|
$schema->defaults[$namespace_name][$name], true
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
$dom_default = $dom_document->createElement('default', $default);
|
||||||
|
|
||||||
|
// remove this once we get a DTD
|
||||||
|
$dom_default->setAttribute('xml:space', 'preserve');
|
||||||
|
|
||||||
|
$dom_constraints->appendChild($dom_default);
|
||||||
|
|
||||||
|
$dom_descriptions = $dom_document->createElement('descriptions');
|
||||||
|
$dom_directive->appendChild($dom_descriptions);
|
||||||
|
|
||||||
|
foreach ($info->descriptions as $file => $file_descriptions) {
|
||||||
|
foreach ($file_descriptions as $line => $description) {
|
||||||
|
$dom_description = $dom_document->createElement('description');
|
||||||
|
$dom_description->setAttribute('file', $file);
|
||||||
|
$dom_description->setAttribute('line', $line);
|
||||||
|
appendHTMLDiv($dom_document, $dom_description, $description);
|
||||||
|
$dom_descriptions->appendChild($dom_description);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
// print_r($dom_document->saveXML());
|
||||||
|
|
||||||
|
// save a copy of the raw XML
|
||||||
|
$dom_document->save('configdoc.xml');
|
||||||
|
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// Generate final output using XSLT
|
||||||
|
|
||||||
|
// load the stylesheet
|
||||||
|
$xsl_stylesheet_name = 'plain';
|
||||||
|
$xsl_stylesheet = "styles/$xsl_stylesheet_name.xsl";
|
||||||
|
$xsl_dom_stylesheet = new DOMDocument();
|
||||||
|
$xsl_dom_stylesheet->load($xsl_stylesheet);
|
||||||
|
|
||||||
|
// setup the XSLT processor
|
||||||
|
$xsl_processor = new XSLTProcessor();
|
||||||
|
|
||||||
|
// perform the transformation
|
||||||
|
$xsl_processor->importStylesheet($xsl_dom_stylesheet);
|
||||||
|
$html_output = $xsl_processor->transformToXML($dom_document);
|
||||||
|
|
||||||
|
// some slight fudges to preserve backwards compatibility
|
||||||
|
$html_output = str_replace('/>', ' />', $html_output); // <br /> not <br/>
|
||||||
|
$html_output = str_replace(' xmlns=""', '', $html_output); // rm unnecessary xmlns
|
||||||
|
|
||||||
|
if (class_exists('Tidy')) {
|
||||||
|
// cleanup output
|
||||||
|
$config = array(
|
||||||
|
'indent' => true,
|
||||||
|
'output-xhtml' => true,
|
||||||
|
'wrap' => 80
|
||||||
|
);
|
||||||
|
$tidy = new Tidy;
|
||||||
|
$tidy->parseString($html_output, $config, 'utf8');
|
||||||
|
$tidy->cleanRepair();
|
||||||
|
$html_output = (string) $tidy;
|
||||||
|
}
|
||||||
|
|
||||||
|
// write it to a file (todo: parse into seperate pages)
|
||||||
|
file_put_contents("$xsl_stylesheet_name.html", $html_output);
|
||||||
|
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// Output for instant feedback
|
||||||
|
|
||||||
if (php_sapi_name() != 'cli') {
|
if (php_sapi_name() != 'cli') {
|
||||||
// output (instant feedback if it's a browser)
|
echo $html_output;
|
||||||
echo $output;
|
|
||||||
} else {
|
} else {
|
||||||
echo "Files generated successfully.\n";
|
echo 'Files generated successfully.';
|
||||||
}
|
}
|
||||||
|
|
||||||
// vim: et sw=4 sts=4
|
?>
|
@@ -1,30 +1,16 @@
|
|||||||
|
|
||||||
body {margin:0;padding:0;}
|
body {margin:1em 4em;}
|
||||||
#content {
|
|
||||||
margin:1em auto;
|
|
||||||
max-width: 47em;
|
|
||||||
width: expression(document.body.clientWidth >
|
|
||||||
85 * parseInt(document.body.currentStyle.fontSize) ?
|
|
||||||
"54em": "auto");
|
|
||||||
}
|
|
||||||
|
|
||||||
table {border-collapse:collapse;}
|
table {border-collapse:collapse;}
|
||||||
table td, table th {padding:0.2em;}
|
table td, table th {padding:0.2em;}
|
||||||
|
|
||||||
table.constraints {margin:0 0 1em;}
|
table.constraints {margin:0 0 1em;}
|
||||||
table.constraints th {
|
table.constraints th {text-align:left;padding-left:0.4em;}
|
||||||
text-align:right;padding-left:0.4em;padding-right:0.4em;background:#EEE;
|
table.constraints td {padding-right:0.4em;}
|
||||||
width:8em;vertical-align:top;}
|
|
||||||
table.constraints td {padding-right:0.4em; padding-left: 1em;}
|
|
||||||
table.constraints td ul {padding:0; margin:0; list-style:none;}
|
|
||||||
table.constraints td pre {margin:0;}
|
table.constraints td pre {margin:0;}
|
||||||
|
|
||||||
#tocContainer {position:relative;}
|
#toc {list-style-type:none; font-weight:bold;}
|
||||||
#toc {list-style-type:none; font-weight:bold; font-size:1em; margin-bottom:1em;}
|
#toc ul {list-style-type:disc; font-weight:normal;}
|
||||||
#toc li {position:relative; line-height: 1.2em;}
|
|
||||||
#toc .col-2 {margin-left:50%;}
|
|
||||||
#toc .col-l {float:left;}
|
|
||||||
#toc ul {list-style-type:disc; font-weight:normal; padding-bottom:1.2em;}
|
|
||||||
|
|
||||||
.description p {margin-top:0;margin-bottom:1em;}
|
.description p {margin-top:0;margin-bottom:1em;}
|
||||||
|
|
||||||
@@ -33,12 +19,6 @@ table.constraints td pre {margin:0;}
|
|||||||
#library {font-size:1em;}
|
#library {font-size:1em;}
|
||||||
h1 {margin-top:0;}
|
h1 {margin-top:0;}
|
||||||
h2 {border-bottom:1px solid #CCC; font-family:sans-serif; font-weight:normal;
|
h2 {border-bottom:1px solid #CCC; font-family:sans-serif; font-weight:normal;
|
||||||
font-size:1.3em; clear:both;}
|
font-size:1.3em;}
|
||||||
h3 {font-family:sans-serif; font-size:1.1em; font-weight:bold; }
|
h3 {font-family:sans-serif; font-size:1.1em; font-weight:bold; }
|
||||||
h4 {font-family:sans-serif; font-size:0.9em; font-weight:bold; }
|
h4 {font-family:sans-serif; font-size:0.9em; font-weight:bold; }
|
||||||
|
|
||||||
.deprecated {color: #CCC;}
|
|
||||||
.deprecated table.constraints th {background:#FFF;}
|
|
||||||
.deprecated-notice {color: #000; text-align:center; margin-bottom: 1em;}
|
|
||||||
|
|
||||||
/* vim: et sw=4 sts=4 */
|
|
||||||
|
@@ -12,213 +12,103 @@
|
|||||||
indent = "no"
|
indent = "no"
|
||||||
media-type = "text/html"
|
media-type = "text/html"
|
||||||
/>
|
/>
|
||||||
<xsl:param name="css" select="'styles/plain.css'"/>
|
|
||||||
<xsl:param name="title" select="'Configuration Documentation'"/>
|
|
||||||
|
|
||||||
<xsl:variable name="typeLookup" select="document('../types.xml')/types" />
|
<xsl:variable name="typeLookup" select="document('../types.xml')" />
|
||||||
<xsl:variable name="usageLookup" select="document('../usage.xml')/usage" />
|
|
||||||
|
|
||||||
<!-- Twiddle this variable to get the columns as even as possible -->
|
|
||||||
<xsl:variable name="maxNumberAdjust" select="2" />
|
|
||||||
|
|
||||||
<xsl:template match="/">
|
<xsl:template match="/">
|
||||||
<html lang="en" xml:lang="en">
|
<html lang="en" xml:lang="en">
|
||||||
<head>
|
<head>
|
||||||
<title><xsl:value-of select="$title" /> - <xsl:value-of select="/configdoc/title" /></title>
|
<title>Configuration Documentation - <xsl:value-of select="/configdoc/title" /></title>
|
||||||
<meta http-equiv="Content-Type" content="text/html;charset=UTF-8" />
|
<meta http-equiv="Content-Type" content="text/html;charset=UTF-8" />
|
||||||
<link rel="stylesheet" type="text/css" href="{$css}" />
|
<link rel="stylesheet" type="text/css" href="styles/plain.css" />
|
||||||
</head>
|
</head>
|
||||||
<body>
|
<body>
|
||||||
<div id="content">
|
<div id="library"><xsl:value-of select="/configdoc/title" /></div>
|
||||||
<div id="library"><xsl:value-of select="/configdoc/title" /></div>
|
<h1>Configuration Documentation</h1>
|
||||||
<h1><xsl:value-of select="$title" /></h1>
|
<h2>Table of Contents</h2>
|
||||||
<div id="tocContainer">
|
<ul id="toc">
|
||||||
<h2>Table of Contents</h2>
|
<xsl:apply-templates mode="toc" />
|
||||||
<ul id="toc">
|
</ul>
|
||||||
<xsl:apply-templates mode="toc">
|
<xsl:apply-templates />
|
||||||
<xsl:with-param name="overflowNumber" select="round(count(/configdoc/namespace) div 2) + $maxNumberAdjust" />
|
|
||||||
</xsl:apply-templates>
|
|
||||||
</ul>
|
|
||||||
</div>
|
|
||||||
<div id="typesContainer">
|
|
||||||
<h2>Types</h2>
|
|
||||||
<xsl:apply-templates select="$typeLookup" mode="types" />
|
|
||||||
</div>
|
|
||||||
<xsl:apply-templates />
|
|
||||||
</div>
|
|
||||||
</body>
|
</body>
|
||||||
</html>
|
</html>
|
||||||
</xsl:template>
|
</xsl:template>
|
||||||
|
|
||||||
<xsl:template match="type" mode="types">
|
|
||||||
<div class="type-block">
|
|
||||||
<xsl:attribute name="id">type-<xsl:value-of select="@id" /></xsl:attribute>
|
|
||||||
<h3><code><xsl:value-of select="@id" /></code>: <xsl:value-of select="@name" /></h3>
|
|
||||||
<div class="type-description">
|
|
||||||
<xsl:copy-of xmlns:xhtml="http://www.w3.org/1999/xhtml" select="xhtml:div/node()" />
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
</xsl:template>
|
|
||||||
|
|
||||||
<xsl:template match="title" mode="toc" />
|
<xsl:template match="title" mode="toc" />
|
||||||
<xsl:template match="namespace" mode="toc">
|
<xsl:template match="namespace" mode="toc">
|
||||||
<xsl:param name="overflowNumber" />
|
|
||||||
<xsl:variable name="number"><xsl:number level="single" /></xsl:variable>
|
|
||||||
<xsl:variable name="directiveNumber"><xsl:number level="any" count="directive" /></xsl:variable>
|
|
||||||
<xsl:if test="count(directive)>0">
|
<xsl:if test="count(directive)>0">
|
||||||
<li>
|
<li>
|
||||||
<!-- BEGIN multicolumn code -->
|
|
||||||
<xsl:if test="$number >= $overflowNumber">
|
|
||||||
<xsl:attribute name="class">col-2</xsl:attribute>
|
|
||||||
</xsl:if>
|
|
||||||
<xsl:if test="$number = $overflowNumber">
|
|
||||||
<xsl:attribute name="style">margin-top:-<xsl:value-of select="($number * 2 + $directiveNumber - 3) * 1.2" />em</xsl:attribute>
|
|
||||||
</xsl:if>
|
|
||||||
<!-- END multicolumn code -->
|
|
||||||
<a href="#{@id}"><xsl:value-of select="name" /></a>
|
<a href="#{@id}"><xsl:value-of select="name" /></a>
|
||||||
<ul>
|
<ul>
|
||||||
<xsl:apply-templates select="directive" mode="toc">
|
<xsl:apply-templates select="directive" mode="toc" />
|
||||||
<xsl:with-param name="overflowNumber" select="$overflowNumber" />
|
|
||||||
</xsl:apply-templates>
|
|
||||||
</ul>
|
</ul>
|
||||||
<xsl:if test="$number + 1 = $overflowNumber">
|
|
||||||
<div class="col-l" />
|
|
||||||
</xsl:if>
|
|
||||||
</li>
|
</li>
|
||||||
</xsl:if>
|
</xsl:if>
|
||||||
</xsl:template>
|
</xsl:template>
|
||||||
<xsl:template match="directive" mode="toc">
|
<xsl:template match="directive" mode="toc">
|
||||||
<xsl:variable name="number">
|
<li><a href="#{@id}"><xsl:value-of select="name" /></a></li>
|
||||||
<xsl:number level="any" count="directive|namespace" />
|
|
||||||
</xsl:variable>
|
|
||||||
<xsl:if test="not(deprecated)">
|
|
||||||
<li>
|
|
||||||
<a href="#{@id}"><xsl:value-of select="name" /></a>
|
|
||||||
</li>
|
|
||||||
</xsl:if>
|
|
||||||
</xsl:template>
|
</xsl:template>
|
||||||
|
|
||||||
<xsl:template match="title" />
|
<xsl:template match="title" />
|
||||||
|
|
||||||
<xsl:template match="namespace">
|
<xsl:template match="namespace">
|
||||||
<div class="namespace">
|
<xsl:apply-templates />
|
||||||
<xsl:apply-templates />
|
<xsl:if test="count(directive)=0">
|
||||||
<xsl:if test="count(directive)=0">
|
<p>No configuration directives defined for this namespace.</p>
|
||||||
<p>No configuration directives defined for this namespace.</p>
|
</xsl:if>
|
||||||
</xsl:if>
|
|
||||||
</div>
|
|
||||||
</xsl:template>
|
</xsl:template>
|
||||||
<xsl:template match="namespace/name">
|
<xsl:template match="namespace/name">
|
||||||
<h2 id="{../@id}"><xsl:value-of select="." /></h2>
|
<h2 id="{../@id}"><xsl:value-of select="." /></h2>
|
||||||
</xsl:template>
|
</xsl:template>
|
||||||
<xsl:template match="namespace/description">
|
<xsl:template match="namespace/description">
|
||||||
<div class="description">
|
<div class="description">
|
||||||
<xsl:copy-of xmlns:xhtml="http://www.w3.org/1999/xhtml" select="xhtml:div/node()" />
|
<xsl:copy-of select="div/node()" />
|
||||||
</div>
|
</div>
|
||||||
</xsl:template>
|
</xsl:template>
|
||||||
|
|
||||||
<xsl:template match="directive">
|
<xsl:template match="directive">
|
||||||
<div>
|
<xsl:apply-templates />
|
||||||
<xsl:attribute name="class"><!--
|
|
||||||
-->directive<!--
|
|
||||||
--><xsl:if test="deprecated"> deprecated</xsl:if><!--
|
|
||||||
--></xsl:attribute>
|
|
||||||
<xsl:apply-templates>
|
|
||||||
<xsl:with-param name="id" select="@id" />
|
|
||||||
</xsl:apply-templates>
|
|
||||||
</div>
|
|
||||||
</xsl:template>
|
</xsl:template>
|
||||||
<xsl:template match="directive/name">
|
<xsl:template match="directive/name">
|
||||||
<xsl:param name="id" />
|
<h3 id="{../@id}"><xsl:value-of select="../@id" /></h3>
|
||||||
<xsl:apply-templates select="../aliases/alias" mode="anchor" />
|
|
||||||
<h3 id="{$id}"><xsl:value-of select="$id" /></h3>
|
|
||||||
</xsl:template>
|
</xsl:template>
|
||||||
<xsl:template match="alias" mode="anchor">
|
|
||||||
<a id="{.}"></a>
|
|
||||||
</xsl:template>
|
|
||||||
|
|
||||||
<!-- Do not pass through -->
|
|
||||||
<xsl:template match="alias"></xsl:template>
|
|
||||||
|
|
||||||
<xsl:template match="directive/constraints">
|
<xsl:template match="directive/constraints">
|
||||||
<xsl:param name="id" />
|
|
||||||
<table class="constraints">
|
<table class="constraints">
|
||||||
<xsl:apply-templates />
|
<xsl:apply-templates />
|
||||||
<xsl:if test="../aliases/alias">
|
<!-- Calculated other values -->
|
||||||
<xsl:apply-templates select="../aliases" mode="constraints" />
|
<tr>
|
||||||
</xsl:if>
|
<th>Used by:</th>
|
||||||
<xsl:apply-templates select="$usageLookup/directive[@id=$id]" />
|
<td>
|
||||||
|
<xsl:for-each select="../descriptions/description">
|
||||||
|
<xsl:if test="position()>1">, </xsl:if>
|
||||||
|
<xsl:value-of select="@file" />
|
||||||
|
</xsl:for-each>
|
||||||
|
</td>
|
||||||
|
</tr>
|
||||||
</table>
|
</table>
|
||||||
</xsl:template>
|
</xsl:template>
|
||||||
<xsl:template match="directive/aliases" mode="constraints">
|
<xsl:template match="directive//description">
|
||||||
<tr>
|
|
||||||
<th>Aliases</th>
|
|
||||||
<td>
|
|
||||||
<xsl:for-each select="alias">
|
|
||||||
<xsl:if test="position()>1">, </xsl:if>
|
|
||||||
<xsl:value-of select="." />
|
|
||||||
</xsl:for-each>
|
|
||||||
</td>
|
|
||||||
</tr>
|
|
||||||
</xsl:template>
|
|
||||||
<xsl:template match="directive/description">
|
|
||||||
<div class="description">
|
<div class="description">
|
||||||
<xsl:copy-of xmlns:xhtml="http://www.w3.org/1999/xhtml" select="xhtml:div/node()" />
|
<xsl:copy-of select="div/node()" />
|
||||||
</div>
|
</div>
|
||||||
</xsl:template>
|
</xsl:template>
|
||||||
<xsl:template match="directive/deprecated">
|
|
||||||
<div class="deprecated-notice">
|
|
||||||
<strong>Warning:</strong>
|
|
||||||
This directive was deprecated in version <xsl:value-of select="version" />.
|
|
||||||
<a href="#{use}">%<xsl:value-of select="use" /></a> should be used instead.
|
|
||||||
</div>
|
|
||||||
</xsl:template>
|
|
||||||
<xsl:template match="usage/directive">
|
|
||||||
<tr>
|
|
||||||
<th>Used in</th>
|
|
||||||
<td>
|
|
||||||
<ul>
|
|
||||||
<xsl:apply-templates />
|
|
||||||
</ul>
|
|
||||||
</td>
|
|
||||||
</tr>
|
|
||||||
</xsl:template>
|
|
||||||
<xsl:template match="usage/directive/file">
|
|
||||||
<li>
|
|
||||||
<em><xsl:value-of select="@name" /></em> on line<xsl:if test="count(line)>1">s</xsl:if>
|
|
||||||
<xsl:text> </xsl:text>
|
|
||||||
<xsl:for-each select="line">
|
|
||||||
<xsl:if test="position()>1">, </xsl:if>
|
|
||||||
<xsl:value-of select="." />
|
|
||||||
</xsl:for-each>
|
|
||||||
</li>
|
|
||||||
</xsl:template>
|
|
||||||
|
|
||||||
<xsl:template match="constraints/version">
|
|
||||||
<tr>
|
|
||||||
<th>Version added</th>
|
|
||||||
<td><xsl:value-of select="." /></td>
|
|
||||||
</tr>
|
|
||||||
</xsl:template>
|
|
||||||
<xsl:template match="constraints/type">
|
<xsl:template match="constraints/type">
|
||||||
<tr>
|
<tr>
|
||||||
<th>Type</th>
|
<th>Type:</th>
|
||||||
<td>
|
<td>
|
||||||
<xsl:variable name="type" select="text()" />
|
<xsl:variable name="type" select="text()" />
|
||||||
<xsl:attribute name="class">type type-<xsl:value-of select="$type" /></xsl:attribute>
|
<xsl:attribute name="class">type type-<xsl:value-of select="$type" /></xsl:attribute>
|
||||||
<a>
|
<xsl:value-of select="$typeLookup/types/type[@id=$type]/text()" />
|
||||||
<xsl:attribute name="href">#type-<xsl:value-of select="$type" /></xsl:attribute>
|
<xsl:if test="@allow-null='yes'">
|
||||||
<xsl:value-of select="$typeLookup/type[@id=$type]/@name" />
|
(or null)
|
||||||
<xsl:if test="@allow-null='yes'">
|
</xsl:if>
|
||||||
(or null)
|
|
||||||
</xsl:if>
|
|
||||||
</a>
|
|
||||||
</td>
|
</td>
|
||||||
</tr>
|
</tr>
|
||||||
</xsl:template>
|
</xsl:template>
|
||||||
<xsl:template match="constraints/allowed">
|
<xsl:template match="constraints/allowed">
|
||||||
<tr>
|
<tr>
|
||||||
<th>Allowed values</th>
|
<th>Allowed values:</th>
|
||||||
<td>
|
<td>
|
||||||
<xsl:for-each select="value"><!--
|
<xsl:for-each select="value"><!--
|
||||||
--><xsl:if test="position()>1">, </xsl:if>
|
--><xsl:if test="position()>1">, </xsl:if>
|
||||||
@@ -229,25 +119,9 @@
|
|||||||
</xsl:template>
|
</xsl:template>
|
||||||
<xsl:template match="constraints/default">
|
<xsl:template match="constraints/default">
|
||||||
<tr>
|
<tr>
|
||||||
<th>Default</th>
|
<th>Default:</th>
|
||||||
<td><pre><xsl:value-of select="." xml:space="preserve" /></pre></td>
|
<td><pre><xsl:value-of select="." xml:space="preserve" /></pre></td>
|
||||||
</tr>
|
</tr>
|
||||||
</xsl:template>
|
</xsl:template>
|
||||||
<xsl:template match="constraints/external">
|
|
||||||
<tr>
|
|
||||||
<th>External deps</th>
|
|
||||||
<td>
|
|
||||||
<ul>
|
|
||||||
<xsl:apply-templates />
|
|
||||||
</ul>
|
|
||||||
</td>
|
|
||||||
</tr>
|
|
||||||
</xsl:template>
|
|
||||||
<xsl:template match="constraints/external/project">
|
|
||||||
<li><xsl:value-of select="." /></li>
|
|
||||||
</xsl:template>
|
|
||||||
|
|
||||||
</xsl:stylesheet>
|
</xsl:stylesheet>
|
||||||
|
|
||||||
<!-- vim: et sw=4 sts=4
|
|
||||||
-->
|
|
||||||
|
@@ -1,69 +0,0 @@
|
|||||||
<?xml version="1.0" encoding="UTF-8"?>
|
|
||||||
<types>
|
|
||||||
<type id="string" name="String"><div xmlns="http://www.w3.org/1999/xhtml">
|
|
||||||
A <a
|
|
||||||
href="http://docs.php.net/manual/en/language.types.string.php">sequence
|
|
||||||
of characters</a>.
|
|
||||||
</div></type>
|
|
||||||
<type id="istring" name="Case-insensitive string"><div xmlns="http://www.w3.org/1999/xhtml">
|
|
||||||
A series of case-insensitive characters. Internally, upper-case
|
|
||||||
ASCII characters will be converted to lower-case.
|
|
||||||
</div></type>
|
|
||||||
<type id="text" name="Text"><div xmlns="http://www.w3.org/1999/xhtml">
|
|
||||||
A series of characters that may contain newlines. Text tends to
|
|
||||||
indicate human-oriented text, as opposed to a machine format.
|
|
||||||
</div></type>
|
|
||||||
<type id="itext" name="Case-insensitive text"><div xmlns="http://www.w3.org/1999/xhtml">
|
|
||||||
A series of case-insensitive characters that may contain newlines.
|
|
||||||
</div></type>
|
|
||||||
<type id="int" name="Integer"><div xmlns="http://www.w3.org/1999/xhtml">
|
|
||||||
An <a
|
|
||||||
href="http://docs.php.net/manual/en/language.types.integer.php">
|
|
||||||
integer</a>. You are alternatively permitted to pass a string of
|
|
||||||
digits instead, which will be cast to an integer using
|
|
||||||
<code>(int)</code>.
|
|
||||||
</div></type>
|
|
||||||
<type id="float" name="Float"><div xmlns="http://www.w3.org/1999/xhtml">
|
|
||||||
A <a href="http://docs.php.net/manual/en/language.types.float.php">
|
|
||||||
floating point number</a>. You are alternatively permitted to
|
|
||||||
pass a numeric string (as defined by <code>is_numeric()</code>),
|
|
||||||
which will be cast to a float using <code>(float)</code>.
|
|
||||||
</div></type>
|
|
||||||
<type id="bool" name="Boolean"><div xmlns="http://www.w3.org/1999/xhtml">
|
|
||||||
A <a
|
|
||||||
href="http://docs.php.net/manual/en/language.types.boolean.php">boolean</a>.
|
|
||||||
You are alternatively permitted to pass an integer <code>0</code> or
|
|
||||||
<code>1</code> (other integers are not permitted) or a string
|
|
||||||
<code>"on"</code>, <code>"true"</code> or <code>"1"</code> for
|
|
||||||
<code>true</code>, and <code>"off"</code>, <code>"false"</code> or
|
|
||||||
<code>"0"</code> for <code>false</code>.
|
|
||||||
</div></type>
|
|
||||||
<type id="lookup" name="Lookup array"><div xmlns="http://www.w3.org/1999/xhtml">
|
|
||||||
An array whose values are <code>true</code>, e.g. <code>array('key'
|
|
||||||
=> true, 'key2' => true)</code>. You are alternatively permitted
|
|
||||||
to pass an array list of the keys <code>array('key', 'key2')</code>
|
|
||||||
or a comma-separated string of keys <code>"key, key2"</code>. If
|
|
||||||
you pass an array list of values, ensure that your values are
|
|
||||||
strictly numerically indexed: <code>array('key1', 2 =>
|
|
||||||
'key2')</code> will not do what you expect and emits a warning.
|
|
||||||
</div></type>
|
|
||||||
<type id="list" name="Array list"><div xmlns="http://www.w3.org/1999/xhtml">
|
|
||||||
An array which has consecutive integer indexes, e.g.
|
|
||||||
<code>array('val1', 'val2')</code>. You are alternatively permitted
|
|
||||||
to pass a comma-separated string of keys <code>"val1, val2"</code>.
|
|
||||||
If your array is not in this form, <code>array_values</code> is run
|
|
||||||
on the array and a warning is emitted.
|
|
||||||
</div></type>
|
|
||||||
<type id="hash" name="Associative array"><div xmlns="http://www.w3.org/1999/xhtml">
|
|
||||||
An array which is a mapping of keys to values, e.g.
|
|
||||||
<code>array('key1' => 'val1', 'key2' => 'val2')</code>. You are
|
|
||||||
alternatively permitted to pass a comma-separated string of
|
|
||||||
key-colon-value strings, e.g. <code>"key1: val1, key2: val2"</code>.
|
|
||||||
</div></type>
|
|
||||||
<type id="mixed" name="Mixed"><div xmlns="http://www.w3.org/1999/xhtml">
|
|
||||||
An arbitrary PHP value of any type.
|
|
||||||
</div></type>
|
|
||||||
</types>
|
|
||||||
|
|
||||||
<!-- vim: et sw=4 sts=4
|
|
||||||
-->
|
|
@@ -1,611 +0,0 @@
|
|||||||
<?xml version="1.0" encoding="UTF-8"?>
|
|
||||||
<usage>
|
|
||||||
<directive id="Core.CollectErrors">
|
|
||||||
<file name="HTMLPurifier.php">
|
|
||||||
<line>162</line>
|
|
||||||
</file>
|
|
||||||
<file name="HTMLPurifier/Lexer.php">
|
|
||||||
<line>90</line>
|
|
||||||
<line>315</line>
|
|
||||||
</file>
|
|
||||||
<file name="HTMLPurifier/Lexer/DirectLex.php">
|
|
||||||
<line>67</line>
|
|
||||||
<line>87</line>
|
|
||||||
<line>385</line>
|
|
||||||
</file>
|
|
||||||
<file name="HTMLPurifier/Strategy/RemoveForeignElements.php">
|
|
||||||
<line>57</line>
|
|
||||||
</file>
|
|
||||||
</directive>
|
|
||||||
<directive id="CSS.MaxImgLength">
|
|
||||||
<file name="HTMLPurifier/CSSDefinition.php">
|
|
||||||
<line>253</line>
|
|
||||||
</file>
|
|
||||||
</directive>
|
|
||||||
<directive id="CSS.Proprietary">
|
|
||||||
<file name="HTMLPurifier/CSSDefinition.php">
|
|
||||||
<line>397</line>
|
|
||||||
</file>
|
|
||||||
</directive>
|
|
||||||
<directive id="CSS.AllowTricky">
|
|
||||||
<file name="HTMLPurifier/CSSDefinition.php">
|
|
||||||
<line>401</line>
|
|
||||||
</file>
|
|
||||||
</directive>
|
|
||||||
<directive id="CSS.Trusted">
|
|
||||||
<file name="HTMLPurifier/CSSDefinition.php">
|
|
||||||
<line>405</line>
|
|
||||||
</file>
|
|
||||||
</directive>
|
|
||||||
<directive id="CSS.AllowImportant">
|
|
||||||
<file name="HTMLPurifier/CSSDefinition.php">
|
|
||||||
<line>409</line>
|
|
||||||
</file>
|
|
||||||
</directive>
|
|
||||||
<directive id="CSS.AllowedProperties">
|
|
||||||
<file name="HTMLPurifier/CSSDefinition.php">
|
|
||||||
<line>538</line>
|
|
||||||
</file>
|
|
||||||
</directive>
|
|
||||||
<directive id="CSS.ForbiddenProperties">
|
|
||||||
<file name="HTMLPurifier/CSSDefinition.php">
|
|
||||||
<line>554</line>
|
|
||||||
</file>
|
|
||||||
</directive>
|
|
||||||
<directive id="Cache.DefinitionImpl">
|
|
||||||
<file name="HTMLPurifier/DefinitionCacheFactory.php">
|
|
||||||
<line>66</line>
|
|
||||||
</file>
|
|
||||||
</directive>
|
|
||||||
<directive id="HTML.Doctype">
|
|
||||||
<file name="HTMLPurifier/DoctypeRegistry.php">
|
|
||||||
<line>119</line>
|
|
||||||
</file>
|
|
||||||
</directive>
|
|
||||||
<directive id="HTML.CustomDoctype">
|
|
||||||
<file name="HTMLPurifier/DoctypeRegistry.php">
|
|
||||||
<line>123</line>
|
|
||||||
</file>
|
|
||||||
</directive>
|
|
||||||
<directive id="HTML.XHTML">
|
|
||||||
<file name="HTMLPurifier/DoctypeRegistry.php">
|
|
||||||
<line>128</line>
|
|
||||||
</file>
|
|
||||||
</directive>
|
|
||||||
<directive id="HTML.Strict">
|
|
||||||
<file name="HTMLPurifier/DoctypeRegistry.php">
|
|
||||||
<line>133</line>
|
|
||||||
</file>
|
|
||||||
</directive>
|
|
||||||
<directive id="Core.Encoding">
|
|
||||||
<file name="HTMLPurifier/Encoder.php">
|
|
||||||
<line>380</line>
|
|
||||||
<line>428</line>
|
|
||||||
</file>
|
|
||||||
</directive>
|
|
||||||
<directive id="Test.ForceNoIconv">
|
|
||||||
<file name="HTMLPurifier/Encoder.php">
|
|
||||||
<line>388</line>
|
|
||||||
<line>439</line>
|
|
||||||
</file>
|
|
||||||
</directive>
|
|
||||||
<directive id="Core.EscapeNonASCIICharacters">
|
|
||||||
<file name="HTMLPurifier/Encoder.php">
|
|
||||||
<line>429</line>
|
|
||||||
</file>
|
|
||||||
</directive>
|
|
||||||
<directive id="Output.CommentScriptContents">
|
|
||||||
<file name="HTMLPurifier/Generator.php">
|
|
||||||
<line>70</line>
|
|
||||||
</file>
|
|
||||||
</directive>
|
|
||||||
<directive id="Output.FixInnerHTML">
|
|
||||||
<file name="HTMLPurifier/Generator.php">
|
|
||||||
<line>71</line>
|
|
||||||
</file>
|
|
||||||
</directive>
|
|
||||||
<directive id="Output.SortAttr">
|
|
||||||
<file name="HTMLPurifier/Generator.php">
|
|
||||||
<line>72</line>
|
|
||||||
</file>
|
|
||||||
</directive>
|
|
||||||
<directive id="Output.FlashCompat">
|
|
||||||
<file name="HTMLPurifier/Generator.php">
|
|
||||||
<line>73</line>
|
|
||||||
</file>
|
|
||||||
</directive>
|
|
||||||
<directive id="Output.TidyFormat">
|
|
||||||
<file name="HTMLPurifier/Generator.php">
|
|
||||||
<line>104</line>
|
|
||||||
</file>
|
|
||||||
</directive>
|
|
||||||
<directive id="Core.NormalizeNewlines">
|
|
||||||
<file name="HTMLPurifier/Generator.php">
|
|
||||||
<line>122</line>
|
|
||||||
</file>
|
|
||||||
<file name="HTMLPurifier/Lexer.php">
|
|
||||||
<line>299</line>
|
|
||||||
</file>
|
|
||||||
</directive>
|
|
||||||
<directive id="Output.Newline">
|
|
||||||
<file name="HTMLPurifier/Generator.php">
|
|
||||||
<line>123</line>
|
|
||||||
</file>
|
|
||||||
</directive>
|
|
||||||
<directive id="HTML.BlockWrapper">
|
|
||||||
<file name="HTMLPurifier/HTMLDefinition.php">
|
|
||||||
<line>263</line>
|
|
||||||
</file>
|
|
||||||
</directive>
|
|
||||||
<directive id="HTML.Parent">
|
|
||||||
<file name="HTMLPurifier/HTMLDefinition.php">
|
|
||||||
<line>273</line>
|
|
||||||
</file>
|
|
||||||
</directive>
|
|
||||||
<directive id="HTML.AllowedElements">
|
|
||||||
<file name="HTMLPurifier/HTMLDefinition.php">
|
|
||||||
<line>291</line>
|
|
||||||
</file>
|
|
||||||
</directive>
|
|
||||||
<directive id="HTML.AllowedAttributes">
|
|
||||||
<file name="HTMLPurifier/HTMLDefinition.php">
|
|
||||||
<line>292</line>
|
|
||||||
</file>
|
|
||||||
</directive>
|
|
||||||
<directive id="HTML.Allowed">
|
|
||||||
<file name="HTMLPurifier/HTMLDefinition.php">
|
|
||||||
<line>295</line>
|
|
||||||
</file>
|
|
||||||
</directive>
|
|
||||||
<directive id="HTML.ForbiddenElements">
|
|
||||||
<file name="HTMLPurifier/HTMLDefinition.php">
|
|
||||||
<line>399</line>
|
|
||||||
</file>
|
|
||||||
</directive>
|
|
||||||
<directive id="HTML.ForbiddenAttributes">
|
|
||||||
<file name="HTMLPurifier/HTMLDefinition.php">
|
|
||||||
<line>400</line>
|
|
||||||
</file>
|
|
||||||
</directive>
|
|
||||||
<directive id="HTML.Trusted">
|
|
||||||
<file name="HTMLPurifier/HTMLModuleManager.php">
|
|
||||||
<line>234</line>
|
|
||||||
</file>
|
|
||||||
<file name="HTMLPurifier/Lexer.php">
|
|
||||||
<line>304</line>
|
|
||||||
<line>342</line>
|
|
||||||
</file>
|
|
||||||
<file name="HTMLPurifier/AttrDef/HTML/ContentEditable.php">
|
|
||||||
<line>8</line>
|
|
||||||
</file>
|
|
||||||
<file name="HTMLPurifier/HTMLModule/Iframe.php">
|
|
||||||
<line>43</line>
|
|
||||||
</file>
|
|
||||||
<file name="HTMLPurifier/HTMLModule/Image.php">
|
|
||||||
<line>37</line>
|
|
||||||
</file>
|
|
||||||
<file name="HTMLPurifier/Lexer/DirectLex.php">
|
|
||||||
<line>47</line>
|
|
||||||
</file>
|
|
||||||
<file name="HTMLPurifier/Strategy/RemoveForeignElements.php">
|
|
||||||
<line>30</line>
|
|
||||||
</file>
|
|
||||||
</directive>
|
|
||||||
<directive id="HTML.AllowedModules">
|
|
||||||
<file name="HTMLPurifier/HTMLModuleManager.php">
|
|
||||||
<line>241</line>
|
|
||||||
</file>
|
|
||||||
</directive>
|
|
||||||
<directive id="HTML.CoreModules">
|
|
||||||
<file name="HTMLPurifier/HTMLModuleManager.php">
|
|
||||||
<line>242</line>
|
|
||||||
</file>
|
|
||||||
</directive>
|
|
||||||
<directive id="HTML.Proprietary">
|
|
||||||
<file name="HTMLPurifier/HTMLModuleManager.php">
|
|
||||||
<line>256</line>
|
|
||||||
</file>
|
|
||||||
</directive>
|
|
||||||
<directive id="HTML.SafeObject">
|
|
||||||
<file name="HTMLPurifier/HTMLModuleManager.php">
|
|
||||||
<line>259</line>
|
|
||||||
</file>
|
|
||||||
</directive>
|
|
||||||
<directive id="HTML.SafeEmbed">
|
|
||||||
<file name="HTMLPurifier/HTMLModuleManager.php">
|
|
||||||
<line>262</line>
|
|
||||||
</file>
|
|
||||||
</directive>
|
|
||||||
<directive id="HTML.SafeScripting">
|
|
||||||
<file name="HTMLPurifier/HTMLModuleManager.php">
|
|
||||||
<line>265</line>
|
|
||||||
</file>
|
|
||||||
<file name="HTMLPurifier/HTMLModule/SafeScripting.php">
|
|
||||||
<line>22</line>
|
|
||||||
</file>
|
|
||||||
</directive>
|
|
||||||
<directive id="HTML.Nofollow">
|
|
||||||
<file name="HTMLPurifier/HTMLModuleManager.php">
|
|
||||||
<line>268</line>
|
|
||||||
</file>
|
|
||||||
</directive>
|
|
||||||
<directive id="HTML.TargetBlank">
|
|
||||||
<file name="HTMLPurifier/HTMLModuleManager.php">
|
|
||||||
<line>271</line>
|
|
||||||
</file>
|
|
||||||
</directive>
|
|
||||||
<directive id="HTML.TargetNoreferrer">
|
|
||||||
<file name="HTMLPurifier/HTMLModuleManager.php">
|
|
||||||
<line>276</line>
|
|
||||||
</file>
|
|
||||||
</directive>
|
|
||||||
<directive id="HTML.TargetNoopener">
|
|
||||||
<file name="HTMLPurifier/HTMLModuleManager.php">
|
|
||||||
<line>279</line>
|
|
||||||
</file>
|
|
||||||
</directive>
|
|
||||||
<directive id="Attr.IDBlacklist">
|
|
||||||
<file name="HTMLPurifier/IDAccumulator.php">
|
|
||||||
<line>27</line>
|
|
||||||
</file>
|
|
||||||
</directive>
|
|
||||||
<directive id="Core.Language">
|
|
||||||
<file name="HTMLPurifier/LanguageFactory.php">
|
|
||||||
<line>93</line>
|
|
||||||
</file>
|
|
||||||
</directive>
|
|
||||||
<directive id="Core.LexerImpl">
|
|
||||||
<file name="HTMLPurifier/Lexer.php">
|
|
||||||
<line>85</line>
|
|
||||||
</file>
|
|
||||||
</directive>
|
|
||||||
<directive id="Core.MaintainLineNumbers">
|
|
||||||
<file name="HTMLPurifier/Lexer.php">
|
|
||||||
<line>89</line>
|
|
||||||
</file>
|
|
||||||
<file name="HTMLPurifier/Lexer/DirectLex.php">
|
|
||||||
<line>62</line>
|
|
||||||
</file>
|
|
||||||
</directive>
|
|
||||||
<directive id="Core.LegacyEntityDecoder">
|
|
||||||
<file name="HTMLPurifier/Lexer.php">
|
|
||||||
<line>220</line>
|
|
||||||
<line>326</line>
|
|
||||||
</file>
|
|
||||||
</directive>
|
|
||||||
<directive id="Core.ConvertDocumentToFragment">
|
|
||||||
<file name="HTMLPurifier/Lexer.php">
|
|
||||||
<line>313</line>
|
|
||||||
</file>
|
|
||||||
</directive>
|
|
||||||
<directive id="Core.RemoveProcessingInstructions">
|
|
||||||
<file name="HTMLPurifier/Lexer.php">
|
|
||||||
<line>336</line>
|
|
||||||
</file>
|
|
||||||
</directive>
|
|
||||||
<directive id="Core.HiddenElements">
|
|
||||||
<file name="HTMLPurifier/Lexer.php">
|
|
||||||
<line>340</line>
|
|
||||||
</file>
|
|
||||||
<file name="HTMLPurifier/Strategy/RemoveForeignElements.php">
|
|
||||||
<line>36</line>
|
|
||||||
</file>
|
|
||||||
</directive>
|
|
||||||
<directive id="Core.AggressivelyRemoveScript">
|
|
||||||
<file name="HTMLPurifier/Lexer.php">
|
|
||||||
<line>341</line>
|
|
||||||
</file>
|
|
||||||
</directive>
|
|
||||||
<directive id="Core.RemoveScriptContents">
|
|
||||||
<file name="HTMLPurifier/Lexer.php">
|
|
||||||
<line>342</line>
|
|
||||||
</file>
|
|
||||||
<file name="HTMLPurifier/Strategy/RemoveForeignElements.php">
|
|
||||||
<line>35</line>
|
|
||||||
</file>
|
|
||||||
</directive>
|
|
||||||
<directive id="URI.">
|
|
||||||
<file name="HTMLPurifier/URIDefinition.php">
|
|
||||||
<line>65</line>
|
|
||||||
</file>
|
|
||||||
<file name="HTMLPurifier/URIFilter/Munge.php">
|
|
||||||
<line>46</line>
|
|
||||||
</file>
|
|
||||||
</directive>
|
|
||||||
<directive id="URI.Host">
|
|
||||||
<file name="HTMLPurifier/URIDefinition.php">
|
|
||||||
<line>76</line>
|
|
||||||
</file>
|
|
||||||
<file name="HTMLPurifier/URIScheme.php">
|
|
||||||
<line>89</line>
|
|
||||||
</file>
|
|
||||||
</directive>
|
|
||||||
<directive id="URI.Base">
|
|
||||||
<file name="HTMLPurifier/URIDefinition.php">
|
|
||||||
<line>77</line>
|
|
||||||
</file>
|
|
||||||
</directive>
|
|
||||||
<directive id="URI.DefaultScheme">
|
|
||||||
<file name="HTMLPurifier/URIDefinition.php">
|
|
||||||
<line>84</line>
|
|
||||||
</file>
|
|
||||||
</directive>
|
|
||||||
<directive id="URI.AllowedSchemes">
|
|
||||||
<file name="HTMLPurifier/URISchemeRegistry.php">
|
|
||||||
<line>48</line>
|
|
||||||
</file>
|
|
||||||
</directive>
|
|
||||||
<directive id="URI.OverrideAllowedSchemes">
|
|
||||||
<file name="HTMLPurifier/URISchemeRegistry.php">
|
|
||||||
<line>49</line>
|
|
||||||
</file>
|
|
||||||
</directive>
|
|
||||||
<directive id="CSS.AllowDuplicates">
|
|
||||||
<file name="HTMLPurifier/AttrDef/CSS.php">
|
|
||||||
<line>28</line>
|
|
||||||
</file>
|
|
||||||
</directive>
|
|
||||||
<directive id="URI.Disable">
|
|
||||||
<file name="HTMLPurifier/AttrDef/URI.php">
|
|
||||||
<line>47</line>
|
|
||||||
</file>
|
|
||||||
</directive>
|
|
||||||
<directive id="Core.ColorKeywords">
|
|
||||||
<file name="HTMLPurifier/AttrDef/CSS/Color.php">
|
|
||||||
<line>29</line>
|
|
||||||
</file>
|
|
||||||
<file name="HTMLPurifier/AttrDef/HTML/Color.php">
|
|
||||||
<line>19</line>
|
|
||||||
</file>
|
|
||||||
</directive>
|
|
||||||
<directive id="CSS.AllowedFonts">
|
|
||||||
<file name="HTMLPurifier/AttrDef/CSS/FontFamily.php">
|
|
||||||
<line>62</line>
|
|
||||||
</file>
|
|
||||||
</directive>
|
|
||||||
<directive id="Attr.AllowedClasses">
|
|
||||||
<file name="HTMLPurifier/AttrDef/HTML/Class.php">
|
|
||||||
<line>33</line>
|
|
||||||
</file>
|
|
||||||
</directive>
|
|
||||||
<directive id="Attr.ForbiddenClasses">
|
|
||||||
<file name="HTMLPurifier/AttrDef/HTML/Class.php">
|
|
||||||
<line>34</line>
|
|
||||||
</file>
|
|
||||||
</directive>
|
|
||||||
<directive id="Attr.AllowedFrameTargets">
|
|
||||||
<file name="HTMLPurifier/AttrDef/HTML/FrameTarget.php">
|
|
||||||
<line>32</line>
|
|
||||||
</file>
|
|
||||||
</directive>
|
|
||||||
<directive id="Attr.EnableID">
|
|
||||||
<file name="HTMLPurifier/AttrDef/HTML/ID.php">
|
|
||||||
<line>41</line>
|
|
||||||
</file>
|
|
||||||
</directive>
|
|
||||||
<directive id="Attr.IDPrefix">
|
|
||||||
<file name="HTMLPurifier/AttrDef/HTML/ID.php">
|
|
||||||
<line>51</line>
|
|
||||||
</file>
|
|
||||||
</directive>
|
|
||||||
<directive id="Attr.IDPrefixLocal">
|
|
||||||
<file name="HTMLPurifier/AttrDef/HTML/ID.php">
|
|
||||||
<line>53</line>
|
|
||||||
<line>58</line>
|
|
||||||
</file>
|
|
||||||
</directive>
|
|
||||||
<directive id="Attr.ID.HTML5">
|
|
||||||
<file name="HTMLPurifier/AttrDef/HTML/ID.php">
|
|
||||||
<line>75</line>
|
|
||||||
</file>
|
|
||||||
</directive>
|
|
||||||
<directive id="Attr.IDBlacklistRegexp">
|
|
||||||
<file name="HTMLPurifier/AttrDef/HTML/ID.php">
|
|
||||||
<line>97</line>
|
|
||||||
</file>
|
|
||||||
</directive>
|
|
||||||
<directive id="Attr.">
|
|
||||||
<file name="HTMLPurifier/AttrDef/HTML/LinkTypes.php">
|
|
||||||
<line>46</line>
|
|
||||||
</file>
|
|
||||||
</directive>
|
|
||||||
<directive id="Core.AllowHostnameUnderscore">
|
|
||||||
<file name="HTMLPurifier/AttrDef/URI/Host.php">
|
|
||||||
<line>71</line>
|
|
||||||
</file>
|
|
||||||
</directive>
|
|
||||||
<directive id="Core.EnableIDNA">
|
|
||||||
<file name="HTMLPurifier/AttrDef/URI/Host.php">
|
|
||||||
<line>103</line>
|
|
||||||
</file>
|
|
||||||
</directive>
|
|
||||||
<directive id="Attr.DefaultTextDir">
|
|
||||||
<file name="HTMLPurifier/AttrTransform/BdoDir.php">
|
|
||||||
<line>22</line>
|
|
||||||
</file>
|
|
||||||
</directive>
|
|
||||||
<directive id="Core.RemoveInvalidImg">
|
|
||||||
<file name="HTMLPurifier/AttrTransform/ImgRequired.php">
|
|
||||||
<line>24</line>
|
|
||||||
</file>
|
|
||||||
<file name="HTMLPurifier/Strategy/RemoveForeignElements.php">
|
|
||||||
<line>27</line>
|
|
||||||
</file>
|
|
||||||
</directive>
|
|
||||||
<directive id="Attr.DefaultInvalidImage">
|
|
||||||
<file name="HTMLPurifier/AttrTransform/ImgRequired.php">
|
|
||||||
<line>27</line>
|
|
||||||
</file>
|
|
||||||
</directive>
|
|
||||||
<directive id="Attr.DefaultImageAlt">
|
|
||||||
<file name="HTMLPurifier/AttrTransform/ImgRequired.php">
|
|
||||||
<line>33</line>
|
|
||||||
</file>
|
|
||||||
</directive>
|
|
||||||
<directive id="Attr.DefaultInvalidImageAlt">
|
|
||||||
<file name="HTMLPurifier/AttrTransform/ImgRequired.php">
|
|
||||||
<line>40</line>
|
|
||||||
</file>
|
|
||||||
</directive>
|
|
||||||
<directive id="HTML.Attr.Name.UseCDATA">
|
|
||||||
<file name="HTMLPurifier/AttrTransform/Name.php">
|
|
||||||
<line>18</line>
|
|
||||||
</file>
|
|
||||||
<file name="HTMLPurifier/HTMLModule/Name.php">
|
|
||||||
<line>19</line>
|
|
||||||
</file>
|
|
||||||
</directive>
|
|
||||||
<directive id="HTML.FlashAllowFullScreen">
|
|
||||||
<file name="HTMLPurifier/AttrTransform/SafeParam.php">
|
|
||||||
<line>58</line>
|
|
||||||
</file>
|
|
||||||
</directive>
|
|
||||||
<directive id="Cache.SerializerPath">
|
|
||||||
<file name="HTMLPurifier/DefinitionCache/Serializer.php">
|
|
||||||
<line>185</line>
|
|
||||||
</file>
|
|
||||||
</directive>
|
|
||||||
<directive id="Cache.SerializerPermissions">
|
|
||||||
<file name="HTMLPurifier/DefinitionCache/Serializer.php">
|
|
||||||
<line>202</line>
|
|
||||||
<line>218</line>
|
|
||||||
</file>
|
|
||||||
</directive>
|
|
||||||
<directive id="Filter.ExtractStyleBlocks.TidyImpl">
|
|
||||||
<file name="HTMLPurifier/Filter/ExtractStyleBlocks.php">
|
|
||||||
<line>106</line>
|
|
||||||
</file>
|
|
||||||
</directive>
|
|
||||||
<directive id="Filter.ExtractStyleBlocks.Scope">
|
|
||||||
<file name="HTMLPurifier/Filter/ExtractStyleBlocks.php">
|
|
||||||
<line>137</line>
|
|
||||||
</file>
|
|
||||||
</directive>
|
|
||||||
<directive id="Filter.ExtractStyleBlocks.Escaping">
|
|
||||||
<file name="HTMLPurifier/Filter/ExtractStyleBlocks.php">
|
|
||||||
<line>351</line>
|
|
||||||
</file>
|
|
||||||
</directive>
|
|
||||||
<directive id="HTML.Forms">
|
|
||||||
<file name="HTMLPurifier/HTMLModule/Forms.php">
|
|
||||||
<line>31</line>
|
|
||||||
</file>
|
|
||||||
</directive>
|
|
||||||
<directive id="HTML.SafeIframe">
|
|
||||||
<file name="HTMLPurifier/HTMLModule/Iframe.php">
|
|
||||||
<line>28</line>
|
|
||||||
</file>
|
|
||||||
<file name="HTMLPurifier/URIFilter/SafeIframe.php">
|
|
||||||
<line>48</line>
|
|
||||||
</file>
|
|
||||||
</directive>
|
|
||||||
<directive id="HTML.MaxImgLength">
|
|
||||||
<file name="HTMLPurifier/HTMLModule/Image.php">
|
|
||||||
<line>21</line>
|
|
||||||
</file>
|
|
||||||
<file name="HTMLPurifier/HTMLModule/SafeEmbed.php">
|
|
||||||
<line>18</line>
|
|
||||||
</file>
|
|
||||||
<file name="HTMLPurifier/HTMLModule/SafeObject.php">
|
|
||||||
<line>24</line>
|
|
||||||
</file>
|
|
||||||
</directive>
|
|
||||||
<directive id="HTML.TidyLevel">
|
|
||||||
<file name="HTMLPurifier/HTMLModule/Tidy.php">
|
|
||||||
<line>50</line>
|
|
||||||
</file>
|
|
||||||
</directive>
|
|
||||||
<directive id="HTML.TidyAdd">
|
|
||||||
<file name="HTMLPurifier/HTMLModule/Tidy.php">
|
|
||||||
<line>54</line>
|
|
||||||
</file>
|
|
||||||
</directive>
|
|
||||||
<directive id="HTML.TidyRemove">
|
|
||||||
<file name="HTMLPurifier/HTMLModule/Tidy.php">
|
|
||||||
<line>55</line>
|
|
||||||
</file>
|
|
||||||
</directive>
|
|
||||||
<directive id="AutoFormat.PurifierLinkify.DocURL">
|
|
||||||
<file name="HTMLPurifier/Injector/PurifierLinkify.php">
|
|
||||||
<line>31</line>
|
|
||||||
</file>
|
|
||||||
</directive>
|
|
||||||
<directive id="AutoFormat.RemoveEmpty.RemoveNbsp">
|
|
||||||
<file name="HTMLPurifier/Injector/RemoveEmpty.php">
|
|
||||||
<line>46</line>
|
|
||||||
</file>
|
|
||||||
</directive>
|
|
||||||
<directive id="AutoFormat.RemoveEmpty.RemoveNbsp.Exceptions">
|
|
||||||
<file name="HTMLPurifier/Injector/RemoveEmpty.php">
|
|
||||||
<line>47</line>
|
|
||||||
</file>
|
|
||||||
</directive>
|
|
||||||
<directive id="AutoFormat.RemoveEmpty.Predicate">
|
|
||||||
<file name="HTMLPurifier/Injector/RemoveEmpty.php">
|
|
||||||
<line>48</line>
|
|
||||||
</file>
|
|
||||||
</directive>
|
|
||||||
<directive id="Core.AggressivelyFixLt">
|
|
||||||
<file name="HTMLPurifier/Lexer/DOMLex.php">
|
|
||||||
<line>54</line>
|
|
||||||
</file>
|
|
||||||
</directive>
|
|
||||||
<directive id="Core.AllowParseManyTags">
|
|
||||||
<file name="HTMLPurifier/Lexer/DOMLex.php">
|
|
||||||
<line>72</line>
|
|
||||||
</file>
|
|
||||||
</directive>
|
|
||||||
<directive id="Core.RemoveBlanks">
|
|
||||||
<file name="HTMLPurifier/Lexer/DOMLex.php">
|
|
||||||
<line>75</line>
|
|
||||||
</file>
|
|
||||||
</directive>
|
|
||||||
<directive id="Core.DirectLexLineNumberSyncInterval">
|
|
||||||
<file name="HTMLPurifier/Lexer/DirectLex.php">
|
|
||||||
<line>84</line>
|
|
||||||
</file>
|
|
||||||
</directive>
|
|
||||||
<directive id="Core.DisableExcludes">
|
|
||||||
<file name="HTMLPurifier/Strategy/FixNesting.php">
|
|
||||||
<line>54</line>
|
|
||||||
</file>
|
|
||||||
</directive>
|
|
||||||
<directive id="Core.EscapeInvalidTags">
|
|
||||||
<file name="HTMLPurifier/Strategy/MakeWellFormed.php">
|
|
||||||
<line>72</line>
|
|
||||||
</file>
|
|
||||||
<file name="HTMLPurifier/Strategy/RemoveForeignElements.php">
|
|
||||||
<line>26</line>
|
|
||||||
</file>
|
|
||||||
</directive>
|
|
||||||
<directive id="HTML.AllowedComments">
|
|
||||||
<file name="HTMLPurifier/Strategy/RemoveForeignElements.php">
|
|
||||||
<line>31</line>
|
|
||||||
</file>
|
|
||||||
</directive>
|
|
||||||
<directive id="HTML.AllowedCommentsRegexp">
|
|
||||||
<file name="HTMLPurifier/Strategy/RemoveForeignElements.php">
|
|
||||||
<line>32</line>
|
|
||||||
</file>
|
|
||||||
</directive>
|
|
||||||
<directive id="URI.HostBlacklist">
|
|
||||||
<file name="HTMLPurifier/URIFilter/HostBlacklist.php">
|
|
||||||
<line>25</line>
|
|
||||||
</file>
|
|
||||||
</directive>
|
|
||||||
<directive id="URI.MungeResources">
|
|
||||||
<file name="HTMLPurifier/URIFilter/Munge.php">
|
|
||||||
<line>48</line>
|
|
||||||
</file>
|
|
||||||
</directive>
|
|
||||||
<directive id="URI.MungeSecretKey">
|
|
||||||
<file name="HTMLPurifier/URIFilter/Munge.php">
|
|
||||||
<line>49</line>
|
|
||||||
</file>
|
|
||||||
</directive>
|
|
||||||
<directive id="URI.SafeIframeRegexp">
|
|
||||||
<file name="HTMLPurifier/URIFilter/SafeIframe.php">
|
|
||||||
<line>35</line>
|
|
||||||
</file>
|
|
||||||
</directive>
|
|
||||||
</usage>
|
|
@@ -3,7 +3,7 @@
|
|||||||
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
|
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
|
||||||
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"><head>
|
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"><head>
|
||||||
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
|
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
|
||||||
<meta name="description" content="Specification for HTML Purifier's advanced API for defining custom filtering behavior." />
|
<meta name="description" content="Functional specification for HTML Purifier's advanced API for defining custom filtering behavior." />
|
||||||
<link rel="stylesheet" type="text/css" href="style.css" />
|
<link rel="stylesheet" type="text/css" href="style.css" />
|
||||||
|
|
||||||
<title>Advanced API - HTML Purifier</title>
|
<title>Advanced API - HTML Purifier</title>
|
||||||
@@ -14,13 +14,175 @@
|
|||||||
|
|
||||||
<div id="filing">Filed under Development</div>
|
<div id="filing">Filed under Development</div>
|
||||||
<div id="index">Return to the <a href="index.html">index</a>.</div>
|
<div id="index">Return to the <a href="index.html">index</a>.</div>
|
||||||
<div id="home"><a href="http://htmlpurifier.org/">HTML Purifier</a> End-User Documentation</div>
|
<div id="home"><a href="http://hp.jpsband.org/">HTML Purifier</a> End-User Documentation</div>
|
||||||
|
|
||||||
<p>
|
<p>It makes no sense to adopt a <q>one-size-fits-all</q> approach to
|
||||||
Please see <a href="enduser-customize.html">Customize!</a>
|
filtersets: therefore, users must be able to define their own sets of
|
||||||
</p>
|
<q>allowed</q> elements, as well as switch in-between doctypes of HTML.</p>
|
||||||
|
|
||||||
|
<p>Our goals are to let the user:</p>
|
||||||
|
|
||||||
|
<dl>
|
||||||
|
<dt>Select</dt>
|
||||||
|
<dd><ul>
|
||||||
|
<li>Doctype</li>
|
||||||
|
<li>Filtersets: Rich / Plain / Full ...</li>
|
||||||
|
<li>Mode: Lenient / Correctional</li>
|
||||||
|
<li>Collections (?): Safe / Unsafe</li>
|
||||||
|
<li>Modules / Tags / Attributes</li>
|
||||||
|
</ul></dd>
|
||||||
|
<dt>Customize</dt>
|
||||||
|
<dd><ul>
|
||||||
|
<li>Tags / Attributes / Attribute Types</li>
|
||||||
|
<li>Filtersets</li>
|
||||||
|
<li>Root Node</li>
|
||||||
|
</ul></dd>
|
||||||
|
<dt>Create</dt>
|
||||||
|
<dd><ul>
|
||||||
|
<li>Modules / Tags / Attributes / Attribute Types</li>
|
||||||
|
<li>Filtersets</li>
|
||||||
|
<li>Doctype</li>
|
||||||
|
</ul></dd>
|
||||||
|
</dl>
|
||||||
|
|
||||||
|
<h2>Select</h2>
|
||||||
|
|
||||||
|
<h3>Selecting a Doctype</h3>
|
||||||
|
|
||||||
|
<p>By default, users will use a doctype-based, permissive but secure
|
||||||
|
whitelist. They must define a <strong>doctype</strong>, and this serves
|
||||||
|
as the first method of determining a filterset.</p>
|
||||||
|
|
||||||
|
<p class="technical">This identifier is based
|
||||||
|
on the name the W3C has given to the document type and <em>not</em>
|
||||||
|
the DTD identifier.</p>
|
||||||
|
|
||||||
|
<p>This parameter is set via the configuration object:</p>
|
||||||
|
|
||||||
|
<pre>$config->set('HTML', 'Doctype', 'XHTML 1.0 Transitional');</pre>
|
||||||
|
|
||||||
|
<h3>Selecting a Filterset</h3>
|
||||||
|
|
||||||
|
<p>However, selecting this doctype doesn't mean much, because if we
|
||||||
|
adhered exactly to the definition we would be letting XSS and other
|
||||||
|
nasties through. HTML Purifier must, in its filterset, allow a subset
|
||||||
|
of the doctype, which we shall call a <strong>filterset</strong>.</p>
|
||||||
|
|
||||||
|
<p>By default, HTML Purifier will use the <strong>Rich</strong>
|
||||||
|
filterset, which allows as many elements as possible with untrusted
|
||||||
|
sources. Other possible filtersets could be:</p>
|
||||||
|
|
||||||
|
<dl>
|
||||||
|
<dt>Full</dt>
|
||||||
|
<dd>Allows the full span of elements in the doctype, good if you want
|
||||||
|
HTML Purifier to work as a Tidy substitute but not to strip
|
||||||
|
anything out.</dd>
|
||||||
|
<dt>Plain</dt>
|
||||||
|
<dd>Provides a minimum set of tags for semantic markup of things
|
||||||
|
like blog comments.</dd>
|
||||||
|
</dl>
|
||||||
|
|
||||||
|
<p>Extension-authors would be able to define custom filtersets for
|
||||||
|
other users to use.</p>
|
||||||
|
|
||||||
|
<p>A possible call to select a filterset would be:</p>
|
||||||
|
|
||||||
|
<pre>$config->set('HTML', 'Filterset', 'Rich');</pre>
|
||||||
|
|
||||||
|
<h3>Selecting Mode</h3>
|
||||||
|
|
||||||
|
<p>Within filtersets, there are various <strong>modes</strong> of operation.
|
||||||
|
These indicate variant behaviors that, while not strictly changing the
|
||||||
|
allowed set of elements and attributes, will definitely affect the output.
|
||||||
|
Currently, we have two modes, which may be used together:</p>
|
||||||
|
|
||||||
|
<dl>
|
||||||
|
<dt>Lenient</dt>
|
||||||
|
<dd>Deprecated elements and attributes will be transformed into
|
||||||
|
standards-compliant alternatives when explicitly disallowed. For
|
||||||
|
example, in the XHTML 1.0 Strict doctype, a <code>center</code>
|
||||||
|
tag would be turned into a <code>div</code> with the CSS property
|
||||||
|
<code>text-align:center;</code>, but in XHTML 1.0 Transitional
|
||||||
|
the tag would be preserved. This mode is on by default.</dd>
|
||||||
|
<dt>Correctional</dt>
|
||||||
|
<dd>Deprecated elements and attributes will be transformed into
|
||||||
|
standards-compliant alternatives whenever possible. Referring
|
||||||
|
back to the previous example, the <code>center</code> tag would
|
||||||
|
be transformed in both cases. However, tags without a
|
||||||
|
reasonable standards-compliant alternative will be preserved
|
||||||
|
in their original form. This mode is on by default. It may have
|
||||||
|
various levels of operation.</dd>
|
||||||
|
</dl>
|
||||||
|
|
||||||
|
<p>A possible call to select modes would be:</p>
|
||||||
|
|
||||||
|
<pre>$config->set('HTML', 'Mode', array('correctional', 'lenient'));</pre>
|
||||||
|
|
||||||
|
<p>If modes have extra parameters, a hash might work well:</p>
|
||||||
|
|
||||||
|
<pre>$config->set('HTML', 'Mode', array(
|
||||||
|
'correctional' => 9, // strongest level
|
||||||
|
'lenient' => true // this one's just boolean
|
||||||
|
));</pre>
|
||||||
|
|
||||||
|
<p>Modes may possibly be wrapped up with the filterset declaration:</p>
|
||||||
|
|
||||||
|
<pre>$config->set('HTML', 'Filterset', 'Rich: correctional, lenient');</pre>
|
||||||
|
|
||||||
|
<p>Further investigation in this field is necessary.</p>
|
||||||
|
|
||||||
|
<h3>Selecting Modules / Tags / Attributes</h3>
|
||||||
|
|
||||||
|
<p>If this cookie cutter approach doesn't appeal to a user, they may
|
||||||
|
decide to roll their own filterset by selecting modules, tags and
|
||||||
|
attributes to allow.</p>
|
||||||
|
|
||||||
|
<p class="technical">This would make use of the same facilities
|
||||||
|
as a filterset author would use, except that it would go under an
|
||||||
|
<q>anonymous</q> filterset that would be auto-selected if any of the
|
||||||
|
relevant module/tag/attribute selection configuration directives were
|
||||||
|
non-null.</p>
|
||||||
|
|
||||||
|
<p>On the highest level, a user will usually be most interested in
|
||||||
|
directly specifying which elements and attributes are desired. For
|
||||||
|
example:</p>
|
||||||
|
|
||||||
|
<pre>$config->set('HTML', 'AllowedElements', 'a,b,em,p,blockquote,code,i');</pre>
|
||||||
|
|
||||||
|
<p>Attribute declarations could be merged into this declaration as such:</p>
|
||||||
|
|
||||||
|
<pre>$config->set('HTML', 'Allowed', 'a[href,title],b,em,p[class],blockquote[cite],code,i');</pre>
|
||||||
|
|
||||||
|
<p>...or be kept separate:</p>
|
||||||
|
|
||||||
|
<pre>$config->set('HTML', 'AllowedAttributes', 'a.href,a.title,p.class,blockquote.cite');</pre>
|
||||||
|
|
||||||
|
<p class="technical">Considering that, internally speaking, as mandated by
|
||||||
|
the XHTML 1.1 Modularization specification, we have organized our
|
||||||
|
elements around modules, considerable gymnastics will be needed to
|
||||||
|
get this sort of functionality working.</p>
|
||||||
|
|
||||||
|
<p>A user may also specify a module to load a class of elements and attributes
|
||||||
|
into their filterset:</p>
|
||||||
|
|
||||||
|
<pre>$config->set('HTML', 'Allowed', 'Hypertext,Core');</pre>
|
||||||
|
|
||||||
|
<p class="fixme">The granularity of these modules is too coarse for
|
||||||
|
the average user (for example, the core module loads everything from
|
||||||
|
the essential <code>p</code> tag to the not-so-safe <code>h1</code>
|
||||||
|
tag). How do we make this still a viable solution?</p>
|
||||||
|
|
||||||
|
<h3>Unified selector</h3>
|
||||||
|
|
||||||
|
<p>Because selecting each and every one of these configuration options
|
||||||
|
is a chore, we may wish to offer a specialized configuration method
|
||||||
|
for selecting a filterset. Possibility:</p>
|
||||||
|
|
||||||
|
<pre>function selectFilter($doctype, $filterset, $mode)</pre>
|
||||||
|
|
||||||
|
<p>...which is simply a light wrapper over the individual configuration
|
||||||
|
calls. A custom config file format or text format could also be adopted.</p>
|
||||||
|
|
||||||
|
<div id="version">$Id$</div>
|
||||||
|
|
||||||
</body></html>
|
</body></html>
|
||||||
|
|
||||||
<!-- vim: et sw=4 sts=4
|
|
||||||
-->
|
|
||||||
|
52
docs/dev-code-quality.html
Normal file
52
docs/dev-code-quality.html
Normal file
@@ -0,0 +1,52 @@
|
|||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
|
||||||
|
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
|
||||||
|
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"><head>
|
||||||
|
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
|
||||||
|
<meta name="description" content="Discusses code quality issues and places that need to be refactored in HTML Purifier." />
|
||||||
|
<link rel="stylesheet" type="text/css" href="./style.css" />
|
||||||
|
|
||||||
|
<title>Code Quality Issues - HTML Purifier</title>
|
||||||
|
|
||||||
|
</head><body>
|
||||||
|
|
||||||
|
<h1>Code Quality Issues</h1>
|
||||||
|
|
||||||
|
<div id="filing">Filed under Development</div>
|
||||||
|
<div id="index">Return to the <a href="index.html">index</a>.</div>
|
||||||
|
<div id="home"><a href="http://hp.jpsband.org/">HTML Purifier</a> End-User Documentation</div>
|
||||||
|
|
||||||
|
<p>Okay, face it. Programmers can get lazy, cut corners, or make mistakes. They
|
||||||
|
also can do quick prototypes, and then forget to rewrite them later. Well,
|
||||||
|
while I can't list mistakes in here, I can list prototype-like segments
|
||||||
|
of code that should be aggressively refactored. This does not list
|
||||||
|
optimization issues, which need to be addressed after intense profiling.</p>
|
||||||
|
|
||||||
|
<pre>
|
||||||
|
docs/examples/demo.php - ad hoc HTML/PHP soup to the extreme
|
||||||
|
|
||||||
|
AttrDef
|
||||||
|
Class - doesn't support Unicode characters (fringe); uses regular
|
||||||
|
expressions
|
||||||
|
Lang - code duplication; premature optimization
|
||||||
|
Length - easily mistaken for CSSLength
|
||||||
|
URI - multiple regular expressions; missing validation for parts (?)
|
||||||
|
CSS - parser doesn't accept advanced CSS (fringe)
|
||||||
|
Number - constructor interface inconsistent with Integer
|
||||||
|
ConfigSchema - redefinition is a mess
|
||||||
|
Strategy
|
||||||
|
FixNesting - cannot bubble nodes out of structures, duplicated checks
|
||||||
|
for special-case parent node
|
||||||
|
MakeWellFormed - insufficient automatic closing definitions (check HTML
|
||||||
|
spec for optional end tags, also, closing based on type (block/inline)
|
||||||
|
might be efficient).
|
||||||
|
RemoveForeignElements - should be run in parallel with MakeWellFormed
|
||||||
|
URIScheme - needs to have callable generic checks
|
||||||
|
mailto - doesn't validate emails, doesn't validate querystring
|
||||||
|
news - doesn't validate opaque path
|
||||||
|
nntp - doesn't constrain path
|
||||||
|
</pre>
|
||||||
|
|
||||||
|
<div id="version">$Id$</div>
|
||||||
|
|
||||||
|
</body></html>
|
@@ -1,30 +0,0 @@
|
|||||||
|
|
||||||
Code Quality Issues
|
|
||||||
|
|
||||||
Okay, face it. Programmers can get lazy, cut corners, or make mistakes. They
|
|
||||||
also can do quick prototypes, and then forget to rewrite them later. Well,
|
|
||||||
while I can't list mistakes in here, I can list prototype-like segments
|
|
||||||
of code that should be aggressively refactored. This does not list
|
|
||||||
optimization issues, which need to be addressed after intense profiling.
|
|
||||||
|
|
||||||
docs/examples/demo.php - ad hoc HTML/PHP soup to the extreme
|
|
||||||
|
|
||||||
AttrDef - a lot of duplication, more generic classes need to be created;
|
|
||||||
a lot of strtolower() calls, no legit casing
|
|
||||||
Class - doesn't support Unicode characters (fringe); uses regular expressions
|
|
||||||
Lang - code duplication; premature optimization
|
|
||||||
Length - easily mistaken for CSSLength
|
|
||||||
URI - multiple regular expressions; missing validation for parts (?)
|
|
||||||
CSS - parser doesn't accept advanced CSS (fringe)
|
|
||||||
Number - constructor interface inconsistent with Integer
|
|
||||||
Strategy
|
|
||||||
FixNesting - cannot bubble nodes out of structures, duplicated checks
|
|
||||||
for special-case parent node
|
|
||||||
RemoveForeignElements - should be run in parallel with MakeWellFormed
|
|
||||||
URIScheme - needs to have callable generic checks
|
|
||||||
mailto - doesn't validate emails, doesn't validate querystring
|
|
||||||
news - doesn't validate opaque path
|
|
||||||
nntp - doesn't constrain path
|
|
||||||
tel - doesn't validate phone numbers, only allows characters '+', '1-9', and 'x'
|
|
||||||
|
|
||||||
vim: et sw=4 sts=4
|
|
@@ -1,79 +0,0 @@
|
|||||||
|
|
||||||
Configuration Backwards-Compatibility Breaks
|
|
||||||
|
|
||||||
In version 4.0.0, the configuration subsystem (composed of the outwards
|
|
||||||
facing Config class, as well as the ConfigSchema and ConfigSchema_Interchange
|
|
||||||
subsystems), was significantly revamped to make use of property lists.
|
|
||||||
While most of the changes are internal, some internal APIs were changed for the
|
|
||||||
sake of clarity. HTMLPurifier_Config was kept completely backwards compatible,
|
|
||||||
although some of the functions were retrofitted with an unambiguous alternate
|
|
||||||
syntax. Both of these changes are discussed in this document.
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
1. Outwards Facing Changes
|
|
||||||
--------------------------------------------------------------------------------
|
|
||||||
|
|
||||||
The HTMLPurifier_Config class now takes an alternate syntax. The general rule
|
|
||||||
is:
|
|
||||||
|
|
||||||
If you passed $namespace, $directive, pass "$namespace.$directive"
|
|
||||||
instead.
|
|
||||||
|
|
||||||
An example:
|
|
||||||
|
|
||||||
$config->set('HTML', 'Allowed', 'p');
|
|
||||||
|
|
||||||
becomes:
|
|
||||||
|
|
||||||
$config->set('HTML.Allowed', 'p');
|
|
||||||
|
|
||||||
New configuration options may have more than one namespace; they might
|
|
||||||
look something like %Filter.YouTube.Blacklist. While you could technically
|
|
||||||
set it with ('Filter', 'YouTube.Blacklist'), the logical extension
|
|
||||||
('Filter', 'YouTube', 'Blacklist') does not work.
|
|
||||||
|
|
||||||
The old API will still work, but will emit E_USER_NOTICEs.
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
2. Internal API Changes
|
|
||||||
--------------------------------------------------------------------------------
|
|
||||||
|
|
||||||
Some overarching notes: we've completely eliminated the notion of namespace;
|
|
||||||
it's now an informal construct for organizing related configuration directives.
|
|
||||||
|
|
||||||
Also, the validation routines for keys (formerly "$namespace.$directive")
|
|
||||||
have been completely relaxed. I don't think it really should be necessary.
|
|
||||||
|
|
||||||
2.1 HTMLPurifier_ConfigSchema
|
|
||||||
|
|
||||||
First off, if you're interfacing with this class, you really shouldn't.
|
|
||||||
HTMLPurifier_ConfigSchema_Builder_ConfigSchema is really the only class that
|
|
||||||
should ever be creating HTMLPurifier_ConfigSchema, and HTMLPurifier_Config the
|
|
||||||
only class that should be reading it.
|
|
||||||
|
|
||||||
All namespace related methods were removed; they are completely unnecessary
|
|
||||||
now. Any $namespace, $name arguments must be replaced with $key (where
|
|
||||||
$key == "$namespace.$name"), including for addAlias().
|
|
||||||
|
|
||||||
The $info and $defaults member variables are no longer indexed as
|
|
||||||
[$namespace][$name]; they are now indexed as ["$namespace.$name"].
|
|
||||||
|
|
||||||
All deprecated methods were finally removed, after having yelled at you as
|
|
||||||
an E_USER_NOTICE for a while now.
|
|
||||||
|
|
||||||
2.2 HTMLPurifier_ConfigSchema_Interchange
|
|
||||||
|
|
||||||
Member variable $namespaces was removed.
|
|
||||||
|
|
||||||
2.3 HTMLPurifier_ConfigSchema_Interchange_Id
|
|
||||||
|
|
||||||
Member variable $namespace and $directive removed; member variable $key added.
|
|
||||||
Any method that took $namespace, $directive now takes $key.
|
|
||||||
|
|
||||||
2.4 HTMLPurifier_ConfigSchema_Interchange_Namespace
|
|
||||||
|
|
||||||
Removed.
|
|
||||||
|
|
||||||
vim: et sw=4 sts=4
|
|
@@ -1,165 +0,0 @@
|
|||||||
Configuration naming
|
|
||||||
|
|
||||||
HTML Purifier 4.0.0 features a new configuration naming system that
|
|
||||||
allows arbitrary nesting of namespaces. While there are certain cases
|
|
||||||
in which using two namespaces is obviously better (the canonical example
|
|
||||||
is where we were using AutoFormatParam to contain directives for AutoFormat
|
|
||||||
parameters), it is unclear whether or not a general migration to highly
|
|
||||||
namespaced directives is a good idea.
|
|
||||||
|
|
||||||
== Case studies ==
|
|
||||||
|
|
||||||
=== Attr.* ===
|
|
||||||
|
|
||||||
We have a dead duck HTML.Attr.Name.UseCDATA which migrated before we decided
|
|
||||||
to think this out thoroughly.
|
|
||||||
|
|
||||||
We currently have a large number of directives in the Attr.* namespace.
|
|
||||||
These directives tweak the behavior of some HTML attributes. They have
|
|
||||||
the properties:
|
|
||||||
|
|
||||||
* While they apply to only one attribute at a time, the attribute can
|
|
||||||
span over multiple elements (not necessarily all attributes, either).
|
|
||||||
The information of which elements it impacts is either omitted or
|
|
||||||
informally stated (EnableID applies to all elements, DefaultImageAlt
|
|
||||||
applies to <img> tags, AllowedRev doesn't say but only applies to <a> tags).
|
|
||||||
|
|
||||||
* There is a certain degree of clustering that could be applied, especially
|
|
||||||
to the ID directives. The clustering could be done with respect to
|
|
||||||
what element/attribute was used, i.e.
|
|
||||||
|
|
||||||
*.id -> EnableID, IDBlacklistRegexp, IDBlacklist, IDPrefixLocal, IDPrefix
|
|
||||||
img.src -> DefaultInvalidImage
|
|
||||||
img.alt -> DefaultImageAlt, DefaultInvalidImageAlt
|
|
||||||
bdo.dir -> DefaultTextDir
|
|
||||||
a.rel -> AllowedRel
|
|
||||||
a.rev -> AllowedRev
|
|
||||||
a.target -> AllowedFrameTargets
|
|
||||||
a.name -> Name.UseCDATA
|
|
||||||
|
|
||||||
* The directives often reference generic attribute types that were specified
|
|
||||||
in the DTD/specification. However, some of the behavior specifically relies
|
|
||||||
on the fact that other use cases of the attribute are not, at current,
|
|
||||||
supported by HTML Purifier.
|
|
||||||
|
|
||||||
AllowedRel, AllowedRev -> heavily <a> specific; if <link> ends up being
|
|
||||||
allowed, we will also have to give users specificity there (we also
|
|
||||||
want to preserve generality) DTD %Linktypes, HTML5 distinguishes
|
|
||||||
between <link> and <a>/<area>
|
|
||||||
AllowedFrameTargets -> heavily <a> specific, but also used by <area>
|
|
||||||
and <form>. Transitional DTD %FrameTarget, not present in strict,
|
|
||||||
HTML5 calls them "browsing contexts"
|
|
||||||
Default*Image* -> as a default parameter, is almost entirely exclusive
|
|
||||||
to <img>
|
|
||||||
EnableID -> global attribute
|
|
||||||
Name.UseCDATA -> heavily <a> specific, but has heavy other usage by
|
|
||||||
many things
|
|
||||||
|
|
||||||
== AutoFormat.* ==
|
|
||||||
|
|
||||||
These have the fairly normal pluggable architecture that lends itself to
|
|
||||||
large amounts of namespaces (pluggability may be the key to figuring
|
|
||||||
out when gratuitous namespacing is good.) Properties:
|
|
||||||
|
|
||||||
* Boolean directives are fair game for being namespaced: for example,
|
|
||||||
RemoveEmpty.RemoveNbsp triggers RemoveEmpty.RemoveNbsp.Exceptions,
|
|
||||||
the latter of which only makes sense when RemoveEmpty.RemoveNbsp
|
|
||||||
is set to true. (The same applies to RemoveNbsp too)
|
|
||||||
|
|
||||||
The AutoFormat string is a bit long, but is the only bit of repeated
|
|
||||||
context.
|
|
||||||
|
|
||||||
== Core.* ==
|
|
||||||
|
|
||||||
Core is the potpourri of directives, mostly regarding some minor behavioral
|
|
||||||
tweaks for HTML handling abilities.
|
|
||||||
|
|
||||||
AggressivelyFixLt
|
|
||||||
AllowParseManyTags
|
|
||||||
ConvertDocumentToFragment
|
|
||||||
DirectLexLineNumberSyncInterval
|
|
||||||
LexerImpl
|
|
||||||
MaintainLineNumbers
|
|
||||||
Lexer
|
|
||||||
CollectErrors
|
|
||||||
Language
|
|
||||||
Error handling (Language is ostensibly a little more general, but
|
|
||||||
it's only used for error handling right now)
|
|
||||||
ColorKeywords
|
|
||||||
CSS and HTML
|
|
||||||
Encoding
|
|
||||||
EscapeNonASCIICharacters
|
|
||||||
Character encoding
|
|
||||||
EscapeInvalidChildren
|
|
||||||
EscapeInvalidTags
|
|
||||||
HiddenElements
|
|
||||||
RemoveInvalidImg
|
|
||||||
Lexing/Output
|
|
||||||
RemoveScriptContents
|
|
||||||
Deprecated
|
|
||||||
|
|
||||||
== HTML.* ==
|
|
||||||
|
|
||||||
AllowedAttributes
|
|
||||||
AllowedElements
|
|
||||||
AllowedModules
|
|
||||||
Allowed
|
|
||||||
ForbiddenAttributes
|
|
||||||
ForbiddenElements
|
|
||||||
Element set tuning
|
|
||||||
BlockWrapper
|
|
||||||
Child def advanced twiddle
|
|
||||||
CoreModules
|
|
||||||
CustomDoctype
|
|
||||||
Advanced HTMLModuleManager twiddles
|
|
||||||
DefinitionID
|
|
||||||
DefinitionRev
|
|
||||||
Caching
|
|
||||||
Doctype
|
|
||||||
Parent
|
|
||||||
Strict
|
|
||||||
XHTML
|
|
||||||
Global environment
|
|
||||||
MaxImgLength
|
|
||||||
Attribute twiddle? (applies to two attributes)
|
|
||||||
Proprietary
|
|
||||||
SafeEmbed
|
|
||||||
SafeObject
|
|
||||||
Trusted
|
|
||||||
Extra functionality/tagsets
|
|
||||||
TidyAdd
|
|
||||||
TidyLevel
|
|
||||||
TidyRemove
|
|
||||||
Tidy
|
|
||||||
|
|
||||||
== Output.* ==
|
|
||||||
|
|
||||||
These directly affect the output of Generator. These are all advanced
|
|
||||||
twiddles.
|
|
||||||
|
|
||||||
== URI.* ==
|
|
||||||
|
|
||||||
AllowedSchemes
|
|
||||||
OverrideAllowedSchemes
|
|
||||||
Scheme tuning
|
|
||||||
Base
|
|
||||||
DefaultScheme
|
|
||||||
Host
|
|
||||||
Global environment
|
|
||||||
DefinitionID
|
|
||||||
DefinitionRev
|
|
||||||
Caching
|
|
||||||
DisableExternalResources
|
|
||||||
DisableExternal
|
|
||||||
DisableResources
|
|
||||||
Disable
|
|
||||||
Contextual/authority tuning
|
|
||||||
HostBlacklist
|
|
||||||
Authority tuning
|
|
||||||
MakeAbsolute
|
|
||||||
MungeResources
|
|
||||||
MungeSecretKey
|
|
||||||
Munge
|
|
||||||
Transformation behavior (munge can be grouped)
|
|
||||||
|
|
||||||
|
|
@@ -1,412 +0,0 @@
|
|||||||
<?xml version="1.0" encoding="UTF-8"?>
|
|
||||||
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
|
|
||||||
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
|
|
||||||
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
|
|
||||||
<head>
|
|
||||||
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
|
|
||||||
<meta name="description" content="Describes config schema framework in HTML Purifier." />
|
|
||||||
<link rel="stylesheet" type="text/css" href="./style.css" />
|
|
||||||
<title>Config Schema - HTML Purifier</title>
|
|
||||||
</head>
|
|
||||||
<body>
|
|
||||||
|
|
||||||
<h1>Config Schema</h1>
|
|
||||||
|
|
||||||
<div id="filing">Filed under Development</div>
|
|
||||||
<div id="index">Return to the <a href="index.html">index</a>.</div>
|
|
||||||
<div id="home"><a href="http://htmlpurifier.org/">HTML Purifier</a> End-User Documentation</div>
|
|
||||||
|
|
||||||
<p>
|
|
||||||
HTML Purifier has a fairly complex system for configuration. Users
|
|
||||||
interact with a <code>HTMLPurifier_Config</code> object to
|
|
||||||
set configuration directives. The values they set are validated according
|
|
||||||
to a configuration schema, <code>HTMLPurifier_ConfigSchema</code>.
|
|
||||||
</p>
|
|
||||||
|
|
||||||
<p>
|
|
||||||
The schema is mostly transparent to end-users, but if you're doing development
|
|
||||||
work for HTML Purifier and need to define a new configuration directive,
|
|
||||||
you'll need to interact with it. We'll also talk about how to define
|
|
||||||
userspace configuration directives at the very end.
|
|
||||||
</p>
|
|
||||||
|
|
||||||
<h2>Write a directive file</h2>
|
|
||||||
|
|
||||||
<p>
|
|
||||||
Directive files define configuration directives to be used by
|
|
||||||
HTML Purifier. They are placed in <code>library/HTMLPurifier/ConfigSchema/schema/</code>
|
|
||||||
in the form <code><em>Namespace</em>.<em>Directive</em>.txt</code> (I
|
|
||||||
couldn't think of a more descriptive file extension.)
|
|
||||||
Directive files are actually what we call <code>StringHash</code>es,
|
|
||||||
i.e. associative arrays represented in a string form reminiscent of
|
|
||||||
<a href="http://qa.php.net/write-test.php">PHPT</a> tests. Here's a
|
|
||||||
sample directive file, <code>Test.Sample.txt</code>:
|
|
||||||
</p>
|
|
||||||
|
|
||||||
<pre>Test.Sample
|
|
||||||
TYPE: string/null
|
|
||||||
DEFAULT: NULL
|
|
||||||
ALLOWED: 'foo', 'bar'
|
|
||||||
VALUE-ALIASES: 'baz' => 'bar'
|
|
||||||
VERSION: 3.1.0
|
|
||||||
--DESCRIPTION--
|
|
||||||
This is a sample configuration directive for the purposes of the
|
|
||||||
<code>dev-config-schema.html</code> documentation.
|
|
||||||
--ALIASES--
|
|
||||||
Test.Example</pre>
|
|
||||||
|
|
||||||
<p>
|
|
||||||
Each of these segments has a specific meaning:
|
|
||||||
</p>
|
|
||||||
|
|
||||||
<table class="table">
|
|
||||||
<thead>
|
|
||||||
<tr>
|
|
||||||
<th>Key</th>
|
|
||||||
<th>Example</th>
|
|
||||||
<th>Description</th>
|
|
||||||
</tr>
|
|
||||||
</thead>
|
|
||||||
<tbody>
|
|
||||||
<tr>
|
|
||||||
<td>ID</td>
|
|
||||||
<td>Test.Sample</td>
|
|
||||||
<td>The name of the directive, in the form Namespace.Directive
|
|
||||||
(implicitly the first line)</td>
|
|
||||||
</tr>
|
|
||||||
<tr>
|
|
||||||
<td>TYPE</td>
|
|
||||||
<td>string/null</td>
|
|
||||||
<td>The type of variable this directive accepts. See below for
|
|
||||||
details. You can also add <code>/null</code> to the end of
|
|
||||||
any basic type to allow null values too.</td>
|
|
||||||
</tr>
|
|
||||||
<tr>
|
|
||||||
<td>DEFAULT</td>
|
|
||||||
<td>NULL</td>
|
|
||||||
<td>A parseable PHP expression of the default value.</td>
|
|
||||||
</tr>
|
|
||||||
<tr>
|
|
||||||
<td>DESCRIPTION</td>
|
|
||||||
<td>This is a...</td>
|
|
||||||
<td>An HTML description of what this directive does.</td>
|
|
||||||
</tr>
|
|
||||||
<tr>
|
|
||||||
<td>VERSION</td>
|
|
||||||
<td>3.1.0</td>
|
|
||||||
<td><em>Recommended</em>. The version of HTML Purifier in which this directive was added.
|
|
||||||
Directives that have been around since 1.0.0 don't have this,
|
|
||||||
but any new ones should.</td>
|
|
||||||
</tr>
|
|
||||||
<tr>
|
|
||||||
<td>ALIASES</td>
|
|
||||||
<td>Test.Example</td>
|
|
||||||
<td><em>Optional</em>. A comma separated list of aliases for this directive.
|
|
||||||
This is most useful for backwards compatibility and should
|
|
||||||
not be used otherwise.</td>
|
|
||||||
</tr>
|
|
||||||
<tr>
|
|
||||||
<td>ALLOWED</td>
|
|
||||||
<td>'foo', 'bar'</td>
|
|
||||||
<td><em>Optional</em>. Set of allowed values for a directive,
|
|
||||||
a comma separated list of parseable PHP expressions. This
|
|
||||||
is only allowed for string, istring, text and itext TYPEs.</td>
|
|
||||||
</tr>
|
|
||||||
<tr>
|
|
||||||
<td>VALUE-ALIASES</td>
|
|
||||||
<td>'baz' => 'bar'</td>
|
|
||||||
<td><em>Optional</em>. Mapping of one value to another, and
|
|
||||||
should be a comma separated list of keypair duples. This
|
|
||||||
is only allowed for string, istring, text and itext TYPEs.</td>
|
|
||||||
</tr>
|
|
||||||
<tr>
|
|
||||||
<td>DEPRECATED-VERSION</td>
|
|
||||||
<td>3.1.0</td>
|
|
||||||
<td><em>Not shown</em>. Indicates that the directive was
|
|
||||||
deprecated in this version.</td>
|
|
||||||
</tr>
|
|
||||||
<tr>
|
|
||||||
<td>DEPRECATED-USE</td>
|
|
||||||
<td>Test.NewDirective</td>
|
|
||||||
<td><em>Not shown</em>. Indicates what new directive should be
|
|
||||||
used instead. Note that the directives will functionally be
|
|
||||||
different, although they should offer the same functionality.
|
|
||||||
If they are identical, use an alias instead.</td>
|
|
||||||
</tr>
|
|
||||||
<tr>
|
|
||||||
<td>EXTERNAL</td>
|
|
||||||
<td>CSSTidy</td>
|
|
||||||
<td><em>Not shown</em>. Indicates if there is an external library
|
|
||||||
the user will need to download and install to use this configuration
|
|
||||||
directive. As of right now, this is merely a Google-able name; future
|
|
||||||
versions may also provide links and instructions.</td>
|
|
||||||
</tr>
|
|
||||||
</tbody>
|
|
||||||
</table>
|
|
||||||
|
|
||||||
<p>
|
|
||||||
Some notes on format and style:
|
|
||||||
</p>
|
|
||||||
|
|
||||||
<ul>
|
|
||||||
<li>
|
|
||||||
Each of these keys can be expressed in the short format
|
|
||||||
(<code>KEY: Value</code>) or the long format
|
|
||||||
(<code>--KEY--</code> with value beneath). You must use the
|
|
||||||
long format if multiple lines are needed, or if a long format
|
|
||||||
has been used already (that's why <code>ALIASES</code> in our
|
|
||||||
example is in the long format); otherwise, it's user preference.
|
|
||||||
</li>
|
|
||||||
<li>
|
|
||||||
The HTML descriptions should be wrapped at about 80 columns; do
|
|
||||||
not rely on editor word-wrapping.
|
|
||||||
</li>
|
|
||||||
</ul>
|
|
||||||
|
|
||||||
<p>
|
|
||||||
Also, as promised, here is the set of possible types:
|
|
||||||
</p>
|
|
||||||
|
|
||||||
<table class="table">
|
|
||||||
<thead>
|
|
||||||
<tr>
|
|
||||||
<th>Type</th>
|
|
||||||
<th>Example</th>
|
|
||||||
<th>Description</th>
|
|
||||||
</tr>
|
|
||||||
</thead>
|
|
||||||
<tbody>
|
|
||||||
<tr>
|
|
||||||
<td>string</td>
|
|
||||||
<td>'Foo'</td>
|
|
||||||
<td><a href="http://docs.php.net/manual/en/language.types.string.php">String</a> without newlines</td>
|
|
||||||
</tr>
|
|
||||||
<tr>
|
|
||||||
<td>istring</td>
|
|
||||||
<td>'foo'</td>
|
|
||||||
<td>Case insensitive ASCII string without newlines</td>
|
|
||||||
</tr>
|
|
||||||
<tr>
|
|
||||||
<td>text</td>
|
|
||||||
<td>"A<em>\n</em>b"</td>
|
|
||||||
<td>String with newlines</td>
|
|
||||||
</tr>
|
|
||||||
<tr>
|
|
||||||
<td>itext</td>
|
|
||||||
<td>"a<em>\n</em>b"</td>
|
|
||||||
<td>Case insensitive ASCII string with newlines</td>
|
|
||||||
</tr>
|
|
||||||
<tr>
|
|
||||||
<td>int</td>
|
|
||||||
<td>23</td>
|
|
||||||
<td>Integer</td>
|
|
||||||
</tr>
|
|
||||||
<tr>
|
|
||||||
<td>float</td>
|
|
||||||
<td>3.0</td>
|
|
||||||
<td>Floating point number</td>
|
|
||||||
</tr>
|
|
||||||
<tr>
|
|
||||||
<td>bool</td>
|
|
||||||
<td>true</td>
|
|
||||||
<td>Boolean</td>
|
|
||||||
</tr>
|
|
||||||
<tr>
|
|
||||||
<td>lookup</td>
|
|
||||||
<td>array('key' => true)</td>
|
|
||||||
<td>Lookup array, used with <code>isset($var[$key])</code></td>
|
|
||||||
</tr>
|
|
||||||
<tr>
|
|
||||||
<td>list</td>
|
|
||||||
<td>array('f', 'b')</td>
|
|
||||||
<td>List array, with ordered numerical indexes</td>
|
|
||||||
</tr>
|
|
||||||
<tr>
|
|
||||||
<td>hash</td>
|
|
||||||
<td>array('key' => 'val')</td>
|
|
||||||
<td>Associative array of keys to values</td>
|
|
||||||
</tr>
|
|
||||||
<tr>
|
|
||||||
<td>mixed</td>
|
|
||||||
<td>new stdClass</td>
|
|
||||||
<td>Any PHP variable is fine</td>
|
|
||||||
</tr>
|
|
||||||
</tbody>
|
|
||||||
</table>
|
|
||||||
|
|
||||||
<p>
|
|
||||||
The examples represent what will be returned out of the configuration
|
|
||||||
object; users have a little bit of leeway when setting configuration
|
|
||||||
values (for example, a lookup value can be specified as a list;
|
|
||||||
HTML Purifier will flip it as necessary.) These types are defined
|
|
||||||
in <a href="http://repo.or.cz/w/htmlpurifier.git?a=blob;hb=HEAD;f=library/HTMLPurifier/VarParser.php">
|
|
||||||
library/HTMLPurifier/VarParser.php</a>.
|
|
||||||
</p>
|
|
||||||
|
|
||||||
<p>
|
|
||||||
For more information on what values are allowed, and how they are parsed,
|
|
||||||
consult <a href="http://repo.or.cz/w/htmlpurifier.git?a=blob;hb=HEAD;f=library/HTMLPurifier/ConfigSchema/InterchangeBuilder.php">
|
|
||||||
library/HTMLPurifier/ConfigSchema/InterchangeBuilder.php</a>, as well
|
|
||||||
as <a href="http://repo.or.cz/w/htmlpurifier.git?a=blob;hb=HEAD;f=library/HTMLPurifier/ConfigSchema/Interchange/Directive.php">
|
|
||||||
library/HTMLPurifier/ConfigSchema/Interchange/Directive.php</a> for
|
|
||||||
the semantics of the parsed values.
|
|
||||||
</p>
|
|
||||||
|
|
||||||
<h2>Refreshing the cache</h2>
|
|
||||||
|
|
||||||
<p>
|
|
||||||
You may have noticed that your directive file isn't doing anything
|
|
||||||
yet. That's because it hasn't been added to the runtime
|
|
||||||
<code>HTMLPurifier_ConfigSchema</code> instance. Run
|
|
||||||
<code>maintenance/generate-schema-cache.php</code> to fix this.
|
|
||||||
If there were no errors, you're good to go! Don't forget to add
|
|
||||||
some unit tests for your functionality!
|
|
||||||
</p>
|
|
||||||
|
|
||||||
<p>
|
|
||||||
If you ever make changes to your configuration directives, you
|
|
||||||
will need to run this script again.
|
|
||||||
</p>
|
|
||||||
<h2>Adding in-house schema definitions</h2>
|
|
||||||
|
|
||||||
<p>
|
|
||||||
Placing stuff directly in HTML Purifier's source tree is generally not a
|
|
||||||
good idea, so HTML Purifier 4.0.0+ has some facilities in place to make your
|
|
||||||
life easier.
|
|
||||||
</p>
|
|
||||||
|
|
||||||
<p>
|
|
||||||
The first is to pass an extra parameter to <code>maintenance/generate-schema-cache.php</code>
|
|
||||||
with the location of your directory (relative or absolute path will do). For example,
|
|
||||||
if I'm storing my custom definitions in <em>/var/htmlpurifier/myschema</em>, run:
|
|
||||||
<code>php maintenance/generate-schema-cache.php /var/htmlpurifier/myschema</code>.
|
|
||||||
</p>
|
|
||||||
|
|
||||||
<p>
|
|
||||||
Alternatively, you can create a small loader PHP file in the HTML Purifier base
|
|
||||||
directory named <code>config-schema.php</code> (this is the same directory
|
|
||||||
you would place a <code>test-settings.php</code> file). In this file, add
|
|
||||||
the following line for each directory you want to load:
|
|
||||||
</p>
|
|
||||||
|
|
||||||
<pre>$builder->buildDir($interchange, '/var/htmlpurifier/myschema');</pre>
|
|
||||||
|
|
||||||
<p>You can even load a single file using:</p>
|
|
||||||
|
|
||||||
<pre>$builder->buildFile($interchange, '/var/htmlpurifier/myschema/MyApp.Directive.txt');</pre>
|
|
||||||
|
|
||||||
<p>Storing custom definitions that you don't plan on sending back upstream in
|
|
||||||
a separate directory is <em>definitely</em> a good idea! Additionally, picking
|
|
||||||
a good namespace can go a long way to saving you grief if you want to use
|
|
||||||
someone else's change, but they picked the same name, or if HTML Purifier
|
|
||||||
decides to add support for a configuration directive that has the same name.</p>
|
|
||||||
|
|
||||||
<!-- TODO: how to name directives that rely on naming conventions -->
|
|
||||||
|
|
||||||
<h2>Errors</h2>
|
|
||||||
|
|
||||||
<p>
|
|
||||||
All directive files go through a rigorous validation process
|
|
||||||
through <a href="http://repo.or.cz/w/htmlpurifier.git?a=blob;hb=HEAD;f=library/HTMLPurifier/ConfigSchema/Validator.php">
|
|
||||||
library/HTMLPurifier/ConfigSchema/Validator.php</a>, as well
|
|
||||||
as some basic checks during building. While
|
|
||||||
listing every error out here is out-of-scope for this document, we
|
|
||||||
can give some general tips for interpreting error messages.
|
|
||||||
There are two types of errors: builder errors and validation errors.
|
|
||||||
</p>
|
|
||||||
|
|
||||||
<h3>Builder errors</h3>
|
|
||||||
|
|
||||||
<blockquote>
|
|
||||||
<p>
|
|
||||||
<strong>Exception:</strong> Expected type string, got
|
|
||||||
integer in DEFAULT in directive hash 'Ns.Dir'
|
|
||||||
</p>
|
|
||||||
</blockquote>
|
|
||||||
|
|
||||||
<p>
|
|
||||||
You can identify a builder error by the keyword "directive hash."
|
|
||||||
These are the easiest to deal with, because they directly correspond
|
|
||||||
with your directive file. Find the offending directive file (which
|
|
||||||
is the directive hash plus the .txt extension), find the
|
|
||||||
offending index ("in DEFAULT" means the DEFAULT key) and fix the error.
|
|
||||||
This particular error would occur if your default value is not the same
|
|
||||||
type as TYPE.
|
|
||||||
</p>
|
|
||||||
|
|
||||||
<h3>Validation errors</h3>
|
|
||||||
|
|
||||||
<blockquote>
|
|
||||||
<p>
|
|
||||||
<strong>Exception:</strong> Alias 3 in valueAliases in directive
|
|
||||||
'Ns.Dir' must be a string
|
|
||||||
</p>
|
|
||||||
</blockquote>
|
|
||||||
|
|
||||||
<p>
|
|
||||||
These are a little trickier, because we're not actually validating
|
|
||||||
your directive file, or even the direct string hash representation.
|
|
||||||
We're validating an Interchange object, and the error messages do
|
|
||||||
not mention any string hash keys.
|
|
||||||
</p>
|
|
||||||
|
|
||||||
<p>
|
|
||||||
Nevertheless, it's not difficult to figure out what went wrong.
|
|
||||||
Read the "context" statements in reverse:
|
|
||||||
</p>
|
|
||||||
|
|
||||||
<dl>
|
|
||||||
<dt>in directive 'Ns.Dir'</dt>
|
|
||||||
<dd>This means we need to look at the directive file <code>Ns.Dir.txt</code></dd>
|
|
||||||
<dt>in valueAliases</dt>
|
|
||||||
<dd>There's no key actually called this, but there's one that's close:
|
|
||||||
VALUE-ALIASES. Indeed, that's where to look.</dd>
|
|
||||||
<dt>Alias 3</dt>
|
|
||||||
<dd>The value alias that is equal to 3 is the culprit.</dd>
|
|
||||||
</dl>
|
|
||||||
|
|
||||||
<p>
|
|
||||||
In this particular case, you're not allowed to alias integer values to
|
|
||||||
string values.
|
|
||||||
</p>
|
|
||||||
|
|
||||||
<p>
|
|
||||||
The most difficult part is translating the Interchange member variable (valueAliases)
|
|
||||||
into a directive file key (VALUE-ALIASES), but there's a one-to-one
|
|
||||||
correspondence currently. If the two formats diverge, any discrepancies
|
|
||||||
will be described in <a href="http://repo.or.cz/w/htmlpurifier.git?a=blob;hb=HEAD;f=library/HTMLPurifier/ConfigSchema/InterchangeBuilder.php">
|
|
||||||
library/HTMLPurifier/ConfigSchema/InterchangeBuilder.php</a>.
|
|
||||||
</p>
|
|
||||||
|
|
||||||
<h2>Internals</h2>
|
|
||||||
|
|
||||||
<p>
|
|
||||||
Much of the configuration schema framework's codebase deals with
|
|
||||||
shuffling data from one format to another, and doing validation on this
|
|
||||||
data.
|
|
||||||
The keystone of all of this is the <code>HTMLPurifier_ConfigSchema_Interchange</code>
|
|
||||||
class, which represents the purest, parsed representation of the schema.
|
|
||||||
</p>
|
|
||||||
|
|
||||||
<p>
|
|
||||||
Hand-writing this data is unwieldy, however, so we write directive files.
|
|
||||||
These directive files are parsed by <code>HTMLPurifier_StringHashParser</code>
|
|
||||||
into <code>HTMLPurifier_StringHash</code>es, which then
|
|
||||||
are run through <code>HTMLPurifier_ConfigSchema_InterchangeBuilder</code>
|
|
||||||
to construct the interchange object.
|
|
||||||
</p>
|
|
||||||
|
|
||||||
<p>
|
|
||||||
From the interchange object, the data can be siphoned into other forms
|
|
||||||
using <code>HTMLPurifier_ConfigSchema_Builder</code> subclasses.
|
|
||||||
For example, <code>HTMLPurifier_ConfigSchema_Builder_ConfigSchema</code>
|
|
||||||
generates a runtime <code>HTMLPurifier_ConfigSchema</code> object,
|
|
||||||
which <code>HTMLPurifier_Config</code> uses to validate its incoming
|
|
||||||
data. There is also an XML serializer, which is used to build documentation.
|
|
||||||
</p>
|
|
||||||
|
|
||||||
</body>
|
|
||||||
</html>
|
|
||||||
|
|
||||||
<!-- vim: et sw=4 sts=4
|
|
||||||
-->
|
|
@@ -1,68 +0,0 @@
|
|||||||
<?xml version="1.0" encoding="UTF-8"?>
|
|
||||||
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
|
|
||||||
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
|
|
||||||
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
|
|
||||||
<head>
|
|
||||||
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
|
|
||||||
<meta name="description" content="Discusses when to flush HTML Purifier's various caches." />
|
|
||||||
<link rel="stylesheet" type="text/css" href="./style.css" />
|
|
||||||
<title>Flushing the Purifier - HTML Purifier</title>
|
|
||||||
</head>
|
|
||||||
<body>
|
|
||||||
|
|
||||||
<h1>Flushing the Purifier</h1>
|
|
||||||
|
|
||||||
<div id="filing">Filed under Development</div>
|
|
||||||
<div id="index">Return to the <a href="index.html">index</a>.</div>
|
|
||||||
<div id="home"><a href="http://htmlpurifier.org/">HTML Purifier</a> End-User Documentation</div>
|
|
||||||
|
|
||||||
<p>
|
|
||||||
If you've been poking around the various folders in HTML Purifier,
|
|
||||||
you may have noticed the <code>maintenance</code> directory. Almost
|
|
||||||
all of these scripts are devoted to flushing out the various caches
|
|
||||||
HTML Purifier uses. Normal users don't have to worry about this:
|
|
||||||
regular library usage is transparent. However, when doing development
|
|
||||||
work on HTML Purifier, you may find you have to flush one of the
|
|
||||||
caches.
|
|
||||||
</p>
|
|
||||||
|
|
||||||
<p>
|
|
||||||
As a general rule of thumb, run <code>flush.php</code> whenever you make
|
|
||||||
any <em>major</em> changes, or when tests start mysteriously failing.
|
|
||||||
In more detail, run this script if:
|
|
||||||
</p>
|
|
||||||
|
|
||||||
<ul>
|
|
||||||
<li>
|
|
||||||
You added new source files to HTML Purifier's main library.
|
|
||||||
(see <code>generate-includes.php</code>)
|
|
||||||
</li>
|
|
||||||
<li>
|
|
||||||
You modified the configuration schema (see
|
|
||||||
<code>generate-schema-cache.php</code>). This usually means
|
|
||||||
adding or modifying files in <code>HTMLPurifier/ConfigSchema/schema/</code>,
|
|
||||||
although in rare cases modifying <code>HTMLPurifier/ConfigSchema.php</code>
|
|
||||||
will also require this.
|
|
||||||
</li>
|
|
||||||
<li>
|
|
||||||
You modified a Definition, or its subsystems. The most usual candidate
|
|
||||||
is <code>HTMLPurifier/HTMLDefinition.php</code>, which also encompasses
|
|
||||||
the files in <code>HTMLPurifier/HTMLModule/</code> as well as if you've been
|
|
||||||
<a href="enduser-customize.html">customizing definitions</a> without
|
|
||||||
the cache disabled. (see <code>flush-generation-cache.php</code>)
|
|
||||||
</li>
|
|
||||||
<li>
|
|
||||||
You modified source files, and have been using the standalone
|
|
||||||
version from the full installation. (see <code>generate-standalone.php</code>)
|
|
||||||
</li>
|
|
||||||
</ul>
|
|
||||||
|
|
||||||
<p>
|
|
||||||
You can check out the corresponding scripts for more information on what they
|
|
||||||
do.
|
|
||||||
</p>
|
|
||||||
|
|
||||||
</body></html>
|
|
||||||
|
|
||||||
<!-- vim: et sw=4 sts=4
|
|
||||||
-->
|
|
@@ -1,281 +0,0 @@
|
|||||||
|
|
||||||
INCLUDES, AUTOLOAD, BYTECODE CACHES and OPTIMIZATION
|
|
||||||
|
|
||||||
The Problem
|
|
||||||
-----------
|
|
||||||
|
|
||||||
HTML Purifier contains a number of extra components that are not used all
|
|
||||||
of the time, only if the user explicitly specifies that we should use
|
|
||||||
them.
|
|
||||||
|
|
||||||
Some of these optional components are optionally included (Filter,
|
|
||||||
Language, Lexer, Printer), while others are included all the time
|
|
||||||
(Injector, URIFilter, HTMLModule, URIScheme). We will stipulate that these
|
|
||||||
are all developer specified: it is conceivable that certain Tokens are not
|
|
||||||
used, but this is user-dependent and should not be trusted.
|
|
||||||
|
|
||||||
We should come up with a consistent way to handle these things and ensure
|
|
||||||
that we get the maximum performance when there are bytecode caches and
|
|
||||||
when there are not. Unfortunately, these two goals seem contrary to each
|
|
||||||
other.
|
|
||||||
|
|
||||||
A peripheral issue is the performance of ConfigSchema, which has been
|
|
||||||
shown to take a large, constant amount of initialization time, and is
|
|
||||||
intricately linked to the issue of includes due to its pervasive use
|
|
||||||
in our plugin architecture.
|
|
||||||
|
|
||||||
Pros and Cons
|
|
||||||
-------------
|
|
||||||
|
|
||||||
We will assume that user-based extensions will be included by them.
|
|
||||||
|
|
||||||
Conditional includes:
|
|
||||||
Pros:
|
|
||||||
- User management is simplified; only a single directive needs to be set
|
|
||||||
- Only necessary code is included
|
|
||||||
Cons:
|
|
||||||
- Doesn't play nicely with opcode caches
|
|
||||||
- Adds complexity to standalone version
|
|
||||||
- Optional configuration directives are not exposed without a little
|
|
||||||
extra coaxing (not implemented yet)
|
|
||||||
|
|
||||||
Include it all:
|
|
||||||
Pros:
|
|
||||||
- User management is still simple
|
|
||||||
- Plays nicely with opcode caches and standalone version
|
|
||||||
- All configuration directives are present
|
|
||||||
Cons:
|
|
||||||
- Lots of (how much?) extra code is included
|
|
||||||
- Classes that inherit from external libraries will cause compile
|
|
||||||
errors
|
|
||||||
|
|
||||||
Build an include stub (Let's do this!):
|
|
||||||
Pros:
|
|
||||||
- Only necessary code is included
|
|
||||||
- Plays nicely with opcode caches and standalone version
|
|
||||||
- require (without once) can be used, see above
|
|
||||||
- Could further extend as a compilation to one file
|
|
||||||
Cons:
|
|
||||||
- Not implemented yet
|
|
||||||
- Requires user intervention and use of a command line script
|
|
||||||
- Standalone script must be chained to this
|
|
||||||
- More complex and compiled-language-like
|
|
||||||
- Requires a whole new class of system-wide configuration directives,
|
|
||||||
as configuration objects can be reused
|
|
||||||
- Determining what needs to be included can be complex (see above)
|
|
||||||
- No way of autodetecting dynamically instantiated classes
|
|
||||||
- Might be slow
|
|
||||||
|
|
||||||
Include stubs
|
|
||||||
-------------
|
|
||||||
|
|
||||||
This solution may be "just right" for users who are heavily oriented
|
|
||||||
towards performance. However, there are a number of picky implementation
|
|
||||||
details to work out beforehand.
|
|
||||||
|
|
||||||
The number one concern is how to make the HTML Purifier files "work
|
|
||||||
out of the box", while still being able to easily get them into a form
|
|
||||||
that works with this setup. As the codebase stands right now, it would
|
|
||||||
be necessary to strip out all of the require_once calls. The only way
|
|
||||||
we could get rid of the require_once calls is to use __autoload or
|
|
||||||
use the stub for all cases (which might not be a bad idea).
|
|
||||||
|
|
||||||
Aside
|
|
||||||
-----
|
|
||||||
An important thing to remember, however, is that these require_once's
|
|
||||||
are valuable data about what classes a file needs. Unfortunately, there's
|
|
||||||
no distinction between whether or not the file is needed all the time,
|
|
||||||
or whether or not it is one of our "optional" files. Thus, it is
|
|
||||||
effectively useless.
|
|
||||||
|
|
||||||
Deprecated
|
|
||||||
----------
|
|
||||||
One of the things I'd like to do is have the code search for any classes
|
|
||||||
that are explicitly mentioned in the code. If a class isn't mentioned, I
|
|
||||||
get to assume that it is "optional," i.e. included via introspection.
|
|
||||||
The choice is either to use PHP's tokenizer or use regexps; regexps would
|
|
||||||
be faster but a tokenizer would be more correct. If this ends up being
|
|
||||||
unfeasible, adding dependency comments isn't a bad idea. (This could
|
|
||||||
even be done automatically by search/replacing require_once, although
|
|
||||||
we'd have to manually inspect the results for the optional requires.)
|
|
||||||
|
|
||||||
NOTE: This ends up not being necessary, as we're going to make the user
|
|
||||||
figure out all the extra classes they need, and only include the core
|
|
||||||
which is predetermined.
|
|
||||||
|
|
||||||
Using the autoload framework with include stubs works nicely with
|
|
||||||
introspective classes: instead of having to have require_once inside
|
|
||||||
the function, we can let autoload do the work; we simply need to
|
|
||||||
new $class or accept the object straight from the caller. Handling filters
|
|
||||||
becomes a simple matter of ticking off configuration directives, and
|
|
||||||
if ConfigSchema spits out errors, adding the necessary includes. We could
|
|
||||||
also use the autoload framework as a fallback, in case the user forgets
|
|
||||||
to make the include, but doesn't really care about performance.
|
|
||||||
|
|
||||||
Insight
|
|
||||||
-------
|
|
||||||
All of this talk is merely a natural extension of what our current
|
|
||||||
standalone functionality does. However, instead of having our code
|
|
||||||
perform the includes, or attempting to inline everything that possibly
|
|
||||||
could be used, we boot the issue to the user, making them include
|
|
||||||
everything or set up the fallback autoload handler.
|
|
||||||
|
|
||||||
Configuration Schema
|
|
||||||
--------------------
|
|
||||||
|
|
||||||
A common deficiency for all of the conditional include setups (including
|
|
||||||
the dynamically built include PHP stub) is that if one of these
|
|
||||||
conditionally included files includes a configuration directive, it
|
|
||||||
is not accessible to configdoc. A stopgap solution for this problem is
|
|
||||||
to have it piggy-back off of the data in the merge-library.php script
|
|
||||||
to figure out what extra files it needs to include, but if the file also
|
|
||||||
inherits classes that don't exist, we're in big trouble.
|
|
||||||
|
|
||||||
I think it's high time we centralized the configuration documentation.
|
|
||||||
However, the type checking has been a great boon for the library, and
|
|
||||||
I'd like to keep that. The compromise is to use some other source, and
|
|
||||||
then parse it into the ConfigSchema internal format (sans all of those
|
|
||||||
nasty documentation strings which we really don't need at runtime) and
|
|
||||||
serialize that for future use.
|
|
||||||
|
|
||||||
The next question is that of format. XML is very verbose, and the prospect
|
|
||||||
of setting defaults in it gives me the willies. However, this may be necessary.
|
|
||||||
Splitting up the file into manageable chunks may alleviate this trouble,
|
|
||||||
and we may even want to create our own format optimized for specifying
|
|
||||||
configuration. It might look like (based off the PHPT format, which is
|
|
||||||
nicely compact yet unambiguous and human-readable):
|
|
||||||
|
|
||||||
Core.HiddenElements
|
|
||||||
TYPE: lookup
|
|
||||||
DEFAULT: array('script', 'style') // auto-converted during processing
|
|
||||||
--ALIASES--
|
|
||||||
Core.InvisibleElements, Core.StupidElements
|
|
||||||
--DESCRIPTION--
|
|
||||||
<p>
|
|
||||||
Blah blah
|
|
||||||
</p>
|
|
||||||
|
|
||||||
The first line is the directive name, the lines after that prior to the
|
|
||||||
first --HEADER-- block are single-line values, and then after that
|
|
||||||
the multiline values are there. No value is restricted to a particular
|
|
||||||
format: DEFAULT could very well be multiline if that would be easier.
|
|
||||||
This would make it insanely easy, also, to add arbitrary extra parameters,
|
|
||||||
like:
|
|
||||||
|
|
||||||
VERSION: 3.0.0
|
|
||||||
ALLOWED: 'none', 'light', 'medium', 'heavy' // this is wrapped in array()
|
|
||||||
EXTERNAL: CSSTidy // this would be documented somewhere else with a URL
|
|
||||||
|
|
||||||
The final loss would be that you wouldn't know what file the directive
|
|
||||||
was used in; with some clever regexps it should be possible to
|
|
||||||
figure out where $config->get($ns, $d); occurs. Reflective calls to
|
|
||||||
the configuration object are mitigated by the fact that getBatch is
|
|
||||||
used, so we can simply talk about that in the namespace definition page.
|
|
||||||
This might be slow, but it would only happen when we are creating
|
|
||||||
the documentation for consumption, and is sugar.
|
|
||||||
|
|
||||||
We can put this in a schema/ directory, outside of HTML Purifier. The serialized
|
|
||||||
data gets treated like entities.ser.
|
|
||||||
|
|
||||||
The final thing that needs to be handled is user defined configurations.
|
|
||||||
They can be added at runtime using ConfigSchema::registerDirectory()
|
|
||||||
which globs the directory and grabs all of the directives to be incorporated
|
|
||||||
in. Then, the result is saved. We may want to take advantage of the
|
|
||||||
DefinitionCache framework, although it is not altogether certain what
|
|
||||||
configuration directives would be used to generate our key (meta-directives!)
|
|
||||||
|
|
||||||
Further thoughts
|
|
||||||
----------------
|
|
||||||
Our master configuration schema will only need to be updated once
|
|
||||||
every new version, so it's easily versionable. User specified
|
|
||||||
schema files are far more volatile, but it's far too expensive
|
|
||||||
to check the filemtimes of all the files, so a DefinitionRev style
|
|
||||||
mechanism works better. However, we can uniquely identify the
|
|
||||||
schema based on the directories they loaded, so there's no need
|
|
||||||
for a DefinitionId until we give them full programmatic control.
|
|
||||||
|
|
||||||
These variables should be directly incorporated into ConfigSchema,
|
|
||||||
and ConfigSchema should handle serialization. Some refactoring will be
|
|
||||||
necessary for the DefinitionCache classes, as they are built with
|
|
||||||
Config in mind. If the user changes something, the cache file gets
|
|
||||||
rebuilt. If the version changes, the cache file gets rebuilt. Since
|
|
||||||
our unit tests flush the caches before we start, and the operation is
|
|
||||||
pretty fast, this will not negatively impact unit testing.
|
|
||||||
|
|
||||||
One last thing: certain configuration directives require that files
|
|
||||||
get added. They may even be specified dynamically. It is not a good idea
|
|
||||||
for the HTMLPurifier_Config object to be used directly for such matters.
|
|
||||||
Instead, the userland code should explicitly perform the includes. We may
|
|
||||||
put in something like:
|
|
||||||
|
|
||||||
REQUIRES: HTMLPurifier_Filter_ExtractStyleBlocks
|
|
||||||
|
|
||||||
To indicate that if that class doesn't exist, and the user is attempting
|
|
||||||
to use the directive, we should fatally error out. The stub includes the core files,
|
|
||||||
and the user includes everything else. Any reflective things like new
|
|
||||||
$class would be required to tie in with the configuration.
|
|
||||||
|
|
||||||
It would work very well with rarely used configuration options, but it
|
|
||||||
wouldn't be so good for "core" parts that can be disabled. In such cases
|
|
||||||
the core include file would need to be modified, and the only way
|
|
||||||
to properly do this is to use the configuration object. Once again, our
|
|
||||||
ability to create cache keys saves the day: we can create arbitrary
|
|
||||||
stub files for arbitrary configurations and include those. They could
|
|
||||||
even be the single file affairs. The only thing we'd need to include,
|
|
||||||
then, would be HTMLPurifier_Config! Then, the configuration object would
|
|
||||||
load the library.
|
|
||||||
|
|
||||||
An aside...
|
|
||||||
-----------
|
|
||||||
One questions, however, the wisdom of letting PHP files write other PHP
|
|
||||||
files. It seems like a recipe for disaster, or at least lots of headaches
|
|
||||||
in highly secured setups, where PHP does not have the ability to write
|
|
||||||
to its root. In such cases, we could use sticky bits or tell the user
|
|
||||||
to manually generate the file.
|
|
||||||
|
|
||||||
The other troublesome bit is actually doing the calculations necessary.
|
|
||||||
For certain cases, it's simple (such as URIScheme), but for AttrDef
|
|
||||||
and HTMLModule the dependency trees are very complex in relation to
|
|
||||||
%HTML.Allowed and friends. I think that this idea should be shelved
|
|
||||||
and looked at at a later, less insane date.
|
|
||||||
|
|
||||||
An interesting dilemma presents itself when a configuration form is offered
|
|
||||||
to the user. Normally, the configuration object is not accessible without
|
|
||||||
editing PHP code; this facility changes things. The sensible thing to do
|
|
||||||
is stipulate that all classes required by the directives you allow must
|
|
||||||
be included.
|
|
||||||
|
|
||||||
Unit testing
|
|
||||||
------------
|
|
||||||
|
|
||||||
Setting up the parsing and translation into our existing format would not
|
|
||||||
be difficult to do. It might represent a good time for us to rethink our
|
|
||||||
tests for these facilities; as creative as they are, they are often hacky
|
|
||||||
and require public visibility for things that ought to be protected.
|
|
||||||
This is especially applicable for our DefinitionCache tests.
|
|
||||||
|
|
||||||
Migration
|
|
||||||
---------
|
|
||||||
|
|
||||||
Because we are not *adding* anything essentially new, it should be trivial
|
|
||||||
to write a script to take our existing data and dump it into the new format.
|
|
||||||
Well, not trivial, but fairly easy to accomplish. Primary implementation
|
|
||||||
difficulties would probably involve formatting the file nicely.
|
|
||||||
|
|
||||||
Backwards-compatibility
|
|
||||||
-----------------------
|
|
||||||
|
|
||||||
I expect that the ConfigSchema methods should stick around for a little bit,
|
|
||||||
but display E_USER_NOTICE warnings that they are deprecated. This will
|
|
||||||
require documentation!
|
|
||||||
|
|
||||||
New stuff
|
|
||||||
---------
|
|
||||||
|
|
||||||
VERSION: Version number in which the directive was introduced
|
|
||||||
DEPRECATED-VERSION: If the directive was deprecated, when was it deprecated?
|
|
||||||
DEPRECATED-USE: If the directive was deprecated, what should the user use now?
|
|
||||||
REQUIRES: What classes does this configuration directive require, but are
|
|
||||||
not part of the HTML Purifier core?
|
|
||||||
|
|
||||||
vim: et sw=4 sts=4
|
|
@@ -14,7 +14,7 @@
|
|||||||
|
|
||||||
<div id="filing">Filed under Development</div>
|
<div id="filing">Filed under Development</div>
|
||||||
<div id="index">Return to the <a href="index.html">index</a>.</div>
|
<div id="index">Return to the <a href="index.html">index</a>.</div>
|
||||||
<div id="home"><a href="http://htmlpurifier.org/">HTML Purifier</a> End-User Documentation</div>
|
<div id="home"><a href="http://hp.jpsband.org/">HTML Purifier</a> End-User Documentation</div>
|
||||||
|
|
||||||
<p>The classes in this library follow a few naming conventions, which may
|
<p>The classes in this library follow a few naming conventions, which may
|
||||||
help you find the correct functionality more quickly. Here they are:</p>
|
help you find the correct functionality more quickly. Here they are:</p>
|
||||||
@@ -77,7 +77,6 @@ help you find the correct functionality more quickly. Here they are:</p>
|
|||||||
|
|
||||||
</dl>
|
</dl>
|
||||||
|
|
||||||
</body></html>
|
<div id="version">$Id$</div>
|
||||||
|
|
||||||
<!-- vim: et sw=4 sts=4
|
</body></html>
|
||||||
-->
|
|
@@ -14,7 +14,7 @@
|
|||||||
|
|
||||||
<div id="filing">Filed under Development</div>
|
<div id="filing">Filed under Development</div>
|
||||||
<div id="index">Return to the <a href="index.html">index</a>.</div>
|
<div id="index">Return to the <a href="index.html">index</a>.</div>
|
||||||
<div id="home"><a href="http://htmlpurifier.org/">HTML Purifier</a> End-User Documentation</div>
|
<div id="home"><a href="http://hp.jpsband.org/">HTML Purifier</a> End-User Documentation</div>
|
||||||
|
|
||||||
<p>Here are some possible optimization techniques we can apply to code sections if
|
<p>Here are some possible optimization techniques we can apply to code sections if
|
||||||
they turn out to be slow. Be sure not to prematurely optimize: if you get
|
they turn out to be slow. Be sure not to prematurely optimize: if you get
|
||||||
@@ -23,11 +23,11 @@ that itch, put it here!</p>
|
|||||||
<ul>
|
<ul>
|
||||||
<li>Make Tokens Flyweights (may prove problematic, probably not worth it)</li>
|
<li>Make Tokens Flyweights (may prove problematic, probably not worth it)</li>
|
||||||
<li>Rewrite regexps into PHP code</li>
|
<li>Rewrite regexps into PHP code</li>
|
||||||
|
<li>Serialize the Definition object</li>
|
||||||
<li>Batch regexp validation (do as many per function call as possible)</li>
|
<li>Batch regexp validation (do as many per function call as possible)</li>
|
||||||
<li>Parallelize strategies</li>
|
<li>Parallelize strategies</li>
|
||||||
</ul>
|
</ul>
|
||||||
|
|
||||||
</body></html>
|
<div id="version">$Id$</div>
|
||||||
|
|
||||||
<!-- vim: et sw=4 sts=4
|
</body></html>
|
||||||
-->
|
|
@@ -32,19 +32,14 @@ thead th {text-align:left;padding:0.1em;background-color:#EEE;}
|
|||||||
|
|
||||||
<div id="filing">Filed under Development</div>
|
<div id="filing">Filed under Development</div>
|
||||||
<div id="index">Return to the <a href="index.html">index</a>.</div>
|
<div id="index">Return to the <a href="index.html">index</a>.</div>
|
||||||
<div id="home"><a href="http://htmlpurifier.org/">HTML Purifier</a> End-User Documentation</div>
|
<div id="home"><a href="http://hp.jpsband.org/">HTML Purifier</a> End-User Documentation</div>
|
||||||
|
|
||||||
<p>
|
|
||||||
<strong>Warning:</strong> This table is kept for historical purposes and
|
|
||||||
is not being actively updated.
|
|
||||||
</p>
|
|
||||||
|
|
||||||
<h2>Key</h2>
|
<h2>Key</h2>
|
||||||
|
|
||||||
<table cellspacing="0"><tbody>
|
<table cellspacing="0"><tbody>
|
||||||
<tr><td class="impl-yes">Implemented</td></tr>
|
<tr><td class="impl-yes">Implemented</td></tr>
|
||||||
<tr><td class="impl-partial">Partially implemented</td></tr>
|
<tr><td class="impl-partial">Partially implemented</td></tr>
|
||||||
<tr><td class="impl-no">Not priority to implement</td></tr>
|
<tr><td class="impl-no">Will not implement</td></tr>
|
||||||
<tr><td class="danger">Dangerous attribute/property</td></tr>
|
<tr><td class="danger">Dangerous attribute/property</td></tr>
|
||||||
<tr><td class="css1">Present in CSS1</td></tr>
|
<tr><td class="css1">Present in CSS1</td></tr>
|
||||||
<tr><td class="feature">Feature, requires extra work</td></tr>
|
<tr><td class="feature">Feature, requires extra work</td></tr>
|
||||||
@@ -123,7 +118,6 @@ thead th {text-align:left;padding:0.1em;background-color:#EEE;}
|
|||||||
<tbody>
|
<tbody>
|
||||||
<tr><th colspan="2">Table</th></tr>
|
<tr><th colspan="2">Table</th></tr>
|
||||||
<tr class="impl-yes"><td>border-collapse</td><td>ENUM(collapse, separate)</td></tr>
|
<tr class="impl-yes"><td>border-collapse</td><td>ENUM(collapse, separate)</td></tr>
|
||||||
<tr class="impl-yes"><td>border-space</td><td>MULTIPLE</td></tr>
|
|
||||||
<tr class="impl-yes"><td>caption-side</td><td>ENUM(top, bottom)</td></tr>
|
<tr class="impl-yes"><td>caption-side</td><td>ENUM(top, bottom)</td></tr>
|
||||||
<tr class="feature"><td>empty-cells</td><td>ENUM(show, hide), No IE support makes this useless,
|
<tr class="feature"><td>empty-cells</td><td>ENUM(show, hide), No IE support makes this useless,
|
||||||
possible fix with &nbsp;? Unknown release milestone.</td></tr>
|
possible fix with &nbsp;? Unknown release milestone.</td></tr>
|
||||||
@@ -148,16 +142,16 @@ thead th {text-align:left;padding:0.1em;background-color:#EEE;}
|
|||||||
|
|
||||||
<tbody>
|
<tbody>
|
||||||
<tr><th colspan="2">Unknown</th></tr>
|
<tr><th colspan="2">Unknown</th></tr>
|
||||||
<tr class="danger css1 impl-yes"><td>background-image</td><td>Dangerous</td></tr>
|
<tr class="danger css1 impl-yes"><td>background-image</td><td>Dangerous, target milestone 1.3</td></tr>
|
||||||
<tr class="css1 impl-yes"><td>background-attachment</td><td>ENUM(scroll, fixed),
|
<tr class="css1 impl-yes"><td>background-attachment</td><td>ENUM(scroll, fixed),
|
||||||
Depends on background-image</td></tr>
|
Depends on background-image</td></tr>
|
||||||
<tr class="css1 impl-yes"><td>background-position</td><td>Depends on background-image</td></tr>
|
<tr class="css1 impl-yes"><td>background-position</td><td>Depends on background-image</td></tr>
|
||||||
<tr class="danger impl-no"><td>cursor</td><td>Dangerous but fluffy</td></tr>
|
<tr class="danger impl-no"><td>cursor</td><td>Dangerous but fluffy</td></tr>
|
||||||
<tr class="danger impl-yes"><td>display</td><td>ENUM(...), Dangerous but interesting;
|
<tr class="danger css1"><td>display</td><td>ENUM(...), Dangerous but interesting;
|
||||||
will not implement list-item, run-in (Opera only) or table (no IE);
|
will not implement list-item, run-in (Opera only) or table (no IE);
|
||||||
inline-block has incomplete IE6 support and requires -moz-inline-box
|
inline-block has incomplete IE6 support and requires -moz-inline-box
|
||||||
for Mozilla. Unknown target milestone.</td></tr>
|
for Mozilla. Unknown target milestone.</td></tr>
|
||||||
<tr class="css1 impl-yes"><td>height</td><td>Interesting, why use it? Unknown target milestone.</td></tr>
|
<tr class="css1"><td>height</td><td>Interesting, why use it? Unknown target milestone.</td></tr>
|
||||||
<tr class="danger css1 impl-yes"><td>list-style-image</td><td>Dangerous?</td></tr>
|
<tr class="danger css1 impl-yes"><td>list-style-image</td><td>Dangerous?</td></tr>
|
||||||
<tr class="impl-no"><td>max-height</td><td rowspan="4">No IE 5/6</td></tr>
|
<tr class="impl-no"><td>max-height</td><td rowspan="4">No IE 5/6</td></tr>
|
||||||
<tr class="impl-no"><td>min-height</td></tr>
|
<tr class="impl-no"><td>min-height</td></tr>
|
||||||
@@ -172,11 +166,11 @@ thead th {text-align:left;padding:0.1em;background-color:#EEE;}
|
|||||||
Mostly supported. Unknown target milestone.</td></tr>
|
Mostly supported. Unknown target milestone.</td></tr>
|
||||||
<tr><td>page-break-inside</td><td>ENUM(avoid, auto), Opera only. Unknown target milestone.</td></tr>
|
<tr><td>page-break-inside</td><td>ENUM(avoid, auto), Opera only. Unknown target milestone.</td></tr>
|
||||||
<tr class="impl-no"><td>quotes</td><td>May be dropped from CSS2, fairly useless for inline context</td></tr>
|
<tr class="impl-no"><td>quotes</td><td>May be dropped from CSS2, fairly useless for inline context</td></tr>
|
||||||
<tr class="danger impl-yes"><td>visibility</td><td>ENUM(visible, hidden, collapse),
|
<tr class="impl-no"><td>visibility</td><td>ENUM(visible, hidden, collapse),
|
||||||
Dangerous</td></tr>
|
Dangerous</td></tr>
|
||||||
<tr class="css1 feature impl-partial"><td>white-space</td><td>ENUM(normal, pre, nowrap, pre-wrap,
|
<tr class="css1 feature"><td>white-space</td><td>ENUM(normal, pre, nowrap, pre-wrap,
|
||||||
pre-line), Spotty implementation:
|
pre-line), Spotty implementation:
|
||||||
pre (no IE 5/6), <em>nowrap</em> (no IE 5, supported),
|
pre (no IE 5/6), nowrap (no IE 5),
|
||||||
pre-wrap (only Opera), pre-line (no support). Fixable? Unknown target milestone.</td></tr>
|
pre-wrap (only Opera), pre-line (no support). Fixable? Unknown target milestone.</td></tr>
|
||||||
</tbody>
|
</tbody>
|
||||||
|
|
||||||
@@ -244,18 +238,18 @@ Mozilla on inside and needs -moz-outline, no IE support.</td></tr>
|
|||||||
<tr><th colspan="3">Questionable</th></tr>
|
<tr><th colspan="3">Questionable</th></tr>
|
||||||
<tr class="impl-no"><td>accesskey</td><td>A</td><td>May interfere with main interface</td></tr>
|
<tr class="impl-no"><td>accesskey</td><td>A</td><td>May interfere with main interface</td></tr>
|
||||||
<tr class="impl-no"><td>tabindex</td><td>A</td><td>May interfere with main interface</td></tr>
|
<tr class="impl-no"><td>tabindex</td><td>A</td><td>May interfere with main interface</td></tr>
|
||||||
<tr class="impl-yes"><td>target</td><td>A</td><td>Config enabled, only useful for frame layouts, disallowed in strict</td></tr>
|
<tr><td>target</td><td>A</td><td>Config enabled, only useful for frame layouts, disallowed in strict</td></tr>
|
||||||
</tbody>
|
</tbody>
|
||||||
|
|
||||||
<tbody>
|
<tbody>
|
||||||
<tr><th colspan="3">Miscellaneous</th></tr>
|
<tr><th colspan="3">Miscellaneous</th></tr>
|
||||||
<tr><td>datetime</td><td>DEL, INS</td><td>No visible effect, ISO format</td></tr>
|
<tr><td>datetime</td><td>DEL, INS</td><td>No visible effect, ISO format</td></tr>
|
||||||
<tr class="impl-yes"><td>rel</td><td>A</td><td>Largely user-defined: nofollow, tag (see microformats)</td></tr>
|
<tr><td>rel</td><td>A</td><td>Largely user-defined: nofollow, tag (see microformats)</td></tr>
|
||||||
<tr class="impl-yes"><td>rev</td><td>A</td><td>Largely user-defined: vote-*</td></tr>
|
<tr><td>rev</td><td>A</td><td>Largely user-defined: vote-*</td></tr>
|
||||||
<tr class="feature"><td>axis</td><td>TD, TH</td><td>W3C only: No browser implementation</td></tr>
|
<tr class="feature"><td>axis</td><td>TD, TH</td><td>W3C only: No browser implementation</td></tr>
|
||||||
<tr class="feature"><td>char</td><td>COL, COLGROUP, TBODY, TD, TFOOT, TH, THEAD, TR</td><td>W3C only: No browser implementation</td></tr>
|
<tr class="feature"><td>char</td><td>COL, COLGROUP, TBODY, TD, TFOOT, TH, THEAD, TR</td><td>W3C only: No browser implementation</td></tr>
|
||||||
<tr class="feature"><td>headers</td><td>TD, TH</td><td>W3C only: No browser implementation</td></tr>
|
<tr class="feature"><td>headers</td><td>TD, TH</td><td>W3C only: No browser implementation</td></tr>
|
||||||
<tr class="impl-yes"><td>scope</td><td>TD, TH</td><td>W3C only: No browser implementation</td></tr>
|
<tr class="feature"><td>scope</td><td>TD, TH</td><td>W3C only: No browser implementation</td></tr>
|
||||||
</tbody>
|
</tbody>
|
||||||
|
|
||||||
<tbody class="impl-yes">
|
<tbody class="impl-yes">
|
||||||
@@ -268,42 +262,41 @@ Mozilla on inside and needs -moz-outline, no IE support.</td></tr>
|
|||||||
</tbody>
|
</tbody>
|
||||||
|
|
||||||
<tbody>
|
<tbody>
|
||||||
<tr><th colspan="3">Transform</th></tr>
|
<tr><th colspan="3">Transform, target milestone 1.4</th></tr>
|
||||||
<tr class="impl-yes"><td rowspan="5">align</td><td>CAPTION</td><td>'caption-side' for top/bottom, 'text-align' for left/right</td></tr>
|
<tr><td rowspan="5">align</td><td>CAPTION</td><td>Near-equiv style 'caption-side', drop left and right</td></tr>
|
||||||
<tr class="impl-yes"><td>IMG</td><td rowspan="3">See specimens/html-align-to-css.html</td></tr>
|
<tr><td>IMG</td><td rowspan="2">Margin-left and margin-right = auto or parent div</td></tr>
|
||||||
<tr class="impl-yes"><td>TABLE</td></tr>
|
<tr><td>TABLE</td></tr>
|
||||||
<tr class="impl-yes"><td>HR</td></tr>
|
<tr><td>HR</td><td>Near-equivalent style 'text-align' (Works for IE and Opera, but not Firefox). Also try <code>margin-right:auto; margin-left:0;</code> for left or <code>margin-right:0; margin-left:auto;</code> for right (optionally replacing 0 with the original margin for that side)</td></tr>
|
||||||
<tr class="impl-yes"><td>H1, H2, H3, H4, H5, H6, P</td><td>Equivalent style 'text-align'</td></tr>
|
<tr class="impl-yes"><td>H1, H2, H3, H4, H5, H6, P</td><td>Equivalent style 'text-align'</td></tr>
|
||||||
<tr class="required impl-yes"><td>alt</td><td>IMG</td><td>Required, insert image filename if src is present or default invalid image text</td></tr>
|
<tr class="required impl-yes"><td>alt</td><td>IMG</td><td>Required, insert image filename if src is present or default invalid image text</td></tr>
|
||||||
<tr class="impl-yes"><td rowspan="3">bgcolor</td><td>TABLE</td><td>Superset style 'background-color'</td></tr>
|
<tr><td rowspan="3">bgcolor</td><td>TABLE</td><td>Equivalent style 'background-color'</td></tr>
|
||||||
<tr class="impl-yes"><td>TR</td><td>Superset style 'background-color'</td></tr>
|
<tr><td>TR</td><td>Equivalent style 'background-color'</td></tr>
|
||||||
<tr class="impl-yes"><td>TD, TH</td><td>Superset style 'background-color'</td></tr>
|
<tr><td>TD, TH</td><td>Equivalent style 'background-color'</td></tr>
|
||||||
<tr class="impl-yes"><td>border</td><td>IMG</td><td>Equivalent style <code>border:[number]px solid</code></td></tr>
|
<tr><td>border</td><td>IMG</td><td>Near equivalent style 'border-width', as it only applies when link present</td></tr>
|
||||||
<tr class="impl-yes"><td>clear</td><td>BR</td><td>Near-equiv style 'clear', transform 'all' into 'both'</td></tr>
|
<tr><td>clear</td><td>BR</td><td>Near-equiv style 'clear', transform 'all' into 'both'</td></tr>
|
||||||
<tr class="impl-no"><td>compact</td><td>DL, OL, UL</td><td>Boolean, needs custom CSS class; rarely used anyway</td></tr>
|
<tr class="impl-no"><td>compact</td><td>DL, OL, UL</td><td>Boolean, needs custom CSS class; rarely used anyway</td></tr>
|
||||||
<tr class="required impl-yes"><td>dir</td><td>BDO</td><td>Required, insert ltr (or configuration value) if none</td></tr>
|
<tr class="required impl-yes"><td>dir</td><td>BDO</td><td>Required, insert ltr (or configuration value) if none</td></tr>
|
||||||
<tr class="impl-yes"><td>height</td><td>TD, TH</td><td>Near-equiv style 'height', needs px suffix if original was in pixels</td></tr>
|
<tr><td>height</td><td>TD, TH</td><td>Near-equiv style 'height', needs px suffix if original was in pixels</td></tr>
|
||||||
<tr class="impl-yes"><td>hspace</td><td>IMG</td><td>Near-equiv styles 'margin-top' and 'margin-bottom', needs px suffix</td></tr>
|
<tr><td>hspace</td><td>IMG</td><td>Near-equiv styles 'margin-top' and 'margin-bottom', needs px suffix</td></tr>
|
||||||
<tr class="impl-yes"><td>lang</td><td>*</td><td>Copy value to xml:lang</td></tr>
|
<tr class="impl-yes"><td>lang</td><td>*</td><td>Copy value to xml:lang</td></tr>
|
||||||
<tr class="impl-yes"><td rowspan="2">name</td><td>IMG</td><td>Turn into ID</td></tr>
|
<tr><td rowspan="2">name</td><td>IMG</td><td>Turn into ID</td></tr>
|
||||||
<tr class="impl-yes"><td>A</td><td>Turn into ID</td></tr>
|
<tr><td>A</td><td>Turn into ID? (not deprecated, though in which specs?)</td></tr>
|
||||||
<tr class="impl-yes"><td>noshade</td><td>HR</td><td>Boolean, style 'border-style:solid;'</td></tr>
|
<tr><td>noshade</td><td>HR</td><td>Boolean, style 'border-style:solid;'</td></tr>
|
||||||
<tr class="impl-yes"><td>nowrap</td><td>TD, TH</td><td>Boolean, style 'white-space:nowrap;' (not compat with IE5)</td></tr>
|
<tr><td>nowrap</td><td>TD, TH</td><td>Boolean, style 'white-space:nowrap;' (not compat with IE5)</td></tr>
|
||||||
<tr class="impl-yes"><td>size</td><td>HR</td><td>Near-equiv 'height', needs px suffix if original was pixels</td></tr>
|
<tr><td>size</td><td>HR</td><td>Near-equiv 'width', needs px suffix if original was pixels</td></tr>
|
||||||
<tr class="required impl-yes"><td>src</td><td>IMG</td><td>Required, insert blank or default img if not set</td></tr>
|
<tr class="required impl-yes"><td>src</td><td>IMG</td><td>Required, insert blank or default img if not set</td></tr>
|
||||||
<tr class="impl-yes"><td>start</td><td>OL</td><td>Poorly supported 'counter-reset', allowed in loose, dropped in strict</td></tr>
|
<tr class="impl-yes"><td>start</td><td>OL</td><td>Poorly supported 'counter-reset', allowed in loose, dropped in strict</td></tr>
|
||||||
<tr class="impl-yes"><td rowspan="3">type</td><td>LI</td><td rowspan="3">Equivalent style 'list-style-type', different allowed values though. (needs testing)</td></tr>
|
<tr><td rowspan="3">type</td><td>LI</td><td rowspan="3">Equivalent style 'list-style-type', different allowed values though. (needs testing)</td></tr>
|
||||||
<tr class="impl-yes"><td>OL</td></tr>
|
<tr><td>OL</td></tr>
|
||||||
<tr class="impl-yes"><td>UL</td></tr>
|
<tr><td>UL</td></tr>
|
||||||
<tr class="impl-yes"><td>value</td><td>LI</td><td>Poorly supported 'counter-reset', allowed in loose, dropped in strict</td></tr>
|
<tr class="impl-yes"><td>value</td><td>LI</td><td>Poorly supported 'counter-reset', allowed in loose, dropped in strict</td></tr>
|
||||||
<tr class="impl-yes"><td>vspace</td><td>IMG</td><td>Near-equiv styles 'margin-left' and 'margin-right', needs px suffix, see hspace</td></tr>
|
<tr><td>vspace</td><td>IMG</td><td>Near-equiv styles 'margin-left' and 'margin-right', needs px suffix, see hspace</td></tr>
|
||||||
<tr class="impl-yes"><td rowspan="2">width</td><td>HR</td><td rowspan="2">Near-equiv style 'width', needs px suffix if original was pixels</td></tr>
|
<tr><td rowspan="2">width</td><td>HR</td><td rowspan="2">Near-equiv style 'width', needs px suffix if original was pixels</td></tr>
|
||||||
<tr class="impl-yes"><td>TD, TH</td></tr>
|
<tr><td>TD, TH</td></tr>
|
||||||
</tbody>
|
</tbody>
|
||||||
|
|
||||||
</table>
|
</table>
|
||||||
|
|
||||||
</body></html>
|
<div id="version">$Id$</div>
|
||||||
|
|
||||||
<!-- vim: et sw=4 sts=4
|
</body></html>
|
||||||
-->
|
|
@@ -1,850 +0,0 @@
|
|||||||
<?xml version="1.0" encoding="UTF-8"?>
|
|
||||||
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
|
|
||||||
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
|
|
||||||
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"><head>
|
|
||||||
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
|
|
||||||
<meta name="description" content="Tutorial for customizing HTML Purifier's tag and attribute sets." />
|
|
||||||
<link rel="stylesheet" type="text/css" href="style.css" />
|
|
||||||
|
|
||||||
<title>Customize - HTML Purifier</title>
|
|
||||||
|
|
||||||
</head><body>
|
|
||||||
|
|
||||||
<h1 class="subtitled">Customize!</h1>
|
|
||||||
<div class="subtitle">HTML Purifier is a Swiss-Army Knife</div>
|
|
||||||
|
|
||||||
<div id="filing">Filed under End-User</div>
|
|
||||||
<div id="index">Return to the <a href="index.html">index</a>.</div>
|
|
||||||
<div id="home"><a href="http://htmlpurifier.org/">HTML Purifier</a> End-User Documentation</div>
|
|
||||||
|
|
||||||
<p>
|
|
||||||
HTML Purifier has this quirk where if you try to allow certain elements or
|
|
||||||
attributes, HTML Purifier will tell you that it's not supported, and that
|
|
||||||
you should go to the forums to find out how to implement it. Well, this
|
|
||||||
document is how to implement elements and attributes which HTML Purifier
|
|
||||||
doesn't support out of the box.
|
|
||||||
</p>
|
|
||||||
|
|
||||||
<h2>Is it necessary?</h2>
|
|
||||||
|
|
||||||
<p>
|
|
||||||
Before we even write any code, it is paramount to consider whether or
|
|
||||||
not the code we're writing is necessary or not. HTML Purifier, by default,
|
|
||||||
contains a large set of elements and attributes: large enough so that
|
|
||||||
<em>any</em> element or attribute in XHTML 1.0 or 1.1 (and its HTML variants)
|
|
||||||
that can be safely used by the general public is implemented.
|
|
||||||
</p>
|
|
||||||
|
|
||||||
<p>
|
|
||||||
So what needs to be implemented? (Feel free to skip this section if
|
|
||||||
you know what you want).
|
|
||||||
</p>
|
|
||||||
|
|
||||||
<h3>XHTML 1.0</h3>
|
|
||||||
|
|
||||||
<p>
|
|
||||||
All of the modules listed below are based off of the
|
|
||||||
<a href="http://www.w3.org/TR/2001/REC-xhtml-modularization-20010410/abstract_modules.html#sec_5.2.">modularization of
|
|
||||||
XHTML</a>, which, while technically for XHTML 1.1, is quite a useful
|
|
||||||
resource.
|
|
||||||
</p>
|
|
||||||
|
|
||||||
<ul>
|
|
||||||
<li>Structure</li>
|
|
||||||
<li>Frames</li>
|
|
||||||
<li>Applets (deprecated)</li>
|
|
||||||
<li>Forms</li>
|
|
||||||
<li>Image maps</li>
|
|
||||||
<li>Objects</li>
|
|
||||||
<li>Iframes</li>
|
|
||||||
<li>Events</li>
|
|
||||||
<li>Meta-information</li>
|
|
||||||
<li>Style sheets</li>
|
|
||||||
<li>Link (not hypertext)</li>
|
|
||||||
<li>Base</li>
|
|
||||||
<li>Name</li>
|
|
||||||
</ul>
|
|
||||||
|
|
||||||
<p>
|
|
||||||
If you don't recognize it, you probably don't need it. But the curious
|
|
||||||
can look all of these modules up in the above-mentioned document. Note
|
|
||||||
that inline scripting comes packaged with HTML Purifier (more on this
|
|
||||||
later).
|
|
||||||
</p>
|
|
||||||
|
|
||||||
<h3>XHTML 1.1</h3>
|
|
||||||
|
|
||||||
<p>
|
|
||||||
As of HTMLPurifier 2.1.0, we have implemented the
|
|
||||||
<a href="http://www.w3.org/TR/2001/REC-ruby-20010531/">Ruby module</a>,
|
|
||||||
which defines a set of tags
|
|
||||||
for publishing short annotations for text, used mostly in Japanese
|
|
||||||
and Chinese school texts, but applicable for positioning any text (not
|
|
||||||
limited to translations) above or below other corresponding text.
|
|
||||||
</p>
|
|
||||||
|
|
||||||
<h3>HTML 5</h3>
|
|
||||||
|
|
||||||
<p>
|
|
||||||
<a href="http://www.whatwg.org/specs/web-apps/current-work/">HTML 5</a>
|
|
||||||
is a fork of HTML 4.01 by WHATWG, who believed that XHTML 2.0 was headed
|
|
||||||
in the wrong direction. It too is a working draft, and may change
|
|
||||||
drastically before publication, but it should be noted that the
|
|
||||||
<code>canvas</code> tag has been implemented by many browser vendors.
|
|
||||||
</p>
|
|
||||||
|
|
||||||
<h3>Proprietary</h3>
|
|
||||||
|
|
||||||
<p>
|
|
||||||
There are a number of proprietary tags still in the wild. Many of them
|
|
||||||
have been documented in <a href="ref-proprietary-tags.txt">ref-proprietary-tags.txt</a>,
|
|
||||||
but there is currently no implementation for any of them.
|
|
||||||
</p>
|
|
||||||
|
|
||||||
<h3>Extensions</h3>
|
|
||||||
|
|
||||||
<p>
|
|
||||||
There are also a number of other XML languages out there that can
|
|
||||||
be embedded in HTML documents: two of the most popular are MathML and
|
|
||||||
SVG, and I frequently get requests to implement these. But they are
|
|
||||||
expansive, comprehensive specifications, and it would take far too long
|
|
||||||
to implement them <em>correctly</em> (most systems I've seen go as far
|
|
||||||
as whitelisting tags and no further; come on, what about nesting!)
|
|
||||||
</p>
|
|
||||||
|
|
||||||
<p>
|
|
||||||
Word of warning: HTML Purifier is currently <em>not</em> namespace
|
|
||||||
aware.
|
|
||||||
</p>
|
|
||||||
|
|
||||||
<h2>Giving back</h2>
|
|
||||||
|
|
||||||
<p>
|
|
||||||
As you may imagine from the details above (don't be abashed if you didn't
|
|
||||||
read it all: a glance over would have done), there's quite a bit that
|
|
||||||
HTML Purifier doesn't implement. Recent architectural changes have
|
|
||||||
allowed HTML Purifier to implement elements and attributes that are not
|
|
||||||
safe! Don't worry, they won't be activated unless you set %HTML.Trusted
|
|
||||||
to true, but they certainly help out users who need to put, say, forms
|
|
||||||
on their page and don't want to go through the trouble of reading this
|
|
||||||
and implementing it themselves.
|
|
||||||
</p>
|
|
||||||
|
|
||||||
<p>
|
|
||||||
So any of the above that you implement for your own application could
|
|
||||||
help out some other poor sap on the other side of the globe. Help us
|
|
||||||
out, and send back code so that it can be hammered into a module and
|
|
||||||
released with the core. Any code would be greatly appreciated!
|
|
||||||
</p>
|
|
||||||
|
|
||||||
<h2>And now...</h2>
|
|
||||||
|
|
||||||
<p>
|
|
||||||
Enough philosophical talk, time for some code:
|
|
||||||
</p>
|
|
||||||
|
|
||||||
<pre>$config = HTMLPurifier_Config::createDefault();
|
|
||||||
$config->set('HTML.DefinitionID', 'enduser-customize.html tutorial');
|
|
||||||
$config->set('HTML.DefinitionRev', 1);
|
|
||||||
if ($def = $config->maybeGetRawHTMLDefinition()) {
|
|
||||||
// our code will go here
|
|
||||||
}</pre>
|
|
||||||
|
|
||||||
<p>
|
|
||||||
Assuming that HTML Purifier has already been properly loaded (hint:
|
|
||||||
include <code>HTMLPurifier.auto.php</code>), this code will set up
|
|
||||||
the environment that you need to start customizing the HTML definition.
|
|
||||||
What's going on?
|
|
||||||
</p>
|
|
||||||
|
|
||||||
<ul>
|
|
||||||
<li>
|
|
||||||
The first three lines are regular configuration code:
|
|
||||||
<ul>
|
|
||||||
<li>
|
|
||||||
%HTML.DefinitionID is set to a unique identifier for your
|
|
||||||
custom HTML definition. This prevents it from clobbering
|
|
||||||
other custom definitions on the same installation.
|
|
||||||
</li>
|
|
||||||
<li>
|
|
||||||
%HTML.DefinitionRev is a revision integer of your HTML
|
|
||||||
definition. Because HTML definitions are cached, you'll need
|
|
||||||
to increment this whenever you make a change in order to flush
|
|
||||||
the cache.
|
|
||||||
</li>
|
|
||||||
</ul>
|
|
||||||
</li>
|
|
||||||
<li>
|
|
||||||
The fourth line retrieves a raw <code>HTMLPurifier_HTMLDefinition</code>
|
|
||||||
object that we will be tweaking. Interestingly enough, we have
|
|
||||||
placed it in an if block: this is because
|
|
||||||
<code>maybeGetRawHTMLDefinition</code>, as its name suggests, may
|
|
||||||
return a NULL, in which case we should skip doing any
|
|
||||||
initialization. This, in fact, will correspond to when our fully
|
|
||||||
customized object is already in the cache.
|
|
||||||
</li>
|
|
||||||
</ul>
|
|
||||||
|
|
||||||
<h2>Turn off caching</h2>
|
|
||||||
|
|
||||||
<p>
|
|
||||||
To make development easier, we're going to temporarily turn off
|
|
||||||
definition caching:
|
|
||||||
</p>
|
|
||||||
|
|
||||||
<pre>$config = HTMLPurifier_Config::createDefault();
|
|
||||||
$config->set('HTML.DefinitionID', 'enduser-customize.html tutorial');
|
|
||||||
$config->set('HTML.DefinitionRev', 1);
|
|
||||||
<strong>$config->set('Cache.DefinitionImpl', null); // TODO: remove this later!</strong>
|
|
||||||
$def = $config->getHTMLDefinition(true);</pre>
|
|
||||||
|
|
||||||
<p>
|
|
||||||
A few things should be mentioned about the caching mechanism before
|
|
||||||
we move on. For performance reasons, HTML Purifier caches generated
|
|
||||||
<code>HTMLPurifier_Definition</code> objects in serialized files
|
|
||||||
stored (by default) in <code>library/HTMLPurifier/DefinitionCache/Serializer</code>.
|
|
||||||
A lot of processing is done in order to create these objects, so it
|
|
||||||
makes little sense to repeat the same processing over and over again
|
|
||||||
whenever HTML Purifier is called.
|
|
||||||
</p>
|
|
||||||
|
|
||||||
<p>
|
|
||||||
In order to identify a cache entry, HTML Purifier uses three variables:
|
|
||||||
the library's version number, the value of %HTML.DefinitionRev and
|
|
||||||
a serial of relevant configuration. Whenever any of these changes,
|
|
||||||
a new HTML definition is generated. Notice that there is no way
|
|
||||||
for the definition object to track changes to customizations: here, it
|
|
||||||
is up to you to supply appropriate information to DefinitionID and
|
|
||||||
DefinitionRev.
|
|
||||||
</p>
|
|
||||||
|
|
||||||
<h2 id="addAttribute">Add an attribute</h2>
|
|
||||||
|
|
||||||
<p>
|
|
||||||
For this example, we're going to implement the <code>target</code> attribute found
|
|
||||||
on <code>a</code> elements. To implement an attribute, we have to
|
|
||||||
ask a few questions:
|
|
||||||
</p>
|
|
||||||
|
|
||||||
<ol>
|
|
||||||
<li>What element is it found on?</li>
|
|
||||||
<li>What is its name?</li>
|
|
||||||
<li>Is it required or optional?</li>
|
|
||||||
<li>What are valid values for it?</li>
|
|
||||||
</ol>
|
|
||||||
|
|
||||||
<p>
|
|
||||||
The first three are easy: the element is <code>a</code>, the attribute
|
|
||||||
is <code>target</code>, and it is not a required attribute. (If it
|
|
||||||
was required, we'd need to append an asterisk to the attribute name,
|
|
||||||
you'll see an example of this in the addElement() example).
|
|
||||||
</p>
|
|
||||||
|
|
||||||
<p>
|
|
||||||
The last question is a little trickier.
|
|
||||||
Let's allow the special values: _blank, _self, _target and _top.
|
|
||||||
The form of this is called an <strong>enumeration</strong>, a list of
|
|
||||||
valid values, although only one can be used at a time. To translate
|
|
||||||
this into code form, we write:
|
|
||||||
</p>
|
|
||||||
|
|
||||||
<pre>$config = HTMLPurifier_Config::createDefault();
|
|
||||||
$config->set('HTML.DefinitionID', 'enduser-customize.html tutorial');
|
|
||||||
$config->set('HTML.DefinitionRev', 1);
|
|
||||||
$config->set('Cache.DefinitionImpl', null); // remove this later!
|
|
||||||
$def = $config->getHTMLDefinition(true);
|
|
||||||
<strong>$def->addAttribute('a', 'target', 'Enum#_blank,_self,_target,_top');</strong></pre>
|
|
||||||
|
|
||||||
<p>
|
|
||||||
The <code>Enum#_blank,_self,_target,_top</code> does all the magic.
|
|
||||||
The string is split into two parts, separated by a hash mark (#):
|
|
||||||
</p>
|
|
||||||
|
|
||||||
<ol>
|
|
||||||
<li>The first part is the name of what we call an <code>AttrDef</code></li>
|
|
||||||
<li>The second part is the parameter of the above-mentioned <code>AttrDef</code></li>
|
|
||||||
</ol>
|
|
||||||
|
|
||||||
<p>
|
|
||||||
If that sounds vague and generic, it's because it is! HTML Purifier defines
|
|
||||||
an assortment of different attribute types one can use, and each of these
|
|
||||||
has their own specialized parameter format. Here are some of the more useful
|
|
||||||
ones:
|
|
||||||
</p>
|
|
||||||
|
|
||||||
<table class="table">
|
|
||||||
<thead>
|
|
||||||
<tr>
|
|
||||||
<th>Type</th>
|
|
||||||
<th>Format</th>
|
|
||||||
<th>Description</th>
|
|
||||||
</tr>
|
|
||||||
</thead>
|
|
||||||
<tbody>
|
|
||||||
<tr>
|
|
||||||
<th>Enum</th>
|
|
||||||
<td><em>[s:]</em>value1,value2,...</td>
|
|
||||||
<td>
|
|
||||||
Attribute with a number of valid values, one of which may be used. When
|
|
||||||
s: is present, the enumeration is case sensitive.
|
|
||||||
</td>
|
|
||||||
</tr>
|
|
||||||
<tr>
|
|
||||||
<th>Bool</th>
|
|
||||||
<td>attribute_name</td>
|
|
||||||
<td>
|
|
||||||
Boolean attribute, with only one valid value: the name
|
|
||||||
of the attribute.
|
|
||||||
</td>
|
|
||||||
</tr>
|
|
||||||
<tr>
|
|
||||||
<th>CDATA</th>
|
|
||||||
<td></td>
|
|
||||||
<td>
|
|
||||||
Attribute of arbitrary text. Can also be referred to as <strong>Text</strong>
|
|
||||||
(the specification makes a semantic distinction between the two).
|
|
||||||
</td>
|
|
||||||
</tr>
|
|
||||||
<tr>
|
|
||||||
<th>ID</th>
|
|
||||||
<td></td>
|
|
||||||
<td>
|
|
||||||
Attribute that specifies a unique ID
|
|
||||||
</td>
|
|
||||||
</tr>
|
|
||||||
<tr>
|
|
||||||
<th>Pixels</th>
|
|
||||||
<td></td>
|
|
||||||
<td>
|
|
||||||
Attribute that specifies an integer pixel length
|
|
||||||
</td>
|
|
||||||
</tr>
|
|
||||||
<tr>
|
|
||||||
<th>Length</th>
|
|
||||||
<td></td>
|
|
||||||
<td>
|
|
||||||
Attribute that specifies a pixel or percentage length
|
|
||||||
</td>
|
|
||||||
</tr>
|
|
||||||
<tr>
|
|
||||||
<th>NMTOKENS</th>
|
|
||||||
<td></td>
|
|
||||||
<td>
|
|
||||||
Attribute that specifies a number of name tokens, example: the
|
|
||||||
<code>class</code> attribute
|
|
||||||
</td>
|
|
||||||
</tr>
|
|
||||||
<tr>
|
|
||||||
<th>URI</th>
|
|
||||||
<td></td>
|
|
||||||
<td>
|
|
||||||
Attribute that specifies a URI, example: the <code>href</code>
|
|
||||||
attribute
|
|
||||||
</td>
|
|
||||||
</tr>
|
|
||||||
<tr>
|
|
||||||
<th>Number</th>
|
|
||||||
<td></td>
|
|
||||||
<td>
|
|
||||||
Attribute that specifies a positive integer
|
|
||||||
</td>
|
|
||||||
</tr>
|
|
||||||
</tbody>
|
|
||||||
</table>
|
|
||||||
|
|
||||||
<p>
|
|
||||||
For a complete list, consult
|
|
||||||
<a href="http://repo.or.cz/w/htmlpurifier.git?a=blob;hb=HEAD;f=library/HTMLPurifier/AttrTypes.php"><code>library/HTMLPurifier/AttrTypes.php</code></a>;
|
|
||||||
more information on attributes that accept parameters can be found on their
|
|
||||||
respective includes in
|
|
||||||
<a href="http://repo.or.cz/w/htmlpurifier.git?a=tree;hb=HEAD;f=library/HTMLPurifier/AttrDef"><code>library/HTMLPurifier/AttrDef</code></a>.
|
|
||||||
</p>
|
|
||||||
|
|
||||||
<p>
|
|
||||||
Sometimes, the restrictive list in AttrTypes just doesn't cut it. Don't
|
|
||||||
sweat: you can also use a fully instantiated object as the value. The
|
|
||||||
equivalent, verbose form of the above example is:
|
|
||||||
</p>
|
|
||||||
|
|
||||||
<pre>$config = HTMLPurifier_Config::createDefault();
|
|
||||||
$config->set('HTML.DefinitionID', 'enduser-customize.html tutorial');
|
|
||||||
$config->set('HTML.DefinitionRev', 1);
|
|
||||||
$config->set('Cache.DefinitionImpl', null); // remove this later!
|
|
||||||
$def = $config->getHTMLDefinition(true);
|
|
||||||
<strong>$def->addAttribute('a', 'target', new HTMLPurifier_AttrDef_Enum(
|
|
||||||
array('_blank','_self','_target','_top')
|
|
||||||
));</strong></pre>
|
|
||||||
|
|
||||||
<p>
|
|
||||||
Trust me, you'll learn to love the shorthand.
|
|
||||||
</p>
|
|
||||||
|
|
||||||
<h2>Add an element</h2>
|
|
||||||
|
|
||||||
<p>
|
|
||||||
Adding attributes is really small-fry stuff, though, and it was possible
|
|
||||||
to add them (albeit a bit more wordy) prior to 2.0. The real gem of
|
|
||||||
the Advanced API is adding elements. There are five questions to
|
|
||||||
ask when adding a new element:
|
|
||||||
</p>
|
|
||||||
|
|
||||||
<ol>
|
|
||||||
<li>What is the element's name?</li>
|
|
||||||
<li>What content set does this element belong to?</li>
|
|
||||||
<li>What are the allowed children of this element?</li>
|
|
||||||
<li>What attributes does the element allow that are general?</li>
|
|
||||||
<li>What attributes does the element allow that are specific to this element?</li>
|
|
||||||
</ol>
|
|
||||||
|
|
||||||
<p>
|
|
||||||
It's a mouthful, and you'll be slightly lost if you're not familiar with
|
|
||||||
the HTML specification, so let's explain them step by step.
|
|
||||||
</p>
|
|
||||||
|
|
||||||
<h3>Content set</h3>
|
|
||||||
|
|
||||||
<p>
|
|
||||||
The HTML specification defines two major content sets: Inline
|
|
||||||
and Block. Each of these
|
|
||||||
content sets contains a list of elements: Inline contains things like
|
|
||||||
<code>span</code> and <code>b</code> while Block contains things like
|
|
||||||
<code>div</code> and <code>blockquote</code>.
|
|
||||||
</p>
|
|
||||||
|
|
||||||
<p>
|
|
||||||
These content sets amount to a macro mechanism for HTML definition. Most
|
|
||||||
elements in HTML are organized into one of these two sets, and most
|
|
||||||
elements in HTML allow elements from one of these sets. If we had
|
|
||||||
to write each element verbatim into each other element's allowed
|
|
||||||
children, we would have ridiculously large lists; instead we use
|
|
||||||
content sets to compactify the declaration.
|
|
||||||
</p>
|
|
||||||
|
|
||||||
<p>
|
|
||||||
Practically speaking, there are several useful values you can use here:
|
|
||||||
</p>
|
|
||||||
|
|
||||||
<table class="table">
|
|
||||||
<thead>
|
|
||||||
<tr>
|
|
||||||
<th>Content set</th>
|
|
||||||
<th>Description</th>
|
|
||||||
</tr>
|
|
||||||
</thead>
|
|
||||||
<tbody>
|
|
||||||
<tr>
|
|
||||||
<th>Inline</th>
|
|
||||||
<td>Character level elements, text</td>
|
|
||||||
</tr>
|
|
||||||
<tr>
|
|
||||||
<th>Block</th>
|
|
||||||
<td>Block-like elements, like paragraphs and lists</td>
|
|
||||||
</tr>
|
|
||||||
<tr>
|
|
||||||
<th><em>false</em></th>
|
|
||||||
<td>
|
|
||||||
Any element that doesn't fit into the mold, for example <code>li</code>
|
|
||||||
or <code>tr</code>
|
|
||||||
</td>
|
|
||||||
</tr>
|
|
||||||
</tbody>
|
|
||||||
</table>
|
|
||||||
|
|
||||||
<p>
|
|
||||||
By specifying a valid value here, all other elements that use that
|
|
||||||
content set will also allow your element, without you having to do
|
|
||||||
anything. If you specify <em>false</em>, you'll have to register
|
|
||||||
your element manually.
|
|
||||||
</p>
|
|
||||||
|
|
||||||
<h3>Allowed children</h3>
|
|
||||||
|
|
||||||
<p>
|
|
||||||
Allowed children defines the elements that this element can contain.
|
|
||||||
The allowed values may range from none to a complex regexp depending on
|
|
||||||
your element.
|
|
||||||
</p>
|
|
||||||
|
|
||||||
<p>
|
|
||||||
If you've ever taken a look at the HTML DTD's before, you may have
|
|
||||||
noticed declarations like this:
|
|
||||||
</p>
|
|
||||||
|
|
||||||
<pre><!ELEMENT LI - O (%flow;)* -- list item --></pre>
|
|
||||||
|
|
||||||
<p>
|
|
||||||
The <code>(%flow;)*</code> indicates the allowed children of the
|
|
||||||
<code>li</code> tag: <code>li</code> allows any number of flow
|
|
||||||
elements as its children. (The <code>- O</code> allows the closing tag to be
|
|
||||||
omitted, though in XML this is not allowed.) In HTML Purifier,
|
|
||||||
we'd write it like <code>Flow</code> (here's where the content sets
|
|
||||||
we were discussing earlier come into play). There are three shorthand
|
|
||||||
content models you can specify:
|
|
||||||
</p>
|
|
||||||
|
|
||||||
<table class="table">
|
|
||||||
<thead>
|
|
||||||
<tr>
|
|
||||||
<th>Content model</th>
|
|
||||||
<th>Description</th>
|
|
||||||
</tr>
|
|
||||||
</thead>
|
|
||||||
<tbody>
|
|
||||||
<tr>
|
|
||||||
<th>Empty</th>
|
|
||||||
<td>No children allowed, like <code>br</code> or <code>hr</code></td>
|
|
||||||
</tr>
|
|
||||||
<tr>
|
|
||||||
<th>Inline</th>
|
|
||||||
<td>Any number of inline elements and text, like <code>span</code></td>
|
|
||||||
</tr>
|
|
||||||
<tr>
|
|
||||||
<th>Flow</th>
|
|
||||||
<td>Any number of inline elements, block elements and text, like <code>div</code></td>
|
|
||||||
</tr>
|
|
||||||
</tbody>
|
|
||||||
</table>
|
|
||||||
|
|
||||||
<p>
|
|
||||||
This covers 90% of all the cases out there, but what about elements that
|
|
||||||
break the mold like <code>ul</code>? This guy requires at least one
|
|
||||||
child, and the only valid children for it are <code>li</code>. The
|
|
||||||
content model is: <code>Required: li</code>. There are two parts: the
|
|
||||||
first type determines what <code>ChildDef</code> will be used to validate
|
|
||||||
content models. The most common values are:
|
|
||||||
</p>
|
|
||||||
|
|
||||||
<table class="table">
|
|
||||||
<thead>
|
|
||||||
<tr>
|
|
||||||
<th>Type</th>
|
|
||||||
<th>Description</th>
|
|
||||||
</tr>
|
|
||||||
</thead>
|
|
||||||
<tbody>
|
|
||||||
<tr>
|
|
||||||
<th>Required</th>
|
|
||||||
<td>Children must be one or more of the valid elements</td>
|
|
||||||
</tr>
|
|
||||||
<tr>
|
|
||||||
<th>Optional</th>
|
|
||||||
<td>Children can be any number of the valid elements</td>
|
|
||||||
</tr>
|
|
||||||
<tr>
|
|
||||||
<th>Custom</th>
|
|
||||||
<td>Children must follow the DTD-style regex</td>
|
|
||||||
</tr>
|
|
||||||
</tbody>
|
|
||||||
</table>
|
|
||||||
|
|
||||||
<p>
|
|
||||||
You can also implement your own <code>ChildDef</code>: this was done
|
|
||||||
for a few special cases in HTML Purifier such as <code>Chameleon</code>
|
|
||||||
(for <code>ins</code> and <code>del</code>), <code>StrictBlockquote</code>
|
|
||||||
and <code>Table</code>.
|
|
||||||
</p>
|
|
||||||
|
|
||||||
<p>
|
|
||||||
The second part specifies either valid elements or a regular expression.
|
|
||||||
Valid elements are separated with horizontal bars (|), i.e.
|
|
||||||
"<code>a | b | c</code>". Use #PCDATA to represent plain text.
|
|
||||||
Regular expressions are based off of DTD's style:
|
|
||||||
</p>
|
|
||||||
|
|
||||||
<ul>
|
|
||||||
<li>Parentheses () are used for grouping</li>
|
|
||||||
<li>Commas (,) separate elements that should come one after another</li>
|
|
||||||
<li>Horizontal bars (|) indicate one or the other elements should be used</li>
|
|
||||||
<li>Plus signs (+) are used for a one or more match</li>
|
|
||||||
<li>Asterisks (*) are used for a zero or more match</li>
|
|
||||||
<li>Question marks (?) are used for a zero or one match</li>
|
|
||||||
</ul>
|
|
||||||
|
|
||||||
<p>
|
|
||||||
For example, "<code>a, b?, (c | d), e+, f*</code>" means "In this order,
|
|
||||||
one <code>a</code> element, at most one <code>b</code> element,
|
|
||||||
one <code>c</code> or <code>d</code> element (but not both), one or more
|
|
||||||
<code>e</code> elements, and any number of <code>f</code> elements."
|
|
||||||
Regex veterans should be able to jump right in, and those not so savvy
|
|
||||||
can always copy-paste W3C's content model definitions into HTML Purifier
|
|
||||||
and hope for the best.
|
|
||||||
</p>
|
|
||||||
|
|
||||||
<p>
|
|
||||||
A word of warning: while the regex format is extremely flexible on
|
|
||||||
the developer's side, it is
|
|
||||||
quite unforgiving on the user's side. If the user input does not <em>exactly</em>
|
|
||||||
match the specification, the entire contents of the element will
|
|
||||||
be nuked. This is why there are specific content model types like
|
|
||||||
Optional and Required: while they could be implemented as <code>Custom:
|
|
||||||
(valid | elements)*</code>, the custom classes contain special recovery
|
|
||||||
measures that make sure as much of the user's original content gets
|
|
||||||
through. HTML Purifier's core, as a rule, does not use Custom.
|
|
||||||
</p>
|
|
||||||
|
|
||||||
<p>
|
|
||||||
One final note: you can also use Content Sets inside your valid elements
|
|
||||||
lists or regular expressions. In fact, the three shorthand content models
|
|
||||||
mentioned above are just that, abbreviations:
|
|
||||||
</p>
|
|
||||||
|
|
||||||
<table class="table">
|
|
||||||
<thead>
|
|
||||||
<tr>
|
|
||||||
<th>Content model</th>
|
|
||||||
<th>Implementation</th>
|
|
||||||
</tr>
|
|
||||||
</thead>
|
|
||||||
<tbody>
|
|
||||||
<tr>
|
|
||||||
<th>Inline</th>
|
|
||||||
<td>Optional: Inline | #PCDATA</td>
|
|
||||||
</tr>
|
|
||||||
<tr>
|
|
||||||
<th>Flow</th>
|
|
||||||
<td>Optional: Flow | #PCDATA</td>
|
|
||||||
</tr>
|
|
||||||
</tbody>
|
|
||||||
</table>
|
|
||||||
|
|
||||||
<p>
|
|
||||||
When the definition is compiled, Inline will be replaced with a
|
|
||||||
horizontal-bar separated list of inline elements. Also, notice that
|
|
||||||
it does not contain text: you have to specify that yourself.
|
|
||||||
</p>
|
|
||||||
|
|
||||||
<h3>Common attributes</h3>
|
|
||||||
|
|
||||||
<p>
|
|
||||||
Congratulations: you have just gotten over the proverbial hump (Allowed
|
|
||||||
children). Common attributes is much simpler, and boils down to
|
|
||||||
one question: does your element have the <code>id</code>, <code>style</code>,
|
|
||||||
<code>class</code>, <code>title</code> and <code>lang</code> attributes?
|
|
||||||
If so, you'll want to specify the <code>Common</code> attribute collection,
|
|
||||||
which contains these five attributes that are found on almost every
|
|
||||||
HTML element in the specification.
|
|
||||||
</p>
|
|
||||||
|
|
||||||
<p>
|
|
||||||
There are a few more collections, but they're really edge cases:
|
|
||||||
</p>
|
|
||||||
|
|
||||||
<table class="table">
|
|
||||||
<thead>
|
|
||||||
<tr>
|
|
||||||
<th>Collection</th>
|
|
||||||
<th>Attributes</th>
|
|
||||||
</tr>
|
|
||||||
</thead>
|
|
||||||
<tbody>
|
|
||||||
<tr>
|
|
||||||
<th>I18N</th>
|
|
||||||
<td><code>lang</code>, possibly <code>xml:lang</code></td>
|
|
||||||
</tr>
|
|
||||||
<tr>
|
|
||||||
<th>Core</th>
|
|
||||||
<td><code>style</code>, <code>class</code>, <code>id</code> and <code>title</code></td>
|
|
||||||
</tr>
|
|
||||||
</tbody>
|
|
||||||
</table>
|
|
||||||
|
|
||||||
<p>
|
|
||||||
Common is a combination of the above-mentioned collections.
|
|
||||||
</p>
|
|
||||||
|
|
||||||
<p class="aside">
|
|
||||||
Readers familiar with the modularization may have noticed that the Core
|
|
||||||
attribute collection differs from that specified by the <a
|
|
||||||
href="http://www.w3.org/TR/xhtml-modularization/abstract_modules.html#s_commonatts">abstract
|
|
||||||
modules of the XHTML Modularization 1.1</a>. We believe this section
|
|
||||||
to be in error, as <code>br</code> permits the use of the <code>style</code>
|
|
||||||
attribute even though it uses the <code>Core</code> collection, and
|
|
||||||
the DTD and XML Schemas supplied by W3C support our interpretation.
|
|
||||||
</p>
|
|
||||||
|
|
||||||
<h3>Attributes</h3>
|
|
||||||
|
|
||||||
<p>
|
|
||||||
If you didn't read the <a href="#addAttribute">earlier section on
|
|
||||||
adding attributes</a>, read it now. The last parameter is simply
|
|
||||||
an array of attribute names to attribute implementations, in the exact
|
|
||||||
same format as <code>addAttribute()</code>.
|
|
||||||
</p>
|
|
||||||
|
|
||||||
<h3>Putting it all together</h3>
|
|
||||||
|
|
||||||
<p>
|
|
||||||
We're going to implement <code>form</code>. Before we embark, let's
|
|
||||||
grab a reference implementation from over at the
|
|
||||||
<a href="http://www.w3.org/TR/html4/sgml/loosedtd.html">transitional DTD</a>:
|
|
||||||
</p>
|
|
||||||
|
|
||||||
<pre><!ELEMENT FORM - - (%flow;)* -(FORM) -- interactive form -->
|
|
||||||
<!ATTLIST FORM
|
|
||||||
%attrs; -- %coreattrs, %i18n, %events --
|
|
||||||
action %URI; #REQUIRED -- server-side form handler --
|
|
||||||
method (GET|POST) GET -- HTTP method used to submit the form--
|
|
||||||
enctype %ContentType; "application/x-www-form-urlencoded"
|
|
||||||
accept %ContentTypes; #IMPLIED -- list of MIME types for file upload --
|
|
||||||
name CDATA #IMPLIED -- name of form for scripting --
|
|
||||||
onsubmit %Script; #IMPLIED -- the form was submitted --
|
|
||||||
onreset %Script; #IMPLIED -- the form was reset --
|
|
||||||
target %FrameTarget; #IMPLIED -- render in this frame --
|
|
||||||
accept-charset %Charsets; #IMPLIED -- list of supported charsets --
|
|
||||||
></pre>
|
|
||||||
|
|
||||||
<p>
|
|
||||||
Juicy! With just this, we can answer four of our five questions:
|
|
||||||
</p>
|
|
||||||
|
|
||||||
<ol>
|
|
||||||
<li>What is the element's name? <strong>form</strong></li>
|
|
||||||
<li>What content set does this element belong to? <strong>Block</strong>
|
|
||||||
(this needs a little sleuthing, I find the easiest way is to search
|
|
||||||
the DTD for <code>FORM</code> and determine which set it is in.)</li>
|
|
||||||
<li>What are the allowed children of this element? <strong>Zero
|
|
||||||
or more flow elements, but no nested <code>form</code>s</strong></li>
|
|
||||||
<li>What attributes does the element allow that are general? <strong>Common</strong></li>
|
|
||||||
<li>What attributes does the element allow that are specific to this element? <strong>A whole bunch, see ATTLIST;
|
|
||||||
we're going to do the vital ones: <code>action</code>, <code>method</code> and <code>name</code></strong></li>
|
|
||||||
</ol>
|
|
||||||
|
|
||||||
<p>
|
|
||||||
Time for some code:
|
|
||||||
</p>
|
|
||||||
|
|
||||||
<pre>$config = HTMLPurifier_Config::createDefault();
|
|
||||||
$config->set('HTML.DefinitionID', 'enduser-customize.html tutorial');
|
|
||||||
$config->set('HTML.DefinitionRev', 1);
|
|
||||||
$config->set('Cache.DefinitionImpl', null); // remove this later!
|
|
||||||
$def = $config->getHTMLDefinition(true);
|
|
||||||
$def->addAttribute('a', 'target', new HTMLPurifier_AttrDef_Enum(
|
|
||||||
array('_blank','_self','_target','_top')
|
|
||||||
));
|
|
||||||
<strong>$form = $def->addElement(
|
|
||||||
'form', // name
|
|
||||||
'Block', // content set
|
|
||||||
'Flow', // allowed children
|
|
||||||
'Common', // attribute collection
|
|
||||||
array( // attributes
|
|
||||||
'action*' => 'URI',
|
|
||||||
'method' => 'Enum#get,post',
|
|
||||||
'name' => 'ID'
|
|
||||||
)
|
|
||||||
);
|
|
||||||
$form->excludes = array('form' => true);</strong></pre>
|
|
||||||
|
|
||||||
<p>
|
|
||||||
Each of the parameters corresponds to one of the questions we asked.
|
|
||||||
Notice that we added an asterisk to the end of the <code>action</code>
|
|
||||||
attribute to indicate that it is required. If someone specifies a
|
|
||||||
<code>form</code> without that attribute, the tag will be axed.
|
|
||||||
Also, the extra line at the end is a special extra declaration that
|
|
||||||
prevents forms from being nested within each other.
|
|
||||||
</p>
|
|
||||||
|
|
||||||
<p>
|
|
||||||
And that's all there is to it! Implementing the rest of the form
|
|
||||||
module is left as an exercise to the user; to see more examples
|
|
||||||
check the <a href="http://repo.or.cz/w/htmlpurifier.git?a=tree;hb=HEAD;f=library/HTMLPurifier/HTMLModule"><code>library/HTMLPurifier/HTMLModule/</code></a> directory
|
|
||||||
in your local HTML Purifier installation.
|
|
||||||
</p>
|
|
||||||
|
|
||||||
<h2>And beyond...</h2>
|
|
||||||
|
|
||||||
<p>
|
|
||||||
Perceptive users may have realized that, to a certain extent, we
|
|
||||||
have simply re-implemented the facilities of XML Schema or the
|
|
||||||
Document Type Definition. What you are seeing here, however, is
|
|
||||||
not just an XML Schema or Document Type Definition: it is a fully
|
|
||||||
expressive method of specifying the definition of HTML that is
|
|
||||||
a portable superset of the capabilities of the two above-mentioned schema
|
|
||||||
languages. What makes HTMLDefinition so powerful is the fact that
|
|
||||||
if we don't have an implementation for a content model or an attribute
|
|
||||||
definition, you can supply it yourself by writing a PHP class.
|
|
||||||
</p>
|
|
||||||
|
|
||||||
<p>
|
|
||||||
There are many facets of HTMLDefinition beyond the Advanced API I have
|
|
||||||
walked you through today. To find out more about these, you can
|
|
||||||
check out these source files:
|
|
||||||
</p>
|
|
||||||
|
|
||||||
<ul>
|
|
||||||
<li><a href="http://repo.or.cz/w/htmlpurifier.git?a=blob;hb=HEAD;f=library/HTMLPurifier/HTMLModule.php"><code>library/HTMLPurifier/HTMLModule.php</code></a></li>
|
|
||||||
<li><a href="http://repo.or.cz/w/htmlpurifier.git?a=blob;hb=HEAD;f=library/HTMLPurifier/ElementDef.php"><code>library/HTMLPurifier/ElementDef.php</code></a></li>
|
|
||||||
</ul>
|
|
||||||
|
|
||||||
<h2 id="optimized">Notes for HTML Purifier 4.2.0 and earlier</h2>
|
|
||||||
|
|
||||||
<p>
|
|
||||||
Previously, this tutorial gave some incorrect template code for
|
|
||||||
editing raw definitions, and that template code will now produce the
|
|
||||||
error <q>Due to a documentation error in previous version of HTML
|
|
||||||
Purifier...</q> Here is how to mechanically transform old-style
|
|
||||||
code into new-style code.
|
|
||||||
</p>
|
|
||||||
|
|
||||||
<p>
|
|
||||||
First, identify all code that edits the raw definition object, and
|
|
||||||
put it together. Ensure none of this code must be run on every
|
|
||||||
request; if some sub-part needs to always be run, move it outside
|
|
||||||
this block. Here is an example below, with the raw definition
|
|
||||||
object code bolded.
|
|
||||||
</p>
|
|
||||||
|
|
||||||
<pre>$config = HTMLPurifier_Config::createDefault();
|
|
||||||
$config->set('HTML.DefinitionID', 'enduser-customize.html tutorial');
|
|
||||||
$config->set('HTML.DefinitionRev', 1);
|
|
||||||
$def = $config->getHTMLDefinition(true);
|
|
||||||
<strong>$def->addAttribute('a', 'target', 'Enum#_blank,_self,_parent,_top');</strong>
|
|
||||||
$purifier = new HTMLPurifier($config);</pre>
|
|
||||||
|
|
||||||
<p>
|
|
||||||
Next, replace the raw definition retrieval with a
|
|
||||||
maybeGetRawHTMLDefinition method call inside an if conditional, and
|
|
||||||
place the editing code inside that if block.
|
|
||||||
</p>
|
|
||||||
|
|
||||||
<pre>$config = HTMLPurifier_Config::createDefault();
|
|
||||||
$config->set('HTML.DefinitionID', 'enduser-customize.html tutorial');
|
|
||||||
$config->set('HTML.DefinitionRev', 1);
|
|
||||||
<strong>if ($def = $config->maybeGetRawHTMLDefinition()) {
|
|
||||||
$def->addAttribute('a', 'target', 'Enum#_blank,_self,_parent,_top');
|
|
||||||
}</strong>
|
|
||||||
$purifier = new HTMLPurifier($config);</pre>
|
|
||||||
|
|
||||||
<p>
|
|
||||||
And you're done! Alternatively, if you're OK with not ever caching
|
|
||||||
your code, the following will still work and not emit warnings.
|
|
||||||
</p>
|
|
||||||
|
|
||||||
<pre>$config = HTMLPurifier_Config::createDefault();
|
|
||||||
$def = $config->getHTMLDefinition(true);
|
|
||||||
$def->addAttribute('a', 'target', 'Enum#_blank,_self,_parent,_top');
|
|
||||||
$purifier = new HTMLPurifier($config);</pre>
|
|
||||||
|
|
||||||
<p>
|
|
||||||
A slightly less efficient version of this was what was going on with
|
|
||||||
old versions of HTML Purifier.
|
|
||||||
</p>
|
|
||||||
|
|
||||||
<p>
|
|
||||||
<em>Technical notes:</em> ajh pointed out <a
|
|
||||||
href="http://htmlpurifier.org/phorum/read.php?5,5164,5169#msg-5169">in a forum topic</a> that
|
|
||||||
HTML Purifier appeared to be repeatedly writing to the cache even
|
|
||||||
when a cache entry already existed. Investigation led to the
|
|
||||||
discovery of the following infelicity: caching of customized
|
|
||||||
definitions didn't actually work! The problem was that even though
|
|
||||||
a cache file would be written out at the end of the process, there
|
|
||||||
was no way for HTML Purifier to say, <q>Actually, I've already got a
|
|
||||||
copy of your work, no need to reconfigure your
|
|
||||||
customizations</q>. This required the API to change: placing
|
|
||||||
all of the customizations to the raw definition object in a
|
|
||||||
conditional which could be skipped.
|
|
||||||
</p>
|
|
||||||
|
|
||||||
</body></html>
|
|
||||||
|
|
||||||
<!-- vim: et sw=4 sts=4
|
|
||||||
-->
|
|
@@ -15,7 +15,7 @@
|
|||||||
|
|
||||||
<div id="filing">Filed under End-User</div>
|
<div id="filing">Filed under End-User</div>
|
||||||
<div id="index">Return to the <a href="index.html">index</a>.</div>
|
<div id="index">Return to the <a href="index.html">index</a>.</div>
|
||||||
<div id="home"><a href="http://htmlpurifier.org/">HTML Purifier</a> End-User Documentation</div>
|
<div id="home"><a href="http://hp.jpsband.org/">HTML Purifier</a> End-User Documentation</div>
|
||||||
|
|
||||||
<p>Prior to HTML Purifier 1.2.0, this library blithely accepted user input that
|
<p>Prior to HTML Purifier 1.2.0, this library blithely accepted user input that
|
||||||
looked like this:</p>
|
looked like this:</p>
|
||||||
@@ -31,7 +31,7 @@ by default.</p>
|
|||||||
|
|
||||||
<p>IDs, however, are quite useful functionality to have, so if users start
|
<p>IDs, however, are quite useful functionality to have, so if users start
|
||||||
complaining about broken anchors you'll probably want to turn them back on
|
complaining about broken anchors you'll probably want to turn them back on
|
||||||
with %Attr.EnableID. But before you go mucking around with the config
|
with %HTML.EnableAttrID. But before you go mucking around with the config
|
||||||
object, it's probably worth taking some precautions to keep your page
|
object, it's probably worth taking some precautions to keep your page
|
||||||
validating. Why?</p>
|
validating. Why?</p>
|
||||||
|
|
||||||
@@ -56,9 +56,9 @@ validating. Why?</p>
|
|||||||
deal with the most obvious solution: preventing users from using any IDs that
|
deal with the most obvious solution: preventing users from using any IDs that
|
||||||
appear elsewhere on the document. The method is simple:</p>
|
appear elsewhere on the document. The method is simple:</p>
|
||||||
|
|
||||||
<pre>$config->set('Attr.EnableID', true);
|
<pre>$config->set('HTML', 'EnableAttrID', true);
|
||||||
$config->set('Attr.IDBlacklist', array(
|
$config->set('Attr', 'IDBlacklist', array(
|
||||||
'list', 'of', 'attribute', 'values', 'that', 'are', 'forbidden'
|
'list', 'of', 'attributes', 'that', 'are', 'forbidden'
|
||||||
));</pre>
|
));</pre>
|
||||||
|
|
||||||
<p>That being said, there are some notable drawbacks. First of all, you have to
|
<p>That being said, there are some notable drawbacks. First of all, you have to
|
||||||
@@ -71,9 +71,9 @@ to possible standards-compliance issues.</p>
|
|||||||
<p>Furthermore, this position becomes untenable when a single web page must hold
|
<p>Furthermore, this position becomes untenable when a single web page must hold
|
||||||
multiple portions of user-submitted content. Since there's obviously no way
|
multiple portions of user-submitted content. Since there's obviously no way
|
||||||
to find out before-hand what IDs users will use, the blacklist is helpless.
|
to find out before-hand what IDs users will use, the blacklist is helpless.
|
||||||
And since HTML Purifier validates each segment separately, perhaps doing
|
And even since HTML Purifier validates each segment separately, perhaps doing
|
||||||
so at different times, it would be extremely difficult to dynamically update
|
so at different times, it would be extremely difficult to dynamically update
|
||||||
the blacklist in between runs.</p>
|
the blacklist inbetween runs.</p>
|
||||||
|
|
||||||
<p>Finally, simply destroying the ID is extremely un-userfriendly behavior: after
|
<p>Finally, simply destroying the ID is extremely un-userfriendly behavior: after
|
||||||
all, they might have simply specified a duplicate ID by accident.</p>
|
all, they might have simply specified a duplicate ID by accident.</p>
|
||||||
@@ -88,8 +88,8 @@ all, they might have simply specified a duplicate ID by accident.</p>
|
|||||||
<p>This method, too, is quite simple: add a prefix to all user IDs. With this
|
<p>This method, too, is quite simple: add a prefix to all user IDs. With this
|
||||||
code:</p>
|
code:</p>
|
||||||
|
|
||||||
<pre>$config->set('Attr.EnableID', true);
|
<pre>$config->set('HTML', 'EnableAttrID', true);
|
||||||
$config->set('Attr.IDPrefix', 'user_');</pre>
|
$config->set('Attr', 'IDPrefix', 'user_');</pre>
|
||||||
|
|
||||||
<p>...this:</p>
|
<p>...this:</p>
|
||||||
|
|
||||||
@@ -109,7 +109,7 @@ user_ to the beginning."</p>
|
|||||||
nothing about multiple HTML Purifier outputs on one page. Thus, we have
|
nothing about multiple HTML Purifier outputs on one page. Thus, we have
|
||||||
a second configuration value to piggy-back off of: %Attr.IDPrefixLocal:</p>
|
a second configuration value to piggy-back off of: %Attr.IDPrefixLocal:</p>
|
||||||
|
|
||||||
<pre>$config->set('Attr.IDPrefixLocal', 'comment' . $id . '_');</pre>
|
<pre>$config->set('Attr', 'IDPrefixLocal', 'comment' . $id . '_');</pre>
|
||||||
|
|
||||||
<p>This new attribute does nothing but append on to the regular IDPrefix, but is
|
<p>This new attribute does nothing but append on to the regular IDPrefix, but is
|
||||||
special in that it is volatile: its value is determined at run-time and
|
special in that it is volatile: its value is determined at run-time and
|
||||||
@@ -137,12 +137,11 @@ anchors is beyond me.</p>
|
|||||||
|
|
||||||
<p>To revert back to pre-1.2.0 behavior, simply:</p>
|
<p>To revert back to pre-1.2.0 behavior, simply:</p>
|
||||||
|
|
||||||
<pre>$config->set('Attr.EnableID', true);</pre>
|
<pre>$config->set('HTML', 'EnableAttrID', true);</pre>
|
||||||
|
|
||||||
<p>Don't come crying to me when your page mysteriously stops validating, though.</p>
|
<p>Don't come crying to me when your page mysteriously stops validating, though.</p>
|
||||||
|
|
||||||
|
<div id="version">$Id$</div>
|
||||||
|
|
||||||
</body>
|
</body>
|
||||||
</html>
|
</html>
|
||||||
|
|
||||||
<!-- vim: et sw=4 sts=4
|
|
||||||
-->
|
|
||||||
|
@@ -55,5 +55,3 @@ HTML tags. Things like blog comments are, in all likelihood, most appropriately
|
|||||||
written in an extremely restrictive set of markup that doesn't require
|
written in an extremely restrictive set of markup that doesn't require
|
||||||
all this functionality (or not written in HTML at all), although this may
|
all this functionality (or not written in HTML at all), although this may
|
||||||
be changing in the future with the addition of levels of filtering.
|
be changing in the future with the addition of levels of filtering.
|
||||||
|
|
||||||
vim: et sw=4 sts=4
|
|
||||||
|
@@ -8,11 +8,15 @@ to be effective. Things to remember:
|
|||||||
|
|
||||||
1. Character Encoding: see enduser-utf8.html for more info.
|
1. Character Encoding: see enduser-utf8.html for more info.
|
||||||
|
|
||||||
2. IDs: see enduser-id.html for more info
|
2. Doctype: document pending feature completion
|
||||||
|
Not strictly necessary, actually. More in-depth discussion once we figure
|
||||||
|
out how to get strict loose mode working.
|
||||||
|
|
||||||
3. URIs: see enduser-uri-filter.html
|
3. IDs: see enduser-id.html for more info
|
||||||
|
|
||||||
4. CSS: document pending
|
4. Links: document pending feature completion
|
||||||
|
Rudimentary blacklisting, we should also allow only relative URIs. We
|
||||||
|
need a doc to explain the stuff.
|
||||||
|
|
||||||
|
5. CSS: document pending
|
||||||
Explain which CSS styles we blocked and why.
|
Explain which CSS styles we blocked and why.
|
||||||
|
|
||||||
vim: et sw=4 sts=4
|
|
||||||
|
@@ -15,7 +15,7 @@
|
|||||||
|
|
||||||
<div id="filing">Filed under End-User</div>
|
<div id="filing">Filed under End-User</div>
|
||||||
<div id="index">Return to the <a href="index.html">index</a>.</div>
|
<div id="index">Return to the <a href="index.html">index</a>.</div>
|
||||||
<div id="home"><a href="http://htmlpurifier.org/">HTML Purifier</a> End-User Documentation</div>
|
<div id="home"><a href="http://hp.jpsband.org/">HTML Purifier</a> End-User Documentation</div>
|
||||||
|
|
||||||
<p>HTML Purifier is a very powerful library. But with power comes great
|
<p>HTML Purifier is a very powerful library. But with power comes great
|
||||||
responsibility, in the form of longer execution times. Remember, this
|
responsibility, in the form of longer execution times. Remember, this
|
||||||
@@ -115,6 +115,3 @@ if you decide to do that! Especially if you port HTML Purifier to C++.
|
|||||||
|
|
||||||
</body>
|
</body>
|
||||||
</html>
|
</html>
|
||||||
|
|
||||||
<!-- vim: et sw=4 sts=4
|
|
||||||
-->
|
|
||||||
|
@@ -1,231 +0,0 @@
|
|||||||
<?xml version="1.0" encoding="UTF-8"?>
|
|
||||||
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
|
|
||||||
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
|
|
||||||
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"><head>
|
|
||||||
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
|
|
||||||
<meta name="description" content="Tutorial for tweaking HTML Purifier's Tidy-like behavior." />
|
|
||||||
<link rel="stylesheet" type="text/css" href="style.css" />
|
|
||||||
|
|
||||||
<title>Tidy - HTML Purifier</title>
|
|
||||||
|
|
||||||
</head><body>
|
|
||||||
|
|
||||||
<h1>Tidy</h1>
|
|
||||||
|
|
||||||
<div id="filing">Filed under Development</div>
|
|
||||||
<div id="index">Return to the <a href="index.html">index</a>.</div>
|
|
||||||
<div id="home"><a href="http://htmlpurifier.org/">HTML Purifier</a> End-User Documentation</div>
|
|
||||||
|
|
||||||
<p>You've probably heard of HTML Tidy, Dave Raggett's little piece
|
|
||||||
of software that cleans up poorly written HTML. Let me say it straight
|
|
||||||
out:</p>
|
|
||||||
|
|
||||||
<p class="emphasis">This ain't HTML Tidy!</p>
|
|
||||||
|
|
||||||
<p>Rather, Tidy stands for a cool set of Tidy-inspired features in HTML Purifier
|
|
||||||
that allows users to submit deprecated elements and attributes and get
|
|
||||||
valid strict markup back. For example:</p>
|
|
||||||
|
|
||||||
<pre>&lt;center&gt;Centered&lt;/center&gt;</pre>
|
|
||||||
|
|
||||||
<p>...becomes:</p>
|
|
||||||
|
|
||||||
<pre>&lt;div style="text-align:center;"&gt;Centered&lt;/div&gt;</pre>
|
|
||||||
|
|
||||||
<p>...when this particular fix is run on the HTML. This tutorial will give
|
|
||||||
you the lowdown of what exactly HTML Purifier will do when Tidy
|
|
||||||
is on, and how to fine-tune this behavior. Once again, <strong>you do
|
|
||||||
not need Tidy installed on your PHP to use these features!</strong></p>
|
|
||||||
|
|
||||||
<h2>What does it do?</h2>
|
|
||||||
|
|
||||||
<p>Tidy will do several things to your HTML:</p>
|
|
||||||
|
|
||||||
<ul>
|
|
||||||
<li>Convert deprecated elements and attributes to standards-compliant
|
|
||||||
alternatives</li>
|
|
||||||
<li>Enforce XHTML compatibility guidelines and other best practices</li>
|
|
||||||
<li>Preserve data that would normally be removed as per W3C</li>
|
|
||||||
</ul>
|
|
||||||
|
|
||||||
<h2>What are levels?</h2>
|
|
||||||
|
|
||||||
<p>Levels describe how aggressive the Tidy module should be when
|
|
||||||
cleaning up HTML. There are four levels to pick: none, light, medium
|
|
||||||
and heavy. Each of these levels has a well-defined set of behavior
|
|
||||||
associated with it, although it may change depending on your doctype.</p>
|
|
||||||
|
|
||||||
<dl>
|
|
||||||
<dt>light</dt>
|
|
||||||
<dd>This is the <strong>lenient</strong> level. If a tag or attribute
|
|
||||||
is about to be removed because it isn't supported by the
|
|
||||||
doctype, Tidy will step in and change into an alternative that
|
|
||||||
is supported.</dd>
|
|
||||||
<dt>medium</dt>
|
|
||||||
<dd>This is the <strong>correctional</strong> level. At this level,
|
|
||||||
all the functions of light are performed, as well as some extra,
|
|
||||||
non-essential best practices enforcement. Changes made on this
|
|
||||||
level are very benign and are unlikely to cause problems.</dd>
|
|
||||||
<dt>heavy</dt>
|
|
||||||
<dd>This is the <strong>aggressive</strong> level. If a tag or
|
|
||||||
attribute is deprecated, it will be converted into a non-deprecated
|
|
||||||
version, no ifs, ands or buts.</dd>
|
|
||||||
</dl>
|
|
||||||
|
|
||||||
<p>By default, Tidy operates on the <strong>medium</strong> level. You can
|
|
||||||
change the level of cleaning by setting the %HTML.TidyLevel configuration
|
|
||||||
directive:</p>
|
|
||||||
|
|
||||||
<pre>$config->set('HTML.TidyLevel', 'heavy'); // burn baby burn!</pre>
|
|
||||||
|
|
||||||
<h2>Is the light level really light?</h2>
|
|
||||||
|
|
||||||
<p>It depends on what doctype you're using. If your documents are HTML
|
|
||||||
4.01 <em>Transitional</em>, HTML Purifier will be lazy
|
|
||||||
and won't clean up your <code>center</code>
|
|
||||||
or <code>font</code> tags. But if you're using HTML 4.01 <em>Strict</em>,
|
|
||||||
HTML Purifier has no choice: it has to convert them, or they will
|
|
||||||
be nuked out of existence. So while light on Transitional will result
|
|
||||||
in little to no changes, light on Strict will still result in quite
|
|
||||||
a lot of fixes.</p>
|
|
||||||
|
|
||||||
<p>This is different behavior from 1.6 or before, where deprecated
|
|
||||||
tags in transitional documents would
|
|
||||||
always be cleaned up regardless. This is also better behavior.</p>
|
|
||||||
|
|
||||||
<h2>My pages look different!</h2>
|
|
||||||
|
|
||||||
<p>HTML Purifier is tasked with converting deprecated tags and
|
|
||||||
attributes to standards-compliant alternatives, which usually
|
|
||||||
need copious amounts of CSS. It's also not foolproof: sometimes
|
|
||||||
things do get lost in the translation. This is why when HTML Purifier
|
|
||||||
can get away with not doing cleaning, it won't; this is why
|
|
||||||
the default value is <strong>medium</strong> and not heavy.</p>
|
|
||||||
|
|
||||||
<p>Fortunately, only a few attributes have problems with the switch
|
|
||||||
over. They are described below:</p>
|
|
||||||
|
|
||||||
<table class="table">
|
|
||||||
<thead><tr>
|
|
||||||
<th>Element@Attr</th>
|
|
||||||
<th>Changes</th>
|
|
||||||
</tr></thead>
|
|
||||||
<tbody>
|
|
||||||
<tr>
|
|
||||||
<td>caption@align</td>
|
|
||||||
<td>Firefox supports stuffing the caption on the
|
|
||||||
left and right side of the table, a feature that
|
|
||||||
Internet Explorer, understandably, does not have.
|
|
||||||
When align equals right or left, the text will simply
|
|
||||||
be aligned on the left or right side.</td>
|
|
||||||
</tr>
|
|
||||||
<tr>
|
|
||||||
<td>img@align</td>
|
|
||||||
<td>The implementation for align bottom is good, but not
|
|
||||||
perfect. There are a few pixel differences.</td>
|
|
||||||
</tr>
|
|
||||||
<tr>
|
|
||||||
<td>br@clear</td>
|
|
||||||
<td>Clear both gets a little wonky in Internet Explorer. Haven't
|
|
||||||
really been able to figure out why.</td>
|
|
||||||
</tr>
|
|
||||||
<tr>
|
|
||||||
<td>hr@noshade</td>
|
|
||||||
<td>All browsers implement this slightly differently: we've
|
|
||||||
chosen to make noshade horizontal rules gray.</td>
|
|
||||||
</tr>
|
|
||||||
</tbody>
|
|
||||||
</table>
|
|
||||||
|
|
||||||
<p>There are a few more minor, although irritating, bugs.
|
|
||||||
Some older browsers support deprecated attributes,
|
|
||||||
but not CSS. Transformed elements and attributes will look unstyled
|
|
||||||
to said browsers. Also, CSS precedence is slightly different for
|
|
||||||
inline styles versus presentational markup. In increasing precedence:</p>
|
|
||||||
|
|
||||||
<ol>
|
|
||||||
<li>Presentational attributes</li>
|
|
||||||
<li>External style sheets</li>
|
|
||||||
<li>Inline styling</li>
|
|
||||||
</ol>
|
|
||||||
|
|
||||||
<p>This means that styling that may have been masked by external CSS
|
|
||||||
declarations will start showing up (a good thing, perhaps). Finally,
|
|
||||||
if you've turned off the style attribute, almost all of
|
|
||||||
these transformations will not work. Sorry mates.</p>
|
|
||||||
|
|
||||||
<p>You can review the rendering before and after of these transformations
|
|
||||||
by consulting the <a
|
|
||||||
href="http://htmlpurifier.org/live/smoketests/attrTransform.php">attrTransform.php
|
|
||||||
smoketest</a>.</p>
|
|
||||||
|
|
||||||
<h2>I like the general idea, but the specifics bug me!</h2>
|
|
||||||
|
|
||||||
<p>So you want HTML Purifier to clean up your HTML, but you're not
|
|
||||||
so happy about the br@clear implementation. That's perfectly fine!
|
|
||||||
HTML Purifier will make accommodations:</p>
|
|
||||||
|
|
||||||
<pre>$config->set('HTML.Doctype', 'XHTML 1.0 Transitional');
|
|
||||||
$config->set('HTML.TidyLevel', 'heavy'); // all changes, minus...
|
|
||||||
<strong>$config->set('HTML.TidyRemove', 'br@clear');</strong></pre>
|
|
||||||
|
|
||||||
<p>That third line does the magic, removing the br@clear fix
|
|
||||||
from the module, ensuring that <code>&lt;br clear="both" /&gt;</code>
|
|
||||||
will pass through unharmed. The reverse is possible too:</p>
|
|
||||||
|
|
||||||
<pre>$config->set('HTML.Doctype', 'XHTML 1.0 Transitional');
|
|
||||||
$config->set('HTML.TidyLevel', 'none'); // no changes, plus...
|
|
||||||
<strong>$config->set('HTML.TidyAdd', 'p@align');</strong></pre>
|
|
||||||
|
|
||||||
<p>In this case, all transformations are shut off, except for the p@align
|
|
||||||
one, which you found handy.</p>
|
|
||||||
|
|
||||||
<p>To find out what the names of fixes you want to turn on or off are,
|
|
||||||
you'll have to consult the source code, specifically the files in
|
|
||||||
<code>HTMLPurifier/HTMLModule/Tidy/</code>. There is, however, a
|
|
||||||
general syntax:</p>
|
|
||||||
|
|
||||||
<table class="table">
|
|
||||||
<thead>
|
|
||||||
<tr>
|
|
||||||
<th>Name</th>
|
|
||||||
<th>Example</th>
|
|
||||||
<th>Interpretation</th>
|
|
||||||
</tr>
|
|
||||||
</thead>
|
|
||||||
<tbody>
|
|
||||||
<tr>
|
|
||||||
<td>element</td>
|
|
||||||
<td>font</td>
|
|
||||||
<td>Tag transform for <em>element</em></td>
|
|
||||||
</tr>
|
|
||||||
<tr>
|
|
||||||
<td>element@attr</td>
|
|
||||||
<td>br@clear</td>
|
|
||||||
<td>Attribute transform for <em>attr</em> on <em>element</em></td>
|
|
||||||
</tr>
|
|
||||||
<tr>
|
|
||||||
<td>@attr</td>
|
|
||||||
<td>@lang</td>
|
|
||||||
<td>Global attribute transform for <em>attr</em></td>
|
|
||||||
</tr>
|
|
||||||
<tr>
|
|
||||||
<td>e#content_model_type</td>
|
|
||||||
<td>blockquote#content_model_type</td>
|
|
||||||
<td>Change of child processing implementation for <em>e</em></td>
|
|
||||||
</tr>
|
|
||||||
</tbody>
|
|
||||||
</table>
|
|
||||||
|
|
||||||
<h2>So... what's the lowdown?</h2>
|
|
||||||
|
|
||||||
<p>The lowdown is, quite frankly, HTML Purifier's default settings are
|
|
||||||
probably good enough. The next step is to bump the level up to heavy,
|
|
||||||
and if that still doesn't satisfy your appetite, do some fine-tuning.
|
|
||||||
Other than that, don't worry about it: this all works silently and
|
|
||||||
effectively in the background.</p>
|
|
||||||
|
|
||||||
</body></html>
|
|
||||||
|
|
||||||
<!-- vim: et sw=4 sts=4
|
|
||||||
-->
|
|
@@ -1,204 +0,0 @@
|
|||||||
<?xml version="1.0" encoding="UTF-8"?>
|
|
||||||
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
|
|
||||||
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
|
|
||||||
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"><head>
|
|
||||||
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
|
|
||||||
<meta name="description" content="Tutorial for creating custom URI filters." />
|
|
||||||
<link rel="stylesheet" type="text/css" href="style.css" />
|
|
||||||
|
|
||||||
<title>URI Filters - HTML Purifier</title>
|
|
||||||
|
|
||||||
</head><body>
|
|
||||||
|
|
||||||
<h1>URI Filters</h1>
|
|
||||||
|
|
||||||
<div id="filing">Filed under End-User</div>
|
|
||||||
<div id="index">Return to the <a href="index.html">index</a>.</div>
|
|
||||||
<div id="home"><a href="http://htmlpurifier.org/">HTML Purifier</a> End-User Documentation</div>
|
|
||||||
|
|
||||||
<p>
|
|
||||||
This is a quick and dirty document to get you on your way to writing
|
|
||||||
custom URI filters for your own URL filtering needs. Why would you
|
|
||||||
want to write a URI filter? If you need URIs your users put into
|
|
||||||
HTML to magically change into a different URI, this is
|
|
||||||
exactly what you need!
|
|
||||||
</p>
|
|
||||||
|
|
||||||
<h2>Creating the class</h2>
|
|
||||||
|
|
||||||
<p>
|
|
||||||
Any URI filter you make will be a subclass of <code>HTMLPurifier_URIFilter</code>.
|
|
||||||
The scaffolding is thus:
|
|
||||||
</p>
|
|
||||||
|
|
||||||
<pre>class HTMLPurifier_URIFilter_<strong>NameOfFilter</strong> extends HTMLPurifier_URIFilter
|
|
||||||
{
|
|
||||||
public $name = '<strong>NameOfFilter</strong>';
|
|
||||||
public function prepare($config) {}
|
|
||||||
public function filter(&$uri, $config, $context) {}
|
|
||||||
}</pre>
|
|
||||||
|
|
||||||
<p>
|
|
||||||
Fill in the variable <code>$name</code> with the name of your filter, and
|
|
||||||
take a look at the two methods. <code>prepare()</code> is an initialization
|
|
||||||
method that is called only once, before any filtering has been done of the
|
|
||||||
HTML. Use it to perform any costly setup work that only needs to be done
|
|
||||||
once. <code>filter()</code> is the guts and innards of our filter:
|
|
||||||
it takes the URI and does whatever needs to be done to it.
|
|
||||||
</p>
|
|
||||||
|
|
||||||
<p>
|
|
||||||
If you've worked with HTML Purifier, you'll recognize the <code>$config</code>
|
|
||||||
and <code>$context</code> parameters. On the other hand, <code>$uri</code>
|
|
||||||
is something unique to this section of the application: it's a
|
|
||||||
<code>HTMLPurifier_URI</code> object. The interface is thus:
|
|
||||||
</p>
|
|
||||||
|
|
||||||
<pre>class HTMLPurifier_URI
|
|
||||||
{
|
|
||||||
public $scheme, $userinfo, $host, $port, $path, $query, $fragment;
|
|
||||||
public function HTMLPurifier_URI($scheme, $userinfo, $host, $port, $path, $query, $fragment);
|
|
||||||
public function toString();
|
|
||||||
public function copy();
|
|
||||||
public function getSchemeObj($config, $context);
|
|
||||||
public function validate($config, $context);
|
|
||||||
}</pre>
|
|
||||||
|
|
||||||
<p>
|
|
||||||
The first three methods are fairly self-explanatory: you have a constructor,
|
|
||||||
a serializer, and a cloner. Generally, you won't be using them when
|
|
||||||
you are manipulating the URI objects themselves.
|
|
||||||
<code>getSchemeObj()</code> is a special purpose method that returns
|
|
||||||
a <code>HTMLPurifier_URIScheme</code> object corresponding to the specific
|
|
||||||
URI at hand. <code>validate()</code> performs general-purpose validation
|
|
||||||
on the internal components of a URI. Once again, you don't need to
|
|
||||||
worry about these: they've already been handled for you.
|
|
||||||
</p>
|
|
||||||
|
|
||||||
<h2>URI format</h2>
|
|
||||||
|
|
||||||
<p>
|
|
||||||
As a URIFilter, we're interested in the member variables of the URI object.
|
|
||||||
</p>
|
|
||||||
|
|
||||||
<table class="quick"><tbody>
|
|
||||||
<tr><th>Scheme</th> <td>The protocol for identifying (and possibly locating) a resource (http, ftp, https)</td></tr>
|
|
||||||
<tr><th>Userinfo</th> <td>User information such as a username (bob)</td></tr>
|
|
||||||
<tr><th>Host</th> <td>Domain name or IP address of the server (example.com, 127.0.0.1)</td></tr>
|
|
||||||
<tr><th>Port</th> <td>Network port number for the server (80, 12345)</td></tr>
|
|
||||||
<tr><th>Path</th> <td>Data that identifies the resource, possibly hierarchical (/path/to, ed@example.com)</td></tr>
|
|
||||||
<tr><th>Query</th> <td>String of information to be interpreted by the resource (?q=search-term)</td></tr>
|
|
||||||
<tr><th>Fragment</th> <td>Additional information for the resource after retrieval (#bookmark)</td></tr>
|
|
||||||
</tbody></table>
|
|
||||||
|
|
||||||
<p>
|
|
||||||
Because the URI is presented to us in this form, and not
|
|
||||||
<code>http://bob@example.com:8080/foo.php?q=string#hash</code>, it saves us
|
|
||||||
a lot of trouble in having to parse the URI every time we want to filter
|
|
||||||
it. For the record, the above URI has the following components:
|
|
||||||
</p>
|
|
||||||
|
|
||||||
<table class="quick"><tbody>
|
|
||||||
<tr><th>Scheme</th> <td>http</td></tr>
|
|
||||||
<tr><th>Userinfo</th> <td>bob</td></tr>
|
|
||||||
<tr><th>Host</th> <td>example.com</td></tr>
|
|
||||||
<tr><th>Port</th> <td>8080</td></tr>
|
|
||||||
<tr><th>Path</th> <td>/foo.php</td></tr>
|
|
||||||
<tr><th>Query</th> <td>q=string</td></tr>
|
|
||||||
<tr><th>Fragment</th> <td>hash</td></tr>
|
|
||||||
</tbody></table>
|
|
||||||
|
|
||||||
<p>
|
|
||||||
Note that there is no question mark or octothorpe in the query or
|
|
||||||
fragment: these get removed during parsing.
|
|
||||||
</p>
|
|
||||||
|
|
||||||
<p>
|
|
||||||
With this information, you can get straight to implementing your
|
|
||||||
<code>filter()</code> method. But one more thing...
|
|
||||||
</p>
|
|
||||||
|
|
||||||
<h2>Return value: Boolean, not URI</h2>
|
|
||||||
|
|
||||||
<p>
|
|
||||||
You may have noticed that the URI is being passed in by reference.
|
|
||||||
This means that whatever changes you make to it, those changes will
|
|
||||||
be reflected in the URI object the caller had. <strong>Do not
|
|
||||||
return the URI object: it is unnecessary and will cause bugs.</strong>
|
|
||||||
Instead, return a boolean value, true if the filtering was successful,
|
|
||||||
or false if the URI is beyond repair and needs to be axed.
|
|
||||||
</p>
|
|
||||||
|
|
||||||
<p>
|
|
||||||
Let's suppose I wanted to write a filter that converted links with a
|
|
||||||
custom <code>image</code> scheme to its corresponding real path on
|
|
||||||
our website:
|
|
||||||
</p>
|
|
||||||
|
|
||||||
<pre>class HTMLPurifier_URIFilter_TransformImageScheme extends HTMLPurifier_URIFilter
|
|
||||||
{
|
|
||||||
public $name = 'TransformImageScheme';
|
|
||||||
public function filter(&$uri, $config, $context) {
|
|
||||||
if ($uri->scheme !== 'image') return true;
|
|
||||||
$img_name = $uri->path;
|
|
||||||
// Overwrite the previous URI object
|
|
||||||
$uri = new HTMLPurifier_URI('http', null, null, null, '/img/' . $img_name . '.png', null, null);
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
}</pre>
|
|
||||||
|
|
||||||
<p>
|
|
||||||
Notice I did not <code>return $uri;</code>. This filter would turn
|
|
||||||
<code>image:Foo</code> into <code>/img/Foo.png</code>.
|
|
||||||
</p>
|
|
||||||
|
|
||||||
<h2>Activating your filter</h2>
|
|
||||||
|
|
||||||
<p>
|
|
||||||
Having a filter is all well and good, but you need to tell HTML Purifier
|
|
||||||
to use it. Fortunately, this part's simple:
|
|
||||||
</p>
|
|
||||||
|
|
||||||
<pre>$uri = $config->getDefinition('URI');
|
|
||||||
$uri->addFilter(new HTMLPurifier_URIFilter_<strong>NameOfFilter</strong>(), $config);</pre>
|
|
||||||
|
|
||||||
<p>
|
|
||||||
After adding a filter, you won't be able to set configuration directives.
|
|
||||||
Structure your code accordingly.
|
|
||||||
</p>
|
|
||||||
|
|
||||||
<!-- XXX: link to new documentation system -->
|
|
||||||
|
|
||||||
<h2>Post-filter</h2>
|
|
||||||
|
|
||||||
<p>
|
|
||||||
Remember our TransformImageScheme filter? That filter acted before we had
|
|
||||||
performed scheme validation; otherwise, the URI would have been filtered
|
|
||||||
out when it was discovered that there was no image scheme. Well, a post-filter
|
|
||||||
is run after scheme specific validation, so it's ideal for bulk
|
|
||||||
post-processing of URIs, including munging. To specify a URI filter as a post-filter,
|
|
||||||
set the <code>$post</code> member variable to TRUE.
|
|
||||||
</p>
|
|
||||||
|
|
||||||
<pre>class HTMLPurifier_URIFilter_MyPostFilter extends HTMLPurifier_URIFilter
|
|
||||||
{
|
|
||||||
public $name = 'MyPostFilter';
|
|
||||||
public $post = true;
|
|
||||||
// ... extra code here
|
|
||||||
}
|
|
||||||
</pre>
|
|
||||||
|
|
||||||
<h2>Examples</h2>
|
|
||||||
|
|
||||||
<p>
|
|
||||||
Check the
|
|
||||||
<a href="http://repo.or.cz/w/htmlpurifier.git?a=tree;hb=HEAD;f=library/HTMLPurifier/URIFilter">URIFilter</a>
|
|
||||||
directory for more implementation examples, and see <a href="proposal-new-directives.txt">the
|
|
||||||
new directives proposal document</a> for ideas on what could be implemented
|
|
||||||
as a filter.
|
|
||||||
</p>
|
|
||||||
|
|
||||||
</body></html>
|
|
||||||
|
|
||||||
<!-- vim: et sw=4 sts=4
|
|
||||||
-->
|
|
@@ -5,6 +5,7 @@
|
|||||||
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
|
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
|
||||||
<meta name="description" content="Describes the rationale for using UTF-8, the ramifications otherwise, and how to make the switch." />
|
<meta name="description" content="Describes the rationale for using UTF-8, the ramifications otherwise, and how to make the switch." />
|
||||||
<link rel="stylesheet" type="text/css" href="./style.css" />
|
<link rel="stylesheet" type="text/css" href="./style.css" />
|
||||||
|
<script defer="defer" type="text/javascript" src="./toc-gen.js"></script>
|
||||||
<style type="text/css">
|
<style type="text/css">
|
||||||
.minor td {font-style:italic;}
|
.minor td {font-style:italic;}
|
||||||
</style>
|
</style>
|
||||||
@@ -22,7 +23,7 @@ own advice for sake of portability. -->
|
|||||||
|
|
||||||
<div id="filing">Filed under End-User</div>
|
<div id="filing">Filed under End-User</div>
|
||||||
<div id="index">Return to the <a href="index.html">index</a>.</div>
|
<div id="index">Return to the <a href="index.html">index</a>.</div>
|
||||||
<div id="home"><a href="http://htmlpurifier.org/">HTML Purifier</a> End-User Documentation</div>
|
<div id="home"><a href="http://hp.jpsband.org/">HTML Purifier</a> End-User Documentation</div>
|
||||||
|
|
||||||
<p>Character encoding and character sets are not that
|
<p>Character encoding and character sets are not that
|
||||||
difficult to understand, but so many people blithely stumble
|
difficult to understand, but so many people blithely stumble
|
||||||
@@ -95,7 +96,7 @@ which can be a rewarding (but difficult) task.</p>
|
|||||||
<h2 id="findcharset">Finding the real encoding</h2>
|
<h2 id="findcharset">Finding the real encoding</h2>
|
||||||
|
|
||||||
<p>In the beginning, there was ASCII, and things were simple. But they
|
<p>In the beginning, there was ASCII, and things were simple. But they
|
||||||
weren't good, for no one could write in Cyrillic or Thai. So there
|
weren't good, for no one could write in Cryllic or Thai. So there
|
||||||
exploded a proliferation of character encodings to remedy the problem
|
exploded a proliferation of character encodings to remedy the problem
|
||||||
by extending the characters ASCII could express. This ridiculously
|
by extending the characters ASCII could express. This ridiculously
|
||||||
simplified version of the history of character encodings shows us that
|
simplified version of the history of character encodings shows us that
|
||||||
@@ -118,8 +119,9 @@ there are now many character encodings floating around.</p>
|
|||||||
see a page on the web, chances are it's encoded in one
|
see a page on the web, chances are it's encoded in one
|
||||||
of these encodings.</li>
|
of these encodings.</li>
|
||||||
<li><strong>Unicode-based encodings</strong> implement the
|
<li><strong>Unicode-based encodings</strong> implement the
|
||||||
Unicode standard and include UTF-8, UTF-16 and UTF-32/UCS-4.
|
Unicode standard and include UTF-8, UCS-2 and UTF-16.
|
||||||
They go beyond 8-bits and support almost
|
They go beyond 8-bits (the first two are variable length,
|
||||||
|
while the second one uses 16-bits), and support almost
|
||||||
every language in the world. UTF-8 is gaining traction
|
every language in the world. UTF-8 is gaining traction
|
||||||
as the dominant international encoding of the web.</li>
|
as the dominant international encoding of the web.</li>
|
||||||
</ul>
|
</ul>
|
||||||
@@ -136,7 +138,7 @@ browser:</p>
|
|||||||
<dd>View > Encoding: bulleted item is unofficial name</dd>
|
<dd>View > Encoding: bulleted item is unofficial name</dd>
|
||||||
</dl>
|
</dl>
|
||||||
|
|
||||||
<p>Internet Explorer won't give you the MIME (i.e. useful/real) name of the
|
<p>Internet Explorer won't give you the mime (i.e. useful/real) name of the
|
||||||
character encoding, so you'll have to look it up using their description.
|
character encoding, so you'll have to look it up using their description.
|
||||||
Some common ones:</p>
|
Some common ones:</p>
|
||||||
|
|
||||||
@@ -214,12 +216,6 @@ if your <code>META</code> tag claims that either:</p>
|
|||||||
|
|
||||||
<h2 id="fixcharset">Fixing the encoding</h2>
|
<h2 id="fixcharset">Fixing the encoding</h2>
|
||||||
|
|
||||||
<p class="aside">The advice given here is for pages being served as
|
|
||||||
vanilla <code>text/html</code>. Different practices must be used
|
|
||||||
for <code>application/xml</code> or <code>application/xhtml+xml</code>, see
|
|
||||||
<a href="http://www.w3.org/TR/2002/NOTE-xhtml-media-types-20020430/">W3C's
|
|
||||||
document on XHTML media types</a> for more information.</p>
|
|
||||||
|
|
||||||
<p>If your <code>META</code> encoding and your real encoding match,
|
<p>If your <code>META</code> encoding and your real encoding match,
|
||||||
savvy! You can skip this section. If they don't...</p>
|
savvy! You can skip this section. If they don't...</p>
|
||||||
|
|
||||||
@@ -235,7 +231,7 @@ of your real encoding.</p>
|
|||||||
why the character encoding should be explicitly stated. When the
|
why the character encoding should be explicitly stated. When the
|
||||||
browser isn't told what the character encoding of a text is, it
|
browser isn't told what the character encoding of a text is, it
|
||||||
has to guess: and sometimes the guess is wrong. Hackers can manipulate
|
has to guess: and sometimes the guess is wrong. Hackers can manipulate
|
||||||
this guess in order to slip XSS past filters and then fool the
|
this guess in order to slip XSS pass filters and then fool the
|
||||||
browser into executing it as active code. A great example of this
|
browser into executing it as active code. A great example of this
|
||||||
is the <a href="http://shiflett.org/archive/177">Google UTF-7
|
is the <a href="http://shiflett.org/archive/177">Google UTF-7
|
||||||
exploit</a>.</p>
|
exploit</a>.</p>
|
||||||
@@ -306,8 +302,7 @@ languages</a>. The appropriate code is:</p>
|
|||||||
|
|
||||||
<p>...replacing UTF-8 with whatever your embedded encoding is.
|
<p>...replacing UTF-8 with whatever your embedded encoding is.
|
||||||
This code must come before any output, so be careful about
|
This code must come before any output, so be careful about
|
||||||
stray whitespace in your application (i.e., any whitespace before
|
stray whitespace in your application.</p>
|
||||||
output excluding whitespace within <?php ?> tags).</p>
|
|
||||||
|
|
||||||
<h4 id="fixcharset-server-phpini">PHP ini directive</h4>
|
<h4 id="fixcharset-server-phpini">PHP ini directive</h4>
|
||||||
|
|
||||||
@@ -318,8 +313,8 @@ header call: <code><a href="http://php.net/ini.core#ini.default-charset">default
|
|||||||
|
|
||||||
<p>...will also do the trick. If PHP is running as an Apache module (and
|
<p>...will also do the trick. If PHP is running as an Apache module (and
|
||||||
not as FastCGI, consult
|
not as FastCGI, consult
|
||||||
<a href="http://php.net/phpinfo">phpinfo</a>() for details), you can even use htaccess to apply this property
|
<a href="http://php.net/phpinfo">phpinfo</a>() for details), you can even use htaccess do apply this property
|
||||||
across many PHP files:</p>
|
globally:</p>
|
||||||
|
|
||||||
<pre><a href="http://php.net/configuration.changes#configuration.changes.apache">php_value</a> default_charset "UTF-8"</pre>
|
<pre><a href="http://php.net/configuration.changes#configuration.changes.apache">php_value</a> default_charset "UTF-8"</pre>
|
||||||
|
|
||||||
@@ -365,11 +360,10 @@ to send anything at all:</p>
|
|||||||
|
|
||||||
<pre><a href="http://httpd.apache.org/docs/1.3/mod/core.html#adddefaultcharset">AddDefaultCharset</a> Off</pre>
|
<pre><a href="http://httpd.apache.org/docs/1.3/mod/core.html#adddefaultcharset">AddDefaultCharset</a> Off</pre>
|
||||||
|
|
||||||
<p>...making your internal charset declaration (usually the <code>META</code> tags)
|
<p>...making your <code>META</code> tags the sole source of
|
||||||
the sole source of character encoding
|
character encoding information. In these cases, it is
|
||||||
information. In these cases, it is <em>especially</em> important to make
|
<em>especially</em> important to make sure you have valid <code>META</code>
|
||||||
sure you have valid <code>META</code> tags on your pages and all the
|
tags on your pages and all the text before them is ASCII.</p>
|
||||||
text before them is ASCII.</p>
|
|
||||||
|
|
||||||
<blockquote class="aside"><p>These directives can also be
|
<blockquote class="aside"><p>These directives can also be
|
||||||
placed in httpd.conf file for Apache, but
|
placed in httpd.conf file for Apache, but
|
||||||
@@ -434,30 +428,28 @@ IIS to change character encodings, I'd be grateful.</p>
|
|||||||
|
|
||||||
<p><code>META</code> tags are the most common source of embedded
|
<p><code>META</code> tags are the most common source of embedded
|
||||||
encodings, but they can also come from somewhere else: XML
|
encodings, but they can also come from somewhere else: XML
|
||||||
Declarations. They look like:</p>
|
processing instructions. They look like:</p>
|
||||||
|
|
||||||
<pre><?xml version="1.0" encoding="UTF-8"?></pre>
|
<pre><?xml version="1.0" encoding="UTF-8"?></pre>
|
||||||
|
|
||||||
<p>...and are most often found in XML documents (including XHTML).</p>
|
<p>...and are most often found in XML documents (including XHTML).</p>
|
||||||
|
|
||||||
<p>For XHTML, this XML Declaration theoretically
|
<p>For XHTML, this processing instruction theoretically
|
||||||
overrides the <code>META</code> tag. In reality, this happens only when the
|
overrides the <code>META</code> tag. In reality, this happens only when the
|
||||||
XHTML is actually served as legit XML and not HTML, which is almost always
|
XHTML is actually served as legit XML and not HTML, which is almost always
|
||||||
never due to Internet Explorer's lack of support for
|
never due to Internet Explorer's lack of support for
|
||||||
<code>application/xhtml+xml</code> (even though doing so is often
|
<code>application/xhtml+xml</code> (even though doing so is often
|
||||||
argued to be <a href="http://www.hixie.ch/advocacy/xhtml">good
|
argued to be <a href="http://www.hixie.ch/advocacy/xhtml">good practice</a>).</p>
|
||||||
practice</a> and is required by the XHTML 1.1 specification).</p>
|
|
||||||
|
|
||||||
<p>For XML, however, this XML Declaration is extremely important.
|
<p>For XML, however, this processing instruction is extremely important.
|
||||||
Since most webservers are not configured to send charsets for .xml files,
|
Since most webservers are not configured to send charsets for .xml files,
|
||||||
this is the only thing a parser has to go on. Furthermore, the default
|
this is the only thing a parser has to go on. Furthermore, the default
|
||||||
for XML files is UTF-8, which often butts heads with more common
|
for XML files is UTF-8, which often butts heads with more common
|
||||||
ISO-8859-1 encoding (you see this in garbled RSS feeds).</p>
|
ISO-8859-1 encoding (you see this in garbled RSS feeds).</p>
|
||||||
|
|
||||||
<p>In short, if you use XHTML and have gone through the
|
<p>In short, if you use XHTML and have gone through the
|
||||||
trouble of adding the XML Declaration, make sure it jibes
|
trouble of adding the XML header, make sure it jibes
|
||||||
with your <code>META</code> tags (which should only be present
|
with your <code>META</code> tags and HTTP headers.</p>
|
||||||
if served in text/html) and HTTP headers.</p>
|
|
||||||
|
|
||||||
<h3 id="fixcharset-internals">Inside the process</h3>
|
<h3 id="fixcharset-internals">Inside the process</h3>
|
||||||
|
|
||||||
@@ -481,7 +473,7 @@ if we don't know it's character encoding? And how do we figure out
|
|||||||
the character encoding, if we don't know the contents of the
|
the character encoding, if we don't know the contents of the
|
||||||
<code>META</code> tag?</p>
|
<code>META</code> tag?</p>
|
||||||
|
|
||||||
<p>Fortunately for us, the characters we need to write the
|
<p>Fortunantely for us, the characters we need to write the
|
||||||
<code>META</code> are in ASCII, which is pretty much universal
|
<code>META</code> are in ASCII, which is pretty much universal
|
||||||
over every character encoding that is in common use today. So,
|
over every character encoding that is in common use today. So,
|
||||||
all the web-browser has to do is parse all the way down until
|
all the web-browser has to do is parse all the way down until
|
||||||
@@ -514,7 +506,7 @@ usage in one language sometimes requires the occasional special character
|
|||||||
that, without surprise, is not available in your character set. Sometimes
|
that, without surprise, is not available in your character set. Sometimes
|
||||||
developers get around this by adding support for multiple encodings: when
|
developers get around this by adding support for multiple encodings: when
|
||||||
using Chinese, use Big5, when using Japanese, use Shift-JIS, when
|
using Chinese, use Big5, when using Japanese, use Shift-JIS, when
|
||||||
using Greek, etc. Other times, they use character references with great
|
using Greek, etc. Other times, they use character entities with great
|
||||||
zeal.</p>
|
zeal.</p>
|
||||||
|
|
||||||
<p>UTF-8, however, obviates the need for any of these complicated
|
<p>UTF-8, however, obviates the need for any of these complicated
|
||||||
@@ -526,16 +518,16 @@ you don't have to use those user-unfriendly entities.</p>
|
|||||||
|
|
||||||
<h3 id="whyutf8-user">User-friendly</h3>
|
<h3 id="whyutf8-user">User-friendly</h3>
|
||||||
|
|
||||||
<p>Websites encoded in Latin-1 (ISO-8859-1) which occasionally need
|
<p>Websites encoded in Latin-1 (ISO-8859-1) which ocassionally need
|
||||||
a special character outside of their scope often will use a character
|
a special character outside of their scope often will use a character
|
||||||
entity reference to achieve the desired effect. For instance, θ can be
|
entity to achieve the desired effect. For instance, θ can be
|
||||||
written <code>&theta;</code>, regardless of the character encoding's
|
written <code>&theta;</code>, regardless of the character encoding's
|
||||||
support of Greek letters.</p>
|
support of Greek letters.</p>
|
||||||
|
|
||||||
<p>This works nicely for limited use of special characters, but
|
<p>This works nicely for limited use of special characters, but
|
||||||
say you wanted this sentence of Chinese text: 激光,
|
say you wanted this sentence of Chinese text: 激光,
|
||||||
這兩個字是甚麼意思.
|
這兩個字是甚麼意思.
|
||||||
The ampersand encoded version would look like this:</p>
|
The entity-ized version would look like this:</p>
|
||||||
|
|
||||||
<pre>&#28608;&#20809;, &#36889;&#20841;&#20491;&#23383;&#26159;&#29978;&#40636;&#24847;&#24605;</pre>
|
<pre>&#28608;&#20809;, &#36889;&#20841;&#20491;&#23383;&#26159;&#29978;&#40636;&#24847;&#24605;</pre>
|
||||||
|
|
||||||
@@ -553,7 +545,7 @@ an application that originally used ISO-8859-1 but switched to UTF-8
|
|||||||
when it became far too cumbersome to support foreign languages. Bots
|
when it became far too cumbersome to support foreign languages. Bots
|
||||||
will now actually go through articles and convert character entities
|
will now actually go through articles and convert character entities
|
||||||
to their corresponding real characters for the sake of user-friendliness
|
to their corresponding real characters for the sake of user-friendliness
|
||||||
and searchability. See
|
and searcheability. See
|
||||||
<a href="http://meta.wikimedia.org/wiki/Help:Special_characters">Meta's
|
<a href="http://meta.wikimedia.org/wiki/Help:Special_characters">Meta's
|
||||||
page on special characters</a> for more details.
|
page on special characters</a> for more details.
|
||||||
</p></blockquote>
|
</p></blockquote>
|
||||||
@@ -561,7 +553,7 @@ page on special characters</a> for more details.
|
|||||||
<h3 id="whyutf8-forms">Forms</h3>
|
<h3 id="whyutf8-forms">Forms</h3>
|
||||||
|
|
||||||
<p>While we're on the tack of users, how do non-UTF-8 web forms deal
|
<p>While we're on the tack of users, how do non-UTF-8 web forms deal
|
||||||
with characters that are outside of their character set? Rather than
|
with characters that our outside of their character set? Rather than
|
||||||
discuss what UTF-8 does right, we're going to show what could go wrong
|
discuss what UTF-8 does right, we're going to show what could go wrong
|
||||||
if you didn't use UTF-8 and people tried to use characters outside
|
if you didn't use UTF-8 and people tried to use characters outside
|
||||||
of your character encoding.</p>
|
of your character encoding.</p>
|
||||||
@@ -575,24 +567,21 @@ which may be used by POST, and is required when you want to upload
|
|||||||
files.</p>
|
files.</p>
|
||||||
|
|
||||||
<p>The following is a summarization of notes from
|
<p>The following is a summarization of notes from
|
||||||
<a href="http://web.archive.org/web/20060427015200/ppewww.ph.gla.ac.uk/~flavell/charset/form-i18n.html">
|
<a href="http://ppewww.physics.gla.ac.uk/~flavell/charset/form-i18n.html">
|
||||||
<code>FORM</code> submission and i18n</a>. That document contains lots
|
<code>FORM</code> submission and i18n</a>. That document contains lots
|
||||||
of useful information, but is written in a rambly manner, so
|
of useful information, but is written in a rambly manner, so
|
||||||
here I try to get right to the point. (Note: the original has
|
here I try to get right to the point.</p>
|
||||||
disappeared off the web, so I am linking to the Web Archive copy.)</p>
|
|
||||||
|
|
||||||
<h4 id="whyutf8-forms-urlencoded"><code>application/x-www-form-urlencoded</code></h4>
|
<h4 id="whyutf8-forms-urlencoded"><code>application/x-www-form-urlencoded</code></h4>
|
||||||
|
|
||||||
<p>This is the Content-Type that GET requests must use, and POST requests
|
<p>This is the Content-Type that GET requests must use, and POST requests
|
||||||
use by default. It involves the ubiquitous percent encoding format that
|
use by default. It involves the ubiquituous percent encoding format that
|
||||||
looks something like: <code>%C3%86</code>. There is no official way of
|
looks something like: <code>%C3%86</code>. There is no official way of
|
||||||
determining the character encoding of such a request, since the percent
|
determining the character encoding of such a request, since the percent
|
||||||
encoding operates on a byte level, so it is usually assumed that it
|
encoding operates on a byte level, so it is usually assumed that it
|
||||||
is the same as the encoding the page containing the form was submitted
|
is the same as the encoding the page containing the form was submitted
|
||||||
in. (<a href="http://tools.ietf.org/html/rfc3986#section-2.5">RFC 3986</a>
|
in. You'll run into very few problems if you only use characters in
|
||||||
recommends that textual identifiers be translated to UTF-8; however, browser
|
the character encoding you chose.</p>
|
||||||
compliance is spotty.) You'll run into very few problems
|
|
||||||
if you only use characters in the character encoding you chose.</p>
|
|
||||||
|
|
||||||
<p>However, once you start adding characters outside of your encoding
|
<p>However, once you start adding characters outside of your encoding
|
||||||
(and this is a lot more common than you may think: take curly
|
(and this is a lot more common than you may think: take curly
|
||||||
@@ -603,7 +592,7 @@ browser you're using, they might:</p>
|
|||||||
<ul>
|
<ul>
|
||||||
<li>Replace the unsupported characters with useless question marks,</li>
|
<li>Replace the unsupported characters with useless question marks,</li>
|
||||||
<li>Attempt to fix the characters (example: smart quotes to regular quotes),</li>
|
<li>Attempt to fix the characters (example: smart quotes to regular quotes),</li>
|
||||||
<li>Replace the character with a character entity reference, or</li>
|
<li>Replace the character with a character entity, or</li>
|
||||||
<li>Send it anyway as a different character encoding mixed in
|
<li>Send it anyway as a different character encoding mixed in
|
||||||
with the original encoding (usually Windows-1252 rather than
|
with the original encoding (usually Windows-1252 rather than
|
||||||
iso-8859-1 or UTF-8 interspersed in 8-bit)</li>
|
iso-8859-1 or UTF-8 interspersed in 8-bit)</li>
|
||||||
@@ -619,7 +608,7 @@ since UTF-8 supports every character.</p>
|
|||||||
|
|
||||||
<h4 id="whyutf8-forms-multipart"><code>multipart/form-data</code></h4>
|
<h4 id="whyutf8-forms-multipart"><code>multipart/form-data</code></h4>
|
||||||
|
|
||||||
<p>Multipart form submission takes away a lot of the ambiguity
|
<p>Multipart form submission takes a way a lot of the ambiguity
|
||||||
that percent-encoding had: the server now can explicitly ask for
|
that percent-encoding had: the server now can explicitly ask for
|
||||||
certain encodings, and the client can explicitly tell the server
|
certain encodings, and the client can explicitly tell the server
|
||||||
during the form submission what encoding the fields are in.</p>
|
during the form submission what encoding the fields are in.</p>
|
||||||
@@ -632,9 +621,9 @@ Each method has deficiencies, especially the former.</p>
|
|||||||
<p>If you tell the browser to send the form in the same encoding as
|
<p>If you tell the browser to send the form in the same encoding as
|
||||||
the page, you still have the trouble of what to do with characters
|
the page, you still have the trouble of what to do with characters
|
||||||
that are outside of the character encoding's range. The behavior, once
|
that are outside of the character encoding's range. The behavior, once
|
||||||
again, varies: Firefox 2.0 converts them to character entity references
|
again, varies: Firefox 2.0 entity-izes them while Internet Explorer
|
||||||
while Internet Explorer 7.0 mangles them beyond intelligibility. For
|
7.0 mangles them beyond intelligibility. For serious internationalization purposes,
|
||||||
serious internationalization purposes, this is not an option.</p>
|
this is not an option.</p>
|
||||||
|
|
||||||
<p>The other possibility is to set Accept-Encoding to UTF-8, which
|
<p>The other possibility is to set Accept-Encoding to UTF-8, which
|
||||||
begs the question: Why aren't you using UTF-8 for everything then?
|
begs the question: Why aren't you using UTF-8 for everything then?
|
||||||
@@ -674,12 +663,12 @@ it up to the module iconv to do the dirty work.</p>
|
|||||||
<p>This approach, however, is not perfect. iconv is blithely unaware
|
<p>This approach, however, is not perfect. iconv is blithely unaware
|
||||||
of HTML character entities. HTML Purifier, in order to
|
of HTML character entities. HTML Purifier, in order to
|
||||||
protect against sophisticated escaping schemes, normalizes all character
|
protect against sophisticated escaping schemes, normalizes all character
|
||||||
and numeric entity references before processing the text. This leads to
|
and numeric entities before processing the text. This leads to
|
||||||
one important ramification:</p>
|
one important ramification:</p>
|
||||||
|
|
||||||
<p><strong>Any character that is not supported by the target character
|
<p><strong>Any character that is not supported by the target character
|
||||||
set, regardless of whether or not it is in the form of a character
|
set, regardless of whether or not it is in the form of a character
|
||||||
entity reference or a raw character, will be silently ignored.</strong></p>
|
entity or a raw character, will be silently ignored.</strong></p>
|
||||||
|
|
||||||
<p>Example of this principle at work: say you have <code>&theta;</code>
|
<p>Example of this principle at work: say you have <code>&theta;</code>
|
||||||
in your HTML, but the output is in Latin-1 (which, understandably,
|
in your HTML, but the output is in Latin-1 (which, understandably,
|
||||||
@@ -688,7 +677,7 @@ set the encoding correctly using %Core.Encoding):</p>
|
|||||||
|
|
||||||
<ul>
|
<ul>
|
||||||
<li>The <code>Encoder</code> will transform the text from ISO 8859-1 to UTF-8
|
<li>The <code>Encoder</code> will transform the text from ISO 8859-1 to UTF-8
|
||||||
(note that theta is preserved here since it doesn't actually use
|
(note that theta is preserved since it doesn't actually use
|
||||||
any non-ASCII characters): <code>&theta;</code></li>
|
any non-ASCII characters): <code>&theta;</code></li>
|
||||||
<li>The <code>EntityParser</code> will transform all named and numeric
|
<li>The <code>EntityParser</code> will transform all named and numeric
|
||||||
character entities to their corresponding raw UTF-8 equivalents:
|
character entities to their corresponding raw UTF-8 equivalents:
|
||||||
@@ -711,7 +700,7 @@ Purifier has provided a slightly more palatable workaround using
|
|||||||
<li>The <code>EntityParser</code> transforms entities: <code>θ</code></li>
|
<li>The <code>EntityParser</code> transforms entities: <code>θ</code></li>
|
||||||
<li>HTML Purifier processes the code: <code>θ</code></li>
|
<li>HTML Purifier processes the code: <code>θ</code></li>
|
||||||
<li>The <code>Encoder</code> replaces all non-ASCII characters
|
<li>The <code>Encoder</code> replaces all non-ASCII characters
|
||||||
with numeric entity reference: <code>&#952;</code></li>
|
with numeric entities: <code>&#952;</code></li>
|
||||||
<li>For good measure, <code>Encoder</code> transforms encoding back to
|
<li>For good measure, <code>Encoder</code> transforms encoding back to
|
||||||
original (which is strictly unnecessary for 99% of encodings
|
original (which is strictly unnecessary for 99% of encodings
|
||||||
out there): <code>&#952;</code> (remember, it's all ASCII!)</li>
|
out there): <code>&#952;</code> (remember, it's all ASCII!)</li>
|
||||||
@@ -721,19 +710,19 @@ Purifier has provided a slightly more palatable workaround using
|
|||||||
the land of Unicode characters, and is totally unacceptable for Chinese
|
the land of Unicode characters, and is totally unacceptable for Chinese
|
||||||
or Japanese texts. The even bigger kicker is that, supposing the
|
or Japanese texts. The even bigger kicker is that, supposing the
|
||||||
input encoding was actually ISO-8859-7, which <em>does</em> support
|
input encoding was actually ISO-8859-7, which <em>does</em> support
|
||||||
theta, the character would get converted into a character entity reference
|
theta, the character would get entity-ized anyway! (The Encoder does
|
||||||
anyway! (The Encoder does not discriminate).</p>
|
not discriminate).</p>
|
||||||
|
|
||||||
<p>The current functionality is about where HTML Purifier will be for
|
<p>The current functionality is about where HTML Purifier will be for
|
||||||
the rest of eternity. HTML Purifier could attempt to preserve the original
|
the rest of eternity. HTML Purifier could attempt to preserve the original
|
||||||
form of the character references so that they could be substituted back in, only the
|
form of the entities so that they could be substituted back in, only the
|
||||||
DOM extension kills them off irreversibly. HTML Purifier could also attempt
|
DOM extension kills them off irreversibly. HTML Purifier could also attempt
|
||||||
to be smart and only convert non-ASCII characters that weren't supported
|
to be smart and only convert non-ASCII characters that weren't supported
|
||||||
by the target encoding, but that would require reimplementing iconv
|
by the target encoding, but that would require reimplementing iconv
|
||||||
with HTML awareness, something I will not do.</p>
|
with HTML awareness, something I will not do.</p>
|
||||||
|
|
||||||
<p>So there: either it's UTF-8 or crippled international support. Your pick! (and I'm
|
<p>So there: either it's UTF-8 or crippled international support. Your pick! (and I'm
|
||||||
not being sarcastic here: some people could care less about other languages).</p>
|
not being sarcastic here: some people could care less about other languages)</p>
|
||||||
|
|
||||||
<h2 id="migrate">Migrate to UTF-8</h2>
|
<h2 id="migrate">Migrate to UTF-8</h2>
|
||||||
|
|
||||||
@@ -770,7 +759,7 @@ the text when you try to convert it to UTF-8. You'll have to convert
|
|||||||
it to a binary field, convert it to a Shift-JIS field (the real encoding),
|
it to a binary field, convert it to a Shift-JIS field (the real encoding),
|
||||||
and then finally to UTF-8. Many a website had pages irreversibly mangled
|
and then finally to UTF-8. Many a website had pages irreversibly mangled
|
||||||
because they didn't realize that they'd been deluding themselves about
|
because they didn't realize that they'd been deluding themselves about
|
||||||
the character encoding all along; don't become the next victim.</p>
|
the character encoding all along, don't become the next victim.</p>
|
||||||
|
|
||||||
<p>For <a href="http://www.postgresql.org/docs/8.2/static/multibyte.html">PostgreSQL</a>, there appears to be no direct way to change the
|
<p>For <a href="http://www.postgresql.org/docs/8.2/static/multibyte.html">PostgreSQL</a>, there appears to be no direct way to change the
|
||||||
encoding of a database (as of 8.2). You will have to dump the data, and then reimport
|
encoding of a database (as of 8.2). You will have to dump the data, and then reimport
|
||||||
@@ -790,7 +779,7 @@ usually supported).</p>
|
|||||||
|
|
||||||
<h4 id="migrate-db-binary">Binary</h4>
|
<h4 id="migrate-db-binary">Binary</h4>
|
||||||
|
|
||||||
<p>Due to the aforementioned compatibility issues, a more interoperable
|
<p>Due to the abovementioned compatibility issues, a more interoperable
|
||||||
way of storing UTF-8 text is to stuff it in a binary datatype.
|
way of storing UTF-8 text is to stuff it in a binary datatype.
|
||||||
<code>CHAR</code> becomes <code>BINARY</code>, <code>VARCHAR</code> becomes
|
<code>CHAR</code> becomes <code>BINARY</code>, <code>VARCHAR</code> becomes
|
||||||
<code>VARBINARY</code> and <code>TEXT</code> becomes <code>BLOB</code>.
|
<code>VARBINARY</code> and <code>TEXT</code> becomes <code>BLOB</code>.
|
||||||
@@ -917,8 +906,8 @@ anyway. So we'll deal with the other two edge cases.</p>
|
|||||||
would like to read your website but get heaps of question marks or
|
would like to read your website but get heaps of question marks or
|
||||||
other meaningless characters. Fixing this problem requires the
|
other meaningless characters. Fixing this problem requires the
|
||||||
installation of a font or language pack which is often highly
|
installation of a font or language pack which is often highly
|
||||||
dependent on what the language is. <a href="http://bn.wikipedia.org/wiki/%E0%A6%89%E0%A6%87%E0%A6%95%E0%A6%BF%E0%A6%AA%E0%A7%87%E0%A6%A1%E0%A6%BF%E0%A6%AF%E0%A6%BC%E0%A6%BE:Bangla_script_display_and_input_help">Here is an example</a>
|
dependent on what the language is. <a href="http://bn.wikipedia.org/wiki/%E0%A6%89%E0%A6%87%E0%A6%95%E0%A6%BF%E0%A6%AA%E0%A7%87%E0%A6%A1%E0%A6%BF%E0%A6%AF%E0%A6%BC%E0%A6%BE:Bangla_script_display_help">Here is an example</a>
|
||||||
of such a help file for the Bengali language; I am sure there are
|
of such a help file for the Bengali language, I am sure there are
|
||||||
others out there too. You just have to point users to the appropriate
|
others out there too. You just have to point users to the appropriate
|
||||||
help file.</p>
|
help file.</p>
|
||||||
|
|
||||||
@@ -928,7 +917,7 @@ help file.</p>
|
|||||||
characters embedded in what otherwise would be very bland ASCII are
|
characters embedded in what otherwise would be very bland ASCII are
|
||||||
letters of the
|
letters of the
|
||||||
<a href="http://en.wikipedia.org/wiki/International_Phonetic_Alphabet">International
|
<a href="http://en.wikipedia.org/wiki/International_Phonetic_Alphabet">International
|
||||||
Phonetic Alphabet (IPA)</a>, use to designate pronunciations in a very standard
|
Phonetic Alphabet (IPA)</a>, use to designate pronounciations in a very standard
|
||||||
manner (you probably see them all the time in your dictionary). Your
|
manner (you probably see them all the time in your dictionary). Your
|
||||||
average font probably won't have support for all of the IPA characters
|
average font probably won't have support for all of the IPA characters
|
||||||
like ʘ (bilabial click) or ʒ (voiced postalveolar fricative).
|
like ʘ (bilabial click) or ʒ (voiced postalveolar fricative).
|
||||||
@@ -941,11 +930,11 @@ most widely used browser in the entire world? Microsoft IE 6
|
|||||||
is not smart enough to borrow from other fonts when a character isn't
|
is not smart enough to borrow from other fonts when a character isn't
|
||||||
present, so more often than not you'll be slapped with a nice big �.
|
present, so more often than not you'll be slapped with a nice big �.
|
||||||
To get things to work, MSIE 6 needs a little nudge. You could configure it
|
To get things to work, MSIE 6 needs a little nudge. You could configure it
|
||||||
to use a different font to render the text, but you can achieve the same
|
to use a different font to render the text, but you can acheive the same
|
||||||
effect by selectively changing the font for blocks of special characters
|
effect by selectively changing the font for blocks of special characters
|
||||||
to known good Unicode fonts.</p>
|
to known good Unicode fonts.</p>
|
||||||
|
|
||||||
<p>Fortunately, the folks over at Wikipedia have already done all the
|
<p>Fortunantely, the folks over at Wikipedia have already done all the
|
||||||
heavy lifting for you. Get the CSS from the horse's mouth here:
|
heavy lifting for you. Get the CSS from the horse's mouth here:
|
||||||
<a href="http://en.wikipedia.org/wiki/MediaWiki:Common.css">Common.css</a>,
|
<a href="http://en.wikipedia.org/wiki/MediaWiki:Common.css">Common.css</a>,
|
||||||
and search for ".IPA". There are also a smattering of
|
and search for ".IPA". There are also a smattering of
|
||||||
@@ -972,7 +961,7 @@ users.</p>
|
|||||||
<h3 id="migrate-variablewidth">Dealing with variable width in functions</h3>
|
<h3 id="migrate-variablewidth">Dealing with variable width in functions</h3>
|
||||||
|
|
||||||
<p>When people claim that PHP6 will solve all our Unicode problems, they're
|
<p>When people claim that PHP6 will solve all our Unicode problems, they're
|
||||||
misinformed. It will not fix any of the aforementioned troubles. It will,
|
misinformed. It will not fix any of the abovementioned troubles. It will,
|
||||||
however, fix the problem we are about to discuss: processing UTF-8 text
|
however, fix the problem we are about to discuss: processing UTF-8 text
|
||||||
in PHP.</p>
|
in PHP.</p>
|
||||||
|
|
||||||
@@ -995,7 +984,7 @@ and yes, it is variable width. Other traits:</p>
|
|||||||
in different ways. It is beyond the scope of this document to explain
|
in different ways. It is beyond the scope of this document to explain
|
||||||
what precisely these implications are. PHPWact provides
|
what precisely these implications are. PHPWact provides
|
||||||
a very good <a href="http://www.phpwact.org/php/i18n/utf-8">reference document</a>
|
a very good <a href="http://www.phpwact.org/php/i18n/utf-8">reference document</a>
|
||||||
on what to expect from each function, although coverage is spotty in
|
on what to expect from each functions, although coverage is spotty in
|
||||||
some areas. Their more general notes on
|
some areas. Their more general notes on
|
||||||
<a href="http://www.phpwact.org/php/i18n/charsets">character sets</a>
|
<a href="http://www.phpwact.org/php/i18n/charsets">character sets</a>
|
||||||
are also worth looking at for information on UTF-8. Some rules of thumb
|
are also worth looking at for information on UTF-8. Some rules of thumb
|
||||||
@@ -1009,16 +998,12 @@ when dealing with Unicode text:</p>
|
|||||||
<li>Think twice before using functions that:<ul>
|
<li>Think twice before using functions that:<ul>
|
||||||
<li>...count characters (strlen will return bytes, not characters;
|
<li>...count characters (strlen will return bytes, not characters;
|
||||||
str_split and word_wrap may corrupt)</li>
|
str_split and word_wrap may corrupt)</li>
|
||||||
<li>...convert characters to entity references (UTF-8 doesn't need entities)</li>
|
<li>...entity-ize things (UTF-8 doesn't need entities)</li>
|
||||||
<li>...do very complex string processing (*printf)</li>
|
<li>...do very complex string processing (*printf)</li>
|
||||||
</ul></li>
|
</ul></li>
|
||||||
</ul>
|
</ul>
|
||||||
|
|
||||||
<p>Note: this list applies to UTF-8 encoded text only: if you have
|
<p>...and always think in bytes, not characters. If you use strpos()
|
||||||
a string that you are 100% sure is ASCII, be my guest and use
|
|
||||||
<code>strtolower</code> (HTML Purifier uses this function.)</p>
|
|
||||||
|
|
||||||
<p>Regardless, always think in bytes, not characters. If you use strpos()
|
|
||||||
to find the position of a character, it will be in bytes, but this
|
to find the position of a character, it will be in bytes, but this
|
||||||
usually won't matter since substr() also operates with byte indices!</p>
|
usually won't matter since substr() also operates with byte indices!</p>
|
||||||
|
|
||||||
@@ -1035,7 +1020,7 @@ directory.</p>
|
|||||||
<p>Well, that's it. Hopefully this document has served as a very
|
<p>Well, that's it. Hopefully this document has served as a very
|
||||||
practical springboard into knowledge of how UTF-8 works. You may have
|
practical springboard into knowledge of how UTF-8 works. You may have
|
||||||
decided that you don't want to migrate yet: that's fine, just know
|
decided that you don't want to migrate yet: that's fine, just know
|
||||||
what will happen to your output and what bug reports you may receive.</p>
|
what will happen to your output and what bug reports you may recieve.</p>
|
||||||
|
|
||||||
<p>Many other developers have already discussed the subject of Unicode,
|
<p>Many other developers have already discussed the subject of Unicode,
|
||||||
UTF-8 and internationalization, and I would like to defer to them for
|
UTF-8 and internationalization, and I would like to defer to them for
|
||||||
@@ -1055,6 +1040,3 @@ a more in-depth look into character sets and encodings.</p>
|
|||||||
|
|
||||||
</body>
|
</body>
|
||||||
</html>
|
</html>
|
||||||
|
|
||||||
<!-- vim: et sw=4 sts=4
|
|
||||||
-->
|
|
||||||
|
@@ -15,7 +15,7 @@
|
|||||||
|
|
||||||
<div id="filing">Filed under End-User</div>
|
<div id="filing">Filed under End-User</div>
|
||||||
<div id="index">Return to the <a href="index.html">index</a>.</div>
|
<div id="index">Return to the <a href="index.html">index</a>.</div>
|
||||||
<div id="home"><a href="http://htmlpurifier.org/">HTML Purifier</a> End-User Documentation</div>
|
<div id="home"><a href="http://hp.jpsband.org/">HTML Purifier</a> End-User Documentation</div>
|
||||||
|
|
||||||
<p>Clients like their YouTube videos. It gives them a warm fuzzy feeling when
|
<p>Clients like their YouTube videos. It gives them a warm fuzzy feeling when
|
||||||
they see a neat little embedded video player on their websites that can play
|
they see a neat little embedded video player on their websites that can play
|
||||||
@@ -67,15 +67,17 @@ into your documents. YouTube's code goes like this:</p>
|
|||||||
</ol>
|
</ol>
|
||||||
|
|
||||||
<p>What point 2 means is that if we have code like <code><span
|
<p>What point 2 means is that if we have code like <code><span
|
||||||
class="youtube-embed">AyPzM5WK8ys</span></code> your
|
class="embed-youtube">AyPzM5WK8ys</span></code> your
|
||||||
application can reconstruct the full object from this small snippet that
|
application can reconstruct the full object from this small snippet that
|
||||||
passes through HTML Purifier <em>unharmed</em>.
|
passes through HTML Purifier <em>unharmed</em>.
|
||||||
<a href="http://repo.or.cz/w/htmlpurifier.git?a=blob;hb=HEAD;f=library/HTMLPurifier/Filter/YouTube.php">Show me the code!</a></p>
|
<a href="http://hp.jpsband.org/svnroot/htmlpurifier/trunk/library/HTMLPurifier/Filter/YouTube.php">Show me the code!</a></p>
|
||||||
|
|
||||||
<p>And the corresponding usage:</p>
|
<p>And the corresponding usage:</p>
|
||||||
|
|
||||||
<pre><?php
|
<pre><?php
|
||||||
$config->set('Filter.YouTube', true);
|
// assuming $purifier is an instance of HTMLPurifier
|
||||||
|
require_once 'HTMLPurifier/Filter/YouTube.php';
|
||||||
|
$purifier->addFilter(new HTMLPurifier_Filter_YouTube());
|
||||||
?></pre>
|
?></pre>
|
||||||
|
|
||||||
<p>There is a bit going on in the two code snippets, so let's explain.</p>
|
<p>There is a bit going on in the two code snippets, so let's explain.</p>
|
||||||
@@ -148,6 +150,3 @@ with the core!</p>
|
|||||||
|
|
||||||
</body>
|
</body>
|
||||||
</html>
|
</html>
|
||||||
|
|
||||||
<!-- vim: et sw=4 sts=4
|
|
||||||
-->
|
|
||||||
|
@@ -1,23 +1,14 @@
|
|||||||
<?php
|
<?php exit;
|
||||||
|
|
||||||
// This file demonstrates basic usage of HTMLPurifier.
|
// This file demonstrates basic usage of HTMLPurifier.
|
||||||
|
|
||||||
// replace this with the path to the HTML Purifier library
|
require_once '/path/to/htmlpurifier/library/HTMLPurifier.auto.php';
|
||||||
require_once '../../library/HTMLPurifier.auto.php';
|
|
||||||
|
|
||||||
$config = HTMLPurifier_Config::createDefault();
|
$purifier = new HTMLPurifier();
|
||||||
|
|
||||||
// configuration goes here:
|
|
||||||
$config->set('Core.Encoding', 'UTF-8'); // replace with your encoding
|
|
||||||
$config->set('HTML.Doctype', 'XHTML 1.0 Transitional'); // replace with your doctype
|
|
||||||
|
|
||||||
$purifier = new HTMLPurifier($config);
|
|
||||||
|
|
||||||
// untrusted input HTML
|
|
||||||
$html = '<b>Simple and short';
|
$html = '<b>Simple and short';
|
||||||
|
|
||||||
$pure_html = $purifier->purify($html);
|
$pure_html = $purifier->purify($html);
|
||||||
|
|
||||||
echo '<pre>' . htmlspecialchars($pure_html) . '</pre>';
|
echo $pure_html;
|
||||||
|
|
||||||
// vim: et sw=4 sts=4
|
?>
|
136
docs/examples/demo.php
Normal file
136
docs/examples/demo.php
Normal file
@@ -0,0 +1,136 @@
|
|||||||
|
<?php
|
||||||
|
|
||||||
|
// using _REQUEST because we accept GET and POST requests
|
||||||
|
|
||||||
|
$content = empty($_REQUEST['xml']) ? 'text/html' : 'application/xhtml+xml';
|
||||||
|
header("Content-type:$content;charset=UTF-8");
|
||||||
|
|
||||||
|
// prevent PHP versions with shorttags from barfing
|
||||||
|
echo '<?xml version="1.0" encoding="UTF-8" ?>
|
||||||
|
';
|
||||||
|
|
||||||
|
function getFormMethod() {
|
||||||
|
return (isset($_REQUEST['post'])) ? 'post' : 'get';
|
||||||
|
}
|
||||||
|
|
||||||
|
if (empty($_REQUEST['strict'])) {
|
||||||
|
?><!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
|
||||||
|
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
|
||||||
|
<?php
|
||||||
|
} else {
|
||||||
|
?>
|
||||||
|
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
|
||||||
|
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
|
||||||
|
<?php
|
||||||
|
}
|
||||||
|
?>
|
||||||
|
<html xmlns="http://www.w3.org/1999/xhtml" lang="en" xml:lang="en">
|
||||||
|
<head>
|
||||||
|
<title>HTML Purifier Live Demo</title>
|
||||||
|
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
<h1>HTML Purifier Live Demo</h1>
|
||||||
|
<?php
|
||||||
|
|
||||||
|
require_once '../../library/HTMLPurifier.auto.php';
|
||||||
|
|
||||||
|
if (!empty($_REQUEST['html'])) { // start result
|
||||||
|
|
||||||
|
if (strlen($_REQUEST['html']) > 50000) {
|
||||||
|
?>
|
||||||
|
<p>Request exceeds maximum allowed text size of 50kb.</p>
|
||||||
|
<?php
|
||||||
|
} else { // start main processing
|
||||||
|
|
||||||
|
$html = get_magic_quotes_gpc() ? stripslashes($_REQUEST['html']) : $_REQUEST['html'];
|
||||||
|
|
||||||
|
$config = HTMLPurifier_Config::createDefault();
|
||||||
|
$config->set('Core', 'TidyFormat', !empty($_REQUEST['tidy']));
|
||||||
|
$config->set('HTML', 'Strict', !empty($_REQUEST['strict']));
|
||||||
|
$purifier = new HTMLPurifier($config);
|
||||||
|
$pure_html = $purifier->purify($html);
|
||||||
|
|
||||||
|
?>
|
||||||
|
<p>Here is your purified HTML:</p>
|
||||||
|
<div style="border:5px solid #CCC;margin:0 10%;padding:1em;">
|
||||||
|
<?php if(getFormMethod() == 'get') { ?>
|
||||||
|
<div style="float:right;">
|
||||||
|
<a href="http://validator.w3.org/check?uri=referer"><img
|
||||||
|
src="http://www.w3.org/Icons/valid-xhtml10"
|
||||||
|
alt="Valid XHTML 1.0 Transitional" height="31" width="88" style="border:0;" /></a>
|
||||||
|
</div>
|
||||||
|
<?php } ?>
|
||||||
|
<?php
|
||||||
|
|
||||||
|
echo $pure_html;
|
||||||
|
|
||||||
|
?>
|
||||||
|
<div style="clear:both;"></div>
|
||||||
|
</div>
|
||||||
|
<p>Here is the source code of the purified HTML:</p>
|
||||||
|
<pre><?php
|
||||||
|
|
||||||
|
echo htmlspecialchars($pure_html, ENT_COMPAT, 'UTF-8');
|
||||||
|
|
||||||
|
?></pre>
|
||||||
|
<?php
|
||||||
|
if (getFormMethod() == 'post') { // start POST validation notice
|
||||||
|
?>
|
||||||
|
<p>If you would like to validate the code with
|
||||||
|
<a href="http://validator.w3.org/#validate-by-input">W3C's
|
||||||
|
validator</a>, copy and paste the <em>entire</em> demo page's source.</p>
|
||||||
|
<?php
|
||||||
|
} // end POST validation notice
|
||||||
|
|
||||||
|
} // end main processing
|
||||||
|
|
||||||
|
// end result
|
||||||
|
} else {
|
||||||
|
|
||||||
|
?>
|
||||||
|
<p>Welcome to the live demo. Enter some HTML and see how HTML Purifier
|
||||||
|
will filter it.</p>
|
||||||
|
<?php
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
?>
|
||||||
|
<form id="filter" action="demo.php<?php
|
||||||
|
echo '?' . getFormMethod();
|
||||||
|
if (isset($_REQUEST['profile']) || isset($_REQUEST['XDEBUG_PROFILE'])) {
|
||||||
|
echo '&XDEBUG_PROFILE=1';
|
||||||
|
} ?>" method="<?php echo getFormMethod(); ?>">
|
||||||
|
<fieldset>
|
||||||
|
<legend>HTML Purifier Input (<?php echo getFormMethod(); ?>)</legend>
|
||||||
|
<textarea name="html" cols="60" rows="15"><?php
|
||||||
|
|
||||||
|
if (isset($html)) {
|
||||||
|
echo htmlspecialchars(
|
||||||
|
HTMLPurifier_Encoder::cleanUTF8($html), ENT_COMPAT, 'UTF-8');
|
||||||
|
}
|
||||||
|
?></textarea>
|
||||||
|
<?php if (getFormMethod() == 'get') { ?>
|
||||||
|
<p><strong>Warning:</strong> GET request method can only hold
|
||||||
|
8129 characters (probably less depending on your browser).
|
||||||
|
If you need to test anything
|
||||||
|
larger than that, try the <a href="demo.php?post">POST form</a>.</p>
|
||||||
|
<?php } ?>
|
||||||
|
<?php if (extension_loaded('tidy')) { ?>
|
||||||
|
<div>Nicely format output with Tidy? <input type="checkbox" value="1"
|
||||||
|
name="tidy"<?php if (!empty($_REQUEST['tidy'])) echo ' checked="checked"'; ?> /></div>
|
||||||
|
<?php } ?>
|
||||||
|
<div>XHTML 1.0 Strict output? <input type="checkbox" value="1"
|
||||||
|
name="strict"<?php if (!empty($_REQUEST['strict'])) echo ' checked="checked"'; ?> /></div>
|
||||||
|
<div>Serve as application/xhtml+xml? (not for IE) <input type="checkbox" value="1"
|
||||||
|
name="xml"<?php if (!empty($_REQUEST['xml'])) echo ' checked="checked"'; ?> /></div>
|
||||||
|
<div>
|
||||||
|
<input type="submit" value="Submit" name="submit" class="button" />
|
||||||
|
</div>
|
||||||
|
</fieldset>
|
||||||
|
</form>
|
||||||
|
<p>Return to <a href="http://hp.jpsband.org/">HTML Purifier's home page</a>.
|
||||||
|
Try the form in <a href="demo.php?get">GET</a> and <a href="demo.php?post">POST</a> request
|
||||||
|
flavors (GET is easy to validate with W3C, but POST allows larger inputs).</p>
|
||||||
|
</body>
|
||||||
|
</html>
|
@@ -4,6 +4,3 @@ function init() {
|
|||||||
element.innerHTML = '“'+element.innerHTML+'”';
|
element.innerHTML = '“'+element.innerHTML+'”';
|
||||||
}
|
}
|
||||||
</script>
|
</script>
|
||||||
|
|
||||||
<!-- vim: et sw=4 sts=4
|
|
||||||
-->
|
|
||||||
|
@@ -13,7 +13,7 @@
|
|||||||
|
|
||||||
<h1>Documentation</h1>
|
<h1>Documentation</h1>
|
||||||
|
|
||||||
<p><strong><a href="http://htmlpurifier.org/">HTML Purifier</a></strong> has documentation for all types of people.
|
<p><strong><a href="http://hp.jpsband.org/">HTML Purifier</a></strong> has documentation for all types of people.
|
||||||
Here is an index of all of them.</p>
|
Here is an index of all of them.</p>
|
||||||
|
|
||||||
<h2>End-user</h2>
|
<h2>End-user</h2>
|
||||||
@@ -34,15 +34,6 @@ information for casual developers using HTML Purifier.</p>
|
|||||||
<dt><a href="enduser-utf8.html">UTF-8: The Secret of Character Encoding</a></dt>
|
<dt><a href="enduser-utf8.html">UTF-8: The Secret of Character Encoding</a></dt>
|
||||||
<dd>Describes the rationale for using UTF-8, the ramifications otherwise, and how to make the switch.</dd>
|
<dd>Describes the rationale for using UTF-8, the ramifications otherwise, and how to make the switch.</dd>
|
||||||
|
|
||||||
<dt><a href="enduser-tidy.html">Tidy</a></dt>
|
|
||||||
<dd>Tutorial for tweaking HTML Purifier's Tidy-like behavior.</dd>
|
|
||||||
|
|
||||||
<dt><a href="enduser-customize.html">Customize</a></dt>
|
|
||||||
<dd>Tutorial for customizing HTML Purifier's tag and attribute sets.</dd>
|
|
||||||
|
|
||||||
<dt><a href="enduser-uri-filter.html">URI Filters</a></dt>
|
|
||||||
<dd>Tutorial for creating custom URI filters.</dd>
|
|
||||||
|
|
||||||
</dl>
|
</dl>
|
||||||
|
|
||||||
<h2>Development</h2>
|
<h2>Development</h2>
|
||||||
@@ -51,6 +42,9 @@ conventions.</p>
|
|||||||
|
|
||||||
<dl>
|
<dl>
|
||||||
|
|
||||||
|
<dt><a href="dev-code-quality.html">Code Quality Issues</a></dt>
|
||||||
|
<dd>Discusses code quality issues and places that need to be refactored.</dd>
|
||||||
|
|
||||||
<dt><a href="dev-progress.html">Implementation Progress</a></dt>
|
<dt><a href="dev-progress.html">Implementation Progress</a></dt>
|
||||||
<dd>Tables detailing HTML element and CSS property implementation coverage.</dd>
|
<dd>Tables detailing HTML element and CSS property implementation coverage.</dd>
|
||||||
|
|
||||||
@@ -60,16 +54,10 @@ conventions.</p>
|
|||||||
<dt><a href="dev-optimization.html">Optimization</a></dt>
|
<dt><a href="dev-optimization.html">Optimization</a></dt>
|
||||||
<dd>Discusses possible methods of optimizing HTML Purifier.</dd>
|
<dd>Discusses possible methods of optimizing HTML Purifier.</dd>
|
||||||
|
|
||||||
<dt><a href="dev-flush.html">Flushing the Purifier</a></dt>
|
|
||||||
<dd>Discusses when to flush HTML Purifier's various caches.</dd>
|
|
||||||
|
|
||||||
<dt><a href="dev-advanced-api.html">Advanced API</a></dt>
|
<dt><a href="dev-advanced-api.html">Advanced API</a></dt>
|
||||||
<dd>Specification for HTML Purifier's advanced API for defining
|
<dd>Functional specification for HTML Purifier's advanced API for defining
|
||||||
custom filtering behavior.</dd>
|
custom filtering behavior.</dd>
|
||||||
|
|
||||||
<dt><a href="dev-config-schema.html">Config Schema</a></dt>
|
|
||||||
<dd>Describes config schema framework in HTML Purifier.</dd>
|
|
||||||
|
|
||||||
</dl>
|
</dl>
|
||||||
|
|
||||||
<h2>Proposals</h2>
|
<h2>Proposals</h2>
|
||||||
@@ -98,8 +86,8 @@ the code. They may be upgraded to HTML files or stay as TXT scratchpads.</p>
|
|||||||
<table class="table">
|
<table class="table">
|
||||||
|
|
||||||
<thead><tr>
|
<thead><tr>
|
||||||
<th style="width:10%">Type</th>
|
<th width="10%">Type</th>
|
||||||
<th style="width:20%">Name</th>
|
<th width="20%">Name</th>
|
||||||
<th>Description</th>
|
<th>Description</th>
|
||||||
</tr></thead>
|
</tr></thead>
|
||||||
|
|
||||||
@@ -117,18 +105,6 @@ the code. They may be upgraded to HTML files or stay as TXT scratchpads.</p>
|
|||||||
<td>Common security issues that may still arise (half-baked).</td>
|
<td>Common security issues that may still arise (half-baked).</td>
|
||||||
</tr>
|
</tr>
|
||||||
|
|
||||||
<tr>
|
|
||||||
<td>Development</td>
|
|
||||||
<td><a href="dev-config-bcbreaks.txt">Config BC Breaks</a></td>
|
|
||||||
<td>Backwards-incompatible changes in HTML Purifier 4.0.0</td>
|
|
||||||
</tr>
|
|
||||||
|
|
||||||
<tr>
|
|
||||||
<td>Development</td>
|
|
||||||
<td><a href="dev-code-quality.txt">Code Quality Issues</a></td>
|
|
||||||
<td>Enumerates code quality issues and places that need to be refactored.</td>
|
|
||||||
</tr>
|
|
||||||
|
|
||||||
<tr>
|
<tr>
|
||||||
<td>Proposal</td>
|
<td>Proposal</td>
|
||||||
<td><a href="proposal-filter-levels.txt">Filter levels</a></td>
|
<td><a href="proposal-filter-levels.txt">Filter levels</a></td>
|
||||||
@@ -147,16 +123,10 @@ the code. They may be upgraded to HTML files or stay as TXT scratchpads.</p>
|
|||||||
<td>Assorted configuration options that could be implemented.</td>
|
<td>Assorted configuration options that could be implemented.</td>
|
||||||
</tr>
|
</tr>
|
||||||
|
|
||||||
<tr>
|
|
||||||
<td>Proposal</td>
|
|
||||||
<td><a href="proposal-css-extraction.txt">CSS extraction</a></td>
|
|
||||||
<td>Taking the inline CSS out of documents and into <code>style</code>.</td>
|
|
||||||
</tr>
|
|
||||||
|
|
||||||
<tr>
|
<tr>
|
||||||
<td>Reference</td>
|
<td>Reference</td>
|
||||||
<td><a href="ref-content-models.txt">Handling Content Model Changes</a></td>
|
<td><a href="ref-loose-vs-strict.txt">Loose vs. Strict</a></td>
|
||||||
<td>Discusses how to tidy up content model changes using custom ChildDef classes.</td>
|
<td>Differences between HTML Strict and Transitional versions.</td>
|
||||||
</tr>
|
</tr>
|
||||||
|
|
||||||
<tr>
|
<tr>
|
||||||
@@ -167,8 +137,14 @@ the code. They may be upgraded to HTML files or stay as TXT scratchpads.</p>
|
|||||||
|
|
||||||
<tr>
|
<tr>
|
||||||
<td>Reference</td>
|
<td>Reference</td>
|
||||||
<td><a href="ref-html-modularization.txt">Modularization of HTMLDefinition</a></td>
|
<td><a href="ref-strictness.txt">Strictness</a></td>
|
||||||
<td>Provides a high-level overview of the concepts behind HTMLModules.</td>
|
<td>Short essay on how loose definition isn't really loose.</td>
|
||||||
|
</tr>
|
||||||
|
|
||||||
|
<tr>
|
||||||
|
<td>Reference</td>
|
||||||
|
<td><a href="ref-xhtml-1.1.txt">XHTML 1.1</a></td>
|
||||||
|
<td>What we'd have to do to support XHTML 1.1.</td>
|
||||||
</tr>
|
</tr>
|
||||||
|
|
||||||
<tr>
|
<tr>
|
||||||
@@ -181,8 +157,6 @@ the code. They may be upgraded to HTML files or stay as TXT scratchpads.</p>
|
|||||||
|
|
||||||
</table>
|
</table>
|
||||||
|
|
||||||
|
<div id="version">$Id$</div>
|
||||||
</body>
|
</body>
|
||||||
</html>
|
</html>
|
||||||
|
|
||||||
<!-- vim: et sw=4 sts=4
|
|
||||||
-->
|
|
||||||
|
@@ -15,7 +15,7 @@
|
|||||||
|
|
||||||
<div id="filing">Filed under Proposals</div>
|
<div id="filing">Filed under Proposals</div>
|
||||||
<div id="index">Return to the <a href="index.html">index</a>.</div>
|
<div id="index">Return to the <a href="index.html">index</a>.</div>
|
||||||
<div id="home"><a href="http://htmlpurifier.org/">HTML Purifier</a> End-User Documentation</div>
|
<div id="home"><a href="http://hp.jpsband.org/">HTML Purifier</a> End-User Documentation</div>
|
||||||
|
|
||||||
<p>Your website probably has a color-scheme.
|
<p>Your website probably has a color-scheme.
|
||||||
<span style="color:#090; background:#FFF;">Green on white</span>,
|
<span style="color:#090; background:#FFF;">Green on white</span>,
|
||||||
@@ -42,8 +42,7 @@ into the mix.</li>
|
|||||||
something like that?</li>
|
something like that?</li>
|
||||||
</ol>
|
</ol>
|
||||||
|
|
||||||
|
<div id="version">$Id$</div>
|
||||||
|
|
||||||
</body>
|
</body>
|
||||||
</html>
|
</html>
|
||||||
|
|
||||||
<!-- vim: et sw=4 sts=4
|
|
||||||
-->
|
|
||||||
|
@@ -12,12 +12,29 @@ the documentation in ConfigDef for more information on these namespaces.
|
|||||||
|
|
||||||
Since configuration is dependent on context, internal classes require a
|
Since configuration is dependent on context, internal classes require a
|
||||||
configuration object to be passed as a parameter. (They also require a
|
configuration object to be passed as a parameter. (They also require a
|
||||||
Context object). A majority of classes do not need the config object,
|
Context object).
|
||||||
but for those who do, it is a lifesaver.
|
|
||||||
|
|
||||||
Definition objects are complex datatypes influenced by their respective
|
In relation to HTMLDefinition and CSSDefinition, there could be a special class
|
||||||
directive namespaces (HTMLDefinition with HTML and CSSDefinition with CSS).
|
of directives that influence the *construction* of the Definition object.
|
||||||
If any of these directives is updated, HTML Purifier forces the definition
|
A theoretical call pattern would look like:
|
||||||
to be regenerated.
|
|
||||||
|
|
||||||
vim: et sw=4 sts=4
|
1. Client calls Config->getHTMLDefinition()
|
||||||
|
2. Config calls HTMLDefinition->createNew(this)
|
||||||
|
3. HTMLDefinition constructs itself with base configuration
|
||||||
|
4. HTMLDefinition calls Config->get('HTML')
|
||||||
|
5. Config returns array of directives
|
||||||
|
6. HTMLDefinition performs operations and changes specified by directives
|
||||||
|
7. HTMLDefinition returns constructed definition
|
||||||
|
8. Config caches definition so it doesn't have to be generated again
|
||||||
|
9. Config returns definition
|
||||||
|
|
||||||
|
You could also override Config's copy of the definition with your own
|
||||||
|
custom copy, which OVERRIDES all directives. Only the base, vanilla copy
|
||||||
|
is the Singleton; the object actually interfaced with is an operated-upon
|
||||||
|
clone of that object. Also, if an update to the directives would update
|
||||||
|
the definition, you'd have to force reconstruction.
|
||||||
|
|
||||||
|
In practice, the directives pulled from the config object are
|
||||||
|
solely need-based, and the flex points are littered throughout the
|
||||||
|
setup() function. Some sort of refactoring is likely in order. See
|
||||||
|
ref-xhtml-1.1.txt for more info.
|
||||||
|
@@ -1,34 +0,0 @@
|
|||||||
|
|
||||||
Extracting inline CSS from HTML Purifier
|
|
||||||
voodoofied: Assigning semantics to elements
|
|
||||||
|
|
||||||
Sander Tekelenburg brought to my attention the poor programming style of
|
|
||||||
inline CSS in HTML documents. In an ideal world, we wouldn't be using inline
|
|
||||||
CSS at all: everything would be assigned using semantic class attributes
|
|
||||||
from an external stylesheet.
|
|
||||||
|
|
||||||
With ExtractStyleBlocks and CSSTidy, this is now possible (when allowed, users
|
|
||||||
can specify a style element which gets extracted from the user-submitted HTML, which
|
|
||||||
the application can place in the head of the HTML document). But there still
|
|
||||||
is the issue of inline CSS that refuses to go away.
|
|
||||||
|
|
||||||
The basic idea behind this feature is to assign every element a unique identifier,
|
|
||||||
and then move all of the CSS data to a style-sheet. This HTML:
|
|
||||||
|
|
||||||
<div style="text-align:center">Big <span style="color:red;">things</span>!</div>
|
|
||||||
|
|
||||||
would be turned into
|
|
||||||
|
|
||||||
<div id="hp-12345">Big <span id="hp-12346">things</span>!</div>
|
|
||||||
|
|
||||||
and a stylesheet that is:
|
|
||||||
|
|
||||||
#hp-12345 {text-align:center;}
|
|
||||||
#hp-12346 {color:red;}
|
|
||||||
|
|
||||||
Beyond that, HTML Purifier can magically merge common CSS values together,
|
|
||||||
and all manner of other heuristic things. HTML Purifier should also
|
|
||||||
make it easy for an admin to re-style the HTML semantically. Speed is not
|
|
||||||
an issue. Also, better WYSIWYG editors are needed.
|
|
||||||
|
|
||||||
vim: et sw=4 sts=4
|
|
@@ -1,211 +0,0 @@
|
|||||||
Considerations for ErrorCollection
|
|
||||||
|
|
||||||
Presently, HTML Purifier takes a code-execution centric approach to handling
|
|
||||||
errors. Errors are organized and grouped according to which segment of the
|
|
||||||
code triggers them, not necessarily the portion of the input document that
|
|
||||||
triggered the error. This means that errors are pseudo-sorted by category,
|
|
||||||
rather than location in the document.
|
|
||||||
|
|
||||||
One easy way to "fix" this problem would be to re-sort according to line number.
|
|
||||||
However, the "category" style information we derive from naively following
|
|
||||||
program execution is still useful. After all, each of the strategies which
|
|
||||||
can report errors still process the document mostly linearly. Furthermore,
|
|
||||||
not only do they process linearly, but the way they pass off operations to
|
|
||||||
sub-systems mirrors that of the document. For example, AttrValidator will
|
|
||||||
linearly proceed through elements, and on each element will use AttrDef to
|
|
||||||
validate those contents. From there, the attribute might have more
|
|
||||||
sub-components, which have execution passed off accordingly.
|
|
||||||
|
|
||||||
In fact, each strategy handles a very specific class of "error."
|
|
||||||
|
|
||||||
RemoveForeignElements - element tokens
|
|
||||||
MakeWellFormed - element token ordering
|
|
||||||
FixNesting - element token ordering
|
|
||||||
ValidateAttributes - attributes of elements
|
|
||||||
|
|
||||||
The crucial point is that while we care about the hierarchy governing these
|
|
||||||
different errors, we *don't* care about any other information about what actually
|
|
||||||
happens to the elements. This brings up another point: if HTML Purifier fixes
|
|
||||||
something, this is not really a notice/warning/error; it's really a suggestion
|
|
||||||
of a way to fix the aforementioned defects.
|
|
||||||
|
|
||||||
In short, the refactoring to take this into account kinda sucks.
|
|
||||||
|
|
||||||
Errors should not be recorded in the order that they are reported. Instead, they
|
|
||||||
should be bound to the line (and preferably element) in which they were found.
|
|
||||||
This means we need some way to uniquely identify every element in the document,
|
|
||||||
which doesn't presently exist. An easy way of adding this would be to track
|
|
||||||
line columns. An important ramification of this is that we *must* use the
|
|
||||||
DirectLex implementation.
|
|
||||||
|
|
||||||
1. Implement column numbers for DirectLex [DONE!]
|
|
||||||
2. Disable error collection when not using DirectLex [DONE!]
|
|
||||||
|
|
||||||
Next, we need to re-orient all of the error declarations to place CurrentToken
|
|
||||||
of utmost importance. Since this is passed via Context, it's not always clear
|
|
||||||
if that's available. ErrorCollector should complain HARD if it isn't available.
|
|
||||||
There are some locations when we don't have a token available. These include:
|
|
||||||
|
|
||||||
* Lexing - this can actually have a row and column, but NOT correspond to
|
|
||||||
a token
|
|
||||||
* End of document errors - bump this to the end
|
|
||||||
|
|
||||||
Actually, we *don't* have to complain if CurrentToken isn't available; we just
|
|
||||||
set it as a document-wide error. And actually, nothing needs to be done here.
|
|
||||||
|
|
||||||
Something interesting to consider is whether or not we care about the locations
|
|
||||||
of attributes and CSS properties, i.e. the sub-objects that compose these things.
|
|
||||||
In terms of consistency, at the very least attributes should have column/line
|
|
||||||
numbers attached to them. However, this may be overkill, as attributes are
|
|
||||||
uniquely identifiable. You could go even further, with CSS, but they are also
|
|
||||||
uniquely identifiable.
|
|
||||||
|
|
||||||
Bottom-line is, however, this information must be available, in form of the
|
|
||||||
CurrentAttribute and CurrentCssProperty (theoretical) context variables, and
|
|
||||||
it must be used to organize the errors that the sub-processes may throw.
|
|
||||||
There is also a hierarchy of sorts that may make merging this into one context
|
|
||||||
variable more sense, if it hadn't been for HTML's reasonably rigid structure.
|
|
||||||
A CSS property will never contain an HTML attribute. So we won't ever get
|
|
||||||
recursive relations, and having multiple depths won't ever make sense. Leave
|
|
||||||
this be.
|
|
||||||
|
|
||||||
We already have this information, and consequently, using start and end is
|
|
||||||
*unnecessary*, so long as the context variables are set appropriately. We don't
|
|
||||||
care if an error was thrown by an attribute transform or an attribute definition;
|
|
||||||
to the end user these are the same (for a developer, they are different, but
|
|
||||||
they're better off with a stack trace (which we should add support for) in such
|
|
||||||
cases).
|
|
||||||
|
|
||||||
3. Remove start()/end() code. Don't get rid of recursion, though [DONE]
|
|
||||||
4. Setup ErrorCollector to use context information to setup hierarchies.
|
|
||||||
This may require a different internal format. Use objects if it gets
|
|
||||||
complex. [DONE]
|
|
||||||
|
|
||||||
ASIDE
|
|
||||||
More on this topic: since we are now binding errors to lines
|
|
||||||
and columns, a particular error can have three relationships to that
|
|
||||||
specific location:
|
|
||||||
|
|
||||||
1. The token at that location directly
|
|
||||||
RemoveForeignElements
|
|
||||||
AttrValidator (transforms)
|
|
||||||
MakeWellFormed
|
|
||||||
2. A "component" of that token (i.e. attribute)
|
|
||||||
AttrValidator (removals)
|
|
||||||
3. A modification to that node (i.e. contents from start to end
|
|
||||||
token) as a whole
|
|
||||||
FixNesting
|
|
||||||
|
|
||||||
This needs to be marked accordingly. In the presentation, it might
|
|
||||||
make sense to keep (3) separate, and have (2) a sublist of (1). (1) can
|
|
||||||
be a closing tag, in which case (3) makes no sense at all, OR it
|
|
||||||
should be related with its opening tag (this may not necessarily
|
|
||||||
be possible before MakeWellFormed is run).
|
|
||||||
|
|
||||||
So, the line and column count as our identifier, so:
|
|
||||||
|
|
||||||
$errors[$line][$col] = ...
|
|
||||||
|
|
||||||
Then, we need to identify case 1, 2 or 3. They are identified as
|
|
||||||
such:
|
|
||||||
|
|
||||||
1. Need some sort of semaphore in RemoveForeignElements, etc.
|
|
||||||
2. If CurrentAttr/CurrentCssProperty is non-null
|
|
||||||
3. Default (FixNesting, MakeWellFormed)
|
|
||||||
|
|
||||||
One consideration about (1) is that it usually is actually a
|
|
||||||
(3) modification, but we have no way of knowing about that because
|
|
||||||
of various optimizations. However, they can probably be treated
|
|
||||||
the same. The other difficulty is that (3) is never a line and
|
|
||||||
column; rather, it is a range (i.e. a duple) and telling the user
|
|
||||||
the very start of the range may confuse them. For example,
|
|
||||||
|
|
||||||
<b>Foo<div>bar</div></b>
|
|
||||||
^ ^
|
|
||||||
|
|
||||||
The node being operated on is <b>, so the error would be assigned
|
|
||||||
to the first caret, with a "node reorganized" error. Then, the
|
|
||||||
ChildDef would have submitted its own suggestions and errors with
|
|
||||||
regard to what's going in the internals. So I suppose this is
|
|
||||||
ok. :-)
|
|
||||||
|
|
||||||
Now, the structure of the earlier mentioned ... would be something
|
|
||||||
like this:
|
|
||||||
|
|
||||||
object {
|
|
||||||
type = (token|attr|property),
|
|
||||||
value, // appropriate for type
|
|
||||||
errors => array(),
|
|
||||||
sub-errors = [recursive],
|
|
||||||
}
|
|
||||||
|
|
||||||
This helps us keep things agnostic. It is also sufficiently complex
|
|
||||||
to warrant an object.
|
|
||||||
|
|
||||||
So, more wanking about the object format is in order. The way HTML Purifier is
|
|
||||||
currently setup, the only possible hierarchy is:
|
|
||||||
|
|
||||||
token -> attr -> css property
|
|
||||||
|
|
||||||
These relations do not exist all of the time; a comment or end token would not
|
|
||||||
ever have any attributes, and non-style attributes would never have CSS properties
|
|
||||||
associated with them.
|
|
||||||
|
|
||||||
I believe that it is worth supporting multiple paths. At some point, we might
|
|
||||||
have a hierarchy like:
|
|
||||||
|
|
||||||
* -> syntax
|
|
||||||
-> token -> attr -> css property
|
|
||||||
-> url
|
|
||||||
-> css stylesheet <style>
|
|
||||||
|
|
||||||
et cetera. Now, one of the practical implications of this is that every "node"
|
|
||||||
on our tree is well-defined, so in theory it should be possible to either 1.
|
|
||||||
create a separate class for each error struct, or 2. embed this information
|
|
||||||
directly into HTML Purifier's token stream. Embedding the information in the
|
|
||||||
token stream is not a terribly good idea, since tokens can be removed, etc.
|
|
||||||
So that leaves us with 1... and if we use a generic interface we can cut down
|
|
||||||
on a lot of code we might need. So let's leave it like this.
|
|
||||||
|
|
||||||
~~~~
|
|
||||||
|
|
||||||
Then we setup suggestions.
|
|
||||||
|
|
||||||
5. Setup a separate error class which tells the user any modifications
|
|
||||||
HTML Purifier made.
|
|
||||||
|
|
||||||
Some information about this:
|
|
||||||
|
|
||||||
Our current paradigm is to tell the user what HTML Purifier did to the HTML.
|
|
||||||
This is the most natural mode of operation, since that's what HTML Purifier
|
|
||||||
is all about; it was not meant to be a validator.
|
|
||||||
|
|
||||||
However, most other people have experience dealing with a validator. In cases
|
|
||||||
where HTML Purifier unambiguously does the right thing, simply giving the user
|
|
||||||
the correct version isn't a bad idea, but problems arise when:
|
|
||||||
|
|
||||||
- The user has such bad HTML we do something odd, when we should have just
|
|
||||||
flagged the HTML as an error. Such examples are when we do things like
|
|
||||||
remove text from directly inside a <table> tag. It was probably meant to
|
|
||||||
be in a <td> tag or be outside the table, but we're not smart enough to
|
|
||||||
realize this so we just remove it. In such a case, we should tell the user
|
|
||||||
that there was foreign data in the table, but then we shouldn't "demand"
|
|
||||||
the user remove the data; it's more of a "here's a possible way of
|
|
||||||
rectifying the problem"
|
|
||||||
|
|
||||||
- Giving line context for input is hard enough, but feasible; giving output
|
|
||||||
line context will be extremely difficult due to shifting lines; we'd probably
|
|
||||||
have to track what the tokens are and then find the appropriate out context
|
|
||||||
and it's not guaranteed to work etc etc etc.
|
|
||||||
|
|
||||||
````````````
|
|
||||||
|
|
||||||
Don't forget to spruce up output.
|
|
||||||
|
|
||||||
6. Output needs to automatically give line and column numbers, basically
|
|
||||||
"at line" on steroids. Look at W3C's output; it's ok. [PARTIALLY DONE]
|
|
||||||
|
|
||||||
- We need a standard CSS to apply (check demo.css for some starting
|
|
||||||
styling; some buttons would also be hip)
|
|
||||||
|
|
||||||
vim: et sw=4 sts=4
|
|
@@ -2,16 +2,23 @@
|
|||||||
Filter Levels
|
Filter Levels
|
||||||
When one size *does not* fit all
|
When one size *does not* fit all
|
||||||
|
|
||||||
It makes little sense to constrain users to one set of HTML elements and
|
The more I think about it, the less sense it makes for maintaining one huge
|
||||||
attributes and tell them that they are not allowed to mold this in
|
monolithic HTMLDefinition class. There's simply so much variation that
|
||||||
any fashion. Many users demand to be able to custom-select which elements
|
could go into this definition: the set of HTML good for blog entries is
|
||||||
and attributes they want. This is fine: because HTML Purifier keeps close
|
definitely too large for HTML that would be allowed in blog comments. Going
|
||||||
track of what elements are safe to use, there is no way for them to
|
from Transitional to Strict requires changes to the definition.
|
||||||
accidentally allow an XSS-able tag.
|
|
||||||
|
|
||||||
However, combing through the HTML spec to make your own whitelist can
|
Allowing users to specify their own whitelists is one step (implemented, btw),
|
||||||
be a daunting task. HTML Purifier ought to offer pre-canned filter levels
|
but I have doubts on only doing this. Simply put, the typical programmer is too
|
||||||
that amateur users can select based on what they think is their use-case.
|
lazy to actually go through the trouble of investigating which tags, attributes
|
||||||
|
and properties to allow. HTMLDefinition makes a big part of what HTMLPurifier
|
||||||
|
is.
|
||||||
|
|
||||||
|
The idea, then, is to set up fundamentally different sets of definitions, which
|
||||||
|
can further be customized using simpler configuration options. Alternatively,
|
||||||
|
they could be implemented as configuration profiles, which simply load
|
||||||
|
a set of recommended directives to achieve a desired effect (no simpler
|
||||||
|
config options though).
|
||||||
|
|
||||||
Here are some fuzzy levels you could set:
|
Here are some fuzzy levels you could set:
|
||||||
|
|
||||||
@@ -32,17 +39,13 @@ Here are some fuzzy levels you could set:
|
|||||||
|
|
||||||
One final note: when you start axing tags that are more commonly used, you
|
One final note: when you start axing tags that are more commonly used, you
|
||||||
run the risk of accidentally destroying user data, especially if the data
|
run the risk of accidentally destroying user data, especially if the data
|
||||||
is incoming from a WYSIWYG editor that hasn't been synced accordingly. This may
|
is incoming from a WYSIWYG eidtor that hasn't been synced accordingly. This may
|
||||||
make forbidden element to text transformations desirable (for example, images).
|
make forbidden element to text transformations desirable (for example, images).
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
== Element Risk Analysis ==
|
== Element Risk Analysis ==
|
||||||
|
|
||||||
Although none of the currently supported elements presents a security
|
|
||||||
threat per se, some can cause problems for page layouts or be
|
|
||||||
extremely complicated.
|
|
||||||
|
|
||||||
Legend:
|
Legend:
|
||||||
[danger level] - regular tags / uncommon tags ~ deprecated tags
|
[danger level] - regular tags / uncommon tags ~ deprecated tags
|
||||||
[danger level]* - rare tags
|
[danger level]* - rare tags
|
||||||
@@ -111,10 +114,6 @@ Partially presentational - table.cellpadding, table.cellspacing,
|
|||||||
|
|
||||||
== CSS Risk Analysis ==
|
== CSS Risk Analysis ==
|
||||||
|
|
||||||
Currently, there is no support for fine-grained "allowed CSS" specification,
|
|
||||||
mainly because I'm lazy, partially because no one has asked for it. However,
|
|
||||||
this will be added eventually.
|
|
||||||
|
|
||||||
There are certain CSS elements that are extremely useful inline, but then
|
There are certain CSS elements that are extremely useful inline, but then
|
||||||
as you get to more presentation oriented styling it may not always be
|
as you get to more presentation oriented styling it may not always be
|
||||||
appropriate to inline them.
|
appropriate to inline them.
|
||||||
@@ -127,11 +126,8 @@ any CSS properties that are not currently implemented (such as position).
|
|||||||
Dangerous, can go outside container - float
|
Dangerous, can go outside container - float
|
||||||
Easy to abuse - font-size, font-family (font), width
|
Easy to abuse - font-size, font-family (font), width
|
||||||
Colored - background-color (background), border-color (border), color
|
Colored - background-color (background), border-color (border), color
|
||||||
(see proposal-colors.html)
|
|
||||||
Dramatic - border, list-style-position (list-style), margin, padding,
|
Dramatic - border, list-style-position (list-style), margin, padding,
|
||||||
text-align, text-indent, text-transform, vertical-align, line-height
|
text-align, text-indent, text-transform, vertical-align, line-height
|
||||||
|
|
||||||
Dramatic elements substantially change the look of text in ways that should
|
Dramatic elements substantially change the look of text in ways that should
|
||||||
probably have been reserved to other areas.
|
probably have been reserved to other areas.
|
||||||
|
|
||||||
vim: et sw=4 sts=4
|
|
||||||
|
@@ -60,5 +60,3 @@ Neat functionality:
|
|||||||
- Roman numeral formatting
|
- Roman numeral formatting
|
||||||
|
|
||||||
Items marked with a + likely need to be addressed by HTML Purifier
|
Items marked with a + likely need to be addressed by HTML Purifier
|
||||||
|
|
||||||
vim: et sw=4 sts=4
|
|
||||||
|
@@ -2,8 +2,7 @@
|
|||||||
Configuration Ideas
|
Configuration Ideas
|
||||||
|
|
||||||
Here are some theoretical configuration ideas that we could implement some
|
Here are some theoretical configuration ideas that we could implement some
|
||||||
time. Note the naming convention: %Namespace.Directive. If you want one
|
time. Note the naming convention: %Namespace.Directive
|
||||||
implemented, give us a ring, and we'll move it up the priority chain.
|
|
||||||
|
|
||||||
%Attr.RewriteFragments - if there's %Attr.IDPrefix we may want to transparently
|
%Attr.RewriteFragments - if there's %Attr.IDPrefix we may want to transparently
|
||||||
rewrite the URLs we parse too. However, we can only do it when it's a pure
|
rewrite the URLs we parse too. However, we can only do it when it's a pure
|
||||||
@@ -23,6 +22,8 @@ implemented, give us a ring, and we'll move it up the priority chain.
|
|||||||
%URI.AddRelNofollow - will add rel="nofollow" to all links, preventing the
|
%URI.AddRelNofollow - will add rel="nofollow" to all links, preventing the
|
||||||
spread of ill-gotten pagerank
|
spread of ill-gotten pagerank
|
||||||
|
|
||||||
|
%URI.RelativeToAbsolute - transforms all relative URIs to absolute form
|
||||||
|
|
||||||
%URI.HostBlacklistRegex - regexes that if matching the host are disallowed
|
%URI.HostBlacklistRegex - regexes that if matching the host are disallowed
|
||||||
%URI.HostWhitelist - domain names that are excluded from the host blacklist
|
%URI.HostWhitelist - domain names that are excluded from the host blacklist
|
||||||
%URI.HostPolicy - determines whether or not its reject all and then whitelist
|
%URI.HostPolicy - determines whether or not its reject all and then whitelist
|
||||||
@@ -41,4 +42,3 @@ implemented, give us a ring, and we'll move it up the priority chain.
|
|||||||
absolute DNS. While this is actually the preferred method according to
|
absolute DNS. While this is actually the preferred method according to
|
||||||
the RFC, most people opt to use a relative domain name relative to . (root).
|
the RFC, most people opt to use a relative domain name relative to . (root).
|
||||||
|
|
||||||
vim: et sw=4 sts=4
|
|
||||||
|
@@ -1,218 +0,0 @@
|
|||||||
THE UNIVERSAL DESIGN PATTERN: PROPERTIES
|
|
||||||
Steve Yegge
|
|
||||||
|
|
||||||
Implementation:
|
|
||||||
get(name)
|
|
||||||
put(name, value)
|
|
||||||
has(name)
|
|
||||||
remove(name)
|
|
||||||
iteration, with filtering [this will be our namespaces]
|
|
||||||
parent
|
|
||||||
|
|
||||||
Representations:
|
|
||||||
- Keys are strings
|
|
||||||
- It's nice to not need to quote keys (if we formulate our own language,
|
|
||||||
consider this)
|
|
||||||
- Property not present representation (key missing)
|
|
||||||
- Frequent removal/re-add may have null help. If null is valid, use
|
|
||||||
another value. (PHP semantics are weird here)
|
|
||||||
|
|
||||||
Data structures:
|
|
||||||
- LinkedHashMap is wonderful (O(1) access and maintains order)
|
|
||||||
- Using a special property that points to the parent is usual
|
|
||||||
- Multiple inheritance possible, need rules for which to lookup first
|
|
||||||
- Iterative inheritance is best
|
|
||||||
- Consider performance!
|
|
||||||
|
|
||||||
Deletion
|
|
||||||
- Tricky problem with inheritance
|
|
||||||
- Distinguish between "not found" and "look in my parent for the property"
|
|
||||||
[Maybe HTML Purifier won't allow deletion]
|
|
||||||
|
|
||||||
Read/write asymmetry (it's correct!)
|
|
||||||
|
|
||||||
Read-only plists
|
|
||||||
- Allow ability to freeze [this is what we have already]
|
|
||||||
- Don't overuse it
|
|
||||||
|
|
||||||
Performance:
|
|
||||||
- Intern strings (PHP does this already)
|
|
||||||
- Don't be case-insensitive
|
|
||||||
- If all properties in a plist are known a-priori, you can use a "perfect"
|
|
||||||
hash function. Often overkill.
|
|
||||||
- Copy-on-read caching "plundering" reduces lookup, but uses memory and can
|
|
||||||
grow stale. Use as last resort.
|
|
||||||
- Refactoring to fields. Watch for API compatibility, system complexity,
|
|
||||||
and lack of flexibility.
|
|
||||||
- Refrigerator: external data-structure to hold plists
|
|
||||||
|
|
||||||
Transient properties:
|
|
||||||
[Don't need to worry about this]
|
|
||||||
- Use a separate plist for transient properties
|
|
||||||
- Non-numeric override; numeric should ADD
|
|
||||||
- Deletion: removeTransientProperty() and transientlyRemoveProperty()
|
|
||||||
|
|
||||||
Persistence:
|
|
||||||
- XML/JSON are good
|
|
||||||
- Text-based is good for readability, maintainability and bootstrapping
|
|
||||||
- Compressed binary format for network transport [not necessary]
|
|
||||||
- RDBMS or XML database
|
|
||||||
|
|
||||||
Querying: [not relevant]
|
|
||||||
- XML database is nice for XPath/XQuery
|
|
||||||
- jQuery for JSON
|
|
||||||
- Just load it all into a program
|
|
||||||
|
|
||||||
Backfills/Data integrity:
|
|
||||||
- Use usual methods
|
|
||||||
- Lazy backfill is a nice hack
|
|
||||||
|
|
||||||
Type systems:
|
|
||||||
- Flags: ReadOnly, Permanent, DontEnum
|
|
||||||
- Typed properties isn't that useful [It's also Not-PHP]
|
|
||||||
- Separate meta-list of directive properties IS useful
|
|
||||||
- Duck typing is useful for systems designed fully around properties pattern
|
|
||||||
|
|
||||||
Trade-off:
|
|
||||||
+ Flexibility
|
|
||||||
+ Extensibility
|
|
||||||
+ Unit-testing/prototype-speed
|
|
||||||
- Performance
|
|
||||||
- Data integrity
|
|
||||||
- Navigability/Query-ability
|
|
||||||
- Reversibility (hard to go back)
|
|
||||||
|
|
||||||
HTML Purifier
|
|
||||||
|
|
||||||
We are not happy with our current system of defining configuration directives,
|
|
||||||
because it has become clear that things will get a lot nicer if we allow
|
|
||||||
multiple namespaces, and there are some features that naturally lend themselves
|
|
||||||
to inheritance, which we do not really support well.
|
|
||||||
|
|
||||||
One of the considered implementation changes would be to go from a structure
|
|
||||||
like:
|
|
||||||
|
|
||||||
array(
|
|
||||||
'Namespace' => array(
|
|
||||||
'Directive' => 'val1',
|
|
||||||
'Directive2' => 'val2',
|
|
||||||
)
|
|
||||||
)
|
|
||||||
|
|
||||||
to:
|
|
||||||
|
|
||||||
array(
|
|
||||||
'Namespace.Directive' => 'val1',
|
|
||||||
'Namespace.Directive2' => 'val2',
|
|
||||||
)
|
|
||||||
|
|
||||||
The below implementation takes more memory, however, and it makes it a bit
|
|
||||||
complicated to grab all values from a namespace.
|
|
||||||
|
|
||||||
The alternate implementation choice is to allow nested plists. This keeps
|
|
||||||
iteration easy, but is problematic for inheritance (it would be difficult
|
|
||||||
to distinguish a plist from an array) and retrieval (when specifying multiple
|
|
||||||
namespaces we would need some multiple de-referencing).
|
|
||||||
|
|
||||||
----
|
|
||||||
|
|
||||||
We can bite the performance hit, and just do iteration with filter
|
|
||||||
(the strncmp call should be relatively cheap). Then, users should be able
|
|
||||||
to optimize doing something like:
|
|
||||||
|
|
||||||
$config = HTMLPurifier_Config::createDefault();
|
|
||||||
if (!file_exists('config.php')) {
|
|
||||||
// set up $config
|
|
||||||
$config->save('config.php');
|
|
||||||
} else {
|
|
||||||
$config->load('config.php');
|
|
||||||
}
|
|
||||||
|
|
||||||
Or maybe memcache, or something. This means that "// set up $config" must
|
|
||||||
not have any dynamic parts, or the user has to invalidate the cache when
|
|
||||||
they do update it. We have to think about this a little more carefully; the
|
|
||||||
file call might be more expensive.
|
|
||||||
|
|
||||||
----
|
|
||||||
|
|
||||||
This might get expensive, however, when we actually care about iterating
|
|
||||||
over the configuration and want the actual values. So what about nesting the
|
|
||||||
lists?
|
|
||||||
|
|
||||||
"ns.sub.directive" => values['ns']['sub']['directive']
|
|
||||||
|
|
||||||
We can distinguish between plists and arrays by using ArrayObjects for the
|
|
||||||
plists, and regular arrays for the arrays? Alternatively, use ArrayObjects
|
|
||||||
for the arrays, and regular arrays for the plists.
|
|
||||||
|
|
||||||
----
|
|
||||||
|
|
||||||
Implementation demands, and what has caused them:
|
|
||||||
|
|
||||||
1. DefinitionCache, the HTML, CSS and URI namespaces have caches attached to them
|
|
||||||
Results:
|
|
||||||
- getBatchSerial()
|
|
||||||
- getBatch() : in general, the ability to traverse just a namespace
|
|
||||||
|
|
||||||
2. AutoFormat/Filter, this is a plugin architecture, directives not hard-coded
|
|
||||||
- getBatch()
|
|
||||||
|
|
||||||
3. Configuration form
|
|
||||||
- Namespaces used to organize directives
|
|
||||||
|
|
||||||
Other than that, we have a pure plist. PERHAPS we should maintain separate things
|
|
||||||
for these different demands.
|
|
||||||
|
|
||||||
Issue 2: Directives for configuring the plugins are regular plists, but
|
|
||||||
when enabling them, while it's "plist-ish", what you're really doing is adding
|
|
||||||
them to an array of "autoformatters"/"filters" to enable. We can setup
|
|
||||||
magic BC as well as in the new interface, but there should also be an
|
|
||||||
add('AutoFormat', 'AutoParagraph'); which does the right thing.
|
|
||||||
|
|
||||||
One thing to consider is whether or not inheritance rules will apply to these.
|
|
||||||
I'd say yes. That means that they're still plisty, in fact, the underlying
|
|
||||||
implementation will probably be a plist. However, they will get their OWN
|
|
||||||
plists, and will NOT support nesting.
|
|
||||||
|
|
||||||
Issue 1: Our current implementation is generally not efficient; md5(serialize($foo))
|
|
||||||
is pretty expensive. So, I don't think there will be any problems if it
|
|
||||||
gets "less" efficient, as long as we give users a properly fast alternative;
|
|
||||||
DefinitionRev gives us a way to do this, by simply telling the user they must
|
|
||||||
update it whenever they update Configuration directives as well. (There are
|
|
||||||
obvious BC concerns here).
|
|
||||||
|
|
||||||
In such a case, we simply iterate over our plist (performing full retrievals
|
|
||||||
for each value), grab the entries we care about, and then serialize and hash.
|
|
||||||
It's going to be slow either way, due to the ability of plists to inherit.
|
|
||||||
If we ksort(), we don't have to traverse the entire array, however, the
|
|
||||||
cost of a ksort() call may not be worth it.
|
|
||||||
|
|
||||||
At this point, last time, I started worrying about the performance implications
|
|
||||||
of allowing inheritance, and wondering whether or not I wanted to squash
|
|
||||||
the plist. At first blush, our code might be under the assumption that
|
|
||||||
accessing properties is cheap; but actually we prefer to copy out the value
|
|
||||||
into a member variable if it's going to be used many times. With this in mind
|
|
||||||
I don't think CPU consumption from a few nested function calls is going to
|
|
||||||
be a problem. We *are* going to enforce a function only interface.
|
|
||||||
|
|
||||||
The next issue at hand is how we're going to manage the "special" plists,
|
|
||||||
which should still be able to be inherited. Basically, it means that multiple
|
|
||||||
plists would be attached to the configuration object, which is not the
|
|
||||||
best for memory performance. The alternative is to keep them all in one
|
|
||||||
big plist, and then eat the one-time cost of traversing the entire plist
|
|
||||||
to grab the appropriate values.
|
|
||||||
|
|
||||||
I think at this point we can write the generic interface, and then set up separate
|
|
||||||
plists if that ends up being necessary for performance (it probably won't.) Now
|
|
||||||
let's code our generic plist implementation.
|
|
||||||
|
|
||||||
----
|
|
||||||
|
|
||||||
Iterating over the plist presents some problems. The way we've chosen to solve
|
|
||||||
this is to squash all of the parents.
|
|
||||||
|
|
||||||
----
|
|
||||||
|
|
||||||
But I don't need iteration.
|
|
||||||
|
|
||||||
vim: et sw=4 sts=4
|
|
@@ -1,50 +0,0 @@
|
|||||||
|
|
||||||
Handling Content Model Changes
|
|
||||||
|
|
||||||
|
|
||||||
1. Context
|
|
||||||
|
|
||||||
The distinction between Transitional and Strict document types is somewhat
|
|
||||||
of an anomaly in the lineage of XHTML document types (following 1.0,
|
|
||||||
doctypes do not have flavors: instead, modularization is used to let
|
|
||||||
document authors vary their elements). This transition is usually quite
|
|
||||||
straight-forward, as W3C usually deprecates attributes or elements, which
|
|
||||||
are quite easily handled using tag and attribute transforms.
|
|
||||||
|
|
||||||
However, for three elements, <blockquote>, <body> and <address>, W3C elected
|
|
||||||
to also change the content model. <blockquote> and <body> originally
|
|
||||||
accepted both inline and block elements, but in the strict doctype they
|
|
||||||
only allow block elements. With <address>, the situation is inverted:
|
|
||||||
<p> tags were now forbidden from appearing within this tag.
|
|
||||||
|
|
||||||
|
|
||||||
2. Current situation
|
|
||||||
|
|
||||||
Currently, HTML Purifier treats <blockquote> specially during Tidy mode
|
|
||||||
using a custom ChildDef class StrictBlockquote. StrictBlockquote
|
|
||||||
operates similarly to Required, except that when it encounters an inline
|
|
||||||
element, it will wrap it in a block tag (as specified by
|
|
||||||
%HTML.BlockWrapper, the default is <p>). The naming suggests it can
|
|
||||||
only be used for <blockquote>s, although it may be possible to
|
|
||||||
genericize it to work on other cases of this nature (this would be of
|
|
||||||
little practical application, as no other element in XHTML 1.1 or earlier
|
|
||||||
has a block-only content model).
|
|
||||||
|
|
||||||
Tidy currently contains no custom, lenient implementation for <address>.
|
|
||||||
If one were to be written, it would likely operate on the principle that,
|
|
||||||
when a <p> tag were to be encountered, it would be replaced with a
|
|
||||||
leading and trailing <br /> tag (the contents of <p>, being inline, are
|
|
||||||
not an issue). There is no prior work with this sort of operation.
|
|
||||||
|
|
||||||
|
|
||||||
3. Outside applicability
|
|
||||||
|
|
||||||
There are a number of other elements that contain restrictive content
|
|
||||||
models, such as <ul> or <span> (the latter is restrictive in that it
|
|
||||||
does not allow block elements). In the former case, an errant node
|
|
||||||
is eliminated completely, in the latter case, the text of the node
|
|
||||||
is preserved (as the parent node does allow PCDATA). Custom
|
|
||||||
content model implementations probably are not the best way of handling
|
|
||||||
these cases; node bubbling should be implemented instead.
|
|
||||||
|
|
||||||
vim: et sw=4 sts=4
|
|
@@ -1,30 +0,0 @@
|
|||||||
|
|
||||||
CSS Length Reference
|
|
||||||
To bound, or not to bound, that is the question
|
|
||||||
|
|
||||||
It's quite a reasonable request, really, and it's already been implemented
|
|
||||||
for HTML. That is, length bounding. It makes little sense to let users
|
|
||||||
define text blocks that have a font-size of 63,360 inches (that's a mile,
|
|
||||||
by the way) or a width of forty-fold the parent container.
|
|
||||||
|
|
||||||
But it's a little more complicated than that. There are multiple units
|
|
||||||
one can use, and we have to do a little unit conversion to get things working.
|
|
||||||
Here's what we have:
|
|
||||||
|
|
||||||
Absolute:
|
|
||||||
1 in ~= 2.54 cm
|
|
||||||
1 cm = 10 mm
|
|
||||||
1 pt = 1/72 in
|
|
||||||
1 pc = 12 pt
|
|
||||||
|
|
||||||
Relative:
|
|
||||||
1 em ~= 10.0667 px
|
|
||||||
1 ex ~= 0.5 em, though Mozilla Firefox says 1 ex = 6px
|
|
||||||
1 px ~= 1 pt
|
|
||||||
|
|
||||||
Watch out: font-sizes can also be nested to get successively larger
|
|
||||||
(although I do not relish having to keep track of context font-sizes,
|
|
||||||
this may be necessary, especially for some of the more advanced features
|
|
||||||
for preventing things like white on white).
|
|
||||||
|
|
||||||
vim: et sw=4 sts=4
|
|
@@ -15,7 +15,7 @@
|
|||||||
|
|
||||||
<div id="filing">Filed under Reference</div>
|
<div id="filing">Filed under Reference</div>
|
||||||
<div id="index">Return to the <a href="index.html">index</a>.</div>
|
<div id="index">Return to the <a href="index.html">index</a>.</div>
|
||||||
<div id="home"><a href="http://htmlpurifier.org/">HTML Purifier</a> End-User Documentation</div>
|
<div id="home"><a href="http://hp.jpsband.org/">HTML Purifier</a> End-User Documentation</div>
|
||||||
|
|
||||||
<p>Many thanks to the DevNetwork community for answering questions,
|
<p>Many thanks to the DevNetwork community for answering questions,
|
||||||
theorizing about design, and offering encouragement during
|
theorizing about design, and offering encouragement during
|
||||||
@@ -40,8 +40,6 @@ the development of this library in these forum threads:</p>
|
|||||||
|
|
||||||
<p>...as well as any I may have forgotten.</p>
|
<p>...as well as any I may have forgotten.</p>
|
||||||
|
|
||||||
|
<div id="version">$Id$</div>
|
||||||
</body>
|
</body>
|
||||||
</html>
|
</html>
|
||||||
|
|
||||||
<!-- vim: et sw=4 sts=4
|
|
||||||
-->
|
|
||||||
|
37
docs/ref-loose-vs-strict.txt
Normal file
37
docs/ref-loose-vs-strict.txt
Normal file
@@ -0,0 +1,37 @@
|
|||||||
|
|
||||||
|
Loose versus Strict
|
||||||
|
Changes from one doctype to another
|
||||||
|
|
||||||
|
There are changes. Wow, how insightful. Not everything changed is relevant
|
||||||
|
to HTML Purifier, though, so let's take a look:
|
||||||
|
|
||||||
|
== Major incompatibilities ==
|
||||||
|
|
||||||
|
[done] BLOCKQUOTE changes from 'flow' to 'block'
|
||||||
|
current behavior: inline inner contents should not be nuked, block-ify as necessary
|
||||||
|
[partially-done] U, S, STRIKE cut
|
||||||
|
current behavior: removed completely
|
||||||
|
projected behavior: replace with appropriate inline span + CSS
|
||||||
|
[done] ADDRESS from potpourri to Inline (removes p tags)
|
||||||
|
current behavior: block tags silently dropped
|
||||||
|
ideal behavior: replace tags with something like <br>. (not high priority)
|
||||||
|
|
||||||
|
== Things we can loosen up ==
|
||||||
|
|
||||||
|
Tags DIR, MENU, CENTER, ISINDEX, FONT, BASEFONT? allowed in loose
|
||||||
|
current behavior: transform to strict-valid forms
|
||||||
|
Attributes allowed in loose (see attribute transforms in 'dev-progress.html')
|
||||||
|
current behavior: projected to transform into strict-valid forms
|
||||||
|
|
||||||
|
== Periphery issues ==
|
||||||
|
|
||||||
|
A tag's attribute 'target' (for selecting frames) cut
|
||||||
|
current behavior: not allowed at all
|
||||||
|
projected behavior: use loose doctype if needed, needs valid values
|
||||||
|
[done] OL/LI tag's attribute 'start'/'value' (for renumbering lists) cut
|
||||||
|
current behavior: no substitute, just delete when in strict, allow in loose
|
||||||
|
Attribute 'name' deprecated in favor of 'id'
|
||||||
|
current behavior: dropped silently
|
||||||
|
projected behavior: create proper AttrTransform
|
||||||
|
[done] PRE tag allows SUB/SUP? (strict dtd comment vs syntax, loose disallows)
|
||||||
|
current behavior: disallow as usual
|
@@ -18,9 +18,5 @@ HTML Purifier context.
|
|||||||
|
|
||||||
<listing>, monospace pre-variant (extremely rare)
|
<listing>, monospace pre-variant (extremely rare)
|
||||||
<plaintext>, escapes all tags to the end of document
|
<plaintext>, escapes all tags to the end of document
|
||||||
|
<ruby> and friends, (more research needed, appears to be XHTML 1.1 markup)
|
||||||
<xmp>, monospace, replace with pre
|
<xmp>, monospace, replace with pre
|
||||||
|
|
||||||
These should be put into their own Tidy module, not loaded by default(?). These
|
|
||||||
all qualify as "lenient" transforms.
|
|
||||||
|
|
||||||
vim: et sw=4 sts=4
|
|
||||||
|
37
docs/ref-strictness.txt
Normal file
37
docs/ref-strictness.txt
Normal file
@@ -0,0 +1,37 @@
|
|||||||
|
|
||||||
|
Is HTML Purifier Strict or Transitional?
|
||||||
|
A little bit of helpful guidance
|
||||||
|
|
||||||
|
Despite the fact that HTML Purifier professes to support both transitional and
|
||||||
|
strict HTML, it rejects a lot of attributes and elements that are actually, indeed,
|
||||||
|
valid. You can investigate progress.html to find out precisely what we
|
||||||
|
are doing to these *deprecated* attributes.
|
||||||
|
|
||||||
|
However, users have found that Strict HTML imposes some quite unreasonable
|
||||||
|
restrictions on certain things. The start and value attributes in ol and
|
||||||
|
li (respectively) perhaps are the most contested. There is currently no
|
||||||
|
widely supported browser method short of JavaScript that can replace these
|
||||||
|
two deprecated attributes. It behooves us to allow these deprecated
|
||||||
|
attributes when the output is transitional.
|
||||||
|
|
||||||
|
Fortunately, that's the only real bugger case. The others have near-perfect
|
||||||
|
CSS equivalents, and were presentational anyway. However, the other question
|
||||||
|
pops up: should we always convert these to the CSS forms when 1. the spec
|
||||||
|
allows them anyway and 2. older browsers support them better? After all, the
|
||||||
|
whole point about CSS is to separate styling from content, so inline styling
|
||||||
|
doesn't solve that problem.
|
||||||
|
|
||||||
|
It's an icky question, and we'll have to deal with it as more and more
|
||||||
|
transforms get implemented. As of right now, however, we currently support
|
||||||
|
these loose-only constructs in loose mode:
|
||||||
|
|
||||||
|
- <ol start="1">, <li value="1"> attributes
|
||||||
|
- <u>, <strike>, <s> tags
|
||||||
|
- flow children in <blockquote>
|
||||||
|
- mixed children in <address>
|
||||||
|
|
||||||
|
The changed child definitions as well as the ol.start li.value are the most
|
||||||
|
compelling reasons why loose should be used. We may want to offer disabling <u>,
|
||||||
|
<strike> and <s> by themselves. We may also want to offer no pre-emptive
|
||||||
|
deprecated conversions. This all must be unified.
|
||||||
|
|
@@ -2,25 +2,8 @@
|
|||||||
Web Hypertext Application Technology Working Group
|
Web Hypertext Application Technology Working Group
|
||||||
WHATWG
|
WHATWG
|
||||||
|
|
||||||
== HTML 5 ==
|
I don't think we need to worry about them. Untrusted users shouldn't be
|
||||||
|
submitting applications, eh? But if some interesting attribute pops up in
|
||||||
|
their spec, and might be worth supporting, stick it here.
|
||||||
|
|
||||||
URL: http://www.whatwg.org/specs/web-apps/current-work/
|
(none so far, as you can see)
|
||||||
|
|
||||||
HTML 5 defines a kaboodle of new elements and attributes, as well as
|
|
||||||
some well-defined, "quirks mode" HTML parsing. Although WHATWG professes
|
|
||||||
to be targeted towards web applications, many of their semantic additions
|
|
||||||
would be quite useful in regular documents. Eventually, HTML
|
|
||||||
Purifier will need to audit their lists and figure out what changes need
|
|
||||||
to be made. This process is complicated by the fact that the WHATWG
|
|
||||||
doesn't buy into W3C's modularization of XHTML 1.1: we may need
|
|
||||||
to remodularize HTML 5 (probably done by section name). No sense in
|
|
||||||
committing ourselves till the spec stabilizes, though.
|
|
||||||
|
|
||||||
More immediately relevant, however, is the well-defined parsing
|
|
||||||
behavior that HTML 5 adds. While I have little interest in writing
|
|
||||||
another DirectLex parser, other parsers like ph5p
|
|
||||||
<http://jero.net/lab/ph5p/> can be adapted to DOMLex to support much more
|
|
||||||
flexible HTML parsing (a cool feature I've seen is how they resolve
|
|
||||||
<b>bold<i>both</b>italic</i>).
|
|
||||||
|
|
||||||
vim: et sw=4 sts=4
|
|
||||||
|
@@ -1,8 +1,10 @@
|
|||||||
|
|
||||||
The Modularization of HTMLDefinition in HTML Purifier
|
XHTML 1.1 and HTML Purifier
|
||||||
|
|
||||||
WARNING: This document was drafted before the implementation of this
|
Todo for XHTML 1.1 support <http://www.w3.org/TR/xhtml11/changes.html>
|
||||||
system, and some implementation details may have evolved over time.
|
1. Scratch lang entirely in favor of xml:lang
|
||||||
|
2. Scratch name entirely in favor of id (partially-done)
|
||||||
|
3. Support Ruby <http://www.w3.org/TR/2001/REC-ruby-20010531/>
|
||||||
|
|
||||||
HTML Purifier uses the modularization of XHTML
|
HTML Purifier uses the modularization of XHTML
|
||||||
<http://www.w3.org/TR/xhtml-modularization/> to organize the internals
|
<http://www.w3.org/TR/xhtml-modularization/> to organize the internals
|
||||||
@@ -10,10 +12,25 @@ of HTMLDefinition into a more manageable and extensible fashion. Rather
|
|||||||
than have one super-object, HTMLDefinition is split into HTMLModules,
|
than have one super-object, HTMLDefinition is split into HTMLModules,
|
||||||
each of which are responsible for defining elements, their attributes,
|
each of which are responsible for defining elements, their attributes,
|
||||||
and other properties (for a more indepth coverage, see
|
and other properties (for a more indepth coverage, see
|
||||||
/library/HTMLPurifier/HTMLModule.php's docblock comments). These modules
|
/library/HTMLPurifier/HTMLModule.php's docblock comments).
|
||||||
are managed by HTMLModuleManager.
|
|
||||||
|
|
||||||
Modules that we don't support but could support are:
|
The modules that W3C defines and we support are:
|
||||||
|
|
||||||
|
* 5.1. Attribute Collections (technically not a module)
|
||||||
|
* 5.2. Core Modules
|
||||||
|
o 5.2.2. Text Module
|
||||||
|
o 5.2.3. Hypertext Module
|
||||||
|
o 5.2.4. List Module
|
||||||
|
* 5.4. Text Extension Modules
|
||||||
|
o 5.4.1. Presentation Module
|
||||||
|
o 5.4.2. Edit Module
|
||||||
|
o 5.4.3. Bi-directional Text Module
|
||||||
|
* 5.6. Table Modules
|
||||||
|
o 5.6.2. Tables Module
|
||||||
|
* 5.7. Image Module
|
||||||
|
* 5.18. Style Attribute Module
|
||||||
|
|
||||||
|
Modules that we don't support but could support are:
|
||||||
|
|
||||||
* 5.6. Table Modules
|
* 5.6. Table Modules
|
||||||
o 5.6.1. Basic Tables Module [?]
|
o 5.6.1. Basic Tables Module [?]
|
||||||
@@ -21,8 +38,10 @@ Modules that we don't support but could support are:
|
|||||||
* 5.9. Server-side Image Map Module [?]
|
* 5.9. Server-side Image Map Module [?]
|
||||||
* 5.12. Target Module [?]
|
* 5.12. Target Module [?]
|
||||||
* 5.21. Name Identification Module [deprecated]
|
* 5.21. Name Identification Module [deprecated]
|
||||||
|
* 5.22. Legacy Module [deprecated]
|
||||||
|
|
||||||
These modules would be implemented as "unsafe":
|
These modules will not be implemented due to their dangerousness or
|
||||||
|
inapplicability as an XHTML fragment:
|
||||||
|
|
||||||
* 5.2. Core Modules
|
* 5.2. Core Modules
|
||||||
o 5.2.1. Structure Module
|
o 5.2.1. Structure Module
|
||||||
@@ -45,7 +64,11 @@ of robust tools for handling them (the main problem is that all the
|
|||||||
current parsers are usually PHP 5 only and solely-validating, not
|
current parsers are usually PHP 5 only and solely-validating, not
|
||||||
correcting).
|
correcting).
|
||||||
|
|
||||||
This system may be generalized and ported over for CSS.
|
The abstraction of the HTMLDefinition creation process will also
|
||||||
|
contribute to a need for a caching system. Cache invalidation would be
|
||||||
|
difficult, but could be done by comparing the HTML and Attr config
|
||||||
|
namespaces with a copy that was packaged along with the serialized
|
||||||
|
HTMLDefinition object.
|
||||||
|
|
||||||
== General Use-Case ==
|
== General Use-Case ==
|
||||||
|
|
||||||
@@ -68,7 +91,7 @@ like this:
|
|||||||
<?php
|
<?php
|
||||||
$config = HTMLPurifier_Config::createDefault();
|
$config = HTMLPurifier_Config::createDefault();
|
||||||
$def =& $config->getHTMLDefinition(true); // reference to raw
|
$def =& $config->getHTMLDefinition(true); // reference to raw
|
||||||
$def->addElement('marquee', 'Block', 'Flow', 'Common');
|
unset($def->modules['Hypertext']); // rm ''a'' link
|
||||||
$purifier = new HTMLPurifier($config);
|
$purifier = new HTMLPurifier($config);
|
||||||
$purifier->purify($html); // now the definition is finalized
|
$purifier->purify($html); // now the definition is finalized
|
||||||
?>
|
?>
|
||||||
@@ -162,5 +185,3 @@ array of content set names to content set contents. If the content set
|
|||||||
already exists, your values are appended on to it (great for, say,
|
already exists, your values are appended on to it (great for, say,
|
||||||
registering the font tag as an inline element), otherwise it is
|
registering the font tag as an inline element), otherwise it is
|
||||||
created. They are substituted into content_model.
|
created. They are substituted into content_model.
|
||||||
|
|
||||||
vim: et sw=4 sts=4
|
|
@@ -1,10 +0,0 @@
|
|||||||
Licensing of Specimens
|
|
||||||
|
|
||||||
Some files in this directory have different licenses:
|
|
||||||
|
|
||||||
windows-live-mail-desktop-beta.html - donated by laacz, public domain
|
|
||||||
img.png - LGPL, from <http://commons.wikimedia.org/wiki/Image:Pastille_chrome.png>
|
|
||||||
|
|
||||||
All other files are by me, and are licensed under LGPL.
|
|
||||||
|
|
||||||
vim: et sw=4 sts=4
|
|
@@ -1,165 +0,0 @@
|
|||||||
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
|
|
||||||
"http://www.w3.org/TR/html4/loose.dtd">
|
|
||||||
<html>
|
|
||||||
<head>
|
|
||||||
<title>HTML align attribute to CSS - HTML Purifier Specimen</title>
|
|
||||||
<style type="text/css">
|
|
||||||
div.container {position:relative;height:110px;}
|
|
||||||
div.container.legend .test {text-align:center;line-height:100px;}
|
|
||||||
div.test {width:100px;height:100px;border:1px solid black;
|
|
||||||
position:absolute;top:10px;}
|
|
||||||
div.test.html {left:10px;}
|
|
||||||
div.test.css {left:140px;}
|
|
||||||
table {background:#F00;}
|
|
||||||
img {border:1px solid #000;}
|
|
||||||
hr {width:50px;}
|
|
||||||
div.segment {width:250px; float:left; margin-top:1em;}
|
|
||||||
</style>
|
|
||||||
</head>
|
|
||||||
<body>
|
|
||||||
|
|
||||||
<h1>HTML align attribute to CSS</h1>
|
|
||||||
|
|
||||||
<p>Inspect source for methodology.</p>
|
|
||||||
|
|
||||||
<div class="container legend">
|
|
||||||
<div class="test html">
|
|
||||||
HTML
|
|
||||||
</div>
|
|
||||||
<div class="test css">
|
|
||||||
CSS
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
|
|
||||||
<div class="segment">
|
|
||||||
|
|
||||||
<h2>table.align</h2>
|
|
||||||
|
|
||||||
<h3>left</h3>
|
|
||||||
<div class="container">
|
|
||||||
<div class="test html">
|
|
||||||
a<table align="left"><tr><td>O</td></tr></table>a
|
|
||||||
</div>
|
|
||||||
<div class="test css">
|
|
||||||
a<table style="float:left;"><tr><td>O</td></tr></table>a
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
|
|
||||||
<h3>center</h3>
|
|
||||||
<div class="container">
|
|
||||||
<div class="test html">
|
|
||||||
a<table align="center"><tr><td>O</td></tr></table>a
|
|
||||||
</div>
|
|
||||||
<div class="test css">
|
|
||||||
a<table style="margin-left:auto; margin-right:auto;"><tr><td>O</td></tr></table>a
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
|
|
||||||
<h3>right</h3>
|
|
||||||
<div class="container">
|
|
||||||
<div class="test html">
|
|
||||||
a<table align="right"><tr><td>O</td></tr></table>a
|
|
||||||
</div>
|
|
||||||
<div class="test css">
|
|
||||||
a<table style="float:right;"><tr><td>O</td></tr></table>a
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
|
|
||||||
</div>
|
|
||||||
|
|
||||||
<!-- ################################################################## -->
|
|
||||||
|
|
||||||
<div class="segment">
|
|
||||||
<h2>img.align</h2>
|
|
||||||
<h3>left</h3>
|
|
||||||
<div class="container">
|
|
||||||
<div class="test html">
|
|
||||||
a<img src="img.png" align="left">a
|
|
||||||
</div>
|
|
||||||
<div class="test css">
|
|
||||||
a<img src="img.png" style="float:left;">a
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
|
|
||||||
<h3>right</h3>
|
|
||||||
<div class="container">
|
|
||||||
<div class="test html">
|
|
||||||
a<img src="img.png" align="right">a
|
|
||||||
</div>
|
|
||||||
<div class="test css">
|
|
||||||
a<img src="img.png" style="float:right;">a
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
|
|
||||||
<h3>bottom</h3>
|
|
||||||
<div class="container">
|
|
||||||
<div class="test html">
|
|
||||||
a<img src="img.png" align="bottom">a
|
|
||||||
</div>
|
|
||||||
<div class="test css">
|
|
||||||
a<img src="img.png" style="vertical-align:baseline;">a
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
|
|
||||||
<h3>middle</h3>
|
|
||||||
<div class="container">
|
|
||||||
<div class="test html">
|
|
||||||
a<img src="img.png" align="middle">a
|
|
||||||
</div>
|
|
||||||
<div class="test css">
|
|
||||||
a<img src="img.png" style="vertical-align:middle;">a
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
|
|
||||||
<h3>top</h3>
|
|
||||||
<div class="container">
|
|
||||||
<div class="test html">
|
|
||||||
a<img src="img.png" align="top">a
|
|
||||||
</div>
|
|
||||||
<div class="test css">
|
|
||||||
a<img src="img.png" style="vertical-align:top;">a
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
|
|
||||||
</div>
|
|
||||||
|
|
||||||
<!-- ################################################################## -->
|
|
||||||
|
|
||||||
<div class="segment">
|
|
||||||
|
|
||||||
<h2>hr.align</h2>
|
|
||||||
|
|
||||||
<h3>left</h3>
|
|
||||||
<div class="container">
|
|
||||||
<div class="test html">
|
|
||||||
<hr align="left" />
|
|
||||||
</div>
|
|
||||||
<div class="test css">
|
|
||||||
<hr style="margin-right:auto; margin-left:0; text-align:left;" />
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
|
|
||||||
<h3>center</h3>
|
|
||||||
<div class="container">
|
|
||||||
<div class="test html">
|
|
||||||
<hr align="center" />
|
|
||||||
</div>
|
|
||||||
<div class="test css">
|
|
||||||
<hr style="margin-right:auto; margin-left:auto; text-align:center;" />
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
|
|
||||||
<h3>right</h3>
|
|
||||||
<div class="container">
|
|
||||||
<div class="test html">
|
|
||||||
<hr align="right" />
|
|
||||||
</div>
|
|
||||||
<div class="test css">
|
|
||||||
<hr style="margin-right:0; margin-left:auto; text-align:right;" />
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
|
|
||||||
</div>
|
|
||||||
|
|
||||||
</body>
|
|
||||||
</html>
|
|
Binary file not shown.
Before Width: | Height: | Size: 2.1 KiB |
@@ -1,129 +0,0 @@
|
|||||||
<html xmlns:v="urn:schemas-microsoft-com:vml" xmlns:o="urn:schemas-microsoft-com:office:office" xmlns:w="urn:schemas-microsoft-com:office:word" xmlns:m="http://schemas.microsoft.com/office/2004/12/omml" xmlns="http://www.w3.org/TR/REC-html40">
|
|
||||||
|
|
||||||
<head>
|
|
||||||
<META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=us-ascii">
|
|
||||||
<meta name=Generator content="Microsoft Word 12 (filtered medium)">
|
|
||||||
<!--[if !mso]>
|
|
||||||
<style>
|
|
||||||
v\:* {behavior:url(#default#VML);}
|
|
||||||
o\:* {behavior:url(#default#VML);}
|
|
||||||
w\:* {behavior:url(#default#VML);}
|
|
||||||
..shape {behavior:url(#default#VML);}
|
|
||||||
</style>
|
|
||||||
<![endif]-->
|
|
||||||
<style>
|
|
||||||
<!--
|
|
||||||
/* Font Definitions */
|
|
||||||
@font-face
|
|
||||||
{font-family:"Cambria Math";
|
|
||||||
panose-1:2 4 5 3 5 4 6 3 2 4;}
|
|
||||||
@font-face
|
|
||||||
{font-family:Calibri;
|
|
||||||
panose-1:2 15 5 2 2 2 4 3 2 4;}
|
|
||||||
@font-face
|
|
||||||
{font-family:Tahoma;
|
|
||||||
panose-1:2 11 6 4 3 5 4 4 2 4;}
|
|
||||||
@font-face
|
|
||||||
{font-family:Verdana;
|
|
||||||
panose-1:2 11 6 4 3 5 4 4 2 4;}
|
|
||||||
/* Style Definitions */
|
|
||||||
p.MsoNormal, li.MsoNormal, div.MsoNormal
|
|
||||||
{margin:0cm;
|
|
||||||
margin-bottom:.0001pt;
|
|
||||||
font-size:10.0pt;
|
|
||||||
font-family:"Verdana","sans-serif";}
|
|
||||||
a:link, span.MsoHyperlink
|
|
||||||
{mso-style-priority:99;
|
|
||||||
color:blue;
|
|
||||||
text-decoration:underline;}
|
|
||||||
a:visited, span.MsoHyperlinkFollowed
|
|
||||||
{mso-style-priority:99;
|
|
||||||
color:purple;
|
|
||||||
text-decoration:underline;}
|
|
||||||
p.MsoAcetate, li.MsoAcetate, div.MsoAcetate
|
|
||||||
{mso-style-priority:99;
|
|
||||||
mso-style-link:"Balloon Text Char";
|
|
||||||
margin:0cm;
|
|
||||||
margin-bottom:.0001pt;
|
|
||||||
font-size:8.0pt;
|
|
||||||
font-family:"Tahoma","sans-serif";}
|
|
||||||
span.EmailStyle17
|
|
||||||
{mso-style-type:personal-compose;
|
|
||||||
font-family:"Verdana","sans-serif";
|
|
||||||
color:windowtext;}
|
|
||||||
span.BalloonTextChar
|
|
||||||
{mso-style-name:"Balloon Text Char";
|
|
||||||
mso-style-priority:99;
|
|
||||||
mso-style-link:"Balloon Text";
|
|
||||||
font-family:"Tahoma","sans-serif";}
|
|
||||||
..MsoChpDefault
|
|
||||||
{mso-style-type:export-only;}
|
|
||||||
@page Section1
|
|
||||||
{size:612.0pt 792.0pt;
|
|
||||||
margin:70.85pt 70.85pt 70.85pt 70.85pt;}
|
|
||||||
div.Section1
|
|
||||||
{page:Section1;}
|
|
||||||
-->
|
|
||||||
</style>
|
|
||||||
<!--[if gte mso 9]><xml>
|
|
||||||
<o:shapedefaults v:ext="edit" spidmax="2050" />
|
|
||||||
</xml><![endif]--><!--[if gte mso 9]><xml>
|
|
||||||
<o:shapelayout v:ext="edit">
|
|
||||||
<o:idmap v:ext="edit" data="1" />
|
|
||||||
</o:shapelayout></xml><![endif]-->
|
|
||||||
</head>
|
|
||||||
|
|
||||||
<body lang=NL link=blue vlink=purple>
|
|
||||||
|
|
||||||
<div class=Section1>
|
|
||||||
|
|
||||||
<p class=MsoNormal><img width=1277 height=994 id="Picture_x0020_1"
|
|
||||||
src="cid:image001.png@01C8CBDF.5D1BAEE0"><o:p></o:p></p>
|
|
||||||
|
|
||||||
<p class=MsoNormal><o:p> </o:p></p>
|
|
||||||
|
|
||||||
<p class=MsoNormal><b>Name<o:p></o:p></b></p>
|
|
||||||
|
|
||||||
<p class=MsoNormal>E-mail : <a href="mailto:mail@example.com"><span
|
|
||||||
style='color:windowtext'>mail@example.com</span></a><o:p></o:p></p>
|
|
||||||
|
|
||||||
<p class=MsoNormal><o:p> </o:p></p>
|
|
||||||
|
|
||||||
<p class=MsoNormal><b>Company<o:p></o:p></b></p>
|
|
||||||
|
|
||||||
<p class=MsoNormal>Address 1<o:p></o:p></p>
|
|
||||||
|
|
||||||
<p class=MsoNormal>Address 2<o:p></o:p></p>
|
|
||||||
|
|
||||||
<p class=MsoNormal><o:p> </o:p></p>
|
|
||||||
|
|
||||||
<p class=MsoNormal>Telefoon : +xx xx xxx xxx xx <span style='color:black'><o:p></o:p></span></p>
|
|
||||||
|
|
||||||
<p class=MsoNormal><span lang=EN-US style='color:black'>Fax : +xx xx xxx xx xx<o:p></o:p></span></p>
|
|
||||||
|
|
||||||
<p class=MsoNormal><span lang=EN-US style='color:black'>Internet : </span><span
|
|
||||||
style='color:black'><a href="http://www.example.com/"><span lang=EN-US
|
|
||||||
style='color:black'>http://www.example.com</span></a></span><span
|
|
||||||
lang=EN-US style='color:black'><o:p></o:p></span></p>
|
|
||||||
|
|
||||||
<p class=MsoNormal><span lang=EN-US style='color:black'>Kamer van koophandel
|
|
||||||
xxxxxxxxx<o:p></o:p></span></p>
|
|
||||||
|
|
||||||
<p class=MsoNormal><span lang=EN-US style='color:black'><o:p> </o:p></span></p>
|
|
||||||
|
|
||||||
<p class=MsoNormal><span lang=EN-US style='font-size:7.5pt;color:black'>Op deze
|
|
||||||
e-mail is een disclaimer van toepassing, ga naar </span><span lang=EN-US
|
|
||||||
style='font-size:7.5pt'><a
|
|
||||||
href="http://www.example.com/disclaimer"><span
|
|
||||||
style='color:black'>www.example.com/disclaimer</span></a><br>
|
|
||||||
<span style='color:black'>A disclaimer is applicable to this email, please
|
|
||||||
refer to </span><a href="http://www.example.com/disclaimer"><span
|
|
||||||
style='color:black'>www.example.com/disclaimer</span></a><o:p></o:p></span></p>
|
|
||||||
|
|
||||||
<p class=MsoNormal><span lang=EN-US><o:p> </o:p></span></p>
|
|
||||||
|
|
||||||
</div>
|
|
||||||
|
|
||||||
</body>
|
|
||||||
|
|
||||||
</html>
|
|
@@ -1,74 +0,0 @@
|
|||||||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
|
|
||||||
<HTML ChildAreas="4" xmlns:canvas><HEAD>
|
|
||||||
<META http-equiv=Content-Type content=text/html;charset=windows-1257>
|
|
||||||
<STYLE></STYLE>
|
|
||||||
|
|
||||||
<META content="MSHTML 6.00.6000.16414" name=GENERATOR></HEAD>
|
|
||||||
<BODY id=MailContainerBody
|
|
||||||
style="PADDING-RIGHT: 10px; PADDING-LEFT: 10px; FONT-SIZE: 10pt; COLOR: #000000; PADDING-TOP: 15px; FONT-FAMILY: Arial"
|
|
||||||
bgColor=#ff6600 leftMargin=0 background="" topMargin=0
|
|
||||||
name="Compose message area" acc_role="text" CanvasTabStop="false">
|
|
||||||
<DIV
|
|
||||||
style="BORDER-TOP: #dddddd 1px solid; FONT-SIZE: 10pt; WIDTH: 100%; MARGIN-RIGHT: 10px; PADDING-TOP: 5px; BORDER-BOTTOM: #dddddd 1px solid; FONT-FAMILY: Verdana; HEIGHT: 25px; BACKGROUND-COLOR: #ffffff"><NOBR><SPAN
|
|
||||||
title="View a slideshow of the pictures in this e-mail message."
|
|
||||||
style="PADDING-RIGHT: 20px"><A style="COLOR: #0088e4"
|
|
||||||
href="http://g.msn.com/5meen_us/171?path=/photomail/{6fc0065f-ffdd-4ca6-9a4c-cc5a93dc122f}&image=47D7B182CFEFB10!127&imagehi=47D7B182CFEFB10!125&CID=323550092004883216">Play
|
|
||||||
slideshow </A></SPAN><SPAN style="COLOR: #909090"><SPAN>|</SPAN><SPAN
|
|
||||||
style="PADDING-LEFT: 20px"> Download the highest quality version of a picture by
|
|
||||||
clicking the + above it </SPAN></SPAN></NOBR></DIV>
|
|
||||||
<DIV
|
|
||||||
style="PADDING-RIGHT: 5px; PADDING-LEFT: 7px; PADDING-BOTTOM: 2px; WIDTH: 100%; PADDING-TOP: 2px">
|
|
||||||
<OL>
|
|
||||||
<LI><IMG title="Angry smile emoticon"
|
|
||||||
style="FLOAT: none; MARGIN: 0px; POSITION: static" tabIndex=-1
|
|
||||||
alt="Angry smile emoticon" src="cid:49F0C856199E4D688D2D740680733D74@wc"
|
|
||||||
MSNNonUserImageOrEmoticon="true">Un ka <FONT style="BACKGROUND-COLOR: #800000"
|
|
||||||
color=#cc99ff><STRONG>Tev</STRONG></FONT> iet, un ko tu dari?
|
|
||||||
<LI>Aha!</LI></OL>
|
|
||||||
|
|
||||||
<UL>
|
|
||||||
<LI>Buletets
|
|
||||||
<LI>
|
|
||||||
<DIV align=justify><A title=http://laacz.lv/blog/
|
|
||||||
href="http://laacz.lv/blog/">http://laacz.lv/blog/</A> un <A
|
|
||||||
title=http://google.com/ href="http://google.com/">gugle</A></DIV>
|
|
||||||
<LI>Sarakstucitis</LI></UL></DIV><SPAN><SPAN xmlns:canvas="canvas-namespace-id"
|
|
||||||
layoutEmptyTextWellFont="Tahoma"><SPAN
|
|
||||||
style="MARGIN-BOTTOM: 15px; OVERFLOW: visible; HEIGHT: 16px"></SPAN><SPAN
|
|
||||||
style="MARGIN-BOTTOM: 25px; VERTICAL-ALIGN: top; OVERFLOW: visible; MARGIN-RIGHT: 25px; HEIGHT: 234px">
|
|
||||||
<TABLE style="DISPLAY: inline">
|
|
||||||
<TBODY>
|
|
||||||
<TR>
|
|
||||||
|
|
||||||
<TD>
|
|
||||||
<DIV
|
|
||||||
style="FONT-WEIGHT: bold; FONT-SIZE: 12pt; FONT-FAMILY: arial; TEXT-ALIGN: center"><A
|
|
||||||
id=HiresARef
|
|
||||||
title="Click here to view or download a high resolution version of this picture"
|
|
||||||
style="COLOR: #0088e4; TEXT-DECORATION: none"
|
|
||||||
href="http://byfiles.storage.msn.com/x1pMvt0I80jTgT6DuaCpEMbprX3nk3jNv_vjigxV_EYVSMyM_PKgEvDEUtuNhQC-F-23mTTcKyqx6eGaeK2e_wMJ0ikwpDdFntk4SY7pfJUv2g2Ck6R2S2vAA?download">+</A></DIV>
|
|
||||||
<DIV
|
|
||||||
title="Click here to view the full image using the online photo viewer."
|
|
||||||
style="DISPLAY: inline; OVERFLOW: hidden; WIDTH: 140px; HEIGHT: 140px"><A
|
|
||||||
href="http://g.msn.com/5meen_us/171?path=/photomail/{6fc0065f-ffdd-4ca6-9a4c-cc5a93dc122f}&image=47D7B182CFEFB10!127&imagehi=47D7B182CFEFB10!125&CID=323550092004883216"
|
|
||||||
border="0"><IMG
|
|
||||||
style="MARGIN-TOP: 15px; DISPLAY: inline-block; MARGIN-LEFT: 0px"
|
|
||||||
height=109 src="cid:006A71303B80404E9FB6184E55D6A446@wc" width=140
|
|
||||||
border=0></A></DIV></TD></TR>
|
|
||||||
<TR>
|
|
||||||
<TD>
|
|
||||||
<DIV
|
|
||||||
style="FONT-SIZE: 10pt; WIDTH: 140px; FONT-FAMILY: verdana; TEXT-ALIGN: center"><EM><STRONG>This
|
|
||||||
<U>is </U></STRONG><U>tit</U>le</EM> fo<STRONG>r <FONT
|
|
||||||
face="Arial Black">t<FONT color=#800000 size=7>h<U>i</U></FONT>s
|
|
||||||
</FONT>picture</STRONG></DIV></TD></TR></TBODY></TABLE></SPAN></SPAN></SPAN>
|
|
||||||
|
|
||||||
<DIV
|
|
||||||
style="PADDING-RIGHT: 5px; PADDING-LEFT: 7px; PADDING-BOTTOM: 2px; WIDTH: 100%; PADDING-TOP: 2px; HEIGHT: 50px">
|
|
||||||
<DIV> </DIV></DIV>
|
|
||||||
<DIV
|
|
||||||
style="BORDER-TOP: #dddddd 1px solid; FONT-SIZE: 10pt; MARGIN-BOTTOM: 10px; WIDTH: 100%; COLOR: #909090; MARGIN-RIGHT: 10px; PADDING-TOP: 9px; FONT-FAMILY: Verdana; HEIGHT: 42px; BACKGROUND-COLOR: #ffffff"><NOBR><SPAN
|
|
||||||
title="Join Windows Live to share photos using Windows Live Photo E-mail.">Online
|
|
||||||
pictures are available for 30 days. <A style="COLOR: #0088e4"
|
|
||||||
href="http://g.msn.com/5meen_us/175">Get Windows Live Mail desktop to create
|
|
||||||
your own photo e-mails. </A></SPAN></NOBR></DIV></BODY></HTML>
|
|
@@ -25,7 +25,6 @@ h4 {font-family:sans-serif; font-size:0.9em; font-weight:bold; }
|
|||||||
.aside {margin-left:2em; font-family:sans-serif; font-size:0.9em; }
|
.aside {margin-left:2em; font-family:sans-serif; font-size:0.9em; }
|
||||||
blockquote .label {font-weight:bold; font-size:1em; margin:0 0 .1em;
|
blockquote .label {font-weight:bold; font-size:1em; margin:0 0 .1em;
|
||||||
border-bottom:1px solid #CCC;}
|
border-bottom:1px solid #CCC;}
|
||||||
.emphasis {font-weight:bold; text-align:center; font-size:1.3em;}
|
|
||||||
|
|
||||||
/* A regular table */
|
/* A regular table */
|
||||||
.table {border-collapse:collapse; border-bottom:2px solid #888; margin-left:2em; }
|
.table {border-collapse:collapse; border-bottom:2px solid #888; margin-left:2em; }
|
||||||
@@ -33,9 +32,6 @@ blockquote .label {font-weight:bold; font-size:1em; margin:0 0 .1em;
|
|||||||
.table thead th:first-child {-moz-border-radius-topleft:1em;}
|
.table thead th:first-child {-moz-border-radius-topleft:1em;}
|
||||||
.table tbody td {border-bottom:1px solid #CCC; padding-right:0.6em;padding-left:0.6em;}
|
.table tbody td {border-bottom:1px solid #CCC; padding-right:0.6em;padding-left:0.6em;}
|
||||||
|
|
||||||
/* A quick table*/
|
|
||||||
table.quick tbody th {text-align:right; padding-right:1em;}
|
|
||||||
|
|
||||||
/* Category of the file */
|
/* Category of the file */
|
||||||
#filing {font-weight:bold; font-size:smaller; }
|
#filing {font-weight:bold; font-size:smaller; }
|
||||||
|
|
||||||
@@ -70,7 +66,3 @@ q:after {
|
|||||||
/* Marks off sections that are lacking. */
|
/* Marks off sections that are lacking. */
|
||||||
.fixme {margin-left:2em; }
|
.fixme {margin-left:2em; }
|
||||||
.fixme:before {content:"Fix me: "; font-weight:bold; color:#C00; }
|
.fixme:before {content:"Fix me: "; font-weight:bold; color:#C00; }
|
||||||
|
|
||||||
#applicability {margin: 1em 5%; font-style:italic;}
|
|
||||||
|
|
||||||
/* vim: et sw=4 sts=4 */
|
|
||||||
|
@@ -1,91 +0,0 @@
|
|||||||
<?php
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Decorator/extender XSLT processor specifically for HTML documents.
|
|
||||||
*/
|
|
||||||
class ConfigDoc_HTMLXSLTProcessor
|
|
||||||
{
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Instance of XSLTProcessor
|
|
||||||
*/
|
|
||||||
protected $xsltProcessor;
|
|
||||||
|
|
||||||
public function __construct($proc = false)
|
|
||||||
{
|
|
||||||
if ($proc === false) $proc = new XSLTProcessor();
|
|
||||||
$this->xsltProcessor = $proc;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @note Allows a string $xsl filename to be passed
|
|
||||||
*/
|
|
||||||
public function importStylesheet($xsl)
|
|
||||||
{
|
|
||||||
if (is_string($xsl)) {
|
|
||||||
$xsl_file = $xsl;
|
|
||||||
$xsl = new DOMDocument();
|
|
||||||
$xsl->load($xsl_file);
|
|
||||||
}
|
|
||||||
return $this->xsltProcessor->importStylesheet($xsl);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Transforms an XML file into compatible XHTML based on the stylesheet
|
|
||||||
* @param $xml XML DOM tree, or string filename
|
|
||||||
* @return string HTML output
|
|
||||||
* @todo Rename to transformToXHTML, as transformToHTML is misleading
|
|
||||||
*/
|
|
||||||
public function transformToHTML($xml)
|
|
||||||
{
|
|
||||||
if (is_string($xml)) {
|
|
||||||
$dom = new DOMDocument();
|
|
||||||
$dom->load($xml);
|
|
||||||
} else {
|
|
||||||
$dom = $xml;
|
|
||||||
}
|
|
||||||
$out = $this->xsltProcessor->transformToXML($dom);
|
|
||||||
|
|
||||||
// fudges for HTML backwards compatibility
|
|
||||||
// assumes that document is XHTML
|
|
||||||
$out = str_replace('/>', ' />', $out); // <br /> not <br/>
|
|
||||||
$out = str_replace(' xmlns=""', '', $out); // rm unnecessary xmlns
|
|
||||||
|
|
||||||
if (class_exists('Tidy')) {
|
|
||||||
// cleanup output
|
|
||||||
$config = array(
|
|
||||||
'indent' => true,
|
|
||||||
'output-xhtml' => true,
|
|
||||||
'wrap' => 80
|
|
||||||
);
|
|
||||||
$tidy = new Tidy;
|
|
||||||
$tidy->parseString($out, $config, 'utf8');
|
|
||||||
$tidy->cleanRepair();
|
|
||||||
$out = (string) $tidy;
|
|
||||||
}
|
|
||||||
|
|
||||||
return $out;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Bulk sets parameters for the XSL stylesheet
|
|
||||||
* @param array $options Associative array of options to set
|
|
||||||
*/
|
|
||||||
public function setParameters($options)
|
|
||||||
{
|
|
||||||
foreach ($options as $name => $value) {
|
|
||||||
$this->xsltProcessor->setParameter('', $name, $value);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Forward any other calls to the XSLT processor
|
|
||||||
*/
|
|
||||||
public function __call($name, $arguments)
|
|
||||||
{
|
|
||||||
call_user_func_array(array($this->xsltProcessor, $name), $arguments);
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
// vim: et sw=4 sts=4
|
|
@@ -1,164 +0,0 @@
|
|||||||
<?php
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Filesystem tools not provided by default; can recursively create, copy
|
|
||||||
* and delete folders. Some template methods are provided for extensibility.
|
|
||||||
*
|
|
||||||
* @note This class must be instantiated to be used, although it does
|
|
||||||
* not maintain state.
|
|
||||||
*/
|
|
||||||
class FSTools
|
|
||||||
{
|
|
||||||
|
|
||||||
private static $singleton;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Returns a global instance of FSTools
|
|
||||||
*/
|
|
||||||
public static function singleton()
|
|
||||||
{
|
|
||||||
if (empty(FSTools::$singleton)) FSTools::$singleton = new FSTools();
|
|
||||||
return FSTools::$singleton;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Sets our global singleton to something else; useful for overloading
|
|
||||||
* functions.
|
|
||||||
*/
|
|
||||||
public static function setSingleton($singleton)
|
|
||||||
{
|
|
||||||
FSTools::$singleton = $singleton;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Recursively creates a directory
|
|
||||||
* @param string $folder Name of folder to create
|
|
||||||
* @note Adapted from the PHP manual comment 76612
|
|
||||||
*/
|
|
||||||
public function mkdirr($folder)
|
|
||||||
{
|
|
||||||
$folders = preg_split("#[\\\\/]#", $folder);
|
|
||||||
$base = '';
|
|
||||||
for($i = 0, $c = count($folders); $i < $c; $i++) {
|
|
||||||
if(empty($folders[$i])) {
|
|
||||||
if (!$i) {
|
|
||||||
// special case for root level
|
|
||||||
$base .= DIRECTORY_SEPARATOR;
|
|
||||||
}
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
$base .= $folders[$i];
|
|
||||||
if(!is_dir($base)){
|
|
||||||
$this->mkdir($base);
|
|
||||||
}
|
|
||||||
$base .= DIRECTORY_SEPARATOR;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Copy a file, or recursively copy a folder and its contents; modified
|
|
||||||
* so that copied files, if PHP, have includes removed
|
|
||||||
* @note Adapted from http://aidanlister.com/repos/v/function.copyr.php
|
|
||||||
*/
|
|
||||||
public function copyr($source, $dest)
|
|
||||||
{
|
|
||||||
// Simple copy for a file
|
|
||||||
if (is_file($source)) {
|
|
||||||
return $this->copy($source, $dest);
|
|
||||||
}
|
|
||||||
// Make destination directory
|
|
||||||
if (!is_dir($dest)) {
|
|
||||||
$this->mkdir($dest);
|
|
||||||
}
|
|
||||||
// Loop through the folder
|
|
||||||
$dir = $this->dir($source);
|
|
||||||
while ( false !== ($entry = $dir->read()) ) {
|
|
||||||
// Skip pointers
|
|
||||||
if ($entry == '.' || $entry == '..') {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
if (!$this->copyable($entry)) {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
// Deep copy directories
|
|
||||||
if ($dest !== "$source/$entry") {
|
|
||||||
$this->copyr("$source/$entry", "$dest/$entry");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
// Clean up
|
|
||||||
$dir->close();
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Overloadable function that tests a filename for copyability. By
|
|
||||||
* default, everything should be copied; you can restrict things to
|
|
||||||
* ignore hidden files, unreadable files, etc. This function
|
|
||||||
* applies to copyr().
|
|
||||||
*/
|
|
||||||
public function copyable($file)
|
|
||||||
{
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Delete a file, or a folder and its contents
|
|
||||||
* @note Adapted from http://aidanlister.com/repos/v/function.rmdirr.php
|
|
||||||
*/
|
|
||||||
public function rmdirr($dirname)
|
|
||||||
{
|
|
||||||
// Sanity check
|
|
||||||
if (!$this->file_exists($dirname)) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Simple delete for a file
|
|
||||||
if ($this->is_file($dirname) || $this->is_link($dirname)) {
|
|
||||||
return $this->unlink($dirname);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Loop through the folder
|
|
||||||
$dir = $this->dir($dirname);
|
|
||||||
while (false !== $entry = $dir->read()) {
|
|
||||||
// Skip pointers
|
|
||||||
if ($entry == '.' || $entry == '..') {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
// Recurse
|
|
||||||
$this->rmdirr($dirname . DIRECTORY_SEPARATOR . $entry);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Clean up
|
|
||||||
$dir->close();
|
|
||||||
return $this->rmdir($dirname);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Recursively globs a directory.
|
|
||||||
*/
|
|
||||||
public function globr($dir, $pattern, $flags = 0)
|
|
||||||
{
|
|
||||||
$files = $this->glob("$dir/$pattern", $flags);
|
|
||||||
if ($files === false) $files = array();
|
|
||||||
$sub_dirs = $this->glob("$dir/*", GLOB_ONLYDIR);
|
|
||||||
if ($sub_dirs === false) $sub_dirs = array();
|
|
||||||
foreach ($sub_dirs as $sub_dir) {
|
|
||||||
$sub_files = $this->globr($sub_dir, $pattern, $flags);
|
|
||||||
$files = array_merge($files, $sub_files);
|
|
||||||
}
|
|
||||||
return $files;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Allows for PHP functions to be called and be stubbed.
|
|
||||||
* @warning This function will not work for functions that need
|
|
||||||
* to pass references; manually define a stub function for those.
|
|
||||||
*/
|
|
||||||
public function __call($name, $args)
|
|
||||||
{
|
|
||||||
return call_user_func_array($name, $args);
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
// vim: et sw=4 sts=4
|
|
@@ -1,141 +0,0 @@
|
|||||||
<?php
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Represents a file in the filesystem
|
|
||||||
*
|
|
||||||
* @warning Be sure to distinguish between get() and write() versus
|
|
||||||
* read() and put(), the former operates on the entire file, while
|
|
||||||
* the latter operates on a handle.
|
|
||||||
*/
|
|
||||||
class FSTools_File
|
|
||||||
{
|
|
||||||
|
|
||||||
/** Filename of file this object represents */
|
|
||||||
protected $name;
|
|
||||||
|
|
||||||
/** Handle for the file */
|
|
||||||
protected $handle = false;
|
|
||||||
|
|
||||||
/** Instance of FSTools for interfacing with filesystem */
|
|
||||||
protected $fs;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Filename of file you wish to instantiate.
|
|
||||||
* @note This file need not exist
|
|
||||||
*/
|
|
||||||
public function __construct($name, $fs = false)
|
|
||||||
{
|
|
||||||
$this->name = $name;
|
|
||||||
$this->fs = $fs ? $fs : FSTools::singleton();
|
|
||||||
}
|
|
||||||
|
|
||||||
/** Returns the filename of the file. */
|
|
||||||
public function getName() {return $this->name;}
|
|
||||||
|
|
||||||
/** Returns directory of the file without trailing slash */
|
|
||||||
public function getDirectory() {return $this->fs->dirname($this->name);}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Retrieves the contents of a file
|
|
||||||
* @todo Throw an exception if file doesn't exist
|
|
||||||
*/
|
|
||||||
public function get()
|
|
||||||
{
|
|
||||||
return $this->fs->file_get_contents($this->name);
|
|
||||||
}
|
|
||||||
|
|
||||||
/** Writes contents to a file, creates new file if necessary */
|
|
||||||
public function write($contents)
|
|
||||||
{
|
|
||||||
return $this->fs->file_put_contents($this->name, $contents);
|
|
||||||
}
|
|
||||||
|
|
||||||
/** Deletes the file */
|
|
||||||
public function delete()
|
|
||||||
{
|
|
||||||
return $this->fs->unlink($this->name);
|
|
||||||
}
|
|
||||||
|
|
||||||
/** Returns true if file exists and is a file. */
|
|
||||||
public function exists()
|
|
||||||
{
|
|
||||||
return $this->fs->is_file($this->name);
|
|
||||||
}
|
|
||||||
|
|
||||||
/** Returns last file modification time */
|
|
||||||
public function getMTime()
|
|
||||||
{
|
|
||||||
return $this->fs->filemtime($this->name);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Chmod a file
|
|
||||||
* @note We ignore errors because of some weird owner trickery due
|
|
||||||
* to SVN duality
|
|
||||||
*/
|
|
||||||
public function chmod($octal_code)
|
|
||||||
{
|
|
||||||
return @$this->fs->chmod($this->name, $octal_code);
|
|
||||||
}
|
|
||||||
|
|
||||||
/** Opens file's handle */
|
|
||||||
public function open($mode)
|
|
||||||
{
|
|
||||||
if ($this->handle) $this->close();
|
|
||||||
$this->handle = $this->fs->fopen($this->name, $mode);
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
/** Closes file's handle */
|
|
||||||
public function close()
|
|
||||||
{
|
|
||||||
if (!$this->handle) return false;
|
|
||||||
$status = $this->fs->fclose($this->handle);
|
|
||||||
$this->handle = false;
|
|
||||||
return $status;
|
|
||||||
}
|
|
||||||
|
|
||||||
/** Retrieves a line from an open file, with optional max length $length */
|
|
||||||
public function getLine($length = null)
|
|
||||||
{
|
|
||||||
if (!$this->handle) $this->open('r');
|
|
||||||
if ($length === null) return $this->fs->fgets($this->handle);
|
|
||||||
else return $this->fs->fgets($this->handle, $length);
|
|
||||||
}
|
|
||||||
|
|
||||||
/** Retrieves a character from an open file */
|
|
||||||
public function getChar()
|
|
||||||
{
|
|
||||||
if (!$this->handle) $this->open('r');
|
|
||||||
return $this->fs->fgetc($this->handle);
|
|
||||||
}
|
|
||||||
|
|
||||||
/** Retrieves an $length bytes of data from an open data */
|
|
||||||
public function read($length)
|
|
||||||
{
|
|
||||||
if (!$this->handle) $this->open('r');
|
|
||||||
return $this->fs->fread($this->handle, $length);
|
|
||||||
}
|
|
||||||
|
|
||||||
/** Writes to an open file */
|
|
||||||
public function put($string)
|
|
||||||
{
|
|
||||||
if (!$this->handle) $this->open('a');
|
|
||||||
return $this->fs->fwrite($this->handle, $string);
|
|
||||||
}
|
|
||||||
|
|
||||||
/** Returns TRUE if the end of the file has been reached */
|
|
||||||
public function eof()
|
|
||||||
{
|
|
||||||
if (!$this->handle) return true;
|
|
||||||
return $this->fs->feof($this->handle);
|
|
||||||
}
|
|
||||||
|
|
||||||
public function __destruct()
|
|
||||||
{
|
|
||||||
if ($this->handle) $this->close();
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
// vim: et sw=4 sts=4
|
|
@@ -1,11 +0,0 @@
|
|||||||
<?php
|
|
||||||
|
|
||||||
/**
|
|
||||||
* This is a stub include that automatically configures the include path.
|
|
||||||
*/
|
|
||||||
|
|
||||||
set_include_path(dirname(__FILE__) . PATH_SEPARATOR . get_include_path() );
|
|
||||||
require_once 'HTMLPurifierExtras.php';
|
|
||||||
require_once 'HTMLPurifierExtras.autoload.php';
|
|
||||||
|
|
||||||
// vim: et sw=4 sts=4
|
|
@@ -1,15 +0,0 @@
|
|||||||
<?php
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @file
|
|
||||||
* Legacy autoloader for systems lacking spl_autoload_register
|
|
||||||
*
|
|
||||||
* Must be separate to prevent deprecation warning on PHP 7.2
|
|
||||||
*/
|
|
||||||
|
|
||||||
function __autoload($class)
|
|
||||||
{
|
|
||||||
return HTMLPurifierExtras::autoload($class);
|
|
||||||
}
|
|
||||||
|
|
||||||
// vim: et sw=4 sts=4
|
|
@@ -1,23 +0,0 @@
|
|||||||
<?php
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @file
|
|
||||||
* Convenience file that registers autoload handler for HTML Purifier.
|
|
||||||
*
|
|
||||||
* @warning
|
|
||||||
* This autoloader does not contain the compatibility code seen in
|
|
||||||
* HTMLPurifier_Bootstrap; the user is expected to make any necessary
|
|
||||||
* changes to use this library.
|
|
||||||
*/
|
|
||||||
|
|
||||||
if (function_exists('spl_autoload_register')) {
|
|
||||||
spl_autoload_register(array('HTMLPurifierExtras', 'autoload'));
|
|
||||||
if (function_exists('__autoload')) {
|
|
||||||
// Be polite and ensure that userland autoload gets retained
|
|
||||||
spl_autoload_register('__autoload');
|
|
||||||
}
|
|
||||||
} elseif (!function_exists('__autoload')) {
|
|
||||||
require dirname(__FILE__) . '/HTMLPurifierExtras.autoload-legacy.php';
|
|
||||||
}
|
|
||||||
|
|
||||||
// vim: et sw=4 sts=4
|
|
@@ -1,31 +0,0 @@
|
|||||||
<?php
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Meta-class for HTML Purifier's extra class hierarchies, similar to
|
|
||||||
* HTMLPurifier_Bootstrap.
|
|
||||||
*/
|
|
||||||
class HTMLPurifierExtras
|
|
||||||
{
|
|
||||||
|
|
||||||
public static function autoload($class)
|
|
||||||
{
|
|
||||||
$path = HTMLPurifierExtras::getPath($class);
|
|
||||||
if (!$path) return false;
|
|
||||||
require $path;
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
public static function getPath($class)
|
|
||||||
{
|
|
||||||
if (
|
|
||||||
strncmp('FSTools', $class, 7) !== 0 &&
|
|
||||||
strncmp('ConfigDoc', $class, 9) !== 0
|
|
||||||
) return false;
|
|
||||||
// Custom implementations can go here
|
|
||||||
// Standard implementation:
|
|
||||||
return str_replace('_', '/', $class) . '.php';
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
// vim: et sw=4 sts=4
|
|
@@ -1,32 +0,0 @@
|
|||||||
|
|
||||||
HTML Purifier Extras
|
|
||||||
The Method Behind The Madness!
|
|
||||||
|
|
||||||
The extras/ folder in HTML Purifier contains--you guessed it--extra things
|
|
||||||
for HTML Purifier. Specifically, these are two extra libraries called
|
|
||||||
FSTools and ConfigSchema. They're extra for a reason: you don't need them
|
|
||||||
if you're using HTML Purifier for normal usage: filtering HTML. However,
|
|
||||||
if you're a developer, and would like to test HTML Purifier, or need to
|
|
||||||
use one of HTML Purifier's maintenance scripts, chances are they'll need
|
|
||||||
these libraries. Who knows: maybe you'll find them useful too!
|
|
||||||
|
|
||||||
Here are the libraries:
|
|
||||||
|
|
||||||
|
|
||||||
FSTools
|
|
||||||
-------
|
|
||||||
|
|
||||||
Short for File System Tools, this is a poor-man's object-oriented wrapper for
|
|
||||||
the filesystem. It currently consists of two classes:
|
|
||||||
|
|
||||||
- FSTools: This is a singleton that contains a manner of useful functions
|
|
||||||
such as recursive glob, directory removal, etc, as well as the ability
|
|
||||||
to call arbitrary native PHP functions through it like $FS->fopen(...).
|
|
||||||
This makes it a lot simpler to mock these filesystem calls for unit testing.
|
|
||||||
|
|
||||||
- FSTools_File: This object represents a single file, and has almost any
|
|
||||||
method imaginable one would need.
|
|
||||||
|
|
||||||
Check the files themselves for more information.
|
|
||||||
|
|
||||||
vim: et sw=4 sts=4
|
|
@@ -5,7 +5,6 @@
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
set_include_path(dirname(__FILE__) . PATH_SEPARATOR . get_include_path() );
|
set_include_path(dirname(__FILE__) . PATH_SEPARATOR . get_include_path() );
|
||||||
require_once 'HTMLPurifier/Bootstrap.php';
|
require_once 'HTMLPurifier.php';
|
||||||
require_once 'HTMLPurifier.autoload.php';
|
|
||||||
|
|
||||||
// vim: et sw=4 sts=4
|
?>
|
@@ -1,14 +0,0 @@
|
|||||||
<?php
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @file
|
|
||||||
* Legacy autoloader for systems lacking spl_autoload_register
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
|
|
||||||
spl_autoload_register(function($class)
|
|
||||||
{
|
|
||||||
return HTMLPurifier_Bootstrap::autoload($class);
|
|
||||||
});
|
|
||||||
|
|
||||||
// vim: et sw=4 sts=4
|
|
@@ -1,25 +0,0 @@
|
|||||||
<?php
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @file
|
|
||||||
* Convenience file that registers autoload handler for HTML Purifier.
|
|
||||||
* It also does some sanity checks.
|
|
||||||
*/
|
|
||||||
|
|
||||||
if (function_exists('spl_autoload_register') && function_exists('spl_autoload_unregister')) {
|
|
||||||
// We need unregister for our pre-registering functionality
|
|
||||||
HTMLPurifier_Bootstrap::registerAutoload();
|
|
||||||
if (function_exists('__autoload')) {
|
|
||||||
// Be polite and ensure that userland autoload gets retained
|
|
||||||
spl_autoload_register('__autoload');
|
|
||||||
}
|
|
||||||
} elseif (!function_exists('__autoload')) {
|
|
||||||
require dirname(__FILE__) . '/HTMLPurifier.autoload-legacy.php';
|
|
||||||
}
|
|
||||||
|
|
||||||
// phpcs:ignore PHPCompatibility.IniDirectives.RemovedIniDirectives.zend_ze1_compatibility_modeRemoved
|
|
||||||
if (ini_get('zend.ze1_compatibility_mode')) {
|
|
||||||
trigger_error("HTML Purifier is not compatible with zend.ze1_compatibility_mode; please turn it off", E_USER_ERROR);
|
|
||||||
}
|
|
||||||
|
|
||||||
// vim: et sw=4 sts=4
|
|
@@ -1,4 +0,0 @@
|
|||||||
<?php
|
|
||||||
if (!defined('HTMLPURIFIER_PREFIX')) {
|
|
||||||
define('HTMLPURIFIER_PREFIX', dirname(__FILE__));
|
|
||||||
}
|
|
@@ -1,25 +1,21 @@
|
|||||||
<?php
|
<?php
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @file
|
* Function wrapper for HTML Purifier for quick use.
|
||||||
* Defines a function wrapper for HTML Purifier for quick use.
|
* @note This function only includes the library when it is called. While
|
||||||
|
* this is efficient for instances when you only use HTML Purifier
|
||||||
|
* on a few of your pages, it murders bytecode caching. You still
|
||||||
|
* need to add HTML Purifier to your path.
|
||||||
* @note ''HTMLPurifier()'' is NOT the same as ''new HTMLPurifier()''
|
* @note ''HTMLPurifier()'' is NOT the same as ''new HTMLPurifier()''
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/**
|
function HTMLPurifier($html, $config = null) {
|
||||||
* Purify HTML.
|
|
||||||
* @param string $html String HTML to purify
|
|
||||||
* @param mixed $config Configuration to use, can be any value accepted by
|
|
||||||
* HTMLPurifier_Config::create()
|
|
||||||
* @return string
|
|
||||||
*/
|
|
||||||
function HTMLPurifier($html, $config = null)
|
|
||||||
{
|
|
||||||
static $purifier = false;
|
static $purifier = false;
|
||||||
if (!$purifier) {
|
if (!$purifier) {
|
||||||
|
require_once 'HTMLPurifier.php';
|
||||||
$purifier = new HTMLPurifier();
|
$purifier = new HTMLPurifier();
|
||||||
}
|
}
|
||||||
return $purifier->purify($html, $config);
|
return $purifier->purify($html, $config);
|
||||||
}
|
}
|
||||||
|
|
||||||
// vim: et sw=4 sts=4
|
?>
|
@@ -1,236 +0,0 @@
|
|||||||
<?php
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @file
|
|
||||||
* This file was auto-generated by generate-includes.php and includes all of
|
|
||||||
* the core files required by HTML Purifier. Use this if performance is a
|
|
||||||
* primary concern and you are using an opcode cache. PLEASE DO NOT EDIT THIS
|
|
||||||
* FILE, changes will be overwritten the next time the script is run.
|
|
||||||
*
|
|
||||||
* @version 4.18.0
|
|
||||||
*
|
|
||||||
* @warning
|
|
||||||
* You must *not* include any other HTML Purifier files before this file,
|
|
||||||
* because 'require' not 'require_once' is used.
|
|
||||||
*
|
|
||||||
* @warning
|
|
||||||
* This file requires that the include path contains the HTML Purifier
|
|
||||||
* library directory; this is not auto-set.
|
|
||||||
*/
|
|
||||||
|
|
||||||
require 'HTMLPurifier.php';
|
|
||||||
require 'HTMLPurifier/Arborize.php';
|
|
||||||
require 'HTMLPurifier/AttrCollections.php';
|
|
||||||
require 'HTMLPurifier/AttrDef.php';
|
|
||||||
require 'HTMLPurifier/AttrTransform.php';
|
|
||||||
require 'HTMLPurifier/AttrTypes.php';
|
|
||||||
require 'HTMLPurifier/AttrValidator.php';
|
|
||||||
require 'HTMLPurifier/Bootstrap.php';
|
|
||||||
require 'HTMLPurifier/Definition.php';
|
|
||||||
require 'HTMLPurifier/CSSDefinition.php';
|
|
||||||
require 'HTMLPurifier/ChildDef.php';
|
|
||||||
require 'HTMLPurifier/Config.php';
|
|
||||||
require 'HTMLPurifier/ConfigSchema.php';
|
|
||||||
require 'HTMLPurifier/ContentSets.php';
|
|
||||||
require 'HTMLPurifier/Context.php';
|
|
||||||
require 'HTMLPurifier/DefinitionCache.php';
|
|
||||||
require 'HTMLPurifier/DefinitionCacheFactory.php';
|
|
||||||
require 'HTMLPurifier/Doctype.php';
|
|
||||||
require 'HTMLPurifier/DoctypeRegistry.php';
|
|
||||||
require 'HTMLPurifier/ElementDef.php';
|
|
||||||
require 'HTMLPurifier/Encoder.php';
|
|
||||||
require 'HTMLPurifier/EntityLookup.php';
|
|
||||||
require 'HTMLPurifier/EntityParser.php';
|
|
||||||
require 'HTMLPurifier/ErrorCollector.php';
|
|
||||||
require 'HTMLPurifier/ErrorStruct.php';
|
|
||||||
require 'HTMLPurifier/Exception.php';
|
|
||||||
require 'HTMLPurifier/Filter.php';
|
|
||||||
require 'HTMLPurifier/Generator.php';
|
|
||||||
require 'HTMLPurifier/HTMLDefinition.php';
|
|
||||||
require 'HTMLPurifier/HTMLModule.php';
|
|
||||||
require 'HTMLPurifier/HTMLModuleManager.php';
|
|
||||||
require 'HTMLPurifier/IDAccumulator.php';
|
|
||||||
require 'HTMLPurifier/Injector.php';
|
|
||||||
require 'HTMLPurifier/Language.php';
|
|
||||||
require 'HTMLPurifier/LanguageFactory.php';
|
|
||||||
require 'HTMLPurifier/Length.php';
|
|
||||||
require 'HTMLPurifier/Lexer.php';
|
|
||||||
require 'HTMLPurifier/Node.php';
|
|
||||||
require 'HTMLPurifier/PercentEncoder.php';
|
|
||||||
require 'HTMLPurifier/PropertyList.php';
|
|
||||||
require 'HTMLPurifier/PropertyListIterator.php';
|
|
||||||
require 'HTMLPurifier/Queue.php';
|
|
||||||
require 'HTMLPurifier/Strategy.php';
|
|
||||||
require 'HTMLPurifier/StringHash.php';
|
|
||||||
require 'HTMLPurifier/StringHashParser.php';
|
|
||||||
require 'HTMLPurifier/TagTransform.php';
|
|
||||||
require 'HTMLPurifier/Token.php';
|
|
||||||
require 'HTMLPurifier/TokenFactory.php';
|
|
||||||
require 'HTMLPurifier/URI.php';
|
|
||||||
require 'HTMLPurifier/URIDefinition.php';
|
|
||||||
require 'HTMLPurifier/URIFilter.php';
|
|
||||||
require 'HTMLPurifier/URIParser.php';
|
|
||||||
require 'HTMLPurifier/URIScheme.php';
|
|
||||||
require 'HTMLPurifier/URISchemeRegistry.php';
|
|
||||||
require 'HTMLPurifier/UnitConverter.php';
|
|
||||||
require 'HTMLPurifier/VarParser.php';
|
|
||||||
require 'HTMLPurifier/VarParserException.php';
|
|
||||||
require 'HTMLPurifier/Zipper.php';
|
|
||||||
require 'HTMLPurifier/AttrDef/CSS.php';
|
|
||||||
require 'HTMLPurifier/AttrDef/Clone.php';
|
|
||||||
require 'HTMLPurifier/AttrDef/Enum.php';
|
|
||||||
require 'HTMLPurifier/AttrDef/Integer.php';
|
|
||||||
require 'HTMLPurifier/AttrDef/Lang.php';
|
|
||||||
require 'HTMLPurifier/AttrDef/Switch.php';
|
|
||||||
require 'HTMLPurifier/AttrDef/Text.php';
|
|
||||||
require 'HTMLPurifier/AttrDef/URI.php';
|
|
||||||
require 'HTMLPurifier/AttrDef/CSS/Number.php';
|
|
||||||
require 'HTMLPurifier/AttrDef/CSS/AlphaValue.php';
|
|
||||||
require 'HTMLPurifier/AttrDef/CSS/Background.php';
|
|
||||||
require 'HTMLPurifier/AttrDef/CSS/BackgroundPosition.php';
|
|
||||||
require 'HTMLPurifier/AttrDef/CSS/Border.php';
|
|
||||||
require 'HTMLPurifier/AttrDef/CSS/Color.php';
|
|
||||||
require 'HTMLPurifier/AttrDef/CSS/Composite.php';
|
|
||||||
require 'HTMLPurifier/AttrDef/CSS/DenyElementDecorator.php';
|
|
||||||
require 'HTMLPurifier/AttrDef/CSS/Filter.php';
|
|
||||||
require 'HTMLPurifier/AttrDef/CSS/Font.php';
|
|
||||||
require 'HTMLPurifier/AttrDef/CSS/FontFamily.php';
|
|
||||||
require 'HTMLPurifier/AttrDef/CSS/Ident.php';
|
|
||||||
require 'HTMLPurifier/AttrDef/CSS/ImportantDecorator.php';
|
|
||||||
require 'HTMLPurifier/AttrDef/CSS/Length.php';
|
|
||||||
require 'HTMLPurifier/AttrDef/CSS/ListStyle.php';
|
|
||||||
require 'HTMLPurifier/AttrDef/CSS/Multiple.php';
|
|
||||||
require 'HTMLPurifier/AttrDef/CSS/Percentage.php';
|
|
||||||
require 'HTMLPurifier/AttrDef/CSS/Ratio.php';
|
|
||||||
require 'HTMLPurifier/AttrDef/CSS/TextDecoration.php';
|
|
||||||
require 'HTMLPurifier/AttrDef/CSS/URI.php';
|
|
||||||
require 'HTMLPurifier/AttrDef/HTML/Bool.php';
|
|
||||||
require 'HTMLPurifier/AttrDef/HTML/Nmtokens.php';
|
|
||||||
require 'HTMLPurifier/AttrDef/HTML/Class.php';
|
|
||||||
require 'HTMLPurifier/AttrDef/HTML/Color.php';
|
|
||||||
require 'HTMLPurifier/AttrDef/HTML/ContentEditable.php';
|
|
||||||
require 'HTMLPurifier/AttrDef/HTML/FrameTarget.php';
|
|
||||||
require 'HTMLPurifier/AttrDef/HTML/ID.php';
|
|
||||||
require 'HTMLPurifier/AttrDef/HTML/Pixels.php';
|
|
||||||
require 'HTMLPurifier/AttrDef/HTML/Length.php';
|
|
||||||
require 'HTMLPurifier/AttrDef/HTML/LinkTypes.php';
|
|
||||||
require 'HTMLPurifier/AttrDef/HTML/MultiLength.php';
|
|
||||||
require 'HTMLPurifier/AttrDef/URI/Email.php';
|
|
||||||
require 'HTMLPurifier/AttrDef/URI/Host.php';
|
|
||||||
require 'HTMLPurifier/AttrDef/URI/IPv4.php';
|
|
||||||
require 'HTMLPurifier/AttrDef/URI/IPv6.php';
|
|
||||||
require 'HTMLPurifier/AttrDef/URI/Email/SimpleCheck.php';
|
|
||||||
require 'HTMLPurifier/AttrTransform/Background.php';
|
|
||||||
require 'HTMLPurifier/AttrTransform/BdoDir.php';
|
|
||||||
require 'HTMLPurifier/AttrTransform/BgColor.php';
|
|
||||||
require 'HTMLPurifier/AttrTransform/BoolToCSS.php';
|
|
||||||
require 'HTMLPurifier/AttrTransform/Border.php';
|
|
||||||
require 'HTMLPurifier/AttrTransform/EnumToCSS.php';
|
|
||||||
require 'HTMLPurifier/AttrTransform/ImgRequired.php';
|
|
||||||
require 'HTMLPurifier/AttrTransform/ImgSpace.php';
|
|
||||||
require 'HTMLPurifier/AttrTransform/Input.php';
|
|
||||||
require 'HTMLPurifier/AttrTransform/Lang.php';
|
|
||||||
require 'HTMLPurifier/AttrTransform/Length.php';
|
|
||||||
require 'HTMLPurifier/AttrTransform/Name.php';
|
|
||||||
require 'HTMLPurifier/AttrTransform/NameSync.php';
|
|
||||||
require 'HTMLPurifier/AttrTransform/Nofollow.php';
|
|
||||||
require 'HTMLPurifier/AttrTransform/SafeEmbed.php';
|
|
||||||
require 'HTMLPurifier/AttrTransform/SafeObject.php';
|
|
||||||
require 'HTMLPurifier/AttrTransform/SafeParam.php';
|
|
||||||
require 'HTMLPurifier/AttrTransform/ScriptRequired.php';
|
|
||||||
require 'HTMLPurifier/AttrTransform/TargetBlank.php';
|
|
||||||
require 'HTMLPurifier/AttrTransform/TargetNoopener.php';
|
|
||||||
require 'HTMLPurifier/AttrTransform/TargetNoreferrer.php';
|
|
||||||
require 'HTMLPurifier/AttrTransform/Textarea.php';
|
|
||||||
require 'HTMLPurifier/ChildDef/Chameleon.php';
|
|
||||||
require 'HTMLPurifier/ChildDef/Custom.php';
|
|
||||||
require 'HTMLPurifier/ChildDef/Empty.php';
|
|
||||||
require 'HTMLPurifier/ChildDef/List.php';
|
|
||||||
require 'HTMLPurifier/ChildDef/Required.php';
|
|
||||||
require 'HTMLPurifier/ChildDef/Optional.php';
|
|
||||||
require 'HTMLPurifier/ChildDef/StrictBlockquote.php';
|
|
||||||
require 'HTMLPurifier/ChildDef/Table.php';
|
|
||||||
require 'HTMLPurifier/DefinitionCache/Decorator.php';
|
|
||||||
require 'HTMLPurifier/DefinitionCache/Null.php';
|
|
||||||
require 'HTMLPurifier/DefinitionCache/Serializer.php';
|
|
||||||
require 'HTMLPurifier/DefinitionCache/Decorator/Cleanup.php';
|
|
||||||
require 'HTMLPurifier/DefinitionCache/Decorator/Memory.php';
|
|
||||||
require 'HTMLPurifier/HTMLModule/Bdo.php';
|
|
||||||
require 'HTMLPurifier/HTMLModule/CommonAttributes.php';
|
|
||||||
require 'HTMLPurifier/HTMLModule/Edit.php';
|
|
||||||
require 'HTMLPurifier/HTMLModule/Forms.php';
|
|
||||||
require 'HTMLPurifier/HTMLModule/Hypertext.php';
|
|
||||||
require 'HTMLPurifier/HTMLModule/Iframe.php';
|
|
||||||
require 'HTMLPurifier/HTMLModule/Image.php';
|
|
||||||
require 'HTMLPurifier/HTMLModule/Legacy.php';
|
|
||||||
require 'HTMLPurifier/HTMLModule/List.php';
|
|
||||||
require 'HTMLPurifier/HTMLModule/Name.php';
|
|
||||||
require 'HTMLPurifier/HTMLModule/Nofollow.php';
|
|
||||||
require 'HTMLPurifier/HTMLModule/NonXMLCommonAttributes.php';
|
|
||||||
require 'HTMLPurifier/HTMLModule/Object.php';
|
|
||||||
require 'HTMLPurifier/HTMLModule/Presentation.php';
|
|
||||||
require 'HTMLPurifier/HTMLModule/Proprietary.php';
|
|
||||||
require 'HTMLPurifier/HTMLModule/Ruby.php';
|
|
||||||
require 'HTMLPurifier/HTMLModule/SafeEmbed.php';
|
|
||||||
require 'HTMLPurifier/HTMLModule/SafeObject.php';
|
|
||||||
require 'HTMLPurifier/HTMLModule/SafeScripting.php';
|
|
||||||
require 'HTMLPurifier/HTMLModule/Scripting.php';
|
|
||||||
require 'HTMLPurifier/HTMLModule/StyleAttribute.php';
|
|
||||||
require 'HTMLPurifier/HTMLModule/Tables.php';
|
|
||||||
require 'HTMLPurifier/HTMLModule/Target.php';
|
|
||||||
require 'HTMLPurifier/HTMLModule/TargetBlank.php';
|
|
||||||
require 'HTMLPurifier/HTMLModule/TargetNoopener.php';
|
|
||||||
require 'HTMLPurifier/HTMLModule/TargetNoreferrer.php';
|
|
||||||
require 'HTMLPurifier/HTMLModule/Text.php';
|
|
||||||
require 'HTMLPurifier/HTMLModule/Tidy.php';
|
|
||||||
require 'HTMLPurifier/HTMLModule/XMLCommonAttributes.php';
|
|
||||||
require 'HTMLPurifier/HTMLModule/Tidy/Name.php';
|
|
||||||
require 'HTMLPurifier/HTMLModule/Tidy/Proprietary.php';
|
|
||||||
require 'HTMLPurifier/HTMLModule/Tidy/XHTMLAndHTML4.php';
|
|
||||||
require 'HTMLPurifier/HTMLModule/Tidy/Strict.php';
|
|
||||||
require 'HTMLPurifier/HTMLModule/Tidy/Transitional.php';
|
|
||||||
require 'HTMLPurifier/HTMLModule/Tidy/XHTML.php';
|
|
||||||
require 'HTMLPurifier/Injector/AutoParagraph.php';
|
|
||||||
require 'HTMLPurifier/Injector/DisplayLinkURI.php';
|
|
||||||
require 'HTMLPurifier/Injector/Linkify.php';
|
|
||||||
require 'HTMLPurifier/Injector/PurifierLinkify.php';
|
|
||||||
require 'HTMLPurifier/Injector/RemoveEmpty.php';
|
|
||||||
require 'HTMLPurifier/Injector/RemoveSpansWithoutAttributes.php';
|
|
||||||
require 'HTMLPurifier/Injector/SafeObject.php';
|
|
||||||
require 'HTMLPurifier/Lexer/DOMLex.php';
|
|
||||||
require 'HTMLPurifier/Lexer/DirectLex.php';
|
|
||||||
require 'HTMLPurifier/Node/Comment.php';
|
|
||||||
require 'HTMLPurifier/Node/Element.php';
|
|
||||||
require 'HTMLPurifier/Node/Text.php';
|
|
||||||
require 'HTMLPurifier/Strategy/Composite.php';
|
|
||||||
require 'HTMLPurifier/Strategy/Core.php';
|
|
||||||
require 'HTMLPurifier/Strategy/FixNesting.php';
|
|
||||||
require 'HTMLPurifier/Strategy/MakeWellFormed.php';
|
|
||||||
require 'HTMLPurifier/Strategy/RemoveForeignElements.php';
|
|
||||||
require 'HTMLPurifier/Strategy/ValidateAttributes.php';
|
|
||||||
require 'HTMLPurifier/TagTransform/Font.php';
|
|
||||||
require 'HTMLPurifier/TagTransform/Simple.php';
|
|
||||||
require 'HTMLPurifier/Token/Comment.php';
|
|
||||||
require 'HTMLPurifier/Token/Tag.php';
|
|
||||||
require 'HTMLPurifier/Token/Empty.php';
|
|
||||||
require 'HTMLPurifier/Token/End.php';
|
|
||||||
require 'HTMLPurifier/Token/Start.php';
|
|
||||||
require 'HTMLPurifier/Token/Text.php';
|
|
||||||
require 'HTMLPurifier/URIFilter/DisableExternal.php';
|
|
||||||
require 'HTMLPurifier/URIFilter/DisableExternalResources.php';
|
|
||||||
require 'HTMLPurifier/URIFilter/DisableResources.php';
|
|
||||||
require 'HTMLPurifier/URIFilter/HostBlacklist.php';
|
|
||||||
require 'HTMLPurifier/URIFilter/MakeAbsolute.php';
|
|
||||||
require 'HTMLPurifier/URIFilter/Munge.php';
|
|
||||||
require 'HTMLPurifier/URIFilter/SafeIframe.php';
|
|
||||||
require 'HTMLPurifier/URIScheme/data.php';
|
|
||||||
require 'HTMLPurifier/URIScheme/file.php';
|
|
||||||
require 'HTMLPurifier/URIScheme/ftp.php';
|
|
||||||
require 'HTMLPurifier/URIScheme/http.php';
|
|
||||||
require 'HTMLPurifier/URIScheme/https.php';
|
|
||||||
require 'HTMLPurifier/URIScheme/mailto.php';
|
|
||||||
require 'HTMLPurifier/URIScheme/news.php';
|
|
||||||
require 'HTMLPurifier/URIScheme/nntp.php';
|
|
||||||
require 'HTMLPurifier/URIScheme/tel.php';
|
|
||||||
require 'HTMLPurifier/VarParser/Flexible.php';
|
|
||||||
require 'HTMLPurifier/VarParser/Native.php';
|
|
@@ -1,30 +0,0 @@
|
|||||||
<?php
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @file
|
|
||||||
* Emulation layer for code that used kses(), substituting in HTML Purifier.
|
|
||||||
*/
|
|
||||||
|
|
||||||
require_once dirname(__FILE__) . '/HTMLPurifier.auto.php';
|
|
||||||
|
|
||||||
/**
 * kses()-compatible entry point that delegates the actual cleaning to
 * HTML Purifier.
 *
 * @param string $string HTML to clean; handed to HTMLPurifier::purify().
 * @param array $allowed_html Whitelist keyed by element name; each value is
 *        iterated and only its keys (attribute names) are used.
 * @param mixed $allowed_protocols When not null, forwarded unchanged to the
 *        URI.AllowedSchemes directive.
 * @return mixed Whatever HTMLPurifier::purify() returns for $string.
 */
function kses($string, $allowed_html, $allowed_protocols = null)
{
    $purifier_config = HTMLPurifier_Config::createDefault();

    // Convert the nested kses whitelist into two flat lookup tables:
    // one keyed by element name, one keyed by "element.attribute".
    $element_lookup = array();
    $attribute_lookup = array();
    foreach ($allowed_html as $tag => $tag_attributes) {
        $element_lookup[$tag] = true;
        foreach ($tag_attributes as $attr_name => $ignored) {
            $attribute_lookup[$tag . '.' . $attr_name] = true;
        }
    }

    $purifier_config->set('HTML.AllowedElements', $element_lookup);
    $purifier_config->set('HTML.AllowedAttributes', $attribute_lookup);

    // Leave the scheme whitelist at its default unless the caller gave one.
    if (null !== $allowed_protocols) {
        $purifier_config->set('URI.AllowedSchemes', $allowed_protocols);
    }

    $instance = new HTMLPurifier($purifier_config);
    return $instance->purify($string);
}
|
|
||||||
|
|
||||||
// vim: et sw=4 sts=4
|
|
@@ -1,11 +0,0 @@
|
|||||||
<?php
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @file
|
|
||||||
* Convenience stub file that adds HTML Purifier's library file to the path
|
|
||||||
* without any other side-effects.
|
|
||||||
*/
|
|
||||||
|
|
||||||
// Prepend this file's directory to include_path, ahead of the existing entries.
set_include_path(dirname(__FILE__) . PATH_SEPARATOR . get_include_path() );
|
|
||||||
|
|
||||||
// vim: et sw=4 sts=4
|
|
@@ -1,6 +1,7 @@
|
|||||||
<?php
|
<?php
|
||||||
|
|
||||||
/*! @mainpage
|
/*!
|
||||||
|
* @mainpage
|
||||||
*
|
*
|
||||||
* HTML Purifier is an HTML filter that will take an arbitrary snippet of
|
* HTML Purifier is an HTML filter that will take an arbitrary snippet of
|
||||||
* HTML and rigorously test, validate and filter it into a version that
|
* HTML and rigorously test, validate and filter it into a version that
|
||||||
@@ -15,12 +16,14 @@
|
|||||||
* -# Generating HTML from the purified tokens.
|
* -# Generating HTML from the purified tokens.
|
||||||
*
|
*
|
||||||
* However, most users will only need to interface with the HTMLPurifier
|
* However, most users will only need to interface with the HTMLPurifier
|
||||||
* and HTMLPurifier_Config.
|
* class, so this massive amount of infrastructure is usually concealed.
|
||||||
|
* If you plan on working with the internals, be sure to include
|
||||||
|
* HTMLPurifier_ConfigSchema and HTMLPurifier_Config.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/*
|
/*
|
||||||
HTML Purifier 4.18.0 - Standards Compliant HTML Filtering
|
HTML Purifier 1.4.1 - Standards Compliant HTML Filtering
|
||||||
Copyright (C) 2006-2008 Edward Z. Yang
|
Copyright (C) 2006 Edward Z. Yang
|
||||||
|
|
||||||
This library is free software; you can redistribute it and/or
|
This library is free software; you can redistribute it and/or
|
||||||
modify it under the terms of the GNU Lesser General Public
|
modify it under the terms of the GNU Lesser General Public
|
||||||
@@ -37,169 +40,88 @@
|
|||||||
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
// almost every class has an undocumented dependency to these, so make sure
|
||||||
|
// they get included
|
||||||
|
require_once 'HTMLPurifier/ConfigSchema.php';
|
||||||
|
require_once 'HTMLPurifier/Config.php';
|
||||||
|
require_once 'HTMLPurifier/Context.php';
|
||||||
|
|
||||||
|
require_once 'HTMLPurifier/Lexer.php';
|
||||||
|
require_once 'HTMLPurifier/Generator.php';
|
||||||
|
require_once 'HTMLPurifier/Strategy/Core.php';
|
||||||
|
require_once 'HTMLPurifier/Encoder.php';
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Facade that coordinates HTML Purifier's subsystems in order to purify HTML.
|
* Main library execution class.
|
||||||
*
|
*
|
||||||
* @note There are several points in which configuration can be specified
|
* Facade that performs calls to the HTMLPurifier_Lexer,
|
||||||
* for HTML Purifier. The precedence of these (from lowest to
|
* HTMLPurifier_Strategy and HTMLPurifier_Generator subsystems in order to
|
||||||
* highest) is as follows:
|
* purify HTML.
|
||||||
* -# Instance: new HTMLPurifier($config)
|
|
||||||
* -# Invocation: purify($html, $config)
|
|
||||||
* These configurations are entirely independent of each other and
|
|
||||||
* are *not* merged (this behavior may change in the future).
|
|
||||||
*
|
*
|
||||||
* @todo We need an easier way to inject strategies using the configuration
|
* @todo We need an easier way to inject strategies, it'll probably end
|
||||||
* object.
|
* up getting done through config though.
|
||||||
*/
|
*/
|
||||||
class HTMLPurifier
|
class HTMLPurifier
|
||||||
{
|
{
|
||||||
|
|
||||||
/**
|
var $version = '1.4.1';
|
||||||
* Version of HTML Purifier.
|
|
||||||
* @type string
|
var $config;
|
||||||
*/
|
var $filters;
|
||||||
public $version = '4.18.0';
|
|
||||||
|
var $lexer, $strategy, $generator;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Constant with version of HTML Purifier.
|
* Final HTMLPurifier_Context of last run purification. Might be an array.
|
||||||
|
* @public
|
||||||
*/
|
*/
|
||||||
const VERSION = '4.18.0';
|
var $context;
|
||||||
|
|
||||||
/**
|
|
||||||
* Global configuration object.
|
|
||||||
* @type HTMLPurifier_Config
|
|
||||||
*/
|
|
||||||
public $config;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Array of extra filter objects to run on HTML,
|
|
||||||
* for backwards compatibility.
|
|
||||||
* @type HTMLPurifier_Filter[]
|
|
||||||
*/
|
|
||||||
private $filters = array();
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Single instance of HTML Purifier.
|
|
||||||
* @type HTMLPurifier
|
|
||||||
*/
|
|
||||||
private static $instance;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @type HTMLPurifier_Strategy_Core
|
|
||||||
*/
|
|
||||||
protected $strategy;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @type HTMLPurifier_Generator
|
|
||||||
*/
|
|
||||||
protected $generator;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Resultant context of last run purification.
|
|
||||||
* Is an array of contexts if the last called method was purifyArray().
|
|
||||||
* @type HTMLPurifier_Context
|
|
||||||
*/
|
|
||||||
public $context;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Initializes the purifier.
|
* Initializes the purifier.
|
||||||
*
|
* @param $config Optional HTMLPurifier_Config object for all instances of
|
||||||
* @param HTMLPurifier_Config|mixed $config Optional HTMLPurifier_Config object
|
* the purifier, if omitted, a default configuration is
|
||||||
* for all instances of the purifier, if omitted, a default
|
* supplied (which can be overridden on a per-use basis).
|
||||||
* configuration is supplied (which can be overridden on a
|
|
||||||
* per-use basis).
|
|
||||||
* The parameter can also be any type that
|
* The parameter can also be any type that
|
||||||
* HTMLPurifier_Config::create() supports.
|
* HTMLPurifier_Config::create() supports.
|
||||||
*/
|
*/
|
||||||
public function __construct($config = null)
|
function HTMLPurifier($config = null) {
|
||||||
{
|
|
||||||
$this->config = HTMLPurifier_Config::create($config);
|
$this->config = HTMLPurifier_Config::create($config);
|
||||||
$this->strategy = new HTMLPurifier_Strategy_Core();
|
|
||||||
|
$this->lexer = HTMLPurifier_Lexer::create();
|
||||||
|
$this->strategy = new HTMLPurifier_Strategy_Core();
|
||||||
|
$this->generator = new HTMLPurifier_Generator();
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Adds a filter to process the output. First come first serve
|
* Adds a filter to process the output. First come first serve
|
||||||
*
|
* @param $filter HTMLPurifier_Filter object
|
||||||
* @param HTMLPurifier_Filter $filter HTMLPurifier_Filter object
|
|
||||||
*/
|
*/
|
||||||
public function addFilter($filter)
|
function addFilter($filter) {
|
||||||
{
|
|
||||||
trigger_error(
|
|
||||||
'HTMLPurifier->addFilter() is deprecated, use configuration directives' .
|
|
||||||
' in the Filter namespace or Filter.Custom',
|
|
||||||
E_USER_WARNING
|
|
||||||
);
|
|
||||||
$this->filters[] = $filter;
|
$this->filters[] = $filter;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Filters an HTML snippet/document to be XSS-free and standards-compliant.
|
* Filters an HTML snippet/document to be XSS-free and standards-compliant.
|
||||||
*
|
*
|
||||||
* @param string $html String of HTML to purify
|
* @param $html String of HTML to purify
|
||||||
* @param HTMLPurifier_Config $config Config object for this operation,
|
* @param $config HTMLPurifier_Config object for this operation, if omitted,
|
||||||
* if omitted, defaults to the config object specified during this
|
* defaults to the config object specified during this
|
||||||
* object's construction. The parameter can also be any type
|
* object's construction. The parameter can also be any type
|
||||||
* that HTMLPurifier_Config::create() supports.
|
* that HTMLPurifier_Config::create() supports.
|
||||||
*
|
* @return Purified HTML
|
||||||
* @return string Purified HTML
|
|
||||||
*/
|
*/
|
||||||
public function purify($html, $config = null)
|
function purify($html, $config = null) {
|
||||||
{
|
|
||||||
// :TODO: make the config merge in, instead of replace
|
|
||||||
$config = $config ? HTMLPurifier_Config::create($config) : $this->config;
|
$config = $config ? HTMLPurifier_Config::create($config) : $this->config;
|
||||||
|
|
||||||
// implementation is partially environment dependent, partially
|
|
||||||
// configuration dependent
|
|
||||||
$lexer = HTMLPurifier_Lexer::create($config);
|
|
||||||
|
|
||||||
$context = new HTMLPurifier_Context();
|
$context = new HTMLPurifier_Context();
|
||||||
|
|
||||||
// setup HTML generator
|
|
||||||
$this->generator = new HTMLPurifier_Generator($config, $context);
|
|
||||||
$context->register('Generator', $this->generator);
|
|
||||||
|
|
||||||
// set up global context variables
|
|
||||||
if ($config->get('Core.CollectErrors')) {
|
|
||||||
// may get moved out if other facilities use it
|
|
||||||
$language_factory = HTMLPurifier_LanguageFactory::instance();
|
|
||||||
$language = $language_factory->create($config, $context);
|
|
||||||
$context->register('Locale', $language);
|
|
||||||
|
|
||||||
$error_collector = new HTMLPurifier_ErrorCollector($context);
|
|
||||||
$context->register('ErrorCollector', $error_collector);
|
|
||||||
}
|
|
||||||
|
|
||||||
// setup id_accumulator context, necessary due to the fact that
|
|
||||||
// AttrValidator can be called from many places
|
|
||||||
$id_accumulator = HTMLPurifier_IDAccumulator::build($config, $context);
|
|
||||||
$context->register('IDAccumulator', $id_accumulator);
|
|
||||||
|
|
||||||
$html = HTMLPurifier_Encoder::convertToUTF8($html, $config, $context);
|
$html = HTMLPurifier_Encoder::convertToUTF8($html, $config, $context);
|
||||||
|
|
||||||
// setup filters
|
for ($i = 0, $size = count($this->filters); $i < $size; $i++) {
|
||||||
$filter_flags = $config->getBatch('Filter');
|
$html = $this->filters[$i]->preFilter($html, $config, $context);
|
||||||
$custom_filters = $filter_flags['Custom'];
|
|
||||||
unset($filter_flags['Custom']);
|
|
||||||
$filters = array();
|
|
||||||
foreach ($filter_flags as $filter => $flag) {
|
|
||||||
if (!$flag) {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
if (strpos($filter, '.') !== false) {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
$class = "HTMLPurifier_Filter_$filter";
|
|
||||||
$filters[] = new $class;
|
|
||||||
}
|
|
||||||
foreach ($custom_filters as $filter) {
|
|
||||||
// maybe "HTMLPurifier_Filter_$filter", but be consistent with AutoFormat
|
|
||||||
$filters[] = $filter;
|
|
||||||
}
|
|
||||||
$filters = array_merge($filters, $this->filters);
|
|
||||||
// maybe prepare(), but later
|
|
||||||
|
|
||||||
for ($i = 0, $filter_size = count($filters); $i < $filter_size; $i++) {
|
|
||||||
$html = $filters[$i]->preFilter($html, $config, $context);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// purified HTML
|
// purified HTML
|
||||||
@@ -208,19 +130,17 @@ class HTMLPurifier
|
|||||||
// list of tokens
|
// list of tokens
|
||||||
$this->strategy->execute(
|
$this->strategy->execute(
|
||||||
// list of un-purified tokens
|
// list of un-purified tokens
|
||||||
$lexer->tokenizeHTML(
|
$this->lexer->tokenizeHTML(
|
||||||
// un-purified HTML
|
// un-purified HTML
|
||||||
$html,
|
$html, $config, $context
|
||||||
$config,
|
|
||||||
$context
|
|
||||||
),
|
),
|
||||||
$config,
|
$config, $context
|
||||||
$context
|
),
|
||||||
)
|
$config, $context
|
||||||
);
|
);
|
||||||
|
|
||||||
for ($i = $filter_size - 1; $i >= 0; $i--) {
|
for ($i = $size - 1; $i >= 0; $i--) {
|
||||||
$html = $filters[$i]->postFilter($html, $config, $context);
|
$html = $this->filters[$i]->postFilter($html, $config, $context);
|
||||||
}
|
}
|
||||||
|
|
||||||
$html = HTMLPurifier_Encoder::convertFromUTF8($html, $config, $context);
|
$html = HTMLPurifier_Encoder::convertFromUTF8($html, $config, $context);
|
||||||
@@ -230,68 +150,21 @@ class HTMLPurifier
|
|||||||
|
|
||||||
/**
|
/**
|
||||||
* Filters an array of HTML snippets
|
* Filters an array of HTML snippets
|
||||||
*
|
* @param $config Optional HTMLPurifier_Config object for this operation.
|
||||||
* @param string[] $array_of_html Array of html snippets
|
|
||||||
* @param HTMLPurifier_Config $config Optional config object for this operation.
|
|
||||||
* See HTMLPurifier::purify() for more details.
|
* See HTMLPurifier::purify() for more details.
|
||||||
*
|
* @return Array of purified HTML
|
||||||
* @return string[] Array of purified HTML
|
|
||||||
*/
|
*/
|
||||||
public function purifyArray($array_of_html, $config = null)
|
function purifyArray($array_of_html, $config = null) {
|
||||||
{
|
|
||||||
$context_array = array();
|
$context_array = array();
|
||||||
$array = array();
|
foreach ($array_of_html as $key => $html) {
|
||||||
foreach($array_of_html as $key=>$value){
|
$array_of_html[$key] = $this->purify($html, $config);
|
||||||
if (is_array($value)) {
|
|
||||||
$array[$key] = $this->purifyArray($value, $config);
|
|
||||||
} else {
|
|
||||||
$array[$key] = $this->purify($value, $config);
|
|
||||||
}
|
|
||||||
$context_array[$key] = $this->context;
|
$context_array[$key] = $this->context;
|
||||||
}
|
}
|
||||||
$this->context = $context_array;
|
$this->context = $context_array;
|
||||||
return $array;
|
return $array_of_html;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Singleton for enforcing just one HTML Purifier in your system
|
|
||||||
*
|
|
||||||
* @param HTMLPurifier|HTMLPurifier_Config $prototype Optional prototype
|
|
||||||
* HTMLPurifier instance to overload singleton with,
|
|
||||||
* or HTMLPurifier_Config instance to configure the
|
|
||||||
* generated version with.
|
|
||||||
*
|
|
||||||
* @return HTMLPurifier
|
|
||||||
*/
|
|
||||||
public static function instance($prototype = null)
|
|
||||||
{
|
|
||||||
if (!self::$instance || $prototype) {
|
|
||||||
if ($prototype instanceof HTMLPurifier) {
|
|
||||||
self::$instance = $prototype;
|
|
||||||
} elseif ($prototype) {
|
|
||||||
self::$instance = new HTMLPurifier($prototype);
|
|
||||||
} else {
|
|
||||||
self::$instance = new HTMLPurifier();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return self::$instance;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Singleton for enforcing just one HTML Purifier in your system
|
|
||||||
*
|
|
||||||
* @param HTMLPurifier|HTMLPurifier_Config $prototype Optional prototype
|
|
||||||
* HTMLPurifier instance to overload singleton with,
|
|
||||||
* or HTMLPurifier_Config instance to configure the
|
|
||||||
* generated version with.
|
|
||||||
*
|
|
||||||
* @return HTMLPurifier
|
|
||||||
* @note Backwards compatibility, see instance()
|
|
||||||
*/
|
|
||||||
public static function getInstance($prototype = null)
|
|
||||||
{
|
|
||||||
return HTMLPurifier::instance($prototype);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// vim: et sw=4 sts=4
|
?>
|
@@ -1,230 +0,0 @@
|
|||||||
<?php
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @file
|
|
||||||
* This file was auto-generated by generate-includes.php and includes all of
|
|
||||||
* the core files required by HTML Purifier. This is a convenience stub that
|
|
||||||
* includes all files using dirname(__FILE__) and require_once. PLEASE DO NOT
|
|
||||||
* EDIT THIS FILE, changes will be overwritten the next time the script is run.
|
|
||||||
*
|
|
||||||
* Changes to include_path are not necessary.
|
|
||||||
*/
|
|
||||||
|
|
||||||
$__dir = dirname(__FILE__);
|
|
||||||
|
|
||||||
require_once $__dir . '/HTMLPurifier.php';
|
|
||||||
require_once $__dir . '/HTMLPurifier/Arborize.php';
|
|
||||||
require_once $__dir . '/HTMLPurifier/AttrCollections.php';
|
|
||||||
require_once $__dir . '/HTMLPurifier/AttrDef.php';
|
|
||||||
require_once $__dir . '/HTMLPurifier/AttrTransform.php';
|
|
||||||
require_once $__dir . '/HTMLPurifier/AttrTypes.php';
|
|
||||||
require_once $__dir . '/HTMLPurifier/AttrValidator.php';
|
|
||||||
require_once $__dir . '/HTMLPurifier/Bootstrap.php';
|
|
||||||
require_once $__dir . '/HTMLPurifier/Definition.php';
|
|
||||||
require_once $__dir . '/HTMLPurifier/CSSDefinition.php';
|
|
||||||
require_once $__dir . '/HTMLPurifier/ChildDef.php';
|
|
||||||
require_once $__dir . '/HTMLPurifier/Config.php';
|
|
||||||
require_once $__dir . '/HTMLPurifier/ConfigSchema.php';
|
|
||||||
require_once $__dir . '/HTMLPurifier/ContentSets.php';
|
|
||||||
require_once $__dir . '/HTMLPurifier/Context.php';
|
|
||||||
require_once $__dir . '/HTMLPurifier/DefinitionCache.php';
|
|
||||||
require_once $__dir . '/HTMLPurifier/DefinitionCacheFactory.php';
|
|
||||||
require_once $__dir . '/HTMLPurifier/Doctype.php';
|
|
||||||
require_once $__dir . '/HTMLPurifier/DoctypeRegistry.php';
|
|
||||||
require_once $__dir . '/HTMLPurifier/ElementDef.php';
|
|
||||||
require_once $__dir . '/HTMLPurifier/Encoder.php';
|
|
||||||
require_once $__dir . '/HTMLPurifier/EntityLookup.php';
|
|
||||||
require_once $__dir . '/HTMLPurifier/EntityParser.php';
|
|
||||||
require_once $__dir . '/HTMLPurifier/ErrorCollector.php';
|
|
||||||
require_once $__dir . '/HTMLPurifier/ErrorStruct.php';
|
|
||||||
require_once $__dir . '/HTMLPurifier/Exception.php';
|
|
||||||
require_once $__dir . '/HTMLPurifier/Filter.php';
|
|
||||||
require_once $__dir . '/HTMLPurifier/Generator.php';
|
|
||||||
require_once $__dir . '/HTMLPurifier/HTMLDefinition.php';
|
|
||||||
require_once $__dir . '/HTMLPurifier/HTMLModule.php';
|
|
||||||
require_once $__dir . '/HTMLPurifier/HTMLModuleManager.php';
|
|
||||||
require_once $__dir . '/HTMLPurifier/IDAccumulator.php';
|
|
||||||
require_once $__dir . '/HTMLPurifier/Injector.php';
|
|
||||||
require_once $__dir . '/HTMLPurifier/Language.php';
|
|
||||||
require_once $__dir . '/HTMLPurifier/LanguageFactory.php';
|
|
||||||
require_once $__dir . '/HTMLPurifier/Length.php';
|
|
||||||
require_once $__dir . '/HTMLPurifier/Lexer.php';
|
|
||||||
require_once $__dir . '/HTMLPurifier/Node.php';
|
|
||||||
require_once $__dir . '/HTMLPurifier/PercentEncoder.php';
|
|
||||||
require_once $__dir . '/HTMLPurifier/PropertyList.php';
|
|
||||||
require_once $__dir . '/HTMLPurifier/PropertyListIterator.php';
|
|
||||||
require_once $__dir . '/HTMLPurifier/Queue.php';
|
|
||||||
require_once $__dir . '/HTMLPurifier/Strategy.php';
|
|
||||||
require_once $__dir . '/HTMLPurifier/StringHash.php';
|
|
||||||
require_once $__dir . '/HTMLPurifier/StringHashParser.php';
|
|
||||||
require_once $__dir . '/HTMLPurifier/TagTransform.php';
|
|
||||||
require_once $__dir . '/HTMLPurifier/Token.php';
|
|
||||||
require_once $__dir . '/HTMLPurifier/TokenFactory.php';
|
|
||||||
require_once $__dir . '/HTMLPurifier/URI.php';
|
|
||||||
require_once $__dir . '/HTMLPurifier/URIDefinition.php';
|
|
||||||
require_once $__dir . '/HTMLPurifier/URIFilter.php';
|
|
||||||
require_once $__dir . '/HTMLPurifier/URIParser.php';
|
|
||||||
require_once $__dir . '/HTMLPurifier/URIScheme.php';
|
|
||||||
require_once $__dir . '/HTMLPurifier/URISchemeRegistry.php';
|
|
||||||
require_once $__dir . '/HTMLPurifier/UnitConverter.php';
|
|
||||||
require_once $__dir . '/HTMLPurifier/VarParser.php';
|
|
||||||
require_once $__dir . '/HTMLPurifier/VarParserException.php';
|
|
||||||
require_once $__dir . '/HTMLPurifier/Zipper.php';
|
|
||||||
require_once $__dir . '/HTMLPurifier/AttrDef/CSS.php';
|
|
||||||
require_once $__dir . '/HTMLPurifier/AttrDef/Clone.php';
|
|
||||||
require_once $__dir . '/HTMLPurifier/AttrDef/Enum.php';
|
|
||||||
require_once $__dir . '/HTMLPurifier/AttrDef/Integer.php';
|
|
||||||
require_once $__dir . '/HTMLPurifier/AttrDef/Lang.php';
|
|
||||||
require_once $__dir . '/HTMLPurifier/AttrDef/Switch.php';
|
|
||||||
require_once $__dir . '/HTMLPurifier/AttrDef/Text.php';
|
|
||||||
require_once $__dir . '/HTMLPurifier/AttrDef/URI.php';
|
|
||||||
require_once $__dir . '/HTMLPurifier/AttrDef/CSS/Number.php';
|
|
||||||
require_once $__dir . '/HTMLPurifier/AttrDef/CSS/AlphaValue.php';
|
|
||||||
require_once $__dir . '/HTMLPurifier/AttrDef/CSS/Background.php';
|
|
||||||
require_once $__dir . '/HTMLPurifier/AttrDef/CSS/BackgroundPosition.php';
|
|
||||||
require_once $__dir . '/HTMLPurifier/AttrDef/CSS/Border.php';
|
|
||||||
require_once $__dir . '/HTMLPurifier/AttrDef/CSS/Color.php';
|
|
||||||
require_once $__dir . '/HTMLPurifier/AttrDef/CSS/Composite.php';
|
|
||||||
require_once $__dir . '/HTMLPurifier/AttrDef/CSS/DenyElementDecorator.php';
|
|
||||||
require_once $__dir . '/HTMLPurifier/AttrDef/CSS/Filter.php';
|
|
||||||
require_once $__dir . '/HTMLPurifier/AttrDef/CSS/Font.php';
|
|
||||||
require_once $__dir . '/HTMLPurifier/AttrDef/CSS/FontFamily.php';
|
|
||||||
require_once $__dir . '/HTMLPurifier/AttrDef/CSS/Ident.php';
|
|
||||||
require_once $__dir . '/HTMLPurifier/AttrDef/CSS/ImportantDecorator.php';
|
|
||||||
require_once $__dir . '/HTMLPurifier/AttrDef/CSS/Length.php';
|
|
||||||
require_once $__dir . '/HTMLPurifier/AttrDef/CSS/ListStyle.php';
|
|
||||||
require_once $__dir . '/HTMLPurifier/AttrDef/CSS/Multiple.php';
|
|
||||||
require_once $__dir . '/HTMLPurifier/AttrDef/CSS/Percentage.php';
|
|
||||||
require_once $__dir . '/HTMLPurifier/AttrDef/CSS/Ratio.php';
|
|
||||||
require_once $__dir . '/HTMLPurifier/AttrDef/CSS/TextDecoration.php';
|
|
||||||
require_once $__dir . '/HTMLPurifier/AttrDef/CSS/URI.php';
|
|
||||||
require_once $__dir . '/HTMLPurifier/AttrDef/HTML/Bool.php';
|
|
||||||
require_once $__dir . '/HTMLPurifier/AttrDef/HTML/Nmtokens.php';
|
|
||||||
require_once $__dir . '/HTMLPurifier/AttrDef/HTML/Class.php';
|
|
||||||
require_once $__dir . '/HTMLPurifier/AttrDef/HTML/Color.php';
|
|
||||||
require_once $__dir . '/HTMLPurifier/AttrDef/HTML/ContentEditable.php';
|
|
||||||
require_once $__dir . '/HTMLPurifier/AttrDef/HTML/FrameTarget.php';
|
|
||||||
require_once $__dir . '/HTMLPurifier/AttrDef/HTML/ID.php';
|
|
||||||
require_once $__dir . '/HTMLPurifier/AttrDef/HTML/Pixels.php';
|
|
||||||
require_once $__dir . '/HTMLPurifier/AttrDef/HTML/Length.php';
|
|
||||||
require_once $__dir . '/HTMLPurifier/AttrDef/HTML/LinkTypes.php';
|
|
||||||
require_once $__dir . '/HTMLPurifier/AttrDef/HTML/MultiLength.php';
|
|
||||||
require_once $__dir . '/HTMLPurifier/AttrDef/URI/Email.php';
|
|
||||||
require_once $__dir . '/HTMLPurifier/AttrDef/URI/Host.php';
|
|
||||||
require_once $__dir . '/HTMLPurifier/AttrDef/URI/IPv4.php';
|
|
||||||
require_once $__dir . '/HTMLPurifier/AttrDef/URI/IPv6.php';
|
|
||||||
require_once $__dir . '/HTMLPurifier/AttrDef/URI/Email/SimpleCheck.php';
|
|
||||||
require_once $__dir . '/HTMLPurifier/AttrTransform/Background.php';
|
|
||||||
require_once $__dir . '/HTMLPurifier/AttrTransform/BdoDir.php';
|
|
||||||
require_once $__dir . '/HTMLPurifier/AttrTransform/BgColor.php';
|
|
||||||
require_once $__dir . '/HTMLPurifier/AttrTransform/BoolToCSS.php';
|
|
||||||
require_once $__dir . '/HTMLPurifier/AttrTransform/Border.php';
|
|
||||||
require_once $__dir . '/HTMLPurifier/AttrTransform/EnumToCSS.php';
|
|
||||||
require_once $__dir . '/HTMLPurifier/AttrTransform/ImgRequired.php';
|
|
||||||
require_once $__dir . '/HTMLPurifier/AttrTransform/ImgSpace.php';
|
|
||||||
require_once $__dir . '/HTMLPurifier/AttrTransform/Input.php';
|
|
||||||
require_once $__dir . '/HTMLPurifier/AttrTransform/Lang.php';
|
|
||||||
require_once $__dir . '/HTMLPurifier/AttrTransform/Length.php';
|
|
||||||
require_once $__dir . '/HTMLPurifier/AttrTransform/Name.php';
|
|
||||||
require_once $__dir . '/HTMLPurifier/AttrTransform/NameSync.php';
|
|
||||||
require_once $__dir . '/HTMLPurifier/AttrTransform/Nofollow.php';
|
|
||||||
require_once $__dir . '/HTMLPurifier/AttrTransform/SafeEmbed.php';
|
|
||||||
require_once $__dir . '/HTMLPurifier/AttrTransform/SafeObject.php';
|
|
||||||
require_once $__dir . '/HTMLPurifier/AttrTransform/SafeParam.php';
|
|
||||||
require_once $__dir . '/HTMLPurifier/AttrTransform/ScriptRequired.php';
|
|
||||||
require_once $__dir . '/HTMLPurifier/AttrTransform/TargetBlank.php';
|
|
||||||
require_once $__dir . '/HTMLPurifier/AttrTransform/TargetNoopener.php';
|
|
||||||
require_once $__dir . '/HTMLPurifier/AttrTransform/TargetNoreferrer.php';
|
|
||||||
require_once $__dir . '/HTMLPurifier/AttrTransform/Textarea.php';
|
|
||||||
require_once $__dir . '/HTMLPurifier/ChildDef/Chameleon.php';
|
|
||||||
require_once $__dir . '/HTMLPurifier/ChildDef/Custom.php';
|
|
||||||
require_once $__dir . '/HTMLPurifier/ChildDef/Empty.php';
|
|
||||||
require_once $__dir . '/HTMLPurifier/ChildDef/List.php';
|
|
||||||
require_once $__dir . '/HTMLPurifier/ChildDef/Required.php';
|
|
||||||
require_once $__dir . '/HTMLPurifier/ChildDef/Optional.php';
|
|
||||||
require_once $__dir . '/HTMLPurifier/ChildDef/StrictBlockquote.php';
|
|
||||||
require_once $__dir . '/HTMLPurifier/ChildDef/Table.php';
|
|
||||||
require_once $__dir . '/HTMLPurifier/DefinitionCache/Decorator.php';
|
|
||||||
require_once $__dir . '/HTMLPurifier/DefinitionCache/Null.php';
|
|
||||||
require_once $__dir . '/HTMLPurifier/DefinitionCache/Serializer.php';
|
|
||||||
require_once $__dir . '/HTMLPurifier/DefinitionCache/Decorator/Cleanup.php';
|
|
||||||
require_once $__dir . '/HTMLPurifier/DefinitionCache/Decorator/Memory.php';
|
|
||||||
require_once $__dir . '/HTMLPurifier/HTMLModule/Bdo.php';
|
|
||||||
require_once $__dir . '/HTMLPurifier/HTMLModule/CommonAttributes.php';
|
|
||||||
require_once $__dir . '/HTMLPurifier/HTMLModule/Edit.php';
|
|
||||||
require_once $__dir . '/HTMLPurifier/HTMLModule/Forms.php';
|
|
||||||
require_once $__dir . '/HTMLPurifier/HTMLModule/Hypertext.php';
|
|
||||||
require_once $__dir . '/HTMLPurifier/HTMLModule/Iframe.php';
|
|
||||||
require_once $__dir . '/HTMLPurifier/HTMLModule/Image.php';
|
|
||||||
require_once $__dir . '/HTMLPurifier/HTMLModule/Legacy.php';
|
|
||||||
require_once $__dir . '/HTMLPurifier/HTMLModule/List.php';
|
|
||||||
require_once $__dir . '/HTMLPurifier/HTMLModule/Name.php';
|
|
||||||
require_once $__dir . '/HTMLPurifier/HTMLModule/Nofollow.php';
|
|
||||||
require_once $__dir . '/HTMLPurifier/HTMLModule/NonXMLCommonAttributes.php';
|
|
||||||
require_once $__dir . '/HTMLPurifier/HTMLModule/Object.php';
|
|
||||||
require_once $__dir . '/HTMLPurifier/HTMLModule/Presentation.php';
|
|
||||||
require_once $__dir . '/HTMLPurifier/HTMLModule/Proprietary.php';
|
|
||||||
require_once $__dir . '/HTMLPurifier/HTMLModule/Ruby.php';
|
|
||||||
require_once $__dir . '/HTMLPurifier/HTMLModule/SafeEmbed.php';
|
|
||||||
require_once $__dir . '/HTMLPurifier/HTMLModule/SafeObject.php';
|
|
||||||
require_once $__dir . '/HTMLPurifier/HTMLModule/SafeScripting.php';
|
|
||||||
require_once $__dir . '/HTMLPurifier/HTMLModule/Scripting.php';
|
|
||||||
require_once $__dir . '/HTMLPurifier/HTMLModule/StyleAttribute.php';
|
|
||||||
require_once $__dir . '/HTMLPurifier/HTMLModule/Tables.php';
|
|
||||||
require_once $__dir . '/HTMLPurifier/HTMLModule/Target.php';
|
|
||||||
require_once $__dir . '/HTMLPurifier/HTMLModule/TargetBlank.php';
|
|
||||||
require_once $__dir . '/HTMLPurifier/HTMLModule/TargetNoopener.php';
|
|
||||||
require_once $__dir . '/HTMLPurifier/HTMLModule/TargetNoreferrer.php';
|
|
||||||
require_once $__dir . '/HTMLPurifier/HTMLModule/Text.php';
|
|
||||||
require_once $__dir . '/HTMLPurifier/HTMLModule/Tidy.php';
|
|
||||||
require_once $__dir . '/HTMLPurifier/HTMLModule/XMLCommonAttributes.php';
|
|
||||||
require_once $__dir . '/HTMLPurifier/HTMLModule/Tidy/Name.php';
|
|
||||||
require_once $__dir . '/HTMLPurifier/HTMLModule/Tidy/Proprietary.php';
|
|
||||||
require_once $__dir . '/HTMLPurifier/HTMLModule/Tidy/XHTMLAndHTML4.php';
|
|
||||||
require_once $__dir . '/HTMLPurifier/HTMLModule/Tidy/Strict.php';
|
|
||||||
require_once $__dir . '/HTMLPurifier/HTMLModule/Tidy/Transitional.php';
|
|
||||||
require_once $__dir . '/HTMLPurifier/HTMLModule/Tidy/XHTML.php';
|
|
||||||
require_once $__dir . '/HTMLPurifier/Injector/AutoParagraph.php';
|
|
||||||
require_once $__dir . '/HTMLPurifier/Injector/DisplayLinkURI.php';
|
|
||||||
require_once $__dir . '/HTMLPurifier/Injector/Linkify.php';
|
|
||||||
require_once $__dir . '/HTMLPurifier/Injector/PurifierLinkify.php';
|
|
||||||
require_once $__dir . '/HTMLPurifier/Injector/RemoveEmpty.php';
|
|
||||||
require_once $__dir . '/HTMLPurifier/Injector/RemoveSpansWithoutAttributes.php';
|
|
||||||
require_once $__dir . '/HTMLPurifier/Injector/SafeObject.php';
|
|
||||||
require_once $__dir . '/HTMLPurifier/Lexer/DOMLex.php';
|
|
||||||
require_once $__dir . '/HTMLPurifier/Lexer/DirectLex.php';
|
|
||||||
require_once $__dir . '/HTMLPurifier/Node/Comment.php';
|
|
||||||
require_once $__dir . '/HTMLPurifier/Node/Element.php';
|
|
||||||
require_once $__dir . '/HTMLPurifier/Node/Text.php';
|
|
||||||
require_once $__dir . '/HTMLPurifier/Strategy/Composite.php';
|
|
||||||
require_once $__dir . '/HTMLPurifier/Strategy/Core.php';
|
|
||||||
require_once $__dir . '/HTMLPurifier/Strategy/FixNesting.php';
|
|
||||||
require_once $__dir . '/HTMLPurifier/Strategy/MakeWellFormed.php';
|
|
||||||
require_once $__dir . '/HTMLPurifier/Strategy/RemoveForeignElements.php';
|
|
||||||
require_once $__dir . '/HTMLPurifier/Strategy/ValidateAttributes.php';
|
|
||||||
require_once $__dir . '/HTMLPurifier/TagTransform/Font.php';
|
|
||||||
require_once $__dir . '/HTMLPurifier/TagTransform/Simple.php';
|
|
||||||
require_once $__dir . '/HTMLPurifier/Token/Comment.php';
|
|
||||||
require_once $__dir . '/HTMLPurifier/Token/Tag.php';
|
|
||||||
require_once $__dir . '/HTMLPurifier/Token/Empty.php';
|
|
||||||
require_once $__dir . '/HTMLPurifier/Token/End.php';
|
|
||||||
require_once $__dir . '/HTMLPurifier/Token/Start.php';
|
|
||||||
require_once $__dir . '/HTMLPurifier/Token/Text.php';
|
|
||||||
require_once $__dir . '/HTMLPurifier/URIFilter/DisableExternal.php';
|
|
||||||
require_once $__dir . '/HTMLPurifier/URIFilter/DisableExternalResources.php';
|
|
||||||
require_once $__dir . '/HTMLPurifier/URIFilter/DisableResources.php';
|
|
||||||
require_once $__dir . '/HTMLPurifier/URIFilter/HostBlacklist.php';
|
|
||||||
require_once $__dir . '/HTMLPurifier/URIFilter/MakeAbsolute.php';
|
|
||||||
require_once $__dir . '/HTMLPurifier/URIFilter/Munge.php';
|
|
||||||
require_once $__dir . '/HTMLPurifier/URIFilter/SafeIframe.php';
|
|
||||||
require_once $__dir . '/HTMLPurifier/URIScheme/data.php';
|
|
||||||
require_once $__dir . '/HTMLPurifier/URIScheme/file.php';
|
|
||||||
require_once $__dir . '/HTMLPurifier/URIScheme/ftp.php';
|
|
||||||
require_once $__dir . '/HTMLPurifier/URIScheme/http.php';
|
|
||||||
require_once $__dir . '/HTMLPurifier/URIScheme/https.php';
|
|
||||||
require_once $__dir . '/HTMLPurifier/URIScheme/mailto.php';
|
|
||||||
require_once $__dir . '/HTMLPurifier/URIScheme/news.php';
|
|
||||||
require_once $__dir . '/HTMLPurifier/URIScheme/nntp.php';
|
|
||||||
require_once $__dir . '/HTMLPurifier/URIScheme/tel.php';
|
|
||||||
require_once $__dir . '/HTMLPurifier/VarParser/Flexible.php';
|
|
||||||
require_once $__dir . '/HTMLPurifier/VarParser/Native.php';
|
|
@@ -1,71 +0,0 @@
|
|||||||
<?php
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Converts a stream of HTMLPurifier_Token into an HTMLPurifier_Node,
|
|
||||||
* and back again.
|
|
||||||
*
|
|
||||||
* @note This transformation is not an equivalence. We mutate the input
|
|
||||||
* token stream to make it so; see all [MUT] markers in code.
|
|
||||||
*/
|
|
||||||
class HTMLPurifier_Arborize
|
|
||||||
{
|
|
||||||
public static function arborize($tokens, $config, $context) {
|
|
||||||
$definition = $config->getHTMLDefinition();
|
|
||||||
$parent = new HTMLPurifier_Token_Start($definition->info_parent);
|
|
||||||
$stack = array($parent->toNode());
|
|
||||||
foreach ($tokens as $token) {
|
|
||||||
$token->skip = null; // [MUT]
|
|
||||||
$token->carryover = null; // [MUT]
|
|
||||||
if ($token instanceof HTMLPurifier_Token_End) {
|
|
||||||
$token->start = null; // [MUT]
|
|
||||||
$r = array_pop($stack);
|
|
||||||
//assert($r->name === $token->name);
|
|
||||||
//assert(empty($token->attr));
|
|
||||||
$r->endCol = $token->col;
|
|
||||||
$r->endLine = $token->line;
|
|
||||||
$r->endArmor = $token->armor;
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
$node = $token->toNode();
|
|
||||||
$stack[count($stack)-1]->children[] = $node;
|
|
||||||
if ($token instanceof HTMLPurifier_Token_Start) {
|
|
||||||
$stack[] = $node;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
//assert(count($stack) == 1);
|
|
||||||
return $stack[0];
|
|
||||||
}
|
|
||||||
|
|
||||||
public static function flatten($node, $config, $context) {
|
|
||||||
$level = 0;
|
|
||||||
$nodes = array($level => new HTMLPurifier_Queue(array($node)));
|
|
||||||
$closingTokens = array();
|
|
||||||
$tokens = array();
|
|
||||||
do {
|
|
||||||
while (!$nodes[$level]->isEmpty()) {
|
|
||||||
$node = $nodes[$level]->shift(); // FIFO
|
|
||||||
list($start, $end) = $node->toTokenPair();
|
|
||||||
if ($level > 0) {
|
|
||||||
$tokens[] = $start;
|
|
||||||
}
|
|
||||||
if ($end !== NULL) {
|
|
||||||
$closingTokens[$level][] = $end;
|
|
||||||
}
|
|
||||||
if ($node instanceof HTMLPurifier_Node_Element) {
|
|
||||||
$level++;
|
|
||||||
$nodes[$level] = new HTMLPurifier_Queue();
|
|
||||||
foreach ($node->children as $childNode) {
|
|
||||||
$nodes[$level]->push($childNode);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
$level--;
|
|
||||||
if ($level && isset($closingTokens[$level])) {
|
|
||||||
while ($token = array_pop($closingTokens[$level])) {
|
|
||||||
$tokens[] = $token;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} while ($level > 0);
|
|
||||||
return $tokens;
|
|
||||||
}
|
|
||||||
}
|
|
@@ -1,5 +1,8 @@
|
|||||||
<?php
|
<?php
|
||||||
|
|
||||||
|
require_once 'HTMLPurifier/AttrTypes.php';
|
||||||
|
require_once 'HTMLPurifier/AttrDef/Lang.php';
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Defines common attribute collections that modules reference
|
* Defines common attribute collections that modules reference
|
||||||
*/
|
*/
|
||||||
@@ -8,84 +11,62 @@ class HTMLPurifier_AttrCollections
|
|||||||
{
|
{
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Associative array of attribute collections, indexed by name.
|
* Associative array of attribute collections, indexed by name
|
||||||
* @type array
|
* @note Technically, the composition of these is more complicated,
|
||||||
|
* but we bypass it using our own excludes property
|
||||||
*/
|
*/
|
||||||
public $info = array();
|
var $info = array();
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Performs all expansions on internal data for use by other inclusions
|
* Performs all expansions on internal data for use by other inclusions
|
||||||
* It also collects all attribute collection extensions from
|
* It also collects all attribute collection extensions from
|
||||||
* modules
|
* modules
|
||||||
* @param HTMLPurifier_AttrTypes $attr_types HTMLPurifier_AttrTypes instance
|
* @param $attr_types HTMLPurifier_AttrTypes instance
|
||||||
* @param HTMLPurifier_HTMLModule[] $modules Hash array of HTMLPurifier_HTMLModule members
|
* @param $modules Hash array of HTMLPurifier_HTMLModule members
|
||||||
*/
|
*/
|
||||||
public function __construct($attr_types, $modules)
|
function HTMLPurifier_AttrCollections($attr_types, $modules) {
|
||||||
{
|
$info =& $this->info;
|
||||||
$this->doConstruct($attr_types, $modules);
|
|
||||||
}
|
|
||||||
|
|
||||||
public function doConstruct($attr_types, $modules)
|
|
||||||
{
|
|
||||||
// load extensions from the modules
|
// load extensions from the modules
|
||||||
foreach ($modules as $module) {
|
foreach ($modules as $module) {
|
||||||
foreach ($module->attr_collections as $coll_i => $coll) {
|
foreach ($module->attr_collections as $coll_i => $coll) {
|
||||||
if (!isset($this->info[$coll_i])) {
|
|
||||||
$this->info[$coll_i] = array();
|
|
||||||
}
|
|
||||||
foreach ($coll as $attr_i => $attr) {
|
foreach ($coll as $attr_i => $attr) {
|
||||||
if ($attr_i === 0 && isset($this->info[$coll_i][$attr_i])) {
|
if ($attr_i === 0 && isset($info[$coll_i][$attr_i])) {
|
||||||
// merge in includes
|
// merge in includes
|
||||||
$this->info[$coll_i][$attr_i] = array_merge(
|
$info[$coll_i][$attr_i] = array_merge(
|
||||||
$this->info[$coll_i][$attr_i],
|
$info[$coll_i][$attr_i], $attr);
|
||||||
$attr
|
|
||||||
);
|
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
$this->info[$coll_i][$attr_i] = $attr;
|
$info[$coll_i][$attr_i] = $attr;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// perform internal expansions and inclusions
|
// perform internal expansions and inclusions
|
||||||
foreach ($this->info as $name => $attr) {
|
foreach ($info as $name => $attr) {
|
||||||
// merge attribute collections that include others
|
// merge attribute collections that include others
|
||||||
$this->performInclusions($this->info[$name]);
|
$this->performInclusions($info[$name]);
|
||||||
// replace string identifiers with actual attribute objects
|
// replace string identifiers with actual attribute objects
|
||||||
$this->expandIdentifiers($this->info[$name], $attr_types);
|
$this->expandIdentifiers($info[$name], $attr_types);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Takes a reference to an attribute associative array and performs
|
* Takes a reference to an attribute associative array and performs
|
||||||
* all inclusions specified by the zero index.
|
* all inclusions specified by the zero index.
|
||||||
* @param array &$attr Reference to attribute array
|
* @param &$attr Reference to attribute array
|
||||||
*/
|
*/
|
||||||
public function performInclusions(&$attr)
|
function performInclusions(&$attr) {
|
||||||
{
|
if (!isset($attr[0])) return;
|
||||||
if (!isset($attr[0])) {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
$merge = $attr[0];
|
$merge = $attr[0];
|
||||||
$seen = array(); // recursion guard
|
|
||||||
// loop through all the inclusions
|
// loop through all the inclusions
|
||||||
for ($i = 0; isset($merge[$i]); $i++) {
|
for ($i = 0; isset($merge[$i]); $i++) {
|
||||||
if (isset($seen[$merge[$i]])) {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
$seen[$merge[$i]] = true;
|
|
||||||
// foreach attribute of the inclusion, copy it over
|
// foreach attribute of the inclusion, copy it over
|
||||||
if (!isset($this->info[$merge[$i]])) {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
foreach ($this->info[$merge[$i]] as $key => $value) {
|
foreach ($this->info[$merge[$i]] as $key => $value) {
|
||||||
if (isset($attr[$key])) {
|
if (isset($attr[$key])) continue; // also catches more inclusions
|
||||||
continue;
|
|
||||||
} // also catches more inclusions
|
|
||||||
$attr[$key] = $value;
|
$attr[$key] = $value;
|
||||||
}
|
}
|
||||||
if (isset($this->info[$merge[$i]][0])) {
|
if (isset($info[$merge[$i]][0])) {
|
||||||
// recursion
|
// recursion
|
||||||
$merge = array_merge($merge, $this->info[$merge[$i]][0]);
|
$merge = array_merge($merge, isset($info[$merge[$i]][0]));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
unset($attr[0]);
|
unset($attr[0]);
|
||||||
@@ -94,55 +75,26 @@ class HTMLPurifier_AttrCollections
|
|||||||
/**
|
/**
|
||||||
* Expands all string identifiers in an attribute array by replacing
|
* Expands all string identifiers in an attribute array by replacing
|
||||||
* them with the appropriate values inside HTMLPurifier_AttrTypes
|
* them with the appropriate values inside HTMLPurifier_AttrTypes
|
||||||
* @param array &$attr Reference to attribute array
|
* @param &$attr Reference to attribute array
|
||||||
* @param HTMLPurifier_AttrTypes $attr_types HTMLPurifier_AttrTypes instance
|
* @param $attr_types HTMLPurifier_AttrTypes instance
|
||||||
*/
|
*/
|
||||||
public function expandIdentifiers(&$attr, $attr_types)
|
function expandIdentifiers(&$attr, $attr_types) {
|
||||||
{
|
|
||||||
// because foreach will process new elements we add, make sure we
|
|
||||||
// skip duplicates
|
|
||||||
$processed = array();
|
|
||||||
|
|
||||||
foreach ($attr as $def_i => $def) {
|
foreach ($attr as $def_i => $def) {
|
||||||
// skip inclusions
|
if ($def_i === 0) continue;
|
||||||
if ($def_i === 0) {
|
if (!is_string($def)) continue;
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (isset($processed[$def_i])) {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
// determine whether or not attribute is required
|
|
||||||
if ($required = (strpos($def_i, '*') !== false)) {
|
|
||||||
// rename the definition
|
|
||||||
unset($attr[$def_i]);
|
|
||||||
$def_i = trim($def_i, '*');
|
|
||||||
$attr[$def_i] = $def;
|
|
||||||
}
|
|
||||||
|
|
||||||
$processed[$def_i] = true;
|
|
||||||
|
|
||||||
// if we've already got a literal object, move on
|
|
||||||
if (is_object($def)) {
|
|
||||||
// preserve previous required
|
|
||||||
$attr[$def_i]->required = ($required || $attr[$def_i]->required);
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
if ($def === false) {
|
if ($def === false) {
|
||||||
unset($attr[$def_i]);
|
unset($attr[$def_i]);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
if (isset($attr_types->info[$def])) {
|
||||||
if ($t = $attr_types->get($def)) {
|
$attr[$def_i] = $attr_types->info[$def];
|
||||||
$attr[$def_i] = $t;
|
|
||||||
$attr[$def_i]->required = $required;
|
|
||||||
} else {
|
} else {
|
||||||
|
trigger_error('Attempted to reference undefined attribute type', E_USER_ERROR);
|
||||||
unset($attr[$def_i]);
|
unset($attr[$def_i]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// vim: et sw=4 sts=4
|
?>
|
@@ -10,31 +10,26 @@
|
|||||||
* subclasses are also responsible for cleaning the code if possible.
|
* subclasses are also responsible for cleaning the code if possible.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
abstract class HTMLPurifier_AttrDef
|
class HTMLPurifier_AttrDef
|
||||||
{
|
{
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Tells us whether or not an HTML attribute is minimized.
|
* Tells us whether or not an HTML attribute is minimized. Only the
|
||||||
* Has no meaning in other contexts.
|
* boolean attribute vapourware would use this.
|
||||||
* @type bool
|
|
||||||
*/
|
*/
|
||||||
public $minimized = false;
|
var $minimized = false;
|
||||||
|
|
||||||
/**
|
|
||||||
* Tells us whether or not an HTML attribute is required.
|
|
||||||
* Has no meaning in other contexts
|
|
||||||
* @type bool
|
|
||||||
*/
|
|
||||||
public $required = false;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Validates and cleans passed string according to a definition.
|
* Validates and cleans passed string according to a definition.
|
||||||
*
|
*
|
||||||
* @param string $string String to be validated and cleaned.
|
* @public
|
||||||
* @param HTMLPurifier_Config $config Mandatory HTMLPurifier_Config object.
|
* @param $string String to be validated and cleaned.
|
||||||
* @param HTMLPurifier_Context $context Mandatory HTMLPurifier_Context object.
|
* @param $config Mandatory HTMLPurifier_Config object.
|
||||||
|
* @param $context Mandatory HTMLPurifier_AttrContext object.
|
||||||
*/
|
*/
|
||||||
abstract public function validate($string, $config, $context);
|
function validate($string, $config, &$context) {
|
||||||
|
trigger_error('Cannot call abstract function', E_USER_ERROR);
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Convenience method that parses a string as if it were CDATA.
|
* Convenience method that parses a string as if it were CDATA.
|
||||||
@@ -53,92 +48,20 @@ abstract class HTMLPurifier_AttrDef
|
|||||||
*
|
*
|
||||||
* @warning This processing is inconsistent with XML's whitespace handling
|
* @warning This processing is inconsistent with XML's whitespace handling
|
||||||
* as specified by section 3.3.3 and referenced XHTML 1.0 section
|
* as specified by section 3.3.3 and referenced XHTML 1.0 section
|
||||||
* 4.7. However, note that we are NOT necessarily
|
* 4.7. Compliant processing requires all line breaks normalized
|
||||||
* parsing XML, thus, this behavior may still be correct. We
|
* to "\n", so the fix is not as simple as fixing it in this
|
||||||
* assume that newlines have been normalized.
|
* function. Trim and whitespace collapsing are supposed to only
|
||||||
|
* occur in NMTOKENs. However, note that we are NOT necessarily
|
||||||
|
* parsing XML, thus, this behavior may still be correct.
|
||||||
|
*
|
||||||
|
* @public
|
||||||
*/
|
*/
|
||||||
public function parseCDATA($string)
|
function parseCDATA($string) {
|
||||||
{
|
|
||||||
$string = trim($string);
|
$string = trim($string);
|
||||||
$string = str_replace(array("\n", "\t", "\r"), ' ', $string);
|
$string = str_replace("\n", '', $string);
|
||||||
|
$string = str_replace(array("\r", "\t"), ' ', $string);
|
||||||
return $string;
|
return $string;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Factory method for creating this class from a string.
|
|
||||||
* @param string $string String construction info
|
|
||||||
* @return HTMLPurifier_AttrDef Created AttrDef object corresponding to $string
|
|
||||||
*/
|
|
||||||
public function make($string)
|
|
||||||
{
|
|
||||||
// default implementation, return a flyweight of this object.
|
|
||||||
// If $string has an effect on the returned object (i.e. you
|
|
||||||
// need to overload this method), it is best
|
|
||||||
// to clone or instantiate new copies. (Instantiation is safer.)
|
|
||||||
return $this;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Removes spaces from rgb(0, 0, 0) so that shorthand CSS properties work
|
|
||||||
* properly. THIS IS A HACK!
|
|
||||||
* @param string $string a CSS colour definition
|
|
||||||
* @return string
|
|
||||||
*/
|
|
||||||
protected function mungeRgb($string)
|
|
||||||
{
|
|
||||||
$p = '\s*(\d+(\.\d+)?([%]?))\s*';
|
|
||||||
|
|
||||||
if (preg_match('/(rgba|hsla)\(/', $string)) {
|
|
||||||
return preg_replace('/(rgba|hsla)\('.$p.','.$p.','.$p.','.$p.'\)/', '\1(\2,\5,\8,\11)', $string);
|
|
||||||
}
|
|
||||||
|
|
||||||
return preg_replace('/(rgb|hsl)\('.$p.','.$p.','.$p.'\)/', '\1(\2,\5,\8)', $string);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Parses a possibly escaped CSS string and returns the "pure"
|
|
||||||
* version of it.
|
|
||||||
*/
|
|
||||||
protected function expandCSSEscape($string)
|
|
||||||
{
|
|
||||||
// flexibly parse it
|
|
||||||
$ret = '';
|
|
||||||
for ($i = 0, $c = strlen($string); $i < $c; $i++) {
|
|
||||||
if ($string[$i] === '\\') {
|
|
||||||
$i++;
|
|
||||||
if ($i >= $c) {
|
|
||||||
$ret .= '\\';
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
if (ctype_xdigit($string[$i])) {
|
|
||||||
$code = $string[$i];
|
|
||||||
for ($a = 1, $i++; $i < $c && $a < 6; $i++, $a++) {
|
|
||||||
if (!ctype_xdigit($string[$i])) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
$code .= $string[$i];
|
|
||||||
}
|
|
||||||
// We have to be extremely careful when adding
|
|
||||||
// new characters, to make sure we're not breaking
|
|
||||||
// the encoding.
|
|
||||||
$char = HTMLPurifier_Encoder::unichr(hexdec($code));
|
|
||||||
if (HTMLPurifier_Encoder::cleanUTF8($char) === '') {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
$ret .= $char;
|
|
||||||
if ($i < $c && trim($string[$i]) !== '') {
|
|
||||||
$i--;
|
|
||||||
}
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
if ($string[$i] === "\n") {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
$ret .= $string[$i];
|
|
||||||
}
|
|
||||||
return $ret;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// vim: et sw=4 sts=4
|
?>
|
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user