mirror of
https://github.com/ezyang/htmlpurifier.git
synced 2025-08-02 20:27:40 +02:00
Compare commits
459 Commits
Author | SHA1 | Date | |
---|---|---|---|
|
882ffed9ba | ||
|
86990a21f1 | ||
|
9573f0933d | ||
|
632bf2bbd4 | ||
|
ec86598446 | ||
|
b6c3f5e89b | ||
|
7c91104532 | ||
|
eac628f490 | ||
|
92913bc816 | ||
|
479d793562 | ||
|
e2c15f1c98 | ||
|
57ced3f361 | ||
|
c04a441b3e | ||
|
1bed8b6d5f | ||
|
33afd7d9e0 | ||
|
18e538317a | ||
|
96a4193fc9 | ||
|
00c66fa9cb | ||
|
d3abcb90e3 | ||
|
df3100b1b3 | ||
|
143e1ad718 | ||
|
875b0febde | ||
|
3166b8a10f | ||
|
1a70bffd5a | ||
|
f4c6e10ff7 | ||
|
c1cbd9e565 | ||
|
da94d3d6ac | ||
|
80793e925e | ||
|
8ef4fb22db | ||
|
70a7a3f5dd | ||
|
4d612d5a77 | ||
|
63a854ee5d | ||
|
0229458f8f | ||
|
baa477ac08 | ||
|
dc90e8e85b | ||
|
97125ed18b | ||
|
9a9036c689 | ||
|
aea7d02dfe | ||
|
b3ca1498c2 | ||
|
ac18672aba | ||
|
faf28682ad | ||
|
e2cd852bcf | ||
|
694583259c | ||
|
bde4de3c78 | ||
|
5b4e5c983e | ||
|
1ad8fd5ce9 | ||
|
6bdf161afd | ||
|
af45a6c191 | ||
|
2b72d0445f | ||
|
d7b3117678 | ||
|
53ff3e2744 | ||
|
6776efccdd | ||
|
ba9fd175d7 | ||
|
4d27906b02 | ||
|
8f573df3dc | ||
|
c7594487a2 | ||
|
733a5ce5c3 | ||
|
e8abd5953c | ||
|
1b8c8865b2 | ||
|
6e66dc9cad | ||
|
77b60a4206 | ||
|
5bf7ac4e9f | ||
|
a025203b18 | ||
|
809da84ae1 | ||
|
777781a95c | ||
|
4a87f732ca | ||
|
a2885181df | ||
|
84abae08f5 | ||
|
10e2d32a79 | ||
|
baf053b016 | ||
|
bf71c3f392 | ||
|
bfbe29d5a1 | ||
|
e194b8efc6 | ||
|
4214ac9d67 | ||
|
24f761d84a | ||
|
41c9226f3d | ||
|
e3c2063f69 | ||
|
398a02039e | ||
|
84e2e141fc | ||
|
47bbbad000 | ||
|
eaa906f8fc | ||
|
86ca784da3 | ||
|
b107eec452 | ||
|
fcbf724e6e | ||
|
92344cc83a | ||
|
e9f529e78f | ||
|
e802065b65 | ||
|
1d70929eba | ||
|
77f57aa264 | ||
|
db218c7b2b | ||
|
762c089431 | ||
|
07ed1bbf8c | ||
|
b9094d5ec8 | ||
|
b31f280d41 | ||
|
1b962e68f0 | ||
|
0c9dc02d4a | ||
|
bfe474042f | ||
|
119ebcda71 | ||
|
3dfcd016d3 | ||
|
0c9dfc6c3d | ||
|
33a873f5cb | ||
|
12b811d749 | ||
|
781f9a4084 | ||
|
2c955af135 | ||
|
3a6b63dff1 | ||
|
90110a4e3a | ||
|
d67e17a69c | ||
|
5cfecebb33 | ||
|
f5cd2c07ea | ||
|
6691676666 | ||
|
e128c09132 | ||
|
527f154d3d | ||
|
778ddf7c96 | ||
|
c5d4b1ec93 | ||
|
6fe6cc8901 | ||
|
280211f70b | ||
|
3fd51d527c | ||
|
0e6e2c4edf | ||
|
22d24e6b04 | ||
|
3a2fd0b5db | ||
|
25fa53c15b | ||
|
0b6ae1c3c1 | ||
|
ab263a0bf1 | ||
|
c5b18d345c | ||
|
d26418ca3a | ||
|
d304c5c976 | ||
|
f7bc0b0875 | ||
|
70515dd48f | ||
|
1555cb617f | ||
|
cd4500457e | ||
|
fa413e96ac | ||
|
d0fdcc103e | ||
|
6a06b92f0c | ||
|
3184fee468 | ||
|
ed7983b559 | ||
|
92df9e5b28 | ||
|
2f41bd07fa | ||
|
c6914dce51 | ||
|
9977350143 | ||
|
d9e60350d3 | ||
|
c807ed5fe2 | ||
|
c9b6f125aa | ||
|
dc28346677 | ||
|
8423daef05 | ||
|
617f70a8ac | ||
|
0423985b45 | ||
|
e013bc9126 | ||
|
1d90bb2397 | ||
|
03dabec2c0 | ||
|
85090520f1 | ||
|
3b6aa10592 | ||
|
3a4b92da81 | ||
|
0ec9731184 | ||
|
e05bd77344 | ||
|
334ffac5b4 | ||
|
a227cb483a | ||
|
aa0fdeee30 | ||
|
ba418a1f19 | ||
|
c845f0bb78 | ||
|
594268ca3b | ||
|
965be3bd73 | ||
|
700d5bcbfc | ||
|
fd384129bf | ||
|
f8b47c64dd | ||
|
a5ceb1e22a | ||
|
636e2883df | ||
|
dba3ed7770 | ||
|
de9869d942 | ||
|
cfcdce0db8 | ||
|
6bc04e0e10 | ||
|
24f6db6fb2 | ||
|
85fb192d93 | ||
|
7727cea112 | ||
|
6bb8c1fcac | ||
|
a84b6d5be0 | ||
|
6e43cac9c9 | ||
|
656a0c95bf | ||
|
7015aaff46 | ||
|
1009bd41a6 | ||
|
511dfe2d4a | ||
|
463aa3a0fa | ||
|
7189ec2790 | ||
|
e901d832ab | ||
|
643ed1bddc | ||
|
41830cd902 | ||
|
261aa1aeaa | ||
|
486b401cf7 | ||
|
f2794e59c5 | ||
|
d702077d2e | ||
|
36bd06d53e | ||
|
13eb016e06 | ||
|
32025a12e1 | ||
|
7dae94c44b | ||
|
54cc691ba7 | ||
|
3af2ff8f98 | ||
|
36fb284d2f | ||
|
8d1f1e8e73 | ||
|
322288e6c0 | ||
|
3c4346cb1e | ||
|
14d934c7ca | ||
|
bb16d8eae5 | ||
|
10530d7f81 | ||
|
c7e172f660 | ||
|
917d2ea5ef | ||
|
895141e0b5 | ||
|
8ab30e24b7 | ||
|
9db891c3aa | ||
|
eb9f9bc7f6 | ||
|
fcebb7731d | ||
|
8d0d0d1a03 | ||
|
80f59206d7 | ||
|
af3f5190dc | ||
|
5620241165 | ||
|
c06727190e | ||
|
1a95852007 | ||
|
c3fab7200e | ||
|
6d7a17e9b6 | ||
|
64b5581bf2 | ||
|
d8da5ff406 | ||
|
fda310f1e7 | ||
|
fc7dbdbd33 | ||
|
02ac821503 | ||
|
16fa73afa0 | ||
|
32a6afa27c | ||
|
587d642826 | ||
|
0bef016271 | ||
|
ef6a1c9274 | ||
|
86b1da9b6f | ||
|
00ea2062d4 | ||
|
cb5d5d0648 | ||
|
77ce3e8b4a | ||
|
e0c0d8eab6 | ||
|
ce46fb618c | ||
|
9f37764614 | ||
|
aaf6ba421c | ||
|
4b862f64e6 | ||
|
be2cfb7918 | ||
|
2f29c27a59 | ||
|
144bd6f07a | ||
|
a95f600e76 | ||
|
84aa2ca390 | ||
|
1f8619cda5 | ||
|
04b1ec33cb | ||
|
6d9643a92e | ||
|
438d973073 | ||
|
f295465ad4 | ||
|
eaabccdd9b | ||
|
893cdd0301 | ||
|
1ba77fedd4 | ||
|
fae720115a | ||
|
c0f2e69c9f | ||
|
ca6b20ff2b | ||
|
c4aa3ee40c | ||
|
e9c7873057 | ||
|
d45f42e6a8 | ||
|
f46aef698e | ||
|
8fdf8c2e44 | ||
|
4fe475c57f | ||
|
d3710518ce | ||
|
e1876c18ad | ||
|
e616f07739 | ||
|
8708c0617a | ||
|
39be09ee14 | ||
|
949f605857 | ||
|
50aa0ea714 | ||
|
59605d592b | ||
|
5dbd455afb | ||
|
64d2da72f8 | ||
|
5489537b4b | ||
|
a4181a80d7 | ||
|
a1ea149105 | ||
|
3b5effcb72 | ||
|
d1ace6af17 | ||
|
a391dfe1de | ||
|
119c70fc05 | ||
|
b997076dfa | ||
|
04a6b1d3f2 | ||
|
27ba8f2192 | ||
|
9f1e678b48 | ||
|
c216968087 | ||
|
d467af6c4b | ||
|
0ee090bc7b | ||
|
6b21a841c4 | ||
|
08bdeb2ac2 | ||
|
9676e8580e | ||
|
dac98cdb06 | ||
|
870a4029ec | ||
|
e78df4dc9f | ||
|
1d25be875d | ||
|
0e51e42197 | ||
|
51cbb72649 | ||
|
9f2f6c3166 | ||
|
48311b3b02 | ||
|
880a5b9dae | ||
|
043099549c | ||
|
0c051df108 | ||
|
7e59923029 | ||
|
6d517fab09 | ||
|
77302f845f | ||
|
82c9a737f4 | ||
|
aedfbd1e93 | ||
|
848795d4a0 | ||
|
b8f00ace1a | ||
|
34ba0e408f | ||
|
56cfcba5d1 | ||
|
ec59062a9d | ||
|
93babf0a88 | ||
|
42d2858c9d | ||
|
c0dd6944a3 | ||
|
e83573a3ad | ||
|
b65942a2c5 | ||
|
e4ab6d584e | ||
|
e21e9b23ad | ||
|
6cdcc8b8e1 | ||
|
ff60e09780 | ||
|
bd64a8346d | ||
|
7480e7b956 | ||
|
b9eb44bf03 | ||
|
c0b5bc3eea | ||
|
14437cbf47 | ||
|
4c798bd17e | ||
|
1b434f0ecc | ||
|
fab5a9b3e1 | ||
|
d8cb360f3b | ||
|
18320d59a4 | ||
|
0d9c05d13c | ||
|
d81bcbd208 | ||
|
fa6b6fe85f | ||
|
8bda0c4dfb | ||
|
d682a59a68 | ||
|
240b565513 | ||
|
c521e3a534 | ||
|
d765628d24 | ||
|
2cc535ad84 | ||
|
30eb982961 | ||
|
a2d044f58d | ||
|
002fe649f7 | ||
|
d3c04de9dc | ||
|
e4ce3362a5 | ||
|
cb793cd9b9 | ||
|
b5f1c76ee8 | ||
|
f2863557f5 | ||
|
6c9c8f2380 | ||
|
fbc595ebed | ||
|
6651941e3f | ||
|
f5a77c523b | ||
|
ed2aa44bd2 | ||
|
21cf2c94d4 | ||
|
a4abc45505 | ||
|
969a027a5b | ||
|
e3fdda1f3c | ||
|
40d3d5b961 | ||
|
a3b6a15595 | ||
|
4c24a51054 | ||
|
f4c4354ae4 | ||
|
51da183f80 | ||
|
212958a9c7 | ||
|
9f851b2139 | ||
|
8c439aa62c | ||
|
5c0a1d467a | ||
|
929d932234 | ||
|
3441421e8b | ||
|
e28d39e46b | ||
|
bf6de96bd0 | ||
|
65d0e1fdfe | ||
|
d228d66785 | ||
|
e9c22df148 | ||
|
de6e024464 | ||
|
66229121bf | ||
|
0eadf98ee2 | ||
|
6eb193a316 | ||
|
8165adb6c8 | ||
|
37b24b6732 | ||
|
35f8b3c801 | ||
|
c7e115c81c | ||
|
4c502b25f2 | ||
|
3d56c1253b | ||
|
d00cb1e64d | ||
|
b6c9dcefd7 | ||
|
14ef0b75e5 | ||
|
3ba42106ba | ||
|
dea117032f | ||
|
b8a46821f3 | ||
|
2135597553 | ||
|
d238b1a9ed | ||
|
c5e1e1711d | ||
|
f2e42d1d3e | ||
|
a74a590f1c | ||
|
3baf1774b2 | ||
|
41e3c091e1 | ||
|
5a6021599a | ||
|
40d7a296b5 | ||
|
fd81fbac82 | ||
|
2598b26778 | ||
|
4fe6661c64 | ||
|
522c8ed7c2 | ||
|
81a4cf6a14 | ||
|
25551c4b78 | ||
|
c9bf2e8489 | ||
|
07ca96a132 | ||
|
43a5ef3cc6 | ||
|
ff72b2d012 | ||
|
5eee08c548 | ||
|
dd8ef4d3f5 | ||
|
c43c0660f5 | ||
|
c85fd83d2b | ||
|
c23b9da2cd | ||
|
aca282104f | ||
|
a8f7cddd49 | ||
|
8a17b1fbc3 | ||
|
57f897661e | ||
|
4e32902c63 | ||
|
02658df8b2 | ||
|
be7c1e7a8f | ||
|
562f53b54c | ||
|
38a59ef5b8 | ||
|
8779b46fc4 | ||
|
a7fab00cdd | ||
|
beefb11879 | ||
|
ae1c8f47cc | ||
|
0f961c6af4 | ||
|
a840c24796 | ||
|
66c0407bef | ||
|
5b3431d889 | ||
|
831f552ec5 | ||
|
54b37674f1 | ||
|
62f3fd894d | ||
|
b5546ff6f0 | ||
|
7ddd9d0afe | ||
|
620ab75906 | ||
|
3ef9bdf8a2 | ||
|
43f01925cd | ||
|
85a23bacb6 | ||
|
4066416160 | ||
|
fad6aa45fa | ||
|
a7e6d85f6d | ||
|
c330860606 | ||
|
0ea53e5a3d | ||
|
68167176dc | ||
|
bb08f679f0 | ||
|
8cd1806ec8 | ||
|
1274cfed49 | ||
|
1ab47ba949 | ||
|
da95ee096a | ||
|
6d7250c309 | ||
|
df55df1083 | ||
|
1a8d864a42 | ||
|
552102f7f2 | ||
|
f5371bbad4 | ||
|
c8b020879d | ||
|
094b20f58f | ||
|
f2df669eec | ||
|
ca43df9fdd | ||
|
5f76796e14 | ||
|
1f9a6ba30e | ||
|
ccca8cc34f | ||
|
28c29656af | ||
|
88f4f57a47 | ||
|
43a98de909 |
1
.gitattributes
vendored
Normal file
1
.gitattributes
vendored
Normal file
@@ -0,0 +1 @@
|
||||
configdoc/usage.xml -crlf
|
22
.gitignore
vendored
Normal file
22
.gitignore
vendored
Normal file
@@ -0,0 +1,22 @@
|
||||
tags
|
||||
conf/
|
||||
test-settings.php
|
||||
config-schema.php
|
||||
library/HTMLPurifier/DefinitionCache/Serializer/*/
|
||||
library/standalone/
|
||||
library/HTMLPurifier.standalone.php
|
||||
library/HTMLPurifier*.tgz
|
||||
library/package*.xml
|
||||
smoketests/test-schema.html
|
||||
configdoc/*.html
|
||||
configdoc/configdoc.xml
|
||||
docs/doxygen*
|
||||
*.phpt.diff
|
||||
*.phpt.exp
|
||||
*.phpt.log
|
||||
*.phpt.out
|
||||
*.phpt.php
|
||||
*.phpt.skip.php
|
||||
*.htmlt.ini
|
||||
*.patch
|
||||
/*.php
|
2
CREDITS
2
CREDITS
@@ -5,3 +5,5 @@ Almost everything written by Edward Z. Yang (Ambush Commander). Lots of thanks
|
||||
to the DevNetwork Community for their help (see docs/ref-devnetwork.html for
|
||||
more details), Feyd especially (namely IPv6 and optimization). Thanks to RSnake
|
||||
for letting me package his fantastic XSS cheatsheet for a smoketest.
|
||||
|
||||
vim: et sw=4 sts=4
|
||||
|
13
FOCUS
Normal file
13
FOCUS
Normal file
@@ -0,0 +1,13 @@
|
||||
4 - Minor feature enhancements
|
||||
|
||||
[ Appendix A: Release focus IDs ]
|
||||
0 - N/A
|
||||
1 - Initial freshmeat announcement
|
||||
2 - Documentation
|
||||
3 - Code cleanup
|
||||
4 - Minor feature enhancements
|
||||
5 - Major feature enhancements
|
||||
6 - Minor bugfixes
|
||||
7 - Major bugfixes
|
||||
8 - Minor security fixes
|
||||
9 - Major security fixes
|
341
INSTALL
341
INSTALL
@@ -2,62 +2,55 @@
|
||||
Install
|
||||
How to install HTML Purifier
|
||||
|
||||
HTML Purifier is designed to run out of the box, so actually using the library
|
||||
is extremely easy. (Although, if you were looking for a step-by-step
|
||||
installation GUI, you've come to the wrong place!) The impatient can scroll
|
||||
down to the bottom of this INSTALL document to see the code, but you really
|
||||
should make sure a few things are properly done.
|
||||
HTML Purifier is designed to run out of the box, so actually using the
|
||||
library is extremely easy. (Although... if you were looking for a
|
||||
step-by-step installation GUI, you've downloaded the wrong software!)
|
||||
|
||||
While the impatient can get going immediately with some of the sample
|
||||
code at the bottom of this library, it's well worth reading this entire
|
||||
document--most of the other documentation assumes that you are familiar
|
||||
with these contents.
|
||||
|
||||
|
||||
---------------------------------------------------------------------------
|
||||
1. Compatibility
|
||||
|
||||
HTML Purifier works in both PHP 4 and PHP 5, from PHP 4.3.2 and up. It has no
|
||||
core dependencies with other libraries.
|
||||
HTML Purifier is PHP 5 only, and is actively tested from PHP 5.0.5 and
|
||||
up. It has no core dependencies with other libraries. PHP
|
||||
4 support was deprecated on December 31, 2007 with HTML Purifier 3.0.0.
|
||||
|
||||
Optional extensions are iconv (usually installed) and tidy (also common).
|
||||
If you use UTF-8 and don't plan on pretty-printing HTML, you can get away with
|
||||
not having either of these extensions.
|
||||
These optional extensions can enhance the capabilities of HTML Purifier:
|
||||
|
||||
* iconv : Converts text to and from non-UTF-8 encodings
|
||||
* bcmath : Used for unit conversion and imagecrash protection
|
||||
* tidy : Used for pretty-printing HTML
|
||||
|
||||
|
||||
---------------------------------------------------------------------------
|
||||
2. Reconnaissance
|
||||
|
||||
2. Including the library
|
||||
A big plus of HTML Purifier is its inerrant support of standards, so
|
||||
your web-pages should be standards-compliant. (They should also use
|
||||
semantic markup, but that's another issue altogether, one HTML Purifier
|
||||
cannot fix without reading your mind.)
|
||||
|
||||
Simply use:
|
||||
|
||||
require_once '/path/to/library/HTMLPurifier.auto.php';
|
||||
|
||||
...and you're good to go. Since HTML Purifier's codebase is fairly
|
||||
large, I recommend only including HTML Purifier when you need it.
|
||||
|
||||
If you don't like your include_path to be fiddled around with, simply set
|
||||
HTML Purifier's library/ directory to the include path yourself and then:
|
||||
|
||||
require_once 'HTMLPurifier.php';
|
||||
|
||||
Only the contents in the library/ folder are necessary, so you can remove
|
||||
everything else when using HTML Purifier in a production environment.
|
||||
|
||||
|
||||
|
||||
3. Preparing the proper output environment
|
||||
|
||||
HTML Purifier is all about web-standards, so accordingly your webpages should
|
||||
be standards compliant. HTML Purifier can deal with these doctypes:
|
||||
HTML Purifier can process these doctypes:
|
||||
|
||||
* XHTML 1.0 Transitional (default)
|
||||
* XHTML 1.0 Strict
|
||||
* HTML 4.01 Transitional
|
||||
* HTML 4.01 Strict
|
||||
* XHTML 1.1 (sans Ruby)
|
||||
* XHTML 1.1
|
||||
|
||||
...and these character encodings:
|
||||
|
||||
* UTF-8 (default)
|
||||
* Any encoding iconv supports (support is crippled for i18n though)
|
||||
* Any encoding iconv supports (with crippled internationalization support)
|
||||
|
||||
The defaults are there for a reason: they are best-practice choices that
|
||||
should not be changed lightly. For those of you in the dark, you can determine
|
||||
the doctype from this code in your HTML documents:
|
||||
These defaults reflect what my choices would be if I were authoring an
|
||||
HTML document, however, what you choose depends on the nature of your
|
||||
codebase. If you don't know what doctype you are using, you can determine
|
||||
the doctype from this identifier at the top of your source code:
|
||||
|
||||
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
|
||||
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
|
||||
@@ -66,19 +59,156 @@ the doctype from this code in your HTML documents:
|
||||
|
||||
<meta http-equiv="Content-type" content="text/html;charset=ENCODING">
|
||||
|
||||
For legacy codebases these declarations may be missing. If that is the case,
|
||||
STOP, and read docs/enduser-utf8.html
|
||||
|
||||
You may currently be vulnerable to XSS and other security threats, and HTML
|
||||
Purifier won't be able to fix that.
|
||||
If the character encoding declaration is missing, STOP NOW, and
|
||||
read 'docs/enduser-utf8.html' (web accessible at
|
||||
http://htmlpurifier.org/docs/enduser-utf8.html). In fact, even if it is
|
||||
present, read this document anyway, as many websites specify their
|
||||
document's character encoding incorrectly.
|
||||
|
||||
|
||||
---------------------------------------------------------------------------
|
||||
3. Including the library
|
||||
|
||||
The procedure is quite simple:
|
||||
|
||||
require_once '/path/to/library/HTMLPurifier.auto.php';
|
||||
|
||||
This will setup an autoloader, so the library's files are only included
|
||||
when you use them.
|
||||
|
||||
Only the contents in the library/ folder are necessary, so you can remove
|
||||
everything else when using HTML Purifier in a production environment.
|
||||
|
||||
If you installed HTML Purifier via PEAR, all you need to do is:
|
||||
|
||||
require_once 'HTMLPurifier.auto.php';
|
||||
|
||||
Please note that the usual PEAR practice of including just the classes you
|
||||
want will not work with HTML Purifier's autoloading scheme.
|
||||
|
||||
Advanced users, read on; other users can skip to section 4.
|
||||
|
||||
Autoload compatibility
|
||||
----------------------
|
||||
|
||||
HTML Purifier attempts to be as smart as possible when registering an
|
||||
autoloader, but there are some cases where you will need to change
|
||||
your own code to accomodate HTML Purifier. These are those cases:
|
||||
|
||||
PHP VERSION IS LESS THAN 5.1.2, AND YOU'VE DEFINED __autoload
|
||||
Because spl_autoload_register() doesn't exist in early versions
|
||||
of PHP 5, HTML Purifier has no way of adding itself to the autoload
|
||||
stack. Modify your __autoload function to test
|
||||
HTMLPurifier_Bootstrap::autoload($class)
|
||||
|
||||
For example, suppose your autoload function looks like this:
|
||||
|
||||
function __autoload($class) {
|
||||
require str_replace('_', '/', $class) . '.php';
|
||||
return true;
|
||||
}
|
||||
|
||||
A modified version with HTML Purifier would look like this:
|
||||
|
||||
function __autoload($class) {
|
||||
if (HTMLPurifier_Bootstrap::autoload($class)) return true;
|
||||
require str_replace('_', '/', $class) . '.php';
|
||||
return true;
|
||||
}
|
||||
|
||||
Note that there *is* some custom behavior in our autoloader; the
|
||||
original autoloader in our example would work for 99% of the time,
|
||||
but would fail when including language files.
|
||||
|
||||
AN __autoload FUNCTION IS DECLARED AFTER OUR AUTOLOADER IS REGISTERED
|
||||
spl_autoload_register() has the curious behavior of disabling
|
||||
the existing __autoload() handler. Users need to explicitly
|
||||
spl_autoload_register('__autoload'). Because we use SPL when it
|
||||
is available, __autoload() will ALWAYS be disabled. If __autoload()
|
||||
is declared before HTML Purifier is loaded, this is not a problem:
|
||||
HTML Purifier will register the function for you. But if it is
|
||||
declared afterwards, it will mysteriously not work. This
|
||||
snippet of code (after your autoloader is defined) will fix it:
|
||||
|
||||
spl_autoload_register('__autoload')
|
||||
|
||||
Users should also be on guard if they use a version of PHP previous
|
||||
to 5.1.2 without an autoloader--HTML Purifier will define __autoload()
|
||||
for you, which can collide with an autoloader that was added by *you*
|
||||
later.
|
||||
|
||||
|
||||
For better performance
|
||||
----------------------
|
||||
|
||||
Opcode caches, which greatly speed up PHP initialization for scripts
|
||||
with large amounts of code (HTML Purifier included), don't like
|
||||
autoloaders. We offer an include file that includes all of HTML Purifier's
|
||||
files in one go in an opcode cache friendly manner:
|
||||
|
||||
// If /path/to/library isn't already in your include path, uncomment
|
||||
// the below line:
|
||||
// require '/path/to/library/HTMLPurifier.path.php';
|
||||
|
||||
require 'HTMLPurifier.includes.php';
|
||||
|
||||
Optional components still need to be included--you'll know if you try to
|
||||
use a feature and you get a class doesn't exists error! The autoloader
|
||||
can be used in conjunction with this approach to catch classes that are
|
||||
missing. Simply add this afterwards:
|
||||
|
||||
require 'HTMLPurifier.autoload.php';
|
||||
|
||||
Standalone version
|
||||
------------------
|
||||
|
||||
HTML Purifier has a standalone distribution; you can also generate
|
||||
a standalone file from the full version by running the script
|
||||
maintenance/generate-standalone.php . The standalone version has the
|
||||
benefit of having most of its code in one file, so parsing is much
|
||||
faster and the library is easier to manage.
|
||||
|
||||
If HTMLPurifier.standalone.php exists in the library directory, you
|
||||
can use it like this:
|
||||
|
||||
require '/path/to/HTMLPurifier.standalone.php';
|
||||
|
||||
This is equivalent to including HTMLPurifier.includes.php, except that
|
||||
the contents of standalone/ will be added to your path. To override this
|
||||
behavior, specify a new HTMLPURIFIER_PREFIX where standalone files can
|
||||
be found (usually, this will be one directory up, the "true" library
|
||||
directory in full distributions). Don't forget to set your path too!
|
||||
|
||||
The autoloader can be added to the end to ensure the classes are
|
||||
loaded when necessary; otherwise you can manually include them.
|
||||
To use the autoloader, use this:
|
||||
|
||||
require 'HTMLPurifier.autoload.php';
|
||||
|
||||
For advanced users
|
||||
------------------
|
||||
|
||||
HTMLPurifier.auto.php performs a number of operations that can be done
|
||||
individually. These are:
|
||||
|
||||
HTMLPurifier.path.php
|
||||
Puts /path/to/library in the include path. For high performance,
|
||||
this should be done in php.ini.
|
||||
|
||||
HTMLPurifier.autoload.php
|
||||
Registers our autoload handler HTMLPurifier_Bootstrap::autoload($class).
|
||||
|
||||
You can do these operations by yourself--in fact, you must modify your own
|
||||
autoload handler if you are using a version of PHP earlier than PHP 5.1.2
|
||||
(See "Autoload compatibility" above).
|
||||
|
||||
|
||||
---------------------------------------------------------------------------
|
||||
4. Configuration
|
||||
|
||||
HTML Purifier is designed to run out-of-the-box, but occasionally HTML
|
||||
Purifier needs to be told what to do. If you answered no to any of these
|
||||
questions, read on, otherwise, you can skip to the next section (or, if you're
|
||||
Purifier needs to be told what to do. If you answer no to any of these
|
||||
questions, read on; otherwise, you can skip to the next section (or, if you're
|
||||
into configuring things just for the heck of it, skip to 4.3).
|
||||
|
||||
* Am I using UTF-8?
|
||||
@@ -90,7 +220,6 @@ object and read on:
|
||||
$config = HTMLPurifier_Config::createDefault();
|
||||
|
||||
|
||||
|
||||
4.1. Setting a different character encoding
|
||||
|
||||
You really shouldn't use any other encoding except UTF-8, especially if you
|
||||
@@ -102,12 +231,12 @@ HTML Purifier uses iconv to support other character encodings, as such,
|
||||
any encoding that iconv supports <http://www.gnu.org/software/libiconv/>
|
||||
HTML Purifier supports with this code:
|
||||
|
||||
$config->set('Core', 'Encoding', /* put your encoding here */);
|
||||
$config->set('Core.Encoding', /* put your encoding here */);
|
||||
|
||||
An example usage for Latin-1 websites (the most common encoding for English
|
||||
websites):
|
||||
|
||||
$config->set('Core', 'Encoding', 'ISO-8859-1');
|
||||
$config->set('Core.Encoding', 'ISO-8859-1');
|
||||
|
||||
Note that HTML Purifier's support for non-Unicode encodings is crippled by the
|
||||
fact that any character not supported by that encoding will be silently
|
||||
@@ -117,13 +246,12 @@ but please be cognizant of the issues the "solution" creates (for this
|
||||
reason, I do not include the solution in this document).
|
||||
|
||||
|
||||
|
||||
4.2. Setting a different doctype
|
||||
|
||||
For those of you using HTML 4.01 Transitional, you can disable
|
||||
XHTML output like this:
|
||||
|
||||
$config->set('HTML', 'Doctype', 'HTML 4.01 Transitional');
|
||||
$config->set('HTML.Doctype', 'HTML 4.01 Transitional');
|
||||
|
||||
Other supported doctypes include:
|
||||
|
||||
@@ -134,7 +262,6 @@ Other supported doctypes include:
|
||||
* XHTML 1.1
|
||||
|
||||
|
||||
|
||||
4.3. Other settings
|
||||
|
||||
There are more configuration directives which can be read about
|
||||
@@ -144,55 +271,24 @@ your code. Some of the more interesting ones are configurable at the
|
||||
demo <http://htmlpurifier.org/demo.php> and are well worth looking into
|
||||
for your own system.
|
||||
|
||||
For example, you can fine tune allowed elements and attributes, convert
|
||||
relative URLs to absolute ones, and even autoparagraph input text! These
|
||||
are, respectively, %HTML.Allowed, %URI.MakeAbsolute and %URI.Base, and
|
||||
%AutoFormat.AutoParagraph. The %Namespace.Directive naming convention
|
||||
translates to:
|
||||
|
||||
$config->set('Namespace.Directive', $value);
|
||||
|
||||
E.g.
|
||||
|
||||
$config->set('HTML.Allowed', 'p,b,a[href],i');
|
||||
$config->set('URI.Base', 'http://www.example.com');
|
||||
$config->set('URI.MakeAbsolute', true);
|
||||
$config->set('AutoFormat.AutoParagraph', true);
|
||||
|
||||
|
||||
5. Using the code
|
||||
|
||||
The interface is mind-numbingly simple:
|
||||
|
||||
$purifier = new HTMLPurifier();
|
||||
$clean_html = $purifier->purify( $dirty_html );
|
||||
|
||||
...or, if you're using the configuration object:
|
||||
|
||||
$purifier = new HTMLPurifier($config);
|
||||
$clean_html = $purifier->purify( $dirty_html );
|
||||
|
||||
That's it! For more examples, check out docs/examples/ (they aren't very
|
||||
different though). Also, docs/enduser-slow.html gives advice on what to
|
||||
do if HTML Purifier is slowing down your application.
|
||||
|
||||
|
||||
|
||||
6. Quick install
|
||||
|
||||
First, make sure library/HTMLPurifier/DefinitionCache/Serializer is
|
||||
writable by the webserver (see Section 7: Caching below for details).
|
||||
If your website is in UTF-8 and XHTML Transitional, use this code:
|
||||
|
||||
<?php
|
||||
require_once '/path/to/htmlpurifier/library/HTMLPurifier.auto.php';
|
||||
|
||||
$purifier = new HTMLPurifier();
|
||||
$clean_html = $purifier->purify($dirty_html);
|
||||
?>
|
||||
|
||||
If your website is in a different encoding or doctype, use this code:
|
||||
|
||||
<?php
|
||||
require_once '/path/to/htmlpurifier/library/HTMLPurifier.auto.php';
|
||||
|
||||
$config = HTMLPurifier_Config::createDefault();
|
||||
$config->set('Core', 'Encoding', 'ISO-8859-1'); // replace with your encoding
|
||||
$config->set('HTML', 'Doctype', 'HTML 4.01 Transitional'); // replace with your doctype
|
||||
$purifier = new HTMLPurifier($config);
|
||||
|
||||
$clean_html = $purifier->purify($dirty_html);
|
||||
?>
|
||||
|
||||
|
||||
|
||||
7. Caching
|
||||
---------------------------------------------------------------------------
|
||||
5. Caching
|
||||
|
||||
HTML Purifier generates some cache files (generally one or two) to speed up
|
||||
its execution. For maximum performance, make sure that
|
||||
@@ -222,9 +318,56 @@ If you are unable or unwilling to give write permissions to the cache
|
||||
directory, you can either disable the cache (and suffer a performance
|
||||
hit):
|
||||
|
||||
$config->set('Core', 'DefinitionCache', null);
|
||||
$config->set('Core.DefinitionCache', null);
|
||||
|
||||
Or move the cache directory somewhere else (no trailing slash):
|
||||
|
||||
$config->set('Cache', 'SerializerPath', '/home/user/absolute/path');
|
||||
$config->set('Cache.SerializerPath', '/home/user/absolute/path');
|
||||
|
||||
|
||||
---------------------------------------------------------------------------
|
||||
6. Using the code
|
||||
|
||||
The interface is mind-numbingly simple:
|
||||
|
||||
$purifier = new HTMLPurifier();
|
||||
$clean_html = $purifier->purify( $dirty_html );
|
||||
|
||||
...or, if you're using the configuration object:
|
||||
|
||||
$purifier = new HTMLPurifier($config);
|
||||
$clean_html = $purifier->purify( $dirty_html );
|
||||
|
||||
That's it! For more examples, check out docs/examples/ (they aren't very
|
||||
different though). Also, docs/enduser-slow.html gives advice on what to
|
||||
do if HTML Purifier is slowing down your application.
|
||||
|
||||
|
||||
---------------------------------------------------------------------------
|
||||
7. Quick install
|
||||
|
||||
First, make sure library/HTMLPurifier/DefinitionCache/Serializer is
|
||||
writable by the webserver (see Section 5: Caching above for details).
|
||||
If your website is in UTF-8 and XHTML Transitional, use this code:
|
||||
|
||||
<?php
|
||||
require_once '/path/to/htmlpurifier/library/HTMLPurifier.auto.php';
|
||||
|
||||
$purifier = new HTMLPurifier();
|
||||
$clean_html = $purifier->purify($dirty_html);
|
||||
?>
|
||||
|
||||
If your website is in a different encoding or doctype, use this code:
|
||||
|
||||
<?php
|
||||
require_once '/path/to/htmlpurifier/library/HTMLPurifier.auto.php';
|
||||
|
||||
$config = HTMLPurifier_Config::createDefault();
|
||||
$config->set('Core.Encoding', 'ISO-8859-1'); // replace with your encoding
|
||||
$config->set('HTML.Doctype', 'HTML 4.01 Transitional'); // replace with your doctype
|
||||
$purifier = new HTMLPurifier($config);
|
||||
|
||||
$clean_html = $purifier->purify($dirty_html);
|
||||
?>
|
||||
|
||||
vim: et sw=4 sts=4
|
||||
|
@@ -17,10 +17,10 @@ ce document pour quelques choses.
|
||||
|
||||
1. Compatibilité
|
||||
|
||||
HTML Purifier fonctionne dans PHP 4 et PHP 5. PHP 4.3.2 est le dernier
|
||||
HTML Purifier fonctionne dans PHP 5. PHP 5.0.5 est le dernier
|
||||
version que je le testais. Il ne dépend de les autre librairies.
|
||||
|
||||
Les extensions optionnel est iconv (en général déjà installer) et
|
||||
Les extensions optionnel est iconv (en général déjà installer) et
|
||||
tidy (répandu aussi). Si vous utilisez UTF-8 et ne voulez pas
|
||||
l'indentation, vous pouvez utiliser HTML Purifier sans ces extensions.
|
||||
|
||||
@@ -34,25 +34,21 @@ Utilisez:
|
||||
...quand vous devez utiliser HTML Purifier (ne inclure pas quand vous
|
||||
ne devez pas, parce que HTML Purifier est trés grand.)
|
||||
|
||||
Si vous n'aime pas que HTML Purifier change vos include_path, on peut
|
||||
change vos include_path, et:
|
||||
HTML Purifier utilise 'autoload'. Si vous avez définu la fonction
|
||||
__autoload, vous doivez ajoute cet programme:
|
||||
|
||||
require_once 'HTMLPurifier.php';
|
||||
spl_autoload_register('__autoload')
|
||||
|
||||
Seuleument les contents dans library/ est essentiel; vous peut enlever
|
||||
les autre fichiers quand vous est dans une atmosphère professionnel.
|
||||
Plus d'information est dans le document 'INSTALL'.
|
||||
|
||||
|
||||
[En cours de construction]
|
||||
|
||||
|
||||
6. Installation vite
|
||||
3. Installation vite
|
||||
|
||||
Si votre site web est en UTF-8 et XHTML Transitional, utilisez:
|
||||
|
||||
<?php
|
||||
require_once '/path/to/htmlpurifier/library/HTMLPurifier.auto.php';
|
||||
|
||||
|
||||
$purificateur = new HTMLPurifier();
|
||||
$html_propre = $purificateur->purify($html_salle);
|
||||
?>
|
||||
@@ -61,11 +57,13 @@ Sinon, utilisez:
|
||||
|
||||
<?php
|
||||
require_once '/path/to/htmlpurifier/library/HTMLPurifier.auto.php';
|
||||
|
||||
|
||||
$config = HTMLPurifier_Config::createDefault();
|
||||
$config->set('Core', 'Encoding', 'ISO-8859-1'); //remplacez avec votre encoding
|
||||
$config->set('Core', 'XHTML', true); //remplacez avec false si HTML 4.01
|
||||
$purificateur = new HTMLPurifier($config);
|
||||
|
||||
|
||||
$html_propre = $purificateur->purify($html_salle);
|
||||
?>
|
||||
?>
|
||||
|
||||
vim: et sw=4 sts=4
|
||||
|
4
LICENSE
4
LICENSE
@@ -146,7 +146,7 @@ such a program is covered only if its contents constitute a work based
|
||||
on the Library (independent of the use of the Library in a tool for
|
||||
writing it). Whether that is true depends on what the Library does
|
||||
and what the program that uses the Library does.
|
||||
|
||||
|
||||
1. You may copy and distribute verbatim copies of the Library's
|
||||
complete source code as you receive it, in any medium, provided that
|
||||
you conspicuously and appropriately publish on each copy an
|
||||
@@ -501,4 +501,4 @@ necessary. Here is a sample; alter the names:
|
||||
|
||||
That's all there is to it!
|
||||
|
||||
|
||||
vim: et sw=4 sts=4
|
||||
|
492
NEWS
492
NEWS
@@ -9,6 +9,492 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier
|
||||
. Internal change
|
||||
==========================
|
||||
|
||||
4.2.0, released 2010-09-15
|
||||
! Added %Core.RemoveProcessingInstructions, which lets you remove
|
||||
<? ... ?> statements.
|
||||
! Added %URI.DisableResources functionality; the directive originally
|
||||
did nothing. Thanks David Rothstein for reporting.
|
||||
! Add documentation about configuration directive types.
|
||||
! Add %CSS.ForbiddenProperties configuration directive.
|
||||
! Add %HTML.FlashAllowFullScreen to permit embedded Flash objects
|
||||
to utilize full-screen mode.
|
||||
! Add optional support for the <code>file</code> URI scheme, enable
|
||||
by explicitly setting %URI.AllowedSchemes.
|
||||
! Add %Core.NormalizeNewlines options to allow turning off newline
|
||||
normalization.
|
||||
- Fix improper handling of Internet Explorer conditional comments
|
||||
by parser. Thanks zmonteca for reporting.
|
||||
- Fix missing attributes bug when running on Mac Snow Leopard and APC.
|
||||
Thanks sidepodcast for the fix.
|
||||
- Warn if an element is allowed, but an attribute it requires is
|
||||
not allowed.
|
||||
|
||||
4.1.1, released 2010-05-31
|
||||
- Fix undefined index warnings in maintenance scripts.
|
||||
- Fix bug in DirectLex for parsing elements with a single attribute
|
||||
with entities.
|
||||
- Rewrite CSS output logic for font-family and url(). Thanks Mario
|
||||
Heiderich <mario.heiderich@googlemail.com> for reporting and Takeshi
|
||||
Terada <t-terada@violet.plala.or.jp> for suggesting the fix.
|
||||
- Emit an error for CollectErrors if a body is extracted
|
||||
- Fix bug where in background-position for center keyword handling.
|
||||
- Fix infinite loop when a wrapper element is inserted in a context
|
||||
where it's not allowed. Thanks Lars <lars@renoz.dk> for reporting.
|
||||
- Remove +x bit and shebang from index.php; only supported mode is to
|
||||
explicitly call it with php.
|
||||
- Make test script less chatty when log_errors is on.
|
||||
|
||||
4.1.0, released 2010-04-26
|
||||
! Support proprietary height attribute on table element
|
||||
! Support YouTube slideshows that contain /cp/ in their URL.
|
||||
! Support for data: URI scheme; not enabled by default, add it using
|
||||
%URI.AllowedSchemes
|
||||
! Support flashvars when using %HTML.SafeObject and %HTML.SafeEmbed.
|
||||
! Support for Internet Explorer compatibility with %HTML.SafeObject
|
||||
using %Output.FlashCompat.
|
||||
! Handle <ol><ol> properly, by inserting the necessary <li> tag.
|
||||
- Always quote the insides of url(...) in CSS.
|
||||
|
||||
4.0.0, released 2009-07-07
|
||||
# APIs for ConfigSchema subsystem have substantially changed. See
|
||||
docs/dev-config-bcbreaks.txt for details; in essence, anything that
|
||||
had both namespace and directive now have a single unified key.
|
||||
# Some configuration directives were renamed, specifically:
|
||||
%AutoFormatParam.PurifierLinkifyDocURL -> %AutoFormat.PurifierLinkify.DocURL
|
||||
%FilterParam.ExtractStyleBlocksEscaping -> %Filter.ExtractStyleBlocks.Escaping
|
||||
%FilterParam.ExtractStyleBlocksScope -> %Filter.ExtractStyleBlocks.Scope
|
||||
%FilterParam.ExtractStyleBlocksTidyImpl -> %Filter.ExtractStyleBlocks.TidyImpl
|
||||
As usual, the old directive names will still work, but will throw E_NOTICE
|
||||
errors.
|
||||
# The allowed values for class have been relaxed to allow all of CDATA for
|
||||
doctypes that are not XHTML 1.1 or XHTML 2.0. For old behavior, set
|
||||
%Attr.ClassUseCDATA to false.
|
||||
# Instead of appending the content model to an old content model, a blank
|
||||
element will replace the old content model. You can use #SUPER to get
|
||||
the old content model.
|
||||
! More robust support for name="" and id=""
|
||||
! HTMLPurifier_Config::inherit($config) allows you to inherit one
|
||||
configuration, and have changes to that configuration be propagated
|
||||
to all of its children.
|
||||
! Implement %HTML.Attr.Name.UseCDATA, which relaxes validation rules on
|
||||
the name attribute when set. Use with care. Thanks Ian Cook for
|
||||
sponsoring.
|
||||
! Implement %AutoFormat.RemoveEmpty.RemoveNbsp, which removes empty
|
||||
tags that contain non-breaking spaces as well other whitespace. You
|
||||
can also modify which tags should have maintained with
|
||||
%AutoFormat.RemoveEmpty.RemoveNbsp.Exceptions.
|
||||
! Implement %Attr.AllowedClasses, which allows administrators to restrict
|
||||
classes users can use to a specified finite set of classes, and
|
||||
%Attr.ForbiddenClasses, which is the logical inverse.
|
||||
! You can now maintain your own configuration schema directories by
|
||||
creating a config-schema.php file or passing an extra argument. Check
|
||||
docs/dev-config-schema.html for more details.
|
||||
! Added HTMLPurifier_Config->serialize() method, which lets you save away
|
||||
your configuration in a compact serial file, which you can unserialize
|
||||
and use directly without having to go through the overhead of setup.
|
||||
- Fix bug where URIDefinition would not get cleared if it's directives got
|
||||
changed.
|
||||
- Fix fatal error in HTMLPurifier_Encoder on certain platforms (probably NetBSD 5.0)
|
||||
- Fix bug in Linkify autoformatter involving <a><span>http://foo</span></a>
|
||||
- Make %URI.Munge not apply to links that have the same host as your host.
|
||||
- Prevent stray </body> tag from truncating output, if a second </body>
|
||||
is present.
|
||||
. Created script maintenance/rename-config.php for renaming a configuration
|
||||
directive while maintaining its alias. This script does not change source code.
|
||||
. Implement namespace locking for definition construction, to prevent
|
||||
bugs where a directive is used for definition construction but is not
|
||||
used to construct the cache hash.
|
||||
|
||||
3.3.0, released 2009-02-16
|
||||
! Implement CSS property 'overflow' when %CSS.AllowTricky is true.
|
||||
! Implement generic property list classess
|
||||
- Fix bug with testEncodingSupportsASCII() algorithm when iconv() implementation
|
||||
does not do the "right thing" with characters not supported in the output
|
||||
set.
|
||||
- Spellcheck UTF-8: The Secret To Character Encoding
|
||||
- Fix improper removal of the contents of elements with only whitespace. Thanks
|
||||
Eric Wald for reporting.
|
||||
- Fix broken test suite in versions of PHP without spl_autoload_register()
|
||||
- Fix degenerate case with YouTube filter involving double hyphens.
|
||||
Thanks Pierre Attar for reporting.
|
||||
- Fix YouTube rendering problem on certain versions of Firefox.
|
||||
- Fix CSSDefinition Printer problems with decorators
|
||||
- Add text parameter to unit tests, forces text output
|
||||
. Add verbose mode to command line test runner, use (--verbose)
|
||||
. Turn on unit tests for UnitConverter
|
||||
. Fix missing version number in configuration %Attr.DefaultImageAlt (added 3.2.0)
|
||||
. Fix newline errors that caused spurious failures when CRLF HTML Purifier was
|
||||
tested on Linux.
|
||||
. Removed trailing whitespace from all text files, see
|
||||
remote-trailing-whitespace.php maintenance script.
|
||||
. Convert configuration to use property list backend.
|
||||
|
||||
3.2.0, released 2008-10-31
|
||||
# Using %Core.CollectErrors forces line number/column tracking on, whereas
|
||||
previously you could theoretically turn it off.
|
||||
# HTMLPurifier_Injector->notifyEnd() is formally deprecated. Please
|
||||
use handleEnd() instead.
|
||||
! %Output.AttrSort for when you need your attributes in alphabetical order to
|
||||
deal with a bug in FCKEditor. Requested by frank farmer.
|
||||
! Enable HTML comments when %HTML.Trusted is on. Requested by Waldo Jaquith.
|
||||
! Proper support for name attribute. It is now allowed and equivalent to the id
|
||||
attribute in a and img tags, and is only converted to id when %HTML.TidyLevel
|
||||
is heavy (for all doctypes).
|
||||
! %AutoFormat.RemoveEmpty to remove some empty tags from documents. Please don't
|
||||
use on hand-written HTML.
|
||||
! Add error-cases for unsupported elements in MakeWellFormed. This enables
|
||||
the strategy to be used, standalone, on untrusted input.
|
||||
! %Core.AggressivelyFixLt is on by default. This causes more sensible
|
||||
processing of left angled brackets in smileys and other whatnot.
|
||||
! Test scripts now have a 'type' parameter, which lets you say 'htmlpurifier',
|
||||
'phpt', 'vtest', etc. in order to only execute those tests. This supercedes
|
||||
the --only-phpt parameter, although for backwards-compatibility the flag
|
||||
will still work.
|
||||
! AutoParagraph auto-formatter will now preserve double-newlines upon output.
|
||||
Users who are not performing inbound filtering, this may seem a little
|
||||
useless, but as a bonus, the test suite and handling of edge cases is also
|
||||
improved.
|
||||
! Experimental implementation of forms for %HTML.Trusted
|
||||
! Track column numbers when maintain line numbers is on
|
||||
! Proprietary 'background' attribute on table-related elements converted into
|
||||
corresponding CSS. Thanks Fusemail for sponsoring this feature!
|
||||
! Add forward(), forwardUntilEndToken(), backward() and current() to Injector
|
||||
supertype.
|
||||
! HTMLPurifier_Injector->handleEnd() permits modification to end tokens. The
|
||||
time of operation varies slightly from notifyEnd() as *all* end tokens are
|
||||
processed by the injector before they are subject to the well-formedness rules.
|
||||
! %Attr.DefaultImageAlt allows overriding default behavior of setting alt to
|
||||
basename of image when not present.
|
||||
! %AutoFormat.DisplayLinkURI neuters <a> tags into plain text URLs.
|
||||
- Fix two bugs in %URI.MakeAbsolute; one involving empty paths in base URLs,
|
||||
the other involving an undefined $is_folder error.
|
||||
- Throw error when %Core.Encoding is set to a spurious value. Previously,
|
||||
this errored silently and returned false.
|
||||
- Redirected stderr to stdout for flush error output.
|
||||
- %URI.DisableExternal will now use the host in %URI.Base if %URI.Host is not
|
||||
available.
|
||||
- Do not re-munge URL if the output URL has the same host as the input URL.
|
||||
Requested by Chris.
|
||||
- Fix error in documentation regarding %Filter.ExtractStyleBlocks
|
||||
- Prevent <![CDATA[<body></body>]]> from triggering %Core.ConvertDocumentToFragment
|
||||
- Fix bug with inline elements in blockquotes conflicting with strict doctype
|
||||
- Detect if HTML support is disabled for DOM by checking for loadHTML() method.
|
||||
- Fix bug where dots and double-dots in absolute URLs without hostname were
|
||||
not collapsed by URIFilter_MakeAbsolute.
|
||||
- Fix bug with anonymous modules operating on SafeEmbed or SafeObject elements
|
||||
by reordering their addition.
|
||||
- Will now throw exception on many error conditions during lexer creation; also
|
||||
throw an exception when MaintainLineNumbers is true, but a non-tracksLineNumbers
|
||||
is being used.
|
||||
- Detect if domxml extension is loaded, and use DirectLEx accordingly.
|
||||
- Improve handling of big numbers with floating point arithmetic in UnitConverter.
|
||||
Reported by David Morton.
|
||||
. Strategy_MakeWellFormed now operates in-place, saving memory and allowing
|
||||
for more interesting filter-backtracking
|
||||
. New HTMLPurifier_Injector->rewind() functionality, allows injectors to rewind
|
||||
index to reprocess tokens.
|
||||
. StringHashParser now allows for multiline sections with "empty" content;
|
||||
previously the section would remain undefined.
|
||||
. Added --quick option to multitest.php, which tests only the most recent
|
||||
release for each series.
|
||||
. Added --distro option to multitest.php, which accepts either 'normal' or
|
||||
'standalone'. This supercedes --exclude-normal and --exclude-standalone
|
||||
|
||||
3.1.1, released 2008-06-19
|
||||
# %URI.Munge now, by default, does not munge resources (for example, <img src="">)
|
||||
In order to enable this again, please set %URI.MungeResources to true.
|
||||
! More robust imagecrash protection with height/width CSS with %CSS.MaxImgLength,
|
||||
and height/width HTML with %HTML.MaxImgLength.
|
||||
! %URI.MungeSecretKey for secure URI munging. Thanks Chris
|
||||
for sponsoring this feature. Check out the corresponding documentation
|
||||
for details. (Att Nightly testers: The API for this feature changed before
|
||||
the general release. Namely, rename your directives %URI.SecureMungeSecretKey =>
|
||||
%URI.MungeSecretKey and and %URI.SecureMunge => %URI.Munge)
|
||||
! Implemented post URI filtering. Set member variable $post to true to set
|
||||
a URIFilter as such.
|
||||
! Allow modules to define injectors via $info_injector. Injectors are
|
||||
automatically disabled if injector's needed elements are not found.
|
||||
! Support for "safe" objects added, use %HTML.SafeObject and %HTML.SafeEmbed.
|
||||
Thanks Chris for sponsoring. If you've been using ad hoc code from the
|
||||
forums, PLEASE use this instead.
|
||||
! Added substitutions for %e, %n, %a and %p in %URI.Munge (in order,
|
||||
embedded, tag name, attribute name, CSS property name). See %URI.Munge
|
||||
for more details. Requested by Jochem Blok.
|
||||
- Disable percent height/width attributes for img.
|
||||
- AttrValidator operations are now atomic; updates to attributes are not
|
||||
manifest in token until end of operations. This prevents naughty internal
|
||||
code from directly modifying CurrentToken when they're not supposed to.
|
||||
This semantics change was requested by frank farmer.
|
||||
- Percent encoding checks enabled for URI query and fragment
|
||||
- Fix stray backslashes in font-family; CSS Unicode character escapes are
|
||||
now properly resolved (although *only* in font-family). Thanks Takeshi Terada
|
||||
for reporting.
|
||||
- Improve parseCDATA algorithm to take into account newline normalization
|
||||
- Account for browser confusion between Yen character and backslash in
|
||||
Shift_JIS encoding. This fix generalizes to any other encoding which is not
|
||||
a strict superset of printable ASCII. Thanks Takeshi Terada for reporting.
|
||||
- Fix missing configuration parameter in Generator calls. Thanks vs for the
|
||||
partial patch.
|
||||
- Improved adherence to Unicode by checking for non-character codepoints.
|
||||
Thanks Geoffrey Sneddon for reporting. This may result in degraded
|
||||
performance for extremely large inputs.
|
||||
- Allow CSS property-value pair ''text-decoration: none''. Thanks Jochem Blok
|
||||
for reporting.
|
||||
. Added HTMLPurifier_UnitConverter and HTMLPurifier_Length for convenient
|
||||
handling of CSS-style lengths. HTMLPurifier_AttrDef_CSS_Length now uses
|
||||
this class.
|
||||
. API of HTMLPurifier_AttrDef_CSS_Length changed from __construct($disable_negative)
|
||||
to __construct($min, $max). __construct(true) is equivalent to
|
||||
__construct('0').
|
||||
. Added HTMLPurifier_AttrDef_Switch class
|
||||
. Rename HTMLPurifier_HTMLModule_Tidy->construct() to setup() and bubble method
|
||||
up inheritance hierarchy to HTMLPurifier_HTMLModule. All HTMLModules
|
||||
get this called with the configuration object. All modules now
|
||||
use this rather than __construct(), although legacy code using constructors
|
||||
will still work--the new format, however, lets modules access the
|
||||
configuration object for HTML namespace dependant tweaks.
|
||||
. AttrDef_HTML_Pixels now takes a single construction parameter, pixels.
|
||||
. ConfigSchema data-structure heavily optimized; on average it uses a third
|
||||
the memory it did previously. The interface has changed accordingly,
|
||||
consult changes to HTMLPurifier_Config for details.
|
||||
. Variable parsing types now are magic integers instead of strings
|
||||
. Added benchmark for ConfigSchema
|
||||
. HTMLPurifier_Generator requires $config and $context parameters. If you
|
||||
don't know what they should be, use HTMLPurifier_Config::createDefault()
|
||||
and new HTMLPurifier_Context().
|
||||
. Printers now properly distinguish between output configuration, and
|
||||
target configuration. This is not applicable to scripts using
|
||||
the Printers for HTML Purifier related tasks.
|
||||
. HTML/CSS Printers must be primed with prepareGenerator($gen_config), otherwise
|
||||
fatal errors will ensue.
|
||||
. URIFilter->prepare can return false in order to abort loading of the filter
|
||||
. Factory for AttrDef_URI implemented, URI#embedded to indicate URI that embeds
|
||||
an external resource.
|
||||
. %URI.Munge functionality factored out into a post-filter class.
|
||||
. Added CurrentCSSProperty context variable during CSS validation
|
||||
|
||||
3.1.0, released 2008-05-18
|
||||
# Unnecessary references to objects (vestiges of PHP4) removed from method
|
||||
signatures. The following methods do not need references when assigning from
|
||||
them and will result in E_STRICT errors if you try:
|
||||
+ HTMLPurifier_Config->get*Definition() [* = HTML, CSS]
|
||||
+ HTMLPurifier_ConfigSchema::instance()
|
||||
+ HTMLPurifier_DefinitionCacheFactory::instance()
|
||||
+ HTMLPurifier_DefinitionCacheFactory->create()
|
||||
+ HTMLPurifier_DoctypeRegistry->register()
|
||||
+ HTMLPurifier_DoctypeRegistry->get()
|
||||
+ HTMLPurifier_HTMLModule->addElement()
|
||||
+ HTMLPurifier_HTMLModule->addBlankElement()
|
||||
+ HTMLPurifier_LanguageFactory::instance()
|
||||
# Printer_ConfigForm's get*() functions were static-ified
|
||||
# %HTML.ForbiddenAttributes requires attribute declarations to be in the
|
||||
form of tag@attr, NOT tag.attr (which will throw an error and won't do
|
||||
anything). This is for forwards compatibility with XML; you'd do best
|
||||
to migrate an %HTML.AllowedAttributes directives to this syntax too.
|
||||
! Allow index to be false for config from form creation
|
||||
! Added HTMLPurifier::VERSION constant
|
||||
! Commas, not dashes, used for serializer IDs. This change is forwards-compatible
|
||||
and allows for version numbers like "3.1.0-dev".
|
||||
! %HTML.Allowed deals gracefully with whitespace anywhere, anytime!
|
||||
! HTML Purifier's URI handling is a lot more robust, with much stricter
|
||||
validation checks and better percent encoding handling. Thanks Gareth Heyes
|
||||
for indicating security vulnerabilities from lax percent encoding.
|
||||
! Bootstrap autoloader deals more robustly with classes that don't exist,
|
||||
preventing class_exists($class, true) from barfing.
|
||||
- InterchangeBuilder now alphabetizes its lists
|
||||
- Validation error in configdoc output fixed
|
||||
- Iconv and other encoding errors muted even with custom error handlers that
|
||||
do not honor error_reporting
|
||||
- Add protection against imagecrash attack with CSS height/width
|
||||
- HTMLPurifier::instance() created for consistency, is equivalent to getInstance()
|
||||
- Fixed and revamped broken ConfigForm smoketest
|
||||
- Bug with bool/null fields in Printer_ConfigForm fixed
|
||||
- Bug with global forbidden attributes fixed
|
||||
- Improved error messages for allowed and forbidden HTML elements and attributes
|
||||
- Missing (or null) in configdoc documentation restored
|
||||
- If DOM throws and exception during parsing with PH5P (occurs in newer versions
|
||||
of DOM), HTML Purifier punts to DirectLex
|
||||
- Fatal error with unserialization of ScriptRequired
|
||||
- Created directories are now chmod'ed properly
|
||||
- Fixed bug with fallback languages in LanguageFactory
|
||||
- Standalone testing setup properly with autoload
|
||||
. Out-of-date documentation revised
|
||||
. UTF-8 encoding check optimization as suggested by Diego
|
||||
. HTMLPurifier_Error removed in favor of exceptions
|
||||
. More copy() function removed; should use clone instead
|
||||
. More extensive unit tests for HTMLDefinition
|
||||
. assertPurification moved to central harness
|
||||
. HTMLPurifier_Generator accepts $config and $context parameters during
|
||||
instantiation, not runtime
|
||||
. Double-quotes outside of attribute values are now unescaped
|
||||
|
||||
3.1.0rc1, released 2008-04-22
|
||||
# Autoload support added. Internal require_once's removed in favor of an
|
||||
explicit require list or autoloading. To use HTML Purifier,
|
||||
you must now either use HTMLPurifier.auto.php
|
||||
or HTMLPurifier.includes.php; setting the include path and including
|
||||
HTMLPurifier.php is insufficient--in such cases include HTMLPurifier.autoload.php
|
||||
as well to register our autoload handler (or modify your autoload function
|
||||
to check HTMLPurifier_Bootstrap::getPath($class)). You can also use
|
||||
HTMLPurifier.safe-includes.php for a less performance friendly but more
|
||||
user-friendly library load.
|
||||
# HTMLPurifier_ConfigSchema static functions are officially deprecated. Schema
|
||||
information is stored in the ConfigSchema directory, and the
|
||||
maintenance/generate-schema-cache.php generates the schema.ser file, which
|
||||
is now instantiated. Support for userland schema changes coming soon!
|
||||
# HTMLPurifier_Config will now throw E_USER_NOTICE when you use a directive
|
||||
alias; to get rid of these errors just modify your configuration to use
|
||||
the new directive name.
|
||||
# HTMLPurifier->addFilter is deprecated; built-in filters can now be
|
||||
enabled using %Filter.$filter_name or by setting your own filters using
|
||||
%Filter.Custom
|
||||
# Directive-level safety properties superceded in favor of module-level
|
||||
safety. Internal method HTMLModule->addElement() has changed, although
|
||||
the externally visible HTMLDefinition->addElement has *not* changed.
|
||||
! Extra utility classes for testing and non-library operations can
|
||||
be found in extras/. Specifically, these are FSTools and ConfigDoc.
|
||||
You may find a use for these in your own project, but right now they
|
||||
are highly experimental and volatile.
|
||||
! Integration with PHPT allows for automated smoketests
|
||||
! Limited support for proprietary HTML elements, namely <marquee>, sponsored
|
||||
by Chris. You can enable them with %HTML.Proprietary if your client
|
||||
demands them.
|
||||
! Support for !important CSS cascade modifier. By default, this will be stripped
|
||||
from CSS, but you can enable it using %CSS.AllowImportant
|
||||
! Support for display and visibility CSS properties added, set %CSS.AllowTricky
|
||||
to true to use them.
|
||||
! HTML Purifier now has its own Exception hierarchy under HTMLPurifier_Exception.
|
||||
Developer error (not enduser error) can cause these to be triggered.
|
||||
! Experimental kses() wrapper introduced with HTMLPurifier.kses.php
|
||||
! Finally %CSS.AllowedProperties for tweaking allowed CSS properties without
|
||||
mucking around with HTMLPurifier_CSSDefinition
|
||||
! ConfigDoc output has been enhanced with version and deprecation info.
|
||||
! %HTML.ForbiddenAttributes and %HTML.ForbiddenElements implemented.
|
||||
- Autoclose now operates iteratively, i.e. <span><span><div> now has
|
||||
both span tags closed.
|
||||
- Various HTMLPurifier_Config convenience functions now accept another parameter
|
||||
$schema which defines what HTMLPurifier_ConfigSchema to use besides the
|
||||
global default.
|
||||
- Fix bug with trusted script handling in libxml versions later than 2.6.28.
|
||||
- Fix bug in ExtractStyleBlocks with comments in style tags
|
||||
- Fix bug in comment parsing for DirectLex
|
||||
- Flush output now displayed when in command line mode for unit tester
|
||||
- Fix bug with rgb(0, 1, 2) color syntax with spaces inside shorthand syntax
|
||||
- HTMLPurifier_HTMLDefinition->addAttribute can now be called multiple times
|
||||
on the same element without emitting errors.
|
||||
- Fixed fatal error in PH5P lexer with invalid tag names
|
||||
. Plugins now get their own changelogs according to project conventions.
|
||||
. Convert tokens to use instanceof, reducing memory footprint and
|
||||
improving comparison speed.
|
||||
. Dry runs now supported in SimpleTest; testing facilities improved
|
||||
. Bootstrap class added for handling autoloading functionality
|
||||
. Implemented recursive glob at FSTools->globr
|
||||
. ConfigSchema now has instance methods for all corresponding define*
|
||||
static methods.
|
||||
. A couple of new historical maintenance scripts were added.
|
||||
. HTMLPurifier/HTMLModule/Tidy/XHTMLAndHTML4.php split into two files
|
||||
. tests/index.php can now be run from any directory.
|
||||
. HTMLPurifier_Token subclasses split into seperate files
|
||||
. HTMLPURIFIER_PREFIX now is defined in Bootstrap.php, NOT HTMLPurifier.php
|
||||
. HTMLPURIFIER_PREFIX can now be defined outside of HTML Purifier
|
||||
. New --php=php flag added, allows PHP executable to be specified (command
|
||||
line only!)
|
||||
. htmlpurifier_add_test() preferred method to translate test files in to
|
||||
classes, because it handles PHPT files too.
|
||||
. Debugger class is deprecated and will be removed soon.
|
||||
. Command line argument parsing for testing scripts revamped, now --opt value
|
||||
format is supported.
|
||||
. Smoketests now cleanup after magic quotes
|
||||
. Generator now can output comments (however, comments are still stripped
|
||||
from HTML Purifier output)
|
||||
. HTMLPurifier_ConfigSchema->validate() deprecated in favor of
|
||||
HTMLPurifier_VarParser->parse()
|
||||
. Integers auto-cast into float type by VarParser.
|
||||
. HTMLPURIFIER_STRICT removed; no validation is performed on runtime, only
|
||||
during cache generation
|
||||
. Reordered script calls in maintenance/flush.php
|
||||
. Command line scripts now honor exit codes
|
||||
. When --flush fails in unit testers, abort tests and print message
|
||||
. Improved documentation in docs/dev-flush.html about the maintenance scripts
|
||||
. copy() methods removed in favor of clone keyword
|
||||
|
||||
3.0.0, released 2008-01-06
|
||||
# HTML Purifier is PHP 5 only! The 2.1.x branch will be maintained
|
||||
until PHP 4 is completely deprecated, but no new features will be added
|
||||
to it.
|
||||
+ Visibility declarations added
|
||||
+ Constructor methods renamed to __construct()
|
||||
+ PHP4 reference cruft removed (in progress)
|
||||
! CSS properties are now case-insensitive
|
||||
! DefinitionCacheFactory now can register new implementations
|
||||
! New HTMLPurifier_Filter_ExtractStyleBlocks for extracting <style> from
|
||||
documents and cleaning their contents up. Requires the CSSTidy library
|
||||
<http://csstidy.sourceforge.net/>. You can access the blocks with the
|
||||
'StyleBlocks' Context variable ($purifier->context->get('StyleBlocks')).
|
||||
The output CSS can also be "scoped" for a specific element, use:
|
||||
%Filter.ExtractStyleBlocksScope
|
||||
! Experimental support for some proprietary CSS attributes allowed:
|
||||
opacity (and all of the browser-specific equivalents) and scrollbar colors.
|
||||
Enable by setting %CSS.Proprietary to true.
|
||||
- Colors missing # but in hex form will be corrected
|
||||
- CSS Number algorithm improved
|
||||
- Unit testing and multi-testing now on steroids: command lines,
|
||||
XML output, and other goodies now added.
|
||||
. Unit tests for Injector improved
|
||||
. New classes:
|
||||
+ HTMLPurifier_AttrDef_CSS_AlphaValue
|
||||
+ HTMLPurifier_AttrDef_CSS_Filter
|
||||
. Multitest now has a file docblock
|
||||
|
||||
2.1.3, released 2007-11-05
|
||||
! tests/multitest.php allows you to test multiple versions by running
|
||||
tests/index.php through multiple interpreters using `phpv` shell
|
||||
script (you must provide this script!)
|
||||
- Fixed poor include ordering for Email URI AttrDefs, causes fatal errors
|
||||
on some systems.
|
||||
- Injector algorithm further refined: off-by-one error regarding skip
|
||||
counts for dormant injectors fixed
|
||||
- Corrective blockquote definition now enabled for HTML 4.01 Strict
|
||||
- Fatal error when <img> tag (or any other element with required attributes)
|
||||
has 'id' attribute fixed, thanks NykO18 for reporting
|
||||
- Fix warning emitted when a non-supported URI scheme is passed to the
|
||||
MakeAbsolute URIFilter, thanks NykO18 (again)
|
||||
- Further refine AutoParagraph injector. Behavior inside of elements
|
||||
allowing paragraph tags clarified: only inline content delimeted by
|
||||
double newlines (not block elements) are paragraphed.
|
||||
- Buggy treatment of end tags of elements that have required attributes
|
||||
fixed (does not manifest on default tag-set)
|
||||
- Spurious internal content reorganization error suppressed
|
||||
- HTMLDefinition->addElement now returns a reference to the created
|
||||
element object, as implied by the documentation
|
||||
- Phorum mod's HTML Purifier help message expanded (unreleased elsewhere)
|
||||
- Fix a theoretical class of infinite loops from DirectLex reported
|
||||
by Nate Abele
|
||||
- Work around unnecessary DOMElement type-cast in PH5P that caused errors
|
||||
in PHP 5.1
|
||||
- Work around PHP 4 SimpleTest lack-of-error complaining for one-time-only
|
||||
HTMLDefinition errors, this may indicate problems with error-collecting
|
||||
facilities in PHP 5
|
||||
- Make ErrorCollectorEMock work in both PHP 4 and PHP 5
|
||||
- Make PH5P work with PHP 5.0 by removing unnecessary array parameter typedef
|
||||
. %Core.AcceptFullDocuments renamed to %Core.ConvertDocumentToFragment
|
||||
to better communicate its purpose
|
||||
. Error unit tests can now specify the expectation of no errors. Future
|
||||
iterations of the harness will be extremely strict about what errors
|
||||
are allowed
|
||||
. Extend Injector hooks to allow for more powerful injector routines
|
||||
. HTMLDefinition->addBlankElement created, as according to the HTMLModule
|
||||
method
|
||||
. Doxygen configuration file updated, with minor improvements
|
||||
. Test runner now checks for similarly named files in conf/ directory too.
|
||||
. Minor cosmetic change to flush-definition-cache.php: trailing newline is
|
||||
outputted
|
||||
. Maintenance script for generating PH5P patch added, original PH5P source
|
||||
file also added under version control
|
||||
. Full unit test runner script title made more descriptive with PHP version
|
||||
. Updated INSTALL file to state that 4.3.7 is the earliest version we
|
||||
are actively testing
|
||||
|
||||
2.1.2, released 2007-09-03
|
||||
! Implemented Object module for trusted users
|
||||
! Implemented experimental HTML5 parsing mode using PH5P. To use, add
|
||||
@@ -150,7 +636,7 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier
|
||||
and better modularization
|
||||
# Configuration object now finalizes itself when a read operation is
|
||||
performed on it, ensuring that its internal state stays consistent.
|
||||
To revert this behavior, you can set the $autoFinalize member variable
|
||||
To revert this behavior, you can set the $autoFinalize member variable
|
||||
off, but it's not recommended.
|
||||
# New compact syntax for AttrDef objects that can be used to instantiate
|
||||
new objects via make()
|
||||
@@ -234,7 +720,7 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier
|
||||
AttrTransform_EnumToCSS, refer to HTMLModule/TransformToStrict.php to
|
||||
see how the new equivalent is implemented
|
||||
. Unit tests now use exclusively assertIdentical
|
||||
|
||||
|
||||
1.6.0, released 2007-04-01
|
||||
! Support for most common deprecated attributes via transformations:
|
||||
+ bgcolor in td, th, tr and table
|
||||
@@ -448,3 +934,5 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier
|
||||
+ Shorthand CSS properties
|
||||
+ Table CSS properties
|
||||
+ Deprecated attribute transformations
|
||||
|
||||
vim: et sw=4 sts=4
|
||||
|
18
README
18
README
@@ -2,15 +2,15 @@
|
||||
README
|
||||
All about HTML Purifier
|
||||
|
||||
HTML Purifier is an HTML filtering solution that uses a unique combination
|
||||
of robust whitelists and agressive parsing to ensure that not only are
|
||||
XSS attacks thwarted, but the resulting HTML is standards compliant.
|
||||
HTML Purifier is an HTML filtering solution that uses a unique combination
|
||||
of robust whitelists and agressive parsing to ensure that not only are
|
||||
XSS attacks thwarted, but the resulting HTML is standards compliant.
|
||||
|
||||
HTML Purifier is oriented towards richly formatted documents from
|
||||
untrusted sources that require CSS and a full tag-set. This library can
|
||||
be configured to accept a more restrictive set of tags, but it won't be
|
||||
as efficient as more bare-bones parsers. It will, however, do the job
|
||||
right, which may be more important.
|
||||
HTML Purifier is oriented towards richly formatted documents from
|
||||
untrusted sources that require CSS and a full tag-set. This library can
|
||||
be configured to accept a more restrictive set of tags, but it won't be
|
||||
as efficient as more bare-bones parsers. It will, however, do the job
|
||||
right, which may be more important.
|
||||
|
||||
Places to go:
|
||||
|
||||
@@ -20,3 +20,5 @@ Places to go:
|
||||
* See WYSIWYG for information on editors like TinyMCE and FCKeditor
|
||||
|
||||
HTML Purifier can be found on the web at: http://htmlpurifier.org/
|
||||
|
||||
vim: et sw=4 sts=4
|
||||
|
142
TODO
142
TODO
@@ -7,83 +7,135 @@ TODO List
|
||||
? Maybe I'll Do It
|
||||
==========================
|
||||
|
||||
If no interest is expressed for a feature that may required a considerable
|
||||
If no interest is expressed for a feature that may require a considerable
|
||||
amount of effort to implement, it may get endlessly delayed. Do not be
|
||||
afraid to cast your vote for the next feature to be implemented!
|
||||
|
||||
2.2 release [Error'ed]
|
||||
# Error logging for filtering/cleanup procedures
|
||||
- XSS-attempt detection
|
||||
Things to do as soon as possible:
|
||||
|
||||
2.3 release [Do What I Mean, Not What I Say]
|
||||
# Additional support for poorly written HTML
|
||||
- Microsoft Word HTML cleaning (i.e. MsoNormal, but research essential!)
|
||||
- Friendly strict handling of <address> (block -> <br>)
|
||||
- Remove redundant tags, ex. <u><u>Underlined</u></u>. Implementation notes:
|
||||
1. Analyzing which tags to remove duplicants
|
||||
2. Ensure attributes are merged into the parent tag
|
||||
3. Extend the tag exclusion system to specify whether or not the
|
||||
contents should be dropped or not (currently, there's code that could do
|
||||
something like this if it didn't drop the inner text too.)
|
||||
- Remove <span> tags that don't do anything (no attributes)
|
||||
- Remove empty inline tags<i></i>
|
||||
- Append something to duplicate IDs so they're still usable (impl. note: the
|
||||
dupe detector would also need to detect the suffix as well)
|
||||
- Externalize inline CSS to promote clean HTML
|
||||
- Think about allowing explicit order of operations hooks for transforms
|
||||
- Inputs don't do the right thing with submit
|
||||
- Fix "<.<" bug (trailing < is removed if not EOD)
|
||||
- Build in better internal state dumps and debugging tools for remote
|
||||
debugging
|
||||
- Allowed/Allowed* have strange interactions when both set
|
||||
- Transform lone embeds into object tags
|
||||
- Deprecated config options that emit warnings when you set them (with'
|
||||
a way of muting the warning if you really want to)
|
||||
- Make HTML.Trusted work with Output.FlashCompat
|
||||
|
||||
2.4 release [It's All About Trust] (floating)
|
||||
FUTURE VERSIONS
|
||||
---------------
|
||||
|
||||
4.3 release [OMG CONFIG PONIES]
|
||||
! Fix Printer. It's from the old days when we didn't have decent XML classes
|
||||
! Factor demo.php into a set of Printer classes, and then create a stub
|
||||
file for users here (inside the actual HTML Purifier library)
|
||||
- Fix error handling with form construction
|
||||
- Do encoding validation in Printers, or at least, where user data comes in
|
||||
- Config: Add examples to everything (make built-in which also automatically
|
||||
gives output)
|
||||
- Add "register" field to config schemas to eliminate dependence on
|
||||
naming conventions (try to remember why we ultimately decided on tihs)
|
||||
|
||||
5.0 release [HTML 5]
|
||||
# Swap out code to use html5lib tokenizer and tree-builder
|
||||
! Allow turning off of FixNesting and required attribute insertion
|
||||
|
||||
5.1 release [It's All About Trust] (floating)
|
||||
# Implement untrusted, dangerous elements/attributes
|
||||
# Implement IDREF support (harder than it seems, since you cannot have
|
||||
IDREFs to non-existent IDs)
|
||||
- Implement <area> (client and server side image maps are blocking
|
||||
on IDREF support)
|
||||
# Frameset XHTML 1.0 and HTML 4.01 doctypes
|
||||
- Figure out how to simultaneously set %CSS.Trusted and %HTML.Trusted (?)
|
||||
|
||||
3.0 release [Beyond HTML]
|
||||
5.2 release [Error'ed]
|
||||
# Error logging for filtering/cleanup procedures
|
||||
# Additional support for poorly written HTML
|
||||
- Microsoft Word HTML cleaning (i.e. MsoNormal, but research essential!)
|
||||
- Friendly strict handling of <address> (block -> <br>)
|
||||
- XSS-attempt detection--certain errors are flagged XSS-like
|
||||
- Append something to duplicate IDs so they're still usable (impl. note: the
|
||||
dupe detector would also need to detect the suffix as well)
|
||||
|
||||
6.0 release [Beyond HTML]
|
||||
# Legit token based CSS parsing (will require revamping almost every
|
||||
AttrDef class). Probably will use CSSTidy class
|
||||
# More control over allowed CSS properties (maybe modularize it in the
|
||||
same fashion!)
|
||||
# Formatters for plaintext
|
||||
- Smileys
|
||||
AttrDef class). Probably will use CSSTidy
|
||||
# More control over allowed CSS properties using a modularization
|
||||
# IRI support (this includes IDN)
|
||||
- Standardize token armor for all areas of processing
|
||||
- Convert RTL/LTR override characters to <bdo> tags, or vice versa on demand.
|
||||
Also, enable disabling of directionality
|
||||
|
||||
4.0 release [To XML and Beyond]
|
||||
7.0 release [To XML and Beyond]
|
||||
- Extended HTML capabilities based on namespacing and tag transforms (COMPLEX)
|
||||
- Hooks for adding custom processors to custom namespaced tags and
|
||||
attributes, offer default implementation
|
||||
- Lots of documentation and samples
|
||||
|
||||
Ongoing
|
||||
- Lots of profiling, make it faster!
|
||||
- More refactoring to take advantage of PHP5's facilities
|
||||
- Refactor unit tests into lots of test methods
|
||||
- Plugins for major CMSes (COMPLEX)
|
||||
- phpBB
|
||||
- eFiction
|
||||
- more! (look for ones that use WYSIWYGs)
|
||||
- Complete basic smoketests
|
||||
- Also, a FAQ for extension writers with HTML Purifier
|
||||
|
||||
Unknown release (on a scratch-an-itch basis)
|
||||
# CHMOD install script for PEAR installs
|
||||
? Have 'lang' attribute be checked against official lists, achieved by
|
||||
encoding all characters that have string entity equivalents
|
||||
- Abstract ChildDef_BlockQuote to work with all elements that only
|
||||
allow blocks in them, required or optional
|
||||
- Reorganize Unit Tests
|
||||
- Reorganize configuration directives (Create more namespaces! Get messy!)
|
||||
AutoFormat
|
||||
- Smileys
|
||||
- Syntax highlighting (with GeSHi) with <pre> and possibly <?php
|
||||
- Look at http://drupal.org/project/Modules/category/63 for ideas
|
||||
|
||||
Neat feature related
|
||||
! Support exporting configuration, so users can easily tweak settings
|
||||
in the demo, and then copy-paste into their own setup
|
||||
- Advanced URI filtering schemes (see docs/proposal-new-directives.txt)
|
||||
- Implement lenient <ruby> child validation
|
||||
- Allow scoped="scoped" attribute in <style> tags; may be troublesome
|
||||
because regular CSS has no way of uniquely identifying nodes, so we'd
|
||||
have to generate IDs
|
||||
- Explain how to use HTML Purifier in non-PHP languages / create
|
||||
a simple command line stub (or complicated?)
|
||||
- Fixes for Firefox's inability to handle COL alignment props (Bug 915)
|
||||
- Automatically add non-breaking spaces to empty table cells when
|
||||
empty-cells:show is applied to have compatibility with Internet Explorer
|
||||
- Table of Contents generation (XHTML Compiler might be reusable). May also
|
||||
be out-of-band information.
|
||||
- Full set of color keywords. Also, a way to add onto them without
|
||||
finalizing the configuration object.
|
||||
- Write a var_export and memcached DefinitionCache - Denis
|
||||
- Built-in support for target="_blank" on all external links
|
||||
- Convert RTL/LTR override characters to <bdo> tags, or vice versa on demand.
|
||||
Also, enable disabling of directionality
|
||||
? Externalize inline CSS to promote clean HTML, proposed by Sander Tekelenburg
|
||||
? Remove redundant tags, ex. <u><u>Underlined</u></u>. Implementation notes:
|
||||
1. Analyzing which tags to remove duplicants
|
||||
2. Ensure attributes are merged into the parent tag
|
||||
3. Extend the tag exclusion system to specify whether or not the
|
||||
contents should be dropped or not (currently, there's code that could do
|
||||
something like this if it didn't drop the inner text too.)
|
||||
|
||||
Requested
|
||||
Maintenance related (slightly boring)
|
||||
# CHMOD install script for PEAR installs
|
||||
! Factor out command line parser into its own class, and unit test it
|
||||
- Reduce size of internal data-structures (esp. HTMLDefinition)
|
||||
- Allow merging configurations. Thus,
|
||||
a -> b -> default
|
||||
c -> d -> default
|
||||
becomes
|
||||
a -> b -> c -> d -> default
|
||||
Maybe allow more fine-grained tuning of this behavior. Alternatively,
|
||||
encourage people to use short plist depths before building them up.
|
||||
- Time PHPT tests
|
||||
|
||||
ChildDef related (very boring)
|
||||
- Abstract ChildDef_BlockQuote to work with all elements that only
|
||||
allow blocks in them, required or optional
|
||||
- Implement lenient <ruby> child validation
|
||||
|
||||
Wontfix
|
||||
- Non-lossy smart alternate character encoding transformations (unless
|
||||
patch provided)
|
||||
- Pretty-printing HTML: users can use Tidy on the output on entire page
|
||||
- Native content compression, whitespace stripping (don't rely on Tidy, make
|
||||
sure we don't remove from <pre> or related tags): use gzip if this is
|
||||
- Native content compression, whitespace stripping: use gzip if this is
|
||||
really important
|
||||
|
||||
vim: et sw=4 sts=4
|
||||
|
16
WHATSNEW
16
WHATSNEW
@@ -1,8 +1,8 @@
|
||||
Version 2.1.2 is a mix of experimental features and stability updates.
|
||||
Among new features: an Object module for trusted users, support for the
|
||||
CSS property 'border-spacing', and HTML 5 style parsing using PH5P.
|
||||
Bug fixes ihave resolved a few obscure issues including border-collapse:seperate,
|
||||
a DirectLex parsing error, broken HTML in printDefinition.php, and problems
|
||||
with the experimental standalone distribution. Also, there were large
|
||||
amounts of behind-the-scenes refactoring and the removal of URIScheme
|
||||
inclusion reflection.
|
||||
HTML Purifier 4.2.0 is a minor release that implements a number of
|
||||
feature requests accumulated over half a year. New configuration
|
||||
options include %Core.RemoveProcessingInstructions,
|
||||
%CSS.ForbiddenProperties, %HTML.FlashAllowFullScreen and
|
||||
%Core.NormalizeNewlines. Additionally,%URI.DisableResources is
|
||||
now functional and file: is an optionally supported URI scheme.
|
||||
There are also some minor bugfixes, usability improvements and
|
||||
documentation updates.
|
||||
|
2
WYSIWYG
2
WYSIWYG
@@ -16,3 +16,5 @@ trouble. Therein lies the solution:
|
||||
HTML Purifier is perfect for filtering pure-HTML input from WYSIWYG editors.
|
||||
|
||||
Enough said.
|
||||
|
||||
vim: et sw=4 sts=4
|
||||
|
16
benchmarks/ConfigSchema.php
Normal file
16
benchmarks/ConfigSchema.php
Normal file
@@ -0,0 +1,16 @@
|
||||
<?php
|
||||
|
||||
chdir(dirname(__FILE__));
|
||||
|
||||
//require_once '../library/HTMLPurifier.path.php';
|
||||
shell_exec('php ../maintenance/generate-schema-cache.php');
|
||||
require_once '../library/HTMLPurifier.path.php';
|
||||
require_once 'HTMLPurifier.includes.php';
|
||||
|
||||
$begin = xdebug_memory_usage();
|
||||
|
||||
$schema = HTMLPurifier_ConfigSchema::makeFromSerial();
|
||||
|
||||
echo xdebug_memory_usage() - $begin;
|
||||
|
||||
// vim: et sw=4 sts=4
|
@@ -1,82 +1,69 @@
|
||||
<?php
|
||||
|
||||
// emulates inserting a dir called HTMLPurifier into your class dir
|
||||
set_include_path(get_include_path() . PATH_SEPARATOR . '../library/');
|
||||
|
||||
require_once '../library/HTMLPurifier.auto.php';
|
||||
@include_once '../test-settings.php';
|
||||
|
||||
require_once 'HTMLPurifier/ConfigSchema.php';
|
||||
require_once 'HTMLPurifier/Config.php';
|
||||
require_once 'HTMLPurifier/Context.php';
|
||||
// PEAR
|
||||
require_once 'Benchmark/Timer.php'; // to do the timing
|
||||
require_once 'Text/Password.php'; // for generating random input
|
||||
|
||||
$LEXERS = array();
|
||||
$RUNS = isset($GLOBALS['HTMLPurifierTest']['Runs'])
|
||||
? $GLOBALS['HTMLPurifierTest']['Runs'] : 2;
|
||||
? $GLOBALS['HTMLPurifierTest']['Runs'] : 2;
|
||||
|
||||
require_once 'HTMLPurifier/Lexer/DirectLex.php';
|
||||
$LEXERS['DirectLex'] = new HTMLPurifier_Lexer_DirectLex();
|
||||
|
||||
if (!empty($GLOBALS['HTMLPurifierTest']['PEAR'])) {
|
||||
require_once 'HTMLPurifier/Lexer/PEARSax3.php';
|
||||
$LEXERS['PEARSax3'] = new HTMLPurifier_Lexer_PEARSax3();
|
||||
} else {
|
||||
exit('PEAR required to perform benchmark.');
|
||||
}
|
||||
|
||||
if (version_compare(PHP_VERSION, '5', '>=')) {
|
||||
require_once 'HTMLPurifier/Lexer/DOMLex.php';
|
||||
$LEXERS['DOMLex'] = new HTMLPurifier_Lexer_DOMLex();
|
||||
}
|
||||
|
||||
// PEAR
|
||||
require_once 'Benchmark/Timer.php'; // to do the timing
|
||||
require_once 'Text/Password.php'; // for generating random input
|
||||
|
||||
// custom class to aid unit testing
|
||||
class RowTimer extends Benchmark_Timer
|
||||
{
|
||||
|
||||
|
||||
var $name;
|
||||
|
||||
|
||||
function RowTimer($name, $auto = false) {
|
||||
$this->name = htmlentities($name);
|
||||
$this->Benchmark_Timer($auto);
|
||||
}
|
||||
|
||||
|
||||
function getOutput() {
|
||||
|
||||
$total = $this->TimeElapsed();
|
||||
$result = $this->getProfiling();
|
||||
$dashes = '';
|
||||
|
||||
|
||||
$out = '<tr>';
|
||||
|
||||
|
||||
$out .= "<td>{$this->name}</td>";
|
||||
|
||||
|
||||
$standard = false;
|
||||
|
||||
|
||||
foreach ($result as $k => $v) {
|
||||
if ($v['name'] == 'Start' || $v['name'] == 'Stop') continue;
|
||||
|
||||
|
||||
//$perc = (($v['diff'] * 100) / $total);
|
||||
//$tperc = (($v['total'] * 100) / $total);
|
||||
|
||||
|
||||
//$out .= '<td align="right">' . $v['diff'] . '</td>';
|
||||
|
||||
|
||||
if ($standard == false) $standard = $v['diff'];
|
||||
|
||||
|
||||
$perc = $v['diff'] * 100 / $standard;
|
||||
$bad_run = ($v['diff'] < 0);
|
||||
|
||||
|
||||
$out .= '<td align="right"'.
|
||||
($bad_run ? ' style="color:#AAA;"' : '').
|
||||
'>' . number_format($perc, 2, '.', '') .
|
||||
'%</td><td>'.number_format($v['diff'],4,'.','').'</td>';
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
$out .= '</tr>';
|
||||
|
||||
|
||||
return $out;
|
||||
}
|
||||
}
|
||||
@@ -93,18 +80,18 @@ function print_lexers() {
|
||||
|
||||
function do_benchmark($name, $document) {
|
||||
global $LEXERS, $RUNS;
|
||||
|
||||
|
||||
$config = HTMLPurifier_Config::createDefault();
|
||||
$context = new HTMLPurifier_Context();
|
||||
|
||||
|
||||
$timer = new RowTimer($name);
|
||||
$timer->start();
|
||||
|
||||
|
||||
foreach($LEXERS as $key => $lexer) {
|
||||
for ($i=0; $i<$RUNS; $i++) $tokens = $lexer->tokenizeHTML($document, $config, $context);
|
||||
$timer->setMarker($key);
|
||||
}
|
||||
|
||||
|
||||
$timer->stop();
|
||||
$timer->display();
|
||||
}
|
||||
@@ -131,11 +118,11 @@ foreach ($LEXERS as $key => $value) {
|
||||
$dir = 'samples/Lexer';
|
||||
$dh = opendir($dir);
|
||||
while (false !== ($filename = readdir($dh))) {
|
||||
|
||||
|
||||
if (strpos($filename, '.html') !== strlen($filename) - 5) continue;
|
||||
$document = file_get_contents($dir . '/' . $filename);
|
||||
do_benchmark("File: $filename", $document);
|
||||
|
||||
|
||||
}
|
||||
|
||||
// crashers, caused infinite loops before
|
||||
@@ -166,3 +153,6 @@ echo '<div>Random input was: ' .
|
||||
|
||||
|
||||
</body></html>
|
||||
<?php
|
||||
|
||||
// vim: et sw=4 sts=4
|
||||
|
@@ -1,17 +0,0 @@
|
||||
<?php
|
||||
|
||||
set_include_path(get_include_path() . PATH_SEPARATOR . '../library/');
|
||||
|
||||
require_once 'HTMLPurifier/ConfigSchema.php';
|
||||
require_once 'HTMLPurifier/Config.php';
|
||||
require_once 'HTMLPurifier/Lexer/DirectLex.php';
|
||||
require_once 'HTMLPurifier/Context.php';
|
||||
|
||||
$input = file_get_contents('samples/Lexer/4.html');
|
||||
$lexer = new HTMLPurifier_Lexer_DirectLex();
|
||||
$config = HTMLPurifier_Config::createDefault();
|
||||
$context = new HTMLPurifier_Context();
|
||||
|
||||
for ($i = 0; $i < 10; $i++) {
|
||||
$tokens = $lexer->tokenizeHTML($input, $config, $context);
|
||||
}
|
@@ -3,6 +3,11 @@
|
||||
ini_set('xdebug.trace_format', 1);
|
||||
ini_set('xdebug.show_mem_delta', true);
|
||||
|
||||
if (file_exists('Trace.xt')) {
|
||||
echo "Previous trace Trace.xt must be removed before this script can be run.";
|
||||
exit;
|
||||
}
|
||||
|
||||
xdebug_start_trace(dirname(__FILE__) . '/Trace');
|
||||
require_once '../library/HTMLPurifier.auto.php';
|
||||
|
||||
@@ -10,3 +15,7 @@ $purifier = new HTMLPurifier();
|
||||
|
||||
$data = $purifier->purify(file_get_contents('samples/Lexer/4.html'));
|
||||
xdebug_stop_trace();
|
||||
|
||||
echo "Trace finished.";
|
||||
|
||||
// vim: et sw=4 sts=4
|
||||
|
@@ -28,7 +28,7 @@
|
||||
|
||||
<li>HX Edison Taiji Club <a href="http://www.taijiclub.org/downloads/Taiji_club_regulation_.pdf">by-law</a> effective 3/28/2006</li>
|
||||
<li>A new email account for our club: HXEdisontaijiclub@yahoo.com</li>
|
||||
|
||||
|
||||
<li>Workshop conducted by <a href="http://www.taijiclub.org/ch/Digest/LiDeyin">?????</a> Li Deyin is set on June 4, 2006 at Clarion Hotel in Edison from 9:30am-12pm; <a href="http://www.taijiclub.org/en/Registration">Registration</a></li>
|
||||
|
||||
</ul>
|
||||
@@ -36,7 +36,7 @@
|
||||
|
||||
|
||||
|
||||
<p><i>Taiji</i> is an ancient Chinese tradition of movement systems that is associated with philosophy, physiology, psychology, geometry and dynamics. It is the slowest form of martial arts and is meant to improve the internal spirit. It is soothing to the soul and extremely invigorating. </p>
|
||||
<p><i>Taiji</i> is an ancient Chinese tradition of movement systems that is associated with philosophy, physiology, psychology, geometry and dynamics. It is the slowest form of martial arts and is meant to improve the internal spirit. It is soothing to the soul and extremely invigorating. </p>
|
||||
|
||||
<p>The founder of Taiji was Zhang Sanfeng (Chang San-feng), who was a monk of the Wu Dang (Wu Tang) Monastery and lived in the period from 1391 to 1459. His exercises stressed suppleness and elasticity as opposed to the hardness and force of other martial art styles. Several centuries old, Taiji was originally developed as a form of self-defense, emphasizing strength, balance, flexibility and speed. Tai Chi also differs from other martial arts in that it is based on the Taoist religion and aims to avoid aggressive forces. </p>
|
||||
|
||||
@@ -50,4 +50,7 @@
|
||||
|
||||
<div style="text-align:center;">Click on photo to see HR version</div></div>
|
||||
</body>
|
||||
</html>
|
||||
</html>
|
||||
|
||||
<!-- vim: et sw=4 sts=4
|
||||
-->
|
||||
|
@@ -14,4 +14,7 @@ function rwt(el,ct,cd,sg){var e = window.encodeURIComponent ? encodeURIComponent
|
||||
<form action=/search name=f><script><!--
|
||||
function qs(el) {if (window.RegExp && window.encodeURIComponent) {var ue=el.href;var qe=encodeURIComponent(document.f.q.value);if(ue.indexOf("q=")!=-1){el.href=ue.replace(new RegExp("q=[^&$]*"),"q="+qe);}else{el.href=ue+"&q="+qe;}}return 1;}
|
||||
// -->
|
||||
</script><table border=0 cellspacing=0 cellpadding=4><tr><td nowrap><font size=-1><b>Web</b> <a id=1a class=q href="/imghp?hl=en&tab=wi" onClick="return qs(this);">Images</a> <a id=2a class=q href="http://groups.google.com/grphp?hl=en&tab=wg" onClick="return qs(this);">Groups</a> <a id=4a class=q href="http://news.google.com/nwshp?hl=en&tab=wn" onClick="return qs(this);">News</a> <a id=5a class=q href="http://froogle.google.com/frghp?hl=en&tab=wf" onClick="return qs(this);">Froogle</a> <a id=8a class=q href="/lochp?hl=en&tab=wl" onClick="return qs(this);">Local</a> <b><a href="/intl/en/options/" class=q>more »</a></b></font></td></tr></table><table cellspacing=0 cellpadding=0><tr><td width=25%> </td><td align=center><input type=hidden name=hl value=en><input maxlength=2048 size=55 name=q value="" title="Google Search"><br><input type=submit value="Google Search" name=btnG><input type=submit value="I'm Feeling Lucky" name=btnI></td><td valign=top nowrap width=25%><font size=-2> <a href=/advanced_search?hl=en>Advanced Search</a><br> <a href=/preferences?hl=en>Preferences</a><br> <a href=/language_tools?hl=en>Language Tools</a></font></td></tr></table></form><br><br><font size=-1><a href="/ads/">Advertising Programs</a> - <a href=/services/>Business Solutions</a> - <a href=/about.html>About Google</a></font><p><font size=-2>©2006 Google</font></p></center></body></html>
|
||||
</script><table border=0 cellspacing=0 cellpadding=4><tr><td nowrap><font size=-1><b>Web</b> <a id=1a class=q href="/imghp?hl=en&tab=wi" onClick="return qs(this);">Images</a> <a id=2a class=q href="http://groups.google.com/grphp?hl=en&tab=wg" onClick="return qs(this);">Groups</a> <a id=4a class=q href="http://news.google.com/nwshp?hl=en&tab=wn" onClick="return qs(this);">News</a> <a id=5a class=q href="http://froogle.google.com/frghp?hl=en&tab=wf" onClick="return qs(this);">Froogle</a> <a id=8a class=q href="/lochp?hl=en&tab=wl" onClick="return qs(this);">Local</a> <b><a href="/intl/en/options/" class=q>more »</a></b></font></td></tr></table><table cellspacing=0 cellpadding=0><tr><td width=25%> </td><td align=center><input type=hidden name=hl value=en><input maxlength=2048 size=55 name=q value="" title="Google Search"><br><input type=submit value="Google Search" name=btnG><input type=submit value="I'm Feeling Lucky" name=btnI></td><td valign=top nowrap width=25%><font size=-2> <a href=/advanced_search?hl=en>Advanced Search</a><br> <a href=/preferences?hl=en>Preferences</a><br> <a href=/language_tools?hl=en>Language Tools</a></font></td></tr></table></form><br><br><font size=-1><a href="/ads/">Advertising Programs</a> - <a href=/services/>Business Solutions</a> - <a href=/about.html>About Google</a></font><p><font size=-2>©2006 Google</font></p></center></body></html>
|
||||
|
||||
<!-- vim: et sw=4 sts=4
|
||||
-->
|
||||
|
@@ -67,21 +67,21 @@ if (objAdMgr.isSlotAvailable("leaderboard")) {
|
||||
</table>
|
||||
<table width="86%" border="0" cellspacing="0" cellpadding="2">
|
||||
<tr>
|
||||
<td height="388" width="19%" bgcolor="#FFCCFF" valign="top">
|
||||
<td height="388" width="19%" bgcolor="#FFCCFF" valign="top">
|
||||
<p>May 1, 2000</p>
|
||||
<p><b>Pop Culture</b> </p>
|
||||
<p>by. H. Finkelstein</p>
|
||||
|
||||
</td>
|
||||
<td height="388" width="52%" valign="top">
|
||||
<p>Welcome to the <b>Anime Digi-Lib</b>, a virtual index to anime on the
|
||||
internet. This site strives to house a comprehensive index to both personal
|
||||
and commercial websites and provides reviews to these sites. We hope to
|
||||
be a gateway for people who've never imagined they'd ever be interested
|
||||
<td height="388" width="52%" valign="top">
|
||||
<p>Welcome to the <b>Anime Digi-Lib</b>, a virtual index to anime on the
|
||||
internet. This site strives to house a comprehensive index to both personal
|
||||
and commercial websites and provides reviews to these sites. We hope to
|
||||
be a gateway for people who've never imagined they'd ever be interested
|
||||
in Japanese Animation. </p>
|
||||
<table width="99%" border="1" cellspacing="0" cellpadding="2" height="320" name="Searchnservices">
|
||||
<tr>
|
||||
<td height="263" valign="top" width="58%">
|
||||
<td height="263" valign="top" width="58%">
|
||||
<p> </p>
|
||||
<p> </p>
|
||||
|
||||
@@ -107,15 +107,15 @@ if (objAdMgr.isSlotAvailable("leaderboard")) {
|
||||
</form>
|
||||
|
||||
|
||||
<td>
|
||||
<td>
|
||||
<table border="0" cellpadding="0" cellspacing="0" width="100%">
|
||||
<tr><td><font face="verdana,geneva" color="#000011" size="1">What is better, subtitled or dubbed anime?</font></td></tr>
|
||||
<tr><td><input type="radio" name="rd" value="1"><font face="verdana" size="2" color="#000011">Subtitled</font></td></tr>
|
||||
|
||||
<tr><td align="middle"><font face="verdana" size="1"><a href="http://pub.alxnet.com/poll?id=2079873&q=view">Current results</a></font></td></tr>
|
||||
</table></td></tr>
|
||||
<tr>
|
||||
<td><font face="verdana" size="1"><a href="http://www.alxnet.com/services/poll/">Free
|
||||
<tr>
|
||||
<td><font face="verdana" size="1"><a href="http://www.alxnet.com/services/poll/">Free
|
||||
Web Polls</a></font></td>
|
||||
</tr>
|
||||
</table></form>
|
||||
@@ -126,3 +126,6 @@ if (objAdMgr.isSlotAvailable("leaderboard")) {
|
||||
</body>
|
||||
|
||||
</html>
|
||||
|
||||
<!-- vim: et sw=4 sts=4
|
||||
-->
|
||||
|
@@ -138,7 +138,7 @@
|
||||
</table>
|
||||
<p><script type="text/javascript">
|
||||
//<![CDATA[
|
||||
if (window.showTocToggle) { var tocShowText = "show"; var tocHideText = "hide"; showTocToggle(); }
|
||||
if (window.showTocToggle) { var tocShowText = "show"; var tocHideText = "hide"; showTocToggle(); }
|
||||
//]]>
|
||||
</script></p>
|
||||
<div class="editsection" style="float:right;margin-left:5px;">[<a href="/w/index.php?title=Tai_Chi_Chuan&action=edit&section=1" title="Edit section: Overview">edit</a>]</div>
|
||||
@@ -279,19 +279,19 @@ Yang Small Frame | <a href="/wik
|
||||
|
||||
| |
|
||||
<a href="/w/index.php?title=Wu_Ta-kuei&action=edit" class="new" title="Wu Ta-kuei">Wu Ta-kuei</a> <a href="/w/index.php?title=Sun_Hsing-i&action=edit" class="new" title="Sun Hsing-i">Sun Hsing-i</a>
|
||||
1923-1970 1891-1929
|
||||
|
||||
<b>MODERN FORMS</b>
|
||||
|
||||
from Yang Ch`eng-fu
|
||||
|
|
||||
|
|
||||
|
|
||||
+--------------+
|
||||
| |
|
||||
<a href="/wiki/Cheng_Man-ch%27ing" title="Cheng Man-ch'ing">Cheng Man-ch'ing</a> |
|
||||
1901-1975 |
|
||||
Short (37) Form |
|
||||
1923-1970 1891-1929
|
||||
|
||||
<b>MODERN FORMS</b>
|
||||
|
||||
from Yang Ch`eng-fu
|
||||
|
|
||||
|
|
||||
|
|
||||
+--------------+
|
||||
| |
|
||||
<a href="/wiki/Cheng_Man-ch%27ing" title="Cheng Man-ch'ing">Cheng Man-ch'ing</a> |
|
||||
1901-1975 |
|
||||
Short (37) Form |
|
||||
|
|
||||
Chinese Sports Commission
|
||||
1956
|
||||
@@ -538,3 +538,6 @@ Retrieved from "<a href="http://en.wikipedia.org/wiki/Tai_Chi_Chuan">http://en.w
|
||||
|
||||
<!-- Served by srv25 in 0.089 secs. -->
|
||||
</body></html>
|
||||
|
||||
<!-- vim: et sw=4 sts=4
|
||||
-->
|
||||
|
@@ -2,4 +2,6 @@ Disclaimer:
|
||||
|
||||
The HTML used in these samples are taken from random websites. I claim
|
||||
no copyright over these and assert that I may use them like this under
|
||||
fair use.
|
||||
fair use.
|
||||
|
||||
vim: et sw=4 sts=4
|
||||
|
@@ -2,12 +2,12 @@
|
||||
|
||||
/**
|
||||
* Generates XML and HTML documents describing configuration.
|
||||
* @note PHP 5 only!
|
||||
* @note PHP 5.2+ only!
|
||||
*/
|
||||
|
||||
/*
|
||||
TODO:
|
||||
- make XML format richer (see XMLSerializer_ConfigSchema)
|
||||
- make XML format richer
|
||||
- extend XSLT transformation (see the corresponding XSLT file)
|
||||
- allow generation of packaged docs that can be easily moved
|
||||
- multipage documentation
|
||||
@@ -15,31 +15,50 @@ TODO:
|
||||
- add blurbs to ToC
|
||||
*/
|
||||
|
||||
if (version_compare('5', PHP_VERSION, '>')) exit('Requires PHP 5 or higher.');
|
||||
error_reporting(E_ALL); // probably not possible to use E_STRICT
|
||||
|
||||
define('HTMLPURIFIER_SCHEMA_STRICT', true); // description data needs to be collected
|
||||
if (version_compare(PHP_VERSION, '5.2', '<')) exit('PHP 5.2+ required.');
|
||||
error_reporting(E_ALL | E_STRICT);
|
||||
|
||||
// load dual-libraries
|
||||
require_once '../library/HTMLPurifier.auto.php';
|
||||
require_once 'library/ConfigDoc.auto.php';
|
||||
require_once dirname(__FILE__) . '/../extras/HTMLPurifierExtras.auto.php';
|
||||
require_once dirname(__FILE__) . '/../library/HTMLPurifier.auto.php';
|
||||
|
||||
$purifier = HTMLPurifier::getInstance(array(
|
||||
// setup HTML Purifier singleton
|
||||
HTMLPurifier::getInstance(array(
|
||||
'AutoFormat.PurifierLinkify' => true
|
||||
));
|
||||
|
||||
$schema = HTMLPurifier_ConfigSchema::instance();
|
||||
$style = 'plain'; // use $_GET in the future
|
||||
$configdoc = new ConfigDoc();
|
||||
$output = $configdoc->generate($schema, $style);
|
||||
$builder = new HTMLPurifier_ConfigSchema_InterchangeBuilder();
|
||||
$interchange = new HTMLPurifier_ConfigSchema_Interchange();
|
||||
$builder->buildDir($interchange);
|
||||
$loader = dirname(__FILE__) . '/../config-schema.php';
|
||||
if (file_exists($loader)) include $loader;
|
||||
$interchange->validate();
|
||||
|
||||
// write out
|
||||
file_put_contents("$style.html", $output);
|
||||
$style = 'plain'; // use $_GET in the future, careful to validate!
|
||||
$configdoc_xml = dirname(__FILE__) . '/configdoc.xml';
|
||||
|
||||
if (php_sapi_name() != 'cli') {
|
||||
// output = instant feedback
|
||||
echo $output;
|
||||
} else {
|
||||
echo 'Files generated successfully.';
|
||||
$xml_builder = new HTMLPurifier_ConfigSchema_Builder_Xml();
|
||||
$xml_builder->openURI($configdoc_xml);
|
||||
$xml_builder->build($interchange);
|
||||
unset($xml_builder); // free handle
|
||||
|
||||
$xslt = new ConfigDoc_HTMLXSLTProcessor();
|
||||
$xslt->importStylesheet(dirname(__FILE__) . "/styles/$style.xsl");
|
||||
$output = $xslt->transformToHTML($configdoc_xml);
|
||||
|
||||
if (!$output) {
|
||||
echo "Error in generating files\n";
|
||||
exit(1);
|
||||
}
|
||||
|
||||
// write out
|
||||
file_put_contents(dirname(__FILE__) . "/$style.html", $output);
|
||||
|
||||
if (php_sapi_name() != 'cli') {
|
||||
// output (instant feedback if it's a browser)
|
||||
echo $output;
|
||||
} else {
|
||||
echo "Files generated successfully.\n";
|
||||
}
|
||||
|
||||
// vim: et sw=4 sts=4
|
||||
|
@@ -1,38 +0,0 @@
|
||||
<?php
|
||||
|
||||
require_once 'ConfigDoc/HTMLXSLTProcessor.php';
|
||||
require_once 'ConfigDoc/XMLSerializer/Types.php';
|
||||
require_once 'ConfigDoc/XMLSerializer/ConfigSchema.php';
|
||||
|
||||
class ConfigDoc
|
||||
{
|
||||
|
||||
function generate($schema, $xsl_stylesheet_name = 'plain', $parameters = array()) {
|
||||
// generate types document, describing type constraints
|
||||
$types_serializer = new ConfigDoc_XMLSerializer_Types();
|
||||
$types_document = $types_serializer->serialize($schema);
|
||||
$types_document->save(dirname(__FILE__) . '/../types.xml'); // only ONE
|
||||
|
||||
// generate configdoc.xml, documents configuration directives
|
||||
$schema_serializer = new ConfigDoc_XMLSerializer_ConfigSchema();
|
||||
$schema_document = $schema_serializer->serialize($schema);
|
||||
$schema_document->save('configdoc.xml');
|
||||
|
||||
// setup transformation
|
||||
$xsl_stylesheet = dirname(__FILE__) . "/../styles/$xsl_stylesheet_name.xsl";
|
||||
$xslt_processor = new ConfigDoc_HTMLXSLTProcessor();
|
||||
$xslt_processor->setParameters($parameters);
|
||||
$xslt_processor->importStylesheet($xsl_stylesheet);
|
||||
|
||||
return $xslt_processor->transformToHTML($schema_document);
|
||||
}
|
||||
|
||||
/**
|
||||
* Remove any generated files
|
||||
*/
|
||||
function cleanup() {
|
||||
unlink('configdoc.xml');
|
||||
}
|
||||
|
||||
}
|
||||
|
@@ -1,25 +0,0 @@
|
||||
<?php
|
||||
|
||||
/**
|
||||
* The XMLSerializer hierarchy of classes consist of classes that take
|
||||
* objects and serialize them into XML, specifically DOM, form; this
|
||||
* super-class contains convenience functions for those classes.
|
||||
*/
|
||||
class ConfigDoc_XMLSerializer
|
||||
{
|
||||
|
||||
protected function appendHTMLDiv($document, $node, $html) {
|
||||
$purifier = HTMLPurifier::getInstance();
|
||||
$html = $purifier->purify($html);
|
||||
$dom_html = $document->createDocumentFragment();
|
||||
$dom_html->appendXML($html);
|
||||
|
||||
$dom_div = $document->createElement('div');
|
||||
$dom_div->setAttribute('xmlns', 'http://www.w3.org/1999/xhtml');
|
||||
$dom_div->appendChild($dom_html);
|
||||
|
||||
$node->appendChild($dom_div);
|
||||
}
|
||||
|
||||
}
|
||||
|
@@ -1,123 +0,0 @@
|
||||
<?php
|
||||
|
||||
require_once 'ConfigDoc/XMLSerializer.php';
|
||||
|
||||
class ConfigDoc_XMLSerializer_ConfigSchema extends ConfigDoc_XMLSerializer
|
||||
{
|
||||
|
||||
/**
|
||||
* Serializes a schema into DOM form
|
||||
* @todo Split into sub-serializers
|
||||
* @param $schema HTMLPurifier_ConfigSchema to serialize
|
||||
*/
|
||||
public function serialize($schema) {
|
||||
$dom_document = new DOMDocument('1.0', 'UTF-8');
|
||||
$dom_root = $dom_document->createElement('configdoc');
|
||||
$dom_document->appendChild($dom_root);
|
||||
$dom_document->formatOutput = true;
|
||||
|
||||
// add the name of the application
|
||||
$dom_root->appendChild($dom_document->createElement('title', 'HTML Purifier'));
|
||||
|
||||
/*
|
||||
TODO for XML format:
|
||||
- create a definition (DTD or other) once interface stabilizes
|
||||
*/
|
||||
|
||||
foreach($schema->info as $namespace_name => $namespace_info) {
|
||||
|
||||
$dom_namespace = $dom_document->createElement('namespace');
|
||||
$dom_root->appendChild($dom_namespace);
|
||||
|
||||
$dom_namespace->setAttribute('id', $namespace_name);
|
||||
$dom_namespace->appendChild(
|
||||
$dom_document->createElement('name', $namespace_name)
|
||||
);
|
||||
$dom_namespace_description = $dom_document->createElement('description');
|
||||
$dom_namespace->appendChild($dom_namespace_description);
|
||||
$this->appendHTMLDiv($dom_document, $dom_namespace_description,
|
||||
$schema->info_namespace[$namespace_name]->description);
|
||||
|
||||
foreach ($namespace_info as $name => $info) {
|
||||
|
||||
if ($info->class == 'alias') continue;
|
||||
|
||||
$dom_directive = $dom_document->createElement('directive');
|
||||
$dom_namespace->appendChild($dom_directive);
|
||||
|
||||
$dom_directive->setAttribute('id', $namespace_name . '.' . $name);
|
||||
$dom_directive->appendChild(
|
||||
$dom_document->createElement('name', $name)
|
||||
);
|
||||
|
||||
$dom_aliases = $dom_document->createElement('aliases');
|
||||
$dom_directive->appendChild($dom_aliases);
|
||||
foreach ($info->directiveAliases as $alias) {
|
||||
$dom_aliases->appendChild($dom_document->createElement('alias', $alias));
|
||||
}
|
||||
|
||||
$dom_constraints = $dom_document->createElement('constraints');
|
||||
$dom_directive->appendChild($dom_constraints);
|
||||
|
||||
$dom_type = $dom_document->createElement('type', $info->type);
|
||||
if ($info->allow_null) {
|
||||
$dom_type->setAttribute('allow-null', 'yes');
|
||||
}
|
||||
$dom_constraints->appendChild($dom_type);
|
||||
|
||||
if ($info->allowed !== true) {
|
||||
$dom_allowed = $dom_document->createElement('allowed');
|
||||
$dom_constraints->appendChild($dom_allowed);
|
||||
foreach ($info->allowed as $allowed => $bool) {
|
||||
$dom_allowed->appendChild(
|
||||
$dom_document->createElement('value', $allowed)
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
$raw_default = $schema->defaults[$namespace_name][$name];
|
||||
if (is_bool($raw_default)) {
|
||||
$default = $raw_default ? 'true' : 'false';
|
||||
} elseif (is_string($raw_default)) {
|
||||
$default = "\"$raw_default\"";
|
||||
} elseif (is_null($raw_default)) {
|
||||
$default = 'null';
|
||||
} else {
|
||||
$default = print_r(
|
||||
$schema->defaults[$namespace_name][$name], true
|
||||
);
|
||||
}
|
||||
|
||||
$dom_default = $dom_document->createElement('default', $default);
|
||||
|
||||
// remove this once we get a DTD
|
||||
$dom_default->setAttribute('xml:space', 'preserve');
|
||||
|
||||
$dom_constraints->appendChild($dom_default);
|
||||
|
||||
$dom_descriptions = $dom_document->createElement('descriptions');
|
||||
$dom_directive->appendChild($dom_descriptions);
|
||||
|
||||
foreach ($info->descriptions as $file => $file_descriptions) {
|
||||
foreach ($file_descriptions as $line => $description) {
|
||||
$dom_description = $dom_document->createElement('description');
|
||||
// refuse to write $file if it's a full path
|
||||
if (str_replace('\\', '/', realpath($file)) != $file) {
|
||||
$dom_description->setAttribute('file', $file);
|
||||
$dom_description->setAttribute('line', $line);
|
||||
}
|
||||
$this->appendHTMLDiv($dom_document, $dom_description, $description);
|
||||
$dom_descriptions->appendChild($dom_description);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
return $dom_document;
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
@@ -1,26 +0,0 @@
|
||||
<?php
|
||||
|
||||
require_once 'ConfigDoc/XMLSerializer.php';
|
||||
|
||||
class ConfigDoc_XMLSerializer_Types extends ConfigDoc_XMLSerializer
|
||||
{
|
||||
|
||||
/**
|
||||
* Serializes the types in a schema into DOM form
|
||||
* @param $schema HTMLPurifier_ConfigSchema owner of types to serialize
|
||||
*/
|
||||
public function serialize($schema) {
|
||||
$types_document = new DOMDocument('1.0', 'UTF-8');
|
||||
$types_root = $types_document->createElement('types');
|
||||
$types_document->appendChild($types_root);
|
||||
$types_document->formatOutput = true;
|
||||
foreach ($schema->types as $name => $expanded_name) {
|
||||
$types_type = $types_document->createElement('type', $expanded_name);
|
||||
$types_type->setAttribute('id', $name);
|
||||
$types_root->appendChild($types_type);
|
||||
}
|
||||
return $types_document;
|
||||
}
|
||||
|
||||
}
|
||||
|
@@ -1,16 +1,30 @@
|
||||
|
||||
body {margin:1em 4em;}
|
||||
body {margin:0;padding:0;}
|
||||
#content {
|
||||
margin:1em auto;
|
||||
max-width: 47em;
|
||||
width: expression(document.body.clientWidth >
|
||||
85 * parseInt(document.body.currentStyle.fontSize) ?
|
||||
"54em": "auto");
|
||||
}
|
||||
|
||||
table {border-collapse:collapse;}
|
||||
table td, table th {padding:0.2em;}
|
||||
|
||||
table.constraints {margin:0 0 1em;}
|
||||
table.constraints th {text-align:left;padding-left:0.4em;}
|
||||
table.constraints td {padding-right:0.4em;}
|
||||
table.constraints th {
|
||||
text-align:right;padding-left:0.4em;padding-right:0.4em;background:#EEE;
|
||||
width:8em;vertical-align:top;}
|
||||
table.constraints td {padding-right:0.4em; padding-left: 1em;}
|
||||
table.constraints td ul {padding:0; margin:0; list-style:none;}
|
||||
table.constraints td pre {margin:0;}
|
||||
|
||||
#toc {list-style-type:none; font-weight:bold;}
|
||||
#toc ul {list-style-type:disc; font-weight:normal;}
|
||||
#tocContainer {position:relative;}
|
||||
#toc {list-style-type:none; font-weight:bold; font-size:1em; margin-bottom:1em;}
|
||||
#toc li {position:relative; line-height: 1.2em;}
|
||||
#toc .col-2 {margin-left:50%;}
|
||||
#toc .col-l {float:left;}
|
||||
#toc ul {list-style-type:disc; font-weight:normal; padding-bottom:1.2em;}
|
||||
|
||||
.description p {margin-top:0;margin-bottom:1em;}
|
||||
|
||||
@@ -19,6 +33,12 @@ table.constraints td pre {margin:0;}
|
||||
#library {font-size:1em;}
|
||||
h1 {margin-top:0;}
|
||||
h2 {border-bottom:1px solid #CCC; font-family:sans-serif; font-weight:normal;
|
||||
font-size:1.3em;}
|
||||
font-size:1.3em; clear:both;}
|
||||
h3 {font-family:sans-serif; font-size:1.1em; font-weight:bold; }
|
||||
h4 {font-family:sans-serif; font-size:0.9em; font-weight:bold; }
|
||||
|
||||
.deprecated {color: #CCC;}
|
||||
.deprecated table.constraints th {background:#FFF;}
|
||||
.deprecated-notice {color: #000; text-align:center; margin-bottom: 1em;}
|
||||
|
||||
/* vim: et sw=4 sts=4 */
|
||||
|
@@ -14,9 +14,13 @@
|
||||
/>
|
||||
<xsl:param name="css" select="'styles/plain.css'"/>
|
||||
<xsl:param name="title" select="'Configuration Documentation'"/>
|
||||
|
||||
<xsl:variable name="typeLookup" select="document('../types.xml')" />
|
||||
|
||||
|
||||
<xsl:variable name="typeLookup" select="document('../types.xml')/types" />
|
||||
<xsl:variable name="usageLookup" select="document('../usage.xml')/usage" />
|
||||
|
||||
<!-- Twiddle this variable to get the columns as even as possible -->
|
||||
<xsl:variable name="maxNumberAdjust" select="2" />
|
||||
|
||||
<xsl:template match="/">
|
||||
<html lang="en" xml:lang="en">
|
||||
<head>
|
||||
@@ -25,114 +29,196 @@
|
||||
<link rel="stylesheet" type="text/css" href="{$css}" />
|
||||
</head>
|
||||
<body>
|
||||
<div id="library"><xsl:value-of select="/configdoc/title" /></div>
|
||||
<h1><xsl:value-of select="$title" /></h1>
|
||||
<h2>Table of Contents</h2>
|
||||
<ul id="toc">
|
||||
<xsl:apply-templates mode="toc" />
|
||||
</ul>
|
||||
<xsl:apply-templates />
|
||||
<div id="content">
|
||||
<div id="library"><xsl:value-of select="/configdoc/title" /></div>
|
||||
<h1><xsl:value-of select="$title" /></h1>
|
||||
<div id="tocContainer">
|
||||
<h2>Table of Contents</h2>
|
||||
<ul id="toc">
|
||||
<xsl:apply-templates mode="toc">
|
||||
<xsl:with-param name="overflowNumber" select="round(count(/configdoc/namespace) div 2) + $maxNumberAdjust" />
|
||||
</xsl:apply-templates>
|
||||
</ul>
|
||||
</div>
|
||||
<div id="typesContainer">
|
||||
<h2>Types</h2>
|
||||
<xsl:apply-templates select="$typeLookup" mode="types" />
|
||||
</div>
|
||||
<xsl:apply-templates />
|
||||
</div>
|
||||
</body>
|
||||
</html>
|
||||
</xsl:template>
|
||||
|
||||
|
||||
<xsl:template match="type" mode="types">
|
||||
<div class="type-block">
|
||||
<xsl:attribute name="id">type-<xsl:value-of select="@id" /></xsl:attribute>
|
||||
<h3><code><xsl:value-of select="@id" /></code>: <xsl:value-of select="@name" /></h3>
|
||||
<div class="type-description">
|
||||
<xsl:copy-of xmlns:xhtml="http://www.w3.org/1999/xhtml" select="xhtml:div/node()" />
|
||||
</div>
|
||||
</div>
|
||||
</xsl:template>
|
||||
|
||||
<xsl:template match="title" mode="toc" />
|
||||
<xsl:template match="namespace" mode="toc">
|
||||
<xsl:param name="overflowNumber" />
|
||||
<xsl:variable name="number"><xsl:number level="single" /></xsl:variable>
|
||||
<xsl:variable name="directiveNumber"><xsl:number level="any" count="directive" /></xsl:variable>
|
||||
<xsl:if test="count(directive)>0">
|
||||
<li>
|
||||
<!-- BEGIN multicolumn code -->
|
||||
<xsl:if test="$number >= $overflowNumber">
|
||||
<xsl:attribute name="class">col-2</xsl:attribute>
|
||||
</xsl:if>
|
||||
<xsl:if test="$number = $overflowNumber">
|
||||
<xsl:attribute name="style">margin-top:-<xsl:value-of select="($number * 2 + $directiveNumber - 3) * 1.2" />em</xsl:attribute>
|
||||
</xsl:if>
|
||||
<!-- END multicolumn code -->
|
||||
<a href="#{@id}"><xsl:value-of select="name" /></a>
|
||||
<ul>
|
||||
<xsl:apply-templates select="directive" mode="toc" />
|
||||
<xsl:apply-templates select="directive" mode="toc">
|
||||
<xsl:with-param name="overflowNumber" select="$overflowNumber" />
|
||||
</xsl:apply-templates>
|
||||
</ul>
|
||||
<xsl:if test="$number + 1 = $overflowNumber">
|
||||
<div class="col-l" />
|
||||
</xsl:if>
|
||||
</li>
|
||||
</xsl:if>
|
||||
</xsl:template>
|
||||
<xsl:template match="directive" mode="toc">
|
||||
<li><a href="#{@id}"><xsl:value-of select="name" /></a></li>
|
||||
</xsl:template>
|
||||
|
||||
<xsl:template match="title" />
|
||||
|
||||
<xsl:template match="namespace">
|
||||
<xsl:apply-templates />
|
||||
<xsl:if test="count(directive)=0">
|
||||
<p>No configuration directives defined for this namespace.</p>
|
||||
<xsl:variable name="number">
|
||||
<xsl:number level="any" count="directive|namespace" />
|
||||
</xsl:variable>
|
||||
<xsl:if test="not(deprecated)">
|
||||
<li>
|
||||
<a href="#{@id}"><xsl:value-of select="name" /></a>
|
||||
</li>
|
||||
</xsl:if>
|
||||
</xsl:template>
|
||||
|
||||
<xsl:template match="title" />
|
||||
|
||||
<xsl:template match="namespace">
|
||||
<div class="namespace">
|
||||
<xsl:apply-templates />
|
||||
<xsl:if test="count(directive)=0">
|
||||
<p>No configuration directives defined for this namespace.</p>
|
||||
</xsl:if>
|
||||
</div>
|
||||
</xsl:template>
|
||||
<xsl:template match="namespace/name">
|
||||
<h2 id="{../@id}"><xsl:value-of select="." /></h2>
|
||||
</xsl:template>
|
||||
<xsl:template match="namespace/description">
|
||||
<div class="description">
|
||||
<xsl:copy-of select="div/node()" />
|
||||
<xsl:copy-of xmlns:xhtml="http://www.w3.org/1999/xhtml" select="xhtml:div/node()" />
|
||||
</div>
|
||||
</xsl:template>
|
||||
|
||||
|
||||
<xsl:template match="directive">
|
||||
<xsl:apply-templates />
|
||||
<div>
|
||||
<xsl:attribute name="class"><!--
|
||||
-->directive<!--
|
||||
--><xsl:if test="deprecated"> deprecated</xsl:if><!--
|
||||
--></xsl:attribute>
|
||||
<xsl:apply-templates>
|
||||
<xsl:with-param name="id" select="@id" />
|
||||
</xsl:apply-templates>
|
||||
</div>
|
||||
</xsl:template>
|
||||
<xsl:template match="directive/name">
|
||||
<xsl:param name="id" />
|
||||
<xsl:apply-templates select="../aliases/alias" mode="anchor" />
|
||||
<h3 id="{../@id}"><xsl:value-of select="../@id" /></h3>
|
||||
<h3 id="{$id}"><xsl:value-of select="$id" /></h3>
|
||||
</xsl:template>
|
||||
<xsl:template match="alias" mode="anchor">
|
||||
<a id="{.}"></a>
|
||||
</xsl:template>
|
||||
|
||||
|
||||
<!-- Do not pass through -->
|
||||
<xsl:template match="alias"></xsl:template>
|
||||
|
||||
|
||||
<xsl:template match="directive/constraints">
|
||||
<xsl:param name="id" />
|
||||
<table class="constraints">
|
||||
<xsl:apply-templates />
|
||||
<!-- Calculated other values -->
|
||||
<xsl:if test="../descriptions/description[@file]">
|
||||
<tr>
|
||||
<th>Used by:</th>
|
||||
<td>
|
||||
<xsl:for-each select="../descriptions/description">
|
||||
<xsl:if test="position()>1">, </xsl:if>
|
||||
<xsl:value-of select="@file" />
|
||||
</xsl:for-each>
|
||||
</td>
|
||||
</tr>
|
||||
</xsl:if>
|
||||
<xsl:if test="../aliases/alias">
|
||||
<xsl:apply-templates select="../aliases" mode="constraints" />
|
||||
</xsl:if>
|
||||
<xsl:apply-templates select="$usageLookup/directive[@id=$id]" />
|
||||
</table>
|
||||
</xsl:template>
|
||||
<xsl:template match="directive/aliases" mode="constraints">
|
||||
<th>Aliases:</th>
|
||||
<td>
|
||||
<xsl:for-each select="alias">
|
||||
<tr>
|
||||
<th>Aliases</th>
|
||||
<td>
|
||||
<xsl:for-each select="alias">
|
||||
<xsl:if test="position()>1">, </xsl:if>
|
||||
<xsl:value-of select="." />
|
||||
</xsl:for-each>
|
||||
</td>
|
||||
</tr>
|
||||
</xsl:template>
|
||||
<xsl:template match="directive/description">
|
||||
<div class="description">
|
||||
<xsl:copy-of xmlns:xhtml="http://www.w3.org/1999/xhtml" select="xhtml:div/node()" />
|
||||
</div>
|
||||
</xsl:template>
|
||||
<xsl:template match="directive/deprecated">
|
||||
<div class="deprecated-notice">
|
||||
<strong>Warning:</strong>
|
||||
This directive was deprecated in version <xsl:value-of select="version" />.
|
||||
<a href="#{use}">%<xsl:value-of select="use" /></a> should be used instead.
|
||||
</div>
|
||||
</xsl:template>
|
||||
<xsl:template match="usage/directive">
|
||||
<tr>
|
||||
<th>Used in</th>
|
||||
<td>
|
||||
<ul>
|
||||
<xsl:apply-templates />
|
||||
</ul>
|
||||
</td>
|
||||
</tr>
|
||||
</xsl:template>
|
||||
<xsl:template match="usage/directive/file">
|
||||
<li>
|
||||
<em><xsl:value-of select="@name" /></em> on line<xsl:if test="count(line)>1">s</xsl:if>
|
||||
<xsl:text> </xsl:text>
|
||||
<xsl:for-each select="line">
|
||||
<xsl:if test="position()>1">, </xsl:if>
|
||||
<xsl:value-of select="." />
|
||||
</xsl:for-each>
|
||||
</td>
|
||||
</li>
|
||||
</xsl:template>
|
||||
<xsl:template match="directive//description">
|
||||
<div class="description">
|
||||
<xsl:copy-of select="div/node()" />
|
||||
</div>
|
||||
|
||||
<xsl:template match="constraints/version">
|
||||
<tr>
|
||||
<th>Version added</th>
|
||||
<td><xsl:value-of select="." /></td>
|
||||
</tr>
|
||||
</xsl:template>
|
||||
|
||||
<xsl:template match="constraints/type">
|
||||
<tr>
|
||||
<th>Type:</th>
|
||||
<th>Type</th>
|
||||
<td>
|
||||
<xsl:variable name="type" select="text()" />
|
||||
<xsl:attribute name="class">type type-<xsl:value-of select="$type" /></xsl:attribute>
|
||||
<xsl:value-of select="$typeLookup/types/type[@id=$type]/text()" />
|
||||
<xsl:if test="@allow-null='yes'">
|
||||
(or null)
|
||||
</xsl:if>
|
||||
<a>
|
||||
<xsl:attribute name="href">#type-<xsl:value-of select="$type" /></xsl:attribute>
|
||||
<xsl:value-of select="$typeLookup/type[@id=$type]/@name" />
|
||||
<xsl:if test="@allow-null='yes'">
|
||||
(or null)
|
||||
</xsl:if>
|
||||
</a>
|
||||
</td>
|
||||
</tr>
|
||||
</xsl:template>
|
||||
<xsl:template match="constraints/allowed">
|
||||
<tr>
|
||||
<th>Allowed values:</th>
|
||||
<th>Allowed values</th>
|
||||
<td>
|
||||
<xsl:for-each select="value"><!--
|
||||
--><xsl:if test="position()>1">, </xsl:if>
|
||||
@@ -143,9 +229,25 @@
|
||||
</xsl:template>
|
||||
<xsl:template match="constraints/default">
|
||||
<tr>
|
||||
<th>Default:</th>
|
||||
<th>Default</th>
|
||||
<td><pre><xsl:value-of select="." xml:space="preserve" /></pre></td>
|
||||
</tr>
|
||||
</xsl:template>
|
||||
|
||||
<xsl:template match="constraints/external">
|
||||
<tr>
|
||||
<th>External deps</th>
|
||||
<td>
|
||||
<ul>
|
||||
<xsl:apply-templates />
|
||||
</ul>
|
||||
</td>
|
||||
</tr>
|
||||
</xsl:template>
|
||||
<xsl:template match="constraints/external/project">
|
||||
<li><xsl:value-of select="." /></li>
|
||||
</xsl:template>
|
||||
|
||||
</xsl:stylesheet>
|
||||
|
||||
<!-- vim: et sw=4 sts=4
|
||||
-->
|
||||
|
69
configdoc/types.xml
Normal file
69
configdoc/types.xml
Normal file
@@ -0,0 +1,69 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<types>
|
||||
<type id="string" name="String"><div xmlns="http://www.w3.org/1999/xhtml">
|
||||
A <a
|
||||
href="http://docs.php.net/manual/en/language.types.string.php">sequence
|
||||
of characters</a>.
|
||||
</div></type>
|
||||
<type id="istring" name="Case-insensitive string"><div xmlns="http://www.w3.org/1999/xhtml">
|
||||
A series of case-insensitive characters. Internally, upper-case
|
||||
ASCII characters will be converted to lower-case.
|
||||
</div></type>
|
||||
<type id="text" name="Text"><div xmlns="http://www.w3.org/1999/xhtml">
|
||||
A series of characters that may contain newlines. Text tends to
|
||||
indicate human-oriented text, as opposed to a machine format.
|
||||
</div></type>
|
||||
<type id="itext" name="Case-insensitive text"><div xmlns="http://www.w3.org/1999/xhtml">
|
||||
A series of case-insensitive characters that may contain newlines.
|
||||
</div></type>
|
||||
<type id="int" name="Integer"><div xmlns="http://www.w3.org/1999/xhtml">
|
||||
An <a
|
||||
href="http://docs.php.net/manual/en/language.types.integer.php">
|
||||
integer</a>. You are alternatively permitted to pass a string of
|
||||
digits instead, which will be cast to an integer using
|
||||
<code>(int)</code>.
|
||||
</div></type>
|
||||
<type id="float" name="Float"><div xmlns="http://www.w3.org/1999/xhtml">
|
||||
A <a href="http://docs.php.net/manual/en/language.types.float.php">
|
||||
floating point number</a>. You are alternatively permitted to
|
||||
pass a numeric string (as defined by <code>is_numeric()</code>),
|
||||
which will be cast to a float using <code>(float)</code>.
|
||||
</div></type>
|
||||
<type id="bool" name="Boolean"><div xmlns="http://www.w3.org/1999/xhtml">
|
||||
A <a
|
||||
href="http://docs.php.net/manual/en/language.types.boolean.php">boolean</a>.
|
||||
You are alternatively permitted to pass an integer <code>0</code> or
|
||||
<code>1</code> (other integers are not permitted) or a string
|
||||
<code>"on"</code>, <code>"true"</code> or <code>"1"</code> for
|
||||
<code>true</code>, and <code>"off"</code>, <code>"false"</code> or
|
||||
<code>"0"</code> for <code>false</code>.
|
||||
</div></type>
|
||||
<type id="lookup" name="Lookup array"><div xmlns="http://www.w3.org/1999/xhtml">
|
||||
An array whose values are <code>true</code>, e.g. <code>array('key'
|
||||
=> true, 'key2' => true)</code>. You are alternatively permitted
|
||||
to pass an array list of the keys <code>array('key', 'key2')</code>
|
||||
or a comma-separated string of keys <code>"key, key2"</code>. If
|
||||
you pass an array list of values, ensure that your values are
|
||||
strictly numerically indexed: <code>array('key1', 2 =>
|
||||
'key2')</code> will not do what you expect and emits a warning.
|
||||
</div></type>
|
||||
<type id="list" name="Array list"><div xmlns="http://www.w3.org/1999/xhtml">
|
||||
An array which has consecutive integer indexes, e.g.
|
||||
<code>array('val1', 'val2')</code>. You are alternatively permitted
|
||||
to pass a comma-separated string of keys <code>"val1, val2"</code>.
|
||||
If your array is not in this form, <code>array_values</code> is run
|
||||
on the array and a warning is emitted.
|
||||
</div></type>
|
||||
<type id="hash" name="Associative array"><div xmlns="http://www.w3.org/1999/xhtml">
|
||||
An array which is a mapping of keys to values, e.g.
|
||||
<code>array('key1' => 'val1', 'key2' => 'val2')</code>. You are
|
||||
alternatively permitted to pass a comma-separated string of
|
||||
key-colon-value strings, e.g. <code>"key1: val1, key2: val2"</code>.
|
||||
</div></type>
|
||||
<type id="mixed" name="Mixed"><div xmlns="http://www.w3.org/1999/xhtml">
|
||||
An arbitrary PHP value of any type.
|
||||
</div></type>
|
||||
</types>
|
||||
|
||||
<!-- vim: et sw=4 sts=4
|
||||
-->
|
472
configdoc/usage.xml
Normal file
472
configdoc/usage.xml
Normal file
@@ -0,0 +1,472 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<usage>
|
||||
<directive id="Core.CollectErrors">
|
||||
<file name="HTMLPurifier.php">
|
||||
<line>131</line>
|
||||
</file>
|
||||
<file name="HTMLPurifier/Lexer.php">
|
||||
<line>81</line>
|
||||
<line>284</line>
|
||||
</file>
|
||||
<file name="HTMLPurifier/Lexer/DirectLex.php">
|
||||
<line>53</line>
|
||||
<line>73</line>
|
||||
<line>348</line>
|
||||
</file>
|
||||
<file name="HTMLPurifier/Strategy/RemoveForeignElements.php">
|
||||
<line>47</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="CSS.MaxImgLength">
|
||||
<file name="HTMLPurifier/CSSDefinition.php">
|
||||
<line>157</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="CSS.Proprietary">
|
||||
<file name="HTMLPurifier/CSSDefinition.php">
|
||||
<line>214</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="CSS.AllowTricky">
|
||||
<file name="HTMLPurifier/CSSDefinition.php">
|
||||
<line>218</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="CSS.AllowImportant">
|
||||
<file name="HTMLPurifier/CSSDefinition.php">
|
||||
<line>222</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="CSS.AllowedProperties">
|
||||
<file name="HTMLPurifier/CSSDefinition.php">
|
||||
<line>275</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="CSS.ForbiddenProperties">
|
||||
<file name="HTMLPurifier/CSSDefinition.php">
|
||||
<line>289</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="Cache.DefinitionImpl">
|
||||
<file name="HTMLPurifier/DefinitionCacheFactory.php">
|
||||
<line>49</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="HTML.Doctype">
|
||||
<file name="HTMLPurifier/DoctypeRegistry.php">
|
||||
<line>83</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="HTML.CustomDoctype">
|
||||
<file name="HTMLPurifier/DoctypeRegistry.php">
|
||||
<line>85</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="HTML.XHTML">
|
||||
<file name="HTMLPurifier/DoctypeRegistry.php">
|
||||
<line>88</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="HTML.Strict">
|
||||
<file name="HTMLPurifier/DoctypeRegistry.php">
|
||||
<line>93</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="Core.Encoding">
|
||||
<file name="HTMLPurifier/Encoder.php">
|
||||
<line>267</line>
|
||||
<line>300</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="Test.ForceNoIconv">
|
||||
<file name="HTMLPurifier/Encoder.php">
|
||||
<line>272</line>
|
||||
<line>308</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="Core.EscapeNonASCIICharacters">
|
||||
<file name="HTMLPurifier/Encoder.php">
|
||||
<line>304</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="Output.CommentScriptContents">
|
||||
<file name="HTMLPurifier/Generator.php">
|
||||
<line>56</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="Output.SortAttr">
|
||||
<file name="HTMLPurifier/Generator.php">
|
||||
<line>57</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="Output.FlashCompat">
|
||||
<file name="HTMLPurifier/Generator.php">
|
||||
<line>58</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="Output.TidyFormat">
|
||||
<file name="HTMLPurifier/Generator.php">
|
||||
<line>87</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="Core.NormalizeNewlines">
|
||||
<file name="HTMLPurifier/Generator.php">
|
||||
<line>101</line>
|
||||
</file>
|
||||
<file name="HTMLPurifier/Lexer.php">
|
||||
<line>266</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="Output.Newline">
|
||||
<file name="HTMLPurifier/Generator.php">
|
||||
<line>102</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="HTML.BlockWrapper">
|
||||
<file name="HTMLPurifier/HTMLDefinition.php">
|
||||
<line>222</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="HTML.Parent">
|
||||
<file name="HTMLPurifier/HTMLDefinition.php">
|
||||
<line>230</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="HTML.AllowedElements">
|
||||
<file name="HTMLPurifier/HTMLDefinition.php">
|
||||
<line>247</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="HTML.AllowedAttributes">
|
||||
<file name="HTMLPurifier/HTMLDefinition.php">
|
||||
<line>248</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="HTML.Allowed">
|
||||
<file name="HTMLPurifier/HTMLDefinition.php">
|
||||
<line>251</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="HTML.ForbiddenElements">
|
||||
<file name="HTMLPurifier/HTMLDefinition.php">
|
||||
<line>342</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="HTML.ForbiddenAttributes">
|
||||
<file name="HTMLPurifier/HTMLDefinition.php">
|
||||
<line>343</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="HTML.Trusted">
|
||||
<file name="HTMLPurifier/HTMLModuleManager.php">
|
||||
<line>202</line>
|
||||
</file>
|
||||
<file name="HTMLPurifier/Lexer.php">
|
||||
<line>271</line>
|
||||
</file>
|
||||
<file name="HTMLPurifier/HTMLModule/Image.php">
|
||||
<line>27</line>
|
||||
</file>
|
||||
<file name="HTMLPurifier/Lexer/DirectLex.php">
|
||||
<line>36</line>
|
||||
</file>
|
||||
<file name="HTMLPurifier/Strategy/RemoveForeignElements.php">
|
||||
<line>23</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="HTML.AllowedModules">
|
||||
<file name="HTMLPurifier/HTMLModuleManager.php">
|
||||
<line>209</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="HTML.CoreModules">
|
||||
<file name="HTMLPurifier/HTMLModuleManager.php">
|
||||
<line>210</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="HTML.Proprietary">
|
||||
<file name="HTMLPurifier/HTMLModuleManager.php">
|
||||
<line>221</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="HTML.SafeObject">
|
||||
<file name="HTMLPurifier/HTMLModuleManager.php">
|
||||
<line>226</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="HTML.SafeEmbed">
|
||||
<file name="HTMLPurifier/HTMLModuleManager.php">
|
||||
<line>229</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="Attr.IDBlacklist">
|
||||
<file name="HTMLPurifier/IDAccumulator.php">
|
||||
<line>26</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="Core.Language">
|
||||
<file name="HTMLPurifier/LanguageFactory.php">
|
||||
<line>88</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="Core.LexerImpl">
|
||||
<file name="HTMLPurifier/Lexer.php">
|
||||
<line>76</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="Core.MaintainLineNumbers">
|
||||
<file name="HTMLPurifier/Lexer.php">
|
||||
<line>80</line>
|
||||
</file>
|
||||
<file name="HTMLPurifier/Lexer/DirectLex.php">
|
||||
<line>48</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="Core.ConvertDocumentToFragment">
|
||||
<file name="HTMLPurifier/Lexer.php">
|
||||
<line>282</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="Core.RemoveProcessingInstructions">
|
||||
<file name="HTMLPurifier/Lexer.php">
|
||||
<line>303</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="URI.">
|
||||
<file name="HTMLPurifier/URIDefinition.php">
|
||||
<line>55</line>
|
||||
</file>
|
||||
<file name="HTMLPurifier/URIFilter/Munge.php">
|
||||
<line>12</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="URI.Host">
|
||||
<file name="HTMLPurifier/URIDefinition.php">
|
||||
<line>64</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="URI.Base">
|
||||
<file name="HTMLPurifier/URIDefinition.php">
|
||||
<line>65</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="URI.DefaultScheme">
|
||||
<file name="HTMLPurifier/URIDefinition.php">
|
||||
<line>72</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="URI.AllowedSchemes">
|
||||
<file name="HTMLPurifier/URISchemeRegistry.php">
|
||||
<line>41</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="URI.OverrideAllowedSchemes">
|
||||
<file name="HTMLPurifier/URISchemeRegistry.php">
|
||||
<line>42</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="URI.Disable">
|
||||
<file name="HTMLPurifier/AttrDef/URI.php">
|
||||
<line>28</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="Core.ColorKeywords">
|
||||
<file name="HTMLPurifier/AttrDef/CSS/Color.php">
|
||||
<line>12</line>
|
||||
</file>
|
||||
<file name="HTMLPurifier/AttrDef/HTML/Color.php">
|
||||
<line>12</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="Attr.AllowedClasses">
|
||||
<file name="HTMLPurifier/AttrDef/HTML/Class.php">
|
||||
<line>18</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="Attr.ForbiddenClasses">
|
||||
<file name="HTMLPurifier/AttrDef/HTML/Class.php">
|
||||
<line>19</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="Attr.AllowedFrameTargets">
|
||||
<file name="HTMLPurifier/AttrDef/HTML/FrameTarget.php">
|
||||
<line>15</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="Attr.EnableID">
|
||||
<file name="HTMLPurifier/AttrDef/HTML/ID.php">
|
||||
<line>20</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="Attr.IDPrefix">
|
||||
<file name="HTMLPurifier/AttrDef/HTML/ID.php">
|
||||
<line>26</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="Attr.IDPrefixLocal">
|
||||
<file name="HTMLPurifier/AttrDef/HTML/ID.php">
|
||||
<line>28</line>
|
||||
<line>31</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="Attr.IDBlacklistRegexp">
|
||||
<file name="HTMLPurifier/AttrDef/HTML/ID.php">
|
||||
<line>54</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="Attr.">
|
||||
<file name="HTMLPurifier/AttrDef/HTML/LinkTypes.php">
|
||||
<line>30</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="Attr.DefaultTextDir">
|
||||
<file name="HTMLPurifier/AttrTransform/BdoDir.php">
|
||||
<line>13</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="Core.RemoveInvalidImg">
|
||||
<file name="HTMLPurifier/AttrTransform/ImgRequired.php">
|
||||
<line>18</line>
|
||||
</file>
|
||||
<file name="HTMLPurifier/Strategy/RemoveForeignElements.php">
|
||||
<line>20</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="Attr.DefaultInvalidImage">
|
||||
<file name="HTMLPurifier/AttrTransform/ImgRequired.php">
|
||||
<line>19</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="Attr.DefaultImageAlt">
|
||||
<file name="HTMLPurifier/AttrTransform/ImgRequired.php">
|
||||
<line>25</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="Attr.DefaultInvalidImageAlt">
|
||||
<file name="HTMLPurifier/AttrTransform/ImgRequired.php">
|
||||
<line>33</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="HTML.Attr.Name.UseCDATA">
|
||||
<file name="HTMLPurifier/AttrTransform/Name.php">
|
||||
<line>11</line>
|
||||
</file>
|
||||
<file name="HTMLPurifier/HTMLModule/Name.php">
|
||||
<line>13</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="HTML.FlashAllowFullScreen">
|
||||
<file name="HTMLPurifier/AttrTransform/SafeParam.php">
|
||||
<line>37</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="Core.EscapeInvalidChildren">
|
||||
<file name="HTMLPurifier/ChildDef/Required.php">
|
||||
<line>62</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="Cache.SerializerPath">
|
||||
<file name="HTMLPurifier/DefinitionCache/Serializer.php">
|
||||
<line>91</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="Filter.ExtractStyleBlocks.TidyImpl">
|
||||
<file name="HTMLPurifier/Filter/ExtractStyleBlocks.php">
|
||||
<line>41</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="Filter.ExtractStyleBlocks.Scope">
|
||||
<file name="HTMLPurifier/Filter/ExtractStyleBlocks.php">
|
||||
<line>65</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="Filter.ExtractStyleBlocks.Escaping">
|
||||
<file name="HTMLPurifier/Filter/ExtractStyleBlocks.php">
|
||||
<line>123</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="HTML.MaxImgLength">
|
||||
<file name="HTMLPurifier/HTMLModule/Image.php">
|
||||
<line>14</line>
|
||||
</file>
|
||||
<file name="HTMLPurifier/HTMLModule/SafeEmbed.php">
|
||||
<line>13</line>
|
||||
</file>
|
||||
<file name="HTMLPurifier/HTMLModule/SafeObject.php">
|
||||
<line>19</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="HTML.TidyLevel">
|
||||
<file name="HTMLPurifier/HTMLModule/Tidy.php">
|
||||
<line>45</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="HTML.TidyAdd">
|
||||
<file name="HTMLPurifier/HTMLModule/Tidy.php">
|
||||
<line>49</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="HTML.TidyRemove">
|
||||
<file name="HTMLPurifier/HTMLModule/Tidy.php">
|
||||
<line>50</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="AutoFormat.PurifierLinkify.DocURL">
|
||||
<file name="HTMLPurifier/Injector/PurifierLinkify.php">
|
||||
<line>15</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="AutoFormat.RemoveEmpty.RemoveNbsp">
|
||||
<file name="HTMLPurifier/Injector/RemoveEmpty.php">
|
||||
<line>12</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="AutoFormat.RemoveEmpty.RemoveNbsp.Exceptions">
|
||||
<file name="HTMLPurifier/Injector/RemoveEmpty.php">
|
||||
<line>13</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="Core.AggressivelyFixLt">
|
||||
<file name="HTMLPurifier/Lexer/DOMLex.php">
|
||||
<line>44</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="Core.DirectLexLineNumberSyncInterval">
|
||||
<file name="HTMLPurifier/Lexer/DirectLex.php">
|
||||
<line>70</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="Core.EscapeInvalidTags">
|
||||
<file name="HTMLPurifier/Strategy/MakeWellFormed.php">
|
||||
<line>45</line>
|
||||
</file>
|
||||
<file name="HTMLPurifier/Strategy/RemoveForeignElements.php">
|
||||
<line>19</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="Core.RemoveScriptContents">
|
||||
<file name="HTMLPurifier/Strategy/RemoveForeignElements.php">
|
||||
<line>25</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="Core.HiddenElements">
|
||||
<file name="HTMLPurifier/Strategy/RemoveForeignElements.php">
|
||||
<line>26</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="URI.HostBlacklist">
|
||||
<file name="HTMLPurifier/URIFilter/HostBlacklist.php">
|
||||
<line>8</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="URI.MungeResources">
|
||||
<file name="HTMLPurifier/URIFilter/Munge.php">
|
||||
<line>14</line>
|
||||
</file>
|
||||
</directive>
|
||||
<directive id="URI.MungeSecretKey">
|
||||
<file name="HTMLPurifier/URIFilter/Munge.php">
|
||||
<line>15</line>
|
||||
</file>
|
||||
</directive>
|
||||
</usage>
|
@@ -3,7 +3,7 @@
|
||||
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
|
||||
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"><head>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
|
||||
<meta name="description" content="Functional specification for HTML Purifier's advanced API for defining custom filtering behavior." />
|
||||
<meta name="description" content="Specification for HTML Purifier's advanced API for defining custom filtering behavior." />
|
||||
<link rel="stylesheet" type="text/css" href="style.css" />
|
||||
|
||||
<title>Advanced API - HTML Purifier</title>
|
||||
@@ -16,198 +16,11 @@
|
||||
<div id="index">Return to the <a href="index.html">index</a>.</div>
|
||||
<div id="home"><a href="http://htmlpurifier.org/">HTML Purifier</a> End-User Documentation</div>
|
||||
|
||||
<p>HTML Purifier currently natively supports only a subset of HTML's
|
||||
allowed elements, attributes, and behavior; specifically, this subset
|
||||
is the set of elements that are safe for untrusted users to use.
|
||||
However, HTML Purifier is often utilized to ensure standards-compliance
|
||||
from input that is trusted (making it a sort of Tidy substitute),
|
||||
and often users need to define new elements or attributes. The
|
||||
advanced API is oriented specifically for these use-cases.</p>
|
||||
|
||||
<p>Our goals are to let the user:</p>
|
||||
|
||||
<dl>
|
||||
<dt>Select</dt>
|
||||
<dd><ul>
|
||||
<li>Doctype</li>
|
||||
<!-- <li>Filterset</li> -->
|
||||
<li>Elements / Attributes / Modules</li>
|
||||
<li>Tidy</li>
|
||||
</ul></dd>
|
||||
<dt>Customize</dt>
|
||||
<dd><ul>
|
||||
<li>Attributes</li>
|
||||
<li>Elements</li>
|
||||
<!--<li>Doctypes</li>-->
|
||||
</ul></dd>
|
||||
</dl>
|
||||
|
||||
<h2>Select</h2>
|
||||
|
||||
<p>For basic use, the user will have to specify some basic parameters. This
|
||||
is not strictly necessary, as HTML Purifier's default setting will always
|
||||
output safe code, but is required for standards-compliant output.</p>
|
||||
|
||||
<h3>Selecting a Doctype</h3>
|
||||
|
||||
<p>The first thing to select is the <strong>doctype</strong>. This
|
||||
is essential for standards-compliant output.</p>
|
||||
|
||||
<p class="technical">This identifier is based
|
||||
on the name the W3C has given to the document type and <em>not</em>
|
||||
the DTD identifier.</p>
|
||||
|
||||
<p>This parameter is set via the configuration object:</p>
|
||||
|
||||
<pre>$config->set('HTML', 'Doctype', 'XHTML 1.0 Transitional');</pre>
|
||||
|
||||
<p>Due to historical reasons, the default doctype is XHTML 1.0
|
||||
Transitional, however, we really shouldn't be guessing what the user's
|
||||
doctype is. Fortunantely, people who can't be bothered to set this won't
|
||||
be bothered when their pages stop validating.</p>
|
||||
|
||||
<h3>Selecting Elements / Attributes / Modules</h3>
|
||||
|
||||
<p>HTML Purifier will, by default, allow as many elements and attributes
|
||||
as possible. However, a user may decide to roll their own filterset by
|
||||
selecting modules, elements and attributes to allow for their own
|
||||
specific use-case. This can be done using %HTML.Allowed:</p>
|
||||
|
||||
<pre>$config->set('HTML', 'Allowed', 'a[href|title],em,p,blockquote');</pre>
|
||||
|
||||
<p class="technical">The directive %HTML.Allowed is a convenience feature
|
||||
that may be fully expressed with the legacy interface.</p>
|
||||
|
||||
<p>We currently support another interface from older versions:</p>
|
||||
|
||||
<pre>$config->set('HTML', 'AllowedElements', 'a,em,p,blockquote');
|
||||
$config->set('HTML', 'AllowedAttributes', 'a.href,a.title');</pre>
|
||||
|
||||
<p>A user may also choose to allow modules using a specialized
|
||||
directive:</p>
|
||||
|
||||
<pre>$config->set('HTML', 'AllowedModules', 'Hypertext,Text,Lists');</pre>
|
||||
|
||||
<p>But it is not expected that this feature will be widely used.</p>
|
||||
|
||||
<p class="technical">Module selection will work slightly differently
|
||||
from the other AllowedElements and AllowedAttributes directives by
|
||||
directly modifying the doctype you are operating in, in the spirit of
|
||||
XHTML 1.1's modularization. We stop users from shooting themselves in the
|
||||
foot by mandating the modules in %HTML.CoreModules be used.</p>
|
||||
|
||||
<p class="technical">Modules are distinguished from regular elements by the
|
||||
case of their first letter. While XML distinguishes between and allows
|
||||
lower and uppercase letters in element names, XHTML uses only lower-case
|
||||
element names for sake of consistency.</p>
|
||||
|
||||
<h3>Selecting Tidy</h3>
|
||||
|
||||
<p>The name of this segment of functionality is inspired off of Dave
|
||||
Ragget's program HTML Tidy, which purported to help clean up HTML. In
|
||||
HTML Purifier, Tidy functionality involves turning unsupported and
|
||||
deprecated elements into standards-compliant ones, maintaining
|
||||
backwards compatibility, and enforcing best practices.</p>
|
||||
|
||||
<p>This is a complicated feature, and is explained more in depth at
|
||||
<a href="enduser-tidy.html">the Tidy documentation page</a>.</p>
|
||||
|
||||
<!--
|
||||
<h3>Unified selector</h3>
|
||||
|
||||
<p>Because selecting each and every one of these configuration options
|
||||
is a chore, we may wish to offer a specialized configuration method
|
||||
for selecting a filterset. Possibility:</p>
|
||||
|
||||
<pre>function selectFilter($doctype, $filterset, $tidy)</pre>
|
||||
|
||||
<p>...which is simply a light wrapper over the individual configuration
|
||||
calls. A custom config file format or text format could also be adopted.</p>
|
||||
-->
|
||||
|
||||
<h2>Customize</h2>
|
||||
|
||||
<p>By reviewing topic posts in the support forum, we determined that
|
||||
there were two primarily demanded customization features people wanted:
|
||||
to add an attribute to an existing element, and to add an element.
|
||||
Thus, we'll want to create convenience functions for these common
|
||||
use-cases.</p>
|
||||
|
||||
<p>Note that the functions described here are only available if
|
||||
a raw copy of <code>HTMLPurifier_HTMLDefinition</code> was retrieved.
|
||||
Furthermore, caching may prevent your changes from immediately
|
||||
being seen: consult <a href="enduser-customize.html">enduser-customize.html</a> on how
|
||||
to work around this.</p>
|
||||
|
||||
<h3>Attributes</h3>
|
||||
|
||||
<p>An attribute is bound to an element by a name and has a specific
|
||||
<code>AttrDef</code> that validates it. The interface is therefore:</p>
|
||||
|
||||
<pre>function addAttribute($element, $attribute, $attribute_def);</pre>
|
||||
|
||||
<p>Example of the functionality in action:</p>
|
||||
|
||||
<pre>$def->addAttribute('a', 'rel', 'Enum#nofollow');</pre>
|
||||
|
||||
<p>The <code>$attribute_def</code> value is flexible,
|
||||
to make things simpler. It can be a literal object or:</p>
|
||||
|
||||
<ul>
|
||||
<!--<li>Class name: We'll instantiate it for you</li>
|
||||
<li>Function name: We'll create an <code>HTMLPurifier_AttrDef_Anonymous</code>
|
||||
class with that function registered as a callback.</li>-->
|
||||
<li>String attribute type: We'll use <code>HTMLPurifier_AttrTypes</code>
|
||||
to resolve it for you. Any data that follows a hash mark (#) will
|
||||
be used to customize the attribute type: in the example above,
|
||||
we specify which values for Enum to allow.</li>
|
||||
</ul>
|
||||
|
||||
<h3>Elements</h3>
|
||||
|
||||
<p>An element requires certain information as specified by
|
||||
<code>HTMLPurifier_ElementDef</code>. However, not all of it is necessary,
|
||||
the usual things required are:</p>
|
||||
|
||||
<ul>
|
||||
<li>Attributes</li>
|
||||
<li>Content model/type</li>
|
||||
<li>Registration in a content set</li>
|
||||
</ul>
|
||||
|
||||
<p>This suggests an API like this:</p>
|
||||
|
||||
<pre>function addElement($element, $type, $contents,
|
||||
$attr_collections = array(); $attributes = array());</pre>
|
||||
|
||||
<p>Each parameter explained in depth:</p>
|
||||
|
||||
<dl>
|
||||
<dt><code>$element</code></dt>
|
||||
<dd>Element name, ex. 'label'</dd>
|
||||
<dt><code>$type</code></dt>
|
||||
<dd>Content set to register in, ex. 'Inline' or 'Flow'</dd>
|
||||
<dt><code>$contents</code></dt>
|
||||
<dd>Description of allowed children. This is a merged form of
|
||||
<code>HTMLPurifier_ElementDef</code>'s member variables
|
||||
<code>$content_model</code> and <code>$content_model_type</code>,
|
||||
where the form is <q>Type: Model</q>, ex. 'Optional: Inline'.
|
||||
There are also a number of predefined templates one may use.</dd>
|
||||
<dt><code>$attr_collections</code></dt>
|
||||
<dd>Array (or string if only one) of attribute collection(s) to
|
||||
merge into the attributes array.</dd>
|
||||
<dt><code>$attributes</code></dt>
|
||||
<dd>Array of attribute names to attribute definitions, much like
|
||||
the above-described attribute customization.</dd>
|
||||
</dl>
|
||||
|
||||
<p>A possible usage:</p>
|
||||
|
||||
<pre>$def->addElement('font', 'Inline', 'Optional: Inline', 'Common',
|
||||
array('color' => 'Color'));</pre>
|
||||
|
||||
<p>See <code>HTMLPurifier/HTMLModule.php</code> for details.</p>
|
||||
|
||||
<div id="version">$Id$</div>
|
||||
<p>
|
||||
Please see <a href="enduser-customize.html">Customize!</a>
|
||||
</p>
|
||||
|
||||
</body></html>
|
||||
|
||||
<!-- vim: et sw=4 sts=4
|
||||
-->
|
||||
|
@@ -17,7 +17,6 @@ a lot of strtolower() calls, no legit casing
|
||||
URI - multiple regular expressions; missing validation for parts (?)
|
||||
CSS - parser doesn't accept advanced CSS (fringe)
|
||||
Number - constructor interface inconsistent with Integer
|
||||
ConfigSchema - redefinition is a mess
|
||||
Strategy
|
||||
FixNesting - cannot bubble nodes out of structures, duplicated checks
|
||||
for special-case parent node
|
||||
@@ -26,3 +25,5 @@ URIScheme - needs to have callable generic checks
|
||||
mailto - doesn't validate emails, doesn't validate querystring
|
||||
news - doesn't validate opaque path
|
||||
nntp - doesn't constrain path
|
||||
|
||||
vim: et sw=4 sts=4
|
||||
|
79
docs/dev-config-bcbreaks.txt
Normal file
79
docs/dev-config-bcbreaks.txt
Normal file
@@ -0,0 +1,79 @@
|
||||
|
||||
Configuration Backwards-Compatibility Breaks
|
||||
|
||||
In version 4.0.0, the configuration subsystem (composed of the outwards
|
||||
facing Config class, as well as the ConfigSchema and ConfigSchema_Interchange
|
||||
subsystems), was significantly revamped to make use of property lists.
|
||||
While most of the changes are internal, some internal APIs were changed for the
|
||||
sake of clarity. HTMLPurifier_Config was kept completely backwards compatible,
|
||||
although some of the functions were retrofitted with an unambiguous alternate
|
||||
syntax. Both of these changes are discussed in this document.
|
||||
|
||||
|
||||
|
||||
1. Outwards Facing Changes
|
||||
--------------------------------------------------------------------------------
|
||||
|
||||
The HTMLPurifier_Config class now takes an alternate syntax. The general rule
|
||||
is:
|
||||
|
||||
If you passed $namespace, $directive, pass "$namespace.$directive"
|
||||
instead.
|
||||
|
||||
An example:
|
||||
|
||||
$config->set('HTML', 'Allowed', 'p');
|
||||
|
||||
becomes:
|
||||
|
||||
$config->set('HTML.Allowed', 'p');
|
||||
|
||||
New configuration options may have more than one namespace, they might
|
||||
look something like %Filter.YouTube.Blacklist. While you could technically
|
||||
set it with ('HTML', 'YouTube.Blacklist'), the logical extension
|
||||
('HTML', 'YouTube', 'Blacklist') does not work.
|
||||
|
||||
The old API will still work, but will emit E_USER_NOTICEs.
|
||||
|
||||
|
||||
|
||||
2. Internal API Changes
|
||||
--------------------------------------------------------------------------------
|
||||
|
||||
Some overarching notes: we've completely eliminated the notion of namespace;
|
||||
it's now an informal construct for organizing related configuration directives.
|
||||
|
||||
Also, the validation routines for keys (formerly "$namespace.$directive")
|
||||
have been completely relaxed. I don't think it really should be necessary.
|
||||
|
||||
2.1 HTMLPurifier_ConfigSchema
|
||||
|
||||
First off, if you're interfacing with this class, you really shouldn't.
|
||||
HTMLPurifier_ConfigSchema_Builder_ConfigSchema is really the only class that
|
||||
should ever be creating HTMLPurifier_ConfigSchema, and HTMLPurifier_Config the
|
||||
only class that should be reading it.
|
||||
|
||||
All namespace related methods were removed; they are completely unnecessary
|
||||
now. Any $namespace, $name arguments must be replaced with $key (where
|
||||
$key == "$namespace.$name"), including for addAlias().
|
||||
|
||||
The $info and $defaults member variables are no longer indexed as
|
||||
[$namespace][$name]; they are now indexed as ["$namespace.$name"].
|
||||
|
||||
All deprecated methods were finally removed, after having yelled at you as
|
||||
an E_USER_NOTICE for a while now.
|
||||
|
||||
2.2 HTMLPurifier_ConfigSchema_Interchange
|
||||
|
||||
Member variable $namespaces was removed.
|
||||
|
||||
2.3 HTMLPurifier_ConfigSchema_Interchange_Id
|
||||
|
||||
Member variable $namespace and $directive removed; member variable $key added.
|
||||
Any method that took $namespace, $directive now takes $key.
|
||||
|
||||
2.4 HTMLPurifier_ConfigSchema_Interchange_Namespace
|
||||
|
||||
Removed.
|
||||
|
||||
vim: et sw=4 sts=4
|
164
docs/dev-config-naming.txt
Normal file
164
docs/dev-config-naming.txt
Normal file
@@ -0,0 +1,164 @@
|
||||
Configuration naming
|
||||
|
||||
HTML Purifier 4.0.0 features a new configuration naming system that
|
||||
allows arbitrary nesting of namespaces. While there are certain cases
|
||||
in which using two namespaces is obviously better (the canonical example
|
||||
is where we were using AutoFormatParam to contain directives for AutoFormat
|
||||
parameters), it is unclear whether or not a general migration to highly
|
||||
namespaced directives is a good idea or not.
|
||||
|
||||
== Case studies ==
|
||||
|
||||
=== Attr.* ===
|
||||
|
||||
We have a dead duck HTML.Attr.Name.UseCDATA which migrated before we decided
|
||||
to think this out thoroughly.
|
||||
|
||||
We currently have a large number of directives in the Attr.* namespace.
|
||||
These directives tweak the behavior of some HTML attributes. They have
|
||||
the properties:
|
||||
|
||||
* While they apply to only one attribute at a time, the attribute can
|
||||
span over multiple elements (not necessarily all attributes, either).
|
||||
The information of which elements it impacts is either omitted or
|
||||
informally stated (EnableID applies to all elements, DefaultImageAlt
|
||||
applies to <img> tags, AllowedRev doesn't say but only applies to a tags).
|
||||
|
||||
* There is a certain degree of clustering that could be applied, especially
|
||||
to the ID directives. The clustering could be done with respect to
|
||||
what element/attribute was used, i.e.
|
||||
|
||||
*.id -> EnableID, IDBlacklistRegexp, IDBlacklist, IDPrefixLocal, IDPrefix
|
||||
img.src -> DefaultInvalidImage
|
||||
img.alt -> DefaultImageAlt, DefaultInvalidImageAlt
|
||||
bdo.dir -> DefaultTextDir
|
||||
a.rel -> AllowedRel
|
||||
a.rev -> AllowedRev
|
||||
a.target -> AllowedFrameTargets
|
||||
a.name -> Name.UseCDATA
|
||||
|
||||
* The directives often reference generic attribute types that were specified
|
||||
in the DTD/specification. However, some of the behavior specifically relies
|
||||
on the fact that other use cases of the attribute are not, at current,
|
||||
supported by HTML Purifier.
|
||||
|
||||
AllowedRel, AllowedRev -> heavily <a> specific; if <link> ends up being
|
||||
allowed, we will also have to give users specificity there (we also
|
||||
want to preserve generality) DTD %Linktypes, HTML5 distinguishes
|
||||
between <link> and <a>/<area>
|
||||
AllowedFrameTargets -> heavily <a> specific, but also used by <area>
|
||||
and <form>. Transitional DTD %FrameTarget, not present in strict,
|
||||
HTML5 calls them "browsing contexts"
|
||||
Default*Image* -> as a default parameter, is almost entirely exlcusive
|
||||
to <img>
|
||||
EnableID -> global attribute
|
||||
Name.UseCDATA -> heavily <a> specific, but has heavy other usage by
|
||||
many things
|
||||
|
||||
== AutoFormat.* ==
|
||||
|
||||
These have the fairly normal pluggable architecture that lends itself to
|
||||
large amounts of namespaces (pluggability may be the key to figuring
|
||||
out when gratuitous namespacing is good.) Properties:
|
||||
|
||||
* Boolean directives are fair game for being namespaced: for example,
|
||||
RemoveEmpty.RemoveNbsp triggers RemoveEmpty.RemoveNbsp.Exceptions,
|
||||
the latter of which only makes sense when RemoveEmpty.RemoveNbsp
|
||||
is set to true. (The same applies to RemoveNbsp too)
|
||||
|
||||
The AutoFormat string is a bit long, but is the only bit of repeated
|
||||
context.
|
||||
|
||||
== Core.* ==
|
||||
|
||||
Core is the potpourri of directives, mostly regarding some minor behavioral
|
||||
tweaks for HTML handling abilities.
|
||||
|
||||
AggressivelyFixLt
|
||||
ConvertDocumentToFragment
|
||||
DirectLexLineNumberSyncInterval
|
||||
LexerImpl
|
||||
MaintainLineNumbers
|
||||
Lexer
|
||||
CollectErrors
|
||||
Language
|
||||
Error handling (Language is ostensibly a little more general, but
|
||||
it's only used for error handling right now)
|
||||
ColorKeywords
|
||||
CSS and HTML
|
||||
Encoding
|
||||
EscapeNonASCIICharacters
|
||||
Character encoding
|
||||
EscapeInvalidChildren
|
||||
EscapeInvalidTags
|
||||
HiddenElements
|
||||
RemoveInvalidImg
|
||||
Lexing/Output
|
||||
RemoveScriptContents
|
||||
Deprecated
|
||||
|
||||
== HTML.* ==
|
||||
|
||||
AllowedAttributes
|
||||
AllowedElements
|
||||
AllowedModules
|
||||
Allowed
|
||||
ForbiddenAttributes
|
||||
ForbiddenElements
|
||||
Element set tuning
|
||||
BlockWrapper
|
||||
Child def advanced twiddle
|
||||
CoreModules
|
||||
CustomDoctype
|
||||
Advanced HTMLModuleManager twiddles
|
||||
DefinitionID
|
||||
DefinitionRev
|
||||
Caching
|
||||
Doctype
|
||||
Parent
|
||||
Strict
|
||||
XHTML
|
||||
Global environment
|
||||
MaxImgLength
|
||||
Attribute twiddle? (applies to two attributes)
|
||||
Proprietary
|
||||
SafeEmbed
|
||||
SafeObject
|
||||
Trusted
|
||||
Extra functionality/tagsets
|
||||
TidyAdd
|
||||
TidyLevel
|
||||
TidyRemove
|
||||
Tidy
|
||||
|
||||
== Output.* ==
|
||||
|
||||
These directly affect the output of Generator. These are all advanced
|
||||
twiddles.
|
||||
|
||||
== URI.* ==
|
||||
|
||||
AllowedSchemes
|
||||
OverrideAllowedSchemes
|
||||
Scheme tuning
|
||||
Base
|
||||
DefaultScheme
|
||||
Host
|
||||
Global environment
|
||||
DefinitionID
|
||||
DefinitionRev
|
||||
Caching
|
||||
DisableExternalResources
|
||||
DisableExternal
|
||||
DisableResources
|
||||
Disable
|
||||
Contextual/authority tuning
|
||||
HostBlacklist
|
||||
Authority tuning
|
||||
MakeAbsolute
|
||||
MungeResources
|
||||
MungeSecretKey
|
||||
Munge
|
||||
Transformation behavior (munge can be grouped)
|
||||
|
||||
|
412
docs/dev-config-schema.html
Normal file
412
docs/dev-config-schema.html
Normal file
@@ -0,0 +1,412 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
|
||||
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
|
||||
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
|
||||
<head>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
|
||||
<meta name="description" content="Describes config schema framework in HTML Purifier." />
|
||||
<link rel="stylesheet" type="text/css" href="./style.css" />
|
||||
<title>Config Schema - HTML Purifier</title>
|
||||
</head>
|
||||
<body>
|
||||
|
||||
<h1>Config Schema</h1>
|
||||
|
||||
<div id="filing">Filed under Development</div>
|
||||
<div id="index">Return to the <a href="index.html">index</a>.</div>
|
||||
<div id="home"><a href="http://htmlpurifier.org/">HTML Purifier</a> End-User Documentation</div>
|
||||
|
||||
<p>
|
||||
HTML Purifier has a fairly complex system for configuration. Users
|
||||
interact with a <code>HTMLPurifier_Config</code> object to
|
||||
set configuration directives. The values they set are validated according
|
||||
to a configuration schema, <code>HTMLPurifier_ConfigSchema</code>.
|
||||
</p>
|
||||
|
||||
<p>
|
||||
The schema is mostly transparent to end-users, but if you're doing development
|
||||
work for HTML Purifier and need to define a new configuration directive,
|
||||
you'll need to interact with it. We'll also talk about how to define
|
||||
userspace configuration directives at the very end.
|
||||
</p>
|
||||
|
||||
<h2>Write a directive file</h2>
|
||||
|
||||
<p>
|
||||
Directive files define configuration directives to be used by
|
||||
HTML Purifier. They are placed in <code>library/HTMLPurifier/ConfigSchema/schema/</code>
|
||||
in the form <code><em>Namespace</em>.<em>Directive</em>.txt</code> (I
|
||||
couldn't think of a more descriptive file extension.)
|
||||
Directive files are actually what we call <code>StringHash</code>es,
|
||||
i.e. associative arrays represented in a string form reminiscent of
|
||||
<a href="http://qa.php.net/write-test.php">PHPT</a> tests. Here's a
|
||||
sample directive file, <code>Test.Sample.txt</code>:
|
||||
</p>
|
||||
|
||||
<pre>Test.Sample
|
||||
TYPE: string/null
|
||||
DEFAULT: NULL
|
||||
ALLOWED: 'foo', 'bar'
|
||||
VALUE-ALIASES: 'baz' => 'bar'
|
||||
VERSION: 3.1.0
|
||||
--DESCRIPTION--
|
||||
This is a sample configuration directive for the purposes of the
|
||||
<code>dev-config-schema.html<code> documentation.
|
||||
--ALIASES--
|
||||
Test.Example</pre>
|
||||
|
||||
<p>
|
||||
Each of these segments has a specific meaning:
|
||||
</p>
|
||||
|
||||
<table class="table">
|
||||
<thead>
|
||||
<tr>
|
||||
<th>Key</th>
|
||||
<th>Example</th>
|
||||
<th>Description</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
<tr>
|
||||
<td>ID</td>
|
||||
<td>Test.Sample</td>
|
||||
<td>The name of the directive, in the form Namespace.Directive
|
||||
(implicitly the first line)</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>TYPE</td>
|
||||
<td>string/null</td>
|
||||
<td>The type of variable this directive accepts. See below for
|
||||
details. You can also add <code>/null</code> to the end of
|
||||
any basic type to allow null values too.</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>DEFAULT</td>
|
||||
<td>NULL</td>
|
||||
<td>A parseable PHP expression of the default value.</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>DESCRIPTION</td>
|
||||
<td>This is a...</td>
|
||||
<td>An HTML description of what this directive does.</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>VERSION</td>
|
||||
<td>3.1.0</td>
|
||||
<td><em>Recommended</em>. The version of HTML Purifier this directive was added.
|
||||
Directives that have been around since 1.0.0 don't have this,
|
||||
but any new ones should.</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>ALIASES</td>
|
||||
<td>Test.Example</td>
|
||||
<td><em>Optional</em>. A comma separated list of aliases for this directive.
|
||||
This is most useful for backwards compatibility and should
|
||||
not be used otherwise.</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>ALLOWED</td>
|
||||
<td>'foo', 'bar'</td>
|
||||
<td><em>Optional</em>. Set of allowed value for a directive,
|
||||
a comma separated list of parseable PHP expressions. This
|
||||
is only allowed string, istring, text and itext TYPEs.</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>VALUE-ALIASES</td>
|
||||
<td>'baz' => 'bar'</td>
|
||||
<td><em>Optional</em>. Mapping of one value to another, and
|
||||
should be a comma separated list of keypair duples. This
|
||||
is only allowed string, istring, text and itext TYPEs.</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>DEPRECATED-VERSION</td>
|
||||
<td>3.1.0</td>
|
||||
<td><em>Not shown</em>. Indicates that the directive was
|
||||
deprecated this version.</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>DEPRECATED-USE</td>
|
||||
<td>Test.NewDirective</td>
|
||||
<td><em>Not shown</em>. Indicates what new directive should be
|
||||
used instead. Note that the directives will functionally be
|
||||
different, although they should offer the same functionality.
|
||||
If they are identical, use an alias instead.</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>EXTERNAL</td>
|
||||
<td>CSSTidy</td>
|
||||
<td><em>Not shown</em>. Indicates if there is an external library
|
||||
the user will need to download and install to use this configuration
|
||||
directive. As of right now, this is merely a Google-able name; future
|
||||
versions may also provide links and instructions.</td>
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
|
||||
<p>
|
||||
Some notes on format and style:
|
||||
</p>
|
||||
|
||||
<ul>
|
||||
<li>
|
||||
Each of these keys can be expressed in the short format
|
||||
(<code>KEY: Value</code>) or the long format
|
||||
(<code>--KEY--</code> with value beneath). You must use the
|
||||
long format if multiple lines are needed, or if a long format
|
||||
has been used already (that's why <code>ALIASES</code> in our
|
||||
example is in the long format); otherwise, it's user preference.
|
||||
</li>
|
||||
<li>
|
||||
The HTML descriptions should be wrapped at about 80 columns; do
|
||||
not rely on editor word-wrapping.
|
||||
</li>
|
||||
</ul>
|
||||
|
||||
<p>
|
||||
Also, as promised, here is the set of possible types:
|
||||
</p>
|
||||
|
||||
<table class="table">
|
||||
<thead>
|
||||
<tr>
|
||||
<th>Type</th>
|
||||
<th>Example</th>
|
||||
<th>Description</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
<tr>
|
||||
<td>string</td>
|
||||
<td>'Foo'</td>
|
||||
<td><a href="http://docs.php.net/manual/en/language.types.string.php">String</a> without newlines</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>istring</td>
|
||||
<td>'foo'</td>
|
||||
<td>Case insensitive ASCII string without newlines</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>text</td>
|
||||
<td>"A<em>\n</em>b"</td>
|
||||
<td>String with newlines</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>itext</td>
|
||||
<td>"a<em>\n</em>b"</td>
|
||||
<td>Case insensitive ASCII string without newlines</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>int</td>
|
||||
<td>23</td>
|
||||
<td>Integer</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>float</td>
|
||||
<td>3.0</td>
|
||||
<td>Floating point number</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>bool</td>
|
||||
<td>true</td>
|
||||
<td>Boolean</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>lookup</td>
|
||||
<td>array('key' => true)</td>
|
||||
<td>Lookup array, used with <code>isset($var[$key])</code></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>list</td>
|
||||
<td>array('f', 'b')</td>
|
||||
<td>List array, with ordered numerical indexes</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>hash</td>
|
||||
<td>array('key' => 'val')</td>
|
||||
<td>Associative array of keys to values</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>mixed</td>
|
||||
<td>new stdclass</td>
|
||||
<td>Any PHP variable is fine</td>
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
|
||||
<p>
|
||||
The examples represent what will be returned out of the configuration
|
||||
object; users have a little bit of leeway when setting configuration
|
||||
values (for example, a lookup value can be specified as a list;
|
||||
HTML Purifier will flip it as necessary.) These types are defined
|
||||
in <a href="http://repo.or.cz/w/htmlpurifier.git?a=blob;hb=HEAD;f=library/HTMLPurifier/VarParser.php">
|
||||
library/HTMLPurifier/VarParser.php</a>.
|
||||
</p>
|
||||
|
||||
<p>
|
||||
For more information on what values are allowed, and how they are parsed,
|
||||
consult <a href="http://repo.or.cz/w/htmlpurifier.git?a=blob;hb=HEAD;f=library/HTMLPurifier/ConfigSchema/InterchangeBuilder.php">
|
||||
library/HTMLPurifier/ConfigSchema/InterchangeBuilder.php</a>, as well
|
||||
as <a href="http://repo.or.cz/w/htmlpurifier.git?a=blob;hb=HEAD;f=library/HTMLPurifier/ConfigSchema/Interchange/Directive.php">
|
||||
library/HTMLPurifier/ConfigSchema/Interchange/Directive.php</a> for
|
||||
the semantics of the parsed values.
|
||||
</p>
|
||||
|
||||
<h2>Refreshing the cache</h2>
|
||||
|
||||
<p>
|
||||
You may have noticed that your directive file isn't doing anything
|
||||
yet. That's because it hasn't been added to the runtime
|
||||
<code>HTMLPurifier_ConfigSchema</code> instance. Run
|
||||
<code>maintenance/generate-schema-cache.php</code> to fix this.
|
||||
If there were no errors, you're good to go! Don't forget to add
|
||||
some unit tests for your functionality!
|
||||
</p>
|
||||
|
||||
<p>
|
||||
If you ever make changes to your configuration directives, you
|
||||
will need to run this script again.
|
||||
</p>
|
||||
<h2>Adding in-house schema definitions</h2>
|
||||
|
||||
<p>
|
||||
Placing stuff directly in HTML Purifier's source tree is generally not a
|
||||
good idea, so HTML Purifier 4.0.0+ has some facilities in place to make your
|
||||
life easier.
|
||||
</p>
|
||||
|
||||
<p>
|
||||
The first is to pass an extra parameter to <code>maintenance/generate-schema-cache.php</code>
|
||||
with the location of your directory (relative or absolute path will do). For example,
|
||||
if I'm storing my custom definitions in <em>/var/htmlpurifier/myschema</em>, run:
|
||||
<code>php maintenance/generate-schema-cache.php /var/htmlpurifier/myschema</code>.
|
||||
</p>
|
||||
|
||||
<p>
|
||||
Alternatively, you can create a small loader PHP file in the HTML Purifier base
|
||||
directory named <code>config-schema.php</code> (this is the same directory
|
||||
you would place a <code>test-settings.php</code> file). In this file, add
|
||||
the following line for each directory you want to load:
|
||||
</p>
|
||||
|
||||
<pre>$builder->buildDir($interchange, '/var/htmlpurifier/myschema');</pre>
|
||||
|
||||
<p>You can even load a single file using:</p>
|
||||
|
||||
<pre>$builder->buildFile($interchange, '/var/htmlpurifier/myschema/MyApp.Directive.txt');</pre>
|
||||
|
||||
<p>Storing custom definitions that you don't plan on sending back upstream in
|
||||
a separate directory is <em>definitely</em> a good idea! Additionally, picking
|
||||
a good namespace can go a long way to saving you grief if you want to use
|
||||
someone else's change, but they picked the same name, or if HTML Purifier
|
||||
decides to add support for a configuration directive that has the same name.</p>
|
||||
|
||||
<!-- TODO: how to name directives that rely on naming conventions -->
|
||||
|
||||
<h2>Errors</h2>
|
||||
|
||||
<p>
|
||||
All directive files go through a rigorous validation process
|
||||
through <a href="http://repo.or.cz/w/htmlpurifier.git?a=blob;hb=HEAD;f=library/HTMLPurifier/ConfigSchema/Validator.php">
|
||||
library/HTMLPurifier/ConfigSchema/Validator.php</a>, as well
|
||||
as some basic checks during building. While
|
||||
listing every error out here is out-of-scope for this document, we
|
||||
can give some general tips for interpreting error messages.
|
||||
There are two types of errors: builder errors and validation errors.
|
||||
</p>
|
||||
|
||||
<h3>Builder errors</h3>
|
||||
|
||||
<blockquote>
|
||||
<p>
|
||||
<strong>Exception:</strong> Expected type string, got
|
||||
integer in DEFAULT in directive hash 'Ns.Dir'
|
||||
</p>
|
||||
</blockquote>
|
||||
|
||||
<p>
|
||||
You can identify a builder error by the keyword "directive hash."
|
||||
These are the easiest to deal with, because they directly correspond
|
||||
with your directive file. Find the offending directive file (which
|
||||
is the directive hash plus the .txt extension), find the
|
||||
offending index ("in DEFAULT" means the DEFAULT key) and fix the error.
|
||||
This particular error would occur if your default value is not the same
|
||||
type as TYPE.
|
||||
</p>
|
||||
|
||||
<h3>Validation errors</h3>
|
||||
|
||||
<blockquote>
|
||||
<p>
|
||||
<strong>Exception:</strong> Alias 3 in valueAliases in directive
|
||||
'Ns.Dir' must be a string
|
||||
</p>
|
||||
</blockquote>
|
||||
|
||||
<p>
|
||||
These are a little trickier, because we're not actually validating
|
||||
your directive file, or even the direct string hash representation.
|
||||
We're validating an Interchange object, and the error messages do
|
||||
not mention any string hash keys.
|
||||
</p>
|
||||
|
||||
<p>
|
||||
Nevertheless, it's not difficult to figure out what went wrong.
|
||||
Read the "context" statements in reverse:
|
||||
</p>
|
||||
|
||||
<dl>
|
||||
<dt>in directive 'Ns.Dir'</dt>
|
||||
<dd>This means we need to look at the directive file <code>Ns.Dir.txt</code></dd>
|
||||
<dt>in valueAliases</dt>
|
||||
<dd>There's no key actually called this, but there's one that's close:
|
||||
VALUE-ALIASES. Indeed, that's where to look.</dd>
|
||||
<dt>Alias 3</dt>
|
||||
<dd>The value alias that is equal to 3 is the culprit.</dd>
|
||||
</dl>
|
||||
|
||||
<p>
|
||||
In this particular case, you're not allowed to alias integers values to
|
||||
strings values.
|
||||
</p>
|
||||
|
||||
<p>
|
||||
The most difficult part is translating the Interchange member variable (valueAliases)
|
||||
into a directive file key (VALUE-ALIASES), but there's a one-to-one
|
||||
correspondence currently. If the two formats diverge, any discrepancies
|
||||
will be described in <a href="http://repo.or.cz/w/htmlpurifier.git?a=blob;hb=HEAD;f=library/HTMLPurifier/ConfigSchema/InterchangeBuilder.php">
|
||||
library/HTMLPurifier/ConfigSchema/InterchangeBuilder.php</a>.
|
||||
</p>
|
||||
|
||||
<h2>Internals</h2>
|
||||
|
||||
<p>
|
||||
Much of the configuration schema framework's codebase deals with
|
||||
shuffling data from one format to another, and doing validation on this
|
||||
data.
|
||||
The keystone of all of this is the <code>HTMLPurifier_ConfigSchema_Interchange</code>
|
||||
class, which represents the purest, parsed representation of the schema.
|
||||
</p>
|
||||
|
||||
<p>
|
||||
Hand-writing this data is unwieldy, however, so we write directive files.
|
||||
These directive files are parsed by <code>HTMLPurifier_StringHashParser</code>
|
||||
into <code>HTMLPurifier_StringHash</code>es, which then
|
||||
are run through <code>HTMLPurifier_ConfigSchema_InterchangeBuilder</code>
|
||||
to construct the interchange object.
|
||||
</p>
|
||||
|
||||
<p>
|
||||
From the interchange object, the data can be siphoned into other forms
|
||||
using <code>HTMLPurifier_ConfigSchema_Builder</code> subclasses.
|
||||
For example, <code>HTMLPurifier_ConfigSchema_Builder_ConfigSchema</code>
|
||||
generates a runtime <code>HTMLPurifier_ConfigSchema</code> object,
|
||||
which <code>HTMLPurifier_Config</code> uses to validate its incoming
|
||||
data. There is also an XML serializer, which is used to build documentation.
|
||||
</p>
|
||||
|
||||
</body>
|
||||
</html>
|
||||
|
||||
<!-- vim: et sw=4 sts=4
|
||||
-->
|
68
docs/dev-flush.html
Normal file
68
docs/dev-flush.html
Normal file
@@ -0,0 +1,68 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
|
||||
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
|
||||
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
|
||||
<head>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
|
||||
<meta name="description" content="Discusses when to flush HTML Purifier's various caches." />
|
||||
<link rel="stylesheet" type="text/css" href="./style.css" />
|
||||
<title>Flushing the Purifier - HTML Purifier</title>
|
||||
</head>
|
||||
<body>
|
||||
|
||||
<h1>Flushing the Purifier</h1>
|
||||
|
||||
<div id="filing">Filed under Development</div>
|
||||
<div id="index">Return to the <a href="index.html">index</a>.</div>
|
||||
<div id="home"><a href="http://htmlpurifier.org/">HTML Purifier</a> End-User Documentation</div>
|
||||
|
||||
<p>
|
||||
If you've been poking around the various folders in HTML Purifier,
|
||||
you may have noticed the <code>maintenance</code> directory. Almost
|
||||
all of these scripts are devoted to flushing out the various caches
|
||||
HTML Purifier uses. Normal users don't have to worry about this:
|
||||
regular library usage is transparent. However, when doing development
|
||||
work on HTML Purifier, you may find you have to flush one of the
|
||||
caches.
|
||||
</p>
|
||||
|
||||
<p>
|
||||
As a general rule of thumb, run <code>flush.php</code> whenever you make
|
||||
any <em>major</em> changes, or when tests start mysteriously failing.
|
||||
In more detail, run this script if:
|
||||
</p>
|
||||
|
||||
<ul>
|
||||
<li>
|
||||
You added new source files to HTML Purifier's main library.
|
||||
(see <code>generate-includes.php</code>)
|
||||
</li>
|
||||
<li>
|
||||
You modified the configuration schema (see
|
||||
<code>generate-schema-cache.php</code>). This usually means
|
||||
adding or modifying files in <code>HTMLPurifier/ConfigSchema/schema/</code>,
|
||||
although in rare cases modifying <code>HTMLPurifier/ConfigSchema.php</code>
|
||||
will also require this.
|
||||
</li>
|
||||
<li>
|
||||
You modified a Definition, or its subsystems. The most usual candidate
|
||||
is <code>HTMLPurifier/HTMLDefinition.php</code>, which also encompasses
|
||||
the files in <code>HTMLPurifier/HTMLModule/</code> as well as if you've
|
||||
<a href="enduser-customize.html">customizing definitions</a> without
|
||||
the cache disabled. (see <code>flush-generation-cache.php</code>)
|
||||
</li>
|
||||
<li>
|
||||
You modified source files, and have been using the standalone
|
||||
version from the full installation. (see <code>generate-standalone.php</code>)
|
||||
</li>
|
||||
</ul>
|
||||
|
||||
<p>
|
||||
You can check out the corresponding scripts for more information on what they
|
||||
do.
|
||||
</p>
|
||||
|
||||
</body></html>
|
||||
|
||||
<!-- vim: et sw=4 sts=4
|
||||
-->
|
281
docs/dev-includes.txt
Normal file
281
docs/dev-includes.txt
Normal file
@@ -0,0 +1,281 @@
|
||||
|
||||
INCLUDES, AUTOLOAD, BYTECODE CACHES and OPTIMIZATION
|
||||
|
||||
The Problem
|
||||
-----------
|
||||
|
||||
HTML Purifier contains a number of extra components that are not used all
|
||||
of the time, only if the user explicitly specifies that we should use
|
||||
them.
|
||||
|
||||
Some of these optional components are optionally included (Filter,
|
||||
Language, Lexer, Printer), while others are included all the time
|
||||
(Injector, URIFilter, HTMLModule, URIScheme). We will stipulate that these
|
||||
are all developer specified: it is conceivable that certain Tokens are not
|
||||
used, but this is user-dependent and should not be trusted.
|
||||
|
||||
We should come up with a consistent way to handle these things and ensure
|
||||
that we get the maximum performance when there is bytecode caches and
|
||||
when there are not. Unfortunately, these two goals seem contrary to each
|
||||
other.
|
||||
|
||||
A peripheral issue is the performance of ConfigSchema, which has been
|
||||
shown take a large, constant amount of initialization time, and is
|
||||
intricately linked to the issue of includes due to its pervasive use
|
||||
in our plugin architecture.
|
||||
|
||||
Pros and Cons
|
||||
-------------
|
||||
|
||||
We will assume that user-based extensions will be included by them.
|
||||
|
||||
Conditional includes:
|
||||
Pros:
|
||||
- User management is simplified; only a single directive needs to be set
|
||||
- Only necessary code is included
|
||||
Cons:
|
||||
- Doesn't play nicely with opcode caches
|
||||
- Adds complexity to standalone version
|
||||
- Optional configuration directives are not exposed without a little
|
||||
extra coaxing (not implemented yet)
|
||||
|
||||
Include it all:
|
||||
Pros:
|
||||
- User management is still simple
|
||||
- Plays nicely with opcode caches and standalone version
|
||||
- All configuration directives are present
|
||||
Cons:
|
||||
- Lots of (how much?) extra code is included
|
||||
- Classes that inherit from external libraries will cause compile
|
||||
errors
|
||||
|
||||
Build an include stub (Let's do this!):
|
||||
Pros:
|
||||
- Only necessary code is included
|
||||
- Plays nicely with opcode caches and standalone version
|
||||
- require (without once) can be used, see above
|
||||
- Could further extend as a compilation to one file
|
||||
Cons:
|
||||
- Not implemented yet
|
||||
- Requires user intervention and use of a command line script
|
||||
- Standalone script must be chained to this
|
||||
- More complex and compiled-language-like
|
||||
- Requires a whole new class of system-wide configuration directives,
|
||||
as configuration objects can be reused
|
||||
- Determining what needs to be included can be complex (see above)
|
||||
- No way of autodetecting dynamically instantiated classes
|
||||
- Might be slow
|
||||
|
||||
Include stubs
|
||||
-------------
|
||||
|
||||
This solution may be "just right" for users who are heavily oriented
|
||||
towards performance. However, there are a number of picky implementation
|
||||
details to work out beforehand.
|
||||
|
||||
The number one concern is how to make the HTML Purifier files "work
|
||||
out of the box", while still being able to easily get them into a form
|
||||
that works with this setup. As the codebase stands right now, it would
|
||||
be necessary to strip out all of the require_once calls. The only way
|
||||
we could get rid of the require_once calls is to use __autoload or
|
||||
use the stub for all cases (which might not be a bad idea).
|
||||
|
||||
Aside
|
||||
-----
|
||||
An important thing to remember, however, is that these require_once's
|
||||
are valuable data about what classes a file needs. Unfortunately, there's
|
||||
no distinction between whether or not the file is needed all the time,
|
||||
or whether or not it is one of our "optional" files. Thus, it is
|
||||
effectively useless.
|
||||
|
||||
Deprecated
|
||||
----------
|
||||
One of the things I'd like to do is have the code search for any classes
|
||||
that are explicitly mentioned in the code. If a class isn't mentioned, I
|
||||
get to assume that it is "optional," i.e. included via introspection.
|
||||
The choice is either to use PHP's tokenizer or use regexps; regexps would
|
||||
be faster but a tokenizer would be more correct. If this ends up being
|
||||
unfeasible, adding dependency comments isn't a bad idea. (This could
|
||||
even be done automatically by search/replacing require_once, although
|
||||
we'd have to manually inspect the results for the optional requires.)
|
||||
|
||||
NOTE: This ends up not being necessary, as we're going to make the user
|
||||
figure out all the extra classes they need, and only include the core
|
||||
which is predetermined.
|
||||
|
||||
Using the autoload framework with include stubs works nicely with
|
||||
introspective classes: instead of having to have require_once inside
|
||||
the function, we can let autoload do the work; we simply need to
|
||||
new $class or accept the object straight from the caller. Handling filters
|
||||
becomes a simple matter of ticking off configuration directives, and
|
||||
if ConfigSchema spits out errors, adding the necessary includes. We could
|
||||
also use the autoload framework as a fallback, in case the user forgets
|
||||
to make the include, but doesn't really care about performance.
|
||||
|
||||
Insight
|
||||
-------
|
||||
All of this talk is merely a natural extension of what our current
|
||||
standalone functionality does. However, instead of having our code
|
||||
perform the includes, or attempting to inline everything that possibly
|
||||
could be used, we boot the issue to the user, making them include
|
||||
everything or setup the fallback autoload handler.
|
||||
|
||||
Configuration Schema
|
||||
--------------------
|
||||
|
||||
A common deficiency for all of the conditional include setups (including
|
||||
the dynamically built include PHP stub) is that if one of this
|
||||
conditionally included files includes a configuration directive, it
|
||||
is not accessible to configdoc. A stopgap solution for this problem is
|
||||
to have it piggy-back off of the data in the merge-library.php script
|
||||
to figure out what extra files it needs to include, but if the file also
|
||||
inherits classes that don't exist, we're in big trouble.
|
||||
|
||||
I think it's high time we centralized the configuration documentation.
|
||||
However, the type checking has been a great boon for the library, and
|
||||
I'd like to keep that. The compromise is to use some other source, and
|
||||
then parse it into the ConfigSchema internal format (sans all of those
|
||||
nasty documentation strings which we really don't need at runtime) and
|
||||
serialize that for future use.
|
||||
|
||||
The next question is that of format. XML is very verbose, and the prospect
|
||||
of setting defaults in it gives me willies. However, this may be necessary.
|
||||
Splitting up the file into manageable chunks may alleviate this trouble,
|
||||
and we may be even want to create our own format optimized for specifying
|
||||
configuration. It might look like (based off the PHPT format, which is
|
||||
nicely compact yet unambiguous and human-readable):
|
||||
|
||||
Core.HiddenElements
|
||||
TYPE: lookup
|
||||
DEFAULT: array('script', 'style') // auto-converted during processing
|
||||
--ALIASES--
|
||||
Core.InvisibleElements, Core.StupidElements
|
||||
--DESCRIPTION--
|
||||
<p>
|
||||
Blah blah
|
||||
</p>
|
||||
|
||||
The first line is the directive name, the lines after that prior to the
|
||||
first --HEADER-- block are single-line values, and then after that
|
||||
the multiline values are there. No value is restricted to a particular
|
||||
format: DEFAULT could very well be multiline if that would be easier.
|
||||
This would make it insanely easy, also, to add arbitrary extra parameters,
|
||||
like:
|
||||
|
||||
VERSION: 3.0.0
|
||||
ALLOWED: 'none', 'light', 'medium', 'heavy' // this is wrapped in array()
|
||||
EXTERNAL: CSSTidy // this would be documented somewhere else with a URL
|
||||
|
||||
The final loss would be that you wouldn't know what file the directive
|
||||
was used in; with some clever regexps it should be possible to
|
||||
figure out where $config->get($ns, $d); occurs. Reflective calls to
|
||||
the configuration object is mitigated by the fact that getBatch is
|
||||
used, so we can simply talk about that in the namespace definition page.
|
||||
This might be slow, but it would only happen when we are creating
|
||||
the documentation for consumption, and is sugar.
|
||||
|
||||
We can put this in a schema/ directory, outside of HTML Purifier. The serialized
|
||||
data gets treated like entities.ser.
|
||||
|
||||
The final thing that needs to be handled is user defined configurations.
|
||||
They can be added at runtime using ConfigSchema::registerDirectory()
|
||||
which globs the directory and grabs all of the directives to be incorporated
|
||||
in. Then, the result is saved. We may want to take advantage of the
|
||||
DefinitionCache framework, although it is not altogether certain what
|
||||
configuration directives would be used to generate our key (meta-directives!)
|
||||
|
||||
Further thoughts
|
||||
----------------
|
||||
Our master configuration schema will only need to be updated once
|
||||
every new version, so it's easily versionable. User specified
|
||||
schema files are far more volatile, but it's far too expensive
|
||||
to check the filemtimes of all the files, so a DefinitionRev style
|
||||
mechanism works better. However, we can uniquely identify the
|
||||
schema based on the directories they loaded, so there's no need
|
||||
for a DefinitionId until we give them full programmatic control.
|
||||
|
||||
These variables should be directly incorporated into ConfigSchema,
|
||||
and ConfigSchema should handle serialization. Some refactoring will be
|
||||
necessary for the DefinitionCache classes, as they are built with
|
||||
Config in mind. If the user changes something, the cache file gets
|
||||
rebuilt. If the version changes, the cache file gets rebuilt. Since
|
||||
our unit tests flush the caches before we start, and the operation is
|
||||
pretty fast, this will not negatively impact unit testing.
|
||||
|
||||
One last thing: certain configuration directives require that files
|
||||
get added. They may even be specified dynamically. It is not a good idea
|
||||
for the HTMLPurifier_Config object to be used directly for such matters.
|
||||
Instead, the userland code should explicitly perform the includes. We may
|
||||
put in something like:
|
||||
|
||||
REQUIRES: HTMLPurifier_Filter_ExtractStyleBlocks
|
||||
|
||||
To indicate that if that class doesn't exist, and the user is attempting
|
||||
to use the directive, we should fatally error out. The stub includes the core files,
|
||||
and the user includes everything else. Any reflective things like new
|
||||
$class would be required to tie in with the configuration.
|
||||
|
||||
It would work very well with rarely used configuration options, but it
|
||||
wouldn't be so good for "core" parts that can be disabled. In such cases
|
||||
the core include file would need to be modified, and the only way
|
||||
to properly do this is use the configuration object. Once again, our
|
||||
ability to create cache keys saves the day again: we can create arbitrary
|
||||
stub files for arbitrary configurations and include those. They could
|
||||
even be the single file affairs. The only thing we'd need to include,
|
||||
then, would be HTMLPurifier_Config! Then, the configuration object would
|
||||
load the library.
|
||||
|
||||
An aside...
|
||||
-----------
|
||||
One questions, however, the wisdom of letting PHP files write other PHP
|
||||
files. It seems like a recipe for disaster, or at least lots of headaches
|
||||
in highly secured setups, where PHP does not have the ability to write
|
||||
to its root. In such cases, we could use sticky bits or tell the user
|
||||
to manually generate the file.
|
||||
|
||||
The other troublesome bit is actually doing the calculations necessary.
|
||||
For certain cases, it's simple (such as URIScheme), but for AttrDef
|
||||
and HTMLModule the dependency trees are very complex in relation to
|
||||
%HTML.Allowed and friends. I think that this idea should be shelved
|
||||
and looked at a later, less insane date.
|
||||
|
||||
An interesting dilemma presents itself when a configuration form is offered
|
||||
to the user. Normally, the configuration object is not accessible without
|
||||
editing PHP code; this facility changes thing. The sensible thing to do
|
||||
is stipulate that all classes required by the directives you allow must
|
||||
be included.
|
||||
|
||||
Unit testing
|
||||
------------
|
||||
|
||||
Setting up the parsing and translation into our existing format would not
|
||||
be difficult to do. It might represent a good time for us to rethink our
|
||||
tests for these facilities; as creative as they are, they are often hacky
|
||||
and require public visibility for things that ought to be protected.
|
||||
This is especially applicable for our DefinitionCache tests.
|
||||
|
||||
Migration
|
||||
---------
|
||||
|
||||
Because we are not *adding* anything essentially new, it should be trivial
|
||||
to write a script to take our existing data and dump it into the new format.
|
||||
Well, not trivial, but fairly easy to accomplish. Primary implementation
|
||||
difficulties would probably involve formatting the file nicely.
|
||||
|
||||
Backwards-compatibility
|
||||
-----------------------
|
||||
|
||||
I expect that the ConfigSchema methods should stick around for a little bit,
|
||||
but display E_USER_NOTICE warnings that they are deprecated. This will
|
||||
require documentation!
|
||||
|
||||
New stuff
|
||||
---------
|
||||
|
||||
VERSION: Version number directive was introduced
|
||||
DEPRECATED-VERSION: If the directive was deprecated, when was it deprecated?
|
||||
DEPRECATED-USE: If the directive was deprecated, what should the user use now?
|
||||
REQUIRES: What classes does this configuration directive require, but are
|
||||
not part of the HTML Purifier core?
|
||||
|
||||
vim: et sw=4 sts=4
|
@@ -35,7 +35,7 @@ help you find the correct functionality more quickly. Here they are:</p>
|
||||
<dt>Harness and Test are reserved class names for unit tests</dt>
|
||||
<dd>The suffix <code>Test</code> indicates that the class is a subclass of UnitTestCase
|
||||
(of the Simpletest library) and is testable. "Harness" indicates a subclass
|
||||
of UnitTestCase that is not meant to be run but to be extended into
|
||||
of UnitTestCase that is not meant to be run but to be extended into
|
||||
concrete test cases and contains custom test methods (i.e. assert*())</dd>
|
||||
|
||||
<dt>Class names do not necessarily represent inheritance hierarchies</dt>
|
||||
@@ -51,7 +51,7 @@ help you find the correct functionality more quickly. Here they are:</p>
|
||||
all must be present in order for proper functioning.</dd>
|
||||
|
||||
<dt>Abbreviations are avoided</dt>
|
||||
<dd>We try to avoid abbreviations as much as possible, but in some cases,
|
||||
<dd>We try to avoid abbreviations as much as possible, but in some cases,
|
||||
abbreviated version is more readable than the full version. Here, we
|
||||
list common abbreviations:
|
||||
<ul>
|
||||
@@ -77,6 +77,7 @@ help you find the correct functionality more quickly. Here they are:</p>
|
||||
|
||||
</dl>
|
||||
|
||||
<div id="version">$Id$</div>
|
||||
|
||||
</body></html>
|
||||
|
||||
<!-- vim: et sw=4 sts=4
|
||||
-->
|
||||
|
@@ -23,11 +23,11 @@ that itch, put it here!</p>
|
||||
<ul>
|
||||
<li>Make Tokens Flyweights (may prove problematic, probably not worth it)</li>
|
||||
<li>Rewrite regexps into PHP code</li>
|
||||
<li>Serialize the Definition object</li>
|
||||
<li>Batch regexp validation (do as many per function call as possible)</li>
|
||||
<li>Parallelize strategies</li>
|
||||
</ul>
|
||||
|
||||
<div id="version">$Id$</div>
|
||||
|
||||
</body></html>
|
||||
|
||||
<!-- vim: et sw=4 sts=4
|
||||
-->
|
||||
|
@@ -34,6 +34,11 @@ thead th {text-align:left;padding:0.1em;background-color:#EEE;}
|
||||
<div id="index">Return to the <a href="index.html">index</a>.</div>
|
||||
<div id="home"><a href="http://htmlpurifier.org/">HTML Purifier</a> End-User Documentation</div>
|
||||
|
||||
<p>
|
||||
<strong>Warning:</strong> This table is kept for historical purposes and
|
||||
is not being actively updated.
|
||||
</p>
|
||||
|
||||
<h2>Key</h2>
|
||||
|
||||
<table cellspacing="0"><tbody>
|
||||
@@ -148,7 +153,7 @@ thead th {text-align:left;padding:0.1em;background-color:#EEE;}
|
||||
Depends on background-image</td></tr>
|
||||
<tr class="css1 impl-yes"><td>background-position</td><td>Depends on background-image</td></tr>
|
||||
<tr class="danger impl-no"><td>cursor</td><td>Dangerous but fluffy</td></tr>
|
||||
<tr class="danger css1"><td>display</td><td>ENUM(...), Dangerous but interesting;
|
||||
<tr class="danger impl-yes"><td>display</td><td>ENUM(...), Dangerous but interesting;
|
||||
will not implement list-item, run-in (Opera only) or table (no IE);
|
||||
inline-block has incomplete IE6 support and requires -moz-inline-box
|
||||
for Mozilla. Unknown target milestone.</td></tr>
|
||||
@@ -167,7 +172,7 @@ thead th {text-align:left;padding:0.1em;background-color:#EEE;}
|
||||
Mostly supported. Unknown target milestone.</td></tr>
|
||||
<tr><td>page-break-inside</td><td>ENUM(avoid, auto), Opera only. Unknown target milestone.</td></tr>
|
||||
<tr class="impl-no"><td>quotes</td><td>May be dropped from CSS2, fairly useless for inline context</td></tr>
|
||||
<tr class="impl-no"><td>visibility</td><td>ENUM(visible, hidden, collapse),
|
||||
<tr class="danger impl-yes"><td>visibility</td><td>ENUM(visible, hidden, collapse),
|
||||
Dangerous</td></tr>
|
||||
<tr class="css1 feature impl-partial"><td>white-space</td><td>ENUM(normal, pre, nowrap, pre-wrap,
|
||||
pre-line), Spotty implementation:
|
||||
@@ -298,6 +303,7 @@ Mozilla on inside and needs -moz-outline, no IE support.</td></tr>
|
||||
|
||||
</table>
|
||||
|
||||
<div id="version">$Id$</div>
|
||||
|
||||
</body></html>
|
||||
|
||||
<!-- vim: et sw=4 sts=4
|
||||
-->
|
||||
|
@@ -18,12 +18,11 @@
|
||||
<div id="home"><a href="http://htmlpurifier.org/">HTML Purifier</a> End-User Documentation</div>
|
||||
|
||||
<p>
|
||||
You may have heard of the <a href="dev-advanced-api.html">Advanced API</a>.
|
||||
If you're interested in reading dry prose and boring functional
|
||||
specifications, feel free to click that link to get a no-nonsense overview
|
||||
on the Advanced API. For the rest of us, there's this tutorial. By the time
|
||||
you're finished reading this, you should have a pretty good idea on
|
||||
how to implement custom tags and attributes that HTML Purifier may not have.
|
||||
HTML Purifier has this quirk where if you try to allow certain elements or
|
||||
attributes, HTML Purifier will tell you that it's not supported, and that
|
||||
you should go to the forums to find out how to implement it. Well, this
|
||||
document is how to implement elements and attributes which HTML Purifier
|
||||
doesn't support out of the box.
|
||||
</p>
|
||||
|
||||
<h2>Is it necessary?</h2>
|
||||
@@ -84,17 +83,6 @@
|
||||
limited to translations) above or below other corresponding text.
|
||||
</p>
|
||||
|
||||
<h3>XHTML 2.0</h3>
|
||||
|
||||
<p>
|
||||
<a href="http://www.w3.org/TR/xhtml2/">XHTML 2.0</a> is still a
|
||||
working draft, so any elements introduced in the
|
||||
specification have not been implemented and will not be implemented
|
||||
until we get a recommendation or proposal. Because XHTML 2.0 is
|
||||
an entirely new markup language, implementing rules for it will be
|
||||
no easy task.
|
||||
</p>
|
||||
|
||||
<h3>HTML 5</h3>
|
||||
|
||||
<p>
|
||||
@@ -156,9 +144,9 @@
|
||||
</p>
|
||||
|
||||
<pre>$config = HTMLPurifier_Config::createDefault();
|
||||
$config->set('HTML', 'DefinitionID', 'enduser-customize.html tutorial');
|
||||
$config->set('HTML', 'DefinitionRev', 1);
|
||||
$def =& $config->getHTMLDefinition(true);</pre>
|
||||
$config->set('HTML.DefinitionID', 'enduser-customize.html tutorial');
|
||||
$config->set('HTML.DefinitionRev', 1);
|
||||
$def = $config->getHTMLDefinition(true);</pre>
|
||||
|
||||
<p>
|
||||
Assuming that HTML Purifier has already been properly loaded (hint:
|
||||
@@ -206,15 +194,15 @@ $def =& $config->getHTMLDefinition(true);</pre>
|
||||
<h2>Turn off caching</h2>
|
||||
|
||||
<p>
|
||||
To make development easier, we're going to temporarily turn off
|
||||
To make development easier, we're going to temporarily turn off
|
||||
definition caching:
|
||||
</p>
|
||||
|
||||
<pre>$config = HTMLPurifier_Config::createDefault();
|
||||
$config->set('HTML', 'DefinitionID', 'enduser-customize.html tutorial');
|
||||
$config->set('HTML', 'DefinitionRev', 1);
|
||||
<strong>$config->set('Core', 'DefinitionCache', null); // remove this later!</strong>
|
||||
$def =& $config->getHTMLDefinition(true);</pre>
|
||||
$config->set('HTML.DefinitionID', 'enduser-customize.html tutorial');
|
||||
$config->set('HTML.DefinitionRev', 1);
|
||||
<strong>$config->set('Cache.DefinitionImpl', null); // TODO: remove this later!</strong>
|
||||
$def = $config->getHTMLDefinition(true);</pre>
|
||||
|
||||
<p>
|
||||
A few things should be mentioned about the caching mechanism before
|
||||
@@ -267,10 +255,10 @@ $def =& $config->getHTMLDefinition(true);</pre>
|
||||
</p>
|
||||
|
||||
<pre>$config = HTMLPurifier_Config::createDefault();
|
||||
$config->set('HTML', 'DefinitionID', 'enduser-customize.html tutorial');
|
||||
$config->set('HTML', 'DefinitionRev', 1);
|
||||
$config->set('Core', 'DefinitionCache', null); // remove this later!
|
||||
$def =& $config->getHTMLDefinition(true);
|
||||
$config->set('HTML.DefinitionID', 'enduser-customize.html tutorial');
|
||||
$config->set('HTML.DefinitionRev', 1);
|
||||
$config->set('Cache.DefinitionImpl', null); // remove this later!
|
||||
$def = $config->getHTMLDefinition(true);
|
||||
<strong>$def->addAttribute('a', 'target', 'Enum#_blank,_self,_target,_top');</strong></pre>
|
||||
|
||||
<p>
|
||||
@@ -372,10 +360,10 @@ $def =& $config->getHTMLDefinition(true);
|
||||
|
||||
<p>
|
||||
For a complete list, consult
|
||||
<a href="http://htmlpurifier.org/svnroot/htmlpurifier/trunk/library/HTMLPurifier/AttrTypes.php"><code>library/HTMLPurifier/AttrTypes.php</code></a>;
|
||||
<a href="http://repo.or.cz/w/htmlpurifier.git?a=blob;hb=HEAD;f=library/HTMLPurifier/AttrTypes.php"><code>library/HTMLPurifier/AttrTypes.php</code></a>;
|
||||
more information on attributes that accept parameters can be found on their
|
||||
respective includes in
|
||||
<a href="http://htmlpurifier.org/svnroot/htmlpurifier/trunk/library/HTMLPurifier/AttrDef/"><code>library/HTMLPurifier/AttrDef</code></a>.
|
||||
<a href="http://repo.or.cz/w/htmlpurifier.git?a=tree;hb=HEAD;f=library/HTMLPurifier/AttrDef"><code>library/HTMLPurifier/AttrDef</code></a>.
|
||||
</p>
|
||||
|
||||
<p>
|
||||
@@ -385,11 +373,11 @@ $def =& $config->getHTMLDefinition(true);
|
||||
</p>
|
||||
|
||||
<pre>$config = HTMLPurifier_Config::createDefault();
|
||||
$config->set('HTML', 'DefinitionID', 'enduser-customize.html tutorial');
|
||||
$config->set('HTML', 'DefinitionRev', 1);
|
||||
$config->set('Core', 'DefinitionCache', null); // remove this later!
|
||||
$def =& $config->getHTMLDefinition(true);
|
||||
<strong>$def->addAttribute('a', 'target', new HTMLPurifier_AttrDef_Enum(
|
||||
$config->set('HTML.DefinitionID', 'enduser-customize.html tutorial');
|
||||
$config->set('HTML.DefinitionRev', 1);
|
||||
$config->set('Cache.DefinitionImpl', null); // remove this later!
|
||||
$def = $config->getHTMLDefinition(true);
|
||||
<strong>$def->addAttribute('a', 'target', new HTMLPurifier_AttrDef_Enum(
|
||||
array('_blank','_self','_target','_top')
|
||||
));</strong></pre>
|
||||
|
||||
@@ -401,8 +389,8 @@ $def =& $config->getHTMLDefinition(true);
|
||||
|
||||
<p>
|
||||
Adding attributes is really small-fry stuff, though, and it was possible
|
||||
to add them (albeit a bit more wordy) prior to 2.0. The real gem of
|
||||
the Advanced API is adding elements. There are five questions to
|
||||
to add them (albeit a bit more wordy) prior to 2.0. The real gem of
|
||||
the Advanced API is adding elements. There are five questions to
|
||||
ask when adding a new element:
|
||||
</p>
|
||||
|
||||
@@ -493,9 +481,9 @@ $def =& $config->getHTMLDefinition(true);
|
||||
<p>
|
||||
The <code>(%flow;)*</code> indicates the allowed children of the
|
||||
<code>li</code> tag: <code>li</code> allows any number of flow
|
||||
elements as its children. (The <code>- O</code> allows the closing tag to be
|
||||
omitted, though in XML this is not allowed.) In HTML Purifier,
|
||||
we'd write it like <code>Flow</code> (here's where the content sets
|
||||
elements as its children. (The <code>- O</code> allows the closing tag to be
|
||||
omitted, though in XML this is not allowed.) In HTML Purifier,
|
||||
we'd write it like <code>Flow</code> (here's where the content sets
|
||||
we were discussing earlier come into play). There are three shorthand
|
||||
content models you can specify:
|
||||
</p>
|
||||
@@ -596,7 +584,7 @@ $def =& $config->getHTMLDefinition(true);
|
||||
be nuked. This is why there is are specific content model types like
|
||||
Optional and Required: while they could be implemented as <code>Custom:
|
||||
(valid | elements)*</code>, the custom classes contain special recovery
|
||||
measures that make sure as much of the user's original content gets
|
||||
measures that make sure as much of the user's original content gets
|
||||
through. HTML Purifier's core, as a rule, does not use Custom.
|
||||
</p>
|
||||
|
||||
@@ -676,7 +664,7 @@ $def =& $config->getHTMLDefinition(true);
|
||||
href="http://www.w3.org/TR/xhtml-modularization/abstract_modules.html#s_commonatts">abstract
|
||||
modules of the XHTML Modularization 1.1</a>. We believe this section
|
||||
to be in error, as <code>br</code> permits the use of the <code>style</code>
|
||||
attribute even though it uses the <code>Core</code> collection, and
|
||||
attribute even though it uses the <code>Core</code> collection, and
|
||||
the DTD and XML Schemas supplied by W3C support our interpretation.
|
||||
</p>
|
||||
|
||||
@@ -724,7 +712,7 @@ $def =& $config->getHTMLDefinition(true);
|
||||
or more flow elements, but no nested <code>form</code>s</strong></li>
|
||||
<li>What attributes does the element allow that are general? <strong>Common</strong></li>
|
||||
<li>What attributes does the element allow that are specific to this element? <strong>A whole bunch, see ATTLIST;
|
||||
we're going to the vital ones: <code>action</code>, <code>method</code> and <code>name</code></strong></li>
|
||||
we're going to do the vital ones: <code>action</code>, <code>method</code> and <code>name</code></strong></li>
|
||||
</ol>
|
||||
|
||||
<p>
|
||||
@@ -732,14 +720,14 @@ $def =& $config->getHTMLDefinition(true);
|
||||
</p>
|
||||
|
||||
<pre>$config = HTMLPurifier_Config::createDefault();
|
||||
$config->set('HTML', 'DefinitionID', 'enduser-customize.html tutorial');
|
||||
$config->set('HTML', 'DefinitionRev', 1);
|
||||
$config->set('Core', 'DefinitionCache', null); // remove this later!
|
||||
$def =& $config->getHTMLDefinition(true);
|
||||
$def->addAttribute('a', 'target', new HTMLPurifier_AttrDef_Enum(
|
||||
$config->set('HTML.DefinitionID', 'enduser-customize.html tutorial');
|
||||
$config->set('HTML.DefinitionRev', 1);
|
||||
$config->set('Cache.DefinitionImpl', null); // remove this later!
|
||||
$def = $config->getHTMLDefinition(true);
|
||||
$def->addAttribute('a', 'target', new HTMLPurifier_AttrDef_Enum(
|
||||
array('_blank','_self','_target','_top')
|
||||
));
|
||||
<strong>$form =& $def->addElement(
|
||||
<strong>$form = $def->addElement(
|
||||
'form', // name
|
||||
'Block', // content set
|
||||
'Flow', // allowed children
|
||||
@@ -750,7 +738,7 @@ $def->addAttribute('a', 'target', new HTMLPurifier_AttrDef_Enum(
|
||||
'name' => 'ID'
|
||||
)
|
||||
);
|
||||
$form->excludes = array('form' => true);</strong></pre>
|
||||
$form->excludes = array('form' => true);</strong></pre>
|
||||
|
||||
<p>
|
||||
Each of the parameters corresponds to one of the questions we asked.
|
||||
@@ -764,7 +752,7 @@ $form->excludes = array('form' => true);</strong></pre>
|
||||
<p>
|
||||
And that's all there is to it! Implementing the rest of the form
|
||||
module is left as an exercise to the user; to see more examples
|
||||
check the <a href="http://htmlpurifier.org/svnroot/htmlpurifier/trunk/library/HTMLPurifier/HTMLModule/"><code>library/HTMLPurifier/HTMLModule/</code></a> directory
|
||||
check the <a href="http://repo.or.cz/w/htmlpurifier.git?a=tree;hb=HEAD;f=library/HTMLPurifier/HTMLModule"><code>library/HTMLPurifier/HTMLModule/</code></a> directory
|
||||
in your local HTML Purifier installation.
|
||||
</p>
|
||||
|
||||
@@ -789,10 +777,11 @@ $form->excludes = array('form' => true);</strong></pre>
|
||||
</p>
|
||||
|
||||
<ul>
|
||||
<li><a href="http://htmlpurifier.org/svnroot/htmlpurifier/trunk/library/HTMLPurifier/HTMLModule.php"><code>library/HTMLPurifier/HTMLModule.php</code></a></li>
|
||||
<li><a href="http://htmlpurifier.org/svnroot/htmlpurifier/trunk/library/HTMLPurifier/ElementDef.php"><code>library/HTMLPurifier/ElementDef.php</code></a></li>
|
||||
<li><a href="http://repo.or.cz/w/htmlpurifier.git?a=blob;hb=HEAD;f=library/HTMLPurifier/HTMLModule.php"><code>library/HTMLPurifier/HTMLModule.php</code></a></li>
|
||||
<li><a href="http://repo.or.cz/w/htmlpurifier.git?a=blob;hb=HEAD;f=library/HTMLPurifier/ElementDef.php"><code>library/HTMLPurifier/ElementDef.php</code></a></li>
|
||||
</ul>
|
||||
|
||||
<div id="version">$Id: enduser-tidy.html 1158 2007-06-18 19:26:29Z Edward $</div>
|
||||
|
||||
</body></html>
|
||||
|
||||
<!-- vim: et sw=4 sts=4
|
||||
-->
|
||||
|
@@ -31,7 +31,7 @@ by default.</p>
|
||||
|
||||
<p>IDs, however, are quite useful functionality to have, so if users start
|
||||
complaining about broken anchors you'll probably want to turn them back on
|
||||
with %HTML.EnableAttrID. But before you go mucking around with the config
|
||||
with %Attr.EnableID. But before you go mucking around with the config
|
||||
object, it's probably worth to take some precautions to keep your page
|
||||
validating. Why?</p>
|
||||
|
||||
@@ -56,8 +56,8 @@ validating. Why?</p>
|
||||
deal with the most obvious solution: preventing users from using any IDs that
|
||||
appear elsewhere on the document. The method is simple:</p>
|
||||
|
||||
<pre>$config->set('HTML', 'EnableAttrID', true);
|
||||
$config->set('Attr', 'IDBlacklist' array(
|
||||
<pre>$config->set('Attr.EnableID', true);
|
||||
$config->set('Attr.IDBlacklist' array(
|
||||
'list', 'of', 'attribute', 'values', 'that', 'are', 'forbidden'
|
||||
));</pre>
|
||||
|
||||
@@ -88,8 +88,8 @@ all, they might have simply specified a duplicate ID by accident.</p>
|
||||
<p>This method, too, is quite simple: add a prefix to all user IDs. With this
|
||||
code:</p>
|
||||
|
||||
<pre>$config->set('HTML', 'EnableAttrID', true);
|
||||
$config->set('Attr', 'IDPrefix', 'user_');</pre>
|
||||
<pre>$config->set('Attr.EnableID', true);
|
||||
$config->set('Attr.IDPrefix', 'user_');</pre>
|
||||
|
||||
<p>...this:</p>
|
||||
|
||||
@@ -109,7 +109,7 @@ user_ to the beginning."</p>
|
||||
nothing about multiple HTML Purifier outputs on one page. Thus, we have
|
||||
a second configuration value to piggy-back off of: %Attr.IDPrefixLocal:</p>
|
||||
|
||||
<pre>$config->set('Attr', 'IDPrefixLocal', 'comment' . $id . '_');</pre>
|
||||
<pre>$config->set('Attr.IDPrefixLocal', 'comment' . $id . '_');</pre>
|
||||
|
||||
<p>This new attributes does nothing but append on to regular IDPrefix, but is
|
||||
special in that it is volatile: it's value is determined at run-time and
|
||||
@@ -137,11 +137,12 @@ anchors is beyond me.</p>
|
||||
|
||||
<p>To revert back to pre-1.2.0 behavior, simply:</p>
|
||||
|
||||
<pre>$config->set('HTML', 'EnableAttrID', true);</pre>
|
||||
<pre>$config->set('Attr.EnableID', true);</pre>
|
||||
|
||||
<p>Don't come crying to me when your page mysteriously stops validating, though.</p>
|
||||
|
||||
<div id="version">$Id$</div>
|
||||
|
||||
</body>
|
||||
</html>
|
||||
|
||||
<!-- vim: et sw=4 sts=4
|
||||
-->
|
||||
|
@@ -55,3 +55,5 @@ HTML tags. Things like blog comments are, in all likelihood, most appropriately
|
||||
written in an extremely restrictive set of markup that doesn't require
|
||||
all this functionality (or not written in HTML at all), although this may
|
||||
be changing in the future with the addition of levels of filtering.
|
||||
|
||||
vim: et sw=4 sts=4
|
||||
|
@@ -14,3 +14,5 @@ to be effective. Things to remember:
|
||||
|
||||
4. CSS: document pending
|
||||
Explain which CSS styles we blocked and why.
|
||||
|
||||
vim: et sw=4 sts=4
|
||||
|
@@ -17,25 +17,25 @@
|
||||
<div id="index">Return to the <a href="index.html">index</a>.</div>
|
||||
<div id="home"><a href="http://htmlpurifier.org/">HTML Purifier</a> End-User Documentation</div>
|
||||
|
||||
<p>HTML Purifier is a very powerful library. But with power comes great
|
||||
responsibility, in the form of longer execution times. Remember, this
|
||||
library isn't lightly grazing over submitted HTML: it's deconstructing
|
||||
the whole thing, rigorously checking the parts, and then putting it back
|
||||
<p>HTML Purifier is a very powerful library. But with power comes great
|
||||
responsibility, in the form of longer execution times. Remember, this
|
||||
library isn't lightly grazing over submitted HTML: it's deconstructing
|
||||
the whole thing, rigorously checking the parts, and then putting it back
|
||||
together. </p>
|
||||
|
||||
<p>So, if it so turns out that HTML Purifier is kinda too slow for outbound
|
||||
<p>So, if it so turns out that HTML Purifier is kinda too slow for outbound
|
||||
filtering, you've got a few options: </p>
|
||||
|
||||
<h2>Inbound filtering</h2>
|
||||
|
||||
<p>Perform filtering of HTML when it's submitted by the user. Since the
|
||||
user is already submitting something, an extra half a second tacked on
|
||||
to the load time probably isn't going to be that huge of a problem.
|
||||
Then, displaying the content is a simple a manner of outputting it
|
||||
directly from your database/filesystem. The trouble with this method is
|
||||
that your user loses the original text, and when doing edits, will be
|
||||
handling the filtered text. While this may be a good thing, especially
|
||||
if you're using a WYSIWYG editor, it can also result in data-loss if a
|
||||
<p>Perform filtering of HTML when it's submitted by the user. Since the
|
||||
user is already submitting something, an extra half a second tacked on
|
||||
to the load time probably isn't going to be that huge of a problem.
|
||||
Then, displaying the content is a simple a manner of outputting it
|
||||
directly from your database/filesystem. The trouble with this method is
|
||||
that your user loses the original text, and when doing edits, will be
|
||||
handling the filtered text. While this may be a good thing, especially
|
||||
if you're using a WYSIWYG editor, it can also result in data-loss if a
|
||||
user makes a typo. </p>
|
||||
|
||||
<p>Example (non-functional):</p>
|
||||
@@ -66,14 +66,14 @@ user makes a typo. </p>
|
||||
|
||||
<h2>Caching the filtered output</h2>
|
||||
|
||||
<p>Accept the submitted text and put it unaltered into the database, but
|
||||
then also generate a filtered version and stash that in the database.
|
||||
Serve the filtered version to readers, and the unaltered version to
|
||||
editors. If need be, you can invalidate the cache and have the cached
|
||||
filtered version be regenerated on the first page view. Pros? Full data
|
||||
retention. Cons? It's more complicated, and opens other editors up to
|
||||
XSS if they are using a WYSIWYG editor (to fix that, they'd have to be
|
||||
able to get their hands on the *really* original text served in
|
||||
<p>Accept the submitted text and put it unaltered into the database, but
|
||||
then also generate a filtered version and stash that in the database.
|
||||
Serve the filtered version to readers, and the unaltered version to
|
||||
editors. If need be, you can invalidate the cache and have the cached
|
||||
filtered version be regenerated on the first page view. Pros? Full data
|
||||
retention. Cons? It's more complicated, and opens other editors up to
|
||||
XSS if they are using a WYSIWYG editor (to fix that, they'd have to be
|
||||
able to get their hands on the *really* original text served in
|
||||
plaintext mode). </p>
|
||||
|
||||
<p>Example (non-functional):</p>
|
||||
@@ -108,10 +108,13 @@ plaintext mode). </p>
|
||||
<p>In short, inbound filtering is the simple option and caching is the
|
||||
robust option (albeit with bigger storage requirements). </p>
|
||||
|
||||
<p>There is a third option, independent of the two we've discussed: profile
|
||||
and optimize HTMLPurifier yourself. Be sure to report back your results
|
||||
if you decide to do that! Especially if you port HTML Purifier to C++.
|
||||
<p>There is a third option, independent of the two we've discussed: profile
|
||||
and optimize HTMLPurifier yourself. Be sure to report back your results
|
||||
if you decide to do that! Especially if you port HTML Purifier to C++.
|
||||
<tt>;-)</tt></p>
|
||||
|
||||
</body>
|
||||
</html>
|
||||
|
||||
<!-- vim: et sw=4 sts=4
|
||||
-->
|
||||
|
@@ -50,7 +50,7 @@ not need Tidy installed on your PHP to use these features!</strong></p>
|
||||
|
||||
<h2>What are levels?</h2>
|
||||
|
||||
<p>Levels describe how aggressive the Tidy module should be when
|
||||
<p>Levels describe how aggressive the Tidy module should be when
|
||||
cleaning up HTML. There are four levels to pick: none, light, medium
|
||||
and heavy. Each of these levels has a well-defined set of behavior
|
||||
associated with it, although it may change depending on your doctype.</p>
|
||||
@@ -76,7 +76,7 @@ associated with it, although it may change depending on your doctype.</p>
|
||||
change the level of cleaning by setting the %HTML.TidyLevel configuration
|
||||
directive:</p>
|
||||
|
||||
<pre>$config->set('HTML', 'TidyLevel', 'heavy'); // burn baby burn!</pre>
|
||||
<pre>$config->set('HTML.TidyLevel', 'heavy'); // burn baby burn!</pre>
|
||||
|
||||
<h2>Is the light level really light?</h2>
|
||||
|
||||
@@ -165,17 +165,17 @@ smoketest</a>.</p>
|
||||
so happy about the br@clear implementation. That's perfectly fine!
|
||||
HTML Purifier will make accomodations:</p>
|
||||
|
||||
<pre>$config->set('HTML', 'Doctype', 'XHTML 1.0 Transitional');
|
||||
$config->set('HTML', 'TidyLevel', 'heavy'); // all changes, minus...
|
||||
<strong>$config->set('HTML', 'TidyRemove', 'br@clear');</strong></pre>
|
||||
<pre>$config->set('HTML.Doctype', 'XHTML 1.0 Transitional');
|
||||
$config->set('HTML.TidyLevel', 'heavy'); // all changes, minus...
|
||||
<strong>$config->set('HTML.TidyRemove', 'br@clear');</strong></pre>
|
||||
|
||||
<p>That third line does the magic, removing the br@clear fix
|
||||
from the module, ensuring that <code><br clear="both" /></code>
|
||||
will pass through unharmed. The reverse is possible too:</p>
|
||||
|
||||
<pre>$config->set('HTML', 'Doctype', 'XHTML 1.0 Transitional');
|
||||
$config->set('HTML', 'TidyLevel', 'none'); // no changes, plus...
|
||||
<strong>$config->set('HTML', 'TidyAdd', 'p@align');</strong></pre>
|
||||
<pre>$config->set('HTML.Doctype', 'XHTML 1.0 Transitional');
|
||||
$config->set('HTML.TidyLevel', 'none'); // no changes, plus...
|
||||
<strong>$config->set('HTML.TidyAdd', 'p@align');</strong></pre>
|
||||
|
||||
<p>In this case, all transformations are shut off, except for the p@align
|
||||
one, which you found handy.</p>
|
||||
@@ -225,6 +225,7 @@ and if that still doesn't satisfy your appetite, do some fine-tuning.
|
||||
Other than that, don't worry about it: this all works silently and
|
||||
effectively in the background.</p>
|
||||
|
||||
<div id="version">$Id$</div>
|
||||
|
||||
</body></html>
|
||||
|
||||
<!-- vim: et sw=4 sts=4
|
||||
-->
|
||||
|
@@ -33,9 +33,9 @@
|
||||
|
||||
<pre>class HTMLPurifier_URIFilter_<strong>NameOfFilter</strong> extends HTMLPurifier_URIFilter
|
||||
{
|
||||
var $name = '<strong>NameOfFilter</strong>';
|
||||
function prepare($config) {}
|
||||
function filter(&$uri, $config, &$context) {}
|
||||
public $name = '<strong>NameOfFilter</strong>';
|
||||
public function prepare($config) {}
|
||||
public function filter(&$uri, $config, $context) {}
|
||||
}</pre>
|
||||
|
||||
<p>
|
||||
@@ -56,12 +56,12 @@
|
||||
|
||||
<pre>class HTMLPurifier_URI
|
||||
{
|
||||
var $scheme, $userinfo, $host, $port, $path, $query, $fragment;
|
||||
function HTMLPurifier_URI($scheme, $userinfo, $host, $port, $path, $query, $fragment);
|
||||
function toString();
|
||||
function copy();
|
||||
function getSchemeObj($config, &$context);
|
||||
function validate($config, &$context);
|
||||
public $scheme, $userinfo, $host, $port, $path, $query, $fragment;
|
||||
public function HTMLPurifier_URI($scheme, $userinfo, $host, $port, $path, $query, $fragment);
|
||||
public function toString();
|
||||
public function copy();
|
||||
public function getSchemeObj($config, $context);
|
||||
public function validate($config, $context);
|
||||
}</pre>
|
||||
|
||||
<p>
|
||||
@@ -92,7 +92,7 @@
|
||||
</tbody></table>
|
||||
|
||||
<p>
|
||||
Because the URI is presented to us in this form, and not
|
||||
Because the URI is presented to us in this form, and not
|
||||
<code>http://bob@example.com:8080/foo.php?q=string#hash</code>, it saves us
|
||||
a lot of trouble in having to parse the URI every time we want to filter
|
||||
it. For the record, the above URI has the following components:
|
||||
@@ -130,30 +130,26 @@
|
||||
</p>
|
||||
|
||||
<p>
|
||||
Let's suppose I wanted to write a filter that de-internationalized domain
|
||||
names by converting them to <a href="http://en.wikipedia.org/wiki/Punycode">Punycode</a>.
|
||||
Assuming that <code>punycode_encode($input)</code> converts <code>$input</code> to
|
||||
Punycode and returns <code>false</code> on failure:
|
||||
Let's suppose I wanted to write a filter that converted links with a
|
||||
custom <code>image</code> scheme to its corresponding real path on
|
||||
our website:
|
||||
</p>
|
||||
|
||||
<pre>class HTMLPurifier_URIFilter_ConvertIDNToPunycode extends HTMLPurifier_URIFilter
|
||||
<pre>class HTMLPurifier_URIFilter_TransformImageScheme extends HTMLPurifier_URIFilter
|
||||
{
|
||||
var $name = 'ConvertIDNToPunycode';
|
||||
function filter(&$uri, $config, &$context) {
|
||||
if (is_null($uri->host)) return true;
|
||||
if ($uri->host == utf8_decode($uri->host)) {
|
||||
// is ASCII, abort
|
||||
return true;
|
||||
}
|
||||
$host = punycode_encode($uri->host);
|
||||
if ($host === false) return false;
|
||||
$uri->host = $host;
|
||||
public $name = 'TransformImageScheme';
|
||||
public function filter(&$uri, $config, $context) {
|
||||
if ($uri->scheme !== 'image') return true;
|
||||
$img_name = $uri->path;
|
||||
// Overwrite the previous URI object
|
||||
$uri = new HTMLPurifier_URI('http', null, null, null, '/img/' . $img_name . '.png', null, null);
|
||||
return true;
|
||||
}
|
||||
}</pre>
|
||||
|
||||
<p>
|
||||
Notice I did not <code>return $uri;</code>.
|
||||
Notice I did not <code>return $uri;</code>. This filter would turn
|
||||
<code>image:Foo</code> into <code>/img/Foo.png</code>.
|
||||
</p>
|
||||
|
||||
<h2>Activating your filter</h2>
|
||||
@@ -163,39 +159,46 @@
|
||||
to use it. Fortunately, this part's simple:
|
||||
</p>
|
||||
|
||||
<pre>$uri =& $config->getDefinition('URI');
|
||||
$uri->addFilter(new HTMLPurifier_URIFilter_<strong>NameOfFilter</strong>());</pre>
|
||||
<pre>$uri = $config->getDefinition('URI');
|
||||
$uri->addFilter(new HTMLPurifier_URIFilter_<strong>NameOfFilter</strong>(), $config);</pre>
|
||||
|
||||
<p>
|
||||
If you want to be really fancy, you can define a configuration directive
|
||||
for your filter and have HTML Purifier automatically manage whether or
|
||||
not your filter gets loaded or not (this is how internal filters manage
|
||||
things):
|
||||
After adding a filter, you won't be able to set configuration directives.
|
||||
Structure your code accordingly.
|
||||
</p>
|
||||
|
||||
<pre>HTMLPurifier_ConfigSchema::define(
|
||||
'URI', '<strong>NameOfFilter</strong>', false, 'bool',
|
||||
'<strong>What your filter does.</strong>'
|
||||
);
|
||||
$uri =& $config->getDefinition('URI', true);
|
||||
$uri->registerFilter(new HTMLPurifier_URIFilter_<strong>NameOfFilter</strong>());
|
||||
<!-- XXX: link to new documentation system -->
|
||||
|
||||
<h2>Post-filter</h2>
|
||||
|
||||
<p>
|
||||
Remember our TransformImageScheme filter? That filter acted before we had
|
||||
performed scheme validation; otherwise, the URI would have been filtered
|
||||
out when it was discovered that there was no image scheme. Well, a post-filter
|
||||
is run after scheme specific validation, so it's ideal for bulk
|
||||
post-processing of URIs, including munging. To specify a URI as a post-filter,
|
||||
set the <code>$post</code> member variable to TRUE.
|
||||
</p>
|
||||
|
||||
<pre>class HTMLPurifier_URIFilter_MyPostFilter extends HTMLPurifier_URIFilter
|
||||
{
|
||||
public $name = 'MyPostFilter';
|
||||
public $post = true;
|
||||
// ... extra code here
|
||||
}
|
||||
</pre>
|
||||
|
||||
<p>
|
||||
Now, your filter will only be called when %URI.<strong>NameOfFilter</strong>
|
||||
is set to true.
|
||||
</p>
|
||||
|
||||
<h2>Examples</h2>
|
||||
|
||||
<p>
|
||||
Check the
|
||||
<a href="http://htmlpurifier.org/svnroot/htmlpurifier/trunk/library/HTMLPurifier/URIFilter/">URIFilter</a>
|
||||
directory for more implementation examples, and see <a href="http://htmlpurifier.org/svnroot/htmlpurifier/trunk/docs/proposal-new-directives.txt">the
|
||||
<a href="http://repo.or.cz/w/htmlpurifier.git?a=tree;hb=HEAD;f=library/HTMLPurifier/URIFilter">URIFilter</a>
|
||||
directory for more implementation examples, and see <a href="proposal-new-directives.txt">the
|
||||
new directives proposal document</a> for ideas on what could be implemented
|
||||
as a filter.
|
||||
</p>
|
||||
|
||||
<div id="version">$Id$</div>
|
||||
|
||||
</body></html>
|
||||
|
||||
<!-- vim: et sw=4 sts=4
|
||||
-->
|
||||
|
@@ -5,7 +5,6 @@
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
|
||||
<meta name="description" content="Describes the rationale for using UTF-8, the ramifications otherwise, and how to make the switch." />
|
||||
<link rel="stylesheet" type="text/css" href="./style.css" />
|
||||
<script defer="defer" type="text/javascript" src="./toc-gen.js"></script>
|
||||
<style type="text/css">
|
||||
.minor td {font-style:italic;}
|
||||
</style>
|
||||
@@ -107,7 +106,7 @@ there are now many character encodings floating around.</p>
|
||||
interpret raw zeroes and ones into real characters. It
|
||||
usually does this by pairing numbers with characters.</p>
|
||||
<p>There are many different types of character encodings floating
|
||||
around, but the ones we deal most frequently with are ASCII,
|
||||
around, but the ones we deal most frequently with are ASCII,
|
||||
8-bit encodings, and Unicode-based encodings.</p>
|
||||
<ul>
|
||||
<li><strong>ASCII</strong> is a 7-bit encoding based on the
|
||||
@@ -119,9 +118,8 @@ there are now many character encodings floating around.</p>
|
||||
see a page on the web, chances are it's encoded in one
|
||||
of these encodings.</li>
|
||||
<li><strong>Unicode-based encodings</strong> implement the
|
||||
Unicode standard and include UTF-8, UCS-2 and UTF-16.
|
||||
They go beyond 8-bits (the first two are variable length,
|
||||
while the second one uses 16-bits), and support almost
|
||||
Unicode standard and include UTF-8, UTF-16 and UTF-32/UCS-4.
|
||||
They go beyond 8-bits and support almost
|
||||
every language in the world. UTF-8 is gaining traction
|
||||
as the dominant international encoding of the web.</li>
|
||||
</ul>
|
||||
@@ -308,7 +306,7 @@ languages</a>. The appropriate code is:</p>
|
||||
|
||||
<p>...replacing UTF-8 with whatever your embedded encoding is.
|
||||
This code must come before any output, so be careful about
|
||||
stray whitespace in your application (i.e., any whitespace before
|
||||
stray whitespace in your application (i.e., any whitespace before
|
||||
output excluding whitespace within <?php ?> tags).</p>
|
||||
|
||||
<h4 id="fixcharset-server-phpini">PHP ini directive</h4>
|
||||
@@ -368,9 +366,9 @@ to send anything at all:</p>
|
||||
<pre><a href="http://httpd.apache.org/docs/1.3/mod/core.html#adddefaultcharset">AddDefaultCharset</a> Off</pre>
|
||||
|
||||
<p>...making your internal charset declaration (usually the <code>META</code> tags)
|
||||
the sole source of character encoding
|
||||
information. In these cases, it is <em>especially</em> important to make
|
||||
sure you have valid <code>META</code> tags on your pages and all the
|
||||
the sole source of character encoding
|
||||
information. In these cases, it is <em>especially</em> important to make
|
||||
sure you have valid <code>META</code> tags on your pages and all the
|
||||
text before them is ASCII.</p>
|
||||
|
||||
<blockquote class="aside"><p>These directives can also be
|
||||
@@ -445,9 +443,9 @@ Declarations. They look like:</p>
|
||||
<p>For XHTML, this XML Declaration theoretically
|
||||
overrides the <code>META</code> tag. In reality, this happens only when the
|
||||
XHTML is actually served as legit XML and not HTML, which is almost always
|
||||
never due to Internet Explorer's lack of support for
|
||||
never due to Internet Explorer's lack of support for
|
||||
<code>application/xhtml+xml</code> (even though doing so is often
|
||||
argued to be <a href="http://www.hixie.ch/advocacy/xhtml">good
|
||||
argued to be <a href="http://www.hixie.ch/advocacy/xhtml">good
|
||||
practice</a> and is required by the XHTML 1.1 specification).</p>
|
||||
|
||||
<p>For XML, however, this XML Declaration is extremely important.
|
||||
@@ -458,7 +456,7 @@ ISO-8859-1 encoding (you see this in garbled RSS feeds).</p>
|
||||
|
||||
<p>In short, if you use XHTML and have gone through the
|
||||
trouble of adding the XML Declaration, make sure it jives
|
||||
with your <code>META</code> tags (which should only be present
|
||||
with your <code>META</code> tags (which should only be present
|
||||
if served in text/html) and HTTP headers.</p>
|
||||
|
||||
<h3 id="fixcharset-internals">Inside the process</h3>
|
||||
@@ -483,7 +481,7 @@ if we don't know it's character encoding? And how do we figure out
|
||||
the character encoding, if we don't know the contents of the
|
||||
<code>META</code> tag?</p>
|
||||
|
||||
<p>Fortunantely for us, the characters we need to write the
|
||||
<p>Fortunately for us, the characters we need to write the
|
||||
<code>META</code> are in ASCII, which is pretty much universal
|
||||
over every character encoding that is in common use today. So,
|
||||
all the web-browser has to do is parse all the way down until
|
||||
@@ -491,7 +489,7 @@ it gets to the Content-Type tag, extract the character encoding
|
||||
tag, then re-parse the document according to this new information.</p>
|
||||
|
||||
<p>Obviously this is complicated, so browsers prefer the simpler
|
||||
and more efficient solution: get the character encoding from a
|
||||
and more efficient solution: get the character encoding from a
|
||||
somewhere other than the document itself, i.e. the HTTP headers,
|
||||
much to the chagrin of HTML authors who can't set these headers.</p>
|
||||
|
||||
@@ -528,7 +526,7 @@ you don't have to use those user-unfriendly entities.</p>
|
||||
|
||||
<h3 id="whyutf8-user">User-friendly</h3>
|
||||
|
||||
<p>Websites encoded in Latin-1 (ISO-8859-1) which ocassionally need
|
||||
<p>Websites encoded in Latin-1 (ISO-8859-1) which occasionally need
|
||||
a special character outside of their scope often will use a character
|
||||
entity reference to achieve the desired effect. For instance, θ can be
|
||||
written <code>&theta;</code>, regardless of the character encoding's
|
||||
@@ -580,19 +578,21 @@ files.</p>
|
||||
<a href="http://web.archive.org/web/20060427015200/ppewww.ph.gla.ac.uk/~flavell/charset/form-i18n.html">
|
||||
<code>FORM</code> submission and i18n</a>. That document contains lots
|
||||
of useful information, but is written in a rambly manner, so
|
||||
here I try to get right to the point. (Note: the original has
|
||||
here I try to get right to the point. (Note: the original has
|
||||
disappeared off the web, so I am linking to the Web Archive copy.)</p>
|
||||
|
||||
<h4 id="whyutf8-forms-urlencoded"><code>application/x-www-form-urlencoded</code></h4>
|
||||
|
||||
<p>This is the Content-Type that GET requests must use, and POST requests
|
||||
use by default. It involves the ubiquituous percent encoding format that
|
||||
use by default. It involves the ubiquitous percent encoding format that
|
||||
looks something like: <code>%C3%86</code>. There is no official way of
|
||||
determining the character encoding of such a request, since the percent
|
||||
encoding operates on a byte level, so it is usually assumed that it
|
||||
is the same as the encoding the page containing the form was submitted
|
||||
in. You'll run into very few problems if you only use characters in
|
||||
the character encoding you chose.</p>
|
||||
in. (<a href="http://tools.ietf.org/html/rfc3986#section-2.5">RFC 3986</a>
|
||||
recommends that textual identifiers be translated to UTF-8; however, browser
|
||||
compliance is spotty.) You'll run into very few problems
|
||||
if you only use characters in the character encoding you chose.</p>
|
||||
|
||||
<p>However, once you start adding characters outside of your encoding
|
||||
(and this is a lot more common than you may think: take curly
|
||||
@@ -674,7 +674,7 @@ it up to the module iconv to do the dirty work.</p>
|
||||
<p>This approach, however, is not perfect. iconv is blithely unaware
|
||||
of HTML character entities. HTML Purifier, in order to
|
||||
protect against sophisticated escaping schemes, normalizes all character
|
||||
and numeric entitie references before processing the text. This leads to
|
||||
and numeric entity references before processing the text. This leads to
|
||||
one important ramification:</p>
|
||||
|
||||
<p><strong>Any character that is not supported by the target character
|
||||
@@ -770,7 +770,7 @@ the text when you try to convert it to UTF-8. You'll have to convert
|
||||
it to a binary field, convert it to a Shift-JIS field (the real encoding),
|
||||
and then finally to UTF-8. Many a website had pages irreversibly mangled
|
||||
because they didn't realize that they'd been deluding themselves about
|
||||
the character encoding all along, don't become the next victim.</p>
|
||||
the character encoding all along; don't become the next victim.</p>
|
||||
|
||||
<p>For <a href="http://www.postgresql.org/docs/8.2/static/multibyte.html">PostgreSQL</a>, there appears to be no direct way to change the
|
||||
encoding of a database (as of 8.2). You will have to dump the data, and then reimport
|
||||
@@ -790,7 +790,7 @@ usually supported).</p>
|
||||
|
||||
<h4 id="migrate-db-binary">Binary</h4>
|
||||
|
||||
<p>Due to the abovementioned compatibility issues, a more interoperable
|
||||
<p>Due to the aforementioned compatibility issues, a more interoperable
|
||||
way of storing UTF-8 text is to stuff it in a binary datatype.
|
||||
<code>CHAR</code> becomes <code>BINARY</code>, <code>VARCHAR</code> becomes
|
||||
<code>VARBINARY</code> and <code>TEXT</code> becomes <code>BLOB</code>.
|
||||
@@ -886,7 +886,7 @@ sure the page is saved WITHOUT the BOM.</p>
|
||||
</blockquote>
|
||||
|
||||
<p>If you are reading in text files to insert into the middle of another
|
||||
page, it is strongly advised (but not strictly necessary) that you replace out the UTF-8 byte
|
||||
page, it is strongly advised (but not strictly necessary) that you replace out the UTF-8 byte
|
||||
sequence for BOM <code>"\xEF\xBB\xBF"</code> before inserting it in,
|
||||
via:</p>
|
||||
|
||||
@@ -917,8 +917,8 @@ anyway. So we'll deal with the other two edge cases.</p>
|
||||
would like to read your website but get heaps of question marks or
|
||||
other meaningless characters. Fixing this problem requires the
|
||||
installation of a font or language pack which is often highly
|
||||
dependent on what the language is. <a href="http://bn.wikipedia.org/wiki/%E0%A6%89%E0%A6%87%E0%A6%95%E0%A6%BF%E0%A6%AA%E0%A7%87%E0%A6%A1%E0%A6%BF%E0%A6%AF%E0%A6%BC%E0%A6%BE:Bangla_script_display_help">Here is an example</a>
|
||||
of such a help file for the Bengali language, I am sure there are
|
||||
dependent on what the language is. <a href="http://bn.wikipedia.org/wiki/%E0%A6%89%E0%A6%87%E0%A6%95%E0%A6%BF%E0%A6%AA%E0%A7%87%E0%A6%A1%E0%A6%BF%E0%A6%AF%E0%A6%BC%E0%A6%BE:Bangla_script_display_and_input_help">Here is an example</a>
|
||||
of such a help file for the Bengali language; I am sure there are
|
||||
others out there too. You just have to point users to the appropriate
|
||||
help file.</p>
|
||||
|
||||
@@ -928,7 +928,7 @@ help file.</p>
|
||||
characters embedded in what otherwise would be very bland ASCII are
|
||||
letters of the
|
||||
<a href="http://en.wikipedia.org/wiki/International_Phonetic_Alphabet">International
|
||||
Phonetic Alphabet (IPA)</a>, use to designate pronounciations in a very standard
|
||||
Phonetic Alphabet (IPA)</a>, use to designate pronunciations in a very standard
|
||||
manner (you probably see them all the time in your dictionary). Your
|
||||
average font probably won't have support for all of the IPA characters
|
||||
like ʘ (bilabial click) or ʒ (voiced postalveolar fricative).
|
||||
@@ -941,15 +941,15 @@ most widely used browser in the entire world? Microsoft IE 6
|
||||
is not smart enough to borrow from other fonts when a character isn't
|
||||
present, so more often than not you'll be slapped with a nice big �.
|
||||
To get things to work, MSIE 6 needs a little nudge. You could configure it
|
||||
to use a different font to render the text, but you can acheive the same
|
||||
to use a different font to render the text, but you can achieve the same
|
||||
effect by selectively changing the font for blocks of special characters
|
||||
to known good Unicode fonts.</p>
|
||||
|
||||
<p>Fortunantely, the folks over at Wikipedia have already done all the
|
||||
<p>Fortunately, the folks over at Wikipedia have already done all the
|
||||
heavy lifting for you. Get the CSS from the horses mouth here:
|
||||
<a href="http://en.wikipedia.org/wiki/MediaWiki:Common.css">Common.css</a>,
|
||||
and search for ".IPA" There are also a smattering of
|
||||
other classes you can use for other purposes, check out
|
||||
other classes you can use for other purposes, check out
|
||||
<a href="http://meta.wikimedia.org/wiki/Help:Special_characters#Displaying_Special_Characters">this page</a>
|
||||
for more details. For you lazy ones, this should work:</p>
|
||||
|
||||
@@ -972,7 +972,7 @@ users.</p>
|
||||
<h3 id="migrate-variablewidth">Dealing with variable width in functions</h3>
|
||||
|
||||
<p>When people claim that PHP6 will solve all our Unicode problems, they're
|
||||
misinformed. It will not fix any of the abovementioned troubles. It will,
|
||||
misinformed. It will not fix any of the aforementioned troubles. It will,
|
||||
however, fix the problem we are about to discuss: processing UTF-8 text
|
||||
in PHP.</p>
|
||||
|
||||
@@ -1035,7 +1035,7 @@ directory.</p>
|
||||
<p>Well, that's it. Hopefully this document has served as a very
|
||||
practical springboard into knowledge of how UTF-8 works. You may have
|
||||
decided that you don't want to migrate yet: that's fine, just know
|
||||
what will happen to your output and what bug reports you may recieve.</p>
|
||||
what will happen to your output and what bug reports you may receive.</p>
|
||||
|
||||
<p>Many other developers have already discussed the subject of Unicode,
|
||||
UTF-8 and internationalization, and I would like to defer to them for
|
||||
@@ -1055,3 +1055,6 @@ a more in-depth look into character sets and encodings.</p>
|
||||
|
||||
</body>
|
||||
</html>
|
||||
|
||||
<!-- vim: et sw=4 sts=4
|
||||
-->
|
||||
|
@@ -67,17 +67,15 @@ into your documents. YouTube's code goes like this:</p>
|
||||
</ol>
|
||||
|
||||
<p>What point 2 means is that if we have code like <code><span
|
||||
class="embed-youtube">AyPzM5WK8ys</span></code> your
|
||||
class="youtube-embed">AyPzM5WK8ys</span></code> your
|
||||
application can reconstruct the full object from this small snippet that
|
||||
passes through HTML Purifier <em>unharmed</em>.
|
||||
<a href="http://htmlpurifier.org/svnroot/htmlpurifier/trunk/library/HTMLPurifier/Filter/YouTube.php">Show me the code!</a></p>
|
||||
<a href="http://repo.or.cz/w/htmlpurifier.git?a=blob;hb=HEAD;f=library/HTMLPurifier/Filter/YouTube.php">Show me the code!</a></p>
|
||||
|
||||
<p>And the corresponding usage:</p>
|
||||
|
||||
<pre><?php
|
||||
// assuming $purifier is an instance of HTMLPurifier
|
||||
require_once 'HTMLPurifier/Filter/YouTube.php';
|
||||
$purifier->addFilter(new HTMLPurifier_Filter_YouTube());
|
||||
$config->set('Filter.YouTube', true);
|
||||
?></pre>
|
||||
|
||||
<p>There is a bit going in the two code snippets, so let's explain.</p>
|
||||
@@ -150,3 +148,6 @@ with the core!</p>
|
||||
|
||||
</body>
|
||||
</html>
|
||||
|
||||
<!-- vim: et sw=4 sts=4
|
||||
-->
|
||||
|
@@ -8,8 +8,8 @@ require_once '../../library/HTMLPurifier.auto.php';
|
||||
$config = HTMLPurifier_Config::createDefault();
|
||||
|
||||
// configuration goes here:
|
||||
$config->set('Core', 'Encoding', 'UTF-8'); // replace with your encoding
|
||||
$config->set('HTML', 'Doctype', 'XHTML 1.0 Transitional'); // replace with your doctype
|
||||
$config->set('Core.Encoding', 'UTF-8'); // replace with your encoding
|
||||
$config->set('HTML.Doctype', 'XHTML 1.0 Transitional'); // replace with your doctype
|
||||
|
||||
$purifier = new HTMLPurifier($config);
|
||||
|
||||
@@ -20,3 +20,4 @@ $pure_html = $purifier->purify($html);
|
||||
|
||||
echo '<pre>' . htmlspecialchars($pure_html) . '</pre>';
|
||||
|
||||
// vim: et sw=4 sts=4
|
||||
|
@@ -4,3 +4,6 @@ function init() {
|
||||
element.innerHTML = '“'+element.innerHTML+'”';
|
||||
}
|
||||
</script>
|
||||
|
||||
<!-- vim: et sw=4 sts=4
|
||||
-->
|
||||
|
@@ -60,10 +60,16 @@ conventions.</p>
|
||||
<dt><a href="dev-optimization.html">Optimization</a></dt>
|
||||
<dd>Discusses possible methods of optimizing HTML Purifier.</dd>
|
||||
|
||||
<dt><a href="dev-flush.html">Flushing the Purifier</a></dt>
|
||||
<dd>Discusses when to flush HTML Purifier's various caches.</dd>
|
||||
|
||||
<dt><a href="dev-advanced-api.html">Advanced API</a></dt>
|
||||
<dd>Functional specification for HTML Purifier's advanced API for defining
|
||||
<dd>Specification for HTML Purifier's advanced API for defining
|
||||
custom filtering behavior.</dd>
|
||||
|
||||
<dt><a href="dev-config-schema.html">Config Schema</a></dt>
|
||||
<dd>Describes config schema framework in HTML Purifier.</dd>
|
||||
|
||||
</dl>
|
||||
|
||||
<h2>Proposals</h2>
|
||||
@@ -92,8 +98,8 @@ the code. They may be upgraded to HTML files or stay as TXT scratchpads.</p>
|
||||
<table class="table">
|
||||
|
||||
<thead><tr>
|
||||
<th width="10%">Type</th>
|
||||
<th width="20%">Name</th>
|
||||
<th style="width:10%">Type</th>
|
||||
<th style="width:20%">Name</th>
|
||||
<th>Description</th>
|
||||
</tr></thead>
|
||||
|
||||
@@ -113,7 +119,13 @@ the code. They may be upgraded to HTML files or stay as TXT scratchpads.</p>
|
||||
|
||||
<tr>
|
||||
<td>Development</td>
|
||||
<td><a href="enduser-code-quality.txt">Code Quality Issues</a></td>
|
||||
<td><a href="dev-config-bcbreaks.txt">Config BC Breaks</a></td>
|
||||
<td>Backwards-incompatible changes in HTML Purifier 4.0.0</td>
|
||||
</tr>
|
||||
|
||||
<tr>
|
||||
<td>Development</td>
|
||||
<td><a href="dev-code-quality.txt">Code Quality Issues</a></td>
|
||||
<td>Enumerates code quality issues and places that need to be refactored.</td>
|
||||
</tr>
|
||||
|
||||
@@ -135,6 +147,12 @@ the code. They may be upgraded to HTML files or stay as TXT scratchpads.</p>
|
||||
<td>Assorted configuration options that could be implemented.</td>
|
||||
</tr>
|
||||
|
||||
<tr>
|
||||
<td>Proposal</td>
|
||||
<td><a href="proposal-css-extraction.txt">CSS extraction</a></td>
|
||||
<td>Taking the inline CSS out of documents and into <code>style</code>.</td>
|
||||
</tr>
|
||||
|
||||
<tr>
|
||||
<td>Reference</td>
|
||||
<td><a href="ref-content-models.txt">Handling Content Model Changes</a></td>
|
||||
@@ -163,6 +181,8 @@ the code. They may be upgraded to HTML files or stay as TXT scratchpads.</p>
|
||||
|
||||
</table>
|
||||
|
||||
<div id="version">$Id$</div>
|
||||
</body>
|
||||
</html>
|
||||
|
||||
<!-- vim: et sw=4 sts=4
|
||||
-->
|
||||
|
@@ -42,7 +42,8 @@ into the mix.</li>
|
||||
something like that?</li>
|
||||
</ol>
|
||||
|
||||
<div id="version">$Id$</div>
|
||||
|
||||
</body>
|
||||
</html>
|
||||
|
||||
<!-- vim: et sw=4 sts=4
|
||||
-->
|
||||
|
@@ -19,3 +19,5 @@ Definition objects are complex datatypes influenced by their respective
|
||||
directive namespaces (HTMLDefinition with HTML and CSSDefinition with CSS).
|
||||
If any of these directives is updated, HTML Purifier forces the definition
|
||||
to be regenerated.
|
||||
|
||||
vim: et sw=4 sts=4
|
||||
|
34
docs/proposal-css-extraction.txt
Normal file
34
docs/proposal-css-extraction.txt
Normal file
@@ -0,0 +1,34 @@
|
||||
|
||||
Extracting inline CSS from HTML Purifier
|
||||
voodoofied: Assigning semantics to elements
|
||||
|
||||
Sander Tekelenburg brought to my attention the poor programming style of
|
||||
inline CSS in HTML documents. In an ideal world, we wouldn't be using inline
|
||||
CSS at all: everything would be assigned using semantic class attributes
|
||||
from an external stylesheet.
|
||||
|
||||
With ExtractStyleBlocks and CSSTidy, this is now possible (when allowed, users
|
||||
can specify a style element which gets extracted from the user-submitted HTML, which
|
||||
the application can place in the head of the HTML document). But there still
|
||||
is the issue of inline CSS that refuses to go away.
|
||||
|
||||
The basic idea behind this feature is assign every element a unique identifier,
|
||||
and then move all of the CSS data to a style-sheet. This HTML:
|
||||
|
||||
<div style="text-align:center">Big <span style="color:red;">things</span>!</div>
|
||||
|
||||
into
|
||||
|
||||
<div id="hp-12345">Big <span id="hp-12346">things</span>!</div>
|
||||
|
||||
and a stylesheet that is:
|
||||
|
||||
#hp-12345 {text-align:center;}
|
||||
#hp-12346 {color:red;}
|
||||
|
||||
Beyond that, HTML Purifier can magically merge common CSS values together,
|
||||
and a whole manner of other heuristic things. HTML Purifier should also
|
||||
make it easy for an admin to re-style the HTML semantically. Speed is not
|
||||
an issue. Also, better WYSIWYG editors are needed.
|
||||
|
||||
vim: et sw=4 sts=4
|
211
docs/proposal-errors.txt
Normal file
211
docs/proposal-errors.txt
Normal file
@@ -0,0 +1,211 @@
|
||||
Considerations for ErrorCollection
|
||||
|
||||
Presently, HTML Purifier takes a code-execution centric approach to handling
|
||||
errors. Errors are organized and grouped according to which segment of the
|
||||
code triggers them, not necessarily the portion of the input document that
|
||||
triggered the error. This means that errors are pseudo-sorted by category,
|
||||
rather than location in the document.
|
||||
|
||||
One easy way to "fix" this problem would be to re-sort according to line number.
|
||||
However, the "category" style information we derive from naively following
|
||||
program execution is still useful. After all, each of the strategies which
|
||||
can report errors still process the document mostly linearly. Furthermore,
|
||||
not only do they process linearly, but the way they pass off operations to
|
||||
sub-systems mirrors that of the document. For example, AttrValidator will
|
||||
linearly proceed through elements, and on each element will use AttrDef to
|
||||
validate those contents. From there, the attribute might have more
|
||||
sub-components, which have execution passed off accordingly.
|
||||
|
||||
In fact, each strategy handles a very specific class of "error."
|
||||
|
||||
RemoveForeignElements - element tokens
|
||||
MakeWellFormed - element token ordering
|
||||
FixNesting - element token ordering
|
||||
ValidateAttributes - attributes of elements
|
||||
|
||||
The crucial point is that while we care about the hierarchy governing these
|
||||
different errors, we *don't* care about any other information about what actually
|
||||
happens to the elements. This brings up another point: if HTML Purifier fixes
|
||||
something, this is not really a notice/warning/error; it's really a suggestion
|
||||
of a way to fix the aforementioned defects.
|
||||
|
||||
In short, the refactoring to take this into account kinda sucks.
|
||||
|
||||
Errors should not be recorded in order that they are reported. Instead, they
|
||||
should be bound to the line (and preferably element) in which they were found.
|
||||
This means we need some way to uniquely identify every element in the document,
|
||||
which doesn't presently exist. An easy way of adding this would be to track
|
||||
line columns. An important ramification of this is that we *must* use the
|
||||
DirectLex implementation.
|
||||
|
||||
1. Implement column numbers for DirectLex [DONE!]
|
||||
2. Disable error collection when not using DirectLex [DONE!]
|
||||
|
||||
Next, we need to re-orient all of the error declarations to place CurrentToken
|
||||
at utmost important. Since this is passed via Context, it's not always clear
|
||||
if that's available. ErrorCollector should complain HARD if it isn't available.
|
||||
There are some locations when we don't have a token available. These include:
|
||||
|
||||
* Lexing - this can actually have a row and column, but NOT correspond to
|
||||
a token
|
||||
* End of document errors - bump this to the end
|
||||
|
||||
Actually, we *don't* have to complain if CurrentToken isn't available; we just
|
||||
set it as a document-wide error. And actually, nothing needs to be done here.
|
||||
|
||||
Something interesting to consider is whether or not we care about the locations
|
||||
of attributes and CSS properties, i.e. the sub-objects that compose these things.
|
||||
In terms of consistency, at the very least attributes should have column/line
|
||||
numbers attached to them. However, this may be overkill, as attributes are
|
||||
uniquely identifiable. You could go even further, with CSS, but they are also
|
||||
uniquely identifiable.
|
||||
|
||||
Bottom-line is, however, this information must be available, in form of the
|
||||
CurrentAttribute and CurrentCssProperty (theoretical) context variables, and
|
||||
it must be used to organize the errors that the sub-processes may throw.
|
||||
There is also a hierarchy of sorts that may make merging this into one context
|
||||
variable more sense, if it hadn't been for HTML's reasonably rigid structure.
|
||||
A CSS property will never contain an HTML attribute. So we won't ever get
|
||||
recursive relations, and having multiple depths won't ever make sense. Leave
|
||||
this be.
|
||||
|
||||
We already have this information, and consequently, using start and end is
|
||||
*unnecessary*, so long as the context variables are set appropriately. We don't
|
||||
care if an error was thrown by an attribute transform or an attribute definition;
|
||||
to the end user these are the same (for a developer, they are different, but
|
||||
they're better off with a stack trace (which we should add support for) in such
|
||||
cases).
|
||||
|
||||
3. Remove start()/end() code. Don't get rid of recursion, though [DONE]
|
||||
4. Setup ErrorCollector to use context information to setup hierarchies.
|
||||
This may require a different internal format. Use objects if it gets
|
||||
complex. [DONE]
|
||||
|
||||
ASIDE
|
||||
More on this topic: since we are now binding errors to lines
|
||||
and columns, a particular error can have three relationships to that
|
||||
specific location:
|
||||
|
||||
1. The token at that location directly
|
||||
RemoveForeignElements
|
||||
AttrValidator (transforms)
|
||||
MakeWellFormed
|
||||
2. A "component" of that token (i.e. attribute)
|
||||
AttrValidator (removals)
|
||||
3. A modification to that node (i.e. contents from start to end
|
||||
token) as a whole
|
||||
FixNesting
|
||||
|
||||
This needs to be marked accordingly. In the presentation, it might
|
||||
make sense keep (3) separate, have (2) a sublist of (1). (1) can
|
||||
be a closing tag, in which case (3) makes no sense at all, OR it
|
||||
should be related with its opening tag (this may not necessarily
|
||||
be possible before MakeWellFormed is run).
|
||||
|
||||
So, the line and column counts as our identifier, so:
|
||||
|
||||
$errors[$line][$col] = ...
|
||||
|
||||
Then, we need to identify case 1, 2 or 3. They are identified as
|
||||
such:
|
||||
|
||||
1. Need some sort of semaphore in RemoveForeignElements, etc.
|
||||
2. If CurrentAttr/CurrentCssProperty is non-null
|
||||
3. Default (FixNesting, MakeWellFormed)
|
||||
|
||||
One consideration about (1) is that it usually is actually a
|
||||
(3) modification, but we have no way of knowing about that because
|
||||
of various optimizations. However, they can probably be treated
|
||||
the same. The other difficulty is that (3) is never a line and
|
||||
column; rather, it is a range (i.e. a duple) and telling the user
|
||||
the very start of the range may confuse them. For example,
|
||||
|
||||
<b>Foo<div>bar</div></b>
|
||||
^ ^
|
||||
|
||||
The node being operated on is <b>, so the error would be assigned
|
||||
to the first caret, with a "node reorganized" error. Then, the
|
||||
ChildDef would have submitted its own suggestions and errors with
|
||||
regard to what's going in the internals. So I suppose this is
|
||||
ok. :-)
|
||||
|
||||
Now, the structure of the earlier mentioned ... would be something
|
||||
like this:
|
||||
|
||||
object {
|
||||
type = (token|attr|property),
|
||||
value, // appropriate for type
|
||||
errors => array(),
|
||||
sub-errors = [recursive],
|
||||
}
|
||||
|
||||
This helps us keep things agnostic. It is also sufficiently complex
|
||||
enough to warrant an object.
|
||||
|
||||
So, more wanking about the object format is in order. The way HTML Purifier is
|
||||
currently setup, the only possible hierarchy is:
|
||||
|
||||
token -> attr -> css property
|
||||
|
||||
These relations do not exist all of the time; a comment or end token would not
|
||||
ever have any attributes, and non-style attributes would never have CSS properties
|
||||
associated with them.
|
||||
|
||||
I believe that it is worth supporting multiple paths. At some point, we might
|
||||
have a hierarchy like:
|
||||
|
||||
* -> syntax
|
||||
-> token -> attr -> css property
|
||||
-> url
|
||||
-> css stylesheet <style>
|
||||
|
||||
et cetera. Now, one of the practical implications of this is that every "node"
|
||||
on our tree is well-defined, so in theory it should be possible to either 1.
|
||||
create a separate class for each error struct, or 2. embed this information
|
||||
directly into HTML Purifier's token stream. Embedding the information in the
|
||||
token stream is not a terribly good idea, since tokens can be removed, etc.
|
||||
So that leaves us with 1... and if we use a generic interface we can cut down
|
||||
on a lot of code we might need. So let's leave it like this.
|
||||
|
||||
~~~~
|
||||
|
||||
Then we setup suggestions.
|
||||
|
||||
5. Setup a separate error class which tells the user any modifications
|
||||
HTML Purifier made.
|
||||
|
||||
Some information about this:
|
||||
|
||||
Our current paradigm is to tell the user what HTML Purifier did to the HTML.
|
||||
This is the most natural mode of operation, since that's what HTML Purifier
|
||||
is all about; it was not meant to be a validator.
|
||||
|
||||
However, most other people have experience dealing with a validator. In cases
|
||||
where HTML Purifier unambiguously does the right thing, simply giving the user
|
||||
the correct version isn't a bad idea, but problems arise when:
|
||||
|
||||
- The user has such bad HTML we do something odd, when we should have just
|
||||
flagged the HTML as an error. Such examples are when we do things like
|
||||
remove text from directly inside a <table> tag. It was probably meant to
|
||||
be in a <td> tag or be outside the table, but we're not smart enough to
|
||||
realize this so we just remove it. In such a case, we should tell the user
|
||||
that there was foreign data in the table, but then we shouldn't "demand"
|
||||
the user remove the data; it's more of a "here's a possible way of
|
||||
rectifying the problem"
|
||||
|
||||
- Giving line context for input is hard enough, but feasible; giving output
|
||||
line context will be extremely difficult due to shifting lines; we'd probably
|
||||
have to track what the tokens are and then find the appropriate out context
|
||||
and it's not guaranteed to work etc etc etc.
|
||||
|
||||
````````````
|
||||
|
||||
Don't forget to spruce up output.
|
||||
|
||||
6. Output needs to automatically give line and column numbers, basically
|
||||
"at line" on steroids. Look at W3C's output; it's ok. [PARTIALLY DONE]
|
||||
|
||||
- We need a standard CSS to apply (check demo.css for some starting
|
||||
styling; some buttons would also be hip)
|
||||
|
||||
vim: et sw=4 sts=4
|
@@ -133,3 +133,5 @@ Dramatic - border, list-style-position (list-style), margin, padding,
|
||||
|
||||
Dramatic elements substantially change the look of text in ways that should
|
||||
probably have been reserved to other areas.
|
||||
|
||||
vim: et sw=4 sts=4
|
||||
|
@@ -60,3 +60,5 @@ Neat functionality:
|
||||
- Roman numeral formatting
|
||||
|
||||
Items marked with a + likely need to be addressed by HTML Purifier
|
||||
|
||||
vim: et sw=4 sts=4
|
||||
|
@@ -16,7 +16,7 @@ implemented, give us a ring, and we'll move it up the priority chain.
|
||||
%Attr.ClassBlacklist. When it's Whitelist, only allow those in
|
||||
%Attr.ClassWhitelist.
|
||||
|
||||
%Attr.MaxWidth,
|
||||
%Attr.MaxWidth,
|
||||
%Attr.MaxHeight - caps for width and height related checks.
|
||||
(the hack in Pixels for an image crashing attack could be replaced by this)
|
||||
|
||||
@@ -41,3 +41,4 @@ implemented, give us a ring, and we'll move it up the priority chain.
|
||||
absolute DNS. While this is actually the preferred method according to
|
||||
the RFC, most people opt to use a relative domain name relative to . (root).
|
||||
|
||||
vim: et sw=4 sts=4
|
||||
|
218
docs/proposal-plists.txt
Normal file
218
docs/proposal-plists.txt
Normal file
@@ -0,0 +1,218 @@
|
||||
THE UNIVERSAL DESIGN PATTERN: PROPERTIES
|
||||
Steve Yegge
|
||||
|
||||
Implementation:
|
||||
get(name)
|
||||
put(name, value)
|
||||
has(name)
|
||||
remove(name)
|
||||
iteration, with filtering [this will be our namespaces]
|
||||
parent
|
||||
|
||||
Representations:
|
||||
- Keys are strings
|
||||
- It's nice to not need to quote keys (if we formulate our own language,
|
||||
consider this)
|
||||
- Property not present representation (key missing)
|
||||
- Frequent removal/re-add may have null help. If null is valid, use
|
||||
another value. (PHP semantics are weird here)
|
||||
|
||||
Data structures:
|
||||
- LinkedHashMap is wonderful (O(1) access and maintains order)
|
||||
- Using a special property that points to the parent is usual
|
||||
- Multiple inheritance possible, need rules for which to lookup first
|
||||
- Iterative inheritance is best
|
||||
- Consider performance!
|
||||
|
||||
Deletion
|
||||
- Tricky problem with inheritance
|
||||
- Distinguish between "not found" and "look in my parent for the property"
|
||||
[Maybe HTML Purifier won't allow deletion]
|
||||
|
||||
Read/write asymmetry (it's correct!)
|
||||
|
||||
Read-only plists
|
||||
- Allow ability to freeze [this is what we have already]
|
||||
- Don't overuse it
|
||||
|
||||
Performance:
|
||||
- Intern strings (PHP does this already)
|
||||
- Don't be case-insensitive
|
||||
- If all properties in a plist are known a-priori, you can use a "perfect"
|
||||
hash function. Often overkill.
|
||||
- Copy-on-read caching "plundering" reduces lookup, but uses memory and can
|
||||
grow stale. Use as last resort.
|
||||
- Refactoring to fields. Watch for API compatibility, system complexity,
|
||||
and lack of flexibility.
|
||||
- Refrigerator: external data-structure to hold plists
|
||||
|
||||
Transient properties:
|
||||
[Don't need to worry about this]
|
||||
- Use a separate plist for transient properties
|
||||
- Non-numeric override; numeric should ADD
|
||||
- Deletion: removeTransientProperty() and transientlyRemoveProperty()
|
||||
|
||||
Persistence:
|
||||
- XML/JSON are good
|
||||
- Text-based is good for readability, maintainability and bootstrapping
|
||||
- Compressed binary format for network transport [not necessary]
|
||||
- RDBMS or XML database
|
||||
|
||||
Querying: [not relevant]
|
||||
- XML database is nice for XPath/XQuery
|
||||
- jQuery for JSON
|
||||
- Just load it all into a program
|
||||
|
||||
Backfills/Data integrity:
|
||||
- Use usual methods
|
||||
- Lazy backfill is a nice hack
|
||||
|
||||
Type systems:
|
||||
- Flags: ReadOnly, Permanent, DontEnum
|
||||
- Typed properties isn't that useful [It's also Not-PHP]
|
||||
- Seperate meta-list of directive properties IS useful
|
||||
- Duck typing is useful for systems designed fully around properties pattern
|
||||
|
||||
Trade-off:
|
||||
+ Flexibility
|
||||
+ Extensibility
|
||||
+ Unit-testing/prototype-speed
|
||||
- Performance
|
||||
- Data integrity
|
||||
- Navagability/Query-ability
|
||||
- Reversability (hard to go back)
|
||||
|
||||
HTML Purifier
|
||||
|
||||
We are not happy with our current system of defining configuration directives,
|
||||
because it has become clear that things will get a lot nicer if we allow
|
||||
multiple namespaces, and there are some features that naturally lend themselves
|
||||
to inheritance, which we do not really support well.
|
||||
|
||||
One of the considered implementation changes would be to go from a structure
|
||||
like:
|
||||
|
||||
array(
|
||||
'Namespace' => array(
|
||||
'Directive' => 'val1',
|
||||
'Directive2' => 'val2',
|
||||
)
|
||||
)
|
||||
|
||||
to:
|
||||
|
||||
array(
|
||||
'Namespace.Directive' => 'val1',
|
||||
'Namespace.Directive2' => 'val2',
|
||||
)
|
||||
|
||||
The below implementation takes more memory, however, and it makes it a bit
|
||||
complicated to grab all values from a namespace.
|
||||
|
||||
The alternate implementation choice is to allow nested plists. This keeps
|
||||
iteration easy, but is problematic for inheritance (it would be difficult
|
||||
to distinguish a plist from an array) and retrieval (when specifying multiple
|
||||
namespaces we would need some multiple de-referencing).
|
||||
|
||||
----
|
||||
|
||||
We can bite the performance hit, and just do iteration with filter
|
||||
(the strncmp call should be relatively cheap). Then, users should be able
|
||||
to optimize doing something like:
|
||||
|
||||
$config = HTMLPurifier_Config::createDefault();
|
||||
if (!file_exists('config.php')) {
|
||||
// set up $config
|
||||
$config->save('config.php');
|
||||
} else {
|
||||
$config->load('config.php');
|
||||
}
|
||||
|
||||
Or maybe memcache, or something. This means that "// set up $config" must
|
||||
not have any dynamic parts, or the user has to invalidate the cache when
|
||||
they do update it. We have to think about this a little more carefully; the
|
||||
file call might be more expensive.
|
||||
|
||||
----
|
||||
|
||||
This might get expensive, however, when we actually care about iterating
|
||||
over the configuration and want the actual values. So what about nesting the
|
||||
lists?
|
||||
|
||||
"ns.sub.directive" => values['ns']['sub']['directive']
|
||||
|
||||
We can distinguish between plists and arrays by using ArrayObjects for the
|
||||
plists, and regular arrays for the arrays? Alternatively, use ArrayObjects
|
||||
for the arrays, and regular arrays for the plists.
|
||||
|
||||
----
|
||||
|
||||
Implementation demands, and what has caused them:
|
||||
|
||||
1. DefinitionCache, the HTML, CSS and URI namespaces have caches attached to them
|
||||
Results:
|
||||
- getBatchSerial()
|
||||
- getBatch() : in general, the ability to traverse just a namespace
|
||||
|
||||
2. AutoFormat/Filter, this is a plugin architecture, directives not hard-coded
|
||||
- getBatch()
|
||||
|
||||
3. Configuration form
|
||||
- Namespaces used to organize directives
|
||||
|
||||
Other than that, we have a pure plist. PERHAPS we should maintain separate things
|
||||
for these different demands.
|
||||
|
||||
Issue 2: Directives for configuring the plugins are regular plists, but
|
||||
when enabling them, while it's "plist-ish", what you're really doing is adding
|
||||
them to an array of "autoformatters"/"filters" to enable. We can setup
|
||||
magic BC as well as in the new interface, but there should also be an
|
||||
add('AutoFormat', 'AutoParagraph'); which does the right thing.
|
||||
|
||||
One thing to consider is whether or not inheritance rules will apply to these.
|
||||
I'd say yes. That means that they're still plisty, in fact, the underlying
|
||||
implementation will probably be a plist. However, they will get their OWN
|
||||
plists, and will NOT support nesting.
|
||||
|
||||
Issue 1: Our current implementation is generally not efficient; md5(serialize($foo))
|
||||
is pretty expensive. So, I don't think there will be any problems if it
|
||||
gets "less" efficient, as long as we give users a properly fast alternative;
|
||||
DefinitionRev gives us a way to do this, by simply telling the user they must
|
||||
update it whenever they update Configuration directives as well. (There are
|
||||
obvious BC concerns here).
|
||||
|
||||
In such a case, we simply iterate over our plist (performing full retrievals
|
||||
for each value), grab the entries we care about, and then serialize and hash.
|
||||
It's going to be slow either way, due to the ability of plists to inherit.
|
||||
If we ksort(), we don't have to traverse the entire array, however, the
|
||||
cost of a ksort() call may not be worth it.
|
||||
|
||||
At this point, last time, I started worrying about the performance implications
|
||||
of allowing inheritance, and wondering whether or not I wanted to squash
|
||||
the plist. At first blush, our code might be under the assumption that
|
||||
accessing properties is cheap; but actually we prefer to copy out the value
|
||||
into a member variable if it's going to be used many times. With this is mind
|
||||
I don't think CPU consumption from a few nested function calls is going to
|
||||
be a problem. We *are* going to enforce a function only interface.
|
||||
|
||||
The next issue at hand is how we're going to manage the "special" plists,
|
||||
which should still be able to be inherited. Basically, it means that multiple
|
||||
plists would be attached to the configuration object, which is not the
|
||||
best for memory performance. The alternative is to keep them all in one
|
||||
big plist, and then eat the one-time cost of traversing the entire plist
|
||||
to grab the appropriate values.
|
||||
|
||||
I think at this point we can write the generic interface, and then set up separate
|
||||
plists if that ends up being necessary for performance (it probably won't.) Now
|
||||
lets code our generic plist implementation.
|
||||
|
||||
----
|
||||
|
||||
Iterating over the plist presents some problems. The way we've chosen to solve
|
||||
this is to squash all of the parents.
|
||||
|
||||
----
|
||||
|
||||
But I don't need iteration.
|
||||
|
||||
vim: et sw=4 sts=4
|
@@ -46,3 +46,5 @@ is eliminated completely, in the latter case, the text of the node
|
||||
would is preserved (as the parent node does allow PCDATA). Custom
|
||||
content model implementations probably are not the best way of handling
|
||||
these cases, instead, node bubbling should be implemented instead.
|
||||
|
||||
vim: et sw=4 sts=4
|
||||
|
@@ -22,7 +22,9 @@ Relative:
|
||||
1 ex ~= 0.5 em, though Mozilla Firefox says 1 ex = 6px
|
||||
1 px ~= 1 pt
|
||||
|
||||
Watch out: font-sizes can also be nested to get successively larger
|
||||
Watch out: font-sizes can also be nested to get successively larger
|
||||
(although I do not relish having to keep track of context font-sizes,
|
||||
this may be necessary, especially for some of the more advanced features
|
||||
for preventing things like white on white).
|
||||
|
||||
vim: et sw=4 sts=4
|
||||
|
@@ -40,6 +40,8 @@ the development of this library in these forum threads:</p>
|
||||
|
||||
<p>...as well as any I may have forgotten.</p>
|
||||
|
||||
<div id="version">$Id$</div>
|
||||
</body>
|
||||
</html>
|
||||
|
||||
<!-- vim: et sw=4 sts=4
|
||||
-->
|
||||
|
@@ -1,8 +1,8 @@
|
||||
|
||||
The Modularization of HTMLDefinition in HTML Purifier
|
||||
|
||||
Todo for XHTML 1.1 support <http://www.w3.org/TR/xhtml11/changes.html>
|
||||
1. Support Ruby <http://www.w3.org/TR/2001/REC-ruby-20010531/>
|
||||
WARNING: This document was drafted before the implementation of this
|
||||
system, and some implementation details may have evolved over time.
|
||||
|
||||
HTML Purifier uses the modularization of XHTML
|
||||
<http://www.w3.org/TR/xhtml-modularization/> to organize the internals
|
||||
@@ -121,7 +121,7 @@ a proprietary system called ChildDef for performance and flexibility
|
||||
reasons, but this does not line up very well with W3C's notion of
|
||||
regexps for defining the allowed children of an element.
|
||||
|
||||
HTMLPurifier->elements[$element]->content_model and
|
||||
HTMLPurifier->elements[$element]->content_model and
|
||||
HTMLPurifier->elements[$element]->content_model_type store information
|
||||
about the final ChildDef that will be stored in
|
||||
HTMLPurifier->elements[$element]->child (we use a different variable
|
||||
@@ -162,3 +162,5 @@ array of content set names to content set contents. If the content set
|
||||
already exists, your values are appended on to it (great for, say,
|
||||
registering the font tag as an inline element), otherwise it is
|
||||
created. They are substituted into content_model.
|
||||
|
||||
vim: et sw=4 sts=4
|
||||
|
@@ -22,3 +22,5 @@ HTML Purifier context.
|
||||
|
||||
These should be put into their own Tidy module, not loaded by default(?). These
|
||||
all qualify as "lenient" transforms.
|
||||
|
||||
vim: et sw=4 sts=4
|
||||
|
@@ -18,7 +18,9 @@ committing ourselves till the spec stabilizes, though.
|
||||
|
||||
More immediately speaking though, however, is the well-defined parsing
|
||||
behavior that HTML 5 adds. While I have little interest in writing
|
||||
another DirectLex parser, other parsers like ph5p
|
||||
another DirectLex parser, other parsers like ph5p
|
||||
<http://jero.net/lab/ph5p/> can be adapted to DOMLex to support much more
|
||||
flexible HTML parsing (a cool feature I've seen is how they resolve
|
||||
<b>bold<i>both</b>italic</i>).
|
||||
|
||||
vim: et sw=4 sts=4
|
||||
|
@@ -1,8 +1,10 @@
|
||||
Licensing of Specimens
|
||||
|
||||
Some files in this directory have different licenses:
|
||||
|
||||
windows-live-mail-desktop-beta.html - donated by laacz, public domain
|
||||
img.png - LGPL, from <http://commons.wikimedia.org/wiki/Image:Pastille_chrome.png>
|
||||
|
||||
All other files are by me, and are licensed under LGPL.
|
||||
Licensing of Specimens
|
||||
|
||||
Some files in this directory have different licenses:
|
||||
|
||||
windows-live-mail-desktop-beta.html - donated by laacz, public domain
|
||||
img.png - LGPL, from <http://commons.wikimedia.org/wiki/Image:Pastille_chrome.png>
|
||||
|
||||
All other files are by me, and are licensed under LGPL.
|
||||
|
||||
vim: et sw=4 sts=4
|
||||
|
@@ -162,4 +162,4 @@ div.segment {width:250px; float:left; margin-top:1em;}
|
||||
</div>
|
||||
|
||||
</body>
|
||||
</html>
|
||||
</html>
|
||||
|
129
docs/specimens/jochem-blok-word.html
Normal file
129
docs/specimens/jochem-blok-word.html
Normal file
@@ -0,0 +1,129 @@
|
||||
<html xmlns:v="urn:schemas-microsoft-com:vml" xmlns:o="urn:schemas-microsoft-com:office:office" xmlns:w="urn:schemas-microsoft-com:office:word" xmlns:m="http://schemas.microsoft.com/office/2004/12/omml" xmlns="http://www.w3.org/TR/REC-html40">
|
||||
|
||||
<head>
|
||||
<META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=us-ascii">
|
||||
<meta name=Generator content="Microsoft Word 12 (filtered medium)">
|
||||
<!--[if !mso]>
|
||||
<style>
|
||||
v\:* {behavior:url(#default#VML);}
|
||||
o\:* {behavior:url(#default#VML);}
|
||||
w\:* {behavior:url(#default#VML);}
|
||||
..shape {behavior:url(#default#VML);}
|
||||
</style>
|
||||
<![endif]-->
|
||||
<style>
|
||||
<!--
|
||||
/* Font Definitions */
|
||||
@font-face
|
||||
{font-family:"Cambria Math";
|
||||
panose-1:2 4 5 3 5 4 6 3 2 4;}
|
||||
@font-face
|
||||
{font-family:Calibri;
|
||||
panose-1:2 15 5 2 2 2 4 3 2 4;}
|
||||
@font-face
|
||||
{font-family:Tahoma;
|
||||
panose-1:2 11 6 4 3 5 4 4 2 4;}
|
||||
@font-face
|
||||
{font-family:Verdana;
|
||||
panose-1:2 11 6 4 3 5 4 4 2 4;}
|
||||
/* Style Definitions */
|
||||
p.MsoNormal, li.MsoNormal, div.MsoNormal
|
||||
{margin:0cm;
|
||||
margin-bottom:.0001pt;
|
||||
font-size:10.0pt;
|
||||
font-family:"Verdana","sans-serif";}
|
||||
a:link, span.MsoHyperlink
|
||||
{mso-style-priority:99;
|
||||
color:blue;
|
||||
text-decoration:underline;}
|
||||
a:visited, span.MsoHyperlinkFollowed
|
||||
{mso-style-priority:99;
|
||||
color:purple;
|
||||
text-decoration:underline;}
|
||||
p.MsoAcetate, li.MsoAcetate, div.MsoAcetate
|
||||
{mso-style-priority:99;
|
||||
mso-style-link:"Balloon Text Char";
|
||||
margin:0cm;
|
||||
margin-bottom:.0001pt;
|
||||
font-size:8.0pt;
|
||||
font-family:"Tahoma","sans-serif";}
|
||||
span.EmailStyle17
|
||||
{mso-style-type:personal-compose;
|
||||
font-family:"Verdana","sans-serif";
|
||||
color:windowtext;}
|
||||
span.BalloonTextChar
|
||||
{mso-style-name:"Balloon Text Char";
|
||||
mso-style-priority:99;
|
||||
mso-style-link:"Balloon Text";
|
||||
font-family:"Tahoma","sans-serif";}
|
||||
..MsoChpDefault
|
||||
{mso-style-type:export-only;}
|
||||
@page Section1
|
||||
{size:612.0pt 792.0pt;
|
||||
margin:70.85pt 70.85pt 70.85pt 70.85pt;}
|
||||
div.Section1
|
||||
{page:Section1;}
|
||||
-->
|
||||
</style>
|
||||
<!--[if gte mso 9]><xml>
|
||||
<o:shapedefaults v:ext="edit" spidmax="2050" />
|
||||
</xml><![endif]--><!--[if gte mso 9]><xml>
|
||||
<o:shapelayout v:ext="edit">
|
||||
<o:idmap v:ext="edit" data="1" />
|
||||
</o:shapelayout></xml><![endif]-->
|
||||
</head>
|
||||
|
||||
<body lang=NL link=blue vlink=purple>
|
||||
|
||||
<div class=Section1>
|
||||
|
||||
<p class=MsoNormal><img width=1277 height=994 id="Picture_x0020_1"
|
||||
src="cid:image001.png@01C8CBDF.5D1BAEE0"><o:p></o:p></p>
|
||||
|
||||
<p class=MsoNormal><o:p> </o:p></p>
|
||||
|
||||
<p class=MsoNormal><b>Name<o:p></o:p></b></p>
|
||||
|
||||
<p class=MsoNormal>E-mail : <a href="mailto:mail@example.com"><span
|
||||
style='color:windowtext'>mail@example.com</span></a><o:p></o:p></p>
|
||||
|
||||
<p class=MsoNormal><o:p> </o:p></p>
|
||||
|
||||
<p class=MsoNormal><b>Company<o:p></o:p></b></p>
|
||||
|
||||
<p class=MsoNormal>Address 1<o:p></o:p></p>
|
||||
|
||||
<p class=MsoNormal>Address 2<o:p></o:p></p>
|
||||
|
||||
<p class=MsoNormal><o:p> </o:p></p>
|
||||
|
||||
<p class=MsoNormal>Telefoon : +xx xx xxx xxx xx <span style='color:black'><o:p></o:p></span></p>
|
||||
|
||||
<p class=MsoNormal><span lang=EN-US style='color:black'>Fax : +xx xx xxx xx xx<o:p></o:p></span></p>
|
||||
|
||||
<p class=MsoNormal><span lang=EN-US style='color:black'>Internet : </span><span
|
||||
style='color:black'><a href="http://www.example.com/"><span lang=EN-US
|
||||
style='color:black'>http://www.example.com</span></a></span><span
|
||||
lang=EN-US style='color:black'><o:p></o:p></span></p>
|
||||
|
||||
<p class=MsoNormal><span lang=EN-US style='color:black'>Kamer van koophandel
|
||||
xxxxxxxxx<o:p></o:p></span></p>
|
||||
|
||||
<p class=MsoNormal><span lang=EN-US style='color:black'><o:p> </o:p></span></p>
|
||||
|
||||
<p class=MsoNormal><span lang=EN-US style='font-size:7.5pt;color:black'>Op deze
|
||||
e-mail is een disclaimer van toepassing, ga naar </span><span lang=EN-US
|
||||
style='font-size:7.5pt'><a
|
||||
href="http://www.example.com/disclaimer"><span
|
||||
style='color:black'>www.example.com/disclaimer</span></a><br>
|
||||
<span style='color:black'>A disclaimer is applicable to this email, please
|
||||
refer to </span><a href="http://www.example.com/disclaimer"><span
|
||||
style='color:black'>www.example.com/disclaimer</span></a><o:p></o:p></span></p>
|
||||
|
||||
<p class=MsoNormal><span lang=EN-US><o:p> </o:p></span></p>
|
||||
|
||||
</div>
|
||||
|
||||
</body>
|
||||
|
||||
</html>
|
@@ -4,71 +4,71 @@
|
||||
<STYLE></STYLE>
|
||||
|
||||
<META content="MSHTML 6.00.6000.16414" name=GENERATOR></HEAD>
|
||||
<BODY id=MailContainerBody
|
||||
style="PADDING-RIGHT: 10px; PADDING-LEFT: 10px; FONT-SIZE: 10pt; COLOR: #000000; PADDING-TOP: 15px; FONT-FAMILY: Arial"
|
||||
bgColor=#ff6600 leftMargin=0 background="" topMargin=0
|
||||
<BODY id=MailContainerBody
|
||||
style="PADDING-RIGHT: 10px; PADDING-LEFT: 10px; FONT-SIZE: 10pt; COLOR: #000000; PADDING-TOP: 15px; FONT-FAMILY: Arial"
|
||||
bgColor=#ff6600 leftMargin=0 background="" topMargin=0
|
||||
name="Compose message area" acc_role="text" CanvasTabStop="false">
|
||||
<DIV
|
||||
style="BORDER-TOP: #dddddd 1px solid; FONT-SIZE: 10pt; WIDTH: 100%; MARGIN-RIGHT: 10px; PADDING-TOP: 5px; BORDER-BOTTOM: #dddddd 1px solid; FONT-FAMILY: Verdana; HEIGHT: 25px; BACKGROUND-COLOR: #ffffff"><NOBR><SPAN
|
||||
title="View a slideshow of the pictures in this e-mail message."
|
||||
style="PADDING-RIGHT: 20px"><A style="COLOR: #0088e4"
|
||||
href="http://g.msn.com/5meen_us/171?path=/photomail/{6fc0065f-ffdd-4ca6-9a4c-cc5a93dc122f}&image=47D7B182CFEFB10!127&imagehi=47D7B182CFEFB10!125&CID=323550092004883216">Play
|
||||
slideshow </A></SPAN><SPAN style="COLOR: #909090"><SPAN>|</SPAN><SPAN
|
||||
style="PADDING-LEFT: 20px"> Download the highest quality version of a picture by
|
||||
<DIV
|
||||
style="BORDER-TOP: #dddddd 1px solid; FONT-SIZE: 10pt; WIDTH: 100%; MARGIN-RIGHT: 10px; PADDING-TOP: 5px; BORDER-BOTTOM: #dddddd 1px solid; FONT-FAMILY: Verdana; HEIGHT: 25px; BACKGROUND-COLOR: #ffffff"><NOBR><SPAN
|
||||
title="View a slideshow of the pictures in this e-mail message."
|
||||
style="PADDING-RIGHT: 20px"><A style="COLOR: #0088e4"
|
||||
href="http://g.msn.com/5meen_us/171?path=/photomail/{6fc0065f-ffdd-4ca6-9a4c-cc5a93dc122f}&image=47D7B182CFEFB10!127&imagehi=47D7B182CFEFB10!125&CID=323550092004883216">Play
|
||||
slideshow </A></SPAN><SPAN style="COLOR: #909090"><SPAN>|</SPAN><SPAN
|
||||
style="PADDING-LEFT: 20px"> Download the highest quality version of a picture by
|
||||
clicking the + above it </SPAN></SPAN></NOBR></DIV>
|
||||
<DIV
|
||||
<DIV
|
||||
style="PADDING-RIGHT: 5px; PADDING-LEFT: 7px; PADDING-BOTTOM: 2px; WIDTH: 100%; PADDING-TOP: 2px">
|
||||
<OL>
|
||||
<LI><IMG title="Angry smile emoticon"
|
||||
style="FLOAT: none; MARGIN: 0px; POSITION: static" tabIndex=-1
|
||||
alt="Angry smile emoticon" src="cid:49F0C856199E4D688D2D740680733D74@wc"
|
||||
MSNNonUserImageOrEmoticon="true">Un ka <FONT style="BACKGROUND-COLOR: #800000"
|
||||
color=#cc99ff><STRONG>Tev</STRONG></FONT> iet, un ko tu dari?
|
||||
<LI><IMG title="Angry smile emoticon"
|
||||
style="FLOAT: none; MARGIN: 0px; POSITION: static" tabIndex=-1
|
||||
alt="Angry smile emoticon" src="cid:49F0C856199E4D688D2D740680733D74@wc"
|
||||
MSNNonUserImageOrEmoticon="true">Un ka <FONT style="BACKGROUND-COLOR: #800000"
|
||||
color=#cc99ff><STRONG>Tev</STRONG></FONT> iet, un ko tu dari?
|
||||
<LI>Aha!</LI></OL>
|
||||
|
||||
<UL>
|
||||
<LI>Buletets
|
||||
<LI>Buletets
|
||||
<LI>
|
||||
<DIV align=justify><A title=http://laacz.lv/blog/
|
||||
href="http://laacz.lv/blog/">http://laacz.lv/blog/</A> un <A
|
||||
<DIV align=justify><A title=http://laacz.lv/blog/
|
||||
href="http://laacz.lv/blog/">http://laacz.lv/blog/</A> un <A
|
||||
title=http://google.com/ href="http://google.com/">gugle</A></DIV>
|
||||
<LI>Sarakstucitis</LI></UL></DIV><SPAN><SPAN xmlns:canvas="canvas-namespace-id"
|
||||
layoutEmptyTextWellFont="Tahoma"><SPAN
|
||||
style="MARGIN-BOTTOM: 15px; OVERFLOW: visible; HEIGHT: 16px"></SPAN><SPAN
|
||||
<LI>Sarakstucitis</LI></UL></DIV><SPAN><SPAN xmlns:canvas="canvas-namespace-id"
|
||||
layoutEmptyTextWellFont="Tahoma"><SPAN
|
||||
style="MARGIN-BOTTOM: 15px; OVERFLOW: visible; HEIGHT: 16px"></SPAN><SPAN
|
||||
style="MARGIN-BOTTOM: 25px; VERTICAL-ALIGN: top; OVERFLOW: visible; MARGIN-RIGHT: 25px; HEIGHT: 234px">
|
||||
<TABLE style="DISPLAY: inline">
|
||||
<TBODY>
|
||||
<TR>
|
||||
|
||||
<TD>
|
||||
<DIV
|
||||
style="FONT-WEIGHT: bold; FONT-SIZE: 12pt; FONT-FAMILY: arial; TEXT-ALIGN: center"><A
|
||||
id=HiresARef
|
||||
title="Click here to view or download a high resolution version of this picture"
|
||||
style="COLOR: #0088e4; TEXT-DECORATION: none"
|
||||
<DIV
|
||||
style="FONT-WEIGHT: bold; FONT-SIZE: 12pt; FONT-FAMILY: arial; TEXT-ALIGN: center"><A
|
||||
id=HiresARef
|
||||
title="Click here to view or download a high resolution version of this picture"
|
||||
style="COLOR: #0088e4; TEXT-DECORATION: none"
|
||||
href="http://byfiles.storage.msn.com/x1pMvt0I80jTgT6DuaCpEMbprX3nk3jNv_vjigxV_EYVSMyM_PKgEvDEUtuNhQC-F-23mTTcKyqx6eGaeK2e_wMJ0ikwpDdFntk4SY7pfJUv2g2Ck6R2S2vAA?download">+</A></DIV>
|
||||
<DIV
|
||||
title="Click here to view the full image using the online photo viewer."
|
||||
style="DISPLAY: inline; OVERFLOW: hidden; WIDTH: 140px; HEIGHT: 140px"><A
|
||||
href="http://g.msn.com/5meen_us/171?path=/photomail/{6fc0065f-ffdd-4ca6-9a4c-cc5a93dc122f}&image=47D7B182CFEFB10!127&imagehi=47D7B182CFEFB10!125&CID=323550092004883216"
|
||||
border="0"><IMG
|
||||
style="MARGIN-TOP: 15px; DISPLAY: inline-block; MARGIN-LEFT: 0px"
|
||||
height=109 src="cid:006A71303B80404E9FB6184E55D6A446@wc" width=140
|
||||
<DIV
|
||||
title="Click here to view the full image using the online photo viewer."
|
||||
style="DISPLAY: inline; OVERFLOW: hidden; WIDTH: 140px; HEIGHT: 140px"><A
|
||||
href="http://g.msn.com/5meen_us/171?path=/photomail/{6fc0065f-ffdd-4ca6-9a4c-cc5a93dc122f}&image=47D7B182CFEFB10!127&imagehi=47D7B182CFEFB10!125&CID=323550092004883216"
|
||||
border="0"><IMG
|
||||
style="MARGIN-TOP: 15px; DISPLAY: inline-block; MARGIN-LEFT: 0px"
|
||||
height=109 src="cid:006A71303B80404E9FB6184E55D6A446@wc" width=140
|
||||
border=0></A></DIV></TD></TR>
|
||||
<TR>
|
||||
<TD>
|
||||
<DIV
|
||||
style="FONT-SIZE: 10pt; WIDTH: 140px; FONT-FAMILY: verdana; TEXT-ALIGN: center"><EM><STRONG>This
|
||||
<U>is </U></STRONG><U>tit</U>le</EM> fo<STRONG>r <FONT
|
||||
face="Arial Black">t<FONT color=#800000 size=7>h<U>i</U></FONT>s
|
||||
<DIV
|
||||
style="FONT-SIZE: 10pt; WIDTH: 140px; FONT-FAMILY: verdana; TEXT-ALIGN: center"><EM><STRONG>This
|
||||
<U>is </U></STRONG><U>tit</U>le</EM> fo<STRONG>r <FONT
|
||||
face="Arial Black">t<FONT color=#800000 size=7>h<U>i</U></FONT>s
|
||||
</FONT>picture</STRONG></DIV></TD></TR></TBODY></TABLE></SPAN></SPAN></SPAN>
|
||||
|
||||
<DIV
|
||||
<DIV
|
||||
style="PADDING-RIGHT: 5px; PADDING-LEFT: 7px; PADDING-BOTTOM: 2px; WIDTH: 100%; PADDING-TOP: 2px; HEIGHT: 50px">
|
||||
<DIV> </DIV></DIV>
|
||||
<DIV
|
||||
style="BORDER-TOP: #dddddd 1px solid; FONT-SIZE: 10pt; MARGIN-BOTTOM: 10px; WIDTH: 100%; COLOR: #909090; MARGIN-RIGHT: 10px; PADDING-TOP: 9px; FONT-FAMILY: Verdana; HEIGHT: 42px; BACKGROUND-COLOR: #ffffff"><NOBR><SPAN
|
||||
title="Join Windows Live to share photos using Windows Live Photo E-mail.">Online
|
||||
pictures are available for 30 days. <A style="COLOR: #0088e4"
|
||||
href="http://g.msn.com/5meen_us/175">Get Windows Live Mail desktop to create
|
||||
your own photo e-mails. </A></SPAN></NOBR></DIV></BODY></HTML>
|
||||
<DIV
|
||||
style="BORDER-TOP: #dddddd 1px solid; FONT-SIZE: 10pt; MARGIN-BOTTOM: 10px; WIDTH: 100%; COLOR: #909090; MARGIN-RIGHT: 10px; PADDING-TOP: 9px; FONT-FAMILY: Verdana; HEIGHT: 42px; BACKGROUND-COLOR: #ffffff"><NOBR><SPAN
|
||||
title="Join Windows Live to share photos using Windows Live Photo E-mail.">Online
|
||||
pictures are available for 30 days. <A style="COLOR: #0088e4"
|
||||
href="http://g.msn.com/5meen_us/175">Get Windows Live Mail desktop to create
|
||||
your own photo e-mails. </A></SPAN></NOBR></DIV></BODY></HTML>
|
||||
|
@@ -72,3 +72,5 @@ q:after {
|
||||
.fixme:before {content:"Fix me: "; font-weight:bold; color:#C00; }
|
||||
|
||||
#applicability {margin: 1em 5%; font-style:italic;}
|
||||
|
||||
/* vim: et sw=4 sts=4 */
|
||||
|
@@ -1,21 +1,23 @@
|
||||
<?php
|
||||
|
||||
/**
|
||||
* Special XSLTProcessor specifically for HTML documents. Loosely
|
||||
* based off of XSLTProcessor, but not really
|
||||
* Decorator/extender XSLT processor specifically for HTML documents.
|
||||
*/
|
||||
class ConfigDoc_HTMLXSLTProcessor
|
||||
{
|
||||
|
||||
protected $xsltProcessor;
|
||||
|
||||
public function __construct() {
|
||||
$this->xsltProcessor = new XSLTProcessor();
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Imports stylesheet for processor to use
|
||||
* @param $xsl XSLT DOM tree, or filename of the XSL transformation
|
||||
* Instance of XSLTProcessor
|
||||
*/
|
||||
protected $xsltProcessor;
|
||||
|
||||
public function __construct($proc = false) {
|
||||
if ($proc === false) $proc = new XSLTProcessor();
|
||||
$this->xsltProcessor = $proc;
|
||||
}
|
||||
|
||||
/**
|
||||
* @note Allows a string $xsl filename to be passed
|
||||
*/
|
||||
public function importStylesheet($xsl) {
|
||||
if (is_string($xsl)) {
|
||||
@@ -25,18 +27,27 @@ class ConfigDoc_HTMLXSLTProcessor
|
||||
}
|
||||
return $this->xsltProcessor->importStylesheet($xsl);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Transforms an XML file into HTML based on the stylesheet
|
||||
* @param $xml XML DOM tree
|
||||
* Transforms an XML file into compatible XHTML based on the stylesheet
|
||||
* @param $xml XML DOM tree, or string filename
|
||||
* @return string HTML output
|
||||
* @todo Rename to transformToXHTML, as transformToHTML is misleading
|
||||
*/
|
||||
public function transformToHTML($xml) {
|
||||
$out = $this->xsltProcessor->transformToXML($xml);
|
||||
|
||||
if (is_string($xml)) {
|
||||
$dom = new DOMDocument();
|
||||
$dom->load($xml);
|
||||
} else {
|
||||
$dom = $xml;
|
||||
}
|
||||
$out = $this->xsltProcessor->transformToXML($dom);
|
||||
|
||||
// fudges for HTML backwards compatibility
|
||||
// assumes that document is XHTML
|
||||
$out = str_replace('/>', ' />', $out); // <br /> not <br/>
|
||||
$out = str_replace(' xmlns=""', '', $out); // rm unnecessary xmlns
|
||||
$out = str_replace(' xmlns="http://www.w3.org/1999/xhtml"', '', $out); // rm unnecessary xmlns
|
||||
|
||||
if (class_exists('Tidy')) {
|
||||
// cleanup output
|
||||
$config = array(
|
||||
@@ -49,14 +60,27 @@ class ConfigDoc_HTMLXSLTProcessor
|
||||
$tidy->cleanRepair();
|
||||
$out = (string) $tidy;
|
||||
}
|
||||
|
||||
return $out;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Bulk sets parameters for the XSL stylesheet
|
||||
* @param array $options Associative array of options to set
|
||||
*/
|
||||
public function setParameters($options) {
|
||||
foreach ($options as $name => $value) {
|
||||
$this->xsltProcessor->setParameter('', $name, $value);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Forward any other calls to the XSLT processor
|
||||
*/
|
||||
public function __call($name, $arguments) {
|
||||
call_user_func_array(array($this->xsltProcessor, $name), $arguments);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// vim: et sw=4 sts=4
|
157
extras/FSTools.php
Normal file
157
extras/FSTools.php
Normal file
@@ -0,0 +1,157 @@
|
||||
<?php
|
||||
|
||||
/**
|
||||
* Filesystem tools not provided by default; can recursively create, copy
|
||||
* and delete folders. Some template methods are provided for extensibility.
|
||||
*
|
||||
* @note This class must be instantiated to be used, although it does
|
||||
* not maintain state.
|
||||
*/
|
||||
class FSTools
|
||||
{
|
||||
|
||||
private static $singleton;
|
||||
|
||||
/**
|
||||
* Returns a global instance of FSTools
|
||||
*/
|
||||
static public function singleton() {
|
||||
if (empty(FSTools::$singleton)) FSTools::$singleton = new FSTools();
|
||||
return FSTools::$singleton;
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets our global singleton to something else; useful for overloading
|
||||
* functions.
|
||||
*/
|
||||
static public function setSingleton($singleton) {
|
||||
FSTools::$singleton = $singleton;
|
||||
}
|
||||
|
||||
/**
|
||||
* Recursively creates a directory
|
||||
* @param string $folder Name of folder to create
|
||||
* @note Adapted from the PHP manual comment 76612
|
||||
*/
|
||||
public function mkdirr($folder) {
|
||||
$folders = preg_split("#[\\\\/]#", $folder);
|
||||
$base = '';
|
||||
for($i = 0, $c = count($folders); $i < $c; $i++) {
|
||||
if(empty($folders[$i])) {
|
||||
if (!$i) {
|
||||
// special case for root level
|
||||
$base .= DIRECTORY_SEPARATOR;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
$base .= $folders[$i];
|
||||
if(!is_dir($base)){
|
||||
$this->mkdir($base);
|
||||
}
|
||||
$base .= DIRECTORY_SEPARATOR;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Copy a file, or recursively copy a folder and its contents; modified
|
||||
* so that copied files, if PHP, have includes removed
|
||||
* @note Adapted from http://aidanlister.com/repos/v/function.copyr.php
|
||||
*/
|
||||
public function copyr($source, $dest) {
|
||||
// Simple copy for a file
|
||||
if (is_file($source)) {
|
||||
return $this->copy($source, $dest);
|
||||
}
|
||||
// Make destination directory
|
||||
if (!is_dir($dest)) {
|
||||
$this->mkdir($dest);
|
||||
}
|
||||
// Loop through the folder
|
||||
$dir = $this->dir($source);
|
||||
while ( false !== ($entry = $dir->read()) ) {
|
||||
// Skip pointers
|
||||
if ($entry == '.' || $entry == '..') {
|
||||
continue;
|
||||
}
|
||||
if (!$this->copyable($entry)) {
|
||||
continue;
|
||||
}
|
||||
// Deep copy directories
|
||||
if ($dest !== "$source/$entry") {
|
||||
$this->copyr("$source/$entry", "$dest/$entry");
|
||||
}
|
||||
}
|
||||
// Clean up
|
||||
$dir->close();
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Overloadable function that tests a filename for copyability. By
|
||||
* default, everything should be copied; you can restrict things to
|
||||
* ignore hidden files, unreadable files, etc. This function
|
||||
* applies to copyr().
|
||||
*/
|
||||
public function copyable($file) {
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Delete a file, or a folder and its contents
|
||||
* @note Adapted from http://aidanlister.com/repos/v/function.rmdirr.php
|
||||
*/
|
||||
public function rmdirr($dirname)
|
||||
{
|
||||
// Sanity check
|
||||
if (!$this->file_exists($dirname)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Simple delete for a file
|
||||
if ($this->is_file($dirname) || $this->is_link($dirname)) {
|
||||
return $this->unlink($dirname);
|
||||
}
|
||||
|
||||
// Loop through the folder
|
||||
$dir = $this->dir($dirname);
|
||||
while (false !== $entry = $dir->read()) {
|
||||
// Skip pointers
|
||||
if ($entry == '.' || $entry == '..') {
|
||||
continue;
|
||||
}
|
||||
// Recurse
|
||||
$this->rmdirr($dirname . DIRECTORY_SEPARATOR . $entry);
|
||||
}
|
||||
|
||||
// Clean up
|
||||
$dir->close();
|
||||
return $this->rmdir($dirname);
|
||||
}
|
||||
|
||||
/**
|
||||
* Recursively globs a directory.
|
||||
*/
|
||||
public function globr($dir, $pattern, $flags = NULL) {
|
||||
$files = $this->glob("$dir/$pattern", $flags);
|
||||
if ($files === false) $files = array();
|
||||
$sub_dirs = $this->glob("$dir/*", GLOB_ONLYDIR);
|
||||
if ($sub_dirs === false) $sub_dirs = array();
|
||||
foreach ($sub_dirs as $sub_dir) {
|
||||
$sub_files = $this->globr($sub_dir, $pattern, $flags);
|
||||
$files = array_merge($files, $sub_files);
|
||||
}
|
||||
return $files;
|
||||
}
|
||||
|
||||
/**
|
||||
* Allows for PHP functions to be called and be stubbed.
|
||||
* @warning This function will not work for functions that need
|
||||
* to pass references; manually define a stub function for those.
|
||||
*/
|
||||
public function __call($name, $args) {
|
||||
return call_user_func_array($name, $args);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// vim: et sw=4 sts=4
|
126
extras/FSTools/File.php
Normal file
126
extras/FSTools/File.php
Normal file
@@ -0,0 +1,126 @@
|
||||
<?php
|
||||
|
||||
/**
|
||||
* Represents a file in the filesystem
|
||||
*
|
||||
* @warning Be sure to distinguish between get() and write() versus
|
||||
* read() and put(), the former operates on the entire file, while
|
||||
* the latter operates on a handle.
|
||||
*/
|
||||
class FSTools_File
|
||||
{
|
||||
|
||||
/** Filename of file this object represents */
|
||||
protected $name;
|
||||
|
||||
/** Handle for the file */
|
||||
protected $handle = false;
|
||||
|
||||
/** Instance of FSTools for interfacing with filesystem */
|
||||
protected $fs;
|
||||
|
||||
/**
|
||||
* Filename of file you wish to instantiate.
|
||||
* @note This file need not exist
|
||||
*/
|
||||
public function __construct($name, $fs = false) {
|
||||
$this->name = $name;
|
||||
$this->fs = $fs ? $fs : FSTools::singleton();
|
||||
}
|
||||
|
||||
/** Returns the filename of the file. */
|
||||
public function getName() {return $this->name;}
|
||||
|
||||
/** Returns directory of the file without trailing slash */
|
||||
public function getDirectory() {return $this->fs->dirname($this->name);}
|
||||
|
||||
/**
|
||||
* Retrieves the contents of a file
|
||||
* @todo Throw an exception if file doesn't exist
|
||||
*/
|
||||
public function get() {
|
||||
return $this->fs->file_get_contents($this->name);
|
||||
}
|
||||
|
||||
/** Writes contents to a file, creates new file if necessary */
|
||||
public function write($contents) {
|
||||
return $this->fs->file_put_contents($this->name, $contents);
|
||||
}
|
||||
|
||||
/** Deletes the file */
|
||||
public function delete() {
|
||||
return $this->fs->unlink($this->name);
|
||||
}
|
||||
|
||||
/** Returns true if file exists and is a file. */
|
||||
public function exists() {
|
||||
return $this->fs->is_file($this->name);
|
||||
}
|
||||
|
||||
/** Returns last file modification time */
|
||||
public function getMTime() {
|
||||
return $this->fs->filemtime($this->name);
|
||||
}
|
||||
|
||||
/**
|
||||
* Chmod a file
|
||||
* @note We ignore errors because of some weird owner trickery due
|
||||
* to SVN duality
|
||||
*/
|
||||
public function chmod($octal_code) {
|
||||
return @$this->fs->chmod($this->name, $octal_code);
|
||||
}
|
||||
|
||||
/** Opens file's handle */
|
||||
public function open($mode) {
|
||||
if ($this->handle) $this->close();
|
||||
$this->handle = $this->fs->fopen($this->name, $mode);
|
||||
return true;
|
||||
}
|
||||
|
||||
/** Closes file's handle */
|
||||
public function close() {
|
||||
if (!$this->handle) return false;
|
||||
$status = $this->fs->fclose($this->handle);
|
||||
$this->handle = false;
|
||||
return $status;
|
||||
}
|
||||
|
||||
/** Retrieves a line from an open file, with optional max length $length */
|
||||
public function getLine($length = null) {
|
||||
if (!$this->handle) $this->open('r');
|
||||
if ($length === null) return $this->fs->fgets($this->handle);
|
||||
else return $this->fs->fgets($this->handle, $length);
|
||||
}
|
||||
|
||||
/** Retrieves a character from an open file */
|
||||
public function getChar() {
|
||||
if (!$this->handle) $this->open('r');
|
||||
return $this->fs->fgetc($this->handle);
|
||||
}
|
||||
|
||||
/** Retrieves an $length bytes of data from an open data */
|
||||
public function read($length) {
|
||||
if (!$this->handle) $this->open('r');
|
||||
return $this->fs->fread($this->handle, $length);
|
||||
}
|
||||
|
||||
/** Writes to an open file */
|
||||
public function put($string) {
|
||||
if (!$this->handle) $this->open('a');
|
||||
return $this->fs->fwrite($this->handle, $string);
|
||||
}
|
||||
|
||||
/** Returns TRUE if the end of the file has been reached */
|
||||
public function eof() {
|
||||
if (!$this->handle) return true;
|
||||
return $this->fs->feof($this->handle);
|
||||
}
|
||||
|
||||
public function __destruct() {
|
||||
if ($this->handle) $this->close();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// vim: et sw=4 sts=4
|
@@ -5,5 +5,7 @@
|
||||
*/
|
||||
|
||||
set_include_path(dirname(__FILE__) . PATH_SEPARATOR . get_include_path() );
|
||||
require_once 'ConfigDoc.php';
|
||||
require_once 'HTMLPurifierExtras.php';
|
||||
require_once 'HTMLPurifierExtras.autoload.php';
|
||||
|
||||
// vim: et sw=4 sts=4
|
25
extras/HTMLPurifierExtras.autoload.php
Normal file
25
extras/HTMLPurifierExtras.autoload.php
Normal file
@@ -0,0 +1,25 @@
|
||||
<?php
|
||||
|
||||
/**
|
||||
* @file
|
||||
* Convenience file that registers autoload handler for HTML Purifier.
|
||||
*
|
||||
* @warning
|
||||
* This autoloader does not contain the compatibility code seen in
|
||||
* HTMLPurifier_Bootstrap; the user is expected to make any necessary
|
||||
* changes to use this library.
|
||||
*/
|
||||
|
||||
if (function_exists('spl_autoload_register')) {
|
||||
spl_autoload_register(array('HTMLPurifierExtras', 'autoload'));
|
||||
if (function_exists('__autoload')) {
|
||||
// Be polite and ensure that userland autoload gets retained
|
||||
spl_autoload_register('__autoload');
|
||||
}
|
||||
} elseif (!function_exists('__autoload')) {
|
||||
function __autoload($class) {
|
||||
return HTMLPurifierExtras::autoload($class);
|
||||
}
|
||||
}
|
||||
|
||||
// vim: et sw=4 sts=4
|
29
extras/HTMLPurifierExtras.php
Normal file
29
extras/HTMLPurifierExtras.php
Normal file
@@ -0,0 +1,29 @@
|
||||
<?php
|
||||
|
||||
/**
|
||||
* Meta-class for HTML Purifier's extra class hierarchies, similar to
|
||||
* HTMLPurifier_Bootstrap.
|
||||
*/
|
||||
class HTMLPurifierExtras
|
||||
{
|
||||
|
||||
public static function autoload($class) {
|
||||
$path = HTMLPurifierExtras::getPath($class);
|
||||
if (!$path) return false;
|
||||
require $path;
|
||||
return true;
|
||||
}
|
||||
|
||||
public static function getPath($class) {
|
||||
if (
|
||||
strncmp('FSTools', $class, 7) !== 0 &&
|
||||
strncmp('ConfigDoc', $class, 9) !== 0
|
||||
) return false;
|
||||
// Custom implementations can go here
|
||||
// Standard implementation:
|
||||
return str_replace('_', '/', $class) . '.php';
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// vim: et sw=4 sts=4
|
32
extras/README
Normal file
32
extras/README
Normal file
@@ -0,0 +1,32 @@
|
||||
|
||||
HTML Purifier Extras
|
||||
The Method Behind The Madness!
|
||||
|
||||
The extras/ folder in HTML Purifier contains--you guessed it--extra things
|
||||
for HTML Purifier. Specifically, these are two extra libraries called
|
||||
FSTools and ConfigSchema. They're extra for a reason: you don't need them
|
||||
if you're using HTML Purifier for normal usage: filtering HTML. However,
|
||||
if you're a developer, and would like to test HTML Purifier, or need to
|
||||
use one of HTML Purifier's maintenance scripts, chances are they'll need
|
||||
these libraries. Who knows: maybe you'll find them useful too!
|
||||
|
||||
Here are the libraries:
|
||||
|
||||
|
||||
FSTools
|
||||
-------
|
||||
|
||||
Short for File System Tools, this is a poor-man's object-oriented wrapper for
|
||||
the filesystem. It currently consists of two classes:
|
||||
|
||||
- FSTools: This is a singleton that contains a manner of useful functions
|
||||
such as recursive glob, directory removal, etc, as well as the ability
|
||||
to call arbitrary native PHP functions through it like $FS->fopen(...).
|
||||
This makes it a lot simpler to mock these filesystem calls for unit testing.
|
||||
|
||||
- FSTools_File: This object represents a single file, and has almost any
|
||||
method imaginable one would need.
|
||||
|
||||
Check the files themselves for more information.
|
||||
|
||||
vim: et sw=4 sts=4
|
@@ -5,5 +5,7 @@
|
||||
*/
|
||||
|
||||
set_include_path(dirname(__FILE__) . PATH_SEPARATOR . get_include_path() );
|
||||
require_once 'HTMLPurifier.php';
|
||||
require_once 'HTMLPurifier/Bootstrap.php';
|
||||
require_once 'HTMLPurifier.autoload.php';
|
||||
|
||||
// vim: et sw=4 sts=4
|
||||
|
21
library/HTMLPurifier.autoload.php
Normal file
21
library/HTMLPurifier.autoload.php
Normal file
@@ -0,0 +1,21 @@
|
||||
<?php
|
||||
|
||||
/**
|
||||
* @file
|
||||
* Convenience file that registers autoload handler for HTML Purifier.
|
||||
*/
|
||||
|
||||
if (function_exists('spl_autoload_register') && function_exists('spl_autoload_unregister')) {
|
||||
// We need unregister for our pre-registering functionality
|
||||
HTMLPurifier_Bootstrap::registerAutoload();
|
||||
if (function_exists('__autoload')) {
|
||||
// Be polite and ensure that userland autoload gets retained
|
||||
spl_autoload_register('__autoload');
|
||||
}
|
||||
} elseif (!function_exists('__autoload')) {
|
||||
function __autoload($class) {
|
||||
return HTMLPurifier_Bootstrap::autoload($class);
|
||||
}
|
||||
}
|
||||
|
||||
// vim: et sw=4 sts=4
|
@@ -1,20 +1,23 @@
|
||||
<?php
|
||||
|
||||
/**
|
||||
* Function wrapper for HTML Purifier for quick use.
|
||||
* @note This function only includes the library when it is called. While
|
||||
* this is efficient for instances when you only use HTML Purifier
|
||||
* on a few of your pages, it murders bytecode caching. You still
|
||||
* need to add HTML Purifier to your path.
|
||||
* @file
|
||||
* Defines a function wrapper for HTML Purifier for quick use.
|
||||
* @note ''HTMLPurifier()'' is NOT the same as ''new HTMLPurifier()''
|
||||
*/
|
||||
|
||||
/**
|
||||
* Purify HTML.
|
||||
* @param $html String HTML to purify
|
||||
* @param $config Configuration to use, can be any value accepted by
|
||||
* HTMLPurifier_Config::create()
|
||||
*/
|
||||
function HTMLPurifier($html, $config = null) {
|
||||
static $purifier = false;
|
||||
if (!$purifier) {
|
||||
require_once 'HTMLPurifier.php';
|
||||
$purifier = new HTMLPurifier();
|
||||
}
|
||||
return $purifier->purify($html, $config);
|
||||
}
|
||||
|
||||
// vim: et sw=4 sts=4
|
||||
|
212
library/HTMLPurifier.includes.php
Normal file
212
library/HTMLPurifier.includes.php
Normal file
@@ -0,0 +1,212 @@
|
||||
<?php
|
||||
|
||||
/**
|
||||
* @file
|
||||
* This file was auto-generated by generate-includes.php and includes all of
|
||||
* the core files required by HTML Purifier. Use this if performance is a
|
||||
* primary concern and you are using an opcode cache. PLEASE DO NOT EDIT THIS
|
||||
* FILE, changes will be overwritten the next time the script is run.
|
||||
*
|
||||
* @version 4.2.0
|
||||
*
|
||||
* @warning
|
||||
* You must *not* include any other HTML Purifier files before this file,
|
||||
* because 'require' not 'require_once' is used.
|
||||
*
|
||||
* @warning
|
||||
* This file requires that the include path contains the HTML Purifier
|
||||
* library directory; this is not auto-set.
|
||||
*/
|
||||
|
||||
require 'HTMLPurifier.php';
|
||||
require 'HTMLPurifier/AttrCollections.php';
|
||||
require 'HTMLPurifier/AttrDef.php';
|
||||
require 'HTMLPurifier/AttrTransform.php';
|
||||
require 'HTMLPurifier/AttrTypes.php';
|
||||
require 'HTMLPurifier/AttrValidator.php';
|
||||
require 'HTMLPurifier/Bootstrap.php';
|
||||
require 'HTMLPurifier/Definition.php';
|
||||
require 'HTMLPurifier/CSSDefinition.php';
|
||||
require 'HTMLPurifier/ChildDef.php';
|
||||
require 'HTMLPurifier/Config.php';
|
||||
require 'HTMLPurifier/ConfigSchema.php';
|
||||
require 'HTMLPurifier/ContentSets.php';
|
||||
require 'HTMLPurifier/Context.php';
|
||||
require 'HTMLPurifier/DefinitionCache.php';
|
||||
require 'HTMLPurifier/DefinitionCacheFactory.php';
|
||||
require 'HTMLPurifier/Doctype.php';
|
||||
require 'HTMLPurifier/DoctypeRegistry.php';
|
||||
require 'HTMLPurifier/ElementDef.php';
|
||||
require 'HTMLPurifier/Encoder.php';
|
||||
require 'HTMLPurifier/EntityLookup.php';
|
||||
require 'HTMLPurifier/EntityParser.php';
|
||||
require 'HTMLPurifier/ErrorCollector.php';
|
||||
require 'HTMLPurifier/ErrorStruct.php';
|
||||
require 'HTMLPurifier/Exception.php';
|
||||
require 'HTMLPurifier/Filter.php';
|
||||
require 'HTMLPurifier/Generator.php';
|
||||
require 'HTMLPurifier/HTMLDefinition.php';
|
||||
require 'HTMLPurifier/HTMLModule.php';
|
||||
require 'HTMLPurifier/HTMLModuleManager.php';
|
||||
require 'HTMLPurifier/IDAccumulator.php';
|
||||
require 'HTMLPurifier/Injector.php';
|
||||
require 'HTMLPurifier/Language.php';
|
||||
require 'HTMLPurifier/LanguageFactory.php';
|
||||
require 'HTMLPurifier/Length.php';
|
||||
require 'HTMLPurifier/Lexer.php';
|
||||
require 'HTMLPurifier/PercentEncoder.php';
|
||||
require 'HTMLPurifier/PropertyList.php';
|
||||
require 'HTMLPurifier/PropertyListIterator.php';
|
||||
require 'HTMLPurifier/Strategy.php';
|
||||
require 'HTMLPurifier/StringHash.php';
|
||||
require 'HTMLPurifier/StringHashParser.php';
|
||||
require 'HTMLPurifier/TagTransform.php';
|
||||
require 'HTMLPurifier/Token.php';
|
||||
require 'HTMLPurifier/TokenFactory.php';
|
||||
require 'HTMLPurifier/URI.php';
|
||||
require 'HTMLPurifier/URIDefinition.php';
|
||||
require 'HTMLPurifier/URIFilter.php';
|
||||
require 'HTMLPurifier/URIParser.php';
|
||||
require 'HTMLPurifier/URIScheme.php';
|
||||
require 'HTMLPurifier/URISchemeRegistry.php';
|
||||
require 'HTMLPurifier/UnitConverter.php';
|
||||
require 'HTMLPurifier/VarParser.php';
|
||||
require 'HTMLPurifier/VarParserException.php';
|
||||
require 'HTMLPurifier/AttrDef/CSS.php';
|
||||
require 'HTMLPurifier/AttrDef/Enum.php';
|
||||
require 'HTMLPurifier/AttrDef/Integer.php';
|
||||
require 'HTMLPurifier/AttrDef/Lang.php';
|
||||
require 'HTMLPurifier/AttrDef/Switch.php';
|
||||
require 'HTMLPurifier/AttrDef/Text.php';
|
||||
require 'HTMLPurifier/AttrDef/URI.php';
|
||||
require 'HTMLPurifier/AttrDef/CSS/Number.php';
|
||||
require 'HTMLPurifier/AttrDef/CSS/AlphaValue.php';
|
||||
require 'HTMLPurifier/AttrDef/CSS/Background.php';
|
||||
require 'HTMLPurifier/AttrDef/CSS/BackgroundPosition.php';
|
||||
require 'HTMLPurifier/AttrDef/CSS/Border.php';
|
||||
require 'HTMLPurifier/AttrDef/CSS/Color.php';
|
||||
require 'HTMLPurifier/AttrDef/CSS/Composite.php';
|
||||
require 'HTMLPurifier/AttrDef/CSS/DenyElementDecorator.php';
|
||||
require 'HTMLPurifier/AttrDef/CSS/Filter.php';
|
||||
require 'HTMLPurifier/AttrDef/CSS/Font.php';
|
||||
require 'HTMLPurifier/AttrDef/CSS/FontFamily.php';
|
||||
require 'HTMLPurifier/AttrDef/CSS/ImportantDecorator.php';
|
||||
require 'HTMLPurifier/AttrDef/CSS/Length.php';
|
||||
require 'HTMLPurifier/AttrDef/CSS/ListStyle.php';
|
||||
require 'HTMLPurifier/AttrDef/CSS/Multiple.php';
|
||||
require 'HTMLPurifier/AttrDef/CSS/Percentage.php';
|
||||
require 'HTMLPurifier/AttrDef/CSS/TextDecoration.php';
|
||||
require 'HTMLPurifier/AttrDef/CSS/URI.php';
|
||||
require 'HTMLPurifier/AttrDef/HTML/Bool.php';
|
||||
require 'HTMLPurifier/AttrDef/HTML/Nmtokens.php';
|
||||
require 'HTMLPurifier/AttrDef/HTML/Class.php';
|
||||
require 'HTMLPurifier/AttrDef/HTML/Color.php';
|
||||
require 'HTMLPurifier/AttrDef/HTML/FrameTarget.php';
|
||||
require 'HTMLPurifier/AttrDef/HTML/ID.php';
|
||||
require 'HTMLPurifier/AttrDef/HTML/Pixels.php';
|
||||
require 'HTMLPurifier/AttrDef/HTML/Length.php';
|
||||
require 'HTMLPurifier/AttrDef/HTML/LinkTypes.php';
|
||||
require 'HTMLPurifier/AttrDef/HTML/MultiLength.php';
|
||||
require 'HTMLPurifier/AttrDef/URI/Email.php';
|
||||
require 'HTMLPurifier/AttrDef/URI/Host.php';
|
||||
require 'HTMLPurifier/AttrDef/URI/IPv4.php';
|
||||
require 'HTMLPurifier/AttrDef/URI/IPv6.php';
|
||||
require 'HTMLPurifier/AttrDef/URI/Email/SimpleCheck.php';
|
||||
require 'HTMLPurifier/AttrTransform/Background.php';
|
||||
require 'HTMLPurifier/AttrTransform/BdoDir.php';
|
||||
require 'HTMLPurifier/AttrTransform/BgColor.php';
|
||||
require 'HTMLPurifier/AttrTransform/BoolToCSS.php';
|
||||
require 'HTMLPurifier/AttrTransform/Border.php';
|
||||
require 'HTMLPurifier/AttrTransform/EnumToCSS.php';
|
||||
require 'HTMLPurifier/AttrTransform/ImgRequired.php';
|
||||
require 'HTMLPurifier/AttrTransform/ImgSpace.php';
|
||||
require 'HTMLPurifier/AttrTransform/Input.php';
|
||||
require 'HTMLPurifier/AttrTransform/Lang.php';
|
||||
require 'HTMLPurifier/AttrTransform/Length.php';
|
||||
require 'HTMLPurifier/AttrTransform/Name.php';
|
||||
require 'HTMLPurifier/AttrTransform/NameSync.php';
|
||||
require 'HTMLPurifier/AttrTransform/SafeEmbed.php';
|
||||
require 'HTMLPurifier/AttrTransform/SafeObject.php';
|
||||
require 'HTMLPurifier/AttrTransform/SafeParam.php';
|
||||
require 'HTMLPurifier/AttrTransform/ScriptRequired.php';
|
||||
require 'HTMLPurifier/AttrTransform/Textarea.php';
|
||||
require 'HTMLPurifier/ChildDef/Chameleon.php';
|
||||
require 'HTMLPurifier/ChildDef/Custom.php';
|
||||
require 'HTMLPurifier/ChildDef/Empty.php';
|
||||
require 'HTMLPurifier/ChildDef/Required.php';
|
||||
require 'HTMLPurifier/ChildDef/Optional.php';
|
||||
require 'HTMLPurifier/ChildDef/StrictBlockquote.php';
|
||||
require 'HTMLPurifier/ChildDef/Table.php';
|
||||
require 'HTMLPurifier/DefinitionCache/Decorator.php';
|
||||
require 'HTMLPurifier/DefinitionCache/Null.php';
|
||||
require 'HTMLPurifier/DefinitionCache/Serializer.php';
|
||||
require 'HTMLPurifier/DefinitionCache/Decorator/Cleanup.php';
|
||||
require 'HTMLPurifier/DefinitionCache/Decorator/Memory.php';
|
||||
require 'HTMLPurifier/HTMLModule/Bdo.php';
|
||||
require 'HTMLPurifier/HTMLModule/CommonAttributes.php';
|
||||
require 'HTMLPurifier/HTMLModule/Edit.php';
|
||||
require 'HTMLPurifier/HTMLModule/Forms.php';
|
||||
require 'HTMLPurifier/HTMLModule/Hypertext.php';
|
||||
require 'HTMLPurifier/HTMLModule/Image.php';
|
||||
require 'HTMLPurifier/HTMLModule/Legacy.php';
|
||||
require 'HTMLPurifier/HTMLModule/List.php';
|
||||
require 'HTMLPurifier/HTMLModule/Name.php';
|
||||
require 'HTMLPurifier/HTMLModule/NonXMLCommonAttributes.php';
|
||||
require 'HTMLPurifier/HTMLModule/Object.php';
|
||||
require 'HTMLPurifier/HTMLModule/Presentation.php';
|
||||
require 'HTMLPurifier/HTMLModule/Proprietary.php';
|
||||
require 'HTMLPurifier/HTMLModule/Ruby.php';
|
||||
require 'HTMLPurifier/HTMLModule/SafeEmbed.php';
|
||||
require 'HTMLPurifier/HTMLModule/SafeObject.php';
|
||||
require 'HTMLPurifier/HTMLModule/Scripting.php';
|
||||
require 'HTMLPurifier/HTMLModule/StyleAttribute.php';
|
||||
require 'HTMLPurifier/HTMLModule/Tables.php';
|
||||
require 'HTMLPurifier/HTMLModule/Target.php';
|
||||
require 'HTMLPurifier/HTMLModule/Text.php';
|
||||
require 'HTMLPurifier/HTMLModule/Tidy.php';
|
||||
require 'HTMLPurifier/HTMLModule/XMLCommonAttributes.php';
|
||||
require 'HTMLPurifier/HTMLModule/Tidy/Name.php';
|
||||
require 'HTMLPurifier/HTMLModule/Tidy/Proprietary.php';
|
||||
require 'HTMLPurifier/HTMLModule/Tidy/XHTMLAndHTML4.php';
|
||||
require 'HTMLPurifier/HTMLModule/Tidy/Strict.php';
|
||||
require 'HTMLPurifier/HTMLModule/Tidy/Transitional.php';
|
||||
require 'HTMLPurifier/HTMLModule/Tidy/XHTML.php';
|
||||
require 'HTMLPurifier/Injector/AutoParagraph.php';
|
||||
require 'HTMLPurifier/Injector/DisplayLinkURI.php';
|
||||
require 'HTMLPurifier/Injector/Linkify.php';
|
||||
require 'HTMLPurifier/Injector/PurifierLinkify.php';
|
||||
require 'HTMLPurifier/Injector/RemoveEmpty.php';
|
||||
require 'HTMLPurifier/Injector/RemoveSpansWithoutAttributes.php';
|
||||
require 'HTMLPurifier/Injector/SafeObject.php';
|
||||
require 'HTMLPurifier/Lexer/DOMLex.php';
|
||||
require 'HTMLPurifier/Lexer/DirectLex.php';
|
||||
require 'HTMLPurifier/Strategy/Composite.php';
|
||||
require 'HTMLPurifier/Strategy/Core.php';
|
||||
require 'HTMLPurifier/Strategy/FixNesting.php';
|
||||
require 'HTMLPurifier/Strategy/MakeWellFormed.php';
|
||||
require 'HTMLPurifier/Strategy/RemoveForeignElements.php';
|
||||
require 'HTMLPurifier/Strategy/ValidateAttributes.php';
|
||||
require 'HTMLPurifier/TagTransform/Font.php';
|
||||
require 'HTMLPurifier/TagTransform/Simple.php';
|
||||
require 'HTMLPurifier/Token/Comment.php';
|
||||
require 'HTMLPurifier/Token/Tag.php';
|
||||
require 'HTMLPurifier/Token/Empty.php';
|
||||
require 'HTMLPurifier/Token/End.php';
|
||||
require 'HTMLPurifier/Token/Start.php';
|
||||
require 'HTMLPurifier/Token/Text.php';
|
||||
require 'HTMLPurifier/URIFilter/DisableExternal.php';
|
||||
require 'HTMLPurifier/URIFilter/DisableExternalResources.php';
|
||||
require 'HTMLPurifier/URIFilter/DisableResources.php';
|
||||
require 'HTMLPurifier/URIFilter/HostBlacklist.php';
|
||||
require 'HTMLPurifier/URIFilter/MakeAbsolute.php';
|
||||
require 'HTMLPurifier/URIFilter/Munge.php';
|
||||
require 'HTMLPurifier/URIScheme/data.php';
|
||||
require 'HTMLPurifier/URIScheme/file.php';
|
||||
require 'HTMLPurifier/URIScheme/ftp.php';
|
||||
require 'HTMLPurifier/URIScheme/http.php';
|
||||
require 'HTMLPurifier/URIScheme/https.php';
|
||||
require 'HTMLPurifier/URIScheme/mailto.php';
|
||||
require 'HTMLPurifier/URIScheme/news.php';
|
||||
require 'HTMLPurifier/URIScheme/nntp.php';
|
||||
require 'HTMLPurifier/VarParser/Flexible.php';
|
||||
require 'HTMLPurifier/VarParser/Native.php';
|
30
library/HTMLPurifier.kses.php
Normal file
30
library/HTMLPurifier.kses.php
Normal file
@@ -0,0 +1,30 @@
|
||||
<?php
|
||||
|
||||
/**
|
||||
* @file
|
||||
* Emulation layer for code that used kses(), substituting in HTML Purifier.
|
||||
*/
|
||||
|
||||
require_once dirname(__FILE__) . '/HTMLPurifier.auto.php';
|
||||
|
||||
function kses($string, $allowed_html, $allowed_protocols = null) {
|
||||
$config = HTMLPurifier_Config::createDefault();
|
||||
$allowed_elements = array();
|
||||
$allowed_attributes = array();
|
||||
foreach ($allowed_html as $element => $attributes) {
|
||||
$allowed_elements[$element] = true;
|
||||
foreach ($attributes as $attribute => $x) {
|
||||
$allowed_attributes["$element.$attribute"] = true;
|
||||
}
|
||||
}
|
||||
$config->set('HTML.AllowedElements', $allowed_elements);
|
||||
$config->set('HTML.AllowedAttributes', $allowed_attributes);
|
||||
$allowed_schemes = array();
|
||||
if ($allowed_protocols !== null) {
|
||||
$config->set('URI.AllowedSchemes', $allowed_protocols);
|
||||
}
|
||||
$purifier = new HTMLPurifier($config);
|
||||
return $purifier->purify($string);
|
||||
}
|
||||
|
||||
// vim: et sw=4 sts=4
|
11
library/HTMLPurifier.path.php
Normal file
11
library/HTMLPurifier.path.php
Normal file
@@ -0,0 +1,11 @@
|
||||
<?php
|
||||
|
||||
/**
|
||||
* @file
|
||||
* Convenience stub file that adds HTML Purifier's library file to the path
|
||||
* without any other side-effects.
|
||||
*/
|
||||
|
||||
set_include_path(dirname(__FILE__) . PATH_SEPARATOR . get_include_path() );
|
||||
|
||||
// vim: et sw=4 sts=4
|
@@ -1,12 +1,11 @@
|
||||
<?php
|
||||
|
||||
/*!
|
||||
* @mainpage
|
||||
*
|
||||
/*! @mainpage
|
||||
*
|
||||
* HTML Purifier is an HTML filter that will take an arbitrary snippet of
|
||||
* HTML and rigorously test, validate and filter it into a version that
|
||||
* is safe for output onto webpages. It achieves this by:
|
||||
*
|
||||
*
|
||||
* -# Lexing (parsing into tokens) the document,
|
||||
* -# Executing various strategies on the tokens:
|
||||
* -# Removing all elements not in the whitelist,
|
||||
@@ -14,16 +13,14 @@
|
||||
* -# Fixing the nesting of the nodes, and
|
||||
* -# Validating attributes of the nodes; and
|
||||
* -# Generating HTML from the purified tokens.
|
||||
*
|
||||
*
|
||||
* However, most users will only need to interface with the HTMLPurifier
|
||||
* class, so this massive amount of infrastructure is usually concealed.
|
||||
* If you plan on working with the internals, be sure to include
|
||||
* HTMLPurifier_ConfigSchema and HTMLPurifier_Config.
|
||||
* and HTMLPurifier_Config.
|
||||
*/
|
||||
|
||||
/*
|
||||
HTML Purifier 2.1.2 - Standards Compliant HTML Filtering
|
||||
Copyright (C) 2006 Edward Z. Yang
|
||||
HTML Purifier 4.2.0 - Standards Compliant HTML Filtering
|
||||
Copyright (C) 2006-2008 Edward Z. Yang
|
||||
|
||||
This library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
@@ -40,56 +37,46 @@
|
||||
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
// constants are slow, but we'll make one exception
|
||||
define('HTMLPURIFIER_PREFIX', dirname(__FILE__));
|
||||
|
||||
// almost every class has an undocumented dependency to these, so make sure
|
||||
// they get included
|
||||
require_once 'HTMLPurifier/ConfigSchema.php'; // important
|
||||
require_once 'HTMLPurifier/Config.php';
|
||||
require_once 'HTMLPurifier/Context.php';
|
||||
|
||||
require_once 'HTMLPurifier/Lexer.php';
|
||||
require_once 'HTMLPurifier/Generator.php';
|
||||
require_once 'HTMLPurifier/Strategy/Core.php';
|
||||
require_once 'HTMLPurifier/Encoder.php';
|
||||
|
||||
require_once 'HTMLPurifier/ErrorCollector.php';
|
||||
require_once 'HTMLPurifier/LanguageFactory.php';
|
||||
|
||||
HTMLPurifier_ConfigSchema::define(
|
||||
'Core', 'CollectErrors', false, 'bool', '
|
||||
Whether or not to collect errors found while filtering the document. This
|
||||
is a useful way to give feedback to your users. CURRENTLY NOT IMPLEMENTED.
|
||||
This directive has been available since 2.0.0.
|
||||
');
|
||||
|
||||
/**
|
||||
* Main library execution class.
|
||||
*
|
||||
* Facade that performs calls to the HTMLPurifier_Lexer,
|
||||
* HTMLPurifier_Strategy and HTMLPurifier_Generator subsystems in order to
|
||||
* purify HTML.
|
||||
*
|
||||
* @todo We need an easier way to inject strategies, it'll probably end
|
||||
* up getting done through config though.
|
||||
* Facade that coordinates HTML Purifier's subsystems in order to purify HTML.
|
||||
*
|
||||
* @note There are several points in which configuration can be specified
|
||||
* for HTML Purifier. The precedence of these (from lowest to
|
||||
* highest) is as follows:
|
||||
* -# Instance: new HTMLPurifier($config)
|
||||
* -# Invocation: purify($html, $config)
|
||||
* These configurations are entirely independent of each other and
|
||||
* are *not* merged (this behavior may change in the future).
|
||||
*
|
||||
* @todo We need an easier way to inject strategies using the configuration
|
||||
* object.
|
||||
*/
|
||||
class HTMLPurifier
|
||||
{
|
||||
|
||||
var $version = '2.1.2';
|
||||
|
||||
var $config;
|
||||
var $filters;
|
||||
|
||||
var $strategy, $generator;
|
||||
|
||||
|
||||
/** Version of HTML Purifier */
|
||||
public $version = '4.2.0';
|
||||
|
||||
/** Constant with version of HTML Purifier */
|
||||
const VERSION = '4.2.0';
|
||||
|
||||
/** Global configuration object */
|
||||
public $config;
|
||||
|
||||
/** Array of extra HTMLPurifier_Filter objects to run on HTML, for backwards compatibility */
|
||||
private $filters = array();
|
||||
|
||||
/** Single instance of HTML Purifier */
|
||||
private static $instance;
|
||||
|
||||
protected $strategy, $generator;
|
||||
|
||||
/**
|
||||
* Final HTMLPurifier_Context of last run purification. Might be an array.
|
||||
* @public
|
||||
* Resultant HTMLPurifier_Context of last run purification. Is an array
|
||||
* of contexts if the last called method was purifyArray().
|
||||
*/
|
||||
var $context;
|
||||
|
||||
public $context;
|
||||
|
||||
/**
|
||||
* Initializes the purifier.
|
||||
* @param $config Optional HTMLPurifier_Config object for all instances of
|
||||
@@ -98,26 +85,26 @@ class HTMLPurifier
|
||||
* The parameter can also be any type that
|
||||
* HTMLPurifier_Config::create() supports.
|
||||
*/
|
||||
function HTMLPurifier($config = null) {
|
||||
|
||||
public function __construct($config = null) {
|
||||
|
||||
$this->config = HTMLPurifier_Config::create($config);
|
||||
|
||||
|
||||
$this->strategy = new HTMLPurifier_Strategy_Core();
|
||||
$this->generator = new HTMLPurifier_Generator();
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Adds a filter to process the output. First come first serve
|
||||
* @param $filter HTMLPurifier_Filter object
|
||||
*/
|
||||
function addFilter($filter) {
|
||||
public function addFilter($filter) {
|
||||
trigger_error('HTMLPurifier->addFilter() is deprecated, use configuration directives in the Filter namespace or Filter.Custom', E_USER_WARNING);
|
||||
$this->filters[] = $filter;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Filters an HTML snippet/document to be XSS-free and standards-compliant.
|
||||
*
|
||||
*
|
||||
* @param $html String of HTML to purify
|
||||
* @param $config HTMLPurifier_Config object for this operation, if omitted,
|
||||
* defaults to the config object specified during this
|
||||
@@ -125,39 +112,63 @@ class HTMLPurifier
|
||||
* that HTMLPurifier_Config::create() supports.
|
||||
* @return Purified HTML
|
||||
*/
|
||||
function purify($html, $config = null) {
|
||||
|
||||
public function purify($html, $config = null) {
|
||||
|
||||
// :TODO: make the config merge in, instead of replace
|
||||
$config = $config ? HTMLPurifier_Config::create($config) : $this->config;
|
||||
|
||||
|
||||
// implementation is partially environment dependant, partially
|
||||
// configuration dependant
|
||||
$lexer = HTMLPurifier_Lexer::create($config);
|
||||
|
||||
|
||||
$context = new HTMLPurifier_Context();
|
||||
|
||||
// our friendly neighborhood generator, all primed with configuration too!
|
||||
$this->generator->generateFromTokens(array(), $config, $context);
|
||||
|
||||
// setup HTML generator
|
||||
$this->generator = new HTMLPurifier_Generator($config, $context);
|
||||
$context->register('Generator', $this->generator);
|
||||
|
||||
|
||||
// set up global context variables
|
||||
if ($config->get('Core', 'CollectErrors')) {
|
||||
if ($config->get('Core.CollectErrors')) {
|
||||
// may get moved out if other facilities use it
|
||||
$language_factory = HTMLPurifier_LanguageFactory::instance();
|
||||
$language = $language_factory->create($config, $context);
|
||||
$context->register('Locale', $language);
|
||||
|
||||
|
||||
$error_collector = new HTMLPurifier_ErrorCollector($context);
|
||||
$context->register('ErrorCollector', $error_collector);
|
||||
}
|
||||
|
||||
|
||||
// setup id_accumulator context, necessary due to the fact that
|
||||
// AttrValidator can be called from many places
|
||||
$id_accumulator = HTMLPurifier_IDAccumulator::build($config, $context);
|
||||
$context->register('IDAccumulator', $id_accumulator);
|
||||
|
||||
$html = HTMLPurifier_Encoder::convertToUTF8($html, $config, $context);
|
||||
|
||||
for ($i = 0, $size = count($this->filters); $i < $size; $i++) {
|
||||
$html = $this->filters[$i]->preFilter($html, $config, $context);
|
||||
|
||||
// setup filters
|
||||
$filter_flags = $config->getBatch('Filter');
|
||||
$custom_filters = $filter_flags['Custom'];
|
||||
unset($filter_flags['Custom']);
|
||||
$filters = array();
|
||||
foreach ($filter_flags as $filter => $flag) {
|
||||
if (!$flag) continue;
|
||||
if (strpos($filter, '.') !== false) continue;
|
||||
$class = "HTMLPurifier_Filter_$filter";
|
||||
$filters[] = new $class;
|
||||
}
|
||||
|
||||
foreach ($custom_filters as $filter) {
|
||||
// maybe "HTMLPurifier_Filter_$filter", but be consistent with AutoFormat
|
||||
$filters[] = $filter;
|
||||
}
|
||||
$filters = array_merge($filters, $this->filters);
|
||||
// maybe prepare(), but later
|
||||
|
||||
for ($i = 0, $filter_size = count($filters); $i < $filter_size; $i++) {
|
||||
$html = $filters[$i]->preFilter($html, $config, $context);
|
||||
}
|
||||
|
||||
// purified HTML
|
||||
$html =
|
||||
$html =
|
||||
$this->generator->generateFromTokens(
|
||||
// list of tokens
|
||||
$this->strategy->execute(
|
||||
@@ -167,26 +178,25 @@ class HTMLPurifier
|
||||
$html, $config, $context
|
||||
),
|
||||
$config, $context
|
||||
),
|
||||
$config, $context
|
||||
)
|
||||
);
|
||||
|
||||
for ($i = $size - 1; $i >= 0; $i--) {
|
||||
$html = $this->filters[$i]->postFilter($html, $config, $context);
|
||||
|
||||
for ($i = $filter_size - 1; $i >= 0; $i--) {
|
||||
$html = $filters[$i]->postFilter($html, $config, $context);
|
||||
}
|
||||
|
||||
|
||||
$html = HTMLPurifier_Encoder::convertFromUTF8($html, $config, $context);
|
||||
$this->context =& $context;
|
||||
return $html;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Filters an array of HTML snippets
|
||||
* @param $config Optional HTMLPurifier_Config object for this operation.
|
||||
* See HTMLPurifier::purify() for more details.
|
||||
* @return Array of purified HTML
|
||||
*/
|
||||
function purifyArray($array_of_html, $config = null) {
|
||||
public function purifyArray($array_of_html, $config = null) {
|
||||
$context_array = array();
|
||||
foreach ($array_of_html as $key => $html) {
|
||||
$array_of_html[$key] = $this->purify($html, $config);
|
||||
@@ -195,24 +205,33 @@ class HTMLPurifier
|
||||
$this->context = $context_array;
|
||||
return $array_of_html;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Singleton for enforcing just one HTML Purifier in your system
|
||||
* @param $prototype Optional prototype HTMLPurifier instance to
|
||||
* overload singleton with, or HTMLPurifier_Config
|
||||
* instance to configure the generated version with.
|
||||
*/
|
||||
function &getInstance($prototype = null) {
|
||||
static $htmlpurifier;
|
||||
if (!$htmlpurifier || $prototype) {
|
||||
if (is_a($prototype, 'HTMLPurifier')) {
|
||||
$htmlpurifier = $prototype;
|
||||
public static function instance($prototype = null) {
|
||||
if (!self::$instance || $prototype) {
|
||||
if ($prototype instanceof HTMLPurifier) {
|
||||
self::$instance = $prototype;
|
||||
} elseif ($prototype) {
|
||||
$htmlpurifier = new HTMLPurifier($prototype);
|
||||
self::$instance = new HTMLPurifier($prototype);
|
||||
} else {
|
||||
$htmlpurifier = new HTMLPurifier();
|
||||
self::$instance = new HTMLPurifier();
|
||||
}
|
||||
}
|
||||
return $htmlpurifier;
|
||||
return self::$instance;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* @note Backwards compatibility, see instance()
|
||||
*/
|
||||
public static function getInstance($prototype = null) {
|
||||
return HTMLPurifier::instance($prototype);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// vim: et sw=4 sts=4
|
||||
|
206
library/HTMLPurifier.safe-includes.php
Normal file
206
library/HTMLPurifier.safe-includes.php
Normal file
@@ -0,0 +1,206 @@
|
||||
<?php
|
||||
|
||||
/**
|
||||
* @file
|
||||
* This file was auto-generated by generate-includes.php and includes all of
|
||||
* the core files required by HTML Purifier. This is a convenience stub that
|
||||
* includes all files using dirname(__FILE__) and require_once. PLEASE DO NOT
|
||||
* EDIT THIS FILE, changes will be overwritten the next time the script is run.
|
||||
*
|
||||
* Changes to include_path are not necessary.
|
||||
*/
|
||||
|
||||
$__dir = dirname(__FILE__);
|
||||
|
||||
require_once $__dir . '/HTMLPurifier.php';
|
||||
require_once $__dir . '/HTMLPurifier/AttrCollections.php';
|
||||
require_once $__dir . '/HTMLPurifier/AttrDef.php';
|
||||
require_once $__dir . '/HTMLPurifier/AttrTransform.php';
|
||||
require_once $__dir . '/HTMLPurifier/AttrTypes.php';
|
||||
require_once $__dir . '/HTMLPurifier/AttrValidator.php';
|
||||
require_once $__dir . '/HTMLPurifier/Bootstrap.php';
|
||||
require_once $__dir . '/HTMLPurifier/Definition.php';
|
||||
require_once $__dir . '/HTMLPurifier/CSSDefinition.php';
|
||||
require_once $__dir . '/HTMLPurifier/ChildDef.php';
|
||||
require_once $__dir . '/HTMLPurifier/Config.php';
|
||||
require_once $__dir . '/HTMLPurifier/ConfigSchema.php';
|
||||
require_once $__dir . '/HTMLPurifier/ContentSets.php';
|
||||
require_once $__dir . '/HTMLPurifier/Context.php';
|
||||
require_once $__dir . '/HTMLPurifier/DefinitionCache.php';
|
||||
require_once $__dir . '/HTMLPurifier/DefinitionCacheFactory.php';
|
||||
require_once $__dir . '/HTMLPurifier/Doctype.php';
|
||||
require_once $__dir . '/HTMLPurifier/DoctypeRegistry.php';
|
||||
require_once $__dir . '/HTMLPurifier/ElementDef.php';
|
||||
require_once $__dir . '/HTMLPurifier/Encoder.php';
|
||||
require_once $__dir . '/HTMLPurifier/EntityLookup.php';
|
||||
require_once $__dir . '/HTMLPurifier/EntityParser.php';
|
||||
require_once $__dir . '/HTMLPurifier/ErrorCollector.php';
|
||||
require_once $__dir . '/HTMLPurifier/ErrorStruct.php';
|
||||
require_once $__dir . '/HTMLPurifier/Exception.php';
|
||||
require_once $__dir . '/HTMLPurifier/Filter.php';
|
||||
require_once $__dir . '/HTMLPurifier/Generator.php';
|
||||
require_once $__dir . '/HTMLPurifier/HTMLDefinition.php';
|
||||
require_once $__dir . '/HTMLPurifier/HTMLModule.php';
|
||||
require_once $__dir . '/HTMLPurifier/HTMLModuleManager.php';
|
||||
require_once $__dir . '/HTMLPurifier/IDAccumulator.php';
|
||||
require_once $__dir . '/HTMLPurifier/Injector.php';
|
||||
require_once $__dir . '/HTMLPurifier/Language.php';
|
||||
require_once $__dir . '/HTMLPurifier/LanguageFactory.php';
|
||||
require_once $__dir . '/HTMLPurifier/Length.php';
|
||||
require_once $__dir . '/HTMLPurifier/Lexer.php';
|
||||
require_once $__dir . '/HTMLPurifier/PercentEncoder.php';
|
||||
require_once $__dir . '/HTMLPurifier/PropertyList.php';
|
||||
require_once $__dir . '/HTMLPurifier/PropertyListIterator.php';
|
||||
require_once $__dir . '/HTMLPurifier/Strategy.php';
|
||||
require_once $__dir . '/HTMLPurifier/StringHash.php';
|
||||
require_once $__dir . '/HTMLPurifier/StringHashParser.php';
|
||||
require_once $__dir . '/HTMLPurifier/TagTransform.php';
|
||||
require_once $__dir . '/HTMLPurifier/Token.php';
|
||||
require_once $__dir . '/HTMLPurifier/TokenFactory.php';
|
||||
require_once $__dir . '/HTMLPurifier/URI.php';
|
||||
require_once $__dir . '/HTMLPurifier/URIDefinition.php';
|
||||
require_once $__dir . '/HTMLPurifier/URIFilter.php';
|
||||
require_once $__dir . '/HTMLPurifier/URIParser.php';
|
||||
require_once $__dir . '/HTMLPurifier/URIScheme.php';
|
||||
require_once $__dir . '/HTMLPurifier/URISchemeRegistry.php';
|
||||
require_once $__dir . '/HTMLPurifier/UnitConverter.php';
|
||||
require_once $__dir . '/HTMLPurifier/VarParser.php';
|
||||
require_once $__dir . '/HTMLPurifier/VarParserException.php';
|
||||
require_once $__dir . '/HTMLPurifier/AttrDef/CSS.php';
|
||||
require_once $__dir . '/HTMLPurifier/AttrDef/Enum.php';
|
||||
require_once $__dir . '/HTMLPurifier/AttrDef/Integer.php';
|
||||
require_once $__dir . '/HTMLPurifier/AttrDef/Lang.php';
|
||||
require_once $__dir . '/HTMLPurifier/AttrDef/Switch.php';
|
||||
require_once $__dir . '/HTMLPurifier/AttrDef/Text.php';
|
||||
require_once $__dir . '/HTMLPurifier/AttrDef/URI.php';
|
||||
require_once $__dir . '/HTMLPurifier/AttrDef/CSS/Number.php';
|
||||
require_once $__dir . '/HTMLPurifier/AttrDef/CSS/AlphaValue.php';
|
||||
require_once $__dir . '/HTMLPurifier/AttrDef/CSS/Background.php';
|
||||
require_once $__dir . '/HTMLPurifier/AttrDef/CSS/BackgroundPosition.php';
|
||||
require_once $__dir . '/HTMLPurifier/AttrDef/CSS/Border.php';
|
||||
require_once $__dir . '/HTMLPurifier/AttrDef/CSS/Color.php';
|
||||
require_once $__dir . '/HTMLPurifier/AttrDef/CSS/Composite.php';
|
||||
require_once $__dir . '/HTMLPurifier/AttrDef/CSS/DenyElementDecorator.php';
|
||||
require_once $__dir . '/HTMLPurifier/AttrDef/CSS/Filter.php';
|
||||
require_once $__dir . '/HTMLPurifier/AttrDef/CSS/Font.php';
|
||||
require_once $__dir . '/HTMLPurifier/AttrDef/CSS/FontFamily.php';
|
||||
require_once $__dir . '/HTMLPurifier/AttrDef/CSS/ImportantDecorator.php';
|
||||
require_once $__dir . '/HTMLPurifier/AttrDef/CSS/Length.php';
|
||||
require_once $__dir . '/HTMLPurifier/AttrDef/CSS/ListStyle.php';
|
||||
require_once $__dir . '/HTMLPurifier/AttrDef/CSS/Multiple.php';
|
||||
require_once $__dir . '/HTMLPurifier/AttrDef/CSS/Percentage.php';
|
||||
require_once $__dir . '/HTMLPurifier/AttrDef/CSS/TextDecoration.php';
|
||||
require_once $__dir . '/HTMLPurifier/AttrDef/CSS/URI.php';
|
||||
require_once $__dir . '/HTMLPurifier/AttrDef/HTML/Bool.php';
|
||||
require_once $__dir . '/HTMLPurifier/AttrDef/HTML/Nmtokens.php';
|
||||
require_once $__dir . '/HTMLPurifier/AttrDef/HTML/Class.php';
|
||||
require_once $__dir . '/HTMLPurifier/AttrDef/HTML/Color.php';
|
||||
require_once $__dir . '/HTMLPurifier/AttrDef/HTML/FrameTarget.php';
|
||||
require_once $__dir . '/HTMLPurifier/AttrDef/HTML/ID.php';
|
||||
require_once $__dir . '/HTMLPurifier/AttrDef/HTML/Pixels.php';
|
||||
require_once $__dir . '/HTMLPurifier/AttrDef/HTML/Length.php';
|
||||
require_once $__dir . '/HTMLPurifier/AttrDef/HTML/LinkTypes.php';
|
||||
require_once $__dir . '/HTMLPurifier/AttrDef/HTML/MultiLength.php';
|
||||
require_once $__dir . '/HTMLPurifier/AttrDef/URI/Email.php';
|
||||
require_once $__dir . '/HTMLPurifier/AttrDef/URI/Host.php';
|
||||
require_once $__dir . '/HTMLPurifier/AttrDef/URI/IPv4.php';
|
||||
require_once $__dir . '/HTMLPurifier/AttrDef/URI/IPv6.php';
|
||||
require_once $__dir . '/HTMLPurifier/AttrDef/URI/Email/SimpleCheck.php';
|
||||
require_once $__dir . '/HTMLPurifier/AttrTransform/Background.php';
|
||||
require_once $__dir . '/HTMLPurifier/AttrTransform/BdoDir.php';
|
||||
require_once $__dir . '/HTMLPurifier/AttrTransform/BgColor.php';
|
||||
require_once $__dir . '/HTMLPurifier/AttrTransform/BoolToCSS.php';
|
||||
require_once $__dir . '/HTMLPurifier/AttrTransform/Border.php';
|
||||
require_once $__dir . '/HTMLPurifier/AttrTransform/EnumToCSS.php';
|
||||
require_once $__dir . '/HTMLPurifier/AttrTransform/ImgRequired.php';
|
||||
require_once $__dir . '/HTMLPurifier/AttrTransform/ImgSpace.php';
|
||||
require_once $__dir . '/HTMLPurifier/AttrTransform/Input.php';
|
||||
require_once $__dir . '/HTMLPurifier/AttrTransform/Lang.php';
|
||||
require_once $__dir . '/HTMLPurifier/AttrTransform/Length.php';
|
||||
require_once $__dir . '/HTMLPurifier/AttrTransform/Name.php';
|
||||
require_once $__dir . '/HTMLPurifier/AttrTransform/NameSync.php';
|
||||
require_once $__dir . '/HTMLPurifier/AttrTransform/SafeEmbed.php';
|
||||
require_once $__dir . '/HTMLPurifier/AttrTransform/SafeObject.php';
|
||||
require_once $__dir . '/HTMLPurifier/AttrTransform/SafeParam.php';
|
||||
require_once $__dir . '/HTMLPurifier/AttrTransform/ScriptRequired.php';
|
||||
require_once $__dir . '/HTMLPurifier/AttrTransform/Textarea.php';
|
||||
require_once $__dir . '/HTMLPurifier/ChildDef/Chameleon.php';
|
||||
require_once $__dir . '/HTMLPurifier/ChildDef/Custom.php';
|
||||
require_once $__dir . '/HTMLPurifier/ChildDef/Empty.php';
|
||||
require_once $__dir . '/HTMLPurifier/ChildDef/Required.php';
|
||||
require_once $__dir . '/HTMLPurifier/ChildDef/Optional.php';
|
||||
require_once $__dir . '/HTMLPurifier/ChildDef/StrictBlockquote.php';
|
||||
require_once $__dir . '/HTMLPurifier/ChildDef/Table.php';
|
||||
require_once $__dir . '/HTMLPurifier/DefinitionCache/Decorator.php';
|
||||
require_once $__dir . '/HTMLPurifier/DefinitionCache/Null.php';
|
||||
require_once $__dir . '/HTMLPurifier/DefinitionCache/Serializer.php';
|
||||
require_once $__dir . '/HTMLPurifier/DefinitionCache/Decorator/Cleanup.php';
|
||||
require_once $__dir . '/HTMLPurifier/DefinitionCache/Decorator/Memory.php';
|
||||
require_once $__dir . '/HTMLPurifier/HTMLModule/Bdo.php';
|
||||
require_once $__dir . '/HTMLPurifier/HTMLModule/CommonAttributes.php';
|
||||
require_once $__dir . '/HTMLPurifier/HTMLModule/Edit.php';
|
||||
require_once $__dir . '/HTMLPurifier/HTMLModule/Forms.php';
|
||||
require_once $__dir . '/HTMLPurifier/HTMLModule/Hypertext.php';
|
||||
require_once $__dir . '/HTMLPurifier/HTMLModule/Image.php';
|
||||
require_once $__dir . '/HTMLPurifier/HTMLModule/Legacy.php';
|
||||
require_once $__dir . '/HTMLPurifier/HTMLModule/List.php';
|
||||
require_once $__dir . '/HTMLPurifier/HTMLModule/Name.php';
|
||||
require_once $__dir . '/HTMLPurifier/HTMLModule/NonXMLCommonAttributes.php';
|
||||
require_once $__dir . '/HTMLPurifier/HTMLModule/Object.php';
|
||||
require_once $__dir . '/HTMLPurifier/HTMLModule/Presentation.php';
|
||||
require_once $__dir . '/HTMLPurifier/HTMLModule/Proprietary.php';
|
||||
require_once $__dir . '/HTMLPurifier/HTMLModule/Ruby.php';
|
||||
require_once $__dir . '/HTMLPurifier/HTMLModule/SafeEmbed.php';
|
||||
require_once $__dir . '/HTMLPurifier/HTMLModule/SafeObject.php';
|
||||
require_once $__dir . '/HTMLPurifier/HTMLModule/Scripting.php';
|
||||
require_once $__dir . '/HTMLPurifier/HTMLModule/StyleAttribute.php';
|
||||
require_once $__dir . '/HTMLPurifier/HTMLModule/Tables.php';
|
||||
require_once $__dir . '/HTMLPurifier/HTMLModule/Target.php';
|
||||
require_once $__dir . '/HTMLPurifier/HTMLModule/Text.php';
|
||||
require_once $__dir . '/HTMLPurifier/HTMLModule/Tidy.php';
|
||||
require_once $__dir . '/HTMLPurifier/HTMLModule/XMLCommonAttributes.php';
|
||||
require_once $__dir . '/HTMLPurifier/HTMLModule/Tidy/Name.php';
|
||||
require_once $__dir . '/HTMLPurifier/HTMLModule/Tidy/Proprietary.php';
|
||||
require_once $__dir . '/HTMLPurifier/HTMLModule/Tidy/XHTMLAndHTML4.php';
|
||||
require_once $__dir . '/HTMLPurifier/HTMLModule/Tidy/Strict.php';
|
||||
require_once $__dir . '/HTMLPurifier/HTMLModule/Tidy/Transitional.php';
|
||||
require_once $__dir . '/HTMLPurifier/HTMLModule/Tidy/XHTML.php';
|
||||
require_once $__dir . '/HTMLPurifier/Injector/AutoParagraph.php';
|
||||
require_once $__dir . '/HTMLPurifier/Injector/DisplayLinkURI.php';
|
||||
require_once $__dir . '/HTMLPurifier/Injector/Linkify.php';
|
||||
require_once $__dir . '/HTMLPurifier/Injector/PurifierLinkify.php';
|
||||
require_once $__dir . '/HTMLPurifier/Injector/RemoveEmpty.php';
|
||||
require_once $__dir . '/HTMLPurifier/Injector/RemoveSpansWithoutAttributes.php';
|
||||
require_once $__dir . '/HTMLPurifier/Injector/SafeObject.php';
|
||||
require_once $__dir . '/HTMLPurifier/Lexer/DOMLex.php';
|
||||
require_once $__dir . '/HTMLPurifier/Lexer/DirectLex.php';
|
||||
require_once $__dir . '/HTMLPurifier/Strategy/Composite.php';
|
||||
require_once $__dir . '/HTMLPurifier/Strategy/Core.php';
|
||||
require_once $__dir . '/HTMLPurifier/Strategy/FixNesting.php';
|
||||
require_once $__dir . '/HTMLPurifier/Strategy/MakeWellFormed.php';
|
||||
require_once $__dir . '/HTMLPurifier/Strategy/RemoveForeignElements.php';
|
||||
require_once $__dir . '/HTMLPurifier/Strategy/ValidateAttributes.php';
|
||||
require_once $__dir . '/HTMLPurifier/TagTransform/Font.php';
|
||||
require_once $__dir . '/HTMLPurifier/TagTransform/Simple.php';
|
||||
require_once $__dir . '/HTMLPurifier/Token/Comment.php';
|
||||
require_once $__dir . '/HTMLPurifier/Token/Tag.php';
|
||||
require_once $__dir . '/HTMLPurifier/Token/Empty.php';
|
||||
require_once $__dir . '/HTMLPurifier/Token/End.php';
|
||||
require_once $__dir . '/HTMLPurifier/Token/Start.php';
|
||||
require_once $__dir . '/HTMLPurifier/Token/Text.php';
|
||||
require_once $__dir . '/HTMLPurifier/URIFilter/DisableExternal.php';
|
||||
require_once $__dir . '/HTMLPurifier/URIFilter/DisableExternalResources.php';
|
||||
require_once $__dir . '/HTMLPurifier/URIFilter/DisableResources.php';
|
||||
require_once $__dir . '/HTMLPurifier/URIFilter/HostBlacklist.php';
|
||||
require_once $__dir . '/HTMLPurifier/URIFilter/MakeAbsolute.php';
|
||||
require_once $__dir . '/HTMLPurifier/URIFilter/Munge.php';
|
||||
require_once $__dir . '/HTMLPurifier/URIScheme/data.php';
|
||||
require_once $__dir . '/HTMLPurifier/URIScheme/file.php';
|
||||
require_once $__dir . '/HTMLPurifier/URIScheme/ftp.php';
|
||||
require_once $__dir . '/HTMLPurifier/URIScheme/http.php';
|
||||
require_once $__dir . '/HTMLPurifier/URIScheme/https.php';
|
||||
require_once $__dir . '/HTMLPurifier/URIScheme/mailto.php';
|
||||
require_once $__dir . '/HTMLPurifier/URIScheme/news.php';
|
||||
require_once $__dir . '/HTMLPurifier/URIScheme/nntp.php';
|
||||
require_once $__dir . '/HTMLPurifier/VarParser/Flexible.php';
|
||||
require_once $__dir . '/HTMLPurifier/VarParser/Native.php';
|
@@ -1,19 +1,17 @@
|
||||
<?php
|
||||
|
||||
require_once 'HTMLPurifier/AttrTypes.php';
|
||||
|
||||
/**
|
||||
* Defines common attribute collections that modules reference
|
||||
*/
|
||||
|
||||
class HTMLPurifier_AttrCollections
|
||||
{
|
||||
|
||||
|
||||
/**
|
||||
* Associative array of attribute collections, indexed by name
|
||||
*/
|
||||
var $info = array();
|
||||
|
||||
public $info = array();
|
||||
|
||||
/**
|
||||
* Performs all expansions on internal data for use by other inclusions
|
||||
* It also collects all attribute collection extensions from
|
||||
@@ -21,7 +19,7 @@ class HTMLPurifier_AttrCollections
|
||||
* @param $attr_types HTMLPurifier_AttrTypes instance
|
||||
* @param $modules Hash array of HTMLPurifier_HTMLModule members
|
||||
*/
|
||||
function HTMLPurifier_AttrCollections($attr_types, $modules) {
|
||||
public function __construct($attr_types, $modules) {
|
||||
// load extensions from the modules
|
||||
foreach ($modules as $module) {
|
||||
foreach ($module->attr_collections as $coll_i => $coll) {
|
||||
@@ -47,13 +45,13 @@ class HTMLPurifier_AttrCollections
|
||||
$this->expandIdentifiers($this->info[$name], $attr_types);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Takes a reference to an attribute associative array and performs
|
||||
* all inclusions specified by the zero index.
|
||||
* @param &$attr Reference to attribute array
|
||||
*/
|
||||
function performInclusions(&$attr) {
|
||||
public function performInclusions(&$attr) {
|
||||
if (!isset($attr[0])) return;
|
||||
$merge = $attr[0];
|
||||
$seen = array(); // recursion guard
|
||||
@@ -74,25 +72,25 @@ class HTMLPurifier_AttrCollections
|
||||
}
|
||||
unset($attr[0]);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Expands all string identifiers in an attribute array by replacing
|
||||
* them with the appropriate values inside HTMLPurifier_AttrTypes
|
||||
* @param &$attr Reference to attribute array
|
||||
* @param $attr_types HTMLPurifier_AttrTypes instance
|
||||
*/
|
||||
function expandIdentifiers(&$attr, $attr_types) {
|
||||
|
||||
public function expandIdentifiers(&$attr, $attr_types) {
|
||||
|
||||
// because foreach will process new elements we add, make sure we
|
||||
// skip duplicates
|
||||
$processed = array();
|
||||
|
||||
|
||||
foreach ($attr as $def_i => $def) {
|
||||
// skip inclusions
|
||||
if ($def_i === 0) continue;
|
||||
|
||||
|
||||
if (isset($processed[$def_i])) continue;
|
||||
|
||||
|
||||
// determine whether or not attribute is required
|
||||
if ($required = (strpos($def_i, '*') !== false)) {
|
||||
// rename the definition
|
||||
@@ -100,21 +98,21 @@ class HTMLPurifier_AttrCollections
|
||||
$def_i = trim($def_i, '*');
|
||||
$attr[$def_i] = $def;
|
||||
}
|
||||
|
||||
|
||||
$processed[$def_i] = true;
|
||||
|
||||
|
||||
// if we've already got a literal object, move on
|
||||
if (is_object($def)) {
|
||||
// preserve previous required
|
||||
$attr[$def_i]->required = ($required || $attr[$def_i]->required);
|
||||
continue;
|
||||
}
|
||||
|
||||
|
||||
if ($def === false) {
|
||||
unset($attr[$def_i]);
|
||||
continue;
|
||||
}
|
||||
|
||||
|
||||
if ($t = $attr_types->get($def)) {
|
||||
$attr[$def_i] = $t;
|
||||
$attr[$def_i]->required = $required;
|
||||
@@ -122,8 +120,9 @@ class HTMLPurifier_AttrCollections
|
||||
unset($attr[$def_i]);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
// vim: et sw=4 sts=4
|
||||
|
@@ -2,85 +2,122 @@
|
||||
|
||||
/**
|
||||
* Base class for all validating attribute definitions.
|
||||
*
|
||||
*
|
||||
* This family of classes forms the core for not only HTML attribute validation,
|
||||
* but also any sort of string that needs to be validated or cleaned (which
|
||||
* means CSS properties and composite definitions are defined here too).
|
||||
* means CSS properties and composite definitions are defined here too).
|
||||
* Besides defining (through code) what precisely makes the string valid,
|
||||
* subclasses are also responsible for cleaning the code if possible.
|
||||
*/
|
||||
|
||||
class HTMLPurifier_AttrDef
|
||||
abstract class HTMLPurifier_AttrDef
|
||||
{
|
||||
|
||||
|
||||
/**
|
||||
* Tells us whether or not an HTML attribute is minimized. Has no
|
||||
* meaning in other contexts.
|
||||
*/
|
||||
var $minimized = false;
|
||||
|
||||
public $minimized = false;
|
||||
|
||||
/**
|
||||
* Tells us whether or not an HTML attribute is required. Has no
|
||||
* meaning in other contexts
|
||||
*/
|
||||
var $required = false;
|
||||
|
||||
public $required = false;
|
||||
|
||||
/**
|
||||
* Validates and cleans passed string according to a definition.
|
||||
*
|
||||
* @public
|
||||
*
|
||||
* @param $string String to be validated and cleaned.
|
||||
* @param $config Mandatory HTMLPurifier_Config object.
|
||||
* @param $context Mandatory HTMLPurifier_AttrContext object.
|
||||
*/
|
||||
function validate($string, $config, &$context) {
|
||||
trigger_error('Cannot call abstract function', E_USER_ERROR);
|
||||
}
|
||||
|
||||
abstract public function validate($string, $config, $context);
|
||||
|
||||
/**
|
||||
* Convenience method that parses a string as if it were CDATA.
|
||||
*
|
||||
*
|
||||
* This method process a string in the manner specified at
|
||||
* <http://www.w3.org/TR/html4/types.html#h-6.2> by removing
|
||||
* leading and trailing whitespace, ignoring line feeds, and replacing
|
||||
* carriage returns and tabs with spaces. While most useful for HTML
|
||||
* attributes specified as CDATA, it can also be applied to most CSS
|
||||
* values.
|
||||
*
|
||||
*
|
||||
* @note This method is not entirely standards compliant, as trim() removes
|
||||
* more types of whitespace than specified in the spec. In practice,
|
||||
* this is rarely a problem, as those extra characters usually have
|
||||
* already been removed by HTMLPurifier_Encoder.
|
||||
*
|
||||
*
|
||||
* @warning This processing is inconsistent with XML's whitespace handling
|
||||
* as specified by section 3.3.3 and referenced XHTML 1.0 section
|
||||
* 4.7. Compliant processing requires all line breaks normalized
|
||||
* to "\n", so the fix is not as simple as fixing it in this
|
||||
* function. Trim and whitespace collapsing are supposed to only
|
||||
* occur in NMTOKENs. However, note that we are NOT necessarily
|
||||
* parsing XML, thus, this behavior may still be correct.
|
||||
*
|
||||
* @public
|
||||
* 4.7. However, note that we are NOT necessarily
|
||||
* parsing XML, thus, this behavior may still be correct. We
|
||||
* assume that newlines have been normalized.
|
||||
*/
|
||||
function parseCDATA($string) {
|
||||
public function parseCDATA($string) {
|
||||
$string = trim($string);
|
||||
$string = str_replace("\n", '', $string);
|
||||
$string = str_replace(array("\r", "\t"), ' ', $string);
|
||||
$string = str_replace(array("\n", "\t", "\r"), ' ', $string);
|
||||
return $string;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Factory method for creating this class from a string.
|
||||
* @param $string String construction info
|
||||
* @return Created AttrDef object corresponding to $string
|
||||
* @public
|
||||
*/
|
||||
function make($string) {
|
||||
// default implementation, return flyweight of this object
|
||||
// if overloaded, it is *necessary* for you to clone the
|
||||
// object (usually by instantiating a new copy) and return that
|
||||
public function make($string) {
|
||||
// default implementation, return a flyweight of this object.
|
||||
// If $string has an effect on the returned object (i.e. you
|
||||
// need to overload this method), it is best
|
||||
// to clone or instantiate new copies. (Instantiation is safer.)
|
||||
return $this;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Removes spaces from rgb(0, 0, 0) so that shorthand CSS properties work
|
||||
* properly. THIS IS A HACK!
|
||||
*/
|
||||
protected function mungeRgb($string) {
|
||||
return preg_replace('/rgb\((\d+)\s*,\s*(\d+)\s*,\s*(\d+)\)/', 'rgb(\1,\2,\3)', $string);
|
||||
}
|
||||
|
||||
/**
|
||||
* Parses a possibly escaped CSS string and returns the "pure"
|
||||
* version of it.
|
||||
*/
|
||||
protected function expandCSSEscape($string) {
|
||||
// flexibly parse it
|
||||
$ret = '';
|
||||
for ($i = 0, $c = strlen($string); $i < $c; $i++) {
|
||||
if ($string[$i] === '\\') {
|
||||
$i++;
|
||||
if ($i >= $c) {
|
||||
$ret .= '\\';
|
||||
break;
|
||||
}
|
||||
if (ctype_xdigit($string[$i])) {
|
||||
$code = $string[$i];
|
||||
for ($a = 1, $i++; $i < $c && $a < 6; $i++, $a++) {
|
||||
if (!ctype_xdigit($string[$i])) break;
|
||||
$code .= $string[$i];
|
||||
}
|
||||
// We have to be extremely careful when adding
|
||||
// new characters, to make sure we're not breaking
|
||||
// the encoding.
|
||||
$char = HTMLPurifier_Encoder::unichr(hexdec($code));
|
||||
if (HTMLPurifier_Encoder::cleanUTF8($char) === '') continue;
|
||||
$ret .= $char;
|
||||
if ($i < $c && trim($string[$i]) !== '') $i--;
|
||||
continue;
|
||||
}
|
||||
if ($string[$i] === "\n") continue;
|
||||
}
|
||||
$ret .= $string[$i];
|
||||
}
|
||||
return $ret;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// vim: et sw=4 sts=4
|
||||
|
@@ -1,8 +1,5 @@
|
||||
<?php
|
||||
|
||||
require_once 'HTMLPurifier/AttrDef.php';
|
||||
require_once 'HTMLPurifier/CSSDefinition.php';
|
||||
|
||||
/**
|
||||
* Validates the HTML attribute style, otherwise known as CSS.
|
||||
* @note We don't implement the whole CSS specification, so it might be
|
||||
@@ -16,29 +13,48 @@ require_once 'HTMLPurifier/CSSDefinition.php';
|
||||
*/
|
||||
class HTMLPurifier_AttrDef_CSS extends HTMLPurifier_AttrDef
|
||||
{
|
||||
|
||||
function validate($css, $config, &$context) {
|
||||
|
||||
|
||||
public function validate($css, $config, $context) {
|
||||
|
||||
$css = $this->parseCDATA($css);
|
||||
|
||||
|
||||
$definition = $config->getCSSDefinition();
|
||||
|
||||
|
||||
// we're going to break the spec and explode by semicolons.
|
||||
// This is because semicolon rarely appears in escaped form
|
||||
// Doing this is generally flaky but fast
|
||||
// IT MIGHT APPEAR IN URIs, see HTMLPurifier_AttrDef_CSSURI
|
||||
// for details
|
||||
|
||||
|
||||
$declarations = explode(';', $css);
|
||||
$propvalues = array();
|
||||
|
||||
|
||||
/**
|
||||
* Name of the current CSS property being validated.
|
||||
*/
|
||||
$property = false;
|
||||
$context->register('CurrentCSSProperty', $property);
|
||||
|
||||
foreach ($declarations as $declaration) {
|
||||
if (!$declaration) continue;
|
||||
if (!strpos($declaration, ':')) continue;
|
||||
list($property, $value) = explode(':', $declaration, 2);
|
||||
$property = trim($property);
|
||||
$value = trim($value);
|
||||
if (!isset($definition->info[$property])) continue;
|
||||
$ok = false;
|
||||
do {
|
||||
if (isset($definition->info[$property])) {
|
||||
$ok = true;
|
||||
break;
|
||||
}
|
||||
if (ctype_lower($property)) break;
|
||||
$property = strtolower($property);
|
||||
if (isset($definition->info[$property])) {
|
||||
$ok = true;
|
||||
break;
|
||||
}
|
||||
} while(0);
|
||||
if (!$ok) continue;
|
||||
// inefficient call, since the validator will do this again
|
||||
if (strtolower(trim($value)) !== 'inherit') {
|
||||
// inherit works for everything (but only on the base property)
|
||||
@@ -50,19 +66,22 @@ class HTMLPurifier_AttrDef_CSS extends HTMLPurifier_AttrDef
|
||||
if ($result === false) continue;
|
||||
$propvalues[$property] = $result;
|
||||
}
|
||||
|
||||
|
||||
$context->destroy('CurrentCSSProperty');
|
||||
|
||||
// procedure does not write the new CSS simultaneously, so it's
|
||||
// slightly inefficient, but it's the only way of getting rid of
|
||||
// duplicates. Perhaps config to optimize it, but not now.
|
||||
|
||||
|
||||
$new_declarations = '';
|
||||
foreach ($propvalues as $prop => $value) {
|
||||
$new_declarations .= "$prop:$value;";
|
||||
}
|
||||
|
||||
|
||||
return $new_declarations ? $new_declarations : false;
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
// vim: et sw=4 sts=4
|
||||
|
21
library/HTMLPurifier/AttrDef/CSS/AlphaValue.php
Normal file
21
library/HTMLPurifier/AttrDef/CSS/AlphaValue.php
Normal file
@@ -0,0 +1,21 @@
|
||||
<?php
|
||||
|
||||
class HTMLPurifier_AttrDef_CSS_AlphaValue extends HTMLPurifier_AttrDef_CSS_Number
|
||||
{
|
||||
|
||||
public function __construct() {
|
||||
parent::__construct(false); // opacity is non-negative, but we will clamp it
|
||||
}
|
||||
|
||||
public function validate($number, $config, $context) {
|
||||
$result = parent::validate($number, $config, $context);
|
||||
if ($result === false) return $result;
|
||||
$float = (float) $result;
|
||||
if ($float < 0.0) $result = '0';
|
||||
if ($float > 1.0) $result = '1';
|
||||
return $result;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// vim: et sw=4 sts=4
|
@@ -1,22 +1,19 @@
|
||||
<?php
|
||||
|
||||
require_once 'HTMLPurifier/AttrDef.php';
|
||||
require_once 'HTMLPurifier/CSSDefinition.php';
|
||||
|
||||
/**
|
||||
* Validates shorthand CSS property background.
|
||||
* @warning Does not support url tokens that have internal spaces.
|
||||
*/
|
||||
class HTMLPurifier_AttrDef_CSS_Background extends HTMLPurifier_AttrDef
|
||||
{
|
||||
|
||||
|
||||
/**
|
||||
* Local copy of component validators.
|
||||
* @note See HTMLPurifier_AttrDef_Font::$info for a similar impl.
|
||||
*/
|
||||
var $info;
|
||||
|
||||
function HTMLPurifier_AttrDef_CSS_Background($config) {
|
||||
protected $info;
|
||||
|
||||
public function __construct($config) {
|
||||
$def = $config->getCSSDefinition();
|
||||
$this->info['background-color'] = $def->info['background-color'];
|
||||
$this->info['background-image'] = $def->info['background-image'];
|
||||
@@ -24,26 +21,29 @@ class HTMLPurifier_AttrDef_CSS_Background extends HTMLPurifier_AttrDef
|
||||
$this->info['background-attachment'] = $def->info['background-attachment'];
|
||||
$this->info['background-position'] = $def->info['background-position'];
|
||||
}
|
||||
|
||||
function validate($string, $config, &$context) {
|
||||
|
||||
|
||||
public function validate($string, $config, $context) {
|
||||
|
||||
// regular pre-processing
|
||||
$string = $this->parseCDATA($string);
|
||||
if ($string === '') return false;
|
||||
|
||||
|
||||
// munge rgb() decl if necessary
|
||||
$string = $this->mungeRgb($string);
|
||||
|
||||
// assumes URI doesn't have spaces in it
|
||||
$bits = explode(' ', strtolower($string)); // bits to process
|
||||
|
||||
|
||||
$caught = array();
|
||||
$caught['color'] = false;
|
||||
$caught['image'] = false;
|
||||
$caught['repeat'] = false;
|
||||
$caught['attachment'] = false;
|
||||
$caught['position'] = false;
|
||||
|
||||
|
||||
$i = 0; // number of catches
|
||||
$none = false;
|
||||
|
||||
|
||||
foreach ($bits as $bit) {
|
||||
if ($bit === '') continue;
|
||||
foreach ($caught as $key => $status) {
|
||||
@@ -64,23 +64,24 @@ class HTMLPurifier_AttrDef_CSS_Background extends HTMLPurifier_AttrDef
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
if (!$i) return false;
|
||||
if ($caught['position'] !== false) {
|
||||
$caught['position'] = $this->info['background-position']->
|
||||
validate($caught['position'], $config, $context);
|
||||
}
|
||||
|
||||
|
||||
$ret = array();
|
||||
foreach ($caught as $value) {
|
||||
if ($value === false) continue;
|
||||
$ret[] = $value;
|
||||
}
|
||||
|
||||
|
||||
if (empty($ret)) return false;
|
||||
return implode(' ', $ret);
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
// vim: et sw=4 sts=4
|
||||
|
@@ -1,9 +1,5 @@
|
||||
<?php
|
||||
|
||||
require_once 'HTMLPurifier/AttrDef.php';
|
||||
require_once 'HTMLPurifier/AttrDef/CSS/Length.php';
|
||||
require_once 'HTMLPurifier/AttrDef/CSS/Percentage.php';
|
||||
|
||||
/* W3C says:
|
||||
[ // adjective and number must be in correct order, even if
|
||||
// you could switch them without introducing ambiguity.
|
||||
@@ -11,7 +7,7 @@ require_once 'HTMLPurifier/AttrDef/CSS/Percentage.php';
|
||||
[
|
||||
<percentage> | <length> | left | center | right
|
||||
]
|
||||
[
|
||||
[
|
||||
<percentage> | <length> | top | center | bottom
|
||||
]?
|
||||
] |
|
||||
@@ -32,10 +28,10 @@ require_once 'HTMLPurifier/AttrDef/CSS/Percentage.php';
|
||||
|
||||
/* QuirksMode says:
|
||||
keyword + length/percentage must be ordered correctly, as per W3C
|
||||
|
||||
|
||||
Internet Explorer and Opera, however, support arbitrary ordering. We
|
||||
should fix it up.
|
||||
|
||||
|
||||
Minor issue though, not strictly necessary.
|
||||
*/
|
||||
|
||||
@@ -47,27 +43,28 @@ require_once 'HTMLPurifier/AttrDef/CSS/Percentage.php';
|
||||
*/
|
||||
class HTMLPurifier_AttrDef_CSS_BackgroundPosition extends HTMLPurifier_AttrDef
|
||||
{
|
||||
|
||||
var $length;
|
||||
var $percentage;
|
||||
|
||||
function HTMLPurifier_AttrDef_CSS_BackgroundPosition() {
|
||||
|
||||
protected $length;
|
||||
protected $percentage;
|
||||
|
||||
public function __construct() {
|
||||
$this->length = new HTMLPurifier_AttrDef_CSS_Length();
|
||||
$this->percentage = new HTMLPurifier_AttrDef_CSS_Percentage();
|
||||
}
|
||||
|
||||
function validate($string, $config, &$context) {
|
||||
|
||||
public function validate($string, $config, $context) {
|
||||
$string = $this->parseCDATA($string);
|
||||
$bits = explode(' ', $string);
|
||||
|
||||
|
||||
$keywords = array();
|
||||
$keywords['h'] = false; // left, right
|
||||
$keywords['v'] = false; // top, bottom
|
||||
$keywords['c'] = false; // center
|
||||
$keywords['ch'] = false; // center (first word)
|
||||
$keywords['cv'] = false; // center (second word)
|
||||
$measures = array();
|
||||
|
||||
|
||||
$i = 0;
|
||||
|
||||
|
||||
$lookup = array(
|
||||
'top' => 'v',
|
||||
'bottom' => 'v',
|
||||
@@ -75,55 +72,62 @@ class HTMLPurifier_AttrDef_CSS_BackgroundPosition extends HTMLPurifier_AttrDef
|
||||
'right' => 'h',
|
||||
'center' => 'c'
|
||||
);
|
||||
|
||||
|
||||
foreach ($bits as $bit) {
|
||||
if ($bit === '') continue;
|
||||
|
||||
|
||||
// test for keyword
|
||||
$lbit = ctype_lower($bit) ? $bit : strtolower($bit);
|
||||
if (isset($lookup[$lbit])) {
|
||||
$status = $lookup[$lbit];
|
||||
if ($status == 'c') {
|
||||
if ($i == 0) {
|
||||
$status = 'ch';
|
||||
} else {
|
||||
$status = 'cv';
|
||||
}
|
||||
}
|
||||
$keywords[$status] = $lbit;
|
||||
$i++;
|
||||
}
|
||||
|
||||
|
||||
// test for length
|
||||
$r = $this->length->validate($bit, $config, $context);
|
||||
if ($r !== false) {
|
||||
$measures[] = $r;
|
||||
$i++;
|
||||
}
|
||||
|
||||
|
||||
// test for percentage
|
||||
$r = $this->percentage->validate($bit, $config, $context);
|
||||
if ($r !== false) {
|
||||
$measures[] = $r;
|
||||
$i++;
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
if (!$i) return false; // no valid values were caught
|
||||
|
||||
|
||||
|
||||
$ret = array();
|
||||
|
||||
|
||||
// first keyword
|
||||
if ($keywords['h']) $ret[] = $keywords['h'];
|
||||
elseif (count($measures)) $ret[] = array_shift($measures);
|
||||
elseif ($keywords['c']) {
|
||||
$ret[] = $keywords['c'];
|
||||
$keywords['c'] = false; // prevent re-use: center = center center
|
||||
elseif ($keywords['ch']) {
|
||||
$ret[] = $keywords['ch'];
|
||||
$keywords['cv'] = false; // prevent re-use: center = center center
|
||||
}
|
||||
|
||||
if ($keywords['v']) $ret[] = $keywords['v'];
|
||||
elseif (count($measures)) $ret[] = array_shift($measures);
|
||||
elseif ($keywords['c']) $ret[] = $keywords['c'];
|
||||
|
||||
|
||||
if ($keywords['v']) $ret[] = $keywords['v'];
|
||||
elseif ($keywords['cv']) $ret[] = $keywords['cv'];
|
||||
elseif (count($measures)) $ret[] = array_shift($measures);
|
||||
|
||||
if (empty($ret)) return false;
|
||||
return implode(' ', $ret);
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
// vim: et sw=4 sts=4
|
||||
|
@@ -1,28 +1,26 @@
|
||||
<?php
|
||||
|
||||
require_once 'HTMLPurifier/AttrDef.php';
|
||||
|
||||
/**
|
||||
* Validates the border property as defined by CSS.
|
||||
*/
|
||||
class HTMLPurifier_AttrDef_CSS_Border extends HTMLPurifier_AttrDef
|
||||
{
|
||||
|
||||
|
||||
/**
|
||||
* Local copy of properties this property is shorthand for.
|
||||
*/
|
||||
var $info = array();
|
||||
|
||||
function HTMLPurifier_AttrDef_CSS_Border($config) {
|
||||
protected $info = array();
|
||||
|
||||
public function __construct($config) {
|
||||
$def = $config->getCSSDefinition();
|
||||
$this->info['border-width'] = $def->info['border-width'];
|
||||
$this->info['border-style'] = $def->info['border-style'];
|
||||
$this->info['border-top-color'] = $def->info['border-top-color'];
|
||||
}
|
||||
|
||||
function validate($string, $config, &$context) {
|
||||
|
||||
public function validate($string, $config, $context) {
|
||||
$string = $this->parseCDATA($string);
|
||||
// we specifically will not support rgb() syntax with spaces
|
||||
$string = $this->mungeRgb($string);
|
||||
$bits = explode(' ', $string);
|
||||
$done = array(); // segments we've finished
|
||||
$ret = ''; // return value
|
||||
@@ -39,6 +37,7 @@ class HTMLPurifier_AttrDef_CSS_Border extends HTMLPurifier_AttrDef
|
||||
}
|
||||
return rtrim($ret);
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
// vim: et sw=4 sts=4
|
||||
|
@@ -1,58 +1,24 @@
|
||||
<?php
|
||||
|
||||
require_once 'HTMLPurifier/AttrDef.php';
|
||||
|
||||
HTMLPurifier_ConfigSchema::define(
|
||||
'Core', 'ColorKeywords', array(
|
||||
'maroon' => '#800000',
|
||||
'red' => '#FF0000',
|
||||
'orange' => '#FFA500',
|
||||
'yellow' => '#FFFF00',
|
||||
'olive' => '#808000',
|
||||
'purple' => '#800080',
|
||||
'fuchsia' => '#FF00FF',
|
||||
'white' => '#FFFFFF',
|
||||
'lime' => '#00FF00',
|
||||
'green' => '#008000',
|
||||
'navy' => '#000080',
|
||||
'blue' => '#0000FF',
|
||||
'aqua' => '#00FFFF',
|
||||
'teal' => '#008080',
|
||||
'black' => '#000000',
|
||||
'silver' => '#C0C0C0',
|
||||
'gray' => '#808080'
|
||||
), 'hash', '
|
||||
Lookup array of color names to six digit hexadecimal number corresponding
|
||||
to color, with preceding hash mark. Used when parsing colors.
|
||||
This directive has been available since 2.0.0.
|
||||
');
|
||||
|
||||
/**
|
||||
* Validates Color as defined by CSS.
|
||||
*/
|
||||
class HTMLPurifier_AttrDef_CSS_Color extends HTMLPurifier_AttrDef
|
||||
{
|
||||
|
||||
function validate($color, $config, &$context) {
|
||||
|
||||
|
||||
public function validate($color, $config, $context) {
|
||||
|
||||
static $colors = null;
|
||||
if ($colors === null) $colors = $config->get('Core', 'ColorKeywords');
|
||||
|
||||
if ($colors === null) $colors = $config->get('Core.ColorKeywords');
|
||||
|
||||
$color = trim($color);
|
||||
if (!$color) return false;
|
||||
|
||||
if ($color === '') return false;
|
||||
|
||||
$lower = strtolower($color);
|
||||
if (isset($colors[$lower])) return $colors[$lower];
|
||||
|
||||
if ($color[0] === '#') {
|
||||
// hexadecimal handling
|
||||
$hex = substr($color, 1);
|
||||
$length = strlen($hex);
|
||||
if ($length !== 3 && $length !== 6) return false;
|
||||
if (!ctype_xdigit($hex)) return false;
|
||||
} else {
|
||||
|
||||
if (strpos($color, 'rgb(') !== false) {
|
||||
// rgb literal handling
|
||||
if (strpos($color, 'rgb(')) return false;
|
||||
$length = strlen($color);
|
||||
if (strpos($color, ')') !== $length - 1) return false;
|
||||
$triad = substr($color, 4, $length - 4 - 1);
|
||||
@@ -90,11 +56,23 @@ class HTMLPurifier_AttrDef_CSS_Color extends HTMLPurifier_AttrDef
|
||||
}
|
||||
$new_triad = implode(',', $new_parts);
|
||||
$color = "rgb($new_triad)";
|
||||
} else {
|
||||
// hexadecimal handling
|
||||
if ($color[0] === '#') {
|
||||
$hex = substr($color, 1);
|
||||
} else {
|
||||
$hex = $color;
|
||||
$color = '#' . $color;
|
||||
}
|
||||
$length = strlen($hex);
|
||||
if ($length !== 3 && $length !== 6) return false;
|
||||
if (!ctype_xdigit($hex)) return false;
|
||||
}
|
||||
|
||||
|
||||
return $color;
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
// vim: et sw=4 sts=4
|
||||
|
@@ -2,7 +2,7 @@
|
||||
|
||||
/**
|
||||
* Allows multiple validators to attempt to validate attribute.
|
||||
*
|
||||
*
|
||||
* Composite is just what it sounds like: a composite of many validators.
|
||||
* This means that multiple HTMLPurifier_AttrDef objects will have a whack
|
||||
* at the string. If one of them passes, that's what is returned. This is
|
||||
@@ -11,27 +11,28 @@
|
||||
*/
|
||||
class HTMLPurifier_AttrDef_CSS_Composite extends HTMLPurifier_AttrDef
|
||||
{
|
||||
|
||||
|
||||
/**
|
||||
* List of HTMLPurifier_AttrDef objects that may process strings
|
||||
* @protected
|
||||
* @todo Make protected
|
||||
*/
|
||||
var $defs;
|
||||
|
||||
public $defs;
|
||||
|
||||
/**
|
||||
* @param $defs List of HTMLPurifier_AttrDef objects
|
||||
*/
|
||||
function HTMLPurifier_AttrDef_CSS_Composite($defs) {
|
||||
public function __construct($defs) {
|
||||
$this->defs = $defs;
|
||||
}
|
||||
|
||||
function validate($string, $config, &$context) {
|
||||
|
||||
public function validate($string, $config, $context) {
|
||||
foreach ($this->defs as $i => $def) {
|
||||
$result = $this->defs[$i]->validate($string, $config, $context);
|
||||
if ($result !== false) return $result;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
// vim: et sw=4 sts=4
|
||||
|
28
library/HTMLPurifier/AttrDef/CSS/DenyElementDecorator.php
Normal file
28
library/HTMLPurifier/AttrDef/CSS/DenyElementDecorator.php
Normal file
@@ -0,0 +1,28 @@
|
||||
<?php
|
||||
|
||||
/**
|
||||
* Decorator which enables CSS properties to be disabled for specific elements.
|
||||
*/
|
||||
class HTMLPurifier_AttrDef_CSS_DenyElementDecorator extends HTMLPurifier_AttrDef
|
||||
{
|
||||
public $def, $element;
|
||||
|
||||
/**
|
||||
* @param $def Definition to wrap
|
||||
* @param $element Element to deny
|
||||
*/
|
||||
public function __construct($def, $element) {
|
||||
$this->def = $def;
|
||||
$this->element = $element;
|
||||
}
|
||||
/**
|
||||
* Checks if CurrentToken is set and equal to $this->element
|
||||
*/
|
||||
public function validate($string, $config, $context) {
|
||||
$token = $context->get('CurrentToken', true);
|
||||
if ($token && $token->name == $this->element) return false;
|
||||
return $this->def->validate($string, $config, $context);
|
||||
}
|
||||
}
|
||||
|
||||
// vim: et sw=4 sts=4
|
54
library/HTMLPurifier/AttrDef/CSS/Filter.php
Normal file
54
library/HTMLPurifier/AttrDef/CSS/Filter.php
Normal file
@@ -0,0 +1,54 @@
|
||||
<?php
|
||||
|
||||
/**
|
||||
* Microsoft's proprietary filter: CSS property
|
||||
* @note Currently supports the alpha filter. In the future, this will
|
||||
* probably need an extensible framework
|
||||
*/
|
||||
class HTMLPurifier_AttrDef_CSS_Filter extends HTMLPurifier_AttrDef
|
||||
{
|
||||
|
||||
protected $intValidator;
|
||||
|
||||
public function __construct() {
|
||||
$this->intValidator = new HTMLPurifier_AttrDef_Integer();
|
||||
}
|
||||
|
||||
public function validate($value, $config, $context) {
|
||||
$value = $this->parseCDATA($value);
|
||||
if ($value === 'none') return $value;
|
||||
// if we looped this we could support multiple filters
|
||||
$function_length = strcspn($value, '(');
|
||||
$function = trim(substr($value, 0, $function_length));
|
||||
if ($function !== 'alpha' &&
|
||||
$function !== 'Alpha' &&
|
||||
$function !== 'progid:DXImageTransform.Microsoft.Alpha'
|
||||
) return false;
|
||||
$cursor = $function_length + 1;
|
||||
$parameters_length = strcspn($value, ')', $cursor);
|
||||
$parameters = substr($value, $cursor, $parameters_length);
|
||||
$params = explode(',', $parameters);
|
||||
$ret_params = array();
|
||||
$lookup = array();
|
||||
foreach ($params as $param) {
|
||||
list($key, $value) = explode('=', $param);
|
||||
$key = trim($key);
|
||||
$value = trim($value);
|
||||
if (isset($lookup[$key])) continue;
|
||||
if ($key !== 'opacity') continue;
|
||||
$value = $this->intValidator->validate($value, $config, $context);
|
||||
if ($value === false) continue;
|
||||
$int = (int) $value;
|
||||
if ($int > 100) $value = '100';
|
||||
if ($int < 0) $value = '0';
|
||||
$ret_params[] = "$key=$value";
|
||||
$lookup[$key] = true;
|
||||
}
|
||||
$ret_parameters = implode(',', $ret_params);
|
||||
$ret_function = "$function($ret_parameters)";
|
||||
return $ret_function;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// vim: et sw=4 sts=4
|
@@ -1,24 +1,22 @@
|
||||
<?php
|
||||
|
||||
require_once 'HTMLPurifier/AttrDef.php';
|
||||
|
||||
/**
|
||||
* Validates shorthand CSS property font.
|
||||
*/
|
||||
class HTMLPurifier_AttrDef_CSS_Font extends HTMLPurifier_AttrDef
|
||||
{
|
||||
|
||||
|
||||
/**
|
||||
* Local copy of component validators.
|
||||
*
|
||||
*
|
||||
* @note If we moved specific CSS property definitions to their own
|
||||
* classes instead of having them be assembled at run time by
|
||||
* CSSDefinition, this wouldn't be necessary. We'd instantiate
|
||||
* our own copies.
|
||||
*/
|
||||
var $info = array();
|
||||
|
||||
function HTMLPurifier_AttrDef_CSS_Font($config) {
|
||||
protected $info = array();
|
||||
|
||||
public function __construct($config) {
|
||||
$def = $config->getCSSDefinition();
|
||||
$this->info['font-style'] = $def->info['font-style'];
|
||||
$this->info['font-variant'] = $def->info['font-variant'];
|
||||
@@ -27,9 +25,9 @@ class HTMLPurifier_AttrDef_CSS_Font extends HTMLPurifier_AttrDef
|
||||
$this->info['line-height'] = $def->info['line-height'];
|
||||
$this->info['font-family'] = $def->info['font-family'];
|
||||
}
|
||||
|
||||
function validate($string, $config, &$context) {
|
||||
|
||||
|
||||
public function validate($string, $config, $context) {
|
||||
|
||||
static $system_fonts = array(
|
||||
'caption' => true,
|
||||
'icon' => true,
|
||||
@@ -38,27 +36,27 @@ class HTMLPurifier_AttrDef_CSS_Font extends HTMLPurifier_AttrDef
|
||||
'small-caption' => true,
|
||||
'status-bar' => true
|
||||
);
|
||||
|
||||
|
||||
// regular pre-processing
|
||||
$string = $this->parseCDATA($string);
|
||||
if ($string === '') return false;
|
||||
|
||||
|
||||
// check if it's one of the keywords
|
||||
$lowercase_string = strtolower($string);
|
||||
if (isset($system_fonts[$lowercase_string])) {
|
||||
return $lowercase_string;
|
||||
}
|
||||
|
||||
|
||||
$bits = explode(' ', $string); // bits to process
|
||||
$stage = 0; // this indicates what we're looking for
|
||||
$caught = array(); // which stage 0 properties have we caught?
|
||||
$stage_1 = array('font-style', 'font-variant', 'font-weight');
|
||||
$final = ''; // output
|
||||
|
||||
|
||||
for ($i = 0, $size = count($bits); $i < $size; $i++) {
|
||||
if ($bits[$i] === '') continue;
|
||||
switch ($stage) {
|
||||
|
||||
|
||||
// attempting to catch font-style, font-variant or font-weight
|
||||
case 0:
|
||||
foreach ($stage_1 as $validator_name) {
|
||||
@@ -74,7 +72,7 @@ class HTMLPurifier_AttrDef_CSS_Font extends HTMLPurifier_AttrDef
|
||||
// all three caught, continue on
|
||||
if (count($caught) >= 3) $stage = 1;
|
||||
if ($r !== false) break;
|
||||
|
||||
|
||||
// attempting to catch font-size and perhaps line-height
|
||||
case 1:
|
||||
$found_slash = false;
|
||||
@@ -128,7 +126,7 @@ class HTMLPurifier_AttrDef_CSS_Font extends HTMLPurifier_AttrDef
|
||||
break;
|
||||
}
|
||||
return false;
|
||||
|
||||
|
||||
// attempting to catch font-family
|
||||
case 2:
|
||||
$font_family =
|
||||
@@ -145,6 +143,7 @@ class HTMLPurifier_AttrDef_CSS_Font extends HTMLPurifier_AttrDef
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
// vim: et sw=4 sts=4
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user