mirror of
https://github.com/ezyang/htmlpurifier.git
synced 2025-08-03 12:47:56 +02:00
Compare commits
562 Commits
v1.0.0beta
...
v2.0.1
Author | SHA1 | Date | |
---|---|---|---|
|
45748500ec | ||
|
e99520ab96 | ||
|
1e2abb7f8f | ||
|
362c802191 | ||
|
3a1d505b3d | ||
|
a005da8a4c | ||
|
9a66394abb | ||
|
62c0575468 | ||
|
6a95d91a1a | ||
|
275932ec05 | ||
|
ae90bb919d | ||
|
3c734b4c72 | ||
|
3d02a2a7d4 | ||
|
0bfa42f9b7 | ||
|
7a8edc88f9 | ||
|
98b4e70a93 | ||
|
6f5592ae60 | ||
|
9f996b125a | ||
|
96b571d236 | ||
|
0e9904a9ba | ||
|
e66a98c396 | ||
|
728088f2ba | ||
|
8ae2604440 | ||
|
7b087c7bbe | ||
|
58064592ff | ||
|
b19fc32a5a | ||
|
b15cbbb42a | ||
|
5f0663cad7 | ||
|
75e52a12a6 | ||
|
269268b843 | ||
|
62c6d93b6d | ||
|
31704c92f6 | ||
|
291fa4cb29 | ||
|
389fcc9a5d | ||
|
e5191b3ada | ||
|
5d0a992579 | ||
|
ae83bebc98 | ||
|
9191877740 | ||
|
3066ca357a | ||
|
53fd096641 | ||
|
2166246b7e | ||
|
49bb6ec35d | ||
|
401612dc3a | ||
|
dc0fb7d2b4 | ||
|
eee45fed37 | ||
|
03657ad51a | ||
|
dda4038446 | ||
|
996ccdbdda | ||
|
008348db21 | ||
|
b10a380ff4 | ||
|
bf0d659c47 | ||
|
e55551ecdd | ||
|
e9f3fef47b | ||
|
840f9f7434 | ||
|
10c970760d | ||
|
69996acc9e | ||
|
8bbb73e47d | ||
|
cf7a50163c | ||
|
da2ea348fd | ||
|
ab3ebcba6d | ||
|
d399abba50 | ||
|
0b0a505c30 | ||
|
6aa3dfc116 | ||
|
c3094275ef | ||
|
220c150e0a | ||
|
32d30a9181 | ||
|
0e5491b20c | ||
|
7699efd593 | ||
|
4bf15de536 | ||
|
70bcccf54c | ||
|
bf6ce67fc1 | ||
|
bd44105ca9 | ||
|
d1f43636e5 | ||
|
9c7483166c | ||
|
e840564228 | ||
|
7d4b532d6b | ||
|
58f00105c8 | ||
|
8d15d1ce13 | ||
|
9c60eeed04 | ||
|
2e089477a5 | ||
|
b442d09ea6 | ||
|
12f73605a3 | ||
|
e2a951420f | ||
|
002395de09 | ||
|
d1187ed331 | ||
|
426fbd1f97 | ||
|
9c5f01a0cf | ||
|
f985d3cd96 | ||
|
0cb1d85822 | ||
|
073ddb0cb2 | ||
|
889ccb1a92 | ||
|
aec84dc3f6 | ||
|
dea62ffdab | ||
|
8913239b7f | ||
|
e06929c218 | ||
|
aaf4839c34 | ||
|
c113f43440 | ||
|
bd8ecdd268 | ||
|
ef51f8681a | ||
|
ee61ffc0d9 | ||
|
f758f7c534 | ||
|
95499e34da | ||
|
de23201cbb | ||
|
21ab12a6a8 | ||
|
69666e977f | ||
|
fa05319e30 | ||
|
ea46d79b0a | ||
|
a62f8971e4 | ||
|
7a3e06d4d0 | ||
|
e180b7689e | ||
|
7579932948 | ||
|
818d0d7a23 | ||
|
797d3e0393 | ||
|
ff7eec7424 | ||
|
0ea04db559 | ||
|
831db14c79 | ||
|
a470fc5621 | ||
|
2945f6a930 | ||
|
71326abec1 | ||
|
23ef535043 | ||
|
fda2043ace | ||
|
3f06d8316c | ||
|
e4b621eec2 | ||
|
9728be4a52 | ||
|
f1ec05afd0 | ||
|
7481d349d3 | ||
|
086dc9177b | ||
|
4d38c02932 | ||
|
83a50465dc | ||
|
dd62a303eb | ||
|
e4e981b6f1 | ||
|
a846f4e70b | ||
|
a5136b65e4 | ||
|
2d035483dd | ||
|
831a09d455 | ||
|
2cbb3be602 | ||
|
f7eccc0038 | ||
|
65252d6fbd | ||
|
6b9c5ec603 | ||
|
e7b15068c2 | ||
|
53c19552d2 | ||
|
048242004e | ||
|
05e1aca2fa | ||
|
23feb457f2 | ||
|
8f6380d63a | ||
|
3b1c40b2fc | ||
|
da92cb9ff4 | ||
|
bda9167423 | ||
|
cb9c96a2b0 | ||
|
e0cf214c44 | ||
|
ed73fdd5b8 | ||
|
eaea42f827 | ||
|
7f39e1e2c3 | ||
|
b81fb0af90 | ||
|
47fe34ad81 | ||
|
ac50d333a5 | ||
|
ce013e2962 | ||
|
67fab710bf | ||
|
b3a599e8c2 | ||
|
f4e4c1556d | ||
|
c5e33416d3 | ||
|
6c08ca4c16 | ||
|
b1822bb04f | ||
|
893e962890 | ||
|
bd6071cb3b | ||
|
92ea74cba2 | ||
|
a01459c87a | ||
|
fd35c43643 | ||
|
0426985c81 | ||
|
bbea02f55c | ||
|
4e77a1adbd | ||
|
bd58a7ba77 | ||
|
a3ed9196b9 | ||
|
2646f5ea57 | ||
|
424c7ad2e3 | ||
|
234b3085d7 | ||
|
3d978c961d | ||
|
72254cd77a | ||
|
d8a6361244 | ||
|
968dfa2feb | ||
|
114d6841ab | ||
|
1c68d769b5 | ||
|
ac0ca3f15c | ||
|
2d5498b8aa | ||
|
71ccae1a3a | ||
|
cb186dddc4 | ||
|
2ceccc0969 | ||
|
93aa98ad01 | ||
|
c0b38bab85 | ||
|
d6c4473a12 | ||
|
fc06f221d5 | ||
|
ac3ab2a556 | ||
|
2c330cac73 | ||
|
a0d6543b84 | ||
|
e223490a78 | ||
|
2666f067cc | ||
|
826a57a04a | ||
|
e08b5aaa70 | ||
|
b15e8c344e | ||
|
2c9e041b4c | ||
|
e2c3394d70 | ||
|
1532fe703a | ||
|
058f1eba7d | ||
|
1102dc6e27 | ||
|
85374d330f | ||
|
a16d6c4342 | ||
|
9b5e2978ad | ||
|
06468a4157 | ||
|
0167f8aa84 | ||
|
f1a90e684b | ||
|
14d98413fd | ||
|
97a4ec7598 | ||
|
71ed725c5c | ||
|
d4bf41288a | ||
|
365bd78c20 | ||
|
52fa958fb2 | ||
|
17d32bac7f | ||
|
e2babe5308 | ||
|
5f1a6b883f | ||
|
c5e3796202 | ||
|
72f1984229 | ||
|
918081b372 | ||
|
6c56dd070f | ||
|
299f93f8f0 | ||
|
4169846c57 | ||
|
aff4957531 | ||
|
e4bdf472a6 | ||
|
9a99750474 | ||
|
7eb751b5f5 | ||
|
0d0173eb6e | ||
|
556ed4ea90 | ||
|
cf445a6107 | ||
|
243ad45e59 | ||
|
31d0c621f5 | ||
|
0870974a25 | ||
|
5c4a0a6785 | ||
|
e55babdc53 | ||
|
6e1b540d99 | ||
|
edf20018f0 | ||
|
c09432e171 | ||
|
9c031b5c1e | ||
|
a827cbc3ba | ||
|
c05eebee15 | ||
|
93a69d020a | ||
|
f3fa9c01ba | ||
|
bae5b0c022 | ||
|
67befbc8a8 | ||
|
cac22f01cf | ||
|
94d2dbaa74 | ||
|
6add828bc8 | ||
|
800b67ed65 | ||
|
71e4ddd222 | ||
|
54a68a1713 | ||
|
bd544ad038 | ||
|
d5491da77f | ||
|
591fc0ae28 | ||
|
dac7ac1eae | ||
|
64ee756b7a | ||
|
e2103ce0f2 | ||
|
219902ebff | ||
|
21116373a7 | ||
|
5ed88809f3 | ||
|
bb8b38b1e0 | ||
|
236159242f | ||
|
9d8f839bf2 | ||
|
882148f9ad | ||
|
a863f62489 | ||
|
6478c7c2df | ||
|
129a4ea506 | ||
|
a122243a89 | ||
|
315c55eeb1 | ||
|
cfe50ff8ae | ||
|
d0018a2696 | ||
|
77d9e05a07 | ||
|
80243f377c | ||
|
43b157cf4d | ||
|
f6b50d4bfd | ||
|
806901cfd2 | ||
|
f90eef7f1f | ||
|
06867e14b6 | ||
|
bda2615b30 | ||
|
e1a5d10e75 | ||
|
98fd6b7d82 | ||
|
be264a4b20 | ||
|
01c85b71d2 | ||
|
2d22c0aa55 | ||
|
6e061f5184 | ||
|
44b988f1f6 | ||
|
0ead9558b4 | ||
|
159a1cced1 | ||
|
6871a54d64 | ||
|
96ac7e8797 | ||
|
2d49299621 | ||
|
ab5c782c77 | ||
|
8893b87e04 | ||
|
aeef746060 | ||
|
da13c6ac87 | ||
|
ccae73c25a | ||
|
8d6bfa4037 | ||
|
712d81ebea | ||
|
f7f6fed86a | ||
|
2293c67eec | ||
|
108df87824 | ||
|
5e366b25f8 | ||
|
2e16c4a968 | ||
|
a8db22dfff | ||
|
fbe2c25f8a | ||
|
158be61def | ||
|
d693c4ea09 | ||
|
c24916e1d6 | ||
|
a68b6afda1 | ||
|
78cf7db82e | ||
|
9b375fdfb8 | ||
|
0dd866cc15 | ||
|
ad1169c711 | ||
|
2816ae535f | ||
|
462d3ab72f | ||
|
cf1d868782 | ||
|
c705e17a58 | ||
|
1cce367950 | ||
|
61f852d429 | ||
|
3a73c2cf04 | ||
|
e75b676656 | ||
|
b53370efbf | ||
|
d60f345cab | ||
|
aefda60696 | ||
|
2ffa5d3135 | ||
|
23d3490d49 | ||
|
582ffc4143 | ||
|
d52189a19d | ||
|
02006d6e64 | ||
|
dcaa374dae | ||
|
e2cc37724b | ||
|
3ad6239dc3 | ||
|
663fb4e1b2 | ||
|
a909632d2d | ||
|
18f7c85ebe | ||
|
4c54283642 | ||
|
688b1833f5 | ||
|
ceb1b9ccdb | ||
|
52fb35b0bb | ||
|
b6e222cbc2 | ||
|
dcfd8f5641 | ||
|
48da08ab78 | ||
|
360f984f63 | ||
|
41a25cb6b8 | ||
|
a0fd6a9f5c | ||
|
66e1d2732a | ||
|
b73b5100fd | ||
|
d886ed59fd | ||
|
cbb492c52c | ||
|
4f8f022eac | ||
|
301b2585ae | ||
|
8e733a52fb | ||
|
2a01cf786e | ||
|
825b0671b5 | ||
|
4bdc0446de | ||
|
45a70e8ae4 | ||
|
1fe60c9b9d | ||
|
dc0e2c6b3e | ||
|
9bbbb87ffa | ||
|
b63b0be21f | ||
|
73a1e31fad | ||
|
775763c583 | ||
|
49cb2a4a7c | ||
|
61b6ee7183 | ||
|
d7ce6b4587 | ||
|
f67ee19f31 | ||
|
92b3f0e817 | ||
|
3c4da9666f | ||
|
925a07b828 | ||
|
94db380271 | ||
|
b9e7ba6a2f | ||
|
b1b3377b9c | ||
|
d8673539ab | ||
|
3b26e5dc5b | ||
|
c5ea987069 | ||
|
b152448608 | ||
|
b0575cb888 | ||
|
224ef774f7 | ||
|
18a83acc5d | ||
|
f9090e45c0 | ||
|
450523a9ca | ||
|
1955527a11 | ||
|
a5751c7f20 | ||
|
0960cf6ace | ||
|
83ed9e0fe1 | ||
|
fe9238af3a | ||
|
f0fe829af4 | ||
|
a3968a1ec7 | ||
|
a8298172e1 | ||
|
90dd7f13ae | ||
|
780c7fd309 | ||
|
dec6c52695 | ||
|
1ea3c1e968 | ||
|
bdab77b59e | ||
|
82afd890c4 | ||
|
b0df2f292f | ||
|
7a4c7b3777 | ||
|
2dc8e9c3d5 | ||
|
d48f9b6b21 | ||
|
2df5896324 | ||
|
f38fe431ed | ||
|
926b94bdd3 | ||
|
ad934540da | ||
|
afee1ea9bf | ||
|
a6bbe60e7c | ||
|
d2fd193bc4 | ||
|
e1b29d7c25 | ||
|
9668ac1e38 | ||
|
eb6950d7d0 | ||
|
4a724d0230 | ||
|
504203c0f3 | ||
|
e998b034d1 | ||
|
84e3a28001 | ||
|
4ee1bf94e3 | ||
|
24f2771304 | ||
|
74ba9b8629 | ||
|
b9caa35bf4 | ||
|
6ff78d2f79 | ||
|
8256ca4376 | ||
|
7d2fe4c5d7 | ||
|
f3646a3a06 | ||
|
29716bf8f4 | ||
|
fb38b02135 | ||
|
13790c6db2 | ||
|
2d6bf12fe0 | ||
|
8f515b9cda | ||
|
58be73fcf7 | ||
|
f432a40f50 | ||
|
d660b9018b | ||
|
4d96433c23 | ||
|
a78f0f5f80 | ||
|
d941d30cfa | ||
|
9af9c505e1 | ||
|
7e6a3fc990 | ||
|
c7e798080c | ||
|
32c5b5080b | ||
|
cbdd48811d | ||
|
37def0104b | ||
|
d9bb97cc26 | ||
|
8bff97ec08 | ||
|
fab2b363d0 | ||
|
8e1cfb362d | ||
|
1fa5101511 | ||
|
24663d65ed | ||
|
6adbaf0e5c | ||
|
81cd9b1ee8 | ||
|
f5ff8acbb0 | ||
|
ad8310c1f5 | ||
|
4b5198c5bc | ||
|
a251ec590f | ||
|
2bfdfaa02c | ||
|
4abf83af62 | ||
|
1ad55e0ed5 | ||
|
6c04bbdac1 | ||
|
c046da638a | ||
|
801dbcafb7 | ||
|
4f8d83506d | ||
|
00fce29467 | ||
|
686824262e | ||
|
b93892a3b6 | ||
|
7a6de55f76 | ||
|
d7642b8c70 | ||
|
3b30c2ca5b | ||
|
f43616f72d | ||
|
6740ba61af | ||
|
6a33945499 | ||
|
4660791682 | ||
|
b5c69d8ca5 | ||
|
e440f25bce | ||
|
665e80d223 | ||
|
69747ede8a | ||
|
49b3832ebf | ||
|
a365d4c688 | ||
|
7038fad788 | ||
|
50b272d75e | ||
|
bfb642d32c | ||
|
edb39601c7 | ||
|
694139d3bb | ||
|
81721ded5c | ||
|
371fb7c3d2 | ||
|
9e6953e619 | ||
|
2299f0c831 | ||
|
9dd4dcb27a | ||
|
aa0838492e | ||
|
df075c96e0 | ||
|
fbaa909d25 | ||
|
967f40fc11 | ||
|
5ee6ffe20f | ||
|
10d41d7130 | ||
|
65a628bcb7 | ||
|
a5b4ed2126 | ||
|
d20bbd8db3 | ||
|
b99573223d | ||
|
c6cfb68713 | ||
|
2259bfa40e | ||
|
de3b2b70fb | ||
|
4f0a5c0e22 | ||
|
fdd583253c | ||
|
a4be6ffe4d | ||
|
6de42d8d1d | ||
|
e9a519e589 | ||
|
709a17a504 | ||
|
47a6c9eb75 | ||
|
eddf474351 | ||
|
0e715bdda6 | ||
|
478fab1ad1 | ||
|
f4f636a09c | ||
|
b621602ac1 | ||
|
14aeafcf22 | ||
|
90279eaee2 | ||
|
0ac97774d4 | ||
|
89376a11e3 | ||
|
1de3088276 | ||
|
55503744ee | ||
|
670d298a87 | ||
|
784e7356d1 | ||
|
fbb0c486ec | ||
|
a1b60ad70f | ||
|
3e8b1d1148 | ||
|
dd1b911183 | ||
|
4e08389427 | ||
|
dc19ac9a2a | ||
|
15988980db | ||
|
7588068b7b | ||
|
24cde9c891 | ||
|
0d4ee2ba37 | ||
|
78414abafd | ||
|
692a9abc0f | ||
|
ffe39d7f30 | ||
|
5169fc7a3b | ||
|
80e79d906a | ||
|
a43a2730bc | ||
|
ca1453401f | ||
|
dcec92e7b3 | ||
|
fb0003a608 | ||
|
1b8f4fa5bc | ||
|
f46b15cb82 | ||
|
6af60425b8 | ||
|
f8839d56a0 | ||
|
fb08b9c89b | ||
|
f7760c8cb6 | ||
|
7813e79bda | ||
|
314a48373c | ||
|
ca0914789c | ||
|
2605257723 | ||
|
679302b161 | ||
|
37cbdc25b1 | ||
|
973cc43b64 | ||
|
53808ee34a | ||
|
2eef708557 | ||
|
42ba96e2de | ||
|
a33cd12f1a | ||
|
c393ef8a81 | ||
|
ad83ab430d | ||
|
4c52e42189 | ||
|
50d5179dbd | ||
|
af0de616ae | ||
|
66ddc4cc5a | ||
|
252c5afae0 | ||
|
04c0953af0 |
6
CREDITS
6
CREDITS
@@ -2,6 +2,6 @@
|
||||
CREDITS
|
||||
|
||||
Almost everything written by Edward Z. Yang (Ambush Commander). Lots of thanks
|
||||
to the DevNetwork Community for their help (see docs/devnetwork.html for more
|
||||
details), Feyd especially (namely IPv6 and optimization). Thanks to RSnake for
|
||||
letting me package his fantastic XSS cheatsheet for a smoketest.
|
||||
to the DevNetwork Community for their help (see docs/ref-devnetwork.html for
|
||||
more details), Feyd especially (namely IPv6 and optimization). Thanks to RSnake
|
||||
for letting me package his fantastic XSS cheatsheet for a smoketest.
|
||||
|
12
Doxyfile
12
Doxyfile
@@ -3,8 +3,8 @@
|
||||
#---------------------------------------------------------------------------
|
||||
# Project related configuration options
|
||||
#---------------------------------------------------------------------------
|
||||
PROJECT_NAME = HTMLPurifier
|
||||
PROJECT_NUMBER = trunk
|
||||
PROJECT_NAME = HTML Purifier
|
||||
PROJECT_NUMBER = 2.0.1
|
||||
OUTPUT_DIRECTORY = "C:/Documents and Settings/Edward/My Documents/My Webs/htmlpurifier/docs/doxygen"
|
||||
CREATE_SUBDIRS = NO
|
||||
OUTPUT_LANGUAGE = English
|
||||
@@ -88,7 +88,13 @@ RECURSIVE = YES
|
||||
EXCLUDE =
|
||||
EXCLUDE_SYMLINKS = NO
|
||||
EXCLUDE_PATTERNS = */tests/* \
|
||||
*/benchmarks/*
|
||||
*/benchmarks/* \
|
||||
*/docs/* \
|
||||
*/test-settings.php \
|
||||
*/configdoc/* \
|
||||
*/test-settings.php \
|
||||
*/maintenance/* \
|
||||
*/smoketests/*
|
||||
EXAMPLE_PATH =
|
||||
EXAMPLE_PATTERNS = *
|
||||
EXAMPLE_RECURSIVE = NO
|
||||
|
241
INSTALL
241
INSTALL
@@ -1,61 +1,230 @@
|
||||
|
||||
Install
|
||||
How to install HTMLPurifier
|
||||
How to install HTML Purifier
|
||||
|
||||
Being a library, there's no fancy GUI that will take you step-by-step through
|
||||
configuring database credentials and other mumbo-jumbo. HTMLPurifier is
|
||||
designed to run "out of the box." Regardless, there are still a couple of
|
||||
things you should be mindful of.
|
||||
HTML Purifier is designed to run out of the box, so actually using the library
|
||||
is extremely easy. (Although, if you were looking for a step-by-step
|
||||
installation GUI, you've come to the wrong place!) The impatient can scroll
|
||||
down to the bottom of this INSTALL document to see the code, but you really
|
||||
should make sure a few things are properly done.
|
||||
|
||||
|
||||
1. Compatibility
|
||||
|
||||
HTML Purifier works in both PHP 4 and PHP 5, from PHP 4.3.2 and up. It has no
|
||||
core dependencies with other libraries.
|
||||
|
||||
Optional extensions are iconv (usually installed) and tidy (also common).
|
||||
If you use UTF-8 and don't plan on pretty-printing HTML, you can get away with
|
||||
not having either of these extensions.
|
||||
|
||||
|
||||
|
||||
1. Including the proper files
|
||||
2. Including the library
|
||||
|
||||
The library/ directory must be added to your path: HTMLPurifier will not be
|
||||
able to find the necessary includes otherwise. This is as simple as:
|
||||
Simply use:
|
||||
|
||||
set_include_path('/path/to/htmlpurifier/library' . PATH_SEPARATOR . get_include_path());
|
||||
require_once '/path/to/library/HTMLPurifier.auto.php';
|
||||
|
||||
...replacing /path/to/htmlpurifier with the actual location of the folder. Don't
|
||||
worry, HTMLPurifier is namespaced so unless you have another file named
|
||||
HTMLPurifier.php, the files won't collide with any of your includes.
|
||||
...and you're good to go. Since HTML Purifier's codebase is fairly
|
||||
large, I recommend only including HTML Purifier when you need it.
|
||||
|
||||
Then, it's a simple matter of including the base file:
|
||||
If you don't like your include_path to be fiddled around with, simply set
|
||||
HTML Purifier's library/ directory to the include path yourself and then:
|
||||
|
||||
require_once 'HTMLPurifier.php';
|
||||
require_once 'HTMLPurifier.php';
|
||||
|
||||
...and you're good to go.
|
||||
Only the contents in the library/ folder are necessary, so you can remove
|
||||
everything else when using HTML Purifier in a production environment.
|
||||
|
||||
|
||||
|
||||
2. Preparing the proper environment
|
||||
3. Preparing the proper output environment
|
||||
|
||||
While no configuration is necessary, you first should take precautions regarding
|
||||
the other output HTML that the filtered content will be going along with. Here
|
||||
is a (short) checklist:
|
||||
HTML Purifier is all about web-standards, so accordingly your webpages should
|
||||
be standards compliant. HTML Purifier can deal with these doctypes:
|
||||
|
||||
* Have I specified XHTML 1.0 Transitional as the doctype?
|
||||
* Have I specified UTF-8 as the character encoding?
|
||||
* XHTML 1.0 Transitional (default)
|
||||
* XHTML 1.0 Strict
|
||||
* HTML 4.01 Transitional
|
||||
* HTML 4.01 Strict
|
||||
* XHTML 1.1 (sans Ruby)
|
||||
|
||||
I cannot stress the importance of these two bullets enough. Omitting either
|
||||
of them could have dire consequences not only for security but for plain
|
||||
old usability. You can find a more in-depth discussion of why this is needed
|
||||
in docs/security.txt, in the meantime, try to change your output so this is
|
||||
the case.
|
||||
...and these character encodings:
|
||||
|
||||
If, for some reason, you are unable to switch to UTF-8 immediately, you can
|
||||
use iconv to convert the output of HTMLPurifier to your desired encoding.
|
||||
We may integrate support for other encodings in later releases, but for now,
|
||||
UTF-8 is all you should need. (If you're not using UTF-8, switch now!)
|
||||
* UTF-8 (default)
|
||||
* Any encoding iconv supports (support is crippled for i18n though)
|
||||
|
||||
The defaults are there for a reason: they are best-practice choices that
|
||||
should not be changed lightly. For those of you in the dark, you can determine
|
||||
the doctype from this code in your HTML documents:
|
||||
|
||||
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
|
||||
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
|
||||
|
||||
...and the character encoding from this code:
|
||||
|
||||
<meta http-equiv="Content-type" content="text/html;charset=ENCODING">
|
||||
|
||||
For legacy codebases these declarations may be missing. If that is the case,
|
||||
STOP, and read docs/enduser-utf8.html
|
||||
|
||||
You may currently be vulnerable to XSS and other security threats, and HTML
|
||||
Purifier won't be able to fix that.
|
||||
|
||||
|
||||
|
||||
3. Using the code
|
||||
4. Configuration
|
||||
|
||||
The interface is mind-numbingly simple.
|
||||
HTML Purifier is designed to run out-of-the-box, but occasionally HTML
|
||||
Purifier needs to be told what to do. If you answered no to any of these
|
||||
questions, read on; otherwise, you can skip to the next section (or, if you're
|
||||
into configuring things just for the heck of it, skip to 4.3).
|
||||
|
||||
$purifier = new HTMLPurifier();
|
||||
$clean_html = $purifier->purify($dirty_html);
|
||||
* Am I using UTF-8?
|
||||
* Am I using XHTML 1.0 Transitional?
|
||||
|
||||
If you answered no to any of these questions, instantiate a configuration
|
||||
object and read on:
|
||||
|
||||
$config = HTMLPurifier_Config::createDefault();
|
||||
|
||||
|
||||
|
||||
4.1. Setting a different character encoding
|
||||
|
||||
You really shouldn't use any other encoding except UTF-8, especially if you
|
||||
plan to support multilingual websites (read section three for more details).
|
||||
However, switching to UTF-8 is not always immediately feasible, so we can
|
||||
adapt.
|
||||
|
||||
HTML Purifier uses iconv to support other character encodings; as such,
|
||||
any encoding that iconv supports <http://www.gnu.org/software/libiconv/>
|
||||
is also supported by HTML Purifier, using this code:
|
||||
|
||||
$config->set('Core', 'Encoding', /* put your encoding here */);
|
||||
|
||||
An example usage for Latin-1 websites (the most common encoding for English
|
||||
websites):
|
||||
|
||||
$config->set('Core', 'Encoding', 'ISO-8859-1');
|
||||
|
||||
Note that HTML Purifier's support for non-Unicode encodings is crippled by the
|
||||
fact that any character not supported by that encoding will be silently
|
||||
dropped, EVEN if it is ampersand escaped. If you want to work around
|
||||
this, you are welcome to read docs/enduser-utf8.html for a fix,
|
||||
but please be cognizant of the issues the "solution" creates (for this
|
||||
reason, I do not include the solution in this document).
|
||||
|
||||
|
||||
|
||||
4.2. Setting a different doctype
|
||||
|
||||
For those of you using HTML 4.01 Transitional, you can disable
|
||||
XHTML output like this:
|
||||
|
||||
$config->set('HTML', 'Doctype', 'HTML 4.01 Transitional');
|
||||
|
||||
Other supported doctypes include:
|
||||
|
||||
* HTML 4.01 Strict
|
||||
* HTML 4.01 Transitional
|
||||
* XHTML 1.0 Strict
|
||||
* XHTML 1.0 Transitional
|
||||
* XHTML 1.1
|
||||
|
||||
|
||||
|
||||
4.3. Other settings
|
||||
|
||||
There are more configuration directives which can be read about
|
||||
here: <http://htmlpurifier.org/live/configdoc/plain.html> They're a bit boring,
|
||||
but they can help out for those of you who like to exert maximum control over
|
||||
your code. Some of the more interesting ones are configurable at the
|
||||
demo <http://htmlpurifier.org/demo.php> and are well worth looking into
|
||||
for your own system.
|
||||
|
||||
|
||||
|
||||
5. Using the code
|
||||
|
||||
The interface is mind-numbingly simple:
|
||||
|
||||
$purifier = new HTMLPurifier();
|
||||
$clean_html = $purifier->purify( $dirty_html );
|
||||
|
||||
...or, if you're using the configuration object:
|
||||
|
||||
$purifier = new HTMLPurifier($config);
|
||||
$clean_html = $purifier->purify( $dirty_html );
|
||||
|
||||
That's it! For more examples, check out docs/examples/ (they aren't very
|
||||
different though). Also, docs/enduser-slow.html gives advice on what to
|
||||
do if HTML Purifier is slowing down your application.
|
||||
|
||||
|
||||
|
||||
6. Quick install
|
||||
|
||||
First, make sure library/HTMLPurifier/DefinitionCache/Serializer is
|
||||
writable by the webserver (see Section 7: Caching below for details).
|
||||
If your website is in UTF-8 and XHTML Transitional, use this code:
|
||||
|
||||
<?php
|
||||
require_once '/path/to/htmlpurifier/library/HTMLPurifier.auto.php';
|
||||
|
||||
$purifier = new HTMLPurifier();
|
||||
$clean_html = $purifier->purify($dirty_html);
|
||||
?>
|
||||
|
||||
If your website is in a different encoding or doctype, use this code:
|
||||
|
||||
<?php
|
||||
require_once '/path/to/htmlpurifier/library/HTMLPurifier.auto.php';
|
||||
|
||||
$config = HTMLPurifier_Config::createDefault();
|
||||
$config->set('Core', 'Encoding', 'ISO-8859-1'); // replace with your encoding
|
||||
$config->set('HTML', 'Doctype', 'HTML 4.01 Transitional'); // replace with your doctype
|
||||
$purifier = new HTMLPurifier($config);
|
||||
|
||||
$clean_html = $purifier->purify($dirty_html);
|
||||
?>
|
||||
|
||||
|
||||
|
||||
7. Caching
|
||||
|
||||
HTML Purifier generates some cache files (generally one or two) to speed up
|
||||
its execution. For maximum performance, make sure that
|
||||
library/HTMLPurifier/DefinitionCache/Serializer is writable by the webserver.
|
||||
|
||||
If you are in the library/ folder of HTML Purifier, you can set the
|
||||
appropriate permissions using:
|
||||
|
||||
chmod -R 0755 HTMLPurifier/DefinitionCache/Serializer
|
||||
|
||||
If the above command doesn't work, you may need to assign write permissions
|
||||
to all. This may be necessary if your webserver runs as nobody, but is
|
||||
not recommended since it means any other user can write files in the
|
||||
directory. Use:
|
||||
|
||||
chmod -R 0777 HTMLPurifier/DefinitionCache/Serializer
|
||||
|
||||
You can also chmod files via your FTP client; this option
|
||||
is usually accessible by right clicking the corresponding directory and
|
||||
then selecting "chmod" or "file permissions".
|
||||
|
||||
Starting with 2.0.1, HTML Purifier will generate friendly error messages
|
||||
that will tell you exactly what you have to chmod the directory to; if in doubt,
|
||||
follow its advice.
|
||||
|
||||
If you are unable or unwilling to give write permissions to the cache
|
||||
directory, you can either disable the cache (and suffer a performance
|
||||
hit):
|
||||
|
||||
$config->set('Core', 'DefinitionCache', null);
|
||||
|
||||
Or move the cache directory somewhere else (no trailing slash):
|
||||
|
||||
$config->set('Cache', 'SerializerPath', '/home/user/absolute/path');
|
||||
|
||||
That's it. For more examples, check out docs/examples/. Also, SLOW gives
|
||||
advice on what to do if HTMLPurifier is slowing down your application.
|
||||
|
71
INSTALL.fr.utf8
Normal file
71
INSTALL.fr.utf8
Normal file
@@ -0,0 +1,71 @@
|
||||
|
||||
Installation
|
||||
Comment installer HTML Purifier
|
||||
|
||||
Attention: Ce document est encodé en UTF-8. Si les lettres avec les accents
|
||||
sont brouillées, prenez un meilleur éditeur de texte.
|
||||
|
||||
À L'Aide: Je ne suis pas un diseur natif de français. Si vous trouvez une
|
||||
erreur dans ce document, racontez-moi! Merci.
|
||||
|
||||
|
||||
L'installation de HTML Purifier est très simple, parce qu'il ne doit pas
|
||||
la configuration. Dans le pied de ce document, les utilisateurs
|
||||
impatient peuvent trouver le code, mais je recommande que vous lisez
|
||||
ce document pour quelques choses.
|
||||
|
||||
|
||||
1. Compatibilité
|
||||
|
||||
HTML Purifier fonctionne dans PHP 4 et PHP 5. PHP 4.3.2 est le dernier
|
||||
version que je le testais. Il ne dépend de les autre librairies.
|
||||
|
||||
Les extensions optionnel est iconv (en général déjà installer) et
|
||||
tidy (répandu aussi). Si vous utilisez UTF-8 et ne voulez pas
|
||||
l'indentation, vous pouvez utiliser HTML Purifier sans ces extensions.
|
||||
|
||||
|
||||
2. Inclure la librairie
|
||||
|
||||
Utilisez:
|
||||
|
||||
require_once '/path/to/library/HTMLPurifier.auto.php';
|
||||
|
||||
...quand vous devez utiliser HTML Purifier (ne inclure pas quand vous
|
||||
ne devez pas, parce que HTML Purifier est très grand.)
|
||||
|
||||
Si vous n'aime pas que HTML Purifier change vos include_path, on peut
|
||||
change vos include_path, et:
|
||||
|
||||
require_once 'HTMLPurifier.php';
|
||||
|
||||
Seulement le contenu de library/ est essentiel; vous pouvez enlever
|
||||
les autre fichiers quand vous est dans une atmosphère professionnel.
|
||||
|
||||
|
||||
[En cours de construction]
|
||||
|
||||
|
||||
6. Installation vite
|
||||
|
||||
Si votre site web est en UTF-8 et XHTML Transitional, utilisez:
|
||||
|
||||
<?php
|
||||
require_once '/path/to/htmlpurifier/library/HTMLPurifier.auto.php';
|
||||
|
||||
$purificateur = new HTMLPurifier();
|
||||
$html_propre = $purificateur->purify($html_salle);
|
||||
?>
|
||||
|
||||
Sinon, utilisez:
|
||||
|
||||
<?php
|
||||
require_once '/path/to/htmlpurifier/library/HTMLPurifier.auto.php';
|
||||
|
||||
$config = HTMLPurifier_Config::createDefault();
|
||||
$config->set('Core', 'Encoding', 'ISO-8859-1'); //remplacez avec votre encoding
|
||||
$config->set('Core', 'XHTML', true); //remplacez avec false si HTML 4.01
|
||||
$purificateur = new HTMLPurifier($config);
|
||||
|
||||
$html_propre = $purificateur->purify($html_salle);
|
||||
?>
|
361
NEWS
361
NEWS
@@ -1,9 +1,360 @@
|
||||
NEWS ( CHANGELOG and HISTORY ) HTMLPurifier
|
||||
|||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
||||
|
||||
= KEY ====================
|
||||
# Breaks back-compat
|
||||
! Feature
|
||||
- Bugfix
|
||||
+ Sub-comment
|
||||
. Internal change
|
||||
==========================
|
||||
|
||||
2.0.1, released 2007-06-27
|
||||
! Tag auto-closing now based on a ChildDef heuristic rather than a
|
||||
manually set auto_close array; some behavior may change
|
||||
! Experimental AutoFormat functionality added: auto-paragraph and
|
||||
linkify your HTML input by setting %AutoFormat.AutoParagraph and
|
||||
%AutoFormat.Linkify to true
|
||||
! Newlines normalized internally, and then converted back to the
|
||||
value of PHP_EOL. If this is not desired, set your newline format
|
||||
using %Output.Newline.
|
||||
! Beta error collection, messages are implemented for the most generic
|
||||
cases involving Lexing or Strategies
|
||||
- Clean up special case code for <script> tags
|
||||
- Reorder includes for DefinitionCache decorators, fixes a possible
|
||||
missing class error
|
||||
- Fixed bug where manually modified definitions were not saved via cache
|
||||
(mostly harmless, except for the fact that it would be a little slower)
|
||||
- Configuration objects with different serials do not clobber each
|
||||
others when revision numbers are unequal
|
||||
- Improve Serializer DefinitionCache directory permissions checks
|
||||
- DefinitionCache no longer throws errors when it encounters old
|
||||
serial files that do not conform to the current style
|
||||
- Stray xmlns attributes removed from configuration documentation
|
||||
- configForm.php smoketest no longer has XSS vulnerability due to
|
||||
unescaped print_r output
|
||||
- Printer adheres to configuration's directives on output format
|
||||
- Fix improperly named form field in ConfigForm printer
|
||||
. Rewire some test-cases to swallow errors rather than expect them
|
||||
. HTMLDefinition printer updated with some of the new attributes
|
||||
. DefinitionCache keys reordered to reflect precedence: version number,
|
||||
hash, then revision number
|
||||
. %Core.DefinitionCache renamed to %Cache.DefinitionImpl
|
||||
. Interlinking in configuration documentation added using
|
||||
Injector_PurifierLinkify
|
||||
. Directives now keep track of aliases to themselves
|
||||
. Error collector now requires a severity to be passed, use PHP's internal
|
||||
error constants for this
|
||||
. HTMLPurifier_Config::getAllowedDirectivesForForm implemented, allows
|
||||
much easier selective embedding of configuration values
|
||||
. Doctype objects now accept public and system DTD identifiers
|
||||
. %HTML.Doctype is now constrained by specific values, to specify a custom
|
||||
doctype use new %HTML.CustomDoctype
|
||||
. ConfigForm truncates long directives to keep the form small, and does
|
||||
not re-output namespaces
|
||||
|
||||
2.0.0, released 2007-06-20
|
||||
# Completely refactored HTMLModuleManager, decentralizing safety
|
||||
information
|
||||
# Transform modules changed to Tidy modules, which offer more flexibility
|
||||
and better modularization
|
||||
# Configuration object now finalizes itself when a read operation is
|
||||
performed on it, ensuring that its internal state stays consistent.
|
||||
To revert this behavior, you can set the $autoFinalize member variable
|
||||
off, but it's not recommended.
|
||||
# New compact syntax for AttrDef objects that can be used to instantiate
|
||||
new objects via make()
|
||||
# Definitions (esp. HTMLDefinition) are now cached for a significant
|
||||
performance boost. You can disable caching by setting %Core.DefinitionCache
|
||||
to null. You CANNOT edit raw definitions without setting the corresponding
|
||||
DefinitionID directive (%HTML.DefinitionID for HTMLDefinition).
|
||||
# Contents between <script> tags are now completely removed if <script>
|
||||
is not allowed
|
||||
# Prototype-declarations for Lexer removed in favor of configuration
|
||||
determination of Lexer implementations.
|
||||
! HTML Purifier now works in PHP 4.3.2.
|
||||
! Configuration form-editing API makes tweaking HTMLPurifier_Config a
|
||||
breeze!
|
||||
! Configuration directives that accept hashes now allow new string
|
||||
format: key1:value1,key2:value2
|
||||
! ConfigDoc now factored into OOP design
|
||||
! All deprecated elements now natively supported
|
||||
! Implement TinyMCE styled whitelist specification format in
|
||||
%HTML.Allowed
|
||||
! Config object gives more friendly error messages when things go wrong
|
||||
! Advanced API implemented: easy functions for creating elements (addElement)
|
||||
and attributes (addAttribute) on HTMLDefinition
|
||||
! Add native support for required attributes
|
||||
- Deprecated and removed EnableRedundantUTF8Cleaning. It didn't even work!
|
||||
- DOMLex will not emit errors when a custom error handler that does not
|
||||
honor error_reporting is used
|
||||
- StrictBlockquote child definition refrains from wrapping whitespace
|
||||
in tags now.
|
||||
- Bug resulting from tag transforms to non-allowed elements fixed
|
||||
- ChildDef_Custom's regex generation has been improved, removing several
|
||||
false positives
|
||||
. Unit test for ElementDef created, ElementDef behavior modified to
|
||||
be more flexible
|
||||
. Added convenience functions for HTMLModule constructors
|
||||
. AttrTypes now has accessor functions that should be used instead
|
||||
of directly manipulating info
|
||||
. TagTransform_Center deprecated in favor of generic TagTransform_Simple
|
||||
. Add extra protection in AttrDef_URI against phantom Schemes
|
||||
. Doctype object added to HTMLDefinition which describes certain aspects
|
||||
of the operational document type
|
||||
. Lexer is now pre-emptively included, with a conditional include for the
|
||||
PHP5 only version.
|
||||
. HTMLDefinition and CSSDefinition have a common parent class: Definition.
|
||||
. DirectLex can now track line-numbers
|
||||
. Preliminary error collector is in place, although no code actually reports
|
||||
errors yet
|
||||
. Factor out most of ValidateAttributes to new AttrValidator class
|
||||
|
||||
1.6.1, released 2007-05-05
|
||||
! Support for more deprecated attributes via transformations:
|
||||
+ hspace and vspace in img
|
||||
+ size and noshade in hr
|
||||
+ nowrap in td
|
||||
+ clear in br
|
||||
+ align in caption, table, img and hr
|
||||
+ type in ul, ol and li
|
||||
! DirectLex now preserves text in which a < bracket is followed by
|
||||
a non-alphanumeric character. This means that certain emoticons
|
||||
are now preserved.
|
||||
! %Core.RemoveInvalidImg is now operational, when set to false invalid
|
||||
images will hang around with an empty src
|
||||
! target attribute in a tag supported, use %Attr.AllowedFrameTargets
|
||||
to enable
|
||||
! CSS property white-space now allows nowrap (supported in all modern
|
||||
browsers) but not others (which have spotty browser implementations)
|
||||
! XHTML 1.1 mode now sort-of works without any fatal errors, and
|
||||
lang is now moved over to xml:lang.
|
||||
! Attribute transformation smoketest available at smoketests/attrTransform.php
|
||||
! Transformation of font's size attribute now handles super-large numbers
|
||||
- Possibly fatal bug with __autoload() fixed in module manager
|
||||
- Invert HTMLModuleManager->addModule() processing order to check
|
||||
prefixes first and then the literal module
|
||||
- Empty strings get converted to empty arrays instead of arrays with
|
||||
an empty string in them.
|
||||
- Merging in attribute lists now works.
|
||||
. Demo script removed: it has been added to the website's repository
|
||||
. Basic.php script modified to work out of the box
|
||||
. Refactor AttrTransform classes to reduce duplication
|
||||
. AttrTransform_TextAlign axed in favor of a more general
|
||||
AttrTransform_EnumToCSS, refer to HTMLModule/TransformToStrict.php to
|
||||
see how the new equivalent is implemented
|
||||
. Unit tests now use exclusively assertIdentical
|
||||
|
||||
1.6.0, released 2007-04-01
|
||||
! Support for most common deprecated attributes via transformations:
|
||||
+ bgcolor in td, th, tr and table
|
||||
+ border in img
|
||||
+ name in a and img
|
||||
+ width in td, th and hr
|
||||
+ height in td, th
|
||||
! Support for CSS attribute 'height' added
|
||||
! Support for rel and rev attributes in a tags added, use %Attr.AllowedRel
|
||||
and %Attr.AllowedRev to activate
|
||||
- You can define ID blacklists using regular expressions via
|
||||
%Attr.IDBlacklistRegexp
|
||||
- Error messages are emitted when you attempt to "allow" elements or
|
||||
attributes that HTML Purifier does not support
|
||||
- Fix segfault in unit test. The problem is not very reproducible and
|
||||
I don't know what causes it, but a six line patch fixed it.
|
||||
|
||||
1.5.0, released 2007-03-23
|
||||
! Added a rudimentary I18N and L10N system modeled off MediaWiki. It
|
||||
doesn't actually do anything yet, but keep your eyes peeled.
|
||||
! docs/enduser-utf8.html explains how to use UTF-8 and HTML Purifier
|
||||
! Newly structured HTMLDefinition modeled off of XHTML 1.1 modules.
|
||||
I am loath to release beta quality APIs, but this is exactly that;
|
||||
don't use the internal interfaces if you're not willing to do migration
|
||||
later on.
|
||||
- Allow 'x' subtag in language codes
|
||||
- Fixed buggy chameleon-support for ins and del
|
||||
. Added support for IDREF attributes (i.e. for)
|
||||
. Renamed HTMLPurifier_AttrDef_Class to HTMLPurifier_AttrDef_Nmtokens
|
||||
. Removed context variable ParentType, replaced with IsInline, which
|
||||
is false when you're not inline and an integer of the parent that
|
||||
caused you to become inline when you are (so possibly zero)
|
||||
. Removed ElementDef->type in favor of ElementDef->descendants_are_inline
|
||||
and HTMLDefinition->content_sets
|
||||
. StrictBlockquote now reports what elements it's supposed to allow,
|
||||
rather than what it does allow
|
||||
. Removed HTMLDefinition->info_flow_elements in favor of
|
||||
HTMLDefinition->content_sets['Flow']
|
||||
. Removed redundant "exclusionary" definitions from DTD roster
|
||||
. StrictBlockquote now requires a construction parameter as if it
|
||||
were a Required ChildDef, this is the "real" set of allowed elements
|
||||
. AttrDef partitioned into HTML, CSS and URI segments
|
||||
. Modify Youtube filter regexp to be multiline
|
||||
. Require both PHP5 and DOM extension in order to use DOMLex, fixes
|
||||
some edge cases where a DOMDocument class exists in a PHP4 environment
|
||||
due to DOM XML extension.
|
||||
|
||||
1.4.1, released 2007-01-21
|
||||
! docs/enduser-youtube.html updated according to new functionality
|
||||
- YouTube IDs can have underscores and dashes
|
||||
|
||||
1.4.0, released 2007-01-21
|
||||
! Implemented list-style-image, URIs now allowed in list-style
|
||||
! Implemented background-image, background-repeat, background-attachment
|
||||
and background-position CSS properties. Shorthand property background
|
||||
supports all of these properties.
|
||||
! Configuration documentation looks nicer
|
||||
! Added %Core.EscapeNonASCIICharacters to workaround loss of Unicode
|
||||
characters while %Core.Encoding is set to a non-UTF-8 encoding.
|
||||
! Support for configuration directive aliases added
|
||||
! Config object can now be instantiated from ini files
|
||||
! YouTube preservation code added to the core, with two lines of code
|
||||
you can add it as a filter to your code. See smoketests/preserveYouTube.php
|
||||
for sample code.
|
||||
! Moved SLOW to docs/enduser-slow.html and added code examples
|
||||
- Replaced version check with functionality check for DOM (thanks Stephen
|
||||
Khoo)
|
||||
. Added smoketest 'all.php', which loads all other smoketests via frames
|
||||
. Implemented AttrDef_CSSURI for url(http://google.com) style declarations
|
||||
. Added convenient single test selector form on test runner
|
||||
|
||||
1.3.2, released 2006-12-25
|
||||
! HTMLPurifier object now accepts configuration arrays, no need to manually
|
||||
instantiate a configuration object
|
||||
! Context object now accessible to outside
|
||||
! Added enduser-youtube.html, explains how to embed YouTube videos. See
|
||||
also corresponding smoketest preserveYouTube.php.
|
||||
! Added purifyArray(), which takes a list of HTML and purifies it all
|
||||
! Added static member variable $version to HTML Purifier with PHP-compatible
|
||||
version number string.
|
||||
- Fixed fatal error thrown by upper-cased language attributes
|
||||
- printDefinition.php: added labels, added better clarification
|
||||
. HTMLPurifier_Config::create() added, takes mixed variable and converts into
|
||||
a HTMLPurifier_Config object.
|
||||
|
||||
1.3.1, released 2006-12-06
|
||||
! Added HTMLPurifier.func.php stub for a convenient function to call the library
|
||||
- Fixed bug in RemoveInvalidImg code that caused all images to be dropped
|
||||
(thanks to .mario for reporting this)
|
||||
. Standardized all attribute handling variables to attr, made it plural
|
||||
|
||||
1.3.0, released 2006-11-26
|
||||
# Invalid images are now removed, rather than replaced with a dud
|
||||
<img src="" alt="Invalid image" />. Previous behavior can be restored
|
||||
with new directive %Core.RemoveInvalidImg set to false.
|
||||
! (X)HTML Strict now supported
|
||||
+ Transparently handles inline elements in block context (blockquote)
|
||||
! Added GET method to demo for easier validation, added 50kb max input size
|
||||
! New directive %HTML.BlockWrapper, for block-ifying inline elements
|
||||
! New directive %HTML.Parent, allows you to only allow inline content
|
||||
! New directives %HTML.AllowedElements and %HTML.AllowedAttributes to let
|
||||
users narrow the set of allowed tags
|
||||
! <li value="4"> and <ul start="2"> now allowed in loose mode
|
||||
! New directives %URI.DisableExternalResources and %URI.DisableResources
|
||||
! New directive %Attr.DisableURI, which eliminates all hyperlinking
|
||||
! New directive %URI.Munge, munges URI so you can use some sort of redirector
|
||||
service to avoid PageRank leaks or warn users that they are exiting your site.
|
||||
! Added spiffy new smoketest printDefinition.php, which lets you twiddle with
|
||||
the configuration settings and see how the internal rules are affected.
|
||||
! New directive %URI.HostBlacklist for blocking links to bad hosts.
|
||||
xssAttacks.php smoketest updated accordingly.
|
||||
- Added missing type to ChildDef_Chameleon
|
||||
- Remove Tidy option from demo if Tidy is not available
|
||||
. ChildDef_Required guards against empty tags
|
||||
. Lookup table HTMLDefinition->info_flow_elements added
|
||||
. Added peace-of-mind variable initialization to Strategy_FixNesting
|
||||
. Added HTMLPurifier->info_parent_def, parent child processing made special
|
||||
. Added internal documents briefly summarizing future progression of HTML
|
||||
. HTMLPurifier_Config->getBatch($namespace) added
|
||||
. More lenient casting to bool from string in HTMLPurifier_ConfigSchema
|
||||
. Refactored ChildDef classes into their own files
|
||||
|
||||
1.2.0, released 2006-11-19
|
||||
# ID attributes now disabled by default. New directives:
|
||||
+ %HTML.EnableAttrID - restores old behavior by allowing IDs
|
||||
+ %Attr.IDPrefix - %Attr.IDBlacklist alternative that munges all user IDs
|
||||
so that they don't collide with your IDs
|
||||
+ %Attr.IDPrefixLocal - Same as above, but for when there are multiple
|
||||
instances of user content on the page
|
||||
+ Profuse documentation on how to use these available in docs/enduser-id.txt
|
||||
! Added MODx plugin <http://modxcms.com/forums/index.php/topic,6604.0.html>
|
||||
! Added percent encoding normalization
|
||||
! XSS attacks smoketest given facelift
|
||||
! Configuration documentation now has table of contents
|
||||
! Added %URI.DisableExternal, which prevents links to external websites. You
|
||||
can also use %URI.Host to permit absolute linking to subdomains
|
||||
! Non-accessible resources (ex. mailto) blocked from embedded URIs (img src)
|
||||
- Type variable in HTMLDefinition was not being set properly, fixed
|
||||
- Documentation updated
|
||||
+ TODO added request Phalanger
|
||||
+ TODO added request Native compression
|
||||
+ TODO added request Remove redundant tags
|
||||
+ TODO added possible plaintext formatter for HTML Purifier documentation
|
||||
+ Updated ConfigDoc TODO
|
||||
+ Improved inline comments in AttrDef/Class.php, AttrDef/CSS.php
|
||||
and AttrDef/Host.php
|
||||
+ Revamped documentation into HTML, along with misc updates
|
||||
- HTMLPurifier_Context doesn't throw a variable reference error if you attempt
|
||||
to retrieve a non-existent variable
|
||||
. Switched to purify()-wide Context object registry
|
||||
. Refactored unit tests to minimize duplication
|
||||
. XSS attack sheet updated
|
||||
. configdoc.xml now has xml:space attached to default value nodes
|
||||
. Allow configuration directives to permit null values
|
||||
. Cleaned up test-cases to remove unnecessary swallowErrors()
|
||||
|
||||
1.1.2, released 2006-09-30
|
||||
! Add HTMLPurifier.auto.php stub file that configures include_path
|
||||
- Documentation updated
|
||||
+ INSTALL document rewritten
|
||||
+ TODO added semi-lossy conversion
|
||||
+ API Doxygen docs' file exclusions updated
|
||||
+ Added notes on HTML versus XML attribute whitespace handling
|
||||
+ Noted that HTMLPurifier_ChildDef_Custom isn't being used
|
||||
+ Noted that config object's definitions are cached versions
|
||||
- Fixed lack of attribute parsing in HTMLPurifier_Lexer_PEARSax3
|
||||
- ftp:// URIs now have their typecodes checked
|
||||
- Hooked up HTMLPurifier_ChildDef_Custom's unit tests (they weren't being run)
|
||||
. Line endings standardized throughout project (svn:eol-style standardized)
|
||||
. Refactored parseData() to general Lexer class
|
||||
. Tester named "HTML Purifier" not "HTMLPurifier"
|
||||
|
||||
1.1.1, released 2006-09-24
|
||||
! Configuration option to optionally Tidy up output for indentation to make up
|
||||
for dropped whitespace by DOMLex (pretty-printing for the entire application
|
||||
should be done by a page-wide Tidy)
|
||||
- Various documentation updates
|
||||
- Fixed parse error in configuration documentation script
|
||||
- Fixed fatal error in benchmark scripts, slightly augmented
|
||||
- As far as possible, whitespace is preserved in-between table children
|
||||
- Sample test-settings.php file included
|
||||
|
||||
1.1.0, released 2006-09-16
|
||||
! Directive documentation generation using XSLT
|
||||
! XHTML can now be turned off, output becomes <br>
|
||||
- Made URI validator more forgiving: will ignore leading and trailing
|
||||
quotes, apostrophes and less than or greater than signs.
|
||||
- Enforce alphanumeric namespace and directive names for configuration.
|
||||
- Table child definition made more flexible, will fix up poorly ordered elements
|
||||
. Renamed ConfigDef to ConfigSchema
|
||||
|
||||
1.0.1, released 2006-09-04
|
||||
- Fixed slight bug in DOMLex attribute parsing
|
||||
- Fixed rejection of case-insensitive configuration values when there is a
|
||||
set of allowed values. This manifested in %Core.Encoding.
|
||||
- Fixed rejection of inline style declarations that had lots of extra
|
||||
space in them. This manifested in TinyMCE.
|
||||
|
||||
1.0.0, released 2006-09-01
|
||||
! Shorthand CSS properties implemented: font, border, background, list-style
|
||||
! Basic color keywords translated into hexadecimal values
|
||||
! Table CSS properties implemented
|
||||
! Support for charsets other than UTF-8 (defined by iconv)
|
||||
! Malformed UTF-8 and non-SGML character detection and cleaning implemented
|
||||
- Fixed broken numeric entity conversion
|
||||
- API documentation completed
|
||||
. (HTML|CSS)Definition de-singleton-ized
|
||||
|
||||
1.0.0beta, released 2006-08-16
|
||||
- First public release, most functionality implemented. Notable omissions are:
|
||||
. Shorthand CSS properties
|
||||
. Table CSS properties
|
||||
. IPv6 validation
|
||||
. Deprecated attribute transformations
|
||||
! First public release, most functionality implemented. Notable omissions are:
|
||||
+ Shorthand CSS properties
|
||||
+ Table CSS properties
|
||||
+ Deprecated attribute transformations
|
||||
|
33
README
33
README
@@ -1,13 +1,22 @@
|
||||
|
||||
README
|
||||
All about HTMLPurifier
|
||||
|
||||
HTMLPurifier is an HTML filtering solution. It uses a unique combination of
|
||||
robust whitelists and aggressive parsing to ensure that not only are XSS
|
||||
attacks thwarted, but the resulting HTML is standards compliant.
|
||||
|
||||
See INSTALL on how to use the library. See docs/ for more developer-oriented
|
||||
documentation as well as some code examples. Users of TinyMCE or FCKeditor
|
||||
may be especially interested in WYSIWYG.
|
||||
|
||||
HTMLPurifier can be found on the web at: http://hp.jpsband.org/
|
||||
README
|
||||
All about HTML Purifier
|
||||
|
||||
HTML Purifier is an HTML filtering solution that uses a unique combination
|
||||
of robust whitelists and aggressive parsing to ensure that not only are
|
||||
XSS attacks thwarted, but the resulting HTML is standards compliant.
|
||||
|
||||
HTML Purifier is oriented towards richly formatted documents from
|
||||
untrusted sources that require CSS and a full tag-set. This library can
|
||||
be configured to accept a more restrictive set of tags, but it won't be
|
||||
as efficient as more bare-bones parsers. It will, however, do the job
|
||||
right, which may be more important.
|
||||
|
||||
Places to go:
|
||||
|
||||
* See INSTALL for a quick installation guide
|
||||
* See docs/ for developer-oriented documentation, code examples and
|
||||
an in-depth installation guide.
|
||||
* See WYSIWYG for information on editors like TinyMCE and FCKeditor
|
||||
|
||||
HTML Purifier can be found on the web at: http://htmlpurifier.org/
|
||||
|
34
SLOW
34
SLOW
@@ -1,34 +0,0 @@
|
||||
|
||||
SLOW
|
||||
also known as the HELP ME LIBRARY IS TOO SLOW MY PAGE TAKE TOO LONG LOAD page
|
||||
|
||||
HTMLPurifier is a very powerful library. But with power comes great
|
||||
responsibility, or, at least, longer execution times. Remember, this
|
||||
library isn't lightly grazing over submitted HTML: it's deconstructing
|
||||
the whole thing, rigorously checking the parts, and then putting it
|
||||
back together.
|
||||
|
||||
So, if it so turns out that HTMLPurifier is kinda too slow for outbound
|
||||
filtering, you've got a few options:
|
||||
|
||||
1. Inbound filtering - perform filtering of HTML when it's submitted by the
|
||||
user. Since the user is already submitting something, an extra half a
|
||||
second tacked on to the load time probably isn't going to be that huge of
|
||||
a problem. Then, displaying the content is simply a matter of outputting
|
||||
it directly from your database/filesystem. The trouble with this method is
|
||||
that your user loses the original text, and when doing edits, will be
|
||||
handling the filtered text. Of course, maybe that's a good thing. If you
|
||||
don't mind a little extra complexity, you can try...
|
||||
|
||||
2. Caching the filtered output - accept the submitted text and put it
|
||||
unaltered into the database, but then also generate a filtered version and
|
||||
stash that in the database. Serve the filtered version to readers, and the
|
||||
unaltered version to editors. If need be, you can invalidate the cache and
|
||||
have the cached filtered version be regenerated on the first page view. Pros?
|
||||
Full data retention. Cons? It's more complicated.
|
||||
|
||||
In short, inbound filtering is almost as simple as outbound filtering, but
|
||||
it has some drawbacks which cannot be fixed unless you save both the original
|
||||
and the filtered versions.
|
||||
|
||||
There is a third option: profile and optimize HTMLPurifier yourself. ;-)
|
104
TODO
104
TODO
@@ -1,19 +1,91 @@
|
||||
Todo List
|
||||
|
||||
Core:
|
||||
- Finish table and shorthand CSS attributes
|
||||
- Implement all non-essential attribute transforms
|
||||
TODO List
|
||||
|
||||
Code issues:
|
||||
- Massive profiling, make it faster!
|
||||
- Fix IPv6 issues
|
||||
- Make URI validation routines tighter (especially mailto)
|
||||
- Distinguish between different types of URIs, for instance, a mailto URI
|
||||
in IMG SRC is nonsensical
|
||||
- Factor out Host validation to its own AttrDef
|
||||
- Rewrite table's child definition
|
||||
- Silently drop content inbetween SCRIPT tags
|
||||
= KEY ====================
|
||||
# Flagship
|
||||
- Regular
|
||||
? Maybe I'll Do It
|
||||
==========================
|
||||
|
||||
Enhancements:
|
||||
- Do fixes for Firefox's inability to handle COL alignment props (Bug 915)
|
||||
- Pretty-printing HTML
|
||||
2.1 release [Refactor, refactor!]
|
||||
# URI validation routines tighter (see docs/dev-code-quality.html) (COMPLEX)
|
||||
# Advanced URI filtering schemes (see docs/proposal-new-directives.txt)
|
||||
# Ruby support
|
||||
- Configuration profiles: predefined directives set with one func call
|
||||
- Implement IDREF support (harder than it seems, since you cannot have
|
||||
IDREFs to non-existent IDs)
|
||||
- Allow non-ASCII characters in font names
|
||||
|
||||
2.2 release [Error'ed]
|
||||
# Error logging for filtering/cleanup procedures
|
||||
- XSS-attempt detection
|
||||
|
||||
2.3 release [Do What I Mean, Not What I Say]
|
||||
# Additional support for poorly written HTML
|
||||
- Microsoft Word HTML cleaning (i.e. MsoNormal, but research essential!)
|
||||
- Friendly strict handling of <address> (block -> <br>)
|
||||
- Remove redundant tags, ex. <u><u>Underlined</u></u>. Implementation notes:
|
||||
1. Analyzing which tags to remove duplicates
|
||||
2. Ensure attributes are merged into the parent tag
|
||||
3. Extend the tag exclusion system to specify whether or not the
|
||||
contents should be dropped or not (currently, there's code that could do
|
||||
something like this if it didn't drop the inner text too.)
|
||||
- Remove <span> tags that don't do anything (no attributes)
|
||||
- Remove empty inline tags<i></i>
|
||||
- Append something to duplicate IDs so they're still usable (impl. note: the
|
||||
dupe detector would also need to detect the suffix as well)
|
||||
|
||||
2.4 release [It's All About Trust] (floating)
|
||||
# Implement untrusted, dangerous elements/attributes
|
||||
|
||||
3.0 release [Beyond HTML]
|
||||
# Legit token based CSS parsing (will require revamping almost every
|
||||
AttrDef class)
|
||||
# More control over allowed CSS properties (maybe modularize it in the
|
||||
same fashion!)
|
||||
# Formatters for plaintext
|
||||
- Smileys
|
||||
- Standardize token armor for all areas of processing
|
||||
- Fixes for Firefox's inability to handle COL alignment props (Bug 915)
|
||||
- Automatically add non-breaking spaces to empty table cells when
|
||||
empty-cells:show is applied to have compatibility with Internet Explorer
|
||||
- Convert RTL/LTR override characters to <bdo> tags, or vice versa on demand.
|
||||
Also, enable disabling of directionality
|
||||
|
||||
4.0 release [To XML and Beyond]
|
||||
- Extended HTML capabilities based on namespacing and tag transforms (COMPLEX)
|
||||
- Hooks for adding custom processors to custom namespaced tags and
|
||||
attributes, offer default implementation
|
||||
- Lots of documentation and samples
|
||||
|
||||
Ongoing
|
||||
- Lots of profiling, make it faster!
|
||||
- Plugins for major CMSes (COMPLEX)
|
||||
- WordPress (mostly written, needs beta-testing)
|
||||
- phpBB
|
||||
- Phorum
|
||||
- eFiction
|
||||
- more! (look for ones that use WYSIWYGs)
|
||||
- Complete basic smoketests
|
||||
|
||||
Unknown release (on a scratch-an-itch basis)
|
||||
? Semi-lossy dumb alternate character encoding transformation
|
||||
? Have 'lang' attribute be checked against official lists, achieved by
|
||||
encoding all characters that have string entity equivalents
|
||||
- Explain how to use HTML Purifier in non-PHP languages / create
|
||||
a simple command line stub
|
||||
- Abstract ChildDef_BlockQuote to work with all elements that only
|
||||
allow blocks in them, required or optional
|
||||
- Reorganize Unit Tests
|
||||
- Refactor loop tests (esp. AttrDef_URI)
|
||||
- Reorganize configuration directives (Create more namespaces! Get messy!)
|
||||
|
||||
Requested
|
||||
|
||||
Wontfix
|
||||
- Non-lossy smart alternate character encoding transformations (unless
|
||||
patch provided)
|
||||
- Pretty-printing HTML, users can use Tidy on the output on entire page
|
||||
- Native content compression, whitespace stripping (don't rely on Tidy, make
|
||||
sure we don't remove from <pre> or related tags): use gzip if this is
|
||||
really important
|
||||
|
12
WHATSNEW
Normal file
12
WHATSNEW
Normal file
@@ -0,0 +1,12 @@
|
||||
The 2.0.1 release introduces a number of stability and usability fixes,
|
||||
as well as a number of (disabled by default) experimental features. The
|
||||
security-minded should note that a reflected XSS vulnerability was patched
|
||||
in smoketests/configForm.php; if you cannot upgrade immediately, please
|
||||
delete that file (if that directory is not publicly accessible, there
|
||||
is no security risk). The maintenance changes include more helpful file
|
||||
permissions errors, internal newline normalization, reordered includes
|
||||
to prevent a missing class definition in some setups, and better cache
|
||||
revision and id handling. The two experimental features are auto-formatting
|
||||
(auto-paragraphing and linkification) and error collection; these can
|
||||
be enabled with %AutoFormat.AutoParagraph, %AutoFormat.Linkify and
|
||||
%Core.CollectErrors respectively.
|
4
WYSIWYG
4
WYSIWYG
@@ -1,6 +1,6 @@
|
||||
|
||||
WYSIWYG - What You See Is What You Get
|
||||
HTMLPurifier: A Pretty Good Fit for TinyMCE and FCKeditor
|
||||
HTML Purifier: A Pretty Good Fit for TinyMCE and FCKeditor
|
||||
|
||||
Javascript-based WYSIWYG editors, simply stated, are quite amazing. But I've
|
||||
always been wary about using them due to security issues: they handle the
|
||||
@@ -13,6 +13,6 @@ other markup languages still reign supreme. Put simply: filtering HTML is
|
||||
hard work, and these WYSIWYG authors don't offer anything to alleviate that
|
||||
trouble. Therein lies the solution:
|
||||
|
||||
HTMLPurifier is perfect for filtering pure-HTML input from WYSIWYG editors.
|
||||
HTML Purifier is perfect for filtering pure-HTML input from WYSIWYG editors.
|
||||
|
||||
Enough said.
|
||||
|
BIN
art/1000passes.png
Normal file
BIN
art/1000passes.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 3.4 KiB |
BIN
art/100cases.png
Normal file
BIN
art/100cases.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 2.7 KiB |
@@ -3,15 +3,25 @@
|
||||
// emulates inserting a dir called HTMLPurifier into your class dir
|
||||
set_include_path(get_include_path() . PATH_SEPARATOR . '../library/');
|
||||
|
||||
require_once 'HTMLPurifier/ConfigDef.php';
|
||||
require_once 'HTMLPurifier/Config.php';
|
||||
require_once 'HTMLPurifier/Lexer/DirectLex.php';
|
||||
require_once 'HTMLPurifier/Lexer/PEARSax3.php';
|
||||
@include_once '../test-settings.php';
|
||||
|
||||
$LEXERS = array(
|
||||
'DirectLex' => new HTMLPurifier_Lexer_DirectLex(),
|
||||
'PEARSax3' => new HTMLPurifier_Lexer_PEARSax3()
|
||||
);
|
||||
require_once 'HTMLPurifier/ConfigSchema.php';
|
||||
require_once 'HTMLPurifier/Config.php';
|
||||
require_once 'HTMLPurifier/Context.php';
|
||||
|
||||
$LEXERS = array();
|
||||
$RUNS = isset($GLOBALS['HTMLPurifierTest']['Runs'])
|
||||
? $GLOBALS['HTMLPurifierTest']['Runs'] : 2;
|
||||
|
||||
require_once 'HTMLPurifier/Lexer/DirectLex.php';
|
||||
$LEXERS['DirectLex'] = new HTMLPurifier_Lexer_DirectLex();
|
||||
|
||||
if (!empty($GLOBALS['HTMLPurifierTest']['PEAR'])) {
|
||||
require_once 'HTMLPurifier/Lexer/PEARSax3.php';
|
||||
$LEXERS['PEARSax3'] = new HTMLPurifier_Lexer_PEARSax3();
|
||||
} else {
|
||||
exit('PEAR required to perform benchmark.');
|
||||
}
|
||||
|
||||
if (version_compare(PHP_VERSION, '5', '>=')) {
|
||||
require_once 'HTMLPurifier/Lexer/DOMLex.php';
|
||||
@@ -56,9 +66,12 @@ class RowTimer extends Benchmark_Timer
|
||||
if ($standard == false) $standard = $v['diff'];
|
||||
|
||||
$perc = $v['diff'] * 100 / $standard;
|
||||
$bad_run = ($v['diff'] < 0);
|
||||
|
||||
$out .= '<td align="right">' . number_format($perc, 2, '.', '') .
|
||||
'%</td>';
|
||||
$out .= '<td align="right"'.
|
||||
($bad_run ? ' style="color:#AAA;"' : '').
|
||||
'>' . number_format($perc, 2, '.', '') .
|
||||
'%</td><td>'.number_format($v['diff'],4,'.','').'</td>';
|
||||
|
||||
}
|
||||
|
||||
@@ -79,13 +92,16 @@ function print_lexers() {
|
||||
}
|
||||
|
||||
function do_benchmark($name, $document) {
|
||||
global $LEXERS;
|
||||
global $LEXERS, $RUNS;
|
||||
|
||||
$config = HTMLPurifier_Config::createDefault();
|
||||
$context = new HTMLPurifier_Context();
|
||||
|
||||
$timer = new RowTimer($name);
|
||||
$timer->start();
|
||||
|
||||
foreach($LEXERS as $key => $lexer) {
|
||||
$tokens = $lexer->tokenizeHTML($document);
|
||||
for ($i=0; $i<$RUNS; $i++) $tokens = $lexer->tokenizeHTML($document, $config, $context);
|
||||
$timer->setMarker($key);
|
||||
}
|
||||
|
||||
@@ -103,7 +119,7 @@ function do_benchmark($name, $document) {
|
||||
<table border="1">
|
||||
<tr><th>Case</th><?php
|
||||
foreach ($LEXERS as $key => $value) {
|
||||
echo '<th>' . htmlspecialchars($key) . '</th>';
|
||||
echo '<th colspan="2">' . htmlspecialchars($key) . '</th>';
|
||||
}
|
||||
?></tr>
|
||||
<?php
|
||||
@@ -149,4 +165,4 @@ echo '<div>Random input was: ' .
|
||||
?>
|
||||
|
||||
|
||||
</body></html>
|
||||
</body></html>
|
||||
|
@@ -2,15 +2,16 @@
|
||||
|
||||
set_include_path(get_include_path() . PATH_SEPARATOR . '../library/');
|
||||
|
||||
require_once 'HTMLPurifier/ConfigDef.php';
|
||||
require_once 'HTMLPurifier/ConfigSchema.php';
|
||||
require_once 'HTMLPurifier/Config.php';
|
||||
require_once 'HTMLPurifier/Lexer/DirectLex.php';
|
||||
require_once 'HTMLPurifier/Context.php';
|
||||
|
||||
$input = file_get_contents('samples/Lexer/4.html');
|
||||
$lexer = new HTMLPurifier_Lexer_DirectLex();
|
||||
$config = HTMLPurifier_Config::createDefault();
|
||||
$context = new HTMLPurifier_Context();
|
||||
|
||||
for ($i = 0; $i < 10; $i++) {
|
||||
$tokens = $lexer->tokenizeHTML($input);
|
||||
$tokens = $lexer->tokenizeHTML($input, $config, $context);
|
||||
}
|
||||
|
||||
?>
|
43
configdoc/generate.php
Normal file
43
configdoc/generate.php
Normal file
@@ -0,0 +1,43 @@
|
||||
<?php
|
||||
|
||||
/**
|
||||
* Generates XML and HTML documents describing configuration.
|
||||
* @note PHP 5 only!
|
||||
*/
|
||||
|
||||
/*
|
||||
TODO:
|
||||
- make XML format richer (see XMLSerializer_ConfigSchema)
|
||||
- extend XSLT transformation (see the corresponding XSLT file)
|
||||
- allow generation of packaged docs that can be easily moved
|
||||
- multipage documentation
|
||||
- determine how to multilingualize
|
||||
- add blurbs to ToC
|
||||
*/
|
||||
|
||||
// Entry-point script: serializes the HTML Purifier configuration schema and
// renders it to "<style>.html" in the current working directory.

if (version_compare('5', PHP_VERSION, '>')) exit('Requires PHP 5 or higher.');
error_reporting(E_ALL); // probably not possible to use E_STRICT

// load dual-libraries
require_once '../library/HTMLPurifier.auto.php';
require_once 'library/ConfigDoc.auto.php';

// NOTE(review): presumably this configures the shared purifier instance that
// the serializers later fetch through HTMLPurifier::getInstance() - confirm.
$purifier = HTMLPurifier::getInstance(array(
    'AutoFormat.PurifierLinkify' => true
));

$schema = HTMLPurifier_ConfigSchema::instance();
$style = 'plain'; // use $_GET in the future
$configdoc = new ConfigDoc();
$output = $configdoc->generate($schema, $style);

// write out; file_put_contents() returns false on failure, so do not
// claim success unless the document actually reached the disk
if (file_put_contents("$style.html", $output) === false) {
    echo "Could not write $style.html.";
    exit(1);
}

if (php_sapi_name() != 'cli') {
    // output = instant feedback
    echo $output;
} else {
    echo 'Files generated successfully.';
}
|
||||
|
9
configdoc/library/ConfigDoc.auto.php
Normal file
9
configdoc/library/ConfigDoc.auto.php
Normal file
@@ -0,0 +1,9 @@
|
||||
<?php
|
||||
|
||||
/**
|
||||
* This is a stub include that automatically configures the include path.
|
||||
*/
|
||||
|
||||
// Prepend the directory containing this file to the include path, ahead of
// whatever entries were already configured.
set_include_path(dirname(__FILE__) . PATH_SEPARATOR . get_include_path() );
|
||||
// Loaded by bare name, so it is looked up through the include path.
require_once 'ConfigDoc.php';
|
||||
|
38
configdoc/library/ConfigDoc.php
Normal file
38
configdoc/library/ConfigDoc.php
Normal file
@@ -0,0 +1,38 @@
|
||||
<?php
|
||||
|
||||
require_once 'ConfigDoc/HTMLXSLTProcessor.php';
|
||||
require_once 'ConfigDoc/XMLSerializer/Types.php';
|
||||
require_once 'ConfigDoc/XMLSerializer/ConfigSchema.php';
|
||||
|
||||
/**
 * Facade that turns a configuration schema into an HTML document by
 * serializing it to XML and running the result through an XSLT stylesheet.
 */
class ConfigDoc
{

    /**
     * Generates the documentation for a schema.
     *
     * Side effects: writes types.xml one directory above this file and
     * configdoc.xml into the current working directory.
     *
     * @param $schema Schema object handed to both XML serializers
     * @param $xsl_stylesheet_name Base name (no extension) of a stylesheet
     *        in the styles/ directory one level above this file
     * @param $parameters Associative array of XSLT parameters
     * @return Result of ConfigDoc_HTMLXSLTProcessor::transformToHTML()
     */
    function generate($schema, $xsl_stylesheet_name = 'plain', $parameters = array()) {
        // generate types document, describing type constraints
        $types_serializer = new ConfigDoc_XMLSerializer_Types();
        $types_document = $types_serializer->serialize($schema);
        $types_document->save(dirname(__FILE__) . '/../types.xml'); // only ONE

        // generate configdoc.xml, documents configuration directives
        $schema_serializer = new ConfigDoc_XMLSerializer_ConfigSchema();
        $schema_document = $schema_serializer->serialize($schema);
        $schema_document->save('configdoc.xml');

        // setup transformation
        $xsl_stylesheet = dirname(__FILE__) . "/../styles/$xsl_stylesheet_name.xsl";
        $xslt_processor = new ConfigDoc_HTMLXSLTProcessor();
        $xslt_processor->setParameters($parameters);
        $xslt_processor->importStylesheet($xsl_stylesheet);

        return $xslt_processor->transformToHTML($schema_document);
    }

    /**
     * Remove any generated files
     *
     * Only configdoc.xml in the current working directory is removed;
     * types.xml is left in place. Safe to call when generate() has not
     * been run: a missing file is skipped instead of raising a warning.
     */
    function cleanup() {
        if (file_exists('configdoc.xml')) {
            unlink('configdoc.xml');
        }
    }

}
|
||||
|
62
configdoc/library/ConfigDoc/HTMLXSLTProcessor.php
Normal file
62
configdoc/library/ConfigDoc/HTMLXSLTProcessor.php
Normal file
@@ -0,0 +1,62 @@
|
||||
<?php
|
||||
|
||||
/**
|
||||
* Special XSLTProcessor specifically for HTML documents. Loosely
|
||||
* based off of XSLTProcessor, but not really
|
||||
*/
|
||||
class ConfigDoc_HTMLXSLTProcessor
{

    /** Wrapped XSLTProcessor that performs the actual transformation */
    protected $xsltProcessor;

    public function __construct() {
        $this->xsltProcessor = new XSLTProcessor();
    }

    /**
     * Registers the stylesheet the wrapped processor will apply.
     * @param $xsl Either an XSLT DOM tree or the filename of an XSL file;
     *        a filename is loaded into a fresh DOMDocument first
     * @return Whatever XSLTProcessor::importStylesheet() returns
     */
    public function importStylesheet($xsl) {
        if (!is_string($xsl)) {
            return $this->xsltProcessor->importStylesheet($xsl);
        }
        $stylesheet = new DOMDocument();
        $stylesheet->load($xsl);
        return $this->xsltProcessor->importStylesheet($stylesheet);
    }

    /**
     * Applies the imported stylesheet to an XML tree and post-processes
     * the serialized result so it reads as HTML.
     * @param $xml XML DOM tree
     * @return Transformed markup as a string
     */
    public function transformToHTML($xml) {
        $html = $this->xsltProcessor->transformToXML($xml);

        // HTML backwards-compatibility fudges, applied in this order:
        // "<br/>" becomes "<br />", then redundant xmlns attributes go
        $html = str_replace(
            array('/>', ' xmlns=""', ' xmlns="http://www.w3.org/1999/xhtml"'),
            array(' />', '', ''),
            $html
        );

        if (!class_exists('Tidy')) {
            return $html;
        }

        // Tidy is available: let it clean up and re-indent the output
        $tidy_options = array(
            'indent' => true,
            'output-xhtml' => true,
            'wrap' => 80
        );
        $tidy = new Tidy;
        $tidy->parseString($html, $tidy_options, 'utf8');
        $tidy->cleanRepair();
        return (string) $tidy;
    }

    /**
     * Forwards each name/value pair to the wrapped processor as a
     * stylesheet parameter in the default (empty) namespace.
     * @param $options Associative array of parameter names to values
     */
    public function setParameters($options) {
        foreach ($options as $param_name => $param_value) {
            $this->xsltProcessor->setParameter('', $param_name, $param_value);
        }
    }

}
|
||||
|
25
configdoc/library/ConfigDoc/XMLSerializer.php
Normal file
25
configdoc/library/ConfigDoc/XMLSerializer.php
Normal file
@@ -0,0 +1,25 @@
|
||||
<?php
|
||||
|
||||
/**
|
||||
* The XMLSerializer hierarchy of classes consist of classes that take
|
||||
* objects and serialize them into XML, specifically DOM, form; this
|
||||
* super-class contains convenience functions for those classes.
|
||||
*/
|
||||
class ConfigDoc_XMLSerializer
{

    /**
     * Purifies an HTML snippet and appends it to a node, wrapped in a
     * <div> that carries an XHTML xmlns attribute.
     *
     * The <div> is always appended. Its contents are skipped when the
     * purified HTML is empty or cannot be parsed as XML, because appending
     * an empty document fragment raises a warning.
     *
     * @param $document DOMDocument that owns $node
     * @param $node DOMNode to append the <div> to
     * @param $html HTML string; it is run through HTMLPurifier first
     */
    protected function appendHTMLDiv($document, $node, $html) {
        $purifier = HTMLPurifier::getInstance();
        $html = $purifier->purify($html);
        $dom_html = $document->createDocumentFragment();
        $parsed = $dom_html->appendXML($html);

        $dom_div = $document->createElement('div');
        $dom_div->setAttribute('xmlns', 'http://www.w3.org/1999/xhtml');
        if ($parsed && $dom_html->hasChildNodes()) {
            $dom_div->appendChild($dom_html);
        }

        $node->appendChild($dom_div);
    }

}
|
||||
|
123
configdoc/library/ConfigDoc/XMLSerializer/ConfigSchema.php
Normal file
123
configdoc/library/ConfigDoc/XMLSerializer/ConfigSchema.php
Normal file
@@ -0,0 +1,123 @@
|
||||
<?php
|
||||
|
||||
require_once 'ConfigDoc/XMLSerializer.php';
|
||||
|
||||
class ConfigDoc_XMLSerializer_ConfigSchema extends ConfigDoc_XMLSerializer
{

    /**
     * Serializes a schema into DOM form
     *
     * Produces a <configdoc> root holding a <title> and one <namespace>
     * per entry of $schema->info; each namespace holds one <directive>
     * per non-alias directive, with its aliases, constraints (type,
     * allowed values, default) and descriptions.
     *
     * @todo Split into sub-serializers
     * @param $schema HTMLPurifier_ConfigSchema to serialize
     * @return DOMDocument containing the serialized schema
     */
    public function serialize($schema) {
        $dom_document = new DOMDocument('1.0', 'UTF-8');
        $dom_root = $dom_document->createElement('configdoc');
        $dom_document->appendChild($dom_root);
        $dom_document->formatOutput = true;

        // add the name of the application
        $dom_root->appendChild(
            $this->createTextElement($dom_document, 'title', 'HTML Purifier')
        );

        /*
        TODO for XML format:
        - create a definition (DTD or other) once interface stabilizes
        */

        foreach ($schema->info as $namespace_name => $namespace_info) {

            $dom_namespace = $dom_document->createElement('namespace');
            $dom_root->appendChild($dom_namespace);

            $dom_namespace->setAttribute('id', $namespace_name);
            $dom_namespace->appendChild(
                $this->createTextElement($dom_document, 'name', $namespace_name)
            );
            $dom_namespace_description = $dom_document->createElement('description');
            $dom_namespace->appendChild($dom_namespace_description);
            $this->appendHTMLDiv($dom_document, $dom_namespace_description,
                $schema->info_namespace[$namespace_name]->description);

            foreach ($namespace_info as $name => $info) {

                // aliases are listed under the directive they point to
                if ($info->class == 'alias') continue;

                $dom_directive = $dom_document->createElement('directive');
                $dom_namespace->appendChild($dom_directive);

                $dom_directive->setAttribute('id', $namespace_name . '.' . $name);
                $dom_directive->appendChild(
                    $this->createTextElement($dom_document, 'name', $name)
                );

                $dom_aliases = $dom_document->createElement('aliases');
                $dom_directive->appendChild($dom_aliases);
                foreach ($info->directiveAliases as $alias) {
                    $dom_aliases->appendChild(
                        $this->createTextElement($dom_document, 'alias', $alias)
                    );
                }

                $dom_constraints = $dom_document->createElement('constraints');
                $dom_directive->appendChild($dom_constraints);

                $dom_type = $this->createTextElement($dom_document, 'type', $info->type);
                if ($info->allow_null) {
                    $dom_type->setAttribute('allow-null', 'yes');
                }
                $dom_constraints->appendChild($dom_type);

                // true means "no restriction"; otherwise keys are the values
                if ($info->allowed !== true) {
                    $dom_allowed = $dom_document->createElement('allowed');
                    $dom_constraints->appendChild($dom_allowed);
                    foreach ($info->allowed as $allowed => $bool) {
                        $dom_allowed->appendChild(
                            $this->createTextElement($dom_document, 'value', $allowed)
                        );
                    }
                }

                $default = $this->formatDefault(
                    $schema->defaults[$namespace_name][$name]
                );
                $dom_default = $this->createTextElement($dom_document, 'default', $default);

                // remove this once we get a DTD
                $dom_default->setAttribute('xml:space', 'preserve');

                $dom_constraints->appendChild($dom_default);

                $dom_descriptions = $dom_document->createElement('descriptions');
                $dom_directive->appendChild($dom_descriptions);

                foreach ($info->descriptions as $file => $file_descriptions) {
                    foreach ($file_descriptions as $line => $description) {
                        $dom_description = $dom_document->createElement('description');
                        // refuse to write $file if it's a full path
                        if (str_replace('\\', '/', realpath($file)) != $file) {
                            $dom_description->setAttribute('file', $file);
                            $dom_description->setAttribute('line', $line);
                        }
                        $this->appendHTMLDiv($dom_document, $dom_description, $description);
                        $dom_descriptions->appendChild($dom_description);
                    }
                }

            }

        }

        return $dom_document;

    }

    /**
     * Creates an element whose content is a properly escaped text node.
     *
     * The value argument of DOMDocument::createElement() is not escaped,
     * so a value containing "&" would not be stored correctly; a text
     * node escapes it on output.
     *
     * @param $document DOMDocument to create the nodes with
     * @param $name Element name
     * @param $text Text content; cast to string, empty yields no child
     * @return DOMElement not yet attached to the tree
     */
    protected function createTextElement($document, $name, $text) {
        $element = $document->createElement($name);
        $text = (string) $text;
        if ($text !== '') {
            $element->appendChild($document->createTextNode($text));
        }
        return $element;
    }

    /**
     * Renders a raw default value as the string shown in the docs.
     * @param $raw_default Default value as stored in the schema
     * @return 'true'/'false' for booleans, the value in double quotes for
     *         strings, 'null' for null, print_r() output otherwise
     */
    protected function formatDefault($raw_default) {
        if (is_bool($raw_default)) {
            return $raw_default ? 'true' : 'false';
        }
        if (is_string($raw_default)) {
            return "\"$raw_default\"";
        }
        if (is_null($raw_default)) {
            return 'null';
        }
        return print_r($raw_default, true);
    }

}
|
||||
|
26
configdoc/library/ConfigDoc/XMLSerializer/Types.php
Normal file
26
configdoc/library/ConfigDoc/XMLSerializer/Types.php
Normal file
@@ -0,0 +1,26 @@
|
||||
<?php
|
||||
|
||||
require_once 'ConfigDoc/XMLSerializer.php';
|
||||
|
||||
class ConfigDoc_XMLSerializer_Types extends ConfigDoc_XMLSerializer
{

    /**
     * Serializes the types in a schema into DOM form
     *
     * Produces a <types> root with one <type id="NAME">EXPANDED</type>
     * child per entry of $schema->types.
     *
     * @param $schema HTMLPurifier_ConfigSchema owner of types to serialize
     * @return DOMDocument containing the serialized types
     */
    public function serialize($schema) {
        $types_document = new DOMDocument('1.0', 'UTF-8');
        $types_root = $types_document->createElement('types');
        $types_document->appendChild($types_root);
        $types_document->formatOutput = true;
        foreach ($schema->types as $name => $expanded_name) {
            $types_type = $types_document->createElement('type');
            // createElement() does not escape its value argument, so the
            // text goes in through a text node, which is escaped on output
            $expanded_name = (string) $expanded_name;
            if ($expanded_name !== '') {
                $types_type->appendChild(
                    $types_document->createTextNode($expanded_name)
                );
            }
            $types_type->setAttribute('id', $name);
            $types_root->appendChild($types_type);
        }
        return $types_document;
    }

}
|
||||
|
24
configdoc/styles/plain.css
Normal file
24
configdoc/styles/plain.css
Normal file
@@ -0,0 +1,24 @@
|
||||
|
||||
body {margin:1em 4em;}
|
||||
|
||||
table {border-collapse:collapse;}
|
||||
table td, table th {padding:0.2em;}
|
||||
|
||||
table.constraints {margin:0 0 1em;}
|
||||
table.constraints th {text-align:left;padding-left:0.4em;}
|
||||
table.constraints td {padding-right:0.4em;}
|
||||
table.constraints td pre {margin:0;}
|
||||
|
||||
#toc {list-style-type:none; font-weight:bold;}
|
||||
#toc ul {list-style-type:disc; font-weight:normal;}
|
||||
|
||||
.description p {margin-top:0;margin-bottom:1em;}
|
||||
|
||||
#library, h1 {text-align:center; font-family:Garamond, serif;
|
||||
font-variant:small-caps;}
|
||||
#library {font-size:1em;}
|
||||
h1 {margin-top:0;}
|
||||
h2 {border-bottom:1px solid #CCC; font-family:sans-serif; font-weight:normal;
|
||||
font-size:1.3em;}
|
||||
h3 {font-family:sans-serif; font-size:1.1em; font-weight:bold; }
|
||||
h4 {font-family:sans-serif; font-size:0.9em; font-weight:bold; }
|
151
configdoc/styles/plain.xsl
Normal file
151
configdoc/styles/plain.xsl
Normal file
@@ -0,0 +1,151 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<xsl:stylesheet
|
||||
version = "1.0"
|
||||
xmlns = "http://www.w3.org/1999/xhtml"
|
||||
xmlns:xsl = "http://www.w3.org/1999/XSL/Transform"
|
||||
>
|
||||
<xsl:output
|
||||
method = "xml"
|
||||
encoding = "UTF-8"
|
||||
doctype-public = "-//W3C//DTD XHTML 1.0 Transitional//EN"
|
||||
doctype-system = "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"
|
||||
indent = "no"
|
||||
media-type = "text/html"
|
||||
/>
|
||||
<xsl:param name="css" select="'styles/plain.css'"/>
|
||||
<xsl:param name="title" select="'Configuration Documentation'"/>
|
||||
|
||||
<xsl:variable name="typeLookup" select="document('../types.xml')" />
|
||||
|
||||
<xsl:template match="/">
|
||||
<html lang="en" xml:lang="en">
|
||||
<head>
|
||||
<title><xsl:value-of select="$title" /> - <xsl:value-of select="/configdoc/title" /></title>
|
||||
<meta http-equiv="Content-Type" content="text/html;charset=UTF-8" />
|
||||
<link rel="stylesheet" type="text/css" href="{$css}" />
|
||||
</head>
|
||||
<body>
|
||||
<div id="library"><xsl:value-of select="/configdoc/title" /></div>
|
||||
<h1><xsl:value-of select="$title" /></h1>
|
||||
<h2>Table of Contents</h2>
|
||||
<ul id="toc">
|
||||
<xsl:apply-templates mode="toc" />
|
||||
</ul>
|
||||
<xsl:apply-templates />
|
||||
</body>
|
||||
</html>
|
||||
</xsl:template>
|
||||
|
||||
<xsl:template match="title" mode="toc" />
|
||||
<xsl:template match="namespace" mode="toc">
|
||||
<xsl:if test="count(directive)>0">
|
||||
<li>
|
||||
<a href="#{@id}"><xsl:value-of select="name" /></a>
|
||||
<ul>
|
||||
<xsl:apply-templates select="directive" mode="toc" />
|
||||
</ul>
|
||||
</li>
|
||||
</xsl:if>
|
||||
</xsl:template>
|
||||
<xsl:template match="directive" mode="toc">
|
||||
<li><a href="#{@id}"><xsl:value-of select="name" /></a></li>
|
||||
</xsl:template>
|
||||
|
||||
<xsl:template match="title" />
|
||||
|
||||
<xsl:template match="namespace">
|
||||
<xsl:apply-templates />
|
||||
<xsl:if test="count(directive)=0">
|
||||
<p>No configuration directives defined for this namespace.</p>
|
||||
</xsl:if>
|
||||
</xsl:template>
|
||||
<xsl:template match="namespace/name">
|
||||
<h2 id="{../@id}"><xsl:value-of select="." /></h2>
|
||||
</xsl:template>
|
||||
<xsl:template match="namespace/description">
|
||||
<div class="description">
|
||||
<xsl:copy-of select="div/node()" />
|
||||
</div>
|
||||
</xsl:template>
|
||||
|
||||
<xsl:template match="directive">
|
||||
<xsl:apply-templates />
|
||||
</xsl:template>
|
||||
<xsl:template match="directive/name">
|
||||
<xsl:apply-templates select="../aliases/alias" mode="anchor" />
|
||||
<h3 id="{../@id}"><xsl:value-of select="../@id" /></h3>
|
||||
</xsl:template>
|
||||
<xsl:template match="alias" mode="anchor">
|
||||
<a id="{.}"></a>
|
||||
</xsl:template>
|
||||
|
||||
<!-- Do not pass through -->
|
||||
<xsl:template match="alias"></xsl:template>
|
||||
|
||||
<!-- Renders a directive's constraints as a table: one row per child
     constraint element, followed by the calculated "Used by" and
     "Aliases" rows when the directive has data for them. -->
<xsl:template match="directive/constraints">
    <table class="constraints">
        <xsl:apply-templates />
        <!-- Calculated other values -->
        <xsl:if test="../descriptions/description[@file]">
            <tr>
                <th>Used by:</th>
                <td>
                    <!-- Only descriptions that carry a file attribute are
                         listed, so no empty entries or stray commas appear -->
                    <xsl:for-each select="../descriptions/description[@file]">
                        <xsl:if test="position()>1">, </xsl:if>
                        <xsl:value-of select="@file" />
                    </xsl:for-each>
                </td>
            </tr>
        </xsl:if>
        <xsl:if test="../aliases/alias">
            <xsl:apply-templates select="../aliases" mode="constraints" />
        </xsl:if>
    </table>
</xsl:template>
|
||||
<!-- Emits the "Aliases" row of the constraints table as a comma-separated
     list. The cells are wrapped in a <tr> because this template is applied
     directly inside <table class="constraints">. -->
<xsl:template match="directive/aliases" mode="constraints">
    <tr>
        <th>Aliases:</th>
        <td>
            <xsl:for-each select="alias">
                <xsl:if test="position()>1">, </xsl:if>
                <xsl:value-of select="." />
            </xsl:for-each>
        </td>
    </tr>
</xsl:template>
|
||||
<xsl:template match="directive//description">
|
||||
<div class="description">
|
||||
<xsl:copy-of select="div/node()" />
|
||||
</div>
|
||||
</xsl:template>
|
||||
|
||||
<xsl:template match="constraints/type">
|
||||
<tr>
|
||||
<th>Type:</th>
|
||||
<td>
|
||||
<xsl:variable name="type" select="text()" />
|
||||
<xsl:attribute name="class">type type-<xsl:value-of select="$type" /></xsl:attribute>
|
||||
<xsl:value-of select="$typeLookup/types/type[@id=$type]/text()" />
|
||||
<xsl:if test="@allow-null='yes'">
|
||||
(or null)
|
||||
</xsl:if>
|
||||
</td>
|
||||
</tr>
|
||||
</xsl:template>
|
||||
<xsl:template match="constraints/allowed">
|
||||
<tr>
|
||||
<th>Allowed values:</th>
|
||||
<td>
|
||||
<xsl:for-each select="value"><!--
|
||||
--><xsl:if test="position()>1">, </xsl:if>
|
||||
"<xsl:value-of select="." />"<!--
|
||||
--></xsl:for-each>
|
||||
</td>
|
||||
</tr>
|
||||
</xsl:template>
|
||||
<xsl:template match="constraints/default">
|
||||
<tr>
|
||||
<th>Default:</th>
|
||||
<td><pre><xsl:value-of select="." xml:space="preserve" /></pre></td>
|
||||
</tr>
|
||||
</xsl:template>
|
||||
|
||||
</xsl:stylesheet>
|
@@ -1,38 +0,0 @@
|
||||
|
||||
Code Quality Issues
|
||||
|
||||
Okay, face it. Programmers can get lazy, cut corners, or make mistakes. They
|
||||
also can do quick prototypes, and then forget to rewrite them later. Well,
|
||||
while I can't list mistakes in here, I can list prototype-like segments
|
||||
of code that should be aggressively refactored after the beta is released.
|
||||
This does not list optimization issues; those need to be addressed after intense
|
||||
profiling.
|
||||
|
||||
Here we go:
|
||||
|
||||
AttrDef
|
||||
Class - doesn't support Unicode characters, uses regular expressions
|
||||
Lang - code duplication, premature optimization, doesn't consult official
|
||||
lists
|
||||
Pixels/Length/MultiLength - implemented according to HTML spec (excludes
|
||||
code reuse in CSS)
|
||||
URI - multiple regular expressions, needs host validation routines factored
|
||||
out for mailto scheme, IPv6 validation is broken (fringe), unintuitive
|
||||
variable overwriting, missing validation for query, fragment and path,
|
||||
no percent-encode fixing
|
||||
CSS - parser doesn't accept advanced CSS (fringe)
|
||||
AttrTransform - doesn't accept AttrContext, non-validating
|
||||
Lang - invalid xml:lang value can overwrite valid lang value (fringe)
|
||||
ChildDef - not-allowed nodes translated to text, likely invalid handling
|
||||
Config - "load configuration" hooks missing, rich set* accessors missing
|
||||
Strategy
|
||||
FixNesting - cannot bubble nodes out of structures
|
||||
MakeWellFormed - insufficient automatic closing definitions
|
||||
RemoveForeignElements - should be run in parallel with MakeWellFormed
|
||||
URIScheme - needs to have callable generic checks
|
||||
ftp - missing typecode check
|
||||
mailto - doesn't validate emails
|
||||
news - doesn't validate opaque path
|
||||
nntp - doesn't constrain path
|
||||
EOL
|
||||
|
@@ -1,45 +0,0 @@
|
||||
|
||||
Configuration Ideas
|
||||
|
||||
Here are some theoretical configuration ideas that we could implement some
|
||||
time. Note the naming convention: %Namespace.Directive
|
||||
|
||||
%Attr.IDPrefix - prefix all ids with this
|
||||
|
||||
%Attr.RewriteFragments - if there's %Attr.IDPrefix we may want to transparently
|
||||
rewrite the URLs we parse too. However, we can only do it when it's a pure
|
||||
anchor link, so it's not foolproof
|
||||
|
||||
%Attr.ClassBlacklist,
|
||||
%Attr.ClassWhitelist,
|
||||
%Attr.ClassListMode - determines what classes are allowed. When
|
||||
%Attr.ClassListMode is set to Blacklist, only allow those not in
|
||||
%Attr.ClassBlacklist. When it's Whitelist, only allow those in
|
||||
%Attr.ClassWhitelist.
|
||||
|
||||
%Attr.LangAlphaOnly - designate whether or not to allow numerals in language
|
||||
code subtags
|
||||
* RFC 1766, the current standard referenced by XML, does not permit
|
||||
numbers, but,
|
||||
* RFC 3066, the superseding best practice standard since January 2001,
|
||||
permits them.
|
||||
We allow numbers by default, but you generally never see them
|
||||
at all, which makes this a little more sane.
|
||||
|
||||
%Attr.MaxWidth,
|
||||
%Attr.MaxHeight - caps for width and height related checks.
|
||||
|
||||
%URI.Munge - will munge all URIs to a different URI, which should redirect
|
||||
the user to the applicable page. A urlencoded version of the URI
|
||||
will replace any instances of %s in the string. One possible
|
||||
string is 'http://www.google.com/url?q=%s'. Useful for preventing
|
||||
pagerank from being sent to other sites
|
||||
|
||||
%URI.AddRelNofollow - will add rel="nofollow" to all links, preventing the
|
||||
spread of ill-gotten pagerank
|
||||
|
||||
%URI.Host - host of website, for external link checks
|
||||
|
||||
%URI.RelativeToAbsolute - transforms all relative URIs to absolute form
|
||||
|
||||
%URI.DisableExternal - disable external links
|
@@ -1,18 +0,0 @@
|
||||
|
||||
Configuration
|
||||
|
||||
Configuration is documented on a per-use case: if a class uses a certain
|
||||
value from the configuration object, it has to define its name and what the
|
||||
value is used for. This means decentralized configuration declarations that
|
||||
are nevertheless error checking and a centralized configuration object.
|
||||
|
||||
Directives are divided into namespaces, indicating the major portion of
|
||||
functionality they cover (although there may be overlaps). Please consult
|
||||
the documentation in ConfigDef for more information on these namespaces.
|
||||
|
||||
Since configuration is dependent on context, most of the internal classes
|
||||
require a configuration object to be passed as a parameter. However, a few
|
||||
make this optional: they will supply a default configuration object if none
|
||||
are passed. These classes are: HTMLPurifier::*, Generator::generateFromTokens
|
||||
and Lexer::tokenizeHTML. However, whenever a valid configuration object
|
||||
is defined, that object should be used.
|
213
docs/dev-advanced-api.html
Normal file
213
docs/dev-advanced-api.html
Normal file
@@ -0,0 +1,213 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
|
||||
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
|
||||
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"><head>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
|
||||
<meta name="description" content="Functional specification for HTML Purifier's advanced API for defining custom filtering behavior." />
|
||||
<link rel="stylesheet" type="text/css" href="style.css" />
|
||||
|
||||
<title>Advanced API - HTML Purifier</title>
|
||||
|
||||
</head><body>
|
||||
|
||||
<h1>Advanced API</h1>
|
||||
|
||||
<div id="filing">Filed under Development</div>
|
||||
<div id="index">Return to the <a href="index.html">index</a>.</div>
|
||||
<div id="home"><a href="http://htmlpurifier.org/">HTML Purifier</a> End-User Documentation</div>
|
||||
|
||||
<p>HTML Purifier currently natively supports only a subset of HTML's
|
||||
allowed elements, attributes, and behavior; specifically, this subset
|
||||
is the set of elements that are safe for untrusted users to use.
|
||||
However, HTML Purifier is often utilized to ensure standards-compliance
|
||||
from input that is trusted (making it a sort of Tidy substitute),
|
||||
and often users need to define new elements or attributes. The
|
||||
advanced API is oriented specifically for these use-cases.</p>
|
||||
|
||||
<p>Our goals are to let the user:</p>
|
||||
|
||||
<dl>
|
||||
<dt>Select</dt>
|
||||
<dd><ul>
|
||||
<li>Doctype</li>
|
||||
<!-- <li>Filterset</li> -->
|
||||
<li>Elements / Attributes / Modules</li>
|
||||
<li>Tidy</li>
|
||||
</ul></dd>
|
||||
<dt>Customize</dt>
|
||||
<dd><ul>
|
||||
<li>Attributes</li>
|
||||
<li>Elements</li>
|
||||
<!--<li>Doctypes</li>-->
|
||||
</ul></dd>
|
||||
</dl>
|
||||
|
||||
<h2>Select</h2>
|
||||
|
||||
<p>For basic use, the user will have to specify some basic parameters. This
|
||||
is not strictly necessary, as HTML Purifier's default setting will always
|
||||
output safe code, but is required for standards-compliant output.</p>
|
||||
|
||||
<h3>Selecting a Doctype</h3>
|
||||
|
||||
<p>The first thing to select is the <strong>doctype</strong>. This
|
||||
is essential for standards-compliant output.</p>
|
||||
|
||||
<p class="technical">This identifier is based
|
||||
on the name the W3C has given to the document type and <em>not</em>
|
||||
the DTD identifier.</p>
|
||||
|
||||
<p>This parameter is set via the configuration object:</p>
|
||||
|
||||
<pre>$config->set('HTML', 'Doctype', 'XHTML 1.0 Transitional');</pre>
|
||||
|
||||
<p>Due to historical reasons, the default doctype is XHTML 1.0
|
||||
Transitional; however, we really shouldn't be guessing what the user's
|
||||
doctype is. Fortunately, people who can't be bothered to set this won't
|
||||
be bothered when their pages stop validating.</p>
|
||||
|
||||
<h3>Selecting Elements / Attributes / Modules</h3>
|
||||
|
||||
<p>HTML Purifier will, by default, allow as many elements and attributes
|
||||
as possible. However, a user may decide to roll their own filterset by
|
||||
selecting modules, elements and attributes to allow for their own
|
||||
specific use-case. This can be done using %HTML.Allowed:</p>
|
||||
|
||||
<pre>$config->set('HTML', 'Allowed', 'a[href|title],em,p,blockquote');</pre>
|
||||
|
||||
<p class="technical">The directive %HTML.Allowed is a convenience feature
|
||||
that may be fully expressed with the legacy interface.</p>
|
||||
|
||||
<p>We currently support another interface from older versions:</p>
|
||||
|
||||
<pre>$config->set('HTML', 'AllowedElements', 'a,em,p,blockquote');
|
||||
$config->set('HTML', 'AllowedAttributes', 'a.href,a.title');</pre>
|
||||
|
||||
<p>A user may also choose to allow modules using a specialized
|
||||
directive:</p>
|
||||
|
||||
<pre>$config->set('HTML', 'AllowedModules', 'Hypertext,Text,Lists');</pre>
|
||||
|
||||
<p>But it is not expected that this feature will be widely used.</p>
|
||||
|
||||
<p class="technical">Module selection will work slightly differently
|
||||
from the other AllowedElements and AllowedAttributes directives by
|
||||
directly modifying the doctype you are operating in, in the spirit of
|
||||
XHTML 1.1's modularization. We stop users from shooting themselves in the
|
||||
foot by mandating the modules in %HTML.CoreModules be used.</p>
|
||||
|
||||
<p class="technical">Modules are distinguished from regular elements by the
|
||||
case of their first letter. While XML distinguishes between and allows
|
||||
lower and uppercase letters in element names, XHTML uses only lower-case
|
||||
element names for sake of consistency.</p>
|
||||
|
||||
<h3>Selecting Tidy</h3>
|
||||
|
||||
<p>The name of this segment of functionality is inspired by Dave
|
||||
Raggett's program HTML Tidy, which purported to help clean up HTML. In
|
||||
HTML Purifier, Tidy functionality involves turning unsupported and
|
||||
deprecated elements into standards-compliant ones, maintaining
|
||||
backwards compatibility, and enforcing best practices.</p>
|
||||
|
||||
<p>This is a complicated feature, and is explained more in depth at
|
||||
<a href="enduser-tidy.html">the Tidy documentation page</a>.</p>
|
||||
|
||||
<!--
|
||||
<h3>Unified selector</h3>
|
||||
|
||||
<p>Because selecting each and every one of these configuration options
|
||||
is a chore, we may wish to offer a specialized configuration method
|
||||
for selecting a filterset. Possibility:</p>
|
||||
|
||||
<pre>function selectFilter($doctype, $filterset, $tidy)</pre>
|
||||
|
||||
<p>...which is simply a light wrapper over the individual configuration
|
||||
calls. A custom config file format or text format could also be adopted.</p>
|
||||
-->
|
||||
|
||||
<h2>Customize</h2>
|
||||
|
||||
<p>By reviewing topic posts in the support forum, we determined that
|
||||
there were two customization features that people primarily wanted:
|
||||
to add an attribute to an existing element, and to add an element.
|
||||
Thus, we'll want to create convenience functions for these common
|
||||
use-cases.</p>
|
||||
|
||||
<p>Note that the functions described here are only available if
|
||||
a raw copy of <code>HTMLPurifier_HTMLDefinition</code> was retrieved.
|
||||
Furthermore, caching may prevent your changes from immediately
|
||||
being seen: consult <a href="enduser-customize.html">enduser-customize.html</a> on how
|
||||
to work around this.</p>
|
||||
|
||||
<h3>Attributes</h3>
|
||||
|
||||
<p>An attribute is bound to an element by a name and has a specific
|
||||
<code>AttrDef</code> that validates it. The interface is therefore:</p>
|
||||
|
||||
<pre>function addAttribute($element, $attribute, $attribute_def);</pre>
|
||||
|
||||
<p>Example of the functionality in action:</p>
|
||||
|
||||
<pre>$def->addAttribute('a', 'rel', 'Enum#nofollow');</pre>
|
||||
|
||||
<p>The <code>$attribute_def</code> value is flexible,
|
||||
to make things simpler. It can be a literal object or:</p>
|
||||
|
||||
<ul>
|
||||
<!--<li>Class name: We'll instantiate it for you</li>
|
||||
<li>Function name: We'll create an <code>HTMLPurifier_AttrDef_Anonymous</code>
|
||||
class with that function registered as a callback.</li>-->
|
||||
<li>String attribute type: We'll use <code>HTMLPurifier_AttrTypes</code>
|
||||
to resolve it for you. Any data that follows a hash mark (#) will
|
||||
be used to customize the attribute type: in the example above,
|
||||
we specify which values for Enum to allow.</li>
|
||||
</ul>
|
||||
|
||||
<h3>Elements</h3>
|
||||
|
||||
<p>An element requires certain information as specified by
|
||||
<code>HTMLPurifier_ElementDef</code>. However, not all of it is necessary;
|
||||
the usual things required are:</p>
|
||||
|
||||
<ul>
|
||||
<li>Attributes</li>
|
||||
<li>Content model/type</li>
|
||||
<li>Registration in a content set</li>
|
||||
</ul>
|
||||
|
||||
<p>This suggests an API like this:</p>
|
||||
|
||||
<pre>function addElement($element, $type, $contents,
|
||||
$attr_collections = array(), $attributes = array());</pre>
|
||||
|
||||
<p>Each parameter explained in depth:</p>
|
||||
|
||||
<dl>
|
||||
<dt><code>$element</code></dt>
|
||||
<dd>Element name, ex. 'label'</dd>
|
||||
<dt><code>$type</code></dt>
|
||||
<dd>Content set to register in, ex. 'Inline' or 'Flow'</dd>
|
||||
<dt><code>$contents</code></dt>
|
||||
<dd>Description of allowed children. This is a merged form of
|
||||
<code>HTMLPurifier_ElementDef</code>'s member variables
|
||||
<code>$content_model</code> and <code>$content_model_type</code>,
|
||||
where the form is <q>Type: Model</q>, ex. 'Optional: Inline'.
|
||||
There are also a number of predefined templates one may use.</dd>
|
||||
<dt><code>$attr_collections</code></dt>
|
||||
<dd>Array (or string if only one) of attribute collection(s) to
|
||||
merge into the attributes array.</dd>
|
||||
<dt><code>$attributes</code></dt>
|
||||
<dd>Array of attribute names to attribute definitions, much like
|
||||
the above-described attribute customization.</dd>
|
||||
</dl>
|
||||
|
||||
<p>A possible usage:</p>
|
||||
|
||||
<pre>$def->addElement('font', 'Inline', 'Optional: Inline', 'Common',
|
||||
array('color' => 'Color'));</pre>
|
||||
|
||||
<p>See <code>HTMLPurifier/HTMLModule.php</code> for details.</p>
|
||||
|
||||
<div id="version">$Id$</div>
|
||||
|
||||
</body></html>
|
32
docs/dev-code-quality.txt
Normal file
32
docs/dev-code-quality.txt
Normal file
@@ -0,0 +1,32 @@
|
||||
|
||||
Code Quality Issues
|
||||
|
||||
Okay, face it. Programmers can get lazy, cut corners, or make mistakes. They
|
||||
also can do quick prototypes, and then forget to rewrite them later. Well,
|
||||
while I can't list mistakes in here, I can list prototype-like segments
|
||||
of code that should be aggressively refactored. This does not list
|
||||
optimization issues; those need to be addressed after intense profiling.
|
||||
|
||||
docs/examples/demo.php - ad hoc HTML/PHP soup to the extreme
|
||||
|
||||
AttrDef - a lot of duplication, more generic classes need to be created;
|
||||
a lot of strtolower() calls, no legit casing
|
||||
Class - doesn't support Unicode characters (fringe); uses regular
|
||||
expressions
|
||||
Lang - code duplication; premature optimization
|
||||
Length - easily mistaken for CSSLength
|
||||
URI - multiple regular expressions; missing validation for parts (?)
|
||||
CSS - parser doesn't accept advanced CSS (fringe)
|
||||
Number - constructor interface inconsistent with Integer
|
||||
ConfigSchema - redefinition is a mess
|
||||
Strategy
|
||||
FixNesting - cannot bubble nodes out of structures, duplicated checks
|
||||
for special-case parent node
|
||||
MakeWellFormed - insufficient automatic closing definitions (check HTML
|
||||
spec for optional end tags, also, closing based on type (block/inline)
|
||||
might be efficient).
|
||||
RemoveForeignElements - should be run in parallel with MakeWellFormed
|
||||
URIScheme - needs to have callable generic checks
|
||||
mailto - doesn't validate emails, doesn't validate querystring
|
||||
news - doesn't validate opaque path
|
||||
nntp - doesn't constrain path
|
82
docs/dev-naming.html
Normal file
82
docs/dev-naming.html
Normal file
@@ -0,0 +1,82 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
|
||||
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
|
||||
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"><head>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
|
||||
<meta name="description" content="Defines class naming conventions in HTML Purifier." />
|
||||
<link rel="stylesheet" type="text/css" href="./style.css" />
|
||||
|
||||
<title>Naming Conventions - HTML Purifier</title>
|
||||
|
||||
</head><body>
|
||||
|
||||
<h1>Naming Conventions</h1>
|
||||
|
||||
<div id="filing">Filed under Development</div>
|
||||
<div id="index">Return to the <a href="index.html">index</a>.</div>
|
||||
<div id="home"><a href="http://htmlpurifier.org/">HTML Purifier</a> End-User Documentation</div>
|
||||
|
||||
<p>The classes in this library follow a few naming conventions, which may
|
||||
help you find the correct functionality more quickly. Here they are:</p>
|
||||
|
||||
<dl>
|
||||
|
||||
<dt>All classes occupy the HTMLPurifier pseudo-namespace.</dt>
|
||||
<dd>This means that all classes are prefixed with HTMLPurifier_. As such, all
|
||||
names under HTMLPurifier_ are reserved. I recommend that you use the name
|
||||
HTMLPurifierX_YourName_ClassName, especially if you want to take advantage
|
||||
of HTMLPurifier_ConfigDef.</dd>
|
||||
|
||||
<dt>All classes correspond to their path if library/ was in the include path</dt>
|
||||
<dd>HTMLPurifier_AttrDef is located at HTMLPurifier/AttrDef.php; replace
|
||||
underscores with slashes and append .php and you'll have the location of
|
||||
the class.</dd>
|
||||
|
||||
<dt>Harness and Test are reserved class names for unit tests</dt>
|
||||
<dd>The suffix <code>Test</code> indicates that the class is a subclass of UnitTestCase
|
||||
(of the Simpletest library) and is testable. "Harness" indicates a subclass
|
||||
of UnitTestCase that is not meant to be run but to be extended into
|
||||
concrete test cases and contains custom test methods (i.e. assert*())</dd>
|
||||
|
||||
<dt>Class names do not necessarily represent inheritance hierarchies</dt>
|
||||
<dd>While we try to reflect inheritance in naming to some extent, it is not
|
||||
guaranteed (for instance, none of the classes inherit from HTMLPurifier,
|
||||
the base class). However, all class files have the require_once
|
||||
declarations to whichever classes they are tightly coupled to.</dd>
|
||||
|
||||
<dt>Strategy has a meaning different from the Gang of Four pattern</dt>
|
||||
<dd>In Design Patterns, the Gang of Four describes a Strategy object as
|
||||
encapsulating an algorithm so that they can be switched at run-time. While
|
||||
our strategies are indeed algorithms, they are not meant to be substituted:
|
||||
all must be present in order for proper functioning.</dd>
|
||||
|
||||
<dt>Abbreviations are avoided</dt>
|
||||
<dd>We try to avoid abbreviations as much as possible, but in some cases,
|
||||
the abbreviated version is more readable than the full version. Here, we
|
||||
list common abbreviations:
|
||||
<ul>
|
||||
<li>Attr to Attributes (note that it is plural, i.e. <code>$attr = array()</code>)</li>
|
||||
<li>Def to Definition</li>
|
||||
<li><code>$ret</code> is the value to be returned in a function</li>
|
||||
</ul>
|
||||
</dd>
|
||||
|
||||
<dt>Ambiguity concerning the definition of Def/Definition</dt>
|
||||
<dd>While a definition normally defines the structure/acceptable values of
|
||||
an entity, most of the definitions in this application also attempt
|
||||
to validate and fix the value. I am unsure of a better name, as
|
||||
"Validator" would exclude fixing the value, "Fixer" doesn't invoke
|
||||
the proper image of "fixing" something, and "ValidatorFixer" is too long!
|
||||
Some other suggestions were "Handler", "Reference", "Check", "Fix",
|
||||
"Repair" and "Heal".</dd>
|
||||
|
||||
<dt>Transform not Transformer</dt>
|
||||
<dd>Transform is both a noun and a verb, and thus we define a "Transform" as
|
||||
something that "transforms," leaving "Transformer" unused (which sounds like an
|
||||
electrical device/robot toy).</dd>
|
||||
|
||||
</dl>
|
||||
|
||||
<div id="version">$Id$</div>
|
||||
|
||||
</body></html>
|
33
docs/dev-optimization.html
Normal file
33
docs/dev-optimization.html
Normal file
@@ -0,0 +1,33 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
|
||||
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
|
||||
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"><head>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
|
||||
<meta name="description" content="Discusses possible methods of optimizing HTML Purifier." />
|
||||
<link rel="stylesheet" type="text/css" href="./style.css" />
|
||||
|
||||
<title>Optimization - HTML Purifier</title>
|
||||
|
||||
</head><body>
|
||||
|
||||
<h1>Optimization</h1>
|
||||
|
||||
<div id="filing">Filed under Development</div>
|
||||
<div id="index">Return to the <a href="index.html">index</a>.</div>
|
||||
<div id="home"><a href="http://htmlpurifier.org/">HTML Purifier</a> End-User Documentation</div>
|
||||
|
||||
<p>Here are some possible optimization techniques we can apply to code sections if
|
||||
they turn out to be slow. Be sure not to prematurely optimize: if you get
|
||||
that itch, put it here!</p>
|
||||
|
||||
<ul>
|
||||
<li>Make Tokens Flyweights (may prove problematic, probably not worth it)</li>
|
||||
<li>Rewrite regexps into PHP code</li>
|
||||
<li>Serialize the Definition object</li>
|
||||
<li>Batch regexp validation (do as many per function call as possible)</li>
|
||||
<li>Parallelize strategies</li>
|
||||
</ul>
|
||||
|
||||
<div id="version">$Id$</div>
|
||||
|
||||
</body></html>
|
@@ -1,290 +1,302 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
|
||||
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
|
||||
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"><head>
|
||||
|
||||
<title>HTMLPurifier Progress</title>
|
||||
|
||||
<style type="text/css">
|
||||
|
||||
td {padding-right:1em;border-bottom:1px solid #000;padding-left:0.5em;}
|
||||
th {text-align:left;padding-top:1.4em;font-size:13pt;
|
||||
border-bottom:2px solid #000;background:#FFF;}
|
||||
thead th {text-align:left;padding:0.1em;background-color:#EEE;}
|
||||
|
||||
.impl-yes {background:#AFA;}
|
||||
.impl-partial {background:#FFA;}
|
||||
.impl-no {background:#FAA;}
|
||||
|
||||
.danger {background:#FEE;}
|
||||
.css1 {color:#060;}
|
||||
.required {font-weight:bold;}
|
||||
|
||||
</style>
|
||||
|
||||
</head><body>
|
||||
|
||||
<h1>HTMLPurifier Progress</h1>
|
||||
|
||||
<h2>Key</h2>
|
||||
|
||||
<table cellspacing="0"><tbody>
|
||||
<tr><td class="impl-yes">Implemented</td></tr>
|
||||
<tr><td class="impl-partial">Partially implemented</td></tr>
|
||||
<tr><td class="impl-no">Will not implement</td></tr>
|
||||
<tr><td class="danger">Dangerous attribute/property</td></tr>
|
||||
<tr><td class="css1">Present in CSS1</td></tr>
|
||||
</tbody></table>
|
||||
|
||||
<h2>Interesting Attributes</h2>
|
||||
|
||||
<table cellspacing="0">
|
||||
|
||||
<thead>
|
||||
<tr><th>Attribute</th><th>Tags</th><th>Notes</th></tr>
|
||||
</thead>
|
||||
|
||||
<!--
|
||||
<tr><th></th></tr>
|
||||
<tbody>
|
||||
<tr><td>-</td><td>-</td><td>-</td></tr>
|
||||
</tbody>
|
||||
-->
|
||||
|
||||
<tbody>
|
||||
<tr><th colspan="3">CSS</th></tr>
|
||||
<tr class="impl-partial"><td>style</td><td>All</td><td>Needs CSS parser</td></tr>
|
||||
</tbody>
|
||||
|
||||
<tbody>
|
||||
<tr><th colspan="3">Questionable</th></tr>
|
||||
<tr class="impl-no"><td>accesskey</td><td>A</td><td>May interfere with main interface</td></tr>
|
||||
<tr class="impl-no"><td>tabindex</td><td>A</td><td>May interfere with main interface</td></tr>
|
||||
<tr><td>target</td><td>A</td><td>Config enabled, only useful for frame layouts</td></tr>
|
||||
</tbody>
|
||||
|
||||
<tbody>
|
||||
<tr><th colspan="3">Miscellaneous</th></tr>
|
||||
<tr><td>datetime</td><td>DEL, INS</td><td>No visible effect, ISO format</td></tr>
|
||||
<tr><td>rel</td><td>A</td><td>Largely user-defined: nofollow, tag (see microformats)</td></tr>
|
||||
<tr><td>rev</td><td>A</td><td>Largely user-defined: vote-*</td></tr>
|
||||
<tr class="impl-no"><td>axis</td><td>TD, TH</td><td>W3C only: No browser implementation</td></tr>
|
||||
<tr class="impl-no"><td>char</td><td>COL, COLGROUP, TBODY, TD, TFOOT, TH, THEAD, TR</td><td>W3C only: No browser implementation</td></tr>
|
||||
<tr class="impl-no"><td>headers</td><td>TD, TH</td><td>W3C only: No browser implementation</td></tr>
|
||||
<tr class="impl-no"><td>scope</td><td>TD, TH</td><td>W3C only: No browser implementation</td></tr>
|
||||
</tbody>
|
||||
|
||||
<tbody class="impl-yes">
|
||||
<tr><th colspan="3">URI</th></tr>
|
||||
<tr><td rowspan="2">cite</td><td>BLOCKQUOTE, Q</td><td>For attribution</td></tr>
|
||||
<tr><td>DEL, INS</td><td>Link to explanation why it changed</td></tr>
|
||||
<tr><td>href</td><td>A</td><td>-</td></tr>
|
||||
<tr><td>longdesc</td><td>IMG</td><td>-</td></tr>
|
||||
<tr class="required"><td>src</td><td>IMG</td><td>Required</td></tr>
|
||||
</tbody>
|
||||
|
||||
<tbody>
|
||||
<tr><th colspan="3">Transform</th></tr>
|
||||
<tr><td rowspan="5">align</td><td>CAPTION</td><td>Near-equiv style 'caption-side', drop left and right</td></tr>
|
||||
<tr><td>IMG</td><td rowspan="2">Margin-left and margin-right = auto or parent div</td></tr>
|
||||
<tr><td>TABLE</td></tr>
|
||||
<tr><td>HR</td><td>Equivalent style 'text-align' (IE tested)</td></tr>
|
||||
<tr class="impl-yes"><td>H1, H2, H3, H4, H5, H6, P</td><td>Equivalent style 'text-align'</td></tr>
|
||||
<tr class="required impl-yes"><td>alt</td><td>IMG</td><td>Required, insert image filename if src is present or default invalid image text</td></tr>
|
||||
<tr><td rowspan="3">bgcolor</td><td>TABLE</td><td>Equivalent style 'background-color' (IE tested)</td></tr>
|
||||
<tr><td>TR</td><td>Equivalent style 'background-color' (IE tested)</td></tr>
|
||||
<tr><td>TD, TH</td><td>Equivalent style 'background-color'</td></tr>
|
||||
<tr><td>border</td><td>IMG</td><td>Equivalent style 'border-width', only applies when link present</td></tr>
|
||||
<tr><td>clear</td><td>BR</td><td>Near-equiv style 'clear', transform 'all' into 'both'</td></tr>
|
||||
<tr class="impl-no"><td>compact</td><td>DL, OL, UL</td><td>Boolean, needs custom CSS class</td></tr>
|
||||
<tr class="required impl-yes"><td>dir</td><td>BDO</td><td>Required, insert ltr (or configuration value) if none</td></tr>
|
||||
<tr><td>height</td><td>TD, TH</td><td>Near-equiv style 'height', needs px suffix if original was in pixels</td></tr>
|
||||
<tr><td>hspace</td><td>IMG</td><td>Near-equiv styles 'margin-left' and 'margin-right', needs px suffix</td></tr>
|
||||
<tr class="impl-yes"><td>lang</td><td>*</td><td>Copy value to xml:lang</td></tr>
|
||||
<tr><td rowspan="2">name</td><td>IMG</td><td>Turn into ID</td></tr>
|
||||
<tr><td>A</td><td>Turn into ID? (not deprecated, though in which specs?)</td></tr>
|
||||
<tr><td>noshade</td><td>HR</td><td>Boolean, style 'border-style:solid;'</td></tr>
|
||||
<tr><td>nowrap</td><td>TD, TH</td><td>Boolean, style 'white-space:nowrap;' (not compat with IE5)</td></tr>
|
||||
<tr><td>size</td><td>HR</td><td>Near-equiv 'height', needs px suffix if original was pixels</td></tr>
|
||||
<tr class="required impl-yes"><td>src</td><td>IMG</td><td>Required, insert blank or default img if not set</td></tr>
|
||||
<tr><td>start</td><td>OL</td><td>Poorly supported 'counter-reset', transform may not be desirable</td></tr>
|
||||
<tr><td rowspan="3">type</td><td>LI</td><td rowspan="3">Equivalent style 'list-style-type', different allowed values though. (needs testing)</td></tr>
|
||||
<tr><td>OL</td></tr>
|
||||
<tr><td>UL</td></tr>
|
||||
<tr><td>value</td><td>LI</td><td>Poorly supported 'counter-reset', transform may not be desirable, see ol.start. Configurable.</td></tr>
|
||||
<tr><td>vspace</td><td>IMG</td><td>Near-equiv styles 'margin-top' and 'margin-bottom', needs px suffix, see hspace</td></tr>
|
||||
<tr><td rowspan="2">width</td><td>HR</td><td rowspan="2">Near-equiv style 'width', needs px suffix if original was pixels</td></tr>
|
||||
<tr><td>TD, TH</td></tr>
|
||||
</tbody>
|
||||
|
||||
</table>
|
||||
|
||||
<h3>CSS</h3>
|
||||
|
||||
<table cellspacing="0">
|
||||
|
||||
<thead>
|
||||
<tr><th>Name</th><th>Notes</th></tr>
|
||||
</thead>
|
||||
|
||||
<!--
|
||||
<tr><td>-</td><td>-</td></tr>
|
||||
-->
|
||||
|
||||
<tbody>
|
||||
<tr><th colspan="2">Standard</th></tr>
|
||||
<tr class="css1 impl-yes"><td>background-color</td><td>COMPOSITE(<color>, transparent)</td></tr>
|
||||
<tr class="css1"><td>background</td><td>SHORTHAND</td></tr>
|
||||
<tr class="css1"><td>border</td><td>SHORTHAND, MULTIPLE</td></tr>
|
||||
<tr class="css1 impl-yes"><td>border-color</td><td>MULTIPLE</td></tr>
|
||||
<tr class="css1 impl-yes"><td>border-style</td><td>MULTIPLE</td></tr>
|
||||
<tr class="css1 impl-yes"><td>border-width</td><td>MULTIPLE</td></tr>
|
||||
<tr class="css1"><td>border-*</td><td>SHORTHAND</td></tr>
|
||||
<tr class="impl-yes"><td>border-*-color</td><td>COMPOSITE(<color>, transparent)</td></tr>
|
||||
<tr class="impl-yes"><td>border-*-style</td><td>ENUM(none, hidden, dotted, dashed,
|
||||
solid, double, groove, ridge, inset, outset)</td></tr>
|
||||
<tr class="css1 impl-yes"><td>border-*-width</td><td>COMPOSITE(<length>, thin, medium, thick)</td></tr>
|
||||
<tr class="css1 impl-yes"><td>clear</td><td>ENUM(none, left, right, both)</td></tr>
|
||||
<tr class="css1 impl-yes"><td>color</td><td><color></td></tr>
|
||||
<tr class="css1 impl-yes"><td>float</td><td>ENUM(left, right, none), May require layout
|
||||
precautions with clear</td></tr>
|
||||
<tr class="css1"><td>font</td><td>SHORTHAND</td></tr>
|
||||
<tr class="css1 impl-yes"><td>font-family</td><td>CSS validator may complain if fallback font
|
||||
family not specified</td></tr>
|
||||
<tr class="css1 impl-yes"><td>font-size</td><td>COMPOSITE(<absolute-size>,
|
||||
<relative-size>, <length>, <percentage>)</td></tr>
|
||||
<tr class="css1 impl-yes"><td>font-style</td><td>ENUM(normal, italic, oblique)</td></tr>
|
||||
<tr class="css1 impl-yes"><td>font-variant</td><td>ENUM(normal, small-caps)</td></tr>
|
||||
<tr class="css1 impl-yes"><td>font-weight</td><td>ENUM(normal, bold, bolder, lighter,
|
||||
100, 200, 300, 400, 500, 600, 700, 800, 900), maybe special code for
|
||||
in-between integers</td></tr>
|
||||
<tr class="css1 impl-yes"><td>letter-spacing</td><td>COMPOSITE(<length>, normal)</td></tr>
|
||||
<tr class="css1 impl-yes"><td>line-height</td><td>COMPOSITE(<number>,
|
||||
<length>, <percentage>, normal)</td></tr>
|
||||
<tr class="css1 impl-yes"><td>list-style-position</td><td>ENUM(inside, outside),
|
||||
Strange behavior in browsers</td></tr>
|
||||
<tr class="css1 impl-yes"><td>list-style-type</td><td>ENUM(...),
|
||||
Well-supported values are: disc, circle, square,
|
||||
decimal, lower-roman, upper-roman, lower-alpha and upper-alpha. See also
|
||||
CSS 3. Mostly IE lack of support.</td></tr>
|
||||
<tr class="css1"><td>list-style</td><td>SHORTHAND</td></tr>
|
||||
<tr class="css1 impl-yes"><td>margin</td><td>MULTIPLE</td></tr>
|
||||
<tr class="css1 impl-yes"><td>margin-*</td><td>COMPOSITE(<length>,
|
||||
<percentage>, auto)</td></tr>
|
||||
<tr class="css1 impl-yes"><td>padding</td><td>MULTIPLE</td></tr>
|
||||
<tr class="css1 impl-yes"><td>padding-*</td><td>COMPOSITE(<length>(positive),
|
||||
<percentage>(positive))</td></tr>
|
||||
<tr class="css1 impl-yes"><td>text-align</td><td>ENUM(left, right,
|
||||
center, justify)</td></tr>
|
||||
<tr class="css1 impl-yes"><td>text-decoration</td><td>No blink (argh my eyes), not
|
||||
enum, can be combined (composite sorta): underline, overline,
|
||||
line-through</td></tr>
|
||||
<tr class="css1 impl-yes"><td>text-indent</td><td>COMPOSITE(<length>,
|
||||
<percentage>)</td></tr>
|
||||
<tr class="css1 impl-yes"><td>text-transform</td><td>ENUM(capitalize, uppercase,
|
||||
lowercase, none)</td></tr>
|
||||
<tr class="css1 impl-yes"><td>width</td><td>COMPOSITE(<length>,
|
||||
<percentage>, auto), Interesting</td></tr>
|
||||
<tr class="css1 impl-yes"><td>word-spacing</td><td>COMPOSITE(<length>, normal),
|
||||
IE 5 no support</td></tr>
|
||||
</tbody>
|
||||
|
||||
<tbody>
|
||||
<tr><th colspan="2">Table</th></tr>
|
||||
<tr><td>border-collapse</td><td>ENUM(collapse, separate)</td></tr>
|
||||
<tr><td>caption-side</td><td>ENUM(top, bottom)</td></tr>
|
||||
<tr><td>empty-cells</td><td>ENUM(show, hide), No IE support, possible fix
|
||||
with &nbsp;?</td></tr>
|
||||
<tr><td>table-layout</td><td>ENUM(auto, fixed)</td></tr>
|
||||
<tr class="css1"><td>vertical-align</td><td>COMPOSITE(ENUM(baseline, sub,
|
||||
super, top, text-top, middle, bottom, text-bottom), <percentage>,
|
||||
<length>) Also applies to others with explicit height</td></tr>
|
||||
</tbody>
|
||||
|
||||
<tbody>
|
||||
<tr><th colspan="2">Absolute positioning</th></tr>
|
||||
<tr class="danger"><td>bottom</td><td rowspan="4">Dangerous, must be non-negative</td></tr>
|
||||
<tr class="danger"><td>left</td></tr>
|
||||
<tr class="danger"><td>right</td></tr>
|
||||
<tr class="danger"><td>top</td></tr>
|
||||
<tr><td>clip</td><td>-</td></tr>
|
||||
<tr class="danger"><td>position</td><td>ENUM(static, relative, absolute, fixed), permit
|
||||
relative not absolute?</td></tr>
|
||||
<tr class="danger"><td>z-index</td><td>Dangerous</td></tr>
|
||||
</tbody>
|
||||
|
||||
<tbody>
|
||||
<tr><th colspan="2">Unknown</th></tr>
|
||||
<tr class="danger css1"><td>background-image</td><td>Dangerous</td></tr>
|
||||
<tr class="css1"><td>background-attachment</td><td>ENUM(scroll, fixed),
|
||||
Depends on background-image</td></tr>
|
||||
<tr class="css1"><td>background-position</td><td>Depends on background-image</td></tr>
|
||||
<tr class="danger"><td>cursor</td><td>Dangerous but fluffy</td></tr>
|
||||
<tr class="danger css1"><td>display</td><td>ENUM(...), Dangerous but interesting;
|
||||
will not implement list-item, run-in (Opera only) or table (no IE);
|
||||
inline-block has incomplete IE6 support and requires -moz-inline-box
|
||||
for Mozilla.</td></tr>
|
||||
<tr><td class="css1">height</td><td>Interesting, why use it?</td></tr>
|
||||
<tr class="danger css1"><td>list-style-image</td><td>Dangerous?</td></tr>
|
||||
<tr class="impl-no"><td>max-height</td><td rowspan="4">No IE 5/6</td></tr>
|
||||
<tr class="impl-no"><td>min-height</td></tr>
|
||||
<tr class="impl-no"><td>max-width</td></tr>
|
||||
<tr class="impl-no"><td>min-width</td></tr>
|
||||
<tr class="impl-no"><td>orphans</td><td>No IE support</td></tr>
|
||||
<tr class="impl-no"><td>widows</td><td>No IE support</td></tr>
|
||||
<tr><td>overflow</td><td>ENUM, IE 5/6 almost (remove visible if set)</td></tr>
|
||||
<tr><td>page-break-after</td><td>ENUM(auto, always, avoid, left, right),
|
||||
IE 5.5/6 and Opera</td></tr>
|
||||
<tr><td>page-break-before</td><td>ENUM(auto, always, avoid, left, right),
|
||||
Mostly supported</td></tr>
|
||||
<tr><td>page-break-inside</td><td>ENUM(avoid, auto), Opera only</td></tr>
|
||||
<tr class="impl-no"><td>quotes</td><td>May be dropped from CSS2</td></tr>
|
||||
<tr class="impl-no"><td>visibility</td><td>ENUM(visible, hidden, collapse),
|
||||
Dangerous</td></tr>
|
||||
<tr><td class="css1">white-space</td><td>ENUM(normal, pre, nowrap, pre-wrap,
|
||||
pre-line), Spotty implementation:
|
||||
pre (no IE 5/6), nowrap (no IE 5),
|
||||
pre-wrap (only Opera), pre-line (no support). Fixable?</td></tr>
|
||||
</tbody>
|
||||
|
||||
<tbody class="impl-no">
|
||||
<tr><th colspan="2">Aural</th></tr>
|
||||
<tr><td>azimuth</td><td>-</td></tr>
|
||||
<tr><td>cue</td><td>-</td></tr>
|
||||
<tr><td>cue-after</td><td>-</td></tr>
|
||||
<tr><td>cue-before</td><td>-</td></tr>
|
||||
<tr><td>elevation</td><td>-</td></tr>
|
||||
<tr><td>pause-after</td><td>-</td></tr>
|
||||
<tr><td>pause-before</td><td>-</td></tr>
|
||||
<tr><td>pause</td><td>-</td></tr>
|
||||
<tr><td>pitch-range</td><td>-</td></tr>
|
||||
<tr><td>pitch</td><td>-</td></tr>
|
||||
<tr><td>play-during</td><td>-</td></tr>
|
||||
<tr><td>richness</td><td>-</td></tr>
|
||||
<tr><td>speak-header</td><td>Table related</td></tr>
|
||||
<tr><td>speak-numeral</td><td>-</td></tr>
|
||||
<tr><td>speak-punctuation</td><td>-</td></tr>
|
||||
<tr><td>speak</td><td>-</td></tr>
|
||||
<tr><td>speech-rate</td><td>-</td></tr>
|
||||
<tr><td>stress</td><td>-</td></tr>
|
||||
<tr><td>voice-family</td><td>-</td></tr>
|
||||
<tr><td>volume</td><td>-</td></tr>
|
||||
</tbody>
|
||||
|
||||
<tbody class="impl-no">
|
||||
<tr><th colspan="2">Will not implement</th></tr>
|
||||
<tr><td>content</td><td>Not applicable for inline styles</td></tr>
|
||||
<tr><td>counter-increment</td><td>Needs content, Opera only</td></tr>
|
||||
<tr><td>counter-reset</td><td>Needs content, Opera only</td></tr>
|
||||
<tr><td>direction</td><td>No support</td></tr>
|
||||
<tr><td>outline-color</td><td rowspan="4">IE Mac and Opera on outside,
|
||||
Mozilla on inside and needs -moz-outline, no IE support.</td></tr>
|
||||
<tr><td>outline-style</td></tr>
|
||||
<tr><td>outline-width</td></tr>
|
||||
<tr><td>outline</td></tr>
|
||||
<tr><td>unicode-bidi</td><td>No support</td></tr>
|
||||
</tbody>
|
||||
|
||||
</table>
|
||||
|
||||
</body></html>
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
|
||||
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
|
||||
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"><head>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
|
||||
<meta name="description" content="Tables detailing HTML element and CSS property implementation coverage in HTML Purifier." />
|
||||
<link rel="stylesheet" type="text/css" href="./style.css" />
|
||||
|
||||
<title>Implementation Progress - HTML Purifier</title>
|
||||
|
||||
<style type="text/css">
|
||||
|
||||
td {padding-right:1em;border-bottom:1px solid #000;padding-left:0.5em;}
|
||||
th {text-align:left;padding-top:1.4em;font-size:13pt;
|
||||
border-bottom:2px solid #000;background:#FFF;}
|
||||
thead th {text-align:left;padding:0.1em;background-color:#EEE;}
|
||||
|
||||
.impl-yes {background:#9D9;}
|
||||
.impl-partial {background:#FFA;}
|
||||
.impl-no {background:#CCC;}
|
||||
|
||||
.danger {color:#600;}
|
||||
.css1 {color:#060;}
|
||||
.required {font-weight:bold;}
|
||||
.feature {color:#999;}
|
||||
|
||||
</style>
|
||||
|
||||
</head><body>
|
||||
|
||||
<h1>Implementation Progress</h1>
|
||||
|
||||
<div id="filing">Filed under Development</div>
|
||||
<div id="index">Return to the <a href="index.html">index</a>.</div>
|
||||
<div id="home"><a href="http://htmlpurifier.org/">HTML Purifier</a> End-User Documentation</div>
|
||||
|
||||
<h2>Key</h2>
|
||||
|
||||
<table cellspacing="0"><tbody>
|
||||
<tr><td class="impl-yes">Implemented</td></tr>
|
||||
<tr><td class="impl-partial">Partially implemented</td></tr>
|
||||
<tr><td class="impl-no">Will not implement</td></tr>
|
||||
<tr><td class="danger">Dangerous attribute/property</td></tr>
|
||||
<tr><td class="css1">Present in CSS1</td></tr>
|
||||
<tr><td class="feature">Feature, requires extra work</td></tr>
|
||||
</tbody></table>
|
||||
|
||||
<h2>CSS</h2>
|
||||
|
||||
<table cellspacing="0">
|
||||
|
||||
<thead>
|
||||
<tr><th>Name</th><th>Notes</th></tr>
|
||||
</thead>
|
||||
|
||||
<!--
|
||||
<tr><td>-</td><td>-</td></tr>
|
||||
-->
|
||||
|
||||
<tbody>
|
||||
<tr><th colspan="2">Standard</th></tr>
|
||||
<tr class="css1 impl-yes"><td>background-color</td><td>COMPOSITE(<color>, transparent)</td></tr>
|
||||
<tr class="css1 impl-yes"><td>background</td><td>SHORTHAND, currently alias for background-color</td></tr>
|
||||
<tr class="css1 impl-yes"><td>border</td><td>SHORTHAND, MULTIPLE</td></tr>
|
||||
<tr class="css1 impl-yes"><td>border-color</td><td>MULTIPLE</td></tr>
|
||||
<tr class="css1 impl-yes"><td>border-style</td><td>MULTIPLE</td></tr>
|
||||
<tr class="css1 impl-yes"><td>border-width</td><td>MULTIPLE</td></tr>
|
||||
<tr class="css1 impl-yes"><td>border-*</td><td>SHORTHAND</td></tr>
|
||||
<tr class="impl-yes"><td>border-*-color</td><td>COMPOSITE(<color>, transparent)</td></tr>
|
||||
<tr class="impl-yes"><td>border-*-style</td><td>ENUM(none, hidden, dotted, dashed,
|
||||
solid, double, groove, ridge, inset, outset)</td></tr>
|
||||
<tr class="css1 impl-yes"><td>border-*-width</td><td>COMPOSITE(<length>, thin, medium, thick)</td></tr>
|
||||
<tr class="css1 impl-yes"><td>clear</td><td>ENUM(none, left, right, both)</td></tr>
|
||||
<tr class="css1 impl-yes"><td>color</td><td><color></td></tr>
|
||||
<tr class="css1 impl-yes"><td>float</td><td>ENUM(left, right, none), May require layout
|
||||
precautions with clear</td></tr>
|
||||
<tr class="css1 impl-yes"><td>font</td><td>SHORTHAND</td></tr>
|
||||
<tr class="css1 impl-yes"><td>font-family</td><td>CSS validator may complain if fallback font
|
||||
family not specified</td></tr>
|
||||
<tr class="css1 impl-yes"><td>font-size</td><td>COMPOSITE(<absolute-size>,
|
||||
<relative-size>, <length>, <percentage>)</td></tr>
|
||||
<tr class="css1 impl-yes"><td>font-style</td><td>ENUM(normal, italic, oblique)</td></tr>
|
||||
<tr class="css1 impl-yes"><td>font-variant</td><td>ENUM(normal, small-caps)</td></tr>
|
||||
<tr class="css1 impl-yes"><td>font-weight</td><td>ENUM(normal, bold, bolder, lighter,
|
||||
100, 200, 300, 400, 500, 600, 700, 800, 900), maybe special code for
|
||||
in-between integers</td></tr>
|
||||
<tr class="css1 impl-yes"><td>letter-spacing</td><td>COMPOSITE(<length>, normal)</td></tr>
|
||||
<tr class="css1 impl-yes"><td>line-height</td><td>COMPOSITE(<number>,
|
||||
<length>, <percentage>, normal)</td></tr>
|
||||
<tr class="css1 impl-yes"><td>list-style-position</td><td>ENUM(inside, outside),
|
||||
Strange behavior in browsers</td></tr>
|
||||
<tr class="css1 impl-yes"><td>list-style-type</td><td>ENUM(...),
|
||||
Well-supported values are: disc, circle, square,
|
||||
decimal, lower-roman, upper-roman, lower-alpha and upper-alpha. See also
|
||||
CSS 3. Mostly IE lack of support.</td></tr>
|
||||
<tr class="css1 impl-yes"><td>list-style</td><td>SHORTHAND</td></tr>
|
||||
<tr class="css1 impl-yes"><td>margin</td><td>MULTIPLE</td></tr>
|
||||
<tr class="css1 impl-yes"><td>margin-*</td><td>COMPOSITE(<length>,
|
||||
<percentage>, auto)</td></tr>
|
||||
<tr class="css1 impl-yes"><td>padding</td><td>MULTIPLE</td></tr>
|
||||
<tr class="css1 impl-yes"><td>padding-*</td><td>COMPOSITE(<length>(positive),
|
||||
<percentage>(positive))</td></tr>
|
||||
<tr class="css1 impl-yes"><td>text-align</td><td>ENUM(left, right,
|
||||
center, justify)</td></tr>
|
||||
<tr class="css1 impl-yes"><td>text-decoration</td><td>No blink (argh my eyes), not
|
||||
enum, can be combined (composite sorta): underline, overline,
|
||||
line-through</td></tr>
|
||||
<tr class="css1 impl-yes"><td>text-indent</td><td>COMPOSITE(<length>,
|
||||
<percentage>)</td></tr>
|
||||
<tr class="css1 impl-yes"><td>text-transform</td><td>ENUM(capitalize, uppercase,
|
||||
lowercase, none)</td></tr>
|
||||
<tr class="css1 impl-yes"><td>width</td><td>COMPOSITE(<length>,
|
||||
<percentage>, auto), Interesting</td></tr>
|
||||
<tr class="css1 impl-yes"><td>word-spacing</td><td>COMPOSITE(<length>, normal),
|
||||
IE 5 no support</td></tr>
|
||||
</tbody>
|
||||
|
||||
<tbody>
|
||||
<tr><th colspan="2">Table</th></tr>
|
||||
<tr class="impl-yes"><td>border-collapse</td><td>ENUM(collapse, separate)</td></tr>
|
||||
<tr class="impl-yes"><td>caption-side</td><td>ENUM(top, bottom)</td></tr>
|
||||
<tr class="feature"><td>empty-cells</td><td>ENUM(show, hide), No IE support makes this useless,
|
||||
possible fix with &nbsp;? Unknown release milestone.</td></tr>
|
||||
<tr class="impl-yes"><td>table-layout</td><td>ENUM(auto, fixed)</td></tr>
|
||||
<tr class="impl-yes css1"><td>vertical-align</td><td>COMPOSITE(ENUM(baseline, sub,
|
||||
super, top, text-top, middle, bottom, text-bottom), <percentage>,
|
||||
<length>) Also applies to others with explicit height</td></tr>
|
||||
</tbody>
|
||||
|
||||
<tbody>
|
||||
<tr><th colspan="2">Absolute positioning, unknown release milestone</th></tr>
|
||||
<tr class="danger impl-no"><td>bottom</td><td rowspan="4">Dangerous, must be non-negative to even be considered,
|
||||
but it's still possible to arbitrarily position by running over.</td></tr>
|
||||
<tr class="danger impl-no"><td>left</td></tr>
|
||||
<tr class="danger impl-no"><td>right</td></tr>
|
||||
<tr class="danger impl-no"><td>top</td></tr>
|
||||
<tr class="impl-no"><td>clip</td><td>-</td></tr>
|
||||
<tr class="danger impl-no"><td>position</td><td>ENUM(static, relative, absolute, fixed)
|
||||
relative not absolute?</td></tr>
|
||||
<tr class="danger impl-no"><td>z-index</td><td>Dangerous</td></tr>
|
||||
</tbody>
|
||||
|
||||
<tbody>
|
||||
<tr><th colspan="2">Unknown</th></tr>
|
||||
<tr class="danger css1 impl-yes"><td>background-image</td><td>Dangerous</td></tr>
|
||||
<tr class="css1 impl-yes"><td>background-attachment</td><td>ENUM(scroll, fixed),
|
||||
Depends on background-image</td></tr>
|
||||
<tr class="css1 impl-yes"><td>background-position</td><td>Depends on background-image</td></tr>
|
||||
<tr class="danger impl-no"><td>cursor</td><td>Dangerous but fluffy</td></tr>
|
||||
<tr class="danger css1"><td>display</td><td>ENUM(...), Dangerous but interesting;
|
||||
will not implement list-item, run-in (Opera only) or table (no IE);
|
||||
inline-block has incomplete IE6 support and requires -moz-inline-box
|
||||
for Mozilla. Unknown target milestone.</td></tr>
|
||||
<tr class="css1 impl-yes"><td>height</td><td>Interesting, why use it? Unknown target milestone.</td></tr>
|
||||
<tr class="danger css1 impl-yes"><td>list-style-image</td><td>Dangerous?</td></tr>
|
||||
<tr class="impl-no"><td>max-height</td><td rowspan="4">No IE 5/6</td></tr>
|
||||
<tr class="impl-no"><td>min-height</td></tr>
|
||||
<tr class="impl-no"><td>max-width</td></tr>
|
||||
<tr class="impl-no"><td>min-width</td></tr>
|
||||
<tr class="impl-no"><td>orphans</td><td>No IE support</td></tr>
|
||||
<tr class="impl-no"><td>widows</td><td>No IE support</td></tr>
|
||||
<tr><td>overflow</td><td>ENUM, IE 5/6 almost (remove visible if set). Unknown target milestone.</td></tr>
|
||||
<tr><td>page-break-after</td><td>ENUM(auto, always, avoid, left, right),
|
||||
IE 5.5/6 and Opera. Unknown target milestone.</td></tr>
|
||||
<tr><td>page-break-before</td><td>ENUM(auto, always, avoid, left, right),
|
||||
Mostly supported. Unknown target milestone.</td></tr>
|
||||
<tr><td>page-break-inside</td><td>ENUM(avoid, auto), Opera only. Unknown target milestone.</td></tr>
|
||||
<tr class="impl-no"><td>quotes</td><td>May be dropped from CSS2, fairly useless for inline context</td></tr>
|
||||
<tr class="impl-no"><td>visibility</td><td>ENUM(visible, hidden, collapse),
|
||||
Dangerous</td></tr>
|
||||
<tr class="css1 feature impl-partial"><td>white-space</td><td>ENUM(normal, pre, nowrap, pre-wrap,
|
||||
pre-line), Spotty implementation:
|
||||
pre (no IE 5/6), <em>nowrap</em> (no IE 5, supported),
|
||||
pre-wrap (only Opera), pre-line (no support). Fixable? Unknown target milestone.</td></tr>
|
||||
</tbody>
|
||||
|
||||
<tbody class="impl-no">
|
||||
<tr><th colspan="2">Aural</th></tr>
|
||||
<tr><td>azimuth</td><td>-</td></tr>
|
||||
<tr><td>cue</td><td>-</td></tr>
|
||||
<tr><td>cue-after</td><td>-</td></tr>
|
||||
<tr><td>cue-before</td><td>-</td></tr>
|
||||
<tr><td>elevation</td><td>-</td></tr>
|
||||
<tr><td>pause-after</td><td>-</td></tr>
|
||||
<tr><td>pause-before</td><td>-</td></tr>
|
||||
<tr><td>pause</td><td>-</td></tr>
|
||||
<tr><td>pitch-range</td><td>-</td></tr>
|
||||
<tr><td>pitch</td><td>-</td></tr>
|
||||
<tr><td>play-during</td><td>-</td></tr>
|
||||
<tr><td>richness</td><td>-</td></tr>
|
||||
<tr><td>speak-header</td><td>Table related</td></tr>
|
||||
<tr><td>speak-numeral</td><td>-</td></tr>
|
||||
<tr><td>speak-punctuation</td><td>-</td></tr>
|
||||
<tr><td>speak</td><td>-</td></tr>
|
||||
<tr><td>speech-rate</td><td>-</td></tr>
|
||||
<tr><td>stress</td><td>-</td></tr>
|
||||
<tr><td>voice-family</td><td>-</td></tr>
|
||||
<tr><td>volume</td><td>-</td></tr>
|
||||
</tbody>
|
||||
|
||||
<tbody class="impl-no">
|
||||
<tr><th colspan="2">Will not implement</th></tr>
|
||||
<tr><td>content</td><td>Not applicable for inline styles</td></tr>
|
||||
<tr><td>counter-increment</td><td>Needs content, Opera only</td></tr>
|
||||
<tr><td>counter-reset</td><td>Needs content, Opera only</td></tr>
|
||||
<tr><td>direction</td><td>No support</td></tr>
|
||||
<tr><td>outline-color</td><td rowspan="4">IE Mac and Opera on outside,
|
||||
Mozilla on inside and needs -moz-outline, no IE support.</td></tr>
|
||||
<tr><td>outline-style</td></tr>
|
||||
<tr><td>outline-width</td></tr>
|
||||
<tr><td>outline</td></tr>
|
||||
<tr><td>unicode-bidi</td><td>No support</td></tr>
|
||||
</tbody>
|
||||
|
||||
</table>
|
||||
|
||||
<h2>Interesting Attributes</h2>
|
||||
|
||||
<table cellspacing="0">
|
||||
|
||||
<thead>
|
||||
<tr><th>Attribute</th><th>Tags</th><th>Notes</th></tr>
|
||||
</thead>
|
||||
|
||||
<!--
|
||||
<tr><th></th></tr>
|
||||
<tbody>
|
||||
<tr><td>-</td><td>-</td><td>-</td></tr>
|
||||
</tbody>
|
||||
-->
|
||||
|
||||
<tbody>
|
||||
<tr><th colspan="3">CSS</th></tr>
|
||||
<tr class="impl-yes"><td>style</td><td>All</td><td>Parser is reasonably functional. Status here doesn't count individual properties.</td></tr>
|
||||
</tbody>
|
||||
|
||||
<tbody>
|
||||
<tr><th colspan="3">Questionable</th></tr>
|
||||
<tr class="impl-no"><td>accesskey</td><td>A</td><td>May interfere with main interface</td></tr>
|
||||
<tr class="impl-no"><td>tabindex</td><td>A</td><td>May interfere with main interface</td></tr>
|
||||
<tr class="impl-yes"><td>target</td><td>A</td><td>Config enabled, only useful for frame layouts, disallowed in strict</td></tr>
|
||||
</tbody>
|
||||
|
||||
<tbody>
|
||||
<tr><th colspan="3">Miscellaneous</th></tr>
|
||||
<tr><td>datetime</td><td>DEL, INS</td><td>No visible effect, ISO format</td></tr>
|
||||
<tr class="impl-yes"><td>rel</td><td>A</td><td>Largely user-defined: nofollow, tag (see microformats)</td></tr>
|
||||
<tr class="impl-yes"><td>rev</td><td>A</td><td>Largely user-defined: vote-*</td></tr>
|
||||
<tr class="feature"><td>axis</td><td>TD, TH</td><td>W3C only: No browser implementation</td></tr>
|
||||
<tr class="feature"><td>char</td><td>COL, COLGROUP, TBODY, TD, TFOOT, TH, THEAD, TR</td><td>W3C only: No browser implementation</td></tr>
|
||||
<tr class="feature"><td>headers</td><td>TD, TH</td><td>W3C only: No browser implementation</td></tr>
|
||||
<tr class="feature"><td>scope</td><td>TD, TH</td><td>W3C only: No browser implementation</td></tr>
|
||||
</tbody>
|
||||
|
||||
<tbody class="impl-yes">
|
||||
<tr><th colspan="3">URI</th></tr>
|
||||
<tr><td rowspan="2">cite</td><td>BLOCKQUOTE, Q</td><td>For attribution</td></tr>
|
||||
<tr><td>DEL, INS</td><td>Link to explanation why it changed</td></tr>
|
||||
<tr><td>href</td><td>A</td><td>-</td></tr>
|
||||
<tr><td>longdesc</td><td>IMG</td><td>-</td></tr>
|
||||
<tr class="required"><td>src</td><td>IMG</td><td>Required</td></tr>
|
||||
</tbody>
|
||||
|
||||
<tbody>
|
||||
<tr><th colspan="3">Transform</th></tr>
|
||||
<tr class="impl-yes"><td rowspan="5">align</td><td>CAPTION</td><td>'caption-side' for top/bottom, 'text-align' for left/right</td></tr>
|
||||
<tr class="impl-yes"><td>IMG</td><td rowspan="3">See specimens/html-align-to-css.html</td></tr>
|
||||
<tr class="impl-yes"><td>TABLE</td></tr>
|
||||
<tr class="impl-yes"><td>HR</td></tr>
|
||||
<tr class="impl-yes"><td>H1, H2, H3, H4, H5, H6, P</td><td>Equivalent style 'text-align'</td></tr>
|
||||
<tr class="required impl-yes"><td>alt</td><td>IMG</td><td>Required, insert image filename if src is present or default invalid image text</td></tr>
|
||||
<tr class="impl-yes"><td rowspan="3">bgcolor</td><td>TABLE</td><td>Superset style 'background-color'</td></tr>
|
||||
<tr class="impl-yes"><td>TR</td><td>Superset style 'background-color'</td></tr>
|
||||
<tr class="impl-yes"><td>TD, TH</td><td>Superset style 'background-color'</td></tr>
|
||||
<tr class="impl-yes"><td>border</td><td>IMG</td><td>Equivalent style <code>border:[number]px solid</code></td></tr>
|
||||
<tr class="impl-yes"><td>clear</td><td>BR</td><td>Near-equiv style 'clear', transform 'all' into 'both'</td></tr>
|
||||
<tr class="impl-no"><td>compact</td><td>DL, OL, UL</td><td>Boolean, needs custom CSS class; rarely used anyway</td></tr>
|
||||
<tr class="required impl-yes"><td>dir</td><td>BDO</td><td>Required, insert ltr (or configuration value) if none</td></tr>
|
||||
<tr class="impl-yes"><td>height</td><td>TD, TH</td><td>Near-equiv style 'height', needs px suffix if original was in pixels</td></tr>
|
||||
<tr class="impl-yes"><td>hspace</td><td>IMG</td><td>Near-equiv styles 'margin-left' and 'margin-right', needs px suffix</td></tr>
|
||||
<tr class="impl-yes"><td>lang</td><td>*</td><td>Copy value to xml:lang</td></tr>
|
||||
<tr class="impl-yes"><td rowspan="2">name</td><td>IMG</td><td>Turn into ID</td></tr>
|
||||
<tr class="impl-yes"><td>A</td><td>Turn into ID</td></tr>
|
||||
<tr class="impl-yes"><td>noshade</td><td>HR</td><td>Boolean, style 'border-style:solid;'</td></tr>
|
||||
<tr class="impl-yes"><td>nowrap</td><td>TD, TH</td><td>Boolean, style 'white-space:nowrap;' (not compat with IE5)</td></tr>
|
||||
<tr class="impl-yes"><td>size</td><td>HR</td><td>Near-equiv 'height', needs px suffix if original was pixels</td></tr>
|
||||
<tr class="required impl-yes"><td>src</td><td>IMG</td><td>Required, insert blank or default img if not set</td></tr>
|
||||
<tr class="impl-yes"><td>start</td><td>OL</td><td>Poorly supported 'counter-reset', allowed in loose, dropped in strict</td></tr>
|
||||
<tr class="impl-yes"><td rowspan="3">type</td><td>LI</td><td rowspan="3">Equivalent style 'list-style-type', different allowed values though. (needs testing)</td></tr>
|
||||
<tr class="impl-yes"><td>OL</td></tr>
|
||||
<tr class="impl-yes"><td>UL</td></tr>
|
||||
<tr class="impl-yes"><td>value</td><td>LI</td><td>Poorly supported 'counter-reset', allowed in loose, dropped in strict</td></tr>
|
||||
<tr class="impl-yes"><td>vspace</td><td>IMG</td><td>Near-equiv styles 'margin-top' and 'margin-bottom', needs px suffix, see hspace</td></tr>
|
||||
<tr class="impl-yes"><td rowspan="2">width</td><td>HR</td><td rowspan="2">Near-equiv style 'width', needs px suffix if original was pixels</td></tr>
|
||||
<tr class="impl-yes"><td>TD, TH</td></tr>
|
||||
</tbody>
|
||||
|
||||
</table>
|
||||
|
||||
<div id="version">$Id$</div>
|
||||
|
||||
</body></html>
|
@@ -1,272 +0,0 @@
|
||||
<!-- Transform %TextAlign to align:value in style -->
|
||||
|
||||
<!-- text alignment for p, div, h1-h6. The default is
|
||||
align="left" for ltr headings, "right" for rtl
|
||||
|
||||
Move to style! -->
|
||||
<!ENTITY % TextAlign "DEPRECATED align (left|center|right|justify) #IMPLIED">
|
||||
|
||||
<!-- type and start should have CSS equivalents, but they'll need to
|
||||
be translated intelligently -->
|
||||
<!ENTITY % ULStyle "(disc|square|circle)">
|
||||
<!-- Ordered list numbering style
|
||||
|
||||
1 arabic numbers 1, 2, 3, ...
|
||||
a lower alpha a, b, c, ...
|
||||
A upper alpha A, B, C, ...
|
||||
i lower roman i, ii, iii, ...
|
||||
I upper roman I, II, III, ...
|
||||
|
||||
The style is applied to the sequence number which by default
|
||||
is reset to 1 for the first list item in an ordered list.
|
||||
-->
|
||||
<!ENTITY % OLStyle "CDATA">
|
||||
<!-- LIStyle is constrained to: "(%ULStyle;|%OLStyle;)" -->
|
||||
<!ENTITY % LIStyle "CDATA">
|
||||
|
||||
<!ATTLIST ol
|
||||
%attrs;
|
||||
DEPRECATED type %OLStyle; #IMPLIED
|
||||
DEPRECATED start %Number; #IMPLIED
|
||||
>
|
||||
|
||||
<!ATTLIST li
|
||||
%attrs;
|
||||
DEPRECATED type %LIStyle; #IMPLIED
|
||||
DEPRECATED value %Number; #IMPLIED
|
||||
>
|
||||
|
||||
<!ATTLIST hr
|
||||
%attrs;
|
||||
DEPRECATED align (left|center|right) #IMPLIED
|
||||
DEPRECATED size %Pixels; #IMPLIED
|
||||
DEPRECATED width %Length; #IMPLIED
|
||||
>
|
||||
|
||||
<!ATTLIST pre
|
||||
%attrs;
|
||||
DEPRECATED width %Number; #IMPLIED
|
||||
>
|
||||
|
||||
<!ATTLIST blockquote
|
||||
%attrs;
|
||||
cite %URI; #IMPLIED
|
||||
>
|
||||
|
||||
<!ATTLIST ins
|
||||
%attrs;
|
||||
cite %URI; #IMPLIED
|
||||
datetime %Datetime; #IMPLIED
|
||||
>
|
||||
<!ATTLIST del
|
||||
%attrs;
|
||||
cite %URI; #IMPLIED
|
||||
datetime %Datetime; #IMPLIED
|
||||
>
|
||||
|
||||
<!ATTLIST a
|
||||
%attrs;
|
||||
name NMTOKEN #IMPLIED // ID
|
||||
href %URI; #IMPLIED
|
||||
rel %LinkTypes; #IMPLIED // needs policing
|
||||
rev %LinkTypes; #IMPLIED // see rel
|
||||
target %FrameTarget; #IMPLIED // usually not used, but might be
|
||||
>
|
||||
|
||||
<!ATTLIST bdo
|
||||
%coreattrs; // !#!
|
||||
lang %LanguageCode; #IMPLIED
|
||||
xml:lang %LanguageCode; #IMPLIED
|
||||
dir (ltr|rtl) #REQUIRED
|
||||
>
|
||||
|
||||
<!ATTLIST br
|
||||
%coreattrs; // !#!
|
||||
DEPRECATED clear (left|all|right|none) "none"
|
||||
>
|
||||
|
||||
<!ELEMENT q %Inline;> <!-- inlined quote -->
|
||||
<!ATTLIST q
|
||||
%attrs;
|
||||
cite %URI; #IMPLIED
|
||||
>
|
||||
|
||||
<!ATTLIST img
|
||||
%attrs;
|
||||
src %URI; #REQUIRED
|
||||
alt %Text; #REQUIRED
|
||||
DEPRECATED name NMTOKEN #IMPLIED // ID
|
||||
longdesc %URI; #IMPLIED
|
||||
height %Length; #IMPLIED // dubious, but we'll allow
|
||||
width %Length; #IMPLIED //
|
||||
DEPRECATED align %ImgAlign; #IMPLIED
|
||||
DEPRECATED border %Length; #IMPLIED
|
||||
DEPRECATED hspace %Pixels; #IMPLIED // left/right margin
|
||||
DEPRECATED vspace %Pixels; #IMPLIED // up/down margin
|
||||
>
|
||||
|
||||
<!--
|
||||
The border attribute sets the thickness of the frame around the
|
||||
table. The default units are screen pixels.
|
||||
|
||||
The frame attribute specifies which parts of the frame around
|
||||
the table should be rendered. The values are not the same as
|
||||
CALS to avoid a name clash with the valign attribute.
|
||||
-->
|
||||
<!ENTITY % TFrame "(void|above|below|hsides|lhs|rhs|vsides|box|border)">
|
||||
|
||||
<!--
|
||||
The rules attribute defines which rules to draw between cells:
|
||||
|
||||
If rules is absent then assume:
|
||||
"none" if border is absent or border="0" otherwise "all"
|
||||
-->
|
||||
|
||||
<!ENTITY % TRules "(none | groups | rows | cols | all)">
|
||||
|
||||
<!-- horizontal placement of table relative to document -->
|
||||
<!ENTITY % TAlign "(left|center|right)">
|
||||
|
||||
<!-- horizontal alignment attributes for cell contents
|
||||
|
||||
char alignment char, e.g. char=':'
|
||||
charoff offset for alignment char
|
||||
-->
|
||||
<!ENTITY % cellhalign
|
||||
"align (left|center|right|justify|char) #IMPLIED
|
||||
char %Character; #IMPLIED
|
||||
charoff %Length; #IMPLIED"
|
||||
>
|
||||
|
||||
<!-- vertical alignment attributes for cell contents -->
|
||||
<!ENTITY % cellvalign
|
||||
"valign (top|middle|bottom|baseline) #IMPLIED"
|
||||
>
|
||||
|
||||
<!-- we may want to convert some of these nonetheless -->
|
||||
<!ATTLIST table
|
||||
%attrs;
|
||||
summary %Text; #IMPLIED
|
||||
width %Length; #IMPLIED
|
||||
border %Pixels; #IMPLIED
|
||||
frame %TFrame; #IMPLIED
|
||||
rules %TRules; #IMPLIED
|
||||
cellspacing %Length; #IMPLIED
|
||||
cellpadding %Length; #IMPLIED
|
||||
DEPRECATED align %TAlign; #IMPLIED
|
||||
DEPRECATED bgcolor %Color; #IMPLIED
|
||||
>
|
||||
|
||||
<!ENTITY % CAlign "(top|bottom|left|right)">
|
||||
|
||||
<!ATTLIST caption
|
||||
%attrs;
|
||||
DEPRECATED align %CAlign; #IMPLIED // watch, it's a special set
|
||||
>
|
||||
|
||||
<!--
|
||||
colgroup groups a set of col elements. It allows you to group
|
||||
several semantically related columns together.
|
||||
-->
|
||||
<!ATTLIST colgroup
|
||||
%attrs;
|
||||
span %Number; "1"
|
||||
width %MultiLength; #IMPLIED
|
||||
%cellhalign; // very interesting
|
||||
%cellvalign;
|
||||
>
|
||||
|
||||
<!--
|
||||
col elements define the alignment properties for cells in
|
||||
one or more columns.
|
||||
|
||||
The width attribute specifies the width of the columns, e.g.
|
||||
|
||||
width=64 width in screen pixels
|
||||
width=0.5* relative width of 0.5
|
||||
|
||||
The span attribute causes the attributes of one
|
||||
col element to apply to more than one column.
|
||||
-->
|
||||
<!ATTLIST col
|
||||
%attrs;
|
||||
span %Number; "1"
|
||||
width %MultiLength; #IMPLIED
|
||||
%cellhalign;
|
||||
%cellvalign;
|
||||
>
|
||||
|
||||
<!--
|
||||
Use thead to duplicate headers when breaking table
|
||||
across page boundaries, or for static headers when
|
||||
tbody sections are rendered in scrolling panel.
|
||||
|
||||
Use tfoot to duplicate footers when breaking table
|
||||
across page boundaries, or for static footers when
|
||||
tbody sections are rendered in scrolling panel.
|
||||
|
||||
Use multiple tbody sections when rules are needed
|
||||
between groups of table rows.
|
||||
-->
|
||||
<!ATTLIST thead
|
||||
%attrs;
|
||||
%cellhalign;
|
||||
%cellvalign;
|
||||
>
|
||||
|
||||
<!ATTLIST tfoot
|
||||
%attrs;
|
||||
%cellhalign;
|
||||
%cellvalign;
|
||||
>
|
||||
|
||||
<!ATTLIST tbody
|
||||
%attrs;
|
||||
%cellhalign;
|
||||
%cellvalign;
|
||||
>
|
||||
|
||||
<!ATTLIST tr
|
||||
%attrs;
|
||||
%cellhalign;
|
||||
%cellvalign;
|
||||
DEPRECATED bgcolor %Color; #IMPLIED
|
||||
>
|
||||
|
||||
<!-- Scope is simpler than headers attribute for common tables -->
|
||||
<!ENTITY % Scope "(row|col|rowgroup|colgroup)">
|
||||
|
||||
<!-- th is for headers, td for data and for cells acting as both -->
|
||||
|
||||
<!ATTLIST th
|
||||
%attrs;
|
||||
abbr %Text; #IMPLIED
|
||||
axis CDATA #IMPLIED
|
||||
headers IDREFS #IMPLIED
|
||||
scope %Scope; #IMPLIED
|
||||
rowspan %Number; "1"
|
||||
colspan %Number; "1"
|
||||
%cellhalign;
|
||||
%cellvalign;
|
||||
DEPRECATED nowrap (nowrap) #IMPLIED
|
||||
DEPRECATED bgcolor %Color; #IMPLIED
|
||||
DEPRECATED width %Length; #IMPLIED
|
||||
DEPRECATED height %Length; #IMPLIED
|
||||
>
|
||||
|
||||
<!ATTLIST td
|
||||
%attrs;
|
||||
abbr %Text; #IMPLIED
|
||||
axis CDATA #IMPLIED
|
||||
headers IDREFS #IMPLIED
|
||||
scope %Scope; #IMPLIED
|
||||
rowspan %Number; "1"
|
||||
colspan %Number; "1"
|
||||
%cellhalign;
|
||||
%cellvalign;
|
||||
DEPRECATED nowrap (nowrap) #IMPLIED
|
||||
DEPRECATED bgcolor %Color; #IMPLIED
|
||||
DEPRECATED width %Length; #IMPLIED
|
||||
DEPRECATED height %Length; #IMPLIED
|
||||
>
|
||||
|
786
docs/enduser-customize.html
Normal file
786
docs/enduser-customize.html
Normal file
@@ -0,0 +1,786 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
|
||||
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
|
||||
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"><head>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
|
||||
<meta name="description" content="Tutorial for customizing HTML Purifier's tag and attribute sets." />
|
||||
<link rel="stylesheet" type="text/css" href="style.css" />
|
||||
|
||||
<title>Customize - HTML Purifier</title>
|
||||
|
||||
</head><body>
|
||||
|
||||
<h1 class="subtitled">Customize!</h1>
|
||||
<div class="subtitle">HTML Purifier is a Swiss-Army Knife</div>
|
||||
|
||||
<div id="filing">Filed under End-User</div>
|
||||
<div id="index">Return to the <a href="index.html">index</a>.</div>
|
||||
<div id="home"><a href="http://htmlpurifier.org/">HTML Purifier</a> End-User Documentation</div>
|
||||
|
||||
<p>
|
||||
You may have heard of the <a href="dev-advanced-api.html">Advanced API</a>.
|
||||
If you're interested in reading dry prose and boring functional
|
||||
specifications, feel free to click that link to get a no-nonsense overview
|
||||
on the Advanced API. For the rest of us, there's this tutorial. By the time
|
||||
you're finished reading this, you should have a pretty good idea on
|
||||
how to implement custom tags and attributes that HTML Purifier may not have.
|
||||
</p>
|
||||
|
||||
<h2>Is it necessary?</h2>
|
||||
|
||||
<p>
|
||||
Before we even write any code, it is paramount to consider whether or
|
||||
not the code we're writing is necessary. HTML Purifier, by default,
|
||||
contains a large set of elements and attributes: large enough so that
|
||||
<em>any</em> element or attribute in XHTML 1.0 (and its HTML variant)
|
||||
that can be safely used by the general public is implemented.
|
||||
</p>
|
||||
|
||||
<p>
|
||||
So what needs to be implemented? (Feel free to skip this section if
|
||||
you know what you want).
|
||||
</p>
|
||||
|
||||
<h3>XHTML 1.0</h3>
|
||||
|
||||
<p>
|
||||
All of the modules listed below are based off of the
|
||||
<a href="http://www.w3.org/TR/2001/REC-xhtml-modularization-20010410/abstract_modules.html#sec_5.2.">modularization of
|
||||
XHTML</a>, which, while technically for XHTML 1.1, is quite a useful
|
||||
resource.
|
||||
</p>
|
||||
|
||||
<ul>
|
||||
<li>Structure</li>
|
||||
<li>Frames</li>
|
||||
<li>Applets (deprecated)</li>
|
||||
<li>Forms</li>
|
||||
<li>Image maps</li>
|
||||
<li>Objects</li>
|
||||
<li>Frames</li>
|
||||
<li>Events</li>
|
||||
<li>Meta-information</li>
|
||||
<li>Style sheets</li>
|
||||
<li>Link (not hypertext)</li>
|
||||
<li>Base</li>
|
||||
<li>Name</li>
|
||||
</ul>
|
||||
|
||||
<p>
|
||||
If you don't recognize it, you probably don't need it. But the curious
|
||||
can look all of these modules up in the above-mentioned document. Note
|
||||
that inline scripting comes packaged with HTML Purifier (more on this
|
||||
later).
|
||||
</p>
|
||||
|
||||
<h3>XHTML 1.1</h3>
|
||||
|
||||
<p>
|
||||
We have not implemented the
|
||||
<a href="http://www.w3.org/TR/2001/REC-ruby-20010531/">Ruby module</a>,
|
||||
which defines a set of tags
|
||||
for publishing short annotations for text, used mostly in Japanese
|
||||
and Chinese school texts.
|
||||
</p>
|
||||
|
||||
<h3>XHTML 2.0</h3>
|
||||
|
||||
<p>
|
||||
<a href="http://www.w3.org/TR/xhtml2/">XHTML 2.0</a> is still a
|
||||
working draft, so any elements introduced in the
|
||||
specification have not been implemented and will not be implemented
|
||||
until we get a recommendation or proposal. Because XHTML 2.0 is
|
||||
an entirely new markup language, implementing rules for it will be
|
||||
no easy task.
|
||||
</p>
|
||||
|
||||
<h3>HTML 5</h3>
|
||||
|
||||
<p>
|
||||
<a href="http://www.whatwg.org/specs/web-apps/current-work/">HTML 5</a>
|
||||
is a fork of HTML 4.01 by WHATWG, who believed that XHTML 2.0 was headed
|
||||
in the wrong direction. It too is a working draft, and may change
|
||||
drastically before publication, but it should be noted that the
|
||||
<code>canvas</code> tag has been implemented by many browser vendors.
|
||||
</p>
|
||||
|
||||
<h3>Proprietary</h3>
|
||||
|
||||
<p>
|
||||
There are a number of proprietary tags still in the wild. Many of them
|
||||
have been documented in <a href="ref-proprietary-tags.txt">ref-proprietary-tags.txt</a>,
|
||||
but there is currently no implementation for any of them.
|
||||
</p>
|
||||
|
||||
<h3>Extensions</h3>
|
||||
|
||||
<p>
|
||||
There are also a number of other XML languages out there that can
|
||||
be embedded in HTML documents: two of the most popular are MathML and
|
||||
SVG, and I frequently get requests to implement these. But they are
|
||||
expansive, comprehensive specifications, and it would take far too long
|
||||
to implement them <em>correctly</em> (most systems I've seen go as far
|
||||
as whitelisting tags and no further; come on, what about nesting!)
|
||||
</p>
|
||||
|
||||
<p>
|
||||
Word of warning: HTML Purifier is currently <em>not</em> namespace
|
||||
aware.
|
||||
</p>
|
||||
|
||||
<h2>Giving back</h2>
|
||||
|
||||
<p>
|
||||
As you may imagine from the details above (don't be abashed if you didn't
|
||||
read it all: a glance over would have done), there's quite a bit that
|
||||
HTML Purifier doesn't implement. Recent architectural changes have
|
||||
allowed HTML Purifier to implement elements and attributes that are not
|
||||
safe! Don't worry, they won't be activated unless you set %HTML.Trusted
|
||||
to true, but they certainly help out users who need to put, say, forms
|
||||
on their page and don't want to go through the trouble of reading this
|
||||
and implementing it themselves.
|
||||
</p>
|
||||
|
||||
<p>
|
||||
So any of the above that you implement for your own application could
|
||||
help out some other poor sap on the other side of the globe. Help us
|
||||
out, and send back code so that it can be hammered into a module and
|
||||
released with the core. Any code would be greatly appreciated!
|
||||
</p>
|
||||
|
||||
<h2>And now...</h2>
|
||||
|
||||
<p>
|
||||
Enough philosophical talk, time for some code:
|
||||
</p>
|
||||
|
||||
<pre>$config = HTMLPurifier_Config::createDefault();
|
||||
$config->set('HTML', 'DefinitionID', 'enduser-customize.html tutorial');
|
||||
$config->set('HTML', 'DefinitionRev', 1);
|
||||
$def =& $config->getHTMLDefinition(true);</pre>
|
||||
|
||||
<p>
|
||||
Assuming that HTML Purifier has already been properly loaded (hint:
|
||||
include <code>HTMLPurifier.auto.php</code>), this code will set up
|
||||
the environment that you need to start customizing the HTML definition.
|
||||
What's going on?
|
||||
</p>
|
||||
|
||||
<ul>
|
||||
<li>
|
||||
The first three lines are regular configuration code:
|
||||
<ul>
|
||||
<li>
|
||||
%HTML.DefinitionID is set to a unique identifier for your
|
||||
custom HTML definition. This prevents it from clobbering
|
||||
other custom definitions on the same installation.
|
||||
</li>
|
||||
<li>
|
||||
%HTML.DefinitionRev is a revision integer of your HTML
|
||||
definition. Because HTML definitions are cached, you'll need
|
||||
to increment this whenever you make a change in order to flush
|
||||
the cache.
|
||||
</li>
|
||||
</ul>
|
||||
</li>
|
||||
<li>
|
||||
The fourth line retrieves a raw <code>HTMLPurifier_HTMLDefinition</code>
|
||||
object that we will be tweaking. If the parameter was removed, we
|
||||
would be retrieving a fully formed definition object, which is somewhat
|
||||
useless for customization purposes.
|
||||
</li>
|
||||
</ul>
|
||||
|
||||
<h3>Broken backwards-compatibility</h3>
|
||||
|
||||
<p>
|
||||
Those of you who have already been twiddling around with the raw
|
||||
HTML definition object will notice that you are now getting an error
|
||||
when you attempt to retrieve the raw definition object without specifying
|
||||
a DefinitionID. It is vital to caching (see below) that you make a unique
|
||||
name for your customized definition, so make up something right now and
|
||||
things will operate again.
|
||||
</p>
|
||||
|
||||
<h2>Turn off caching</h2>
|
||||
|
||||
<p>
|
||||
To make development easier, we're going to temporarily turn off
|
||||
definition caching:
|
||||
</p>
|
||||
|
||||
<pre>$config = HTMLPurifier_Config::createDefault();
|
||||
$config->set('HTML', 'DefinitionID', 'enduser-customize.html tutorial');
|
||||
$config->set('HTML', 'DefinitionRev', 1);
|
||||
<strong>$config->set('Core', 'DefinitionCache', null); // remove this later!</strong>
|
||||
$def =& $config->getHTMLDefinition(true);</pre>
|
||||
|
||||
<p>
|
||||
A few things should be mentioned about the caching mechanism before
|
||||
we move on. For performance reasons, HTML Purifier caches generated
|
||||
<code>HTMLPurifier_Definition</code> objects in serialized files
|
||||
stored (by default) in <code>library/HTMLPurifier/DefinitionCache/Serializer</code>.
|
||||
A lot of processing is done in order to create these objects, so it
|
||||
makes little sense to repeat the same processing over and over again
|
||||
whenever HTML Purifier is called.
|
||||
</p>
|
||||
|
||||
<p>
|
||||
In order to identify a cache entry, HTML Purifier uses three variables:
|
||||
the library's version number, the value of %HTML.DefinitionRev and
|
||||
a serial of relevant configuration. Whenever any of these changes,
|
||||
a new HTML definition is generated. Notice that there is no way
|
||||
for the definition object to track changes to customizations: here, it
|
||||
is up to you to supply appropriate information to DefinitionID and
|
||||
DefinitionRev.
|
||||
</p>
|
||||
|
||||
<h2 id="addAttribute">Add an attribute</h2>
|
||||
|
||||
<p>
|
||||
For this example, we're going to implement the <code>target</code> attribute found
|
||||
on <code>a</code> elements. To implement an attribute, we have to
|
||||
ask a few questions:
|
||||
</p>
|
||||
|
||||
<ol>
|
||||
<li>What element is it found on?</li>
|
||||
<li>What is its name?</li>
|
||||
<li>Is it required or optional?</li>
|
||||
<li>What are valid values for it?</li>
|
||||
</ol>
|
||||
|
||||
<p>
|
||||
The first three are easy: the element is <code>a</code>, the attribute
|
||||
is <code>target</code>, and it is not a required attribute. (If it
|
||||
was required, we'd need to append an asterisk to the attribute name;
|
||||
you'll see an example of this in the addElement() example).
|
||||
</p>
|
||||
|
||||
<p>
|
||||
The last question is a little trickier.
|
||||
Let's allow the special values: _blank, _self, _target and _top.
|
||||
The form of this is called an <strong>enumeration</strong>, a list of
|
||||
valid values, although only one can be used at a time. To translate
|
||||
this into code form, we write:
|
||||
</p>
|
||||
|
||||
<pre>$config = HTMLPurifier_Config::createDefault();
|
||||
$config->set('HTML', 'DefinitionID', 'enduser-customize.html tutorial');
|
||||
$config->set('HTML', 'DefinitionRev', 1);
|
||||
$config->set('Core', 'DefinitionCache', null); // remove this later!
|
||||
$def =& $config->getHTMLDefinition(true);
|
||||
<strong>$def->addAttribute('a', 'target', 'Enum#_blank,_self,_target,_top');</strong></pre>
|
||||
|
||||
<p>
|
||||
The <code>Enum#_blank,_self,_target,_top</code> does all the magic.
|
||||
The string is split into two parts, separated by a hash mark (#):
|
||||
</p>
|
||||
|
||||
<ol>
|
||||
<li>The first part is the name of what we call an <code>AttrDef</code></li>
|
||||
<li>The second part is the parameter of the above-mentioned <code>AttrDef</code></li>
|
||||
</ol>
|
||||
|
||||
<p>
|
||||
If that sounds vague and generic, it's because it is! HTML Purifier defines
|
||||
an assortment of different attribute types one can use, and each of these
|
||||
has its own specialized parameter format. Here are some of the more useful
|
||||
ones:
|
||||
</p>
|
||||
|
||||
<table class="table">
|
||||
<thead>
|
||||
<tr>
|
||||
<th>Type</th>
|
||||
<th>Format</th>
|
||||
<th>Description</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
<tr>
|
||||
<th>Enum</th>
|
||||
<td><em>[s:]</em>value1,value2,...</td>
|
||||
<td>
|
||||
Attribute with a number of valid values, one of which may be used. When
|
||||
s: is present, the enumeration is case sensitive.
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>Bool</th>
|
||||
<td>attribute_name</td>
|
||||
<td>
|
||||
Boolean attribute, with only one valid value: the name
|
||||
of the attribute.
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>CDATA</th>
|
||||
<td></td>
|
||||
<td>
|
||||
Attribute of arbitrary text. Can also be referred to as <strong>Text</strong>
|
||||
(the specification makes a semantic distinction between the two).
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>ID</th>
|
||||
<td></td>
|
||||
<td>
|
||||
Attribute that specifies a unique ID
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>Pixels</th>
|
||||
<td></td>
|
||||
<td>
|
||||
Attribute that specifies an integer pixel length
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>Length</th>
|
||||
<td></td>
|
||||
<td>
|
||||
Attribute that specifies a pixel or percentage length
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>NMTOKENS</th>
|
||||
<td></td>
|
||||
<td>
|
||||
Attribute that specifies a number of name tokens, example: the
|
||||
<code>class</code> attribute
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>URI</th>
|
||||
<td></td>
|
||||
<td>
|
||||
Attribute that specifies a URI, example: the <code>href</code>
|
||||
attribute
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>Number</th>
|
||||
<td></td>
|
||||
<td>
|
||||
Attribute that specifies a positive integer number
|
||||
</td>
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
|
||||
<p>
|
||||
For a complete list, consult
|
||||
<a href="http://htmlpurifier.org/svnroot/htmlpurifier/trunk/library/HTMLPurifier/AttrTypes.php"><code>library/HTMLPurifier/AttrTypes.php</code></a>;
|
||||
more information on attributes that accept parameters can be found on their
|
||||
respective includes in
|
||||
<a href="http://htmlpurifier.org/svnroot/htmlpurifier/trunk/library/HTMLPurifier/AttrDef/"><code>library/HTMLPurifier/AttrDef</code></a>.
|
||||
</p>
|
||||
|
||||
<p>
|
||||
Sometimes, the restrictive list in AttrTypes just doesn't cut it. Don't
|
||||
sweat: you can also use a fully instantiated object as the value. The
|
||||
equivalent, verbose form of the above example is:
|
||||
</p>
|
||||
|
||||
<pre>$config = HTMLPurifier_Config::createDefault();
|
||||
$config->set('HTML', 'DefinitionID', 'enduser-customize.html tutorial');
|
||||
$config->set('HTML', 'DefinitionRev', 1);
|
||||
$config->set('Core', 'DefinitionCache', null); // remove this later!
|
||||
$def =& $config->getHTMLDefinition(true);
|
||||
<strong>$def->addAttribute('a', 'target', new HTMLPurifier_AttrDef_Enum(
|
||||
array('_blank','_self','_target','_top')
|
||||
));</strong></pre>
|
||||
|
||||
<p>
|
||||
Trust me, you'll learn to love the shorthand.
|
||||
</p>
|
||||
|
||||
<h2>Add an element</h2>
|
||||
|
||||
<p>
|
||||
Adding attributes is really small-fry stuff, though, and it was possible
|
||||
to add them (albeit a bit more wordy) prior to 2.0. The real gem of
|
||||
the Advanced API is adding elements. There are five questions to
|
||||
ask when adding a new element:
|
||||
</p>
|
||||
|
||||
<ol>
|
||||
<li>What is the element's name?</li>
|
||||
<li>What content set does this element belong to?</li>
|
||||
<li>What are the allowed children of this element?</li>
|
||||
<li>What attributes does the element allow that are general?</li>
|
||||
<li>What attributes does the element allow that are specific to this element?</li>
|
||||
</ol>
|
||||
|
||||
<p>
|
||||
It's a mouthful, and you'll be slightly lost if you're not familiar with
|
||||
the HTML specification, so let's explain them step by step.
|
||||
</p>
|
||||
|
||||
<h3>Content set</h3>
|
||||
|
||||
<p>
|
||||
The HTML specification defines two major content sets: Inline
|
||||
and Block. Each of these
|
||||
content sets contains a list of elements: Inline contains things like
|
||||
<code>span</code> and <code>b</code> while Block contains things like
|
||||
<code>div</code> and <code>blockquote</code>.
|
||||
</p>
|
||||
|
||||
<p>
|
||||
These content sets amount to a macro mechanism for HTML definition. Most
|
||||
elements in HTML are organized into one of these two sets, and most
|
||||
elements in HTML allow elements from one of these sets. If we had
|
||||
to write each element verbatim into each other element's allowed
|
||||
children, we would have ridiculously large lists; instead we use
|
||||
content sets to compactify the declaration.
|
||||
</p>
|
||||
|
||||
<p>
|
||||
Practically speaking, there are several useful values you can use here:
|
||||
</p>
|
||||
|
||||
<table class="table">
|
||||
<thead>
|
||||
<tr>
|
||||
<th>Content set</th>
|
||||
<th>Description</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
<tr>
|
||||
<th>Inline</th>
|
||||
<td>Character level elements, text</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>Block</th>
|
||||
<td>Block-like elements, like paragraphs and lists</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th><em>false</em></th>
|
||||
<td>
|
||||
Any element that doesn't fit into the mold, for example <code>li</code>
|
||||
or <code>tr</code>
|
||||
</td>
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
|
||||
<p>
|
||||
By specifying a valid value here, all other elements that use that
|
||||
content set will also allow your element, without you having to do
|
||||
anything. If you specify <em>false</em>, you'll have to register
|
||||
your element manually.
|
||||
</p>
|
||||
|
||||
<h3>Allowed children</h3>
|
||||
|
||||
<p>
|
||||
Allowed children defines the elements that this element can contain.
|
||||
The allowed values may range from none to a complex regexp depending on
|
||||
your element.
|
||||
</p>
|
||||
|
||||
<p>
|
||||
If you've ever taken a look at the HTML DTD's before, you may have
|
||||
noticed declarations like this:
|
||||
</p>
|
||||
|
||||
<pre><!ELEMENT LI - O (%flow;)* -- list item --></pre>
|
||||
|
||||
<p>
|
||||
The <code>(%flow;)*</code> indicates the allowed children of the
|
||||
<code>li</code> tag: <code>li</code> allows any number of flow
|
||||
elements as its children. In HTML Purifier, we'd write it like
|
||||
<code>Flow</code> (here's where the content sets we were
|
||||
discussing earlier come into play). There are three shorthand content models you
|
||||
can specify:
|
||||
</p>
|
||||
|
||||
<table class="table">
|
||||
<thead>
|
||||
<tr>
|
||||
<th>Content model</th>
|
||||
<th>Description</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
<tr>
|
||||
<th>Empty</th>
|
||||
<td>No children allowed, like <code>br</code> or <code>hr</code></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>Inline</th>
|
||||
<td>Any number of inline elements and text, like <code>span</code></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>Flow</th>
|
||||
<td>Any number of inline elements, block elements and text, like <code>div</code></td>
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
|
||||
<p>
|
||||
This covers 90% of all the cases out there, but what about elements that
|
||||
break the mold like <code>ul</code>? This guy requires at least one
|
||||
child, and the only valid children for it are <code>li</code>. The
|
||||
content model is: <code>Required: li</code>. There are two parts: the
|
||||
first type determines what <code>ChildDef</code> will be used to validate
|
||||
content models. The most common values are:
|
||||
</p>
|
||||
|
||||
<table class="table">
|
||||
<thead>
|
||||
<tr>
|
||||
<th>Type</th>
|
||||
<th>Description</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
<tr>
|
||||
<th>Required</th>
|
||||
<td>Children must be one or more of the valid elements</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>Optional</th>
|
||||
<td>Children can be any number of the valid elements</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>Custom</th>
|
||||
<td>Children must follow the DTD-style regex</td>
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
|
||||
<p>
|
||||
You can also implement your own <code>ChildDef</code>: this was done
|
||||
for a few special cases in HTML Purifier such as <code>Chameleon</code>
|
||||
(for <code>ins</code> and <code>del</code>), <code>StrictBlockquote</code>
|
||||
and <code>Table</code>.
|
||||
</p>
|
||||
|
||||
<p>
|
||||
The second part specifies either valid elements or a regular expression.
|
||||
Valid elements are separated with horizontal bars (|), i.e.
|
||||
"<code>a | b | c</code>". Use #PCDATA to represent plain text.
|
||||
Regular expressions are based off of DTD's style:
|
||||
</p>
|
||||
|
||||
<ul>
|
||||
<li>Parentheses () are used for grouping</li>
|
||||
<li>Commas (,) separate elements that should come one after another</li>
|
||||
<li>Horizontal bars (|) indicate one or the other elements should be used</li>
|
||||
<li>Plus signs (+) are used for a one or more match</li>
|
||||
<li>Asterisks (*) are used for a zero or more match</li>
|
||||
<li>Question marks (?) are used for a zero or one match</li>
|
||||
</ul>
|
||||
|
||||
<p>
|
||||
For example, "<code>a, b?, (c | d), e+, f*</code>" means "In this order,
|
||||
one <code>a</code> element, at most one <code>b</code> element,
|
||||
one <code>c</code> or <code>d</code> element (but not both), one or more
|
||||
<code>e</code> elements, and any number of <code>f</code> elements."
|
||||
Regex veterans should be able to jump right in, and those not so savvy
|
||||
can always copy-paste W3C's content model definitions into HTML Purifier
|
||||
and hope for the best.
|
||||
</p>
|
||||
|
||||
<p>
|
||||
A word of warning: while the regex format is extremely flexible on
|
||||
the developer's side, it is
|
||||
quite unforgiving on the user's side. If the user input does not <em>exactly</em>
|
||||
match the specification, the entire contents of the element will
|
||||
be nuked. This is why there are specific content model types like
|
||||
Optional and Required: while they could be implemented as <code>Custom:
|
||||
(valid | elements)*</code>, the custom classes contain special recovery
|
||||
measures that make sure as much of the user's original content gets
|
||||
through. HTML Purifier's core, as a rule, does not use Custom.
|
||||
</p>
|
||||
|
||||
<p>
|
||||
One final note: you can also use Content Sets inside your valid elements
|
||||
lists or regular expressions. In fact, the three shorthand content models
|
||||
mentioned above are just that: abbreviations:
|
||||
</p>
|
||||
|
||||
<table class="table">
|
||||
<thead>
|
||||
<tr>
|
||||
<th>Content model</th>
|
||||
<th>Implementation</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
<tr>
|
||||
<th>Inline</th>
|
||||
<td>Optional: Inline | #PCDATA</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>Flow</th>
|
||||
<td>Optional: Flow | #PCDATA</td>
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
|
||||
<p>
|
||||
When the definition is compiled, Inline will be replaced with a
|
||||
horizontal-bar separated list of inline elements. Also, notice that
|
||||
it does not contain text: you have to specify that yourself.
|
||||
</p>
|
||||
|
||||
<h3>Common attributes</h3>
|
||||
|
||||
<p>
|
||||
Congratulations: you have just gotten over the proverbial hump (Allowed
|
||||
children). Common attributes is much simpler, and boils down to
|
||||
one question: does your element have the <code>id</code>, <code>style</code>,
|
||||
<code>class</code>, <code>title</code> and <code>lang</code> attributes?
|
||||
If so, you'll want to specify the <code>Common</code> attribute collection,
|
||||
which contains these five attributes that are found on almost every
|
||||
HTML element in the specification.
|
||||
</p>
|
||||
|
||||
<p>
|
||||
There are a few more collections, but they're really edge cases:
|
||||
</p>
|
||||
|
||||
<table class="table">
|
||||
<thead>
|
||||
<tr>
|
||||
<th>Collection</th>
|
||||
<th>Attributes</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
<tr>
|
||||
<th>I18N</th>
|
||||
<td><code>lang</code>, possibly <code>xml:lang</code></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<th>Core</th>
|
||||
<td><code>style</code>, <code>class</code>, <code>id</code> and <code>title</code></td>
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
|
||||
<p>
|
||||
Common is a combination of the above-mentioned collections.
|
||||
</p>
|
||||
|
||||
<h3>Attributes</h3>
|
||||
|
||||
<p>
|
||||
If you didn't read the <a href="#addAttribute">previous section on
|
||||
adding attributes</a>, read it now. The last parameter is simply
|
||||
an array of attribute names to attribute implementations, in the exact
|
||||
same format as <code>addAttribute()</code>.
|
||||
</p>
|
||||
|
||||
<h3>Putting it all together</h3>
|
||||
|
||||
<p>
|
||||
We're going to implement <code>form</code>. Before we embark, let's
|
||||
grab a reference implementation from over at the
|
||||
<a href="http://www.w3.org/TR/html4/sgml/loosedtd.html">transitional DTD</a>:
|
||||
</p>
|
||||
|
||||
<pre><!ELEMENT FORM - - (%flow;)* -(FORM) -- interactive form -->
|
||||
<!ATTLIST FORM
|
||||
%attrs; -- %coreattrs, %i18n, %events --
|
||||
action %URI; #REQUIRED -- server-side form handler --
|
||||
method (GET|POST) GET -- HTTP method used to submit the form--
|
||||
enctype %ContentType; "application/x-www-form-urlencoded"
|
||||
accept %ContentTypes; #IMPLIED -- list of MIME types for file upload --
|
||||
name CDATA #IMPLIED -- name of form for scripting --
|
||||
onsubmit %Script; #IMPLIED -- the form was submitted --
|
||||
onreset %Script; #IMPLIED -- the form was reset --
|
||||
target %FrameTarget; #IMPLIED -- render in this frame --
|
||||
accept-charset %Charsets; #IMPLIED -- list of supported charsets --
|
||||
></pre>
|
||||
|
||||
<p>
|
||||
Juicy! With just this, we can answer four of our five questions:
|
||||
</p>
|
||||
|
||||
<ol>
|
||||
<li>What is the element's name? <strong>form</strong></li>
|
||||
<li>What content set does this element belong to? <strong>Block</strong>
|
||||
(this needs a little sleuthing, I find the easiest way is to search
|
||||
the DTD for <code>FORM</code> and determine which set it is in.)</li>
|
||||
<li>What are the allowed children of this element? <strong>Zero
|
||||
or more flow elements, but no nested <code>form</code>s</strong></li>
|
||||
<li>What attributes does the element allow that are general? <strong>Common</strong></li>
|
||||
<li>What attributes does the element allow that are specific to this element? <strong>A whole bunch, see ATTLIST;
|
||||
we're going to do the vital ones: <code>action</code>, <code>method</code> and <code>name</code></strong></li>
|
||||
</ol>
|
||||
|
||||
<p>
|
||||
Time for some code:
|
||||
</p>
|
||||
|
||||
<pre>$config = HTMLPurifier_Config::createDefault();
|
||||
$config->set('HTML', 'DefinitionID', 'enduser-customize.html tutorial');
|
||||
$config->set('HTML', 'DefinitionRev', 1);
|
||||
$config->set('Core', 'DefinitionCache', null); // remove this later!
|
||||
$def =& $config->getHTMLDefinition(true);
|
||||
$def->addAttribute('a', 'target', new HTMLPurifier_AttrDef_Enum(
|
||||
array('_blank','_self','_target','_top')
|
||||
));
|
||||
<strong>$form =& $def->addElement(
|
||||
'form', // name
|
||||
'Block', // content set
|
||||
'Flow', // allowed children
|
||||
'Common', // attribute collection
|
||||
array( // attributes
|
||||
'action*' => 'URI',
|
||||
'method' => 'Enum#get,post',
|
||||
'name' => 'ID'
|
||||
)
|
||||
);
|
||||
$form->excludes = array('form' => true);</strong></pre>
|
||||
|
||||
<p>
|
||||
Each of the parameters corresponds to one of the questions we asked.
|
||||
Notice that we added an asterisk to the end of the <code>action</code>
|
||||
attribute to indicate that it is required. If someone specifies a
|
||||
<code>form</code> without that attribute, the tag will be axed.
|
||||
Also, the extra line at the end is a special extra declaration that
|
||||
prevents forms from being nested within each other.
|
||||
</p>
|
||||
|
||||
<p>
|
||||
And that's all there is to it! Implementing the rest of the form
|
||||
module is left as an exercise to the user; to see more examples
|
||||
check the <a href="http://htmlpurifier.org/svnroot/htmlpurifier/trunk/library/HTMLPurifier/HTMLModule/"><code>library/HTMLPurifier/HTMLModule/</code></a> directory
|
||||
in your local HTML Purifier installation.
|
||||
</p>
|
||||
|
||||
<h2>And beyond...</h2>
|
||||
|
||||
<p>
|
||||
Perceptive users may have realized that, to a certain extent, we
|
||||
have simply re-implemented the facilities of XML Schema or the
|
||||
Document Type Definition. What you are seeing here, however, is
|
||||
not just an XML Schema or Document Type Definition: it is a fully
|
||||
expressive method of specifying the definition of HTML that is
|
||||
a portable superset of the capabilities of the two above-mentioned schema
|
||||
languages. What makes HTMLDefinition so powerful is the fact that
|
||||
if we don't have an implementation for a content model or an attribute
|
||||
definition, you can supply it yourself by writing a PHP class.
|
||||
</p>
|
||||
|
||||
<p>
|
||||
There are many facets of HTMLDefinition beyond the Advanced API I have
|
||||
walked you through today. To find out more about these, you can
|
||||
check out these source files:
|
||||
</p>
|
||||
|
||||
<ul>
|
||||
<li><a href="http://htmlpurifier.org/svnroot/htmlpurifier/trunk/library/HTMLPurifier/HTMLModule.php"><code>library/HTMLPurifier/HTMLModule.php</code></a></li>
|
||||
<li><a href="http://htmlpurifier.org/svnroot/htmlpurifier/trunk/library/HTMLPurifier/ElementDef.php"><code>library/HTMLPurifier/ElementDef.php</code></a></li>
|
||||
</ul>
|
||||
|
||||
<div id="version">$Id: enduser-tidy.html 1158 2007-06-18 19:26:29Z Edward $</div>
|
||||
|
||||
</body></html>
|
147
docs/enduser-id.html
Normal file
147
docs/enduser-id.html
Normal file
@@ -0,0 +1,147 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
|
||||
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
|
||||
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"><head>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
|
||||
<meta name="description" content="Explains various methods for allowing IDs in documents safely in HTML Purifier." />
|
||||
<link rel="stylesheet" type="text/css" href="./style.css" />
|
||||
|
||||
<title>IDs - HTML Purifier</title>
|
||||
|
||||
</head><body>
|
||||
|
||||
<h1 class="subtitled">IDs</h1>
|
||||
<div class="subtitle">What they are, why you should(n't) wear them, and how to deal with it</div>
|
||||
|
||||
<div id="filing">Filed under End-User</div>
|
||||
<div id="index">Return to the <a href="index.html">index</a>.</div>
|
||||
<div id="home"><a href="http://htmlpurifier.org/">HTML Purifier</a> End-User Documentation</div>
|
||||
|
||||
<p>Prior to HTML Purifier 1.2.0, this library blithely accepted user input that
|
||||
looked like this:</p>
|
||||
|
||||
<pre><a id="fragment">Anchor</a></pre>
|
||||
|
||||
<p>...presenting an attractive vector for those that would destroy standards
|
||||
compliance: simply set the ID to one that is already used elsewhere in the
|
||||
document and voila: validation breaks. There was a half-hearted attempt to
|
||||
prevent this by allowing users to blacklist IDs, but I suspect that no one
|
||||
really bothered, and thus, with the release of 1.2.0, IDs are now <em>removed</em>
|
||||
by default.</p>
|
||||
|
||||
<p>IDs, however, are quite useful functionality to have, so if users start
|
||||
complaining about broken anchors you'll probably want to turn them back on
|
||||
with %HTML.EnableAttrID. But before you go mucking around with the config
|
||||
object, it's probably worth taking some precautions to keep your page
|
||||
validating. Why?</p>
|
||||
|
||||
<ol>
|
||||
<li>Standards-compliant pages are good</li>
|
||||
<li>Duplicated IDs interfere with anchors. If there are two id="foobar"s in a
|
||||
document, which spot does a browser presented with the fragment #foobar go
|
||||
to? Most browsers opt for the first appearing ID, making it impossible
|
||||
to reference the second section. Similarly, duplicated IDs can hijack
|
||||
client-side scripting that relies on the IDs of elements.</li>
|
||||
</ol>
|
||||
|
||||
<p>You have (currently) four ways of dealing with the problem.</p>
|
||||
|
||||
|
||||
|
||||
<h2 class="subtitled">Blacklisting IDs</h2>
|
||||
<div class="subsubtitle">Good for pages with single content source and stable templates</div>
|
||||
|
||||
<p>Keeping in terms with the
|
||||
<acronym title="Keep It Simple, Stupid">KISS</acronym> principle, let us
|
||||
deal with the most obvious solution: preventing users from using any IDs that
|
||||
appear elsewhere on the document. The method is simple:</p>
|
||||
|
||||
<pre>$config->set('HTML', 'EnableAttrID', true);
|
||||
$config->set('Attr', 'IDBlacklist', array(
|
||||
'list', 'of', 'attributes', 'that', 'are', 'forbidden'
|
||||
));</pre>
|
||||
|
||||
<p>That being said, there are some notable drawbacks. First of all, you have to
|
||||
know precisely which IDs are being used by the HTML surrounding the user code.
|
||||
This is easier said than done: quite often the page designer and the system
|
||||
coder work separately, so the designer has to constantly be talking with the
|
||||
coder whenever he decides to add a new anchor. Miss one and you open yourself
|
||||
to possible standards-compliance issues.</p>
|
||||
|
||||
<p>Furthermore, this position becomes untenable when a single web page must hold
|
||||
multiple portions of user-submitted content. Since there's obviously no way
|
||||
to find out before-hand what IDs users will use, the blacklist is helpless.
|
||||
And since HTML Purifier validates each segment separately, perhaps doing
|
||||
so at different times, it would be extremely difficult to dynamically update
|
||||
the blacklist in between runs.</p>
|
||||
|
||||
<p>Finally, simply destroying the ID is extremely un-userfriendly behavior: after
|
||||
all, they might have simply specified a duplicate ID by accident.</p>
|
||||
|
||||
<p>Thus, we get to our second method.</p>
|
||||
|
||||
|
||||
|
||||
<h2 class="subtitled">Namespacing IDs</h2>
|
||||
<div class="subsubtitle">Lazy developer's way, but needs user education</div>
|
||||
|
||||
<p>This method, too, is quite simple: add a prefix to all user IDs. With this
|
||||
code:</p>
|
||||
|
||||
<pre>$config->set('HTML', 'EnableAttrID', true);
|
||||
$config->set('Attr', 'IDPrefix', 'user_');</pre>
|
||||
|
||||
<p>...this:</p>
|
||||
|
||||
<pre><a id="foobar">Anchor!</a></pre>
|
||||
|
||||
<p>...turns into:</p>
|
||||
|
||||
<pre><a id="user_foobar">Anchor!</a></pre>
|
||||
|
||||
<p>As long as you don't have any IDs that start with user_, collisions are
|
||||
guaranteed not to happen. The drawback is obvious: if a user submits
|
||||
id="foobar", they probably expect to be able to reference their page with
|
||||
#foobar. You'll have to tell them, "No, that doesn't work, you have to add
|
||||
user_ to the beginning."</p>
|
||||
|
||||
<p>And yes, things get hairier. Even with a nice prefix, we still have done
|
||||
nothing about multiple HTML Purifier outputs on one page. Thus, we have
|
||||
a second configuration value to piggy-back off of: %Attr.IDPrefixLocal:</p>
|
||||
|
||||
<pre>$config->set('Attr', 'IDPrefixLocal', 'comment' . $id . '_');</pre>
|
||||
|
||||
<p>This new directive does nothing but append on to the regular IDPrefix, but is
|
||||
special in that it is volatile: its value is determined at run-time and
|
||||
cannot possibly be cordoned into, say, a .ini config file. As for what to
|
||||
put into the directive, that is up to you, but I would recommend the ID number
|
||||
the text has been assigned in the database. Whatever you pick, however, it
|
||||
has to be unique and stable for the text you are validating. Note, however,
|
||||
that we require that %Attr.IDPrefix be set before you use this directive.</p>
|
||||
|
||||
<p>And also remember: the user has to know what this prefix is too!</p>
|
||||
|
||||
|
||||
|
||||
<h2>Abstinence</h2>
|
||||
|
||||
<p>You may not want to bother. That's okay too, just don't enable IDs.</p>
|
||||
|
||||
<p>Personally, I would take this road whenever user-submitted content would be
|
||||
possibly shown together on one page. Why a blog comment would need to use
|
||||
anchors is beyond me.</p>
|
||||
|
||||
|
||||
|
||||
<h2>Denial</h2>
|
||||
|
||||
<p>To revert back to pre-1.2.0 behavior, simply:</p>
|
||||
|
||||
<pre>$config->set('HTML', 'EnableAttrID', true);</pre>
|
||||
|
||||
<p>Don't come crying to me when your page mysteriously stops validating, though.</p>
|
||||
|
||||
<div id="version">$Id$</div>
|
||||
|
||||
</body>
|
||||
</html>
|
@@ -29,13 +29,14 @@ output is valid XHTML or send the HTML through a draconic XML parser (and yet
|
||||
still get the nesting wrong: SafeHtmlChecker.class.php does not prevent <a>
|
||||
tags from being nested within each other).
|
||||
|
||||
This document seeks to detail the inner workings of HTML Purifier. The first
|
||||
This document no longer is a detailed description of how HTMLPurifier works,
|
||||
as those descriptions have been moved to the appropriate code. The first
|
||||
draft was drawn up after two rough code sketches and the implementation of a
|
||||
forgiving lexer. You may also be interested in the unit tests located in the
|
||||
tests/ folder, which provide a living document on how exactly the filter deals
|
||||
with malformed input.
|
||||
|
||||
In summary:
|
||||
In summary (see corresponding classes for more details):
|
||||
|
||||
1. Parse document into an array of tag and text tokens (Lexer)
|
||||
2. Remove all elements not on whitelist and transform certain other elements
|
||||
@@ -52,4 +53,5 @@ In summary:
|
||||
HTML Purifier is best suited for documents that require a rich array of
|
||||
HTML tags. Things like blog comments are, in all likelihood, most appropriately
|
||||
written in an extremely restrictive set of markup that doesn't require
|
||||
all this functionality (or not written in HTML at all).
|
||||
all this functionality (or not written in HTML at all), although this may
|
||||
be changing in the future with the addition of levels of filtering.
|
18
docs/enduser-security.txt
Normal file
18
docs/enduser-security.txt
Normal file
@@ -0,0 +1,18 @@
|
||||
|
||||
Security
|
||||
|
||||
Like anything that claims to afford security, HTML_Purifier can be circumvented
|
||||
through negligence of people. This class will do its job: no more, no less,
|
||||
and it's up to you to provide it the proper information and proper context
|
||||
to be effective. Things to remember:
|
||||
|
||||
1. Character Encoding: see enduser-utf8.html for more info.
|
||||
|
||||
2. IDs: see enduser-id.html for more info
|
||||
|
||||
3. Links: document pending feature completion
|
||||
Rudimentary blacklisting, we should also allow only relative URIs. We
|
||||
need a doc to explain the stuff.
|
||||
|
||||
4. CSS: document pending
|
||||
Explain which CSS styles we blocked and why.
|
117
docs/enduser-slow.html
Normal file
117
docs/enduser-slow.html
Normal file
@@ -0,0 +1,117 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
|
||||
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
|
||||
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"><head>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
|
||||
<meta name="description" content="Explains how to speed up HTML Purifier through caching or inbound filtering." />
|
||||
<link rel="stylesheet" type="text/css" href="./style.css" />
|
||||
|
||||
<title>Speeding up HTML Purifier - HTML Purifier</title>
|
||||
|
||||
</head><body>
|
||||
|
||||
<h1 class="subtitled">Speeding up HTML Purifier</h1>
|
||||
<div class="subtitle">...also known as the HELP ME LIBRARY IS TOO SLOW MY PAGE TAKE TOO LONG page</div>
|
||||
|
||||
<div id="filing">Filed under End-User</div>
|
||||
<div id="index">Return to the <a href="index.html">index</a>.</div>
|
||||
<div id="home"><a href="http://htmlpurifier.org/">HTML Purifier</a> End-User Documentation</div>
|
||||
|
||||
<p>HTML Purifier is a very powerful library. But with power comes great
|
||||
responsibility, in the form of longer execution times. Remember, this
|
||||
library isn't lightly grazing over submitted HTML: it's deconstructing
|
||||
the whole thing, rigorously checking the parts, and then putting it back
|
||||
together. </p>
|
||||
|
||||
<p>So, if it so turns out that HTML Purifier is kinda too slow for outbound
|
||||
filtering, you've got a few options: </p>
|
||||
|
||||
<h2>Inbound filtering</h2>
|
||||
|
||||
<p>Perform filtering of HTML when it's submitted by the user. Since the
|
||||
user is already submitting something, an extra half a second tacked on
|
||||
to the load time probably isn't going to be that huge of a problem.
|
||||
Then, displaying the content is simply a matter of outputting it
|
||||
directly from your database/filesystem. The trouble with this method is
|
||||
that your user loses the original text, and when doing edits, will be
|
||||
handling the filtered text. While this may be a good thing, especially
|
||||
if you're using a WYSIWYG editor, it can also result in data-loss if a
|
||||
user makes a typo. </p>
|
||||
|
||||
<p>Example (non-functional):</p>
|
||||
|
||||
<pre><?php
|
||||
/**
|
||||
* FORM SUBMISSION PAGE
|
||||
* display_error($message) : displays nice error page with message
|
||||
* display_success() : displays a nice success page
|
||||
* display_form() : displays the HTML submission form
|
||||
* database_insert($html) : inserts data into database as new row
|
||||
*/
|
||||
if (!empty($_POST)) {
|
||||
require_once '/path/to/library/HTMLPurifier.auto.php';
|
||||
require_once 'HTMLPurifier.func.php';
|
||||
$dirty_html = isset($_POST['html']) ? $_POST['html'] : false;
|
||||
if (!$dirty_html) {
|
||||
display_error('You must write some HTML!');
|
||||
}
|
||||
$html = HTMLPurifier($dirty_html);
|
||||
database_insert($html);
|
||||
display_success();
|
||||
// notice that $dirty_html is *not* saved
|
||||
} else {
|
||||
display_form();
|
||||
}
|
||||
?></pre>
|
||||
|
||||
<h2>Caching the filtered output</h2>
|
||||
|
||||
<p>Accept the submitted text and put it unaltered into the database, but
|
||||
then also generate a filtered version and stash that in the database.
|
||||
Serve the filtered version to readers, and the unaltered version to
|
||||
editors. If need be, you can invalidate the cache and have the cached
|
||||
filtered version be regenerated on the first page view. Pros? Full data
|
||||
retention. Cons? It's more complicated, and opens other editors up to
|
||||
XSS if they are using a WYSIWYG editor (to fix that, they'd have to be
|
||||
able to get their hands on the *really* original text served in
|
||||
plaintext mode). </p>
|
||||
|
||||
<p>Example (non-functional):</p>
|
||||
|
||||
<pre><?php
|
||||
/**
|
||||
* VIEW PAGE
|
||||
* display_error($message) : displays nice error page with message
|
||||
* cache_get($id) : retrieves HTML from fast cache (db or file)
|
||||
* cache_insert($id, $html) : inserts good HTML into cache system
|
||||
* database_get($id) : retrieves raw HTML from database
|
||||
*/
|
||||
$id = isset($_GET['id']) ? (int) $_GET['id'] : false;
|
||||
if (!$id) {
|
||||
display_error('Must specify ID.');
|
||||
exit;
|
||||
}
|
||||
$html = cache_get($id); // filesystem or database
|
||||
if ($html === false) {
|
||||
// cache didn't have the HTML, generate it
|
||||
$raw_html = database_get($id);
|
||||
require_once '/path/to/library/HTMLPurifier.auto.php';
|
||||
require_once 'HTMLPurifier.func.php';
|
||||
$html = HTMLPurifier($raw_html);
|
||||
cache_insert($id, $html);
|
||||
}
|
||||
echo $html;
|
||||
?></pre>
|
||||
|
||||
<h2>Summary</h2>
|
||||
|
||||
<p>In short, inbound filtering is the simple option and caching is the
|
||||
robust option (albeit with bigger storage requirements). </p>
|
||||
|
||||
<p>There is a third option, independent of the two we've discussed: profile
|
||||
and optimize HTMLPurifier yourself. Be sure to report back your results
|
||||
if you decide to do that! Especially if you port HTML Purifier to C++.
|
||||
<tt>;-)</tt></p>
|
||||
|
||||
</body>
|
||||
</html>
|
230
docs/enduser-tidy.html
Normal file
230
docs/enduser-tidy.html
Normal file
@@ -0,0 +1,230 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
|
||||
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
|
||||
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"><head>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
|
||||
<meta name="description" content="Tutorial for tweaking HTML Purifier's Tidy-like behavior." />
|
||||
<link rel="stylesheet" type="text/css" href="style.css" />
|
||||
|
||||
<title>Tidy - HTML Purifier</title>
|
||||
|
||||
</head><body>
|
||||
|
||||
<h1>Tidy</h1>
|
||||
|
||||
<div id="filing">Filed under Development</div>
|
||||
<div id="index">Return to the <a href="index.html">index</a>.</div>
|
||||
<div id="home"><a href="http://htmlpurifier.org/">HTML Purifier</a> End-User Documentation</div>
|
||||
|
||||
<p>You've probably heard of HTML Tidy, Dave Raggett's little piece
|
||||
of software that cleans up poorly written HTML. Let me say it straight
|
||||
out:</p>
|
||||
|
||||
<p class="emphasis">This ain't HTML Tidy!</p>
|
||||
|
||||
<p>Rather, Tidy stands for a cool set of Tidy-inspired features in HTML Purifier
|
||||
that allows users to submit deprecated elements and attributes and get
|
||||
valid strict markup back. For example:</p>
|
||||
|
||||
<pre><center>Centered</center></pre>
|
||||
|
||||
<p>...becomes:</p>
|
||||
|
||||
<pre><div style="text-align:center;">Centered</div></pre>
|
||||
|
||||
<p>...when this particular fix is run on the HTML. This tutorial will give
|
||||
you the lowdown on what exactly HTML Purifier will do when Tidy
|
||||
is on, and how to fine tune this behavior. Once again, <strong>you do
|
||||
not need Tidy installed on your PHP to use these features!</strong></p>
|
||||
|
||||
<h2>What does it do?</h2>
|
||||
|
||||
<p>Tidy will do several things to your HTML:</p>
|
||||
|
||||
<ul>
|
||||
<li>Convert deprecated elements and attributes to standards-compliant
|
||||
alternatives</li>
|
||||
<li>Enforce XHTML compatibility guidelines and other best practices</li>
|
||||
<li>Preserve data that would normally be removed as per W3C</li>
|
||||
</ul>
|
||||
|
||||
<h2>What are levels?</h2>
|
||||
|
||||
<p>Levels describe how aggressive the Tidy module should be when
|
||||
cleaning up HTML. There are four levels to pick: none, light, medium
|
||||
and heavy. Each of these levels has a well-defined set of behavior
|
||||
associated with it, although it may change depending on your doctype.</p>
|
||||
|
||||
<dl>
|
||||
<dt>light</dt>
|
||||
<dd>This is the <strong>lenient</strong> level. If a tag or attribute
|
||||
is about to be removed because it isn't supported by the
|
||||
doctype, Tidy will step in and change it into an alternative that
|
||||
is supported.</dd>
|
||||
<dt>medium</dt>
|
||||
<dd>This is the <strong>correctional</strong> level. At this level,
|
||||
all the functions of light are performed, as well as some extra,
|
||||
non-essential best practices enforcement. Changes made on this
|
||||
level are very benign and are unlikely to cause problems.</dd>
|
||||
<dt>heavy</dt>
|
||||
<dd>This is the <strong>aggressive</strong> level. If a tag or
|
||||
attribute is deprecated, it will be converted into a non-deprecated
|
||||
version, no ifs ands or buts.</dd>
|
||||
</dl>
|
||||
|
||||
<p>By default, Tidy operates on the <strong>medium</strong> level. You can
|
||||
change the level of cleaning by setting the %HTML.TidyLevel configuration
|
||||
directive:</p>
|
||||
|
||||
<pre>$config->set('HTML', 'TidyLevel', 'heavy'); // burn baby burn!</pre>
|
||||
|
||||
<h2>Is the light level really light?</h2>
|
||||
|
||||
<p>It depends on what doctype you're using. If your documents are HTML
|
||||
4.01 <em>Transitional</em>, HTML Purifier will be lazy
|
||||
and won't clean up your <code>center</code>
|
||||
or <code>font</code> tags. But if you're using HTML 4.01 <em>Strict</em>,
|
||||
HTML Purifier has no choice: it has to convert them, or they will
|
||||
be nuked out of existence. So while light on Transitional will result
|
||||
in little to no changes, light on Strict will still result in quite
|
||||
a lot of fixes.</p>
|
||||
|
||||
<p>This is different behavior from 1.6 or before, where deprecated
|
||||
tags in transitional documents would
|
||||
always be cleaned up regardless. This is also better behavior.</p>
|
||||
|
||||
<h2>My pages look different!</h2>
|
||||
|
||||
<p>HTML Purifier is tasked with converting deprecated tags and
|
||||
attributes to standards-compliant alternatives, which usually
|
||||
need copious amounts of CSS. It's also not foolproof: sometimes
|
||||
things do get lost in the translation. This is why when HTML Purifier
|
||||
can get away with not doing cleaning, it won't; this is why
|
||||
the default value is <strong>medium</strong> and not heavy.</p>
|
||||
|
||||
<p>Fortunately, only a few attributes have problems with the switch
|
||||
over. They are described below:</p>
|
||||
|
||||
<table class="table">
|
||||
<thead><tr>
|
||||
<th>Element@Attr</th>
|
||||
<th>Changes</th>
|
||||
</tr></thead>
|
||||
<tbody>
|
||||
<tr>
|
||||
<td>caption@align</td>
|
||||
<td>Firefox supports stuffing the caption on the
|
||||
left and right side of the table, a feature that
|
||||
Internet Explorer, understandably, does not have.
|
||||
When align equals right or left, the text will simply
|
||||
be aligned on the left or right side.</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>img@align</td>
|
||||
<td>The implementation for align bottom is good, but not
|
||||
perfect. There are a few pixel differences.</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>br@clear</td>
|
||||
<td>Clear both gets a little wonky in Internet Explorer. Haven't
|
||||
really been able to figure out why.</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>hr@noshade</td>
|
||||
<td>All browsers implement this slightly differently: we've
|
||||
chosen to make noshade horizontal rules gray.</td>
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
|
||||
<p>There are a few more minor, although irritating, bugs.
|
||||
Some older browsers support deprecated attributes,
|
||||
but not CSS. Transformed elements and attributes will look unstyled
|
||||
to said browsers. Also, CSS precedence is slightly different for
|
||||
inline styles versus presentational markup. In increasing precedence:</p>
|
||||
|
||||
<ol>
|
||||
<li>Presentational attributes</li>
|
||||
<li>External style sheets</li>
|
||||
<li>Inline styling</li>
|
||||
</ol>
|
||||
|
||||
<p>This means that styling that may have been masked by external CSS
|
||||
declarations will start showing up (a good thing, perhaps). Finally,
|
||||
if you've turned off the style attribute, almost all of
|
||||
these transformations will not work. Sorry mates.</p>
|
||||
|
||||
<p>You can review the rendering before and after of these transformations
|
||||
by consulting the <a
|
||||
href="http://htmlpurifier.org/live/smoketests/attrTransform.php">attrTransform.php
|
||||
smoketest</a>.</p>
|
||||
|
||||
<h2>I like the general idea, but the specifics bug me!</h2>
|
||||
|
||||
<p>So you want HTML Purifier to clean up your HTML, but you're not
|
||||
so happy about the br@clear implementation. That's perfectly fine!
|
||||
HTML Purifier will make accommodations:</p>
|
||||
|
||||
<pre>$config->set('HTML', 'Doctype', 'XHTML 1.0 Transitional');
|
||||
$config->set('HTML', 'TidyLevel', 'heavy'); // all changes, minus...
|
||||
<strong>$config->set('HTML', 'TidyRemove', 'br@clear');</strong></pre>
|
||||
|
||||
<p>That third line does the magic, removing the br@clear fix
|
||||
from the module, ensuring that <code><br clear="both" /></code>
|
||||
will pass through unharmed. The reverse is possible too:</p>
|
||||
|
||||
<pre>$config->set('HTML', 'Doctype', 'XHTML 1.0 Transitional');
|
||||
$config->set('HTML', 'TidyLevel', 'none'); // no changes, plus...
|
||||
<strong>$config->set('HTML', 'TidyAdd', 'p@align');</strong></pre>
|
||||
|
||||
<p>In this case, all transformations are shut off, except for the p@align
|
||||
one, which you found handy.</p>
|
||||
|
||||
<p>To find out what the names of fixes you want to turn on or off are,
|
||||
you'll have to consult the source code, specifically the files in
|
||||
<code>HTMLPurifier/HTMLModule/Tidy/</code>. There is, however, a
|
||||
general syntax:</p>
|
||||
|
||||
<table class="table">
|
||||
<thead>
|
||||
<tr>
|
||||
<th>Name</th>
|
||||
<th>Example</th>
|
||||
<th>Interpretation</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
<tr>
|
||||
<td>element</td>
|
||||
<td>font</td>
|
||||
<td>Tag transform for <em>element</em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>element@attr</td>
|
||||
<td>br@clear</td>
|
||||
<td>Attribute transform for <em>attr</em> on <em>element</em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>@attr</td>
|
||||
<td>@lang</td>
|
||||
<td>Global attribute transform for <em>attr</em></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>e#content_model_type</td>
|
||||
<td>blockquote#content_model_type</td>
|
||||
<td>Change of child processing implementation for <em>e</em></td>
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
|
||||
<h2>So... what's the lowdown?</h2>
|
||||
|
||||
<p>The lowdown is, quite frankly, HTML Purifier's default settings are
|
||||
probably good enough. The next step is to bump the level up to heavy,
|
||||
and if that still doesn't satisfy your appetite, do some fine tuning.
|
||||
Other than that, don't worry about it: this all works silently and
|
||||
effectively in the background.</p>
|
||||
|
||||
<div id="version">$Id$</div>
|
||||
|
||||
</body></html>
|
1046
docs/enduser-utf8.html
Normal file
1046
docs/enduser-utf8.html
Normal file
File diff suppressed because it is too large
Load Diff
152
docs/enduser-youtube.html
Normal file
152
docs/enduser-youtube.html
Normal file
@@ -0,0 +1,152 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
|
||||
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
|
||||
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"><head>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
|
||||
<meta name="description" content="Explains how to safely allow the embedding of flash from trusted sites in HTML Purifier." />
|
||||
<link rel="stylesheet" type="text/css" href="./style.css" />
|
||||
|
||||
<title>Embedding YouTube Videos - HTML Purifier</title>
|
||||
|
||||
</head><body>
|
||||
|
||||
<h1 class="subtitled">Embedding YouTube Videos</h1>
|
||||
<div class="subtitle">...as well as other dangerous active content</div>
|
||||
|
||||
<div id="filing">Filed under End-User</div>
|
||||
<div id="index">Return to the <a href="index.html">index</a>.</div>
|
||||
<div id="home"><a href="http://htmlpurifier.org/">HTML Purifier</a> End-User Documentation</div>
|
||||
|
||||
<p>Clients like their YouTube videos. It gives them a warm fuzzy feeling when
|
||||
they see a neat little embedded video player on their websites that can play
|
||||
the latest clips from their documentary "Fido and the Bones of Spring".
|
||||
All joking aside, the ability to embed YouTube videos or other active
|
||||
content in their pages is something that a lot of people like.</p>
|
||||
|
||||
<p>This is a <em>bad</em> idea. The moment you embed anything untrusted,
|
||||
you will definitely be slammed by all manner of nasties that can be
|
||||
embedded in things from your run of the mill Flash movie to
|
||||
<a href="http://blog.spywareguide.com/2006/12/myspace_phish_attack_leads_use.html">Quicktime movies</a>.
|
||||
Even <code>img</code> tags, which HTML Purifier allows by default, can be
|
||||
dangerous. Be distrustful of anything that tells a browser to load content
|
||||
from another website automatically.</p>
|
||||
|
||||
<p>Luckily for us, however, whitelisting saves the day. Sure, letting users
|
||||
include any old random flash file could be dangerous, but if it's
|
||||
from a specific website, it probably is okay. If no amount of pleading will
|
||||
convince the people upstairs that they should settle for just linking
|
||||
to their movies, you may find this technique very useful.</p>
|
||||
|
||||
<h2>Looking in</h2>
|
||||
|
||||
<p>Below is custom code that allows users to embed
|
||||
YouTube videos. This is not favoritism: this trick can easily be adapted for
|
||||
other forms of embeddable content.</p>
|
||||
|
||||
<p>Usually, websites like YouTube give us boilerplate code that you can insert
|
||||
into your documents. YouTube's code goes like this:</p>
|
||||
|
||||
<pre>
|
||||
<object width="425" height="350">
|
||||
<param name="movie" value="http://www.youtube.com/v/AyPzM5WK8ys" />
|
||||
<param name="wmode" value="transparent" />
|
||||
<embed src="http://www.youtube.com/v/AyPzM5WK8ys"
|
||||
type="application/x-shockwave-flash"
|
||||
wmode="transparent" width="425" height="350" />
|
||||
</object>
|
||||
</pre>
|
||||
|
||||
<p>There are two things to note about this code:</p>
|
||||
|
||||
<ol>
|
||||
<li><code><embed></code> is not recognized by W3C, so if you want
|
||||
standards-compliant code, you'll have to get rid of it.</li>
|
||||
<li>The code is exactly the same for all instances, except for the
|
||||
identifier <tt>AyPzM5WK8ys</tt> which tells us which movie file
|
||||
to retrieve.</li>
|
||||
</ol>
|
||||
|
||||
<p>What point 2 means is that if we have code like <code><span
|
||||
class="embed-youtube">AyPzM5WK8ys</span></code> your
|
||||
application can reconstruct the full object from this small snippet that
|
||||
passes through HTML Purifier <em>unharmed</em>.
|
||||
<a href="http://htmlpurifier.org/svnroot/htmlpurifier/trunk/library/HTMLPurifier/Filter/YouTube.php">Show me the code!</a></p>
|
||||
|
||||
<p>And the corresponding usage:</p>
|
||||
|
||||
<pre><?php
|
||||
// assuming $purifier is an instance of HTMLPurifier
|
||||
require_once 'HTMLPurifier/Filter/YouTube.php';
|
||||
$purifier->addFilter(new HTMLPurifier_Filter_YouTube());
|
||||
?></pre>
|
||||
|
||||
<p>There is a bit going on in the two code snippets, so let's explain.</p>
|
||||
|
||||
<ol>
|
||||
<li>This is a Filter object, which intercepts the HTML that is
|
||||
coming into and out of the purifier. You can add as many
|
||||
filter objects as you like. <code>preFilter()</code>
|
||||
processes the code before it gets purified, and <code>postFilter()</code>
|
||||
processes the code afterwards. So, we'll use <code>preFilter()</code> to
|
||||
replace the object tag with a <code>span</code>, and <code>postFilter()</code>
|
||||
to restore it.</li>
|
||||
<li>The first preg_replace call replaces any YouTube code users may have
|
||||
embedded into the benign span tag. Span is used because it is inline,
|
||||
and objects are inline too. We are very careful to be extremely
|
||||
restrictive on what goes inside the span tag, as if errant code
|
||||
gets in there it could get messy.</li>
|
||||
<li>The HTML is then purified as usual.</li>
|
||||
<li>Then, another preg_replace replaces the span tag with a fully fledged
|
||||
object. Note that the embed is removed, and, in its place, a data
|
||||
attribute was added to the object. This makes the tag standards
|
||||
compliant! It also breaks Internet Explorer, so we add in a bit of
|
||||
conditional comments with the old embed code to make it work again.
|
||||
It's all quite convoluted but works.</li>
|
||||
</ol>
|
||||
|
||||
<h2>Warning</h2>
|
||||
|
||||
<p>There are a number of possible problems with the code above, depending
|
||||
on how you look at it.</p>
|
||||
|
||||
<h3>Cannot change width and height</h3>
|
||||
|
||||
<p>The width and height of the final YouTube movie cannot be adjusted. This
|
||||
is because I am lazy. If you really insist on letting users change the size
|
||||
of the movie, what you need to do is package up the attributes inside the
|
||||
span tag (along with the movie ID). It gets complicated though: a malicious
|
||||
user can specify an outrageously large height and width and attempt to crash
|
||||
the user's operating system/browser. You need to either cap it by limiting
|
||||
the number of digits allowed in the regex or using a callback to check the
|
||||
number.</p>
|
||||
|
||||
<h3>Trusts media's host's security</h3>
|
||||
|
||||
<p>By allowing this code onto our website, we are trusting that YouTube has
|
||||
tech-savvy enough people not to allow their users to inject malicious
|
||||
code into the Flash files. An exploit on YouTube means an exploit on your
|
||||
site. Even though YouTube is run by the reputable Google, it
|
||||
<a href="http://ha.ckers.org/blog/20061213/google-xss-vuln/">doesn't</a>
|
||||
mean they are
|
||||
<a href="http://ha.ckers.org/blog/20061208/xss-in-googles-orkut/">invulnerable.</a>
|
||||
You're putting a certain measure of the job on an external provider (just as
|
||||
you have by entrusting your user input to HTML Purifier), and
|
||||
it is important that you are cognizant of the risk.</p>
|
||||
|
||||
<h3>Poorly written adaptations compromise security</h3>
|
||||
|
||||
<p>This should go without saying, but if you're going to adapt this code
|
||||
for Google Video or the like, make sure you do it <em>right</em>. It's
|
||||
extremely easy to allow a character too many in <code>postFilter()</code> and
|
||||
suddenly you're introducing XSS into HTML Purifier's XSS free output. HTML
|
||||
Purifier may be well written, but it cannot guard against vulnerabilities
|
||||
introduced after it has finished.</p>
|
||||
|
||||
<h2>Help out!</h2>
|
||||
|
||||
<p>If you write a filter for your favorite video destination (or anything
|
||||
like that, for that matter), send it over and it might get included
|
||||
with the core!</p>
|
||||
|
||||
</body>
|
||||
</html>
|
@@ -2,14 +2,21 @@
|
||||
|
||||
// This file demonstrates basic usage of HTMLPurifier.
|
||||
|
||||
exit; // not to be called directly, it will fail fantastically!
|
||||
// replace this with the path to the HTML Purifier library
|
||||
require_once '../../library/HTMLPurifier.auto.php';
|
||||
|
||||
set_include_path('/path/to/htmlpurifier/library' . PATH_SEPARATOR . get_include_path());
|
||||
require_once 'HTMLPurifier.php';
|
||||
$config = HTMLPurifier_Config::createDefault();
|
||||
|
||||
$purifier = new HTMLPurifier();
|
||||
// configuration goes here:
|
||||
$config->set('Core', 'Encoding', 'UTF-8'); // replace with your encoding
|
||||
$config->set('HTML', 'Doctype', 'XHTML 1.0 Transitional'); // replace with your doctype
|
||||
|
||||
$purifier = new HTMLPurifier($config);
|
||||
|
||||
// untrusted input HTML
|
||||
$html = '<b>Simple and short';
|
||||
|
||||
$pure_html = $purifier->purify($html);
|
||||
|
||||
?>
|
||||
echo '<pre>' . htmlspecialchars($pure_html) . '</pre>';
|
||||
|
||||
|
@@ -1,70 +0,0 @@
|
||||
<!DOCTYPE html
|
||||
PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
|
||||
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
|
||||
<html>
|
||||
<head>
|
||||
<title>HTMLPurifier Live Demo</title>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
|
||||
</head>
|
||||
<body>
|
||||
<h1>HTMLPurifier Live Demo</h1>
|
||||
<?php
|
||||
|
||||
set_time_limit(120);
|
||||
|
||||
set_include_path('../../library' . PATH_SEPARATOR . get_include_path());
|
||||
require_once 'HTMLPurifier.php';
|
||||
|
||||
if (!empty($_POST['html'])) {
|
||||
|
||||
$html = get_magic_quotes_gpc() ? stripslashes($_POST['html']) : $_POST['html'];
|
||||
|
||||
$purifier = new HTMLPurifier();
|
||||
$pure_html = $purifier->purify($html);
|
||||
|
||||
?>
|
||||
<p>Here is your purified HTML:</p>
|
||||
<div style="border:5px solid #CCC;margin:0 10%;padding:1em;">
|
||||
<?php
|
||||
|
||||
echo $pure_html;
|
||||
|
||||
?>
|
||||
<div style="clear:both;"></div>
|
||||
</div>
|
||||
<p>Here is the source code of the purified HTML:</p>
|
||||
<pre><?php
|
||||
|
||||
echo htmlspecialchars($pure_html, ENT_COMPAT, 'UTF-8');
|
||||
|
||||
?></pre>
|
||||
<?php
|
||||
|
||||
} else {
|
||||
|
||||
?>
|
||||
<p>Welcome to the live demo. Enter some HTML and see how HTMLPurifier
|
||||
will filter it.</p>
|
||||
<?php
|
||||
|
||||
}
|
||||
|
||||
?>
|
||||
<form name="filter" action="demo.php<?php
|
||||
if (isset($_GET['profile']) || isset($_GET['XDEBUG_PROFILE'])) {
|
||||
echo '?XDEBUG_PROFILE=1';
|
||||
} ?>" method="post">
|
||||
<fieldset>
|
||||
<legend>HTML</legend>
|
||||
<textarea name="html" cols="60" rows="15"><?php
|
||||
|
||||
if (isset($html)) echo htmlspecialchars($html, ENT_COMPAT, 'UTF-8');
|
||||
|
||||
?></textarea>
|
||||
<div>
|
||||
<input type="submit" value="Submit" name="submit" class="button" />
|
||||
</div>
|
||||
</fieldset>
|
||||
</form>
|
||||
</body>
|
||||
</html>
|
6
docs/fixquotes.htc
Normal file
6
docs/fixquotes.htc
Normal file
@@ -0,0 +1,6 @@
|
||||
<public:attach event="oncontentready" onevent="init();" />
|
||||
<script>
|
||||
function init() {
|
||||
element.innerHTML = '“'+element.innerHTML+'”';
|
||||
}
|
||||
</script>
|
165
docs/index.html
Normal file
165
docs/index.html
Normal file
@@ -0,0 +1,165 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
|
||||
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
|
||||
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"><head>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
|
||||
<meta name="description" content="Index to all HTML Purifier documentation." />
|
||||
<link rel="stylesheet" type="text/css" href="./style.css" />
|
||||
|
||||
<title>Documentation - HTML Purifier</title>
|
||||
|
||||
</head>
|
||||
<body>
|
||||
|
||||
<h1>Documentation</h1>
|
||||
|
||||
<p><strong><a href="http://htmlpurifier.org/">HTML Purifier</a></strong> has documentation for all types of people.
|
||||
Here is an index of all of them.</p>
|
||||
|
||||
<h2>End-user</h2>
|
||||
<p>End-user documentation that contains articles, tutorials and useful
|
||||
information for casual developers using HTML Purifier.</p>
|
||||
|
||||
<dl>
|
||||
|
||||
<dt><a href="enduser-id.html">IDs</a></dt>
|
||||
<dd>Explains various methods for allowing IDs in documents safely.</dd>
|
||||
|
||||
<dt><a href="enduser-youtube.html">Embedding YouTube videos</a></dt>
|
||||
<dd>Explains how to safely allow the embedding of flash from trusted sites.</dd>
|
||||
|
||||
<dt><a href="enduser-slow.html">Speeding up HTML Purifier</a></dt>
|
||||
<dd>Explains how to speed up HTML Purifier through caching or inbound filtering.</dd>
|
||||
|
||||
<dt><a href="enduser-utf8.html">UTF-8: The Secret of Character Encoding</a></dt>
|
||||
<dd>Describes the rationale for using UTF-8, the ramifications otherwise, and how to make the switch.</dd>
|
||||
|
||||
<dt><a href="enduser-tidy.html">Tidy</a></dt>
|
||||
<dd>Tutorial for tweaking HTML Purifier's Tidy-like behavior.</dd>
|
||||
|
||||
<dt><a href="enduser-customize.html">Customize</a></dt>
|
||||
<dd>Tutorial for customizing HTML Purifier's tag and attribute sets.</dd>
|
||||
|
||||
</dl>
|
||||
|
||||
<h2>Development</h2>
|
||||
<p>Developer documentation detailing code issues, roadmaps and project
|
||||
conventions.</p>
|
||||
|
||||
<dl>
|
||||
|
||||
<dt><a href="dev-progress.html">Implementation Progress</a></dt>
|
||||
<dd>Tables detailing HTML element and CSS property implementation coverage.</dd>
|
||||
|
||||
<dt><a href="dev-naming.html">Naming Conventions</a></dt>
|
||||
<dd>Defines class naming conventions.</dd>
|
||||
|
||||
<dt><a href="dev-optimization.html">Optimization</a></dt>
|
||||
<dd>Discusses possible methods of optimizing HTML Purifier.</dd>
|
||||
|
||||
<dt><a href="dev-advanced-api.html">Advanced API</a></dt>
|
||||
<dd>Functional specification for HTML Purifier's advanced API for defining
|
||||
custom filtering behavior.</dd>
|
||||
|
||||
</dl>
|
||||
|
||||
<h2>Proposals</h2>
|
||||
<p>Proposed features, as well as the associated rambling to get a clear
|
||||
objective in place before attempted implementation.</p>
|
||||
|
||||
<dl>
|
||||
<dt><a href="proposal-colors.html">Colors</a></dt>
|
||||
<dd>Proposal to allow for color constraints.</dd>
|
||||
</dl>
|
||||
|
||||
<h2>Reference</h2>
|
||||
<p>Miscellaneous essays, research pieces and other reference type material
|
||||
that may not directly discuss HTML Purifier.</p>
|
||||
|
||||
<dl>
|
||||
<dt><a href="ref-devnetwork.html">DevNetwork Credits</a></dt>
|
||||
<dd>Credits and links to DevNetwork forum topics.</dd>
|
||||
</dl>
|
||||
|
||||
<h2>Internal memos</h2>
|
||||
|
||||
<p>Plaintext documents that are more for use by active developers of
|
||||
the code. They may be upgraded to HTML files or stay as TXT scratchpads.</p>
|
||||
|
||||
<table class="table">
|
||||
|
||||
<thead><tr>
|
||||
<th width="10%">Type</th>
|
||||
<th width="20%">Name</th>
|
||||
<th>Description</th>
|
||||
</tr></thead>
|
||||
|
||||
<tbody>
|
||||
|
||||
<tr>
|
||||
<td>End-user</td>
|
||||
<td><a href="enduser-overview.txt">Overview</a></td>
|
||||
<td>High level overview of the general control flow (mostly obsolete).</td>
|
||||
</tr>
|
||||
|
||||
<tr>
|
||||
<td>End-user</td>
|
||||
<td><a href="enduser-security.txt">Security</a></td>
|
||||
<td>Common security issues that may still arise (half-baked).</td>
|
||||
</tr>
|
||||
|
||||
<tr>
|
||||
<td>Development</td>
|
||||
<td><a href="enduser-code-quality.txt">Code Quality Issues</a></td>
|
||||
<td>Enumerates code quality issues and places that need to be refactored.</td>
|
||||
</tr>
|
||||
|
||||
<tr>
|
||||
<td>Proposal</td>
|
||||
<td><a href="proposal-filter-levels.txt">Filter levels</a></td>
|
||||
<td>Outlines details of projected configurable level of filtering.</td>
|
||||
</tr>
|
||||
|
||||
<tr>
|
||||
<td>Proposal</td>
|
||||
<td><a href="proposal-language.txt">Language</a></td>
|
||||
<td>Specification of I18N for error messages derived from MediaWiki (half-baked).</td>
|
||||
</tr>
|
||||
|
||||
<tr>
|
||||
<td>Proposal</td>
|
||||
<td><a href="proposal-new-directives.txt">New directives</a></td>
|
||||
<td>Assorted configuration options that could be implemented.</td>
|
||||
</tr>
|
||||
|
||||
<tr>
|
||||
<td>Reference</td>
|
||||
<td><a href="ref-content-models.txt">Handling Content Model Changes</a></td>
|
||||
<td>Discusses how to tidy up content model changes using custom ChildDef classes.</td>
|
||||
</tr>
|
||||
|
||||
<tr>
|
||||
<td>Reference</td>
|
||||
<td><a href="ref-proprietary-tags.txt">Proprietary tags</a></td>
|
||||
<td>List of vendor-specific tags we may want to transform to W3C compliant markup.</td>
|
||||
</tr>
|
||||
|
||||
<tr>
|
||||
<td>Reference</td>
|
||||
<td><a href="ref-html-modularization.txt">Modularization of HTMLDefinition</a></td>
|
||||
<td>Provides a high-level overview of the concepts behind HTMLModules.</td>
|
||||
</tr>
|
||||
|
||||
<tr>
|
||||
<td>Reference</td>
|
||||
<td><a href="ref-whatwg.txt">WHATWG</a></td>
|
||||
<td>How WHATWG plays into what we need to do.</td>
|
||||
</tr>
|
||||
|
||||
</tbody>
|
||||
|
||||
</table>
|
||||
|
||||
<div id="version">$Id$</div>
|
||||
</body>
|
||||
</html>
|
@@ -1,55 +0,0 @@
|
||||
|
||||
Naming
|
||||
|
||||
The classes in this library follow a few naming conventions, which may
|
||||
help you find the correct functionality more quickly. Here they are:
|
||||
|
||||
All classes occupy the HTMLPurifier pseudo-namespace.
|
||||
This means that all classes are prefixed with HTMLPurifier_. As such, all
|
||||
names under HTMLPurifier_ are reserved, and userspace extensions should
|
||||
be registered in a different namespace (or the main namespace).
|
||||
|
||||
All classes correspond to their path if library/ was in the include path
|
||||
HTMLPurifier_AttrDef is located at HTMLPurifier/AttrDef.php; replace
|
||||
underscores with slashes and append .php and you'll have the location of
|
||||
the class.
|
||||
|
||||
Harness and Test are reserved class names for unit tests
|
||||
The suffix "Test" indicates that the class is a subclass of UnitTestCase
|
||||
(of the Simpletest library) and is testable. "Harness" indicates a subclass
|
||||
of UnitTestCase that is not meant to be run but to be extended into
|
||||
concrete test cases and contains custom test methods (i.e. assert*())
|
||||
|
||||
Class names do not necessarily represent inheritance hierarchies
|
||||
While we try to reflect inheritance in naming to some extent, it is not
|
||||
guaranteed (for instance, none of the classes inherit from HTMLPurifier,
|
||||
the base class). However, all class files have the require_once
|
||||
declarations to whichever classes they are tightly coupled to.
|
||||
|
||||
Strategy has a meaning different from the Gang of Four pattern
|
||||
In Design Patterns, the Gang of Four describes a Strategy object as
|
||||
encapsulating an algorithm so that they can be switched at run-time. While
|
||||
our strategies are indeed algorithms, they are not meant to be substituted:
|
||||
all must be present in order for proper functioning.
|
||||
|
||||
Abbreviations are avoided
|
||||
We try to avoid abbreviations as much as possible, but in some cases,
|
||||
abbreviated version is more readable than the full version. Here, we
|
||||
list common abbreviations:
|
||||
Attr(s) -> Attribute(s)
|
||||
Def -> Definition
|
||||
|
||||
Ambiguity concerning the definition of Def/Definition
|
||||
While a definition normally defines the structure/acceptable values of
|
||||
an entity, most of the definitions in this application also attempt
|
||||
to validate and fix the value. I am unsure of a better name, as
|
||||
"Validator" would exclude fixing the value, "Fixer" doesn't invoke
|
||||
the proper image of "fixing" something, and "ValidatorFixer" is too long!
|
||||
Some other suggestions were "Handler", "Reference", "Check", "Fix",
|
||||
"Repair" and "Heal".
|
||||
|
||||
Transform not Transformer
|
||||
Transform is both a noun and a verb, and thus we define a "Transform" as
|
||||
something that "transforms," leaving "Transformer" (which sounds like an
|
||||
electrical device/robot toy).
|
||||
|
@@ -1,11 +0,0 @@
|
||||
|
||||
Optimization
|
||||
|
||||
Here are some possible optimization techniques we can apply to code sections if
|
||||
they turn out to be slow. Be sure not to prematurely optimize though!
|
||||
|
||||
- Make Tokens Flyweights
|
||||
- Rewrite regexps into PHP code
|
||||
- Serialize the Definition object
|
||||
- Batch regexp validation (do as many per function call as possible)
|
||||
- Parallelize strategies
|
48
docs/proposal-colors.html
Normal file
48
docs/proposal-colors.html
Normal file
@@ -0,0 +1,48 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
|
||||
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
|
||||
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"><head>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
|
||||
<meta name="description" content="Proposal to allow for color constraints in HTML Purifier." />
|
||||
<link rel="stylesheet" type="text/css" href="./style.css" />
|
||||
|
||||
<title>Proposal: Colors - HTML Purifier</title>
|
||||
|
||||
</head><body>
|
||||
|
||||
<h1 class="subtitled">Colors</h1>
|
||||
<div class="subtitle">Hammering some sense into those color-blind newbies</div>
|
||||
|
||||
<div id="filing">Filed under Proposals</div>
|
||||
<div id="index">Return to the <a href="index.html">index</a>.</div>
|
||||
<div id="home"><a href="http://htmlpurifier.org/">HTML Purifier</a> End-User Documentation</div>
|
||||
|
||||
<p>Your website probably has a color-scheme.
|
||||
<span style="color:#090; background:#FFF;">Green on white</span>,
|
||||
<span style="color:#A0F; background:#FF0;">purple on yellow</span>,
|
||||
whatever. When you give users the ability to style their content, you may
|
||||
want them to keep in line with your styling. If your website is all
|
||||
about light colors, you don't want a user to come in and vandalize your
|
||||
page with a deep maroon.</p>
|
||||
|
||||
<p>This is an extremely silly feature proposal, but I'm writing it down anyway.</p>
|
||||
|
||||
<p>What if the user could constrain the colors specified in inline styles? You
|
||||
are only allowed to use these shades of dark green for text and these shades
|
||||
of light yellow for the background. At the very least, you could ensure
|
||||
that we did not have pale yellow text on white.</p>
|
||||
|
||||
<h2>Implementation issues</h2>
|
||||
|
||||
<ol>
|
||||
<li>Requires the color attribute definition to know, currently, what the text
|
||||
and background colors are. This becomes difficult when classes are thrown
|
||||
into the mix.</li>
|
||||
<li>The user still has to define the permissible colors, how does one do
|
||||
something like that?</li>
|
||||
</ol>
|
||||
|
||||
<div id="version">$Id$</div>
|
||||
|
||||
</body>
|
||||
</html>
|
21
docs/proposal-config.txt
Normal file
21
docs/proposal-config.txt
Normal file
@@ -0,0 +1,21 @@
|
||||
|
||||
Configuration
|
||||
|
||||
Configuration is documented on a per-use-case basis: if a class uses a certain
|
||||
value from the configuration object, it has to define its name and what the
|
||||
value is used for. This means decentralized configuration declarations that
|
||||
are nevertheless error checking and a centralized configuration object.
|
||||
|
||||
Directives are divided into namespaces, indicating the major portion of
|
||||
functionality they cover (although there may be overlaps). Please consult
|
||||
the documentation in ConfigDef for more information on these namespaces.
|
||||
|
||||
Since configuration is dependent on context, internal classes require a
|
||||
configuration object to be passed as a parameter. (They also require a
|
||||
Context object). A majority of classes do not need the config object,
|
||||
but for those who do, it is a lifesaver.
|
||||
|
||||
Definition objects are complex datatypes influenced by their respective
|
||||
directive namespaces (HTMLDefinition with HTML and CSSDefinition with CSS).
|
||||
If any of these directives is updated, HTML Purifier forces the definition
|
||||
to be regenerated.
|
135
docs/proposal-filter-levels.txt
Normal file
135
docs/proposal-filter-levels.txt
Normal file
@@ -0,0 +1,135 @@
|
||||
|
||||
Filter Levels
|
||||
When one size *does not* fit all
|
||||
|
||||
It makes little sense to constrain users to one set of HTML elements and
|
||||
attributes and tell them that they are not allowed to mold this in
|
||||
any fashion. Many users demand to be able to custom-select which elements
|
||||
and attributes they want. This is fine: because HTML Purifier keeps close
|
||||
track of what elements are safe to use, there is no way for them to
|
||||
accidentally allow an XSS-able tag.
|
||||
|
||||
However, combing through the HTML spec to make your own whitelist can
|
||||
be a daunting task. HTML Purifier ought to offer pre-canned filter levels
|
||||
that amateur users can select based on what they think is their use-case.
|
||||
|
||||
Here are some fuzzy levels you could set:
|
||||
|
||||
1. Comments - Wordpress recommends a, abbr, acronym, b, blockquote, cite,
|
||||
code, em, i, strike, strong; however, you could get away with only a, em and
|
||||
p; also having blockquote and pre tags would be helpful.
|
||||
2. BBCode - Emulate the usual tagset for forums: b, i, img, a, blockquote,
|
||||
pre, div, span and h[2-6] (the last three are for specially formatted
|
||||
posts, div and span require associated classes or inline styling enabled
|
||||
to be useful)
|
||||
3. Pages - As permissive as possible without allowing XSS. No protection
|
||||
against bad design sense, unfortunately. Suitable for wiki and page
|
||||
environments. (probably what we have now)
|
||||
4. Lint - Accept everything in the spec, a Tidy wannabe. (This probably won't
|
||||
get implemented as it would require routines for things like <object>
|
||||
and friends to be implemented, which is a lot of work for not a lot of
|
||||
benefit)
|
||||
|
||||
One final note: when you start axing tags that are more commonly used, you
|
||||
run the risk of accidentally destroying user data, especially if the data
|
||||
is incoming from a WYSIWYG editor that hasn't been synced accordingly. This may
|
||||
make forbidden element to text transformations desirable (for example, images).
|
||||
|
||||
|
||||
|
||||
== Element Risk Analysis ==
|
||||
|
||||
Although none of the currently supported elements presents a security
|
||||
threat per se, some can cause problems for page layouts or be
|
||||
extremely complicated.
|
||||
|
||||
Legend:
|
||||
[danger level] - regular tags / uncommon tags ~ deprecated tags
|
||||
[danger level]* - rare tags
|
||||
|
||||
1 - blockquote, code, em, i, p, tt / strong, sub, sup
|
||||
1* - abbr, acronym, bdo, cite, dfn, kbd, q, samp
|
||||
2 - b, br, del, div, pre, span / ins, s, strike ~ u
|
||||
3 - h2, h3, h4, h5, h6 ~ center
|
||||
4 - h1, big ~ font
|
||||
5 - a
|
||||
7 - area, map
|
||||
|
||||
These are special use tags, they should be enabled on a blanket basis.
|
||||
|
||||
Lists - dd, dl, dt, li, ol, ul ~ menu, dir
|
||||
Tables - caption, table, td, th, tr / col, colgroup, tbody, tfoot, thead
|
||||
|
||||
Forms - fieldset, form, input, label, legend, optgroup, option, select, textarea
|
||||
XSS - noscript, object, script ~ applet
|
||||
Meta - base, basefont, body, head, html, link, meta, style, title
|
||||
Frames - frame, frameset, iframe
|
||||
|
||||
And tag specific notes:
|
||||
|
||||
a - general problems involving linkspam
|
||||
b - too much bold is bad, typographically speaking bold is discouraged
|
||||
br - often misused
|
||||
center - CSS, usually no legit use
|
||||
del - only useful in editing context
|
||||
div - little meaning in certain contexts i.e. blog comment
|
||||
h1 - usually no legit use, as header is already set by application
|
||||
h* - not needed in blog comments
|
||||
hr - usually not necessary in blog comments
|
||||
img - could be extremely undesirable if linking to external pics (CSRF, goatse)
|
||||
pre - could use formatting, only useful in code contexts
|
||||
q - very little support
|
||||
s - transform into span with styling or del?
|
||||
small - technically presentational
|
||||
span - depends on attribute allowances
|
||||
sub, sup - specialized
|
||||
u - little legit use, prefer class with text-decoration
|
||||
|
||||
Based on the riskiness of the items, we may want to offer %HTML.DisableImages
|
||||
attribute and put URI filtering higher up on the priority list.
|
||||
|
||||
|
||||
== Attribute Risk Analysis ==
|
||||
|
||||
We actually have a surprisingly small assortment of allowed attributes (the
|
||||
rest are deprecated in strict, and thus we opted not to allow them, even
|
||||
though our output is XHTML Transitional by default.)
|
||||
|
||||
Required URI - img.alt, img.src, a.href
|
||||
Medium risk - *.class, *.dir
|
||||
High risk - img.height, img.width, *.id, *.style
|
||||
|
||||
Table - colgroup/col.span, td/th.rowspan, td/th.colspan
|
||||
Uncommon - *.title, *.lang, *.xml:lang
|
||||
Rare - td/th.abbr, table.summary, {table}.charoff
|
||||
Rare URI - del.cite, ins.cite, blockquote.cite, q.cite, img.longdesc
|
||||
Presentational - {table}.align, {table}.valign, table.frame, table.rules,
|
||||
table.border
|
||||
Partially presentational - table.cellpadding, table.cellspacing,
|
||||
table.width, col.width, colgroup.width
|
||||
|
||||
|
||||
== CSS Risk Analysis ==
|
||||
|
||||
Currently, there is no support for fine-grained "allowed CSS" specification,
|
||||
mainly because I'm lazy, partially because no one has asked for it. However,
|
||||
this will be added eventually.
|
||||
|
||||
There are certain CSS elements that are extremely useful inline, but then
|
||||
as you get to more presentation oriented styling it may not always be
|
||||
appropriate to inline them.
|
||||
|
||||
Useful - clear, float, border-collapse, caption-side
|
||||
|
||||
These CSS properties can break layouts if used improperly. We have excluded
|
||||
any CSS properties that are not currently implemented (such as position).
|
||||
|
||||
Dangerous, can go outside container - float
|
||||
Easy to abuse - font-size, font-family (font), width
|
||||
Colored - background-color (background), border-color (border), color
|
||||
(see proposal-colors.html)
|
||||
Dramatic - border, list-style-position (list-style), margin, padding,
|
||||
text-align, text-indent, text-transform, vertical-align, line-height
|
||||
|
||||
Dramatic elements substantially change the look of text in ways that should
|
||||
probably have been reserved to other areas.
|
62
docs/proposal-language.txt
Normal file
62
docs/proposal-language.txt
Normal file
@@ -0,0 +1,62 @@
|
||||
We are going to model our I18N/L10N off of MediaWiki's system. Theirs is
|
||||
obviously quite complicated, so we're going to simplify it a bit for our needs.
|
||||
|
||||
== Caching ==
|
||||
|
||||
MediaWiki has lots of caching mechanisms built in, which make the code somewhat
|
||||
more difficult to understand. Before doing any loading, MediaWiki will check
|
||||
the following places to see if we can be lazy:
|
||||
|
||||
1. $mLocalisationCache[$code] - just a variable where it may have been stashed
|
||||
2. serialized/$code.ser - compiled serialized language file
|
||||
3. Memcached version of file (with expiration checking)
|
||||
|
||||
Expiration checking consists of ensuring all dependencies have filemtime
|
||||
that match the ones bundled with the cached copy. Similar checking could be
|
||||
implemented for serialized versions, as it seems that they are not updated
|
||||
until manually recompiled.
|
||||
|
||||
== Behavior ==
|
||||
|
||||
Things that are localizable:
|
||||
|
||||
- Weekdays (and abbrev)
|
||||
- Months (and abbrev)
|
||||
- Bookstores
|
||||
- Skin names
|
||||
- Date preferences / Custom date format
|
||||
- Default date format
|
||||
- Default user option overrides
|
||||
-+ Language names
|
||||
- Timezones
|
||||
-+ Character encoding conversion via iconv
|
||||
- UpperLowerCase first (needs casemaps for some)
|
||||
- UpperLowerCase
|
||||
- Uppercase words
|
||||
- Uppercase word breaks
|
||||
- Case folding
|
||||
- Strip punctuation for MySQL search
|
||||
- Get first character
|
||||
-+ Alternate encoding
|
||||
-+ Recoding for edit (and then recode input)
|
||||
-+ RTL
|
||||
-+ Direction mark character depending on RTL
|
||||
-? Arrow depending on RTL
|
||||
- Languages where italics cannot be used
|
||||
-+ Number formatting (commafy, transform digits, transform separators)
|
||||
- Truncate (multibyte)
|
||||
- Grammar conversions for inflected languages
|
||||
- Plural transformations
|
||||
- Formatting expiry times
|
||||
- Segmenting for diffs (Chinese)
|
||||
- Convert to variants of language
|
||||
- Language specific user preference options
|
||||
- Link trails [[foo]]bar
|
||||
-+ Language code (RFC 3066)
|
||||
|
||||
Neat functionality:
|
||||
|
||||
- I18N sprintfDate
|
||||
- Roman numeral formatting
|
||||
|
||||
Items marked with a + likely need to be addressed by HTML Purifier
|
44
docs/proposal-new-directives.txt
Normal file
44
docs/proposal-new-directives.txt
Normal file
@@ -0,0 +1,44 @@
|
||||
|
||||
Configuration Ideas
|
||||
|
||||
Here are some theoretical configuration ideas that we could implement some
|
||||
time. Note the naming convention: %Namespace.Directive
|
||||
|
||||
%Attr.RewriteFragments - if there's %Attr.IDPrefix we may want to transparently
|
||||
rewrite the URLs we parse too. However, we can only do it when it's a pure
|
||||
anchor link, so it's not foolproof
|
||||
|
||||
%Attr.ClassBlacklist,
|
||||
%Attr.ClassWhitelist,
|
||||
%Attr.ClassPolicy - determines what classes are allowed. When
|
||||
%Attr.ClassPolicy is set to Blacklist, only allow those not in
|
||||
%Attr.ClassBlacklist. When it's Whitelist, only allow those in
|
||||
%Attr.ClassWhitelist.
|
||||
|
||||
%Attr.MaxWidth,
|
||||
%Attr.MaxHeight - caps for width and height related checks.
|
||||
(the hack in Pixels for an image crashing attack could be replaced by this)
|
||||
|
||||
%URI.AddRelNofollow - will add rel="nofollow" to all links, preventing the
|
||||
spread of ill-gotten pagerank
|
||||
|
||||
%URI.RelativeToAbsolute - transforms all relative URIs to absolute form
|
||||
|
||||
%URI.HostBlacklistRegex - regexes that if matching the host are disallowed
|
||||
%URI.HostWhitelist - domain names that are excluded from the host blacklist
|
||||
%URI.HostPolicy - determines whether or not it's reject all and then whitelist
|
||||
or allow all in then do specific blacklists with whitelist intervening.
|
||||
'DenyAll' or 'AllowAll' (default)
|
||||
|
||||
%URI.DisableIPHosts - URIs that have IP addresses for hosts are disallowed.
|
||||
Be sure to also grab unusual encodings (dword, hex and octal), which may
|
||||
currently be caught by regular DNS
|
||||
%URI.DisableIDN - Disallow raw internationalized domain names. Punycode
|
||||
will still be permitted.
|
||||
|
||||
%URI.ConvertUnusualIPHosts - transform dword/hex/octal IP addresses to the
|
||||
regular form
|
||||
%URI.ConvertAbsoluteDNS - Remove extra dots after host names that trigger
|
||||
absolute DNS. While this is actually the preferred method according to
|
||||
the RFC, most people opt to use a relative domain name relative to . (root).
|
||||
|
48
docs/ref-content-models.txt
Normal file
48
docs/ref-content-models.txt
Normal file
@@ -0,0 +1,48 @@
|
||||
|
||||
Handling Content Model Changes
|
||||
|
||||
|
||||
1. Context
|
||||
|
||||
The distinction between Transitional and Strict document types is somewhat
|
||||
of an anomaly in the lineage of XHTML document types (following 1.0, no
|
||||
doctypes have flavors: instead, modularization is used to let
|
||||
document authors vary their elements). This transition is usually quite
|
||||
straight-forward, as W3C usually deprecates attributes or elements, which
|
||||
are quite easily handled using tag and attribute transforms.
|
||||
|
||||
However, for three elements, <blockquote>, <body> and <address>, W3C elected
|
||||
to also change the content model. <blockquote> and <body> originally
|
||||
accepted both inline and block elements, but in the strict doctype they
|
||||
only allow block elements. With <address>, the situation is inverted:
|
||||
<p> tags were now forbidden from appearing within this tag.
|
||||
|
||||
|
||||
2. Current situation
|
||||
|
||||
Currently, HTML Purifier treats <blockquote> specially during Tidy mode
|
||||
using a custom ChildDef class StrictBlockquote. StrictBlockquote
|
||||
operates similarly to Required, except that when it encounters an inline
|
||||
element, it will wrap it in a block tag (as specified by
|
||||
%HTML.BlockWrapper, the default is <p>). The naming suggests it can
|
||||
only be used for <blockquote>s, although it may be possible to
|
||||
genericize it to work on other cases of this nature (this would be of
|
||||
little practical application, as no other element in XHTML 1.1 or earlier
|
||||
has a block-only content model).
|
||||
|
||||
Tidy currently contains no custom, lenient implementation for <address>.
|
||||
If one were to be written, it would likely operate on the principle that,
|
||||
when a <p> tag were to be encountered, it would be replaced with a
|
||||
leading and trailing <br /> tag (the contents of <p>, being inline, are
|
||||
not an issue). There is no prior work with this sort of operation.
|
||||
|
||||
|
||||
3. Outside applicability
|
||||
|
||||
There are a number of other elements that contain restrictive content
|
||||
models, such as <ul> or <span> (the latter is restrictive in that it
|
||||
does not allow block elements). In the former case, an errant node
|
||||
is eliminated completely, in the latter case, the text of the node
|
||||
would be preserved (as the parent node does allow PCDATA). Custom
|
||||
content model implementations probably are not the best way of handling
|
||||
these cases; instead, node bubbling should be implemented.
|
@@ -1,28 +1,45 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
|
||||
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
|
||||
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"><head>
|
||||
|
||||
<title>DevNetwork Forums</title>
|
||||
|
||||
</head>
|
||||
<body>
|
||||
|
||||
<p>Many thanks to the DevNetwork community for answering questions,
|
||||
theorizing about design, and offering encouragement during
|
||||
the development of this library in these forum threads:</p>
|
||||
|
||||
<ul>
|
||||
<li><a href="http://forums.devnetwork.net/viewtopic.php?t=52905">HTMLPurifier PHP Library hompeage</a></li>
|
||||
<li><a href="http://forums.devnetwork.net/viewtopic.php?t=53056">How much of CSS to implement?</a></li>
|
||||
<li><a href="http://forums.devnetwork.net/viewtopic.php?t=53083">Parsing URL only according to URI : Security Risk?</a></li>
|
||||
<li><a href="http://forums.devnetwork.net/viewtopic.php?t=53096">Gimme a name : URI and friends</a></li>
|
||||
<li><a href="http://forums.devnetwork.net/viewtopic.php?t=53415">How to document configuration directives</a></li>
|
||||
<li><a href="http://forums.devnetwork.net/viewtopic.php?t=53479">IPv6</a></li>
|
||||
<li><a href="http://forums.devnetwork.net/viewtopic.php?t=53539">http and ftp versus news and mailto</a></li>
|
||||
<li><a href="http://forums.devnetwork.net/viewtopic.php?t=53579">HTMLPurifier - Take your best shot</a></li>
|
||||
<li><a href="http://forums.devnetwork.net/viewtopic.php?t=53664">Need help optimizing a block of code</a>
|
||||
</ul>
|
||||
|
||||
</body>
|
||||
</html>
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
|
||||
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
|
||||
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"><head>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
|
||||
<meta name="description" content="Credits and links to DevNetwork forum topics on HTML Purifier." />
|
||||
<link rel="stylesheet" type="text/css" href="./style.css" />
|
||||
|
||||
<title>DevNetwork Credits - HTML Purifier</title>
|
||||
|
||||
</head>
|
||||
<body>
|
||||
|
||||
<h1>DevNetwork Credits</h1>
|
||||
|
||||
<div id="filing">Filed under Reference</div>
|
||||
<div id="index">Return to the <a href="index.html">index</a>.</div>
|
||||
<div id="home"><a href="http://htmlpurifier.org/">HTML Purifier</a> End-User Documentation</div>
|
||||
|
||||
<p>Many thanks to the DevNetwork community for answering questions,
|
||||
theorizing about design, and offering encouragement during
|
||||
the development of this library in these forum threads:</p>
|
||||
|
||||
<ul>
|
||||
<li><a href="http://forums.devnetwork.net/viewtopic.php?t=52905">HTMLPurifier PHP Library homepage</a></li>
|
||||
<li><a href="http://forums.devnetwork.net/viewtopic.php?t=53056">How much of CSS to implement?</a></li>
|
||||
<li><a href="http://forums.devnetwork.net/viewtopic.php?t=53083">Parsing URL only according to URI : Security Risk?</a></li>
|
||||
<li><a href="http://forums.devnetwork.net/viewtopic.php?t=53096">Gimme a name : URI and friends</a></li>
|
||||
<li><a href="http://forums.devnetwork.net/viewtopic.php?t=53415">How to document configuration directives</a></li>
|
||||
<li><a href="http://forums.devnetwork.net/viewtopic.php?t=53479">IPv6</a></li>
|
||||
<li><a href="http://forums.devnetwork.net/viewtopic.php?t=53539">http and ftp versus news and mailto</a></li>
|
||||
<li><a href="http://forums.devnetwork.net/viewtopic.php?t=53579">HTMLPurifier - Take your best shot</a></li>
|
||||
<li><a href="http://forums.devnetwork.net/viewtopic.php?t=53664">Need help optimizing a block of code</a></li>
|
||||
<li><a href="http://forums.devnetwork.net/viewtopic.php?t=53861">Non-SGML characters</a></li>
|
||||
<li><a href="http://forums.devnetwork.net/viewtopic.php?t=54283">Wordpress makes me cry</a></li>
|
||||
<li><a href="http://forums.devnetwork.net/viewtopic.php?t=54478">Parameter Object vs. Parameter Array vs. Parameter Functions</a></li>
|
||||
<li><a href="http://forums.devnetwork.net/viewtopic.php?t=54521">Convert encoding where output cannot represent characters</a></li>
|
||||
<li><a href="http://forums.devnetwork.net/viewtopic.php?t=56411">Reporting errors in a document without line numbers</a></li>
|
||||
</ul>
|
||||
|
||||
<p>...as well as any I may have forgotten.</p>
|
||||
|
||||
<div id="version">$Id$</div>
|
||||
</body>
|
||||
</html>
|
164
docs/ref-html-modularization.txt
Normal file
164
docs/ref-html-modularization.txt
Normal file
@@ -0,0 +1,164 @@
|
||||
|
||||
The Modularization of HTMLDefinition in HTML Purifier
|
||||
|
||||
Todo for XHTML 1.1 support <http://www.w3.org/TR/xhtml11/changes.html>
|
||||
1. Support Ruby <http://www.w3.org/TR/2001/REC-ruby-20010531/>
|
||||
|
||||
HTML Purifier uses the modularization of XHTML
|
||||
<http://www.w3.org/TR/xhtml-modularization/> to organize the internals
|
||||
of HTMLDefinition into a more manageable and extensible fashion. Rather
|
||||
than have one super-object, HTMLDefinition is split into HTMLModules,
|
||||
each of which is responsible for defining elements, their attributes,
|
||||
and other properties (for more in-depth coverage, see
|
||||
/library/HTMLPurifier/HTMLModule.php's docblock comments). These modules
|
||||
are managed by HTMLModuleManager.
|
||||
|
||||
Modules that we don't support but could support are:
|
||||
|
||||
* 5.6. Table Modules
|
||||
o 5.6.1. Basic Tables Module [?]
|
||||
* 5.8. Client-side Image Map Module [?]
|
||||
* 5.9. Server-side Image Map Module [?]
|
||||
* 5.12. Target Module [?]
|
||||
* 5.21. Name Identification Module [deprecated]
|
||||
|
||||
These modules would be implemented as "unsafe":
|
||||
|
||||
* 5.2. Core Modules
|
||||
o 5.2.1. Structure Module
|
||||
* 5.3. Applet Module
|
||||
* 5.5. Forms Modules
|
||||
o 5.5.1. Basic Forms Module
|
||||
o 5.5.2. Forms Module
|
||||
* 5.10. Object Module
|
||||
* 5.11. Frames Module
|
||||
* 5.13. Iframe Module
|
||||
* 5.14. Intrinsic Events Module
|
||||
* 5.15. Metainformation Module
|
||||
* 5.16. Scripting Module
|
||||
* 5.17. Style Sheet Module
|
||||
* 5.19. Link Module
|
||||
* 5.20. Base Module
|
||||
|
||||
We will not be using W3C's XML Schemas or DTDs directly due to the lack
|
||||
of robust tools for handling them (the main problem is that all the
|
||||
current parsers are usually PHP 5 only and solely-validating, not
|
||||
correcting).
|
||||
|
||||
This system may be generalized and ported over for CSS.
|
||||
|
||||
== General Use-Case ==
|
||||
|
||||
The outwards API of HTMLDefinition has been largely preserved, not
|
||||
only for backwards-compatibility but also by design. Instead,
|
||||
HTMLDefinition can be retrieved "raw", in which it loads a structure
|
||||
that closely resembles the modules of XHTML 1.1. This structure is very
|
||||
dynamic, making it easy to make cascading changes to global content
|
||||
sets or remove elements in bulk.
|
||||
|
||||
However, once HTML Purifier needs the actual definition, it retrieves
|
||||
a finalized version of HTMLDefinition. The finalized definition involves
|
||||
processing the modules into a form that is optimized for multiple
|
||||
calls. This final version is immutable and, even if editable, would
|
||||
be extremely hard to change.
|
||||
|
||||
So, some code taking advantage of the XHTML modularization may look
|
||||
like this:
|
||||
|
||||
<?php
|
||||
$config = HTMLPurifier_Config::createDefault();
|
||||
$def =& $config->getHTMLDefinition(true); // reference to raw
|
||||
$def->addElement('marquee', 'Block', 'Flow', 'Common');
|
||||
$purifier = new HTMLPurifier($config);
|
||||
$purifier->purify($html); // now the definition is finalized
|
||||
?>
|
||||
|
||||
== Inclusions ==
|
||||
|
||||
One of the nice features of HTMLDefinition is that piggy-backing off
|
||||
of global attribute and content sets is extremely easy to do.
|
||||
|
||||
=== Attributes ===
|
||||
|
||||
HTMLModule->elements[$element]->attr stores attribute information for the
|
||||
specific attributes of $element. This is quite close to the final
|
||||
API that HTML Purifier interfaces with, but there's an important
|
||||
extra feature: attr may also contain an array with a member index zero.
|
||||
|
||||
<?php
|
||||
HTMLModule->elements[$element]->attr[0] = array('AttrSet');
|
||||
?>
|
||||
|
||||
Rather than map the attribute key 0 to an array (which should be
|
||||
an AttrDef), it defines a number of attribute collections that should
|
||||
be merged into this element's attribute array.
|
||||
|
||||
Furthermore, the value of an attribute key, attribute value pair need
|
||||
not be a fully fledged AttrDef object. They can also be a string, which
|
||||
signifies an AttrDef that is looked up from a centralized registry
|
||||
AttrTypes. This allows more concise attribute definitions that look
|
||||
more like W3C's declarations, as well as offering a centralized point
|
||||
for modifying the behavior of one attribute type. And, of course, the
|
||||
old method of manually instantiating an AttrDef still works.
|
||||
|
||||
=== Attribute Collections ===
|
||||
|
||||
Attribute collections are stored and processed in the AttrCollections
|
||||
object, which is responsible for performing the inclusions signified
|
||||
by the 0 index. These attribute collections, too, are mutable, by
|
||||
using HTMLModule->attr_collections. You may add new attributes
|
||||
to a collection or define an entirely new collection for your module's
|
||||
use. Inclusions can also be cumulative.
|
||||
|
||||
Attribute collections allow us to get rid of so called "global attributes"
|
||||
(which actually aren't so global).
|
||||
|
||||
=== Content Models and ChildDef ===
|
||||
|
||||
An implementation of the above-mentioned attributes and attribute
|
||||
collections was applied to the ChildDef system. HTML Purifier uses
|
||||
a proprietary system called ChildDef for performance and flexibility
|
||||
reasons, but this does not line up very well with W3C's notion of
|
||||
regexps for defining the allowed children of an element.
|
||||
|
||||
HTMLPurifier->elements[$element]->content_model and
|
||||
HTMLPurifier->elements[$element]->content_model_type store information
|
||||
about the final ChildDef that will be stored in
|
||||
HTMLPurifier->elements[$element]->child (we use a different variable
|
||||
because the two forms are sufficiently different).
|
||||
|
||||
$content_model is an abstract, string representation of the internal
|
||||
state of ChildDef, while $content_model_type is a string identifier
|
||||
of which ChildDef subclass to instantiate. $content_model is processed
|
||||
by substituting all content set identifiers (capitalized element names)
|
||||
with their contents. It is then parsed and passed into the appropriate
|
||||
ChildDef class, as defined by the ContentSets->getChildDef() or the
|
||||
custom fallback HTMLModule->getChildDef() for custom child definitions
|
||||
not in the core.
|
||||
|
||||
You'll need to use these facilities if you plan on referencing a content
|
||||
set like "Inline" or "Block", and using them is recommended even if you're
|
||||
not due to their conciseness.
|
||||
|
||||
A few notes on $content_model: its structure can be as complicated
|
||||
as you want, but the pipe symbol (|) is reserved for defining possible
|
||||
choices, due to the content sets implementation. For example, a content
|
||||
model that looks like:
|
||||
|
||||
"Inline -> Block -> a"
|
||||
|
||||
...when the Inline content set is defined as "span | b" and the Block
|
||||
content set is defined as "div | blockquote", will expand into:
|
||||
|
||||
"span | b -> div | blockquote -> a"
|
||||
|
||||
The custom HTMLModule->getChildDef() function will need to be able to
|
||||
then feed this information to ChildDef in a usable manner.
|
||||
|
||||
=== Content Sets ===
|
||||
|
||||
Content sets can be altered using HTMLModule->content_sets, an associative
|
||||
array of content set names to content set contents. If the content set
|
||||
already exists, your values are appended on to it (great for, say,
|
||||
registering the font tag as an inline element), otherwise it is
|
||||
created. They are substituted into content_model.
|
24
docs/ref-proprietary-tags.txt
Normal file
24
docs/ref-proprietary-tags.txt
Normal file
@@ -0,0 +1,24 @@
|
||||
|
||||
Proprietary Tags
|
||||
<nobr> and friends
|
||||
|
||||
Here are some proprietary tags that W3C does not define but occasionally show
|
||||
up in the wild. We have only included tags that would make sense in an
|
||||
HTML Purifier context.
|
||||
|
||||
<align>, block element that aligns (extremely rare)
|
||||
<blackface>, inline that double-bolds text (extremely rare)
|
||||
<comment>, hidden comment for IE and WebTV
|
||||
<multicol cols=number gutter=pixels width=pixels>, multiple columns
|
||||
<nobr>, no linebreaks
|
||||
<spacer align=* type="vertical|horizontal|block">, whitespace in doc,
|
||||
use width/height for block and size for vertical/horizontal (attributes)
|
||||
(extremely rare)
|
||||
<wbr>, potential word break point: allows linebreaks. Only works in <nobr>
|
||||
|
||||
<listing>, monospace pre-variant (extremely rare)
|
||||
<plaintext>, escapes all tags to the end of document
|
||||
<xmp>, monospace, replace with pre
|
||||
|
||||
These should be put into their own Tidy module, not loaded by default(?). These
|
||||
all qualify as "lenient" transforms.
|
24
docs/ref-whatwg.txt
Normal file
24
docs/ref-whatwg.txt
Normal file
@@ -0,0 +1,24 @@
|
||||
|
||||
Web Hypertext Application Technology Working Group
|
||||
WHATWG
|
||||
|
||||
== HTML 5 ==
|
||||
|
||||
URL: http://www.whatwg.org/specs/web-apps/current-work/
|
||||
|
||||
HTML 5 defines a kaboodle of new elements and attributes, as well as
|
||||
some well-defined, "quirks mode" HTML parsing. Although WHATWG professes
|
||||
to be targeted towards web applications, many of their semantic additions
|
||||
would be quite useful in regular documents. Eventually, HTML
|
||||
Purifier will need to audit their lists and figure out what changes need
|
||||
to be made. This process is complicated by the fact that the WHATWG
|
||||
doesn't buy into W3C's modularization of XHTML 1.1: we may need
|
||||
to remodularize HTML 5 (probably done by section name). No sense in
|
||||
committing ourselves till the spec stabilizes, though.
|
||||
|
||||
More immediately relevant, however, is the well-defined parsing
|
||||
behavior that HTML 5 adds. While I have little interest in writing
|
||||
another DirectLex parser, other parsers like ph5p
|
||||
<http://jero.net/lab/ph5p/> can be adapted to DOMLex to support much more
|
||||
flexible HTML parsing (a cool feature I've seen is how they resolve
|
||||
<b>bold<i>both</b>italic</i>).
|
@@ -1,39 +0,0 @@
|
||||
|
||||
Security
|
||||
|
||||
Like anything that claims to afford security, HTML_Purifier can be circumvented
|
||||
through negligence of people. This class will do its job: no more, no less,
|
||||
and it's up to you to provide it the proper information and proper context
|
||||
to be effective. Things to remember:
|
||||
|
||||
1. UTF-8. Currently, the parser runs under the assumption that it is dealing
|
||||
with UTF-8. Not ISO-8859-1 or Windows-1252, UTF-8. And definitely not "no
|
||||
character encoding explicitly stated" or UTF-7. If you're not using UTF-8 as
|
||||
your character encoding, you should switch. Now. (in future versions, however,
|
||||
I may make the character encoding configurable, but there's only so much I
|
||||
can do). Make sure any input is properly converted to UTF-8, or the parser
|
||||
will mangle it badly (though it won't be a security risk if you're outputting
|
||||
it as UTF-8).
|
||||
|
||||
2. XHTML 1.0 Transitional. This is what the parser is outputting. For the most
|
||||
part, it's compatible with HTML 4.01, but XHTML enforces some very nice things
|
||||
that all web developers should use. Regardless, NO DOCTYPE is a NO. Quirks mode
|
||||
has waaaay too many quirks for a little parser to handle. We did not select
|
||||
strict in order to prevent ourselves from being too draconian on users, but
|
||||
this may be configurable in the future.
|
||||
|
||||
3. IDs. They need to be unique, but without some knowledge of the
|
||||
rest of the document, it's difficult to know what's unique. Without setting
|
||||
%Attr.IDBlacklist to the proper list of IDs already in use on the page,
duplicate IDs may still slip through.
|
||||
|
||||
4. [PROJECTED] Links. We're not going to try for spam protection (although
|
||||
some hooks for such a module might be nice) but we may offer the ability to
|
||||
only accept relative URLs. Pick the one that's right for you.
|
||||
|
||||
5. CSS. While we can prevent the most flagrant cases from affecting your
|
||||
layout (such as absolutely positioned elements), no amount of code is going
|
||||
to protect your pages from being attacked by garish colors and plain old
|
||||
bad taste. A neat feature would be the ability to define acceptable colors
|
||||
in a document, but that's not likely to be implemented for a while. In the
|
||||
meantime, make sure that floated elements (permitted, since they
|
||||
can be quite useful) can't mess up your layout.
|
8
docs/specimens/LICENSE
Normal file
8
docs/specimens/LICENSE
Normal file
@@ -0,0 +1,8 @@
|
||||
Licensing of Specimens
|
||||
|
||||
Some files in this directory have different licenses:
|
||||
|
||||
windows-live-mail-desktop-beta.html - donated by laacz, public domain
|
||||
img.png - LGPL, from <http://commons.wikimedia.org/wiki/Image:Pastille_chrome.png>
|
||||
|
||||
All other files are by me, and are licensed under LGPL.
|
165
docs/specimens/html-align-to-css.html
Normal file
165
docs/specimens/html-align-to-css.html
Normal file
@@ -0,0 +1,165 @@
|
||||
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
|
||||
"http://www.w3.org/TR/html4/loose.dtd">
|
||||
<html>
|
||||
<head>
|
||||
<title>HTML align attribute to CSS - HTML Purifier Specimen</title>
|
||||
<style type="text/css">
|
||||
div.container {position:relative;height:110px;}
|
||||
div.container.legend .test {text-align:center;line-height:100px;}
|
||||
div.test {width:100px;height:100px;border:1px solid black;
|
||||
position:absolute;top:10px;}
|
||||
div.test.html {left:10px;}
|
||||
div.test.css {left:140px;}
|
||||
table {background:#F00;}
|
||||
img {border:1px solid #000;}
|
||||
hr {width:50px;}
|
||||
div.segment {width:250px; float:left; margin-top:1em;}
|
||||
</style>
|
||||
</head>
|
||||
<body>
|
||||
|
||||
<h1>HTML align attribute to CSS</h1>
|
||||
|
||||
<p>Inspect source for methodology.</p>
|
||||
|
||||
<div class="container legend">
|
||||
<div class="test html">
|
||||
HTML
|
||||
</div>
|
||||
<div class="test css">
|
||||
CSS
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="segment">
|
||||
|
||||
<h2>table.align</h2>
|
||||
|
||||
<h3>left</h3>
|
||||
<div class="container">
|
||||
<div class="test html">
|
||||
a<table align="left"><tr><td>O</td></tr></table>a
|
||||
</div>
|
||||
<div class="test css">
|
||||
a<table style="float:left;"><tr><td>O</td></tr></table>a
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<h3>center</h3>
|
||||
<div class="container">
|
||||
<div class="test html">
|
||||
a<table align="center"><tr><td>O</td></tr></table>a
|
||||
</div>
|
||||
<div class="test css">
|
||||
a<table style="margin-left:auto; margin-right:auto;"><tr><td>O</td></tr></table>a
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<h3>right</h3>
|
||||
<div class="container">
|
||||
<div class="test html">
|
||||
a<table align="right"><tr><td>O</td></tr></table>a
|
||||
</div>
|
||||
<div class="test css">
|
||||
a<table style="float:right;"><tr><td>O</td></tr></table>a
|
||||
</div>
|
||||
</div>
|
||||
|
||||
</div>
|
||||
|
||||
<!-- ################################################################## -->
|
||||
|
||||
<div class="segment">
|
||||
<h2>img.align</h2>
|
||||
<h3>left</h3>
|
||||
<div class="container">
|
||||
<div class="test html">
|
||||
a<img src="img.png" align="left">a
|
||||
</div>
|
||||
<div class="test css">
|
||||
a<img src="img.png" style="float:left;">a
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<h3>right</h3>
|
||||
<div class="container">
|
||||
<div class="test html">
|
||||
a<img src="img.png" align="right">a
|
||||
</div>
|
||||
<div class="test css">
|
||||
a<img src="img.png" style="float:right;">a
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<h3>bottom</h3>
|
||||
<div class="container">
|
||||
<div class="test html">
|
||||
a<img src="img.png" align="bottom">a
|
||||
</div>
|
||||
<div class="test css">
|
||||
a<img src="img.png" style="vertical-align:baseline;">a
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<h3>middle</h3>
|
||||
<div class="container">
|
||||
<div class="test html">
|
||||
a<img src="img.png" align="middle">a
|
||||
</div>
|
||||
<div class="test css">
|
||||
a<img src="img.png" style="vertical-align:middle;">a
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<h3>top</h3>
|
||||
<div class="container">
|
||||
<div class="test html">
|
||||
a<img src="img.png" align="top">a
|
||||
</div>
|
||||
<div class="test css">
|
||||
a<img src="img.png" style="vertical-align:top;">a
|
||||
</div>
|
||||
</div>
|
||||
|
||||
</div>
|
||||
|
||||
<!-- ################################################################## -->
|
||||
|
||||
<div class="segment">
|
||||
|
||||
<h2>hr.align</h2>
|
||||
|
||||
<h3>left</h3>
|
||||
<div class="container">
|
||||
<div class="test html">
|
||||
<hr align="left" />
|
||||
</div>
|
||||
<div class="test css">
|
||||
<hr style="margin-right:auto; margin-left:0; text-align:left;" />
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<h3>center</h3>
|
||||
<div class="container">
|
||||
<div class="test html">
|
||||
<hr align="center" />
|
||||
</div>
|
||||
<div class="test css">
|
||||
<hr style="margin-right:auto; margin-left:auto; text-align:center;" />
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<h3>right</h3>
|
||||
<div class="container">
|
||||
<div class="test html">
|
||||
<hr align="right" />
|
||||
</div>
|
||||
<div class="test css">
|
||||
<hr style="margin-right:0; margin-left:auto; text-align:right;" />
|
||||
</div>
|
||||
</div>
|
||||
|
||||
</div>
|
||||
|
||||
</body>
|
||||
</html>
|
BIN
docs/specimens/img.png
Normal file
BIN
docs/specimens/img.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 2.1 KiB |
74
docs/specimens/windows-live-mail-desktop-beta.html
Normal file
74
docs/specimens/windows-live-mail-desktop-beta.html
Normal file
@@ -0,0 +1,74 @@
|
||||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
|
||||
<HTML ChildAreas="4" xmlns:canvas><HEAD>
|
||||
<META http-equiv=Content-Type content=text/html;charset=windows-1257>
|
||||
<STYLE></STYLE>
|
||||
|
||||
<META content="MSHTML 6.00.6000.16414" name=GENERATOR></HEAD>
|
||||
<BODY id=MailContainerBody
|
||||
style="PADDING-RIGHT: 10px; PADDING-LEFT: 10px; FONT-SIZE: 10pt; COLOR: #000000; PADDING-TOP: 15px; FONT-FAMILY: Arial"
|
||||
bgColor=#ff6600 leftMargin=0 background="" topMargin=0
|
||||
name="Compose message area" acc_role="text" CanvasTabStop="false">
|
||||
<DIV
|
||||
style="BORDER-TOP: #dddddd 1px solid; FONT-SIZE: 10pt; WIDTH: 100%; MARGIN-RIGHT: 10px; PADDING-TOP: 5px; BORDER-BOTTOM: #dddddd 1px solid; FONT-FAMILY: Verdana; HEIGHT: 25px; BACKGROUND-COLOR: #ffffff"><NOBR><SPAN
|
||||
title="View a slideshow of the pictures in this e-mail message."
|
||||
style="PADDING-RIGHT: 20px"><A style="COLOR: #0088e4"
|
||||
href="http://g.msn.com/5meen_us/171?path=/photomail/{6fc0065f-ffdd-4ca6-9a4c-cc5a93dc122f}&image=47D7B182CFEFB10!127&imagehi=47D7B182CFEFB10!125&CID=323550092004883216">Play
|
||||
slideshow </A></SPAN><SPAN style="COLOR: #909090"><SPAN>|</SPAN><SPAN
|
||||
style="PADDING-LEFT: 20px"> Download the highest quality version of a picture by
|
||||
clicking the + above it </SPAN></SPAN></NOBR></DIV>
|
||||
<DIV
|
||||
style="PADDING-RIGHT: 5px; PADDING-LEFT: 7px; PADDING-BOTTOM: 2px; WIDTH: 100%; PADDING-TOP: 2px">
|
||||
<OL>
|
||||
<LI><IMG title="Angry smile emoticon"
|
||||
style="FLOAT: none; MARGIN: 0px; POSITION: static" tabIndex=-1
|
||||
alt="Angry smile emoticon" src="cid:49F0C856199E4D688D2D740680733D74@wc"
|
||||
MSNNonUserImageOrEmoticon="true">Un ka <FONT style="BACKGROUND-COLOR: #800000"
|
||||
color=#cc99ff><STRONG>Tev</STRONG></FONT> iet, un ko tu dari?
|
||||
<LI>Aha!</LI></OL>
|
||||
|
||||
<UL>
|
||||
<LI>Buletets
|
||||
<LI>
|
||||
<DIV align=justify><A title=http://laacz.lv/blog/
|
||||
href="http://laacz.lv/blog/">http://laacz.lv/blog/</A> un <A
|
||||
title=http://google.com/ href="http://google.com/">gugle</A></DIV>
|
||||
<LI>Sarakstucitis</LI></UL></DIV><SPAN><SPAN xmlns:canvas="canvas-namespace-id"
|
||||
layoutEmptyTextWellFont="Tahoma"><SPAN
|
||||
style="MARGIN-BOTTOM: 15px; OVERFLOW: visible; HEIGHT: 16px"></SPAN><SPAN
|
||||
style="MARGIN-BOTTOM: 25px; VERTICAL-ALIGN: top; OVERFLOW: visible; MARGIN-RIGHT: 25px; HEIGHT: 234px">
|
||||
<TABLE style="DISPLAY: inline">
|
||||
<TBODY>
|
||||
<TR>
|
||||
|
||||
<TD>
|
||||
<DIV
|
||||
style="FONT-WEIGHT: bold; FONT-SIZE: 12pt; FONT-FAMILY: arial; TEXT-ALIGN: center"><A
|
||||
id=HiresARef
|
||||
title="Click here to view or download a high resolution version of this picture"
|
||||
style="COLOR: #0088e4; TEXT-DECORATION: none"
|
||||
href="http://byfiles.storage.msn.com/x1pMvt0I80jTgT6DuaCpEMbprX3nk3jNv_vjigxV_EYVSMyM_PKgEvDEUtuNhQC-F-23mTTcKyqx6eGaeK2e_wMJ0ikwpDdFntk4SY7pfJUv2g2Ck6R2S2vAA?download">+</A></DIV>
|
||||
<DIV
|
||||
title="Click here to view the full image using the online photo viewer."
|
||||
style="DISPLAY: inline; OVERFLOW: hidden; WIDTH: 140px; HEIGHT: 140px"><A
|
||||
href="http://g.msn.com/5meen_us/171?path=/photomail/{6fc0065f-ffdd-4ca6-9a4c-cc5a93dc122f}&image=47D7B182CFEFB10!127&imagehi=47D7B182CFEFB10!125&CID=323550092004883216"
|
||||
border="0"><IMG
|
||||
style="MARGIN-TOP: 15px; DISPLAY: inline-block; MARGIN-LEFT: 0px"
|
||||
height=109 src="cid:006A71303B80404E9FB6184E55D6A446@wc" width=140
|
||||
border=0></A></DIV></TD></TR>
|
||||
<TR>
|
||||
<TD>
|
||||
<DIV
|
||||
style="FONT-SIZE: 10pt; WIDTH: 140px; FONT-FAMILY: verdana; TEXT-ALIGN: center"><EM><STRONG>This
|
||||
<U>is </U></STRONG><U>tit</U>le</EM> fo<STRONG>r <FONT
|
||||
face="Arial Black">t<FONT color=#800000 size=7>h<U>i</U></FONT>s
|
||||
</FONT>picture</STRONG></DIV></TD></TR></TBODY></TABLE></SPAN></SPAN></SPAN>
|
||||
|
||||
<DIV
|
||||
style="PADDING-RIGHT: 5px; PADDING-LEFT: 7px; PADDING-BOTTOM: 2px; WIDTH: 100%; PADDING-TOP: 2px; HEIGHT: 50px">
|
||||
<DIV> </DIV></DIV>
|
||||
<DIV
|
||||
style="BORDER-TOP: #dddddd 1px solid; FONT-SIZE: 10pt; MARGIN-BOTTOM: 10px; WIDTH: 100%; COLOR: #909090; MARGIN-RIGHT: 10px; PADDING-TOP: 9px; FONT-FAMILY: Verdana; HEIGHT: 42px; BACKGROUND-COLOR: #ffffff"><NOBR><SPAN
|
||||
title="Join Windows Live to share photos using Windows Live Photo E-mail.">Online
|
||||
pictures are available for 30 days. <A style="COLOR: #0088e4"
|
||||
href="http://g.msn.com/5meen_us/175">Get Windows Live Mail desktop to create
|
||||
your own photo e-mails. </A></SPAN></NOBR></DIV></BODY></HTML>
|
71
docs/style.css
Normal file
71
docs/style.css
Normal file
@@ -0,0 +1,71 @@
|
||||
html {font-size:1em; font-family:serif; }
|
||||
body {margin-left:4em; margin-right:4em; }
|
||||
|
||||
dt {font-weight:bold; }
|
||||
pre {margin-left:2em; }
|
||||
pre, code, tt {font-family:monospace; font-size:1em; }
|
||||
|
||||
h1 {text-align:center; font-family:Garamond, serif;
|
||||
font-variant:small-caps;}
|
||||
h2 {border-bottom:1px solid #CCC; font-family:sans-serif; font-weight:normal;
|
||||
font-size:1.3em;}
|
||||
h3 {font-family:sans-serif; font-size:1.1em; font-weight:bold; }
|
||||
h4 {font-family:sans-serif; font-size:0.9em; font-weight:bold; }
|
||||
|
||||
/* For witty quips */
|
||||
.subtitled {margin-bottom:0em;}
|
||||
.subtitle , .subsubtitle {font-size:.8em; margin-bottom:1em;
|
||||
font-style:italic; margin-top:-.2em;text-align:center;}
|
||||
.subsubtitle {text-align:left;margin-left:2em;}
|
||||
|
||||
/* Used for special "See also" links. */
|
||||
.reference {font-style:italic;margin-left:2em;}
|
||||
|
||||
/* Marks off asides, discussions on why something is the way it is */
|
||||
.aside {margin-left:2em; font-family:sans-serif; font-size:0.9em; }
|
||||
blockquote .label {font-weight:bold; font-size:1em; margin:0 0 .1em;
|
||||
border-bottom:1px solid #CCC;}
|
||||
.emphasis {font-weight:bold; text-align:center; font-size:1.3em;}
|
||||
|
||||
/* A regular table */
|
||||
.table {border-collapse:collapse; border-bottom:2px solid #888; margin-left:2em; }
|
||||
.table thead th {margin:0; background:#888; color:#FFF; }
|
||||
.table thead th:first-child {-moz-border-radius-topleft:1em;}
|
||||
.table tbody td {border-bottom:1px solid #CCC; padding-right:0.6em;padding-left:0.6em;}
|
||||
|
||||
/* Category of the file */
|
||||
#filing {font-weight:bold; font-size:smaller; }
|
||||
|
||||
/* Contains, without exception, Return to index. */
|
||||
#index {font-size:smaller; }
|
||||
|
||||
#home {font-size:smaller;}
|
||||
|
||||
/* Contains, without exception, $Id$, for SVN version info. */
|
||||
#version {text-align:right; font-style:italic; margin:2em 0;}
|
||||
|
||||
#toc ol ol {list-style-type:lower-roman;}
|
||||
#toc ol {list-style-type:decimal;}
|
||||
#toc {list-style-type:upper-alpha;}
|
||||
|
||||
q {
|
||||
behavior: url(fixquotes.htc); /* IE fix */
|
||||
quotes: '\201C' '\201D' '\2018' '\2019';
|
||||
}
|
||||
q:before {
|
||||
content: open-quote;
|
||||
}
|
||||
q:after {
|
||||
content: close-quote;
|
||||
}
|
||||
|
||||
/* Marks off implementation details interesting only to the person writing
|
||||
the class described in the spec. */
|
||||
.technical {margin-left:2em; }
|
||||
.technical:before {content:"Technical note: "; font-weight:bold; color:#061; }
|
||||
|
||||
/* Marks off sections that are lacking. */
|
||||
.fixme {margin-left:2em; }
|
||||
.fixme:before {content:"Fix me: "; font-weight:bold; color:#C00; }
|
||||
|
||||
#applicability {margin: 1em 5%; font-style:italic;}
|
9
library/HTMLPurifier.auto.php
Normal file
9
library/HTMLPurifier.auto.php
Normal file
@@ -0,0 +1,9 @@
|
||||
<?php
|
||||
|
||||
/**
|
||||
* This is a stub include that automatically configures the include path.
|
||||
*/
|
||||
|
||||
set_include_path(dirname(__FILE__) . PATH_SEPARATOR . get_include_path() );
|
||||
require_once 'HTMLPurifier.php';
|
||||
|
20
library/HTMLPurifier.func.php
Normal file
20
library/HTMLPurifier.func.php
Normal file
@@ -0,0 +1,20 @@
|
||||
<?php
|
||||
|
||||
/**
|
||||
* Function wrapper for HTML Purifier for quick use.
|
||||
* @note This function only includes the library when it is called. While
|
||||
* this is efficient for instances when you only use HTML Purifier
|
||||
* on a few of your pages, it murders bytecode caching. You still
|
||||
* need to add HTML Purifier to your path.
|
||||
* @note ''HTMLPurifier()'' is NOT the same as ''new HTMLPurifier()''
|
||||
*/
|
||||
|
||||
function HTMLPurifier($html, $config = null) {
    // A single purifier is shared by every call to this function; it is
    // built the first time the function runs.
    static $purifier = false;
    if ($purifier === false) {
        // The library is only loaded once it is actually needed.
        require_once 'HTMLPurifier.php';
        $purifier = new HTMLPurifier();
    }
    // $config is passed straight through to HTMLPurifier::purify().
    $purified = $purifier->purify($html, $config);
    return $purified;
}
|
||||
|
@@ -3,7 +3,7 @@
|
||||
/*!
|
||||
* @mainpage
|
||||
*
|
||||
* HTMLPurifier is a purification class that will take an arbitrary snippet of
|
||||
* HTML Purifier is an HTML filter that will take an arbitrary snippet of
|
||||
* HTML and rigorously test, validate and filter it into a version that
|
||||
* is safe for output onto webpages. It achieves this by:
|
||||
*
|
||||
@@ -15,15 +15,51 @@
|
||||
* -# Validating attributes of the nodes; and
|
||||
* -# Generating HTML from the purified tokens.
|
||||
*
|
||||
* See /docs/spec.txt for more details.
|
||||
* However, most users will only need to interface with the HTMLPurifier
|
||||
* class, so this massive amount of infrastructure is usually concealed.
|
||||
* If you plan on working with the internals, be sure to include
|
||||
* HTMLPurifier_ConfigSchema and HTMLPurifier_Config.
|
||||
*/
|
||||
|
||||
require_once 'HTMLPurifier/ConfigDef.php';
|
||||
/*
|
||||
HTML Purifier 2.0.1 - Standards Compliant HTML Filtering
|
||||
Copyright (C) 2006 Edward Z. Yang
|
||||
|
||||
This library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
This library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with this library; if not, write to the Free Software
|
||||
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
// almost every class has an undocumented dependency to these, so make sure
|
||||
// they get included
|
||||
require_once 'HTMLPurifier/ConfigSchema.php'; // important
|
||||
require_once 'HTMLPurifier/Config.php';
|
||||
require_once 'HTMLPurifier/Context.php';
|
||||
|
||||
require_once 'HTMLPurifier/Lexer.php';
|
||||
require_once 'HTMLPurifier/HTMLDefinition.php';
|
||||
require_once 'HTMLPurifier/Generator.php';
|
||||
require_once 'HTMLPurifier/Strategy/Core.php';
|
||||
require_once 'HTMLPurifier/Encoder.php';
|
||||
|
||||
require_once 'HTMLPurifier/ErrorCollector.php';
|
||||
require_once 'HTMLPurifier/LanguageFactory.php';
|
||||
|
||||
HTMLPurifier_ConfigSchema::define(
|
||||
'Core', 'CollectErrors', false, 'bool', '
|
||||
Whether or not to collect errors found while filtering the document. This
|
||||
is a useful way to give feedback to your users. CURRENTLY NOT IMPLEMENTED.
|
||||
This directive has been available since 2.0.0.
|
||||
');
|
||||
|
||||
/**
|
||||
* Main library execution class.
|
||||
@@ -31,41 +67,149 @@ require_once 'HTMLPurifier/Strategy/Core.php';
|
||||
* Facade that performs calls to the HTMLPurifier_Lexer,
|
||||
* HTMLPurifier_Strategy and HTMLPurifier_Generator subsystems in order to
|
||||
* purify HTML.
|
||||
*
|
||||
* @todo We need an easier way to inject strategies, it'll probably end
|
||||
* up getting done through config though.
|
||||
*/
|
||||
class HTMLPurifier
{
    
    /** Version string of this release. */
    var $version = '2.0.1';
    
    /** Default configuration, produced by HTMLPurifier_Config::create() in the constructor. */
    var $config;
    
    /**
     * Filter objects registered through addFilter(), in registration order.
     * Starts out as an empty array so purify() can safely count() it even
     * when no filter was ever added.
     */
    var $filters = array();
    
    /** Strategy and generator instances, created once and reused by every purify() call. */
    var $strategy, $generator;
    
    /**
     * Final HTMLPurifier_Context of last run purification. Might be an array
     * (purifyArray() stores one context per input key).
     * @public
     */
    var $context;
    
    /**
     * Initializes the purifier.
     * @param $config Optional HTMLPurifier_Config object for all instances of
     *                the purifier, if omitted, a default configuration is
     *                supplied (which can be overridden on a per-use basis).
     *                The parameter can also be any type that
     *                HTMLPurifier_Config::create() supports.
     */
    function HTMLPurifier($config = null) {
        
        $this->config = HTMLPurifier_Config::create($config);
        
        $this->strategy     = new HTMLPurifier_Strategy_Core();
        $this->generator    = new HTMLPurifier_Generator();
        
    }
    
    /**
     * Adds a filter to process the output. First come first serve:
     * purify() runs preFilter() in registration order and postFilter()
     * in reverse registration order.
     * @param $filter HTMLPurifier_Filter object
     */
    function addFilter($filter) {
        $this->filters[] = $filter;
    }
    
    /**
     * Filters an HTML snippet/document to be XSS-free and standards-compliant.
     * 
     * @param $html String of HTML to purify
     * @param $config HTMLPurifier_Config object for this operation, if omitted,
     *                defaults to the config object specified during this
     *                object's construction. The parameter can also be any type
     *                that HTMLPurifier_Config::create() supports.
     * @return Purified HTML
     */
    function purify($html, $config = null) {
        
        $config = $config ? HTMLPurifier_Config::create($config) : $this->config;
        
        // implementation is partially environment dependent, partially
        // configuration dependent
        $lexer = HTMLPurifier_Lexer::create($config);
        
        $context = new HTMLPurifier_Context();
        
        // our friendly neighborhood generator, all primed with configuration too!
        $this->generator->generateFromTokens(array(), $config, $context);
        $context->register('Generator', $this->generator);
        
        // set up global context variables
        if ($config->get('Core', 'CollectErrors')) {
            // may get moved out if other facilities use it
            $language_factory = HTMLPurifier_LanguageFactory::instance();
            $language = $language_factory->create($config, $context);
            $context->register('Locale', $language);
            
            $error_collector = new HTMLPurifier_ErrorCollector($context);
            $context->register('ErrorCollector', $error_collector);
        }
        
        $html = HTMLPurifier_Encoder::convertToUTF8($html, $config, $context);
        
        // pre-filters run in the order the filters were added
        $size = count($this->filters);
        for ($i = 0; $i < $size; $i++) {
            $html = $this->filters[$i]->preFilter($html, $config, $context);
        }
        
        // purified HTML
        $html = 
            $this->generator->generateFromTokens(
                // list of tokens
                $this->strategy->execute(
                    // list of un-purified tokens
                    $lexer->tokenizeHTML(
                        // un-purified HTML
                        $html, $config, $context
                    ),
                    $config, $context
                ),
                $config, $context
            );
        
        // post-filters run in reverse order of registration
        for ($i = $size - 1; $i >= 0; $i--) {
            $html = $this->filters[$i]->postFilter($html, $config, $context);
        }
        
        $html = HTMLPurifier_Encoder::convertFromUTF8($html, $config, $context);
        // expose the context of this run to the caller
        $this->context =& $context;
        return $html;
    }
    
    /**
     * Filters an array of HTML snippets
     * @param $array_of_html Array of HTML strings; keys are preserved
     * @param $config Optional HTMLPurifier_Config object for this operation.
     *                See HTMLPurifier::purify() for more details.
     * @return Array of purified HTML
     */
    function purifyArray($array_of_html, $config = null) {
        $context_array = array();
        foreach ($array_of_html as $key => $html) {
            $array_of_html[$key] = $this->purify($html, $config);
            // purify() leaves the context of the run in $this->context
            $context_array[$key] = $this->context;
        }
        // after a batch run, $this->context is an array keyed like the input
        $this->context = $context_array;
        return $array_of_html;
    }
    
    /**
     * Singleton for enforcing just one HTML Purifier in your system
     * @param $prototype Optional. An HTMLPurifier instance to install as the
     *                   shared instance, or any value accepted by
     *                   HTMLPurifier_Config::create() from which a new shared
     *                   instance is built. A non-empty prototype always
     *                   replaces the current shared instance.
     * @return Reference to the shared HTMLPurifier instance
     */
    function &getInstance($prototype = null) {
        static $htmlpurifier;
        if (!$htmlpurifier || $prototype) {
            if (is_a($prototype, 'HTMLPurifier')) {
                $htmlpurifier = $prototype;
            } elseif ($prototype) {
                $htmlpurifier = new HTMLPurifier(HTMLPurifier_Config::create($prototype));
            } else {
                $htmlpurifier = new HTMLPurifier();
            }
        }
        return $htmlpurifier;
    }
    
    
}
|
||||
|
||||
?>
|
129
library/HTMLPurifier/AttrCollections.php
Normal file
129
library/HTMLPurifier/AttrCollections.php
Normal file
@@ -0,0 +1,129 @@
|
||||
<?php
|
||||
|
||||
require_once 'HTMLPurifier/AttrTypes.php';
|
||||
|
||||
/**
|
||||
* Defines common attribute collections that modules reference
|
||||
*/
|
||||
|
||||
class HTMLPurifier_AttrCollections
{
    
    /**
     * Attribute collections keyed by collection name. Each collection is an
     * associative array of attribute name => definition; index 0, when
     * present, lists the names of other collections to include.
     */
    var $info = array();
    
    /**
     * Gathers the attribute collections (and extensions to them) declared
     * by the modules, then resolves inclusions and string identifiers so
     * that other code can use $info directly.
     * @param $attr_types HTMLPurifier_AttrTypes instance
     * @param $modules Hash array of HTMLPurifier_HTMLModule members
     */
    function HTMLPurifier_AttrCollections($attr_types, $modules) {
        // pass 1: pull the collections out of every module
        foreach ($modules as $module) {
            foreach ($module->attr_collections as $coll_name => $collection) {
                if (!isset($this->info[$coll_name])) {
                    $this->info[$coll_name] = array();
                }
                foreach ($collection as $attr_name => $attr_def) {
                    $extends_includes =
                        $attr_name === 0 &&
                        isset($this->info[$coll_name][$attr_name]);
                    if ($extends_includes) {
                        // an include list already exists: append to it
                        $this->info[$coll_name][$attr_name] = array_merge(
                            $this->info[$coll_name][$attr_name],
                            $attr_def
                        );
                    } else {
                        $this->info[$coll_name][$attr_name] = $attr_def;
                    }
                }
            }
        }
        // pass 2: resolve every collection in place
        foreach (array_keys($this->info) as $coll_name) {
            // pull in the attributes of included collections
            $this->performInclusions($this->info[$coll_name]);
            // swap string identifiers for attribute definition objects
            $this->expandIdentifiers($this->info[$coll_name], $attr_types);
        }
    }
    
    /**
     * Resolves the inclusions listed under index 0 of an attribute array,
     * copying over attributes of the named collections (and, transitively,
     * of the collections those include). Attributes already present are
     * never overwritten. Index 0 is removed afterwards.
     * @param &$attr Reference to attribute array
     */
    function performInclusions(&$attr) {
        if (!isset($attr[0])) return;
        $pending = $attr[0];
        $visited = array(); // guards against circular inclusions
        $pos = 0;
        while (isset($pending[$pos])) {
            $included = $pending[$pos];
            $pos++;
            if (isset($visited[$included])) continue;
            $visited[$included] = true;
            // unknown collections are silently ignored
            if (!isset($this->info[$included])) continue;
            foreach ($this->info[$included] as $key => $value) {
                // existing keys win; this also skips the include list at
                // index 0, which is handled below
                if (!isset($attr[$key])) {
                    $attr[$key] = $value;
                }
            }
            if (isset($this->info[$included][0])) {
                // queue up the inclusions of the included collection
                $pending = array_merge($pending, $this->info[$included][0]);
            }
        }
        unset($attr[0]);
    }
    
    /**
     * Replaces the string identifiers in an attribute array with the
     * objects returned by HTMLPurifier_AttrTypes for them. A '*' in an
     * attribute name marks the attribute as required and is stripped from
     * the name. Entries set to false, or whose identifier cannot be
     * resolved, are removed.
     * @param &$attr Reference to attribute array
     * @param $attr_types HTMLPurifier_AttrTypes instance
     */
    function expandIdentifiers(&$attr, $attr_types) {
        
        // entries renamed during the loop are re-added to the array and
        // may be visited again, so remember what was already handled
        $handled = array();
        
        foreach ($attr as $name => $definition) {
            // index 0 is the include list, not an attribute
            if ($name === 0) continue;
            
            if (isset($handled[$name])) continue;
            
            // a '*' in the name flags the attribute as required
            $required = (strpos($name, '*') !== false);
            if ($required) {
                // re-file the definition under its clean name
                unset($attr[$name]);
                $name = trim($name, '*');
                $attr[$name] = $definition;
            }
            
            $handled[$name] = true;
            
            if (is_object($definition)) {
                // already a literal object: only update the flag, never
                // downgrading an object that was already required
                $attr[$name]->required = ($required || $attr[$name]->required);
                continue;
            }
            
            if ($definition === false) {
                // explicit removal
                unset($attr[$name]);
                continue;
            }
            
            $type = $attr_types->get($definition);
            if ($type) {
                $attr[$name] = $type;
                $attr[$name]->required = $required;
            } else {
                // unresolvable identifier: drop the attribute
                unset($attr[$name]);
            }
        }
        
    }
    
}
|
||||
|
@@ -1,15 +0,0 @@
|
||||
<?php
|
||||
|
||||
/**
|
||||
* Internal data-structure used in attribute validation to accumulate state.
|
||||
*
|
||||
* All it is is a data-structure that holds objects that accumulate state, like
|
||||
* HTMLPurifier_IDAccumulator.
|
||||
*/
|
||||
|
||||
class HTMLPurifier_AttrContext
{
    /**
     * Slot for the state-accumulating object used during attribute
     * validation; empty (null) until something is assigned to it.
     * Presumably an HTMLPurifier_IDAccumulator, going by the property
     * name -- confirm against callers.
     */
    var $id_accumulator = null;
}
|
||||
|
||||
?>
|
@@ -1,22 +1,86 @@
|
||||
<?php
|
||||
|
||||
require_once 'HTMLPurifier/AttrContext.php';
|
||||
/**
|
||||
* Base class for all validating attribute definitions.
|
||||
*
|
||||
* This family of classes forms the core for not only HTML attribute validation,
|
||||
* but also any sort of string that needs to be validated or cleaned (which
|
||||
* means CSS properties and composite definitions are defined here too).
|
||||
* Besides defining (through code) what precisely makes the string valid,
|
||||
* subclasses are also responsible for cleaning the code if possible.
|
||||
*/
|
||||
|
||||
// AttrDef = Attribute Definition
|
||||
class HTMLPurifier_AttrDef
|
||||
{
|
||||
function HTMLPurifier_AttrDef() {}
|
||||
|
||||
/**
|
||||
* Tells us whether or not an HTML attribute is minimized. Has no
|
||||
* meaning in other contexts.
|
||||
*/
|
||||
var $minimized = false;
|
||||
|
||||
/**
|
||||
* Tells us whether or not an HTML attribute is required. Has no
|
||||
* meaning in other contexts
|
||||
*/
|
||||
var $required = false;
|
||||
|
||||
/**
|
||||
* Validates and cleans passed string according to a definition.
|
||||
*
|
||||
* @public
|
||||
* @param $string String to be validated and cleaned.
|
||||
* @param $config Mandatory HTMLPurifier_Config object.
|
||||
* @param $context Mandatory HTMLPurifier_AttrContext object.
|
||||
*/
|
||||
function validate($string, $config, &$context) {
|
||||
trigger_error('Cannot call abstract function', E_USER_ERROR);
|
||||
}
|
||||
|
||||
/**
|
||||
* Convenience method that parses a string as if it were CDATA.
|
||||
*
|
||||
* This method process a string in the manner specified at
|
||||
* <http://www.w3.org/TR/html4/types.html#h-6.2> by removing
|
||||
* leading and trailing whitespace, ignoring line feeds, and replacing
|
||||
* carriage returns and tabs with spaces. While most useful for HTML
|
||||
* attributes specified as CDATA, it can also be applied to most CSS
|
||||
* values.
|
||||
*
|
||||
* @note This method is not entirely standards compliant, as trim() removes
|
||||
* more types of whitespace than specified in the spec. In practice,
|
||||
* this is rarely a problem, as those extra characters usually have
|
||||
* already been removed by HTMLPurifier_Encoder.
|
||||
*
|
||||
* @warning This processing is inconsistent with XML's whitespace handling
|
||||
* as specified by section 3.3.3 and referenced XHTML 1.0 section
|
||||
* 4.7. Compliant processing requires all line breaks normalized
|
||||
* to "\n", so the fix is not as simple as fixing it in this
|
||||
* function. Trim and whitespace collapsing are supposed to only
|
||||
* occur in NMTOKENs. However, note that we are NOT necessarily
|
||||
* parsing XML, thus, this behavior may still be correct.
|
||||
*
|
||||
* @public
|
||||
*/
|
||||
function parseCDATA($string) {
    // Strip surrounding whitespace, then drop line feeds entirely.
    $without_lf = str_replace("\n", '', trim($string));
    // Each remaining carriage return or tab becomes one space.
    return strtr($without_lf, "\r\t", '  ');
}
|
||||
|
||||
/**
|
||||
* Factory method for creating this class from a string.
|
||||
* @param $string String construction info
|
||||
* @return Created AttrDef object corresponding to $string
|
||||
* @public
|
||||
*/
|
||||
function make($string) {
|
||||
// Default implementation: $string is ignored and this very object is
|
||||
// returned as a flyweight. If overloaded, it is *necessary* for you to
|
||||
// clone the object (usually by instantiating a new copy) and return that.
|
||||
return $this;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
?>
|
@@ -3,6 +3,17 @@
|
||||
require_once 'HTMLPurifier/AttrDef.php';
|
||||
require_once 'HTMLPurifier/CSSDefinition.php';
|
||||
|
||||
/**
|
||||
* Validates the HTML attribute style, otherwise known as CSS.
|
||||
* @note We don't implement the whole CSS specification, so it might be
|
||||
* difficult to reuse this component in the context of validating
|
||||
* actual stylesheet declarations.
|
||||
* @note If we were really serious about validating the CSS, we would
|
||||
* tokenize the styles and then parse the tokens. Obviously, we
|
||||
* are not doing that. Doing that could seriously harm performance,
|
||||
* but would make these components a lot more viable for a CSS
|
||||
* filtering solution.
|
||||
*/
|
||||
class HTMLPurifier_AttrDef_CSS extends HTMLPurifier_AttrDef
|
||||
{
|
||||
|
||||
@@ -10,10 +21,13 @@ class HTMLPurifier_AttrDef_CSS extends HTMLPurifier_AttrDef
|
||||
|
||||
$css = $this->parseCDATA($css);
|
||||
|
||||
$definition = HTMLPurifier_CSSDefinition::instance();
|
||||
$definition = $config->getCSSDefinition();
|
||||
|
||||
// we're going to break the spec and explode by semicolons.
|
||||
// This is because semicolon rarely appears in escaped form
|
||||
// Doing this is generally flaky but fast
|
||||
// IT MIGHT APPEAR IN URIs, see HTMLPurifier_AttrDef_CSSURI
|
||||
// for details
|
||||
|
||||
$declarations = explode(';', $css);
|
||||
$propvalues = array();
|
||||
@@ -22,10 +36,12 @@ class HTMLPurifier_AttrDef_CSS extends HTMLPurifier_AttrDef
|
||||
if (!$declaration) continue;
|
||||
if (!strpos($declaration, ':')) continue;
|
||||
list($property, $value) = explode(':', $declaration, 2);
|
||||
$property = trim($property);
|
||||
$value = trim($value);
|
||||
if (!isset($definition->info[$property])) continue;
|
||||
// inefficient call, since the validator will do this again
|
||||
// inherit works for everything
|
||||
if (strtolower(trim($value)) !== 'inherit') {
|
||||
// inherit works for everything (but only on the base property)
|
||||
$result = $definition->info[$property]->validate(
|
||||
$value, $config, $context );
|
||||
} else {
|
||||
@@ -35,6 +51,7 @@ class HTMLPurifier_AttrDef_CSS extends HTMLPurifier_AttrDef
|
||||
$propvalues[$property] = $result;
|
||||
}
|
||||
|
||||
// procedure does not write the new CSS simultaneously, so it's
|
||||
// slightly inefficient, but it's the only way of getting rid of
|
||||
// duplicates. Perhaps config to optimize it, but not now.
|
||||
|
||||
@@ -49,4 +66,3 @@ class HTMLPurifier_AttrDef_CSS extends HTMLPurifier_AttrDef
|
||||
|
||||
}
|
||||
|
||||
?>
|
86
library/HTMLPurifier/AttrDef/CSS/Background.php
Normal file
86
library/HTMLPurifier/AttrDef/CSS/Background.php
Normal file
@@ -0,0 +1,86 @@
|
||||
<?php
|
||||
|
||||
require_once 'HTMLPurifier/AttrDef.php';
|
||||
require_once 'HTMLPurifier/CSSDefinition.php';
|
||||
|
||||
/**
|
||||
* Validates shorthand CSS property background.
|
||||
* @warning Does not support url tokens that have internal spaces.
|
||||
*/
|
||||
class HTMLPurifier_AttrDef_CSS_Background extends HTMLPurifier_AttrDef
{
    
    /**
     * Local copy of component validators, keyed by full property name.
     * @note See HTMLPurifier_AttrDef_CSS_Font::$info for a similar impl.
     */
    var $info;
    
    /**
     * @param $config HTMLPurifier_Config object whose CSS definition
     *                supplies the component validators.
     */
    function HTMLPurifier_AttrDef_CSS_Background($config) {
        $def = $config->getCSSDefinition();
        $this->info['background-color'] = $def->info['background-color'];
        $this->info['background-image'] = $def->info['background-image'];
        $this->info['background-repeat'] = $def->info['background-repeat'];
        $this->info['background-attachment'] = $def->info['background-attachment'];
        $this->info['background-position'] = $def->info['background-position'];
    }
    
    /**
     * Validates a background shorthand value.
     *
     * Each space-separated token is offered to the color, image, repeat
     * and attachment validators in that order; the first one that accepts
     * it claims the token. Tokens nobody claims are collected and
     * validated together as the background-position.
     *
     * @param $string  Shorthand value to validate.
     * @param $config  Mandatory HTMLPurifier_Config object.
     * @param $context Mandatory context object.
     * @return Space-separated validated components, or false if nothing
     *         in the value was usable.
     */
    function validate($string, $config, &$context) {
        
        // regular pre-processing
        $string = $this->parseCDATA($string);
        if ($string === '') return false;
        
        // assumes URI doesn't have spaces in it
        $bits = explode(' ', $string); // bits to process
        
        $caught = array();
        $caught['color']      = false;
        $caught['image']      = false;
        $caught['repeat']     = false;
        $caught['attachment'] = false;
        $caught['position']   = false;
        
        $i = 0; // number of catches
        
        foreach ($bits as $bit) {
            if ($bit === '') continue;
            
            // Tokens are normalized to lowercase, except that a url()
            // token only has its function name normalized: lowercasing
            // the URI itself would break case-sensitive paths.
            if (strncasecmp($bit, 'url(', 4) === 0) {
                $bit = 'url(' . substr($bit, 4);
            } else {
                $bit = strtolower($bit);
            }
            
            foreach ($caught as $key => $status) {
                if ($key != 'position') {
                    // each non-position component may be caught only once
                    if ($status !== false) continue;
                    $r = $this->info['background-' . $key]->validate($bit, $config, $context);
                } else {
                    // position is last in $caught, so it receives whatever
                    // the other validators rejected; checked after the loop
                    $r = $bit;
                }
                if ($r === false) continue;
                if ($key == 'position') {
                    if ($caught[$key] === false) $caught[$key] = '';
                    $caught[$key] .= $r . ' ';
                } else {
                    $caught[$key] = $r;
                }
                $i++;
                break;
            }
        }
        
        if (!$i) return false;
        if ($caught['position'] !== false) {
            $caught['position'] = $this->info['background-position']->
                validate($caught['position'], $config, $context);
        }
        
        $ret = array();
        foreach ($caught as $value) {
            if ($value === false) continue;
            $ret[] = $value;
        }
        
        if (empty($ret)) return false;
        return implode(' ', $ret);
        
    }
    
}
|
||||
|
129
library/HTMLPurifier/AttrDef/CSS/BackgroundPosition.php
Normal file
129
library/HTMLPurifier/AttrDef/CSS/BackgroundPosition.php
Normal file
@@ -0,0 +1,129 @@
|
||||
<?php
|
||||
|
||||
require_once 'HTMLPurifier/AttrDef.php';
|
||||
require_once 'HTMLPurifier/AttrDef/CSS/Length.php';
|
||||
require_once 'HTMLPurifier/AttrDef/CSS/Percentage.php';
|
||||
|
||||
/* W3C says:
|
||||
[ // adjective and number must be in correct order, even if
|
||||
// you could switch them without introducing ambiguity.
|
||||
// some browsers support that syntax
|
||||
[
|
||||
<percentage> | <length> | left | center | right
|
||||
]
|
||||
[
|
||||
<percentage> | <length> | top | center | bottom
|
||||
]?
|
||||
] |
|
||||
[ // this signifies that the vertical and horizontal adjectives
|
||||
// can be arbitrarily ordered, however, there can only be two,
|
||||
// one of each, or none at all
|
||||
[
|
||||
left | center | right
|
||||
] ||
|
||||
[
|
||||
top | center | bottom
|
||||
]
|
||||
]
|
||||
top, left = 0%
|
||||
center, (none) = 50%
|
||||
bottom, right = 100%
|
||||
*/
|
||||
|
||||
/* QuirksMode says:
|
||||
keyword + length/percentage must be ordered correctly, as per W3C
|
||||
|
||||
Internet Explorer and Opera, however, support arbitrary ordering. We
|
||||
should fix it up.
|
||||
|
||||
Minor issue though, not strictly necessary.
|
||||
*/
|
||||
|
||||
// control freaks may appreciate the ability to convert these to
|
||||
// percentages or something, but it's not necessary
|
||||
|
||||
/**
|
||||
* Validates the value of background-position.
|
||||
*/
|
||||
class HTMLPurifier_AttrDef_CSS_BackgroundPosition extends HTMLPurifier_AttrDef
{
    
    /**
     * Validator used for length tokens.
     */
    var $length;
    
    /**
     * Validator used for percentage tokens.
     */
    var $percentage;
    
    function HTMLPurifier_AttrDef_CSS_BackgroundPosition() {
        $this->length     = new HTMLPurifier_AttrDef_CSS_Length();
        $this->percentage = new HTMLPurifier_AttrDef_CSS_Percentage();
    }
    
    /**
     * Validates a background-position value and emits it in
     * horizontal-then-vertical order.
     *
     * @param $string  Value to validate.
     * @param $config  Mandatory HTMLPurifier_Config object.
     * @param $context Mandatory context object.
     * @return Normalized position string, or false if no valid token
     *         was found.
     */
    function validate($string, $config, &$context) {
        $string = $this->parseCDATA($string);
        $bits = explode(' ', $string);
        
        $keywords = array();
        $keywords['h']  = false; // left, right
        $keywords['v']  = false; // top, bottom
        $keywords['ch'] = false; // center, seen as the first caught value
        $keywords['cv'] = false; // center, seen after another caught value
        $measures = array();
        
        $i = 0; // number of catches
        
        $lookup = array(
            'top'    => 'v',
            'bottom' => 'v',
            'left'   => 'h',
            'right'  => 'h',
            'center' => 'c'
        );
        
        foreach ($bits as $bit) {
            if ($bit === '') continue;
            
            // test for keyword
            $lbit = ctype_lower($bit) ? $bit : strtolower($bit);
            if (isset($lookup[$lbit])) {
                $status = $lookup[$lbit];
                if ($status == 'c') {
                    // Which axis "center" applies to depends on where it
                    // stands: remember whether it came first or later so
                    // that "center 10%" is not rewritten as "10% center".
                    $status = ($i == 0) ? 'ch' : 'cv';
                }
                $keywords[$status] = $lbit;
                $i++;
            }
            
            // test for length
            $r = $this->length->validate($bit, $config, $context);
            if ($r !== false) {
                $measures[] = $r;
                $i++;
            }
            
            // test for percentage
            $r = $this->percentage->validate($bit, $config, $context);
            if ($r !== false) {
                $measures[] = $r;
                $i++;
            }
            
        }
        
        if (!$i) return false; // no valid values were caught
        
        $ret = array();
        
        // horizontal component
        if ($keywords['h']) {
            $ret[] = $keywords['h'];
        } elseif ($keywords['ch']) {
            $ret[] = $keywords['ch'];
            $keywords['cv'] = false; // prevent re-use: center = center center
        } elseif (count($measures)) {
            $ret[] = array_shift($measures);
        }
        
        // vertical component
        if ($keywords['v']) {
            $ret[] = $keywords['v'];
        } elseif ($keywords['cv']) {
            $ret[] = $keywords['cv'];
        } elseif (count($measures)) {
            $ret[] = array_shift($measures);
        }
        
        if (empty($ret)) return false;
        return implode(' ', $ret);
        
    }
    
}
|
||||
|
44
library/HTMLPurifier/AttrDef/CSS/Border.php
Normal file
44
library/HTMLPurifier/AttrDef/CSS/Border.php
Normal file
@@ -0,0 +1,44 @@
|
||||
<?php
|
||||
|
||||
require_once 'HTMLPurifier/AttrDef.php';
|
||||
|
||||
/**
|
||||
* Validates the border property as defined by CSS.
|
||||
*/
|
||||
class HTMLPurifier_AttrDef_CSS_Border extends HTMLPurifier_AttrDef
{
    
    /**
     * Local copy of properties this property is shorthand for.
     */
    var $info = array();
    
    /**
     * @param $config HTMLPurifier_Config object whose CSS definition
     *                supplies the component validators.
     */
    function HTMLPurifier_AttrDef_CSS_Border($config) {
        $def = $config->getCSSDefinition();
        $this->info['border-width'] = $def->info['border-width'];
        $this->info['border-style'] = $def->info['border-style'];
        $this->info['border-top-color'] = $def->info['border-top-color'];
    }
    
    /**
     * Validates a border shorthand value.
     *
     * Each space-separated token is offered to the width, style and color
     * validators; each of those may claim at most one token.
     *
     * @param $string  Shorthand value to validate.
     * @param $config  Mandatory HTMLPurifier_Config object.
     * @param $context Mandatory context object.
     * @return Space-separated validated components, or false if no token
     *         was accepted.
     */
    function validate($string, $config, &$context) {
        $string = $this->parseCDATA($string);
        // we specifically will not support rgb() syntax with spaces
        $bits = explode(' ', $string);
        $done = array(); // segments we've finished
        $ret = ''; // return value
        foreach ($bits as $bit) {
            if ($bit === '') continue; // artifact of consecutive spaces
            foreach ($this->info as $propname => $validator) {
                if (isset($done[$propname])) continue;
                $r = $validator->validate($bit, $config, $context);
                if ($r !== false) {
                    $ret .= $r . ' ';
                    $done[$propname] = true;
                    break;
                }
            }
        }
        $ret = rtrim($ret);
        // Nothing usable: signal failure with false, as the other
        // validators do, instead of handing back an empty value.
        if ($ret === '') return false;
        return $ret;
    }
    
}
|
||||
|
@@ -2,14 +2,48 @@
|
||||
|
||||
require_once 'HTMLPurifier/AttrDef.php';
|
||||
|
||||
class HTMLPurifier_AttrDef_Color
|
||||
HTMLPurifier_ConfigSchema::define(
|
||||
'Core', 'ColorKeywords', array(
|
||||
'maroon' => '#800000',
|
||||
'red' => '#FF0000',
|
||||
'orange' => '#FFA500',
|
||||
'yellow' => '#FFFF00',
|
||||
'olive' => '#808000',
|
||||
'purple' => '#800080',
|
||||
'fuchsia' => '#FF00FF',
|
||||
'white' => '#FFFFFF',
|
||||
'lime' => '#00FF00',
|
||||
'green' => '#008000',
|
||||
'navy' => '#000080',
|
||||
'blue' => '#0000FF',
|
||||
'aqua' => '#00FFFF',
|
||||
'teal' => '#008080',
|
||||
'black' => '#000000',
|
||||
'silver' => '#C0C0C0',
|
||||
'gray' => '#808080'
|
||||
), 'hash', '
|
||||
Lookup array of color names to six digit hexadecimal number corresponding
|
||||
to color, with preceding hash mark. Used when parsing colors.
|
||||
This directive has been available since 2.0.0.
|
||||
');
|
||||
|
||||
/**
|
||||
* Validates Color as defined by CSS.
|
||||
*/
|
||||
class HTMLPurifier_AttrDef_CSS_Color extends HTMLPurifier_AttrDef
|
||||
{
|
||||
|
||||
function validate($color, $config, &$context) {
|
||||
|
||||
static $colors = null;
|
||||
if ($colors === null) $colors = $config->get('Core', 'ColorKeywords');
|
||||
|
||||
$color = trim($color);
|
||||
if (!$color) return false;
|
||||
|
||||
$lower = strtolower($color);
|
||||
if (isset($colors[$lower])) return $colors[$lower];
|
||||
|
||||
if ($color[0] === '#') {
|
||||
// hexadecimal handling
|
||||
$hex = substr($color, 1);
|
||||
@@ -64,4 +98,3 @@ class HTMLPurifier_AttrDef_Color
|
||||
|
||||
}
|
||||
|
||||
?>
|
37
library/HTMLPurifier/AttrDef/CSS/Composite.php
Normal file
37
library/HTMLPurifier/AttrDef/CSS/Composite.php
Normal file
@@ -0,0 +1,37 @@
|
||||
<?php
|
||||
|
||||
/**
|
||||
* Allows multiple validators to attempt to validate attribute.
|
||||
*
|
||||
* Composite is just what it sounds like: a composite of many validators.
|
||||
* This means that multiple HTMLPurifier_AttrDef objects will have a whack
|
||||
* at the string. If one of them passes, that's what is returned. This is
|
||||
* especially useful for CSS values, which often are a choice between
|
||||
* an enumerated set of predefined values or a flexible data type.
|
||||
*/
|
||||
class HTMLPurifier_AttrDef_CSS_Composite extends HTMLPurifier_AttrDef
{
    
    /**
     * Candidate HTMLPurifier_AttrDef objects, tried in list order.
     * @protected
     */
    var $defs;
    
    /**
     * @param $defs List of HTMLPurifier_AttrDef objects
     */
    function HTMLPurifier_AttrDef_CSS_Composite($defs) {
        $this->defs = $defs;
    }
    
    /**
     * Hands the string to each candidate in turn and returns the first
     * result that is not false; false if every candidate rejects it.
     */
    function validate($string, $config, &$context) {
        // Walk the keys so each call goes to the stored definition itself.
        foreach (array_keys($this->defs) as $index) {
            $outcome = $this->defs[$index]->validate($string, $config, $context);
            if ($outcome === false) continue;
            return $outcome;
        }
        return false;
    }
    
}
|
||||
|
150
library/HTMLPurifier/AttrDef/CSS/Font.php
Normal file
150
library/HTMLPurifier/AttrDef/CSS/Font.php
Normal file
@@ -0,0 +1,150 @@
|
||||
<?php
|
||||
|
||||
require_once 'HTMLPurifier/AttrDef.php';
|
||||
|
||||
/**
|
||||
* Validates shorthand CSS property font.
|
||||
*/
|
||||
class HTMLPurifier_AttrDef_CSS_Font extends HTMLPurifier_AttrDef
{
    
    /**
     * Component validators copied from the CSS definition, indexed by
     * property name.
     * 
     * @note If we moved specific CSS property definitions to their own
     *       classes instead of having them be assembled at run time by
     *       CSSDefinition, this wouldn't be necessary.  We'd instantiate
     *       our own copies.
     */
    var $info = array();
    
    /**
     * @param $config HTMLPurifier_Config object whose CSS definition
     *                supplies the component validators.
     */
    function HTMLPurifier_AttrDef_CSS_Font($config) {
        $def = $config->getCSSDefinition();
        $properties = array(
            'font-style', 'font-variant', 'font-weight',
            'font-size', 'line-height', 'font-family'
        );
        foreach ($properties as $property) {
            $this->info[$property] = $def->info[$property];
        }
    }
    
    /**
     * Validates a font shorthand value.
     *
     * Accepts either a single system font keyword, or the sequence
     * [style/variant/weight in any order] size[/line-height] family.
     * Returns the cleaned value, or false when the size or family is
     * missing or invalid.
     */
    function validate($string, $config, &$context) {
        
        static $system_fonts = array(
            'caption' => true,
            'icon' => true,
            'menu' => true,
            'message-box' => true,
            'small-caption' => true,
            'status-bar' => true
        );
        
        // regular pre-processing
        $string = $this->parseCDATA($string);
        if ($string === '') return false;
        
        // a lone system font keyword is a complete value
        $keyword = strtolower($string);
        if (isset($system_fonts[$keyword])) return $keyword;
        
        $bits     = explode(' ', $string); // bits to process
        $size     = count($bits);
        $stage    = 0; // 0: optional properties, 1: size, 2: family
        $caught   = array(); // optional properties already seen
        $optional = array('font-style', 'font-variant', 'font-weight');
        $final    = ''; // output
        
        for ($i = 0; $i < $size; $i++) {
            $bit = $bits[$i];
            if ($bit === '') continue;
            
            // stage 0: try font-style, font-variant and font-weight
            if ($stage === 0) {
                foreach ($optional as $name) {
                    if (isset($caught[$name])) continue;
                    $r = $this->info[$name]->validate($bit, $config, $context);
                    if ($r !== false) {
                        $final .= $r . ' ';
                        $caught[$name] = true;
                        break;
                    }
                }
                // all three caught, the next bit must be the size
                if (count($caught) >= 3) $stage = 1;
                if ($r !== false) continue;
                // otherwise this bit is treated as the font-size below
            }
            
            // stage 1: font-size and perhaps line-height
            if ($stage !== 2) {
                $found_slash = false;
                if (strpos($bit, '/') !== false) {
                    list($font_size, $line_height) = explode('/', $bit);
                    if ($line_height === '') {
                        // ooh, there's a space after the slash!
                        $line_height = false;
                        $found_slash = true;
                    }
                } else {
                    $font_size = $bit;
                    $line_height = false;
                }
                
                $r = $this->info['font-size']->validate($font_size, $config, $context);
                if ($r === false) return false;
                $final .= $r;
                
                if ($line_height === false) {
                    // line-height was not glued to the size: scroll
                    // forward looking for a slash and the bit after it
                    for ($j = $i + 1; $j < $size; $j++) {
                        if ($bits[$j] === '') continue;
                        if ($bits[$j] === '/') {
                            if ($found_slash) return false;
                            $found_slash = true;
                            continue;
                        }
                        $line_height = $bits[$j];
                        break;
                    }
                } else {
                    // slash already found
                    $found_slash = true;
                    $j = $i;
                }
                
                if ($found_slash) {
                    // consume the bits that made up the line-height
                    $i = $j;
                    $r = $this->info['line-height']->validate($line_height, $config, $context);
                    if ($r !== false) $final .= '/' . $r;
                }
                
                $final .= ' ';
                $stage = 2;
                continue;
            }
            
            // stage 2: everything that remains is the font-family
            $font_family = implode(' ', array_slice($bits, $i, $size - $i));
            $r = $this->info['font-family']->validate($font_family, $config, $context);
            if ($r === false) return false;
            $final .= $r . ' ';
            // processing completed successfully
            return rtrim($final);
        }
        
        return false;
    }
    
}
|
||||
|
@@ -4,18 +4,21 @@ require_once 'HTMLPurifier/AttrDef.php';
|
||||
|
||||
// whitelisting allowed fonts would be nice
|
||||
|
||||
class HTMLPurifier_AttrDef_FontFamily extends HTMLPurifier_AttrDef
|
||||
/**
|
||||
* Validates a font family list according to CSS spec
|
||||
*/
|
||||
class HTMLPurifier_AttrDef_CSS_FontFamily extends HTMLPurifier_AttrDef
|
||||
{
|
||||
|
||||
var $generic_names = array(
|
||||
'serif' => true,
|
||||
'sans-serif' => true,
|
||||
'monospace' => true,
|
||||
'fantasy' => true,
|
||||
'cursive' => true
|
||||
);
|
||||
|
||||
function validate($string, $config, &$context) {
|
||||
static $generic_names = array(
|
||||
'serif' => true,
|
||||
'sans-serif' => true,
|
||||
'monospace' => true,
|
||||
'fantasy' => true,
|
||||
'cursive' => true
|
||||
);
|
||||
|
||||
$string = $this->parseCDATA($string);
|
||||
// assume that no font names contain commas in them
|
||||
$fonts = explode(',', $string);
|
||||
@@ -24,7 +27,7 @@ class HTMLPurifier_AttrDef_FontFamily extends HTMLPurifier_AttrDef
|
||||
$font = trim($font);
|
||||
if ($font === '') continue;
|
||||
// match a generic name
|
||||
if (isset($this->generic_names[$font])) {
|
||||
if (isset($generic_names[$font])) {
|
||||
$final .= $font . ', ';
|
||||
continue;
|
||||
}
|
||||
@@ -56,4 +59,3 @@ class HTMLPurifier_AttrDef_FontFamily extends HTMLPurifier_AttrDef
|
||||
|
||||
}
|
||||
|
||||
?>
|
@@ -1,17 +1,32 @@
|
||||
<?php
|
||||
|
||||
require_once 'HTMLPurifier/AttrDef.php';
|
||||
require_once 'HTMLPurifier/AttrDef/Number.php';
|
||||
require_once 'HTMLPurifier/AttrDef/CSS/Number.php';
|
||||
|
||||
class HTMLPurifier_AttrDef_CSSLength extends HTMLPurifier_AttrDef
|
||||
/**
|
||||
* Represents a Length as defined by CSS.
|
||||
*/
|
||||
class HTMLPurifier_AttrDef_CSS_Length extends HTMLPurifier_AttrDef
|
||||
{
|
||||
|
||||
/**
|
||||
* Valid unit lookup table.
|
||||
* @warning The code assumes all units are two characters long. Be careful
|
||||
* if we have to change this behavior!
|
||||
*/
|
||||
var $units = array('em' => true, 'ex' => true, 'px' => true, 'in' => true,
|
||||
'cm' => true, 'mm' => true, 'pt' => true, 'pc' => true);
|
||||
/**
|
||||
* Instance of HTMLPurifier_AttrDef_Number to defer number validation to
|
||||
*/
|
||||
var $number_def;
|
||||
|
||||
function HTMLPurifier_AttrDef_CSSLength($non_negative = false) {
|
||||
$this->number_def = new HTMLPurifier_AttrDef_Number($non_negative);
|
||||
/**
|
||||
* @param $non_negative Bool indication whether or not negative values are
|
||||
* allowed.
|
||||
*/
|
||||
function HTMLPurifier_AttrDef_CSS_Length($non_negative = false) {
|
||||
$this->number_def = new HTMLPurifier_AttrDef_CSS_Number($non_negative);
|
||||
}
|
||||
|
||||
function validate($length, $config, &$context) {
|
||||
@@ -24,6 +39,7 @@ class HTMLPurifier_AttrDef_CSSLength extends HTMLPurifier_AttrDef
|
||||
|
||||
// we assume all units are two characters
|
||||
$unit = substr($length, $strlen - 2);
|
||||
if (!ctype_lower($unit)) $unit = strtolower($unit);
|
||||
$number = substr($length, 0, $strlen - 2);
|
||||
|
||||
if (!isset($this->units[$unit])) return false;
|
||||
@@ -37,4 +53,3 @@ class HTMLPurifier_AttrDef_CSSLength extends HTMLPurifier_AttrDef
|
||||
|
||||
}
|
||||
|
||||
?>
|
79
library/HTMLPurifier/AttrDef/CSS/ListStyle.php
Normal file
79
library/HTMLPurifier/AttrDef/CSS/ListStyle.php
Normal file
@@ -0,0 +1,79 @@
|
||||
<?php
|
||||
|
||||
require_once 'HTMLPurifier/AttrDef.php';
|
||||
|
||||
/**
|
||||
* Validates shorthand CSS property list-style.
|
||||
* @warning Does not support url tokens that have internal spaces.
|
||||
*/
|
||||
class HTMLPurifier_AttrDef_CSS_ListStyle extends HTMLPurifier_AttrDef
{
    
    /**
     * Local copy of component validators, keyed by full property name.
     * @note See HTMLPurifier_AttrDef_CSS_Font::$info for a similar impl.
     */
    var $info;
    
    /**
     * @param $config HTMLPurifier_Config object whose CSS definition
     *                supplies the component validators.
     */
    function HTMLPurifier_AttrDef_CSS_ListStyle($config) {
        $def = $config->getCSSDefinition();
        $this->info['list-style-type']     = $def->info['list-style-type'];
        $this->info['list-style-position'] = $def->info['list-style-position'];
        $this->info['list-style-image']    = $def->info['list-style-image'];
    }
    
    /**
     * Validates a list-style shorthand value.
     *
     * Each space-separated token is offered to the type, position and
     * image validators; each of those may claim at most one token.
     *
     * @param $string  Shorthand value to validate.
     * @param $config  Mandatory HTMLPurifier_Config object.
     * @param $context Mandatory context object.
     * @return Validated components in type, image, position order, or
     *         false if nothing in the value was usable.
     */
    function validate($string, $config, &$context) {
        
        // regular pre-processing
        $string = $this->parseCDATA($string);
        if ($string === '') return false;
        
        // assumes URI doesn't have spaces in it
        $bits = explode(' ', $string); // bits to process
        
        $caught = array();
        $caught['type']     = false;
        $caught['position'] = false;
        $caught['image']    = false;
        
        $i = 0; // number of catches
        $none = false; // whether a 'none' result has been seen
        
        foreach ($bits as $bit) {
            // All three components are filled: stop scanning, but keep
            // what was caught (a bare return here would discard it).
            if ($i >= 3) break;
            if ($bit === '') continue;
            
            // Tokens are normalized to lowercase, except that a url()
            // token only has its function name normalized: lowercasing
            // the URI itself would break case-sensitive paths.
            if (strncasecmp($bit, 'url(', 4) === 0) {
                $bit = 'url(' . substr($bit, 4);
            } else {
                $bit = strtolower($bit);
            }
            
            foreach ($caught as $key => $status) {
                if ($status !== false) continue;
                $r = $this->info['list-style-' . $key]->validate($bit, $config, $context);
                if ($r === false) continue;
                if ($r === 'none') {
                    // only the first 'none' counts, and it is never
                    // recorded as the image
                    if ($none) continue;
                    else $none = true;
                    if ($key == 'image') continue;
                }
                $caught[$key] = $r;
                $i++;
                break;
            }
        }
        
        if (!$i) return false;
        
        $ret = array();
        
        // construct type
        if ($caught['type']) $ret[] = $caught['type'];
        
        // construct image
        if ($caught['image']) $ret[] = $caught['image'];
        
        // construct position
        if ($caught['position']) $ret[] = $caught['position'];
        
        if (empty($ret)) return false;
        return implode(' ', $ret);
        
    }
    
}
|
||||
|
57
library/HTMLPurifier/AttrDef/CSS/Multiple.php
Normal file
57
library/HTMLPurifier/AttrDef/CSS/Multiple.php
Normal file
@@ -0,0 +1,57 @@
|
||||
<?php
|
||||
|
||||
require_once 'HTMLPurifier/AttrDef.php';
|
||||
|
||||
/**
|
||||
* Framework class for strings that involve multiple values.
|
||||
*
|
||||
* Certain CSS properties such as border-width and margin allow multiple
|
||||
* lengths to be specified. This class can take a vanilla border-width
|
||||
* definition and multiply it, usually into a max of four.
|
||||
*
|
||||
* @note Even though the CSS specification isn't clear about it, inherit
|
||||
* can only be used alone: it will never manifest as part of a multi
|
||||
* shorthand declaration. Thus, this class does not allow inherit.
|
||||
*/
|
||||
class HTMLPurifier_AttrDef_CSS_Multiple extends HTMLPurifier_AttrDef
{
    
    /**
     * Component definition that validates each individual value.
     */
    var $single;
    
    /**
     * Upper bound on the number of values kept.
     */
    var $max;
    
    /**
     * @param $single HTMLPurifier_AttrDef to multiply
     * @param $max Max number of values allowed (usually four)
     */
    function HTMLPurifier_AttrDef_CSS_Multiple($single, $max = 4) {
        $this->single = $single;
        $this->max    = $max;
    }
    
    /**
     * Validates each space-separated piece with the component definition,
     * keeping at most $max accepted pieces. Returns them joined by single
     * spaces, or false if none was accepted.
     */
    function validate($string, $config, &$context) {
        $string = $this->parseCDATA($string);
        if ($string === '') return false;
        
        $accepted = 0;
        $final = '';
        // parseCDATA replaced \r, \t and \n
        foreach (explode(' ', $string) as $piece) {
            if ($accepted >= $this->max) break;
            if (ctype_space($piece)) continue;
            $result = $this->single->validate($piece, $config, $context);
            if ($result === false) continue;
            $final .= $result . ' ';
            $accepted++;
        }
        
        return ($final === '') ? false : rtrim($final);
    }
    
}
|
||||
|
@@ -1,11 +1,20 @@
|
||||
<?php
|
||||
|
||||
class HTMLPurifier_AttrDef_Number extends HTMLPurifier_AttrDef
|
||||
/**
|
||||
* Validates a number as defined by the CSS spec.
|
||||
*/
|
||||
class HTMLPurifier_AttrDef_CSS_Number extends HTMLPurifier_AttrDef
|
||||
{
|
||||
|
||||
/**
|
||||
* Bool indicating whether or not only positive values allowed.
|
||||
*/
|
||||
var $non_negative = false;
|
||||
|
||||
function HTMLPurifier_AttrDef_Number($non_negative = false) {
|
||||
/**
|
||||
* @param $non_negative Bool indicating whether negatives are forbidden
|
||||
*/
|
||||
function HTMLPurifier_AttrDef_CSS_Number($non_negative = false) {
|
||||
$this->non_negative = $non_negative;
|
||||
}
|
||||
|
||||
@@ -49,4 +58,3 @@ class HTMLPurifier_AttrDef_Number extends HTMLPurifier_AttrDef
|
||||
|
||||
}
|
||||
|
||||
?>
|
@@ -1,15 +1,24 @@
|
||||
<?php
|
||||
|
||||
require_once 'HTMLPurifier/AttrDef.php';
|
||||
require_once 'HTMLPurifier/AttrDef/Number.php';
|
||||
require_once 'HTMLPurifier/AttrDef/CSS/Number.php';
|
||||
|
||||
class HTMLPurifier_AttrDef_Percentage extends HTMLPurifier_AttrDef
|
||||
/**
|
||||
* Validates a Percentage as defined by the CSS spec.
|
||||
*/
|
||||
class HTMLPurifier_AttrDef_CSS_Percentage extends HTMLPurifier_AttrDef
|
||||
{
|
||||
|
||||
/**
|
||||
* Instance of HTMLPurifier_AttrDef_CSS_Number to defer number validation
|
||||
*/
|
||||
var $number_def;
|
||||
|
||||
function HTMLPurifier_AttrDef_Percentage($non_negative = false) {
|
||||
$this->number_def = new HTMLPurifier_AttrDef_Number($non_negative);
|
||||
/**
|
||||
* @param Bool indicating whether to forbid negative values
|
||||
*/
|
||||
function HTMLPurifier_AttrDef_CSS_Percentage($non_negative = false) {
|
||||
$this->number_def = new HTMLPurifier_AttrDef_CSS_Number($non_negative);
|
||||
}
|
||||
|
||||
function validate($string, $config, &$context) {
|
||||
@@ -31,4 +40,3 @@ class HTMLPurifier_AttrDef_Percentage extends HTMLPurifier_AttrDef
|
||||
|
||||
}
|
||||
|
||||
?>
|
36
library/HTMLPurifier/AttrDef/CSS/TextDecoration.php
Normal file
36
library/HTMLPurifier/AttrDef/CSS/TextDecoration.php
Normal file
@@ -0,0 +1,36 @@
|
||||
<?php
|
||||
|
||||
require_once 'HTMLPurifier/AttrDef.php';
|
||||
|
||||
/**
|
||||
* Validates the value for the CSS property text-decoration
|
||||
* @note This class could be generalized into a version that acts sort of
|
||||
* like Enum except you can compound the allowed values.
|
||||
*/
|
||||
class HTMLPurifier_AttrDef_CSS_TextDecoration extends HTMLPurifier_AttrDef
{
    
    /**
     * Validates a text-decoration value.
     *
     * @param $string  Value to validate.
     * @param $config  Mandatory HTMLPurifier_Config object.
     * @param $context Mandatory context object.
     * @return 'none', or the accepted decoration keywords joined by
     *         single spaces, or false if no keyword was accepted.
     */
    function validate($string, $config, &$context) {
        
        static $allowed_values = array(
            'line-through' => true,
            'overline' => true,
            'underline' => true
        );
        
        $string = strtolower($this->parseCDATA($string));
        
        // 'none' is a valid value, but only on its own: it cannot be
        // combined with the decoration keywords
        if ($string === 'none') return $string;
        
        $parts = explode(' ', $string);
        $final = '';
        foreach ($parts as $part) {
            if (isset($allowed_values[$part])) {
                $final .= $part . ' ';
            }
        }
        $final = rtrim($final);
        if ($final === '') return false;
        return $final;
        
    }
    
}
|
||||
|
57
library/HTMLPurifier/AttrDef/CSS/URI.php
Normal file
57
library/HTMLPurifier/AttrDef/CSS/URI.php
Normal file
@@ -0,0 +1,57 @@
|
||||
<?php
|
||||
|
||||
require_once 'HTMLPurifier/AttrDef/URI.php';
|
||||
|
||||
/**
|
||||
* Validates a URI in CSS syntax, which uses url('http://example.com')
|
||||
* @note While theoretically speaking a URI in a CSS document could
|
||||
* be non-embedded, as of CSS2 there is no such usage so we're
|
||||
* generalizing it. This may need to be changed in the future.
|
||||
* @warning Since HTMLPurifier_AttrDef_CSS blindly uses semicolons as
|
||||
* the separator, you cannot put a literal semicolon in
|
||||
* the URI. Try percent encoding it, in that case.
|
||||
*/
|
||||
class HTMLPurifier_AttrDef_CSS_URI extends HTMLPurifier_AttrDef_URI
{
    
    function HTMLPurifier_AttrDef_CSS_URI() {
        $this->HTMLPurifier_AttrDef_URI(true); // always embedded
    }
    
    /**
     * Extracts the URI from a CSS url(...) token, validates it with the
     * parent URI validator and wraps the result in url(...) again.
     *
     * @param $uri_string CSS token expected to have the form url(...).
     * @param $config     Mandatory HTMLPurifier_Config object.
     * @param $context    Mandatory context object.
     * @return Cleaned url(...) token, or false if the token is malformed
     *         or the parent validator rejects the URI.
     */
    function validate($uri_string, $config, &$context) {
        // parse the URI out of the string and then pass it onto
        // the parent object
        
        $uri_string = $this->parseCDATA($uri_string);
        if (strpos($uri_string, 'url(') !== 0) return false;
        $uri_string = (string) substr($uri_string, 4);
        // a bare "url(" leaves nothing to hold the closing parenthesis
        if ($uri_string === '') return false;
        $new_length = strlen($uri_string) - 1;
        if ($uri_string[$new_length] != ')') return false;
        $uri = trim((string) substr($uri_string, 0, $new_length));
        
        if ($uri !== '' && ($uri[0] == "'" || $uri[0] == '"')) {
            $quote = $uri[0];
            $new_length = strlen($uri) - 1;
            // a lone quote character cannot serve as both the opening
            // and the closing quote
            if ($new_length < 1 || $uri[$new_length] !== $quote) return false;
            $uri = (string) substr($uri, 1, $new_length - 1);
        }
        
        $keys   = array(  '(',   ')',   ',',   ' ',   '"',   "'");
        $values = array('\\(', '\\)', '\\,', '\\ ', '\\"', "\\'");
        // undo CSS backslash escapes before handing the URI to the parent
        $uri = str_replace($values, $keys, $uri);
        
        $result = parent::validate($uri, $config, $context);
        
        if ($result === false) return false;
        
        // escape necessary characters according to CSS spec
        // except for the comma, none of these should appear in the
        // URI at all
        $result = str_replace($keys, $values, $result);
        
        return "url($result)";
        
    }
    
}
|
||||
|
@@ -1,22 +0,0 @@
|
||||
<?php
|
||||
|
||||
class HTMLPurifier_AttrDef_Composite extends HTMLPurifier_AttrDef
{

    /**
     * List of attribute definitions that are tried in order.
     */
    var $defs;

    /**
     * @param $defs List of definition objects, each offering validate()
     */
    function HTMLPurifier_AttrDef_Composite($defs) {
        $this->defs = $defs;
    }

    /**
     * Runs the value through each stored definition in turn and returns
     * the first result that is not false.
     *
     * @return First non-false validation result, or false if every
     *         definition rejected the value
     */
    function validate($string, $config, &$context) {
        $outcome = false;
        // index into $this->defs instead of using the loop value so the
        // stored object itself is the one being called
        foreach ($this->defs as $index => $unused) {
            $outcome = $this->defs[$index]->validate($string, $config, $context);
            if ($outcome !== false) break;
        }
        return $outcome;
    }

}
|
||||
|
||||
?>
|
@@ -3,15 +3,33 @@
|
||||
require_once 'HTMLPurifier/AttrDef.php';
|
||||
|
||||
// Enum = Enumerated
|
||||
/**
|
||||
* Validates a keyword against a list of valid values.
|
||||
* @warning The case-insensitive compare of this function uses PHP's
|
||||
* built-in strtolower and ctype_lower functions, which may
|
||||
* cause problems with international comparisons
|
||||
*/
|
||||
class HTMLPurifier_AttrDef_Enum extends HTMLPurifier_AttrDef
|
||||
{
|
||||
|
||||
/**
|
||||
* Lookup table of valid values.
|
||||
*/
|
||||
var $valid_values = array();
|
||||
|
||||
/**
|
||||
* Bool indicating whether or not enumeration is case sensitive.
|
||||
* @note In general this is always case insensitive.
|
||||
*/
|
||||
var $case_sensitive = false; // values according to W3C spec
|
||||
|
||||
/**
|
||||
* @param $valid_values List of valid values
|
||||
* @param $case_sensitive Bool indicating whether or not case sensitive
|
||||
*/
|
||||
function HTMLPurifier_AttrDef_Enum(
|
||||
$valid_values = array(), $case_sensitive = false) {
|
||||
|
||||
$valid_values = array(), $case_sensitive = false
|
||||
) {
|
||||
$this->valid_values = array_flip($valid_values);
|
||||
$this->case_sensitive = $case_sensitive;
|
||||
}
|
||||
@@ -19,6 +37,7 @@ class HTMLPurifier_AttrDef_Enum extends HTMLPurifier_AttrDef
|
||||
function validate($string, $config, &$context) {
|
||||
$string = trim($string);
|
||||
if (!$this->case_sensitive) {
|
||||
// we may want to do full case-insensitive libraries
|
||||
$string = ctype_lower($string) ? $string : strtolower($string);
|
||||
}
|
||||
$result = isset($this->valid_values[$string]);
|
||||
@@ -26,6 +45,21 @@ class HTMLPurifier_AttrDef_Enum extends HTMLPurifier_AttrDef
|
||||
return $result ? $string : false;
|
||||
}
|
||||
|
||||
/**
 * Builds an enumeration validator from a compact string description.
 *
 * @param $string Comma-delimited list of valid values, for example
 *        "foo,bar,baz". The list is case-insensitive unless it is
 *        prefixed with "s:", which makes it case sensitive.
 * @return New HTMLPurifier_AttrDef_Enum instance
 */
function make($string) {
    // the "s:" marker only counts when at least one character follows it
    $sensitive = (strlen($string) > 2 && strncmp($string, 's:', 2) === 0);
    if ($sensitive) {
        $string = substr($string, 2);
    }
    return new HTMLPurifier_AttrDef_Enum(explode(',', $string), $sensitive);
}
|
||||
|
||||
}
|
||||
|
||||
?>
|
29
library/HTMLPurifier/AttrDef/HTML/Bool.php
Normal file
29
library/HTMLPurifier/AttrDef/HTML/Bool.php
Normal file
@@ -0,0 +1,29 @@
|
||||
<?php
|
||||
|
||||
require_once 'HTMLPurifier/AttrDef.php';
|
||||
|
||||
/**
|
||||
* Validates a boolean attribute
|
||||
*/
|
||||
class HTMLPurifier_AttrDef_HTML_Bool extends HTMLPurifier_AttrDef
{

    /**
     * Attribute name; this is what validate() returns on success.
     */
    var $name;

    var $minimized = true;

    /**
     * @param $name Name of the boolean attribute, false if not supplied
     */
    function HTMLPurifier_AttrDef_HTML_Bool($name = false) {
        $this->name = $name;
    }

    /**
     * Replaces any non-empty value with the attribute's own name.
     *
     * @return The stored attribute name, or false when the value is
     *         empty in the sense of PHP's empty()
     */
    function validate($string, $config, &$context) {
        return empty($string) ? false : $this->name;
    }

    /**
     * @param $string Name of attribute
     */
    function make($string) {
        return new HTMLPurifier_AttrDef_HTML_Bool($string);
    }

}
|
||||
|
34
library/HTMLPurifier/AttrDef/HTML/Color.php
Normal file
34
library/HTMLPurifier/AttrDef/HTML/Color.php
Normal file
@@ -0,0 +1,34 @@
|
||||
<?php
|
||||
|
||||
require_once 'HTMLPurifier/AttrDef.php';
|
||||
require_once 'HTMLPurifier/AttrDef/CSS/Color.php'; // for %Core.ColorKeywords
|
||||
|
||||
/**
|
||||
* Validates a color according to the HTML spec.
|
||||
*/
|
||||
class HTMLPurifier_AttrDef_HTML_Color extends HTMLPurifier_AttrDef
{

    /**
     * Validates an HTML color value: either a keyword from
     * %Core.ColorKeywords or a 3- or 6-digit hexadecimal triplet, with
     * or without the leading '#'.
     *
     * @param $string  Raw attribute value
     * @param $config  Configuration object supplying %Core.ColorKeywords
     * @param $context Context object (unused here)
     * @return The value mapped to a matching keyword, or "#" followed by
     *         six hex digits, or false if the value is not a color
     */
    function validate($string, $config, &$context) {

        // NOTE: the keyword table is cached for the lifetime of the
        // request, so it is taken from the first config seen
        static $colors = null;
        if ($colors === null) $colors = $config->get('Core', 'ColorKeywords');

        $string = trim($string);

        if (empty($string)) return false;
        if (isset($colors[$string])) return $colors[$string];
        // HTML color names are case-insensitive; retry with the
        // lowercased name so "Red" or "BLUE" are not thrown away
        $lower = strtolower($string);
        if (isset($colors[$lower])) return $colors[$lower];
        if ($string[0] === '#') $hex = substr($string, 1);
        else $hex = $string;

        $length = strlen($hex);
        if ($length !== 3 && $length !== 6) return false;
        if (!ctype_xdigit($hex)) return false;
        // expand shorthand "abc" to "aabbcc"
        if ($length === 3) $hex = $hex[0].$hex[0].$hex[1].$hex[1].$hex[2].$hex[2];

        return "#$hex";

    }

}
|
||||
|
33
library/HTMLPurifier/AttrDef/HTML/FrameTarget.php
Normal file
33
library/HTMLPurifier/AttrDef/HTML/FrameTarget.php
Normal file
@@ -0,0 +1,33 @@
|
||||
<?php
|
||||
|
||||
HTMLPurifier_ConfigSchema::define(
|
||||
'Attr', 'AllowedFrameTargets', array(), 'lookup',
|
||||
'Lookup table of all allowed link frame targets. Some commonly used '.
|
||||
'link targets include _blank, _self, _parent and _top. Values should '.
|
||||
'be lowercase, as validation will be done in a case-sensitive manner '.
|
||||
'despite W3C\'s recommendation. XHTML 1.0 Strict does not permit '.
|
||||
'the target attribute so this directive will have no effect in that '.
|
||||
'doctype. XHTML 1.1 does not enable the Target module by default, you '.
|
||||
'will have to manually enable it (see the module documentation for more details.)'
|
||||
);
|
||||
|
||||
require_once 'HTMLPurifier/AttrDef/Enum.php';
|
||||
|
||||
/**
|
||||
* Special-case enum attribute definition that lazy loads allowed frame targets
|
||||
*/
|
||||
class HTMLPurifier_AttrDef_HTML_FrameTarget extends HTMLPurifier_AttrDef_Enum
{

    /**
     * Lookup of allowed targets; false until the first validate() call
     * fetches it from the configuration.
     */
    var $valid_values = false; // uninitialized value
    var $case_sensitive = false;

    /**
     * Takes no arguments: the allowed values are read from the config
     * at validation time rather than passed in here.
     */
    function HTMLPurifier_AttrDef_HTML_FrameTarget() {}

    /**
     * Fetches %Attr.AllowedFrameTargets on first use, then defers to the
     * parent enumeration check.
     */
    function validate($string, $config, &$context) {
        if ($this->valid_values === false) {
            $this->valid_values = $config->get('Attr', 'AllowedFrameTargets');
        }
        return parent::validate($string, $config, $context);
    }

}
|
||||
|
120
library/HTMLPurifier/AttrDef/HTML/ID.php
Normal file
120
library/HTMLPurifier/AttrDef/HTML/ID.php
Normal file
@@ -0,0 +1,120 @@
|
||||
<?php
|
||||
|
||||
require_once 'HTMLPurifier/AttrDef.php';
|
||||
require_once 'HTMLPurifier/IDAccumulator.php';
|
||||
|
||||
HTMLPurifier_ConfigSchema::define(
|
||||
'Attr', 'EnableID', false, 'bool',
|
||||
'Allows the ID attribute in HTML. This is disabled by default '.
|
||||
'due to the fact that without proper configuration user input can '.
|
||||
'easily break the validation of a webpage by specifying an ID that is '.
|
||||
'already on the surrounding HTML. If you don\'t mind throwing caution to '.
|
||||
'the wind, enable this directive, but I strongly recommend you also '.
|
||||
'consider blacklisting IDs you use (%Attr.IDBlacklist) or prefixing all '.
|
||||
'user supplied IDs (%Attr.IDPrefix). This directive has been available '.
|
||||
'since 1.2.0, and when set to true reverts to the behavior of pre-1.2.0 '.
|
||||
'versions.'
|
||||
);
|
||||
HTMLPurifier_ConfigSchema::defineAlias(
|
||||
'HTML', 'EnableAttrID', 'Attr', 'EnableID'
|
||||
);
|
||||
|
||||
// Directive: prefix added to every user-submitted ID.
HTMLPurifier_ConfigSchema::define(
    'Attr', 'IDPrefix', '', 'string',
    'String to prefix to IDs. If you have no idea what IDs your pages '.
    'may use, you may opt to simply add a prefix to all user-submitted ID '.
    'attributes so that they are still usable, but will not conflict with '.
    'core page IDs. Example: setting the directive to \'user_\' will result in '.
    'a user submitted \'foo\' to become \'user_foo\'. Be sure to set '.
    '%Attr.EnableID to true before using '.
    'this. This directive was available since 1.2.0.'
);
|
||||
|
||||
// Directive: per-content prefix appended after %Attr.IDPrefix.
HTMLPurifier_ConfigSchema::define(
    'Attr', 'IDPrefixLocal', '', 'string',
    'Temporary prefix for IDs used in conjunction with %Attr.IDPrefix. If '.
    'you need to allow multiple sets of '.
    'user content on a web page, you may need to have a separate prefix that '.
    'changes with each iteration. This way, separately submitted user content '.
    'displayed on the same page doesn\'t clobber each other. Ideal values '.
    'are unique identifiers for the content it represents (i.e. the id of '.
    'the row in the database). Be sure to add a separator (like an underscore) '.
    'at the end. Warning: this directive will not work unless %Attr.IDPrefix '.
    'is set to a non-empty value! This directive was available since 1.2.0.'
);
|
||||
|
||||
// Directive: regular expression that rejects matching IDs.
HTMLPurifier_ConfigSchema::define(
    'Attr', 'IDBlacklistRegexp', null, 'string/null',
    'PCRE regular expression to be matched against all IDs. If the expression '.
    'matches, the ID is rejected. Use this with care: may cause '.
    'significant degradation. ID matching is done after all other '.
    'validation. This directive was available since 1.6.0.'
);
|
||||
|
||||
/**
|
||||
* Validates the HTML attribute ID.
|
||||
* @warning Even though this is the id processor, it
|
||||
* will ignore the directive Attr:IDBlacklist, since it will only
|
||||
* go according to the ID accumulator. Since the accumulator is
|
||||
* automatically generated, it will have already absorbed the
|
||||
* blacklist. If you're hacking around, make sure you use load()!
|
||||
*/
|
||||
|
||||
class HTMLPurifier_AttrDef_HTML_ID extends HTMLPurifier_AttrDef
{

    // ref functionality disabled, since we also have to verify
    // whether or not the ID it refers to exists

    /**
     * Validates an id attribute value.
     *
     * Steps, in order: bail out unless %Attr.EnableID is on, trim the
     * value, apply %Attr.IDPrefix / %Attr.IDPrefixLocal, reject IDs the
     * accumulator in the context already holds, check the characters,
     * apply %Attr.IDBlacklistRegexp, and finally register the ID with
     * the accumulator.
     *
     * @param $id      Raw attribute value
     * @param $config  Configuration object
     * @param $context Context object providing 'IDAccumulator'
     * @return The (possibly prefixed) ID, or false if it was rejected
     */
    function validate($id, $config, &$context) {

        if (!$config->get('Attr', 'EnableID')) return false;

        $id = trim($id); // surrounding whitespace never counts

        if ($id === '') return false;

        $prefix = $config->get('Attr', 'IDPrefix');
        if ($prefix === '') {
            // a local prefix without a global one is a configuration mistake
            if ($config->get('Attr', 'IDPrefixLocal') !== '') {
                trigger_error('%Attr.IDPrefixLocal cannot be used unless '.
                    '%Attr.IDPrefix is set', E_USER_WARNING);
            }
        } else {
            $prefix .= $config->get('Attr', 'IDPrefixLocal');
            // do not prepend the prefix a second time
            if (strpos($id, $prefix) !== 0) $id = $prefix . $id;
        }

        $id_accumulator =& $context->get('IDAccumulator');
        if (isset($id_accumulator->ids[$id])) return false;

        // character check done without regular expressions
        $result = ctype_alpha($id);
        if (!$result) {
            // first character has to be a letter
            if (!ctype_alpha(@$id[0])) return false;
            // trimming away every permitted character leaves an empty
            // string only when nothing else is present
            $result = (trim($id, 'A..Za..z0..9:-._') === '');
        }

        $regexp = $config->get('Attr', 'IDBlacklistRegexp');
        if ($regexp && preg_match($regexp, $id)) {
            return false;
        }

        if (!$result) return false;

        // remember the ID so a later duplicate gets rejected
        $id_accumulator->add($id);

        return $id;

    }

}
|
||||
|
@@ -1,18 +1,16 @@
|
||||
<?php
|
||||
|
||||
require_once 'HTMLPurifier/AttrDef.php';
|
||||
require_once 'HTMLPurifier/AttrDef/Pixels.php';
|
||||
require_once 'HTMLPurifier/AttrDef/HTML/Pixels.php';
|
||||
|
||||
/**
|
||||
* Validates the HTML type length (not to be confused with CSS's length).
|
||||
*
|
||||
* This accepts integer pixels or percentages as lengths for certain
|
||||
* HTML attributes. Don't use this for CSS: that's
|
||||
* HTMLPurifier_AttrDef_CSSLength which requires prefixes and allows a lot
|
||||
* more different types.
|
||||
* HTML attributes.
|
||||
*/
|
||||
|
||||
class HTMLPurifier_AttrDef_Length extends HTMLPurifier_AttrDef_Pixels
|
||||
class HTMLPurifier_AttrDef_HTML_Length extends HTMLPurifier_AttrDef_HTML_Pixels
|
||||
{
|
||||
|
||||
function validate($string, $config, &$context) {
|
||||
@@ -43,4 +41,3 @@ class HTMLPurifier_AttrDef_Length extends HTMLPurifier_AttrDef_Pixels
|
||||
|
||||
}
|
||||
|
||||
?>
|
72
library/HTMLPurifier/AttrDef/HTML/LinkTypes.php
Normal file
72
library/HTMLPurifier/AttrDef/HTML/LinkTypes.php
Normal file
@@ -0,0 +1,72 @@
|
||||
<?php
|
||||
|
||||
require_once 'HTMLPurifier/AttrDef.php';
|
||||
|
||||
HTMLPurifier_ConfigSchema::define(
|
||||
'Attr', 'AllowedRel', array(), 'lookup',
|
||||
'List of allowed forward document relationships in the rel attribute. '.
|
||||
'Common values may be nofollow or print. By default, this is empty, '.
|
||||
'meaning that no document relationships are allowed. This directive '.
|
||||
'was available since 1.6.0.'
|
||||
);
|
||||
|
||||
HTMLPurifier_ConfigSchema::define(
|
||||
'Attr', 'AllowedRev', array(), 'lookup',
|
||||
'List of allowed reverse document relationships in the rev attribute. '.
|
||||
'This attribute is a bit of an edge-case; if you don\'t know what it '.
|
||||
'is for, stay away. This directive was available since 1.6.0.'
|
||||
);
|
||||
|
||||
/**
|
||||
* Validates a rel/rev link attribute against a directive of allowed values
|
||||
* @note We cannot use Enum because link types allow multiple
|
||||
* values.
|
||||
* @note Assumes link types are ASCII text
|
||||
*/
|
||||
class HTMLPurifier_AttrDef_HTML_LinkTypes extends HTMLPurifier_AttrDef
{

    /** Name config attribute to pull. */
    var $name;

    /**
     * @param $name Attribute being validated, either 'rel' or 'rev';
     *        anything else triggers an E_USER_ERROR
     */
    function HTMLPurifier_AttrDef_HTML_LinkTypes($name) {
        $configLookup = array(
            'rel' => 'AllowedRel',
            'rev' => 'AllowedRev'
        );
        if (isset($configLookup[$name])) {
            $this->name = $configLookup[$name];
            return;
        }
        trigger_error('Unrecognized attribute name for link '.
            'relationship.', E_USER_ERROR);
    }

    /**
     * Keeps only the link types present in the configured lookup.
     *
     * The value is split on spaces; each piece is trimmed, lowercased
     * and kept if it is a key of the configured lookup. Duplicates are
     * collapsed, keeping first-seen order.
     *
     * @return Space-separated list of accepted link types, or false if
     *         nothing is allowed or nothing survived
     */
    function validate($string, $config, &$context) {

        $allowed = $config->get('Attr', $this->name);
        if (empty($allowed)) return false;

        $tokens = explode(' ', $this->parseCDATA($string));

        // keyed by token so that repeats collapse
        $seen = array();
        foreach ($tokens as $token) {
            $token = strtolower(trim($token));
            if (isset($allowed[$token])) {
                $seen[$token] = true;
            }
        }

        if (empty($seen)) return false;

        return implode(' ', array_keys($seen));

    }

}
|
||||
|
@@ -1,9 +1,15 @@
|
||||
<?php
|
||||
|
||||
require_once 'HTMLPurifier/AttrDef.php';
|
||||
require_once 'HTMLPurifier/AttrDef/Length.php';
|
||||
require_once 'HTMLPurifier/AttrDef/HTML/Length.php';
|
||||
|
||||
class HTMLPurifier_AttrDef_MultiLength extends HTMLPurifier_AttrDef_Length
|
||||
/**
|
||||
* Validates a MultiLength as defined by the HTML spec.
|
||||
*
|
||||
* A multilength is either a integer (pixel count), a percentage, or
|
||||
* a relative number.
|
||||
*/
|
||||
class HTMLPurifier_AttrDef_HTML_MultiLength extends HTMLPurifier_AttrDef_HTML_Length
|
||||
{
|
||||
|
||||
function validate($string, $config, &$context) {
|
||||
@@ -21,16 +27,17 @@ class HTMLPurifier_AttrDef_MultiLength extends HTMLPurifier_AttrDef_Length
|
||||
|
||||
$int = substr($string, 0, $length - 1);
|
||||
|
||||
if ($int == '') return '*';
|
||||
if (!is_numeric($int)) return false;
|
||||
|
||||
$int = (int) $int;
|
||||
|
||||
if ($int < 0) return '0*';
|
||||
|
||||
if ($int < 0) return false;
|
||||
if ($int == 0) return '0';
|
||||
if ($int == 1) return '*';
|
||||
return ((string) $int) . '*';
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
?>
|
@@ -3,7 +3,14 @@
|
||||
require_once 'HTMLPurifier/AttrDef.php';
|
||||
require_once 'HTMLPurifier/Config.php';
|
||||
|
||||
class HTMLPurifier_AttrDef_Class extends HTMLPurifier_AttrDef
|
||||
/**
|
||||
* Validates contents based on NMTOKENS attribute type.
|
||||
* @note The only current use for this is the class attribute in HTML
|
||||
* @note Could have some functionality factored out into Nmtoken class
|
||||
* @warning We cannot assume this class will be used only for 'class'
|
||||
* attributes. Not sure how to hook in magic behavior, then.
|
||||
*/
|
||||
class HTMLPurifier_AttrDef_HTML_Nmtokens extends HTMLPurifier_AttrDef
|
||||
{
|
||||
|
||||
function validate($string, $config, &$context) {
|
||||
@@ -21,16 +28,17 @@ class HTMLPurifier_AttrDef_Class extends HTMLPurifier_AttrDef
|
||||
// and plus it would complicate optimization efforts (you never
|
||||
// see that anyway).
|
||||
$matches = array();
|
||||
$pattern = '/(?:(?<=\s)|\A)'.
|
||||
$pattern = '/(?:(?<=\s)|\A)'. // look behind for space or string start
|
||||
'((?:--|-?[A-Za-z_])[A-Za-z_\-0-9]*)'.
|
||||
'(?:(?=\s)|\z)/';
|
||||
'(?:(?=\s)|\z)/'; // look ahead for space or string end
|
||||
preg_match_all($pattern, $string, $matches);
|
||||
|
||||
if (empty($matches[1])) return false;
|
||||
|
||||
// reconstruct string
|
||||
$new_string = '';
|
||||
foreach ($matches[1] as $class_names) {
|
||||
$new_string .= $class_names . ' ';
|
||||
foreach ($matches[1] as $token) {
|
||||
$new_string .= $token . ' ';
|
||||
}
|
||||
$new_string = rtrim($new_string);
|
||||
|
||||
@@ -40,4 +48,3 @@ class HTMLPurifier_AttrDef_Class extends HTMLPurifier_AttrDef
|
||||
|
||||
}
|
||||
|
||||
?>
|
@@ -2,7 +2,10 @@
|
||||
|
||||
require_once 'HTMLPurifier/AttrDef.php';
|
||||
|
||||
class HTMLPurifier_AttrDef_Pixels extends HTMLPurifier_AttrDef
|
||||
/**
|
||||
* Validates an integer representation of pixels according to the HTML spec.
|
||||
*/
|
||||
class HTMLPurifier_AttrDef_HTML_Pixels extends HTMLPurifier_AttrDef
|
||||
{
|
||||
|
||||
function validate($string, $config, &$context) {
|
||||
@@ -31,4 +34,3 @@ class HTMLPurifier_AttrDef_Pixels extends HTMLPurifier_AttrDef
|
||||
|
||||
}
|
||||
|
||||
?>
|
@@ -1,46 +0,0 @@
|
||||
<?php
|
||||
|
||||
require_once 'HTMLPurifier/AttrDef.php';
|
||||
require_once 'HTMLPurifier/IDAccumulator.php';
|
||||
|
||||
// NOTE QUIRKY BEHAVIOR: even though this is the id processor, it
|
||||
// will ignore directive Attr:IDBlacklist, since it will only
|
||||
// go according to the ID accumulator. Since the accumulator is
|
||||
// automatically generated, it will have already absorbed the
|
||||
// blacklist. If you're hacking around, make sure you use load()!
|
||||
|
||||
class HTMLPurifier_AttrDef_ID extends HTMLPurifier_AttrDef
{

    /**
     * Validates an id attribute value against the ID accumulator held
     * by the context and against the permitted character set.
     *
     * @param $id      Raw attribute value
     * @param $config  Configuration object (not read here)
     * @param $context Context object exposing ->id_accumulator
     * @return The trimmed ID, or false if it is empty, already taken or
     *         contains characters that are not permitted
     */
    function validate($id, $config, &$context) {

        $id = trim($id); // surrounding whitespace never counts

        if ($id === '') return false;
        if (isset($context->id_accumulator->ids[$id])) return false;

        // character check done without regular expressions
        $result = ctype_alpha($id);
        if (!$result) {
            // first character has to be a letter
            if (!ctype_alpha(@$id[0])) return false;
            // trimming away every permitted character leaves an empty
            // string only when nothing else is present
            $result = (trim($id, 'A..Za..z0..9:-._') === '');
        }

        if (!$result) return false;

        // remember the ID so a later duplicate gets rejected
        $context->id_accumulator->add($id);

        return $id;

    }

}
|
||||
|
||||
?>
|
@@ -2,16 +2,42 @@
|
||||
|
||||
require_once 'HTMLPurifier/AttrDef.php';
|
||||
|
||||
// appears to be a dud class: no currently allowed CSS uses this type
|
||||
// Uses this: widows, orphans, z-index, counter-increment, counter-reset
|
||||
|
||||
/**
|
||||
* Validates an integer.
|
||||
* @note While this class was modeled off the CSS definition, no currently
|
||||
* allowed CSS uses this type. The properties that do are: widows,
|
||||
* orphans, z-index, counter-increment, counter-reset. Some of the
|
||||
* HTML attributes, however, find use for a non-negative version of this.
|
||||
*/
|
||||
class HTMLPurifier_AttrDef_Integer extends HTMLPurifier_AttrDef
|
||||
{
|
||||
|
||||
var $non_negative = false;
|
||||
/**
|
||||
* Bool indicating whether or not negative values are allowed
|
||||
*/
|
||||
var $negative = true;
|
||||
|
||||
function HTMLPurifier_AttrDef_Integer($non_negative = false) {
|
||||
$this->non_negative = $non_negative;
|
||||
/**
|
||||
* Bool indicating whether or not zero is allowed
|
||||
*/
|
||||
var $zero = true;
|
||||
|
||||
/**
|
||||
* Bool indicating whether or not positive values are allowed
|
||||
*/
|
||||
var $positive = true;
|
||||
|
||||
/**
|
||||
* @param $negative Bool indicating whether or not negative values are allowed
|
||||
* @param $zero Bool indicating whether or not zero is allowed
|
||||
* @param $positive Bool indicating whether or not positive values are allowed
|
||||
*/
|
||||
function HTMLPurifier_AttrDef_Integer(
|
||||
$negative = true, $zero = true, $positive = true
|
||||
) {
|
||||
$this->negative = $negative;
|
||||
$this->zero = $zero;
|
||||
$this->positive = $positive;
|
||||
}
|
||||
|
||||
function validate($integer, $config, &$context) {
|
||||
@@ -19,19 +45,30 @@ class HTMLPurifier_AttrDef_Integer extends HTMLPurifier_AttrDef
|
||||
$integer = $this->parseCDATA($integer);
|
||||
if ($integer === '') return false;
|
||||
|
||||
if ( !$this->non_negative && $integer[0] === '-' ) {
|
||||
// we could possibly simply typecast it to integer, but there are
|
||||
// certain fringe cases that must not return an integer.
|
||||
|
||||
// clip leading sign
|
||||
if ( $this->negative && $integer[0] === '-' ) {
|
||||
$digits = substr($integer, 1);
|
||||
} elseif( $integer[0] === '+' ) {
|
||||
$digits = $integer = substr($integer, 1);
|
||||
if ($digits === '0') $integer = '0'; // rm minus sign for zero
|
||||
} elseif( $this->positive && $integer[0] === '+' ) {
|
||||
$digits = $integer = substr($integer, 1); // rm unnecessary plus
|
||||
} else {
|
||||
$digits = $integer;
|
||||
}
|
||||
|
||||
// test if it's numeric
|
||||
if (!ctype_digit($digits)) return false;
|
||||
|
||||
// perform scope tests
|
||||
if (!$this->zero && $integer == 0) return false;
|
||||
if (!$this->positive && $integer > 0) return false;
|
||||
if (!$this->negative && $integer < 0) return false;
|
||||
|
||||
return $integer;
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
?>
|
@@ -2,8 +2,10 @@
|
||||
|
||||
require_once 'HTMLPurifier/AttrDef.php';
|
||||
|
||||
// built according to RFC 3066, which obsoleted RFC 1766
|
||||
|
||||
/**
|
||||
* Validates the HTML attribute lang, effectively a language code.
|
||||
* @note Built according to RFC 3066, which obsoleted RFC 1766
|
||||
*/
|
||||
class HTMLPurifier_AttrDef_Lang extends HTMLPurifier_AttrDef
|
||||
{
|
||||
|
||||
@@ -44,10 +46,10 @@ class HTMLPurifier_AttrDef_Lang extends HTMLPurifier_AttrDef
|
||||
|
||||
// process second subtag : $subtags[1]
|
||||
$length = strlen($subtags[1]);
|
||||
if ($length == 0 || $length == 1 || $length > 8 || !ctype_alnum($subtags[1])) {
|
||||
if ($length == 0 || ($length == 1 && $subtags[1] != 'x') || $length > 8 || !ctype_alnum($subtags[1])) {
|
||||
return $new_string;
|
||||
}
|
||||
if (!ctype_lower($subtags[1])) $subtags[1] = strotolower($subtags[1]);
|
||||
if (!ctype_lower($subtags[1])) $subtags[1] = strtolower($subtags[1]);
|
||||
|
||||
$new_string .= '-' . $subtags[1];
|
||||
if ($num_subtags == 2) return $new_string;
|
||||
@@ -59,7 +61,7 @@ class HTMLPurifier_AttrDef_Lang extends HTMLPurifier_AttrDef
|
||||
return $new_string;
|
||||
}
|
||||
if (!ctype_lower($subtags[$i])) {
|
||||
$subtags[$i] = strotolower($subtags[$i]);
|
||||
$subtags[$i] = strtolower($subtags[$i]);
|
||||
}
|
||||
$new_string .= '-' . $subtags[$i];
|
||||
}
|
||||
@@ -70,4 +72,3 @@ class HTMLPurifier_AttrDef_Lang extends HTMLPurifier_AttrDef
|
||||
|
||||
}
|
||||
|
||||
?>
|
@@ -1,36 +0,0 @@
|
||||
<?php
|
||||
|
||||
require_once 'HTMLPurifier/AttrDef.php';
|
||||
|
||||
class HTMLPurifier_AttrDef_Multiple extends HTMLPurifier_AttrDef
{

    /**
     * Definition object used to validate each individual token.
     */
    var $single;

    /**
     * Maximum number of valid tokens kept in the result.
     */
    var $max;

    /**
     * @param $single Definition object applied to every token
     * @param $max    Maximum number of valid tokens to keep
     */
    function HTMLPurifier_AttrDef_Multiple($single, $max = 4) {
        $this->single = $single;
        $this->max = $max;
    }

    /**
     * Validates a space-separated list of values, token by token.
     *
     * Tokens rejected by the inner definition are dropped; processing
     * stops once $max valid tokens have been collected.
     *
     * @return Space-separated list of validated tokens, or false if the
     *         input is empty or no token survived
     */
    function validate($string, $config, &$context) {
        $string = $this->parseCDATA($string);
        if ($string === '') return false;
        $parts = explode(' ', $string); // parseCDATA replaced \r, \t and \n
        $length = count($parts);
        $final = '';
        for ($i = 0, $num = 0; $i < $length && $num < $this->max; $i++) {
            // consecutive spaces yield empty tokens; ctype_space('') is
            // false, so those have to be skipped explicitly
            if ($parts[$i] === '' || ctype_space($parts[$i])) continue;
            $result = $this->single->validate($parts[$i], $config, $context);
            if ($result !== false) {
                $final .= $result . ' ';
                $num++;
            }
        }
        if ($final === '') return false;
        return rtrim($final);
    }

}
|
||||
|
||||
?>
|
@@ -1,23 +0,0 @@
|
||||
<?php
|
||||
|
||||
require_once 'HTMLPurifier/AttrDef.php';
|
||||
|
||||
// for col and row spans, essentially, a positive integer
|
||||
class HTMLPurifier_AttrDef_NumberSpan extends HTMLPurifier_AttrDef
{

    /**
     * Validates a span count, i.e. a positive integer.
     *
     * Only plain decimal digits (with an optional leading plus sign) are
     * accepted. Floats and exponent notation are rejected outright
     * instead of being silently truncated by an integer cast.
     *
     * @param $string  Raw attribute value
     * @param $config  Configuration object (not read here)
     * @param $context Context object (not read here)
     * @return Normalized integer string, or false if the value is
     *         invalid, not positive, or exactly '1'
     */
    function validate($string, $config, &$context) {

        $string = trim($string);
        if ($string === '') return false;
        if ($string === '1') return false; // this is the default value

        // tolerate one explicit plus sign, then insist on digits only
        $digits = ($string[0] === '+') ? (string) substr($string, 1) : $string;
        if (!ctype_digit($digits)) return false;

        $int = (int) $digits;
        if ($int <= 0) return false;
        return (string) $int;

    }

}
|
||||
|
||||
?>
|
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user