mirror of
https://github.com/ezyang/htmlpurifier.git
synced 2025-08-04 05:07:55 +02:00
Compare commits
464 Commits
v2.1.0-str
...
v3.0.0
Author | SHA1 | Date | |
---|---|---|---|
|
02658df8b2 | ||
|
be7c1e7a8f | ||
|
562f53b54c | ||
|
38a59ef5b8 | ||
|
8779b46fc4 | ||
|
a7fab00cdd | ||
|
beefb11879 | ||
|
ae1c8f47cc | ||
|
0f961c6af4 | ||
|
a840c24796 | ||
|
66c0407bef | ||
|
5b3431d889 | ||
|
831f552ec5 | ||
|
54b37674f1 | ||
|
62f3fd894d | ||
|
b5546ff6f0 | ||
|
7ddd9d0afe | ||
|
620ab75906 | ||
|
3ef9bdf8a2 | ||
|
43f01925cd | ||
|
85a23bacb6 | ||
|
4066416160 | ||
|
fad6aa45fa | ||
|
a7e6d85f6d | ||
|
c330860606 | ||
|
0ea53e5a3d | ||
|
68167176dc | ||
|
bb08f679f0 | ||
|
8cd1806ec8 | ||
|
1274cfed49 | ||
|
1ab47ba949 | ||
|
da95ee096a | ||
|
6d7250c309 | ||
|
df55df1083 | ||
|
1a8d864a42 | ||
|
552102f7f2 | ||
|
f5371bbad4 | ||
|
c8b020879d | ||
|
094b20f58f | ||
|
f2df669eec | ||
|
ca43df9fdd | ||
|
5f76796e14 | ||
|
1f9a6ba30e | ||
|
ccca8cc34f | ||
|
28c29656af | ||
|
88f4f57a47 | ||
|
43a98de909 | ||
|
b9d886d53b | ||
|
5b3c8c5534 | ||
|
dd40d41bc3 | ||
|
37a80f1295 | ||
|
fb367dc871 | ||
|
29c3c21b34 | ||
|
e45cc503a2 | ||
|
85cdea0120 | ||
|
c7676afb0d | ||
|
d75c695994 | ||
|
6f6fcbc354 | ||
|
c31d6ec80e | ||
|
cb92a57e4e | ||
|
423afedbf4 | ||
|
7827a95273 | ||
|
9881a34712 | ||
|
a19f30fdcf | ||
|
8f58c7f49e | ||
|
71301b36eb | ||
|
4f0d012dfa | ||
|
24a4dfdf83 | ||
|
f922285383 | ||
|
3af6457801 | ||
|
d51d3c127b | ||
|
4f92c0377f | ||
|
c3efafb07d | ||
|
79c18eb781 | ||
|
7b64bc37e2 | ||
|
b3aa5fa0dc | ||
|
350d8301dd | ||
|
a40e16dd2e | ||
|
ee388e86c0 | ||
|
79df79b2fd | ||
|
f5b72c623c | ||
|
7bccc24977 | ||
|
25fe416ab2 | ||
|
a9012f4387 | ||
|
82f8561123 | ||
|
0b743fb2db | ||
|
08e32597df | ||
|
2b82fbacad | ||
|
710820cbe9 | ||
|
22ef52a7f6 | ||
|
4919187fc6 | ||
|
797b899305 | ||
|
8c9dbe142d | ||
|
2a002857ce | ||
|
9d98b45dea | ||
|
b0f3116b9e | ||
|
b03a44abff | ||
|
cf257cabde | ||
|
ab950a1909 | ||
|
a12ea4bb3b | ||
|
f80de908bd | ||
|
349c4de75b | ||
|
89622c964e | ||
|
732fe5cad7 | ||
|
e7e81c0a5b | ||
|
626b2a13c8 | ||
|
35487c02ae | ||
|
4bc1761b12 | ||
|
63f5414f2e | ||
|
88d014706b | ||
|
f6de73d7e7 | ||
|
733868a76d | ||
|
fab6a212c8 | ||
|
ea1362ce5c | ||
|
cff498ef67 | ||
|
1765a7537a | ||
|
d7157d0ccd | ||
|
ed44b5c5ba | ||
|
5e5c0f3aa4 | ||
|
b2ed0aff01 | ||
|
148681d1b0 | ||
|
2e7e411491 | ||
|
02051e465c | ||
|
a96b5bf612 | ||
|
9dd7c8c7dd | ||
|
0c59db1da3 | ||
|
584a1abd15 | ||
|
a6ede3804e | ||
|
4476745003 | ||
|
45748500ec | ||
|
e99520ab96 | ||
|
1e2abb7f8f | ||
|
362c802191 | ||
|
3a1d505b3d | ||
|
a005da8a4c | ||
|
9a66394abb | ||
|
62c0575468 | ||
|
6a95d91a1a | ||
|
275932ec05 | ||
|
ae90bb919d | ||
|
3c734b4c72 | ||
|
3d02a2a7d4 | ||
|
0bfa42f9b7 | ||
|
7a8edc88f9 | ||
|
98b4e70a93 | ||
|
6f5592ae60 | ||
|
9f996b125a | ||
|
96b571d236 | ||
|
0e9904a9ba | ||
|
e66a98c396 | ||
|
728088f2ba | ||
|
8ae2604440 | ||
|
7b087c7bbe | ||
|
58064592ff | ||
|
b19fc32a5a | ||
|
b15cbbb42a | ||
|
5f0663cad7 | ||
|
75e52a12a6 | ||
|
269268b843 | ||
|
62c6d93b6d | ||
|
31704c92f6 | ||
|
291fa4cb29 | ||
|
389fcc9a5d | ||
|
e5191b3ada | ||
|
5d0a992579 | ||
|
ae83bebc98 | ||
|
9191877740 | ||
|
3066ca357a | ||
|
53fd096641 | ||
|
2166246b7e | ||
|
49bb6ec35d | ||
|
401612dc3a | ||
|
dc0fb7d2b4 | ||
|
eee45fed37 | ||
|
03657ad51a | ||
|
dda4038446 | ||
|
996ccdbdda | ||
|
008348db21 | ||
|
b10a380ff4 | ||
|
bf0d659c47 | ||
|
e55551ecdd | ||
|
e9f3fef47b | ||
|
840f9f7434 | ||
|
10c970760d | ||
|
69996acc9e | ||
|
8bbb73e47d | ||
|
cf7a50163c | ||
|
da2ea348fd | ||
|
ab3ebcba6d | ||
|
d399abba50 | ||
|
0b0a505c30 | ||
|
6aa3dfc116 | ||
|
c3094275ef | ||
|
220c150e0a | ||
|
32d30a9181 | ||
|
0e5491b20c | ||
|
7699efd593 | ||
|
4bf15de536 | ||
|
70bcccf54c | ||
|
bf6ce67fc1 | ||
|
bd44105ca9 | ||
|
d1f43636e5 | ||
|
9c7483166c | ||
|
e840564228 | ||
|
7d4b532d6b | ||
|
58f00105c8 | ||
|
8d15d1ce13 | ||
|
9c60eeed04 | ||
|
2e089477a5 | ||
|
b442d09ea6 | ||
|
12f73605a3 | ||
|
e2a951420f | ||
|
002395de09 | ||
|
d1187ed331 | ||
|
426fbd1f97 | ||
|
9c5f01a0cf | ||
|
f985d3cd96 | ||
|
0cb1d85822 | ||
|
073ddb0cb2 | ||
|
889ccb1a92 | ||
|
aec84dc3f6 | ||
|
dea62ffdab | ||
|
8913239b7f | ||
|
e06929c218 | ||
|
aaf4839c34 | ||
|
c113f43440 | ||
|
bd8ecdd268 | ||
|
ef51f8681a | ||
|
ee61ffc0d9 | ||
|
f758f7c534 | ||
|
95499e34da | ||
|
de23201cbb | ||
|
21ab12a6a8 | ||
|
69666e977f | ||
|
fa05319e30 | ||
|
ea46d79b0a | ||
|
a62f8971e4 | ||
|
7a3e06d4d0 | ||
|
e180b7689e | ||
|
7579932948 | ||
|
818d0d7a23 | ||
|
797d3e0393 | ||
|
ff7eec7424 | ||
|
0ea04db559 | ||
|
831db14c79 | ||
|
a470fc5621 | ||
|
2945f6a930 | ||
|
71326abec1 | ||
|
23ef535043 | ||
|
fda2043ace | ||
|
3f06d8316c | ||
|
e4b621eec2 | ||
|
9728be4a52 | ||
|
f1ec05afd0 | ||
|
7481d349d3 | ||
|
086dc9177b | ||
|
4d38c02932 | ||
|
83a50465dc | ||
|
dd62a303eb | ||
|
e4e981b6f1 | ||
|
a846f4e70b | ||
|
a5136b65e4 | ||
|
2d035483dd | ||
|
831a09d455 | ||
|
2cbb3be602 | ||
|
f7eccc0038 | ||
|
65252d6fbd | ||
|
6b9c5ec603 | ||
|
e7b15068c2 | ||
|
53c19552d2 | ||
|
048242004e | ||
|
05e1aca2fa | ||
|
23feb457f2 | ||
|
8f6380d63a | ||
|
3b1c40b2fc | ||
|
da92cb9ff4 | ||
|
bda9167423 | ||
|
cb9c96a2b0 | ||
|
e0cf214c44 | ||
|
ed73fdd5b8 | ||
|
eaea42f827 | ||
|
7f39e1e2c3 | ||
|
b81fb0af90 | ||
|
47fe34ad81 | ||
|
ac50d333a5 | ||
|
ce013e2962 | ||
|
67fab710bf | ||
|
b3a599e8c2 | ||
|
f4e4c1556d | ||
|
c5e33416d3 | ||
|
6c08ca4c16 | ||
|
b1822bb04f | ||
|
893e962890 | ||
|
bd6071cb3b | ||
|
92ea74cba2 | ||
|
a01459c87a | ||
|
fd35c43643 | ||
|
0426985c81 | ||
|
bbea02f55c | ||
|
4e77a1adbd | ||
|
bd58a7ba77 | ||
|
a3ed9196b9 | ||
|
2646f5ea57 | ||
|
424c7ad2e3 | ||
|
234b3085d7 | ||
|
3d978c961d | ||
|
72254cd77a | ||
|
d8a6361244 | ||
|
968dfa2feb | ||
|
114d6841ab | ||
|
1c68d769b5 | ||
|
ac0ca3f15c | ||
|
2d5498b8aa | ||
|
71ccae1a3a | ||
|
cb186dddc4 | ||
|
2ceccc0969 | ||
|
93aa98ad01 | ||
|
c0b38bab85 | ||
|
d6c4473a12 | ||
|
fc06f221d5 | ||
|
ac3ab2a556 | ||
|
2c330cac73 | ||
|
a0d6543b84 | ||
|
e223490a78 | ||
|
2666f067cc | ||
|
826a57a04a | ||
|
e08b5aaa70 | ||
|
b15e8c344e | ||
|
2c9e041b4c | ||
|
e2c3394d70 | ||
|
1532fe703a | ||
|
058f1eba7d | ||
|
1102dc6e27 | ||
|
85374d330f | ||
|
a16d6c4342 | ||
|
9b5e2978ad | ||
|
06468a4157 | ||
|
0167f8aa84 | ||
|
f1a90e684b | ||
|
14d98413fd | ||
|
97a4ec7598 | ||
|
71ed725c5c | ||
|
d4bf41288a | ||
|
365bd78c20 | ||
|
52fa958fb2 | ||
|
17d32bac7f | ||
|
e2babe5308 | ||
|
5f1a6b883f | ||
|
c5e3796202 | ||
|
72f1984229 | ||
|
918081b372 | ||
|
6c56dd070f | ||
|
299f93f8f0 | ||
|
4169846c57 | ||
|
aff4957531 | ||
|
e4bdf472a6 | ||
|
9a99750474 | ||
|
7eb751b5f5 | ||
|
0d0173eb6e | ||
|
556ed4ea90 | ||
|
cf445a6107 | ||
|
243ad45e59 | ||
|
31d0c621f5 | ||
|
0870974a25 | ||
|
5c4a0a6785 | ||
|
e55babdc53 | ||
|
6e1b540d99 | ||
|
edf20018f0 | ||
|
c09432e171 | ||
|
9c031b5c1e | ||
|
a827cbc3ba | ||
|
c05eebee15 | ||
|
93a69d020a | ||
|
f3fa9c01ba | ||
|
bae5b0c022 | ||
|
67befbc8a8 | ||
|
cac22f01cf | ||
|
94d2dbaa74 | ||
|
6add828bc8 | ||
|
800b67ed65 | ||
|
71e4ddd222 | ||
|
54a68a1713 | ||
|
bd544ad038 | ||
|
d5491da77f | ||
|
591fc0ae28 | ||
|
dac7ac1eae | ||
|
64ee756b7a | ||
|
e2103ce0f2 | ||
|
219902ebff | ||
|
21116373a7 | ||
|
5ed88809f3 | ||
|
bb8b38b1e0 | ||
|
236159242f | ||
|
9d8f839bf2 | ||
|
882148f9ad | ||
|
a863f62489 | ||
|
6478c7c2df | ||
|
129a4ea506 | ||
|
a122243a89 | ||
|
315c55eeb1 | ||
|
cfe50ff8ae | ||
|
d0018a2696 | ||
|
77d9e05a07 | ||
|
80243f377c | ||
|
43b157cf4d | ||
|
f6b50d4bfd | ||
|
806901cfd2 | ||
|
f90eef7f1f | ||
|
06867e14b6 | ||
|
bda2615b30 | ||
|
e1a5d10e75 | ||
|
98fd6b7d82 | ||
|
be264a4b20 | ||
|
01c85b71d2 | ||
|
2d22c0aa55 | ||
|
6e061f5184 | ||
|
44b988f1f6 | ||
|
0ead9558b4 | ||
|
159a1cced1 | ||
|
6871a54d64 | ||
|
96ac7e8797 | ||
|
2d49299621 | ||
|
ab5c782c77 | ||
|
8893b87e04 | ||
|
aeef746060 | ||
|
da13c6ac87 | ||
|
ccae73c25a | ||
|
8d6bfa4037 | ||
|
712d81ebea | ||
|
f7f6fed86a | ||
|
2293c67eec | ||
|
108df87824 | ||
|
5e366b25f8 | ||
|
2e16c4a968 | ||
|
a8db22dfff | ||
|
fbe2c25f8a | ||
|
158be61def | ||
|
d693c4ea09 | ||
|
c24916e1d6 | ||
|
a68b6afda1 | ||
|
78cf7db82e | ||
|
9b375fdfb8 | ||
|
0dd866cc15 | ||
|
ad1169c711 | ||
|
2816ae535f | ||
|
462d3ab72f | ||
|
cf1d868782 | ||
|
c705e17a58 | ||
|
1cce367950 | ||
|
61f852d429 | ||
|
3a73c2cf04 | ||
|
e75b676656 | ||
|
b53370efbf | ||
|
d60f345cab | ||
|
aefda60696 | ||
|
2ffa5d3135 | ||
|
23d3490d49 | ||
|
582ffc4143 | ||
|
d52189a19d | ||
|
02006d6e64 | ||
|
dcaa374dae | ||
|
e2cc37724b | ||
|
3ad6239dc3 | ||
|
663fb4e1b2 |
220
INSTALL
220
INSTALL
@@ -1,64 +1,57 @@
|
||||
|
||||
Install
|
||||
How to install HTML Purifier
|
||||
|
||||
HTML Purifier is designed to run out of the box, so actually using the library
|
||||
is extremely easy. (Although, if you were looking for a step-by-step
|
||||
installation GUI, you've come to the wrong place!) The impatient can scroll
|
||||
down to the bottom of this INSTALL document to see the code, but you really
|
||||
should make sure a few things are properly done.
|
||||
|
||||
|
||||
HTML Purifier is designed to run out of the box, so actually using the
|
||||
library is extremely easy. (Although... if you were looking for a
|
||||
step-by-step installation GUI, you've downloaded the wrong software!)
|
||||
|
||||
While the impatient can get going immediately with some of the sample
|
||||
code at the bottom of this document, it's well worth performing some
|
||||
basic sanity checks to get the most out of this library.
|
||||
|
||||
|
||||
---------------------------------------------------------------------------
|
||||
1. Compatibility
|
||||
|
||||
HTML Purifier works in both PHP 4 and PHP 5, from PHP 4.3.2 and up. It has no
|
||||
core dependencies with other libraries.
|
||||
HTML Purifier is PHP 5 only, and is actively tested from PHP 5.0.0 and
|
||||
up (see tests/multitest.php for the specific versions that are being
|
||||
actively tested). It has no core dependencies with other libraries. PHP
|
||||
4 support was deprecated on December 31, 2007 with HTML Purifier 3.0.0.
|
||||
Essential security fixes will be issued for the 2.1.x branch until
|
||||
August 8, 2008.
|
||||
|
||||
Optional extensions are iconv (usually installed) and tidy (also common).
|
||||
If you use UTF-8 and don't plan on pretty-printing HTML, you can get away with
|
||||
not having either of these extensions.
|
||||
These optional extensions can enhance the capabilities of HTML Purifier:
|
||||
|
||||
* iconv : Converts text to and from non-UTF-8 encodings
|
||||
* tidy : Used for pretty-printing HTML
|
||||
|
||||
|
||||
---------------------------------------------------------------------------
|
||||
2. Reconnaissance
|
||||
|
||||
2. Including the library
|
||||
A big plus of HTML Purifier is its inerrant support of standards, so
|
||||
your web-pages should be standards-compliant. (They should also use
|
||||
semantic markup, but that's another issue altogether, one HTML Purifier
|
||||
cannot fix without reading your mind.)
|
||||
|
||||
Simply use:
|
||||
|
||||
require_once '/path/to/library/HTMLPurifier.auto.php';
|
||||
|
||||
...and you're good to go. Since HTML Purifier's codebase is fairly
|
||||
large, I recommend only including HTML Purifier when you need it.
|
||||
|
||||
If you don't like your include_path to be fiddled around with, simply set
|
||||
HTML Purifier's library/ directory to the include path yourself and then:
|
||||
|
||||
require_once 'HTMLPurifier.php';
|
||||
|
||||
Only the contents in the library/ folder are necessary, so you can remove
|
||||
everything else when using HTML Purifier in a production environment.
|
||||
|
||||
|
||||
|
||||
3. Preparing the proper output environment
|
||||
|
||||
HTML Purifier is all about web-standards, so accordingly your webpages should
|
||||
be standards compliant. HTML Purifier can deal with these doctypes:
|
||||
HTML Purifier can process these doctypes:
|
||||
|
||||
* XHTML 1.0 Transitional (default)
|
||||
* XHTML 1.0 Strict
|
||||
* HTML 4.01 Transitional
|
||||
* HTML 4.01 Strict
|
||||
* XHTML 1.1 (sans Ruby)
|
||||
* XHTML 1.1
|
||||
|
||||
...and these character encodings:
|
||||
|
||||
* UTF-8 (default)
|
||||
* Any encoding iconv supports (support is crippled for i18n though)
|
||||
* Any encoding iconv supports (with crippled internationalization support)
|
||||
|
||||
The defaults are there for a reason: they are best-practice choices that
|
||||
should not be changed lightly. For those of you in the dark, you can determine
|
||||
the doctype from this code in your HTML documents:
|
||||
These defaults reflect what my choices would be if I were authoring an
|
||||
HTML document, however, what you choose depends on the nature of your
|
||||
codebase. If you don't know what doctype you are using, you can determine
|
||||
the doctype from this identifier at the top of your source code:
|
||||
|
||||
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
|
||||
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
|
||||
@@ -67,18 +60,34 @@ the doctype from this code in your HTML documents:
|
||||
|
||||
<meta http-equiv="Content-type" content="text/html;charset=ENCODING">
|
||||
|
||||
For legacy codebases these declarations may be missing. If that is the case,
|
||||
STOP, and read docs/enduser-utf8.html
|
||||
If the character encoding declaration is missing, STOP NOW, and
|
||||
read 'docs/enduser-utf8.html' (web accessible at
|
||||
http://htmlpurifier.org/docs/enduser-utf8.html). In fact, even if it is
|
||||
present, read this document anyway, as most websites specify character
|
||||
encoding incorrectly.
|
||||
|
||||
|
||||
---------------------------------------------------------------------------
|
||||
3. Including the library
|
||||
|
||||
The procedure is quite simple:
|
||||
|
||||
require_once '/path/to/library/HTMLPurifier.auto.php';
|
||||
|
||||
I recommend only including HTML Purifier when you need it, because that
|
||||
call represents the inclusion of a lot of PHP files which constitute
|
||||
the bulk of HTML Purifier's memory usage.
|
||||
|
||||
If you don't like your include_path to be fiddled around with, simply set
|
||||
HTML Purifier's library/ directory to the include path yourself and then:
|
||||
|
||||
require_once 'HTMLPurifier.php';
|
||||
|
||||
Only the contents in the library/ folder are necessary, so you can remove
|
||||
everything else when using HTML Purifier in a production environment.
|
||||
|
||||
|
||||
|
||||
You may currently be vulnerable to XSS and other security threats, and HTML
|
||||
Purifier won't be able to fix that.
|
||||
|
||||
|
||||
|
||||
---------------------------------------------------------------------------
|
||||
4. Configuration
|
||||
|
||||
HTML Purifier is designed to run out-of-the-box, but occasionally HTML
|
||||
@@ -95,7 +104,6 @@ object and read on:
|
||||
$config = HTMLPurifier_Config::createDefault();
|
||||
|
||||
|
||||
|
||||
4.1. Setting a different character encoding
|
||||
|
||||
You really shouldn't use any other encoding except UTF-8, especially if you
|
||||
@@ -122,10 +130,6 @@ but please be cognizant of the issues the "solution" creates (for this
|
||||
reason, I do not include the solution in this document).
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
4.2. Setting a different doctype
|
||||
|
||||
For those of you using HTML 4.01 Transitional, you can disable
|
||||
@@ -135,7 +139,6 @@ XHTML output like this:
|
||||
|
||||
Other supported doctypes include:
|
||||
|
||||
|
||||
* HTML 4.01 Strict
|
||||
* HTML 4.01 Transitional
|
||||
* XHTML 1.0 Strict
|
||||
@@ -143,7 +146,6 @@ Other supported doctypes include:
|
||||
* XHTML 1.1
|
||||
|
||||
|
||||
|
||||
4.3. Other settings
|
||||
|
||||
There are more configuration directives which can be read about
|
||||
@@ -153,55 +155,24 @@ your code. Some of the more interesting ones are configurable at the
|
||||
demo <http://htmlpurifier.org/demo.php> and are well worth looking into
|
||||
for your own system.
|
||||
|
||||
For example, you can fine tune allowed elements and attributes, convert
|
||||
relative URLs to absolute ones, and even autoparagraph input text! These
|
||||
are, respectively, %HTML.Allowed, %URI.MakeAbsolute and %URI.Base, and
|
||||
%AutoFormat.AutoParagraph. The %Namespace.Directive naming convention
|
||||
translates to:
|
||||
|
||||
$config->set('Namespace', 'Directive', $value);
|
||||
|
||||
E.g.
|
||||
|
||||
$config->set('HTML', 'Allowed', 'p,b,a[href],i');
|
||||
$config->set('URI', 'Base', 'http://www.example.com');
|
||||
$config->set('URI', 'MakeAbsolute', true);
|
||||
$config->set('AutoFormat', 'AutoParagraph', true);
|
||||
|
||||
|
||||
5. Using the code
|
||||
|
||||
The interface is mind-numbingly simple:
|
||||
|
||||
$purifier = new HTMLPurifier();
|
||||
$clean_html = $purifier->purify( $dirty_html );
|
||||
|
||||
...or, if you're using the configuration object:
|
||||
|
||||
$purifier = new HTMLPurifier($config);
|
||||
$clean_html = $purifier->purify( $dirty_html );
|
||||
|
||||
That's it! For more examples, check out docs/examples/ (they aren't very
|
||||
different though). Also, docs/enduser-slow.html gives advice on what to
|
||||
do if HTML Purifier is slowing down your application.
|
||||
|
||||
|
||||
|
||||
6. Quick install
|
||||
|
||||
First, make sure library/HTMLPurifier/DefinitionCache/Serializer is
|
||||
writable by the webserver (see Section 7: Caching below for details).
|
||||
If your website is in UTF-8 and XHTML Transitional, use this code:
|
||||
|
||||
<?php
|
||||
require_once '/path/to/htmlpurifier/library/HTMLPurifier.auto.php';
|
||||
|
||||
$purifier = new HTMLPurifier();
|
||||
$clean_html = $purifier->purify($dirty_html);
|
||||
?>
|
||||
|
||||
If your website is in a different encoding or doctype, use this code:
|
||||
|
||||
<?php
|
||||
require_once '/path/to/htmlpurifier/library/HTMLPurifier.auto.php';
|
||||
|
||||
$config = HTMLPurifier_Config::createDefault();
|
||||
$config->set('Core', 'Encoding', 'ISO-8859-1'); // replace with your encoding
|
||||
$config->set('HTML', 'Doctype', 'HTML 4.01 Transitional'); // replace with your doctype
|
||||
$purifier = new HTMLPurifier($config);
|
||||
|
||||
$clean_html = $purifier->purify($dirty_html);
|
||||
?>
|
||||
|
||||
|
||||
|
||||
7. Caching
|
||||
---------------------------------------------------------------------------
|
||||
5. Caching
|
||||
|
||||
HTML Purifier generates some cache files (generally one or two) to speed up
|
||||
its execution. For maximum performance, make sure that
|
||||
@@ -236,3 +207,50 @@ hit):
|
||||
Or move the cache directory somewhere else (no trailing slash):
|
||||
|
||||
$config->set('Cache', 'SerializerPath', '/home/user/absolute/path');
|
||||
|
||||
|
||||
---------------------------------------------------------------------------
|
||||
6. Using the code
|
||||
|
||||
The interface is mind-numbingly simple:
|
||||
|
||||
$purifier = new HTMLPurifier();
|
||||
$clean_html = $purifier->purify( $dirty_html );
|
||||
|
||||
...or, if you're using the configuration object:
|
||||
|
||||
$purifier = new HTMLPurifier($config);
|
||||
$clean_html = $purifier->purify( $dirty_html );
|
||||
|
||||
That's it! For more examples, check out docs/examples/ (they aren't very
|
||||
different though). Also, docs/enduser-slow.html gives advice on what to
|
||||
do if HTML Purifier is slowing down your application.
|
||||
|
||||
|
||||
---------------------------------------------------------------------------
|
||||
7. Quick install
|
||||
|
||||
First, make sure library/HTMLPurifier/DefinitionCache/Serializer is
|
||||
writable by the webserver (see Section 5: Caching above for details).
|
||||
If your website is in UTF-8 and XHTML Transitional, use this code:
|
||||
|
||||
<?php
|
||||
require_once '/path/to/htmlpurifier/library/HTMLPurifier.auto.php';
|
||||
|
||||
$purifier = new HTMLPurifier();
|
||||
$clean_html = $purifier->purify($dirty_html);
|
||||
?>
|
||||
|
||||
If your website is in a different encoding or doctype, use this code:
|
||||
|
||||
<?php
|
||||
require_once '/path/to/htmlpurifier/library/HTMLPurifier.auto.php';
|
||||
|
||||
$config = HTMLPurifier_Config::createDefault();
|
||||
$config->set('Core', 'Encoding', 'ISO-8859-1'); // replace with your encoding
|
||||
$config->set('HTML', 'Doctype', 'HTML 4.01 Transitional'); // replace with your doctype
|
||||
$purifier = new HTMLPurifier($config);
|
||||
|
||||
$clean_html = $purifier->purify($dirty_html);
|
||||
?>
|
||||
|
||||
|
@@ -17,7 +17,7 @@ ce document pour quelques choses.
|
||||
|
||||
1. Compatibilité
|
||||
|
||||
HTML Purifier fonctionne dans PHP 4 et PHP 5. PHP 4.3.2 est le dernier
|
||||
HTML Purifier fonctionne dans PHP 5. PHP 5.0.0 est le dernier
|
||||
version que je le testais. Il ne dépend de les autre librairies.
|
||||
|
||||
Les extensions optionnel est iconv (en général déjà installer) et
|
||||
|
171
NEWS
171
NEWS
@@ -9,6 +9,173 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier
|
||||
. Internal change
|
||||
==========================
|
||||
|
||||
3.0.0, released 2008-01-06
|
||||
# HTML Purifier is PHP 5 only! The 2.1.x branch will be maintained
|
||||
until PHP 4 is completely deprecated, but no new features will be added
|
||||
to it.
|
||||
+ Visibility declarations added
|
||||
+ Constructor methods renamed to __construct()
|
||||
+ PHP4 reference cruft removed (in progress)
|
||||
! CSS properties are now case-insensitive
|
||||
! DefinitionCacheFactory now can register new implementations
|
||||
! New HTMLPurifier_Filter_ExtractStyleBlocks for extracting <style> from
|
||||
documents and cleaning their contents up. Requires the CSSTidy library
|
||||
<http://csstidy.sourceforge.net/>. You can access the blocks with the
|
||||
'StyleBlocks' Context variable ($purifier->context->get('StyleBlocks')).
|
||||
The output CSS can also be "scoped" for a specific element, use:
|
||||
%Filter.ExtractStyleBlocksScope
|
||||
! Experimental support for some proprietary CSS attributes allowed:
|
||||
opacity (and all of the browser-specific equivalents) and scrollbar colors.
|
||||
Enable by setting %CSS.Proprietary to true.
|
||||
- Colors missing # but in hex form will be corrected
|
||||
- CSS Number algorithm improved
|
||||
- Unit testing and multi-testing now on steroids: command lines,
|
||||
XML output, and other goodies now added.
|
||||
. Unit tests for Injector improved
|
||||
. New classes:
|
||||
+ HTMLPurifier_AttrDef_CSS_AlphaValue
|
||||
+ HTMLPurifier_AttrDef_CSS_Filter
|
||||
. Multitest now has a file docblock
|
||||
|
||||
2.1.3, released 2007-11-05
|
||||
! tests/multitest.php allows you to test multiple versions by running
|
||||
tests/index.php through multiple interpreters using `phpv` shell
|
||||
script (you must provide this script!)
|
||||
- Fixed poor include ordering for Email URI AttrDefs, causes fatal errors
|
||||
on some systems.
|
||||
- Injector algorithm further refined: off-by-one error regarding skip
|
||||
counts for dormant injectors fixed
|
||||
- Corrective blockquote definition now enabled for HTML 4.01 Strict
|
||||
- Fatal error when <img> tag (or any other element with required attributes)
|
||||
has 'id' attribute fixed, thanks NykO18 for reporting
|
||||
- Fix warning emitted when a non-supported URI scheme is passed to the
|
||||
MakeAbsolute URIFilter, thanks NykO18 (again)
|
||||
- Further refine AutoParagraph injector. Behavior inside of elements
|
||||
allowing paragraph tags clarified: only inline content delimeted by
|
||||
double newlines (not block elements) are paragraphed.
|
||||
- Buggy treatment of end tags of elements that have required attributes
|
||||
fixed (does not manifest on default tag-set)
|
||||
- Spurious internal content reorganization error suppressed
|
||||
- HTMLDefinition->addElement now returns a reference to the created
|
||||
element object, as implied by the documentation
|
||||
- Phorum mod's HTML Purifier help message expanded (unreleased elsewhere)
|
||||
- Fix a theoretical class of infinite loops from DirectLex reported
|
||||
by Nate Abele
|
||||
- Work around unnecessary DOMElement type-cast in PH5P that caused errors
|
||||
in PHP 5.1
|
||||
- Work around PHP 4 SimpleTest lack-of-error complaining for one-time-only
|
||||
HTMLDefinition errors, this may indicate problems with error-collecting
|
||||
facilities in PHP 5
|
||||
- Make ErrorCollectorEMock work in both PHP 4 and PHP 5
|
||||
- Make PH5P work with PHP 5.0 by removing unnecessary array parameter typedef
|
||||
. %Core.AcceptFullDocuments renamed to %Core.ConvertDocumentToFragment
|
||||
to better communicate its purpose
|
||||
. Error unit tests can now specify the expectation of no errors. Future
|
||||
iterations of the harness will be extremely strict about what errors
|
||||
are allowed
|
||||
. Extend Injector hooks to allow for more powerful injector routines
|
||||
. HTMLDefinition->addBlankElement created, as according to the HTMLModule
|
||||
method
|
||||
. Doxygen configuration file updated, with minor improvements
|
||||
. Test runner now checks for similarly named files in conf/ directory too.
|
||||
. Minor cosmetic change to flush-definition-cache.php: trailing newline is
|
||||
outputted
|
||||
. Maintenance script for generating PH5P patch added, original PH5P source
|
||||
file also added under version control
|
||||
. Full unit test runner script title made more descriptive with PHP version
|
||||
. Updated INSTALL file to state that 4.3.7 is the earliest version we
|
||||
are actively testing
|
||||
|
||||
2.1.2, released 2007-09-03
|
||||
! Implemented Object module for trusted users
|
||||
! Implemented experimental HTML5 parsing mode using PH5P. To use, add
|
||||
this to your code:
|
||||
require_once 'HTMLPurifier/Lexer/PH5P.php';
|
||||
$config->set('Core', 'LexerImpl', 'PH5P');
|
||||
Note that this Lexer introduces some classes not in the HTMLPurifier
|
||||
namespace. Also, this is PHP5 only.
|
||||
! CSS property border-spacing implemented
|
||||
- Fix non-visible parsing error in DirectLex with empty tags that have
|
||||
slashes inside attribute values.
|
||||
- Fix typo in CSS definition: border-collapse:seperate; was incorrectly
|
||||
accepted as valid CSS. Usually non-visible, because this styling is the
|
||||
default for tables in most browsers. Thanks Brett Zamir for pointing
|
||||
this out.
|
||||
- Fix validation errors in configuration form
|
||||
- Hammer out a bunch of edge-case bugs in the standalone distribution
|
||||
- Inclusion reflection removed from URISchemeRegistry; you must manually
|
||||
include any new schema files you wish to use
|
||||
- Numerous typo fixes in documentation thanks to Brett Zamir
|
||||
. Unit test refactoring for one logical test per test function
|
||||
. Config and context parameters in ComplexHarness deprecated: instead, edit
|
||||
the $config and $context member variables
|
||||
. HTML wrapper in DOMLex now takes DTD identifiers into account; doesn't
|
||||
really make a difference, but is good for completeness sake
|
||||
. merge-library.php script refactored for greater code reusability and
|
||||
PHP4 compatibility
|
||||
|
||||
2.1.1, released 2007-08-04
|
||||
- Fix show-stopper bug in %URI.MakeAbsolute functionality
|
||||
- Fix PHP4 syntax error in standalone version
|
||||
. Add prefix directory to include path for standalone, this prevents
|
||||
other installations from clobbering the standalone's URI schemes
|
||||
. Single test methods can be invoked by prefixing with __only
|
||||
|
||||
2.1.0, released 2007-08-02
|
||||
# flush-htmldefinition-cache.php superseded in favor of a generic
|
||||
flush-definition-cache.php script, you can clear a specific cache
|
||||
by passing its name as a parameter to the script
|
||||
! Phorum mod implemented for HTML Purifier
|
||||
! With %Core.AggressivelyFixLt, <3 and similar emoticons no longer
|
||||
trigger HTML removal in PHP5 (DOMLex). This directive is not necessary
|
||||
for PHP4 (DirectLex).
|
||||
! Standalone file now available, which greatly reduces the amount of
|
||||
includes (although there are still a few files that reside in the
|
||||
standalone folder)
|
||||
! Relative URIs can now be transformed into their absolute equivalents
|
||||
using %URI.Base and %URI.MakeAbsolute
|
||||
! Ruby implemented for XHTML 1.1
|
||||
! You can now define custom URI filtering behavior, see enduser-uri-filter.html
|
||||
for more details
|
||||
! UTF-8 font names now supported in CSS
|
||||
- AutoFormatters emit friendly error messages if tags or attributes they
|
||||
need are not allowed
|
||||
- ConfigForm's compactification of directive names is now configurable
|
||||
- AutoParagraph autoformatter algorithm refined after field-testing
|
||||
- XHTML 1.1 now applies XHTML 1.0 Strict cleanup routines, namely
|
||||
blockquote wrapping
|
||||
- Contents of <style> tags removed by default when tags are removed
|
||||
. HTMLPurifier_Config->getSerial() implemented, this is extremely useful
|
||||
for output cache invalidation
|
||||
. ConfigForm printer now can retrieve CSS and JS files as strings, in
|
||||
case HTML Purifier's directory is not publicly accessible
|
||||
. Introduce new text/itext configuration directive values: these represent
|
||||
longer strings that would be more appropriately edited with a textarea
|
||||
. Allow newlines to act as separators for lists, hashes, lookups and
|
||||
%HTML.Allowed
|
||||
. ConfigForm generates textareas instead of text inputs for lists, hashes,
|
||||
lookups, text and itext fields
|
||||
. Hidden element content removal genericized: %Core.HiddenElements can
|
||||
be used to customize this behavior, by default <script> and <style> are
|
||||
hidden
|
||||
. Added HTMLPURIFIER_PREFIX constant, should be used instead of dirname(__FILE__)
|
||||
. Custom ChildDef added to default include list
|
||||
. URIScheme reflection improved: will not attempt to include file if class
|
||||
already exists. May clobber autoload, so I need to keep an eye on it
|
||||
. ConfigSchema heavily optimized, will only collect information and validate
|
||||
definitions when HTMLPURIFIER_SCHEMA_STRICT is true.
|
||||
. AttrDef_URI unit tests and implementation refactored
|
||||
. benchmarks/ directory now protected from public view with .htaccess file;
|
||||
run the tests via command line
|
||||
. URI scheme is munged off if there is no authority and the scheme is the
|
||||
default one
|
||||
. All unit tests inherit from HTMLPurifier_Harness, not UnitTestCase
|
||||
. Interface for URIScheme changed
|
||||
. Generic URI object to hold components of URI added, most systems involved
|
||||
in URI validation have been migrated to use it
|
||||
. Custom filtering for URIs factored out to URIDefinition interface for
|
||||
maximum extensibility
|
||||
|
||||
2.0.1, released 2007-06-27
|
||||
! Tag auto-closing now based on a ChildDef heuristic rather than a
|
||||
manually set auto_close array; some behavior may change
|
||||
@@ -159,8 +326,6 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier
|
||||
%Attr.IDBlacklistRegexp
|
||||
- Error messages are emitted when you attempt to "allow" elements or
|
||||
attributes that HTML Purifier does not support
|
||||
|
||||
|
||||
- Fix segfault in unit test. The problem is not very reproducible and
|
||||
I don't know what causes it, but a six line patch fixed it.
|
||||
|
||||
@@ -359,4 +524,4 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier
|
||||
! First public release, most functionality implemented. Notable omissions are:
|
||||
+ Shorthand CSS properties
|
||||
+ Table CSS properties
|
||||
+ Deprecated attribute transformations
|
||||
+ Deprecated attribute transformations
|
||||
|
64
TODO
64
TODO
@@ -1,3 +1,4 @@
|
||||
|
||||
TODO List
|
||||
|
||||
= KEY ====================
|
||||
@@ -6,20 +7,15 @@ TODO List
|
||||
? Maybe I'll Do It
|
||||
==========================
|
||||
|
||||
2.1 release [Refactor, refactor!]
|
||||
# URI validation routines tighter (see docs/dev-code-quality.html) (COMPLEX)
|
||||
# Advanced URI filtering schemes (see docs/proposal-new-directives.txt)
|
||||
# Ruby support
|
||||
- Configuration profiles: predefined directives set with one func call
|
||||
- Implement IDREF support (harder than it seems, since you cannot have
|
||||
IDREFs to non-existent IDs)
|
||||
- Allow non-ASCII characters in font names
|
||||
If no interest is expressed for a feature that may require a considerable
|
||||
amount of effort to implement, it may get endlessly delayed. Do not be
|
||||
afraid to cast your vote for the next feature to be implemented!
|
||||
|
||||
2.2 release [Error'ed]
|
||||
3.1 release [Error'ed]
|
||||
# Error logging for filtering/cleanup procedures
|
||||
- XSS-attempt detection
|
||||
|
||||
2.3 release [Do What I Mean, Not What I Say]
|
||||
3.2 release [Do What I Mean, Not What I Say]
|
||||
# Additional support for poorly written HTML
|
||||
- Microsoft Word HTML cleaning (i.e. MsoNormal, but research essential!)
|
||||
- Friendly strict handling of <address> (block -> <br>)
|
||||
@@ -33,58 +29,70 @@ TODO List
|
||||
- Remove empty inline tags<i></i>
|
||||
- Append something to duplicate IDs so they're still usable (impl. note: the
|
||||
dupe detector would also need to detect the suffix as well)
|
||||
- Externalize inline CSS to promote clean HTML
|
||||
|
||||
2.4 release [It's All About Trust] (floating)
|
||||
3.3 release [It's All About Trust] (floating)
|
||||
# Implement untrusted, dangerous elements/attributes
|
||||
# Implement IDREF support (harder than it seems, since you cannot have
|
||||
IDREFs to non-existent IDs)
|
||||
# Frameset XHTML 1.0 and HTML 4.01 doctypes
|
||||
|
||||
3.0 release [Beyond HTML]
|
||||
4.0 release [Beyond HTML]
|
||||
# Legit token based CSS parsing (will require revamping almost every
|
||||
AttrDef class)
|
||||
AttrDef class). Probably will use CSSTidy class
|
||||
# More control over allowed CSS properties (maybe modularize it in the
|
||||
same fashion!)
|
||||
# Formatters for plaintext
|
||||
- Smileys
|
||||
- Standardize token armor for all areas of processing
|
||||
- Fixes for Firefox's inability to handle COL alignment props (Bug 915)
|
||||
- Automatically add non-breaking spaces to empty table cells when
|
||||
empty-cells:show is applied to have compatibility with Internet Explorer
|
||||
- Convert RTL/LTR override characters to <bdo> tags, or vice versa on demand.
|
||||
Also, enable disabling of directionality
|
||||
- Table of Contents generation (XHTML Compiler might be reusable)
|
||||
|
||||
4.0 release [To XML and Beyond]
|
||||
5.0 release [To XML and Beyond]
|
||||
- Extended HTML capabilities based on namespacing and tag transforms (COMPLEX)
|
||||
- Hooks for adding custom processors to custom namespaced tags and
|
||||
attributes, offer default implementation
|
||||
- Lots of documentation and samples
|
||||
|
||||
Ongoing
|
||||
- More refactoring to take advantage of PHP5's facilities
|
||||
- Lots of profiling, make it faster!
|
||||
- Plugins for major CMSes (COMPLEX)
|
||||
- WordPress (mostly written, needs beta-testing)
|
||||
- phpBB
|
||||
- Phorum
|
||||
- eFiction
|
||||
- more! (look for ones that use WYSIWYGs)
|
||||
- Complete basic smoketests
|
||||
|
||||
AutoFormat
|
||||
- Smileys
|
||||
- Syntax highlighting with <pre> and possibly <?php
|
||||
- Look at http://drupal.org/project/Modules/category/63 for ideas
|
||||
|
||||
Unknown release (on a scratch-an-itch basis)
|
||||
? Semi-lossy dumb alternate character encoding transformations
|
||||
# CHMOD install script for PEAR installs
|
||||
? Have 'lang' attribute be checked against official lists, achieved by
|
||||
encoding all characters that have string entity equivalents
|
||||
- Explain how to use HTML Purifier in non-PHP languages / create
|
||||
a simple command line stub
|
||||
- Abstract ChildDef_BlockQuote to work with all elements that only
|
||||
allow blocks in them, required or optional
|
||||
- Reorganize Unit Tests
|
||||
- Refactor loop tests (esp. AttrDef_URI)
|
||||
- Reorganize configuration directives (Create more namespaces! Get messy!)
|
||||
- Advanced URI filtering schemes (see docs/proposal-new-directives.txt)
|
||||
- Implement lenient <ruby> child validation
|
||||
- Explain how to use HTML Purifier in non-PHP languages / create
|
||||
a simple command line stub (or complicated?)
|
||||
- Fixes for Firefox's inability to handle COL alignment props (Bug 915)
|
||||
- Automatically add non-breaking spaces to empty table cells when
|
||||
empty-cells:show is applied to have compatibility with Internet Explorer
|
||||
- Distinguish between default settings and explicitly set settings, so
|
||||
configurations can be merged
|
||||
- Nested configuration namespaces
|
||||
- Allow scoped="scoped" attribute in <style> tags; may be troublesome
|
||||
because regular CSS has no way of uniquely identifying nodes, so we'd
|
||||
have to generate IDs
|
||||
|
||||
Requested
|
||||
|
||||
Wontfix
|
||||
- Non-lossy smart alternate character encoding transformations (unless
|
||||
patch provided)
|
||||
- Pretty-printing HTML, users can use Tidy on the output on entire page
|
||||
- Pretty-printing HTML: users can use Tidy on the output on entire page
|
||||
- Native content compression, whitespace stripping (don't rely on Tidy, make
|
||||
sure we don't remove from <pre> or related tags): use gzip if this is
|
||||
really important
|
||||
|
18
WHATSNEW
18
WHATSNEW
@@ -1,8 +1,10 @@
|
||||
In version 2.1, HTML Purifier's URI validation and filtering handling
|
||||
system has been revamped with a new, extensible URIFilter system. Also
|
||||
notable features include preservation of emoticons in PHP5 with
|
||||
%Core.AggressivelyFixLt, standalone and lite download versions,
|
||||
transforming relative URIs to absolute URIs, Ruby in XHTML 1.1, a Phorum
|
||||
mod, and UTF-8 font names. Notable bug-fixes include refinement of
|
||||
the auto-paragraphing algorithm (no longer experimental), better XHTML
|
||||
1.1 support and the removal of the contents of <style> elements.
|
||||
Release 3.0.0 is the first release of 2008 and also HTML Purifier's first
|
||||
PHP 5 only release. The 2.1 series will still be supported for bug and
|
||||
security fixes, but will not get new features. This release has a number of
|
||||
improvements in CSS handling, including the filter
|
||||
HTMLPurifier_Filter_ExtractStyleBlocks which integrates HTML Purifier with
|
||||
CSSTidy for cleaning style sheets, contains experimental support for
|
||||
proprietary CSS properties with %CSS.Proprietary, case-insensitive
|
||||
CSS properties, and more lenient hexadecimal color codes. Also, all code
|
||||
has been upgraded to full PHP 5 which is E_STRICT clean for all versions
|
||||
of PHP 5 (including the 5.0 series).
|
||||
|
1
benchmarks/.htaccess
Normal file
1
benchmarks/.htaccess
Normal file
@@ -0,0 +1 @@
|
||||
Deny from all
|
@@ -1,13 +1,11 @@
|
||||
<?php
|
||||
|
||||
// emulates inserting a dir called HTMLPurifier into your class dir
|
||||
set_include_path(get_include_path() . PATH_SEPARATOR . '../library/');
|
||||
|
||||
require_once '../library/HTMLPurifier.auto.php';
|
||||
@include_once '../test-settings.php';
|
||||
|
||||
require_once 'HTMLPurifier/ConfigSchema.php';
|
||||
require_once 'HTMLPurifier/Config.php';
|
||||
require_once 'HTMLPurifier/Context.php';
|
||||
// PEAR
|
||||
require_once 'Benchmark/Timer.php'; // to do the timing
|
||||
require_once 'Text/Password.php'; // for generating random input
|
||||
|
||||
$LEXERS = array();
|
||||
$RUNS = isset($GLOBALS['HTMLPurifierTest']['Runs'])
|
||||
@@ -16,22 +14,11 @@ $RUNS = isset($GLOBALS['HTMLPurifierTest']['Runs'])
|
||||
require_once 'HTMLPurifier/Lexer/DirectLex.php';
|
||||
$LEXERS['DirectLex'] = new HTMLPurifier_Lexer_DirectLex();
|
||||
|
||||
if (!empty($GLOBALS['HTMLPurifierTest']['PEAR'])) {
|
||||
require_once 'HTMLPurifier/Lexer/PEARSax3.php';
|
||||
$LEXERS['PEARSax3'] = new HTMLPurifier_Lexer_PEARSax3();
|
||||
} else {
|
||||
exit('PEAR required to perform benchmark.');
|
||||
}
|
||||
|
||||
if (version_compare(PHP_VERSION, '5', '>=')) {
|
||||
require_once 'HTMLPurifier/Lexer/DOMLex.php';
|
||||
$LEXERS['DOMLex'] = new HTMLPurifier_Lexer_DOMLex();
|
||||
}
|
||||
|
||||
// PEAR
|
||||
require_once 'Benchmark/Timer.php'; // to do the timing
|
||||
require_once 'Text/Password.php'; // for generating random input
|
||||
|
||||
// custom class to aid unit testing
|
||||
class RowTimer extends Benchmark_Timer
|
||||
{
|
||||
|
@@ -1,17 +0,0 @@
|
||||
<?php
|
||||
|
||||
set_include_path(get_include_path() . PATH_SEPARATOR . '../library/');
|
||||
|
||||
require_once 'HTMLPurifier/ConfigSchema.php';
|
||||
require_once 'HTMLPurifier/Config.php';
|
||||
require_once 'HTMLPurifier/Lexer/DirectLex.php';
|
||||
require_once 'HTMLPurifier/Context.php';
|
||||
|
||||
$input = file_get_contents('samples/Lexer/4.html');
|
||||
$lexer = new HTMLPurifier_Lexer_DirectLex();
|
||||
$config = HTMLPurifier_Config::createDefault();
|
||||
$context = new HTMLPurifier_Context();
|
||||
|
||||
for ($i = 0; $i < 10; $i++) {
|
||||
$tokens = $lexer->tokenizeHTML($input, $config, $context);
|
||||
}
|
19
benchmarks/Trace.php
Normal file
19
benchmarks/Trace.php
Normal file
@@ -0,0 +1,19 @@
|
||||
<?php
|
||||
|
||||
ini_set('xdebug.trace_format', 1);
|
||||
ini_set('xdebug.show_mem_delta', true);
|
||||
|
||||
if (file_exists('Trace.xt')) {
|
||||
echo "Previous trace Trace.xt must be removed before this script can be run.";
|
||||
exit;
|
||||
}
|
||||
|
||||
xdebug_start_trace(dirname(__FILE__) . '/Trace');
|
||||
require_once '../library/HTMLPurifier.auto.php';
|
||||
|
||||
$purifier = new HTMLPurifier();
|
||||
|
||||
$data = $purifier->purify(file_get_contents('samples/Lexer/4.html'));
|
||||
xdebug_stop_trace();
|
||||
|
||||
echo "Trace finished.";
|
@@ -4,10 +4,21 @@ require_once 'ConfigDoc/HTMLXSLTProcessor.php';
|
||||
require_once 'ConfigDoc/XMLSerializer/Types.php';
|
||||
require_once 'ConfigDoc/XMLSerializer/ConfigSchema.php';
|
||||
|
||||
/**
|
||||
* Facade class for configuration documentation system
|
||||
*/
|
||||
class ConfigDoc
|
||||
{
|
||||
|
||||
function generate($schema, $xsl_stylesheet_name = 'plain', $parameters = array()) {
|
||||
/**
|
||||
* Generates configuration documentation based on a HTMLPurifier_ConfigSchema
|
||||
* object and stylesheet name
|
||||
* @param $schema Instance of HTMLPurifier_ConfigSchema to document
|
||||
* @param $xsl_stylesheet_name Name of XSL stylesheet in ../styles/ directory to use
|
||||
* @param $parameters Extra parameters to pass to the stylesheet
|
||||
* @return string HTML output
|
||||
*/
|
||||
public function generate($schema, $xsl_stylesheet_name = 'plain', $parameters = array()) {
|
||||
// generate types document, describing type constraints
|
||||
$types_serializer = new ConfigDoc_XMLSerializer_Types();
|
||||
$types_document = $types_serializer->serialize($schema);
|
||||
@@ -29,9 +40,10 @@ class ConfigDoc
|
||||
|
||||
/**
|
||||
* Remove any generated files
|
||||
* @return boolean Success?
|
||||
*/
|
||||
function cleanup() {
|
||||
unlink('configdoc.xml');
|
||||
public function cleanup() {
|
||||
return unlink('configdoc.xml');
|
||||
}
|
||||
|
||||
}
|
||||
|
@@ -1,12 +1,15 @@
|
||||
<?php
|
||||
|
||||
/**
|
||||
* Special XSLTProcessor specifically for HTML documents. Loosely
|
||||
* based off of XSLTProcessor, but not really
|
||||
* Special XSLT processor specifically for HTML documents. Loosely
|
||||
* based off of XSLTProcessor, but does not inherit from that class
|
||||
*/
|
||||
class ConfigDoc_HTMLXSLTProcessor
|
||||
{
|
||||
|
||||
/**
|
||||
* Instance of XSLTProcessor
|
||||
*/
|
||||
protected $xsltProcessor;
|
||||
|
||||
public function __construct() {
|
||||
@@ -16,6 +19,7 @@ class ConfigDoc_HTMLXSLTProcessor
|
||||
/**
|
||||
* Imports stylesheet for processor to use
|
||||
* @param $xsl XSLT DOM tree, or filename of the XSL transformation
|
||||
* @return bool Success?
|
||||
*/
|
||||
public function importStylesheet($xsl) {
|
||||
if (is_string($xsl)) {
|
||||
@@ -27,16 +31,20 @@ class ConfigDoc_HTMLXSLTProcessor
|
||||
}
|
||||
|
||||
/**
|
||||
* Transforms an XML file into HTML based on the stylesheet
|
||||
* Transforms an XML file into compatible XHTML based on the stylesheet
|
||||
* @param $xml XML DOM tree
|
||||
* @return string HTML output
|
||||
* @todo Rename to transformToXHTML, as transformToHTML is misleading
|
||||
*/
|
||||
public function transformToHTML($xml) {
|
||||
$out = $this->xsltProcessor->transformToXML($xml);
|
||||
|
||||
// fudges for HTML backwards compatibility
|
||||
// assumes that document is XHTML
|
||||
$out = str_replace('/>', ' />', $out); // <br /> not <br/>
|
||||
$out = str_replace(' xmlns=""', '', $out); // rm unnecessary xmlns
|
||||
$out = str_replace(' xmlns="http://www.w3.org/1999/xhtml"', '', $out); // rm unnecessary xmlns
|
||||
|
||||
if (class_exists('Tidy')) {
|
||||
// cleanup output
|
||||
$config = array(
|
||||
@@ -49,9 +57,14 @@ class ConfigDoc_HTMLXSLTProcessor
|
||||
$tidy->cleanRepair();
|
||||
$out = (string) $tidy;
|
||||
}
|
||||
|
||||
return $out;
|
||||
}
|
||||
|
||||
/**
|
||||
* Bulk sets parameters for the XSL stylesheet
|
||||
* @param array $options Associative array of options to set
|
||||
*/
|
||||
public function setParameters($options) {
|
||||
foreach ($options as $name => $value) {
|
||||
$this->xsltProcessor->setParameter('', $name, $value);
|
||||
|
@@ -8,16 +8,22 @@
|
||||
class ConfigDoc_XMLSerializer
|
||||
{
|
||||
|
||||
/**
|
||||
* Appends a div containing HTML into a node
|
||||
* @param $document Base document node belongs to
|
||||
* @param $node Node to append to
|
||||
* @param $html HTML to place inside div to append
|
||||
* @todo Place this directly in DOMNode, using registerNodeClass to
|
||||
* override.
|
||||
*/
|
||||
protected function appendHTMLDiv($document, $node, $html) {
|
||||
$purifier = HTMLPurifier::getInstance();
|
||||
$html = $purifier->purify($html);
|
||||
$dom_html = $document->createDocumentFragment();
|
||||
$dom_html->appendXML($html);
|
||||
|
||||
$dom_div = $document->createElement('div');
|
||||
$dom_div->setAttribute('xmlns', 'http://www.w3.org/1999/xhtml');
|
||||
$dom_div->appendChild($dom_html);
|
||||
|
||||
$node->appendChild($dom_div);
|
||||
}
|
||||
|
||||
|
@@ -9,6 +9,7 @@ class ConfigDoc_XMLSerializer_ConfigSchema extends ConfigDoc_XMLSerializer
|
||||
* Serializes a schema into DOM form
|
||||
* @todo Split into sub-serializers
|
||||
* @param $schema HTMLPurifier_ConfigSchema to serialize
|
||||
* @return DOMDocument representation of schema
|
||||
*/
|
||||
public function serialize($schema) {
|
||||
$dom_document = new DOMDocument('1.0', 'UTF-8');
|
||||
|
@@ -8,6 +8,7 @@ class ConfigDoc_XMLSerializer_Types extends ConfigDoc_XMLSerializer
|
||||
/**
|
||||
* Serializes the types in a schema into DOM form
|
||||
* @param $schema HTMLPurifier_ConfigSchema owner of types to serialize
|
||||
* @return DOMDocument representing schema types
|
||||
*/
|
||||
public function serialize($schema) {
|
||||
$types_document = new DOMDocument('1.0', 'UTF-8');
|
||||
|
@@ -39,7 +39,7 @@ thead th {text-align:left;padding:0.1em;background-color:#EEE;}
|
||||
<table cellspacing="0"><tbody>
|
||||
<tr><td class="impl-yes">Implemented</td></tr>
|
||||
<tr><td class="impl-partial">Partially implemented</td></tr>
|
||||
<tr><td class="impl-no">Will not implement</td></tr>
|
||||
<tr><td class="impl-no">Not priority to implement</td></tr>
|
||||
<tr><td class="danger">Dangerous attribute/property</td></tr>
|
||||
<tr><td class="css1">Present in CSS1</td></tr>
|
||||
<tr><td class="feature">Feature, requires extra work</td></tr>
|
||||
@@ -118,6 +118,7 @@ thead th {text-align:left;padding:0.1em;background-color:#EEE;}
|
||||
<tbody>
|
||||
<tr><th colspan="2">Table</th></tr>
|
||||
<tr class="impl-yes"><td>border-collapse</td><td>ENUM(collapse, separate)</td></tr>
|
||||
<tr class="impl-yes"><td>border-spacing</td><td>MULTIPLE</td></tr>
|
||||
<tr class="impl-yes"><td>caption-side</td><td>ENUM(top, bottom)</td></tr>
|
||||
<tr class="feature"><td>empty-cells</td><td>ENUM(show, hide), No IE support makes this useless,
|
||||
possible fix with &nbsp;? Unknown release milestone.</td></tr>
|
||||
|
@@ -32,7 +32,7 @@
|
||||
Before we even write any code, it is paramount to consider whether or
|
||||
not the code we're writing is necessary or not. HTML Purifier, by default,
|
||||
contains a large set of elements and attributes: large enough so that
|
||||
<em>any</em> element or attribute in XHTML 1.0 (and its HTML variant)
|
||||
<em>any</em> element or attribute in XHTML 1.0 or 1.1 (and its HTML variants)
|
||||
that can be safely used by the general public is implemented.
|
||||
</p>
|
||||
|
||||
@@ -76,11 +76,12 @@
|
||||
<h3>XHTML 1.1</h3>
|
||||
|
||||
<p>
|
||||
We have not implemented the
|
||||
As of HTMLPurifier 2.1.0, we have implemented the
|
||||
<a href="http://www.w3.org/TR/2001/REC-ruby-20010531/">Ruby module</a>,
|
||||
which defines a set of tags
|
||||
for publishing short annotations for text, used mostly in Japanese
|
||||
and Chinese school texts.
|
||||
and Chinese school texts, but applicable for positioning any text (not
|
||||
limited to translations) above or below other corresponding text.
|
||||
</p>
|
||||
|
||||
<h3>XHTML 2.0</h3>
|
||||
@@ -492,10 +493,11 @@ $def =& $config->getHTMLDefinition(true);
|
||||
<p>
|
||||
The <code>(%flow;)*</code> indicates the allowed children of the
|
||||
<code>li</code> tag: <code>li</code> allows any number of flow
|
||||
elements as its children. In HTML Purifier, we'd write it like
|
||||
<code>Flow</code> (here's where the content sets we were
|
||||
discussing earlier come into play). There are three shorthand content models you
|
||||
can specify:
|
||||
elements as its children. (The <code>- O</code> allows the closing tag to be
|
||||
omitted, though in XML this is not allowed.) In HTML Purifier,
|
||||
we'd write it like <code>Flow</code> (here's where the content sets
|
||||
we were discussing earlier come into play). There are three shorthand
|
||||
content models you can specify:
|
||||
</p>
|
||||
|
||||
<table class="table">
|
||||
@@ -668,12 +670,22 @@ $def =& $config->getHTMLDefinition(true);
|
||||
Common is a combination of the above-mentioned collections.
|
||||
</p>
|
||||
|
||||
<p class="aside">
|
||||
Readers familiar with the modularization may have noticed that the Core
|
||||
attribute collection differs from that specified by the <a
|
||||
href="http://www.w3.org/TR/xhtml-modularization/abstract_modules.html#s_commonatts">abstract
|
||||
modules of the XHTML Modularization 1.1</a>. We believe this section
|
||||
to be in error, as <code>br</code> permits the use of the <code>style</code>
|
||||
attribute even though it uses the <code>Core</code> collection, and
|
||||
the DTD and XML Schemas supplied by W3C support our interpretation.
|
||||
</p>
|
||||
|
||||
<h3>Attributes</h3>
|
||||
|
||||
<p>
|
||||
If you didn't read the <a href="#addAttribute">previous section on
|
||||
If you didn't read the <a href="#addAttribute">earlier section on
|
||||
adding attributes</a>, read it now. The last parameter is simply
|
||||
array of attribute names to attribute implementations, in the exact
|
||||
an array of attribute names to attribute implementations, in the exact
|
||||
same format as <code>addAttribute()</code>.
|
||||
</p>
|
||||
|
||||
|
@@ -58,7 +58,7 @@ appear elsewhere on the document. The method is simple:</p>
|
||||
|
||||
<pre>$config->set('HTML', 'EnableAttrID', true);
|
||||
$config->set('Attr', 'IDBlacklist', array(
|
||||
'list', 'of', 'attributes', 'that', 'are', 'forbidden'
|
||||
'list', 'of', 'attribute', 'values', 'that', 'are', 'forbidden'
|
||||
));</pre>
|
||||
|
||||
<p>That being said, there are some notable drawbacks. First of all, you have to
|
||||
@@ -71,9 +71,9 @@ to possible standards-compliance issues.</p>
|
||||
<p>Furthermore, this position becomes untenable when a single web page must hold
|
||||
multiple portions of user-submitted content. Since there's obviously no way
|
||||
to find out before-hand what IDs users will use, the blacklist is helpless.
|
||||
And even since HTML Purifier validates each segment seperately, perhaps doing
|
||||
And since HTML Purifier validates each segment separately, perhaps doing
|
||||
so at different times, it would be extremely difficult to dynamically update
|
||||
the blacklist inbetween runs.</p>
|
||||
the blacklist in between runs.</p>
|
||||
|
||||
<p>Finally, simply destroying the ID is extremely un-userfriendly behavior: after
|
||||
all, they might have simply specified a duplicate ID by accident.</p>
|
||||
|
@@ -22,7 +22,7 @@ out:</p>
|
||||
|
||||
<p class="emphasis">This ain't HTML Tidy!</p>
|
||||
|
||||
<p>Rather, Tidy stands for a cool set of Tidy-inspired in HTML Purifier
|
||||
<p>Rather, Tidy stands for a cool set of Tidy-inspired features in HTML Purifier
|
||||
that allows users to submit deprecated elements and attributes and get
|
||||
valid strict markup back. For example:</p>
|
||||
|
||||
@@ -33,8 +33,8 @@ valid strict markup back. For example:</p>
|
||||
<pre><div style="text-align:center;">Centered</div></pre>
|
||||
|
||||
<p>...when this particular fix is run on the HTML. This tutorial will give
|
||||
you down the lowdown of what exactly HTML Purifier will do when Tidy
|
||||
is on, and how to fine tune this behavior. Once again, <strong>you do
|
||||
you the lowdown of what exactly HTML Purifier will do when Tidy
|
||||
is on, and how to fine-tune this behavior. Once again, <strong>you do
|
||||
not need Tidy installed on your PHP to use these features!</strong></p>
|
||||
|
||||
<h2>What does it do?</h2>
|
||||
@@ -221,7 +221,7 @@ general syntax:</p>
|
||||
|
||||
<p>The lowdown is, quite frankly, HTML Purifier's default settings are
|
||||
probably good enough. The next step is to bump the level up to heavy,
|
||||
and if that still doesn't satisfy your appetite, do some fine tuning.
|
||||
and if that still doesn't satisfy your appetite, do some fine-tuning.
|
||||
Other than that, don't worry about it: this all works silently and
|
||||
effectively in the background.</p>
|
||||
|
||||
|
@@ -96,7 +96,7 @@ which can be a rewarding (but difficult) task.</p>
|
||||
<h2 id="findcharset">Finding the real encoding</h2>
|
||||
|
||||
<p>In the beginning, there was ASCII, and things were simple. But they
|
||||
weren't good, for no one could write in Cryllic or Thai. So there
|
||||
weren't good, for no one could write in Cyrillic or Thai. So there
|
||||
exploded a proliferation of character encodings to remedy the problem
|
||||
by extending the characters ASCII could express. This ridiculously
|
||||
simplified version of the history of character encodings shows us that
|
||||
@@ -138,7 +138,7 @@ browser:</p>
|
||||
<dd>View > Encoding: bulleted item is unofficial name</dd>
|
||||
</dl>
|
||||
|
||||
<p>Internet Explorer won't give you the mime (i.e. useful/real) name of the
|
||||
<p>Internet Explorer won't give you the MIME (i.e. useful/real) name of the
|
||||
character encoding, so you'll have to look it up using their description.
|
||||
Some common ones:</p>
|
||||
|
||||
@@ -216,6 +216,12 @@ if your <code>META</code> tag claims that either:</p>
|
||||
|
||||
<h2 id="fixcharset">Fixing the encoding</h2>
|
||||
|
||||
<p class="aside">The advice given here is for pages being served as
|
||||
vanilla <code>text/html</code>. Different practices must be used
|
||||
for <code>application/xml</code> or <code>application/xhtml+xml</code>, see
|
||||
<a href="http://www.w3.org/TR/2002/NOTE-xhtml-media-types-20020430/">W3C's
|
||||
document on XHTML media types</a> for more information.</p>
|
||||
|
||||
<p>If your <code>META</code> encoding and your real encoding match,
|
||||
savvy! You can skip this section. If they don't...</p>
|
||||
|
||||
@@ -231,7 +237,7 @@ of your real encoding.</p>
|
||||
why the character encoding should be explicitly stated. When the
|
||||
browser isn't told what the character encoding of a text is, it
|
||||
has to guess: and sometimes the guess is wrong. Hackers can manipulate
|
||||
this guess in order to slip XSS pass filters and then fool the
|
||||
this guess in order to slip XSS past filters and then fool the
|
||||
browser into executing it as active code. A great example of this
|
||||
is the <a href="http://shiflett.org/archive/177">Google UTF-7
|
||||
exploit</a>.</p>
|
||||
@@ -302,7 +308,8 @@ languages</a>. The appropriate code is:</p>
|
||||
|
||||
<p>...replacing UTF-8 with whatever your embedded encoding is.
|
||||
This code must come before any output, so be careful about
|
||||
stray whitespace in your application.</p>
|
||||
stray whitespace in your application (i.e., any whitespace before
|
||||
output excluding whitespace within <?php ?> tags).</p>
|
||||
|
||||
<h4 id="fixcharset-server-phpini">PHP ini directive</h4>
|
||||
|
||||
@@ -313,8 +320,8 @@ header call: <code><a href="http://php.net/ini.core#ini.default-charset">default
|
||||
|
||||
<p>...will also do the trick. If PHP is running as an Apache module (and
|
||||
not as FastCGI, consult
|
||||
<a href="http://php.net/phpinfo">phpinfo</a>() for details), you can even use htaccess do apply this property
|
||||
globally:</p>
|
||||
<a href="http://php.net/phpinfo">phpinfo</a>() for details), you can even use htaccess to apply this property
|
||||
across many PHP files:</p>
|
||||
|
||||
<pre><a href="http://php.net/configuration.changes#configuration.changes.apache">php_value</a> default_charset "UTF-8"</pre>
|
||||
|
||||
@@ -360,10 +367,11 @@ to send anything at all:</p>
|
||||
|
||||
<pre><a href="http://httpd.apache.org/docs/1.3/mod/core.html#adddefaultcharset">AddDefaultCharset</a> Off</pre>
|
||||
|
||||
<p>...making your <code>META</code> tags the sole source of
|
||||
character encoding information. In these cases, it is
|
||||
<em>especially</em> important to make sure you have valid <code>META</code>
|
||||
tags on your pages and all the text before them is ASCII.</p>
|
||||
<p>...making your internal charset declaration (usually the <code>META</code> tags)
|
||||
the sole source of character encoding
|
||||
information. In these cases, it is <em>especially</em> important to make
|
||||
sure you have valid <code>META</code> tags on your pages and all the
|
||||
text before them is ASCII.</p>
|
||||
|
||||
<blockquote class="aside"><p>These directives can also be
|
||||
placed in httpd.conf file for Apache, but
|
||||
@@ -428,28 +436,30 @@ IIS to change character encodings, I'd be grateful.</p>
|
||||
|
||||
<p><code>META</code> tags are the most common source of embedded
|
||||
encodings, but they can also come from somewhere else: XML
|
||||
processing instructions. They look like:</p>
|
||||
Declarations. They look like:</p>
|
||||
|
||||
<pre><?xml version="1.0" encoding="UTF-8"?></pre>
|
||||
|
||||
<p>...and are most often found in XML documents (including XHTML).</p>
|
||||
|
||||
<p>For XHTML, this processing instruction theoretically
|
||||
<p>For XHTML, this XML Declaration theoretically
|
||||
overrides the <code>META</code> tag. In reality, this happens only when the
|
||||
XHTML is actually served as legit XML and not HTML, which is almost
|
||||
never due to Internet Explorer's lack of support for
|
||||
<code>application/xhtml+xml</code> (even though doing so is often
|
||||
argued to be <a href="http://www.hixie.ch/advocacy/xhtml">good practice</a>).</p>
|
||||
argued to be <a href="http://www.hixie.ch/advocacy/xhtml">good
|
||||
practice</a> and is required by the XHTML 1.1 specification).</p>
|
||||
|
||||
<p>For XML, however, this processing instruction is extremely important.
|
||||
<p>For XML, however, this XML Declaration is extremely important.
|
||||
Since most webservers are not configured to send charsets for .xml files,
|
||||
this is the only thing a parser has to go on. Furthermore, the default
|
||||
for XML files is UTF-8, which often butts heads with more common
|
||||
ISO-8859-1 encoding (you see this in garbled RSS feeds).</p>
|
||||
|
||||
<p>In short, if you use XHTML and have gone through the
|
||||
trouble of adding the XML header, make sure it jives
|
||||
with your <code>META</code> tags and HTTP headers.</p>
|
||||
trouble of adding the XML Declaration, make sure it jibes
|
||||
with your <code>META</code> tags (which should only be present
|
||||
if served in text/html) and HTTP headers.</p>
|
||||
|
||||
<h3 id="fixcharset-internals">Inside the process</h3>
|
||||
|
||||
@@ -506,7 +516,7 @@ usage in one language sometimes requires the occasional special character
|
||||
that, without surprise, is not available in your character set. Sometimes
|
||||
developers get around this by adding support for multiple encodings: when
|
||||
using Chinese, use Big5, when using Japanese, use Shift-JIS, when
|
||||
using Greek, etc. Other times, they use character entities with great
|
||||
using Greek, etc. Other times, they use character references with great
|
||||
zeal.</p>
|
||||
|
||||
<p>UTF-8, however, obviates the need for any of these complicated
|
||||
@@ -520,14 +530,14 @@ you don't have to use those user-unfriendly entities.</p>
|
||||
|
||||
<p>Websites encoded in Latin-1 (ISO-8859-1) which occasionally need
|
||||
a special character outside of their scope often will use a character
|
||||
entity to achieve the desired effect. For instance, θ can be
|
||||
entity reference to achieve the desired effect. For instance, θ can be
|
||||
written <code>&theta;</code>, regardless of the character encoding's
|
||||
support of Greek letters.</p>
|
||||
|
||||
<p>This works nicely for limited use of special characters, but
|
||||
say you wanted this sentence of Chinese text: 激光,
|
||||
這兩個字是甚麼意思.
|
||||
The entity-ized version would look like this:</p>
|
||||
The ampersand encoded version would look like this:</p>
|
||||
|
||||
<pre>&#28608;&#20809;, &#36889;&#20841;&#20491;&#23383;&#26159;&#29978;&#40636;&#24847;&#24605;</pre>
|
||||
|
||||
@@ -545,7 +555,7 @@ an application that originally used ISO-8859-1 but switched to UTF-8
|
||||
when it became far too cumbersome to support foreign languages. Bots
|
||||
will now actually go through articles and convert character entities
|
||||
to their corresponding real characters for the sake of user-friendliness
|
||||
and searcheability. See
|
||||
and searchability. See
|
||||
<a href="http://meta.wikimedia.org/wiki/Help:Special_characters">Meta's
|
||||
page on special characters</a> for more details.
|
||||
</p></blockquote>
|
||||
@@ -567,10 +577,11 @@ which may be used by POST, and is required when you want to upload
|
||||
files.</p>
|
||||
|
||||
<p>The following is a summarization of notes from
|
||||
<a href="http://ppewww.physics.gla.ac.uk/~flavell/charset/form-i18n.html">
|
||||
<a href="http://web.archive.org/web/20060427015200/ppewww.ph.gla.ac.uk/~flavell/charset/form-i18n.html">
|
||||
<code>FORM</code> submission and i18n</a>. That document contains lots
|
||||
of useful information, but is written in a rambly manner, so
|
||||
here I try to get right to the point.</p>
|
||||
here I try to get right to the point. (Note: the original has
|
||||
disappeared off the web, so I am linking to the Web Archive copy.)</p>
|
||||
|
||||
<h4 id="whyutf8-forms-urlencoded"><code>application/x-www-form-urlencoded</code></h4>
|
||||
|
||||
@@ -592,7 +603,7 @@ browser you're using, they might:</p>
|
||||
<ul>
|
||||
<li>Replace the unsupported characters with useless question marks,</li>
|
||||
<li>Attempt to fix the characters (example: smart quotes to regular quotes),</li>
|
||||
<li>Replace the character with a character entity, or</li>
|
||||
<li>Replace the character with a character entity reference, or</li>
|
||||
<li>Send it anyway as a different character encoding mixed in
|
||||
with the original encoding (usually Windows-1252 rather than
|
||||
iso-8859-1 or UTF-8 interspersed in 8-bit)</li>
|
||||
@@ -608,7 +619,7 @@ since UTF-8 supports every character.</p>
|
||||
|
||||
<h4 id="whyutf8-forms-multipart"><code>multipart/form-data</code></h4>
|
||||
|
||||
<p>Multipart form submission takes a way a lot of the ambiguity
|
||||
<p>Multipart form submission takes away a lot of the ambiguity
|
||||
that percent-encoding had: the server now can explicitly ask for
|
||||
certain encodings, and the client can explicitly tell the server
|
||||
during the form submission what encoding the fields are in.</p>
|
||||
@@ -621,9 +632,9 @@ Each method has deficiencies, especially the former.</p>
|
||||
<p>If you tell the browser to send the form in the same encoding as
|
||||
the page, you still have the trouble of what to do with characters
|
||||
that are outside of the character encoding's range. The behavior, once
|
||||
again, varies: Firefox 2.0 entity-izes them while Internet Explorer
|
||||
7.0 mangles them beyond intelligibility. For serious internationalization purposes,
|
||||
this is not an option.</p>
|
||||
again, varies: Firefox 2.0 converts them to character entity references
|
||||
while Internet Explorer 7.0 mangles them beyond intelligibility. For
|
||||
serious internationalization purposes, this is not an option.</p>
|
||||
|
||||
<p>The other possibility is to set Accept-Encoding to UTF-8, which
|
||||
begs the question: Why aren't you using UTF-8 for everything then?
|
||||
@@ -663,12 +674,12 @@ it up to the module iconv to do the dirty work.</p>
|
||||
<p>This approach, however, is not perfect. iconv is blithely unaware
|
||||
of HTML character entities. HTML Purifier, in order to
|
||||
protect against sophisticated escaping schemes, normalizes all character
|
||||
and numeric entities before processing the text. This leads to
|
||||
and numeric entity references before processing the text. This leads to
|
||||
one important ramification:</p>
|
||||
|
||||
<p><strong>Any character that is not supported by the target character
|
||||
set, regardless of whether or not it is in the form of a character
|
||||
entity or a raw character, will be silently ignored.</strong></p>
|
||||
entity reference or a raw character, will be silently ignored.</strong></p>
|
||||
|
||||
<p>Example of this principle at work: say you have <code>&theta;</code>
|
||||
in your HTML, but the output is in Latin-1 (which, understandably,
|
||||
@@ -677,7 +688,7 @@ set the encoding correctly using %Core.Encoding):</p>
|
||||
|
||||
<ul>
|
||||
<li>The <code>Encoder</code> will transform the text from ISO 8859-1 to UTF-8
|
||||
(note that theta is preserved since it doesn't actually use
|
||||
(note that theta is preserved here since it doesn't actually use
|
||||
any non-ASCII characters): <code>&theta;</code></li>
|
||||
<li>The <code>EntityParser</code> will transform all named and numeric
|
||||
character entities to their corresponding raw UTF-8 equivalents:
|
||||
@@ -700,7 +711,7 @@ Purifier has provided a slightly more palatable workaround using
|
||||
<li>The <code>EntityParser</code> transforms entities: <code>θ</code></li>
|
||||
<li>HTML Purifier processes the code: <code>θ</code></li>
|
||||
<li>The <code>Encoder</code> replaces all non-ASCII characters
|
||||
with numeric entities: <code>&#952;</code></li>
|
||||
with numeric entity references: <code>&#952;</code></li>
|
||||
<li>For good measure, <code>Encoder</code> transforms encoding back to
|
||||
original (which is strictly unnecessary for 99% of encodings
|
||||
out there): <code>&#952;</code> (remember, it's all ASCII!)</li>
|
||||
@@ -710,19 +721,19 @@ Purifier has provided a slightly more palatable workaround using
|
||||
the land of Unicode characters, and is totally unacceptable for Chinese
|
||||
or Japanese texts. The even bigger kicker is that, supposing the
|
||||
input encoding was actually ISO-8859-7, which <em>does</em> support
|
||||
theta, the character would get entity-ized anyway! (The Encoder does
|
||||
not discriminate).</p>
|
||||
theta, the character would get converted into a character entity reference
|
||||
anyway! (The Encoder does not discriminate).</p>
|
||||
|
||||
<p>The current functionality is about where HTML Purifier will be for
|
||||
the rest of eternity. HTML Purifier could attempt to preserve the original
|
||||
form of the entities so that they could be substituted back in, only the
|
||||
form of the character references so that they could be substituted back in, only the
|
||||
DOM extension kills them off irreversibly. HTML Purifier could also attempt
|
||||
to be smart and only convert non-ASCII characters that weren't supported
|
||||
by the target encoding, but that would require reimplementing iconv
|
||||
with HTML awareness, something I will not do.</p>
|
||||
|
||||
<p>So there: either it's UTF-8 or crippled international support. Your pick! (and I'm
|
||||
not being sarcastic here: some people could care less about other languages)</p>
|
||||
not being sarcastic here: some people couldn't care less about other languages).</p>
|
||||
|
||||
<h2 id="migrate">Migrate to UTF-8</h2>
|
||||
|
||||
@@ -984,7 +995,7 @@ and yes, it is variable width. Other traits:</p>
|
||||
in different ways. It is beyond the scope of this document to explain
|
||||
what precisely these implications are. PHPWact provides
|
||||
a very good <a href="http://www.phpwact.org/php/i18n/utf-8">reference document</a>
|
||||
on what to expect from each functions, although coverage is spotty in
|
||||
on what to expect from each function, although coverage is spotty in
|
||||
some areas. Their more general notes on
|
||||
<a href="http://www.phpwact.org/php/i18n/charsets">character sets</a>
|
||||
are also worth looking at for information on UTF-8. Some rules of thumb
|
||||
@@ -998,7 +1009,7 @@ when dealing with Unicode text:</p>
|
||||
<li>Think twice before using functions that:<ul>
|
||||
<li>...count characters (strlen will return bytes, not characters;
|
||||
str_split and word_wrap may corrupt)</li>
|
||||
<li>...entity-ize things (UTF-8 doesn't need entities)</li>
|
||||
<li>...convert characters to entity references (UTF-8 doesn't need entities)</li>
|
||||
<li>...do very complex string processing (*printf)</li>
|
||||
</ul></li>
|
||||
</ul>
|
||||
|
28
docs/ref-css-length.txt
Normal file
28
docs/ref-css-length.txt
Normal file
@@ -0,0 +1,28 @@
|
||||
|
||||
CSS Length Reference
|
||||
To bound, or not to bound, that is the question
|
||||
|
||||
It's quite a reasonable request, really, and it's already been implemented
|
||||
for HTML. That is, length bounding. It makes little sense to let users
|
||||
define text blocks that have a font-size of 63,360 inches (that's a mile,
|
||||
by the way) or a width of forty-fold the parent container.
|
||||
|
||||
But it's a little more complicated than that. There are multiple units
|
||||
one can use, and we have to do a little unit conversion to get things working.
|
||||
Here's what we have:
|
||||
|
||||
Absolute:
|
||||
1 in = 2.54 cm (exactly)
|
||||
1 cm = 10 mm
|
||||
1 pt = 1/72 in
|
||||
1 pc = 12 pt
|
||||
|
||||
Relative:
|
||||
1 em ~= 10.0667 px
|
||||
1 ex ~= 0.5 em, though Mozilla Firefox says 1 ex = 6px
|
||||
1 px ~= 1 pt
|
||||
|
||||
Watch out: font-sizes can also be nested to get successively larger
|
||||
(although I do not relish having to keep track of context font-sizes,
|
||||
this may be necessary, especially for some of the more advanced features
|
||||
for preventing things like white on white).
|
@@ -1,7 +1,6 @@
|
||||
<?php
|
||||
|
||||
/*!
|
||||
* @mainpage
|
||||
/*! @mainpage
|
||||
*
|
||||
* HTML Purifier is an HTML filter that will take an arbitrary snippet of
|
||||
* HTML and rigorously test, validate and filter it into a version that
|
||||
@@ -22,8 +21,8 @@
|
||||
*/
|
||||
|
||||
/*
|
||||
HTML Purifier 2.0.1 - Standards Compliant HTML Filtering
|
||||
Copyright (C) 2006 Edward Z. Yang
|
||||
HTML Purifier 3.0.0 - Standards Compliant HTML Filtering
|
||||
Copyright (C) 2006-2008 Edward Z. Yang
|
||||
|
||||
This library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
@@ -40,9 +39,11 @@
|
||||
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
// almost every class has an undocumented dependency to these, so make sure
|
||||
// they get included
|
||||
require_once 'HTMLPurifier/ConfigSchema.php'; // important
|
||||
// constants are slow, but we'll make one exception
|
||||
define('HTMLPURIFIER_PREFIX', dirname(__FILE__));
|
||||
|
||||
// every class has an undocumented dependency to these, must be included!
|
||||
require_once 'HTMLPurifier/ConfigSchema.php'; // fatal errors if not included
|
||||
require_once 'HTMLPurifier/Config.php';
|
||||
require_once 'HTMLPurifier/Context.php';
|
||||
|
||||
@@ -57,16 +58,23 @@ require_once 'HTMLPurifier/LanguageFactory.php';
|
||||
HTMLPurifier_ConfigSchema::define(
|
||||
'Core', 'CollectErrors', false, 'bool', '
|
||||
Whether or not to collect errors found while filtering the document. This
|
||||
is a useful way to give feedback to your users. CURRENTLY NOT IMPLEMENTED.
|
||||
This directive has been available since 2.0.0.
|
||||
is a useful way to give feedback to your users. <strong>Warning:</strong>
|
||||
Currently this feature is very patchy and experimental, with lots of
|
||||
possible error messages not yet implemented. It will not cause any problems,
|
||||
but it may not help your users either. This directive has been available
|
||||
since 2.0.0.
|
||||
');
|
||||
|
||||
/**
|
||||
* Main library execution class.
|
||||
* Facade that coordinates HTML Purifier's subsystems in order to purify HTML.
|
||||
*
|
||||
* Facade that performs calls to the HTMLPurifier_Lexer,
|
||||
* HTMLPurifier_Strategy and HTMLPurifier_Generator subsystems in order to
|
||||
* purify HTML.
|
||||
* @note There are several points in which configuration can be specified
|
||||
* for HTML Purifier. The precedence of these (from lowest to
|
||||
* highest) is as follows:
|
||||
* -# Instance: new HTMLPurifier($config)
|
||||
* -# Invocation: purify($html, $config)
|
||||
* These configurations are entirely independent of each other and
|
||||
* are *not* merged.
|
||||
*
|
||||
* @todo We need an easier way to inject strategies, it'll probably end
|
||||
* up getting done through config though.
|
||||
@@ -74,18 +82,19 @@ This directive has been available since 2.0.0.
|
||||
class HTMLPurifier
|
||||
{
|
||||
|
||||
var $version = '2.0.1';
|
||||
public $version = '3.0.0';
|
||||
|
||||
var $config;
|
||||
var $filters;
|
||||
public $config;
|
||||
public $filters = array();
|
||||
|
||||
var $strategy, $generator;
|
||||
protected $strategy, $generator;
|
||||
|
||||
/**
|
||||
* Final HTMLPurifier_Context of last run purification. Might be an array.
|
||||
* Resultant HTMLPurifier_Context of last run purification. Is an array
|
||||
* of contexts if the last called method was purifyArray().
|
||||
* @public
|
||||
*/
|
||||
var $context;
|
||||
public $context;
|
||||
|
||||
/**
|
||||
* Initializes the purifier.
|
||||
@@ -95,7 +104,7 @@ class HTMLPurifier
|
||||
* The parameter can also be any type that
|
||||
* HTMLPurifier_Config::create() supports.
|
||||
*/
|
||||
function HTMLPurifier($config = null) {
|
||||
public function __construct($config = null) {
|
||||
|
||||
$this->config = HTMLPurifier_Config::create($config);
|
||||
|
||||
@@ -108,7 +117,7 @@ class HTMLPurifier
|
||||
* Adds a filter to process the output. First come first serve
|
||||
* @param $filter HTMLPurifier_Filter object
|
||||
*/
|
||||
function addFilter($filter) {
|
||||
public function addFilter($filter) {
|
||||
$this->filters[] = $filter;
|
||||
}
|
||||
|
||||
@@ -122,8 +131,9 @@ class HTMLPurifier
|
||||
* that HTMLPurifier_Config::create() supports.
|
||||
* @return Purified HTML
|
||||
*/
|
||||
function purify($html, $config = null) {
|
||||
public function purify($html, $config = null) {
|
||||
|
||||
// todo: make the config merge in, instead of replace
|
||||
$config = $config ? HTMLPurifier_Config::create($config) : $this->config;
|
||||
|
||||
// implementation is partially environment dependant, partially
|
||||
@@ -147,6 +157,11 @@ class HTMLPurifier
|
||||
$context->register('ErrorCollector', $error_collector);
|
||||
}
|
||||
|
||||
// setup id_accumulator context, necessary due to the fact that
|
||||
// AttrValidator can be called from many places
|
||||
$id_accumulator = HTMLPurifier_IDAccumulator::build($config, $context);
|
||||
$context->register('IDAccumulator', $id_accumulator);
|
||||
|
||||
$html = HTMLPurifier_Encoder::convertToUTF8($html, $config, $context);
|
||||
|
||||
for ($i = 0, $size = count($this->filters); $i < $size; $i++) {
|
||||
@@ -183,7 +198,7 @@ class HTMLPurifier
|
||||
* See HTMLPurifier::purify() for more details.
|
||||
* @return Array of purified HTML
|
||||
*/
|
||||
function purifyArray($array_of_html, $config = null) {
|
||||
public function purifyArray($array_of_html, $config = null) {
|
||||
$context_array = array();
|
||||
foreach ($array_of_html as $key => $html) {
|
||||
$array_of_html[$key] = $this->purify($html, $config);
|
||||
@@ -195,14 +210,16 @@ class HTMLPurifier
|
||||
|
||||
/**
|
||||
* Singleton for enforcing just one HTML Purifier in your system
|
||||
* @param $prototype Optional prototype HTMLPurifier instance to
|
||||
* overload singleton with.
|
||||
*/
|
||||
function &getInstance($prototype = null) {
|
||||
public static function &getInstance($prototype = null) {
|
||||
static $htmlpurifier;
|
||||
if (!$htmlpurifier || $prototype) {
|
||||
if (is_a($prototype, 'HTMLPurifier')) {
|
||||
if ($prototype instanceof HTMLPurifier) {
|
||||
$htmlpurifier = $prototype;
|
||||
} elseif ($prototype) {
|
||||
$htmlpurifier = new HTMLPurifier(HTMLPurifier_Config::create($prototype));
|
||||
$htmlpurifier = new HTMLPurifier($prototype);
|
||||
} else {
|
||||
$htmlpurifier = new HTMLPurifier();
|
||||
}
|
||||
@@ -213,3 +230,4 @@ class HTMLPurifier
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
@@ -12,7 +12,7 @@ class HTMLPurifier_AttrCollections
|
||||
/**
|
||||
* Associative array of attribute collections, indexed by name
|
||||
*/
|
||||
var $info = array();
|
||||
public $info = array();
|
||||
|
||||
/**
|
||||
* Performs all expansions on internal data for use by other inclusions
|
||||
@@ -21,7 +21,7 @@ class HTMLPurifier_AttrCollections
|
||||
* @param $attr_types HTMLPurifier_AttrTypes instance
|
||||
* @param $modules Hash array of HTMLPurifier_HTMLModule members
|
||||
*/
|
||||
function HTMLPurifier_AttrCollections($attr_types, $modules) {
|
||||
public function __construct($attr_types, $modules) {
|
||||
// load extensions from the modules
|
||||
foreach ($modules as $module) {
|
||||
foreach ($module->attr_collections as $coll_i => $coll) {
|
||||
@@ -53,7 +53,7 @@ class HTMLPurifier_AttrCollections
|
||||
* all inclusions specified by the zero index.
|
||||
* @param &$attr Reference to attribute array
|
||||
*/
|
||||
function performInclusions(&$attr) {
|
||||
public function performInclusions(&$attr) {
|
||||
if (!isset($attr[0])) return;
|
||||
$merge = $attr[0];
|
||||
$seen = array(); // recursion guard
|
||||
@@ -81,7 +81,7 @@ class HTMLPurifier_AttrCollections
|
||||
* @param &$attr Reference to attribute array
|
||||
* @param $attr_types HTMLPurifier_AttrTypes instance
|
||||
*/
|
||||
function expandIdentifiers(&$attr, $attr_types) {
|
||||
public function expandIdentifiers(&$attr, $attr_types) {
|
||||
|
||||
// because foreach will process new elements we add, make sure we
|
||||
// skip duplicates
|
||||
|
@@ -10,32 +10,29 @@
|
||||
* subclasses are also responsible for cleaning the code if possible.
|
||||
*/
|
||||
|
||||
class HTMLPurifier_AttrDef
|
||||
abstract class HTMLPurifier_AttrDef
|
||||
{
|
||||
|
||||
/**
|
||||
* Tells us whether or not an HTML attribute is minimized. Has no
|
||||
* meaning in other contexts.
|
||||
*/
|
||||
var $minimized = false;
|
||||
public $minimized = false;
|
||||
|
||||
/**
|
||||
* Tells us whether or not an HTML attribute is required. Has no
|
||||
* meaning in other contexts
|
||||
*/
|
||||
var $required = false;
|
||||
public $required = false;
|
||||
|
||||
/**
|
||||
* Validates and cleans passed string according to a definition.
|
||||
*
|
||||
* @public
|
||||
* @param $string String to be validated and cleaned.
|
||||
* @param $config Mandatory HTMLPurifier_Config object.
|
||||
* @param $context Mandatory HTMLPurifier_AttrContext object.
|
||||
*/
|
||||
function validate($string, $config, &$context) {
|
||||
trigger_error('Cannot call abstract function', E_USER_ERROR);
|
||||
}
|
||||
abstract public function validate($string, $config, $context);
|
||||
|
||||
/**
|
||||
* Convenience method that parses a string as if it were CDATA.
|
||||
@@ -59,10 +56,8 @@ class HTMLPurifier_AttrDef
|
||||
* function. Trim and whitespace collapsing are supposed to only
|
||||
* occur in NMTOKENs. However, note that we are NOT necessarily
|
||||
* parsing XML, thus, this behavior may still be correct.
|
||||
*
|
||||
* @public
|
||||
*/
|
||||
function parseCDATA($string) {
|
||||
public function parseCDATA($string) {
|
||||
$string = trim($string);
|
||||
$string = str_replace("\n", '', $string);
|
||||
$string = str_replace(array("\r", "\t"), ' ', $string);
|
||||
@@ -73,9 +68,8 @@ class HTMLPurifier_AttrDef
|
||||
* Factory method for creating this class from a string.
|
||||
* @param $string String construction info
|
||||
* @return Created AttrDef object corresponding to $string
|
||||
* @public
|
||||
*/
|
||||
function make($string) {
|
||||
public function make($string) {
|
||||
// default implementation, return flyweight of this object
|
||||
// if overloaded, it is *necessary* for you to clone the
|
||||
// object (usually by instantiating a new copy) and return that
|
||||
|
@@ -17,7 +17,7 @@ require_once 'HTMLPurifier/CSSDefinition.php';
|
||||
class HTMLPurifier_AttrDef_CSS extends HTMLPurifier_AttrDef
|
||||
{
|
||||
|
||||
function validate($css, $config, &$context) {
|
||||
public function validate($css, $config, $context) {
|
||||
|
||||
$css = $this->parseCDATA($css);
|
||||
|
||||
@@ -38,7 +38,20 @@ class HTMLPurifier_AttrDef_CSS extends HTMLPurifier_AttrDef
|
||||
list($property, $value) = explode(':', $declaration, 2);
|
||||
$property = trim($property);
|
||||
$value = trim($value);
|
||||
if (!isset($definition->info[$property])) continue;
|
||||
$ok = false;
|
||||
do {
|
||||
if (isset($definition->info[$property])) {
|
||||
$ok = true;
|
||||
break;
|
||||
}
|
||||
if (ctype_lower($property)) break;
|
||||
$property = strtolower($property);
|
||||
if (isset($definition->info[$property])) {
|
||||
$ok = true;
|
||||
break;
|
||||
}
|
||||
} while(0);
|
||||
if (!$ok) continue;
|
||||
// inefficient call, since the validator will do this again
|
||||
if (strtolower(trim($value)) !== 'inherit') {
|
||||
// inherit works for everything (but only on the base property)
|
||||
|
22
library/HTMLPurifier/AttrDef/CSS/AlphaValue.php
Normal file
22
library/HTMLPurifier/AttrDef/CSS/AlphaValue.php
Normal file
@@ -0,0 +1,22 @@
|
||||
<?php
|
||||
|
||||
require_once 'HTMLPurifier/AttrDef.php';
|
||||
require_once 'HTMLPurifier/AttrDef/CSS/Number.php';
|
||||
|
||||
/**
 * Validates a CSS alpha value (such as the value of the opacity property).
 *
 * Number validation is delegated to HTMLPurifier_AttrDef_CSS_Number; any
 * number it accepts is then clamped into the range 0 to 1.
 */
class HTMLPurifier_AttrDef_CSS_AlphaValue extends HTMLPurifier_AttrDef_CSS_Number
{

    public function __construct() {
        // Negative numbers are accepted by the parent on purpose: instead of
        // rejecting them, validate() clamps them to '0'.
        parent::__construct(false);
    }

    /**
     * @param $number String alpha value to validate
     * @param $config Configuration object, forwarded to the parent validator
     * @param $context Context object, forwarded to the parent validator
     * @return The parent's validated number, '0' if it is below zero, '1' if
     *         it is above one, or false if the parent rejected the input
     */
    public function validate($number, $config, $context) {
        $validated = parent::validate($number, $config, $context);
        if ($validated === false) {
            return false;
        }
        $magnitude = (float) $validated;
        if ($magnitude < 0.0) {
            return '0';
        } elseif ($magnitude > 1.0) {
            return '1';
        }
        return $validated;
    }

}
|
@@ -14,9 +14,9 @@ class HTMLPurifier_AttrDef_CSS_Background extends HTMLPurifier_AttrDef
|
||||
* Local copy of component validators.
|
||||
* @note See HTMLPurifier_AttrDef_Font::$info for a similar impl.
|
||||
*/
|
||||
var $info;
|
||||
protected $info;
|
||||
|
||||
function HTMLPurifier_AttrDef_CSS_Background($config) {
|
||||
public function __construct($config) {
|
||||
$def = $config->getCSSDefinition();
|
||||
$this->info['background-color'] = $def->info['background-color'];
|
||||
$this->info['background-image'] = $def->info['background-image'];
|
||||
@@ -25,7 +25,7 @@ class HTMLPurifier_AttrDef_CSS_Background extends HTMLPurifier_AttrDef
|
||||
$this->info['background-position'] = $def->info['background-position'];
|
||||
}
|
||||
|
||||
function validate($string, $config, &$context) {
|
||||
public function validate($string, $config, $context) {
|
||||
|
||||
// regular pre-processing
|
||||
$string = $this->parseCDATA($string);
|
||||
|
@@ -48,15 +48,15 @@ require_once 'HTMLPurifier/AttrDef/CSS/Percentage.php';
|
||||
class HTMLPurifier_AttrDef_CSS_BackgroundPosition extends HTMLPurifier_AttrDef
|
||||
{
|
||||
|
||||
var $length;
|
||||
var $percentage;
|
||||
protected $length;
|
||||
protected $percentage;
|
||||
|
||||
function HTMLPurifier_AttrDef_CSS_BackgroundPosition() {
|
||||
public function __construct() {
|
||||
$this->length = new HTMLPurifier_AttrDef_CSS_Length();
|
||||
$this->percentage = new HTMLPurifier_AttrDef_CSS_Percentage();
|
||||
}
|
||||
|
||||
function validate($string, $config, &$context) {
|
||||
public function validate($string, $config, $context) {
|
||||
$string = $this->parseCDATA($string);
|
||||
$bits = explode(' ', $string);
|
||||
|
||||
|
@@ -11,16 +11,16 @@ class HTMLPurifier_AttrDef_CSS_Border extends HTMLPurifier_AttrDef
|
||||
/**
|
||||
* Local copy of properties this property is shorthand for.
|
||||
*/
|
||||
var $info = array();
|
||||
protected $info = array();
|
||||
|
||||
function HTMLPurifier_AttrDef_CSS_Border($config) {
|
||||
public function __construct($config) {
|
||||
$def = $config->getCSSDefinition();
|
||||
$this->info['border-width'] = $def->info['border-width'];
|
||||
$this->info['border-style'] = $def->info['border-style'];
|
||||
$this->info['border-top-color'] = $def->info['border-top-color'];
|
||||
}
|
||||
|
||||
function validate($string, $config, &$context) {
|
||||
public function validate($string, $config, $context) {
|
||||
$string = $this->parseCDATA($string);
|
||||
// we specifically will not support rgb() syntax with spaces
|
||||
$bits = explode(' ', $string);
|
||||
|
@@ -33,26 +33,19 @@ This directive has been available since 2.0.0.
|
||||
class HTMLPurifier_AttrDef_CSS_Color extends HTMLPurifier_AttrDef
|
||||
{
|
||||
|
||||
function validate($color, $config, &$context) {
|
||||
public function validate($color, $config, $context) {
|
||||
|
||||
static $colors = null;
|
||||
if ($colors === null) $colors = $config->get('Core', 'ColorKeywords');
|
||||
|
||||
$color = trim($color);
|
||||
if (!$color) return false;
|
||||
if ($color === '') return false;
|
||||
|
||||
$lower = strtolower($color);
|
||||
if (isset($colors[$lower])) return $colors[$lower];
|
||||
|
||||
if ($color[0] === '#') {
|
||||
// hexadecimal handling
|
||||
$hex = substr($color, 1);
|
||||
$length = strlen($hex);
|
||||
if ($length !== 3 && $length !== 6) return false;
|
||||
if (!ctype_xdigit($hex)) return false;
|
||||
} else {
|
||||
if (strpos($color, 'rgb(') !== false) {
|
||||
// rgb literal handling
|
||||
if (strpos($color, 'rgb(')) return false;
|
||||
$length = strlen($color);
|
||||
if (strpos($color, ')') !== $length - 1) return false;
|
||||
$triad = substr($color, 4, $length - 4 - 1);
|
||||
@@ -90,6 +83,17 @@ class HTMLPurifier_AttrDef_CSS_Color extends HTMLPurifier_AttrDef
|
||||
}
|
||||
$new_triad = implode(',', $new_parts);
|
||||
$color = "rgb($new_triad)";
|
||||
} else {
|
||||
// hexadecimal handling
|
||||
if ($color[0] === '#') {
|
||||
$hex = substr($color, 1);
|
||||
} else {
|
||||
$hex = $color;
|
||||
$color = '#' . $color;
|
||||
}
|
||||
$length = strlen($hex);
|
||||
if ($length !== 3 && $length !== 6) return false;
|
||||
if (!ctype_xdigit($hex)) return false;
|
||||
}
|
||||
|
||||
return $color;
|
||||
|
@@ -14,18 +14,18 @@ class HTMLPurifier_AttrDef_CSS_Composite extends HTMLPurifier_AttrDef
|
||||
|
||||
/**
|
||||
* List of HTMLPurifier_AttrDef objects that may process strings
|
||||
* @protected
|
||||
* @todo Make protected
|
||||
*/
|
||||
var $defs;
|
||||
public $defs;
|
||||
|
||||
/**
|
||||
* @param $defs List of HTMLPurifier_AttrDef objects
|
||||
*/
|
||||
function HTMLPurifier_AttrDef_CSS_Composite($defs) {
|
||||
public function __construct($defs) {
|
||||
$this->defs = $defs;
|
||||
}
|
||||
|
||||
function validate($string, $config, &$context) {
|
||||
public function validate($string, $config, $context) {
|
||||
foreach ($this->defs as $i => $def) {
|
||||
$result = $this->defs[$i]->validate($string, $config, $context);
|
||||
if ($result !== false) return $result;
|
||||
|
55
library/HTMLPurifier/AttrDef/CSS/Filter.php
Normal file
55
library/HTMLPurifier/AttrDef/CSS/Filter.php
Normal file
@@ -0,0 +1,55 @@
|
||||
<?php
|
||||
|
||||
require_once 'HTMLPurifier/AttrDef.php';
|
||||
require_once 'HTMLPurifier/AttrDef/Integer.php';
|
||||
|
||||
/**
 * Microsoft's proprietary filter: CSS property
 * @note Currently supports the alpha filter. In the future, this will
 *       probably need an extensible framework
 */
class HTMLPurifier_AttrDef_CSS_Filter extends HTMLPurifier_AttrDef
{

    /**
     * Integer validator that the opacity parameter is deferred to.
     */
    protected $intValidator;

    public function __construct() {
        $this->intValidator = new HTMLPurifier_AttrDef_Integer();
    }

    /**
     * Validates a filter declaration, keeping only the first well-formed
     * "opacity" parameter of an alpha filter and clamping it to 0..100.
     *
     * @param $value String value of the filter property
     * @param $config Configuration object, forwarded to the integer validator
     * @param $context Context object, forwarded to the integer validator
     * @return 'none', the rebuilt "function(parameters)" string, or false
     *         when the filter function is not one of the recognized alpha
     *         spellings
     */
    public function validate($value, $config, $context) {
        $value = $this->parseCDATA($value);
        if ($value === 'none') return $value;
        // if we looped this we could support multiple filters
        $function_length = strcspn($value, '(');
        $function = trim(substr($value, 0, $function_length));
        if ($function !== 'alpha' &&
            $function !== 'Alpha' &&
            $function !== 'progid:DXImageTransform.Microsoft.Alpha'
            ) return false;
        // everything between the opening parenthesis and the first closing
        // one; only look when there is text after the "(" so that we never
        // hand strcspn()/substr() an offset past the end of the string
        $cursor = $function_length + 1;
        $parameters = '';
        if ($cursor < strlen($value)) {
            $parameters_length = strcspn($value, ')', $cursor);
            $parameters = substr($value, $cursor, $parameters_length);
        }
        $ret_params = array();
        $lookup = array(); // keys already emitted, to drop duplicates
        foreach (explode(',', $parameters) as $param) {
            $pair = explode('=', $param);
            // a parameter without "=" carries no value; skip it rather than
            // reading a missing array index
            if (count($pair) < 2) continue;
            $key = trim($pair[0]);
            if (isset($lookup[$key])) continue;
            if ($key !== 'opacity') continue;
            $opacity = $this->intValidator->validate(trim($pair[1]), $config, $context);
            if ($opacity === false) continue;
            $int = (int) $opacity;
            if ($int > 100) $opacity = '100';
            if ($int < 0) $opacity = '0';
            $ret_params[] = "$key=$opacity";
            $lookup[$key] = true;
        }
        $ret_parameters = implode(',', $ret_params);
        $ret_function = "$function($ret_parameters)";
        return $ret_function;
    }

}
|
@@ -16,9 +16,9 @@ class HTMLPurifier_AttrDef_CSS_Font extends HTMLPurifier_AttrDef
|
||||
* CSSDefinition, this wouldn't be necessary. We'd instantiate
|
||||
* our own copies.
|
||||
*/
|
||||
var $info = array();
|
||||
protected $info = array();
|
||||
|
||||
function HTMLPurifier_AttrDef_CSS_Font($config) {
|
||||
public function __construct($config) {
|
||||
$def = $config->getCSSDefinition();
|
||||
$this->info['font-style'] = $def->info['font-style'];
|
||||
$this->info['font-variant'] = $def->info['font-variant'];
|
||||
@@ -28,7 +28,7 @@ class HTMLPurifier_AttrDef_CSS_Font extends HTMLPurifier_AttrDef
|
||||
$this->info['font-family'] = $def->info['font-family'];
|
||||
}
|
||||
|
||||
function validate($string, $config, &$context) {
|
||||
public function validate($string, $config, $context) {
|
||||
|
||||
static $system_fonts = array(
|
||||
'caption' => true,
|
||||
|
@@ -10,7 +10,7 @@ require_once 'HTMLPurifier/AttrDef.php';
|
||||
class HTMLPurifier_AttrDef_CSS_FontFamily extends HTMLPurifier_AttrDef
|
||||
{
|
||||
|
||||
function validate($string, $config, &$context) {
|
||||
public function validate($string, $config, $context) {
|
||||
static $generic_names = array(
|
||||
'serif' => true,
|
||||
'sans-serif' => true,
|
||||
@@ -38,19 +38,24 @@ class HTMLPurifier_AttrDef_CSS_FontFamily extends HTMLPurifier_AttrDef
|
||||
$quote = $font[0];
|
||||
if ($font[$length - 1] !== $quote) continue;
|
||||
$font = substr($font, 1, $length - 2);
|
||||
// double-backslash processing is buggy
|
||||
$font = str_replace("\\$quote", $quote, $font); // de-escape quote
|
||||
$font = str_replace("\\\n", "\n", $font); // de-escape newlines
|
||||
}
|
||||
// process font
|
||||
// $font is a pure representation of the font name
|
||||
|
||||
if (ctype_alnum($font)) {
|
||||
// very simple font, allow it in unharmed
|
||||
$final .= $font . ', ';
|
||||
continue;
|
||||
}
|
||||
$nospace = str_replace(array(' ', '.', '!'), '', $font);
|
||||
if (ctype_alnum($nospace)) {
|
||||
// font with spaces in it
|
||||
$final .= "'$font', ";
|
||||
continue;
|
||||
}
|
||||
|
||||
// complicated font, requires quoting
|
||||
|
||||
// armor single quotes and new lines
|
||||
$font = str_replace("'", "\\'", $font);
|
||||
$font = str_replace("\n", "\\\n", $font);
|
||||
$final .= "'$font', ";
|
||||
}
|
||||
$final = rtrim($final, ', ');
|
||||
if ($final === '') return false;
|
||||
|
@@ -14,22 +14,22 @@ class HTMLPurifier_AttrDef_CSS_Length extends HTMLPurifier_AttrDef
|
||||
* @warning The code assumes all units are two characters long. Be careful
|
||||
* if we have to change this behavior!
|
||||
*/
|
||||
var $units = array('em' => true, 'ex' => true, 'px' => true, 'in' => true,
|
||||
protected $units = array('em' => true, 'ex' => true, 'px' => true, 'in' => true,
|
||||
'cm' => true, 'mm' => true, 'pt' => true, 'pc' => true);
|
||||
/**
|
||||
* Instance of HTMLPurifier_AttrDef_Number to defer number validation to
|
||||
*/
|
||||
var $number_def;
|
||||
protected $number_def;
|
||||
|
||||
/**
|
||||
* @param $non_negative Bool indication whether or not negative values are
|
||||
* allowed.
|
||||
*/
|
||||
function HTMLPurifier_AttrDef_CSS_Length($non_negative = false) {
|
||||
public function __construct($non_negative = false) {
|
||||
$this->number_def = new HTMLPurifier_AttrDef_CSS_Number($non_negative);
|
||||
}
|
||||
|
||||
function validate($length, $config, &$context) {
|
||||
public function validate($length, $config, $context) {
|
||||
|
||||
$length = $this->parseCDATA($length);
|
||||
if ($length === '') return false;
|
||||
|
@@ -13,16 +13,16 @@ class HTMLPurifier_AttrDef_CSS_ListStyle extends HTMLPurifier_AttrDef
|
||||
* Local copy of component validators.
|
||||
* @note See HTMLPurifier_AttrDef_CSS_Font::$info for a similar impl.
|
||||
*/
|
||||
var $info;
|
||||
protected $info;
|
||||
|
||||
function HTMLPurifier_AttrDef_CSS_ListStyle($config) {
|
||||
public function __construct($config) {
|
||||
$def = $config->getCSSDefinition();
|
||||
$this->info['list-style-type'] = $def->info['list-style-type'];
|
||||
$this->info['list-style-position'] = $def->info['list-style-position'];
|
||||
$this->info['list-style-image'] = $def->info['list-style-image'];
|
||||
}
|
||||
|
||||
function validate($string, $config, &$context) {
|
||||
public function validate($string, $config, $context) {
|
||||
|
||||
// regular pre-processing
|
||||
$string = $this->parseCDATA($string);
|
||||
|
@@ -18,24 +18,26 @@ class HTMLPurifier_AttrDef_CSS_Multiple extends HTMLPurifier_AttrDef
|
||||
|
||||
/**
|
||||
* Instance of component definition to defer validation to.
|
||||
* @todo Make protected
|
||||
*/
|
||||
var $single;
|
||||
public $single;
|
||||
|
||||
/**
|
||||
* Max number of values allowed.
|
||||
* @todo Make protected
|
||||
*/
|
||||
var $max;
|
||||
public $max;
|
||||
|
||||
/**
|
||||
* @param $single HTMLPurifier_AttrDef to multiply
|
||||
* @param $max Max number of values allowed (usually four)
|
||||
*/
|
||||
function HTMLPurifier_AttrDef_CSS_Multiple($single, $max = 4) {
|
||||
public function __construct($single, $max = 4) {
|
||||
$this->single = $single;
|
||||
$this->max = $max;
|
||||
}
|
||||
|
||||
function validate($string, $config, &$context) {
|
||||
public function validate($string, $config, $context) {
|
||||
$string = $this->parseCDATA($string);
|
||||
if ($string === '') return false;
|
||||
$parts = explode(' ', $string); // parseCDATA replaced \r, \t and \n
|
||||
|
@@ -9,20 +9,21 @@ class HTMLPurifier_AttrDef_CSS_Number extends HTMLPurifier_AttrDef
|
||||
/**
|
||||
* Bool indicating whether or not only non-negative values are allowed.
|
||||
*/
|
||||
var $non_negative = false;
|
||||
protected $non_negative = false;
|
||||
|
||||
/**
|
||||
* @param $non_negative Bool indicating whether negatives are forbidden
|
||||
*/
|
||||
function HTMLPurifier_AttrDef_CSS_Number($non_negative = false) {
|
||||
public function __construct($non_negative = false) {
|
||||
$this->non_negative = $non_negative;
|
||||
}
|
||||
|
||||
function validate($number, $config, &$context) {
|
||||
public function validate($number, $config, $context) {
|
||||
|
||||
$number = $this->parseCDATA($number);
|
||||
|
||||
if ($number === '') return false;
|
||||
if ($number === '0') return '0';
|
||||
|
||||
$sign = '';
|
||||
switch ($number[0]) {
|
||||
@@ -37,13 +38,16 @@ class HTMLPurifier_AttrDef_CSS_Number extends HTMLPurifier_AttrDef
|
||||
$number = ltrim($number, '0');
|
||||
return $number ? $sign . $number : '0';
|
||||
}
|
||||
if (!strpos($number, '.')) return false;
|
||||
|
||||
// Period is the only non-numeric character allowed
|
||||
if (strpos($number, '.') === false) return false;
|
||||
|
||||
list($left, $right) = explode('.', $number, 2);
|
||||
|
||||
if (!ctype_digit($left)) return false;
|
||||
$left = ltrim($left, '0');
|
||||
if ($left === '' && $right === '') return false;
|
||||
if ($left !== '' && !ctype_digit($left)) return false;
|
||||
|
||||
$left = ltrim($left, '0');
|
||||
$right = rtrim($right, '0');
|
||||
|
||||
if ($right === '') {
|
||||
|
@@ -12,16 +12,16 @@ class HTMLPurifier_AttrDef_CSS_Percentage extends HTMLPurifier_AttrDef
|
||||
/**
|
||||
* Instance of HTMLPurifier_AttrDef_CSS_Number to defer number validation
|
||||
*/
|
||||
var $number_def;
|
||||
protected $number_def;
|
||||
|
||||
/**
|
||||
* @param $non_negative Bool indicating whether to forbid negative values
|
||||
*/
|
||||
function HTMLPurifier_AttrDef_CSS_Percentage($non_negative = false) {
|
||||
public function __construct($non_negative = false) {
|
||||
$this->number_def = new HTMLPurifier_AttrDef_CSS_Number($non_negative);
|
||||
}
|
||||
|
||||
function validate($string, $config, &$context) {
|
||||
public function validate($string, $config, $context) {
|
||||
|
||||
$string = $this->parseCDATA($string);
|
||||
|
||||
|
@@ -10,7 +10,7 @@ require_once 'HTMLPurifier/AttrDef.php';
|
||||
class HTMLPurifier_AttrDef_CSS_TextDecoration extends HTMLPurifier_AttrDef
|
||||
{
|
||||
|
||||
function validate($string, $config, &$context) {
|
||||
public function validate($string, $config, $context) {
|
||||
|
||||
static $allowed_values = array(
|
||||
'line-through' => true,
|
||||
|
@@ -14,11 +14,11 @@ require_once 'HTMLPurifier/AttrDef/URI.php';
|
||||
class HTMLPurifier_AttrDef_CSS_URI extends HTMLPurifier_AttrDef_URI
|
||||
{
|
||||
|
||||
function HTMLPurifier_AttrDef_CSS_URI() {
|
||||
$this->HTMLPurifier_AttrDef_URI(true); // always embedded
|
||||
public function __construct() {
|
||||
parent::__construct(true); // always embedded
|
||||
}
|
||||
|
||||
function validate($uri_string, $config, &$context) {
|
||||
public function validate($uri_string, $config, $context) {
|
||||
// parse the URI out of the string and then pass it onto
|
||||
// the parent object
|
||||
|
||||
|
@@ -14,27 +14,28 @@ class HTMLPurifier_AttrDef_Enum extends HTMLPurifier_AttrDef
|
||||
|
||||
/**
|
||||
* Lookup table of valid values.
|
||||
* @todo Make protected
|
||||
*/
|
||||
var $valid_values = array();
|
||||
public $valid_values = array();
|
||||
|
||||
/**
|
||||
* Bool indicating whether or not enumeration is case sensitive.
|
||||
* @note In general this is always case insensitive.
|
||||
*/
|
||||
var $case_sensitive = false; // values according to W3C spec
|
||||
protected $case_sensitive = false; // values according to W3C spec
|
||||
|
||||
/**
|
||||
* @param $valid_values List of valid values
|
||||
* @param $case_sensitive Bool indicating whether or not case sensitive
|
||||
*/
|
||||
function HTMLPurifier_AttrDef_Enum(
|
||||
public function __construct(
|
||||
$valid_values = array(), $case_sensitive = false
|
||||
) {
|
||||
$this->valid_values = array_flip($valid_values);
|
||||
$this->case_sensitive = $case_sensitive;
|
||||
}
|
||||
|
||||
function validate($string, $config, &$context) {
|
||||
public function validate($string, $config, $context) {
|
||||
$string = trim($string);
|
||||
if (!$this->case_sensitive) {
|
||||
// we may want to do full case-insensitive libraries
|
||||
@@ -50,7 +51,7 @@ class HTMLPurifier_AttrDef_Enum extends HTMLPurifier_AttrDef
|
||||
* valid values. Example: "foo,bar,baz". Prepend "s:" to make
|
||||
* case sensitive
|
||||
*/
|
||||
function make($string) {
|
||||
public function make($string) {
|
||||
if (strlen($string) > 2 && $string[0] == 's' && $string[1] == ':') {
|
||||
$string = substr($string, 2);
|
||||
$sensitive = true;
|
||||
|
@@ -8,12 +8,12 @@ require_once 'HTMLPurifier/AttrDef.php';
|
||||
class HTMLPurifier_AttrDef_HTML_Bool extends HTMLPurifier_AttrDef
|
||||
{
|
||||
|
||||
var $name;
|
||||
var $minimized = true;
|
||||
protected $name;
|
||||
public $minimized = true;
|
||||
|
||||
function HTMLPurifier_AttrDef_HTML_Bool($name = false) {$this->name = $name;}
|
||||
public function __construct($name = false) {$this->name = $name;}
|
||||
|
||||
function validate($string, $config, &$context) {
|
||||
public function validate($string, $config, $context) {
|
||||
if (empty($string)) return false;
|
||||
return $this->name;
|
||||
}
|
||||
@@ -21,7 +21,7 @@ class HTMLPurifier_AttrDef_HTML_Bool extends HTMLPurifier_AttrDef
|
||||
/**
|
||||
* @param $string Name of attribute
|
||||
*/
|
||||
function make($string) {
|
||||
public function make($string) {
|
||||
return new HTMLPurifier_AttrDef_HTML_Bool($string);
|
||||
}
|
||||
|
||||
|
@@ -9,7 +9,7 @@ require_once 'HTMLPurifier/AttrDef/CSS/Color.php'; // for %Core.ColorKeywords
|
||||
class HTMLPurifier_AttrDef_HTML_Color extends HTMLPurifier_AttrDef
|
||||
{
|
||||
|
||||
function validate($string, $config, &$context) {
|
||||
public function validate($string, $config, $context) {
|
||||
|
||||
static $colors = null;
|
||||
if ($colors === null) $colors = $config->get('Core', 'ColorKeywords');
|
||||
|
@@ -19,12 +19,12 @@ require_once 'HTMLPurifier/AttrDef/Enum.php';
|
||||
class HTMLPurifier_AttrDef_HTML_FrameTarget extends HTMLPurifier_AttrDef_Enum
|
||||
{
|
||||
|
||||
var $valid_values = false; // uninitialized value
|
||||
var $case_sensitive = false;
|
||||
public $valid_values = false; // uninitialized value
|
||||
protected $case_sensitive = false;
|
||||
|
||||
function HTMLPurifier_AttrDef_HTML_FrameTarget() {}
|
||||
public function __construct() {}
|
||||
|
||||
function validate($string, $config, &$context) {
|
||||
public function validate($string, $config, $context) {
|
||||
if ($this->valid_values === false) $this->valid_values = $config->get('Attr', 'AllowedFrameTargets');
|
||||
return parent::validate($string, $config, $context);
|
||||
}
|
||||
|
@@ -66,7 +66,7 @@ class HTMLPurifier_AttrDef_HTML_ID extends HTMLPurifier_AttrDef
|
||||
// ref functionality disabled, since we also have to verify
|
||||
// whether or not the ID it refers to exists
|
||||
|
||||
function validate($id, $config, &$context) {
|
||||
public function validate($id, $config, $context) {
|
||||
|
||||
if (!$config->get('Attr', 'EnableID')) return false;
|
||||
|
||||
|
@@ -13,7 +13,7 @@ require_once 'HTMLPurifier/AttrDef/HTML/Pixels.php';
|
||||
class HTMLPurifier_AttrDef_HTML_Length extends HTMLPurifier_AttrDef_HTML_Pixels
|
||||
{
|
||||
|
||||
function validate($string, $config, &$context) {
|
||||
public function validate($string, $config, $context) {
|
||||
|
||||
$string = trim($string);
|
||||
if ($string === '') return false;
|
||||
|
@@ -27,9 +27,9 @@ class HTMLPurifier_AttrDef_HTML_LinkTypes extends HTMLPurifier_AttrDef
|
||||
{
|
||||
|
||||
/** Name of the Attr config directive to pull. */
|
||||
var $name;
|
||||
protected $name;
|
||||
|
||||
function HTMLPurifier_AttrDef_HTML_LinkTypes($name) {
|
||||
public function __construct($name) {
|
||||
$configLookup = array(
|
||||
'rel' => 'AllowedRel',
|
||||
'rev' => 'AllowedRev'
|
||||
@@ -42,7 +42,7 @@ class HTMLPurifier_AttrDef_HTML_LinkTypes extends HTMLPurifier_AttrDef
|
||||
$this->name = $configLookup[$name];
|
||||
}
|
||||
|
||||
function validate($string, $config, &$context) {
|
||||
public function validate($string, $config, $context) {
|
||||
|
||||
$allowed = $config->get('Attr', $this->name);
|
||||
if (empty($allowed)) return false;
|
||||
|
@@ -12,7 +12,7 @@ require_once 'HTMLPurifier/AttrDef/HTML/Length.php';
|
||||
class HTMLPurifier_AttrDef_HTML_MultiLength extends HTMLPurifier_AttrDef_HTML_Length
|
||||
{
|
||||
|
||||
function validate($string, $config, &$context) {
|
||||
public function validate($string, $config, $context) {
|
||||
|
||||
$string = trim($string);
|
||||
if ($string === '') return false;
|
||||
|
@@ -13,7 +13,7 @@ require_once 'HTMLPurifier/Config.php';
|
||||
class HTMLPurifier_AttrDef_HTML_Nmtokens extends HTMLPurifier_AttrDef
|
||||
{
|
||||
|
||||
function validate($string, $config, &$context) {
|
||||
public function validate($string, $config, $context) {
|
||||
|
||||
$string = trim($string);
|
||||
|
||||
|
@@ -8,7 +8,7 @@ require_once 'HTMLPurifier/AttrDef.php';
|
||||
class HTMLPurifier_AttrDef_HTML_Pixels extends HTMLPurifier_AttrDef
|
||||
{
|
||||
|
||||
function validate($string, $config, &$context) {
|
||||
public function validate($string, $config, $context) {
|
||||
|
||||
$string = trim($string);
|
||||
if ($string === '0') return $string;
|
||||
|
@@ -15,24 +15,24 @@ class HTMLPurifier_AttrDef_Integer extends HTMLPurifier_AttrDef
|
||||
/**
|
||||
* Bool indicating whether or not negative values are allowed
|
||||
*/
|
||||
var $negative = true;
|
||||
protected $negative = true;
|
||||
|
||||
/**
|
||||
* Bool indicating whether or not zero is allowed
|
||||
*/
|
||||
var $zero = true;
|
||||
protected $zero = true;
|
||||
|
||||
/**
|
||||
* Bool indicating whether or not positive values are allowed
|
||||
*/
|
||||
var $positive = true;
|
||||
protected $positive = true;
|
||||
|
||||
/**
|
||||
* @param $negative Bool indicating whether or not negative values are allowed
|
||||
* @param $zero Bool indicating whether or not zero is allowed
|
||||
* @param $positive Bool indicating whether or not positive values are allowed
|
||||
*/
|
||||
function HTMLPurifier_AttrDef_Integer(
|
||||
public function __construct(
|
||||
$negative = true, $zero = true, $positive = true
|
||||
) {
|
||||
$this->negative = $negative;
|
||||
@@ -40,7 +40,7 @@ class HTMLPurifier_AttrDef_Integer extends HTMLPurifier_AttrDef
|
||||
$this->positive = $positive;
|
||||
}
|
||||
|
||||
function validate($integer, $config, &$context) {
|
||||
public function validate($integer, $config, $context) {
|
||||
|
||||
$integer = $this->parseCDATA($integer);
|
||||
if ($integer === '') return false;
|
||||
|
@@ -9,7 +9,7 @@ require_once 'HTMLPurifier/AttrDef.php';
|
||||
class HTMLPurifier_AttrDef_Lang extends HTMLPurifier_AttrDef
|
||||
{
|
||||
|
||||
function validate($string, $config, &$context) {
|
||||
public function validate($string, $config, $context) {
|
||||
|
||||
$string = trim($string);
|
||||
if (!$string) return false;
|
||||
|
@@ -8,7 +8,7 @@ require_once 'HTMLPurifier/AttrDef.php';
|
||||
class HTMLPurifier_AttrDef_Text extends HTMLPurifier_AttrDef
|
||||
{
|
||||
|
||||
function validate($string, $config, &$context) {
|
||||
public function validate($string, $config, $context) {
|
||||
return $this->parseCDATA($string);
|
||||
}
|
||||
|
||||
|
@@ -1,90 +1,66 @@
|
||||
<?php
|
||||
|
||||
require_once 'HTMLPurifier/AttrDef.php';
|
||||
require_once 'HTMLPurifier/URIParser.php';
|
||||
require_once 'HTMLPurifier/URIScheme.php';
|
||||
require_once 'HTMLPurifier/URISchemeRegistry.php';
|
||||
require_once 'HTMLPurifier/AttrDef/URI/Host.php';
|
||||
require_once 'HTMLPurifier/PercentEncoder.php';
|
||||
require_once 'HTMLPurifier/AttrDef/URI/Email.php';
|
||||
|
||||
// special case filtering directives
|
||||
|
||||
HTMLPurifier_ConfigSchema::define(
|
||||
'URI', 'DefaultScheme', 'http', 'string',
|
||||
'Defines through what scheme the output will be served, in order to '.
|
||||
'select the proper object validator when no scheme information is present.'
|
||||
);
|
||||
'URI', 'Munge', null, 'string/null', '
|
||||
<p>
|
||||
Munges all browsable (usually http, https and ftp)
|
||||
absolute URI\'s into another URI, usually a URI redirection service.
|
||||
This directive accepts a URI, formatted with a <code>%s</code> where
|
||||
the url-encoded original URI should be inserted (sample:
|
||||
<code>http://www.google.com/url?q=%s</code>).
|
||||
</p>
|
||||
<p>
|
||||
Uses for this directive:
|
||||
</p>
|
||||
<ul>
|
||||
<li>
|
||||
Prevent PageRank leaks, while being fairly transparent
|
||||
to users (you may also want to add some client side JavaScript to
|
||||
override the text in the statusbar). <strong>Notice</strong>:
|
||||
Many security experts believe that this form of protection does not deter spam-bots.
|
||||
</li>
|
||||
<li>
|
||||
Redirect users to a splash page telling them they are leaving your
|
||||
website. While this is poor usability practice, it is often mandated
|
||||
in corporate environments.
|
||||
</li>
|
||||
</ul>
|
||||
<p>
|
||||
This directive has been available since 1.3.0.
|
||||
</p>
|
||||
');
|
||||
|
||||
// disabling directives
|
||||
|
||||
HTMLPurifier_ConfigSchema::define(
|
||||
'URI', 'Host', null, 'string/null',
|
||||
'Defines the domain name of the server, so we can determine whether or '.
|
||||
'an absolute URI is from your website or not. Not strictly necessary, '.
|
||||
'as users should be using relative URIs to reference resources on your '.
|
||||
'website. It will, however, let you use absolute URIs to link to '.
|
||||
'subdomains of the domain you post here: i.e. example.com will allow '.
|
||||
'sub.example.com. However, higher up domains will still be excluded: '.
|
||||
'if you set %URI.Host to sub.example.com, example.com will be blocked. '.
|
||||
'This directive has been available since 1.2.0.'
|
||||
);
|
||||
|
||||
HTMLPurifier_ConfigSchema::define(
|
||||
'URI', 'DisableExternal', false, 'bool',
|
||||
'Disables links to external websites. This is a highly effective '.
|
||||
'anti-spam and anti-pagerank-leech measure, but comes at a hefty price: no'.
|
||||
'links or images outside of your domain will be allowed. Non-linkified '.
|
||||
'URIs will still be preserved. If you want to be able to link to '.
|
||||
'subdomains or use absolute URIs, specify %URI.Host for your website. '.
|
||||
'This directive has been available since 1.2.0.'
|
||||
);
|
||||
|
||||
HTMLPurifier_ConfigSchema::define(
|
||||
'URI', 'DisableExternalResources', false, 'bool',
|
||||
'Disables the embedding of external resources, preventing users from '.
|
||||
'embedding things like images from other hosts. This prevents '.
|
||||
'access tracking (good for email viewers), bandwidth leeching, '.
|
||||
'cross-site request forging, goatse.cx posting, and '.
|
||||
'other nasties, but also results in '.
|
||||
'a loss of end-user functionality (they can\'t directly post a pic '.
|
||||
'they posted from Flickr anymore). Use it if you don\'t have a '.
|
||||
'robust user-content moderation team. This directive has been '.
|
||||
'available since 1.3.0.'
|
||||
);
|
||||
|
||||
HTMLPurifier_ConfigSchema::define(
|
||||
'URI', 'DisableResources', false, 'bool',
|
||||
'Disables embedding resources, essentially meaning no pictures. You can '.
|
||||
'still link to them though. See %URI.DisableExternalResources for why '.
|
||||
'this might be a good idea. This directive has been available since 1.3.0.'
|
||||
);
|
||||
|
||||
HTMLPurifier_ConfigSchema::define(
|
||||
'URI', 'Munge', null, 'string/null',
|
||||
'Munges all browsable (usually http, https and ftp) URI\'s into some URL '.
|
||||
'redirection service. Pass this directive a URI, with %s inserted where '.
|
||||
'the url-encoded original URI should be inserted (sample: '.
|
||||
'<code>http://www.google.com/url?q=%s</code>). '.
|
||||
'This prevents PageRank leaks, while being as transparent as possible '.
|
||||
'to users (you may also want to add some client side JavaScript to '.
|
||||
'override the text in the statusbar). Warning: many security experts '.
|
||||
'believe that this form of protection does not deter spam-bots. '.
|
||||
'You can also use this directive to redirect users to a splash page '.
|
||||
'telling them they are leaving your website. '.
|
||||
'This directive has been available since 1.3.0.'
|
||||
);
|
||||
|
||||
HTMLPurifier_ConfigSchema::define(
|
||||
'URI', 'HostBlacklist', array(), 'list',
|
||||
'List of strings that are forbidden in the host of any URI. Use it to '.
|
||||
'kill domain names of spam, etc. Note that it will catch anything in '.
|
||||
'the domain, so <tt>moo.com</tt> will catch <tt>moo.com.example.com</tt>. '.
|
||||
'This directive has been available since 1.3.0.'
|
||||
);
|
||||
|
||||
HTMLPurifier_ConfigSchema::define(
|
||||
'URI', 'Disable', false, 'bool',
|
||||
'Disables all URIs in all forms. Not sure why you\'d want to do that '.
|
||||
'(after all, the Internet\'s founded on the notion of a hyperlink). '.
|
||||
'This directive has been available since 1.3.0.'
|
||||
);
|
||||
'URI', 'Disable', false, 'bool', '
|
||||
<p>
|
||||
Disables all URIs in all forms. Not sure why you\'d want to do that
|
||||
(after all, the Internet\'s founded on the notion of a hyperlink).
|
||||
This directive has been available since 1.3.0.
|
||||
</p>
|
||||
');
|
||||
HTMLPurifier_ConfigSchema::defineAlias('Attr', 'DisableURI', 'URI', 'Disable');
|
||||
|
||||
HTMLPurifier_ConfigSchema::define(
|
||||
'URI', 'DisableResources', false, 'bool', '
|
||||
<p>
|
||||
Disables embedding resources, essentially meaning no pictures. You can
|
||||
still link to them though. See %URI.DisableExternalResources for why
|
||||
this might be a good idea. This directive has been available since 1.3.0.
|
||||
</p>
|
||||
');
|
||||
|
||||
/**
|
||||
* Validates a URI as defined by RFC 3986.
|
||||
* @note Scheme-specific mechanics deferred to HTMLPurifier_URIScheme
|
||||
@@ -92,214 +68,83 @@ HTMLPurifier_ConfigSchema::defineAlias('Attr', 'DisableURI', 'URI', 'Disable');
|
||||
class HTMLPurifier_AttrDef_URI extends HTMLPurifier_AttrDef
|
||||
{
|
||||
|
||||
var $host;
|
||||
var $embeds_resource;
|
||||
protected $parser, $percentEncoder;
|
||||
protected $embedsResource;
|
||||
|
||||
/**
|
||||
* @param $embeds_resource Does the URI here result in an extra HTTP request?
|
||||
*/
|
||||
function HTMLPurifier_AttrDef_URI($embeds_resource = false) {
|
||||
$this->host = new HTMLPurifier_AttrDef_URI_Host();
|
||||
$this->embeds_resource = (bool) $embeds_resource;
|
||||
public function __construct($embeds_resource = false) {
|
||||
$this->parser = new HTMLPurifier_URIParser();
|
||||
$this->percentEncoder = new HTMLPurifier_PercentEncoder();
|
||||
$this->embedsResource = (bool) $embeds_resource;
|
||||
}
|
||||
|
||||
function validate($uri, $config, &$context) {
|
||||
|
||||
static $PercentEncoder = null;
|
||||
if ($PercentEncoder === null) $PercentEncoder = new HTMLPurifier_PercentEncoder();
|
||||
|
||||
// We'll write stack-based parsers later, for now, use regexps to
|
||||
// get things working as fast as possible (irony)
|
||||
public function validate($uri, $config, $context) {
|
||||
|
||||
if ($config->get('URI', 'Disable')) return false;
|
||||
|
||||
// parse as CDATA
|
||||
// initial operations
|
||||
$uri = $this->parseCDATA($uri);
|
||||
$uri = $this->percentEncoder->normalize($uri);
|
||||
|
||||
// fix up percent-encoding
|
||||
$uri = $PercentEncoder->normalize($uri);
|
||||
// parse the URI
|
||||
$uri = $this->parser->parse($uri);
|
||||
if ($uri === false) return false;
|
||||
|
||||
// while it would be nice to use parse_url(), that's specifically
|
||||
// for HTTP and thus won't work for our generic URI parsing
|
||||
// add embedded flag to context for validators
|
||||
$context->register('EmbeddedURI', $this->embedsResource);
|
||||
|
||||
// according to the RFC... (but this cuts corners, i.e. non-validating)
|
||||
$r_URI = '!'.
|
||||
'(([^:/?#<>\'"]+):)?'. // 2. Scheme
|
||||
'(//([^/?#<>\'"]*))?'. // 4. Authority
|
||||
'([^?#<>\'"]*)'. // 5. Path
|
||||
'(\?([^#<>\'"]*))?'. // 7. Query
|
||||
'(#([^<>\'"]*))?'. // 8. Fragment
|
||||
'!';
|
||||
|
||||
$matches = array();
|
||||
$result = preg_match($r_URI, $uri, $matches);
|
||||
|
||||
if (!$result) return false; // invalid URI
|
||||
|
||||
// separate out parts
|
||||
$scheme = !empty($matches[1]) ? $matches[2] : null;
|
||||
$authority = !empty($matches[3]) ? $matches[4] : null;
|
||||
$path = $matches[5]; // always present, can be empty
|
||||
$query = !empty($matches[6]) ? $matches[7] : null;
|
||||
$fragment = !empty($matches[8]) ? $matches[9] : null;
|
||||
|
||||
|
||||
|
||||
$registry =& HTMLPurifier_URISchemeRegistry::instance();
|
||||
if ($scheme !== null) {
|
||||
// no need to validate the scheme's fmt since we do that when we
|
||||
// retrieve the specific scheme object from the registry
|
||||
$scheme = ctype_lower($scheme) ? $scheme : strtolower($scheme);
|
||||
$scheme_obj = $registry->getScheme($scheme, $config, $context);
|
||||
if (!$scheme_obj) return false; // invalid scheme, clean it out
|
||||
} else {
|
||||
$scheme_obj = $registry->getScheme(
|
||||
$config->get('URI', 'DefaultScheme'), $config, $context
|
||||
);
|
||||
}
|
||||
|
||||
// something funky weird happened in the registry, abort!
|
||||
if (!$scheme_obj) {
|
||||
trigger_error(
|
||||
'Default scheme object "' . $config->get('URI', 'DefaultScheme') . '" was not readable',
|
||||
E_USER_WARNING
|
||||
);
|
||||
return false;
|
||||
}
|
||||
|
||||
// the URI we're processing embeds a resource in the page, but the URI
|
||||
// it references cannot be located
|
||||
if ($this->embeds_resource && !$scheme_obj->browsable) {
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
if ($authority !== null) {
|
||||
$ok = false;
|
||||
do {
|
||||
|
||||
// remove URI if it's absolute and we disabled externals or
|
||||
// if it's absolute and embedded and we disabled external resources
|
||||
unset($our_host);
|
||||
if (
|
||||
$config->get('URI', 'DisableExternal') ||
|
||||
(
|
||||
$config->get('URI', 'DisableExternalResources') &&
|
||||
$this->embeds_resource
|
||||
)
|
||||
) {
|
||||
$our_host = $config->get('URI', 'Host');
|
||||
if ($our_host === null) return false;
|
||||
// generic validation
|
||||
$result = $uri->validate($config, $context);
|
||||
if (!$result) break;
|
||||
|
||||
// chained filtering
|
||||
$uri_def =& $config->getDefinition('URI');
|
||||
$result = $uri_def->filter($uri, $config, $context);
|
||||
if (!$result) break;
|
||||
|
||||
// scheme-specific validation
|
||||
$scheme_obj = $uri->getSchemeObj($config, $context);
|
||||
if (!$scheme_obj) break;
|
||||
if ($this->embedsResource && !$scheme_obj->browsable) break;
|
||||
$result = $scheme_obj->validate($uri, $config, $context);
|
||||
if (!$result) break;
|
||||
|
||||
// survived gauntlet
|
||||
$ok = true;
|
||||
|
||||
} while (false);
|
||||
|
||||
$context->destroy('EmbeddedURI');
|
||||
if (!$ok) return false;
|
||||
|
||||
// munge scheme off if necessary (this must be last)
|
||||
if (!is_null($uri->scheme) && is_null($uri->host)) {
|
||||
if ($uri_def->defaultScheme == $uri->scheme) {
|
||||
$uri->scheme = null;
|
||||
}
|
||||
|
||||
$HEXDIG = '[A-Fa-f0-9]';
|
||||
$unreserved = 'A-Za-z0-9-._~'; // make sure you wrap with []
|
||||
$sub_delims = '!$&\'()'; // needs []
|
||||
$pct_encoded = "%$HEXDIG$HEXDIG";
|
||||
$r_userinfo = "(?:[$unreserved$sub_delims:]|$pct_encoded)*";
|
||||
$r_authority = "/^(($r_userinfo)@)?(\[[^\]]+\]|[^:]*)(:(\d*))?/";
|
||||
$matches = array();
|
||||
preg_match($r_authority, $authority, $matches);
|
||||
// overloads regexp!
|
||||
$userinfo = !empty($matches[1]) ? $matches[2] : null;
|
||||
$host = !empty($matches[3]) ? $matches[3] : null;
|
||||
$port = !empty($matches[4]) ? $matches[5] : null;
|
||||
|
||||
// validate port
|
||||
if ($port !== null) {
|
||||
$port = (int) $port;
|
||||
if ($port < 1 || $port > 65535) $port = null;
|
||||
}
|
||||
|
||||
$host = $this->host->validate($host, $config, $context);
|
||||
if ($host === false) $host = null;
|
||||
|
||||
if ($this->checkBlacklist($host, $config, $context)) return false;
|
||||
|
||||
// more lenient absolute checking
|
||||
if (isset($our_host)) {
|
||||
$host_parts = array_reverse(explode('.', $host));
|
||||
// could be cached
|
||||
$our_host_parts = array_reverse(explode('.', $our_host));
|
||||
foreach ($our_host_parts as $i => $discard) {
|
||||
if (!isset($host_parts[$i])) return false;
|
||||
if ($host_parts[$i] != $our_host_parts[$i]) return false;
|
||||
}
|
||||
}
|
||||
|
||||
// userinfo and host are validated within the regexp
|
||||
|
||||
} else {
|
||||
$port = $host = $userinfo = null;
|
||||
}
|
||||
|
||||
// back to string
|
||||
$result = $uri->toString();
|
||||
|
||||
// query and fragment are quite simple in terms of definition:
|
||||
// *( pchar / "/" / "?" ), so define their validation routines
|
||||
// when we start fixing percent encoding
|
||||
|
||||
|
||||
|
||||
// path gets to be validated against a hodge-podge of rules depending
|
||||
// on the status of authority and scheme, but it's not that important,
|
||||
// esp. since it won't be applicable to everyone
|
||||
|
||||
|
||||
|
||||
// okay, now we defer execution to the subobject for more processing
|
||||
// note that $fragment is omitted
|
||||
list($userinfo, $host, $port, $path, $query) =
|
||||
$scheme_obj->validateComponents(
|
||||
$userinfo, $host, $port, $path, $query, $config, $context
|
||||
);
|
||||
|
||||
|
||||
// reconstruct authority
|
||||
$authority = null;
|
||||
if (!is_null($userinfo) || !is_null($host) || !is_null($port)) {
|
||||
$authority = '';
|
||||
if($userinfo !== null) $authority .= $userinfo . '@';
|
||||
$authority .= $host;
|
||||
if($port !== null) $authority .= ':' . $port;
|
||||
}
|
||||
|
||||
// reconstruct the result
|
||||
$result = '';
|
||||
if ($scheme !== null) $result .= "$scheme:";
|
||||
if ($authority !== null) $result .= "//$authority";
|
||||
$result .= $path;
|
||||
if ($query !== null) $result .= "?$query";
|
||||
if ($fragment !== null) $result .= "#$fragment";
|
||||
|
||||
// munge if necessary
|
||||
$munge = $config->get('URI', 'Munge');
|
||||
if (!empty($scheme_obj->browsable) && $munge !== null) {
|
||||
if ($authority !== null) {
|
||||
$result = str_replace('%s', rawurlencode($result), $munge);
|
||||
}
|
||||
// munge entire URI if necessary
|
||||
if (
|
||||
!is_null($uri->host) && // indicator for authority
|
||||
!empty($scheme_obj->browsable) &&
|
||||
!is_null($munge = $config->get('URI', 'Munge'))
|
||||
) {
|
||||
$result = str_replace('%s', rawurlencode($result), $munge);
|
||||
}
|
||||
|
||||
return $result;
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* Checks a host against an array blacklist
|
||||
* @param $host Host to check
|
||||
* @param $config HTMLPurifier_Config instance
|
||||
* @param $context HTMLPurifier_Context instance
|
||||
* @return bool Is spam?
|
||||
*/
|
||||
function checkBlacklist($host, &$config, &$context) {
|
||||
$blacklist = $config->get('URI', 'HostBlacklist');
|
||||
if (!empty($blacklist)) {
|
||||
foreach($blacklist as $blacklisted_host_fragment) {
|
||||
if (strpos($host, $blacklisted_host_fragment) !== false) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
@@ -2,7 +2,7 @@
|
||||
|
||||
require_once 'HTMLPurifier/AttrDef.php';
|
||||
|
||||
class HTMLPurifier_AttrDef_URI_Email extends HTMLPurifier_AttrDef
|
||||
abstract class HTMLPurifier_AttrDef_URI_Email extends HTMLPurifier_AttrDef
|
||||
{
|
||||
|
||||
/**
|
||||
@@ -14,3 +14,5 @@ class HTMLPurifier_AttrDef_URI_Email extends HTMLPurifier_AttrDef
|
||||
|
||||
}
|
||||
|
||||
// sub-implementations
|
||||
require_once 'HTMLPurifier/AttrDef/URI/Email/SimpleCheck.php';
|
||||
|
@@ -9,7 +9,7 @@ require_once 'HTMLPurifier/AttrDef/URI/Email.php';
|
||||
class HTMLPurifier_AttrDef_URI_Email_SimpleCheck extends HTMLPurifier_AttrDef_URI_Email
|
||||
{
|
||||
|
||||
function validate($string, $config, &$context) {
|
||||
public function validate($string, $config, $context) {
|
||||
// no support for named mailboxes i.e. "Bob <bob@example.com>"
|
||||
// that needs more percent encoding to be done
|
||||
if ($string == '') return false;
|
||||
|
@@ -13,19 +13,19 @@ class HTMLPurifier_AttrDef_URI_Host extends HTMLPurifier_AttrDef
|
||||
/**
|
||||
* Instance of HTMLPurifier_AttrDef_URI_IPv4 sub-validator
|
||||
*/
|
||||
var $ipv4;
|
||||
protected $ipv4;
|
||||
|
||||
/**
|
||||
* Instance of HTMLPurifier_AttrDef_URI_IPv6 sub-validator
|
||||
*/
|
||||
var $ipv6;
|
||||
protected $ipv6;
|
||||
|
||||
function HTMLPurifier_AttrDef_URI_Host() {
|
||||
public function __construct() {
|
||||
$this->ipv4 = new HTMLPurifier_AttrDef_URI_IPv4();
|
||||
$this->ipv6 = new HTMLPurifier_AttrDef_URI_IPv6();
|
||||
}
|
||||
|
||||
function validate($string, $config, &$context) {
|
||||
public function validate($string, $config, $context) {
|
||||
$length = strlen($string);
|
||||
if ($string === '') return '';
|
||||
if ($length > 1 && $string[0] === '[' && $string[$length-1] === ']') {
|
||||
|
@@ -11,11 +11,10 @@ class HTMLPurifier_AttrDef_URI_IPv4 extends HTMLPurifier_AttrDef
|
||||
|
||||
/**
|
||||
* IPv4 regex, protected so that IPv6 can reuse it
|
||||
* @protected
|
||||
*/
|
||||
var $ip4;
|
||||
protected $ip4;
|
||||
|
||||
function validate($aIP, $config, &$context) {
|
||||
public function validate($aIP, $config, $context) {
|
||||
|
||||
if (!$this->ip4) $this->_loadRegex();
|
||||
|
||||
@@ -32,7 +31,7 @@ class HTMLPurifier_AttrDef_URI_IPv4 extends HTMLPurifier_AttrDef
|
||||
* Lazy load function to prevent regex from being stuffed in
|
||||
* cache.
|
||||
*/
|
||||
function _loadRegex() {
|
||||
protected function _loadRegex() {
|
||||
$oct = '(?:25[0-5]|2[0-4][0-9]|1[0-9]{2}|[1-9][0-9]|[0-9])'; // 0-255
|
||||
$this->ip4 = "(?:{$oct}\\.{$oct}\\.{$oct}\\.{$oct})";
|
||||
}
|
||||
|
@@ -11,7 +11,7 @@ require_once 'HTMLPurifier/AttrDef/URI/IPv4.php';
|
||||
class HTMLPurifier_AttrDef_URI_IPv6 extends HTMLPurifier_AttrDef_URI_IPv4
|
||||
{
|
||||
|
||||
function validate($aIP, $config, &$context) {
|
||||
public function validate($aIP, $config, $context) {
|
||||
|
||||
if (!$this->ip4) $this->_loadRegex();
|
||||
|
||||
|
@@ -14,7 +14,7 @@
|
||||
* more details.
|
||||
*/
|
||||
|
||||
class HTMLPurifier_AttrTransform
|
||||
abstract class HTMLPurifier_AttrTransform
|
||||
{
|
||||
|
||||
/**
|
||||
@@ -26,9 +26,7 @@ class HTMLPurifier_AttrTransform
|
||||
* @param $context Mandatory HTMLPurifier_Context object
|
||||
* @returns Processed attribute array.
|
||||
*/
|
||||
function transform($attr, $config, &$context) {
|
||||
trigger_error('Cannot call abstract function', E_USER_ERROR);
|
||||
}
|
||||
abstract public function transform($attr, $config, $context);
|
||||
|
||||
/**
|
||||
* Prepends CSS properties to the style attribute, creating the
|
||||
@@ -36,7 +34,7 @@ class HTMLPurifier_AttrTransform
|
||||
* @param $attr Attribute array to process (passed by reference)
|
||||
* @param $css CSS to prepend
|
||||
*/
|
||||
function prependCSS(&$attr, $css) {
|
||||
public function prependCSS(&$attr, $css) {
|
||||
$attr['style'] = isset($attr['style']) ? $attr['style'] : '';
|
||||
$attr['style'] = $css . $attr['style'];
|
||||
}
|
||||
@@ -46,7 +44,7 @@ class HTMLPurifier_AttrTransform
|
||||
* @param $attr Attribute array to process (passed by reference)
|
||||
* @param $key Key of attribute to confiscate
|
||||
*/
|
||||
function confiscateAttr(&$attr, $key) {
|
||||
public function confiscateAttr(&$attr, $key) {
|
||||
if (!isset($attr[$key])) return null;
|
||||
$value = $attr[$key];
|
||||
unset($attr[$key]);
|
||||
|
@@ -20,7 +20,7 @@ HTMLPurifier_ConfigSchema::defineAllowedValues(
|
||||
class HTMLPurifier_AttrTransform_BdoDir extends HTMLPurifier_AttrTransform
|
||||
{
|
||||
|
||||
function transform($attr, $config, &$context) {
|
||||
public function transform($attr, $config, $context) {
|
||||
if (isset($attr['dir'])) return $attr;
|
||||
$attr['dir'] = $config->get('Attr', 'DefaultTextDir');
|
||||
return $attr;
|
||||
|
@@ -5,10 +5,9 @@ require_once 'HTMLPurifier/AttrTransform.php';
|
||||
/**
|
||||
* Pre-transform that changes deprecated bgcolor attribute to CSS.
|
||||
*/
|
||||
class HTMLPurifier_AttrTransform_BgColor
|
||||
extends HTMLPurifier_AttrTransform {
|
||||
class HTMLPurifier_AttrTransform_BgColor extends HTMLPurifier_AttrTransform {
|
||||
|
||||
function transform($attr, $config, &$context) {
|
||||
public function transform($attr, $config, $context) {
|
||||
|
||||
if (!isset($attr['bgcolor'])) return $attr;
|
||||
|
||||
|
@@ -11,23 +11,23 @@ extends HTMLPurifier_AttrTransform {
|
||||
/**
|
||||
* Name of the boolean attribute that acts as the trigger
|
||||
*/
|
||||
var $attr;
|
||||
protected $attr;
|
||||
|
||||
/**
|
||||
* CSS declarations to add to style, needs trailing semicolon
|
||||
*/
|
||||
var $css;
|
||||
protected $css;
|
||||
|
||||
/**
|
||||
* @param $attr string attribute name to convert from
|
||||
* @param $css string CSS declarations to add to style (needs semicolon)
|
||||
*/
|
||||
function HTMLPurifier_AttrTransform_BoolToCSS($attr, $css) {
|
||||
public function __construct($attr, $css) {
|
||||
$this->attr = $attr;
|
||||
$this->css = $css;
|
||||
}
|
||||
|
||||
function transform($attr, $config, &$context) {
|
||||
public function transform($attr, $config, $context) {
|
||||
if (!isset($attr[$this->attr])) return $attr;
|
||||
unset($attr[$this->attr]);
|
||||
$this->prependCSS($attr, $this->css);
|
||||
|
@@ -7,7 +7,7 @@ require_once 'HTMLPurifier/AttrTransform.php';
|
||||
*/
|
||||
class HTMLPurifier_AttrTransform_Border extends HTMLPurifier_AttrTransform {
|
||||
|
||||
function transform($attr, $config, &$context) {
|
||||
public function transform($attr, $config, $context) {
|
||||
if (!isset($attr['border'])) return $attr;
|
||||
$border_width = $this->confiscateAttr($attr, 'border');
|
||||
// some validation should happen here
|
||||
|
@@ -11,32 +11,32 @@ class HTMLPurifier_AttrTransform_EnumToCSS extends HTMLPurifier_AttrTransform {
|
||||
/**
|
||||
* Name of attribute to transform from
|
||||
*/
|
||||
var $attr;
|
||||
protected $attr;
|
||||
|
||||
/**
|
||||
* Lookup array of attribute values to CSS
|
||||
*/
|
||||
var $enumToCSS = array();
|
||||
protected $enumToCSS = array();
|
||||
|
||||
/**
|
||||
* Case sensitivity of the matching
|
||||
* @warning Currently can only be guaranteed to work with ASCII
|
||||
* values.
|
||||
*/
|
||||
var $caseSensitive = false;
|
||||
protected $caseSensitive = false;
|
||||
|
||||
/**
|
||||
* @param $attr String attribute name to transform from
|
||||
* @param $enumToCSS Lookup array of attribute values to CSS
|
||||
* @param $case_sensitive Boolean case sensitivity indicator, default false
|
||||
*/
|
||||
function HTMLPurifier_AttrTransform_EnumToCSS($attr, $enum_to_css, $case_sensitive = false) {
|
||||
public function __construct($attr, $enum_to_css, $case_sensitive = false) {
|
||||
$this->attr = $attr;
|
||||
$this->enumToCSS = $enum_to_css;
|
||||
$this->caseSensitive = (bool) $case_sensitive;
|
||||
}
|
||||
|
||||
function transform($attr, $config, &$context) {
|
||||
public function transform($attr, $config, $context) {
|
||||
|
||||
if (!isset($attr[$this->attr])) return $attr;
|
||||
|
||||
|
@@ -28,7 +28,7 @@ HTMLPurifier_ConfigSchema::define(
|
||||
class HTMLPurifier_AttrTransform_ImgRequired extends HTMLPurifier_AttrTransform
|
||||
{
|
||||
|
||||
function transform($attr, $config, &$context) {
|
||||
public function transform($attr, $config, $context) {
|
||||
|
||||
$src = true;
|
||||
if (!isset($attr['src'])) {
|
||||
|
@@ -5,23 +5,22 @@ require_once 'HTMLPurifier/AttrTransform.php';
|
||||
/**
|
||||
* Pre-transform that changes deprecated hspace and vspace attributes to CSS
|
||||
*/
|
||||
class HTMLPurifier_AttrTransform_ImgSpace
|
||||
extends HTMLPurifier_AttrTransform {
|
||||
class HTMLPurifier_AttrTransform_ImgSpace extends HTMLPurifier_AttrTransform {
|
||||
|
||||
var $attr;
|
||||
var $css = array(
|
||||
protected $attr;
|
||||
protected $css = array(
|
||||
'hspace' => array('left', 'right'),
|
||||
'vspace' => array('top', 'bottom')
|
||||
);
|
||||
|
||||
function HTMLPurifier_AttrTransform_ImgSpace($attr) {
|
||||
public function __construct($attr) {
|
||||
$this->attr = $attr;
|
||||
if (!isset($this->css[$attr])) {
|
||||
trigger_error(htmlspecialchars($attr) . ' is not valid space attribute');
|
||||
}
|
||||
}
|
||||
|
||||
function transform($attr, $config, &$context) {
|
||||
public function transform($attr, $config, $context) {
|
||||
|
||||
if (!isset($attr[$this->attr])) return $attr;
|
||||
|
||||
|
@@ -10,7 +10,7 @@ require_once 'HTMLPurifier/AttrTransform.php';
|
||||
class HTMLPurifier_AttrTransform_Lang extends HTMLPurifier_AttrTransform
|
||||
{
|
||||
|
||||
function transform($attr, $config, &$context) {
|
||||
public function transform($attr, $config, $context) {
|
||||
|
||||
$lang = isset($attr['lang']) ? $attr['lang'] : false;
|
||||
$xml_lang = isset($attr['xml:lang']) ? $attr['xml:lang'] : false;
|
||||
|
@@ -8,15 +8,15 @@ require_once 'HTMLPurifier/AttrTransform.php';
|
||||
class HTMLPurifier_AttrTransform_Length extends HTMLPurifier_AttrTransform
|
||||
{
|
||||
|
||||
var $name;
|
||||
var $cssName;
|
||||
protected $name;
|
||||
protected $cssName;
|
||||
|
||||
function HTMLPurifier_AttrTransform_Length($name, $css_name = null) {
|
||||
public function __construct($name, $css_name = null) {
|
||||
$this->name = $name;
|
||||
$this->cssName = $css_name ? $css_name : $name;
|
||||
}
|
||||
|
||||
function transform($attr, $config, &$context) {
|
||||
public function transform($attr, $config, $context) {
|
||||
if (!isset($attr[$this->name])) return $attr;
|
||||
$length = $this->confiscateAttr($attr, $this->name);
|
||||
if(ctype_digit($length)) $length .= 'px';
|
||||
|
@@ -8,7 +8,7 @@ require_once 'HTMLPurifier/AttrTransform.php';
|
||||
class HTMLPurifier_AttrTransform_Name extends HTMLPurifier_AttrTransform
|
||||
{
|
||||
|
||||
function transform($attr, $config, &$context) {
|
||||
public function transform($attr, $config, $context) {
|
||||
if (!isset($attr['name'])) return $attr;
|
||||
$id = $this->confiscateAttr($attr, 'name');
|
||||
if ( isset($attr['id'])) return $attr;
|
||||
|
@@ -20,15 +20,14 @@ class HTMLPurifier_AttrTypes
|
||||
{
|
||||
/**
|
||||
* Lookup array of attribute string identifiers to concrete implementations
|
||||
* @protected
|
||||
*/
|
||||
var $info = array();
|
||||
public $info = array();
|
||||
|
||||
/**
|
||||
* Constructs the info array, supplying default implementations for attribute
|
||||
* types.
|
||||
*/
|
||||
function HTMLPurifier_AttrTypes() {
|
||||
public function __construct() {
|
||||
// pseudo-types, must be instantiated via shorthand
|
||||
$this->info['Enum'] = new HTMLPurifier_AttrDef_Enum();
|
||||
$this->info['Bool'] = new HTMLPurifier_AttrDef_HTML_Bool();
|
||||
@@ -44,6 +43,9 @@ class HTMLPurifier_AttrTypes
|
||||
$this->info['LanguageCode'] = new HTMLPurifier_AttrDef_Lang();
|
||||
$this->info['Color'] = new HTMLPurifier_AttrDef_HTML_Color();
|
||||
|
||||
// unimplemented aliases
|
||||
$this->info['ContentType'] = new HTMLPurifier_AttrDef_Text();
|
||||
|
||||
// number is really a positive integer (one or more digits)
|
||||
// FIXME: ^^ not always, see start and value of list items
|
||||
$this->info['Number'] = new HTMLPurifier_AttrDef_Integer(false, false, true);
|
||||
@@ -54,7 +56,7 @@ class HTMLPurifier_AttrTypes
|
||||
* @param $type String type name
|
||||
* @return Object AttrDef for type
|
||||
*/
|
||||
function get($type) {
|
||||
public function get($type) {
|
||||
|
||||
// determine if there is any extra info tacked on
|
||||
if (strpos($type, '#') !== false) list($type, $string) = explode('#', $type, 2);
|
||||
@@ -74,7 +76,7 @@ class HTMLPurifier_AttrTypes
|
||||
* @param $type String type name
|
||||
* @param $impl Object AttrDef for type
|
||||
*/
|
||||
function set($type, $impl) {
|
||||
public function set($type, $impl) {
|
||||
$this->info[$type] = $impl;
|
||||
}
|
||||
}
|
||||
|
@@ -18,11 +18,18 @@ class HTMLPurifier_AttrValidator
|
||||
* @param $config Instance of HTMLPurifier_Config
|
||||
* @param $context Instance of HTMLPurifier_Context
|
||||
*/
|
||||
function validateToken(&$token, &$config, &$context) {
|
||||
public function validateToken(&$token, &$config, $context) {
|
||||
|
||||
$definition = $config->getHTMLDefinition();
|
||||
$e =& $context->get('ErrorCollector', true);
|
||||
|
||||
// initialize IDAccumulator if necessary
|
||||
$ok =& $context->get('IDAccumulator', true);
|
||||
if (!$ok) {
|
||||
$id_accumulator = HTMLPurifier_IDAccumulator::build($config, $context);
|
||||
$context->register('IDAccumulator', $id_accumulator);
|
||||
}
|
||||
|
||||
// initialize CurrentToken if necessary
|
||||
$current_token =& $context->get('CurrentToken', true);
|
||||
if (!$current_token) $context->register('CurrentToken', $token);
|
||||
|
@@ -2,11 +2,13 @@
|
||||
|
||||
require_once 'HTMLPurifier/Definition.php';
|
||||
|
||||
require_once 'HTMLPurifier/AttrDef/CSS/AlphaValue.php';
|
||||
require_once 'HTMLPurifier/AttrDef/CSS/Background.php';
|
||||
require_once 'HTMLPurifier/AttrDef/CSS/BackgroundPosition.php';
|
||||
require_once 'HTMLPurifier/AttrDef/CSS/Border.php';
|
||||
require_once 'HTMLPurifier/AttrDef/CSS/Color.php';
|
||||
require_once 'HTMLPurifier/AttrDef/CSS/Composite.php';
|
||||
require_once 'HTMLPurifier/AttrDef/CSS/Filter.php';
|
||||
require_once 'HTMLPurifier/AttrDef/CSS/Font.php';
|
||||
require_once 'HTMLPurifier/AttrDef/CSS/FontFamily.php';
|
||||
require_once 'HTMLPurifier/AttrDef/CSS/Length.php';
|
||||
@@ -26,6 +28,14 @@ HTMLPurifier_ConfigSchema::define(
|
||||
</p>
|
||||
');
|
||||
|
||||
HTMLPurifier_ConfigSchema::define(
|
||||
'CSS', 'Proprietary', false, 'bool', '
|
||||
<p>
|
||||
Whether or not to allow safe, proprietary CSS values. This directive
|
||||
has been available since 3.0.0.
|
||||
</p>
|
||||
');
|
||||
|
||||
/**
|
||||
* Defines allowed CSS attributes and what their values are.
|
||||
* @see HTMLPurifier_HTMLDefinition
|
||||
@@ -33,17 +43,17 @@ HTMLPurifier_ConfigSchema::define(
|
||||
class HTMLPurifier_CSSDefinition extends HTMLPurifier_Definition
|
||||
{
|
||||
|
||||
var $type = 'CSS';
|
||||
public $type = 'CSS';
|
||||
|
||||
/**
|
||||
* Assoc array of attribute name to definition object.
|
||||
*/
|
||||
var $info = array();
|
||||
public $info = array();
|
||||
|
||||
/**
|
||||
* Constructs the info array. The meat of this class.
|
||||
*/
|
||||
function doSetup($config) {
|
||||
protected function doSetup($config) {
|
||||
|
||||
$this->info['text-align'] = new HTMLPurifier_AttrDef_Enum(
|
||||
array('left', 'right', 'center', 'justify'), false);
|
||||
@@ -204,7 +214,7 @@ class HTMLPurifier_CSSDefinition extends HTMLPurifier_Definition
|
||||
$this->info['border-right'] = new HTMLPurifier_AttrDef_CSS_Border($config);
|
||||
|
||||
$this->info['border-collapse'] = new HTMLPurifier_AttrDef_Enum(array(
|
||||
'collapse', 'seperate'));
|
||||
'collapse', 'separate'));
|
||||
|
||||
$this->info['caption-side'] = new HTMLPurifier_AttrDef_Enum(array(
|
||||
'top', 'bottom'));
|
||||
@@ -219,9 +229,34 @@ class HTMLPurifier_CSSDefinition extends HTMLPurifier_Definition
|
||||
new HTMLPurifier_AttrDef_CSS_Percentage()
|
||||
));
|
||||
|
||||
$this->info['border-spacing'] = new HTMLPurifier_AttrDef_CSS_Multiple(new HTMLPurifier_AttrDef_CSS_Length(), 2);
|
||||
|
||||
// partial support
|
||||
$this->info['white-space'] = new HTMLPurifier_AttrDef_Enum(array('nowrap'));
|
||||
|
||||
if ($config->get('CSS', 'Proprietary')) {
|
||||
$this->doSetupProprietary($config);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/**
 * Registers additional CSS properties in $this->info: the Internet
 * Explorer scrollbar colors, the opacity family and filter.
 * Called from doSetup() only when the CSS.Proprietary directive is on.
 * @param $config Configuration object passed through from doSetup();
 *                it is not read by this method.
 */
protected function doSetupProprietary($config) {
|
||||
// Internet Explorer only scrollbar colors
|
||||
$this->info['scrollbar-arrow-color'] = new HTMLPurifier_AttrDef_CSS_Color();
|
||||
$this->info['scrollbar-base-color'] = new HTMLPurifier_AttrDef_CSS_Color();
|
||||
$this->info['scrollbar-darkshadow-color'] = new HTMLPurifier_AttrDef_CSS_Color();
|
||||
$this->info['scrollbar-face-color'] = new HTMLPurifier_AttrDef_CSS_Color();
|
||||
$this->info['scrollbar-highlight-color'] = new HTMLPurifier_AttrDef_CSS_Color();
|
||||
$this->info['scrollbar-shadow-color'] = new HTMLPurifier_AttrDef_CSS_Color();
|
||||
|
||||
// technically not proprietary, but CSS3, and no one supports it
|
||||
// (the vendor-prefixed variants share the same AlphaValue validator)
$this->info['opacity'] = new HTMLPurifier_AttrDef_CSS_AlphaValue();
|
||||
$this->info['-moz-opacity'] = new HTMLPurifier_AttrDef_CSS_AlphaValue();
|
||||
$this->info['-khtml-opacity'] = new HTMLPurifier_AttrDef_CSS_AlphaValue();
|
||||
|
||||
// only opacity, for now
|
||||
$this->info['filter'] = new HTMLPurifier_AttrDef_CSS_Filter();
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
@@ -22,29 +22,25 @@ class HTMLPurifier_ChildDef
|
||||
/**
|
||||
* Type of child definition, usually right-most part of class name lowercase.
|
||||
* Used occasionally in terms of context.
|
||||
* @public
|
||||
*/
|
||||
var $type;
|
||||
public $type;
|
||||
|
||||
/**
|
||||
* Bool that indicates whether or not an empty array of children is okay
|
||||
*
|
||||
* This is necessary for redundant checking when changes affecting
|
||||
* a child node may cause a parent node to now be disallowed.
|
||||
*
|
||||
* @public
|
||||
*/
|
||||
var $allow_empty;
|
||||
public $allow_empty;
|
||||
|
||||
/**
|
||||
* Lookup array of all elements that this definition could possibly allow
|
||||
*/
|
||||
var $elements = array();
|
||||
public $elements = array();
|
||||
|
||||
/**
|
||||
* Validates nodes according to definition and returns modification.
|
||||
*
|
||||
* @public
|
||||
* @param $tokens_of_children Array of HTMLPurifier_Token
|
||||
* @param $config HTMLPurifier_Config object
|
||||
* @param $context HTMLPurifier_Context object
|
||||
@@ -52,7 +48,7 @@ class HTMLPurifier_ChildDef
|
||||
* @return bool false to remove parent node
|
||||
* @return array of replacement child tokens
|
||||
*/
|
||||
function validateChildren($tokens_of_children, $config, &$context) {
|
||||
public function validateChildren($tokens_of_children, $config, $context) {
|
||||
trigger_error('Call to abstract function', E_USER_ERROR);
|
||||
}
|
||||
}
|
||||
|
@@ -16,29 +16,27 @@ class HTMLPurifier_ChildDef_Chameleon extends HTMLPurifier_ChildDef
|
||||
|
||||
/**
|
||||
* Instance of the definition object to use when inline. Usually stricter.
|
||||
* @public
|
||||
*/
|
||||
var $inline;
|
||||
public $inline;
|
||||
|
||||
/**
|
||||
* Instance of the definition object to use when block.
|
||||
* @public
|
||||
*/
|
||||
var $block;
|
||||
public $block;
|
||||
|
||||
var $type = 'chameleon';
|
||||
public $type = 'chameleon';
|
||||
|
||||
/**
|
||||
* @param $inline List of elements to allow when inline.
|
||||
* @param $block List of elements to allow when block.
|
||||
*/
|
||||
function HTMLPurifier_ChildDef_Chameleon($inline, $block) {
|
||||
public function __construct($inline, $block) {
|
||||
$this->inline = new HTMLPurifier_ChildDef_Optional($inline);
|
||||
$this->block = new HTMLPurifier_ChildDef_Optional($block);
|
||||
$this->elements = $this->block->elements;
|
||||
}
|
||||
|
||||
function validateChildren($tokens_of_children, $config, &$context) {
|
||||
public function validateChildren($tokens_of_children, $config, $context) {
|
||||
if ($context->get('IsInline') === false) {
|
||||
return $this->block->validateChildren(
|
||||
$tokens_of_children, $config, $context);
|
||||
|
@@ -12,28 +12,28 @@ require_once 'HTMLPurifier/ChildDef.php';
|
||||
*/
|
||||
class HTMLPurifier_ChildDef_Custom extends HTMLPurifier_ChildDef
|
||||
{
|
||||
var $type = 'custom';
|
||||
var $allow_empty = false;
|
||||
public $type = 'custom';
|
||||
public $allow_empty = false;
|
||||
/**
|
||||
* Allowed child pattern as defined by the DTD
|
||||
*/
|
||||
var $dtd_regex;
|
||||
public $dtd_regex;
|
||||
/**
|
||||
* PCRE regex derived from $dtd_regex
|
||||
* @private
|
||||
*/
|
||||
var $_pcre_regex;
|
||||
private $_pcre_regex;
|
||||
/**
|
||||
* @param $dtd_regex Allowed child pattern from the DTD
|
||||
*/
|
||||
function HTMLPurifier_ChildDef_Custom($dtd_regex) {
|
||||
public function __construct($dtd_regex) {
|
||||
$this->dtd_regex = $dtd_regex;
|
||||
$this->_compileRegex();
|
||||
}
|
||||
/**
|
||||
* Compiles the PCRE regex from a DTD regex ($dtd_regex to $_pcre_regex)
|
||||
*/
|
||||
function _compileRegex() {
|
||||
protected function _compileRegex() {
|
||||
$raw = str_replace(' ', '', $this->dtd_regex);
|
||||
if ($raw{0} != '(') {
|
||||
$raw = "($raw)";
|
||||
@@ -61,7 +61,7 @@ class HTMLPurifier_ChildDef_Custom extends HTMLPurifier_ChildDef
|
||||
|
||||
$this->_pcre_regex = $reg;
|
||||
}
|
||||
function validateChildren($tokens_of_children, $config, &$context) {
|
||||
public function validateChildren($tokens_of_children, $config, $context) {
|
||||
$list_of_children = '';
|
||||
$nesting = 0; // depth into the nest
|
||||
foreach ($tokens_of_children as $token) {
|
||||
|
@@ -11,10 +11,10 @@ require_once 'HTMLPurifier/ChildDef.php';
|
||||
*/
|
||||
class HTMLPurifier_ChildDef_Empty extends HTMLPurifier_ChildDef
|
||||
{
|
||||
var $allow_empty = true;
|
||||
var $type = 'empty';
|
||||
function HTMLPurifier_ChildDef_Empty() {}
|
||||
function validateChildren($tokens_of_children, $config, &$context) {
|
||||
public $allow_empty = true;
|
||||
public $type = 'empty';
|
||||
public function __construct() {}
|
||||
public function validateChildren($tokens_of_children, $config, $context) {
|
||||
return array();
|
||||
}
|
||||
}
|
||||
|
@@ -11,11 +11,14 @@ require_once 'HTMLPurifier/ChildDef/Required.php';
|
||||
*/
|
||||
class HTMLPurifier_ChildDef_Optional extends HTMLPurifier_ChildDef_Required
|
||||
{
|
||||
var $allow_empty = true;
|
||||
var $type = 'optional';
|
||||
function validateChildren($tokens_of_children, $config, &$context) {
|
||||
public $allow_empty = true;
|
||||
public $type = 'optional';
|
||||
public function validateChildren($tokens_of_children, $config, $context) {
|
||||
$result = parent::validateChildren($tokens_of_children, $config, $context);
|
||||
if ($result === false) return array();
|
||||
if ($result === false) {
|
||||
if (empty($tokens_of_children)) return true;
|
||||
else return array();
|
||||
}
|
||||
return $result;
|
||||
}
|
||||
}
|
||||
|
@@ -11,11 +11,11 @@ class HTMLPurifier_ChildDef_Required extends HTMLPurifier_ChildDef
|
||||
* Lookup table of allowed elements.
|
||||
* @public
|
||||
*/
|
||||
var $elements = array();
|
||||
public $elements = array();
|
||||
/**
|
||||
* @param $elements List of allowed element names (lowercase).
|
||||
*/
|
||||
function HTMLPurifier_ChildDef_Required($elements) {
|
||||
public function __construct($elements) {
|
||||
if (is_string($elements)) {
|
||||
$elements = str_replace(' ', '', $elements);
|
||||
$elements = explode('|', $elements);
|
||||
@@ -30,9 +30,9 @@ class HTMLPurifier_ChildDef_Required extends HTMLPurifier_ChildDef
|
||||
}
|
||||
$this->elements = $elements;
|
||||
}
|
||||
var $allow_empty = false;
|
||||
var $type = 'required';
|
||||
function validateChildren($tokens_of_children, $config, &$context) {
|
||||
public $allow_empty = false;
|
||||
public $type = 'required';
|
||||
public function validateChildren($tokens_of_children, $config, $context) {
|
||||
// if there are no tokens, delete parent node
|
||||
if (empty($tokens_of_children)) return false;
|
||||
|
||||
|
@@ -8,12 +8,12 @@ require_once 'HTMLPurifier/ChildDef/Required.php';
|
||||
class HTMLPurifier_ChildDef_StrictBlockquote
|
||||
extends HTMLPurifier_ChildDef_Required
|
||||
{
|
||||
var $real_elements;
|
||||
var $fake_elements;
|
||||
var $allow_empty = true;
|
||||
var $type = 'strictblockquote';
|
||||
var $init = false;
|
||||
function validateChildren($tokens_of_children, $config, &$context) {
|
||||
protected $real_elements;
|
||||
protected $fake_elements;
|
||||
public $allow_empty = true;
|
||||
public $type = 'strictblockquote';
|
||||
protected $init = false;
|
||||
public function validateChildren($tokens_of_children, $config, $context) {
|
||||
|
||||
$def = $config->getHTMLDefinition();
|
||||
if (!$this->init) {
|
||||
|
@@ -7,12 +7,12 @@ require_once 'HTMLPurifier/ChildDef.php';
|
||||
*/
|
||||
class HTMLPurifier_ChildDef_Table extends HTMLPurifier_ChildDef
|
||||
{
|
||||
var $allow_empty = false;
|
||||
var $type = 'table';
|
||||
var $elements = array('tr' => true, 'tbody' => true, 'thead' => true,
|
||||
public $allow_empty = false;
|
||||
public $type = 'table';
|
||||
public $elements = array('tr' => true, 'tbody' => true, 'thead' => true,
|
||||
'tfoot' => true, 'caption' => true, 'colgroup' => true, 'col' => true);
|
||||
function HTMLPurifier_ChildDef_Table() {}
|
||||
function validateChildren($tokens_of_children, $config, &$context) {
|
||||
public function __construct() {}
|
||||
public function validateChildren($tokens_of_children, $config, $context) {
|
||||
if (empty($tokens_of_children)) return false;
|
||||
|
||||
// this ensures that the loop gets run one last time before closing
|
||||
|
@@ -5,6 +5,7 @@ require_once 'HTMLPurifier/ConfigSchema.php';
|
||||
// member variables
|
||||
require_once 'HTMLPurifier/HTMLDefinition.php';
|
||||
require_once 'HTMLPurifier/CSSDefinition.php';
|
||||
require_once 'HTMLPurifier/URIDefinition.php';
|
||||
require_once 'HTMLPurifier/Doctype.php';
|
||||
require_once 'HTMLPurifier/DefinitionCacheFactory.php';
|
||||
|
||||
@@ -34,6 +35,8 @@ if (!defined('PHP_EOL')) {
|
||||
* because a configuration object should always be forwarded,
|
||||
* otherwise, you run the risk of missing a parameter and then
|
||||
* being stumped when a configuration directive doesn't work.
|
||||
*
|
||||
* @todo Reconsider some of the public member variables
|
||||
*/
|
||||
class HTMLPurifier_Config
|
||||
{
|
||||
@@ -41,73 +44,82 @@ class HTMLPurifier_Config
|
||||
/**
|
||||
* HTML Purifier's version
|
||||
*/
|
||||
var $version = '2.0.1';
|
||||
|
||||
/**
|
||||
* Two-level associative array of configuration directives
|
||||
*/
|
||||
var $conf;
|
||||
|
||||
/**
|
||||
* Reference HTMLPurifier_ConfigSchema for value checking
|
||||
*/
|
||||
var $def;
|
||||
|
||||
/**
|
||||
* Indexed array of definitions
|
||||
*/
|
||||
var $definitions;
|
||||
|
||||
/**
|
||||
* Bool indicator whether or not config is finalized
|
||||
*/
|
||||
var $finalized = false;
|
||||
public $version = '3.0.0';
|
||||
|
||||
/**
|
||||
* Bool indicator whether or not to automatically finalize
|
||||
* the object if a read operation is done
|
||||
*/
|
||||
var $autoFinalize = true;
|
||||
public $autoFinalize = true;
|
||||
|
||||
// protected member variables
|
||||
|
||||
/**
|
||||
* Namespace indexed array of serials for specific namespaces (see
|
||||
* getSerial for more info).
|
||||
* getSerial() for more info).
|
||||
*/
|
||||
var $serials = array();
|
||||
protected $serials = array();
|
||||
|
||||
/**
|
||||
* Serial for entire configuration object
|
||||
*/
|
||||
protected $serial;
|
||||
|
||||
/**
|
||||
* Two-level associative array of configuration directives
|
||||
*/
|
||||
protected $conf;
|
||||
|
||||
/**
|
||||
* Reference HTMLPurifier_ConfigSchema for value checking
|
||||
* @note This is public for introspective purposes. Please don't
|
||||
* abuse!
|
||||
*/
|
||||
public $def;
|
||||
|
||||
/**
|
||||
* Indexed array of definitions
|
||||
*/
|
||||
protected $definitions;
|
||||
|
||||
/**
|
||||
* Bool indicator whether or not config is finalized
|
||||
*/
|
||||
protected $finalized = false;
|
||||
|
||||
/**
|
||||
* @param $definition HTMLPurifier_ConfigSchema that defines what directives
|
||||
* are allowed.
|
||||
*/
|
||||
function HTMLPurifier_Config(&$definition) {
|
||||
public function __construct(&$definition) {
|
||||
$this->conf = $definition->defaults; // set up, copy in defaults
|
||||
$this->def = $definition; // keep a copy around for checking
|
||||
}
|
||||
|
||||
/**
|
||||
* Convenience constructor that creates a config object based on a mixed var
|
||||
* @static
|
||||
* @param mixed $config Variable that defines the state of the config
|
||||
* object. Can be: a HTMLPurifier_Config() object,
|
||||
* an array of directives based on loadArray(),
|
||||
* or a string filename of an ini file.
|
||||
* @return Configured HTMLPurifier_Config object
|
||||
*/
|
||||
static function create($config) {
|
||||
if ($config instanceof HTMLPurifier_Config) return $config;
|
||||
public static function create($config) {
|
||||
if ($config instanceof HTMLPurifier_Config) {
|
||||
// pass-through
|
||||
return $config;
|
||||
}
|
||||
$ret = HTMLPurifier_Config::createDefault();
|
||||
if (is_string($config)) $ret->loadIni($config);
|
||||
elseif (is_array($config)) $ret->loadArray($config);
|
||||
if (isset($revision)) $ret->revision = $revision;
|
||||
return $ret;
|
||||
}
|
||||
|
||||
/**
|
||||
* Convenience constructor that creates a default configuration object.
|
||||
* @static
|
||||
* @return Default HTMLPurifier_Config object.
|
||||
*/
|
||||
static function createDefault() {
|
||||
public static function createDefault() {
|
||||
$definition =& HTMLPurifier_ConfigSchema::instance();
|
||||
$config = new HTMLPurifier_Config($definition);
|
||||
return $config;
|
||||
@@ -118,7 +130,7 @@ class HTMLPurifier_Config
|
||||
* @param $namespace String namespace
|
||||
* @param $key String key
|
||||
*/
|
||||
function get($namespace, $key, $from_alias = false) {
|
||||
public function get($namespace, $key) {
|
||||
if (!$this->finalized && $this->autoFinalize) $this->finalize();
|
||||
if (!isset($this->def->info[$namespace][$key])) {
|
||||
// can't add % due to SimpleTest bug
|
||||
@@ -139,7 +151,7 @@ class HTMLPurifier_Config
|
||||
* Retrieves an array of directives to values from a given namespace
|
||||
* @param $namespace String namespace
|
||||
*/
|
||||
function getBatch($namespace) {
|
||||
public function getBatch($namespace) {
|
||||
if (!$this->finalized && $this->autoFinalize) $this->finalize();
|
||||
if (!isset($this->def->info[$namespace])) {
|
||||
trigger_error('Cannot retrieve undefined namespace ' . htmlspecialchars($namespace),
|
||||
@@ -156,7 +168,7 @@ class HTMLPurifier_Config
|
||||
* before processing!
|
||||
* @param $namespace Namespace to get serial for
|
||||
*/
|
||||
function getBatchSerial($namespace) {
|
||||
public function getBatchSerial($namespace) {
|
||||
if (empty($this->serials[$namespace])) {
|
||||
$batch = $this->getBatch($namespace);
|
||||
unset($batch['DefinitionRev']);
|
||||
@@ -165,10 +177,21 @@ class HTMLPurifier_Config
|
||||
return $this->serials[$namespace];
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a md5 signature for the entire configuration object
|
||||
* that uniquely identifies that particular configuration
|
||||
* The hash is built from the serialized result of getAll() on first
* use and stored in $this->serial; later calls return the stored value.
* @return MD5 hash string of the serialized directive array
* @note NOTE(review): nothing in this method refreshes the cached value;
*       confirm the configuration cannot change after the first call.
*/
|
||||
public function getSerial() {
|
||||
// compute lazily: only hash when no serial has been stored yet
if (empty($this->serial)) {
|
||||
$this->serial = md5(serialize($this->getAll()));
|
||||
}
|
||||
return $this->serial;
|
||||
}
|
||||
|
||||
/**
|
||||
* Retrieves all directives, organized by namespace
|
||||
*/
|
||||
function getAll() {
|
||||
public function getAll() {
|
||||
if (!$this->finalized && $this->autoFinalize) $this->finalize();
|
||||
return $this->conf;
|
||||
}
|
||||
@@ -179,7 +202,7 @@ class HTMLPurifier_Config
|
||||
* @param $key String key
|
||||
* @param $value Mixed value
|
||||
*/
|
||||
function set($namespace, $key, $value, $from_alias = false) {
|
||||
public function set($namespace, $key, $value, $from_alias = false) {
|
||||
if ($this->isFinalized('Cannot set directive after finalization')) return;
|
||||
if (!isset($this->def->info[$namespace][$key])) {
|
||||
trigger_error('Cannot set undefined directive ' . htmlspecialchars("$namespace.$key") . ' to value',
|
||||
@@ -233,9 +256,8 @@ class HTMLPurifier_Config
|
||||
|
||||
/**
|
||||
* Convenience function for error reporting
|
||||
* @private
|
||||
*/
|
||||
function _listify($lookup) {
|
||||
private function _listify($lookup) {
|
||||
$list = array();
|
||||
foreach ($lookup as $name => $b) $list[] = $name;
|
||||
return implode(', ', $list);
|
||||
@@ -246,7 +268,7 @@ class HTMLPurifier_Config
|
||||
* @param $raw Return a copy that has not been setup yet. Must be
|
||||
* called before it's been setup, otherwise won't work.
|
||||
*/
|
||||
function &getHTMLDefinition($raw = false) {
|
||||
public function &getHTMLDefinition($raw = false) {
|
||||
$def =& $this->getDefinition('HTML', $raw);
|
||||
return $def; // prevent PHP 4.4.0 from complaining
|
||||
}
|
||||
@@ -254,7 +276,7 @@ class HTMLPurifier_Config
|
||||
/**
|
||||
* Retrieves reference to the CSS definition
|
||||
*/
|
||||
function &getCSSDefinition($raw = false) {
|
||||
public function &getCSSDefinition($raw = false) {
|
||||
$def =& $this->getDefinition('CSS', $raw);
|
||||
return $def;
|
||||
}
|
||||
@@ -264,7 +286,7 @@ class HTMLPurifier_Config
|
||||
* @param $type Type of definition: HTML, CSS, etc
|
||||
* @param $raw Whether or not definition should be returned raw
|
||||
*/
|
||||
function &getDefinition($type, $raw = false) {
|
||||
public function &getDefinition($type, $raw = false) {
|
||||
if (!$this->finalized && $this->autoFinalize) $this->finalize();
|
||||
$factory = HTMLPurifier_DefinitionCacheFactory::instance();
|
||||
$cache = $factory->create($type, $this);
|
||||
@@ -295,6 +317,8 @@ class HTMLPurifier_Config
|
||||
$this->definitions[$type] = new HTMLPurifier_HTMLDefinition();
|
||||
} elseif ($type == 'CSS') {
|
||||
$this->definitions[$type] = new HTMLPurifier_CSSDefinition();
|
||||
} elseif ($type == 'URI') {
|
||||
$this->definitions[$type] = new HTMLPurifier_URIDefinition();
|
||||
} else {
|
||||
trigger_error("Definition of $type type not supported");
|
||||
$false = false;
|
||||
@@ -322,7 +346,7 @@ class HTMLPurifier_Config
|
||||
* Namespace.Directive => Value
|
||||
* @param $config_array Configuration associative array
|
||||
*/
|
||||
function loadArray($config_array) {
|
||||
public function loadArray($config_array) {
|
||||
if ($this->isFinalized('Cannot load directives after finalization')) return;
|
||||
foreach ($config_array as $key => $value) {
|
||||
$key = str_replace('_', '.', $key);
|
||||
@@ -345,9 +369,8 @@ class HTMLPurifier_Config
|
||||
* that are allowed in a web-form context as per an allowed
|
||||
* namespaces/directives list.
|
||||
* @param $allowed List of allowed namespaces/directives
|
||||
* @static
|
||||
*/
|
||||
static function getAllowedDirectivesForForm($allowed) {
|
||||
public static function getAllowedDirectivesForForm($allowed) {
|
||||
$schema = HTMLPurifier_ConfigSchema::instance();
|
||||
if ($allowed !== true) {
|
||||
if (is_string($allowed)) $allowed = array($allowed);
|
||||
@@ -390,9 +413,27 @@ class HTMLPurifier_Config
|
||||
* @param $index Index/name that the config variables are in
|
||||
* @param $allowed List of allowed namespaces/directives
|
||||
* @param $mq_fix Boolean whether or not to enable magic quotes fix
|
||||
* @static
|
||||
*/
|
||||
static function loadArrayFromForm($array, $index, $allowed = true, $mq_fix = true) {
|
||||
public static function loadArrayFromForm($array, $index, $allowed = true, $mq_fix = true) {
|
||||
$ret = HTMLPurifier_Config::prepareArrayFromForm($array, $index, $allowed, $mq_fix);
|
||||
$config = HTMLPurifier_Config::create($ret);
|
||||
return $config;
|
||||
}
|
||||
|
||||
/**
|
||||
* Merges in configuration values from $_GET/$_POST to object. NOT STATIC.
|
||||
* @note Same parameters as loadArrayFromForm
|
||||
*/
|
||||
public function mergeArrayFromForm($array, $index, $allowed = true, $mq_fix = true) {
|
||||
$ret = HTMLPurifier_Config::prepareArrayFromForm($array, $index, $allowed, $mq_fix);
|
||||
$this->loadArray($ret);
|
||||
}
|
||||
|
||||
/**
|
||||
* Prepares an array from a form into something usable for the more
|
||||
* strict parts of HTMLPurifier_Config
|
||||
*/
|
||||
public static function prepareArrayFromForm($array, $index, $allowed = true, $mq_fix = true) {
|
||||
$array = (isset($array[$index]) && is_array($array[$index])) ? $array[$index] : array();
|
||||
$mq = get_magic_quotes_gpc() && $mq_fix;
|
||||
|
||||
@@ -409,16 +450,14 @@ class HTMLPurifier_Config
|
||||
$value = $mq ? stripslashes($array[$skey]) : $array[$skey];
|
||||
$ret[$ns][$directive] = $value;
|
||||
}
|
||||
|
||||
$config = HTMLPurifier_Config::create($ret);
|
||||
return $config;
|
||||
return $ret;
|
||||
}
|
||||
|
||||
/**
|
||||
* Loads configuration values from an ini file
|
||||
* @param $filename Name of ini file
|
||||
*/
|
||||
function loadIni($filename) {
|
||||
public function loadIni($filename) {
|
||||
if ($this->isFinalized('Cannot load directives after finalization')) return;
|
||||
$array = parse_ini_file($filename, true);
|
||||
$this->loadArray($array);
|
||||
@@ -428,7 +467,7 @@ class HTMLPurifier_Config
|
||||
* Checks whether or not the configuration object is finalized.
|
||||
* @param $error String error message, or false for no error
|
||||
*/
|
||||
function isFinalized($error = false) {
|
||||
public function isFinalized($error = false) {
|
||||
if ($this->finalized && $error) {
|
||||
trigger_error($error, E_USER_ERROR);
|
||||
}
|
||||
@@ -439,17 +478,18 @@ class HTMLPurifier_Config
|
||||
* Finalizes configuration only if auto finalize is on and not
|
||||
* already finalized
|
||||
*/
|
||||
function autoFinalize() {
|
||||
public function autoFinalize() {
|
||||
if (!$this->finalized && $this->autoFinalize) $this->finalize();
|
||||
}
|
||||
|
||||
/**
|
||||
* Finalizes a configuration object, prohibiting further change
|
||||
*/
|
||||
function finalize() {
|
||||
public function finalize() {
|
||||
$this->finalized = true;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
@@ -4,6 +4,6 @@
|
||||
* Base class for configuration entity
|
||||
*/
|
||||
class HTMLPurifier_ConfigDef {
|
||||
var $class = false;
|
||||
public $class = false;
|
||||
}
|
||||
|
||||
|
@@ -9,9 +9,9 @@ require_once 'HTMLPurifier/ConfigDef.php';
|
||||
class HTMLPurifier_ConfigDef_Directive extends HTMLPurifier_ConfigDef
|
||||
{
|
||||
|
||||
var $class = 'directive';
|
||||
public $class = 'directive';
|
||||
|
||||
function HTMLPurifier_ConfigDef_Directive(
|
||||
public function __construct(
|
||||
$type = null,
|
||||
$descriptions = null,
|
||||
$allow_null = null,
|
||||
@@ -37,40 +37,40 @@ class HTMLPurifier_ConfigDef_Directive extends HTMLPurifier_ConfigDef
|
||||
* - hash (array of key => value)
|
||||
* - mixed (anything goes)
|
||||
*/
|
||||
var $type = 'mixed';
|
||||
public $type = 'mixed';
|
||||
|
||||
/**
|
||||
* Plaintext descriptions of what the configuration entity is. Organized by
|
||||
* file and line number, so multiple descriptions are allowed.
|
||||
*/
|
||||
var $descriptions = array();
|
||||
public $descriptions = array();
|
||||
|
||||
/**
|
||||
* Is null allowed? Has no effect for mixed type.
|
||||
* @bool
|
||||
*/
|
||||
var $allow_null = false;
|
||||
public $allow_null = false;
|
||||
|
||||
/**
|
||||
* Lookup table of allowed values of the element, bool true if all allowed.
|
||||
*/
|
||||
var $allowed = true;
|
||||
public $allowed = true;
|
||||
|
||||
/**
|
||||
* Hash of value aliases, i.e. values that are equivalent.
|
||||
*/
|
||||
var $aliases = array();
|
||||
public $aliases = array();
|
||||
|
||||
/**
|
||||
* Advisory list of directive aliases, i.e. other directives that
|
||||
* redirect here
|
||||
*/
|
||||
var $directiveAliases = array();
|
||||
public $directiveAliases = array();
|
||||
|
||||
/**
|
||||
* Adds a description to the array
|
||||
*/
|
||||
function addDescription($file, $line, $description) {
|
||||
public function addDescription($file, $line, $description) {
|
||||
if (!isset($this->descriptions[$file])) $this->descriptions[$file] = array();
|
||||
$this->descriptions[$file][$line] = $description;
|
||||
}
|
||||
|
@@ -7,18 +7,18 @@ require_once 'HTMLPurifier/ConfigDef.php';
|
||||
*/
|
||||
class HTMLPurifier_ConfigDef_DirectiveAlias extends HTMLPurifier_ConfigDef
|
||||
{
|
||||
var $class = 'alias';
|
||||
public $class = 'alias';
|
||||
|
||||
/**
|
||||
* Namespace being aliased to
|
||||
*/
|
||||
var $namespace;
|
||||
public $namespace;
|
||||
/**
|
||||
* Directive being aliased to
|
||||
*/
|
||||
var $name;
|
||||
public $name;
|
||||
|
||||
function HTMLPurifier_ConfigDef_DirectiveAlias($namespace, $name) {
|
||||
public function __construct($namespace, $name) {
|
||||
$this->namespace = $namespace;
|
||||
$this->name = $name;
|
||||
}
|
||||
|
@@ -7,16 +7,16 @@ require_once 'HTMLPurifier/ConfigDef.php';
|
||||
*/
|
||||
class HTMLPurifier_ConfigDef_Namespace extends HTMLPurifier_ConfigDef {
|
||||
|
||||
function HTMLPurifier_ConfigDef_Namespace($description = null) {
|
||||
public function HTMLPurifier_ConfigDef_Namespace($description = null) {
|
||||
$this->description = $description;
|
||||
}
|
||||
|
||||
var $class = 'namespace';
|
||||
public $class = 'namespace';
|
||||
|
||||
/**
|
||||
* String description of what kinds of directives go in this namespace.
|
||||
*/
|
||||
var $description;
|
||||
public $description;
|
||||
|
||||
}
|
||||
|
||||
|
@@ -6,6 +6,8 @@ require_once 'HTMLPurifier/ConfigDef/Namespace.php';
|
||||
require_once 'HTMLPurifier/ConfigDef/Directive.php';
|
||||
require_once 'HTMLPurifier/ConfigDef/DirectiveAlias.php';
|
||||
|
||||
if (!defined('HTMLPURIFIER_SCHEMA_STRICT')) define('HTMLPURIFIER_SCHEMA_STRICT', false);
|
||||
|
||||
/**
|
||||
* Configuration definition, defines directives and their defaults.
|
||||
* @note If you update this, please update Printer_ConfigForm
|
||||
@@ -31,24 +33,26 @@ class HTMLPurifier_ConfigSchema {
|
||||
* Defaults of the directives and namespaces.
|
||||
* @note This shares the exact same structure as HTMLPurifier_Config::$conf
|
||||
*/
|
||||
var $defaults = array();
|
||||
public $defaults = array();
|
||||
|
||||
/**
|
||||
* Definition of the directives.
|
||||
*/
|
||||
var $info = array();
|
||||
public $info = array();
|
||||
|
||||
/**
|
||||
* Definition of namespaces.
|
||||
*/
|
||||
var $info_namespace = array();
|
||||
public $info_namespace = array();
|
||||
|
||||
/**
|
||||
* Lookup table of allowed types.
|
||||
*/
|
||||
var $types = array(
|
||||
public $types = array(
|
||||
'string' => 'String',
|
||||
'istring' => 'Case-insensitive string',
|
||||
'text' => 'Text',
|
||||
'itext' => 'Case-insensitive text',
|
||||
'int' => 'Integer',
|
||||
'float' => 'Float',
|
||||
'bool' => 'Boolean',
|
||||
@@ -61,7 +65,7 @@ class HTMLPurifier_ConfigSchema {
|
||||
/**
|
||||
* Initializes the default namespaces.
|
||||
*/
|
||||
function initialize() {
|
||||
public function initialize() {
|
||||
$this->defineNamespace('Core', 'Core features that are always available.');
|
||||
$this->defineNamespace('Attr', 'Features regarding attribute validation.');
|
||||
$this->defineNamespace('URI', 'Features regarding Uniform Resource Identifiers.');
|
||||
@@ -69,6 +73,7 @@ class HTMLPurifier_ConfigSchema {
|
||||
$this->defineNamespace('CSS', 'Configuration regarding allowed CSS.');
|
||||
$this->defineNamespace('AutoFormat', 'Configuration for activating auto-formatting functionality (also known as <code>Injector</code>s)');
|
||||
$this->defineNamespace('AutoFormatParam', 'Configuration for customizing auto-formatting functionality');
|
||||
$this->defineNamespace('Filter', 'Configuration for filters');
|
||||
$this->defineNamespace('Output', 'Configuration relating to the generation of (X)HTML.');
|
||||
$this->defineNamespace('Cache', 'Configuration for DefinitionCache and related subclasses.');
|
||||
$this->defineNamespace('Test', 'Developer testing configuration for our unit tests.');
|
||||
@@ -76,9 +81,8 @@ class HTMLPurifier_ConfigSchema {
|
||||
|
||||
/**
|
||||
* Retrieves an instance of the application-wide configuration definition.
|
||||
* @static
|
||||
*/
|
||||
static function &instance($prototype = null) {
|
||||
public static function &instance($prototype = null) {
|
||||
static $instance;
|
||||
if ($prototype !== null) {
|
||||
$instance = $prototype;
|
||||
@@ -91,7 +95,6 @@ class HTMLPurifier_ConfigSchema {
|
||||
|
||||
/**
|
||||
* Defines a directive for configuration
|
||||
* @static
|
||||
* @warning Will fail if directive's namespace is undefined
|
||||
* @param $namespace Namespace the directive is in
|
||||
* @param $name Key of directive
|
||||
@@ -100,27 +103,30 @@ class HTMLPurifier_ConfigSchema {
|
||||
* HTMLPurifier_ConfigDef_Directive::$type for allowed values
|
||||
* @param $description Description of directive for documentation
|
||||
*/
|
||||
static function define(
|
||||
$namespace, $name, $default, $type,
|
||||
$description
|
||||
) {
|
||||
public static function define($namespace, $name, $default, $type, $description) {
|
||||
$def =& HTMLPurifier_ConfigSchema::instance();
|
||||
if (!isset($def->info[$namespace])) {
|
||||
trigger_error('Cannot define directive for undefined namespace',
|
||||
E_USER_ERROR);
|
||||
return;
|
||||
}
|
||||
if (!ctype_alnum($name)) {
|
||||
trigger_error('Directive name must be alphanumeric',
|
||||
E_USER_ERROR);
|
||||
return;
|
||||
}
|
||||
if (empty($description)) {
|
||||
trigger_error('Description must be non-empty',
|
||||
E_USER_ERROR);
|
||||
return;
|
||||
|
||||
// basic sanity checks
|
||||
if (HTMLPURIFIER_SCHEMA_STRICT) {
|
||||
if (!isset($def->info[$namespace])) {
|
||||
trigger_error('Cannot define directive for undefined namespace',
|
||||
E_USER_ERROR);
|
||||
return;
|
||||
}
|
||||
if (!ctype_alnum($name)) {
|
||||
trigger_error('Directive name must be alphanumeric',
|
||||
E_USER_ERROR);
|
||||
return;
|
||||
}
|
||||
if (empty($description)) {
|
||||
trigger_error('Description must be non-empty',
|
||||
E_USER_ERROR);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
if (isset($def->info[$namespace][$name])) {
|
||||
// already defined
|
||||
if (
|
||||
$def->info[$namespace][$name]->type !== $type ||
|
||||
$def->defaults[$namespace][$name] !== $default
|
||||
@@ -129,29 +135,35 @@ class HTMLPurifier_ConfigSchema {
|
||||
return;
|
||||
}
|
||||
} else {
|
||||
// process modifiers
|
||||
// needs defining
|
||||
|
||||
// process modifiers (OPTIMIZE!)
|
||||
$type_values = explode('/', $type, 2);
|
||||
$type = $type_values[0];
|
||||
$modifier = isset($type_values[1]) ? $type_values[1] : false;
|
||||
$allow_null = ($modifier === 'null');
|
||||
|
||||
if (!isset($def->types[$type])) {
|
||||
trigger_error('Invalid type for configuration directive',
|
||||
E_USER_ERROR);
|
||||
return;
|
||||
}
|
||||
$default = $def->validate($default, $type, $allow_null);
|
||||
if ($def->isError($default)) {
|
||||
trigger_error('Default value does not match directive type',
|
||||
E_USER_ERROR);
|
||||
return;
|
||||
if (HTMLPURIFIER_SCHEMA_STRICT) {
|
||||
if (!isset($def->types[$type])) {
|
||||
trigger_error('Invalid type for configuration directive',
|
||||
E_USER_ERROR);
|
||||
return;
|
||||
}
|
||||
$default = $def->validate($default, $type, $allow_null);
|
||||
if ($def->isError($default)) {
|
||||
trigger_error('Default value does not match directive type',
|
||||
E_USER_ERROR);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
$def->info[$namespace][$name] =
|
||||
new HTMLPurifier_ConfigDef_Directive();
|
||||
$def->info[$namespace][$name]->type = $type;
|
||||
$def->info[$namespace][$name]->allow_null = $allow_null;
|
||||
$def->defaults[$namespace][$name] = $default;
|
||||
}
|
||||
if (!HTMLPURIFIER_SCHEMA_STRICT) return;
|
||||
$backtrace = debug_backtrace();
|
||||
$file = $def->mungeFilename($backtrace[0]['file']);
|
||||
$line = $backtrace[0]['line'];
|
||||
@@ -160,25 +172,26 @@ class HTMLPurifier_ConfigSchema {
|
||||
|
||||
/**
|
||||
* Defines a namespace for directives to be put into.
|
||||
* @static
|
||||
* @param $namespace Namespace's name
|
||||
* @param $description Description of the namespace
|
||||
*/
|
||||
static function defineNamespace($namespace, $description) {
|
||||
public static function defineNamespace($namespace, $description) {
|
||||
$def =& HTMLPurifier_ConfigSchema::instance();
|
||||
if (isset($def->info[$namespace])) {
|
||||
trigger_error('Cannot redefine namespace', E_USER_ERROR);
|
||||
return;
|
||||
}
|
||||
if (!ctype_alnum($namespace)) {
|
||||
trigger_error('Namespace name must be alphanumeric',
|
||||
E_USER_ERROR);
|
||||
return;
|
||||
}
|
||||
if (empty($description)) {
|
||||
trigger_error('Description must be non-empty',
|
||||
E_USER_ERROR);
|
||||
return;
|
||||
if (HTMLPURIFIER_SCHEMA_STRICT) {
|
||||
if (isset($def->info[$namespace])) {
|
||||
trigger_error('Cannot redefine namespace', E_USER_ERROR);
|
||||
return;
|
||||
}
|
||||
if (!ctype_alnum($namespace)) {
|
||||
trigger_error('Namespace name must be alphanumeric',
|
||||
E_USER_ERROR);
|
||||
return;
|
||||
}
|
||||
if (empty($description)) {
|
||||
trigger_error('Description must be non-empty',
|
||||
E_USER_ERROR);
|
||||
return;
|
||||
}
|
||||
}
|
||||
$def->info[$namespace] = array();
|
||||
$def->info_namespace[$namespace] = new HTMLPurifier_ConfigDef_Namespace();
|
||||
@@ -191,31 +204,32 @@ class HTMLPurifier_ConfigSchema {
|
||||
*
|
||||
* Directive value aliases are convenient for developers because it lets
|
||||
* them set a directive to several values and get the same result.
|
||||
* @static
|
||||
* @param $namespace Directive's namespace
|
||||
* @param $name Name of Directive
|
||||
* @param $alias Name of aliased value
|
||||
* @param $real Value aliased value will be converted into
|
||||
*/
|
||||
static function defineValueAliases($namespace, $name, $aliases) {
|
||||
public static function defineValueAliases($namespace, $name, $aliases) {
|
||||
$def =& HTMLPurifier_ConfigSchema::instance();
|
||||
if (!isset($def->info[$namespace][$name])) {
|
||||
if (HTMLPURIFIER_SCHEMA_STRICT && !isset($def->info[$namespace][$name])) {
|
||||
trigger_error('Cannot set value alias for non-existant directive',
|
||||
E_USER_ERROR);
|
||||
return;
|
||||
}
|
||||
foreach ($aliases as $alias => $real) {
|
||||
if (!$def->info[$namespace][$name] !== true &&
|
||||
!isset($def->info[$namespace][$name]->allowed[$real])
|
||||
) {
|
||||
trigger_error('Cannot define alias to value that is not allowed',
|
||||
E_USER_ERROR);
|
||||
return;
|
||||
}
|
||||
if (isset($def->info[$namespace][$name]->allowed[$alias])) {
|
||||
trigger_error('Cannot define alias over allowed value',
|
||||
E_USER_ERROR);
|
||||
return;
|
||||
if (HTMLPURIFIER_SCHEMA_STRICT) {
|
||||
if (!$def->info[$namespace][$name] !== true &&
|
||||
!isset($def->info[$namespace][$name]->allowed[$real])
|
||||
) {
|
||||
trigger_error('Cannot define alias to value that is not allowed',
|
||||
E_USER_ERROR);
|
||||
return;
|
||||
}
|
||||
if (isset($def->info[$namespace][$name]->allowed[$alias])) {
|
||||
trigger_error('Cannot define alias over allowed value',
|
||||
E_USER_ERROR);
|
||||
return;
|
||||
}
|
||||
}
|
||||
$def->info[$namespace][$name]->aliases[$alias] = $real;
|
||||
}
|
||||
@@ -223,21 +237,20 @@ class HTMLPurifier_ConfigSchema {
|
||||
|
||||
/**
|
||||
* Defines a set of allowed values for a directive.
|
||||
* @static
|
||||
* @param $namespace Namespace of directive
|
||||
* @param $name Name of directive
|
||||
* @param $allowed_values Arraylist of allowed values
|
||||
*/
|
||||
static function defineAllowedValues($namespace, $name, $allowed_values) {
|
||||
public static function defineAllowedValues($namespace, $name, $allowed_values) {
|
||||
$def =& HTMLPurifier_ConfigSchema::instance();
|
||||
if (!isset($def->info[$namespace][$name])) {
|
||||
if (HTMLPURIFIER_SCHEMA_STRICT && !isset($def->info[$namespace][$name])) {
|
||||
trigger_error('Cannot define allowed values for undefined directive',
|
||||
E_USER_ERROR);
|
||||
return;
|
||||
}
|
||||
$directive =& $def->info[$namespace][$name];
|
||||
$type = $directive->type;
|
||||
if ($type != 'string' && $type != 'istring') {
|
||||
if (HTMLPURIFIER_SCHEMA_STRICT && $type != 'string' && $type != 'istring') {
|
||||
trigger_error('Cannot define allowed values for directive whose type is not string',
|
||||
E_USER_ERROR);
|
||||
return;
|
||||
@@ -248,8 +261,11 @@ class HTMLPurifier_ConfigSchema {
|
||||
foreach ($allowed_values as $value) {
|
||||
$directive->allowed[$value] = true;
|
||||
}
|
||||
if ($def->defaults[$namespace][$name] !== null &&
|
||||
!isset($directive->allowed[$def->defaults[$namespace][$name]])) {
|
||||
if (
|
||||
HTMLPURIFIER_SCHEMA_STRICT &&
|
||||
$def->defaults[$namespace][$name] !== null &&
|
||||
!isset($directive->allowed[$def->defaults[$namespace][$name]])
|
||||
) {
|
||||
trigger_error('Default value must be in allowed range of variables',
|
||||
E_USER_ERROR);
|
||||
$directive->allowed = true; // undo undo!
|
||||
@@ -259,38 +275,39 @@ class HTMLPurifier_ConfigSchema {
|
||||
|
||||
/**
|
||||
* Defines a directive alias for backwards compatibility
|
||||
* @static
|
||||
* @param $namespace
|
||||
* @param $name Directive that will be aliased
|
||||
* @param $new_namespace
|
||||
* @param $new_name Directive that the alias will be to
|
||||
*/
|
||||
static function defineAlias($namespace, $name, $new_namespace, $new_name) {
|
||||
public static function defineAlias($namespace, $name, $new_namespace, $new_name) {
|
||||
$def =& HTMLPurifier_ConfigSchema::instance();
|
||||
if (!isset($def->info[$namespace])) {
|
||||
trigger_error('Cannot define directive alias in undefined namespace',
|
||||
E_USER_ERROR);
|
||||
return;
|
||||
}
|
||||
if (!ctype_alnum($name)) {
|
||||
trigger_error('Directive name must be alphanumeric',
|
||||
E_USER_ERROR);
|
||||
return;
|
||||
}
|
||||
if (isset($def->info[$namespace][$name])) {
|
||||
trigger_error('Cannot define alias over directive',
|
||||
E_USER_ERROR);
|
||||
return;
|
||||
}
|
||||
if (!isset($def->info[$new_namespace][$new_name])) {
|
||||
trigger_error('Cannot define alias to undefined directive',
|
||||
E_USER_ERROR);
|
||||
return;
|
||||
}
|
||||
if ($def->info[$new_namespace][$new_name]->class == 'alias') {
|
||||
trigger_error('Cannot define alias to alias',
|
||||
E_USER_ERROR);
|
||||
return;
|
||||
if (HTMLPURIFIER_SCHEMA_STRICT) {
|
||||
if (!isset($def->info[$namespace])) {
|
||||
trigger_error('Cannot define directive alias in undefined namespace',
|
||||
E_USER_ERROR);
|
||||
return;
|
||||
}
|
||||
if (!ctype_alnum($name)) {
|
||||
trigger_error('Directive name must be alphanumeric',
|
||||
E_USER_ERROR);
|
||||
return;
|
||||
}
|
||||
if (isset($def->info[$namespace][$name])) {
|
||||
trigger_error('Cannot define alias over directive',
|
||||
E_USER_ERROR);
|
||||
return;
|
||||
}
|
||||
if (!isset($def->info[$new_namespace][$new_name])) {
|
||||
trigger_error('Cannot define alias to undefined directive',
|
||||
E_USER_ERROR);
|
||||
return;
|
||||
}
|
||||
if ($def->info[$new_namespace][$new_name]->class == 'alias') {
|
||||
trigger_error('Cannot define alias to alias',
|
||||
E_USER_ERROR);
|
||||
return;
|
||||
}
|
||||
}
|
||||
$def->info[$namespace][$name] =
|
||||
new HTMLPurifier_ConfigDef_DirectiveAlias(
|
||||
@@ -300,8 +317,9 @@ class HTMLPurifier_ConfigSchema {
|
||||
|
||||
/**
|
||||
* Validate a variable according to type. Return null if invalid.
|
||||
* @todo Consider making protected
|
||||
*/
|
||||
function validate($var, $type, $allow_null = false) {
|
||||
public function validate($var, $type, $allow_null = false) {
|
||||
if (!isset($this->types[$type])) {
|
||||
trigger_error('Invalid type', E_USER_ERROR);
|
||||
return;
|
||||
@@ -313,8 +331,10 @@ class HTMLPurifier_ConfigSchema {
|
||||
return $var;
|
||||
case 'istring':
|
||||
case 'string':
|
||||
case 'text': // no difference, just is longer/multiple line string
|
||||
case 'itext':
|
||||
if (!is_string($var)) break;
|
||||
if ($type === 'istring') $var = strtolower($var);
|
||||
if ($type === 'istring' || $type === 'itext') $var = strtolower($var);
|
||||
return $var;
|
||||
case 'int':
|
||||
if (is_string($var) && ctype_digit($var)) $var = (int) $var;
|
||||
@@ -345,9 +365,13 @@ class HTMLPurifier_ConfigSchema {
|
||||
// a single empty string item, but having an empty
|
||||
// array is more intuitive
|
||||
if ($var == '') return array();
|
||||
// simplistic string to array method that only works
|
||||
// for simple lists of tag names or alphanumeric characters
|
||||
$var = explode(',',$var);
|
||||
if (strpos($var, "\n") === false && strpos($var, "\r") === false) {
|
||||
// simplistic string to array method that only works
|
||||
// for simple lists of tag names or alphanumeric characters
|
||||
$var = explode(',',$var);
|
||||
} else {
|
||||
$var = preg_split('/(,|[\n\r]+)/', $var);
|
||||
}
|
||||
// remove spaces
|
||||
foreach ($var as $i => $j) $var[$i] = trim($j);
|
||||
if ($type === 'hash') {
|
||||
@@ -386,8 +410,12 @@ class HTMLPurifier_ConfigSchema {
|
||||
|
||||
/**
|
||||
* Takes an absolute path and munges it into a more manageable relative path
|
||||
* @todo Consider making protected
|
||||
* @param $filename Filename to check
|
||||
* @return string munged filename
|
||||
*/
|
||||
function mungeFilename($filename) {
|
||||
public function mungeFilename($filename) {
|
||||
if (!HTMLPURIFIER_SCHEMA_STRICT) return $filename;
|
||||
$offset = strrpos($filename, 'HTMLPurifier');
|
||||
$filename = substr($filename, $offset);
|
||||
$filename = str_replace('\\', '/', $filename);
|
||||
@@ -396,8 +424,9 @@ class HTMLPurifier_ConfigSchema {
|
||||
|
||||
/**
|
||||
* Checks if var is an HTMLPurifier_Error object
|
||||
* @todo Consider making protected
|
||||
*/
|
||||
function isError($var) {
|
||||
public function isError($var) {
|
||||
if (!is_object($var)) return false;
|
||||
if (!($var instanceof HTMLPurifier_Error)) return false;
|
||||
return true;
|
||||
|
@@ -5,40 +5,40 @@ require_once 'HTMLPurifier/ChildDef.php';
|
||||
require_once 'HTMLPurifier/ChildDef/Empty.php';
|
||||
require_once 'HTMLPurifier/ChildDef/Required.php';
|
||||
require_once 'HTMLPurifier/ChildDef/Optional.php';
|
||||
require_once 'HTMLPurifier/ChildDef/Custom.php';
|
||||
|
||||
// NOT UNIT TESTED!!!
|
||||
|
||||
/**
|
||||
* @todo Unit test
|
||||
*/
|
||||
class HTMLPurifier_ContentSets
|
||||
{
|
||||
|
||||
/**
|
||||
* List of content set strings (pipe separators) indexed by name.
|
||||
* @public
|
||||
*/
|
||||
var $info = array();
|
||||
public $info = array();
|
||||
|
||||
/**
|
||||
* List of content set lookups (element => true) indexed by name.
|
||||
* @note This is in HTMLPurifier_HTMLDefinition->info_content_sets
|
||||
* @public
|
||||
*/
|
||||
var $lookup = array();
|
||||
public $lookup = array();
|
||||
|
||||
/**
|
||||
* Synchronized list of defined content sets (keys of info)
|
||||
*/
|
||||
var $keys = array();
|
||||
protected $keys = array();
|
||||
/**
|
||||
* Synchronized list of defined content values (values of info)
|
||||
*/
|
||||
var $values = array();
|
||||
protected $values = array();
|
||||
|
||||
/**
|
||||
* Merges in module's content sets, expands identifiers in the content
|
||||
* sets and populates the keys, values and lookup member variables.
|
||||
* @param $modules List of HTMLPurifier_HTMLModule
|
||||
*/
|
||||
function HTMLPurifier_ContentSets($modules) {
|
||||
public function __construct($modules) {
|
||||
if (!is_array($modules)) $modules = array($modules);
|
||||
// populate content_sets based on module hints
|
||||
// sorry, no way of overloading
|
||||
@@ -78,7 +78,7 @@ class HTMLPurifier_ContentSets
|
||||
* @param $def HTMLPurifier_ElementDef reference
|
||||
* @param $module Module that defined the ElementDef
|
||||
*/
|
||||
function generateChildDef(&$def, $module) {
|
||||
public function generateChildDef(&$def, $module) {
|
||||
if (!empty($def->child)) return; // already done!
|
||||
$content_model = $def->content_model;
|
||||
if (is_string($content_model)) {
|
||||
@@ -96,7 +96,7 @@ class HTMLPurifier_ContentSets
|
||||
* @param $def HTMLPurifier_ElementDef to have ChildDef extracted
|
||||
* @return HTMLPurifier_ChildDef corresponding to ElementDef
|
||||
*/
|
||||
function getChildDef($def, $module) {
|
||||
public function getChildDef($def, $module) {
|
||||
$value = $def->content_model;
|
||||
if (is_object($value)) {
|
||||
trigger_error(
|
||||
@@ -136,7 +136,7 @@ class HTMLPurifier_ContentSets
|
||||
* @param $string List of elements
|
||||
* @return Lookup array of elements
|
||||
*/
|
||||
function convertToLookup($string) {
|
||||
protected function convertToLookup($string) {
|
||||
$array = explode('|', str_replace(' ', '', $string));
|
||||
$ret = array();
|
||||
foreach ($array as $i => $k) {
|
||||
|
@@ -10,16 +10,15 @@ class HTMLPurifier_Context
|
||||
|
||||
/**
|
||||
* Private array that stores the references.
|
||||
* @private
|
||||
*/
|
||||
var $_storage = array();
|
||||
private $_storage = array();
|
||||
|
||||
/**
|
||||
* Registers a variable into the context.
|
||||
* @param $name String name
|
||||
* @param $ref Variable to be registered
|
||||
*/
|
||||
function register($name, &$ref) {
|
||||
public function register($name, &$ref) {
|
||||
if (isset($this->_storage[$name])) {
|
||||
trigger_error("Name $name produces collision, cannot re-register",
|
||||
E_USER_ERROR);
|
||||
@@ -33,7 +32,7 @@ class HTMLPurifier_Context
|
||||
* @param $name String name
|
||||
* @param $ignore_error Boolean whether or not to ignore error
|
||||
*/
|
||||
function &get($name, $ignore_error = false) {
|
||||
public function &get($name, $ignore_error = false) {
|
||||
if (!isset($this->_storage[$name])) {
|
||||
if (!$ignore_error) {
|
||||
trigger_error("Attempted to retrieve non-existent variable $name",
|
||||
@@ -49,7 +48,7 @@ class HTMLPurifier_Context
|
||||
* Destroys a variable in the context.
|
||||
* @param $name String name
|
||||
*/
|
||||
function destroy($name) {
|
||||
public function destroy($name) {
|
||||
if (!isset($this->_storage[$name])) {
|
||||
trigger_error("Attempted to destroy non-existent variable $name",
|
||||
E_USER_ERROR);
|
||||
@@ -62,7 +61,7 @@ class HTMLPurifier_Context
|
||||
* Checks whether or not the variable exists.
|
||||
* @param $name String name
|
||||
*/
|
||||
function exists($name) {
|
||||
public function exists($name) {
|
||||
return isset($this->_storage[$name]);
|
||||
}
|
||||
|
||||
@@ -70,7 +69,7 @@ class HTMLPurifier_Context
|
||||
* Loads a series of variables from an associative array
|
||||
* @param $context_array Assoc array of variables to load
|
||||
*/
|
||||
function loadArray(&$context_array) {
|
||||
public function loadArray($context_array) {
|
||||
foreach ($context_array as $key => $discard) {
|
||||
$this->register($key, $context_array[$key]);
|
||||
}
|
||||
|
@@ -4,33 +4,31 @@
|
||||
* Super-class for definition datatype objects, implements serialization
|
||||
* functions for the class.
|
||||
*/
|
||||
class HTMLPurifier_Definition
|
||||
abstract class HTMLPurifier_Definition
|
||||
{
|
||||
|
||||
/**
|
||||
* Has setup() been called yet?
|
||||
*/
|
||||
var $setup = false;
|
||||
public $setup = false;
|
||||
|
||||
/**
|
||||
* What type of definition is it?
|
||||
*/
|
||||
var $type;
|
||||
public $type;
|
||||
|
||||
/**
|
||||
* Sets up the definition object into the final form, something
|
||||
* not done by the constructor
|
||||
* @param $config HTMLPurifier_Config instance
|
||||
*/
|
||||
function doSetup($config) {
|
||||
trigger_error('Cannot call abstract method', E_USER_ERROR);
|
||||
}
|
||||
abstract protected function doSetup($config);
|
||||
|
||||
/**
|
||||
* Setup function that aborts if already setup
|
||||
* @param $config HTMLPurifier_Config instance
|
||||
*/
|
||||
function setup($config) {
|
||||
public function setup($config) {
|
||||
if ($this->setup) return;
|
||||
$this->setup = true;
|
||||
$this->doSetup($config);
|
||||
|
@@ -10,25 +10,21 @@ require_once 'HTMLPurifier/DefinitionCache/Decorator/Cleanup.php';
|
||||
/**
|
||||
* Abstract class representing Definition cache managers that implements
|
||||
* useful common methods and is a factory.
|
||||
* @todo Get some sort of versioning variable so the library can easily
|
||||
* invalidate the cache with a new version
|
||||
* @todo Make the test runner cache aware and allow the user to easily
|
||||
* flush the cache
|
||||
* @todo Create a separate maintenance file advanced users can use to
|
||||
* cache their custom HTMLDefinition, which can be loaded
|
||||
* via a configuration directive
|
||||
* @todo Implement memcached
|
||||
*/
|
||||
class HTMLPurifier_DefinitionCache
|
||||
abstract class HTMLPurifier_DefinitionCache
|
||||
{
|
||||
|
||||
var $type;
|
||||
public $type;
|
||||
|
||||
/**
|
||||
* @param $type Type of definition objects this instance of the
|
||||
* cache will handle.
|
||||
*/
|
||||
function HTMLPurifier_DefinitionCache($type) {
|
||||
public function __construct($type) {
|
||||
$this->type = $type;
|
||||
}
|
||||
|
||||
@@ -36,7 +32,7 @@ class HTMLPurifier_DefinitionCache
|
||||
* Generates a unique identifier for a particular configuration
|
||||
* @param $config Instance of HTMLPurifier_Config
|
||||
*/
|
||||
function generateKey($config) {
|
||||
public function generateKey($config) {
|
||||
return $config->version . '-' . // possibly replace with function calls
|
||||
$config->getBatchSerial($this->type) . '-' .
|
||||
$config->get($this->type, 'DefinitionRev');
|
||||
@@ -48,7 +44,7 @@ class HTMLPurifier_DefinitionCache
|
||||
* @param $key Key to test
|
||||
* @param $config Instance of HTMLPurifier_Config to test against
|
||||
*/
|
||||
function isOld($key, $config) {
|
||||
public function isOld($key, $config) {
|
||||
if (substr_count($key, '-') < 2) return true;
|
||||
list($version, $hash, $revision) = explode('-', $key, 3);
|
||||
$compare = version_compare($version, $config->version);
|
||||
@@ -68,7 +64,7 @@ class HTMLPurifier_DefinitionCache
|
||||
* @param $def Definition object to check
|
||||
* @return Boolean true if good, false if not
|
||||
*/
|
||||
function checkDefType($def) {
|
||||
public function checkDefType($def) {
|
||||
if ($def->type !== $this->type) {
|
||||
trigger_error("Cannot use definition of type {$def->type} in cache for {$this->type}");
|
||||
return false;
|
||||
@@ -79,50 +75,40 @@ class HTMLPurifier_DefinitionCache
|
||||
/**
|
||||
* Adds a definition object to the cache
|
||||
*/
|
||||
function add($def, $config) {
|
||||
trigger_error('Cannot call abstract method', E_USER_ERROR);
|
||||
}
|
||||
abstract public function add($def, $config);
|
||||
|
||||
/**
|
||||
* Unconditionally saves a definition object to the cache
|
||||
*/
|
||||
function set($def, $config) {
|
||||
trigger_error('Cannot call abstract method', E_USER_ERROR);
|
||||
}
|
||||
abstract public function set($def, $config);
|
||||
|
||||
/**
|
||||
* Replace an object in the cache
|
||||
*/
|
||||
function replace($def, $config) {
|
||||
trigger_error('Cannot call abstract method', E_USER_ERROR);
|
||||
}
|
||||
abstract public function replace($def, $config);
|
||||
|
||||
/**
|
||||
* Retrieves a definition object from the cache
|
||||
*/
|
||||
function get($config) {
|
||||
trigger_error('Cannot call abstract method', E_USER_ERROR);
|
||||
}
|
||||
abstract public function get($config);
|
||||
|
||||
/**
|
||||
* Removes a definition object from the cache
|
||||
*/
|
||||
function remove($config) {
|
||||
trigger_error('Cannot call abstract method', E_USER_ERROR);
|
||||
}
|
||||
abstract public function remove($config);
|
||||
|
||||
/**
|
||||
* Clears all objects from cache
|
||||
*/
|
||||
function flush($config) {
|
||||
trigger_error('Cannot call abstract method', E_USER_ERROR);
|
||||
}
|
||||
abstract public function flush($config);
|
||||
|
||||
/**
|
||||
* Clears all expired (older version or revision) objects from cache
|
||||
* @note Be careful implementing this method as flush. Flush must
|
||||
* not interfere with other Definition types, and cleanup()
|
||||
* should not be repeatedly called by userland code.
|
||||
*/
|
||||
function cleanup($config) {
|
||||
trigger_error('Cannot call abstract method', E_USER_ERROR);
|
||||
}
|
||||
abstract public function cleanup($config);
|
||||
|
||||
}
|
||||
|
||||
|
@@ -8,15 +8,15 @@ class HTMLPurifier_DefinitionCache_Decorator extends HTMLPurifier_DefinitionCach
|
||||
/**
|
||||
* Cache object we are decorating
|
||||
*/
|
||||
var $cache;
|
||||
public $cache;
|
||||
|
||||
function HTMLPurifier_DefinitionCache_Decorator() {}
|
||||
public function __construct() {}
|
||||
|
||||
/**
|
||||
* Lazy decorator function
|
||||
* @param $cache Reference to cache object to decorate
|
||||
*/
|
||||
function decorate(&$cache) {
|
||||
public function decorate(&$cache) {
|
||||
$decorator = $this->copy();
|
||||
// reference is necessary for mocks in PHP 4
|
||||
$decorator->cache =& $cache;
|
||||
@@ -27,31 +27,35 @@ class HTMLPurifier_DefinitionCache_Decorator extends HTMLPurifier_DefinitionCach
|
||||
/**
|
||||
* Cross-compatible clone substitute
|
||||
*/
|
||||
function copy() {
|
||||
public function copy() {
|
||||
return new HTMLPurifier_DefinitionCache_Decorator();
|
||||
}
|
||||
|
||||
function add($def, $config) {
|
||||
public function add($def, $config) {
|
||||
return $this->cache->add($def, $config);
|
||||
}
|
||||
|
||||
function set($def, $config) {
|
||||
public function set($def, $config) {
|
||||
return $this->cache->set($def, $config);
|
||||
}
|
||||
|
||||
function replace($def, $config) {
|
||||
public function replace($def, $config) {
|
||||
return $this->cache->replace($def, $config);
|
||||
}
|
||||
|
||||
function get($config) {
|
||||
public function get($config) {
|
||||
return $this->cache->get($config);
|
||||
}
|
||||
|
||||
function flush($config) {
|
||||
public function remove($config) {
|
||||
return $this->cache->remove($config);
|
||||
}
|
||||
|
||||
public function flush($config) {
|
||||
return $this->cache->flush($config);
|
||||
}
|
||||
|
||||
function cleanup($config) {
|
||||
public function cleanup($config) {
|
||||
return $this->cache->cleanup($config);
|
||||
}
|
||||
|
||||
|
@@ -10,31 +10,31 @@ class HTMLPurifier_DefinitionCache_Decorator_Cleanup extends
|
||||
HTMLPurifier_DefinitionCache_Decorator
|
||||
{
|
||||
|
||||
var $name = 'Cleanup';
|
||||
public $name = 'Cleanup';
|
||||
|
||||
function copy() {
|
||||
public function copy() {
|
||||
return new HTMLPurifier_DefinitionCache_Decorator_Cleanup();
|
||||
}
|
||||
|
||||
function add($def, $config) {
|
||||
public function add($def, $config) {
|
||||
$status = parent::add($def, $config);
|
||||
if (!$status) parent::cleanup($config);
|
||||
return $status;
|
||||
}
|
||||
|
||||
function set($def, $config) {
|
||||
public function set($def, $config) {
|
||||
$status = parent::set($def, $config);
|
||||
if (!$status) parent::cleanup($config);
|
||||
return $status;
|
||||
}
|
||||
|
||||
function replace($def, $config) {
|
||||
public function replace($def, $config) {
|
||||
$status = parent::replace($def, $config);
|
||||
if (!$status) parent::cleanup($config);
|
||||
return $status;
|
||||
}
|
||||
|
||||
function get($config) {
|
||||
public function get($config) {
|
||||
$ret = parent::get($config);
|
||||
if (!$ret) parent::cleanup($config);
|
||||
return $ret;
|
||||
|
@@ -11,32 +11,32 @@ class HTMLPurifier_DefinitionCache_Decorator_Memory extends
|
||||
HTMLPurifier_DefinitionCache_Decorator
|
||||
{
|
||||
|
||||
var $definitions;
|
||||
var $name = 'Memory';
|
||||
protected $definitions;
|
||||
public $name = 'Memory';
|
||||
|
||||
function copy() {
|
||||
public function copy() {
|
||||
return new HTMLPurifier_DefinitionCache_Decorator_Memory();
|
||||
}
|
||||
|
||||
function add($def, $config) {
|
||||
public function add($def, $config) {
|
||||
$status = parent::add($def, $config);
|
||||
if ($status) $this->definitions[$this->generateKey($config)] = $def;
|
||||
return $status;
|
||||
}
|
||||
|
||||
function set($def, $config) {
|
||||
public function set($def, $config) {
|
||||
$status = parent::set($def, $config);
|
||||
if ($status) $this->definitions[$this->generateKey($config)] = $def;
|
||||
return $status;
|
||||
}
|
||||
|
||||
function replace($def, $config) {
|
||||
public function replace($def, $config) {
|
||||
$status = parent::replace($def, $config);
|
||||
if ($status) $this->definitions[$this->generateKey($config)] = $def;
|
||||
return $status;
|
||||
}
|
||||
|
||||
function get($config) {
|
||||
public function get($config) {
|
||||
$key = $this->generateKey($config);
|
||||
if (isset($this->definitions[$key])) return $this->definitions[$key];
|
||||
$this->definitions[$key] = parent::get($config);
|
||||
|
@@ -8,27 +8,31 @@ require_once 'HTMLPurifier/DefinitionCache.php';
|
||||
class HTMLPurifier_DefinitionCache_Null extends HTMLPurifier_DefinitionCache
|
||||
{
|
||||
|
||||
function add($def, $config) {
|
||||
public function add($def, $config) {
|
||||
return false;
|
||||
}
|
||||
|
||||
function set($def, $config) {
|
||||
public function set($def, $config) {
|
||||
return false;
|
||||
}
|
||||
|
||||
function replace($def, $config) {
|
||||
public function replace($def, $config) {
|
||||
return false;
|
||||
}
|
||||
|
||||
function get($config) {
|
||||
public function remove($config) {
|
||||
return false;
|
||||
}
|
||||
|
||||
function flush($config) {
|
||||
public function get($config) {
|
||||
return false;
|
||||
}
|
||||
|
||||
function cleanup($config) {
|
||||
public function flush($config) {
|
||||
return false;
|
||||
}
|
||||
|
||||
public function cleanup($config) {
|
||||
return false;
|
||||
}
|
||||
|
||||
|
@@ -17,7 +17,7 @@ class HTMLPurifier_DefinitionCache_Serializer extends
|
||||
HTMLPurifier_DefinitionCache
|
||||
{
|
||||
|
||||
function add($def, $config) {
|
||||
public function add($def, $config) {
|
||||
if (!$this->checkDefType($def)) return;
|
||||
$file = $this->generateFilePath($config);
|
||||
if (file_exists($file)) return false;
|
||||
@@ -25,14 +25,14 @@ class HTMLPurifier_DefinitionCache_Serializer extends
|
||||
return $this->_write($file, serialize($def));
|
||||
}
|
||||
|
||||
function set($def, $config) {
|
||||
public function set($def, $config) {
|
||||
if (!$this->checkDefType($def)) return;
|
||||
$file = $this->generateFilePath($config);
|
||||
if (!$this->_prepareDir($config)) return false;
|
||||
return $this->_write($file, serialize($def));
|
||||
}
|
||||
|
||||
function replace($def, $config) {
|
||||
public function replace($def, $config) {
|
||||
if (!$this->checkDefType($def)) return;
|
||||
$file = $this->generateFilePath($config);
|
||||
if (!file_exists($file)) return false;
|
||||
@@ -40,19 +40,19 @@ class HTMLPurifier_DefinitionCache_Serializer extends
|
||||
return $this->_write($file, serialize($def));
|
||||
}
|
||||
|
||||
function get($config) {
|
||||
public function get($config) {
|
||||
$file = $this->generateFilePath($config);
|
||||
if (!file_exists($file)) return false;
|
||||
return unserialize(file_get_contents($file));
|
||||
}
|
||||
|
||||
function remove($config) {
|
||||
public function remove($config) {
|
||||
$file = $this->generateFilePath($config);
|
||||
if (!file_exists($file)) return false;
|
||||
return unlink($file);
|
||||
}
|
||||
|
||||
function flush($config) {
|
||||
public function flush($config) {
|
||||
if (!$this->_prepareDir($config)) return false;
|
||||
$dir = $this->generateDirectoryPath($config);
|
||||
$dh = opendir($dir);
|
||||
@@ -63,7 +63,7 @@ class HTMLPurifier_DefinitionCache_Serializer extends
|
||||
}
|
||||
}
|
||||
|
||||
function cleanup($config) {
|
||||
public function cleanup($config) {
|
||||
if (!$this->_prepareDir($config)) return false;
|
||||
$dir = $this->generateDirectoryPath($config);
|
||||
$dh = opendir($dir);
|
||||
@@ -78,8 +78,9 @@ class HTMLPurifier_DefinitionCache_Serializer extends
|
||||
/**
|
||||
* Generates the file path to the serial file corresponding to
|
||||
* the configuration and definition name
|
||||
* @todo Make protected
|
||||
*/
|
||||
function generateFilePath($config) {
|
||||
public function generateFilePath($config) {
|
||||
$key = $this->generateKey($config);
|
||||
return $this->generateDirectoryPath($config) . '/' . $key . '.ser';
|
||||
}
|
||||
@@ -87,8 +88,9 @@ class HTMLPurifier_DefinitionCache_Serializer extends
|
||||
/**
|
||||
* Generates the path to the directory containing this cache's serial files
|
||||
* @note No trailing slash
|
||||
* @todo Make protected
|
||||
*/
|
||||
function generateDirectoryPath($config) {
|
||||
public function generateDirectoryPath($config) {
|
||||
$base = $this->generateBaseDirectoryPath($config);
|
||||
return $base . '/' . $this->type;
|
||||
}
|
||||
@@ -96,10 +98,11 @@ class HTMLPurifier_DefinitionCache_Serializer extends
|
||||
/**
|
||||
* Generates path to base directory that contains all definition type
|
||||
* serials
|
||||
* @todo Make protected
|
||||
*/
|
||||
function generateBaseDirectoryPath($config) {
|
||||
public function generateBaseDirectoryPath($config) {
|
||||
$base = $config->get('Cache', 'SerializerPath');
|
||||
$base = is_null($base) ? dirname(__FILE__) . '/Serializer' : $base;
|
||||
$base = is_null($base) ? HTMLPURIFIER_PREFIX . '/HTMLPurifier/DefinitionCache/Serializer' : $base;
|
||||
return $base;
|
||||
}
|
||||
|
||||
@@ -109,7 +112,7 @@ class HTMLPurifier_DefinitionCache_Serializer extends
|
||||
* @param $data Data to write into file
|
||||
* @return Number of bytes written if success, or false if failure.
|
||||
*/
|
||||
function _write($file, $data) {
|
||||
private function _write($file, $data) {
|
||||
static $file_put_contents;
|
||||
if ($file_put_contents === null) {
|
||||
$file_put_contents = function_exists('file_put_contents');
|
||||
@@ -128,7 +131,7 @@ class HTMLPurifier_DefinitionCache_Serializer extends
|
||||
* Prepares the directory that this type stores the serials in
|
||||
* @return True if successful
|
||||
*/
|
||||
function _prepareDir($config) {
|
||||
private function _prepareDir($config) {
|
||||
$directory = $this->generateDirectoryPath($config);
|
||||
if (!is_dir($directory)) {
|
||||
$base = $this->generateBaseDirectoryPath($config);
|
||||
@@ -151,7 +154,7 @@ class HTMLPurifier_DefinitionCache_Serializer extends
|
||||
* Tests permissions on a directory and throws out friendly
|
||||
* error messages and attempts to chmod it itself if possible
|
||||
*/
|
||||
function _testPermissions($dir) {
|
||||
private function _testPermissions($dir) {
|
||||
// early abort, if it is writable, everything is hunky-dory
|
||||
if (is_writable($dir)) return true;
|
||||
if (!is_dir($dir)) {
|
||||
|
@@ -10,10 +10,6 @@ to disable caching (not recommended, as you will see a definite
|
||||
performance degradation). This directive has been available since 2.0.0.
|
||||
');
|
||||
|
||||
HTMLPurifier_ConfigSchema::defineAllowedValues(
|
||||
'Cache', 'DefinitionImpl', array('Serializer')
|
||||
);
|
||||
|
||||
HTMLPurifier_ConfigSchema::defineAlias(
|
||||
'Core', 'DefinitionCache',
|
||||
'Cache', 'DefinitionImpl'
|
||||
@@ -26,21 +22,21 @@ HTMLPurifier_ConfigSchema::defineAlias(
|
||||
class HTMLPurifier_DefinitionCacheFactory
|
||||
{
|
||||
|
||||
var $caches = array('Serializer' => array());
|
||||
var $decorators = array();
|
||||
protected $caches = array('Serializer' => array());
|
||||
protected $implementations = array();
|
||||
protected $decorators = array();
|
||||
|
||||
/**
|
||||
* Initialize default decorators
|
||||
*/
|
||||
function setup() {
|
||||
public function setup() {
|
||||
$this->addDecorator('Cleanup');
|
||||
}
|
||||
|
||||
/**
|
||||
* Retrieves an instance of global definition cache factory.
|
||||
* @static
|
||||
*/
|
||||
static function &instance($prototype = null) {
|
||||
public static function &instance($prototype = null) {
|
||||
static $instance;
|
||||
if ($prototype !== null) {
|
||||
$instance = $prototype;
|
||||
@@ -51,14 +47,21 @@ class HTMLPurifier_DefinitionCacheFactory
|
||||
return $instance;
|
||||
}
|
||||
|
||||
/**
|
||||
* Registers a new definition cache object
|
||||
* @param $short Short name of cache object, for reference
|
||||
* @param $long Full class name of cache object, for construction
|
||||
*/
|
||||
public function register($short, $long) {
|
||||
$this->implementations[$short] = $long;
|
||||
}
|
||||
|
||||
/**
|
||||
* Factory method that creates a cache object based on configuration
|
||||
* @param $type Name of definitions handled by cache
|
||||
* @param $config Instance of HTMLPurifier_Config
|
||||
*/
|
||||
function &create($type, $config) {
|
||||
// only one implementation as for right now, $config will
|
||||
// be used to determine implementation
|
||||
public function &create($type, $config) {
|
||||
$method = $config->get('Cache', 'DefinitionImpl');
|
||||
if ($method === null) {
|
||||
$null = new HTMLPurifier_DefinitionCache_Null($type);
|
||||
@@ -67,7 +70,17 @@ class HTMLPurifier_DefinitionCacheFactory
|
||||
if (!empty($this->caches[$method][$type])) {
|
||||
return $this->caches[$method][$type];
|
||||
}
|
||||
$cache = new HTMLPurifier_DefinitionCache_Serializer($type);
|
||||
if (
|
||||
isset($this->implementations[$method]) &&
|
||||
class_exists($class = $this->implementations[$method], false)
|
||||
) {
|
||||
$cache = new $class($type);
|
||||
} else {
|
||||
if ($method != 'Serializer') {
|
||||
trigger_error("Unrecognized DefinitionCache $method, using Serializer instead", E_USER_WARNING);
|
||||
}
|
||||
$cache = new HTMLPurifier_DefinitionCache_Serializer($type);
|
||||
}
|
||||
foreach ($this->decorators as $decorator) {
|
||||
$new_cache = $decorator->decorate($cache);
|
||||
// prevent infinite recursion in PHP 4
|
||||
@@ -82,7 +95,7 @@ class HTMLPurifier_DefinitionCacheFactory
|
||||
* Registers a decorator to add to all new cache objects
|
||||
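* @param $decorator Decorator to register; a string is treated as a short name and expanded to the class HTMLPurifier_DefinitionCache_Decorator_$decorator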
|
||||
*/
|
||||
function addDecorator($decorator) {
|
||||
public function addDecorator($decorator) {
|
||||
if (is_string($decorator)) {
|
||||
$class = "HTMLPurifier_DefinitionCache_Decorator_$decorator";
|
||||
$decorator = new $class;
|
||||
|
@@ -11,40 +11,40 @@ class HTMLPurifier_Doctype
|
||||
/**
|
||||
* Full name of doctype
|
||||
*/
|
||||
var $name;
|
||||
public $name;
|
||||
|
||||
/**
|
||||
* List of standard modules (string identifiers or literal objects)
|
||||
* that this doctype uses
|
||||
*/
|
||||
var $modules = array();
|
||||
public $modules = array();
|
||||
|
||||
/**
|
||||
* List of modules to use for tidying up code
|
||||
*/
|
||||
var $tidyModules = array();
|
||||
public $tidyModules = array();
|
||||
|
||||
/**
|
||||
* Is the language derived from XML (i.e. XHTML)?
|
||||
*/
|
||||
var $xml = true;
|
||||
public $xml = true;
|
||||
|
||||
/**
|
||||
* List of aliases for this doctype
|
||||
*/
|
||||
var $aliases = array();
|
||||
public $aliases = array();
|
||||
|
||||
/**
|
||||
* Public DTD identifier
|
||||
*/
|
||||
var $dtdPublic;
|
||||
public $dtdPublic;
|
||||
|
||||
/**
|
||||
* System DTD identifier
|
||||
*/
|
||||
var $dtdSystem;
|
||||
public $dtdSystem;
|
||||
|
||||
function HTMLPurifier_Doctype($name = null, $xml = true, $modules = array(),
|
||||
public function __construct($name = null, $xml = true, $modules = array(),
|
||||
$tidyModules = array(), $aliases = array(), $dtd_public = null, $dtd_system = null
|
||||
) {
|
||||
$this->name = $name;
|
||||
@@ -59,7 +59,7 @@ class HTMLPurifier_Doctype
|
||||
/**
|
||||
* Clones the doctype, use before resolving modes and the like
|
||||
*/
|
||||
function copy() {
|
||||
public function copy() {
|
||||
return unserialize(serialize($this));
|
||||
}
|
||||
}
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user