1
0
mirror of https://github.com/ezyang/htmlpurifier.git synced 2025-08-03 12:47:56 +02:00

Compare commits

...

303 Commits

Author SHA1 Message Date
Edward Z. Yang
6d50e5282a Release 4.9.2
Signed-off-by: Edward Z. Yang <ezyang@cs.stanford.edu>
2017-03-12 23:30:53 -07:00
Edward Z. Yang
5bc7c72608 Add tests for new entity decoding codepath.
Signed-off-by: Edward Z. Yang <ezyang@cs.stanford.edu>
2017-03-12 20:05:09 -07:00
Edward Z. Yang
98984546d4 NEWS for 4.9.2
Signed-off-by: Edward Z. Yang <ezyang@cs.stanford.edu>
2017-03-12 20:03:47 -07:00
Edward Z. Yang
c7a2f6f0df Merge pull request #129 from rybakit/patch-1
Fix a call to undefined function HTMLPurifier_Encoder()
2017-03-12 16:25:58 -07:00
Eugene Leonovich
fd24de69a3 Fix a call to undefined function HTMLPurifier_Encoder() 2017-03-12 22:44:03 +01:00
Edward Z. Yang
5688656174 Fix more PHP 5.3 problems.
Signed-off-by: Edward Z. Yang <ezyang@cs.stanford.edu>
2017-03-08 18:01:58 -08:00
Edward Z. Yang
d728205767 Turn on 5.3 Travis testing.
Signed-off-by: Edward Z. Yang <ezyang@cs.stanford.edu>
2017-03-08 17:47:14 -08:00
Edward Z. Yang
8836ae05aa Fix PHP 5.3 compatibility, fixes #125.
Signed-off-by: Edward Z. Yang <ezyang@cs.stanford.edu>
2017-03-08 17:46:29 -08:00
Edward Z. Yang
b90295deda Enable PHP 7.1 testing.
Signed-off-by: Edward Z. Yang <ezyang@cs.stanford.edu>
2017-03-08 13:38:20 -08:00
Edward Z. Yang
de82f9845f Release 4.9.1 (sic)
Signed-off-by: Edward Z. Yang <ezyang@cs.stanford.edu>
2017-03-08 00:22:36 -08:00
Edward Z. Yang
9d2d75d8bc Add test case for removing empty list items.
Signed-off-by: Edward Z. Yang <ezyang@cs.stanford.edu>
2017-03-08 00:11:32 -08:00
Edward Z. Yang
74f123a84c Fix #83.
Signed-off-by: Edward Z. Yang <ezyang@cs.stanford.edu>
2017-03-07 17:52:41 -08:00
Edward Z. Yang
7e11c271b9 Revamp entity decoding to be more like HTML5.
See %Core.LegacyEntityDecoder for more details.

Signed-off-by: Edward Z. Yang <ezyang@cs.stanford.edu>
2017-03-07 17:34:59 -08:00
Edward Z. Yang
66bbae73a9 Comment on why it's a non-greedy match.
Signed-off-by: Edward Z. Yang <ezyang@cs.stanford.edu>
2017-03-06 23:27:30 -08:00
Edward Z. Yang
5886326cd0 Test for catastrophic backtracking.
Signed-off-by: Edward Z. Yang <ezyang@cs.stanford.edu>
2017-03-06 23:26:55 -08:00
Edward Z. Yang
564af61809 Usage/includes update.
Signed-off-by: Edward Z. Yang <ezyang@cs.stanford.edu>
2017-03-06 23:06:56 -08:00
Edward Z. Yang
b19dcb0ba5 CHANGELOG for #120 fix, and remove the array_filter.
Signed-off-by: Edward Z. Yang <ezyang@cs.stanford.edu>
2017-03-06 23:06:24 -08:00
Edward Z. Yang
586abc63e4 CHANGELOG for rgba/hsl/hsla patch.
Signed-off-by: Edward Z. Yang <ezyang@cs.stanford.edu>
2017-03-06 23:03:33 -08:00
Edward Z. Yang
5b6a3f55bf Merge pull request #121 from breathbath/master
Fixing PREG_BACKTRACK_LIMIT_ERROR in HTMLPurifier_Filter_ExtractStyle…
2017-03-06 23:01:34 -08:00
Edward Z. Yang
0c31b22240 Merge pull request #118 from fxbt/master
Add hsl, hsla and rgba support for css color attribute definition
2017-03-06 23:01:06 -08:00
Edward Z. Yang
5662efc936 Fix #78.
Signed-off-by: Edward Z. Yang <ezyang@cs.stanford.edu>
2017-03-06 22:54:54 -08:00
Edward Z. Yang
353c96f156 Document skips in more detail, #116.
Signed-off-by: Edward Z. Yang <ezyang@cs.stanford.edu>
2017-03-06 20:31:28 -08:00
Edward Z. Yang
4047a6230b Extra cleanup on cleanUTF8.
Signed-off-by: Edward Z. Yang <ezyang@cs.stanford.edu>
2017-03-06 16:31:02 -08:00
Andrey Pozolotin
9195cb7a2e Added escape sequence 2017-03-06 16:28:53 -08:00
Andrey Pozolotin
39c4c359ad Fixing PREG_BACKTRACK_LIMIT_ERROR in HTMLPurifier_Filter_ExtractStyleBlocks 2017-03-06 16:28:53 -08:00
Edward Z. Yang
bb3f86e80a Merge pull request #123 from mpyw-forks/fix/#122/surrogate-pair-range
Fix surrogate pair range
2017-03-03 23:13:30 -08:00
mpyw
d16e73e63e Add test for #122 2017-03-04 15:40:44 +09:00
mpyw
f145f64bf4 Fix #122: correct surrogate pair range 2017-03-04 15:38:01 +09:00
Andrey Pozolotin
5fdec87fe9 Added escape sequence 2017-03-01 17:52:00 +01:00
Andrey Pozolotin
4462559459 Fixing PREG_BACKTRACK_LIMIT_ERROR in HTMLPurifier_Filter_ExtractStyleBlocks 2017-03-01 17:46:03 +01:00
f.godfrin
12185143ef Use a constructor and a property for the alpha check 2017-02-10 21:03:11 +01:00
f.godfrin
17a90a951a Better regex for mungeRgb 2017-02-10 00:40:56 +01:00
f.godfrin
0bab4b9fd0 Fix mungeRgb to handle percent, float and hsl values 2017-02-10 00:38:05 +01:00
f.godfrin
bd92f3531b Remove double % 2017-02-09 23:37:36 +01:00
f.godfrin
0d5ab2fe13 Include hsl and hsla support 2017-02-09 23:34:19 +01:00
f.godfrin
d41a59e422 Add rgba support for css color attribute definition 2017-02-09 22:18:15 +01:00
Bastian Hofmann
8e4cacf0a7 Refactor HTML.Noopener to HTML.TargetNoopener so that it behaves like HTML.TargetNoreferrer and is active by default if a target is set 2017-02-03 16:54:51 -08:00
Bastian Hofmann
c82051c3e1 Add HTML.Noopener to add a noopener rel to every external link
This has performance benefits https://jakearchibald.com/2016/performance-benefits-of-rel-noopener/ but most importantly also security benefits https://mathiasbynens.github.io/rel-noopener/

Addresses https://github.com/ezyang/htmlpurifier/issues/96
2017-02-03 16:54:51 -08:00
Edward Z. Yang
d4a96463ef export-ignore .travis.yml
Signed-off-by: Edward Z. Yang <ezyang@cs.stanford.edu>
2017-01-19 09:28:40 -08:00
Edward Z. Yang
1b7d684d07 Remove $a = array($a) which is miscompiled by Zend OpCache.
Fixes #108.

Signed-off-by: Edward Z. Yang <ezyang@cs.stanford.edu>
2017-01-04 14:35:52 -05:00
Edward Z. Yang
5070404376 Handle semicolons in strings in CSS correctly.
Fixes http://htmlpurifier.org/phorum/read.php?3,7522,8096

Signed-off-by: Edward Z. Yang <ezyang@cs.stanford.edu>
2016-10-29 00:01:19 -07:00
Edward Z. Yang
cef27f750d Add missing changelog entries.
Signed-off-by: Edward Z. Yang <ezyang@cs.stanford.edu>
2016-10-27 17:31:10 -07:00
Edward Z. Yang
59463c5c39 Allow %URI.DefaultScheme to be null.
Fixes #103.

Signed-off-by: Edward Z. Yang <ezyang@cs.stanford.edu>
2016-10-27 17:30:44 -07:00
Edward Z. Yang
d19d648a26 [ci skip] Add a Travis build badge.
Signed-off-by: Edward Z. Yang <ezyang@cs.stanford.edu>
2016-10-27 02:02:29 -07:00
Edward Z. Yang
20b40a5441 Travis support.
Signed-off-by: Edward Z. Yang <ezyang@cs.stanford.edu>
2016-10-27 02:00:47 -07:00
Edward Z. Yang
34d252cbbc Update usage.xml.
Signed-off-by: Edward Z. Yang <ezyang@cs.stanford.edu>
2016-10-27 02:00:47 -07:00
Edward Z. Yang
8b28e571fe Handle case when IDNAs are supported.
Signed-off-by: Edward Z. Yang <ezyang@cs.stanford.edu>
2016-10-27 02:00:46 -07:00
Edward Z. Yang
3ae21ce511 PHP 7.0 warnings fix: don't pass rvalue by reference.
Signed-off-by: Edward Z. Yang <ezyang@cs.stanford.edu>
2016-10-27 02:00:46 -07:00
Edward Z. Yang
3ba9133b21 Don't assume that idn_to_ascii does validation.
Signed-off-by: Edward Z. Yang <ezyang@cs.stanford.edu>
2016-10-27 02:00:46 -07:00
Edward Z. Yang
dc8702160c Merge pull request #101 from yankos/hotfix/directory_not_close
FIX directory not closing
2016-10-15 23:14:10 -07:00
yan_kos
4dc68aa920 FIX directory not closing
#100
2016-10-15 16:20:47 +03:00
Edward Z. Yang
08eee90e15 Delete asserts, fixes #97.
Signed-off-by: Edward Z. Yang <ezyang@cs.stanford.edu>
2016-10-02 00:14:41 -07:00
Edward Z. Yang
1ef4375dbb Proposed fix to Serializer code.
Signed-off-by: Edward Z. Yang <ezyang@cs.stanford.edu>
2016-09-05 15:24:08 -07:00
Edward Z. Yang
6a221a3045 Merge pull request #94 from zobzn/css-min-max-width
css definition (min-width, max-width, min-height, max-height)
2016-09-05 14:57:44 -07:00
zema
246fc8946a css properties: min-width, max-width, min-height, max-height 2016-09-05 10:45:58 +03:00
Edward Z. Yang
1ce2fde400 Merge pull request #91 from apsdsm/fix-permissions-bug
changed chmod behaviour in Serializer
2016-07-29 03:25:41 -07:00
Nick del Pozo
1f982d279f rollback change to permissions 2016-07-29 08:56:36 +09:00
Nick del Pozo
8be8cee9b3 changed chmod behaviour in Serializer 2016-07-27 12:56:03 +09:00
Edward Z. Yang
d0c392f77d Release 4.8.0
Signed-off-by: Edward Z. Yang <ezyang@cs.stanford.edu>
2016-07-16 05:58:58 -07:00
Edward Z. Yang
d1c5d75027 Fix #73 with Attr.ID.HTML5
Signed-off-by: Edward Z. Yang <ezyang@cs.stanford.edu>
2016-07-16 05:52:45 -07:00
Bart Butler
3747cb7efb avoid exif_imagetype exception with small files/corrupt data URI 2016-07-16 05:23:17 -07:00
Edward Z. Yang
0166c3728b Stop trying to chmod if SerializerPermissions is null, fixes #71
Signed-off-by: Edward Z. Yang <ezyang@cs.stanford.edu>
2016-07-01 16:04:11 -04:00
Edward Z. Yang
ed180f595d Hack to fix #85
Signed-off-by: Edward Z. Yang <ezyang@cs.stanford.edu>
2016-07-01 15:52:09 -04:00
Edward Z. Yang
3e4deabbb3 New smoketest for testing configuration HTML form.
Signed-off-by: Edward Z. Yang <ezyang@cs.stanford.edu>
2016-07-01 15:50:51 -04:00
Edward Z. Yang
44baee6a82 Partial border-radius support.
Signed-off-by: Edward Z. Yang <ezyang@cs.stanford.edu>
2016-06-30 22:22:13 -04:00
Cameron Ball
1675fc7caf Add %HTML.TargetNoreferrer, which adds rel="noreferrer" when target attribute is set
Signed-off-by: Edward Z. Yang <ezyang@cs.stanford.edu>
2016-06-30 21:53:43 -04:00
Wes Cossick
cc35c8eb8c tel protocol support. 2016-06-30 21:19:49 -04:00
Edward Z. Yang
a11aeab4a6 Don't suggest 777, only 775.
Signed-off-by: Edward Z. Yang <ezyang@cs.stanford.edu>
2016-03-27 15:59:10 -07:00
Edward Z. Yang
43a9f052fd Fix #57, make flashvars check (and others) case-insensitive.
Signed-off-by: Edward Z. Yang <ezyang@cs.stanford.edu>
2016-03-27 15:56:30 -07:00
Edward Z. Yang
b4981c3395 Fix #67, don't use <body> tags in comments for %Core.ConvertDocumentToFragment
Signed-off-by: Edward Z. Yang <ezyang@cs.stanford.edu>
2016-03-27 15:19:32 -07:00
Edward Z. Yang
f14076dc3e Fix #49; prevent readdir infinite loop when cache directory not listable.
Signed-off-by: Edward Z. Yang <ezyang@cs.stanford.edu>
2016-03-27 14:53:31 -07:00
Edward Z. Yang
91fd55c857 Fix #45, errors when ul/ol allowed without li.
Signed-off-by: Edward Z. Yang <ezyang@cs.stanford.edu>
2016-03-26 22:41:54 -07:00
Rodrigo Prado
096064dd0a Added more info in README 2016-03-24 20:32:54 -07:00
Mike Zukowsky
845edf16e2 Docblock update 2016-03-24 20:26:41 -07:00
Roman Kovalenko
2c4f889ca4 Remove BOM from file INSTALL.fr.utf8. It's the only file with a BOM in the project 2016-03-24 20:25:58 -07:00
Stefano Torresi
b3856d2040 Export maintenance and path2class scripts in composer.
These scripts could be used in continuously integrated environments
(e.g. `generate-standalone.php`).
2016-03-24 20:24:18 -07:00
Chimpzee
6e00b443cd Bug with tempnam("/tmp", "");
Some hostings have a different temporary path than "/tmp".
2016-03-24 20:19:57 -07:00
Edward Z. Yang
7e49ff3dcd Announce PHP 7 support.
Signed-off-by: Edward Z. Yang <ezyang@cs.stanford.edu>
2016-03-24 00:14:05 -07:00
Edward Z. Yang
1f3e282fde Fix a bounds error which now errors in PHP 7.
Signed-off-by: Edward Z. Yang <ezyang@cs.stanford.edu>
2016-03-24 00:13:08 -07:00
Edward Z. Yang
753c830239 Update to work with Git version of SimpleTest.
Signed-off-by: Edward Z. Yang <ezyang@cs.stanford.edu>
2016-03-24 00:08:03 -07:00
Edward Z. Yang
72123e23c9 Update ExtractStyleBlocks tests for modern CSSTidy at https://github.com/Cerdic/CSSTidy
Signed-off-by: Edward Z. Yang <ezyang@cs.stanford.edu>
2016-03-23 23:39:38 -07:00
Edward Z. Yang
45161b4fb1 Accept leading digits in hostnames as per RFC 1123.
Signed-off-by: Edward Z. Yang <ezyang@cs.stanford.edu>
2016-03-23 22:42:21 -07:00
Synchro
25db9e1dd0 Don't use PHP4-style constructors 2016-03-16 17:09:41 -07:00
Edward Z. Yang
92aabf2b23 Fix #76, linkify includes dots at end of URL.
Signed-off-by: Edward Z. Yang <ezyang@cs.stanford.edu>
2016-03-02 02:05:54 -08:00
Edward Z. Yang
aebe1c02a2 Use idn_to_ascii when available.
Signed-off-by: Edward Z. Yang <ezyang@cs.stanford.edu>
2016-03-02 01:35:07 -08:00
Edward Z. Yang
913ac6955b CSS.AllowDuplicates for duplicate properties.
Signed-off-by: Edward Z. Yang <ezyang@cs.stanford.edu>
2015-12-20 11:53:54 -08:00
Edward Z. Yang
958ba65595 Don't truncate alts.
Signed-off-by: Edward Z. Yang <ezyang@cs.stanford.edu>
2015-09-29 15:36:53 -07:00
Edward Z. Yang
ae1828d955 Release 4.7.0.
Signed-off-by: Edward Z. Yang <ezyang@cs.stanford.edu>
2015-08-04 18:03:42 -07:00
Edward Z. Yang
e34a858ca9 Merge pull request #60 from sylfabre/patch-1
Missing @return
2015-08-03 10:36:45 -07:00
Sylvain
2c963dcc7f Missing @return
Adding PHPDoc @return statement for code completion in IDE
2015-08-03 10:21:47 +02:00
Edward Z. Yang
bfbf8a9da1 Revert "Fix autoloading in Composer."
This reverts commit 04cf6c8739.
2015-06-14 10:57:52 -07:00
Timothée Barray
04cf6c8739 Fix autoloading in Composer.
Per https://getcomposer.org/doc/04-schema.md#psr-0
2015-06-06 20:04:21 -07:00
Edward Z. Yang
0d7328dbb2 s/Include/Inclure/
Signed-off-by: Edward Z. Yang <ezyang@cs.stanford.edu>
2015-05-05 13:43:49 -07:00
anthonybocci
7aeedd9071 Updated translation of installing in french 2015-05-05 10:50:42 -07:00
Edward Z. Yang
c67e4c2f7e All values, including empty, are valid HTML bools.
Signed-off-by: Edward Z. Yang <ezyang@cs.stanford.edu>
2015-02-11 16:36:44 -08:00
Edward Z. Yang
0c3e68dd03 Stop using umask to make definition cache. Fixes #32
This is not really the right way to solve the ACL problem,
but there isn't really any reason we should be mucking about
with the umask.

Mucked around with the test case to make it pass, but I think
it's probably a bit delicate now.

Signed-off-by: Edward Z. Yang <ezyang@cs.stanford.edu>
2014-12-08 18:30:54 -08:00
Jon Dufresne
b307f3d9ef Update gitattributes to produce slimmer packages for composer 2014-10-23 15:36:02 -07:00
Edward Z. Yang
cd60294ada Fix rgb in border attribute with spaces, fixes #30.
Signed-off-by: Edward Z. Yang <ezyang@cs.stanford.edu>
2014-08-31 12:12:38 +01:00
Edward Z. Yang
39d3df1fd7 Add AutoFormat.RemoveEmpty.Predicate, fixes #35.
Signed-off-by: Edward Z. Yang <ezyang@cs.stanford.edu>
2014-08-31 12:12:17 +01:00
Edward Z. Yang
b8704535a3 Update test.
Signed-off-by: Edward Z. Yang <ezyang@cs.stanford.edu>
2014-08-31 11:10:11 +01:00
Edward Z. Yang
4da38aca80 Update YouTube embed code to new style, fixes #28
Signed-off-by: Edward Z. Yang <ezyang@cs.stanford.edu>
2014-08-31 09:30:16 +01:00
Edward Z. Yang
bf84df4f7d Move opacity to tricky. Fixes #16.
Signed-off-by: Edward Z. Yang <ezyang@cs.stanford.edu>
2014-08-31 09:24:11 +01:00
Edward Z. Yang
15d1a3003a Don't truncate in DOMLex when seeing closing div
Signed-off-by: Edward Z. Yang <ezyang@cs.stanford.edu>
2014-08-31 08:50:33 +01:00
Edward Z. Yang
80ebd4322e Typo in docs, thanks Soleil Golden for reporting.
Signed-off-by: Edward Z. Yang <ezyang@cs.stanford.edu>
2014-02-04 12:17:24 -08:00
Edward Z. Yang
18b8a0e44a Make Composer work with PHP 5.2 and earlier. Reported by @voku
Signed-off-by: Edward Z. Yang <ezyang@cs.stanford.edu>
2013-12-08 15:51:56 -08:00
Edward Z. Yang
6f389f0f25 Release 4.6.0.
Signed-off-by: Edward Z. Yang <ezyang@cs.stanford.edu>
2013-11-30 00:25:19 -08:00
Edward Z. Yang
8cd08620dc Conditionalize hash_hmac tests for 5.0
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2013-11-29 22:27:01 -08:00
Edward Z. Yang
0beecad78a Add Twitter handle to release notes.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2013-11-29 22:26:57 -08:00
Edward Z. Yang
54477c172b Fix infinite loop in Lexer.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2013-10-27 21:41:08 -07:00
Edward Z. Yang
e52d1fe310 Fix < PHP 5.4 compatibility break. Thanks GromNaN for submitting the patch.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2013-10-22 14:17:41 -07:00
Edward Z. Yang
0767bbc12d Rewrite FixNesting implementation to be tree-based.
This mega-patch rips out the FixNesting implementation and the related
ChildDef components.  The primary algorithmic change is to convert from
use of tokens to tree nodes, which are far more amenable to the style
of processing that FixNesting uses.  Additionally, FixNesting has been
changed to go bottom-up rather than top-down, in order to avoid needing
to implement backtracking.

This patch simplifies a good deal of the relevant logic, since we no
longer need to continually recalculate the nesting structure when
processing things.  However, the conversion to the alternate format
incurs some overhead, so for small inputs these changes are not a win.
One possibility to greatly reduce the constant factors here is to switch
to entirely using libxml's representation, and never serializing tokens;
this would require one to rewrite injectors, however.

The iterative post-order traversal in FixNesting is a bit subtle, but
we have essentially reified the stack and continuations.

We've removed support for %Core.EscapeInvalidChildren.

Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2013-10-20 22:37:01 -07:00
Edward Z. Yang
b3640e1af6 Add conversion functions for our own tree format.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2013-10-20 15:05:11 -07:00
Edward Z. Yang
be5769804a Make the Token class abstract.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2013-10-17 16:13:04 -07:00
Edward Z. Yang
d6fbd7df22 Remove some unnecessary pass-by-reference.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2013-10-16 18:55:23 -07:00
Edward Z. Yang
804a06f01e Remove PHP 4 compatibility hack.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2013-10-16 18:36:44 -07:00
Edward Z. Yang
8f401f769e Use a Zipper to process MakeWellFormed, removing quadratic behavior.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2013-10-13 13:21:02 -07:00
Edward Z. Yang
82bcc62058 Properly handle context variables that are NULL.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2013-10-13 13:21:02 -07:00
Edward Z. Yang
f17490f009 Implementation of a Zipper, for efficient splice.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2013-10-13 01:16:32 -07:00
Edward Z. Yang
a5fc37d8c3 Improve gitignore.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2013-10-13 00:18:11 -07:00
Edward Z. Yang
412bae13b5 Fix quadratic behavior in DOMLex due to array_shift.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2013-09-17 00:48:42 -07:00
Edward Z. Yang
cf44f399f8 Properly use HMAC for secure munging.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2013-09-13 21:16:50 -07:00
Marcus Bointon
fac747bdbd PSR-2 reformatting PHPDoc corrections
With minor corrections.

Signed-off-by: Marcus Bointon <marcus@synchromedia.co.uk>
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2013-08-17 22:27:26 -04:00
Edward Z. Yang
19eee14899 Tighten up invariants.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2013-07-26 21:54:53 -07:00
Edward Z. Yang
25d49f4ec0 Explicitly specify decorator name.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2013-07-26 21:37:33 -07:00
Edward Z. Yang
53c2907706 New directive %Core.AllowHostnameUnderscore
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2013-07-26 21:33:39 -07:00
Edward Z. Yang
af7107e830 Add note fall through is intentional.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2013-07-18 10:22:45 -07:00
Marcus Bointon
107b3055a1 Fix var name conflict in loadArray 2013-07-16 21:56:29 -07:00
Synchro
29a3c70370 A bunch of PHPdoc and php codesniffer corrections - no functional code changes 2013-07-16 21:53:17 -07:00
Edward Z. Yang
75bd7abcc7 Make list nesting test more sensitive.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2013-06-06 13:08:13 -07:00
Edward Z. Yang
0680832d41 Use info_parent_def to get parent information, since it may not be present in info array.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2013-05-21 17:19:59 -07:00
Edward Z. Yang
19360ddb36 Ignore commas and nbsps for linkification. Thanks nAS for contributing.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2013-05-21 16:43:59 -07:00
Edward Z. Yang
3c903b7463 Doc fix.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2013-05-18 08:48:47 -07:00
Edward Z. Yang
6e37ecd1c8 Make URI parsing algorithm more strict.
Thanks Michael Gusev <mgusev@sugarcrm.com> for contributing this patch.

Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2013-04-16 13:56:43 -07:00
Edward Z. Yang
20eff0a3a0 Fix NEWS entry.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2013-02-21 14:08:36 -08:00
Edward Z. Yang
d516e2f8de Release 4.5.0
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2013-02-17 16:04:08 -08:00
Edward Z. Yang
631021733b Add %Core.DisableExcludes directive
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2013-02-17 15:47:38 -08:00
Michael Tibben
344e0640b6 Add required constant for composer autoloading
Signed-off-by: Michael Tibben <michael.tibben@99designs.com>
2012-12-21 16:16:16 +08:00
Edward Z. Yang
62d2550e16 Use SHA-1 instead of MD5.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2012-10-27 02:33:22 -07:00
Edward Z. Yang
087145a71b Blacklist more tags from RemoveEmpty.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2012-10-27 02:32:48 -07:00
Edward Z. Yang
a44187a5c1 Cleanup after data validation.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2012-10-27 02:30:58 -07:00
Edward Z. Yang
c0ad68108a Do checks against iconvAvailable because PHP 5.4 has botched iconv support.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2012-10-27 02:27:57 -07:00
Edward Z. Yang
83a574491e Comment for bug that needs to get fixed.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2012-10-11 11:40:02 -07:00
Edward Z. Yang
3b537365a4 CSS properties page-break-*
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2012-10-11 11:39:52 -07:00
Rob Loach
8a8b123d33 Autoloading support for Composer 2012-09-16 18:11:46 +02:00
Edward Z. Yang
72db575446 Fix bug with non-lower case color names in HTML.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2012-07-30 10:54:32 -04:00
Edward Z. Yang
d8bb73ce46 Permit underscores in font-families.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2012-07-27 18:28:29 -04:00
Edward Z. Yang
f90372f8ab More support for white-space.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2012-06-16 17:10:36 -04:00
Edward Z. Yang
f38fca32a9 Don't lower-case components of background.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2012-06-02 11:22:58 -04:00
Edward Z. Yang
5a23004652 Support for inline-block.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2012-05-25 23:55:48 -04:00
Edward Z. Yang
6705140082 Fix in AttrTransform_Nofollow
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2012-05-14 23:07:27 -04:00
Edward Z. Yang
cb7162a995 Use prepend for autoloading on PHP 5.3+
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2012-05-02 11:07:24 -04:00
Edward Z. Yang
2189a9430f Support for safe external scripts via explicit whitelist.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2012-04-27 17:44:49 -04:00
Edward Z. Yang
7291f19347 Fix problem where stacked AttrTransforms clobber each other.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2012-03-16 23:12:16 -04:00
Benjamin Steininger
9fcffd6533 Add composer.json file for easy install via composer.
Composer: http://getcomposer.org/

Since HTML Purifier is not completely psr-0 compatible (a classmap is
not enough for autoloading), the package-description does not contain
anything autoload-related. The user has to include the autoloader
himself.

This lets us create an entry on packagist which allows installing HTML
Purifier without the need to declare a repository in projects; it also
makes it easy to create libraries which want to use HTML Purifier using
composer.

Signed-off-by: Benjamin Steininger <robo47@robo47.net>
2012-03-16 01:05:02 -04:00
Edward Z. Yang
31dce298ea Actually make URI.DisableResources do something.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2012-03-02 13:25:00 -05:00
Edward Z. Yang
8c9d461a62 Bugfix: _blank not blank.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2012-02-18 11:28:01 -05:00
Edward Z. Yang
7291a9647e Update NEWS.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2012-01-25 07:06:30 -05:00
Edward Z. Yang
17af0e4fc1 Release 4.4.0
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2012-01-18 19:22:31 -05:00
Edward Z. Yang
70028f83d6 Make all of the tests work on all PHP versions.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2012-01-18 18:57:13 -05:00
Edward Z. Yang
5c5e3fe79f Avoid doing stupidly clever reflection tricks that make old PHP versions sad.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2012-01-18 18:21:36 -05:00
Edward Z. Yang
56a26cab14 Modernize some of the testing facilities.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2012-01-18 18:10:16 -05:00
Edward Z. Yang
1c7fedff5a Tighter CSS selector validation.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2012-01-17 15:36:26 -05:00
Edward Z. Yang
9de0785448 Remark about bypassing host list with punycode.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2012-01-06 05:32:53 -08:00
Edward Z. Yang
974fe3f25e Optional support for IDNAs with PEAR Net_IDNA2
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2012-01-06 05:28:00 -08:00
Edward Z. Yang
94468f3c24 Remove PEARSax3 lexer.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2012-01-03 20:40:17 +08:00
Edward Z. Yang
e0354fecd9 Make forms work for transitional doctypes.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2011-12-30 22:56:44 +08:00
Edward Z. Yang
1bbbc624dd Remove inscrutable TODO, optionalize another.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2011-12-27 23:50:02 +08:00
Edward Z. Yang
49879d2cc6 Add note about superseding modules in TODO.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2011-12-27 23:21:32 +08:00
Edward Z. Yang
5c9b5130c8 Bump minor version number to 4.4.0.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2011-12-26 21:55:14 +08:00
Edward Z. Yang
d2de8d976a Add test for invalid SafeIframe usage.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2011-12-26 21:52:55 +08:00
Bradley M. Froehle
4164b2eb2b Implement Iframe module, and provide %HTML.SafeIframe and %URI.SafeIframeRegexp for untrusted usage.
The purpose of this addition is twofold. In trusted mode, iframes are
now unconditionally allowed.

However, many online video providers (YouTube, Vimeo) and other web
applications (Google Maps, Google Calendar, etc) provide embed code in
iframe format, which is useful functionality in untrusted mode.
You can specify iframes as trusted elements with %HTML.SafeIframe;
however, you need to additionally specify a whitelist mechanism such as
%URI.SafeIframeRegexp to say what iframe embeds are OK (by default
everything is rejected).

Note: As iframes are invalid in strict doctypes, you will not be able to
use them there.

We also added an always_load parameter to URIFilters in order to support
the strange nature of the SafeIframe URIFilter (it always needs to be
loaded, due to the inability of accessing the %HTML.SafeIframe directive
to see if it's needed!)  We expect this URIFilter can expand in the future
to offer more complex validation mechanisms.

Signed-off-by: Bradley M. Froehle <brad.froehle@gmail.com>
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2011-12-26 21:50:53 +08:00
Edward Z. Yang
1e5293d9fe Add more attributions.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2011-12-26 15:45:41 +08:00
Edward Z. Yang
6b643ede02 Implement %HTML.AllowedComments and %HTML.AllowedCommentsRegexp
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2011-12-26 15:34:42 +08:00
Edward Z. Yang
e41af46a8b Fix broken table content model, easily seen in XHTML1.1
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2011-12-26 14:49:26 +08:00
Edward Z. Yang
3570c9985a Properly handle nested sublists by folding into previous list item.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2011-12-26 14:00:34 +08:00
Edward Z. Yang
8d572993b4 Implement %HTML.TargetBlank
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2011-12-26 08:36:00 +08:00
Edward Z. Yang
1bacbc0563 Add isBenign and getDefaultScheme methods.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2011-12-25 23:31:15 +08:00
Edward Z. Yang
bfe2c10d07 Add a little bit of documentation about contexts for URIFilters.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2011-12-25 23:31:15 +08:00
Edward Z. Yang
9b10515fa4 Core.EscapeNonASCIICharacters now always works, even if target is UTF-8.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2011-12-25 23:31:15 +08:00
Edward Z. Yang
1255d0f15d Add support for scope attribute on td and th.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2011-12-25 23:31:13 +08:00
Edward Z. Yang
d45e11cc6b Add one more test for SPL autoload defaults.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2011-12-25 02:58:51 -05:00
Edward Z. Yang
94c15d1f56 Fix iconv truncation bug.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2011-12-25 02:31:06 -05:00
Edward Z. Yang
ce68cfe484 Remove spurious abstract definition; PHP 5.4 doesn't like that.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2011-12-18 13:28:07 -05:00
Edward Z. Yang
9f5f85952b Don't unset parser variable; plays poorly with serialize.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2011-12-18 13:27:51 -05:00
Edward Z. Yang
dbb365155b Typofix.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2011-08-24 09:56:51 -04:00
Edward Z. Yang
32c0ffde0c Don't add nofollow for matching hosts, generalize this code.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2011-08-24 09:56:49 -04:00
Edward Z. Yang
856a5e5b89 Update INSTALL to avoid missing config snafu, update usage.xml.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2011-08-24 09:56:21 -04:00
Edward Z. Yang
820d6e9097 Do not duplicate nofollow attribute in transform.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2011-08-24 09:56:13 -04:00
Edward Z. Yang
35b1fbce01 Explicitly initialize anonModule to null.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2011-04-19 22:46:17 +01:00
Edward Z. Yang
bcfbb8338c URI.Munge munges https to http URIs.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2011-04-10 13:09:24 +01:00
Edward Z. Yang
f51a6f7de9 Color keywords now case-insensitive.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2011-04-10 12:45:02 +01:00
Edward Z. Yang
f1439f0af5 Release 4.3.0
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2011-03-27 23:02:49 +01:00
Edward Z. Yang
0124605918 Fix CSS URL innerHTML/cssText escaping bug.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2011-03-27 21:24:32 +01:00
Edward Z. Yang
afb007d22f Protect against font family innerHTML/cssText attacks.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2011-03-27 20:35:43 +01:00
Edward Z. Yang
0dd9e4faf4 Fix Internet Explorer innerHTML bug.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2011-03-27 11:50:52 +01:00
Edward Z. Yang
94ed3b1231 Implement CSS.AllowedFonts.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2011-03-24 22:54:39 +00:00
Edward Z. Yang
6a6c0ed5d7 Don't autoclose if no parents support the tag.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2011-03-22 00:26:41 +00:00
Edward Z. Yang
e05b555448 Safety update for nested ul test.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2011-03-21 21:05:23 +00:00
Edward Z. Yang
ee9c70ab7f Fix E_NOTICE from indexing into empty string.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2011-03-17 17:33:11 +00:00
Edward Z. Yang
b4469f17aa Fix missing numeric entities (shows up when DirectLexing).
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2011-02-27 11:58:37 +00:00
Edward Z. Yang
e76f4b45d0 Dramatically rewrite null host URI handling.
Basically, browsers don't parse what should be valid URIs correctly, so
we have to go through some backbends to accommodate them.  Specifically,
for browseable URIs, the following URIs have unintended behavior:

    - ///example.com
    - http:/example.com
    - http:///example.com

Furthermore, if the path begins with //, modifying these URLs must
be done with care, as if you remove the host-name component, the
parse tree changes.

I've modified the engine to follow correct URI semantics as much
as possible while outputting browser compatible code, and invalidate
the URI in cases where we can't deal.  There has been a refactoring
of URIScheme so that this important check is always performed,
introducing a new member variable allow_empty_host which is true
on data, file, mailto and news schemes.

This also fixes bypass bugs on URI.Munge.

Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2011-01-25 18:56:46 +00:00
Edward Z. Yang
a32d5b52e1 Fix embedding flash on non-IE browsers and allow more wmode.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2011-01-22 12:28:57 +00:00
Maxim Krizhanovsky
a3d71fe606 Iterative traversal of DOM.
With some deep DOMs you can hit the maximum nesting level
limit in tokenizeDOM (we've experienced this even with a maximum nesting
level of 300). Here is an iterative version of the same function with
a simple queue/dequeue approach.

Signed-off-by: Maxim Krizhanovsky <darhazer@gmail.com>
2011-01-19 22:06:40 +00:00
Edward Z. Yang
77982bd61d Bump version number for Cache.SerializerPermissions.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2011-01-14 00:40:39 +00:00
Petr Skoda
78c4e62245 Add new Cache.SerializerPermissions option. 2011-01-13 22:57:40 +00:00
Edward Z. Yang
5803c06765 Check that argv is set before operating on it.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2011-01-13 22:42:47 +00:00
Edward Z. Yang
b63569ac22 Fix bad interaction between bootstrap autoloader and Zend Debugger/APC.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2010-12-31 09:48:28 +00:00
Edward Z. Yang
f3d050c517 Fix two bugs with caching of customized raw definitions.
The first bug is that we will repeatedly write out the result
of a customized raw definition to the filesystem, even when a cache
entry already exists.

The second bug is that caching these definitions doesn't actually
work (the cache entry is written but never used.)  A new API
for retrieving raw definitions permits the user to take advantage
of caching.

Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2010-12-30 23:51:53 +00:00
Edward Z. Yang
6dcc37cb55 Update PHPT instructions.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2010-11-21 14:00:20 +00:00
Edward Z. Yang
cfc4ee1faf Add initial implementation of CSS.Trusted.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2010-11-12 18:45:03 +00:00
Edward Z. Yang
598c5b60c9 Add sanity check against ze1_compatibility_mode.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2010-11-12 16:15:03 +00:00
Edward Z. Yang
c9e7ffc172 Fix incorrect PEARSax3 test assertion.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2010-11-12 16:06:34 +00:00
Edward Z. Yang
feeffe6ed2 Check if schema.ser was corrupted.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2010-10-29 14:47:40 +01:00
Edward Z. Yang
4754d407aa Fix removal of id with DirectLex by preserving armor.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2010-10-28 17:25:31 +01:00
Nick Pope
0b9db1f54b Allow non-static autoload methods w/ PHP >= 5.2.11
HTML Purifier loads itself as the first autoload function by
unregistering all existing functions and re-registering them after
registering itself.

Originally an exception was thrown when a non-static object method was
encountered as the behaviour of spl_autoload_functions() did not return
the object instance, but only the class name.  This was filed on PHP
bugs (#44144).

The bug was fixed for PHP >= 5.2.11 and >= 5.3

Signed-off-by: Nick Pope <nick@nickpope.me.uk>
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2010-10-28 17:25:17 +01:00
Edward Z. Yang
1d4a38d055 Escape CDATA before handling conditional comments.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2010-09-28 12:11:26 -04:00
Edward Z. Yang
8c80349f9d Implement HTML.Nofollow for external links.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2010-09-28 12:01:57 -04:00
Edward Z. Yang
d848c99b74 Make IE conditional comment matching ungreedy.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2010-09-28 10:22:38 -04:00
Edward Z. Yang
882ffed9ba Release 4.2.0.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2010-09-15 02:52:57 -04:00
Edward Z. Yang
86990a21f1 Rename newline normalization directive to something better.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2010-09-15 02:50:39 -04:00
Tomasz Muras
9573f0933d Make newline normalization optional. 2010-09-14 23:49:28 -04:00
Edward Z. Yang
632bf2bbd4 Shift to 4.2.0 release cycle.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2010-09-14 23:38:51 -04:00
Edward Z. Yang
ec86598446 Add support for file:// URI scheme.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2010-09-09 00:01:26 -04:00
Edward Z. Yang
b6c3f5e89b Update TODO.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2010-09-08 23:42:05 -04:00
Edward Z. Yang
7c91104532 Implement HTML.FlashAllowFullScreen.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2010-09-08 23:39:20 -04:00
Edward Z. Yang
eac628f490 Add %CSS.ForbiddenProperties directive.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2010-09-04 02:59:03 -04:00
Edward Z. Yang
92913bc816 Add documentation about configuration directive types.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2010-09-04 02:28:53 -04:00
Edward Z. Yang
479d793562 Reword documentation to be clearer, and give warning on common user error.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2010-09-04 01:31:20 -04:00
Edward Z. Yang
e2c15f1c98 Fix Mac Snow Leopard APC bug.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2010-08-26 21:40:58 -07:00
Edward Z. Yang
57ced3f361 Tighten up ignore spec.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2010-06-30 06:00:45 -07:00
Edward Z. Yang
c04a441b3e Actually make URI.DisableResources do something.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2010-06-30 05:59:17 -07:00
Edward Z. Yang
1bed8b6d5f Added %Core.RemoveProcessingInstructions.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2010-06-20 18:26:44 -07:00
Edward Z. Yang
33afd7d9e0 Fix improper handling of IE conditional comments.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2010-06-18 06:08:54 -07:00
Edward Z. Yang
18e538317a Release 4.1.1.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2010-05-31 20:17:31 -07:00
Edward Z. Yang
96a4193fc9 Fix undefined index warnings in maintenance scripts.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2010-05-31 20:07:27 -07:00
Edward Z. Yang
00c66fa9cb Fix bug in parsing single attribute with entities.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2010-05-31 19:44:18 -07:00
Edward Z. Yang
d3abcb90e3 Rewrite CSS url() and font-family output logic.
The new logic is as follows:

* Given a URL to insert into url(), check that it is properly URL
  encoded (in particular, a doublequote and backslash never occurs
  within it) and then place it as url("http://example.com").

* Given a font name, if it is strictly alphanumeric, it is safe to omit
  quotes. Otherwise, wrap in double quotes and replace '"' with '\22 '
  (note trailing space) and '\' with '\5C ' (ditto).

We introduce expandCSSEscape() which is a hack for common parsing
idioms in CSS; this means that CSS escapes are now recognized inside
URLs as well as unquoted font names.

Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2010-05-31 18:45:21 -07:00
Edward Z. Yang
df3100b1b3 Make test script less chatty when log_errors is on.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2010-05-20 21:50:44 -04:00
Edward Z. Yang
143e1ad718 Remove shebang and +x from test script.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2010-05-20 21:21:26 -04:00
Edward Z. Yang
875b0febde Fix infinite loop involving wrapping formedness.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2010-05-17 23:22:51 -04:00
Edward Z. Yang
3166b8a10f Fix bug in background-position with center keyword.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2010-05-05 15:08:57 -04:00
Edward Z. Yang
1a70bffd5a Emit errors when body is extracted.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2010-05-04 13:41:09 -04:00
Edward Z. Yang
f4c6e10ff7 Release 4.1.0.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2010-04-26 18:31:40 -04:00
Edward Z. Yang
c1cbd9e565 Mute STRICT errors from CSSTidy and don't run PEARSax3 on PHP 5.3.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2010-04-26 18:27:32 -04:00
Edward Z. Yang
da94d3d6ac Always quote the contents of url() in CSS.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2010-04-26 12:10:15 -04:00
Edward Z. Yang
80793e925e Remove +x bit from RemoveSpansWithoutAttributes.php
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2010-04-17 00:23:09 -04:00
Edward Z. Yang
8ef4fb22db Support for flashvars in HTML.SafeEmbed.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2010-03-30 13:33:13 -04:00
Edward Z. Yang
70a7a3f5dd Handle <ol><ol> properly by adding missing <li> tag.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2010-03-10 00:58:37 -05:00
Edward Z. Yang
4d612d5a77 Improve handling of malformed object parameters.
When specifying source material for <object> tags, you must use
data inside the object tag as well as specify movie in a param.
If you specify a src (which is the appropriate markup for <embed>)
we now convert and fill in the other attributes appropriately.

Also, fix a PHP warning in Generator code.

Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2010-03-09 17:29:38 -05:00
Edward Z. Yang
63a854ee5d Remove call-time pass-by-reference.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2010-03-08 03:45:11 -05:00
Edward Z. Yang
0229458f8f Implement Internet Explorer compatibility code for embedded content.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2010-03-08 01:56:40 -05:00
Edward Z. Yang
baa477ac08 Truncate alt text from src if it's too long.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2010-03-08 01:22:21 -05:00
Edward Z. Yang
dc90e8e85b Support flashvars.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2010-03-08 01:16:57 -05:00
Edward Z. Yang
97125ed18b Implement data URI scheme.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2010-03-07 21:45:39 -05:00
Paul Stone
9a9036c689 Implement auto-formatter that removes empty span tags.
Signed-off-by: Paul Stone <patches@pdjs.co.uk>
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2010-03-07 18:59:33 -05:00
Edward Z. Yang
aea7d02dfe Support YouTube slideshow embedding.
YouTube slideshows contain a /cp/, not a /v/, in their URL;
relax the YouTube filter to allow them.

Signed-off-by: Nigel McNie <nigel@catalyst.net.nz>
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2010-03-07 18:57:22 -05:00
Brian DeRocher
b3ca1498c2 Add boolean value flag for PEARSax3 for testing if a token is empty.
Signed-off-by: Brian DeRocher <brian@derocher.org>
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2010-02-26 21:36:51 -05:00
Edward Z. Yang
ac18672aba Fix extant broken PEARSax3 parsing patterns.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2010-02-26 21:14:52 -05:00
Edward Z. Yang
faf28682ad Manually work around PEARSax3 E_STRICT errors.
Previously, my development environment was not running the PEARSax3
tests because my environment was set to E_STRICT error handling, and
thus the tests were skipped.  Relax this requirement by making the
wrapper class E_STRICT safe.  This introduces a few failing tests.

Also update TODO and add another fresh test.

Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2010-02-26 20:42:42 -05:00
Edward Z. Yang
e2cd852bcf Add shebang line to tests index script.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2010-02-15 02:55:43 -05:00
Edward Z. Yang
694583259c Fix autoparagraph bug with non-inline elements.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2010-02-15 02:55:33 -05:00
Edward Z. Yang
bde4de3c78 Update TODO.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2009-08-27 20:17:41 -04:00
Edward Z. Yang
5b4e5c983e Support proprietary height attribute on table.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2009-08-27 20:17:24 -04:00
Edward Z. Yang
1ad8fd5ce9 Gracefully deal with null injectors.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2009-08-27 20:03:31 -04:00
Edward Z. Yang
6bdf161afd Update TODO.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2009-07-15 14:50:52 -04:00
Edward Z. Yang
af45a6c191 Release Phorum module 4.0.0.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2009-07-09 21:12:35 -04:00
Edward Z. Yang
2b72d0445f Add 4.1.0 release NEWS entry.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2009-07-09 21:03:46 -04:00
Edward Z. Yang
d7b3117678 Add doxygen doc scripts, and fix package.php
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2009-07-08 22:11:15 -04:00
Edward Z. Yang
53ff3e2744 Release 4.0.0.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2009-07-07 22:41:01 -04:00
Edward Z. Yang
6776efccdd Update configuration scanner to parse new format.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2009-07-07 22:32:44 -04:00
Edward Z. Yang
ba9fd175d7 Make extractBody not terminate prematurely on first </body>.
Previously, if two </body> tags were present, HTML Purifier
would truncate everything after the first </body>.  This is
not ideal behavior; so HTML Purifier has been changed to
match up to the last </body>.

Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2009-07-07 22:19:04 -04:00
Edward Z. Yang
4d27906b02 Make %URI.Munge respect %URI.Host (don't munge).
%URI.Munge incorrectly munged URIs that pointed to the
same host as the current website (it did, however, have
the correct behavior for when the munge URL was on the
same server).

Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2009-07-06 22:04:51 -04:00
Edward Z. Yang
8f573df3dc XHTML 2 is dead. Long live XHTML 2.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2009-07-02 15:43:42 -04:00
Edward Z. Yang
c7594487a2 Fix inability to totally override content model.
Signed-off-by: Edward Z. Yang <edwardzyang@thewritingpot.com>
2009-06-10 18:24:52 -04:00
Edward Z. Yang
733a5ce5c3 Fix allowsElement() bug manifesting in LinkifyTest.
Thanks frank farmer for reporting.

Signed-off-by: Edward Z. Yang <edwardzyang@thewritingpot.com>
2009-06-10 18:11:34 -04:00
Edward Z. Yang
e8abd5953c Fix prototype impedance in HTMLDefinition and typo in
docs/enduser-customize.html
Signed-off-by: Edward Z. Yang <edwardzyang@thewritingpot.com>
2009-06-07 16:05:46 -04:00
Edward Z. Yang
1b8c8865b2 Fix PHP 5.3.0 problem with numeric indices causing -0 problem.
Signed-off-by: Edward Z. Yang <edwardzyang@thewritingpot.com>
2009-06-07 16:04:07 -04:00
Edward Z. Yang
6e66dc9cad Add HTMLPurifier_config->serialize()
Signed-off-by: Edward Z. Yang <edwardzyang@thewritingpot.com>
2009-05-30 00:25:14 -04:00
Edward Z. Yang
77b60a4206 Update documentation to new configuration format.
Signed-off-by: Edward Z. Yang <edwardzyang@thewritingpot.com>
2009-05-29 23:46:40 -04:00
Edward Z. Yang
5bf7ac4e9f Add docs and facilities for having separate directories of schemas.
Signed-off-by: Edward Z. Yang <edwardzyang@thewritingpot.com>
2009-05-29 22:16:35 -04:00
Edward Z. Yang
a025203b18 Minor updates to Config and TODO items thereof.
Signed-off-by: Edward Z. Yang <edwardzyang@thewritingpot.com>
2009-05-29 18:03:57 -04:00
Edward Z. Yang
809da84ae1 Ignore tags files (from exuberant ctags)
Signed-off-by: Edward Z. Yang <edwardzyang@thewritingpot.com>
2009-05-29 18:03:44 -04:00
Edward Z. Yang
777781a95c Don't have mute error handler be private.
Signed-off-by: Edward Z. Yang <edwardzyang@thewritingpot.com>
2009-05-29 17:59:30 -04:00
Edward Z. Yang
4a87f732ca Fix two minor bugs, updating Phorum and removing unused $dir variable.
Signed-off-by: Edward Z. Yang <edwardzyang@thewritingpot.com>
2009-05-27 01:17:23 -04:00
Edward Z. Yang
a2885181df Update TODO file.
Signed-off-by: Edward Z. Yang <edwardzyang@thewritingpot.com>
2009-05-26 12:55:09 -04:00
Edward Z. Yang
84abae08f5 Relax allowed values of class for certain doctypes, see %Attr.ClassUseCDATA
Signed-off-by: Edward Z. Yang <edwardzyang@thewritingpot.com>
2009-05-26 01:07:40 -04:00
Edward Z. Yang
10e2d32a79 Lock configuration objects to a single namespace, to help prevent bugs.
* Also, fix a slight bug with URI definition clearing.

Signed-off-by: Edward Z. Yang <edwardzyang@thewritingpot.com>
2009-05-25 23:38:49 -04:00
Edward Z. Yang
baf053b016 Implement %Attr.AllowedClasses and %Attr.ForbiddenClasses.
Signed-off-by: Edward Z. Yang <edwardzyang@thewritingpot.com>
2009-05-25 22:08:45 -04:00
Edward Z. Yang
bf71c3f392 Add documents on how to restructure configuration directives.
Signed-off-by: Edward Z. Yang <edwardzyang@thewritingpot.com>
2009-05-25 21:54:43 -04:00
Edward Z. Yang
bfbe29d5a1 Rename ExtractStyleBlocks configuration parameters.
Signed-off-by: Edward Z. Yang <edwardzyang@thewritingpot.com>
2009-05-25 21:54:39 -04:00
Edward Z. Yang
e194b8efc6 Rename AutoFormatParam.PurifierLinkifyDocURL.
Signed-off-by: Edward Z. Yang <edwardzyang@thewritingpot.com>
2009-05-25 21:51:08 -04:00
Edward Z. Yang
4214ac9d67 Update TODO list.
Signed-off-by: Edward Z. Yang <edwardzyang@thewritingpot.com>
2009-05-22 14:52:43 -04:00
Edward Z. Yang
24f761d84a Remove PHP4 cruft from URISchemeRegistry.
Signed-off-by: Edward Z. Yang <edwardzyang@thewritingpot.com>
2009-05-13 16:14:57 -04:00
Edward Z. Yang
41c9226f3d Style refresh: add/remove vimlines, fix minor factual errors.
Signed-off-by: Edward Z. Yang <edwardzyang@thewritingpot.com>
2009-04-09 12:47:10 -04:00
Edward Z. Yang
e3c2063f69 Implement %AutoFormat.RemoveEmpty.RemoveNbsp, by popular demand.
Signed-off-by: Edward Z. Yang <edwardzyang@thewritingpot.com>
2009-04-09 00:53:19 -04:00
Edward Z. Yang
398a02039e Implement %HTML.Attr.Name.UseCDATA which relaxes name validation rules.
Sponsored-by: Ian Cook <thinkspill@gmail.com>
Signed-off-by: Edward Z. Yang <edwardzyang@thewritingpot.com>
2009-03-20 19:34:38 -04:00
Edward Z. Yang
84e2e141fc Fix bad configuration call in NameSyncTest.php.
Signed-off-by: Edward Z. Yang <edwardzyang@thewritingpot.com>
2009-03-14 19:18:02 -04:00
Edward Z. Yang
47bbbad000 Fix typo in YouTube docs. Thanks vbMark for reporting.
Signed-off-by: Edward Z. Yang <edwardzyang@thewritingpot.com>
2009-03-13 13:33:51 -04:00
Edward Z. Yang
eaa906f8fc Implement configuration inheritance.
Signed-off-by: Edward Z. Yang <edwardzyang@thewritingpot.com>
2009-02-21 03:01:02 -05:00
Edward Z. Yang
86ca784da3 Convert all to new configuration get/set format.
Signed-off-by: Edward Z. Yang <edwardzyang@thewritingpot.com>
2009-02-21 03:00:34 -05:00
Edward Z. Yang
b107eec452 Revamp configuration backend.
Signed-off-by: Edward Z. Yang <edwardzyang@thewritingpot.com>
2009-02-21 03:00:33 -05:00
Edward Z. Yang
fcbf724e6e Make name="" and id="" play nicely together.
Signed-off-by: Edward Z. Yang <edwardzyang@thewritingpot.com>
2009-02-21 02:58:30 -05:00
Edward Z. Yang
92344cc83a Add 4.0.0 release information.
Signed-off-by: Edward Z. Yang <edwardzyang@thewritingpot.com>
2009-02-16 22:00:22 -05:00
646 changed files with 22277 additions and 9862 deletions

14
.gitattributes vendored
View File

@@ -1 +1,13 @@
configdoc/usage.xml -crlf /.gitattributes export-ignore
/.gitignore export-ignore
/.travis.yml export-ignore
/Doxyfile export-ignore
/art/ export-ignore
/benchmarks/ export-ignore
/configdoc/ export-ignore
/configdoc/usage.xml -crlf
/docs/ export-ignore
/phpdoc.ini
/smoketests/ export-ignore
/tests/* export-ignore
/tests/path2class.func.php -export-ignore

10
.gitignore vendored
View File

@@ -1,5 +1,7 @@
tags
conf/ conf/
test-settings.php test-settings.php
config-schema.php
library/HTMLPurifier/DefinitionCache/Serializer/*/ library/HTMLPurifier/DefinitionCache/Serializer/*/
library/standalone/ library/standalone/
library/HTMLPurifier.standalone.php library/HTMLPurifier.standalone.php
@@ -16,3 +18,11 @@ docs/doxygen*
*.phpt.php *.phpt.php
*.phpt.skip.php *.phpt.skip.php
*.htmlt.ini *.htmlt.ini
*.patch
/*.php
vendor
composer.lock
*.rej
*.orig
*.bak
core

13
.travis.yml Normal file
View File

@@ -0,0 +1,13 @@
language: php
php:
- '5.3'
- '5.4'
- '5.5'
- '5.6'
- '7.0'
- '7.1'
before_script:
- git clone --depth=50 https://github.com/ezyang/simpletest.git
- cp test-settings.travis.php test-settings.php
script:
- php tests/index.php

View File

@@ -31,7 +31,7 @@ PROJECT_NAME = HTMLPurifier
# This could be handy for archiving the generated documentation or # This could be handy for archiving the generated documentation or
# if some version control system is used. # if some version control system is used.
PROJECT_NUMBER = 3.3.0 PROJECT_NUMBER = 4.9.2
# The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute) # The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute)
# base path where the generated documentation will be put. # base path where the generated documentation will be put.

13
FOCUS
View File

@@ -1,13 +0,0 @@
7 - Major bugfixes
[ Appendix A: Release focus IDs ]
0 - N/A
1 - Initial freshmeat announcement
2 - Documentation
3 - Code cleanup
4 - Minor feature enhancements
5 - Major feature enhancements
6 - Minor bugfixes
7 - Major bugfixes
8 - Minor security fixes
9 - Major security fixes

50
INSTALL
View File

@@ -15,9 +15,8 @@ with these contents.
--------------------------------------------------------------------------- ---------------------------------------------------------------------------
1. Compatibility 1. Compatibility
HTML Purifier is PHP 5 only, and is actively tested from PHP 5.0.5 and HTML Purifier is PHP 5 and PHP 7, and is actively tested from PHP 5.0.5
up. It has no core dependencies with other libraries. PHP and up. It has no core dependencies with other libraries.
4 support was deprecated on December 31, 2007 with HTML Purifier 3.0.0.
These optional extensions can enhance the capabilities of HTML Purifier: These optional extensions can enhance the capabilities of HTML Purifier:
@@ -25,6 +24,13 @@ These optional extensions can enhance the capabilities of HTML Purifier:
* bcmath : Used for unit conversion and imagecrash protection * bcmath : Used for unit conversion and imagecrash protection
* tidy : Used for pretty-printing HTML * tidy : Used for pretty-printing HTML
These optional libraries can enhance the capabilities of HTML Purifier:
* CSSTidy : Clean CSS stylesheets using %Core.ExtractStyleBlocks
Note: You should use the modernized fork of CSSTidy available
at https://github.com/Cerdic/CSSTidy
* Net_IDNA2 (PEAR) : IRI support using %Core.EnableIDNA
Note: This is not necessary for PHP 5.3 or later
--------------------------------------------------------------------------- ---------------------------------------------------------------------------
2. Reconnaissance 2. Reconnaissance
@@ -231,12 +237,12 @@ HTML Purifier uses iconv to support other character encodings, as such,
any encoding that iconv supports <http://www.gnu.org/software/libiconv/> any encoding that iconv supports <http://www.gnu.org/software/libiconv/>
HTML Purifier supports with this code: HTML Purifier supports with this code:
$config->set('Core', 'Encoding', /* put your encoding here */); $config->set('Core.Encoding', /* put your encoding here */);
An example usage for Latin-1 websites (the most common encoding for English An example usage for Latin-1 websites (the most common encoding for English
websites): websites):
$config->set('Core', 'Encoding', 'ISO-8859-1'); $config->set('Core.Encoding', 'ISO-8859-1');
Note that HTML Purifier's support for non-Unicode encodings is crippled by the Note that HTML Purifier's support for non-Unicode encodings is crippled by the
fact that any character not supported by that encoding will be silently fact that any character not supported by that encoding will be silently
@@ -251,7 +257,7 @@ reason, I do not include the solution in this document).
For those of you using HTML 4.01 Transitional, you can disable For those of you using HTML 4.01 Transitional, you can disable
XHTML output like this: XHTML output like this:
$config->set('HTML', 'Doctype', 'HTML 4.01 Transitional'); $config->set('HTML.Doctype', 'HTML 4.01 Transitional');
Other supported doctypes include: Other supported doctypes include:
@@ -277,14 +283,14 @@ are, respectively, %HTML.Allowed, %URI.MakeAbsolute and %URI.Base, and
%AutoFormat.AutoParagraph. The %Namespace.Directive naming convention %AutoFormat.AutoParagraph. The %Namespace.Directive naming convention
translates to: translates to:
$config->set('Namespace', 'Directive', $value); $config->set('Namespace.Directive', $value);
E.g. E.g.
$config->set('HTML', 'Allowed', 'p,b,a[href],i'); $config->set('HTML.Allowed', 'p,b,a[href],i');
$config->set('URI', 'Base', 'http://www.example.com'); $config->set('URI.Base', 'http://www.example.com');
$config->set('URI', 'MakeAbsolute', true); $config->set('URI.MakeAbsolute', true);
$config->set('AutoFormat', 'AutoParagraph', true); $config->set('AutoFormat.AutoParagraph', true);
--------------------------------------------------------------------------- ---------------------------------------------------------------------------
@@ -300,11 +306,9 @@ appropriate permissions using:
chmod -R 0755 HTMLPurifier/DefinitionCache/Serializer chmod -R 0755 HTMLPurifier/DefinitionCache/Serializer
If the above command doesn't work, you may need to assign write permissions If the above command doesn't work, you may need to assign write permissions
to all. This may be necessary if your webserver runs as nobody, but is to group:
not recommended since it means any other user can write files in the
directory. Use:
chmod -R 0777 HTMLPurifier/DefinitionCache/Serializer chmod -R 0775 HTMLPurifier/DefinitionCache/Serializer
You can also chmod files via your FTP client; this option You can also chmod files via your FTP client; this option
is usually accessible by right clicking the corresponding directory and is usually accessible by right clicking the corresponding directory and
@@ -318,11 +322,11 @@ If you are unable or unwilling to give write permissions to the cache
directory, you can either disable the cache (and suffer a performance directory, you can either disable the cache (and suffer a performance
hit): hit):
$config->set('Core', 'DefinitionCache', null); $config->set('Core.DefinitionCache', null);
Or move the cache directory somewhere else (no trailing slash): Or move the cache directory somewhere else (no trailing slash):
$config->set('Cache', 'SerializerPath', '/home/user/absolute/path'); $config->set('Cache.SerializerPath', '/home/user/absolute/path');
--------------------------------------------------------------------------- ---------------------------------------------------------------------------
@@ -330,11 +334,6 @@ Or move the cache directory somewhere else (no trailing slash):
The interface is mind-numbingly simple: The interface is mind-numbingly simple:
$purifier = new HTMLPurifier();
$clean_html = $purifier->purify( $dirty_html );
...or, if you're using the configuration object:
$purifier = new HTMLPurifier($config); $purifier = new HTMLPurifier($config);
$clean_html = $purifier->purify( $dirty_html ); $clean_html = $purifier->purify( $dirty_html );
@@ -353,7 +352,8 @@ If your website is in UTF-8 and XHTML Transitional, use this code:
<?php <?php
require_once '/path/to/htmlpurifier/library/HTMLPurifier.auto.php'; require_once '/path/to/htmlpurifier/library/HTMLPurifier.auto.php';
$purifier = new HTMLPurifier(); $config = HTMLPurifier_Config::createDefault();
$purifier = new HTMLPurifier($config);
$clean_html = $purifier->purify($dirty_html); $clean_html = $purifier->purify($dirty_html);
?> ?>
@@ -363,8 +363,8 @@ If your website is in a different encoding or doctype, use this code:
require_once '/path/to/htmlpurifier/library/HTMLPurifier.auto.php'; require_once '/path/to/htmlpurifier/library/HTMLPurifier.auto.php';
$config = HTMLPurifier_Config::createDefault(); $config = HTMLPurifier_Config::createDefault();
$config->set('Core', 'Encoding', 'ISO-8859-1'); // replace with your encoding $config->set('Core.Encoding', 'ISO-8859-1'); // replace with your encoding
$config->set('HTML', 'Doctype', 'HTML 4.01 Transitional'); // replace with your doctype $config->set('HTML.Doctype', 'HTML 4.01 Transitional'); // replace with your doctype
$purifier = new HTMLPurifier($config); $purifier = new HTMLPurifier($config);
$clean_html = $purifier->purify($dirty_html); $clean_html = $purifier->purify($dirty_html);

View File

@@ -1,69 +1,60 @@

Installation Installation
Comment installer HTML Purifier Comment installer HTML Purifier
Attention: Ce document a encode en UTF-8. Si les lettres avec les accents Attention : Ce document est encodé en UTF-8, si les lettres avec des accents
est essoreuse, prenez un mieux editeur de texte. ne s'affichent pas, prenez un meilleur éditeur de texte.
À L'Aide: Je ne suis pas un diseur natif de français. Si vous trouvez une
erreur dans ce document, racontez-moi! Merci.
L'installation de HTML Purifier est trés simple, parce qu'il ne doit pas
la configuration. Dans le pied de de document, les utilisateurs
impatient peuvent trouver le code, mais je recommande que vous lisez
ce document pour quelques choses.
L'installation de HTML Purifier est très simple, parce qu'il n'a pas besoin
de configuration. Pour les utilisateurs impatients, le code se trouve dans le
pied de page, mais je recommande de lire le document.
1. Compatibilité 1. Compatibilité
HTML Purifier fonctionne dans PHP 5. PHP 5.0.5 est le dernier HTML Purifier fonctionne avec PHP 5. PHP 5.0.5 est la dernière version testée.
version que je le testais. Il ne dépend de les autre librairies. Il ne dépend pas d'autres librairies.
Les extensions optionnel est iconv (en général déjà installer) et Les extensions optionnelles sont iconv (généralement déjà installée) et tidy
tidy (répandu aussi). Si vous utilisez UTF-8 et ne voulez pas (répandue aussi). Si vous utilisez UTF-8 et que vous ne voulez pas l'indentation,
l'indentation, vous pouvez utiliser HTML Purifier sans ces extensions. vous pouvez utiliser HTML Purifier sans ces extensions.
2. Inclure la librarie 2. Inclure la librairie
Utilisez: Quand vous devez l'utiliser, incluez-le :
require_once '/path/to/library/HTMLPurifier.auto.php'; require_once('/path/to/library/HTMLPurifier.auto.php');
...quand vous devez utiliser HTML Purifier (ne inclure pas quand vous Ne pas l'inclure si ce n'est pas nécessaire, car HTML Purifier est lourd.
ne devez pas, parce que HTML Purifier est trés grand.)
HTML Purifier utilise 'autoload'. Si vous avez définu la fonction HTML Purifier utilise "autoload". Si vous avez défini la fonction __autoload,
__autoload, vous doivez ajoute cet programme: vous devez ajouter cette fonction :
spl_autoload_register('__autoload') spl_autoload_register('__autoload')
Plus d'information est dans le document 'INSTALL'. Plus d'informations dans le document "INSTALL".
3. Installation rapide
3. Installation vite Si votre site Web est en UTF-8 et XHTML Transitional, utilisez :
Si votre site web est en UTF-8 et XHTML Transitional, utilisez:
<?php <?php
require_once '/path/to/htmlpurifier/library/HTMLPurifier.auto.php'; require_once('/path/to/htmlpurifier/library/HTMLPurifier.auto.php');
$purificateur = new HTMLPurifier(); $purificateur = new HTMLPurifier();
$html_propre = $purificateur->purify($html_salle); $html_propre = $purificateur->purify($html_a_purifier);
?> ?>
Sinon, utilisez: Sinon, utilisez :
<?php <?php
require_once '/path/to/htmlpurifier/library/HTMLPurifier.auto.php'; require_once('/path/to/htmlpurifier/library/HTMLPurifier.auto.php');
$config = HTMLPurifier_Config::createDefault();
$config = HTMLPurifier_Config::createDefault(); $config->set('Core', 'Encoding', 'ISO-8859-1'); //Remplacez par votre
$config->set('Core', 'Encoding', 'ISO-8859-1'); //remplacez avec votre encoding encodage
$config->set('Core', 'XHTML', true); //remplacez avec false si HTML 4.01 $config->set('Core', 'XHTML', true); //Remplacer par false si HTML 4.01
$purificateur = new HTMLPurifier($config); $purificateur = new HTMLPurifier($config);
$html_propre = $purificateur->purify($html_a_purifier);
$html_propre = $purificateur->purify($html_salle);
?> ?>
vim: et sw=4 sts=4 vim: et sw=4 sts=4

326
NEWS
View File

@@ -9,6 +9,332 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier
. Internal change . Internal change
========================== ==========================
4.9.2, released 2017-03-12
- Fixes PHP 5.3 compatibility
- Fix breakage when decoding decimal entities. Thanks @rybakit (#129)
4.9.1, released 2017-03-08
! %URI.DefaultScheme can now be set to null, in which case
all relative paths are removed.
! New CSS properties: min-width, max-width, min-height, max-height (#94)
! Transparency (rgba) and hsl/hsla supported where color CSS is present.
Thanks @fxbt for contributing the patch. (#118)
- When idn_to_ascii is defined, we might accept malformed
hostnames. Apply validation to the result in such cases.
- Close directory when done in Serializer DefinitionCache (#100)
- Deleted some asserts to keep linters from choking (#97)
- Rework Serializer cache behavior to avoid chmod'ing if possible (#32)
- Embedded semicolons in strings in CSS are now handled correctly!
- We accidentally dropped certain Unicode characters if there was
one or more invalid characters. This has been fixed, thanks
to mpyw <ryosuke_i_628@yahoo.co.jp>
- Fix for "Don't truncate upon encountering </div> when using DOMLex"
caused a regression with HTML 4.01 Strict parsing with libxml 2.9.1
(and maybe later versions, but known OK with libxml 2.9.4). The
fix is to go about handling truncation a bit more cleverly so that
we can wrap with divs (sidestepping the bug) but slurping out the
rest of the text in case it ran off the end. (#78)
- Fix PREG_BACKTRACK_LIMIT_ERROR in HTMLPurifier_Filter_ExtractStyle.
Thanks @breathbath for contributing the report and fix (#120)
- Fix entity decoding algorithm to be more conservative about
decoding entities that are missing trailing semicolon.
To get old behavior, set %Core.LegacyEntityDecoder to true.
(#119)
- Workaround libxml bug when HTML tags are embedded inside
script tags. To disable workaround set %Core.AggressivelyRemoveScript
to false. (#83)
# By default, when a link has a target attribute associated
with it, we now also add rel="noopener" in order to
prevent the new window from being able to overwrite
the original frame. To disable this protection,
set %HTML.TargetNoopener to FALSE.
4.9.0 was cut on Git but never properly released; when we did the
real release we decided to skip this version number.
4.8.0, released 2016-07-16
# By default, when a link has a target attribute associated
with it, we now also add rel="noreferrer" in order to
prevent the new window from being able to overwrite
the original frame. To disable this protection,
set %HTML.TargetNoreferrer to FALSE.
! Full PHP 7 compatibility, the test suite is ALL GO.
! %CSS.AllowDuplicates permits duplicate CSS properties.
! Support for 'tel' URIs.
! Partial support for 'border-radius' properties when %CSS.AllowProprietary is true.
The slash syntax, i.e., 'border-radius: 2em 1em 4em / 0.5em 3em' is not
yet supported.
! %Attr.ID.HTML5 turns on HTML5-style ID handling.
- alt truncation could result in malformed UTF-8 sequence. Don't
truncate. Thanks Brandon Farber for reporting.
- Linkify regex is smarter, based off of Gruber's regex.
- IDNA supported natively on PHP 5.3 and later.
- Non all-numeric top-level names (e.g., foo.1f, 1f) are now
allowed.
- Minor bounds error fix to squash a PHP 7 notice.
- Support non-/tmp temporary directories for data:// validation
- Give a better error message when a user attempts to allow
ul/ol without allowing li.
- On some versions of PHP, the Serializer DefinitionCache could
infinite loop when the directory exists but is not listable. (#49)
- Don't match for <body> inside comments with
%Core.ConvertDocumentToFragment. (#67)
- SafeObject is now less case sensitive. (#57)
- AutoFormat.RemoveEmpty.Predicate now correctly renders in
web form. (#85)
4.7.0, released 2015-08-04
# opacity is now considered a "tricky" CSS property rather than a
proprietary one.
! %AutoFormat.RemoveEmpty.Predicate for specifying exactly when
an element should be considered "empty" (maybe preserve if it
has attributes), and modify iframe support so that the iframe
is removed if it is missing a src attribute. Thanks meeva for
reporting.
- Don't truncate upon encountering </div> when using DOMLex. Thanks
Myrto Christina for finally convincing me to fix this.
- Update YouTube filter for new code.
- Fix parsing of rgb() values with spaces in them for 'border'
attribute.
- Don't remove foo="" attributes if foo is a boolean attribute. Thanks
valME for reporting.
4.6.0, released 2013-11-30
# Secure URI munge hashing algorithm has changed to hash_hmac("sha256", $url, $secret).
Please update any verification scripts you may have.
# URI parsing algorithm was made more strict, so only prefixes which
look like schemes will actually be schemes. Thanks
Michael Gusev <mgusev@sugarcrm.com> for fixing.
# %Core.EscapeInvalidChildren is no longer supported, and no longer does
anything.
! New directive %Core.AllowHostnameUnderscore which allows underscores
in hostnames.
- Eliminate quadratic behavior in DOMLex by using a proper queue.
Thanks Ole Laursen for noticing this.
- Rewritten MakeWellFormed/FixNesting implementation eliminates quadratic
behavior in the rest of the purification pipeline. Thanks Chedburn
Networks for sponsoring this work.
- Made Linkify URL parser a bit less permissive, so that non-breaking
spaces and commas are not included as part of URL. Thanks nAS for fixing.
- Fix some bad interactions with %HTML.Allowed and injectors. Thanks
David Hirtz for reporting.
- Fix infinite loop in DirectLex. Thanks Ashar Javed (@soaj1664ashar)
for reporting.
4.5.0, released 2013-02-17
# Fix bug where stacked attribute transforms clobber each other;
this also means it's no longer possible to override attribute
transforms in later modules. No internal code was using this
but this may break some clients.
# We now use SHA-1 to identify cached definitions, instead of MD5.
! Support display:inline-block
! Support for more white-space CSS values.
! Permit underscores in font families
! Support for page-break-* CSS3 properties when proprietary properties
are enabled.
! New directive %Core.DisableExcludes; can be set to 'true' to turn off
SGML excludes checking. If HTML Purifier is removing too much text
and you don't care about full standards compliance, try setting this to
'true'.
- Use prepend for SPL autoloading on PHP 5.3 and later.
- Fix bug with nofollow transform when pre-existing rel exists.
- Fix bug where background:url() always gets lower-cased
(but not background-image:url())
- Fix bug with non lower-case color names in HTML
- Fix bug where data URI validation doesn't remove temporary files.
Thanks Javier Marín Ros <javiermarinros@gmail.com> for reporting.
- Don't remove certain empty tags on RemoveEmpty.
4.4.0, released 2012-01-18
# Removed PEARSax3 handler.
# URI.Munge now munges URIs inside the same host that go from https
to http. Reported by Neike Taika-Tessaro.
# Core.EscapeNonASCIICharacters now always transforms entities to
entities, even if target encoding is UTF-8.
# Tighten up selector validation in ExtractStyleBlocks.
Non-syntactically valid selectors are now rejected, along with
some of the more obscure ones such as attribute selectors, the
:lang pseudoselector, and anything not in CSS2.1. Furthermore,
ID and class selectors now work properly with the relevant
configuration attributes. Also, mute errors when parsing CSS
with CSS Tidy. Reported by Mario Heiderich and Norman Hippert.
! Added support for 'scope' attribute on tables.
! Added %HTML.TargetBlank, which adds target="_blank" to all outgoing links.
! Properly handle sub-lists directly nested inside of lists in
a standards compliant way, by moving them into the preceding <li>
! Added %HTML.AllowedComments and %HTML.AllowedCommentsRegexp for
limited allowed comments in untrusted situations.
! Implement iframes, and allow them to be used in untrusted mode with
%HTML.SafeIframe and %URI.SafeIframeRegexp. Thanks Bradley M. Froehle
<brad.froehle@gmail.com> for submitting an initial version of the patch.
! The Forms module now works properly for transitional doctypes.
! Added support for internationalized domain names. You need the PEAR
Net_IDNA2 module to be in your path; if it is installed, ensure the
class can be loaded and then set %Core.EnableIDNA to true.
- Color keywords are now case insensitive. Thanks Yzmir Ramirez
<yramirez-htmlpurifier@adicio.com> for reporting.
- Explicitly initialize anonModule variable to null.
- Do not duplicate nofollow if already present. Thanks 178
for reporting.
- Do not add nofollow if hostname matches our current host. Thanks 178
for reporting, and Neike Taika-Tessaro for helping diagnose.
- Do not unset parser variable; this fixes intermittent serialization
problems. Thanks Neike Taika-Tessaro for reporting, bill
<10010tiger@gmail.com> for diagnosing.
- Fix iconv truncation bug, where non-UTF-8 target encodings see
output truncated after around 8000 characters. Thanks Jörg Ludwig
<joerg.ludwig@iserv.eu> for reporting.
- Fix broken table content model for XHTML1.1 (and also earlier
versions, although the W3C validator doesn't catch those violations).
Thanks GlitchMr <glitch.mr@gmail.com> for reporting.
4.3.0, released 2011-03-27
# Fixed broken caching of customized raw definitions, but requires an
API change. The old API still works but will emit a warning,
see http://htmlpurifier.org/docs/enduser-customize.html#optimized
for how to upgrade your code.
# Protect against Internet Explorer innerHTML behavior by specially
treating attributes with backticks but no angled brackets, quotes or
spaces. This constitutes a slight semantic change, which can be
reverted using %Output.FixInnerHTML. Reported by Neike Taika-Tessaro
and Mario Heiderich.
# Protect against cssText/innerHTML by restricting allowed characters
used in fonts further than mandated by the specification and encoding
some extra special characters in URLs. Reported by Neike
Taika-Tessaro and Mario Heiderich.
! Added %HTML.Nofollow to add rel="nofollow" to external links.
! More types of SPL autoloaders allowed on later versions of PHP.
! Implementations for position, top, left, right, bottom, z-index
when %CSS.Trusted is on.
! Add %Cache.SerializerPermissions option for custom serializer
directory/file permissions
! Fix longstanding bug in Flash support for non-IE browsers, and
allow more wmode attributes.
! Add %CSS.AllowedFonts to restrict permissible font names.
- Switch to an iterative traversal of the DOM, which prevents us
from running out of stack space for deeply nested documents.
Thanks Maxim Krizhanovsky for contributing a patch.
- Make removal of conditional IE comments ungreedy; thanks Bernd
for reporting.
- Escape CDATA before removing Internet Explorer comments.
- Fix removal of id attributes under certain conditions by ensuring
armor attributes are preserved when recreating tags.
- Check if schema.ser was corrupted.
- Check if zend.ze1_compatibility_mode is on, and error out if it is.
This safety check is only done for HTMLPurifier.auto.php; if you
are using standalone or the specialized includes files, you're
expected to know what you're doing.
- Stop repeatedly writing the cache file after I'm done customizing a
raw definition. Reported by ajh.
- Switch to using require_once in the Bootstrap to work around bad
interaction with Zend Debugger and APC. Reported by Antonio Parraga.
- Fix URI handling when hostname is missing but scheme is present.
Reported by Neike Taika-Tessaro.
- Fix missing numeric entities on DirectLex; thanks Neike Taika-Tessaro
for reporting.
- Fix harmless notice from indexing into empty string. Thanks Matthijs
Kooijman <matthijs@stdin.nl> for reporting.
- Don't autoclose if no parent elements are able to support the element
that triggered the autoclose. In particular fixes strange behavior
of stray <li> tags. Thanks pkuliga@gmail.com for reporting and
Neike Taika-Tessaro <pinkgothic@gmail.com> for debugging assistance.
4.2.0, released 2010-09-15
! Added %Core.RemoveProcessingInstructions, which lets you remove
<? ... ?> statements.
! Added %URI.DisableResources functionality; the directive originally
did nothing. Thanks David Rothstein for reporting.
! Add documentation about configuration directive types.
! Add %CSS.ForbiddenProperties configuration directive.
! Add %HTML.FlashAllowFullScreen to permit embedded Flash objects
to utilize full-screen mode.
! Add optional support for the <code>file</code> URI scheme, enable
by explicitly setting %URI.AllowedSchemes.
! Add %Core.NormalizeNewlines options to allow turning off newline
normalization.
- Fix improper handling of Internet Explorer conditional comments
by parser. Thanks zmonteca for reporting.
- Fix missing attributes bug when running on Mac Snow Leopard and APC.
Thanks sidepodcast for the fix.
- Warn if an element is allowed, but an attribute it requires is
not allowed.
4.1.1, released 2010-05-31
- Fix undefined index warnings in maintenance scripts.
- Fix bug in DirectLex for parsing elements with a single attribute
with entities.
- Rewrite CSS output logic for font-family and url(). Thanks Mario
Heiderich <mario.heiderich@googlemail.com> for reporting and Takeshi
Terada <t-terada@violet.plala.or.jp> for suggesting the fix.
- Emit an error for CollectErrors if a body is extracted
- Fix bug in background-position center keyword handling.
- Fix infinite loop when a wrapper element is inserted in a context
where it's not allowed. Thanks Lars <lars@renoz.dk> for reporting.
- Remove +x bit and shebang from index.php; only supported mode is to
explicitly call it with php.
- Make test script less chatty when log_errors is on.
4.1.0, released 2010-04-26
! Support proprietary height attribute on table element
! Support YouTube slideshows that contain /cp/ in their URL.
! Support for data: URI scheme; not enabled by default, add it using
%URI.AllowedSchemes
! Support flashvars when using %HTML.SafeObject and %HTML.SafeEmbed.
! Support for Internet Explorer compatibility with %HTML.SafeObject
using %Output.FlashCompat.
! Handle <ol><ol> properly, by inserting the necessary <li> tag.
- Always quote the insides of url(...) in CSS.
4.0.0, released 2009-07-07
# APIs for ConfigSchema subsystem have substantially changed. See
docs/dev-config-bcbreaks.txt for details; in essence, anything that
had both namespace and directive now has a single unified key.
# Some configuration directives were renamed, specifically:
%AutoFormatParam.PurifierLinkifyDocURL -> %AutoFormat.PurifierLinkify.DocURL
%FilterParam.ExtractStyleBlocksEscaping -> %Filter.ExtractStyleBlocks.Escaping
%FilterParam.ExtractStyleBlocksScope -> %Filter.ExtractStyleBlocks.Scope
%FilterParam.ExtractStyleBlocksTidyImpl -> %Filter.ExtractStyleBlocks.TidyImpl
As usual, the old directive names will still work, but will throw E_NOTICE
errors.
# The allowed values for class have been relaxed to allow all of CDATA for
doctypes that are not XHTML 1.1 or XHTML 2.0. For old behavior, set
%Attr.ClassUseCDATA to false.
# Instead of appending the content model to an old content model, a blank
element will replace the old content model. You can use #SUPER to get
the old content model.
! More robust support for name="" and id=""
! HTMLPurifier_Config::inherit($config) allows you to inherit one
configuration, and have changes to that configuration be propagated
to all of its children.
! Implement %HTML.Attr.Name.UseCDATA, which relaxes validation rules on
the name attribute when set. Use with care. Thanks Ian Cook for
sponsoring.
! Implement %AutoFormat.RemoveEmpty.RemoveNbsp, which removes empty
tags that contain non-breaking spaces as well as other whitespace. You
can also modify which tags should have &nbsp; maintained with
%AutoFormat.RemoveEmpty.RemoveNbsp.Exceptions.
! Implement %Attr.AllowedClasses, which allows administrators to restrict
classes users can use to a specified finite set of classes, and
%Attr.ForbiddenClasses, which is the logical inverse.
! You can now maintain your own configuration schema directories by
creating a config-schema.php file or passing an extra argument. Check
docs/dev-config-schema.html for more details.
! Added HTMLPurifier_Config->serialize() method, which lets you save away
your configuration in a compact serial file, which you can unserialize
and use directly without having to go through the overhead of setup.
- Fix bug where URIDefinition would not get cleared if its directives got
changed.
- Fix fatal error in HTMLPurifier_Encoder on certain platforms (probably NetBSD 5.0)
- Fix bug in Linkify autoformatter involving <a><span>http://foo</span></a>
- Make %URI.Munge not apply to links that have the same host as your host.
- Prevent stray </body> tag from truncating output, if a second </body>
is present.
. Created script maintenance/rename-config.php for renaming a configuration
directive while maintaining its alias. This script does not change source code.
. Implement namespace locking for definition construction, to prevent
bugs where a directive is used for definition construction but is not
used to construct the cache hash.
3.3.0, released 2009-02-16 3.3.0, released 2009-02-16
! Implement CSS property 'overflow' when %CSS.AllowTricky is true. ! Implement CSS property 'overflow' when %CSS.AllowTricky is true.
! Implement generic property list classes ! Implement generic property list classes

View File

@@ -1,6 +1,5 @@
HTML Purifier [![Build Status](https://secure.travis-ci.org/ezyang/htmlpurifier.svg?branch=master)](http://travis-ci.org/ezyang/htmlpurifier)
README =============
All about HTML Purifier
HTML Purifier is an HTML filtering solution that uses a unique combination HTML Purifier is an HTML filtering solution that uses a unique combination
of robust whitelists and aggressive parsing to ensure that not only are of robust whitelists and aggressive parsing to ensure that not only are
@@ -19,6 +18,12 @@ Places to go:
an in-depth installation guide. an in-depth installation guide.
* See WYSIWYG for information on editors like TinyMCE and FCKeditor * See WYSIWYG for information on editors like TinyMCE and FCKeditor
HTML Purifier can be found on the web at: http://htmlpurifier.org/ HTML Purifier can be found on the web at: [http://htmlpurifier.org/](http://htmlpurifier.org/)
vim: et sw=4 sts=4 ## Installation
Package available on [Composer](https://packagist.org/packages/ezyang/htmlpurifier).
If you're using Composer to manage dependencies, you can use
$ composer require ezyang/htmlpurifier:dev-master

107
TODO
View File

@@ -11,54 +11,68 @@ If no interest is expressed for a feature that may require a considerable
amount of effort to implement, it may get endlessly delayed. Do not be amount of effort to implement, it may get endlessly delayed. Do not be
afraid to cast your vote for the next feature to be implemented! afraid to cast your vote for the next feature to be implemented!
- Investigate how early internal structures can be accessed; this would Things to do as soon as possible:
prevent structures from being parsed and serialized multiple times.
- Built-in support for target="_blank" on all external links - http://htmlpurifier.org/phorum/read.php?3,5560,6307#msg-6307
- Allow <a id="asdf" name="asdf"> - Think about allowing explicit order of operations hooks for transforms
- Convert configuration to allow an arbitrary number of namespaces; - Fix "<.<" bug (trailing < is removed if not EOD)
then rename as appropriate. - Build in better internal state dumps and debugging tools for remote
debugging
- Allowed/Allowed* have strange interactions when both set
? Transform lone embeds into object tags
- Deprecated config options that emit warnings when you set them (with
a way of muting the warning if you really want to)
- Make HTML.Trusted work with Output.FlashCompat
- HTML.Trusted and HTML.SafeObject have funny interaction; general
problem is what to do when a module "supersedes" another
(see also tables and basic tables.) This is a little dicier
because HTML.SafeObject has some extra functionality that
trusted might find useful. See http://htmlpurifier.org/phorum/read.php?3,5762,6100
FUTURE VERSIONS FUTURE VERSIONS
--------------- ---------------
4.1 release [It's All About Trust] (floating) 4.9 release [OMG CONFIG PONIES]
! Fix Printer. It's from the old days when we didn't have decent XML classes
! Factor demo.php into a set of Printer classes, and then create a stub
file for users here (inside the actual HTML Purifier library)
- Fix error handling with form construction
- Do encoding validation in Printers, or at least, where user data comes in
- Config: Add examples to everything (make built-in which also automatically
gives output)
- Add "register" field to config schemas to eliminate dependence on
naming conventions (try to remember why we ultimately decided on this)
5.0 release [HTML 5]
# Swap out code to use html5lib tokenizer and tree-builder
! Allow turning off of FixNesting and required attribute insertion
5.1 release [It's All About Trust] (floating)
# Implement untrusted, dangerous elements/attributes # Implement untrusted, dangerous elements/attributes
# Implement IDREF support (harder than it seems, since you cannot have # Implement IDREF support (harder than it seems, since you cannot have
IDREFs to non-existent IDs) IDREFs to non-existent IDs)
- Implement <area> (client and server side image maps are blocking
on IDREF support)
# Frameset XHTML 1.0 and HTML 4.01 doctypes # Frameset XHTML 1.0 and HTML 4.01 doctypes
- Implement <area>
- Figure out how to simultaneously set %CSS.Trusted and %HTML.Trusted (?) - Figure out how to simultaneously set %CSS.Trusted and %HTML.Trusted (?)
4.2 release [Error'ed] 5.2 release [Error'ed]
# Error logging for filtering/cleanup procedures # Error logging for filtering/cleanup procedures
- XSS-attempt detection--certain errors are flagged XSS-like
4.3 release [Do What I Mean, Not What I Say]
# Additional support for poorly written HTML # Additional support for poorly written HTML
- Microsoft Word HTML cleaning (i.e. MsoNormal, but research essential!) - Microsoft Word HTML cleaning (i.e. MsoNormal, but research essential!)
- Friendly strict handling of <address> (block -> <br>) - Friendly strict handling of <address> (block -> <br>)
? Remove redundant tags, ex. <u><u>Underlined</u></u>. Implementation notes: - XSS-attempt detection--certain errors are flagged XSS-like
1. Analyzing which tags to remove duplicants
2. Ensure attributes are merged into the parent tag
3. Extend the tag exclusion system to specify whether or not the
contents should be dropped or not (currently, there's code that could do
something like this if it didn't drop the inner text too.)
- Remove <span> tags that don't do anything (no attributes)
- Append something to duplicate IDs so they're still usable (impl. note: the - Append something to duplicate IDs so they're still usable (impl. note: the
dupe detector would also need to detect the suffix as well) dupe detector would also need to detect the suffix as well)
- Externalize inline CSS to promote clean HTML, proposed by Sander Tekelenburg
5.0 release [Beyond HTML] 6.0 release [Beyond HTML]
# Legit token based CSS parsing (will require revamping almost every # Legit token based CSS parsing (will require revamping almost every
AttrDef class). Probably will use CSSTidy class? AttrDef class). Probably will use CSSTidy
# More control over allowed CSS properties using a modularization # More control over allowed CSS properties using a modularization
# HTML 5 support
# IRI support (this includes IDN) # IRI support (this includes IDN)
- Standardize token armor for all areas of processing - Standardize token armor for all areas of processing
- Convert RTL/LTR override characters to <bdo> tags, or vice versa on demand.
Also, enable disabling of directionality
6.0 release [To XML and Beyond] 7.0 release [To XML and Beyond]
- Extended HTML capabilities based on namespacing and tag transforms (COMPLEX) - Extended HTML capabilities based on namespacing and tag transforms (COMPLEX)
- Hooks for adding custom processors to custom namespaced tags and - Hooks for adding custom processors to custom namespaced tags and
attributes, offer default implementation attributes, offer default implementation
@@ -69,27 +83,14 @@ Ongoing
- Refactor unit tests into lots of test methods - Refactor unit tests into lots of test methods
- Plugins for major CMSes (COMPLEX) - Plugins for major CMSes (COMPLEX)
- phpBB - phpBB
- Drupal needs loving! - Also, a FAQ for extension writers with HTML Purifier
- Phorum need loving!
- more! (look for ones that use WYSIWYGs)
- Also, maybe a FAQ for extension writers with HTML Purifier
AutoFormat AutoFormat
- Smileys - Smileys
- Syntax highlighting (with GeSHi) with <pre> and possibly <?php - Syntax highlighting (with GeSHi) with <pre> and possibly <?php
- Look at http://drupal.org/project/Modules/category/63 for ideas - Look at http://drupal.org/project/Modules/category/63 for ideas
Optimizations
- Reduce size of internal data-structures (esp. HTMLDefinition)
- Research memory usage of objects versus arrays
- Combine multiple strategies into a single, single-pass strategy
- Get PH5P working with the latest versions of DOM, which have much more
stringent error checking procedures. Maybe convert straight to tokens.
- Get rid of set_include_path(). Save this for another major release.
Neat feature related Neat feature related
! Factor demo.php into a set of Printer classes, and then create a stub
file for users here (inside the actual HTML Purifier library)
! Support exporting configuration, so users can easily tweak settings ! Support exporting configuration, so users can easily tweak settings
in the demo, and then copy-paste into their own setup in the demo, and then copy-paste into their own setup
- Advanced URI filtering schemes (see docs/proposal-new-directives.txt) - Advanced URI filtering schemes (see docs/proposal-new-directives.txt)
@@ -106,14 +107,32 @@ Neat feature related
- Full set of color keywords. Also, a way to add onto them without - Full set of color keywords. Also, a way to add onto them without
finalizing the configuration object. finalizing the configuration object.
- Write a var_export and memcached DefinitionCache - Denis - Write a var_export and memcached DefinitionCache - Denis
- Allow restriction of allowed class values - Built-in support for target="_blank" on all external links
- Convert RTL/LTR override characters to <bdo> tags, or vice versa on demand.
Also, enable disabling of directionality
? Externalize inline CSS to promote clean HTML, proposed by Sander Tekelenburg
? Remove redundant tags, ex. <u><u>Underlined</u></u>. Implementation notes:
1. Analyzing which tags should have duplicates removed
2. Ensure attributes are merged into the parent tag
3. Extend the tag exclusion system to specify whether or not the
contents should be dropped or not (currently, there's code that could do
something like this if it didn't drop the inner text too.)
? Make AutoParagraph also support paragraph-izing double <br> tags, and not
just double newlines. This is kind of tough to do in the current framework,
though, and might be reasonably approximated by search replacing double <br>s
with newlines before running it through HTML Purifier.
Maintenance related (slightly boring) Maintenance related (slightly boring)
# CHMOD install script for PEAR installs # CHMOD install script for PEAR installs
! Factor out command line parser into its own class, and unit test it ! Factor out command line parser into its own class, and unit test it
! Nested configuration namespaces - Reduce size of internal data-structures (esp. HTMLDefinition)
- Distinguish between default settings and explicitly set settings, so - Allow merging configurations. Thus,
configurations can be merged a -> b -> default
c -> d -> default
becomes
a -> b -> c -> d -> default
Maybe allow more fine-grained tuning of this behavior. Alternatively,
encourage people to use short plist depths before building them up.
- Time PHPT tests - Time PHPT tests
ChildDef related (very boring) ChildDef related (very boring)

View File

@@ -1 +1 @@
3.3.0 4.9.2

View File

@@ -1,6 +1,12 @@
HTML Purifier 3.3.0 is fixes a number of obscure bugs reported and fixed HTML Purifier 4.9.x is a maintenance release, collecting a year
over a four month period. It is probably the last release in the 3.x of accumulated bug fixes plus a few new features. New features
series. Notable new features include support for the overflow CSS include support for min/max-width/height CSS, and rgba/hsl/hsla
property; notable bugfixes include fixed YouTube rendering in certain in color specifications. Major bugfixes include improvements
versions of Firefox, CSSDefinition Printer, improved early PHP support in the Serializer cache to avoid chmod'ing directories, better
and bugs in iconv. entity decoding (we won't accidentally decode entities that occur
in URLs) and rel="noopener" on links with target attributes,
to prevent them from overwriting the original frame.
4.9.0 was skipped due to a packaging problem; 4.9.2 fixes two
major regressions in PHP 5.3 support and entity decoding; no
other functional changes were applied.

View File

@@ -23,15 +23,16 @@ if (version_compare(PHP_VERSION, '5', '>=')) {
class RowTimer extends Benchmark_Timer class RowTimer extends Benchmark_Timer
{ {
var $name; public $name;
function RowTimer($name, $auto = false) { public function __construct($name, $auto = false)
{
$this->name = htmlentities($name); $this->name = htmlentities($name);
$this->Benchmark_Timer($auto); $this->Benchmark_Timer($auto);
} }
function getOutput() { public function getOutput()
{
$total = $this->TimeElapsed(); $total = $this->TimeElapsed();
$result = $this->getProfiling(); $result = $this->getProfiling();
$dashes = ''; $dashes = '';
@@ -68,7 +69,8 @@ class RowTimer extends Benchmark_Timer
} }
} }
function print_lexers() { function print_lexers()
{
global $LEXERS; global $LEXERS;
$first = true; $first = true;
foreach ($LEXERS as $key => $value) { foreach ($LEXERS as $key => $value) {
@@ -78,7 +80,8 @@ function print_lexers() {
} }
} }
function do_benchmark($name, $document) { function do_benchmark($name, $document)
{
global $LEXERS, $RUNS; global $LEXERS, $RUNS;
$config = HTMLPurifier_Config::createDefault(); $config = HTMLPurifier_Config::createDefault();

View File

@@ -52,4 +52,5 @@
</body> </body>
</html> </html>
<!-- vim: et sw=4 sts=4 --> <!-- vim: et sw=4 sts=4
-->

View File

@@ -16,4 +16,5 @@ function qs(el) {if (window.RegExp && window.encodeURIComponent) {var ue=el.href
// --> // -->
</script><table border=0 cellspacing=0 cellpadding=4><tr><td nowrap><font size=-1><b>Web</b>&nbsp;&nbsp;&nbsp;&nbsp;<a id=1a class=q href="/imghp?hl=en&tab=wi" onClick="return qs(this);">Images</a>&nbsp;&nbsp;&nbsp;&nbsp;<a id=2a class=q href="http://groups.google.com/grphp?hl=en&tab=wg" onClick="return qs(this);">Groups</a>&nbsp;&nbsp;&nbsp;&nbsp;<a id=4a class=q href="http://news.google.com/nwshp?hl=en&tab=wn" onClick="return qs(this);">News</a>&nbsp;&nbsp;&nbsp;&nbsp;<a id=5a class=q href="http://froogle.google.com/frghp?hl=en&tab=wf" onClick="return qs(this);">Froogle</a>&nbsp;&nbsp;&nbsp;&nbsp;<a id=8a class=q href="/lochp?hl=en&tab=wl" onClick="return qs(this);">Local</a>&nbsp;&nbsp;&nbsp;&nbsp;<b><a href="/intl/en/options/" class=q>more&nbsp;&raquo;</a></b></font></td></tr></table><table cellspacing=0 cellpadding=0><tr><td width=25%>&nbsp;</td><td align=center><input type=hidden name=hl value=en><input maxlength=2048 size=55 name=q value="" title="Google Search"><br><input type=submit value="Google Search" name=btnG><input type=submit value="I'm Feeling Lucky" name=btnI></td><td valign=top nowrap width=25%><font size=-2>&nbsp;&nbsp;<a href=/advanced_search?hl=en>Advanced Search</a><br>&nbsp;&nbsp;<a href=/preferences?hl=en>Preferences</a><br>&nbsp;&nbsp;<a href=/language_tools?hl=en>Language Tools</a></font></td></tr></table></form><br><br><font size=-1><a href="/ads/">Advertising&nbsp;Programs</a> - <a href=/services/>Business Solutions</a> - <a href=/about.html>About Google</a></font><p><font size=-2>&copy;2006 Google</font></p></center></body></html> </script><table border=0 cellspacing=0 cellpadding=4><tr><td nowrap><font size=-1><b>Web</b>&nbsp;&nbsp;&nbsp;&nbsp;<a id=1a class=q href="/imghp?hl=en&tab=wi" onClick="return qs(this);">Images</a>&nbsp;&nbsp;&nbsp;&nbsp;<a id=2a class=q href="http://groups.google.com/grphp?hl=en&tab=wg" onClick="return qs(this);">Groups</a>&nbsp;&nbsp;&nbsp;&nbsp;<a id=4a class=q 
href="http://news.google.com/nwshp?hl=en&tab=wn" onClick="return qs(this);">News</a>&nbsp;&nbsp;&nbsp;&nbsp;<a id=5a class=q href="http://froogle.google.com/frghp?hl=en&tab=wf" onClick="return qs(this);">Froogle</a>&nbsp;&nbsp;&nbsp;&nbsp;<a id=8a class=q href="/lochp?hl=en&tab=wl" onClick="return qs(this);">Local</a>&nbsp;&nbsp;&nbsp;&nbsp;<b><a href="/intl/en/options/" class=q>more&nbsp;&raquo;</a></b></font></td></tr></table><table cellspacing=0 cellpadding=0><tr><td width=25%>&nbsp;</td><td align=center><input type=hidden name=hl value=en><input maxlength=2048 size=55 name=q value="" title="Google Search"><br><input type=submit value="Google Search" name=btnG><input type=submit value="I'm Feeling Lucky" name=btnI></td><td valign=top nowrap width=25%><font size=-2>&nbsp;&nbsp;<a href=/advanced_search?hl=en>Advanced Search</a><br>&nbsp;&nbsp;<a href=/preferences?hl=en>Preferences</a><br>&nbsp;&nbsp;<a href=/language_tools?hl=en>Language Tools</a></font></td></tr></table></form><br><br><font size=-1><a href="/ads/">Advertising&nbsp;Programs</a> - <a href=/services/>Business Solutions</a> - <a href=/about.html>About Google</a></font><p><font size=-2>&copy;2006 Google</font></p></center></body></html>
<!-- vim: et sw=4 sts=4 --> <!-- vim: et sw=4 sts=4
-->

View File

@@ -127,4 +127,5 @@ if (objAdMgr.isSlotAvailable("leaderboard")) {
</html> </html>
<!-- vim: et sw=4 sts=4 --> <!-- vim: et sw=4 sts=4
-->

View File

@@ -539,4 +539,5 @@ Retrieved from "<a href="http://en.wikipedia.org/wiki/Tai_Chi_Chuan">http://en.w
<!-- Served by srv25 in 0.089 secs. --> <!-- Served by srv25 in 0.089 secs. -->
</body></html> </body></html>
<!-- vim: et sw=4 sts=4 --> <!-- vim: et sw=4 sts=4
-->

25
composer.json Normal file
View File

@@ -0,0 +1,25 @@
{
"name": "ezyang/htmlpurifier",
"description": "Standards compliant HTML filter written in PHP",
"type": "library",
"keywords": ["html"],
"homepage": "http://htmlpurifier.org/",
"license": "LGPL",
"authors": [
{
"name": "Edward Z. Yang",
"email": "admin@htmlpurifier.org",
"homepage": "http://ezyang.com"
}
],
"require": {
"php": ">=5.2"
},
"require-dev": {
"simpletest/simpletest": "^1.1"
},
"autoload": {
"psr-0": { "HTMLPurifier": "library/" },
"files": ["library/HTMLPurifier.composer.php"]
}
}

View File

@@ -18,22 +18,24 @@ TODO:
if (version_compare(PHP_VERSION, '5.2', '<')) exit('PHP 5.2+ required.'); if (version_compare(PHP_VERSION, '5.2', '<')) exit('PHP 5.2+ required.');
error_reporting(E_ALL | E_STRICT); error_reporting(E_ALL | E_STRICT);
chdir(dirname(__FILE__));
// load dual-libraries // load dual-libraries
require_once '../extras/HTMLPurifierExtras.auto.php'; require_once dirname(__FILE__) . '/../extras/HTMLPurifierExtras.auto.php';
require_once '../library/HTMLPurifier.auto.php'; require_once dirname(__FILE__) . '/../library/HTMLPurifier.auto.php';
// setup HTML Purifier singleton // setup HTML Purifier singleton
HTMLPurifier::getInstance(array( HTMLPurifier::getInstance(array(
'AutoFormat.PurifierLinkify' => true 'AutoFormat.PurifierLinkify' => true
)); ));
$interchange = HTMLPurifier_ConfigSchema_InterchangeBuilder::buildFromDirectory(); $builder = new HTMLPurifier_ConfigSchema_InterchangeBuilder();
$interchange = new HTMLPurifier_ConfigSchema_Interchange();
$builder->buildDir($interchange);
$loader = dirname(__FILE__) . '/../config-schema.php';
if (file_exists($loader)) include $loader;
$interchange->validate(); $interchange->validate();
$style = 'plain'; // use $_GET in the future, careful to validate! $style = 'plain'; // use $_GET in the future, careful to validate!
$configdoc_xml = 'configdoc.xml'; $configdoc_xml = dirname(__FILE__) . '/configdoc.xml';
$xml_builder = new HTMLPurifier_ConfigSchema_Builder_Xml(); $xml_builder = new HTMLPurifier_ConfigSchema_Builder_Xml();
$xml_builder->openURI($configdoc_xml); $xml_builder->openURI($configdoc_xml);
@@ -50,13 +52,13 @@ if (!$output) {
} }
// write out // write out
file_put_contents("$style.html", $output); file_put_contents(dirname(__FILE__) . "/$style.html", $output);
if (php_sapi_name() != 'cli') { if (php_sapi_name() != 'cli') {
// output (instant feedback if it's a browser) // output (instant feedback if it's a browser)
echo $output; echo $output;
} else { } else {
echo 'Files generated successfully.'; echo "Files generated successfully.\n";
} }
// vim: et sw=4 sts=4 // vim: et sw=4 sts=4

View File

@@ -40,12 +40,26 @@
</xsl:apply-templates> </xsl:apply-templates>
</ul> </ul>
</div> </div>
<div id="typesContainer">
<h2>Types</h2>
<xsl:apply-templates select="$typeLookup" mode="types" />
</div>
<xsl:apply-templates /> <xsl:apply-templates />
</div> </div>
</body> </body>
</html> </html>
</xsl:template> </xsl:template>
<xsl:template match="type" mode="types">
<div class="type-block">
<xsl:attribute name="id">type-<xsl:value-of select="@id" /></xsl:attribute>
<h3><code><xsl:value-of select="@id" /></code>: <xsl:value-of select="@name" /></h3>
<div class="type-description">
<xsl:copy-of xmlns:xhtml="http://www.w3.org/1999/xhtml" select="xhtml:div/node()" />
</div>
</div>
</xsl:template>
<xsl:template match="title" mode="toc" /> <xsl:template match="title" mode="toc" />
<xsl:template match="namespace" mode="toc"> <xsl:template match="namespace" mode="toc">
<xsl:param name="overflowNumber" /> <xsl:param name="overflowNumber" />
@@ -192,10 +206,13 @@
<td> <td>
<xsl:variable name="type" select="text()" /> <xsl:variable name="type" select="text()" />
<xsl:attribute name="class">type type-<xsl:value-of select="$type" /></xsl:attribute> <xsl:attribute name="class">type type-<xsl:value-of select="$type" /></xsl:attribute>
<xsl:value-of select="$typeLookup/type[@id=$type]/text()" /> <a>
<xsl:if test="@allow-null='yes'"> <xsl:attribute name="href">#type-<xsl:value-of select="$type" /></xsl:attribute>
(or null) <xsl:value-of select="$typeLookup/type[@id=$type]/@name" />
</xsl:if> <xsl:if test="@allow-null='yes'">
(or null)
</xsl:if>
</a>
</td> </td>
</tr> </tr>
</xsl:template> </xsl:template>
@@ -232,4 +249,5 @@
</xsl:stylesheet> </xsl:stylesheet>
<!-- vim: et sw=4 sts=4 --> <!-- vim: et sw=4 sts=4
-->

View File

@@ -1,16 +1,69 @@
<?xml version="1.0" encoding="UTF-8"?> <?xml version="1.0" encoding="UTF-8"?>
<types> <types>
<type id="string">String</type> <type id="string" name="String"><div xmlns="http://www.w3.org/1999/xhtml">
<type id="istring">Case-insensitive string</type> A <a
<type id="text">Text</type> href="http://docs.php.net/manual/en/language.types.string.php">sequence
<type id="itext">Case-insensitive text</type> of characters</a>.
<type id="int">Integer</type> </div></type>
<type id="float">Float</type> <type id="istring" name="Case-insensitive string"><div xmlns="http://www.w3.org/1999/xhtml">
<type id="bool">Boolean</type> A series of case-insensitive characters. Internally, upper-case
<type id="lookup">Lookup array</type> ASCII characters will be converted to lower-case.
<type id="list">Array list</type> </div></type>
<type id="hash">Associative array</type> <type id="text" name="Text"><div xmlns="http://www.w3.org/1999/xhtml">
<type id="mixed">Mixed</type> A series of characters that may contain newlines. Text tends to
indicate human-oriented text, as opposed to a machine format.
</div></type>
<type id="itext" name="Case-insensitive text"><div xmlns="http://www.w3.org/1999/xhtml">
A series of case-insensitive characters that may contain newlines.
</div></type>
<type id="int" name="Integer"><div xmlns="http://www.w3.org/1999/xhtml">
An <a
href="http://docs.php.net/manual/en/language.types.integer.php">
integer</a>. You are alternatively permitted to pass a string of
digits instead, which will be cast to an integer using
<code>(int)</code>.
</div></type>
<type id="float" name="Float"><div xmlns="http://www.w3.org/1999/xhtml">
A <a href="http://docs.php.net/manual/en/language.types.float.php">
floating point number</a>. You are alternatively permitted to
pass a numeric string (as defined by <code>is_numeric()</code>),
which will be cast to a float using <code>(float)</code>.
</div></type>
<type id="bool" name="Boolean"><div xmlns="http://www.w3.org/1999/xhtml">
A <a
href="http://docs.php.net/manual/en/language.types.boolean.php">boolean</a>.
You are alternatively permitted to pass an integer <code>0</code> or
<code>1</code> (other integers are not permitted) or a string
<code>"on"</code>, <code>"true"</code> or <code>"1"</code> for
<code>true</code>, and <code>"off"</code>, <code>"false"</code> or
<code>"0"</code> for <code>false</code>.
</div></type>
<type id="lookup" name="Lookup array"><div xmlns="http://www.w3.org/1999/xhtml">
An array whose values are <code>true</code>, e.g. <code>array('key'
=> true, 'key2' => true)</code>. You are alternatively permitted
to pass an array list of the keys <code>array('key', 'key2')</code>
or a comma-separated string of keys <code>"key, key2"</code>. If
you pass an array list of values, ensure that your values are
strictly numerically indexed: <code>array('key1', 2 =>
'key2')</code> will not do what you expect and emits a warning.
</div></type>
<type id="list" name="Array list"><div xmlns="http://www.w3.org/1999/xhtml">
An array which has consecutive integer indexes, e.g.
<code>array('val1', 'val2')</code>. You are alternatively permitted
to pass a comma-separated string of keys <code>"val1, val2"</code>.
If your array is not in this form, <code>array_values</code> is run
on the array and a warning is emitted.
</div></type>
<type id="hash" name="Associative array"><div xmlns="http://www.w3.org/1999/xhtml">
An array which is a mapping of keys to values, e.g.
<code>array('key1' => 'val1', 'key2' => 'val2')</code>. You are
alternatively permitted to pass a comma-separated string of
key-colon-value strings, e.g. <code>"key1: val1, key2: val2"</code>.
</div></type>
<type id="mixed" name="Mixed"><div xmlns="http://www.w3.org/1999/xhtml">
An arbitrary PHP value of any type.
</div></type>
</types> </types>
<!-- vim: et sw=4 sts=4 --> <!-- vim: et sw=4 sts=4
-->

View File

@@ -2,401 +2,589 @@
<usage> <usage>
<directive id="Core.CollectErrors"> <directive id="Core.CollectErrors">
<file name="HTMLPurifier.php"> <file name="HTMLPurifier.php">
<line>131</line> <line>162</line>
</file> </file>
<file name="HTMLPurifier/Lexer.php"> <file name="HTMLPurifier/Lexer.php">
<line>81</line> <line>85</line>
<line>326</line>
</file> </file>
<file name="HTMLPurifier/Lexer/DirectLex.php"> <file name="HTMLPurifier/Lexer/DirectLex.php">
<line>53</line> <line>67</line>
<line>73</line> <line>87</line>
<line>348</line> <line>385</line>
</file> </file>
<file name="HTMLPurifier/Strategy/RemoveForeignElements.php"> <file name="HTMLPurifier/Strategy/RemoveForeignElements.php">
<line>47</line> <line>57</line>
</file> </file>
</directive> </directive>
<directive id="CSS.MaxImgLength"> <directive id="CSS.MaxImgLength">
<file name="HTMLPurifier/CSSDefinition.php"> <file name="HTMLPurifier/CSSDefinition.php">
<line>157</line> <line>226</line>
</file> </file>
</directive> </directive>
<directive id="CSS.Proprietary"> <directive id="CSS.Proprietary">
<file name="HTMLPurifier/CSSDefinition.php"> <file name="HTMLPurifier/CSSDefinition.php">
<line>214</line> <line>323</line>
</file> </file>
</directive> </directive>
<directive id="CSS.AllowTricky"> <directive id="CSS.AllowTricky">
<file name="HTMLPurifier/CSSDefinition.php"> <file name="HTMLPurifier/CSSDefinition.php">
<line>218</line> <line>327</line>
</file>
</directive>
<directive id="CSS.Trusted">
<file name="HTMLPurifier/CSSDefinition.php">
<line>331</line>
</file> </file>
</directive> </directive>
<directive id="CSS.AllowImportant"> <directive id="CSS.AllowImportant">
<file name="HTMLPurifier/CSSDefinition.php"> <file name="HTMLPurifier/CSSDefinition.php">
<line>222</line> <line>335</line>
</file> </file>
</directive> </directive>
<directive id="CSS.AllowedProperties"> <directive id="CSS.AllowedProperties">
<file name="HTMLPurifier/CSSDefinition.php"> <file name="HTMLPurifier/CSSDefinition.php">
<line>275</line> <line>464</line>
</file>
</directive>
<directive id="CSS.ForbiddenProperties">
<file name="HTMLPurifier/CSSDefinition.php">
<line>480</line>
</file> </file>
</directive> </directive>
<directive id="Cache.DefinitionImpl"> <directive id="Cache.DefinitionImpl">
<file name="HTMLPurifier/DefinitionCacheFactory.php"> <file name="HTMLPurifier/DefinitionCacheFactory.php">
<line>49</line> <line>66</line>
</file> </file>
</directive> </directive>
<directive id="HTML.Doctype"> <directive id="HTML.Doctype">
<file name="HTMLPurifier/DoctypeRegistry.php"> <file name="HTMLPurifier/DoctypeRegistry.php">
<line>83</line> <line>119</line>
</file> </file>
</directive> </directive>
<directive id="HTML.CustomDoctype"> <directive id="HTML.CustomDoctype">
<file name="HTMLPurifier/DoctypeRegistry.php"> <file name="HTMLPurifier/DoctypeRegistry.php">
<line>85</line> <line>123</line>
</file> </file>
</directive> </directive>
<directive id="HTML.XHTML"> <directive id="HTML.XHTML">
<file name="HTMLPurifier/DoctypeRegistry.php"> <file name="HTMLPurifier/DoctypeRegistry.php">
<line>88</line> <line>128</line>
</file> </file>
</directive> </directive>
<directive id="HTML.Strict"> <directive id="HTML.Strict">
<file name="HTMLPurifier/DoctypeRegistry.php"> <file name="HTMLPurifier/DoctypeRegistry.php">
<line>93</line> <line>133</line>
</file> </file>
</directive> </directive>
<directive id="Core.Encoding"> <directive id="Core.Encoding">
<file name="HTMLPurifier/Encoder.php"> <file name="HTMLPurifier/Encoder.php">
<line>267</line> <line>380</line>
<line>300</line> <line>428</line>
</file> </file>
</directive> </directive>
<directive id="Test.ForceNoIconv"> <directive id="Test.ForceNoIconv">
<file name="HTMLPurifier/Encoder.php"> <file name="HTMLPurifier/Encoder.php">
<line>272</line> <line>388</line>
<line>308</line> <line>439</line>
</file> </file>
</directive> </directive>
<directive id="Core.EscapeNonASCIICharacters"> <directive id="Core.EscapeNonASCIICharacters">
<file name="HTMLPurifier/Encoder.php"> <file name="HTMLPurifier/Encoder.php">
<line>304</line> <line>429</line>
</file> </file>
</directive> </directive>
<directive id="Output.CommentScriptContents"> <directive id="Output.CommentScriptContents">
<file name="HTMLPurifier/Generator.php"> <file name="HTMLPurifier/Generator.php">
<line>45</line> <line>70</line>
</file>
</directive>
<directive id="Output.FixInnerHTML">
<file name="HTMLPurifier/Generator.php">
<line>71</line>
</file> </file>
</directive> </directive>
<directive id="Output.SortAttr"> <directive id="Output.SortAttr">
<file name="HTMLPurifier/Generator.php"> <file name="HTMLPurifier/Generator.php">
<line>46</line> <line>72</line>
</file>
</directive>
<directive id="Output.FlashCompat">
<file name="HTMLPurifier/Generator.php">
<line>73</line>
</file> </file>
</directive> </directive>
<directive id="Output.TidyFormat"> <directive id="Output.TidyFormat">
<file name="HTMLPurifier/Generator.php"> <file name="HTMLPurifier/Generator.php">
<line>75</line> <line>104</line>
</file>
</directive>
<directive id="Core.NormalizeNewlines">
<file name="HTMLPurifier/Generator.php">
<line>122</line>
</file>
<file name="HTMLPurifier/Lexer.php">
<line>308</line>
</file> </file>
</directive> </directive>
<directive id="Output.Newline"> <directive id="Output.Newline">
<file name="HTMLPurifier/Generator.php"> <file name="HTMLPurifier/Generator.php">
<line>89</line> <line>123</line>
</file> </file>
</directive> </directive>
<directive id="HTML.BlockWrapper"> <directive id="HTML.BlockWrapper">
<file name="HTMLPurifier/HTMLDefinition.php"> <file name="HTMLPurifier/HTMLDefinition.php">
<line>222</line> <line>263</line>
</file> </file>
</directive> </directive>
<directive id="HTML.Parent"> <directive id="HTML.Parent">
<file name="HTMLPurifier/HTMLDefinition.php"> <file name="HTMLPurifier/HTMLDefinition.php">
<line>230</line> <line>273</line>
</file> </file>
</directive> </directive>
<directive id="HTML.AllowedElements"> <directive id="HTML.AllowedElements">
<file name="HTMLPurifier/HTMLDefinition.php"> <file name="HTMLPurifier/HTMLDefinition.php">
<line>247</line> <line>291</line>
</file> </file>
</directive> </directive>
<directive id="HTML.AllowedAttributes"> <directive id="HTML.AllowedAttributes">
<file name="HTMLPurifier/HTMLDefinition.php"> <file name="HTMLPurifier/HTMLDefinition.php">
<line>248</line> <line>292</line>
</file> </file>
</directive> </directive>
<directive id="HTML.Allowed"> <directive id="HTML.Allowed">
<file name="HTMLPurifier/HTMLDefinition.php"> <file name="HTMLPurifier/HTMLDefinition.php">
<line>251</line> <line>295</line>
</file> </file>
</directive> </directive>
<directive id="HTML.ForbiddenElements"> <directive id="HTML.ForbiddenElements">
<file name="HTMLPurifier/HTMLDefinition.php"> <file name="HTMLPurifier/HTMLDefinition.php">
<line>337</line> <line>399</line>
</file> </file>
</directive> </directive>
<directive id="HTML.ForbiddenAttributes"> <directive id="HTML.ForbiddenAttributes">
<file name="HTMLPurifier/HTMLDefinition.php"> <file name="HTMLPurifier/HTMLDefinition.php">
<line>338</line> <line>400</line>
</file> </file>
</directive> </directive>
<directive id="HTML.Trusted"> <directive id="HTML.Trusted">
<file name="HTMLPurifier/HTMLModuleManager.php"> <file name="HTMLPurifier/HTMLModuleManager.php">
<line>202</line> <line>234</line>
</file> </file>
<file name="HTMLPurifier/Lexer.php"> <file name="HTMLPurifier/Lexer.php">
<line>258</line> <line>313</line>
<line>353</line>
</file> </file>
<file name="HTMLPurifier/HTMLModule/Image.php"> <file name="HTMLPurifier/HTMLModule/Image.php">
<line>27</line> <line>37</line>
</file> </file>
<file name="HTMLPurifier/Lexer/DirectLex.php"> <file name="HTMLPurifier/Lexer/DirectLex.php">
<line>36</line> <line>47</line>
</file> </file>
<file name="HTMLPurifier/Strategy/RemoveForeignElements.php"> <file name="HTMLPurifier/Strategy/RemoveForeignElements.php">
<line>23</line> <line>30</line>
</file> </file>
</directive> </directive>
<directive id="HTML.AllowedModules"> <directive id="HTML.AllowedModules">
<file name="HTMLPurifier/HTMLModuleManager.php"> <file name="HTMLPurifier/HTMLModuleManager.php">
<line>209</line> <line>241</line>
</file> </file>
</directive> </directive>
<directive id="HTML.CoreModules"> <directive id="HTML.CoreModules">
<file name="HTMLPurifier/HTMLModuleManager.php"> <file name="HTMLPurifier/HTMLModuleManager.php">
<line>210</line> <line>242</line>
</file> </file>
</directive> </directive>
<directive id="HTML.Proprietary"> <directive id="HTML.Proprietary">
<file name="HTMLPurifier/HTMLModuleManager.php"> <file name="HTMLPurifier/HTMLModuleManager.php">
<line>221</line> <line>256</line>
</file> </file>
</directive> </directive>
<directive id="HTML.SafeObject"> <directive id="HTML.SafeObject">
<file name="HTMLPurifier/HTMLModuleManager.php"> <file name="HTMLPurifier/HTMLModuleManager.php">
<line>226</line> <line>259</line>
</file> </file>
</directive> </directive>
<directive id="HTML.SafeEmbed"> <directive id="HTML.SafeEmbed">
<file name="HTMLPurifier/HTMLModuleManager.php"> <file name="HTMLPurifier/HTMLModuleManager.php">
<line>229</line> <line>262</line>
</file>
</directive>
<directive id="HTML.SafeScripting">
<file name="HTMLPurifier/HTMLModuleManager.php">
<line>265</line>
</file>
<file name="HTMLPurifier/HTMLModule/SafeScripting.php">
<line>22</line>
</file>
</directive>
<directive id="HTML.Nofollow">
<file name="HTMLPurifier/HTMLModuleManager.php">
<line>268</line>
</file>
</directive>
<directive id="HTML.TargetBlank">
<file name="HTMLPurifier/HTMLModuleManager.php">
<line>271</line>
</file>
</directive>
<directive id="HTML.TargetNoreferrer">
<file name="HTMLPurifier/HTMLModuleManager.php">
<line>276</line>
</file>
</directive>
<directive id="HTML.TargetNoopener">
<file name="HTMLPurifier/HTMLModuleManager.php">
<line>279</line>
</file> </file>
</directive> </directive>
<directive id="Attr.IDBlacklist"> <directive id="Attr.IDBlacklist">
<file name="HTMLPurifier/IDAccumulator.php"> <file name="HTMLPurifier/IDAccumulator.php">
<line>26</line> <line>27</line>
</file> </file>
</directive> </directive>
<directive id="Core.Language"> <directive id="Core.Language">
<file name="HTMLPurifier/LanguageFactory.php"> <file name="HTMLPurifier/LanguageFactory.php">
<line>88</line> <line>93</line>
</file> </file>
</directive> </directive>
<directive id="Core.LexerImpl"> <directive id="Core.LexerImpl">
<file name="HTMLPurifier/Lexer.php"> <file name="HTMLPurifier/Lexer.php">
<line>76</line> <line>80</line>
</file> </file>
</directive> </directive>
<directive id="Core.MaintainLineNumbers"> <directive id="Core.MaintainLineNumbers">
<file name="HTMLPurifier/Lexer.php"> <file name="HTMLPurifier/Lexer.php">
<line>80</line> <line>84</line>
</file> </file>
<file name="HTMLPurifier/Lexer/DirectLex.php"> <file name="HTMLPurifier/Lexer/DirectLex.php">
<line>48</line> <line>62</line>
</file>
</directive>
<directive id="Core.LegacyEntityDecoder">
<file name="HTMLPurifier/Lexer.php">
<line>215</line>
<line>337</line>
</file> </file>
</directive> </directive>
<directive id="Core.ConvertDocumentToFragment"> <directive id="Core.ConvertDocumentToFragment">
<file name="HTMLPurifier/Lexer.php"> <file name="HTMLPurifier/Lexer.php">
<line>267</line> <line>324</line>
</file>
</directive>
<directive id="Core.RemoveProcessingInstructions">
<file name="HTMLPurifier/Lexer.php">
<line>347</line>
</file>
</directive>
<directive id="Core.HiddenElements">
<file name="HTMLPurifier/Lexer.php">
<line>351</line>
</file>
<file name="HTMLPurifier/Strategy/RemoveForeignElements.php">
<line>36</line>
</file>
</directive>
<directive id="Core.AggressivelyRemoveScript">
<file name="HTMLPurifier/Lexer.php">
<line>352</line>
</file>
</directive>
<directive id="Core.RemoveScriptContents">
<file name="HTMLPurifier/Lexer.php">
<line>353</line>
</file>
<file name="HTMLPurifier/Strategy/RemoveForeignElements.php">
<line>35</line>
</file>
</directive>
<directive id="URI.">
<file name="HTMLPurifier/URIDefinition.php">
<line>65</line>
</file>
<file name="HTMLPurifier/URIFilter/Munge.php">
<line>46</line>
</file> </file>
</directive> </directive>
<directive id="URI.Host"> <directive id="URI.Host">
<file name="HTMLPurifier/URIDefinition.php"> <file name="HTMLPurifier/URIDefinition.php">
<line>64</line> <line>76</line>
</file>
<file name="HTMLPurifier/URIScheme.php">
<line>89</line>
</file> </file>
</directive> </directive>
<directive id="URI.Base"> <directive id="URI.Base">
<file name="HTMLPurifier/URIDefinition.php"> <file name="HTMLPurifier/URIDefinition.php">
<line>65</line> <line>77</line>
</file> </file>
</directive> </directive>
<directive id="URI.DefaultScheme"> <directive id="URI.DefaultScheme">
<file name="HTMLPurifier/URIDefinition.php"> <file name="HTMLPurifier/URIDefinition.php">
<line>72</line> <line>84</line>
</file> </file>
</directive> </directive>
<directive id="URI.AllowedSchemes"> <directive id="URI.AllowedSchemes">
<file name="HTMLPurifier/URISchemeRegistry.php"> <file name="HTMLPurifier/URISchemeRegistry.php">
<line>42</line> <line>48</line>
</file> </file>
</directive> </directive>
<directive id="URI.OverrideAllowedSchemes"> <directive id="URI.OverrideAllowedSchemes">
<file name="HTMLPurifier/URISchemeRegistry.php"> <file name="HTMLPurifier/URISchemeRegistry.php">
<line>43</line> <line>49</line>
</file>
</directive>
<directive id="CSS.AllowDuplicates">
<file name="HTMLPurifier/AttrDef/CSS.php">
<line>28</line>
</file> </file>
</directive> </directive>
<directive id="URI.Disable"> <directive id="URI.Disable">
<file name="HTMLPurifier/AttrDef/URI.php"> <file name="HTMLPurifier/AttrDef/URI.php">
<line>28</line> <line>47</line>
</file> </file>
</directive> </directive>
<directive id="Core.ColorKeywords"> <directive id="Core.ColorKeywords">
<file name="HTMLPurifier/AttrDef/CSS/Color.php"> <file name="HTMLPurifier/AttrDef/CSS/Color.php">
<line>12</line> <line>29</line>
</file> </file>
<file name="HTMLPurifier/AttrDef/HTML/Color.php"> <file name="HTMLPurifier/AttrDef/HTML/Color.php">
<line>12</line> <line>19</line>
</file>
</directive>
<directive id="CSS.AllowedFonts">
<file name="HTMLPurifier/AttrDef/CSS/FontFamily.php">
<line>64</line>
</file>
</directive>
<directive id="Attr.AllowedClasses">
<file name="HTMLPurifier/AttrDef/HTML/Class.php">
<line>33</line>
</file>
</directive>
<directive id="Attr.ForbiddenClasses">
<file name="HTMLPurifier/AttrDef/HTML/Class.php">
<line>34</line>
</file> </file>
</directive> </directive>
<directive id="Attr.AllowedFrameTargets"> <directive id="Attr.AllowedFrameTargets">
<file name="HTMLPurifier/AttrDef/HTML/FrameTarget.php"> <file name="HTMLPurifier/AttrDef/HTML/FrameTarget.php">
<line>15</line> <line>32</line>
</file> </file>
</directive> </directive>
<directive id="Attr.EnableID"> <directive id="Attr.EnableID">
<file name="HTMLPurifier/AttrDef/HTML/ID.php"> <file name="HTMLPurifier/AttrDef/HTML/ID.php">
<line>20</line> <line>41</line>
</file> </file>
</directive> </directive>
<directive id="Attr.IDPrefix"> <directive id="Attr.IDPrefix">
<file name="HTMLPurifier/AttrDef/HTML/ID.php"> <file name="HTMLPurifier/AttrDef/HTML/ID.php">
<line>26</line> <line>51</line>
</file> </file>
</directive> </directive>
<directive id="Attr.IDPrefixLocal"> <directive id="Attr.IDPrefixLocal">
<file name="HTMLPurifier/AttrDef/HTML/ID.php"> <file name="HTMLPurifier/AttrDef/HTML/ID.php">
<line>28</line> <line>53</line>
<line>31</line> <line>58</line>
</file>
</directive>
<directive id="Attr.ID.HTML5">
<file name="HTMLPurifier/AttrDef/HTML/ID.php">
<line>75</line>
</file> </file>
</directive> </directive>
<directive id="Attr.IDBlacklistRegexp"> <directive id="Attr.IDBlacklistRegexp">
<file name="HTMLPurifier/AttrDef/HTML/ID.php"> <file name="HTMLPurifier/AttrDef/HTML/ID.php">
<line>54</line> <line>97</line>
</file>
</directive>
<directive id="Attr.">
<file name="HTMLPurifier/AttrDef/HTML/LinkTypes.php">
<line>46</line>
</file>
</directive>
<directive id="Core.AllowHostnameUnderscore">
<file name="HTMLPurifier/AttrDef/URI/Host.php">
<line>77</line>
</file>
</directive>
<directive id="Core.EnableIDNA">
<file name="HTMLPurifier/AttrDef/URI/Host.php">
<line>105</line>
</file> </file>
</directive> </directive>
<directive id="Attr.DefaultTextDir"> <directive id="Attr.DefaultTextDir">
<file name="HTMLPurifier/AttrTransform/BdoDir.php"> <file name="HTMLPurifier/AttrTransform/BdoDir.php">
<line>13</line> <line>22</line>
</file> </file>
</directive> </directive>
<directive id="Core.RemoveInvalidImg"> <directive id="Core.RemoveInvalidImg">
<file name="HTMLPurifier/AttrTransform/ImgRequired.php"> <file name="HTMLPurifier/AttrTransform/ImgRequired.php">
<line>18</line> <line>24</line>
</file> </file>
<file name="HTMLPurifier/Strategy/RemoveForeignElements.php"> <file name="HTMLPurifier/Strategy/RemoveForeignElements.php">
<line>20</line> <line>27</line>
</file> </file>
</directive> </directive>
<directive id="Attr.DefaultInvalidImage"> <directive id="Attr.DefaultInvalidImage">
<file name="HTMLPurifier/AttrTransform/ImgRequired.php"> <file name="HTMLPurifier/AttrTransform/ImgRequired.php">
<line>19</line> <line>27</line>
</file> </file>
</directive> </directive>
<directive id="Attr.DefaultImageAlt"> <directive id="Attr.DefaultImageAlt">
<file name="HTMLPurifier/AttrTransform/ImgRequired.php"> <file name="HTMLPurifier/AttrTransform/ImgRequired.php">
<line>25</line> <line>33</line>
</file> </file>
</directive> </directive>
<directive id="Attr.DefaultInvalidImageAlt"> <directive id="Attr.DefaultInvalidImageAlt">
<file name="HTMLPurifier/AttrTransform/ImgRequired.php"> <file name="HTMLPurifier/AttrTransform/ImgRequired.php">
<line>32</line> <line>40</line>
</file> </file>
</directive> </directive>
<directive id="Core.EscapeInvalidChildren"> <directive id="HTML.Attr.Name.UseCDATA">
<file name="HTMLPurifier/ChildDef/Required.php"> <file name="HTMLPurifier/AttrTransform/Name.php">
<line>62</line> <line>18</line>
</file>
<file name="HTMLPurifier/HTMLModule/Name.php">
<line>19</line>
</file>
</directive>
<directive id="HTML.FlashAllowFullScreen">
<file name="HTMLPurifier/AttrTransform/SafeParam.php">
<line>53</line>
</file> </file>
</directive> </directive>
<directive id="Cache.SerializerPath"> <directive id="Cache.SerializerPath">
<file name="HTMLPurifier/DefinitionCache/Serializer.php"> <file name="HTMLPurifier/DefinitionCache/Serializer.php">
<line>91</line> <line>185</line>
</file> </file>
</directive> </directive>
<directive id="FilterParam.ExtractStyleBlocksTidyImpl"> <directive id="Cache.SerializerPermissions">
<file name="HTMLPurifier/Filter/ExtractStyleBlocks.php"> <file name="HTMLPurifier/DefinitionCache/Serializer.php">
<line>41</line> <line>202</line>
<line>218</line>
</file> </file>
</directive> </directive>
<directive id="FilterParam.ExtractStyleBlocksScope"> <directive id="Filter.ExtractStyleBlocks.TidyImpl">
<file name="HTMLPurifier/Filter/ExtractStyleBlocks.php"> <file name="HTMLPurifier/Filter/ExtractStyleBlocks.php">
<line>65</line> <line>94</line>
</file> </file>
</directive> </directive>
<directive id="FilterParam.ExtractStyleBlocksEscaping"> <directive id="Filter.ExtractStyleBlocks.Scope">
<file name="HTMLPurifier/Filter/ExtractStyleBlocks.php"> <file name="HTMLPurifier/Filter/ExtractStyleBlocks.php">
<line>123</line> <line>125</line>
</file>
</directive>
<directive id="Filter.ExtractStyleBlocks.Escaping">
<file name="HTMLPurifier/Filter/ExtractStyleBlocks.php">
<line>330</line>
</file>
</directive>
<directive id="HTML.SafeIframe">
<file name="HTMLPurifier/HTMLModule/Iframe.php">
<line>28</line>
</file>
<file name="HTMLPurifier/URIFilter/SafeIframe.php">
<line>48</line>
</file> </file>
</directive> </directive>
<directive id="HTML.MaxImgLength"> <directive id="HTML.MaxImgLength">
<file name="HTMLPurifier/HTMLModule/Image.php"> <file name="HTMLPurifier/HTMLModule/Image.php">
<line>14</line> <line>21</line>
</file> </file>
<file name="HTMLPurifier/HTMLModule/SafeEmbed.php"> <file name="HTMLPurifier/HTMLModule/SafeEmbed.php">
<line>13</line> <line>18</line>
</file> </file>
<file name="HTMLPurifier/HTMLModule/SafeObject.php"> <file name="HTMLPurifier/HTMLModule/SafeObject.php">
<line>19</line> <line>24</line>
</file> </file>
</directive> </directive>
<directive id="HTML.TidyLevel"> <directive id="HTML.TidyLevel">
<file name="HTMLPurifier/HTMLModule/Tidy.php"> <file name="HTMLPurifier/HTMLModule/Tidy.php">
<line>45</line> <line>50</line>
</file> </file>
</directive> </directive>
<directive id="HTML.TidyAdd"> <directive id="HTML.TidyAdd">
<file name="HTMLPurifier/HTMLModule/Tidy.php"> <file name="HTMLPurifier/HTMLModule/Tidy.php">
<line>49</line> <line>54</line>
</file> </file>
</directive> </directive>
<directive id="HTML.TidyRemove"> <directive id="HTML.TidyRemove">
<file name="HTMLPurifier/HTMLModule/Tidy.php"> <file name="HTMLPurifier/HTMLModule/Tidy.php">
<line>50</line> <line>55</line>
</file> </file>
</directive> </directive>
<directive id="AutoFormatParam.PurifierLinkifyDocURL"> <directive id="AutoFormat.PurifierLinkify.DocURL">
<file name="HTMLPurifier/Injector/PurifierLinkify.php"> <file name="HTMLPurifier/Injector/PurifierLinkify.php">
<line>15</line> <line>31</line>
</file>
</directive>
<directive id="AutoFormat.RemoveEmpty.RemoveNbsp">
<file name="HTMLPurifier/Injector/RemoveEmpty.php">
<line>46</line>
</file>
</directive>
<directive id="AutoFormat.RemoveEmpty.RemoveNbsp.Exceptions">
<file name="HTMLPurifier/Injector/RemoveEmpty.php">
<line>47</line>
</file>
</directive>
<directive id="AutoFormat.RemoveEmpty.Predicate">
<file name="HTMLPurifier/Injector/RemoveEmpty.php">
<line>48</line>
</file> </file>
</directive> </directive>
<directive id="Core.AggressivelyFixLt"> <directive id="Core.AggressivelyFixLt">
<file name="HTMLPurifier/Lexer/DOMLex.php"> <file name="HTMLPurifier/Lexer/DOMLex.php">
<line>44</line> <line>54</line>
</file> </file>
</directive> </directive>
<directive id="Core.DirectLexLineNumberSyncInterval"> <directive id="Core.DirectLexLineNumberSyncInterval">
<file name="HTMLPurifier/Lexer/DirectLex.php"> <file name="HTMLPurifier/Lexer/DirectLex.php">
<line>70</line> <line>84</line>
</file>
</directive>
<directive id="Core.DisableExcludes">
<file name="HTMLPurifier/Strategy/FixNesting.php">
<line>54</line>
</file> </file>
</directive> </directive>
<directive id="Core.EscapeInvalidTags"> <directive id="Core.EscapeInvalidTags">
<file name="HTMLPurifier/Strategy/MakeWellFormed.php"> <file name="HTMLPurifier/Strategy/MakeWellFormed.php">
<line>45</line> <line>72</line>
</file> </file>
<file name="HTMLPurifier/Strategy/RemoveForeignElements.php">
<line>19</line>
</file>
</directive>
<directive id="Core.RemoveScriptContents">
<file name="HTMLPurifier/Strategy/RemoveForeignElements.php">
<line>25</line>
</file>
</directive>
<directive id="Core.HiddenElements">
<file name="HTMLPurifier/Strategy/RemoveForeignElements.php"> <file name="HTMLPurifier/Strategy/RemoveForeignElements.php">
<line>26</line> <line>26</line>
</file> </file>
</directive> </directive>
<directive id="HTML.AllowedComments">
<file name="HTMLPurifier/Strategy/RemoveForeignElements.php">
<line>31</line>
</file>
</directive>
<directive id="HTML.AllowedCommentsRegexp">
<file name="HTMLPurifier/Strategy/RemoveForeignElements.php">
<line>32</line>
</file>
</directive>
<directive id="URI.HostBlacklist"> <directive id="URI.HostBlacklist">
<file name="HTMLPurifier/URIFilter/HostBlacklist.php"> <file name="HTMLPurifier/URIFilter/HostBlacklist.php">
<line>8</line> <line>25</line>
</file> </file>
</directive> </directive>
<directive id="URI.MungeResources"> <directive id="URI.MungeResources">
<file name="HTMLPurifier/URIFilter/Munge.php"> <file name="HTMLPurifier/URIFilter/Munge.php">
<line>14</line> <line>48</line>
</file> </file>
</directive> </directive>
<directive id="URI.MungeSecretKey"> <directive id="URI.MungeSecretKey">
<file name="HTMLPurifier/URIFilter/Munge.php"> <file name="HTMLPurifier/URIFilter/Munge.php">
<line>15</line> <line>49</line>
</file>
</directive>
<directive id="URI.SafeIframeRegexp">
<file name="HTMLPurifier/URIFilter/SafeIframe.php">
<line>35</line>
</file> </file>
</directive> </directive>
</usage> </usage>

View File

@@ -17,202 +17,10 @@
<div id="home"><a href="http://htmlpurifier.org/">HTML Purifier</a> End-User Documentation</div> <div id="home"><a href="http://htmlpurifier.org/">HTML Purifier</a> End-User Documentation</div>
<p> <p>
<strong>Warning:</strong> This document may be out-of-date. When in doubt, Please see <a href="enduser-customize.html">Customize!</a>
consult the source code documentation.
</p> </p>
<p>HTML Purifier currently natively supports only a subset of HTML's
allowed elements, attributes, and behavior; specifically, this subset
is the set of elements that are safe for untrusted users to use.
However, HTML Purifier is often utilized to ensure standards-compliance
from input that is trusted (making it a sort of Tidy substitute),
and often users need to define new elements or attributes. The
advanced API is oriented specifically for these use-cases.</p>
<p>Our goals are to let the user:</p>
<dl>
<dt>Select</dt>
<dd><ul>
<li>Doctype</li>
<!-- <li>Filterset</li> -->
<li>Elements / Attributes / Modules</li>
<li>Tidy</li>
</ul></dd>
<dt>Customize</dt>
<dd><ul>
<li>Attributes</li>
<li>Elements</li>
<!--<li>Doctypes</li>-->
</ul></dd>
</dl>
<h2>Select</h2>
<p>For basic use, the user will have to specify some basic parameters. This
is not strictly necessary, as HTML Purifier's default setting will always
output safe code, but is required for standards-compliant output.</p>
<h3>Selecting a Doctype</h3>
<p>The first thing to select is the <strong>doctype</strong>. This
is essential for standards-compliant output.</p>
<p class="technical">This identifier is based
on the name the W3C has given to the document type and <em>not</em>
the DTD identifier.</p>
<p>This parameter is set via the configuration object:</p>
<pre>$config->set('HTML', 'Doctype', 'XHTML 1.0 Transitional');</pre>
<p>Due to historical reasons, the default doctype is XHTML 1.0
Transitional; however, we really shouldn't be guessing what the user's
doctype is. Fortunately, people who can't be bothered to set this won't
be bothered when their pages stop validating.</p>
<h3>Selecting Elements / Attributes / Modules</h3>
<p>HTML Purifier will, by default, allow as many elements and attributes
as possible. However, a user may decide to roll their own filterset by
selecting modules, elements and attributes to allow for their own
specific use-case. This can be done using %HTML.Allowed:</p>
<pre>$config->set('HTML', 'Allowed', 'a[href|title],em,p,blockquote');</pre>
<p class="technical">The directive %HTML.Allowed is a convenience feature
that may be fully expressed with the legacy interface.</p>
<p>We currently support another interface from older versions:</p>
<pre>$config->set('HTML', 'AllowedElements', 'a,em,p,blockquote');
$config->set('HTML', 'AllowedAttributes', 'a.href,a.title');</pre>
<p>A user may also choose to allow modules using a specialized
directive:</p>
<pre>$config->set('HTML', 'AllowedModules', 'Hypertext,Text,Lists');</pre>
<p>But it is not expected that this feature will be widely used.</p>
<p class="technical">Module selection will work slightly differently
from the other AllowedElements and AllowedAttributes directives by
directly modifying the doctype you are operating in, in the spirit of
XHTML 1.1's modularization. We stop users from shooting themselves in the
foot by mandating the modules in %HTML.CoreModules be used.</p>
<p class="technical">Modules are distinguished from regular elements by the
case of their first letter. While XML distinguishes between and allows
lower and uppercase letters in element names, XHTML uses only lower-case
element names for sake of consistency.</p>
<h3>Selecting Tidy</h3>
<p>The name of this segment of functionality is inspired by Dave
Raggett's program HTML Tidy, which purported to help clean up HTML. In
HTML Purifier, Tidy functionality involves turning unsupported and
deprecated elements into standards-compliant ones, maintaining
backwards compatibility, and enforcing best practices.</p>
<p>This is a complicated feature, and is explained more in depth at
<a href="enduser-tidy.html">the Tidy documentation page</a>.</p>
<!--
<h3>Unified selector</h3>
<p>Because selecting each and every one of these configuration options
is a chore, we may wish to offer a specialized configuration method
for selecting a filterset. Possibility:</p>
<pre>function selectFilter($doctype, $filterset, $tidy)</pre>
<p>...which is simply a light wrapper over the individual configuration
calls. A custom config file format or text format could also be adopted.</p>
-->
<h2>Customize</h2>
<p>By reviewing topic posts in the support forum, we determined that
there were two primarily demanded customization features people wanted:
to add an attribute to an existing element, and to add an element.
Thus, we'll want to create convenience functions for these common
use-cases.</p>
<p>Note that the functions described here are only available if
a raw copy of <code>HTMLPurifier_HTMLDefinition</code> was retrieved.
Furthermore, caching may prevent your changes from immediately
being seen: consult <a href="enduser-customize.html">enduser-customize.html</a> on how
to work around this.</p>
<h3>Attributes</h3>
<p>An attribute is bound to an element by a name and has a specific
<code>AttrDef</code> that validates it. The interface is therefore:</p>
<pre>function addAttribute($element, $attribute, $attribute_def);</pre>
<p>Example of the functionality in action:</p>
<pre>$def->addAttribute('a', 'rel', 'Enum#nofollow');</pre>
<p>The <code>$attribute_def</code> value is flexible,
to make things simpler. It can be a literal object or:</p>
<ul>
<!--<li>Class name: We'll instantiate it for you</li>
<li>Function name: We'll create an <code>HTMLPurifier_AttrDef_Anonymous</code>
class with that function registered as a callback.</li>-->
<li>String attribute type: We'll use <code>HTMLPurifier_AttrTypes</code>
to resolve it for you. Any data that follows a hash mark (#) will
be used to customize the attribute type: in the example above,
we specify which values for Enum to allow.</li>
</ul>
<h3>Elements</h3>
<p>An element requires certain information as specified by
<code>HTMLPurifier_ElementDef</code>. However, not all of it is necessary;
the usual things required are:</p>
<ul>
<li>Attributes</li>
<li>Content model/type</li>
<li>Registration in a content set</li>
</ul>
<p>This suggests an API like this:</p>
<pre>function addElement($element, $type, $contents,
$attr_collections = array(), $attributes = array());</pre>
<p>Each parameter explained in depth:</p>
<dl>
<dt><code>$element</code></dt>
<dd>Element name, ex. 'label'</dd>
<dt><code>$type</code></dt>
<dd>Content set to register in, ex. 'Inline' or 'Flow'</dd>
<dt><code>$contents</code></dt>
<dd>Description of allowed children. This is a merged form of
<code>HTMLPurifier_ElementDef</code>'s member variables
<code>$content_model</code> and <code>$content_model_type</code>,
where the form is <q>Type: Model</q>, ex. 'Optional: Inline'.
There are also a number of predefined templates one may use.</dd>
<dt><code>$attr_collections</code></dt>
<dd>Array (or string if only one) of attribute collection(s) to
merge into the attributes array.</dd>
<dt><code>$attributes</code></dt>
<dd>Array of attribute names to attribute definitions, much like
the above-described attribute customization.</dd>
</dl>
<p>A possible usage:</p>
<pre>$def->addElement('font', 'Inline', 'Optional: Inline', 'Common',
array('color' => 'Color'));</pre>
<p>See <code>HTMLPurifier/HTMLModule.php</code> for details.</p>
</body></html> </body></html>
<!-- vim: et sw=4 sts=4 --> <!-- vim: et sw=4 sts=4
-->

View File

@@ -25,5 +25,6 @@ URIScheme - needs to have callable generic checks
mailto - doesn't validate emails, doesn't validate querystring mailto - doesn't validate emails, doesn't validate querystring
news - doesn't validate opaque path news - doesn't validate opaque path
nntp - doesn't constrain path nntp - doesn't constrain path
tel - doesn't validate phone numbers, only allows characters '+', '1-9', and 'x'
vim: et sw=4 sts=4 vim: et sw=4 sts=4

View File

@@ -0,0 +1,79 @@
Configuration Backwards-Compatibility Breaks
In version 4.0.0, the configuration subsystem (composed of the outwards
facing Config class, as well as the ConfigSchema and ConfigSchema_Interchange
subsystems), was significantly revamped to make use of property lists.
While most of the changes are internal, some internal APIs were changed for the
sake of clarity. HTMLPurifier_Config was kept completely backwards compatible,
although some of the functions were retrofitted with an unambiguous alternate
syntax. Both of these changes are discussed in this document.
1. Outwards Facing Changes
--------------------------------------------------------------------------------
The HTMLPurifier_Config class now takes an alternate syntax. The general rule
is:
If you passed $namespace, $directive, pass "$namespace.$directive"
instead.
An example:
$config->set('HTML', 'Allowed', 'p');
becomes:
$config->set('HTML.Allowed', 'p');
New configuration options may have more than one namespace; they might
look something like %Filter.YouTube.Blacklist. While you could technically
set it with ('Filter', 'YouTube.Blacklist'), the logical extension
('Filter', 'YouTube', 'Blacklist') does not work.
The old API will still work, but will emit E_USER_NOTICEs.
2. Internal API Changes
--------------------------------------------------------------------------------
Some overarching notes: we've completely eliminated the notion of namespace;
it's now an informal construct for organizing related configuration directives.
Also, the validation routines for keys (formerly "$namespace.$directive")
have been completely relaxed. I don't think it really should be necessary.
2.1 HTMLPurifier_ConfigSchema
First off, if you're interfacing with this class, you really shouldn't.
HTMLPurifier_ConfigSchema_Builder_ConfigSchema is really the only class that
should ever be creating HTMLPurifier_ConfigSchema, and HTMLPurifier_Config the
only class that should be reading it.
All namespace related methods were removed; they are completely unnecessary
now. Any $namespace, $name arguments must be replaced with $key (where
$key == "$namespace.$name"), including for addAlias().
The $info and $defaults member variables are no longer indexed as
[$namespace][$name]; they are now indexed as ["$namespace.$name"].
All deprecated methods were finally removed, after having yelled at you as
an E_USER_NOTICE for a while now.
2.2 HTMLPurifier_ConfigSchema_Interchange
Member variable $namespaces was removed.
2.3 HTMLPurifier_ConfigSchema_Interchange_Id
Member variable $namespace and $directive removed; member variable $key added.
Any method that took $namespace, $directive now takes $key.
2.4 HTMLPurifier_ConfigSchema_Interchange_Namespace
Removed.
vim: et sw=4 sts=4

164
docs/dev-config-naming.txt Normal file
View File

@@ -0,0 +1,164 @@
Configuration naming
HTML Purifier 4.0.0 features a new configuration naming system that
allows arbitrary nesting of namespaces. While there are certain cases
in which using two namespaces is obviously better (the canonical example
is where we were using AutoFormatParam to contain directives for AutoFormat
parameters), it is unclear whether a general migration to highly
namespaced directives is a good idea.
== Case studies ==
=== Attr.* ===
We have a dead duck HTML.Attr.Name.UseCDATA which migrated before we decided
to think this out thoroughly.
We currently have a large number of directives in the Attr.* namespace.
These directives tweak the behavior of some HTML attributes. They have
the properties:
* While they apply to only one attribute at a time, the attribute can
span over multiple elements (not necessarily all attributes, either).
The information of which elements it impacts is either omitted or
informally stated (EnableID applies to all elements, DefaultImageAlt
applies to <img> tags, AllowedRev doesn't say but only applies to a tags).
* There is a certain degree of clustering that could be applied, especially
to the ID directives. The clustering could be done with respect to
what element/attribute was used, i.e.
*.id -> EnableID, IDBlacklistRegexp, IDBlacklist, IDPrefixLocal, IDPrefix
img.src -> DefaultInvalidImage
img.alt -> DefaultImageAlt, DefaultInvalidImageAlt
bdo.dir -> DefaultTextDir
a.rel -> AllowedRel
a.rev -> AllowedRev
a.target -> AllowedFrameTargets
a.name -> Name.UseCDATA
* The directives often reference generic attribute types that were specified
in the DTD/specification. However, some of the behavior specifically relies
on the fact that other use cases of the attribute are not, at current,
supported by HTML Purifier.
AllowedRel, AllowedRev -> heavily <a> specific; if <link> ends up being
allowed, we will also have to give users specificity there (we also
want to preserve generality) DTD %Linktypes, HTML5 distinguishes
between <link> and <a>/<area>
AllowedFrameTargets -> heavily <a> specific, but also used by <area>
and <form>. Transitional DTD %FrameTarget, not present in strict,
HTML5 calls them "browsing contexts"
Default*Image* -> as a default parameter, is almost entirely exclusive
to <img>
EnableID -> global attribute
Name.UseCDATA -> heavily <a> specific, but has heavy other usage by
many things
== AutoFormat.* ==
These have the fairly normal pluggable architecture that lends itself to
large amounts of namespaces (pluggability may be the key to figuring
out when gratuitous namespacing is good.) Properties:
* Boolean directives are fair game for being namespaced: for example,
RemoveEmpty.RemoveNbsp triggers RemoveEmpty.RemoveNbsp.Exceptions,
the latter of which only makes sense when RemoveEmpty.RemoveNbsp
is set to true. (The same applies to RemoveNbsp too)
The AutoFormat string is a bit long, but is the only bit of repeated
context.
== Core.* ==
Core is the potpourri of directives, mostly regarding some minor behavioral
tweaks for HTML handling abilities.
AggressivelyFixLt
ConvertDocumentToFragment
DirectLexLineNumberSyncInterval
LexerImpl
MaintainLineNumbers
Lexer
CollectErrors
Language
Error handling (Language is ostensibly a little more general, but
it's only used for error handling right now)
ColorKeywords
CSS and HTML
Encoding
EscapeNonASCIICharacters
Character encoding
EscapeInvalidChildren
EscapeInvalidTags
HiddenElements
RemoveInvalidImg
Lexing/Output
RemoveScriptContents
Deprecated
== HTML.* ==
AllowedAttributes
AllowedElements
AllowedModules
Allowed
ForbiddenAttributes
ForbiddenElements
Element set tuning
BlockWrapper
Child def advanced twiddle
CoreModules
CustomDoctype
Advanced HTMLModuleManager twiddles
DefinitionID
DefinitionRev
Caching
Doctype
Parent
Strict
XHTML
Global environment
MaxImgLength
Attribute twiddle? (applies to two attributes)
Proprietary
SafeEmbed
SafeObject
Trusted
Extra functionality/tagsets
TidyAdd
TidyLevel
TidyRemove
Tidy
== Output.* ==
These directly affect the output of Generator. These are all advanced
twiddles.
== URI.* ==
AllowedSchemes
OverrideAllowedSchemes
Scheme tuning
Base
DefaultScheme
Host
Global environment
DefinitionID
DefinitionRev
Caching
DisableExternalResources
DisableExternal
DisableResources
Disable
Contextual/authority tuning
HostBlacklist
Authority tuning
MakeAbsolute
MungeResources
MungeSecretKey
Munge
Transformation behavior (munge can be grouped)

View File

@@ -114,7 +114,7 @@ Test.Example</pre>
</tr> </tr>
<tr> <tr>
<td>VALUE-ALIASES</td> <td>VALUE-ALIASES</td>
<td>'baz' => 'bar'</td> <td>'baz' =&gt; 'bar'</td>
<td><em>Optional</em>. Mapping of one value to another, and <td><em>Optional</em>. Mapping of one value to another, and
should be a comma separated list of keypair duples. This should be a comma separated list of keypair duples. This
is only allowed string, istring, text and itext TYPEs.</td> is only allowed string, istring, text and itext TYPEs.</td>
@@ -213,7 +213,7 @@ Test.Example</pre>
</tr> </tr>
<tr> <tr>
<td>lookup</td> <td>lookup</td>
<td>array('key' => true)</td> <td>array('key' =&gt; true)</td>
<td>Lookup array, used with <code>isset($var[$key])</code></td> <td>Lookup array, used with <code>isset($var[$key])</code></td>
</tr> </tr>
<tr> <tr>
@@ -223,7 +223,7 @@ Test.Example</pre>
</tr> </tr>
<tr> <tr>
<td>hash</td> <td>hash</td>
<td>array('key' => 'val')</td> <td>array('key' =&gt; 'val')</td>
<td>Associative array of keys to values</td> <td>Associative array of keys to values</td>
</tr> </tr>
<tr> <tr>
@@ -267,6 +267,41 @@ Test.Example</pre>
If you ever make changes to your configuration directives, you If you ever make changes to your configuration directives, you
will need to run this script again. will need to run this script again.
</p> </p>
<h2>Adding in-house schema definitions</h2>
<p>
Placing stuff directly in HTML Purifier's source tree is generally not a
good idea, so HTML Purifier 4.0.0+ has some facilities in place to make your
life easier.
</p>
<p>
The first is to pass an extra parameter to <code>maintenance/generate-schema-cache.php</code>
with the location of your directory (relative or absolute path will do). For example,
if I'm storing my custom definitions in <em>/var/htmlpurifier/myschema</em>, run:
<code>php maintenance/generate-schema-cache.php /var/htmlpurifier/myschema</code>.
</p>
<p>
Alternatively, you can create a small loader PHP file in the HTML Purifier base
directory named <code>config-schema.php</code> (this is the same directory
you would place a <code>test-settings.php</code> file). In this file, add
the following line for each directory you want to load:
</p>
<pre>$builder-&gt;buildDir($interchange, '/var/htmlpurifier/myschema');</pre>
<p>You can even load a single file using:</p>
<pre>$builder-&gt;buildFile($interchange, '/var/htmlpurifier/myschema/MyApp.Directive.txt');</pre>
<p>Storing custom definitions that you don't plan on sending back upstream in
a separate directory is <em>definitely</em> a good idea! Additionally, picking
a good namespace can go a long way to saving you grief if you want to use
someone else's change, but they picked the same name, or if HTML Purifier
decides to add support for a configuration directive that has the same name.</p>
<!-- TODO: how to name directives that rely on naming conventions -->
<h2>Errors</h2> <h2>Errors</h2>
@@ -373,4 +408,5 @@ Test.Example</pre>
</body> </body>
</html> </html>
<!-- vim: et sw=4 sts=4 --> <!-- vim: et sw=4 sts=4
-->

View File

@@ -64,4 +64,5 @@
</body></html> </body></html>
<!-- vim: et sw=4 sts=4 --> <!-- vim: et sw=4 sts=4
-->

View File

@@ -79,4 +79,5 @@ help you find the correct functionality more quickly. Here they are:</p>
</body></html> </body></html>
<!-- vim: et sw=4 sts=4 --> <!-- vim: et sw=4 sts=4
-->

View File

@@ -29,4 +29,5 @@ that itch, put it here!</p>
</body></html> </body></html>
<!-- vim: et sw=4 sts=4 --> <!-- vim: et sw=4 sts=4
-->

View File

@@ -255,7 +255,7 @@ Mozilla on inside and needs -moz-outline, no IE support.</td></tr>
<tr class="feature"><td>axis</td><td>TD, TH</td><td>W3C only: No browser implementation</td></tr> <tr class="feature"><td>axis</td><td>TD, TH</td><td>W3C only: No browser implementation</td></tr>
<tr class="feature"><td>char</td><td>COL, COLGROUP, TBODY, TD, TFOOT, TH, THEAD, TR</td><td>W3C only: No browser implementation</td></tr> <tr class="feature"><td>char</td><td>COL, COLGROUP, TBODY, TD, TFOOT, TH, THEAD, TR</td><td>W3C only: No browser implementation</td></tr>
<tr class="feature"><td>headers</td><td>TD, TH</td><td>W3C only: No browser implementation</td></tr> <tr class="feature"><td>headers</td><td>TD, TH</td><td>W3C only: No browser implementation</td></tr>
<tr class="feature"><td>scope</td><td>TD, TH</td><td>W3C only: No browser implementation</td></tr> <tr class="impl-yes"><td>scope</td><td>TD, TH</td><td>W3C only: No browser implementation</td></tr>
</tbody> </tbody>
<tbody class="impl-yes"> <tbody class="impl-yes">
@@ -305,4 +305,5 @@ Mozilla on inside and needs -moz-outline, no IE support.</td></tr>
</body></html> </body></html>
<!-- vim: et sw=4 sts=4 --> <!-- vim: et sw=4 sts=4
-->

View File

@@ -18,12 +18,11 @@
<div id="home"><a href="http://htmlpurifier.org/">HTML Purifier</a> End-User Documentation</div> <div id="home"><a href="http://htmlpurifier.org/">HTML Purifier</a> End-User Documentation</div>
<p> <p>
You may have heard of the <a href="dev-advanced-api.html">Advanced API</a>. HTML Purifier has this quirk where if you try to allow certain elements or
If you're interested in reading dry prose and boring functional attributes, HTML Purifier will tell you that it's not supported, and that
specifications, feel free to click that link to get a no-nonsense overview you should go to the forums to find out how to implement it. Well, this
on the Advanced API. For the rest of us, there's this tutorial. By the time document is how to implement elements and attributes which HTML Purifier
you're finished reading this, you should have a pretty good idea on doesn't support out of the box.
how to implement custom tags and attributes that HTML Purifier may not have.
</p> </p>
<h2>Is it necessary?</h2> <h2>Is it necessary?</h2>
@@ -84,17 +83,6 @@
limited to translations) above or below other corresponding text. limited to translations) above or below other corresponding text.
</p> </p>
<h3>XHTML 2.0</h3>
<p>
<a href="http://www.w3.org/TR/xhtml2/">XHTML 2.0</a> is still a
working draft, so any elements introduced in the
specification have not been implemented and will not be implemented
until we get a recommendation or proposal. Because XHTML 2.0 is
an entirely new markup language, implementing rules for it will be
no easy task.
</p>
<h3>HTML 5</h3> <h3>HTML 5</h3>
<p> <p>
@@ -156,9 +144,11 @@
</p> </p>
<pre>$config = HTMLPurifier_Config::createDefault(); <pre>$config = HTMLPurifier_Config::createDefault();
$config->set('HTML', 'DefinitionID', 'enduser-customize.html tutorial'); $config-&gt;set('HTML.DefinitionID', 'enduser-customize.html tutorial');
$config->set('HTML', 'DefinitionRev', 1); $config-&gt;set('HTML.DefinitionRev', 1);
$def = $config->getHTMLDefinition(true);</pre> if ($def = $config-&gt;maybeGetRawHTMLDefinition()) {
// our code will go here
}</pre>
<p> <p>
Assuming that HTML Purifier has already been properly loaded (hint: Assuming that HTML Purifier has already been properly loaded (hint:
@@ -186,23 +176,15 @@ $def = $config->getHTMLDefinition(true);</pre>
</li> </li>
<li> <li>
The fourth line retrieves a raw <code>HTMLPurifier_HTMLDefinition</code> The fourth line retrieves a raw <code>HTMLPurifier_HTMLDefinition</code>
object that we will be tweaking. If the parameter was removed, we object that we will be tweaking. Interestingly enough, we have
would be retrieving a fully formed definition object, which is somewhat placed it in an if block: this is because
useless for customization purposes. <code>maybeGetRawHTMLDefinition</code>, as its name suggests, may
return a NULL, in which case we should skip doing any
initialization. This, in fact, will correspond to when our fully
customized object is already in the cache.
</li> </li>
</ul> </ul>
<h3>Broken backwards-compatibility</h3>
<p>
Those of you who have already been twiddling around with the raw
HTML definition object, you'll be noticing that you're getting an error
when you attempt to retrieve the raw definition object without specifying
a DefinitionID. It is vital to caching (see below) that you make a unique
name for your customized definition, so make up something right now and
things will operate again.
</p>
<h2>Turn off caching</h2> <h2>Turn off caching</h2>
<p> <p>
@@ -211,10 +193,10 @@ $def = $config->getHTMLDefinition(true);</pre>
</p> </p>
<pre>$config = HTMLPurifier_Config::createDefault(); <pre>$config = HTMLPurifier_Config::createDefault();
$config->set('HTML', 'DefinitionID', 'enduser-customize.html tutorial'); $config-&gt;set('HTML.DefinitionID', 'enduser-customize.html tutorial');
$config->set('HTML', 'DefinitionRev', 1); $config-&gt;set('HTML.DefinitionRev', 1);
<strong>$config->set('Cache', 'DefinitionImpl', null); // remove this later!</strong> <strong>$config-&gt;set('Cache.DefinitionImpl', null); // TODO: remove this later!</strong>
$def = $config->getHTMLDefinition(true);</pre> $def = $config-&gt;getHTMLDefinition(true);</pre>
<p> <p>
A few things should be mentioned about the caching mechanism before A few things should be mentioned about the caching mechanism before
@@ -267,10 +249,10 @@ $def = $config->getHTMLDefinition(true);</pre>
</p> </p>
<pre>$config = HTMLPurifier_Config::createDefault(); <pre>$config = HTMLPurifier_Config::createDefault();
$config->set('HTML', 'DefinitionID', 'enduser-customize.html tutorial'); $config-&gt;set('HTML.DefinitionID', 'enduser-customize.html tutorial');
$config->set('HTML', 'DefinitionRev', 1); $config-&gt;set('HTML.DefinitionRev', 1);
$config->set('Cache', 'DefinitionImpl', null); // remove this later! $config-&gt;set('Cache.DefinitionImpl', null); // remove this later!
$def = $config->getHTMLDefinition(true); $def = $config-&gt;getHTMLDefinition(true);
<strong>$def->addAttribute('a', 'target', 'Enum#_blank,_self,_target,_top');</strong></pre> <strong>$def->addAttribute('a', 'target', 'Enum#_blank,_self,_target,_top');</strong></pre>
<p> <p>
@@ -385,11 +367,11 @@ $def = $config->getHTMLDefinition(true);
</p> </p>
<pre>$config = HTMLPurifier_Config::createDefault(); <pre>$config = HTMLPurifier_Config::createDefault();
$config->set('HTML', 'DefinitionID', 'enduser-customize.html tutorial'); $config-&gt;set('HTML.DefinitionID', 'enduser-customize.html tutorial');
$config->set('HTML', 'DefinitionRev', 1); $config-&gt;set('HTML.DefinitionRev', 1);
$config->set('Cache', 'DefinitionImpl', null); // remove this later! $config-&gt;set('Cache.DefinitionImpl', null); // remove this later!
$def = $config->getHTMLDefinition(true); $def = $config-&gt;getHTMLDefinition(true);
<strong>$def->addAttribute('a', 'target', new HTMLPurifier_AttrDef_Enum( <strong>$def-&gt;addAttribute('a', 'target', new HTMLPurifier_AttrDef_Enum(
array('_blank','_self','_target','_top') array('_blank','_self','_target','_top')
));</strong></pre> ));</strong></pre>
@@ -724,7 +706,7 @@ $def = $config->getHTMLDefinition(true);
or more flow elements, but no nested <code>form</code>s</strong></li> or more flow elements, but no nested <code>form</code>s</strong></li>
<li>What attributes does the element allow that are general? <strong>Common</strong></li> <li>What attributes does the element allow that are general? <strong>Common</strong></li>
<li>What attributes does the element allow that are specific to this element? <strong>A whole bunch, see ATTLIST; <li>What attributes does the element allow that are specific to this element? <strong>A whole bunch, see ATTLIST;
we're going to the vital ones: <code>action</code>, <code>method</code> and <code>name</code></strong></li> we're going to do the vital ones: <code>action</code>, <code>method</code> and <code>name</code></strong></li>
</ol> </ol>
<p> <p>
@@ -732,14 +714,14 @@ $def = $config->getHTMLDefinition(true);
</p> </p>
<pre>$config = HTMLPurifier_Config::createDefault(); <pre>$config = HTMLPurifier_Config::createDefault();
$config->set('HTML', 'DefinitionID', 'enduser-customize.html tutorial'); $config-&gt;set('HTML.DefinitionID', 'enduser-customize.html tutorial');
$config->set('HTML', 'DefinitionRev', 1); $config-&gt;set('HTML.DefinitionRev', 1);
$config->set('Cache', 'DefinitionImpl', null); // remove this later! $config-&gt;set('Cache.DefinitionImpl', null); // remove this later!
$def = $config->getHTMLDefinition(true); $def = $config-&gt;getHTMLDefinition(true);
$def->addAttribute('a', 'target', new HTMLPurifier_AttrDef_Enum( $def-&gt;addAttribute('a', 'target', new HTMLPurifier_AttrDef_Enum(
array('_blank','_self','_target','_top') array('_blank','_self','_target','_top')
)); ));
<strong>$form = $def->addElement( <strong>$form = $def-&gt;addElement(
'form', // name 'form', // name
'Block', // content set 'Block', // content set
'Flow', // allowed children 'Flow', // allowed children
@@ -750,7 +732,7 @@ $def->addAttribute('a', 'target', new HTMLPurifier_AttrDef_Enum(
'name' => 'ID' 'name' => 'ID'
) )
); );
$form->excludes = array('form' => true);</strong></pre> $form-&gt;excludes = array('form' => true);</strong></pre>
<p> <p>
Each of the parameters corresponds to one of the questions we asked. Each of the parameters corresponds to one of the questions we asked.
@@ -793,6 +775,76 @@ $form->excludes = array('form' => true);</strong></pre>
<li><a href="http://repo.or.cz/w/htmlpurifier.git?a=blob;hb=HEAD;f=library/HTMLPurifier/ElementDef.php"><code>library/HTMLPurifier/ElementDef.php</code></a></li> <li><a href="http://repo.or.cz/w/htmlpurifier.git?a=blob;hb=HEAD;f=library/HTMLPurifier/ElementDef.php"><code>library/HTMLPurifier/ElementDef.php</code></a></li>
</ul> </ul>
<h2 id="optimized">Notes for HTML Purifier 4.2.0 and earlier</h2>
<p>
Previously, this tutorial gave some incorrect template code for
editing raw definitions, and that template code will now produce the
error <q>Due to a documentation error in previous version of HTML
Purifier...</q> Here is how to mechanically transform old-style
code into new-style code.
</p>
<p>
First, identify all code that edits the raw definition object, and
put it together. Ensure none of this code must be run on every
request; if some sub-part needs to always be run, move it outside
this block. Here is an example below, with the raw definition
object code bolded.
</p>
<pre>$config = HTMLPurifier_Config::createDefault();
$config-&gt;set('HTML.DefinitionID', 'enduser-customize.html tutorial');
$config-&gt;set('HTML.DefinitionRev', 1);
$def = $config-&gt;getHTMLDefinition(true);
<strong>$def->addAttribute('a', 'target', 'Enum#_blank,_self,_target,_top');</strong>
$purifier = new HTMLPurifier($config);</pre>
<p>
Next, replace the raw definition retrieval with a
maybeGetRawHTMLDefinition method call inside an if conditional, and
place the editing code inside that if block.
</p>
<pre>$config = HTMLPurifier_Config::createDefault();
$config-&gt;set('HTML.DefinitionID', 'enduser-customize.html tutorial');
$config-&gt;set('HTML.DefinitionRev', 1);
<strong>if ($def = $config-&gt;maybeGetRawHTMLDefinition()) {
$def->addAttribute('a', 'target', 'Enum#_blank,_self,_target,_top');
}</strong>
$purifier = new HTMLPurifier($config);</pre>
<p>
And you're done! Alternatively, if you're OK with not ever caching
your code, the following will still work and not emit warnings.
</p>
<pre>$config = HTMLPurifier_Config::createDefault();
$def = $config-&gt;getHTMLDefinition(true);
$def->addAttribute('a', 'target', 'Enum#_blank,_self,_target,_top');
$purifier = new HTMLPurifier($config);</pre>
<p>
A slightly less efficient version of this was what was going on with
old versions of HTML Purifier.
</p>
<p>
<em>Technical notes:</em> ajh pointed out <a
href="http://htmlpurifier.org/phorum/read.php?5,5164,5169#msg-5169">in a forum topic</a> that
HTML Purifier appeared to be repeatedly writing to the cache even
when a cache entry already existed. Investigation led to the
discovery of the following infelicity: caching of customized
definitions didn't actually work! The problem was that even though
a cache file would be written out at the end of the process, there
was no way for HTML Purifier to say, <q>Actually, I've already got a
copy of your work, no need to reconfigure your
customizations</q>. This required the API to change: placing
all of the customizations to the raw definition object in a
conditional which could be skipped.
</p>
</body></html> </body></html>
<!-- vim: et sw=4 sts=4 --> <!-- vim: et sw=4 sts=4
-->

View File

@@ -31,7 +31,7 @@ by default.</p>
<p>IDs, however, are quite useful functionality to have, so if users start <p>IDs, however, are quite useful functionality to have, so if users start
complaining about broken anchors you'll probably want to turn them back on complaining about broken anchors you'll probably want to turn them back on
with %HTML.EnableAttrID. But before you go mucking around with the config with %Attr.EnableID. But before you go mucking around with the config
object, it's probably worth to take some precautions to keep your page object, it's probably worth to take some precautions to keep your page
validating. Why?</p> validating. Why?</p>
@@ -56,8 +56,8 @@ validating. Why?</p>
deal with the most obvious solution: preventing users from using any IDs that deal with the most obvious solution: preventing users from using any IDs that
appear elsewhere on the document. The method is simple:</p> appear elsewhere on the document. The method is simple:</p>
<pre>$config->set('HTML', 'EnableAttrID', true); <pre>$config-&gt;set('Attr.EnableID', true);
$config->set('Attr', 'IDBlacklist' array( $config-&gt;set('Attr.IDBlacklist' array(
'list', 'of', 'attribute', 'values', 'that', 'are', 'forbidden' 'list', 'of', 'attribute', 'values', 'that', 'are', 'forbidden'
));</pre> ));</pre>
@@ -88,8 +88,8 @@ all, they might have simply specified a duplicate ID by accident.</p>
<p>This method, too, is quite simple: add a prefix to all user IDs. With this <p>This method, too, is quite simple: add a prefix to all user IDs. With this
code:</p> code:</p>
<pre>$config->set('HTML', 'EnableAttrID', true); <pre>$config-&gt;set('Attr.EnableID', true);
$config->set('Attr', 'IDPrefix', 'user_');</pre> $config-&gt;set('Attr.IDPrefix', 'user_');</pre>
<p>...this:</p> <p>...this:</p>
@@ -109,7 +109,7 @@ user_ to the beginning.&quot;</p>
nothing about multiple HTML Purifier outputs on one page. Thus, we have nothing about multiple HTML Purifier outputs on one page. Thus, we have
a second configuration value to piggy-back off of: %Attr.IDPrefixLocal:</p> a second configuration value to piggy-back off of: %Attr.IDPrefixLocal:</p>
<pre>$config->set('Attr', 'IDPrefixLocal', 'comment' . $id . '_');</pre> <pre>$config-&gt;set('Attr.IDPrefixLocal', 'comment' . $id . '_');</pre>
<p>This new attributes does nothing but append on to regular IDPrefix, but is <p>This new attributes does nothing but append on to regular IDPrefix, but is
special in that it is volatile: it's value is determined at run-time and special in that it is volatile: it's value is determined at run-time and
@@ -137,11 +137,12 @@ anchors is beyond me.</p>
<p>To revert back to pre-1.2.0 behavior, simply:</p> <p>To revert back to pre-1.2.0 behavior, simply:</p>
<pre>$config->set('HTML', 'EnableAttrID', true);</pre> <pre>$config-&gt;set('Attr.EnableID', true);</pre>
<p>Don't come crying to me when your page mysteriously stops validating, though.</p> <p>Don't come crying to me when your page mysteriously stops validating, though.</p>
</body> </body>
</html> </html>
<!-- vim: et sw=4 sts=4 --> <!-- vim: et sw=4 sts=4
-->

View File

@@ -116,4 +116,5 @@ if you decide to do that! Especially if you port HTML Purifier to C++.
</body> </body>
</html> </html>
<!-- vim: et sw=4 sts=4 --> <!-- vim: et sw=4 sts=4
-->

View File

@@ -76,7 +76,7 @@ associated with it, although it may change depending on your doctype.</p>
change the level of cleaning by setting the %HTML.TidyLevel configuration change the level of cleaning by setting the %HTML.TidyLevel configuration
directive:</p> directive:</p>
<pre>$config->set('HTML', 'TidyLevel', 'heavy'); // burn baby burn!</pre> <pre>$config-&gt;set('HTML.TidyLevel', 'heavy'); // burn baby burn!</pre>
<h2>Is the light level really light?</h2> <h2>Is the light level really light?</h2>
@@ -165,17 +165,17 @@ smoketest</a>.</p>
so happy about the br@clear implementation. That's perfectly fine! so happy about the br@clear implementation. That's perfectly fine!
HTML Purifier will make accomodations:</p> HTML Purifier will make accomodations:</p>
<pre>$config->set('HTML', 'Doctype', 'XHTML 1.0 Transitional'); <pre>$config-&gt;set('HTML.Doctype', 'XHTML 1.0 Transitional');
$config->set('HTML', 'TidyLevel', 'heavy'); // all changes, minus... $config-&gt;set('HTML.TidyLevel', 'heavy'); // all changes, minus...
<strong>$config->set('HTML', 'TidyRemove', 'br@clear');</strong></pre> <strong>$config-&gt;set('HTML.TidyRemove', 'br@clear');</strong></pre>
<p>That third line does the magic, removing the br@clear fix <p>That third line does the magic, removing the br@clear fix
from the module, ensuring that <code>&lt;br clear="both" /&gt;</code> from the module, ensuring that <code>&lt;br clear="both" /&gt;</code>
will pass through unharmed. The reverse is possible too:</p> will pass through unharmed. The reverse is possible too:</p>
<pre>$config->set('HTML', 'Doctype', 'XHTML 1.0 Transitional'); <pre>$config-&gt;set('HTML.Doctype', 'XHTML 1.0 Transitional');
$config->set('HTML', 'TidyLevel', 'none'); // no changes, plus... $config-&gt;set('HTML.TidyLevel', 'none'); // no changes, plus...
<strong>$config->set('HTML', 'TidyAdd', 'p@align');</strong></pre> <strong>$config-&gt;set('HTML.TidyAdd', 'p@align');</strong></pre>
<p>In this case, all transformations are shut off, except for the p@align <p>In this case, all transformations are shut off, except for the p@align
one, which you found handy.</p> one, which you found handy.</p>
@@ -227,4 +227,5 @@ effectively in the background.</p>
</body></html> </body></html>
<!-- vim: et sw=4 sts=4 --> <!-- vim: et sw=4 sts=4
-->

View File

@@ -160,27 +160,14 @@
</p> </p>
<pre>$uri = $config->getDefinition('URI'); <pre>$uri = $config->getDefinition('URI');
$uri->addFilter(new HTMLPurifier_URIFilter_<strong>NameOfFilter</strong>());</pre> $uri->addFilter(new HTMLPurifier_URIFilter_<strong>NameOfFilter</strong>(), $config);</pre>
<p> <p>
If you want to be really fancy, you can define a configuration directive After adding a filter, you won't be able to set configuration directives.
for your filter and have HTML Purifier automatically manage whether or Structure your code accordingly.
not your filter gets loaded or not (this is how internal filters manage
things):
</p> </p>
<pre>HTMLPurifier_ConfigSchema::define( <!-- XXX: link to new documentation system -->
'URI', '<strong>NameOfFilter</strong>', false, 'bool',
'<strong>What your filter does.</strong>'
);
$uri = $config->getDefinition('URI', true);
$uri->registerFilter(new HTMLPurifier_URIFilter_<strong>NameOfFilter</strong>());
</pre>
<p>
Now, your filter will only be called when %URI.<strong>NameOfFilter</strong>
is set to true.
</p>
<h2>Post-filter</h2> <h2>Post-filter</h2>
@@ -213,4 +200,5 @@ $uri->registerFilter(new HTMLPurifier_URIFilter_<strong>NameOfFilter</strong>())
</body></html> </body></html>
<!-- vim: et sw=4 sts=4 --> <!-- vim: et sw=4 sts=4
-->

View File

@@ -561,7 +561,7 @@ page on special characters</a> for more details.
<h3 id="whyutf8-forms">Forms</h3> <h3 id="whyutf8-forms">Forms</h3>
<p>While we're on the tack of users, how do non-UTF-8 web forms deal <p>While we're on the tack of users, how do non-UTF-8 web forms deal
with characters that our outside of their character set? Rather than with characters that are outside of their character set? Rather than
discuss what UTF-8 does right, we're going to show what could go wrong discuss what UTF-8 does right, we're going to show what could go wrong
if you didn't use UTF-8 and people tried to use characters outside if you didn't use UTF-8 and people tried to use characters outside
of your character encoding.</p> of your character encoding.</p>
@@ -1056,4 +1056,5 @@ a more in-depth look into character sets and encodings.</p>
</body> </body>
</html> </html>
<!-- vim: et sw=4 sts=4 --> <!-- vim: et sw=4 sts=4
-->

View File

@@ -67,7 +67,7 @@ into your documents. YouTube's code goes like this:</p>
</ol> </ol>
<p>What point 2 means is that if we have code like <code>&lt;span <p>What point 2 means is that if we have code like <code>&lt;span
class=&quot;embed-youtube&quot;&gt;AyPzM5WK8ys&lt;/span&gt;</code> your class=&quot;youtube-embed&quot;&gt;AyPzM5WK8ys&lt;/span&gt;</code> your
application can reconstruct the full object from this small snippet that application can reconstruct the full object from this small snippet that
passes through HTML Purifier <em>unharmed</em>. passes through HTML Purifier <em>unharmed</em>.
<a href="http://repo.or.cz/w/htmlpurifier.git?a=blob;hb=HEAD;f=library/HTMLPurifier/Filter/YouTube.php">Show me the code!</a></p> <a href="http://repo.or.cz/w/htmlpurifier.git?a=blob;hb=HEAD;f=library/HTMLPurifier/Filter/YouTube.php">Show me the code!</a></p>
@@ -75,7 +75,7 @@ passes through HTML Purifier <em>unharmed</em>.
<p>And the corresponding usage:</p> <p>And the corresponding usage:</p>
<pre>&lt;?php <pre>&lt;?php
$config->set('Filter', 'YouTube', true); $config-&gt;set('Filter.YouTube', true);
?&gt;</pre> ?&gt;</pre>
<p>There is a bit going in the two code snippets, so let's explain.</p> <p>There is a bit going in the two code snippets, so let's explain.</p>
@@ -149,4 +149,5 @@ with the core!</p>
</body> </body>
</html> </html>
<!-- vim: et sw=4 sts=4 --> <!-- vim: et sw=4 sts=4
-->

View File

@@ -8,8 +8,8 @@ require_once '../../library/HTMLPurifier.auto.php';
$config = HTMLPurifier_Config::createDefault(); $config = HTMLPurifier_Config::createDefault();
// configuration goes here: // configuration goes here:
$config->set('Core', 'Encoding', 'UTF-8'); // replace with your encoding $config->set('Core.Encoding', 'UTF-8'); // replace with your encoding
$config->set('HTML', 'Doctype', 'XHTML 1.0 Transitional'); // replace with your doctype $config->set('HTML.Doctype', 'XHTML 1.0 Transitional'); // replace with your doctype
$purifier = new HTMLPurifier($config); $purifier = new HTMLPurifier($config);

View File

@@ -5,4 +5,5 @@ function init() {
} }
</script> </script>
<!-- vim: et sw=4 sts=4 --> <!-- vim: et sw=4 sts=4
-->

View File

@@ -117,6 +117,12 @@ the code. They may be upgraded to HTML files or stay as TXT scratchpads.</p>
<td>Common security issues that may still arise (half-baked).</td> <td>Common security issues that may still arise (half-baked).</td>
</tr> </tr>
<tr>
<td>Development</td>
<td><a href="dev-config-bcbreaks.txt">Config BC Breaks</a></td>
<td>Backwards-incompatible changes in HTML Purifier 4.0.0</td>
</tr>
<tr> <tr>
<td>Development</td> <td>Development</td>
<td><a href="dev-code-quality.txt">Code Quality Issues</a></td> <td><a href="dev-code-quality.txt">Code Quality Issues</a></td>
@@ -178,4 +184,5 @@ the code. They may be upgraded to HTML files or stay as TXT scratchpads.</p>
</body> </body>
</html> </html>
<!-- vim: et sw=4 sts=4 --> <!-- vim: et sw=4 sts=4
-->

View File

@@ -45,4 +45,5 @@ something like that?</li>
</body> </body>
</html> </html>
<!-- vim: et sw=4 sts=4 --> <!-- vim: et sw=4 sts=4
-->

218
docs/proposal-plists.txt Normal file
View File

@@ -0,0 +1,218 @@
THE UNIVERSAL DESIGN PATTERN: PROPERTIES
Steve Yegge
Implementation:
get(name)
put(name, value)
has(name)
remove(name)
iteration, with filtering [this will be our namespaces]
parent
Representations:
- Keys are strings
- It's nice to not need to quote keys (if we formulate our own language,
consider this)
- Property not present representation (key missing)
- For frequent removal/re-add, a null marker may help. If null is valid, use
another value. (PHP semantics are weird here)
Data structures:
- LinkedHashMap is wonderful (O(1) access and maintains order)
- Using a special property that points to the parent is usual
- Multiple inheritance possible, need rules for which to lookup first
- Iterative inheritance is best
- Consider performance!
Deletion
- Tricky problem with inheritance
- Distinguish between "not found" and "look in my parent for the property"
[Maybe HTML Purifier won't allow deletion]
Read/write asymmetry (it's correct!)
Read-only plists
- Allow ability to freeze [this is what we have already]
- Don't overuse it
Performance:
- Intern strings (PHP does this already)
- Don't be case-insensitive
- If all properties in a plist are known a-priori, you can use a "perfect"
hash function. Often overkill.
- Copy-on-read caching "plundering" reduces lookup, but uses memory and can
grow stale. Use as last resort.
- Refactoring to fields. Watch for API compatibility, system complexity,
and lack of flexibility.
- Refrigerator: external data-structure to hold plists
Transient properties:
[Don't need to worry about this]
- Use a separate plist for transient properties
- Non-numeric override; numeric should ADD
- Deletion: removeTransientProperty() and transientlyRemoveProperty()
Persistence:
- XML/JSON are good
- Text-based is good for readability, maintainability and bootstrapping
- Compressed binary format for network transport [not necessary]
- RDBMS or XML database
Querying: [not relevant]
- XML database is nice for XPath/XQuery
- jQuery for JSON
- Just load it all into a program
Backfills/Data integrity:
- Use usual methods
- Lazy backfill is a nice hack
Type systems:
- Flags: ReadOnly, Permanent, DontEnum
- Typed properties aren't that useful [It's also Not-PHP]
- Separate meta-list of directive properties IS useful
- Duck typing is useful for systems designed fully around properties pattern
Trade-off:
+ Flexibility
+ Extensibility
+ Unit-testing/prototype-speed
- Performance
- Data integrity
- Navigability/Query-ability
- Reversibility (hard to go back)
HTML Purifier
We are not happy with our current system of defining configuration directives,
because it has become clear that things will get a lot nicer if we allow
multiple namespaces, and there are some features that naturally lend themselves
to inheritance, which we do not really support well.
One of the considered implementation changes would be to go from a structure
like:
array(
'Namespace' => array(
'Directive' => 'val1',
'Directive2' => 'val2',
)
)
to:
array(
'Namespace.Directive' => 'val1',
'Namespace.Directive2' => 'val2',
)
The below implementation takes more memory, however, and it makes it a bit
complicated to grab all values from a namespace.
The alternate implementation choice is to allow nested plists. This keeps
iteration easy, but is problematic for inheritance (it would be difficult
to distinguish a plist from an array) and retrieval (when specifying multiple
namespaces we would need some multiple de-referencing).
----
We can bite the performance hit, and just do iteration with filter
(the strncmp call should be relatively cheap). Then, users should be able
to optimize doing something like:
$config = HTMLPurifier_Config::createDefault();
if (!file_exists('config.php')) {
// set up $config
$config->save('config.php');
} else {
$config->load('config.php');
}
Or maybe memcache, or something. This means that "// set up $config" must
not have any dynamic parts, or the user has to invalidate the cache when
they do update it. We have to think about this a little more carefully; the
file call might be more expensive.
----
This might get expensive, however, when we actually care about iterating
over the configuration and want the actual values. So what about nesting the
lists?
"ns.sub.directive" => values['ns']['sub']['directive']
We can distinguish between plists and arrays by using ArrayObjects for the
plists, and regular arrays for the arrays? Alternatively, use ArrayObjects
for the arrays, and regular arrays for the plists.
----
Implementation demands, and what has caused them:
1. DefinitionCache, the HTML, CSS and URI namespaces have caches attached to them
Results:
- getBatchSerial()
- getBatch() : in general, the ability to traverse just a namespace
2. AutoFormat/Filter, this is a plugin architecture, directives not hard-coded
- getBatch()
3. Configuration form
- Namespaces used to organize directives
Other than that, we have a pure plist. PERHAPS we should maintain separate things
for these different demands.
Issue 2: Directives for configuring the plugins are regular plists, but
when enabling them, while it's "plist-ish", what you're really doing is adding
them to an array of "autoformatters"/"filters" to enable. We can set up
magic BC as well as in the new interface, but there should also be an
add('AutoFormat', 'AutoParagraph'); which does the right thing.
One thing to consider is whether or not inheritance rules will apply to these.
I'd say yes. That means that they're still plisty, in fact, the underlying
implementation will probably be a plist. However, they will get their OWN
plists, and will NOT support nesting.
Issue 1: Our current implementation is generally not efficient; md5(serialize($foo))
is pretty expensive. So, I don't think there will be any problems if it
gets "less" efficient, as long as we give users a properly fast alternative;
DefinitionRev gives us a way to do this, by simply telling the user they must
update it whenever they update Configuration directives as well. (There are
obvious BC concerns here).
In such a case, we simply iterate over our plist (performing full retrievals
for each value), grab the entries we care about, and then serialize and hash.
It's going to be slow either way, due to the ability of plists to inherit.
If we ksort(), we don't have to traverse the entire array, however, the
cost of a ksort() call may not be worth it.
At this point, last time, I started worrying about the performance implications
of allowing inheritance, and wondering whether or not I wanted to squash
the plist. At first blush, our code might be under the assumption that
accessing properties is cheap; but actually we prefer to copy out the value
into a member variable if it's going to be used many times. With this in mind
I don't think CPU consumption from a few nested function calls is going to
be a problem. We *are* going to enforce a function only interface.
The next issue at hand is how we're going to manage the "special" plists,
which should still be able to be inherited. Basically, it means that multiple
plists would be attached to the configuration object, which is not the
best for memory performance. The alternative is to keep them all in one
big plist, and then eat the one-time cost of traversing the entire plist
to grab the appropriate values.
I think at this point we can write the generic interface, and then set up separate
plists if that ends up being necessary for performance (it probably won't.) Now
let's code our generic plist implementation.
----
Iterating over the plist presents some problems. The way we've chosen to solve
this is to squash all of the parents.
----
But I don't need iteration.
vim: et sw=4 sts=4

View File

@@ -43,4 +43,5 @@ the development of this library in these forum threads:</p>
</body> </body>
</html> </html>
<!-- vim: et sw=4 sts=4 --> <!-- vim: et sw=4 sts=4
-->

View File

@@ -163,5 +163,3 @@ div.segment {width:250px; float:left; margin-top:1em;}
</body> </body>
</html> </html>
<!-- vim: et sw=4 sts=4 -->

View File

@@ -127,5 +127,3 @@ style='color:black'>www.example.com/disclaimer</span></a><o:p></o:p></span></p>
</body> </body>
</html> </html>
<!-- vim: et sw=4 sts=4 -->

View File

@@ -72,5 +72,3 @@ title="Join Windows Live to share photos using Windows Live Photo E-mail.">Onlin
pictures are available for 30 days. <A style="COLOR: #0088e4" pictures are available for 30 days. <A style="COLOR: #0088e4"
href="http://g.msn.com/5meen_us/175">Get Windows Live Mail desktop to create href="http://g.msn.com/5meen_us/175">Get Windows Live Mail desktop to create
your own photo e-mails. </A></SPAN></NOBR></DIV></BODY></HTML> your own photo e-mails. </A></SPAN></NOBR></DIV></BODY></HTML>
<!-- vim: et sw=4 sts=4 -->

View File

@@ -11,7 +11,8 @@ class ConfigDoc_HTMLXSLTProcessor
*/ */
protected $xsltProcessor; protected $xsltProcessor;
public function __construct($proc = false) { public function __construct($proc = false)
{
if ($proc === false) $proc = new XSLTProcessor(); if ($proc === false) $proc = new XSLTProcessor();
$this->xsltProcessor = $proc; $this->xsltProcessor = $proc;
} }
@@ -19,7 +20,8 @@ class ConfigDoc_HTMLXSLTProcessor
/** /**
* @note Allows a string $xsl filename to be passed * @note Allows a string $xsl filename to be passed
*/ */
public function importStylesheet($xsl) { public function importStylesheet($xsl)
{
if (is_string($xsl)) { if (is_string($xsl)) {
$xsl_file = $xsl; $xsl_file = $xsl;
$xsl = new DOMDocument(); $xsl = new DOMDocument();
@@ -34,7 +36,8 @@ class ConfigDoc_HTMLXSLTProcessor
* @return string HTML output * @return string HTML output
* @todo Rename to transformToXHTML, as transformToHTML is misleading * @todo Rename to transformToXHTML, as transformToHTML is misleading
*/ */
public function transformToHTML($xml) { public function transformToHTML($xml)
{
if (is_string($xml)) { if (is_string($xml)) {
$dom = new DOMDocument(); $dom = new DOMDocument();
$dom->load($xml); $dom->load($xml);
@@ -68,7 +71,8 @@ class ConfigDoc_HTMLXSLTProcessor
* Bulk sets parameters for the XSL stylesheet * Bulk sets parameters for the XSL stylesheet
* @param array $options Associative array of options to set * @param array $options Associative array of options to set
*/ */
public function setParameters($options) { public function setParameters($options)
{
foreach ($options as $name => $value) { foreach ($options as $name => $value) {
$this->xsltProcessor->setParameter('', $name, $value); $this->xsltProcessor->setParameter('', $name, $value);
} }
@@ -77,7 +81,8 @@ class ConfigDoc_HTMLXSLTProcessor
/** /**
* Forward any other calls to the XSLT processor * Forward any other calls to the XSLT processor
*/ */
public function __call($name, $arguments) { public function __call($name, $arguments)
{
call_user_func_array(array($this->xsltProcessor, $name), $arguments); call_user_func_array(array($this->xsltProcessor, $name), $arguments);
} }

View File

@@ -15,7 +15,8 @@ class FSTools
/** /**
* Returns a global instance of FSTools * Returns a global instance of FSTools
*/ */
static public function singleton() { public static function singleton()
{
if (empty(FSTools::$singleton)) FSTools::$singleton = new FSTools(); if (empty(FSTools::$singleton)) FSTools::$singleton = new FSTools();
return FSTools::$singleton; return FSTools::$singleton;
} }
@@ -24,7 +25,8 @@ class FSTools
* Sets our global singleton to something else; useful for overloading * Sets our global singleton to something else; useful for overloading
* functions. * functions.
*/ */
static public function setSingleton($singleton) { public static function setSingleton($singleton)
{
FSTools::$singleton = $singleton; FSTools::$singleton = $singleton;
} }
@@ -33,7 +35,8 @@ class FSTools
* @param string $folder Name of folder to create * @param string $folder Name of folder to create
* @note Adapted from the PHP manual comment 76612 * @note Adapted from the PHP manual comment 76612
*/ */
public function mkdirr($folder) { public function mkdirr($folder)
{
$folders = preg_split("#[\\\\/]#", $folder); $folders = preg_split("#[\\\\/]#", $folder);
$base = ''; $base = '';
for($i = 0, $c = count($folders); $i < $c; $i++) { for($i = 0, $c = count($folders); $i < $c; $i++) {
@@ -57,7 +60,8 @@ class FSTools
* so that copied files, if PHP, have includes removed * so that copied files, if PHP, have includes removed
* @note Adapted from http://aidanlister.com/repos/v/function.copyr.php * @note Adapted from http://aidanlister.com/repos/v/function.copyr.php
*/ */
public function copyr($source, $dest) { public function copyr($source, $dest)
{
// Simple copy for a file // Simple copy for a file
if (is_file($source)) { if (is_file($source)) {
return $this->copy($source, $dest); return $this->copy($source, $dest);
@@ -92,7 +96,8 @@ class FSTools
* ignore hidden files, unreadable files, etc. This function * ignore hidden files, unreadable files, etc. This function
* applies to copyr(). * applies to copyr().
*/ */
public function copyable($file) { public function copyable($file)
{
return true; return true;
} }
@@ -131,7 +136,8 @@ class FSTools
/** /**
* Recursively globs a directory. * Recursively globs a directory.
*/ */
public function globr($dir, $pattern, $flags = NULL) { public function globr($dir, $pattern, $flags = NULL)
{
$files = $this->glob("$dir/$pattern", $flags); $files = $this->glob("$dir/$pattern", $flags);
if ($files === false) $files = array(); if ($files === false) $files = array();
$sub_dirs = $this->glob("$dir/*", GLOB_ONLYDIR); $sub_dirs = $this->glob("$dir/*", GLOB_ONLYDIR);
@@ -148,7 +154,8 @@ class FSTools
* @warning This function will not work for functions that need * @warning This function will not work for functions that need
* to pass references; manually define a stub function for those. * to pass references; manually define a stub function for those.
*/ */
public function __call($name, $args) { public function __call($name, $args)
{
return call_user_func_array($name, $args); return call_user_func_array($name, $args);
} }

View File

@@ -23,7 +23,8 @@ class FSTools_File
* Filename of file you wish to instantiate. * Filename of file you wish to instantiate.
* @note This file need not exist * @note This file need not exist
*/ */
public function __construct($name, $fs = false) { public function __construct($name, $fs = false)
{
$this->name = $name; $this->name = $name;
$this->fs = $fs ? $fs : FSTools::singleton(); $this->fs = $fs ? $fs : FSTools::singleton();
} }
@@ -38,27 +39,32 @@ class FSTools_File
* Retrieves the contents of a file * Retrieves the contents of a file
* @todo Throw an exception if file doesn't exist * @todo Throw an exception if file doesn't exist
*/ */
public function get() { public function get()
{
return $this->fs->file_get_contents($this->name); return $this->fs->file_get_contents($this->name);
} }
/** Writes contents to a file, creates new file if necessary */ /** Writes contents to a file, creates new file if necessary */
public function write($contents) { public function write($contents)
{
return $this->fs->file_put_contents($this->name, $contents); return $this->fs->file_put_contents($this->name, $contents);
} }
/** Deletes the file */ /** Deletes the file */
public function delete() { public function delete()
{
return $this->fs->unlink($this->name); return $this->fs->unlink($this->name);
} }
/** Returns true if file exists and is a file. */ /** Returns true if file exists and is a file. */
public function exists() { public function exists()
{
return $this->fs->is_file($this->name); return $this->fs->is_file($this->name);
} }
/** Returns last file modification time */ /** Returns last file modification time */
public function getMTime() { public function getMTime()
{
return $this->fs->filemtime($this->name); return $this->fs->filemtime($this->name);
} }
@@ -67,19 +73,22 @@ class FSTools_File
* @note We ignore errors because of some weird owner trickery due * @note We ignore errors because of some weird owner trickery due
* to SVN duality * to SVN duality
*/ */
public function chmod($octal_code) { public function chmod($octal_code)
{
return @$this->fs->chmod($this->name, $octal_code); return @$this->fs->chmod($this->name, $octal_code);
} }
/** Opens file's handle */ /** Opens file's handle */
public function open($mode) { public function open($mode)
{
if ($this->handle) $this->close(); if ($this->handle) $this->close();
$this->handle = $this->fs->fopen($this->name, $mode); $this->handle = $this->fs->fopen($this->name, $mode);
return true; return true;
} }
/** Closes file's handle */ /** Closes file's handle */
public function close() { public function close()
{
if (!$this->handle) return false; if (!$this->handle) return false;
$status = $this->fs->fclose($this->handle); $status = $this->fs->fclose($this->handle);
$this->handle = false; $this->handle = false;
@@ -87,37 +96,43 @@ class FSTools_File
} }
/** Retrieves a line from an open file, with optional max length $length */ /** Retrieves a line from an open file, with optional max length $length */
public function getLine($length = null) { public function getLine($length = null)
{
if (!$this->handle) $this->open('r'); if (!$this->handle) $this->open('r');
if ($length === null) return $this->fs->fgets($this->handle); if ($length === null) return $this->fs->fgets($this->handle);
else return $this->fs->fgets($this->handle, $length); else return $this->fs->fgets($this->handle, $length);
} }
/** Retrieves a character from an open file */ /** Retrieves a character from an open file */
public function getChar() { public function getChar()
{
if (!$this->handle) $this->open('r'); if (!$this->handle) $this->open('r');
return $this->fs->fgetc($this->handle); return $this->fs->fgetc($this->handle);
} }
/** Retrieves an $length bytes of data from an open data */ /** Retrieves an $length bytes of data from an open data */
public function read($length) { public function read($length)
{
if (!$this->handle) $this->open('r'); if (!$this->handle) $this->open('r');
return $this->fs->fread($this->handle, $length); return $this->fs->fread($this->handle, $length);
} }
/** Writes to an open file */ /** Writes to an open file */
public function put($string) { public function put($string)
{
if (!$this->handle) $this->open('a'); if (!$this->handle) $this->open('a');
return $this->fs->fwrite($this->handle, $string); return $this->fs->fwrite($this->handle, $string);
} }
/** Returns TRUE if the end of the file has been reached */ /** Returns TRUE if the end of the file has been reached */
public function eof() { public function eof()
{
if (!$this->handle) return true; if (!$this->handle) return true;
return $this->fs->feof($this->handle); return $this->fs->feof($this->handle);
} }
public function __destruct() { public function __destruct()
{
if ($this->handle) $this->close(); if ($this->handle) $this->close();
} }

View File

@@ -17,7 +17,8 @@ if (function_exists('spl_autoload_register')) {
spl_autoload_register('__autoload'); spl_autoload_register('__autoload');
} }
} elseif (!function_exists('__autoload')) { } elseif (!function_exists('__autoload')) {
function __autoload($class) { function __autoload($class)
{
return HTMLPurifierExtras::autoload($class); return HTMLPurifierExtras::autoload($class);
} }
} }

View File

@@ -7,14 +7,16 @@
class HTMLPurifierExtras class HTMLPurifierExtras
{ {
public static function autoload($class) { public static function autoload($class)
{
$path = HTMLPurifierExtras::getPath($class); $path = HTMLPurifierExtras::getPath($class);
if (!$path) return false; if (!$path) return false;
require $path; require $path;
return true; return true;
} }
public static function getPath($class) { public static function getPath($class)
{
if ( if (
strncmp('FSTools', $class, 7) !== 0 && strncmp('FSTools', $class, 7) !== 0 &&
strncmp('ConfigDoc', $class, 9) !== 0 strncmp('ConfigDoc', $class, 9) !== 0

View File

@@ -3,6 +3,7 @@
/** /**
* @file * @file
* Convenience file that registers autoload handler for HTML Purifier. * Convenience file that registers autoload handler for HTML Purifier.
* It also does some sanity checks.
*/ */
if (function_exists('spl_autoload_register') && function_exists('spl_autoload_unregister')) { if (function_exists('spl_autoload_register') && function_exists('spl_autoload_unregister')) {
@@ -13,9 +14,14 @@ if (function_exists('spl_autoload_register') && function_exists('spl_autoload_un
spl_autoload_register('__autoload'); spl_autoload_register('__autoload');
} }
} elseif (!function_exists('__autoload')) { } elseif (!function_exists('__autoload')) {
function __autoload($class) { function __autoload($class)
{
return HTMLPurifier_Bootstrap::autoload($class); return HTMLPurifier_Bootstrap::autoload($class);
} }
} }
if (ini_get('zend.ze1_compatibility_mode')) {
trigger_error("HTML Purifier is not compatible with zend.ze1_compatibility_mode; please turn it off", E_USER_ERROR);
}
// vim: et sw=4 sts=4 // vim: et sw=4 sts=4

View File

@@ -0,0 +1,4 @@
<?php
if (!defined('HTMLPURIFIER_PREFIX')) {
define('HTMLPURIFIER_PREFIX', dirname(__FILE__));
}

View File

@@ -8,11 +8,13 @@
/** /**
* Purify HTML. * Purify HTML.
* @param $html String HTML to purify * @param string $html String HTML to purify
* @param $config Configuration to use, can be any value accepted by * @param mixed $config Configuration to use, can be any value accepted by
* HTMLPurifier_Config::create() * HTMLPurifier_Config::create()
* @return string
*/ */
function HTMLPurifier($html, $config = null) { function HTMLPurifier($html, $config = null)
{
static $purifier = false; static $purifier = false;
if (!$purifier) { if (!$purifier) {
$purifier = new HTMLPurifier(); $purifier = new HTMLPurifier();

View File

@@ -7,7 +7,7 @@
* primary concern and you are using an opcode cache. PLEASE DO NOT EDIT THIS * primary concern and you are using an opcode cache. PLEASE DO NOT EDIT THIS
* FILE, changes will be overwritten the next time the script is run. * FILE, changes will be overwritten the next time the script is run.
* *
* @version 3.3.0 * @version 4.9.2
* *
* @warning * @warning
* You must *not* include any other HTML Purifier files before this file, * You must *not* include any other HTML Purifier files before this file,
@@ -19,6 +19,7 @@
*/ */
require 'HTMLPurifier.php'; require 'HTMLPurifier.php';
require 'HTMLPurifier/Arborize.php';
require 'HTMLPurifier/AttrCollections.php'; require 'HTMLPurifier/AttrCollections.php';
require 'HTMLPurifier/AttrDef.php'; require 'HTMLPurifier/AttrDef.php';
require 'HTMLPurifier/AttrTransform.php'; require 'HTMLPurifier/AttrTransform.php';
@@ -54,9 +55,11 @@ require 'HTMLPurifier/Language.php';
require 'HTMLPurifier/LanguageFactory.php'; require 'HTMLPurifier/LanguageFactory.php';
require 'HTMLPurifier/Length.php'; require 'HTMLPurifier/Length.php';
require 'HTMLPurifier/Lexer.php'; require 'HTMLPurifier/Lexer.php';
require 'HTMLPurifier/Node.php';
require 'HTMLPurifier/PercentEncoder.php'; require 'HTMLPurifier/PercentEncoder.php';
require 'HTMLPurifier/PropertyList.php'; require 'HTMLPurifier/PropertyList.php';
require 'HTMLPurifier/PropertyListIterator.php'; require 'HTMLPurifier/PropertyListIterator.php';
require 'HTMLPurifier/Queue.php';
require 'HTMLPurifier/Strategy.php'; require 'HTMLPurifier/Strategy.php';
require 'HTMLPurifier/StringHash.php'; require 'HTMLPurifier/StringHash.php';
require 'HTMLPurifier/StringHashParser.php'; require 'HTMLPurifier/StringHashParser.php';
@@ -72,7 +75,9 @@ require 'HTMLPurifier/URISchemeRegistry.php';
require 'HTMLPurifier/UnitConverter.php'; require 'HTMLPurifier/UnitConverter.php';
require 'HTMLPurifier/VarParser.php'; require 'HTMLPurifier/VarParser.php';
require 'HTMLPurifier/VarParserException.php'; require 'HTMLPurifier/VarParserException.php';
require 'HTMLPurifier/Zipper.php';
require 'HTMLPurifier/AttrDef/CSS.php'; require 'HTMLPurifier/AttrDef/CSS.php';
require 'HTMLPurifier/AttrDef/Clone.php';
require 'HTMLPurifier/AttrDef/Enum.php'; require 'HTMLPurifier/AttrDef/Enum.php';
require 'HTMLPurifier/AttrDef/Integer.php'; require 'HTMLPurifier/AttrDef/Integer.php';
require 'HTMLPurifier/AttrDef/Lang.php'; require 'HTMLPurifier/AttrDef/Lang.php';
@@ -90,6 +95,7 @@ require 'HTMLPurifier/AttrDef/CSS/DenyElementDecorator.php';
require 'HTMLPurifier/AttrDef/CSS/Filter.php'; require 'HTMLPurifier/AttrDef/CSS/Filter.php';
require 'HTMLPurifier/AttrDef/CSS/Font.php'; require 'HTMLPurifier/AttrDef/CSS/Font.php';
require 'HTMLPurifier/AttrDef/CSS/FontFamily.php'; require 'HTMLPurifier/AttrDef/CSS/FontFamily.php';
require 'HTMLPurifier/AttrDef/CSS/Ident.php';
require 'HTMLPurifier/AttrDef/CSS/ImportantDecorator.php'; require 'HTMLPurifier/AttrDef/CSS/ImportantDecorator.php';
require 'HTMLPurifier/AttrDef/CSS/Length.php'; require 'HTMLPurifier/AttrDef/CSS/Length.php';
require 'HTMLPurifier/AttrDef/CSS/ListStyle.php'; require 'HTMLPurifier/AttrDef/CSS/ListStyle.php';
@@ -98,6 +104,8 @@ require 'HTMLPurifier/AttrDef/CSS/Percentage.php';
require 'HTMLPurifier/AttrDef/CSS/TextDecoration.php'; require 'HTMLPurifier/AttrDef/CSS/TextDecoration.php';
require 'HTMLPurifier/AttrDef/CSS/URI.php'; require 'HTMLPurifier/AttrDef/CSS/URI.php';
require 'HTMLPurifier/AttrDef/HTML/Bool.php'; require 'HTMLPurifier/AttrDef/HTML/Bool.php';
require 'HTMLPurifier/AttrDef/HTML/Nmtokens.php';
require 'HTMLPurifier/AttrDef/HTML/Class.php';
require 'HTMLPurifier/AttrDef/HTML/Color.php'; require 'HTMLPurifier/AttrDef/HTML/Color.php';
require 'HTMLPurifier/AttrDef/HTML/FrameTarget.php'; require 'HTMLPurifier/AttrDef/HTML/FrameTarget.php';
require 'HTMLPurifier/AttrDef/HTML/ID.php'; require 'HTMLPurifier/AttrDef/HTML/ID.php';
@@ -105,7 +113,6 @@ require 'HTMLPurifier/AttrDef/HTML/Pixels.php';
require 'HTMLPurifier/AttrDef/HTML/Length.php'; require 'HTMLPurifier/AttrDef/HTML/Length.php';
require 'HTMLPurifier/AttrDef/HTML/LinkTypes.php'; require 'HTMLPurifier/AttrDef/HTML/LinkTypes.php';
require 'HTMLPurifier/AttrDef/HTML/MultiLength.php'; require 'HTMLPurifier/AttrDef/HTML/MultiLength.php';
require 'HTMLPurifier/AttrDef/HTML/Nmtokens.php';
require 'HTMLPurifier/AttrDef/URI/Email.php'; require 'HTMLPurifier/AttrDef/URI/Email.php';
require 'HTMLPurifier/AttrDef/URI/Host.php'; require 'HTMLPurifier/AttrDef/URI/Host.php';
require 'HTMLPurifier/AttrDef/URI/IPv4.php'; require 'HTMLPurifier/AttrDef/URI/IPv4.php';
@@ -123,14 +130,20 @@ require 'HTMLPurifier/AttrTransform/Input.php';
require 'HTMLPurifier/AttrTransform/Lang.php'; require 'HTMLPurifier/AttrTransform/Lang.php';
require 'HTMLPurifier/AttrTransform/Length.php'; require 'HTMLPurifier/AttrTransform/Length.php';
require 'HTMLPurifier/AttrTransform/Name.php'; require 'HTMLPurifier/AttrTransform/Name.php';
require 'HTMLPurifier/AttrTransform/NameSync.php';
require 'HTMLPurifier/AttrTransform/Nofollow.php';
require 'HTMLPurifier/AttrTransform/SafeEmbed.php'; require 'HTMLPurifier/AttrTransform/SafeEmbed.php';
require 'HTMLPurifier/AttrTransform/SafeObject.php'; require 'HTMLPurifier/AttrTransform/SafeObject.php';
require 'HTMLPurifier/AttrTransform/SafeParam.php'; require 'HTMLPurifier/AttrTransform/SafeParam.php';
require 'HTMLPurifier/AttrTransform/ScriptRequired.php'; require 'HTMLPurifier/AttrTransform/ScriptRequired.php';
require 'HTMLPurifier/AttrTransform/TargetBlank.php';
require 'HTMLPurifier/AttrTransform/TargetNoopener.php';
require 'HTMLPurifier/AttrTransform/TargetNoreferrer.php';
require 'HTMLPurifier/AttrTransform/Textarea.php'; require 'HTMLPurifier/AttrTransform/Textarea.php';
require 'HTMLPurifier/ChildDef/Chameleon.php'; require 'HTMLPurifier/ChildDef/Chameleon.php';
require 'HTMLPurifier/ChildDef/Custom.php'; require 'HTMLPurifier/ChildDef/Custom.php';
require 'HTMLPurifier/ChildDef/Empty.php'; require 'HTMLPurifier/ChildDef/Empty.php';
require 'HTMLPurifier/ChildDef/List.php';
require 'HTMLPurifier/ChildDef/Required.php'; require 'HTMLPurifier/ChildDef/Required.php';
require 'HTMLPurifier/ChildDef/Optional.php'; require 'HTMLPurifier/ChildDef/Optional.php';
require 'HTMLPurifier/ChildDef/StrictBlockquote.php'; require 'HTMLPurifier/ChildDef/StrictBlockquote.php';
@@ -145,10 +158,12 @@ require 'HTMLPurifier/HTMLModule/CommonAttributes.php';
require 'HTMLPurifier/HTMLModule/Edit.php'; require 'HTMLPurifier/HTMLModule/Edit.php';
require 'HTMLPurifier/HTMLModule/Forms.php'; require 'HTMLPurifier/HTMLModule/Forms.php';
require 'HTMLPurifier/HTMLModule/Hypertext.php'; require 'HTMLPurifier/HTMLModule/Hypertext.php';
require 'HTMLPurifier/HTMLModule/Iframe.php';
require 'HTMLPurifier/HTMLModule/Image.php'; require 'HTMLPurifier/HTMLModule/Image.php';
require 'HTMLPurifier/HTMLModule/Legacy.php'; require 'HTMLPurifier/HTMLModule/Legacy.php';
require 'HTMLPurifier/HTMLModule/List.php'; require 'HTMLPurifier/HTMLModule/List.php';
require 'HTMLPurifier/HTMLModule/Name.php'; require 'HTMLPurifier/HTMLModule/Name.php';
require 'HTMLPurifier/HTMLModule/Nofollow.php';
require 'HTMLPurifier/HTMLModule/NonXMLCommonAttributes.php'; require 'HTMLPurifier/HTMLModule/NonXMLCommonAttributes.php';
require 'HTMLPurifier/HTMLModule/Object.php'; require 'HTMLPurifier/HTMLModule/Object.php';
require 'HTMLPurifier/HTMLModule/Presentation.php'; require 'HTMLPurifier/HTMLModule/Presentation.php';
@@ -156,10 +171,14 @@ require 'HTMLPurifier/HTMLModule/Proprietary.php';
require 'HTMLPurifier/HTMLModule/Ruby.php'; require 'HTMLPurifier/HTMLModule/Ruby.php';
require 'HTMLPurifier/HTMLModule/SafeEmbed.php'; require 'HTMLPurifier/HTMLModule/SafeEmbed.php';
require 'HTMLPurifier/HTMLModule/SafeObject.php'; require 'HTMLPurifier/HTMLModule/SafeObject.php';
require 'HTMLPurifier/HTMLModule/SafeScripting.php';
require 'HTMLPurifier/HTMLModule/Scripting.php'; require 'HTMLPurifier/HTMLModule/Scripting.php';
require 'HTMLPurifier/HTMLModule/StyleAttribute.php'; require 'HTMLPurifier/HTMLModule/StyleAttribute.php';
require 'HTMLPurifier/HTMLModule/Tables.php'; require 'HTMLPurifier/HTMLModule/Tables.php';
require 'HTMLPurifier/HTMLModule/Target.php'; require 'HTMLPurifier/HTMLModule/Target.php';
require 'HTMLPurifier/HTMLModule/TargetBlank.php';
require 'HTMLPurifier/HTMLModule/TargetNoopener.php';
require 'HTMLPurifier/HTMLModule/TargetNoreferrer.php';
require 'HTMLPurifier/HTMLModule/Text.php'; require 'HTMLPurifier/HTMLModule/Text.php';
require 'HTMLPurifier/HTMLModule/Tidy.php'; require 'HTMLPurifier/HTMLModule/Tidy.php';
require 'HTMLPurifier/HTMLModule/XMLCommonAttributes.php'; require 'HTMLPurifier/HTMLModule/XMLCommonAttributes.php';
@@ -174,9 +193,13 @@ require 'HTMLPurifier/Injector/DisplayLinkURI.php';
require 'HTMLPurifier/Injector/Linkify.php'; require 'HTMLPurifier/Injector/Linkify.php';
require 'HTMLPurifier/Injector/PurifierLinkify.php'; require 'HTMLPurifier/Injector/PurifierLinkify.php';
require 'HTMLPurifier/Injector/RemoveEmpty.php'; require 'HTMLPurifier/Injector/RemoveEmpty.php';
require 'HTMLPurifier/Injector/RemoveSpansWithoutAttributes.php';
require 'HTMLPurifier/Injector/SafeObject.php'; require 'HTMLPurifier/Injector/SafeObject.php';
require 'HTMLPurifier/Lexer/DOMLex.php'; require 'HTMLPurifier/Lexer/DOMLex.php';
require 'HTMLPurifier/Lexer/DirectLex.php'; require 'HTMLPurifier/Lexer/DirectLex.php';
require 'HTMLPurifier/Node/Comment.php';
require 'HTMLPurifier/Node/Element.php';
require 'HTMLPurifier/Node/Text.php';
require 'HTMLPurifier/Strategy/Composite.php'; require 'HTMLPurifier/Strategy/Composite.php';
require 'HTMLPurifier/Strategy/Core.php'; require 'HTMLPurifier/Strategy/Core.php';
require 'HTMLPurifier/Strategy/FixNesting.php'; require 'HTMLPurifier/Strategy/FixNesting.php';
@@ -193,14 +216,19 @@ require 'HTMLPurifier/Token/Start.php';
require 'HTMLPurifier/Token/Text.php'; require 'HTMLPurifier/Token/Text.php';
require 'HTMLPurifier/URIFilter/DisableExternal.php'; require 'HTMLPurifier/URIFilter/DisableExternal.php';
require 'HTMLPurifier/URIFilter/DisableExternalResources.php'; require 'HTMLPurifier/URIFilter/DisableExternalResources.php';
require 'HTMLPurifier/URIFilter/DisableResources.php';
require 'HTMLPurifier/URIFilter/HostBlacklist.php'; require 'HTMLPurifier/URIFilter/HostBlacklist.php';
require 'HTMLPurifier/URIFilter/MakeAbsolute.php'; require 'HTMLPurifier/URIFilter/MakeAbsolute.php';
require 'HTMLPurifier/URIFilter/Munge.php'; require 'HTMLPurifier/URIFilter/Munge.php';
require 'HTMLPurifier/URIFilter/SafeIframe.php';
require 'HTMLPurifier/URIScheme/data.php';
require 'HTMLPurifier/URIScheme/file.php';
require 'HTMLPurifier/URIScheme/ftp.php'; require 'HTMLPurifier/URIScheme/ftp.php';
require 'HTMLPurifier/URIScheme/http.php'; require 'HTMLPurifier/URIScheme/http.php';
require 'HTMLPurifier/URIScheme/https.php'; require 'HTMLPurifier/URIScheme/https.php';
require 'HTMLPurifier/URIScheme/mailto.php'; require 'HTMLPurifier/URIScheme/mailto.php';
require 'HTMLPurifier/URIScheme/news.php'; require 'HTMLPurifier/URIScheme/news.php';
require 'HTMLPurifier/URIScheme/nntp.php'; require 'HTMLPurifier/URIScheme/nntp.php';
require 'HTMLPurifier/URIScheme/tel.php';
require 'HTMLPurifier/VarParser/Flexible.php'; require 'HTMLPurifier/VarParser/Flexible.php';
require 'HTMLPurifier/VarParser/Native.php'; require 'HTMLPurifier/VarParser/Native.php';

View File

@@ -7,7 +7,8 @@
require_once dirname(__FILE__) . '/HTMLPurifier.auto.php'; require_once dirname(__FILE__) . '/HTMLPurifier.auto.php';
function kses($string, $allowed_html, $allowed_protocols = null) { function kses($string, $allowed_html, $allowed_protocols = null)
{
$config = HTMLPurifier_Config::createDefault(); $config = HTMLPurifier_Config::createDefault();
$allowed_elements = array(); $allowed_elements = array();
$allowed_attributes = array(); $allowed_attributes = array();
@@ -17,11 +18,10 @@ function kses($string, $allowed_html, $allowed_protocols = null) {
$allowed_attributes["$element.$attribute"] = true; $allowed_attributes["$element.$attribute"] = true;
} }
} }
$config->set('HTML', 'AllowedElements', $allowed_elements); $config->set('HTML.AllowedElements', $allowed_elements);
$config->set('HTML', 'AllowedAttributes', $allowed_attributes); $config->set('HTML.AllowedAttributes', $allowed_attributes);
$allowed_schemes = array();
if ($allowed_protocols !== null) { if ($allowed_protocols !== null) {
$config->set('URI', 'AllowedSchemes', $allowed_protocols); $config->set('URI.AllowedSchemes', $allowed_protocols);
} }
$purifier = new HTMLPurifier($config); $purifier = new HTMLPurifier($config);
return $purifier->purify($string); return $purifier->purify($string);

View File

@@ -19,7 +19,7 @@
*/ */
/* /*
HTML Purifier 3.3.0 - Standards Compliant HTML Filtering HTML Purifier 4.9.2 - Standards Compliant HTML Filtering
Copyright (C) 2006-2008 Edward Z. Yang Copyright (C) 2006-2008 Edward Z. Yang
This library is free software; you can redistribute it and/or This library is free software; you can redistribute it and/or
@@ -54,66 +54,97 @@
class HTMLPurifier class HTMLPurifier
{ {
/** Version of HTML Purifier */ /**
public $version = '3.3.0'; * Version of HTML Purifier.
* @type string
/** Constant with version of HTML Purifier */ */
const VERSION = '3.3.0'; public $version = '4.9.2';
/** Global configuration object */
public $config;
/** Array of extra HTMLPurifier_Filter objects to run on HTML, for backwards compatibility */
private $filters = array();
/** Single instance of HTML Purifier */
private static $instance;
protected $strategy, $generator;
/** /**
* Resultant HTMLPurifier_Context of last run purification. Is an array * Constant with version of HTML Purifier.
* of contexts if the last called method was purifyArray(). */
const VERSION = '4.9.2';
/**
* Global configuration object.
* @type HTMLPurifier_Config
*/
public $config;
/**
* Array of extra filter objects to run on HTML,
* for backwards compatibility.
* @type HTMLPurifier_Filter[]
*/
private $filters = array();
/**
* Single instance of HTML Purifier.
* @type HTMLPurifier
*/
private static $instance;
/**
* @type HTMLPurifier_Strategy_Core
*/
protected $strategy;
/**
* @type HTMLPurifier_Generator
*/
protected $generator;
/**
* Resultant context of last run purification.
* Is an array of contexts if the last called method was purifyArray().
* @type HTMLPurifier_Context
*/ */
public $context; public $context;
/** /**
* Initializes the purifier. * Initializes the purifier.
* @param $config Optional HTMLPurifier_Config object for all instances of *
* the purifier, if omitted, a default configuration is * @param HTMLPurifier_Config|mixed $config Optional HTMLPurifier_Config object
* supplied (which can be overridden on a per-use basis). * for all instances of the purifier, if omitted, a default
* configuration is supplied (which can be overridden on a
* per-use basis).
* The parameter can also be any type that * The parameter can also be any type that
* HTMLPurifier_Config::create() supports. * HTMLPurifier_Config::create() supports.
*/ */
public function __construct($config = null) { public function __construct($config = null)
{
$this->config = HTMLPurifier_Config::create($config); $this->config = HTMLPurifier_Config::create($config);
$this->strategy = new HTMLPurifier_Strategy_Core();
$this->strategy = new HTMLPurifier_Strategy_Core();
} }
/** /**
* Adds a filter to process the output. First come first serve * Adds a filter to process the output. First come first serve
* @param $filter HTMLPurifier_Filter object *
* @param HTMLPurifier_Filter $filter HTMLPurifier_Filter object
*/ */
public function addFilter($filter) { public function addFilter($filter)
trigger_error('HTMLPurifier->addFilter() is deprecated, use configuration directives in the Filter namespace or Filter.Custom', E_USER_WARNING); {
trigger_error(
'HTMLPurifier->addFilter() is deprecated, use configuration directives' .
' in the Filter namespace or Filter.Custom',
E_USER_WARNING
);
$this->filters[] = $filter; $this->filters[] = $filter;
} }
/** /**
* Filters an HTML snippet/document to be XSS-free and standards-compliant. * Filters an HTML snippet/document to be XSS-free and standards-compliant.
* *
* @param $html String of HTML to purify * @param string $html String of HTML to purify
* @param $config HTMLPurifier_Config object for this operation, if omitted, * @param HTMLPurifier_Config $config Config object for this operation,
* defaults to the config object specified during this * if omitted, defaults to the config object specified during this
* object's construction. The parameter can also be any type * object's construction. The parameter can also be any type
* that HTMLPurifier_Config::create() supports. * that HTMLPurifier_Config::create() supports.
* @return Purified HTML *
* @return string Purified HTML
*/ */
public function purify($html, $config = null) { public function purify($html, $config = null)
{
// :TODO: make the config merge in, instead of replace // :TODO: make the config merge in, instead of replace
$config = $config ? HTMLPurifier_Config::create($config) : $this->config; $config = $config ? HTMLPurifier_Config::create($config) : $this->config;
@@ -128,7 +159,7 @@ class HTMLPurifier
$context->register('Generator', $this->generator); $context->register('Generator', $this->generator);
// set up global context variables // set up global context variables
if ($config->get('Core', 'CollectErrors')) { if ($config->get('Core.CollectErrors')) {
// may get moved out if other facilities use it // may get moved out if other facilities use it
$language_factory = HTMLPurifier_LanguageFactory::instance(); $language_factory = HTMLPurifier_LanguageFactory::instance();
$language = $language_factory->create($config, $context); $language = $language_factory->create($config, $context);
@@ -151,7 +182,12 @@ class HTMLPurifier
unset($filter_flags['Custom']); unset($filter_flags['Custom']);
$filters = array(); $filters = array();
foreach ($filter_flags as $filter => $flag) { foreach ($filter_flags as $filter => $flag) {
if (!$flag) continue; if (!$flag) {
continue;
}
if (strpos($filter, '.') !== false) {
continue;
}
$class = "HTMLPurifier_Filter_$filter"; $class = "HTMLPurifier_Filter_$filter";
$filters[] = new $class; $filters[] = new $class;
} }
@@ -174,9 +210,12 @@ class HTMLPurifier
// list of un-purified tokens // list of un-purified tokens
$lexer->tokenizeHTML( $lexer->tokenizeHTML(
// un-purified HTML // un-purified HTML
$html, $config, $context $html,
$config,
$context
), ),
$config, $context $config,
$context
) )
); );
@@ -191,11 +230,15 @@ class HTMLPurifier
/** /**
* Filters an array of HTML snippets * Filters an array of HTML snippets
* @param $config Optional HTMLPurifier_Config object for this operation. *
* @param string[] $array_of_html Array of html snippets
* @param HTMLPurifier_Config $config Optional config object for this operation.
* See HTMLPurifier::purify() for more details. * See HTMLPurifier::purify() for more details.
* @return Array of purified HTML *
* @return string[] Array of purified HTML
*/ */
public function purifyArray($array_of_html, $config = null) { public function purifyArray($array_of_html, $config = null)
{
$context_array = array(); $context_array = array();
foreach ($array_of_html as $key => $html) { foreach ($array_of_html as $key => $html) {
$array_of_html[$key] = $this->purify($html, $config); $array_of_html[$key] = $this->purify($html, $config);
@@ -207,11 +250,16 @@ class HTMLPurifier
/** /**
* Singleton for enforcing just one HTML Purifier in your system * Singleton for enforcing just one HTML Purifier in your system
* @param $prototype Optional prototype HTMLPurifier instance to *
* overload singleton with, or HTMLPurifier_Config * @param HTMLPurifier|HTMLPurifier_Config $prototype Optional prototype
* instance to configure the generated version with. * HTMLPurifier instance to overload singleton with,
* or HTMLPurifier_Config instance to configure the
* generated version with.
*
* @return HTMLPurifier
*/ */
public static function instance($prototype = null) { public static function instance($prototype = null)
{
if (!self::$instance || $prototype) { if (!self::$instance || $prototype) {
if ($prototype instanceof HTMLPurifier) { if ($prototype instanceof HTMLPurifier) {
self::$instance = $prototype; self::$instance = $prototype;
@@ -225,12 +273,20 @@ class HTMLPurifier
} }
/** /**
* Singleton for enforcing just one HTML Purifier in your system
*
* @param HTMLPurifier|HTMLPurifier_Config $prototype Optional prototype
* HTMLPurifier instance to overload singleton with,
* or HTMLPurifier_Config instance to configure the
* generated version with.
*
* @return HTMLPurifier
* @note Backwards compatibility, see instance() * @note Backwards compatibility, see instance()
*/ */
public static function getInstance($prototype = null) { public static function getInstance($prototype = null)
{
return HTMLPurifier::instance($prototype); return HTMLPurifier::instance($prototype);
} }
} }
// vim: et sw=4 sts=4 // vim: et sw=4 sts=4

View File

@@ -13,6 +13,7 @@
$__dir = dirname(__FILE__); $__dir = dirname(__FILE__);
require_once $__dir . '/HTMLPurifier.php'; require_once $__dir . '/HTMLPurifier.php';
require_once $__dir . '/HTMLPurifier/Arborize.php';
require_once $__dir . '/HTMLPurifier/AttrCollections.php'; require_once $__dir . '/HTMLPurifier/AttrCollections.php';
require_once $__dir . '/HTMLPurifier/AttrDef.php'; require_once $__dir . '/HTMLPurifier/AttrDef.php';
require_once $__dir . '/HTMLPurifier/AttrTransform.php'; require_once $__dir . '/HTMLPurifier/AttrTransform.php';
@@ -48,9 +49,11 @@ require_once $__dir . '/HTMLPurifier/Language.php';
require_once $__dir . '/HTMLPurifier/LanguageFactory.php'; require_once $__dir . '/HTMLPurifier/LanguageFactory.php';
require_once $__dir . '/HTMLPurifier/Length.php'; require_once $__dir . '/HTMLPurifier/Length.php';
require_once $__dir . '/HTMLPurifier/Lexer.php'; require_once $__dir . '/HTMLPurifier/Lexer.php';
require_once $__dir . '/HTMLPurifier/Node.php';
require_once $__dir . '/HTMLPurifier/PercentEncoder.php'; require_once $__dir . '/HTMLPurifier/PercentEncoder.php';
require_once $__dir . '/HTMLPurifier/PropertyList.php'; require_once $__dir . '/HTMLPurifier/PropertyList.php';
require_once $__dir . '/HTMLPurifier/PropertyListIterator.php'; require_once $__dir . '/HTMLPurifier/PropertyListIterator.php';
require_once $__dir . '/HTMLPurifier/Queue.php';
require_once $__dir . '/HTMLPurifier/Strategy.php'; require_once $__dir . '/HTMLPurifier/Strategy.php';
require_once $__dir . '/HTMLPurifier/StringHash.php'; require_once $__dir . '/HTMLPurifier/StringHash.php';
require_once $__dir . '/HTMLPurifier/StringHashParser.php'; require_once $__dir . '/HTMLPurifier/StringHashParser.php';
@@ -66,7 +69,9 @@ require_once $__dir . '/HTMLPurifier/URISchemeRegistry.php';
require_once $__dir . '/HTMLPurifier/UnitConverter.php'; require_once $__dir . '/HTMLPurifier/UnitConverter.php';
require_once $__dir . '/HTMLPurifier/VarParser.php'; require_once $__dir . '/HTMLPurifier/VarParser.php';
require_once $__dir . '/HTMLPurifier/VarParserException.php'; require_once $__dir . '/HTMLPurifier/VarParserException.php';
require_once $__dir . '/HTMLPurifier/Zipper.php';
require_once $__dir . '/HTMLPurifier/AttrDef/CSS.php'; require_once $__dir . '/HTMLPurifier/AttrDef/CSS.php';
require_once $__dir . '/HTMLPurifier/AttrDef/Clone.php';
require_once $__dir . '/HTMLPurifier/AttrDef/Enum.php'; require_once $__dir . '/HTMLPurifier/AttrDef/Enum.php';
require_once $__dir . '/HTMLPurifier/AttrDef/Integer.php'; require_once $__dir . '/HTMLPurifier/AttrDef/Integer.php';
require_once $__dir . '/HTMLPurifier/AttrDef/Lang.php'; require_once $__dir . '/HTMLPurifier/AttrDef/Lang.php';
@@ -84,6 +89,7 @@ require_once $__dir . '/HTMLPurifier/AttrDef/CSS/DenyElementDecorator.php';
require_once $__dir . '/HTMLPurifier/AttrDef/CSS/Filter.php'; require_once $__dir . '/HTMLPurifier/AttrDef/CSS/Filter.php';
require_once $__dir . '/HTMLPurifier/AttrDef/CSS/Font.php'; require_once $__dir . '/HTMLPurifier/AttrDef/CSS/Font.php';
require_once $__dir . '/HTMLPurifier/AttrDef/CSS/FontFamily.php'; require_once $__dir . '/HTMLPurifier/AttrDef/CSS/FontFamily.php';
require_once $__dir . '/HTMLPurifier/AttrDef/CSS/Ident.php';
require_once $__dir . '/HTMLPurifier/AttrDef/CSS/ImportantDecorator.php'; require_once $__dir . '/HTMLPurifier/AttrDef/CSS/ImportantDecorator.php';
require_once $__dir . '/HTMLPurifier/AttrDef/CSS/Length.php'; require_once $__dir . '/HTMLPurifier/AttrDef/CSS/Length.php';
require_once $__dir . '/HTMLPurifier/AttrDef/CSS/ListStyle.php'; require_once $__dir . '/HTMLPurifier/AttrDef/CSS/ListStyle.php';
@@ -92,6 +98,8 @@ require_once $__dir . '/HTMLPurifier/AttrDef/CSS/Percentage.php';
require_once $__dir . '/HTMLPurifier/AttrDef/CSS/TextDecoration.php'; require_once $__dir . '/HTMLPurifier/AttrDef/CSS/TextDecoration.php';
require_once $__dir . '/HTMLPurifier/AttrDef/CSS/URI.php'; require_once $__dir . '/HTMLPurifier/AttrDef/CSS/URI.php';
require_once $__dir . '/HTMLPurifier/AttrDef/HTML/Bool.php'; require_once $__dir . '/HTMLPurifier/AttrDef/HTML/Bool.php';
require_once $__dir . '/HTMLPurifier/AttrDef/HTML/Nmtokens.php';
require_once $__dir . '/HTMLPurifier/AttrDef/HTML/Class.php';
require_once $__dir . '/HTMLPurifier/AttrDef/HTML/Color.php'; require_once $__dir . '/HTMLPurifier/AttrDef/HTML/Color.php';
require_once $__dir . '/HTMLPurifier/AttrDef/HTML/FrameTarget.php'; require_once $__dir . '/HTMLPurifier/AttrDef/HTML/FrameTarget.php';
require_once $__dir . '/HTMLPurifier/AttrDef/HTML/ID.php'; require_once $__dir . '/HTMLPurifier/AttrDef/HTML/ID.php';
@@ -99,7 +107,6 @@ require_once $__dir . '/HTMLPurifier/AttrDef/HTML/Pixels.php';
require_once $__dir . '/HTMLPurifier/AttrDef/HTML/Length.php'; require_once $__dir . '/HTMLPurifier/AttrDef/HTML/Length.php';
require_once $__dir . '/HTMLPurifier/AttrDef/HTML/LinkTypes.php'; require_once $__dir . '/HTMLPurifier/AttrDef/HTML/LinkTypes.php';
require_once $__dir . '/HTMLPurifier/AttrDef/HTML/MultiLength.php'; require_once $__dir . '/HTMLPurifier/AttrDef/HTML/MultiLength.php';
require_once $__dir . '/HTMLPurifier/AttrDef/HTML/Nmtokens.php';
require_once $__dir . '/HTMLPurifier/AttrDef/URI/Email.php'; require_once $__dir . '/HTMLPurifier/AttrDef/URI/Email.php';
require_once $__dir . '/HTMLPurifier/AttrDef/URI/Host.php'; require_once $__dir . '/HTMLPurifier/AttrDef/URI/Host.php';
require_once $__dir . '/HTMLPurifier/AttrDef/URI/IPv4.php'; require_once $__dir . '/HTMLPurifier/AttrDef/URI/IPv4.php';
@@ -117,14 +124,20 @@ require_once $__dir . '/HTMLPurifier/AttrTransform/Input.php';
require_once $__dir . '/HTMLPurifier/AttrTransform/Lang.php'; require_once $__dir . '/HTMLPurifier/AttrTransform/Lang.php';
require_once $__dir . '/HTMLPurifier/AttrTransform/Length.php'; require_once $__dir . '/HTMLPurifier/AttrTransform/Length.php';
require_once $__dir . '/HTMLPurifier/AttrTransform/Name.php'; require_once $__dir . '/HTMLPurifier/AttrTransform/Name.php';
require_once $__dir . '/HTMLPurifier/AttrTransform/NameSync.php';
require_once $__dir . '/HTMLPurifier/AttrTransform/Nofollow.php';
require_once $__dir . '/HTMLPurifier/AttrTransform/SafeEmbed.php'; require_once $__dir . '/HTMLPurifier/AttrTransform/SafeEmbed.php';
require_once $__dir . '/HTMLPurifier/AttrTransform/SafeObject.php'; require_once $__dir . '/HTMLPurifier/AttrTransform/SafeObject.php';
require_once $__dir . '/HTMLPurifier/AttrTransform/SafeParam.php'; require_once $__dir . '/HTMLPurifier/AttrTransform/SafeParam.php';
require_once $__dir . '/HTMLPurifier/AttrTransform/ScriptRequired.php'; require_once $__dir . '/HTMLPurifier/AttrTransform/ScriptRequired.php';
require_once $__dir . '/HTMLPurifier/AttrTransform/TargetBlank.php';
require_once $__dir . '/HTMLPurifier/AttrTransform/TargetNoopener.php';
require_once $__dir . '/HTMLPurifier/AttrTransform/TargetNoreferrer.php';
require_once $__dir . '/HTMLPurifier/AttrTransform/Textarea.php'; require_once $__dir . '/HTMLPurifier/AttrTransform/Textarea.php';
require_once $__dir . '/HTMLPurifier/ChildDef/Chameleon.php'; require_once $__dir . '/HTMLPurifier/ChildDef/Chameleon.php';
require_once $__dir . '/HTMLPurifier/ChildDef/Custom.php'; require_once $__dir . '/HTMLPurifier/ChildDef/Custom.php';
require_once $__dir . '/HTMLPurifier/ChildDef/Empty.php'; require_once $__dir . '/HTMLPurifier/ChildDef/Empty.php';
require_once $__dir . '/HTMLPurifier/ChildDef/List.php';
require_once $__dir . '/HTMLPurifier/ChildDef/Required.php'; require_once $__dir . '/HTMLPurifier/ChildDef/Required.php';
require_once $__dir . '/HTMLPurifier/ChildDef/Optional.php'; require_once $__dir . '/HTMLPurifier/ChildDef/Optional.php';
require_once $__dir . '/HTMLPurifier/ChildDef/StrictBlockquote.php'; require_once $__dir . '/HTMLPurifier/ChildDef/StrictBlockquote.php';
@@ -139,10 +152,12 @@ require_once $__dir . '/HTMLPurifier/HTMLModule/CommonAttributes.php';
require_once $__dir . '/HTMLPurifier/HTMLModule/Edit.php'; require_once $__dir . '/HTMLPurifier/HTMLModule/Edit.php';
require_once $__dir . '/HTMLPurifier/HTMLModule/Forms.php'; require_once $__dir . '/HTMLPurifier/HTMLModule/Forms.php';
require_once $__dir . '/HTMLPurifier/HTMLModule/Hypertext.php'; require_once $__dir . '/HTMLPurifier/HTMLModule/Hypertext.php';
require_once $__dir . '/HTMLPurifier/HTMLModule/Iframe.php';
require_once $__dir . '/HTMLPurifier/HTMLModule/Image.php'; require_once $__dir . '/HTMLPurifier/HTMLModule/Image.php';
require_once $__dir . '/HTMLPurifier/HTMLModule/Legacy.php'; require_once $__dir . '/HTMLPurifier/HTMLModule/Legacy.php';
require_once $__dir . '/HTMLPurifier/HTMLModule/List.php'; require_once $__dir . '/HTMLPurifier/HTMLModule/List.php';
require_once $__dir . '/HTMLPurifier/HTMLModule/Name.php'; require_once $__dir . '/HTMLPurifier/HTMLModule/Name.php';
require_once $__dir . '/HTMLPurifier/HTMLModule/Nofollow.php';
require_once $__dir . '/HTMLPurifier/HTMLModule/NonXMLCommonAttributes.php'; require_once $__dir . '/HTMLPurifier/HTMLModule/NonXMLCommonAttributes.php';
require_once $__dir . '/HTMLPurifier/HTMLModule/Object.php'; require_once $__dir . '/HTMLPurifier/HTMLModule/Object.php';
require_once $__dir . '/HTMLPurifier/HTMLModule/Presentation.php'; require_once $__dir . '/HTMLPurifier/HTMLModule/Presentation.php';
@@ -150,10 +165,14 @@ require_once $__dir . '/HTMLPurifier/HTMLModule/Proprietary.php';
require_once $__dir . '/HTMLPurifier/HTMLModule/Ruby.php'; require_once $__dir . '/HTMLPurifier/HTMLModule/Ruby.php';
require_once $__dir . '/HTMLPurifier/HTMLModule/SafeEmbed.php'; require_once $__dir . '/HTMLPurifier/HTMLModule/SafeEmbed.php';
require_once $__dir . '/HTMLPurifier/HTMLModule/SafeObject.php'; require_once $__dir . '/HTMLPurifier/HTMLModule/SafeObject.php';
require_once $__dir . '/HTMLPurifier/HTMLModule/SafeScripting.php';
require_once $__dir . '/HTMLPurifier/HTMLModule/Scripting.php'; require_once $__dir . '/HTMLPurifier/HTMLModule/Scripting.php';
require_once $__dir . '/HTMLPurifier/HTMLModule/StyleAttribute.php'; require_once $__dir . '/HTMLPurifier/HTMLModule/StyleAttribute.php';
require_once $__dir . '/HTMLPurifier/HTMLModule/Tables.php'; require_once $__dir . '/HTMLPurifier/HTMLModule/Tables.php';
require_once $__dir . '/HTMLPurifier/HTMLModule/Target.php'; require_once $__dir . '/HTMLPurifier/HTMLModule/Target.php';
require_once $__dir . '/HTMLPurifier/HTMLModule/TargetBlank.php';
require_once $__dir . '/HTMLPurifier/HTMLModule/TargetNoopener.php';
require_once $__dir . '/HTMLPurifier/HTMLModule/TargetNoreferrer.php';
require_once $__dir . '/HTMLPurifier/HTMLModule/Text.php'; require_once $__dir . '/HTMLPurifier/HTMLModule/Text.php';
require_once $__dir . '/HTMLPurifier/HTMLModule/Tidy.php'; require_once $__dir . '/HTMLPurifier/HTMLModule/Tidy.php';
require_once $__dir . '/HTMLPurifier/HTMLModule/XMLCommonAttributes.php'; require_once $__dir . '/HTMLPurifier/HTMLModule/XMLCommonAttributes.php';
@@ -168,9 +187,13 @@ require_once $__dir . '/HTMLPurifier/Injector/DisplayLinkURI.php';
require_once $__dir . '/HTMLPurifier/Injector/Linkify.php'; require_once $__dir . '/HTMLPurifier/Injector/Linkify.php';
require_once $__dir . '/HTMLPurifier/Injector/PurifierLinkify.php'; require_once $__dir . '/HTMLPurifier/Injector/PurifierLinkify.php';
require_once $__dir . '/HTMLPurifier/Injector/RemoveEmpty.php'; require_once $__dir . '/HTMLPurifier/Injector/RemoveEmpty.php';
require_once $__dir . '/HTMLPurifier/Injector/RemoveSpansWithoutAttributes.php';
require_once $__dir . '/HTMLPurifier/Injector/SafeObject.php'; require_once $__dir . '/HTMLPurifier/Injector/SafeObject.php';
require_once $__dir . '/HTMLPurifier/Lexer/DOMLex.php'; require_once $__dir . '/HTMLPurifier/Lexer/DOMLex.php';
require_once $__dir . '/HTMLPurifier/Lexer/DirectLex.php'; require_once $__dir . '/HTMLPurifier/Lexer/DirectLex.php';
require_once $__dir . '/HTMLPurifier/Node/Comment.php';
require_once $__dir . '/HTMLPurifier/Node/Element.php';
require_once $__dir . '/HTMLPurifier/Node/Text.php';
require_once $__dir . '/HTMLPurifier/Strategy/Composite.php'; require_once $__dir . '/HTMLPurifier/Strategy/Composite.php';
require_once $__dir . '/HTMLPurifier/Strategy/Core.php'; require_once $__dir . '/HTMLPurifier/Strategy/Core.php';
require_once $__dir . '/HTMLPurifier/Strategy/FixNesting.php'; require_once $__dir . '/HTMLPurifier/Strategy/FixNesting.php';
@@ -187,14 +210,19 @@ require_once $__dir . '/HTMLPurifier/Token/Start.php';
require_once $__dir . '/HTMLPurifier/Token/Text.php'; require_once $__dir . '/HTMLPurifier/Token/Text.php';
require_once $__dir . '/HTMLPurifier/URIFilter/DisableExternal.php'; require_once $__dir . '/HTMLPurifier/URIFilter/DisableExternal.php';
require_once $__dir . '/HTMLPurifier/URIFilter/DisableExternalResources.php'; require_once $__dir . '/HTMLPurifier/URIFilter/DisableExternalResources.php';
require_once $__dir . '/HTMLPurifier/URIFilter/DisableResources.php';
require_once $__dir . '/HTMLPurifier/URIFilter/HostBlacklist.php'; require_once $__dir . '/HTMLPurifier/URIFilter/HostBlacklist.php';
require_once $__dir . '/HTMLPurifier/URIFilter/MakeAbsolute.php'; require_once $__dir . '/HTMLPurifier/URIFilter/MakeAbsolute.php';
require_once $__dir . '/HTMLPurifier/URIFilter/Munge.php'; require_once $__dir . '/HTMLPurifier/URIFilter/Munge.php';
require_once $__dir . '/HTMLPurifier/URIFilter/SafeIframe.php';
require_once $__dir . '/HTMLPurifier/URIScheme/data.php';
require_once $__dir . '/HTMLPurifier/URIScheme/file.php';
require_once $__dir . '/HTMLPurifier/URIScheme/ftp.php'; require_once $__dir . '/HTMLPurifier/URIScheme/ftp.php';
require_once $__dir . '/HTMLPurifier/URIScheme/http.php'; require_once $__dir . '/HTMLPurifier/URIScheme/http.php';
require_once $__dir . '/HTMLPurifier/URIScheme/https.php'; require_once $__dir . '/HTMLPurifier/URIScheme/https.php';
require_once $__dir . '/HTMLPurifier/URIScheme/mailto.php'; require_once $__dir . '/HTMLPurifier/URIScheme/mailto.php';
require_once $__dir . '/HTMLPurifier/URIScheme/news.php'; require_once $__dir . '/HTMLPurifier/URIScheme/news.php';
require_once $__dir . '/HTMLPurifier/URIScheme/nntp.php'; require_once $__dir . '/HTMLPurifier/URIScheme/nntp.php';
require_once $__dir . '/HTMLPurifier/URIScheme/tel.php';
require_once $__dir . '/HTMLPurifier/VarParser/Flexible.php'; require_once $__dir . '/HTMLPurifier/VarParser/Flexible.php';
require_once $__dir . '/HTMLPurifier/VarParser/Native.php'; require_once $__dir . '/HTMLPurifier/VarParser/Native.php';

View File

@@ -0,0 +1,71 @@
<?php
/**
 * Bridges the two document representations: a flat stream of
 * HTMLPurifier_Token objects and a tree of HTMLPurifier_Node objects.
 *
 * @note The conversion is not a pure function of its input. The tokens
 *       handed to arborize() are modified in place; every such write is
 *       tagged with a [MUT] marker in the code.
 */
class HTMLPurifier_Arborize
{
    /**
     * Builds a node tree out of a token stream.
     *
     * A synthetic root is created from the HTML definition's info_parent;
     * every node produced from $tokens is attached somewhere beneath it.
     *
     * @param HTMLPurifier_Token[] $tokens Token stream; each token is mutated
     * @param HTMLPurifier_Config $config Supplies the HTML definition
     * @param HTMLPurifier_Context $context Not used by this method
     * @return HTMLPurifier_Node The synthetic root node
     */
    public static function arborize($tokens, $config, $context)
    {
        $htmlDef = $config->getHTMLDefinition();
        $rootToken = new HTMLPurifier_Token_Start($htmlDef->info_parent);
        // Stack of currently open nodes; index 0 is always the root.
        $open = array($rootToken->toNode());
        foreach ($tokens as $token) {
            $token->skip = null; // [MUT]
            $token->carryover = null; // [MUT]
            if (!($token instanceof HTMLPurifier_Token_End)) {
                // Start, empty and text-like tokens all become a child of
                // the innermost open node.
                $child = $token->toNode();
                $open[count($open) - 1]->children[] = $child;
                if ($token instanceof HTMLPurifier_Token_Start) {
                    // Only start tokens open a new nesting level.
                    $open[] = $child;
                }
                continue;
            }
            // End token: close the innermost open node and record where
            // its end tag came from.
            $token->start = null; // [MUT]
            $closed = array_pop($open);
            // Expected but not checked here: $closed->name === $token->name
            // and the end token carries no attributes.
            $closed->endCol = $token->col;
            $closed->endLine = $token->line;
            $closed->endArmor = $token->armor;
        }
        // Expected but not checked here: only the root remains on the stack.
        return $open[0];
    }

    /**
     * Turns a node tree back into a flat list of tokens.
     *
     * The tokens of the node passed in are not emitted: only its
     * descendants appear in the result.
     *
     * @param HTMLPurifier_Node $node Root of the tree to serialise
     * @param HTMLPurifier_Config $config Not used by this method
     * @param HTMLPurifier_Context $context Not used by this method
     * @return HTMLPurifier_Token[] Tokens in document order
     */
    public static function flatten($node, $config, $context)
    {
        $depth = 0;
        // One queue of pending nodes per nesting depth.
        $pending = array($depth => new HTMLPurifier_Queue(array($node)));
        // End tokens waiting to be emitted, grouped by depth.
        $closers = array();
        $result = array();
        do {
            while (!$pending[$depth]->isEmpty()) {
                $current = $pending[$depth]->shift(); // FIFO
                $pair = $current->toTokenPair();
                $opening = $pair[0];
                $closing = $pair[1];
                if ($closing !== null) {
                    $closers[$depth][] = $closing;
                }
                if ($depth > 0) {
                    // Depth 0 is the root, whose own tokens are suppressed.
                    $result[] = $opening;
                }
                if ($current instanceof HTMLPurifier_Node_Element) {
                    // Descend: the children are processed before the
                    // remaining siblings at the current depth.
                    $depth++;
                    $pending[$depth] = new HTMLPurifier_Queue();
                    foreach ($current->children as $kid) {
                        $pending[$depth]->push($kid);
                    }
                }
            }
            // The queue at this depth is exhausted; climb one level and
            // emit the end tokens that were waiting there.
            $depth--;
            if ($depth != 0 && isset($closers[$depth])) {
                while ($closer = array_pop($closers[$depth])) {
                    $result[] = $closer;
                }
            }
        } while ($depth > 0);
        return $result;
    }
}

View File

@@ -8,7 +8,8 @@ class HTMLPurifier_AttrCollections
{ {
/** /**
* Associative array of attribute collections, indexed by name * Associative array of attribute collections, indexed by name.
* @type array
*/ */
public $info = array(); public $info = array();
@@ -16,10 +17,16 @@ class HTMLPurifier_AttrCollections
* Performs all expansions on internal data for use by other inclusions * Performs all expansions on internal data for use by other inclusions
* It also collects all attribute collection extensions from * It also collects all attribute collection extensions from
* modules * modules
* @param $attr_types HTMLPurifier_AttrTypes instance * @param HTMLPurifier_AttrTypes $attr_types HTMLPurifier_AttrTypes instance
* @param $modules Hash array of HTMLPurifier_HTMLModule members * @param HTMLPurifier_HTMLModule[] $modules Hash array of HTMLPurifier_HTMLModule members
*/ */
public function __construct($attr_types, $modules) { public function __construct($attr_types, $modules)
{
$this->doConstruct($attr_types, $modules);
}
public function doConstruct($attr_types, $modules)
{
// load extensions from the modules // load extensions from the modules
foreach ($modules as $module) { foreach ($modules as $module) {
foreach ($module->attr_collections as $coll_i => $coll) { foreach ($module->attr_collections as $coll_i => $coll) {
@@ -30,7 +37,9 @@ class HTMLPurifier_AttrCollections
if ($attr_i === 0 && isset($this->info[$coll_i][$attr_i])) { if ($attr_i === 0 && isset($this->info[$coll_i][$attr_i])) {
// merge in includes // merge in includes
$this->info[$coll_i][$attr_i] = array_merge( $this->info[$coll_i][$attr_i] = array_merge(
$this->info[$coll_i][$attr_i], $attr); $this->info[$coll_i][$attr_i],
$attr
);
continue; continue;
} }
$this->info[$coll_i][$attr_i] = $attr; $this->info[$coll_i][$attr_i] = $attr;
@@ -49,20 +58,29 @@ class HTMLPurifier_AttrCollections
/** /**
* Takes a reference to an attribute associative array and performs * Takes a reference to an attribute associative array and performs
* all inclusions specified by the zero index. * all inclusions specified by the zero index.
* @param &$attr Reference to attribute array * @param array &$attr Reference to attribute array
*/ */
public function performInclusions(&$attr) { public function performInclusions(&$attr)
if (!isset($attr[0])) return; {
if (!isset($attr[0])) {
return;
}
$merge = $attr[0]; $merge = $attr[0];
$seen = array(); // recursion guard $seen = array(); // recursion guard
// loop through all the inclusions // loop through all the inclusions
for ($i = 0; isset($merge[$i]); $i++) { for ($i = 0; isset($merge[$i]); $i++) {
if (isset($seen[$merge[$i]])) continue; if (isset($seen[$merge[$i]])) {
continue;
}
$seen[$merge[$i]] = true; $seen[$merge[$i]] = true;
// foreach attribute of the inclusion, copy it over // foreach attribute of the inclusion, copy it over
if (!isset($this->info[$merge[$i]])) continue; if (!isset($this->info[$merge[$i]])) {
continue;
}
foreach ($this->info[$merge[$i]] as $key => $value) { foreach ($this->info[$merge[$i]] as $key => $value) {
if (isset($attr[$key])) continue; // also catches more inclusions if (isset($attr[$key])) {
continue;
} // also catches more inclusions
$attr[$key] = $value; $attr[$key] = $value;
} }
if (isset($this->info[$merge[$i]][0])) { if (isset($this->info[$merge[$i]][0])) {
@@ -76,20 +94,24 @@ class HTMLPurifier_AttrCollections
/** /**
* Expands all string identifiers in an attribute array by replacing * Expands all string identifiers in an attribute array by replacing
* them with the appropriate values inside HTMLPurifier_AttrTypes * them with the appropriate values inside HTMLPurifier_AttrTypes
* @param &$attr Reference to attribute array * @param array &$attr Reference to attribute array
* @param $attr_types HTMLPurifier_AttrTypes instance * @param HTMLPurifier_AttrTypes $attr_types HTMLPurifier_AttrTypes instance
*/ */
public function expandIdentifiers(&$attr, $attr_types) { public function expandIdentifiers(&$attr, $attr_types)
{
// because foreach will process new elements we add, make sure we // because foreach will process new elements we add, make sure we
// skip duplicates // skip duplicates
$processed = array(); $processed = array();
foreach ($attr as $def_i => $def) { foreach ($attr as $def_i => $def) {
// skip inclusions // skip inclusions
if ($def_i === 0) continue; if ($def_i === 0) {
continue;
}
if (isset($processed[$def_i])) continue; if (isset($processed[$def_i])) {
continue;
}
// determine whether or not attribute is required // determine whether or not attribute is required
if ($required = (strpos($def_i, '*') !== false)) { if ($required = (strpos($def_i, '*') !== false)) {
@@ -120,9 +142,7 @@ class HTMLPurifier_AttrCollections
unset($attr[$def_i]); unset($attr[$def_i]);
} }
} }
} }
} }
// vim: et sw=4 sts=4 // vim: et sw=4 sts=4

View File

@@ -14,23 +14,25 @@ abstract class HTMLPurifier_AttrDef
{ {
/** /**
* Tells us whether or not an HTML attribute is minimized. Has no * Tells us whether or not an HTML attribute is minimized.
* meaning in other contexts. * Has no meaning in other contexts.
* @type bool
*/ */
public $minimized = false; public $minimized = false;
/** /**
* Tells us whether or not an HTML attribute is required. Has no * Tells us whether or not an HTML attribute is required.
* meaning in other contexts * Has no meaning in other contexts
* @type bool
*/ */
public $required = false; public $required = false;
/** /**
* Validates and cleans passed string according to a definition. * Validates and cleans passed string according to a definition.
* *
* @param $string String to be validated and cleaned. * @param string $string String to be validated and cleaned.
* @param $config Mandatory HTMLPurifier_Config object. * @param HTMLPurifier_Config $config Mandatory HTMLPurifier_Config object.
* @param $context Mandatory HTMLPurifier_AttrContext object. * @param HTMLPurifier_Context $context Mandatory HTMLPurifier_Context object.
*/ */
abstract public function validate($string, $config, $context); abstract public function validate($string, $config, $context);
@@ -55,7 +57,8 @@ abstract class HTMLPurifier_AttrDef
* parsing XML, thus, this behavior may still be correct. We * parsing XML, thus, this behavior may still be correct. We
* assume that newlines have been normalized. * assume that newlines have been normalized.
*/ */
public function parseCDATA($string) { public function parseCDATA($string)
{
$string = trim($string); $string = trim($string);
$string = str_replace(array("\n", "\t", "\r"), ' ', $string); $string = str_replace(array("\n", "\t", "\r"), ' ', $string);
return $string; return $string;
@@ -63,10 +66,11 @@ abstract class HTMLPurifier_AttrDef
/** /**
* Factory method for creating this class from a string. * Factory method for creating this class from a string.
* @param $string String construction info * @param string $string String construction info
* @return Created AttrDef object corresponding to $string * @return HTMLPurifier_AttrDef Created AttrDef object corresponding to $string
*/ */
public function make($string) { public function make($string)
{
// default implementation, return a flyweight of this object. // default implementation, return a flyweight of this object.
// If $string has an effect on the returned object (i.e. you // If $string has an effect on the returned object (i.e. you
// need to overload this method), it is best // need to overload this method), it is best
@@ -77,11 +81,64 @@ abstract class HTMLPurifier_AttrDef
/** /**
* Removes spaces from rgb(0, 0, 0) so that shorthand CSS properties work * Removes spaces from rgb(0, 0, 0) so that shorthand CSS properties work
* properly. THIS IS A HACK! * properly. THIS IS A HACK!
* @param string $string a CSS colour definition
* @return string
*/ */
protected function mungeRgb($string) { protected function mungeRgb($string)
return preg_replace('/rgb\((\d+)\s*,\s*(\d+)\s*,\s*(\d+)\)/', 'rgb(\1,\2,\3)', $string); {
$p = '\s*(\d+(\.\d+)?([%]?))\s*';
if (preg_match('/(rgba|hsla)\(/', $string)) {
return preg_replace('/(rgba|hsla)\('.$p.','.$p.','.$p.','.$p.'\)/', '\1(\2,\5,\8,\11)', $string);
}
return preg_replace('/(rgb|hsl)\('.$p.','.$p.','.$p.'\)/', '\1(\2,\5,\8)', $string);
} }
/**
 * Parses a possibly escaped CSS string and returns the "pure"
 * version of it.
 *
 * Backslash handling, as implemented below:
 *  - a backslash followed by one to six hex digits is replaced by the
 *    character HTMLPurifier_Encoder::unichr() yields for that number; a
 *    single whitespace character directly after the digits is consumed
 *    as the escape terminator;
 *  - a backslash followed by a newline is removed together with the
 *    newline;
 *  - a backslash followed by any other character yields that character;
 *  - a lone backslash at the very end of the string is kept as-is.
 *
 * @param string $string Possibly escaped CSS string
 * @return string The string with escapes expanded
 */
protected function expandCSSEscape($string)
{
    // flexibly parse it
    $ret = '';
    for ($i = 0, $c = strlen($string); $i < $c; $i++) {
        if ($string[$i] === '\\') {
            $i++;
            if ($i >= $c) {
                // nothing left to escape: keep the backslash literally
                $ret .= '\\';
                break;
            }
            if (ctype_xdigit($string[$i])) {
                // collect up to six hex digits
                $code = $string[$i];
                for ($a = 1, $i++; $i < $c && $a < 6; $i++, $a++) {
                    if (!ctype_xdigit($string[$i])) {
                        break;
                    }
                    $code .= $string[$i];
                }
                // We have to be extremely careful when adding
                // new characters, to make sure we're not breaking
                // the encoding.
                $char = HTMLPurifier_Encoder::unichr(hexdec($code));
                if (HTMLPurifier_Encoder::cleanUTF8($char) !== '') {
                    $ret .= $char;
                }
                // $i now sits on the first character after the digits.
                // If it is not whitespace it is ordinary content, so step
                // back and let the loop's $i++ land on it again. This has
                // to happen even when the escape itself was discarded,
                // otherwise the character after a rejected escape would
                // be silently dropped.
                if ($i < $c && trim($string[$i]) !== '') {
                    $i--;
                }
                continue;
            }
            if ($string[$i] === "\n") {
                // escaped newline: drop both characters
                continue;
            }
        }
        $ret .= $string[$i];
    }
    return $ret;
}
} }
// vim: et sw=4 sts=4 // vim: et sw=4 sts=4

View File

@@ -14,20 +14,53 @@
class HTMLPurifier_AttrDef_CSS extends HTMLPurifier_AttrDef class HTMLPurifier_AttrDef_CSS extends HTMLPurifier_AttrDef
{ {
public function validate($css, $config, $context) { /**
* @param string $css
* @param HTMLPurifier_Config $config
* @param HTMLPurifier_Context $context
* @return bool|string
*/
public function validate($css, $config, $context)
{
$css = $this->parseCDATA($css); $css = $this->parseCDATA($css);
$definition = $config->getCSSDefinition(); $definition = $config->getCSSDefinition();
$allow_duplicates = $config->get("CSS.AllowDuplicates");
// we're going to break the spec and explode by semicolons.
// This is because semicolon rarely appears in escaped form
// Doing this is generally flaky but fast
// IT MIGHT APPEAR IN URIs, see HTMLPurifier_AttrDef_CSSURI
// for details
$declarations = explode(';', $css); // According to the CSS2.1 spec, the places where a
// non-delimiting semicolon can appear are in strings
// escape sequences. So here is some dumb hack to
// handle quotes.
$len = strlen($css);
$accum = "";
$declarations = array();
$quoted = false;
for ($i = 0; $i < $len; $i++) {
$c = strcspn($css, ";'\"", $i);
$accum .= substr($css, $i, $c);
$i += $c;
if ($i == $len) break;
$d = $css[$i];
if ($quoted) {
$accum .= $d;
if ($d == $quoted) {
$quoted = false;
}
} else {
if ($d == ";") {
$declarations[] = $accum;
$accum = "";
} else {
$accum .= $d;
$quoted = $d;
}
}
}
if ($accum != "") $declarations[] = $accum;
$propvalues = array(); $propvalues = array();
$new_declarations = '';
/** /**
* Name of the current CSS property being validated. * Name of the current CSS property being validated.
@@ -36,35 +69,52 @@ class HTMLPurifier_AttrDef_CSS extends HTMLPurifier_AttrDef
$context->register('CurrentCSSProperty', $property); $context->register('CurrentCSSProperty', $property);
foreach ($declarations as $declaration) { foreach ($declarations as $declaration) {
if (!$declaration) continue; if (!$declaration) {
if (!strpos($declaration, ':')) continue; continue;
}
if (!strpos($declaration, ':')) {
continue;
}
list($property, $value) = explode(':', $declaration, 2); list($property, $value) = explode(':', $declaration, 2);
$property = trim($property); $property = trim($property);
$value = trim($value); $value = trim($value);
$ok = false; $ok = false;
do { do {
if (isset($definition->info[$property])) { if (isset($definition->info[$property])) {
$ok = true; $ok = true;
break; break;
} }
if (ctype_lower($property)) break; if (ctype_lower($property)) {
break;
}
$property = strtolower($property); $property = strtolower($property);
if (isset($definition->info[$property])) { if (isset($definition->info[$property])) {
$ok = true; $ok = true;
break; break;
} }
} while(0); } while (0);
if (!$ok) continue; if (!$ok) {
continue;
}
// inefficient call, since the validator will do this again // inefficient call, since the validator will do this again
if (strtolower(trim($value)) !== 'inherit') { if (strtolower(trim($value)) !== 'inherit') {
// inherit works for everything (but only on the base property) // inherit works for everything (but only on the base property)
$result = $definition->info[$property]->validate( $result = $definition->info[$property]->validate(
$value, $config, $context ); $value,
$config,
$context
);
} else { } else {
$result = 'inherit'; $result = 'inherit';
} }
if ($result === false) continue; if ($result === false) {
$propvalues[$property] = $result; continue;
}
if ($allow_duplicates) {
$new_declarations .= "$property:$result;";
} else {
$propvalues[$property] = $result;
}
} }
$context->destroy('CurrentCSSProperty'); $context->destroy('CurrentCSSProperty');
@@ -73,7 +123,6 @@ class HTMLPurifier_AttrDef_CSS extends HTMLPurifier_AttrDef
// slightly inefficient, but it's the only way of getting rid of // slightly inefficient, but it's the only way of getting rid of
// duplicates. Perhaps config to optimize it, but not now. // duplicates. Perhaps config to optimize it, but not now.
$new_declarations = '';
foreach ($propvalues as $prop => $value) { foreach ($propvalues as $prop => $value) {
$new_declarations .= "$prop:$value;"; $new_declarations .= "$prop:$value;";
} }

View File

@@ -3,19 +3,32 @@
class HTMLPurifier_AttrDef_CSS_AlphaValue extends HTMLPurifier_AttrDef_CSS_Number class HTMLPurifier_AttrDef_CSS_AlphaValue extends HTMLPurifier_AttrDef_CSS_Number
{ {
public function __construct() { public function __construct()
{
parent::__construct(false); // opacity is non-negative, but we will clamp it parent::__construct(false); // opacity is non-negative, but we will clamp it
} }
public function validate($number, $config, $context) { /**
* @param string $number
* @param HTMLPurifier_Config $config
* @param HTMLPurifier_Context $context
* @return string
*/
public function validate($number, $config, $context)
{
$result = parent::validate($number, $config, $context); $result = parent::validate($number, $config, $context);
if ($result === false) return $result; if ($result === false) {
$float = (float) $result; return $result;
if ($float < 0.0) $result = '0'; }
if ($float > 1.0) $result = '1'; $float = (float)$result;
if ($float < 0.0) {
$result = '0';
}
if ($float > 1.0) {
$result = '1';
}
return $result; return $result;
} }
} }
// vim: et sw=4 sts=4 // vim: et sw=4 sts=4

View File

@@ -9,11 +9,16 @@ class HTMLPurifier_AttrDef_CSS_Background extends HTMLPurifier_AttrDef
/** /**
* Local copy of component validators. * Local copy of component validators.
* @type HTMLPurifier_AttrDef[]
* @note See HTMLPurifier_AttrDef_Font::$info for a similar impl. * @note See HTMLPurifier_AttrDef_Font::$info for a similar impl.
*/ */
protected $info; protected $info;
public function __construct($config) { /**
* @param HTMLPurifier_Config $config
*/
public function __construct($config)
{
$def = $config->getCSSDefinition(); $def = $config->getCSSDefinition();
$this->info['background-color'] = $def->info['background-color']; $this->info['background-color'] = $def->info['background-color'];
$this->info['background-image'] = $def->info['background-image']; $this->info['background-image'] = $def->info['background-image'];
@@ -22,40 +27,55 @@ class HTMLPurifier_AttrDef_CSS_Background extends HTMLPurifier_AttrDef
$this->info['background-position'] = $def->info['background-position']; $this->info['background-position'] = $def->info['background-position'];
} }
public function validate($string, $config, $context) { /**
* @param string $string
* @param HTMLPurifier_Config $config
* @param HTMLPurifier_Context $context
* @return bool|string
*/
public function validate($string, $config, $context)
{
// regular pre-processing // regular pre-processing
$string = $this->parseCDATA($string); $string = $this->parseCDATA($string);
if ($string === '') return false; if ($string === '') {
return false;
}
// munge rgb() decl if necessary // munge rgb() decl if necessary
$string = $this->mungeRgb($string); $string = $this->mungeRgb($string);
// assumes URI doesn't have spaces in it // assumes URI doesn't have spaces in it
$bits = explode(' ', strtolower($string)); // bits to process $bits = explode(' ', $string); // bits to process
$caught = array(); $caught = array();
$caught['color'] = false; $caught['color'] = false;
$caught['image'] = false; $caught['image'] = false;
$caught['repeat'] = false; $caught['repeat'] = false;
$caught['attachment'] = false; $caught['attachment'] = false;
$caught['position'] = false; $caught['position'] = false;
$i = 0; // number of catches $i = 0; // number of catches
$none = false;
foreach ($bits as $bit) { foreach ($bits as $bit) {
if ($bit === '') continue; if ($bit === '') {
continue;
}
foreach ($caught as $key => $status) { foreach ($caught as $key => $status) {
if ($key != 'position') { if ($key != 'position') {
if ($status !== false) continue; if ($status !== false) {
continue;
}
$r = $this->info['background-' . $key]->validate($bit, $config, $context); $r = $this->info['background-' . $key]->validate($bit, $config, $context);
} else { } else {
$r = $bit; $r = $bit;
} }
if ($r === false) continue; if ($r === false) {
continue;
}
if ($key == 'position') { if ($key == 'position') {
if ($caught[$key] === false) $caught[$key] = ''; if ($caught[$key] === false) {
$caught[$key] = '';
}
$caught[$key] .= $r . ' '; $caught[$key] .= $r . ' ';
} else { } else {
$caught[$key] = $r; $caught[$key] = $r;
@@ -65,7 +85,9 @@ class HTMLPurifier_AttrDef_CSS_Background extends HTMLPurifier_AttrDef
} }
} }
if (!$i) return false; if (!$i) {
return false;
}
if ($caught['position'] !== false) { if ($caught['position'] !== false) {
$caught['position'] = $this->info['background-position']-> $caught['position'] = $this->info['background-position']->
validate($caught['position'], $config, $context); validate($caught['position'], $config, $context);
@@ -73,15 +95,17 @@ class HTMLPurifier_AttrDef_CSS_Background extends HTMLPurifier_AttrDef
$ret = array(); $ret = array();
foreach ($caught as $value) { foreach ($caught as $value) {
if ($value === false) continue; if ($value === false) {
continue;
}
$ret[] = $value; $ret[] = $value;
} }
if (empty($ret)) return false; if (empty($ret)) {
return false;
}
return implode(' ', $ret); return implode(' ', $ret);
} }
} }
// vim: et sw=4 sts=4 // vim: et sw=4 sts=4

View File

@@ -44,22 +44,38 @@
class HTMLPurifier_AttrDef_CSS_BackgroundPosition extends HTMLPurifier_AttrDef class HTMLPurifier_AttrDef_CSS_BackgroundPosition extends HTMLPurifier_AttrDef
{ {
/**
* @type HTMLPurifier_AttrDef_CSS_Length
*/
protected $length; protected $length;
/**
* @type HTMLPurifier_AttrDef_CSS_Percentage
*/
protected $percentage; protected $percentage;
public function __construct() { public function __construct()
$this->length = new HTMLPurifier_AttrDef_CSS_Length(); {
$this->length = new HTMLPurifier_AttrDef_CSS_Length();
$this->percentage = new HTMLPurifier_AttrDef_CSS_Percentage(); $this->percentage = new HTMLPurifier_AttrDef_CSS_Percentage();
} }
public function validate($string, $config, $context) { /**
* @param string $string
* @param HTMLPurifier_Config $config
* @param HTMLPurifier_Context $context
* @return bool|string
*/
public function validate($string, $config, $context)
{
$string = $this->parseCDATA($string); $string = $this->parseCDATA($string);
$bits = explode(' ', $string); $bits = explode(' ', $string);
$keywords = array(); $keywords = array();
$keywords['h'] = false; // left, right $keywords['h'] = false; // left, right
$keywords['v'] = false; // top, bottom $keywords['v'] = false; // top, bottom
$keywords['c'] = false; // center $keywords['ch'] = false; // center (first word)
$keywords['cv'] = false; // center (second word)
$measures = array(); $measures = array();
$i = 0; $i = 0;
@@ -73,12 +89,21 @@ class HTMLPurifier_AttrDef_CSS_BackgroundPosition extends HTMLPurifier_AttrDef
); );
foreach ($bits as $bit) { foreach ($bits as $bit) {
if ($bit === '') continue; if ($bit === '') {
continue;
}
// test for keyword // test for keyword
$lbit = ctype_lower($bit) ? $bit : strtolower($bit); $lbit = ctype_lower($bit) ? $bit : strtolower($bit);
if (isset($lookup[$lbit])) { if (isset($lookup[$lbit])) {
$status = $lookup[$lbit]; $status = $lookup[$lbit];
if ($status == 'c') {
if ($i == 0) {
$status = 'ch';
} else {
$status = 'cv';
}
}
$keywords[$status] = $lbit; $keywords[$status] = $lbit;
$i++; $i++;
} }
@@ -96,31 +121,37 @@ class HTMLPurifier_AttrDef_CSS_BackgroundPosition extends HTMLPurifier_AttrDef
$measures[] = $r; $measures[] = $r;
$i++; $i++;
} }
} }
if (!$i) return false; // no valid values were caught if (!$i) {
return false;
} // no valid values were caught
$ret = array(); $ret = array();
// first keyword // first keyword
if ($keywords['h']) $ret[] = $keywords['h']; if ($keywords['h']) {
elseif (count($measures)) $ret[] = array_shift($measures); $ret[] = $keywords['h'];
elseif ($keywords['c']) { } elseif ($keywords['ch']) {
$ret[] = $keywords['c']; $ret[] = $keywords['ch'];
$keywords['c'] = false; // prevent re-use: center = center center $keywords['cv'] = false; // prevent re-use: center = center center
} elseif (count($measures)) {
$ret[] = array_shift($measures);
} }
if ($keywords['v']) $ret[] = $keywords['v']; if ($keywords['v']) {
elseif (count($measures)) $ret[] = array_shift($measures); $ret[] = $keywords['v'];
elseif ($keywords['c']) $ret[] = $keywords['c']; } elseif ($keywords['cv']) {
$ret[] = $keywords['cv'];
} elseif (count($measures)) {
$ret[] = array_shift($measures);
}
if (empty($ret)) return false; if (empty($ret)) {
return false;
}
return implode(' ', $ret); return implode(' ', $ret);
} }
} }
// vim: et sw=4 sts=4 // vim: et sw=4 sts=4

View File

@@ -8,17 +8,29 @@ class HTMLPurifier_AttrDef_CSS_Border extends HTMLPurifier_AttrDef
/** /**
* Local copy of properties this property is shorthand for. * Local copy of properties this property is shorthand for.
* @type HTMLPurifier_AttrDef[]
*/ */
protected $info = array(); protected $info = array();
public function __construct($config) { /**
* @param HTMLPurifier_Config $config
*/
public function __construct($config)
{
$def = $config->getCSSDefinition(); $def = $config->getCSSDefinition();
$this->info['border-width'] = $def->info['border-width']; $this->info['border-width'] = $def->info['border-width'];
$this->info['border-style'] = $def->info['border-style']; $this->info['border-style'] = $def->info['border-style'];
$this->info['border-top-color'] = $def->info['border-top-color']; $this->info['border-top-color'] = $def->info['border-top-color'];
} }
public function validate($string, $config, $context) { /**
* @param string $string
* @param HTMLPurifier_Config $config
* @param HTMLPurifier_Context $context
* @return bool|string
*/
public function validate($string, $config, $context)
{
$string = $this->parseCDATA($string); $string = $this->parseCDATA($string);
$string = $this->mungeRgb($string); $string = $this->mungeRgb($string);
$bits = explode(' ', $string); $bits = explode(' ', $string);
@@ -26,7 +38,9 @@ class HTMLPurifier_AttrDef_CSS_Border extends HTMLPurifier_AttrDef
$ret = ''; // return value $ret = ''; // return value
foreach ($bits as $bit) { foreach ($bits as $bit) {
foreach ($this->info as $propname => $validator) { foreach ($this->info as $propname => $validator) {
if (isset($done[$propname])) continue; if (isset($done[$propname])) {
continue;
}
$r = $validator->validate($bit, $config, $context); $r = $validator->validate($bit, $config, $context);
if ($r !== false) { if ($r !== false) {
$ret .= $r . ' '; $ret .= $r . ' ';
@@ -37,7 +51,6 @@ class HTMLPurifier_AttrDef_CSS_Border extends HTMLPurifier_AttrDef
} }
return rtrim($ret); return rtrim($ret);
} }
} }
// vim: et sw=4 sts=4 // vim: et sw=4 sts=4

View File

@@ -6,56 +6,137 @@
class HTMLPurifier_AttrDef_CSS_Color extends HTMLPurifier_AttrDef class HTMLPurifier_AttrDef_CSS_Color extends HTMLPurifier_AttrDef
{ {
public function validate($color, $config, $context) { /**
* @type HTMLPurifier_AttrDef_CSS_AlphaValue
*/
protected $alpha;
public function __construct()
{
$this->alpha = new HTMLPurifier_AttrDef_CSS_AlphaValue();
}
/**
* @param string $color
* @param HTMLPurifier_Config $config
* @param HTMLPurifier_Context $context
* @return bool|string
*/
public function validate($color, $config, $context)
{
static $colors = null; static $colors = null;
if ($colors === null) $colors = $config->get('Core', 'ColorKeywords'); if ($colors === null) {
$colors = $config->get('Core.ColorKeywords');
}
$color = trim($color); $color = trim($color);
if ($color === '') return false; if ($color === '') {
return false;
}
$lower = strtolower($color); $lower = strtolower($color);
if (isset($colors[$lower])) return $colors[$lower]; if (isset($colors[$lower])) {
return $colors[$lower];
}
if (strpos($color, 'rgb(') !== false) { if (preg_match('#(rgb|rgba|hsl|hsla)\(#', $color, $matches) === 1) {
// rgb literal handling
$length = strlen($color); $length = strlen($color);
if (strpos($color, ')') !== $length - 1) return false; if (strpos($color, ')') !== $length - 1) {
$triad = substr($color, 4, $length - 4 - 1); return false;
$parts = explode(',', $triad); }
if (count($parts) !== 3) return false;
$type = false; // to ensure that they're all the same type // get used function : rgb, rgba, hsl or hsla
$function = $matches[1];
$parameters_size = 3;
$alpha_channel = false;
if (substr($function, -1) === 'a') {
$parameters_size = 4;
$alpha_channel = true;
}
/*
* Allowed types for values :
* parameter_position => [type => max_value]
*/
$allowed_types = array(
1 => array('percentage' => 100, 'integer' => 255),
2 => array('percentage' => 100, 'integer' => 255),
3 => array('percentage' => 100, 'integer' => 255),
);
$allow_different_types = false;
if (strpos($function, 'hsl') !== false) {
$allowed_types = array(
1 => array('integer' => 360),
2 => array('percentage' => 100),
3 => array('percentage' => 100),
);
$allow_different_types = true;
}
$values = trim(str_replace($function, '', $color), ' ()');
$parts = explode(',', $values);
if (count($parts) !== $parameters_size) {
return false;
}
$type = false;
$new_parts = array(); $new_parts = array();
$i = 0;
foreach ($parts as $part) { foreach ($parts as $part) {
$i++;
$part = trim($part); $part = trim($part);
if ($part === '') return false;
$length = strlen($part); if ($part === '') {
if ($part[$length - 1] === '%') { return false;
// handle percents }
if (!$type) {
$type = 'percentage'; // different check for alpha channel
} elseif ($type !== 'percentage') { if ($alpha_channel === true && $i === count($parts)) {
$result = $this->alpha->validate($part, $config, $context);
if ($result === false) {
return false; return false;
} }
$num = (float) substr($part, 0, $length - 1);
if ($num < 0) $num = 0; $new_parts[] = (string)$result;
if ($num > 100) $num = 100; continue;
$new_parts[] = "$num%"; }
if (substr($part, -1) === '%') {
$current_type = 'percentage';
} else { } else {
// handle integers $current_type = 'integer';
if (!$type) { }
$type = 'integer';
} elseif ($type !== 'integer') { if (!array_key_exists($current_type, $allowed_types[$i])) {
return false; return false;
} }
$num = (int) $part;
if ($num < 0) $num = 0; if (!$type) {
if ($num > 255) $num = 255; $type = $current_type;
$new_parts[] = (string) $num; }
if ($allow_different_types === false && $type != $current_type) {
return false;
}
$max_value = $allowed_types[$i][$current_type];
if ($current_type == 'integer') {
// Return value between range 0 -> $max_value
$new_parts[] = (int)max(min($part, $max_value), 0);
} elseif ($current_type == 'percentage') {
$new_parts[] = (float)max(min(rtrim($part, '%'), $max_value), 0) . '%';
} }
} }
$new_triad = implode(',', $new_parts);
$color = "rgb($new_triad)"; $new_values = implode(',', $new_parts);
$color = $function . '(' . $new_values . ')';
} else { } else {
// hexadecimal handling // hexadecimal handling
if ($color[0] === '#') { if ($color[0] === '#') {
@@ -65,12 +146,14 @@ class HTMLPurifier_AttrDef_CSS_Color extends HTMLPurifier_AttrDef
$color = '#' . $color; $color = '#' . $color;
} }
$length = strlen($hex); $length = strlen($hex);
if ($length !== 3 && $length !== 6) return false; if ($length !== 3 && $length !== 6) {
if (!ctype_xdigit($hex)) return false; return false;
}
if (!ctype_xdigit($hex)) {
return false;
}
} }
return $color; return $color;
} }
} }

View File

@@ -13,26 +13,36 @@ class HTMLPurifier_AttrDef_CSS_Composite extends HTMLPurifier_AttrDef
{ {
/** /**
* List of HTMLPurifier_AttrDef objects that may process strings * List of objects that may process strings.
* @type HTMLPurifier_AttrDef[]
* @todo Make protected * @todo Make protected
*/ */
public $defs; public $defs;
/** /**
* @param $defs List of HTMLPurifier_AttrDef objects * @param HTMLPurifier_AttrDef[] $defs List of HTMLPurifier_AttrDef objects
*/ */
public function __construct($defs) { public function __construct($defs)
{
$this->defs = $defs; $this->defs = $defs;
} }
public function validate($string, $config, $context) { /**
* @param string $string
* @param HTMLPurifier_Config $config
* @param HTMLPurifier_Context $context
* @return bool|string
*/
public function validate($string, $config, $context)
{
foreach ($this->defs as $i => $def) { foreach ($this->defs as $i => $def) {
$result = $this->defs[$i]->validate($string, $config, $context); $result = $this->defs[$i]->validate($string, $config, $context);
if ($result !== false) return $result; if ($result !== false) {
return $result;
}
} }
return false; return false;
} }
} }
// vim: et sw=4 sts=4 // vim: et sw=4 sts=4

View File

@@ -5,22 +5,38 @@
*/ */
class HTMLPurifier_AttrDef_CSS_DenyElementDecorator extends HTMLPurifier_AttrDef class HTMLPurifier_AttrDef_CSS_DenyElementDecorator extends HTMLPurifier_AttrDef
{ {
public $def, $element; /**
* @type HTMLPurifier_AttrDef
*/
public $def;
/**
* @type string
*/
public $element;
/** /**
* @param $def Definition to wrap * @param HTMLPurifier_AttrDef $def Definition to wrap
* @param $element Element to deny * @param string $element Element to deny
*/ */
public function __construct($def, $element) { public function __construct($def, $element)
{
$this->def = $def; $this->def = $def;
$this->element = $element; $this->element = $element;
} }
/** /**
* Checks if CurrentToken is set and equal to $this->element * Checks if CurrentToken is set and equal to $this->element
* @param string $string
* @param HTMLPurifier_Config $config
* @param HTMLPurifier_Context $context
* @return bool|string
*/ */
public function validate($string, $config, $context) { public function validate($string, $config, $context)
{
$token = $context->get('CurrentToken', true); $token = $context->get('CurrentToken', true);
if ($token && $token->name == $this->element) return false; if ($token && $token->name == $this->element) {
return false;
}
return $this->def->validate($string, $config, $context); return $this->def->validate($string, $config, $context);
} }
} }

View File

@@ -7,23 +7,37 @@
*/ */
class HTMLPurifier_AttrDef_CSS_Filter extends HTMLPurifier_AttrDef class HTMLPurifier_AttrDef_CSS_Filter extends HTMLPurifier_AttrDef
{ {
/**
* @type HTMLPurifier_AttrDef_Integer
*/
protected $intValidator; protected $intValidator;
public function __construct() { public function __construct()
{
$this->intValidator = new HTMLPurifier_AttrDef_Integer(); $this->intValidator = new HTMLPurifier_AttrDef_Integer();
} }
public function validate($value, $config, $context) { /**
* @param string $value
* @param HTMLPurifier_Config $config
* @param HTMLPurifier_Context $context
* @return bool|string
*/
public function validate($value, $config, $context)
{
$value = $this->parseCDATA($value); $value = $this->parseCDATA($value);
if ($value === 'none') return $value; if ($value === 'none') {
return $value;
}
// if we looped this we could support multiple filters // if we looped this we could support multiple filters
$function_length = strcspn($value, '('); $function_length = strcspn($value, '(');
$function = trim(substr($value, 0, $function_length)); $function = trim(substr($value, 0, $function_length));
if ($function !== 'alpha' && if ($function !== 'alpha' &&
$function !== 'Alpha' && $function !== 'Alpha' &&
$function !== 'progid:DXImageTransform.Microsoft.Alpha' $function !== 'progid:DXImageTransform.Microsoft.Alpha'
) return false; ) {
return false;
}
$cursor = $function_length + 1; $cursor = $function_length + 1;
$parameters_length = strcspn($value, ')', $cursor); $parameters_length = strcspn($value, ')', $cursor);
$parameters = substr($value, $cursor, $parameters_length); $parameters = substr($value, $cursor, $parameters_length);
@@ -32,15 +46,25 @@ class HTMLPurifier_AttrDef_CSS_Filter extends HTMLPurifier_AttrDef
$lookup = array(); $lookup = array();
foreach ($params as $param) { foreach ($params as $param) {
list($key, $value) = explode('=', $param); list($key, $value) = explode('=', $param);
$key = trim($key); $key = trim($key);
$value = trim($value); $value = trim($value);
if (isset($lookup[$key])) continue; if (isset($lookup[$key])) {
if ($key !== 'opacity') continue; continue;
}
if ($key !== 'opacity') {
continue;
}
$value = $this->intValidator->validate($value, $config, $context); $value = $this->intValidator->validate($value, $config, $context);
if ($value === false) continue; if ($value === false) {
$int = (int) $value; continue;
if ($int > 100) $value = '100'; }
if ($int < 0) $value = '0'; $int = (int)$value;
if ($int > 100) {
$value = '100';
}
if ($int < 0) {
$value = '0';
}
$ret_params[] = "$key=$value"; $ret_params[] = "$key=$value";
$lookup[$key] = true; $lookup[$key] = true;
} }
@@ -48,7 +72,6 @@ class HTMLPurifier_AttrDef_CSS_Filter extends HTMLPurifier_AttrDef
$ret_function = "$function($ret_parameters)"; $ret_function = "$function($ret_parameters)";
return $ret_function; return $ret_function;
} }
} }
// vim: et sw=4 sts=4 // vim: et sw=4 sts=4

View File

@@ -7,8 +7,8 @@ class HTMLPurifier_AttrDef_CSS_Font extends HTMLPurifier_AttrDef
{ {
/** /**
* Local copy of component validators. * Local copy of validators
* * @type HTMLPurifier_AttrDef[]
* @note If we moved specific CSS property definitions to their own * @note If we moved specific CSS property definitions to their own
* classes instead of having them be assembled at run time by * classes instead of having them be assembled at run time by
* CSSDefinition, this wouldn't be necessary. We'd instantiate * CSSDefinition, this wouldn't be necessary. We'd instantiate
@@ -16,18 +16,28 @@ class HTMLPurifier_AttrDef_CSS_Font extends HTMLPurifier_AttrDef
*/ */
protected $info = array(); protected $info = array();
public function __construct($config) { /**
* @param HTMLPurifier_Config $config
*/
public function __construct($config)
{
$def = $config->getCSSDefinition(); $def = $config->getCSSDefinition();
$this->info['font-style'] = $def->info['font-style']; $this->info['font-style'] = $def->info['font-style'];
$this->info['font-variant'] = $def->info['font-variant']; $this->info['font-variant'] = $def->info['font-variant'];
$this->info['font-weight'] = $def->info['font-weight']; $this->info['font-weight'] = $def->info['font-weight'];
$this->info['font-size'] = $def->info['font-size']; $this->info['font-size'] = $def->info['font-size'];
$this->info['line-height'] = $def->info['line-height']; $this->info['line-height'] = $def->info['line-height'];
$this->info['font-family'] = $def->info['font-family']; $this->info['font-family'] = $def->info['font-family'];
} }
public function validate($string, $config, $context) { /**
* @param string $string
* @param HTMLPurifier_Config $config
* @param HTMLPurifier_Context $context
* @return bool|string
*/
public function validate($string, $config, $context)
{
static $system_fonts = array( static $system_fonts = array(
'caption' => true, 'caption' => true,
'icon' => true, 'icon' => true,
@@ -39,7 +49,9 @@ class HTMLPurifier_AttrDef_CSS_Font extends HTMLPurifier_AttrDef
// regular pre-processing // regular pre-processing
$string = $this->parseCDATA($string); $string = $this->parseCDATA($string);
if ($string === '') return false; if ($string === '') {
return false;
}
// check if it's one of the keywords // check if it's one of the keywords
$lowercase_string = strtolower($string); $lowercase_string = strtolower($string);
@@ -54,15 +66,20 @@ class HTMLPurifier_AttrDef_CSS_Font extends HTMLPurifier_AttrDef
$final = ''; // output $final = ''; // output
for ($i = 0, $size = count($bits); $i < $size; $i++) { for ($i = 0, $size = count($bits); $i < $size; $i++) {
if ($bits[$i] === '') continue; if ($bits[$i] === '') {
continue;
}
switch ($stage) { switch ($stage) {
case 0: // attempting to catch font-style, font-variant or font-weight
// attempting to catch font-style, font-variant or font-weight
case 0:
foreach ($stage_1 as $validator_name) { foreach ($stage_1 as $validator_name) {
if (isset($caught[$validator_name])) continue; if (isset($caught[$validator_name])) {
continue;
}
$r = $this->info[$validator_name]->validate( $r = $this->info[$validator_name]->validate(
$bits[$i], $config, $context); $bits[$i],
$config,
$context
);
if ($r !== false) { if ($r !== false) {
$final .= $r . ' '; $final .= $r . ' ';
$caught[$validator_name] = true; $caught[$validator_name] = true;
@@ -70,15 +87,17 @@ class HTMLPurifier_AttrDef_CSS_Font extends HTMLPurifier_AttrDef
} }
} }
// all three caught, continue on // all three caught, continue on
if (count($caught) >= 3) $stage = 1; if (count($caught) >= 3) {
if ($r !== false) break; $stage = 1;
}
// attempting to catch font-size and perhaps line-height if ($r !== false) {
case 1: break;
}
case 1: // attempting to catch font-size and perhaps line-height
$found_slash = false; $found_slash = false;
if (strpos($bits[$i], '/') !== false) { if (strpos($bits[$i], '/') !== false) {
list($font_size, $line_height) = list($font_size, $line_height) =
explode('/', $bits[$i]); explode('/', $bits[$i]);
if ($line_height === '') { if ($line_height === '') {
// ooh, there's a space after the slash! // ooh, there's a space after the slash!
$line_height = false; $line_height = false;
@@ -89,14 +108,19 @@ class HTMLPurifier_AttrDef_CSS_Font extends HTMLPurifier_AttrDef
$line_height = false; $line_height = false;
} }
$r = $this->info['font-size']->validate( $r = $this->info['font-size']->validate(
$font_size, $config, $context); $font_size,
$config,
$context
);
if ($r !== false) { if ($r !== false) {
$final .= $r; $final .= $r;
// attempt to catch line-height // attempt to catch line-height
if ($line_height === false) { if ($line_height === false) {
// we need to scroll forward // we need to scroll forward
for ($j = $i + 1; $j < $size; $j++) { for ($j = $i + 1; $j < $size; $j++) {
if ($bits[$j] === '') continue; if ($bits[$j] === '') {
continue;
}
if ($bits[$j] === '/') { if ($bits[$j] === '/') {
if ($found_slash) { if ($found_slash) {
return false; return false;
@@ -116,7 +140,10 @@ class HTMLPurifier_AttrDef_CSS_Font extends HTMLPurifier_AttrDef
if ($found_slash) { if ($found_slash) {
$i = $j; $i = $j;
$r = $this->info['line-height']->validate( $r = $this->info['line-height']->validate(
$line_height, $config, $context); $line_height,
$config,
$context
);
if ($r !== false) { if ($r !== false) {
$final .= '/' . $r; $final .= '/' . $r;
} }
@@ -126,13 +153,14 @@ class HTMLPurifier_AttrDef_CSS_Font extends HTMLPurifier_AttrDef
break; break;
} }
return false; return false;
case 2: // attempting to catch font-family
// attempting to catch font-family
case 2:
$font_family = $font_family =
implode(' ', array_slice($bits, $i, $size - $i)); implode(' ', array_slice($bits, $i, $size - $i));
$r = $this->info['font-family']->validate( $r = $this->info['font-family']->validate(
$font_family, $config, $context); $font_family,
$config,
$context
);
if ($r !== false) { if ($r !== false) {
$final .= $r . ' '; $final .= $r . ' ';
// processing completed successfully // processing completed successfully
@@ -143,7 +171,6 @@ class HTMLPurifier_AttrDef_CSS_Font extends HTMLPurifier_AttrDef
} }
return false; return false;
} }
} }
// vim: et sw=4 sts=4 // vim: et sw=4 sts=4

View File

@@ -2,12 +2,58 @@
/** /**
* Validates a font family list according to CSS spec * Validates a font family list according to CSS spec
* @todo whitelisting allowed fonts would be nice
*/ */
class HTMLPurifier_AttrDef_CSS_FontFamily extends HTMLPurifier_AttrDef class HTMLPurifier_AttrDef_CSS_FontFamily extends HTMLPurifier_AttrDef
{ {
public function validate($string, $config, $context) { protected $mask = null;
public function __construct()
{
$this->mask = '_- ';
for ($c = 'a'; $c <= 'z'; $c++) {
$this->mask .= $c;
}
for ($c = 'A'; $c <= 'Z'; $c++) {
$this->mask .= $c;
}
for ($c = '0'; $c <= '9'; $c++) {
$this->mask .= $c;
} // cast-y, but should be fine
// special bytes used by UTF-8
for ($i = 0x80; $i <= 0xFF; $i++) {
// We don't bother excluding invalid bytes in this range,
// because our restriction to well-formed UTF-8 will
// prevent these from ever occurring.
$this->mask .= chr($i);
}
/*
PHP's internal strcspn implementation is
O(length of string * length of mask), making it inefficient
for large masks. However, it's still faster than
preg_match 8)
for (p = s1;;) {
spanp = s2;
do {
if (*spanp == c || p == s1_end) {
return p - s1;
}
} while (spanp++ < (s2_end - 1));
c = *++p;
}
*/
// possible optimization: invert the mask.
}
/**
* @param string $string
* @param HTMLPurifier_Config $config
* @param HTMLPurifier_Context $context
* @return bool|string
*/
public function validate($string, $config, $context)
{
static $generic_names = array( static $generic_names = array(
'serif' => true, 'serif' => true,
'sans-serif' => true, 'sans-serif' => true,
@@ -15,73 +61,156 @@ class HTMLPurifier_AttrDef_CSS_FontFamily extends HTMLPurifier_AttrDef
'fantasy' => true, 'fantasy' => true,
'cursive' => true 'cursive' => true
); );
$allowed_fonts = $config->get('CSS.AllowedFonts');
// assume that no font names contain commas in them // assume that no font names contain commas in them
$fonts = explode(',', $string); $fonts = explode(',', $string);
$final = ''; $final = '';
foreach($fonts as $font) { foreach ($fonts as $font) {
$font = trim($font); $font = trim($font);
if ($font === '') continue; if ($font === '') {
continue;
}
// match a generic name // match a generic name
if (isset($generic_names[$font])) { if (isset($generic_names[$font])) {
$final .= $font . ', '; if ($allowed_fonts === null || isset($allowed_fonts[$font])) {
$final .= $font . ', ';
}
continue; continue;
} }
// match a quoted name // match a quoted name
if ($font[0] === '"' || $font[0] === "'") { if ($font[0] === '"' || $font[0] === "'") {
$length = strlen($font); $length = strlen($font);
if ($length <= 2) continue; if ($length <= 2) {
$quote = $font[0]; continue;
if ($font[$length - 1] !== $quote) continue;
$font = substr($font, 1, $length - 2);
$new_font = '';
for ($i = 0, $c = strlen($font); $i < $c; $i++) {
if ($font[$i] === '\\') {
$i++;
if ($i >= $c) {
$new_font .= '\\';
break;
}
if (ctype_xdigit($font[$i])) {
$code = $font[$i];
for ($a = 1, $i++; $i < $c && $a < 6; $i++, $a++) {
if (!ctype_xdigit($font[$i])) break;
$code .= $font[$i];
}
// We have to be extremely careful when adding
// new characters, to make sure we're not breaking
// the encoding.
$char = HTMLPurifier_Encoder::unichr(hexdec($code));
if (HTMLPurifier_Encoder::cleanUTF8($char) === '') continue;
$new_font .= $char;
if ($i < $c && trim($font[$i]) !== '') $i--;
continue;
}
if ($font[$i] === "\n") continue;
}
$new_font .= $font[$i];
} }
$quote = $font[0];
$font = $new_font; if ($font[$length - 1] !== $quote) {
continue;
}
$font = substr($font, 1, $length - 2);
} }
$font = $this->expandCSSEscape($font);
// $font is a pure representation of the font name // $font is a pure representation of the font name
if ($allowed_fonts !== null && !isset($allowed_fonts[$font])) {
continue;
}
if (ctype_alnum($font) && $font !== '') { if (ctype_alnum($font) && $font !== '') {
// very simple font, allow it in unharmed // very simple font, allow it in unharmed
$final .= $font . ', '; $final .= $font . ', ';
continue; continue;
} }
// complicated font, requires quoting // bugger out on whitespace. form feed (0C) really
// shouldn't show up regardless
$font = str_replace(array("\n", "\t", "\r", "\x0C"), ' ', $font);
// armor single quotes and new lines // Here, there are various classes of characters which need
$font = str_replace("\\", "\\\\", $font); // to be treated differently:
$font = str_replace("'", "\\'", $font); // - Alphanumeric characters are essentially safe. We
// handled these above.
// - Spaces require quoting, though most parsers will do
// the right thing if there aren't any characters that
// can be misinterpreted
// - Dashes rarely occur, but they are fairly unproblematic
// for parsing/rendering purposes.
// The above characters cover the majority of Western font
// names.
// - Arbitrary Unicode characters not in ASCII. Because
// most parsers give little thought to Unicode, treatment
// of these codepoints is basically uniform, even for
// punctuation-like codepoints. These characters can
// show up in non-Western pages and are supported by most
// major browsers, for example: "ＭＳ 明朝" is a
// legitimate font-name
// <http://ja.wikipedia.org/wiki/MS_明朝>. See
// the CSS3 spec for more examples:
// <http://www.w3.org/TR/2011/WD-css3-fonts-20110324/localizedfamilynames.png>
// You can see live samples of these on the Internet:
// <http://www.google.co.jp/search?q=font-family+ＭＳ+明朝|ゴシック>
// However, most of these fonts have ASCII equivalents:
// for example, 'MS Mincho', and it's considered
// professional to use ASCII font names instead of
// Unicode font names. Thanks Takeshi Terada for
// providing this information.
// The following characters, to my knowledge, have not been
// used to name font names.
// - Single quote. While theoretically you might find a
// font name that has a single quote in its name (serving
// as an apostrophe, e.g. Dave's Scribble), I haven't
// been able to find any actual examples of this.
// Internet Explorer's cssText translation (which I
// believe is invoked by innerHTML) normalizes any
// quoting to single quotes, and fails to escape single
// quotes. (Note that this is not IE's behavior for all
// CSS properties, just some sort of special casing for
// font-family). So a single quote *cannot* be used
// safely in the font-family context if there will be an
// innerHTML/cssText translation. Note that Firefox 3.x
// does this too.
// - Double quote. In IE, these get normalized to
// single-quotes, no matter what the encoding. (Fun
// fact, in IE8, the 'content' CSS property gained
// support, where they special cased to preserve encoded
// double quotes, but still translate unadorned double
// quotes into single quotes.) So, because their
// fixpoint behavior is identical to single quotes, they
// cannot be allowed either. Firefox 3.x displays
// single-quote style behavior.
// - Backslashes are reduced by one (so \\ -> \) every
// iteration, so they cannot be used safely. This shows
// up in IE7, IE8 and FF3
// - Semicolons, commas and backticks are handled properly.
// - The rest of the ASCII punctuation is handled properly.
// We haven't checked what browsers do to unadorned
// versions, but this is not important as long as the
// browser doesn't /remove/ surrounding quotes (as IE does
// for HTML).
//
// With these results in hand, we conclude that there are
// various levels of safety:
// - Paranoid: alphanumeric, spaces and dashes(?)
// - International: Paranoid + non-ASCII Unicode
// - Edgy: Everything except quotes, backslashes
// - NoJS: Standards compliance, e.g. sod IE. Note that
// with some judicious character escaping (since certain
// types of escaping don't work) this is theoretically
// OK as long as innerHTML/cssText is not called.
// We believe that international is a reasonable default
// (that we will implement now), and once we do more
// extensive research, we may feel comfortable with dropping
// it down to edgy.
// Edgy: alphanumeric, spaces, dashes, underscores and Unicode. Use of
// str(c)spn assumes that the string was already well formed
// Unicode (which of course it is).
if (strspn($font, $this->mask) !== strlen($font)) {
continue;
}
// Historical:
// In the absence of innerHTML/cssText, these ugly
// transforms don't pose a security risk (as \\ and \"
// might--these escapes are not supported by most browsers).
// We could try to be clever and use single-quote wrapping
// when there is a double quote present, but I have chosen
// not to implement that. (NOTE: you can reduce the amount
// of escapes by one depending on what quoting style you use)
// $font = str_replace('\\', '\\5C ', $font);
// $font = str_replace('"', '\\22 ', $font);
// $font = str_replace("'", '\\27 ', $font);
// font possibly with spaces, requires quoting
$final .= "'$font', "; $final .= "'$font', ";
} }
$final = rtrim($final, ', '); $final = rtrim($final, ', ');
if ($final === '') return false; if ($final === '') {
return false;
}
return $final; return $final;
} }

View File

@@ -0,0 +1,32 @@
<?php
/**
 * Attribute definition that accepts a value only when it matches the
 * CSS {ident} grammar production.
 */
class HTMLPurifier_AttrDef_CSS_Ident extends HTMLPurifier_AttrDef
{

    /**
     * Trims the input and checks it against the identifier pattern.
     *
     * @param string $string Candidate identifier
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool|string The trimmed identifier, or false when rejected
     */
    public function validate($string, $config, $context)
    {
        $ident = trim($string);

        // Strings that convert to false ('' and '0') are rejected before
        // the pattern is consulted. Everything else must be an optional
        // leading dash, then a letter or underscore, then any run of
        // letters, digits, dashes and underscores.
        $accepted = $ident && preg_match('/^(-?[A-Za-z_][A-Za-z_\-0-9]*)$/', $ident);

        return $accepted ? $ident : false;
    }
}
// vim: et sw=4 sts=4

View File

@@ -5,20 +5,34 @@
*/ */
class HTMLPurifier_AttrDef_CSS_ImportantDecorator extends HTMLPurifier_AttrDef class HTMLPurifier_AttrDef_CSS_ImportantDecorator extends HTMLPurifier_AttrDef
{ {
public $def, $allow; /**
* @type HTMLPurifier_AttrDef
*/
public $def;
/**
* @type bool
*/
public $allow;
/** /**
* @param $def Definition to wrap * @param HTMLPurifier_AttrDef $def Definition to wrap
* @param $allow Whether or not to allow !important * @param bool $allow Whether or not to allow !important
*/ */
public function __construct($def, $allow = false) { public function __construct($def, $allow = false)
{
$this->def = $def; $this->def = $def;
$this->allow = $allow; $this->allow = $allow;
} }
/** /**
* Intercepts and removes !important if necessary * Intercepts and removes !important if necessary
* @param string $string
* @param HTMLPurifier_Config $config
* @param HTMLPurifier_Context $context
* @return bool|string
*/ */
public function validate($string, $config, $context) { public function validate($string, $config, $context)
{
// test for ! and important tokens // test for ! and important tokens
$string = trim($string); $string = trim($string);
$is_important = false; $is_important = false;
@@ -32,7 +46,9 @@ class HTMLPurifier_AttrDef_CSS_ImportantDecorator extends HTMLPurifier_AttrDef
} }
} }
$string = $this->def->validate($string, $config, $context); $string = $this->def->validate($string, $config, $context);
if ($this->allow && $is_important) $string .= ' !important'; if ($this->allow && $is_important) {
$string .= ' !important';
}
return $string; return $string;
} }
} }

View File

@@ -6,42 +6,72 @@
class HTMLPurifier_AttrDef_CSS_Length extends HTMLPurifier_AttrDef class HTMLPurifier_AttrDef_CSS_Length extends HTMLPurifier_AttrDef
{ {
protected $min, $max; /**
* @type HTMLPurifier_Length|string
*/
protected $min;
/** /**
* @param HTMLPurifier_Length $max Minimum length, or null for no bound. String is also acceptable. * @type HTMLPurifier_Length|string
* @param HTMLPurifier_Length $max Maximum length, or null for no bound. String is also acceptable.
*/ */
public function __construct($min = null, $max = null) { protected $max;
/**
* @param HTMLPurifier_Length|string $min Minimum length, or null for no bound. String is also acceptable.
* @param HTMLPurifier_Length|string $max Maximum length, or null for no bound. String is also acceptable.
*/
public function __construct($min = null, $max = null)
{
$this->min = $min !== null ? HTMLPurifier_Length::make($min) : null; $this->min = $min !== null ? HTMLPurifier_Length::make($min) : null;
$this->max = $max !== null ? HTMLPurifier_Length::make($max) : null; $this->max = $max !== null ? HTMLPurifier_Length::make($max) : null;
} }
public function validate($string, $config, $context) { /**
* @param string $string
* @param HTMLPurifier_Config $config
* @param HTMLPurifier_Context $context
* @return bool|string
*/
public function validate($string, $config, $context)
{
$string = $this->parseCDATA($string); $string = $this->parseCDATA($string);
// Optimizations // Optimizations
if ($string === '') return false; if ($string === '') {
if ($string === '0') return '0'; return false;
if (strlen($string) === 1) return false; }
if ($string === '0') {
return '0';
}
if (strlen($string) === 1) {
return false;
}
$length = HTMLPurifier_Length::make($string); $length = HTMLPurifier_Length::make($string);
if (!$length->isValid()) return false; if (!$length->isValid()) {
return false;
}
if ($this->min) { if ($this->min) {
$c = $length->compareTo($this->min); $c = $length->compareTo($this->min);
if ($c === false) return false; if ($c === false) {
if ($c < 0) return false; return false;
}
if ($c < 0) {
return false;
}
} }
if ($this->max) { if ($this->max) {
$c = $length->compareTo($this->max); $c = $length->compareTo($this->max);
if ($c === false) return false; if ($c === false) {
if ($c > 0) return false; return false;
}
if ($c > 0) {
return false;
}
} }
return $length->toString(); return $length->toString();
} }
} }
// vim: et sw=4 sts=4 // vim: et sw=4 sts=4

View File

@@ -8,46 +8,72 @@ class HTMLPurifier_AttrDef_CSS_ListStyle extends HTMLPurifier_AttrDef
{ {
/** /**
* Local copy of component validators. * Local copy of validators.
* @type HTMLPurifier_AttrDef[]
* @note See HTMLPurifier_AttrDef_CSS_Font::$info for a similar impl. * @note See HTMLPurifier_AttrDef_CSS_Font::$info for a similar impl.
*/ */
protected $info; protected $info;
public function __construct($config) { /**
* @param HTMLPurifier_Config $config
*/
public function __construct($config)
{
$def = $config->getCSSDefinition(); $def = $config->getCSSDefinition();
$this->info['list-style-type'] = $def->info['list-style-type']; $this->info['list-style-type'] = $def->info['list-style-type'];
$this->info['list-style-position'] = $def->info['list-style-position']; $this->info['list-style-position'] = $def->info['list-style-position'];
$this->info['list-style-image'] = $def->info['list-style-image']; $this->info['list-style-image'] = $def->info['list-style-image'];
} }
public function validate($string, $config, $context) { /**
* @param string $string
* @param HTMLPurifier_Config $config
* @param HTMLPurifier_Context $context
* @return bool|string
*/
public function validate($string, $config, $context)
{
// regular pre-processing // regular pre-processing
$string = $this->parseCDATA($string); $string = $this->parseCDATA($string);
if ($string === '') return false; if ($string === '') {
return false;
}
// assumes URI doesn't have spaces in it // assumes URI doesn't have spaces in it
$bits = explode(' ', strtolower($string)); // bits to process $bits = explode(' ', strtolower($string)); // bits to process
$caught = array(); $caught = array();
$caught['type'] = false; $caught['type'] = false;
$caught['position'] = false; $caught['position'] = false;
$caught['image'] = false; $caught['image'] = false;
$i = 0; // number of catches $i = 0; // number of catches
$none = false; $none = false;
foreach ($bits as $bit) { foreach ($bits as $bit) {
if ($i >= 3) return; // optimization bit if ($i >= 3) {
if ($bit === '') continue; return;
} // optimization bit
if ($bit === '') {
continue;
}
foreach ($caught as $key => $status) { foreach ($caught as $key => $status) {
if ($status !== false) continue; if ($status !== false) {
continue;
}
$r = $this->info['list-style-' . $key]->validate($bit, $config, $context); $r = $this->info['list-style-' . $key]->validate($bit, $config, $context);
if ($r === false) continue; if ($r === false) {
continue;
}
if ($r === 'none') { if ($r === 'none') {
if ($none) continue; if ($none) {
else $none = true; continue;
if ($key == 'image') continue; } else {
$none = true;
}
if ($key == 'image') {
continue;
}
} }
$caught[$key] = $r; $caught[$key] = $r;
$i++; $i++;
@@ -55,24 +81,32 @@ class HTMLPurifier_AttrDef_CSS_ListStyle extends HTMLPurifier_AttrDef
} }
} }
if (!$i) return false; if (!$i) {
return false;
}
$ret = array(); $ret = array();
// construct type // construct type
if ($caught['type']) $ret[] = $caught['type']; if ($caught['type']) {
$ret[] = $caught['type'];
}
// construct image // construct image
if ($caught['image']) $ret[] = $caught['image']; if ($caught['image']) {
$ret[] = $caught['image'];
}
// construct position // construct position
if ($caught['position']) $ret[] = $caught['position']; if ($caught['position']) {
$ret[] = $caught['position'];
}
if (empty($ret)) return false; if (empty($ret)) {
return false;
}
return implode(' ', $ret); return implode(' ', $ret);
} }
} }
// vim: et sw=4 sts=4 // vim: et sw=4 sts=4

View File

@@ -13,9 +13,9 @@
*/ */
class HTMLPurifier_AttrDef_CSS_Multiple extends HTMLPurifier_AttrDef class HTMLPurifier_AttrDef_CSS_Multiple extends HTMLPurifier_AttrDef
{ {
/** /**
* Instance of component definition to defer validation to. * Instance of component definition to defer validation to.
* @type HTMLPurifier_AttrDef
* @todo Make protected * @todo Make protected
*/ */
public $single; public $single;
@@ -27,32 +27,45 @@ class HTMLPurifier_AttrDef_CSS_Multiple extends HTMLPurifier_AttrDef
public $max; public $max;
/** /**
* @param $single HTMLPurifier_AttrDef to multiply * @param HTMLPurifier_AttrDef $single HTMLPurifier_AttrDef to multiply
* @param $max Max number of values allowed (usually four) * @param int $max Max number of values allowed (usually four)
*/ */
public function __construct($single, $max = 4) { public function __construct($single, $max = 4)
{
$this->single = $single; $this->single = $single;
$this->max = $max; $this->max = $max;
} }
public function validate($string, $config, $context) { /**
$string = $this->parseCDATA($string); * @param string $string
if ($string === '') return false; * @param HTMLPurifier_Config $config
* @param HTMLPurifier_Context $context
* @return bool|string
*/
public function validate($string, $config, $context)
{
$string = $this->mungeRgb($this->parseCDATA($string));
if ($string === '') {
return false;
}
$parts = explode(' ', $string); // parseCDATA replaced \r, \t and \n $parts = explode(' ', $string); // parseCDATA replaced \r, \t and \n
$length = count($parts); $length = count($parts);
$final = ''; $final = '';
for ($i = 0, $num = 0; $i < $length && $num < $this->max; $i++) { for ($i = 0, $num = 0; $i < $length && $num < $this->max; $i++) {
if (ctype_space($parts[$i])) continue; if (ctype_space($parts[$i])) {
continue;
}
$result = $this->single->validate($parts[$i], $config, $context); $result = $this->single->validate($parts[$i], $config, $context);
if ($result !== false) { if ($result !== false) {
$final .= $result . ' '; $final .= $result . ' ';
$num++; $num++;
} }
} }
if ($final === '') return false; if ($final === '') {
return false;
}
return rtrim($final); return rtrim($final);
} }
} }
// vim: et sw=4 sts=4 // vim: et sw=4 sts=4

View File

@@ -7,32 +7,44 @@ class HTMLPurifier_AttrDef_CSS_Number extends HTMLPurifier_AttrDef
{ {
/** /**
* Bool indicating whether or not only positive values allowed. * Indicates whether or not only positive values are allowed.
* @type bool
*/ */
protected $non_negative = false; protected $non_negative = false;
/** /**
* @param $non_negative Bool indicating whether negatives are forbidden * @param bool $non_negative indicates whether negatives are forbidden
*/ */
public function __construct($non_negative = false) { public function __construct($non_negative = false)
{
$this->non_negative = $non_negative; $this->non_negative = $non_negative;
} }
/** /**
* @param string $number
* @param HTMLPurifier_Config $config
* @param HTMLPurifier_Context $context
* @return string|bool
* @warning Some contexts do not pass $config, $context. These * @warning Some contexts do not pass $config, $context. These
* variables should not be used without checking HTMLPurifier_Length * variables should not be used without checking HTMLPurifier_Length
*/ */
public function validate($number, $config, $context) { public function validate($number, $config, $context)
{
$number = $this->parseCDATA($number); $number = $this->parseCDATA($number);
if ($number === '') return false; if ($number === '') {
if ($number === '0') return '0'; return false;
}
if ($number === '0') {
return '0';
}
$sign = ''; $sign = '';
switch ($number[0]) { switch ($number[0]) {
case '-': case '-':
if ($this->non_negative) return false; if ($this->non_negative) {
return false;
}
$sign = '-'; $sign = '-';
case '+': case '+':
$number = substr($number, 1); $number = substr($number, 1);
@@ -44,14 +56,20 @@ class HTMLPurifier_AttrDef_CSS_Number extends HTMLPurifier_AttrDef
} }
// Period is the only non-numeric character allowed // Period is the only non-numeric character allowed
if (strpos($number, '.') === false) return false; if (strpos($number, '.') === false) {
return false;
}
list($left, $right) = explode('.', $number, 2); list($left, $right) = explode('.', $number, 2);
if ($left === '' && $right === '') return false; if ($left === '' && $right === '') {
if ($left !== '' && !ctype_digit($left)) return false; return false;
}
if ($left !== '' && !ctype_digit($left)) {
return false;
}
$left = ltrim($left, '0'); $left = ltrim($left, '0');
$right = rtrim($right, '0'); $right = rtrim($right, '0');
if ($right === '') { if ($right === '') {
@@ -59,11 +77,8 @@ class HTMLPurifier_AttrDef_CSS_Number extends HTMLPurifier_AttrDef
} elseif (!ctype_digit($right)) { } elseif (!ctype_digit($right)) {
return false; return false;
} }
return $sign . $left . '.' . $right; return $sign . $left . '.' . $right;
} }
} }
// vim: et sw=4 sts=4 // vim: et sw=4 sts=4

View File

@@ -7,34 +7,48 @@ class HTMLPurifier_AttrDef_CSS_Percentage extends HTMLPurifier_AttrDef
{ {
/** /**
* Instance of HTMLPurifier_AttrDef_CSS_Number to defer number validation * Instance to defer number validation to.
* @type HTMLPurifier_AttrDef_CSS_Number
*/ */
protected $number_def; protected $number_def;
/** /**
* @param Bool indicating whether to forbid negative values * @param bool $non_negative Whether to forbid negative values
*/ */
public function __construct($non_negative = false) { public function __construct($non_negative = false)
{
$this->number_def = new HTMLPurifier_AttrDef_CSS_Number($non_negative); $this->number_def = new HTMLPurifier_AttrDef_CSS_Number($non_negative);
} }
public function validate($string, $config, $context) { /**
* @param string $string
* @param HTMLPurifier_Config $config
* @param HTMLPurifier_Context $context
* @return bool|string
*/
public function validate($string, $config, $context)
{
$string = $this->parseCDATA($string); $string = $this->parseCDATA($string);
if ($string === '') return false; if ($string === '') {
return false;
}
$length = strlen($string); $length = strlen($string);
if ($length === 1) return false; if ($length === 1) {
if ($string[$length - 1] !== '%') return false; return false;
}
if ($string[$length - 1] !== '%') {
return false;
}
$number = substr($string, 0, $length - 1); $number = substr($string, 0, $length - 1);
$number = $this->number_def->validate($number, $config, $context); $number = $this->number_def->validate($number, $config, $context);
if ($number === false) return false; if ($number === false) {
return false;
}
return "$number%"; return "$number%";
} }
} }
// vim: et sw=4 sts=4 // vim: et sw=4 sts=4

View File

@@ -8,8 +8,14 @@
class HTMLPurifier_AttrDef_CSS_TextDecoration extends HTMLPurifier_AttrDef class HTMLPurifier_AttrDef_CSS_TextDecoration extends HTMLPurifier_AttrDef
{ {
public function validate($string, $config, $context) { /**
* @param string $string
* @param HTMLPurifier_Config $config
* @param HTMLPurifier_Context $context
* @return bool|string
*/
public function validate($string, $config, $context)
{
static $allowed_values = array( static $allowed_values = array(
'line-through' => true, 'line-through' => true,
'overline' => true, 'overline' => true,
@@ -18,7 +24,9 @@ class HTMLPurifier_AttrDef_CSS_TextDecoration extends HTMLPurifier_AttrDef
$string = strtolower($this->parseCDATA($string)); $string = strtolower($this->parseCDATA($string));
if ($string === 'none') return $string; if ($string === 'none') {
return $string;
}
$parts = explode(' ', $string); $parts = explode(' ', $string);
$final = ''; $final = '';
@@ -28,11 +36,11 @@ class HTMLPurifier_AttrDef_CSS_TextDecoration extends HTMLPurifier_AttrDef
} }
} }
$final = rtrim($final); $final = rtrim($final);
if ($final === '') return false; if ($final === '') {
return false;
}
return $final; return $final;
} }
} }
// vim: et sw=4 sts=4 // vim: et sw=4 sts=4

View File

@@ -12,45 +12,66 @@
class HTMLPurifier_AttrDef_CSS_URI extends HTMLPurifier_AttrDef_URI class HTMLPurifier_AttrDef_CSS_URI extends HTMLPurifier_AttrDef_URI
{ {
public function __construct() { public function __construct()
{
parent::__construct(true); // always embedded parent::__construct(true); // always embedded
} }
public function validate($uri_string, $config, $context) { /**
* @param string $uri_string
* @param HTMLPurifier_Config $config
* @param HTMLPurifier_Context $context
* @return bool|string
*/
public function validate($uri_string, $config, $context)
{
// parse the URI out of the string and then pass it onto // parse the URI out of the string and then pass it onto
// the parent object // the parent object
$uri_string = $this->parseCDATA($uri_string); $uri_string = $this->parseCDATA($uri_string);
if (strpos($uri_string, 'url(') !== 0) return false; if (strpos($uri_string, 'url(') !== 0) {
return false;
}
$uri_string = substr($uri_string, 4); $uri_string = substr($uri_string, 4);
if (strlen($uri_string) == 0) {
return false;
}
$new_length = strlen($uri_string) - 1; $new_length = strlen($uri_string) - 1;
if ($uri_string[$new_length] != ')') return false; if ($uri_string[$new_length] != ')') {
return false;
}
$uri = trim(substr($uri_string, 0, $new_length)); $uri = trim(substr($uri_string, 0, $new_length));
if (!empty($uri) && ($uri[0] == "'" || $uri[0] == '"')) { if (!empty($uri) && ($uri[0] == "'" || $uri[0] == '"')) {
$quote = $uri[0]; $quote = $uri[0];
$new_length = strlen($uri) - 1; $new_length = strlen($uri) - 1;
if ($uri[$new_length] !== $quote) return false; if ($uri[$new_length] !== $quote) {
return false;
}
$uri = substr($uri, 1, $new_length - 1); $uri = substr($uri, 1, $new_length - 1);
} }
$keys = array( '(', ')', ',', ' ', '"', "'"); $uri = $this->expandCSSEscape($uri);
$values = array('\\(', '\\)', '\\,', '\\ ', '\\"', "\\'");
$uri = str_replace($values, $keys, $uri);
$result = parent::validate($uri, $config, $context); $result = parent::validate($uri, $config, $context);
if ($result === false) return false; if ($result === false) {
return false;
}
// escape necessary characters according to CSS spec // extra sanity check; should have been done by URI
// except for the comma, none of these should appear in the $result = str_replace(array('"', "\\", "\n", "\x0c", "\r"), "", $result);
// URI at all
$result = str_replace($keys, $values, $result);
return "url($result)"; // suspicious characters are ()'; we're going to percent encode
// them for safety.
$result = str_replace(array('(', ')', "'"), array('%28', '%29', '%27'), $result);
// there's an extra bug where ampersands lose their escaping on
// an innerHTML cycle, so a very unlucky query parameter could
// then change the meaning of the URL. Unfortunately, there's
// not much we can do about that...
return "url(\"$result\")";
} }
} }
// vim: et sw=4 sts=4 // vim: et sw=4 sts=4

View File

@@ -0,0 +1,44 @@
<?php
/**
 * Stand-in AttrDef that wraps another AttrDef: validation is forwarded
 * to the wrapped definition, and make() hands back a copy of it.
 */
class HTMLPurifier_AttrDef_Clone extends HTMLPurifier_AttrDef
{
    /**
     * The wrapped definition; validated against and copied by make().
     * @type HTMLPurifier_AttrDef
     */
    protected $clone;

    /**
     * @param HTMLPurifier_AttrDef $clone Definition to wrap.
     */
    public function __construct($clone)
    {
        $this->clone = $clone;
    }

    /**
     * Forwards validation, unchanged, to the wrapped definition.
     * @param string $v
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool|string Whatever the wrapped definition returns.
     */
    public function validate($v, $config, $context)
    {
        $wrapped = $this->clone;
        return $wrapped->validate($v, $config, $context);
    }

    /**
     * Produces a copy of the wrapped definition.
     * @param string $string Not used by this implementation.
     * @return HTMLPurifier_AttrDef
     */
    public function make($string)
    {
        $copy = clone $this->clone;
        return $copy;
    }
}
// vim: et sw=4 sts=4

View File

@@ -12,9 +12,10 @@ class HTMLPurifier_AttrDef_Enum extends HTMLPurifier_AttrDef
/** /**
* Lookup table of valid values. * Lookup table of valid values.
* @type array
* @todo Make protected * @todo Make protected
*/ */
public $valid_values = array(); public $valid_values = array();
/** /**
* Bool indicating whether or not enumeration is case sensitive. * Bool indicating whether or not enumeration is case sensitive.
@@ -23,17 +24,23 @@ class HTMLPurifier_AttrDef_Enum extends HTMLPurifier_AttrDef
protected $case_sensitive = false; // values according to W3C spec protected $case_sensitive = false; // values according to W3C spec
/** /**
* @param $valid_values List of valid values * @param array $valid_values List of valid values
* @param $case_sensitive Bool indicating whether or not case sensitive * @param bool $case_sensitive Whether or not case sensitive
*/ */
public function __construct( public function __construct($valid_values = array(), $case_sensitive = false)
$valid_values = array(), $case_sensitive = false {
) {
$this->valid_values = array_flip($valid_values); $this->valid_values = array_flip($valid_values);
$this->case_sensitive = $case_sensitive; $this->case_sensitive = $case_sensitive;
} }
public function validate($string, $config, $context) { /**
* @param string $string
* @param HTMLPurifier_Config $config
* @param HTMLPurifier_Context $context
* @return bool|string
*/
public function validate($string, $config, $context)
{
$string = trim($string); $string = trim($string);
if (!$this->case_sensitive) { if (!$this->case_sensitive) {
// we may want to do full case-insensitive libraries // we may want to do full case-insensitive libraries
@@ -45,11 +52,13 @@ class HTMLPurifier_AttrDef_Enum extends HTMLPurifier_AttrDef
} }
/** /**
* @param $string In form of comma-delimited list of case-insensitive * @param string $string In form of comma-delimited list of case-insensitive
* valid values. Example: "foo,bar,baz". Prepend "s:" to make * valid values. Example: "foo,bar,baz". Prepend "s:" to make
* case sensitive * case sensitive
* @return HTMLPurifier_AttrDef_Enum
*/ */
public function make($string) { public function make($string)
{
if (strlen($string) > 2 && $string[0] == 's' && $string[1] == ':') { if (strlen($string) > 2 && $string[0] == 's' && $string[1] == ':') {
$string = substr($string, 2); $string = substr($string, 2);
$sensitive = true; $sensitive = true;
@@ -59,7 +68,6 @@ class HTMLPurifier_AttrDef_Enum extends HTMLPurifier_AttrDef
$values = explode(',', $string); $values = explode(',', $string);
return new HTMLPurifier_AttrDef_Enum($values, $sensitive); return new HTMLPurifier_AttrDef_Enum($values, $sensitive);
} }
} }
// vim: et sw=4 sts=4 // vim: et sw=4 sts=4

View File

@@ -6,23 +6,43 @@
class HTMLPurifier_AttrDef_HTML_Bool extends HTMLPurifier_AttrDef class HTMLPurifier_AttrDef_HTML_Bool extends HTMLPurifier_AttrDef
{ {
/**
* @type bool
*/
protected $name; protected $name;
/**
* @type bool
*/
public $minimized = true; public $minimized = true;
public function __construct($name = false) {$this->name = $name;} /**
* @param bool $name
*/
public function __construct($name = false)
{
$this->name = $name;
}
public function validate($string, $config, $context) { /**
if (empty($string)) return false; * @param string $string
* @param HTMLPurifier_Config $config
* @param HTMLPurifier_Context $context
* @return bool|string
*/
public function validate($string, $config, $context)
{
return $this->name; return $this->name;
} }
/** /**
* @param $string Name of attribute * @param string $string Name of attribute
* @return HTMLPurifier_AttrDef_HTML_Bool
*/ */
public function make($string) { public function make($string)
{
return new HTMLPurifier_AttrDef_HTML_Bool($string); return new HTMLPurifier_AttrDef_HTML_Bool($string);
} }
} }
// vim: et sw=4 sts=4 // vim: et sw=4 sts=4

View File

@@ -0,0 +1,48 @@
<?php
/**
 * Special-cased handling of the class attribute (otherwise NMTOKENS).
 */
class HTMLPurifier_AttrDef_HTML_Class extends HTMLPurifier_AttrDef_HTML_Nmtokens
{
    /**
     * Splits the attribute value into class tokens. For the XHTML 1.1 and
     * XHTML 2.0 doctypes the parent's splitting is used; for any other
     * doctype the value is simply split on runs of whitespace.
     * @param string $string
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool|string
     */
    protected function split($string, $config, $context)
    {
        // ideally this doctype lookup would be lazy loaded
        $doctype_name = $config->getDefinition('HTML')->doctype->name;
        $use_parent = ($doctype_name == "XHTML 1.1" || $doctype_name == "XHTML 2.0");
        if (!$use_parent) {
            return preg_split('/\s+/', $string);
        }
        return parent::split($string, $config, $context);
    }

    /**
     * Drops tokens that are not permitted by %Attr.AllowedClasses, that
     * are listed in %Attr.ForbiddenClasses, or that were already kept.
     * @param array $tokens
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return array
     */
    protected function filter($tokens, $config, $context)
    {
        $allowed = $config->get('Attr.AllowedClasses');
        $forbidden = $config->get('Attr.ForbiddenClasses');
        $kept = array();
        foreach ($tokens as $token) {
            // a null allow-list means every class is acceptable
            if ($allowed !== null && !isset($allowed[$token])) {
                continue;
            }
            if (isset($forbidden[$token])) {
                continue;
            }
            // Duplicates are detected with a linear, strict value search
            // instead of a keyed lookup: PHP's array implementation
            // casts -0 to 0 when used as a key.
            if (in_array($token, $kept, true)) {
                continue;
            }
            $kept[] = $token;
        }
        return $kept;
    }
}

View File

@@ -6,27 +6,46 @@
class HTMLPurifier_AttrDef_HTML_Color extends HTMLPurifier_AttrDef class HTMLPurifier_AttrDef_HTML_Color extends HTMLPurifier_AttrDef
{ {
public function validate($string, $config, $context) { /**
* @param string $string
* @param HTMLPurifier_Config $config
* @param HTMLPurifier_Context $context
* @return bool|string
*/
public function validate($string, $config, $context)
{
static $colors = null; static $colors = null;
if ($colors === null) $colors = $config->get('Core', 'ColorKeywords'); if ($colors === null) {
$colors = $config->get('Core.ColorKeywords');
}
$string = trim($string); $string = trim($string);
if (empty($string)) return false; if (empty($string)) {
if (isset($colors[$string])) return $colors[$string]; return false;
if ($string[0] === '#') $hex = substr($string, 1); }
else $hex = $string; $lower = strtolower($string);
if (isset($colors[$lower])) {
return $colors[$lower];
}
if ($string[0] === '#') {
$hex = substr($string, 1);
} else {
$hex = $string;
}
$length = strlen($hex); $length = strlen($hex);
if ($length !== 3 && $length !== 6) return false; if ($length !== 3 && $length !== 6) {
if (!ctype_xdigit($hex)) return false; return false;
if ($length === 3) $hex = $hex[0].$hex[0].$hex[1].$hex[1].$hex[2].$hex[2]; }
if (!ctype_xdigit($hex)) {
return false;
}
if ($length === 3) {
$hex = $hex[0] . $hex[0] . $hex[1] . $hex[1] . $hex[2] . $hex[2];
}
return "#$hex"; return "#$hex";
} }
} }
// vim: et sw=4 sts=4 // vim: et sw=4 sts=4

View File

@@ -6,16 +6,33 @@
class HTMLPurifier_AttrDef_HTML_FrameTarget extends HTMLPurifier_AttrDef_Enum class HTMLPurifier_AttrDef_HTML_FrameTarget extends HTMLPurifier_AttrDef_Enum
{ {
/**
* @type array
*/
public $valid_values = false; // uninitialized value public $valid_values = false; // uninitialized value
/**
* @type bool
*/
protected $case_sensitive = false; protected $case_sensitive = false;
public function __construct() {} public function __construct()
{
public function validate($string, $config, $context) {
if ($this->valid_values === false) $this->valid_values = $config->get('Attr', 'AllowedFrameTargets');
return parent::validate($string, $config, $context);
} }
/**
* @param string $string
* @param HTMLPurifier_Config $config
* @param HTMLPurifier_Context $context
* @return bool|string
*/
public function validate($string, $config, $context)
{
if ($this->valid_values === false) {
$this->valid_values = $config->get('Attr.AllowedFrameTargets');
}
return parent::validate($string, $config, $context);
}
} }
// vim: et sw=4 sts=4 // vim: et sw=4 sts=4

View File

@@ -12,59 +12,102 @@
class HTMLPurifier_AttrDef_HTML_ID extends HTMLPurifier_AttrDef class HTMLPurifier_AttrDef_HTML_ID extends HTMLPurifier_AttrDef
{ {
// ref functionality disabled, since we also have to verify // selector is NOT a valid thing to use for IDREFs, because IDREFs
// whether or not the ID it refers to exists // *must* target IDs that exist, whereas selector #ids do not.
public function validate($id, $config, $context) { /**
* Determines whether or not we're validating an ID in a CSS
* selector context.
* @type bool
*/
protected $selector;
if (!$config->get('Attr', 'EnableID')) return false; /**
* @param bool $selector
*/
public function __construct($selector = false)
{
$this->selector = $selector;
}
/**
* @param string $id
* @param HTMLPurifier_Config $config
* @param HTMLPurifier_Context $context
* @return bool|string
*/
public function validate($id, $config, $context)
{
if (!$this->selector && !$config->get('Attr.EnableID')) {
return false;
}
$id = trim($id); // trim it first $id = trim($id); // trim it first
if ($id === '') return false; if ($id === '') {
return false;
$prefix = $config->get('Attr', 'IDPrefix');
if ($prefix !== '') {
$prefix .= $config->get('Attr', 'IDPrefixLocal');
// prevent re-appending the prefix
if (strpos($id, $prefix) !== 0) $id = $prefix . $id;
} elseif ($config->get('Attr', 'IDPrefixLocal') !== '') {
trigger_error('%Attr.IDPrefixLocal cannot be used unless '.
'%Attr.IDPrefix is set', E_USER_WARNING);
} }
//if (!$this->ref) { $prefix = $config->get('Attr.IDPrefix');
if ($prefix !== '') {
$prefix .= $config->get('Attr.IDPrefixLocal');
// prevent re-appending the prefix
if (strpos($id, $prefix) !== 0) {
$id = $prefix . $id;
}
} elseif ($config->get('Attr.IDPrefixLocal') !== '') {
trigger_error(
'%Attr.IDPrefixLocal cannot be used unless ' .
'%Attr.IDPrefix is set',
E_USER_WARNING
);
}
if (!$this->selector) {
$id_accumulator =& $context->get('IDAccumulator'); $id_accumulator =& $context->get('IDAccumulator');
if (isset($id_accumulator->ids[$id])) return false; if (isset($id_accumulator->ids[$id])) {
//} return false;
}
}
// we purposely avoid using regex, hopefully this is faster // we purposely avoid using regex, hopefully this is faster
if (ctype_alpha($id)) { if ($config->get('Attr.ID.HTML5') === true) {
$result = true; if (preg_match('/[\t\n\x0b\x0c ]/', $id)) {
return false;
}
} else { } else {
if (!ctype_alpha(@$id[0])) return false; if (ctype_alpha($id)) {
$trim = trim( // primitive style of regexps, I suppose // OK
$id, } else {
'A..Za..z0..9:-._' if (!ctype_alpha(@$id[0])) {
); return false;
$result = ($trim === ''); }
// primitive style of regexps, I suppose
$trim = trim(
$id,
'A..Za..z0..9:-._'
);
if ($trim !== '') {
return false;
}
}
} }
$regexp = $config->get('Attr', 'IDBlacklistRegexp'); $regexp = $config->get('Attr.IDBlacklistRegexp');
if ($regexp && preg_match($regexp, $id)) { if ($regexp && preg_match($regexp, $id)) {
return false; return false;
} }
if (/*!$this->ref && */$result) $id_accumulator->add($id); if (!$this->selector) {
$id_accumulator->add($id);
}
// if no change was made to the ID, return the result // if no change was made to the ID, return the result
// else, return the new id if stripping whitespace made it // else, return the new id if stripping whitespace made it
// valid, or return false. // valid, or return false.
return $result ? $id : false; return $id;
} }
} }
// vim: et sw=4 sts=4 // vim: et sw=4 sts=4

View File

@@ -10,32 +10,47 @@
class HTMLPurifier_AttrDef_HTML_Length extends HTMLPurifier_AttrDef_HTML_Pixels class HTMLPurifier_AttrDef_HTML_Length extends HTMLPurifier_AttrDef_HTML_Pixels
{ {
public function validate($string, $config, $context) { /**
* @param string $string
* @param HTMLPurifier_Config $config
* @param HTMLPurifier_Context $context
* @return bool|string
*/
public function validate($string, $config, $context)
{
$string = trim($string); $string = trim($string);
if ($string === '') return false; if ($string === '') {
return false;
}
$parent_result = parent::validate($string, $config, $context); $parent_result = parent::validate($string, $config, $context);
if ($parent_result !== false) return $parent_result; if ($parent_result !== false) {
return $parent_result;
}
$length = strlen($string); $length = strlen($string);
$last_char = $string[$length - 1]; $last_char = $string[$length - 1];
if ($last_char !== '%') return false; if ($last_char !== '%') {
return false;
}
$points = substr($string, 0, $length - 1); $points = substr($string, 0, $length - 1);
if (!is_numeric($points)) return false; if (!is_numeric($points)) {
return false;
}
$points = (int) $points; $points = (int)$points;
if ($points < 0) return '0%';
if ($points > 100) return '100%';
return ((string) $points) . '%';
if ($points < 0) {
return '0%';
}
if ($points > 100) {
return '100%';
}
return ((string)$points) . '%';
} }
} }
// vim: et sw=4 sts=4 // vim: et sw=4 sts=4

View File

@@ -9,26 +9,44 @@
class HTMLPurifier_AttrDef_HTML_LinkTypes extends HTMLPurifier_AttrDef class HTMLPurifier_AttrDef_HTML_LinkTypes extends HTMLPurifier_AttrDef
{ {
/** Name config attribute to pull. */ /**
* Name config attribute to pull.
* @type string
*/
protected $name; protected $name;
public function __construct($name) { /**
* @param string $name
*/
public function __construct($name)
{
$configLookup = array( $configLookup = array(
'rel' => 'AllowedRel', 'rel' => 'AllowedRel',
'rev' => 'AllowedRev' 'rev' => 'AllowedRev'
); );
if (!isset($configLookup[$name])) { if (!isset($configLookup[$name])) {
trigger_error('Unrecognized attribute name for link '. trigger_error(
'relationship.', E_USER_ERROR); 'Unrecognized attribute name for link ' .
'relationship.',
E_USER_ERROR
);
return; return;
} }
$this->name = $configLookup[$name]; $this->name = $configLookup[$name];
} }
public function validate($string, $config, $context) { /**
* @param string $string
$allowed = $config->get('Attr', $this->name); * @param HTMLPurifier_Config $config
if (empty($allowed)) return false; * @param HTMLPurifier_Context $context
* @return bool|string
*/
public function validate($string, $config, $context)
{
$allowed = $config->get('Attr.' . $this->name);
if (empty($allowed)) {
return false;
}
$string = $this->parseCDATA($string); $string = $this->parseCDATA($string);
$parts = explode(' ', $string); $parts = explode(' ', $string);
@@ -37,17 +55,18 @@ class HTMLPurifier_AttrDef_HTML_LinkTypes extends HTMLPurifier_AttrDef
$ret_lookup = array(); $ret_lookup = array();
foreach ($parts as $part) { foreach ($parts as $part) {
$part = strtolower(trim($part)); $part = strtolower(trim($part));
if (!isset($allowed[$part])) continue; if (!isset($allowed[$part])) {
continue;
}
$ret_lookup[$part] = true; $ret_lookup[$part] = true;
} }
if (empty($ret_lookup)) return false; if (empty($ret_lookup)) {
return false;
}
$string = implode(' ', array_keys($ret_lookup)); $string = implode(' ', array_keys($ret_lookup));
return $string; return $string;
} }
} }
// vim: et sw=4 sts=4 // vim: et sw=4 sts=4

View File

@@ -9,33 +9,52 @@
class HTMLPurifier_AttrDef_HTML_MultiLength extends HTMLPurifier_AttrDef_HTML_Length class HTMLPurifier_AttrDef_HTML_MultiLength extends HTMLPurifier_AttrDef_HTML_Length
{ {
public function validate($string, $config, $context) { /**
* @param string $string
* @param HTMLPurifier_Config $config
* @param HTMLPurifier_Context $context
* @return bool|string
*/
public function validate($string, $config, $context)
{
$string = trim($string); $string = trim($string);
if ($string === '') return false; if ($string === '') {
return false;
}
$parent_result = parent::validate($string, $config, $context); $parent_result = parent::validate($string, $config, $context);
if ($parent_result !== false) return $parent_result; if ($parent_result !== false) {
return $parent_result;
}
$length = strlen($string); $length = strlen($string);
$last_char = $string[$length - 1]; $last_char = $string[$length - 1];
if ($last_char !== '*') return false; if ($last_char !== '*') {
return false;
}
$int = substr($string, 0, $length - 1); $int = substr($string, 0, $length - 1);
if ($int == '') return '*'; if ($int == '') {
if (!is_numeric($int)) return false; return '*';
}
$int = (int) $int; if (!is_numeric($int)) {
return false;
if ($int < 0) return false; }
if ($int == 0) return '0';
if ($int == 1) return '*';
return ((string) $int) . '*';
$int = (int)$int;
if ($int < 0) {
return false;
}
if ($int == 0) {
return '0';
}
if ($int == 1) {
return '*';
}
return ((string)$int) . '*';
} }
} }
// vim: et sw=4 sts=4 // vim: et sw=4 sts=4

View File

@@ -2,21 +2,42 @@
/** /**
* Validates contents based on NMTOKENS attribute type. * Validates contents based on NMTOKENS attribute type.
* @note The only current use for this is the class attribute in HTML
* @note Could have some functionality factored out into Nmtoken class
* @warning We cannot assume this class will be used only for 'class'
* attributes. Not sure how to hook in magic behavior, then.
*/ */
class HTMLPurifier_AttrDef_HTML_Nmtokens extends HTMLPurifier_AttrDef class HTMLPurifier_AttrDef_HTML_Nmtokens extends HTMLPurifier_AttrDef
{ {
public function validate($string, $config, $context) { /**
* @param string $string
* @param HTMLPurifier_Config $config
* @param HTMLPurifier_Context $context
* @return bool|string
*/
public function validate($string, $config, $context)
{
$string = trim($string); $string = trim($string);
// early abort: '' and '0' (strings that convert to false) are invalid // early abort: '' and '0' (strings that convert to false) are invalid
if (!$string) return false; if (!$string) {
return false;
}
$tokens = $this->split($string, $config, $context);
$tokens = $this->filter($tokens, $config, $context);
if (empty($tokens)) {
return false;
}
return implode(' ', $tokens);
}
/**
* Splits a space separated list of tokens into its constituent parts.
* @param string $string
* @param HTMLPurifier_Config $config
* @param HTMLPurifier_Context $context
* @return array
*/
protected function split($string, $config, $context)
{
// OPTIMIZABLE! // OPTIMIZABLE!
// do the preg_match, capture all subpatterns for reformulation // do the preg_match, capture all subpatterns for reformulation
@@ -24,25 +45,26 @@ class HTMLPurifier_AttrDef_HTML_Nmtokens extends HTMLPurifier_AttrDef
// escaping because I don't know how to do that with regexps // escaping because I don't know how to do that with regexps
// and plus it would complicate optimization efforts (you never // and plus it would complicate optimization efforts (you never
// see that anyway). // see that anyway).
$matches = array(); $pattern = '/(?:(?<=\s)|\A)' . // look behind for space or string start
$pattern = '/(?:(?<=\s)|\A)'. // look behind for space or string start '((?:--|-?[A-Za-z_])[A-Za-z_\-0-9]*)' .
'((?:--|-?[A-Za-z_])[A-Za-z_\-0-9]*)'. '(?:(?=\s)|\z)/'; // look ahead for space or string end
'(?:(?=\s)|\z)/'; // look ahead for space or string end
preg_match_all($pattern, $string, $matches); preg_match_all($pattern, $string, $matches);
return $matches[1];
if (empty($matches[1])) return false;
// reconstruct string
$new_string = '';
foreach ($matches[1] as $token) {
$new_string .= $token . ' ';
}
$new_string = rtrim($new_string);
return $new_string;
} }
/**
* Template method for removing certain tokens based on arbitrary criteria.
* @note If we wanted to be really functional, we'd do an array_filter
* with a callback. But... we're not.
* @param array $tokens
* @param HTMLPurifier_Config $config
* @param HTMLPurifier_Context $context
* @return array
*/
protected function filter($tokens, $config, $context)
{
return $tokens;
}
} }
// vim: et sw=4 sts=4 // vim: et sw=4 sts=4

View File

@@ -6,43 +6,71 @@
class HTMLPurifier_AttrDef_HTML_Pixels extends HTMLPurifier_AttrDef class HTMLPurifier_AttrDef_HTML_Pixels extends HTMLPurifier_AttrDef
{ {
/**
* @type int
*/
protected $max; protected $max;
public function __construct($max = null) { /**
* @param int $max
*/
public function __construct($max = null)
{
$this->max = $max; $this->max = $max;
} }
public function validate($string, $config, $context) { /**
* @param string $string
* @param HTMLPurifier_Config $config
* @param HTMLPurifier_Context $context
* @return bool|string
*/
public function validate($string, $config, $context)
{
$string = trim($string); $string = trim($string);
if ($string === '0') return $string; if ($string === '0') {
if ($string === '') return false; return $string;
}
if ($string === '') {
return false;
}
$length = strlen($string); $length = strlen($string);
if (substr($string, $length - 2) == 'px') { if (substr($string, $length - 2) == 'px') {
$string = substr($string, 0, $length - 2); $string = substr($string, 0, $length - 2);
} }
if (!is_numeric($string)) return false; if (!is_numeric($string)) {
$int = (int) $string; return false;
}
$int = (int)$string;
if ($int < 0) return '0'; if ($int < 0) {
return '0';
}
// upper-bound value, extremely high values can // upper-bound value, extremely high values can
// crash operating systems, see <http://ha.ckers.org/imagecrash.html> // crash operating systems, see <http://ha.ckers.org/imagecrash.html>
// WARNING, above link WILL crash you if you're using Windows // WARNING, above link WILL crash you if you're using Windows
if ($this->max !== null && $int > $this->max) return (string) $this->max; if ($this->max !== null && $int > $this->max) {
return (string)$this->max;
return (string) $int; }
return (string)$int;
} }
public function make($string) { /**
if ($string === '') $max = null; * @param string $string
else $max = (int) $string; * @return HTMLPurifier_AttrDef
*/
public function make($string)
{
if ($string === '') {
$max = null;
} else {
$max = (int)$string;
}
$class = get_class($this); $class = get_class($this);
return new $class($max); return new $class($max);
} }
} }
// vim: et sw=4 sts=4 // vim: et sw=4 sts=4

View File

@@ -11,17 +11,20 @@ class HTMLPurifier_AttrDef_Integer extends HTMLPurifier_AttrDef
{ {
/** /**
* Bool indicating whether or not negative values are allowed * Whether or not negative values are allowed.
* @type bool
*/ */
protected $negative = true; protected $negative = true;
/** /**
* Bool indicating whether or not zero is allowed * Whether or not zero is allowed.
* @type bool
*/ */
protected $zero = true; protected $zero = true;
/** /**
* Bool indicating whether or not positive values are allowed * Whether or not positive values are allowed.
* @type bool
*/ */
protected $positive = true; protected $positive = true;
@@ -30,44 +33,59 @@ class HTMLPurifier_AttrDef_Integer extends HTMLPurifier_AttrDef
* @param $zero Bool indicating whether or not zero is allowed * @param $zero Bool indicating whether or not zero is allowed
* @param $positive Bool indicating whether or not positive values are allowed * @param $positive Bool indicating whether or not positive values are allowed
*/ */
public function __construct( public function __construct($negative = true, $zero = true, $positive = true)
$negative = true, $zero = true, $positive = true {
) {
$this->negative = $negative; $this->negative = $negative;
$this->zero = $zero; $this->zero = $zero;
$this->positive = $positive; $this->positive = $positive;
} }
public function validate($integer, $config, $context) { /**
* @param string $integer
* @param HTMLPurifier_Config $config
* @param HTMLPurifier_Context $context
* @return bool|string
*/
public function validate($integer, $config, $context)
{
$integer = $this->parseCDATA($integer); $integer = $this->parseCDATA($integer);
if ($integer === '') return false; if ($integer === '') {
return false;
}
// we could possibly simply typecast it to integer, but there are // we could possibly simply typecast it to integer, but there are
// certain fringe cases that must not return an integer. // certain fringe cases that must not return an integer.
// clip leading sign // clip leading sign
if ( $this->negative && $integer[0] === '-' ) { if ($this->negative && $integer[0] === '-') {
$digits = substr($integer, 1); $digits = substr($integer, 1);
if ($digits === '0') $integer = '0'; // rm minus sign for zero if ($digits === '0') {
} elseif( $this->positive && $integer[0] === '+' ) { $integer = '0';
} // rm minus sign for zero
} elseif ($this->positive && $integer[0] === '+') {
$digits = $integer = substr($integer, 1); // rm unnecessary plus $digits = $integer = substr($integer, 1); // rm unnecessary plus
} else { } else {
$digits = $integer; $digits = $integer;
} }
// test if it's numeric // test if it's numeric
if (!ctype_digit($digits)) return false; if (!ctype_digit($digits)) {
return false;
}
// perform scope tests // perform scope tests
if (!$this->zero && $integer == 0) return false; if (!$this->zero && $integer == 0) {
if (!$this->positive && $integer > 0) return false; return false;
if (!$this->negative && $integer < 0) return false; }
if (!$this->positive && $integer > 0) {
return false;
}
if (!$this->negative && $integer < 0) {
return false;
}
return $integer; return $integer;
} }
} }
// vim: et sw=4 sts=4 // vim: et sw=4 sts=4

View File

@@ -7,15 +7,25 @@
class HTMLPurifier_AttrDef_Lang extends HTMLPurifier_AttrDef class HTMLPurifier_AttrDef_Lang extends HTMLPurifier_AttrDef
{ {
public function validate($string, $config, $context) { /**
* @param string $string
* @param HTMLPurifier_Config $config
* @param HTMLPurifier_Context $context
* @return bool|string
*/
public function validate($string, $config, $context)
{
$string = trim($string); $string = trim($string);
if (!$string) return false; if (!$string) {
return false;
}
$subtags = explode('-', $string); $subtags = explode('-', $string);
$num_subtags = count($subtags); $num_subtags = count($subtags);
if ($num_subtags == 0) return false; // sanity check if ($num_subtags == 0) { // sanity check
return false;
}
// process primary subtag : $subtags[0] // process primary subtag : $subtags[0]
$length = strlen($subtags[0]); $length = strlen($subtags[0]);
@@ -23,15 +33,15 @@ class HTMLPurifier_AttrDef_Lang extends HTMLPurifier_AttrDef
case 0: case 0:
return false; return false;
case 1: case 1:
if (! ($subtags[0] == 'x' || $subtags[0] == 'i') ) { if (!($subtags[0] == 'x' || $subtags[0] == 'i')) {
return false; return false;
} }
break; break;
case 2: case 2:
case 3: case 3:
if (! ctype_alpha($subtags[0]) ) { if (!ctype_alpha($subtags[0])) {
return false; return false;
} elseif (! ctype_lower($subtags[0]) ) { } elseif (!ctype_lower($subtags[0])) {
$subtags[0] = strtolower($subtags[0]); $subtags[0] = strtolower($subtags[0]);
} }
break; break;
@@ -40,17 +50,23 @@ class HTMLPurifier_AttrDef_Lang extends HTMLPurifier_AttrDef
} }
$new_string = $subtags[0]; $new_string = $subtags[0];
if ($num_subtags == 1) return $new_string; if ($num_subtags == 1) {
return $new_string;
}
// process second subtag : $subtags[1] // process second subtag : $subtags[1]
$length = strlen($subtags[1]); $length = strlen($subtags[1]);
if ($length == 0 || ($length == 1 && $subtags[1] != 'x') || $length > 8 || !ctype_alnum($subtags[1])) { if ($length == 0 || ($length == 1 && $subtags[1] != 'x') || $length > 8 || !ctype_alnum($subtags[1])) {
return $new_string; return $new_string;
} }
if (!ctype_lower($subtags[1])) $subtags[1] = strtolower($subtags[1]); if (!ctype_lower($subtags[1])) {
$subtags[1] = strtolower($subtags[1]);
}
$new_string .= '-' . $subtags[1]; $new_string .= '-' . $subtags[1];
if ($num_subtags == 2) return $new_string; if ($num_subtags == 2) {
return $new_string;
}
// process all other subtags, index 2 and up // process all other subtags, index 2 and up
for ($i = 2; $i < $num_subtags; $i++) { for ($i = 2; $i < $num_subtags; $i++) {
@@ -63,11 +79,8 @@ class HTMLPurifier_AttrDef_Lang extends HTMLPurifier_AttrDef
} }
$new_string .= '-' . $subtags[$i]; $new_string .= '-' . $subtags[$i];
} }
return $new_string; return $new_string;
} }
} }
// vim: et sw=4 sts=4 // vim: et sw=4 sts=4

View File

@@ -6,21 +6,41 @@
class HTMLPurifier_AttrDef_Switch class HTMLPurifier_AttrDef_Switch
{ {
/**
* @type string
*/
protected $tag; protected $tag;
protected $withTag, $withoutTag;
/**
* @type HTMLPurifier_AttrDef
*/
protected $withTag;
/**
* @type HTMLPurifier_AttrDef
*/
protected $withoutTag;
/** /**
* @param string $tag Tag name to switch upon * @param string $tag Tag name to switch upon
* @param HTMLPurifier_AttrDef $with_tag Call if token matches tag * @param HTMLPurifier_AttrDef $with_tag Call if token matches tag
* @param HTMLPurifier_AttrDef $without_tag Call if token doesn't match, or there is no token * @param HTMLPurifier_AttrDef $without_tag Call if token doesn't match, or there is no token
*/ */
public function __construct($tag, $with_tag, $without_tag) { public function __construct($tag, $with_tag, $without_tag)
{
$this->tag = $tag; $this->tag = $tag;
$this->withTag = $with_tag; $this->withTag = $with_tag;
$this->withoutTag = $without_tag; $this->withoutTag = $without_tag;
} }
public function validate($string, $config, $context) { /**
* @param string $string
* @param HTMLPurifier_Config $config
* @param HTMLPurifier_Context $context
* @return bool|string
*/
public function validate($string, $config, $context)
{
$token = $context->get('CurrentToken', true); $token = $context->get('CurrentToken', true);
if (!$token || $token->name !== $this->tag) { if (!$token || $token->name !== $this->tag) {
return $this->withoutTag->validate($string, $config, $context); return $this->withoutTag->validate($string, $config, $context);
@@ -28,7 +48,6 @@ class HTMLPurifier_AttrDef_Switch
return $this->withTag->validate($string, $config, $context); return $this->withTag->validate($string, $config, $context);
} }
} }
} }
// vim: et sw=4 sts=4 // vim: et sw=4 sts=4

View File

@@ -6,10 +6,16 @@
class HTMLPurifier_AttrDef_Text extends HTMLPurifier_AttrDef class HTMLPurifier_AttrDef_Text extends HTMLPurifier_AttrDef
{ {
public function validate($string, $config, $context) { /**
* @param string $string
* @param HTMLPurifier_Config $config
* @param HTMLPurifier_Context $context
* @return bool|string
*/
public function validate($string, $config, $context)
{
return $this->parseCDATA($string); return $this->parseCDATA($string);
} }
} }
// vim: et sw=4 sts=4 // vim: et sw=4 sts=4

View File

@@ -7,31 +7,54 @@
class HTMLPurifier_AttrDef_URI extends HTMLPurifier_AttrDef class HTMLPurifier_AttrDef_URI extends HTMLPurifier_AttrDef
{ {
/**
* @type HTMLPurifier_URIParser
*/
protected $parser; protected $parser;
/**
* @type bool
*/
protected $embedsResource; protected $embedsResource;
/** /**
* @param $embeds_resource_resource Does the URI here result in an extra HTTP request? * @param bool $embeds_resource Does the URI here result in an extra HTTP request?
*/ */
public function __construct($embeds_resource = false) { public function __construct($embeds_resource = false)
{
$this->parser = new HTMLPurifier_URIParser(); $this->parser = new HTMLPurifier_URIParser();
$this->embedsResource = (bool) $embeds_resource; $this->embedsResource = (bool)$embeds_resource;
} }
public function make($string) { /**
$embeds = (bool) $string; * @param string $string
* @return HTMLPurifier_AttrDef_URI
*/
public function make($string)
{
$embeds = ($string === 'embedded');
return new HTMLPurifier_AttrDef_URI($embeds); return new HTMLPurifier_AttrDef_URI($embeds);
} }
public function validate($uri, $config, $context) { /**
* @param string $uri
if ($config->get('URI', 'Disable')) return false; * @param HTMLPurifier_Config $config
* @param HTMLPurifier_Context $context
* @return bool|string
*/
public function validate($uri, $config, $context)
{
if ($config->get('URI.Disable')) {
return false;
}
$uri = $this->parseCDATA($uri); $uri = $this->parseCDATA($uri);
// parse the URI // parse the URI
$uri = $this->parser->parse($uri); $uri = $this->parser->parse($uri);
if ($uri === false) return false; if ($uri === false) {
return false;
}
// add embedded flag to context for validators // add embedded flag to context for validators
$context->register('EmbeddedURI', $this->embedsResource); $context->register('EmbeddedURI', $this->embedsResource);
@@ -41,23 +64,35 @@ class HTMLPurifier_AttrDef_URI extends HTMLPurifier_AttrDef
// generic validation // generic validation
$result = $uri->validate($config, $context); $result = $uri->validate($config, $context);
if (!$result) break; if (!$result) {
break;
}
// chained filtering // chained filtering
$uri_def = $config->getDefinition('URI'); $uri_def = $config->getDefinition('URI');
$result = $uri_def->filter($uri, $config, $context); $result = $uri_def->filter($uri, $config, $context);
if (!$result) break; if (!$result) {
break;
}
// scheme-specific validation // scheme-specific validation
$scheme_obj = $uri->getSchemeObj($config, $context); $scheme_obj = $uri->getSchemeObj($config, $context);
if (!$scheme_obj) break; if (!$scheme_obj) {
if ($this->embedsResource && !$scheme_obj->browsable) break; break;
}
if ($this->embedsResource && !$scheme_obj->browsable) {
break;
}
$result = $scheme_obj->validate($uri, $config, $context); $result = $scheme_obj->validate($uri, $config, $context);
if (!$result) break; if (!$result) {
break;
}
// Post chained filtering // Post chained filtering
$result = $uri_def->postFilter($uri, $config, $context); $result = $uri_def->postFilter($uri, $config, $context);
if (!$result) break; if (!$result) {
break;
}
// survived gauntlet // survived gauntlet
$ok = true; $ok = true;
@@ -65,13 +100,12 @@ class HTMLPurifier_AttrDef_URI extends HTMLPurifier_AttrDef
} while (false); } while (false);
$context->destroy('EmbeddedURI'); $context->destroy('EmbeddedURI');
if (!$ok) return false; if (!$ok) {
return false;
}
// back to string // back to string
return $uri->toString(); return $uri->toString();
} }
} }
// vim: et sw=4 sts=4 // vim: et sw=4 sts=4

View File

@@ -5,8 +5,11 @@ abstract class HTMLPurifier_AttrDef_URI_Email extends HTMLPurifier_AttrDef
/** /**
* Unpacks a mailbox into its display-name and address * Unpacks a mailbox into its display-name and address
* @param string $string
* @return mixed
*/ */
function unpack($string) { public function unpack($string)
{
// needs to be implemented // needs to be implemented
} }

Some files were not shown because too many files have changed in this diff Show More