1
0
mirror of https://github.com/ezyang/htmlpurifier.git synced 2025-08-04 13:18:00 +02:00

Compare commits

...

79 Commits

Author SHA1 Message Date
Edward Z. Yang
7015aaff46 Release 3.1.1
git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@1808 48356398-32a2-884e-a903-53898d9a118a
2008-06-19 21:43:57 +00:00
Edward Z. Yang
1009bd41a6 var -> public
git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@1807 48356398-32a2-884e-a903-53898d9a118a
2008-06-19 21:24:50 +00:00
Edward Z. Yang
511dfe2d4a [3.1.1] Update Munge docs.
git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@1804 48356398-32a2-884e-a903-53898d9a118a
2008-06-19 19:06:55 +00:00
Edward Z. Yang
463aa3a0fa [3.1.1] General munge improvements
- Add CurrentCSSProperty context variable
- Move Munge to its own class, derived off of SecureMunge.
- Rename %URI.SecureMunge to %URI.Munge
- Rename %URI.SecureMungeSecretKey to %URI.MungeSecretKey
- Add extra substitutions for munge

git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@1803 48356398-32a2-884e-a903-53898d9a118a
2008-06-18 03:29:27 +00:00
Edward Z. Yang
7189ec2790 Update TODO
git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@1802 48356398-32a2-884e-a903-53898d9a118a
2008-06-17 04:00:03 +00:00
Edward Z. Yang
e901d832ab Update Modx plugin to work with HTML Purifier 3.1.0.
git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@1801 48356398-32a2-884e-a903-53898d9a118a
2008-06-17 03:41:40 +00:00
Edward Z. Yang
643ed1bddc [3.1.1] Fix text-decoration: none bug
git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@1799 48356398-32a2-884e-a903-53898d9a118a
2008-06-17 03:12:50 +00:00
Edward Z. Yang
41830cd902 Update TODO
git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@1798 48356398-32a2-884e-a903-53898d9a118a
2008-06-17 02:40:38 +00:00
Edward Z. Yang
261aa1aeaa Update news, installer, and add an extra specimen.
git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@1796 48356398-32a2-884e-a903-53898d9a118a
2008-06-15 22:13:16 +00:00
Edward Z. Yang
486b401cf7 Fix broken tests.
git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@1795 48356398-32a2-884e-a903-53898d9a118a
2008-06-12 03:12:39 +00:00
Edward Z. Yang
f2794e59c5 [3.1.1] Mimick movie value in data if not set in safe object.
git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@1794 48356398-32a2-884e-a903-53898d9a118a
2008-06-11 23:12:38 +00:00
Edward Z. Yang
d702077d2e Undo redundant embedded URI check.
git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@1782 48356398-32a2-884e-a903-53898d9a118a
2008-06-10 01:23:11 +00:00
Edward Z. Yang
36bd06d53e [3.1.1] Implement SafeEmbed. Also, miscellaneous bugfixes.
git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@1781 48356398-32a2-884e-a903-53898d9a118a
2008-06-10 01:18:03 +00:00
Edward Z. Yang
13eb016e06 [3.1.1] Implement SafeObject.
git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@1780 48356398-32a2-884e-a903-53898d9a118a
2008-06-10 00:13:44 +00:00
Edward Z. Yang
32025a12e1 [3.1.1] Allow injectors to be specified by modules.
- Make method for URI implemented
- Split out checkNeeded in Injector from prepare

git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@1779 48356398-32a2-884e-a903-53898d9a118a
2008-06-09 01:23:05 +00:00
Edward Z. Yang
7dae94c44b Update TODO.
git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@1778 48356398-32a2-884e-a903-53898d9a118a
2008-06-08 16:57:48 +00:00
Edward Z. Yang
54cc691ba7 Update TODO.
git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@1777 48356398-32a2-884e-a903-53898d9a118a
2008-06-04 22:52:48 +00:00
Edward Z. Yang
3af2ff8f98 Fix bug with SecureMunge regarding embedded URIs.
git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@1775 48356398-32a2-884e-a903-53898d9a118a
2008-06-02 17:39:29 +00:00
Edward Z. Yang
36fb284d2f Add integration test, and fix broken SecureMunge
git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@1774 48356398-32a2-884e-a903-53898d9a118a
2008-05-27 17:47:25 +00:00
Edward Z. Yang
8d1f1e8e73 [3.1.1] Improved adherence to Unicode by checking for non-character codepoints. Thanks Geoffrey Sneddon for reporting.
git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@1773 48356398-32a2-884e-a903-53898d9a118a
2008-05-26 21:27:52 +00:00
Edward Z. Yang
322288e6c0 [3.1.1] Implement %URI.SecureMunge and %URI.SecureMungeSecretKey, thanks Chris!
- URIFilter->prepare can return false in order to abort loading of the filter
- Implemented post URI filtering. Set member variable $post to true to set a URIFilter as such.

git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@1772 48356398-32a2-884e-a903-53898d9a118a
2008-05-26 16:26:47 +00:00
Edward Z. Yang
3c4346cb1e Fix back-compat regressions. Also, compactify configuration code.
git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@1771 48356398-32a2-884e-a903-53898d9a118a
2008-05-26 04:35:12 +00:00
Edward Z. Yang
14d934c7ca [3.1.1] Land vs's HTMLPurifier_Generator patch, and a number of other bugfixes for that change
- Convert a number of calls to use new constructor signature for Generator
- Make generator require configuration; this exposes a number of latent bugs
- Removed generator hack
- Convert Printers to use new optimized ConfigSchema format
- Hack with Printer configuration; pass an array(generator config, render config) to distinguish between output and target.
- HTML/CSS Printers need to be primed, otherwise fatal errors
- Convert a few test-cases to use member properties

git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@1770 48356398-32a2-884e-a903-53898d9a118a
2008-05-26 04:05:48 +00:00
Edward Z. Yang
bb16d8eae5 [3.1.1] Fix Shift_JIS encoding wonkiness with yen symbols and whatnot
- Improve parseCDATA algorithm to take into account newline normalization
- Fix regression in FontFamily validator. We now have a legit parser in place, albeit somewhat limited in use. Will be superseded by parser for entire grammar
- Convert EncoderTest to new format

git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@1769 48356398-32a2-884e-a903-53898d9a118a
2008-05-25 05:40:20 +00:00
Edward Z. Yang
10530d7f81 [3.1.1] Fix stray backslashes in font-family.
git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@1768 48356398-32a2-884e-a903-53898d9a118a
2008-05-24 18:19:36 +00:00
Edward Z. Yang
c7e172f660 Update news: not 1/2, more like 1/3
git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@1767 48356398-32a2-884e-a903-53898d9a118a
2008-05-23 17:15:13 +00:00
Edward Z. Yang
917d2ea5ef [3.1.1] More ConfigSchema optimizations: degenerate form can accommodate type and allow_null
git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@1766 48356398-32a2-884e-a903-53898d9a118a
2008-05-23 17:10:26 +00:00
Edward Z. Yang
895141e0b5 [3.1.1] Further optimize ConfigSchema by eliminating stdclass when only type is set.
git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@1765 48356398-32a2-884e-a903-53898d9a118a
2008-05-23 17:00:58 +00:00
Edward Z. Yang
8ab30e24b7 [3.1.1] Memory optimizations for ConfigSchema. Changes include:
- Elimination of ConfigDef and subclasses in favor of stdclass. Most property names stay the same
- Added benchmark script for ConfigSchema
- Types are internally handled as magic integers. Use HTMLPurifier_VarParser->getTypeName to convert to human readable form. HTMLPurifier_VarParser still accepts strings.
- Parser in config schema only used for legacy interface


git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@1764 48356398-32a2-884e-a903-53898d9a118a
2008-05-23 16:43:24 +00:00
Edward Z. Yang
9db891c3aa Add benchmark script for ConfigSchema.
git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@1763 48356398-32a2-884e-a903-53898d9a118a
2008-05-23 15:24:15 +00:00
Edward Z. Yang
eb9f9bc7f6 [3.1.1] Round up imagecrash support with HTML.MaxImgLength
- Add $max to AttrDef/HTML/Pixels.php
- Add %HTML.MaxImgLength
- CSS width/height allows percents when MaxImgLength is disabled


git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@1762 48356398-32a2-884e-a903-53898d9a118a
2008-05-23 02:09:43 +00:00
Edward Z. Yang
fcebb7731d [3.1.1] Migrate all HTMLModules to use setup($config) rather than __construct
git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@1761 48356398-32a2-884e-a903-53898d9a118a
2008-05-22 19:36:59 +00:00
Edward Z. Yang
8d0d0d1a03 [3.1.1] construct() to setup() in HTMLModules
git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@1760 48356398-32a2-884e-a903-53898d9a118a
2008-05-22 04:34:19 +00:00
Edward Z. Yang
80f59206d7 [3.1.1] Implement percent encoding for URI query and fragment
git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@1758 48356398-32a2-884e-a903-53898d9a118a
2008-05-21 02:58:41 +00:00
Edward Z. Yang
af3f5190dc [3.1.1] Lazy token updating for HTMLPurifier/AttrValidator.php
git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@1757 48356398-32a2-884e-a903-53898d9a118a
2008-05-21 02:30:27 +00:00
Edward Z. Yang
5620241165 [3.1.1] Disable percent height/width attributes for img
git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@1756 48356398-32a2-884e-a903-53898d9a118a
2008-05-21 02:01:25 +00:00
Edward Z. Yang
c06727190e Update NEWS accordingly.
git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@1755 48356398-32a2-884e-a903-53898d9a118a
2008-05-21 01:58:48 +00:00
Edward Z. Yang
1a95852007 [3.1.1] Implement more robust imagecrash protection for CSS width/height.
- Change API for HTMLPurifier_AttrDef_CSS_Length
- Implement HTMLPurifier_AttrDef_Switch class
- Implement HTMLPurifier_Length->compareTo, and make make() accept object instances

git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@1754 48356398-32a2-884e-a903-53898d9a118a
2008-05-21 01:56:48 +00:00
Edward Z. Yang
c3fab7200e Add support for pixel as a pseudo-English unit.
git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@1753 48356398-32a2-884e-a903-53898d9a118a
2008-05-21 00:42:55 +00:00
Edward Z. Yang
6d7a17e9b6 Implement without-bcmath compatible UnitConverter. We might want to factor our floating point fudges. These calculations are only accurate for small precisions, and are architecture-dependent. (Unit tests seem to work on 32bit, though).
git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@1752 48356398-32a2-884e-a903-53898d9a118a
2008-05-21 00:29:31 +00:00
Edward Z. Yang
64b5581bf2 [3.1.1] Have CSS/Length.php use the new Length class. Also, put onus of non-negative to callee, which would compare $n.
git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@1751 48356398-32a2-884e-a903-53898d9a118a
2008-05-20 23:15:20 +00:00
Edward Z. Yang
d8da5ff406 Finally stabilize the unit converter.
git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@1750 48356398-32a2-884e-a903-53898d9a118a
2008-05-20 21:23:38 +00:00
Edward Z. Yang
fda310f1e7 Update UnitConverter to deal more correctly with X.XX... decimals. Not complete.
git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@1749 48356398-32a2-884e-a903-53898d9a118a
2008-05-20 17:48:15 +00:00
Edward Z. Yang
fc7dbdbd33 Disable Tidy test completely.
git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@1748 48356398-32a2-884e-a903-53898d9a118a
2008-05-20 17:14:08 +00:00
Edward Z. Yang
02ac821503 Update TODO and run flush.
git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@1747 48356398-32a2-884e-a903-53898d9a118a
2008-05-20 01:31:51 +00:00
Edward Z. Yang
16fa73afa0 [3.1.1] Added HTMLPurifier_UnitConverter and HTMLPurifier_Length for convenient handling of CSS-style lengths.
- Fixed another de-underscoring in the SimpleTest library

git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@1746 48356398-32a2-884e-a903-53898d9a118a
2008-05-20 01:19:00 +00:00
Edward Z. Yang
32a6afa27c Update news.
git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@1744 48356398-32a2-884e-a903-53898d9a118a
2008-05-18 20:12:25 +00:00
Edward Z. Yang
587d642826 Release 3.1.0.
git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@1728 48356398-32a2-884e-a903-53898d9a118a
2008-05-18 05:46:06 +00:00
Edward Z. Yang
0bef016271 [3.1.0] Get testing working again for all versions
- Standalone testing setup properly with autoload
- Bootstrap autoloader deals more robustly with classes that don't exist, preventing class_exists($class, true) from barfing.
- Cleanup $_reporter to $reporter

git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@1727 48356398-32a2-884e-a903-53898d9a118a
2008-05-16 01:49:33 +00:00
Edward Z. Yang
ef6a1c9274 Allow for users to load Language class files themselves. Messages are still HTML Purifier dependent; we need to figure out a way around that.
git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@1725 48356398-32a2-884e-a903-53898d9a118a
2008-05-15 23:22:34 +00:00
Edward Z. Yang
86b1da9b6f [3.1.0] Fixed bug with fallback languages in LanguageFactory
- Also, reverted bogus Generator changes

git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@1723 48356398-32a2-884e-a903-53898d9a118a
2008-05-15 23:04:46 +00:00
Edward Z. Yang
00ea2062d4 [3.1.0] Fix buggy LanguageFactory. This revision is incomplete.
- Some bogus commits to Generator were made, and will be reverted next revision.

git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@1722 48356398-32a2-884e-a903-53898d9a118a
2008-05-15 17:47:47 +00:00
Edward Z. Yang
cb5d5d0648 [3.1.0] Revamp URI handling of percent encoding and validation.
git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@1709 48356398-32a2-884e-a903-53898d9a118a
2008-05-14 02:19:00 +00:00
Edward Z. Yang
77ce3e8b4a [3.1.0] Extend scanner to catch $this->config; chmod new directories from Serializer. I'm not exactly sure what the implications of the bugfix are, but hopefully it won't blow up.
git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@1708 48356398-32a2-884e-a903-53898d9a118a
2008-05-13 03:17:38 +00:00
Edward Z. Yang
e0c0d8eab6 [3.1.0] Allow arbitrary whitespace in %HTML.Allowed
git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@1707 48356398-32a2-884e-a903-53898d9a118a
2008-05-13 02:02:27 +00:00
Edward Z. Yang
ce46fb618c [3.1.0] Add missing tests and errors for forbidden attributes
git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@1706 48356398-32a2-884e-a903-53898d9a118a
2008-05-13 01:41:25 +00:00
Edward Z. Yang
9f37764614 Update TODO with items from Denis.
git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@1702 48356398-32a2-884e-a903-53898d9a118a
2008-05-06 03:08:09 +00:00
Edward Z. Yang
aaf6ba421c Sync with SimpleTest codebase
git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@1701 48356398-32a2-884e-a903-53898d9a118a
2008-04-28 19:52:13 +00:00
Edward Z. Yang
4b862f64e6 [3.1.0] Fix ScriptRequired bug with trusted installs
- Generator now takes $config and $context during instantiation
- Double quotes outside of attributes are not escaped


git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@1700 48356398-32a2-884e-a903-53898d9a118a
2008-04-28 01:35:07 +00:00
Edward Z. Yang
be2cfb7918 Fix latest batch of SimpleTest changes. Also, commit forgotten NEWS.
git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@1699 48356398-32a2-884e-a903-53898d9a118a
2008-04-26 19:50:27 +00:00
Edward Z. Yang
2f29c27a59 [3.1.0] Fix broken PH5P in latest versions of DOM with bandaid; punt to DirectLex.
git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@1698 48356398-32a2-884e-a903-53898d9a118a
2008-04-26 19:47:22 +00:00
Edward Z. Yang
144bd6f07a [3.1.0] Fix bug with 3.1.0-dev version number (the dash caused problems, so we switched to commas)
- Refactored out null definition cache during HTMLDefinition tests


git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@1697 48356398-32a2-884e-a903-53898d9a118a
2008-04-26 19:28:14 +00:00
Edward Z. Yang
a95f600e76 Fix another fatal error from SimpleTest refactoring.
git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@1696 48356398-32a2-884e-a903-53898d9a118a
2008-04-26 03:18:07 +00:00
Edward Z. Yang
84aa2ca390 [3.1.0] Implement tag@attr for Allowed and Forbidden
- Fix (or null) bug in configdoc

git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@1695 48356398-32a2-884e-a903-53898d9a118a
2008-04-26 03:14:01 +00:00
Edward Z. Yang
1f8619cda5 [3.1.0] Fix and revamp configForm.php smoketest
- Fix bool/null ConfigForm field

git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@1694 48356398-32a2-884e-a903-53898d9a118a
2008-04-26 01:13:58 +00:00
Edward Z. Yang
04b1ec33cb [3.1.0] version -> VERSION
git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@1693 48356398-32a2-884e-a903-53898d9a118a
2008-04-25 05:47:36 +00:00
Edward Z. Yang
6d9643a92e [3.1.0] Add const version to HTMLPurifier, also bump version to 3.1.0-dev; this apparently is a good idea!
git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@1692 48356398-32a2-884e-a903-53898d9a118a
2008-04-25 05:26:10 +00:00
Edward Z. Yang
438d973073 Renumber as 3.1.0, however, NOT releasing (WHATSNEW isn't updated)
git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@1691 48356398-32a2-884e-a903-53898d9a118a
2008-04-25 03:54:38 +00:00
Edward Z. Yang
f295465ad4 [3.1.0] Allow index to be false for config from for creation
- Static-ify Printer_ConfigForm's get* functions

git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@1690 48356398-32a2-884e-a903-53898d9a118a
2008-04-25 02:43:31 +00:00
Edward Z. Yang
eaabccdd9b [3.1.0] More PHP4->PHP5 conversions, notably reference removal of most methods that return objects
- Removed HTMLPurifier_Error
- Documentation updates
- Removed more copy() methods in favor of clone
- HTMLPurifier::getInstance() to HTMLPurifier::instance()
- Fix InterchangeBuilder to use HTMLPURIFIER_PREFIX

git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@1689 48356398-32a2-884e-a903-53898d9a118a
2008-04-23 02:40:17 +00:00
Edward Z. Yang
893cdd0301 All 3.1.0 TODOs are done!
git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@1687 48356398-32a2-884e-a903-53898d9a118a
2008-04-23 00:17:52 +00:00
Edward Z. Yang
1ba77fedd4 [3.1.0] Implement DenyElementDecorator for imagecrash-protection against CSS width/height
- Misc doc changes
- Add missing inheritance for AttrDef_CSS decorators


git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@1684 48356398-32a2-884e-a903-53898d9a118a
2008-04-22 22:28:54 +00:00
Edward Z. Yang
fae720115a Update TODO
git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@1683 48356398-32a2-884e-a903-53898d9a118a
2008-04-22 20:57:11 +00:00
Edward Z. Yang
c0f2e69c9f [3.1.0] Update French documentation.
git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@1682 48356398-32a2-884e-a903-53898d9a118a
2008-04-22 20:43:47 +00:00
Edward Z. Yang
ca6b20ff2b [phorum-3.0.0.1] Improve installation documentation.
git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@1681 48356398-32a2-884e-a903-53898d9a118a
2008-04-22 18:30:01 +00:00
Edward Z. Yang
c4aa3ee40c [3.1.0] Encoder optimization, as suggested by Diego
git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@1680 48356398-32a2-884e-a903-53898d9a118a
2008-04-22 18:14:40 +00:00
Edward Z. Yang
e9c7873057 [3.1.0] Fix validation error with missing li.
git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@1679 48356398-32a2-884e-a903-53898d9a118a
2008-04-22 17:35:39 +00:00
Edward Z. Yang
d45f42e6a8 Update docs.
git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@1678 48356398-32a2-884e-a903-53898d9a118a
2008-04-22 17:32:42 +00:00
Edward Z. Yang
f46aef698e Post rc skirmishes.
- Update docs
- Update source code comments in generated files
- release1-update.php now flushes after it finishes
- Make InterchangeBuilder alphabetize

git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@1676 48356398-32a2-884e-a903-53898d9a118a
2008-04-22 16:20:45 +00:00
191 changed files with 4349 additions and 1367 deletions

View File

@@ -31,7 +31,7 @@ PROJECT_NAME = HTMLPurifier
# This could be handy for archiving the generated documentation or
# if some version control system is used.
PROJECT_NUMBER = 3.1.0rc1
PROJECT_NUMBER = 3.1.1
# The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute)
# base path where the generated documentation will be put.

View File

@@ -23,8 +23,9 @@ August 8, 2008.
These optional extensions can enhance the capabilities of HTML Purifier:
* iconv : Converts text to and from non-UTF-8 encodings
* tidy : Used for pretty-printing HTML
* iconv : Converts text to and from non-UTF-8 encodings
* bcmath : Used for unit conversion and imagecrash protection
* tidy : Used for pretty-printing HTML
---------------------------------------------------------------------------

View File

@@ -17,7 +17,7 @@ ce document pour quelques choses.
1. Compatibilité
HTML Purifier fonctionne dans PHP 5. PHP 5.0.0 est le dernier
HTML Purifier fonctionne dans PHP 5. PHP 5.0.5 est le dernier
version que je le testais. Il ne dépend de les autre librairies.
Les extensions optionnel est iconv (en général déjà installer) et
@@ -34,19 +34,15 @@ Utilisez:
...quand vous devez utiliser HTML Purifier (ne inclure pas quand vous
ne devez pas, parce que HTML Purifier est trés grand.)
Si vous n'aime pas que HTML Purifier change vos include_path, on peut
change vos include_path, et:
HTML Purifier utilise 'autoload'. Si vous avez définu la fonction
__autoload, vous doivez ajoute cet programme:
require_once 'HTMLPurifier.php';
spl_autoload_register('__autoload')
Seuleument les contents dans library/ est essentiel; vous peut enlever
les autre fichiers quand vous est dans une atmosphère professionnel.
Plus d'information est dans le document 'INSTALL'.
[En cours de construction]
6. Installation vite
3. Installation vite
Si votre site web est en UTF-8 et XHTML Transitional, utilisez:

128
NEWS
View File

@@ -9,6 +9,134 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier
. Internal change
==========================
3.1.1, released 2008-06-19
# %URI.Munge now, by default, does not munge resources (for example, <img src="">)
In order to enable this again, please set %URI.MungeResources to true.
! More robust imagecrash protection with height/width CSS with %CSS.MaxImgLength,
and height/width HTML with %HTML.MaxImgLength.
! %URI.MungeSecretKey for secure URI munging. Thanks Chris
for sponsoring this feature. Check out the corresponding documentation
for details. (Att Nightly testers: The API for this feature changed before
the general release. Namely, rename your directives %URI.SecureMungeSecretKey =>
%URI.MungeSecretKey and and %URI.SecureMunge => %URI.Munge)
! Implemented post URI filtering. Set member variable $post to true to set
a URIFilter as such.
! Allow modules to define injectors via $info_injector. Injectors are
automatically disabled if injector's needed elements are not found.
! Support for "safe" objects added, use %HTML.SafeObject and %HTML.SafeEmbed.
Thanks Chris for sponsoring. If you've been using ad hoc code from the
forums, PLEASE use this instead.
! Added substitutions for %e, %n, %a and %p in %URI.Munge (in order,
embedded, tag name, attribute name, CSS property name). See %URI.Munge
for more details. Requested by Jochem Blok.
- Disable percent height/width attributes for img.
- AttrValidator operations are now atomic; updates to attributes are not
manifest in token until end of operations. This prevents naughty internal
code from directly modifying CurrentToken when they're not supposed to.
This semantics change was requested by frank farmer.
- Percent encoding checks enabled for URI query and fragment
- Fix stray backslashes in font-family; CSS Unicode character escapes are
now properly resolved (although *only* in font-family). Thanks Takeshi Terada
for reporting.
- Improve parseCDATA algorithm to take into account newline normalization
- Account for browser confusion between Yen character and backslash in
Shift_JIS encoding. This fix generalizes to any other encoding which is not
a strict superset of printable ASCII. Thanks Takeshi Terada for reporting.
- Fix missing configuration parameter in Generator calls. Thanks vs for the
partial patch.
- Improved adherence to Unicode by checking for non-character codepoints.
Thanks Geoffrey Sneddon for reporting. This may result in degraded
performance for extremely large inputs.
- Allow CSS property-value pair ''text-decoration: none''. Thanks Jochem Blok
for reporting.
. Added HTMLPurifier_UnitConverter and HTMLPurifier_Length for convenient
handling of CSS-style lengths. HTMLPurifier_AttrDef_CSS_Length now uses
this class.
. API of HTMLPurifier_AttrDef_CSS_Length changed from __construct($disable_negative)
to __construct($min, $max). __construct(true) is equivalent to
__construct('0').
. Added HTMLPurifier_AttrDef_Switch class
. Rename HTMLPurifier_HTMLModule_Tidy->construct() to setup() and bubble method
up inheritance hierarchy to HTMLPurifier_HTMLModule. All HTMLModules
get this called with the configuration object. All modules now
use this rather than __construct(), although legacy code using constructors
will still work--the new format, however, lets modules access the
configuration object for HTML namespace dependant tweaks.
. AttrDef_HTML_Pixels now takes a single construction parameter, pixels.
. ConfigSchema data-structure heavily optimized; on average it uses a third
the memory it did previously. The interface has changed accordingly,
consult changes to HTMLPurifier_Config for details.
. Variable parsing types now are magic integers instead of strings
. Added benchmark for ConfigSchema
. HTMLPurifier_Generator requires $config and $context parameters. If you
don't know what they should be, use HTMLPurifier_Config::createDefault()
and new HTMLPurifier_Context().
. Printers now properly distinguish between output configuration, and
target configuration. This is not applicable to scripts using
the Printers for HTML Purifier related tasks.
. HTML/CSS Printers must be primed with prepareGenerator($gen_config), otherwise
fatal errors will ensue.
. URIFilter->prepare can return false in order to abort loading of the filter
. Factory for AttrDef_URI implemented, URI#embedded to indicate URI that embeds
an external resource.
. %URI.Munge functionality factored out into a post-filter class.
. Added CurrentCSSProperty context variable during CSS validation
3.1.0, released 2008-05-18
# Unnecessary references to objects (vestiges of PHP4) removed from method
signatures. The following methods do not need references when assigning from
them and will result in E_STRICT errors if you try:
+ HTMLPurifier_Config->get*Definition() [* = HTML, CSS]
+ HTMLPurifier_ConfigSchema::instance()
+ HTMLPurifier_DefinitionCacheFactory::instance()
+ HTMLPurifier_DefinitionCacheFactory->create()
+ HTMLPurifier_DoctypeRegistry->register()
+ HTMLPurifier_DoctypeRegistry->get()
+ HTMLPurifier_HTMLModule->addElement()
+ HTMLPurifier_HTMLModule->addBlankElement()
+ HTMLPurifier_LanguageFactory::instance()
# Printer_ConfigForm's get*() functions were static-ified
# %HTML.ForbiddenAttributes requires attribute declarations to be in the
form of tag@attr, NOT tag.attr (which will throw an error and won't do
anything). This is for forwards compatibility with XML; you'd do best
to migrate an %HTML.AllowedAttributes directives to this syntax too.
! Allow index to be false for config from form creation
! Added HTMLPurifier::VERSION constant
! Commas, not dashes, used for serializer IDs. This change is forwards-compatible
and allows for version numbers like "3.1.0-dev".
! %HTML.Allowed deals gracefully with whitespace anywhere, anytime!
! HTML Purifier's URI handling is a lot more robust, with much stricter
validation checks and better percent encoding handling. Thanks Gareth Heyes
for indicating security vulnerabilities from lax percent encoding.
! Bootstrap autoloader deals more robustly with classes that don't exist,
preventing class_exists($class, true) from barfing.
- InterchangeBuilder now alphabetizes its lists
- Validation error in configdoc output fixed
- Iconv and other encoding errors muted even with custom error handlers that
do not honor error_reporting
- Add protection against imagecrash attack with CSS height/width
- HTMLPurifier::instance() created for consistency, is equivalent to getInstance()
- Fixed and revamped broken ConfigForm smoketest
- Bug with bool/null fields in Printer_ConfigForm fixed
- Bug with global forbidden attributes fixed
- Improved error messages for allowed and forbidden HTML elements and attributes
- Missing (or null) in configdoc documentation restored
- If DOM throws and exception during parsing with PH5P (occurs in newer versions
of DOM), HTML Purifier punts to DirectLex
- Fatal error with unserialization of ScriptRequired
- Created directories are now chmod'ed properly
- Fixed bug with fallback languages in LanguageFactory
- Standalone testing setup properly with autoload
. Out-of-date documentation revised
. UTF-8 encoding check optimization as suggested by Diego
. HTMLPurifier_Error removed in favor of exceptions
. More copy() function removed; should use clone instead
. More extensive unit tests for HTMLDefinition
. assertPurification moved to central harness
. HTMLPurifier_Generator accepts $config and $context parameters during
instantiation, not runtime
. Double-quotes outside of attribute values are now unescaped
3.1.0rc1, released 2008-04-22
# Autoload support added. Internal require_once's removed in favor of an
explicit require list or autoloading. To use HTML Purifier,

123
TODO
View File

@@ -7,48 +7,36 @@ TODO List
? Maybe I'll Do It
==========================
If no interest is expressed for a feature that may required a considerable
If no interest is expressed for a feature that may require a considerable
amount of effort to implement, it may get endlessly delayed. Do not be
afraid to cast your vote for the next feature to be implemented!
UPCOMING RELEASE
----------------
IMPORTANT
- Release candidate, because of the major changes
DOCUMENTATION
- Update French translation of README
IMPORTANT FEATURES
- Factor out command line parser into its own class, and unit test it
NICE FEATURES
- Factor demo.php into a set of Printer classes, and then create a stub
file for users here (inside the actual HTML Purifier library)
- Support exporting configuration, so users can easily tweak settings
in the demo, and then copy-paste into their own setup
BUGS
- Style attribute height/width limiting for images
- Easy way to blacklist elements and attributes
- Investigate iconv error emitting
- Investigate UTF-8 optimization <http://htmlpurifier.org/phorum/read.php?3,1496>
- Figure out what to do about target="" and name="", since they show up so often
- Investigate how early internal structures can be accessed; this would
prevent structures from being parsed and serialized multiple times.
- Built-in support for target="_blank" on all external links
- Gitify the repository
FUTURE VERSIONS
---------------
3.2 release [Error'ed]
# Error logging for filtering/cleanup procedures
- XSS-attempt detection
3.2 release [It's All About Trust] (floating)
# Implement untrusted, dangerous elements/attributes
- Forms are especially wanted
# Implement IDREF support (harder than it seems, since you cannot have
IDREFs to non-existent IDs)
# Frameset XHTML 1.0 and HTML 4.01 doctypes
- Implement <area>
- Figure out how to simultaneously set %CSS.Trusted and %HTML.Trusted (?)
3.3 release [Do What I Mean, Not What I Say]
3.3 release [Error'ed]
# Error logging for filtering/cleanup procedures
- XSS-attempt detection--certain errors are flagged XSS-like
3.4 release [Do What I Mean, Not What I Say]
# Additional support for poorly written HTML
- Microsoft Word HTML cleaning (i.e. MsoNormal, but research essential!)
- Friendly strict handling of <address> (block -> <br>)
- Remove redundant tags, ex. <u><u>Underlined</u></u>. Implementation notes:
? Remove redundant tags, ex. <u><u>Underlined</u></u>. Implementation notes:
1. Analyzing which tags to remove duplicants
2. Ensure attributes are merged into the parent tag
3. Extend the tag exclusion system to specify whether or not the
@@ -58,27 +46,21 @@ FUTURE VERSIONS
- Remove empty inline tags<i></i>
- Append something to duplicate IDs so they're still usable (impl. note: the
dupe detector would also need to detect the suffix as well)
- Externalize inline CSS to promote clean HTML
3.4 release [It's All About Trust] (floating)
# Implement untrusted, dangerous elements/attributes
- Objects and Forms are especially wanted
# Implement IDREF support (harder than it seems, since you cannot have
IDREFs to non-existent IDs)
# Frameset XHTML 1.0 and HTML 4.01 doctypes
- Externalize inline CSS to promote clean HTML, proposed by Sander Tekelenburg
4.0 release [Beyond HTML]
# Legit token based CSS parsing (will require revamping almost every
AttrDef class). Probably will use CSSTidy class
# More control over allowed CSS properties (maybe modularize it in the
same fashion!)
AttrDef class). Probably will use CSSTidy class?
# More control over allowed CSS properties using a modularization
# HTML 5 support
# IRI support
- Standardize token armor for all areas of processing
- Convert RTL/LTR override characters to <bdo> tags, or vice versa on demand.
Also, enable disabling of directionality
- Table of Contents generation (XHTML Compiler might be reusable)
5.0 release [To XML and Beyond]
- AllowedAttributes and ForbiddenAttributes step on the toes of XML by
using periods; this needs to be changed.
- Extended HTML capabilities based on namespacing and tag transforms (COMPLEX)
- Hooks for adding custom processors to custom namespaced tags and
attributes, offer default implementation
@@ -86,45 +68,64 @@ FUTURE VERSIONS
Ongoing
- More refactoring to take advantage of PHP5's facilities
- Lots of profiling, make it faster!
- Refactor unit tests into lots of test methods
- Plugins for major CMSes (COMPLEX)
- phpBB
- Drupal needs loving!
- Phorum need loving!
- more! (look for ones that use WYSIWYGs)
- Complete basic smoketests
- Also, maybe a FAQ for extension writers with HTML Purifier
AutoFormat
- Smileys
- Syntax highlighting with <pre> and possibly <?php
- Syntax highlighting (with GeSHi) with <pre> and possibly <?php
- Look at http://drupal.org/project/Modules/category/63 for ideas
Unknown release (on a scratch-an-itch basis)
# CHMOD install script for PEAR installs
? Have 'lang' attribute be checked against official lists, achieved by
encoding all characters that have string entity equivalents
- Abstract ChildDef_BlockQuote to work with all elements that only
allow blocks in them, required or optional
- Reorganize Unit Tests
Optimizations
- Reduce size of internal data-structures (esp. HTMLDefinition)
- Research memory usage of objects versus arrays
- Combine multiple strategies into a single, single-pass strategy
- Get PH5P working with the latest versions of DOM, which have much more
stringent error checking procedures. Maybe convert straight to tokens.
- Get rid of set_include_path(). Save this for another major release.
Neat feature related
! Factor demo.php into a set of Printer classes, and then create a stub
file for users here (inside the actual HTML Purifier library)
! Support exporting configuration, so users can easily tweak settings
in the demo, and then copy-paste into their own setup
- Advanced URI filtering schemes (see docs/proposal-new-directives.txt)
- Implement lenient <ruby> child validation
- Allow scoped="scoped" attribute in <style> tags; may be troublesome
because regular CSS has no way of uniquely identifying nodes, so we'd
have to generate IDs
- Explain how to use HTML Purifier in non-PHP languages / create
a simple command line stub (or complicated?)
- Fixes for Firefox's inability to handle COL alignment props (Bug 915)
- Automatically add non-breaking spaces to empty table cells when
empty-cells:show is applied to have compatibility with Internet Explorer
- Table of Contents generation (XHTML Compiler might be reusable). May also
be out-of-band information.
- Full set of color keywords. Also, a way to add onto them without
finalizing the configuration object.
- Write a var_export and memcached DefinitionCache - Denis
- Allow restriction of allowed class values
Maintenance related (slightly boring)
# CHMOD install script for PEAR installs
! Factor out command line parser into its own class, and unit test it
! Nested configuration namespaces
- Distinguish between default settings and explicitly set settings, so
configurations can be merged
- Nested configuration namespaces
- Allow scoped="scoped" attribute in <style> tags; may be troublesome
because regular CSS has no way of uniquely identifying nodes, so we'd
have to generate IDs
- Time PHPT tests
Requested
ChildDef related (very boring)
- Abstract ChildDef_BlockQuote to work with all elements that only
allow blocks in them, required or optional
- Implement lenient <ruby> child validation
Wontfix
- Non-lossy smart alternate character encoding transformations (unless
patch provided)
- Pretty-printing HTML: users can use Tidy on the output on entire page
- Native content compression, whitespace stripping (don't rely on Tidy, make
sure we don't remove from <pre> or related tags): use gzip if this is
- Native content compression, whitespace stripping: use gzip if this is
really important

View File

@@ -1 +1 @@
3.1.0rc1
3.1.1

View File

@@ -1,8 +1,9 @@
Release 3.1.0rc1 is a release candidate aimed primarily at ironing out bugs
in HTML Purifier's shiny new __autoload system. There are lots of new features,
however; some notable ones include support for the !important CSS modifier,
display and visibility CSS properties with %CSS.AllowTricky, marquee
with %HTML.Proprietary (had you scared for a moment, hmm?), a kses() wrapper,
%CSS.AllowedProperties, %HTML.ForbiddenAttributes and %HTML.ForbiddenElements
and a totally revamped ConfigDoc system. And of course, the usual slew of
bugfixes.
HTML Purifier 3.1.1 is a security and bugfix release. This release addresses
two security vulnerabilities, both related to CSS, and one of which only
applies to users using Shift_JIS as their output encoding. There is also
a security improvement regarding the imagecrash attack. There is a backwards
incompatible change with %URI.Munge, in which resources are no longer munged
by default; please enable using %URI.MungeResources. Besides this, there
are numerous improvements to URI munging, esp. with the addition of
%URI.MungeSecretKey, as well as an experimental implementation of
%HTML.SafeObject and %HTML.SafeEmbed. There are also some memory optimizations.

View File

@@ -0,0 +1,14 @@
<?php
chdir(dirname(__FILE__));
//require_once '../library/HTMLPurifier.path.php';
shell_exec('php ../maintenance/generate-schema-cache.php');
require_once '../library/HTMLPurifier.path.php';
require_once 'HTMLPurifier.includes.php';
$begin = xdebug_memory_usage();
$schema = HTMLPurifier_ConfigSchema::makeFromSerial();
echo xdebug_memory_usage() - $begin;

View File

@@ -15,7 +15,7 @@ TODO:
- add blurbs to ToC
*/
if (version_compare(PHP_VERSION, '5.2.0', '<')) exit('PHP 5.2.0 or greater required.');
if (version_compare(PHP_VERSION, '5.2', '<')) exit('PHP 5.2+ required.');
error_reporting(E_ALL | E_STRICT);
chdir(dirname(__FILE__));
@@ -38,6 +38,7 @@ $configdoc_xml = 'configdoc.xml';
$xml_builder = new HTMLPurifier_ConfigSchema_Builder_Xml();
$xml_builder->openURI($configdoc_xml);
$xml_builder->build($interchange);
unset($xml_builder); // free handle
$xslt = new ConfigDoc_HTMLXSLTProcessor();
$xslt->importStylesheet(dirname(__FILE__) . "/styles/$style.xsl");

View File

@@ -19,7 +19,7 @@
<xsl:variable name="usageLookup" select="document('../usage.xml')/usage" />
<!-- Twiddle this variable to get the columns as even as possible -->
<xsl:variable name="maxNumberAdjust" select="1" />
<xsl:variable name="maxNumberAdjust" select="2" />
<xsl:template match="/">
<html lang="en" xml:lang="en">
@@ -227,7 +227,7 @@
</tr>
</xsl:template>
<xsl:template match="constraints/external/project">
<xsl:value-of select="." />
<li><xsl:value-of select="." /></li>
</xsl:template>
</xsl:stylesheet>

View File

@@ -5,7 +5,7 @@
<line>131</line>
</file>
<file name="HTMLPurifier/Lexer.php">
<line>93</line>
<line>85</line>
</file>
<file name="HTMLPurifier/Lexer/DirectLex.php">
<line>50</line>
@@ -16,24 +16,29 @@
<line>44</line>
</file>
</directive>
<directive id="CSS.MaxImgLength">
<file name="HTMLPurifier/CSSDefinition.php">
<line>157</line>
</file>
</directive>
<directive id="CSS.Proprietary">
<file name="HTMLPurifier/CSSDefinition.php">
<line>201</line>
<line>214</line>
</file>
</directive>
<directive id="CSS.AllowTricky">
<file name="HTMLPurifier/CSSDefinition.php">
<line>205</line>
<line>218</line>
</file>
</directive>
<directive id="CSS.AllowImportant">
<file name="HTMLPurifier/CSSDefinition.php">
<line>209</line>
<line>222</line>
</file>
</directive>
<directive id="CSS.AllowedProperties">
<file name="HTMLPurifier/CSSDefinition.php">
<line>261</line>
<line>274</line>
</file>
</directive>
<directive id="Cache.DefinitionImpl">
@@ -64,18 +69,18 @@
<directive id="Core.Encoding">
<file name="HTMLPurifier/Encoder.php">
<line>267</line>
<line>285</line>
<line>294</line>
</file>
</directive>
<directive id="Test.ForceNoIconv">
<file name="HTMLPurifier/Encoder.php">
<line>269</line>
<line>290</line>
<line>272</line>
<line>302</line>
</file>
</directive>
<directive id="Core.EscapeNonASCIICharacters">
<file name="HTMLPurifier/Encoder.php">
<line>287</line>
<line>298</line>
</file>
</directive>
<directive id="Core.MaintainLineNumbers">
@@ -83,7 +88,7 @@
<line>81</line>
</file>
<file name="HTMLPurifier/Lexer.php">
<line>90</line>
<line>82</line>
</file>
<file name="HTMLPurifier/Lexer/DirectLex.php">
<line>45</line>
@@ -96,47 +101,47 @@
</directive>
<directive id="Output.TidyFormat">
<file name="HTMLPurifier/Generator.php">
<line>61</line>
<line>69</line>
</file>
</directive>
<directive id="Output.Newline">
<file name="HTMLPurifier/Generator.php">
<line>86</line>
<line>83</line>
</file>
</directive>
<directive id="HTML.BlockWrapper">
<file name="HTMLPurifier/HTMLDefinition.php">
<line>213</line>
<line>222</line>
</file>
</directive>
<directive id="HTML.Parent">
<file name="HTMLPurifier/HTMLDefinition.php">
<line>221</line>
<line>230</line>
</file>
</directive>
<directive id="HTML.AllowedElements">
<file name="HTMLPurifier/HTMLDefinition.php">
<line>238</line>
<line>247</line>
</file>
</directive>
<directive id="HTML.AllowedAttributes">
<file name="HTMLPurifier/HTMLDefinition.php">
<line>239</line>
<line>248</line>
</file>
</directive>
<directive id="HTML.Allowed">
<file name="HTMLPurifier/HTMLDefinition.php">
<line>242</line>
<line>251</line>
</file>
</directive>
<directive id="HTML.ForbiddenElements">
<file name="HTMLPurifier/HTMLDefinition.php">
<line>303</line>
<line>337</line>
</file>
</directive>
<directive id="HTML.ForbiddenAttributes">
<file name="HTMLPurifier/HTMLDefinition.php">
<line>304</line>
<line>338</line>
</file>
</directive>
<directive id="HTML.Trusted">
@@ -144,7 +149,10 @@
<line>198</line>
</file>
<file name="HTMLPurifier/Lexer.php">
<line>245</line>
<line>238</line>
</file>
<file name="HTMLPurifier/HTMLModule/Image.php">
<line>27</line>
</file>
<file name="HTMLPurifier/Lexer/DirectLex.php">
<line>34</line>
@@ -165,6 +173,16 @@
<line>220</line>
</file>
</directive>
<directive id="HTML.SafeObject">
<file name="HTMLPurifier/HTMLModuleManager.php">
<line>225</line>
</file>
</directive>
<directive id="HTML.SafeEmbed">
<file name="HTMLPurifier/HTMLModuleManager.php">
<line>228</line>
</file>
</directive>
<directive id="Attr.IDBlacklist">
<file name="HTMLPurifier/IDAccumulator.php">
<line>26</line>
@@ -172,22 +190,22 @@
</directive>
<directive id="Core.Language">
<file name="HTMLPurifier/LanguageFactory.php">
<line>85</line>
<line>88</line>
</file>
</directive>
<directive id="Core.LexerImpl">
<file name="HTMLPurifier/Lexer.php">
<line>78</line>
<line>70</line>
</file>
</directive>
<directive id="Core.ConvertDocumentToFragment">
<file name="HTMLPurifier/Lexer.php">
<line>237</line>
<line>230</line>
</file>
</directive>
<directive id="URI.Host">
<file name="HTMLPurifier/URIDefinition.php">
<line>57</line>
<line>64</line>
</file>
<file name="HTMLPurifier/URIFilter/DisableExternal.php">
<line>8</line>
@@ -195,12 +213,12 @@
</directive>
<directive id="URI.Base">
<file name="HTMLPurifier/URIDefinition.php">
<line>58</line>
<line>65</line>
</file>
</directive>
<directive id="URI.DefaultScheme">
<file name="HTMLPurifier/URIDefinition.php">
<line>65</line>
<line>72</line>
</file>
</directive>
<directive id="URI.AllowedSchemes">
@@ -215,12 +233,7 @@
</directive>
<directive id="URI.Disable">
<file name="HTMLPurifier/AttrDef/URI.php">
<line>24</line>
</file>
</directive>
<directive id="URI.Munge">
<file name="HTMLPurifier/AttrDef/URI.php">
<line>78</line>
<line>28</line>
</file>
</directive>
<directive id="Core.ColorKeywords">
@@ -305,6 +318,17 @@
<line>123</line>
</file>
</directive>
<directive id="HTML.MaxImgLength">
<file name="HTMLPurifier/HTMLModule/Image.php">
<line>14</line>
</file>
<file name="HTMLPurifier/HTMLModule/SafeEmbed.php">
<line>13</line>
</file>
<file name="HTMLPurifier/HTMLModule/SafeObject.php">
<line>19</line>
</file>
</directive>
<directive id="HTML.TidyLevel">
<file name="HTMLPurifier/HTMLModule/Tidy.php">
<line>45</line>
@@ -358,4 +382,14 @@
<line>8</line>
</file>
</directive>
<directive id="URI.MungeResources">
<file name="HTMLPurifier/URIFilter/Munge.php">
<line>14</line>
</file>
</directive>
<directive id="URI.MungeSecretKey">
<file name="HTMLPurifier/URIFilter/Munge.php">
<line>15</line>
</file>
</directive>
</usage>

View File

@@ -367,7 +367,7 @@ Test.Example</pre>
For example, <code>HTMLPurifier_ConfigSchema_Builder_ConfigSchema</code>
generates a runtime <code>HTMLPurifier_ConfigSchema</code> object,
which <code>HTMLPurifier_Config</code> uses to validate its incoming
data. There is also a planned documentation builder.
data. There is also an XML serializer, which is used to build documentation.
</p>
<div id="version">$Id$</div>

View File

@@ -158,7 +158,7 @@
<pre>$config = HTMLPurifier_Config::createDefault();
$config->set('HTML', 'DefinitionID', 'enduser-customize.html tutorial');
$config->set('HTML', 'DefinitionRev', 1);
$def =& $config->getHTMLDefinition(true);</pre>
$def = $config->getHTMLDefinition(true);</pre>
<p>
Assuming that HTML Purifier has already been properly loaded (hint:
@@ -214,7 +214,7 @@ $def =& $config->getHTMLDefinition(true);</pre>
$config->set('HTML', 'DefinitionID', 'enduser-customize.html tutorial');
$config->set('HTML', 'DefinitionRev', 1);
<strong>$config->set('Core', 'DefinitionCache', null); // remove this later!</strong>
$def =& $config->getHTMLDefinition(true);</pre>
$def = $config->getHTMLDefinition(true);</pre>
<p>
A few things should be mentioned about the caching mechanism before
@@ -270,7 +270,7 @@ $def =& $config->getHTMLDefinition(true);</pre>
$config->set('HTML', 'DefinitionID', 'enduser-customize.html tutorial');
$config->set('HTML', 'DefinitionRev', 1);
$config->set('Core', 'DefinitionCache', null); // remove this later!
$def =& $config->getHTMLDefinition(true);
$def = $config->getHTMLDefinition(true);
<strong>$def->addAttribute('a', 'target', 'Enum#_blank,_self,_target,_top');</strong></pre>
<p>
@@ -388,7 +388,7 @@ $def =& $config->getHTMLDefinition(true);
$config->set('HTML', 'DefinitionID', 'enduser-customize.html tutorial');
$config->set('HTML', 'DefinitionRev', 1);
$config->set('Core', 'DefinitionCache', null); // remove this later!
$def =& $config->getHTMLDefinition(true);
$def = $config->getHTMLDefinition(true);
<strong>$def->addAttribute('a', 'target', new HTMLPurifier_AttrDef_Enum(
array('_blank','_self','_target','_top')
));</strong></pre>
@@ -735,11 +735,11 @@ $def =& $config->getHTMLDefinition(true);
$config->set('HTML', 'DefinitionID', 'enduser-customize.html tutorial');
$config->set('HTML', 'DefinitionRev', 1);
$config->set('Core', 'DefinitionCache', null); // remove this later!
$def =& $config->getHTMLDefinition(true);
$def = $config->getHTMLDefinition(true);
$def->addAttribute('a', 'target', new HTMLPurifier_AttrDef_Enum(
array('_blank','_self','_target','_top')
));
<strong>$form =& $def->addElement(
<strong>$form = $def->addElement(
'form', // name
'Block', // content set
'Flow', // allowed children

View File

@@ -35,7 +35,7 @@
{
public $name = '<strong>NameOfFilter</strong>';
public function prepare($config) {}
public function filter(&$uri, $config, &$context) {}
public function filter(&$uri, $config, $context) {}
}</pre>
<p>
@@ -60,8 +60,8 @@
public function HTMLPurifier_URI($scheme, $userinfo, $host, $port, $path, $query, $fragment);
public function toString();
public function copy();
public function getSchemeObj($config, &$context);
public function validate($config, &$context);
public function getSchemeObj($config, $context);
public function validate($config, $context);
}</pre>
<p>
@@ -130,30 +130,26 @@
</p>
<p>
Let's suppose I wanted to write a filter that de-internationalized domain
names by converting them to <a href="http://en.wikipedia.org/wiki/Punycode">Punycode</a>.
Assuming that <code>punycode_encode($input)</code> converts <code>$input</code> to
Punycode and returns <code>false</code> on failure:
Let's suppose I wanted to write a filter that converted links with a
custom <code>image</code> scheme to its corresponding real path on
our website:
</p>
<pre>class HTMLPurifier_URIFilter_ConvertIDNToPunycode extends HTMLPurifier_URIFilter
<pre>class HTMLPurifier_URIFilter_TransformImageScheme extends HTMLPurifier_URIFilter
{
public $name = 'ConvertIDNToPunycode';
public function filter(&$uri, $config, &$context) {
if (is_null($uri->host)) return true;
if ($uri->host == utf8_decode($uri->host)) {
// is ASCII, abort
return true;
}
$host = punycode_encode($uri->host);
if ($host === false) return false;
$uri->host = $host;
public $name = 'TransformImageScheme';
public function filter(&$uri, $config, $context) {
if ($uri->scheme !== 'image') return true;
$img_name = $uri->path;
// Overwrite the previous URI object
$uri = new HTMLPurifier_URI('http', null, null, null, '/img/' . $img_name . '.png', null, null);
return true;
}
}</pre>
<p>
Notice I did not <code>return $uri;</code>.
Notice I did not <code>return $uri;</code>. This filter would turn
<code>image:Foo</code> into <code>/img/Foo.png</code>.
</p>
<h2>Activating your filter</h2>
@@ -163,7 +159,7 @@
to use it. Fortunately, this part's simple:
</p>
<pre>$uri =& $config->getDefinition('URI');
<pre>$uri = $config->getDefinition('URI');
$uri->addFilter(new HTMLPurifier_URIFilter_<strong>NameOfFilter</strong>());</pre>
<p>
@@ -177,7 +173,7 @@ $uri->addFilter(new HTMLPurifier_URIFilter_<strong>NameOfFilter</strong>());</pr
'URI', '<strong>NameOfFilter</strong>', false, 'bool',
'<strong>What your filter does.</strong>'
);
$uri =& $config->getDefinition('URI', true);
$uri = $config->getDefinition('URI', true);
$uri->registerFilter(new HTMLPurifier_URIFilter_<strong>NameOfFilter</strong>());
</pre>
@@ -186,6 +182,25 @@ $uri->registerFilter(new HTMLPurifier_URIFilter_<strong>NameOfFilter</strong>())
is set to true.
</p>
<h2>Post-filter</h2>
<p>
Remember our TransformImageScheme filter? That filter acted before we had
performed scheme validation; otherwise, the URI would have been filtered
out when it was discovered that there was no image scheme. Well, a post-filter
is run after scheme specific validation, so it's ideal for bulk
post-processing of URIs, including munging. To specify a URI as a post-filter,
set the <code>$post</code> member variable to TRUE.
</p>
<pre>class HTMLPurifier_URIFilter_MyPostFilter extends HTMLPurifier_URIFilter
{
public $name = 'MyPostFilter';
public $post = true;
// ... extra code here
}
</pre>
<h2>Examples</h2>
<p>

View File

@@ -75,9 +75,7 @@ passes through HTML Purifier <em>unharmed</em>.
<p>And the corresponding usage:</p>
<pre>&lt;?php
// assuming $purifier is an instance of HTMLPurifier
require_once 'HTMLPurifier/Filter/YouTube.php';
$purifier-&gt;addFilter(new HTMLPurifier_Filter_YouTube());
$config->set('Filter', 'YouTube', true);
?&gt;</pre>
<p>There is a bit going in the two code snippets, so let's explain.</p>

View File

@@ -0,0 +1,129 @@
<html xmlns:v="urn:schemas-microsoft-com:vml" xmlns:o="urn:schemas-microsoft-com:office:office" xmlns:w="urn:schemas-microsoft-com:office:word" xmlns:m="http://schemas.microsoft.com/office/2004/12/omml" xmlns="http://www.w3.org/TR/REC-html40">
<head>
<META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=us-ascii">
<meta name=Generator content="Microsoft Word 12 (filtered medium)">
<!--[if !mso]>
<style>
v\:* {behavior:url(#default#VML);}
o\:* {behavior:url(#default#VML);}
w\:* {behavior:url(#default#VML);}
..shape {behavior:url(#default#VML);}
</style>
<![endif]-->
<style>
<!--
/* Font Definitions */
@font-face
{font-family:"Cambria Math";
panose-1:2 4 5 3 5 4 6 3 2 4;}
@font-face
{font-family:Calibri;
panose-1:2 15 5 2 2 2 4 3 2 4;}
@font-face
{font-family:Tahoma;
panose-1:2 11 6 4 3 5 4 4 2 4;}
@font-face
{font-family:Verdana;
panose-1:2 11 6 4 3 5 4 4 2 4;}
/* Style Definitions */
p.MsoNormal, li.MsoNormal, div.MsoNormal
{margin:0cm;
margin-bottom:.0001pt;
font-size:10.0pt;
font-family:"Verdana","sans-serif";}
a:link, span.MsoHyperlink
{mso-style-priority:99;
color:blue;
text-decoration:underline;}
a:visited, span.MsoHyperlinkFollowed
{mso-style-priority:99;
color:purple;
text-decoration:underline;}
p.MsoAcetate, li.MsoAcetate, div.MsoAcetate
{mso-style-priority:99;
mso-style-link:"Balloon Text Char";
margin:0cm;
margin-bottom:.0001pt;
font-size:8.0pt;
font-family:"Tahoma","sans-serif";}
span.EmailStyle17
{mso-style-type:personal-compose;
font-family:"Verdana","sans-serif";
color:windowtext;}
span.BalloonTextChar
{mso-style-name:"Balloon Text Char";
mso-style-priority:99;
mso-style-link:"Balloon Text";
font-family:"Tahoma","sans-serif";}
..MsoChpDefault
{mso-style-type:export-only;}
@page Section1
{size:612.0pt 792.0pt;
margin:70.85pt 70.85pt 70.85pt 70.85pt;}
div.Section1
{page:Section1;}
-->
</style>
<!--[if gte mso 9]><xml>
<o:shapedefaults v:ext="edit" spidmax="2050" />
</xml><![endif]--><!--[if gte mso 9]><xml>
<o:shapelayout v:ext="edit">
<o:idmap v:ext="edit" data="1" />
</o:shapelayout></xml><![endif]-->
</head>
<body lang=NL link=blue vlink=purple>
<div class=Section1>
<p class=MsoNormal><img width=1277 height=994 id="Picture_x0020_1"
src="cid:image001.png@01C8CBDF.5D1BAEE0"><o:p></o:p></p>
<p class=MsoNormal><o:p>&nbsp;</o:p></p>
<p class=MsoNormal><b>Name<o:p></o:p></b></p>
<p class=MsoNormal>E-mail : <a href="mailto:mail@example.com"><span
style='color:windowtext'>mail@example.com</span></a><o:p></o:p></p>
<p class=MsoNormal><o:p>&nbsp;</o:p></p>
<p class=MsoNormal><b>Company<o:p></o:p></b></p>
<p class=MsoNormal>Address 1<o:p></o:p></p>
<p class=MsoNormal>Address 2<o:p></o:p></p>
<p class=MsoNormal><o:p>&nbsp;</o:p></p>
<p class=MsoNormal>Telefoon&nbsp; : +xx xx xxx xxx xx <span style='color:black'><o:p></o:p></span></p>
<p class=MsoNormal><span lang=EN-US style='color:black'>Fax&nbsp; : +xx xx xxx xx xx<o:p></o:p></span></p>
<p class=MsoNormal><span lang=EN-US style='color:black'>Internet : </span><span
style='color:black'><a href="http://www.example.com/"><span lang=EN-US
style='color:black'>http://www.example.com</span></a></span><span
lang=EN-US style='color:black'><o:p></o:p></span></p>
<p class=MsoNormal><span lang=EN-US style='color:black'>Kamer van koophandel
xxxxxxxxx<o:p></o:p></span></p>
<p class=MsoNormal><span lang=EN-US style='color:black'><o:p>&nbsp;</o:p></span></p>
<p class=MsoNormal><span lang=EN-US style='font-size:7.5pt;color:black'>Op deze
e-mail is een disclaimer van toepassing, ga naar </span><span lang=EN-US
style='font-size:7.5pt'><a
href="http://www.example.com/disclaimer"><span
style='color:black'>www.example.com/disclaimer</span></a><br>
<span style='color:black'>A disclaimer is applicable to this email, please
refer to </span><a href="http://www.example.com/disclaimer"><span
style='color:black'>www.example.com/disclaimer</span></a><o:p></o:p></span></p>
<p class=MsoNormal><span lang=EN-US><o:p>&nbsp;</o:p></span></p>
</div>
</body>
</html>

View File

@@ -7,7 +7,7 @@
* primary concern and you are using an opcode cache. PLEASE DO NOT EDIT THIS
* FILE, changes will be overwritten the next time the script is run.
*
* @version 3.0.0
* @version 3.1.1
*
* @warning
* You must *not* include any other HTML Purifier files before this file,
@@ -29,7 +29,6 @@ require 'HTMLPurifier/Definition.php';
require 'HTMLPurifier/CSSDefinition.php';
require 'HTMLPurifier/ChildDef.php';
require 'HTMLPurifier/Config.php';
require 'HTMLPurifier/ConfigDef.php';
require 'HTMLPurifier/ConfigSchema.php';
require 'HTMLPurifier/ContentSets.php';
require 'HTMLPurifier/Context.php';
@@ -41,7 +40,6 @@ require 'HTMLPurifier/ElementDef.php';
require 'HTMLPurifier/Encoder.php';
require 'HTMLPurifier/EntityLookup.php';
require 'HTMLPurifier/EntityParser.php';
require 'HTMLPurifier/Error.php';
require 'HTMLPurifier/ErrorCollector.php';
require 'HTMLPurifier/Exception.php';
require 'HTMLPurifier/Filter.php';
@@ -53,6 +51,7 @@ require 'HTMLPurifier/IDAccumulator.php';
require 'HTMLPurifier/Injector.php';
require 'HTMLPurifier/Language.php';
require 'HTMLPurifier/LanguageFactory.php';
require 'HTMLPurifier/Length.php';
require 'HTMLPurifier/Lexer.php';
require 'HTMLPurifier/PercentEncoder.php';
require 'HTMLPurifier/Strategy.php';
@@ -67,12 +66,14 @@ require 'HTMLPurifier/URIFilter.php';
require 'HTMLPurifier/URIParser.php';
require 'HTMLPurifier/URIScheme.php';
require 'HTMLPurifier/URISchemeRegistry.php';
require 'HTMLPurifier/UnitConverter.php';
require 'HTMLPurifier/VarParser.php';
require 'HTMLPurifier/VarParserException.php';
require 'HTMLPurifier/AttrDef/CSS.php';
require 'HTMLPurifier/AttrDef/Enum.php';
require 'HTMLPurifier/AttrDef/Integer.php';
require 'HTMLPurifier/AttrDef/Lang.php';
require 'HTMLPurifier/AttrDef/Switch.php';
require 'HTMLPurifier/AttrDef/Text.php';
require 'HTMLPurifier/AttrDef/URI.php';
require 'HTMLPurifier/AttrDef/CSS/Number.php';
@@ -82,6 +83,7 @@ require 'HTMLPurifier/AttrDef/CSS/BackgroundPosition.php';
require 'HTMLPurifier/AttrDef/CSS/Border.php';
require 'HTMLPurifier/AttrDef/CSS/Color.php';
require 'HTMLPurifier/AttrDef/CSS/Composite.php';
require 'HTMLPurifier/AttrDef/CSS/DenyElementDecorator.php';
require 'HTMLPurifier/AttrDef/CSS/Filter.php';
require 'HTMLPurifier/AttrDef/CSS/Font.php';
require 'HTMLPurifier/AttrDef/CSS/FontFamily.php';
@@ -116,6 +118,10 @@ require 'HTMLPurifier/AttrTransform/ImgSpace.php';
require 'HTMLPurifier/AttrTransform/Lang.php';
require 'HTMLPurifier/AttrTransform/Length.php';
require 'HTMLPurifier/AttrTransform/Name.php';
require 'HTMLPurifier/AttrTransform/SafeEmbed.php';
require 'HTMLPurifier/AttrTransform/SafeObject.php';
require 'HTMLPurifier/AttrTransform/SafeParam.php';
require 'HTMLPurifier/AttrTransform/ScriptRequired.php';
require 'HTMLPurifier/ChildDef/Chameleon.php';
require 'HTMLPurifier/ChildDef/Custom.php';
require 'HTMLPurifier/ChildDef/Empty.php';
@@ -123,9 +129,6 @@ require 'HTMLPurifier/ChildDef/Required.php';
require 'HTMLPurifier/ChildDef/Optional.php';
require 'HTMLPurifier/ChildDef/StrictBlockquote.php';
require 'HTMLPurifier/ChildDef/Table.php';
require 'HTMLPurifier/ConfigDef/Directive.php';
require 'HTMLPurifier/ConfigDef/DirectiveAlias.php';
require 'HTMLPurifier/ConfigDef/Namespace.php';
require 'HTMLPurifier/DefinitionCache/Decorator.php';
require 'HTMLPurifier/DefinitionCache/Null.php';
require 'HTMLPurifier/DefinitionCache/Serializer.php';
@@ -143,6 +146,8 @@ require 'HTMLPurifier/HTMLModule/Object.php';
require 'HTMLPurifier/HTMLModule/Presentation.php';
require 'HTMLPurifier/HTMLModule/Proprietary.php';
require 'HTMLPurifier/HTMLModule/Ruby.php';
require 'HTMLPurifier/HTMLModule/SafeEmbed.php';
require 'HTMLPurifier/HTMLModule/SafeObject.php';
require 'HTMLPurifier/HTMLModule/Scripting.php';
require 'HTMLPurifier/HTMLModule/StyleAttribute.php';
require 'HTMLPurifier/HTMLModule/Tables.php';
@@ -158,6 +163,7 @@ require 'HTMLPurifier/HTMLModule/Tidy/XHTML.php';
require 'HTMLPurifier/Injector/AutoParagraph.php';
require 'HTMLPurifier/Injector/Linkify.php';
require 'HTMLPurifier/Injector/PurifierLinkify.php';
require 'HTMLPurifier/Injector/SafeObject.php';
require 'HTMLPurifier/Lexer/DOMLex.php';
require 'HTMLPurifier/Lexer/DirectLex.php';
require 'HTMLPurifier/Strategy/Composite.php';
@@ -178,6 +184,7 @@ require 'HTMLPurifier/URIFilter/DisableExternal.php';
require 'HTMLPurifier/URIFilter/DisableExternalResources.php';
require 'HTMLPurifier/URIFilter/HostBlacklist.php';
require 'HTMLPurifier/URIFilter/MakeAbsolute.php';
require 'HTMLPurifier/URIFilter/Munge.php';
require 'HTMLPurifier/URIScheme/ftp.php';
require 'HTMLPurifier/URIScheme/http.php';
require 'HTMLPurifier/URIScheme/https.php';

View File

@@ -15,13 +15,11 @@
* -# Generating HTML from the purified tokens.
*
* However, most users will only need to interface with the HTMLPurifier
* class, so this massive amount of infrastructure is usually concealed.
* If you plan on working with the internals, be sure to include
* HTMLPurifier_ConfigSchema and HTMLPurifier_Config.
* and HTMLPurifier_Config.
*/
/*
HTML Purifier 3.1.0rc1 - Standards Compliant HTML Filtering
HTML Purifier 3.1.1 - Standards Compliant HTML Filtering
Copyright (C) 2006-2008 Edward Z. Yang
This library is free software; you can redistribute it and/or
@@ -57,7 +55,10 @@ class HTMLPurifier
{
/** Version of HTML Purifier */
public $version = '3.1.0rc1';
public $version = '3.1.1';
/** Constant with version of HTML Purifier */
const VERSION = '3.1.1';
/** Global configuration object */
public $config;
@@ -89,7 +90,6 @@ class HTMLPurifier
$this->config = HTMLPurifier_Config::create($config);
$this->strategy = new HTMLPurifier_Strategy_Core();
$this->generator = new HTMLPurifier_Generator();
}
@@ -123,8 +123,8 @@ class HTMLPurifier
$context = new HTMLPurifier_Context();
// our friendly neighborhood generator, all primed with configuration too!
$this->generator->generateFromTokens(array(), $config, $context);
// setup HTML generator
$this->generator = new HTMLPurifier_Generator($config, $context);
$context->register('Generator', $this->generator);
// set up global context variables
@@ -177,8 +177,7 @@ class HTMLPurifier
$html, $config, $context
),
$config, $context
),
$config, $context
)
);
for ($i = $filter_size - 1; $i >= 0; $i--) {
@@ -209,9 +208,10 @@ class HTMLPurifier
/**
* Singleton for enforcing just one HTML Purifier in your system
* @param $prototype Optional prototype HTMLPurifier instance to
* overload singleton with.
* overload singleton with, or HTMLPurifier_Config
* instance to configure the generated version with.
*/
public static function getInstance($prototype = null) {
public static function instance($prototype = null) {
if (!self::$instance || $prototype) {
if ($prototype instanceof HTMLPurifier) {
self::$instance = $prototype;
@@ -224,4 +224,11 @@ class HTMLPurifier
return self::$instance;
}
/**
* @note Backwards compatibility, see instance()
*/
public static function getInstance($prototype = null) {
return HTMLPurifier::instance($prototype);
}
}

View File

@@ -23,7 +23,6 @@ require_once $__dir . '/HTMLPurifier/Definition.php';
require_once $__dir . '/HTMLPurifier/CSSDefinition.php';
require_once $__dir . '/HTMLPurifier/ChildDef.php';
require_once $__dir . '/HTMLPurifier/Config.php';
require_once $__dir . '/HTMLPurifier/ConfigDef.php';
require_once $__dir . '/HTMLPurifier/ConfigSchema.php';
require_once $__dir . '/HTMLPurifier/ContentSets.php';
require_once $__dir . '/HTMLPurifier/Context.php';
@@ -35,7 +34,6 @@ require_once $__dir . '/HTMLPurifier/ElementDef.php';
require_once $__dir . '/HTMLPurifier/Encoder.php';
require_once $__dir . '/HTMLPurifier/EntityLookup.php';
require_once $__dir . '/HTMLPurifier/EntityParser.php';
require_once $__dir . '/HTMLPurifier/Error.php';
require_once $__dir . '/HTMLPurifier/ErrorCollector.php';
require_once $__dir . '/HTMLPurifier/Exception.php';
require_once $__dir . '/HTMLPurifier/Filter.php';
@@ -47,6 +45,7 @@ require_once $__dir . '/HTMLPurifier/IDAccumulator.php';
require_once $__dir . '/HTMLPurifier/Injector.php';
require_once $__dir . '/HTMLPurifier/Language.php';
require_once $__dir . '/HTMLPurifier/LanguageFactory.php';
require_once $__dir . '/HTMLPurifier/Length.php';
require_once $__dir . '/HTMLPurifier/Lexer.php';
require_once $__dir . '/HTMLPurifier/PercentEncoder.php';
require_once $__dir . '/HTMLPurifier/Strategy.php';
@@ -61,12 +60,14 @@ require_once $__dir . '/HTMLPurifier/URIFilter.php';
require_once $__dir . '/HTMLPurifier/URIParser.php';
require_once $__dir . '/HTMLPurifier/URIScheme.php';
require_once $__dir . '/HTMLPurifier/URISchemeRegistry.php';
require_once $__dir . '/HTMLPurifier/UnitConverter.php';
require_once $__dir . '/HTMLPurifier/VarParser.php';
require_once $__dir . '/HTMLPurifier/VarParserException.php';
require_once $__dir . '/HTMLPurifier/AttrDef/CSS.php';
require_once $__dir . '/HTMLPurifier/AttrDef/Enum.php';
require_once $__dir . '/HTMLPurifier/AttrDef/Integer.php';
require_once $__dir . '/HTMLPurifier/AttrDef/Lang.php';
require_once $__dir . '/HTMLPurifier/AttrDef/Switch.php';
require_once $__dir . '/HTMLPurifier/AttrDef/Text.php';
require_once $__dir . '/HTMLPurifier/AttrDef/URI.php';
require_once $__dir . '/HTMLPurifier/AttrDef/CSS/Number.php';
@@ -76,6 +77,7 @@ require_once $__dir . '/HTMLPurifier/AttrDef/CSS/BackgroundPosition.php';
require_once $__dir . '/HTMLPurifier/AttrDef/CSS/Border.php';
require_once $__dir . '/HTMLPurifier/AttrDef/CSS/Color.php';
require_once $__dir . '/HTMLPurifier/AttrDef/CSS/Composite.php';
require_once $__dir . '/HTMLPurifier/AttrDef/CSS/DenyElementDecorator.php';
require_once $__dir . '/HTMLPurifier/AttrDef/CSS/Filter.php';
require_once $__dir . '/HTMLPurifier/AttrDef/CSS/Font.php';
require_once $__dir . '/HTMLPurifier/AttrDef/CSS/FontFamily.php';
@@ -110,6 +112,10 @@ require_once $__dir . '/HTMLPurifier/AttrTransform/ImgSpace.php';
require_once $__dir . '/HTMLPurifier/AttrTransform/Lang.php';
require_once $__dir . '/HTMLPurifier/AttrTransform/Length.php';
require_once $__dir . '/HTMLPurifier/AttrTransform/Name.php';
require_once $__dir . '/HTMLPurifier/AttrTransform/SafeEmbed.php';
require_once $__dir . '/HTMLPurifier/AttrTransform/SafeObject.php';
require_once $__dir . '/HTMLPurifier/AttrTransform/SafeParam.php';
require_once $__dir . '/HTMLPurifier/AttrTransform/ScriptRequired.php';
require_once $__dir . '/HTMLPurifier/ChildDef/Chameleon.php';
require_once $__dir . '/HTMLPurifier/ChildDef/Custom.php';
require_once $__dir . '/HTMLPurifier/ChildDef/Empty.php';
@@ -117,9 +123,6 @@ require_once $__dir . '/HTMLPurifier/ChildDef/Required.php';
require_once $__dir . '/HTMLPurifier/ChildDef/Optional.php';
require_once $__dir . '/HTMLPurifier/ChildDef/StrictBlockquote.php';
require_once $__dir . '/HTMLPurifier/ChildDef/Table.php';
require_once $__dir . '/HTMLPurifier/ConfigDef/Directive.php';
require_once $__dir . '/HTMLPurifier/ConfigDef/DirectiveAlias.php';
require_once $__dir . '/HTMLPurifier/ConfigDef/Namespace.php';
require_once $__dir . '/HTMLPurifier/DefinitionCache/Decorator.php';
require_once $__dir . '/HTMLPurifier/DefinitionCache/Null.php';
require_once $__dir . '/HTMLPurifier/DefinitionCache/Serializer.php';
@@ -137,6 +140,8 @@ require_once $__dir . '/HTMLPurifier/HTMLModule/Object.php';
require_once $__dir . '/HTMLPurifier/HTMLModule/Presentation.php';
require_once $__dir . '/HTMLPurifier/HTMLModule/Proprietary.php';
require_once $__dir . '/HTMLPurifier/HTMLModule/Ruby.php';
require_once $__dir . '/HTMLPurifier/HTMLModule/SafeEmbed.php';
require_once $__dir . '/HTMLPurifier/HTMLModule/SafeObject.php';
require_once $__dir . '/HTMLPurifier/HTMLModule/Scripting.php';
require_once $__dir . '/HTMLPurifier/HTMLModule/StyleAttribute.php';
require_once $__dir . '/HTMLPurifier/HTMLModule/Tables.php';
@@ -152,6 +157,7 @@ require_once $__dir . '/HTMLPurifier/HTMLModule/Tidy/XHTML.php';
require_once $__dir . '/HTMLPurifier/Injector/AutoParagraph.php';
require_once $__dir . '/HTMLPurifier/Injector/Linkify.php';
require_once $__dir . '/HTMLPurifier/Injector/PurifierLinkify.php';
require_once $__dir . '/HTMLPurifier/Injector/SafeObject.php';
require_once $__dir . '/HTMLPurifier/Lexer/DOMLex.php';
require_once $__dir . '/HTMLPurifier/Lexer/DirectLex.php';
require_once $__dir . '/HTMLPurifier/Strategy/Composite.php';
@@ -172,6 +178,7 @@ require_once $__dir . '/HTMLPurifier/URIFilter/DisableExternal.php';
require_once $__dir . '/HTMLPurifier/URIFilter/DisableExternalResources.php';
require_once $__dir . '/HTMLPurifier/URIFilter/HostBlacklist.php';
require_once $__dir . '/HTMLPurifier/URIFilter/MakeAbsolute.php';
require_once $__dir . '/HTMLPurifier/URIFilter/Munge.php';
require_once $__dir . '/HTMLPurifier/URIScheme/ftp.php';
require_once $__dir . '/HTMLPurifier/URIScheme/http.php';
require_once $__dir . '/HTMLPurifier/URIScheme/https.php';

View File

@@ -51,16 +51,13 @@ abstract class HTMLPurifier_AttrDef
*
* @warning This processing is inconsistent with XML's whitespace handling
* as specified by section 3.3.3 and referenced XHTML 1.0 section
* 4.7. Compliant processing requires all line breaks normalized
* to "\n", so the fix is not as simple as fixing it in this
* function. Trim and whitespace collapsing are supposed to only
* occur in NMTOKENs. However, note that we are NOT necessarily
* parsing XML, thus, this behavior may still be correct.
* 4.7. However, note that we are NOT necessarily
* parsing XML, thus, this behavior may still be correct. We
* assume that newlines have been normalized.
*/
public function parseCDATA($string) {
$string = trim($string);
$string = str_replace("\n", '', $string);
$string = str_replace(array("\r", "\t"), ' ', $string);
$string = str_replace(array("\n", "\t", "\r"), ' ', $string);
return $string;
}
@@ -70,9 +67,10 @@ abstract class HTMLPurifier_AttrDef
* @return Created AttrDef object corresponding to $string
*/
public function make($string) {
// default implementation, return flyweight of this object
// if overloaded, it is *necessary* for you to clone the
// object (usually by instantiating a new copy) and return that
// default implementation, return a flyweight of this object.
// If $string has an effect on the returned object (i.e. you
// need to overload this method), it is best
// to clone or instantiate new copies. (Instantiation is safer.)
return $this;
}

View File

@@ -29,6 +29,12 @@ class HTMLPurifier_AttrDef_CSS extends HTMLPurifier_AttrDef
$declarations = explode(';', $css);
$propvalues = array();
/**
* Name of the current CSS property being validated.
*/
$property = false;
$context->register('CurrentCSSProperty', $property);
foreach ($declarations as $declaration) {
if (!$declaration) continue;
if (!strpos($declaration, ':')) continue;
@@ -61,6 +67,8 @@ class HTMLPurifier_AttrDef_CSS extends HTMLPurifier_AttrDef
$propvalues[$property] = $result;
}
$context->destroy('CurrentCSSProperty');
// procedure does not write the new CSS simultaneously, so it's
// slightly inefficient, but it's the only way of getting rid of
// duplicates. Perhaps config to optimize it, but not now.

View File

@@ -0,0 +1,26 @@
<?php
/**
* Decorator which enables CSS properties to be disabled for specific elements.
*/
class HTMLPurifier_AttrDef_CSS_DenyElementDecorator extends HTMLPurifier_AttrDef
{
protected $def, $element;
/**
* @param $def Definition to wrap
* @param $element Element to deny
*/
public function __construct($def, $element) {
$this->def = $def;
$this->element = $element;
}
/**
* Checks if CurrentToken is set and equal to $this->element
*/
public function validate($string, $config, $context) {
$token = $context->get('CurrentToken', true);
if ($token && $token->name == $this->element) return false;
return $this->def->validate($string, $config, $context);
}
}

View File

@@ -16,7 +16,6 @@ class HTMLPurifier_AttrDef_CSS_FontFamily extends HTMLPurifier_AttrDef
'cursive' => true
);
$string = $this->parseCDATA($string);
// assume that no font names contain commas in them
$fonts = explode(',', $string);
$final = '';
@@ -35,13 +34,40 @@ class HTMLPurifier_AttrDef_CSS_FontFamily extends HTMLPurifier_AttrDef
$quote = $font[0];
if ($font[$length - 1] !== $quote) continue;
$font = substr($font, 1, $length - 2);
// double-backslash processing is buggy
$font = str_replace("\\$quote", $quote, $font); // de-escape quote
$font = str_replace("\\\n", "\n", $font); // de-escape newlines
$new_font = '';
for ($i = 0, $c = strlen($font); $i < $c; $i++) {
if ($font[$i] === '\\') {
$i++;
if ($i >= $c) {
$new_font .= '\\';
break;
}
if (ctype_xdigit($font[$i])) {
$code = $font[$i];
for ($a = 1, $i++; $i < $c && $a < 6; $i++, $a++) {
if (!ctype_xdigit($font[$i])) break;
$code .= $font[$i];
}
// We have to be extremely careful when adding
// new characters, to make sure we're not breaking
// the encoding.
$char = HTMLPurifier_Encoder::unichr(hexdec($code));
if (HTMLPurifier_Encoder::cleanUTF8($char) === '') continue;
$new_font .= $char;
if ($i < $c && trim($font[$i]) !== '') $i--;
continue;
}
if ($font[$i] === "\n") continue;
}
$new_font .= $font[$i];
}
$font = $new_font;
}
// $font is a pure representation of the font name
if (ctype_alnum($font)) {
if (ctype_alnum($font) && $font !== '') {
// very simple font, allow it in unharmed
$final .= $font . ', ';
continue;
@@ -50,8 +76,8 @@ class HTMLPurifier_AttrDef_CSS_FontFamily extends HTMLPurifier_AttrDef
// complicated font, requires quoting
// armor single quotes and new lines
$font = str_replace("\\", "\\\\", $font);
$font = str_replace("'", "\\'", $font);
$font = str_replace("\n", "\\\n", $font);
$final .= "'$font', ";
}
$final = rtrim($final, ', ');

View File

@@ -3,7 +3,7 @@
/**
* Decorator which enables !important to be used in CSS values.
*/
class HTMLPurifier_AttrDef_CSS_ImportantDecorator
class HTMLPurifier_AttrDef_CSS_ImportantDecorator extends HTMLPurifier_AttrDef
{
protected $def, $allow;

View File

@@ -6,46 +6,40 @@
class HTMLPurifier_AttrDef_CSS_Length extends HTMLPurifier_AttrDef
{
/**
* Valid unit lookup table.
* @warning The code assumes all units are two characters long. Be careful
* if we have to change this behavior!
*/
protected $units = array('em' => true, 'ex' => true, 'px' => true, 'in' => true,
'cm' => true, 'mm' => true, 'pt' => true, 'pc' => true);
/**
* Instance of HTMLPurifier_AttrDef_Number to defer number validation to
*/
protected $number_def;
protected $min, $max;
/**
* @param $non_negative Bool indication whether or not negative values are
* allowed.
* @param HTMLPurifier_Length $max Minimum length, or null for no bound. String is also acceptable.
* @param HTMLPurifier_Length $max Maximum length, or null for no bound. String is also acceptable.
*/
public function __construct($non_negative = false) {
$this->number_def = new HTMLPurifier_AttrDef_CSS_Number($non_negative);
public function __construct($min = null, $max = null) {
$this->min = $min !== null ? HTMLPurifier_Length::make($min) : null;
$this->max = $max !== null ? HTMLPurifier_Length::make($max) : null;
}
public function validate($length, $config, $context) {
public function validate($string, $config, $context) {
$string = $this->parseCDATA($string);
$length = $this->parseCDATA($length);
if ($length === '') return false;
if ($length === '0') return '0';
$strlen = strlen($length);
if ($strlen === 1) return false; // impossible!
// Optimizations
if ($string === '') return false;
if ($string === '0') return '0';
if (strlen($string) === 1) return false;
// we assume all units are two characters
$unit = substr($length, $strlen - 2);
if (!ctype_lower($unit)) $unit = strtolower($unit);
$number = substr($length, 0, $strlen - 2);
$length = HTMLPurifier_Length::make($string);
if (!$length->isValid()) return false;
if (!isset($this->units[$unit])) return false;
$number = $this->number_def->validate($number, $config, $context);
if ($number === false) return false;
return $number . $unit;
if ($this->min) {
$c = $length->compareTo($this->min);
if ($c === false) return false;
if ($c < 0) return false;
}
if ($this->max) {
$c = $length->compareTo($this->max);
if ($c === false) return false;
if ($c > 0) return false;
}
return $length->toString();
}
}

View File

@@ -18,6 +18,10 @@ class HTMLPurifier_AttrDef_CSS_Number extends HTMLPurifier_AttrDef
$this->non_negative = $non_negative;
}
/**
* @warning Some contexts do not pass $config, $context. These
* variables should not be used without checking HTMLPurifier_Length
*/
public function validate($number, $config, $context) {
$number = $this->parseCDATA($number);

View File

@@ -13,10 +13,13 @@ class HTMLPurifier_AttrDef_CSS_TextDecoration extends HTMLPurifier_AttrDef
static $allowed_values = array(
'line-through' => true,
'overline' => true,
'underline' => true
'underline' => true,
);
$string = strtolower($this->parseCDATA($string));
if ($string === 'none') return $string;
$parts = explode(' ', $string);
$final = '';
foreach ($parts as $part) {

View File

@@ -6,6 +6,12 @@
class HTMLPurifier_AttrDef_HTML_Pixels extends HTMLPurifier_AttrDef
{
protected $max;
public function __construct($max = null) {
$this->max = $max;
}
public function validate($string, $config, $context) {
$string = trim($string);
@@ -24,11 +30,18 @@ class HTMLPurifier_AttrDef_HTML_Pixels extends HTMLPurifier_AttrDef
// crash operating systems, see <http://ha.ckers.org/imagecrash.html>
// WARNING, above link WILL crash you if you're using Windows
if ($int > 1200) return '1200';
if ($this->max !== null && $int > $this->max) return (string) $this->max;
return (string) $int;
}
public function make($string) {
if ($string === '') $max = null;
else $max = (int) $string;
$class = get_class($this);
return new $class($max);
}
}

View File

@@ -0,0 +1,32 @@
<?php
/**
* Decorator that, depending on a token, switches between two definitions.
*/
class HTMLPurifier_AttrDef_Switch
{
protected $tag;
protected $withTag, $withoutTag;
/**
* @param string $tag Tag name to switch upon
* @param HTMLPurifier_AttrDef $with_tag Call if token matches tag
* @param HTMLPurifier_AttrDef $without_tag Call if token doesn't match, or there is no token
*/
public function __construct($tag, $with_tag, $without_tag) {
$this->tag = $tag;
$this->withTag = $with_tag;
$this->withoutTag = $without_tag;
}
public function validate($string, $config, $context) {
$token = $context->get('CurrentToken', true);
if (!$token || $token->name !== $this->tag) {
return $this->withoutTag->validate($string, $config, $context);
} else {
return $this->withTag->validate($string, $config, $context);
}
}
}

View File

@@ -7,7 +7,7 @@
class HTMLPurifier_AttrDef_URI extends HTMLPurifier_AttrDef
{
protected $parser, $percentEncoder;
protected $parser;
protected $embedsResource;
/**
@@ -15,17 +15,19 @@ class HTMLPurifier_AttrDef_URI extends HTMLPurifier_AttrDef
*/
public function __construct($embeds_resource = false) {
$this->parser = new HTMLPurifier_URIParser();
$this->percentEncoder = new HTMLPurifier_PercentEncoder();
$this->embedsResource = (bool) $embeds_resource;
}
public function make($string) {
$embeds = (bool) $string;
return new HTMLPurifier_AttrDef_URI($embeds);
}
public function validate($uri, $config, $context) {
if ($config->get('URI', 'Disable')) return false;
// initial operations
$uri = $this->parseCDATA($uri);
$uri = $this->percentEncoder->normalize($uri);
// parse the URI
$uri = $this->parser->parse($uri);
@@ -42,7 +44,7 @@ class HTMLPurifier_AttrDef_URI extends HTMLPurifier_AttrDef
if (!$result) break;
// chained filtering
$uri_def =& $config->getDefinition('URI');
$uri_def = $config->getDefinition('URI');
$result = $uri_def->filter($uri, $config, $context);
if (!$result) break;
@@ -53,6 +55,10 @@ class HTMLPurifier_AttrDef_URI extends HTMLPurifier_AttrDef
$result = $scheme_obj->validate($uri, $config, $context);
if (!$result) break;
// Post chained filtering
$result = $uri_def->postFilter($uri, $config, $context);
if (!$result) break;
// survived gauntlet
$ok = true;
@@ -61,26 +67,8 @@ class HTMLPurifier_AttrDef_URI extends HTMLPurifier_AttrDef
$context->destroy('EmbeddedURI');
if (!$ok) return false;
// munge scheme off if necessary (this must be last)
if (!is_null($uri->scheme) && is_null($uri->host)) {
if ($uri_def->defaultScheme == $uri->scheme) {
$uri->scheme = null;
}
}
// back to string
$result = $uri->toString();
// munge entire URI if necessary
if (
!is_null($uri->host) && // indicator for authority
!empty($scheme_obj->browsable) &&
!is_null($munge = $config->get('URI', 'Munge'))
) {
$result = str_replace('%s', rawurlencode($result), $munge);
}
return $result;
return $uri->toString();
}

View File

@@ -36,11 +36,23 @@ class HTMLPurifier_AttrDef_URI_Host extends HTMLPurifier_AttrDef
$ipv4 = $this->ipv4->validate($string, $config, $context);
if ($ipv4 !== false) return $ipv4;
// validate a domain name here, do filtering, etc etc etc
// A regular domain name.
// We could use this, but it would break I18N domain names
//$match = preg_match('/^[a-z0-9][\w\-\.]*[a-z0-9]$/i', $string);
//if (!$match) return false;
// This breaks I18N domain names, but we don't have proper IRI support,
// so force users to insert Punycode. If there's complaining we'll
// try to fix things into an international friendly form.
// The productions describing this are:
$a = '[a-z]'; // alpha
$an = '[a-z0-9]'; // alphanum
$and = '[a-z0-9-]'; // alphanum | "-"
// domainlabel = alphanum | alphanum *( alphanum | "-" ) alphanum
$domainlabel = "$an($and*$an)?";
// toplabel = alpha | alpha *( alphanum | "-" ) alphanum
$toplabel = "$a($and*$an)?";
// hostname = *( domainlabel "." ) toplabel [ "." ]
$match = preg_match("/^($domainlabel\.)*$toplabel\.?$/i", $string);
if (!$match) return false;
return $string;
}

View File

@@ -0,0 +1,13 @@
<?php
class HTMLPurifier_AttrTransform_SafeEmbed extends HTMLPurifier_AttrTransform
{
public $name = "SafeEmbed";
public function transform($attr, $config, $context) {
$attr['allowscriptaccess'] = 'never';
$attr['allownetworking'] = 'internal';
$attr['type'] = 'application/x-shockwave-flash';
return $attr;
}
}

View File

@@ -0,0 +1,14 @@
<?php
/**
* Writes default type for all objects. Currently only supports flash.
*/
class HTMLPurifier_AttrTransform_SafeObject extends HTMLPurifier_AttrTransform
{
public $name = "SafeObject";
function transform($attr, $config, $context) {
if (!isset($attr['type'])) $attr['type'] = 'application/x-shockwave-flash';
return $attr;
}
}

View File

@@ -0,0 +1,48 @@
<?php
/**
* Validates name/value pairs in param tags to be used in safe objects. This
* will only allow name values it recognizes, and pre-fill certain attributes
* with required values.
*
* @note
* This class only supports Flash. In the future, Quicktime support
* may be added.
*
* @warning
* This class expects an injector to add the necessary parameters tags.
*/
class HTMLPurifier_AttrTransform_SafeParam extends HTMLPurifier_AttrTransform
{
public $name = "SafeParam";
private $uri;
public function __construct() {
$this->uri = new HTMLPurifier_AttrDef_URI(true); // embedded
}
public function transform($attr, $config, $context) {
// If we add support for other objects, we'll need to alter the
// transforms.
switch ($attr['name']) {
// application/x-shockwave-flash
// Keep this synchronized with Injector/SafeObject.php
case 'allowScriptAccess':
$attr['value'] = 'never';
break;
case 'allowNetworking':
$attr['value'] = 'internal';
break;
case 'wmode':
$attr['value'] = 'window';
break;
case 'movie':
$attr['value'] = $this->uri->validate($attr['value'], $config, $context);
break;
// add other cases to support other param name/value pairs
default:
$attr['name'] = $attr['value'] = null;
}
return $attr;
}
}

View File

@@ -0,0 +1,14 @@
<?php
/**
* Implements required attribute stipulation for <script>
*/
class HTMLPurifier_AttrTransform_ScriptRequired extends HTMLPurifier_AttrTransform
{
public function transform($attr, $config, $context) {
if (!isset($attr['type'])) {
$attr['type'] = 'text/javascript';
}
return $attr;
}
}

View File

@@ -43,8 +43,8 @@ class HTMLPurifier_AttrValidator
// DEFINITION CALL
$d_defs = $definition->info_global_attr;
// reference attributes for easy manipulation
$attr =& $token->attr;
// don't update token until the very end, to ensure an atomic update
$attr = $token->attr;
// do global transformations (pre)
// nothing currently utilizes this
@@ -139,6 +139,8 @@ class HTMLPurifier_AttrValidator
if ($e && ($attr != $o)) $e->send(E_NOTICE, 'AttrValidator: Attributes transformed', $o, $attr);
}
$token->attr = $attr;
// destroy CurrentToken if we made it ourselves
if (!$current_token) $context->destroy('CurrentToken');

View File

@@ -37,7 +37,7 @@ class HTMLPurifier_Bootstrap
public static function autoload($class) {
$file = HTMLPurifier_Bootstrap::getPath($class);
if (!$file) return false;
require $file;
require HTMLPURIFIER_PREFIX . '/' . $file;
return true;
}
@@ -48,11 +48,13 @@ class HTMLPurifier_Bootstrap
if (strncmp('HTMLPurifier', $class, 12) !== 0) return false;
// Custom implementations
if (strncmp('HTMLPurifier_Language_', $class, 22) === 0) {
$code = str_replace('_', '-', substr($class, 22));
return 'HTMLPurifier/Language/classes/' . $code . '.php';
$code = str_replace('_', '-', substr($class, 22));
$file = 'HTMLPurifier/Language/classes/' . $code . '.php';
} else {
$file = str_replace('_', '/', $class) . '.php';
}
// Standard implementation
return str_replace('_', '/', $class) . '.php';
if (!file_exists(HTMLPURIFIER_PREFIX . '/' . $file)) return false;
return $file;
}
/**

View File

@@ -90,7 +90,7 @@ class HTMLPurifier_CSSDefinition extends HTMLPurifier_Definition
$this->info['border-left-width'] =
$this->info['border-right-width'] = new HTMLPurifier_AttrDef_CSS_Composite(array(
new HTMLPurifier_AttrDef_Enum(array('thin', 'medium', 'thick')),
new HTMLPurifier_AttrDef_CSS_Length(true) //disallow negative
new HTMLPurifier_AttrDef_CSS_Length('0') //disallow negative
));
$this->info['border-width'] = new HTMLPurifier_AttrDef_CSS_Multiple($border_width);
@@ -116,7 +116,7 @@ class HTMLPurifier_CSSDefinition extends HTMLPurifier_Definition
$this->info['line-height'] = new HTMLPurifier_AttrDef_CSS_Composite(array(
new HTMLPurifier_AttrDef_Enum(array('normal')),
new HTMLPurifier_AttrDef_CSS_Number(true), // no negatives
new HTMLPurifier_AttrDef_CSS_Length(true),
new HTMLPurifier_AttrDef_CSS_Length('0'),
new HTMLPurifier_AttrDef_CSS_Percentage(true)
));
@@ -138,7 +138,7 @@ class HTMLPurifier_CSSDefinition extends HTMLPurifier_Definition
$this->info['padding-bottom'] =
$this->info['padding-left'] =
$this->info['padding-right'] = new HTMLPurifier_AttrDef_CSS_Composite(array(
new HTMLPurifier_AttrDef_CSS_Length(true),
new HTMLPurifier_AttrDef_CSS_Length('0'),
new HTMLPurifier_AttrDef_CSS_Percentage(true)
));
@@ -149,13 +149,26 @@ class HTMLPurifier_CSSDefinition extends HTMLPurifier_Definition
new HTMLPurifier_AttrDef_CSS_Percentage()
));
$this->info['width'] =
$this->info['height'] =
new HTMLPurifier_AttrDef_CSS_Composite(array(
new HTMLPurifier_AttrDef_CSS_Length(true),
$trusted_wh = new HTMLPurifier_AttrDef_CSS_Composite(array(
new HTMLPurifier_AttrDef_CSS_Length('0'),
new HTMLPurifier_AttrDef_CSS_Percentage(true),
new HTMLPurifier_AttrDef_Enum(array('auto'))
));
$max = $config->get('CSS', 'MaxImgLength');
$this->info['width'] =
$this->info['height'] =
$max === null ?
$trusted_wh :
new HTMLPurifier_AttrDef_Switch('img',
// For img tags:
new HTMLPurifier_AttrDef_CSS_Composite(array(
new HTMLPurifier_AttrDef_CSS_Length('0', $max),
new HTMLPurifier_AttrDef_Enum(array('auto'))
)),
// For everyone else:
$trusted_wh
);
$this->info['text-decoration'] = new HTMLPurifier_AttrDef_CSS_TextDecoration();

View File

@@ -3,7 +3,7 @@
/**
* Defines allowed child nodes and validates tokens against it.
*/
class HTMLPurifier_ChildDef
abstract class HTMLPurifier_ChildDef
{
/**
* Type of child definition, usually right-most part of class name lowercase.
@@ -34,9 +34,7 @@ class HTMLPurifier_ChildDef
* @return bool false to remove parent node
* @return array of replacement child tokens
*/
public function validateChildren($tokens_of_children, $config, $context) {
trigger_error('Call to abstract function', E_USER_ERROR);
}
abstract public function validateChildren($tokens_of_children, $config, $context);
}

View File

@@ -55,10 +55,7 @@ class HTMLPurifier_ChildDef_Required extends HTMLPurifier_ChildDef
$escape_invalid_children = $config->get('Core', 'EscapeInvalidChildren');
// generator
static $gen = null;
if ($gen === null) {
$gen = new HTMLPurifier_Generator();
}
$gen = new HTMLPurifier_Generator($config, $context);
foreach ($tokens_of_children as $token) {
if (!empty($token->is_whitespace)) {
@@ -83,7 +80,7 @@ class HTMLPurifier_ChildDef_Required extends HTMLPurifier_ChildDef
$result[] = $token;
} elseif ($pcdata_allowed && $escape_invalid_children) {
$result[] = new HTMLPurifier_Token_Text(
$gen->generateFromToken($token, $config)
$gen->generateFromToken($token)
);
}
continue;
@@ -94,7 +91,7 @@ class HTMLPurifier_ChildDef_Required extends HTMLPurifier_ChildDef
} elseif ($pcdata_allowed && $escape_invalid_children) {
$result[] =
new HTMLPurifier_Token_Text(
$gen->generateFromToken( $token, $config )
$gen->generateFromToken($token)
);
} else {
// drop silently

View File

@@ -20,7 +20,7 @@ class HTMLPurifier_Config
/**
* HTML Purifier's version
*/
public $version = '3.1.0rc1';
public $version = '3.1.1';
/**
* Bool indicator whether or not to automatically finalize
@@ -72,7 +72,7 @@ class HTMLPurifier_Config
* @param $definition HTMLPurifier_ConfigSchema that defines what directives
* are allowed.
*/
public function __construct(&$definition) {
public function __construct($definition) {
$this->conf = $definition->defaults; // set up, copy in defaults
$this->def = $definition; // keep a copy around for checking
$this->parser = new HTMLPurifier_VarParser_Flexible();
@@ -107,7 +107,7 @@ class HTMLPurifier_Config
* @return Default HTMLPurifier_Config object.
*/
public static function createDefault() {
$definition =& HTMLPurifier_ConfigSchema::instance();
$definition = HTMLPurifier_ConfigSchema::instance();
$config = new HTMLPurifier_Config($definition);
return $config;
}
@@ -125,7 +125,7 @@ class HTMLPurifier_Config
E_USER_WARNING);
return;
}
if ($this->def->info[$namespace][$key]->class == 'alias') {
if (isset($this->def->info[$namespace][$key]->isAlias)) {
$d = $this->def->info[$namespace][$key];
trigger_error('Cannot get value from aliased directive, use real name ' . $d->namespace . '.' . $d->name,
E_USER_ERROR);
@@ -196,40 +196,48 @@ class HTMLPurifier_Config
E_USER_WARNING);
return;
}
if ($this->def->info[$namespace][$key]->class == 'alias') {
$def = $this->def->info[$namespace][$key];
if (isset($def->isAlias)) {
if ($from_alias) {
trigger_error('Double-aliases not allowed, please fix '.
'ConfigSchema bug with' . "$namespace.$key", E_USER_ERROR);
return;
}
$this->set($new_ns = $this->def->info[$namespace][$key]->namespace,
$new_dir = $this->def->info[$namespace][$key]->name,
$this->set($new_ns = $def->namespace,
$new_dir = $def->name,
$value, true);
trigger_error("$namespace.$key is an alias, preferred directive name is $new_ns.$new_dir", E_USER_NOTICE);
return;
}
// Raw type might be negative when using the fully optimized form
// of stdclass, which indicates allow_null == true
$rtype = is_int($def) ? $def : $def->type;
if ($rtype < 0) {
$type = -$rtype;
$allow_null = true;
} else {
$type = $rtype;
$allow_null = isset($def->allow_null);
}
try {
$value = $this->parser->parse(
$value,
$type = $this->def->info[$namespace][$key]->type,
$this->def->info[$namespace][$key]->allow_null
);
$value = $this->parser->parse($value, $type, $allow_null);
} catch (HTMLPurifier_VarParserException $e) {
trigger_error('Value for ' . "$namespace.$key" . ' is of invalid type, should be ' . $type, E_USER_WARNING);
trigger_error('Value for ' . "$namespace.$key" . ' is of invalid type, should be ' . HTMLPurifier_VarParser::getTypeName($type), E_USER_WARNING);
return;
}
if (is_string($value)) {
if (is_string($value) && is_object($def)) {
// resolve value alias if defined
if (isset($this->def->info[$namespace][$key]->aliases[$value])) {
$value = $this->def->info[$namespace][$key]->aliases[$value];
if (isset($def->aliases[$value])) {
$value = $def->aliases[$value];
}
if ($this->def->info[$namespace][$key]->allowed !== true) {
// check to see if the value is allowed
if (!isset($this->def->info[$namespace][$key]->allowed[$value])) {
trigger_error('Value not supported, valid values are: ' .
$this->_listify($this->def->info[$namespace][$key]->allowed), E_USER_WARNING);
return;
}
// check to see if the value is allowed
if (isset($def->allowed) && !isset($def->allowed[$value])) {
trigger_error('Value not supported, valid values are: ' .
$this->_listify($def->allowed), E_USER_WARNING);
return;
}
}
$this->conf[$namespace][$key] = $value;
@@ -254,21 +262,21 @@ class HTMLPurifier_Config
}
/**
* Retrieves reference to the HTML definition.
* Retrieves object reference to the HTML definition.
* @param $raw Return a copy that has not been setup yet. Must be
* called before it's been setup, otherwise won't work.
*/
public function &getHTMLDefinition($raw = false) {
$def =& $this->getDefinition('HTML', $raw);
return $def; // prevent PHP 4.4.0 from complaining
public function getHTMLDefinition($raw = false) {
return $this->getDefinition('HTML', $raw);
}
/**
* Retrieves reference to the CSS definition
* Retrieves object reference to the CSS definition
* @param $raw Return a copy that has not been setup yet. Must be
* called before it's been setup, otherwise won't work.
*/
public function &getCSSDefinition($raw = false) {
$def =& $this->getDefinition('CSS', $raw);
return $def;
public function getCSSDefinition($raw = false) {
return $this->getDefinition('CSS', $raw);
}
/**
@@ -276,7 +284,7 @@ class HTMLPurifier_Config
* @param $type Type of definition: HTML, CSS, etc
* @param $raw Whether or not definition should be returned raw
*/
public function &getDefinition($type, $raw = false) {
public function getDefinition($type, $raw = false) {
if (!$this->finalized && $this->autoFinalize) $this->finalize();
$factory = HTMLPurifier_DefinitionCacheFactory::instance();
$cache = $factory->create($type, $this);
@@ -310,17 +318,13 @@ class HTMLPurifier_Config
} elseif ($type == 'URI') {
$this->definitions[$type] = new HTMLPurifier_URIDefinition();
} else {
trigger_error("Definition of $type type not supported");
$false = false;
return $false;
throw new HTMLPurifier_Exception("Definition of $type type not supported");
}
// quick abort if raw
if ($raw) {
if (is_null($this->get($type, 'DefinitionID'))) {
// fatally error out if definition ID not set
trigger_error("Cannot retrieve raw version without specifying %$type.DefinitionID", E_USER_ERROR);
$false = new HTMLPurifier_Error();
return $false;
throw new HTMLPurifier_Exception("Cannot retrieve raw version without specifying %$type.DefinitionID");
}
return $this->definitions[$type];
}
@@ -390,7 +394,7 @@ class HTMLPurifier_Config
if (isset($blacklisted_directives["$ns.$directive"])) continue;
if (!isset($allowed_directives["$ns.$directive"]) && !isset($allowed_ns[$ns])) continue;
}
if ($def->class == 'alias') continue;
if (isset($def->isAlias)) continue;
if ($directive == 'DefinitionID' || $directive == 'DefinitionRev') continue;
$ret[] = array($ns, $directive);
}
@@ -407,7 +411,7 @@ class HTMLPurifier_Config
* @param $mq_fix Boolean whether or not to enable magic quotes fix
* @param $schema Instance of HTMLPurifier_ConfigSchema to use, if not global copy
*/
public static function loadArrayFromForm($array, $index, $allowed = true, $mq_fix = true, $schema = null) {
public static function loadArrayFromForm($array, $index = false, $allowed = true, $mq_fix = true, $schema = null) {
$ret = HTMLPurifier_Config::prepareArrayFromForm($array, $index, $allowed, $mq_fix, $schema);
$config = HTMLPurifier_Config::create($ret, $schema);
return $config;
@@ -417,7 +421,7 @@ class HTMLPurifier_Config
* Merges in configuration values from $_GET/$_POST to object. NOT STATIC.
* @note Same parameters as loadArrayFromForm
*/
public function mergeArrayFromForm($array, $index, $allowed = true, $mq_fix = true) {
public function mergeArrayFromForm($array, $index = false, $allowed = true, $mq_fix = true) {
$ret = HTMLPurifier_Config::prepareArrayFromForm($array, $index, $allowed, $mq_fix, $this->def);
$this->loadArray($ret);
}
@@ -426,8 +430,8 @@ class HTMLPurifier_Config
* Prepares an array from a form into something usable for the more
* strict parts of HTMLPurifier_Config
*/
public static function prepareArrayFromForm($array, $index, $allowed = true, $mq_fix = true, $schema = null) {
$array = (isset($array[$index]) && is_array($array[$index])) ? $array[$index] : array();
public static function prepareArrayFromForm($array, $index = false, $allowed = true, $mq_fix = true, $schema = null) {
if ($index !== false) $array = (isset($array[$index]) && is_array($array[$index])) ? $array[$index] : array();
$mq = $mq_fix && function_exists('get_magic_quotes_gpc') && get_magic_quotes_gpc();
$allowed = HTMLPurifier_Config::getAllowedDirectivesForForm($allowed, $schema);

View File

@@ -1,9 +0,0 @@
<?php
/**
* Base class for configuration entity
*/
class HTMLPurifier_ConfigDef {
public $class = false;
}

View File

@@ -1,55 +0,0 @@
<?php
/**
* Structure object containing definition of a directive.
* @note This structure does not contain default values
*/
class HTMLPurifier_ConfigDef_Directive extends HTMLPurifier_ConfigDef
{
public $class = 'directive';
public function __construct(
$type = null,
$allow_null = null,
$allowed = null,
$aliases = null
) {
if ( $type !== null) $this->type = $type;
if ( $allow_null !== null) $this->allow_null = $allow_null;
if ( $allowed !== null) $this->allowed = $allowed;
if ( $aliases !== null) $this->aliases = $aliases;
}
/**
* Allowed type of the directive. Values are:
* - string
* - istring (case insensitive string)
* - int
* - float
* - bool
* - lookup (array of value => true)
* - list (regular numbered index array)
* - hash (array of key => value)
* - mixed (anything goes)
*/
public $type = 'mixed';
/**
* Is null allowed? Has no effect for mixed type.
* @bool
*/
public $allow_null = false;
/**
* Lookup table of allowed values of the element, bool true if all allowed.
*/
public $allowed = true;
/**
* Hash of value aliases, i.e. values that are equivalent.
*/
public $aliases = array();
}

View File

@@ -1,24 +0,0 @@
<?php
/**
* Structure object describing a directive alias
*/
class HTMLPurifier_ConfigDef_DirectiveAlias extends HTMLPurifier_ConfigDef
{
public $class = 'alias';
/**
* Namespace being aliased to
*/
public $namespace;
/**
* Directive being aliased to
*/
public $name;
public function __construct($namespace, $name) {
$this->namespace = $namespace;
$this->name = $name;
}
}

View File

@@ -1,10 +0,0 @@
<?php
/**
* Structure object describing of a namespace
*/
class HTMLPurifier_ConfigDef_Namespace extends HTMLPurifier_ConfigDef
{
public $class = 'namespace';
}

View File

@@ -12,7 +12,33 @@ class HTMLPurifier_ConfigSchema {
public $defaults = array();
/**
* Definition of the directives.
* Definition of the directives. The structure of this is:
*
* array(
* 'Namespace' => array(
* 'Directive' => new stdclass(),
* )
* )
*
* The stdclass may have the following properties:
*
* - If isAlias isn't set:
* - type: Integer type of directive, see HTMLPurifier_VarParser for definitions
* - allow_null: If set, this directive allows null values
* - aliases: If set, an associative array of value aliases to real values
* - allowed: If set, a lookup array of allowed (string) values
* - If isAlias is set:
* - namespace: Namespace this directive aliases to
* - name: Directive name this directive aliases to
*
* In certain degenerate cases, stdclass will actually be an integer. In
* that case, the value is equivalent to an stdclass with the type
* property set to the integer. If the integer is negative, type is
* equal to the absolute value of integer, and allow_null is true.
*
* This class is friendly with HTMLPurifier_Config. If you need introspection
* about the schema, you're better of using the ConfigSchema_Interchange,
* which uses more memory but has much richer information.
*/
public $info = array();
@@ -21,15 +47,6 @@ class HTMLPurifier_ConfigSchema {
*/
static protected $singleton;
/**
* Variable parser.
*/
protected $parser;
public function __construct() {
$this->parser = new HTMLPurifier_VarParser_Flexible();
}
/**
* Unserializes the default ConfigSchema.
*/
@@ -40,7 +57,7 @@ class HTMLPurifier_ConfigSchema {
/**
* Retrieves an instance of the application-wide configuration definition.
*/
public static function &instance($prototype = null) {
public static function instance($prototype = null) {
if ($prototype !== null) {
HTMLPurifier_ConfigSchema::$singleton = $prototype;
} elseif (HTMLPurifier_ConfigSchema::$singleton === null || $prototype === true) {
@@ -62,11 +79,11 @@ class HTMLPurifier_ConfigSchema {
* @param $allow_null Whether or not to allow null values
*/
public function add($namespace, $name, $default, $type, $allow_null) {
$default = $this->parser->parse($default, $type, $allow_null);
$this->info[$namespace][$name] = new HTMLPurifier_ConfigDef_Directive();
$this->info[$namespace][$name]->type = $type;
$this->info[$namespace][$name]->allow_null = $allow_null;
$this->defaults[$namespace][$name] = $default;
$obj = new stdclass();
$obj->type = is_int($type) ? $type : HTMLPurifier_VarParser::$types[$type];
if ($allow_null) $obj->allow_null = true;
$this->info[$namespace][$name] = $obj;
$this->defaults[$namespace][$name] = $default;
}
/**
@@ -90,6 +107,9 @@ class HTMLPurifier_ConfigSchema {
* @param $aliases Hash of aliased values to the real alias
*/
public function addValueAliases($namespace, $name, $aliases) {
if (!isset($this->info[$namespace][$name]->aliases)) {
$this->info[$namespace][$name]->aliases = array();
}
foreach ($aliases as $alias => $real) {
$this->info[$namespace][$name]->aliases[$alias] = $real;
}
@@ -104,9 +124,7 @@ class HTMLPurifier_ConfigSchema {
* @param $allowed Lookup array of allowed values
*/
public function addAllowedValues($namespace, $name, $allowed) {
$directive =& $this->info[$namespace][$name];
$type = $directive->type;
$directive->allowed = $allowed;
$this->info[$namespace][$name]->allowed = $allowed;
}
/**
@@ -117,7 +135,26 @@ class HTMLPurifier_ConfigSchema {
* @param $new_name Directive that the alias will be to
*/
public function addAlias($namespace, $name, $new_namespace, $new_name) {
$this->info[$namespace][$name] = new HTMLPurifier_ConfigDef_DirectiveAlias($new_namespace, $new_name);
$obj = new stdclass;
$obj->namespace = $new_namespace;
$obj->name = $new_name;
$obj->isAlias = true;
$this->info[$namespace][$name] = $obj;
}
/**
* Replaces any stdclass that only has the type property with type integer.
*/
public function postProcess() {
foreach ($this->info as $namespace => $info) {
foreach ($info as $directive => $v) {
if (count((array) $v) == 1) {
$this->info[$namespace][$directive] = $v->type;
} elseif (count((array) $v) == 2 && isset($v->allow_null)) {
$this->info[$namespace][$directive] = -$v->type;
}
}
}
}
// DEPRECATED METHODS
@@ -125,26 +162,25 @@ class HTMLPurifier_ConfigSchema {
/** @see HTMLPurifier_ConfigSchema->set() */
public static function define($namespace, $name, $default, $type, $description) {
HTMLPurifier_ConfigSchema::deprecated(__METHOD__);
// process modifiers (OPTIMIZE!)
$type_values = explode('/', $type, 2);
$type = $type_values[0];
$modifier = isset($type_values[1]) ? $type_values[1] : false;
$allow_null = ($modifier === 'null');
$def =& HTMLPurifier_ConfigSchema::instance();
$def = HTMLPurifier_ConfigSchema::instance();
$def->add($namespace, $name, $default, $type, $allow_null);
}
/** @see HTMLPurifier_ConfigSchema->addNamespace() */
public static function defineNamespace($namespace, $description) {
HTMLPurifier_ConfigSchema::deprecated(__METHOD__);
$def =& HTMLPurifier_ConfigSchema::instance();
$def = HTMLPurifier_ConfigSchema::instance();
$def->addNamespace($namespace);
}
/** @see HTMLPurifier_ConfigSchema->addValueAliases() */
public static function defineValueAliases($namespace, $name, $aliases) {
HTMLPurifier_ConfigSchema::deprecated(__METHOD__);
$def =& HTMLPurifier_ConfigSchema::instance();
$def = HTMLPurifier_ConfigSchema::instance();
$def->addValueAliases($namespace, $name, $aliases);
}
@@ -155,21 +191,22 @@ class HTMLPurifier_ConfigSchema {
foreach ($allowed_values as $value) {
$allowed[$value] = true;
}
$def =& HTMLPurifier_ConfigSchema::instance();
$def = HTMLPurifier_ConfigSchema::instance();
$def->addAllowedValues($namespace, $name, $allowed);
}
/** @see HTMLPurifier_ConfigSchema->addAlias() */
public static function defineAlias($namespace, $name, $new_namespace, $new_name) {
HTMLPurifier_ConfigSchema::deprecated(__METHOD__);
$def =& HTMLPurifier_ConfigSchema::instance();
$def = HTMLPurifier_ConfigSchema::instance();
$def->addAlias($namespace, $name, $new_namespace, $new_name);
}
/** @deprecated, use HTMLPurifier_VarParser->parse() */
public function validate($a, $b, $c = false) {
trigger_error("HTMLPurifier_ConfigSchema->validate deprecated, use HTMLPurifier_VarParser->parse instead", E_USER_NOTICE);
return $this->parser->parse($a, $b, $c);
$parser = new HTMLPurifier_VarParser();
return $parser->parse($a, $b, $c);
}
/**

View File

@@ -43,6 +43,7 @@ class HTMLPurifier_ConfigSchema_Builder_ConfigSchema
);
}
}
$schema->postProcess();
return $schema;
}

View File

@@ -71,8 +71,10 @@ class HTMLPurifier_ConfigSchema_Builder_Xml extends XMLWriter
$this->startElement('constraints');
if ($directive->version) $this->writeElement('version', $directive->version);
$this->writeElement('type', $directive->type);
if ($directive->typeAllowsNull) $this->writeAttribute('allow-null', 'yes');
$this->startElement('type');
if ($directive->typeAllowsNull) $this->writeAttribute('allow-null', 'yes');
$this->text($directive->type);
$this->endElement(); // type
if ($directive->allowed) {
$this->startElement('allowed');
foreach ($directive->allowed as $value => $x) $this->writeElement('value', $value);

View File

@@ -17,21 +17,27 @@ class HTMLPurifier_ConfigSchema_InterchangeBuilder
$builder = new HTMLPurifier_ConfigSchema_InterchangeBuilder();
$interchange = new HTMLPurifier_ConfigSchema_Interchange();
if (!$dir) $dir = realpath(dirname(__FILE__) . '/schema/');
if (!$dir) $dir = HTMLPURIFIER_PREFIX . '/HTMLPurifier/ConfigSchema/schema/';
$info = parse_ini_file($dir . 'info.ini');
$interchange->name = $info['name'];
$files = array();
$dh = opendir($dir);
while (false !== ($file = readdir($dh))) {
if (!$file || $file[0] == '.' || strrchr($file, '.') !== '.txt') {
continue;
}
$files[] = $file;
}
closedir($dh);
sort($files);
foreach ($files as $file) {
$builder->build(
$interchange,
new HTMLPurifier_StringHash( $parser->parseFile($dir . $file) )
);
}
closedir($dh);
return $interchange;
}

View File

@@ -111,7 +111,8 @@ class HTMLPurifier_ConfigSchema_Validator
if (!is_null($d->allowed) || !empty($d->valueAliases)) {
// allowed and valueAliases require that we be dealing with
// strings, so check for that early.
if (!isset(HTMLPurifier_VarParser::$stringTypes[$d->type])) {
$d_int = HTMLPurifier_VarParser::$types[$d->type];
if (!isset(HTMLPurifier_VarParser::$stringTypes[$d_int])) {
$this->error('type', 'must be a string type when used with allowed or value aliases');
}
}

File diff suppressed because one or more lines are too long

View File

@@ -0,0 +1,15 @@
CSS.MaxImgLength
TYPE: string/null
DEFAULT: '1200px'
VERSION: 3.1.1
--DESCRIPTION--
<p>
This parameter sets the maximum allowed length on <code>img</code> tags,
effectively the <code>width</code> and <code>height</code> properties.
Only absolute units of measurement (in, pt, pc, mm, cm) and pixels (px) are allowed. This is
in place to prevent imagecrash attacks, disable with null at your own risk.
This directive is similar to %HTML.MaxImgLength, and both should be
concurrently edited, although there are
subtle differences in the input format (the CSS max is a number with
a unit).
</p>

View File

@@ -21,3 +21,17 @@ $styles = $purifier->context->get('StyleBlocks');
foreach ($styles as $style) {
echo '<style type="text/css">' . $style . "</style>\n";
}]]></pre>
<p>
<strong>Warning:</strong> It is possible for a user to mount an
imagecrash attack using this CSS. Counter-measures are difficult;
it is not simply enough to limit the range of CSS lengths (using
relative lengths with many nesting levels allows for large values
to be attained without actually specifying them in the stylesheet),
and the flexible nature of selectors makes it difficult to selectively
disable lengths on image tags (HTML Purifier, however, does disable
CSS width and height in inline styling). There are probably two effective
counter measures: an explicit width and height set to auto in all
images in your document (unlikely) or the disabling of width and
height (somewhat reasonable). Whether or not these measures should be
used is left to the reader.
</p>

View File

@@ -4,7 +4,17 @@ VERSION: 3.1.0
DEFAULT: array()
--DESCRIPTION--
<p>
This directive complements %HTML.ForbiddenElements and is the inverse of
%HTML.AllowedAttributes. Please see the former for a discussion of why you
While this directive is similar to %HTML.AllowedAttributes, for
forwards-compatibility with XML, this attribute has a different syntax. Instead of
<code>tag.attr</code>, use <code>tag@attr</code>. To disallow <code>href</code>
attributes in <code>a</code> tags, set this directive to
<code>a@href</code>. You can also disallow an attribute globally with
<code>attr</code> or <code>*@attr</code> (either syntax is fine; the latter
is provided for consistency with %HTML.AllowedAttributes).
</p>
<p>
<strong>Warning:</strong> This directive complements %HTML.ForbiddenElements,
accordingly, check
out that directive for a discussion of why you
should think twice before using this directive.
</p>

View File

@@ -0,0 +1,13 @@
HTML.MaxImgLength
TYPE: int/null
DEFAULT: 1200
VERSION: 3.1.1
--DESCRIPTION--
<p>
This directive controls the maximum number of pixels in the width and
height attributes in <code>img</code> tags. This is
in place to prevent imagecrash attacks, disable with null at your own risk.
This directive is similar to %CSS.MaxImgLength, and both should be
concurrently edited, although there are
subtle differences in the input format (the HTML max is an integer).
</p>

View File

@@ -0,0 +1,13 @@
HTML.SafeEmbed
TYPE: bool
VERSION: 3.1.1
DEFAULT: false
--DESCRIPTION--
<p>
Whether or not to permit embed tags in documents, with a number of extra
security features added to prevent script execution. This is similar to
what websites like MySpace do to embed tags. Embed is a proprietary
element and will cause your website to stop validating. You probably want
to enable this with %HTML.SafeObject.
<strong>Highly experimental.</strong>
</p>

View File

@@ -0,0 +1,13 @@
HTML.SafeObject
TYPE: bool
VERSION: 3.1.1
DEFAULT: false
--DESCRIPTION--
<p>
Whether or not to permit object tags in documents, with a number of extra
security features added to prevent script execution. This is similar to
what websites like MySpace do to object tags. You may also want to
enable %HTML.SafeEmbed for maximum interoperability with Internet Explorer,
although embed tags will cause your website to stop validating.
<strong>Highly experimental.</strong>
</p>

View File

@@ -6,7 +6,7 @@ DEFAULT: NULL
<p>
Munges all browsable (usually http, https and ftp)
absolute URI's into another URI, usually a URI redirection service.
absolute URIs into another URI, usually a URI redirection service.
This directive accepts a URI, formatted with a <code>%s</code> where
the url-encoded original URI should be inserted (sample:
<code>http://www.google.com/url?q=%s</code>).
@@ -19,13 +19,64 @@ DEFAULT: NULL
Prevent PageRank leaks, while being fairly transparent
to users (you may also want to add some client side JavaScript to
override the text in the statusbar). <strong>Notice</strong>:
Many security experts believe that this form of protection does
not deter spam-bots.
Many security experts believe that this form of protection does not deter spam-bots.
</li>
<li>
Redirect users to a splash page telling them they are leaving your
website. While this is poor usability practice, it is often
mandated
website. While this is poor usability practice, it is often mandated
in corporate environments.
</li>
</ul>
<p>
Prior to HTML Purifier 3.1.1, this directive also enabled the munging
of browsable external resources, which could break things if your redirection
script was a splash page or used <code>meta</code> tags. To revert to
previous behavior, please use %URI.MungeResources.
</p>
<p>
You may want to also use %URI.MungeSecretKey along with this directive
in order to enforce what URIs your redirector script allows. Open
redirector scripts can be a security risk and negatively affect the
reputation of your domain name.
</p>
<p>
Starting with HTML Purifier 3.1.1, there is also these substitutions:
</p>
<table>
<thead>
<tr>
<th>Key</th>
<th>Description</th>
<th>Example <code>&lt;a href=""&gt;</code></th>
</tr>
</thead>
<tbody>
<tr>
<td>%r</td>
<td>1 - The URI embeds a resource<br />(blank) - The URI is merely a link</td>
<td></td>
</tr>
<tr>
<td>%n</td>
<td>The name of the tag this URI came from</td>
<td>a</td>
</tr>
<tr>
<td>%m</td>
<td>The name of the attribute this URI came from</td>
<td>href</td>
</tr>
<tr>
<td>%p</td>
<td>The name of the CSS property this URI came from, or blank if irrelevant</td>
<td></td>
</tr>
</tbody>
</table>
<p>
Admittedly, these letters are somewhat arbitrary; the only stipulation
was that they couldn't be a through f. r is for resource (I would have preferred
e, but you take what you can get), n is for name, m
was picked because it came after n (and I couldn't use a), p is for
property.
</p>

View File

@@ -0,0 +1,16 @@
URI.MungeResources
TYPE: bool
VERSION: 3.1.1
DEFAULT: false
--DESCRIPTION--
<p>
If true, any URI munging directives like %URI.Munge
will also apply to embedded resources, such as <code>&lt;img src=""&gt;</code>.
Be careful enabling this directive if you have a redirector script
that does not use the <code>Location</code> HTTP header; all of your images
and other embedded resources will break.
</p>
<p>
<strong>Warning:</strong> It is strongly advised you use this in conjunction
%URI.MungeSecretKey to mitigate the security risk of an open redirector.
</p>

View File

@@ -0,0 +1,29 @@
URI.MungeSecretKey
TYPE: string/null
VERSION: 3.1.1
DEFAULT: NULL
--DESCRIPTION--
<p>
This directive enables secure checksum generation along with %URI.Munge.
It should be set to a secure key that is not shared with anyone else.
The checksum can be placed in the URI using %t. Use of this checksum
affords an additional level of protection by allowing a redirector
to check if a URI has passed through HTML Purifier with this line:
</p>
<pre>$checksum === sha1($secret_key . ':' . $url)</pre>
<p>
If the output is TRUE, the redirector script should accept the URI.
</p>
<p>
Please note that it would still be possible for an attacker to procure
secure hashes en-mass by abusing your website's Preview feature or the
like, but this service affords an additional level of protection
that should be combined with website blacklisting.
</p>
<p>
Remember this has no effect if %URI.Munge is not on.
</p>

View File

@@ -4,6 +4,8 @@
* Registry object that contains information about the current context.
* @warning Is a bit buggy when variables are set to null: it thinks
* they don't exist! So use false instead, please.
* @note Since the variables Context deals with may not be objects,
* references are very important here! Do not remove!
*/
class HTMLPurifier_Context
{
@@ -16,7 +18,7 @@ class HTMLPurifier_Context
/**
* Registers a variable into the context.
* @param $name String name
* @param $ref Variable to be registered
* @param $ref Reference to variable to be registered
*/
public function register($name, &$ref) {
if (isset($this->_storage[$name])) {

View File

@@ -26,8 +26,8 @@ abstract class HTMLPurifier_DefinitionCache
* @param Instance of HTMLPurifier_Config
*/
public function generateKey($config) {
return $config->version . '-' . // possibly replace with function calls
$config->getBatchSerial($this->type) . '-' .
return $config->version . ',' . // possibly replace with function calls
$config->getBatchSerial($this->type) . ',' .
$config->get($this->type, 'DefinitionRev');
}
@@ -38,8 +38,8 @@ abstract class HTMLPurifier_DefinitionCache
* @param $config Instance of HTMLPurifier_Config to test against
*/
public function isOld($key, $config) {
if (substr_count($key, '-') < 2) return true;
list($version, $hash, $revision) = explode('-', $key, 3);
if (substr_count($key, ',') < 2) return true;
list($version, $hash, $revision) = explode(',', $key, 3);
$compare = version_compare($version, $config->version);
// version mismatch, is always old
if ($compare != 0) return true;

View File

@@ -100,18 +100,7 @@ class HTMLPurifier_DefinitionCache_Serializer extends
* @return Number of bytes written if success, or false if failure.
*/
private function _write($file, $data) {
static $file_put_contents;
if ($file_put_contents === null) {
$file_put_contents = function_exists('file_put_contents');
}
if ($file_put_contents) {
return file_put_contents($file, $data);
}
$fh = fopen($file, 'w');
if (!$fh) return false;
$status = fwrite($fh, $data);
fclose($fh);
return $status;
return file_put_contents($file, $data);
}
/**
@@ -130,7 +119,9 @@ class HTMLPurifier_DefinitionCache_Serializer extends
} elseif (!$this->_testPermissions($base)) {
return false;
}
$old = umask(0022); // disable group and world writes
mkdir($directory);
umask($old);
} elseif (!$this->_testPermissions($directory)) {
return false;
}

View File

@@ -20,7 +20,7 @@ class HTMLPurifier_DefinitionCacheFactory
/**
* Retrieves an instance of global definition cache factory.
*/
public static function &instance($prototype = null) {
public static function instance($prototype = null) {
static $instance;
if ($prototype !== null) {
$instance = $prototype;
@@ -45,11 +45,10 @@ class HTMLPurifier_DefinitionCacheFactory
* @param $name Name of definitions handled by cache
* @param $config Instance of HTMLPurifier_Config
*/
public function &create($type, $config) {
public function create($type, $config) {
$method = $config->get('Cache', 'DefinitionImpl');
if ($method === null) {
$null = new HTMLPurifier_DefinitionCache_Null($type);
return $null;
return new HTMLPurifier_DefinitionCache_Null($type);
}
if (!empty($this->caches[$method][$type])) {
return $this->caches[$method][$type];

View File

@@ -21,9 +21,9 @@ class HTMLPurifier_DoctypeRegistry
* @param $modules Modules doctype will load
* @param $modules_for_modes Modules doctype will load for certain modes
* @param $aliases Alias names for doctype
* @return Reference to registered doctype (usable for further editing)
* @return Editable registered doctype
*/
public function &register($doctype, $xml = true, $modules = array(),
public function register($doctype, $xml = true, $modules = array(),
$tidy_modules = array(), $aliases = array(), $dtd_public = null, $dtd_system = null
) {
if (!is_array($modules)) $modules = array($modules);
@@ -34,7 +34,7 @@ class HTMLPurifier_DoctypeRegistry
$doctype, $xml, $modules, $tidy_modules, $aliases, $dtd_public, $dtd_system
);
}
$this->doctypes[$doctype->name] =& $doctype;
$this->doctypes[$doctype->name] = $doctype;
$name = $doctype->name;
// hookup aliases
foreach ($doctype->aliases as $alias) {
@@ -51,9 +51,9 @@ class HTMLPurifier_DoctypeRegistry
* @note This function resolves aliases
* @note When possible, use the more fully-featured make()
* @param $doctype Name of doctype
* @return Reference to doctype object
* @return Editable doctype object
*/
public function &get($doctype) {
public function get($doctype) {
if (isset($this->aliases[$doctype])) $doctype = $this->aliases[$doctype];
if (!isset($this->doctypes[$doctype])) {
trigger_error('Doctype ' . htmlspecialchars($doctype) . ' does not exist', E_USER_ERROR);

View File

@@ -14,6 +14,11 @@ class HTMLPurifier_Encoder
trigger_error('Cannot instantiate encoder, call methods statically', E_USER_ERROR);
}
/**
* Error-handler that mutes errors, alternative to shut-up operator.
*/
private static function muteErrorHandler() {}
/**
* Cleans a UTF-8 string for well-formedness and SGML validity
*
@@ -41,26 +46,13 @@ class HTMLPurifier_Encoder
*/
public static function cleanUTF8($str, $force_php = false) {
static $non_sgml_chars = array();
if (empty($non_sgml_chars)) {
for ($i = 0; $i <= 31; $i++) {
// non-SGML ASCII chars
// save \r, \t and \n
if ($i == 9 || $i == 13 || $i == 10) continue;
$non_sgml_chars[chr($i)] = '';
}
for ($i = 127; $i <= 159; $i++) {
$non_sgml_chars[HTMLPurifier_Encoder::unichr($i)] = '';
}
}
static $iconv = null;
if ($iconv === null) $iconv = function_exists('iconv');
if ($iconv && !$force_php) {
// do the shortcut way
$str = @iconv('UTF-8', 'UTF-8//IGNORE', $str);
return strtr($str, $non_sgml_chars);
// UTF-8 validity is checked since PHP 4.3.5
// This is an optimization: if the string is already valid UTF-8, no
// need to do PHP stuff. 99% of the time, this will be the case.
// The regexp matches the XML char production, as well as well as excluding
// non-SGML codepoints U+007F to U+009F
if (preg_match('/^[\x{9}\x{A}\x{D}\x{20}-\x{7E}\x{A0}-\x{D7FF}\x{E000}-\x{FFFD}\x{10000}-\x{10FFFF}]*$/Du', $str)) {
return $str;
}
$mState = 0; // cached expected number of octets after the current octet
@@ -171,7 +163,17 @@ class HTMLPurifier_Encoder
) {
} elseif (0xFEFF != $mUcs4 && // omit BOM
!($mUcs4 >= 128 && $mUcs4 <= 159) // omit non-SGML
// check for valid Char unicode codepoints
(
0x9 == $mUcs4 ||
0xA == $mUcs4 ||
0xD == $mUcs4 ||
(0x20 <= $mUcs4 && 0x7E >= $mUcs4) ||
// 7F-9F is not strictly prohibited by XML,
// but it is non-SGML, and thus we don't allow it
(0xA0 <= $mUcs4 && 0xD7FF >= $mUcs4) ||
(0x10000 <= $mUcs4 && 0x10FFFF >= $mUcs4)
)
) {
$out .= $char;
}
@@ -262,14 +264,23 @@ class HTMLPurifier_Encoder
* Converts a string to UTF-8 based on configuration.
*/
public static function convertToUTF8($str, $config, $context) {
static $iconv = null;
if ($iconv === null) $iconv = function_exists('iconv');
$encoding = $config->get('Core', 'Encoding');
if ($encoding === 'utf-8') return $str;
static $iconv = null;
if ($iconv === null) $iconv = function_exists('iconv');
set_error_handler(array('HTMLPurifier_Encoder', 'muteErrorHandler'));
if ($iconv && !$config->get('Test', 'ForceNoIconv')) {
return @iconv($encoding, 'utf-8//IGNORE', $str);
$str = iconv($encoding, 'utf-8//IGNORE', $str);
// If the string is bjorked by Shift_JIS or a similar encoding
// that doesn't support all of ASCII, convert the naughty
// characters to their true byte-wise ASCII/UTF-8 equivalents.
$str = strtr($str, HTMLPurifier_Encoder::testEncodingSupportsASCII($encoding));
restore_error_handler();
return $str;
} elseif ($encoding === 'iso-8859-1') {
return @utf8_encode($str);
$str = utf8_encode($str);
restore_error_handler();
return $str;
}
trigger_error('Encoding not supported', E_USER_ERROR);
}
@@ -280,17 +291,31 @@ class HTMLPurifier_Encoder
* characters being omitted.
*/
public static function convertFromUTF8($str, $config, $context) {
static $iconv = null;
if ($iconv === null) $iconv = function_exists('iconv');
$encoding = $config->get('Core', 'Encoding');
if ($encoding === 'utf-8') return $str;
if ($config->get('Core', 'EscapeNonASCIICharacters')) {
static $iconv = null;
if ($iconv === null) $iconv = function_exists('iconv');
if ($escape = $config->get('Core', 'EscapeNonASCIICharacters')) {
$str = HTMLPurifier_Encoder::convertToASCIIDumbLossless($str);
}
set_error_handler(array('HTMLPurifier_Encoder', 'muteErrorHandler'));
if ($iconv && !$config->get('Test', 'ForceNoIconv')) {
return @iconv('utf-8', $encoding . '//IGNORE', $str);
// Undo our previous fix in convertToUTF8, otherwise iconv will barf
$ascii_fix = HTMLPurifier_Encoder::testEncodingSupportsASCII($encoding);
if (!$escape && !empty($ascii_fix)) {
$clear_fix = array();
foreach ($ascii_fix as $utf8 => $native) $clear_fix[$utf8] = '';
$str = strtr($str, $clear_fix);
}
$str = strtr($str, array_flip($ascii_fix));
// Normal stuff
$str = iconv('utf-8', $encoding . '//IGNORE', $str);
restore_error_handler();
return $str;
} elseif ($encoding === 'iso-8859-1') {
return @utf8_decode($str);
$str = utf8_decode($str);
restore_error_handler();
return $str;
}
trigger_error('Encoding not supported', E_USER_ERROR);
}
@@ -342,6 +367,47 @@ class HTMLPurifier_Encoder
return $result;
}
/**
* This expensive function tests whether or not a given character
* encoding supports ASCII. 7/8-bit encodings like Shift_JIS will
* fail this test, and require special processing. Variable width
* encodings shouldn't ever fail.
*
* @param string $encoding Encoding name to test, as per iconv format
* @param bool $bypass Whether or not to bypass the precompiled arrays.
* @return Array of UTF-8 characters to their corresponding ASCII,
* which can be used to "undo" any overzealous iconv action.
*/
public static function testEncodingSupportsASCII($encoding, $bypass = false) {
static $encodings = array();
if (!$bypass) {
if (isset($encodings[$encoding])) return $encodings[$encoding];
$lenc = strtolower($encoding);
switch ($lenc) {
case 'shift_jis':
return array("\xC2\xA5" => '\\', "\xE2\x80\xBE" => '~');
case 'johab':
return array("\xE2\x82\xA9" => '\\');
}
if (strpos($lenc, 'iso-8859-') === 0) return array();
}
$ret = array();
set_error_handler(array('HTMLPurifier_Encoder', 'muteErrorHandler'));
if (iconv('UTF-8', $encoding, 'a') === false) return false;
for ($i = 0x20; $i <= 0x7E; $i++) { // all printable ASCII chars
$c = chr($i);
if (iconv('UTF-8', "$encoding//IGNORE", $c) === '') {
// Reverse engineer: what's the UTF-8 equiv of this byte
// sequence? This assumes that there's no variable width
// encoding that doesn't support ASCII.
$ret[iconv($encoding, 'UTF-8//IGNORE', $c)] = $c;
}
}
restore_error_handler();
$encodings[$encoding] = $ret;
return $ret;
}
}

View File

@@ -1,7 +0,0 @@
<?php
/**
* Return object from functions that signifies error when null doesn't cut it
*/
class HTMLPurifier_Error {}

View File

@@ -15,7 +15,7 @@ class HTMLPurifier_ErrorCollector
public function __construct($context) {
$this->locale =& $context->get('Locale');
$this->generator =& $context->get('Generator');
$this->context =& $context;
$this->context = $context;
}
/**

View File

@@ -11,81 +11,78 @@ class HTMLPurifier_Generator
{
/**
* Bool cache of %HTML.XHTML
* @private
* Whether or not generator should produce XML output
*/
private $_xhtml = true;
/**
* Bool cache of %Output.CommentScriptContents
* @private
* :HACK: Whether or not generator should comment the insides of <script> tags
*/
private $_scriptFix = false;
/**
* Cache of HTMLDefinition
* @private
* Cache of HTMLDefinition during HTML output to determine whether or
* not attributes should be minimized.
*/
private $_def;
/**
* Configuration for the generator
*/
protected $config;
/**
* @param $config Instance of HTMLPurifier_Config
* @param $context Instance of HTMLPurifier_Context
*/
public function __construct($config, $context) {
$this->config = $config;
$this->_scriptFix = $config->get('Output', 'CommentScriptContents');
$this->_def = $config->getHTMLDefinition();
$this->_xhtml = $this->_def->doctype->xml;
}
/**
* Generates HTML from an array of tokens.
* @param $tokens Array of HTMLPurifier_Token
* @param $config HTMLPurifier_Config object
* @return Generated HTML
*/
public function generateFromTokens($tokens, $config, $context) {
$html = '';
if (!$config) $config = HTMLPurifier_Config::createDefault();
$this->_scriptFix = $config->get('Output', 'CommentScriptContents');
$this->_def = $config->getHTMLDefinition();
$this->_xhtml = $this->_def->doctype->xml;
public function generateFromTokens($tokens) {
if (!$tokens) return '';
// Basic algorithm
$html = '';
for ($i = 0, $size = count($tokens); $i < $size; $i++) {
if ($this->_scriptFix && $tokens[$i]->name === 'script'
&& $i + 2 < $size && $tokens[$i+2] instanceof HTMLPurifier_Token_End) {
// script special case
// the contents of the script block must be ONE token
// for this to work
// for this to work.
$html .= $this->generateFromToken($tokens[$i++]);
$html .= $this->generateScriptFromToken($tokens[$i++]);
// We're not going to do this: it wouldn't be valid anyway
//while ($tokens[$i]->name != 'script') {
// $html .= $this->generateScriptFromToken($tokens[$i++]);
//}
}
$html .= $this->generateFromToken($tokens[$i]);
}
if ($config->get('Output', 'TidyFormat') && extension_loaded('tidy')) {
$tidy_options = array(
// Tidy cleanup
if (extension_loaded('tidy') && $this->config->get('Output', 'TidyFormat')) {
$tidy = new Tidy;
$tidy->parseString($html, array(
'indent'=> true,
'output-xhtml' => $this->_xhtml,
'show-body-only' => true,
'indent-spaces' => 2,
'wrap' => 68,
);
if (version_compare(PHP_VERSION, '5', '<')) {
tidy_set_encoding('utf8');
foreach ($tidy_options as $key => $value) {
tidy_setopt($key, $value);
}
tidy_parse_string($html);
tidy_clean_repair();
$html = tidy_get_output();
} else {
$tidy = new Tidy;
$tidy->parseString($html, $tidy_options, 'utf8');
$tidy->cleanRepair();
$html = (string) $tidy;
}
), 'utf8');
$tidy->cleanRepair();
$html = (string) $tidy; // explicit cast necessary
}
// normalize newlines to system
$nl = $config->get('Output', 'Newline');
// Normalize newlines to system defined value
$nl = $this->config->get('Output', 'Newline');
if ($nl === null) $nl = PHP_EOL;
$html = str_replace("\n", $nl, $html);
if ($nl !== "\n") $html = str_replace("\n", $nl, $html);
return $html;
}
@@ -95,8 +92,11 @@ class HTMLPurifier_Generator
* @return Generated HTML
*/
public function generateFromToken($token) {
if (!$token instanceof HTMLPurifier_Token) return '';
if ($token instanceof HTMLPurifier_Token_Start) {
if (!$token instanceof HTMLPurifier_Token) {
trigger_error('Cannot generate HTML from non-HTMLPurifier_Token object', E_USER_WARNING);
return '';
} elseif ($token instanceof HTMLPurifier_Token_Start) {
$attr = $this->generateAttributes($token->attr, $token->name);
return '<' . $token->name . ($attr ? ' ' : '') . $attr . '>';
@@ -106,11 +106,11 @@ class HTMLPurifier_Generator
} elseif ($token instanceof HTMLPurifier_Token_Empty) {
$attr = $this->generateAttributes($token->attr, $token->name);
return '<' . $token->name . ($attr ? ' ' : '') . $attr .
( $this->_xhtml ? ' /': '' )
( $this->_xhtml ? ' /': '' ) // <br /> v. <br>
. '>';
} elseif ($token instanceof HTMLPurifier_Token_Text) {
return $this->escape($token->data);
return $this->escape($token->data, ENT_NOQUOTES);
} elseif ($token instanceof HTMLPurifier_Token_Comment) {
return '<!--' . $token->data . '-->';
@@ -127,25 +127,27 @@ class HTMLPurifier_Generator
*/
public function generateScriptFromToken($token) {
if (!$token instanceof HTMLPurifier_Token_Text) return $this->generateFromToken($token);
// return '<!--' . "\n" . trim($token->data) . "\n" . '// -->';
// more advanced version:
// thanks <http://lachy.id.au/log/2005/05/script-comments>
// Thanks <http://lachy.id.au/log/2005/05/script-comments>
$data = preg_replace('#//\s*$#', '', $token->data);
return '<!--//--><![CDATA[//><!--' . "\n" . trim($data) . "\n" . '//--><!]]>';
}
/**
* Generates attribute declarations from attribute array.
* @note This does not include the leading or trailing space.
* @param $assoc_array_of_attributes Attribute array
* @param $element Name of element attributes are for, used to check
* attribute minimization.
* @return Generate HTML fragment for insertion.
*/
public function generateAttributes($assoc_array_of_attributes, $element) {
public function generateAttributes($assoc_array_of_attributes, $element = false) {
$html = '';
foreach ($assoc_array_of_attributes as $key => $value) {
if (!$this->_xhtml) {
// remove namespaced attributes
// Remove namespaced attributes
if (strpos($key, ':') !== false) continue;
if (!empty($this->_def->info[$element]->attr[$key]->minimized)) {
// Check if we should minimize the attribute: val="val" -> val
if ($element && !empty($this->_def->info[$element]->attr[$key]->minimized)) {
$html .= $key . ' ';
continue;
}
@@ -157,11 +159,16 @@ class HTMLPurifier_Generator
/**
* Escapes raw text data.
* @todo This really ought to be protected, but until we have a facility
* for properly generating HTML here w/o using tokens, it stays
* public.
* @param $string String data to escape for HTML.
* @param $quote Quoting style, like htmlspecialchars. ENT_NOQUOTES is
* permissible for non-attribute output.
* @return String escaped data.
*/
public function escape($string) {
return htmlspecialchars($string, ENT_COMPAT, 'UTF-8');
public function escape($string, $quote = ENT_COMPAT) {
return htmlspecialchars($string, $quote, 'UTF-8');
}
}

View File

@@ -76,6 +76,11 @@ class HTMLPurifier_HTMLDefinition extends HTMLPurifier_Definition
*/
public $info_content_sets = array();
/**
* Indexed list of HTMLPurifier_Injector to be used.
*/
public $info_injector = array();
/**
* Doctype object
*/
@@ -95,11 +100,11 @@ class HTMLPurifier_HTMLDefinition extends HTMLPurifier_Definition
* HTMLPurifier_AttrTypes for details
*/
public function addAttribute($element_name, $attr_name, $def) {
$module =& $this->getAnonymousModule();
$module = $this->getAnonymousModule();
if (!isset($module->info[$element_name])) {
$element =& $module->addBlankElement($element_name);
$element = $module->addBlankElement($element_name);
} else {
$element =& $module->info[$element_name];
$element = $module->info[$element_name];
}
$element->attr[$attr_name] = $def;
}
@@ -109,11 +114,11 @@ class HTMLPurifier_HTMLDefinition extends HTMLPurifier_Definition
* @note See HTMLPurifier_HTMLModule::addElement for detailed
* parameter and return value descriptions.
*/
public function &addElement($element_name, $type, $contents, $attr_collections, $attributes) {
$module =& $this->getAnonymousModule();
public function addElement($element_name, $type, $contents, $attr_collections, $attributes) {
$module = $this->getAnonymousModule();
// assume that if the user is calling this, the element
// is safe. This may not be a good idea
$element =& $module->addElement($element_name, $type, $contents, $attr_collections, $attributes);
$element = $module->addElement($element_name, $type, $contents, $attr_collections, $attributes);
return $element;
}
@@ -123,9 +128,9 @@ class HTMLPurifier_HTMLDefinition extends HTMLPurifier_Definition
* @note See HTMLPurifier_HTMLModule::addBlankElement for detailed
* parameter and return value descriptions.
*/
public function &addBlankElement($element_name) {
$module =& $this->getAnonymousModule();
$element =& $module->addBlankElement($element_name);
public function addBlankElement($element_name) {
$module = $this->getAnonymousModule();
$element = $module->addBlankElement($element_name);
return $element;
}
@@ -134,7 +139,7 @@ class HTMLPurifier_HTMLDefinition extends HTMLPurifier_Definition
* bust out advanced features without having to make your own
* module.
*/
public function &getAnonymousModule() {
public function getAnonymousModule() {
if (!$this->_anonModule) {
$this->_anonModule = new HTMLPurifier_HTMLModule();
$this->_anonModule->name = 'Anonymous';
@@ -186,18 +191,22 @@ class HTMLPurifier_HTMLDefinition extends HTMLPurifier_Definition
$this->doctype = $this->manager->doctype;
foreach ($this->manager->modules as $module) {
foreach($module->info_tag_transform as $k => $v) {
foreach($module->info_tag_transform as $k => $v) {
if ($v === false) unset($this->info_tag_transform[$k]);
else $this->info_tag_transform[$k] = $v;
}
foreach($module->info_attr_transform_pre as $k => $v) {
foreach($module->info_attr_transform_pre as $k => $v) {
if ($v === false) unset($this->info_attr_transform_pre[$k]);
else $this->info_attr_transform_pre[$k] = $v;
}
foreach($module->info_attr_transform_post as $k => $v) {
foreach($module->info_attr_transform_post as $k => $v) {
if ($v === false) unset($this->info_attr_transform_post[$k]);
else $this->info_attr_transform_post[$k] = $v;
}
foreach ($module->info_injector as $k => $v) {
if ($v === false) unset($this->info_injector[$k]);
else $this->info_injector[$k] = $v;
}
}
$this->info = $this->manager->getElements();
@@ -233,10 +242,10 @@ class HTMLPurifier_HTMLDefinition extends HTMLPurifier_Definition
$support = "(for information on implementing this, see the ".
"support forums) ";
// setup allowed elements
// setup allowed elements -----------------------------------------
$allowed_elements = $config->get('HTML', 'AllowedElements');
$allowed_attributes = $config->get('HTML', 'AllowedAttributes');
$allowed_attributes = $config->get('HTML', 'AllowedAttributes'); // retrieve early
if (!is_array($allowed_elements) && !is_array($allowed_attributes)) {
$allowed = $config->get('HTML', 'Allowed');
@@ -252,55 +261,80 @@ class HTMLPurifier_HTMLDefinition extends HTMLPurifier_Definition
}
// emit errors
foreach ($allowed_elements as $element => $d) {
// :TODO: Is this htmlspecialchars() call really necessary?
$element = htmlspecialchars($element);
$element = htmlspecialchars($element); // PHP doesn't escape errors, be careful!
trigger_error("Element '$element' is not supported $support", E_USER_WARNING);
}
}
// setup allowed attributes ---------------------------------------
$allowed_attributes_mutable = $allowed_attributes; // by copy!
if (is_array($allowed_attributes)) {
foreach ($this->info_global_attr as $attr_key => $info) {
if (!isset($allowed_attributes["*.$attr_key"])) {
unset($this->info_global_attr[$attr_key]);
} elseif (isset($allowed_attributes_mutable["*.$attr_key"])) {
unset($allowed_attributes_mutable["*.$attr_key"]);
// This actually doesn't do anything, since we went away from
// global attributes. It's possible that userland code uses
// it, but HTMLModuleManager doesn't!
foreach ($this->info_global_attr as $attr => $x) {
$keys = array($attr, "*@$attr", "*.$attr");
$delete = true;
foreach ($keys as $key) {
if ($delete && isset($allowed_attributes[$key])) {
$delete = false;
}
if (isset($allowed_attributes_mutable[$key])) {
unset($allowed_attributes_mutable[$key]);
}
}
if ($delete) unset($this->info_global_attr[$attr]);
}
foreach ($this->info as $tag => $info) {
foreach ($info->attr as $attr => $attr_info) {
if (!isset($allowed_attributes["$tag.$attr"]) &&
!isset($allowed_attributes["*.$attr"])) {
unset($this->info[$tag]->attr[$attr]);
} else {
if (isset($allowed_attributes_mutable["$tag.$attr"])) {
unset($allowed_attributes_mutable["$tag.$attr"]);
} elseif (isset($allowed_attributes_mutable["*.$attr"])) {
unset($allowed_attributes_mutable["*.$attr"]);
foreach ($info->attr as $attr => $x) {
$keys = array("$tag@$attr", $attr, "*@$attr", "$tag.$attr", "*.$attr");
$delete = true;
foreach ($keys as $key) {
if ($delete && isset($allowed_attributes[$key])) {
$delete = false;
}
if (isset($allowed_attributes_mutable[$key])) {
unset($allowed_attributes_mutable[$key]);
}
}
if ($delete) unset($this->info[$tag]->attr[$attr]);
}
}
// emit errors
foreach ($allowed_attributes_mutable as $elattr => $d) {
list($element, $attribute) = explode('.', $elattr);
// :TODO: Is this htmlspecialchars() call really necessary?
$element = htmlspecialchars($element);
$attribute = htmlspecialchars($attribute);
if ($element == '*') {
trigger_error("Global attribute '$attribute' is not ".
"supported in any elements $support",
E_USER_WARNING);
} else {
trigger_error("Attribute '$attribute' in element '$element' not supported $support",
E_USER_WARNING);
$bits = preg_split('/[.@]/', $elattr, 2);
$c = count($bits);
switch ($c) {
case 2:
if ($bits[0] !== '*') {
$element = htmlspecialchars($bits[0]);
$attribute = htmlspecialchars($bits[1]);
if (!isset($this->info[$element])) {
trigger_error("Cannot allow attribute '$attribute' if element '$element' is not allowed/supported $support");
} else {
trigger_error("Attribute '$attribute' in element '$element' not supported $support",
E_USER_WARNING);
}
break;
}
// otherwise fall through
case 1:
$attribute = htmlspecialchars($bits[0]);
trigger_error("Global attribute '$attribute' is not ".
"supported in any elements $support",
E_USER_WARNING);
break;
}
}
}
// setup forbidden elements
$forbidden_elements = $config->get('HTML', 'ForbiddenElements');
// setup forbidden elements ---------------------------------------
$forbidden_elements = $config->get('HTML', 'ForbiddenElements');
$forbidden_attributes = $config->get('HTML', 'ForbiddenAttributes');
foreach ($this->info as $tag => $info) {
@@ -308,14 +342,37 @@ class HTMLPurifier_HTMLDefinition extends HTMLPurifier_Definition
unset($this->info[$tag]);
continue;
}
foreach ($info->attr as $name => $def) {
if (isset($forbidden_attributes["$tag.$name"])) {
unset($this->info[$tag]->attr[$name]);
foreach ($info->attr as $attr => $x) {
if (
isset($forbidden_attributes["$tag@$attr"]) ||
isset($forbidden_attributes["*@$attr"]) ||
isset($forbidden_attributes[$attr])
) {
unset($this->info[$tag]->attr[$attr]);
continue;
} // this segment might get removed eventually
elseif (isset($forbidden_attributes["$tag.$attr"])) {
// $tag.$attr are not user supplied, so no worries!
trigger_error("Error with $tag.$attr: tag.attr syntax not supported for HTML.ForbiddenAttributes; use tag@attr instead", E_USER_WARNING);
}
}
}
foreach ($forbidden_attributes as $key => $v) {
if (strlen($key) < 2) continue;
if ($key[0] != '*') continue;
if ($key[1] == '.') {
trigger_error("Error with $key: *.attr syntax not supported for HTML.ForbiddenAttributes; use attr instead", E_USER_WARNING);
}
}
// setup injectors -----------------------------------------------------
foreach ($this->info_injector as $i => $injector) {
if ($injector->checkNeeded($config) !== false) {
// remove injector that does not have it's required
// elements/attributes present, and is thus not needed.
unset($this->info_injector[$i]);
}
}
}
/**
@@ -329,6 +386,8 @@ class HTMLPurifier_HTMLDefinition extends HTMLPurifier_Definition
*/
public function parseTinyMCEAllowedList($list) {
$list = str_replace(array(' ', "\t"), '', $list);
$elements = array();
$attributes = array();

View File

@@ -71,6 +71,14 @@ class HTMLPurifier_HTMLModule
*/
public $info_attr_transform_post = array();
/**
* List of HTMLPurifier_Injector to be performed during well-formedness fixing.
* An injector will only be invoked if all of it's pre-requisites are met;
* if an injector fails setup, there will be no error; it will simply be
* silently disabled.
*/
public $info_injector = array();
/**
* Boolean flag that indicates whether or not getChildDef is implemented.
* For optimization reasons: may save a call to a function. Be sure
@@ -116,10 +124,10 @@ class HTMLPurifier_HTMLModule
* element?
* @param $attr What unique attributes does the element define?
* @note See ElementDef for in-depth descriptions of these parameters.
* @return Reference to created element definition object, so you
* @return Created element definition object, so you
* can set advanced parameters
*/
public function &addElement($element, $type, $contents, $attr_includes = array(), $attr = array()) {
public function addElement($element, $type, $contents, $attr_includes = array(), $attr = array()) {
$this->elements[] = $element;
// parse content_model
list($content_model_type, $content_model) = $this->parseContents($contents);
@@ -140,9 +148,9 @@ class HTMLPurifier_HTMLModule
* Convenience function that creates a totally blank, non-standalone
* element.
* @param $element Name of element to create
* @return Reference to created element
* @return Created element
*/
public function &addBlankElement($element) {
public function addBlankElement($element) {
if (!isset($this->info[$element])) {
$this->elements[] = $element;
$this->info[$element] = new HTMLPurifier_ElementDef();
@@ -222,5 +230,14 @@ class HTMLPurifier_HTMLModule
}
return $ret;
}
/**
* Lazy load construction of the module after determining whether
* or not it's needed, and also when a finalized configuration object
* is available.
* @param $config Instance of HTMLPurifier_Config
*/
public function setup($config) {}
}

View File

@@ -12,8 +12,8 @@ class HTMLPurifier_HTMLModule_Bdo extends HTMLPurifier_HTMLModule
'I18N' => array('dir' => false)
);
public function __construct() {
$bdo =& $this->addElement(
public function setup($config) {
$bdo = $this->addElement(
'bdo', 'Inline', 'Inline', array('Core', 'Lang'),
array(
'dir' => 'Enum#ltr,rtl', // required

View File

@@ -9,7 +9,7 @@ class HTMLPurifier_HTMLModule_Edit extends HTMLPurifier_HTMLModule
public $name = 'Edit';
public function __construct() {
public function setup($config) {
$contents = 'Chameleon: #PCDATA | Inline ! #PCDATA | Flow';
$attr = array(
'cite' => 'URI',

View File

@@ -8,8 +8,8 @@ class HTMLPurifier_HTMLModule_Hypertext extends HTMLPurifier_HTMLModule
public $name = 'Hypertext';
public function __construct() {
$a =& $this->addElement(
public function setup($config) {
$a = $this->addElement(
'a', 'Inline', 'Inline', 'Common',
array(
// 'accesskey' => 'Character',

View File

@@ -10,17 +10,25 @@ class HTMLPurifier_HTMLModule_Image extends HTMLPurifier_HTMLModule
public $name = 'Image';
public function __construct() {
$img =& $this->addElement(
public function setup($config) {
$max = $config->get('HTML', 'MaxImgLength');
$img = $this->addElement(
'img', 'Inline', 'Empty', 'Common',
array(
'alt*' => 'Text',
'height' => 'Length',
// According to the spec, it's Length, but percents can
// be abused, so we allow only Pixels.
'height' => 'Pixels#' . $max,
'width' => 'Pixels#' . $max,
'longdesc' => 'URI',
'src*' => new HTMLPurifier_AttrDef_URI(true), // embedded
'width' => 'Length'
)
);
if ($max === null || $config->get('HTML', 'Trusted')) {
$img->attr['height'] =
$img->attr['width'] = 'Length';
}
// kind of strange, but splitting things up would be inefficient
$img->attr_transform_pre[] =
$img->attr_transform_post[] =

View File

@@ -21,7 +21,7 @@ class HTMLPurifier_HTMLModule_Legacy extends HTMLPurifier_HTMLModule
public $name = 'Legacy';
public function __construct() {
public function setup($config) {
$this->addElement('basefont', 'Inline', 'Empty', false, array(
'color' => 'Color',
@@ -49,40 +49,40 @@ class HTMLPurifier_HTMLModule_Legacy extends HTMLPurifier_HTMLModule
$align = 'Enum#left,right,center,justify';
$address =& $this->addBlankElement('address');
$address = $this->addBlankElement('address');
$address->content_model = 'Inline | #PCDATA | p';
$address->content_model_type = 'optional';
$address->child = false;
$blockquote =& $this->addBlankElement('blockquote');
$blockquote = $this->addBlankElement('blockquote');
$blockquote->content_model = 'Flow | #PCDATA';
$blockquote->content_model_type = 'optional';
$blockquote->child = false;
$br =& $this->addBlankElement('br');
$br = $this->addBlankElement('br');
$br->attr['clear'] = 'Enum#left,all,right,none';
$caption =& $this->addBlankElement('caption');
$caption = $this->addBlankElement('caption');
$caption->attr['align'] = 'Enum#top,bottom,left,right';
$div =& $this->addBlankElement('div');
$div = $this->addBlankElement('div');
$div->attr['align'] = $align;
$dl =& $this->addBlankElement('dl');
$dl = $this->addBlankElement('dl');
$dl->attr['compact'] = 'Bool#compact';
for ($i = 1; $i <= 6; $i++) {
$h =& $this->addBlankElement("h$i");
$h = $this->addBlankElement("h$i");
$h->attr['align'] = $align;
}
$hr =& $this->addBlankElement('hr');
$hr = $this->addBlankElement('hr');
$hr->attr['align'] = $align;
$hr->attr['noshade'] = 'Bool#noshade';
$hr->attr['size'] = 'Pixels';
$hr->attr['width'] = 'Length';
$img =& $this->addBlankElement('img');
$img = $this->addBlankElement('img');
$img->attr['align'] = 'Enum#top,middle,bottom,left,right';
$img->attr['border'] = 'Pixels';
$img->attr['hspace'] = 'Pixels';
@@ -90,43 +90,43 @@ class HTMLPurifier_HTMLModule_Legacy extends HTMLPurifier_HTMLModule
// figure out this integer business
$li =& $this->addBlankElement('li');
$li = $this->addBlankElement('li');
$li->attr['value'] = new HTMLPurifier_AttrDef_Integer();
$li->attr['type'] = 'Enum#s:1,i,I,a,A,disc,square,circle';
$ol =& $this->addBlankElement('ol');
$ol = $this->addBlankElement('ol');
$ol->attr['compact'] = 'Bool#compact';
$ol->attr['start'] = new HTMLPurifier_AttrDef_Integer();
$ol->attr['type'] = 'Enum#s:1,i,I,a,A';
$p =& $this->addBlankElement('p');
$p = $this->addBlankElement('p');
$p->attr['align'] = $align;
$pre =& $this->addBlankElement('pre');
$pre = $this->addBlankElement('pre');
$pre->attr['width'] = 'Number';
// script omitted
$table =& $this->addBlankElement('table');
$table = $this->addBlankElement('table');
$table->attr['align'] = 'Enum#left,center,right';
$table->attr['bgcolor'] = 'Color';
$tr =& $this->addBlankElement('tr');
$tr = $this->addBlankElement('tr');
$tr->attr['bgcolor'] = 'Color';
$th =& $this->addBlankElement('th');
$th = $this->addBlankElement('th');
$th->attr['bgcolor'] = 'Color';
$th->attr['height'] = 'Length';
$th->attr['nowrap'] = 'Bool#nowrap';
$th->attr['width'] = 'Length';
$td =& $this->addBlankElement('td');
$td = $this->addBlankElement('td');
$td->attr['bgcolor'] = 'Color';
$td->attr['height'] = 'Length';
$td->attr['nowrap'] = 'Bool#nowrap';
$td->attr['width'] = 'Length';
$ul =& $this->addBlankElement('ul');
$ul = $this->addBlankElement('ul');
$ul->attr['compact'] = 'Bool#compact';
$ul->attr['type'] = 'Enum#square,disc,circle';

View File

@@ -19,7 +19,7 @@ class HTMLPurifier_HTMLModule_List extends HTMLPurifier_HTMLModule
public $content_sets = array('Flow' => 'List');
public function __construct() {
public function setup($config) {
$this->addElement('ol', 'List', 'Required: li', 'Common');
$this->addElement('ul', 'List', 'Required: li', 'Common');
$this->addElement('dl', 'List', 'Required: dt | dd', 'Common');

View File

@@ -11,7 +11,7 @@ class HTMLPurifier_HTMLModule_Object extends HTMLPurifier_HTMLModule
public $name = 'Object';
public $safe = false;
public function __construct() {
public function setup($config) {
$this->addElement('object', 'Inline', 'Optional: #PCDATA | Flow | param', 'Common',
array(

View File

@@ -15,7 +15,7 @@ class HTMLPurifier_HTMLModule_Presentation extends HTMLPurifier_HTMLModule
public $name = 'Presentation';
public function __construct() {
public function setup($config) {
$this->addElement('b', 'Inline', 'Inline', 'Common');
$this->addElement('big', 'Inline', 'Inline', 'Common');
$this->addElement('hr', 'Block', 'Empty', 'Common');

View File

@@ -9,7 +9,7 @@ class HTMLPurifier_HTMLModule_Proprietary extends HTMLPurifier_HTMLModule
public $name = 'Proprietary';
public function __construct() {
public function setup($config) {
$this->addElement('marquee', 'Inline', 'Flow', 'Common',
array(

View File

@@ -9,15 +9,15 @@ class HTMLPurifier_HTMLModule_Ruby extends HTMLPurifier_HTMLModule
public $name = 'Ruby';
public function __construct() {
public function setup($config) {
$this->addElement('ruby', 'Inline',
'Custom: ((rb, (rt | (rp, rt, rp))) | (rbc, rtc, rtc?))',
'Common');
$this->addElement('rbc', false, 'Required: rb', 'Common');
$this->addElement('rtc', false, 'Required: rt', 'Common');
$rb =& $this->addElement('rb', false, 'Inline', 'Common');
$rb = $this->addElement('rb', false, 'Inline', 'Common');
$rb->excludes = array('ruby' => true);
$rt =& $this->addElement('rt', false, 'Inline', 'Common', array('rbspan' => 'Number'));
$rt = $this->addElement('rt', false, 'Inline', 'Common', array('rbspan' => 'Number'));
$rt->excludes = array('ruby' => true);
$this->addElement('rp', false, 'Optional: #PCDATA', 'Common');
}

View File

@@ -0,0 +1,31 @@
<?php
/**
* A "safe" embed module. See SafeObject. This is a proprietary element.
*/
class HTMLPurifier_HTMLModule_SafeEmbed extends HTMLPurifier_HTMLModule
{
public $name = 'SafeEmbed';
public function setup($config) {
$max = $config->get('HTML', 'MaxImgLength');
$embed = $this->addElement(
'embed', 'Inline', 'Empty', 'Common',
array(
'src*' => 'URI#embedded',
'type' => 'Enum#application/x-shockwave-flash',
'width' => 'Pixels#' . $max,
'height' => 'Pixels#' . $max,
'allowscriptaccess' => 'Enum#never',
'allownetworking' => 'Enum#internal',
'wmode' => 'Enum#window',
'name' => 'ID',
)
);
$embed->attr_transform_post[] = new HTMLPurifier_AttrTransform_SafeEmbed();
}
}

View File

@@ -0,0 +1,48 @@
<?php
/**
* A "safe" object module. In theory, objects permitted by this module will
* be safe, and untrusted users can be allowed to embed arbitrary flash objects
* (maybe other types too, but only Flash is supported as of right now).
* Highly experimental.
*/
class HTMLPurifier_HTMLModule_SafeObject extends HTMLPurifier_HTMLModule
{
public $name = 'SafeObject';
public function setup($config) {
// These definitions are not intrinsically safe: the attribute transforms
// are a vital part of ensuring safety.
$max = $config->get('HTML', 'MaxImgLength');
$object = $this->addElement(
'object',
'Inline',
'Optional: param | Flow | #PCDATA',
'Common',
array(
// While technically not required by the spec, we're forcing
// it to this value.
'type' => 'Enum#application/x-shockwave-flash',
'width' => 'Pixels#' . $max,
'height' => 'Pixels#' . $max,
'data' => 'URI#embedded'
)
);
$object->attr_transform_post[] = new HTMLPurifier_AttrTransform_SafeObject();
$param = $this->addElement('param', false, 'Empty', false,
array(
'id' => 'ID',
'name*' => 'Text',
'value' => 'Text'
)
);
$param->attr_transform_post[] = new HTMLPurifier_AttrTransform_SafeParam();
$this->info_injector[] = 'SafeObject';
}
}

View File

@@ -7,19 +7,6 @@ INSIDE HTML PURIFIER DOCUMENTS. USE ONLY WITH TRUSTED USER INPUT!!!
*/
/**
* Implements required attribute stipulation for <script>
*/
class HTMLPurifier_AttrTransform_ScriptRequired extends HTMLPurifier_AttrTransform
{
public function transform($attr, $config, $context) {
if (!isset($attr['type'])) {
$attr['type'] = 'text/javascript';
}
return $attr;
}
}
/**
* XHTML 1.1 Scripting module, defines elements that are used to contain
* information pertaining to executable scripts or the lack of support
@@ -33,7 +20,7 @@ class HTMLPurifier_HTMLModule_Scripting extends HTMLPurifier_HTMLModule
public $content_sets = array('Block' => 'script | noscript', 'Inline' => 'script | noscript');
public $safe = false;
public function __construct() {
public function setup($config) {
// TODO: create custom child-definition for noscript that
// auto-wraps stray #PCDATA in a similar manner to
// blockquote's custom definition (we would use it but

View File

@@ -15,7 +15,7 @@ class HTMLPurifier_HTMLModule_StyleAttribute extends HTMLPurifier_HTMLModule
'Core' => array(0 => array('Style'))
);
public function __construct() {
public function setup($config) {
$this->attr_collections['Style']['style'] = new HTMLPurifier_AttrDef_CSS();
}

View File

@@ -8,7 +8,7 @@ class HTMLPurifier_HTMLModule_Tables extends HTMLPurifier_HTMLModule
public $name = 'Tables';
public function __construct() {
public function setup($config) {
$this->addElement('caption', false, 'Inline', 'Common');

View File

@@ -8,10 +8,10 @@ class HTMLPurifier_HTMLModule_Target extends HTMLPurifier_HTMLModule
public $name = 'Target';
public function __construct() {
public function setup($config) {
$elements = array('a');
foreach ($elements as $name) {
$e =& $this->addBlankElement($name);
$e = $this->addBlankElement($name);
$e->attr = array(
'target' => new HTMLPurifier_AttrDef_HTML_FrameTarget()
);

View File

@@ -20,7 +20,7 @@ class HTMLPurifier_HTMLModule_Text extends HTMLPurifier_HTMLModule
'Flow' => 'Heading | Block | Inline'
);
public function __construct() {
public function setup($config) {
// Inline Phrasal -------------------------------------------------
$this->addElement('abbr', 'Inline', 'Inline', 'Common');
@@ -42,7 +42,7 @@ class HTMLPurifier_HTMLModule_Text extends HTMLPurifier_HTMLModule
// Block Phrasal --------------------------------------------------
$this->addElement('address', 'Block', 'Inline', 'Common');
$this->addElement('blockquote', 'Block', 'Optional: Heading | Block | List', 'Common', array('cite' => 'URI') );
$pre =& $this->addElement('pre', 'Block', 'Inline', 'Common');
$pre = $this->addElement('pre', 'Block', 'Inline', 'Common');
$pre->excludes = $this->makeLookup(
'img', 'big', 'small', 'object', 'applet', 'font', 'basefont' );
$this->addElement('h1', 'Heading', 'Inline', 'Common');

View File

@@ -35,7 +35,7 @@ class HTMLPurifier_HTMLModule_Tidy extends HTMLPurifier_HTMLModule
* @todo Wildcard matching and error reporting when an added or
* subtracted fix has no effect.
*/
public function construct($config) {
public function setup($config) {
// create fixes, initialize fixesForLevel
$fixes = $this->makeFixes();
@@ -128,14 +128,16 @@ class HTMLPurifier_HTMLModule_Tidy extends HTMLPurifier_HTMLModule
if (isset($params['element'])) {
$element = $params['element'];
if (empty($this->info[$element])) {
$e =& $this->addBlankElement($element);
$e = $this->addBlankElement($element);
} else {
$e =& $this->info[$element];
$e = $this->info[$element];
}
} else {
$type = "info_$type";
$e =& $this;
$e = $this;
}
// PHP does some weird parsing when I do
// $e->$type[$attr], so I have to assign a ref.
$f =& $e->$type;
$f[$attr] = $fix;
break;
@@ -146,9 +148,9 @@ class HTMLPurifier_HTMLModule_Tidy extends HTMLPurifier_HTMLModule
case 'content_model_type':
$element = $params['element'];
if (empty($this->info[$element])) {
$e =& $this->addBlankElement($element);
$e = $this->addBlankElement($element);
} else {
$e =& $this->info[$element];
$e = $this->info[$element];
}
$e->$type = $fix;
break;

View File

@@ -221,15 +221,35 @@ class HTMLPurifier_HTMLModuleManager
$modules[] = 'Proprietary';
}
// add SafeObject/Safeembed modules
if ($config->get('HTML', 'SafeObject')) {
$modules[] = 'SafeObject';
}
if ($config->get('HTML', 'SafeEmbed')) {
$modules[] = 'SafeEmbed';
}
foreach ($modules as $module) {
$this->processModule($module);
$this->modules[$module]->setup($config);
}
foreach ($this->doctype->tidyModules as $module) {
$this->processModule($module);
if (method_exists($this->modules[$module], 'construct')) {
$this->modules[$module]->construct($config);
$this->modules[$module]->setup($config);
}
// prepare any injectors
foreach ($this->modules as $module) {
$n = array();
foreach ($module->info_injector as $i => $injector) {
if (!is_object($injector)) {
$class = "HTMLPurifier_Injector_$injector";
$injector = new $class;
}
$n[$injector->name] = $injector;
}
$module->info_injector = $n;
}
// setup lookup table based on all valid modules

View File

@@ -58,29 +58,45 @@ abstract class HTMLPurifier_Injector
* Prepares the injector by giving it the config and context objects:
* this allows references to important variables to be made within
* the injector. This function also checks if the HTML environment
* will work with the Injector: if p tags are not allowed, the
* Auto-Paragraphing injector should not be enabled.
* will work with the Injector (see checkNeeded()).
* @param $config Instance of HTMLPurifier_Config
* @param $context Instance of HTMLPurifier_Context
* @return Boolean false if success, string of missing needed element/attribute if failure
*/
public function prepare($config, $context) {
$this->htmlDefinition = $config->getHTMLDefinition();
// perform $needed checks
foreach ($this->needed as $element => $attributes) {
if (is_int($element)) $element = $attributes;
if (!isset($this->htmlDefinition->info[$element])) return $element;
if (!is_array($attributes)) continue;
foreach ($attributes as $name) {
if (!isset($this->htmlDefinition->info[$element]->attr[$name])) return "$element.$name";
}
}
// Even though this might fail, some unit tests ignore this and
// still test checkNeeded, so be careful. Maybe get rid of that
// dependency.
$result = $this->checkNeeded($config);
if ($result !== false) return $result;
$this->currentNesting =& $context->get('CurrentNesting');
$this->inputTokens =& $context->get('InputTokens');
$this->inputIndex =& $context->get('InputIndex');
return false;
}
/**
* This function checks if the HTML environment
* will work with the Injector: if p tags are not allowed, the
* Auto-Paragraphing injector should not be enabled.
* @param $config Instance of HTMLPurifier_Config
* @param $context Instance of HTMLPurifier_Context
* @return Boolean false if success, string of missing needed element/attribute if failure
*/
public function checkNeeded($config) {
$def = $config->getHTMLDefinition();
foreach ($this->needed as $element => $attributes) {
if (is_int($element)) $element = $attributes;
if (!isset($def->info[$element])) return $element;
if (!is_array($attributes)) continue;
foreach ($attributes as $name) {
if (!isset($def->info[$element]->attr[$name])) return "$element.$name";
}
}
return false;
}
/**
* Tests if the context node allows a certain element
* @param $name Name of element to test for

View File

@@ -0,0 +1,83 @@
<?php
/**
* Adds important param elements to inside of object in order to make
* things safe.
*/
class HTMLPurifier_Injector_SafeObject extends HTMLPurifier_Injector
{
public $name = 'SafeObject';
public $needed = array('object', 'param');
protected $objectStack = array();
protected $paramStack = array();
// Keep this synchronized with AttrTransform/SafeParam.php
protected $addParam = array(
'allowScriptAccess' => 'never',
'allowNetworking' => 'internal',
);
protected $allowedParam = array(
'wmode' => true,
'movie' => true,
);
public function prepare($config, $context) {
parent::prepare($config, $context);
}
public function handleElement(&$token) {
if ($token->name == 'object') {
$this->objectStack[] = $token;
$this->paramStack[] = array();
$new = array($token);
foreach ($this->addParam as $name => $value) {
$new[] = new HTMLPurifier_Token_Empty('param', array('name' => $name, 'value' => $value));
}
$token = $new;
} elseif ($token->name == 'param') {
$nest = count($this->currentNesting) - 1;
if ($nest >= 0 && $this->currentNesting[$nest]->name === 'object') {
$i = count($this->objectStack) - 1;
if (!isset($token->attr['name'])) {
$token = false;
return;
}
$n = $token->attr['name'];
// We need this fix because YouTube doesn't supply a data
// attribute, which we need if a type is specified. This is
// *very* Flash specific.
if (!isset($this->objectStack[$i]->attr['data']) && $token->attr['name'] == 'movie') {
$this->objectStack[$i]->attr['data'] = $token->attr['value'];
}
// Check if the parameter is the correct value but has not
// already been added
if (
!isset($this->paramStack[$i][$n]) &&
isset($this->addParam[$n]) &&
$token->attr['name'] === $this->addParam[$n]
) {
// keep token, and add to param stack
$this->paramStack[$i][$n] = true;
} elseif (isset($this->allowedParam[$n])) {
// keep token, don't do anything to it
// (could possibly check for duplicates here)
} else {
$token = false;
}
} else {
// not directly inside an object, DENY!
$token = false;
}
}
}
public function notifyEnd($token) {
if ($token->name == 'object') {
array_pop($this->objectStack);
array_pop($this->paramStack);
}
}
}

View File

@@ -1,5 +1,9 @@
<?php
/**
* Represents a language and defines localizable string formatting and
* other functions, as well as the localized messages for HTML Purifier.
*/
class HTMLPurifier_Language
{
@@ -23,6 +27,13 @@ class HTMLPurifier_Language
*/
public $errorNames = array();
/**
* True if no message file was found for this language, so English
* is being used instead. Check this if you'd like to notify the
* user that they've used a non-supported language.
*/
public $error = false;
/**
* Has the language object been loaded yet?
* @todo Make it private, fix usage in HTMLPurifier_LanguageTest
@@ -36,7 +47,7 @@ class HTMLPurifier_Language
public function __construct($config, $context) {
$this->config = $config;
$this->context =& $context;
$this->context = $context;
}
/**
@@ -122,7 +133,7 @@ class HTMLPurifier_Language
// could be introduced for all types of tokens. This
// may need to be factored out into a dedicated class
if (!empty($value->attr)) {
$stripped_token = $value->copy();
$stripped_token = clone $value;
$stripped_token->attr = array();
$subst['$'.$i.'.Compact'] = $generator->generateFromToken($stripped_token);
}

View File

@@ -0,0 +1,11 @@
<?php
// private language message file for unit testing purposes
// this language file has no class associated with it
$fallback = 'en';
$messages = array(
'HTMLPurifier' => 'HTML Purifier XNone'
);

View File

@@ -5,6 +5,7 @@
* caching and fallbacks.
* @note Thanks to MediaWiki for the general logic, although this version
* has been entirely rewritten
* @todo Serialized cache for languages
*/
class HTMLPurifier_LanguageFactory
{
@@ -53,7 +54,7 @@ class HTMLPurifier_LanguageFactory
* @param $prototype Optional prototype to overload sole instance with,
* or bool true to reset to default factory.
*/
public static function &instance($prototype = null) {
public static function instance($prototype = null) {
static $instance = null;
if ($prototype !== null) {
$instance = $prototype;
@@ -77,41 +78,43 @@ class HTMLPurifier_LanguageFactory
* Creates a language object, handles class fallbacks
* @param $config Instance of HTMLPurifier_Config
* @param $context Instance of HTMLPurifier_Context
* @param $code Code to override configuration with. Private parameter.
*/
public function create($config, $context) {
public function create($config, $context, $code = false) {
// validate language code
$code = $this->validator->validate(
$config->get('Core', 'Language'), $config, $context
);
if ($code === false) {
$code = $this->validator->validate(
$config->get('Core', 'Language'), $config, $context
);
} else {
$code = $this->validator->validate($code, $config, $context);
}
if ($code === false) $code = 'en'; // malformed code becomes English
$pcode = str_replace('-', '_', $code); // make valid PHP classname
static $depth = 0; // recursion protection
if ($code == 'en') {
$class = 'HTMLPurifier_Language';
$file = $this->dir . '/Language.php';
$lang = new HTMLPurifier_Language($config, $context);
} else {
$class = 'HTMLPurifier_Language_' . $pcode;
$file = $this->dir . '/Language/classes/' . $code . '.php';
// PHP5/APC deps bug workaround can go here
// you can bypass the conditional include by loading the
// file yourself
if (file_exists($file) && !class_exists($class, false)) {
include_once $file;
if (file_exists($file) || class_exists($class, false)) {
$lang = new $class($config, $context);
} else {
// Go fallback
$raw_fallback = $this->getFallbackFor($code);
$fallback = $raw_fallback ? $raw_fallback : 'en';
$depth++;
$lang = $this->create($config, $context, $fallback);
if (!$raw_fallback) {
$lang->error = true;
}
$depth--;
}
}
if (!class_exists($class, false)) {
// go fallback
$fallback = HTMLPurifier_LanguageFactory::getFallbackFor($code);
$depth++;
$lang = HTMLPurifier_LanguageFactory::factory( $fallback );
$depth--;
} else {
$lang = new $class($config, $context);
}
$lang->code = $code;
return $lang;

View File

@@ -0,0 +1,113 @@
<?php
/**
* Represents a measurable length, with a string numeric magnitude
* and a unit. This object is immutable.
*/
class HTMLPurifier_Length
{
/**
* String numeric magnitude.
*/
protected $n;
/**
* String unit. False is permitted if $n = 0.
*/
protected $unit;
/**
* Whether or not this length is valid. Null if not calculated yet.
*/
protected $isValid;
/**
* Lookup array of units recognized by CSS 2.1
*/
protected static $allowedUnits = array(
'em' => true, 'ex' => true, 'px' => true, 'in' => true,
'cm' => true, 'mm' => true, 'pt' => true, 'pc' => true
);
/**
* @param number $n Magnitude
* @param string $u Unit
*/
public function __construct($n = '0', $u = false) {
$this->n = (string) $n;
$this->unit = $u !== false ? (string) $u : false;
}
/**
* @param string $s Unit string, like '2em' or '3.4in'
* @warning Does not perform validation.
*/
static public function make($s) {
if ($s instanceof HTMLPurifier_Length) return $s;
$n_length = strspn($s, '1234567890.+-');
$n = substr($s, 0, $n_length);
$unit = substr($s, $n_length);
if ($unit === '') $unit = false;
return new HTMLPurifier_Length($n, $unit);
}
/**
* Validates the number and unit.
*/
protected function validate() {
// Special case:
if ($this->n === '+0' || $this->n === '-0') $this->n = '0';
if ($this->n === '0' && $this->unit === false) return true;
if (!ctype_lower($this->unit)) $this->unit = strtolower($this->unit);
if (!isset(HTMLPurifier_Length::$allowedUnits[$this->unit])) return false;
// Hack:
$def = new HTMLPurifier_AttrDef_CSS_Number();
$result = $def->validate($this->n, false, false);
if ($result === false) return false;
$this->n = $result;
return true;
}
/**
* Returns string representation of number.
*/
public function toString() {
if (!$this->isValid()) return false;
return $this->n . $this->unit;
}
/**
* Retrieves string numeric magnitude.
*/
public function getN() {return $this->n;}
/**
* Retrieves string unit.
*/
public function getUnit() {return $this->unit;}
/**
* Returns true if this length unit is valid.
*/
public function isValid() {
if ($this->isValid === null) $this->isValid = $this->validate();
return $this->isValid;
}
/**
* Compares two lengths, and returns 1 if greater, -1 if less and 0 if equal.
* @warning If both values are too large or small, this calculation will
* not work properly
*/
public function compareTo($l) {
if ($l === false) return false;
if ($l->unit !== $this->unit) {
$converter = new HTMLPurifier_UnitConverter();
$l = $converter->convert($l, $this->unit);
if ($l === false) return false;
}
return $this->n - $l->n;
}
}

View File

@@ -11,7 +11,8 @@
*
* A lexer is HTML-oriented: it might work with XML, but it's not
* recommended, as we adhere to a subset of the specification for optimization
* reasons.
* reasons. This might change in the future. Also, most tokenizers are not
* expected to handle DTDs or PIs.
*
* This class should not be directly instantiated, but you may use create() to
* retrieve a default copy of the lexer. Being a supertype, this class
@@ -20,7 +21,8 @@
*
* @note The unit tests will instantiate this class for testing purposes, as
* many of the utility functions require a class to be instantiated.
* Be careful when porting this class to PHP 5.
* This means that, even though this class is not runnable, it will
* not be declared abstract.
*
* @par
*
@@ -28,18 +30,14 @@
* We use tokens rather than create a DOM representation because DOM would:
*
* @par
* -# Require more processing power to create,
* -# Require recursion to iterate,
* -# Must be compatible with PHP 5's DOM (otherwise duplication),
* -# Has the entire document structure (html and body not needed), and
* -# Has unknown readability improvement.
* -# Require more processing and memory to create,
* -# Is not streamable, and
* -# Has the entire document structure (html and body not needed).
*
* @par
* What the last item means is that the functions for manipulating tokens are
* already fairly compact, and when well-commented, more abstraction may not
* be needed.
*
* @see HTMLPurifier_Token
* However, DOM is helpful in that it makes it easy to move around nodes
* without a lot of lookaheads to see when a tag is closed. This is a
* limitation of the token system and some workarounds would be nice.
*/
class HTMLPurifier_Lexer
{
@@ -49,22 +47,16 @@ class HTMLPurifier_Lexer
/**
* Retrieves or sets the default Lexer as a Prototype Factory.
*
* Depending on what PHP version you are running, the abstract base
* Lexer class will determine which concrete Lexer is best for you:
* HTMLPurifier_Lexer_DirectLex for PHP 4, and HTMLPurifier_Lexer_DOMLex
* for PHP 5 and beyond. This general rule has a few exceptions to it
* involving special features that only DirectLex implements.
* By default HTMLPurifier_Lexer_DOMLex will be returned. There are
* a few exceptions involving special features that only DirectLex
* implements.
*
* @note The behavior of this class has changed, rather than accepting
* a prototype object, it now accepts a configuration object.
* To specify your own prototype, set %Core.LexerImpl to it.
* This change in behavior de-singletonizes the lexer object.
*
* @note In PHP4, it is possible to call this factory method from
* subclasses, such usage is not recommended and not
* forwards-compatible.
*
* @param $prototype Optional prototype lexer or configuration object
* @param $config Instance of HTMLPurifier_Config
* @return Concrete lexer.
*/
public static function create($config) {
@@ -96,8 +88,9 @@ class HTMLPurifier_Lexer
break;
}
if (version_compare(PHP_VERSION, "5", ">=") && // check for PHP5
class_exists('DOMDocument')) { // check for DOM support
if (class_exists('DOMDocument')) {
// check for DOM support, because, surprisingly enough,
// it's *not* part of the core!
$lexer = 'DOMLex';
} else {
$lexer = 'DirectLex';

View File

@@ -2,17 +2,28 @@
/**
* Experimental HTML5-based parser using Jeroen van der Meer's PH5P library.
* Requires PHP5, and occupies space in the HTML5 pseudo-namespace (may
* cause conflicts, sorry).
* Occupies space in the HTML5 pseudo-namespace, which may cause conflicts.
*
* @note
* Recent changes to PHP's DOM extension have resulted in some fatal
* error conditions with the original version of PH5P. Pending changes,
* this lexer will punt to DirectLex if DOM throughs an exception.
*/
class HTMLPurifier_Lexer_PH5P extends HTMLPurifier_Lexer_DOMLex {
public function tokenizeHTML($html, $config, $context) {
$html = $this->normalize($html, $config, $context);
$html = $this->wrapHTML( $html, $config, $context);
$parser = new HTML5($html);
$doc = $parser->save();
$new_html = $this->normalize($html, $config, $context);
$new_html = $this->wrapHTML($new_html, $config, $context);
try {
$parser = new HTML5($new_html);
$doc = $parser->save();
} catch (DOMException $e) {
// Uh oh, it failed. Punt to DirectLex.
$lexer = new HTMLPurifier_Lexer_DirectLex();
$context->register('PH5PError', $e); // save the error, so we can detect it
return $lexer->tokenizeHTML($html, $config, $context); // use original HTML
}
$tokens = array();
$this->tokenizeDOM(
$doc->getElementsByTagName('html')->item(0)-> // <html>

View File

@@ -2,12 +2,68 @@
/**
* Class that handles operations involving percent-encoding in URIs.
*
* @warning
* Be careful when reusing instances of PercentEncoder. The object
* you use for normalize() SHOULD NOT be used for encode(), or
* vice-versa.
*/
class HTMLPurifier_PercentEncoder
{
/**
* Fix up percent-encoding by decoding unreserved characters and normalizing
* Reserved characters to preserve when using encode().
*/
protected $preserve = array();
/**
* String of characters that should be preserved while using encode().
*/
public function __construct($preserve = false) {
// unreserved letters, ought to const-ify
for ($i = 48; $i <= 57; $i++) $this->preserve[$i] = true; // digits
for ($i = 65; $i <= 90; $i++) $this->preserve[$i] = true; // upper-case
for ($i = 97; $i <= 122; $i++) $this->preserve[$i] = true; // lower-case
$this->preserve[45] = true; // Dash -
$this->preserve[46] = true; // Period .
$this->preserve[95] = true; // Underscore _
$this->preserve[126]= true; // Tilde ~
// extra letters not to escape
if ($preserve !== false) {
for ($i = 0, $c = strlen($preserve); $i < $c; $i++) {
$this->preserve[ord($preserve[$i])] = true;
}
}
}
/**
* Our replacement for urlencode, it encodes all non-reserved characters,
* as well as any extra characters that were instructed to be preserved.
* @note
* Assumes that the string has already been normalized, making any
* and all percent escape sequences valid. Percents will not be
* re-escaped, regardless of their status in $preserve
* @param $string String to be encoded
* @return Encoded string.
*/
public function encode($string) {
$ret = '';
for ($i = 0, $c = strlen($string); $i < $c; $i++) {
if ($string[$i] !== '%' && !isset($this->preserve[$int = ord($string[$i])]) ) {
$ret .= '%' . sprintf('%02X', $int);
} else {
$ret .= $string[$i];
}
}
return $ret;
}
/**
* Fix up percent-encoding by decoding unreserved characters and normalizing.
* @warning This function is affected by $preserve, even though the
* usual desired behavior is for this not to preserve those
* characters. Be careful when reusing instances of PercentEncoder!
* @param $string String to normalize
*/
public function normalize($string) {
@@ -27,12 +83,7 @@ class HTMLPurifier_PercentEncoder
continue;
}
$int = hexdec($encoding);
if (
($int >= 48 && $int <= 57) || // digits
($int >= 65 && $int <= 90) || // uppercase letters
($int >= 97 && $int <= 122) || // lowercase letters
$int == 126 || $int == 45 || $int == 46 || $int == 95 // ~-._
) {
if (isset($this->preserve[$int])) {
$ret .= chr($int) . $text;
continue;
}

View File

@@ -1,6 +1,7 @@
<?php
// OUT OF DATE, NEEDS UPDATING!
// USE XMLWRITER!
class HTMLPurifier_Printer
{
@@ -19,18 +20,15 @@ class HTMLPurifier_Printer
* Initialize $generator.
*/
public function __construct() {
$this->generator = new HTMLPurifier_Generator();
}
/**
* Give generator necessary configuration if possible
*/
public function prepareGenerator($config) {
// hack for smoketests/configForm.php
$all = $config->getAll();
if (empty($all['HTML'])) return;
$context = new HTMLPurifier_Context();
$this->generator->generateFromTokens(array(), $config, $context);
$this->generator = new HTMLPurifier_Generator($config, $context);
}
/**

Some files were not shown because too many files have changed in this diff Show More