1
0
mirror of https://github.com/ezyang/htmlpurifier.git synced 2025-08-02 20:27:40 +02:00

Compare commits

...

27 Commits

Author SHA1 Message Date
Gabriel R. Barbosa
1eb05d9864 feat: Support PHP 8.5 versions (#453) 2025-07-21 22:38:54 -04:00
Pieter Oliver
9f99c615bd add the docker files to be ignored with installation (#451) 2025-07-21 13:53:59 -04:00
Kieran
418eeb7dc0 fix: catastrophic backtracking in Core.AggressivelyFixLt (#440) 2025-06-05 23:49:52 -04:00
Tim Otten
77ebd08632 feat: Define option URI.AllowedSymbols (#447) 2025-06-05 22:46:45 -04:00
Saeid Dadkhah
127ee2f252 Update Serializer.php (#443) 2025-06-05 22:43:51 -04:00
Eli Barbosa
bdabc9b6ba Add docker files for dev environment (#424) 2025-06-05 22:42:52 -04:00
Kieran
ff005f6edc feat: PHP 8.4 support (#441) 2025-03-19 13:25:28 -04:00
Pieter Oliver
c2bc3549a3 fix: non-substantive typos (#434)
Co-authored-by: Viktor Szépe <viktor@szepe.net>
Co-authored-by: Edward Z. Yang <ezyang@mit.edu>
2025-01-14 16:31:27 -05:00
simonLeary42
b21a59101f fix: add warning for misleading option (#433) 2025-01-09 23:09:09 -05:00
Tim Otten
00a0748427 feat: Allow more image widths by default (#430) 2024-12-11 14:15:46 -05:00
Morgan Klonteig
63e631ebd3 feat: Add CSS direction support (#429) 2024-11-21 23:46:53 -05:00
nikkii94
5d154a2393 fix: Deprecated: preg_replace(): Passing null to parameter #3 ($subject) o… (#421)
Co-authored-by: Edward Z. Yang <ezyang@meta.com>
2024-11-09 22:06:41 -05:00
rio
d5150073e9 refactor: #414 remove method substrCount (#415)
Co-authored-by: Edward Z. Yang <ezyang@meta.com>
2024-11-09 22:06:09 -05:00
Eli Barbosa
b5cbf0cc3d feat: Add option for safe iframe hosts using array lookup (#423)
Co-authored-by: Edward Z. Yang <ezyang@meta.com>
2024-11-09 22:02:09 -05:00
Edward Z. Yang
f16ace76cd chore: Disable 8.4 CI (#426) 2024-11-09 21:20:53 -05:00
semantic-release-bot
cb56001e54 chore(release): 4.18.0 [skip ci]
# [4.18.0](https://github.com/ezyang/htmlpurifier/compare/v4.17.0...v4.18.0) (2024-11-01)

### Bug Fixes

* Adjust Core.AllowHostnameUnderscore to consider that "_" is defined as Unreserved Characters in RFC 3986 ([#406](https://github.com/ezyang/htmlpurifier/issues/406)) ([d9fbef8](d9fbef8e27))
* Avoid a deprecated error when the attribute name is numeric and DirectLex is used ([#412](https://github.com/ezyang/htmlpurifier/issues/412)) ([f0fbf51](f0fbf51098))
* checking that node has property name ([#399](https://github.com/ezyang/htmlpurifier/issues/399)) ([9ca5a36](9ca5a3687b))
* Ignore conditional comments ([#401](https://github.com/ezyang/htmlpurifier/issues/401)) ([4828fdf](4828fdf45a))
* Support PHP 8.4 ([#396](https://github.com/ezyang/htmlpurifier/issues/396)) ([92da247](92da2473ff))
* undefined array key warning ([#419](https://github.com/ezyang/htmlpurifier/issues/419)) ([01be377](01be377f93))

### Features

* Add allowfullscreen attr for iframe ([#411](https://github.com/ezyang/htmlpurifier/issues/411)) ([70754a2](70754a2533))
* add directive for removing blank nodes ([#404](https://github.com/ezyang/htmlpurifier/issues/404)) ([c9d60c9](c9d60c96d7))
* Add support for CSS aspect-ratio ([#408](https://github.com/ezyang/htmlpurifier/issues/408)) ([93bee73](93bee73349))
* Allow universal CSS values for all properties ([#410](https://github.com/ezyang/htmlpurifier/issues/410)) ([9723267](972326785d))
2024-11-01 03:51:45 +00:00
Olek Kaim
01be377f93 fix: undefined array key warning (#419) 2024-10-30 22:57:29 -04:00
Atsushi Matsuo
f0fbf51098 fix: Avoid a deprecated error when the attribute name is numeric and DirectLex is used (#412) 2024-07-30 22:06:23 -04:00
John Flatness
70754a2533 feat: Add allowfullscreen attr for iframe (#411) 2024-06-30 07:54:09 -04:00
John Flatness
972326785d feat: Allow universal CSS values for all properties (#410) 2024-06-28 08:37:00 -04:00
Erik
93bee73349 feat: Add support for CSS aspect-ratio (#408) 2024-06-27 15:12:06 -04:00
Atsushi Matsuo
d9fbef8e27 fix: Adjust Core.AllowHostnameUnderscore to consider that "_" is defined as Unreserved Characters in RFC 3986 (#406) 2024-04-18 21:48:20 -04:00
charlie-curtis
c9d60c96d7 feat: add directive for removing blank nodes (#404) 2024-04-11 20:52:45 -04:00
Kent Oyer
4828fdf45a fix: Ignore conditional comments (#401) 2024-03-12 23:41:45 -04:00
Christian Castelli
9ca5a3687b fix: checking that node has property name (#399)
Co-authored-by: Christian Castelli <christian.castelli@docebo.com>
2024-03-05 10:58:42 -05:00
Edward Z. Yang
92da2473ff fix: Support PHP 8.4 (#396)
Signed-off-by: Edward Z. Yang <ezyang@meta.com>
2024-02-22 00:05:10 -05:00
semantic-release-bot
bbc513d79a chore(release): 4.17.0 [skip ci]
# [4.17.0](https://github.com/ezyang/htmlpurifier/compare/v4.16.0...v4.17.0) (2023-11-17)

### Bug Fixes

* CSSTidy ImportantComments not handled properly ([#359](https://github.com/ezyang/htmlpurifier/issues/359)) ([78a9b4d](78a9b4d0da))
* fix CI ([#361](https://github.com/ezyang/htmlpurifier/issues/361)) ([9ec687c](9ec687c904))
* Invalid scheme check in Attr.TargetBlank ([#363](https://github.com/ezyang/htmlpurifier/issues/363)) ([0176ef4](0176ef4bb6))
* semantic release ([#339](https://github.com/ezyang/htmlpurifier/issues/339)) ([d82f3d9](d82f3d996a))
* semantic release ([#341](https://github.com/ezyang/htmlpurifier/issues/341)) ([e55fead](e55fead09f)), closes [#339](https://github.com/ezyang/htmlpurifier/issues/339)
* Support for locales using decimal separators other than . (dot) ([#372](https://github.com/ezyang/htmlpurifier/issues/372)) ([43f49ac](43f49ac9a5))

### Features

* Add support for all text-decoration properties ([#360](https://github.com/ezyang/htmlpurifier/issues/360)) ([2d775c0](2d775c0187))
* Allows commas to be included in tel URI ([#389](https://github.com/ezyang/htmlpurifier/issues/389)) ([ec92490](ec92490139)), closes [#388](https://github.com/ezyang/htmlpurifier/issues/388)

### Reverts

* Revert "fix: semantic release (#339)" (#340) ([3e83215](3e832152a6)), closes [#339](https://github.com/ezyang/htmlpurifier/issues/339) [#340](https://github.com/ezyang/htmlpurifier/issues/340)
2023-11-17 15:01:25 +00:00
115 changed files with 674 additions and 311 deletions

2
.gitattributes vendored
View File

@@ -5,6 +5,8 @@
/benchmarks export-ignore
/configdoc export-ignore
/configdoc/usage.xml -crlf
/docker-compose.yaml export-ignore
/Dockerfile export-ignore
/docs export-ignore
/Doxyfile export-ignore
/extras export-ignore

View File

@@ -10,7 +10,7 @@ jobs:
strategy:
fail-fast: true
matrix:
php: ['5.6', '7.0', '7.1', '7.2', '7.3', '7.4', '8.0', '8.1', '8.2', '8.3']
php: ['5.6', '7.0', '7.1', '7.2', '7.3', '7.4', '8.0', '8.1', '8.2', '8.3', '8.4', '8.5']
name: PHP ${{ matrix.php }}

1
.gitignore vendored
View File

@@ -26,3 +26,4 @@ composer.lock
*.orig
*.bak
core
.idea

30
Dockerfile Normal file
View File

@@ -0,0 +1,30 @@
FROM ubuntu:24.04
ARG DEBIAN_FRONTEND=noninteractive
ENV PHP_VERSION="8.4"
ENV LANG en_US.UTF-8
ENV LC_ALL en_US.UTF-8
RUN apt update -y && apt -y install git curl locales doxygen software-properties-common
RUN echo "en_US.UTF-8 UTF-8" > /etc/locale.gen && \
locale-gen en_US.UTF-8 && \
update-locale LANG=en_US.UTF-8 \
RUN echo -y | add-apt-repository ppa:ondrej/php && apt update -y
RUN apt install -y \
php${PHP_VERSION} \
php${PHP_VERSION}-dev \
php${PHP_VERSION}-xdebug \
php${PHP_VERSION}-iconv \
php${PHP_VERSION}-bcmath \
php${PHP_VERSION}-tidy \
php${PHP_VERSION}-xml
RUN echo "xdebug.mode=debug,coverage" >> /etc/php/${PHP_VERSION}/cli/php.ini
COPY --from=composer:latest /usr/bin/composer /usr/bin/composer
WORKDIR /opt/htmlpurifier

View File

@@ -31,7 +31,7 @@ PROJECT_NAME = HTMLPurifier
# This could be handy for archiving the generated documentation or
# if some version control system is used.
PROJECT_NUMBER = 4.15.0
PROJECT_NUMBER = 4.18.0
# The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute)
# base path where the generated documentation will be put.

View File

@@ -99,7 +99,7 @@ Autoload compatibility
HTML Purifier attempts to be as smart as possible when registering an
autoloader, but there are some cases where you will need to change
your own code to accomodate HTML Purifier. These are those cases:
your own code to accommodate HTML Purifier. These are those cases:
AN __autoload FUNCTION IS DECLARED AFTER OUR AUTOLOADER IS REGISTERED
spl_autoload_register() has the curious behavior of disabling

59
NEWS
View File

@@ -1,3 +1,46 @@
# [4.18.0](https://github.com/ezyang/htmlpurifier/compare/v4.17.0...v4.18.0) (2024-11-01)
### Bug Fixes
* Adjust Core.AllowHostnameUnderscore to consider that "_" is defined as Unreserved Characters in RFC 3986 ([#406](https://github.com/ezyang/htmlpurifier/issues/406)) ([d9fbef8](https://github.com/ezyang/htmlpurifier/commit/d9fbef8e27f6a0848a8987a8534351de98eb0fa1))
* Avoid a deprecated error when the attribute name is numeric and DirectLex is used ([#412](https://github.com/ezyang/htmlpurifier/issues/412)) ([f0fbf51](https://github.com/ezyang/htmlpurifier/commit/f0fbf510981b27fc03168efdb17d6bff48f521af))
* checking that node has property name ([#399](https://github.com/ezyang/htmlpurifier/issues/399)) ([9ca5a36](https://github.com/ezyang/htmlpurifier/commit/9ca5a3687bd2e6e42ed9d5199b2cfb1dbd6dbdc2))
* Ignore conditional comments ([#401](https://github.com/ezyang/htmlpurifier/issues/401)) ([4828fdf](https://github.com/ezyang/htmlpurifier/commit/4828fdf45a93eeeacfcbcc855f96f9a7e6b4ed44))
* Support PHP 8.4 ([#396](https://github.com/ezyang/htmlpurifier/issues/396)) ([92da247](https://github.com/ezyang/htmlpurifier/commit/92da2473ffbb3ed5e894560d4166b1ca8032aeb3))
* undefined array key warning ([#419](https://github.com/ezyang/htmlpurifier/issues/419)) ([01be377](https://github.com/ezyang/htmlpurifier/commit/01be377f93654fad4d3fbc8c81779f14316eaecf))
### Features
* Add allowfullscreen attr for iframe ([#411](https://github.com/ezyang/htmlpurifier/issues/411)) ([70754a2](https://github.com/ezyang/htmlpurifier/commit/70754a253342f67a67425cfa81029db3a5c1bd28))
* add directive for removing blank nodes ([#404](https://github.com/ezyang/htmlpurifier/issues/404)) ([c9d60c9](https://github.com/ezyang/htmlpurifier/commit/c9d60c96d799c02bc357c88a49f422034b6914fd))
* Add support for CSS aspect-ratio ([#408](https://github.com/ezyang/htmlpurifier/issues/408)) ([93bee73](https://github.com/ezyang/htmlpurifier/commit/93bee733497a098b65daf5910f1c435d347860a4))
* Allow universal CSS values for all properties ([#410](https://github.com/ezyang/htmlpurifier/issues/410)) ([9723267](https://github.com/ezyang/htmlpurifier/commit/972326785d201b81e1095bc296b99bbcfa8c7fd4))
# [4.17.0](https://github.com/ezyang/htmlpurifier/compare/v4.16.0...v4.17.0) (2023-11-17)
### Bug Fixes
* CSSTidy ImportantComments not handled properly ([#359](https://github.com/ezyang/htmlpurifier/issues/359)) ([78a9b4d](https://github.com/ezyang/htmlpurifier/commit/78a9b4d0dae8bce9a70e4e7e551bf51e9a23706d))
* fix CI ([#361](https://github.com/ezyang/htmlpurifier/issues/361)) ([9ec687c](https://github.com/ezyang/htmlpurifier/commit/9ec687c904a1fe66a5395d22c50f7043e045d1d3))
* Invalid scheme check in Attr.TargetBlank ([#363](https://github.com/ezyang/htmlpurifier/issues/363)) ([0176ef4](https://github.com/ezyang/htmlpurifier/commit/0176ef4bb6f57103fdcb60a802603e60e81ee93e))
* semantic release ([#339](https://github.com/ezyang/htmlpurifier/issues/339)) ([d82f3d9](https://github.com/ezyang/htmlpurifier/commit/d82f3d996a0d9b0f23364946d9a14408c1ad72c5))
* semantic release ([#341](https://github.com/ezyang/htmlpurifier/issues/341)) ([e55fead](https://github.com/ezyang/htmlpurifier/commit/e55fead09f39430d30f48438f06e7bc2326efc94)), closes [#339](https://github.com/ezyang/htmlpurifier/issues/339)
* Support for locales using decimal separators other than . (dot) ([#372](https://github.com/ezyang/htmlpurifier/issues/372)) ([43f49ac](https://github.com/ezyang/htmlpurifier/commit/43f49ac9a51b81dfd07d3bc8dcfc5ec5637a5e3b))
### Features
* Add support for all text-decoration properties ([#360](https://github.com/ezyang/htmlpurifier/issues/360)) ([2d775c0](https://github.com/ezyang/htmlpurifier/commit/2d775c01874e2f676ba1a2d9fe69b47c2a823061))
* Allows commas to be included in tel URI ([#389](https://github.com/ezyang/htmlpurifier/issues/389)) ([ec92490](https://github.com/ezyang/htmlpurifier/commit/ec924901392f088d334622f806b9449b17b75d7b)), closes [#388](https://github.com/ezyang/htmlpurifier/issues/388)
### Reverts
* Revert "fix: semantic release (#339)" (#340) ([3e83215](https://github.com/ezyang/htmlpurifier/commit/3e832152a6173f880c6495a3ab2b0e5235e253a6)), closes [#339](https://github.com/ezyang/htmlpurifier/issues/339) [#340](https://github.com/ezyang/htmlpurifier/issues/340)
NEWS ( CHANGELOG and HISTORY ) HTMLPurifier
|||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
@@ -439,7 +482,7 @@ real release we decided to skip this version number.
3.3.0, released 2009-02-16
! Implement CSS property 'overflow' when %CSS.AllowTricky is true.
! Implement generic property list classess
! Implement generic property list classes
- Fix bug with testEncodingSupportsASCII() algorithm when iconv() implementation
does not do the "right thing" with characters not supported in the output
set.
@@ -479,7 +522,7 @@ real release we decided to skip this version number.
! %Core.AggressivelyFixLt is on by default. This causes more sensible
processing of left angled brackets in smileys and other whatnot.
! Test scripts now have a 'type' parameter, which lets you say 'htmlpurifier',
'phpt', 'vtest', etc. in order to only execute those tests. This supercedes
'phpt', 'vtest', etc. in order to only execute those tests. This supersedes
the --only-phpt parameter, although for backwards-compatibility the flag
will still work.
! AutoParagraph auto-formatter will now preserve double-newlines upon output.
@@ -530,7 +573,7 @@ real release we decided to skip this version number.
. Added --quick option to multitest.php, which tests only the most recent
release for each series.
. Added --distro option to multitest.php, which accepts either 'normal' or
'standalone'. This supercedes --exclude-normal and --exclude-standalone
'standalone'. This supersedes --exclude-normal and --exclude-standalone
3.1.1, released 2008-06-19
# %URI.Munge now, by default, does not munge resources (for example, <img src="">)
@@ -680,7 +723,7 @@ real release we decided to skip this version number.
# HTMLPurifier->addFilter is deprecated; built-in filters can now be
enabled using %Filter.$filter_name or by setting your own filters using
%Filter.Custom
# Directive-level safety properties superceded in favor of module-level
# Directive-level safety properties superseded in favor of module-level
safety. Internal method HTMLModule->addElement() has changed, although
the externally visible HTMLDefinition->addElement has *not* changed.
! Extra utility classes for testing and non-library operations can
@@ -726,7 +769,7 @@ real release we decided to skip this version number.
. A couple of new historical maintenance scripts were added.
. HTMLPurifier/HTMLModule/Tidy/XHTMLAndHTML4.php split into two files
. tests/index.php can now be run from any directory.
. HTMLPurifier_Token subclasses split into seperate files
. HTMLPurifier_Token subclasses split into separate files
. HTMLPURIFIER_PREFIX now is defined in Bootstrap.php, NOT HTMLPurifier.php
. HTMLPURIFIER_PREFIX can now be defined outside of HTML Purifier
. New --php=php flag added, allows PHP executable to be specified (command
@@ -792,7 +835,7 @@ real release we decided to skip this version number.
- Fix warning emitted when a non-supported URI scheme is passed to the
MakeAbsolute URIFilter, thanks NykO18 (again)
- Further refine AutoParagraph injector. Behavior inside of elements
allowing paragraph tags clarified: only inline content delimeted by
allowing paragraph tags clarified: only inline content delimited by
double newlines (not block elements) are paragraphed.
- Buggy treatment of end tags of elements that have required attributes
fixed (does not manifest on default tag-set)
@@ -838,7 +881,7 @@ real release we decided to skip this version number.
! CSS property border-spacing implemented
- Fix non-visible parsing error in DirectLex with empty tags that have
slashes inside attribute values.
- Fix typo in CSS definition: border-collapse:seperate; was incorrectly
- Fix typo in CSS definition: border-collapse:separate; was incorrectly
accepted as valid CSS. Usually non-visible, because this styling is the
default for tables in most browsers. Thanks Brett Zamir for pointing
this out.
@@ -889,7 +932,7 @@ real release we decided to skip this version number.
. HTMLPurifier_Config->getSerial() implemented, this is extremely useful
for output cache invalidation
. ConfigForm printer now can retrieve CSS and JS files as strings, in
case HTML Purifier's directory is not publically accessible
case HTML Purifier's directory is not publicly accessible
. Introduce new text/itext configuration directive values: these represent
longer strings that would be more appropriately edited with a textarea
. Allow newlines to act as separators for lists, hashes, lookups and

4
TODO
View File

@@ -41,7 +41,7 @@ FUTURE VERSIONS
- Config: Add examples to everything (make built-in which also automatically
gives output)
- Add "register" field to config schemas to eliminate dependence on
naming conventions (try to remember why we ultimately decided on tihs)
naming conventions (try to remember why we ultimately decided on this)
5.0 release [HTML 5]
# Swap out code to use html5lib tokenizer and tree-builder
@@ -112,7 +112,7 @@ Neat feature related
Also, enable disabling of directionality
? Externalize inline CSS to promote clean HTML, proposed by Sander Tekelenburg
? Remove redundant tags, ex. <u><u>Underlined</u></u>. Implementation notes:
1. Analyzing which tags to remove duplicants
1. Analyzing which tags to remove duplicates
2. Ensure attributes are merged into the parent tag
3. Extend the tag exclusion system to specify whether or not the
contents should be dropped or not (currently, there's code that could do

View File

@@ -1 +1 @@
4.15.0
4.18.0

View File

@@ -392,7 +392,7 @@ Women practicing non-martial T'ai Chi in <a href="/wiki/Chinatown_%28Manhattan%2
<li><a href="http://www.scheele.org/lee/tcclinks.html" class="external text" title="http://www.scheele.org/lee/tcclinks.html">Lee Scheele's Links to T'ai Chi Ch'uan Web Sites</a></li>
<li><a href="http://news.bbc.co.uk/1/hi/health/3543907.stm" class="external text" title="http://news.bbc.co.uk/1/hi/health/3543907.stm">BBC article</a></li>
<li><a href="http://www.acupuncturetoday.com/archives2004/jul/07taichi.html" class="external text" title="http://www.acupuncturetoday.com/archives2004/jul/07taichi.html">Tai Chi: Good for the Mind, Good for the Body</a></li>
<li><a href="http://www.taichiunion.com/" class="external text" title="http://www.taichiunion.com/">Tai Chi Chuan Union for Great Britian: The largest collective of independent Tai Chi Chuan Instructors in the British Isles</a></li>
<li><a href="http://www.taichiunion.com/" class="external text" title="http://www.taichiunion.com/">Tai Chi Chuan Union for Great Britain: The largest collective of independent Tai Chi Chuan Instructors in the British Isles</a></li>
</ul>

View File

@@ -13,7 +13,7 @@
}
],
"require": {
"php": "~5.6.0 || ~7.0.0 || ~7.1.0 || ~7.2.0 || ~7.3.0 || ~7.4.0 || ~8.0.0 || ~8.1.0 || ~8.2.0 || ~8.3.0"
"php": "~5.6.0 || ~7.0.0 || ~7.1.0 || ~7.2.0 || ~7.3.0 || ~7.4.0 || ~8.0.0 || ~8.1.0 || ~8.2.0 || ~8.3.0 || ~8.4.0 || ~8.5.0"
},
"require-dev": {
"cerdic/css-tidy": "^1.7 || ^2.0",

View File

@@ -16,7 +16,7 @@ TODO:
*/
if (version_compare(PHP_VERSION, '5.2', '<')) exit('PHP 5.2+ required.');
error_reporting(E_ALL | E_STRICT);
error_reporting(E_ALL);
// load dual-libraries
require_once dirname(__FILE__) . '/../extras/HTMLPurifierExtras.auto.php';

View File

@@ -6,7 +6,7 @@
</file>
<file name="HTMLPurifier/Lexer.php">
<line>90</line>
<line>331</line>
<line>315</line>
</file>
<file name="HTMLPurifier/Lexer/DirectLex.php">
<line>67</line>
@@ -19,37 +19,37 @@
</directive>
<directive id="CSS.MaxImgLength">
<file name="HTMLPurifier/CSSDefinition.php">
<line>256</line>
<line>253</line>
</file>
</directive>
<directive id="CSS.Proprietary">
<file name="HTMLPurifier/CSSDefinition.php">
<line>381</line>
<line>397</line>
</file>
</directive>
<directive id="CSS.AllowTricky">
<file name="HTMLPurifier/CSSDefinition.php">
<line>385</line>
<line>401</line>
</file>
</directive>
<directive id="CSS.Trusted">
<file name="HTMLPurifier/CSSDefinition.php">
<line>389</line>
<line>405</line>
</file>
</directive>
<directive id="CSS.AllowImportant">
<file name="HTMLPurifier/CSSDefinition.php">
<line>393</line>
<line>409</line>
</file>
</directive>
<directive id="CSS.AllowedProperties">
<file name="HTMLPurifier/CSSDefinition.php">
<line>522</line>
<line>538</line>
</file>
</directive>
<directive id="CSS.ForbiddenProperties">
<file name="HTMLPurifier/CSSDefinition.php">
<line>538</line>
<line>554</line>
</file>
</directive>
<directive id="Cache.DefinitionImpl">
@@ -124,7 +124,7 @@
<line>122</line>
</file>
<file name="HTMLPurifier/Lexer.php">
<line>313</line>
<line>299</line>
</file>
</directive>
<directive id="Output.Newline">
@@ -172,12 +172,15 @@
<line>234</line>
</file>
<file name="HTMLPurifier/Lexer.php">
<line>318</line>
<line>358</line>
<line>304</line>
<line>342</line>
</file>
<file name="HTMLPurifier/AttrDef/HTML/ContentEditable.php">
<line>8</line>
</file>
<file name="HTMLPurifier/HTMLModule/Iframe.php">
<line>43</line>
</file>
<file name="HTMLPurifier/HTMLModule/Image.php">
<line>37</line>
</file>
@@ -267,22 +270,22 @@
<directive id="Core.LegacyEntityDecoder">
<file name="HTMLPurifier/Lexer.php">
<line>220</line>
<line>342</line>
<line>326</line>
</file>
</directive>
<directive id="Core.ConvertDocumentToFragment">
<file name="HTMLPurifier/Lexer.php">
<line>329</line>
<line>313</line>
</file>
</directive>
<directive id="Core.RemoveProcessingInstructions">
<file name="HTMLPurifier/Lexer.php">
<line>352</line>
<line>336</line>
</file>
</directive>
<directive id="Core.HiddenElements">
<file name="HTMLPurifier/Lexer.php">
<line>356</line>
<line>340</line>
</file>
<file name="HTMLPurifier/Strategy/RemoveForeignElements.php">
<line>36</line>
@@ -290,12 +293,12 @@
</directive>
<directive id="Core.AggressivelyRemoveScript">
<file name="HTMLPurifier/Lexer.php">
<line>357</line>
<line>341</line>
</file>
</directive>
<directive id="Core.RemoveScriptContents">
<file name="HTMLPurifier/Lexer.php">
<line>358</line>
<line>342</line>
</file>
<file name="HTMLPurifier/Strategy/RemoveForeignElements.php">
<line>35</line>
@@ -357,7 +360,7 @@
</directive>
<directive id="CSS.AllowedFonts">
<file name="HTMLPurifier/AttrDef/CSS/FontFamily.php">
<line>64</line>
<line>62</line>
</file>
</directive>
<directive id="Attr.AllowedClasses">
@@ -408,12 +411,12 @@
</directive>
<directive id="Core.AllowHostnameUnderscore">
<file name="HTMLPurifier/AttrDef/URI/Host.php">
<line>77</line>
<line>71</line>
</file>
</directive>
<directive id="Core.EnableIDNA">
<file name="HTMLPurifier/AttrDef/URI/Host.php">
<line>109</line>
<line>103</line>
</file>
</directive>
<directive id="Attr.DefaultTextDir">
@@ -470,17 +473,17 @@
</directive>
<directive id="Filter.ExtractStyleBlocks.TidyImpl">
<file name="HTMLPurifier/Filter/ExtractStyleBlocks.php">
<line>94</line>
<line>106</line>
</file>
</directive>
<directive id="Filter.ExtractStyleBlocks.Scope">
<file name="HTMLPurifier/Filter/ExtractStyleBlocks.php">
<line>125</line>
<line>137</line>
</file>
</directive>
<directive id="Filter.ExtractStyleBlocks.Escaping">
<file name="HTMLPurifier/Filter/ExtractStyleBlocks.php">
<line>330</line>
<line>351</line>
</file>
</directive>
<directive id="HTML.Forms">
@@ -552,6 +555,11 @@
<line>72</line>
</file>
</directive>
<directive id="Core.RemoveBlanks">
<file name="HTMLPurifier/Lexer/DOMLex.php">
<line>75</line>
</file>
</directive>
<directive id="Core.DirectLexLineNumberSyncInterval">
<file name="HTMLPurifier/Lexer/DirectLex.php">
<line>84</line>
@@ -600,4 +608,9 @@
<line>35</line>
</file>
</directive>
<directive id="URI.SafeIframeHosts">
<file name="HTMLPurifier/URIFilter/SafeIframe.php">
<line>67</line>
</file>
</directive>
</usage>

9
docker-compose.yaml Normal file
View File

@@ -0,0 +1,9 @@
services:
htmlpurifier:
build:
context: "."
dockerfile: Dockerfile
container_name: 'htmlpurifier'
tty: true
volumes:
- .:/opt/htmlpurifier

View File

@@ -49,7 +49,7 @@ the properties:
AllowedFrameTargets -> heavily <a> specific, but also used by <area>
and <form>. Transitional DTD %FrameTarget, not present in strict,
HTML5 calls them "browsing contexts"
Default*Image* -> as a default parameter, is almost entirely exlcusive
Default*Image* -> as a default parameter, is almost entirely exclusive
to <img>
EnableID -> global attribute
Name.UseCDATA -> heavily <a> specific, but has heavy other usage by

View File

@@ -122,7 +122,7 @@ thead th {text-align:left;padding:0.1em;background-color:#EEE;}
<tbody>
<tr><th colspan="2">Table</th></tr>
<tr class="impl-yes"><td>border-collapse</td><td>ENUM(collapse, seperate)</td></tr>
<tr class="impl-yes"><td>border-collapse</td><td>ENUM(collapse, separate)</td></tr>
<tr class="impl-yes"><td>border-space</td><td>MULTIPLE</td></tr>
<tr class="impl-yes"><td>caption-side</td><td>ENUM(top, bottom)</td></tr>
<tr class="feature"><td>empty-cells</td><td>ENUM(show, hide), No IE support makes this useless,

View File

@@ -163,7 +163,7 @@ smoketest</a>.</p>
<p>So you want HTML Purifier to clean up your HTML, but you're not
so happy about the br@clear implementation. That's perfectly fine!
HTML Purifier will make accomodations:</p>
HTML Purifier will make accommodations:</p>
<pre>$config-&gt;set('HTML.Doctype', 'XHTML 1.0 Transitional');
$config-&gt;set('HTML.TidyLevel', 'heavy'); // all changes, minus...

View File

@@ -60,7 +60,7 @@ These are special use tags, they should be enabled on a blanket basis.
Lists - dd, dl, dt, li, ol, ul ~ menu, dir
Tables - caption, table, td, th, tr / col, colgroup, tbody, tfoot, thead
Forms - fieldset, form, input, lable, legend, optgroup, option, select, textarea
Forms - fieldset, form, input, label, legend, optgroup, option, select, textarea
XSS - noscript, object, script ~ applet
Meta - base, basefont, body, head, html, link, meta, style, title
Frames - frame, frameset, iframe
@@ -91,7 +91,7 @@ attribute and put URI filtering higher up on the priority list.
== Attribute Risk Analysis ==
We actually have a suprisingly small assortment of allowed attributes (the
We actually have a surprisingly small assortment of allowed attributes (the
rest are deprecated in strict, and thus we opted not to allow them, even
though our output is XHTML Transitional by default.)

View File

@@ -70,7 +70,7 @@ Backfills/Data integrity:
Type systems:
- Flags: ReadOnly, Permanent, DontEnum
- Typed properties isn't that useful [It's also Not-PHP]
- Seperate meta-list of directive properties IS useful
- Separate meta-list of directive properties IS useful
- Duck typing is useful for systems designed fully around properties pattern
Trade-off:

View File

@@ -7,7 +7,7 @@
* primary concern and you are using an opcode cache. PLEASE DO NOT EDIT THIS
* FILE, changes will be overwritten the next time the script is run.
*
* @version 4.15.0
* @version 4.18.0
*
* @warning
* You must *not* include any other HTML Purifier files before this file,
@@ -101,6 +101,7 @@ require 'HTMLPurifier/AttrDef/CSS/Length.php';
require 'HTMLPurifier/AttrDef/CSS/ListStyle.php';
require 'HTMLPurifier/AttrDef/CSS/Multiple.php';
require 'HTMLPurifier/AttrDef/CSS/Percentage.php';
require 'HTMLPurifier/AttrDef/CSS/Ratio.php';
require 'HTMLPurifier/AttrDef/CSS/TextDecoration.php';
require 'HTMLPurifier/AttrDef/CSS/URI.php';
require 'HTMLPurifier/AttrDef/HTML/Bool.php';

View File

@@ -19,7 +19,7 @@
*/
/*
HTML Purifier 4.15.0 - Standards Compliant HTML Filtering
HTML Purifier 4.18.0 - Standards Compliant HTML Filtering
Copyright (C) 2006-2008 Edward Z. Yang
This library is free software; you can redistribute it and/or
@@ -58,12 +58,12 @@ class HTMLPurifier
* Version of HTML Purifier.
* @type string
*/
public $version = '4.15.0';
public $version = '4.18.0';
/**
* Constant with version of HTML Purifier.
*/
const VERSION = '4.15.0';
const VERSION = '4.18.0';
/**
* Global configuration object.

View File

@@ -95,6 +95,7 @@ require_once $__dir . '/HTMLPurifier/AttrDef/CSS/Length.php';
require_once $__dir . '/HTMLPurifier/AttrDef/CSS/ListStyle.php';
require_once $__dir . '/HTMLPurifier/AttrDef/CSS/Multiple.php';
require_once $__dir . '/HTMLPurifier/AttrDef/CSS/Percentage.php';
require_once $__dir . '/HTMLPurifier/AttrDef/CSS/Ratio.php';
require_once $__dir . '/HTMLPurifier/AttrDef/CSS/TextDecoration.php';
require_once $__dir . '/HTMLPurifier/AttrDef/CSS/URI.php';
require_once $__dir . '/HTMLPurifier/AttrDef/HTML/Bool.php';

View File

@@ -27,6 +27,13 @@ class HTMLPurifier_AttrDef_CSS extends HTMLPurifier_AttrDef
$definition = $config->getCSSDefinition();
$allow_duplicates = $config->get("CSS.AllowDuplicates");
$universal_attrdef = new HTMLPurifier_AttrDef_Enum(
array(
'initial',
'inherit',
'unset',
)
);
// According to the CSS2.1 spec, the places where a
// non-delimiting semicolon can appear are in strings
@@ -96,16 +103,13 @@ class HTMLPurifier_AttrDef_CSS extends HTMLPurifier_AttrDef
if (!$ok) {
continue;
}
// inefficient call, since the validator will do this again
if (strtolower(trim($value)) !== 'inherit') {
// inherit works for everything (but only on the base property)
$result = $universal_attrdef->validate($value, $config, $context);
if ($result === false) {
$result = $definition->info[$property]->validate(
$value,
$config,
$context
);
} else {
$result = 'inherit';
}
if ($result === false) {
continue;

View File

@@ -195,7 +195,7 @@ class HTMLPurifier_AttrDef_CSS_FontFamily extends HTMLPurifier_AttrDef
// transforms don't pose a security risk (as \\ and \"
// might--these escapes are not supported by most browsers).
// We could try to be clever and use single-quote wrapping
// when there is a double quote present, but I have choosen
// when there is a double quote present, but I have chosen
// not to implement that. (NOTE: you can reduce the amount
// of escapes by one depending on what quoting style you use)
// $font = str_replace('\\', '\\5C ', $font);

View File

@@ -0,0 +1,46 @@
<?php
/**
* Validates a ratio as defined by the CSS spec.
*/
class HTMLPurifier_AttrDef_CSS_Ratio extends HTMLPurifier_AttrDef
{
/**
* @param string $ratio Ratio to validate
* @param HTMLPurifier_Config $config Configuration options
* @param HTMLPurifier_Context $context Context
*
* @return string|boolean
*
* @warning Some contexts do not pass $config, $context. These
* variables should not be used without checking HTMLPurifier_Length
*/
public function validate($ratio, $config, $context)
{
$ratio = $this->parseCDATA($ratio);
$parts = explode('/', $ratio, 2);
$length = count($parts);
if ($length < 1 || $length > 2) {
return false;
}
$num = new \HTMLPurifier_AttrDef_CSS_Number();
if ($length === 1) {
return $num->validate($parts[0], $config, $context);
}
$num1 = $num->validate($parts[0], $config, $context);
$num2 = $num->validate($parts[1], $config, $context);
if ($num1 === false || $num2 === false) {
return false;
}
return $num1 . '/' . $num2;
}
}
// vim: et sw=4 sts=4

View File

@@ -25,12 +25,7 @@ class HTMLPurifier_AttrDef_HTML_LinkTypes extends HTMLPurifier_AttrDef
'rev' => 'AllowedRev'
);
if (!isset($configLookup[$name])) {
trigger_error(
'Unrecognized attribute name for link ' .
'relationship.',
E_USER_ERROR
);
return;
throw new Exception('Unrecognized attribute name for link relationship.');
}
$this->name = $configLookup[$name];
}

View File

@@ -63,24 +63,18 @@ class HTMLPurifier_AttrDef_URI_Host extends HTMLPurifier_AttrDef
// This doesn't match I18N domain names, but we don't have proper IRI support,
// so force users to insert Punycode.
// There is not a good sense in which underscores should be
// allowed, since it's technically not! (And if you go as
// far to allow everything as specified by the DNS spec...
// well, that's literally everything, modulo some space limits
// for the components and the overall name (which, by the way,
// we are NOT checking!). So we (arbitrarily) decide this:
// let's allow underscores wherever we would have allowed
// hyphens, if they are enabled. This is a pretty good match
// for browser behavior, for example, a large number of browsers
// cannot handle foo_.example.com, but foo_bar.example.com is
// fairly well supported.
// Underscores defined as Unreserved Characters in RFC 3986 are
// allowed in a URI. There are cases where we want to consider a
// URI containing "_" such as "_dmarc.example.com".
// Underscores are not allowed in the default. If you want to
// allow it, set Core.AllowHostnameUnderscore to true.
$underscore = $config->get('Core.AllowHostnameUnderscore') ? '_' : '';
// Based off of RFC 1738, but amended so that
// as per RFC 3696, the top label need only not be all numeric.
// The productions describing this are:
$a = '[a-z]'; // alpha
$an = '[a-z0-9]'; // alphanum
$an = "[a-z0-9$underscore]"; // alphanum
$and = "[a-z0-9-$underscore]"; // alphanum | "-"
// domainlabel = alphanum | alphanum *( alphanum | "-" ) alphanum
$domainlabel = "$an(?:$and*$an)?";

View File

@@ -37,7 +37,7 @@ class HTMLPurifier_AttrDef_URI_IPv6 extends HTMLPurifier_AttrDef_URI_IPv4
}
}
// IPv4-compatiblity check
// IPv4-compatibility check
if (preg_match('#(?<=:' . ')' . $this->ip4 . '$#s', $aIP, $find)) {
$aIP = substr($aIP, 0, 0 - strlen($find[0]));
$ip = explode('.', $find[0]);

View File

@@ -3,7 +3,7 @@
// this MUST be placed in post, as it assumes that any value in dir is valid
/**
* Post-trasnform that ensures that bdo tags have the dir attribute set.
* Post-transform that ensures that bdo tags have the dir attribute set.
*/
class HTMLPurifier_AttrTransform_BdoDir extends HTMLPurifier_AttrTransform
{

View File

@@ -77,7 +77,7 @@ class HTMLPurifier_AttrTypes
}
if (!isset($this->info[$type])) {
trigger_error('Cannot retrieve undefined attribute type ' . $type, E_USER_ERROR);
throw new Exception('Cannot retrieve undefined attribute type ' . $type);
return;
}
return $this->info[$type]->make($string);

View File

@@ -135,7 +135,7 @@ class HTMLPurifier_AttrValidator
// we'd also want slightly more complicated substitution
// involving an array as the return value,
// although we're not sure how colliding attributes would
// resolve (certain ones would be completely overriden,
// resolve (certain ones would be completely overridden,
// others would prepend themselves).
}

View File

@@ -5,7 +5,7 @@ if (!defined('HTMLPURIFIER_PREFIX')) {
define('HTMLPURIFIER_PREFIX', realpath(dirname(__FILE__) . '/..'));
}
// accomodations for versions earlier than 5.0.2
// accommodations for versions earlier than 5.0.2
// borrowed from PHP_Compat, LGPL licensed, by Aidan Lister <aidan@php.net>
if (!defined('PHP_EOL')) {
switch (strtoupper(substr(PHP_OS, 0, 3))) {

View File

@@ -26,6 +26,11 @@ class HTMLPurifier_CSSDefinition extends HTMLPurifier_Definition
false
);
$this->info['direction'] = new HTMLPurifier_AttrDef_Enum(
['ltr', 'rtl'],
false
);
$border_style =
$this->info['border-bottom-style'] =
$this->info['border-right-style'] =
@@ -116,8 +121,6 @@ class HTMLPurifier_CSSDefinition extends HTMLPurifier_Definition
'auto',
'cover',
'contain',
'initial',
'inherit',
]
),
new HTMLPurifier_AttrDef_CSS_Percentage(),
@@ -236,21 +239,20 @@ class HTMLPurifier_CSSDefinition extends HTMLPurifier_Definition
[
new HTMLPurifier_AttrDef_CSS_Length('0'),
new HTMLPurifier_AttrDef_CSS_Percentage(true),
new HTMLPurifier_AttrDef_Enum(['auto', 'initial', 'inherit'])
new HTMLPurifier_AttrDef_Enum(['auto'])
]
);
$trusted_min_wh = new HTMLPurifier_AttrDef_CSS_Composite(
[
new HTMLPurifier_AttrDef_CSS_Length('0'),
new HTMLPurifier_AttrDef_CSS_Percentage(true),
new HTMLPurifier_AttrDef_Enum(['initial', 'inherit'])
]
);
$trusted_max_wh = new HTMLPurifier_AttrDef_CSS_Composite(
[
new HTMLPurifier_AttrDef_CSS_Length('0'),
new HTMLPurifier_AttrDef_CSS_Percentage(true),
new HTMLPurifier_AttrDef_Enum(['none', 'initial', 'inherit'])
new HTMLPurifier_AttrDef_Enum(['none'])
]
);
$max = $config->get('CSS.MaxImgLength');
@@ -278,12 +280,7 @@ class HTMLPurifier_CSSDefinition extends HTMLPurifier_Definition
new HTMLPurifier_AttrDef_Switch(
'img',
// For img tags:
new HTMLPurifier_AttrDef_CSS_Composite(
[
new HTMLPurifier_AttrDef_CSS_Length('0', $max),
new HTMLPurifier_AttrDef_Enum(['initial', 'inherit'])
]
),
new HTMLPurifier_AttrDef_CSS_Length('0', $max),
// For everyone else:
$trusted_min_wh
);
@@ -297,22 +294,29 @@ class HTMLPurifier_CSSDefinition extends HTMLPurifier_Definition
new HTMLPurifier_AttrDef_CSS_Composite(
[
new HTMLPurifier_AttrDef_CSS_Length('0', $max),
new HTMLPurifier_AttrDef_Enum(['none', 'initial', 'inherit'])
new HTMLPurifier_AttrDef_Enum(['none'])
]
),
// For everyone else:
$trusted_max_wh
);
$this->info['aspect-ratio'] = new HTMLPurifier_AttrDef_CSS_Multiple(
new HTMLPurifier_AttrDef_CSS_Composite([
new HTMLPurifier_AttrDef_CSS_Ratio(),
new HTMLPurifier_AttrDef_Enum(['auto']),
])
);
// text-decoration and related shorthands
$this->info['text-decoration'] = new HTMLPurifier_AttrDef_CSS_TextDecoration();
$this->info['text-decoration-line'] = new HTMLPurifier_AttrDef_Enum(
['none', 'underline', 'overline', 'line-through', 'initial', 'inherit']
['none', 'underline', 'overline', 'line-through']
);
$this->info['text-decoration-style'] = new HTMLPurifier_AttrDef_Enum(
['solid', 'double', 'dotted', 'dashed', 'wavy', 'initial', 'inherit']
['solid', 'double', 'dotted', 'dashed', 'wavy']
);
$this->info['text-decoration-color'] = new HTMLPurifier_AttrDef_CSS_Color();
@@ -320,7 +324,7 @@ class HTMLPurifier_CSSDefinition extends HTMLPurifier_Definition
$this->info['text-decoration-thickness'] = new HTMLPurifier_AttrDef_CSS_Composite([
new HTMLPurifier_AttrDef_CSS_Length(),
new HTMLPurifier_AttrDef_CSS_Percentage(),
new HTMLPurifier_AttrDef_Enum(['auto', 'from-font', 'initial', 'inherit'])
new HTMLPurifier_AttrDef_Enum(['auto', 'from-font'])
]);
$this->info['font-family'] = new HTMLPurifier_AttrDef_CSS_FontFamily();

View File

@@ -190,6 +190,9 @@ class HTMLPurifier_ChildDef_Table extends HTMLPurifier_ChildDef
$current_tr_tbody = null;
foreach($content as $node) {
if (!isset($node->name)) {
continue;
}
switch ($node->name) {
case 'tbody':
$current_tr_tbody = null;

View File

@@ -21,7 +21,7 @@ class HTMLPurifier_Config
* HTML Purifier's version
* @type string
*/
public $version = '4.15.0';
public $version = '4.18.0';
/**
* Whether or not to automatically finalize
@@ -898,7 +898,11 @@ class HTMLPurifier_Config
break;
}
}
trigger_error($msg . $extra, $no);
if ($no == E_USER_ERROR) {
throw new Exception($msg . $extra);
} else {
trigger_error($msg . $extra, $no);
}
}
/**

View File

@@ -72,7 +72,7 @@ class HTMLPurifier_ConfigSchema
$r = unserialize($contents);
if (!$r) {
$hash = sha1($contents);
trigger_error("Unserialization of configuration schema failed, sha1 of file was $hash", E_USER_ERROR);
throw new Exception("Unserialization of configuration schema failed, sha1 of file was $hash");
}
return $r;
}

View File

@@ -66,7 +66,7 @@ class HTMLPurifier_ConfigSchema_Interchange_Directive
public $version;
/**
* ID of directive that supercedes this old directive.
* ID of directive that supersedes this old directive.
* Null if not deprecated.
* @type HTMLPurifier_ConfigSchema_Interchange_Id
*/

File diff suppressed because one or more lines are too long

View File

@@ -5,10 +5,10 @@ DEFAULT: ''
--DESCRIPTION--
Temporary prefix for IDs used in conjunction with %Attr.IDPrefix. If you
need to allow multiple sets of user content on web page, you may need to
have a seperate prefix that changes with each iteration. This way,
seperately submitted user content displayed on the same page doesn't
have a separate prefix that changes with each iteration. This way,
separately submitted user content displayed on the same page doesn't
clobber each other. Ideal values are unique identifiers for the content it
represents (i.e. the id of the row in the database). Be sure to add a
seperator (like an underscore) at the end. Warning: this directive will
separator (like an underscore) at the end. Warning: this directive will
not work unless %Attr.IDPrefix is set to a non-empty value!
--# vim: et sw=4 sts=4

View File

@@ -1,6 +1,6 @@
CSS.MaxImgLength
TYPE: string/null
DEFAULT: '1200px'
DEFAULT: null
VERSION: 3.1.1
--DESCRIPTION--
<p>

View File

@@ -7,7 +7,8 @@ This parameter determines whether or not the filter should convert
input that is a full document with html and body tags to a fragment
of just the contents of a body tag. This parameter is simply something
HTML Purifier can do during an edge-case: for most inputs, this
processing is not necessary.
processing is not necessary. Warning: Full HTML purification has not
been implemented. See GitHub issue #7.
--ALIASES--
Core.AcceptFullDocuments

View File

@@ -8,6 +8,6 @@ converting all non-ASCII characters into decimal numeric entities before
converting it to its native encoding. This means that even characters that
can be expressed in the non-UTF-8 encoding will be entity-ized, which can
be a real downer for encodings like Big5. It also assumes that the ASCII
repetoire is available, although this is the case for almost all encodings.
repertoire is available, although this is the case for almost all encodings.
Anyway, use UTF-8!
--# vim: et sw=4 sts=4

View File

@@ -16,7 +16,7 @@ DEFAULT: NULL
</dd>
<dt><em>string</em> lexer identifier</dt>
<dd>
This is a slim way of manually overridding the implementation.
This is a slim way of manually overriding the implementation.
Currently recognized values are: DOMLex (the default PHP5
implementation)
and DirectLex (the default PHP4 implementation). Only use this if

View File

@@ -0,0 +1,10 @@
Core.RemoveBlanks
TYPE: bool
DEFAULT: false
VERSION: 4.18
--DESCRIPTION--
<p>
If set to true, blank nodes will be removed. This can be useful for maintaining
backwards compatibility when upgrading from previous versions of PHP.
</p>
--# vim: et sw=4 sts=4

View File

@@ -1,6 +1,6 @@
HTML.MaxImgLength
TYPE: int/null
DEFAULT: 1200
DEFAULT: null
VERSION: 3.1.1
--DESCRIPTION--
<p>

View File

@@ -6,7 +6,7 @@ DEFAULT: false
<p>
Whether or not to permit iframe tags in untrusted documents. This
directive must be accompanied by a whitelist of permitted iframes,
such as %URI.SafeIframeRegexp, otherwise it will fatally error.
such as %URI.SafeIframeRegexp or %URI.SafeIframeHosts, otherwise it will fatally error.
This directive has no effect on strict doctypes, as iframes are not
valid.
</p>

View File

@@ -0,0 +1,7 @@
URI.AllowedSymbols
TYPE: string/null
DEFAULT: '!$&\'()*+,;='
--DESCRIPTION--
If a system permits templated URLs, then the URI encoder may need extra
hints about which symbols to preserve.
--# vim: et sw=4 sts=4

View File

@@ -0,0 +1,14 @@
URI.SafeIframeHosts
TYPE: lookup/null
DEFAULT: null
--DESCRIPTION--
<p>
A whitelist which indicates what explicit hosts should be
allowed to embed iframe. See also %HTML.SafeIframeRegexp,
it has precedence over this config. Here are some example values:
</p>
<ul>
<li><code>www.youtube.com</code> - Allow YouTube videos</li>
<li><code>maps.google.com</code> - Allow Embedding a Google map</li>
</ul>
--# vim: et sw=4 sts=4

View File

@@ -142,12 +142,11 @@ class HTMLPurifier_ContentSets
if ($return !== false) {
return $return;
}
// error-out
trigger_error(
throw new Exception(
'Could not determine which ChildDef class to instantiate',
E_USER_ERROR
);
return false;
}
/**

View File

@@ -24,11 +24,7 @@ class HTMLPurifier_Context
public function register($name, &$ref)
{
if (array_key_exists($name, $this->_storage)) {
trigger_error(
"Name $name produces collision, cannot re-register",
E_USER_ERROR
);
return;
throw new Exception("Name $name produces collision, cannot re-register");
}
$this->_storage[$name] =& $ref;
}
@@ -43,10 +39,7 @@ class HTMLPurifier_Context
{
if (!array_key_exists($name, $this->_storage)) {
if (!$ignore_error) {
trigger_error(
"Attempted to retrieve non-existent variable $name",
E_USER_ERROR
);
throw new Exception("Attempted to retrieve non-existent variable $name");
}
$var = null; // so we can return by reference
return $var;
@@ -61,11 +54,7 @@ class HTMLPurifier_Context
public function destroy($name)
{
if (!array_key_exists($name, $this->_storage)) {
trigger_error(
"Attempted to destroy non-existent variable $name",
E_USER_ERROR
);
return;
throw new Exception("Attempted to destroy non-existent variable $name");
}
unset($this->_storage[$name]);
}

View File

@@ -139,8 +139,9 @@ class HTMLPurifier_DefinitionCache_Serializer extends HTMLPurifier_DefinitionCac
continue;
}
$key = substr($filename, 0, strlen($filename) - 4);
if ($this->isOld($key, $config)) {
unlink($dir . '/' . $filename);
$file = $dir . '/' . $filename;
if ($this->isOld($key, $config) && file_exists($file)) {
unlink($file);
}
}
closedir($dh);

View File

@@ -86,7 +86,7 @@ class HTMLPurifier_DoctypeRegistry
$doctype = $this->aliases[$doctype];
}
if (!isset($this->doctypes[$doctype])) {
trigger_error('Doctype ' . htmlspecialchars($doctype) . ' does not exist', E_USER_ERROR);
throw new Exception('Doctype ' . htmlspecialchars($doctype) . ' does not exist');
$anon = new HTMLPurifier_Doctype($doctype);
return $anon;
}

View File

@@ -12,7 +12,7 @@ class HTMLPurifier_Encoder
*/
private function __construct()
{
trigger_error('Cannot instantiate encoder, call methods statically', E_USER_ERROR);
throw new Exception('Cannot instantiate encoder, call methods statically');
}
/**
@@ -390,7 +390,7 @@ class HTMLPurifier_Encoder
$str = self::unsafeIconv($encoding, 'utf-8//IGNORE', $str);
if ($str === false) {
// $encoding is not a valid encoding
trigger_error('Invalid encoding ' . $encoding, E_USER_ERROR);
throw new Exception('Invalid encoding ' . $encoding);
return '';
}
// If the string is bjorked by Shift_JIS or a similar encoding
@@ -404,12 +404,11 @@ class HTMLPurifier_Encoder
}
$bug = HTMLPurifier_Encoder::testIconvTruncateBug();
if ($bug == self::ICONV_OK) {
trigger_error('Encoding not supported, please install iconv', E_USER_ERROR);
throw new Exception('Encoding not supported, please install iconv');
} else {
trigger_error(
throw new Exception(
'You have a buggy version of iconv, see https://bugs.php.net/bug.php?id=48147 ' .
'and http://sourceware.org/bugzilla/show_bug.cgi?id=13541',
E_USER_ERROR
'and http://sourceware.org/bugzilla/show_bug.cgi?id=13541'
);
}
}
@@ -454,7 +453,7 @@ class HTMLPurifier_Encoder
$str = mb_convert_encoding($str, 'ISO-8859-1', 'UTF-8');
return $str;
}
trigger_error('Encoding not supported', E_USER_ERROR);
throw new Exception('Encoding not supported');
// You might be tempted to assume that the ASCII representation
// might be OK, however, this is *not* universally true over all
// encodings. So we take the conservative route here, rather
@@ -545,10 +544,9 @@ class HTMLPurifier_Encoder
} elseif (($c = strlen($r)) < 9000) {
$code = self::ICONV_TRUNCATES;
} elseif ($c > 9000) {
trigger_error(
throw new Exception(
'Your copy of iconv is extremely buggy. Please notify HTML Purifier maintainers: ' .
'include your iconv version as per phpversion()',
E_USER_ERROR
'include your iconv version as per phpversion()'
);
} else {
$code = self::ICONV_OK;

View File

@@ -5,7 +5,7 @@
// $config or $context to the callback functions.
/**
* Handles referencing and derefencing character entities
* Handles referencing and dereferencing character entities
*/
class HTMLPurifier_EntityParser
{
@@ -116,8 +116,8 @@ class HTMLPurifier_EntityParser
protected function entityCallback($matches)
{
$entity = $matches[0];
$hex_part = @$matches[1];
$dec_part = @$matches[2];
$hex_part = isset($matches[1]) ? $matches[1] : null;
$dec_part = isset($matches[2]) ? $matches[2] : null;
$named_part = empty($matches[3]) ? (empty($matches[4]) ? "" : $matches[4]) : $matches[3];
if ($hex_part !== NULL && $hex_part !== "") {
return HTMLPurifier_Encoder::unichr(hexdec($hex_part));

View File

@@ -4,7 +4,7 @@
* Represents a pre or post processing filter on HTML Purifier's output
*
* Sometimes, a little ad-hoc fixing of HTML has to be done before
* it gets sent through HTML Purifier: you can use filters to acheive
* it gets sent through HTML Purifier: you can use filters to achieve
* this effect. For instance, YouTube videos can be preserved using
* this manner. You could have used a decorator for this task, but
* PHP's support for them is not terribly robust, so we're going

View File

@@ -54,6 +54,11 @@ class HTMLPurifier_Filter_ExtractStyleBlocks extends HTMLPurifier_Filter
*/
private $_enum_attrdef;
/**
* @type HTMLPurifier_AttrDef_Enum
*/
private $_universal_attrdef;
public function __construct()
{
$this->_tidy = new csstidy();
@@ -70,6 +75,13 @@ class HTMLPurifier_Filter_ExtractStyleBlocks extends HTMLPurifier_Filter
'focus'
)
);
$this->_universal_attrdef = new HTMLPurifier_AttrDef_Enum(
array(
'initial',
'inherit',
'unset',
)
);
}
/**
@@ -307,6 +319,11 @@ class HTMLPurifier_Filter_ExtractStyleBlocks extends HTMLPurifier_Filter
unset($style[$name]);
continue;
}
$uni_ret = $this->_universal_attrdef->validate($value, $config, $context);
if ($uni_ret !== false) {
$style[$name] = $uni_ret;
continue;
}
$def = $css_definition->info[$name];
$ret = $def->validate($value, $config, $context);
if ($ret === false) {

View File

@@ -19,7 +19,7 @@ class HTMLPurifier_Filter_YouTube extends HTMLPurifier_Filter
$pre_regex = '#<object[^>]+>.+?' .
'(?:http:)?//www.youtube.com/((?:v|cp)/[A-Za-z0-9\-_=]+).+?</object>#s';
$pre_replace = '<span class="youtube-embed">\1</span>';
return preg_replace($pre_regex, $pre_replace, $html);
return preg_replace($pre_regex, $pre_replace, (string)$html);
}
/**
@@ -31,7 +31,7 @@ class HTMLPurifier_Filter_YouTube extends HTMLPurifier_Filter
public function postFilter($html, $config, $context)
{
$post_regex = '#<span class="youtube-embed">((?:v|cp)/[A-Za-z0-9\-_=]+)</span>#';
return preg_replace_callback($post_regex, array($this, 'postFilterCallback'), $html);
return preg_replace_callback($post_regex, array($this, 'postFilterCallback'), (string)$html);
}
/**

View File

@@ -244,7 +244,7 @@ class HTMLPurifier_Generator
// whitespace (in fact, most don't, at least for attributes
// like alt, but an extra space at the end is barely
// noticeable). Still, we have a configuration knob for
// this, since this transformation is not necesary if you
// this, since this transformation is not necessary if you
// don't process user input with innerHTML or you don't plan
// on supporting Internet Explorer.
if ($this->_innerHTMLFix) {

View File

@@ -264,9 +264,8 @@ class HTMLPurifier_HTMLDefinition extends HTMLPurifier_Definition
if (isset($this->info_content_sets['Block'][$block_wrapper])) {
$this->info_block_wrapper = $block_wrapper;
} else {
trigger_error(
'Cannot use non-block element as block wrapper',
E_USER_ERROR
throw new Exception(
'Cannot use non-block element as block wrapper'
);
}
@@ -276,11 +275,7 @@ class HTMLPurifier_HTMLDefinition extends HTMLPurifier_Definition
$this->info_parent = $parent;
$this->info_parent_def = $def;
} else {
trigger_error(
'Cannot use unrecognized element as parent',
E_USER_ERROR
);
$this->info_parent_def = $this->manager->getElement($this->info_parent, true);
throw new Exception('Cannot use unrecognized element as parent');
}
// support template text

View File

@@ -28,7 +28,7 @@ class HTMLPurifier_HTMLModule_Edit extends HTMLPurifier_HTMLModule
// HTML 4.01 specifies that ins/del must not contain block
// elements when used in an inline context, chameleon is
// a complicated workaround to acheive this effect
// a complicated workaround to achieve this effect
// Inline context ! Block context (exclamation mark is
// separator, see getChildDef for parsing)

View File

@@ -28,22 +28,28 @@ class HTMLPurifier_HTMLModule_Iframe extends HTMLPurifier_HTMLModule
if ($config->get('HTML.SafeIframe')) {
$this->safe = true;
}
$attrs = array(
'src' => 'URI#embedded',
'width' => 'Length',
'height' => 'Length',
'name' => 'ID',
'scrolling' => 'Enum#yes,no,auto',
'frameborder' => 'Enum#0,1',
'longdesc' => 'URI',
'marginheight' => 'Pixels',
'marginwidth' => 'Pixels',
);
if ($config->get('HTML.Trusted')) {
$attrs['allowfullscreen'] = 'Bool#allowfullscreen';
}
$this->addElement(
'iframe',
'Inline',
'Flow',
'Common',
array(
'src' => 'URI#embedded',
'width' => 'Length',
'height' => 'Length',
'name' => 'ID',
'scrolling' => 'Enum#yes,no,auto',
'frameborder' => 'Enum#0,1',
'longdesc' => 'URI',
'marginheight' => 'Pixels',
'marginwidth' => 'Pixels',
)
$attrs
);
}
}

View File

@@ -2,7 +2,7 @@
/**
* XHTML 1.1 Ruby Annotation Module, defines elements that indicate
* short runs of text alongside base text for annotation or pronounciation.
* short runs of text alongside base text for annotation or pronunciation.
*/
class HTMLPurifier_HTMLModule_Ruby extends HTMLPurifier_HTMLModule
{

View File

@@ -112,9 +112,8 @@ class HTMLPurifier_HTMLModule_Tidy extends HTMLPurifier_HTMLModule
return;
}
if (!isset($this->fixesForLevel[$this->defaultLevel])) {
trigger_error(
'Default level ' . $this->defaultLevel . ' does not exist',
E_USER_ERROR
throw new Exception(
'Default level ' . $this->defaultLevel . ' does not exist'
);
return;
}
@@ -162,8 +161,7 @@ class HTMLPurifier_HTMLModule_Tidy extends HTMLPurifier_HTMLModule
$e->$type = $fix;
break;
default:
trigger_error("Fix type $type not supported", E_USER_ERROR);
break;
throw new Exception("Fix type $type not supported");
}
}
}

View File

@@ -1,7 +1,7 @@
<?php
/**
* Name is deprecated, but allowed in strict doctypes, so onl
* Name is deprecated, but allowed in strict doctypes, so only
*/
class HTMLPurifier_HTMLModule_Tidy_Name extends HTMLPurifier_HTMLModule_Tidy
{

View File

@@ -183,11 +183,7 @@ class HTMLPurifier_HTMLModuleManager
if (!$ok) {
$module = $original_module;
if (!class_exists($module)) {
trigger_error(
$original_module . ' module does not exist',
E_USER_ERROR
);
return;
throw new Exception($original_module . ' module does not exist');
}
}
$module = new $module();

View File

@@ -173,14 +173,8 @@ class HTMLPurifier_LanguageFactory
// infinite recursion guard
if (isset($languages_seen[$code])) {
trigger_error(
'Circular fallback reference in language ' .
$code,
E_USER_ERROR
);
$fallback = 'en';
throw new Exception('Circular fallback reference in language ' . $code);
}
$language_seen[$code] = true;
// load the fallback recursively
$this->loadLanguage($fallback);

View File

@@ -238,7 +238,7 @@ class HTMLPurifier_Lexer
*/
public function tokenizeHTML($string, $config, $context)
{
trigger_error('Call to abstract class', E_USER_ERROR);
throw new Exception('Call to abstract class');
}
/**
@@ -269,20 +269,6 @@ class HTMLPurifier_Lexer
);
}
/**
* Special Internet Explorer conditional comments should be removed.
* @param string $string HTML string to process.
* @return string HTML with conditional comments removed.
*/
protected static function removeIEConditional($string)
{
return preg_replace(
'#<!--\[if [^>]+\]>.*?<!\[endif\]-->#si', // probably should generalize for all strings
'',
$string
);
}
/**
* Callback function for escapeCDATA() that does the work.
*
@@ -323,8 +309,6 @@ class HTMLPurifier_Lexer
// escape CDATA
$html = $this->escapeCDATA($html);
$html = $this->removeIEConditional($html);
// extract body from document if applicable
if ($config->get('Core.ConvertDocumentToFragment')) {
$e = false;

View File

@@ -52,14 +52,7 @@ class HTMLPurifier_Lexer_DOMLex extends HTMLPurifier_Lexer
// attempt to armor stray angled brackets that cannot possibly
// form tags and thus are probably being used as emoticons
if ($config->get('Core.AggressivelyFixLt')) {
$char = '[^a-z!\/]';
$comment = "/<!--(.*?)(-->|\z)/is";
$html = preg_replace_callback($comment, array($this, 'callbackArmorCommentEntities'), $html);
do {
$old = $html;
$html = preg_replace("/<($char)/i", '&lt;\\1', $html);
} while ($html !== $old);
$html = preg_replace_callback($comment, array($this, 'callbackUndoCommentSubst'), $html); // fix comments
$html = $this->aggressivelyFixLt($html);
}
// preprocess html, essential for UTF-8
@@ -72,6 +65,9 @@ class HTMLPurifier_Lexer_DOMLex extends HTMLPurifier_Lexer
if ($config->get('Core.AllowParseManyTags') && defined('LIBXML_PARSEHUGE')) {
$options |= LIBXML_PARSEHUGE;
}
if ($config->get('Core.RemoveBlanks') && defined('LIBXML_NOBLANKS')) {
$options |= LIBXML_NOBLANKS;
}
set_error_handler(array($this, 'muteErrorHandler'));
// loadHTML() fails on PHP 5.3 when second parameter is given
@@ -285,7 +281,7 @@ class HTMLPurifier_Lexer_DOMLex extends HTMLPurifier_Lexer
*/
public function callbackUndoCommentSubst($matches)
{
return '<!--' . strtr($matches[1], array('&amp;' => '&', '&lt;' => '<')) . $matches[2];
return '<!--' . $this->undoCommentSubstr($matches[1]) . $matches[2];
}
/**
@@ -296,7 +292,25 @@ class HTMLPurifier_Lexer_DOMLex extends HTMLPurifier_Lexer
*/
public function callbackArmorCommentEntities($matches)
{
return '<!--' . str_replace('&', '&amp;', $matches[1]) . $matches[2];
return '<!--' . $this->armorEntities($matches[1]) . $matches[2];
}
/**
* @param string $string
* @return string
*/
protected function armorEntities($string)
{
return str_replace('&', '&amp;', $string);
}
/**
* @param string $string
* @return string
*/
protected function undoCommentSubstr($string)
{
return strtr($string, array('&amp;' => '&', '&lt;' => '<'));
}
/**
@@ -332,6 +346,66 @@ class HTMLPurifier_Lexer_DOMLex extends HTMLPurifier_Lexer
$ret .= '</body></html>';
return $ret;
}
/**
* @param string $html
* @return string
*/
protected function aggressivelyFixLt($html)
{
$char = '[^a-z!\/]';
$html = $this->manipulateHtmlComments($html, array($this, 'armorEntities'));
do {
$old = $html;
$html = preg_replace("/<($char)/i", '&lt;\\1', $html);
} while ($html !== $old);
return $this->manipulateHtmlComments($html, array($this, 'undoCommentSubstr'));
}
/**
* Modify HTML comments in the given HTML content using a callback.
*
* @param string $html
* @param callable $callback
* @return string
*/
protected function manipulateHtmlComments($html, callable $callback)
{
$offset = 0;
$startTag = '<!--';
$endTag = '-->';
while (($startPos = strpos($html, $startTag, $offset)) !== false) {
$startPos += strlen($startTag); // Move past `<!--`
$endPos = strpos($html, $endTag, $startPos);
if ($endPos === false) {
// No matching ending comment tag found
break;
}
// Extract the original comment content
$commentContent = substr($html, $startPos, $endPos - $startPos);
// Apply the callback to the comment content
$newCommentContent = $callback($commentContent);
// Reconstruct the entire comment with the new content
$newComment = $startTag . $newCommentContent . $endTag;
// Replace the old comment in the HTML content with the new one
$html = substr($html, 0, $startPos - strlen($startTag)) .
$newComment .
substr($html, $endPos + strlen($endTag));
// Move offset to the end of the new comment for the next iteration
$offset = strpos($html, $newComment, $offset) + strlen($newComment);
}
return $html;
}
}
// vim: et sw=4 sts=4

View File

@@ -111,7 +111,7 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer
if ($synchronize_interval && // synchronization is on
$cursor > 0 && // cursor is further than zero
$loops % $synchronize_interval === 0) { // time to synchronize!
$current_line = 1 + $this->substrCount($html, $nl, 0, $cursor);
$current_line = 1 + substr_count($html, $nl, 0, $cursor);
}
}
@@ -139,7 +139,7 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer
);
if ($maintain_line_numbers) {
$token->rawPosition($current_line, $current_col);
$current_line += $this->substrCount($html, $nl, $cursor, $position_next_lt - $cursor);
$current_line += substr_count($html, $nl, $cursor, $position_next_lt - $cursor);
}
$array[] = $token;
$cursor = $position_next_lt + 1;
@@ -214,7 +214,7 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer
);
if ($maintain_line_numbers) {
$token->rawPosition($current_line, $current_col);
$current_line += $this->substrCount($html, $nl, $cursor, $strlen_segment);
$current_line += substr_count($html, $nl, $cursor, $strlen_segment);
}
$array[] = $token;
$cursor = $end ? $position_comment_end : $position_comment_end + 3;
@@ -229,7 +229,7 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer
$token = new HTMLPurifier_Token_End($type);
if ($maintain_line_numbers) {
$token->rawPosition($current_line, $current_col);
$current_line += $this->substrCount($html, $nl, $cursor, $position_next_gt - $cursor);
$current_line += substr_count($html, $nl, $cursor, $position_next_gt - $cursor);
}
$array[] = $token;
$inside_tag = false;
@@ -248,7 +248,7 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer
$token = new HTMLPurifier_Token_Text('<');
if ($maintain_line_numbers) {
$token->rawPosition($current_line, $current_col);
$current_line += $this->substrCount($html, $nl, $cursor, $position_next_gt - $cursor);
$current_line += substr_count($html, $nl, $cursor, $position_next_gt - $cursor);
}
$array[] = $token;
$inside_tag = false;
@@ -276,7 +276,7 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer
}
if ($maintain_line_numbers) {
$token->rawPosition($current_line, $current_col);
$current_line += $this->substrCount($html, $nl, $cursor, $position_next_gt - $cursor);
$current_line += substr_count($html, $nl, $cursor, $position_next_gt - $cursor);
}
$array[] = $token;
$inside_tag = false;
@@ -310,7 +310,7 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer
}
if ($maintain_line_numbers) {
$token->rawPosition($current_line, $current_col);
$current_line += $this->substrCount($html, $nl, $cursor, $position_next_gt - $cursor);
$current_line += substr_count($html, $nl, $cursor, $position_next_gt - $cursor);
}
$array[] = $token;
$cursor = $position_next_gt + 1;
@@ -343,28 +343,6 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer
return $array;
}
/**
* PHP 5.0.x compatible substr_count that implements offset and length
* @param string $haystack
* @param string $needle
* @param int $offset
* @param int $length
* @return int
*/
protected function substrCount($haystack, $needle, $offset, $length)
{
static $oldVersion;
if ($oldVersion === null) {
$oldVersion = version_compare(PHP_VERSION, '5.1', '<');
}
if ($oldVersion) {
$haystack = substr($haystack, $offset, $length);
return substr_count($haystack, $needle);
} else {
return substr_count($haystack, $needle, $offset, $length);
}
}
/**
* Takes the inside of an HTML tag and makes an assoc array of attributes.
*

View File

@@ -1223,14 +1223,14 @@ class HTML5
'type' => self::COMMENT
);
/* Otherwise if the next seven chacacters are a case-insensitive match
/* Otherwise if the next seven characters are a case-insensitive match
for the word "DOCTYPE", then consume those characters and switch to the
DOCTYPE state. */
} elseif (strtolower($this->character($this->char + 1, 7)) === 'doctype') {
$this->char += 7;
$this->state = 'doctype';
/* Otherwise, is is a parse error. Switch to the bogus comment state.
/* Otherwise, it is a parse error. Switch to the bogus comment state.
The next character that is consumed, if any, is the first character
that will be in the comment. */
} else {

View File

@@ -20,7 +20,7 @@
* The second objective is to ensure that explicitly excluded elements of
* an element do not appear in its children. Code that accomplishes this
* task is pervasive through the strategy, though the two are distinct tasks
* and could, theoretically, be seperated (although it's not recommended).
* and could, theoretically, be separated (although it's not recommended).
*
* @note Whether or not unrecognized children are silently dropped or
* translated into text depends on the child definitions.

View File

@@ -641,7 +641,7 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
// Needless to say, we need to UN-skip the token so it gets
// reprocessed.
//
// - Suppose that you successfuly process a token, replace it with
// - Suppose that you successfully process a token, replace it with
// one with your skip mark, but now another injector wants to
// process the skipped token with another token. Should you continue
// to skip that new token, or reprocess it? If you reprocess,

View File

@@ -44,7 +44,7 @@ abstract class HTMLPurifier_Token_Tag extends HTMLPurifier_Token
$this->name = ctype_lower($name) ? $name : strtolower($name);
foreach ($attr as $key => $value) {
// normalization only necessary when key is not lowercase
if (!ctype_lower($key)) {
if (!ctype_lower((string)$key)) {
$new_key = strtolower($key);
if (!isset($attr[$new_key])) {
$attr[$new_key] = $attr[$key];

View File

@@ -108,7 +108,7 @@ class HTMLPurifier_URI
public function validate($config, $context)
{
// ABNF definitions from RFC 3986
$chars_sub_delims = '!$&\'()*+,;=';
$chars_sub_delims = $config->get('URI.AllowedSymbols');
$chars_gen_delims = ':/?#[]@';
$chars_pchar = $chars_sub_delims . ':@';

View File

@@ -71,7 +71,7 @@ class HTMLPurifier_URIFilter_Munge extends HTMLPurifier_URIFilter
} // ignore unknown schemes, maybe another postfilter did it
if (!$scheme_obj->browsable) {
return true;
} // ignore non-browseable schemes, since we can't munge those in a reasonable way
} // ignore non-browsable schemes, since we can't munge those in a reasonable way
if ($uri->isBenign($config, $context)) {
return true;
} // don't redirect if a benign URL

View File

@@ -57,11 +57,12 @@ class HTMLPurifier_URIFilter_SafeIframe extends HTMLPurifier_URIFilter
return true;
}
// check if we actually have some whitelists enabled
if ($this->regexp === null) {
return false;
if ($this->regexp !== null) {
return preg_match($this->regexp, $uri->toString());
}
// actually check the whitelists
return preg_match($this->regexp, $uri->toString());
// check if the host is in a whitelist for safe iframe hosts
$safeHosts = $config->get('URI.SafeIframeHosts');
return $safeHosts !== null && isset($safeHosts[$uri->host]);
}
}

View File

@@ -29,7 +29,7 @@ class HTMLPurifier_URIParser
// Regexp is as per Appendix B.
// Note that ["<>] are an addition to the RFC's recommended
// characters, because they represent external delimeters.
// characters, because they represent external delimiters.
$r_URI = '!'.
'(([a-zA-Z0-9\.\+\-]+):)?'. // 2. Scheme
'(//([^/?#"<>]*))?'. // 4. Authority
@@ -43,7 +43,7 @@ class HTMLPurifier_URIParser
if (!$result) return false; // *really* invalid URI
// seperate out parts
// separate out parts
$scheme = !empty($matches[1]) ? $matches[2] : null;
$authority = !empty($matches[3]) ? $matches[4] : null;
$path = $matches[5]; // always present, can be empty

View File

@@ -105,7 +105,7 @@ class HTMLPurifier_URIScheme_data extends HTMLPurifier_URIScheme
}
$image_code = $info[2];
} else {
trigger_error("could not find exif_imagetype or getimagesize functions", E_USER_ERROR);
throw new Exception("could not find exif_imagetype or getimagesize functions");
}
$real_content_type = image_type_to_mime_type($image_code);
if ($real_content_type != $content_type) {

View File

@@ -800,14 +800,14 @@ class HTML5
'type' => self::COMMENT
);
/* Otherwise if the next seven chacacters are a case-insensitive match
/* Otherwise if the next seven characters are a case-insensitive match
for the word "DOCTYPE", then consume those characters and switch to the
DOCTYPE state. */
} elseif(strtolower($this->character($this->char + 1, 7)) === 'doctype') {
$this->char += 7;
$this->state = 'doctype';
/* Otherwise, is is a parse error. Switch to the bogus comment state.
/* Otherwise, it is a parse error. Switch to the bogus comment state.
The next character that is consumed, if any, is the first character
that will be in the comment. */
} else {

View File

@@ -54,5 +54,6 @@ if (strtolower($GLOBALS['PHORUM']['DATA']['CHARSET']) !== 'utf-8') {
$config->set('Core.EscapeNonASCIICharacters', true);
}
$config->set('Core.AllowParseManyTags', false);
$config->set('Core.RemoveBlanks', false);
// vim: et sw=4 sts=4

View File

@@ -14,7 +14,7 @@ if(!defined("PHORUM_ADMIN")) exit;
// error reporting is good!
error_reporting(E_ALL ^ E_NOTICE);
// load library and other paraphenalia
// load library and other paraphernalia
require_once './include/admin/PhorumInputForm.php';
require_once (dirname(__FILE__) . '/htmlpurifier/HTMLPurifier.auto.php');
require_once (dirname(__FILE__) . '/init-config.php');

View File

@@ -36,7 +36,7 @@ function formatCode($string)
<p>XSS attacks are from
<a href="http://ha.ckers.org/xss.html">http://ha.ckers.org/xss.html</a>.</p>
<p><strong>Caveats:</strong>
<tt>Google.com</tt> has been programatically disallowed, but as you can
<tt>Google.com</tt> has been programmatically disallowed, but as you can
see, there are ways of getting around that, so coverage in this area
is not complete. Most XSS broadcasts its presence by spawning an alert dialogue.
The displayed code is not strictly correct, as linebreaks have been forced for
@@ -50,7 +50,7 @@ if (version_compare(PHP_VERSION, '5', '<')) exit('<p>Requires PHP 5.</p>');
$xml = simplexml_load_file('xssAttacks.xml');
// programatically disallow google.com for URI evasion tests
// programmatically disallow google.com for URI evasion tests
// not complete
$config = HTMLPurifier_Config::createDefault();
$config->set('URI.HostBlacklist', array('google.com'));

View File

@@ -864,7 +864,7 @@ As a side note, this was also effective against a real world XSS filter I came a
<attack>
<name>Extraneous Open Brackets</name>
<code>&lt;&lt;SCRIPT&gt;alert(&quot;XSS&quot;);//&lt;&lt;/SCRIPT&gt;</code>
<desc>(Submitted by Franz Sedlmaier http://www.pilorz.net/). This XSS vector could defeat certain detection engines that work by first using matching pairs of open and close angle brackets and then by doing a comparison of the tag inside, instead of a more efficient algorythm like Boyer-Moore (http://www.cs.utexas.edu/users/moore/best-ideas/string-searching/) that looks for entire string matches of the open angle bracket and associated tag (post de-obfuscation, of course). The double slash comments out the ending extraneous bracket to supress a JavaScript error.</desc>
<desc>(Submitted by Franz Sedlmaier http://www.pilorz.net/). This XSS vector could defeat certain detection engines that work by first using matching pairs of open and close angle brackets and then by doing a comparison of the tag inside, instead of a more efficient algorithm like Boyer-Moore (http://www.cs.utexas.edu/users/moore/best-ideas/string-searching/) that looks for entire string matches of the open angle bracket and associated tag (post de-obfuscation, of course). The double slash comments out the ending extraneous bracket to suppress a JavaScript error.</desc>
<label>Embedded Character Attacks</label>
<browser>Browser support: [&lt;span class=&quot;s&quot;&gt;IE6.0&lt;/span&gt;|&lt;span class=&quot;s&quot;&gt;NS8.1-IE&lt;/span&gt;] [&lt;span class=&quot;s&quot;&gt;NS8.1-G&lt;/span&gt;|&lt;span class=&quot;s&quot;&gt;FF1.5&lt;/span&gt;] [&lt;span class=&quot;s&quot;&gt;O8.54&lt;/span&gt;]</browser>
@@ -940,7 +940,7 @@ alert(a.source)&lt;/SCRIPT&gt;</code>
-onCut() (user needs to copy something or it can be exploited using the execCommand(&quot;Cut&quot;) command)
-onDataAvailible() (user would need to change data in an element, or attacker could perform the same function)
-onDataAvailable() (user would need to change data in an element, or attacker could perform the same function)
-onDataSetChanged() (fires when the data set exposed by a data source object changes)

View File

@@ -34,12 +34,12 @@ class HTMLPurifier_AttrCollectionsTest extends HTMLPurifier_Harness
$modules['Module2'] = new HTMLPurifier_HTMLModule();
$modules['Module2']->attr_collections = array(
'Core' => array(
0 => array('Brocolli')
0 => array('Broccoli')
),
'Soup' => array(
'attribute-3' => 'Type3'
),
'Brocolli' => array()
'Broccoli' => array()
);
$collections->doConstruct($types, $modules);
@@ -48,14 +48,14 @@ class HTMLPurifier_AttrCollectionsTest extends HTMLPurifier_Harness
$collections->info,
array(
'Core' => array(
0 => array('Soup', 'Undefined', 'Brocolli'),
0 => array('Soup', 'Undefined', 'Broccoli'),
'attribute' => 'Type',
'attribute-2' => 'Type2'
),
'Soup' => array(
'attribute-3' => 'Type3'
),
'Brocolli' => array()
'Broccoli' => array()
)
);

View File

@@ -0,0 +1,24 @@
<?php
class HTMLPurifier_AttrDef_CSS_RatioTest extends HTMLPurifier_AttrDefHarness
{
public function test()
{
$this->def = new HTMLPurifier_AttrDef_CSS_Ratio();
$this->assertDef('1/2');
$this->assertDef('1 / 2', '1/2');
$this->assertDef('1');
$this->assertDef('1/0');
$this->assertDef('0/1');
$this->assertDef('1/2/3', false);
$this->assertDef('/2/3', false);
$this->assertDef('/12', false);
$this->assertDef('1/', false);
$this->assertDef('asdf', false);
}
}
// vim: et sw=4 sts=4

View File

@@ -13,6 +13,8 @@ class HTMLPurifier_AttrDef_CSSTest extends HTMLPurifier_AttrDefHarness
{
// regular cases, singular
$this->assertDef('text-align:right;');
$this->assertDef('direction:ltr;');
$this->assertDef('direction:rtl;');
$this->assertDef('border-left-style:solid;');
$this->assertDef('border-style:solid dotted;');
$this->assertDef('clear:right;');
@@ -72,6 +74,10 @@ class HTMLPurifier_AttrDef_CSSTest extends HTMLPurifier_AttrDefHarness
$this->assertDef('min-width:50rem;');
$this->assertDef('min-width:50vw;');
$this->assertDef('min-width:-50vw;', false);
$this->assertDef('aspect-ratio:16/9;');
$this->assertDef('aspect-ratio:auto;');
$this->assertDef('aspect-ratio:16/9 auto;');
$this->assertDef('aspect-ratio:auto 16/9;');
$this->assertDef('text-decoration:underline;');
$this->assertDef('text-decoration-line:overline;');
$this->assertDef('text-decoration-style:dashed;');
@@ -116,8 +122,10 @@ class HTMLPurifier_AttrDef_CSSTest extends HTMLPurifier_AttrDefHarness
$this->assertDef('text-transform:capitalize;destroy:it;',
'text-transform:capitalize;');
// inherit works for everything
// universal values work for everything
$this->assertDef('text-align:inherit;');
$this->assertDef('text-align:initial;');
$this->assertDef('text-align:unset;');
// bad props
$this->assertDef('nodice:foobar;', false);

View File

@@ -56,7 +56,8 @@ class HTMLPurifier_AttrDef_URI_HostTest extends HTMLPurifier_AttrDefHarness
function testAllowUnderscore() {
$this->config->set('Core.AllowHostnameUnderscore', true);
$this->assertDef("foo_bar.example.com");
$this->assertDef("foo_.example.com", false);
$this->assertDef("foo_.example.com");
$this->assertDef("_dmarc.example.com");
}
}

View File

@@ -12,7 +12,7 @@ class HTMLPurifier_AttrTypesTest extends HTMLPurifier_Harness
new HTMLPurifier_AttrDef_Text()
);
$this->expectError('Cannot retrieve undefined attribute type foobar');
$this->expectException(new Exception('Cannot retrieve undefined attribute type foobar'));
$types->get('foobar');
$this->assertIdentical(

View File

@@ -59,7 +59,7 @@ extends HTMLPurifier_ChildDefHarness
);
}
public function testWrapComplicatedSring()
public function testWrapComplicatedString()
{
$this->assertResult(
$var = 'He said<br />perhaps<br />we should <b>nuke</b> them.',
@@ -84,7 +84,7 @@ extends HTMLPurifier_ChildDefHarness
public function testError()
{
$this->expectError('Cannot use non-block element as block wrapper');
$this->expectException(new Exception('Cannot use non-block element as block wrapper'));
$this->obj = new HTMLPurifier_ChildDef_StrictBlockquote('div | p');
$this->config->set('HTML.BlockWrapper', 'dav');
$this->config->set('Cache.DefinitionImpl', null);

View File

@@ -155,7 +155,7 @@ class HTMLPurifier_ConfigTest extends HTMLPurifier_Harness
$this->assertIdentical($config->get('Home.Rug'), 3);
$this->expectError('Cannot get value from aliased directive, use real name Home.Rug');
$this->expectException(new Exception('Cannot get value from aliased directive, use real name Home.Rug'));
$config->get('Home.Carpet');
$this->expectError('Home.Carpet is an alias, preferred directive name is Home.Rug');
@@ -184,7 +184,7 @@ class HTMLPurifier_ConfigTest extends HTMLPurifier_Harness
)
);
// grab a non-existant namespace
// grab a non-existent namespace
$this->expectError('Cannot retrieve undefined namespace Constants');
$config->getBatch('Constants');
@@ -384,7 +384,7 @@ class HTMLPurifier_ConfigTest extends HTMLPurifier_Harness
$config->finalize();
$this->expectError('Cannot set directive after finalization');
$this->expectException(new Exception('Cannot set directive after finalization'));
$config->set('Poem.Meter', 'vedic');
$this->expectError('Cannot load directives after finalization');

View File

@@ -27,11 +27,11 @@ class HTMLPurifier_ContextTest extends HTMLPurifier_Harness
$this->context->destroy('IDAccumulator');
$this->assertFalse($this->context->exists('IDAccumulator'));
$this->expectError('Attempted to retrieve non-existent variable IDAccumulator');
$this->expectException(new Exception('Attempted to retrieve non-existent variable IDAccumulator'));
$accumulator_3 =& $this->context->get('IDAccumulator');
$this->assertNull($accumulator_3);
$this->expectError('Attempted to destroy non-existent variable IDAccumulator');
$this->expectException(new Exception('Attempted to destroy non-existent variable IDAccumulator'));
$this->context->destroy('IDAccumulator');
}
@@ -41,7 +41,7 @@ class HTMLPurifier_ContextTest extends HTMLPurifier_Harness
$var = true;
$this->context->register('OnceOnly', $var);
$this->expectError('Name OnceOnly produces collision, cannot re-register');
$this->expectException(new Exception('Name OnceOnly produces collision, cannot re-register'));
$this->context->register('OnceOnly', $var);
// destroy it, now registration is okay

View File

@@ -36,7 +36,7 @@ class HTMLPurifier_DoctypeRegistryTest extends HTMLPurifier_Harness
$registry = new HTMLPurifier_DoctypeRegistry();
$this->expectError('Doctype XHTML 2.0 does not exist');
$this->expectException(new Exception('Doctype XHTML 2.0 does not exist'));
$registry->get('XHTML 2.0');
// prevent XSS

View File

@@ -47,7 +47,7 @@ class HTMLPurifier_EncoderTest extends HTMLPurifier_Harness
{
if (!HTMLPurifier_Encoder::iconvAvailable()) return;
$this->config->set('Core.Encoding', 'utf99');
$this->expectError('Invalid encoding utf99');
$this->expectException(new Exception('Invalid encoding utf99'));
$this->assertIdentical(
HTMLPurifier_Encoder::convertToUTF8("\xF6", $this->config, $this->context),
''

View File

@@ -88,6 +88,13 @@ class HTMLPurifier_Filter_ExtractStyleBlocksTest extends HTMLPurifier_Harness
$this->assertCleanCSS("a .foo #id div.cl#foo {\nfont-weight:700\n}");
}
public function test_cleanCSS_universals()
{
$this->assertCleanCSS("a {\nfont-weight:inherit\n}");
$this->assertCleanCSS("a {\nfont-weight:initial\n}");
$this->assertCleanCSS("a {\nfont-weight:unset\n}");
}
public function test_cleanCSS_angledBrackets()
{
// [Content] No longer can smuggle in angled brackets using

View File

@@ -0,0 +1,10 @@
<table>
<caption>
Cool Table
</caption>
<tr>
<td>
Element 1
</td>
</tr>
</table>

View File

@@ -0,0 +1,7 @@
<table><caption>
Cool Table
</caption>
<tr><td>
Element 1
</td>
</tr></table>

View File

@@ -222,6 +222,17 @@ a[href|title]
$this->assertPurification_AllowedAttributes_local_p_style();
}
public function test_AllowedAttributes_invalidAttributeDueToConsistingOfNumbers_UsingDirectLex()
{
$this->config->set('HTML.AllowedElements', array('a'));
$this->config->set('HTML.AllowedAttributes', 'href');
$this->config->set('Core.LexerImpl', 'DirectLex');
$this->assertPurification(
'<a href="https://example.com/" 10="hoge">Test</a>',
'<a href="https://example.com/">Test</a>'
);
}
public function test_ForbiddenElements()
{
$this->config->set('HTML.ForbiddenElements', 'b');
@@ -399,6 +410,19 @@ a[href|title]
$this->assertIdentical($input, $output);
}
public function test_removeBlanks()
{
$config = HTMLPurifier_Config::createDefault();
$config->set('Core.RemoveBlanks', true);
$input = file_get_contents(__DIR__ . '/FixtureData/RemoveBlankTestCaseInput.html');
$expected = file_get_contents(__DIR__ . '/FixtureData/RemoveBlankTestCaseOutput.html');
$purifier = new HTMLPurifier($config);
$actual = $purifier->purify($input);
$this->assertIdentical($expected, $actual);
}
}
// vim: et sw=4 sts=4

View File

@@ -11,6 +11,7 @@ class HTMLPurifier_HTMLModule_ImageTest extends HTMLPurifier_HTMLModuleHarness
public function testLengthTooLarge()
{
$this->config->set('HTML.MaxImgLength', 1200);
$this->assertResult(
'<img height="40000" width="40000" src="" alt="" />',
'<img height="1200" width="1200" src="" alt="" />'
@@ -19,6 +20,7 @@ class HTMLPurifier_HTMLModule_ImageTest extends HTMLPurifier_HTMLModuleHarness
public function testLengthPercentage()
{
$this->config->set('HTML.MaxImgLength', 1200);
$this->assertResult(
'<img height="100%" width="100%" src="" alt="" />',
'<img src="" alt="" />'

View File

@@ -6,6 +6,7 @@ class HTMLPurifier_HTMLModule_SafeEmbedTest extends HTMLPurifier_HTMLModuleHarne
public function setUp()
{
parent::setUp();
$this->config->set('HTML.MaxImgLength', 1200);
$def = $this->config->getHTMLDefinition(true);
$def->manager->addModule('SafeEmbed');
}

View File

@@ -8,6 +8,7 @@ class HTMLPurifier_HTMLModule_SafeObjectTest extends HTMLPurifier_HTMLModuleHarn
parent::setUp();
$this->config->set('HTML.DefinitionID', 'HTMLPurifier_HTMLModule_SafeObjectTest');
$this->config->set('HTML.SafeObject', true);
$this->config->set('HTML.MaxImgLength', 1200);
}
public function testMinimal()

Some files were not shown because too many files have changed in this diff Show More