1
0
mirror of https://github.com/ezyang/htmlpurifier.git synced 2025-08-03 12:47:56 +02:00

Compare commits

...

32 Commits

Author SHA1 Message Date
Edward Z. Yang
f38e81785f Release 2.1.5
git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/branches/php4@1814 48356398-32a2-884e-a903-53898d9a118a
2008-06-19 22:57:15 +00:00
Edward Z. Yang
2cc829a8cf Fix PHP 4.3.9/10 bug with float handling
git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/branches/php4@1806 48356398-32a2-884e-a903-53898d9a118a
2008-06-19 21:13:56 +00:00
Edward Z. Yang
e80a54a7c9 Add missing include.
git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/branches/php4@1805 48356398-32a2-884e-a903-53898d9a118a
2008-06-19 19:58:53 +00:00
Edward Z. Yang
6f71e65661 [2.1.5] [MFH] Fix text-decoration: none bug
git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/branches/php4@1800 48356398-32a2-884e-a903-53898d9a118a
2008-06-17 03:18:23 +00:00
Edward Z. Yang
6f25c39c3e [2.1.5] [MFH] Fix Shift_JIS bug.
git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/branches/php4@1793 48356398-32a2-884e-a903-53898d9a118a
2008-06-11 19:01:22 +00:00
Edward Z. Yang
b8b1ac283d [2.1.5] [MFH] Fix regression in FontFamily
git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/branches/php4@1792 48356398-32a2-884e-a903-53898d9a118a
2008-06-11 18:54:19 +00:00
Edward Z. Yang
450fc6649d [2.1.5] [MFH] Fix Shift_JIS encoding wonkiness with yen symbols and whatnot, as well as other patches
git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/branches/php4@1791 48356398-32a2-884e-a903-53898d9a118a
2008-06-11 18:49:56 +00:00
Edward Z. Yang
369a69d533 [2.1.5] [MFH] Fix stray backslashes in font-family.
git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/branches/php4@1790 48356398-32a2-884e-a903-53898d9a118a
2008-06-11 17:43:48 +00:00
Edward Z. Yang
72f5819ef6 [2.1.5] [MFH] Round up imagecrash support with HTML.MaxImgLength
git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/branches/php4@1789 48356398-32a2-884e-a903-53898d9a118a
2008-06-11 17:38:25 +00:00
Edward Z. Yang
3540ea7fce [2.1.5] [MFH] Make modules use setup($config) instead of constructor
git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/branches/php4@1788 48356398-32a2-884e-a903-53898d9a118a
2008-06-11 17:10:39 +00:00
Edward Z. Yang
c03953f85e [2.1.5] [MFH] Percent encode query and hash, and lazy update with attr validator
git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/branches/php4@1787 48356398-32a2-884e-a903-53898d9a118a
2008-06-11 04:00:06 +00:00
Edward Z. Yang
0d262b3a1d Add missing bits from previous commit.
git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/branches/php4@1786 48356398-32a2-884e-a903-53898d9a118a
2008-06-11 01:56:22 +00:00
Edward Z. Yang
234cd2196f [2.1.5] [MFH] Complete the imagecrash added protection fixes
git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/branches/php4@1785 48356398-32a2-884e-a903-53898d9a118a
2008-06-11 01:53:31 +00:00
Edward Z. Yang
0dbe87bbc7 [2.1.5] [MFH] Disable Tidy tests
git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/branches/php4@1784 48356398-32a2-884e-a903-53898d9a118a
2008-06-11 01:25:05 +00:00
Edward Z. Yang
245b5bdb27 Merged r1746: Length and UnitConverter implementation.
git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/branches/php4@1783 48356398-32a2-884e-a903-53898d9a118a
2008-06-11 01:21:36 +00:00
Edward Z. Yang
864cb9e136 - Fix tagging script to work off of php4
- Fix svn.php to not clobber svn extension
- Update NEWS

git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/branches/php4@1743 48356398-32a2-884e-a903-53898d9a118a
2008-05-18 20:12:17 +00:00
Edward Z. Yang
487fcd55ea Release 2.1.4
git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/branches/php4@1736 48356398-32a2-884e-a903-53898d9a118a
2008-05-18 18:56:27 +00:00
Edward Z. Yang
ec6b6821cf [2.1.4] Add information about PHP 5.0.5 or earlier.
- Fix segfault in 5.0.x with IDAccumulator test.

git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/branches/php4@1726 48356398-32a2-884e-a903-53898d9a118a
2008-05-16 01:25:22 +00:00
Edward Z. Yang
f26eb7551a [2.1.4] [MFH] Fixed bug with fallback languages in LanguageFactory
git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/branches/php4@1724 48356398-32a2-884e-a903-53898d9a118a
2008-05-15 23:20:21 +00:00
Edward Z. Yang
a2aca4819d [2.1.4] [MFH] Revamp URI handling of percent encoding and validation from r1709
git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/branches/php4@1721 48356398-32a2-884e-a903-53898d9a118a
2008-05-15 05:30:20 +00:00
Edward Z. Yang
a75e4c6b7c [2.1.4] [MFH] getInstance -> instance from r1689
git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/branches/php4@1720 48356398-32a2-884e-a903-53898d9a118a
2008-05-15 05:24:34 +00:00
Edward Z. Yang
e7fa8cbdd5 [2.1.4] [MFH] Add protection against imagecrash attack with CSS height/width from r1684
git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/branches/php4@1719 48356398-32a2-884e-a903-53898d9a118a
2008-05-15 05:21:37 +00:00
Edward Z. Yang
5fa575f8ac [2.1.4] [MFH] Encoder optimization and shut-up operator bugfix from r1680
git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/branches/php4@1718 48356398-32a2-884e-a903-53898d9a118a
2008-05-15 05:16:36 +00:00
Edward Z. Yang
9f23bc005b [2.1.4] [MFH] addAttribute() can be called multiple times, from r1634
git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/branches/php4@1717 48356398-32a2-884e-a903-53898d9a118a
2008-05-15 05:13:11 +00:00
Edward Z. Yang
957a840f54 [2.1.4] [MFH] Fix bug with rgb(0, 1, 2) color syntax with spaces inside shorthand syntax from r1612
- Also, repair botched comment patch

git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/branches/php4@1716 48356398-32a2-884e-a903-53898d9a118a
2008-05-15 05:04:39 +00:00
Edward Z. Yang
a7762c5137 [2.1.4] [MFH] Fix bug in comment parsing with DirectLex from r1570
git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/branches/php4@1715 48356398-32a2-884e-a903-53898d9a118a
2008-05-15 04:43:52 +00:00
Edward Z. Yang
aca9d725ed [2.1.4] [MFH] Fix bug with trusted script handling in libxml versions later than 2.6.28 from r1553.
git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/branches/php4@1714 48356398-32a2-884e-a903-53898d9a118a
2008-05-15 04:40:13 +00:00
Edward Z. Yang
4ce3deba26 [2.1.4] [MFH] Recursive auto-close with <span><span><div> from r1492
git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/branches/php4@1713 48356398-32a2-884e-a903-53898d9a118a
2008-05-15 04:32:05 +00:00
Edward Z. Yang
d4da02ba95 [2.1.4] [MFH] Case-insensitive CSS from r1461
git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/branches/php4@1712 48356398-32a2-884e-a903-53898d9a118a
2008-05-15 04:26:30 +00:00
Edward Z. Yang
97d3c8509c [2.1.4] [MFH] register() for DefinitionCacheFactory from r1464
git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/branches/php4@1711 48356398-32a2-884e-a903-53898d9a118a
2008-05-15 04:21:23 +00:00
Edward Z. Yang
21c6803401 [2.1.4] [MFH] Color and CSS bugfixes from r1473
git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/branches/php4@1710 48356398-32a2-884e-a903-53898d9a118a
2008-05-15 04:01:45 +00:00
Edward Z. Yang
36badb06f6 Branch out PHP 4 development: we're going PHP 5!
git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/branches/php4@1455 48356398-32a2-884e-a903-53898d9a118a
2007-11-23 21:18:32 +00:00
93 changed files with 1822 additions and 433 deletions

View File

@@ -31,7 +31,7 @@ PROJECT_NAME = HTMLPurifier
# This could be handy for archiving the generated documentation or
# if some version control system is used.
PROJECT_NUMBER = 2.1.3
PROJECT_NUMBER = 2.1.5
# The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute)
# base path where the generated documentation will be put.

14
INSTALL
View File

@@ -14,16 +14,18 @@ basic sanity checks to get the most out of this library.
---------------------------------------------------------------------------
1. Compatibility
HTML Purifier works in both PHP 4 and PHP 5, and is actively tested from
PHP 4.3.7 and up (see tests/multitest.php for specific versions). It has
no core dependencies with other libraries. PHP 4 support will be
deprecated on December 31, 2007, at which time only essential security
THIS IS A DEPRECATED PHP4 VERSION OF HTML PURIFIER.
If you are running PHP5, please go to http://htmlpurifier.org to download
the latest version. This version of HTML Purifier is only actively tested
from PHP 4.3.7 to PHP 5.0.5. Essential security
fixes will be issued for the PHP 4 version until August 8, 2008.
These optional extensions can enhance the capabilities of HTML Purifier:
* iconv : Converts text to and from non-UTF-8 encodings
* tidy : Used for pretty-printing HTML
* iconv : Converts text to and from non-UTF-8 encodings
* bcmath : Used for unit conversion and imagecrash protection
* tidy : Used for pretty-printing HTML
---------------------------------------------------------------------------

58
NEWS
View File

@@ -9,6 +9,64 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier
. Internal change
==========================
ERRATA
- PH5P is seriously broken here; it can result in fatal errors and exceptions.
If you desire to use it, please use it with the latest, PHP5-only version of
HTML Purifier.
2.1.5, released 2008-06-19
! More robust imagecrash protection with height/width CSS with %CSS.MaxImgLength,
and height/width HTML with %HTML.MaxImgLength.
- AttrValidator operations are now atomic; updates to attributes are not
manifest in token until end of operations. This prevents naughty internal
code from directly modifying CurrentToken when they're not supposed to.
- Percent encoding checks enabled for URI query and fragment
- Disable percent height/width attributes for img
- Fix stray backslashes in font-family; CSS Unicode character escapes are
now properly resolved (although *only* in font-family).
- Improve parseCDATA algorithm to take into account newline normalization
- Account for browser confusion between Yen character and backslash in
Shift_JIS encoding. This fix generalizes to any other encoding which is not
a strict superset of printable ASCII.
- Improved adherence to Unicode by checking for non-character codepoints.
Thanks Geoffrey Sneddon for reporting. This may result in degraded
performance for extremely large inputs.
- Allow CSS property-value pair ''text-decoration: none''
. Added HTMLPurifier_UnitConverter and HTMLPurifier_Length for convenient
handling of CSS-style lengths. HTMLPurifier_AttrDef_CSS_Length now uses
this class.
. API of HTMLPurifier_AttrDef_CSS_Length changed from __construct($disable_negative)
to __construct($min, $max). __construct(true) is equivalent to
__construct('0'). (replace __construct with HTMLPurifier_AttrDef_CSS_Length)
. Added HTMLPurifier_AttrDef_Switch class
. Rename HTMLPurifier_HTMLModule_Tidy->construct() to setup() and bubble method
up inheritance hierarchy to HTMLPurifier_HTMLModule. All HTMLModules
get this called with the configuration object. All modules now
use this rather than __construct(), although legacy code using constructors
will still work--the new format, however, lets modules access the
configuration object for HTML namespace dependent tweaks.
. AttrDef_HTML_Pixels now takes a single construction parameter, pixels.
2.1.4, released 2008-05-18
! DefinitionCacheFactory now can register new implementations
! CSS properties are now case-insensitive
! Encoder optimized with valid UTF-8 input
! HTML Purifier's URI handling is a lot more robust, with much stricter
validation checks and better percent encoding handling.
- Colors missing # but in hex form will be corrected
- CSS Number algorithm improved
- Autoclose now operates iteratively, i.e. <span><span><div> now has
both span tags closed.
- Fix bug with trusted script handling in libxml versions later than 2.6.28.
- Fix bug in comment parsing with DirectLex
- Fix bug with rgb(0, 1, 2) color syntax with spaces inside shorthand syntax
- HTMLPurifier_HTMLDefinition->addAttribute can now be called multiple times
on the same element without emitting errors.
- Iconv uses set_error_handler instead of shut-up operator
- Add protection against imagecrash attack with CSS height/width
- HTMLPurifier::getInstance() renamed to HTMLPurifier::instance() for consistency
- Fixed bug with fallback languages in LanguageFactory
2.1.3, released 2007-11-05
! tests/multitest.php allows you to test multiple versions by running
tests/index.php through multiple interpreters using `phpv` shell

View File

@@ -1 +1 @@
2.1.3
2.1.5

View File

@@ -1,6 +1,7 @@
Stability release 2.1.3 fixes a slew of minor bugs found in HTML Purifier,
and also includes some internal code enhancements and refactorings.
Notably, tests/multitest.php automates testing in multiple versions,
fatal AttrDef_URI_Email error fixed, blockquote contents are more lenient
in HTML 4.01 Strict and fatal errors involving ID tags in img tags were
fixed.
Security and bugfix release 2.1.5 is a backport that fixes two vulnerabilities
related to CSS, one of which only occurs under Shift_JIS. It also improves
imagecrash protection (percent CSS width and height is now disabled for
images, and you can control the bounds with %CSS.MaxImgLength and
%HTML.MaxImgLength). Finally, there are a number of bug fixes, most notably
support for text-decoration: none, improved adherence to Unicode and increased
percent encoding checks.

View File

@@ -22,7 +22,7 @@
*/
/*
HTML Purifier 2.1.3 - Standards Compliant HTML Filtering
HTML Purifier 2.1.5 - Standards Compliant HTML Filtering
Copyright (C) 2006-2007 Edward Z. Yang
This library is free software; you can redistribute it and/or
@@ -83,7 +83,7 @@ since 2.0.0.
class HTMLPurifier
{
var $version = '2.1.3';
var $version = '2.1.5';
var $config;
var $filters = array();
@@ -213,7 +213,7 @@ class HTMLPurifier
* @param $prototype Optional prototype HTMLPurifier instance to
* overload singleton with.
*/
function &getInstance($prototype = null) {
function &instance($prototype = null) {
static $htmlpurifier;
if (!$htmlpurifier || $prototype) {
if (is_a($prototype, 'HTMLPurifier')) {
@@ -227,6 +227,9 @@ class HTMLPurifier
return $htmlpurifier;
}
function &getInstance($prototype = null) {
return HTMLPurifier::instance($prototype);
}
}

View File

@@ -54,18 +54,15 @@ class HTMLPurifier_AttrDef
*
* @warning This processing is inconsistent with XML's whitespace handling
* as specified by section 3.3.3 and referenced XHTML 1.0 section
* 4.7. Compliant processing requires all line breaks normalized
* to "\n", so the fix is not as simple as fixing it in this
* function. Trim and whitespace collapsing are supposed to only
* occur in NMTOKENs. However, note that we are NOT necessarily
* parsing XML, thus, this behavior may still be correct.
* 4.7. However, note that we are NOT necessarily
* parsing XML, thus, this behavior may still be correct. We
* assume that newlines have been normalized.
*
* @public
*/
function parseCDATA($string) {
$string = trim($string);
$string = str_replace("\n", '', $string);
$string = str_replace(array("\r", "\t"), ' ', $string);
$string = str_replace(array("\n", "\t", "\r"), ' ', $string);
return $string;
}
@@ -82,5 +79,13 @@ class HTMLPurifier_AttrDef
return $this;
}
/**
* Removes spaces from rgb(0, 0, 0) so that shorthand CSS properties work
* properly. THIS IS A HACK!
*/
function mungeRgb($string) {
return preg_replace('/rgb\((\d+)\s*,\s*(\d+)\s*,\s*(\d+)\)/', 'rgb(\1,\2,\3)', $string);
}
}

View File

@@ -38,7 +38,20 @@ class HTMLPurifier_AttrDef_CSS extends HTMLPurifier_AttrDef
list($property, $value) = explode(':', $declaration, 2);
$property = trim($property);
$value = trim($value);
if (!isset($definition->info[$property])) continue;
$ok = false;
do {
if (isset($definition->info[$property])) {
$ok = true;
break;
}
if (ctype_lower($property)) break;
$property = strtolower($property);
if (isset($definition->info[$property])) {
$ok = true;
break;
}
} while(0);
if (!$ok) continue;
// inefficient call, since the validator will do this again
if (strtolower(trim($value)) !== 'inherit') {
// inherit works for everything (but only on the base property)

View File

@@ -31,6 +31,9 @@ class HTMLPurifier_AttrDef_CSS_Background extends HTMLPurifier_AttrDef
$string = $this->parseCDATA($string);
if ($string === '') return false;
// munge rgb() decl if necessary
$string = $this->mungeRgb($string);
// assumes URI doesn't have spaces in it
$bits = explode(' ', strtolower($string)); // bits to process

View File

@@ -22,7 +22,7 @@ class HTMLPurifier_AttrDef_CSS_Border extends HTMLPurifier_AttrDef
function validate($string, $config, &$context) {
$string = $this->parseCDATA($string);
// we specifically will not support rgb() syntax with spaces
$string = $this->mungeRgb($string);
$bits = explode(' ', $string);
$done = array(); // segments we've finished
$ret = ''; // return value

View File

@@ -39,20 +39,13 @@ class HTMLPurifier_AttrDef_CSS_Color extends HTMLPurifier_AttrDef
if ($colors === null) $colors = $config->get('Core', 'ColorKeywords');
$color = trim($color);
if (!$color) return false;
if ($color === '') return false;
$lower = strtolower($color);
if (isset($colors[$lower])) return $colors[$lower];
if ($color[0] === '#') {
// hexadecimal handling
$hex = substr($color, 1);
$length = strlen($hex);
if ($length !== 3 && $length !== 6) return false;
if (!ctype_xdigit($hex)) return false;
} else {
if (strpos($color, 'rgb(') !== false) {
// rgb literal handling
if (strpos($color, 'rgb(')) return false;
$length = strlen($color);
if (strpos($color, ')') !== $length - 1) return false;
$triad = substr($color, 4, $length - 4 - 1);
@@ -90,6 +83,17 @@ class HTMLPurifier_AttrDef_CSS_Color extends HTMLPurifier_AttrDef
}
$new_triad = implode(',', $new_parts);
$color = "rgb($new_triad)";
} else {
// hexadecimal handling
if ($color[0] === '#') {
$hex = substr($color, 1);
} else {
$hex = $color;
$color = '#' . $color;
}
$length = strlen($hex);
if ($length !== 3 && $length !== 6) return false;
if (!ctype_xdigit($hex)) return false;
}
return $color;

View File

@@ -0,0 +1,26 @@
<?php
/**
 * Decorator around another attribute definition that refuses every value
 * while the token being validated is one particular element; for any other
 * token the wrapped definition decides.
 */
class HTMLPurifier_AttrDef_CSS_DenyElementDecorator extends HTMLPurifier_AttrDef
{
    /** Wrapped definition that performs the real validation. */
    var $def;
    /** Name of the element for which the property is denied. */
    var $element;

    /**
     * @param $def Definition to wrap (held by reference)
     * @param $element Name of the element to deny
     */
    function HTMLPurifier_AttrDef_CSS_DenyElementDecorator(&$def, $element) {
        $this->element = $element;
        $this->def =& $def;
    }

    /**
     * Returns false when the context's 'CurrentToken' is present and its
     * name equals $this->element; otherwise returns whatever the wrapped
     * definition's validate() returns.
     */
    function validate($string, $config, $context) {
        $current = $context->get('CurrentToken', true);
        $is_denied = $current && $current->name == $this->element;
        if ($is_denied) {
            return false;
        }
        return $this->def->validate($string, $config, $context);
    }
}

View File

@@ -19,7 +19,6 @@ class HTMLPurifier_AttrDef_CSS_FontFamily extends HTMLPurifier_AttrDef
'cursive' => true
);
$string = $this->parseCDATA($string);
// assume that no font names contain commas in them
$fonts = explode(',', $string);
$final = '';
@@ -38,13 +37,40 @@ class HTMLPurifier_AttrDef_CSS_FontFamily extends HTMLPurifier_AttrDef
$quote = $font[0];
if ($font[$length - 1] !== $quote) continue;
$font = substr($font, 1, $length - 2);
// double-backslash processing is buggy
$font = str_replace("\\$quote", $quote, $font); // de-escape quote
$font = str_replace("\\\n", "\n", $font); // de-escape newlines
$new_font = '';
for ($i = 0, $c = strlen($font); $i < $c; $i++) {
if ($font[$i] === '\\') {
$i++;
if ($i >= $c) {
$new_font .= '\\';
break;
}
if (ctype_xdigit($font[$i])) {
$code = $font[$i];
for ($a = 1, $i++; $i < $c && $a < 6; $i++, $a++) {
if (!ctype_xdigit($font[$i])) break;
$code .= $font[$i];
}
// We have to be extremely careful when adding
// new characters, to make sure we're not breaking
// the encoding.
$char = HTMLPurifier_Encoder::unichr(hexdec($code));
if (HTMLPurifier_Encoder::cleanUTF8($char) === '') continue;
$new_font .= $char;
if ($i < $c && trim($font[$i]) !== '') $i--;
continue;
}
if ($font[$i] === "\n") continue;
}
$new_font .= $font[$i];
}
$font = $new_font;
}
// $font is a pure representation of the font name
if (ctype_alnum($font)) {
if (ctype_alnum($font) && $font !== '') {
// very simple font, allow it in unharmed
$final .= $font . ', ';
continue;
@@ -53,8 +79,8 @@ class HTMLPurifier_AttrDef_CSS_FontFamily extends HTMLPurifier_AttrDef
// complicated font, requires quoting
// armor single quotes and new lines
$font = str_replace("\\", "\\\\", $font);
$font = str_replace("'", "\\'", $font);
$font = str_replace("\n", "\\\n", $font);
$final .= "'$font', ";
}
$final = rtrim($final, ', ');

View File

@@ -1,7 +1,7 @@
<?php
require_once 'HTMLPurifier/AttrDef.php';
require_once 'HTMLPurifier/AttrDef/CSS/Number.php';
require_once 'HTMLPurifier/Length.php';
require_once 'HTMLPurifier/UnitConverter.php';
/**
* Represents a Length as defined by CSS.
@@ -9,46 +9,40 @@ require_once 'HTMLPurifier/AttrDef/CSS/Number.php';
class HTMLPurifier_AttrDef_CSS_Length extends HTMLPurifier_AttrDef
{
/**
* Valid unit lookup table.
* @warning The code assumes all units are two characters long. Be careful
* if we have to change this behavior!
*/
var $units = array('em' => true, 'ex' => true, 'px' => true, 'in' => true,
'cm' => true, 'mm' => true, 'pt' => true, 'pc' => true);
/**
* Instance of HTMLPurifier_AttrDef_Number to defer number validation to
*/
var $number_def;
var $min, $max;
/**
* @param $non_negative Bool indication whether or not negative values are
* allowed.
* @param HTMLPurifier_Length $min Minimum length, or null for no bound. String is also acceptable.
* @param HTMLPurifier_Length $max Maximum length, or null for no bound. String is also acceptable.
*/
function HTMLPurifier_AttrDef_CSS_Length($non_negative = false) {
$this->number_def = new HTMLPurifier_AttrDef_CSS_Number($non_negative);
function HTMLPurifier_AttrDef_CSS_Length($min = null, $max = null) {
$this->min = $min !== null ? HTMLPurifier_Length::make($min) : null;
$this->max = $max !== null ? HTMLPurifier_Length::make($max) : null;
}
function validate($length, $config, &$context) {
function validate($string, $config, $context) {
$string = $this->parseCDATA($string);
$length = $this->parseCDATA($length);
if ($length === '') return false;
if ($length === '0') return '0';
$strlen = strlen($length);
if ($strlen === 1) return false; // impossible!
// Optimizations
if ($string === '') return false;
if ($string === '0') return '0';
if (strlen($string) === 1) return false;
// we assume all units are two characters
$unit = substr($length, $strlen - 2);
if (!ctype_lower($unit)) $unit = strtolower($unit);
$number = substr($length, 0, $strlen - 2);
$length = HTMLPurifier_Length::make($string);
if (!$length->isValid()) return false;
if (!isset($this->units[$unit])) return false;
$number = $this->number_def->validate($number, $config, $context);
if ($number === false) return false;
return $number . $unit;
if ($this->min) {
$c = $length->compareTo($this->min);
if ($c === false) return false;
if ($c < 0) return false;
}
if ($this->max) {
$c = $length->compareTo($this->max);
if ($c === false) return false;
if ($c > 0) return false;
}
return $length->toString();
}
}

View File

@@ -18,6 +18,11 @@ class HTMLPurifier_AttrDef_CSS_Number extends HTMLPurifier_AttrDef
$this->non_negative = $non_negative;
}
/**
* @warning Some contexts do not pass $config, $context. These
* variables should not be used without checking HTMLPurifier_Length.
* This might not work properly in PHP4.
*/
function validate($number, $config, &$context) {
$number = $this->parseCDATA($number);

View File

@@ -15,10 +15,13 @@ class HTMLPurifier_AttrDef_CSS_TextDecoration extends HTMLPurifier_AttrDef
static $allowed_values = array(
'line-through' => true,
'overline' => true,
'underline' => true
'underline' => true,
);
$string = strtolower($this->parseCDATA($string));
if ($string === 'none') return $string;
$parts = explode(' ', $string);
$final = '';
foreach ($parts as $part) {

View File

@@ -8,6 +8,12 @@ require_once 'HTMLPurifier/AttrDef.php';
class HTMLPurifier_AttrDef_HTML_Pixels extends HTMLPurifier_AttrDef
{
var $max;
function HTMLPurifier_AttrDef_HTML_Pixels($max = null) {
$this->max = $max;
}
function validate($string, $config, &$context) {
$string = trim($string);
@@ -26,11 +32,18 @@ class HTMLPurifier_AttrDef_HTML_Pixels extends HTMLPurifier_AttrDef
// crash operating systems, see <http://ha.ckers.org/imagecrash.html>
// WARNING, above link WILL crash you if you're using Windows
if ($int > 1200) return '1200';
if ($this->max !== null && $int > $this->max) return (string) $this->max;
return (string) $int;
}
function make($string) {
if ($string === '') $max = null;
else $max = (int) $string;
$class = get_class($this);
return new $class($max);
}
}

View File

@@ -0,0 +1,32 @@
<?php
/**
 * Decorator that delegates validation to one of two definitions, chosen by
 * the name of the token currently being validated.
 */
class HTMLPurifier_AttrDef_Switch
{
    /** Tag name that selects the "with tag" definition. */
    var $tag;
    /** Definition used when the current token's name matches $tag. */
    var $withTag;
    /** Definition used when there is no token or its name differs. */
    var $withoutTag;

    /**
     * @param string $tag Tag name to switch upon
     * @param HTMLPurifier_AttrDef $with_tag Used if the token matches the tag
     * @param HTMLPurifier_AttrDef $without_tag Used if the token doesn't match, or there is no token
     */
    function HTMLPurifier_AttrDef_Switch($tag, $with_tag, $without_tag) {
        $this->withoutTag = $without_tag;
        $this->withTag = $with_tag;
        $this->tag = $tag;
    }

    /**
     * Reads 'CurrentToken' from the context and forwards the call to the
     * matching definition, returning that definition's result unchanged.
     */
    function validate($string, $config, $context) {
        $current = $context->get('CurrentToken', true);
        $matches = $current && $current->name === $this->tag;
        if ($matches) {
            return $this->withTag->validate($string, $config, $context);
        }
        return $this->withoutTag->validate($string, $config, $context);
    }
}

View File

@@ -68,7 +68,7 @@ HTMLPurifier_ConfigSchema::define(
class HTMLPurifier_AttrDef_URI extends HTMLPurifier_AttrDef
{
var $parser, $percentEncoder;
var $parser;
var $embedsResource;
/**
@@ -76,7 +76,6 @@ class HTMLPurifier_AttrDef_URI extends HTMLPurifier_AttrDef
*/
function HTMLPurifier_AttrDef_URI($embeds_resource = false) {
$this->parser = new HTMLPurifier_URIParser();
$this->percentEncoder = new HTMLPurifier_PercentEncoder();
$this->embedsResource = (bool) $embeds_resource;
}
@@ -84,9 +83,7 @@ class HTMLPurifier_AttrDef_URI extends HTMLPurifier_AttrDef
if ($config->get('URI', 'Disable')) return false;
// initial operations
$uri = $this->parseCDATA($uri);
$uri = $this->percentEncoder->normalize($uri);
// parse the URI
$uri = $this->parser->parse($uri);
@@ -122,13 +119,6 @@ class HTMLPurifier_AttrDef_URI extends HTMLPurifier_AttrDef
$context->destroy('EmbeddedURI');
if (!$ok) return false;
// munge scheme off if necessary (this must be last)
if (!is_null($uri->scheme) && is_null($uri->host)) {
if ($uri_def->defaultScheme == $uri->scheme) {
$uri->scheme = null;
}
}
// back to string
$result = $uri->toString();

View File

@@ -40,11 +40,23 @@ class HTMLPurifier_AttrDef_URI_Host extends HTMLPurifier_AttrDef
$ipv4 = $this->ipv4->validate($string, $config, $context);
if ($ipv4 !== false) return $ipv4;
// validate a domain name here, do filtering, etc etc etc
// A regular domain name.
// We could use this, but it would break I18N domain names
//$match = preg_match('/^[a-z0-9][\w\-\.]*[a-z0-9]$/i', $string);
//if (!$match) return false;
// This breaks I18N domain names, but we don't have proper IRI support,
// so force users to insert Punycode. If there's complaining we'll
// try to fix things into an international friendly form.
// The productions describing this are:
$a = '[a-z]'; // alpha
$an = '[a-z0-9]'; // alphanum
$and = '[a-z0-9-]'; // alphanum | "-"
// domainlabel = alphanum | alphanum *( alphanum | "-" ) alphanum
$domainlabel = "$an($and*$an)?";
// toplabel = alpha | alpha *( alphanum | "-" ) alphanum
$toplabel = "$a($and*$an)?";
// hostname = *( domainlabel "." ) toplabel [ "." ]
$match = preg_match("/^($domainlabel\.)*$toplabel\.?$/i", $string);
if (!$match) return false;
return $string;
}

View File

@@ -40,8 +40,8 @@ class HTMLPurifier_AttrValidator
// DEFINITION CALL
$d_defs = $definition->info_global_attr;
// reference attributes for easy manipulation
$attr =& $token->attr;
// don't update token until the very end, to ensure an atomic update
$attr = $token->attr;
// do global transformations (pre)
// nothing currently utilizes this
@@ -136,6 +136,8 @@ class HTMLPurifier_AttrValidator
if ($e && ($attr != $o)) $e->send(E_NOTICE, 'AttrValidator: Attributes transformed', $o, $attr);
}
$token->attr = $attr;
// destroy CurrentToken if we made it ourselves
if (!$current_token) $context->destroy('CurrentToken');

View File

@@ -7,6 +7,7 @@ require_once 'HTMLPurifier/AttrDef/CSS/BackgroundPosition.php';
require_once 'HTMLPurifier/AttrDef/CSS/Border.php';
require_once 'HTMLPurifier/AttrDef/CSS/Color.php';
require_once 'HTMLPurifier/AttrDef/CSS/Composite.php';
require_once 'HTMLPurifier/AttrDef/CSS/DenyElementDecorator.php';
require_once 'HTMLPurifier/AttrDef/CSS/Font.php';
require_once 'HTMLPurifier/AttrDef/CSS/FontFamily.php';
require_once 'HTMLPurifier/AttrDef/CSS/Length.php';
@@ -16,6 +17,7 @@ require_once 'HTMLPurifier/AttrDef/CSS/Percentage.php';
require_once 'HTMLPurifier/AttrDef/CSS/TextDecoration.php';
require_once 'HTMLPurifier/AttrDef/CSS/URI.php';
require_once 'HTMLPurifier/AttrDef/Enum.php';
require_once 'HTMLPurifier/AttrDef/Switch.php';
HTMLPurifier_ConfigSchema::define(
'CSS', 'DefinitionRev', 1, 'int', '
@@ -26,6 +28,20 @@ HTMLPurifier_ConfigSchema::define(
</p>
');
HTMLPurifier_ConfigSchema::define(
'CSS', 'MaxImgLength', '1200px', 'string/null', '
<p>
This parameter sets the maximum allowed length on <code>img</code> tags,
effectively the <code>width</code> and <code>height</code> properties.
Only absolute units of measurement (in, pt, pc, mm, cm) and pixels (px) are allowed. This is
in place to prevent imagecrash attacks, disable with null at your own risk.
This directive is similar to %HTML.MaxImgLength, and both should be
concurrently edited, although there are
subtle differences in the input format (the CSS max is a number with
a unit).
</p>
');
/**
* Defines allowed CSS attributes and what their values are.
* @see HTMLPurifier_HTMLDefinition
@@ -116,7 +132,7 @@ class HTMLPurifier_CSSDefinition extends HTMLPurifier_Definition
$this->info['border-left-width'] =
$this->info['border-right-width'] = new HTMLPurifier_AttrDef_CSS_Composite(array(
new HTMLPurifier_AttrDef_Enum(array('thin', 'medium', 'thick')),
new HTMLPurifier_AttrDef_CSS_Length(true) //disallow negative
new HTMLPurifier_AttrDef_CSS_Length('0') //disallow negative
));
$this->info['border-width'] = new HTMLPurifier_AttrDef_CSS_Multiple($border_width);
@@ -142,7 +158,7 @@ class HTMLPurifier_CSSDefinition extends HTMLPurifier_Definition
$this->info['line-height'] = new HTMLPurifier_AttrDef_CSS_Composite(array(
new HTMLPurifier_AttrDef_Enum(array('normal')),
new HTMLPurifier_AttrDef_CSS_Number(true), // no negatives
new HTMLPurifier_AttrDef_CSS_Length(true),
new HTMLPurifier_AttrDef_CSS_Length('0'),
new HTMLPurifier_AttrDef_CSS_Percentage(true)
));
@@ -164,7 +180,7 @@ class HTMLPurifier_CSSDefinition extends HTMLPurifier_Definition
$this->info['padding-bottom'] =
$this->info['padding-left'] =
$this->info['padding-right'] = new HTMLPurifier_AttrDef_CSS_Composite(array(
new HTMLPurifier_AttrDef_CSS_Length(true),
new HTMLPurifier_AttrDef_CSS_Length('0'),
new HTMLPurifier_AttrDef_CSS_Percentage(true)
));
@@ -175,13 +191,25 @@ class HTMLPurifier_CSSDefinition extends HTMLPurifier_Definition
new HTMLPurifier_AttrDef_CSS_Percentage()
));
$this->info['width'] =
$this->info['height'] =
new HTMLPurifier_AttrDef_CSS_Composite(array(
new HTMLPurifier_AttrDef_CSS_Length(true),
$trusted_wh = new HTMLPurifier_AttrDef_CSS_Composite(array(
new HTMLPurifier_AttrDef_CSS_Length('0'),
new HTMLPurifier_AttrDef_CSS_Percentage(true),
new HTMLPurifier_AttrDef_Enum(array('auto'))
));
$max = $config->get('CSS', 'MaxImgLength');
$this->info['width'] =
$this->info['height'] =
$max === null ?
$trusted_wh :
new HTMLPurifier_AttrDef_Switch('img',
// For img tags:
new HTMLPurifier_AttrDef_CSS_Composite(array(
new HTMLPurifier_AttrDef_CSS_Length('0', $max),
new HTMLPurifier_AttrDef_Enum(array('auto'))
)),
// For everyone else:
$trusted_wh
);
$this->info['text-decoration'] = new HTMLPurifier_AttrDef_CSS_TextDecoration();

View File

@@ -42,7 +42,7 @@ class HTMLPurifier_Config
/**
* HTML Purifier's version
*/
var $version = '2.1.3';
var $version = '2.1.5';
/**
* Two-level associative array of configuration directives

View File

@@ -120,6 +120,9 @@ class HTMLPurifier_DefinitionCache
/**
* Clears all expired (older version or revision) objects from cache
* @note Be careful implementing this method as flush. Flush must
* not interfere with other Definition types, and cleanup()
* should not be repeatedly called by userland code.
*/
function cleanup($config) {
trigger_error('Cannot call abstract method', E_USER_ERROR);

View File

@@ -1,6 +1,7 @@
<?php
require_once 'HTMLPurifier/DefinitionCache.php';
require_once 'HTMLPurifier/DefinitionCache/Serializer.php';
HTMLPurifier_ConfigSchema::define(
'Cache', 'DefinitionImpl', 'Serializer', 'string/null', '
@@ -10,10 +11,6 @@ to disable caching (not recommended, as you will see a definite
performance degradation). This directive has been available since 2.0.0.
');
HTMLPurifier_ConfigSchema::defineAllowedValues(
'Cache', 'DefinitionImpl', array('Serializer')
);
HTMLPurifier_ConfigSchema::defineAlias(
'Core', 'DefinitionCache',
'Cache', 'DefinitionImpl'
@@ -27,6 +24,7 @@ class HTMLPurifier_DefinitionCacheFactory
{
var $caches = array('Serializer' => array());
var $implementations = array();
var $decorators = array();
/**
@@ -51,14 +49,21 @@ class HTMLPurifier_DefinitionCacheFactory
return $instance;
}
/**
* Registers a new definition cache implementation under a short name.
* create() consults $this->implementations when the Cache.DefinitionImpl
* directive names a cache, and instantiates the registered class if it
* exists. Registering the same short name again overwrites the entry.
* @param $short Short name of cache object, for reference
* @param $long Full class name of cache object, for construction
*/
function register($short, $long) {
$this->implementations[$short] = $long;
}
/**
* Factory method that creates a cache object based on configuration
* @param $type Name of definitions handled by cache
* @param $config Instance of HTMLPurifier_Config
*/
function &create($type, $config) {
// only one implementation as for right now, $config will
// be used to determine implementation
$method = $config->get('Cache', 'DefinitionImpl');
if ($method === null) {
$null = new HTMLPurifier_DefinitionCache_Null($type);
@@ -67,7 +72,17 @@ class HTMLPurifier_DefinitionCacheFactory
if (!empty($this->caches[$method][$type])) {
return $this->caches[$method][$type];
}
$cache = new HTMLPurifier_DefinitionCache_Serializer($type);
if (
isset($this->implementations[$method]) &&
class_exists($class = $this->implementations[$method])
) {
$cache = new $class($type);
} else {
if ($method != 'Serializer') {
trigger_error("Unrecognized DefinitionCache $method, using Serializer instead", E_USER_WARNING);
}
$cache = new HTMLPurifier_DefinitionCache_Serializer($type);
}
foreach ($this->decorators as $decorator) {
$new_cache = $decorator->decorate($cache);
// prevent infinite recursion in PHP 4

View File

@@ -62,6 +62,12 @@ class HTMLPurifier_Encoder
trigger_error('Cannot instantiate encoder, call methods statically', E_USER_ERROR);
}
/**
* Error-handler that mutes errors, alternative to shut-up operator.
* The body is intentionally empty. It is installed with
* set_error_handler(array('HTMLPurifier_Encoder', 'muteErrorHandler'))
* around calls that may raise errors, and the caller must undo it with
* restore_error_handler() once done.
*/
function muteErrorHandler() {}
/**
/**
* Cleans a UTF-8 string for well-formedness and SGML validity
*
@@ -90,26 +96,13 @@ class HTMLPurifier_Encoder
*/
function cleanUTF8($str, $force_php = false) {
static $non_sgml_chars = array();
if (empty($non_sgml_chars)) {
for ($i = 0; $i <= 31; $i++) {
// non-SGML ASCII chars
// save \r, \t and \n
if ($i == 9 || $i == 13 || $i == 10) continue;
$non_sgml_chars[chr($i)] = '';
}
for ($i = 127; $i <= 159; $i++) {
$non_sgml_chars[HTMLPurifier_Encoder::unichr($i)] = '';
}
}
static $iconv = null;
if ($iconv === null) $iconv = function_exists('iconv');
if ($iconv && !$force_php) {
// do the shortcut way
$str = @iconv('UTF-8', 'UTF-8//IGNORE', $str);
return strtr($str, $non_sgml_chars);
// UTF-8 validity is checked since PHP 4.3.5
// This is an optimization: if the string is already valid UTF-8, no
// need to do PHP stuff. 99% of the time, this will be the case.
// The regexp matches the XML char production, as well as excluding
// non-SGML codepoints U+007F to U+009F
if (preg_match('/^[\x{9}\x{A}\x{D}\x{20}-\x{7E}\x{A0}-\x{D7FF}\x{E000}-\x{FFFD}\x{10000}-\x{10FFFF}]*$/Du', $str)) {
return $str;
}
$mState = 0; // cached expected number of octets after the current octet
@@ -220,7 +213,17 @@ class HTMLPurifier_Encoder
) {
} elseif (0xFEFF != $mUcs4 && // omit BOM
!($mUcs4 >= 128 && $mUcs4 <= 159) // omit non-SGML
// check for valid Char unicode codepoints
(
0x9 == $mUcs4 ||
0xA == $mUcs4 ||
0xD == $mUcs4 ||
(0x20 <= $mUcs4 && 0x7E >= $mUcs4) ||
// 7F-9F is not strictly prohibited by XML,
// but it is non-SGML, and thus we don't allow it
(0xA0 <= $mUcs4 && 0xD7FF >= $mUcs4) ||
(0x10000 <= $mUcs4 && 0x10FFFF >= $mUcs4)
)
) {
$out .= $char;
}
@@ -313,14 +316,23 @@ class HTMLPurifier_Encoder
* @static
*/
function convertToUTF8($str, $config, &$context) {
static $iconv = null;
if ($iconv === null) $iconv = function_exists('iconv');
$encoding = $config->get('Core', 'Encoding');
if ($encoding === 'utf-8') return $str;
static $iconv = null;
if ($iconv === null) $iconv = function_exists('iconv');
set_error_handler(array('HTMLPurifier_Encoder', 'muteErrorHandler'));
if ($iconv && !$config->get('Test', 'ForceNoIconv')) {
return @iconv($encoding, 'utf-8//IGNORE', $str);
$str = iconv($encoding, 'utf-8//IGNORE', $str);
// If the string is bjorked by Shift_JIS or a similar encoding
// that doesn't support all of ASCII, convert the naughty
// characters to their true byte-wise ASCII/UTF-8 equivalents.
$str = strtr($str, HTMLPurifier_Encoder::testEncodingSupportsASCII($encoding));
restore_error_handler();
return $str;
} elseif ($encoding === 'iso-8859-1') {
return @utf8_encode($str);
$str = utf8_encode($str);
restore_error_handler();
return $str;
}
trigger_error('Encoding not supported', E_USER_ERROR);
}
@@ -332,17 +344,31 @@ class HTMLPurifier_Encoder
* characters being omitted.
*/
function convertFromUTF8($str, $config, &$context) {
static $iconv = null;
if ($iconv === null) $iconv = function_exists('iconv');
$encoding = $config->get('Core', 'Encoding');
if ($encoding === 'utf-8') return $str;
if ($config->get('Core', 'EscapeNonASCIICharacters')) {
static $iconv = null;
if ($iconv === null) $iconv = function_exists('iconv');
if ($escape = $config->get('Core', 'EscapeNonASCIICharacters')) {
$str = HTMLPurifier_Encoder::convertToASCIIDumbLossless($str);
}
set_error_handler(array('HTMLPurifier_Encoder', 'muteErrorHandler'));
if ($iconv && !$config->get('Test', 'ForceNoIconv')) {
return @iconv('utf-8', $encoding . '//IGNORE', $str);
// Undo our previous fix in convertToUTF8, otherwise iconv will barf
$ascii_fix = HTMLPurifier_Encoder::testEncodingSupportsASCII($encoding);
if (!$escape && !empty($ascii_fix)) {
$clear_fix = array();
foreach ($ascii_fix as $utf8 => $native) $clear_fix[$utf8] = '';
$str = strtr($str, $clear_fix);
}
$str = strtr($str, array_flip($ascii_fix));
// Normal stuff
$str = iconv('utf-8', $encoding . '//IGNORE', $str);
restore_error_handler();
return $str;
} elseif ($encoding === 'iso-8859-1') {
return @utf8_decode($str);
$str = utf8_decode($str);
restore_error_handler();
return $str;
}
trigger_error('Encoding not supported', E_USER_ERROR);
}
@@ -395,6 +421,47 @@ class HTMLPurifier_Encoder
return $result;
}
/**
 * This expensive function tests whether or not a given character
 * encoding supports ASCII. 7/8-bit encodings like Shift_JIS will
 * fail this test, and require special processing. Variable width
 * encodings shouldn't ever fail.
 *
 * Results of the full probe are memoized per encoding name in a static
 * array; the precompiled answers (Shift_JIS, Johab, ISO-8859-*) are
 * returned directly without probing unless $bypass is set.
 *
 * @param string $encoding Encoding name to test, as per iconv format
 * @param bool $bypass Whether or not to bypass the precompiled arrays
 *      (and the memoized results).
 * @return Array of UTF-8 characters to their corresponding ASCII,
 *      which can be used to "undo" any overzealous iconv action.
 *      Returns false if iconv cannot convert to $encoding at all.
 */
function testEncodingSupportsASCII($encoding, $bypass = false) {
    static $encodings = array();
    if (!$bypass) {
        if (isset($encodings[$encoding])) return $encodings[$encoding];
        $lenc = strtolower($encoding);
        switch ($lenc) {
            case 'shift_jis':
                return array("\xC2\xA5" => '\\', "\xE2\x80\xBE" => '~');
            case 'johab':
                return array("\xE2\x82\xA9" => '\\');
        }
        if (strpos($lenc, 'iso-8859-') === 0) return array();
    }
    $ret = array();
    // iconv raises notices/warnings for unknown encodings and for
    // unconvertible characters; silence them for the duration of the probe.
    set_error_handler(array('HTMLPurifier_Encoder', 'muteErrorHandler'));
    if (iconv('UTF-8', $encoding, 'a') === false) {
        // Encoding is not usable at all. The mute handler must still be
        // uninstalled, otherwise it stays active for the rest of the
        // request and swallows every subsequent error.
        restore_error_handler();
        return false;
    }
    for ($i = 0x20; $i <= 0x7E; $i++) { // all printable ASCII chars
        $c = chr($i);
        if (iconv('UTF-8', "$encoding//IGNORE", $c) === '') {
            // Reverse engineer: what's the UTF-8 equiv of this byte
            // sequence? This assumes that there's no variable width
            // encoding that doesn't support ASCII.
            $ret[iconv($encoding, 'UTF-8//IGNORE', $c)] = $c;
        }
    }
    restore_error_handler();
    $encodings[$encoding] = $ret;
    return $ret;
}
}

View File

@@ -222,6 +222,8 @@ class HTMLPurifier_HTMLDefinition extends HTMLPurifier_Definition
/**
* Adds a custom attribute to a pre-existing element
* @note This is strictly convenience, and does not have a corresponding
* method in HTMLPurifier_HTMLModule
* @param $element_name String element name to add attribute to
* @param $attr_name String name of attribute
* @param $def Attribute definition, can be string or object, see
@@ -229,7 +231,11 @@ class HTMLPurifier_HTMLDefinition extends HTMLPurifier_Definition
*/
function addAttribute($element_name, $attr_name, $def) {
$module =& $this->getAnonymousModule();
$element =& $module->addBlankElement($element_name);
if (!isset($module->info[$element_name])) {
$element =& $module->addBlankElement($element_name);
} else {
$element =& $module->info[$element_name];
}
$element->attr[$attr_name] = $def;
}

View File

@@ -219,5 +219,14 @@ class HTMLPurifier_HTMLModule
}
return $ret;
}
/**
* Lazy load construction of the module after determining whether
* or not it's needed, and also when a finalized configuration object
* is available.
* This base implementation is deliberately empty; concrete modules
* override it to register their elements and attributes.
* @param $config Instance of HTMLPurifier_Config
*/
function setup($config) {}
}

View File

@@ -15,7 +15,7 @@ class HTMLPurifier_HTMLModule_Bdo extends HTMLPurifier_HTMLModule
'I18N' => array('dir' => false)
);
function HTMLPurifier_HTMLModule_Bdo() {
function setup($config) {
$bdo =& $this->addElement(
'bdo', true, 'Inline', 'Inline', array('Core', 'Lang'),
array(

View File

@@ -12,7 +12,7 @@ class HTMLPurifier_HTMLModule_Edit extends HTMLPurifier_HTMLModule
var $name = 'Edit';
function HTMLPurifier_HTMLModule_Edit() {
function setup($config) {
$contents = 'Chameleon: #PCDATA | Inline ! #PCDATA | Flow';
$attr = array(
'cite' => 'URI',

View File

@@ -11,7 +11,7 @@ class HTMLPurifier_HTMLModule_Hypertext extends HTMLPurifier_HTMLModule
var $name = 'Hypertext';
function HTMLPurifier_HTMLModule_Hypertext() {
function setup($config) {
$a =& $this->addElement(
'a', true, 'Inline', 'Inline', 'Common',
array(

View File

@@ -5,6 +5,18 @@ require_once 'HTMLPurifier/HTMLModule.php';
require_once 'HTMLPurifier/AttrDef/URI.php';
require_once 'HTMLPurifier/AttrTransform/ImgRequired.php';
HTMLPurifier_ConfigSchema::define(
'HTML', 'MaxImgLength', 1200, 'int/null', '
<p>
This directive controls the maximum number of pixels in the width and
height attributes in <code>img</code> tags. This is
in place to prevent imagecrash attacks, disable with null at your own risk.
This directive is similar to %CSS.MaxImgLength, and both should be
concurrently edited, although there are
subtle differences in the input format (the HTML max is an integer).
</p>
');
/**
* XHTML 1.1 Image Module provides basic image embedding.
* @note There is specialized code for removing empty images in
@@ -15,17 +27,26 @@ class HTMLPurifier_HTMLModule_Image extends HTMLPurifier_HTMLModule
var $name = 'Image';
function HTMLPurifier_HTMLModule_Image() {
function setup($config) {
$max = $config->get('HTML', 'MaxImgLength');
$img =& $this->addElement(
'img', true, 'Inline', 'Empty', 'Common',
array(
'alt*' => 'Text',
'height' => 'Length',
// According to the spec, it's Length, but percents can
// be abused, so we allow only Pixels. A trusted module
// could overload this with the real value.
'height' => 'Pixels#' . $max,
'width' => 'Pixels#' . $max,
'longdesc' => 'URI',
'src*' => new HTMLPurifier_AttrDef_URI(true), // embedded
'width' => 'Length'
)
);
if ($max === null || $config->get('HTML', 'Trusted')) {
$img->attr['height'] =
$img->attr['width'] = 'Length';
}
// kind of strange, but splitting things up would be inefficient
$img->attr_transform_pre[] =
$img->attr_transform_post[] =

View File

@@ -25,7 +25,7 @@ class HTMLPurifier_HTMLModule_Legacy extends HTMLPurifier_HTMLModule
var $name = 'Legacy';
function HTMLPurifier_HTMLModule_Legacy() {
function setup($config) {
$this->addElement('basefont', true, 'Inline', 'Empty', false, array(
'color' => 'Color',

View File

@@ -21,7 +21,7 @@ class HTMLPurifier_HTMLModule_List extends HTMLPurifier_HTMLModule
var $content_sets = array('Flow' => 'List');
function HTMLPurifier_HTMLModule_List() {
function setup($config) {
$this->addElement('ol', true, 'List', 'Required: li', 'Common');
$this->addElement('ul', true, 'List', 'Required: li', 'Common');
$this->addElement('dl', true, 'List', 'Required: dt | dd', 'Common');

View File

@@ -12,7 +12,7 @@ class HTMLPurifier_HTMLModule_Object extends HTMLPurifier_HTMLModule
var $name = 'Object';
function HTMLPurifier_HTMLModule_Object() {
function setup($config) {
$this->addElement('object', false, 'Inline', 'Optional: #PCDATA | Flow | param', 'Common',
array(

View File

@@ -17,7 +17,7 @@ class HTMLPurifier_HTMLModule_Presentation extends HTMLPurifier_HTMLModule
var $name = 'Presentation';
function HTMLPurifier_HTMLModule_Presentation() {
function setup($config) {
$this->addElement('b', true, 'Inline', 'Inline', 'Common');
$this->addElement('big', true, 'Inline', 'Inline', 'Common');
$this->addElement('hr', true, 'Block', 'Empty', 'Common');

View File

@@ -11,7 +11,7 @@ class HTMLPurifier_HTMLModule_Ruby extends HTMLPurifier_HTMLModule
var $name = 'Ruby';
function HTMLPurifier_HTMLModule_Ruby() {
function setup($config) {
$this->addElement('ruby', true, 'Inline',
'Custom: ((rb, (rt | (rp, rt, rp))) | (rbc, rtc, rtc?))',
'Common');

View File

@@ -32,7 +32,7 @@ class HTMLPurifier_HTMLModule_Scripting extends HTMLPurifier_HTMLModule
var $elements = array('script', 'noscript');
var $content_sets = array('Block' => 'script | noscript', 'Inline' => 'script | noscript');
function HTMLPurifier_HTMLModule_Scripting() {
function setup($config) {
// TODO: create custom child-definition for noscript that
// auto-wraps stray #PCDATA in a similar manner to
// blockquote's custom definition (we would use it but

View File

@@ -18,7 +18,7 @@ class HTMLPurifier_HTMLModule_StyleAttribute extends HTMLPurifier_HTMLModule
'Core' => array(0 => array('Style'))
);
function HTMLPurifier_HTMLModule_StyleAttribute() {
function setup($config) {
$this->attr_collections['Style']['style'] = new HTMLPurifier_AttrDef_CSS();
}

View File

@@ -11,7 +11,7 @@ class HTMLPurifier_HTMLModule_Tables extends HTMLPurifier_HTMLModule
var $name = 'Tables';
function HTMLPurifier_HTMLModule_Tables() {
function setup($config) {
$this->addElement('caption', true, false, 'Inline', 'Common');

View File

@@ -10,7 +10,7 @@ class HTMLPurifier_HTMLModule_Target extends HTMLPurifier_HTMLModule
var $name = 'Target';
function HTMLPurifier_HTMLModule_Target() {
function setup($config) {
$elements = array('a');
foreach ($elements as $name) {
$e =& $this->addBlankElement($name);

View File

@@ -22,7 +22,7 @@ class HTMLPurifier_HTMLModule_Text extends HTMLPurifier_HTMLModule
'Flow' => 'Heading | Block | Inline'
);
function HTMLPurifier_HTMLModule_Text() {
function setup($config) {
// Inline Phrasal -------------------------------------------------
$this->addElement('abbr', true, 'Inline', 'Inline', 'Common');

View File

@@ -70,7 +70,7 @@ class HTMLPurifier_HTMLModule_Tidy extends HTMLPurifier_HTMLModule
* @todo Wildcard matching and error reporting when an added or
* subtracted fix has no effect.
*/
function construct($config) {
function setup($config) {
// create fixes, initialize fixesForLevel
$fixes = $this->makeFixes();

View File

@@ -342,13 +342,12 @@ class HTMLPurifier_HTMLModuleManager
foreach ($modules as $module) {
$this->processModule($module);
$this->modules[$module]->setup($config);
}
foreach ($this->doctype->tidyModules as $module) {
$this->processModule($module);
if (method_exists($this->modules[$module], 'construct')) {
$this->modules[$module]->construct($config);
}
$this->modules[$module]->setup($config);
}
// setup lookup table based on all valid modules

View File

@@ -28,9 +28,9 @@ class HTMLPurifier_IDAccumulator
* @static
*/
function build($config, &$context) {
$id_accumulator = new HTMLPurifier_IDAccumulator();
$id_accumulator->load($config->get('Attr', 'IDBlacklist'));
return $id_accumulator;
$acc = new HTMLPurifier_IDAccumulator();
$acc->load($config->get('Attr', 'IDBlacklist'));
return $acc;
}
/**

View File

@@ -25,6 +25,13 @@ class HTMLPurifier_Language
*/
var $errorNames = array();
/**
* True if no message file was found for this language, so English
* is being used instead. Check this if you'd like to notify the
* user that they've used a non-supported language.
*/
var $error = false;
/**
* Has the language object been loaded yet?
* @private

View File

@@ -0,0 +1,11 @@
<?php
// private language message file for unit testing purposes
// this language file has no class associated with it
$fallback = 'en';
$messages = array(
'HTMLPurifier' => 'HTML Purifier XNone'
);

View File

@@ -16,6 +16,7 @@ This directive has been available since 2.0.0.
* caching and fallbacks.
* @note Thanks to MediaWiki for the general logic, although this version
* has been entirely rewritten
* @todo Serialized cache for languages
*/
class HTMLPurifier_LanguageFactory
{
@@ -89,40 +90,42 @@ class HTMLPurifier_LanguageFactory
* Creates a language object, handles class fallbacks
* @param $config Instance of HTMLPurifier_Config
* @param $context Instance of HTMLPurifier_Context
* @param $code Code to override configuration with. Private parameter.
*/
function create($config, &$context) {
function create($config, &$context, $code = false) {
// validate language code
$code = $this->validator->validate(
$config->get('Core', 'Language'), $config, $context
);
if ($code === false) {
$code = $this->validator->validate(
$config->get('Core', 'Language'), $config, $context
);
} else {
$code = $this->validator->validate($code, $config, $context);
}
if ($code === false) $code = 'en'; // malformed code becomes English
$pcode = str_replace('-', '_', $code); // make valid PHP classname
static $depth = 0; // recursion protection
if ($code == 'en') {
$class = 'HTMLPurifier_Language';
$file = $this->dir . '/Language.php';
$lang = new HTMLPurifier_Language($config, $context);
} else {
$class = 'HTMLPurifier_Language_' . $pcode;
$file = $this->dir . '/Language/classes/' . $code . '.php';
// PHP5/APC deps bug workaround can go here
// you can bypass the conditional include by loading the
// file yourself
if (file_exists($file) && !class_exists($class)) {
include_once $file;
}
}
if (!class_exists($class)) {
// go fallback
$fallback = HTMLPurifier_LanguageFactory::getFallbackFor($code);
$depth++;
$lang = HTMLPurifier_LanguageFactory::factory( $fallback );
$depth--;
} else {
$lang = new $class($config, $context);
if (file_exists($file)) {
include $file;
$lang = new $class($config, $context);
} else {
// Go fallback
$raw_fallback = $this->getFallbackFor($code);
$fallback = $raw_fallback ? $raw_fallback : 'en';
$depth++;
$lang = $this->create($config, $context, $fallback);
if (!$raw_fallback) {
$lang->error = true;
}
$depth--;
}
}
$lang->code = $code;

View File

@@ -0,0 +1,111 @@
<?php
/**
 * Represents a measurable length, with a string numeric magnitude
 * and a unit. This object is immutable.
 */
class HTMLPurifier_Length
{

    /**
     * String numeric magnitude.
     */
    var $n;

    /**
     * String unit. False is permitted if $n = 0.
     */
    var $unit;

    /**
     * Whether or not this length is valid. Null if not calculated yet.
     */
    var $isValid;

    /**
     * @param number $n Magnitude
     * @param string $u Unit, or false for none
     */
    function HTMLPurifier_Length($n = '0', $u = false) {
        $this->n = (string) $n;
        $this->unit = $u !== false ? (string) $u : false;
    }

    /**
     * Splits a string into its leading numeric part and trailing unit.
     * @param string $s Unit string, like '2em' or '3.4in'. An existing
     *      HTMLPurifier_Length is returned unchanged.
     * @return HTMLPurifier_Length whose unit is false when $s has no
     *      unit part
     * @warning Does not perform validation.
     */
    function make($s) {
        if (is_a($s, 'HTMLPurifier_Length')) return $s;
        // everything up to the first character that cannot be part of a
        // number is the magnitude; the remainder is the unit
        $n_length = strspn($s, '1234567890.+-');
        $n = substr($s, 0, $n_length);
        $unit = substr($s, $n_length);
        if ($unit === '') $unit = false;
        return new HTMLPurifier_Length($n, $unit);
    }

    /**
     * Validates the number and unit, normalizing both in place
     * (signed zero becomes '0', the unit is lower-cased, the magnitude is
     * replaced by the output of HTMLPurifier_AttrDef_CSS_Number).
     * @return bool True if the length is valid
     */
    function validate() {
        static $allowedUnits = array(
            'em' => true, 'ex' => true, 'px' => true, 'in' => true,
            'cm' => true, 'mm' => true, 'pt' => true, 'pc' => true
        );
        // Special case: zero is the only magnitude allowed without a unit
        if ($this->n === '+0' || $this->n === '-0') $this->n = '0';
        if ($this->n === '0' && $this->unit === false) return true;
        // Any other unitless length is invalid. Bail out here rather than
        // feeding boolean false to ctype_lower()/strtolower(), which would
        // silently rewrite $this->unit to ''.
        if ($this->unit === false) return false;
        if (!ctype_lower($this->unit)) $this->unit = strtolower($this->unit);
        if (!isset($allowedUnits[$this->unit])) return false;
        // Hack: reuse the CSS number validator; it is handed dummy
        // config/context arguments
        $def = new HTMLPurifier_AttrDef_CSS_Number();
        $a = false; // hack hack
        $result = $def->validate($this->n, $a, $a);
        if ($result === false) return false;
        $this->n = $result;
        return true;
    }

    /**
     * Returns string representation of the length (magnitude followed by
     * unit), or false if the length is not valid.
     */
    function toString() {
        if (!$this->isValid()) return false;
        return $this->n . $this->unit;
    }

    /**
     * Retrieves string numeric magnitude.
     */
    function getN() {return $this->n;}

    /**
     * Retrieves string unit.
     */
    function getUnit() {return $this->unit;}

    /**
     * Returns true if this length unit is valid. The result of validate()
     * is computed on first call and cached in $isValid.
     */
    function isValid() {
        if ($this->isValid === null) $this->isValid = $this->validate();
        return $this->isValid;
    }

    /**
     * Compares two lengths.
     * @param HTMLPurifier_Length $l Length to compare against; it is
     *      converted to this length's unit first if the units differ.
     * @return The numeric difference of the magnitudes: positive if this
     *      length is greater, negative if less and zero if equal. Only
     *      the sign should be relied upon. Returns false if $l is false
     *      or the unit conversion fails.
     * @warning If both values are too large or small, this calculation will
     *          not work properly
     */
    function compareTo($l) {
        if ($l === false) return false;
        if ($l->unit !== $this->unit) {
            $converter = new HTMLPurifier_UnitConverter();
            $l = $converter->convert($l, $this->unit);
            if ($l === false) return false;
        }
        return $this->n - $l->n;
    }

}

View File

@@ -90,10 +90,27 @@ class HTMLPurifier_Lexer_DOMLex extends HTMLPurifier_Lexer
$tokens[] = $this->factory->createText($node->data);
return;
} elseif ($node->nodeType === XML_CDATA_SECTION_NODE) {
// undo DOM's special treatment of <script> tags
$tokens[] = $this->factory->createText($this->parseData($node->data));
// undo libxml's special treatment of <script> and <style> tags
$last = end($tokens);
$data = $node->data;
// (note $node->tagname is already normalized)
if ($last instanceof HTMLPurifier_Token_Start && $last->name == 'script') {
$new_data = trim($data);
if (substr($new_data, 0, 4) === '<!--') {
$data = substr($new_data, 4);
if (substr($data, -3) === '-->') {
$data = substr($data, 0, -3);
} else {
// Highly suspicious! Not sure what to do...
}
}
}
$tokens[] = $this->factory->createText($this->parseData($data));
return;
} elseif ($node->nodeType === XML_COMMENT_NODE) {
// this code is only invoked for comments in script/style in versions
// of libxml pre-2.6.28 (regular comments, of course, are still
// handled regularly)
$tokens[] = $this->factory->createComment($node->data);
return;
} elseif (

View File

@@ -168,7 +168,7 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer
// Check if it's a comment
if (
substr($segment, 0, 3) === '!--'
strncmp('!--', $segment, 3) === 0
) {
// re-determine segment length, looking for -->
$position_comment_end = strpos($html, '-->', $cursor);
@@ -184,12 +184,7 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer
}
$strlen_segment = $position_comment_end - $cursor;
$segment = substr($html, $cursor, $strlen_segment);
$token = new
HTMLPurifier_Token_Comment(
substr(
$segment, 3, $strlen_segment - 3
)
);
$token = new HTMLPurifier_Token_Comment(substr($segment, 3));
if ($maintain_line_numbers) {
$token->line = $current_line;
$current_line += $this->substrCount($html, $nl, $cursor, $strlen_segment);

View File

@@ -2,12 +2,68 @@
/**
* Class that handles operations involving percent-encoding in URIs.
*
* @warning
* Be careful when reusing instances of PercentEncoder. The object
* you use for normalize() SHOULD NOT be used for encode(), or
* vice-versa.
*/
class HTMLPurifier_PercentEncoder
{
/**
* Fix up percent-encoding by decoding unreserved characters and normalizing
* Reserved characters to preserve when using encode().
*/
var $preserve = array();
/**
 * Builds the lookup table of byte values that are left alone.
 * @param $preserve String of extra characters that should be preserved
 *      while using encode(), on top of the unreserved set; false for none.
 */
function HTMLPurifier_PercentEncoder($preserve = false) {
    // unreserved characters, ought to const-ify:
    // digits, upper-case, lower-case, then - . _ ~
    $unreserved = array_merge(
        range(48, 57),
        range(65, 90),
        range(97, 122),
        array(45, 46, 95, 126)
    );
    foreach ($unreserved as $ordinal) {
        $this->preserve[$ordinal] = true;
    }

    // nothing extra to keep unescaped
    if ($preserve === false) return;

    $length = strlen($preserve);
    for ($pos = 0; $pos < $length; $pos++) {
        $this->preserve[ord($preserve[$pos])] = true;
    }
}
/**
 * Our replacement for urlencode: percent-encodes every byte of the
 * string except literal percent signs and the bytes registered in
 * $this->preserve.
 * @note
 *      Assumes that the string has already been normalized, making any
 *      and all percent escape sequences valid. Percents will not be
 *      re-escaped, regardless of their status in $preserve
 * @param $string String to be encoded
 * @return Encoded string.
 */
function encode($string) {
    $ret = '';
    $length = strlen($string);
    for ($pos = 0; $pos < $length; $pos++) {
        $char = $string[$pos];
        // percents and preserved characters pass through untouched
        if ($char === '%' || isset($this->preserve[ord($char)])) {
            $ret .= $char;
            continue;
        }
        // everything else becomes an upper-case two-digit hex escape
        $ret .= '%' . sprintf('%02X', ord($char));
    }
    return $ret;
}
/**
* Fix up percent-encoding by decoding unreserved characters and normalizing.
* @warning This function is affected by $preserve, even though the
* usual desired behavior is for this not to preserve those
* characters. Be careful when reusing instances of PercentEncoder!
* @param $string String to normalize
*/
function normalize($string) {
@@ -27,12 +83,7 @@ class HTMLPurifier_PercentEncoder
continue;
}
$int = hexdec($encoding);
if (
($int >= 48 && $int <= 57) || // digits
($int >= 65 && $int <= 90) || // uppercase letters
($int >= 97 && $int <= 122) || // lowercase letters
$int == 126 || $int == 45 || $int == 46 || $int == 95 // ~-._
) {
if (isset($this->preserve[$int])) {
$ret .= chr($int) . $text;
continue;
}

View File

@@ -158,10 +158,9 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
// the parent
if (!isset($parent_info->child->elements[$token->name])) {
if ($e) $e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag auto closed', $parent);
// close the parent, then append the token
// close the parent, then re-loop to reprocess token
$result[] = new HTMLPurifier_Token_End($parent->name);
$result[] = $token;
$this->currentNesting[] = $token;
$this->inputIndex--;
continue;
}

View File

@@ -4,7 +4,12 @@ require_once 'HTMLPurifier/URIParser.php';
require_once 'HTMLPurifier/URIFilter.php';
/**
* HTML Purifier's internal representation of a URI
* HTML Purifier's internal representation of a URI.
* @note
* Internal data-structures are completely escaped. If the data needs
* to be used in a non-URI context (which is very unlikely), be sure
* to decode it first. The URI may not necessarily be well-formed until
* validate() is called.
*/
class HTMLPurifier_URI
{
@@ -52,13 +57,27 @@ class HTMLPurifier_URI
}
/**
* Generic validation method applicable for all schemes
* Generic validation method applicable for all schemes. May modify
* this URI in order to get it into a compliant form.
* @param $config Instance of HTMLPurifier_Config
* @param $context Instance of HTMLPurifier_Context
* @return True if validation/filtering succeeds, false if failure
*/
function validate($config, &$context) {
// ABNF definitions from RFC 3986
$chars_sub_delims = '!$&\'()*+,;=';
$chars_gen_delims = ':/?#[]@';
$chars_pchar = $chars_sub_delims . ':@';
// validate scheme (MUST BE FIRST!)
if (!is_null($this->scheme) && is_null($this->host)) {
$def = $config->getDefinition('URI');
if ($def->defaultScheme === $this->scheme) {
$this->scheme = null;
}
}
// validate host
if (!is_null($this->host)) {
$host_def = new HTMLPurifier_AttrDef_URI_Host();
@@ -66,18 +85,62 @@ class HTMLPurifier_URI
if ($this->host === false) $this->host = null;
}
// validate username
if (!is_null($this->userinfo)) {
$encoder = new HTMLPurifier_PercentEncoder($chars_sub_delims . ':');
$this->userinfo = $encoder->encode($this->userinfo);
}
// validate port
if (!is_null($this->port)) {
if ($this->port < 1 || $this->port > 65535) $this->port = null;
}
// query and fragment are quite simple in terms of definition:
// *( pchar / "/" / "?" ), so define their validation routines
// when we start fixing percent encoding
// validate path
$path_parts = array();
$segments_encoder = new HTMLPurifier_PercentEncoder($chars_pchar . '/');
if (!is_null($this->host)) {
// path-abempty (hier and relative)
$this->path = $segments_encoder->encode($this->path);
} elseif ($this->path !== '' && $this->path[0] === '/') {
// path-absolute (hier and relative)
if (strlen($this->path) >= 2 && $this->path[1] === '/') {
// This shouldn't ever happen!
$this->path = '';
} else {
$this->path = $segments_encoder->encode($this->path);
}
} elseif (!is_null($this->scheme) && $this->path !== '') {
// path-rootless (hier)
// Short circuit evaluation means we don't need to check nz
$this->path = $segments_encoder->encode($this->path);
} elseif (is_null($this->scheme) && $this->path !== '') {
// path-noscheme (relative)
// (once again, not checking nz)
$segment_nc_encoder = new HTMLPurifier_PercentEncoder($chars_sub_delims . '@');
$c = strpos($this->path, '/');
if ($c !== false) {
$this->path =
$segment_nc_encoder->encode(substr($this->path, 0, $c)) .
$segments_encoder->encode(substr($this->path, $c));
} else {
$this->path = $segment_nc_encoder->encode($this->path);
}
} else {
// path-empty (hier and relative)
$this->path = ''; // just to be safe
}
// path gets to be validated against a hodge-podge of rules depending
// on the status of authority and scheme, but it's not that important,
// esp. since it won't be applicable to everyone
// qf = query and fragment
$qf_encoder = new HTMLPurifier_PercentEncoder($chars_pchar . '/?');
if (!is_null($this->query)) {
$this->query = $qf_encoder->encode($this->query);
}
if (!is_null($this->fragment)) {
$this->fragment = $qf_encoder->encode($this->fragment);
}
return true;

View File

@@ -4,24 +4,39 @@ require_once 'HTMLPurifier/URI.php';
/**
* Parses a URI into the components and fragment identifier as specified
* by RFC 2396.
* @todo Replace regexps with a native PHP parser
* by RFC 3986.
*/
class HTMLPurifier_URIParser
{
/**
* Parses a URI
* Instance of HTMLPurifier_PercentEncoder to do normalization with.
*/
var $percentEncoder;
function HTMLPurifier_URIParser() {
$this->percentEncoder = new HTMLPurifier_PercentEncoder();
}
/**
* Parses a URI.
* @param $uri string URI to parse
* @return HTMLPurifier_URI representation of URI
* @return HTMLPurifier_URI representation of URI. This representation has
* not been validated yet and may not conform to RFC.
*/
function parse($uri) {
$uri = $this->percentEncoder->normalize($uri);
// Regexp is as per Appendix B.
// Note that ["<>] are an addition to the RFC's recommended
// characters, because they represent external delimiters.
$r_URI = '!'.
'(([^:/?#<>\'"]+):)?'. // 2. Scheme
'(//([^/?#<>\'"]*))?'. // 4. Authority
'([^?#<>\'"]*)'. // 5. Path
'(\?([^#<>\'"]*))?'. // 7. Query
'(#([^<>\'"]*))?'. // 8. Fragment
'(([^:/?#"<>]+):)?'. // 2. Scheme
'(//([^/?#"<>]*))?'. // 4. Authority
'([^?#"<>]*)'. // 5. Path
'(\?([^#"<>]*))?'. // 7. Query
'(#([^"<>]*))?'. // 8. Fragment
'!';
$matches = array();
@@ -38,13 +53,7 @@ class HTMLPurifier_URIParser
// further parse authority
if ($authority !== null) {
// ridiculously inefficient: it's a stacked regex!
$HEXDIG = '[A-Fa-f0-9]';
$unreserved = 'A-Za-z0-9-._~'; // make sure you wrap with []
$sub_delims = '!$&\'()'; // needs []
$pct_encoded = "%$HEXDIG$HEXDIG";
$r_userinfo = "(?:[$unreserved$sub_delims:]|$pct_encoded)*";
$r_authority = "/^(($r_userinfo)@)?(\[[^\]]+\]|[^:]*)(:(\d*))?/";
$r_authority = "/^((.+?)@)?(\[[^\]]+\]|[^:]*)(:(\d*))?/";
$matches = array();
preg_match($r_authority, $authority, $matches);
$userinfo = !empty($matches[1]) ? $matches[2] : null;

View File

@@ -0,0 +1,241 @@
<?php
/**
 * Class for converting between different unit-lengths as specified by
 * CSS.
 *
 * Arithmetic is done with BCMath when the extension is available (and not
 * explicitly disabled); otherwise it falls back to native floats that are
 * formatted to look like BCMath output.
 */
class HTMLPurifier_UnitConverter
{

    /**
     * Minimum bcmath precision for output.
     */
    var $outputPrecision;

    /**
     * Bcmath precision for internal calculations.
     */
    var $internalPrecision;

    /**
     * Whether or not BCMath is available
     */
    var $bcmath;

    /**
     * @param int  $output_precision   Minimum number of significant figures
     *                                 kept in the output.
     * @param int  $internal_precision Number of decimals used for
     *                                 intermediate calculations.
     * @param bool $force_no_bcmath    When true, BCMath is not used even if
     *                                 the extension is loaded.
     */
    function HTMLPurifier_UnitConverter($output_precision = 4, $internal_precision = 10, $force_no_bcmath = false) {
        $this->outputPrecision = $output_precision;
        $this->internalPrecision = $internal_precision;
        $this->bcmath = !$force_no_bcmath && function_exists('bcmul');
    }

    /**
     * Converts a length object of one unit into another unit.
     * @param HTMLPurifier_Length $length
     *      Instance of HTMLPurifier_Length to convert. You must validate()
     *      it before passing it here!
     * @param string $to_unit
     *      Unit to convert to.
     * @return HTMLPurifier_Length in the requested unit, or false if the
     *      length is invalid or either unit is not known to the converter.
     * @note
     *      About precision: This conversion function pays very special
     *      attention to the incoming precision of values and attempts
     *      to maintain a number of significant figures. Results are
     *      fairly accurate up to nine digits. Some caveats:
     *          - If a number is zero-padded as a result of this significant
     *            figure tracking, the zeroes will be eliminated.
     *          - If a number contains less than four sigfigs ($outputPrecision)
     *            and this causes some decimals to be excluded, those
     *            decimals will be added on.
     */
    function convert($length, $to_unit) {

        /**
         * Units information array. Units are grouped into measuring systems
         * (English, Metric), and are assigned an integer representing
         * the conversion factor between that unit and the smallest unit in
         * the system. Numeric indexes are actually magical constants that
         * encode conversion data from one system to the next, with a O(n^2)
         * constraint on memory (this is generally not a problem, since
         * the number of measuring systems is small.)
         *
         * A numeric entry reads: array(unit to leave this system from,
         * multiplier, unit that results in the target system).
         */
        static $units = array(
            1 => array(
                'px' => 3, // This is as per CSS 2.1 and Firefox. Your mileage may vary
                'pt' => 4,
                'pc' => 48,
                'in' => 288,
                2 => array('pt', '0.352777778', 'mm'),
            ),
            2 => array(
                'mm' => 1,
                'cm' => 10,
                1 => array('mm', '2.83464567', 'pt'),
            ),
        );

        if (!$length->isValid()) return false;

        $n    = $length->getN();
        $unit = $length->getUnit();

        // Zero is zero in every unit; a unit-less length cannot be converted.
        if ($n === '0' || $unit === false) {
            return new HTMLPurifier_Length('0', false);
        }

        // Find the measuring system of the source and destination units.
        $state = $dest_state = false;
        foreach ($units as $k => $x) {
            if (isset($x[$unit])) $state = $k;
            if (isset($x[$to_unit])) $dest_state = $k;
        }
        if (!$state || !$dest_state) return false;

        // Some calculations about the initial precision of the number;
        // this will be useful when we need to do final rounding.
        $sigfigs = $this->getSigFigs($n);
        if ($sigfigs < $this->outputPrecision) $sigfigs = $this->outputPrecision;

        // Cleanup $n for PHP 4.3.9 and 4.3.10. See http://bugs.php.net/bug.php?id=30726
        if (strncmp($n, '-.', 2) === 0) {
            $n = '-0.' . substr($n, 2);
        }

        // BCMath's internal precision deals only with decimals. Use
        // our default if the initial number has no decimals, or increase
        // it by how ever many decimals, thus, the number of guard digits
        // will always be greater than or equal to internalPrecision.
        $log = (int) floor(log(abs($n), 10));
        $cp = ($log < 0) ? $this->internalPrecision - $log : $this->internalPrecision; // internal precision

        // At most two passes: one inside the source system and, after a
        // system shift, one inside the destination system.
        for ($i = 0; $i < 2; $i++) {

            // Determine what unit IN THIS SYSTEM we need to convert to
            if ($dest_state === $state) {
                // Simple conversion
                $dest_unit = $to_unit;
            } else {
                // Convert to the smallest unit, pending a system shift
                $dest_unit = $units[$state][$dest_state][0];
            }

            // Do the conversion if necessary
            if ($dest_unit !== $unit) {
                $factor = $this->div($units[$state][$unit], $units[$state][$dest_unit], $cp);
                $n = $this->mul($n, $factor, $cp);
                $unit = $dest_unit;
            }

            // Output was zero, so bail out early. Shouldn't ever happen.
            if ($n === '') {
                $n = '0';
                $unit = $to_unit;
                break;
            }

            // It was a simple conversion, so bail out
            if ($dest_state === $state) {
                break;
            }

            if ($i !== 0) {
                // Conversion failed! Apparently, the system we forwarded
                // to didn't have this unit. This should never happen!
                return false;
            }

            // Pre-condition: $i == 0

            // Perform conversion to next system of units
            $n = $this->mul($n, $units[$state][$dest_state][1], $cp);
            $unit = $units[$state][$dest_state][2];
            $state = $dest_state;

            // One more loop around to convert the unit in the new system.

        }

        // Post-condition: $unit == $to_unit
        if ($unit !== $to_unit) return false;

        // Round to the tracked number of sigfigs, then strip the zero
        // padding (and a dangling decimal point) left behind by rounding.
        $n = $this->round($n, $sigfigs);
        if (strpos($n, '.') !== false) $n = rtrim($n, '0');
        $n = rtrim($n, '.');

        return new HTMLPurifier_Length($n, $unit);
    }

    /**
     * Returns the number of significant figures in a string number.
     * @param string $n Decimal number
     * @return int number of sigfigs
     */
    function getSigFigs($n) {
        // Leading zeroes and the sign are never significant.
        $n = ltrim($n, '0+-');
        $dp = strpos($n, '.'); // decimal position
        if ($dp === false) {
            // Integer: trailing zeroes are treated as padding.
            $sigfigs = strlen(rtrim($n, '0'));
        } else {
            $sigfigs = strlen(ltrim($n, '0.')); // eliminate extra decimal character
            // If digits precede the decimal point, the point itself is
            // still inside the counted string; do not count it.
            if ($dp !== 0) $sigfigs--;
        }
        return $sigfigs;
    }

    /**
     * Adds two numbers, using arbitrary precision when available.
     * @param string $s1    First operand
     * @param string $s2    Second operand
     * @param int    $scale Number of digits after the decimal point
     * @return string Sum formatted with $scale decimals
     */
    function add($s1, $s2, $scale) {
        if ($this->bcmath) return bcadd($s1, $s2, $scale);
        else return $this->scale($s1 + $s2, $scale);
    }

    /**
     * Multiplies two numbers, using arbitrary precision when available.
     * @param string $s1    First factor
     * @param string $s2    Second factor
     * @param int    $scale Number of digits after the decimal point
     * @return string Product formatted with $scale decimals
     */
    function mul($s1, $s2, $scale) {
        if ($this->bcmath) return bcmul($s1, $s2, $scale);
        else return $this->scale($s1 * $s2, $scale);
    }

    /**
     * Divides two numbers, using arbitrary precision when available.
     * @param string $s1    Dividend
     * @param string $s2    Divisor
     * @param int    $scale Number of digits after the decimal point
     * @return string Quotient formatted with $scale decimals
     */
    function div($s1, $s2, $scale) {
        if ($this->bcmath) return bcdiv($s1, $s2, $scale);
        else return $this->scale($s1 / $s2, $scale);
    }

    /**
     * Rounds a number according to the number of sigfigs it should have,
     * using arbitrary precision when available.
     * @param string $n       Decimal number to round
     * @param int    $sigfigs Number of significant figures to keep
     * @return string Rounded number
     */
    function round($n, $sigfigs) {
        $new_log = (int) floor(log(abs($n), 10)); // Number of digits left of decimal - 1
        $rp = $sigfigs - $new_log - 1; // Number of decimal places needed
        $neg = $n < 0 ? '-' : ''; // Negative sign
        if ($this->bcmath) {
            if ($rp >= 0) {
                // Add half a unit in the last kept place, then truncate.
                $n = bcadd($n, $neg . '0.' .  str_repeat('0', $rp) . '5', $rp + 1);
                $n = bcdiv($n, '1', $rp);
            } else {
                // This algorithm partially depends on the standardized
                // form of numbers that comes out of bcmath.
                $n = bcadd($n, $neg . '5' . str_repeat('0', $new_log - $sigfigs), 0);
                $n = substr($n, 0, $sigfigs + strlen($neg)) . str_repeat('0', $new_log - $sigfigs + 1);
            }
            return $n;
        } else {
            // $rp + 1 goes negative once the integer part is two or more
            // digits longer than $sigfigs; scale() copes with that.
            return $this->scale(round($n, $rp), $rp + 1);
        }
    }

    /**
     * Scales a float to $scale digits right of decimal point, like BCMath.
     * @param float $r     Number to format
     * @param int   $scale Number of decimals; negative values are treated
     *                     as zero
     * @return string Formatted number
     */
    function scale($r, $scale) {
        // sprintf() has no negative precision: '%.-1f' is not a valid
        // conversion (garbage on old PHP, ValueError on PHP 8). The caller
        // has already rounded to the left of the decimal point, so
        // printing without decimals is the right output.
        if ($scale < 0) $scale = 0;
        return sprintf('%.' . $scale . 'f', (float) $r);
    }

}

View File

@@ -1,30 +0,0 @@
<?php
// Merges in changes from trunk to strict branch.
// WORKING COPY MUST BE POINTED TO STRICT BRANCH

// This script shells out to svn, so refuse to run outside the CLI.
if (php_sapi_name() != 'cli') {
    echo 'Release script cannot be called from web-browser.';
    exit;
}

require 'svn.php';

// Work out where the last merge stopped and where trunk lives.
$svn_info = svn_info('.');
$last_rev = (int) $svn_info['Last Changed Rev'];
$trunk_url = $svn_info['Repository Root'] . '/htmlpurifier/trunk';

echo "Last revision was $last_rev, merging from $last_rev to head.\n";
$merge_cmd = "svn merge -r $last_rev:HEAD $trunk_url .";

// shell_exec() returns null when the command fails or prints nothing;
// cast so explode() always receives a string.
$out = explode("\n", (string) shell_exec($merge_cmd));

echo "Conflicted files:\n";
foreach ($out as $line) {
    if (empty($line)) continue;
    // Report lines with 'C' in either of the first two status columns.
    // substr() is used instead of $line{0} / $line{1}: the curly-brace
    // offset syntax no longer parses on PHP 8, and index 1 does not exist
    // on one-character lines.
    if (strpos(substr($line, 0, 2), 'C') !== false) echo $line . "\n";
}

$version = trim(file_get_contents('VERSION'));
echo "Resolve conflicts and then commit as 'Release $version, merged in $last_rev to HEAD.'";

20
release2-tag.php Normal file
View File

@@ -0,0 +1,20 @@
<?php
// Tags a release: copies the php4 branch to tags/<version>, where the
// version string is read from the VERSION file.

// The script drives svn directly, so it must only run from the CLI.
if (php_sapi_name() != 'cli') {
    echo 'Release script cannot be called from web-browser.';
    exit();
}

require 'svn.php';

$svn_info = my_svn_info('.');
$version  = trim(file_get_contents('VERSION'));

// Source (php4 branch) and destination (tag) URLs in the repository.
$repository_root = $svn_info['Repository Root'];
$branch_url = $repository_root . '/htmlpurifier/branches/php4';
$tag_url    = $repository_root . '/htmlpurifier/tags/' . $version;

echo 'Tagging php4 branch to tags/' . $version . '...';

$copy_cmd = 'svn copy --message "Tag ' . $version . ' release." ' . $branch_url . ' ' . $tag_url;
passthru($copy_cmd);

View File

@@ -1,25 +0,0 @@
<?php
// Tags a release: copies trunk to tags/<version> and the strict branch to
// tags/<version>-strict, where the version is read from the VERSION file.

// The script drives svn directly, so it must only run from the CLI.
if (php_sapi_name() != 'cli') {
    echo 'Release script cannot be called from web-browser.';
    exit();
}

require 'svn.php';

$svn_info = svn_info('.');
$version  = trim(file_get_contents('VERSION'));
$root     = $svn_info['Repository Root'];

// Each entry: label used in the progress message, source URL, tag name.
$targets = array(
    array('trunk',  $root . '/htmlpurifier/trunk',           $version),
    array('strict', $root . '/htmlpurifier/branches/strict', $version . '-strict'),
);

foreach ($targets as $target) {
    list($label, $source_url, $tag) = $target;
    $tag_url = $root . '/htmlpurifier/tags/' . $tag;
    echo "Tagging $label to tags/$tag...";
    passthru("svn copy --message \"Tag $tag release.\" $source_url $tag_url");
}

View File

@@ -1,6 +1,6 @@
<?php
function svn_info($dir) {
function my_svn_info($dir) {
$raw = explode("\n", shell_exec("svn info $dir"));
$svn_info = array();
foreach ($raw as $r) {

View File

@@ -14,6 +14,10 @@ class HTMLPurifier_AttrDef_CSS_BackgroundTest extends HTMLPurifier_AttrDefHarnes
$valid = '#333 url(chess.png) repeat fixed 50% top';
$this->assertDef($valid);
$this->assertDef('url("chess.png") #333 50% top repeat fixed', $valid);
$this->assertDef(
'rgb(34, 56, 33) url(chess.png) repeat fixed top',
'rgb(34,56,33) url(chess.png) repeat fixed top'
);
}

View File

@@ -14,6 +14,7 @@ class HTMLPurifier_AttrDef_CSS_BorderTest extends HTMLPurifier_AttrDefHarness
$this->assertDef('thick solid');
$this->assertDef('solid red', 'solid #FF0000');
$this->assertDef('1px solid #000');
$this->assertDef('1px solid rgb(0, 0, 0)', '1px solid rgb(0,0,0)');
}

View File

@@ -11,6 +11,8 @@ class HTMLPurifier_AttrDef_CSS_ColorTest extends HTMLPurifier_AttrDefHarness
$this->def = new HTMLPurifier_AttrDef_CSS_Color();
$this->assertDef('#F00');
$this->assertDef('#fff');
$this->assertDef('#eeeeee');
$this->assertDef('#808080');
$this->assertDef('rgb(255, 0, 0)', 'rgb(255,0,0)'); // rm spaces
$this->assertDef('rgb(100%,0%,0%)');
@@ -27,6 +29,11 @@ class HTMLPurifier_AttrDef_CSS_ColorTest extends HTMLPurifier_AttrDefHarness
// color keywords, of course
$this->assertDef('red', '#FF0000');
// malformed hex declaration
$this->assertDef('808080', '#808080');
$this->assertDef('000000', '#000000');
$this->assertDef('fed', '#fed');
// maybe hex transformations would be another nice feature
// at the very least transform rgb percent to rgb integer

View File

@@ -20,7 +20,21 @@ class HTMLPurifier_AttrDef_CSS_FontFamilyTest extends HTMLPurifier_AttrDefHarnes
$this->assertDef("John's Font", $d);
$this->assertDef($d = "'\xE5\xAE\x8B\xE4\xBD\x93'");
$this->assertDef("\xE5\xAE\x8B\xE4\xBD\x93", $d);
$this->assertDef("'\\','f'", "'\\\\', f");
$this->assertDef("'\\01'", "''");
$this->assertDef("'\\20'", "' '");
$this->assertDef("\\0020", "'\\\\0020'");
$this->assertDef("'\\000045'", "E");
$this->assertDef("','", false);
$this->assertDef("',' foobar','", "' foobar'");
$this->assertDef("'\\27'", "'\''");
$this->assertDef('"\\22"', "'\"'");
$this->assertDef('"\\""', "'\"'");
$this->assertDef('"\'"', "'\\''");
$this->assertDef("'\\000045a'", "Ea");
$this->assertDef("'\\00045 a'", "Ea");
$this->assertDef("'\\00045 a'", "'E a'");
$this->assertDef("'\\\nf'", "f");
}
}

View File

@@ -31,12 +31,20 @@ class HTMLPurifier_AttrDef_CSS_LengthTest extends HTMLPurifier_AttrDefHarness
function testNonNegative() {
$this->def = new HTMLPurifier_AttrDef_CSS_Length(true);
$this->def = new HTMLPurifier_AttrDef_CSS_Length('0');
$this->assertDef('3cm');
$this->assertDef('-3mm', false);
}
function testBounding() {
$this->def = new HTMLPurifier_AttrDef_CSS_Length('-1in', '1in');
$this->assertDef('1cm');
$this->assertDef('-1cm');
$this->assertDef('0');
$this->assertDef('1em', false);
}
}

View File

@@ -10,6 +10,9 @@ class HTMLPurifier_AttrDef_CSS_TextDecorationTest extends HTMLPurifier_AttrDefHa
$this->def = new HTMLPurifier_AttrDef_CSS_TextDecoration();
$this->assertDef('none');
$this->assertDef('none underline', 'underline');
$this->assertDef('underline');
$this->assertDef('overline');
$this->assertDef('line-through overline underline');

View File

@@ -29,7 +29,6 @@ class HTMLPurifier_AttrDef_CSS_URITest extends HTMLPurifier_AttrDefHarness
// escaping
$this->assertDef("url(http://www.example.com/foo,bar\))",
"url(http://www.example.com/foo\,bar\))");
}
}

View File

@@ -107,6 +107,9 @@ class HTMLPurifier_AttrDef_CSSTest extends HTMLPurifier_AttrDefHarness
$this->assertDef(' font-weight : bold; color : #ff0000',
'font-weight:bold;color:#ff0000;');
// case-insensitivity
$this->assertDef('FLOAT:LEFT;', 'float:left;');
}
}

View File

@@ -36,5 +36,12 @@ class HTMLPurifier_AttrDef_HTML_PixelsTest extends HTMLPurifier_AttrDefHarness
}
function test_make() {
$factory = new HTMLPurifier_AttrDef_HTML_Pixels();
$this->def = $factory->make('30');
$this->assertDef('25');
$this->assertDef('35', '30');
}
}

View File

@@ -0,0 +1,34 @@
<?php
require_once 'HTMLPurifier/AttrDef/Switch.php';
/**
 * Tests for HTMLPurifier_AttrDef_Switch, constructed here with the tag name
 * 'tag' and two mocked attribute definitions.
 */
class HTMLPurifier_AttrDef_SwitchTest extends HTMLPurifier_AttrDefHarness
{

    /**
     * Mock definition passed as the second constructor argument.
     */
    var $with;

    /**
     * Mock definition passed as the third constructor argument.
     */
    var $without;

    /**
     * Builds the two mocks and the switch definition under test.
     */
    function setUp() {
        parent::setUp();
        generate_mock_once('HTMLPurifier_AttrDef');
        $this->with    = new HTMLPurifier_AttrDefMock();
        $this->without = new HTMLPurifier_AttrDefMock();
        $this->def     = new HTMLPurifier_AttrDef_Switch('tag', $this->with, $this->without);
    }

    /**
     * Current token is a 'tag' start token: the first mock must be
     * validated exactly once and its return value used as the result.
     */
    function testWith() {
        // Kept in a variable rather than passed inline.
        // NOTE(review): register() presumably takes its argument by reference — confirm.
        $current = new HTMLPurifier_Token_Start('tag');
        $this->context->register('CurrentToken', $current);
        $this->with->setReturnValue('validate', 'foo');
        $this->with->expectOnce('validate');
        $this->assertDef('bar', 'foo');
    }

    /**
     * Current token is some other tag: the second mock must be validated
     * exactly once and its return value used as the result.
     */
    function testWithout() {
        $current = new HTMLPurifier_Token_Start('other-tag');
        $this->context->register('CurrentToken', $current);
        $this->without->setReturnValue('validate', 'foo');
        $this->without->expectOnce('validate');
        $this->assertDef('bar', 'foo');
    }

}

View File

@@ -11,7 +11,7 @@ class HTMLPurifier_AttrDef_TextTest extends HTMLPurifier_AttrDefHarness
$this->def = new HTMLPurifier_AttrDef_Text();
$this->assertDef('This is spiffy text!');
$this->assertDef(" Casual\tCDATA parse\ncheck. ", 'Casual CDATA parsecheck.');
$this->assertDef(" Casual\tCDATA parse\ncheck. ", 'Casual CDATA parse check.');
}

View File

@@ -17,6 +17,27 @@ class HTMLPurifier_AttrDef_URI_HostTest extends HTMLPurifier_AttrDefHarness
$this->assertDef('124.15.6.89'); // IPv4
$this->assertDef('www.google.com'); // reg-name
// more domain name tests
$this->assertDef('test.');
$this->assertDef('sub.test.');
$this->assertDef('.test', false);
$this->assertDef('ff');
$this->assertDef('1f', false);
$this->assertDef('-f', false);
$this->assertDef('f1');
$this->assertDef('f-', false);
$this->assertDef('sub.ff');
$this->assertDef('sub.1f', false);
$this->assertDef('sub.-f', false);
$this->assertDef('sub.f1');
$this->assertDef('sub.f-', false);
$this->assertDef('ff.top');
$this->assertDef('1f.top');
$this->assertDef('-f.top', false);
$this->assertDef('ff.top');
$this->assertDef('f1.top');
$this->assertDef('f-.top', false);
}
}

View File

@@ -33,6 +33,19 @@ class HTMLPurifier_AttrDef_URITest extends HTMLPurifier_AttrDefHarness
);
}
function testPercentEncoding() {
$this->assertDef(
'http:colon:mercenary',
'colon%3Amercenary'
);
}
function testPercentEncodingPreserve() {
$this->assertDef(
'http://www.example.com/abcABC123-_.!~*()\''
);
}
function testEmbeds() {
$this->def = new HTMLPurifier_AttrDef_URI(true);
$this->assertDef('http://sub.example.com/alas?foo=asd');

View File

@@ -12,8 +12,7 @@ class HTMLPurifier_AttrDefTest extends HTMLPurifier_Harness
$this->assertIdentical('', $def->parseCDATA(''));
$this->assertIdentical('', $def->parseCDATA("\t\n\r \t\t"));
$this->assertIdentical('foo', $def->parseCDATA("\t\n\r foo\t\t"));
$this->assertIdentical('ignorelinefeeds', $def->parseCDATA("ignore\nline\nfeeds"));
$this->assertIdentical('translate to space', $def->parseCDATA("translate\rto\tspace"));
$this->assertIdentical('translate to space', $def->parseCDATA("translate\nto\tspace"));
}

View File

@@ -5,13 +5,14 @@ require_once 'HTMLPurifier/DefinitionCacheFactory.php';
class HTMLPurifier_DefinitionCacheFactoryTest extends HTMLPurifier_Harness
{
var $newFactory;
var $factory;
var $oldFactory;
function setup() {
$new = new HTMLPurifier_DefinitionCacheFactory();
parent::setup();
$this->factory = new HTMLPurifier_DefinitionCacheFactory();
$this->oldFactory = HTMLPurifier_DefinitionCacheFactory::instance();
HTMLPurifier_DefinitionCacheFactory::instance($new);
HTMLPurifier_DefinitionCacheFactory::instance($this->factory);
}
function teardown() {
@@ -19,46 +20,52 @@ class HTMLPurifier_DefinitionCacheFactoryTest extends HTMLPurifier_Harness
}
function test_create() {
$config = HTMLPurifier_Config::createDefault();
$factory = HTMLPurifier_DefinitionCacheFactory::instance();
$cache = $factory->create('Test', $config);
$cache = $this->factory->create('Test', $this->config);
$this->assertEqual($cache, new HTMLPurifier_DefinitionCache_Serializer('Test'));
}
function test_create_withDecorator() {
$config = HTMLPurifier_Config::createDefault();
$factory =& HTMLPurifier_DefinitionCacheFactory::instance();
$factory->addDecorator('Memory');
$cache =& $factory->create('Test', $config);
$this->factory->addDecorator('Memory');
$cache = $this->factory->create('Test', $this->config);
$cache_real = new HTMLPurifier_DefinitionCache_Decorator_Memory();
$cache_real = $cache_real->decorate(new HTMLPurifier_DefinitionCache_Serializer('Test'));
$this->assertEqual($cache, $cache_real);
}
function test_create_withDecoratorObject() {
$config = HTMLPurifier_Config::createDefault();
$factory =& HTMLPurifier_DefinitionCacheFactory::instance();
$factory->addDecorator(new HTMLPurifier_DefinitionCache_Decorator_Memory());
$cache =& $factory->create('Test', $config);
$this->factory->addDecorator(new HTMLPurifier_DefinitionCache_Decorator_Memory());
$cache = $this->factory->create('Test', $this->config);
$cache_real = new HTMLPurifier_DefinitionCache_Decorator_Memory();
$cache_real = $cache_real->decorate(new HTMLPurifier_DefinitionCache_Serializer('Test'));
$this->assertEqual($cache, $cache_real);
}
function test_create_recycling() {
$config = HTMLPurifier_Config::createDefault();
$factory =& HTMLPurifier_DefinitionCacheFactory::instance();
$cache =& $factory->create('Test', $config);
$cache2 =& $factory->create('Test', $config);
$cache =& $this->factory->create('Test', $this->config);
$cache2 =& $this->factory->create('Test', $this->config);
$this->assertReference($cache, $cache2);
}
function test_create_invalid() {
$this->config->set('Core', 'DefinitionCache', 'Invalid');
$this->expectError('Unrecognized DefinitionCache Invalid, using Serializer instead');
$cache = $this->factory->create('Test', $this->config);
$this->assertIsA($cache, 'HTMLPurifier_DefinitionCache_Serializer');
}
function test_null() {
$config = HTMLPurifier_Config::create(array('Core.DefinitionCache' => null));
$factory =& HTMLPurifier_DefinitionCacheFactory::instance();
$cache =& $factory->create('Test', $config);
$this->config->set('Core', 'DefinitionCache', null);
$cache = $this->factory->create('Test', $this->config);
$this->assertEqual($cache, new HTMLPurifier_DefinitionCache_Null('Test'));
}
function test_register() {
generate_mock_once('HTMLPurifier_DefinitionCache');
$this->config->set('Core', 'DefinitionCache', 'TestCache');
$this->factory->register('TestCache', $class = 'HTMLPurifier_DefinitionCacheMock');
$cache = $this->factory->create('Test', $this->config);
$this->assertIsA($cache, $class);
}
}

View File

@@ -9,6 +9,7 @@ class HTMLPurifier_EncoderTest extends HTMLPurifier_Harness
function setUp() {
$this->_entity_lookup = HTMLPurifier_EntityLookup::instance();
parent::setUp();
}
function assertCleanUTF8($string, $expect = null) {
@@ -26,93 +27,90 @@ class HTMLPurifier_EncoderTest extends HTMLPurifier_Harness
$this->assertCleanUTF8("\xC2\x80", ''); // two byte invalid SGML
$this->assertCleanUTF8("\xF3\xBF\xBF\xBF"); // valid four byte
$this->assertCleanUTF8("\xDF\xFF", ''); // malformed UTF8
// invalid codepoints
$this->assertCleanUTF8("\xED\xB0\x80", '');
}
function test_convertToUTF8() {
$config = HTMLPurifier_Config::createDefault();
$context = new HTMLPurifier_Context();
function test_convertToUTF8_noConvert() {
// UTF-8 means that we don't touch it
$this->assertIdentical(
HTMLPurifier_Encoder::convertToUTF8("\xF6", $config, $context),
HTMLPurifier_Encoder::convertToUTF8("\xF6", $this->config, $this->context),
"\xF6" // this is invalid
);
$this->assertNoErrors();
$config = HTMLPurifier_Config::create(array(
'Core.Encoding' => 'ISO-8859-1'
));
// Now it gets converted
}
function test_convertToUTF8_iso8859_1() {
$this->config->set('Core', 'Encoding', 'ISO-8859-1');
$this->assertIdentical(
HTMLPurifier_Encoder::convertToUTF8("\xF6", $config, $context),
HTMLPurifier_Encoder::convertToUTF8("\xF6", $this->config, $this->context),
"\xC3\xB6"
);
$config = HTMLPurifier_Config::create(array(
'Core.Encoding' => 'ISO-8859-1',
'Test.ForceNoIconv' => true
));
}
function test_convertToUTF8_withoutIconv() {
$this->config->set('Core', 'Encoding', 'ISO-8859-1');
$this->config->set('Test', 'ForceNoIconv', true);
$this->assertIdentical(
HTMLPurifier_Encoder::convertToUTF8("\xF6", $config, $context),
HTMLPurifier_Encoder::convertToUTF8("\xF6", $this->config, $this->context),
"\xC3\xB6"
);
}
function test_convertFromUTF8() {
$config = HTMLPurifier_Config::createDefault();
$context = new HTMLPurifier_Context();
// zhong-wen
$chinese = "\xE4\xB8\xAD\xE6\x96\x87 (Chinese)";
function getZhongWen() {
return "\xE4\xB8\xAD\xE6\x96\x87 (Chinese)";
}
function test_convertFromUTF8_utf8() {
// UTF-8 means that we don't touch it
$this->assertIdentical(
HTMLPurifier_Encoder::convertFromUTF8("\xC3\xB6", $config, $context),
HTMLPurifier_Encoder::convertFromUTF8("\xC3\xB6", $this->config, $this->context),
"\xC3\xB6"
);
$config = HTMLPurifier_Config::create(array(
'Core.Encoding' => 'ISO-8859-1'
));
// Now it gets converted
}
function test_convertFromUTF8_iso8859_1() {
$this->config->set('Core', 'Encoding', 'ISO-8859-1');
$this->assertIdentical(
HTMLPurifier_Encoder::convertFromUTF8("\xC3\xB6", $config, $context),
HTMLPurifier_Encoder::convertFromUTF8("\xC3\xB6", $this->config, $this->context),
"\xF6"
);
if (function_exists('iconv')) {
// iconv has its own way
$this->assertIdentical(
HTMLPurifier_Encoder::convertFromUTF8($chinese, $config, $context),
" (Chinese)"
);
}
}
function test_convertFromUTF8_iconvNoChars() {
if (!function_exists('iconv')) return;
$this->config->set('Core', 'Encoding', 'ISO-8859-1');
$this->assertIdentical(
HTMLPurifier_Encoder::convertFromUTF8($this->getZhongWen(), $this->config, $this->context),
" (Chinese)"
);
}
function test_convertFromUTF8_phpNormal() {
// Plain PHP implementation has slightly different behavior
$config = HTMLPurifier_Config::create(array(
'Core.Encoding' => 'ISO-8859-1',
'Test.ForceNoIconv' => true
));
$this->config->set('Core', 'Encoding', 'ISO-8859-1');
$this->config->set('Test', 'ForceNoIconv', true);
$this->assertIdentical(
HTMLPurifier_Encoder::convertFromUTF8("\xC3\xB6", $config, $context),
HTMLPurifier_Encoder::convertFromUTF8("\xC3\xB6", $this->config, $this->context),
"\xF6"
);
}
function test_convertFromUTF8_phpNoChars() {
$this->config->set('Core', 'Encoding', 'ISO-8859-1');
$this->config->set('Test', 'ForceNoIconv', true);
$this->assertIdentical(
HTMLPurifier_Encoder::convertFromUTF8($chinese, $config, $context),
HTMLPurifier_Encoder::convertFromUTF8($this->getZhongWen(), $this->config, $this->context),
"?? (Chinese)"
);
}
function test_convertFromUTF8_withProtection() {
// Preserve the characters!
$config = HTMLPurifier_Config::create(array(
'Core.Encoding' => 'ISO-8859-1',
'Core.EscapeNonASCIICharacters' => true
));
$this->config->set('Core', 'Encoding', 'ISO-8859-1');
$this->config->set('Core', 'EscapeNonASCIICharacters', true);
$this->assertIdentical(
HTMLPurifier_Encoder::convertFromUTF8($chinese, $config, $context),
HTMLPurifier_Encoder::convertFromUTF8($this->getZhongWen(), $this->config, $this->context),
"&#20013;&#25991; (Chinese)"
);
@@ -139,5 +137,39 @@ class HTMLPurifier_EncoderTest extends HTMLPurifier_Harness
}
function assertASCIISupportCheck($enc, $ret) {
$test = HTMLPurifier_Encoder::testEncodingSupportsASCII($enc, true);
if ($test === false) return;
$this->assertIdentical(
HTMLPurifier_Encoder::testEncodingSupportsASCII($enc),
$ret
);
$this->assertIdentical(
HTMLPurifier_Encoder::testEncodingSupportsASCII($enc, true),
$ret
);
}
function test_testEncodingSupportsASCII() {
$this->assertASCIISupportCheck('Shift_JIS', array("\xC2\xA5" => '\\', "\xE2\x80\xBE" => '~'));
$this->assertASCIISupportCheck('JOHAB', array("\xE2\x82\xA9" => '\\'));
$this->assertASCIISupportCheck('ISO-8859-1', array());
$this->assertASCIISupportCheck('dontexist', array()); // canary
}
function testShiftJIS() {
if (!function_exists('iconv')) return;
$this->config->set('Core', 'Encoding', 'Shift_JIS');
// This actually looks like a Yen, but we're going to treat it differently
$this->assertIdentical(
HTMLPurifier_Encoder::convertFromUTF8('\\~', $this->config, $this->context),
'\\~'
);
$this->assertIdentical(
HTMLPurifier_Encoder::convertToUTF8('\\~', $this->config, $this->context),
'\\~'
);
}
}

View File

@@ -205,6 +205,9 @@ class HTMLPurifier_GeneratorTest extends HTMLPurifier_ComplexHarness
// abort test if tidy isn't loaded
if (!extension_loaded('tidy')) return;
// just don't test; Tidy is exploding on me.
return;
$this->config = HTMLPurifier_Config::createDefault();
$this->config->set('Core', 'TidyFormat', true);
$this->config->set('Output', 'Newline', "\n");

View File

@@ -87,6 +87,22 @@ a[href|title]
}
function test_addAttribute_multiple() {
$config = HTMLPurifier_Config::create(array(
'HTML.DefinitionID' => 'HTMLPurifier_HTMLDefinitionTest->test_addAttribute_multiple'
));
$def =& $config->getHTMLDefinition(true);
$def->addAttribute('span', 'custom', 'Enum#attribute');
$def->addAttribute('span', 'foo', 'Text');
$purifier = new HTMLPurifier($config);
$input = '<span custom="attribute" foo="asdf">Custom!</span>';
$output = $purifier->purify($input);
$this->assertIdentical($input, $output);
}
function test_addElement() {
$config = HTMLPurifier_Config::create(array(

View File

@@ -0,0 +1,57 @@
<?php
require_once 'HTMLPurifier/HTMLModuleHarness.php';
require_once 'HTMLPurifier/HTMLModule/Image.php';
/**
 * Tests for the image module's handling of the height/width attributes
 * of img elements.
 * NOTE(review): single-argument assertResult() calls presumably expect the
 * input to come back unchanged — confirm against the harness.
 */
class HTMLPurifier_HTMLModule_ImageTest extends HTMLPurifier_HTMLModuleHarness
{

    /**
     * Small pixel dimensions.
     */
    function testNormal() {
        $html = '<img height="40" width="40" src="" alt="" />';
        $this->assertResult($html);
    }

    /**
     * Oversized pixel dimensions are expected to come back as 1200.
     */
    function testLengthTooLarge() {
        $input  = '<img height="40000" width="40000" src="" alt="" />';
        $expect = '<img height="1200" width="1200" src="" alt="" />';
        $this->assertResult($input, $expect);
    }

    /**
     * Percentage dimensions are expected to be dropped.
     */
    function testLengthPercentage() {
        $input  = '<img height="100%" width="100%" src="" alt="" />';
        $expect = '<img src="" alt="" />';
        $this->assertResult($input, $expect);
    }

    /**
     * With HTML.MaxImgLength set to 20, larger dimensions are expected to
     * come back as 20.
     */
    function testLengthCustomMax() {
        $this->config->set('HTML', 'MaxImgLength', 20);
        $input  = '<img height="30" width="30" src="" alt="" />';
        $expect = '<img height="20" width="20" src="" alt="" />';
        $this->assertResult($input, $expect);
    }

    /**
     * With HTML.MaxImgLength set to null, percentage and oversized
     * dimensions are checked with the single-argument form.
     */
    function testLengthCrashFixDisabled() {
        $this->config->set('HTML', 'MaxImgLength', null);
        $samples = array(
            '<img height="100%" width="100%" src="" alt="" />',
            '<img height="40000" width="40000" src="" alt="" />',
        );
        foreach ($samples as $html) {
            $this->assertResult($html);
        }
    }

    /**
     * With HTML.Trusted enabled, percentage and oversized dimensions are
     * checked with the single-argument form.
     */
    function testLengthTrusted() {
        $this->config->set('HTML', 'Trusted', true);
        $samples = array(
            '<img height="100%" width="100%" src="" alt="" />',
            '<img height="40000" width="40000" src="" alt="" />',
        );
        foreach ($samples as $html) {
            $this->assertResult($html);
        }
    }

}

View File

@@ -40,7 +40,7 @@ class HTMLPurifier_HTMLModule_TidyTest extends HTMLPurifier_Harness
}
function test_construct() {
function test_setup() {
$i = 0; // counter, helps us isolate expectations
@@ -65,7 +65,7 @@ class HTMLPurifier_HTMLModule_TidyTest extends HTMLPurifier_Harness
'HTML.TidyLevel' => 'none'
));
$module->expectAt($i++, 'populate', array(array()));
$module->construct($config);
$module->setup($config);
// basic levels
@@ -76,7 +76,7 @@ class HTMLPurifier_HTMLModule_TidyTest extends HTMLPurifier_Harness
'light-fix-1' => $lf1,
'light-fix-2' => $lf2
)));
$module->construct($config);
$module->setup($config);
$config = HTMLPurifier_Config::create(array(
'HTML.TidyLevel' => 'heavy'
@@ -89,7 +89,7 @@ class HTMLPurifier_HTMLModule_TidyTest extends HTMLPurifier_Harness
'heavy-fix-1' => $hf1,
'heavy-fix-2' => $hf2
)));
$module->construct($config);
$module->setup($config);
// fine grained tuning
@@ -101,7 +101,7 @@ class HTMLPurifier_HTMLModule_TidyTest extends HTMLPurifier_Harness
'light-fix-1' => $lf1,
'medium-fix-1' => $mf1
)));
$module->construct($config);
$module->setup($config);
$config = HTMLPurifier_Config::create(array(
'HTML.TidyLevel' => 'medium',
@@ -111,7 +111,7 @@ class HTMLPurifier_HTMLModule_TidyTest extends HTMLPurifier_Harness
'light-fix-2' => $lf2,
'medium-fix-2' => $mf2
)));
$module->construct($config);
$module->setup($config);
// done

View File

@@ -12,13 +12,24 @@ class HTMLPurifier_Harness extends UnitTestCase
parent::UnitTestCase();
}
var $config, $context;
var $config, $context, $purifier;
/**
* Generates easily accessible default config/context
* Generates easily accessible default config/context, as well as
* a convenience purifier for integration testing.
*/
function setUp() {
list($this->config, $this->context) = $this->createCommon();
$this->purifier = new HTMLPurifier();
}
/**
* Asserts a purification. Good for integration testing.
*/
function assertPurification($input, $expect = null) {
if ($expect === null) $expect = $input;
$result = $this->purifier->purify($input, $this->config);
$this->assertIdentical($expect, $result);
}
/**

View File

@@ -32,7 +32,10 @@ class HTMLPurifier_IDAccumulatorTest extends HTMLPurifier_Harness
function testBuild() {
$this->config->set('Attr', 'IDBlacklist', array('foo'));
$accumulator = HTMLPurifier_IDAccumulator::build($this->config, $this->context);
// For some reason, doing the static call here results in a segfault
// for early versions of PHP 5.0.x
$acc = new HTMLPurifier_IDAccumulator();
$accumulator = $acc->build($this->config, $this->context);
$this->assertTrue( isset($accumulator->ids['foo']) );
}

View File

@@ -5,13 +5,20 @@ require_once 'HTMLPurifier/LanguageFactory.php';
class HTMLPurifier_LanguageFactoryTest extends HTMLPurifier_Harness
{
/**
* Protected reference of global factory we're testing.
*/
var $factory;
function setUp() {
$this->factory = HTMLPurifier_LanguageFactory::instance();
parent::setUp();
}
function test() {
$factory = HTMLPurifier_LanguageFactory::instance();
$config = HTMLPurifier_Config::create(array('Core.Language' => 'en'));
$context = new HTMLPurifier_Context();
$language = $factory->create($config, $context);
$this->config->set('Core', 'Language', 'en');
$language = $this->factory->create($this->config, $this->context);
$this->assertIsA($language, 'HTMLPurifier_Language');
$this->assertIdentical($language->code, 'en');
@@ -21,18 +28,12 @@ class HTMLPurifier_LanguageFactoryTest extends HTMLPurifier_Harness
$language->load();
$this->assertNotEqual(count($language->messages), 0);
// actual tests for content can be found in LanguageTest
}
function testFallback() {
$factory = HTMLPurifier_LanguageFactory::instance();
$config = HTMLPurifier_Config::create(array('Core.Language' => 'en-x-test'));
$context = new HTMLPurifier_Context();
$language = $factory->create($config, $context);
$this->config->set('Core', 'Language', 'en-x-test');
$language = $this->factory->create($this->config, $this->context);
$this->assertIsA($language, 'HTMLPurifier_Language_en_x_test');
$this->assertIdentical($language->code, 'en-x-test');
@@ -47,5 +48,24 @@ class HTMLPurifier_LanguageFactoryTest extends HTMLPurifier_Harness
}
/**
 * Requests 'en-x-testmini' and expects a plain HTMLPurifier_Language with
 * that code, the two messages checked below once load() has run, and a
 * false error flag.
 */
function testFallbackWithNoClass() {
    $code = 'en-x-testmini';
    $this->config->set('Core', 'Language', $code);
    $lang = $this->factory->create($this->config, $this->context);
    $this->assertIsA($lang, 'HTMLPurifier_Language');
    $this->assertIdentical($lang->code, $code);
    $lang->load();
    $purifier_msg = $lang->getMessage('HTMLPurifier');
    $this->assertIdentical($purifier_msg, 'HTML Purifier XNone');
    $pizza_msg = $lang->getMessage('LanguageFactoryTest: Pizza');
    $this->assertIdentical($pizza_msg, 'Pizza');
    $this->assertIdentical($lang->error, false);
}
/**
 * Requests 'en-x-testnone' and expects a plain HTMLPurifier_Language that
 * keeps the requested code but has its error flag set to true.
 */
function testNoSuchLanguage() {
    $code = 'en-x-testnone';
    $this->config->set('Core', 'Language', $code);
    $lang = $this->factory->create($this->config, $this->context);
    $this->assertIsA($lang, 'HTMLPurifier_Language');
    $this->assertIdentical($lang->code, $code);
    $this->assertIdentical($lang->error, true);
}
}

View File

@@ -0,0 +1,73 @@
<?php
require_once 'HTMLPurifier/Length.php';
class HTMLPurifier_LengthTest extends HTMLPurifier_Harness
{

    /**
     * The constructor's two arguments are readable back through getN()
     * and getUnit().
     */
    function testConstruct() {
        $length = new HTMLPurifier_Length('23', 'in');
        $this->assertIdentical($length->getN(), '23');
        $this->assertIdentical($length->getUnit(), 'in');
    }

    /**
     * make() splits a combined string into its number and unit parts.
     */
    function testMake() {
        $length = HTMLPurifier_Length::make('+23.4in');
        $this->assertIdentical($length->getN(), '+23.4');
        $this->assertIdentical($length->getUnit(), 'in');
    }

    /**
     * toString() joins number and unit back together.
     */
    function testToString() {
        $length = new HTMLPurifier_Length('23', 'in');
        $this->assertIdentical($length->toString(), '23in');
    }

    /**
     * Builds a length from $string and checks its validation outcome.
     * @param $string Length string handed to HTMLPurifier_Length::make()
     * @param $expect Expected toString() output; true means "same as
     *        $string", false means the length must be reported invalid
     */
    function assertValidate($string, $expect = true) {
        if ($expect === true) {
            $expect = $string;
        }
        $length = HTMLPurifier_Length::make($string);
        if ($length->isValid() === false) {
            // Invalid length: this only passes when false was expected.
            $this->assertIdentical($expect, false);
            return;
        }
        $this->assertIdentical($length->toString(), $expect);
    }

    function testValidate() {
        // Zero, with and without sign or unit
        $this->assertValidate('0');
        $this->assertValidate('+0', '0');
        $this->assertValidate('-0', '0');
        $this->assertValidate('0px');
        // Decimals and negatives
        $this->assertValidate('4.5px');
        $this->assertValidate('-4.5px');
        // Each accepted unit
        $this->assertValidate('3ex');
        $this->assertValidate('3em');
        $this->assertValidate('3in');
        $this->assertValidate('3cm');
        $this->assertValidate('3mm');
        $this->assertValidate('3pt');
        $this->assertValidate('3pc');
        // Upper-case unit comes back lower-cased
        $this->assertValidate('3PX', '3px');
        // Rejected: non-zero number without unit, unknown unit
        $this->assertValidate('3', false);
        $this->assertValidate('3miles', false);
    }

    /**
     * Reduces a comparison result to -1, 0 or 1.
     * @param $value Raw result of compareTo()
     */
    function _sign($value) {
        if ($value == 0) {
            return 0;
        }
        return $value > 0 ? 1 : -1;
    }

    /**
     * Compares two lengths in both directions and checks the sign of each
     * result.
     * @param $s1 First string to compare
     * @param $s2 Second string to compare
     * @param $expect 0 for $s1 == $s2, 1 for $s1 > $s2 and -1 for $s1 < $s2
     */
    function assertComparison($s1, $s2, $expect = 0) {
        $first  = HTMLPurifier_Length::make($s1);
        $second = HTMLPurifier_Length::make($s2);
        $forward  = $first->compareTo($second);
        $backward = $second->compareTo($first);
        $this->assertIdentical($this->_sign($forward), $expect);
        // Swapping the operands must flip the sign.
        $this->assertIdentical($this->_sign($backward), - $expect);
    }

    function testCompareTo() {
        $this->assertComparison('12in', '12in');
        $this->assertComparison('12in', '12mm', 1);
        $this->assertComparison('1px', '1mm', -1);
        // 38-digit magnitude
        $this->assertComparison(str_repeat('2', 38) . 'in', '100px', 1);
    }

}

View File

@@ -37,5 +37,28 @@ class HTMLPurifier_PercentEncoderTest extends HTMLPurifier_Harness
}
/**
 * Encodes $string with a fresh HTMLPurifier_PercentEncoder and asserts
 * the result.
 * @param $string   Input handed to encode()
 * @param $expect   Expected output; true means "same as $string"
 * @param $preserve Passed straight to the encoder's constructor
 */
function assertEncode($string, $expect = true, $preserve = false) {
    $expected = ($expect === true) ? $string : $expect;
    $encoder = new HTMLPurifier_PercentEncoder($preserve);
    $this->assertIdentical($encoder->encode($string), $expected);
}
/**
 * Letters, digits and the characters - _ ~ . must come back untouched.
 */
function test_encode_noChange() {
    $untouched = 'abc012-_~.';
    $this->assertEncode($untouched);
}
/**
 * A greater-than sign must be percent-encoded as %3E.
 */
function test_encode_encode() {
    $input  = '>';
    $expect = '%3E';
    $this->assertEncode($input, $expect);
}
/**
 * With '<' handed to the encoder as its preserve argument, '<' stays
 * literal while '>' is still encoded.
 */
function test_encode_preserve() {
    $input    = '<>';
    $expect   = '<%3E';
    $preserve = '<';
    $this->assertEncode($input, $expect, $preserve);
}
/**
 * The byte 0x01 must be encoded with a zero-padded hex pair.
 */
function test_encode_low() {
    $input  = "\1";
    $expect = '%01';
    $this->assertEncode($input, $expect);
}
}

View File

@@ -82,5 +82,12 @@ class HTMLPurifier_Strategy_MakeWellFormedTest extends HTMLPurifier_StrategyHarn
);
}
/**
 * Two unclosed spans followed by a div: both spans are expected to be
 * closed before the div.
 */
function testAutoCloseMultiple() {
    $input  = '<span><span><div></div>';
    $expect = '<span><span></span></span><div></div>';
    $this->assertResult($input, $expect);
}
}

View File

@@ -180,6 +180,55 @@ class HTMLPurifier_Strategy_ValidateAttributesTest extends
);
}
/**
 * Small pixel width/height on an img; asserted with no separate
 * expectation (presumably meaning "unchanged" - confirm against
 * assertResult()).
 */
function testKeepAbsoluteCSSWidthAndHeightOnImg() {
    $html = '<img src="" alt="" style="width:10px;height:10px;border:1px solid #000;" />';
    $this->assertResult($html);
}
/**
 * Very large pixel width/height on an img are expected to be dropped,
 * leaving only the border declaration.
 */
function testRemoveLargeCSSWidthAndHeightOnImg() {
    $input  = '<img src="" alt="" style="width:10000000px;height:10000000px;border:1px solid #000;" />';
    $expect = '<img src="" alt="" style="border:1px solid #000;" />';
    $this->assertResult($input, $expect);
}
/**
 * With CSS.MaxImgLength set to 1px, a 1mm width/height is expected to be
 * dropped.
 */
function testRemoveLargeCSSWidthAndHeightOnImgWithUserConf() {
    $this->config->set('CSS', 'MaxImgLength', '1px');
    $input  = '<img src="" alt="" style="width:1mm;height:1mm;border:1px solid #000;" />';
    $expect = '<img src="" alt="" style="border:1px solid #000;" />';
    $this->assertResult($input, $expect);
}
/**
 * With CSS.MaxImgLength set to null, very large pixel sizes are asserted
 * with no separate expectation.
 */
function testKeepLargeCSSWidthAndHeightOnImgWhenToldTo() {
    $this->config->set('CSS', 'MaxImgLength', null);
    $html = '<img src="" alt="" style="width:10000000px;height:10000000px;border:1px solid #000;" />';
    $this->assertResult($html);
}
/**
 * With CSS.MaxImgLength set to null, percentage sizes are asserted with
 * no separate expectation.
 */
function testKeepPercentCSSWidthAndHeightOnImgWhenToldTo() {
    $this->config->set('CSS', 'MaxImgLength', null);
    $html = '<img src="" alt="" style="width:100%;height:100%;border:1px solid #000;" />';
    $this->assertResult($html);
}
/**
 * Under default config, em-based width/height on an img are expected to
 * be dropped.
 */
function testRemoveRelativeCSSWidthAndHeightOnImg() {
    $input  = '<img src="" alt="" style="width:10em;height:10em;border:1px solid #000;" />';
    $expect = '<img src="" alt="" style="border:1px solid #000;" />';
    $this->assertResult($input, $expect);
}
/**
 * Under default config, percentage width/height on an img are expected
 * to be dropped.
 */
function testRemovePercentCSSWidthAndHeightOnImg() {
    $input  = '<img src="" alt="" style="width:100%;height:100%;border:1px solid #000;" />';
    $expect = '<img src="" alt="" style="border:1px solid #000;" />';
    $this->assertResult($input, $expect);
}
}

View File

@@ -16,6 +16,13 @@ class HTMLPurifier_URIParserTest extends HTMLPurifier_Harness
$this->assertEqual($result, $expect);
}
/**
 * '%G' is not a valid percent escape; the only non-null component
 * expected from parsing is '%25G', i.e. the stray percent sign encoded.
 */
function testPercentNormalization() {
    $uri = '%G';
    $this->assertParsing(
        $uri,
        null, null, null, null, '%25G', null, null
    );
}
function testRegular() {
$this->assertParsing(
'http://www.example.com/webhp?q=foo#result2',
@@ -124,7 +131,7 @@ class HTMLPurifier_URIParserTest extends HTMLPurifier_Harness
/**
 * A URI with a stray '>' after the trailing slash; the expected
 * components contain only '/' in the slot after the port.
 *
 * The call previously carried two URI literals in a row, giving
 * assertParsing() one argument more than its other callers pass (one URI
 * plus seven components); only the current literal is kept.
 */
function testMalformedTag() {
    $this->assertParsing(
        'http://www.example.com/>',
        'http', null, 'www.example.com', null, '/', null, null
    );
}

View File

@@ -163,4 +163,40 @@ class HTMLPurifier_URITest extends HTMLPurifier_URIHarness
$this->assertValidation('http://[2001:0db8:85z3:08d3:1319:8a2e:0370:7334]', 'http:');
}
/**
 * The leading 'http:' is expected to disappear, and the first colon of
 * what remains to be encoded as %3A.
 */
function test_validate_removeRedundantScheme() {
    $input  = 'http:foo:/:';
    $expect = 'foo%3A/:';
    $this->assertValidation($input, $expect);
}
/**
 * Non-ASCII bytes in the user part are expected to be percent-encoded.
 */
function test_validate_username() {
    $input  = "http://user\xE3\x91\x94:@foo.com";
    $expect = 'http://user%E3%91%94:@foo.com';
    $this->assertValidation($input, $expect);
}
/**
 * Non-ASCII bytes in a path that follows a host are expected to be
 * percent-encoded; the trailing colon stays.
 */
function test_validate_path_abempty() {
    $input  = "http://host/\xE3\x91\x94:";
    $expect = 'http://host/%E3%91%94:';
    $this->assertValidation($input, $expect);
}
/**
 * Non-ASCII bytes in a slash-led path with no scheme or host are expected
 * to be percent-encoded; the trailing colon stays.
 */
function test_validate_path_absolute() {
    $input  = "/\xE3\x91\x94:";
    $expect = '/%E3%91%94:';
    $this->assertValidation($input, $expect);
}
/**
 * Non-ASCII bytes directly after 'mailto:' are expected to be
 * percent-encoded; the trailing colon stays.
 */
function test_validate_path_rootless() {
    $input  = "mailto:\xE3\x91\x94:";
    $expect = 'mailto:%E3%91%94:';
    $this->assertValidation($input, $expect);
}
/**
 * A bare non-ASCII path with no scheme is expected to be percent-encoded.
 */
function test_validate_path_noscheme() {
    $input  = "\xE3\x91\x94";
    $expect = '%E3%91%94';
    $this->assertValidation($input, $expect);
}
/**
 * Non-ASCII bytes after '?' are expected to be percent-encoded while the
 * slash is kept.
 */
function test_validate_query() {
    $input  = "?/\xE3\x91\x94";
    $expect = '?/%E3%91%94';
    $this->assertValidation($input, $expect);
}
/**
 * Non-ASCII bytes after '#' are expected to be percent-encoded while the
 * slash is kept.
 */
function test_validate_fragment() {
    $input  = "#/\xE3\x91\x94";
    $expect = '#/%E3%91%94';
    $this->assertValidation($input, $expect);
}
/**
 * A URI with a host and no path, asserted with no separate expectation.
 */
function test_validate_path_empty() {
    $uri = 'http://google.com';
    $this->assertValidation($uri);
}
}

View File

@@ -0,0 +1,127 @@
<?php
require_once 'HTMLPurifier/UnitConverter.php';
class HTMLPurifier_UnitConverterTest extends HTMLPurifier_Harness
{

    /**
     * Converts $input to a target unit and asserts the outcome twice: once
     * with HTMLPurifier_UnitConverter(4, 10) and once with
     * HTMLPurifier_UnitConverter(4, 10, true); failures of the second run
     * are labelled "BCMath substitute".
     * @param $input String length to convert, e.g. '1in'
     * @param $expect Expected length as a string, or false when the
     *        conversion is expected to fail
     * @param $unit Target unit. When null, the unit of $expect is used; if
     *        $expect is false as well, the unit of $input is used
     * @param $test_negative When true, the same check is repeated once
     *        with '-' prefixed to both $input and $expect
     */
    function assertConversion($input, $expect, $unit = null, $test_negative = true) {
        $length = HTMLPurifier_Length::make($input);
        if ($expect !== false) $expectl = HTMLPurifier_Length::make($expect);
        else $expectl = false;

        if ($unit !== null) {
            $to_unit = $unit;
        } elseif ($expectl !== false) {
            $to_unit = $expectl->getUnit();
        } else {
            // Neither a target unit nor an expected length was given.
            // Calling getUnit() on false would be a fatal error, so fall
            // back to the input's own unit.
            $to_unit = $length->getUnit();
        }

        $converter = new HTMLPurifier_UnitConverter(4, 10);
        $result = $converter->convert($length, $to_unit);
        if (!$result || !$expectl) $this->assertIdentical($result, $expectl);
        else $this->assertIdentical($result->toString(), $expectl->toString());

        $converter = new HTMLPurifier_UnitConverter(4, 10, true);
        $result = $converter->convert($length, $to_unit);
        if (!$result || !$expectl) $this->assertIdentical($result, $expectl);
        else $this->assertIdentical($result->toString(), $expectl->toString(), 'BCMath substitute: %s');

        if ($test_negative) {
            // Recurse exactly once; the final false stops further negation.
            $this->assertConversion(
                "-$input",
                $expect === false ? false : "-$expect",
                $unit,
                false
            );
        }
    }

    function testFail() {
        $this->assertConversion('1in', false, 'foo');
        $this->assertConversion('1foo', false, 'in');
    }

    function testZero() {
        $this->assertConversion('0', '0', 'in', false);
        $this->assertConversion('-0', '0', 'in', false);
        $this->assertConversion('0in', '0', 'in', false);
        $this->assertConversion('-0in', '0', 'in', false);
        $this->assertConversion('0in', '0', 'pt', false);
        $this->assertConversion('-0in', '0', 'pt', false);
    }

    function testEnglish() {
        $this->assertConversion('1in', '6pc');
        $this->assertConversion('6pc', '1in');

        $this->assertConversion('1in', '72pt');
        $this->assertConversion('72pt', '1in');

        $this->assertConversion('1pc', '12pt');
        $this->assertConversion('12pt', '1pc');

        $this->assertConversion('1pt', '0.01389in');
        $this->assertConversion('1.000pt', '0.01389in');
        $this->assertConversion('100000pt', '1389in');

        $this->assertConversion('1in', '96px');
        $this->assertConversion('96px', '1in');
    }

    function testMetric() {
        $this->assertConversion('1cm', '10mm');
        $this->assertConversion('10mm', '1cm');
        $this->assertConversion('1mm', '0.1cm');
        $this->assertConversion('100mm', '10cm');
    }

    function testEnglishMetric() {
        $this->assertConversion('2.835pt', '1mm');
        $this->assertConversion('1mm', '2.835pt');
        $this->assertConversion('0.3937in', '1cm');
    }

    function testRoundingMinPrecision() {
        // One sig-fig, modified to be four, conversion rounds up
        $this->assertConversion('100pt', '1.389in');
        $this->assertConversion('1000pt', '13.89in');
        $this->assertConversion('10000pt', '138.9in');
        $this->assertConversion('100000pt', '1389in');
        $this->assertConversion('1000000pt', '13890in');
    }

    function testRoundingUserPrecision() {
        // Five sig-figs, conversion rounds down
        $this->assertConversion('11112000pt', '154330in');
        $this->assertConversion('1111200pt', '15433in');
        $this->assertConversion('111120pt', '1543.3in');
        $this->assertConversion('11112pt', '154.33in');
        $this->assertConversion('1111.2pt', '15.433in');
        $this->assertConversion('111.12pt', '1.5433in');
        $this->assertConversion('11.112pt', '0.15433in');
    }

    /**
     * Asserts the significant-figure count a default-constructed converter
     * reports for $n.
     * @param $n Numeric string to inspect
     * @param $sigfigs Expected integer count
     */
    function assertSigFig($n, $sigfigs) {
        $converter = new HTMLPurifier_UnitConverter();
        $result = $converter->getSigFigs($n);
        $this->assertIdentical($result, $sigfigs);
    }

    function test_getSigFigs() {
        $this->assertSigFig('0', 0);
        $this->assertSigFig('1', 1);
        $this->assertSigFig('-1', 1);
        $this->assertSigFig('+1', 1);
        $this->assertSigFig('01', 1);
        $this->assertSigFig('001', 1);
        $this->assertSigFig('12', 2);
        $this->assertSigFig('012', 2);
        $this->assertSigFig('10', 1);
        $this->assertSigFig('10.', 2);
        $this->assertSigFig('100.', 3);
        $this->assertSigFig('103', 3);
        $this->assertSigFig('130', 2);
        $this->assertSigFig('.1', 1);
        $this->assertSigFig('0.1', 1);
        $this->assertSigFig('00.1', 1);
        $this->assertSigFig('0.01', 1);
        $this->assertSigFig('0.010', 2);
        $this->assertSigFig('0.012', 2);
    }

}

View File

@@ -2,30 +2,15 @@
require_once 'HTMLPurifier.php';
// integration test
class HTMLPurifierTest extends HTMLPurifier_Harness
{
var $purifier;
function setUp() {
    // Tests in this class configure themselves through
    // $this->config->set(), so the harness setUp() has to run; overriding
    // it without this call leaves the config object this class relies on
    // unset.
    parent::setUp();
    $this->purifier = new HTMLPurifier();
}
/**
 * Purifies $input and asserts that the output is identical to $expect.
 * @param $input  HTML to purify
 * @param $expect Expected output; null means $input is expected unchanged
 * @param $config Configuration for this call only. When left empty, the
 *        test case's $this->config is used if it is set, so that settings
 *        made with $this->config->set() take effect
 */
function assertPurification($input, $expect = null, $config = array()) {
    if ($expect === null) $expect = $input;
    // Without this fallback the empty default would be handed to
    // purify() and per-test settings on $this->config would be ignored.
    if (!$config && isset($this->config)) $config = $this->config;
    $result = $this->purifier->purify($input, $config);
    $this->assertIdentical($expect, $result);
}
/**
 * A trailing NUL byte is expected to be stripped from the output.
 */
function testNull() {
    $input  = "Null byte\0";
    $expect = "Null byte";
    $this->assertPurification($input, $expect);
}
function testStrict() {
$config = HTMLPurifier_Config::createDefault();
$config->set('HTML', 'Strict', true);
$this->purifier = new HTMLPurifier( $config ); // verbose syntax
$this->config->set('HTML', 'Strict', true);
$this->assertPurification(
'<u>Illegal underline</u>',
@@ -41,10 +26,8 @@ class HTMLPurifierTest extends HTMLPurifier_Harness
function testDifferentAllowedElements() {
$this->purifier = new HTMLPurifier(array(
'HTML.AllowedElements' => array('b', 'i', 'p', 'a'),
'HTML.AllowedAttributes' => array('a.href', '*.id')
));
$this->config->set('HTML', 'AllowedElements', array('b', 'i', 'p', 'a'));
$this->config->set('HTML', 'AllowedAttributes', array('a.href', '*.id'));
$this->assertPurification(
'<p>Par.</p><p>Para<a href="http://google.com/">gr</a>aph</p>Text<b>Bol<i>d</i></b>'
@@ -59,7 +42,7 @@ class HTMLPurifierTest extends HTMLPurifier_Harness
function testDisableURI() {
$this->purifier = new HTMLPurifier( array('Attr.DisableURI' => true) );
$this->config->set('URI', 'Disable', true);
$this->assertPurification(
'<img src="foobar"/>',
@@ -70,8 +53,6 @@ class HTMLPurifierTest extends HTMLPurifier_Harness
function test_purifyArray() {
$this->purifier = new HTMLPurifier();
$this->assertIdentical(
$this->purifier->purifyArray(
array('Good', '<b>Sketchy', 'foo' => '<script>bad</script>')
@@ -83,23 +64,24 @@ class HTMLPurifierTest extends HTMLPurifier_Harness
}
/**
 * With no extra configuration, the id attribute is expected to be
 * stripped.
 */
function testAttrIDDisabledByDefault() {
    $this->assertPurification(
        '<span id="moon">foobar</span>',
        '<span>foobar</span>'
    );
}

/**
 * With Attr.EnableID switched on, id attributes are expected to be kept.
 *
 * This used to share one method with the default-config check above and
 * re-created the purifier mid-test; the two cases are now separate tests
 * configured through $this->config.
 */
function testEnableAttrID() {
    $this->config->set('Attr', 'EnableID', true);
    $this->assertPurification('<span id="moon">foobar</span>');
    $this->assertPurification('<img id="folly" src="folly.png" alt="Omigosh!" />');
}
function testScript() {
$this->purifier = new HTMLPurifier(array('HTML.Trusted' => true));
$this->config->set('HTML', 'Trusted', true);
$ideal = '<script type="text/javascript"><!--//--><![CDATA[//><!--
alert("<This is compatible with XHTML>");
//--><!]]></script>';
@@ -140,13 +122,29 @@ alert("<This is compatible with XHTML>");
}
/**
 * With URI.Base set and URI.MakeAbsolute enabled, a relative href is
 * expected to be resolved against the base.
 *
 * Both settings are applied through $this->config; the per-call config
 * array that repeated them (and left the argument list malformed) is gone.
 */
function testMakeAbsolute() {
    $this->config->set('URI', 'Base', 'http://example.com/bar/baz.php');
    $this->config->set('URI', 'MakeAbsolute', true);
    $this->assertPurification(
        '<a href="foo.txt">Foobar</a>',
        '<a href="http://example.com/bar/foo.txt">Foobar</a>'
    );
}
/**
 * With Shift_JIS encoding and Core.EscapeNonASCIICharacters enabled, the
 * &#165; entity inside font-family is asserted with no separate
 * expectation. Does nothing when iconv is unavailable.
 */
function test_shiftJis() {
    if (function_exists('iconv')) {
        $this->config->set('Core', 'Encoding', 'Shift_JIS');
        $this->config->set('Core', 'EscapeNonASCIICharacters', true);
        $html = "<b style=\"font-family:'&#165;';\">111</b>";
        $this->assertPurification($html);
    }
}
/**
 * With Shift_JIS encoding and no escaping of non-ASCII characters, the
 * &#165; entity is expected to vanish from font-family. Does nothing when
 * iconv is unavailable.
 */
function test_shiftJisWorstCase() {
    if (function_exists('iconv')) {
        $this->config->set('Core', 'Encoding', 'Shift_JIS');
        // Notice how Yen disappears
        $input  = "<b style=\"font-family:'&#165;';\">111</b>";
        $expect = "<b style=\"font-family:'';\">111</b>";
        $this->assertPurification($input, $expect);
    }
}

View File

@@ -12,16 +12,7 @@ $versions_to_test = array(
'4.4.7',
'5.0.4',
'5.0.5',
'5.1.4',
'5.1.6',
'5.2.0',
'5.2.1',
'5.2.2',
'5.2.3',
'5.2.4',
'5.2.5RC2-dev',
'5.3.0-dev',
// '6.0.0-dev',
// We don't care about later versions: use HTML Purifier 3+!!!
);
echo str_repeat('-', 70) . "\n";

View File

@@ -34,6 +34,7 @@ $test_files[] = 'HTMLPurifier/AttrDef/HTML/LinkTypesTest.php';
$test_files[] = 'HTMLPurifier/AttrDef/IntegerTest.php';
$test_files[] = 'HTMLPurifier/AttrDef/LangTest.php';
$test_files[] = 'HTMLPurifier/AttrDef/TextTest.php';
$test_files[] = 'HTMLPurifier/AttrDef/SwitchTest.php';
$test_files[] = 'HTMLPurifier/AttrDef/URI/Email/SimpleCheckTest.php';
$test_files[] = 'HTMLPurifier/AttrDef/URI/HostTest.php';
$test_files[] = 'HTMLPurifier/AttrDef/URI/IPv4Test.php';
@@ -79,6 +80,7 @@ $test_files[] = 'HTMLPurifier/GeneratorTest.php';
$test_files[] = 'HTMLPurifier/HTMLDefinitionTest.php';
$test_files[] = 'HTMLPurifier/HTMLModuleManagerTest.php';
$test_files[] = 'HTMLPurifier/HTMLModuleTest.php';
$test_files[] = 'HTMLPurifier/HTMLModule/ImageTest.php';
$test_files[] = 'HTMLPurifier/HTMLModule/ObjectTest.php';
$test_files[] = 'HTMLPurifier/HTMLModule/RubyTest.php';
$test_files[] = 'HTMLPurifier/HTMLModule/ScriptingTest.php';
@@ -89,6 +91,7 @@ $test_files[] = 'HTMLPurifier/Injector/LinkifyTest.php';
$test_files[] = 'HTMLPurifier/Injector/PurifierLinkifyTest.php';
$test_files[] = 'HTMLPurifier/LanguageFactoryTest.php';
$test_files[] = 'HTMLPurifier/LanguageTest.php';
$test_files[] = 'HTMLPurifier/LengthTest.php';
$test_files[] = 'HTMLPurifier/Lexer/DirectLexTest.php';
$test_files[] = 'HTMLPurifier/Lexer/DirectLex_ErrorsTest.php';
$test_files[] = 'HTMLPurifier/LexerTest.php';
@@ -108,6 +111,7 @@ $test_files[] = 'HTMLPurifier/Strategy/ValidateAttributes_IDTest.php';
$test_files[] = 'HTMLPurifier/Strategy/ValidateAttributes_TidyTest.php';
$test_files[] = 'HTMLPurifier/TagTransformTest.php';
$test_files[] = 'HTMLPurifier/TokenTest.php';
$test_files[] = 'HTMLPurifier/UnitConverterTest.php';
$test_files[] = 'HTMLPurifier/URIDefinitionTest.php';
$test_files[] = 'HTMLPurifier/URIFilter/DisableExternalTest.php';
$test_files[] = 'HTMLPurifier/URIFilter/DisableExternalResourcesTest.php';