MDL-80636 lib: Upgrade HTML purifier to 4.17.0

This commit is contained in:
Laurent David 2024-03-18 19:34:36 +01:00
parent 757be30c39
commit 0079ba38f2
17 changed files with 315 additions and 324 deletions

View File

@ -19,7 +19,7 @@
*/
/*
HTML Purifier 4.15.0 - Standards Compliant HTML Filtering
HTML Purifier 4.17.0 - Standards Compliant HTML Filtering
Copyright (C) 2006-2008 Edward Z. Yang
This library is free software; you can redistribute it and/or
@ -58,12 +58,12 @@ class HTMLPurifier
* Version of HTML Purifier.
* @type string
*/
public $version = '4.15.0';
public $version = '4.17.0';
/**
* Constant with version of HTML Purifier.
*/
const VERSION = '4.15.0';
const VERSION = '4.17.0';
/**
* Global configuration object.

View File

@ -10,23 +10,21 @@ class HTMLPurifier_AttrDef_CSS_FontFamily extends HTMLPurifier_AttrDef
public function __construct()
{
$this->mask = '_- ';
for ($c = 'a'; $c <= 'z'; $c++) {
$this->mask .= $c;
}
for ($c = 'A'; $c <= 'Z'; $c++) {
$this->mask .= $c;
}
for ($c = '0'; $c <= '9'; $c++) {
$this->mask .= $c;
} // cast-y, but should be fine
// special bytes used by UTF-8
for ($i = 0x80; $i <= 0xFF; $i++) {
// We don't bother excluding invalid bytes in this range,
// because the our restriction of well-formed UTF-8 will
// prevent these from ever occurring.
$this->mask .= chr($i);
}
// Lowercase letters
$l = range('a', 'z');
// Uppercase letters
$u = range('A', 'Z');
// Digits
$d = range('0', '9');
// Special bytes used by UTF-8
$b = array_map('chr', range(0x80, 0xFF));
// All valid characters for the mask
$c = array_merge($l, $u, $d, $b);
// Concatenate all valid characters into a string
// Use '_- ' as an initial value
$this->mask = array_reduce($c, function ($carry, $value) {
return $carry . $value;
}, '_- ');
/*
PHP's internal strcspn implementation is

View File

@ -106,7 +106,7 @@ class HTMLPurifier_AttrDef_URI_Host extends HTMLPurifier_AttrDef
// If we have Net_IDNA2 support, we can support IRIs by
// punycoding them. (This is the most portable thing to do,
// since otherwise we have to assume browsers support
} elseif ($config->get('Core.EnableIDNA')) {
} elseif ($config->get('Core.EnableIDNA') && class_exists('Net_IDNA2')) {
$idna = new Net_IDNA2(array('encoding' => 'utf8', 'overlong' => false, 'strict' => true));
// we need to encode each period separately
$parts = explode('.', $string);

View File

@ -33,7 +33,11 @@ class HTMLPurifier_AttrTransform_TargetBlank extends HTMLPurifier_AttrTransform
// XXX Kind of inefficient
$url = $this->parser->parse($attr['href']);
$scheme = $url->getSchemeObj($config, $context);
// Ignore invalid schemes (e.g. `javascript:`)
if (!($scheme = $url->getSchemeObj($config, $context))) {
return $attr;
}
if ($scheme->browsable && !$url->isBenign($config, $context)) {
$attr['target'] = '_blank';

View File

@ -79,44 +79,11 @@ class HTMLPurifier_Bootstrap
public static function registerAutoload()
{
$autoload = array('HTMLPurifier_Bootstrap', 'autoload');
if (($funcs = spl_autoload_functions()) === false) {
if (spl_autoload_functions() === false) {
spl_autoload_register($autoload);
} elseif (function_exists('spl_autoload_unregister')) {
if (version_compare(PHP_VERSION, '5.3.0', '>=')) {
// prepend flag exists, no need for shenanigans
spl_autoload_register($autoload, true, true);
} else {
$buggy = version_compare(PHP_VERSION, '5.2.11', '<');
$compat = version_compare(PHP_VERSION, '5.1.2', '<=') &&
version_compare(PHP_VERSION, '5.1.0', '>=');
foreach ($funcs as $func) {
if ($buggy && is_array($func)) {
// :TRICKY: There are some compatibility issues and some
// places where we need to error out
$reflector = new ReflectionMethod($func[0], $func[1]);
if (!$reflector->isStatic()) {
throw new Exception(
'HTML Purifier autoloader registrar is not compatible
with non-static object methods due to PHP Bug #44144;
Please do not use HTMLPurifier.autoload.php (or any
file that includes this file); instead, place the code:
spl_autoload_register(array(\'HTMLPurifier_Bootstrap\', \'autoload\'))
after your own autoloaders.'
);
}
// Suprisingly, spl_autoload_register supports the
// Class::staticMethod callback format, although call_user_func doesn't
if ($compat) {
$func = implode('::', $func);
}
}
spl_autoload_unregister($func);
}
spl_autoload_register($autoload);
foreach ($funcs as $func) {
spl_autoload_register($func);
}
}
} else {
// prepend flag exists, no need for shenanigans
spl_autoload_register($autoload, true, true);
}
}
}

View File

@ -13,7 +13,7 @@ class HTMLPurifier_CSSDefinition extends HTMLPurifier_Definition
* Assoc array of attribute name to definition object.
* @type HTMLPurifier_AttrDef[]
*/
public $info = array();
public $info = [];
/**
* Constructs the info array. The meat of this class.
@ -22,7 +22,7 @@ class HTMLPurifier_CSSDefinition extends HTMLPurifier_Definition
protected function doSetup($config)
{
$this->info['text-align'] = new HTMLPurifier_AttrDef_Enum(
array('left', 'right', 'center', 'justify'),
['left', 'right', 'center', 'justify'],
false
);
@ -31,7 +31,7 @@ class HTMLPurifier_CSSDefinition extends HTMLPurifier_Definition
$this->info['border-right-style'] =
$this->info['border-left-style'] =
$this->info['border-top-style'] = new HTMLPurifier_AttrDef_Enum(
array(
[
'none',
'hidden',
'dotted',
@ -42,42 +42,42 @@ class HTMLPurifier_CSSDefinition extends HTMLPurifier_Definition
'ridge',
'inset',
'outset'
),
],
false
);
$this->info['border-style'] = new HTMLPurifier_AttrDef_CSS_Multiple($border_style);
$this->info['clear'] = new HTMLPurifier_AttrDef_Enum(
array('none', 'left', 'right', 'both'),
['none', 'left', 'right', 'both'],
false
);
$this->info['float'] = new HTMLPurifier_AttrDef_Enum(
array('none', 'left', 'right'),
['none', 'left', 'right'],
false
);
$this->info['font-style'] = new HTMLPurifier_AttrDef_Enum(
array('normal', 'italic', 'oblique'),
['normal', 'italic', 'oblique'],
false
);
$this->info['font-variant'] = new HTMLPurifier_AttrDef_Enum(
array('normal', 'small-caps'),
['normal', 'small-caps'],
false
);
$uri_or_none = new HTMLPurifier_AttrDef_CSS_Composite(
array(
new HTMLPurifier_AttrDef_Enum(array('none')),
[
new HTMLPurifier_AttrDef_Enum(['none']),
new HTMLPurifier_AttrDef_CSS_URI()
)
]
);
$this->info['list-style-position'] = new HTMLPurifier_AttrDef_Enum(
array('inside', 'outside'),
['inside', 'outside'],
false
);
$this->info['list-style-type'] = new HTMLPurifier_AttrDef_Enum(
array(
[
'disc',
'circle',
'square',
@ -87,7 +87,7 @@ class HTMLPurifier_CSSDefinition extends HTMLPurifier_Definition
'lower-alpha',
'upper-alpha',
'none'
),
],
false
);
$this->info['list-style-image'] = $uri_or_none;
@ -95,34 +95,34 @@ class HTMLPurifier_CSSDefinition extends HTMLPurifier_Definition
$this->info['list-style'] = new HTMLPurifier_AttrDef_CSS_ListStyle($config);
$this->info['text-transform'] = new HTMLPurifier_AttrDef_Enum(
array('capitalize', 'uppercase', 'lowercase', 'none'),
['capitalize', 'uppercase', 'lowercase', 'none'],
false
);
$this->info['color'] = new HTMLPurifier_AttrDef_CSS_Color();
$this->info['background-image'] = $uri_or_none;
$this->info['background-repeat'] = new HTMLPurifier_AttrDef_Enum(
array('repeat', 'repeat-x', 'repeat-y', 'no-repeat')
['repeat', 'repeat-x', 'repeat-y', 'no-repeat']
);
$this->info['background-attachment'] = new HTMLPurifier_AttrDef_Enum(
array('scroll', 'fixed')
['scroll', 'fixed']
);
$this->info['background-position'] = new HTMLPurifier_AttrDef_CSS_BackgroundPosition();
$this->info['background-size'] = new HTMLPurifier_AttrDef_CSS_Composite(
array(
[
new HTMLPurifier_AttrDef_Enum(
array(
[
'auto',
'cover',
'contain',
'initial',
'inherit',
)
]
),
new HTMLPurifier_AttrDef_CSS_Percentage(),
new HTMLPurifier_AttrDef_CSS_Length()
)
]
);
$border_color =
@ -131,10 +131,10 @@ class HTMLPurifier_CSSDefinition extends HTMLPurifier_Definition
$this->info['border-left-color'] =
$this->info['border-right-color'] =
$this->info['background-color'] = new HTMLPurifier_AttrDef_CSS_Composite(
array(
new HTMLPurifier_AttrDef_Enum(array('transparent')),
[
new HTMLPurifier_AttrDef_Enum(['transparent']),
new HTMLPurifier_AttrDef_CSS_Color()
)
]
);
$this->info['background'] = new HTMLPurifier_AttrDef_CSS_Background($config);
@ -146,32 +146,32 @@ class HTMLPurifier_CSSDefinition extends HTMLPurifier_Definition
$this->info['border-bottom-width'] =
$this->info['border-left-width'] =
$this->info['border-right-width'] = new HTMLPurifier_AttrDef_CSS_Composite(
array(
new HTMLPurifier_AttrDef_Enum(array('thin', 'medium', 'thick')),
[
new HTMLPurifier_AttrDef_Enum(['thin', 'medium', 'thick']),
new HTMLPurifier_AttrDef_CSS_Length('0') //disallow negative
)
]
);
$this->info['border-width'] = new HTMLPurifier_AttrDef_CSS_Multiple($border_width);
$this->info['letter-spacing'] = new HTMLPurifier_AttrDef_CSS_Composite(
array(
new HTMLPurifier_AttrDef_Enum(array('normal')),
[
new HTMLPurifier_AttrDef_Enum(['normal']),
new HTMLPurifier_AttrDef_CSS_Length()
)
]
);
$this->info['word-spacing'] = new HTMLPurifier_AttrDef_CSS_Composite(
array(
new HTMLPurifier_AttrDef_Enum(array('normal')),
[
new HTMLPurifier_AttrDef_Enum(['normal']),
new HTMLPurifier_AttrDef_CSS_Length()
)
]
);
$this->info['font-size'] = new HTMLPurifier_AttrDef_CSS_Composite(
array(
[
new HTMLPurifier_AttrDef_Enum(
array(
[
'xx-small',
'x-small',
'small',
@ -181,20 +181,20 @@ class HTMLPurifier_CSSDefinition extends HTMLPurifier_Definition
'xx-large',
'larger',
'smaller'
)
]
),
new HTMLPurifier_AttrDef_CSS_Percentage(),
new HTMLPurifier_AttrDef_CSS_Length()
)
]
);
$this->info['line-height'] = new HTMLPurifier_AttrDef_CSS_Composite(
array(
new HTMLPurifier_AttrDef_Enum(array('normal')),
[
new HTMLPurifier_AttrDef_Enum(['normal']),
new HTMLPurifier_AttrDef_CSS_Number(true), // no negatives
new HTMLPurifier_AttrDef_CSS_Length('0'),
new HTMLPurifier_AttrDef_CSS_Percentage(true)
)
]
);
$margin =
@ -202,11 +202,11 @@ class HTMLPurifier_CSSDefinition extends HTMLPurifier_Definition
$this->info['margin-bottom'] =
$this->info['margin-left'] =
$this->info['margin-right'] = new HTMLPurifier_AttrDef_CSS_Composite(
array(
[
new HTMLPurifier_AttrDef_CSS_Length(),
new HTMLPurifier_AttrDef_CSS_Percentage(),
new HTMLPurifier_AttrDef_Enum(array('auto'))
)
new HTMLPurifier_AttrDef_Enum(['auto'])
]
);
$this->info['margin'] = new HTMLPurifier_AttrDef_CSS_Multiple($margin);
@ -217,41 +217,41 @@ class HTMLPurifier_CSSDefinition extends HTMLPurifier_Definition
$this->info['padding-bottom'] =
$this->info['padding-left'] =
$this->info['padding-right'] = new HTMLPurifier_AttrDef_CSS_Composite(
array(
[
new HTMLPurifier_AttrDef_CSS_Length('0'),
new HTMLPurifier_AttrDef_CSS_Percentage(true)
)
]
);
$this->info['padding'] = new HTMLPurifier_AttrDef_CSS_Multiple($padding);
$this->info['text-indent'] = new HTMLPurifier_AttrDef_CSS_Composite(
array(
[
new HTMLPurifier_AttrDef_CSS_Length(),
new HTMLPurifier_AttrDef_CSS_Percentage()
)
]
);
$trusted_wh = new HTMLPurifier_AttrDef_CSS_Composite(
array(
[
new HTMLPurifier_AttrDef_CSS_Length('0'),
new HTMLPurifier_AttrDef_CSS_Percentage(true),
new HTMLPurifier_AttrDef_Enum(array('auto', 'initial', 'inherit'))
)
new HTMLPurifier_AttrDef_Enum(['auto', 'initial', 'inherit'])
]
);
$trusted_min_wh = new HTMLPurifier_AttrDef_CSS_Composite(
array(
[
new HTMLPurifier_AttrDef_CSS_Length('0'),
new HTMLPurifier_AttrDef_CSS_Percentage(true),
new HTMLPurifier_AttrDef_Enum(array('initial', 'inherit'))
)
new HTMLPurifier_AttrDef_Enum(['initial', 'inherit'])
]
);
$trusted_max_wh = new HTMLPurifier_AttrDef_CSS_Composite(
array(
[
new HTMLPurifier_AttrDef_CSS_Length('0'),
new HTMLPurifier_AttrDef_CSS_Percentage(true),
new HTMLPurifier_AttrDef_Enum(array('none', 'initial', 'inherit'))
)
new HTMLPurifier_AttrDef_Enum(['none', 'initial', 'inherit'])
]
);
$max = $config->get('CSS.MaxImgLength');
@ -263,10 +263,10 @@ class HTMLPurifier_CSSDefinition extends HTMLPurifier_Definition
'img',
// For img tags:
new HTMLPurifier_AttrDef_CSS_Composite(
array(
[
new HTMLPurifier_AttrDef_CSS_Length('0', $max),
new HTMLPurifier_AttrDef_Enum(array('auto'))
)
new HTMLPurifier_AttrDef_Enum(['auto'])
]
),
// For everyone else:
$trusted_wh
@ -279,10 +279,10 @@ class HTMLPurifier_CSSDefinition extends HTMLPurifier_Definition
'img',
// For img tags:
new HTMLPurifier_AttrDef_CSS_Composite(
array(
[
new HTMLPurifier_AttrDef_CSS_Length('0', $max),
new HTMLPurifier_AttrDef_Enum(array('initial', 'inherit'))
)
new HTMLPurifier_AttrDef_Enum(['initial', 'inherit'])
]
),
// For everyone else:
$trusted_min_wh
@ -295,22 +295,39 @@ class HTMLPurifier_CSSDefinition extends HTMLPurifier_Definition
'img',
// For img tags:
new HTMLPurifier_AttrDef_CSS_Composite(
array(
[
new HTMLPurifier_AttrDef_CSS_Length('0', $max),
new HTMLPurifier_AttrDef_Enum(array('none', 'initial', 'inherit'))
)
new HTMLPurifier_AttrDef_Enum(['none', 'initial', 'inherit'])
]
),
// For everyone else:
$trusted_max_wh
);
// text-decoration and related shorthands
$this->info['text-decoration'] = new HTMLPurifier_AttrDef_CSS_TextDecoration();
$this->info['text-decoration-line'] = new HTMLPurifier_AttrDef_Enum(
['none', 'underline', 'overline', 'line-through', 'initial', 'inherit']
);
$this->info['text-decoration-style'] = new HTMLPurifier_AttrDef_Enum(
['solid', 'double', 'dotted', 'dashed', 'wavy', 'initial', 'inherit']
);
$this->info['text-decoration-color'] = new HTMLPurifier_AttrDef_CSS_Color();
$this->info['text-decoration-thickness'] = new HTMLPurifier_AttrDef_CSS_Composite([
new HTMLPurifier_AttrDef_CSS_Length(),
new HTMLPurifier_AttrDef_CSS_Percentage(),
new HTMLPurifier_AttrDef_Enum(['auto', 'from-font', 'initial', 'inherit'])
]);
$this->info['font-family'] = new HTMLPurifier_AttrDef_CSS_FontFamily();
// this could use specialized code
$this->info['font-weight'] = new HTMLPurifier_AttrDef_Enum(
array(
[
'normal',
'bold',
'bolder',
@ -324,7 +341,7 @@ class HTMLPurifier_CSSDefinition extends HTMLPurifier_Definition
'700',
'800',
'900'
),
],
false
);
@ -340,21 +357,21 @@ class HTMLPurifier_CSSDefinition extends HTMLPurifier_Definition
$this->info['border-right'] = new HTMLPurifier_AttrDef_CSS_Border($config);
$this->info['border-collapse'] = new HTMLPurifier_AttrDef_Enum(
array('collapse', 'separate')
['collapse', 'separate']
);
$this->info['caption-side'] = new HTMLPurifier_AttrDef_Enum(
array('top', 'bottom')
['top', 'bottom']
);
$this->info['table-layout'] = new HTMLPurifier_AttrDef_Enum(
array('auto', 'fixed')
['auto', 'fixed']
);
$this->info['vertical-align'] = new HTMLPurifier_AttrDef_CSS_Composite(
array(
[
new HTMLPurifier_AttrDef_Enum(
array(
[
'baseline',
'sub',
'super',
@ -363,11 +380,11 @@ class HTMLPurifier_CSSDefinition extends HTMLPurifier_Definition
'middle',
'bottom',
'text-bottom'
)
]
),
new HTMLPurifier_AttrDef_CSS_Length(),
new HTMLPurifier_AttrDef_CSS_Percentage()
)
]
);
$this->info['border-spacing'] = new HTMLPurifier_AttrDef_CSS_Multiple(new HTMLPurifier_AttrDef_CSS_Length(), 2);
@ -375,7 +392,7 @@ class HTMLPurifier_CSSDefinition extends HTMLPurifier_Definition
// These CSS properties don't work on many browsers, but we live
// in THE FUTURE!
$this->info['white-space'] = new HTMLPurifier_AttrDef_Enum(
array('nowrap', 'normal', 'pre', 'pre-wrap', 'pre-line')
['nowrap', 'normal', 'pre', 'pre-wrap', 'pre-line']
);
if ($config->get('CSS.Proprietary')) {
@ -422,21 +439,21 @@ class HTMLPurifier_CSSDefinition extends HTMLPurifier_Definition
// more CSS3
$this->info['page-break-after'] =
$this->info['page-break-before'] = new HTMLPurifier_AttrDef_Enum(
array(
[
'auto',
'always',
'avoid',
'left',
'right'
)
]
);
$this->info['page-break-inside'] = new HTMLPurifier_AttrDef_Enum(array('auto', 'avoid'));
$this->info['page-break-inside'] = new HTMLPurifier_AttrDef_Enum(['auto', 'avoid']);
$border_radius = new HTMLPurifier_AttrDef_CSS_Composite(
array(
[
new HTMLPurifier_AttrDef_CSS_Percentage(true), // disallow negative
new HTMLPurifier_AttrDef_CSS_Length('0') // disallow negative
));
]);
$this->info['border-top-left-radius'] =
$this->info['border-top-right-radius'] =
@ -453,7 +470,7 @@ class HTMLPurifier_CSSDefinition extends HTMLPurifier_Definition
protected function doSetupTricky($config)
{
$this->info['display'] = new HTMLPurifier_AttrDef_Enum(
array(
[
'inline',
'block',
'list-item',
@ -472,12 +489,12 @@ class HTMLPurifier_CSSDefinition extends HTMLPurifier_Definition
'table-cell',
'table-caption',
'none'
)
]
);
$this->info['visibility'] = new HTMLPurifier_AttrDef_Enum(
array('visible', 'hidden', 'collapse')
['visible', 'hidden', 'collapse']
);
$this->info['overflow'] = new HTMLPurifier_AttrDef_Enum(array('visible', 'hidden', 'auto', 'scroll'));
$this->info['overflow'] = new HTMLPurifier_AttrDef_Enum(['visible', 'hidden', 'auto', 'scroll']);
$this->info['opacity'] = new HTMLPurifier_AttrDef_CSS_AlphaValue();
}
@ -487,23 +504,23 @@ class HTMLPurifier_CSSDefinition extends HTMLPurifier_Definition
protected function doSetupTrusted($config)
{
$this->info['position'] = new HTMLPurifier_AttrDef_Enum(
array('static', 'relative', 'absolute', 'fixed')
['static', 'relative', 'absolute', 'fixed']
);
$this->info['top'] =
$this->info['left'] =
$this->info['right'] =
$this->info['bottom'] = new HTMLPurifier_AttrDef_CSS_Composite(
array(
[
new HTMLPurifier_AttrDef_CSS_Length(),
new HTMLPurifier_AttrDef_CSS_Percentage(),
new HTMLPurifier_AttrDef_Enum(array('auto')),
)
new HTMLPurifier_AttrDef_Enum(['auto']),
]
);
$this->info['z-index'] = new HTMLPurifier_AttrDef_CSS_Composite(
array(
[
new HTMLPurifier_AttrDef_Integer(),
new HTMLPurifier_AttrDef_Enum(array('auto')),
)
new HTMLPurifier_AttrDef_Enum(['auto']),
]
);
}

View File

@ -21,7 +21,7 @@ class HTMLPurifier_Config
* HTML Purifier's version
* @type string
*/
public $version = '4.15.0';
public $version = '4.17.0';
/**
* Whether or not to automatically finalize

View File

@ -287,13 +287,14 @@ class HTMLPurifier_DefinitionCache_Serializer extends HTMLPurifier_DefinitionCac
} elseif (filegroup($dir) === posix_getgid()) {
$chmod = $chmod | 0070;
} else {
// PHP's probably running as nobody, so we'll
// need to give global permissions
$chmod = $chmod | 0777;
// PHP's probably running as nobody, it is
// not obvious how to fix this (777 is probably
// bad if you are multi-user), let the user figure it out
$chmod = null;
}
trigger_error(
'Directory ' . $dir . ' not writable, ' .
'please chmod to ' . decoct($chmod),
'Directory ' . $dir . ' not writable. ' .
($chmod === null ? '' : 'Please chmod to ' . decoct($chmod)),
E_USER_WARNING
);
} else {

View File

View File

@ -71,7 +71,7 @@ class HTMLPurifier_DefinitionCacheFactory
return $this->caches[$method][$type];
}
if (isset($this->implementations[$method]) &&
class_exists($class = $this->implementations[$method], false)) {
class_exists($class = $this->implementations[$method])) {
$cache = new $class($type);
} else {
if ($method != 'Serializer') {

View File

@ -146,175 +146,179 @@ class HTMLPurifier_Filter_ExtractStyleBlocks extends HTMLPurifier_Filter
foreach ($this->_tidy->css as $k => $decls) {
// $decls are all CSS declarations inside an @ selector
$new_decls = array();
foreach ($decls as $selector => $style) {
$selector = trim($selector);
if ($selector === '') {
continue;
} // should not happen
// Parse the selector
// Here is the relevant part of the CSS grammar:
//
// ruleset
// : selector [ ',' S* selector ]* '{' ...
// selector
// : simple_selector [ combinator selector | S+ [ combinator? selector ]? ]?
// combinator
// : '+' S*
// : '>' S*
// simple_selector
// : element_name [ HASH | class | attrib | pseudo ]*
// | [ HASH | class | attrib | pseudo ]+
// element_name
// : IDENT | '*'
// ;
// class
// : '.' IDENT
// ;
// attrib
// : '[' S* IDENT S* [ [ '=' | INCLUDES | DASHMATCH ] S*
// [ IDENT | STRING ] S* ]? ']'
// ;
// pseudo
// : ':' [ IDENT | FUNCTION S* [IDENT S*]? ')' ]
// ;
//
// For reference, here are the relevant tokens:
//
// HASH #{name}
// IDENT {ident}
// INCLUDES ==
// DASHMATCH |=
// STRING {string}
// FUNCTION {ident}\(
//
// And the lexical scanner tokens
//
// name {nmchar}+
// nmchar [_a-z0-9-]|{nonascii}|{escape}
// nonascii [\240-\377]
// escape {unicode}|\\[^\r\n\f0-9a-f]
// unicode \\{h}}{1,6}(\r\n|[ \t\r\n\f])?
// ident -?{nmstart}{nmchar*}
// nmstart [_a-z]|{nonascii}|{escape}
// string {string1}|{string2}
// string1 \"([^\n\r\f\\"]|\\{nl}|{escape})*\"
// string2 \'([^\n\r\f\\"]|\\{nl}|{escape})*\'
//
// We'll implement a subset (in order to reduce attack
// surface); in particular:
//
// - No Unicode support
// - No escapes support
// - No string support (by proxy no attrib support)
// - element_name is matched against allowed
// elements (some people might find this
// annoying...)
// - Pseudo-elements one of :first-child, :link,
// :visited, :active, :hover, :focus
if (is_array($decls)) {
foreach ($decls as $selector => $style) {
$selector = trim($selector);
if ($selector === '') {
continue;
} // should not happen
// Parse the selector
// Here is the relevant part of the CSS grammar:
//
// ruleset
// : selector [ ',' S* selector ]* '{' ...
// selector
// : simple_selector [ combinator selector | S+ [ combinator? selector ]? ]?
// combinator
// : '+' S*
// : '>' S*
// simple_selector
// : element_name [ HASH | class | attrib | pseudo ]*
// | [ HASH | class | attrib | pseudo ]+
// element_name
// : IDENT | '*'
// ;
// class
// : '.' IDENT
// ;
// attrib
// : '[' S* IDENT S* [ [ '=' | INCLUDES | DASHMATCH ] S*
// [ IDENT | STRING ] S* ]? ']'
// ;
// pseudo
// : ':' [ IDENT | FUNCTION S* [IDENT S*]? ')' ]
// ;
//
// For reference, here are the relevant tokens:
//
// HASH #{name}
// IDENT {ident}
// INCLUDES ==
// DASHMATCH |=
// STRING {string}
// FUNCTION {ident}\(
//
// And the lexical scanner tokens
//
// name {nmchar}+
// nmchar [_a-z0-9-]|{nonascii}|{escape}
// nonascii [\240-\377]
// escape {unicode}|\\[^\r\n\f0-9a-f]
// unicode \\{h}}{1,6}(\r\n|[ \t\r\n\f])?
// ident -?{nmstart}{nmchar*}
// nmstart [_a-z]|{nonascii}|{escape}
// string {string1}|{string2}
// string1 \"([^\n\r\f\\"]|\\{nl}|{escape})*\"
// string2 \'([^\n\r\f\\"]|\\{nl}|{escape})*\'
//
// We'll implement a subset (in order to reduce attack
// surface); in particular:
//
// - No Unicode support
// - No escapes support
// - No string support (by proxy no attrib support)
// - element_name is matched against allowed
// elements (some people might find this
// annoying...)
// - Pseudo-elements one of :first-child, :link,
// :visited, :active, :hover, :focus
// handle ruleset
$selectors = array_map('trim', explode(',', $selector));
$new_selectors = array();
foreach ($selectors as $sel) {
// split on +, > and spaces
$basic_selectors = preg_split('/\s*([+> ])\s*/', $sel, -1, PREG_SPLIT_DELIM_CAPTURE);
// even indices are chunks, odd indices are
// delimiters
$nsel = null;
$delim = null; // guaranteed to be non-null after
// two loop iterations
for ($i = 0, $c = count($basic_selectors); $i < $c; $i++) {
$x = $basic_selectors[$i];
if ($i % 2) {
// delimiter
if ($x === ' ') {
$delim = ' ';
} else {
$delim = ' ' . $x . ' ';
}
} else {
// simple selector
$components = preg_split('/([#.:])/', $x, -1, PREG_SPLIT_DELIM_CAPTURE);
$sdelim = null;
$nx = null;
for ($j = 0, $cc = count($components); $j < $cc; $j++) {
$y = $components[$j];
if ($j === 0) {
if ($y === '*' || isset($html_definition->info[$y = strtolower($y)])) {
$nx = $y;
} else {
// $nx stays null; this matters
// if we don't manage to find
// any valid selector content,
// in which case we ignore the
// outer $delim
}
} elseif ($j % 2) {
// set delimiter
$sdelim = $y;
// handle ruleset
$selectors = array_map('trim', explode(',', $selector));
$new_selectors = array();
foreach ($selectors as $sel) {
// split on +, > and spaces
$basic_selectors = preg_split('/\s*([+> ])\s*/', $sel, -1, PREG_SPLIT_DELIM_CAPTURE);
// even indices are chunks, odd indices are
// delimiters
$nsel = null;
$delim = null; // guaranteed to be non-null after
// two loop iterations
for ($i = 0, $c = count($basic_selectors); $i < $c; $i++) {
$x = $basic_selectors[$i];
if ($i % 2) {
// delimiter
if ($x === ' ') {
$delim = ' ';
} else {
$attrdef = null;
if ($sdelim === '#') {
$attrdef = $this->_id_attrdef;
} elseif ($sdelim === '.') {
$attrdef = $this->_class_attrdef;
} elseif ($sdelim === ':') {
$attrdef = $this->_enum_attrdef;
} else {
throw new HTMLPurifier_Exception('broken invariant sdelim and preg_split');
}
$r = $attrdef->validate($y, $config, $context);
if ($r !== false) {
if ($r !== true) {
$y = $r;
}
if ($nx === null) {
$nx = '';
}
$nx .= $sdelim . $y;
}
}
}
if ($nx !== null) {
if ($nsel === null) {
$nsel = $nx;
} else {
$nsel .= $delim . $nx;
$delim = ' ' . $x . ' ';
}
} else {
// delimiters to the left of invalid
// basic selector ignored
// simple selector
$components = preg_split('/([#.:])/', $x, -1, PREG_SPLIT_DELIM_CAPTURE);
$sdelim = null;
$nx = null;
for ($j = 0, $cc = count($components); $j < $cc; $j++) {
$y = $components[$j];
if ($j === 0) {
if ($y === '*' || isset($html_definition->info[$y = strtolower($y)])) {
$nx = $y;
} else {
// $nx stays null; this matters
// if we don't manage to find
// any valid selector content,
// in which case we ignore the
// outer $delim
}
} elseif ($j % 2) {
// set delimiter
$sdelim = $y;
} else {
$attrdef = null;
if ($sdelim === '#') {
$attrdef = $this->_id_attrdef;
} elseif ($sdelim === '.') {
$attrdef = $this->_class_attrdef;
} elseif ($sdelim === ':') {
$attrdef = $this->_enum_attrdef;
} else {
throw new HTMLPurifier_Exception('broken invariant sdelim and preg_split');
}
$r = $attrdef->validate($y, $config, $context);
if ($r !== false) {
if ($r !== true) {
$y = $r;
}
if ($nx === null) {
$nx = '';
}
$nx .= $sdelim . $y;
}
}
}
if ($nx !== null) {
if ($nsel === null) {
$nsel = $nx;
} else {
$nsel .= $delim . $nx;
}
} else {
// delimiters to the left of invalid
// basic selector ignored
}
}
}
if ($nsel !== null) {
if (!empty($scopes)) {
foreach ($scopes as $s) {
$new_selectors[] = "$s $nsel";
}
} else {
$new_selectors[] = $nsel;
}
}
}
if ($nsel !== null) {
if (!empty($scopes)) {
foreach ($scopes as $s) {
$new_selectors[] = "$s $nsel";
}
} else {
$new_selectors[] = $nsel;
}
}
}
if (empty($new_selectors)) {
continue;
}
$selector = implode(', ', $new_selectors);
foreach ($style as $name => $value) {
if (!isset($css_definition->info[$name])) {
unset($style[$name]);
if (empty($new_selectors)) {
continue;
}
$def = $css_definition->info[$name];
$ret = $def->validate($value, $config, $context);
if ($ret === false) {
unset($style[$name]);
} else {
$style[$name] = $ret;
$selector = implode(', ', $new_selectors);
foreach ($style as $name => $value) {
if (!isset($css_definition->info[$name])) {
unset($style[$name]);
continue;
}
$def = $css_definition->info[$name];
$ret = $def->validate($value, $config, $context);
if ($ret === false) {
unset($style[$name]);
} else {
$style[$name] = $ret;
}
}
$new_decls[$selector] = $style;
}
$new_decls[$selector] = $style;
} else {
continue;
}
$new_css[$k] = $new_decls;
}

View File

@ -221,6 +221,7 @@ class HTMLPurifier_HTMLModule_Tidy extends HTMLPurifier_HTMLModule
*/
public function makeFixes()
{
return array();
}
}

View File

@ -109,7 +109,7 @@ class HTMLPurifier_LanguageFactory
} else {
$class = 'HTMLPurifier_Language_' . $pcode;
$file = $this->dir . '/Language/classes/' . $code . '.php';
if (file_exists($file) || class_exists($class, false)) {
if (file_exists($file) || class_exists($class)) {
$lang = new $class($config, $context);
} else {
// Go fallback

View File

@ -101,7 +101,7 @@ class HTMLPurifier_Lexer
break;
}
if (class_exists('DOMDocument', false) &&
if (class_exists('DOMDocument') &&
method_exists('DOMDocument', 'loadHTML') &&
!extension_loaded('domxml')
) {

View File

@ -104,7 +104,6 @@ class HTMLPurifier_Lexer_DOMLex extends HTMLPurifier_Lexer
* To iterate is human, to recurse divine - L. Peter Deutsch
* @param DOMNode $node DOMNode to be tokenized.
* @param HTMLPurifier_Token[] $tokens Array-list of already tokenized tokens.
* @return HTMLPurifier_Token of node appended to previously passed tokens.
*/
protected function tokenizeDOM($node, &$tokens, $config)
{

View File

@ -33,11 +33,11 @@ class HTMLPurifier_URIScheme_tel extends HTMLPurifier_URIScheme
$uri->host = null;
$uri->port = null;
// Delete all non-numeric characters, non-x characters
// Delete all non-numeric characters, commas, and non-x characters
// from phone number, EXCEPT for a leading plus sign.
$uri->path = preg_replace('/(?!^\+)[^\dx]/', '',
$uri->path = preg_replace('/(?!^\+)[^\dx,]/', '',
// Normalize e(x)tension to lower-case
str_replace('X', 'x', $uri->path));
str_replace('X', 'x', rawurldecode($uri->path)));
return true;
}

View File

@ -261,7 +261,7 @@ class HTMLPurifier_UnitConverter
*/
private function round($n, $sigfigs)
{
$new_log = (int)floor(log(abs($n), 10)); // Number of digits left of decimal - 1
$new_log = (int)floor(log(abs((float)$n), 10)); // Number of digits left of decimal - 1
$rp = $sigfigs - $new_log - 1; // Number of decimal places needed
$neg = $n < 0 ? '-' : ''; // Negative sign
if ($this->bcmath) {
@ -276,7 +276,7 @@ class HTMLPurifier_UnitConverter
}
return $n;
} else {
return $this->scale(round($n, $sigfigs - $new_log - 1), $rp + 1);
return $this->scale(round((float)$n, $sigfigs - $new_log - 1), $rp + 1);
}
}
@ -300,7 +300,7 @@ class HTMLPurifier_UnitConverter
// Now we return it, truncating the zero that was rounded off.
return substr($precise, 0, -1) . str_repeat('0', -$scale + 1);
}
return sprintf('%.' . $scale . 'f', (float)$r);
return number_format((float)$r, $scale, '.', '');
}
}