1
0
mirror of https://github.com/ezyang/htmlpurifier.git synced 2025-08-07 22:56:32 +02:00

Compare commits

..

1 Commits

Author SHA1 Message Date
Edward Z. Yang
c0132082d7 fix: fix CI
Signed-off-by: Edward Z. Yang <ezyang@meta.com>
2023-01-21 22:39:40 -05:00
8 changed files with 179 additions and 199 deletions

View File

@@ -10,21 +10,23 @@ class HTMLPurifier_AttrDef_CSS_FontFamily extends HTMLPurifier_AttrDef
public function __construct() public function __construct()
{ {
// Lowercase letters $this->mask = '_- ';
$l = range('a', 'z'); for ($c = 'a'; $c <= 'z'; $c++) {
// Uppercase letters $this->mask .= $c;
$u = range('A', 'Z'); }
// Digits for ($c = 'A'; $c <= 'Z'; $c++) {
$d = range('0', '9'); $this->mask .= $c;
// Special bytes used by UTF-8 }
$b = array_map('chr', range(0x80, 0xFF)); for ($c = '0'; $c <= '9'; $c++) {
// All valid characters for the mask $this->mask .= $c;
$c = array_merge($l, $u, $d, $b); } // cast-y, but should be fine
// Concatenate all valid characters into a string // special bytes used by UTF-8
// Use '_- ' as an initial value for ($i = 0x80; $i <= 0xFF; $i++) {
$this->mask = array_reduce($c, function ($carry, $value) { // We don't bother excluding invalid bytes in this range,
return $carry . $value; // because our restriction of well-formed UTF-8 will
}, '_- '); // prevent these from ever occurring.
$this->mask .= chr($i);
}
/* /*
PHP's internal strcspn implementation is PHP's internal strcspn implementation is

View File

@@ -33,11 +33,7 @@ class HTMLPurifier_AttrTransform_TargetBlank extends HTMLPurifier_AttrTransform
// XXX Kind of inefficient // XXX Kind of inefficient
$url = $this->parser->parse($attr['href']); $url = $this->parser->parse($attr['href']);
$scheme = $url->getSchemeObj($config, $context);
// Ignore invalid schemes (e.g. `javascript:`)
if (!($scheme = $url->getSchemeObj($config, $context))) {
return $attr;
}
if ($scheme->browsable && !$url->isBenign($config, $context)) { if ($scheme->browsable && !$url->isBenign($config, $context)) {
$attr['target'] = '_blank'; $attr['target'] = '_blank';

View File

@@ -287,14 +287,13 @@ class HTMLPurifier_DefinitionCache_Serializer extends HTMLPurifier_DefinitionCac
} elseif (filegroup($dir) === posix_getgid()) { } elseif (filegroup($dir) === posix_getgid()) {
$chmod = $chmod | 0070; $chmod = $chmod | 0070;
} else { } else {
// PHP's probably running as nobody, it is // PHP's probably running as nobody, so we'll
// not obvious how to fix this (777 is probably // need to give global permissions
// bad if you are multi-user), let the user figure it out $chmod = $chmod | 0777;
$chmod = null;
} }
trigger_error( trigger_error(
'Directory ' . $dir . ' not writable. ' . 'Directory ' . $dir . ' not writable, ' .
($chmod === null ? '' : 'Please chmod to ' . decoct($chmod)), 'please chmod to ' . decoct($chmod),
E_USER_WARNING E_USER_WARNING
); );
} else { } else {

View File

@@ -71,7 +71,7 @@ class HTMLPurifier_DefinitionCacheFactory
return $this->caches[$method][$type]; return $this->caches[$method][$type];
} }
if (isset($this->implementations[$method]) && if (isset($this->implementations[$method]) &&
class_exists($class = $this->implementations[$method])) { class_exists($class = $this->implementations[$method], false)) {
$cache = new $class($type); $cache = new $class($type);
} else { } else {
if ($method != 'Serializer') { if ($method != 'Serializer') {

View File

@@ -146,179 +146,175 @@ class HTMLPurifier_Filter_ExtractStyleBlocks extends HTMLPurifier_Filter
foreach ($this->_tidy->css as $k => $decls) { foreach ($this->_tidy->css as $k => $decls) {
// $decls are all CSS declarations inside an @ selector // $decls are all CSS declarations inside an @ selector
$new_decls = array(); $new_decls = array();
if (is_array($decls)) { foreach ($decls as $selector => $style) {
foreach ($decls as $selector => $style) { $selector = trim($selector);
$selector = trim($selector); if ($selector === '') {
if ($selector === '') { continue;
continue; } // should not happen
} // should not happen // Parse the selector
// Parse the selector // Here is the relevant part of the CSS grammar:
// Here is the relevant part of the CSS grammar: //
// // ruleset
// ruleset // : selector [ ',' S* selector ]* '{' ...
// : selector [ ',' S* selector ]* '{' ... // selector
// selector // : simple_selector [ combinator selector | S+ [ combinator? selector ]? ]?
// : simple_selector [ combinator selector | S+ [ combinator? selector ]? ]? // combinator
// combinator // : '+' S*
// : '+' S* // : '>' S*
// : '>' S* // simple_selector
// simple_selector // : element_name [ HASH | class | attrib | pseudo ]*
// : element_name [ HASH | class | attrib | pseudo ]* // | [ HASH | class | attrib | pseudo ]+
// | [ HASH | class | attrib | pseudo ]+ // element_name
// element_name // : IDENT | '*'
// : IDENT | '*' // ;
// ; // class
// class // : '.' IDENT
// : '.' IDENT // ;
// ; // attrib
// attrib // : '[' S* IDENT S* [ [ '=' | INCLUDES | DASHMATCH ] S*
// : '[' S* IDENT S* [ [ '=' | INCLUDES | DASHMATCH ] S* // [ IDENT | STRING ] S* ]? ']'
// [ IDENT | STRING ] S* ]? ']' // ;
// ; // pseudo
// pseudo // : ':' [ IDENT | FUNCTION S* [IDENT S*]? ')' ]
// : ':' [ IDENT | FUNCTION S* [IDENT S*]? ')' ] // ;
// ; //
// // For reference, here are the relevant tokens:
// For reference, here are the relevant tokens: //
// // HASH #{name}
// HASH #{name} // IDENT {ident}
// IDENT {ident} // INCLUDES ~=
// INCLUDES ~= // DASHMATCH |=
// DASHMATCH |= // STRING {string}
// STRING {string} // FUNCTION {ident}\(
// FUNCTION {ident}\( //
// // And the lexical scanner tokens
// And the lexical scanner tokens //
// // name {nmchar}+
// name {nmchar}+ // nmchar [_a-z0-9-]|{nonascii}|{escape}
// nmchar [_a-z0-9-]|{nonascii}|{escape} // nonascii [\240-\377]
// nonascii [\240-\377] // escape {unicode}|\\[^\r\n\f0-9a-f]
// escape {unicode}|\\[^\r\n\f0-9a-f] // unicode \\{h}}{1,6}(\r\n|[ \t\r\n\f])?
// unicode \\{h}}{1,6}(\r\n|[ \t\r\n\f])? // ident -?{nmstart}{nmchar*}
// ident -?{nmstart}{nmchar*} // nmstart [_a-z]|{nonascii}|{escape}
// nmstart [_a-z]|{nonascii}|{escape} // string {string1}|{string2}
// string {string1}|{string2} // string1 \"([^\n\r\f\\"]|\\{nl}|{escape})*\"
// string1 \"([^\n\r\f\\"]|\\{nl}|{escape})*\" // string2 \'([^\n\r\f\\']|\\{nl}|{escape})*\'
// string2 \'([^\n\r\f\\']|\\{nl}|{escape})*\' //
// // We'll implement a subset (in order to reduce attack
// We'll implement a subset (in order to reduce attack // surface); in particular:
// surface); in particular: //
// // - No Unicode support
// - No Unicode support // - No escapes support
// - No escapes support // - No string support (by proxy no attrib support)
// - No string support (by proxy no attrib support) // - element_name is matched against allowed
// - element_name is matched against allowed // elements (some people might find this
// elements (some people might find this // annoying...)
// annoying...) // - Pseudo-elements one of :first-child, :link,
// - Pseudo-elements one of :first-child, :link, // :visited, :active, :hover, :focus
// :visited, :active, :hover, :focus
// handle ruleset // handle ruleset
$selectors = array_map('trim', explode(',', $selector)); $selectors = array_map('trim', explode(',', $selector));
$new_selectors = array(); $new_selectors = array();
foreach ($selectors as $sel) { foreach ($selectors as $sel) {
// split on +, > and spaces // split on +, > and spaces
$basic_selectors = preg_split('/\s*([+> ])\s*/', $sel, -1, PREG_SPLIT_DELIM_CAPTURE); $basic_selectors = preg_split('/\s*([+> ])\s*/', $sel, -1, PREG_SPLIT_DELIM_CAPTURE);
// even indices are chunks, odd indices are // even indices are chunks, odd indices are
// delimiters // delimiters
$nsel = null; $nsel = null;
$delim = null; // guaranteed to be non-null after $delim = null; // guaranteed to be non-null after
// two loop iterations // two loop iterations
for ($i = 0, $c = count($basic_selectors); $i < $c; $i++) { for ($i = 0, $c = count($basic_selectors); $i < $c; $i++) {
$x = $basic_selectors[$i]; $x = $basic_selectors[$i];
if ($i % 2) { if ($i % 2) {
// delimiter // delimiter
if ($x === ' ') { if ($x === ' ') {
$delim = ' '; $delim = ' ';
} else {
$delim = ' ' . $x . ' ';
}
} else { } else {
// simple selector $delim = ' ' . $x . ' ';
$components = preg_split('/([#.:])/', $x, -1, PREG_SPLIT_DELIM_CAPTURE); }
$sdelim = null; } else {
$nx = null; // simple selector
for ($j = 0, $cc = count($components); $j < $cc; $j++) { $components = preg_split('/([#.:])/', $x, -1, PREG_SPLIT_DELIM_CAPTURE);
$y = $components[$j]; $sdelim = null;
if ($j === 0) { $nx = null;
if ($y === '*' || isset($html_definition->info[$y = strtolower($y)])) { for ($j = 0, $cc = count($components); $j < $cc; $j++) {
$nx = $y; $y = $components[$j];
} else { if ($j === 0) {
// $nx stays null; this matters if ($y === '*' || isset($html_definition->info[$y = strtolower($y)])) {
// if we don't manage to find $nx = $y;
// any valid selector content,
// in which case we ignore the
// outer $delim
}
} elseif ($j % 2) {
// set delimiter
$sdelim = $y;
} else { } else {
$attrdef = null; // $nx stays null; this matters
if ($sdelim === '#') { // if we don't manage to find
$attrdef = $this->_id_attrdef; // any valid selector content,
} elseif ($sdelim === '.') { // in which case we ignore the
$attrdef = $this->_class_attrdef; // outer $delim
} elseif ($sdelim === ':') {
$attrdef = $this->_enum_attrdef;
} else {
throw new HTMLPurifier_Exception('broken invariant sdelim and preg_split');
}
$r = $attrdef->validate($y, $config, $context);
if ($r !== false) {
if ($r !== true) {
$y = $r;
}
if ($nx === null) {
$nx = '';
}
$nx .= $sdelim . $y;
}
}
}
if ($nx !== null) {
if ($nsel === null) {
$nsel = $nx;
} else {
$nsel .= $delim . $nx;
} }
} elseif ($j % 2) {
// set delimiter
$sdelim = $y;
} else { } else {
// delimiters to the left of invalid $attrdef = null;
// basic selector ignored if ($sdelim === '#') {
$attrdef = $this->_id_attrdef;
} elseif ($sdelim === '.') {
$attrdef = $this->_class_attrdef;
} elseif ($sdelim === ':') {
$attrdef = $this->_enum_attrdef;
} else {
throw new HTMLPurifier_Exception('broken invariant sdelim and preg_split');
}
$r = $attrdef->validate($y, $config, $context);
if ($r !== false) {
if ($r !== true) {
$y = $r;
}
if ($nx === null) {
$nx = '';
}
$nx .= $sdelim . $y;
}
} }
} }
} if ($nx !== null) {
if ($nsel !== null) { if ($nsel === null) {
if (!empty($scopes)) { $nsel = $nx;
foreach ($scopes as $s) { } else {
$new_selectors[] = "$s $nsel"; $nsel .= $delim . $nx;
} }
} else { } else {
$new_selectors[] = $nsel; // delimiters to the left of invalid
// basic selector ignored
} }
} }
} }
if (empty($new_selectors)) { if ($nsel !== null) {
if (!empty($scopes)) {
foreach ($scopes as $s) {
$new_selectors[] = "$s $nsel";
}
} else {
$new_selectors[] = $nsel;
}
}
}
if (empty($new_selectors)) {
continue;
}
$selector = implode(', ', $new_selectors);
foreach ($style as $name => $value) {
if (!isset($css_definition->info[$name])) {
unset($style[$name]);
continue; continue;
} }
$selector = implode(', ', $new_selectors); $def = $css_definition->info[$name];
foreach ($style as $name => $value) { $ret = $def->validate($value, $config, $context);
if (!isset($css_definition->info[$name])) { if ($ret === false) {
unset($style[$name]); unset($style[$name]);
continue; } else {
} $style[$name] = $ret;
$def = $css_definition->info[$name];
$ret = $def->validate($value, $config, $context);
if ($ret === false) {
unset($style[$name]);
} else {
$style[$name] = $ret;
}
} }
$new_decls[$selector] = $style;
} }
} else { $new_decls[$selector] = $style;
continue;
} }
$new_css[$k] = $new_decls; $new_css[$k] = $new_decls;
} }

View File

@@ -109,7 +109,7 @@ class HTMLPurifier_LanguageFactory
} else { } else {
$class = 'HTMLPurifier_Language_' . $pcode; $class = 'HTMLPurifier_Language_' . $pcode;
$file = $this->dir . '/Language/classes/' . $code . '.php'; $file = $this->dir . '/Language/classes/' . $code . '.php';
if (file_exists($file) || class_exists($class)) { if (file_exists($file) || class_exists($class, false)) {
$lang = new $class($config, $context); $lang = new $class($config, $context);
} else { } else {
// Go fallback // Go fallback

View File

@@ -101,7 +101,7 @@ class HTMLPurifier_Lexer
break; break;
} }
if (class_exists('DOMDocument') && if (class_exists('DOMDocument', false) &&
method_exists('DOMDocument', 'loadHTML') && method_exists('DOMDocument', 'loadHTML') &&
!extension_loaded('domxml') !extension_loaded('domxml')
) { ) {

View File

@@ -214,19 +214,6 @@ text-align:right
); );
} }
public function test_keepImportantComments()
{
$this->assertCleanCSS(
"/*! Important */
div {
text-align:right /*! Important2 */
}",
"div {
text-align:right
}"
);
}
public function test_atSelector() public function test_atSelector()
{ {
$this->assertCleanCSS( $this->assertCleanCSS(