mirror of
https://github.com/ezyang/htmlpurifier.git
synced 2025-08-07 22:56:32 +02:00
Compare commits
1 Commits
Author | SHA1 | Date | |
---|---|---|---|
|
c0132082d7 |
@@ -10,21 +10,23 @@ class HTMLPurifier_AttrDef_CSS_FontFamily extends HTMLPurifier_AttrDef
|
|||||||
|
|
||||||
public function __construct()
|
public function __construct()
|
||||||
{
|
{
|
||||||
// Lowercase letters
|
$this->mask = '_- ';
|
||||||
$l = range('a', 'z');
|
for ($c = 'a'; $c <= 'z'; $c++) {
|
||||||
// Uppercase letters
|
$this->mask .= $c;
|
||||||
$u = range('A', 'Z');
|
}
|
||||||
// Digits
|
for ($c = 'A'; $c <= 'Z'; $c++) {
|
||||||
$d = range('0', '9');
|
$this->mask .= $c;
|
||||||
// Special bytes used by UTF-8
|
}
|
||||||
$b = array_map('chr', range(0x80, 0xFF));
|
for ($c = '0'; $c <= '9'; $c++) {
|
||||||
// All valid characters for the mask
|
$this->mask .= $c;
|
||||||
$c = array_merge($l, $u, $d, $b);
|
} // cast-y, but should be fine
|
||||||
// Concatenate all valid characters into a string
|
// special bytes used by UTF-8
|
||||||
// Use '_- ' as an initial value
|
for ($i = 0x80; $i <= 0xFF; $i++) {
|
||||||
$this->mask = array_reduce($c, function ($carry, $value) {
|
// We don't bother excluding invalid bytes in this range,
|
||||||
return $carry . $value;
|
// because our restriction to well-formed UTF-8 will
|
||||||
}, '_- ');
|
// prevent these from ever occurring.
|
||||||
|
$this->mask .= chr($i);
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
PHP's internal strcspn implementation is
|
PHP's internal strcspn implementation is
|
||||||
|
@@ -33,11 +33,7 @@ class HTMLPurifier_AttrTransform_TargetBlank extends HTMLPurifier_AttrTransform
|
|||||||
|
|
||||||
// XXX Kind of inefficient
|
// XXX Kind of inefficient
|
||||||
$url = $this->parser->parse($attr['href']);
|
$url = $this->parser->parse($attr['href']);
|
||||||
|
$scheme = $url->getSchemeObj($config, $context);
|
||||||
// Ignore invalid schemes (e.g. `javascript:`)
|
|
||||||
if (!($scheme = $url->getSchemeObj($config, $context))) {
|
|
||||||
return $attr;
|
|
||||||
}
|
|
||||||
|
|
||||||
if ($scheme->browsable && !$url->isBenign($config, $context)) {
|
if ($scheme->browsable && !$url->isBenign($config, $context)) {
|
||||||
$attr['target'] = '_blank';
|
$attr['target'] = '_blank';
|
||||||
|
@@ -287,14 +287,13 @@ class HTMLPurifier_DefinitionCache_Serializer extends HTMLPurifier_DefinitionCac
|
|||||||
} elseif (filegroup($dir) === posix_getgid()) {
|
} elseif (filegroup($dir) === posix_getgid()) {
|
||||||
$chmod = $chmod | 0070;
|
$chmod = $chmod | 0070;
|
||||||
} else {
|
} else {
|
||||||
// PHP's probably running as nobody, it is
|
// PHP's probably running as nobody, so we'll
|
||||||
// not obvious how to fix this (777 is probably
|
// need to give global permissions
|
||||||
// bad if you are multi-user), let the user figure it out
|
$chmod = $chmod | 0777;
|
||||||
$chmod = null;
|
|
||||||
}
|
}
|
||||||
trigger_error(
|
trigger_error(
|
||||||
'Directory ' . $dir . ' not writable. ' .
|
'Directory ' . $dir . ' not writable, ' .
|
||||||
($chmod === null ? '' : 'Please chmod to ' . decoct($chmod)),
|
'please chmod to ' . decoct($chmod),
|
||||||
E_USER_WARNING
|
E_USER_WARNING
|
||||||
);
|
);
|
||||||
} else {
|
} else {
|
||||||
|
@@ -71,7 +71,7 @@ class HTMLPurifier_DefinitionCacheFactory
|
|||||||
return $this->caches[$method][$type];
|
return $this->caches[$method][$type];
|
||||||
}
|
}
|
||||||
if (isset($this->implementations[$method]) &&
|
if (isset($this->implementations[$method]) &&
|
||||||
class_exists($class = $this->implementations[$method])) {
|
class_exists($class = $this->implementations[$method], false)) {
|
||||||
$cache = new $class($type);
|
$cache = new $class($type);
|
||||||
} else {
|
} else {
|
||||||
if ($method != 'Serializer') {
|
if ($method != 'Serializer') {
|
||||||
|
@@ -146,179 +146,175 @@ class HTMLPurifier_Filter_ExtractStyleBlocks extends HTMLPurifier_Filter
|
|||||||
foreach ($this->_tidy->css as $k => $decls) {
|
foreach ($this->_tidy->css as $k => $decls) {
|
||||||
// $decls are all CSS declarations inside an @ selector
|
// $decls are all CSS declarations inside an @ selector
|
||||||
$new_decls = array();
|
$new_decls = array();
|
||||||
if (is_array($decls)) {
|
foreach ($decls as $selector => $style) {
|
||||||
foreach ($decls as $selector => $style) {
|
$selector = trim($selector);
|
||||||
$selector = trim($selector);
|
if ($selector === '') {
|
||||||
if ($selector === '') {
|
continue;
|
||||||
continue;
|
} // should not happen
|
||||||
} // should not happen
|
// Parse the selector
|
||||||
// Parse the selector
|
// Here is the relevant part of the CSS grammar:
|
||||||
// Here is the relevant part of the CSS grammar:
|
//
|
||||||
//
|
// ruleset
|
||||||
// ruleset
|
// : selector [ ',' S* selector ]* '{' ...
|
||||||
// : selector [ ',' S* selector ]* '{' ...
|
// selector
|
||||||
// selector
|
// : simple_selector [ combinator selector | S+ [ combinator? selector ]? ]?
|
||||||
// : simple_selector [ combinator selector | S+ [ combinator? selector ]? ]?
|
// combinator
|
||||||
// combinator
|
// : '+' S*
|
||||||
// : '+' S*
|
// : '>' S*
|
||||||
// : '>' S*
|
// simple_selector
|
||||||
// simple_selector
|
// : element_name [ HASH | class | attrib | pseudo ]*
|
||||||
// : element_name [ HASH | class | attrib | pseudo ]*
|
// | [ HASH | class | attrib | pseudo ]+
|
||||||
// | [ HASH | class | attrib | pseudo ]+
|
// element_name
|
||||||
// element_name
|
// : IDENT | '*'
|
||||||
// : IDENT | '*'
|
// ;
|
||||||
// ;
|
// class
|
||||||
// class
|
// : '.' IDENT
|
||||||
// : '.' IDENT
|
// ;
|
||||||
// ;
|
// attrib
|
||||||
// attrib
|
// : '[' S* IDENT S* [ [ '=' | INCLUDES | DASHMATCH ] S*
|
||||||
// : '[' S* IDENT S* [ [ '=' | INCLUDES | DASHMATCH ] S*
|
// [ IDENT | STRING ] S* ]? ']'
|
||||||
// [ IDENT | STRING ] S* ]? ']'
|
// ;
|
||||||
// ;
|
// pseudo
|
||||||
// pseudo
|
// : ':' [ IDENT | FUNCTION S* [IDENT S*]? ')' ]
|
||||||
// : ':' [ IDENT | FUNCTION S* [IDENT S*]? ')' ]
|
// ;
|
||||||
// ;
|
//
|
||||||
//
|
// For reference, here are the relevant tokens:
|
||||||
// For reference, here are the relevant tokens:
|
//
|
||||||
//
|
// HASH #{name}
|
||||||
// HASH #{name}
|
// IDENT {ident}
|
||||||
// IDENT {ident}
|
// INCLUDES ~=
|
||||||
// INCLUDES ~=
|
// DASHMATCH |=
|
||||||
// DASHMATCH |=
|
// STRING {string}
|
||||||
// STRING {string}
|
// FUNCTION {ident}\(
|
||||||
// FUNCTION {ident}\(
|
//
|
||||||
//
|
// And the lexical scanner tokens
|
||||||
// And the lexical scanner tokens
|
//
|
||||||
//
|
// name {nmchar}+
|
||||||
// name {nmchar}+
|
// nmchar [_a-z0-9-]|{nonascii}|{escape}
|
||||||
// nmchar [_a-z0-9-]|{nonascii}|{escape}
|
// nonascii [\240-\377]
|
||||||
// nonascii [\240-\377]
|
// escape {unicode}|\\[^\r\n\f0-9a-f]
|
||||||
// escape {unicode}|\\[^\r\n\f0-9a-f]
|
// unicode \\{h}{1,6}(\r\n|[ \t\r\n\f])?
|
||||||
// unicode \\{h}{1,6}(\r\n|[ \t\r\n\f])?
|
// ident -?{nmstart}{nmchar}*
|
||||||
// ident -?{nmstart}{nmchar}*
|
// nmstart [_a-z]|{nonascii}|{escape}
|
||||||
// nmstart [_a-z]|{nonascii}|{escape}
|
// string {string1}|{string2}
|
||||||
// string {string1}|{string2}
|
// string1 \"([^\n\r\f\\"]|\\{nl}|{escape})*\"
|
||||||
// string1 \"([^\n\r\f\\"]|\\{nl}|{escape})*\"
|
// string2 \'([^\n\r\f\\']|\\{nl}|{escape})*\'
|
||||||
// string2 \'([^\n\r\f\\']|\\{nl}|{escape})*\'
|
//
|
||||||
//
|
// We'll implement a subset (in order to reduce attack
|
||||||
// We'll implement a subset (in order to reduce attack
|
// surface); in particular:
|
||||||
// surface); in particular:
|
//
|
||||||
//
|
// - No Unicode support
|
||||||
// - No Unicode support
|
// - No escapes support
|
||||||
// - No escapes support
|
// - No string support (by proxy no attrib support)
|
||||||
// - No string support (by proxy no attrib support)
|
// - element_name is matched against allowed
|
||||||
// - element_name is matched against allowed
|
// elements (some people might find this
|
||||||
// elements (some people might find this
|
// annoying...)
|
||||||
// annoying...)
|
// - Pseudo-elements one of :first-child, :link,
|
||||||
// - Pseudo-elements one of :first-child, :link,
|
// :visited, :active, :hover, :focus
|
||||||
// :visited, :active, :hover, :focus
|
|
||||||
|
|
||||||
// handle ruleset
|
// handle ruleset
|
||||||
$selectors = array_map('trim', explode(',', $selector));
|
$selectors = array_map('trim', explode(',', $selector));
|
||||||
$new_selectors = array();
|
$new_selectors = array();
|
||||||
foreach ($selectors as $sel) {
|
foreach ($selectors as $sel) {
|
||||||
// split on +, > and spaces
|
// split on +, > and spaces
|
||||||
$basic_selectors = preg_split('/\s*([+> ])\s*/', $sel, -1, PREG_SPLIT_DELIM_CAPTURE);
|
$basic_selectors = preg_split('/\s*([+> ])\s*/', $sel, -1, PREG_SPLIT_DELIM_CAPTURE);
|
||||||
// even indices are chunks, odd indices are
|
// even indices are chunks, odd indices are
|
||||||
// delimiters
|
// delimiters
|
||||||
$nsel = null;
|
$nsel = null;
|
||||||
$delim = null; // guaranteed to be non-null after
|
$delim = null; // guaranteed to be non-null after
|
||||||
// two loop iterations
|
// two loop iterations
|
||||||
for ($i = 0, $c = count($basic_selectors); $i < $c; $i++) {
|
for ($i = 0, $c = count($basic_selectors); $i < $c; $i++) {
|
||||||
$x = $basic_selectors[$i];
|
$x = $basic_selectors[$i];
|
||||||
if ($i % 2) {
|
if ($i % 2) {
|
||||||
// delimiter
|
// delimiter
|
||||||
if ($x === ' ') {
|
if ($x === ' ') {
|
||||||
$delim = ' ';
|
$delim = ' ';
|
||||||
} else {
|
|
||||||
$delim = ' ' . $x . ' ';
|
|
||||||
}
|
|
||||||
} else {
|
} else {
|
||||||
// simple selector
|
$delim = ' ' . $x . ' ';
|
||||||
$components = preg_split('/([#.:])/', $x, -1, PREG_SPLIT_DELIM_CAPTURE);
|
}
|
||||||
$sdelim = null;
|
} else {
|
||||||
$nx = null;
|
// simple selector
|
||||||
for ($j = 0, $cc = count($components); $j < $cc; $j++) {
|
$components = preg_split('/([#.:])/', $x, -1, PREG_SPLIT_DELIM_CAPTURE);
|
||||||
$y = $components[$j];
|
$sdelim = null;
|
||||||
if ($j === 0) {
|
$nx = null;
|
||||||
if ($y === '*' || isset($html_definition->info[$y = strtolower($y)])) {
|
for ($j = 0, $cc = count($components); $j < $cc; $j++) {
|
||||||
$nx = $y;
|
$y = $components[$j];
|
||||||
} else {
|
if ($j === 0) {
|
||||||
// $nx stays null; this matters
|
if ($y === '*' || isset($html_definition->info[$y = strtolower($y)])) {
|
||||||
// if we don't manage to find
|
$nx = $y;
|
||||||
// any valid selector content,
|
|
||||||
// in which case we ignore the
|
|
||||||
// outer $delim
|
|
||||||
}
|
|
||||||
} elseif ($j % 2) {
|
|
||||||
// set delimiter
|
|
||||||
$sdelim = $y;
|
|
||||||
} else {
|
} else {
|
||||||
$attrdef = null;
|
// $nx stays null; this matters
|
||||||
if ($sdelim === '#') {
|
// if we don't manage to find
|
||||||
$attrdef = $this->_id_attrdef;
|
// any valid selector content,
|
||||||
} elseif ($sdelim === '.') {
|
// in which case we ignore the
|
||||||
$attrdef = $this->_class_attrdef;
|
// outer $delim
|
||||||
} elseif ($sdelim === ':') {
|
|
||||||
$attrdef = $this->_enum_attrdef;
|
|
||||||
} else {
|
|
||||||
throw new HTMLPurifier_Exception('broken invariant sdelim and preg_split');
|
|
||||||
}
|
|
||||||
$r = $attrdef->validate($y, $config, $context);
|
|
||||||
if ($r !== false) {
|
|
||||||
if ($r !== true) {
|
|
||||||
$y = $r;
|
|
||||||
}
|
|
||||||
if ($nx === null) {
|
|
||||||
$nx = '';
|
|
||||||
}
|
|
||||||
$nx .= $sdelim . $y;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if ($nx !== null) {
|
|
||||||
if ($nsel === null) {
|
|
||||||
$nsel = $nx;
|
|
||||||
} else {
|
|
||||||
$nsel .= $delim . $nx;
|
|
||||||
}
|
}
|
||||||
|
} elseif ($j % 2) {
|
||||||
|
// set delimiter
|
||||||
|
$sdelim = $y;
|
||||||
} else {
|
} else {
|
||||||
// delimiters to the left of invalid
|
$attrdef = null;
|
||||||
// basic selector ignored
|
if ($sdelim === '#') {
|
||||||
|
$attrdef = $this->_id_attrdef;
|
||||||
|
} elseif ($sdelim === '.') {
|
||||||
|
$attrdef = $this->_class_attrdef;
|
||||||
|
} elseif ($sdelim === ':') {
|
||||||
|
$attrdef = $this->_enum_attrdef;
|
||||||
|
} else {
|
||||||
|
throw new HTMLPurifier_Exception('broken invariant sdelim and preg_split');
|
||||||
|
}
|
||||||
|
$r = $attrdef->validate($y, $config, $context);
|
||||||
|
if ($r !== false) {
|
||||||
|
if ($r !== true) {
|
||||||
|
$y = $r;
|
||||||
|
}
|
||||||
|
if ($nx === null) {
|
||||||
|
$nx = '';
|
||||||
|
}
|
||||||
|
$nx .= $sdelim . $y;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
if ($nx !== null) {
|
||||||
if ($nsel !== null) {
|
if ($nsel === null) {
|
||||||
if (!empty($scopes)) {
|
$nsel = $nx;
|
||||||
foreach ($scopes as $s) {
|
} else {
|
||||||
$new_selectors[] = "$s $nsel";
|
$nsel .= $delim . $nx;
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
$new_selectors[] = $nsel;
|
// delimiters to the left of invalid
|
||||||
|
// basic selector ignored
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (empty($new_selectors)) {
|
if ($nsel !== null) {
|
||||||
|
if (!empty($scopes)) {
|
||||||
|
foreach ($scopes as $s) {
|
||||||
|
$new_selectors[] = "$s $nsel";
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
$new_selectors[] = $nsel;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (empty($new_selectors)) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
$selector = implode(', ', $new_selectors);
|
||||||
|
foreach ($style as $name => $value) {
|
||||||
|
if (!isset($css_definition->info[$name])) {
|
||||||
|
unset($style[$name]);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
$selector = implode(', ', $new_selectors);
|
$def = $css_definition->info[$name];
|
||||||
foreach ($style as $name => $value) {
|
$ret = $def->validate($value, $config, $context);
|
||||||
if (!isset($css_definition->info[$name])) {
|
if ($ret === false) {
|
||||||
unset($style[$name]);
|
unset($style[$name]);
|
||||||
continue;
|
} else {
|
||||||
}
|
$style[$name] = $ret;
|
||||||
$def = $css_definition->info[$name];
|
|
||||||
$ret = $def->validate($value, $config, $context);
|
|
||||||
if ($ret === false) {
|
|
||||||
unset($style[$name]);
|
|
||||||
} else {
|
|
||||||
$style[$name] = $ret;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
$new_decls[$selector] = $style;
|
|
||||||
}
|
}
|
||||||
} else {
|
$new_decls[$selector] = $style;
|
||||||
continue;
|
|
||||||
}
|
}
|
||||||
$new_css[$k] = $new_decls;
|
$new_css[$k] = $new_decls;
|
||||||
}
|
}
|
||||||
|
@@ -109,7 +109,7 @@ class HTMLPurifier_LanguageFactory
|
|||||||
} else {
|
} else {
|
||||||
$class = 'HTMLPurifier_Language_' . $pcode;
|
$class = 'HTMLPurifier_Language_' . $pcode;
|
||||||
$file = $this->dir . '/Language/classes/' . $code . '.php';
|
$file = $this->dir . '/Language/classes/' . $code . '.php';
|
||||||
if (file_exists($file) || class_exists($class)) {
|
if (file_exists($file) || class_exists($class, false)) {
|
||||||
$lang = new $class($config, $context);
|
$lang = new $class($config, $context);
|
||||||
} else {
|
} else {
|
||||||
// Go fallback
|
// Go fallback
|
||||||
|
@@ -101,7 +101,7 @@ class HTMLPurifier_Lexer
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (class_exists('DOMDocument') &&
|
if (class_exists('DOMDocument', false) &&
|
||||||
method_exists('DOMDocument', 'loadHTML') &&
|
method_exists('DOMDocument', 'loadHTML') &&
|
||||||
!extension_loaded('domxml')
|
!extension_loaded('domxml')
|
||||||
) {
|
) {
|
||||||
|
@@ -214,19 +214,6 @@ text-align:right
|
|||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
public function test_keepImportantComments()
|
|
||||||
{
|
|
||||||
$this->assertCleanCSS(
|
|
||||||
"/*! Important */
|
|
||||||
div {
|
|
||||||
text-align:right /*! Important2 */
|
|
||||||
}",
|
|
||||||
"div {
|
|
||||||
text-align:right
|
|
||||||
}"
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
public function test_atSelector()
|
public function test_atSelector()
|
||||||
{
|
{
|
||||||
$this->assertCleanCSS(
|
$this->assertCleanCSS(
|
||||||
|
Reference in New Issue
Block a user