1
0
mirror of https://github.com/ezyang/htmlpurifier.git synced 2025-08-04 21:28:06 +02:00

Compare commits

..

1 Commits

Author SHA1 Message Date
Edward Z. Yang
c0132082d7 fix: fix CI
Signed-off-by: Edward Z. Yang <ezyang@meta.com>
2023-01-21 22:39:40 -05:00
21 changed files with 203 additions and 253 deletions

View File

@@ -10,7 +10,7 @@ jobs:
strategy:
fail-fast: true
matrix:
php: ['5.6', '7.0', '7.1', '7.2', '7.3', '7.4', '8.0', '8.1', '8.2', '8.3']
php: ['5.6', '7.0', '7.1', '7.2', '7.3', '7.4', '8.0', '8.1', '8.2']
name: PHP ${{ matrix.php }}

View File

@@ -16,10 +16,10 @@ jobs:
- name: Setup PHP
uses: shivammathur/setup-php@v2
with:
php-version: 8.2
php-version: 5.5
- name: Run automated release process with semantic-release
uses: cycjimmy/semantic-release-action@v4
uses: cycjimmy/semantic-release-action@v2
with:
extra_plugins: |
@semantic-release/changelog

View File

@@ -31,7 +31,7 @@ PROJECT_NAME = HTMLPurifier
# This could be handy for archiving the generated documentation or
# if some version control system is used.
PROJECT_NUMBER = 4.17.0
PROJECT_NUMBER = 4.15.0
# The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute)
# base path where the generated documentation will be put.

23
NEWS
View File

@@ -1,26 +1,3 @@
# [4.17.0](https://github.com/ezyang/htmlpurifier/compare/v4.16.0...v4.17.0) (2023-11-17)
### Bug Fixes
* CSSTidy ImportantComments not handled properly ([#359](https://github.com/ezyang/htmlpurifier/issues/359)) ([78a9b4d](https://github.com/ezyang/htmlpurifier/commit/78a9b4d0dae8bce9a70e4e7e551bf51e9a23706d))
* fix CI ([#361](https://github.com/ezyang/htmlpurifier/issues/361)) ([9ec687c](https://github.com/ezyang/htmlpurifier/commit/9ec687c904a1fe66a5395d22c50f7043e045d1d3))
* Invalid scheme check in Attr.TargetBlank ([#363](https://github.com/ezyang/htmlpurifier/issues/363)) ([0176ef4](https://github.com/ezyang/htmlpurifier/commit/0176ef4bb6f57103fdcb60a802603e60e81ee93e))
* semantic release ([#339](https://github.com/ezyang/htmlpurifier/issues/339)) ([d82f3d9](https://github.com/ezyang/htmlpurifier/commit/d82f3d996a0d9b0f23364946d9a14408c1ad72c5))
* semantic release ([#341](https://github.com/ezyang/htmlpurifier/issues/341)) ([e55fead](https://github.com/ezyang/htmlpurifier/commit/e55fead09f39430d30f48438f06e7bc2326efc94)), closes [#339](https://github.com/ezyang/htmlpurifier/issues/339)
* Support for locales using decimal separators other than . (dot) ([#372](https://github.com/ezyang/htmlpurifier/issues/372)) ([43f49ac](https://github.com/ezyang/htmlpurifier/commit/43f49ac9a51b81dfd07d3bc8dcfc5ec5637a5e3b))
### Features
* Add support for all text-decoration properties ([#360](https://github.com/ezyang/htmlpurifier/issues/360)) ([2d775c0](https://github.com/ezyang/htmlpurifier/commit/2d775c01874e2f676ba1a2d9fe69b47c2a823061))
* Allows commas to be included in tel URI ([#389](https://github.com/ezyang/htmlpurifier/issues/389)) ([ec92490](https://github.com/ezyang/htmlpurifier/commit/ec924901392f088d334622f806b9449b17b75d7b)), closes [#388](https://github.com/ezyang/htmlpurifier/issues/388)
### Reverts
* Revert "fix: semantic release (#339)" (#340) ([3e83215](https://github.com/ezyang/htmlpurifier/commit/3e832152a6173f880c6495a3ab2b0e5235e253a6)), closes [#339](https://github.com/ezyang/htmlpurifier/issues/339) [#340](https://github.com/ezyang/htmlpurifier/issues/340)
NEWS ( CHANGELOG and HISTORY ) HTMLPurifier
|||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||

View File

@@ -1 +1 @@
4.17.0
4.15.0

View File

@@ -13,7 +13,7 @@
}
],
"require": {
"php": "~5.6.0 || ~7.0.0 || ~7.1.0 || ~7.2.0 || ~7.3.0 || ~7.4.0 || ~8.0.0 || ~8.1.0 || ~8.2.0 || ~8.3.0"
"php": "~5.6.0 || ~7.0.0 || ~7.1.0 || ~7.2.0 || ~7.3.0 || ~7.4.0 || ~8.0.0 || ~8.1.0 || ~8.2.0"
},
"require-dev": {
"cerdic/css-tidy": "^1.7 || ^2.0",

View File

@@ -24,32 +24,32 @@
</directive>
<directive id="CSS.Proprietary">
<file name="HTMLPurifier/CSSDefinition.php">
<line>398</line>
<line>381</line>
</file>
</directive>
<directive id="CSS.AllowTricky">
<file name="HTMLPurifier/CSSDefinition.php">
<line>402</line>
<line>385</line>
</file>
</directive>
<directive id="CSS.Trusted">
<file name="HTMLPurifier/CSSDefinition.php">
<line>406</line>
<line>389</line>
</file>
</directive>
<directive id="CSS.AllowImportant">
<file name="HTMLPurifier/CSSDefinition.php">
<line>410</line>
<line>393</line>
</file>
</directive>
<directive id="CSS.AllowedProperties">
<file name="HTMLPurifier/CSSDefinition.php">
<line>539</line>
<line>522</line>
</file>
</directive>
<directive id="CSS.ForbiddenProperties">
<file name="HTMLPurifier/CSSDefinition.php">
<line>555</line>
<line>538</line>
</file>
</directive>
<directive id="Cache.DefinitionImpl">
@@ -357,7 +357,7 @@
</directive>
<directive id="CSS.AllowedFonts">
<file name="HTMLPurifier/AttrDef/CSS/FontFamily.php">
<line>62</line>
<line>64</line>
</file>
</directive>
<directive id="Attr.AllowedClasses">
@@ -480,7 +480,7 @@
</directive>
<directive id="Filter.ExtractStyleBlocks.Escaping">
<file name="HTMLPurifier/Filter/ExtractStyleBlocks.php">
<line>334</line>
<line>330</line>
</file>
</directive>
<directive id="HTML.Forms">

View File

@@ -7,7 +7,7 @@
* primary concern and you are using an opcode cache. PLEASE DO NOT EDIT THIS
* FILE, changes will be overwritten the next time the script is run.
*
* @version 4.17.0
* @version 4.15.0
*
* @warning
* You must *not* include any other HTML Purifier files before this file,

View File

@@ -19,7 +19,7 @@
*/
/*
HTML Purifier 4.17.0 - Standards Compliant HTML Filtering
HTML Purifier 4.15.0 - Standards Compliant HTML Filtering
Copyright (C) 2006-2008 Edward Z. Yang
This library is free software; you can redistribute it and/or
@@ -58,12 +58,12 @@ class HTMLPurifier
* Version of HTML Purifier.
* @type string
*/
public $version = '4.17.0';
public $version = '4.15.0';
/**
* Constant with version of HTML Purifier.
*/
const VERSION = '4.17.0';
const VERSION = '4.15.0';
/**
* Global configuration object.

View File

@@ -10,21 +10,23 @@ class HTMLPurifier_AttrDef_CSS_FontFamily extends HTMLPurifier_AttrDef
public function __construct()
{
// Lowercase letters
$l = range('a', 'z');
// Uppercase letters
$u = range('A', 'Z');
// Digits
$d = range('0', '9');
// Special bytes used by UTF-8
$b = array_map('chr', range(0x80, 0xFF));
// All valid characters for the mask
$c = array_merge($l, $u, $d, $b);
// Concatenate all valid characters into a string
// Use '_- ' as an initial value
$this->mask = array_reduce($c, function ($carry, $value) {
return $carry . $value;
}, '_- ');
$this->mask = '_- ';
for ($c = 'a'; $c <= 'z'; $c++) {
$this->mask .= $c;
}
for ($c = 'A'; $c <= 'Z'; $c++) {
$this->mask .= $c;
}
for ($c = '0'; $c <= '9'; $c++) {
$this->mask .= $c;
} // cast-y, but should be fine
// special bytes used by UTF-8
for ($i = 0x80; $i <= 0xFF; $i++) {
// We don't bother excluding invalid bytes in this range,
// because the our restriction of well-formed UTF-8 will
// prevent these from ever occurring.
$this->mask .= chr($i);
}
/*
PHP's internal strcspn implementation is

View File

@@ -33,11 +33,7 @@ class HTMLPurifier_AttrTransform_TargetBlank extends HTMLPurifier_AttrTransform
// XXX Kind of inefficient
$url = $this->parser->parse($attr['href']);
// Ignore invalid schemes (e.g. `javascript:`)
if (!($scheme = $url->getSchemeObj($config, $context))) {
return $attr;
}
$scheme = $url->getSchemeObj($config, $context);
if ($scheme->browsable && !$url->isBenign($config, $context)) {
$attr['target'] = '_blank';

View File

@@ -21,7 +21,7 @@ class HTMLPurifier_Config
* HTML Purifier's version
* @type string
*/
public $version = '4.17.0';
public $version = '4.15.0';
/**
* Whether or not to automatically finalize

View File

@@ -287,14 +287,13 @@ class HTMLPurifier_DefinitionCache_Serializer extends HTMLPurifier_DefinitionCac
} elseif (filegroup($dir) === posix_getgid()) {
$chmod = $chmod | 0070;
} else {
// PHP's probably running as nobody, it is
// not obvious how to fix this (777 is probably
// bad if you are multi-user), let the user figure it out
$chmod = null;
// PHP's probably running as nobody, so we'll
// need to give global permissions
$chmod = $chmod | 0777;
}
trigger_error(
'Directory ' . $dir . ' not writable. ' .
($chmod === null ? '' : 'Please chmod to ' . decoct($chmod)),
'Directory ' . $dir . ' not writable, ' .
'please chmod to ' . decoct($chmod),
E_USER_WARNING
);
} else {

View File

@@ -71,7 +71,7 @@ class HTMLPurifier_DefinitionCacheFactory
return $this->caches[$method][$type];
}
if (isset($this->implementations[$method]) &&
class_exists($class = $this->implementations[$method])) {
class_exists($class = $this->implementations[$method], false)) {
$cache = new $class($type);
} else {
if ($method != 'Serializer') {

View File

@@ -146,179 +146,175 @@ class HTMLPurifier_Filter_ExtractStyleBlocks extends HTMLPurifier_Filter
foreach ($this->_tidy->css as $k => $decls) {
// $decls are all CSS declarations inside an @ selector
$new_decls = array();
if (is_array($decls)) {
foreach ($decls as $selector => $style) {
$selector = trim($selector);
if ($selector === '') {
continue;
} // should not happen
// Parse the selector
// Here is the relevant part of the CSS grammar:
//
// ruleset
// : selector [ ',' S* selector ]* '{' ...
// selector
// : simple_selector [ combinator selector | S+ [ combinator? selector ]? ]?
// combinator
// : '+' S*
// : '>' S*
// simple_selector
// : element_name [ HASH | class | attrib | pseudo ]*
// | [ HASH | class | attrib | pseudo ]+
// element_name
// : IDENT | '*'
// ;
// class
// : '.' IDENT
// ;
// attrib
// : '[' S* IDENT S* [ [ '=' | INCLUDES | DASHMATCH ] S*
// [ IDENT | STRING ] S* ]? ']'
// ;
// pseudo
// : ':' [ IDENT | FUNCTION S* [IDENT S*]? ')' ]
// ;
//
// For reference, here are the relevant tokens:
//
// HASH #{name}
// IDENT {ident}
// INCLUDES ==
// DASHMATCH |=
// STRING {string}
// FUNCTION {ident}\(
//
// And the lexical scanner tokens
//
// name {nmchar}+
// nmchar [_a-z0-9-]|{nonascii}|{escape}
// nonascii [\240-\377]
// escape {unicode}|\\[^\r\n\f0-9a-f]
// unicode \\{h}}{1,6}(\r\n|[ \t\r\n\f])?
// ident -?{nmstart}{nmchar*}
// nmstart [_a-z]|{nonascii}|{escape}
// string {string1}|{string2}
// string1 \"([^\n\r\f\\"]|\\{nl}|{escape})*\"
// string2 \'([^\n\r\f\\"]|\\{nl}|{escape})*\'
//
// We'll implement a subset (in order to reduce attack
// surface); in particular:
//
// - No Unicode support
// - No escapes support
// - No string support (by proxy no attrib support)
// - element_name is matched against allowed
// elements (some people might find this
// annoying...)
// - Pseudo-elements one of :first-child, :link,
// :visited, :active, :hover, :focus
foreach ($decls as $selector => $style) {
$selector = trim($selector);
if ($selector === '') {
continue;
} // should not happen
// Parse the selector
// Here is the relevant part of the CSS grammar:
//
// ruleset
// : selector [ ',' S* selector ]* '{' ...
// selector
// : simple_selector [ combinator selector | S+ [ combinator? selector ]? ]?
// combinator
// : '+' S*
// : '>' S*
// simple_selector
// : element_name [ HASH | class | attrib | pseudo ]*
// | [ HASH | class | attrib | pseudo ]+
// element_name
// : IDENT | '*'
// ;
// class
// : '.' IDENT
// ;
// attrib
// : '[' S* IDENT S* [ [ '=' | INCLUDES | DASHMATCH ] S*
// [ IDENT | STRING ] S* ]? ']'
// ;
// pseudo
// : ':' [ IDENT | FUNCTION S* [IDENT S*]? ')' ]
// ;
//
// For reference, here are the relevant tokens:
//
// HASH #{name}
// IDENT {ident}
// INCLUDES ==
// DASHMATCH |=
// STRING {string}
// FUNCTION {ident}\(
//
// And the lexical scanner tokens
//
// name {nmchar}+
// nmchar [_a-z0-9-]|{nonascii}|{escape}
// nonascii [\240-\377]
// escape {unicode}|\\[^\r\n\f0-9a-f]
// unicode \\{h}}{1,6}(\r\n|[ \t\r\n\f])?
// ident -?{nmstart}{nmchar*}
// nmstart [_a-z]|{nonascii}|{escape}
// string {string1}|{string2}
// string1 \"([^\n\r\f\\"]|\\{nl}|{escape})*\"
// string2 \'([^\n\r\f\\"]|\\{nl}|{escape})*\'
//
// We'll implement a subset (in order to reduce attack
// surface); in particular:
//
// - No Unicode support
// - No escapes support
// - No string support (by proxy no attrib support)
// - element_name is matched against allowed
// elements (some people might find this
// annoying...)
// - Pseudo-elements one of :first-child, :link,
// :visited, :active, :hover, :focus
// handle ruleset
$selectors = array_map('trim', explode(',', $selector));
$new_selectors = array();
foreach ($selectors as $sel) {
// split on +, > and spaces
$basic_selectors = preg_split('/\s*([+> ])\s*/', $sel, -1, PREG_SPLIT_DELIM_CAPTURE);
// even indices are chunks, odd indices are
// delimiters
$nsel = null;
$delim = null; // guaranteed to be non-null after
// two loop iterations
for ($i = 0, $c = count($basic_selectors); $i < $c; $i++) {
$x = $basic_selectors[$i];
if ($i % 2) {
// delimiter
if ($x === ' ') {
$delim = ' ';
} else {
$delim = ' ' . $x . ' ';
}
// handle ruleset
$selectors = array_map('trim', explode(',', $selector));
$new_selectors = array();
foreach ($selectors as $sel) {
// split on +, > and spaces
$basic_selectors = preg_split('/\s*([+> ])\s*/', $sel, -1, PREG_SPLIT_DELIM_CAPTURE);
// even indices are chunks, odd indices are
// delimiters
$nsel = null;
$delim = null; // guaranteed to be non-null after
// two loop iterations
for ($i = 0, $c = count($basic_selectors); $i < $c; $i++) {
$x = $basic_selectors[$i];
if ($i % 2) {
// delimiter
if ($x === ' ') {
$delim = ' ';
} else {
// simple selector
$components = preg_split('/([#.:])/', $x, -1, PREG_SPLIT_DELIM_CAPTURE);
$sdelim = null;
$nx = null;
for ($j = 0, $cc = count($components); $j < $cc; $j++) {
$y = $components[$j];
if ($j === 0) {
if ($y === '*' || isset($html_definition->info[$y = strtolower($y)])) {
$nx = $y;
} else {
// $nx stays null; this matters
// if we don't manage to find
// any valid selector content,
// in which case we ignore the
// outer $delim
}
} elseif ($j % 2) {
// set delimiter
$sdelim = $y;
$delim = ' ' . $x . ' ';
}
} else {
// simple selector
$components = preg_split('/([#.:])/', $x, -1, PREG_SPLIT_DELIM_CAPTURE);
$sdelim = null;
$nx = null;
for ($j = 0, $cc = count($components); $j < $cc; $j++) {
$y = $components[$j];
if ($j === 0) {
if ($y === '*' || isset($html_definition->info[$y = strtolower($y)])) {
$nx = $y;
} else {
$attrdef = null;
if ($sdelim === '#') {
$attrdef = $this->_id_attrdef;
} elseif ($sdelim === '.') {
$attrdef = $this->_class_attrdef;
} elseif ($sdelim === ':') {
$attrdef = $this->_enum_attrdef;
} else {
throw new HTMLPurifier_Exception('broken invariant sdelim and preg_split');
}
$r = $attrdef->validate($y, $config, $context);
if ($r !== false) {
if ($r !== true) {
$y = $r;
}
if ($nx === null) {
$nx = '';
}
$nx .= $sdelim . $y;
}
}
}
if ($nx !== null) {
if ($nsel === null) {
$nsel = $nx;
} else {
$nsel .= $delim . $nx;
// $nx stays null; this matters
// if we don't manage to find
// any valid selector content,
// in which case we ignore the
// outer $delim
}
} elseif ($j % 2) {
// set delimiter
$sdelim = $y;
} else {
// delimiters to the left of invalid
// basic selector ignored
$attrdef = null;
if ($sdelim === '#') {
$attrdef = $this->_id_attrdef;
} elseif ($sdelim === '.') {
$attrdef = $this->_class_attrdef;
} elseif ($sdelim === ':') {
$attrdef = $this->_enum_attrdef;
} else {
throw new HTMLPurifier_Exception('broken invariant sdelim and preg_split');
}
$r = $attrdef->validate($y, $config, $context);
if ($r !== false) {
if ($r !== true) {
$y = $r;
}
if ($nx === null) {
$nx = '';
}
$nx .= $sdelim . $y;
}
}
}
}
if ($nsel !== null) {
if (!empty($scopes)) {
foreach ($scopes as $s) {
$new_selectors[] = "$s $nsel";
if ($nx !== null) {
if ($nsel === null) {
$nsel = $nx;
} else {
$nsel .= $delim . $nx;
}
} else {
$new_selectors[] = $nsel;
// delimiters to the left of invalid
// basic selector ignored
}
}
}
if (empty($new_selectors)) {
if ($nsel !== null) {
if (!empty($scopes)) {
foreach ($scopes as $s) {
$new_selectors[] = "$s $nsel";
}
} else {
$new_selectors[] = $nsel;
}
}
}
if (empty($new_selectors)) {
continue;
}
$selector = implode(', ', $new_selectors);
foreach ($style as $name => $value) {
if (!isset($css_definition->info[$name])) {
unset($style[$name]);
continue;
}
$selector = implode(', ', $new_selectors);
foreach ($style as $name => $value) {
if (!isset($css_definition->info[$name])) {
unset($style[$name]);
continue;
}
$def = $css_definition->info[$name];
$ret = $def->validate($value, $config, $context);
if ($ret === false) {
unset($style[$name]);
} else {
$style[$name] = $ret;
}
$def = $css_definition->info[$name];
$ret = $def->validate($value, $config, $context);
if ($ret === false) {
unset($style[$name]);
} else {
$style[$name] = $ret;
}
$new_decls[$selector] = $style;
}
} else {
continue;
$new_decls[$selector] = $style;
}
$new_css[$k] = $new_decls;
}

View File

@@ -109,7 +109,7 @@ class HTMLPurifier_LanguageFactory
} else {
$class = 'HTMLPurifier_Language_' . $pcode;
$file = $this->dir . '/Language/classes/' . $code . '.php';
if (file_exists($file) || class_exists($class)) {
if (file_exists($file) || class_exists($class, false)) {
$lang = new $class($config, $context);
} else {
// Go fallback

View File

@@ -101,7 +101,7 @@ class HTMLPurifier_Lexer
break;
}
if (class_exists('DOMDocument') &&
if (class_exists('DOMDocument', false) &&
method_exists('DOMDocument', 'loadHTML') &&
!extension_loaded('domxml')
) {

View File

@@ -33,9 +33,9 @@ class HTMLPurifier_URIScheme_tel extends HTMLPurifier_URIScheme
$uri->host = null;
$uri->port = null;
// Delete all non-numeric characters, commas, and non-x characters
// Delete all non-numeric characters, non-x characters
// from phone number, EXCEPT for a leading plus sign.
$uri->path = preg_replace('/(?!^\+)[^\dx,]/', '',
$uri->path = preg_replace('/(?!^\+)[^\dx]/', '',
// Normalize e(x)tension to lower-case
str_replace('X', 'x', rawurldecode($uri->path)));

View File

@@ -261,7 +261,7 @@ class HTMLPurifier_UnitConverter
*/
private function round($n, $sigfigs)
{
$new_log = (int)floor(log(abs((float)$n), 10)); // Number of digits left of decimal - 1
$new_log = (int)floor(log(abs($n), 10)); // Number of digits left of decimal - 1
$rp = $sigfigs - $new_log - 1; // Number of decimal places needed
$neg = $n < 0 ? '-' : ''; // Negative sign
if ($this->bcmath) {
@@ -276,7 +276,7 @@ class HTMLPurifier_UnitConverter
}
return $n;
} else {
return $this->scale(round((float)$n, $sigfigs - $new_log - 1), $rp + 1);
return $this->scale(round($n, $sigfigs - $new_log - 1), $rp + 1);
}
}
@@ -300,7 +300,7 @@ class HTMLPurifier_UnitConverter
// Now we return it, truncating the zero that was rounded off.
return substr($precise, 0, -1) . str_repeat('0', -$scale + 1);
}
return number_format((float)$r, $scale, '.', '');
return sprintf('%.' . $scale . 'f', (float)$r);
}
}

View File

@@ -214,19 +214,6 @@ text-align:right
);
}
public function test_keepImportantComments()
{
$this->assertCleanCSS(
"/*! Important */
div {
text-align:right /*! Important2 */
}",
"div {
text-align:right
}"
);
}
public function test_atSelector()
{
$this->assertCleanCSS(

View File

@@ -102,13 +102,6 @@ class HTMLPurifier_UnitConverterTest extends HTMLPurifier_Harness
$this->assertConversion('11.112pt', '0.15433in');
}
public function testDecimalSeparatorComma()
{
setlocale(LC_ALL, 'de_DE@euro', 'de_DE', 'deu_deu');
$this->assertConversion('11.11px', '0.294cm');
setlocale(LC_ALL, '');
}
public function testRoundingBigNumber()
{
$this->assertConversion('444400000000000000000000in', '42660000000000000000000000px');