Don't suggest chmod to 777

Signed-off-by: Edward Z. Yang <ezyang@meta.com>
[refactor] Use range() function instead of string increment (#367)
2025-08-06 22:26:31 +02:00 · 2023-04-29 16:38:00 -04:00 · 2023-02-23 13:11:13 -05:00 · 2023-02-05 21:40:57 -05:00 · 2023-01-26 19:06:28 -05:00 · 2023-01-21 22:44:44 -05:00
8 changed files with 204 additions and 184 deletions
--- a/library/HTMLPurifier/AttrDef/CSS/FontFamily.php
+++ b/library/HTMLPurifier/AttrDef/CSS/FontFamily.php
@@ -10,23 +10,21 @@ class HTMLPurifier_AttrDef_CSS_FontFamily extends HTMLPurifier_AttrDef

    public function __construct()
    {
-        $this->mask = '_- ';
-        for ($c = 'a'; $c <= 'z'; $c++) {
-            $this->mask .= $c;
-        }
-        for ($c = 'A'; $c <= 'Z'; $c++) {
-            $this->mask .= $c;
-        }
-        for ($c = '0'; $c <= '9'; $c++) {
-            $this->mask .= $c;
-        } // cast-y, but should be fine
-        // special bytes used by UTF-8
-        for ($i = 0x80; $i <= 0xFF; $i++) {
-            // We don't bother excluding invalid bytes in this range,
-            // because the our restriction of well-formed UTF-8 will
-            // prevent these from ever occurring.
-            $this->mask .= chr($i);
-        }
+        // Lowercase letters
+        $l = range('a', 'z');
+        // Uppercase letters
+        $u = range('A', 'Z');
+        // Digits
+        $d = range('0', '9');
+        // Special bytes used by UTF-8
+        $b = array_map('chr', range(0x80, 0xFF));
+        // All valid characters for the mask
+        $c = array_merge($l, $u, $d, $b);
+        // Concatenate all valid characters into a string 
+        // Use '_- ' as an initial value
+        $this->mask = array_reduce($c, function ($carry, $value) {
+            return $carry . $value;
+        }, '_- ');

        /*
            PHP's internal strcspn implementation is
--- a/library/HTMLPurifier/AttrTransform/TargetBlank.php
+++ b/library/HTMLPurifier/AttrTransform/TargetBlank.php
@@ -33,7 +33,11 @@ class HTMLPurifier_AttrTransform_TargetBlank extends HTMLPurifier_AttrTransform

        // XXX Kind of inefficient
        $url = $this->parser->parse($attr['href']);
-        $scheme = $url->getSchemeObj($config, $context);
+        
+        // Ignore invalid schemes (e.g. `javascript:`)
+        if (!($scheme = $url->getSchemeObj($config, $context))) {
+            return $attr;
+        }

        if ($scheme->browsable && !$url->isBenign($config, $context)) {
            $attr['target'] = '_blank';
--- a/library/HTMLPurifier/DefinitionCache/Serializer.php
+++ b/library/HTMLPurifier/DefinitionCache/Serializer.php
@@ -287,13 +287,14 @@ class HTMLPurifier_DefinitionCache_Serializer extends HTMLPurifier_DefinitionCac
            } elseif (filegroup($dir) === posix_getgid()) {
                $chmod = $chmod | 0070;
            } else {
-                // PHP's probably running as nobody, so we'll
-                // need to give global permissions
-                $chmod = $chmod | 0777;
+                // PHP's probably running as nobody. It is not
+                // obvious how to fix this (777 is probably bad on a
+                // multi-user system), so let the user figure it out.
+                $chmod = null;
            }
            trigger_error(
-                'Directory ' . $dir . ' not writable, ' .
-                'please chmod to ' . decoct($chmod),
+                'Directory ' . $dir . ' not writable. ' .
+                ($chmod === null ? '' : 'Please chmod to ' . decoct($chmod)),
                E_USER_WARNING
            );
        } else {
--- a/library/HTMLPurifier/DefinitionCacheFactory.php
+++ b/library/HTMLPurifier/DefinitionCacheFactory.php
@@ -71,7 +71,7 @@ class HTMLPurifier_DefinitionCacheFactory
            return $this->caches[$method][$type];
        }
        if (isset($this->implementations[$method]) &&
-            class_exists($class = $this->implementations[$method], false)) {
+            class_exists($class = $this->implementations[$method])) {
            $cache = new $class($type);
        } else {
            if ($method != 'Serializer') {
--- a/library/HTMLPurifier/Filter/ExtractStyleBlocks.php
+++ b/library/HTMLPurifier/Filter/ExtractStyleBlocks.php
@@ -146,175 +146,179 @@ class HTMLPurifier_Filter_ExtractStyleBlocks extends HTMLPurifier_Filter
        foreach ($this->_tidy->css as $k => $decls) {
            // $decls are all CSS declarations inside an @ selector
            $new_decls = array();
-            foreach ($decls as $selector => $style) {
-                $selector = trim($selector);
-                if ($selector === '') {
-                    continue;
-                } // should not happen
-                // Parse the selector
-                // Here is the relevant part of the CSS grammar:
-                //
-                // ruleset
-                //   : selector [ ',' S* selector ]* '{' ...
-                // selector
-                //   : simple_selector [ combinator selector | S+ [ combinator? selector ]? ]?
-                // combinator
-                //   : '+' S*
-                //   : '>' S*
-                // simple_selector
-                //   : element_name [ HASH | class | attrib | pseudo ]*
-                //   | [ HASH | class | attrib | pseudo ]+
-                // element_name
-                //   : IDENT | '*'
-                //   ;
-                // class
-                //   : '.' IDENT
-                //   ;
-                // attrib
-                //   : '[' S* IDENT S* [ [ '=' | INCLUDES | DASHMATCH ] S*
-                //     [ IDENT | STRING ] S* ]? ']'
-                //   ;
-                // pseudo
-                //   : ':' [ IDENT | FUNCTION S* [IDENT S*]? ')' ]
-                //   ;
-                //
-                // For reference, here are the relevant tokens:
-                //
-                // HASH         #{name}
-                // IDENT        {ident}
-                // INCLUDES     ==
-                // DASHMATCH    |=
-                // STRING       {string}
-                // FUNCTION     {ident}\(
-                //
-                // And the lexical scanner tokens
-                //
-                // name         {nmchar}+
-                // nmchar       [_a-z0-9-]|{nonascii}|{escape}
-                // nonascii     [\240-\377]
-                // escape       {unicode}|\\[^\r\n\f0-9a-f]
-                // unicode      \\{h}}{1,6}(\r\n|[ \t\r\n\f])?
-                // ident        -?{nmstart}{nmchar*}
-                // nmstart      [_a-z]|{nonascii}|{escape}
-                // string       {string1}|{string2}
-                // string1      \"([^\n\r\f\\"]|\\{nl}|{escape})*\"
-                // string2      \'([^\n\r\f\\"]|\\{nl}|{escape})*\'
-                //
-                // We'll implement a subset (in order to reduce attack
-                // surface); in particular:
-                //
-                //      - No Unicode support
-                //      - No escapes support
-                //      - No string support (by proxy no attrib support)
-                //      - element_name is matched against allowed
-                //        elements (some people might find this
-                //        annoying...)
-                //      - Pseudo-elements one of :first-child, :link,
-                //        :visited, :active, :hover, :focus
+            if (is_array($decls)) {
+                foreach ($decls as $selector => $style) {
+                    $selector = trim($selector);
+                    if ($selector === '') {
+                        continue;
+                    } // should not happen
+                    // Parse the selector
+                    // Here is the relevant part of the CSS grammar:
+                    //
+                    // ruleset
+                    //   : selector [ ',' S* selector ]* '{' ...
+                    // selector
+                    //   : simple_selector [ combinator selector | S+ [ combinator? selector ]? ]?
+                    // combinator
+                    //   : '+' S*
+                    //   : '>' S*
+                    // simple_selector
+                    //   : element_name [ HASH | class | attrib | pseudo ]*
+                    //   | [ HASH | class | attrib | pseudo ]+
+                    // element_name
+                    //   : IDENT | '*'
+                    //   ;
+                    // class
+                    //   : '.' IDENT
+                    //   ;
+                    // attrib
+                    //   : '[' S* IDENT S* [ [ '=' | INCLUDES | DASHMATCH ] S*
+                    //     [ IDENT | STRING ] S* ]? ']'
+                    //   ;
+                    // pseudo
+                    //   : ':' [ IDENT | FUNCTION S* [IDENT S*]? ')' ]
+                    //   ;
+                    //
+                    // For reference, here are the relevant tokens:
+                    //
+                    // HASH         #{name}
+                    // IDENT        {ident}
+                    // INCLUDES     ==
+                    // DASHMATCH    |=
+                    // STRING       {string}
+                    // FUNCTION     {ident}\(
+                    //
+                    // And the lexical scanner tokens
+                    //
+                    // name         {nmchar}+
+                    // nmchar       [_a-z0-9-]|{nonascii}|{escape}
+                    // nonascii     [\240-\377]
+                    // escape       {unicode}|\\[^\r\n\f0-9a-f]
+                    // unicode      \\{h}}{1,6}(\r\n|[ \t\r\n\f])?
+                    // ident        -?{nmstart}{nmchar*}
+                    // nmstart      [_a-z]|{nonascii}|{escape}
+                    // string       {string1}|{string2}
+                    // string1      \"([^\n\r\f\\"]|\\{nl}|{escape})*\"
+                    // string2      \'([^\n\r\f\\"]|\\{nl}|{escape})*\'
+                    //
+                    // We'll implement a subset (in order to reduce attack
+                    // surface); in particular:
+                    //
+                    //      - No Unicode support
+                    //      - No escapes support
+                    //      - No string support (by proxy no attrib support)
+                    //      - element_name is matched against allowed
+                    //        elements (some people might find this
+                    //        annoying...)
+                    //      - Pseudo-elements one of :first-child, :link,
+                    //        :visited, :active, :hover, :focus

-                // handle ruleset
-                $selectors = array_map('trim', explode(',', $selector));
-                $new_selectors = array();
-                foreach ($selectors as $sel) {
-                    // split on +, > and spaces
-                    $basic_selectors = preg_split('/\s*([+> ])\s*/', $sel, -1, PREG_SPLIT_DELIM_CAPTURE);
-                    // even indices are chunks, odd indices are
-                    // delimiters
-                    $nsel = null;
-                    $delim = null; // guaranteed to be non-null after
-                    // two loop iterations
-                    for ($i = 0, $c = count($basic_selectors); $i < $c; $i++) {
-                        $x = $basic_selectors[$i];
-                        if ($i % 2) {
-                            // delimiter
-                            if ($x === ' ') {
-                                $delim = ' ';
-                            } else {
-                                $delim = ' ' . $x . ' ';
-                            }
-                        } else {
-                            // simple selector
-                            $components = preg_split('/([#.:])/', $x, -1, PREG_SPLIT_DELIM_CAPTURE);
-                            $sdelim = null;
-                            $nx = null;
-                            for ($j = 0, $cc = count($components); $j < $cc; $j++) {
-                                $y = $components[$j];
-                                if ($j === 0) {
-                                    if ($y === '*' || isset($html_definition->info[$y = strtolower($y)])) {
-                                        $nx = $y;
-                                    } else {
-                                        // $nx stays null; this matters
-                                        // if we don't manage to find
-                                        // any valid selector content,
-                                        // in which case we ignore the
-                                        // outer $delim
-                                    }
-                                } elseif ($j % 2) {
-                                    // set delimiter
-                                    $sdelim = $y;
+                    // handle ruleset
+                    $selectors = array_map('trim', explode(',', $selector));
+                    $new_selectors = array();
+                    foreach ($selectors as $sel) {
+                        // split on +, > and spaces
+                        $basic_selectors = preg_split('/\s*([+> ])\s*/', $sel, -1, PREG_SPLIT_DELIM_CAPTURE);
+                        // even indices are chunks, odd indices are
+                        // delimiters
+                        $nsel = null;
+                        $delim = null; // guaranteed to be non-null after
+                        // two loop iterations
+                        for ($i = 0, $c = count($basic_selectors); $i < $c; $i++) {
+                            $x = $basic_selectors[$i];
+                            if ($i % 2) {
+                                // delimiter
+                                if ($x === ' ') {
+                                    $delim = ' ';
                                } else {
-                                    $attrdef = null;
-                                    if ($sdelim === '#') {
-                                        $attrdef = $this->_id_attrdef;
-                                    } elseif ($sdelim === '.') {
-                                        $attrdef = $this->_class_attrdef;
-                                    } elseif ($sdelim === ':') {
-                                        $attrdef = $this->_enum_attrdef;
-                                    } else {
-                                        throw new HTMLPurifier_Exception('broken invariant sdelim and preg_split');
-                                    }
-                                    $r = $attrdef->validate($y, $config, $context);
-                                    if ($r !== false) {
-                                        if ($r !== true) {
-                                            $y = $r;
-                                        }
-                                        if ($nx === null) {
-                                            $nx = '';
-                                        }
-                                        $nx .= $sdelim . $y;
-                                    }
-                                }
-                            }
-                            if ($nx !== null) {
-                                if ($nsel === null) {
-                                    $nsel = $nx;
-                                } else {
-                                    $nsel .= $delim . $nx;
+                                    $delim = ' ' . $x . ' ';
                                }
                            } else {
-                                // delimiters to the left of invalid
-                                // basic selector ignored
+                                // simple selector
+                                $components = preg_split('/([#.:])/', $x, -1, PREG_SPLIT_DELIM_CAPTURE);
+                                $sdelim = null;
+                                $nx = null;
+                                for ($j = 0, $cc = count($components); $j < $cc; $j++) {
+                                    $y = $components[$j];
+                                    if ($j === 0) {
+                                        if ($y === '*' || isset($html_definition->info[$y = strtolower($y)])) {
+                                            $nx = $y;
+                                        } else {
+                                            // $nx stays null; this matters
+                                            // if we don't manage to find
+                                            // any valid selector content,
+                                            // in which case we ignore the
+                                            // outer $delim
+                                        }
+                                    } elseif ($j % 2) {
+                                        // set delimiter
+                                        $sdelim = $y;
+                                    } else {
+                                        $attrdef = null;
+                                        if ($sdelim === '#') {
+                                            $attrdef = $this->_id_attrdef;
+                                        } elseif ($sdelim === '.') {
+                                            $attrdef = $this->_class_attrdef;
+                                        } elseif ($sdelim === ':') {
+                                            $attrdef = $this->_enum_attrdef;
+                                        } else {
+                                            throw new HTMLPurifier_Exception('broken invariant sdelim and preg_split');
+                                        }
+                                        $r = $attrdef->validate($y, $config, $context);
+                                        if ($r !== false) {
+                                            if ($r !== true) {
+                                                $y = $r;
+                                            }
+                                            if ($nx === null) {
+                                                $nx = '';
+                                            }
+                                            $nx .= $sdelim . $y;
+                                        }
+                                    }
+                                }
+                                if ($nx !== null) {
+                                    if ($nsel === null) {
+                                        $nsel = $nx;
+                                    } else {
+                                        $nsel .= $delim . $nx;
+                                    }
+                                } else {
+                                    // delimiters to the left of invalid
+                                    // basic selector ignored
+                                }
+                            }
+                        }
+                        if ($nsel !== null) {
+                            if (!empty($scopes)) {
+                                foreach ($scopes as $s) {
+                                    $new_selectors[] = "$s $nsel";
+                                }
+                            } else {
+                                $new_selectors[] = $nsel;
                            }
                        }
                    }
-                    if ($nsel !== null) {
-                        if (!empty($scopes)) {
-                            foreach ($scopes as $s) {
-                                $new_selectors[] = "$s $nsel";
-                            }
-                        } else {
-                            $new_selectors[] = $nsel;
-                        }
-                    }
-                }
-                if (empty($new_selectors)) {
-                    continue;
-                }
-                $selector = implode(', ', $new_selectors);
-                foreach ($style as $name => $value) {
-                    if (!isset($css_definition->info[$name])) {
-                        unset($style[$name]);
+                    if (empty($new_selectors)) {
                        continue;
                    }
-                    $def = $css_definition->info[$name];
-                    $ret = $def->validate($value, $config, $context);
-                    if ($ret === false) {
-                        unset($style[$name]);
-                    } else {
-                        $style[$name] = $ret;
+                    $selector = implode(', ', $new_selectors);
+                    foreach ($style as $name => $value) {
+                        if (!isset($css_definition->info[$name])) {
+                            unset($style[$name]);
+                            continue;
+                        }
+                        $def = $css_definition->info[$name];
+                        $ret = $def->validate($value, $config, $context);
+                        if ($ret === false) {
+                            unset($style[$name]);
+                        } else {
+                            $style[$name] = $ret;
+                        }
                    }
+                    $new_decls[$selector] = $style;
                }
-                $new_decls[$selector] = $style;
+            } else {
+                continue;
            }
            $new_css[$k] = $new_decls;
        }
--- a/library/HTMLPurifier/LanguageFactory.php
+++ b/library/HTMLPurifier/LanguageFactory.php
@@ -109,7 +109,7 @@ class HTMLPurifier_LanguageFactory
        } else {
            $class = 'HTMLPurifier_Language_' . $pcode;
            $file  = $this->dir . '/Language/classes/' . $code . '.php';
-            if (file_exists($file) || class_exists($class, false)) {
+            if (file_exists($file) || class_exists($class)) {
                $lang = new $class($config, $context);
            } else {
                // Go fallback
--- a/library/HTMLPurifier/Lexer.php
+++ b/library/HTMLPurifier/Lexer.php
@@ -101,7 +101,7 @@ class HTMLPurifier_Lexer
                        break;
                    }

-                    if (class_exists('DOMDocument', false) &&
+                    if (class_exists('DOMDocument') &&
                        method_exists('DOMDocument', 'loadHTML') &&
                        !extension_loaded('domxml')
                    ) {
--- a/tests/HTMLPurifier/Filter/ExtractStyleBlocksTest.php
+++ b/tests/HTMLPurifier/Filter/ExtractStyleBlocksTest.php
@@ -214,6 +214,19 @@ text-align:right
        );
    }

+    public function test_keepImportantComments()
+    {
+        $this->assertCleanCSS(
+            "/*! Important */
+div {
+text-align:right /*! Important2 */
+}",
+            "div {
+text-align:right
+}"
+        );
+    }
+
    public function test_atSelector()
    {
        $this->assertCleanCSS(
Author	SHA1	Message	Date
Edward Z. Yang	4317c387fb	Don't suggest chmod to 777 Signed-off-by: Edward Z. Yang <ezyang@meta.com>	2023-04-29 16:38:00 -04:00
George Peter Banyard	c05639e0c9	[refactor] Use range() function instead of string increment (#367) This was found during the analysis for https://wiki.php.net/rfc/saner-inc-dec-operators I don't know what the minimum targeted version is, so the line which defines ``$c`` may need to be changed to use ``array_merge()``	2023-02-23 13:11:13 -05:00
Steve Bauman	b4136da73c	Remove unnecessary disablement of autoload (#364)	2023-02-05 21:40:57 -05:00
Jeff Standen	0176ef4bb6	fix: Invalid scheme check in Attr.TargetBlank (#363)	2023-01-26 19:06:28 -05:00
Francis Lévesque	78a9b4d0da	fix: CSSTidy ImportantComments not handled properly (#359) * fix: CSSTidy ImportantComments not handled properly Signed-off-by: Francis Lévesque <wolfrank2164@gmail.com> * fix: CSSTidy ImportantComments not handled properly -> remove comments Signed-off-by: Francis Lévesque <wolfrank2164@gmail.com> Co-authored-by: Edward Z. Yang <ezyang@meta.com>	2023-01-21 22:44:44 -05:00
Edward Z. Yang	9ec687c904	fix: fix CI (#361) Signed-off-by: Edward Z. Yang <ezyang@meta.com> Signed-off-by: Edward Z. Yang <ezyang@meta.com>	2023-01-21 22:42:38 -05:00