From 2045731d6054eeb01a57b5fa7e8ed54782325471 Mon Sep 17 00:00:00 2001 From: Steve Clay Date: Wed, 18 May 2016 17:01:59 -0400 Subject: [PATCH] Improve Minify_Lines algorithm and add tests Fixes #531 Fixes #534 --- lib/Minify/Lines.php | 110 ++++++++++++++---- min_extras/tools/minifyTextarea.php | 2 +- tests/MinifyLinesTest.php | 32 +++-- tests/_test_files/lines/basic.in.js | 66 +++++++++++ .../lines_output.js => lines/basic.out.js} | 24 ++-- .../lines_bugs.js => lines/misc.in.js} | 24 ++-- tests/_test_files/lines/misc.out.js | 18 +++ tests/_test_files/lines/url.in.js | 13 +++ tests/_test_files/lines/url.out.js | 17 +++ 9 files changed, 247 insertions(+), 59 deletions(-) create mode 100644 tests/_test_files/lines/basic.in.js rename tests/_test_files/{minify/lines_output.js => lines/basic.out.js} (93%) rename tests/_test_files/{minify/lines_bugs.js => lines/misc.in.js} (53%) create mode 100644 tests/_test_files/lines/misc.out.js create mode 100644 tests/_test_files/lines/url.in.js create mode 100644 tests/_test_files/lines/url.out.js diff --git a/lib/Minify/Lines.php b/lib/Minify/Lines.php index e046c59..1aa2f95 100644 --- a/lib/Minify/Lines.php +++ b/lib/Minify/Lines.php @@ -41,11 +41,6 @@ class Minify_Lines { : ''; $content = str_replace("\r\n", "\n", $content); - // Hackily rewrite strings with XPath expressions that are - // likely to throw off our dumb parser (for Prototype 1.6.1). - $content = str_replace('"/*"', '"/"+"*"', $content); - $content = preg_replace('@([\'"])(\\.?//?)\\*@', '$1$2$1+$1*', $content); - $lines = explode("\n", $content); $numLines = count($lines); // determine left padding @@ -89,33 +84,43 @@ class Minify_Lines { * * @param string $line current line of code * - * @param bool $inComment was the parser in a comment at the - * beginning of the line? + * @param bool $inComment was the parser in a C-style comment at the + * beginning of the previous line? * * @return bool */ private static function _eolInComment($line, $inComment) { - // crude way to avoid things like // */ - $line = preg_replace('~//.*?(\\*/|/\\*).*~', '', $line); - while (strlen($line)) { - $search = $inComment - ? '*/' - : '/*'; - $pos = strpos($line, $search); - if (false === $pos) { - return $inComment; - } else { - if ($pos == 0 - || ($inComment - ? substr($line, $pos, 3) - : substr($line, $pos-1, 3)) != '*/*') - { - $inComment = ! $inComment; + if ($inComment) { + // only "*/" can end the comment + $index = self::_find($line, '*/'); + if ($index === false) { + return true; } - $line = substr($line, $pos + 2); + + // stop comment and keep walking line + $inComment = false; + @$line = (string)substr($line, $index + 2); + continue; } + + // look for "//" and "/*" + $single = self::_find($line, '//'); + $multi = self::_find($line, '/*'); + if ($multi === false) { + return false; + } + + if ($single === false || $multi < $single) { + // start comment and keep walking line + $inComment = true; + @$line = (string)substr($line, $multi + 2); + continue; + } + + // a single-line comment preceeded it + return false; } return $inComment; @@ -137,8 +142,63 @@ class Minify_Lines { */ private static function _addNote($line, $note, $inComment, $padTo) { - return $inComment + $line = $inComment ? '/* ' . str_pad($note, $padTo, ' ', STR_PAD_RIGHT) . ' *| ' . $line : '/* ' . str_pad($note, $padTo, ' ', STR_PAD_RIGHT) . ' */ ' . $line; + return rtrim($line); + } + + /** + * Find a token trying to avoid false positives + * + * @param string $str String containing the token + * @param string $token Token being checked + * @return bool + */ + private static function _find($str, $token) { + switch ($token) { + case '//': + $fakes = array( + '://' => 1, + '"//' => 1, + '\'//' => 1, + '".//' => 2, + '\'.//' => 2, + ); + break; + case '/*': + $fakes = array( + '"/*' => 1, + '\'/*' => 1, + '"//*' => 2, + '\'//*' => 2, + '".//*' => 3, + '\'.//*' => 3, + '*/*' => 1, + '\\/*' => 1, + ); + break; + default: + $fakes = array(); + } + + $index = strpos($str, $token); + $offset = 0; + + while ($index !== false) { + foreach ($fakes as $fake => $skip) { + $check = substr($str, $index - $skip, strlen($fake)); + if ($check === $fake) { + // move offset and scan again + $offset += $index + strlen($token); + $index = strpos($str, $token, $offset); + break; + } + } + // legitimate find + return $index; + } + + return $index; } } diff --git a/min_extras/tools/minifyTextarea.php b/min_extras/tools/minifyTextarea.php index fbdfcef..a5b5926 100644 --- a/min_extras/tools/minifyTextarea.php +++ b/min_extras/tools/minifyTextarea.php @@ -55,7 +55,7 @@ if ($env->post('method') === 'Minify and serve') { } $tpl = array(); -$tpl['classes'] = array('Minify_HTML', 'JSMin\\JSMin', 'Minify_CSS'); +$tpl['classes'] = array('Minify_HTML', 'JSMin\\JSMin', 'Minify_CSS', 'Minify_Lines'); if (in_array($env->post('method'), $tpl['classes'])) { diff --git a/tests/MinifyLinesTest.php b/tests/MinifyLinesTest.php index 6e93434..87f27c5 100644 --- a/tests/MinifyLinesTest.php +++ b/tests/MinifyLinesTest.php @@ -4,8 +4,6 @@ class MinifyLinesTest extends TestCase { public function test_lines() { - $exp = file_get_contents(self::$test_files . "/minify/lines_output.js"); - $env = new Minify_Env(array( 'server' => array( 'DOCUMENT_ROOT' => dirname(__DIR__), @@ -15,15 +13,27 @@ class MinifyLinesTest extends TestCase $controller = new Minify_Controller_Files($env, $sourceFactory); $minify = new Minify(new Minify_Cache_Null()); - $ret = $minify->serve($controller, array( - 'debug' => true - ,'quiet' => true - ,'encodeOutput' => false - ,'files' => array( - self::$test_files . "/js/before.js" - ) - )); + $files = glob(self::$test_files . "/lines/*.in.js"); - $this->assertEquals($exp, $ret['content']); + // uncomment to debug one + //$files = array(self::$test_files . "/lines/basic.in.js"); + + foreach ($files as $file) { + $ret = $minify->serve($controller, array( + 'debug' => true, + 'quiet' => true, + 'encodeOutput' => false, + 'files' => array($file), + )); + + $outFile = str_replace('.in.js', '.out.js', $file); + + $exp = file_get_contents($outFile); + + // uncomment to set up expected output + //file_put_contents($outFile, $ret['content']); + + $this->assertEquals($exp, $ret['content'], "Did not match: " . basename($outFile)); + } } } diff --git a/tests/_test_files/lines/basic.in.js b/tests/_test_files/lines/basic.in.js new file mode 100644 index 0000000..4158b3c --- /dev/null +++ b/tests/_test_files/lines/basic.in.js @@ -0,0 +1,66 @@ +/*! is.js + + (c) 2001 Douglas Crockford + 2001 June 3 +*/ + +// is + +// The -is- object is used to identify the browser. Every browser edition +// identifies itself, but there is no standard way of doing it, and some of +// the identification is deceptive. This is because the authors of web +// browsers are liars. For example, Microsoft's IE browsers claim to be +// Mozilla 4. Netscape 6 claims to be version 5. + +var is = { + ie: navigator.appName == 'Microsoft Internet Explorer', + java: navigator.javaEnabled(), + ns: navigator.appName == 'Netscape', + ua: navigator.userAgent.toLowerCase(), + version: parseFloat(navigator.appVersion.substr(21)) || + parseFloat(navigator.appVersion), + win: navigator.platform == 'Win32' +} +/*!* + * preserve this comment, too + */ +is.mac = is.ua.indexOf('mac') >= 0; +if (is.ua.indexOf('opera') >= 0) { + is.ie = is.ns = false; + is.opera = true; +} +if (is.ua.indexOf('gecko') >= 0) { + is.ie = is.ns = false; + is.gecko = true; +} + +/*@cc_on + /*@if (@_win32) + if (is.ie && is.win) + document.write("PASS: IE/win honored conditional comment.
"); + @else @*/ + if (is.ie && is.win) + document.write("FAIL: IE/win did not honor multi-line conditional comment.
"); + else + document.write("PASS: Non-IE/win browser ignores multi-line conditional comment.
"); + /*@end +@*/ + +var recognizesCondComm = true; +//@cc_on/* +recognizesCondComm = false; +//@cc_on*/ + +if ((is.ie && is.win) == recognizesCondComm) + document.write("PASS: IE/win honored single-line conditional comment.
"); +else + document.write("FAIL: Non-IE/win browser did not ignore single-line conditional comment.
"); + +// hello +//@cc_on/* +// world +//@cc_on*/ +//@cc_on/* +'hello'; +/*!* preserved */ +/*!* preserved */ \ No newline at end of file diff --git a/tests/_test_files/minify/lines_output.js b/tests/_test_files/lines/basic.out.js similarity index 93% rename from tests/_test_files/minify/lines_output.js rename to tests/_test_files/lines/basic.out.js index 5565867..ad0d845 100644 --- a/tests/_test_files/minify/lines_output.js +++ b/tests/_test_files/lines/basic.out.js @@ -1,20 +1,20 @@ -/* before.js */ +/* basic.in.js */ /* 1 */ /*! is.js -/* 2 *| +/* 2 *| /* 3 *| (c) 2001 Douglas Crockford /* 4 *| 2001 June 3 /* 5 *| */ -/* 6 */ +/* 6 */ /* 7 */ // is -/* 8 */ +/* 8 */ /* 9 */ // The -is- object is used to identify the browser. Every browser edition /* 10 */ // identifies itself, but there is no standard way of doing it, and some of /* 11 */ // the identification is deceptive. This is because the authors of web /* 12 */ // browsers are liars. For example, Microsoft's IE browsers claim to be /* 13 */ // Mozilla 4. Netscape 6 claims to be version 5. -/* 14 */ +/* 14 */ /* 15 */ var is = { /* 16 */ ie: navigator.appName == 'Microsoft Internet Explorer', /* 17 */ java: navigator.javaEnabled(), @@ -36,7 +36,7 @@ /* 33 */ is.ie = is.ns = false; /* 34 */ is.gecko = true; /* 35 */ } -/* 36 */ +/* 36 */ /* 37 */ /*@cc_on /* 38 *| /*@if (@_win32) /* 39 *| if (is.ie && is.win) @@ -44,24 +44,24 @@ /* 41 *| @else @*/ /* 42 */ if (is.ie && is.win) /* 43 */ document.write("FAIL: IE/win did not honor multi-line conditional comment.
"); -/* 44 */ else +/* 44 */ else /* 45 */ document.write("PASS: Non-IE/win browser ignores multi-line conditional comment.
"); /* 46 */ /*@end /* 47 *| @*/ -/* 48 */ +/* 48 */ /* 49 */ var recognizesCondComm = true; /* 50 */ //@cc_on/* -/* before.js */ +/* basic.in.js */ /* 51 */ recognizesCondComm = false; /* 52 */ //@cc_on*/ -/* 53 */ +/* 53 */ /* 54 */ if ((is.ie && is.win) == recognizesCondComm) /* 55 */ document.write("PASS: IE/win honored single-line conditional comment.
"); -/* 56 */ else +/* 56 */ else /* 57 */ document.write("FAIL: Non-IE/win browser did not ignore single-line conditional comment.
"); -/* 58 */ +/* 58 */ /* 59 */ // hello /* 60 */ //@cc_on/* /* 61 */ // world diff --git a/tests/_test_files/minify/lines_bugs.js b/tests/_test_files/lines/misc.in.js similarity index 53% rename from tests/_test_files/minify/lines_bugs.js rename to tests/_test_files/lines/misc.in.js index cf56b58..6ac5d6d 100644 --- a/tests/_test_files/minify/lines_bugs.js +++ b/tests/_test_files/lines/misc.in.js @@ -1,10 +1,14 @@ -// sections from Prototype 1.6.1 -var xpath = ".//*[local-name()='ul' or local-name()='UL']" + - "//*[local-name()='li' or local-name()='LI']"; -this.matcher = ['.//*']; -xpath = { - descendant: "//*", - child: "/*", - f: 0 -}; -document._getElementsByXPath('.//*' + cond, element); \ No newline at end of file +// sections from Prototype 1.6.1 +var xpath = ".//*[local-name()='ul' or local-name()='UL']" + + "//*[local-name()='li' or local-name()='LI']"; +this.matcher = ['.//*']; +xpath = { + descendant: "//*", + child: "/*", + f: 0 +}; +document._getElementsByXPath('.//*' + cond, element); + +// from angular 1.4.8 +var URL_REGEXP = /^[a-z][a-z\d.+-]*:\/*(?:[^:@]+(?::[^@]+)?@)?(?:[^\s:/?#]+|\[[a-f\d:]+\])(?::\d+)?(?:\/[^?#]*)?(?:\?[^#]*)?(?:#.*)?$/i; + diff --git a/tests/_test_files/lines/misc.out.js b/tests/_test_files/lines/misc.out.js new file mode 100644 index 0000000..b888683 --- /dev/null +++ b/tests/_test_files/lines/misc.out.js @@ -0,0 +1,18 @@ + +/* misc.in.js */ + +/* 1 */ // sections from Prototype 1.6.1 +/* 2 */ var xpath = ".//*[local-name()='ul' or local-name()='UL']" + +/* 3 */ "//*[local-name()='li' or local-name()='LI']"; +/* 4 */ this.matcher = ['.//*']; +/* 5 */ xpath = { +/* 6 */ descendant: "//*", +/* 7 */ child: "/*", +/* 8 */ f: 0 +/* 9 */ }; +/* 10 */ document._getElementsByXPath('.//*' + cond, element); +/* 11 */ +/* 12 */ // from angular 1.4.8 +/* 13 */ var URL_REGEXP = /^[a-z][a-z\d.+-]*:\/*(?:[^:@]+(?::[^@]+)?@)?(?:[^\s:/?#]+|\[[a-f\d:]+\])(?::\d+)?(?:\/[^?#]*)?(?:\?[^#]*)?(?:#.*)?$/i; +/* 14 */ +/* 15 */ diff --git a/tests/_test_files/lines/url.in.js b/tests/_test_files/lines/url.in.js new file mode 100644 index 0000000..eb5d779 --- /dev/null +++ b/tests/_test_files/lines/url.in.js @@ -0,0 +1,13 @@ +foo; /* http://example.com */ +bar; + +foo; /* + http://example.com */ +bar; + +foo = "http://example.com"; /* hello */ +bar; + +foo = "http://example.com"; /* +hello */ +bar; diff --git a/tests/_test_files/lines/url.out.js b/tests/_test_files/lines/url.out.js new file mode 100644 index 0000000..dc43a7c --- /dev/null +++ b/tests/_test_files/lines/url.out.js @@ -0,0 +1,17 @@ + +/* url.in.js */ + +/* 1 */ foo; /* http://example.com */ +/* 2 */ bar; +/* 3 */ +/* 4 */ foo; /* +/* 5 *| http://example.com */ +/* 6 */ bar; +/* 7 */ +/* 8 */ foo = "http://example.com"; /* hello */ +/* 9 */ bar; +/* 10 */ +/* 11 */ foo = "http://example.com"; /* +/* 12 *| hello */ +/* 13 */ bar; +/* 14 */