From 42b18c1e2a5b5a0d47a53c8178f38522521b1b74 Mon Sep 17 00:00:00 2001 From: Steve Clay Date: Wed, 8 Jun 2016 19:00:35 -0400 Subject: [PATCH] Sync JSMin with mrclay/jsmin-php --- min/lib/JSMin.php | 53 +++++++++++--------- min_unit_tests/_test_files/js/regexes.min.js | 5 +- 2 files changed, 32 insertions(+), 26 deletions(-) diff --git a/min/lib/JSMin.php b/min/lib/JSMin.php index 9840d8b..c90a23d 100644 --- a/min/lib/JSMin.php +++ b/min/lib/JSMin.php @@ -108,6 +108,11 @@ class JSMin { $mbIntEnc = mb_internal_encoding(); mb_internal_encoding('8bit'); } + + if (isset($this->input[0]) && $this->input[0] === "\xef") { + $this->input = substr($this->input, 3); + } + $this->input = str_replace("\r\n", "\n", $this->input); $this->inputLength = strlen($this->input); @@ -271,37 +276,39 @@ class JSMin { protected function isRegexpLiteral() { if (false !== strpos("(,=:[!&|?+-~*{;", $this->a)) { - // we obviously aren't dividing + // we can't divide after these tokens return true; } - // we have to check for a preceding keyword, and we don't need to pattern - // match over the whole output. - $recentOutput = substr($this->output, -10); - - // check if return/typeof directly precede a pattern without a space - foreach (array('return', 'typeof') as $keyword) { - if ($this->a !== substr($keyword, -1)) { - // certainly wasn't keyword - continue; - } - if (preg_match("~(^|[\\s\\S])" . substr($keyword, 0, -1) . "$~", $recentOutput, $m)) { - if ($m[1] === '' || !$this->isAlphaNum($m[1])) { - return true; - } + // check if first non-ws token is "/" (see starts-regex.js) + $length = strlen($this->output); + if ($this->a === ' ' || $this->a === "\n") { + if ($length < 2) { // weird edge case + return true; } } - // check all keywords - if ($this->a === ' ' || $this->a === "\n") { - if (preg_match('~(^|[\\s\\S])(?:case|else|in|return|typeof)$~', $recentOutput, $m)) { - if ($m[1] === '' || !$this->isAlphaNum($m[1])) { - return true; - } - } + // if the "/" follows a keyword, it must be a regexp, otherwise it's best to assume division + + $subject = $this->output . trim($this->a); + if (!preg_match('/(?:case|else|in|return|typeof)$/', $subject, $m)) { + // not a keyword + return false; } - return false; + // can't be sure it's a keyword yet (see not-regexp.js) + $charBeforeKeyword = substr($subject, 0 - strlen($m[0]) - 1, 1); + if ($this->isAlphaNum($charBeforeKeyword)) { + // this is really an identifier ending in a keyword, e.g. "xreturn" + return false; + } + + // it's a regexp. Remove unneeded whitespace after keyword + if ($this->a === ' ' || $this->a === "\n") { + $this->a = ''; + } + + return true; } /** diff --git a/min_unit_tests/_test_files/js/regexes.min.js b/min_unit_tests/_test_files/js/regexes.min.js index d3c80a6..d7d5b4e 100644 --- a/min_unit_tests/_test_files/js/regexes.min.js +++ b/min_unit_tests/_test_files/js/regexes.min.js @@ -1,3 +1,2 @@ -function testIssue74(){return /'/;} -!function(s){return /^[£$€?.]/.test(s);}();typeof -/ ' /;x=/ [/] /;1/foo;(2)/foo;function(){return/foo/};function(){return typeof/foo/}; \ No newline at end of file +function testIssue74(){return/'/;} +!function(s){return/^[£$€?.]/.test(s);}();typeof/ ' /;x=/ [/] /;1/foo;(2)/foo;function(){return/foo/};function(){return typeof/foo/}; \ No newline at end of file