From 42b18c1e2a5b5a0d47a53c8178f38522521b1b74 Mon Sep 17 00:00:00 2001
From: Steve Clay <steve@mrclay.org>
Date: Wed, 8 Jun 2016 19:00:35 -0400
Subject: [PATCH] Sync JSMin with mrclay/jsmin-php

---
 min/lib/JSMin.php                            | 53 +++++++++++---------
 min_unit_tests/_test_files/js/regexes.min.js |  5 +-
 2 files changed, 32 insertions(+), 26 deletions(-)

diff --git a/min/lib/JSMin.php b/min/lib/JSMin.php
index 9840d8b..c90a23d 100644
--- a/min/lib/JSMin.php
+++ b/min/lib/JSMin.php
@@ -108,6 +108,11 @@ class JSMin {
             $mbIntEnc = mb_internal_encoding();
             mb_internal_encoding('8bit');
         }
+
+        if (isset($this->input[0]) && $this->input[0] === "\xef") {
+            $this->input = substr($this->input, 3);
+        }
+
         $this->input = str_replace("\r\n", "\n", $this->input);
         $this->inputLength = strlen($this->input);
 
@@ -271,37 +276,39 @@ class JSMin {
     protected function isRegexpLiteral()
     {
         if (false !== strpos("(,=:[!&|?+-~*{;", $this->a)) {
-            // we obviously aren't dividing
+            // a "/" right after one of these characters can't be a division, so it starts a regexp
             return true;
         }
 
-		// we have to check for a preceding keyword, and we don't need to pattern
-		// match over the whole output.
-		$recentOutput = substr($this->output, -10);
-
-		// check if return/typeof directly precede a pattern without a space
-		foreach (array('return', 'typeof') as $keyword) {
-            if ($this->a !== substr($keyword, -1)) {
-                // certainly wasn't keyword
-                continue;
-            }
-            if (preg_match("~(^|[\\s\\S])" . substr($keyword, 0, -1) . "$~", $recentOutput, $m)) {
-                if ($m[1] === '' || !$this->isAlphaNum($m[1])) {
-                    return true;
-                }
+        // the "/" follows a space/newline and fewer than 2 bytes were output: treat as regexp (see starts-regex.js)
+        $length = strlen($this->output);
+        if ($this->a === ' ' || $this->a === "\n") {
+            if ($length < 2) { // presumably the "/" is the first real token of the script
+                return true;
             }
         }
 
-		// check all keywords
-		if ($this->a === ' ' || $this->a === "\n") {
-			if (preg_match('~(^|[\\s\\S])(?:case|else|in|return|typeof)$~', $recentOutput, $m)) {
-				if ($m[1] === '' || !$this->isAlphaNum($m[1])) {
-					return true;
-				}
-			}
+        // a "/" directly after one of the keywords below must start a regexp; otherwise assume division
+
+        $subject = $this->output . trim($this->a); // output so far plus $this->a, unless $this->a is whitespace
+        if (!preg_match('/(?:case|else|in|return|typeof)$/', $subject, $m)) {
+            // $subject does not end with any of the keywords
+            return false;
         }
 
-        return false;
+        // the match may only be the tail of a longer identifier (see not-regexp.js)
+        $charBeforeKeyword = substr($subject, 0 - strlen($m[0]) - 1, 1); // NOTE(review): if $subject is only the keyword, the offset precedes the string start; confirm substr() result
+        if ($this->isAlphaNum($charBeforeKeyword)) {
+            // an identifier that merely ends in a keyword, e.g. "xreturn", so assume division
+            return false;
+        }
+
+        // it's a regexp; side effect: a space/newline held in $this->a is cleared, presumably so none is kept after the keyword
+        if ($this->a === ' ' || $this->a === "\n") {
+            $this->a = '';
+        }
+
+        return true;
     }
 
     /**
diff --git a/min_unit_tests/_test_files/js/regexes.min.js b/min_unit_tests/_test_files/js/regexes.min.js
index d3c80a6..d7d5b4e 100644
--- a/min_unit_tests/_test_files/js/regexes.min.js
+++ b/min_unit_tests/_test_files/js/regexes.min.js
@@ -1,3 +1,2 @@
-function testIssue74(){return /'/;}
-!function(s){return /^[£$€?.]/.test(s);}();typeof
-/ ' /;x=/ [/] /;1/foo;(2)/foo;function(){return/foo/};function(){return typeof/foo/};
\ No newline at end of file
+function testIssue74(){return/'/;}
+!function(s){return/^[£$€?.]/.test(s);}();typeof/ ' /;x=/ [/] /;1/foo;(2)/foo;function(){return/foo/};function(){return typeof/foo/};
\ No newline at end of file