From 54ad187c49bb4ed9252533ba6a262107bc2a8cc3 Mon Sep 17 00:00:00 2001 From: Steve Clay Date: Fri, 27 Mar 2009 21:24:43 +0000 Subject: [PATCH] JSMin.php : Overhaul for readability, more useful exceptions --- min/lib/JSMin.php | 387 ++++++++++++++++++++++------------------------ 1 file changed, 188 insertions(+), 199 deletions(-) diff --git a/min/lib/JSMin.php b/min/lib/JSMin.php index 3d9858e..d244a65 100644 --- a/min/lib/JSMin.php +++ b/min/lib/JSMin.php @@ -11,11 +11,6 @@ * comments that begin with "/*!" (for documentation purposes). In the latter case * newlines are inserted around the comment to enhance readability. * - * Known issue: regular expressions containing quote characters must be proceeded - * by one of the following characters: (,=:[!&|? - * E.g. JSMin will fail on the following: return /'/; - * The simple workaround is to wrap the expression in parenthesis: return (/'/); - * * PHP 5 or higher is required. * * Permission is hereby granted to use this version of the library under the @@ -58,15 +53,15 @@ class JSMin { const ORD_LF = 10; const ORD_SPACE = 32; - - protected $a = ''; + + protected $a = "\n"; protected $b = ''; protected $input = ''; protected $inputIndex = 0; protected $inputLength = 0; protected $lookAhead = null; protected $output = ''; - + /** * Minify Javascript * @@ -78,13 +73,62 @@ class JSMin { $jsmin = new JSMin($js); return $jsmin->min(); } - - protected function __construct($input) + + /** + * Setup process + */ + public function __construct($input) { $this->input = str_replace("\r\n", "\n", $input); $this->inputLength = strlen($this->input); } - + + /** + * Perform minification, return result + */ + public function min() + { + if ($this->output !== '') { + // min already run + return $this->output; + } + $this->action(3); + + while ($this->a !== null) { + // determine next action + if ($this->a === ' ') { + $act = $this->isAlphaNum($this->b) ? 1 : 2; + } elseif ($this->a === "\n") { + if ($this->b === ' ') { + $act = 3; + } elseif (false !== strpos('{[(+-', $this->b)) { + $act = 1; + } else { + $act = $this->isAlphaNum($this->b) ? 1 : 2; + } + } else { + if ($this->b === ' ') { + $act = $this->isAlphaNum($this->a) ? 1 : 3; + } elseif ($this->b === "\n") { + if (false !== strpos('}])+-"\'', $this->a)) { + $act = 1; + } else { + $act = $this->isAlphaNum($this->a) ? 1 : 3; + } + } else { + $act = 1; + } + } + $this->action($act); + } + return $this->output; + } + + /** + * 1 = Output A. Copy B to A. Get the next B. + * 2 = Copy B to A. Get the next B. (Delete A). + * 3 = Get the next B. (Delete B). + */ protected function action($d) { switch ($d) { @@ -94,65 +138,82 @@ class JSMin { case 2: $this->a = $this->b; if ($this->a === "'" || $this->a === '"') { - for (;;) { + // string literal + $str = ''; // in case needed for exception + while (true) { $this->output .= $this->a; $this->a = $this->get(); if ($this->a === $this->b) { + // end quote break; } if (ord($this->a) <= self::ORD_LF) { - throw new JSMinException('Unterminated string literal.'); + throw new JSMin_UnterminatedStringException('Contents: ' . $str); } + $str .= $this->a; if ($this->a === '\\') { $this->output .= $this->a; $this->a = $this->get(); + $str .= $this->a; } } } // fallthrough case 3: $this->b = $this->next(); - if ($this->b === '/') { - switch ($this->a) { - case "\n": - case ' ': - if (! $this->spaceBeforeRegExp($this->output)) { - break; - } - case '{': - case ';': - case '(': - case ',': - case '=': - case ':': - case '[': - case '!': - case '&': - case '|': - case '?': - $this->output .= $this->a.$this->b; - for (;;) { - $this->a = $this->get(); - if ($this->a === '/') { - break; // for (;;) - } elseif ($this->a === '\\') { - $this->output .= $this->a; - $this->a = $this->get(); - } elseif (ord($this->a) <= self::ORD_LF) { - throw new JSMinException('Unterminated regular expression literal.'); - } - $this->output .= $this->a; - } - $this->b = $this->next(); - break; // switch ($this->a) - // end case ? + if ($this->b === '/' && $this->isRegexpLiteral()) { + // RegExp literal + $this->output .= $this->a . $this->b; + $pattern = '/'; // in case needed for exception + while (true) { + $this->a = $this->get(); + $pattern .= $this->a; + if ($this->a === '/') { + // end pattern + break; // while (true) + } elseif ($this->a === '\\') { + $this->output .= $this->a; + $this->a = $this->get(); + $pattern .= $this->a; + } elseif (ord($this->a) <= self::ORD_LF) { + throw new JSMin_UnterminatedRegExpException('Contents: '. $pattern); + } + $this->output .= $this->a; } + $this->b = $this->next(); } break; // switch ($d) // end case 3 } } - + + protected function isRegexpLiteral() + { + if (false !== strpos("\n{;(,=:[!&|?", $this->a)) { + return true; + } + if (' ' === $this->a) { + // see if preceeded by keyword + $length = strlen($this->output); + if ($length < 2) { + return true; + } + if (preg_match('/(?:case|else|in|return|typeof)$/', $this->output, $m)) { + if ($this->output === $m[0]) { + return true; + } + $charBeforeKeyword = substr($this->output, $length - strlen($m[0]) - 1, 1); + if (! $this->isAlphaNum($charBeforeKeyword)) { + return true; + } + } + } + return false; + } + + /** + * Get next char. Convert ctrl char to space. + */ protected function get() { $c = $this->lookAhead; @@ -162,170 +223,98 @@ class JSMin { $c = $this->input[$this->inputIndex]; $this->inputIndex += 1; } else { - $c = null; + return null; } } - return ($c === "\r") - ? "\n" - : ($c === null || $c === "\n" || ord($c) >= self::ORD_SPACE - ? $c - : ' '); - } - - protected function isAlphaNum($c) - { - return (ord($c) > 126 - || $c === '\\' - || preg_match('/^[\w\$]$/', $c) === 1); - } - - protected function min() - { - $this->a = "\n"; - $this->action(3); - - while ($this->a !== null) { - switch ($this->a) { - case ' ': - if ($this->isAlphaNum($this->b)) { - $this->action(1); - } else { - $this->action(2); - } - break; - case "\n": - switch ($this->b) { - case '{': - case '[': - case '(': - case '+': - case '-': - $this->action(1); - break; - case ' ': - $this->action(3); - break; - default: - if ($this->isAlphaNum($this->b)) { - $this->action(1); - } else { - $this->action(2); - } - } - break; - default: - switch ($this->b) { - case ' ': - if ($this->isAlphaNum($this->a)) { - $this->action(1); - break; // switch ($this->b) - } - $this->action(3); - break; // switch ($this->b) - case "\n": - switch ($this->a) { - case '}': - case ']': - case ')': - case '+': - case '-': - case '"': - case "'": - $this->action(1); - break; // switch ($this->a) - default: - if ($this->isAlphaNum($this->a)) { - $this->action(1); - } else { - $this->action(3); - } - } - break; // switch ($this->b) - default: - $this->action(1); - break; // switch ($this->b) - } - // end default - } + if ($c === "\r" || $c === "\n") { + return "\n"; } - return $this->output; - } - - protected function next() - { - $get = $this->get(); - if ($get === '/') { - $commentContents = ''; - switch ($this->peek()) { - case '/': - // "//" comment - for (;;) { - $get = $this->get(); - $commentContents .= $get; - if (ord($get) <= self::ORD_LF) { - return preg_match('/^\\/@(?:cc_on|if|elif|else|end)\\b/', $commentContents) - ? "/{$commentContents}" - : $get; - } - } - case '*': - // "/* */" comment - $this->get(); - for (;;) { - $get = $this->get(); - switch ($get) { - case '*': - if ($this->peek() === '/') { - $this->get(); - if (0 === strpos($commentContents, '!')) { - // YUI Compressor style - return "\n/*" . substr($commentContents, 1) . "*/\n"; - } - return preg_match('/^@(?:cc_on|if|elif|else|end)\\b/', $commentContents) - ? "/*{$commentContents}*/" // IE conditional compilation - : ' '; - } - break; - case null: - throw new JSMinException('Unterminated comment.'); - } - $commentContents .= $get; - } - default: - return $get; - } + if (ord($c) < self::ORD_SPACE) { + // control char + return ' '; } - return $get; + return $c; } - + + /** + * Get next char. If is ctrl character, translate to a space or newline. + */ protected function peek() { $this->lookAhead = $this->get(); return $this->lookAhead; } - - protected function spaceBeforeRegExp($output) + + /** + * Is $c a letter, digit, underscore, dollar sign, escape, or non-ASCII? + */ + protected function isAlphaNum($c) { - $length = strlen($output); - $isSpace = false; - $tmp = ""; - foreach (array("case", "else", "in", "return", "typeof") as $word) { - if ($length === strlen($word)) { - $isSpace = ($word === $output); - } elseif ($length > strlen($word)) { - $tmp = substr($output, $length - strlen($word) - 1); - $isSpace = (substr($tmp, 1) === $word) && ! $this->isAlphaNum($tmp[0]); - } - if ($isSpace) { - break; + return (preg_match('/^[0-9a-zA-Z_\\$\\\\]$/', $c) || ord($c) > 126); + } + + protected function singleLineComment() + { + $comment = ''; + while (true) { + $get = $this->get(); + $comment .= $get; + if (ord($get) <= self::ORD_LF) { + // EOL reached + if (preg_match('/^\\/@(?:cc_on|if|elif|else|end)\\b/', $comment)) { + // conditional comment, preserve it + return "/{$comment}"; + } + return $get; } } - return ($length < 2) - ? true - : $isSpace; + } + + protected function multipleLineComment() + { + $this->get(); + $comment = ''; + while (true) { + $get = $this->get(); + if ($get === '*') { + if ($this->peek() === '/') { + // end of comment reached + $this->get(); + if (0 === strpos($comment, '!')) { + // is YUI Compressor style, keep it + return "\n/*" . substr($comment, 1) . "*/\n"; + } + if (preg_match('/^@(?:cc_on|if|elif|else|end)\\b/', $comment)) { + // is IE conditional, keep it + return "/*{$comment}*/"; + } + return ' '; + } + } elseif ($get === null) { + throw new JSMin_UnterminatedCommentException('Contents: ' . $comment); + } + $comment .= $get; + } + } + + /** + * Get the next character, skipping over comments. + * Some comments may be preserved. + */ + protected function next() + { + $get = $this->get(); + if ($get !== '/') { + return $get; + } + switch ($this->peek()) { + case '/': return $this->singleLineComment(); + case '*': return $this->multipleLineComment(); + default: return $get; + } } } -class JSMinException extends Exception { - -} +class JSMin_UnterminatedStringException extends Exception {} +class JSMin_UnterminatedCommentException extends Exception {} +class JSMin_UnterminatedRegExpException extends Exception {}