Serious (JS) code refactor

Also fixes issue https://github.com/matthiasmullie/minify/issues/10
This commit is contained in:
Matthias Mullie 2013-12-01 23:25:52 +01:00
parent 1cd2718da4
commit 4d6a3c8483
4 changed files with 209 additions and 194 deletions

57
CSS.php
View File

@ -22,7 +22,7 @@ namespace MatthiasMullie\Minify;
*
* @author Matthias Mullie <minify@mullie.eu>
* @author Tijs Verkoyen <minify@verkoyen.eu>
* @version 1.1.0
* @version 1.2.0
*
* @copyright Copyright (c) 2012, Matthias Mullie. All rights reserved.
* @license MIT License
@ -49,20 +49,12 @@ class CSS extends Minify
* Combine CSS from import statements.
* @import's will be loaded and their content merged into the original file, to save HTTP requests.
*
* @param string $source The file to combine imports for.
* @param string $source The file to combine imports for.
* @param string $content The CSS content to combine imports for.
* @return string
*/
protected function combineImports($source)
protected function combineImports($source, $content)
{
// little "hack" for internal use
$content = @func_get_arg(1);
// load the content
if($content === false) $content = $this->load($source);
// validate data
if($content == $source) throw new Exception('The data for "' . $source . '" could not be loaded, please make sure the path is correct.');
// the regex to match import statements
$importRegex = '/
@ -273,20 +265,12 @@ class CSS extends Minify
* Import files into the CSS, base64-ized.
* @url(image.jpg) images will be loaded and their content merged into the original file, to save HTTP requests.
*
* @param string $source The file to import files for.
* @param string $source The file to import files for.
* @param string $content The CSS content to import files for.
* @return string
*/
protected function importFiles($source)
protected function importFiles($source, $content)
{
// little "hack" for internal use
$content = @func_get_arg(1);
// load the content
if($content === false) $content = $this->load($source);
// validate data
if($content == $source) throw new Exception('The data for "' . $source . '" could not be loaded, please make sure the path is correct.');
if (preg_match_all('/url\((["\']?)((?!["\']?data:).*?\.(gif|png|jpg|jpeg|svg|woff))\\1\)/i', $content, $matches, PREG_SET_ORDER)) {
$search = array();
$replace = array();
@ -372,19 +356,11 @@ class CSS extends Minify
*
* @param string $source The file to update relative urls for.
* @param string $destination The path the data will be written to.
* @param string $content The CSS content to update relative urls for.
* @return string
*/
protected function move($source, $destination)
protected function move($source, $destination, $content)
{
// little "hack" for internal use
$content = @func_get_arg(2);
// load the content
if($content === false) $content = $this->load($source);
// validate data
if($content == $source) throw new Exception('The data for "' . $source . '" could not be loaded, please make sure the path is correct.');
// regex to match paths
$pathsRegex = '/
@ -479,14 +455,11 @@ class CSS extends Minify
* Shorthand hex color codes.
* #FF0000 -> #F00
*
* @param string $content The file/content to shorten the hex color codes for.
* @param string $content The CSS content to shorten the hex color codes for.
* @return string
*/
protected function shortenHex($content)
{
// load the content
$content = $this->load($content);
// shorthand hex color codes
$content = preg_replace('/(?<![\'"])#([0-9a-z])\\1([0-9a-z])\\2([0-9a-z])\\3(?![\'"])/i', '#$1$2$3', $content);
@ -496,14 +469,11 @@ class CSS extends Minify
/**
* Strip comments.
*
* @param string $content The file/content to strip the comments for.
* @param string $content The CSS content to strip the comments for.
* @return string
*/
protected function stripComments($content)
{
// load the content
$content = $this->load($content);
// strip comments
$content = preg_replace('/\/\*(.*?)\*\//is', '', $content);
@ -513,14 +483,11 @@ class CSS extends Minify
/**
* Strip whitespace.
*
* @param string $content The file/content to strip the whitespace for.
* @param string $content The CSS content to strip the whitespace for.
* @return string
*/
protected function stripWhitespace($content)
{
// load the content
$content = $this->load($content);
// semicolon/space before closing bracket > replace by bracket
$content = preg_replace('/;?\s*}/', '}', $content);

173
JS.php
View File

@ -4,7 +4,7 @@ namespace MatthiasMullie\Minify;
/**
* MinifyJS class
*
* This source file can be used to minify Javascript files.
* This source file can be used to minify JavaScript files.
*
* The class is documented in the file itself. If you find any bugs help me out and report them. Reporting can be done by sending an email to minify@mullie.eu.
* If you report a bug, make sure you give me enough information (include your code).
@ -22,7 +22,7 @@ namespace MatthiasMullie\Minify;
*
* @author Matthias Mullie <minify@mullie.eu>
* @author Tijs Verkoyen <minify@verkoyen.eu>
* @version 1.1.0
* @version 1.2.0
*
* @copyright Copyright (c) 2012, Matthias Mullie. All rights reserved.
* @license MIT License
@ -32,102 +32,6 @@ class JS extends Minify
const STRIP_COMMENTS = 1;
const STRIP_WHITESPACE = 2;
/**
 * Extract comments & strings from source code (and replace them with a placeholder).
 * This parsing is necessary because comments can contain string demarcators and vice versa, and both can
 * contain content that is very similar to the rest of the code.
 *
 * @param string $content The file/content to extract comments & strings for.
 * @return array An array containing the (manipulated) content, the strings & the comments. The strings and
 *               comments are keyed by the placeholder that replaced them.
 */
protected function extract($content)
{
    // load the content
    $content = $this->load($content);

    // placeholder => original text
    $strings = array();
    $comments = array();

    // strlen() is re-evaluated on every pass on purpose: $content changes
    // length each time a placeholder is substituted
    for ($i = 0; $i < strlen($content); $i++) {
        $character = $content[$i];

        switch ($character) {
            // string demarcation: ' or "
            case '\'':
            case '"':
                $stringOpener = $character;

                // process through content until we find the end of the string
                for ($j = $i + 1; $j < strlen($content); $j++) {
                    $character = $content[$j];

                    // an odd number of backslashes right before this character
                    // means it is escaped; an even number means the backslashes
                    // escape each other (e.g. '\\' is a complete string)
                    $backslashes = 0;
                    for ($k = $j - 1; $k > $i && $content[$k] == '\\'; $k--) {
                        $backslashes++;
                    }
                    $escaped = $backslashes % 2 == 1;

                    // a string ends at its matching quote, or (bad code) at an
                    // unescaped newline
                    $closesString = $character == $stringOpener;
                    $breaksLine = $character == "\r" || $character == "\n";

                    if (!$escaped && ($closesString || $breaksLine)) {
                        // save string & replace it by a stub
                        $replacement = '[MINIFY-STRING-' . count($strings) . ']';
                        $length = $j - $i + 1;
                        $strings[$replacement] = substr($content, $i, $length);
                        $content = substr_replace($content, $replacement, $i, $length);

                        // point at the last character of the stub: the outer
                        // loop's $i++ then lands on the first unprocessed one
                        $i += strlen($replacement) - 1;
                        break;
                    }
                }
                break;

            // comment demarcation: // or /*
            case '/':
                // both comment openers are 2 characters, so grab the next
                // character and verify we're really opening a comment here
                $commentOpener = $character . (isset($content[$i + 1]) ? $content[$i + 1] : '');
                if ($commentOpener != '//' && $commentOpener != '/*') {
                    break;
                }

                // index of the last character of the comment, once found
                $end = null;

                for ($j = $i + 2; $j < strlen($content); $j++) {
                    $character = $content[$j];

                    if ($commentOpener == '//') {
                        // single line comments end at newline
                        if ($character == "\r" || $character == "\n") {
                            $end = $j;
                            break;
                        }
                    } elseif ($j > $i + 2 && $content[$j - 1] . $character == '*/') {
                        // multiline comments end at their closing tag; $j > $i + 2
                        // keeps the opener's own asterisk from doubling as the
                        // closing tag's asterisk
                        $end = $j;
                        break;
                    }
                }

                // a single line comment on the very last line has no newline
                // to terminate it: it runs until the end of the content
                if ($end === null && $commentOpener == '//') {
                    $end = strlen($content) - 1;
                }

                if ($end !== null) {
                    // save comment & replace it by a stub
                    $replacement = '[MINIFY-COMMENT-' . count($comments) . ']';
                    $length = $end - $i + 1;
                    $comments[$replacement] = substr($content, $i, $length);
                    $content = substr_replace($content, $replacement, $i, $length);

                    // see the string branch: compensate for the outer $i++
                    $i += strlen($replacement) - 1;
                }
                break;
        }
    }

    return array($content, $strings, $comments);
}
/**
* Minify the data.
* Perform JS optimizations.
@ -146,14 +50,17 @@ class JS extends Minify
$content .= $js;
}
// extract comments & strings from content
list($content, $strings, $comments) = $this->extract($content);
/*
* Strings are a pattern we need to match, in order to ignore potential
* code-like content inside them, but we just want all of the string
* content to remain untouched.
*/
$this->registerPattern('/^([\'"]).*?\\1/s', '\\0');
if($options & static::STRIP_COMMENTS) $content = $this->stripComments($content, $comments);
if($options & static::STRIP_WHITESPACE) $content = $this->stripWhitespace($content, $strings, $comments);
if($options & static::STRIP_COMMENTS) $content = $this->stripComments($content);
if($options & static::STRIP_WHITESPACE) $content = $this->stripWhitespace($content);
// reset strings
$content = str_replace(array_keys($strings), array_values($strings), $content);
$content = $this->replace($content);
// save to path
if($path !== false) $this->save($content, $path);
@ -164,66 +71,43 @@ class JS extends Minify
/**
* Strip comments from source code.
*
* @param string $content The file/content to strip the comments for.
* @param string $content The content to strip the comments for.
* @return string
*/
protected function stripComments($content)
{
// little "hack" for internal use
$comments = @func_get_arg(1);
// single-line comments
$this->registerPattern('/^\/\/.*$[\r\n]*/m', '');
// load the content
$content = $this->load($content);
// multi-line comments
$this->registerPattern('/^\/\*.*?\*\//s', '');
// content has not been parsed before, do so now
if ($comments === false) {
// extract strings & comments
list($content, $strings, $comments) = $this->extract($content);
// reset strings
$content = str_replace(array_keys($strings), array_values($strings), $content);
}
// strip comments (if any)
if ($comments) $content = str_replace(array_keys($comments), array_fill(0, count($comments), ''), $content);
return $content;
}
/**
* Strip whitespace.
*
* @param string $content The file/content to strip the whitespace for.
* @param string $content The content to strip the whitespace for.
* @return string
*/
protected function stripWhitespace($content)
{
// little "hack" for internal use
$strings = @func_get_arg(1);
$comments = @func_get_arg(2);
// load the content
$content = $this->load($content);
// content has not been parsed before, do so now
if ($strings === false || $comments === false) {
// extract strings & comments
list($content, $strings, $comments) = $this->extract($content);
}
// newlines > linefeed
$content = str_replace(array("\r\n", "\r", "\n"), "\n", $content);
$this->registerPattern('/^(\r\n|\r)/m', "\n");
// empty lines > collapse
$content = preg_replace('/^[ \t]*|[ \t]*$/m', '', $content);
$content = preg_replace('/\n+/m', "\n", $content);
$content = trim($content);
$this->registerPattern('/^\n\s+/', "\n");
// redundant whitespace > remove
$content = preg_replace('/(?<=[{}\[\]\(\)=><&\|;:,\?!\+-])[ \t]*|[ \t]*(?=[{}\[\]\(\)=><&\|;:,\?!\+-])/i', '', $content);
$content = preg_replace('/[ \t]+/', ' ', $content);
$callback = function($match) {
return $match[1];
};
$this->registerPattern('/^([{}\[\]\(\)=><&\|;:,\?!\+-])[ \t]+/', $callback);
$this->registerPattern('/^[ \t]+(?=[{}\[\]\(\)=><&\|;:,\?!\+-])/', '');
// redundant semicolons (followed by another semicolon or closing curly bracket) > remove
$content = preg_replace('/;\s*(?=[;}])/s', '', $content);
$this->registerPattern('/^;\s*(?=[;}])/s', '');
/*
* @todo: we could remove all line feeds, but then we have to be certain that all statements are properly
@ -232,13 +116,6 @@ class JS extends Minify
* bracket can then also be omitted.
*/
// reset data if this function has not been called upon through internal methods
if (@func_get_arg(1) === false || @func_get_arg(2) === false) {
// reset strings & comments
$content = str_replace(array_keys($strings), array_values($strings), $content);
$content = str_replace(array_keys($comments), array_values($comments), $content);
}
return $content;
}
}

View File

@ -21,7 +21,7 @@ namespace MatthiasMullie\Minify;
* This software is provided by the author "as is" and any express or implied warranties, including, but not limited to, the implied warranties of merchantability and fitness for a particular purpose are disclaimed. In no event shall the author be liable for any direct, indirect, incidental, special, exemplary, or consequential damages (including, but not limited to, procurement of substitute goods or services; loss of use, data, or profits; or business interruption) however caused and on any theory of liability, whether in contract, strict liability, or tort (including negligence or otherwise) arising in any way out of the use of this software, even if advised of the possibility of such damage.
*
* @author Matthias Mullie <minify@mullie.eu>
* @version 1.1.0
* @version 1.2.0
*
* @copyright Copyright (c) 2012, Matthias Mullie. All rights reserved.
* @license MIT License
@ -37,6 +37,20 @@ abstract class Minify
*/
protected $data = array();
/**
* Array of patterns to match.
*
* @var array
*/
protected $patterns = array();
/**
* Array of replacement values (or callbacks) for matching $patterns.
*
* @var array
*/
protected $replacements = array();
/**
* Init the minify class - optionally, css may be passed along already.
*
@ -117,4 +131,139 @@ abstract class Minify
* @return string The minified data.
*/
abstract public function minify($path = false, $options = self::ALL);
/**
 * Queue a pattern/replacement pair for later execution against the content.
 *
 * Content is processed by walking through it from front to back, so a
 * pattern is meant to match exactly at the first character of whatever
 * content remains at that point. Every pattern must therefore carry a caret
 * directly after its opening delimiter.
 *
 * @param string $pattern PCRE pattern, anchored with a caret.
 * @param string|Closure $replacement Replacement value (or callback) for the matched pattern.
 * @throws Exception When the character after the opening delimiter is not a caret.
 */
protected function registerPattern($pattern, $replacement = '')
{
    // the character right after the opening delimiter
    $anchor = substr($pattern, 1, 1);

    if ($anchor === '^') {
        // both lists grow together: a shared index ties a pattern to its replacement
        $this->patterns[] = $pattern;
        $this->replacements[] = $replacement;

        return;
    }

    throw new Exception('Pattern "' . $pattern . '" should start processing at the beginning of the string.');
}
/**
 * We can't "just" run some regular expressions against JavaScript: it's a
 * complex language. E.g. having an occurrence of // xyz would be a comment,
 * unless it's used within a string. Or you could have something that looks
 * like a 'string', but inside a comment.
 * The only way to accurately replace these pieces is to traverse the JS one
 * character at a time and try to find whatever starts first.
 *
 * @param string $content The content to replace patterns in.
 * @return string The (manipulated) content.
 */
protected function replace($content)
{
    $content = (string) $content;

    // every character that has been processed will be moved to this string
    $processed = '';

    /*
     * $content keeps shrinking until all of it has been processed.
     * Compare against '' explicitly: a plain truthiness test would end the
     * loop on a remainder of "0" and silently drop that last character.
     */
    while ($content !== '') {
        foreach ($this->patterns as $i => $pattern) {
            $replacement = $this->replacements[$i];

            // replace a pattern occurrence starting at this character
            list($content, $replacement, $match) = $this->replacePattern($pattern, $content, $replacement);

            // if a pattern was replaced in the content, move the replacement
            // to $processed & remove it from $content
            if ($match != '' || $replacement != '') {
                $processed .= $replacement;

                // cast: substr() may return false instead of '' once nothing is left
                $content = (string) substr($content, strlen($replacement));

                // restart with the first pattern on the remaining content
                continue 2;
            }
        }

        // no pattern matched here: move a single character to $processed
        $processed .= $content[0];
        $content = (string) substr($content, 1);
    }

    return $processed;
}
/**
 * This is where a pattern is matched against $content and the match is
 * replaced by its respective value.
 * This function will be called plenty of times, where $content will always
 * move up 1 character.
 *
 * Dispatches to replaceWithCallback() for callable replacements and to
 * replaceWithString() for everything else. Note that is_callable() also
 * accepts a string that names an existing function, so such a string is
 * treated as a callback rather than as literal replacement text.
 *
 * @param string $pattern Pattern to match
 * @param string $content Content to match pattern against
 * @param string|callable $replacement Replacement value
 * @return array [content, replacement, match]
 */
protected function replacePattern($pattern, $content, $replacement)
{
    // \Closure must be fully qualified: inside this namespace, a bare
    // "Closure" would refer to a class of that name in the namespace itself
    if ($replacement instanceof \Closure || is_callable($replacement)) {
        return $this->replaceWithCallback($pattern, $content, $replacement);
    }

    return $this->replaceWithString($pattern, $content, $replacement);
}
/**
 * Replaces pattern by a value from a callback, via preg_replace_callback.
 * Only a match that begins at the very first character of $content is
 * replaced; anything else is reported as "no match".
 *
 * @param string $pattern
 * @param string $content
 * @param callable $replacement
 * @return array [content, replacement, match] - match is the array of matches
 *               as handed to the callback; replacement & match are empty
 *               strings when nothing was replaced.
 */
protected function replaceWithCallback($pattern, $content, $replacement)
{
    /*
     * The caller strips the replacement from the front of the content, so a
     * match further down (possible with e.g. the "m" modifier, where a caret
     * also matches at later line starts) must not be replaced here.
     */
    if (!preg_match($pattern, $content, $probe, PREG_OFFSET_CAPTURE) || $probe[0][1] !== 0) {
        return array($content, '', '');
    }

    $matched = '';
    $replaced = '';

    /*
     * Instead of just passing the $replacement callback, we'll wrap another
     * callback around it to also allow us to catch the match & replacement
     * value.
     */
    $callback = function ($match) use ($replacement, &$replaced, &$matched) {
        $matched = $match;
        $replaced = call_user_func($replacement, $match);

        return $replaced;
    };

    // limit 1: only the occurrence at the start of the content is replaced
    $content = preg_replace_callback($pattern, $callback, $content, 1);

    return array($content, $replaced, $matched);
}
/**
 * Replaces pattern by a replacement string (which may contain
 * back-references), via preg_replace.
 * Only a match that begins at the very first character of $content is
 * replaced; anything else is reported as "no match".
 *
 * @param string $pattern
 * @param string $content
 * @param string $replacement
 * @return array [content, replacement, match] - replacement & match are empty
 *               strings when nothing was replaced.
 */
protected function replaceWithString($pattern, $content, $replacement)
{
    /*
     * This preg_match is meant to capture $match, which we can then also use
     * to deduce the replacement value. We can't just assume $replacement as
     * replacement value, because it may be a back-reference (e.g. \\1)
     *
     * The offset check rejects matches further down the content (possible
     * with e.g. the "m" modifier, where a caret also matches at later line
     * starts): the caller only expects changes at the front of the content.
     */
    if (!preg_match($pattern, $content, $match, PREG_OFFSET_CAPTURE) || $match[0][1] !== 0) {
        return array($content, '', '');
    }

    $matched = $match[0][0];

    // amount of trailing content that the replacement leaves alone
    $untouched = strlen($content) - strlen($matched);

    // limit 1: only the occurrence at the start of the content is replaced
    $content = preg_replace($pattern, $replacement, $content, 1);

    // whatever now precedes the untouched tail is the expanded replacement
    $replaced = (string) substr($content, 0, strlen($content) - $untouched);

    return array($content, $replaced, $matched);
}
}

View File

@ -66,6 +66,28 @@ class JSTest extends PHPUnit_Framework_TestCase
Minify\JS::STRIP_WHITESPACE
);
// https://github.com/matthiasmullie/minify/issues/10
$tests[] = array(
'// first mutation patch
// second mutation patch
// third mutation patch
// fourth mutation patch',
'',
Minify\JS::STRIP_COMMENTS
);
// https://github.com/matthiasmullie/minify/issues/10
$tests[] = array(
'/////////////////////////
// first mutation patch
// second mutation patch
// third mutation patch
// fourth mutation patch
/////////////////////////',
'',
Minify\JS::STRIP_COMMENTS
);
return $tests;
}
}