mirror of
https://github.com/matthiasmullie/minify.git
synced 2025-02-22 12:02:47 +01:00
Use offsets, reduce string copying to improve performance
Performance improvements in Minify::replace():
- Use offsets when regex matching, instead of substr()ing off the processed part of the string
- Perform replacements manually, rather than calling preg_replace() / preg_replace_callback() on the entire input string

This dramatically improves performance, especially for inputs with many things that are replaced at this stage (many strings, comments or regexes).

The only change noticed by other code is that placeholders like $1 or \2 can no longer be used in Minify::registerPattern(), but no callers were using this feature anyway, and future callers can use a callback instead.
This commit is contained in:
parent
7f7aaddec1
commit
e3a0356374
@ -243,6 +243,9 @@ abstract class Minify
|
||||
/**
|
||||
* Register a pattern to execute against the source content.
|
||||
*
|
||||
* If $replacement is a string, it must be plain text. Placeholders like $1 or \2 don't work.
|
||||
* If you need that functionality, use a callback instead.
|
||||
*
|
||||
* @param string $pattern PCRE pattern
|
||||
* @param string|callable $replacement Replacement value for matched pattern
|
||||
*/
|
||||
@ -268,11 +271,13 @@ abstract class Minify
|
||||
*/
|
||||
protected function replace($content)
|
||||
{
|
||||
$processed = '';
|
||||
$contentLength = strlen($content);
|
||||
$output = '';
|
||||
$processedOffset = 0;
|
||||
$positions = array_fill(0, count($this->patterns), -1);
|
||||
$matches = array();
|
||||
|
||||
while ($content) {
|
||||
while ($processedOffset < $contentLength) {
|
||||
// find first match for all patterns
|
||||
foreach ($this->patterns as $i => $pattern) {
|
||||
list($pattern, $replacement) = $pattern;
|
||||
@ -285,12 +290,12 @@ abstract class Minify
|
||||
|
||||
// no need to re-run matches that are still in the part of the
|
||||
// content that hasn't been processed
|
||||
if ($positions[$i] >= 0) {
|
||||
if ($positions[$i] >= $processedOffset) {
|
||||
continue;
|
||||
}
|
||||
|
||||
$match = null;
|
||||
if (preg_match($pattern, $content, $match, PREG_OFFSET_CAPTURE)) {
|
||||
if (preg_match($pattern, $content, $match, PREG_OFFSET_CAPTURE, $processedOffset)) {
|
||||
$matches[$i] = $match;
|
||||
|
||||
// we'll store the match position as well; that way, we
|
||||
@ -307,61 +312,52 @@ abstract class Minify
|
||||
|
||||
// no more matches to find: everything's been processed, break out
|
||||
if (!$matches) {
|
||||
$processed .= $content;
|
||||
// output the remaining content
|
||||
$output .= substr($content, $processedOffset);
|
||||
break;
|
||||
}
|
||||
|
||||
// see which of the patterns actually found the first thing (we'll
|
||||
// only want to execute that one, since we're unsure if what the
|
||||
// other found was not inside what the first found)
|
||||
$discardLength = min($positions);
|
||||
$firstPattern = array_search($discardLength, $positions);
|
||||
$match = $matches[$firstPattern][0][0];
|
||||
$matchOffset = min($positions);
|
||||
$firstPattern = array_search($matchOffset, $positions);
|
||||
$match = $matches[$firstPattern];
|
||||
|
||||
// execute the pattern that matches earliest in the content string
|
||||
list($pattern, $replacement) = $this->patterns[$firstPattern];
|
||||
$replacement = $this->replacePattern($pattern, $replacement, $content);
|
||||
list(, $replacement) = $this->patterns[$firstPattern];
|
||||
|
||||
// figure out which part of the string was unmatched; that's the
|
||||
// part we'll execute the patterns on again next
|
||||
$content = (string) substr($content, $discardLength);
|
||||
$unmatched = (string) substr($content, strpos($content, $match) + strlen($match));
|
||||
|
||||
// move the replaced part to $processed and prepare $content to
|
||||
// again match batch of patterns against
|
||||
$processed .= substr($replacement, 0, strlen($replacement) - strlen($unmatched));
|
||||
$content = $unmatched;
|
||||
|
||||
// first match has been replaced & that content is to be left alone,
|
||||
// the next matches will start after this replacement, so we should
|
||||
// fix their offsets
|
||||
foreach ($positions as $i => $position) {
|
||||
$positions[$i] -= $discardLength + strlen($match);
|
||||
}
|
||||
// add the part of the input between $processedOffset and the first match;
|
||||
// that content wasn't matched by anything
|
||||
$output .= substr($content, $processedOffset, $matchOffset - $processedOffset);
|
||||
// add the replacement for the match
|
||||
$output .= $this->executeReplacement($replacement, $match);
|
||||
// advance $processedOffset past the match
|
||||
$processedOffset = $matchOffset + strlen($match[0][0]);
|
||||
}
|
||||
|
||||
return $processed;
|
||||
return $output;
|
||||
}
|
||||
|
||||
/**
|
||||
* This is where a pattern is matched against $content and the matches
|
||||
* are replaced by their respective value.
|
||||
* This function will be called plenty of times, where $content will always
|
||||
* move up 1 character.
|
||||
* If $replacement is a callback, execute it, passing in the match data.
|
||||
* If it's a string, just pass it through.
|
||||
*
|
||||
* @param string $pattern Pattern to match
|
||||
* @param string|callable $replacement Replacement value
|
||||
* @param string $content Content to match pattern against
|
||||
* @param array $match Match data, in PREG_OFFSET_CAPTURE form
|
||||
*
|
||||
* @return string
|
||||
*/
|
||||
protected function replacePattern($pattern, $replacement, $content)
|
||||
protected function executeReplacement($replacement, $match)
|
||||
{
|
||||
if (is_callable($replacement)) {
|
||||
return preg_replace_callback($pattern, $replacement, $content, 1, $count);
|
||||
} else {
|
||||
return preg_replace($pattern, $replacement, $content, 1, $count);
|
||||
if (!is_callable($replacement)) {
|
||||
return $replacement;
|
||||
}
|
||||
// convert $match from the PREG_OFFSET_CAPTURE form to the form the callback expects
|
||||
foreach ($match as &$matchItem) {
|
||||
$matchItem = $matchItem[0];
|
||||
}
|
||||
return $replacement($match);
|
||||
}
|
||||
|
||||
/**
|
||||
|
Loading…
x
Reference in New Issue
Block a user