Use offsets, reduce string copying to improve performance

Performance improvements in Minify::replace():
- Use offsets when regex matching, instead of substr()ing off the
  processed part of the string
- Perform replacements manually, rather than calling preg_replace() /
  preg_replace_callback() on the entire input string

This dramatically improves performance, especially for inputs with many
things that are replaced at this stage (many strings, comments or
regexes).

The only change noticed by other code is that placeholders like $1 or \2
can no longer be used in Minify::registerPattern(), but no callers were
using this feature anyway, and future callers can use a callback
instead.
This commit is contained in:
Roan Kattouw 2021-01-27 11:19:20 -08:00 committed by Matthias Mullie
parent 7f7aaddec1
commit e3a0356374

View File

@ -243,6 +243,9 @@ abstract class Minify
/**
* Register a pattern to execute against the source content.
*
* If $replacement is a string, it must be plain text. Placeholders like $1 or \2 don't work.
* If you need that functionality, use a callback instead.
*
* @param string $pattern PCRE pattern
* @param string|callable $replacement Replacement value for matched pattern
*/
@ -268,11 +271,13 @@ abstract class Minify
*/
protected function replace($content)
{
$processed = '';
$contentLength = strlen($content);
$output = '';
$processedOffset = 0;
$positions = array_fill(0, count($this->patterns), -1);
$matches = array();
while ($content) {
while ($processedOffset < $contentLength) {
// find first match for all patterns
foreach ($this->patterns as $i => $pattern) {
list($pattern, $replacement) = $pattern;
@ -285,12 +290,12 @@ abstract class Minify
// no need to re-run matches that are still in the part of the
// content that hasn't been processed
if ($positions[$i] >= 0) {
if ($positions[$i] >= $processedOffset) {
continue;
}
$match = null;
if (preg_match($pattern, $content, $match, PREG_OFFSET_CAPTURE)) {
if (preg_match($pattern, $content, $match, PREG_OFFSET_CAPTURE, $processedOffset)) {
$matches[$i] = $match;
// we'll store the match position as well; that way, we
@ -307,61 +312,52 @@ abstract class Minify
// no more matches to find: everything's been processed, break out
if (!$matches) {
$processed .= $content;
// output the remaining content
$output .= substr($content, $processedOffset);
break;
}
// see which of the patterns actually found the first thing (we'll
// only want to execute that one, since we're unsure if what the
// other found was not inside what the first found)
$discardLength = min($positions);
$firstPattern = array_search($discardLength, $positions);
$match = $matches[$firstPattern][0][0];
$matchOffset = min($positions);
$firstPattern = array_search($matchOffset, $positions);
$match = $matches[$firstPattern];
// execute the pattern that matches earliest in the content string
list($pattern, $replacement) = $this->patterns[$firstPattern];
$replacement = $this->replacePattern($pattern, $replacement, $content);
list(, $replacement) = $this->patterns[$firstPattern];
// figure out which part of the string was unmatched; that's the
// part we'll execute the patterns on again next
$content = (string) substr($content, $discardLength);
$unmatched = (string) substr($content, strpos($content, $match) + strlen($match));
// move the replaced part to $processed and prepare $content to
// again match batch of patterns against
$processed .= substr($replacement, 0, strlen($replacement) - strlen($unmatched));
$content = $unmatched;
// first match has been replaced & that content is to be left alone,
// the next matches will start after this replacement, so we should
// fix their offsets
foreach ($positions as $i => $position) {
$positions[$i] -= $discardLength + strlen($match);
}
// add the part of the input between $processedOffset and the first match;
// that content wasn't matched by anything
$output .= substr($content, $processedOffset, $matchOffset - $processedOffset);
// add the replacement for the match
$output .= $this->executeReplacement($replacement, $match);
// advance $processedOffset past the match
$processedOffset = $matchOffset + strlen($match[0][0]);
}
return $processed;
return $output;
}
/**
* This is where a pattern is matched against $content and the matches
* are replaced by their respective value.
* This function will be called plenty of times, where $content will always
* move up 1 character.
* If $replacement is a callback, execute it, passing in the match data.
* If it's a string, just pass it through.
*
* @param string $pattern Pattern to match
* @param string|callable $replacement Replacement value
* @param string $content Content to match pattern against
* @param array $match Match data, in PREG_OFFSET_CAPTURE form
*
* @return string
*/
protected function replacePattern($pattern, $replacement, $content)
protected function executeReplacement($replacement, $match)
{
if (is_callable($replacement)) {
return preg_replace_callback($pattern, $replacement, $content, 1, $count);
} else {
return preg_replace($pattern, $replacement, $content, 1, $count);
if (!is_callable($replacement)) {
return $replacement;
}
// convert $match from the PREG_OFFSET_CAPTURE form to the form the callback expects
foreach ($match as &$matchItem) {
$matchItem = $matchItem[0];
}
return $replacement($match);
}
/**