Remove emulation for unsupported PHP versions

2025-06-05 03:24:50 +02:00 · 2023-08-16 21:09:51 +02:00 · 2023-08-16 21:09:51 +02:00 · 3c0432b09d
commit 3c0432b09d
parent ee3e7db3fc
8 changed files with 2 additions and 272 deletions
--- a/lib/PhpParser/Internal/TokenPolyfill.php
+++ b/lib/PhpParser/Internal/TokenPolyfill.php
@@ -128,9 +128,6 @@ class TokenPolyfill {
        $line = 1;
        $pos = 0;
        $origTokens = \token_get_all($code, $flags);
-        if (\PHP_VERSION_ID < 70400) {
-            $origTokens = self::fixupBadCharacters($code, $origTokens);
-        }

        $numTokens = \count($origTokens);
        for ($i = 0; $i < $numTokens; $i++) {
@@ -217,38 +214,6 @@ class TokenPolyfill {
        return $tokens;
    }

-    /**
-     * Prior to PHP 7.4, token_get_all() simply dropped invalid characters from the token stream.
-     * Detect such cases and replace them with T_BAD_CHARACTER.
-     */
-    private static function fixupBadCharacters(string $code, array $origTokens): array {
-        $newTokens = [];
-        $pos = 0;
-        foreach ($origTokens as $token) {
-            $text = \is_string($token) ? $token : $token[1];
-            $len = \strlen($text);
-            if (substr($code, $pos, $len) !== $text) {
-                $nextPos = strpos($code, $text, $pos);
-                for ($i = $pos; $i < $nextPos; $i++) {
-                    // Don't bother including the line, we're not going to use it anyway.
-                    $newTokens[] = [\T_BAD_CHARACTER, $code[$i]];
-                }
-                $pos = $nextPos;
-            }
-            $pos += $len;
-            $newTokens[] = $token;
-        }
-
-        // Handle trailing invalid characters.
-        $codeLen = \strlen($code);
-        if ($pos !== $codeLen) {
-            for ($i = $pos; $i < $codeLen; $i++) {
-                $newTokens[] = [\T_BAD_CHARACTER, $code[$i]];
-            }
-        }
-        return $newTokens;
-    }
-
    /** Initialize private static state needed by tokenize(). */
    private static function init(): void {
        if (isset(self::$identifierTokens)) {
--- a/lib/PhpParser/Lexer/Emulative.php
+++ b/lib/PhpParser/Lexer/Emulative.php
@@ -40,11 +40,7 @@ class Emulative extends Lexer {
        $this->hostPhpVersion = PhpVersion::getHostVersion();

        $emulators = [
-            new FlexibleDocStringEmulator(),
-            new FnTokenEmulator(),
            new MatchTokenEmulator(),
-            new CoaleseEqualTokenEmulator(),
-            new NumericLiteralSeparatorEmulator(),
            new NullsafeTokenEmulator(),
            new AttributeEmulator(),
            new EnumTokenEmulator(),
--- a/lib/PhpParser/Lexer/TokenEmulator/CoaleseEqualTokenEmulator.php
+++ b/lib/PhpParser/Lexer/TokenEmulator/CoaleseEqualTokenEmulator.php
@@ -1,40 +0,0 @@
-<?php declare(strict_types=1);
-
-namespace PhpParser\Lexer\TokenEmulator;
-
-use PhpParser\PhpVersion;
-use PhpParser\Token;
-
-final class CoaleseEqualTokenEmulator extends TokenEmulator {
-    public function getPhpVersion(): PhpVersion {
-        return PhpVersion::fromComponents(7, 4);
-    }
-
-    public function isEmulationNeeded(string $code): bool {
-        return strpos($code, '??=') !== false;
-    }
-
-    public function emulate(string $code, array $tokens): array {
-        // We need to manually iterate and manage a count because we'll change
-        // the tokens array on the way
-        for ($i = 0, $c = count($tokens); $i < $c; ++$i) {
-            $token = $tokens[$i];
-            if (isset($tokens[$i + 1])) {
-                if ($token->id === T_COALESCE && $tokens[$i + 1]->text === '=') {
-                    array_splice($tokens, $i, 2, [
-                        new Token(\T_COALESCE_EQUAL, '??=', $token->line, $token->pos),
-                    ]);
-                    $c--;
-                    continue;
-                }
-            }
-        }
-
-        return $tokens;
-    }
-
-    public function reverseEmulate(string $code, array $tokens): array {
-        // ??= was not valid code previously, don't bother.
-        return $tokens;
-    }
-}
--- a/lib/PhpParser/Lexer/TokenEmulator/FlexibleDocStringEmulator.php
+++ b/lib/PhpParser/Lexer/TokenEmulator/FlexibleDocStringEmulator.php
@@ -1,71 +0,0 @@
-<?php declare(strict_types=1);
-
-namespace PhpParser\Lexer\TokenEmulator;
-
-use PhpParser\PhpVersion;
-
-final class FlexibleDocStringEmulator extends TokenEmulator {
-    private const FLEXIBLE_DOC_STRING_REGEX = <<<'REGEX'
-/<<<[ \t]*(['"]?)([a-zA-Z_\x80-\xff][a-zA-Z0-9_\x80-\xff]*)\1\r?\n
-(?:.*\r?\n)*?
-(?<indentation>\h*)\2(?![a-zA-Z0-9_\x80-\xff])(?<separator>(?:;?[\r\n])?)/x
-REGEX;
-
-    public function getPhpVersion(): PhpVersion {
-        return PhpVersion::fromComponents(7, 3);
-    }
-
-    public function isEmulationNeeded(string $code): bool {
-        return strpos($code, '<<<') !== false;
-    }
-
-    public function emulate(string $code, array $tokens): array {
-        // Handled by preprocessing + fixup.
-        return $tokens;
-    }
-
-    public function reverseEmulate(string $code, array $tokens): array {
-        // Not supported.
-        return $tokens;
-    }
-
-    public function preprocessCode(string $code, array &$patches): string {
-        if (!preg_match_all(self::FLEXIBLE_DOC_STRING_REGEX, $code, $matches, PREG_SET_ORDER|PREG_OFFSET_CAPTURE)) {
-            // No heredoc/nowdoc found
-            return $code;
-        }
-
-        // Keep track of how much we need to adjust string offsets due to the modifications we
-        // already made
-        $posDelta = 0;
-        foreach ($matches as $match) {
-            $indentation = $match['indentation'][0];
-            $indentationStart = $match['indentation'][1];
-
-            $separator = $match['separator'][0];
-            $separatorStart = $match['separator'][1];
-
-            if ($indentation === '' && $separator !== '') {
-                // Ordinary heredoc/nowdoc
-                continue;
-            }
-
-            if ($indentation !== '') {
-                // Remove indentation
-                $indentationLen = strlen($indentation);
-                $code = substr_replace($code, '', $indentationStart + $posDelta, $indentationLen);
-                $patches[] = [$indentationStart + $posDelta, 'add', $indentation];
-                $posDelta -= $indentationLen;
-            }
-
-            if ($separator === '') {
-                // Insert newline as separator
-                $code = substr_replace($code, "\n", $separatorStart + $posDelta, 0);
-                $patches[] = [$separatorStart + $posDelta, 'remove', "\n"];
-                $posDelta += 1;
-            }
-        }
-
-        return $code;
-    }
-}
--- a/lib/PhpParser/Lexer/TokenEmulator/FnTokenEmulator.php
+++ b/lib/PhpParser/Lexer/TokenEmulator/FnTokenEmulator.php
@@ -1,19 +0,0 @@
-<?php declare(strict_types=1);
-
-namespace PhpParser\Lexer\TokenEmulator;
-
-use PhpParser\PhpVersion;
-
-final class FnTokenEmulator extends KeywordEmulator {
-    public function getPhpVersion(): PhpVersion {
-        return PhpVersion::fromComponents(7, 4);
-    }
-
-    public function getKeywordString(): string {
-        return 'fn';
-    }
-
-    public function getKeywordToken(): int {
-        return \T_FN;
-    }
-}
--- a/lib/PhpParser/Lexer/TokenEmulator/NumericLiteralSeparatorEmulator.php
+++ b/lib/PhpParser/Lexer/TokenEmulator/NumericLiteralSeparatorEmulator.php
@@ -1,95 +0,0 @@
-<?php declare(strict_types=1);
-
-namespace PhpParser\Lexer\TokenEmulator;
-
-use PhpParser\PhpVersion;
-use PhpParser\Token;
-
-final class NumericLiteralSeparatorEmulator extends TokenEmulator {
-    private const BIN = '(?:0b[01]+(?:_[01]+)*)';
-    private const HEX = '(?:0x[0-9a-f]+(?:_[0-9a-f]+)*)';
-    private const DEC = '(?:[0-9]+(?:_[0-9]+)*)';
-    private const SIMPLE_FLOAT = '(?:' . self::DEC . '\.' . self::DEC . '?|\.' . self::DEC . ')';
-    private const EXP = '(?:e[+-]?' . self::DEC . ')';
-    private const FLOAT = '(?:' . self::SIMPLE_FLOAT . self::EXP . '?|' . self::DEC . self::EXP . ')';
-    private const NUMBER = '~' . self::FLOAT . '|' . self::BIN . '|' . self::HEX . '|' . self::DEC . '~iA';
-
-    public function getPhpVersion(): PhpVersion {
-        return PhpVersion::fromComponents(7, 4);
-    }
-
-    public function isEmulationNeeded(string $code): bool {
-        return preg_match('~[0-9]_[0-9]~', $code)
-            || preg_match('~0x[0-9a-f]+_[0-9a-f]~i', $code);
-    }
-
-    public function emulate(string $code, array $tokens): array {
-        // We need to manually iterate and manage a count because we'll change
-        // the tokens array on the way
-        for ($i = 0, $c = count($tokens); $i < $c; ++$i) {
-            $token = $tokens[$i];
-            $tokenLen = \strlen($token->text);
-
-            if ($token->id !== \T_LNUMBER && $token->id !== \T_DNUMBER) {
-                continue;
-            }
-
-            $res = preg_match(self::NUMBER, $code, $matches, 0, $token->pos);
-            assert($res, "No number at number token position");
-
-            $match = $matches[0];
-            $matchLen = \strlen($match);
-            if ($matchLen === $tokenLen) {
-                // Original token already holds the full number.
-                continue;
-            }
-
-            $tokenKind = $this->resolveIntegerOrFloatToken($match);
-            $newTokens = [new Token($tokenKind, $match, $token->line, $token->pos)];
-
-            $numTokens = 1;
-            $len = $tokenLen;
-            while ($matchLen > $len) {
-                $nextToken = $tokens[$i + $numTokens];
-                $nextTokenText = $nextToken->text;
-                $nextTokenLen = \strlen($nextTokenText);
-
-                $numTokens++;
-                if ($matchLen < $len + $nextTokenLen) {
-                    // Split trailing characters into a partial token.
-                    $partialText = substr($nextTokenText, $matchLen - $len);
-                    $newTokens[] = new Token($nextToken->id, $partialText, $nextToken->line, $nextToken->pos);
-                    break;
-                }
-
-                $len += $nextTokenLen;
-            }
-
-            array_splice($tokens, $i, $numTokens, $newTokens);
-            $c -= $numTokens - \count($newTokens);
-        }
-
-        return $tokens;
-    }
-
-    private function resolveIntegerOrFloatToken(string $str): int {
-        $str = str_replace('_', '', $str);
-
-        if (stripos($str, '0b') === 0) {
-            $num = bindec($str);
-        } elseif (stripos($str, '0x') === 0) {
-            $num = hexdec($str);
-        } elseif (stripos($str, '0') === 0 && ctype_digit($str)) {
-            $num = octdec($str);
-        } else {
-            $num = +$str;
-        }
-
-        return is_float($num) ? T_DNUMBER : T_LNUMBER;
-    }
-
-    public function reverseEmulate(string $code, array $tokens): array {
-        // Numeric separators were not legal code previously, don't bother.
-        return $tokens;
-    }
-}
--- a/lib/PhpParser/compatibility_tokens.php
+++ b/lib/PhpParser/compatibility_tokens.php
@@ -5,10 +5,6 @@ namespace PhpParser;
 if (!\function_exists('PhpParser\defineCompatibilityTokens')) {
    function defineCompatibilityTokens(): void {
        $compatTokens = [
-            // PHP 7.4
-            'T_BAD_CHARACTER',
-            'T_FN',
-            'T_COALESCE_EQUAL',
            // PHP 8.0
            'T_NAME_QUALIFIED',
            'T_NAME_FULLY_QUALIFIED',
--- a/test/PhpParser/Lexer/EmulativeTest.php
+++ b/test/PhpParser/Lexer/EmulativeTest.php
@@ -406,10 +406,8 @@ class EmulativeTest extends LexerTest {
            ['8.0', 'match', [[\T_MATCH, 'match']]],
            ['7.4', 'match', [[\T_STRING, 'match']]],
            // Keywords are not case-sensitive.
-            ['7.4', 'fn', [[\T_FN, 'fn']]],
-            ['7.4', 'FN', [[\T_FN, 'FN']]],
-            ['7.3', 'fn', [[\T_STRING, 'fn']]],
-            ['7.3', 'FN', [[\T_STRING, 'FN']]],
+            ['8.0', 'MATCH', [[\T_MATCH, 'MATCH']]],
+            ['7.4', 'MATCH', [[\T_STRING, 'MATCH']]],
            // Tested here to skip testLeaveStuffAloneInStrings.
            ['8.0', '"$foo?->bar"', [
                [ord('"'), '"'],