Drop Lexer::getTokens() method

This doesn't make a lot of sense now that Lexer::tokenize() returns the tokens. The tokens for the last parse should be fetched via Parser::getTokens() instead.
2025-05-05 12:55:16 +02:00 · 2023-09-16 09:33:33 +02:00 · 2023-09-16 09:33:33 +02:00 · 06c7ab51b7
commit 06c7ab51b7
parent 263fa80b81
3 changed files with 32 additions and 35 deletions
--- a/lib/PhpParser/Lexer.php
+++ b/lib/PhpParser/Lexer.php
@ -5,9 +5,6 @@ namespace PhpParser;
 require __DIR__ . '/compatibility_tokens.php';

 class Lexer {
-    /** @var list<Token> List of tokens */
-    protected array $tokens;
-
    /**
     * Tokenize the provided source code.
     *
@ -31,14 +28,14 @@ class Lexer {

        $scream = ini_set('xdebug.scream', '0');

-        $this->tokens = @Token::tokenize($code);
-        $this->postprocessTokens($errorHandler);
+        $tokens = @Token::tokenize($code);
+        $this->postprocessTokens($tokens, $errorHandler);

        if (false !== $scream) {
            ini_set('xdebug.scream', $scream);
        }

-        return $this->tokens;
+        return $tokens;
    }

    private function handleInvalidCharacter(Token $token, ErrorHandler $errorHandler): void {
@ -66,33 +63,36 @@ class Lexer {
            && substr($token->text, -2) !== '*/';
    }

-    protected function postprocessTokens(ErrorHandler $errorHandler): void {
+    /**
+     * @param list<Token> $tokens
+     */
+    protected function postprocessTokens(array &$tokens, ErrorHandler $errorHandler): void {
        // This function reports errors (bad characters and unterminated comments) in the token
        // array, and performs certain canonicalizations:
        //  * Use PHP 8.1 T_AMPERSAND_NOT_FOLLOWED_BY_VAR_OR_VARARG and
        //    T_AMPERSAND_FOLLOWED_BY_VAR_OR_VARARG tokens used to disambiguate intersection types.
        //  * Add a sentinel token with ID 0.

-        $numTokens = \count($this->tokens);
+        $numTokens = \count($tokens);
        if ($numTokens === 0) {
            // Empty input edge case: Just add the sentinel token.
-            $this->tokens[] = new Token(0, "\0", 1, 0);
+            $tokens[] = new Token(0, "\0", 1, 0);
            return;
        }

        for ($i = 0; $i < $numTokens; $i++) {
-            $token = $this->tokens[$i];
+            $token = $tokens[$i];
            if ($token->id === \T_BAD_CHARACTER) {
                $this->handleInvalidCharacter($token, $errorHandler);
            }

            if ($token->id === \ord('&')) {
                $next = $i + 1;
-                while (isset($this->tokens[$next]) && $this->tokens[$next]->id === \T_WHITESPACE) {
+                while (isset($tokens[$next]) && $tokens[$next]->id === \T_WHITESPACE) {
                    $next++;
                }
-                $followedByVarOrVarArg = isset($this->tokens[$next]) &&
-                    $this->tokens[$next]->is([\T_VARIABLE, \T_ELLIPSIS]);
+                $followedByVarOrVarArg = isset($tokens[$next]) &&
+                    $tokens[$next]->is([\T_VARIABLE, \T_ELLIPSIS]);
                $token->id = $followedByVarOrVarArg
                    ? \T_AMPERSAND_FOLLOWED_BY_VAR_OR_VARARG
                    : \T_AMPERSAND_NOT_FOLLOWED_BY_VAR_OR_VARARG;
@ -100,7 +100,7 @@ class Lexer {
        }

        // Check for unterminated comment
-        $lastToken = $this->tokens[$numTokens - 1];
+        $lastToken = $tokens[$numTokens - 1];
        if ($this->isUnterminatedComment($lastToken)) {
            $errorHandler->handleError(new Error('Unterminated comment', [
                'startLine' => $lastToken->line,
@ -111,15 +111,6 @@ class Lexer {
        }

        // Add sentinel token.
-        $this->tokens[] = new Token(0, "\0", $lastToken->getEndLine(), $lastToken->getEndPos());
-    }
-
-    /**
-     * Returns the token array for the last tokenized source code.
-     *
-     * @return Token[] Array of tokens
-     */
-    public function getTokens(): array {
-        return $this->tokens;
+        $tokens[] = new Token(0, "\0", $lastToken->getEndLine(), $lastToken->getEndPos());
    }
 }
--- a/lib/PhpParser/Lexer/Emulative.php
+++ b/lib/PhpParser/Lexer/Emulative.php
@ -19,6 +19,7 @@ use PhpParser\Lexer\TokenEmulator\ReadonlyTokenEmulator;
 use PhpParser\Lexer\TokenEmulator\ReverseEmulator;
 use PhpParser\Lexer\TokenEmulator\TokenEmulator;
 use PhpParser\PhpVersion;
+use PhpParser\Token;

 class Emulative extends Lexer {
    /** @var array{int, string, string}[] Patches used to reverse changes introduced in the code */
@ -81,9 +82,9 @@ class Emulative extends Lexer {
        }

        $collector = new ErrorHandler\Collecting();
-        parent::tokenize($code, $collector);
+        $tokens = parent::tokenize($code, $collector);
        $this->sortPatches();
-        $this->fixupTokens();
+        $tokens = $this->fixupTokens($tokens);

        $errors = $collector->getErrors();
        if (!empty($errors)) {
@ -94,10 +95,10 @@ class Emulative extends Lexer {
        }

        foreach ($emulators as $emulator) {
-            $this->tokens = $emulator->emulate($code, $this->tokens);
+            $tokens = $emulator->emulate($code, $tokens);
        }

-        return $this->tokens;
+        return $tokens;
    }

    private function isForwardEmulationNeeded(PhpVersion $emulatorPhpVersion): bool {
@ -118,9 +119,13 @@ class Emulative extends Lexer {
        });
    }

-    private function fixupTokens(): void {
+    /**
+     * @param list<Token> $tokens
+     * @return list<Token>
+     */
+    private function fixupTokens(array $tokens): array {
        if (\count($this->patches) === 0) {
-            return;
+            return $tokens;
        }

        // Load first patch
@ -130,8 +135,8 @@ class Emulative extends Lexer {
        // We use a manual loop over the tokens, because we modify the array on the fly
        $posDelta = 0;
        $lineDelta = 0;
-        for ($i = 0, $c = \count($this->tokens); $i < $c; $i++) {
-            $token = $this->tokens[$i];
+        for ($i = 0, $c = \count($tokens); $i < $c; $i++) {
+            $token = $tokens[$i];
            $pos = $token->pos;
            $token->pos += $posDelta;
            $token->line += $lineDelta;
@ -142,7 +147,7 @@ class Emulative extends Lexer {
                if ($patchType === 'remove') {
                    if ($patchPos === $pos && $patchTextLen === $len) {
                        // Remove token entirely
-                        array_splice($this->tokens, $i, 1, []);
+                        array_splice($tokens, $i, 1, []);
                        $i--;
                        $c--;
                    } else {
@ -182,6 +187,7 @@ class Emulative extends Lexer {

            $posDelta += $localPosDelta;
        }
+        return $tokens;
    }

    /**
--- a/test/PhpParser/PrettyPrinterTest.php
+++ b/test/PhpParser/PrettyPrinterTest.php
@ -191,7 +191,7 @@ class PrettyPrinterTest extends CodeTestAbstract {
        $printer = new PrettyPrinter\Standard();

        $oldStmts = $parser->parse($code);
-        $oldTokens = $lexer->getTokens();
+        $oldTokens = $parser->getTokens();

        $newStmts = $traverser->traverse($oldStmts);

@ -241,7 +241,7 @@ CODE
            return;
        }

-        $oldTokens = $lexer->getTokens();
+        $oldTokens = $parser->getTokens();

        $newStmts = $traverser->traverse($oldStmts);