Drop Lexer::getTokens() method

This doesn't make a lot of sense now that Lexer::tokenize() returns
the tokens.

The tokens for the last parse should be fetched via
Parser::getTokens() instead.
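
For example, code that used to pull the token stream off the lexer after a parse now asks the parser for it. A minimal sketch, assuming the PHP-Parser 5.x ParserFactory API (variable names are illustrative):

    use PhpParser\ParserFactory;

    $parser = (new ParserFactory())->createForNewestSupportedVersion();

    // The parser records the token stream of the most recent parse.
    $stmts = $parser->parse($code);

    // Before: $tokens = $lexer->getTokens();
    $tokens = $parser->getTokens();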
Nikita Popov 2023-09-16 09:33:33 +02:00
parent 263fa80b81
commit 06c7ab51b7
3 changed files with 32 additions and 35 deletions

View File

@@ -5,9 +5,6 @@ namespace PhpParser;

 require __DIR__ . '/compatibility_tokens.php';

 class Lexer {
-    /** @var list<Token> List of tokens */
-    protected array $tokens;
-
     /**
      * Tokenize the provided source code.
      *
@@ -31,14 +28,14 @@ class Lexer {

         $scream = ini_set('xdebug.scream', '0');

-        $this->tokens = @Token::tokenize($code);
-        $this->postprocessTokens($errorHandler);
+        $tokens = @Token::tokenize($code);
+        $this->postprocessTokens($tokens, $errorHandler);

         if (false !== $scream) {
             ini_set('xdebug.scream', $scream);
         }

-        return $this->tokens;
+        return $tokens;
     }

     private function handleInvalidCharacter(Token $token, ErrorHandler $errorHandler): void {
@@ -66,33 +63,36 @@ class Lexer {
             && substr($token->text, -2) !== '*/';
     }

-    protected function postprocessTokens(ErrorHandler $errorHandler): void {
+    /**
+     * @param list<Token> $tokens
+     */
+    protected function postprocessTokens(array &$tokens, ErrorHandler $errorHandler): void {
         // This function reports errors (bad characters and unterminated comments) in the token
         // array, and performs certain canonicalizations:
         //  * Use PHP 8.1 T_AMPERSAND_NOT_FOLLOWED_BY_VAR_OR_VARARG and
         //    T_AMPERSAND_FOLLOWED_BY_VAR_OR_VARARG tokens used to disambiguate intersection types.
         //  * Add a sentinel token with ID 0.
-        $numTokens = \count($this->tokens);
+        $numTokens = \count($tokens);
         if ($numTokens === 0) {
             // Empty input edge case: Just add the sentinel token.
-            $this->tokens[] = new Token(0, "\0", 1, 0);
+            $tokens[] = [new Token(0, "\0", 1, 0)];
             return;
         }

         for ($i = 0; $i < $numTokens; $i++) {
-            $token = $this->tokens[$i];
+            $token = $tokens[$i];
             if ($token->id === \T_BAD_CHARACTER) {
                 $this->handleInvalidCharacter($token, $errorHandler);
             }

             if ($token->id === \ord('&')) {
                 $next = $i + 1;
-                while (isset($this->tokens[$next]) && $this->tokens[$next]->id === \T_WHITESPACE) {
+                while (isset($tokens[$next]) && $tokens[$next]->id === \T_WHITESPACE) {
                     $next++;
                 }
-                $followedByVarOrVarArg = isset($this->tokens[$next]) &&
-                                         $this->tokens[$next]->is([\T_VARIABLE, \T_ELLIPSIS]);
+                $followedByVarOrVarArg = isset($tokens[$next]) &&
+                                         $tokens[$next]->is([\T_VARIABLE, \T_ELLIPSIS]);
                 $token->id = $followedByVarOrVarArg
                     ? \T_AMPERSAND_FOLLOWED_BY_VAR_OR_VARARG
                     : \T_AMPERSAND_NOT_FOLLOWED_BY_VAR_OR_VARARG;
@@ -100,7 +100,7 @@ class Lexer {
         }

         // Check for unterminated comment
-        $lastToken = $this->tokens[$numTokens - 1];
+        $lastToken = $tokens[$numTokens - 1];
         if ($this->isUnterminatedComment($lastToken)) {
             $errorHandler->handleError(new Error('Unterminated comment', [
                 'startLine' => $lastToken->line,
@@ -111,15 +111,6 @@ class Lexer {
         }

         // Add sentinel token.
-        $this->tokens[] = new Token(0, "\0", $lastToken->getEndLine(), $lastToken->getEndPos());
-    }
-
-    /**
-     * Returns the token array for the last tokenized source code.
-     *
-     * @return Token[] Array of tokens
-     */
-    public function getTokens(): array {
-        return $this->tokens;
+        $tokens[] = new Token(0, "\0", $lastToken->getEndLine(), $lastToken->getEndPos());
     }
 }

View File

@@ -19,6 +19,7 @@ use PhpParser\Lexer\TokenEmulator\ReadonlyTokenEmulator;
 use PhpParser\Lexer\TokenEmulator\ReverseEmulator;
 use PhpParser\Lexer\TokenEmulator\TokenEmulator;
 use PhpParser\PhpVersion;
+use PhpParser\Token;

 class Emulative extends Lexer {
     /** @var array{int, string, string}[] Patches used to reverse changes introduced in the code */
@@ -81,9 +82,9 @@ class Emulative extends Lexer {
         }

         $collector = new ErrorHandler\Collecting();
-        parent::tokenize($code, $collector);
+        $tokens = parent::tokenize($code, $collector);
         $this->sortPatches();
-        $this->fixupTokens();
+        $tokens = $this->fixupTokens($tokens);

         $errors = $collector->getErrors();
         if (!empty($errors)) {
@@ -94,10 +95,10 @@ class Emulative extends Lexer {
         }

         foreach ($emulators as $emulator) {
-            $this->tokens = $emulator->emulate($code, $this->tokens);
+            $tokens = $emulator->emulate($code, $tokens);
         }

-        return $this->tokens;
+        return $tokens;
     }

     private function isForwardEmulationNeeded(PhpVersion $emulatorPhpVersion): bool {
@@ -118,9 +119,13 @@ class Emulative extends Lexer {
         });
     }

-    private function fixupTokens(): void {
+    /**
+     * @param list<Token> $tokens
+     * @return list<Token>
+     */
+    private function fixupTokens(array $tokens): array {
         if (\count($this->patches) === 0) {
-            return;
+            return $tokens;
         }

         // Load first patch
@@ -130,8 +135,8 @@ class Emulative extends Lexer {
         // We use a manual loop over the tokens, because we modify the array on the fly
         $posDelta = 0;
         $lineDelta = 0;
-        for ($i = 0, $c = \count($this->tokens); $i < $c; $i++) {
-            $token = $this->tokens[$i];
+        for ($i = 0, $c = \count($tokens); $i < $c; $i++) {
+            $token = $tokens[$i];
             $pos = $token->pos;
             $token->pos += $posDelta;
             $token->line += $lineDelta;
@@ -142,7 +147,7 @@ class Emulative extends Lexer {
                 if ($patchType === 'remove') {
                     if ($patchPos === $pos && $patchTextLen === $len) {
                         // Remove token entirely
-                        array_splice($this->tokens, $i, 1, []);
+                        array_splice($tokens, $i, 1, []);
                         $i--;
                         $c--;
                     } else {
@@ -182,6 +187,7 @@ class Emulative extends Lexer {
             $posDelta += $localPosDelta;
         }

+        return $tokens;
     }

     /**

View File

@@ -191,7 +191,7 @@ class PrettyPrinterTest extends CodeTestAbstract {
         $printer = new PrettyPrinter\Standard();

         $oldStmts = $parser->parse($code);
-        $oldTokens = $lexer->getTokens();
+        $oldTokens = $parser->getTokens();

         $newStmts = $traverser->traverse($oldStmts);
@@ -241,7 +241,7 @@ CODE
             return;
         }

-        $oldTokens = $lexer->getTokens();
+        $oldTokens = $parser->getTokens();

         $newStmts = $traverser->traverse($oldStmts);