Drop Lexer::getTokens() method

This doesn't make a lot of sense now that Lexer::tokenize() returns
the tokens.

The tokens for the last parse should be fetched via
Parser::getTokens() instead.
This commit is contained in:
Nikita Popov 2023-09-16 09:33:33 +02:00
parent 263fa80b81
commit 06c7ab51b7
3 changed files with 32 additions and 35 deletions

View File

@ -5,9 +5,6 @@ namespace PhpParser;
require __DIR__ . '/compatibility_tokens.php';
class Lexer {
/** @var list<Token> List of tokens */
protected array $tokens;
/**
* Tokenize the provided source code.
*
@ -31,14 +28,14 @@ class Lexer {
$scream = ini_set('xdebug.scream', '0');
$this->tokens = @Token::tokenize($code);
$this->postprocessTokens($errorHandler);
$tokens = @Token::tokenize($code);
$this->postprocessTokens($tokens, $errorHandler);
if (false !== $scream) {
ini_set('xdebug.scream', $scream);
}
return $this->tokens;
return $tokens;
}
private function handleInvalidCharacter(Token $token, ErrorHandler $errorHandler): void {
@ -66,33 +63,36 @@ class Lexer {
&& substr($token->text, -2) !== '*/';
}
protected function postprocessTokens(ErrorHandler $errorHandler): void {
/**
* @param list<Token> $tokens
*/
protected function postprocessTokens(array &$tokens, ErrorHandler $errorHandler): void {
// This function reports errors (bad characters and unterminated comments) in the token
// array, and performs certain canonicalizations:
// * Use PHP 8.1 T_AMPERSAND_NOT_FOLLOWED_BY_VAR_OR_VARARG and
// T_AMPERSAND_FOLLOWED_BY_VAR_OR_VARARG tokens used to disambiguate intersection types.
// * Add a sentinel token with ID 0.
$numTokens = \count($this->tokens);
$numTokens = \count($tokens);
if ($numTokens === 0) {
// Empty input edge case: Just add the sentinel token.
$this->tokens[] = new Token(0, "\0", 1, 0);
$tokens[] = new Token(0, "\0", 1, 0);
return;
}
for ($i = 0; $i < $numTokens; $i++) {
$token = $this->tokens[$i];
$token = $tokens[$i];
if ($token->id === \T_BAD_CHARACTER) {
$this->handleInvalidCharacter($token, $errorHandler);
}
if ($token->id === \ord('&')) {
$next = $i + 1;
while (isset($this->tokens[$next]) && $this->tokens[$next]->id === \T_WHITESPACE) {
while (isset($tokens[$next]) && $tokens[$next]->id === \T_WHITESPACE) {
$next++;
}
$followedByVarOrVarArg = isset($this->tokens[$next]) &&
$this->tokens[$next]->is([\T_VARIABLE, \T_ELLIPSIS]);
$followedByVarOrVarArg = isset($tokens[$next]) &&
$tokens[$next]->is([\T_VARIABLE, \T_ELLIPSIS]);
$token->id = $followedByVarOrVarArg
? \T_AMPERSAND_FOLLOWED_BY_VAR_OR_VARARG
: \T_AMPERSAND_NOT_FOLLOWED_BY_VAR_OR_VARARG;
@ -100,7 +100,7 @@ class Lexer {
}
// Check for unterminated comment
$lastToken = $this->tokens[$numTokens - 1];
$lastToken = $tokens[$numTokens - 1];
if ($this->isUnterminatedComment($lastToken)) {
$errorHandler->handleError(new Error('Unterminated comment', [
'startLine' => $lastToken->line,
@ -111,15 +111,6 @@ class Lexer {
}
// Add sentinel token.
$this->tokens[] = new Token(0, "\0", $lastToken->getEndLine(), $lastToken->getEndPos());
}
/**
* Returns the token array for the last tokenized source code.
*
* @return Token[] Array of tokens
*/
public function getTokens(): array {
return $this->tokens;
$tokens[] = new Token(0, "\0", $lastToken->getEndLine(), $lastToken->getEndPos());
}
}

View File

@ -19,6 +19,7 @@ use PhpParser\Lexer\TokenEmulator\ReadonlyTokenEmulator;
use PhpParser\Lexer\TokenEmulator\ReverseEmulator;
use PhpParser\Lexer\TokenEmulator\TokenEmulator;
use PhpParser\PhpVersion;
use PhpParser\Token;
class Emulative extends Lexer {
/** @var array{int, string, string}[] Patches used to reverse changes introduced in the code */
@ -81,9 +82,9 @@ class Emulative extends Lexer {
}
$collector = new ErrorHandler\Collecting();
parent::tokenize($code, $collector);
$tokens = parent::tokenize($code, $collector);
$this->sortPatches();
$this->fixupTokens();
$tokens = $this->fixupTokens($tokens);
$errors = $collector->getErrors();
if (!empty($errors)) {
@ -94,10 +95,10 @@ class Emulative extends Lexer {
}
foreach ($emulators as $emulator) {
$this->tokens = $emulator->emulate($code, $this->tokens);
$tokens = $emulator->emulate($code, $tokens);
}
return $this->tokens;
return $tokens;
}
private function isForwardEmulationNeeded(PhpVersion $emulatorPhpVersion): bool {
@ -118,9 +119,13 @@ class Emulative extends Lexer {
});
}
private function fixupTokens(): void {
/**
* @param list<Token> $tokens
* @return list<Token>
*/
private function fixupTokens(array $tokens): array {
if (\count($this->patches) === 0) {
return;
return $tokens;
}
// Load first patch
@ -130,8 +135,8 @@ class Emulative extends Lexer {
// We use a manual loop over the tokens, because we modify the array on the fly
$posDelta = 0;
$lineDelta = 0;
for ($i = 0, $c = \count($this->tokens); $i < $c; $i++) {
$token = $this->tokens[$i];
for ($i = 0, $c = \count($tokens); $i < $c; $i++) {
$token = $tokens[$i];
$pos = $token->pos;
$token->pos += $posDelta;
$token->line += $lineDelta;
@ -142,7 +147,7 @@ class Emulative extends Lexer {
if ($patchType === 'remove') {
if ($patchPos === $pos && $patchTextLen === $len) {
// Remove token entirely
array_splice($this->tokens, $i, 1, []);
array_splice($tokens, $i, 1, []);
$i--;
$c--;
} else {
@ -182,6 +187,7 @@ class Emulative extends Lexer {
$posDelta += $localPosDelta;
}
return $tokens;
}
/**

View File

@ -191,7 +191,7 @@ class PrettyPrinterTest extends CodeTestAbstract {
$printer = new PrettyPrinter\Standard();
$oldStmts = $parser->parse($code);
$oldTokens = $lexer->getTokens();
$oldTokens = $parser->getTokens();
$newStmts = $traverser->traverse($oldStmts);
@ -241,7 +241,7 @@ CODE
return;
}
$oldTokens = $lexer->getTokens();
$oldTokens = $parser->getTokens();
$newStmts = $traverser->traverse($oldStmts);