Compare commits

..

11 Commits

Author SHA1 Message Date
Nikita Popov
1626c19d98 Update lexer implementations and tests 2019-06-30 23:07:22 +02:00
Nikita Popov
f52785a2b8 Start on lexer refactoring 2019-06-30 19:31:22 +02:00
Nikita Popov
a21a614737 WIP 2019-06-30 17:21:55 +02:00
Nikita Popov
6f74784e16 Switch to a normalized token representation
Each token is now represented by a Token object.
2019-06-30 14:14:24 +02:00
Tomáš Votruba
3f718ee2c3 [PHP 7.4] Add support for numeric literal separators (#615)
Implements RFC https://wiki.php.net/rfc/numeric_literal_separator.

Closes #614.
2019-06-30 12:13:28 +02:00
Nikita Popov
b9b45dd2bc Insert T_BAD_CHARACTER tokens for missing characters
The token stream should cover all characters in the original code,
insert a dummy token for missing illegal characters. We should
really be doing this in token_get_all() as well.
2019-06-30 11:43:48 +02:00
Chris Hewitt
a4b43edb03 Fix one-character inconsistency 2019-06-30 09:25:26 +02:00
Nikita Popov
3cf61fdd26 Only ignore-platform-reqs on nightly 2019-06-23 15:11:05 +02:00
Nikita Popov
9484baf8f8 Make compatible with PhpUnit 8 2019-06-23 15:03:40 +02:00
Nikita Popov
aad0e2896f Remove token registration from TokenEmulator interface 2019-06-23 14:50:14 +02:00
hoels
624f71fa6f Resolve return type of arrow functions (#613) 2019-06-04 16:25:12 +02:00
25 changed files with 769 additions and 772 deletions

View File

@@ -16,7 +16,12 @@ php:
install: install:
- if [ $TRAVIS_PHP_VERSION = '7.0' ]; then composer require satooshi/php-coveralls '~1.0'; fi - if [ $TRAVIS_PHP_VERSION = '7.0' ]; then composer require satooshi/php-coveralls '~1.0'; fi
- composer install --prefer-dist --ignore-platform-reqs - |
if [ $TRAVIS_PHP_VERSION = 'nightly' ]; then
composer install --prefer-dist --ignore-platform-reqs;
else
composer install --prefer-dist;
fi
matrix: matrix:
allow_failures: allow_failures:

View File

@@ -14,10 +14,11 @@
], ],
"require": { "require": {
"php": ">=7.0", "php": ">=7.0",
"ext-json": "*",
"ext-tokenizer": "*" "ext-tokenizer": "*"
}, },
"require-dev": { "require-dev": {
"phpunit/phpunit": "^6.5 || ^7.0" "phpunit/phpunit": "^6.5 || ^7.0 || ^8.0"
}, },
"extra": { "extra": {
"branch-alias": { "branch-alias": {

View File

@@ -56,7 +56,7 @@ array(
``` ```
This matches the structure of the code: An echo statement, which takes two strings as expressions, This matches the structure of the code: An echo statement, which takes two strings as expressions,
with the values `Hi` and `World!`. with the values `Hi` and `World`.
You can also see that the AST does not contain any whitespace information (but most comments are saved). You can also see that the AST does not contain any whitespace information (but most comments are saved).
So using it for formatting analysis is not possible. So using it for formatting analysis is not possible.

View File

@@ -0,0 +1,8 @@
<?php declare(strict_types=1);
namespace PhpParser;
/**
 * Plain data holder that exposes a single public property: a list of Token objects.
 *
 * NOTE(review): judging by the name this is meant to carry per-file state; no
 * caller is visible from here, so confirm the intended usage.
 */
class FileContext {
/**
 * @var Token[] Tokens held by this context.
 *              NOTE(review): presumably the sequence produced by the lexer for
 *              one source file — confirm against the code that populates it.
 */
public $tokens;
}

View File

@@ -2,6 +2,8 @@
namespace PhpParser\Internal; namespace PhpParser\Internal;
use PhpParser\Token;
/** /**
* Provides operations on token streams, for use by pretty printer. * Provides operations on token streams, for use by pretty printer.
* *
@@ -9,7 +11,7 @@ namespace PhpParser\Internal;
*/ */
class TokenStream class TokenStream
{ {
/** @var array Tokens (in token_get_all format) */ /** @var Token[] */
private $tokens; private $tokens;
/** @var int[] Map from position to indentation */ /** @var int[] Map from position to indentation */
private $indentMap; private $indentMap;
@@ -17,7 +19,7 @@ class TokenStream
/** /**
* Create token stream instance. * Create token stream instance.
* *
* @param array $tokens Tokens in token_get_all() format * @param Token[] $tokens Tokens
*/ */
public function __construct(array $tokens) { public function __construct(array $tokens) {
$this->tokens = $tokens; $this->tokens = $tokens;
@@ -33,8 +35,8 @@ class TokenStream
* @return bool * @return bool
*/ */
public function haveParens(int $startPos, int $endPos) : bool { public function haveParens(int $startPos, int $endPos) : bool {
return $this->haveTokenImmediativelyBefore($startPos, '(') return $this->haveTokenImmediativelyBefore($startPos, \ord('('))
&& $this->haveTokenImmediatelyAfter($endPos, ')'); && $this->haveTokenImmediatelyAfter($endPos, \ord(')'));
} }
/** /**
@@ -46,8 +48,8 @@ class TokenStream
* @return bool * @return bool
*/ */
public function haveBraces(int $startPos, int $endPos) : bool { public function haveBraces(int $startPos, int $endPos) : bool {
return $this->haveTokenImmediativelyBefore($startPos, '{') return $this->haveTokenImmediativelyBefore($startPos, \ord('{'))
&& $this->haveTokenImmediatelyAfter($endPos, '}'); && $this->haveTokenImmediatelyAfter($endPos, \ord('}'));
} }
/** /**
@@ -64,7 +66,7 @@ class TokenStream
$tokens = $this->tokens; $tokens = $this->tokens;
$pos--; $pos--;
for (; $pos >= 0; $pos--) { for (; $pos >= 0; $pos--) {
$tokenType = $tokens[$pos][0]; $tokenType = $tokens[$pos]->id;
if ($tokenType === $expectedTokenType) { if ($tokenType === $expectedTokenType) {
return true; return true;
} }
@@ -90,7 +92,7 @@ class TokenStream
$tokens = $this->tokens; $tokens = $this->tokens;
$pos++; $pos++;
for (; $pos < \count($tokens); $pos++) { for (; $pos < \count($tokens); $pos++) {
$tokenType = $tokens[$pos][0]; $tokenType = $tokens[$pos]->id;
if ($tokenType === $expectedTokenType) { if ($tokenType === $expectedTokenType) {
return true; return true;
} }
@@ -110,7 +112,7 @@ class TokenStream
return $pos; return $pos;
} }
if ($tokens[$pos][0] !== $skipTokenType) { if ($tokens[$pos]->id !== $skipTokenType) {
// Shouldn't happen. The skip token MUST be there // Shouldn't happen. The skip token MUST be there
throw new \Exception('Encountered unexpected token'); throw new \Exception('Encountered unexpected token');
} }
@@ -127,7 +129,7 @@ class TokenStream
return $pos; return $pos;
} }
if ($tokens[$pos][0] !== $skipTokenType) { if ($tokens[$pos]->id !== $skipTokenType) {
// Shouldn't happen. The skip token MUST be there // Shouldn't happen. The skip token MUST be there
throw new \Exception('Encountered unexpected token'); throw new \Exception('Encountered unexpected token');
} }
@@ -145,7 +147,7 @@ class TokenStream
public function skipLeftWhitespace(int $pos) { public function skipLeftWhitespace(int $pos) {
$tokens = $this->tokens; $tokens = $this->tokens;
for (; $pos >= 0; $pos--) { for (; $pos >= 0; $pos--) {
$type = $tokens[$pos][0]; $type = $tokens[$pos]->id;
if ($type !== \T_WHITESPACE && $type !== \T_COMMENT && $type !== \T_DOC_COMMENT) { if ($type !== \T_WHITESPACE && $type !== \T_COMMENT && $type !== \T_DOC_COMMENT) {
break; break;
} }
@@ -162,7 +164,7 @@ class TokenStream
public function skipRightWhitespace(int $pos) { public function skipRightWhitespace(int $pos) {
$tokens = $this->tokens; $tokens = $this->tokens;
for ($count = \count($tokens); $pos < $count; $pos++) { for ($count = \count($tokens); $pos < $count; $pos++) {
$type = $tokens[$pos][0]; $type = $tokens[$pos]->id;
if ($type !== \T_WHITESPACE && $type !== \T_COMMENT && $type !== \T_DOC_COMMENT) { if ($type !== \T_WHITESPACE && $type !== \T_COMMENT && $type !== \T_DOC_COMMENT) {
break; break;
} }
@@ -173,7 +175,7 @@ class TokenStream
public function findRight($pos, $findTokenType) { public function findRight($pos, $findTokenType) {
$tokens = $this->tokens; $tokens = $this->tokens;
for ($count = \count($tokens); $pos < $count; $pos++) { for ($count = \count($tokens); $pos < $count; $pos++) {
$type = $tokens[$pos][0]; $type = $tokens[$pos]->id;
if ($type === $findTokenType) { if ($type === $findTokenType) {
return $pos; return $pos;
} }
@@ -206,9 +208,8 @@ class TokenStream
$result = ''; $result = '';
for ($pos = $from; $pos < $to; $pos++) { for ($pos = $from; $pos < $to; $pos++) {
$token = $tokens[$pos]; $token = $tokens[$pos];
if (\is_array($token)) { $type = $token->id;
$type = $token[0]; $content = $token->value;
$content = $token[1];
if ($type === \T_CONSTANT_ENCAPSED_STRING || $type === \T_ENCAPSED_AND_WHITESPACE) { if ($type === \T_CONSTANT_ENCAPSED_STRING || $type === \T_ENCAPSED_AND_WHITESPACE) {
$result .= $content; $result .= $content;
} else { } else {
@@ -221,9 +222,6 @@ class TokenStream
$result .= $content; $result .= $content;
} }
} }
} else {
$result .= $token;
}
} }
return $result; return $result;
} }
@@ -239,8 +237,8 @@ class TokenStream
foreach ($this->tokens as $token) { foreach ($this->tokens as $token) {
$indentMap[] = $indent; $indentMap[] = $indent;
if ($token[0] === \T_WHITESPACE) { if ($token->id === \T_WHITESPACE) {
$content = $token[1]; $content = $token->value;
$newlinePos = \strrpos($content, "\n"); $newlinePos = \strrpos($content, "\n");
if (false !== $newlinePos) { if (false !== $newlinePos) {
$indent = \strlen($content) - $newlinePos - 1; $indent = \strlen($content) - $newlinePos - 1;

View File

@@ -6,99 +6,132 @@ use PhpParser\Parser\Tokens;
class Lexer class Lexer
{ {
protected $code; /** @var array Map from PHP tokens to PhpParser tokens. */
protected $tokens;
protected $pos;
protected $line;
protected $filePos;
protected $prevCloseTagHasNewline;
protected $tokenMap; protected $tokenMap;
protected $dropTokens;
private $attributeStartLineUsed;
private $attributeEndLineUsed;
private $attributeStartTokenPosUsed;
private $attributeEndTokenPosUsed;
private $attributeStartFilePosUsed;
private $attributeEndFilePosUsed;
private $attributeCommentsUsed;
/** /**
* Creates a Lexer. * Creates a Lexer.
* *
* @param array $options Options array. Currently only the 'usedAttributes' option is supported, * @param array $options Options array. Currently unused.
* which is an array of attributes to add to the AST nodes. Possible
* attributes are: 'comments', 'startLine', 'endLine', 'startTokenPos',
* 'endTokenPos', 'startFilePos', 'endFilePos'. The option defaults to the
* first three. For more info see getNextToken() docs.
*/ */
public function __construct(array $options = []) { public function __construct(array $options = []) {
// map from internal tokens to PhpParser tokens
$this->tokenMap = $this->createTokenMap(); $this->tokenMap = $this->createTokenMap();
// map of tokens to drop while lexing (the map is only used for isset lookup,
// that's why the value is simply set to 1; the value is never actually used.)
$this->dropTokens = array_fill_keys(
[\T_WHITESPACE, \T_OPEN_TAG, \T_COMMENT, \T_DOC_COMMENT], 1
);
$defaultAttributes = ['comments', 'startLine', 'endLine'];
$usedAttributes = array_fill_keys($options['usedAttributes'] ?? $defaultAttributes, true);
// Create individual boolean properties to make these checks faster.
$this->attributeStartLineUsed = isset($usedAttributes['startLine']);
$this->attributeEndLineUsed = isset($usedAttributes['endLine']);
$this->attributeStartTokenPosUsed = isset($usedAttributes['startTokenPos']);
$this->attributeEndTokenPosUsed = isset($usedAttributes['endTokenPos']);
$this->attributeStartFilePosUsed = isset($usedAttributes['startFilePos']);
$this->attributeEndFilePosUsed = isset($usedAttributes['endFilePos']);
$this->attributeCommentsUsed = isset($usedAttributes['comments']);
} }
/** /**
* Initializes the lexer for lexing the provided source code. * Get token IDs that should be ignored by the parser.
*
* @return array
*/
public function getIgnorableTokens(): array {
return [
Tokens::T_WHITESPACE,
Tokens::T_COMMENT,
Tokens::T_DOC_COMMENT,
Tokens::T_OPEN_TAG,
Tokens::T_BAD_CHARACTER,
];
}
/**
* Get map for token canonicalization.
*
* @return array
*/
public function getCanonicalizationMap(): array {
return [
Tokens::T_OPEN_TAG_WITH_ECHO => Tokens::T_ECHO,
Tokens::T_CLOSE_TAG => \ord(';'),
];
}
/**
* Tokenizes the given PHP code into an array of Tokens.
* *
* This function does not throw if lexing errors occur. Instead, errors may be retrieved using * This function does not throw if lexing errors occur. Instead, errors may be retrieved using
* the getErrors() method. * the getErrors() method.
* *
* @param string $code The source code to lex * @param string $code The source code to tokenize
* @param ErrorHandler|null $errorHandler Error handler to use for lexing errors. Defaults to * @param ErrorHandler|null $errorHandler Error handler to use for lexing errors. Defaults to
* ErrorHandler\Throwing * ErrorHandler\Throwing
*
* @return Token[] Sequence of tokens
*/ */
public function startLexing(string $code, ErrorHandler $errorHandler = null) { public function tokenize(string $code, ErrorHandler $errorHandler = null) {
if (null === $errorHandler) { if (null === $errorHandler) {
$errorHandler = new ErrorHandler\Throwing(); $errorHandler = new ErrorHandler\Throwing();
} }
$this->code = $code; // keep the code around for __halt_compiler() handling
$this->pos = -1;
$this->line = 1;
$this->filePos = 0;
// If inline HTML occurs without preceding code, treat it as if it had a leading newline.
// This ensures proper composability, because having a newline is the "safe" assumption.
$this->prevCloseTagHasNewline = true;
$scream = ini_set('xdebug.scream', '0'); $scream = ini_set('xdebug.scream', '0');
error_clear_last(); error_clear_last();
$this->tokens = @token_get_all($code); $rawTokens = @token_get_all($code);
$this->handleErrors($errorHandler); $checkForMissingTokens = null !== error_get_last();
if (false !== $scream) { if (false !== $scream) {
ini_set('xdebug.scream', $scream); ini_set('xdebug.scream', $scream);
} }
$tokens = [];
$filePos = 0;
$line = 1;
foreach ($rawTokens as $rawToken) {
if (\is_array($rawToken)) {
$token = new Token($this->tokenMap[$rawToken[0]], $rawToken[1], $line, $filePos);
} elseif (\strlen($rawToken) == 2) {
// Bug in token_get_all() when lexing b".
$token = new Token(\ord('"'), $rawToken, $line, $filePos);
} else {
$token = new Token(\ord($rawToken), $rawToken, $line, $filePos);
} }
private function handleInvalidCharacterRange($start, $end, $line, ErrorHandler $errorHandler) { $value = $token->value;
$tokenLen = \strlen($value);
if ($checkForMissingTokens && substr($code, $filePos, $tokenLen) !== $value) {
// Something is missing, must be an invalid character
$nextFilePos = strpos($code, $value, $filePos);
$badCharTokens = $this->handleInvalidCharacterRange(
$code, $filePos, $nextFilePos, $line, $errorHandler);
$tokens = array_merge($tokens, $badCharTokens);
$filePos = (int) $nextFilePos;
}
$tokens[] = $token;
$filePos += $tokenLen;
$line += substr_count($value, "\n");
}
if ($filePos !== \strlen($code)) {
// Invalid characters at the end of the input
$badCharTokens = $this->handleInvalidCharacterRange(
$code, $filePos, \strlen($code), $line, $errorHandler);
$tokens = array_merge($tokens, $badCharTokens);
}
if (\count($tokens) > 0) {
// Check for unterminated comment
$lastToken = $tokens[\count($tokens) - 1];
if ($this->isUnterminatedComment($lastToken)) {
$errorHandler->handleError(new Error('Unterminated comment', [
'startLine' => $line - substr_count($lastToken->value, "\n"),
'endLine' => $line,
'startFilePos' => $filePos - \strlen($lastToken->value),
'endFilePos' => $filePos,
]));
}
}
// Add an EOF sentinel token
// TODO: Should the value be an empty string instead?
$tokens[] = new Token(0, "\0", $line, \strlen($code));
return $tokens;
}
private function handleInvalidCharacterRange(
string $code, int $start, int $end, int $line, ErrorHandler $errorHandler
) {
$tokens = [];
for ($i = $start; $i < $end; $i++) { for ($i = $start; $i < $end; $i++) {
$chr = $this->code[$i]; $chr = $code[$i];
if ($chr === 'b' || $chr === 'B') {
// HHVM does not treat b" tokens correctly, so ignore these
continue;
}
if ($chr === "\0") { if ($chr === "\0") {
// PHP cuts error message after null byte, so need special case // PHP cuts error message after null byte, so need special case
$errorMsg = 'Unexpected null byte'; $errorMsg = 'Unexpected null byte';
@@ -108,6 +141,7 @@ class Lexer
); );
} }
$tokens[] = new Token(Tokens::T_BAD_CHARACTER, $chr, $line, $i);
$errorHandler->handleError(new Error($errorMsg, [ $errorHandler->handleError(new Error($errorMsg, [
'startLine' => $line, 'startLine' => $line,
'endLine' => $line, 'endLine' => $line,
@@ -115,275 +149,29 @@ class Lexer
'endFilePos' => $i, 'endFilePos' => $i,
])); ]));
} }
return $tokens;
} }
/** private function isUnterminatedComment(Token $token): bool {
* Check whether comment token is unterminated. return ($token->id === \T_COMMENT || $token->id === \T_DOC_COMMENT)
* && substr($token->value, 0, 2) === '/*'
* @return bool && substr($token->value, -2) !== '*/';
*/
private function isUnterminatedComment($token) : bool {
return ($token[0] === \T_COMMENT || $token[0] === \T_DOC_COMMENT)
&& substr($token[1], 0, 2) === '/*'
&& substr($token[1], -2) !== '*/';
} }
/** private function createTokenMap(): array {
* Check whether an error *may* have occurred during tokenization.
*
* @return bool
*/
private function errorMayHaveOccurred() : bool {
if (defined('HHVM_VERSION')) {
// In HHVM token_get_all() does not throw warnings, so we need to conservatively
// assume that an error occurred
return true;
}
return null !== error_get_last();
}
protected function handleErrors(ErrorHandler $errorHandler) {
if (!$this->errorMayHaveOccurred()) {
return;
}
// PHP's error handling for token_get_all() is rather bad, so if we want detailed
// error information we need to compute it ourselves. Invalid character errors are
// detected by finding "gaps" in the token array. Unterminated comments are detected
// by checking if a trailing comment has a "*/" at the end.
$filePos = 0;
$line = 1;
foreach ($this->tokens as $token) {
$tokenValue = \is_string($token) ? $token : $token[1];
$tokenLen = \strlen($tokenValue);
if (substr($this->code, $filePos, $tokenLen) !== $tokenValue) {
// Something is missing, must be an invalid character
$nextFilePos = strpos($this->code, $tokenValue, $filePos);
$this->handleInvalidCharacterRange(
$filePos, $nextFilePos, $line, $errorHandler);
$filePos = (int) $nextFilePos;
}
$filePos += $tokenLen;
$line += substr_count($tokenValue, "\n");
}
if ($filePos !== \strlen($this->code)) {
if (substr($this->code, $filePos, 2) === '/*') {
// Unlike PHP, HHVM will drop unterminated comments entirely
$comment = substr($this->code, $filePos);
$errorHandler->handleError(new Error('Unterminated comment', [
'startLine' => $line,
'endLine' => $line + substr_count($comment, "\n"),
'startFilePos' => $filePos,
'endFilePos' => $filePos + \strlen($comment),
]));
// Emulate the PHP behavior
$isDocComment = isset($comment[3]) && $comment[3] === '*';
$this->tokens[] = [$isDocComment ? \T_DOC_COMMENT : \T_COMMENT, $comment, $line];
} else {
// Invalid characters at the end of the input
$this->handleInvalidCharacterRange(
$filePos, \strlen($this->code), $line, $errorHandler);
}
return;
}
if (count($this->tokens) > 0) {
// Check for unterminated comment
$lastToken = $this->tokens[count($this->tokens) - 1];
if ($this->isUnterminatedComment($lastToken)) {
$errorHandler->handleError(new Error('Unterminated comment', [
'startLine' => $line - substr_count($lastToken[1], "\n"),
'endLine' => $line,
'startFilePos' => $filePos - \strlen($lastToken[1]),
'endFilePos' => $filePos,
]));
}
}
}
/**
* Fetches the next token.
*
* The available attributes are determined by the 'usedAttributes' option, which can
* be specified in the constructor. The following attributes are supported:
*
* * 'comments' => Array of PhpParser\Comment or PhpParser\Comment\Doc instances,
* representing all comments that occurred between the previous
* non-discarded token and the current one.
* * 'startLine' => Line in which the node starts.
* * 'endLine' => Line in which the node ends.
* * 'startTokenPos' => Offset into the token array of the first token in the node.
* * 'endTokenPos' => Offset into the token array of the last token in the node.
* * 'startFilePos' => Offset into the code string of the first character that is part of the node.
* * 'endFilePos' => Offset into the code string of the last character that is part of the node.
*
* @param mixed $value Variable to store token content in
* @param mixed $startAttributes Variable to store start attributes in
* @param mixed $endAttributes Variable to store end attributes in
*
* @return int Token id
*/
public function getNextToken(&$value = null, &$startAttributes = null, &$endAttributes = null) : int {
$startAttributes = [];
$endAttributes = [];
while (1) {
if (isset($this->tokens[++$this->pos])) {
$token = $this->tokens[$this->pos];
} else {
// EOF token with ID 0
$token = "\0";
}
if ($this->attributeStartLineUsed) {
$startAttributes['startLine'] = $this->line;
}
if ($this->attributeStartTokenPosUsed) {
$startAttributes['startTokenPos'] = $this->pos;
}
if ($this->attributeStartFilePosUsed) {
$startAttributes['startFilePos'] = $this->filePos;
}
if (\is_string($token)) {
$value = $token;
if (isset($token[1])) {
// bug in token_get_all
$this->filePos += 2;
$id = ord('"');
} else {
$this->filePos += 1;
$id = ord($token);
}
} elseif (!isset($this->dropTokens[$token[0]])) {
$value = $token[1];
$id = $this->tokenMap[$token[0]];
if (\T_CLOSE_TAG === $token[0]) {
$this->prevCloseTagHasNewline = false !== strpos($token[1], "\n");
} elseif (\T_INLINE_HTML === $token[0]) {
$startAttributes['hasLeadingNewline'] = $this->prevCloseTagHasNewline;
}
$this->line += substr_count($value, "\n");
$this->filePos += \strlen($value);
} else {
if (\T_COMMENT === $token[0] || \T_DOC_COMMENT === $token[0]) {
if ($this->attributeCommentsUsed) {
$comment = \T_DOC_COMMENT === $token[0]
? new Comment\Doc($token[1], $this->line, $this->filePos, $this->pos)
: new Comment($token[1], $this->line, $this->filePos, $this->pos);
$startAttributes['comments'][] = $comment;
}
}
$this->line += substr_count($token[1], "\n");
$this->filePos += \strlen($token[1]);
continue;
}
if ($this->attributeEndLineUsed) {
$endAttributes['endLine'] = $this->line;
}
if ($this->attributeEndTokenPosUsed) {
$endAttributes['endTokenPos'] = $this->pos;
}
if ($this->attributeEndFilePosUsed) {
$endAttributes['endFilePos'] = $this->filePos - 1;
}
return $id;
}
throw new \RuntimeException('Reached end of lexer loop');
}
/**
* Returns the token array for current code.
*
* The token array is in the same format as provided by the
* token_get_all() function and does not discard tokens (i.e.
* whitespace and comments are included). The token position
* attributes are against this token array.
*
* @return array Array of tokens in token_get_all() format
*/
public function getTokens() : array {
return $this->tokens;
}
/**
* Handles __halt_compiler() by returning the text after it.
*
* @return string Remaining text
*/
public function handleHaltCompiler() : string {
// text after T_HALT_COMPILER, still including ();
$textAfter = substr($this->code, $this->filePos);
// ensure that it is followed by ();
// this simplifies the situation, by not allowing any comments
// in between of the tokens.
if (!preg_match('~^\s*\(\s*\)\s*(?:;|\?>\r?\n?)~', $textAfter, $matches)) {
throw new Error('__HALT_COMPILER must be followed by "();"');
}
// prevent the lexer from returning any further tokens
$this->pos = count($this->tokens);
// return with (); removed
return substr($textAfter, strlen($matches[0]));
}
/**
* Creates the token map.
*
* The token map maps the PHP internal token identifiers
* to the identifiers used by the Parser. Additionally it
* maps T_OPEN_TAG_WITH_ECHO to T_ECHO and T_CLOSE_TAG to ';'.
*
* @return array The token map
*/
protected function createTokenMap() : array {
$tokenMap = []; $tokenMap = [];
// 256 is the minimum possible token number, as everything below
// it is an ASCII value
for ($i = 256; $i < 1000; ++$i) { for ($i = 256; $i < 1000; ++$i) {
if (\T_DOUBLE_COLON === $i) { $name = token_name($i);
// T_DOUBLE_COLON is equivalent to T_PAAMAYIM_NEKUDOTAYIM if ('UNKNOWN' === $name) {
$tokenMap[$i] = Tokens::T_PAAMAYIM_NEKUDOTAYIM; continue;
} elseif(\T_OPEN_TAG_WITH_ECHO === $i) {
// T_OPEN_TAG_WITH_ECHO with dropped T_OPEN_TAG results in T_ECHO
$tokenMap[$i] = Tokens::T_ECHO;
} elseif(\T_CLOSE_TAG === $i) {
// T_CLOSE_TAG is equivalent to ';'
$tokenMap[$i] = ord(';');
} elseif ('UNKNOWN' !== $name = token_name($i)) {
if ('T_HASHBANG' === $name) {
// HHVM uses a special token for #! hashbang lines
$tokenMap[$i] = Tokens::T_INLINE_HTML;
} elseif (defined($name = Tokens::class . '::' . $name)) {
// Other tokens can be mapped directly
$tokenMap[$i] = constant($name);
} }
$constName = Tokens::class . '::' . $name;
if (defined($constName)) {
$tokenMap[$i] = constant($constName);
} }
} }
// HHVM uses a special token for numbers that overflow to double
if (defined('T_ONUMBER')) {
$tokenMap[\T_ONUMBER] = Tokens::T_DNUMBER;
}
// HHVM also has a separate token for the __COMPILER_HALT_OFFSET__ constant
if (defined('T_COMPILER_HALT_OFFSET')) {
$tokenMap[\T_COMPILER_HALT_OFFSET] = Tokens::T_STRING;
}
return $tokenMap; return $tokenMap;
} }
} }

View File

@@ -7,7 +7,9 @@ use PhpParser\ErrorHandler;
use PhpParser\Lexer; use PhpParser\Lexer;
use PhpParser\Lexer\TokenEmulator\CoaleseEqualTokenEmulator; use PhpParser\Lexer\TokenEmulator\CoaleseEqualTokenEmulator;
use PhpParser\Lexer\TokenEmulator\FnTokenEmulator; use PhpParser\Lexer\TokenEmulator\FnTokenEmulator;
use PhpParser\Lexer\TokenEmulator\NumericLiteralSeparatorEmulator;
use PhpParser\Lexer\TokenEmulator\TokenEmulatorInterface; use PhpParser\Lexer\TokenEmulator\TokenEmulatorInterface;
use PhpParser\Parser\Tokens;
class Emulative extends Lexer class Emulative extends Lexer
{ {
@@ -33,39 +35,25 @@ REGEX;
{ {
parent::__construct($options); parent::__construct($options);
// prepare token emulators
$this->tokenEmulators[] = new FnTokenEmulator(); $this->tokenEmulators[] = new FnTokenEmulator();
$this->tokenEmulators[] = new CoaleseEqualTokenEmulator(); $this->tokenEmulators[] = new CoaleseEqualTokenEmulator();
$this->tokenEmulators[] = new NumericLiteralSeparatorEmulator();
// add emulated tokens here
foreach ($this->tokenEmulators as $emulativeToken) {
$this->tokenMap[$emulativeToken->getTokenId()] = $emulativeToken->getParserTokenId();
}
} }
public function startLexing(string $code, ErrorHandler $errorHandler = null) { public function tokenize(string $code, ErrorHandler $errorHandler = null) {
$this->patches = []; $this->patches = [];
if ($this->isEmulationNeeded($code) === false) { if ($this->isEmulationNeeded($code) === false) {
// Nothing to emulate, yay // Nothing to emulate, yay
parent::startLexing($code, $errorHandler); return parent::tokenize($code, $errorHandler);
return;
} }
$collector = new ErrorHandler\Collecting(); $collector = new ErrorHandler\Collecting();
// 1. emulation of heredoc and nowdoc new syntax // 1. emulation of heredoc and nowdoc new syntax
$preparedCode = $this->processHeredocNowdoc($code); $preparedCode = $this->processHeredocNowdoc($code);
parent::startLexing($preparedCode, $collector); $tokens = parent::tokenize($preparedCode, $collector);
$tokens = $this->fixupTokens($tokens);
// add token emulation
foreach ($this->tokenEmulators as $emulativeToken) {
if ($emulativeToken->isEmulationNeeded($code)) {
$this->tokens = $emulativeToken->emulate($code, $this->tokens);
}
}
$this->fixupTokens();
$errors = $collector->getErrors(); $errors = $collector->getErrors();
if (!empty($errors)) { if (!empty($errors)) {
@@ -74,6 +62,15 @@ REGEX;
$errorHandler->handleError($error); $errorHandler->handleError($error);
} }
} }
// add token emulation
foreach ($this->tokenEmulators as $emulativeToken) {
if ($emulativeToken->isEmulationNeeded($code)) {
$tokens = $emulativeToken->emulate($code, $tokens);
}
}
return $tokens;
} }
private function isHeredocNowdocEmulationNeeded(string $code): bool private function isHeredocNowdocEmulationNeeded(string $code): bool
@@ -142,10 +139,10 @@ REGEX;
return $this->isHeredocNowdocEmulationNeeded($code); return $this->isHeredocNowdocEmulationNeeded($code);
} }
private function fixupTokens() private function fixupTokens(array $tokens): array
{ {
if (\count($this->patches) === 0) { if (\count($this->patches) === 0) {
return; return $tokens;
} }
// Load first patch // Load first patch
@@ -155,35 +152,29 @@ REGEX;
// We use a manual loop over the tokens, because we modify the array on the fly // We use a manual loop over the tokens, because we modify the array on the fly
$pos = 0; $pos = 0;
for ($i = 0, $c = \count($this->tokens); $i < $c; $i++) { for ($i = 0, $c = \count($tokens); $i < $c; $i++) {
$token = $this->tokens[$i]; $token = $tokens[$i];
if (\is_string($token)) { $len = \strlen($token->value);
// We assume that patches don't apply to string tokens
$pos += \strlen($token);
continue;
}
$len = \strlen($token[1]);
$posDelta = 0; $posDelta = 0;
while ($patchPos >= $pos && $patchPos < $pos + $len) { while ($patchPos >= $pos && $patchPos < $pos + $len) {
$patchTextLen = \strlen($patchText); $patchTextLen = \strlen($patchText);
if ($patchType === 'remove') { if ($patchType === 'remove') {
if ($patchPos === $pos && $patchTextLen === $len) { if ($patchPos === $pos && $patchTextLen === $len) {
// Remove token entirely // Remove token entirely
array_splice($this->tokens, $i, 1, []); array_splice($tokens, $i, 1, []);
$i--; $i--;
$c--; $c--;
} else { } else {
// Remove from token string // Remove from token string
$this->tokens[$i][1] = substr_replace( $tokens[$i]->value = substr_replace(
$token[1], '', $patchPos - $pos + $posDelta, $patchTextLen $token->value, '', $patchPos - $pos + $posDelta, $patchTextLen
); );
$posDelta -= $patchTextLen; $posDelta -= $patchTextLen;
} }
} elseif ($patchType === 'add') { } elseif ($patchType === 'add') {
// Insert into the token string // Insert into the token string
$this->tokens[$i][1] = substr_replace( $tokens[$i]->value = substr_replace(
$token[1], $patchText, $patchPos - $pos + $posDelta, 0 $token->value, $patchText, $patchPos - $pos + $posDelta, 0
); );
$posDelta += $patchTextLen; $posDelta += $patchTextLen;
} else { } else {
@@ -194,21 +185,30 @@ REGEX;
$patchIdx++; $patchIdx++;
if ($patchIdx >= \count($this->patches)) { if ($patchIdx >= \count($this->patches)) {
// No more patches, we're done // No more patches, we're done
return; break 2;
} }
list($patchPos, $patchType, $patchText) = $this->patches[$patchIdx]; list($patchPos, $patchType, $patchText) = $this->patches[$patchIdx];
// Multiple patches may apply to the same token. Reload the current one to check // Multiple patches may apply to the same token. Reload the current one to check
// If the new patch applies // If the new patch applies
$token = $this->tokens[$i]; $token = $tokens[$i];
} }
$pos += $len; $pos += $len;
} }
// A patch did not apply // To retain a minimum amount of sanity, recompute lines and offsets in a separate loop.
assert(false); $pos = 0;
$line = 1;
foreach ($tokens as $token) {
$token->filePos = $pos;
$token->line = $line;
$pos += \strlen($token->value);
$line += \substr_count($token->value, "\n");
}
return $tokens;
} }
/** /**

View File

@@ -4,21 +4,10 @@ namespace PhpParser\Lexer\TokenEmulator;
use PhpParser\Lexer\Emulative; use PhpParser\Lexer\Emulative;
use PhpParser\Parser\Tokens; use PhpParser\Parser\Tokens;
use PhpParser\Token;
final class CoaleseEqualTokenEmulator implements TokenEmulatorInterface final class CoaleseEqualTokenEmulator implements TokenEmulatorInterface
{ {
const T_COALESCE_EQUAL = 1007;
public function getTokenId(): int
{
return self::T_COALESCE_EQUAL;
}
public function getParserTokenId(): int
{
return Tokens::T_COALESCE_EQUAL;
}
public function isEmulationNeeded(string $code) : bool public function isEmulationNeeded(string $code) : bool
{ {
// skip version where this is supported // skip version where this is supported
@@ -33,20 +22,17 @@ final class CoaleseEqualTokenEmulator implements TokenEmulatorInterface
{ {
// We need to manually iterate and manage a count because we'll change // We need to manually iterate and manage a count because we'll change
// the tokens array on the way // the tokens array on the way
$line = 1;
for ($i = 0, $c = count($tokens); $i < $c; ++$i) { for ($i = 0, $c = count($tokens); $i < $c; ++$i) {
if (isset($tokens[$i + 1])) { if (isset($tokens[$i + 1])) {
if ($tokens[$i][0] === T_COALESCE && $tokens[$i + 1] === '=') { $token = $tokens[$i];
if ($token->id === Tokens::T_COALESCE && $tokens[$i + 1]->value === '=') {
array_splice($tokens, $i, 2, [ array_splice($tokens, $i, 2, [
[self::T_COALESCE_EQUAL, '??=', $line] new Token(Tokens::T_COALESCE_EQUAL, '??=', $token->line, $token->filePos),
]); ]);
$c--; $c--;
continue; continue;
} }
} }
if (\is_array($tokens[$i])) {
$line += substr_count($tokens[$i][1], "\n");
}
} }
return $tokens; return $tokens;

View File

@@ -4,21 +4,10 @@ namespace PhpParser\Lexer\TokenEmulator;
use PhpParser\Lexer\Emulative; use PhpParser\Lexer\Emulative;
use PhpParser\Parser\Tokens; use PhpParser\Parser\Tokens;
use PhpParser\Token;
final class FnTokenEmulator implements TokenEmulatorInterface final class FnTokenEmulator implements TokenEmulatorInterface
{ {
const T_FN = 1008;
public function getTokenId(): int
{
return self::T_FN;
}
public function getParserTokenId(): int
{
return Tokens::T_FN;
}
public function isEmulationNeeded(string $code) : bool public function isEmulationNeeded(string $code) : bool
{ {
// skip version where this is supported // skip version where this is supported
@@ -34,13 +23,14 @@ final class FnTokenEmulator implements TokenEmulatorInterface
// We need to manually iterate and manage a count because we'll change // We need to manually iterate and manage a count because we'll change
// the tokens array on the way // the tokens array on the way
foreach ($tokens as $i => $token) { foreach ($tokens as $i => $token) {
if ($token[0] === T_STRING && $token[1] === 'fn') { if ($token->id === Tokens::T_STRING && $token->value === 'fn') {
$previousNonSpaceToken = $this->getPreviousNonSpaceToken($tokens, $i); $previousNonSpaceToken = $this->getPreviousNonSpaceToken($tokens, $i);
if ($previousNonSpaceToken !== null && $previousNonSpaceToken[0] === T_OBJECT_OPERATOR) { if ($previousNonSpaceToken !== null
&& $previousNonSpaceToken->id === Tokens::T_OBJECT_OPERATOR) {
continue; continue;
} }
$tokens[$i][0] = self::T_FN; $token->id = Tokens::T_FN;
} }
} }
@@ -48,13 +38,13 @@ final class FnTokenEmulator implements TokenEmulatorInterface
} }
/** /**
* @param mixed[] $tokens * @param Token[] $tokens
* @return mixed[]|null * @return Token|null
*/ */
private function getPreviousNonSpaceToken(array $tokens, int $start) private function getPreviousNonSpaceToken(array $tokens, int $start)
{ {
for ($i = $start - 1; $i >= 0; --$i) { for ($i = $start - 1; $i >= 0; --$i) {
if ($tokens[$i][0] === T_WHITESPACE) { if ($tokens[$i]->id === Tokens::T_WHITESPACE) {
continue; continue;
} }

View File

@@ -0,0 +1,100 @@
<?php declare(strict_types=1);
namespace PhpParser\Lexer\TokenEmulator;
use PhpParser\Lexer\Emulative;
use PhpParser\Parser\Tokens;
use PhpParser\Token;
/**
 * Emulates numeric literal separators (1_000, 0xCAFE_F00D, ...) for PHP versions
 * older than Emulative::PHP_7_4.
 *
 * Whenever a number token is shorter than the full literal found at its position in
 * the source code, the tokens covering the rest of the literal are merged into a
 * single T_LNUMBER / T_DNUMBER token.
 */
final class NumericLiteralSeparatorEmulator implements TokenEmulatorInterface
{
    // Building blocks of the number grammar; each allows "_" between digit groups.
    const BIN = '(?:0b[01]+(?:_[01]+)*)';
    const HEX = '(?:0x[0-9a-f]+(?:_[0-9a-f]+)*)';
    const DEC = '(?:[0-9]+(?:_[0-9]+)*)';
    const SIMPLE_FLOAT = '(?:' . self::DEC . '\.' . self::DEC . '?|\.' . self::DEC . ')';
    const EXP = '(?:e[+-]?' . self::DEC . ')';
    const FLOAT = '(?:' . self::SIMPLE_FLOAT . self::EXP . '?|' . self::DEC . self::EXP . ')';
    // Complete pattern: case-insensitive (i) and anchored at the given offset (A).
    const NUMBER = '~' . self::FLOAT . '|' . self::BIN . '|' . self::HEX . '|' . self::DEC . '~iA';

    /**
     * Cheap pre-check deciding whether emulate() has to run at all.
     *
     * @param string $code Source code about to be tokenized
     * @return bool True if the code may contain a numeric literal separator
     */
    public function isEmulationNeeded(string $code) : bool
    {
        // skip version where this is supported
        if (version_compare(\PHP_VERSION, Emulative::PHP_7_4, '>=')) {
            return false;
        }

        // preg_match() returns 1 on a match, 0 on no match and false on error, so the
        // result has to be compared against 1: "!== false" is true for every input.
        // The pattern is deliberately loose (it also matches identifiers like "a_b");
        // emulate() does the precise matching.
        return preg_match('~[0-9a-f]_[0-9a-f]~i', $code) === 1;
    }

    /**
     * Merges number tokens that were split at a "_" separator back into one token.
     *
     * @param string  $code   Source code the tokens were produced from
     * @param Token[] $tokens Tokens whose filePos values index into $code
     * @return Token[] Tokens with separator-containing literals merged
     */
    public function emulate(string $code, array $tokens): array
    {
        // We need to manually iterate and manage a count because we'll change
        // the tokens array on the way
        for ($i = 0, $c = count($tokens); $i < $c; ++$i) {
            $token = $tokens[$i];
            if ($token->id !== Tokens::T_LNUMBER && $token->id !== Tokens::T_DNUMBER) {
                continue;
            }

            // Re-match the literal directly in the source, starting at the token.
            $res = preg_match(self::NUMBER, $code, $matches, 0, $token->filePos);
            assert($res, "No number at number token position");

            $tokenLen = \strlen($token->value);
            $match = $matches[0];
            $matchLen = \strlen($match);
            if ($matchLen === $tokenLen) {
                // Original token already holds the full number.
                continue;
            }

            $tokenKind = $this->resolveIntegerOrFloatToken($match);
            $newTokens = [new Token($tokenKind, $match, $token->line, $token->filePos)];

            // Swallow following tokens until the whole match is covered.
            // $len is the number of bytes of the match consumed so far.
            $numTokens = 1;
            $len = $tokenLen;
            while ($matchLen > $len) {
                $nextToken = $tokens[$i + $numTokens];
                $nextTokenLen = \strlen($nextToken->value);

                $numTokens++;
                if ($matchLen < $len + $nextTokenLen) {
                    // Split trailing characters into a partial token. The first
                    // ($matchLen - $len) bytes of the next token belong to the number,
                    // so the remainder starts that many bytes further into the file.
                    $consumed = $matchLen - $len;
                    $partialText = substr($nextToken->value, $consumed);
                    $newTokens[] = new Token(
                        $nextToken->id, $partialText, $nextToken->line,
                        $nextToken->filePos + $consumed
                    );
                    break;
                }

                $len += $nextTokenLen;
            }

            array_splice($tokens, $i, $numTokens, $newTokens);
            $c -= $numTokens - \count($newTokens);
        }

        return $tokens;
    }

    /**
     * Determines whether a literal is an integer or a float token.
     *
     * Integer notations whose value is converted to float by bindec() / hexdec() /
     * octdec() / numeric conversion are reported as T_DNUMBER.
     *
     * @param string $str Literal text, possibly containing "_" separators
     * @return int Tokens::T_LNUMBER or Tokens::T_DNUMBER
     */
    private function resolveIntegerOrFloatToken(string $str): int
    {
        $str = str_replace('_', '', $str);

        if (stripos($str, '0b') === 0) {
            $num = bindec($str);
        } elseif (stripos($str, '0x') === 0) {
            $num = hexdec($str);
        } elseif (stripos($str, '0') === 0 && ctype_digit($str)) {
            $num = octdec($str);
        } else {
            $num = +$str;
        }

        return is_float($num) ? Tokens::T_DNUMBER : Tokens::T_LNUMBER;
    }
}

View File

@@ -2,16 +2,16 @@
namespace PhpParser\Lexer\TokenEmulator; namespace PhpParser\Lexer\TokenEmulator;
use PhpParser\Token;
/** @internal */
interface TokenEmulatorInterface interface TokenEmulatorInterface
{ {
public function getTokenId(): int;
public function getParserTokenId(): int;
public function isEmulationNeeded(string $code): bool; public function isEmulationNeeded(string $code): bool;
/** /**
* @return array Modified Tokens * @param Token[] $tokens
* @return Token[]
*/ */
public function emulate(string $code, array $tokens): array; public function emulate(string $code, array $tokens): array;
} }

View File

@@ -34,6 +34,8 @@ class DNumber extends Scalar
* @return float The parsed number * @return float The parsed number
*/ */
public static function parse(string $str) : float { public static function parse(string $str) : float {
$str = str_replace('_', '', $str);
// if string contains any of .eE just cast it to float // if string contains any of .eE just cast it to float
if (false !== strpbrk($str, '.eE')) { if (false !== strpbrk($str, '.eE')) {
return (float) $str; return (float) $str;

View File

@@ -41,6 +41,8 @@ class LNumber extends Scalar
* @return LNumber The constructed LNumber, including kind attribute * @return LNumber The constructed LNumber, including kind attribute
*/ */
public static function fromString(string $str, array $attributes = [], bool $allowInvalidOctal = false) : LNumber { public static function fromString(string $str, array $attributes = [], bool $allowInvalidOctal = false) : LNumber {
$str = str_replace('_', '', $str);
if ('0' !== $str[0] || '0' === $str) { if ('0' !== $str[0] || '0' === $str) {
$attributes['kind'] = LNumber::KIND_DEC; $attributes['kind'] = LNumber::KIND_DEC;
return new LNumber((int) $str, $attributes); return new LNumber((int) $str, $attributes);

View File

@@ -4,8 +4,14 @@ namespace PhpParser;
abstract class NodeAbstract implements Node, \JsonSerializable abstract class NodeAbstract implements Node, \JsonSerializable
{ {
// TODO: Kill.
protected $attributes; protected $attributes;
/** @var FileContext|null */
protected $context = null;
protected $startTokenPos = -1;
protected $endTokenPos = -1;
/** /**
* Creates a Node. * Creates a Node.
* *
@@ -15,35 +21,42 @@ abstract class NodeAbstract implements Node, \JsonSerializable
$this->attributes = $attributes; $this->attributes = $attributes;
} }
/**
 * Records which tokens of a file this node spans.
 *
 * The stored context and token indices are what the line / file-position
 * getters of this class read from.
 *
 * @param FileContext $context    Context stored on the node
 * @param int         $firstToken Index stored as the start token position
 * @param int         $lastToken  Index stored as the end token position
 */
public function setTokenContext(FileContext $context, int $firstToken, int $lastToken) {
    $this->startTokenPos = $firstToken;
    $this->endTokenPos = $lastToken;
    $this->context = $context;
}
/** /**
* Gets line the node started in (alias of getStartLine). * Gets line the node started in (alias of getStartLine).
* *
* @return int Start line (or -1 if not available) * @return int Start line (or -1 if not available)
*/ */
public function getLine() : int { public function getLine() : int {
return $this->attributes['startLine'] ?? -1; return $this->context->tokens[$this->startTokenPos]->line ?? -1;
} }
/** /**
* Gets line the node started in. * Gets line the node started in.
* *
* Requires the 'startLine' attribute to be enabled in the lexer (enabled by default).
*
* @return int Start line (or -1 if not available) * @return int Start line (or -1 if not available)
*/ */
public function getStartLine() : int { public function getStartLine() : int {
return $this->attributes['startLine'] ?? -1; return $this->context->tokens[$this->startTokenPos]->line ?? -1;
} }
/**
 * Gets the line the node ended in.
 *
 * @return int End line (or -1 if not available)
 */
public function getEndLine() : int {
    // No context attached, or no token at the recorded end position.
    if (!isset($this->context->tokens[$this->endTokenPos])) {
        return -1;
    }

    $token = $this->context->tokens[$this->endTokenPos];
    // A token only stores the line it starts on; add the newlines contained in its
    // text to get the line it ends on. substr_count() must be given the token text:
    // passing the Token object itself raises a TypeError.
    return $token->line + \substr_count($token->value, "\n");
}
/** /**
@@ -51,12 +64,10 @@ abstract class NodeAbstract implements Node, \JsonSerializable
* *
* The offset is an index into the array returned by Lexer::getTokens(). * The offset is an index into the array returned by Lexer::getTokens().
* *
* Requires the 'startTokenPos' attribute to be enabled in the lexer (DISABLED by default).
*
* @return int Token start position (or -1 if not available) * @return int Token start position (or -1 if not available)
*/ */
public function getStartTokenPos() : int { public function getStartTokenPos() : int {
return $this->attributes['startTokenPos'] ?? -1; return $this->startTokenPos;
} }
/** /**
@@ -64,34 +75,33 @@ abstract class NodeAbstract implements Node, \JsonSerializable
* *
* The offset is an index into the array returned by Lexer::getTokens(). * The offset is an index into the array returned by Lexer::getTokens().
* *
* Requires the 'endTokenPos' attribute to be enabled in the lexer (DISABLED by default).
*
* @return int Token end position (or -1 if not available) * @return int Token end position (or -1 if not available)
*/ */
public function getEndTokenPos() : int { public function getEndTokenPos() : int {
return $this->attributes['endTokenPos'] ?? -1; return $this->endTokenPos;
} }
/** /**
* Gets the file offset of the first character that is part of this node. * Gets the file offset of the first character that is part of this node.
* *
* Requires the 'startFilePos' attribute to be enabled in the lexer (DISABLED by default).
*
* @return int File start position (or -1 if not available) * @return int File start position (or -1 if not available)
*/ */
public function getStartFilePos() : int { public function getStartFilePos() : int {
return $this->attributes['startFilePos'] ?? -1; return $this->context->tokens[$this->startTokenPos]->filePos ?? -1;
} }
/** /**
* Gets the file offset of the last character that is part of this node. * Gets the file offset of the last character that is part of this node.
* *
* Requires the 'endFilePos' attribute to be enabled in the lexer (DISABLED by default).
*
* @return int File end position (or -1 if not available) * @return int File end position (or -1 if not available)
*/ */
public function getEndFilePos() : int { public function getEndFilePos() : int {
return $this->attributes['endFilePos'] ?? -1; if (!isset($this->context->tokens[$this->endTokenPos])) {
return -1;
}
$token = $this->context->tokens[$this->endTokenPos];
return $token->filePos + \strlen($token->value) - 1;
} }
/** /**

View File

@@ -91,6 +91,7 @@ class NameResolver extends NodeVisitorAbstract
$this->resolveSignature($node); $this->resolveSignature($node);
} elseif ($node instanceof Stmt\ClassMethod } elseif ($node instanceof Stmt\ClassMethod
|| $node instanceof Expr\Closure || $node instanceof Expr\Closure
|| $node instanceof Expr\ArrowFunction
) { ) {
$this->resolveSignature($node); $this->resolveSignature($node);
} elseif ($node instanceof Stmt\Property) { } elseif ($node instanceof Stmt\Property) {

View File

@@ -491,7 +491,7 @@ abstract class PrettyPrinterAbstract
$pos = 0; $pos = 0;
$result = $this->pArray($stmts, $origStmts, $pos, 0, 'File', 'stmts', null); $result = $this->pArray($stmts, $origStmts, $pos, 0, 'File', 'stmts', null);
if (null !== $result) { if (null !== $result) {
$result .= $this->origTokens->getTokenCode($pos, count($origTokens), 0); $result .= $this->origTokens->getTokenCode($pos, count($origTokens) - 1, 0);
} else { } else {
// Fallback // Fallback
// TODO Add <?php properly // TODO Add <?php properly
@@ -1213,8 +1213,8 @@ abstract class PrettyPrinterAbstract
$stripLeft = ['left' => \T_WHITESPACE]; $stripLeft = ['left' => \T_WHITESPACE];
$stripRight = ['right' => \T_WHITESPACE]; $stripRight = ['right' => \T_WHITESPACE];
$stripDoubleArrow = ['right' => \T_DOUBLE_ARROW]; $stripDoubleArrow = ['right' => \T_DOUBLE_ARROW];
$stripColon = ['left' => ':']; $stripColon = ['left' => \ord(':')];
$stripEquals = ['left' => '=']; $stripEquals = ['left' => \ord('=')];
$this->removalMap = [ $this->removalMap = [
'Expr_ArrayDimFetch->dim' => $stripBoth, 'Expr_ArrayDimFetch->dim' => $stripBoth,
'Expr_ArrayItem->key' => $stripDoubleArrow, 'Expr_ArrayItem->key' => $stripDoubleArrow,
@@ -1254,22 +1254,22 @@ abstract class PrettyPrinterAbstract
// TODO: "yield" where both key and value are inserted doesn't work // TODO: "yield" where both key and value are inserted doesn't work
// [$find, $beforeToken, $extraLeft, $extraRight] // [$find, $beforeToken, $extraLeft, $extraRight]
$this->insertionMap = [ $this->insertionMap = [
'Expr_ArrayDimFetch->dim' => ['[', false, null, null], 'Expr_ArrayDimFetch->dim' => [\ord('['), false, null, null],
'Expr_ArrayItem->key' => [null, false, null, ' => '], 'Expr_ArrayItem->key' => [null, false, null, ' => '],
'Expr_ArrowFunction->returnType' => [')', false, ' : ', null], 'Expr_ArrowFunction->returnType' => [\ord(')'), false, ' : ', null],
'Expr_Closure->returnType' => [')', false, ' : ', null], 'Expr_Closure->returnType' => [\ord(')'), false, ' : ', null],
'Expr_Ternary->if' => ['?', false, ' ', ' '], 'Expr_Ternary->if' => [\ord('?'), false, ' ', ' '],
'Expr_Yield->key' => [\T_YIELD, false, null, ' => '], 'Expr_Yield->key' => [\T_YIELD, false, null, ' => '],
'Expr_Yield->value' => [\T_YIELD, false, ' ', null], 'Expr_Yield->value' => [\T_YIELD, false, ' ', null],
'Param->type' => [null, false, null, ' '], 'Param->type' => [null, false, null, ' '],
'Param->default' => [null, false, ' = ', null], 'Param->default' => [null, false, ' = ', null],
'Stmt_Break->num' => [\T_BREAK, false, ' ', null], 'Stmt_Break->num' => [\T_BREAK, false, ' ', null],
'Stmt_ClassMethod->returnType' => [')', false, ' : ', null], 'Stmt_ClassMethod->returnType' => [\ord(')'), false, ' : ', null],
'Stmt_Class->extends' => [null, false, ' extends ', null], 'Stmt_Class->extends' => [null, false, ' extends ', null],
'Expr_PrintableNewAnonClass->extends' => [null, ' extends ', null], 'Expr_PrintableNewAnonClass->extends' => [null, ' extends ', null],
'Stmt_Continue->num' => [\T_CONTINUE, false, ' ', null], 'Stmt_Continue->num' => [\T_CONTINUE, false, ' ', null],
'Stmt_Foreach->keyVar' => [\T_AS, false, null, ' => '], 'Stmt_Foreach->keyVar' => [\T_AS, false, null, ' => '],
'Stmt_Function->returnType' => [')', false, ' : ', null], 'Stmt_Function->returnType' => [\ord(')'), false, ' : ', null],
'Stmt_If->else' => [null, false, ' ', null], 'Stmt_If->else' => [null, false, ' ', null],
'Stmt_Namespace->name' => [\T_NAMESPACE, false, ' ', null], 'Stmt_Namespace->name' => [\T_NAMESPACE, false, ' ', null],
'Stmt_Property->type' => [\T_VARIABLE, true, null, ' '], 'Stmt_Property->type' => [\T_VARIABLE, true, null, ' '],
@@ -1367,19 +1367,19 @@ abstract class PrettyPrinterAbstract
// [$find, $extraLeft, $extraRight] // [$find, $extraLeft, $extraRight]
$this->emptyListInsertionMap = [ $this->emptyListInsertionMap = [
'Expr_ArrowFunction->params' => ['(', '', ''], 'Expr_ArrowFunction->params' => [\ord('('), '', ''],
'Expr_Closure->uses' => [')', ' use(', ')'], 'Expr_Closure->uses' => [\ord(')'), ' use(', ')'],
'Expr_Closure->params' => ['(', '', ''], 'Expr_Closure->params' => [\ord('('), '', ''],
'Expr_FuncCall->args' => ['(', '', ''], 'Expr_FuncCall->args' => [\ord('('), '', ''],
'Expr_MethodCall->args' => ['(', '', ''], 'Expr_MethodCall->args' => [\ord('('), '', ''],
'Expr_New->args' => ['(', '', ''], 'Expr_New->args' => [\ord('('), '', ''],
'Expr_PrintableNewAnonClass->args' => ['(', '', ''], 'Expr_PrintableNewAnonClass->args' => [\ord('('), '', ''],
'Expr_PrintableNewAnonClass->implements' => [null, ' implements ', ''], 'Expr_PrintableNewAnonClass->implements' => [null, ' implements ', ''],
'Expr_StaticCall->args' => ['(', '', ''], 'Expr_StaticCall->args' => [\ord('('), '', ''],
'Stmt_Class->implements' => [null, ' implements ', ''], 'Stmt_Class->implements' => [null, ' implements ', ''],
'Stmt_ClassMethod->params' => ['(', '', ''], 'Stmt_ClassMethod->params' => [\ord('('), '', ''],
'Stmt_Interface->extends' => [null, ' extends ', ''], 'Stmt_Interface->extends' => [null, ' extends ', ''],
'Stmt_Function->params' => ['(', '', ''], 'Stmt_Function->params' => [\ord('('), '', ''],
/* These cannot be empty to start with: /* These cannot be empty to start with:
* Expr_Isset->vars * Expr_Isset->vars

21
lib/PhpParser/Token.php Normal file
View File

@@ -0,0 +1,21 @@
<?php declare(strict_types=1);
namespace PhpParser;
/**
 * Plain value holder for one lexer token: what kind it is, its text, and where
 * in the source it starts.
 */
class Token {
    /** @var int Token id (a PhpParser\Parser\Tokens::T_* constant) */
    public $id; // TODO: Move this to PhpParser\Tokens.

    /** @var string Textual value of the token */
    public $value;

    /** @var int Start line number of the token */
    public $line;

    /** @var int Offset of the token in the source code */
    public $filePos;

    /**
     * @param int    $id      Token id
     * @param string $value   Textual value of the token
     * @param int    $line    Start line number of the token
     * @param int    $filePos Offset of the token in the source code
     */
    public function __construct(int $id, string $value, int $line, int $filePos) {
        // Position information first, then the token's identity and text.
        $this->filePos = $filePos;
        $this->line = $line;
        $this->value = $value;
        $this->id = $id;
    }
}

View File

@@ -9,11 +9,8 @@ use PhpParser\Node\Stmt;
class InterfaceTest extends \PHPUnit\Framework\TestCase class InterfaceTest extends \PHPUnit\Framework\TestCase
{ {
/** @var Interface_ */ protected function createInterfaceBuilder() {
protected $builder; return new Interface_('Contract');
protected function setUp() {
$this->builder = new Interface_('Contract');
} }
private function dump($node) { private function dump($node) {
@@ -22,13 +19,14 @@ class InterfaceTest extends \PHPUnit\Framework\TestCase
} }
public function testEmpty() { public function testEmpty() {
$contract = $this->builder->getNode(); $contract = $this->createInterfaceBuilder()->getNode();
$this->assertInstanceOf(Stmt\Interface_::class, $contract); $this->assertInstanceOf(Stmt\Interface_::class, $contract);
$this->assertEquals(new Node\Identifier('Contract'), $contract->name); $this->assertEquals(new Node\Identifier('Contract'), $contract->name);
} }
public function testExtending() { public function testExtending() {
$contract = $this->builder->extend('Space\Root1', 'Root2')->getNode(); $contract = $this->createInterfaceBuilder()
->extend('Space\Root1', 'Root2')->getNode();
$this->assertEquals( $this->assertEquals(
new Stmt\Interface_('Contract', [ new Stmt\Interface_('Contract', [
'extends' => [ 'extends' => [
@@ -41,7 +39,7 @@ class InterfaceTest extends \PHPUnit\Framework\TestCase
public function testAddMethod() { public function testAddMethod() {
$method = new Stmt\ClassMethod('doSomething'); $method = new Stmt\ClassMethod('doSomething');
$contract = $this->builder->addStmt($method)->getNode(); $contract = $this->createInterfaceBuilder()->addStmt($method)->getNode();
$this->assertSame([$method], $contract->stmts); $this->assertSame([$method], $contract->stmts);
} }
@@ -49,7 +47,7 @@ class InterfaceTest extends \PHPUnit\Framework\TestCase
$const = new Stmt\ClassConst([ $const = new Stmt\ClassConst([
new Node\Const_('SPEED_OF_LIGHT', new DNumber(299792458.0)) new Node\Const_('SPEED_OF_LIGHT', new DNumber(299792458.0))
]); ]);
$contract = $this->builder->addStmt($const)->getNode(); $contract = $this->createInterfaceBuilder()->addStmt($const)->getNode();
$this->assertSame(299792458.0, $contract->stmts[0]->consts[0]->value->value); $this->assertSame(299792458.0, $contract->stmts[0]->consts[0]->value->value);
} }
@@ -58,7 +56,7 @@ class InterfaceTest extends \PHPUnit\Framework\TestCase
new Node\Const_('SPEED_OF_LIGHT', new DNumber(299792458)) new Node\Const_('SPEED_OF_LIGHT', new DNumber(299792458))
]); ]);
$method = new Stmt\ClassMethod('doSomething'); $method = new Stmt\ClassMethod('doSomething');
$contract = $this->builder $contract = $this->createInterfaceBuilder()
->addStmt($method) ->addStmt($method)
->addStmt($const) ->addStmt($const)
->getNode() ->getNode()
@@ -69,7 +67,7 @@ class InterfaceTest extends \PHPUnit\Framework\TestCase
} }
public function testDocComment() { public function testDocComment() {
$node = $this->builder $node = $this->createInterfaceBuilder()
->setDocComment('/** Test */') ->setDocComment('/** Test */')
->getNode(); ->getNode();
@@ -81,7 +79,7 @@ class InterfaceTest extends \PHPUnit\Framework\TestCase
public function testInvalidStmtError() { public function testInvalidStmtError() {
$this->expectException(\LogicException::class); $this->expectException(\LogicException::class);
$this->expectExceptionMessage('Unexpected node of type "Stmt_PropertyProperty"'); $this->expectExceptionMessage('Unexpected node of type "Stmt_PropertyProperty"');
$this->builder->addStmt(new Stmt\PropertyProperty('invalid')); $this->createInterfaceBuilder()->addStmt(new Stmt\PropertyProperty('invalid'));
} }
public function testFullFunctional() { public function testFullFunctional() {
@@ -89,7 +87,7 @@ class InterfaceTest extends \PHPUnit\Framework\TestCase
new Node\Const_('SPEED_OF_LIGHT', new DNumber(299792458)) new Node\Const_('SPEED_OF_LIGHT', new DNumber(299792458))
]); ]);
$method = new Stmt\ClassMethod('doSomething'); $method = new Stmt\ClassMethod('doSomething');
$contract = $this->builder $contract = $this->createInterfaceBuilder()
->addStmt($method) ->addStmt($method)
->addStmt($const) ->addStmt($const)
->getNode() ->getNode()

View File

@@ -5,6 +5,7 @@ namespace PhpParser\Lexer;
use PhpParser\ErrorHandler; use PhpParser\ErrorHandler;
use PhpParser\LexerTest; use PhpParser\LexerTest;
use PhpParser\Parser\Tokens; use PhpParser\Parser\Tokens;
use PhpParser\Token;
class EmulativeTest extends LexerTest class EmulativeTest extends LexerTest
{ {
@@ -17,10 +18,15 @@ class EmulativeTest extends LexerTest
*/ */
public function testReplaceKeywords($keyword, $expectedToken) { public function testReplaceKeywords($keyword, $expectedToken) {
$lexer = $this->getLexer(); $lexer = $this->getLexer();
$lexer->startLexing('<?php ' . $keyword); $tokens = $lexer->tokenize('<?php ' . $keyword);
$this->assertEquals(
$this->assertSame($expectedToken, $lexer->getNextToken()); [
$this->assertSame(0, $lexer->getNextToken()); new Token(Tokens::T_OPEN_TAG, '<?php ', 1, 0),
new Token($expectedToken, $keyword, 1, 6),
new Token(0, "\0", 1, 6 + strlen($keyword)),
],
$tokens
);
} }
/** /**
@@ -28,11 +34,16 @@ class EmulativeTest extends LexerTest
*/ */
public function testNoReplaceKeywordsAfterObjectOperator(string $keyword) { public function testNoReplaceKeywordsAfterObjectOperator(string $keyword) {
$lexer = $this->getLexer(); $lexer = $this->getLexer();
$lexer->startLexing('<?php ->' . $keyword); $tokens = $lexer->tokenize('<?php ->' . $keyword);
$this->assertEquals(
$this->assertSame(Tokens::T_OBJECT_OPERATOR, $lexer->getNextToken()); [
$this->assertSame(Tokens::T_STRING, $lexer->getNextToken()); new Token(Tokens::T_OPEN_TAG, '<?php ', 1, 0),
$this->assertSame(0, $lexer->getNextToken()); new Token(Tokens::T_OBJECT_OPERATOR, '->', 1, 6),
new Token(Tokens::T_STRING, $keyword, 1, 8),
new Token(0, "\0", 1, 8 + strlen($keyword)),
],
$tokens
);
} }
/** /**
@@ -40,11 +51,17 @@ class EmulativeTest extends LexerTest
*/ */
public function testNoReplaceKeywordsAfterObjectOperatorWithSpaces(string $keyword) { public function testNoReplaceKeywordsAfterObjectOperatorWithSpaces(string $keyword) {
$lexer = $this->getLexer(); $lexer = $this->getLexer();
$lexer->startLexing('<?php -> ' . $keyword); $tokens = $lexer->tokenize('<?php -> ' . $keyword);
$this->assertEquals(
$this->assertSame(Tokens::T_OBJECT_OPERATOR, $lexer->getNextToken()); [
$this->assertSame(Tokens::T_STRING, $lexer->getNextToken()); new Token(Tokens::T_OPEN_TAG, '<?php ', 1, 0),
$this->assertSame(0, $lexer->getNextToken()); new Token(Tokens::T_OBJECT_OPERATOR, '->', 1, 6),
new Token(Tokens::T_WHITESPACE, ' ', 1, 8),
new Token(Tokens::T_STRING, $keyword, 1, 9),
new Token(0, "\0", 1, 9 + strlen($keyword)),
],
$tokens
);
} }
public function provideTestReplaceKeywords() { public function provideTestReplaceKeywords() {
@@ -75,12 +92,12 @@ class EmulativeTest extends LexerTest
*/ */
public function testLexNewFeatures($code, array $expectedTokens) { public function testLexNewFeatures($code, array $expectedTokens) {
$lexer = $this->getLexer(); $lexer = $this->getLexer();
$lexer->startLexing('<?php ' . $code); $tokens = $lexer->tokenize('<?php ' . $code);
// Drop <?php and EOF tokens.
$tokens = []; $tokens = array_slice($tokens, 1, -1);
while (0 !== $token = $lexer->getNextToken($text)) { $tokens = array_map(function(Token $token) {
$tokens[] = [$token, $text]; return [$token->id, $token->value];
} }, $tokens);
$this->assertSame($expectedTokens, $tokens); $this->assertSame($expectedTokens, $tokens);
} }
@@ -91,11 +108,16 @@ class EmulativeTest extends LexerTest
$stringifiedToken = '"' . addcslashes($code, '"\\') . '"'; $stringifiedToken = '"' . addcslashes($code, '"\\') . '"';
$lexer = $this->getLexer(); $lexer = $this->getLexer();
$lexer->startLexing('<?php ' . $stringifiedToken); $tokens = $lexer->tokenize('<?php ' . $stringifiedToken);
$this->assertEquals([
$this->assertSame(Tokens::T_CONSTANT_ENCAPSED_STRING, $lexer->getNextToken($text)); new Token(Tokens::T_OPEN_TAG, '<?php ', 1, 0),
$this->assertSame($stringifiedToken, $text); new Token(Tokens::T_CONSTANT_ENCAPSED_STRING, $stringifiedToken, 1, 6),
$this->assertSame(0, $lexer->getNextToken()); new Token(
0, "\0",
1 + substr_count($stringifiedToken, "\n"),
6 + strlen($stringifiedToken)
),
], $tokens);
} }
/** /**
@@ -104,7 +126,7 @@ class EmulativeTest extends LexerTest
public function testErrorAfterEmulation($code) { public function testErrorAfterEmulation($code) {
$errorHandler = new ErrorHandler\Collecting; $errorHandler = new ErrorHandler\Collecting;
$lexer = $this->getLexer(); $lexer = $this->getLexer();
$lexer->startLexing('<?php ' . $code . "\0", $errorHandler); $lexer->tokenize('<?php ' . $code . "\0", $errorHandler);
$errors = $errorHandler->getErrors(); $errors = $errorHandler->getErrors();
$this->assertCount(1, $errors); $this->assertCount(1, $errors);
@@ -123,10 +145,6 @@ class EmulativeTest extends LexerTest
public function provideTestLexNewFeatures() { public function provideTestLexNewFeatures() {
return [ return [
// PHP 7.4
['??=', [
[Tokens::T_COALESCE_EQUAL, '??='],
]],
['yield from', [ ['yield from', [
[Tokens::T_YIELD_FROM, 'yield from'], [Tokens::T_YIELD_FROM, 'yield from'],
]], ]],
@@ -161,15 +179,17 @@ class EmulativeTest extends LexerTest
[Tokens::T_START_HEREDOC, "<<<'NOWDOC'\n"], [Tokens::T_START_HEREDOC, "<<<'NOWDOC'\n"],
[Tokens::T_END_HEREDOC, 'NOWDOC'], [Tokens::T_END_HEREDOC, 'NOWDOC'],
[ord(';'), ';'], [ord(';'), ';'],
[Tokens::T_WHITESPACE, "\n"],
]], ]],
["<<<'NOWDOC'\nFoobar\nNOWDOC;\n", [ ["<<<'NOWDOC'\nFoobar\nNOWDOC;\n", [
[Tokens::T_START_HEREDOC, "<<<'NOWDOC'\n"], [Tokens::T_START_HEREDOC, "<<<'NOWDOC'\n"],
[Tokens::T_ENCAPSED_AND_WHITESPACE, "Foobar\n"], [Tokens::T_ENCAPSED_AND_WHITESPACE, "Foobar\n"],
[Tokens::T_END_HEREDOC, 'NOWDOC'], [Tokens::T_END_HEREDOC, 'NOWDOC'],
[ord(';'), ';'], [ord(';'), ';'],
[Tokens::T_WHITESPACE, "\n"],
]], ]],
// Flexible heredoc/nowdoc // PHP 7.3: Flexible heredoc/nowdoc
["<<<LABEL\nLABEL,", [ ["<<<LABEL\nLABEL,", [
[Tokens::T_START_HEREDOC, "<<<LABEL\n"], [Tokens::T_START_HEREDOC, "<<<LABEL\n"],
[Tokens::T_END_HEREDOC, "LABEL"], [Tokens::T_END_HEREDOC, "LABEL"],
@@ -198,12 +218,67 @@ class EmulativeTest extends LexerTest
[Tokens::T_START_HEREDOC, "<<<LABEL\n"], [Tokens::T_START_HEREDOC, "<<<LABEL\n"],
[Tokens::T_ENCAPSED_AND_WHITESPACE, "LABELNOPE\n"], [Tokens::T_ENCAPSED_AND_WHITESPACE, "LABELNOPE\n"],
[Tokens::T_END_HEREDOC, "LABEL"], [Tokens::T_END_HEREDOC, "LABEL"],
[Tokens::T_WHITESPACE, "\n"],
]], ]],
// Interpretation changed // Interpretation changed
["<<<LABEL\n LABEL\nLABEL\n", [ ["<<<LABEL\n LABEL\nLABEL\n", [
[Tokens::T_START_HEREDOC, "<<<LABEL\n"], [Tokens::T_START_HEREDOC, "<<<LABEL\n"],
[Tokens::T_END_HEREDOC, " LABEL"], [Tokens::T_END_HEREDOC, " LABEL"],
[Tokens::T_WHITESPACE, "\n"],
[Tokens::T_STRING, "LABEL"], [Tokens::T_STRING, "LABEL"],
[Tokens::T_WHITESPACE, "\n"],
]],
// PHP 7.4: Null coalesce equal
['??=', [
[Tokens::T_COALESCE_EQUAL, '??='],
]],
// PHP 7.4: Number literal separator
['1_000', [
[Tokens::T_LNUMBER, '1_000'],
]],
['0xCAFE_F00D', [
[Tokens::T_LNUMBER, '0xCAFE_F00D'],
]],
['0b0101_1111', [
[Tokens::T_LNUMBER, '0b0101_1111'],
]],
['0137_041', [
[Tokens::T_LNUMBER, '0137_041'],
]],
['1_000.0', [
[Tokens::T_DNUMBER, '1_000.0'],
]],
['1_0.0', [
[Tokens::T_DNUMBER, '1_0.0']
]],
['1_000_000_000.0', [
[Tokens::T_DNUMBER, '1_000_000_000.0']
]],
['0e1_0', [
[Tokens::T_DNUMBER, '0e1_0']
]],
['1_0e+10', [
[Tokens::T_DNUMBER, '1_0e+10']
]],
['1_0e-10', [
[Tokens::T_DNUMBER, '1_0e-10']
]],
['0b1011010101001010_110101010010_10101101010101_0101101011001_110111100', [
[Tokens::T_DNUMBER, '0b1011010101001010_110101010010_10101101010101_0101101011001_110111100'],
]],
['0xFFFF_FFFF_FFFF_FFFF', [
[Tokens::T_DNUMBER, '0xFFFF_FFFF_FFFF_FFFF'],
]],
['1_000+1', [
[Tokens::T_LNUMBER, '1_000'],
[ord('+'), '+'],
[Tokens::T_LNUMBER, '1'],
]],
['1_0abc', [
[Tokens::T_LNUMBER, '1_0'],
[Tokens::T_STRING, 'abc'],
]], ]],
]; ];
} }

View File

@@ -11,19 +11,30 @@ class LexerTest extends \PHPUnit\Framework\TestCase
return new Lexer($options); return new Lexer($options);
} }
/**
 * tokenize() is expected to return the full token list for the code, including
 * whitespace and comment tokens and a trailing "\0" token with id 0, each carrying
 * its start line and file offset.
 */
public function testTokenize() {
    $source = '<?php "a";' . "\n" . '// foo' . "\n" . '"b";';
    $actualTokens = $this->getLexer()->tokenize($source);

    // Token arguments: id, text, start line, start file offset.
    $this->assertEquals(
        [
            new Token(Tokens::T_OPEN_TAG, '<?php ', 1, 0),
            new Token(Tokens::T_CONSTANT_ENCAPSED_STRING, '"a"', 1, 6),
            new Token(\ord(';'), ';', 1, 9),
            new Token(Tokens::T_WHITESPACE, "\n", 1, 10),
            new Token(Tokens::T_COMMENT, '// foo' . "\n", 2, 11),
            new Token(Tokens::T_CONSTANT_ENCAPSED_STRING, '"b"', 3, 18),
            new Token(\ord(';'), ';', 3, 21),
            new Token(0, "\0", 3, 22),
        ],
        $actualTokens
    );
}
/** /**
* @dataProvider provideTestError * @dataProvider provideTestError
*/ */
public function testError($code, $messages) { public function testError($code, $messages) {
if (defined('HHVM_VERSION')) {
$this->markTestSkipped('HHVM does not throw warnings from token_get_all()');
}
$errorHandler = new ErrorHandler\Collecting(); $errorHandler = new ErrorHandler\Collecting();
$lexer = $this->getLexer(['usedAttributes' => [ $lexer = $this->getLexer();
'comments', 'startLine', 'endLine', 'startFilePos', 'endFilePos' $lexer->tokenize($code, $errorHandler);
]]);
$lexer->startLexing($code, $errorHandler);
$errors = $errorHandler->getErrors(); $errors = $errorHandler->getErrors();
$this->assertCount(count($messages), $errors); $this->assertCount(count($messages), $errors);
@@ -46,218 +57,4 @@ class LexerTest extends \PHPUnit\Framework\TestCase
]], ]],
]; ];
} }
/**
* @dataProvider provideTestLex
*/
public function testLex($code, $options, $tokens) {
$lexer = $this->getLexer($options);
$lexer->startLexing($code);
while ($id = $lexer->getNextToken($value, $startAttributes, $endAttributes)) {
$token = array_shift($tokens);
$this->assertSame($token[0], $id);
$this->assertSame($token[1], $value);
$this->assertEquals($token[2], $startAttributes);
$this->assertEquals($token[3], $endAttributes);
}
}
public function provideTestLex() {
return [
// tests conversion of closing PHP tag and drop of whitespace and opening tags
[
'<?php tokens ?>plaintext',
[],
[
[
Tokens::T_STRING, 'tokens',
['startLine' => 1], ['endLine' => 1]
],
[
ord(';'), '?>',
['startLine' => 1], ['endLine' => 1]
],
[
Tokens::T_INLINE_HTML, 'plaintext',
['startLine' => 1, 'hasLeadingNewline' => false],
['endLine' => 1]
],
]
],
// tests line numbers
[
'<?php' . "\n" . '$ token /** doc' . "\n" . 'comment */ $',
[],
[
[
ord('$'), '$',
['startLine' => 2], ['endLine' => 2]
],
[
Tokens::T_STRING, 'token',
['startLine' => 2], ['endLine' => 2]
],
[
ord('$'), '$',
[
'startLine' => 3,
'comments' => [
new Comment\Doc('/** doc' . "\n" . 'comment */', 2, 14, 5),
]
],
['endLine' => 3]
],
]
],
// tests comment extraction
[
'<?php /* comment */ // comment' . "\n" . '/** docComment 1 *//** docComment 2 */ token',
[],
[
[
Tokens::T_STRING, 'token',
[
'startLine' => 2,
'comments' => [
new Comment('/* comment */', 1, 6, 1),
new Comment('// comment' . "\n", 1, 20, 3),
new Comment\Doc('/** docComment 1 */', 2, 31, 4),
new Comment\Doc('/** docComment 2 */', 2, 50, 5),
],
],
['endLine' => 2]
],
]
],
// tests differing start and end line
[
'<?php "foo' . "\n" . 'bar"',
[],
[
[
Tokens::T_CONSTANT_ENCAPSED_STRING, '"foo' . "\n" . 'bar"',
['startLine' => 1], ['endLine' => 2]
],
]
],
// tests exact file offsets
[
'<?php "a";' . "\n" . '// foo' . "\n" . '"b";',
['usedAttributes' => ['startFilePos', 'endFilePos']],
[
[
Tokens::T_CONSTANT_ENCAPSED_STRING, '"a"',
['startFilePos' => 6], ['endFilePos' => 8]
],
[
ord(';'), ';',
['startFilePos' => 9], ['endFilePos' => 9]
],
[
Tokens::T_CONSTANT_ENCAPSED_STRING, '"b"',
['startFilePos' => 18], ['endFilePos' => 20]
],
[
ord(';'), ';',
['startFilePos' => 21], ['endFilePos' => 21]
],
]
],
// tests token offsets
[
'<?php "a";' . "\n" . '// foo' . "\n" . '"b";',
['usedAttributes' => ['startTokenPos', 'endTokenPos']],
[
[
Tokens::T_CONSTANT_ENCAPSED_STRING, '"a"',
['startTokenPos' => 1], ['endTokenPos' => 1]
],
[
ord(';'), ';',
['startTokenPos' => 2], ['endTokenPos' => 2]
],
[
Tokens::T_CONSTANT_ENCAPSED_STRING, '"b"',
['startTokenPos' => 5], ['endTokenPos' => 5]
],
[
ord(';'), ';',
['startTokenPos' => 6], ['endTokenPos' => 6]
],
]
],
// tests all attributes being disabled
[
'<?php /* foo */ $bar;',
['usedAttributes' => []],
[
[
Tokens::T_VARIABLE, '$bar',
[], []
],
[
ord(';'), ';',
[], []
]
]
],
// tests no tokens
[
'',
[],
[]
],
];
}
/**
* @dataProvider provideTestHaltCompiler
*/
public function testHandleHaltCompiler($code, $remaining) {
$lexer = $this->getLexer();
$lexer->startLexing($code);
while (Tokens::T_HALT_COMPILER !== $lexer->getNextToken());
$this->assertSame($remaining, $lexer->handleHaltCompiler());
$this->assertSame(0, $lexer->getNextToken());
}
public function provideTestHaltCompiler() {
return [
['<?php ... __halt_compiler();Remaining Text', 'Remaining Text'],
['<?php ... __halt_compiler ( ) ;Remaining Text', 'Remaining Text'],
['<?php ... __halt_compiler() ?>Remaining Text', 'Remaining Text'],
//array('<?php ... __halt_compiler();' . "\0", "\0"),
//array('<?php ... __halt_compiler /* */ ( ) ;Remaining Text', 'Remaining Text'),
];
}
public function testHandleHaltCompilerError() {
$this->expectException(Error::class);
$this->expectExceptionMessage('__HALT_COMPILER must be followed by "();"');
$lexer = $this->getLexer();
$lexer->startLexing('<?php ... __halt_compiler invalid ();');
while (Tokens::T_HALT_COMPILER !== $lexer->getNextToken());
$lexer->handleHaltCompiler();
}
public function testGetTokens() {
$code = '<?php "a";' . "\n" . '// foo' . "\n" . '"b";';
$expectedTokens = [
[T_OPEN_TAG, '<?php ', 1],
[T_CONSTANT_ENCAPSED_STRING, '"a"', 1],
';',
[T_WHITESPACE, "\n", 1],
[T_COMMENT, '// foo' . "\n", 2],
[T_CONSTANT_ENCAPSED_STRING, '"b"', 3],
';',
];
$lexer = $this->getLexer();
$lexer->startLexing($code);
$this->assertSame($expectedTokens, $lexer->getTokens());
}
} }

View File

@@ -266,13 +266,17 @@ class NodeTraverserTest extends \PHPUnit\Framework\TestCase
$traverser->addVisitor($visitor2); $traverser->addVisitor($visitor2);
$traverser->addVisitor($visitor3); $traverser->addVisitor($visitor3);
$getVisitors = (function () {
return $this->visitors;
})->bindTo($traverser, NodeTraverser::class);
$preExpected = [$visitor1, $visitor2, $visitor3]; $preExpected = [$visitor1, $visitor2, $visitor3];
$this->assertAttributeSame($preExpected, 'visitors', $traverser, 'The appropriate visitors have not been added'); $this->assertSame($preExpected, $getVisitors());
$traverser->removeVisitor($visitor2); $traverser->removeVisitor($visitor2);
$postExpected = [0 => $visitor1, 2 => $visitor3]; $postExpected = [0 => $visitor1, 2 => $visitor3];
$this->assertAttributeSame($postExpected, 'visitors', $traverser, 'The appropriate visitors are not present after removal'); $this->assertSame($postExpected, $getVisitors());
} }
public function testNoCloneNodes() { public function testNoCloneNodes() {

View File

@@ -219,6 +219,10 @@ function(A $a) : A {};
function fn3(?A $a) : ?A {} function fn3(?A $a) : ?A {}
function fn4(?array $a) : ?array {} function fn4(?array $a) : ?array {}
fn(array $a): array => $a;
fn(A $a): A => $a;
fn(?A $a): ?A => $a;
A::b(); A::b();
A::$b; A::$b;
A::B; A::B;
@@ -263,6 +267,9 @@ function fn3(?\NS\A $a) : ?\NS\A
function fn4(?array $a) : ?array function fn4(?array $a) : ?array
{ {
} }
fn(array $a): array => $a;
fn(\NS\A $a): \NS\A => $a;
fn(?\NS\A $a): ?\NS\A => $a;
\NS\A::b(); \NS\A::b();
\NS\A::$b; \NS\A::$b;
\NS\A::B; \NS\A::B;

View File

@@ -32,24 +32,25 @@ $a = 42;
@@{ "\1" }@@ @@{ "\1" }@@
$b = 24; $b = 24;
----- -----
!!positions
Unexpected character "" (ASCII 1) from 4:1 to 4:1 Unexpected character "" (ASCII 1) from 4:1 to 4:1
array( array(
0: Stmt_Expression( 0: Stmt_Expression[3:1 - 3:8](
expr: Expr_Assign( expr: Expr_Assign[3:1 - 3:7](
var: Expr_Variable( var: Expr_Variable[3:1 - 3:2](
name: a name: a
) )
expr: Scalar_LNumber( expr: Scalar_LNumber[3:6 - 3:7](
value: 42 value: 42
) )
) )
) )
1: Stmt_Expression( 1: Stmt_Expression[5:1 - 5:8](
expr: Expr_Assign( expr: Expr_Assign[5:1 - 5:7](
var: Expr_Variable( var: Expr_Variable[5:1 - 5:2](
name: b name: b
) )
expr: Scalar_LNumber( expr: Scalar_LNumber[5:6 - 5:7](
value: 24 value: 24
) )
) )
@@ -62,24 +63,25 @@ $a = 42;
@@{ "\0" }@@ @@{ "\0" }@@
$b = 24; $b = 24;
----- -----
!!positions
Unexpected null byte from 4:1 to 4:1 Unexpected null byte from 4:1 to 4:1
array( array(
0: Stmt_Expression( 0: Stmt_Expression[3:1 - 3:8](
expr: Expr_Assign( expr: Expr_Assign[3:1 - 3:7](
var: Expr_Variable( var: Expr_Variable[3:1 - 3:2](
name: a name: a
) )
expr: Scalar_LNumber( expr: Scalar_LNumber[3:6 - 3:7](
value: 42 value: 42
) )
) )
) )
1: Stmt_Expression( 1: Stmt_Expression[5:1 - 5:8](
expr: Expr_Assign( expr: Expr_Assign[5:1 - 5:7](
var: Expr_Variable( var: Expr_Variable[5:1 - 5:2](
name: b name: b
) )
expr: Scalar_LNumber( expr: Scalar_LNumber[5:6 - 5:7](
value: 24 value: 24
) )
) )
@@ -94,35 +96,36 @@ $b = 2;
@@{ "\2" }@@ @@{ "\2" }@@
$c = 3; $c = 3;
----- -----
Unexpected character "@@{ "\1" }@@" (ASCII 1) from 4:1 to 4:1 !!positions
Unexpected character "@@{ "\2" }@@" (ASCII 2) from 6:1 to 6:1 Unexpected character "" (ASCII 1) from 4:1 to 4:1
Unexpected character "" (ASCII 2) from 6:1 to 6:1
array( array(
0: Stmt_Expression( 0: Stmt_Expression[3:1 - 3:7](
expr: Expr_Assign( expr: Expr_Assign[3:1 - 3:6](
var: Expr_Variable( var: Expr_Variable[3:1 - 3:2](
name: a name: a
) )
expr: Scalar_LNumber( expr: Scalar_LNumber[3:6 - 3:6](
value: 1 value: 1
) )
) )
) )
1: Stmt_Expression( 1: Stmt_Expression[5:1 - 5:7](
expr: Expr_Assign( expr: Expr_Assign[5:1 - 5:6](
var: Expr_Variable( var: Expr_Variable[5:1 - 5:2](
name: b name: b
) )
expr: Scalar_LNumber( expr: Scalar_LNumber[5:6 - 5:6](
value: 2 value: 2
) )
) )
) )
2: Stmt_Expression( 2: Stmt_Expression[7:1 - 7:7](
expr: Expr_Assign( expr: Expr_Assign[7:1 - 7:6](
var: Expr_Variable( var: Expr_Variable[7:1 - 7:2](
name: c name: c
) )
expr: Scalar_LNumber( expr: Scalar_LNumber[7:6 - 7:6](
value: 3 value: 3
) )
) )

View File

@@ -0,0 +1,199 @@
Different integer syntaxes
-----
<?php
6.674_083e-11;
299_792_458;
0xCAFE_F00D;
0b0101_1111;
0137_041;
// already a valid constant name
_100;
// syntax errors
100_;
1__1;
1_.0;
1._0;
0x_123;
0b_101;
1_e2;
1e_2;
-----
Syntax error, unexpected T_STRING from 13:4 to 13:4
Syntax error, unexpected T_STRING from 14:2 to 14:4
Syntax error, unexpected T_STRING from 15:2 to 15:2
Syntax error, unexpected T_STRING from 16:3 to 16:4
Syntax error, unexpected T_STRING from 17:2 to 17:6
Syntax error, unexpected T_STRING from 18:2 to 18:6
Syntax error, unexpected T_STRING from 19:2 to 19:4
Syntax error, unexpected T_STRING from 20:2 to 20:4
array(
0: Stmt_Expression(
expr: Scalar_DNumber(
value: 6.674083E-11
)
)
1: Stmt_Expression(
expr: Scalar_LNumber(
value: 299792458
)
)
2: Stmt_Expression(
expr: Scalar_LNumber(
value: 3405705229
)
)
3: Stmt_Expression(
expr: Scalar_LNumber(
value: 95
)
)
4: Stmt_Expression(
expr: Scalar_LNumber(
value: 48673
)
)
5: Stmt_Expression(
expr: Expr_ConstFetch(
name: Name(
parts: array(
0: _100
)
comments: array(
0: // already a valid constant name
)
)
comments: array(
0: // already a valid constant name
)
)
comments: array(
0: // already a valid constant name
)
)
6: Stmt_Expression(
expr: Scalar_LNumber(
value: 100
comments: array(
0: // syntax errors
)
)
comments: array(
0: // syntax errors
)
)
7: Stmt_Expression(
expr: Expr_ConstFetch(
name: Name(
parts: array(
0: _
)
)
)
)
8: Stmt_Expression(
expr: Scalar_LNumber(
value: 1
)
)
9: Stmt_Expression(
expr: Expr_ConstFetch(
name: Name(
parts: array(
0: __1
)
)
)
)
10: Stmt_Expression(
expr: Scalar_LNumber(
value: 1
)
)
11: Stmt_Expression(
expr: Expr_ConstFetch(
name: Name(
parts: array(
0: _
)
)
)
)
12: Stmt_Expression(
expr: Scalar_DNumber(
value: 0
)
)
13: Stmt_Expression(
expr: Scalar_DNumber(
value: 1
)
)
14: Stmt_Expression(
expr: Expr_ConstFetch(
name: Name(
parts: array(
0: _0
)
)
)
)
15: Stmt_Expression(
expr: Scalar_LNumber(
value: 0
)
)
16: Stmt_Expression(
expr: Expr_ConstFetch(
name: Name(
parts: array(
0: x_123
)
)
)
)
17: Stmt_Expression(
expr: Scalar_LNumber(
value: 0
)
)
18: Stmt_Expression(
expr: Expr_ConstFetch(
name: Name(
parts: array(
0: b_101
)
)
)
)
19: Stmt_Expression(
expr: Scalar_LNumber(
value: 1
)
)
20: Stmt_Expression(
expr: Expr_ConstFetch(
name: Name(
parts: array(
0: _e2
)
)
)
)
21: Stmt_Expression(
expr: Scalar_LNumber(
value: 1
)
)
22: Stmt_Expression(
expr: Expr_ConstFetch(
name: Name(
parts: array(
0: e_2
)
)
)
)
)

View File

@@ -231,6 +231,8 @@ foreach (new RecursiveIteratorIterator(
echo $file, ":\n Parse failed with message: {$e->getMessage()}\n"; echo $file, ":\n Parse failed with message: {$e->getMessage()}\n";
++$parseFail; ++$parseFail;
} catch (Throwable $e) {
echo $file, ":\n Unknown error occurred: $e\n";
} }
} }