Update lexer implementations and tests

Start on lexer refactoring
WIP
2025-07-27 01:10:27 +02:00 · 2019-06-30 23:07:22 +02:00 · 2019-06-30 19:31:22 +02:00 · 2019-06-30 17:21:55 +02:00 · 2019-06-30 14:14:24 +02:00 · 2019-06-30 12:13:28 +02:00
25 changed files with 769 additions and 772 deletions
--- a/.travis.yml
+++ b/.travis.yml
@@ -16,7 +16,12 @@ php:
 install:
  - if [ $TRAVIS_PHP_VERSION = '7.0' ]; then composer require satooshi/php-coveralls '~1.0'; fi
-  - composer install --prefer-dist --ignore-platform-reqs
+  - |
    if [ $TRAVIS_PHP_VERSION = 'nightly' ]; then
      composer install --prefer-dist --ignore-platform-reqs;
    else
      composer install --prefer-dist;
    fi
 matrix:
  allow_failures:
--- a/composer.json
+++ b/composer.json
@@ -14,10 +14,11 @@
    ],
    "require": {
        "php": ">=7.0",
        "ext-json": "*",
        "ext-tokenizer": "*"
    },
    "require-dev": {
-        "phpunit/phpunit": "^6.5 || ^7.0"
+        "phpunit/phpunit": "^6.5 || ^7.0 || ^8.0"
    },
    "extra": {
        "branch-alias": {
--- a/doc/0_Introduction.markdown
+++ b/doc/0_Introduction.markdown
@@ -56,7 +56,7 @@ array(
 ```
 This matches the structure of the code: An echo statement, which takes two strings as expressions,
-with the values `Hi` and `World!`.
+with the values `Hi` and `World`.
 You can also see that the AST does not contain any whitespace information (but most comments are saved).
 So using it for formatting analysis is not possible.
--- a/lib/PhpParser/FileContext.php
+++ b/lib/PhpParser/FileContext.php
@@ -0,0 +1,8 @@
 <?php declare(strict_types=1);
 namespace PhpParser;
 /**
  * Plain data holder for per-file state. The only state declared here is the
  * file's token array, exposed as a public property.
  *
  * NOTE(review): nothing visible here shows who fills $tokens; presumably it is
  * the Token[] returned by the lexer's tokenize() — confirm against the caller.
  */
 class FileContext {
    /** @var Token[] Tokens associated with this file */
    public $tokens;
 }
--- a/lib/PhpParser/Internal/TokenStream.php
+++ b/lib/PhpParser/Internal/TokenStream.php
@@ -2,6 +2,8 @@
 namespace PhpParser\Internal;
 use PhpParser\Token;
 /**
 * Provides operations on token streams, for use by pretty printer.
 *
@@ -9,7 +11,7 @@ namespace PhpParser\Internal;
 */
 class TokenStream
 {
-    /** @var array Tokens (in token_get_all format) */
+    /** @var Token[] */
    private $tokens;
    /** @var int[] Map from position to indentation */
    private $indentMap;
@@ -17,7 +19,7 @@ class TokenStream
    /**
     * Create token stream instance.
     *
-     * @param array $tokens Tokens in token_get_all() format
+     * @param Token[] $tokens Tokens
     */
    public function __construct(array $tokens) {
        $this->tokens = $tokens;
@@ -33,8 +35,8 @@ class TokenStream
     * @return bool
     */
    public function haveParens(int $startPos, int $endPos) : bool {
-        return $this->haveTokenImmediativelyBefore($startPos, '(')
+        return $this->haveTokenImmediativelyBefore($startPos, \ord('('))
-            && $this->haveTokenImmediatelyAfter($endPos, ')');
+            && $this->haveTokenImmediatelyAfter($endPos, \ord(')'));
    }
    /**
@@ -46,8 +48,8 @@ class TokenStream
     * @return bool
     */
    public function haveBraces(int $startPos, int $endPos) : bool {
-        return $this->haveTokenImmediativelyBefore($startPos, '{')
+        return $this->haveTokenImmediativelyBefore($startPos, \ord('{'))
-            && $this->haveTokenImmediatelyAfter($endPos, '}');
+            && $this->haveTokenImmediatelyAfter($endPos, \ord('}'));
    }
    /**
@@ -64,7 +66,7 @@ class TokenStream
        $tokens = $this->tokens;
        $pos--;
        for (; $pos >= 0; $pos--) {
-            $tokenType = $tokens[$pos][0];
+            $tokenType = $tokens[$pos]->id;
            if ($tokenType === $expectedTokenType) {
                return true;
            }
@@ -90,7 +92,7 @@ class TokenStream
        $tokens = $this->tokens;
        $pos++;
        for (; $pos < \count($tokens); $pos++) {
-            $tokenType = $tokens[$pos][0];
+            $tokenType = $tokens[$pos]->id;
            if ($tokenType === $expectedTokenType) {
                return true;
            }
@@ -110,7 +112,7 @@ class TokenStream
            return $pos;
        }
-        if ($tokens[$pos][0] !== $skipTokenType) {
+        if ($tokens[$pos]->id !== $skipTokenType) {
            // Shouldn't happen. The skip token MUST be there
            throw new \Exception('Encountered unexpected token');
        }
@@ -127,7 +129,7 @@ class TokenStream
            return $pos;
        }
-        if ($tokens[$pos][0] !== $skipTokenType) {
+        if ($tokens[$pos]->id !== $skipTokenType) {
            // Shouldn't happen. The skip token MUST be there
            throw new \Exception('Encountered unexpected token');
        }
@@ -145,7 +147,7 @@ class TokenStream
    public function skipLeftWhitespace(int $pos) {
        $tokens = $this->tokens;
        for (; $pos >= 0; $pos--) {
-            $type = $tokens[$pos][0];
+            $type = $tokens[$pos]->id;
            if ($type !== \T_WHITESPACE && $type !== \T_COMMENT && $type !== \T_DOC_COMMENT) {
                break;
            }
@@ -162,7 +164,7 @@ class TokenStream
    public function skipRightWhitespace(int $pos) {
        $tokens = $this->tokens;
        for ($count = \count($tokens); $pos < $count; $pos++) {
-            $type = $tokens[$pos][0];
+            $type = $tokens[$pos]->id;
            if ($type !== \T_WHITESPACE && $type !== \T_COMMENT && $type !== \T_DOC_COMMENT) {
                break;
            }
@@ -173,7 +175,7 @@ class TokenStream
    public function findRight($pos, $findTokenType) {
        $tokens = $this->tokens;
        for ($count = \count($tokens); $pos < $count; $pos++) {
-            $type = $tokens[$pos][0];
+            $type = $tokens[$pos]->id;
            if ($type === $findTokenType) {
                return $pos;
            }
@@ -206,9 +208,8 @@ class TokenStream
        $result = '';
        for ($pos = $from; $pos < $to; $pos++) {
            $token = $tokens[$pos];
-            if (\is_array($token)) {
+            $type = $token->id;
-                $type = $token[0];
+            $content = $token->value;
                $content = $token[1];
            if ($type === \T_CONSTANT_ENCAPSED_STRING || $type === \T_ENCAPSED_AND_WHITESPACE) {
                $result .= $content;
            } else {
@@ -221,9 +222,6 @@ class TokenStream
                    $result .= $content;
                }
            }
            } else {
                $result .= $token;
            }
        }
        return $result;
    }
@@ -239,8 +237,8 @@ class TokenStream
        foreach ($this->tokens as $token) {
            $indentMap[] = $indent;
-            if ($token[0] === \T_WHITESPACE) {
+            if ($token->id === \T_WHITESPACE) {
-                $content = $token[1];
+                $content = $token->value;
                $newlinePos = \strrpos($content, "\n");
                if (false !== $newlinePos) {
                    $indent = \strlen($content) - $newlinePos - 1;
--- a/lib/PhpParser/Lexer.php
+++ b/lib/PhpParser/Lexer.php
@@ -6,99 +6,132 @@ use PhpParser\Parser\Tokens;
 class Lexer
 {
-    protected $code;
+    /** @var array Map from PHP tokens to PhpParser tokens. */
    protected $tokens;
    protected $pos;
    protected $line;
    protected $filePos;
    protected $prevCloseTagHasNewline;
    protected $tokenMap;
    protected $dropTokens;
    private $attributeStartLineUsed;
    private $attributeEndLineUsed;
    private $attributeStartTokenPosUsed;
    private $attributeEndTokenPosUsed;
    private $attributeStartFilePosUsed;
    private $attributeEndFilePosUsed;
    private $attributeCommentsUsed;
    /**
     * Creates a Lexer.
     *
-     * @param array $options Options array. Currently only the 'usedAttributes' option is supported,
+     * @param array $options Options array. Currently unused.
     *                       which is an array of attributes to add to the AST nodes. Possible
     *                       attributes are: 'comments', 'startLine', 'endLine', 'startTokenPos',
     *                       'endTokenPos', 'startFilePos', 'endFilePos'. The option defaults to the
     *                       first three. For more info see getNextToken() docs.
     */
    public function __construct(array $options = []) {
        // map from internal tokens to PhpParser tokens
        $this->tokenMap = $this->createTokenMap();
        // map of tokens to drop while lexing (the map is only used for isset lookup,
        // that's why the value is simply set to 1; the value is never actually used.)
        $this->dropTokens = array_fill_keys(
            [\T_WHITESPACE, \T_OPEN_TAG, \T_COMMENT, \T_DOC_COMMENT], 1
        );
        $defaultAttributes = ['comments', 'startLine', 'endLine'];
        $usedAttributes = array_fill_keys($options['usedAttributes'] ?? $defaultAttributes, true);
        // Create individual boolean properties to make these checks faster.
        $this->attributeStartLineUsed = isset($usedAttributes['startLine']);
        $this->attributeEndLineUsed = isset($usedAttributes['endLine']);
        $this->attributeStartTokenPosUsed = isset($usedAttributes['startTokenPos']);
        $this->attributeEndTokenPosUsed = isset($usedAttributes['endTokenPos']);
        $this->attributeStartFilePosUsed = isset($usedAttributes['startFilePos']);
        $this->attributeEndFilePosUsed = isset($usedAttributes['endFilePos']);
        $this->attributeCommentsUsed = isset($usedAttributes['comments']);
    }
    /**
-     * Initializes the lexer for lexing the provided source code.
+     * Get token IDs that should be ignored by the parser.
     *
     * @return array
     */
    public function getIgnorableTokens(): array {
        return [
            Tokens::T_WHITESPACE,
            Tokens::T_COMMENT,
            Tokens::T_DOC_COMMENT,
            Tokens::T_OPEN_TAG,
            Tokens::T_BAD_CHARACTER,
        ];
    }
    /**
     * Get map for token canonicalization.
     *
     * @return array
     */
    public function getCanonicalizationMap(): array {
        return [
            Tokens::T_OPEN_TAG_WITH_ECHO => Tokens::T_ECHO,
            Tokens::T_CLOSE_TAG => \ord(';'),
        ];
    }
    /**
     * Tokenizes the given PHP code into an array of Tokens.
     *
     * This function does not throw if lexing errors occur. Instead, errors may be retrieved using
     * the getErrors() method.
     *
-     * @param string $code The source code to lex
+     * @param string $code The source code to tokenize
     * @param ErrorHandler|null $errorHandler Error handler to use for lexing errors. Defaults to
     *                                        ErrorHandler\Throwing
     *
     * @return Token[] Sequence of tokens
     */
-    public function startLexing(string $code, ErrorHandler $errorHandler = null) {
+    public function tokenize(string $code, ErrorHandler $errorHandler = null) {
        if (null === $errorHandler) {
            $errorHandler = new ErrorHandler\Throwing();
        }
        $this->code = $code; // keep the code around for __halt_compiler() handling
        $this->pos  = -1;
        $this->line =  1;
        $this->filePos = 0;
        // If inline HTML occurs without preceding code, treat it as if it had a leading newline.
        // This ensures proper composability, because having a newline is the "safe" assumption.
        $this->prevCloseTagHasNewline = true;
        $scream = ini_set('xdebug.scream', '0');
        error_clear_last();
-        $this->tokens = @token_get_all($code);
+        $rawTokens = @token_get_all($code);
-        $this->handleErrors($errorHandler);
+        $checkForMissingTokens = null !== error_get_last();
        if (false !== $scream) {
            ini_set('xdebug.scream', $scream);
        }
        $tokens = [];
        $filePos = 0;
        $line = 1;
        foreach ($rawTokens as $rawToken) {
            if (\is_array($rawToken)) {
                $token = new Token($this->tokenMap[$rawToken[0]], $rawToken[1], $line, $filePos);
            } elseif (\strlen($rawToken) == 2) {
                // Bug in token_get_all() when lexing b".
                $token = new Token(\ord('"'), $rawToken, $line, $filePos);
            } else {
                $token = new Token(\ord($rawToken), $rawToken, $line, $filePos);
            }
-    private function handleInvalidCharacterRange($start, $end, $line, ErrorHandler $errorHandler) {
+            $value = $token->value;
            $tokenLen = \strlen($value);
            if ($checkForMissingTokens && substr($code, $filePos, $tokenLen) !== $value) {
                // Something is missing, must be an invalid character
                $nextFilePos = strpos($code, $value, $filePos);
                $badCharTokens = $this->handleInvalidCharacterRange(
                    $code, $filePos, $nextFilePos, $line, $errorHandler);
                $tokens = array_merge($tokens, $badCharTokens);
                $filePos = (int) $nextFilePos;
            }
            $tokens[] = $token;
            $filePos += $tokenLen;
            $line += substr_count($value, "\n");
        }
        if ($filePos !== \strlen($code)) {
            // Invalid characters at the end of the input
            $badCharTokens = $this->handleInvalidCharacterRange(
                $code, $filePos, \strlen($code), $line, $errorHandler);
            $tokens = array_merge($tokens, $badCharTokens);
        }
        if (\count($tokens) > 0) {
            // Check for unterminated comment
            $lastToken = $tokens[\count($tokens) - 1];
            if ($this->isUnterminatedComment($lastToken)) {
                $errorHandler->handleError(new Error('Unterminated comment', [
                    'startLine' => $line - substr_count($lastToken->value, "\n"),
                    'endLine' => $line,
                    'startFilePos' => $filePos - \strlen($lastToken->value),
                    'endFilePos' => $filePos,
                ]));
            }
        }
        // Add an EOF sentinel token
        // TODO: Should the value be an empty string instead?
        $tokens[] = new Token(0, "\0", $line, \strlen($code));
        return $tokens;
    }
    private function handleInvalidCharacterRange(
        string $code, int $start, int $end, int $line, ErrorHandler $errorHandler
    ) {
        $tokens = [];
        for ($i = $start; $i < $end; $i++) {
-            $chr = $this->code[$i];
+            $chr = $code[$i];
            if ($chr === 'b' || $chr === 'B') {
                // HHVM does not treat b" tokens correctly, so ignore these
                continue;
            }
            if ($chr === "\0") {
                // PHP cuts error message after null byte, so need special case
                $errorMsg = 'Unexpected null byte';
@@ -108,6 +141,7 @@ class Lexer
                );
            }
            $tokens[] = new Token(Tokens::T_BAD_CHARACTER, $chr, $line, $i);
            $errorHandler->handleError(new Error($errorMsg, [
                'startLine' => $line,
                'endLine' => $line,
@@ -115,275 +149,29 @@ class Lexer
                'endFilePos' => $i,
            ]));
        }
        return $tokens;
    }
-    /**
+    private function isUnterminatedComment(Token $token): bool {
-     * Check whether comment token is unterminated.
+        return ($token->id === \T_COMMENT || $token->id === \T_DOC_COMMENT)
-     *
+            && substr($token->value, 0, 2) === '/*'
-     * @return bool
+            && substr($token->value, -2) !== '*/';
     */
    private function isUnterminatedComment($token) : bool {
        return ($token[0] === \T_COMMENT || $token[0] === \T_DOC_COMMENT)
            && substr($token[1], 0, 2) === '/*'
            && substr($token[1], -2) !== '*/';
    }
-    /**
+    private function createTokenMap(): array {
     * Check whether an error *may* have occurred during tokenization.
     *
     * @return bool
     */
    private function errorMayHaveOccurred() : bool {
        if (defined('HHVM_VERSION')) {
            // In HHVM token_get_all() does not throw warnings, so we need to conservatively
            // assume that an error occurred
            return true;
        }
        return null !== error_get_last();
    }
    protected function handleErrors(ErrorHandler $errorHandler) {
        if (!$this->errorMayHaveOccurred()) {
            return;
        }
        // PHP's error handling for token_get_all() is rather bad, so if we want detailed
        // error information we need to compute it ourselves. Invalid character errors are
        // detected by finding "gaps" in the token array. Unterminated comments are detected
        // by checking if a trailing comment has a "*/" at the end.
        $filePos = 0;
        $line = 1;
        foreach ($this->tokens as $token) {
            $tokenValue = \is_string($token) ? $token : $token[1];
            $tokenLen = \strlen($tokenValue);
            if (substr($this->code, $filePos, $tokenLen) !== $tokenValue) {
                // Something is missing, must be an invalid character
                $nextFilePos = strpos($this->code, $tokenValue, $filePos);
                $this->handleInvalidCharacterRange(
                    $filePos, $nextFilePos, $line, $errorHandler);
                $filePos = (int) $nextFilePos;
            }
            $filePos += $tokenLen;
            $line += substr_count($tokenValue, "\n");
        }
        if ($filePos !== \strlen($this->code)) {
            if (substr($this->code, $filePos, 2) === '/*') {
                // Unlike PHP, HHVM will drop unterminated comments entirely
                $comment = substr($this->code, $filePos);
                $errorHandler->handleError(new Error('Unterminated comment', [
                    'startLine' => $line,
                    'endLine' => $line + substr_count($comment, "\n"),
                    'startFilePos' => $filePos,
                    'endFilePos' => $filePos + \strlen($comment),
                ]));
                // Emulate the PHP behavior
                $isDocComment = isset($comment[3]) && $comment[3] === '*';
                $this->tokens[] = [$isDocComment ? \T_DOC_COMMENT : \T_COMMENT, $comment, $line];
            } else {
                // Invalid characters at the end of the input
                $this->handleInvalidCharacterRange(
                    $filePos, \strlen($this->code), $line, $errorHandler);
            }
            return;
        }
        if (count($this->tokens) > 0) {
            // Check for unterminated comment
            $lastToken = $this->tokens[count($this->tokens) - 1];
            if ($this->isUnterminatedComment($lastToken)) {
                $errorHandler->handleError(new Error('Unterminated comment', [
                    'startLine' => $line - substr_count($lastToken[1], "\n"),
                    'endLine' => $line,
                    'startFilePos' => $filePos - \strlen($lastToken[1]),
                    'endFilePos' => $filePos,
                ]));
            }
        }
    }
    /**
     * Fetches the next token.
     *
     * The available attributes are determined by the 'usedAttributes' option, which can
     * be specified in the constructor. The following attributes are supported:
     *
     *  * 'comments'      => Array of PhpParser\Comment or PhpParser\Comment\Doc instances,
     *                       representing all comments that occurred between the previous
     *                       non-discarded token and the current one.
     *  * 'startLine'     => Line in which the node starts.
     *  * 'endLine'       => Line in which the node ends.
     *  * 'startTokenPos' => Offset into the token array of the first token in the node.
     *  * 'endTokenPos'   => Offset into the token array of the last token in the node.
     *  * 'startFilePos'  => Offset into the code string of the first character that is part of the node.
     *  * 'endFilePos'    => Offset into the code string of the last character that is part of the node.
     *
     * @param mixed $value           Variable to store token content in
     * @param mixed $startAttributes Variable to store start attributes in
     * @param mixed $endAttributes   Variable to store end attributes in
     *
     * @return int Token id
     */
    public function getNextToken(&$value = null, &$startAttributes = null, &$endAttributes = null) : int {
        $startAttributes = [];
        $endAttributes   = [];
        while (1) {
            if (isset($this->tokens[++$this->pos])) {
                $token = $this->tokens[$this->pos];
            } else {
                // EOF token with ID 0
                $token = "\0";
            }
            if ($this->attributeStartLineUsed) {
                $startAttributes['startLine'] = $this->line;
            }
            if ($this->attributeStartTokenPosUsed) {
                $startAttributes['startTokenPos'] = $this->pos;
            }
            if ($this->attributeStartFilePosUsed) {
                $startAttributes['startFilePos'] = $this->filePos;
            }
            if (\is_string($token)) {
                $value = $token;
                if (isset($token[1])) {
                    // bug in token_get_all
                    $this->filePos += 2;
                    $id = ord('"');
                } else {
                    $this->filePos += 1;
                    $id = ord($token);
                }
            } elseif (!isset($this->dropTokens[$token[0]])) {
                $value = $token[1];
                $id = $this->tokenMap[$token[0]];
                if (\T_CLOSE_TAG === $token[0]) {
                    $this->prevCloseTagHasNewline = false !== strpos($token[1], "\n");
                } elseif (\T_INLINE_HTML === $token[0]) {
                    $startAttributes['hasLeadingNewline'] = $this->prevCloseTagHasNewline;
                }
                $this->line += substr_count($value, "\n");
                $this->filePos += \strlen($value);
            } else {
                if (\T_COMMENT === $token[0] || \T_DOC_COMMENT === $token[0]) {
                    if ($this->attributeCommentsUsed) {
                        $comment = \T_DOC_COMMENT === $token[0]
                            ? new Comment\Doc($token[1], $this->line, $this->filePos, $this->pos)
                            : new Comment($token[1], $this->line, $this->filePos, $this->pos);
                        $startAttributes['comments'][] = $comment;
                    }
                }
                $this->line += substr_count($token[1], "\n");
                $this->filePos += \strlen($token[1]);
                continue;
            }
            if ($this->attributeEndLineUsed) {
                $endAttributes['endLine'] = $this->line;
            }
            if ($this->attributeEndTokenPosUsed) {
                $endAttributes['endTokenPos'] = $this->pos;
            }
            if ($this->attributeEndFilePosUsed) {
                $endAttributes['endFilePos'] = $this->filePos - 1;
            }
            return $id;
        }
        throw new \RuntimeException('Reached end of lexer loop');
    }
    /**
     * Returns the token array for current code.
     *
     * The token array is in the same format as provided by the
     * token_get_all() function and does not discard tokens (i.e.
     * whitespace and comments are included). The token position
     * attributes are against this token array.
     *
     * @return array Array of tokens in token_get_all() format
     */
    public function getTokens() : array {
        return $this->tokens;
    }
    /**
     * Handles __halt_compiler() by returning the text after it.
     *
     * @return string Remaining text
     */
    public function handleHaltCompiler() : string {
        // text after T_HALT_COMPILER, still including ();
        $textAfter = substr($this->code, $this->filePos);
        // ensure that it is followed by ();
        // this simplifies the situation, by not allowing any comments
        // in between of the tokens.
        if (!preg_match('~^\s*\(\s*\)\s*(?:;|\?>\r?\n?)~', $textAfter, $matches)) {
            throw new Error('__HALT_COMPILER must be followed by "();"');
        }
        // prevent the lexer from returning any further tokens
        $this->pos = count($this->tokens);
        // return with (); removed
        return substr($textAfter, strlen($matches[0]));
    }
    /**
     * Creates the token map.
     *
     * The token map maps the PHP internal token identifiers
     * to the identifiers used by the Parser. Additionally it
     * maps T_OPEN_TAG_WITH_ECHO to T_ECHO and T_CLOSE_TAG to ';'.
     *
     * @return array The token map
     */
    protected function createTokenMap() : array {
        $tokenMap = [];
        // 256 is the minimum possible token number, as everything below
        // it is an ASCII value
        for ($i = 256; $i < 1000; ++$i) {
-            if (\T_DOUBLE_COLON === $i) {
+            $name = token_name($i);
-                // T_DOUBLE_COLON is equivalent to T_PAAMAYIM_NEKUDOTAYIM
+            if ('UNKNOWN' === $name) {
-                $tokenMap[$i] = Tokens::T_PAAMAYIM_NEKUDOTAYIM;
+                continue;
            } elseif(\T_OPEN_TAG_WITH_ECHO === $i) {
                // T_OPEN_TAG_WITH_ECHO with dropped T_OPEN_TAG results in T_ECHO
                $tokenMap[$i] = Tokens::T_ECHO;
            } elseif(\T_CLOSE_TAG === $i) {
                // T_CLOSE_TAG is equivalent to ';'
                $tokenMap[$i] = ord(';');
            } elseif ('UNKNOWN' !== $name = token_name($i)) {
                if ('T_HASHBANG' === $name) {
                    // HHVM uses a special token for #! hashbang lines
                    $tokenMap[$i] = Tokens::T_INLINE_HTML;
                } elseif (defined($name = Tokens::class . '::' . $name)) {
                    // Other tokens can be mapped directly
                    $tokenMap[$i] = constant($name);
            }
            $constName = Tokens::class . '::' . $name;
            if (defined($constName)) {
                $tokenMap[$i] = constant($constName);
            }
        }
        // HHVM uses a special token for numbers that overflow to double
        if (defined('T_ONUMBER')) {
            $tokenMap[\T_ONUMBER] = Tokens::T_DNUMBER;
        }
        // HHVM also has a separate token for the __COMPILER_HALT_OFFSET__ constant
        if (defined('T_COMPILER_HALT_OFFSET')) {
            $tokenMap[\T_COMPILER_HALT_OFFSET] = Tokens::T_STRING;
        }
        return $tokenMap;
    }
 }
--- a/lib/PhpParser/Lexer/Emulative.php
+++ b/lib/PhpParser/Lexer/Emulative.php
@@ -7,7 +7,9 @@ use PhpParser\ErrorHandler;
 use PhpParser\Lexer;
 use PhpParser\Lexer\TokenEmulator\CoaleseEqualTokenEmulator;
 use PhpParser\Lexer\TokenEmulator\FnTokenEmulator;
 use PhpParser\Lexer\TokenEmulator\NumericLiteralSeparatorEmulator;
 use PhpParser\Lexer\TokenEmulator\TokenEmulatorInterface;
 use PhpParser\Parser\Tokens;
 class Emulative extends Lexer
 {
@@ -33,39 +35,25 @@ REGEX;
    {
        parent::__construct($options);
        // prepare token emulators
        $this->tokenEmulators[] = new FnTokenEmulator();
        $this->tokenEmulators[] = new CoaleseEqualTokenEmulator();
-
+        $this->tokenEmulators[] = new NumericLiteralSeparatorEmulator();
        // add emulated tokens here
        foreach ($this->tokenEmulators as $emulativeToken) {
            $this->tokenMap[$emulativeToken->getTokenId()] = $emulativeToken->getParserTokenId();
        }
    }
-    public function startLexing(string $code, ErrorHandler $errorHandler = null) {
+    public function tokenize(string $code, ErrorHandler $errorHandler = null) {
        $this->patches = [];
        if ($this->isEmulationNeeded($code) === false) {
            // Nothing to emulate, yay
-            parent::startLexing($code, $errorHandler);
+            return parent::tokenize($code, $errorHandler);
            return;
        }
        $collector = new ErrorHandler\Collecting();
        // 1. emulation of heredoc and nowdoc new syntax
        $preparedCode = $this->processHeredocNowdoc($code);
-        parent::startLexing($preparedCode, $collector);
+        $tokens = parent::tokenize($preparedCode, $collector);
-
+        $tokens = $this->fixupTokens($tokens);
        // add token emulation
        foreach ($this->tokenEmulators as $emulativeToken) {
            if ($emulativeToken->isEmulationNeeded($code)) {
                $this->tokens = $emulativeToken->emulate($code, $this->tokens);
            }
        }
        $this->fixupTokens();
        $errors = $collector->getErrors();
        if (!empty($errors)) {
@@ -74,6 +62,15 @@ REGEX;
                $errorHandler->handleError($error);
            }
        }
        // add token emulation
        foreach ($this->tokenEmulators as $emulativeToken) {
            if ($emulativeToken->isEmulationNeeded($code)) {
                $tokens = $emulativeToken->emulate($code, $tokens);
            }
        }
        return $tokens;
    }
    private function isHeredocNowdocEmulationNeeded(string $code): bool
@@ -142,10 +139,10 @@ REGEX;
        return $this->isHeredocNowdocEmulationNeeded($code);
    }
-    private function fixupTokens()
+    private function fixupTokens(array $tokens): array
    {
        if (\count($this->patches) === 0) {
-            return;
+            return $tokens;
        }
        // Load first patch
@@ -155,35 +152,29 @@ REGEX;
        // We use a manual loop over the tokens, because we modify the array on the fly
        $pos = 0;
-        for ($i = 0, $c = \count($this->tokens); $i < $c; $i++) {
+        for ($i = 0, $c = \count($tokens); $i < $c; $i++) {
-            $token = $this->tokens[$i];
+            $token = $tokens[$i];
-            if (\is_string($token)) {
+            $len = \strlen($token->value);
                // We assume that patches don't apply to string tokens
                $pos += \strlen($token);
                continue;
            }
            $len = \strlen($token[1]);
            $posDelta = 0;
            while ($patchPos >= $pos && $patchPos < $pos + $len) {
                $patchTextLen = \strlen($patchText);
                if ($patchType === 'remove') {
                    if ($patchPos === $pos && $patchTextLen === $len) {
                        // Remove token entirely
-                        array_splice($this->tokens, $i, 1, []);
+                        array_splice($tokens, $i, 1, []);
                        $i--;
                        $c--;
                    } else {
                        // Remove from token string
-                        $this->tokens[$i][1] = substr_replace(
+                        $tokens[$i]->value = substr_replace(
-                            $token[1], '', $patchPos - $pos + $posDelta, $patchTextLen
+                            $token->value, '', $patchPos - $pos + $posDelta, $patchTextLen
                        );
                        $posDelta -= $patchTextLen;
                    }
                } elseif ($patchType === 'add') {
                    // Insert into the token string
-                    $this->tokens[$i][1] = substr_replace(
+                    $tokens[$i]->value = substr_replace(
-                        $token[1], $patchText, $patchPos - $pos + $posDelta, 0
+                        $token->value, $patchText, $patchPos - $pos + $posDelta, 0
                    );
                    $posDelta += $patchTextLen;
                } else {
@@ -194,21 +185,30 @@ REGEX;
                $patchIdx++;
                if ($patchIdx >= \count($this->patches)) {
                    // No more patches, we're done
-                    return;
+                    break 2;
                }
                list($patchPos, $patchType, $patchText) = $this->patches[$patchIdx];
                // Multiple patches may apply to the same token. Reload the current one to check
                // If the new patch applies
-                $token = $this->tokens[$i];
+                $token = $tokens[$i];
            }
            $pos += $len;
        }
-        // A patch did not apply
+        // To retain a minimum amount of sanity, recompute lines and offsets in a separate loop.
-        assert(false);
+        $pos = 0;
        $line = 1;
        foreach ($tokens as $token) {
            $token->filePos = $pos;
            $token->line = $line;
            $pos += \strlen($token->value);
            $line += \substr_count($token->value, "\n");
        }
        return $tokens;
    }
    /**
--- a/lib/PhpParser/Lexer/TokenEmulator/CoaleseEqualTokenEmulator.php
+++ b/lib/PhpParser/Lexer/TokenEmulator/CoaleseEqualTokenEmulator.php
@@ -4,21 +4,10 @@ namespace PhpParser\Lexer\TokenEmulator;
 use PhpParser\Lexer\Emulative;
 use PhpParser\Parser\Tokens;
 use PhpParser\Token;
 final class CoaleseEqualTokenEmulator implements TokenEmulatorInterface
 {
    const T_COALESCE_EQUAL = 1007;
    public function getTokenId(): int
    {
        return self::T_COALESCE_EQUAL;
    }
    public function getParserTokenId(): int
    {
        return Tokens::T_COALESCE_EQUAL;
    }
    public function isEmulationNeeded(string $code) : bool
    {
        // skip version where this is supported
@@ -33,20 +22,17 @@ final class CoaleseEqualTokenEmulator implements TokenEmulatorInterface
    {
        // We need to manually iterate and manage a count because we'll change
        // the tokens array on the way
        $line = 1;
        for ($i = 0, $c = count($tokens); $i < $c; ++$i) {
            if (isset($tokens[$i + 1])) {
-                if ($tokens[$i][0] === T_COALESCE && $tokens[$i + 1] === '=') {
+                $token = $tokens[$i];
                if ($token->id === Tokens::T_COALESCE && $tokens[$i + 1]->value === '=') {
                    array_splice($tokens, $i, 2, [
-                        [self::T_COALESCE_EQUAL, '??=', $line]
+                        new Token(Tokens::T_COALESCE_EQUAL, '??=', $token->line, $token->filePos),
                    ]);
                    $c--;
                    continue;
                }
            }
            if (\is_array($tokens[$i])) {
                $line += substr_count($tokens[$i][1], "\n");
            }
        }
        return $tokens;
--- a/lib/PhpParser/Lexer/TokenEmulator/FnTokenEmulator.php
+++ b/lib/PhpParser/Lexer/TokenEmulator/FnTokenEmulator.php
@@ -4,21 +4,10 @@ namespace PhpParser\Lexer\TokenEmulator;
 use PhpParser\Lexer\Emulative;
 use PhpParser\Parser\Tokens;
 use PhpParser\Token;
 final class FnTokenEmulator implements TokenEmulatorInterface
 {
    const T_FN = 1008;
    public function getTokenId(): int
    {
        return self::T_FN;
    }
    public function getParserTokenId(): int
    {
        return Tokens::T_FN;
    }
    public function isEmulationNeeded(string $code) : bool
    {
        // skip version where this is supported
@@ -34,13 +23,14 @@ final class FnTokenEmulator implements TokenEmulatorInterface
        // We need to manually iterate and manage a count because we'll change
        // the tokens array on the way
        foreach ($tokens as $i => $token) {
-            if ($token[0] === T_STRING && $token[1] === 'fn') {
+            if ($token->id === Tokens::T_STRING && $token->value === 'fn') {
                $previousNonSpaceToken = $this->getPreviousNonSpaceToken($tokens, $i);
-                if ($previousNonSpaceToken !== null && $previousNonSpaceToken[0] === T_OBJECT_OPERATOR) {
+                if ($previousNonSpaceToken !== null
                        && $previousNonSpaceToken->id === Tokens::T_OBJECT_OPERATOR) {
                    continue;
                }
-                $tokens[$i][0] = self::T_FN;
+                $token->id = Tokens::T_FN;
            }
        }
@@ -48,13 +38,13 @@ final class FnTokenEmulator implements TokenEmulatorInterface
    }
    /**
-     * @param mixed[] $tokens
+     * @param Token[] $tokens
-     * @return mixed[]|null
+     * @return Token|null
     */
    private function getPreviousNonSpaceToken(array $tokens, int $start)
    {
        for ($i = $start - 1; $i >= 0; --$i) {
-            if ($tokens[$i][0] === T_WHITESPACE) {
+            if ($tokens[$i]->id === Tokens::T_WHITESPACE) {
                continue;
            }
--- a/lib/PhpParser/Lexer/TokenEmulator/NumericLiteralSeparatorEmulator.php
+++ b/lib/PhpParser/Lexer/TokenEmulator/NumericLiteralSeparatorEmulator.php
@@ -0,0 +1,100 @@
 <?php declare(strict_types=1);
 namespace PhpParser\Lexer\TokenEmulator;
 use PhpParser\Lexer\Emulative;
 use PhpParser\Parser\Tokens;
 use PhpParser\Token;
 /**
  * Emulates numeric literals with "_" separators (for example 1_000 or 0xCAFE_F00D)
  * on PHP versions older than 7.4.
  *
  * The number is re-matched against the original source code at the position of each
  * number token, and the tokens it spans are merged into a single number token.
  */
 final class NumericLiteralSeparatorEmulator implements TokenEmulatorInterface
 {
    // Regex fragments for the literal forms; each allows "_" between digit groups.
    const BIN = '(?:0b[01]+(?:_[01]+)*)';
    const HEX = '(?:0x[0-9a-f]+(?:_[0-9a-f]+)*)';
    const DEC = '(?:[0-9]+(?:_[0-9]+)*)';
    const SIMPLE_FLOAT = '(?:' . self::DEC . '\.' . self::DEC . '?|\.' . self::DEC . ')';
    const EXP = '(?:e[+-]?' . self::DEC . ')';
    const FLOAT = '(?:' . self::SIMPLE_FLOAT . self::EXP . '?|' . self::DEC . self::EXP . ')';
    // Complete pattern: case-insensitive and anchored at the offset passed to preg_match().
    const NUMBER = '~' . self::FLOAT . '|' . self::BIN . '|' . self::HEX . '|' . self::DEC . '~iA';
    /**
     * Cheap pre-check deciding whether emulate() has to run for this code.
     *
     * @param string $code Source code being lexed
     * @return bool True if the code contains something that looks like a separated number
     */
    public function isEmulationNeeded(string $code) : bool
    {
        // skip version where this is supported
        if (version_compare(\PHP_VERSION, Emulative::PHP_7_4, '>=')) {
            return false;
        }
        // preg_match() returns 1 on a match, 0 on no match and false on error. Comparing
        // with "!== false" would report emulation as needed for every input.
        return preg_match('~[0-9a-f]_[0-9a-f]~i', $code) === 1;
    }
    /**
     * Merges the tokens that make up a separated numeric literal into one number token.
     *
     * @param string  $code   Source code the tokens were produced from
     * @param Token[] $tokens Tokens to process
     * @return Token[] Tokens with separated numeric literals merged
     */
    public function emulate(string $code, array $tokens): array
    {
        // We need to manually iterate and manage a count because we'll change
        // the tokens array on the way
        for ($i = 0, $c = \count($tokens); $i < $c; ++$i) {
            $token = $tokens[$i];
            if ($token->id !== Tokens::T_LNUMBER && $token->id !== Tokens::T_DNUMBER) {
                continue;
            }
            $tokenLen = \strlen($token->value);
            // Re-lex the number from the source, starting where the number token starts.
            $res = preg_match(self::NUMBER, $code, $matches, 0, $token->filePos);
            assert($res, "No number at number token position");
            $match = $matches[0];
            $matchLen = \strlen($match);
            if ($matchLen === $tokenLen) {
                // Original token already holds the full number.
                continue;
            }
            $tokenKind = $this->resolveIntegerOrFloatToken($match);
            $newTokens = [new Token($tokenKind, $match, $token->line, $token->filePos)];
            // Consume following tokens until the whole match is covered.
            $numTokens = 1;
            $len = $tokenLen;
            while ($matchLen > $len) {
                $nextToken = $tokens[$i + $numTokens];
                $nextTokenLen = \strlen($nextToken->value);
                $numTokens++;
                if ($matchLen < $len + $nextTokenLen) {
                    // Split trailing characters into a partial token. Its text starts after
                    // the bytes absorbed into the number, so its offset moves by the same amount.
                    $consumedLen = $matchLen - $len;
                    $newTokens[] = new Token(
                        $nextToken->id,
                        substr($nextToken->value, $consumedLen),
                        $nextToken->line,
                        $nextToken->filePos + $consumedLen
                    );
                    break;
                }
                $len += $nextTokenLen;
            }
            array_splice($tokens, $i, $numTokens, $newTokens);
            // Keep the loop bound in sync with the shortened array.
            $c -= $numTokens - \count($newTokens);
        }
        return $tokens;
    }
    /**
     * Determines whether a numeric literal is an integer or a float token.
     *
     * @param string $str Numeric literal, possibly containing "_" separators
     * @return int Tokens::T_DNUMBER if the literal evaluates to a float, else Tokens::T_LNUMBER
     */
    private function resolveIntegerOrFloatToken(string $str): int
    {
        $str = str_replace('_', '', $str);
        // bindec()/hexdec()/octdec() yield a float when the value does not fit into an int,
        // so oversized integer literals are classified as T_DNUMBER.
        if (stripos($str, '0b') === 0) {
            $num = bindec($str);
        } elseif (stripos($str, '0x') === 0) {
            $num = hexdec($str);
        } elseif (stripos($str, '0') === 0 && ctype_digit($str)) {
            $num = octdec($str);
        } else {
            $num = +$str;
        }
        return is_float($num) ? Tokens::T_DNUMBER : Tokens::T_LNUMBER;
    }
 }
--- a/lib/PhpParser/Lexer/TokenEmulator/TokenEmulatorInterface.php
+++ b/lib/PhpParser/Lexer/TokenEmulator/TokenEmulatorInterface.php
@@ -2,16 +2,16 @@
 namespace PhpParser\Lexer\TokenEmulator;
 use PhpParser\Token;
 /** @internal */
 interface TokenEmulatorInterface
 {
    public function getTokenId(): int;
    public function getParserTokenId(): int;
    public function isEmulationNeeded(string $code): bool;
    /**
-     * @return array Modified Tokens
+     * @param Token[] $tokens
     * @return Token[]
     */
    public function emulate(string $code, array $tokens): array;
 }
--- a/lib/PhpParser/Node/Scalar/DNumber.php
+++ b/lib/PhpParser/Node/Scalar/DNumber.php
@@ -34,6 +34,8 @@ class DNumber extends Scalar
     * @return float The parsed number
     */
    public static function parse(string $str) : float {
        $str = str_replace('_', '', $str);
        // if string contains any of .eE just cast it to float
        if (false !== strpbrk($str, '.eE')) {
            return (float) $str;
--- a/lib/PhpParser/Node/Scalar/LNumber.php
+++ b/lib/PhpParser/Node/Scalar/LNumber.php
@@ -41,6 +41,8 @@ class LNumber extends Scalar
     * @return LNumber The constructed LNumber, including kind attribute
     */
    public static function fromString(string $str, array $attributes = [], bool $allowInvalidOctal = false) : LNumber {
        $str = str_replace('_', '', $str);
        if ('0' !== $str[0] || '0' === $str) {
            $attributes['kind'] = LNumber::KIND_DEC;
            return new LNumber((int) $str, $attributes);
--- a/lib/PhpParser/NodeAbstract.php
+++ b/lib/PhpParser/NodeAbstract.php
@@ -4,8 +4,14 @@ namespace PhpParser;
 abstract class NodeAbstract implements Node, \JsonSerializable
 {
    // TODO: Kill.
    protected $attributes;
    /** @var FileContext|null  */
    protected $context = null;
    protected $startTokenPos = -1;
    protected $endTokenPos = -1;
    /**
     * Creates a Node.
     *
@@ -15,35 +21,42 @@ abstract class NodeAbstract implements Node, \JsonSerializable
        $this->attributes = $attributes;
    }
    /**
     * Attaches this node to a file context and records the token range it covers.
     *
     * @param FileContext $context    Context whose tokens array the positions index into
     * @param int         $firstToken Index of the first token belonging to the node
     * @param int         $lastToken  Index of the last token belonging to the node
     */
    public function setTokenContext(FileContext $context, int $firstToken, int $lastToken) {
        $this->endTokenPos = $lastToken;
        $this->startTokenPos = $firstToken;
        $this->context = $context;
    }
    /**
     * Gets line the node started in (alias of getStartLine).
     *
     * @return int Start line (or -1 if not available)
     */
    public function getLine() : int {
-        return $this->attributes['startLine'] ?? -1;
+        return $this->context->tokens[$this->startTokenPos]->line ?? -1;
    }
    /**
     * Gets line the node started in.
     *
     * Requires the 'startLine' attribute to be enabled in the lexer (enabled by default).
     *
     * @return int Start line (or -1 if not available)
     */
    public function getStartLine() : int {
-        return $this->attributes['startLine'] ?? -1;
+        return $this->context->tokens[$this->startTokenPos]->line ?? -1;
    }
    /**
     * Gets the line the node ended in.
     *
     * Requires the 'endLine' attribute to be enabled in the lexer (enabled by default).
     *
     * @return int End line (or -1 if not available)
     */
    public function getEndLine() : int {
-        return $this->attributes['endLine'] ?? -1;
+        if (!isset($this->context->tokens[$this->endTokenPos])) {
            return -1;
        }
        $token = $this->context->tokens[$this->endTokenPos];
        // End line = line the last token starts on plus the newlines inside its text.
        // substr_count() needs the token's string value, not the Token object itself.
        return $token->line + \substr_count($token->value, "\n");
    }
    /**
@@ -51,12 +64,10 @@ abstract class NodeAbstract implements Node, \JsonSerializable
     *
     * The offset is an index into the array returned by Lexer::getTokens().
     *
     * Requires the 'startTokenPos' attribute to be enabled in the lexer (DISABLED by default).
     *
     * @return int Token start position (or -1 if not available)
     */
    public function getStartTokenPos() : int {
-        return $this->attributes['startTokenPos'] ?? -1;
+        return $this->startTokenPos;
    }
    /**
@@ -64,34 +75,33 @@ abstract class NodeAbstract implements Node, \JsonSerializable
     *
     * The offset is an index into the array returned by Lexer::getTokens().
     *
     * Requires the 'endTokenPos' attribute to be enabled in the lexer (DISABLED by default).
     *
     * @return int Token end position (or -1 if not available)
     */
    public function getEndTokenPos() : int {
-        return $this->attributes['endTokenPos'] ?? -1;
+        return $this->endTokenPos;
    }
    /**
     * Gets the file offset of the first character that is part of this node.
     *
     * Requires the 'startFilePos' attribute to be enabled in the lexer (DISABLED by default).
     *
     * @return int File start position (or -1 if not available)
     */
    public function getStartFilePos() : int {
-        return $this->attributes['startFilePos'] ?? -1;
+        return $this->context->tokens[$this->startTokenPos]->filePos ?? -1;
    }
    /**
     * Gets the file offset of the last character that is part of this node.
     *
     * Requires the 'endFilePos' attribute to be enabled in the lexer (DISABLED by default).
     *
     * @return int File end position (or -1 if not available)
     */
    public function getEndFilePos() : int {
-        return $this->attributes['endFilePos'] ?? -1;
+        if (!isset($this->context->tokens[$this->endTokenPos])) {
            return -1;
        }
        $token = $this->context->tokens[$this->endTokenPos];
        return $token->filePos + \strlen($token->value) - 1;
    }
    /**
--- a/lib/PhpParser/NodeVisitor/NameResolver.php
+++ b/lib/PhpParser/NodeVisitor/NameResolver.php
@@ -91,6 +91,7 @@ class NameResolver extends NodeVisitorAbstract
            $this->resolveSignature($node);
        } elseif ($node instanceof Stmt\ClassMethod
                  || $node instanceof Expr\Closure
                  || $node instanceof Expr\ArrowFunction
        ) {
            $this->resolveSignature($node);
        } elseif ($node instanceof Stmt\Property) {
--- a/lib/PhpParser/PrettyPrinterAbstract.php
+++ b/lib/PhpParser/PrettyPrinterAbstract.php
@@ -491,7 +491,7 @@ abstract class PrettyPrinterAbstract
        $pos = 0;
        $result = $this->pArray($stmts, $origStmts, $pos, 0, 'File', 'stmts', null);
        if (null !== $result) {
-            $result .= $this->origTokens->getTokenCode($pos, count($origTokens), 0);
+            $result .= $this->origTokens->getTokenCode($pos, count($origTokens) - 1, 0);
        } else {
            // Fallback
            // TODO Add <?php properly
@@ -1213,8 +1213,8 @@ abstract class PrettyPrinterAbstract
        $stripLeft = ['left' => \T_WHITESPACE];
        $stripRight = ['right' => \T_WHITESPACE];
        $stripDoubleArrow = ['right' => \T_DOUBLE_ARROW];
-        $stripColon = ['left' => ':'];
+        $stripColon = ['left' => \ord(':')];
-        $stripEquals = ['left' => '='];
+        $stripEquals = ['left' => \ord('=')];
        $this->removalMap = [
            'Expr_ArrayDimFetch->dim' => $stripBoth,
            'Expr_ArrayItem->key' => $stripDoubleArrow,
@@ -1254,22 +1254,22 @@ abstract class PrettyPrinterAbstract
        // TODO: "yield" where both key and value are inserted doesn't work
        // [$find, $beforeToken, $extraLeft, $extraRight]
        $this->insertionMap = [
-            'Expr_ArrayDimFetch->dim' => ['[', false, null, null],
+            'Expr_ArrayDimFetch->dim' => [\ord('['), false, null, null],
            'Expr_ArrayItem->key' => [null, false, null, ' => '],
-            'Expr_ArrowFunction->returnType' => [')', false, ' : ', null],
+            'Expr_ArrowFunction->returnType' => [\ord(')'), false, ' : ', null],
-            'Expr_Closure->returnType' => [')', false, ' : ', null],
+            'Expr_Closure->returnType' => [\ord(')'), false, ' : ', null],
-            'Expr_Ternary->if' => ['?', false, ' ', ' '],
+            'Expr_Ternary->if' => [\ord('?'), false, ' ', ' '],
            'Expr_Yield->key' => [\T_YIELD, false, null, ' => '],
            'Expr_Yield->value' => [\T_YIELD, false, ' ', null],
            'Param->type' => [null, false, null, ' '],
            'Param->default' => [null, false, ' = ', null],
            'Stmt_Break->num' => [\T_BREAK, false, ' ', null],
-            'Stmt_ClassMethod->returnType' => [')', false, ' : ', null],
+            'Stmt_ClassMethod->returnType' => [\ord(')'), false, ' : ', null],
            'Stmt_Class->extends' => [null, false, ' extends ', null],
            'Expr_PrintableNewAnonClass->extends' => [null, ' extends ', null],
            'Stmt_Continue->num' => [\T_CONTINUE, false, ' ', null],
            'Stmt_Foreach->keyVar' => [\T_AS, false, null, ' => '],
-            'Stmt_Function->returnType' => [')', false, ' : ', null],
+            'Stmt_Function->returnType' => [\ord(')'), false, ' : ', null],
            'Stmt_If->else' => [null, false, ' ', null],
            'Stmt_Namespace->name' => [\T_NAMESPACE, false, ' ', null],
            'Stmt_Property->type' => [\T_VARIABLE, true, null, ' '],
@@ -1367,19 +1367,19 @@ abstract class PrettyPrinterAbstract
        // [$find, $extraLeft, $extraRight]
        $this->emptyListInsertionMap = [
-            'Expr_ArrowFunction->params' => ['(', '', ''],
+            'Expr_ArrowFunction->params' => [\ord('('), '', ''],
-            'Expr_Closure->uses' => [')', ' use(', ')'],
+            'Expr_Closure->uses' => [\ord(')'), ' use(', ')'],
-            'Expr_Closure->params' => ['(', '', ''],
+            'Expr_Closure->params' => [\ord('('), '', ''],
-            'Expr_FuncCall->args' => ['(', '', ''],
+            'Expr_FuncCall->args' => [\ord('('), '', ''],
-            'Expr_MethodCall->args' => ['(', '', ''],
+            'Expr_MethodCall->args' => [\ord('('), '', ''],
-            'Expr_New->args' => ['(', '', ''],
+            'Expr_New->args' => [\ord('('), '', ''],
-            'Expr_PrintableNewAnonClass->args' => ['(', '', ''],
+            'Expr_PrintableNewAnonClass->args' => [\ord('('), '', ''],
            'Expr_PrintableNewAnonClass->implements' => [null, ' implements ', ''],
-            'Expr_StaticCall->args' => ['(', '', ''],
+            'Expr_StaticCall->args' => [\ord('('), '', ''],
            'Stmt_Class->implements' => [null, ' implements ', ''],
-            'Stmt_ClassMethod->params' => ['(', '', ''],
+            'Stmt_ClassMethod->params' => [\ord('('), '', ''],
            'Stmt_Interface->extends' => [null, ' extends ', ''],
-            'Stmt_Function->params' => ['(', '', ''],
+            'Stmt_Function->params' => [\ord('('), '', ''],
            /* These cannot be empty to start with:
             * Expr_Isset->vars
--- a/lib/PhpParser/Token.php
+++ b/lib/PhpParser/Token.php
@@ -0,0 +1,21 @@
 <?php declare(strict_types=1);
 namespace PhpParser;
 /**
  * A single lexer token: its id, its text and where it starts in the source.
  */
 class Token {
    /** @var int Token id (a PhpParser\Parser\Tokens::T_* constant) */
    public $id; // TODO: Move this to PhpParser\Tokens.
    /** @var string Text of the token */
    public $value;
    /** @var int Line on which the token starts */
    public $line;
    /** @var int Offset of the token within the source code */
    public $filePos;
    /**
     * @param int    $id      Token id
     * @param string $value   Text of the token
     * @param int    $line    Line on which the token starts
     * @param int    $filePos Offset of the token within the source code
     */
    public function __construct(int $id, string $value, int $line, int $filePos) {
        $this->filePos = $filePos;
        $this->line = $line;
        $this->value = $value;
        $this->id = $id;
    }
 }
--- a/test/PhpParser/Builder/InterfaceTest.php
+++ b/test/PhpParser/Builder/InterfaceTest.php
@@ -9,11 +9,8 @@ use PhpParser\Node\Stmt;
 class InterfaceTest extends \PHPUnit\Framework\TestCase
 {
-    /** @var Interface_ */
+    protected function createInterfaceBuilder() {
-    protected $builder;
+        return new Interface_('Contract');
    protected function setUp() {
        $this->builder = new Interface_('Contract');
    }
    private function dump($node) {
@@ -22,13 +19,14 @@ class InterfaceTest extends \PHPUnit\Framework\TestCase
    }
    public function testEmpty() {
-        $contract = $this->builder->getNode();
+        $contract = $this->createInterfaceBuilder()->getNode();
        $this->assertInstanceOf(Stmt\Interface_::class, $contract);
        $this->assertEquals(new Node\Identifier('Contract'), $contract->name);
    }
    public function testExtending() {
-        $contract = $this->builder->extend('Space\Root1', 'Root2')->getNode();
+        $contract = $this->createInterfaceBuilder()
            ->extend('Space\Root1', 'Root2')->getNode();
        $this->assertEquals(
            new Stmt\Interface_('Contract', [
                'extends' => [
@@ -41,7 +39,7 @@ class InterfaceTest extends \PHPUnit\Framework\TestCase
    public function testAddMethod() {
        $method = new Stmt\ClassMethod('doSomething');
-        $contract = $this->builder->addStmt($method)->getNode();
+        $contract = $this->createInterfaceBuilder()->addStmt($method)->getNode();
        $this->assertSame([$method], $contract->stmts);
    }
@@ -49,7 +47,7 @@ class InterfaceTest extends \PHPUnit\Framework\TestCase
        $const = new Stmt\ClassConst([
            new Node\Const_('SPEED_OF_LIGHT', new DNumber(299792458.0))
        ]);
-        $contract = $this->builder->addStmt($const)->getNode();
+        $contract = $this->createInterfaceBuilder()->addStmt($const)->getNode();
        $this->assertSame(299792458.0, $contract->stmts[0]->consts[0]->value->value);
    }
@@ -58,7 +56,7 @@ class InterfaceTest extends \PHPUnit\Framework\TestCase
            new Node\Const_('SPEED_OF_LIGHT', new DNumber(299792458))
        ]);
        $method = new Stmt\ClassMethod('doSomething');
-        $contract = $this->builder
+        $contract = $this->createInterfaceBuilder()
            ->addStmt($method)
            ->addStmt($const)
            ->getNode()
@@ -69,7 +67,7 @@ class InterfaceTest extends \PHPUnit\Framework\TestCase
    }
    public function testDocComment() {
-        $node = $this->builder
+        $node = $this->createInterfaceBuilder()
            ->setDocComment('/** Test */')
            ->getNode();
@@ -81,7 +79,7 @@ class InterfaceTest extends \PHPUnit\Framework\TestCase
    public function testInvalidStmtError() {
        $this->expectException(\LogicException::class);
        $this->expectExceptionMessage('Unexpected node of type "Stmt_PropertyProperty"');
-        $this->builder->addStmt(new Stmt\PropertyProperty('invalid'));
+        $this->createInterfaceBuilder()->addStmt(new Stmt\PropertyProperty('invalid'));
    }
    public function testFullFunctional() {
@@ -89,7 +87,7 @@ class InterfaceTest extends \PHPUnit\Framework\TestCase
            new Node\Const_('SPEED_OF_LIGHT', new DNumber(299792458))
        ]);
        $method = new Stmt\ClassMethod('doSomething');
-        $contract = $this->builder
+        $contract = $this->createInterfaceBuilder()
            ->addStmt($method)
            ->addStmt($const)
            ->getNode()
--- a/test/PhpParser/Lexer/EmulativeTest.php
+++ b/test/PhpParser/Lexer/EmulativeTest.php
@@ -5,6 +5,7 @@ namespace PhpParser\Lexer;
 use PhpParser\ErrorHandler;
 use PhpParser\LexerTest;
 use PhpParser\Parser\Tokens;
 use PhpParser\Token;
 class EmulativeTest extends LexerTest
 {
@@ -17,10 +18,15 @@ class EmulativeTest extends LexerTest
     */
    public function testReplaceKeywords($keyword, $expectedToken) {
        $lexer = $this->getLexer();
-        $lexer->startLexing('<?php ' . $keyword);
+        $tokens = $lexer->tokenize('<?php ' . $keyword);
-
+        $this->assertEquals(
-        $this->assertSame($expectedToken, $lexer->getNextToken());
+            [
-        $this->assertSame(0, $lexer->getNextToken());
+                new Token(Tokens::T_OPEN_TAG, '<?php ', 1, 0),
                new Token($expectedToken, $keyword, 1, 6),
                new Token(0, "\0", 1, 6 + strlen($keyword)),
            ],
            $tokens
        );
    }
    /**
@@ -28,11 +34,16 @@ class EmulativeTest extends LexerTest
     */
    public function testNoReplaceKeywordsAfterObjectOperator(string $keyword) {
        $lexer = $this->getLexer();
-        $lexer->startLexing('<?php ->' . $keyword);
+        $tokens = $lexer->tokenize('<?php ->' . $keyword);
-
+        $this->assertEquals(
-        $this->assertSame(Tokens::T_OBJECT_OPERATOR, $lexer->getNextToken());
+            [
-        $this->assertSame(Tokens::T_STRING, $lexer->getNextToken());
+                new Token(Tokens::T_OPEN_TAG, '<?php ', 1, 0),
-        $this->assertSame(0, $lexer->getNextToken());
+                new Token(Tokens::T_OBJECT_OPERATOR, '->', 1, 6),
                new Token(Tokens::T_STRING, $keyword, 1, 8),
                new Token(0, "\0", 1, 8 + strlen($keyword)),
            ],
            $tokens
        );
    }
    /**
@@ -40,11 +51,17 @@ class EmulativeTest extends LexerTest
     */
    public function testNoReplaceKeywordsAfterObjectOperatorWithSpaces(string $keyword) {
        $lexer = $this->getLexer();
-        $lexer->startLexing('<?php ->    ' . $keyword);
+        $tokens = $lexer->tokenize('<?php -> ' . $keyword);
-
+        $this->assertEquals(
-        $this->assertSame(Tokens::T_OBJECT_OPERATOR, $lexer->getNextToken());
+            [
-        $this->assertSame(Tokens::T_STRING, $lexer->getNextToken());
+                new Token(Tokens::T_OPEN_TAG, '<?php ', 1, 0),
-        $this->assertSame(0, $lexer->getNextToken());
+                new Token(Tokens::T_OBJECT_OPERATOR, '->', 1, 6),
                new Token(Tokens::T_WHITESPACE, ' ', 1, 8),
                new Token(Tokens::T_STRING, $keyword, 1, 9),
                new Token(0, "\0", 1, 9 + strlen($keyword)),
            ],
            $tokens
        );
    }
    public function provideTestReplaceKeywords() {
@@ -75,12 +92,12 @@ class EmulativeTest extends LexerTest
     */
    public function testLexNewFeatures($code, array $expectedTokens) {
        $lexer = $this->getLexer();
-        $lexer->startLexing('<?php ' . $code);
+        $tokens = $lexer->tokenize('<?php ' . $code);
-
+        // Drop <?php and EOF tokens.
-        $tokens = [];
+        $tokens = array_slice($tokens, 1, -1);
-        while (0 !== $token = $lexer->getNextToken($text)) {
+        $tokens = array_map(function(Token $token) {
-            $tokens[] = [$token, $text];
+            return [$token->id, $token->value];
-        }
+        }, $tokens);
        $this->assertSame($expectedTokens, $tokens);
    }
@@ -91,11 +108,16 @@ class EmulativeTest extends LexerTest
        $stringifiedToken = '"' . addcslashes($code, '"\\') . '"';
        $lexer = $this->getLexer();
-        $lexer->startLexing('<?php ' . $stringifiedToken);
+        $tokens = $lexer->tokenize('<?php ' . $stringifiedToken);
-
+        $this->assertEquals([
-        $this->assertSame(Tokens::T_CONSTANT_ENCAPSED_STRING, $lexer->getNextToken($text));
+            new Token(Tokens::T_OPEN_TAG, '<?php ', 1, 0),
-        $this->assertSame($stringifiedToken, $text);
+            new Token(Tokens::T_CONSTANT_ENCAPSED_STRING, $stringifiedToken, 1, 6),
-        $this->assertSame(0, $lexer->getNextToken());
+            new Token(
                0, "\0",
                1 + substr_count($stringifiedToken, "\n"),
                6 + strlen($stringifiedToken)
            ),
        ], $tokens);
    }
    /**
@@ -104,7 +126,7 @@ class EmulativeTest extends LexerTest
    public function testErrorAfterEmulation($code) {
        $errorHandler = new ErrorHandler\Collecting;
        $lexer = $this->getLexer();
-        $lexer->startLexing('<?php ' . $code . "\0", $errorHandler);
+        $lexer->tokenize('<?php ' . $code . "\0", $errorHandler);
        $errors = $errorHandler->getErrors();
        $this->assertCount(1, $errors);
@@ -123,10 +145,6 @@ class EmulativeTest extends LexerTest
    public function provideTestLexNewFeatures() {
        return [
            // PHP 7.4
            ['??=', [
                [Tokens::T_COALESCE_EQUAL, '??='],
            ]],
            ['yield from', [
                [Tokens::T_YIELD_FROM, 'yield from'],
            ]],
@@ -161,15 +179,17 @@ class EmulativeTest extends LexerTest
                [Tokens::T_START_HEREDOC, "<<<'NOWDOC'\n"],
                [Tokens::T_END_HEREDOC, 'NOWDOC'],
                [ord(';'), ';'],
                [Tokens::T_WHITESPACE, "\n"],
            ]],
            ["<<<'NOWDOC'\nFoobar\nNOWDOC;\n", [
                [Tokens::T_START_HEREDOC, "<<<'NOWDOC'\n"],
                [Tokens::T_ENCAPSED_AND_WHITESPACE, "Foobar\n"],
                [Tokens::T_END_HEREDOC, 'NOWDOC'],
                [ord(';'), ';'],
                [Tokens::T_WHITESPACE, "\n"],
            ]],
-            // Flexible heredoc/nowdoc
+            // PHP 7.3: Flexible heredoc/nowdoc
            ["<<<LABEL\nLABEL,", [
                [Tokens::T_START_HEREDOC, "<<<LABEL\n"],
                [Tokens::T_END_HEREDOC, "LABEL"],
@@ -198,12 +218,67 @@ class EmulativeTest extends LexerTest
                [Tokens::T_START_HEREDOC, "<<<LABEL\n"],
                [Tokens::T_ENCAPSED_AND_WHITESPACE, "LABELNOPE\n"],
                [Tokens::T_END_HEREDOC, "LABEL"],
                [Tokens::T_WHITESPACE, "\n"],
            ]],
            // Interpretation changed
            ["<<<LABEL\n    LABEL\nLABEL\n", [
                [Tokens::T_START_HEREDOC, "<<<LABEL\n"],
                [Tokens::T_END_HEREDOC, "    LABEL"],
                [Tokens::T_WHITESPACE, "\n"],
                [Tokens::T_STRING, "LABEL"],
                [Tokens::T_WHITESPACE, "\n"],
            ]],
            // PHP 7.4: Null coalesce equal
            ['??=', [
                [Tokens::T_COALESCE_EQUAL, '??='],
            ]],
            // PHP 7.4: Number literal separator
            ['1_000', [
                [Tokens::T_LNUMBER, '1_000'],
            ]],
            ['0xCAFE_F00D', [
                [Tokens::T_LNUMBER, '0xCAFE_F00D'],
            ]],
            ['0b0101_1111', [
                [Tokens::T_LNUMBER, '0b0101_1111'],
            ]],
            ['0137_041', [
                [Tokens::T_LNUMBER, '0137_041'],
            ]],
            ['1_000.0', [
                [Tokens::T_DNUMBER, '1_000.0'],
            ]],
            ['1_0.0', [
                [Tokens::T_DNUMBER, '1_0.0']
            ]],
            ['1_000_000_000.0', [
                [Tokens::T_DNUMBER, '1_000_000_000.0']
            ]],
            ['0e1_0', [
                [Tokens::T_DNUMBER, '0e1_0']
            ]],
            ['1_0e+10', [
                [Tokens::T_DNUMBER, '1_0e+10']
            ]],
            ['1_0e-10', [
                [Tokens::T_DNUMBER, '1_0e-10']
            ]],
            ['0b1011010101001010_110101010010_10101101010101_0101101011001_110111100', [
                [Tokens::T_DNUMBER, '0b1011010101001010_110101010010_10101101010101_0101101011001_110111100'],
            ]],
            ['0xFFFF_FFFF_FFFF_FFFF', [
                [Tokens::T_DNUMBER, '0xFFFF_FFFF_FFFF_FFFF'],
            ]],
            ['1_000+1', [
                [Tokens::T_LNUMBER, '1_000'],
                [ord('+'), '+'],
                [Tokens::T_LNUMBER, '1'],
            ]],
            ['1_0abc', [
                [Tokens::T_LNUMBER, '1_0'],
                [Tokens::T_STRING, 'abc'],
            ]],
        ];
    }
--- a/test/PhpParser/LexerTest.php
+++ b/test/PhpParser/LexerTest.php
@@ -11,19 +11,30 @@ class LexerTest extends \PHPUnit\Framework\TestCase
        return new Lexer($options);
    }
    /**
     * Checks that tokenize() returns the complete token list for a short
     * script: open tag, strings, punctuation, whitespace, the line comment
     * and a final token with id 0 and text "\0".
     */
    public function testTokenize() {
        $source = "<?php \"a\";\n// foo\n\"b\";";
        $semicolon = \ord(';');

        $expected = [];
        $expected[] = new Token(Tokens::T_OPEN_TAG, '<?php ', 1, 0);
        $expected[] = new Token(Tokens::T_CONSTANT_ENCAPSED_STRING, '"a"', 1, 6);
        $expected[] = new Token($semicolon, ';', 1, 9);
        $expected[] = new Token(Tokens::T_WHITESPACE, "\n", 1, 10);
        $expected[] = new Token(Tokens::T_COMMENT, "// foo\n", 2, 11);
        $expected[] = new Token(Tokens::T_CONSTANT_ENCAPSED_STRING, '"b"', 3, 18);
        $expected[] = new Token($semicolon, ';', 3, 21);
        // Last expected entry: id 0 with a NUL byte as its text.
        $expected[] = new Token(0, "\0", 3, 22);

        $actual = $this->getLexer()->tokenize($source);
        $this->assertEquals($expected, $actual);
    }
    /**
     * @dataProvider provideTestError
     */
    public function testError($code, $messages) {
        if (defined('HHVM_VERSION')) {
            $this->markTestSkipped('HHVM does not throw warnings from token_get_all()');
        }
        $errorHandler = new ErrorHandler\Collecting();
-        $lexer = $this->getLexer(['usedAttributes' => [
+        $lexer = $this->getLexer();
-            'comments', 'startLine', 'endLine', 'startFilePos', 'endFilePos'
+        $lexer->tokenize($code, $errorHandler);
-        ]]);
-        $lexer->startLexing($code, $errorHandler);
        $errors = $errorHandler->getErrors();
        $this->assertCount(count($messages), $errors);
@@ -46,218 +57,4 @@ class LexerTest extends \PHPUnit\Framework\TestCase
            ]],
        ];
    }
    /**
     * Lexes $code token by token and compares every emitted token against the
     * expected list, in order.
     *
     * @dataProvider provideTestLex
     *
     * @param string $code    Source code handed to startLexing()
     * @param array  $options Options forwarded to getLexer()
     * @param array  $tokens  Expected tokens, each as
     *                        [id, value, startAttributes, endAttributes]
     */
    public function testLex($code, $options, $tokens) {
        $lexer = $this->getLexer($options);
        $lexer->startLexing($code);
        while ($id = $lexer->getNextToken($value, $startAttributes, $endAttributes)) {
            // Report over-production explicitly rather than comparing against
            // the null that array_shift() yields for an exhausted list.
            $this->assertNotEmpty($tokens, 'Lexer produced more tokens than expected');
            $token = array_shift($tokens);
            $this->assertSame($token[0], $id);
            $this->assertSame($token[1], $value);
            $this->assertEquals($token[2], $startAttributes);
            $this->assertEquals($token[3], $endAttributes);
        }
        // The loop ends at the first falsy token id; leftover expectations mean
        // the lexer stopped early, which previously went unnoticed.
        $this->assertCount(0, $tokens, 'Lexer produced fewer tokens than expected');
    }
    /**
     * Data sets for testLex().
     *
     * Each entry is [code, lexer options, expected tokens]. Every expected
     * token is a four-element list [token id, token value, start attributes,
     * end attributes], matching the indexes 0-3 that testLex() reads.
     *
     * @return array[]
     */
    public function provideTestLex() {
        return [
            // tests conversion of closing PHP tag and drop of whitespace and opening tags
            [
                '<?php tokens ?>plaintext',
                [],
                [
                    [
                        Tokens::T_STRING, 'tokens',
                        ['startLine' => 1], ['endLine' => 1]
                    ],
                    [
                        ord(';'), '?>',
                        ['startLine' => 1], ['endLine' => 1]
                    ],
                    [
                        Tokens::T_INLINE_HTML, 'plaintext',
                        ['startLine' => 1, 'hasLeadingNewline' => false],
                        ['endLine' => 1]
                    ],
                ]
            ],
            // tests line numbers
            [
                '<?php' . "\n" . '$ token /** doc' . "\n" . 'comment */ $',
                [],
                [
                    [
                        ord('$'), '$',
                        ['startLine' => 2], ['endLine' => 2]
                    ],
                    [
                        Tokens::T_STRING, 'token',
                        ['startLine' => 2], ['endLine' => 2]
                    ],
                    [
                        ord('$'), '$',
                        [
                            'startLine' => 3,
                            'comments' => [
                                new Comment\Doc('/** doc' . "\n" . 'comment */', 2, 14, 5),
                            ]
                        ],
                        ['endLine' => 3]
                    ],
                ]
            ],
            // tests comment extraction
            [
                '<?php /* comment */ // comment' . "\n" . '/** docComment 1 *//** docComment 2 */ token',
                [],
                [
                    [
                        Tokens::T_STRING, 'token',
                        [
                            'startLine' => 2,
                            'comments' => [
                                new Comment('/* comment */', 1, 6, 1),
                                new Comment('// comment' . "\n", 1, 20, 3),
                                new Comment\Doc('/** docComment 1 */', 2, 31, 4),
                                new Comment\Doc('/** docComment 2 */', 2, 50, 5),
                            ],
                        ],
                        ['endLine' => 2]
                    ],
                ]
            ],
            // tests differing start and end line
            [
                '<?php "foo' . "\n" . 'bar"',
                [],
                [
                    [
                        Tokens::T_CONSTANT_ENCAPSED_STRING, '"foo' . "\n" . 'bar"',
                        ['startLine' => 1], ['endLine' => 2]
                    ],
                ]
            ],
            // tests exact file offsets
            [
                '<?php "a";' . "\n" . '// foo' . "\n" . '"b";',
                ['usedAttributes' => ['startFilePos', 'endFilePos']],
                [
                    [
                        Tokens::T_CONSTANT_ENCAPSED_STRING, '"a"',
                        ['startFilePos' => 6], ['endFilePos' => 8]
                    ],
                    [
                        ord(';'), ';',
                        ['startFilePos' => 9], ['endFilePos' => 9]
                    ],
                    [
                        Tokens::T_CONSTANT_ENCAPSED_STRING, '"b"',
                        ['startFilePos' => 18], ['endFilePos' => 20]
                    ],
                    [
                        ord(';'), ';',
                        ['startFilePos' => 21], ['endFilePos' => 21]
                    ],
                ]
            ],
            // tests token offsets
            [
                '<?php "a";' . "\n" . '// foo' . "\n" . '"b";',
                ['usedAttributes' => ['startTokenPos', 'endTokenPos']],
                [
                    [
                        Tokens::T_CONSTANT_ENCAPSED_STRING, '"a"',
                        ['startTokenPos' => 1], ['endTokenPos' => 1]
                    ],
                    [
                        ord(';'), ';',
                        ['startTokenPos' => 2], ['endTokenPos' => 2]
                    ],
                    [
                        Tokens::T_CONSTANT_ENCAPSED_STRING, '"b"',
                        ['startTokenPos' => 5], ['endTokenPos' => 5]
                    ],
                    [
                        ord(';'), ';',
                        ['startTokenPos' => 6], ['endTokenPos' => 6]
                    ],
                ]
            ],
            // tests all attributes being disabled
            [
                '<?php /* foo */ $bar;',
                ['usedAttributes' => []],
                [
                    [
                        Tokens::T_VARIABLE, '$bar',
                        [], []
                    ],
                    [
                        ord(';'), ';',
                        [], []
                    ]
                ]
            ],
            // tests no tokens
            [
                '',
                [],
                []
            ],
        ];
    }
    /**
     * Consumes tokens up to and including T_HALT_COMPILER, then checks that
     * handleHaltCompiler() returns the expected trailing text and that the
     * following getNextToken() call yields 0.
     *
     * @dataProvider provideTestHaltCompiler
     */
    public function testHandleHaltCompiler($code, $remaining) {
        $lexer = $this->getLexer();
        $lexer->startLexing($code);

        // Keep pulling tokens until the __halt_compiler token has been returned.
        do {
            $tokenId = $lexer->getNextToken();
        } while ($tokenId !== Tokens::T_HALT_COMPILER);

        $rest = $lexer->handleHaltCompiler();
        $this->assertSame($remaining, $rest);

        $afterHalt = $lexer->getNextToken();
        $this->assertSame(0, $afterHalt);
    }
    /**
     * Data sets for testHandleHaltCompiler(): [code, text expected to be
     * returned by handleHaltCompiler()].
     */
    public function provideTestHaltCompiler() {
        $remaining = 'Remaining Text';

        $cases = [];
        $cases[] = ['<?php ... __halt_compiler();' . $remaining, $remaining];
        $cases[] = ['<?php ... __halt_compiler ( ) ;' . $remaining, $remaining];
        $cases[] = ['<?php ... __halt_compiler() ?>' . $remaining, $remaining];
        // Disabled cases carried over unchanged:
        //array('<?php ... __halt_compiler();' . "\0", "\0"),
        //array('<?php ... __halt_compiler /* */ ( ) ;Remaining Text', 'Remaining Text'),

        return $cases;
    }
    /**
     * Expects handleHaltCompiler() to throw an Error with the
     * '__HALT_COMPILER must be followed by "();"' message when the code after
     * __halt_compiler is not "();".
     */
    public function testHandleHaltCompilerError() {
        $this->expectException(Error::class);
        $this->expectExceptionMessage('__HALT_COMPILER must be followed by "();"');

        $lexer = $this->getLexer();
        $lexer->startLexing('<?php ... __halt_compiler invalid ();');

        // Advance past the __halt_compiler token before triggering the check.
        do {
            $tokenId = $lexer->getNextToken();
        } while ($tokenId !== Tokens::T_HALT_COMPILER);

        $lexer->handleHaltCompiler();
    }
    /**
     * Checks that getTokens() returns the token list after startLexing():
     * three-element arrays for most tokens and bare ';' strings for the
     * semicolons.
     */
    public function testGetTokens() {
        $source = "<?php \"a\";\n// foo\n\"b\";";

        $lexer = $this->getLexer();
        $lexer->startLexing($source);

        $this->assertSame(
            [
                [T_OPEN_TAG, '<?php ', 1],
                [T_CONSTANT_ENCAPSED_STRING, '"a"', 1],
                ';',
                [T_WHITESPACE, "\n", 1],
                [T_COMMENT, "// foo\n", 2],
                [T_CONSTANT_ENCAPSED_STRING, '"b"', 3],
                ';',
            ],
            $lexer->getTokens()
        );
    }
 }
--- a/test/PhpParser/NodeTraverserTest.php
+++ b/test/PhpParser/NodeTraverserTest.php
@@ -266,13 +266,17 @@ class NodeTraverserTest extends \PHPUnit\Framework\TestCase
        $traverser->addVisitor($visitor2);
        $traverser->addVisitor($visitor3);
        $getVisitors = (function () {
            return $this->visitors;
        })->bindTo($traverser, NodeTraverser::class);
        $preExpected = [$visitor1, $visitor2, $visitor3];
-        $this->assertAttributeSame($preExpected, 'visitors', $traverser, 'The appropriate visitors have not been added');
+        $this->assertSame($preExpected, $getVisitors());
        $traverser->removeVisitor($visitor2);
        $postExpected = [0 => $visitor1, 2 => $visitor3];
-        $this->assertAttributeSame($postExpected, 'visitors', $traverser, 'The appropriate visitors are not present after removal');
+        $this->assertSame($postExpected, $getVisitors());
    }
    public function testNoCloneNodes() {
--- a/test/PhpParser/NodeVisitor/NameResolverTest.php
+++ b/test/PhpParser/NodeVisitor/NameResolverTest.php
@@ -219,6 +219,10 @@ function(A $a) : A {};
 function fn3(?A $a) : ?A {}
 function fn4(?array $a) : ?array {}
 fn(array $a): array => $a;
 fn(A $a): A => $a;
 fn(?A $a): ?A => $a;
 A::b();
 A::$b;
 A::B;
@@ -263,6 +267,9 @@ function fn3(?\NS\A $a) : ?\NS\A
 function fn4(?array $a) : ?array
 {
 }
 fn(array $a): array => $a;
 fn(\NS\A $a): \NS\A => $a;
 fn(?\NS\A $a): ?\NS\A => $a;
 \NS\A::b();
 \NS\A::$b;
 \NS\A::B;
--- a/test/code/parser/errorHandling/lexerErrors.test
+++ b/test/code/parser/errorHandling/lexerErrors.test
@@ -32,24 +32,25 @@ $a = 42;
@@{ "\1" }@@
 $b = 24;
 -----
 !!positions
 Unexpected character "" (ASCII 1) from 4:1 to 4:1
 array(
-    0: Stmt_Expression(
+    0: Stmt_Expression[3:1 - 3:8](
-        expr: Expr_Assign(
+        expr: Expr_Assign[3:1 - 3:7](
-            var: Expr_Variable(
+            var: Expr_Variable[3:1 - 3:2](
                name: a
            )
-            expr: Scalar_LNumber(
+            expr: Scalar_LNumber[3:6 - 3:7](
                value: 42
            )
        )
    )
-    1: Stmt_Expression(
+    1: Stmt_Expression[5:1 - 5:8](
-        expr: Expr_Assign(
+        expr: Expr_Assign[5:1 - 5:7](
-            var: Expr_Variable(
+            var: Expr_Variable[5:1 - 5:2](
                name: b
            )
-            expr: Scalar_LNumber(
+            expr: Scalar_LNumber[5:6 - 5:7](
                value: 24
            )
        )
@@ -62,24 +63,25 @@ $a = 42;
@@{ "\0" }@@
 $b = 24;
 -----
 !!positions
 Unexpected null byte from 4:1 to 4:1
 array(
-    0: Stmt_Expression(
+    0: Stmt_Expression[3:1 - 3:8](
-        expr: Expr_Assign(
+        expr: Expr_Assign[3:1 - 3:7](
-            var: Expr_Variable(
+            var: Expr_Variable[3:1 - 3:2](
                name: a
            )
-            expr: Scalar_LNumber(
+            expr: Scalar_LNumber[3:6 - 3:7](
                value: 42
            )
        )
    )
-    1: Stmt_Expression(
+    1: Stmt_Expression[5:1 - 5:8](
-        expr: Expr_Assign(
+        expr: Expr_Assign[5:1 - 5:7](
-            var: Expr_Variable(
+            var: Expr_Variable[5:1 - 5:2](
                name: b
            )
-            expr: Scalar_LNumber(
+            expr: Scalar_LNumber[5:6 - 5:7](
                value: 24
            )
        )
@@ -94,35 +96,36 @@ $b = 2;
@@{ "\2" }@@
 $c = 3;
 -----
-Unexpected character "@@{ "\1" }@@" (ASCII 1) from 4:1 to 4:1
+!!positions
-Unexpected character "@@{ "\2" }@@" (ASCII 2) from 6:1 to 6:1
+Unexpected character "" (ASCII 1) from 4:1 to 4:1
 Unexpected character "" (ASCII 2) from 6:1 to 6:1
 array(
-    0: Stmt_Expression(
+    0: Stmt_Expression[3:1 - 3:7](
-        expr: Expr_Assign(
+        expr: Expr_Assign[3:1 - 3:6](
-            var: Expr_Variable(
+            var: Expr_Variable[3:1 - 3:2](
                name: a
            )
-            expr: Scalar_LNumber(
+            expr: Scalar_LNumber[3:6 - 3:6](
                value: 1
            )
        )
    )
-    1: Stmt_Expression(
+    1: Stmt_Expression[5:1 - 5:7](
-        expr: Expr_Assign(
+        expr: Expr_Assign[5:1 - 5:6](
-            var: Expr_Variable(
+            var: Expr_Variable[5:1 - 5:2](
                name: b
            )
-            expr: Scalar_LNumber(
+            expr: Scalar_LNumber[5:6 - 5:6](
                value: 2
            )
        )
    )
-    2: Stmt_Expression(
+    2: Stmt_Expression[7:1 - 7:7](
-        expr: Expr_Assign(
+        expr: Expr_Assign[7:1 - 7:6](
-            var: Expr_Variable(
+            var: Expr_Variable[7:1 - 7:2](
                name: c
            )
-            expr: Scalar_LNumber(
+            expr: Scalar_LNumber[7:6 - 7:6](
                value: 3
            )
        )
--- a/test/code/parser/scalar/numberSeparators.test
+++ b/test/code/parser/scalar/numberSeparators.test
@@ -0,0 +1,199 @@
 Different integer syntaxes
 -----
 <?php
 6.674_083e-11;
 299_792_458;
 0xCAFE_F00D;
 0b0101_1111;
 0137_041;
 // already a valid constant name
 _100;
 // syntax errors
 100_;
 1__1;
 1_.0;
 1._0;
 0x_123;
 0b_101;
 1_e2;
 1e_2;
 -----
 Syntax error, unexpected T_STRING from 13:4 to 13:4
 Syntax error, unexpected T_STRING from 14:2 to 14:4
 Syntax error, unexpected T_STRING from 15:2 to 15:2
 Syntax error, unexpected T_STRING from 16:3 to 16:4
 Syntax error, unexpected T_STRING from 17:2 to 17:6
 Syntax error, unexpected T_STRING from 18:2 to 18:6
 Syntax error, unexpected T_STRING from 19:2 to 19:4
 Syntax error, unexpected T_STRING from 20:2 to 20:4
 array(
    0: Stmt_Expression(
        expr: Scalar_DNumber(
            value: 6.674083E-11
        )
    )
    1: Stmt_Expression(
        expr: Scalar_LNumber(
            value: 299792458
        )
    )
    2: Stmt_Expression(
        expr: Scalar_LNumber(
            value: 3405705229
        )
    )
    3: Stmt_Expression(
        expr: Scalar_LNumber(
            value: 95
        )
    )
    4: Stmt_Expression(
        expr: Scalar_LNumber(
            value: 48673
        )
    )
    5: Stmt_Expression(
        expr: Expr_ConstFetch(
            name: Name(
                parts: array(
                    0: _100
                )
                comments: array(
                    0: // already a valid constant name
                )
            )
            comments: array(
                0: // already a valid constant name
            )
        )
        comments: array(
            0: // already a valid constant name
        )
    )
    6: Stmt_Expression(
        expr: Scalar_LNumber(
            value: 100
            comments: array(
                0: // syntax errors
            )
        )
        comments: array(
            0: // syntax errors
        )
    )
    7: Stmt_Expression(
        expr: Expr_ConstFetch(
            name: Name(
                parts: array(
                    0: _
                )
            )
        )
    )
    8: Stmt_Expression(
        expr: Scalar_LNumber(
            value: 1
        )
    )
    9: Stmt_Expression(
        expr: Expr_ConstFetch(
            name: Name(
                parts: array(
                    0: __1
                )
            )
        )
    )
    10: Stmt_Expression(
        expr: Scalar_LNumber(
            value: 1
        )
    )
    11: Stmt_Expression(
        expr: Expr_ConstFetch(
            name: Name(
                parts: array(
                    0: _
                )
            )
        )
    )
    12: Stmt_Expression(
        expr: Scalar_DNumber(
            value: 0
        )
    )
    13: Stmt_Expression(
        expr: Scalar_DNumber(
            value: 1
        )
    )
    14: Stmt_Expression(
        expr: Expr_ConstFetch(
            name: Name(
                parts: array(
                    0: _0
                )
            )
        )
    )
    15: Stmt_Expression(
        expr: Scalar_LNumber(
            value: 0
        )
    )
    16: Stmt_Expression(
        expr: Expr_ConstFetch(
            name: Name(
                parts: array(
                    0: x_123
                )
            )
        )
    )
    17: Stmt_Expression(
        expr: Scalar_LNumber(
            value: 0
        )
    )
    18: Stmt_Expression(
        expr: Expr_ConstFetch(
            name: Name(
                parts: array(
                    0: b_101
                )
            )
        )
    )
    19: Stmt_Expression(
        expr: Scalar_LNumber(
            value: 1
        )
    )
    20: Stmt_Expression(
        expr: Expr_ConstFetch(
            name: Name(
                parts: array(
                    0: _e2
                )
            )
        )
    )
    21: Stmt_Expression(
        expr: Scalar_LNumber(
            value: 1
        )
    )
    22: Stmt_Expression(
        expr: Expr_ConstFetch(
            name: Name(
                parts: array(
                    0: e_2
                )
            )
        )
    )
 )
--- a/test_old/run.php
+++ b/test_old/run.php
@@ -231,6 +231,8 @@ foreach (new RecursiveIteratorIterator(
        echo $file, ":\n    Parse failed with message: {$e->getMessage()}\n";
        ++$parseFail;
    } catch (Throwable $e) {
        echo $file, ":\n    Unknown error occurred: $e\n";
    }
 }
Author	SHA1	Message	Date
Nikita Popov	1626c19d98	Update lexer implementations and tests	2019-06-30 23:07:22 +02:00
Nikita Popov	f52785a2b8	Start on lexer refactoring	2019-06-30 19:31:22 +02:00
Nikita Popov	a21a614737	WIP	2019-06-30 17:21:55 +02:00
Nikita Popov	6f74784e16	Switch to a normalized token representation Each token is now represented by a Token object.	2019-06-30 14:14:24 +02:00
Tomáš Votruba	3f718ee2c3	[PHP 7.4] Add support for numeric literal separators (#615 ) Implements RFC https://wiki.php.net/rfc/numeric_literal_separator. Closes #614.	2019-06-30 12:13:28 +02:00
Nikita Popov	b9b45dd2bc	Insert T_BAD_CHARACTER tokens for missing characters The token stream should cover all characters in the original code, insert a dummy token for missing illegal characters. We should really be doing this in token_get_all() as well.	2019-06-30 11:43:48 +02:00
Chris Hewitt	a4b43edb03	Fix one-character inconsistency	2019-06-30 09:25:26 +02:00
Nikita Popov	3cf61fdd26	Only ignore-platform-reqs on nightly	2019-06-23 15:11:05 +02:00
Nikita Popov	9484baf8f8	Make compatible with PhpUnit 8	2019-06-23 15:03:40 +02:00
Nikita Popov	aad0e2896f	Remove token registration from TokenEmulator interface	2019-06-23 14:50:14 +02:00
hoels	624f71fa6f	Resolve return type of arrow functions (#613 )	2019-06-04 16:25:12 +02:00