diff --git a/lib/PhpParser/Lexer/Emulative.php b/lib/PhpParser/Lexer/Emulative.php index ac473ad9..864bffa9 100644 --- a/lib/PhpParser/Lexer/Emulative.php +++ b/lib/PhpParser/Lexer/Emulative.php @@ -7,6 +7,7 @@ use PhpParser\ErrorHandler; use PhpParser\Lexer; use PhpParser\Lexer\TokenEmulator\CoaleseEqualTokenEmulator; use PhpParser\Lexer\TokenEmulator\FnTokenEmulator; +use PhpParser\Lexer\TokenEmulator\NumericLiteralSeparatorEmulator; use PhpParser\Lexer\TokenEmulator\TokenEmulatorInterface; use PhpParser\Parser\Tokens; @@ -39,6 +40,7 @@ REGEX; $this->tokenEmulators[] = new FnTokenEmulator(); $this->tokenEmulators[] = new CoaleseEqualTokenEmulator(); + $this->tokenEmulators[] = new NumericLiteralSeparatorEmulator(); $this->tokenMap[self::T_COALESCE_EQUAL] = Tokens::T_COALESCE_EQUAL; $this->tokenMap[self::T_FN] = Tokens::T_FN; @@ -58,14 +60,6 @@ REGEX; // 1. emulation of heredoc and nowdoc new syntax $preparedCode = $this->processHeredocNowdoc($code); parent::startLexing($preparedCode, $collector); - - // add token emulation - foreach ($this->tokenEmulators as $emulativeToken) { - if ($emulativeToken->isEmulationNeeded($code)) { - $this->tokens = $emulativeToken->emulate($code, $this->tokens); - } - } - $this->fixupTokens(); $errors = $collector->getErrors(); @@ -75,6 +69,13 @@ REGEX; $errorHandler->handleError($error); } } + + // add token emulation + foreach ($this->tokenEmulators as $emulativeToken) { + if ($emulativeToken->isEmulationNeeded($code)) { + $this->tokens = $emulativeToken->emulate($code, $this->tokens); + } + } } private function isHeredocNowdocEmulationNeeded(string $code): bool diff --git a/lib/PhpParser/Lexer/TokenEmulator/NumericLiteralSeparatorEmulator.php b/lib/PhpParser/Lexer/TokenEmulator/NumericLiteralSeparatorEmulator.php new file mode 100644 index 00000000..f564c594 --- /dev/null +++ b/lib/PhpParser/Lexer/TokenEmulator/NumericLiteralSeparatorEmulator.php @@ -0,0 +1,98 @@ +=')) { + return false; + } + + return 
preg_match('~[0-9a-f]_[0-9a-f]~i', $code) === 1; // 1 = match; 0 (no match) and false (PCRE error) both mean "nothing to emulate"
+    }
+
+    /**
+     * Rebuilds number tokens whose source text is longer than the token the lexer produced.
+     *
+     * For every T_LNUMBER / T_DNUMBER token, self::NUMBER is matched against $code at the
+     * token's byte offset. If the match is longer than the token text, the token and the
+     * following tokens covered by the match are replaced by one token holding the whole
+     * match; a following token that is only partly covered is cut down to its remainder.
+     *
+     * NOTE(review): self::NUMBER is declared outside this excerpt; presumably it matches a
+     * complete numeric literal including "_" separators - confirm against the constant.
+     *
+     * @param string $code   Source code the tokens were produced from
+     * @param array  $tokens Tokens as strings or [id, text, third element] arrays
+     *                       (third element presumably the line number, as in token_get_all())
+     *
+     * @return array Token list with the affected number tokens merged
+     */
+    public function emulate(string $code, array $tokens): array
+    {
+        // We need to manually iterate and manage a count because we'll change
+        // the tokens array on the way
+        // $codeOffset is the byte offset in $code at which the current token starts.
+        $codeOffset = 0;
+        for ($i = 0, $c = count($tokens); $i < $c; ++$i) {
+            $token = $tokens[$i];
+            $tokenLen = \strlen(\is_array($token) ? $token[1] : $token);
+
+            // Anything that is not a number token only advances the offset.
+            if ($token[0] !== T_LNUMBER && $token[0] !== T_DNUMBER) {
+                $codeOffset += $tokenLen;
+                continue;
+            }
+
+            $res = preg_match(self::NUMBER, $code, $matches, 0, $codeOffset);
+            assert($res, "No number at number token position");
+
+            $match = $matches[0];
+            $matchLen = \strlen($match);
+            if ($matchLen === $tokenLen) {
+                // Original token already holds the full number.
+                $codeOffset += $tokenLen;
+                continue;
+            }
+
+            // The merged token takes the kind implied by its value and keeps the
+            // third element of the first original token.
+            $tokenKind = $this->resolveIntegerOrFloatToken($match);
+            $newTokens = [[$tokenKind, $match, $token[2]]];
+
+            // Walk over the following tokens until the whole match is covered.
+            // $numTokens counts the tokens to replace, $len the bytes they cover.
+            $numTokens = 1;
+            $len = $tokenLen;
+            while ($matchLen > $len) {
+                $nextToken = $tokens[$i + $numTokens];
+                $nextTokenText = \is_array($nextToken) ? $nextToken[1] : $nextToken;
+                $nextTokenLen = \strlen($nextTokenText);
+
+                $numTokens++;
+                if ($matchLen < $len + $nextTokenLen) {
+                    // Split trailing characters into a partial token.
+                    assert(is_array($nextToken), "Partial token should be an array token");
+                    $partialText = substr($nextTokenText, $matchLen - $len);
+                    $newTokens[] = [$nextToken[0], $partialText, $nextToken[2]];
+                    break;
+                }
+
+                $len += $nextTokenLen;
+            }
+
+            array_splice($tokens, $i, $numTokens, $newTokens);
+            // Keep the loop bound in sync with the shortened token array.
+            $c -= $numTokens - \count($newTokens);
+            // Only the merged number is consumed here; a partial token, if any, is
+            // visited by the next iteration and adds its own length.
+            $codeOffset += $matchLen;
+        }
+
+        return $tokens;
+    }
+
+    /**
+     * Determines the token kind for a numeric literal that may contain "_" separators.
+     *
+     * The separators are removed and the text is converted according to its prefix:
+     * "0b" via bindec(), "0x" via hexdec(), a leading "0" followed only by digits via
+     * octdec(), anything else via unary plus. These conversions yield a float when the
+     * value does not fit into an int, which is then reported as T_DNUMBER as well.
+     *
+     * @param string $str Numeric literal as written in the source
+     *
+     * @return int T_DNUMBER if the converted value is a float, T_LNUMBER otherwise
+     */
+    private function resolveIntegerOrFloatToken(string $str): int
+    {
+        $str = str_replace('_', '', $str);
+
+        if (stripos($str, '0b') === 0) {
+            $num = bindec($str);
+        } elseif (stripos($str, '0x') === 0) {
+            $num = hexdec($str);
+        } elseif (stripos($str, '0') === 0 && ctype_digit($str)) {
+            $num = octdec($str);
+        } else {
+            $num = +$str;
+        }
+
+        return is_float($num) ? T_DNUMBER : T_LNUMBER;
+    }
+}
diff --git a/lib/PhpParser/Node/Scalar/DNumber.php b/lib/PhpParser/Node/Scalar/DNumber.php
index 7b7ae378..29ce0dd4 100644
--- a/lib/PhpParser/Node/Scalar/DNumber.php
+++ b/lib/PhpParser/Node/Scalar/DNumber.php
@@ -34,6 +34,8 @@ class DNumber extends Scalar
      * @return float The parsed number
      */
     public static function parse(string $str) : float {
+        $str = str_replace('_', '', $str);
+
         // if string contains any of .eE just cast it to float
         if (false !== strpbrk($str, '.eE')) {
             return (float) $str;
diff --git a/lib/PhpParser/Node/Scalar/LNumber.php b/lib/PhpParser/Node/Scalar/LNumber.php
index 5c5aca3f..b3394354 100644
--- a/lib/PhpParser/Node/Scalar/LNumber.php
+++ b/lib/PhpParser/Node/Scalar/LNumber.php
@@ -41,6 +41,8 @@ class LNumber extends Scalar
      * @return LNumber The constructed LNumber, including kind attribute
      */
     public static function fromString(string $str, array $attributes = [], bool $allowInvalidOctal = false) : LNumber {
+        $str = str_replace('_', '', $str);
+
         if ('0' !== $str[0] || '0' === $str) {
             $attributes['kind'] = LNumber::KIND_DEC;
             return new LNumber((int) $str, $attributes);
diff --git a/test/PhpParser/Lexer/EmulativeTest.php b/test/PhpParser/Lexer/EmulativeTest.php
index a53c379b..2a39beac 100644 --- a/test/PhpParser/Lexer/EmulativeTest.php +++ b/test/PhpParser/Lexer/EmulativeTest.php @@ -123,10 +123,6 @@ class EmulativeTest extends LexerTest public function provideTestLexNewFeatures() { return [ - // PHP 7.4 - ['??=', [ - [Tokens::T_COALESCE_EQUAL, '??='], - ]], ['yield from', [ [Tokens::T_YIELD_FROM, 'yield from'], ]], @@ -169,7 +165,7 @@ class EmulativeTest extends LexerTest [ord(';'), ';'], ]], - // Flexible heredoc/nowdoc + // PHP 7.3: Flexible heredoc/nowdoc ["<<getMessage()}\n"; ++$parseFail; + } catch (Throwable $e) { + echo $file, ":\n Unknown error occurred: $e\n"; } }