Remove emulation for unsupported PHP versions

This commit is contained in:
Nikita Popov 2023-08-16 21:09:51 +02:00
parent ee3e7db3fc
commit 3c0432b09d
8 changed files with 2 additions and 272 deletions

View File

@ -128,9 +128,6 @@ class TokenPolyfill {
$line = 1;
$pos = 0;
$origTokens = \token_get_all($code, $flags);
if (\PHP_VERSION_ID < 70400) {
$origTokens = self::fixupBadCharacters($code, $origTokens);
}
$numTokens = \count($origTokens);
for ($i = 0; $i < $numTokens; $i++) {
@ -217,38 +214,6 @@ class TokenPolyfill {
return $tokens;
}
/**
 * Re-inserts invalid characters that token_get_all() silently discarded.
 *
 * Before PHP 7.4 the tokenizer simply omitted invalid characters from its output. This walks
 * the token texts alongside the source and emits a T_BAD_CHARACTER token for every source byte
 * that is not accounted for by a token.
 *
 * @param string $code       Source code that was tokenized
 * @param array  $origTokens Tokens as returned by token_get_all()
 *
 * @return array Token list with T_BAD_CHARACTER entries filled in
 */
private static function fixupBadCharacters(string $code, array $origTokens): array {
    $result = [];
    $offset = 0;
    foreach ($origTokens as $token) {
        if (\is_string($token)) {
            $text = $token;
        } else {
            $text = $token[1];
        }
        $textLen = \strlen($text);

        if (substr($code, $offset, $textLen) !== $text) {
            // Bytes were skipped ahead of this token: everything up to its next occurrence
            // is reported as bad characters.
            $found = strpos($code, $text, $offset);
            for ($skipped = $offset; $skipped < $found; $skipped++) {
                // The line number is left out on purpose, it is not going to be used.
                $result[] = [\T_BAD_CHARACTER, $code[$skipped]];
            }
            $offset = $found;
        }

        $result[] = $token;
        $offset += $textLen;
    }

    // Whatever remains after the final token was dropped as well.
    for ($codeLen = \strlen($code); $offset < $codeLen; $offset++) {
        $result[] = [\T_BAD_CHARACTER, $code[$offset]];
    }
    return $result;
}
/** Initialize private static state needed by tokenize(). */
private static function init(): void {
if (isset(self::$identifierTokens)) {

View File

@ -40,11 +40,7 @@ class Emulative extends Lexer {
$this->hostPhpVersion = PhpVersion::getHostVersion();
$emulators = [
new FlexibleDocStringEmulator(),
new FnTokenEmulator(),
new MatchTokenEmulator(),
new CoaleseEqualTokenEmulator(),
new NumericLiteralSeparatorEmulator(),
new NullsafeTokenEmulator(),
new AttributeEmulator(),
new EnumTokenEmulator(),

View File

@ -1,40 +0,0 @@
<?php declare(strict_types=1);
namespace PhpParser\Lexer\TokenEmulator;
use PhpParser\PhpVersion;
use PhpParser\Token;
/**
 * Emulates the null coalescing assignment operator (??=), which this emulator attributes
 * to PHP 7.4.
 */
final class CoaleseEqualTokenEmulator extends TokenEmulator {
    /** Version in which ??= became available. */
    public function getPhpVersion(): PhpVersion {
        return PhpVersion::fromComponents(7, 4);
    }

    /** Cheap pre-check: emulation only matters if the operator text occurs in the code. */
    public function isEmulationNeeded(string $code): bool {
        return false !== strpos($code, '??=');
    }

    /**
     * Merges every T_COALESCE token that is directly followed by a "=" token into a single
     * T_COALESCE_EQUAL token.
     *
     * @param string $code   Source code (unused here)
     * @param array  $tokens Token objects to rewrite
     *
     * @return array Rewritten token list
     */
    public function emulate(string $code, array $tokens): array {
        // array_splice() shrinks the array while we walk it, so both the index and the
        // element count are tracked by hand.
        $numTokens = count($tokens);
        $idx = 0;
        while ($idx < $numTokens) {
            $current = $tokens[$idx];
            $isSplitOperator = isset($tokens[$idx + 1])
                && $current->id === T_COALESCE
                && $tokens[$idx + 1]->text === '=';
            if ($isSplitOperator) {
                // The merged token takes over line and position of the "??" part.
                $merged = new Token(\T_COALESCE_EQUAL, '??=', $current->line, $current->pos);
                array_splice($tokens, $idx, 2, [$merged]);
                $numTokens--;
            }
            $idx++;
        }
        return $tokens;
    }

    /** No-op: ??= was not valid code previously, so there is nothing to undo. */
    public function reverseEmulate(string $code, array $tokens): array {
        return $tokens;
    }
}

View File

@ -1,71 +0,0 @@
<?php declare(strict_types=1);
namespace PhpParser\Lexer\TokenEmulator;
use PhpParser\PhpVersion;
/**
 * Emulates the flexible heredoc/nowdoc syntax (indented closing label, no mandatory newline
 * after the label), which this emulator attributes to PHP 7.3.
 *
 * The work happens in preprocessCode(): the source is rewritten into the older, stricter form
 * and every modification is recorded in the patch list.
 */
final class FlexibleDocStringEmulator extends TokenEmulator {
    // Matches a whole heredoc/nowdoc and captures the indentation in front of the closing
    // label as well as the separator following it.
    private const FLEXIBLE_DOC_STRING_REGEX = <<<'REGEX'
/<<<[ \t]*(['"]?)([a-zA-Z_\x80-\xff][a-zA-Z0-9_\x80-\xff]*)\1\r?\n
(?:.*\r?\n)*?
(?<indentation>\h*)\2(?![a-zA-Z0-9_\x80-\xff])(?<separator>(?:;?[\r\n])?)/x
REGEX;

    /** Version in which flexible doc strings became available. */
    public function getPhpVersion(): PhpVersion {
        return PhpVersion::fromComponents(7, 3);
    }

    /** Cheap pre-check: only code containing "<<<" can hold a heredoc/nowdoc. */
    public function isEmulationNeeded(string $code): bool {
        return false !== strpos($code, '<<<');
    }

    /** No-op: the emulation is done via preprocessing + fixup. */
    public function emulate(string $code, array $tokens): array {
        return $tokens;
    }

    /** No-op: reverse emulation is not supported. */
    public function reverseEmulate(string $code, array $tokens): array {
        return $tokens;
    }

    /**
     * Rewrites flexible doc strings in $code into the classic form.
     *
     * Indentation in front of a closing label is removed and a missing separator after the
     * label is replaced by a newline. Each change appends an entry of the form
     * [position, 'add'|'remove', text] to $patches.
     *
     * @param string $code    Source code to preprocess
     * @param array  $patches Patch list, extended in place
     *
     * @return string Preprocessed code
     */
    public function preprocessCode(string $code, array &$patches): string {
        $numMatches = preg_match_all(
            self::FLEXIBLE_DOC_STRING_REGEX, $code, $matches, PREG_SET_ORDER|PREG_OFFSET_CAPTURE
        );
        if (!$numMatches) {
            // No heredoc/nowdoc found
            return $code;
        }

        // Match offsets refer to the unmodified code; this is the accumulated displacement
        // caused by the edits applied so far.
        $shift = 0;
        foreach ($matches as $match) {
            [$indentation, $indentationStart] = $match['indentation'];
            [$separator, $separatorStart] = $match['separator'];

            $hasIndentation = $indentation !== '';
            $hasSeparator = $separator !== '';
            if (!$hasIndentation && $hasSeparator) {
                // Ordinary heredoc/nowdoc
                continue;
            }

            if ($hasIndentation) {
                // Strip the indentation in front of the closing label.
                $at = $indentationStart + $shift;
                $code = substr_replace($code, '', $at, strlen($indentation));
                $patches[] = [$at, 'add', $indentation];
                $shift -= strlen($indentation);
            }

            if (!$hasSeparator) {
                // Put a newline after the closing label to act as separator.
                $at = $separatorStart + $shift;
                $code = substr_replace($code, "\n", $at, 0);
                $patches[] = [$at, 'remove', "\n"];
                $shift += 1;
            }
        }

        return $code;
    }
}

View File

@ -1,19 +0,0 @@
<?php declare(strict_types=1);
namespace PhpParser\Lexer\TokenEmulator;
use PhpParser\PhpVersion;
/**
 * Keyword emulator for "fn", which this emulator attributes to PHP 7.4.
 */
final class FnTokenEmulator extends KeywordEmulator {
    /** Version in which the keyword became available. */
    public function getPhpVersion(): PhpVersion {
        return PhpVersion::fromComponents(7, 4);
    }

    /** Source text of the emulated keyword. */
    public function getKeywordString(): string {
        return 'fn';
    }

    /** Token id that represents the keyword. */
    public function getKeywordToken(): int {
        return \T_FN;
    }
}

View File

@ -1,95 +0,0 @@
<?php declare(strict_types=1);
namespace PhpParser\Lexer\TokenEmulator;
use PhpParser\PhpVersion;
use PhpParser\Token;
/**
 * Emulates numeric literal separators (e.g. 1_000), which this emulator attributes to PHP 7.4.
 *
 * Number tokens are re-matched against the source with a separator-aware pattern. When the
 * match is longer than the token, the tokens it spans are replaced by a single number token.
 */
final class NumericLiteralSeparatorEmulator extends TokenEmulator {
    // Building blocks of the number pattern; each allows "_" between digit groups.
    private const BIN = '(?:0b[01]+(?:_[01]+)*)';
    private const HEX = '(?:0x[0-9a-f]+(?:_[0-9a-f]+)*)';
    private const DEC = '(?:[0-9]+(?:_[0-9]+)*)';
    private const SIMPLE_FLOAT = '(?:' . self::DEC . '\.' . self::DEC . '?|\.' . self::DEC . ')';
    private const EXP = '(?:e[+-]?' . self::DEC . ')';
    private const FLOAT = '(?:' . self::SIMPLE_FLOAT . self::EXP . '?|' . self::DEC . self::EXP . ')';
    // Complete pattern: case-insensitive and anchored at the offset passed to preg_match().
    private const NUMBER = '~' . self::FLOAT . '|' . self::BIN . '|' . self::HEX . '|' . self::DEC . '~iA';

    /** Version in which numeric literal separators became available. */
    public function getPhpVersion(): PhpVersion {
        return PhpVersion::fromComponents(7, 4);
    }

    /** Cheap pre-check: looks for an underscore between digits (decimal or hexadecimal). */
    public function isEmulationNeeded(string $code): bool {
        return preg_match('~[0-9]_[0-9]~', $code)
            || preg_match('~0x[0-9a-f]+_[0-9a-f]~i', $code);
    }

    /**
     * Replaces number tokens that were split at a separator by a single number token.
     *
     * @param string $code   Source code the tokens were produced from
     * @param array  $tokens Token objects to rewrite
     *
     * @return array Rewritten token list
     */
    public function emulate(string $code, array $tokens): array {
        // We need to manually iterate and manage a count because we'll change
        // the tokens array on the way
        for ($i = 0, $c = count($tokens); $i < $c; ++$i) {
            $token = $tokens[$i];
            if ($token->id !== \T_LNUMBER && $token->id !== \T_DNUMBER) {
                continue;
            }
            $tokenLen = \strlen($token->text);

            $res = preg_match(self::NUMBER, $code, $matches, 0, $token->pos);
            assert($res, "No number at number token position");

            $match = $matches[0];
            $matchLen = \strlen($match);
            if ($matchLen === $tokenLen) {
                // Original token already holds the full number.
                continue;
            }

            $tokenKind = $this->resolveIntegerOrFloatToken($match);
            $newTokens = [new Token($tokenKind, $match, $token->line, $token->pos)];

            // Consume following tokens until the whole match is covered.
            $numTokens = 1;
            $len = $tokenLen;
            while ($matchLen > $len) {
                $nextToken = $tokens[$i + $numTokens];
                $nextTokenText = $nextToken->text;
                $nextTokenLen = \strlen($nextTokenText);

                $numTokens++;
                if ($matchLen < $len + $nextTokenLen) {
                    // Split trailing characters into a partial token. Its text starts after
                    // the part of $nextToken that belongs to the number, so the position has
                    // to be moved forward by the same amount.
                    $consumed = $matchLen - $len;
                    $partialText = substr($nextTokenText, $consumed);
                    $newTokens[] = new Token(
                        $nextToken->id, $partialText, $nextToken->line, $nextToken->pos + $consumed
                    );
                    break;
                }

                $len += $nextTokenLen;
            }

            array_splice($tokens, $i, $numTokens, $newTokens);
            // Keep the loop bound in sync with the number of tokens removed.
            $c -= $numTokens - \count($newTokens);
        }

        return $tokens;
    }

    /**
     * Determines whether a number literal is an integer or a float token.
     *
     * Separators are stripped and the literal is converted according to its prefix; a value
     * that comes out as float yields T_DNUMBER, anything else T_LNUMBER.
     *
     * @param string $str Number literal, possibly containing "_" separators
     *
     * @return int T_LNUMBER or T_DNUMBER
     */
    private function resolveIntegerOrFloatToken(string $str): int {
        $str = str_replace('_', '', $str);

        if (stripos($str, '0b') === 0) {
            $num = bindec($str);
        } elseif (stripos($str, '0x') === 0) {
            $num = hexdec($str);
        } elseif (stripos($str, '0') === 0 && ctype_digit($str)) {
            // Leading zero and digits only: octal notation.
            $num = octdec($str);
        } else {
            $num = +$str;
        }

        return is_float($num) ? T_DNUMBER : T_LNUMBER;
    }

    /** No-op: numeric separators were not legal code previously, so there is nothing to undo. */
    public function reverseEmulate(string $code, array $tokens): array {
        return $tokens;
    }
}

View File

@ -5,10 +5,6 @@ namespace PhpParser;
if (!\function_exists('PhpParser\defineCompatibilityTokens')) {
function defineCompatibilityTokens(): void {
$compatTokens = [
// PHP 7.4
'T_BAD_CHARACTER',
'T_FN',
'T_COALESCE_EQUAL',
// PHP 8.0
'T_NAME_QUALIFIED',
'T_NAME_FULLY_QUALIFIED',

View File

@ -406,10 +406,8 @@ class EmulativeTest extends LexerTest {
['8.0', 'match', [[\T_MATCH, 'match']]],
['7.4', 'match', [[\T_STRING, 'match']]],
// Keywords are not case-sensitive.
['7.4', 'fn', [[\T_FN, 'fn']]],
['7.4', 'FN', [[\T_FN, 'FN']]],
['7.3', 'fn', [[\T_STRING, 'fn']]],
['7.3', 'FN', [[\T_STRING, 'FN']]],
['8.0', 'MATCH', [[\T_MATCH, 'MATCH']]],
['7.4', 'MATCH', [[\T_STRING, 'MATCH']]],
// Tested here to skip testLeaveStuffAloneInStrings.
['8.0', '"$foo?->bar"', [
[ord('"'), '"'],