mirror of
https://github.com/nikic/PHP-Parser.git
synced 2025-06-05 03:24:50 +02:00
Remove emulation for unsupported PHP versions
This commit is contained in:
parent
ee3e7db3fc
commit
3c0432b09d
@ -128,9 +128,6 @@ class TokenPolyfill {
|
||||
$line = 1;
|
||||
$pos = 0;
|
||||
$origTokens = \token_get_all($code, $flags);
|
||||
if (\PHP_VERSION_ID < 70400) {
|
||||
$origTokens = self::fixupBadCharacters($code, $origTokens);
|
||||
}
|
||||
|
||||
$numTokens = \count($origTokens);
|
||||
for ($i = 0; $i < $numTokens; $i++) {
|
||||
@ -217,38 +214,6 @@ class TokenPolyfill {
|
||||
return $tokens;
|
||||
}
|
||||
|
||||
/**
 * Re-inserts characters that token_get_all() left out of the token stream.
 *
 * Prior to PHP 7.4, token_get_all() simply dropped invalid characters instead of
 * reporting them. This walks the tokens alongside the source and, wherever the
 * source contains bytes that no token accounts for, emits one T_BAD_CHARACTER
 * token per missing byte.
 *
 * @param string $code       Source code that was tokenized
 * @param array  $origTokens Tokens as returned by token_get_all()
 *
 * @return array Tokens with a [T_BAD_CHARACTER, char] entry for every dropped byte
 */
private static function fixupBadCharacters(string $code, array $origTokens): array {
    $result = [];
    $offset = 0;

    foreach ($origTokens as $token) {
        $text = \is_string($token) ? $token : $token[1];
        $textLen = \strlen($text);

        if (substr($code, $offset, $textLen) !== $text) {
            // The source does not continue with this token's text, so bytes were
            // skipped. Everything up to the token's next occurrence is invalid.
            $resumeAt = strpos($code, $text, $offset);
            for (; $offset < $resumeAt; $offset++) {
                // The line is deliberately omitted, it is never used.
                $result[] = [\T_BAD_CHARACTER, $code[$offset]];
            }
            $offset = $resumeAt;
        }

        $result[] = $token;
        $offset += $textLen;
    }

    // Bytes left over after the last token are invalid characters as well.
    for ($codeLen = \strlen($code); $offset < $codeLen; $offset++) {
        $result[] = [\T_BAD_CHARACTER, $code[$offset]];
    }

    return $result;
}
|
||||
|
||||
/** Initialize private static state needed by tokenize(). */
|
||||
private static function init(): void {
|
||||
if (isset(self::$identifierTokens)) {
|
||||
|
@ -40,11 +40,7 @@ class Emulative extends Lexer {
|
||||
$this->hostPhpVersion = PhpVersion::getHostVersion();
|
||||
|
||||
$emulators = [
|
||||
new FlexibleDocStringEmulator(),
|
||||
new FnTokenEmulator(),
|
||||
new MatchTokenEmulator(),
|
||||
new CoaleseEqualTokenEmulator(),
|
||||
new NumericLiteralSeparatorEmulator(),
|
||||
new NullsafeTokenEmulator(),
|
||||
new AttributeEmulator(),
|
||||
new EnumTokenEmulator(),
|
||||
|
@ -1,40 +0,0 @@
|
||||
<?php declare(strict_types=1);
|
||||
|
||||
namespace PhpParser\Lexer\TokenEmulator;
|
||||
|
||||
use PhpParser\PhpVersion;
|
||||
use PhpParser\Token;
|
||||
|
||||
/**
 * Token emulator for the `??=` operator.
 *
 * Combines a T_COALESCE token that is directly followed by a `=` token into a
 * single T_COALESCE_EQUAL token.
 */
final class CoaleseEqualTokenEmulator extends TokenEmulator {
    /** PHP version associated with this emulation (7.4). */
    public function getPhpVersion(): PhpVersion {
        return PhpVersion::fromComponents(7, 4);
    }

    /** Emulation is only attempted if the code contains the text `??=`. */
    public function isEmulationNeeded(string $code): bool {
        return false !== strpos($code, '??=');
    }

    /**
     * Replaces every `??` `=` token pair by one `??=` token.
     *
     * The merged token takes over the line and position of the `??` token.
     */
    public function emulate(string $code, array $tokens): array {
        $merged = [];
        $total = count($tokens);
        $index = 0;

        while ($index < $total) {
            $current = $tokens[$index];
            $following = $tokens[$index + 1] ?? null;

            $isCoalesceAssign = $following !== null
                && $current->id === T_COALESCE
                && $following->text === '=';

            if ($isCoalesceAssign) {
                $merged[] = new Token(\T_COALESCE_EQUAL, '??=', $current->line, $current->pos);
                // Both tokens of the pair have been consumed.
                $index += 2;
            } else {
                $merged[] = $current;
                $index++;
            }
        }

        return $merged;
    }

    /** Nothing to undo: `??=` was not valid code previously. */
    public function reverseEmulate(string $code, array $tokens): array {
        return $tokens;
    }
}
|
@ -1,71 +0,0 @@
|
||||
<?php declare(strict_types=1);
|
||||
|
||||
namespace PhpParser\Lexer\TokenEmulator;
|
||||
|
||||
use PhpParser\PhpVersion;
|
||||
|
||||
/**
 * Emulator for flexible heredoc/nowdoc syntax.
 *
 * All work happens in preprocessCode(): closing markers that are indented, or
 * that are not followed by a line break, are rewritten in the source text, and
 * each rewrite is recorded as a patch. The token-level hooks leave tokens untouched.
 */
final class FlexibleDocStringEmulator extends TokenEmulator {
    private const FLEXIBLE_DOC_STRING_REGEX = <<<'REGEX'
/<<<[ \t]*(['"]?)([a-zA-Z_\x80-\xff][a-zA-Z0-9_\x80-\xff]*)\1\r?\n
(?:.*\r?\n)*?
(?<indentation>\h*)\2(?![a-zA-Z0-9_\x80-\xff])(?<separator>(?:;?[\r\n])?)/x
REGEX;

    /** PHP version associated with this emulation (7.3). */
    public function getPhpVersion(): PhpVersion {
        return PhpVersion::fromComponents(7, 3);
    }

    /** Any occurrence of `<<<` may start a heredoc/nowdoc. */
    public function isEmulationNeeded(string $code): bool {
        return false !== strpos($code, '<<<');
    }

    /** No token rewriting here; handled by preprocessing + fixup. */
    public function emulate(string $code, array $tokens): array {
        return $tokens;
    }

    /** Reverse emulation is not supported; tokens are returned as-is. */
    public function reverseEmulate(string $code, array $tokens): array {
        return $tokens;
    }

    /**
     * Rewrites flexible heredoc/nowdoc closing markers in the code.
     *
     * @param string $code    Code to preprocess
     * @param array  $patches Receives [position, 'add'|'remove', text] entries, one per
     *                        modification; positions refer to the modified code
     *
     * @return string The modified code, or the input if nothing matched
     */
    public function preprocessCode(string $code, array &$patches): string {
        $flags = PREG_SET_ORDER | PREG_OFFSET_CAPTURE;
        if (!preg_match_all(self::FLEXIBLE_DOC_STRING_REGEX, $code, $docStrings, $flags)) {
            // No heredoc/nowdoc found
            return $code;
        }

        // Match offsets refer to the unmodified code; $shift is the net length
        // change caused by the edits applied so far.
        $shift = 0;
        foreach ($docStrings as $docString) {
            [$indentation, $indentationStart] = $docString['indentation'];
            [$separator, $separatorStart] = $docString['separator'];

            $isIndented = $indentation !== '';
            $hasSeparator = $separator !== '';

            if (!$isIndented && $hasSeparator) {
                // Ordinary heredoc/nowdoc
                continue;
            }

            if ($isIndented) {
                // Strip the indentation in front of the closing marker
                $at = $indentationStart + $shift;
                $width = strlen($indentation);
                $code = substr_replace($code, '', $at, $width);
                $patches[] = [$at, 'add', $indentation];
                $shift -= $width;
            }

            if (!$hasSeparator) {
                // Put a newline after the closing marker as separator
                $at = $separatorStart + $shift;
                $code = substr_replace($code, "\n", $at, 0);
                $patches[] = [$at, 'remove', "\n"];
                $shift += 1;
            }
        }

        return $code;
    }
}
|
@ -1,19 +0,0 @@
|
||||
<?php declare(strict_types=1);
|
||||
|
||||
namespace PhpParser\Lexer\TokenEmulator;
|
||||
|
||||
use PhpParser\PhpVersion;
|
||||
|
||||
/**
 * Keyword emulator for `fn`.
 *
 * Supplies the keyword text and token id to KeywordEmulator.
 */
final class FnTokenEmulator extends KeywordEmulator {
    /** PHP version associated with the `fn` keyword (7.4). */
    public function getPhpVersion(): PhpVersion {
        return PhpVersion::fromComponents(7, 4);
    }

    /** Text of the emulated keyword. */
    public function getKeywordString(): string {
        return 'fn';
    }

    /** Token id used for the emulated keyword. */
    public function getKeywordToken(): int {
        return \T_FN;
    }
}
|
@ -1,95 +0,0 @@
|
||||
<?php declare(strict_types=1);
|
||||
|
||||
namespace PhpParser\Lexer\TokenEmulator;
|
||||
|
||||
use PhpParser\PhpVersion;
|
||||
use PhpParser\Token;
|
||||
|
||||
/**
 * Token emulator for numeric literals with `_` separators (e.g. `1_000`).
 *
 * For every number token, the full literal (including separators) is re-matched
 * in the source at the token's position. If it is longer than the token text,
 * the number token and the tokens covering the rest of the literal are replaced
 * by a single number token.
 */
final class NumericLiteralSeparatorEmulator extends TokenEmulator {
    private const BIN = '(?:0b[01]+(?:_[01]+)*)';
    private const HEX = '(?:0x[0-9a-f]+(?:_[0-9a-f]+)*)';
    private const DEC = '(?:[0-9]+(?:_[0-9]+)*)';
    private const SIMPLE_FLOAT = '(?:' . self::DEC . '\.' . self::DEC . '?|\.' . self::DEC . ')';
    private const EXP = '(?:e[+-]?' . self::DEC . ')';
    private const FLOAT = '(?:' . self::SIMPLE_FLOAT . self::EXP . '?|' . self::DEC . self::EXP . ')';
    // Anchored (A) and case-insensitive (i): matches one literal at the given offset.
    private const NUMBER = '~' . self::FLOAT . '|' . self::BIN . '|' . self::HEX . '|' . self::DEC . '~iA';

    /** PHP version associated with this emulation (7.4). */
    public function getPhpVersion(): PhpVersion {
        return PhpVersion::fromComponents(7, 4);
    }

    /**
     * Cheap pre-check: is there any `_` between two digits (decimal or hex)?
     */
    public function isEmulationNeeded(string $code): bool {
        return preg_match('~[0-9]_[0-9]~', $code)
            || preg_match('~0x[0-9a-f]+_[0-9a-f]~i', $code);
    }

    /**
     * Merges number tokens with the tokens that make up the rest of a
     * separator-containing literal.
     *
     * If the literal ends in the middle of a following token, that token is split
     * and its remainder is kept as a separate token whose position points at the
     * first remaining byte.
     */
    public function emulate(string $code, array $tokens): array {
        // We need to manually iterate and manage a count because we'll change
        // the tokens array on the way
        for ($i = 0, $c = count($tokens); $i < $c; ++$i) {
            $token = $tokens[$i];
            if ($token->id !== \T_LNUMBER && $token->id !== \T_DNUMBER) {
                continue;
            }

            $tokenLen = \strlen($token->text);

            $res = preg_match(self::NUMBER, $code, $matches, 0, $token->pos);
            assert($res, "No number at number token position");

            $match = $matches[0];
            $matchLen = \strlen($match);
            if ($matchLen === $tokenLen) {
                // Original token already holds the full number.
                continue;
            }

            $tokenKind = $this->resolveIntegerOrFloatToken($match);
            $newTokens = [new Token($tokenKind, $match, $token->line, $token->pos)];

            // $numTokens counts the original tokens being replaced, $len the
            // number of bytes of the literal they cover so far.
            $numTokens = 1;
            $len = $tokenLen;
            while ($matchLen > $len) {
                $nextToken = $tokens[$i + $numTokens];
                $nextTokenText = $nextToken->text;
                $nextTokenLen = \strlen($nextTokenText);

                $numTokens++;
                if ($matchLen < $len + $nextTokenLen) {
                    // Split trailing characters into a partial token. The first
                    // $consumed bytes now belong to the number, so the remainder
                    // starts that many bytes after the original token.
                    $consumed = $matchLen - $len;
                    $newTokens[] = new Token(
                        $nextToken->id,
                        substr($nextTokenText, $consumed),
                        $nextToken->line,
                        $nextToken->pos + $consumed
                    );
                    break;
                }

                $len += $nextTokenLen;
            }

            array_splice($tokens, $i, $numTokens, $newTokens);
            $c -= $numTokens - \count($newTokens);
        }

        return $tokens;
    }

    /**
     * Decides whether a literal is an integer or a float token.
     *
     * Separators are removed and the literal is evaluated; a float result
     * yields T_DNUMBER, anything else T_LNUMBER.
     */
    private function resolveIntegerOrFloatToken(string $str): int {
        $str = str_replace('_', '', $str);

        if (stripos($str, '0b') === 0) {
            $num = bindec($str);
        } elseif (stripos($str, '0x') === 0) {
            $num = hexdec($str);
        } elseif (stripos($str, '0') === 0 && ctype_digit($str)) {
            // Leading zero followed only by digits: evaluated as octal.
            $num = octdec($str);
        } else {
            $num = +$str;
        }

        return is_float($num) ? T_DNUMBER : T_LNUMBER;
    }

    /** Numeric separators were not legal code previously, so nothing to undo. */
    public function reverseEmulate(string $code, array $tokens): array {
        return $tokens;
    }
}
|
@ -5,10 +5,6 @@ namespace PhpParser;
|
||||
if (!\function_exists('PhpParser\defineCompatibilityTokens')) {
|
||||
function defineCompatibilityTokens(): void {
|
||||
$compatTokens = [
|
||||
// PHP 7.4
|
||||
'T_BAD_CHARACTER',
|
||||
'T_FN',
|
||||
'T_COALESCE_EQUAL',
|
||||
// PHP 8.0
|
||||
'T_NAME_QUALIFIED',
|
||||
'T_NAME_FULLY_QUALIFIED',
|
||||
|
@ -406,10 +406,8 @@ class EmulativeTest extends LexerTest {
|
||||
['8.0', 'match', [[\T_MATCH, 'match']]],
|
||||
['7.4', 'match', [[\T_STRING, 'match']]],
|
||||
// Keywords are not case-sensitive.
|
||||
['7.4', 'fn', [[\T_FN, 'fn']]],
|
||||
['7.4', 'FN', [[\T_FN, 'FN']]],
|
||||
['7.3', 'fn', [[\T_STRING, 'fn']]],
|
||||
['7.3', 'FN', [[\T_STRING, 'FN']]],
|
||||
['8.0', 'MATCH', [[\T_MATCH, 'MATCH']]],
|
||||
['7.4', 'MATCH', [[\T_STRING, 'MATCH']]],
|
||||
// Tested here to skip testLeaveStuffAloneInStrings.
|
||||
['8.0', '"$foo?->bar"', [
|
||||
[ord('"'), '"'],
|
||||
|
Loading…
x
Reference in New Issue
Block a user