mirror of
https://github.com/nikic/PHP-Parser.git
synced 2025-01-17 15:18:17 +01:00
616be1d0fc
Earlier releases come with incorrect tokenizer data for T_POW and T_POW_EQUAL.
182 lines
6.7 KiB
PHP
182 lines
6.7 KiB
PHP
<?php
|
|
|
|
namespace PhpParser\Lexer;
|
|
|
|
use PhpParser\Parser;
|
|
|
|
/**
|
|
* ATTENTION: This code is WRITE-ONLY. Do not try to read it.
|
|
*/
|
|
class Emulative extends \PhpParser\Lexer
|
|
{
|
|
protected $newKeywords;
|
|
protected $inObjectAccess;
|
|
|
|
const T_ELLIPSIS = 1001;
|
|
const T_POW = 1002;
|
|
const T_POW_EQUAL = 1003;
|
|
|
|
const PHP_5_6 = '5.6.0rc1';
|
|
const PHP_5_5 = '5.5.0beta1';
|
|
const PHP_5_4 = '5.4.0beta1';
|
|
|
|
public function __construct() {
|
|
parent::__construct();
|
|
|
|
$newKeywordsPerVersion = array(
|
|
self::PHP_5_5 => array(
|
|
'finally' => Parser::T_FINALLY,
|
|
'yield' => Parser::T_YIELD,
|
|
),
|
|
self::PHP_5_4 => array(
|
|
'callable' => Parser::T_CALLABLE,
|
|
'insteadof' => Parser::T_INSTEADOF,
|
|
'trait' => Parser::T_TRAIT,
|
|
'__trait__' => Parser::T_TRAIT_C,
|
|
),
|
|
);
|
|
|
|
$this->newKeywords = array();
|
|
foreach ($newKeywordsPerVersion as $version => $newKeywords) {
|
|
if (version_compare(PHP_VERSION, $version, '>=')) {
|
|
break;
|
|
}
|
|
|
|
$this->newKeywords += $newKeywords;
|
|
}
|
|
|
|
if (version_compare(PHP_VERSION, self::PHP_5_6, '<')) {
|
|
$this->tokenMap[self::T_ELLIPSIS] = Parser::T_ELLIPSIS;
|
|
$this->tokenMap[self::T_POW] = Parser::T_POW;
|
|
$this->tokenMap[self::T_POW_EQUAL] = Parser::T_POW_EQUAL;
|
|
}
|
|
}
|
|
|
|
public function startLexing($code) {
|
|
$this->inObjectAccess = false;
|
|
|
|
$preprocessedCode = $this->preprocessCode($code);
|
|
parent::startLexing($preprocessedCode);
|
|
if ($preprocessedCode !== $code) {
|
|
$this->postprocessTokens();
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Replaces new features in the code by ~__EMU__{NAME}__{DATA}__~ sequences.
|
|
* ~LABEL~ is never valid PHP code, that's why we can (to some degree) safely
|
|
* use it here.
|
|
* Later when preprocessing the tokens these sequences will either be replaced
|
|
* by real tokens or replaced with their original content (e.g. if they occurred
|
|
* inside a string, i.e. a place where they don't have a special meaning).
|
|
*/
|
|
protected function preprocessCode($code) {
|
|
if (version_compare(PHP_VERSION, self::PHP_5_6, '>=')) {
|
|
return $code;
|
|
}
|
|
|
|
$code = str_replace('...', '~__EMU__ELLIPSIS__~', $code);
|
|
$code = preg_replace('((?<!/)\*\*=)', '~__EMU__POWEQUAL__~', $code);
|
|
$code = preg_replace('((?<!/)\*\*(?!/))', '~__EMU__POW__~', $code);
|
|
|
|
if (version_compare(PHP_VERSION, self::PHP_5_4, '>=')) {
|
|
return $code;
|
|
}
|
|
|
|
// binary notation (0b010101101001...)
|
|
return preg_replace('(\b0b[01]+\b)', '~__EMU__BINARY__$0__~', $code);
|
|
}
|
|
|
|
/*
|
|
* Replaces the ~__EMU__...~ sequences with real tokens or their original
|
|
* value.
|
|
*/
|
|
protected function postprocessTokens() {
|
|
// we need to manually iterate and manage a count because we'll change
|
|
// the tokens array on the way
|
|
for ($i = 0, $c = count($this->tokens); $i < $c; ++$i) {
|
|
// first check that the following tokens are of form ~LABEL~,
|
|
// then match the __EMU__... sequence.
|
|
if ('~' === $this->tokens[$i]
|
|
&& isset($this->tokens[$i + 2])
|
|
&& '~' === $this->tokens[$i + 2]
|
|
&& T_STRING === $this->tokens[$i + 1][0]
|
|
&& preg_match('(^__EMU__([A-Z]++)__(?:([A-Za-z0-9]++)__)?$)', $this->tokens[$i + 1][1], $matches)
|
|
) {
|
|
if ('BINARY' === $matches[1]) {
|
|
// the binary number can either be an integer or a double, so return a LNUMBER
|
|
// or DNUMBER respectively
|
|
$replace = array(
|
|
array(is_int(bindec($matches[2])) ? T_LNUMBER : T_DNUMBER, $matches[2], $this->tokens[$i + 1][2])
|
|
);
|
|
} else if ('ELLIPSIS' === $matches[1]) {
|
|
$replace = array(
|
|
array(self::T_ELLIPSIS, '...', $this->tokens[$i + 1][2])
|
|
);
|
|
} else if ('POW' === $matches[1]) {
|
|
$replace = array(
|
|
array(self::T_POW, '**', $this->tokens[$i + 1][2])
|
|
);
|
|
} else if ('POWEQUAL' === $matches[1]) {
|
|
$replace = array(
|
|
array(self::T_POW_EQUAL, '**=', $this->tokens[$i + 1][2])
|
|
);
|
|
} else {
|
|
// just ignore all other __EMU__ sequences
|
|
continue;
|
|
}
|
|
|
|
array_splice($this->tokens, $i, 3, $replace);
|
|
$c -= 3 - count($replace);
|
|
// for multichar tokens (e.g. strings) replace any ~__EMU__...~ sequences
|
|
// in their content with the original character sequence
|
|
} elseif (is_array($this->tokens[$i])
|
|
&& 0 !== strpos($this->tokens[$i][1], '__EMU__')
|
|
) {
|
|
$this->tokens[$i][1] = preg_replace_callback(
|
|
'(~__EMU__([A-Z]++)__(?:([A-Za-z0-9]++)__)?~)',
|
|
array($this, 'restoreContentCallback'),
|
|
$this->tokens[$i][1]
|
|
);
|
|
}
|
|
}
|
|
}
|
|
|
|
/*
|
|
* This method is a callback for restoring EMU sequences in
|
|
* multichar tokens (like strings) to their original value.
|
|
*/
|
|
public function restoreContentCallback(array $matches) {
|
|
if ('BINARY' === $matches[1]) {
|
|
return $matches[2];
|
|
} else if ('ELLIPSIS' === $matches[1]) {
|
|
return '...';
|
|
} else if ('POW' === $matches[1]) {
|
|
return '**';
|
|
} else if ('POWEQUAL' === $matches[1]) {
|
|
return '**=';
|
|
} else {
|
|
return $matches[0];
|
|
}
|
|
}
|
|
|
|
public function getNextToken(&$value = null, &$startAttributes = null, &$endAttributes = null) {
|
|
$token = parent::getNextToken($value, $startAttributes, $endAttributes);
|
|
|
|
// replace new keywords by their respective tokens. This is not done
|
|
// if we currently are in an object access (e.g. in $obj->namespace
|
|
// "namespace" stays a T_STRING tokens and isn't converted to T_NAMESPACE)
|
|
if (Parser::T_STRING === $token && !$this->inObjectAccess) {
|
|
if (isset($this->newKeywords[strtolower($value)])) {
|
|
return $this->newKeywords[strtolower($value)];
|
|
}
|
|
// keep track of whether we currently are in an object access (after ->)
|
|
} elseif (Parser::T_OBJECT_OPERATOR === $token) {
|
|
$this->inObjectAccess = true;
|
|
} else {
|
|
$this->inObjectAccess = false;
|
|
}
|
|
|
|
return $token;
|
|
}
|
|
} |