lexer = $lexer;
    }

    /**
     * Parses PHP code into a node tree.
     *
     * Drives a table-based shift/reduce loop: tokens are pulled from the lexer on demand,
     * mapped to internal symbols, and then either shifted onto the stacks or used to select
     * a rule to reduce. Reductions are dispatched to the reduceRule<N>() methods.
     *
     * @param string $code The source code to parse
     *
     * @return Node[] Array of statements
     *
     * @throws \RangeException If the lexer returns a token id that cannot be mapped to a symbol
     * @throws Error           On a syntax error, or when a semantic action throws one
     */
    public function parse($code) {
        $this->lexer->startLexing($code);

        // We start off with no lookahead-token
        $symbol = self::SYMBOL_NONE;

        // The attributes for a node are taken from the first and last token of the node.
        // From the first token only the startAttributes are taken and from the last only
        // the endAttributes. Both are merged using the array union operator (+).
        $startAttributes = array('startLine' => 1);
        $endAttributes   = array();

        // In order to figure out the attributes for the starting token, we have to keep
        // them in a stack
        $attributeStack = array($startAttributes);

        // Start off in the initial state and keep a stack of previous states
        $state = 0;
        $stateStack = array($state);

        // Semantic value stack (contains values of tokens and semantic action results)
        $this->semStack = array();

        // Current position in the stack(s)
        $this->stackPos = 0;

        for (;;) {
            //$this->traceNewState($state, $symbol);

            if ($this->actionBase[$state] == 0) {
                // No action table entry for this state: use the default rule without reading a token
                $rule = $this->actionDefault[$state];
            } else {
                if ($symbol === self::SYMBOL_NONE) {
                    // Fetch the next token id from the lexer and fetch additional info by-ref.
                    // The end attributes are fetched into a temporary variable and only set once the token is really
                    // shifted (not during read). Otherwise you would sometimes get off-by-one errors, when a rule is
                    // reduced after a token was read but not yet shifted.
                    $tokenId = $this->lexer->getNextToken($tokenValue, $startAttributes, $nextEndAttributes);

                    // map the lexer token id to the internally used symbols
                    $symbol = $tokenId >= 0 && $tokenId < $this->tokenToSymbolMapSize
                        ? $this->tokenToSymbol[$tokenId]
                        : $this->invalidSymbol;

                    if ($symbol === $this->invalidSymbol) {
                        throw new \RangeException(sprintf(
                            'The lexer returned an invalid token (id=%d, value=%s)',
                            $tokenId, $tokenValue
                        ));
                    }

                    $attributeStack[$this->stackPos] = $startAttributes;

                    //$this->traceRead($symbol);
                }

                // Look the action up in the primary table and, for states below YY2TBLSTATE,
                // in the secondary table located YYNLSTATES entries further on.
                $idx = $this->actionBase[$state] + $symbol;
                if ((($idx >= 0 && $idx < $this->actionTableSize && $this->actionCheck[$idx] == $symbol)
                     || ($state < $this->YY2TBLSTATE
                         && ($idx = $this->actionBase[$state + $this->YYNLSTATES] + $symbol) >= 0
                         && $idx < $this->actionTableSize && $this->actionCheck[$idx] == $symbol))
                    && ($action = $this->action[$idx]) != $this->defaultAction
                ) {
                    /*
                     * >= YYNLSTATES: shift and reduce
                     * > 0: shift
                     * = 0: accept
                     * < 0: reduce
                     * = -YYUNEXPECTED: error
                     */
                    if ($action > 0) {
                        /* shift */
                        //$this->traceShift($symbol);

                        ++$this->stackPos;
                        $stateStack[$this->stackPos]     = $state = $action;
                        $this->semStack[$this->stackPos] = $tokenValue;
                        $attributeStack[$this->stackPos] = $startAttributes;
                        // Only now that the token is shifted do its end attributes become current
                        $endAttributes = $nextEndAttributes;
                        $symbol = self::SYMBOL_NONE;

                        if ($action < $this->YYNLSTATES) {
                            continue;
                        }

                        /* $yyn >= YYNLSTATES means shift-and-reduce */
                        $rule = $action - $this->YYNLSTATES;
                    } else {
                        $rule = -$action;
                    }
                } else {
                    $rule = $this->actionDefault[$state];
                }
            }

            for (;;) {
                if ($rule === 0) {
                    /* accept */
                    //$this->traceAccept();
                    return $this->semValue;
                } elseif ($rule !== $this->unexpectedTokenRule) {
                    /* reduce */
                    //$this->traceReduce($rule);

                    try {
                        // Node attributes: start attributes of the first token covered by the rule,
                        // merged with the end attributes of the last shifted token
                        $this->{'reduceRule' . $rule}(
                            $attributeStack[$this->stackPos - $this->ruleToLength[$rule]]
                            + $endAttributes
                        );
                    } catch (Error $e) {
                        // Errors raised without a line (-1) get the line of the current token,
                        // provided the lexer supplied one
                        if (-1 === $e->getRawLine() && isset($startAttributes['startLine'])) {
                            $e->setRawLine($startAttributes['startLine']);
                        }

                        throw $e;
                    }

                    /* Goto - shift nonterminal */
                    $this->stackPos -= $this->ruleToLength[$rule];
                    $nonTerminal = $this->ruleToNonTerminal[$rule];
                    $idx = $this->gotoBase[$nonTerminal] + $stateStack[$this->stackPos];
                    if ($idx >= 0 && $idx < $this->gotoTableSize && $this->gotoCheck[$idx] == $nonTerminal) {
                        $state = $this->goto[$idx];
                    } else {
                        $state = $this->gotoDefault[$nonTerminal];
                    }

                    ++$this->stackPos;

                    $stateStack[$this->stackPos]     = $state;
                    $this->semStack[$this->stackPos] = $this->semValue;
                    $attributeStack[$this->stackPos] = $startAttributes;

                    if ($state < $this->YYNLSTATES) {
                        break;
                    }

                    /* >= YYNLSTATES means shift-and-reduce */
                    $rule = $state - $this->YYNLSTATES;
                } else {
                    /* error */
                    if ($expected = $this->getExpectedTokens($state)) {
                        $expectedString = ', expecting ' . implode(' or ', $expected);
                    } else {
                        $expectedString = '';
                    }

                    // -1 is the "no line" value (see the getRawLine() check above); use it when
                    // the lexer did not provide a start line instead of reading a missing key
                    $errorLine = isset($startAttributes['startLine']) ? $startAttributes['startLine'] : -1;

                    throw new Error(
                        'Syntax error, unexpected ' . $this->symbolToName[$symbol] . $expectedString,
                        $errorLine
                    );
                }
            }
        }
    }

    /**
     * Collects the names of the symbols that have a non-error action in the given state.
     *
     * Used to build the "expecting ..." part of syntax error messages.
     *
     * @param int $state Parser state to inspect
     *
     * @return string[] Up to four symbol names; an empty array if more than four would qualify
     */
    protected function getExpectedTokens($state) {
        $expected = array();

        $base = $this->actionBase[$state];
        foreach ($this->symbolToName as $symbol => $name) {
            // Same primary/secondary action table lookup as in parse()
            $idx = $base + $symbol;
            if (($idx >= 0 && $idx < $this->actionTableSize && $this->actionCheck[$idx] === $symbol)
                || ($state < $this->YY2TBLSTATE
                    && ($idx = $this->actionBase[$state + $this->YYNLSTATES] + $symbol) >= 0
                    && $idx < $this->actionTableSize && $this->actionCheck[$idx] === $symbol)
            ) {
                if ($this->action[$idx] != $this->unexpectedTokenRule) {
                    if (count($expected) == 4) {
                        /* Too many expected tokens */
                        return array();
                    }

                    $expected[] = $name;
                }
            }
        }

        return $expected;
    }

    /*
     * Tracing functions used for debugging the parser.
     */

    /**
     * Prints the state being entered and the current lookahead symbol (if any).
     *
     * @param int $state  Parser state
     * @param int $symbol Lookahead symbol, or self::SYMBOL_NONE if no token has been read
     */
    protected function traceNewState($state, $symbol) {
        echo '% State ' . $state
            . ', Lookahead ' . ($symbol === self::SYMBOL_NONE ? '--none--' : $this->symbolToName[$symbol]) . "\n";
    }

    /**
     * Prints the name of a symbol that was just read from the lexer.
     *
     * @param int $symbol Symbol that was read
     */
    protected function traceRead($symbol) {
        echo '% Reading ' . $this->symbolToName[$symbol] . "\n";
    }

    /**
     * Prints the name of a symbol that is being shifted.
     *
     * @param int $symbol Symbol that is shifted
     */
    protected function traceShift($symbol) {
        echo '% Shift ' . $this->symbolToName[$symbol] . "\n";
    }

    /**
     * Prints a notice that the input was accepted.
     */
    protected function traceAccept() {
        echo "% Accepted.\n";
    }

    /**
     * Prints the rule number and production that is being reduced.
     *
     * @param int $n Rule number (index into $this->productions)
     */
    protected function traceReduce($n) {
        echo '% Reduce by (' . $n . ') ' . $this->productions[$n] . "\n";
    }

    /*
     * Helper functions invoked by semantic actions
     */

    /**
     * Validates namespace usage in a statement list and normalizes semicolon-style namespaces.
     *
     * For semicolon-style namespaces ("namespace Foo;") the statements following each
     * declaration are moved into that namespace node's ->stmts property. Bracketed
     * namespaces and namespace-less code are returned unchanged.
     *
     * @param Node[] $stmts Top-level statements
     *
     * @return Node[] Statements with semicolon-style namespace bodies transplanted
     *
     * @throws Error If code precedes the first namespace declaration, if bracketed and
     *               unbracketed declarations are mixed, or if code exists outside of
     *               bracketed namespaces
     */
    protected function handleNamespaces(array $stmts) {
        // null = not in namespace, false = semicolon style, true = bracket style
        $bracketed = null;

        // whether any statements that aren't allowed before a namespace declaration are encountered
        // (the only valid statements currently are declare() and __halt_compiler())
        $hasNotAllowedStmts = false;

        // offsets for semicolon style namespaces
        // (required for transplanting the following statements into their ->stmts property)
        $nsOffsets = array();

        foreach ($stmts as $i => $stmt) {
            if ($stmt instanceof Node\Stmt\Namespace_) {
                // ->stmts is null if semicolon style is used
                $currentBracketed = null !== $stmt->stmts;

                // if no namespace statement has been encountered yet
                if (!isset($bracketed)) {
                    // set the namespacing style
                    $bracketed = $currentBracketed;

                    // and ensure that it isn't preceded by a not allowed statement
                    if ($hasNotAllowedStmts) {
                        throw new Error('Namespace declaration statement has to be the very first statement in the script', $stmt->getLine());
                    }
                // otherwise ensure that the style of the current namespace matches the style of
                // namespacing used before in this document
                } elseif ($bracketed !== $currentBracketed) {
                    throw new Error('Cannot mix bracketed namespace declarations with unbracketed namespace declarations', $stmt->getLine());
                }

                // for semicolon style namespaces remember the offset
                if (!$bracketed) {
                    $nsOffsets[] = $i;
                }
            // declare() and __halt_compiler() are the only valid statements outside of namespace declarations
            } elseif (!$stmt instanceof Node\Stmt\Declare_
                      && !$stmt instanceof Node\Stmt\HaltCompiler
            ) {
                if (true === $bracketed) {
                    throw new Error('No code may exist outside of namespace {}', $stmt->getLine());
                }

                $hasNotAllowedStmts = true;
            }
        }

        // if bracketed namespaces were used or no namespaces were used at all just return the
        // original statements
        if (!isset($bracketed) || true === $bracketed) {
            return $stmts;
        // for semicolon style transplant statements
        } else {
            // take all statements preceding the first namespace
            $newStmts = array_slice($stmts, 0, $nsOffsets[0]);

            // iterate over all following namespaces
            for ($i = 0, $c = count($nsOffsets); $i < $c; ++$i) {
                $newStmts[] = $nsStmt = $stmts[$nsOffsets[$i]];

                // the last namespace takes all statements after it
                if ($c === $i + 1) {
                    $nsStmt->stmts = array_slice($stmts, $nsOffsets[$i] + 1);

                    // if the last statement is __halt_compiler() put it outside the namespace
                    if (end($nsStmt->stmts) instanceof Node\Stmt\HaltCompiler) {
                        $newStmts[] = array_pop($nsStmt->stmts);
                    }
                // and all the others take all statements between the current and the following one
                } else {
                    $nsStmt->stmts = array_slice($stmts, $nsOffsets[$i] + 1, $nsOffsets[$i + 1] - $nsOffsets[$i] - 1);
                }
            }

            return $newStmts;
        }
    }
}