From c5c7aa5125a0f8e1eb82fba3b57c2f49931353c0 Mon Sep 17 00:00:00 2001
From: nikic <nikita.ppv@googlemail.com>
Date: Sun, 18 Dec 2011 13:04:27 +0100
Subject: [PATCH] Add initial version of an emulative lexer

The emulative lexer allows lexing of PHP 5.4 on PHP 5.3 and PHP 5.2.
---
 lib/PHPParser/Lexer/Emulative.php             | 125 ++++++++++++++++++
 test/PHPParser/Tests/Serializer/XMLTest.php   |   4 +-
 test/PHPParser/Tests/codeTest.php             |  12 +-
 test/code/scalar/int.test                     |   4 +
 test/code/scalar/int.test-5.4                 |  11 --
 test/code/scalar/magicConst.test              |   3 +
 test/code/scalar/magicConst.test-5.4          |  10 --
 .../stmt/class/{trait.test-5.4 => trait.test} |   0
 test/code/stmt/function/typeHints.test        |  12 +-
 test/code/stmt/function/typeHints.test-5.4    |  22 ---
 10 files changed, 146 insertions(+), 57 deletions(-)
 create mode 100644 lib/PHPParser/Lexer/Emulative.php
 delete mode 100644 test/code/scalar/int.test-5.4
 delete mode 100644 test/code/scalar/magicConst.test-5.4
 rename test/code/stmt/class/{trait.test-5.4 => trait.test} (100%)
 delete mode 100644 test/code/stmt/function/typeHints.test-5.4

diff --git a/lib/PHPParser/Lexer/Emulative.php b/lib/PHPParser/Lexer/Emulative.php
new file mode 100644
index 00000000..663a7a30
--- /dev/null
+++ b/lib/PHPParser/Lexer/Emulative.php
@@ -0,0 +1,227 @@
+<?php
+
+/**
+ * Lexer that emulates newer PHP syntax on older PHP versions.
+ *
+ * Syntax the running PHP version cannot tokenize (binary number literals before
+ * PHP 5.4; namespace separators and nowdocs before PHP 5.3) is swapped for
+ * ~__EMU__TYPE__DATA__~ placeholders before the code reaches the parent lexer.
+ * Afterwards the placeholders found in $this->tokens are turned back into the
+ * tokens (or, when they ended up inside another token, the text) they stand
+ * for. Keywords are emulated separately in lex() by translating T_STRING tokens.
+ */
+class PHPParser_Lexer_Emulative extends PHPParser_Lexer
+{
+    /** @var array Lowercased keyword => parser token id, for keywords emulated in lex() */
+    protected static $keywords = array(
+        // PHP 5.4
+        'callable'      => PHPParser_Parser::T_CALLABLE,
+        'insteadof'     => PHPParser_Parser::T_INSTEADOF,
+        'trait'         => PHPParser_Parser::T_TRAIT,
+        '__trait__'     => PHPParser_Parser::T_TRAIT_C,
+        // PHP 5.3
+        '__dir__'       => PHPParser_Parser::T_DIR,
+        'goto'          => PHPParser_Parser::T_GOTO,
+        'namespace'     => PHPParser_Parser::T_NAMESPACE,
+        '__namespace__' => PHPParser_Parser::T_NS_C,
+    );
+
+    /** @var bool True right after an object operator, so the member name is not treated as a keyword */
+    protected $inObjectAccess;
+
+    /**
+     * Preprocesses the code, passes it to the parent lexer and resolves the placeholders.
+     *
+     * @param string $code PHP source code to lex
+     */
+    public function __construct($code) {
+        $this->inObjectAccess = false;
+
+        parent::__construct($this->preprocessCode($code));
+
+        $this->postprocessTokens();
+    }
+
+    /**
+     * Replaces syntax unsupported by the running PHP version with ~__EMU__...~ placeholders.
+     *
+     * @param string $code Original source code
+     *
+     * @return string Source code with placeholders
+     */
+    protected function preprocessCode($code) {
+        if (version_compare(PHP_VERSION, '5.4.0RC1', '<')) {
+            // binary notation
+            $code = preg_replace('(\b0b[01]+\b)', '~__EMU__BINARY__$0__~', $code);
+        }
+
+        if (version_compare(PHP_VERSION, '5.3.0', '<')) {
+            // namespace separator
+            $code = preg_replace('(\\\\(?!["\'`$\\\\]))', '~__EMU__NS__~', $code);
+
+            // nowdoc
+            $code = preg_replace_callback(
+                '((*BSR_ANYCRLF)    # set \R to (\r|\n|\r\n)
+                  (b?<<<[\t ]*\'([a-zA-Z_\x7f-\xff][a-zA-Z0-9_\x7f-\xff]*)\'\R) # opening token
+                  ((?:(?!\2).*\R)*) # content
+                  (\2)              # closing token
+                  (?=;?\R)          # must be followed by newline (with optional semicolon)
+                 )x',
+                array($this, 'encodeNowdocCallback'),
+                $code
+            );
+        }
+
+        return $code;
+    }
+
+    /**
+     * Walks $this->tokens and turns placeholders back into what they stand for.
+     */
+    protected function postprocessTokens() {
+        // tokens are spliced while iterating, so the count is maintained by hand
+        for ($i = 0, $c = count($this->tokens); $i < $c; ++$i) {
+            // a placeholder in code position shows up as the tokens '~', T_STRING, '~'
+            if ('~' === $this->tokens[$i]
+                && isset($this->tokens[$i + 2])
+                && '~' === $this->tokens[$i + 2]
+                && T_STRING === $this->tokens[$i + 1][0]
+                && preg_match('(^__EMU__([A-Z]++)__(?:([A-Za-z0-9]++)__)?$)', $this->tokens[$i + 1][1], $matches)
+            ) {
+                $replace = $this->buildEmulatedTokens($matches, $this->tokens[$i + 1][2]);
+                if (null === $replace) {
+                    // unknown or malformed placeholder: leave the tokens untouched
+                    continue;
+                }
+
+                array_splice($this->tokens, $i, 3, $replace);
+                $c -= 3 - count($replace);
+            } elseif (is_array($this->tokens[$i])
+                      && false !== strpos($this->tokens[$i][1], '__EMU__')
+            ) {
+                // a placeholder may sit anywhere inside a multi-character token; put the text back
+                $this->tokens[$i][1] = preg_replace_callback(
+                    '(~__EMU__([A-Z]++)__(?:([A-Za-z0-9]++)__)?~)',
+                    array($this, 'restoreContentCallback'),
+                    $this->tokens[$i][1]
+                );
+            }
+        }
+    }
+
+    /**
+     * Builds the tokens a code-position placeholder stands for.
+     *
+     * @param array $matches Match of the placeholder pattern: [1] is the type, [2] the optional payload
+     * @param mixed $line    Index 2 of the placeholder's T_STRING token (presumably its line number)
+     *
+     * @return array|null Replacement tokens, or null if the placeholder is unknown or malformed
+     */
+    protected function buildEmulatedTokens(array $matches, $line) {
+        $data = isset($matches[2]) ? $matches[2] : null;
+
+        if ('NS' === $matches[1]) {
+            return array('\\');
+        }
+
+        if ('BINARY' === $matches[1] && null !== $data) {
+            return array(array(T_LNUMBER, $data, $line));
+        }
+
+        if ('NOWDOC' === $matches[1] && null !== ($parts = $this->decodeNowdoc($data))) {
+            list($start, $content, $end) = $parts;
+
+            $tokens = array(array(T_START_HEREDOC, $start, $line));
+            if ('' !== $content) {
+                $tokens[] = array(T_ENCAPSED_AND_WHITESPACE, $content, -1);
+            }
+            $tokens[] = array(T_END_HEREDOC, $end, -1);
+
+            return $tokens;
+        }
+
+        return null;
+    }
+
+    /**
+     * Decodes the payload produced by encodeNowdocCallback().
+     *
+     * @param string|null $data Three lowercase hex strings joined by "x"
+     *
+     * @return array|null array($start, $content, $end), or null if the payload is not of that form
+     */
+    protected function decodeNowdoc($data) {
+        if (null === $data || !preg_match('(^[0-9a-f]*x[0-9a-f]*x[0-9a-f]*$)', $data)) {
+            return null;
+        }
+
+        list($start, $content, $end) = explode('x', $data);
+        return array(pack('H*', $start), pack('H*', $content), pack('H*', $end));
+    }
+
+    /**
+     * preg_replace_callback() handler that encodes a nowdoc as a NOWDOC placeholder.
+     *
+     * @param array $matches Match of the nowdoc pattern: [1] opening token, [3] content, [4] closing label
+     *
+     * @return string Placeholder with the three parts hex-encoded and joined by "x"
+     */
+    public function encodeNowdocCallback(array $matches) {
+        return '~__EMU__NOWDOC__'
+             . bin2hex($matches[1]) . 'x' . bin2hex($matches[3]) . 'x' . bin2hex($matches[4])
+             . '__~';
+    }
+
+    /**
+     * preg_replace_callback() handler that converts a placeholder back into the text it replaced.
+     *
+     * @param array $matches Match of the placeholder pattern: [0] placeholder, [1] type, [2] optional payload
+     *
+     * @return string Original text, or the placeholder itself if it is unknown or malformed
+     */
+    public function restoreContentCallback(array $matches) {
+        $data = isset($matches[2]) ? $matches[2] : null;
+
+        if ('NS' === $matches[1]) {
+            return '\\';
+        }
+
+        if ('BINARY' === $matches[1] && null !== $data) {
+            return $data;
+        }
+
+        if ('NOWDOC' === $matches[1] && null !== ($parts = $this->decodeNowdoc($data))) {
+            return implode('', $parts);
+        }
+
+        return $matches[0];
+    }
+
+    /**
+     * Returns the next token, emulating keywords and the namespace separator.
+     *
+     * @param mixed $value      Passed by reference to parent::lex(); read afterwards as the token text
+     * @param mixed $line       Passed by reference to parent::lex()
+     * @param mixed $docComment Passed by reference to parent::lex()
+     *
+     * @return mixed Token id from parent::lex(), an emulated keyword id or T_NS_SEPARATOR
+     */
+    public function lex(&$value = null, &$line = null, &$docComment = null) {
+        $token = parent::lex($value, $line, $docComment);
+
+        if (PHPParser_Parser::T_STRING === $token && !$this->inObjectAccess) {
+            $keyword = strtolower($value);
+            if (isset(self::$keywords[$keyword])) {
+                return self::$keywords[$keyword];
+            }
+        } elseif (92 === $token) { // ord('\\')
+            return PHPParser_Parser::T_NS_SEPARATOR;
+        } elseif (PHPParser_Parser::T_OBJECT_OPERATOR === $token) {
+            $this->inObjectAccess = true;
+        } else {
+            $this->inObjectAccess = false;
+        }
+
+        return $token;
+    }
+}
\ No newline at end of file
diff --git a/test/PHPParser/Tests/Serializer/XMLTest.php b/test/PHPParser/Tests/Serializer/XMLTest.php
index 0e560a18..9e826884 100644
--- a/test/PHPParser/Tests/Serializer/XMLTest.php
+++ b/test/PHPParser/Tests/Serializer/XMLTest.php
@@ -6,10 +6,10 @@ class PHPParser_Tests_Serializer_XMLTest extends PHPUnit_Framework_TestCase
      * @covers PHPParser_Serializer_XML<extended>
      */
     public function testSerialize() {
-        $code = <<<'CODE'
+        $code = <<<CODE
 <?php
 /** doc comment */
-function functionName(&$a = 0, $b = 1.0) {
+function functionName(&\$a = 0, \$b = 1.0) {
     echo 'Foo';
 }
 CODE;
diff --git a/test/PHPParser/Tests/codeTest.php b/test/PHPParser/Tests/codeTest.php
index 11fd0b41..32360d09 100644
--- a/test/PHPParser/Tests/codeTest.php
+++ b/test/PHPParser/Tests/codeTest.php
@@ -9,7 +9,7 @@ class PHPParser_Tests_codeTest extends PHPUnit_Framework_TestCase
         $parser = new PHPParser_Parser;
         $dumper = new PHPParser_NodeDumper;
 
-        $stmts = $parser->parse(new PHPParser_Lexer($code));
+        $stmts = $parser->parse(new PHPParser_Lexer_Emulative($code));
         $this->assertEquals(
             $this->canonicalize($dump),
             $this->canonicalize($dumper->dump($stmts)),
@@ -28,7 +28,7 @@ class PHPParser_Tests_codeTest extends PHPUnit_Framework_TestCase
         $parser = new PHPParser_Parser;
 
         try {
-            $parser->parse(new PHPParser_Lexer($code));
+            $parser->parse(new PHPParser_Lexer_Emulative($code));
 
             $this->fail(sprintf('"%s": Expected PHPParser_Error', $name));
         } catch (PHPParser_Error $e) {
@@ -43,13 +43,7 @@ class PHPParser_Tests_codeTest extends PHPUnit_Framework_TestCase
     protected function getTests($ext) {
         $it = new RecursiveDirectoryIterator(dirname(__FILE__) . '/../../code');
         $it = new RecursiveIteratorIterator($it, RecursiveIteratorIterator::LEAVES_ONLY);
-
-        $ext = preg_quote($ext, '~');
-        if (version_compare(PHP_VERSION, '5.4.0RC1', '>=')) {
-            $it = new RegexIterator($it, '~\.' . $ext . '(-5\.4)?$~');
-        } else {
-            $it = new RegexIterator($it, '~\.' . $ext . '$~');
-        }
+        $it = new RegexIterator($it, '(\.' . preg_quote($ext) . '$)');
 
         $tests = array();
         foreach ($it as $file) {
diff --git a/test/code/scalar/int.test b/test/code/scalar/int.test
index c8f7191f..47a8f14f 100644
--- a/test/code/scalar/int.test
+++ b/test/code/scalar/int.test
@@ -11,6 +11,7 @@ Different integer syntaxes
 0XfFf;
 0777;
 0787;
+0b111000111000;
 -----
 array(
     0: Scalar_LNumber(
@@ -40,4 +41,7 @@ array(
     8: Scalar_LNumber(
         value: 7
     )
+    9: Scalar_LNumber(
+        value: 3640
+    )
 )
\ No newline at end of file
diff --git a/test/code/scalar/int.test-5.4 b/test/code/scalar/int.test-5.4
deleted file mode 100644
index 13fac508..00000000
--- a/test/code/scalar/int.test-5.4
+++ /dev/null
@@ -1,11 +0,0 @@
-Different integer syntaxes
------
-<?php
-
-0b111000111000;
------
-array(
-    0: Scalar_LNumber(
-        value: 3640
-    )
-)
\ No newline at end of file
diff --git a/test/code/scalar/magicConst.test b/test/code/scalar/magicConst.test
index fdf5d1e5..58291f78 100644
--- a/test/code/scalar/magicConst.test
+++ b/test/code/scalar/magicConst.test
@@ -9,6 +9,7 @@ __FUNCTION__;
 __LINE__;
 __METHOD__;
 __NAMESPACE__;
+__TRAIT__;
 -----
 array(
     0: Scalar_ClassConst(
@@ -25,4 +26,6 @@ array(
     )
     6: Scalar_NSConst(
     )
+    7: Scalar_TraitConst(
+    )
 )
\ No newline at end of file
diff --git a/test/code/scalar/magicConst.test-5.4 b/test/code/scalar/magicConst.test-5.4
deleted file mode 100644
index 29c5ca15..00000000
--- a/test/code/scalar/magicConst.test-5.4
+++ /dev/null
@@ -1,10 +0,0 @@
-Magic constants
------
-<?php
-
-__TRAIT__;
------
-array(
-    0: Scalar_TraitConst(
-    )
-)
\ No newline at end of file
diff --git a/test/code/stmt/class/trait.test-5.4 b/test/code/stmt/class/trait.test
similarity index 100%
rename from test/code/stmt/class/trait.test-5.4
rename to test/code/stmt/class/trait.test
diff --git a/test/code/stmt/function/typeHints.test b/test/code/stmt/function/typeHints.test
index 99729a5d..5f49645f 100644
--- a/test/code/stmt/function/typeHints.test
+++ b/test/code/stmt/function/typeHints.test
@@ -2,7 +2,7 @@ Type hints
 -----
 <?php
 
-function a($b, array $c, D $e) {}
+function a($b, array $c, callable $d, E $f) {}
 -----
 array(
     0: Stmt_Function(
@@ -21,11 +21,17 @@ array(
                 byRef: false
             )
             2: Param(
-                name: e
+                name: d
+                default: null
+                type: callable
+                byRef: false
+            )
+            3: Param(
+                name: f
                 default: null
                 type: Name(
                     parts: array(
-                        0: D
+                        0: E
                     )
                 )
                 byRef: false
diff --git a/test/code/stmt/function/typeHints.test-5.4 b/test/code/stmt/function/typeHints.test-5.4
deleted file mode 100644
index 1fab99fa..00000000
--- a/test/code/stmt/function/typeHints.test-5.4
+++ /dev/null
@@ -1,22 +0,0 @@
-Callable type hint
------
-<?php
-
-function a(callable $b) {}
------
-array(
-    0: Stmt_Function(
-        byRef: false
-        params: array(
-            0: Param(
-                name: b
-                default: null
-                type: callable
-                byRef: false
-            )
-        )
-        stmts: array(
-        )
-        name: a
-    )
-)
\ No newline at end of file