Properly parse escape sequences:

* Add support for octal and hexadecimal escape sequences
* Take the quote type in use into account when parsing encapsed strings
This commit is contained in:
nikic 2011-08-20 10:40:27 +02:00
parent 05c514f9c5
commit 17a81b5c8f
6 changed files with 122 additions and 40 deletions

View File

@ -89,7 +89,6 @@ class #(-p)
protected $yyval;
protected $yyastk;
protected $yysp;
protected $yyaccept;
protected $lexer;
#endif
#if -t

View File

@ -99,7 +99,7 @@ function resolveNodes($code) {
function resolveMacros($code) {
return preg_replace_callback(
'~(?<name>error|init|push|pushNormalizing|toArray|parse(?:Var|Encapsed|LNumber|DNumber))' . ARGS . '~',
'~(?<name>error|init|push|pushNormalizing|toArray|parse(?:Var|LNumber|DNumber|Encapsed))' . ARGS . '~',
function($matches) {
// recurse
$matches['args'] = resolveMacros($matches['args']);
@ -144,12 +144,6 @@ function resolveMacros($code) {
return 'substr(' . $args[0] . ', 1)';
}
if ('parseEncapsed' == $name) {
assertArgs(1, $args, $name);
return 'stripcslashes(' . $args[0] . ')';
}
if ('parseLNumber' == $name) {
assertArgs(1, $args, $name);
@ -161,6 +155,12 @@ function resolveMacros($code) {
return '(double) ' . $args[0];
}
if ('parseEncapsed' == $name) {
assertArgs(2, $args, $name);
return 'foreach (' . $args[0] . ' as &$s) { if (is_string($s)) { $s = PHPParser_Node_Scalar_String::parseEscapeSequences($s, ' . $args[1] . '); } }';
}
},
$code
);

View File

@ -601,8 +601,8 @@ exit_expr:
backticks_expr:
/* empty */ { $$ = array(); }
| T_ENCAPSED_AND_WHITESPACE { $$ = array(Scalar_String::parseEscapeSequences($1)); }
| encaps_list { $$ = $1; }
| T_ENCAPSED_AND_WHITESPACE { $$ = array(Scalar_String::parseEscapeSequences($1, '`')); }
| encaps_list { parseEncapsed($1, '`'); $$ = $1; }
;
ctor_arguments:
@ -622,7 +622,7 @@ common_scalar:
| T_FUNC_C { $$ = Scalar_FuncConst[]; }
| T_NS_C { $$ = Scalar_NSConst[]; }
| T_START_HEREDOC T_ENCAPSED_AND_WHITESPACE T_END_HEREDOC
{ $$ = Scalar_String[Scalar_String::parseEscapeSequences($2)]; }
{ $$ = Scalar_String[Scalar_String::parseEscapeSequences($2, null)]; }
| T_START_HEREDOC T_END_HEREDOC
{ $$ = Scalar_String['']; }
;
@ -641,8 +641,10 @@ scalar:
| class_constant { $$ = $1; }
| name { $$ = Expr_ConstFetch[$1]; }
| common_scalar { $$ = $1; }
| '"' encaps_list '"' { $$ = Scalar_Encapsed[$2]; }
| T_START_HEREDOC encaps_list T_END_HEREDOC { $$ = Scalar_Encapsed[$2]; }
| '"' encaps_list '"'
{ parseEncapsed($2, '"'); $$ = Scalar_Encapsed[$2]; }
| T_START_HEREDOC encaps_list T_END_HEREDOC
{ parseEncapsed($2, null); $$ = Scalar_Encapsed[$2]; }
;
static_array_pair_list:
@ -760,9 +762,9 @@ array_pair:
encaps_list:
encaps_list encaps_var { push($1, $2); }
| encaps_list T_ENCAPSED_AND_WHITESPACE { push($1, Scalar_String::parseEscapeSequences($2)); }
| encaps_list T_ENCAPSED_AND_WHITESPACE { push($1, $2); }
| encaps_var { init($1); }
| T_ENCAPSED_AND_WHITESPACE encaps_var { init(Scalar_String::parseEscapeSequences($1), $2); }
| T_ENCAPSED_AND_WHITESPACE encaps_var { init($1, $2); }
;
encaps_var:

View File

@ -24,46 +24,70 @@ class PHPParser_Node_Scalar_String extends PHPParser_Node_Scalar
/**
* Creates a String node from a string token (parses escape sequences).
*
* @param string $s String
* @param string $str String
* @param int $line Line
* @param null|string $docComment Nearest doc comment
*
* @return PHPParser_Node_Scalar_String String Node
*/
public static function create($s, $line, $docComment) {
public static function create($str, $line = -1, $docComment = null) {
$bLength = 0;
if ('b' === $s[0]) {
if ('b' === $str[0]) {
$bLength = 1;
}
if ('\'' === $s[$bLength]) {
$s = str_replace(
if ('\'' === $str[$bLength]) {
$str = str_replace(
array('\\\\', '\\\''),
array( '\\', '\''),
substr($s, $bLength + 1, -1)
substr($str, $bLength + 1, -1)
);
} else {
$s = self::parseEscapeSequences(substr($s, $bLength + 1, -1));
$str = self::parseEscapeSequences(substr($str, $bLength + 1, -1), '"');
}
return new self($s, $line, $docComment);
return new self($str, $line, $docComment);
}
/**
* Parses escape sequences in the content of a doubly quoted string
* or heredoc string.
* Parses escape sequences in strings (all string types apart from single quoted).
*
* @param string $s String without quotes
* @param string $str String without quotes
* @param null|string $quote Quote type
*
* @return string String with escape sequences parsed
*/
public static function parseEscapeSequences($s) {
// TODO: parse hex and oct escape sequences
public static function parseEscapeSequences($str, $quote) {
if (null !== $quote) {
$str = str_replace('\\' . $quote, $quote, $str);
}
return str_replace(
array('\\\\', '\"', '\$', '\n', '\r', '\t', '\f', '\v'),
array( '\\', '"', '$', "\n", "\r", "\t", "\f", "\v"),
$s
return preg_replace_callback(
'~\\\\([\\\\$nrtfv]|[xX][0-9a-fA-F]{1,2}|[0-7]{1,3})~',
array(__CLASS__, 'parseCallback'),
$str
);
}
/**
 * Lookup table for single-character escape sequences: the key is the
 * character that follows the backslash, the value is the character the
 * sequence stands for. parseCallback() consults this table first; hex and
 * octal escapes are not listed here.
 */
protected static $replacements = array(
'\\' => '\\',
'$' => '$',
'n' => "\n",
'r' => "\r",
't' => "\t",
'f' => "\f",
'v' => "\v",
);
/**
 * Callback that converts one matched escape sequence into the character it
 * represents.
 *
 * @param array $matches Regex match; $matches[1] holds the escape sequence
 *                       without its leading backslash
 *
 * @return string Replacement for the escape sequence
 */
public static function parseCallback($matches) {
    $str = $matches[1];

    // Single-character escapes are resolved through the lookup table.
    if (isset(self::$replacements[$str])) {
        return self::$replacements[$str];
    }

    if ('x' === $str[0] || 'X' === $str[0]) {
        // Drop the x/X marker: hexdec() only expects hex digits and reports
        // other characters as deprecated input on newer PHP versions.
        return chr(hexdec(substr($str, 1)));
    }

    // Up to three octal digits can exceed one byte (e.g. 400 is 256), so the
    // value is masked to a single byte explicitly instead of relying on
    // chr() wrapping out-of-range values.
    return chr(octdec($str) & 255);
}
}

View File

@ -858,7 +858,6 @@ class PHPParser_Parser
protected $yyval;
protected $yyastk;
protected $yysp;
protected $yyaccept;
protected $lexer;
/**
@ -2039,11 +2038,11 @@ class PHPParser_Parser
}
protected function yyn261($line, $docComment) {
$this->yyval = array(PHPParser_Node_Scalar_String::parseEscapeSequences($this->yyastk[$this->yysp-(1-1)]));
$this->yyval = array(PHPParser_Node_Scalar_String::parseEscapeSequences($this->yyastk[$this->yysp-(1-1)], '`'));
}
protected function yyn262($line, $docComment) {
$this->yyval = $this->yyastk[$this->yysp-(1-1)];
foreach ($this->yyastk[$this->yysp-(1-1)] as &$s) { if (is_string($s)) { $s = PHPParser_Node_Scalar_String::parseEscapeSequences($s, '`'); } }; $this->yyval = $this->yyastk[$this->yysp-(1-1)];
}
protected function yyn263($line, $docComment) {
@ -2095,7 +2094,7 @@ class PHPParser_Parser
}
protected function yyn275($line, $docComment) {
$this->yyval = new PHPParser_Node_Scalar_String(PHPParser_Node_Scalar_String::parseEscapeSequences($this->yyastk[$this->yysp-(3-2)]), $line, $docComment);
$this->yyval = new PHPParser_Node_Scalar_String(PHPParser_Node_Scalar_String::parseEscapeSequences($this->yyastk[$this->yysp-(3-2)], null), $line, $docComment);
}
protected function yyn276($line, $docComment) {
@ -2143,11 +2142,11 @@ class PHPParser_Parser
}
protected function yyn287($line, $docComment) {
$this->yyval = new PHPParser_Node_Scalar_Encapsed($this->yyastk[$this->yysp-(3-2)], $line, $docComment);
foreach ($this->yyastk[$this->yysp-(3-2)] as &$s) { if (is_string($s)) { $s = PHPParser_Node_Scalar_String::parseEscapeSequences($s, '"'); } }; $this->yyval = new PHPParser_Node_Scalar_Encapsed($this->yyastk[$this->yysp-(3-2)], $line, $docComment);
}
protected function yyn288($line, $docComment) {
$this->yyval = new PHPParser_Node_Scalar_Encapsed($this->yyastk[$this->yysp-(3-2)], $line, $docComment);
foreach ($this->yyastk[$this->yysp-(3-2)] as &$s) { if (is_string($s)) { $s = PHPParser_Node_Scalar_String::parseEscapeSequences($s, null); } }; $this->yyval = new PHPParser_Node_Scalar_Encapsed($this->yyastk[$this->yysp-(3-2)], $line, $docComment);
}
protected function yyn289($line, $docComment) {
@ -2359,7 +2358,7 @@ class PHPParser_Parser
}
protected function yyn341($line, $docComment) {
$this->yyastk[$this->yysp-(2-1)][] = PHPParser_Node_Scalar_String::parseEscapeSequences($this->yyastk[$this->yysp-(2-2)]); $this->yyval = $this->yyastk[$this->yysp-(2-1)];
$this->yyastk[$this->yysp-(2-1)][] = $this->yyastk[$this->yysp-(2-2)]; $this->yyval = $this->yyastk[$this->yysp-(2-1)];
}
protected function yyn342($line, $docComment) {
@ -2367,7 +2366,7 @@ class PHPParser_Parser
}
protected function yyn343($line, $docComment) {
$this->yyval = array(PHPParser_Node_Scalar_String::parseEscapeSequences($this->yyastk[$this->yysp-(2-1)]), $this->yyastk[$this->yysp-(2-2)]);
$this->yyval = array($this->yyastk[$this->yysp-(2-1)], $this->yyastk[$this->yysp-(2-2)]);
}
protected function yyn344($line, $docComment) {

View File

@ -0,0 +1,58 @@
<?php
/**
 * Tests for PHPParser_Node_Scalar_String: escape sequence parsing and the
 * creation of string nodes from raw string tokens.
 */
class PHPParser_Tests_Node_Scalar_StringTest extends PHPUnit_Framework_TestCase
{
    /**
     * Checks that parseEscapeSequences() turns $string into $expected for the
     * given quote type.
     *
     * @dataProvider provideTestParseEscapeSequences
     */
    public function testParseEscapeSequences($expected, $string, $quote) {
        // assertSame compares type and exact bytes; a loose comparison could
        // treat differing numeric-looking strings as equal.
        $this->assertSame(
            $expected,
            PHPParser_Node_Scalar_String::parseEscapeSequences($string, $quote)
        );
    }

    /**
     * Checks that create() yields a node whose value is $expected when given
     * the raw (still quoted) string token $string.
     *
     * @dataProvider provideTestCreate
     */
    public function testCreate($expected, $string) {
        $this->assertSame(
            $expected,
            PHPParser_Node_Scalar_String::create($string)->value
        );
    }

    /**
     * Data sets of the form array(expected result, input string, quote type).
     *
     * Note: provideTestCreate() skips the entries at index 1 and 2, so keep
     * the order in sync when adding cases.
     */
    public function provideTestParseEscapeSequences() {
        return array(
            array('"',                '\\"',              '"'),
            array('\\"',              '\\"',              '`'),
            array('\\"\\`',           '\\"\\`',           null),
            array("\\\$\n\r\t\f\v",   '\\\\\$\n\r\t\f\v', null),
            array(chr(255),           '\xFF',             null),
            array(chr(255),           '\377',             null),
            array(chr(0),             '\400',             null),
            array("\0",               '\0',               null),
            array('\xFF',             '\\\\xFF',          null),
        );
    }

    /**
     * Data sets of the form array(expected value, raw string token).
     *
     * Combines a few hand-written quoting cases with the escape sequence
     * cases above, wrapped in double quotes.
     */
    public function provideTestCreate() {
        $tests = array(
            array('A', '\'A\''),
            array('A', 'b\'A\''),
            array('A', '"A"'),
            array('A', 'b"A"'),
            array('\\', '\'\\\\\''),
            array('\'', '\'\\\'\''),
        );

        foreach ($this->provideTestParseEscapeSequences() as $i => $test) {
            // skip second and third tests, they aren't for double quotes
            if (1 === $i || 2 === $i) {
                continue;
            }

            $tests[] = array($test[0], '"' . $test[1] . '"');
        }

        return $tests;
    }
}