diff --git a/tools/fuzzing/generateCorpus.php b/tools/fuzzing/generateCorpus.php
new file mode 100644
index 00000000..f3598322
--- /dev/null
+++ b/tools/fuzzing/generateCorpus.php
@@ -0,0 +1,36 @@
+ $code) {
+        if (false !== strpos($code, '@@{')) {
+            // Skip tests with evaluate segments
+            continue;
+        }
+
+        list($_name, $tests) = $testParser->parseTest($code, 2);
+        foreach ($tests as list($_modeLine, list($input, $_expected))) {
+            // The file name is the md5 of the input, so identical inputs map to the same file.
+            $path = $corpusDir . '/' . md5($input) . '.txt';
+            file_put_contents($path, $input);
+        }
+    }
+}
diff --git a/tools/fuzzing/php.dict b/tools/fuzzing/php.dict
new file mode 100644
index 00000000..e0889f98
--- /dev/null
+++ b/tools/fuzzing/php.dict
@@ -0,0 +1,89 @@
+""
+"__class__"
+"__dir__"
+"__file__"
+"__function__"
+"__halt_compiler"
+"__line__"
+"__method__"
+"__namespace__"
+"__trait__"
+"abstract"
+"array"
+"as"
+"binary"
+"bool"
+"boolean"
+"break"
+"callable"
+"case"
+"catch"
+"class"
+"clone"
+"const"
+"continue"
+"declare"
+"default"
+"die"
+"do"
+"double"
+"echo"
+"else"
+"elseif"
+"empty"
+"enddeclare"
+"endfor"
+"endforeach"
+"endif"
+"endswitch"
+"endwhile"
+"eval"
+"exit"
+"extends"
+"final"
+"finally"
+"float"
+"fn"
+"for"
+"foreach"
+"function"
+"global"
+"goto"
+"if"
+"implements"
+"include"
+"include_once"
+"instanceof"
+"insteadof"
+"int"
+"integer"
+"interface"
+"isset"
+"list"
+"namespace"
+"new"
+"object"
+"print"
+"private"
+"protected"
+"public"
+"readonly"
+"real"
+"require"
+"require_once"
+"return"
+"static"
+"string"
+"switch"
+"throw"
+"trait"
+"try"
+"unset"
+"unset"
+"use"
+"var"
+"while"
+"yield from"
+"yield"
diff --git a/tools/fuzzing/target.php b/tools/fuzzing/target.php
new file mode 100644
index 00000000..ece41c08
--- /dev/null
+++ b/tools/fuzzing/target.php
@@ -0,0 +1,118 @@
+hasProblematicConstruct = false;
+    }
+
+    /**
+     * Normalizes a node so that constructs the pretty printer does not preserve
+     * precisely no longer show up as differences between two node dumps.
+     *
+     * @param PhpParser\Node $node Node that has just been traversed
+     * @return int|PhpParser\Node|null NodeTraverser::REMOVE_NODE for nop statements, a
+     *                                 replacement expression for negative integer
+     *                                 literals, or null (implicitly) to keep the node
+     */
+    public function leaveNode(PhpParser\Node $node) {
+        // We don't precisely preserve nop statements.
+        if ($node instanceof Stmt\Nop) {
+            return PhpParser\NodeTraverser::REMOVE_NODE;
+        }
+
+        // We don't precisely preserve redundant trailing commas in array destructuring.
+        if ($node instanceof Expr\List_) {
+            while (!empty($node->items) && $node->items[count($node->items) - 1] === null) {
+                array_pop($node->items);
+            }
+        }
+
+        // For T_NUM_STRING the parser produced negative integer literals. Convert these into
+        // a unary minus followed by a positive integer.
+        if ($node instanceof Scalar\Int_ && $node->value < 0) {
+            if ($node->value === \PHP_INT_MIN) {
+                // PHP_INT_MIN == -PHP_INT_MAX - 1
+                return new Expr\BinaryOp\Minus(
+                    new Expr\UnaryMinus(new Scalar\Int_(\PHP_INT_MAX)),
+                    new Scalar\Int_(1));
+            }
+            return new Expr\UnaryMinus(new Scalar\Int_(-$node->value));
+        }
+
+        // If a constant with the same name as a cast operand occurs inside parentheses, it will
+        // be parsed back as a cast. E.g. "foo(int)" will fail to parse, because the argument is
+        // interpreted as a cast. We can run into this with inputs like "foo(int\n)", where the
+        // newline is not preserved.
+        if ($node instanceof Expr\ConstFetch && $node->name->isUnqualified() &&
+            in_array($node->name->toLowerString(), self::CAST_NAMES)
+        ) {
+            $this->hasProblematicConstruct = true;
+        }
+    }
+};
+$traverser = new PhpParser\NodeTraverser();
+$traverser->addVisitor($visitor);
+
+// Round-trip check: parse the input, pretty print it, parse the printed code again and
+// compare the dumps of both (normalized) ASTs.
+$fuzzer->setTarget(function(string $input) use($parser, $prettyPrinter, $nodeDumper, $visitor, $traverser) {
+    $stmts = $parser->parse($input);
+    $printed = $prettyPrinter->prettyPrintFile($stmts);
+
+    $stmts = $traverser->traverse($stmts);
+    if ($visitor->hasProblematicConstruct) {
+        // The input contains a construct flagged by the visitor; do not check it further.
+        return;
+    }
+
+    try {
+        $printedStmts = $parser->parse($printed);
+    } catch (PhpParser\Error $e) {
+        // Chain the parser error so the report shows where the printed code is invalid.
+        throw new Error("Failed to parse pretty printer output", 0, $e);
+    }
+
+    $printedStmts = $traverser->traverse($printedStmts);
+    // Strict comparison: no type juggling when comparing the two dumps.
+    $same = $nodeDumper->dump($stmts) === $nodeDumper->dump($printedStmts);
+    if (!$same && !preg_match('/<\?php<\?php/i', $input)) {
+        throw new Error("Result after pretty printing differs");
+    }
+});
+
+$fuzzer->setMaxLen(1024);
+$fuzzer->addDictionary(__DIR__ . '/php.dict');