mirror of
https://github.com/halaxa/json-machine.git
synced 2025-02-23 23:52:37 +01:00
Performance optimizations ~8%
This commit is contained in:
parent
3b1c4582f8
commit
c5339df400
@ -29,6 +29,8 @@ class Lexer implements \IteratorAggregate, PositionAware
|
||||
$isEscaping = false;
|
||||
$width = 0;
|
||||
$trackingLineBreak = false;
|
||||
$position = 0;
|
||||
$column = 0;
|
||||
|
||||
// Treat UTF-8 BOM bytes as whitespace
|
||||
${"\xEF"} = ${"\xBB"} = ${"\xBF"} = 0;
|
||||
@ -48,14 +50,12 @@ class Lexer implements \IteratorAggregate, PositionAware
|
||||
$bytesLength = strlen($bytes);
|
||||
for ($i = 0; $i < $bytesLength; ++$i) {
|
||||
$byte = $bytes[$i];
|
||||
++$this->position;
|
||||
++$position;
|
||||
if ($inString) {
|
||||
if ($byte === '"' && !$isEscaping) {
|
||||
$inString = false;
|
||||
}
|
||||
$inString = ! ($byte === '"' && !$isEscaping);
|
||||
$isEscaping = ($byte === '\\' && !$isEscaping);
|
||||
$tokenBuffer .= $byte;
|
||||
$width++;
|
||||
++$width;
|
||||
continue;
|
||||
}
|
||||
|
||||
@ -66,31 +66,37 @@ class Lexer implements \IteratorAggregate, PositionAware
|
||||
}
|
||||
|
||||
if (isset($$byte)) {
|
||||
$this->column++;
|
||||
++$column;
|
||||
if ($tokenBuffer !== '') {
|
||||
$this->position = $position;
|
||||
$this->column = $column;
|
||||
yield $tokenBuffer;
|
||||
$this->column += $width;
|
||||
$column += $width;
|
||||
$tokenBuffer = '';
|
||||
$width = 0;
|
||||
}
|
||||
if ($$byte) { // is not whitespace
|
||||
$this->position = $position;
|
||||
$this->column = $column;
|
||||
yield $byte;
|
||||
// track line number and reset column for each newline
|
||||
} elseif ($byte === "\r" || $byte === "\n") {
|
||||
$trackingLineBreak = ($byte === "\r");
|
||||
$this->line++;
|
||||
$this->column = 0;
|
||||
$column = 0;
|
||||
}
|
||||
} else {
|
||||
if ($byte === '"') {
|
||||
$inString = true;
|
||||
}
|
||||
$tokenBuffer .= $byte;
|
||||
$width++;
|
||||
++$width;
|
||||
}
|
||||
}
|
||||
}
|
||||
if ($tokenBuffer !== '') {
|
||||
$this->position = $position;
|
||||
$this->column = $column;
|
||||
yield $tokenBuffer;
|
||||
}
|
||||
}
|
||||
|
124
src/Parser.php
124
src/Parser.php
@ -27,36 +27,9 @@ class Parser implements \IteratorAggregate, PositionAware
|
||||
const AFTER_ARRAY_VALUE = self::COMMA | self::ARRAY_END;
|
||||
const AFTER_OBJECT_VALUE = self::COMMA | self::OBJECT_END;
|
||||
|
||||
private $type = [
|
||||
'n' => self::SCALAR_CONST,
|
||||
't' => self::SCALAR_CONST,
|
||||
'f' => self::SCALAR_CONST,
|
||||
'-' => self::SCALAR_CONST,
|
||||
'0' => self::SCALAR_CONST,
|
||||
'1' => self::SCALAR_CONST,
|
||||
'2' => self::SCALAR_CONST,
|
||||
'3' => self::SCALAR_CONST,
|
||||
'4' => self::SCALAR_CONST,
|
||||
'5' => self::SCALAR_CONST,
|
||||
'6' => self::SCALAR_CONST,
|
||||
'7' => self::SCALAR_CONST,
|
||||
'8' => self::SCALAR_CONST,
|
||||
'9' => self::SCALAR_CONST,
|
||||
'"' => self::SCALAR_STRING,
|
||||
'{' => self::OBJECT_START,
|
||||
'}' => self::OBJECT_END,
|
||||
'[' => self::ARRAY_START,
|
||||
']' => self::ARRAY_END,
|
||||
',' => self::COMMA,
|
||||
':' => self::COLON,
|
||||
];
|
||||
|
||||
/** @var Lexer */
|
||||
private $lexer;
|
||||
|
||||
/** @var string */
|
||||
private $token;
|
||||
|
||||
/** @var string */
|
||||
private $jsonPointerPath;
|
||||
|
||||
@ -95,6 +68,29 @@ class Parser implements \IteratorAggregate, PositionAware
|
||||
public function getIterator()
|
||||
{
|
||||
// todo Allow to call getIterator only once per instance
|
||||
${'n'} = self::SCALAR_CONST;
|
||||
${'t'} = self::SCALAR_CONST;
|
||||
${'f'} = self::SCALAR_CONST;
|
||||
${'-'} = self::SCALAR_CONST;
|
||||
${'0'} = self::SCALAR_CONST;
|
||||
${'1'} = self::SCALAR_CONST;
|
||||
${'2'} = self::SCALAR_CONST;
|
||||
${'3'} = self::SCALAR_CONST;
|
||||
${'4'} = self::SCALAR_CONST;
|
||||
${'5'} = self::SCALAR_CONST;
|
||||
${'6'} = self::SCALAR_CONST;
|
||||
${'7'} = self::SCALAR_CONST;
|
||||
${'8'} = self::SCALAR_CONST;
|
||||
${'9'} = self::SCALAR_CONST;
|
||||
|
||||
${'"'} = self::SCALAR_STRING;
|
||||
${'{'} = self::OBJECT_START;
|
||||
${'}'} = self::OBJECT_END;
|
||||
${'['} = self::ARRAY_START;
|
||||
${']'} = self::ARRAY_END;
|
||||
${','} = self::COMMA;
|
||||
${':'} = self::COLON;
|
||||
|
||||
$iteratorLevel = count($this->jsonPointerPath);
|
||||
$iteratorStruct = null;
|
||||
$currentPath = [];
|
||||
@ -105,38 +101,40 @@ class Parser implements \IteratorAggregate, PositionAware
|
||||
$key = null;
|
||||
$previousToken = null;
|
||||
$objectKeyExpected = false;
|
||||
$inArray = false; // todo remove one of inArray, inObject
|
||||
$inObject = false;
|
||||
$inObject = true; // hack to make "!$inObject" in first iteration work. Better code structure?
|
||||
$expectedType = self::OBJECT_START | self::ARRAY_START;
|
||||
$subtreeEnded = false;
|
||||
$token = null;
|
||||
|
||||
foreach ($this->lexer as $this->token) {
|
||||
$firstChar = $this->token[0];
|
||||
$tokenType = isset($this->type[$firstChar]) ? $this->type[$firstChar] : 0;
|
||||
$lexer = $this->lexer;
|
||||
$jsonPointerPath = $this->jsonPointerPath;
|
||||
|
||||
foreach ($lexer as $token) {
|
||||
$tokenType = ${$token[0]};
|
||||
if (0 === ($tokenType & $expectedType)) {
|
||||
$this->error("Unexpected symbol");
|
||||
$this->error("Unexpected symbol", $token);
|
||||
}
|
||||
$isValue = ($tokenType | self::ANY_VALUE) === self::ANY_VALUE;
|
||||
if ($currentPath === $this->jsonPointerPath && ($currentLevel > $iteratorLevel || ($currentLevel === $iteratorLevel && $isValue))) {
|
||||
$jsonBuffer .= $this->token;
|
||||
$isValue = ($tokenType | 23) === 23; // 23 = self::ANY_VALUE
|
||||
if ($currentPath === $jsonPointerPath && ($currentLevel > $iteratorLevel || ($currentLevel === $iteratorLevel && $isValue))) {
|
||||
$jsonBuffer .= $token;
|
||||
}
|
||||
if ($inArray && $isValue && $currentLevel < $iteratorLevel) {
|
||||
if ( ! $inObject && $isValue && $currentLevel < $iteratorLevel) {
|
||||
$currentPath[$currentLevel] = isset($currentPath[$currentLevel]) ? (string)(1+(int)$currentPath[$currentLevel]) : "0";
|
||||
unset($currentPath[$currentLevel+1]);
|
||||
}
|
||||
switch ($firstChar) {
|
||||
switch ($token[0]) {
|
||||
case '"':
|
||||
if ($objectKeyExpected) {
|
||||
$objectKeyExpected = false;
|
||||
$expectedType = self::COLON;
|
||||
$expectedType = 128; // 128 = self::COLON
|
||||
if ($currentLevel === $iteratorLevel) {
|
||||
$key = $this->token;
|
||||
$key = $token;
|
||||
$jsonBuffer = '';
|
||||
} elseif ($currentLevel < $iteratorLevel) {
|
||||
// inlined
|
||||
$keyResult = $this->jsonDecoder->decodeKey($this->token);
|
||||
$keyResult = $this->jsonDecoder->decodeKey($token);
|
||||
if ( ! $keyResult->isOk()) {
|
||||
$this->error($keyResult->getErrorMessage());
|
||||
$this->error($keyResult->getErrorMessage(), $token);
|
||||
}
|
||||
// endinlined
|
||||
// fixme: If there's an error in a key outside the iterator level and ErrorWrappingDecoder
|
||||
@ -153,13 +151,13 @@ class Parser implements \IteratorAggregate, PositionAware
|
||||
case ',':
|
||||
if ($inObject) {
|
||||
$objectKeyExpected = true;
|
||||
$expectedType = self::SCALAR_STRING;
|
||||
$expectedType = 2; // 2 = self::SCALAR_STRING
|
||||
} else {
|
||||
$expectedType = self::ANY_VALUE;
|
||||
$expectedType = 23; // 23 = self::ANY_VALUE
|
||||
}
|
||||
break;
|
||||
case ':':
|
||||
$expectedType = self::ANY_VALUE;
|
||||
$expectedType = 23; // 23 = self::ANY_VALUE
|
||||
break;
|
||||
case '{':
|
||||
++$currentLevel;
|
||||
@ -167,8 +165,8 @@ class Parser implements \IteratorAggregate, PositionAware
|
||||
$iteratorStruct = '{';
|
||||
}
|
||||
$stack[$currentLevel] = '{';
|
||||
$inArray = !$inObject = true;
|
||||
$expectedType = self::AFTER_OBJECT_START;
|
||||
$inObject = true;
|
||||
$expectedType = 10; // 10 = self::AFTER_OBJECT_START
|
||||
$objectKeyExpected = true;
|
||||
break;
|
||||
case '[':
|
||||
@ -177,34 +175,34 @@ class Parser implements \IteratorAggregate, PositionAware
|
||||
$iteratorStruct = '[';
|
||||
}
|
||||
$stack[$currentLevel] = '[';
|
||||
$inArray = !$inObject = false;
|
||||
$expectedType = self::AFTER_ARRAY_START;
|
||||
$inObject = false;
|
||||
$expectedType = 55; // 55 = self::AFTER_ARRAY_START;
|
||||
break;
|
||||
case '}':
|
||||
case ']':
|
||||
--$currentLevel;
|
||||
$inArray = !$inObject = $stack[$currentLevel] === '{';
|
||||
$inObject = $stack[$currentLevel] === '{';
|
||||
default:
|
||||
expectedTypeAfterValue:
|
||||
if ($inArray) {
|
||||
$expectedType = self::AFTER_ARRAY_VALUE;
|
||||
if ($inObject) {
|
||||
$expectedType = 72; // 72 = self::AFTER_OBJECT_VALUE;
|
||||
} else {
|
||||
$expectedType = self::AFTER_OBJECT_VALUE;
|
||||
$expectedType = 96; // 96 = self::AFTER_ARRAY_VALUE;
|
||||
}
|
||||
}
|
||||
if ( ! $pathFound && $currentPath === $this->jsonPointerPath) {
|
||||
if ( ! $pathFound && $currentPath === $jsonPointerPath) {
|
||||
$pathFound = true;
|
||||
}
|
||||
if ($pathFound && $currentPath !== $this->jsonPointerPath) {
|
||||
if ($pathFound && $currentPath !== $jsonPointerPath) {
|
||||
$subtreeEnded = true;
|
||||
break;
|
||||
}
|
||||
if ($currentLevel === $iteratorLevel && $jsonBuffer !== '') {
|
||||
if ($currentPath === $this->jsonPointerPath) {
|
||||
if ($currentPath === $jsonPointerPath) {
|
||||
$valueResult = $this->jsonDecoder->decodeValue($jsonBuffer);
|
||||
// inlined
|
||||
if ( ! $valueResult->isOk()) {
|
||||
$this->error($valueResult->getErrorMessage());
|
||||
$this->error($valueResult->getErrorMessage(), $token);
|
||||
}
|
||||
// endinlined
|
||||
if ($iteratorStruct === '[') {
|
||||
@ -213,7 +211,7 @@ class Parser implements \IteratorAggregate, PositionAware
|
||||
// inlined
|
||||
$keyResult = $this->jsonDecoder->decodeKey($key);
|
||||
if ( ! $keyResult->isOk()) {
|
||||
$this->error($keyResult->getErrorMessage());
|
||||
$this->error($keyResult->getErrorMessage(), $token);
|
||||
}
|
||||
// endinlined
|
||||
yield $keyResult->getValue() => $valueResult->getValue();
|
||||
@ -223,8 +221,8 @@ class Parser implements \IteratorAggregate, PositionAware
|
||||
}
|
||||
}
|
||||
|
||||
if ($this->token === null) {
|
||||
$this->error('Cannot iterate empty JSON');
|
||||
if ($token === null) {
|
||||
$this->error('Cannot iterate empty JSON', $token);
|
||||
}
|
||||
|
||||
if ( ! $pathFound) {
|
||||
@ -232,7 +230,7 @@ class Parser implements \IteratorAggregate, PositionAware
|
||||
}
|
||||
|
||||
if ($currentLevel > -1 && ! $subtreeEnded){
|
||||
$this->error('JSON string ended unexpectedly', UnexpectedEndSyntaxErrorException::class);
|
||||
$this->error('JSON string ended unexpectedly', $token,UnexpectedEndSyntaxErrorException::class);
|
||||
}
|
||||
}
|
||||
|
||||
@ -252,9 +250,9 @@ class Parser implements \IteratorAggregate, PositionAware
|
||||
return $this->jsonPointer;
|
||||
}
|
||||
|
||||
private function error($msg, $exception = SyntaxError::class)
|
||||
private function error($msg, $token, $exception = SyntaxError::class)
|
||||
{
|
||||
throw new $exception($msg." '".$this->token."'", $this->lexer->getPosition());
|
||||
throw new $exception($msg." '".$token."'", $this->lexer->getPosition());
|
||||
}
|
||||
|
||||
public function getPosition()
|
||||
|
Loading…
x
Reference in New Issue
Block a user