Handle tokens while parsing

This commit is contained in:
Andrea Marco Sartori
2022-10-17 18:18:32 +02:00
parent 4c9d635add
commit 80f85ca059
4 changed files with 191 additions and 85 deletions

View File

@ -3,6 +3,7 @@
namespace Cerbero\JsonParser; namespace Cerbero\JsonParser;
use Cerbero\JsonParser\Sources\Source; use Cerbero\JsonParser\Sources\Source;
use Cerbero\JsonParser\Tokens\Token;
use Cerbero\JsonParser\Tokens\Tokens; use Cerbero\JsonParser\Tokens\Tokens;
use IteratorAggregate; use IteratorAggregate;
use Traversable; use Traversable;
@ -13,6 +14,13 @@ use Traversable;
*/ */
class Lexer implements IteratorAggregate class Lexer implements IteratorAggregate
{ {
/**
* The map of token instances.
*
* @var array<int, Token>
*/
protected static array $tokensMap = [];
/** /**
* The buffer to yield. * The buffer to yield.
* *
@ -28,11 +36,11 @@ class Lexer implements IteratorAggregate
protected bool $isEscape = false; protected bool $isEscape = false;
/** /**
* The map of token instances. * Whether the current character belongs to a string.
* *
* @var array * @var bool
*/ */
protected array $tokensMap = []; protected bool $inString = false;
/** /**
* Instantiate the class. * Instantiate the class.
@ -51,8 +59,12 @@ class Lexer implements IteratorAggregate
*/ */
protected function hydrateTokens(): void protected function hydrateTokens(): void
{ {
foreach (Tokens::MAP as $token => $class) { if (static::$tokensMap) {
$this->tokensMap[$token] = new $class(); return;
}
foreach (Tokens::MAP as $type => $class) {
static::$tokensMap[$type] = new $class();
} }
} }
@ -64,26 +76,41 @@ class Lexer implements IteratorAggregate
public function getIterator(): Traversable public function getIterator(): Traversable
{ {
foreach ($this->source as $chunk) { foreach ($this->source as $chunk) {
foreach (mb_str_split($chunk) as $char) { foreach (mb_str_split($chunk) as $character) {
$this->isEscape = $char == '\\' && !$this->isEscape; $this->inString = $character == '"' && !$this->isEscape && !$this->inString;
$this->isEscape = $character == '\\' && !$this->isEscape;
if (isset(Tokens::BOUNDARIES[$char]) && $this->buffer != '') { if (isset(Tokens::BOUNDARIES[$character]) && $this->buffer != '' && !$this->inString) {
yield $this->buffer; yield $this->toToken($this->buffer);
$this->buffer = ''; $this->buffer = '';
if (isset(Tokens::DELIMITERS[$char])) { if (isset(Tokens::DELIMITERS[$character])) {
yield $char; yield $this->toToken($character);
} }
} elseif (!$this->isEscape) { } elseif (!$this->isEscape) {
$this->buffer .= $char; $this->buffer .= $character;
} }
} }
} }
if ($this->buffer != '') { if ($this->buffer != '') {
// @todo test whether this is ever called // @todo test whether this is ever called
yield $this->buffer; yield $this->toToken($this->buffer);
$this->buffer = ''; $this->buffer = '';
} }
} }
/**
 * Turn the given value into a token
 *
 * The token type is looked up from the first byte of the value, then the
 * instance registered for that type in the static tokens map receives the
 * value. Instances in that map are created once and shared, so the same
 * object is handed out for every value of a given type.
 *
 * NOTE(review): callers only pass non-empty strings; an empty value or a
 * first byte missing from Tokens::TYPES is not handled here.
 *
 * @param string $value
 * @return Token
 */
protected function toToken(string $value): Token
{
    $type = Tokens::TYPES[$value[0]];
    $token = static::$tokensMap[$type];

    return $token->setValue($value);
}
} }

View File

@ -2,10 +2,9 @@
namespace Cerbero\JsonParser; namespace Cerbero\JsonParser;
use Cerbero\JsonParser\Decoders\Decoder;
use Cerbero\JsonParser\Pointers\Pointers; use Cerbero\JsonParser\Pointers\Pointers;
use Cerbero\JsonParser\Tokens\StateMutator;
use Cerbero\JsonParser\Tokens\Token; use Cerbero\JsonParser\Tokens\Token;
use Cerbero\JsonParser\Tokens\Value;
use Generator; use Generator;
use IteratorAggregate; use IteratorAggregate;
use Traversable; use Traversable;
@ -23,6 +22,13 @@ class Parser implements IteratorAggregate
*/ */
protected State $state; protected State $state;
/**
* The JSON decoder.
*
* @var Decoder
*/
protected Decoder $decoder;
/** /**
* The JSON pointers collection. * The JSON pointers collection.
* *
@ -39,8 +45,8 @@ class Parser implements IteratorAggregate
public function __construct(protected Lexer $lexer, protected Config $config) public function __construct(protected Lexer $lexer, protected Config $config)
{ {
$this->state = new State(); $this->state = new State();
$this->decoder = $config->decoder;
$this->pointers = new Pointers(...$config->pointers); $this->pointers = new Pointers(...$config->pointers);
$this->state->matchPointer($this->pointers);
} }
/** /**
@ -50,13 +56,13 @@ class Parser implements IteratorAggregate
*/ */
public function getIterator(): Traversable public function getIterator(): Traversable
{ {
foreach ($this->lexer as $token) { $this->state->matchPointer($this->pointers);
$this->rematchPointer();
$this->traverseToken($token);
$this->bufferToken($token);
$this->mutateState($token);
if (!$token->closesChunk() || $this->state->treeIsDeep()) { foreach ($this->lexer as $token) {
$this->handleToken($token);
$this->rematchPointer();
if (!$token->endsChunk() || $this->state->treeIsDeep()) {
continue; continue;
} }
@ -72,6 +78,25 @@ class Parser implements IteratorAggregate
} }
} }
/**
 * Handle the given token
 *
 * The token first mutates the parsing state. Afterwards, a value token met
 * outside of an object while the tree is shallow advances the current array
 * position, and finally the token is buffered if the state asks for it.
 *
 * @param Token $token
 * @return void
 */
public function handleToken(Token $token): void
{
    $state = $this->state;

    $token->mutateState($state);

    // Checks run in this exact order and short-circuit on the first failure.
    $isArrayItem = $token->isValue() && !$state->inObject() && $state->treeIsShallow();

    if ($isArrayItem) {
        $state->traverseArray();
    }

    if (!$state->shouldBufferToken($token)) {
        return;
    }

    $state->bufferToken($token);
}
/** /**
* Set the matching JSON pointer when the tree changes * Set the matching JSON pointer when the tree changes
* *
@ -81,46 +106,7 @@ class Parser implements IteratorAggregate
{ {
if ($this->state->treeChanged() && $this->pointers->count() > 1) { if ($this->state->treeChanged() && $this->pointers->count() > 1) {
$this->state->matchPointer($this->pointers); $this->state->matchPointer($this->pointers);
$this->state->treeDidntChange(); $this->state->treeDidNotChange();
}
}
/**
* Keep track of the JSON tree when traversing the given token
*
* @param Token $token
* @return void
*/
protected function traverseToken(Token $token): void
{
if (!$this->state->inObject() && $token instanceof Value && $this->state->treeIsShallow()) {
$this->state->traverseArray();
}
}
/**
* Preserve the given token in the buffer
*
* @param Token $token
* @return void
*/
protected function bufferToken(Token $token): void
{
if ($this->state->shouldBufferToken($token)) {
$this->state->bufferToken($token);
}
}
/**
* Preserve the given token in the buffer
*
* @param Token $token
* @return void
*/
protected function mutateState(Token $token): void
{
if ($token instanceof StateMutator) {
$token->mutateState($this->state);
} }
} }
@ -131,7 +117,7 @@ class Parser implements IteratorAggregate
*/ */
protected function yieldDecodedBuffer(): Generator protected function yieldDecodedBuffer(): Generator
{ {
$decoded = $this->config->decoder->decode($this->state->pullBuffer()); $decoded = $this->decoder->decode($this->state->pullBuffer());
if (!$decoded->succeeded) { if (!$decoded->succeeded) {
call_user_func($this->config->onError, $decoded); call_user_func($this->config->onError, $decoded);

View File

@ -4,9 +4,7 @@ namespace Cerbero\JsonParser;
use Cerbero\JsonParser\Pointers\Pointer; use Cerbero\JsonParser\Pointers\Pointer;
use Cerbero\JsonParser\Pointers\Pointers; use Cerbero\JsonParser\Pointers\Pointers;
use Cerbero\JsonParser\Tokens\Scalar;
use Cerbero\JsonParser\Tokens\Token; use Cerbero\JsonParser\Tokens\Token;
use Cerbero\JsonParser\Tokens\Value;
/** /**
* The JSON parsing state. * The JSON parsing state.
@ -43,7 +41,7 @@ class State
protected string $buffer = ''; protected string $buffer = '';
/** /**
* Whether the token should be an object key. * Whether an object key is expected.
* *
* @var bool * @var bool
*/ */
@ -120,7 +118,7 @@ class State
* *
* @return static * @return static
*/ */
public function treeDidntChange(): static public function treeDidNotChange(): static
{ {
$this->treeChanged = false; $this->treeChanged = false;
@ -170,6 +168,20 @@ class State
return $this->pointer->matchesTree($this->tree); return $this->pointer->matchesTree($this->tree);
} }
/**
* Traverse the JSON tree through the given key
*
* Delegates to the tree, which records the key at its current depth, and
* then raises the tree-changed flag (the flag lowered by treeDidNotChange()).
*
* @param string $key the object key being entered
* @return static this state, to allow chaining
*/
public function traverseTree(string $key): static
{
$this->tree->traverse($key);
// Mark the tree as changed only after the traversal has been applied.
$this->treeChanged = true;
return $this;
}
/** /**
* Traverse a JSON array * Traverse a JSON array
* *
@ -211,8 +223,8 @@ class State
*/ */
protected function expectsToken(Token $token): bool protected function expectsToken(Token $token): bool
{ {
return ($this->tree->depth() == $this->pointer->depth() && $token instanceof Value) return ($this->tree->depth() == $this->pointer->depth() && $token->isValue())
|| ($this->tree->depth() + 1 == $this->pointer->depth() && $token instanceof Scalar); || ($this->tree->depth() + 1 == $this->pointer->depth() && $token->isScalar());
} }
/** /**
@ -242,6 +254,53 @@ class State
return $buffer; return $buffer;
} }
/**
* Determine whether an object key is expected
*
* Reports the flag toggled by expectKey() and doNotExpectKey().
*
* @return bool true when an object key is expected
*/
public function expectsKey(): bool
{
return $this->expectsKey;
}
/**
* Expect an object key
*
* Raises the flag reported by expectsKey().
*
* @return static this state, to allow chaining
*/
public function expectKey(): static
{
$this->expectsKey = true;
return $this;
}
/**
* Do not expect any object key
*
* Lowers the flag reported by expectsKey().
*
* @return static this state, to allow chaining
*/
public function doNotExpectKey(): static
{
$this->expectsKey = false;
return $this;
}
/**
* Set whether the currently parsed node is an object
*
* Stores the given flag on the state.
*
* NOTE(review): in this revision inObject() asks the tree instead of reading
* this flag, so the stored value looks unused by that getter - confirm
* whether anything else still consumes it.
*
* @param bool $inObject
* @return static this state, to allow chaining
*/
public function setInObject(bool $inObject): static
{
$this->inObject = $inObject;
return $this;
}
/** /**
* Determine whether the currently parsed node is an object * Determine whether the currently parsed node is an object
* *
@ -249,6 +308,6 @@ class State
*/ */
public function inObject(): bool public function inObject(): bool
{ {
return $this->inObject; return $this->tree->inObject();
} }
} }

View File

@ -33,21 +33,6 @@ class Tree implements IteratorAggregate
*/ */
protected int $depth = -1; protected int $depth = -1;
/**
* Traverse an array
*
* @param Pointer $pointer
* @return void
*/
public function traverseArray(Pointer $pointer): void
{
$this->original[$this->depth] = isset($this->original[$this->depth]) ? $this->original[$this->depth] + 1 : 0;
array_splice($this->original, $this->depth + 1);
$this->wildcarded[$this->depth] = $pointer[$this->depth] == '-' ? '-' : $this->original[$this->depth];
array_splice($this->wildcarded, $this->depth + 1);
}
/** /**
* Retrieve the original JSON tree * Retrieve the original JSON tree
* *
@ -98,6 +83,55 @@ class Tree implements IteratorAggregate
$this->depth--; $this->depth--;
} }
/**
 * Determine whether the tree is traversing an object
 *
 * The node at the current depth belongs to an object when the key recorded
 * for it is a string: traverse() stores object keys as strings, whereas
 * traverseArray() stores integer positions.
 *
 * @return bool
 */
public function inObject(): bool
{
    // The depth starts at -1 and a level may have no key recorded yet. Fall
    // back to null instead of reading a missing offset, which would raise an
    // "Undefined array key" warning before producing the very same false.
    return is_string($this->original[$this->depth] ?? null);
}
/**
 * Traverse the given key
 *
 * Records the key at the current depth in both the original and the
 * wildcarded tree, then drops whatever was stored at deeper levels.
 *
 * @param string $key
 * @return void
 */
public function traverse(string $key): void
{
    $depth = $this->depth;

    $this->original[$depth] = $this->wildcarded[$depth] = $key;

    $this->trim();
}
/**
 * Trim the tree after the latest traversed key
 *
 * Removes every element positioned after the current depth from both the
 * original and the wildcarded tree.
 *
 * @return void
 */
protected function trim(): void
{
    $offset = $this->depth + 1;

    array_splice($this->original, $offset);
    array_splice($this->wildcarded, $offset);
}
/**
 * Traverse an array
 *
 * Moves to the next item at the current depth: the position starts at 0 when
 * nothing is recorded yet and is incremented otherwise. The wildcarded tree
 * stores '-' instead of the position when the pointer holds '-' at the same
 * depth. Deeper levels are then trimmed.
 *
 * @param Pointer $pointer
 * @return void
 */
public function traverseArray(Pointer $pointer): void
{
    $depth = $this->depth;
    $index = ($this->original[$depth] ?? -1) + 1;

    $this->original[$depth] = $index;
    // Loose comparison kept on purpose to mirror the established matching.
    $this->wildcarded[$depth] = $pointer[$depth] == '-' ? '-' : $index;

    $this->trim();
}
/** /**
* Retrieve the original tree iterator * Retrieve the original tree iterator
* *