Handle tokens while parsing

This commit is contained in:
Andrea Marco Sartori 2022-10-17 18:18:32 +02:00
parent 4c9d635add
commit 80f85ca059
4 changed files with 191 additions and 85 deletions

View File

@ -3,6 +3,7 @@
namespace Cerbero\JsonParser;
use Cerbero\JsonParser\Sources\Source;
use Cerbero\JsonParser\Tokens\Token;
use Cerbero\JsonParser\Tokens\Tokens;
use IteratorAggregate;
use Traversable;
@ -13,6 +14,13 @@ use Traversable;
*/
class Lexer implements IteratorAggregate
{
/**
* The map of token instances.
*
* @var array<int, Token>
*/
protected static array $tokensMap = [];
/**
* The buffer to yield.
*
@ -28,11 +36,11 @@ class Lexer implements IteratorAggregate
protected bool $isEscape = false;
/**
* The map of token instances.
* Whether the current character belongs to a string.
*
* @var array
* @var bool
*/
protected array $tokensMap = [];
protected bool $inString = false;
/**
* Instantiate the class.
@ -51,8 +59,12 @@ class Lexer implements IteratorAggregate
*/
protected function hydrateTokens(): void
{
foreach (Tokens::MAP as $token => $class) {
$this->tokensMap[$token] = new $class();
if (static::$tokensMap) {
return;
}
foreach (Tokens::MAP as $type => $class) {
static::$tokensMap[$type] = new $class();
}
}
@ -64,26 +76,41 @@ class Lexer implements IteratorAggregate
public function getIterator(): Traversable
{
foreach ($this->source as $chunk) {
foreach (mb_str_split($chunk) as $char) {
$this->isEscape = $char == '\\' && !$this->isEscape;
foreach (mb_str_split($chunk) as $character) {
$this->inString = $character == '"' && !$this->isEscape && !$this->inString;
$this->isEscape = $character == '\\' && !$this->isEscape;
if (isset(Tokens::BOUNDARIES[$char]) && $this->buffer != '') {
yield $this->buffer;
if (isset(Tokens::BOUNDARIES[$character]) && $this->buffer != '' && !$this->inString) {
yield $this->toToken($this->buffer);
$this->buffer = '';
if (isset(Tokens::DELIMITERS[$char])) {
yield $char;
if (isset(Tokens::DELIMITERS[$character])) {
yield $this->toToken($character);
}
} elseif (!$this->isEscape) {
$this->buffer .= $char;
$this->buffer .= $character;
}
}
}
if ($this->buffer != '') {
// @todo test whether this is ever called
yield $this->buffer;
yield $this->toToken($this->buffer);
$this->buffer = '';
}
}
/**
 * Resolve the token instance that represents the given value
 *
 * The first byte of the value selects the token type through Tokens::TYPES,
 * the type selects the instance held in the static tokens map and the value
 * is then set on that instance.
 *
 * @param string $value
 * @return Token
 */
protected function toToken(string $value): Token
{
    $token = static::$tokensMap[Tokens::TYPES[$value[0]]];

    return $token->setValue($value);
}
}

View File

@ -2,10 +2,9 @@
namespace Cerbero\JsonParser;
use Cerbero\JsonParser\Decoders\Decoder;
use Cerbero\JsonParser\Pointers\Pointers;
use Cerbero\JsonParser\Tokens\StateMutator;
use Cerbero\JsonParser\Tokens\Token;
use Cerbero\JsonParser\Tokens\Value;
use Generator;
use IteratorAggregate;
use Traversable;
@ -23,6 +22,13 @@ class Parser implements IteratorAggregate
*/
protected State $state;
/**
* The JSON decoder.
*
* @var Decoder
*/
protected Decoder $decoder;
/**
* The JSON pointers collection.
*
@ -39,8 +45,8 @@ class Parser implements IteratorAggregate
public function __construct(protected Lexer $lexer, protected Config $config)
{
$this->state = new State();
$this->decoder = $config->decoder;
$this->pointers = new Pointers(...$config->pointers);
$this->state->matchPointer($this->pointers);
}
/**
@ -50,13 +56,13 @@ class Parser implements IteratorAggregate
*/
public function getIterator(): Traversable
{
foreach ($this->lexer as $token) {
$this->rematchPointer();
$this->traverseToken($token);
$this->bufferToken($token);
$this->mutateState($token);
$this->state->matchPointer($this->pointers);
if (!$token->closesChunk() || $this->state->treeIsDeep()) {
foreach ($this->lexer as $token) {
$this->handleToken($token);
$this->rematchPointer();
if (!$token->endsChunk() || $this->state->treeIsDeep()) {
continue;
}
@ -72,6 +78,25 @@ class Parser implements IteratorAggregate
}
}
/**
 * Process a single token
 *
 * The token first mutates the parsing state. A value token met outside of an
 * object while the tree is shallow then advances the array traversal. Finally
 * the token is buffered whenever the state asks for it.
 *
 * @param Token $token
 * @return void
 */
public function handleToken(Token $token): void
{
    $state = $this->state;

    $token->mutateState($state);

    if ($token->isValue()) {
        // checked in this order so that the state is only queried for value tokens
        if (!$state->inObject() && $state->treeIsShallow()) {
            $state->traverseArray();
        }
    }

    if (!$state->shouldBufferToken($token)) {
        return;
    }

    $state->bufferToken($token);
}
/**
* Set the matching JSON pointer when the tree changes
*
@ -81,46 +106,7 @@ class Parser implements IteratorAggregate
{
if ($this->state->treeChanged() && $this->pointers->count() > 1) {
$this->state->matchPointer($this->pointers);
$this->state->treeDidntChange();
}
}
/**
 * Advance the array traversal when the given token requires it
 *
 * The traversal only moves forward for Value tokens met outside of an object
 * while the tree is shallow.
 *
 * @param Token $token
 * @return void
 */
protected function traverseToken(Token $token): void
{
    if ($this->state->inObject()) {
        return;
    }

    if (!($token instanceof Value)) {
        return;
    }

    if ($this->state->treeIsShallow()) {
        $this->state->traverseArray();
    }
}
/**
 * Add the given token to the state buffer when the state asks for it
 *
 * @param Token $token
 * @return void
 */
protected function bufferToken(Token $token): void
{
    $state = $this->state;

    if (!$state->shouldBufferToken($token)) {
        return;
    }

    $state->bufferToken($token);
}
/**
* Let the given token mutate the parsing state
*
* @param Token $token
* @return void
*/
protected function mutateState(Token $token): void
{
if ($token instanceof StateMutator) {
$token->mutateState($this->state);
$this->state->treeDidNotChange();
}
}
@ -131,7 +117,7 @@ class Parser implements IteratorAggregate
*/
protected function yieldDecodedBuffer(): Generator
{
$decoded = $this->config->decoder->decode($this->state->pullBuffer());
$decoded = $this->decoder->decode($this->state->pullBuffer());
if (!$decoded->succeeded) {
call_user_func($this->config->onError, $decoded);

View File

@ -4,9 +4,7 @@ namespace Cerbero\JsonParser;
use Cerbero\JsonParser\Pointers\Pointer;
use Cerbero\JsonParser\Pointers\Pointers;
use Cerbero\JsonParser\Tokens\Scalar;
use Cerbero\JsonParser\Tokens\Token;
use Cerbero\JsonParser\Tokens\Value;
/**
* The JSON parsing state.
@ -43,7 +41,7 @@ class State
protected string $buffer = '';
/**
* Whether the token should be an object key.
* Whether an object key is expected.
*
* @var bool
*/
@ -120,7 +118,7 @@ class State
*
* @return static
*/
public function treeDidntChange(): static
public function treeDidNotChange(): static
{
$this->treeChanged = false;
@ -170,6 +168,20 @@ class State
return $this->pointer->matchesTree($this->tree);
}
/**
* Traverse the JSON tree through the given key
*
* Forwards the key to the tree and flags the tree as changed.
*
* @param string $key the key handed to the tree's traverse() method
* @return static this instance, to allow method chaining
*/
public function traverseTree(string $key): static
{
$this->tree->traverse($key);
// remember that the tree moved
$this->treeChanged = true;
return $this;
}
/**
* Traverse a JSON array
*
@ -211,8 +223,8 @@ class State
*/
protected function expectsToken(Token $token): bool
{
return ($this->tree->depth() == $this->pointer->depth() && $token instanceof Value)
|| ($this->tree->depth() + 1 == $this->pointer->depth() && $token instanceof Scalar);
return ($this->tree->depth() == $this->pointer->depth() && $token->isValue())
|| ($this->tree->depth() + 1 == $this->pointer->depth() && $token->isScalar());
}
/**
@ -242,6 +254,53 @@ class State
return $buffer;
}
/**
* Determine whether an object key is expected
*
* Reads the flag toggled by expectKey() and doNotExpectKey().
*
* @return bool the current value of the expectsKey flag
*/
public function expectsKey(): bool
{
return $this->expectsKey;
}
/**
* Expect an object key
*
* Turns the expectsKey flag on.
*
* @return static this instance, to allow method chaining
*/
public function expectKey(): static
{
$this->expectsKey = true;
return $this;
}
/**
* Do not expect any object key
*
* Turns the expectsKey flag off.
*
* @return static this instance, to allow method chaining
*/
public function doNotExpectKey(): static
{
$this->expectsKey = false;
return $this;
}
/**
* Set whether the currently parsed node is an object
*
* Stores the given flag in the inObject property.
*
* NOTE(review): the visible inObject() getter returns $this->tree->inObject(),
* so this property may no longer be read anywhere - confirm whether this
* setter is still needed.
*
* @param bool $inObject
* @return static this instance, to allow method chaining
*/
public function setInObject(bool $inObject): static
{
$this->inObject = $inObject;
return $this;
}
/**
* Determine whether the currently parsed node is an object
*
@ -249,6 +308,6 @@ class State
*/
public function inObject(): bool
{
return $this->inObject;
return $this->tree->inObject();
}
}

View File

@ -33,21 +33,6 @@ class Tree implements IteratorAggregate
*/
protected int $depth = -1;
/**
 * Move to the next item of the array at the current depth
 *
 * The original tree receives the next index (0 when no index exists yet at
 * this depth) and is cut after the current depth. The wildcarded tree then
 * receives either "-" or that index, depending on the pointer segment at the
 * same depth, and is cut in the same way.
 *
 * @param Pointer $pointer
 * @return void
 */
public function traverseArray(Pointer $pointer): void
{
    $depth = $this->depth;
    $previous = $this->original[$depth] ?? null;

    $this->original[$depth] = $previous === null ? 0 : $previous + 1;
    array_splice($this->original, $depth + 1);

    // read the index back after the splice, as the splice may renumber keys
    $this->wildcarded[$depth] = $pointer[$depth] == '-' ? '-' : $this->original[$depth];
    array_splice($this->wildcarded, $depth + 1);
}
/**
* Retrieve the original JSON tree
*
@ -98,6 +83,55 @@ class Tree implements IteratorAggregate
$this->depth--;
}
/**
 * Determine whether the tree is traversing an object
 *
 * The tree is inside an object when the key recorded at the current depth is
 * a string. When nothing has been recorded at the current depth, the tree is
 * not considered to be inside an object.
 *
 * @return bool
 */
public function inObject(): bool
{
    // the slot at the current depth may be missing (the depth starts at -1),
    // so fall back to null instead of raising an "undefined array key" warning
    return is_string($this->original[$this->depth] ?? null);
}
/**
 * Record the given key at the current depth
 *
 * The key is written to both the original and the wildcarded tree, then any
 * entry after the current depth is trimmed.
 *
 * @param string $key
 * @return void
 */
public function traverse(string $key): void
{
    $depth = $this->depth;

    $this->original[$depth] = $this->wildcarded[$depth] = $key;

    $this->trim();
}
/**
 * Drop every entry located after the current depth
 *
 * Both the original and the wildcarded tree are cut at the same offset.
 *
 * @return void
 */
protected function trim(): void
{
    $offset = $this->depth + 1;

    array_splice($this->original, $offset);
    array_splice($this->wildcarded, $offset);
}
/**
 * Move to the next item of the array at the current depth
 *
 * The original tree receives the next index (0 when no index exists yet at
 * this depth). The wildcarded tree receives either "-" or that same index,
 * depending on the pointer segment at the same depth. Both trees are then
 * trimmed after the current depth.
 *
 * @param Pointer $pointer
 * @return void
 */
public function traverseArray(Pointer $pointer): void
{
    $depth = $this->depth;
    $previous = $this->original[$depth] ?? null;
    $index = $previous === null ? 0 : $previous + 1;

    $this->original[$depth] = $index;
    $this->wildcarded[$depth] = $pointer[$depth] == '-' ? '-' : $index;

    $this->trim();
}
/**
* Retrieve the original tree iterator
*