mirror of
https://github.com/cerbero90/json-parser.git
synced 2025-04-15 08:15:10 +02:00
Handle tokens while parsing
This commit is contained in:
parent
4c9d635add
commit
80f85ca059
@ -3,6 +3,7 @@
|
||||
namespace Cerbero\JsonParser;
|
||||
|
||||
use Cerbero\JsonParser\Sources\Source;
|
||||
use Cerbero\JsonParser\Tokens\Token;
|
||||
use Cerbero\JsonParser\Tokens\Tokens;
|
||||
use IteratorAggregate;
|
||||
use Traversable;
|
||||
@ -13,6 +14,13 @@ use Traversable;
|
||||
*/
|
||||
class Lexer implements IteratorAggregate
|
||||
{
|
||||
/**
|
||||
* The map of token instances.
|
||||
*
|
||||
* @var array<int, Token>
|
||||
*/
|
||||
protected static array $tokensMap = [];
|
||||
|
||||
/**
|
||||
* The buffer to yield.
|
||||
*
|
||||
@ -28,11 +36,11 @@ class Lexer implements IteratorAggregate
|
||||
protected bool $isEscape = false;
|
||||
|
||||
/**
|
||||
* The map of token instances.
|
||||
* Whether the current character belongs to a string.
|
||||
*
|
||||
* @var array
|
||||
* @var bool
|
||||
*/
|
||||
protected array $tokensMap = [];
|
||||
protected bool $inString = false;
|
||||
|
||||
/**
|
||||
* Instantiate the class.
|
||||
@ -51,8 +59,12 @@ class Lexer implements IteratorAggregate
|
||||
*/
|
||||
protected function hydrateTokens(): void
|
||||
{
|
||||
foreach (Tokens::MAP as $token => $class) {
|
||||
$this->tokensMap[$token] = new $class();
|
||||
if (static::$tokensMap) {
|
||||
return;
|
||||
}
|
||||
|
||||
foreach (Tokens::MAP as $type => $class) {
|
||||
static::$tokensMap[$type] = new $class();
|
||||
}
|
||||
}
|
||||
|
||||
@ -64,26 +76,41 @@ class Lexer implements IteratorAggregate
|
||||
public function getIterator(): Traversable
|
||||
{
|
||||
foreach ($this->source as $chunk) {
|
||||
foreach (mb_str_split($chunk) as $char) {
|
||||
$this->isEscape = $char == '\\' && !$this->isEscape;
|
||||
foreach (mb_str_split($chunk) as $character) {
|
||||
$this->inString = $character == '"' && !$this->isEscape && !$this->inString;
|
||||
$this->isEscape = $character == '\\' && !$this->isEscape;
|
||||
|
||||
if (isset(Tokens::BOUNDARIES[$char]) && $this->buffer != '') {
|
||||
yield $this->buffer;
|
||||
if (isset(Tokens::BOUNDARIES[$character]) && $this->buffer != '' && !$this->inString) {
|
||||
yield $this->toToken($this->buffer);
|
||||
$this->buffer = '';
|
||||
|
||||
if (isset(Tokens::DELIMITERS[$char])) {
|
||||
yield $char;
|
||||
if (isset(Tokens::DELIMITERS[$character])) {
|
||||
yield $this->toToken($character);
|
||||
}
|
||||
} elseif (!$this->isEscape) {
|
||||
$this->buffer .= $char;
|
||||
$this->buffer .= $character;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if ($this->buffer != '') {
|
||||
// @todo test whether this is ever called
|
||||
yield $this->buffer;
|
||||
yield $this->toToken($this->buffer);
|
||||
$this->buffer = '';
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Convert a raw lexeme into its token instance
 *
 * The first byte of the lexeme selects the token type through Tokens::TYPES;
 * the instance stored for that type in the static tokens map receives the
 * lexeme through setValue() and the result of that call is returned.
 *
 * @param string $value
 * @return Token
 */
protected function toToken(string $value): Token
{
    $type = Tokens::TYPES[$value[0]];
    $token = static::$tokensMap[$type];

    return $token->setValue($value);
}
|
||||
}
|
||||
|
@ -2,10 +2,9 @@
|
||||
|
||||
namespace Cerbero\JsonParser;
|
||||
|
||||
use Cerbero\JsonParser\Decoders\Decoder;
|
||||
use Cerbero\JsonParser\Pointers\Pointers;
|
||||
use Cerbero\JsonParser\Tokens\StateMutator;
|
||||
use Cerbero\JsonParser\Tokens\Token;
|
||||
use Cerbero\JsonParser\Tokens\Value;
|
||||
use Generator;
|
||||
use IteratorAggregate;
|
||||
use Traversable;
|
||||
@ -23,6 +22,13 @@ class Parser implements IteratorAggregate
|
||||
*/
|
||||
protected State $state;
|
||||
|
||||
/**
|
||||
* The JSON decoder.
|
||||
*
|
||||
* @var Decoder
|
||||
*/
|
||||
protected Decoder $decoder;
|
||||
|
||||
/**
|
||||
* The JSON pointers collection.
|
||||
*
|
||||
@ -39,8 +45,8 @@ class Parser implements IteratorAggregate
|
||||
public function __construct(protected Lexer $lexer, protected Config $config)
|
||||
{
|
||||
$this->state = new State();
|
||||
$this->decoder = $config->decoder;
|
||||
$this->pointers = new Pointers(...$config->pointers);
|
||||
$this->state->matchPointer($this->pointers);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -50,13 +56,13 @@ class Parser implements IteratorAggregate
|
||||
*/
|
||||
public function getIterator(): Traversable
|
||||
{
|
||||
foreach ($this->lexer as $token) {
|
||||
$this->rematchPointer();
|
||||
$this->traverseToken($token);
|
||||
$this->bufferToken($token);
|
||||
$this->mutateState($token);
|
||||
$this->state->matchPointer($this->pointers);
|
||||
|
||||
if (!$token->closesChunk() || $this->state->treeIsDeep()) {
|
||||
foreach ($this->lexer as $token) {
|
||||
$this->handleToken($token);
|
||||
$this->rematchPointer();
|
||||
|
||||
if (!$token->endsChunk() || $this->state->treeIsDeep()) {
|
||||
continue;
|
||||
}
|
||||
|
||||
@ -72,6 +78,25 @@ class Parser implements IteratorAggregate
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Apply the given token to the parsing state
 *
 * The token mutates the state first. Afterwards, a value met outside of an
 * object while the tree is shallow advances the array traversal, and the
 * token is buffered whenever the state asks for it.
 *
 * @param Token $token
 * @return void
 */
public function handleToken(Token $token): void
{
    $state = $this->state;

    $token->mutateState($state);

    // Evaluated left to right so the state is only queried for value tokens.
    $isShallowArrayItem = $token->isValue() && !$state->inObject() && $state->treeIsShallow();

    if ($isShallowArrayItem) {
        $state->traverseArray();
    }

    if (!$state->shouldBufferToken($token)) {
        return;
    }

    $state->bufferToken($token);
}
|
||||
|
||||
/**
|
||||
* Set the matching JSON pointer when the tree changes
|
||||
*
|
||||
@ -81,46 +106,7 @@ class Parser implements IteratorAggregate
|
||||
{
|
||||
if ($this->state->treeChanged() && $this->pointers->count() > 1) {
|
||||
$this->state->matchPointer($this->pointers);
|
||||
$this->state->treeDidntChange();
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Advance the array traversal when a value is met outside of an object
 *
 * Nothing happens while an object is being parsed, when the token is not a
 * value, or when the tree is not shallow.
 *
 * @param Token $token
 * @return void
 */
protected function traverseToken(Token $token): void
{
    if ($this->state->inObject()) {
        return;
    }

    if ($token instanceof Value && $this->state->treeIsShallow()) {
        $this->state->traverseArray();
    }
}
|
||||
|
||||
/**
 * Buffer the given token when the state requires it
 *
 * @param Token $token
 * @return void
 */
protected function bufferToken(Token $token): void
{
    if (!$this->state->shouldBufferToken($token)) {
        return;
    }

    $this->state->bufferToken($token);
}
|
||||
|
||||
/**
|
||||
* Let the given token mutate the state when it is a state mutator
|
||||
*
|
||||
* @param Token $token
|
||||
* @return void
|
||||
*/
|
||||
protected function mutateState(Token $token): void
|
||||
{
|
||||
if ($token instanceof StateMutator) {
|
||||
$token->mutateState($this->state);
|
||||
$this->state->treeDidNotChange();
|
||||
}
|
||||
}
|
||||
|
||||
@ -131,7 +117,7 @@ class Parser implements IteratorAggregate
|
||||
*/
|
||||
protected function yieldDecodedBuffer(): Generator
|
||||
{
|
||||
$decoded = $this->config->decoder->decode($this->state->pullBuffer());
|
||||
$decoded = $this->decoder->decode($this->state->pullBuffer());
|
||||
|
||||
if (!$decoded->succeeded) {
|
||||
call_user_func($this->config->onError, $decoded);
|
||||
|
@ -4,9 +4,7 @@ namespace Cerbero\JsonParser;
|
||||
|
||||
use Cerbero\JsonParser\Pointers\Pointer;
|
||||
use Cerbero\JsonParser\Pointers\Pointers;
|
||||
use Cerbero\JsonParser\Tokens\Scalar;
|
||||
use Cerbero\JsonParser\Tokens\Token;
|
||||
use Cerbero\JsonParser\Tokens\Value;
|
||||
|
||||
/**
|
||||
* The JSON parsing state.
|
||||
@ -43,7 +41,7 @@ class State
|
||||
protected string $buffer = '';
|
||||
|
||||
/**
|
||||
* Whether the token should be an object key.
|
||||
* Whether an object key is expected.
|
||||
*
|
||||
* @var bool
|
||||
*/
|
||||
@ -120,7 +118,7 @@ class State
|
||||
*
|
||||
* @return static
|
||||
*/
|
||||
public function treeDidntChange(): static
|
||||
public function treeDidNotChange(): static
|
||||
{
|
||||
$this->treeChanged = false;
|
||||
|
||||
@ -170,6 +168,20 @@ class State
|
||||
return $this->pointer->matchesTree($this->tree);
|
||||
}
|
||||
|
||||
/**
 * Move the JSON tree onto the given key and flag the tree as changed
 *
 * @param string $key
 * @return static
 */
public function traverseTree(string $key): static
{
    $tree = $this->tree;
    $tree->traverse($key);

    // The flag is raised only after the tree accepted the key.
    $this->treeChanged = true;

    return $this;
}
|
||||
|
||||
/**
|
||||
* Traverse a JSON array
|
||||
*
|
||||
@ -211,8 +223,8 @@ class State
|
||||
*/
|
||||
protected function expectsToken(Token $token): bool
|
||||
{
|
||||
return ($this->tree->depth() == $this->pointer->depth() && $token instanceof Value)
|
||||
|| ($this->tree->depth() + 1 == $this->pointer->depth() && $token instanceof Scalar);
|
||||
return ($this->tree->depth() == $this->pointer->depth() && $token->isValue())
|
||||
|| ($this->tree->depth() + 1 == $this->pointer->depth() && $token->isScalar());
|
||||
}
|
||||
|
||||
/**
|
||||
@ -242,6 +254,53 @@ class State
|
||||
return $buffer;
|
||||
}
|
||||
|
||||
/**
 * Tell whether an object key is currently expected
 *
 * @return bool
 */
public function expectsKey(): bool
{
    return $this->expectsKey;
}
|
||||
|
||||
/**
 * Mark that an object key is expected
 *
 * @return static the same instance, to allow method chaining
 */
public function expectKey(): static
{
    $this->expectsKey = true;

    return $this;
}
|
||||
|
||||
/**
 * Mark that no object key is expected
 *
 * @return static the same instance, to allow method chaining
 */
public function doNotExpectKey(): static
{
    $this->expectsKey = false;

    return $this;
}
|
||||
|
||||
/**
 * Store whether the node being parsed is an object
 *
 * NOTE(review): the inObject() getter visible in this change appears to read
 * from the tree rather than from this flag; confirm the flag is still consumed.
 *
 * @param bool $inObject
 * @return static the same instance, to allow method chaining
 */
public function setInObject(bool $inObject): static
{
    $this->inObject = $inObject;

    return $this;
}
|
||||
|
||||
/**
|
||||
* Determine whether the currently parsed node is an object
|
||||
*
|
||||
@ -249,6 +308,6 @@ class State
|
||||
*/
|
||||
public function inObject(): bool
|
||||
{
|
||||
return $this->inObject;
|
||||
return $this->tree->inObject();
|
||||
}
|
||||
}
|
||||
|
64
src/Tree.php
64
src/Tree.php
@ -33,21 +33,6 @@ class Tree implements IteratorAggregate
|
||||
*/
|
||||
protected int $depth = -1;
|
||||
|
||||
/**
 * Step onto the next item of the array at the current depth
 *
 * The index stored at the current depth starts from 0 and grows by one on
 * each call. The wildcarded tree stores '-' instead of the index when the
 * pointer holds '-' at the same depth. Segments past the current depth are
 * dropped from both trees.
 *
 * @param Pointer $pointer
 * @return void
 */
public function traverseArray(Pointer $pointer): void
{
    $depth = $this->depth;

    $this->original[$depth] = ($this->original[$depth] ?? -1) + 1;
    array_splice($this->original, $depth + 1);

    $isWildcard = $pointer[$depth] == '-';
    $this->wildcarded[$depth] = $isWildcard ? '-' : $this->original[$depth];
    array_splice($this->wildcarded, $depth + 1);
}
|
||||
|
||||
/**
|
||||
* Retrieve the original JSON tree
|
||||
*
|
||||
@ -98,6 +83,55 @@ class Tree implements IteratorAggregate
|
||||
$this->depth--;
|
||||
}
|
||||
|
||||
/**
 * Determine whether the tree is traversing an object
 *
 * The node at the current depth belongs to an object when the key recorded
 * for it is a string; array items are recorded as integer indexes instead.
 * The slot for the current depth can be missing (no key recorded yet at this
 * depth), in which case the tree is reported as not being in an object
 * rather than raising an undefined array key warning.
 *
 * @return bool
 */
public function inObject(): bool
{
    return is_string($this->original[$this->depth] ?? null);
}
|
||||
|
||||
/**
 * Record the given key at the current depth of the tree
 *
 * Both the original and the wildcarded trees receive the key, then every
 * segment deeper than the current depth is discarded.
 *
 * @param string $key
 * @return void
 */
public function traverse(string $key): void
{
    $depth = $this->depth;

    $this->original[$depth] = $key;
    $this->wildcarded[$depth] = $key;

    $this->trim();
}
|
||||
|
||||
/**
 * Drop every segment that lies deeper than the current depth
 *
 * Applied to both the original and the wildcarded trees.
 *
 * @return void
 */
protected function trim(): void
{
    $offset = $this->depth + 1;

    array_splice($this->original, $offset);
    array_splice($this->wildcarded, $offset);
}
|
||||
|
||||
/**
 * Step onto the next item of the array at the current depth
 *
 * The index stored at the current depth starts from 0 and grows by one on
 * each call. The wildcarded tree stores '-' instead of the index when the
 * pointer holds '-' at the same depth. Deeper segments are trimmed afterwards.
 *
 * @param Pointer $pointer
 * @return void
 */
public function traverseArray(Pointer $pointer): void
{
    $depth = $this->depth;
    $index = ($this->original[$depth] ?? -1) + 1;

    $this->original[$depth] = $index;

    $isWildcard = $pointer[$depth] == '-';
    $this->wildcarded[$depth] = $isWildcard ? '-' : $index;

    $this->trim();
}
|
||||
|
||||
/**
|
||||
* Retrieve the original tree iterator
|
||||
*
|
||||
|
Loading…
x
Reference in New Issue
Block a user