diff --git a/src/Parser.php b/src/Parser.php index a08b329..71f5429 100644 --- a/src/Parser.php +++ b/src/Parser.php @@ -5,6 +5,10 @@ namespace Cerbero\JsonParser; use Cerbero\JsonParser\Decoders\ConfigurableDecoder; use Cerbero\JsonParser\Exceptions\SyntaxException; use Cerbero\JsonParser\Sources\Source; +use Cerbero\JsonParser\Tokens\CompoundBegin; +use Cerbero\JsonParser\Tokens\CompoundEnd; +use Cerbero\JsonParser\Tokens\Token; +use Generator; use IteratorAggregate; use Traversable; @@ -15,6 +19,13 @@ use Traversable; */ final class Parser implements IteratorAggregate { + /** + * The tokens to parse. + * + * @var Generator + */ + private Generator $tokens; + /** * The decoder handling potential errors. * @@ -22,14 +33,23 @@ final class Parser implements IteratorAggregate */ private ConfigurableDecoder $decoder; + /** + * Whether the parser is fast-forwarding. + * + * @var bool + */ + private bool $isFastForwarding = false; + /** * Instantiate the class. * - * @param Lexer $lexer + * @param Lexer|Generator $lexer * @param Config $config */ - public function __construct(private Lexer $lexer, private Config $config) + public function __construct(private Lexer|Generator $lexer, private Config $config) { + /** @phpstan-ignore-next-line */ + $this->tokens = $lexer instanceof Lexer ? $lexer->getIterator() : $lexer; $this->decoder = new ConfigurableDecoder($config); } @@ -51,11 +71,13 @@ final class Parser implements IteratorAggregate */ public function getIterator(): Traversable { - $state = new State($this->config->pointers); + $state = new State($this->config->pointers, fn () => new self($this->lazyLoad(), clone $this->config)); - foreach ($this->lexer as $token) { - if (!$token->matches($state->expectedToken)) { - throw new SyntaxException($token, $this->lexer->position()); + foreach ($this->tokens as $token) { + if ($this->isFastForwarding) { + continue; + } elseif (!$token->matches($state->expectedToken)) { + throw new SyntaxException($token); } $state->mutateByToken($token); @@ -70,6 +92,8 @@ final class Parser implements IteratorAggregate $value = $this->decoder->decode($state->value()); yield $key => $state->callPointer($value, $key); + + $value instanceof self && $value->fastForward(); } if ($state->canStopParsing()) { @@ -78,6 +102,46 @@ final class Parser implements IteratorAggregate } } + /** + * Retrieve the generator to lazy load the current compound + * + * @return Generator + */ + public function lazyLoad(): Generator + { + $depth = 0; + + do { + yield $token = $this->tokens->current(); + + if ($token instanceof CompoundBegin) { + $depth++; + } elseif ($token instanceof CompoundEnd) { + $depth--; + } + + $depth > 0 && $this->tokens->next(); + } while ($depth > 0); + } + + /** + * Fast-forward the parser + * + * @return void + */ + public function fastForward(): void + { + if (!$this->tokens->valid()) { + return; + } + + $this->isFastForwarding = true; + + foreach ($this as $value) { + $value instanceof self && $value->fastForward(); + } + } + /** * Retrieve the parsing progress * @@ -85,6 +149,18 @@ final class Parser implements IteratorAggregate */ public function progress(): Progress { + /** @phpstan-ignore-next-line */ return $this->lexer->progress(); } + + /** + * Retrieve the parsing position + * + * @return int + */ + public function position(): int + { + /** @phpstan-ignore-next-line */ + return $this->lexer->position(); + } } diff --git a/src/State.php b/src/State.php index e4d55e5..1b15e0d 100644 --- a/src/State.php +++ b/src/State.php @@ -3,8 +3,10 @@ namespace Cerbero\JsonParser; use Cerbero\JsonParser\Pointers\Pointers; +use Cerbero\JsonParser\Tokens\CompoundBegin; use Cerbero\JsonParser\Tokens\Token; use Cerbero\JsonParser\Tokens\Tokens; +use Closure; /** * The JSON parsing state. @@ -22,9 +24,9 @@ final class State /** * The JSON buffer. * - * @var string + * @var Parser|string */ - private string $buffer = ''; + private Parser|string $buffer = ''; /** * Whether an object key is expected. @@ -44,8 +46,9 @@ final class State * Instantiate the class. * * @param Pointers $pointers + * @param Closure $lazyLoad */ - public function __construct(private Pointers $pointers) + public function __construct(private Pointers $pointers, private Closure $lazyLoad) { $this->tree = new Tree($this->pointers); } @@ -100,17 +103,14 @@ final class State */ public function mutateByToken(Token $token): void { - $shouldTrackTree = $this->tree->shouldBeTracked(); - - if ($shouldTrackTree && $this->expectsKey) { - $this->tree->traverseKey($token); - } elseif ($shouldTrackTree && $token->isValue() && !$this->tree->inObject()) { - $this->tree->traverseArray(); - } + $this->tree->traverseToken($token, $this->expectsKey); if ($this->tree->isMatched() && ((!$this->expectsKey && $token->isValue()) || $this->tree->isDeep())) { - $this->buffer .= $token; - $this->pointers->markAsFound(); + $shouldLazyLoad = $token instanceof CompoundBegin && $this->pointers->matching()->isLazy(); + /** @phpstan-ignore-next-line */ + $this->buffer = $shouldLazyLoad ? ($this->lazyLoad)() : $this->buffer . $token; + /** @var CompoundBegin $token */ + $shouldLazyLoad && $token->shouldLazyLoad = true; } $token->mutateState($this); @@ -129,9 +129,9 @@ final class State /** * Retrieve the value from the buffer and reset it * - * @return string + * @return Parser|string */ - public function value(): string + public function value(): Parser|string { $buffer = $this->buffer; diff --git a/src/Tokens/CompoundBegin.php b/src/Tokens/CompoundBegin.php index 53f8e1e..5bb0579 100644 --- a/src/Tokens/CompoundBegin.php +++ b/src/Tokens/CompoundBegin.php @@ -10,6 +10,13 @@ use Cerbero\JsonParser\State; */ final class CompoundBegin extends Token { + /** + * Whether this compound should be lazy loaded. + * + * @var bool + */ + public bool $shouldLazyLoad = false; + /** * Mutate the given state * @@ -18,8 +25,38 @@ final class CompoundBegin extends Token */ public function mutateState(State $state): void { + $tree = $state->tree(); + + if ($this->shouldLazyLoad = $this->shouldLazyLoad && $tree->depth() >= 0) { + $state->expectedToken = $tree->inObject() ? Tokens::AFTER_OBJECT_VALUE : Tokens::AFTER_ARRAY_VALUE; + return; + } + $state->expectsKey = $beginsObject = $this->value == '{'; $state->expectedToken = $beginsObject ? Tokens::AFTER_OBJECT_BEGIN : Tokens::AFTER_ARRAY_BEGIN; - $state->tree()->deepen($beginsObject); + $tree->deepen($beginsObject); + } + + /** + * Set the token value + * + * @param string $value + * @return static + */ + public function setValue(string $value): static + { + $this->shouldLazyLoad = false; + + return parent::setValue($value); + } + + /** + * Determine whether this token ends a JSON chunk + * + * @return bool + */ + public function endsChunk(): bool + { + return $this->shouldLazyLoad; } }