Implement lazy loading

This commit is contained in:
Andrea Marco Sartori 2023-03-20 19:32:17 +10:00
parent db626b0a31
commit b6b965d7fc
3 changed files with 134 additions and 21 deletions

View File

@ -5,6 +5,10 @@ namespace Cerbero\JsonParser;
use Cerbero\JsonParser\Decoders\ConfigurableDecoder;
use Cerbero\JsonParser\Exceptions\SyntaxException;
use Cerbero\JsonParser\Sources\Source;
use Cerbero\JsonParser\Tokens\CompoundBegin;
use Cerbero\JsonParser\Tokens\CompoundEnd;
use Cerbero\JsonParser\Tokens\Token;
use Generator;
use IteratorAggregate;
use Traversable;
@ -15,6 +19,13 @@ use Traversable;
*/
final class Parser implements IteratorAggregate
{
/**
* The tokens to parse.
*
* @var Generator<int, Token>
*/
private Generator $tokens;
/**
* The decoder handling potential errors.
*
@ -22,14 +33,23 @@ final class Parser implements IteratorAggregate
*/
private ConfigurableDecoder $decoder;
/**
* Whether the parser is fast-forwarding.
*
* @var bool
*/
private bool $isFastForwarding = false;
/**
* Instantiate the class.
*
* @param Lexer $lexer
* @param Lexer|Generator<int, Token> $lexer
* @param Config $config
*/
public function __construct(private Lexer $lexer, private Config $config)
public function __construct(private Lexer|Generator $lexer, private Config $config)
{
/** @phpstan-ignore-next-line */
$this->tokens = $lexer instanceof Lexer ? $lexer->getIterator() : $lexer;
$this->decoder = new ConfigurableDecoder($config);
}
@ -51,11 +71,13 @@ final class Parser implements IteratorAggregate
*/
public function getIterator(): Traversable
{
$state = new State($this->config->pointers);
$state = new State($this->config->pointers, fn () => new self($this->lazyLoad(), clone $this->config));
foreach ($this->lexer as $token) {
if (!$token->matches($state->expectedToken)) {
throw new SyntaxException($token, $this->lexer->position());
foreach ($this->tokens as $token) {
if ($this->isFastForwarding) {
continue;
} elseif (!$token->matches($state->expectedToken)) {
throw new SyntaxException($token);
}
$state->mutateByToken($token);
@ -70,6 +92,8 @@ final class Parser implements IteratorAggregate
$value = $this->decoder->decode($state->value());
yield $key => $state->callPointer($value, $key);
$value instanceof self && $value->fastForward();
}
if ($state->canStopParsing()) {
@ -78,6 +102,46 @@ final class Parser implements IteratorAggregate
}
}
/**
 * Build a generator replaying the tokens of the compound currently under the cursor
 *
 * Starting from the token the shared token stream currently points at, each token is
 * yielded until the nesting opened by compound-begin tokens has been closed again.
 * The shared stream is advanced only while the compound is still open, so it is left
 * pointing at the last yielded token instead of being moved past it.
 *
 * @return Generator<int, Token>
 */
public function lazyLoad(): Generator
{
    $nesting = 0;

    while (true) {
        $current = $this->tokens->current();

        yield $current;

        // Track how many compounds are still open after the token just handed out.
        $nesting += match (true) {
            $current instanceof CompoundBegin => 1,
            $current instanceof CompoundEnd => -1,
            default => 0,
        };

        if ($nesting <= 0) {
            break;
        }

        $this->tokens->next();
    }
}
/**
 * Fast-forward the parser
 *
 * Consumes every token still pending in the token stream, leaving the stream in the
 * state it would have after this parser had been iterated to completion. Nothing is
 * decoded or yielded while doing so. Calling it on an exhausted stream is a no-op.
 *
 * @return void
 */
public function fastForward(): void
{
    if (!$this->tokens->valid()) {
        return;
    }

    // getIterator() checks this flag and skips every token while it is set.
    $this->isFastForwarding = true;

    // Drain the generator directly rather than looping over $this: iterating the parser
    // makes getIterator() foreach the token generator, which rewinds it, and PHP throws
    // when rewinding a generator that already advanced past its first yield - which is
    // the case whenever this parser was partially iterated before being fast-forwarded.
    do {
        $this->tokens->next();
    } while ($this->tokens->valid());
}
/**
* Retrieve the parsing progress
*
@ -85,6 +149,18 @@ final class Parser implements IteratorAggregate
*/
public function progress(): Progress
{
// Delegates to whatever was passed to the constructor as $lexer (typed Lexer|Generator).
// NOTE(review): parsers created by the lazy-load closure receive a Generator, which has
// no progress() method - confirm this is never called on such parsers.
/** @phpstan-ignore-next-line */
return $this->lexer->progress();
}
/**
* Retrieve the parsing position
*
* The value is obtained by delegating to the object passed to the constructor as $lexer.
*
* @return int
*/
public function position(): int
{
// $lexer is typed Lexer|Generator by the constructor, hence the static-analysis ignore.
// NOTE(review): parsers created by the lazy-load closure receive a Generator, which has
// no position() method - confirm this is never called on such parsers.
/** @phpstan-ignore-next-line */
return $this->lexer->position();
}
}

View File

@ -3,8 +3,10 @@
namespace Cerbero\JsonParser;
use Cerbero\JsonParser\Pointers\Pointers;
use Cerbero\JsonParser\Tokens\CompoundBegin;
use Cerbero\JsonParser\Tokens\Token;
use Cerbero\JsonParser\Tokens\Tokens;
use Closure;
/**
* The JSON parsing state.
@ -22,9 +24,9 @@ final class State
/**
* The JSON buffer.
*
* @var string
* @var Parser|string
*/
private string $buffer = '';
private Parser|string $buffer = '';
/**
* Whether an object key is expected.
@ -44,8 +46,9 @@ final class State
* Instantiate the class.
*
* @param Pointers $pointers
* @param Closure $lazyLoad
*/
public function __construct(private Pointers $pointers)
public function __construct(private Pointers $pointers, private Closure $lazyLoad)
{
$this->tree = new Tree($this->pointers);
}
@ -100,17 +103,14 @@ final class State
*/
public function mutateByToken(Token $token): void
{
$shouldTrackTree = $this->tree->shouldBeTracked();
if ($shouldTrackTree && $this->expectsKey) {
$this->tree->traverseKey($token);
} elseif ($shouldTrackTree && $token->isValue() && !$this->tree->inObject()) {
$this->tree->traverseArray();
}
$this->tree->traverseToken($token, $this->expectsKey);
if ($this->tree->isMatched() && ((!$this->expectsKey && $token->isValue()) || $this->tree->isDeep())) {
$this->buffer .= $token;
$this->pointers->markAsFound();
$shouldLazyLoad = $token instanceof CompoundBegin && $this->pointers->matching()->isLazy();
/** @phpstan-ignore-next-line */
$this->buffer = $shouldLazyLoad ? ($this->lazyLoad)() : $this->buffer . $token;
/** @var CompoundBegin $token */
$shouldLazyLoad && $token->shouldLazyLoad = true;
}
$token->mutateState($this);
@ -129,9 +129,9 @@ final class State
/**
* Retrieve the value from the buffer and reset it
*
* @return string
* @return Parser|string
*/
public function value(): string
public function value(): Parser|string
{
$buffer = $this->buffer;

View File

@ -10,6 +10,13 @@ use Cerbero\JsonParser\State;
*/
final class CompoundBegin extends Token
{
/**
* Whether this compound should be lazy loaded.
*
* @var bool
*/
public bool $shouldLazyLoad = false;
/**
* Mutate the given state
*
@ -18,8 +25,38 @@ final class CompoundBegin extends Token
*/
public function mutateState(State $state): void
{
$tree = $state->tree();
if ($this->shouldLazyLoad = $this->shouldLazyLoad && $tree->depth() >= 0) {
$state->expectedToken = $tree->inObject() ? Tokens::AFTER_OBJECT_VALUE : Tokens::AFTER_ARRAY_VALUE;
return;
}
$state->expectsKey = $beginsObject = $this->value == '{';
$state->expectedToken = $beginsObject ? Tokens::AFTER_OBJECT_BEGIN : Tokens::AFTER_ARRAY_BEGIN;
$state->tree()->deepen($beginsObject);
$tree->deepen($beginsObject);
}
/**
* Set the token value
*
* The lazy-loading flag is cleared before delegating to the parent implementation, so a
* flag raised while the token held a previous value does not carry over to the new one.
* NOTE(review): presumably token instances are reused for several values - confirm.
*
* @param string $value
* @return static
*/
public function setValue(string $value): static
{
// Reset first, then let the parent store the value and provide the return value.
$this->shouldLazyLoad = false;
return parent::setValue($value);
}
/**
* Determine whether this token ends a JSON chunk
*
* A compound-begin token reports the end of a chunk only while it is flagged
* for lazy loading; the answer is the current value of that flag.
*
* @return bool
*/
public function endsChunk(): bool
{
return $this->shouldLazyLoad;
}
}