From 1ae562cdc110062996b12ddca685b3619bbfc5e1 Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Sat, 17 Sep 2022 05:02:40 +0200 Subject: [PATCH 001/249] First draft --- helpers.php | 16 +++ src/Config.php | 52 ++++++++ src/Decoders/ArrayDecoder.php | 36 ++++++ src/Decoders/DecodedValue.php | 50 ++++++++ src/Decoders/Decoder.php | 18 +++ src/Decoders/ObjectDecoder.php | 17 +++ src/JsonParser.php | 124 ++++++++++++++++++++ src/Lexer.php | 106 +++++++++++++++++ src/Parser.php | 85 ++++++++++++++ src/Pointer.php | 31 +++++ src/Providers/JsonParserServiceProvider.php | 32 ----- src/Sources/Source.php | 32 +++++ tests/JsonParserTest.php | 26 ---- 13 files changed, 567 insertions(+), 58 deletions(-) create mode 100644 helpers.php create mode 100644 src/Config.php create mode 100644 src/Decoders/ArrayDecoder.php create mode 100644 src/Decoders/DecodedValue.php create mode 100644 src/Decoders/Decoder.php create mode 100644 src/Decoders/ObjectDecoder.php create mode 100644 src/JsonParser.php create mode 100644 src/Lexer.php create mode 100644 src/Parser.php create mode 100644 src/Pointer.php delete mode 100644 src/Providers/JsonParserServiceProvider.php create mode 100644 src/Sources/Source.php delete mode 100644 tests/JsonParserTest.php diff --git a/helpers.php b/helpers.php new file mode 100644 index 0000000..69caa95 --- /dev/null +++ b/helpers.php @@ -0,0 +1,16 @@ +decoder = new ArrayDecoder(); + $this->onError = fn () => true; + } +} diff --git a/src/Decoders/ArrayDecoder.php b/src/Decoders/ArrayDecoder.php new file mode 100644 index 0000000..bf20c56 --- /dev/null +++ b/src/Decoders/ArrayDecoder.php @@ -0,0 +1,36 @@ +decodesToArray, flags: JSON_THROW_ON_ERROR); + } catch (JsonException $e) { + return DecodedValue::failed($e, $json); + } + + return DecodedValue::succeeded($value); + } +} diff --git a/src/Decoders/DecodedValue.php b/src/Decoders/DecodedValue.php new file mode 100644 index 0000000..48218e3 --- /dev/null +++ b/src/Decoders/DecodedValue.php @@ -0,0 
+1,50 @@ +getMessage(), $e->getCode(), $e, $json); + } +} diff --git a/src/Decoders/Decoder.php b/src/Decoders/Decoder.php new file mode 100644 index 0000000..278123f --- /dev/null +++ b/src/Decoders/Decoder.php @@ -0,0 +1,18 @@ +config = new Config(); + $this->parser = new Parser(new Lexer(new Source($source)), $this->config); + } + + /** + * Statically instantiate the class + * + * @param mixed $source + * @return static + */ + public static function parse(mixed $source): static + { + return new static($source); + } + + /** + * Set the JSON decoder to turn a JSON into objects + * + * @return static + */ + public function toObjects(): static + { + return $this->decoder(new ObjectDecoder()); + } + + /** + * Set the JSON decoder + * + * @param Decoder $decoder + * @return static + */ + public function decoder(Decoder $decoder): static + { + $this->config->decoder = $decoder; + + return $this; + } + + /** + * Set the JSON pointers + * + * @param string ...$pointers + * @return static + */ + public function pointer(string ...$pointers): static + { + $this->config->pointers = array_map(fn (string $pointer) => new Pointer($pointer), $pointers); + + return $this; + } + + /** + * The number of bytes to read in each chunk + * + * @param int $bytes + * @return static + */ + public function bytes(int $bytes): static + { + $this->config->bytes = $bytes; + + return $this; + } + + /** + * Set the logic to run during parsing errors + * + * @param callable $callback + * @return static + */ + public function onError(callable $callback): static + { + $this->config->onError = $callback; + + return $this; + } + + /** + * Retrieve the lazily iterable JSON + * + * @return Traversable + */ + public function getIterator(): Traversable + { + return $this->parser; + } +} diff --git a/src/Lexer.php b/src/Lexer.php new file mode 100644 index 0000000..70710f7 --- /dev/null +++ b/src/Lexer.php @@ -0,0 +1,106 @@ + true, + "\xBB" => true, + "\xBF" => true, + "\n" => true, + "\r" => true, + "\t" 
=> true, + ' ' => true, + '{' => true, + '}' => true, + '[' => true, + ']' => true, + ':' => true, + ',' => true, + ]; + + /** + * The JSON structural boundaries. + * + * @var array + */ + protected const STRUCTURES = [ + '{' => true, + '}' => true, + '[' => true, + ']' => true, + ':' => true, + ',' => true, + ]; + + /** + * The buffer to yield. + * + * @var string + */ + protected string $buffer = ''; + + /** + * Whether the current character is an escape. + * + * @var bool + */ + // protected bool $isEscape = false; + + /** + * Instantiate the class. + * + * @param Source $source + */ + public function __construct(protected Source $source) + { + } + + /** + * Retrieve the JSON fragments + * + * @return Traversable + */ + public function getIterator(): Traversable + { + foreach ($this->source as $chunk) { + foreach (mb_str_split($chunk) as $char) { + // $this->isEscape = $char == '\\' && !$this->isEscape; + + if (isset(static::BOUNDARIES[$char]) && $this->buffer != '') { + if (isset(static::STRUCTURES[$char])) { + $this->buffer .= $char; + } + + yield $this->buffer; + $this->buffer = ''; + continue; + } + + // if (!$this->isEscape) { + $this->buffer .= $char; + // } + } + } + + if ($this->buffer != '') { + // @todo test whether this is ever called + yield $this->buffer; + $this->buffer = ''; + } + } +} diff --git a/src/Parser.php b/src/Parser.php new file mode 100644 index 0000000..6b21339 --- /dev/null +++ b/src/Parser.php @@ -0,0 +1,85 @@ + self::SCALAR_CONST, + 't' => self::SCALAR_CONST, + 'f' => self::SCALAR_CONST, + '-' => self::SCALAR_CONST, + '0' => self::SCALAR_CONST, + '1' => self::SCALAR_CONST, + '2' => self::SCALAR_CONST, + '3' => self::SCALAR_CONST, + '4' => self::SCALAR_CONST, + '5' => self::SCALAR_CONST, + '6' => self::SCALAR_CONST, + '7' => self::SCALAR_CONST, + '8' => self::SCALAR_CONST, + '9' => self::SCALAR_CONST, + '"' => self::SCALAR_STRING, + '{' => self::OBJECT_START, + '}' => self::OBJECT_END, + '[' => self::ARRAY_START, + ']' => 
self::ARRAY_END, + ',' => self::COMMA, + ':' => self::COLON, + ]; + + /** + * Instantiate the class. + * + * @param Lexer $lexer + * @param Config $config + */ + public function __construct(protected Lexer $lexer, protected Config $config) + { + } + + /** + * Retrieve the JSON fragments + * + * @return Traversable + */ + public function getIterator(): Traversable + { + foreach ($this->lexer as $token) { + // + } + } +} diff --git a/src/Pointer.php b/src/Pointer.php new file mode 100644 index 0000000..4f25f72 --- /dev/null +++ b/src/Pointer.php @@ -0,0 +1,31 @@ +pointer; + } +} diff --git a/src/Providers/JsonParserServiceProvider.php b/src/Providers/JsonParserServiceProvider.php deleted file mode 100644 index 036d8c1..0000000 --- a/src/Providers/JsonParserServiceProvider.php +++ /dev/null @@ -1,32 +0,0 @@ - Date: Sat, 17 Sep 2022 05:02:54 +0200 Subject: [PATCH 002/249] Autoload helpers --- composer.json | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/composer.json b/composer.json index 6f286ba..1ef46f3 100644 --- a/composer.json +++ b/composer.json @@ -33,7 +33,10 @@ "autoload-dev": { "psr-4": { "Cerbero\\JsonParser\\": "tests" - } + }, + "files": [ + "helpers.php" + ] }, "scripts": { "test": "pest", From 2fa9e8877d4aa72f08e708cdc238e44a3ff2d076 Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Sat, 17 Sep 2022 15:44:42 +0200 Subject: [PATCH 003/249] Correct data types --- src/Config.php | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Config.php b/src/Config.php index 5fb35ae..12cfc3e 100644 --- a/src/Config.php +++ b/src/Config.php @@ -22,7 +22,7 @@ class Config /** * The JSON pointers. * - * @var string[] + * @var Pointer[] */ public array $pointers = []; @@ -36,7 +36,7 @@ class Config /** * The callback to run during a parsing error. 
* - * @var callable|null + * @var Closure|null */ public ?Closure $onError = null; From bccb0858149908348e7ddc0e69fded2018022e15 Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Sat, 17 Sep 2022 15:45:01 +0200 Subject: [PATCH 004/249] Move token information --- src/Lexer.php | 39 ++------------------- src/Parser.php | 50 -------------------------- src/Tokens.php | 95 ++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 97 insertions(+), 87 deletions(-) create mode 100644 src/Tokens.php diff --git a/src/Lexer.php b/src/Lexer.php index 70710f7..e659ab7 100644 --- a/src/Lexer.php +++ b/src/Lexer.php @@ -12,41 +12,6 @@ use Traversable; */ class Lexer implements IteratorAggregate { - /** - * The JSON token boundaries. - * - * @var array - */ - protected const BOUNDARIES = [ - "\xEF" => true, - "\xBB" => true, - "\xBF" => true, - "\n" => true, - "\r" => true, - "\t" => true, - ' ' => true, - '{' => true, - '}' => true, - '[' => true, - ']' => true, - ':' => true, - ',' => true, - ]; - - /** - * The JSON structural boundaries. - * - * @var array - */ - protected const STRUCTURES = [ - '{' => true, - '}' => true, - '[' => true, - ']' => true, - ':' => true, - ',' => true, - ]; - /** * The buffer to yield. 
* @@ -81,8 +46,8 @@ class Lexer implements IteratorAggregate foreach (mb_str_split($chunk) as $char) { // $this->isEscape = $char == '\\' && !$this->isEscape; - if (isset(static::BOUNDARIES[$char]) && $this->buffer != '') { - if (isset(static::STRUCTURES[$char])) { + if (isset(Tokens::BOUNDARIES[$char]) && $this->buffer != '') { + if (isset(Tokens::STRUCTURES[$char])) { $this->buffer .= $char; } diff --git a/src/Parser.php b/src/Parser.php index 6b21339..97ec501 100644 --- a/src/Parser.php +++ b/src/Parser.php @@ -11,56 +11,6 @@ use Traversable; */ class Parser implements IteratorAggregate { - const SCALAR_CONST = 1 << 0; - const SCALAR_STRING = 1 << 1; - const SCALAR_VALUE = self::SCALAR_CONST | self::SCALAR_STRING; - - const OBJECT_START = 1 << 2; - const OBJECT_END = 1 << 3; - - const ARRAY_START = 1 << 4; - const ARRAY_END = 1 << 5; - - const COMMA = 1 << 6; - const COLON = 1 << 7; - - const ANY_VALUE = self::OBJECT_START | self::ARRAY_START | self::SCALAR_VALUE; - - const AFTER_ARRAY_START = self::ANY_VALUE | self::ARRAY_END; - const AFTER_ARRAY_VALUE = self::COMMA | self::ARRAY_END; - - const AFTER_OBJECT_START = self::SCALAR_STRING | self::OBJECT_END; - const AFTER_OBJECT_VALUE = self::COMMA | self::OBJECT_END; - - /** - * The token types. - * - * @var array - */ - protected const TYPES = [ - 'n' => self::SCALAR_CONST, - 't' => self::SCALAR_CONST, - 'f' => self::SCALAR_CONST, - '-' => self::SCALAR_CONST, - '0' => self::SCALAR_CONST, - '1' => self::SCALAR_CONST, - '2' => self::SCALAR_CONST, - '3' => self::SCALAR_CONST, - '4' => self::SCALAR_CONST, - '5' => self::SCALAR_CONST, - '6' => self::SCALAR_CONST, - '7' => self::SCALAR_CONST, - '8' => self::SCALAR_CONST, - '9' => self::SCALAR_CONST, - '"' => self::SCALAR_STRING, - '{' => self::OBJECT_START, - '}' => self::OBJECT_END, - '[' => self::ARRAY_START, - ']' => self::ARRAY_END, - ',' => self::COMMA, - ':' => self::COLON, - ]; - /** * Instantiate the class. 
* diff --git a/src/Tokens.php b/src/Tokens.php new file mode 100644 index 0000000..7d4e506 --- /dev/null +++ b/src/Tokens.php @@ -0,0 +1,95 @@ + self::SCALAR_CONST, + 't' => self::SCALAR_CONST, + 'f' => self::SCALAR_CONST, + '-' => self::SCALAR_CONST, + '0' => self::SCALAR_CONST, + '1' => self::SCALAR_CONST, + '2' => self::SCALAR_CONST, + '3' => self::SCALAR_CONST, + '4' => self::SCALAR_CONST, + '5' => self::SCALAR_CONST, + '6' => self::SCALAR_CONST, + '7' => self::SCALAR_CONST, + '8' => self::SCALAR_CONST, + '9' => self::SCALAR_CONST, + '"' => self::SCALAR_STRING, + '{' => self::OBJECT_START, + '}' => self::OBJECT_END, + '[' => self::ARRAY_START, + ']' => self::ARRAY_END, + ',' => self::COMMA, + ':' => self::COLON, + ]; + + /** + * The token boundaries. + * + * @var array + */ + public const BOUNDARIES = [ + "\xEF" => true, + "\xBB" => true, + "\xBF" => true, + "\n" => true, + "\r" => true, + "\t" => true, + ' ' => true, + '{' => true, + '}' => true, + '[' => true, + ']' => true, + ':' => true, + ',' => true, + ]; + + /** + * The structural boundaries. + * + * @var array + */ + public const STRUCTURES = [ + '{' => true, + '}' => true, + '[' => true, + ']' => true, + ':' => true, + ',' => true, + ]; +} From ff2d11d7daddadb56e342e0d3de4366281f99669 Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Mon, 19 Sep 2022 00:32:43 +0200 Subject: [PATCH 005/249] Move pointer --- src/Config.php | 5 +++-- src/JsonParser.php | 1 + src/{ => Pointers}/Pointer.php | 2 +- 3 files changed, 5 insertions(+), 3 deletions(-) rename src/{ => Pointers}/Pointer.php (91%) diff --git a/src/Config.php b/src/Config.php index 12cfc3e..32b6a2c 100644 --- a/src/Config.php +++ b/src/Config.php @@ -4,6 +4,7 @@ namespace Cerbero\JsonParser; use Cerbero\JsonParser\Decoders\ArrayDecoder; use Cerbero\JsonParser\Decoders\Decoder; +use Cerbero\JsonParser\Pointers\Pointer; use Closure; /** @@ -36,9 +37,9 @@ class Config /** * The callback to run during a parsing error. 
* - * @var Closure|null + * @var Closure */ - public ?Closure $onError = null; + public Closure $onError; /** * Instantiate the class diff --git a/src/JsonParser.php b/src/JsonParser.php index bd0a0ca..65afba3 100644 --- a/src/JsonParser.php +++ b/src/JsonParser.php @@ -4,6 +4,7 @@ namespace Cerbero\JsonParser; use Cerbero\JsonParser\Decoders\Decoder; use Cerbero\JsonParser\Decoders\ObjectDecoder; +use Cerbero\JsonParser\Pointers\Pointer; use Cerbero\JsonParser\Sources\Source; use IteratorAggregate; use Traversable; diff --git a/src/Pointer.php b/src/Pointers/Pointer.php similarity index 91% rename from src/Pointer.php rename to src/Pointers/Pointer.php index 4f25f72..dda0c94 100644 --- a/src/Pointer.php +++ b/src/Pointers/Pointer.php @@ -1,6 +1,6 @@ Date: Mon, 19 Sep 2022 00:33:45 +0200 Subject: [PATCH 006/249] Yield structural boundaries separately --- src/Lexer.php | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/src/Lexer.php b/src/Lexer.php index e659ab7..9901d03 100644 --- a/src/Lexer.php +++ b/src/Lexer.php @@ -3,6 +3,7 @@ namespace Cerbero\JsonParser; use Cerbero\JsonParser\Sources\Source; +use Cerbero\JsonParser\Tokens\Tokens; use IteratorAggregate; use Traversable; @@ -47,18 +48,17 @@ class Lexer implements IteratorAggregate // $this->isEscape = $char == '\\' && !$this->isEscape; if (isset(Tokens::BOUNDARIES[$char]) && $this->buffer != '') { - if (isset(Tokens::STRUCTURES[$char])) { - $this->buffer .= $char; - } - yield $this->buffer; $this->buffer = ''; - continue; - } - // if (!$this->isEscape) { - $this->buffer .= $char; - // } + if (isset(Tokens::STRUCTURES[$char])) { + yield $char; + } + } else { + // if (!$this->isEscape) { + $this->buffer .= $char; + // } + } } } From 20a638e4508b8ce056019843b2203ba288256e49 Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Mon, 19 Sep 2022 00:34:22 +0200 Subject: [PATCH 007/249] Move tokens --- src/{ => Tokens}/Tokens.php | 2 +- 1 file changed, 1 insertion(+), 1 
deletion(-) rename src/{ => Tokens}/Tokens.php (98%) diff --git a/src/Tokens.php b/src/Tokens/Tokens.php similarity index 98% rename from src/Tokens.php rename to src/Tokens/Tokens.php index 7d4e506..a1f6380 100644 --- a/src/Tokens.php +++ b/src/Tokens/Tokens.php @@ -1,6 +1,6 @@ Date: Mon, 19 Sep 2022 00:35:05 +0200 Subject: [PATCH 008/249] Create state --- src/State.php | 12 ++++++++++++ 1 file changed, 12 insertions(+) create mode 100644 src/State.php diff --git a/src/State.php b/src/State.php new file mode 100644 index 0000000..86ffef5 --- /dev/null +++ b/src/State.php @@ -0,0 +1,12 @@ + Date: Mon, 19 Sep 2022 00:35:19 +0200 Subject: [PATCH 009/249] Create pointers collection --- src/Pointers/Pointers.php | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) create mode 100644 src/Pointers/Pointers.php diff --git a/src/Pointers/Pointers.php b/src/Pointers/Pointers.php new file mode 100644 index 0000000..bb0613d --- /dev/null +++ b/src/Pointers/Pointers.php @@ -0,0 +1,29 @@ +pointers = $pointers; + } +} From 10b18ee0b6aafcfad2dd2976efe6d8dbf9e9eeea Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Mon, 19 Sep 2022 00:35:27 +0200 Subject: [PATCH 010/249] Implement parser --- src/Parser.php | 118 ++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 117 insertions(+), 1 deletion(-) diff --git a/src/Parser.php b/src/Parser.php index 97ec501..5e6f873 100644 --- a/src/Parser.php +++ b/src/Parser.php @@ -2,6 +2,9 @@ namespace Cerbero\JsonParser; +use Cerbero\JsonParser\Pointers\Pointer; +use Cerbero\JsonParser\Pointers\Pointers; +use Generator; use IteratorAggregate; use Traversable; @@ -11,6 +14,27 @@ use Traversable; */ class Parser implements IteratorAggregate { + /** + * The JSON parsing state. + * + * @var State + */ + protected State $state; + + /** + * The JSON pointers collection. + * + * @var Pointers + */ + protected Pointers $pointers; + + /** + * The JSON pointer matching the current tree. 
+ * + * @var Pointer + */ + protected Pointer $pointer; + /** * Instantiate the class. * @@ -19,6 +43,9 @@ class Parser implements IteratorAggregate */ public function __construct(protected Lexer $lexer, protected Config $config) { + $this->state = new State(); + $this->pointers = new Pointers(...$config->pointers); + $this->pointer = $this->pointers->matchTree($this->state->tree); } /** @@ -29,7 +56,96 @@ class Parser implements IteratorAggregate public function getIterator(): Traversable { foreach ($this->lexer as $token) { - // + $this->rematchPointer(); + $this->traverseToken($token); + $this->bufferToken($token); + $this->mutateState($token); + + if ($this->state->depth > $this->pointer->depth()) { + continue; + } + + if ($this->state->buffer != '') { + yield from $this->yieldDecodedBuffer(); + } + + if ($this->pointers->wereFound() && !$this->pointer->includesTree($this->tree)) { + break; + } + } + } + + /** + * Set the matching JSON pointer when the tree changes + * + * @return void + */ + protected function rematchPointer(): void + { + if ($this->state->treeChanged && $this->pointers->count() > 1) { + $this->pointer = $this->pointers->matchTree($this->state->tree); + $this->state->treeChanged = false; + } + } + + /** + * Keep track of the JSON tree when traversing the given token + * + * @param Token $token + * @return void + */ + protected function traverseToken(Token $token): void + { + if (!$this->state->inObject && $token instanceof Value && $this->state->depth < $this->pointer->depth()) { + $this->state->treeChanged = true; + $this->state->tree->traverse($token); + } + } + + /** + * Preserve the given token in the buffer + * + * @param Token $token + * @return void + */ + protected function bufferToken(Token $token): void + { + if ($this->pointer->matchesTree($this->state->tree) && $this->shouldBufferToken($token)) { + $this->state->buffer .= $token; + } + } + + /** + * Preserve the given token in the buffer + * + * @param Token $token + * @return void 
+ */ + protected function mutateState(Token $token): void + { + if ($token instanceof StateMutator) { + $token->mutateState($this->state); + } + } + + /** + * Yield the decoded JSON of the buffer + * + * @return Generator + */ + protected function yieldDecodedBuffer(): Generator + { + $decoded = $this->config->decoder->decode($this->state->buffer); + $this->state->buffer = ''; + + if (!$decoded->succeeded) { + call_user_func($this->config->onError, $decoded); + } + + if ($this->state->inObject) { + yield $this->state->tree[$this->state->depth] => $decoded->value; + } else { + yield $decoded->value; } } } From f813e55996dea489f1824d4383ebf696b53b28fd Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Tue, 20 Sep 2022 11:41:09 +0200 Subject: [PATCH 011/249] Add option to silence parsing errors --- src/Config.php | 3 ++- src/JsonParser.php | 10 ++++++++++ 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/src/Config.php b/src/Config.php index 32b6a2c..bd80562 100644 --- a/src/Config.php +++ b/src/Config.php @@ -3,6 +3,7 @@ namespace Cerbero\JsonParser; use Cerbero\JsonParser\Decoders\ArrayDecoder; +use Cerbero\JsonParser\Decoders\DecodedValue; use Cerbero\JsonParser\Decoders\Decoder; use Cerbero\JsonParser\Pointers\Pointer; use Closure; @@ -48,6 +49,6 @@ class Config public function __construct() { $this->decoder = new ArrayDecoder(); - $this->onError = fn () => true; + $this->onError = fn (DecodedValue $decoded) => throw $decoded->exception; } } diff --git a/src/JsonParser.php b/src/JsonParser.php index 65afba3..4a369a4 100644 --- a/src/JsonParser.php +++ b/src/JsonParser.php @@ -100,6 +100,16 @@ class JsonParser implements IteratorAggregate return $this; } + /** + * Silence errors while parsing + * + * @return static + */ + public function ignoreErrors(): static + { + return $this->onError(fn () => true); + } + /** * Set the logic to run during parsing errors * From 99d0586baa48569df42893f719b6c01a3d98e81d Mon Sep 17 00:00:00 2001 From: Andrea 
Marco Sartori Date: Tue, 20 Sep 2022 18:07:00 +0200 Subject: [PATCH 012/249] Create tree --- src/Tree.php | 107 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 107 insertions(+) create mode 100644 src/Tree.php diff --git a/src/Tree.php b/src/Tree.php new file mode 100644 index 0000000..5820ea5 --- /dev/null +++ b/src/Tree.php @@ -0,0 +1,107 @@ +original[$this->depth] = $token->value(); + $this->wildcarded[$this->depth] = is_int($token->value()) ? '-' : $token->value(); + } + + /** + * Retrieve the original JSON tree + * + * @return array + */ + public function original(): array + { + return $this->original; + } + + /** + * Retrieve the wildcarded JSON tree + * + * @return array + */ + public function wildcarded(): array + { + return $this->wildcarded; + } + + /** + * Retrieve the JSON tree depth + * + * @return int + */ + public function depth(): int + { + return $this->depth; + } + + /** + * Increase the tree depth + * + * @return void + */ + public function deepen(): void + { + $this->depth++; + } + + /** + * Decrease the tree depth + * + * @return void + */ + public function emerge(): void + { + $this->depth--; + } + + /** + * Retrieve the original tree iterator + * + * @return Traversable + */ + public function getIterator(): Traversable + { + yield from $this->original(); + } +} From 382a553f9dad7377bbe1d6971b35f965ea5b23bc Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Tue, 20 Sep 2022 18:07:09 +0200 Subject: [PATCH 013/249] Create null pointer --- src/Pointers/NullPointer.php | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) create mode 100644 src/Pointers/NullPointer.php diff --git a/src/Pointers/NullPointer.php b/src/Pointers/NullPointer.php new file mode 100644 index 0000000..c7bf1d6 --- /dev/null +++ b/src/Pointers/NullPointer.php @@ -0,0 +1,33 @@ +pointer = ''; + } +} From 58ed5c95d3926b466ad8422ff4efd58bd367ddf6 Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Tue, 20 Sep 2022 18:07:25 +0200 
Subject: [PATCH 014/249] Create token --- src/Tokens/Token.php | 39 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) create mode 100644 src/Tokens/Token.php diff --git a/src/Tokens/Token.php b/src/Tokens/Token.php new file mode 100644 index 0000000..29b93c4 --- /dev/null +++ b/src/Tokens/Token.php @@ -0,0 +1,39 @@ +value; + } + + /** + * Determine whether the JSON parsing should continue after this token + * + * @return bool + */ + public function shouldContinue(): bool + { + return false; + } +} From 07040317873318444aa486e19b39602391256b9e Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Tue, 20 Sep 2022 18:07:49 +0200 Subject: [PATCH 015/249] Rename property --- src/Decoders/DecodedValue.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Decoders/DecodedValue.php b/src/Decoders/DecodedValue.php index 48218e3..393389d 100644 --- a/src/Decoders/DecodedValue.php +++ b/src/Decoders/DecodedValue.php @@ -20,7 +20,7 @@ class DecodedValue public ?mixed $value = null, public ?string $error = null, public ?int $code = null, - public ?JsonException $e = null, + public ?JsonException $exception = null, public ?string $json = null, ) { } From f6fb63fa7ce105ba30eb9705c6737436bb99a11f Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Tue, 20 Sep 2022 18:08:21 +0200 Subject: [PATCH 016/249] Implement state --- src/State.php | 30 +++++++++++++++++++++++++++++- 1 file changed, 29 insertions(+), 1 deletion(-) diff --git a/src/State.php b/src/State.php index 86ffef5..a792cd1 100644 --- a/src/State.php +++ b/src/State.php @@ -8,5 +8,33 @@ namespace Cerbero\JsonParser; */ class State { - // + /** + * The JSON tree. + * + * @var Tree + */ + public Tree $tree; + + /** + * Whether the tree changed. + * + * @var bool + */ + public bool $treeChanged = false; + + /** + * The JSON buffer. + * + * @var string + */ + public string $buffer = ''; + + /** + * Instantiate the class. 
+ * + */ + public function __construct() + { + $this->tree = new Tree(); + } } From 9058f6353c58a32f56424694db0257d9e0f7cae7 Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Tue, 20 Sep 2022 18:08:30 +0200 Subject: [PATCH 017/249] Implement parser --- src/Parser.php | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/src/Parser.php b/src/Parser.php index 5e6f873..4fc3eb3 100644 --- a/src/Parser.php +++ b/src/Parser.php @@ -4,6 +4,7 @@ namespace Cerbero\JsonParser; use Cerbero\JsonParser\Pointers\Pointer; use Cerbero\JsonParser\Pointers\Pointers; +use Cerbero\JsonParser\Tokens\Token; use Generator; use IteratorAggregate; use Traversable; @@ -55,13 +56,14 @@ class Parser implements IteratorAggregate */ public function getIterator(): Traversable { + /** @var Token $token */ foreach ($this->lexer as $token) { $this->rematchPointer(); $this->traverseToken($token); $this->bufferToken($token); $this->mutateState($token); - if ($this->state->depth > $this->pointer->depth()) { + if ($token->shouldContinue() || $this->state->tree->depth() > $this->pointer->depth()) { continue; } @@ -69,7 +71,9 @@ class Parser implements IteratorAggregate yield from $this->yieldDecodedBuffer(); } - if ($this->pointers->wereFound() && !$this->pointer->includesTree($this->tree)) { + $this->markPointerAsFound(); + + if ($this->pointers->wereFound() && !$this->pointer->includesTree($this->state->tree)) { break; } } @@ -97,8 +101,8 @@ class Parser implements IteratorAggregate protected function traverseToken(Token $token): void { if (!$this->state->inObject && $token instanceof Value && $this->state->depth < $this->pointer->depth()) { - $this->state->treeChanged = true; $this->state->tree->traverse($token); + $this->state->treeChanged = true; } } @@ -148,4 +152,16 @@ class Parser implements IteratorAggregate yield $decoded->value; } } + + /** + * Mark the matching JSON pointer as found + * + * @return void + */ + protected function 
markPointerAsFound(): void + { + if ($this->pointer->matchesTree($this->state->tree)) { + $this->pointers->markAsFound($this->pointer); + } + } } From b2611c4bf1ab907c0d6792ba0632bc1dadf16528 Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Tue, 20 Sep 2022 18:08:37 +0200 Subject: [PATCH 018/249] Implement pointer --- src/Pointers/Pointer.php | 95 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 95 insertions(+) diff --git a/src/Pointers/Pointer.php b/src/Pointers/Pointer.php index dda0c94..38e90c1 100644 --- a/src/Pointers/Pointer.php +++ b/src/Pointers/Pointer.php @@ -2,6 +2,7 @@ namespace Cerbero\JsonParser\Pointers; +use Cerbero\JsonParser\Tree; use Stringable; /** @@ -10,6 +11,20 @@ use Stringable; */ class Pointer implements Stringable { + /** + * The reference tokens. + * + * @var string[] + */ + protected array $referenceTokens; + + /** + * The pointer depth. + * + * @var int + */ + protected int $depth; + /** * Instantiate the class. * @@ -17,6 +32,86 @@ class Pointer implements Stringable */ public function __construct(protected string $pointer) { + $this->referenceTokens = $this->toReferenceTokens(); + $this->depth = count($this->referenceTokens); + } + + /** + * Turn the JSON pointer into reference tokens + * + * @return array + */ + protected function toReferenceTokens(): array + { + $tokens = explode('/', substr($this->pointer, 1)); + + return array_map(fn (string $token) => str_replace(['~1', '~0'], ['/', '~'], $token), $tokens); + } + + /** + * Retrieve the reference tokens + * + * @return array + */ + public function referenceTokens(): array + { + return $this->referenceTokens; + } + + /** + * Retrieve the JSON pointer depth + * + * @return int + */ + public function depth(): int + { + return $this->depth; + } + + /** + * Determine whether the reference token at the given depth matches the provided node + * + * @param int $depth + * @param mixed $node + * @return bool + */ + public function depthMatchesNode(int $depth, mixed 
$node): bool + { + if (!isset($this->referenceTokens[$depth])) { + return false; + } + + if ($this->referenceTokens[$depth] === (string) $node) { + return true; + } + + return is_int($node) && $this->referenceTokens[$depth] === '-'; + } + + /** + * Determine whether the pointer matches the given tree + * + * @param Tree $tree + * @return bool + */ + public function matchesTree(Tree $tree): bool + { + return $this->referenceTokens == $tree->original() || $this->referenceTokens == $tree->wildcarded(); + } + + /** + * Determine whether the pointer includes the given tree + * + * @param Tree $tree + * @return bool + */ + public function includesTree(Tree $tree): bool + { + if (($firstNest = array_search('-', $this->referenceTokens)) === false) { + return false; + } + + return array_slice($this->referenceTokens, 0, $firstNest) == array_slice($tree->original(), 0, $firstNest); } /** From d6297238ad9aaac44fff02ea6d1999e22d72f248 Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Tue, 20 Sep 2022 18:08:45 +0200 Subject: [PATCH 019/249] Implement pointers collection --- src/Pointers/Pointers.php | 63 +++++++++++++++++++++++++++++++++++++-- 1 file changed, 61 insertions(+), 2 deletions(-) diff --git a/src/Pointers/Pointers.php b/src/Pointers/Pointers.php index bb0613d..93f66fc 100644 --- a/src/Pointers/Pointers.php +++ b/src/Pointers/Pointers.php @@ -2,13 +2,14 @@ namespace Cerbero\JsonParser\Pointers; -use Stringable; +use Cerbero\JsonParser\Tree; +use Countable; /** * The JSON pointers collection. * */ -class Pointers +class Pointers implements Countable { /** * The JSON pointers collection. @@ -17,6 +18,20 @@ class Pointers */ protected array $pointers; + /** + * The default pointer. + * + * @var Pointer + */ + protected Pointer $defaultPointer; + + /** + * The list of pointers that were found within the JSON. + * + * @var array + */ + protected array $found = []; + /** * Instantiate the class. 
* @@ -25,5 +40,49 @@ class Pointers public function __construct(Pointer ...$pointers) { $this->pointers = $pointers; + $this->defaultPointer = new NullPointer(); + } + + /** + * Retrieve the number of JSON pointers + * + * @return int + */ + public function count(): int + { + return count($this->pointers); + } + + /** + * Retrieve the pointer matching the given tree + * + * @param Tree $tree + * @return Pointer + */ + public function matchTree(Tree $tree): Pointer + { + $pointers = []; + + foreach ($this->pointers as $pointer) { + foreach ($tree as $depth => $node) { + if (!$pointer->depthMatchesNode($depth, $node)) { + continue 2; + } elseif (!isset($pointers[$depth])) { + $pointers[$depth] = $pointer; + } + } + } + + return end($pointers) ?: $this->defaultPointer; + } + + /** + * Determine whether all pointers were found within the JSON + * + * @return bool + */ + public function wereFound(): bool + { + return count($this->pointers) == count($this->found); } } From 82c70b7374957c2674c10393af040103d0d6c7e8 Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Wed, 21 Sep 2022 01:53:28 +0200 Subject: [PATCH 020/249] Create token types --- src/Tokens/Scalar.php | 12 ++++++++++++ src/Tokens/StateMutator.php | 20 ++++++++++++++++++++ src/Tokens/Value.php | 12 ++++++++++++ 3 files changed, 44 insertions(+) create mode 100644 src/Tokens/Scalar.php create mode 100644 src/Tokens/StateMutator.php create mode 100644 src/Tokens/Value.php diff --git a/src/Tokens/Scalar.php b/src/Tokens/Scalar.php new file mode 100644 index 0000000..c5fdb03 --- /dev/null +++ b/src/Tokens/Scalar.php @@ -0,0 +1,12 @@ + Date: Wed, 21 Sep 2022 01:54:32 +0200 Subject: [PATCH 021/249] Add method to mark pointers as found --- src/Pointers/Pointers.php | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/src/Pointers/Pointers.php b/src/Pointers/Pointers.php index 93f66fc..9f84846 100644 --- a/src/Pointers/Pointers.php +++ b/src/Pointers/Pointers.php @@ -76,6 +76,19 @@ class Pointers 
implements Countable return end($pointers) ?: $this->defaultPointer; } + /** + * Mark the given pointer as found + * + * @param Pointer $pointer + * @return void + */ + public function markAsFound(Pointer $pointer): void + { + $key = (string) $pointer; + + $this->found[$key] = true; + } + /** * Determine whether all pointers were found within the JSON * From 2a811cec4d60a2dd5c466735e912101bec497a8b Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Wed, 21 Sep 2022 01:55:01 +0200 Subject: [PATCH 022/249] Make pointer accessible as an array --- src/Pointers/Pointer.php | 48 +++++++++++++++++++++++++++++++++++++++- 1 file changed, 47 insertions(+), 1 deletion(-) diff --git a/src/Pointers/Pointer.php b/src/Pointers/Pointer.php index 38e90c1..11e98d8 100644 --- a/src/Pointers/Pointer.php +++ b/src/Pointers/Pointer.php @@ -2,6 +2,7 @@ namespace Cerbero\JsonParser\Pointers; +use ArrayAccess; use Cerbero\JsonParser\Tree; use Stringable; @@ -9,7 +10,7 @@ use Stringable; * The JSON pointer. * */ -class Pointer implements Stringable +class Pointer implements ArrayAccess, Stringable { /** * The reference tokens. @@ -114,6 +115,51 @@ class Pointer implements Stringable return array_slice($this->referenceTokens, 0, $firstNest) == array_slice($tree->original(), 0, $firstNest); } + /** + * Determine whether the given reference token exists + * + * @param mixed $offset + * @return bool + */ + public function offsetExists(mixed $offset): bool + { + return isset($this->referenceTokens[$offset]); + } + + /** + * Retrieve the given reference token + * + * @param mixed $offset + * @return mixed + */ + public function offsetGet(mixed $offset): mixed + { + return $this->referenceTokens[$offset] ?? 
null; + } + + /** + * Do not set any reference token + * + * @param mixed $offset + * @param mixed $value + * @return void + */ + public function offsetSet(mixed $offset, mixed $value): void + { + return; + } + + /** + * Do not unset any reference token + * + * @param mixed $offset + * @return void + */ + public function offsetUnset(mixed $offset): void + { + return; + } + /** * Retrieve the underlying JSON pointer * From d0dfbedbb5e814aba28eec3c29b877c85bf74149 Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Wed, 21 Sep 2022 01:55:25 +0200 Subject: [PATCH 023/249] Always match the JSON tree --- src/Pointers/NullPointer.php | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/src/Pointers/NullPointer.php b/src/Pointers/NullPointer.php index c7bf1d6..6d1e734 100644 --- a/src/Pointers/NullPointer.php +++ b/src/Pointers/NullPointer.php @@ -2,6 +2,8 @@ namespace Cerbero\JsonParser\Pointers; +use Cerbero\JsonParser\Tree; + /** * The null pointer. * @@ -30,4 +32,15 @@ class NullPointer extends Pointer { $this->pointer = ''; } + + /** + * Determine whether the pointer matches the given tree + * + * @param Tree $tree + * @return bool + */ + public function matchesTree(Tree $tree): bool + { + return true; + } } From e50c8f023e4f9a3f400f1035c9cbdf6231a98988 Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Wed, 21 Sep 2022 01:55:47 +0200 Subject: [PATCH 024/249] Implement parser --- src/Parser.php | 35 +++++++++++++++++++++++++++++++++-- 1 file changed, 33 insertions(+), 2 deletions(-) diff --git a/src/Parser.php b/src/Parser.php index 4fc3eb3..1a43ebe 100644 --- a/src/Parser.php +++ b/src/Parser.php @@ -4,7 +4,10 @@ namespace Cerbero\JsonParser; use Cerbero\JsonParser\Pointers\Pointer; use Cerbero\JsonParser\Pointers\Pointers; +use Cerbero\JsonParser\Tokens\Scalar; +use Cerbero\JsonParser\Tokens\StateMutator; use Cerbero\JsonParser\Tokens\Token; +use Cerbero\JsonParser\Tokens\Value; use Generator; use IteratorAggregate; use Traversable; @@ 
-100,8 +103,12 @@ class Parser implements IteratorAggregate */ protected function traverseToken(Token $token): void { - if (!$this->state->inObject && $token instanceof Value && $this->state->depth < $this->pointer->depth()) { - $this->state->tree->traverse($token); + if ( + !$this->state->inObject && + $token instanceof Value && + $this->state->tree->depth() < $this->pointer->depth() + ) { + $this->state->tree->traverseArray($this->pointer); $this->state->treeChanged = true; } } @@ -119,6 +126,30 @@ class Parser implements IteratorAggregate } } + /** + * Determine whether the given token should be buffered + * + * @param Token $token + * @return bool + */ + protected function shouldBufferToken(Token $token): bool + { + return $this->state->tree->depth() > $this->pointer->depth() + || (!$this->state->expectsKey && $this->tokenIsExpected($token)); + } + + /** + * Determine whether the given token is expected + * + * @param Token $token + * @return bool + */ + protected function tokenIsExpected(Token $token): bool + { + return ($this->state->tree->depth() == $this->pointer->depth() && $token instanceof Value) + || ($this->state->tree->depth() + 1 == $this->pointer->depth() && $token instanceof Scalar); + } + /** * Preserve the given token in the buffer * From 46fbfc73fb8b305d2bb73f4523c1fb1a3ed08a17 Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Wed, 21 Sep 2022 01:56:24 +0200 Subject: [PATCH 025/249] Add state to expect object keys --- src/State.php | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/State.php b/src/State.php index a792cd1..9bbcdca 100644 --- a/src/State.php +++ b/src/State.php @@ -29,6 +29,13 @@ class State */ public string $buffer = ''; + /** + * Whether the token should be an object key. + * + * @var bool + */ + public bool $expectsKey = false; + /** * Instantiate the class. 
* From 4e33e5d2472a9474a083e15e3fc88b79bb6bec09 Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Wed, 21 Sep 2022 01:56:57 +0200 Subject: [PATCH 026/249] Let tree traverse arrays --- src/Tree.php | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/src/Tree.php b/src/Tree.php index 5820ea5..81f3f70 100644 --- a/src/Tree.php +++ b/src/Tree.php @@ -2,7 +2,7 @@ namespace Cerbero\JsonParser; -use Cerbero\JsonParser\Tokens\Token; +use Cerbero\JsonParser\Pointers\Pointer; use IteratorAggregate; use Traversable; @@ -31,18 +31,21 @@ class Tree implements IteratorAggregate * * @var int */ - protected int $depth = 0; + protected int $depth = -1; /** - * Traverse the given token + * Traverse an array * - * @param Token $token + * @param Pointer $pointer * @return void */ - public function traverse(Token $token): void + public function traverseArray(Pointer $pointer): void { - $this->original[$this->depth] = $token->value(); - $this->wildcarded[$this->depth] = is_int($token->value()) ? '-' : $token->value(); + $this->original[$this->depth] = isset($this->original[$this->depth]) ? $this->original[$this->depth] + 1 : 0; + array_splice($this->original, $this->depth + 1); + + $this->wildcarded[$this->depth] = $pointer[$this->depth] == '-' ? '-' : $this->original[$this->depth]; + array_splice($this->wildcarded, $this->depth + 1); } /** From d2b8a8f6bebd0e05dae5b6e395445cb08ea0db43 Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Sun, 16 Oct 2022 02:07:41 +0200 Subject: [PATCH 027/249] Make the error callback callable --- src/Config.php | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/Config.php b/src/Config.php index bd80562..7d7fb58 100644 --- a/src/Config.php +++ b/src/Config.php @@ -6,7 +6,6 @@ use Cerbero\JsonParser\Decoders\ArrayDecoder; use Cerbero\JsonParser\Decoders\DecodedValue; use Cerbero\JsonParser\Decoders\Decoder; use Cerbero\JsonParser\Pointers\Pointer; -use Closure; /** * The configuration. 
@@ -38,9 +37,9 @@ class Config /** * The callback to run during a parsing error. * - * @var Closure + * @var callable */ - public Closure $onError; + public callable $onError; /** * Instantiate the class From e42ff04cdaa0c7b5f0aeedc6993a25bb2288974a Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Sun, 16 Oct 2022 02:08:32 +0200 Subject: [PATCH 028/249] Introduce tokens map --- src/Lexer.php | 32 +++++++++++++++++++++++++------- src/Tokens/Tokens.php | 11 ++++++++++- 2 files changed, 35 insertions(+), 8 deletions(-) diff --git a/src/Lexer.php b/src/Lexer.php index 9901d03..c87af9b 100644 --- a/src/Lexer.php +++ b/src/Lexer.php @@ -25,7 +25,14 @@ class Lexer implements IteratorAggregate * * @var bool */ - // protected bool $isEscape = false; + protected bool $isEscape = false; + + /** + * The map of token instances. + * + * @var array + */ + protected array $tokensMap = []; /** * Instantiate the class. @@ -34,30 +41,41 @@ class Lexer implements IteratorAggregate */ public function __construct(protected Source $source) { + $this->hydrateTokens(); + } + + /** + * Set the hydrated tokens + * + * @return void + */ + protected function hydrateTokens(): void + { + foreach (Tokens::MAP as $token => $class) { + $this->tokensMap[$token] = new $class(); + } } /** * Retrieve the JSON fragments * - * @return Traversable + * @return \Cerbero\JsonParser\Tokens\Token[] */ public function getIterator(): Traversable { foreach ($this->source as $chunk) { foreach (mb_str_split($chunk) as $char) { - // $this->isEscape = $char == '\\' && !$this->isEscape; + $this->isEscape = $char == '\\' && !$this->isEscape; if (isset(Tokens::BOUNDARIES[$char]) && $this->buffer != '') { yield $this->buffer; $this->buffer = ''; - if (isset(Tokens::STRUCTURES[$char])) { + if (isset(Tokens::DELIMITERS[$char])) { yield $char; } - } else { - // if (!$this->isEscape) { + } elseif (!$this->isEscape) { $this->buffer .= $char; - // } } } } diff --git a/src/Tokens/Tokens.php b/src/Tokens/Tokens.php 
index a1f6380..3250006 100644 --- a/src/Tokens/Tokens.php +++ b/src/Tokens/Tokens.php @@ -84,7 +84,7 @@ class Tokens * * @var array */ - public const STRUCTURES = [ + public const DELIMITERS = [ '{' => true, '}' => true, '[' => true, @@ -92,4 +92,13 @@ class Tokens ':' => true, ',' => true, ]; + + /** + * The tokens class map. + * + * @var array + */ + public const MAP = [ + // + ]; } From 5329d51bc1b3b80aaf85c7ec1aef24e993937693 Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Sun, 16 Oct 2022 02:09:11 +0200 Subject: [PATCH 029/249] Implement pointers --- src/Pointers/Pointers.php | 31 ++++++++++++++++--------------- 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/src/Pointers/Pointers.php b/src/Pointers/Pointers.php index 9f84846..90bf207 100644 --- a/src/Pointers/Pointers.php +++ b/src/Pointers/Pointers.php @@ -43,16 +43,6 @@ class Pointers implements Countable $this->defaultPointer = new NullPointer(); } - /** - * Retrieve the number of JSON pointers - * - * @return int - */ - public function count(): int - { - return count($this->pointers); - } - /** * Retrieve the pointer matching the given tree * @@ -80,13 +70,13 @@ class Pointers implements Countable * Mark the given pointer as found * * @param Pointer $pointer - * @return void + * @return static */ - public function markAsFound(Pointer $pointer): void + public function markAsFound(Pointer $pointer): static { - $key = (string) $pointer; + $this->found[(string) $pointer] = true; - $this->found[$key] = true; + return $this; } /** @@ -96,6 +86,17 @@ class Pointers implements Countable */ public function wereFound(): bool { - return count($this->pointers) == count($this->found); + return $this->count() > 0 + && $this->count() == count($this->found); + } + + /** + * Retrieve the number of JSON pointers + * + * @return int + */ + public function count(): int + { + return count($this->pointers); } } From 1bd69611b6b2e6fa0a3c876e887521648176183e Mon Sep 17 00:00:00 2001 From: Andrea Marco 
Sartori Date: Sun, 16 Oct 2022 02:09:51 +0200 Subject: [PATCH 030/249] Create double-quote token --- src/Tokens/DoubleQuote.php | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) create mode 100644 src/Tokens/DoubleQuote.php diff --git a/src/Tokens/DoubleQuote.php b/src/Tokens/DoubleQuote.php new file mode 100644 index 0000000..523bf4c --- /dev/null +++ b/src/Tokens/DoubleQuote.php @@ -0,0 +1,23 @@ + Date: Sun, 16 Oct 2022 02:10:14 +0200 Subject: [PATCH 031/249] Redefine token --- src/Tokens/Token.php | 23 ++--------------------- 1 file changed, 2 insertions(+), 21 deletions(-) diff --git a/src/Tokens/Token.php b/src/Tokens/Token.php index 29b93c4..079eacb 100644 --- a/src/Tokens/Token.php +++ b/src/Tokens/Token.php @@ -9,30 +9,11 @@ namespace Cerbero\JsonParser\Tokens; abstract class Token { /** - * Instantiate the class. - * - * @param mixed $value - */ - public function __construct(protected mixed $value) - { - } - - /** - * Retrieve the underlying value - * - * @return mixed - */ - public function value(): mixed - { - return $this->value; - } - - /** - * Determine whether the JSON parsing should continue after this token + * Determine whether this token closes a JSON chunk * * @return bool */ - public function shouldContinue(): bool + public function closesChunk(): bool { return false; } From 19a64d30c2e7ecc610d7bae8203b19536c3dc8b8 Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Sun, 16 Oct 2022 02:11:34 +0200 Subject: [PATCH 032/249] Move related logic to the state value object --- src/Parser.php | 72 ++++------------- src/State.php | 215 ++++++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 227 insertions(+), 60 deletions(-) diff --git a/src/Parser.php b/src/Parser.php index 1a43ebe..a9ce462 100644 --- a/src/Parser.php +++ b/src/Parser.php @@ -2,9 +2,7 @@ namespace Cerbero\JsonParser; -use Cerbero\JsonParser\Pointers\Pointer; use Cerbero\JsonParser\Pointers\Pointers; -use Cerbero\JsonParser\Tokens\Scalar; use 
Cerbero\JsonParser\Tokens\StateMutator; use Cerbero\JsonParser\Tokens\Token; use Cerbero\JsonParser\Tokens\Value; @@ -32,13 +30,6 @@ class Parser implements IteratorAggregate */ protected Pointers $pointers; - /** - * The JSON pointer matching the current tree. - * - * @var Pointer - */ - protected Pointer $pointer; - /** * Instantiate the class. * @@ -49,7 +40,7 @@ class Parser implements IteratorAggregate { $this->state = new State(); $this->pointers = new Pointers(...$config->pointers); - $this->pointer = $this->pointers->matchTree($this->state->tree); + $this->state->matchPointer($this->pointers); } /** @@ -59,24 +50,23 @@ class Parser implements IteratorAggregate */ public function getIterator(): Traversable { - /** @var Token $token */ foreach ($this->lexer as $token) { $this->rematchPointer(); $this->traverseToken($token); $this->bufferToken($token); $this->mutateState($token); - if ($token->shouldContinue() || $this->state->tree->depth() > $this->pointer->depth()) { + if (!$token->closesChunk() || $this->state->treeIsDeep()) { continue; } - if ($this->state->buffer != '') { + if ($this->state->hasBuffer()) { yield from $this->yieldDecodedBuffer(); } $this->markPointerAsFound(); - if ($this->pointers->wereFound() && !$this->pointer->includesTree($this->state->tree)) { + if ($this->pointers->wereFound() && !$this->state->treeInPointer()) { break; } } @@ -89,9 +79,9 @@ class Parser implements IteratorAggregate */ protected function rematchPointer(): void { - if ($this->state->treeChanged && $this->pointers->count() > 1) { - $this->pointer = $this->pointers->matchTree($this->state->tree); - $this->state->treeChanged = false; + if ($this->state->treeChanged() && $this->pointers->count() > 1) { + $this->state->matchPointer($this->pointers); + $this->state->treeDidntChange(); } } @@ -103,13 +93,8 @@ class Parser implements IteratorAggregate */ protected function traverseToken(Token $token): void { - if ( - !$this->state->inObject && - $token instanceof Value && - 
$this->state->tree->depth() < $this->pointer->depth() - ) { - $this->state->tree->traverseArray($this->pointer); - $this->state->treeChanged = true; + if (!$this->state->inObject() && $token instanceof Value && $this->state->treeIsShallow()) { + $this->state->traverseArray(); } } @@ -121,35 +106,11 @@ class Parser implements IteratorAggregate */ protected function bufferToken(Token $token): void { - if ($this->pointer->matchesTree($this->state->tree) && $this->shouldBufferToken($token)) { - $this->state->buffer .= $token; + if ($this->state->shouldBufferToken($token)) { + $this->state->bufferToken($token); } } - /** - * Determine whether the given token should be buffered - * - * @param Token $token - * @return bool - */ - protected function shouldBufferToken(Token $token): bool - { - return $this->state->tree->depth() > $this->pointer->depth() - || (!$this->state->expectsKey && $this->tokenIsExpected($token)); - } - - /** - * Determine whether the given token is expected - * - * @param Token $token - * @return bool - */ - protected function tokenIsExpected(Token $token): bool - { - return ($this->state->tree->depth() == $this->pointer->depth() && $token instanceof Value) - || ($this->state->tree->depth() + 1 == $this->pointer->depth() && $token instanceof Scalar); - } - /** * Preserve the given token in the buffer * @@ -170,15 +131,14 @@ class Parser implements IteratorAggregate */ protected function yieldDecodedBuffer(): Generator { - $decoded = $this->config->decoder->decode($this->state->buffer); - $this->state->buffer = ''; + $decoded = $this->config->decoder->decode($this->state->pullBuffer()); if (!$decoded->succeeded) { call_user_func($this->config->onError, $decoded); } - if ($this->state->inObject) { - yield $this->state->tree[$this->state->depth] => $decoded->value; + if ($this->state->inObject()) { + yield $this->state->node() => $decoded->value; } else { yield $decoded->value; } @@ -191,8 +151,8 @@ class Parser implements IteratorAggregate */ protected 
function markPointerAsFound(): void { - if ($this->pointer->matchesTree($this->state->tree)) { - $this->pointers->markAsFound($this->pointer); + if ($this->state->pointerMatchesTree()) { + $this->pointers->markAsFound($this->state->pointer()); } } } diff --git a/src/State.php b/src/State.php index 9bbcdca..b2a7cbd 100644 --- a/src/State.php +++ b/src/State.php @@ -2,6 +2,12 @@ namespace Cerbero\JsonParser; +use Cerbero\JsonParser\Pointers\Pointer; +use Cerbero\JsonParser\Pointers\Pointers; +use Cerbero\JsonParser\Tokens\Scalar; +use Cerbero\JsonParser\Tokens\Token; +use Cerbero\JsonParser\Tokens\Value; + /** * The JSON parsing state. * @@ -13,28 +19,42 @@ class State * * @var Tree */ - public Tree $tree; + protected Tree $tree; /** * Whether the tree changed. * * @var bool */ - public bool $treeChanged = false; + protected bool $treeChanged = false; + + /** + * The JSON pointer matching the tree. + * + * @var Pointer + */ + protected Pointer $pointer; /** * The JSON buffer. * * @var string */ - public string $buffer = ''; + protected string $buffer = ''; /** * Whether the token should be an object key. * * @var bool */ - public bool $expectsKey = false; + protected bool $expectsKey = false; + + /** + * Whether the currently parsed node is an object. + * + * @var bool + */ + protected bool $inObject = false; /** * Instantiate the class. 
@@ -44,4 +64,191 @@ class State { $this->tree = new Tree(); } + + /** + * Retrieve the JSON tree + * + * @return Tree + */ + public function tree(): Tree + { + return $this->tree; + } + + /** + * Determine whether the tree is shallow + * + * @return bool + */ + public function treeIsShallow(): bool + { + return $this->tree->depth() < $this->pointer->depth(); + } + + /** + * Determine whether the tree is deep + * + * @return bool + */ + public function treeIsDeep(): bool + { + return $this->tree->depth() > $this->pointer->depth(); + } + + /** + * Retrieve the current node of the JSON tree + * + * @return string + */ + public function node(): string + { + return $this->tree[$this->tree->depth()]; + } + + /** + * Determine whether the tree changed + * + * @return bool + */ + public function treeChanged(): bool + { + return $this->treeChanged; + } + + /** + * Mark the JSON tree as not changed + * + * @return static + */ + public function treeDidntChange(): static + { + $this->treeChanged = false; + + return $this; + } + + /** + * Set the JSON pointer matching the tree from the given pointers + * + * @param Pointers $pointers + * @return static + */ + public function matchPointer(Pointers $pointers): static + { + $this->pointer = $pointers->matchTree($this->tree); + + return $this; + } + + /** + * Retrieve the JSON pointer matching the tree + * + * @return Pointer + */ + public function pointer(): Pointer + { + return $this->pointer; + } + + /** + * Determine whether the tree is within the JSON pointer + * + * @return bool + */ + public function treeInPointer(): bool + { + return $this->pointer->includesTree($this->tree); + } + + /** + * Determine whether the tree matches the JSON pointer + * + * @return bool + */ + public function pointerMatchesTree(): bool + { + return $this->pointer->matchesTree($this->tree); + } + + /** + * Traverse a JSON array + * + * @return void + */ + public function traverseArray(): void + { + $this->tree->traverseArray($this->pointer); + 
$this->treeChanged = true; + } + + /** + * Determine whether the buffer contains tokens + * + * @return bool + */ + public function hasBuffer(): bool + { + return $this->buffer != ''; + } + + /** + * Determine whether the given token should be buffered + * + * @param Token $token + * @return bool + */ + public function shouldBufferToken(Token $token): bool + { + return $this->pointer->matchesTree($this->tree) + && ($this->treeIsDeep() || (!$this->expectsKey && $this->expectsToken($token))); + } + + /** + * Determine whether the given token is expected + * + * @param Token $token + * @return bool + */ + protected function expectsToken(Token $token): bool + { + return ($this->tree->depth() == $this->pointer->depth() && $token instanceof Value) + || ($this->tree->depth() + 1 == $this->pointer->depth() && $token instanceof Scalar); + } + + /** + * Buffer the given token + * + * @param Token $token + * @return static + */ + public function bufferToken(Token $token): static + { + $this->buffer .= $token; + + return $this; + } + + /** + * Retrieve and reset the buffer + * + * @return string + */ + public function pullBuffer(): string + { + $buffer = $this->buffer; + + $this->buffer = ''; + + return $buffer; + } + + /** + * Determine whether the currently parsed node is an object + * + * @return bool + */ + public function inObject(): bool + { + return $this->inObject; + } } From 62fd71145084787a7f0b549fdfb2d2b64b45deb1 Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Sun, 16 Oct 2022 02:19:40 +0200 Subject: [PATCH 033/249] Define specific return type --- helpers.php | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/helpers.php b/helpers.php index 69caa95..400478a 100644 --- a/helpers.php +++ b/helpers.php @@ -7,9 +7,9 @@ if (!function_exists('parseJson')) { * Parse the given source of JSON * * @param mixed $source - * @return iterable + * @return JsonParser */ - function parseJson(mixed $source): iterable + function parseJson(mixed $source): 
JsonParser { return new JsonParser($source); } From 4c9d635add06c1d2cb7cb6f21d99e4110aa23f4a Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Mon, 17 Oct 2022 18:17:28 +0200 Subject: [PATCH 034/249] Implement tokens --- src/Tokens/Comma.php | 35 ++++++++++++++++++++ src/Tokens/CompoundBegin.php | 37 +++++++++++++++++++++ src/Tokens/CompoundEnd.php | 47 ++++++++++++++++++++++++++ src/Tokens/Constant.php | 30 +++++++++++++++++ src/Tokens/DoubleQuote.php | 49 +++++++++++++++++++++++++-- src/Tokens/Scalar.php | 12 ------- src/Tokens/StateMutator.php | 20 ----------- src/Tokens/Token.php | 64 ++++++++++++++++++++++++++++++++++-- src/Tokens/Tokens.php | 28 +++++++++++----- src/Tokens/Value.php | 12 ------- 10 files changed, 277 insertions(+), 57 deletions(-) create mode 100644 src/Tokens/Comma.php create mode 100644 src/Tokens/CompoundBegin.php create mode 100644 src/Tokens/CompoundEnd.php create mode 100644 src/Tokens/Constant.php delete mode 100644 src/Tokens/Scalar.php delete mode 100644 src/Tokens/StateMutator.php delete mode 100644 src/Tokens/Value.php diff --git a/src/Tokens/Comma.php b/src/Tokens/Comma.php new file mode 100644 index 0000000..6884fa5 --- /dev/null +++ b/src/Tokens/Comma.php @@ -0,0 +1,35 @@ +inObject()) { + $state->expectKey(); + } + } +} diff --git a/src/Tokens/CompoundBegin.php b/src/Tokens/CompoundBegin.php new file mode 100644 index 0000000..eefbec9 --- /dev/null +++ b/src/Tokens/CompoundBegin.php @@ -0,0 +1,37 @@ +tree()->deepen(); + + if ($this->value == '{') { + $state->expectKey(); + } + } +} diff --git a/src/Tokens/CompoundEnd.php b/src/Tokens/CompoundEnd.php new file mode 100644 index 0000000..99c7202 --- /dev/null +++ b/src/Tokens/CompoundEnd.php @@ -0,0 +1,47 @@ +tree()->emerge(); + + if ($this->value == '}') { + $state->doNotExpectKey(); + } + } + + /** + * Determine whether this token ends a JSON chunk + * + * @return bool + */ + public function endsChunk(): bool + { + return true; + } +} diff --git a/src/Tokens/Constant.php 
b/src/Tokens/Constant.php new file mode 100644 index 0000000..9beebb9 --- /dev/null +++ b/src/Tokens/Constant.php @@ -0,0 +1,30 @@ +value != ':' ? Tokens::COLON : Tokens::SCALAR_CONST; + } + + /** + * Determine whether this token ends a JSON chunk + * + * @return bool + */ + public function endsChunk(): bool + { + return $this->value != ':'; + } +} diff --git a/src/Tokens/DoubleQuote.php b/src/Tokens/DoubleQuote.php index 523bf4c..a189fbb 100644 --- a/src/Tokens/DoubleQuote.php +++ b/src/Tokens/DoubleQuote.php @@ -8,8 +8,25 @@ use Cerbero\JsonParser\State; * The double quote token. * */ -class DoubleQuote extends Token implements StateMutator +class DoubleQuote extends Token { + /** + * Whether this token is an object key. + * + * @var bool + */ + protected bool $isKey; + + /** + * Retrieve the token type + * + * @return int + */ + public function type(): int + { + return Tokens::SCALAR_STRING; + } + /** * Mutate the given state * @@ -18,6 +35,34 @@ class DoubleQuote extends Token implements StateMutator */ public function mutateState(State $state): void { - // + if (!$this->isKey = $state->expectsKey()) { + return; + } + + $state->doNotExpectKey(); + + if ($state->treeIsShallow()) { + $state->traverseTree($this->key()); + } + } + + /** + * Retrieve the object key + * + * @return string + */ + protected function key(): string + { + return substr($this->value, 1, -1); + } + + /** + * Determine whether this token ends a JSON chunk + * + * @return bool + */ + public function endsChunk(): bool + { + return !$this->isKey; } } diff --git a/src/Tokens/Scalar.php b/src/Tokens/Scalar.php deleted file mode 100644 index c5fdb03..0000000 --- a/src/Tokens/Scalar.php +++ /dev/null @@ -1,12 +0,0 @@ -value = $value; + + return $this; + } + + /** + * Determine whether the token is a value * * @return bool */ - public function closesChunk(): bool + public function isValue(): bool + { + return ($this->type() | Tokens::VALUE_ANY) == Tokens::VALUE_ANY; + } + + /** + * Determine whether 
the token is a scalar value + * + * @return bool + */ + public function isScalar(): bool + { + return ($this->type() | Tokens::VALUE_SCALAR) == Tokens::VALUE_SCALAR; + } + + /** + * Mutate the given state + * + * @param State $state + * @return void + */ + public function mutateState(State $state): void + { + return; + } + + /** + * Determine whether this token ends a JSON chunk + * + * @return bool + */ + public function endsChunk(): bool { return false; } diff --git a/src/Tokens/Tokens.php b/src/Tokens/Tokens.php index 3250006..e543662 100644 --- a/src/Tokens/Tokens.php +++ b/src/Tokens/Tokens.php @@ -10,23 +10,26 @@ class Tokens { public const SCALAR_CONST = 1 << 0; public const SCALAR_STRING = 1 << 1; - public const SCALAR_VALUE = self::SCALAR_CONST | self::SCALAR_STRING; - public const OBJECT_START = 1 << 2; + public const OBJECT_BEGIN = 1 << 2; public const OBJECT_END = 1 << 3; - public const ARRAY_START = 1 << 4; + public const ARRAY_BEGIN = 1 << 4; public const ARRAY_END = 1 << 5; public const COMMA = 1 << 6; public const COLON = 1 << 7; - public const ANY_VALUE = self::OBJECT_START | self::ARRAY_START | self::SCALAR_VALUE; + public const COMPOUND_BEGIN = self::OBJECT_BEGIN | self::ARRAY_BEGIN; + public const COMPOUND_END = self::OBJECT_END | self::ARRAY_END; - public const AFTER_ARRAY_START = self::ANY_VALUE | self::ARRAY_END; + public const VALUE_SCALAR = self::SCALAR_CONST | self::SCALAR_STRING; + public const VALUE_ANY = self::COMPOUND_BEGIN | self::VALUE_SCALAR; + + public const AFTER_ARRAY_BEGIN = self::VALUE_ANY | self::ARRAY_END; public const AFTER_ARRAY_VALUE = self::COMMA | self::ARRAY_END; - public const AFTER_OBJECT_START = self::SCALAR_STRING | self::OBJECT_END; + public const AFTER_OBJECT_BEGIN = self::SCALAR_STRING | self::OBJECT_END; public const AFTER_OBJECT_VALUE = self::COMMA | self::OBJECT_END; /** @@ -50,9 +53,9 @@ class Tokens '8' => self::SCALAR_CONST, '9' => self::SCALAR_CONST, '"' => self::SCALAR_STRING, - '{' => self::OBJECT_START, 
+ '{' => self::OBJECT_BEGIN, '}' => self::OBJECT_END, - '[' => self::ARRAY_START, + '[' => self::ARRAY_BEGIN, ']' => self::ARRAY_END, ',' => self::COMMA, ':' => self::COLON, @@ -99,6 +102,13 @@ class Tokens * @var array */ public const MAP = [ - // + self::COMMA => Comma::class, + self::OBJECT_BEGIN => CompoundBegin::class, + self::ARRAY_BEGIN => CompoundBegin::class, + self::OBJECT_END => CompoundEnd::class, + self::ARRAY_END => CompoundEnd::class, + self::COLON => Constant::class, + self::SCALAR_CONST => Constant::class, + self::SCALAR_STRING => DoubleQuote::class, ]; } diff --git a/src/Tokens/Value.php b/src/Tokens/Value.php deleted file mode 100644 index 1e1b07e..0000000 --- a/src/Tokens/Value.php +++ /dev/null @@ -1,12 +0,0 @@ - Date: Mon, 17 Oct 2022 18:18:32 +0200 Subject: [PATCH 035/249] Handle tokens while parsing --- src/Lexer.php | 53 +++++++++++++++++++++++-------- src/Parser.php | 86 +++++++++++++++++++++----------------------------- src/State.php | 73 ++++++++++++++++++++++++++++++++++++++---- src/Tree.php | 64 ++++++++++++++++++++++++++++--------- 4 files changed, 191 insertions(+), 85 deletions(-) diff --git a/src/Lexer.php b/src/Lexer.php index c87af9b..92da3c2 100644 --- a/src/Lexer.php +++ b/src/Lexer.php @@ -3,6 +3,7 @@ namespace Cerbero\JsonParser; use Cerbero\JsonParser\Sources\Source; +use Cerbero\JsonParser\Tokens\Token; use Cerbero\JsonParser\Tokens\Tokens; use IteratorAggregate; use Traversable; @@ -13,6 +14,13 @@ use Traversable; */ class Lexer implements IteratorAggregate { + /** + * The map of token instances. + * + * @var array + */ + protected static array $tokensMap = []; + /** * The buffer to yield. * @@ -28,11 +36,11 @@ class Lexer implements IteratorAggregate protected bool $isEscape = false; /** - * The map of token instances. + * Whether the current character belongs to a string. * - * @var array + * @var bool */ - protected array $tokensMap = []; + protected bool $inString = false; /** * Instantiate the class. 
@@ -51,8 +59,12 @@ class Lexer implements IteratorAggregate */ protected function hydrateTokens(): void { - foreach (Tokens::MAP as $token => $class) { - $this->tokensMap[$token] = new $class(); + if (static::$tokensMap) { + return; + } + + foreach (Tokens::MAP as $type => $class) { + static::$tokensMap[$type] = new $class(); } } @@ -64,26 +76,41 @@ class Lexer implements IteratorAggregate public function getIterator(): Traversable { foreach ($this->source as $chunk) { - foreach (mb_str_split($chunk) as $char) { - $this->isEscape = $char == '\\' && !$this->isEscape; + foreach (mb_str_split($chunk) as $character) { + $this->inString = $character == '"' && !$this->isEscape && !$this->inString; + $this->isEscape = $character == '\\' && !$this->isEscape; - if (isset(Tokens::BOUNDARIES[$char]) && $this->buffer != '') { - yield $this->buffer; + if (isset(Tokens::BOUNDARIES[$character]) && $this->buffer != '' && !$this->inString) { + yield $this->toToken($this->buffer); $this->buffer = ''; - if (isset(Tokens::DELIMITERS[$char])) { - yield $char; + if (isset(Tokens::DELIMITERS[$character])) { + yield $this->toToken($character); } } elseif (!$this->isEscape) { - $this->buffer .= $char; + $this->buffer .= $character; } } } if ($this->buffer != '') { // @todo test whether this is ever called - yield $this->buffer; + yield $this->toToken($this->buffer); $this->buffer = ''; } } + + /** + * Turn the given value into a token + * + * @param string $value + * @return Token + */ + protected function toToken(string $value): Token + { + $character = $value[0]; + $type = Tokens::TYPES[$character]; + + return static::$tokensMap[$type]->setValue($value); + } } diff --git a/src/Parser.php b/src/Parser.php index a9ce462..6da2584 100644 --- a/src/Parser.php +++ b/src/Parser.php @@ -2,10 +2,9 @@ namespace Cerbero\JsonParser; +use Cerbero\JsonParser\Decoders\Decoder; use Cerbero\JsonParser\Pointers\Pointers; -use Cerbero\JsonParser\Tokens\StateMutator; use Cerbero\JsonParser\Tokens\Token; 
-use Cerbero\JsonParser\Tokens\Value; use Generator; use IteratorAggregate; use Traversable; @@ -23,6 +22,13 @@ class Parser implements IteratorAggregate */ protected State $state; + /** + * The JSON decoder. + * + * @var Decoder + */ + protected Decoder $decoder; + /** * The JSON pointers collection. * @@ -39,8 +45,8 @@ class Parser implements IteratorAggregate public function __construct(protected Lexer $lexer, protected Config $config) { $this->state = new State(); + $this->decoder = $config->decoder; $this->pointers = new Pointers(...$config->pointers); - $this->state->matchPointer($this->pointers); } /** @@ -50,13 +56,13 @@ class Parser implements IteratorAggregate */ public function getIterator(): Traversable { - foreach ($this->lexer as $token) { - $this->rematchPointer(); - $this->traverseToken($token); - $this->bufferToken($token); - $this->mutateState($token); + $this->state->matchPointer($this->pointers); - if (!$token->closesChunk() || $this->state->treeIsDeep()) { + foreach ($this->lexer as $token) { + $this->handleToken($token); + $this->rematchPointer(); + + if (!$token->endsChunk() || $this->state->treeIsDeep()) { continue; } @@ -72,6 +78,25 @@ class Parser implements IteratorAggregate } } + /** + * Handle the given token + * + * @param Token $token + * @return void + */ + public function handleToken(Token $token): void + { + $token->mutateState($this->state); + + if ($token->isValue() && !$this->state->inObject() && $this->state->treeIsShallow()) { + $this->state->traverseArray(); + } + + if ($this->state->shouldBufferToken($token)) { + $this->state->bufferToken($token); + } + } + /** * Set the matching JSON pointer when the tree changes * @@ -81,46 +106,7 @@ class Parser implements IteratorAggregate { if ($this->state->treeChanged() && $this->pointers->count() > 1) { $this->state->matchPointer($this->pointers); - $this->state->treeDidntChange(); - } - } - - /** - * Keep track of the JSON tree when traversing the given token - * - * @param Token 
$token - * @return void - */ - protected function traverseToken(Token $token): void - { - if (!$this->state->inObject() && $token instanceof Value && $this->state->treeIsShallow()) { - $this->state->traverseArray(); - } - } - - /** - * Preserve the given token in the buffer - * - * @param Token $token - * @return void - */ - protected function bufferToken(Token $token): void - { - if ($this->state->shouldBufferToken($token)) { - $this->state->bufferToken($token); - } - } - - /** - * Preserve the given token in the buffer - * - * @param Token $token - * @return void - */ - protected function mutateState(Token $token): void - { - if ($token instanceof StateMutator) { - $token->mutateState($this->state); + $this->state->treeDidNotChange(); } } @@ -131,7 +117,7 @@ class Parser implements IteratorAggregate */ protected function yieldDecodedBuffer(): Generator { - $decoded = $this->config->decoder->decode($this->state->pullBuffer()); + $decoded = $this->decoder->decode($this->state->pullBuffer()); if (!$decoded->succeeded) { call_user_func($this->config->onError, $decoded); diff --git a/src/State.php b/src/State.php index b2a7cbd..a8f3e0b 100644 --- a/src/State.php +++ b/src/State.php @@ -4,9 +4,7 @@ namespace Cerbero\JsonParser; use Cerbero\JsonParser\Pointers\Pointer; use Cerbero\JsonParser\Pointers\Pointers; -use Cerbero\JsonParser\Tokens\Scalar; use Cerbero\JsonParser\Tokens\Token; -use Cerbero\JsonParser\Tokens\Value; /** * The JSON parsing state. @@ -43,7 +41,7 @@ class State protected string $buffer = ''; /** - * Whether the token should be an object key. + * Whether an object key is expected. 
* * @var bool */ @@ -120,7 +118,7 @@ class State * * @return static */ - public function treeDidntChange(): static + public function treeDidNotChange(): static { $this->treeChanged = false; @@ -170,6 +168,20 @@ class State return $this->pointer->matchesTree($this->tree); } + /** + * Traverse the JSON tree through the given key + * + * @param string $key + * @return static + */ + public function traverseTree(string $key): static + { + $this->tree->traverse($key); + $this->treeChanged = true; + + return $this; + } + /** * Traverse a JSON array * @@ -211,8 +223,8 @@ class State */ protected function expectsToken(Token $token): bool { - return ($this->tree->depth() == $this->pointer->depth() && $token instanceof Value) - || ($this->tree->depth() + 1 == $this->pointer->depth() && $token instanceof Scalar); + return ($this->tree->depth() == $this->pointer->depth() && $token->isValue()) + || ($this->tree->depth() + 1 == $this->pointer->depth() && $token->isScalar()); } /** @@ -242,6 +254,53 @@ class State return $buffer; } + /** + * Determine whether an object key is expected + * + * @return bool + */ + public function expectsKey(): bool + { + return $this->expectsKey; + } + + /** + * Expect an object key + * + * @return static + */ + public function expectKey(): static + { + $this->expectsKey = true; + + return $this; + } + + /** + * Do not expect any object key + * + * @return static + */ + public function doNotExpectKey(): static + { + $this->expectsKey = false; + + return $this; + } + + /** + * Set whether the currently parsed node is an object + * + * @param bool $inObject + * @return static + */ + public function setInObject(bool $inObject): static + { + $this->inObject = $inObject; + + return $this; + } + /** * Determine whether the currently parsed node is an object * @@ -249,6 +308,6 @@ class State */ public function inObject(): bool { - return $this->inObject; + return $this->tree->inObject(); } } diff --git a/src/Tree.php b/src/Tree.php index 81f3f70..9cfc334 
100644 --- a/src/Tree.php +++ b/src/Tree.php @@ -33,21 +33,6 @@ class Tree implements IteratorAggregate */ protected int $depth = -1; - /** - * Traverse an array - * - * @param Pointer $pointer - * @return void - */ - public function traverseArray(Pointer $pointer): void - { - $this->original[$this->depth] = isset($this->original[$this->depth]) ? $this->original[$this->depth] + 1 : 0; - array_splice($this->original, $this->depth + 1); - - $this->wildcarded[$this->depth] = $pointer[$this->depth] == '-' ? '-' : $this->original[$this->depth]; - array_splice($this->wildcarded, $this->depth + 1); - } - /** * Retrieve the original JSON tree * @@ -98,6 +83,55 @@ class Tree implements IteratorAggregate $this->depth--; } + /** + * Determine whether the tree is traversing an object + * + * @return bool + */ + public function inObject(): bool + { + return is_string($this->original[$this->depth]); + } + + /** + * Traverse the given key + * + * @param string $key + * @return void + */ + public function traverse(string $key): void + { + $this->original[$this->depth] = $key; + $this->wildcarded[$this->depth] = $key; + + $this->trim(); + } + + /** + * Trim the tree after the latest traversed key + * + * @return void + */ + protected function trim(): void + { + array_splice($this->original, $this->depth + 1); + array_splice($this->wildcarded, $this->depth + 1); + } + + /** + * Traverse an array + * + * @param Pointer $pointer + * @return void + */ + public function traverseArray(Pointer $pointer): void + { + $this->original[$this->depth] = isset($this->original[$this->depth]) ? $this->original[$this->depth] + 1 : 0; + $this->wildcarded[$this->depth] = $pointer[$this->depth] == '-' ? 
'-' : $this->original[$this->depth]; + + $this->trim(); + } + /** * Retrieve the original tree iterator * From 60f2405729b62bdf92216e1a46e39ac57dca201c Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Sat, 29 Oct 2022 17:01:42 +1000 Subject: [PATCH 036/249] Update dependencies --- composer.json | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/composer.json b/composer.json index 1ef46f3..6d518c1 100644 --- a/composer.json +++ b/composer.json @@ -16,15 +16,19 @@ "role": "Developer" }], "require": { - "ext-json": "*", - "ext-mbstring": "*", "php": "^8.0" }, "require-dev": { + "ext-json": "*", + "guzzlehttp/guzzle": "^7.2", + "illuminate/http": ">=6.20", "pestphp/pest": "^1.21", "scrutinizer/ocular": "^1.8", "squizlabs/php_codesniffer": "^3.0" }, + "suggest": { + "guzzlehttp/guzzle": "Required to load JSON from endpoints (^7.2)." + }, "autoload": { "psr-4": { "Cerbero\\JsonParser\\": "src" From 0f78f3c6e6846189145809862a92d422849c0e6d Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Sat, 29 Oct 2022 17:01:57 +1000 Subject: [PATCH 037/249] Create exceptions --- src/Exceptions/JsonParserException.php | 22 ++++++++++++++ src/Exceptions/SourceException.php | 41 ++++++++++++++++++++++++++ 2 files changed, 63 insertions(+) create mode 100644 src/Exceptions/JsonParserException.php create mode 100644 src/Exceptions/SourceException.php diff --git a/src/Exceptions/JsonParserException.php b/src/Exceptions/JsonParserException.php new file mode 100644 index 0000000..0e8ed8a --- /dev/null +++ b/src/Exceptions/JsonParserException.php @@ -0,0 +1,22 @@ + Date: Sat, 29 Oct 2022 17:02:41 +1000 Subject: [PATCH 038/249] Implement trait to detect endpoints --- src/Concerns/DetectsEndpoints.php | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) create mode 100644 src/Concerns/DetectsEndpoints.php diff --git a/src/Concerns/DetectsEndpoints.php b/src/Concerns/DetectsEndpoints.php new file mode 100644 index 0000000..16c1c93 --- /dev/null 
+++ b/src/Concerns/DetectsEndpoints.php @@ -0,0 +1,25 @@ + Date: Sat, 29 Oct 2022 17:03:51 +1000 Subject: [PATCH 039/249] Avoid multi-byte function --- src/Lexer.php | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/Lexer.php b/src/Lexer.php index 92da3c2..a93758b 100644 --- a/src/Lexer.php +++ b/src/Lexer.php @@ -71,12 +71,13 @@ class Lexer implements IteratorAggregate /** * Retrieve the JSON fragments * - * @return \Cerbero\JsonParser\Tokens\Token[] + * @return Token[] */ public function getIterator(): Traversable { foreach ($this->source as $chunk) { - foreach (mb_str_split($chunk) as $character) { + for ($i = 0, $size = strlen($chunk); $i < $size; $i++) { + $character = $chunk[$i]; $this->inString = $character == '"' && !$this->isEscape && !$this->inString; $this->isEscape = $character == '\\' && !$this->isEscape; From 1ec7b9af1baf8ace1f6aa6baba7b71560a825e38 Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Sat, 29 Oct 2022 17:04:02 +1000 Subject: [PATCH 040/249] Implement sources --- src/Sources/AnySource.php | 89 +++++++++++++++++++++++++++ src/Sources/Endpoint.php | 76 +++++++++++++++++++++++ src/Sources/Filename.php | 48 +++++++++++++++ src/Sources/IterableSource.php | 42 +++++++++++++ src/Sources/JsonString.php | 49 +++++++++++++++ src/Sources/LaravelClientResponse.php | 43 +++++++++++++ src/Sources/Psr7Message.php | 43 +++++++++++++ src/Sources/Psr7Stream.php | 51 +++++++++++++++ src/Sources/Resource.php | 47 ++++++++++++++ src/Sources/Source.php | 83 ++++++++++++++++++++++--- src/Sources/StreamWrapper.php | 75 ++++++++++++++++++++++ 11 files changed, 638 insertions(+), 8 deletions(-) create mode 100644 src/Sources/AnySource.php create mode 100644 src/Sources/Endpoint.php create mode 100644 src/Sources/Filename.php create mode 100644 src/Sources/IterableSource.php create mode 100644 src/Sources/JsonString.php create mode 100644 src/Sources/LaravelClientResponse.php create mode 100644 src/Sources/Psr7Message.php create 
mode 100644 src/Sources/Psr7Stream.php create mode 100644 src/Sources/Resource.php create mode 100644 src/Sources/StreamWrapper.php diff --git a/src/Sources/AnySource.php b/src/Sources/AnySource.php new file mode 100644 index 0000000..c79a624 --- /dev/null +++ b/src/Sources/AnySource.php @@ -0,0 +1,89 @@ +sources() as $source) { + if ($source->matches()) { + return $this->matchingSource = $source; + } + } + + throw SourceException::unsupported(); + } + + /** + * Retrieve all available sources + * + * @return Source[] + */ + protected function sources(): Generator + { + foreach (static::$customSources as $source) { + yield $source::from($this->source, $this->config); + } + + foreach ($this->supportedSources as $source) { + yield $source::from($this->source, $this->config); + } + } + + /** + * Determine whether the JSON source can be handled + * + * @return bool + */ + public function matches(): bool + { + return true; + } + + /** + * Retrieve the calculated size of the JSON source + * + * @return int|null + */ + protected function calculateSize(): ?int + { + return $this->matchingSource?->size(); + } +} diff --git a/src/Sources/Endpoint.php b/src/Sources/Endpoint.php new file mode 100644 index 0000000..b3bde0d --- /dev/null +++ b/src/Sources/Endpoint.php @@ -0,0 +1,76 @@ +guzzleIsLoaded()) { + throw SourceException::requireGuzzle(); + } + + $this->response = (new Client())->get($this->source, [ + 'headers' => [ + 'Accept' => 'application/json', + 'Content-Type' => 'application/json', + ], + ]); + + return Psr7Message::from($this->response, $this->config); + } + + /** + * Determine whether the Guzzle client is loaded + * + * @return bool + */ + protected function guzzleIsLoaded(): bool + { + return class_exists(Client::class); + } + + /** + * Determine whether the JSON source can be handled + * + * @return bool + */ + public function matches(): bool + { + return is_string($this->source) && $this->isEndpoint($this->source); + } + + /** + * Retrieve the calculated size 
of the JSON source + * + * @return int|null + */ + protected function calculateSize(): ?int + { + return $this->response?->getBody()->getSize(); + } +} diff --git a/src/Sources/Filename.php b/src/Sources/Filename.php new file mode 100644 index 0000000..38f37f5 --- /dev/null +++ b/src/Sources/Filename.php @@ -0,0 +1,48 @@ +source, 'rb'); + + try { + yield from Resource::from($handle, $this->config); + } finally { + fclose($handle); + } + } + + /** + * Determine whether the JSON source can be handled + * + * @return bool + */ + public function matches(): bool + { + return is_string($this->source) && is_file($this->source); + } + + /** + * Retrieve the calculated size of the JSON source + * + * @return int|null + */ + protected function calculateSize(): ?int + { + return filesize($this->source) ?: null; + } +} diff --git a/src/Sources/IterableSource.php b/src/Sources/IterableSource.php new file mode 100644 index 0000000..0c14b5b --- /dev/null +++ b/src/Sources/IterableSource.php @@ -0,0 +1,42 @@ +source; + } + + /** + * Determine whether the JSON source can be handled + * + * @return bool + */ + public function matches(): bool + { + return is_iterable($this->source); + } + + /** + * Retrieve the calculated size of the JSON source + * + * @return int|null + */ + protected function calculateSize(): ?int + { + return iterator_count(clone $this->source); + } +} diff --git a/src/Sources/JsonString.php b/src/Sources/JsonString.php new file mode 100644 index 0000000..bb53a52 --- /dev/null +++ b/src/Sources/JsonString.php @@ -0,0 +1,49 @@ +size(); $i += $this->config->bytes) { + yield substr($this->source, $i, $this->config->bytes); + } + } + + /** + * Determine whether the JSON source can be handled + * + * @return bool + */ + public function matches(): bool + { + return is_string($this->source) + && !is_file($this->source) + && !$this->isEndpoint($this->source); + } + + /** + * Retrieve the calculated size of the JSON source + * + * @return int|null + */ + protected 
function calculateSize(): ?int + { + return strlen($this->source); + } +} diff --git a/src/Sources/LaravelClientResponse.php b/src/Sources/LaravelClientResponse.php new file mode 100644 index 0000000..da257aa --- /dev/null +++ b/src/Sources/LaravelClientResponse.php @@ -0,0 +1,43 @@ +source->toPsrResponse(), $this->config); + } + + /** + * Determine whether the JSON source can be handled + * + * @return bool + */ + public function matches(): bool + { + return $this->source instanceof Response; + } + + /** + * Retrieve the calculated size of the JSON source + * + * @return int|null + */ + protected function calculateSize(): ?int + { + return $this->source->toPsrResponse()->getBody()->getSize(); + } +} diff --git a/src/Sources/Psr7Message.php b/src/Sources/Psr7Message.php new file mode 100644 index 0000000..9d0584d --- /dev/null +++ b/src/Sources/Psr7Message.php @@ -0,0 +1,43 @@ +source->getBody(), $this->config); + } + + /** + * Determine whether the JSON source can be handled + * + * @return bool + */ + public function matches(): bool + { + return $this->source instanceof MessageInterface; + } + + /** + * Retrieve the calculated size of the JSON source + * + * @return int|null + */ + protected function calculateSize(): ?int + { + return $this->source->getBody()->getSize(); + } +} diff --git a/src/Sources/Psr7Stream.php b/src/Sources/Psr7Stream.php new file mode 100644 index 0000000..8117d62 --- /dev/null +++ b/src/Sources/Psr7Stream.php @@ -0,0 +1,51 @@ + ['stream' => $this->source], + ])); + + return Resource::from($stream, $this->config); + } + + /** + * Determine whether the JSON source can be handled + * + * @return bool + */ + public function matches(): bool + { + return $this->source instanceof StreamInterface; + } + + /** + * Retrieve the calculated size of the JSON source + * + * @return int|null + */ + protected function calculateSize(): ?int + { + return $this->source->getSize(); + } +} diff --git a/src/Sources/Resource.php b/src/Sources/Resource.php new 
file mode 100644 index 0000000..3af5171 --- /dev/null +++ b/src/Sources/Resource.php @@ -0,0 +1,47 @@ +source)) { + yield fread($this->source, $this->config->bytes); + } + } + + /** + * Determine whether the JSON source can be handled + * + * @return bool + */ + public function matches(): bool + { + return is_resource($this->source) || get_resource_type($this->source) == 'stream'; + } + + /** + * Retrieve the calculated size of the JSON source + * + * @return int|null + */ + protected function calculateSize(): ?int + { + $stats = fstat($this->source); + $size = $stats['size'] ?? null; + + return $size ?: null; + } +} diff --git a/src/Sources/Source.php b/src/Sources/Source.php index ac87594..9486d4f 100644 --- a/src/Sources/Source.php +++ b/src/Sources/Source.php @@ -2,6 +2,8 @@ namespace Cerbero\JsonParser\Sources; +use Cerbero\JsonParser\Config; +use Cerbero\JsonParser\Exceptions\SourceException; use IteratorAggregate; use Traversable; @@ -9,24 +11,89 @@ use Traversable; * The JSON source. * */ -class Source implements IteratorAggregate +abstract class Source implements IteratorAggregate { /** - * Instantiate the class. + * The registered custom sources. * - * @param mixed $source + * @var array */ - public function __construct(protected mixed $source) - { - } + protected static array $customSources = []; + + /** + * The cached size of the JSON source. + * + * @var int|null + */ + protected int $size; /** * Retrieve the JSON fragments * * @return Traversable */ - public function getIterator(): Traversable + abstract public function getIterator(): Traversable; + + /** + * Determine whether the JSON source can be handled + * + * @return bool + */ + abstract public function matches(): bool; + + /** + * Retrieve the calculated size of the JSON source + * + * @return int|null + */ + abstract protected function calculateSize(): ?int; + + /** + * Enforce the factory method to instantiate the class. 
+ * + * @param mixed $source + * @param Config $config + */ + protected function __construct(protected mixed $source, protected Config $config) { - // + } + + /** + * Instantiate the class statically + * + * @param mixed $source + * @param Config $config + * @return static + */ + public static function from(mixed $source, Config $config): static + { + return new static($source, $config); + } + + /** + * Register the given custom sources + * + * @param string ...$customSource + * @return void + */ + public function register(string ...$customSource): void + { + foreach ($customSource as $class) { + if (is_subclass_of($class, Source::class)) { + static::$customSources[] = $class; + } + + throw SourceException::invalidSource($class); + } + } + + /** + * Retrieve the size of the JSON source and cache it + * + * @return int|null + */ + public function size(): ?int + { + return $this->size ??= $this->calculateSize(); } } diff --git a/src/Sources/StreamWrapper.php b/src/Sources/StreamWrapper.php new file mode 100644 index 0000000..9ee5abe --- /dev/null +++ b/src/Sources/StreamWrapper.php @@ -0,0 +1,75 @@ +context); + + $this->stream = $options[static::NAME]['stream'] ?? 
null; + + return $this->stream instanceof StreamInterface && $this->stream->isReadable(); + } + + /** + * Determine whether the pointer is at the end of the stream + * + * @return bool + */ + public function stream_eof(): bool + { + return $this->stream->eof(); + } + + /** + * Read from the stream + * + * @param int $count + * @return string + */ + public function stream_read(int $count): string + { + return $this->stream->read($count); + } +} From a26ebd8ae87190a48882dd45e18d08bec08cfe79 Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Sat, 29 Oct 2022 17:04:39 +1000 Subject: [PATCH 041/249] Support any JSON source by default --- src/JsonParser.php | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/JsonParser.php b/src/JsonParser.php index 4a369a4..abc7375 100644 --- a/src/JsonParser.php +++ b/src/JsonParser.php @@ -5,7 +5,7 @@ namespace Cerbero\JsonParser; use Cerbero\JsonParser\Decoders\Decoder; use Cerbero\JsonParser\Decoders\ObjectDecoder; use Cerbero\JsonParser\Pointers\Pointer; -use Cerbero\JsonParser\Sources\Source; +use Cerbero\JsonParser\Sources\AnySource; use IteratorAggregate; use Traversable; @@ -37,7 +37,8 @@ class JsonParser implements IteratorAggregate public function __construct(mixed $source) { $this->config = new Config(); - $this->parser = new Parser(new Lexer(new Source($source)), $this->config); + $source = new AnySource($source, $this->config); + $this->parser = new Parser(new Lexer($source), $this->config); } /** From a08a048061d91cb5854f9d1572529075451c4315 Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Sat, 29 Oct 2022 17:59:19 +1000 Subject: [PATCH 042/249] Throw exception when registering an invalid source --- src/Sources/Source.php | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Sources/Source.php b/src/Sources/Source.php index 9486d4f..b0805e2 100644 --- a/src/Sources/Source.php +++ b/src/Sources/Source.php @@ -79,11 +79,11 @@ abstract class Source implements 
IteratorAggregate public function register(string ...$customSource): void { foreach ($customSource as $class) { - if (is_subclass_of($class, Source::class)) { - static::$customSources[] = $class; + if (!is_subclass_of($class, Source::class)) { + throw SourceException::invalidSource($class); } - throw SourceException::invalidSource($class); + static::$customSources[] = $class; } } From c72f4a6313dbfd56b77d44367d4a0b2d8931c2f6 Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Sat, 29 Oct 2022 18:27:43 +1000 Subject: [PATCH 043/249] Auto-load helpers --- composer.json | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/composer.json b/composer.json index 6d518c1..3b0db60 100644 --- a/composer.json +++ b/composer.json @@ -32,16 +32,16 @@ "autoload": { "psr-4": { "Cerbero\\JsonParser\\": "src" - } - }, - "autoload-dev": { - "psr-4": { - "Cerbero\\JsonParser\\": "tests" }, "files": [ "helpers.php" ] }, + "autoload-dev": { + "psr-4": { + "Cerbero\\JsonParser\\": "tests" + } + }, "scripts": { "test": "pest", "check-style": "phpcs --standard=PSR12 src", From 1772eb3e377d077bc76095c005bf0635b12f43a3 Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Sat, 29 Oct 2022 18:28:21 +1000 Subject: [PATCH 044/249] Use closure for error handling --- src/Config.php | 5 +++-- src/JsonParser.php | 5 +++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/src/Config.php b/src/Config.php index 7d7fb58..bd80562 100644 --- a/src/Config.php +++ b/src/Config.php @@ -6,6 +6,7 @@ use Cerbero\JsonParser\Decoders\ArrayDecoder; use Cerbero\JsonParser\Decoders\DecodedValue; use Cerbero\JsonParser\Decoders\Decoder; use Cerbero\JsonParser\Pointers\Pointer; +use Closure; /** * The configuration. @@ -37,9 +38,9 @@ class Config /** * The callback to run during a parsing error. 
* - * @var callable + * @var Closure */ - public callable $onError; + public Closure $onError; /** * Instantiate the class diff --git a/src/JsonParser.php b/src/JsonParser.php index abc7375..f867569 100644 --- a/src/JsonParser.php +++ b/src/JsonParser.php @@ -6,6 +6,7 @@ use Cerbero\JsonParser\Decoders\Decoder; use Cerbero\JsonParser\Decoders\ObjectDecoder; use Cerbero\JsonParser\Pointers\Pointer; use Cerbero\JsonParser\Sources\AnySource; +use Closure; use IteratorAggregate; use Traversable; @@ -114,10 +115,10 @@ class JsonParser implements IteratorAggregate /** * Set the logic to run during parsing errors * - * @param callable $callback + * @param Closure $callback * @return static */ - public function onError(callable $callback): static + public function onError(Closure $callback): static { $this->config->onError = $callback; From 2ec4bbe58b8426266c73c9a1256c79537b8eda06 Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Sat, 29 Oct 2022 18:29:04 +1000 Subject: [PATCH 045/249] Call factory method --- src/JsonParser.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/JsonParser.php b/src/JsonParser.php index f867569..a49aa03 100644 --- a/src/JsonParser.php +++ b/src/JsonParser.php @@ -38,7 +38,7 @@ class JsonParser implements IteratorAggregate public function __construct(mixed $source) { $this->config = new Config(); - $source = new AnySource($source, $this->config); + $source = AnySource::from($source, $this->config); $this->parser = new Parser(new Lexer($source), $this->config); } From 94667ba3e3b1f6a1988036ef8b784eee39c2d63b Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Sat, 29 Oct 2022 18:33:28 +1000 Subject: [PATCH 046/249] Make tokens stringable --- src/Tokens/Token.php | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/src/Tokens/Token.php b/src/Tokens/Token.php index 2179ca3..92b89dd 100644 --- a/src/Tokens/Token.php +++ b/src/Tokens/Token.php @@ -3,12 +3,13 @@ namespace 
Cerbero\JsonParser\Tokens; use Cerbero\JsonParser\State; +use Stringable; /** * The abstract implementation of a token. * */ -abstract class Token +abstract class Token implements Stringable { /** * The token value. @@ -77,4 +78,14 @@ abstract class Token { return false; } + + /** + * Retrieve the underlying token value + * + * @return string + */ + public function __toString(): string + { + return $this->value; + } } From fb717264059af9ef5873aafb4b2446d78f29d9e6 Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Sat, 29 Oct 2022 18:34:06 +1000 Subject: [PATCH 047/249] Remove nullable declaration from mixed type --- src/Decoders/DecodedValue.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Decoders/DecodedValue.php b/src/Decoders/DecodedValue.php index 393389d..e80c2fd 100644 --- a/src/Decoders/DecodedValue.php +++ b/src/Decoders/DecodedValue.php @@ -17,7 +17,7 @@ class DecodedValue */ protected function __construct( public bool $succeeded, - public ?mixed $value = null, + public mixed $value = null, public ?string $error = null, public ?int $code = null, public ?JsonException $exception = null, From ed1ec102e27ef2982a6b05ad55bbb64bf7793ffb Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Sun, 6 Nov 2022 00:42:45 +1000 Subject: [PATCH 048/249] Update description and suggestion --- composer.json | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/composer.json b/composer.json index 3b0db60..556ca94 100644 --- a/composer.json +++ b/composer.json @@ -1,7 +1,7 @@ { "name": "cerbero/json-parser", "type": "library", - "description": "Zero-dependencies pull parser and lexer to save memory while reading big JSONs.", + "description": "Zero-dependencies, framework-agnostic pull parser to read big JSON in a memory-efficient way.", "keywords": [ "json", "parser", @@ -27,6 +27,7 @@ "squizlabs/php_codesniffer": "^3.0" }, "suggest": { + "ext-json": "Required to decode JSON with the built-in decoder.", "guzzlehttp/guzzle": 
"Required to load JSON from endpoints (^7.2)." }, "autoload": { From 24721016705b0f11e30136f6e535e89e1963355d Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Sun, 6 Nov 2022 01:02:09 +1000 Subject: [PATCH 049/249] Extract tokenization logic --- src/Lexer.php | 61 +++++++++++++++------------------------- src/Tokens/Tokenizer.php | 56 ++++++++++++++++++++++++++++++++++++ 2 files changed, 79 insertions(+), 38 deletions(-) create mode 100644 src/Tokens/Tokenizer.php diff --git a/src/Lexer.php b/src/Lexer.php index a93758b..1d4e116 100644 --- a/src/Lexer.php +++ b/src/Lexer.php @@ -4,7 +4,9 @@ namespace Cerbero\JsonParser; use Cerbero\JsonParser\Sources\Source; use Cerbero\JsonParser\Tokens\Token; +use Cerbero\JsonParser\Tokens\Tokenizer; use Cerbero\JsonParser\Tokens\Tokens; +use Generator; use IteratorAggregate; use Traversable; @@ -15,11 +17,11 @@ use Traversable; class Lexer implements IteratorAggregate { /** - * The map of token instances. + * The tokenizer. * - * @var array + * @var Tokenizer */ - protected static array $tokensMap = []; + protected Tokenizer $tokenizer; /** * The buffer to yield. 
@@ -49,23 +51,7 @@ class Lexer implements IteratorAggregate */ public function __construct(protected Source $source) { - $this->hydrateTokens(); - } - - /** - * Set the hydrated tokens - * - * @return void - */ - protected function hydrateTokens(): void - { - if (static::$tokensMap) { - return; - } - - foreach (Tokens::MAP as $type => $class) { - static::$tokensMap[$type] = new $class(); - } + $this->tokenizer = new Tokenizer(); } /** @@ -81,37 +67,36 @@ class Lexer implements IteratorAggregate $this->inString = $character == '"' && !$this->isEscape && !$this->inString; $this->isEscape = $character == '\\' && !$this->isEscape; - if (isset(Tokens::BOUNDARIES[$character]) && $this->buffer != '' && !$this->inString) { - yield $this->toToken($this->buffer); - $this->buffer = ''; - - if (isset(Tokens::DELIMITERS[$character])) { - yield $this->toToken($character); - } - } elseif (!$this->isEscape) { - $this->buffer .= $character; - } + yield from $this->yieldOrBufferCharacter($character); } } if ($this->buffer != '') { // @todo test whether this is ever called - yield $this->toToken($this->buffer); + yield $this->tokenizer->toToken($this->buffer); $this->buffer = ''; } } /** - * Turn the given value into a token + * Yield the given character or buffer it * - * @param string $value - * @return Token + * @param string $character + * @return Generator */ - protected function toToken(string $value): Token + protected function yieldOrBufferCharacter(string $character): Generator { - $character = $value[0]; - $type = Tokens::TYPES[$character]; + if (isset(Tokens::BOUNDARIES[$character]) && !$this->inString) { + if ($this->buffer != '') { + yield $this->tokenizer->toToken($this->buffer); + $this->buffer = ''; + } - return static::$tokensMap[$type]->setValue($value); + if (isset(Tokens::DELIMITERS[$character])) { + yield $this->tokenizer->toToken($character); + } + } elseif (!$this->isEscape) { + $this->buffer .= $character; + } } } diff --git a/src/Tokens/Tokenizer.php 
b/src/Tokens/Tokenizer.php new file mode 100644 index 0000000..66d5f0f --- /dev/null +++ b/src/Tokens/Tokenizer.php @@ -0,0 +1,56 @@ + + */ + protected static array $tokensMap = []; + + /** + * Instantiate the class. + * + */ + public function __construct() + { + $this->hydrateTokens(); + } + + /** + * Set the hydrated tokens + * + * @return void + */ + protected function hydrateTokens(): void + { + if (static::$tokensMap) { + return; + } + + foreach (Tokens::MAP as $type => $class) { + static::$tokensMap[$type] = new $class(); + } + } + + /** + * Turn the given value into a token + * + * @param string $value + * @return Token + */ + public function toToken(string $value): Token + { + $character = $value[0]; + $type = Tokens::TYPES[$character]; + + return static::$tokensMap[$type]->setValue($value); + } +} From 87a99785b36987212dcc4f068d9cfe1b2221961e Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Sun, 6 Nov 2022 01:03:03 +1000 Subject: [PATCH 050/249] Add support for custom sources --- src/Sources/AnySource.php | 1 + src/Sources/CustomSource.php | 42 ++++++++++++++++++++++++++++++++++ src/Sources/IterableSource.php | 2 +- 3 files changed, 44 insertions(+), 1 deletion(-) create mode 100644 src/Sources/CustomSource.php diff --git a/src/Sources/AnySource.php b/src/Sources/AnySource.php index c79a624..fcec18a 100644 --- a/src/Sources/AnySource.php +++ b/src/Sources/AnySource.php @@ -18,6 +18,7 @@ class AnySource extends Source * @var array */ protected array $supportedSources = [ + CustomSource::class, Endpoint::class, Filename::class, IterableSource::class, diff --git a/src/Sources/CustomSource.php b/src/Sources/CustomSource.php new file mode 100644 index 0000000..b49d21b --- /dev/null +++ b/src/Sources/CustomSource.php @@ -0,0 +1,42 @@ +source; + } + + /** + * Determine whether the JSON source can be handled + * + * @return bool + */ + public function matches(): bool + { + return $this->source instanceof Source; + } + + /** + * Retrieve the calculated 
size of the JSON source + * + * @return int|null + */ + protected function calculateSize(): ?int + { + return $this->source->size(); + } +} diff --git a/src/Sources/IterableSource.php b/src/Sources/IterableSource.php index 0c14b5b..c3abe85 100644 --- a/src/Sources/IterableSource.php +++ b/src/Sources/IterableSource.php @@ -27,7 +27,7 @@ class IterableSource extends Source */ public function matches(): bool { - return is_iterable($this->source); + return is_iterable($this->source) && !$this->source instanceof Source; } /** From 2de167e29b88bfa1a7f1d9ffc2aa693b94ae7e30 Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Sun, 6 Nov 2022 01:03:21 +1000 Subject: [PATCH 051/249] Fix token type --- src/Tokens/Constant.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Tokens/Constant.php b/src/Tokens/Constant.php index 9beebb9..dc27d76 100644 --- a/src/Tokens/Constant.php +++ b/src/Tokens/Constant.php @@ -15,7 +15,7 @@ class Constant extends Token */ public function type(): int { - return $this->value != ':' ? Tokens::COLON : Tokens::SCALAR_CONST; + return $this->value == ':' ? 
Tokens::COLON : Tokens::SCALAR_CONST; } /** From 870e83cd3927c169ba8f96cc77014222140796af Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Sun, 6 Nov 2022 01:05:00 +1000 Subject: [PATCH 052/249] Define method to register sources statically --- src/Sources/Source.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Sources/Source.php b/src/Sources/Source.php index b0805e2..cf7cf92 100644 --- a/src/Sources/Source.php +++ b/src/Sources/Source.php @@ -76,7 +76,7 @@ abstract class Source implements IteratorAggregate * @param string ...$customSource * @return void */ - public function register(string ...$customSource): void + public static function register(string ...$customSource): void { foreach ($customSource as $class) { if (!is_subclass_of($class, Source::class)) { From c32080c344890a8970f27aac85adeb08fbd9cf31 Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Sun, 6 Nov 2022 01:06:25 +1000 Subject: [PATCH 053/249] Add method to check whether JSON root is traversed --- src/State.php | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/State.php b/src/State.php index a8f3e0b..40fef83 100644 --- a/src/State.php +++ b/src/State.php @@ -310,4 +310,14 @@ class State { return $this->tree->inObject(); } + + /** + * Determine whether the tree is within the JSON root + * + * @return bool + */ + public function inRoot(): bool + { + return $this->tree->depth() >= 0; + } } From f08fff6f54ffabb4c259a18f1cf00683e1fa97ba Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Sun, 6 Nov 2022 01:06:49 +1000 Subject: [PATCH 054/249] Handle unset keys --- src/Tree.php | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/Tree.php b/src/Tree.php index 9cfc334..b2e9c5d 100644 --- a/src/Tree.php +++ b/src/Tree.php @@ -90,7 +90,9 @@ class Tree implements IteratorAggregate */ public function inObject(): bool { - return is_string($this->original[$this->depth]); + $key = $this->original[$this->depth] ?? 
null; + + return is_string($key); } /** From 58a77ffb85ef3c43bb985be768be266a783ca520 Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Sun, 6 Nov 2022 01:08:25 +1000 Subject: [PATCH 055/249] Move yielding logic to main method to read array keys properly --- src/Parser.php | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/src/Parser.php b/src/Parser.php index 6da2584..0bc9665 100644 --- a/src/Parser.php +++ b/src/Parser.php @@ -5,7 +5,6 @@ namespace Cerbero\JsonParser; use Cerbero\JsonParser\Decoders\Decoder; use Cerbero\JsonParser\Pointers\Pointers; use Cerbero\JsonParser\Tokens\Token; -use Generator; use IteratorAggregate; use Traversable; @@ -66,8 +65,10 @@ class Parser implements IteratorAggregate continue; } - if ($this->state->hasBuffer()) { - yield from $this->yieldDecodedBuffer(); + if ($this->state->hasBuffer() && $this->state->inObject()) { + yield $this->decode($this->state->key()) => $this->decode($this->state->pullBuffer()); + } elseif ($this->state->hasBuffer() && !$this->state->inObject()) { + yield $this->decode($this->state->pullBuffer()); } $this->markPointerAsFound(); @@ -84,15 +85,17 @@ class Parser implements IteratorAggregate * @param Token $token * @return void */ - public function handleToken(Token $token): void + protected function handleToken(Token $token): void { + $inRoot = $this->state->inRoot(); + $token->mutateState($this->state); if ($token->isValue() && !$this->state->inObject() && $this->state->treeIsShallow()) { $this->state->traverseArray(); } - if ($this->state->shouldBufferToken($token)) { + if ($inRoot && $this->state->shouldBufferToken($token)) { $this->state->bufferToken($token); } } @@ -111,11 +114,11 @@ class Parser implements IteratorAggregate } /** - * Yield the decoded JSON of the buffer + * Retrieve the decoded JSON of the buffer * - * @return Generator + * @return mixed */ - protected function yieldDecodedBuffer(): Generator + protected function decodeBuffer(): mixed 
{ $decoded = $this->decoder->decode($this->state->pullBuffer()); @@ -123,11 +126,7 @@ class Parser implements IteratorAggregate call_user_func($this->config->onError, $decoded); } - if ($this->state->inObject()) { - yield $this->state->node() => $decoded->value; - } else { - yield $decoded->value; - } + return $decoded->value; } /** From 61522f95617dad66aa0b5675adb56ded6a9ee40b Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Mon, 7 Nov 2022 23:44:57 +1000 Subject: [PATCH 056/249] Fix parsing issues --- src/Lexer.php | 49 +++++++++---------- src/Parser.php | 24 +++------ src/State.php | 36 ++++---------- .../{DoubleQuote.php => ScalarString.php} | 16 ++---- src/Tokens/Tokens.php | 2 +- src/Tree.php | 22 +++++++-- 6 files changed, 62 insertions(+), 87 deletions(-) rename src/Tokens/{DoubleQuote.php => ScalarString.php} (75%) diff --git a/src/Lexer.php b/src/Lexer.php index 1d4e116..2e8034f 100644 --- a/src/Lexer.php +++ b/src/Lexer.php @@ -30,13 +30,6 @@ class Lexer implements IteratorAggregate */ protected string $buffer = ''; - /** - * Whether the current character is an escape. - * - * @var bool - */ - protected bool $isEscape = false; - /** * Whether the current character belongs to a string. 
* @@ -57,25 +50,30 @@ class Lexer implements IteratorAggregate /** * Retrieve the JSON fragments * - * @return Token[] + * @return Generator */ public function getIterator(): Traversable { foreach ($this->source as $chunk) { for ($i = 0, $size = strlen($chunk); $i < $size; $i++) { $character = $chunk[$i]; - $this->inString = $character == '"' && !$this->isEscape && !$this->inString; - $this->isEscape = $character == '\\' && !$this->isEscape; + $this->inString = $this->inString($character); yield from $this->yieldOrBufferCharacter($character); } } + } - if ($this->buffer != '') { - // @todo test whether this is ever called - yield $this->tokenizer->toToken($this->buffer); - $this->buffer = ''; - } + /** + * Determine whether the given character is within a string + * + * @param string $character + * @return bool + */ + protected function inString(string $character): bool + { + return ($character == '"' && !$this->inString) + || ($character != '"' && $this->inString); } /** @@ -86,17 +84,18 @@ class Lexer implements IteratorAggregate */ protected function yieldOrBufferCharacter(string $character): Generator { - if (isset(Tokens::BOUNDARIES[$character]) && !$this->inString) { - if ($this->buffer != '') { - yield $this->tokenizer->toToken($this->buffer); - $this->buffer = ''; - } - - if (isset(Tokens::DELIMITERS[$character])) { - yield $this->tokenizer->toToken($character); - } - } elseif (!$this->isEscape) { + if ($this->inString || !isset(Tokens::BOUNDARIES[$character])) { $this->buffer .= $character; + return; + } + + if ($this->buffer != '') { + yield $this->tokenizer->toToken($this->buffer); + $this->buffer = ''; + } + + if (isset(Tokens::DELIMITERS[$character])) { + yield $this->tokenizer->toToken($character); } } } diff --git a/src/Parser.php b/src/Parser.php index 0bc9665..cfa8b68 100644 --- a/src/Parser.php +++ b/src/Parser.php @@ -2,7 +2,6 @@ namespace Cerbero\JsonParser; -use Cerbero\JsonParser\Decoders\Decoder; use Cerbero\JsonParser\Pointers\Pointers; use 
Cerbero\JsonParser\Tokens\Token; use IteratorAggregate; @@ -21,13 +20,6 @@ class Parser implements IteratorAggregate */ protected State $state; - /** - * The JSON decoder. - * - * @var Decoder - */ - protected Decoder $decoder; - /** * The JSON pointers collection. * @@ -44,7 +36,6 @@ class Parser implements IteratorAggregate public function __construct(protected Lexer $lexer, protected Config $config) { $this->state = new State(); - $this->decoder = $config->decoder; $this->pointers = new Pointers(...$config->pointers); } @@ -87,17 +78,15 @@ class Parser implements IteratorAggregate */ protected function handleToken(Token $token): void { - $inRoot = $this->state->inRoot(); - - $token->mutateState($this->state); - if ($token->isValue() && !$this->state->inObject() && $this->state->treeIsShallow()) { $this->state->traverseArray(); } - if ($inRoot && $this->state->shouldBufferToken($token)) { + if ($this->state->inRoot() && $this->state->shouldBufferToken($token)) { $this->state->bufferToken($token); } + + $token->mutateState($this->state); } /** @@ -114,13 +103,14 @@ class Parser implements IteratorAggregate } /** - * Retrieve the decoded JSON of the buffer + * Retrieve the decoded value of the given JSON fragment * + * @param string $json * @return mixed */ - protected function decodeBuffer(): mixed + protected function decode(string $json): mixed { - $decoded = $this->decoder->decode($this->state->pullBuffer()); + $decoded = $this->config->decoder->decode($json); if (!$decoded->succeeded) { call_user_func($this->config->onError, $decoded); diff --git a/src/State.php b/src/State.php index 40fef83..e1aff3f 100644 --- a/src/State.php +++ b/src/State.php @@ -2,6 +2,7 @@ namespace Cerbero\JsonParser; +use Cerbero\JsonParser\Pointers\NullPointer; use Cerbero\JsonParser\Pointers\Pointer; use Cerbero\JsonParser\Pointers\Pointers; use Cerbero\JsonParser\Tokens\Token; @@ -47,13 +48,6 @@ class State */ protected bool $expectsKey = false; - /** - * Whether the currently 
parsed node is an object. - * - * @var bool - */ - protected bool $inObject = false; - /** * Instantiate the class. * @@ -80,7 +74,8 @@ class State */ public function treeIsShallow(): bool { - return $this->tree->depth() < $this->pointer->depth(); + return $this->pointer instanceof NullPointer + || $this->tree->depth() < $this->pointer->depth(); } /** @@ -98,9 +93,9 @@ class State * * @return string */ - public function node(): string + public function key(): string { - return $this->tree[$this->tree->depth()]; + return $this->tree->currentKey(); } /** @@ -169,14 +164,14 @@ class State } /** - * Traverse the JSON tree through the given key + * Traverse the given object key * * @param string $key * @return static */ - public function traverseTree(string $key): static + public function traverseKey(string $key): static { - $this->tree->traverse($key); + $this->tree->traverseKey($key); $this->treeChanged = true; return $this; @@ -212,7 +207,7 @@ class State public function shouldBufferToken(Token $token): bool { return $this->pointer->matchesTree($this->tree) - && ($this->treeIsDeep() || (!$this->expectsKey && $this->expectsToken($token))); + && ($this->treeIsDeep() || (!$this->expectsKey() && $this->expectsToken($token))); } /** @@ -288,19 +283,6 @@ class State return $this; } - /** - * Set whether the currently parsed node is an object - * - * @param bool $inObject - * @return static - */ - public function setInObject(bool $inObject): static - { - $this->inObject = $inObject; - - return $this; - } - /** * Determine whether the currently parsed node is an object * diff --git a/src/Tokens/DoubleQuote.php b/src/Tokens/ScalarString.php similarity index 75% rename from src/Tokens/DoubleQuote.php rename to src/Tokens/ScalarString.php index a189fbb..9dce3c9 100644 --- a/src/Tokens/DoubleQuote.php +++ b/src/Tokens/ScalarString.php @@ -5,10 +5,10 @@ namespace Cerbero\JsonParser\Tokens; use Cerbero\JsonParser\State; /** - * The double quote token. + * The scalar string token. 
* */ -class DoubleQuote extends Token +class ScalarString extends Token { /** * Whether this token is an object key. @@ -42,20 +42,10 @@ class DoubleQuote extends Token $state->doNotExpectKey(); if ($state->treeIsShallow()) { - $state->traverseTree($this->key()); + $state->traverseKey($this->value); } } - /** - * Retrieve the object key - * - * @return string - */ - protected function key(): string - { - return substr($this->value, 1, -1); - } - /** * Determine whether this token ends a JSON chunk * diff --git a/src/Tokens/Tokens.php b/src/Tokens/Tokens.php index e543662..41257de 100644 --- a/src/Tokens/Tokens.php +++ b/src/Tokens/Tokens.php @@ -109,6 +109,6 @@ class Tokens self::ARRAY_END => CompoundEnd::class, self::COLON => Constant::class, self::SCALAR_CONST => Constant::class, - self::SCALAR_STRING => DoubleQuote::class, + self::SCALAR_STRING => ScalarString::class, ]; } diff --git a/src/Tree.php b/src/Tree.php index b2e9c5d..735e026 100644 --- a/src/Tree.php +++ b/src/Tree.php @@ -96,15 +96,17 @@ class Tree implements IteratorAggregate } /** - * Traverse the given key + * Traverse the given object key * * @param string $key * @return void */ - public function traverse(string $key): void + public function traverseKey(string $key): void { - $this->original[$this->depth] = $key; - $this->wildcarded[$this->depth] = $key; + $trimmedKey = substr($key, 1, -1); + + $this->original[$this->depth] = $trimmedKey; + $this->wildcarded[$this->depth] = $trimmedKey; $this->trim(); } @@ -134,6 +136,18 @@ class Tree implements IteratorAggregate $this->trim(); } + /** + * Retrieve the current key + * + * @return string|int + */ + public function currentKey(): string|int + { + $key = $this->original[$this->depth]; + + return is_string($key) ? 
"\"$key\"" : $key; + } + /** * Retrieve the original tree iterator * From 78c8531879dfb36f15781589e7b47dbd5de7f9f6 Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Mon, 7 Nov 2022 23:45:07 +1000 Subject: [PATCH 057/249] Create tests --- tests/Dataset.php | 45 +++++++ tests/Feature/ParsingTest.php | 17 +++ tests/Pest.php | 48 ++++++++ tests/fixtures/parsing/array.json | 1 + tests/fixtures/parsing/array.php | 3 + tests/fixtures/parsing/complex_array.json | 70 +++++++++++ tests/fixtures/parsing/complex_array.php | 132 +++++++++++++++++++++ tests/fixtures/parsing/complex_object.json | 26 ++++ tests/fixtures/parsing/complex_object.php | 58 +++++++++ tests/fixtures/parsing/empty_array.json | 1 + tests/fixtures/parsing/empty_array.php | 3 + tests/fixtures/parsing/empty_object.json | 1 + tests/fixtures/parsing/empty_object.php | 3 + tests/fixtures/parsing/object.json | 12 ++ tests/fixtures/parsing/object.php | 14 +++ 15 files changed, 434 insertions(+) create mode 100644 tests/Dataset.php create mode 100644 tests/Feature/ParsingTest.php create mode 100644 tests/Pest.php create mode 100644 tests/fixtures/parsing/array.json create mode 100644 tests/fixtures/parsing/array.php create mode 100644 tests/fixtures/parsing/complex_array.json create mode 100644 tests/fixtures/parsing/complex_array.php create mode 100644 tests/fixtures/parsing/complex_object.json create mode 100644 tests/fixtures/parsing/complex_object.php create mode 100644 tests/fixtures/parsing/empty_array.json create mode 100644 tests/fixtures/parsing/empty_array.php create mode 100644 tests/fixtures/parsing/empty_object.json create mode 100644 tests/fixtures/parsing/empty_object.php create mode 100644 tests/fixtures/parsing/object.json create mode 100644 tests/fixtures/parsing/object.php diff --git a/tests/Dataset.php b/tests/Dataset.php new file mode 100644 index 0000000..3a77ff3 --- /dev/null +++ b/tests/Dataset.php @@ -0,0 +1,45 @@ +getExtension() === 'json') { + yield $file->getBasename('.json'); + 
} + } + } +} diff --git a/tests/Feature/ParsingTest.php b/tests/Feature/ParsingTest.php new file mode 100644 index 0000000..c3909dc --- /dev/null +++ b/tests/Feature/ParsingTest.php @@ -0,0 +1,17 @@ +toParseTo($parsed); +})->with(Dataset::forParsing()); + +it('parses JSON when calling the factory method', function (string $json, array $parsed) { + expect(JsonParser::parse($json))->toParseTo($parsed); +})->with(Dataset::forParsing()); + +it('parses JSON when calling the helper', function (string $json, array $parsed) { + expect(parseJson($json))->toParseTo($parsed); +})->with(Dataset::forParsing()); diff --git a/tests/Pest.php b/tests/Pest.php new file mode 100644 index 0000000..5398c77 --- /dev/null +++ b/tests/Pest.php @@ -0,0 +1,48 @@ +in('Feature'); + +/* +|-------------------------------------------------------------------------- +| Expectations +|-------------------------------------------------------------------------- +| +| When you're writing tests, you often need to check that values meet certain conditions. The +| "expect()" function gives you access to a set of "expectations" methods that you can use +| to assert different things. Of course, you may extend the Expectation API at any time. +| +*/ + +expect()->extend('toParseTo', function (array $parsed) { + $actual = []; + + foreach ($this->value as $key => $value) { + $actual[$key] = $value; + } + + expect($actual)->toBe($parsed); +}); + +/* +|-------------------------------------------------------------------------- +| Functions +|-------------------------------------------------------------------------- +| +| While Pest is very powerful out-of-the-box, you may have some testing code specific to your +| project that you don't want to repeat in every file. Here you can also expose helpers as +| global functions to help you to reduce the number of lines of code in your test files. +| +*/ + +// ... 
diff --git a/tests/fixtures/parsing/array.json b/tests/fixtures/parsing/array.json new file mode 100644 index 0000000..ca7e149 --- /dev/null +++ b/tests/fixtures/parsing/array.json @@ -0,0 +1 @@ +[1, "", "foo", "\"bar\"", 3.14, false, null, [], {}] diff --git a/tests/fixtures/parsing/array.php b/tests/fixtures/parsing/array.php new file mode 100644 index 0000000..c7380fb --- /dev/null +++ b/tests/fixtures/parsing/array.php @@ -0,0 +1,3 @@ + "0001", + "type" => "donut", + "name" => "Cake", + "ppu" => 0.55, + "batters" => [ + "batter" => [ + [ + "id" => "1001", + "type" => "Regular", + ], + [ + "id" => "1002", + "type" => "Chocolate", + ], + [ + "id" => "1003", + "type" => "Blueberry", + ], + [ + "id" => "1004", + "type" => "Devil's Food", + ], + ], + ], + "topping" => [ + [ + "id" => "5001", + "type" => "None", + ], + [ + "id" => "5002", + "type" => "Glazed", + ], + [ + "id" => "5005", + "type" => "Sugar", + ], + [ + "id" => "5007", + "type" => "Powdered Sugar", + ], + [ + "id" => "5006", + "type" => "Chocolate with Sprinkles", + ], + [ + "id" => "5003", + "type" => "Chocolate", + ], + [ + "id" => "5004", + "type" => "Maple", + ], + ], + ], + [ + "id" => "0002", + "type" => "donut", + "name" => "Raised", + "ppu" => 0.55, + "batters" => [ + "batter" => [ + [ + "id" => "1001", + "type" => "Regular", + ], + ], + ], + "topping" => [ + [ + "id" => "5001", + "type" => "None", + ], + [ + "id" => "5002", + "type" => "Glazed", + ], + [ + "id" => "5005", + "type" => "Sugar", + ], + [ + "id" => "5003", + "type" => "Chocolate", + ], + [ + "id" => "5004", + "type" => "Maple", + ], + ], + ], + [ + "id" => "0003", + "type" => "donut", + "name" => "Old Fashioned", + "ppu" => 0.55, + "batters" => [ + "batter" => [ + [ + "id" => "1001", + "type" => "Regular", + ], + [ + "id" => "1002", + "type" => "Chocolate", + ], + ], + ], + "topping" => [ + [ + "id" => "5001", + "type" => "None", + ], + [ + "id" => "5002", + "type" => "Glazed", + ], + [ + "id" => "5003", + "type" => "Chocolate", + 
], + [ + "id" => "5004", + "type" => "Maple", + ], + ], + ], +]; diff --git a/tests/fixtures/parsing/complex_object.json b/tests/fixtures/parsing/complex_object.json new file mode 100644 index 0000000..ef2b654 --- /dev/null +++ b/tests/fixtures/parsing/complex_object.json @@ -0,0 +1,26 @@ +{ + "id": "0001", + "type": "donut", + "name": "Cake", + "ppu": 0.55, + "batters": + { + "batter": + [ + { "id": "1001", "type": "Regular" }, + { "id": "1002", "type": "Chocolate" }, + { "id": "1003", "type": "Blueberry" }, + { "id": "1004", "type": "Devil's Food" } + ] + }, + "topping": + [ + { "id": "5001", "type": "None" }, + { "id": "5002", "type": "Glazed" }, + { "id": "5005", "type": "Sugar" }, + { "id": "5007", "type": "Powdered Sugar" }, + { "id": "5006", "type": "Chocolate with Sprinkles" }, + { "id": "5003", "type": "Chocolate" }, + { "id": "5004", "type": "Maple" } + ] +} diff --git a/tests/fixtures/parsing/complex_object.php b/tests/fixtures/parsing/complex_object.php new file mode 100644 index 0000000..06b2a6d --- /dev/null +++ b/tests/fixtures/parsing/complex_object.php @@ -0,0 +1,58 @@ + '0001', + 'type' => 'donut', + 'name' => 'Cake', + 'ppu' => 0.55, + 'batters' => [ + 'batter' => [ + [ + 'id' => '1001', + 'type' => 'Regular', + ], + [ + 'id' => '1002', + 'type' => 'Chocolate', + ], + [ + 'id' => '1003', + 'type' => 'Blueberry', + ], + [ + 'id' => '1004', + 'type' => 'Devil\'s Food', + ], + ], + ], + 'topping' => [ + [ + 'id' => '5001', + 'type' => 'None', + ], + [ + 'id' => '5002', + 'type' => 'Glazed', + ], + [ + 'id' => '5005', + 'type' => 'Sugar', + ], + [ + 'id' => '5007', + 'type' => 'Powdered Sugar', + ], + [ + 'id' => '5006', + 'type' => 'Chocolate with Sprinkles', + ], + [ + 'id' => '5003', + 'type' => 'Chocolate', + ], + [ + 'id' => '5004', + 'type' => 'Maple', + ], + ], +]; diff --git a/tests/fixtures/parsing/empty_array.json b/tests/fixtures/parsing/empty_array.json new file mode 100644 index 0000000..fe51488 --- /dev/null +++ 
b/tests/fixtures/parsing/empty_array.json @@ -0,0 +1 @@ +[] diff --git a/tests/fixtures/parsing/empty_array.php b/tests/fixtures/parsing/empty_array.php new file mode 100644 index 0000000..0b67a5f --- /dev/null +++ b/tests/fixtures/parsing/empty_array.php @@ -0,0 +1,3 @@ + 1, + 'empty_string' => '', + 'string' => 'foo', + 'escaped_string' => '"bar"', + '"escaped_key"' => 'baz', + 'float' => 3.14, + 'bool' => false, + 'null' => null, + 'empty_array' => [], + 'empty_object' => [], +]; From 255b5633c85bf08372504b11c0138e5dfa54a2b8 Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Tue, 8 Nov 2022 00:17:17 +1000 Subject: [PATCH 058/249] Rename method --- src/Parser.php | 2 +- src/State.php | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Parser.php b/src/Parser.php index cfa8b68..cd08f9a 100644 --- a/src/Parser.php +++ b/src/Parser.php @@ -64,7 +64,7 @@ class Parser implements IteratorAggregate $this->markPointerAsFound(); - if ($this->pointers->wereFound() && !$this->state->treeInPointer()) { + if ($this->pointers->wereFound() && !$this->state->inPointer()) { break; } } diff --git a/src/State.php b/src/State.php index e1aff3f..04a61c0 100644 --- a/src/State.php +++ b/src/State.php @@ -148,7 +148,7 @@ class State * * @return bool */ - public function treeInPointer(): bool + public function inPointer(): bool { return $this->pointer->includesTree($this->tree); } From 1d85f387d7fed2b4239c95f14fedc6a281084b5e Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Tue, 8 Nov 2022 22:03:45 +1000 Subject: [PATCH 059/249] Update description --- README.md | 2 +- composer.json | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 2789f03..4e36115 100644 --- a/README.md +++ b/README.md @@ -10,7 +10,7 @@ [![PSR-12][ico-psr12]][link-psr12] [![Total Downloads][ico-downloads]][link-downloads] -Zero-dependencies pull parser and lexer to save memory while reading big JSONs. 
+Zero-dependencies pull parser to read big JSON from any source in a memory-efficient way. ## 📦 Install diff --git a/composer.json b/composer.json index 556ca94..f438156 100644 --- a/composer.json +++ b/composer.json @@ -1,7 +1,7 @@ { "name": "cerbero/json-parser", "type": "library", - "description": "Zero-dependencies, framework-agnostic pull parser to read big JSON in a memory-efficient way.", + "description": "Zero-dependencies pull parser to read big JSON from any source in a memory-efficient way.", "keywords": [ "json", "parser", From a5146b2ef6aceab97d4d1d02895a74f15b7a7a78 Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Tue, 8 Nov 2022 22:09:06 +1000 Subject: [PATCH 060/249] Add checks for unicode characters --- tests/fixtures/parsing/array.json | 2 +- tests/fixtures/parsing/array.php | 2 +- tests/fixtures/parsing/object.json | 1 + tests/fixtures/parsing/object.php | 1 + 4 files changed, 4 insertions(+), 2 deletions(-) diff --git a/tests/fixtures/parsing/array.json b/tests/fixtures/parsing/array.json index ca7e149..67cb839 100644 --- a/tests/fixtures/parsing/array.json +++ b/tests/fixtures/parsing/array.json @@ -1 +1 @@ -[1, "", "foo", "\"bar\"", 3.14, false, null, [], {}] +[1, "", "foo", "\"bar\"", "hej då", 3.14, false, null, [], {}] diff --git a/tests/fixtures/parsing/array.php b/tests/fixtures/parsing/array.php index c7380fb..4288667 100644 --- a/tests/fixtures/parsing/array.php +++ b/tests/fixtures/parsing/array.php @@ -1,3 +1,3 @@ 'foo', 'escaped_string' => '"bar"', '"escaped_key"' => 'baz', + "unicode" => "hej då", 'float' => 3.14, 'bool' => false, 'null' => null, From ab11f4e41659f49bb743f74c2cd110d026cc48a2 Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Thu, 10 Nov 2022 21:45:56 +1000 Subject: [PATCH 061/249] Add factory method to parser --- src/JsonParser.php | 3 +-- src/Parser.php | 12 ++++++++++++ src/Sources/Source.php | 10 ++++++++++ 3 files changed, 23 insertions(+), 2 deletions(-) diff --git a/src/JsonParser.php 
b/src/JsonParser.php index a49aa03..89dc7f6 100644 --- a/src/JsonParser.php +++ b/src/JsonParser.php @@ -38,8 +38,7 @@ class JsonParser implements IteratorAggregate public function __construct(mixed $source) { $this->config = new Config(); - $source = AnySource::from($source, $this->config); - $this->parser = new Parser(new Lexer($source), $this->config); + $this->parser = Parser::for(AnySource::from($source, $this->config)); } /** diff --git a/src/Parser.php b/src/Parser.php index cd08f9a..00e7c53 100644 --- a/src/Parser.php +++ b/src/Parser.php @@ -3,6 +3,7 @@ namespace Cerbero\JsonParser; use Cerbero\JsonParser\Pointers\Pointers; +use Cerbero\JsonParser\Sources\Source; use Cerbero\JsonParser\Tokens\Token; use IteratorAggregate; use Traversable; @@ -39,6 +40,17 @@ class Parser implements IteratorAggregate $this->pointers = new Pointers(...$config->pointers); } + /** + * Instantiate the class statically + * + * @param Source $source + * @return static + */ + public static function for(Source $source): static + { + return new static(new Lexer($source), $source->config()); + } + /** * Retrieve the JSON fragments * diff --git a/src/Sources/Source.php b/src/Sources/Source.php index cf7cf92..317419c 100644 --- a/src/Sources/Source.php +++ b/src/Sources/Source.php @@ -87,6 +87,16 @@ abstract class Source implements IteratorAggregate } } + /** + * Retrieve the underlying configuration + * + * @return Config + */ + public function config(): Config + { + return $this->config; + } + /** * Retrieve the size of the JSON source and cache it * From 18521483431d7ee9e4b776a895b273cd8ccdda0e Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Fri, 11 Nov 2022 17:03:29 +1000 Subject: [PATCH 062/249] Remove implicit requirement --- composer.json | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/composer.json b/composer.json index f438156..be3dbe7 100644 --- a/composer.json +++ b/composer.json @@ -1,11 +1,13 @@ { "name": "cerbero/json-parser", "type": 
"library", - "description": "Zero-dependencies pull parser to read big JSON from any source in a memory-efficient way.", + "description": "Zero-dependencies pull parser to read large JSON from any source in a memory-efficient way.", "keywords": [ "json", "parser", - "lexer" + "json-parser", + "lexer", + "memory" ], "homepage": "https://github.com/cerbero90/json-parser", "license": "MIT", @@ -19,7 +21,6 @@ "php": "^8.0" }, "require-dev": { - "ext-json": "*", "guzzlehttp/guzzle": "^7.2", "illuminate/http": ">=6.20", "pestphp/pest": "^1.21", @@ -27,7 +28,6 @@ "squizlabs/php_codesniffer": "^3.0" }, "suggest": { - "ext-json": "Required to decode JSON with the built-in decoder.", "guzzlehttp/guzzle": "Required to load JSON from endpoints (^7.2)." }, "autoload": { From 183c8ba9824087bcb24d131bc2ee424025e70c60 Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Sun, 13 Nov 2022 17:08:57 +1000 Subject: [PATCH 063/249] Move JSON fixtures --- tests/Dataset.php | 23 +++++++++---------- tests/fixtures/{parsing => json}/array.json | 0 .../{parsing => json}/complex_array.json | 0 .../{parsing => json}/complex_object.json | 0 .../{parsing => json}/empty_array.json | 0 .../{parsing => json}/empty_object.json | 0 tests/fixtures/{parsing => json}/object.json | 0 7 files changed, 11 insertions(+), 12 deletions(-) rename tests/fixtures/{parsing => json}/array.json (100%) rename tests/fixtures/{parsing => json}/complex_array.json (100%) rename tests/fixtures/{parsing => json}/complex_object.json (100%) rename tests/fixtures/{parsing => json}/empty_array.json (100%) rename tests/fixtures/{parsing => json}/empty_object.json (100%) rename tests/fixtures/{parsing => json}/object.json (100%) diff --git a/tests/Dataset.php b/tests/Dataset.php index 3a77ff3..83d669c 100644 --- a/tests/Dataset.php +++ b/tests/Dataset.php @@ -18,27 +18,26 @@ class Dataset */ public static function forParsing(): Generator { - foreach (static::fixtureNamesIn('parsing') as $fixture) { + foreach 
(static::fixtures() as $fixture) { + $name = $fixture->getBasename('.json'); + yield [ - file_get_contents(__DIR__ . "/fixtures/parsing/{$fixture}.json"), - require __DIR__ . "/fixtures/parsing/{$fixture}.php", + file_get_contents($fixture->getRealPath()), + require __DIR__ . "/fixtures/parsing/{$name}.php", ]; } } /** - * Retrieve the names of the fixtures + * Retrieve the fixtures * - * @param string $directory - * @return Generator + * @return Generator */ - protected static function fixtureNamesIn(string $directory): Generator + protected static function fixtures(): Generator { - $fixtures = new DirectoryIterator(__DIR__ . "/fixtures/{$directory}"); - - foreach ($fixtures as $file) { - if ($file->getExtension() === 'json') { - yield $file->getBasename('.json'); + foreach (new DirectoryIterator(__DIR__ . '/fixtures/json') as $file) { + if (!$file->isDot()) { + yield $file; } } } diff --git a/tests/fixtures/parsing/array.json b/tests/fixtures/json/array.json similarity index 100% rename from tests/fixtures/parsing/array.json rename to tests/fixtures/json/array.json diff --git a/tests/fixtures/parsing/complex_array.json b/tests/fixtures/json/complex_array.json similarity index 100% rename from tests/fixtures/parsing/complex_array.json rename to tests/fixtures/json/complex_array.json diff --git a/tests/fixtures/parsing/complex_object.json b/tests/fixtures/json/complex_object.json similarity index 100% rename from tests/fixtures/parsing/complex_object.json rename to tests/fixtures/json/complex_object.json diff --git a/tests/fixtures/parsing/empty_array.json b/tests/fixtures/json/empty_array.json similarity index 100% rename from tests/fixtures/parsing/empty_array.json rename to tests/fixtures/json/empty_array.json diff --git a/tests/fixtures/parsing/empty_object.json b/tests/fixtures/json/empty_object.json similarity index 100% rename from tests/fixtures/parsing/empty_object.json rename to tests/fixtures/json/empty_object.json diff --git 
a/tests/fixtures/parsing/object.json b/tests/fixtures/json/object.json similarity index 100% rename from tests/fixtures/parsing/object.json rename to tests/fixtures/json/object.json From 8197d9b77b13161b15782ff7beb39299fb0c8965 Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Sun, 13 Nov 2022 17:10:31 +1000 Subject: [PATCH 064/249] Rename node to key --- src/Pointers/Pointer.php | 10 +++++----- src/Pointers/Pointers.php | 6 +++--- src/State.php | 4 ++-- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/src/Pointers/Pointer.php b/src/Pointers/Pointer.php index 11e98d8..834fe13 100644 --- a/src/Pointers/Pointer.php +++ b/src/Pointers/Pointer.php @@ -70,23 +70,23 @@ class Pointer implements ArrayAccess, Stringable } /** - * Determine whether the reference token at the given depth matches the provided node + * Determine whether the reference token at the given depth matches the provided key * * @param int $depth - * @param mixed $node + * @param mixed $key * @return bool */ - public function depthMatchesNode(int $depth, mixed $node): bool + public function depthMatchesKey(int $depth, mixed $key): bool { if (!isset($this->referenceTokens[$depth])) { return false; } - if ($this->referenceTokens[$depth] === (string) $node) { + if ($this->referenceTokens[$depth] === (string) $key) { return true; } - return is_int($node) && $this->referenceTokens[$depth] === '-'; + return is_int($key) && $this->referenceTokens[$depth] === '-'; } /** diff --git a/src/Pointers/Pointers.php b/src/Pointers/Pointers.php index 90bf207..70163f5 100644 --- a/src/Pointers/Pointers.php +++ b/src/Pointers/Pointers.php @@ -54,8 +54,8 @@ class Pointers implements Countable $pointers = []; foreach ($this->pointers as $pointer) { - foreach ($tree as $depth => $node) { - if (!$pointer->depthMatchesNode($depth, $node)) { + foreach ($tree as $depth => $key) { + if (!$pointer->depthMatchesKey($depth, $key)) { continue 2; } elseif (!isset($pointers[$depth])) { $pointers[$depth] = 
$pointer; @@ -63,7 +63,7 @@ class Pointers implements Countable } } - return end($pointers) ?: $this->defaultPointer; + return end($pointers) ?: $this->pointers[0] ?? $this->defaultPointer; } /** diff --git a/src/State.php b/src/State.php index 04a61c0..b482d4c 100644 --- a/src/State.php +++ b/src/State.php @@ -89,7 +89,7 @@ class State } /** - * Retrieve the current node of the JSON tree + * Retrieve the current key of the JSON tree * * @return string */ @@ -284,7 +284,7 @@ class State } /** - * Determine whether the currently parsed node is an object + * Determine whether the current position is within an object * * @return bool */ From 91d89c7209ae91ab8cee037c9e61ca9cbe3cde61 Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Mon, 14 Nov 2022 18:43:01 +1000 Subject: [PATCH 065/249] Rename fixtures --- tests/fixtures/json/{array.json => simple_array.json} | 0 tests/fixtures/json/{object.json => simple_object.json} | 0 tests/fixtures/parsing/{array.php => simple_array.php} | 0 tests/fixtures/parsing/{object.php => simple_object.php} | 0 4 files changed, 0 insertions(+), 0 deletions(-) rename tests/fixtures/json/{array.json => simple_array.json} (100%) rename tests/fixtures/json/{object.json => simple_object.json} (100%) rename tests/fixtures/parsing/{array.php => simple_array.php} (100%) rename tests/fixtures/parsing/{object.php => simple_object.php} (100%) diff --git a/tests/fixtures/json/array.json b/tests/fixtures/json/simple_array.json similarity index 100% rename from tests/fixtures/json/array.json rename to tests/fixtures/json/simple_array.json diff --git a/tests/fixtures/json/object.json b/tests/fixtures/json/simple_object.json similarity index 100% rename from tests/fixtures/json/object.json rename to tests/fixtures/json/simple_object.json diff --git a/tests/fixtures/parsing/array.php b/tests/fixtures/parsing/simple_array.php similarity index 100% rename from tests/fixtures/parsing/array.php rename to tests/fixtures/parsing/simple_array.php diff 
--git a/tests/fixtures/parsing/object.php b/tests/fixtures/parsing/simple_object.php similarity index 100% rename from tests/fixtures/parsing/object.php rename to tests/fixtures/parsing/simple_object.php From acde9f4572f090c53cd3103aedbd25c69f30e15f Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Mon, 14 Nov 2022 18:44:48 +1000 Subject: [PATCH 066/249] Reduce number of token instances --- src/Tokens/Tokenizer.php | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/src/Tokens/Tokenizer.php b/src/Tokens/Tokenizer.php index 66d5f0f..6f97985 100644 --- a/src/Tokens/Tokenizer.php +++ b/src/Tokens/Tokenizer.php @@ -9,11 +9,11 @@ namespace Cerbero\JsonParser\Tokens; class Tokenizer { /** - * The map of token instances. + * The map of token instances by type. * * @var array */ - protected static array $tokensMap = []; + protected static array $tokensMap; /** * Instantiate the class. @@ -21,23 +21,23 @@ class Tokenizer */ public function __construct() { - $this->hydrateTokens(); + static::$tokensMap ??= $this->hydrateTokensMap(); } /** - * Set the hydrated tokens + * Retrieve the hydrated tokens map * - * @return void + * @return array */ - protected function hydrateTokens(): void + protected function hydrateTokensMap(): array { - if (static::$tokensMap) { - return; - } + $map = $instances = []; foreach (Tokens::MAP as $type => $class) { - static::$tokensMap[$type] = new $class(); + $map[$type] = $instances[$class] ??= new $class(); } + + return $map; } /** From bf1ef5cad7d7f88658bed7aad8099e686701fb6e Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Thu, 17 Nov 2022 23:49:26 +1000 Subject: [PATCH 067/249] Define logic to mutate the state inside tokens --- src/Parser.php | 24 ++---------------------- src/Tokens/Comma.php | 4 ++-- src/Tokens/CompoundBegin.php | 4 ++-- src/Tokens/CompoundEnd.php | 4 ++-- src/Tokens/ScalarString.php | 8 ++------ src/Tokens/Token.php | 33 +++++++++++++++++++++++++++++++++ 6 files changed, 
43 insertions(+), 34 deletions(-) diff --git a/src/Parser.php b/src/Parser.php index 00e7c53..2de62cc 100644 --- a/src/Parser.php +++ b/src/Parser.php @@ -4,7 +4,6 @@ namespace Cerbero\JsonParser; use Cerbero\JsonParser\Pointers\Pointers; use Cerbero\JsonParser\Sources\Source; -use Cerbero\JsonParser\Tokens\Token; use IteratorAggregate; use Traversable; @@ -37,7 +36,6 @@ class Parser implements IteratorAggregate public function __construct(protected Lexer $lexer, protected Config $config) { $this->state = new State(); - $this->pointers = new Pointers(...$config->pointers); } /** @@ -58,10 +56,11 @@ class Parser implements IteratorAggregate */ public function getIterator(): Traversable { + $this->pointers = new Pointers(...$this->config->pointers); $this->state->matchPointer($this->pointers); foreach ($this->lexer as $token) { - $this->handleToken($token); + $token->mutateState($this->state); $this->rematchPointer(); if (!$token->endsChunk() || $this->state->treeIsDeep()) { @@ -82,25 +81,6 @@ class Parser implements IteratorAggregate } } - /** - * Handle the given token - * - * @param Token $token - * @return void - */ - protected function handleToken(Token $token): void - { - if ($token->isValue() && !$this->state->inObject() && $this->state->treeIsShallow()) { - $this->state->traverseArray(); - } - - if ($this->state->inRoot() && $this->state->shouldBufferToken($token)) { - $this->state->bufferToken($token); - } - - $token->mutateState($this->state); - } - /** * Set the matching JSON pointer when the tree changes * diff --git a/src/Tokens/Comma.php b/src/Tokens/Comma.php index 6884fa5..cd6694c 100644 --- a/src/Tokens/Comma.php +++ b/src/Tokens/Comma.php @@ -21,12 +21,12 @@ class Comma extends Token } /** - * Mutate the given state + * Update the given state * * @param State $state * @return void */ - public function mutateState(State $state): void + protected function updateState(State $state): void { if ($state->inObject()) { $state->expectKey(); diff --git 
a/src/Tokens/CompoundBegin.php b/src/Tokens/CompoundBegin.php index eefbec9..0130ecc 100644 --- a/src/Tokens/CompoundBegin.php +++ b/src/Tokens/CompoundBegin.php @@ -21,12 +21,12 @@ class CompoundBegin extends Token } /** - * Mutate the given state + * Update the given state * * @param State $state * @return void */ - public function mutateState(State $state): void + protected function updateState(State $state): void { $state->tree()->deepen(); diff --git a/src/Tokens/CompoundEnd.php b/src/Tokens/CompoundEnd.php index 99c7202..fea88ce 100644 --- a/src/Tokens/CompoundEnd.php +++ b/src/Tokens/CompoundEnd.php @@ -21,12 +21,12 @@ class CompoundEnd extends Token } /** - * Mutate the given state + * Update the given state * * @param State $state * @return void */ - public function mutateState(State $state): void + protected function updateState(State $state): void { $state->tree()->emerge(); diff --git a/src/Tokens/ScalarString.php b/src/Tokens/ScalarString.php index 9dce3c9..c0cf392 100644 --- a/src/Tokens/ScalarString.php +++ b/src/Tokens/ScalarString.php @@ -28,22 +28,18 @@ class ScalarString extends Token } /** - * Mutate the given state + * Update the given state * * @param State $state * @return void */ - public function mutateState(State $state): void + protected function updateState(State $state): void { if (!$this->isKey = $state->expectsKey()) { return; } $state->doNotExpectKey(); - - if ($state->treeIsShallow()) { - $state->traverseKey($this->value); - } } /** diff --git a/src/Tokens/Token.php b/src/Tokens/Token.php index 92b89dd..b83cfb8 100644 --- a/src/Tokens/Token.php +++ b/src/Tokens/Token.php @@ -58,6 +58,16 @@ abstract class Token implements Stringable return ($this->type() | Tokens::VALUE_SCALAR) == Tokens::VALUE_SCALAR; } + /** + * Determine whether the token is a string + * + * @return bool + */ + public function isString(): bool + { + return ($this->type() | Tokens::SCALAR_STRING) == Tokens::SCALAR_STRING; + } + /** * Mutate the given state * @@ 
-65,6 +75,29 @@ abstract class Token implements Stringable * @return void */ public function mutateState(State $state): void + { + if ($this->isValue() && !$state->inObject() && $state->treeIsShallow()) { + $state->traverseArray(); + } + + if ($this->isString() && $state->expectsKey() && $state->treeIsShallow()) { + $state->traverseKey($this); + } + + if ($state->inRoot() && $state->shouldBufferToken($this)) { + $state->bufferToken($this); + } + + $this->updateState($state); + } + + /** + * Update the given state + * + * @param State $state + * @return void + */ + protected function updateState(State $state): void { return; } From 3e9da676ce0587edbc4b23a5cd6126566baa81d2 Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Thu, 17 Nov 2022 23:51:49 +1000 Subject: [PATCH 068/249] Fix logic to check if a token should be buffered --- src/State.php | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/State.php b/src/State.php index b482d4c..d8d0b22 100644 --- a/src/State.php +++ b/src/State.php @@ -85,7 +85,9 @@ class State */ public function treeIsDeep(): bool { - return $this->tree->depth() > $this->pointer->depth(); + return $this->pointer instanceof NullPointer + ? 
$this->tree->depth() > $this->pointer->depth() + : $this->tree->depth() >= $this->pointer->depth(); } /** @@ -207,7 +209,7 @@ class State public function shouldBufferToken(Token $token): bool { return $this->pointer->matchesTree($this->tree) - && ($this->treeIsDeep() || (!$this->expectsKey() && $this->expectsToken($token))); + && ($this->treeIsDeep() || (!$this->expectsKey() && ($token->isValue() || $this->expectsToken($token)))); } /** From da7b20e489794cb8ff269fa684aeae68462e5fa8 Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Thu, 17 Nov 2022 23:52:34 +1000 Subject: [PATCH 069/249] Add dataset for single pointers --- tests/Dataset.php | 18 ++ tests/fixtures/pointers/single_pointer.php | 229 +++++++++++++++++++++ 2 files changed, 247 insertions(+) create mode 100644 tests/fixtures/pointers/single_pointer.php diff --git a/tests/Dataset.php b/tests/Dataset.php index 83d669c..57307dd 100644 --- a/tests/Dataset.php +++ b/tests/Dataset.php @@ -41,4 +41,22 @@ class Dataset } } } + + /** + * Retrieve the dataset to test single pointers + * + * @return Generator + */ + public static function forSinglePointers(): Generator + { + $singlePointers = require __DIR__ . '/fixtures/pointers/single_pointer.php'; + + foreach ($singlePointers as $fixture => $pointers) { + $json = file_get_contents(__DIR__ . 
"/fixtures/json/{$fixture}.json"); + + foreach ($pointers as $pointer => $value) { + yield [$json, $pointer, $value]; + } + } + } } diff --git a/tests/fixtures/pointers/single_pointer.php b/tests/fixtures/pointers/single_pointer.php new file mode 100644 index 0000000..c77bf8f --- /dev/null +++ b/tests/fixtures/pointers/single_pointer.php @@ -0,0 +1,229 @@ + [ + '/-/id' => ['id' => ['0001', '0002', '0003']], + '/-/batters' => [ + 'batters' => [ + [ + 'batter' => [ + [ + "id" => "1001", + "type" => "Regular", + ], + [ + "id" => "1002", + "type" => "Chocolate", + ], + [ + "id" => "1003", + "type" => "Blueberry", + ], + [ + "id" => "1004", + "type" => "Devil's Food", + ], + ], + ], + [ + 'batter' => [ + [ + "id" => "1001", + "type" => "Regular", + ], + ], + ], + [ + 'batter' => [ + [ + "id" => "1001", + "type" => "Regular", + ], + [ + "id" => "1002", + "type" => "Chocolate", + ], + ], + ], + ], + ], + '/-/batters/batter' => [ + 'batter' => [ + [ + [ + "id" => "1001", + "type" => "Regular", + ], + [ + "id" => "1002", + "type" => "Chocolate", + ], + [ + "id" => "1003", + "type" => "Blueberry", + ], + [ + "id" => "1004", + "type" => "Devil's Food", + ], + ], + [ + [ + "id" => "1001", + "type" => "Regular", + ], + ], + [ + [ + "id" => "1001", + "type" => "Regular", + ], + [ + "id" => "1002", + "type" => "Chocolate", + ], + ], + ], + ], + '/-/batters/batter/-' => [ + [ + "id" => "1001", + "type" => "Regular", + ], + [ + "id" => "1002", + "type" => "Chocolate", + ], + [ + "id" => "1003", + "type" => "Blueberry", + ], + [ + "id" => "1004", + "type" => "Devil's Food", + ], + [ + "id" => "1001", + "type" => "Regular", + ], + [ + "id" => "1001", + "type" => "Regular", + ], + [ + "id" => "1002", + "type" => "Chocolate", + ], + ], + '/-/batters/batter/-/id' => ['id' => ["1001", "1002", "1003", "1004", "1001", "1001", "1002"]], + ], + 'complex_object' => [ + '/id' => ['id' => '0001'], + '/batters' => [ + 'batters' => [ + 'batter' => [ + [ + "id" => "1001", + "type" => "Regular", + 
], + [ + "id" => "1002", + "type" => "Chocolate", + ], + [ + "id" => "1003", + "type" => "Blueberry", + ], + [ + "id" => "1004", + "type" => "Devil's Food", + ], + ], + ], + ], + '/batters/batter' => [ + 'batter' => [ + [ + "id" => "1001", + "type" => "Regular", + ], + [ + "id" => "1002", + "type" => "Chocolate", + ], + [ + "id" => "1003", + "type" => "Blueberry", + ], + [ + "id" => "1004", + "type" => "Devil's Food", + ], + ], + ], + '/batters/batter/-' => [ + [ + "id" => "1001", + "type" => "Regular", + ], + [ + "id" => "1002", + "type" => "Chocolate", + ], + [ + "id" => "1003", + "type" => "Blueberry", + ], + [ + "id" => "1004", + "type" => "Devil's Food", + ], + ], + '/batters/batter/-/id' => ['id' => ["1001", "1002", "1003", "1004"]], + ], + 'empty_array' => [ + '/-' => [], + '/-1' => [], + '/0' => [], + '/foo' => [], + ], + 'empty_object' => [ + '/-' => [], + '/-1' => [], + '/0' => [], + '/foo' => [], + ], + 'simple_array' => [ + '/-' => [1, '', 'foo', '"bar"', 'hej då', 3.14, false, null, [], []], + '/-1' => [], + '/0' => [1], + '/1' => [''], + '/2' => ['foo'], + '/3' => ['"bar"'], + '/4' => ['hej då'], + '/5' => [3.14], + '/6' => [false], + '/7' => [null], + '/8' => [[]], + '/9' => [[]], + '/10' => [], + '/foo' => [], + ], + 'simple_object' => [ + '/-' => [], + '/-1' => [], + '/int' => ['int' => 1], + '/empty_string' => ['empty_string' => ''], + '/string' => ['string' => 'foo'], + '/escaped_string' => ['escaped_string' => '"bar"'], + '/\"escaped_key\"' => ['"escaped_key"' => 'baz'], + '/unicode' => ['unicode' => "hej då"], + '/float' => ['float' => 3.14], + '/bool' => ['bool' => false], + '/null' => ['null' => null], + '/empty_array' => ['empty_array' => []], + '/empty_object' => ['empty_object' => []], + '/10' => [], + '/foo' => [], + ], +]; From bb3baca8f5964aa4c8dca5b9bd326be1346f7eb3 Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Thu, 17 Nov 2022 23:53:54 +1000 Subject: [PATCH 070/249] Update parsing expectation --- tests/Pest.php | 18 
+++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/tests/Pest.php b/tests/Pest.php index 5398c77..d72f48f 100644 --- a/tests/Pest.php +++ b/tests/Pest.php @@ -24,14 +24,22 @@ | */ -expect()->extend('toParseTo', function (array $parsed) { - $actual = []; +expect()->extend('toParseTo', function (array $expected) { + $actual = $itemsCount = []; - foreach ($this->value as $key => $value) { - $actual[$key] = $value; + foreach ($this->value as $parsedKey => $parsedValue) { + $itemsCount[$parsedKey] = empty($itemsCount[$parsedKey]) ? 1 : $itemsCount[$parsedKey] + 1; + + // the following match is required as we may deal with parsed values that are arrays + // and unpacking a parsed value that is an array may lead to unexpected results + $actual[$parsedKey] = match ($itemsCount[$parsedKey]) { + 1 => $parsedValue, + 2 => [$actual[$parsedKey], $parsedValue], + default => [...$actual[$parsedKey], $parsedValue], + }; } - expect($actual)->toBe($parsed); + expect($actual)->toBe($expected); }); /* From 76aa75645306ffda420f139c674f5235456403da Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Thu, 17 Nov 2022 23:54:18 +1000 Subject: [PATCH 071/249] Create test for pointers --- tests/Feature/PointersTest.php | 13 +++++++++++++ 1 file changed, 13 insertions(+) create mode 100644 tests/Feature/PointersTest.php diff --git a/tests/Feature/PointersTest.php b/tests/Feature/PointersTest.php new file mode 100644 index 0000000..b1a6c37 --- /dev/null +++ b/tests/Feature/PointersTest.php @@ -0,0 +1,13 @@ +pointer($pointer))->toParseTo($parsed); +})->with(Dataset::forSinglePointers()); + + +// it('supports multiple JSON pointers', function (string $json, array $pointers, array $parsed) { +// expect(JsonParser::parse($json)->pointer(...$pointers))->toParseTo($parsed); +// })->with(Dataset::forMultiplePointers()); From dea8a49af554e4156ad55aefb5a0e7a2ece3daac Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Mon, 21 Nov 2022 00:35:13 +1000 Subject: 
[PATCH 072/249] Introduce exception codes --- src/Exceptions/JsonParserException.php | 14 +++++--------- src/Exceptions/SourceException.php | 8 ++++---- src/Sources/Source.php | 2 +- 3 files changed, 10 insertions(+), 14 deletions(-) diff --git a/src/Exceptions/JsonParserException.php b/src/Exceptions/JsonParserException.php index 0e8ed8a..42acabf 100644 --- a/src/Exceptions/JsonParserException.php +++ b/src/Exceptions/JsonParserException.php @@ -10,13 +10,9 @@ use Exception; */ abstract class JsonParserException extends Exception { - /** - * Enforce factory methods to instantiate exceptions - * - * @param string $message - */ - protected function __construct(string $message) - { - parent::__construct($message); - } + public const CODE_SOURCE_INVALID = 0; + public const CODE_SOURCE_UNSUPPORTED = 1; + public const CODE_SOURCE_GUZZLE = 2; + + public const CODE_POINTER_INVALID = 3; } diff --git a/src/Exceptions/SourceException.php b/src/Exceptions/SourceException.php index 4a06aa7..9ce57c8 100644 --- a/src/Exceptions/SourceException.php +++ b/src/Exceptions/SourceException.php @@ -14,9 +14,9 @@ class SourceException extends JsonParserException * @param string $source * @return static */ - public static function invalidSource(string $source): static + public static function invalid(string $source): static { - return new static("[$source] is not a valid source"); + return new static("[$source] is not a valid source", static::CODE_SOURCE_INVALID); } /** @@ -26,7 +26,7 @@ class SourceException extends JsonParserException */ public static function unsupported(): static { - return new static('Unable to load JSON from the provided source'); + return new static('Unable to load JSON from the provided source', static::CODE_SOURCE_UNSUPPORTED); } /** @@ -36,6 +36,6 @@ class SourceException extends JsonParserException */ public static function requireGuzzle(): static { - return new static('Guzzle is required to load JSON from endpoints'); + return new static('Guzzle is required 
to load JSON from endpoints', static::CODE_SOURCE_GUZZLE); } } diff --git a/src/Sources/Source.php b/src/Sources/Source.php index 317419c..f3c80d1 100644 --- a/src/Sources/Source.php +++ b/src/Sources/Source.php @@ -80,7 +80,7 @@ abstract class Source implements IteratorAggregate { foreach ($customSource as $class) { if (!is_subclass_of($class, Source::class)) { - throw SourceException::invalidSource($class); + throw SourceException::invalid($class); } static::$customSources[] = $class; From 2d9292a039bd8befd027ec83e99f67672605c021 Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Mon, 21 Nov 2022 00:36:08 +1000 Subject: [PATCH 073/249] Throw exception on invalid pointer --- src/Exceptions/PointerException.php | 21 +++++++++++++++++++++ src/Pointers/Pointer.php | 5 +++++ tests/Dataset.php | 10 ++++++++++ tests/Feature/PointersTest.php | 7 +++++++ 4 files changed, 43 insertions(+) create mode 100644 src/Exceptions/PointerException.php diff --git a/src/Exceptions/PointerException.php b/src/Exceptions/PointerException.php new file mode 100644 index 0000000..b17487d --- /dev/null +++ b/src/Exceptions/PointerException.php @@ -0,0 +1,21 @@ +pointer) === 0) { + throw PointerException::invalid($this->pointer); + } + $tokens = explode('/', substr($this->pointer, 1)); return array_map(fn (string $token) => str_replace(['~1', '~0'], ['/', '~'], $token), $tokens); diff --git a/tests/Dataset.php b/tests/Dataset.php index 57307dd..dc2333d 100644 --- a/tests/Dataset.php +++ b/tests/Dataset.php @@ -59,4 +59,14 @@ class Dataset } } } + + /** + * Retrieve the dataset to test invalid pointers + * + * @return Generator + */ + public static function forInvalidPointers(): Generator + { + yield from ['abc', '/foo~2', '/~', ' ']; + } } diff --git a/tests/Feature/PointersTest.php b/tests/Feature/PointersTest.php index b1a6c37..7ff64e6 100644 --- a/tests/Feature/PointersTest.php +++ b/tests/Feature/PointersTest.php @@ -1,6 +1,7 @@ with(Dataset::forSinglePointers()); +it('throws an 
exception when providing an invalid JSON pointer', function (string $pointer) { + expect(fn () => iterator_to_array(JsonParser::parse('{}')->pointer($pointer))) + ->toThrow(PointerException::class, "The string [$pointer] is not a valid JSON pointer"); +})->with(Dataset::forInvalidPointers()); + + // it('supports multiple JSON pointers', function (string $json, array $pointers, array $parsed) { // expect(JsonParser::parse($json)->pointer(...$pointers))->toParseTo($parsed); // })->with(Dataset::forMultiplePointers()); From c57a4782c4e357ff2779a597e10a6a24e041efbb Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Mon, 28 Nov 2022 20:52:36 +1000 Subject: [PATCH 074/249] Add escaping logic --- src/Lexer.php | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/src/Lexer.php b/src/Lexer.php index 2e8034f..6f7a4ea 100644 --- a/src/Lexer.php +++ b/src/Lexer.php @@ -30,6 +30,13 @@ class Lexer implements IteratorAggregate */ protected string $buffer = ''; + /** + * Whether the current character is escaped. + * + * @var bool + */ + protected bool $isEscaping = false; + /** * Whether the current character belongs to a string. 
* @@ -58,6 +65,7 @@ class Lexer implements IteratorAggregate for ($i = 0, $size = strlen($chunk); $i < $size; $i++) { $character = $chunk[$i]; $this->inString = $this->inString($character); + $this->isEscaping = $character == '\\' && !$this->isEscaping; yield from $this->yieldOrBufferCharacter($character); } @@ -72,8 +80,9 @@ class Lexer implements IteratorAggregate */ protected function inString(string $character): bool { - return ($character == '"' && !$this->inString) - || ($character != '"' && $this->inString); + return ($character == '"' && $this->inString && $this->isEscaping) + || ($character != '"' && $this->inString) + || ($character == '"' && !$this->inString); } /** From 979fdae4ae940b200df0cf21dbe99e0fdcfddb01 Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Mon, 28 Nov 2022 20:54:43 +1000 Subject: [PATCH 075/249] Replace null pointer --- src/Pointers/NullPointer.php | 46 ------------------------------------ src/Pointers/Pointers.php | 2 +- src/State.php | 10 ++++---- 3 files changed, 6 insertions(+), 52 deletions(-) delete mode 100644 src/Pointers/NullPointer.php diff --git a/src/Pointers/NullPointer.php b/src/Pointers/NullPointer.php deleted file mode 100644 index 6d1e734..0000000 --- a/src/Pointers/NullPointer.php +++ /dev/null @@ -1,46 +0,0 @@ -pointer = ''; - } - - /** - * Determine whether the pointer matches the given tree - * - * @param Tree $tree - * @return bool - */ - public function matchesTree(Tree $tree): bool - { - return true; - } -} diff --git a/src/Pointers/Pointers.php b/src/Pointers/Pointers.php index 70163f5..8533373 100644 --- a/src/Pointers/Pointers.php +++ b/src/Pointers/Pointers.php @@ -40,7 +40,7 @@ class Pointers implements Countable public function __construct(Pointer ...$pointers) { $this->pointers = $pointers; - $this->defaultPointer = new NullPointer(); + $this->defaultPointer = new Pointer(''); } /** diff --git a/src/State.php b/src/State.php index d8d0b22..3172b6e 100644 --- a/src/State.php +++ b/src/State.php 
@@ -2,7 +2,6 @@ namespace Cerbero\JsonParser; -use Cerbero\JsonParser\Pointers\NullPointer; use Cerbero\JsonParser\Pointers\Pointer; use Cerbero\JsonParser\Pointers\Pointers; use Cerbero\JsonParser\Tokens\Token; @@ -74,7 +73,7 @@ class State */ public function treeIsShallow(): bool { - return $this->pointer instanceof NullPointer + return $this->pointer == '' || $this->tree->depth() < $this->pointer->depth(); } @@ -85,7 +84,7 @@ class State */ public function treeIsDeep(): bool { - return $this->pointer instanceof NullPointer + return $this->pointer == '' ? $this->tree->depth() > $this->pointer->depth() : $this->tree->depth() >= $this->pointer->depth(); } @@ -162,7 +161,8 @@ class State */ public function pointerMatchesTree(): bool { - return $this->pointer->matchesTree($this->tree); + return $this->pointer == '' + || in_array($this->pointer->referenceTokens(), [$this->tree->original(), $this->tree->wildcarded()]); } /** @@ -208,7 +208,7 @@ class State */ public function shouldBufferToken(Token $token): bool { - return $this->pointer->matchesTree($this->tree) + return $this->pointerMatchesTree() && ($this->treeIsDeep() || (!$this->expectsKey() && ($token->isValue() || $this->expectsToken($token)))); } From da5df48779c6718d13a53822c78a3ffd644698b8 Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Mon, 28 Nov 2022 20:55:25 +1000 Subject: [PATCH 076/249] Improve reference tokens transformation --- src/Pointers/Pointer.php | 16 +++------------- 1 file changed, 3 insertions(+), 13 deletions(-) diff --git a/src/Pointers/Pointer.php b/src/Pointers/Pointer.php index eee220b..23ecf9e 100644 --- a/src/Pointers/Pointer.php +++ b/src/Pointers/Pointer.php @@ -49,9 +49,10 @@ class Pointer implements ArrayAccess, Stringable throw PointerException::invalid($this->pointer); } - $tokens = explode('/', substr($this->pointer, 1)); + $tokens = explode('/', $this->pointer); + $referenceTokens = array_map(fn (string $token) => str_replace(['~1', '~0'], ['/', '~'], $token), 
$tokens); - return array_map(fn (string $token) => str_replace(['~1', '~0'], ['/', '~'], $token), $tokens); + return array_slice($referenceTokens, 1); } /** @@ -94,17 +95,6 @@ class Pointer implements ArrayAccess, Stringable return is_int($key) && $this->referenceTokens[$depth] === '-'; } - /** - * Determine whether the pointer matches the given tree - * - * @param Tree $tree - * @return bool - */ - public function matchesTree(Tree $tree): bool - { - return $this->referenceTokens == $tree->original() || $this->referenceTokens == $tree->wildcarded(); - } - /** * Determine whether the pointer includes the given tree * From f36a3fb4431fb93f1cf72251de8b099dbe54f56c Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Mon, 28 Nov 2022 20:56:30 +1000 Subject: [PATCH 077/249] Add more use cases --- tests/fixtures/json/simple_object.json | 11 +- tests/fixtures/parsing/simple_object.php | 9 ++ tests/fixtures/pointers/single_pointer.php | 140 +++++++++++++++++++++ 3 files changed, 159 insertions(+), 1 deletion(-) diff --git a/tests/fixtures/json/simple_object.json b/tests/fixtures/json/simple_object.json index a85578d..137929c 100644 --- a/tests/fixtures/json/simple_object.json +++ b/tests/fixtures/json/simple_object.json @@ -9,5 +9,14 @@ "bool": false, "null": null, "empty_array": [], - "empty_object": {} + "empty_object": {}, + "": 0, + "a/b": 1, + "c%d": 2, + "e^f": 3, + "g|h": 4, + "i\\j": 5, + "k\"l": 6, + " ": 7, + "m~n": 8 } diff --git a/tests/fixtures/parsing/simple_object.php b/tests/fixtures/parsing/simple_object.php index be14389..f673c9c 100644 --- a/tests/fixtures/parsing/simple_object.php +++ b/tests/fixtures/parsing/simple_object.php @@ -12,4 +12,13 @@ return [ 'null' => null, 'empty_array' => [], 'empty_object' => [], + '' => 0, + 'a/b' => 1, + 'c%d' => 2, + 'e^f' => 3, + 'g|h' => 4, + 'i\\j' => 5, + 'k"l' => 6, + ' ' => 7, + 'm~n' => 8 ]; diff --git a/tests/fixtures/pointers/single_pointer.php b/tests/fixtures/pointers/single_pointer.php index 
c77bf8f..3acae9b 100644 --- a/tests/fixtures/pointers/single_pointer.php +++ b/tests/fixtures/pointers/single_pointer.php @@ -2,6 +2,136 @@ return [ 'complex_array' => [ + '/-' => [ + [ + "id" => "0001", + "type" => "donut", + "name" => "Cake", + "ppu" => 0.55, + "batters" => [ + "batter" => [ + [ + "id" => "1001", + "type" => "Regular", + ], + [ + "id" => "1002", + "type" => "Chocolate", + ], + [ + "id" => "1003", + "type" => "Blueberry", + ], + [ + "id" => "1004", + "type" => "Devil's Food", + ], + ], + ], + "topping" => [ + [ + "id" => "5001", + "type" => "None", + ], + [ + "id" => "5002", + "type" => "Glazed", + ], + [ + "id" => "5005", + "type" => "Sugar", + ], + [ + "id" => "5007", + "type" => "Powdered Sugar", + ], + [ + "id" => "5006", + "type" => "Chocolate with Sprinkles", + ], + [ + "id" => "5003", + "type" => "Chocolate", + ], + [ + "id" => "5004", + "type" => "Maple", + ], + ], + ], + [ + "id" => "0002", + "type" => "donut", + "name" => "Raised", + "ppu" => 0.55, + "batters" => [ + "batter" => [ + [ + "id" => "1001", + "type" => "Regular", + ], + ], + ], + "topping" => [ + [ + "id" => "5001", + "type" => "None", + ], + [ + "id" => "5002", + "type" => "Glazed", + ], + [ + "id" => "5005", + "type" => "Sugar", + ], + [ + "id" => "5003", + "type" => "Chocolate", + ], + [ + "id" => "5004", + "type" => "Maple", + ], + ], + ], + [ + "id" => "0003", + "type" => "donut", + "name" => "Old Fashioned", + "ppu" => 0.55, + "batters" => [ + "batter" => [ + [ + "id" => "1001", + "type" => "Regular", + ], + [ + "id" => "1002", + "type" => "Chocolate", + ], + ], + ], + "topping" => [ + [ + "id" => "5001", + "type" => "None", + ], + [ + "id" => "5002", + "type" => "Glazed", + ], + [ + "id" => "5003", + "type" => "Chocolate", + ], + [ + "id" => "5004", + "type" => "Maple", + ], + ], + ], + ], '/-/id' => ['id' => ['0001', '0002', '0003']], '/-/batters' => [ 'batters' => [ @@ -194,6 +324,7 @@ return [ '/foo' => [], ], 'simple_array' => [ + // '' => [1, '', 'foo', '"bar"', 
'hej då', 3.14, false, null, [], []], '/-' => [1, '', 'foo', '"bar"', 'hej då', 3.14, false, null, [], []], '/-1' => [], '/0' => [1], @@ -225,5 +356,14 @@ return [ '/empty_object' => ['empty_object' => []], '/10' => [], '/foo' => [], + '/' => ['' => 0], + '/a~1b' => ['a/b' => 1], + '/c%d' => ['c%d' => 2], + '/e^f' => ['e^f' => 3], + '/g|h' => ['g|h' => 4], + '/i\\\\j' => ['i\\j' => 5], + '/k\"l' => ['k"l' => 6], + '/ ' => [' ' => 7], + '/m~0n' => ['m~n' => 8], ], ]; From e81f3594fac559a3f963be67be946809944f435a Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Tue, 29 Nov 2022 21:06:49 +1000 Subject: [PATCH 078/249] Extend logic to mutate the state when needed --- src/Tokens/Comma.php | 6 ++++-- src/Tokens/CompoundBegin.php | 6 ++++-- src/Tokens/CompoundEnd.php | 6 ++++-- src/Tokens/ScalarString.php | 6 ++++-- src/Tokens/Token.php | 13 ------------- 5 files changed, 16 insertions(+), 21 deletions(-) diff --git a/src/Tokens/Comma.php b/src/Tokens/Comma.php index cd6694c..4d96769 100644 --- a/src/Tokens/Comma.php +++ b/src/Tokens/Comma.php @@ -21,13 +21,15 @@ class Comma extends Token } /** - * Update the given state + * Mutate the given state * * @param State $state * @return void */ - protected function updateState(State $state): void + public function mutateState(State $state): void { + parent::mutateState($state); + if ($state->inObject()) { $state->expectKey(); } diff --git a/src/Tokens/CompoundBegin.php b/src/Tokens/CompoundBegin.php index 0130ecc..684ab46 100644 --- a/src/Tokens/CompoundBegin.php +++ b/src/Tokens/CompoundBegin.php @@ -21,13 +21,15 @@ class CompoundBegin extends Token } /** - * Update the given state + * Mutate the given state * * @param State $state * @return void */ - protected function updateState(State $state): void + public function mutateState(State $state): void { + parent::mutateState($state); + $state->tree()->deepen(); if ($this->value == '{') { diff --git a/src/Tokens/CompoundEnd.php b/src/Tokens/CompoundEnd.php index 
fea88ce..05ae632 100644 --- a/src/Tokens/CompoundEnd.php +++ b/src/Tokens/CompoundEnd.php @@ -21,13 +21,15 @@ class CompoundEnd extends Token } /** - * Update the given state + * Mutate the given state * * @param State $state * @return void */ - protected function updateState(State $state): void + public function mutateState(State $state): void { + parent::mutateState($state); + $state->tree()->emerge(); if ($this->value == '}') { diff --git a/src/Tokens/ScalarString.php b/src/Tokens/ScalarString.php index c0cf392..1375ba4 100644 --- a/src/Tokens/ScalarString.php +++ b/src/Tokens/ScalarString.php @@ -28,13 +28,15 @@ class ScalarString extends Token } /** - * Update the given state + * Mutate the given state * * @param State $state * @return void */ - protected function updateState(State $state): void + public function mutateState(State $state): void { + parent::mutateState($state); + if (!$this->isKey = $state->expectsKey()) { return; } diff --git a/src/Tokens/Token.php b/src/Tokens/Token.php index b83cfb8..45aa1e7 100644 --- a/src/Tokens/Token.php +++ b/src/Tokens/Token.php @@ -87,19 +87,6 @@ abstract class Token implements Stringable if ($state->inRoot() && $state->shouldBufferToken($this)) { $state->bufferToken($this); } - - $this->updateState($state); - } - - /** - * Update the given state - * - * @param State $state - * @return void - */ - protected function updateState(State $state): void - { - return; } /** From 36f8325604dbf31b5627f2fa13cc11d9407ac03f Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Wed, 30 Nov 2022 00:38:07 +1000 Subject: [PATCH 079/249] Rename method --- src/State.php | 4 ++-- src/Tokens/Token.php | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/State.php b/src/State.php index 3172b6e..8daa881 100644 --- a/src/State.php +++ b/src/State.php @@ -67,11 +67,11 @@ class State } /** - * Determine whether the tree is shallow + * Determine whether the tree should be tracked * * @return bool */ - public function 
treeIsShallow(): bool + public function shouldTrackTree(): bool { return $this->pointer == '' || $this->tree->depth() < $this->pointer->depth(); diff --git a/src/Tokens/Token.php b/src/Tokens/Token.php index 45aa1e7..1791f76 100644 --- a/src/Tokens/Token.php +++ b/src/Tokens/Token.php @@ -76,11 +76,11 @@ abstract class Token implements Stringable */ public function mutateState(State $state): void { - if ($this->isValue() && !$state->inObject() && $state->treeIsShallow()) { + if ($this->isValue() && !$state->inObject() && $state->shouldTrackTree()) { $state->traverseArray(); } - if ($this->isString() && $state->expectsKey() && $state->treeIsShallow()) { + if ($this->isString() && $state->expectsKey() && $state->shouldTrackTree()) { $state->traverseKey($this); } From 27d1e3f9af42819061f2e06784cca5de0b102c49 Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Wed, 30 Nov 2022 00:50:04 +1000 Subject: [PATCH 080/249] Method chaining not needed --- src/State.php | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/State.php b/src/State.php index 8daa881..d6ff555 100644 --- a/src/State.php +++ b/src/State.php @@ -169,14 +169,12 @@ class State * Traverse the given object key * * @param string $key - * @return static + * @return void */ - public function traverseKey(string $key): static + public function traverseKey(string $key): void { $this->tree->traverseKey($key); $this->treeChanged = true; - - return $this; } /** From cfaf87e554e40499e66f498c08139b22b82c395d Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Wed, 30 Nov 2022 00:52:07 +1000 Subject: [PATCH 081/249] Move logic to flag that the tree did not change --- src/Parser.php | 1 - src/Tokens/Token.php | 2 ++ 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/src/Parser.php b/src/Parser.php index 2de62cc..4f0aef1 100644 --- a/src/Parser.php +++ b/src/Parser.php @@ -90,7 +90,6 @@ class Parser implements IteratorAggregate { if ($this->state->treeChanged() && 
$this->pointers->count() > 1) { $this->state->matchPointer($this->pointers); - $this->state->treeDidNotChange(); } } diff --git a/src/Tokens/Token.php b/src/Tokens/Token.php index 1791f76..bb6797d 100644 --- a/src/Tokens/Token.php +++ b/src/Tokens/Token.php @@ -76,6 +76,8 @@ abstract class Token implements Stringable */ public function mutateState(State $state): void { + $state->treeDidNotChange(); + if ($this->isValue() && !$state->inObject() && $state->shouldTrackTree()) { $state->traverseArray(); } From 262713b9c30c8a35ab3294cb2e0fb810f4e05e12 Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Wed, 30 Nov 2022 01:12:31 +1000 Subject: [PATCH 082/249] No need for methods chaining --- src/Pointers/Pointers.php | 9 +++------ src/State.php | 30 ++++++++++-------------------- 2 files changed, 13 insertions(+), 26 deletions(-) diff --git a/src/Pointers/Pointers.php b/src/Pointers/Pointers.php index 8533373..f489c0f 100644 --- a/src/Pointers/Pointers.php +++ b/src/Pointers/Pointers.php @@ -70,13 +70,11 @@ class Pointers implements Countable * Mark the given pointer as found * * @param Pointer $pointer - * @return static + * @return void */ - public function markAsFound(Pointer $pointer): static + public function markAsFound(Pointer $pointer): void { $this->found[(string) $pointer] = true; - - return $this; } /** @@ -86,8 +84,7 @@ class Pointers implements Countable */ public function wereFound(): bool { - return $this->count() > 0 - && $this->count() == count($this->found); + return $this->count() > 0 && $this->count() == count($this->found); } /** diff --git a/src/State.php b/src/State.php index d6ff555..9720eef 100644 --- a/src/State.php +++ b/src/State.php @@ -112,26 +112,22 @@ class State /** * Mark the JSON tree as not changed * - * @return static + * @return void */ - public function treeDidNotChange(): static + public function treeDidNotChange(): void { $this->treeChanged = false; - - return $this; } /** * Set the JSON pointer matching the tree from the 
given pointers * * @param Pointers $pointers - * @return static + * @return void */ - public function matchPointer(Pointers $pointers): static + public function matchPointer(Pointers $pointers): void { $this->pointer = $pointers->matchTree($this->tree); - - return $this; } /** @@ -226,13 +222,11 @@ class State * Buffer the given token * * @param Token $token - * @return static + * @return void */ - public function bufferToken(Token $token): static + public function bufferToken(Token $token): void { $this->buffer .= $token; - - return $this; } /** @@ -262,25 +256,21 @@ class State /** * Expect an object key * - * @return static + * @return void */ - public function expectKey(): static + public function expectKey(): void { $this->expectsKey = true; - - return $this; } /** * Do not expect any object key * - * @return static + * @return void */ - public function doNotExpectKey(): static + public function doNotExpectKey(): void { $this->expectsKey = false; - - return $this; } /** From fa4e71f11103aebf28492de4254956d5240ed06f Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Fri, 2 Dec 2022 21:05:55 +1000 Subject: [PATCH 083/249] Move logic to traverse a key to the string token --- src/Tokens/ScalarString.php | 12 ++++++++---- src/Tokens/Token.php | 14 -------------- 2 files changed, 8 insertions(+), 18 deletions(-) diff --git a/src/Tokens/ScalarString.php b/src/Tokens/ScalarString.php index 1375ba4..f12cdc7 100644 --- a/src/Tokens/ScalarString.php +++ b/src/Tokens/ScalarString.php @@ -15,7 +15,7 @@ class ScalarString extends Token * * @var bool */ - protected bool $isKey; + protected bool $isKey = false; /** * Retrieve the token type @@ -37,11 +37,15 @@ class ScalarString extends Token { parent::mutateState($state); - if (!$this->isKey = $state->expectsKey()) { - return; + $this->isKey = $state->expectsKey(); + + if ($this->isKey && $state->shouldTrackTree()) { + $state->traverseKey($this); } - $state->doNotExpectKey(); + if ($this->isKey) { + 
$state->doNotExpectKey(); + } } /** diff --git a/src/Tokens/Token.php b/src/Tokens/Token.php index bb6797d..dab84a9 100644 --- a/src/Tokens/Token.php +++ b/src/Tokens/Token.php @@ -58,16 +58,6 @@ abstract class Token implements Stringable return ($this->type() | Tokens::VALUE_SCALAR) == Tokens::VALUE_SCALAR; } - /** - * Determine whether the token is a string - * - * @return bool - */ - public function isString(): bool - { - return ($this->type() | Tokens::SCALAR_STRING) == Tokens::SCALAR_STRING; - } - /** * Mutate the given state * @@ -82,10 +72,6 @@ abstract class Token implements Stringable $state->traverseArray(); } - if ($this->isString() && $state->expectsKey() && $state->shouldTrackTree()) { - $state->traverseKey($this); - } - if ($state->inRoot() && $state->shouldBufferToken($this)) { $state->bufferToken($this); } From d4b8b5037bbbbfe265f65dbef31354c7f8fff337 Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Fri, 2 Dec 2022 22:27:49 +1000 Subject: [PATCH 084/249] Traverse key before buffering token --- src/Tokens/ScalarString.php | 8 +------- src/Tokens/Token.php | 14 ++++++++++++++ 2 files changed, 15 insertions(+), 7 deletions(-) diff --git a/src/Tokens/ScalarString.php b/src/Tokens/ScalarString.php index f12cdc7..302abe6 100644 --- a/src/Tokens/ScalarString.php +++ b/src/Tokens/ScalarString.php @@ -37,13 +37,7 @@ class ScalarString extends Token { parent::mutateState($state); - $this->isKey = $state->expectsKey(); - - if ($this->isKey && $state->shouldTrackTree()) { - $state->traverseKey($this); - } - - if ($this->isKey) { + if ($this->isKey = $state->expectsKey()) { $state->doNotExpectKey(); } } diff --git a/src/Tokens/Token.php b/src/Tokens/Token.php index dab84a9..bb6797d 100644 --- a/src/Tokens/Token.php +++ b/src/Tokens/Token.php @@ -58,6 +58,16 @@ abstract class Token implements Stringable return ($this->type() | Tokens::VALUE_SCALAR) == Tokens::VALUE_SCALAR; } + /** + * Determine whether the token is a string + * + * @return bool + */ + 
public function isString(): bool + { + return ($this->type() | Tokens::SCALAR_STRING) == Tokens::SCALAR_STRING; + } + /** * Mutate the given state * @@ -72,6 +82,10 @@ abstract class Token implements Stringable $state->traverseArray(); } + if ($this->isString() && $state->expectsKey() && $state->shouldTrackTree()) { + $state->traverseKey($this); + } + if ($state->inRoot() && $state->shouldBufferToken($this)) { $state->bufferToken($this); } From 1d7d65b0513be7da42ef8b76610e95db323dcfc2 Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Sat, 3 Dec 2022 13:34:13 +1000 Subject: [PATCH 085/249] Improve expectations --- tests/Feature/PointersTest.php | 7 ++-- tests/Pest.php | 63 ++++++++++++++-------------------- 2 files changed, 28 insertions(+), 42 deletions(-) diff --git a/tests/Feature/PointersTest.php b/tests/Feature/PointersTest.php index 7ff64e6..3cf19e6 100644 --- a/tests/Feature/PointersTest.php +++ b/tests/Feature/PointersTest.php @@ -4,17 +4,16 @@ use Cerbero\JsonParser\Dataset; use Cerbero\JsonParser\Exceptions\PointerException; use Cerbero\JsonParser\JsonParser; -it('supports single JSON pointers', function (string $json, string $pointer, array $parsed) { - expect(JsonParser::parse($json)->pointer($pointer))->toParseTo($parsed); -})->with(Dataset::forSinglePointers()); +it('supports single JSON pointers', function (string $json, string $pointer, array $parsed) { + expect(JsonParser::parse($json)->pointer($pointer))->toPointTo($parsed); +})->with(Dataset::forSinglePointers()); it('throws an exception when providing an invalid JSON pointer', function (string $pointer) { expect(fn () => iterator_to_array(JsonParser::parse('{}')->pointer($pointer))) ->toThrow(PointerException::class, "The string [$pointer] is not a valid JSON pointer"); })->with(Dataset::forInvalidPointers()); - // it('supports multiple JSON pointers', function (string $json, array $pointers, array $parsed) { // expect(JsonParser::parse($json)->pointer(...$pointers))->toParseTo($parsed); 
// })->with(Dataset::forMultiplePointers()); diff --git a/tests/Pest.php b/tests/Pest.php index d72f48f..d0e436b 100644 --- a/tests/Pest.php +++ b/tests/Pest.php @@ -1,30 +1,30 @@ in('Feature'); - -/* -|-------------------------------------------------------------------------- -| Expectations -|-------------------------------------------------------------------------- -| -| When you're writing tests, you often need to check that values meet certain conditions. The -| "expect()" function gives you access to a set of "expectations" methods that you can use -| to assert different things. Of course, you may extend the Expectation API at any time. -| -*/ - +/** + * Expect that keys and values are parsed correctly + * + * @param array $expected + * @return Expectation + */ expect()->extend('toParseTo', function (array $expected) { + $actual = []; + + foreach ($this->value as $parsedKey => $parsedValue) { + expect($parsedValue)->toBe($expected[$parsedKey]); + + $actual[$parsedKey] = $parsedValue; + } + + return expect($actual)->toBe($expected); +}); + +/** + * Expect that values defined by JSON pointers are parsed correctly + * + * @param array $expected + * @return Expectation + */ +expect()->extend('toPointTo', function (array $expected) { $actual = $itemsCount = []; foreach ($this->value as $parsedKey => $parsedValue) { @@ -39,18 +39,5 @@ expect()->extend('toParseTo', function (array $expected) { }; } - expect($actual)->toBe($expected); + return expect($actual)->toBe($expected); }); - -/* -|-------------------------------------------------------------------------- -| Functions -|-------------------------------------------------------------------------- -| -| While Pest is very powerful out-of-the-box, you may have some testing code specific to your -| project that you don't want to repeat in every file. Here you can also expose helpers as -| global functions to help you to reduce the number of lines of code in your test files. -| -*/ - -// ... 
From 72c61d55cb61451165e82961e403995192c81d9c Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Sat, 3 Dec 2022 13:36:47 +1000 Subject: [PATCH 086/249] Simplify logic to check whether to buffer a token --- src/State.php | 14 +------------- 1 file changed, 1 insertion(+), 13 deletions(-) diff --git a/src/State.php b/src/State.php index 9720eef..16586c4 100644 --- a/src/State.php +++ b/src/State.php @@ -203,19 +203,7 @@ class State public function shouldBufferToken(Token $token): bool { return $this->pointerMatchesTree() - && ($this->treeIsDeep() || (!$this->expectsKey() && ($token->isValue() || $this->expectsToken($token)))); - } - - /** - * Determine whether the given token is expected - * - * @param Token $token - * @return bool - */ - protected function expectsToken(Token $token): bool - { - return ($this->tree->depth() == $this->pointer->depth() && $token->isValue()) - || ($this->tree->depth() + 1 == $this->pointer->depth() && $token->isScalar()); + && ($this->treeIsDeep() || ($token->isValue() && !$this->expectsKey())); } /** From 690fb2c0bcda28a5be1f0c53878196a0bfbb397a Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Sat, 3 Dec 2022 13:49:55 +1000 Subject: [PATCH 087/249] Move logic to traverse a key to the string token --- src/Tokens/ScalarString.php | 4 ++++ src/Tokens/Token.php | 6 +----- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/Tokens/ScalarString.php b/src/Tokens/ScalarString.php index 302abe6..05f008d 100644 --- a/src/Tokens/ScalarString.php +++ b/src/Tokens/ScalarString.php @@ -37,6 +37,10 @@ class ScalarString extends Token { parent::mutateState($state); + if ($state->expectsKey() && $state->shouldTrackTree()) { + $state->traverseKey($this); + } + if ($this->isKey = $state->expectsKey()) { $state->doNotExpectKey(); } diff --git a/src/Tokens/Token.php b/src/Tokens/Token.php index bb6797d..50b7ef5 100644 --- a/src/Tokens/Token.php +++ b/src/Tokens/Token.php @@ -82,11 +82,7 @@ abstract class Token implements 
Stringable $state->traverseArray(); } - if ($this->isString() && $state->expectsKey() && $state->shouldTrackTree()) { - $state->traverseKey($this); - } - - if ($state->inRoot() && $state->shouldBufferToken($this)) { + if ($state->shouldBufferToken($this)) { $state->bufferToken($this); } } From 419ced2621542d66e2ef4692896fb28691fe2bb2 Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Sat, 3 Dec 2022 13:51:04 +1000 Subject: [PATCH 088/249] Add being in root to conditions to buffer a token --- src/State.php | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/src/State.php b/src/State.php index 16586c4..f9ff44b 100644 --- a/src/State.php +++ b/src/State.php @@ -202,7 +202,8 @@ class State */ public function shouldBufferToken(Token $token): bool { - return $this->pointerMatchesTree() + return $this->tree->depth() >= 0 + && $this->pointerMatchesTree() && ($this->treeIsDeep() || ($token->isValue() && !$this->expectsKey())); } @@ -270,14 +271,4 @@ class State { return $this->tree->inObject(); } - - /** - * Determine whether the tree is within the JSON root - * - * @return bool - */ - public function inRoot(): bool - { - return $this->tree->depth() >= 0; - } } From cc9f4a1725940f8df7e53e120ebe3007e1f3cb74 Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Sat, 3 Dec 2022 16:24:40 +1000 Subject: [PATCH 089/249] Rename source --- src/Sources/AnySource.php | 5 +++-- src/Sources/{JsonString.php => Json.php} | 4 ++-- 2 files changed, 5 insertions(+), 4 deletions(-) rename src/Sources/{JsonString.php => Json.php} (93%) diff --git a/src/Sources/AnySource.php b/src/Sources/AnySource.php index fcec18a..620b9ec 100644 --- a/src/Sources/AnySource.php +++ b/src/Sources/AnySource.php @@ -22,7 +22,7 @@ class AnySource extends Source Endpoint::class, Filename::class, IterableSource::class, - JsonString::class, + Json::class, LaravelClientResponse::class, Psr7Message::class, Psr7Stream::class, @@ -40,6 +40,7 @@ class AnySource extends Source * 
Retrieve the JSON fragments * * @return Traversable + * @throws SourceException */ public function getIterator(): Traversable { @@ -55,7 +56,7 @@ class AnySource extends Source /** * Retrieve all available sources * - * @return Source[] + * @return Generator */ protected function sources(): Generator { diff --git a/src/Sources/JsonString.php b/src/Sources/Json.php similarity index 93% rename from src/Sources/JsonString.php rename to src/Sources/Json.php index bb53a52..670f665 100644 --- a/src/Sources/JsonString.php +++ b/src/Sources/Json.php @@ -6,10 +6,10 @@ use Cerbero\JsonParser\Concerns\DetectsEndpoints; use Traversable; /** - * The JSON string source. + * The JSON source. * */ -class JsonString extends Source +class Json extends Source { use DetectsEndpoints; From ce66f8cef2411d4374b3ee079aae95aee39ebe7a Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Sat, 3 Dec 2022 16:37:09 +1000 Subject: [PATCH 090/249] Move logic to handle decoding errors --- src/Decoders/ErrorHandlingDecoder.php | 38 +++++++++++++++++++++++++++ src/Parser.php | 29 +++++--------------- 2 files changed, 45 insertions(+), 22 deletions(-) create mode 100644 src/Decoders/ErrorHandlingDecoder.php diff --git a/src/Decoders/ErrorHandlingDecoder.php b/src/Decoders/ErrorHandlingDecoder.php new file mode 100644 index 0000000..11a204e --- /dev/null +++ b/src/Decoders/ErrorHandlingDecoder.php @@ -0,0 +1,38 @@ +config->decoder->decode($json); + + if (!$decoded->succeeded) { + call_user_func($this->config->onError, $decoded); + } + + return $decoded->value; + } +} diff --git a/src/Parser.php b/src/Parser.php index 4f0aef1..24ed121 100644 --- a/src/Parser.php +++ b/src/Parser.php @@ -2,6 +2,7 @@ namespace Cerbero\JsonParser; +use Cerbero\JsonParser\Decoders\ErrorHandlingDecoder; use Cerbero\JsonParser\Pointers\Pointers; use Cerbero\JsonParser\Sources\Source; use IteratorAggregate; @@ -21,11 +22,11 @@ class Parser implements IteratorAggregate protected State $state; /** - * The JSON pointers 
collection. + * The decoder handling potential errors. * - * @var Pointers + * @var ErrorHandlingDecoder */ - protected Pointers $pointers; + protected ErrorHandlingDecoder $decoder; /** * Instantiate the class. @@ -36,6 +37,7 @@ class Parser implements IteratorAggregate public function __construct(protected Lexer $lexer, protected Config $config) { $this->state = new State(); + $this->decoder = new ErrorHandlingDecoder($config); } /** @@ -68,9 +70,9 @@ class Parser implements IteratorAggregate } if ($this->state->hasBuffer() && $this->state->inObject()) { - yield $this->decode($this->state->key()) => $this->decode($this->state->pullBuffer()); + yield $this->decoder->decode($this->state->key()) => $this->decoder->decode($this->state->pullBuffer()); } elseif ($this->state->hasBuffer() && !$this->state->inObject()) { - yield $this->decode($this->state->pullBuffer()); + yield $this->decoder->decode($this->state->pullBuffer()); } $this->markPointerAsFound(); @@ -93,23 +95,6 @@ class Parser implements IteratorAggregate } } - /** - * Retrieve the decoded value of the given JSON fragment - * - * @param string $json - * @return mixed - */ - protected function decode(string $json): mixed - { - $decoded = $this->config->decoder->decode($json); - - if (!$decoded->succeeded) { - call_user_func($this->config->onError, $decoded); - } - - return $decoded->value; - } - /** * Mark the matching JSON pointer as found * From 5e2ee11d5a0d77c9f8300e9fbeb311b342ae3e96 Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Sat, 3 Dec 2022 18:11:22 +1000 Subject: [PATCH 091/249] Simplify parser by delegating extra responsibilities --- src/Parser.php | 33 +-------- src/State.php | 138 +++++++++++++++++++++--------------- src/Tokens/ScalarString.php | 4 -- src/Tokens/Token.php | 8 ++- 4 files changed, 90 insertions(+), 93 deletions(-) diff --git a/src/Parser.php b/src/Parser.php index 24ed121..e5717a4 100644 --- a/src/Parser.php +++ b/src/Parser.php @@ -3,7 +3,6 @@ namespace 
Cerbero\JsonParser; use Cerbero\JsonParser\Decoders\ErrorHandlingDecoder; -use Cerbero\JsonParser\Pointers\Pointers; use Cerbero\JsonParser\Sources\Source; use IteratorAggregate; use Traversable; @@ -58,12 +57,10 @@ class Parser implements IteratorAggregate */ public function getIterator(): Traversable { - $this->pointers = new Pointers(...$this->config->pointers); - $this->state->matchPointer($this->pointers); + $this->state->setPointers(...$this->config->pointers); foreach ($this->lexer as $token) { $token->mutateState($this->state); - $this->rematchPointer(); if (!$token->endsChunk() || $this->state->treeIsDeep()) { continue; @@ -75,35 +72,9 @@ class Parser implements IteratorAggregate yield $this->decoder->decode($this->state->pullBuffer()); } - $this->markPointerAsFound(); - - if ($this->pointers->wereFound() && !$this->state->inPointer()) { + if ($this->state->canStopParsing()) { break; } } } - - /** - * Set the matching JSON pointer when the tree changes - * - * @return void - */ - protected function rematchPointer(): void - { - if ($this->state->treeChanged() && $this->pointers->count() > 1) { - $this->state->matchPointer($this->pointers); - } - } - - /** - * Mark the matching JSON pointer as found - * - * @return void - */ - protected function markPointerAsFound(): void - { - if ($this->state->pointerMatchesTree()) { - $this->pointers->markAsFound($this->state->pointer()); - } - } } diff --git a/src/State.php b/src/State.php index f9ff44b..85058a6 100644 --- a/src/State.php +++ b/src/State.php @@ -26,6 +26,13 @@ class State */ protected bool $treeChanged = false; + /** + * The JSON pointers. + * + * @var Pointers + */ + protected Pointers $pointers; + /** * The JSON pointer matching the tree. 
* @@ -73,8 +80,7 @@ class State */ public function shouldTrackTree(): bool { - return $this->pointer == '' - || $this->tree->depth() < $this->pointer->depth(); + return $this->pointer == '' || $this->tree->depth() < $this->pointer->depth(); } /** @@ -99,6 +105,31 @@ class State return $this->tree->currentKey(); } + /** + * Traverse the given object key + * + * @param string $key + * @return void + */ + public function traverseKey(string $key): void + { + $this->tree->traverseKey($key); + + $this->treeChanged = true; + } + + /** + * Traverse a JSON array + * + * @return void + */ + public function traverseArray(): void + { + $this->tree->traverseArray($this->pointer); + + $this->treeChanged = true; + } + /** * Determine whether the tree changed * @@ -120,14 +151,48 @@ class State } /** - * Set the JSON pointer matching the tree from the given pointers + * Determine whether the current position is within an object * - * @param Pointers $pointers + * @return bool + */ + public function inObject(): bool + { + return $this->tree->inObject(); + } + + /** + * Set and match the given pointers + * + * @param Pointer ...$pointers * @return void */ - public function matchPointer(Pointers $pointers): void + public function setPointers(Pointer ...$pointers): void { - $this->pointer = $pointers->matchTree($this->tree); + $this->pointers = new Pointers(...$pointers); + + $this->matchPointer(); + } + + /** + * Set the JSON pointer matching the tree + * + * @return void + */ + public function matchPointer(): void + { + $this->pointer = $this->pointers->matchTree($this->tree); + } + + /** + * Set the new matching JSON pointer when the tree changes + * + * @return void + */ + public function rematchPointer(): void + { + if ($this->treeChanged && $this->pointers->count() > 1) { + $this->matchPointer(); + } } /** @@ -141,13 +206,13 @@ class State } /** - * Determine whether the tree is within the JSON pointer + * Determine whether the parser can stop parsing * * @return bool */ - 
public function inPointer(): bool + public function canStopParsing(): bool { - return $this->pointer->includesTree($this->tree); + return $this->pointers->wereFound() && !$this->pointer->includesTree($this->tree); } /** @@ -161,29 +226,6 @@ class State || in_array($this->pointer->referenceTokens(), [$this->tree->original(), $this->tree->wildcarded()]); } - /** - * Traverse the given object key - * - * @param string $key - * @return void - */ - public function traverseKey(string $key): void - { - $this->tree->traverseKey($key); - $this->treeChanged = true; - } - - /** - * Traverse a JSON array - * - * @return void - */ - public function traverseArray(): void - { - $this->tree->traverseArray($this->pointer); - $this->treeChanged = true; - } - /** * Determine whether the buffer contains tokens * @@ -194,19 +236,6 @@ class State return $this->buffer != ''; } - /** - * Determine whether the given token should be buffered - * - * @param Token $token - * @return bool - */ - public function shouldBufferToken(Token $token): bool - { - return $this->tree->depth() >= 0 - && $this->pointerMatchesTree() - && ($this->treeIsDeep() || ($token->isValue() && !$this->expectsKey())); - } - /** * Buffer the given token * @@ -215,7 +244,14 @@ class State */ public function bufferToken(Token $token): void { - $this->buffer .= $token; + $shouldBuffer = $this->tree->depth() >= 0 + && $this->pointerMatchesTree() + && ($this->treeIsDeep() || ($token->isValue() && !$this->expectsKey())); + + if ($shouldBuffer) { + $this->buffer .= $token; + $this->pointers->markAsFound($this->pointer); + } } /** @@ -261,14 +297,4 @@ class State { $this->expectsKey = false; } - - /** - * Determine whether the current position is within an object - * - * @return bool - */ - public function inObject(): bool - { - return $this->tree->inObject(); - } } diff --git a/src/Tokens/ScalarString.php b/src/Tokens/ScalarString.php index 05f008d..302abe6 100644 --- a/src/Tokens/ScalarString.php +++ 
b/src/Tokens/ScalarString.php @@ -37,10 +37,6 @@ class ScalarString extends Token { parent::mutateState($state); - if ($state->expectsKey() && $state->shouldTrackTree()) { - $state->traverseKey($this); - } - if ($this->isKey = $state->expectsKey()) { $state->doNotExpectKey(); } diff --git a/src/Tokens/Token.php b/src/Tokens/Token.php index 50b7ef5..a8be193 100644 --- a/src/Tokens/Token.php +++ b/src/Tokens/Token.php @@ -82,9 +82,13 @@ abstract class Token implements Stringable $state->traverseArray(); } - if ($state->shouldBufferToken($this)) { - $state->bufferToken($this); + if ($state->expectsKey() && $state->shouldTrackTree()) { + $state->traverseKey($this); } + + $state->bufferToken($this); + + $state->rematchPointer(); } /** From a758cf570bd8143ebc96b4ea67be80f9ca849f0c Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Sat, 3 Dec 2022 19:04:21 +1000 Subject: [PATCH 092/249] Rename decoder --- .../{ErrorHandlingDecoder.php => ConfiguredDecoder.php} | 2 +- src/Parser.php | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) rename src/Decoders/{ErrorHandlingDecoder.php => ConfiguredDecoder.php} (96%) diff --git a/src/Decoders/ErrorHandlingDecoder.php b/src/Decoders/ConfiguredDecoder.php similarity index 96% rename from src/Decoders/ErrorHandlingDecoder.php rename to src/Decoders/ConfiguredDecoder.php index 11a204e..dc13d91 100644 --- a/src/Decoders/ErrorHandlingDecoder.php +++ b/src/Decoders/ConfiguredDecoder.php @@ -8,7 +8,7 @@ use Cerbero\JsonParser\Config; * The decoder handling potential decoding errors. * */ -class ErrorHandlingDecoder +class ConfiguredDecoder { /** * Instantiate the class. 
diff --git a/src/Parser.php b/src/Parser.php index e5717a4..828f6a8 100644 --- a/src/Parser.php +++ b/src/Parser.php @@ -2,7 +2,7 @@ namespace Cerbero\JsonParser; -use Cerbero\JsonParser\Decoders\ErrorHandlingDecoder; +use Cerbero\JsonParser\Decoders\ConfiguredDecoder; use Cerbero\JsonParser\Sources\Source; use IteratorAggregate; use Traversable; @@ -23,9 +23,9 @@ class Parser implements IteratorAggregate /** * The decoder handling potential errors. * - * @var ErrorHandlingDecoder + * @var ConfiguredDecoder */ - protected ErrorHandlingDecoder $decoder; + protected ConfiguredDecoder $decoder; /** * Instantiate the class. @@ -36,7 +36,7 @@ class Parser implements IteratorAggregate public function __construct(protected Lexer $lexer, protected Config $config) { $this->state = new State(); - $this->decoder = new ErrorHandlingDecoder($config); + $this->decoder = new ConfiguredDecoder($config); } /** From ab8c10c05db98849e0be18ffcfa0dac74b193cdb Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Sat, 3 Dec 2022 20:27:45 +1000 Subject: [PATCH 093/249] Centralize state mutation logic --- src/Parser.php | 6 +- src/Pointers/Pointer.php | 7 ++ src/Pointers/Pointers.php | 5 +- src/State.php | 196 ++++++++++------------------------- src/Tokens/Comma.php | 4 +- src/Tokens/CompoundBegin.php | 4 +- src/Tokens/CompoundEnd.php | 4 +- src/Tokens/ScalarString.php | 6 +- src/Tokens/Token.php | 14 +-- src/Tree.php | 12 --- 10 files changed, 76 insertions(+), 182 deletions(-) diff --git a/src/Parser.php b/src/Parser.php index 828f6a8..80a9305 100644 --- a/src/Parser.php +++ b/src/Parser.php @@ -60,16 +60,16 @@ class Parser implements IteratorAggregate $this->state->setPointers(...$this->config->pointers); foreach ($this->lexer as $token) { - $token->mutateState($this->state); + $this->state->mutateByToken($token); if (!$token->endsChunk() || $this->state->treeIsDeep()) { continue; } if ($this->state->hasBuffer() && $this->state->inObject()) { - yield 
$this->decoder->decode($this->state->key()) => $this->decoder->decode($this->state->pullBuffer()); + yield $this->decoder->decode($this->state->key()) => $this->decoder->decode($this->state->value()); } elseif ($this->state->hasBuffer() && !$this->state->inObject()) { - yield $this->decoder->decode($this->state->pullBuffer()); + yield $this->decoder->decode($this->state->value()); } if ($this->state->canStopParsing()) { diff --git a/src/Pointers/Pointer.php b/src/Pointers/Pointer.php index 23ecf9e..3e76e2c 100644 --- a/src/Pointers/Pointer.php +++ b/src/Pointers/Pointer.php @@ -27,6 +27,13 @@ class Pointer implements ArrayAccess, Stringable */ protected int $depth; + /** + * Whether the pointer was found. + * + * @var bool + */ + public bool $wasFound = false; + /** * Instantiate the class. * diff --git a/src/Pointers/Pointers.php b/src/Pointers/Pointers.php index f489c0f..babce37 100644 --- a/src/Pointers/Pointers.php +++ b/src/Pointers/Pointers.php @@ -74,7 +74,10 @@ class Pointers implements Countable */ public function markAsFound(Pointer $pointer): void { - $this->found[(string) $pointer] = true; + if (!$pointer->wasFound) { + $pointer->wasFound = true; + $this->found[(string) $pointer] = true; + } } /** diff --git a/src/State.php b/src/State.php index 85058a6..979c02f 100644 --- a/src/State.php +++ b/src/State.php @@ -52,7 +52,7 @@ class State * * @var bool */ - protected bool $expectsKey = false; + public bool $expectsKey = false; /** * Instantiate the class. 
@@ -73,16 +73,6 @@ class State return $this->tree; } - /** - * Determine whether the tree should be tracked - * - * @return bool - */ - public function shouldTrackTree(): bool - { - return $this->pointer == '' || $this->tree->depth() < $this->pointer->depth(); - } - /** * Determine whether the tree is deep * @@ -105,51 +95,6 @@ class State return $this->tree->currentKey(); } - /** - * Traverse the given object key - * - * @param string $key - * @return void - */ - public function traverseKey(string $key): void - { - $this->tree->traverseKey($key); - - $this->treeChanged = true; - } - - /** - * Traverse a JSON array - * - * @return void - */ - public function traverseArray(): void - { - $this->tree->traverseArray($this->pointer); - - $this->treeChanged = true; - } - - /** - * Determine whether the tree changed - * - * @return bool - */ - public function treeChanged(): bool - { - return $this->treeChanged; - } - - /** - * Mark the JSON tree as not changed - * - * @return void - */ - public function treeDidNotChange(): void - { - $this->treeChanged = false; - } - /** * Determine whether the current position is within an object * @@ -157,7 +102,10 @@ class State */ public function inObject(): bool { - return $this->tree->inObject(); + $tree = $this->tree->original(); + $depth = $this->tree->depth(); + + return is_string($tree[$depth] ?? 
null); } /** @@ -170,41 +118,9 @@ class State { $this->pointers = new Pointers(...$pointers); - $this->matchPointer(); - } - - /** - * Set the JSON pointer matching the tree - * - * @return void - */ - public function matchPointer(): void - { $this->pointer = $this->pointers->matchTree($this->tree); } - /** - * Set the new matching JSON pointer when the tree changes - * - * @return void - */ - public function rematchPointer(): void - { - if ($this->treeChanged && $this->pointers->count() > 1) { - $this->matchPointer(); - } - } - - /** - * Retrieve the JSON pointer matching the tree - * - * @return Pointer - */ - public function pointer(): Pointer - { - return $this->pointer; - } - /** * Determine whether the parser can stop parsing * @@ -215,12 +131,60 @@ class State return $this->pointers->wereFound() && !$this->pointer->includesTree($this->tree); } + /** + * Mutate state depending on the given token + * + * @param Token $token + * @return void + */ + public function mutateByToken(Token $token): void + { + $this->treeChanged = false; + $shouldTrackTree = $this->pointer == '' || $this->tree->depth() < $this->pointer->depth(); + + if ($shouldTrackTree && $token->isValue() && !$this->inObject()) { + $this->tree->traverseArray($this->pointer); + $this->treeChanged = true; + } + + if ($shouldTrackTree && $token->isString() && $this->expectsKey) { + $this->tree->traverseKey($token); + $this->treeChanged = true; + } + + $this->bufferToken($token); + + if ($this->treeChanged && $this->pointers->count() > 1) { + $this->pointer = $this->pointers->matchTree($this->tree); + } + + $token->mutateState($this); + } + + /** + * Buffer the given token + * + * @param Token $token + * @return void + */ + protected function bufferToken(Token $token): void + { + $shouldBuffer = $this->tree->depth() >= 0 + && $this->pointerMatchesTree() + && ($this->treeIsDeep() || ($token->isValue() && !$this->expectsKey)); + + if ($shouldBuffer) { + $this->buffer .= $token; + 
$this->pointers->markAsFound($this->pointer); + } + } + /** * Determine whether the tree matches the JSON pointer * * @return bool */ - public function pointerMatchesTree(): bool + protected function pointerMatchesTree(): bool { return $this->pointer == '' || in_array($this->pointer->referenceTokens(), [$this->tree->original(), $this->tree->wildcarded()]); @@ -237,29 +201,11 @@ class State } /** - * Buffer the given token - * - * @param Token $token - * @return void - */ - public function bufferToken(Token $token): void - { - $shouldBuffer = $this->tree->depth() >= 0 - && $this->pointerMatchesTree() - && ($this->treeIsDeep() || ($token->isValue() && !$this->expectsKey())); - - if ($shouldBuffer) { - $this->buffer .= $token; - $this->pointers->markAsFound($this->pointer); - } - } - - /** - * Retrieve and reset the buffer + * Retrieve the value from the buffer and reset it * * @return string */ - public function pullBuffer(): string + public function value(): string { $buffer = $this->buffer; @@ -267,34 +213,4 @@ class State return $buffer; } - - /** - * Determine whether an object key is expected - * - * @return bool - */ - public function expectsKey(): bool - { - return $this->expectsKey; - } - - /** - * Expect an object key - * - * @return void - */ - public function expectKey(): void - { - $this->expectsKey = true; - } - - /** - * Do not expect any object key - * - * @return void - */ - public function doNotExpectKey(): void - { - $this->expectsKey = false; - } } diff --git a/src/Tokens/Comma.php b/src/Tokens/Comma.php index 4d96769..f172f6e 100644 --- a/src/Tokens/Comma.php +++ b/src/Tokens/Comma.php @@ -28,10 +28,8 @@ class Comma extends Token */ public function mutateState(State $state): void { - parent::mutateState($state); - if ($state->inObject()) { - $state->expectKey(); + $state->expectsKey = true; } } } diff --git a/src/Tokens/CompoundBegin.php b/src/Tokens/CompoundBegin.php index 684ab46..ffd3f12 100644 --- a/src/Tokens/CompoundBegin.php +++ 
b/src/Tokens/CompoundBegin.php @@ -28,12 +28,10 @@ class CompoundBegin extends Token */ public function mutateState(State $state): void { - parent::mutateState($state); - $state->tree()->deepen(); if ($this->value == '{') { - $state->expectKey(); + $state->expectsKey = true; } } } diff --git a/src/Tokens/CompoundEnd.php b/src/Tokens/CompoundEnd.php index 05ae632..261bed3 100644 --- a/src/Tokens/CompoundEnd.php +++ b/src/Tokens/CompoundEnd.php @@ -28,12 +28,10 @@ class CompoundEnd extends Token */ public function mutateState(State $state): void { - parent::mutateState($state); - $state->tree()->emerge(); if ($this->value == '}') { - $state->doNotExpectKey(); + $state->expectsKey = false; } } diff --git a/src/Tokens/ScalarString.php b/src/Tokens/ScalarString.php index 302abe6..dee0ebe 100644 --- a/src/Tokens/ScalarString.php +++ b/src/Tokens/ScalarString.php @@ -35,10 +35,8 @@ class ScalarString extends Token */ public function mutateState(State $state): void { - parent::mutateState($state); - - if ($this->isKey = $state->expectsKey()) { - $state->doNotExpectKey(); + if ($this->isKey = $state->expectsKey) { + $state->expectsKey = false; } } diff --git a/src/Tokens/Token.php b/src/Tokens/Token.php index a8be193..be51747 100644 --- a/src/Tokens/Token.php +++ b/src/Tokens/Token.php @@ -76,19 +76,7 @@ abstract class Token implements Stringable */ public function mutateState(State $state): void { - $state->treeDidNotChange(); - - if ($this->isValue() && !$state->inObject() && $state->shouldTrackTree()) { - $state->traverseArray(); - } - - if ($state->expectsKey() && $state->shouldTrackTree()) { - $state->traverseKey($this); - } - - $state->bufferToken($this); - - $state->rematchPointer(); + return; } /** diff --git a/src/Tree.php b/src/Tree.php index 735e026..5b59362 100644 --- a/src/Tree.php +++ b/src/Tree.php @@ -83,18 +83,6 @@ class Tree implements IteratorAggregate $this->depth--; } - /** - * Determine whether the tree is traversing an object - * - * @return bool - */ 
- public function inObject(): bool - { - $key = $this->original[$this->depth] ?? null; - - return is_string($key); - } - /** * Traverse the given object key * From 54b19eb70b18d731139105c4c9a615ee24ef33f0 Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Sat, 3 Dec 2022 20:29:47 +1000 Subject: [PATCH 094/249] Iterate through the original tree --- src/Pointers/Pointers.php | 2 +- src/Tree.php | 14 +------------- 2 files changed, 2 insertions(+), 14 deletions(-) diff --git a/src/Pointers/Pointers.php b/src/Pointers/Pointers.php index babce37..2ba06a7 100644 --- a/src/Pointers/Pointers.php +++ b/src/Pointers/Pointers.php @@ -54,7 +54,7 @@ class Pointers implements Countable $pointers = []; foreach ($this->pointers as $pointer) { - foreach ($tree as $depth => $key) { + foreach ($tree->original() as $depth => $key) { if (!$pointer->depthMatchesKey($depth, $key)) { continue 2; } elseif (!isset($pointers[$depth])) { diff --git a/src/Tree.php b/src/Tree.php index 5b59362..d126001 100644 --- a/src/Tree.php +++ b/src/Tree.php @@ -3,14 +3,12 @@ namespace Cerbero\JsonParser; use Cerbero\JsonParser\Pointers\Pointer; -use IteratorAggregate; -use Traversable; /** * The JSON tree. * */ -class Tree implements IteratorAggregate +class Tree { /** * The original JSON tree. @@ -135,14 +133,4 @@ class Tree implements IteratorAggregate return is_string($key) ? 
"\"$key\"" : $key; } - - /** - * Retrieve the original tree iterator - * - * @return Traversable - */ - public function getIterator(): Traversable - { - yield from $this->original(); - } } From b92193cda9f080cc63c6657d462fe8031df51c06 Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Sat, 3 Dec 2022 20:32:57 +1000 Subject: [PATCH 095/249] Rename decoder --- .../{ConfiguredDecoder.php => ConfigurableDecoder.php} | 4 ++-- src/Parser.php | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) rename src/Decoders/{ConfiguredDecoder.php => ConfigurableDecoder.php} (89%) diff --git a/src/Decoders/ConfiguredDecoder.php b/src/Decoders/ConfigurableDecoder.php similarity index 89% rename from src/Decoders/ConfiguredDecoder.php rename to src/Decoders/ConfigurableDecoder.php index dc13d91..e9a099b 100644 --- a/src/Decoders/ConfiguredDecoder.php +++ b/src/Decoders/ConfigurableDecoder.php @@ -5,10 +5,10 @@ namespace Cerbero\JsonParser\Decoders; use Cerbero\JsonParser\Config; /** - * The decoder handling potential decoding errors. + * The configurable decoder. * */ -class ConfiguredDecoder +class ConfigurableDecoder { /** * Instantiate the class. diff --git a/src/Parser.php b/src/Parser.php index 80a9305..f3a508f 100644 --- a/src/Parser.php +++ b/src/Parser.php @@ -2,7 +2,7 @@ namespace Cerbero\JsonParser; -use Cerbero\JsonParser\Decoders\ConfiguredDecoder; +use Cerbero\JsonParser\Decoders\ConfigurableDecoder; use Cerbero\JsonParser\Sources\Source; use IteratorAggregate; use Traversable; @@ -23,9 +23,9 @@ class Parser implements IteratorAggregate /** * The decoder handling potential errors. * - * @var ConfiguredDecoder + * @var ConfigurableDecoder */ - protected ConfiguredDecoder $decoder; + protected ConfigurableDecoder $decoder; /** * Instantiate the class. 
@@ -36,7 +36,7 @@ class Parser implements IteratorAggregate public function __construct(protected Lexer $lexer, protected Config $config) { $this->state = new State(); - $this->decoder = new ConfiguredDecoder($config); + $this->decoder = new ConfigurableDecoder($config); } /** From ed1def157273d93593ae3c3b0f6d089afc5d906e Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Sat, 3 Dec 2022 20:41:54 +1000 Subject: [PATCH 096/249] Rename codes --- src/Exceptions/JsonParserException.php | 8 ++++---- src/Exceptions/PointerException.php | 2 +- src/Exceptions/SourceException.php | 6 +++--- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/src/Exceptions/JsonParserException.php b/src/Exceptions/JsonParserException.php index 42acabf..0155410 100644 --- a/src/Exceptions/JsonParserException.php +++ b/src/Exceptions/JsonParserException.php @@ -10,9 +10,9 @@ use Exception; */ abstract class JsonParserException extends Exception { - public const CODE_SOURCE_INVALID = 0; - public const CODE_SOURCE_UNSUPPORTED = 1; - public const CODE_SOURCE_GUZZLE = 2; + public const SOURCE_INVALID = 0; + public const SOURCE_UNSUPPORTED = 1; + public const SOURCE_GUZZLE = 2; - public const CODE_POINTER_INVALID = 3; + public const POINTER_INVALID = 0; } diff --git a/src/Exceptions/PointerException.php b/src/Exceptions/PointerException.php index b17487d..7cc5504 100644 --- a/src/Exceptions/PointerException.php +++ b/src/Exceptions/PointerException.php @@ -16,6 +16,6 @@ class PointerException extends JsonParserException */ public static function invalid(string $pointer): static { - return new static("The string [$pointer] is not a valid JSON pointer", static::CODE_POINTER_INVALID); + return new static("The string [$pointer] is not a valid JSON pointer", static::POINTER_INVALID); } } diff --git a/src/Exceptions/SourceException.php b/src/Exceptions/SourceException.php index 9ce57c8..4fcf367 100644 --- a/src/Exceptions/SourceException.php +++ b/src/Exceptions/SourceException.php @@ 
-16,7 +16,7 @@ class SourceException extends JsonParserException */ public static function invalid(string $source): static { - return new static("[$source] is not a valid source", static::CODE_SOURCE_INVALID); + return new static("[$source] is not a valid source", static::SOURCE_INVALID); } /** @@ -26,7 +26,7 @@ class SourceException extends JsonParserException */ public static function unsupported(): static { - return new static('Unable to load JSON from the provided source', static::CODE_SOURCE_UNSUPPORTED); + return new static('Unable to load JSON from the provided source', static::SOURCE_UNSUPPORTED); } /** @@ -36,6 +36,6 @@ class SourceException extends JsonParserException */ public static function requireGuzzle(): static { - return new static('Guzzle is required to load JSON from endpoints', static::CODE_SOURCE_GUZZLE); + return new static('Guzzle is required to load JSON from endpoints', static::SOURCE_GUZZLE); } } From cc1d3f18e73f1973173873a23bf0940c23ddf03e Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Sat, 3 Dec 2022 20:51:23 +1000 Subject: [PATCH 097/249] Access reference tokens instead of the pointer --- src/Pointers/Pointer.php | 52 +++------------------------------------- src/State.php | 2 +- src/Tree.php | 7 +++--- 3 files changed, 8 insertions(+), 53 deletions(-) diff --git a/src/Pointers/Pointer.php b/src/Pointers/Pointer.php index 3e76e2c..5a00794 100644 --- a/src/Pointers/Pointer.php +++ b/src/Pointers/Pointer.php @@ -2,7 +2,6 @@ namespace Cerbero\JsonParser\Pointers; -use ArrayAccess; use Cerbero\JsonParser\Exceptions\PointerException; use Cerbero\JsonParser\Tree; use Stringable; @@ -11,7 +10,7 @@ use Stringable; * The JSON pointer. * */ -class Pointer implements ArrayAccess, Stringable +class Pointer implements Stringable { /** * The reference tokens. 
@@ -48,7 +47,7 @@ class Pointer implements ArrayAccess, Stringable /** * Turn the JSON pointer into reference tokens * - * @return array + * @return string[] */ protected function toReferenceTokens(): array { @@ -65,7 +64,7 @@ class Pointer implements ArrayAccess, Stringable /** * Retrieve the reference tokens * - * @return array + * @return string[] */ public function referenceTokens(): array { @@ -117,51 +116,6 @@ class Pointer implements ArrayAccess, Stringable return array_slice($this->referenceTokens, 0, $firstNest) == array_slice($tree->original(), 0, $firstNest); } - /** - * Determine whether the given reference token exists - * - * @param mixed $offset - * @return bool - */ - public function offsetExists(mixed $offset): bool - { - return isset($this->referenceTokens[$offset]); - } - - /** - * Retrieve the given reference token - * - * @param mixed $offset - * @return mixed - */ - public function offsetGet(mixed $offset): mixed - { - return $this->referenceTokens[$offset] ?? null; - } - - /** - * Do not set any reference token - * - * @param mixed $offset - * @param mixed $value - * @return void - */ - public function offsetSet(mixed $offset, mixed $value): void - { - return; - } - - /** - * Do not unset any reference token - * - * @param mixed $offset - * @return void - */ - public function offsetUnset(mixed $offset): void - { - return; - } - /** * Retrieve the underlying JSON pointer * diff --git a/src/State.php b/src/State.php index 979c02f..ec4aa0a 100644 --- a/src/State.php +++ b/src/State.php @@ -143,7 +143,7 @@ class State $shouldTrackTree = $this->pointer == '' || $this->tree->depth() < $this->pointer->depth(); if ($shouldTrackTree && $token->isValue() && !$this->inObject()) { - $this->tree->traverseArray($this->pointer); + $this->tree->traverseArray($this->pointer->referenceTokens()); $this->treeChanged = true; } diff --git a/src/Tree.php b/src/Tree.php index d126001..39204f5 100644 --- a/src/Tree.php +++ b/src/Tree.php @@ -111,13 +111,14 @@ class 
Tree /** * Traverse an array * - * @param Pointer $pointer + * @param string[] $referenceTokens * @return void */ - public function traverseArray(Pointer $pointer): void + public function traverseArray(array $referenceTokens): void { + $referenceToken = $referenceTokens[$this->depth] ?? null; $this->original[$this->depth] = isset($this->original[$this->depth]) ? $this->original[$this->depth] + 1 : 0; - $this->wildcarded[$this->depth] = $pointer[$this->depth] == '-' ? '-' : $this->original[$this->depth]; + $this->wildcarded[$this->depth] = $referenceToken == '-' ? '-' : $this->original[$this->depth]; $this->trim(); } From 78a041a56a473cae45c8a825909b296f52f14e17 Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Sat, 3 Dec 2022 21:03:39 +1000 Subject: [PATCH 098/249] Remove custom sources registration --- src/Exceptions/JsonParserException.php | 5 ++--- src/Exceptions/SourceException.php | 11 ----------- src/Sources/AnySource.php | 6 +----- src/Sources/Source.php | 25 ------------------------- 4 files changed, 3 insertions(+), 44 deletions(-) diff --git a/src/Exceptions/JsonParserException.php b/src/Exceptions/JsonParserException.php index 0155410..20e67cc 100644 --- a/src/Exceptions/JsonParserException.php +++ b/src/Exceptions/JsonParserException.php @@ -10,9 +10,8 @@ use Exception; */ abstract class JsonParserException extends Exception { - public const SOURCE_INVALID = 0; - public const SOURCE_UNSUPPORTED = 1; - public const SOURCE_GUZZLE = 2; + public const SOURCE_UNSUPPORTED = 0; + public const SOURCE_GUZZLE = 1; public const POINTER_INVALID = 0; } diff --git a/src/Exceptions/SourceException.php b/src/Exceptions/SourceException.php index 4fcf367..275f2d3 100644 --- a/src/Exceptions/SourceException.php +++ b/src/Exceptions/SourceException.php @@ -8,17 +8,6 @@ namespace Cerbero\JsonParser\Exceptions; */ class SourceException extends JsonParserException { - /** - * Retrieve the exception when the given source is invalid - * - * @param string $source - * 
@return static - */ - public static function invalid(string $source): static - { - return new static("[$source] is not a valid source", static::SOURCE_INVALID); - } - /** * Retrieve the exception when a JSON source is not supported * diff --git a/src/Sources/AnySource.php b/src/Sources/AnySource.php index 620b9ec..e1fdc4f 100644 --- a/src/Sources/AnySource.php +++ b/src/Sources/AnySource.php @@ -15,7 +15,7 @@ class AnySource extends Source /** * The supported sources. * - * @var array + * @var string[] */ protected array $supportedSources = [ CustomSource::class, @@ -60,10 +60,6 @@ class AnySource extends Source */ protected function sources(): Generator { - foreach (static::$customSources as $source) { - yield $source::from($this->source, $this->config); - } - foreach ($this->supportedSources as $source) { yield $source::from($this->source, $this->config); } diff --git a/src/Sources/Source.php b/src/Sources/Source.php index f3c80d1..b2ae0de 100644 --- a/src/Sources/Source.php +++ b/src/Sources/Source.php @@ -3,7 +3,6 @@ namespace Cerbero\JsonParser\Sources; use Cerbero\JsonParser\Config; -use Cerbero\JsonParser\Exceptions\SourceException; use IteratorAggregate; use Traversable; @@ -13,13 +12,6 @@ use Traversable; */ abstract class Source implements IteratorAggregate { - /** - * The registered custom sources. - * - * @var array - */ - protected static array $customSources = []; - /** * The cached size of the JSON source. 
* @@ -70,23 +62,6 @@ abstract class Source implements IteratorAggregate return new static($source, $config); } - /** - * Register the given custom sources - * - * @param string ...$customSource - * @return void - */ - public static function register(string ...$customSource): void - { - foreach ($customSource as $class) { - if (!is_subclass_of($class, Source::class)) { - throw SourceException::invalid($class); - } - - static::$customSources[] = $class; - } - } - /** * Retrieve the underlying configuration * From 1eb1a56164597e0ab89f2dd15f006f6660aa8ce2 Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Sat, 3 Dec 2022 21:21:14 +1000 Subject: [PATCH 099/249] Remove obsolete methods --- src/State.php | 2 +- src/Tokens/Token.php | 20 -------------------- 2 files changed, 1 insertion(+), 21 deletions(-) diff --git a/src/State.php b/src/State.php index ec4aa0a..8f235b9 100644 --- a/src/State.php +++ b/src/State.php @@ -147,7 +147,7 @@ class State $this->treeChanged = true; } - if ($shouldTrackTree && $token->isString() && $this->expectsKey) { + if ($shouldTrackTree && $this->expectsKey) { $this->tree->traverseKey($token); $this->treeChanged = true; } diff --git a/src/Tokens/Token.php b/src/Tokens/Token.php index be51747..36e7f85 100644 --- a/src/Tokens/Token.php +++ b/src/Tokens/Token.php @@ -48,26 +48,6 @@ abstract class Token implements Stringable return ($this->type() | Tokens::VALUE_ANY) == Tokens::VALUE_ANY; } - /** - * Determine whether the token is a scalar value - * - * @return bool - */ - public function isScalar(): bool - { - return ($this->type() | Tokens::VALUE_SCALAR) == Tokens::VALUE_SCALAR; - } - - /** - * Determine whether the token is a string - * - * @return bool - */ - public function isString(): bool - { - return ($this->type() | Tokens::SCALAR_STRING) == Tokens::SCALAR_STRING; - } - /** * Mutate the given state * From 77408fec20c0f3a74869756db665cd0cbebbe8e5 Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Sat, 3 Dec 2022 21:21:44 
+1000 Subject: [PATCH 100/249] Improve types in docblocks --- src/JsonParser.php | 2 +- src/Lexer.php | 4 ++-- src/Parser.php | 2 +- src/Sources/AnySource.php | 2 +- src/Sources/CustomSource.php | 2 +- src/Sources/Endpoint.php | 2 +- src/Sources/Filename.php | 2 +- src/Sources/IterableSource.php | 2 +- src/Sources/Json.php | 2 +- src/Sources/LaravelClientResponse.php | 2 +- src/Sources/Psr7Message.php | 2 +- src/Sources/Psr7Stream.php | 2 +- src/Sources/Resource.php | 2 +- src/Sources/Source.php | 2 +- src/Tree.php | 8 ++++---- 15 files changed, 19 insertions(+), 19 deletions(-) diff --git a/src/JsonParser.php b/src/JsonParser.php index 89dc7f6..c2737ca 100644 --- a/src/JsonParser.php +++ b/src/JsonParser.php @@ -127,7 +127,7 @@ class JsonParser implements IteratorAggregate /** * Retrieve the lazily iterable JSON * - * @return Traversable + * @return Traversable */ public function getIterator(): Traversable { diff --git a/src/Lexer.php b/src/Lexer.php index 6f7a4ea..7caa3d9 100644 --- a/src/Lexer.php +++ b/src/Lexer.php @@ -57,7 +57,7 @@ class Lexer implements IteratorAggregate /** * Retrieve the JSON fragments * - * @return Generator + * @return Traversable */ public function getIterator(): Traversable { @@ -89,7 +89,7 @@ class Lexer implements IteratorAggregate * Yield the given character or buffer it * * @param string $character - * @return Generator + * @return Generator */ protected function yieldOrBufferCharacter(string $character): Generator { diff --git a/src/Parser.php b/src/Parser.php index f3a508f..db450a0 100644 --- a/src/Parser.php +++ b/src/Parser.php @@ -53,7 +53,7 @@ class Parser implements IteratorAggregate /** * Retrieve the JSON fragments * - * @return Traversable + * @return Traversable */ public function getIterator(): Traversable { diff --git a/src/Sources/AnySource.php b/src/Sources/AnySource.php index e1fdc4f..087d5fb 100644 --- a/src/Sources/AnySource.php +++ b/src/Sources/AnySource.php @@ -39,7 +39,7 @@ class AnySource extends Source /** * 
Retrieve the JSON fragments * - * @return Traversable + * @return Traversable * @throws SourceException */ public function getIterator(): Traversable diff --git a/src/Sources/CustomSource.php b/src/Sources/CustomSource.php index b49d21b..28ba042 100644 --- a/src/Sources/CustomSource.php +++ b/src/Sources/CustomSource.php @@ -13,7 +13,7 @@ class CustomSource extends Source /** * Retrieve the JSON fragments * - * @return Traversable + * @return Traversable */ public function getIterator(): Traversable { diff --git a/src/Sources/Endpoint.php b/src/Sources/Endpoint.php index b3bde0d..2a5ff93 100644 --- a/src/Sources/Endpoint.php +++ b/src/Sources/Endpoint.php @@ -26,7 +26,7 @@ class Endpoint extends Source /** * Retrieve the JSON fragments * - * @return Traversable + * @return Traversable */ public function getIterator(): Traversable { diff --git a/src/Sources/Filename.php b/src/Sources/Filename.php index 38f37f5..c6a517c 100644 --- a/src/Sources/Filename.php +++ b/src/Sources/Filename.php @@ -13,7 +13,7 @@ class Filename extends Source /** * Retrieve the JSON fragments * - * @return Traversable + * @return Traversable */ public function getIterator(): Traversable { diff --git a/src/Sources/IterableSource.php b/src/Sources/IterableSource.php index c3abe85..0f6ab22 100644 --- a/src/Sources/IterableSource.php +++ b/src/Sources/IterableSource.php @@ -13,7 +13,7 @@ class IterableSource extends Source /** * Retrieve the JSON fragments * - * @return Traversable + * @return Traversable */ public function getIterator(): Traversable { diff --git a/src/Sources/Json.php b/src/Sources/Json.php index 670f665..d6745b3 100644 --- a/src/Sources/Json.php +++ b/src/Sources/Json.php @@ -16,7 +16,7 @@ class Json extends Source /** * Retrieve the JSON fragments * - * @return Traversable + * @return Traversable */ public function getIterator(): Traversable { diff --git a/src/Sources/LaravelClientResponse.php b/src/Sources/LaravelClientResponse.php index da257aa..b03a60d 100644 --- 
a/src/Sources/LaravelClientResponse.php +++ b/src/Sources/LaravelClientResponse.php @@ -14,7 +14,7 @@ class LaravelClientResponse extends Source /** * Retrieve the JSON fragments * - * @return Traversable + * @return Traversable */ public function getIterator(): Traversable { diff --git a/src/Sources/Psr7Message.php b/src/Sources/Psr7Message.php index 9d0584d..46e5d6f 100644 --- a/src/Sources/Psr7Message.php +++ b/src/Sources/Psr7Message.php @@ -14,7 +14,7 @@ class Psr7Message extends Source /** * Retrieve the JSON fragments * - * @return Traversable + * @return Traversable */ public function getIterator(): Traversable { diff --git a/src/Sources/Psr7Stream.php b/src/Sources/Psr7Stream.php index 8117d62..54eca4c 100644 --- a/src/Sources/Psr7Stream.php +++ b/src/Sources/Psr7Stream.php @@ -14,7 +14,7 @@ class Psr7Stream extends Source /** * Retrieve the JSON fragments * - * @return Traversable + * @return Traversable */ public function getIterator(): Traversable { diff --git a/src/Sources/Resource.php b/src/Sources/Resource.php index 3af5171..5a2b77d 100644 --- a/src/Sources/Resource.php +++ b/src/Sources/Resource.php @@ -13,7 +13,7 @@ class Resource extends Source /** * Retrieve the JSON fragments * - * @return Traversable + * @return Traversable */ public function getIterator(): Traversable { diff --git a/src/Sources/Source.php b/src/Sources/Source.php index b2ae0de..362043a 100644 --- a/src/Sources/Source.php +++ b/src/Sources/Source.php @@ -22,7 +22,7 @@ abstract class Source implements IteratorAggregate /** * Retrieve the JSON fragments * - * @return Traversable + * @return Traversable */ abstract public function getIterator(): Traversable; diff --git a/src/Tree.php b/src/Tree.php index 39204f5..115da32 100644 --- a/src/Tree.php +++ b/src/Tree.php @@ -13,14 +13,14 @@ class Tree /** * The original JSON tree. * - * @var array + * @var array */ protected array $original = []; /** * The wildcarded JSON tree. 
* - * @var array + * @var array */ protected array $wildcarded = []; @@ -34,7 +34,7 @@ class Tree /** * Retrieve the original JSON tree * - * @return array + * @return array */ public function original(): array { @@ -44,7 +44,7 @@ class Tree /** * Retrieve the wildcarded JSON tree * - * @return array + * @return array */ public function wildcarded(): array { From 00c0bf4a0b946ffe831e35032b4b96b506e6a3d9 Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Sat, 3 Dec 2022 21:23:51 +1000 Subject: [PATCH 101/249] Rename decoder --- .../{ConfigurableDecoder.php => CustomDecoder.php} | 2 +- src/Parser.php | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) rename src/Decoders/{ConfigurableDecoder.php => CustomDecoder.php} (96%) diff --git a/src/Decoders/ConfigurableDecoder.php b/src/Decoders/CustomDecoder.php similarity index 96% rename from src/Decoders/ConfigurableDecoder.php rename to src/Decoders/CustomDecoder.php index e9a099b..c7a181f 100644 --- a/src/Decoders/ConfigurableDecoder.php +++ b/src/Decoders/CustomDecoder.php @@ -8,7 +8,7 @@ use Cerbero\JsonParser\Config; * The configurable decoder. * */ -class ConfigurableDecoder +class CustomDecoder { /** * Instantiate the class. diff --git a/src/Parser.php b/src/Parser.php index db450a0..ac60843 100644 --- a/src/Parser.php +++ b/src/Parser.php @@ -2,7 +2,7 @@ namespace Cerbero\JsonParser; -use Cerbero\JsonParser\Decoders\ConfigurableDecoder; +use Cerbero\JsonParser\Decoders\CustomDecoder; use Cerbero\JsonParser\Sources\Source; use IteratorAggregate; use Traversable; @@ -23,9 +23,9 @@ class Parser implements IteratorAggregate /** * The decoder handling potential errors. * - * @var ConfigurableDecoder + * @var CustomDecoder */ - protected ConfigurableDecoder $decoder; + protected CustomDecoder $decoder; /** * Instantiate the class. 
@@ -36,7 +36,7 @@ class Parser implements IteratorAggregate public function __construct(protected Lexer $lexer, protected Config $config) { $this->state = new State(); - $this->decoder = new ConfigurableDecoder($config); + $this->decoder = new CustomDecoder($config); } /** From 66229aeea470d3664b680eee9b4f94e8bad9bf5c Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Thu, 8 Dec 2022 21:52:28 +1000 Subject: [PATCH 102/249] Allow setting an empty-string pointer --- src/Pointers/Pointer.php | 7 ++++--- src/State.php | 18 +++++------------- tests/fixtures/pointers/single_pointer.php | 7 ++++++- 3 files changed, 15 insertions(+), 17 deletions(-) diff --git a/src/Pointers/Pointer.php b/src/Pointers/Pointer.php index 5a00794..401e96d 100644 --- a/src/Pointers/Pointer.php +++ b/src/Pointers/Pointer.php @@ -109,11 +109,12 @@ class Pointer implements Stringable */ public function includesTree(Tree $tree): bool { - if (($firstNest = array_search('-', $this->referenceTokens)) === false) { - return false; + if ($this->pointer == '') { + return true; } - return array_slice($this->referenceTokens, 0, $firstNest) == array_slice($tree->original(), 0, $firstNest); + return (($firstNest = array_search('-', $this->referenceTokens)) !== false) + && array_slice($this->referenceTokens, 0, $firstNest) == array_slice($tree->original(), 0, $firstNest); } /** diff --git a/src/State.php b/src/State.php index 8f235b9..838852e 100644 --- a/src/State.php +++ b/src/State.php @@ -19,13 +19,6 @@ class State */ protected Tree $tree; - /** - * Whether the tree changed. - * - * @var bool - */ - protected bool $treeChanged = false; - /** * The JSON pointers. 
* @@ -139,22 +132,22 @@ class State */ public function mutateByToken(Token $token): void { - $this->treeChanged = false; + $treeChanged = false; $shouldTrackTree = $this->pointer == '' || $this->tree->depth() < $this->pointer->depth(); if ($shouldTrackTree && $token->isValue() && !$this->inObject()) { $this->tree->traverseArray($this->pointer->referenceTokens()); - $this->treeChanged = true; + $treeChanged = true; } if ($shouldTrackTree && $this->expectsKey) { $this->tree->traverseKey($token); - $this->treeChanged = true; + $treeChanged = true; } $this->bufferToken($token); - if ($this->treeChanged && $this->pointers->count() > 1) { + if ($treeChanged && $this->pointers->count() > 1) { $this->pointer = $this->pointers->matchTree($this->tree); } @@ -186,8 +179,7 @@ class State */ protected function pointerMatchesTree(): bool { - return $this->pointer == '' - || in_array($this->pointer->referenceTokens(), [$this->tree->original(), $this->tree->wildcarded()]); + return in_array($this->pointer->referenceTokens(), [[], $this->tree->original(), $this->tree->wildcarded()]); } /** diff --git a/tests/fixtures/pointers/single_pointer.php b/tests/fixtures/pointers/single_pointer.php index 3acae9b..536eecf 100644 --- a/tests/fixtures/pointers/single_pointer.php +++ b/tests/fixtures/pointers/single_pointer.php @@ -2,6 +2,7 @@ return [ 'complex_array' => [ + '' => require __DIR__ . '/../parsing/complex_array.php', '/-' => [ [ "id" => "0001", @@ -248,6 +249,7 @@ return [ '/-/batters/batter/-/id' => ['id' => ["1001", "1002", "1003", "1004", "1001", "1001", "1002"]], ], 'complex_object' => [ + '' => require __DIR__ . 
'/../parsing/complex_object.php', '/id' => ['id' => '0001'], '/batters' => [ 'batters' => [ @@ -312,19 +314,21 @@ return [ '/batters/batter/-/id' => ['id' => ["1001", "1002", "1003", "1004"]], ], 'empty_array' => [ + '' => [], '/-' => [], '/-1' => [], '/0' => [], '/foo' => [], ], 'empty_object' => [ + '' => [], '/-' => [], '/-1' => [], '/0' => [], '/foo' => [], ], 'simple_array' => [ - // '' => [1, '', 'foo', '"bar"', 'hej då', 3.14, false, null, [], []], + '' => require __DIR__ . '/../parsing/simple_array.php', '/-' => [1, '', 'foo', '"bar"', 'hej då', 3.14, false, null, [], []], '/-1' => [], '/0' => [1], @@ -341,6 +345,7 @@ return [ '/foo' => [], ], 'simple_object' => [ + '' => require __DIR__ . '/../parsing/simple_object.php', '/-' => [], '/-1' => [], '/int' => ['int' => 1], From b914eb7cc14fe05436b044352fc49c6c5bc4f9b8 Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Thu, 8 Dec 2022 22:00:22 +1000 Subject: [PATCH 103/249] Configure empty-string pointer as default pointer --- src/Config.php | 1 + src/Pointers/Pointers.php | 15 +++------------ 2 files changed, 4 insertions(+), 12 deletions(-) diff --git a/src/Config.php b/src/Config.php index bd80562..b39431e 100644 --- a/src/Config.php +++ b/src/Config.php @@ -49,6 +49,7 @@ class Config public function __construct() { $this->decoder = new ArrayDecoder(); + $this->pointers[] = new Pointer(''); $this->onError = fn (DecodedValue $decoded) => throw $decoded->exception; } } diff --git a/src/Pointers/Pointers.php b/src/Pointers/Pointers.php index 2ba06a7..b28dfc4 100644 --- a/src/Pointers/Pointers.php +++ b/src/Pointers/Pointers.php @@ -3,13 +3,12 @@ namespace Cerbero\JsonParser\Pointers; use Cerbero\JsonParser\Tree; -use Countable; /** * The JSON pointers collection. * */ -class Pointers implements Countable +class Pointers { /** * The JSON pointers collection. @@ -18,13 +17,6 @@ class Pointers implements Countable */ protected array $pointers; - /** - * The default pointer. 
- * - * @var Pointer - */ - protected Pointer $defaultPointer; - /** * The list of pointers that were found within the JSON. * @@ -40,7 +32,6 @@ class Pointers implements Countable public function __construct(Pointer ...$pointers) { $this->pointers = $pointers; - $this->defaultPointer = new Pointer(''); } /** @@ -63,7 +54,7 @@ class Pointers implements Countable } } - return end($pointers) ?: $this->pointers[0] ?? $this->defaultPointer; + return end($pointers) ?: $this->pointers[0]; } /** @@ -87,7 +78,7 @@ class Pointers implements Countable */ public function wereFound(): bool { - return $this->count() > 0 && $this->count() == count($this->found); + return $this->count() == count($this->found); } /** From 8e9e3817135c49e3c1615178ba322c7b5cee4fbb Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Wed, 21 Dec 2022 19:27:08 +1000 Subject: [PATCH 104/249] Shorten dataset --- tests/fixtures/pointers/single_pointer.php | 138 +-------------------- 1 file changed, 5 insertions(+), 133 deletions(-) diff --git a/tests/fixtures/pointers/single_pointer.php b/tests/fixtures/pointers/single_pointer.php index 536eecf..8a024b4 100644 --- a/tests/fixtures/pointers/single_pointer.php +++ b/tests/fixtures/pointers/single_pointer.php @@ -2,137 +2,8 @@ return [ 'complex_array' => [ - '' => require __DIR__ . 
'/../parsing/complex_array.php', - '/-' => [ - [ - "id" => "0001", - "type" => "donut", - "name" => "Cake", - "ppu" => 0.55, - "batters" => [ - "batter" => [ - [ - "id" => "1001", - "type" => "Regular", - ], - [ - "id" => "1002", - "type" => "Chocolate", - ], - [ - "id" => "1003", - "type" => "Blueberry", - ], - [ - "id" => "1004", - "type" => "Devil's Food", - ], - ], - ], - "topping" => [ - [ - "id" => "5001", - "type" => "None", - ], - [ - "id" => "5002", - "type" => "Glazed", - ], - [ - "id" => "5005", - "type" => "Sugar", - ], - [ - "id" => "5007", - "type" => "Powdered Sugar", - ], - [ - "id" => "5006", - "type" => "Chocolate with Sprinkles", - ], - [ - "id" => "5003", - "type" => "Chocolate", - ], - [ - "id" => "5004", - "type" => "Maple", - ], - ], - ], - [ - "id" => "0002", - "type" => "donut", - "name" => "Raised", - "ppu" => 0.55, - "batters" => [ - "batter" => [ - [ - "id" => "1001", - "type" => "Regular", - ], - ], - ], - "topping" => [ - [ - "id" => "5001", - "type" => "None", - ], - [ - "id" => "5002", - "type" => "Glazed", - ], - [ - "id" => "5005", - "type" => "Sugar", - ], - [ - "id" => "5003", - "type" => "Chocolate", - ], - [ - "id" => "5004", - "type" => "Maple", - ], - ], - ], - [ - "id" => "0003", - "type" => "donut", - "name" => "Old Fashioned", - "ppu" => 0.55, - "batters" => [ - "batter" => [ - [ - "id" => "1001", - "type" => "Regular", - ], - [ - "id" => "1002", - "type" => "Chocolate", - ], - ], - ], - "topping" => [ - [ - "id" => "5001", - "type" => "None", - ], - [ - "id" => "5002", - "type" => "Glazed", - ], - [ - "id" => "5003", - "type" => "Chocolate", - ], - [ - "id" => "5004", - "type" => "Maple", - ], - ], - ], - ], + '' => $complexArray = require __DIR__ . '/../parsing/complex_array.php', + '/-' => $complexArray, '/-/id' => ['id' => ['0001', '0002', '0003']], '/-/batters' => [ 'batters' => [ @@ -250,6 +121,7 @@ return [ ], 'complex_object' => [ '' => require __DIR__ . 
'/../parsing/complex_object.php', + '/-' => [], '/id' => ['id' => '0001'], '/batters' => [ 'batters' => [ @@ -328,8 +200,8 @@ return [ '/foo' => [], ], 'simple_array' => [ - '' => require __DIR__ . '/../parsing/simple_array.php', - '/-' => [1, '', 'foo', '"bar"', 'hej då', 3.14, false, null, [], []], + '' => $simpleArray = require __DIR__ . '/../parsing/simple_array.php', + '/-' => $simpleArray, '/-1' => [], '/0' => [1], '/1' => [''], From 1f34de5e2b56c1072e75ecc9ad55a8cbc70e5178 Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Wed, 21 Dec 2022 19:28:38 +1000 Subject: [PATCH 105/249] Mark pointer as found --- src/Pointers/Pointers.php | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/Pointers/Pointers.php b/src/Pointers/Pointers.php index b28dfc4..eb3a508 100644 --- a/src/Pointers/Pointers.php +++ b/src/Pointers/Pointers.php @@ -66,8 +66,7 @@ class Pointers public function markAsFound(Pointer $pointer): void { if (!$pointer->wasFound) { - $pointer->wasFound = true; - $this->found[(string) $pointer] = true; + $this->found[(string) $pointer] = $pointer->wasFound = true; } } From e2a28ba93daa9759950932eceec74063f74895a2 Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Wed, 21 Dec 2022 19:28:54 +1000 Subject: [PATCH 106/249] Leverage strict comparison --- src/Pointers/Pointer.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Pointers/Pointer.php b/src/Pointers/Pointer.php index 401e96d..0aac41d 100644 --- a/src/Pointers/Pointer.php +++ b/src/Pointers/Pointer.php @@ -114,7 +114,7 @@ class Pointer implements Stringable } return (($firstNest = array_search('-', $this->referenceTokens)) !== false) - && array_slice($this->referenceTokens, 0, $firstNest) == array_slice($tree->original(), 0, $firstNest); + && array_slice($this->referenceTokens, 0, $firstNest) === array_slice($tree->original(), 0, $firstNest); } /** From e1450733d1f87d22b7b12c23dd3623be9c416f08 Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori 
Date: Wed, 21 Dec 2022 19:29:54 +1000 Subject: [PATCH 107/249] Match pointer before buffering the token --- src/State.php | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/State.php b/src/State.php index 838852e..a7e8160 100644 --- a/src/State.php +++ b/src/State.php @@ -145,12 +145,12 @@ class State $treeChanged = true; } - $this->bufferToken($token); - if ($treeChanged && $this->pointers->count() > 1) { $this->pointer = $this->pointers->matchTree($this->tree); } + $this->bufferToken($token); + $token->mutateState($this); } From c760a5981c6f1f1af07f02c6ff93c8e9dbad3e38 Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Wed, 21 Dec 2022 19:30:18 +1000 Subject: [PATCH 108/249] Test multiple pointers --- tests/Dataset.php | 24 +++- tests/Feature/PointersTest.php | 14 +-- tests/fixtures/pointers/multiple_pointers.php | 106 ++++++++++++++++++ 3 files changed, 134 insertions(+), 10 deletions(-) create mode 100644 tests/fixtures/pointers/multiple_pointers.php diff --git a/tests/Dataset.php b/tests/Dataset.php index dc2333d..69910e8 100644 --- a/tests/Dataset.php +++ b/tests/Dataset.php @@ -42,6 +42,16 @@ class Dataset } } + /** + * Retrieve the dataset to test invalid pointers + * + * @return Generator + */ + public static function forInvalidPointers(): Generator + { + yield from ['abc', '/foo~2', '/~', ' ']; + } + /** * Retrieve the dataset to test single pointers * @@ -61,12 +71,20 @@ class Dataset } /** - * Retrieve the dataset to test invalid pointers + * Retrieve the dataset to test multiple pointers * * @return Generator */ - public static function forInvalidPointers(): Generator + public static function forMultiplePointers(): Generator { - yield from ['abc', '/foo~2', '/~', ' ']; + $multiplePointers = require __DIR__ . '/fixtures/pointers/multiple_pointers.php'; + + foreach ($multiplePointers as $fixture => $valueByPointers) { + $json = file_get_contents(__DIR__ . 
"/fixtures/json/{$fixture}.json"); + + foreach ($valueByPointers as $pointers => $value) { + yield [$json, explode(',', $pointers), $value]; + } + } } } diff --git a/tests/Feature/PointersTest.php b/tests/Feature/PointersTest.php index 3cf19e6..44962fa 100644 --- a/tests/Feature/PointersTest.php +++ b/tests/Feature/PointersTest.php @@ -5,15 +5,15 @@ use Cerbero\JsonParser\Exceptions\PointerException; use Cerbero\JsonParser\JsonParser; -it('supports single JSON pointers', function (string $json, string $pointer, array $parsed) { - expect(JsonParser::parse($json)->pointer($pointer))->toPointTo($parsed); -})->with(Dataset::forSinglePointers()); - it('throws an exception when providing an invalid JSON pointer', function (string $pointer) { expect(fn () => iterator_to_array(JsonParser::parse('{}')->pointer($pointer))) ->toThrow(PointerException::class, "The string [$pointer] is not a valid JSON pointer"); })->with(Dataset::forInvalidPointers()); -// it('supports multiple JSON pointers', function (string $json, array $pointers, array $parsed) { -// expect(JsonParser::parse($json)->pointer(...$pointers))->toParseTo($parsed); -// })->with(Dataset::forMultiplePointers()); +it('supports single JSON pointers', function (string $json, string $pointer, array $parsed) { + expect(JsonParser::parse($json)->pointer($pointer))->toPointTo($parsed); +})->with(Dataset::forSinglePointers()); + +it('supports multiple JSON pointers', function (string $json, array $pointers, array $parsed) { + expect(JsonParser::parse($json)->pointer(...$pointers))->toPointTo($parsed); +})->with(Dataset::forMultiplePointers()); diff --git a/tests/fixtures/pointers/multiple_pointers.php b/tests/fixtures/pointers/multiple_pointers.php new file mode 100644 index 0000000..1e34d31 --- /dev/null +++ b/tests/fixtures/pointers/multiple_pointers.php @@ -0,0 +1,106 @@ + [ + '/-1,/-2' => [], + '/-/id,/-/batters/batter/-/type' => [ + 'id' => ['0001', '0002', '0003'], + 'type' => ['Regular', 'Chocolate', 'Blueberry', 
"Devil's Food", 'Regular', 'Regular', 'Chocolate'], + ], + '/-/name,/-/topping/-/type,/-/id' => [ + 'id' => ['0001', '0002', '0003'], + 'name' => ['Cake', 'Raised', 'Old Fashioned'], + 'type' => ['None', 'Glazed', 'Sugar', 'Powdered Sugar', 'Chocolate with Sprinkles', 'Chocolate', 'Maple', 'None', 'Glazed', 'Sugar', 'Chocolate', 'Maple', 'None', 'Glazed', 'Chocolate', 'Maple'], + ], + '/-/batters/batter/-,/-/name' => [ + 'name' => ['Cake', 'Raised', 'Old Fashioned'], + [ + "id" => "1001", + "type" => "Regular", + ], + [ + "id" => "1002", + "type" => "Chocolate", + ], + [ + "id" => "1003", + "type" => "Blueberry", + ], + [ + "id" => "1004", + "type" => "Devil's Food", + ], + [ + "id" => "1001", + "type" => "Regular", + ], + [ + "id" => "1001", + "type" => "Regular", + ], + [ + "id" => "1002", + "type" => "Chocolate", + ], + ], + ], + 'complex_object' => [ + '/-1,/-2' => [], + '/id,/batters/batter/-/type' => [ + 'id' => '0001', + 'type' => ['Regular', 'Chocolate', 'Blueberry', "Devil's Food"], + ], + '/name,/topping/-/type,/id' => [ + 'id' => '0001', + 'name' => 'Cake', + 'type' => ['None', 'Glazed', 'Sugar', 'Powdered Sugar', 'Chocolate with Sprinkles', 'Chocolate', 'Maple'], + ], + '/batters/batter/-,/type' => [ + 'type' => 'donut', + [ + "id" => "1001", + "type" => "Regular", + ], + [ + "id" => "1002", + "type" => "Chocolate", + ], + [ + "id" => "1003", + "type" => "Blueberry", + ], + [ + "id" => "1004", + "type" => "Devil's Food", + ], + ], + ], + 'empty_array' => [ + '/-1,/-2' => [], + '/foo,/bar' => [], + ], + 'empty_object' => [ + '/-1,/-2' => [], + '/foo,/bar' => [], + ], + 'simple_array' => [ + '/-1,/-2' => [], + '/0,/1' => [1, ''], + '/1,/0' => [1, ''], + '/0,/2' => [1, 'foo'], + '/2,/3' => ['foo', '"bar"'], + '/3,/4,/5' => ['"bar"', 'hej då', 3.14], + '/4,/5,/3' => ['"bar"', 'hej då', 3.14], + '/6,/7,/8,/9' => [false, null, [], []], + '/9,/8,/7,/6' => [false, null, [], []], + ], + 'simple_object' => [ + '/-1,/-2' => [], + '/int,/empty_string' => ['int' => 
1, 'empty_string' => ''], + '/empty_string,/int' => ['int' => 1, 'empty_string' => ''], + '/string,/escaped_string,/\"escaped_key\"' => ['string' => 'foo', 'escaped_string' => '"bar"', '"escaped_key"' => 'baz'], + '/unicode,/bool,/empty_array' => ['unicode' => "hej då", 'bool' => false, 'empty_array' => []], + '/,/a~1b,/c%d,/e^f,/g|h,/i\\\\j' => ['' => 0, 'a/b' => 1, 'c%d' => 2, 'e^f' => 3, 'g|h' => 4, 'i\\j' => 5], + '/k\"l,/ ,/m~0n' => ['k"l' => 6, ' ' => 7, 'm~n' => 8], + ], +]; From d35f1175d0cfbc9c9098198c79ca63347e586ec4 Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Wed, 21 Dec 2022 21:56:58 +1000 Subject: [PATCH 109/249] Update workflow badge --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 4e36115..2d06ef2 100644 --- a/README.md +++ b/README.md @@ -55,7 +55,7 @@ The MIT License (MIT). Please see [License File](LICENSE.md) for more informatio [ico-author]: https://img.shields.io/static/v1?label=author&message=cerbero90&color=50ABF1&logo=twitter&style=flat-square [ico-php]: https://img.shields.io/packagist/php-v/cerbero/json-parser?color=%234F5B93&logo=php&style=flat-square [ico-version]: https://img.shields.io/packagist/v/cerbero/json-parser.svg?label=version&style=flat-square -[ico-actions]: https://img.shields.io/github/workflow/status/cerbero90/json-parser/build?style=flat-square&logo=github +[ico-actions]: https://img.shields.io/github/actions/workflow/status/cerbero90/json-parser/workflows/build.yml?branch=master&style=flat-square&logo=github [ico-license]: https://img.shields.io/badge/license-MIT-brightgreen.svg?style=flat-square [ico-psr12]: https://img.shields.io/static/v1?label=compliance&message=PSR-12&color=blue&style=flat-square [ico-scrutinizer]: https://img.shields.io/scrutinizer/coverage/g/cerbero90/json-parser.svg?style=flat-square&logo=scrutinizer From e0b11f011baf1bb9164a3c62e938e24f9736c4fa Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Fri, 23 Dec 2022 
21:45:50 +1000 Subject: [PATCH 110/249] Add namespace to helper --- helpers.php | 2 +- tests/Feature/ParsingTest.php | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/helpers.php b/helpers.php index 400478a..6782532 100644 --- a/helpers.php +++ b/helpers.php @@ -1,6 +1,6 @@ toParseTo($parsed); From 6c038e67abb0e57e97630a9aa6097c6e7073dd90 Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Sat, 24 Dec 2022 08:57:51 +1000 Subject: [PATCH 111/249] Add PHP 8.2 to the testing matrix --- .github/workflows/build.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index c90596a..34fda1a 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -11,7 +11,7 @@ jobs: strategy: fail-fast: false matrix: - php: [8.0, 8.1] + php: [8.0, 8.1, 8.2] dependency-version: [prefer-stable] os: [ubuntu-latest] From 99aa404e0ebe0ebc8d89038c6d1cd13f8cf03667 Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Sat, 24 Dec 2022 09:08:41 +1000 Subject: [PATCH 112/249] Update matrix --- .github/workflows/build.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 34fda1a..00daabf 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -13,7 +13,7 @@ jobs: matrix: php: [8.0, 8.1, 8.2] dependency-version: [prefer-stable] - os: [ubuntu-latest] + os: [ubuntu-latest, windows-latest] name: PHP ${{ matrix.php }} - ${{ matrix.dependency-version }} - ${{ matrix.os }} From e8d5099602dc5be83635b5129e067d331cc7c31c Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Sat, 24 Dec 2022 11:26:38 +1000 Subject: [PATCH 113/249] Anticipate faster condition --- src/Concerns/DetectsEndpoints.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Concerns/DetectsEndpoints.php b/src/Concerns/DetectsEndpoints.php index 16c1c93..c7b745f 100644 --- 
a/src/Concerns/DetectsEndpoints.php +++ b/src/Concerns/DetectsEndpoints.php @@ -20,6 +20,6 @@ trait DetectsEndpoints return false; } - return in_array($url['scheme'] ?? null, ['http', 'https']) && isset($url['host']); + return isset($url['host']) && in_array($url['scheme'] ?? null, ['http', 'https']); } } From 81199371c86221fd1af11431d6520491d450e714 Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Sat, 24 Dec 2022 12:03:55 +1000 Subject: [PATCH 114/249] Move logic to pointer --- src/Pointers/Pointer.php | 11 +++++++++++ src/State.php | 12 +----------- 2 files changed, 12 insertions(+), 11 deletions(-) diff --git a/src/Pointers/Pointer.php b/src/Pointers/Pointer.php index 0aac41d..8a4ce94 100644 --- a/src/Pointers/Pointer.php +++ b/src/Pointers/Pointer.php @@ -101,6 +101,17 @@ class Pointer implements Stringable return is_int($key) && $this->referenceTokens[$depth] === '-'; } + /** + * Determine whether the pointer matches the given tree + * + * @param Tree $tree + * @return bool + */ + public function matchesTree(Tree $tree): bool + { + return in_array($this->referenceTokens, [[], $tree->original(), $tree->wildcarded()]); + } + /** * Determine whether the pointer includes the given tree * diff --git a/src/State.php b/src/State.php index a7e8160..5801e7b 100644 --- a/src/State.php +++ b/src/State.php @@ -163,7 +163,7 @@ class State protected function bufferToken(Token $token): void { $shouldBuffer = $this->tree->depth() >= 0 - && $this->pointerMatchesTree() + && $this->pointer->matchesTree($this->tree) && ($this->treeIsDeep() || ($token->isValue() && !$this->expectsKey)); if ($shouldBuffer) { @@ -172,16 +172,6 @@ class State } } - /** - * Determine whether the tree matches the JSON pointer - * - * @return bool - */ - protected function pointerMatchesTree(): bool - { - return in_array($this->pointer->referenceTokens(), [[], $this->tree->original(), $this->tree->wildcarded()]); - } - /** * Determine whether the buffer contains tokens * From 
20a20ca2dd030b6a72fd8915f8877cc90b1a23ef Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Sat, 24 Dec 2022 12:23:23 +1000 Subject: [PATCH 115/249] Define generics for class-string --- src/Sources/AnySource.php | 2 +- src/Tokens/Tokens.php | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Sources/AnySource.php b/src/Sources/AnySource.php index 087d5fb..f7fc4c0 100644 --- a/src/Sources/AnySource.php +++ b/src/Sources/AnySource.php @@ -15,7 +15,7 @@ class AnySource extends Source /** * The supported sources. * - * @var string[] + * @var class-string[] */ protected array $supportedSources = [ CustomSource::class, diff --git a/src/Tokens/Tokens.php b/src/Tokens/Tokens.php index 41257de..9d81067 100644 --- a/src/Tokens/Tokens.php +++ b/src/Tokens/Tokens.php @@ -99,7 +99,7 @@ class Tokens /** * The tokens class map. * - * @var array + * @var array> */ public const MAP = [ self::COMMA => Comma::class, From 5d30eac49e6506eebe51e230df1fac83e41a2c5a Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Thu, 29 Dec 2022 21:32:35 +1000 Subject: [PATCH 116/249] Install and configure PHPStan --- .github/workflows/build.yml | 21 ++++++++++++++++++++- README.md | 3 +++ composer.json | 1 + phpstan-baseline.neon | 16 ++++++++++++++++ phpstan.neon | 6 ++++++ 5 files changed, 46 insertions(+), 1 deletion(-) create mode 100644 phpstan-baseline.neon create mode 100644 phpstan.neon diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 00daabf..c0fce47 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -12,7 +12,7 @@ jobs: fail-fast: false matrix: php: [8.0, 8.1, 8.2] - dependency-version: [prefer-stable] + dependency-version: [prefer-lowest, prefer-stable] os: [ubuntu-latest, windows-latest] name: PHP ${{ matrix.php }} - ${{ matrix.dependency-version }} - ${{ matrix.os }} @@ -81,3 +81,22 @@ jobs: - name: Execute check run: phpcs --standard=psr12 src/ + + static: + runs-on: ubuntu-latest + + name: Coding 
style + + steps: + - name: Checkout code + uses: actions/checkout@v2 + + - name: Setup PHP + uses: shivammathur/setup-php@v2 + with: + php-version: 8.0 + tools: phpstan + coverage: none + + - name: Execute check + run: phpstan analyse diff --git a/README.md b/README.md index 2d06ef2..9d270c3 100644 --- a/README.md +++ b/README.md @@ -5,6 +5,7 @@ [![Build Status][ico-actions]][link-actions] [![Coverage Status][ico-scrutinizer]][link-scrutinizer] [![Quality Score][ico-code-quality]][link-code-quality] +[![PHPStan Level][ico-phpstan]][link-phpstan] [![Latest Version][ico-version]][link-packagist] [![Software License][ico-license]](LICENSE.md) [![PSR-12][ico-psr12]][link-psr12] @@ -60,6 +61,7 @@ The MIT License (MIT). Please see [License File](LICENSE.md) for more informatio [ico-psr12]: https://img.shields.io/static/v1?label=compliance&message=PSR-12&color=blue&style=flat-square [ico-scrutinizer]: https://img.shields.io/scrutinizer/coverage/g/cerbero90/json-parser.svg?style=flat-square&logo=scrutinizer [ico-code-quality]: https://img.shields.io/scrutinizer/g/cerbero90/json-parser.svg?style=flat-square&logo=scrutinizer +[ico-phpstan]: https://img.shields.io/badge/level-max-success?style=flat-square&logo= [ico-downloads]: https://img.shields.io/packagist/dt/cerbero/json-parser.svg?style=flat-square [link-author]: https://twitter.com/cerbero90 @@ -69,5 +71,6 @@ The MIT License (MIT). 
Please see [License File](LICENSE.md) for more informatio [link-psr12]: https://www.php-fig.org/psr/psr-12/ [link-scrutinizer]: https://scrutinizer-ci.com/g/cerbero90/json-parser/code-structure [link-code-quality]: https://scrutinizer-ci.com/g/cerbero90/json-parser +[link-phpstan]: https://phpstan.org/ [link-downloads]: https://packagist.org/packages/cerbero/json-parser [link-contributors]: ../../contributors diff --git a/composer.json b/composer.json index be3dbe7..64df435 100644 --- a/composer.json +++ b/composer.json @@ -24,6 +24,7 @@ "guzzlehttp/guzzle": "^7.2", "illuminate/http": ">=6.20", "pestphp/pest": "^1.21", + "phpstan/phpstan": "^1.9", "scrutinizer/ocular": "^1.8", "squizlabs/php_codesniffer": "^3.0" }, diff --git a/phpstan-baseline.neon b/phpstan-baseline.neon new file mode 100644 index 0000000..a3df18f --- /dev/null +++ b/phpstan-baseline.neon @@ -0,0 +1,16 @@ +parameters: + ignoreErrors: + - + message: "#^Generator expects key type int\\|string, mixed given\\.$#" + count: 1 + path: src/Parser.php + + - + message: "#^Parameter \\#1 \\$json of method Cerbero\\\\JsonParser\\\\Decoders\\\\ConfigurableDecoder\\:\\:decode\\(\\) expects string, int\\|string given\\.$#" + count: 1 + path: src/Parser.php + + - + message: "#^Binary operation \"\\+\" between int\\|string and 1 results in an error\\.$#" + count: 1 + path: src/Tree.php diff --git a/phpstan.neon b/phpstan.neon new file mode 100644 index 0000000..5209c3e --- /dev/null +++ b/phpstan.neon @@ -0,0 +1,6 @@ +parameters: + level: max + paths: + - src +includes: + - phpstan-baseline.neon From 1a5accca0f339a6fc4b650c0b36d6d64969457c7 Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Thu, 29 Dec 2022 21:33:41 +1000 Subject: [PATCH 117/249] Rename job --- .github/workflows/build.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index c0fce47..a775f7c 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml 
@@ -85,7 +85,7 @@ jobs: static: runs-on: ubuntu-latest - name: Coding style + name: Static analysis steps: - name: Checkout code From 4df6056671c90cc6687ebcc52c6bae9bc3d6a5b1 Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Thu, 29 Dec 2022 22:07:08 +1000 Subject: [PATCH 118/249] Update code to reach PHPStan max level --- src/Concerns/DetectsEndpoints.php | 14 +++++----- src/Config.php | 4 +-- ...tomDecoder.php => ConfigurableDecoder.php} | 4 +-- src/Decoders/DecodedValue.php | 4 +-- src/Exceptions/PointerException.php | 2 +- src/Exceptions/SourceException.php | 2 +- src/JsonParser.php | 11 ++++---- src/Lexer.php | 17 ++++++------ src/Parser.php | 15 ++++++----- src/Pointers/Pointer.php | 27 ++++++++----------- src/Pointers/Pointers.php | 8 +++--- src/Sources/AnySource.php | 8 +++--- src/Sources/CustomSource.php | 1 + src/Sources/Endpoint.php | 7 ++--- src/Sources/Filename.php | 5 ++-- src/Sources/IterableSource.php | 3 ++- src/Sources/Json.php | 1 + .../{Resource.php => JsonResource.php} | 9 ++++--- src/Sources/LaravelClientResponse.php | 3 ++- src/Sources/Psr7Message.php | 3 ++- src/Sources/Psr7Stream.php | 3 ++- src/Sources/Source.php | 17 +++--------- src/Sources/StreamWrapper.php | 6 ++--- src/State.php | 16 +++++------ src/Tokens/Comma.php | 6 ++--- src/Tokens/CompoundBegin.php | 6 ++--- src/Tokens/CompoundEnd.php | 6 +---- src/Tokens/Constant.php | 2 +- src/Tokens/ScalarString.php | 4 +-- src/Tokens/Tokenizer.php | 6 ++--- src/Tokens/Tokens.php | 8 +++--- src/Tree.php | 12 ++++----- 32 files changed, 113 insertions(+), 127 deletions(-) rename src/Decoders/{CustomDecoder.php => ConfigurableDecoder.php} (86%) rename src/Sources/{Resource.php => JsonResource.php} (75%) diff --git a/src/Concerns/DetectsEndpoints.php b/src/Concerns/DetectsEndpoints.php index c7b745f..e9947d3 100644 --- a/src/Concerns/DetectsEndpoints.php +++ b/src/Concerns/DetectsEndpoints.php @@ -9,17 +9,15 @@ namespace Cerbero\JsonParser\Concerns; trait DetectsEndpoints { /** - * 
Determine whether the given string points to an endpoint + * Determine whether the given value points to an endpoint * - * @param string $string + * @param string $value * @return bool */ - public function isEndpoint(string $string): bool + public function isEndpoint(string $value): bool { - if (($url = parse_url($string)) === false) { - return false; - } - - return isset($url['host']) && in_array($url['scheme'] ?? null, ['http', 'https']); + return is_array($url = parse_url($value)) + && in_array($url['scheme'] ?? null, ['http', 'https']) + && isset($url['host']); } } diff --git a/src/Config.php b/src/Config.php index b39431e..9f9ab43 100644 --- a/src/Config.php +++ b/src/Config.php @@ -12,7 +12,7 @@ use Closure; * The configuration. * */ -class Config +final class Config { /** * The JSON decoder. @@ -31,7 +31,7 @@ class Config /** * The number of bytes to read in each chunk. * - * @var int + * @var int<1, max> */ public int $bytes = 1024 * 8; diff --git a/src/Decoders/CustomDecoder.php b/src/Decoders/ConfigurableDecoder.php similarity index 86% rename from src/Decoders/CustomDecoder.php rename to src/Decoders/ConfigurableDecoder.php index c7a181f..02309f5 100644 --- a/src/Decoders/CustomDecoder.php +++ b/src/Decoders/ConfigurableDecoder.php @@ -8,14 +8,14 @@ use Cerbero\JsonParser\Config; * The configurable decoder. * */ -class CustomDecoder +final class ConfigurableDecoder { /** * Instantiate the class. * * @param Config $config */ - public function __construct(protected Config $config) + public function __construct(private Config $config) { } diff --git a/src/Decoders/DecodedValue.php b/src/Decoders/DecodedValue.php index e80c2fd..f88e294 100644 --- a/src/Decoders/DecodedValue.php +++ b/src/Decoders/DecodedValue.php @@ -8,14 +8,14 @@ use JsonException; * The decoded value. * */ -class DecodedValue +final class DecodedValue { /** * Instantiate the class. 
* * @param mixed $value */ - protected function __construct( + private function __construct( public bool $succeeded, public mixed $value = null, public ?string $error = null, diff --git a/src/Exceptions/PointerException.php b/src/Exceptions/PointerException.php index 7cc5504..5d95831 100644 --- a/src/Exceptions/PointerException.php +++ b/src/Exceptions/PointerException.php @@ -6,7 +6,7 @@ namespace Cerbero\JsonParser\Exceptions; * The exception thrown when a pointer-related error occurs. * */ -class PointerException extends JsonParserException +final class PointerException extends JsonParserException { /** * Retrieve the exception when the given pointer is invalid diff --git a/src/Exceptions/SourceException.php b/src/Exceptions/SourceException.php index 275f2d3..5f51b68 100644 --- a/src/Exceptions/SourceException.php +++ b/src/Exceptions/SourceException.php @@ -6,7 +6,7 @@ namespace Cerbero\JsonParser\Exceptions; * The exception thrown when a source-related error occurs. * */ -class SourceException extends JsonParserException +final class SourceException extends JsonParserException { /** * Retrieve the exception when a JSON source is not supported diff --git a/src/JsonParser.php b/src/JsonParser.php index c2737ca..9528ffc 100644 --- a/src/JsonParser.php +++ b/src/JsonParser.php @@ -13,22 +13,23 @@ use Traversable; /** * The JSON parser entry-point. * + * @implements IteratorAggregate */ -class JsonParser implements IteratorAggregate +final class JsonParser implements IteratorAggregate { /** * The configuration. * * @var Config */ - protected Config $config; + private Config $config; /** * The parser. * * @var Parser */ - protected Parser $parser; + private Parser $parser; /** * Instantiate the class. 
@@ -38,7 +39,7 @@ class JsonParser implements IteratorAggregate public function __construct(mixed $source) { $this->config = new Config(); - $this->parser = Parser::for(AnySource::from($source, $this->config)); + $this->parser = Parser::for(new AnySource($source, $this->config)); } /** @@ -91,7 +92,7 @@ class JsonParser implements IteratorAggregate /** * The number of bytes to read in each chunk * - * @param int $bytes + * @param int<1, max> $bytes * @return static */ public function bytes(int $bytes): static diff --git a/src/Lexer.php b/src/Lexer.php index 7caa3d9..d3f6903 100644 --- a/src/Lexer.php +++ b/src/Lexer.php @@ -13,43 +13,44 @@ use Traversable; /** * The JSON lexer. * + * @implements IteratorAggregate */ -class Lexer implements IteratorAggregate +final class Lexer implements IteratorAggregate { /** * The tokenizer. * * @var Tokenizer */ - protected Tokenizer $tokenizer; + private Tokenizer $tokenizer; /** * The buffer to yield. * * @var string */ - protected string $buffer = ''; + private string $buffer = ''; /** * Whether the current character is escaped. * * @var bool */ - protected bool $isEscaping = false; + private bool $isEscaping = false; /** * Whether the current character belongs to a string. * * @var bool */ - protected bool $inString = false; + private bool $inString = false; /** * Instantiate the class. 
* * @param Source $source */ - public function __construct(protected Source $source) + public function __construct(private Source $source) { $this->tokenizer = new Tokenizer(); } @@ -78,7 +79,7 @@ class Lexer implements IteratorAggregate * @param string $character * @return bool */ - protected function inString(string $character): bool + private function inString(string $character): bool { return ($character == '"' && $this->inString && $this->isEscaping) || ($character != '"' && $this->inString) @@ -91,7 +92,7 @@ class Lexer implements IteratorAggregate * @param string $character * @return Generator */ - protected function yieldOrBufferCharacter(string $character): Generator + private function yieldOrBufferCharacter(string $character): Generator { if ($this->inString || !isset(Tokens::BOUNDARIES[$character])) { $this->buffer .= $character; diff --git a/src/Parser.php b/src/Parser.php index ac60843..c43db20 100644 --- a/src/Parser.php +++ b/src/Parser.php @@ -2,7 +2,7 @@ namespace Cerbero\JsonParser; -use Cerbero\JsonParser\Decoders\CustomDecoder; +use Cerbero\JsonParser\Decoders\ConfigurableDecoder; use Cerbero\JsonParser\Sources\Source; use IteratorAggregate; use Traversable; @@ -10,22 +10,23 @@ use Traversable; /** * The JSON parser. * + * @implements IteratorAggregate */ -class Parser implements IteratorAggregate +final class Parser implements IteratorAggregate { /** * The JSON parsing state. * * @var State */ - protected State $state; + private State $state; /** * The decoder handling potential errors. * - * @var CustomDecoder + * @var ConfigurableDecoder */ - protected CustomDecoder $decoder; + private ConfigurableDecoder $decoder; /** * Instantiate the class. 
@@ -33,10 +34,10 @@ class Parser implements IteratorAggregate * @param Lexer $lexer * @param Config $config */ - public function __construct(protected Lexer $lexer, protected Config $config) + public function __construct(private Lexer $lexer, private Config $config) { $this->state = new State(); - $this->decoder = new CustomDecoder($config); + $this->decoder = new ConfigurableDecoder($config); } /** diff --git a/src/Pointers/Pointer.php b/src/Pointers/Pointer.php index 8a4ce94..0f1c5bc 100644 --- a/src/Pointers/Pointer.php +++ b/src/Pointers/Pointer.php @@ -10,21 +10,21 @@ use Stringable; * The JSON pointer. * */ -class Pointer implements Stringable +final class Pointer implements Stringable { /** * The reference tokens. * * @var string[] */ - protected array $referenceTokens; + private array $referenceTokens; /** * The pointer depth. * * @var int */ - protected int $depth; + private int $depth; /** * Whether the pointer was found. @@ -38,7 +38,7 @@ class Pointer implements Stringable * * @param string $pointer */ - public function __construct(protected string $pointer) + public function __construct(private string $pointer) { $this->referenceTokens = $this->toReferenceTokens(); $this->depth = count($this->referenceTokens); @@ -49,7 +49,7 @@ class Pointer implements Stringable * * @return string[] */ - protected function toReferenceTokens(): array + private function toReferenceTokens(): array { if (preg_match('#^(?:/(?:(?:[^/~])|(?:~[01]))*)*$#', $this->pointer) === 0) { throw PointerException::invalid($this->pointer); @@ -85,20 +85,15 @@ class Pointer implements Stringable * Determine whether the reference token at the given depth matches the provided key * * @param int $depth - * @param mixed $key + * @param string|int $key * @return bool */ - public function depthMatchesKey(int $depth, mixed $key): bool + public function depthMatchesKey(int $depth, string|int $key): bool { - if (!isset($this->referenceTokens[$depth])) { - return false; - } + $referenceToken = 
$this->referenceTokens[$depth] ?? null; - if ($this->referenceTokens[$depth] === (string) $key) { - return true; - } - - return is_int($key) && $this->referenceTokens[$depth] === '-'; + return $referenceToken === (string) $key + || (is_int($key) && $referenceToken === '-'); } /** @@ -124,7 +119,7 @@ class Pointer implements Stringable return true; } - return (($firstNest = array_search('-', $this->referenceTokens)) !== false) + return is_int($firstNest = array_search('-', $this->referenceTokens)) && array_slice($this->referenceTokens, 0, $firstNest) === array_slice($tree->original(), 0, $firstNest); } diff --git a/src/Pointers/Pointers.php b/src/Pointers/Pointers.php index eb3a508..8063918 100644 --- a/src/Pointers/Pointers.php +++ b/src/Pointers/Pointers.php @@ -8,21 +8,21 @@ use Cerbero\JsonParser\Tree; * The JSON pointers collection. * */ -class Pointers +final class Pointers { /** * The JSON pointers collection. * * @var Pointer[] */ - protected array $pointers; + private array $pointers; /** * The list of pointers that were found within the JSON. * - * @var array + * @var array */ - protected array $found = []; + private array $found = []; /** * Instantiate the class. diff --git a/src/Sources/AnySource.php b/src/Sources/AnySource.php index f7fc4c0..4d04d97 100644 --- a/src/Sources/AnySource.php +++ b/src/Sources/AnySource.php @@ -23,18 +23,18 @@ class AnySource extends Source Filename::class, IterableSource::class, Json::class, + JsonResource::class, LaravelClientResponse::class, Psr7Message::class, Psr7Stream::class, - Resource::class, ]; /** * The matching source. 
* - * @var Source + * @var Source|null */ - protected Source $matchingSource; + protected ?Source $matchingSource; /** * Retrieve the JSON fragments @@ -61,7 +61,7 @@ class AnySource extends Source protected function sources(): Generator { foreach ($this->supportedSources as $source) { - yield $source::from($this->source, $this->config); + yield new $source($this->source, $this->config); } } diff --git a/src/Sources/CustomSource.php b/src/Sources/CustomSource.php index 28ba042..e5c3f1f 100644 --- a/src/Sources/CustomSource.php +++ b/src/Sources/CustomSource.php @@ -7,6 +7,7 @@ use Traversable; /** * The custom source. * + * @property-read Source $source */ class CustomSource extends Source { diff --git a/src/Sources/Endpoint.php b/src/Sources/Endpoint.php index 2a5ff93..109bbcf 100644 --- a/src/Sources/Endpoint.php +++ b/src/Sources/Endpoint.php @@ -11,6 +11,7 @@ use Traversable; /** * The endpoint source. * + * @property-read string $source */ class Endpoint extends Source { @@ -19,9 +20,9 @@ class Endpoint extends Source /** * The endpoint response. * - * @var ResponseInterface + * @var ResponseInterface|null */ - protected ResponseInterface $response; + protected ?ResponseInterface $response; /** * Retrieve the JSON fragments @@ -41,7 +42,7 @@ class Endpoint extends Source ], ]); - return Psr7Message::from($this->response, $this->config); + return new Psr7Message($this->response, $this->config); } /** diff --git a/src/Sources/Filename.php b/src/Sources/Filename.php index c6a517c..349d439 100644 --- a/src/Sources/Filename.php +++ b/src/Sources/Filename.php @@ -7,6 +7,7 @@ use Traversable; /** * The filename source. 
* + * @property-read string $source */ class Filename extends Source { @@ -20,9 +21,9 @@ class Filename extends Source $handle = fopen($this->source, 'rb'); try { - yield from Resource::from($handle, $this->config); + yield from new JsonResource($handle, $this->config); } finally { - fclose($handle); + $handle && fclose($handle); } } diff --git a/src/Sources/IterableSource.php b/src/Sources/IterableSource.php index 0f6ab22..944597e 100644 --- a/src/Sources/IterableSource.php +++ b/src/Sources/IterableSource.php @@ -7,6 +7,7 @@ use Traversable; /** * The iterable source. * + * @property-read iterable $source */ class IterableSource extends Source { @@ -37,6 +38,6 @@ class IterableSource extends Source */ protected function calculateSize(): ?int { - return iterator_count(clone $this->source); + return is_array($this->source) ? count($this->source) : iterator_count(clone $this->source); } } diff --git a/src/Sources/Json.php b/src/Sources/Json.php index d6745b3..04e0ba3 100644 --- a/src/Sources/Json.php +++ b/src/Sources/Json.php @@ -8,6 +8,7 @@ use Traversable; /** * The JSON source. * + * @property-read string $source */ class Json extends Source { diff --git a/src/Sources/Resource.php b/src/Sources/JsonResource.php similarity index 75% rename from src/Sources/Resource.php rename to src/Sources/JsonResource.php index 5a2b77d..635f474 100644 --- a/src/Sources/Resource.php +++ b/src/Sources/JsonResource.php @@ -7,8 +7,9 @@ use Traversable; /** * The resource source. 
* + * @property-read resource $source */ -class Resource extends Source +class JsonResource extends Source { /** * Retrieve the JSON fragments @@ -18,7 +19,9 @@ class Resource extends Source public function getIterator(): Traversable { while (!feof($this->source)) { - yield fread($this->source, $this->config->bytes); + if (is_string($chunk = fread($this->source, $this->config->bytes))) { + yield $chunk; + } } } @@ -29,7 +32,7 @@ class Resource extends Source */ public function matches(): bool { - return is_resource($this->source) || get_resource_type($this->source) == 'stream'; + return is_resource($this->source); } /** diff --git a/src/Sources/LaravelClientResponse.php b/src/Sources/LaravelClientResponse.php index b03a60d..e4c3d23 100644 --- a/src/Sources/LaravelClientResponse.php +++ b/src/Sources/LaravelClientResponse.php @@ -8,6 +8,7 @@ use Traversable; /** * The Laravel client response source. * + * @property-read Response $source */ class LaravelClientResponse extends Source { @@ -18,7 +19,7 @@ class LaravelClientResponse extends Source */ public function getIterator(): Traversable { - return Psr7Message::from($this->source->toPsrResponse(), $this->config); + return new Psr7Message($this->source->toPsrResponse(), $this->config); } /** diff --git a/src/Sources/Psr7Message.php b/src/Sources/Psr7Message.php index 46e5d6f..7707c71 100644 --- a/src/Sources/Psr7Message.php +++ b/src/Sources/Psr7Message.php @@ -8,6 +8,7 @@ use Traversable; /** * The PSR-7 message source. 
* + * @property-read MessageInterface $source */ class Psr7Message extends Source { @@ -18,7 +19,7 @@ class Psr7Message extends Source */ public function getIterator(): Traversable { - return Psr7Stream::from($this->source->getBody(), $this->config); + return new Psr7Stream($this->source->getBody(), $this->config); } /** diff --git a/src/Sources/Psr7Stream.php b/src/Sources/Psr7Stream.php index 54eca4c..3f28d92 100644 --- a/src/Sources/Psr7Stream.php +++ b/src/Sources/Psr7Stream.php @@ -8,6 +8,7 @@ use Traversable; /** * The PSR-7 stream source. * + * @property-read StreamInterface $source */ class Psr7Stream extends Source { @@ -26,7 +27,7 @@ class Psr7Stream extends Source StreamWrapper::NAME => ['stream' => $this->source], ])); - return Resource::from($stream, $this->config); + return new JsonResource($stream, $this->config); } /** diff --git a/src/Sources/Source.php b/src/Sources/Source.php index 362043a..76945ae 100644 --- a/src/Sources/Source.php +++ b/src/Sources/Source.php @@ -9,6 +9,7 @@ use Traversable; /** * The JSON source. 
* + * @implements IteratorAggregate */ abstract class Source implements IteratorAggregate { @@ -17,7 +18,7 @@ abstract class Source implements IteratorAggregate * * @var int|null */ - protected int $size; + protected ?int $size; /** * Retrieve the JSON fragments @@ -46,22 +47,10 @@ abstract class Source implements IteratorAggregate * @param mixed $source * @param Config $config */ - protected function __construct(protected mixed $source, protected Config $config) + final public function __construct(protected mixed $source, protected Config $config) { } - /** - * Instantiate the class statically - * - * @param mixed $source - * @param Config $config - * @return static - */ - public static function from(mixed $source, Config $config): static - { - return new static($source, $config); - } - /** * Retrieve the underlying configuration * diff --git a/src/Sources/StreamWrapper.php b/src/Sources/StreamWrapper.php index 9ee5abe..4bc505a 100644 --- a/src/Sources/StreamWrapper.php +++ b/src/Sources/StreamWrapper.php @@ -9,7 +9,7 @@ use Psr\Http\Message\StreamInterface; * * @phpcs:disable PSR1.Methods.CamelCapsMethodName.NotCamelCaps */ -class StreamWrapper +final class StreamWrapper { /** * The name of the stream wrapper. @@ -23,14 +23,14 @@ class StreamWrapper * * @var resource */ - public $context; + public mixed $context; /** * The PSR-7 stream. * * @var StreamInterface */ - protected $stream; + private $stream; /** * Open the stream diff --git a/src/State.php b/src/State.php index 5801e7b..805d3e4 100644 --- a/src/State.php +++ b/src/State.php @@ -10,35 +10,35 @@ use Cerbero\JsonParser\Tokens\Token; * The JSON parsing state. * */ -class State +final class State { /** * The JSON tree. * * @var Tree */ - protected Tree $tree; + private Tree $tree; /** * The JSON pointers. * * @var Pointers */ - protected Pointers $pointers; + private Pointers $pointers; /** * The JSON pointer matching the tree. 
* * @var Pointer */ - protected Pointer $pointer; + private Pointer $pointer; /** * The JSON buffer. * * @var string */ - protected string $buffer = ''; + private string $buffer = ''; /** * Whether an object key is expected. @@ -81,9 +81,9 @@ class State /** * Retrieve the current key of the JSON tree * - * @return string + * @return string|int */ - public function key(): string + public function key(): string|int { return $this->tree->currentKey(); } @@ -160,7 +160,7 @@ class State * @param Token $token * @return void */ - protected function bufferToken(Token $token): void + private function bufferToken(Token $token): void { $shouldBuffer = $this->tree->depth() >= 0 && $this->pointer->matchesTree($this->tree) diff --git a/src/Tokens/Comma.php b/src/Tokens/Comma.php index f172f6e..337b0d7 100644 --- a/src/Tokens/Comma.php +++ b/src/Tokens/Comma.php @@ -8,7 +8,7 @@ use Cerbero\JsonParser\State; * The comma token. * */ -class Comma extends Token +final class Comma extends Token { /** * Retrieve the token type @@ -28,8 +28,6 @@ class Comma extends Token */ public function mutateState(State $state): void { - if ($state->inObject()) { - $state->expectsKey = true; - } + $state->expectsKey = $state->inObject(); } } diff --git a/src/Tokens/CompoundBegin.php b/src/Tokens/CompoundBegin.php index ffd3f12..f09bf91 100644 --- a/src/Tokens/CompoundBegin.php +++ b/src/Tokens/CompoundBegin.php @@ -8,7 +8,7 @@ use Cerbero\JsonParser\State; * The token that begins compound data (JSON arrays or objects). 
* */ -class CompoundBegin extends Token +final class CompoundBegin extends Token { /** * Retrieve the token type @@ -30,8 +30,6 @@ class CompoundBegin extends Token { $state->tree()->deepen(); - if ($this->value == '{') { - $state->expectsKey = true; - } + $state->expectsKey = $this->value == '{'; } } diff --git a/src/Tokens/CompoundEnd.php b/src/Tokens/CompoundEnd.php index 261bed3..eeb5bab 100644 --- a/src/Tokens/CompoundEnd.php +++ b/src/Tokens/CompoundEnd.php @@ -8,7 +8,7 @@ use Cerbero\JsonParser\State; * The token that ends compound data (JSON arrays or objects). * */ -class CompoundEnd extends Token +final class CompoundEnd extends Token { /** * Retrieve the token type @@ -29,10 +29,6 @@ class CompoundEnd extends Token public function mutateState(State $state): void { $state->tree()->emerge(); - - if ($this->value == '}') { - $state->expectsKey = false; - } } /** diff --git a/src/Tokens/Constant.php b/src/Tokens/Constant.php index dc27d76..9bf190f 100644 --- a/src/Tokens/Constant.php +++ b/src/Tokens/Constant.php @@ -6,7 +6,7 @@ namespace Cerbero\JsonParser\Tokens; * The constant token, includes colons for convenience. * */ -class Constant extends Token +final class Constant extends Token { /** * Retrieve the token type diff --git a/src/Tokens/ScalarString.php b/src/Tokens/ScalarString.php index dee0ebe..52313df 100644 --- a/src/Tokens/ScalarString.php +++ b/src/Tokens/ScalarString.php @@ -8,14 +8,14 @@ use Cerbero\JsonParser\State; * The scalar string token. * */ -class ScalarString extends Token +final class ScalarString extends Token { /** * Whether this token is an object key. * * @var bool */ - protected bool $isKey = false; + private bool $isKey = false; /** * Retrieve the token type diff --git a/src/Tokens/Tokenizer.php b/src/Tokens/Tokenizer.php index 6f97985..924ebef 100644 --- a/src/Tokens/Tokenizer.php +++ b/src/Tokens/Tokenizer.php @@ -6,14 +6,14 @@ namespace Cerbero\JsonParser\Tokens; * The tokenizer. 
* */ -class Tokenizer +final class Tokenizer { /** * The map of token instances by type. * * @var array */ - protected static array $tokensMap; + private static array $tokensMap; /** * Instantiate the class. @@ -29,7 +29,7 @@ class Tokenizer * * @return array */ - protected function hydrateTokensMap(): array + private function hydrateTokensMap(): array { $map = $instances = []; diff --git a/src/Tokens/Tokens.php b/src/Tokens/Tokens.php index 9d81067..fce1343 100644 --- a/src/Tokens/Tokens.php +++ b/src/Tokens/Tokens.php @@ -6,7 +6,7 @@ namespace Cerbero\JsonParser\Tokens; * The tokens related information. * */ -class Tokens +final class Tokens { public const SCALAR_CONST = 1 << 0; public const SCALAR_STRING = 1 << 1; @@ -35,7 +35,7 @@ class Tokens /** * The token types. * - * @var array + * @var array */ public const TYPES = [ 'n' => self::SCALAR_CONST, @@ -64,7 +64,7 @@ class Tokens /** * The token boundaries. * - * @var array + * @var array */ public const BOUNDARIES = [ "\xEF" => true, @@ -85,7 +85,7 @@ class Tokens /** * The structural boundaries. * - * @var array + * @var array */ public const DELIMITERS = [ '{' => true, diff --git a/src/Tree.php b/src/Tree.php index 115da32..ac3c84d 100644 --- a/src/Tree.php +++ b/src/Tree.php @@ -2,34 +2,32 @@ namespace Cerbero\JsonParser; -use Cerbero\JsonParser\Pointers\Pointer; - /** * The JSON tree. * */ -class Tree +final class Tree { /** * The original JSON tree. * * @var array */ - protected array $original = []; + private array $original = []; /** * The wildcarded JSON tree. * * @var array */ - protected array $wildcarded = []; + private array $wildcarded = []; /** * The JSON tree depth. 
* * @var int */ - protected int $depth = -1; + private int $depth = -1; /** * Retrieve the original JSON tree @@ -102,7 +100,7 @@ class Tree * * @return void */ - protected function trim(): void + private function trim(): void { array_splice($this->original, $this->depth + 1); array_splice($this->wildcarded, $this->depth + 1); From 4ae685988f03593bc470af96582b781b450aee1a Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Thu, 29 Dec 2022 22:11:56 +1000 Subject: [PATCH 119/249] Install dependencies to discover symbols --- .github/workflows/build.yml | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index a775f7c..f9a984b 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -95,8 +95,11 @@ jobs: uses: shivammathur/setup-php@v2 with: php-version: 8.0 - tools: phpstan + tools: composer:v2 coverage: none + - name: Install dependencies + run: composer update --prefer-stable --prefer-dist --no-interaction + - name: Execute check - run: phpstan analyse + run: vendor/bin/phpstan analyse From 8b39422123c9a4a9f9c7da6df38004613fdcde96 Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Thu, 29 Dec 2022 23:34:23 +1000 Subject: [PATCH 120/249] Turn abstract exception into an interface --- src/Exceptions/JsonParserException.php | 8 ++------ src/Exceptions/PointerException.php | 8 ++++++-- src/Exceptions/SourceException.php | 11 ++++++++--- 3 files changed, 16 insertions(+), 11 deletions(-) diff --git a/src/Exceptions/JsonParserException.php b/src/Exceptions/JsonParserException.php index 20e67cc..f7f05f8 100644 --- a/src/Exceptions/JsonParserException.php +++ b/src/Exceptions/JsonParserException.php @@ -2,16 +2,12 @@ namespace Cerbero\JsonParser\Exceptions; -use Exception; +use Throwable; /** * Any exception thrown by JSON Parser. 
* */ -abstract class JsonParserException extends Exception +interface JsonParserException extends Throwable { - public const SOURCE_UNSUPPORTED = 0; - public const SOURCE_GUZZLE = 1; - - public const POINTER_INVALID = 0; } diff --git a/src/Exceptions/PointerException.php b/src/Exceptions/PointerException.php index 5d95831..d4ba869 100644 --- a/src/Exceptions/PointerException.php +++ b/src/Exceptions/PointerException.php @@ -2,12 +2,16 @@ namespace Cerbero\JsonParser\Exceptions; +use Exception; + /** * The exception thrown when a pointer-related error occurs. * */ -final class PointerException extends JsonParserException +final class PointerException extends Exception implements JsonParserException { + public const CODE_INVALID = 0; + /** * Retrieve the exception when the given pointer is invalid * @@ -16,6 +20,6 @@ final class PointerException extends JsonParserException */ public static function invalid(string $pointer): static { - return new static("The string [$pointer] is not a valid JSON pointer", static::POINTER_INVALID); + return new static("The string [$pointer] is not a valid JSON pointer", static::CODE_INVALID); } } diff --git a/src/Exceptions/SourceException.php b/src/Exceptions/SourceException.php index 5f51b68..4e50da1 100644 --- a/src/Exceptions/SourceException.php +++ b/src/Exceptions/SourceException.php @@ -2,12 +2,17 @@ namespace Cerbero\JsonParser\Exceptions; +use Exception; + /** * The exception thrown when a source-related error occurs. 
* */ -final class SourceException extends JsonParserException +final class SourceException extends Exception implements JsonParserException { + public const CODE_UNSUPPORTED = 0; + public const CODE_GUZZLE = 1; + /** * Retrieve the exception when a JSON source is not supported * @@ -15,7 +20,7 @@ final class SourceException extends JsonParserException */ public static function unsupported(): static { - return new static('Unable to load JSON from the provided source', static::SOURCE_UNSUPPORTED); + return new static('Unable to load JSON from the provided source', static::CODE_UNSUPPORTED); } /** @@ -25,6 +30,6 @@ final class SourceException extends JsonParserException */ public static function requireGuzzle(): static { - return new static('Guzzle is required to load JSON from endpoints', static::SOURCE_GUZZLE); + return new static('Guzzle is required to load JSON from endpoints', static::CODE_GUZZLE); } } From 3a3618a13f0db62c1efaeeaf5c449a6620d96e91 Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Thu, 29 Dec 2022 23:50:07 +1000 Subject: [PATCH 121/249] Turn Tokenizer into singleton --- src/Lexer.php | 12 ++---------- src/Tokens/Tokenizer.php | 37 ++++++++++++++++++++++++++----------- 2 files changed, 28 insertions(+), 21 deletions(-) diff --git a/src/Lexer.php b/src/Lexer.php index d3f6903..dac4206 100644 --- a/src/Lexer.php +++ b/src/Lexer.php @@ -17,13 +17,6 @@ use Traversable; */ final class Lexer implements IteratorAggregate { - /** - * The tokenizer. - * - * @var Tokenizer - */ - private Tokenizer $tokenizer; - /** * The buffer to yield. 
* @@ -52,7 +45,6 @@ final class Lexer implements IteratorAggregate */ public function __construct(private Source $source) { - $this->tokenizer = new Tokenizer(); } /** @@ -100,12 +92,12 @@ final class Lexer implements IteratorAggregate } if ($this->buffer != '') { - yield $this->tokenizer->toToken($this->buffer); + yield Tokenizer::instance()->toToken($this->buffer); $this->buffer = ''; } if (isset(Tokens::DELIMITERS[$character])) { - yield $this->tokenizer->toToken($character); + yield Tokenizer::instance()->toToken($character); } } } diff --git a/src/Tokens/Tokenizer.php b/src/Tokens/Tokenizer.php index 924ebef..6b6c556 100644 --- a/src/Tokens/Tokenizer.php +++ b/src/Tokens/Tokenizer.php @@ -8,36 +8,51 @@ namespace Cerbero\JsonParser\Tokens; */ final class Tokenizer { + /** + * The singleton instance. + * + * @var static + */ + private static self $instance; + /** * The map of token instances by type. * * @var array */ - private static array $tokensMap; + private array $tokensMap; /** * Instantiate the class. 
* */ - public function __construct() + private function __construct() { - static::$tokensMap ??= $this->hydrateTokensMap(); + $this->setTokensMap(); } /** - * Retrieve the hydrated tokens map + * Retrieve the singleton instance * - * @return array + * @return static */ - private function hydrateTokensMap(): array + public static function instance(): static { - $map = $instances = []; + return static::$instance ??= new static(); + } + + /** + * Set the tokens map + * + * @return void + */ + private function setTokensMap(): void + { + $instances = []; foreach (Tokens::MAP as $type => $class) { - $map[$type] = $instances[$class] ??= new $class(); + $this->tokensMap[$type] = $instances[$class] ??= new $class(); } - - return $map; } /** @@ -51,6 +66,6 @@ final class Tokenizer $character = $value[0]; $type = Tokens::TYPES[$character]; - return static::$tokensMap[$type]->setValue($value); + return $this->tokensMap[$type]->setValue($value); } } From 0f1f3228a6a3ad7f17b8ce01de157ffc12c826a0 Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Wed, 4 Jan 2023 16:51:20 +1000 Subject: [PATCH 122/249] Remove condition --- helpers.php | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/helpers.php b/helpers.php index 6782532..b6d5fad 100644 --- a/helpers.php +++ b/helpers.php @@ -2,15 +2,13 @@ namespace Cerbero\JsonParser; -if (!function_exists('parseJson')) { - /** - * Parse the given source of JSON - * - * @param mixed $source - * @return JsonParser - */ - function parseJson(mixed $source): JsonParser - { - return new JsonParser($source); - } +/** + * Parse the given source of JSON + * + * @param mixed $source + * @return JsonParser + */ +function parseJson(mixed $source): JsonParser +{ + return new JsonParser($source); } From bda12c1f895cf4e715e9934bac59839f7a89d3ce Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Wed, 11 Jan 2023 15:35:58 +1000 Subject: [PATCH 123/249] Introduce callable pointers --- src/Config.php | 1 - 
src/JsonParser.php | 22 +++++++++++++++++++--- src/Pointers/Pointer.php | 24 +++++++++++++++++++++++- src/State.php | 2 +- tests/Feature/ParsingTest.php | 2 +- tests/Feature/PointersTest.php | 2 +- 6 files changed, 45 insertions(+), 8 deletions(-) diff --git a/src/Config.php b/src/Config.php index 9f9ab43..add27ab 100644 --- a/src/Config.php +++ b/src/Config.php @@ -49,7 +49,6 @@ final class Config public function __construct() { $this->decoder = new ArrayDecoder(); - $this->pointers[] = new Pointer(''); $this->onError = fn (DecodedValue $decoded) => throw $decoded->exception; } } diff --git a/src/JsonParser.php b/src/JsonParser.php index 9528ffc..0356448 100644 --- a/src/JsonParser.php +++ b/src/JsonParser.php @@ -79,12 +79,28 @@ final class JsonParser implements IteratorAggregate /** * Set the JSON pointers * - * @param string ...$pointers + * @param string[]|array $pointers * @return static */ - public function pointer(string ...$pointers): static + public function pointers(array $pointers): static { - $this->config->pointers = array_map(fn (string $pointer) => new Pointer($pointer), $pointers); + foreach ($pointers as $pointer => $callback) { + is_callable($callback) ? 
$this->pointer($pointer, $callback) : $this->pointer($callback); + } + + return $this; + } + + /** + * Set a JSON pointer + * + * @param string $pointer + * @param Closure|null $callback + * @return static + */ + public function pointer(string $pointer, Closure $callback = null): static + { + $this->config->pointers[] = new Pointer($pointer, $callback); return $this; } diff --git a/src/Pointers/Pointer.php b/src/Pointers/Pointer.php index 0f1c5bc..57a25dd 100644 --- a/src/Pointers/Pointer.php +++ b/src/Pointers/Pointer.php @@ -4,6 +4,7 @@ namespace Cerbero\JsonParser\Pointers; use Cerbero\JsonParser\Exceptions\PointerException; use Cerbero\JsonParser\Tree; +use Closure; use Stringable; /** @@ -26,6 +27,13 @@ final class Pointer implements Stringable */ private int $depth; + /** + * The pointer callback. + * + * @var Closure + */ + private Closure $callback; + /** * Whether the pointer was found. * @@ -37,11 +45,13 @@ final class Pointer implements Stringable * Instantiate the class. * * @param string $pointer + * @param Closure|null $callback */ - public function __construct(private string $pointer) + public function __construct(private string $pointer, Closure $callback = null) { $this->referenceTokens = $this->toReferenceTokens(); $this->depth = count($this->referenceTokens); + $this->callback = $callback ?: fn (mixed $value) => $value; } /** @@ -81,6 +91,18 @@ final class Pointer implements Stringable return $this->depth; } + /** + * Call the pointer callback + * + * @param mixed $value + * @param mixed $key + * @return mixed + */ + public function call(mixed $value, mixed $key): mixed + { + return call_user_func($this->callback, $value, $key); + } + /** * Determine whether the reference token at the given depth matches the provided key * diff --git a/src/State.php b/src/State.php index 805d3e4..0c6cc36 100644 --- a/src/State.php +++ b/src/State.php @@ -109,7 +109,7 @@ final class State */ public function setPointers(Pointer ...$pointers): void { - 
$this->pointers = new Pointers(...$pointers); + $this->pointers = new Pointers(...$pointers ?: [new Pointer('')]); $this->pointer = $this->pointers->matchTree($this->tree); } diff --git a/tests/Feature/ParsingTest.php b/tests/Feature/ParsingTest.php index a63b68e..3134795 100644 --- a/tests/Feature/ParsingTest.php +++ b/tests/Feature/ParsingTest.php @@ -9,7 +9,7 @@ it('parses JSON when instantiated', function (string $json, array $parsed) { expect(new JsonParser($json))->toParseTo($parsed); })->with(Dataset::forParsing()); -it('parses JSON when calling the factory method', function (string $json, array $parsed) { +it('parses JSON when instantiated statically', function (string $json, array $parsed) { expect(JsonParser::parse($json))->toParseTo($parsed); })->with(Dataset::forParsing()); diff --git a/tests/Feature/PointersTest.php b/tests/Feature/PointersTest.php index 44962fa..4e6e4f2 100644 --- a/tests/Feature/PointersTest.php +++ b/tests/Feature/PointersTest.php @@ -15,5 +15,5 @@ it('supports single JSON pointers', function (string $json, string $pointer, arr })->with(Dataset::forSinglePointers()); it('supports multiple JSON pointers', function (string $json, array $pointers, array $parsed) { - expect(JsonParser::parse($json)->pointer(...$pointers))->toPointTo($parsed); + expect(JsonParser::parse($json)->pointers($pointers))->toPointTo($parsed); })->with(Dataset::forMultiplePointers()); From 1ec714ab6290415ea9e37162151a9aec01f61aee Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Wed, 11 Jan 2023 15:58:01 +1000 Subject: [PATCH 124/249] Check callable pointer by Closure --- src/JsonParser.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/JsonParser.php b/src/JsonParser.php index 0356448..19e8999 100644 --- a/src/JsonParser.php +++ b/src/JsonParser.php @@ -85,7 +85,7 @@ final class JsonParser implements IteratorAggregate public function pointers(array $pointers): static { foreach ($pointers as $pointer => $callback) { - 
is_callable($callback) ? $this->pointer($pointer, $callback) : $this->pointer($callback); + $callback instanceof Closure ? $this->pointer($pointer, $callback) : $this->pointer($callback); } return $this; From 6ddb7fb23e9507e0b2ef490685eadd38e9c2db32 Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Wed, 11 Jan 2023 15:58:27 +1000 Subject: [PATCH 125/249] Add command to run static analysis --- composer.json | 1 + 1 file changed, 1 insertion(+) diff --git a/composer.json b/composer.json index 64df435..8038deb 100644 --- a/composer.json +++ b/composer.json @@ -46,6 +46,7 @@ }, "scripts": { "test": "pest", + "static": "phpstan analyze", "check-style": "phpcs --standard=PSR12 src", "fix-style": "phpcbf --standard=PSR12 src" }, From 114514291ec4b1c7780b0cfc77053bbccf3ddf47 Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Thu, 12 Jan 2023 21:11:12 +1000 Subject: [PATCH 126/249] Keep conditions on one line --- src/Sources/Json.php | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/Sources/Json.php b/src/Sources/Json.php index 04e0ba3..c9e6303 100644 --- a/src/Sources/Json.php +++ b/src/Sources/Json.php @@ -33,9 +33,7 @@ class Json extends Source */ public function matches(): bool { - return is_string($this->source) - && !is_file($this->source) - && !$this->isEndpoint($this->source); + return is_string($this->source) && !is_file($this->source) && !$this->isEndpoint($this->source); } /** From 6b0ea331fc0ce2e7ad97af50201e51a3b90fface Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Thu, 12 Jan 2023 21:13:00 +1000 Subject: [PATCH 127/249] Add PSR-7 requests to supported sources --- src/Concerns/GuzzleAware.php | 56 +++++++++++++++++++++++++++++++++ src/Sources/AnySource.php | 1 + src/Sources/Endpoint.php | 32 ++++++------------- src/Sources/Psr7Message.php | 3 +- src/Sources/Psr7Request.php | 60 ++++++++++++++++++++++++++++++++++++ 5 files changed, 128 insertions(+), 24 deletions(-) create mode 100644 src/Concerns/GuzzleAware.php 
create mode 100644 src/Sources/Psr7Request.php diff --git a/src/Concerns/GuzzleAware.php b/src/Concerns/GuzzleAware.php new file mode 100644 index 0000000..7626873 --- /dev/null +++ b/src/Concerns/GuzzleAware.php @@ -0,0 +1,56 @@ +get($url, [ + 'headers' => [ + 'Accept' => 'application/json', + 'Content-Type' => 'application/json', + ], + ]); + } + + /** + * Retrieve the JSON response of the given request + * + * @param RequestInterface $request + * @return ResponseInterface + */ + protected function sendRequest(RequestInterface $request): ResponseInterface + { + return (new Client())->sendRequest($request); + } +} diff --git a/src/Sources/AnySource.php b/src/Sources/AnySource.php index 4d04d97..aea27eb 100644 --- a/src/Sources/AnySource.php +++ b/src/Sources/AnySource.php @@ -26,6 +26,7 @@ class AnySource extends Source JsonResource::class, LaravelClientResponse::class, Psr7Message::class, + Psr7Request::class, Psr7Stream::class, ]; diff --git a/src/Sources/Endpoint.php b/src/Sources/Endpoint.php index 109bbcf..34d7b8e 100644 --- a/src/Sources/Endpoint.php +++ b/src/Sources/Endpoint.php @@ -3,19 +3,20 @@ namespace Cerbero\JsonParser\Sources; use Cerbero\JsonParser\Concerns\DetectsEndpoints; -use Cerbero\JsonParser\Exceptions\SourceException; -use GuzzleHttp\Client; +use Cerbero\JsonParser\Concerns\GuzzleAware; use Psr\Http\Message\ResponseInterface; +use Psr\Http\Message\UriInterface; use Traversable; /** * The endpoint source. * - * @property-read string $source + * @property-read UriInterface|string $source */ class Endpoint extends Source { use DetectsEndpoints; + use GuzzleAware; /** * The endpoint response. 
@@ -28,33 +29,17 @@ class Endpoint extends Source * Retrieve the JSON fragments * * @return Traversable + * @throws \Cerbero\JsonParser\Exceptions\SourceException */ public function getIterator(): Traversable { - if (!$this->guzzleIsLoaded()) { - throw SourceException::requireGuzzle(); - } + $this->requireGuzzle(); - $this->response = (new Client())->get($this->source, [ - 'headers' => [ - 'Accept' => 'application/json', - 'Content-Type' => 'application/json', - ], - ]); + $this->response = $this->getJson($this->source); return new Psr7Message($this->response, $this->config); } - /** - * Determine whether the Guzzle client is loaded - * - * @return bool - */ - protected function guzzleIsLoaded(): bool - { - return class_exists(Client::class); - } - /** * Determine whether the JSON source can be handled * @@ -62,7 +47,8 @@ class Endpoint extends Source */ public function matches(): bool { - return is_string($this->source) && $this->isEndpoint($this->source); + // @phpstan-ignore-next-line + return (is_string($this->source) || $this->source instanceof UriInterface) && $this->isEndpoint($this->source); } /** diff --git a/src/Sources/Psr7Message.php b/src/Sources/Psr7Message.php index 7707c71..2471240 100644 --- a/src/Sources/Psr7Message.php +++ b/src/Sources/Psr7Message.php @@ -3,6 +3,7 @@ namespace Cerbero\JsonParser\Sources; use Psr\Http\Message\MessageInterface; +use Psr\Http\Message\RequestInterface; use Traversable; /** @@ -29,7 +30,7 @@ class Psr7Message extends Source */ public function matches(): bool { - return $this->source instanceof MessageInterface; + return $this->source instanceof MessageInterface && !$this->source instanceof RequestInterface; } /** diff --git a/src/Sources/Psr7Request.php b/src/Sources/Psr7Request.php new file mode 100644 index 0000000..0340b96 --- /dev/null +++ b/src/Sources/Psr7Request.php @@ -0,0 +1,60 @@ + + * @throws \Cerbero\JsonParser\Exceptions\SourceException + */ + public function getIterator(): Traversable + { + 
$this->requireGuzzle(); + + $this->response = $this->sendRequest($this->source); + + return new Psr7Message($this->response, $this->config); + } + + /** + * Determine whether the JSON source can be handled + * + * @return bool + */ + public function matches(): bool + { + return $this->source instanceof RequestInterface; + } + + /** + * Retrieve the calculated size of the JSON source + * + * @return int|null + */ + protected function calculateSize(): ?int + { + return $this->response?->getBody()->getSize(); + } +} From dcb39ff7b66bfd5cfe79e9efccefd8cb96be1fd4 Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Thu, 12 Jan 2023 21:22:34 +1000 Subject: [PATCH 128/249] Update README --- README.md | 51 +++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 49 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 9d270c3..110dbc3 100644 --- a/README.md +++ b/README.md @@ -8,10 +8,11 @@ [![PHPStan Level][ico-phpstan]][link-phpstan] [![Latest Version][ico-version]][link-packagist] [![Software License][ico-license]](LICENSE.md) +[![PSR-7][ico-psr7]][link-psr7] [![PSR-12][ico-psr12]][link-psr12] [![Total Downloads][ico-downloads]][link-downloads] -Zero-dependencies pull parser to read big JSON from any source in a memory-efficient way. +Zero-dependencies pull parser to read large JSON from any source in a memory-efficient way. ## 📦 Install @@ -24,7 +25,51 @@ composer require cerbero/json-parser ## 🔮 Usage -work in progress... 
:) +* [Sources](#sources) + +JSON Parser provides a minimal API to read large JSON from any source: + +```php +use Cerbero\JsonParser\JsonParser; + +// the JSON source in this example is an API endpoint +$source = 'https://randomuser.me/api/1.4?seed=json-parser&results=5'; + +foreach (new JsonParser($source) as $key => $value) { + // instead of loading the whole JSON, we keep in memory only one key and value at a time +} +``` + +Depending on our taste, we can instantiate the parser in 3 different ways: + +```php +use Cerbero\JsonParser\JsonParser; + +// classic object instantiation +new JsonParser($source); + +// static instantiation, facilitates methods chaining +JsonParser::parse($source); + +// namespaced function +use function Cerbero\JsonParser\parseJson; + +parseJson($source); +``` + +### Sources + +A wide range of JSON sources is supported, here is the full list: +- strings, e.g. `{"foo":"bar"}` +- iterables, i.e. arrays or instances of `Traversable` +- files, e.g. `/path/to/large_file.json` +- resources, e.g. streams +- API endpoint URLs, e.g. `https://endpoint.json` or any instance of `Psr\Http\Message\UriInterface` +- PSR-7 compliant requests, i.e. any instance of `Psr\Http\Message\RequestInterface` +- PSR-7 compliant messages, i.e. any instance of `Psr\Http\Message\MessageInterface` +- PSR-7 compliant streams, i.e. any instance of `Psr\Http\Message\StreamInterface` +- responses from the Laravel HTTP client, i.e. any instance of `Illuminate\Http\Client\Response` +- user-defined sources, i.e. any instance of `Cerbero\JsonParser\Sources\Source` ## 📆 Change log @@ -58,6 +103,7 @@ The MIT License (MIT). 
Please see [License File](LICENSE.md) for more informatio [ico-version]: https://img.shields.io/packagist/v/cerbero/json-parser.svg?label=version&style=flat-square [ico-actions]: https://img.shields.io/github/actions/workflow/status/cerbero90/json-parser/workflows/build.yml?branch=master&style=flat-square&logo=github [ico-license]: https://img.shields.io/badge/license-MIT-brightgreen.svg?style=flat-square +[ico-psr7]: https://img.shields.io/static/v1?label=compliance&message=PSR-7&color=blue&style=flat-square [ico-psr12]: https://img.shields.io/static/v1?label=compliance&message=PSR-12&color=blue&style=flat-square [ico-scrutinizer]: https://img.shields.io/scrutinizer/coverage/g/cerbero90/json-parser.svg?style=flat-square&logo=scrutinizer [ico-code-quality]: https://img.shields.io/scrutinizer/g/cerbero90/json-parser.svg?style=flat-square&logo=scrutinizer @@ -68,6 +114,7 @@ The MIT License (MIT). Please see [License File](LICENSE.md) for more informatio [link-php]: https://www.php.net [link-packagist]: https://packagist.org/packages/cerbero/json-parser [link-actions]: https://github.com/cerbero90/json-parser/actions?query=workflow%3Abuild +[link-psr7]: https://www.php-fig.org/psr/psr-7/ [link-psr12]: https://www.php-fig.org/psr/psr-12/ [link-scrutinizer]: https://scrutinizer-ci.com/g/cerbero90/json-parser/code-structure [link-code-quality]: https://scrutinizer-ci.com/g/cerbero90/json-parser From f722e41c93c33cb915de6c27b2229cd80f33ce23 Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Fri, 13 Jan 2023 20:49:59 +1000 Subject: [PATCH 129/249] Update README --- README.md | 40 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/README.md b/README.md index 110dbc3..dfc464e 100644 --- a/README.md +++ b/README.md @@ -71,6 +71,46 @@ A wide range of JSON sources is supported, here is the full list: - responses from the Laravel HTTP client, i.e. any instance of `Illuminate\Http\Client\Response` - user-defined sources, i.e. 
any instance of `Cerbero\JsonParser\Sources\Source` +If the source we need to parse is not supported by default, we can implement our own custom source. + +
Click here to see how to implement a custom source. + +To implement a custom source, we need to extend `Source` and implement 3 methods: + +```php +use Cerbero\JsonParser\Sources\Source; +use Traversable; + +class CustomSource extends Source +{ + public function getIterator(): Traversable + { + // return a Traversable holding the JSON source, e.g. a Generator yielding chunks of JSON + } + + public function matches(): bool + { + // return TRUE if this class can handle the JSON source + } + + protected function calculateSize(): ?int + { + // return the size of the JSON in bytes or NULL if it can't be calculated + } +} +``` + +The parent class `Source` gives us access to 2 properties: +- `$source`: the JSON source we pass to the parser, i.e.: `new JsonParser($source)` +- `$config`: the configuration we set by chaining methods, e.g.: `$parser->pointer('/foo')` + +The method `getIterator()` defines the logic to read the JSON source in a memory-efficient way. It feeds the parser with small pieces of JSON. Please refer to the [already existing sources](https://github.com/cerbero90/json-parser/tree/master/src/Sources) to see some implementations. + +The method `matches()` determines whether the JSON source passed to the parser can be handled by our custom implementation. In other words, we are telling the parser if it should use our class for the JSON to parse. + +Finally, `calculateSize()` computes the whole size of the JSON source. It's used to track the parsing progress, however it's not always possible to know the size of a JSON source. In this case, or if we don't need to track the progress, we can return `null`. +
+ ## 📆 Change log Please see [CHANGELOG](CHANGELOG.md) for more information on what has changed recently. From 6d731cdb511f4719ae14d4d50e5148bbbfb9ba08 Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Fri, 13 Jan 2023 20:56:14 +1000 Subject: [PATCH 130/249] Update README --- README.md | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/README.md b/README.md index dfc464e..917637d 100644 --- a/README.md +++ b/README.md @@ -60,20 +60,20 @@ parseJson($source); ### Sources A wide range of JSON sources is supported, here is the full list: -- strings, e.g. `{"foo":"bar"}` -- iterables, i.e. arrays or instances of `Traversable` -- files, e.g. `/path/to/large_file.json` -- resources, e.g. streams -- API endpoint URLs, e.g. `https://endpoint.json` or any instance of `Psr\Http\Message\UriInterface` -- PSR-7 compliant requests, i.e. any instance of `Psr\Http\Message\RequestInterface` -- PSR-7 compliant messages, i.e. any instance of `Psr\Http\Message\MessageInterface` -- PSR-7 compliant streams, i.e. any instance of `Psr\Http\Message\StreamInterface` -- responses from the Laravel HTTP client, i.e. any instance of `Illuminate\Http\Client\Response` -- user-defined sources, i.e. any instance of `Cerbero\JsonParser\Sources\Source` +- **strings**, e.g. `{"foo":"bar"}` +- **iterables**, i.e. arrays or instances of `Traversable` +- **files**, e.g. `/path/to/large_file.json` +- **resources**, e.g. streams +- **API endpoint URLs**, e.g. `https://endpoint.json` or any instance of `Psr\Http\Message\UriInterface` +- **PSR-7 requests**, i.e. any instance of `Psr\Http\Message\RequestInterface` +- **PSR-7 messages**, i.e. any instance of `Psr\Http\Message\MessageInterface` +- **PSR-7 streams**, i.e. any instance of `Psr\Http\Message\StreamInterface` +- **Laravel HTTP client responses**, i.e. any instance of `Illuminate\Http\Client\Response` +- **user-defined sources**, i.e. 
any instance of `Cerbero\JsonParser\Sources\Source` If the source we need to parse is not supported by default, we can implement our own custom source. -
Click here to see how to implement a custom source. +
Click here to see how to implement a custom source. To implement a custom source, we need to extend `Source` and implement 3 methods: From 45ed8e9650516a2a7893fc5b315323c53b1c85bf Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Fri, 13 Jan 2023 23:57:12 +1000 Subject: [PATCH 131/249] Refine callable pointers --- src/Parser.php | 9 +++++---- src/Pointers/Pointer.php | 2 +- src/State.php | 12 ++++++++++++ 3 files changed, 18 insertions(+), 5 deletions(-) diff --git a/src/Parser.php b/src/Parser.php index c43db20..37aab44 100644 --- a/src/Parser.php +++ b/src/Parser.php @@ -67,10 +67,11 @@ final class Parser implements IteratorAggregate continue; } - if ($this->state->hasBuffer() && $this->state->inObject()) { - yield $this->decoder->decode($this->state->key()) => $this->decoder->decode($this->state->value()); - } elseif ($this->state->hasBuffer() && !$this->state->inObject()) { - yield $this->decoder->decode($this->state->value()); + if ($this->state->hasBuffer()) { + $key = $this->decoder->decode($this->state->key()); + $value = $this->decoder->decode($this->state->value()); + + yield $key => $this->state->callPointer($value, $key); } if ($this->state->canStopParsing()) { diff --git a/src/Pointers/Pointer.php b/src/Pointers/Pointer.php index 57a25dd..36e867f 100644 --- a/src/Pointers/Pointer.php +++ b/src/Pointers/Pointer.php @@ -100,7 +100,7 @@ final class Pointer implements Stringable */ public function call(mixed $value, mixed $key): mixed { - return call_user_func($this->callback, $value, $key); + return call_user_func($this->callback, $value, $key) ?? 
$value; } /** diff --git a/src/State.php b/src/State.php index 0c6cc36..0b947f9 100644 --- a/src/State.php +++ b/src/State.php @@ -124,6 +124,18 @@ final class State return $this->pointers->wereFound() && !$this->pointer->includesTree($this->tree); } + /** + * Call the current pointer callback + * + * @param mixed $value + * @param mixed $key + * @return mixed + */ + public function callPointer(mixed $value, mixed $key): mixed + { + return $this->pointer->call($value, $key); + } + /** * Mutate state depending on the given token * From b09b7381734f0858abb0848dc06f1b82a172a71f Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Sat, 14 Jan 2023 10:15:25 +1000 Subject: [PATCH 132/249] Test that indexes are preserved --- tests/fixtures/pointers/multiple_pointers.php | 60 +++++++++-------- tests/fixtures/pointers/single_pointer.php | 64 ++++++++++--------- 2 files changed, 66 insertions(+), 58 deletions(-) diff --git a/tests/fixtures/pointers/multiple_pointers.php b/tests/fixtures/pointers/multiple_pointers.php index 1e34d31..5cfab2a 100644 --- a/tests/fixtures/pointers/multiple_pointers.php +++ b/tests/fixtures/pointers/multiple_pointers.php @@ -14,34 +14,38 @@ return [ ], '/-/batters/batter/-,/-/name' => [ 'name' => ['Cake', 'Raised', 'Old Fashioned'], - [ - "id" => "1001", - "type" => "Regular", + 0 => [ + [ + "id" => "1001", + "type" => "Regular", + ], + [ + "id" => "1001", + "type" => "Regular", + ], + [ + "id" => "1001", + "type" => "Regular", + ], ], - [ - "id" => "1002", - "type" => "Chocolate", + 1 => [ + [ + "id" => "1002", + "type" => "Chocolate", + ], + [ + "id" => "1002", + "type" => "Chocolate", + ], ], - [ + 2 => [ "id" => "1003", "type" => "Blueberry", ], - [ + 3 => [ "id" => "1004", "type" => "Devil's Food", ], - [ - "id" => "1001", - "type" => "Regular", - ], - [ - "id" => "1001", - "type" => "Regular", - ], - [ - "id" => "1002", - "type" => "Chocolate", - ], ], ], 'complex_object' => [ @@ -85,14 +89,14 @@ return [ ], 'simple_array' => [ 
'/-1,/-2' => [], - '/0,/1' => [1, ''], - '/1,/0' => [1, ''], - '/0,/2' => [1, 'foo'], - '/2,/3' => ['foo', '"bar"'], - '/3,/4,/5' => ['"bar"', 'hej då', 3.14], - '/4,/5,/3' => ['"bar"', 'hej då', 3.14], - '/6,/7,/8,/9' => [false, null, [], []], - '/9,/8,/7,/6' => [false, null, [], []], + '/0,/1' => [0 => 1, 1 => ''], + '/1,/0' => [0 => 1, 1 => ''], + '/0,/2' => [0 => 1, 2 => 'foo'], + '/2,/3' => [2 => 'foo', 3 => '"bar"'], + '/3,/4,/5' => [3 => '"bar"', 4 => 'hej då', 5 => 3.14], + '/4,/5,/3' => [3 => '"bar"', 4 => 'hej då', 5 => 3.14], + '/6,/7,/8,/9' => [6 => false, 7 => null, 8 => [], 9 => []], + '/9,/8,/7,/6' => [6 => false, 7 => null, 8 => [], 9 => []], ], 'simple_object' => [ '/-1,/-2' => [], diff --git a/tests/fixtures/pointers/single_pointer.php b/tests/fixtures/pointers/single_pointer.php index 8a024b4..9042026 100644 --- a/tests/fixtures/pointers/single_pointer.php +++ b/tests/fixtures/pointers/single_pointer.php @@ -88,34 +88,38 @@ return [ ], ], '/-/batters/batter/-' => [ - [ - "id" => "1001", - "type" => "Regular", + 0 => [ + [ + "id" => "1001", + "type" => "Regular", + ], + [ + "id" => "1001", + "type" => "Regular", + ], + [ + "id" => "1001", + "type" => "Regular", + ], ], - [ - "id" => "1002", - "type" => "Chocolate", + 1 => [ + [ + "id" => "1002", + "type" => "Chocolate", + ], + [ + "id" => "1002", + "type" => "Chocolate", + ], ], - [ + 2 => [ "id" => "1003", "type" => "Blueberry", ], - [ + 3 => [ "id" => "1004", "type" => "Devil's Food", ], - [ - "id" => "1001", - "type" => "Regular", - ], - [ - "id" => "1001", - "type" => "Regular", - ], - [ - "id" => "1002", - "type" => "Chocolate", - ], ], '/-/batters/batter/-/id' => ['id' => ["1001", "1002", "1003", "1004", "1001", "1001", "1002"]], ], @@ -203,16 +207,16 @@ return [ '' => $simpleArray = require __DIR__ . 
'/../parsing/simple_array.php', '/-' => $simpleArray, '/-1' => [], - '/0' => [1], - '/1' => [''], - '/2' => ['foo'], - '/3' => ['"bar"'], - '/4' => ['hej då'], - '/5' => [3.14], - '/6' => [false], - '/7' => [null], - '/8' => [[]], - '/9' => [[]], + '/0' => [0 => 1], + '/1' => [1 => ''], + '/2' => [2 => 'foo'], + '/3' => [3 => '"bar"'], + '/4' => [4 => 'hej då'], + '/5' => [5 => 3.14], + '/6' => [6 => false], + '/7' => [7 => null], + '/8' => [8 => []], + '/9' => [9 => []], '/10' => [], '/foo' => [], ], From fbafffaa3c662f282a2d6216834719a182228ba9 Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Sat, 14 Jan 2023 10:15:59 +1000 Subject: [PATCH 133/249] Update README --- README.md | 58 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 58 insertions(+) diff --git a/README.md b/README.md index 917637d..096fa7c 100644 --- a/README.md +++ b/README.md @@ -26,6 +26,7 @@ composer require cerbero/json-parser ## 🔮 Usage * [Sources](#sources) +* [Pointers](#pointers) JSON Parser provides a minimal API to read large JSON from any source: @@ -111,6 +112,63 @@ The method `matches()` determines whether the JSON source passed to the parser c Finally, `calculateSize()` computes the whole size of the JSON source. It's used to track the parsing progress, however it's not always possible to know the size of a JSON source. In this case, or if we don't need to track the progress, we can return `null`.
+ +### Pointers + +A JSON pointer is a [standard](https://www.rfc-editor.org/rfc/rfc6901) used to point to nodes within a JSON. This package leverages JSON pointers to extract only some sub-trees from a large JSON. + +Consider [this JSON](https://randomuser.me/api/1.4?seed=json-parser&results=5) for example. To extract only the first gender and avoid parsing the rest of the JSON, we can set the `/0/gender` pointer: + +```php +$json = JsonParser::parse($source)->pointer('/0/gender'); + +foreach ($json as $key => $value) { + // 1st and only iteration: $key === 'gender', $value === 'female' +} +``` + +JSON Parser takes advantage of the `-` character to define any array index, so we can extract all the genders with the `/-/gender` pointer: + +```php +$json = JsonParser::parse($source)->pointer('/-/gender'); + +foreach ($json as $key => $value) { + // 1st iteration: $key === 'gender', $value === 'female' + // 2nd iteration: $key === 'gender', $value === 'female' + // 3rd iteration: $key === 'gender', $value === 'male' + // and so on for all the objects in the array... +} +``` + +If we want to extract more sub-trees, we can set multiple pointers. Let's extract all genders and countries: + +```php +$json = JsonParser::parse($source)->pointers(['/-/gender', '/-/location/country']); + +foreach ($json as $key => $value) { + // 1st iteration: $key === 'gender', $value === 'female' + // 2nd iteration: $key === 'country', $value === 'Germany' + // 3rd iteration: $key === 'gender', $value === 'female' + // 4th iteration: $key === 'country', $value === 'Mexico' + // and so on for all the objects in the array... +} +``` + +We can also specify a callback to execute when JSON pointers are found. 
This is handy when we have multiple pointers and we need to run custom logic for each of them: + +```php +$json = JsonParser::parse($source)->pointers([ + '/-/gender' => fn (string $gender, string $key) => new Gender($gender), + '/-/location/country' => fn (string $country, string $key) => new Country($country), +]); + +foreach ($json as $key => $value) { + // 1st iteration: $key === 'gender', $value instanceof Gender + // 2nd iteration: $key === 'country', $value instanceof Country + // and so on for all the objects in the array... +} +``` + ## 📆 Change log Please see [CHANGELOG](CHANGELOG.md) for more information on what has changed recently. From 3bc9eab5b04e5bcb60ddb36b043bcb3902f60a49 Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Sat, 14 Jan 2023 10:31:18 +1000 Subject: [PATCH 134/249] Improve static analysis --- phpstan-baseline.neon | 16 ---------------- src/Decoders/ConfigurableDecoder.php | 8 ++++---- src/Parser.php | 1 + src/Tree.php | 4 +++- 4 files changed, 8 insertions(+), 21 deletions(-) diff --git a/phpstan-baseline.neon b/phpstan-baseline.neon index a3df18f..e69de29 100644 --- a/phpstan-baseline.neon +++ b/phpstan-baseline.neon @@ -1,16 +0,0 @@ -parameters: - ignoreErrors: - - - message: "#^Generator expects key type int\\|string, mixed given\\.$#" - count: 1 - path: src/Parser.php - - - - message: "#^Parameter \\#1 \\$json of method Cerbero\\\\JsonParser\\\\Decoders\\\\ConfigurableDecoder\\:\\:decode\\(\\) expects string, int\\|string given\\.$#" - count: 1 - path: src/Parser.php - - - - message: "#^Binary operation \"\\+\" between int\\|string and 1 results in an error\\.$#" - count: 1 - path: src/Tree.php diff --git a/src/Decoders/ConfigurableDecoder.php b/src/Decoders/ConfigurableDecoder.php index 02309f5..13b7b46 100644 --- a/src/Decoders/ConfigurableDecoder.php +++ b/src/Decoders/ConfigurableDecoder.php @@ -20,14 +20,14 @@ final class ConfigurableDecoder } /** - * Decode the given JSON. + * Decode the given value. 
* - * @param string $json + * @param string|int $value * @return mixed */ - public function decode(string $json): mixed + public function decode(string|int $value): mixed { - $decoded = $this->config->decoder->decode($json); + $decoded = $this->config->decoder->decode((string) $value); if (!$decoded->succeeded) { call_user_func($this->config->onError, $decoded); diff --git a/src/Parser.php b/src/Parser.php index 37aab44..869ab84 100644 --- a/src/Parser.php +++ b/src/Parser.php @@ -68,6 +68,7 @@ final class Parser implements IteratorAggregate } if ($this->state->hasBuffer()) { + /** @var string|int $key */ $key = $this->decoder->decode($this->state->key()); $value = $this->decoder->decode($this->state->value()); diff --git a/src/Tree.php b/src/Tree.php index ac3c84d..53dd722 100644 --- a/src/Tree.php +++ b/src/Tree.php @@ -115,7 +115,9 @@ final class Tree public function traverseArray(array $referenceTokens): void { $referenceToken = $referenceTokens[$this->depth] ?? null; - $this->original[$this->depth] = isset($this->original[$this->depth]) ? $this->original[$this->depth] + 1 : 0; + $index = $this->original[$this->depth] ?? null; + + $this->original[$this->depth] = is_int($index) ? $index + 1 : 0; $this->wildcarded[$this->depth] = $referenceToken == '-' ? '-' : $this->original[$this->depth]; $this->trim(); From 76904d37687be40186c89f7f0466314446876452 Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Sat, 14 Jan 2023 10:42:59 +1000 Subject: [PATCH 135/249] Update README --- README.md | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 096fa7c..c3cbe46 100644 --- a/README.md +++ b/README.md @@ -115,7 +115,7 @@ Finally, `calculateSize()` computes the whole size of the JSON source. It's used ### Pointers -A JSON pointer is a [standard](https://www.rfc-editor.org/rfc/rfc6901) used to point to nodes within a JSON. This package leverages JSON pointers to extract only some sub-trees from a large JSON. 
+A JSON pointer is a [standard](https://www.rfc-editor.org/rfc/rfc6901) used to point to nodes within a JSON. This package leverages JSON pointers to extract only some sub-trees from large JSONs. Consider [this JSON](https://randomuser.me/api/1.4?seed=json-parser&results=5) for example. To extract only the first gender and avoid parsing the rest of the JSON, we can set the `/0/gender` pointer: @@ -154,7 +154,7 @@ foreach ($json as $key => $value) { } ``` -We can also specify a callback to execute when JSON pointers are found. This is handy when we have multiple pointers and we need to run custom logic for each of them: +We can also specify a callback to execute when JSON pointers are found. This is handy when we have different pointers and we need to run custom logic for each of them: ```php $json = JsonParser::parse($source)->pointers([ @@ -169,6 +169,14 @@ foreach ($json as $key => $value) { } ``` +The same can also be achieved by chaining the method `pointer()` multiple times: + +```php +$json = JsonParser::parse($source) + ->pointer('/-/gender', fn (string $gender, string $key) => new Gender($gender)) + ->pointer('/-/location/country', fn (string $country, string $key) => new Country($country)); +``` + ## 📆 Change log Please see [CHANGELOG](CHANGELOG.md) for more information on what has changed recently. 
From bdb20ff836e7f7291ee563359e7e947552d8a6cf Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Sat, 14 Jan 2023 13:12:49 +1000 Subject: [PATCH 136/249] Traverse the parser via method chaining --- src/JsonParser.php | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/src/JsonParser.php b/src/JsonParser.php index 19e8999..fb7f3fe 100644 --- a/src/JsonParser.php +++ b/src/JsonParser.php @@ -141,6 +141,21 @@ final class JsonParser implements IteratorAggregate return $this; } + /** + * Traverse the lazily iterable JSON + * + * @param Closure|null $callback + * @return void + */ + public function traverse(Closure $callback = null): void + { + $callback ??= fn () => true; + + foreach ($this as $key => $value) { + $callback($value, $key); + } + } + /** * Retrieve the lazily iterable JSON * From 6485fe925587f812b9f50aa3eafc3891ba25d1be Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Sat, 14 Jan 2023 13:13:03 +1000 Subject: [PATCH 137/249] Improve tests --- tests/Feature/PointersTest.php | 2 +- tests/Pest.php | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/Feature/PointersTest.php b/tests/Feature/PointersTest.php index 4e6e4f2..d5895ee 100644 --- a/tests/Feature/PointersTest.php +++ b/tests/Feature/PointersTest.php @@ -6,7 +6,7 @@ use Cerbero\JsonParser\JsonParser; it('throws an exception when providing an invalid JSON pointer', function (string $pointer) { - expect(fn () => iterator_to_array(JsonParser::parse('{}')->pointer($pointer))) + expect(fn () => JsonParser::parse('{}')->pointer($pointer)->traverse()) ->toThrow(PointerException::class, "The string [$pointer] is not a valid JSON pointer"); })->with(Dataset::forInvalidPointers()); diff --git a/tests/Pest.php b/tests/Pest.php index d0e436b..8128c67 100644 --- a/tests/Pest.php +++ b/tests/Pest.php @@ -10,7 +10,8 @@ expect()->extend('toParseTo', function (array $expected) { $actual = []; foreach ($this->value as $parsedKey => $parsedValue) { - 
expect($parsedValue)->toBe($expected[$parsedKey]); + expect($expected)->toHaveKey($parsedKey); + expect($expected[$parsedKey])->toBe($parsedValue); $actual[$parsedKey] = $parsedValue; } From e21558c39d3b1fa655a13e6f02d4ec873d9f36e2 Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Sat, 14 Jan 2023 13:13:10 +1000 Subject: [PATCH 138/249] Update README --- README.md | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/README.md b/README.md index c3cbe46..3944d58 100644 --- a/README.md +++ b/README.md @@ -169,14 +169,50 @@ foreach ($json as $key => $value) { } ``` +> ⚠️ Please note the parameters order of the callbacks: the value is passed before the key. + The same can also be achieved by chaining the method `pointer()` multiple times: ```php $json = JsonParser::parse($source) ->pointer('/-/gender', fn (string $gender, string $key) => new Gender($gender)) ->pointer('/-/location/country', fn (string $country, string $key) => new Country($country)); + +foreach ($json as $key => $value) { + // 1st iteration: $key === 'gender', $value instanceof Gender + // 2nd iteration: $key === 'country', $value instanceof Country + // and so on for all the objects in the array... 
+} ``` +If the callbacks are enough to handle the pointers and we don't need to run any common logic for all pointers, we can avoid to manually call `foreach()` by chaining the method `traverse()`: + +```php +JsonParser::parse($source) + ->pointer('/-/gender', $this->storeGender(...)) + ->pointer('/-/location/country', $this->storeCountry(...)) + ->traverse(); + +// no foreach needed +``` + +Otherwise if some common logic for all pointers is needed and we prefer methods chaining to manual loops, we can pass a callback to the `traverse()` method: + +```php +JsonParser::parse($source) + ->pointer('/-/gender', fn (string $gender, string $key) => new Gender($gender)) + ->pointer('/-/location/country', fn (string $country, string $key) => new Country($country)) + ->traverse(function (Gender|Country $value, string $key) { + // 1st iteration: $key === 'gender', $value instanceof Gender + // 2nd iteration: $key === 'country', $value instanceof Country + // and so on for all the objects in the array... + }); + +// no foreach needed +``` + +> ⚠️ Please note the parameters order of the callbacks: the value is passed before the key. + ## 📆 Change log Please see [CHANGELOG](CHANGELOG.md) for more information on what has changed recently. 
From d14a8c4461e8a640af55647c4a89f83ac732fc2a Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Sat, 14 Jan 2023 13:17:15 +1000 Subject: [PATCH 139/249] Simplify expectation --- tests/Pest.php | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/Pest.php b/tests/Pest.php index 8128c67..1068c42 100644 --- a/tests/Pest.php +++ b/tests/Pest.php @@ -10,8 +10,7 @@ expect()->extend('toParseTo', function (array $expected) { $actual = []; foreach ($this->value as $parsedKey => $parsedValue) { - expect($expected)->toHaveKey($parsedKey); - expect($expected[$parsedKey])->toBe($parsedValue); + expect($expected)->toHaveKey($parsedKey, $parsedValue); $actual[$parsedKey] = $parsedValue; } From 13255eda6db46b7ad56dc603a3e286389720ce20 Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Sat, 14 Jan 2023 17:53:44 +1000 Subject: [PATCH 140/249] Update README --- README.md | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 3944d58..2cb3e45 100644 --- a/README.md +++ b/README.md @@ -127,7 +127,7 @@ foreach ($json as $key => $value) { } ``` -JSON Parser takes advantage of the `-` character to define any array index, so we can extract all the genders with the `/-/gender` pointer: +JSON Parser takes advantage of the `-` character to point to any array index, so we can extract all the genders with the `/-/gender` pointer: ```php $json = JsonParser::parse($source)->pointer('/-/gender'); @@ -154,6 +154,8 @@ foreach ($json as $key => $value) { } ``` +> ⚠️ Please avoid intersecting pointers (e.g. setting both `/foo` and `/foo/bar`) as the deeper pointer won't be found and will force the parser to parse the whole JSON. + We can also specify a callback to execute when JSON pointers are found. 
This is handy when we have different pointers and we need to run custom logic for each of them: ```php @@ -196,7 +198,7 @@ JsonParser::parse($source) // no foreach needed ``` -Otherwise if some common logic for all pointers is needed and we prefer methods chaining to manual loops, we can pass a callback to the `traverse()` method: +Otherwise if some common logic for all pointers is needed but we prefer methods chaining to manual loops, we can pass a callback to the `traverse()` method: ```php JsonParser::parse($source) From 5308163097c7ece4648af96893f49acbe2311cee Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Mon, 16 Jan 2023 17:00:21 +1000 Subject: [PATCH 141/249] Pass JSON Parser instance to the traverse callback --- src/JsonParser.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/JsonParser.php b/src/JsonParser.php index fb7f3fe..1b48781 100644 --- a/src/JsonParser.php +++ b/src/JsonParser.php @@ -152,7 +152,7 @@ final class JsonParser implements IteratorAggregate $callback ??= fn () => true; foreach ($this as $key => $value) { - $callback($value, $key); + $callback($value, $key, $this); } } From d3f4b94a38b214fbba617114744f8fbd912ced53 Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Mon, 16 Jan 2023 17:00:34 +1000 Subject: [PATCH 142/249] Update README --- README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 2cb3e45..fcb8cd6 100644 --- a/README.md +++ b/README.md @@ -191,8 +191,8 @@ If the callbacks are enough to handle the pointers and we don't need to run any ```php JsonParser::parse($source) - ->pointer('/-/gender', $this->storeGender(...)) - ->pointer('/-/location/country', $this->storeCountry(...)) + ->pointer('/-/gender', $this->handleGender(...)) + ->pointer('/-/location/country', $this->handleCountry(...)) ->traverse(); // no foreach needed @@ -204,7 +204,7 @@ Otherwise if some common logic for all pointers is needed but we prefer methods 
JsonParser::parse($source) ->pointer('/-/gender', fn (string $gender, string $key) => new Gender($gender)) ->pointer('/-/location/country', fn (string $country, string $key) => new Country($country)) - ->traverse(function (Gender|Country $value, string $key) { + ->traverse(function (Gender|Country $value, string $key, JsonParser $parser) { // 1st iteration: $key === 'gender', $value instanceof Gender // 2nd iteration: $key === 'country', $value instanceof Country // and so on for all the objects in the array... From e956ec9a6cbf09f9069f2a2a6a53b882b1d6fb25 Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Wed, 18 Jan 2023 23:59:08 +1000 Subject: [PATCH 143/249] Upgrade to Pest v2 --- .gitignore | 1 + composer.json | 4 +++- phpunit.xml.dist | 45 +++++++++++++++++++-------------------------- 3 files changed, 23 insertions(+), 27 deletions(-) diff --git a/.gitignore b/.gitignore index 7ae6add..dd8b26f 100644 --- a/.gitignore +++ b/.gitignore @@ -3,4 +3,5 @@ composer.lock vendor phpcs.xml phpunit.xml +.phpunit.cache .phpunit.result.cache diff --git a/composer.json b/composer.json index 8038deb..baa0257 100644 --- a/composer.json +++ b/composer.json @@ -23,11 +23,13 @@ "require-dev": { "guzzlehttp/guzzle": "^7.2", "illuminate/http": ">=6.20", - "pestphp/pest": "^1.21", + "pestphp/pest": "^2.0", "phpstan/phpstan": "^1.9", "scrutinizer/ocular": "^1.8", "squizlabs/php_codesniffer": "^3.0" }, + "minimum-stability": "dev", + "prefer-stable": true, "suggest": { "guzzlehttp/guzzle": "Required to load JSON from endpoints (^7.2)." 
}, diff --git a/phpunit.xml.dist b/phpunit.xml.dist index 917e093..158564c 100644 --- a/phpunit.xml.dist +++ b/phpunit.xml.dist @@ -1,28 +1,21 @@ - - - - tests - - - - - src/ - - - - - - - - + + + + src/ + + + + + + + + + + tests + + + + + From 340768e24738a57c155f63e0f9d9c19f5592dded Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Wed, 18 Jan 2023 23:59:46 +1000 Subject: [PATCH 144/249] Add simdjson extension --- .github/workflows/build.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index f9a984b..45f8e80 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -25,7 +25,7 @@ jobs: uses: shivammathur/setup-php@v2 with: php-version: ${{ matrix.php }} - extensions: json, mbstring + extensions: json, mbstring, simdjson tools: composer:v2 coverage: none @@ -50,7 +50,7 @@ jobs: uses: shivammathur/setup-php@v2 with: php-version: 8.0 - extensions: json, mbstring + extensions: json, mbstring, simdjson tools: composer:v2 coverage: xdebug From 47b2e4cfafda006bdf322fa40d53017f70ab5803 Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Thu, 19 Jan 2023 00:01:20 +1000 Subject: [PATCH 145/249] Implement the simdjson decoder --- src/Config.php | 4 +-- .../{ArrayDecoder.php => AbstractDecoder.php} | 17 ++++++----- src/Decoders/DecodedValue.php | 8 ++--- src/Decoders/JsonDecoder.php | 30 +++++++++++++++++++ src/Decoders/ObjectDecoder.php | 17 ----------- src/Decoders/SimdjsonDecoder.php | 30 +++++++++++++++++++ src/JsonParser.php | 9 +++--- 7 files changed, 80 insertions(+), 35 deletions(-) rename src/Decoders/{ArrayDecoder.php => AbstractDecoder.php} (51%) create mode 100644 src/Decoders/JsonDecoder.php delete mode 100644 src/Decoders/ObjectDecoder.php create mode 100644 src/Decoders/SimdjsonDecoder.php diff --git a/src/Config.php b/src/Config.php index add27ab..36fb437 100644 --- a/src/Config.php +++ b/src/Config.php @@ -2,7 +2,7 @@ namespace 
Cerbero\JsonParser; -use Cerbero\JsonParser\Decoders\ArrayDecoder; +use Cerbero\JsonParser\Decoders\JsonDecoder; use Cerbero\JsonParser\Decoders\DecodedValue; use Cerbero\JsonParser\Decoders\Decoder; use Cerbero\JsonParser\Pointers\Pointer; @@ -48,7 +48,7 @@ final class Config */ public function __construct() { - $this->decoder = new ArrayDecoder(); + $this->decoder = new JsonDecoder(); $this->onError = fn (DecodedValue $decoded) => throw $decoded->exception; } } diff --git a/src/Decoders/ArrayDecoder.php b/src/Decoders/AbstractDecoder.php similarity index 51% rename from src/Decoders/ArrayDecoder.php rename to src/Decoders/AbstractDecoder.php index bf20c56..a4fdb40 100644 --- a/src/Decoders/ArrayDecoder.php +++ b/src/Decoders/AbstractDecoder.php @@ -2,20 +2,21 @@ namespace Cerbero\JsonParser\Decoders; -use JsonException; +use Throwable; /** - * The decoder to turn a JSON into an associative array. + * The abstract implementation of a JSON decoder. * */ -class ArrayDecoder implements Decoder +abstract class AbstractDecoder implements Decoder { /** - * Whether to decode the JSON into an associative array. + * Retrieve the decoded value of the given JSON * - * @var bool + * @param string $json + * @return mixed */ - protected bool $decodesToArray = true; + abstract protected function decodeJson(string $json): mixed; /** * Decode the given JSON. @@ -26,8 +27,8 @@ class ArrayDecoder implements Decoder public function decode(string $json): DecodedValue { try { - $value = json_decode($json, $this->decodesToArray, flags: JSON_THROW_ON_ERROR); - } catch (JsonException $e) { + $value = $this->decodeJson($json); + } catch (Throwable $e) { return DecodedValue::failed($e, $json); } diff --git a/src/Decoders/DecodedValue.php b/src/Decoders/DecodedValue.php index f88e294..850b4aa 100644 --- a/src/Decoders/DecodedValue.php +++ b/src/Decoders/DecodedValue.php @@ -2,7 +2,7 @@ namespace Cerbero\JsonParser\Decoders; -use JsonException; +use Throwable; /** * The decoded value. 
@@ -20,7 +20,7 @@ final class DecodedValue public mixed $value = null, public ?string $error = null, public ?int $code = null, - public ?JsonException $exception = null, + public ?Throwable $exception = null, public ?string $json = null, ) { } @@ -39,11 +39,11 @@ final class DecodedValue /** * Retrieve a value failed to be decoded * - * @param JsonException $e + * @param Throwable $e * @param string $json * @return static */ - public static function failed(JsonException $e, string $json): static + public static function failed(Throwable $e, string $json): static { return new static(false, null, $e->getMessage(), $e->getCode(), $e, $json); } diff --git a/src/Decoders/JsonDecoder.php b/src/Decoders/JsonDecoder.php new file mode 100644 index 0000000..628fa86 --- /dev/null +++ b/src/Decoders/JsonDecoder.php @@ -0,0 +1,30 @@ +decodesToArray, flags: JSON_THROW_ON_ERROR); + } +} diff --git a/src/Decoders/ObjectDecoder.php b/src/Decoders/ObjectDecoder.php deleted file mode 100644 index e9c7972..0000000 --- a/src/Decoders/ObjectDecoder.php +++ /dev/null @@ -1,17 +0,0 @@ -decodesToArray); + } +} diff --git a/src/JsonParser.php b/src/JsonParser.php index 1b48781..625be22 100644 --- a/src/JsonParser.php +++ b/src/JsonParser.php @@ -3,7 +3,7 @@ namespace Cerbero\JsonParser; use Cerbero\JsonParser\Decoders\Decoder; -use Cerbero\JsonParser\Decoders\ObjectDecoder; +use Cerbero\JsonParser\Decoders\SimdjsonDecoder; use Cerbero\JsonParser\Pointers\Pointer; use Cerbero\JsonParser\Sources\AnySource; use Closure; @@ -54,13 +54,14 @@ final class JsonParser implements IteratorAggregate } /** - * Set the JSON decoder to turn a JSON into objects + * Set the simdjson decoder * + * @param bool $decodesToArray * @return static */ - public function toObjects(): static + public function simdjson(bool $decodesToArray = true): static { - return $this->decoder(new ObjectDecoder()); + return $this->decoder(new SimdjsonDecoder($decodesToArray)); } /** From 24f40aee8ddc5245cd06bbeeaba132ee29a05208 Mon 
Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Thu, 19 Jan 2023 00:09:30 +1000 Subject: [PATCH 146/249] Downgrade Pest to keep compatibility with PHP 8 --- composer.json | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/composer.json b/composer.json index baa0257..e4b82e7 100644 --- a/composer.json +++ b/composer.json @@ -23,13 +23,11 @@ "require-dev": { "guzzlehttp/guzzle": "^7.2", "illuminate/http": ">=6.20", - "pestphp/pest": "^2.0", + "pestphp/pest": "^1.22", "phpstan/phpstan": "^1.9", "scrutinizer/ocular": "^1.8", "squizlabs/php_codesniffer": "^3.0" }, - "minimum-stability": "dev", - "prefer-stable": true, "suggest": { "guzzlehttp/guzzle": "Required to load JSON from endpoints (^7.2)." }, From d8f494172ca2985005599ed8494663bcd16acb9a Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Thu, 19 Jan 2023 00:13:39 +1000 Subject: [PATCH 147/249] Add simdjson extension also for static analysis --- .github/workflows/build.yml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 45f8e80..fca9520 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -25,7 +25,7 @@ jobs: uses: shivammathur/setup-php@v2 with: php-version: ${{ matrix.php }} - extensions: json, mbstring, simdjson + extensions: simdjson tools: composer:v2 coverage: none @@ -50,7 +50,7 @@ jobs: uses: shivammathur/setup-php@v2 with: php-version: 8.0 - extensions: json, mbstring, simdjson + extensions: simdjson tools: composer:v2 coverage: xdebug @@ -95,6 +95,7 @@ jobs: uses: shivammathur/setup-php@v2 with: php-version: 8.0 + extensions: simdjson tools: composer:v2 coverage: none From 61f2102302ba4151432dacf089b956bfdb333827 Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Thu, 19 Jan 2023 19:16:02 +1000 Subject: [PATCH 148/249] Refine decoders --- src/Decoders/AbstractDecoder.php | 1 + src/Decoders/JsonDecoder.php | 7 ++++--- src/Decoders/SimdjsonDecoder.php | 7 
++++--- 3 files changed, 9 insertions(+), 6 deletions(-) diff --git a/src/Decoders/AbstractDecoder.php b/src/Decoders/AbstractDecoder.php index a4fdb40..c371297 100644 --- a/src/Decoders/AbstractDecoder.php +++ b/src/Decoders/AbstractDecoder.php @@ -15,6 +15,7 @@ abstract class AbstractDecoder implements Decoder * * @param string $json * @return mixed + * @throws Throwable */ abstract protected function decodeJson(string $json): mixed; diff --git a/src/Decoders/JsonDecoder.php b/src/Decoders/JsonDecoder.php index 628fa86..dcf3d27 100644 --- a/src/Decoders/JsonDecoder.php +++ b/src/Decoders/JsonDecoder.php @@ -3,17 +3,17 @@ namespace Cerbero\JsonParser\Decoders; /** - * The decoder using the default JSON decoder. + * The decoder using the built-in JSON decoder. * */ -class JsonDecoder extends AbstractDecoder +final class JsonDecoder extends AbstractDecoder { /** * Instantiate the class. * * @param bool $decodesToArray */ - public function __construct(protected bool $decodesToArray = true) + public function __construct(private bool $decodesToArray = true) { } @@ -22,6 +22,7 @@ class JsonDecoder extends AbstractDecoder * * @param string $json * @return mixed + * @throws \Throwable */ protected function decodeJson(string $json): mixed { diff --git a/src/Decoders/SimdjsonDecoder.php b/src/Decoders/SimdjsonDecoder.php index ca72ddc..641f2a7 100644 --- a/src/Decoders/SimdjsonDecoder.php +++ b/src/Decoders/SimdjsonDecoder.php @@ -3,17 +3,17 @@ namespace Cerbero\JsonParser\Decoders; /** - * The simdjson decoder. + * The decoder using the simdjson library. * */ -class SimdjsonDecoder extends AbstractDecoder +final class SimdjsonDecoder extends AbstractDecoder { /** * Instantiate the class. 
* * @param bool $decodesToArray */ - public function __construct(protected bool $decodesToArray = true) + public function __construct(private bool $decodesToArray = true) { } @@ -22,6 +22,7 @@ class SimdjsonDecoder extends AbstractDecoder * * @param string $json * @return mixed + * @throws \Throwable */ protected function decodeJson(string $json): mixed { From 8d52bfb95bc290aa8055879c2efd50e3300933d7 Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Thu, 19 Jan 2023 19:16:11 +1000 Subject: [PATCH 149/249] Update README --- README.md | 65 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 65 insertions(+) diff --git a/README.md b/README.md index fcb8cd6..1222aad 100644 --- a/README.md +++ b/README.md @@ -27,6 +27,7 @@ composer require cerbero/json-parser * [Sources](#sources) * [Pointers](#pointers) +* [Decoders](#decoders) JSON Parser provides a minimal API to read large JSON from any source: @@ -215,6 +216,70 @@ JsonParser::parse($source) > ⚠️ Please note the parameters order of the callbacks: the value is passed before the key. + +### Decoders + +By default JSON Parser uses the built-in PHP function `json_decode()` to decode one key and value at a time. 
+ +Normally it decodes values to associative arrays but, if we prefer to decode values to objects, we can set a custom decoder: + +```php +use Cerbero\JsonParser\Decoders\JsonDecoder; + +JsonParser::parse($source)->decoder(new JsonDecoder(decodesToArray: false)); +``` + +JSON Parser also provides a convenient method to set the [simdjson](https://github.com/crazyxman/simdjson_php#simdjson_php) decoder: + +```php +JsonParser::parse($source)->simdjson(); // decode JSON to associative arrays using simdjson + +JsonParser::parse($source)->simdjson(decodesToArray: false); // decode JSON to objects using simdjson +``` + +[Simdjson is faster](https://github.com/crazyxman/simdjson_php/tree/master/benchmark#run-phpbench-benchmark) than `json_decode()` and can be installed via `pecl install simdjson` if your server satisfies [the requirements](https://github.com/crazyxman/simdjson_php#requirement). + +If we need a decoder that is not supported by default, we can implement our custom one. + +
Click here to see how to implement a custom decoder. + +To create a custom decoder, we need to implement the `Decoder` interface and implement 1 method: + +```php +use Cerbero\JsonParser\Decoders\Decoder; +use Cerbero\JsonParser\Decoders\DecodedValue; + +class CustomDecoder implements Decoder +{ + public function decode(string $json): DecodedValue + { + // return an instance of DecodedValue both in case of success or failure + } +} +``` + +The method `decode()` defines the logic to decode the given JSON value and it needs to return an instance of `DecodedValue` both in case of success or failure. + +To make custom decoder implementations even easier, JSON Parser provides an [abstract decoder](https://github.com/cerbero90/json-parser/tree/master/src/Decoders/AbstractDecoder.php) that hydrates `DecodedValue` for us so that we just need to define how a JSON value should be decoded: + +```php +use Cerbero\JsonParser\Decoders\AbstractDecoder; + +class CustomDecoder extends AbstractDecoder +{ + protected function decodeJson(string $json): mixed + { + // decode the given JSON or throw an exception on failure + return json_decode($json, flags: JSON_THROW_ON_ERROR); + } +} +``` + +> ⚠️ Please make sure to throw an exception in `decodeJson()` if the decoding process fails. + +To see some implementation examples, please refer to the [already existing decoders](https://github.com/cerbero90/json-parser/tree/master/src/Decoders). +
+ ## 📆 Change log Please see [CHANGELOG](CHANGELOG.md) for more information on what has changed recently. From c37c2db5c2c4abdd93980cf109e08cee24602eee Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Thu, 19 Jan 2023 19:52:34 +1000 Subject: [PATCH 150/249] Make configuration optional --- src/Sources/Source.php | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/Sources/Source.php b/src/Sources/Source.php index 76945ae..b001175 100644 --- a/src/Sources/Source.php +++ b/src/Sources/Source.php @@ -45,10 +45,11 @@ abstract class Source implements IteratorAggregate * Enforce the factory method to instantiate the class. * * @param mixed $source - * @param Config $config + * @param Config|null $config */ - final public function __construct(protected mixed $source, protected Config $config) + final public function __construct(protected mixed $source, protected Config $config = null) { + $this->config ??= new Config(); } /** From 0e8130aabf929aa738363f16e9fb931754cf33bc Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Thu, 19 Jan 2023 19:52:52 +1000 Subject: [PATCH 151/249] Update README --- README.md | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 1222aad..7dd4131 100644 --- a/README.md +++ b/README.md @@ -111,6 +111,16 @@ The method `getIterator()` defines the logic to read the JSON source in a memory The method `matches()` determines whether the JSON source passed to the parser can be handled by our custom implementation. In other words, we are telling the parser if it should use our class for the JSON to parse. Finally, `calculateSize()` computes the whole size of the JSON source. It's used to track the parsing progress, however it's not always possible to know the size of a JSON source. In this case, or if we don't need to track the progress, we can return `null`. 
+ +Now that we have implemented our custom source, we can pass it to the parser: + +```php +$json = JsonParser::parse(new CustomSource($source)); + +foreach ($json as $key => $value) { + // process one key and value of $source at a time +} +```
@@ -237,7 +247,7 @@ JsonParser::parse($source)->simdjson(); // decode JSON to associative arrays usi JsonParser::parse($source)->simdjson(decodesToArray: false); // decode JSON to objects using simdjson ``` -[Simdjson is faster](https://github.com/crazyxman/simdjson_php/tree/master/benchmark#run-phpbench-benchmark) than `json_decode()` and can be installed via `pecl install simdjson` if your server satisfies [the requirements](https://github.com/crazyxman/simdjson_php#requirement). +[Simdjson is faster](https://github.com/crazyxman/simdjson_php/tree/master/benchmark#run-phpbench-benchmark) than `json_decode()` and can be installed via `pecl install simdjson` if your server satisfies the [requirements](https://github.com/crazyxman/simdjson_php#requirement). If we need a decoder that is not supported by default, we can implement our custom one. @@ -277,6 +287,12 @@ class CustomDecoder extends AbstractDecoder > ⚠️ Please make sure to throw an exception in `decodeJson()` if the decoding process fails. +Now that we have implemented our custom decoder, we can set it like this: + +```php +JsonParser::parse($source)->decoder(new CustomDecoder()); +``` + To see some implementation examples, please refer to the [already existing decoders](https://github.com/cerbero90/json-parser/tree/master/src/Decoders). From a3a63d36e8da3e9aca05ae052d0ec65a334f4ef7 Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Thu, 19 Jan 2023 20:01:42 +1000 Subject: [PATCH 152/249] Set default configuration if none is provided --- src/Sources/Source.php | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/src/Sources/Source.php b/src/Sources/Source.php index b001175..0c00991 100644 --- a/src/Sources/Source.php +++ b/src/Sources/Source.php @@ -13,6 +13,13 @@ use Traversable; */ abstract class Source implements IteratorAggregate { + /** + * The configuration. + * + * @var Config + */ + protected Config $config; + /** * The cached size of the JSON source. 
* @@ -47,9 +54,9 @@ abstract class Source implements IteratorAggregate * @param mixed $source * @param Config|null $config */ - final public function __construct(protected mixed $source, protected Config $config = null) + final public function __construct(protected mixed $source, Config $config = null) { - $this->config ??= new Config(); + $this->config = $config ?: new Config(); } /** From 89fce387e9c79834ee0d749d5e43cd9df0f17018 Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Thu, 19 Jan 2023 20:27:42 +1000 Subject: [PATCH 153/249] Update README --- README.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/README.md b/README.md index 7dd4131..4e5014e 100644 --- a/README.md +++ b/README.md @@ -121,6 +121,8 @@ foreach ($json as $key => $value) { // process one key and value of $source at a time } ``` + +If you find yourself implementing the same custom source in different projects, feel free to send a PR and we will consider to support your custom source by default. Thank you in advance for any contribution! @@ -294,6 +296,8 @@ JsonParser::parse($source)->decoder(new CustomDecoder()); ``` To see some implementation examples, please refer to the [already existing decoders](https://github.com/cerbero90/json-parser/tree/master/src/Decoders). + +If you find yourself implementing the same custom decoder in different projects, feel free to send a PR and we will consider to support your custom decoder by default. Thank you in advance for any contribution! 
## 📆 Change log From 61d60c38f387ee14bf420a29e73b623c6bb4c5f8 Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Thu, 19 Jan 2023 20:42:55 +1000 Subject: [PATCH 154/249] Update README --- README.md | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/README.md b/README.md index 4e5014e..68f0ca3 100644 --- a/README.md +++ b/README.md @@ -25,10 +25,14 @@ composer require cerbero/json-parser ## 🔮 Usage +* [Intro](#intro) * [Sources](#sources) * [Pointers](#pointers) * [Decoders](#decoders) + +### Intro + JSON Parser provides a minimal API to read large JSON from any source: ```php @@ -59,6 +63,18 @@ use function Cerbero\JsonParser\parseJson; parseJson($source); ``` +If we don't want to use `foreach()` to loop through each key and value, we can chain the `traverse()` method: + +```php +JsonParser::parse($source)->traverse(function (mixed $value, string|int $key, JsonParser $parser) { + // lazily load one key and value at a time, we can also access the parser if needed +}); + +// no foreach needed +``` + +> ⚠️ Please note the parameters order of the callback: the value is passed before the key. 
+ ### Sources A wide range of JSON sources is supported, here is the full list: From 7b7e3af470b6dff3f1c9c9bd6ec058e096407f44 Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Thu, 19 Jan 2023 20:47:47 +1000 Subject: [PATCH 155/249] Update README --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 68f0ca3..56b7ae8 100644 --- a/README.md +++ b/README.md @@ -25,13 +25,13 @@ composer require cerbero/json-parser ## 🔮 Usage -* [Intro](#intro) +* [Basics](#basics) * [Sources](#sources) * [Pointers](#pointers) * [Decoders](#decoders) -### Intro +### Basics JSON Parser provides a minimal API to read large JSON from any source: From a3f24d3e961388c6ec95a93eab20d0f1a4444be6 Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Sun, 22 Jan 2023 09:47:38 +1000 Subject: [PATCH 156/249] Ensure size is calculated once --- src/Sources/Source.php | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/src/Sources/Source.php b/src/Sources/Source.php index 0c00991..f5d3620 100644 --- a/src/Sources/Source.php +++ b/src/Sources/Source.php @@ -27,6 +27,14 @@ abstract class Source implements IteratorAggregate */ protected ?int $size; + /** + * Whether the size was already calculated. + * Avoid re-calculations when the size is NULL (not computable). 
+ * + * @var bool + */ + protected bool $sizeWasSet = false; + /** * Retrieve the JSON fragments * @@ -76,6 +84,11 @@ abstract class Source implements IteratorAggregate */ public function size(): ?int { - return $this->size ??= $this->calculateSize(); + if(!$this->sizeWasSet) { + $this->size = $this->calculateSize(); + $this->sizeWasSet = true; + } + + return $this->size; } } From 9f5e8089629657cd09e46d99f02cab1efada6c7a Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Sun, 22 Jan 2023 09:49:16 +1000 Subject: [PATCH 157/249] Retrieve matching source when size is requested --- src/Sources/AnySource.php | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/src/Sources/AnySource.php b/src/Sources/AnySource.php index aea27eb..5f195c4 100644 --- a/src/Sources/AnySource.php +++ b/src/Sources/AnySource.php @@ -45,6 +45,21 @@ class AnySource extends Source */ public function getIterator(): Traversable { + return $this->matchingSource(); + } + + /** + * Retrieve the matching source + * + * @return Source + * @throws SourceException + */ + protected function matchingSource(): Source + { + if (isset($this->matchingSource)) { + return $this->matchingSource; + } + foreach ($this->sources() as $source) { if ($source->matches()) { return $this->matchingSource = $source; @@ -83,6 +98,6 @@ class AnySource extends Source */ protected function calculateSize(): ?int { - return $this->matchingSource?->size(); + return $this->matchingSource()->size(); } } From 403abb88596818a26e2db613ca41609698972bd5 Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Sun, 22 Jan 2023 09:49:24 +1000 Subject: [PATCH 158/249] Implement progress --- src/JsonParser.php | 96 ++++++++++++++++++++++++-------------------- src/Lexer.php | 20 +++++++++- src/Parser.php | 10 +++++ src/Progress.php | 99 ++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 181 insertions(+), 44 deletions(-) create mode 100644 src/Progress.php diff --git a/src/JsonParser.php 
b/src/JsonParser.php index 625be22..96a5c1a 100644 --- a/src/JsonParser.php +++ b/src/JsonParser.php @@ -54,27 +54,13 @@ final class JsonParser implements IteratorAggregate } /** - * Set the simdjson decoder + * Retrieve the lazily iterable JSON * - * @param bool $decodesToArray - * @return static + * @return Traversable */ - public function simdjson(bool $decodesToArray = true): static + public function getIterator(): Traversable { - return $this->decoder(new SimdjsonDecoder($decodesToArray)); - } - - /** - * Set the JSON decoder - * - * @param Decoder $decoder - * @return static - */ - public function decoder(Decoder $decoder): static - { - $this->config->decoder = $decoder; - - return $this; + return $this->parser; } /** @@ -106,6 +92,55 @@ final class JsonParser implements IteratorAggregate return $this; } + /** + * Traverse the lazily iterable JSON + * + * @param Closure|null $callback + * @return void + */ + public function traverse(Closure $callback = null): void + { + $callback ??= fn () => true; + + foreach ($this as $key => $value) { + $callback($value, $key, $this); + } + } + + /** + * Set the simdjson decoder + * + * @param bool $decodesToArray + * @return static + */ + public function simdjson(bool $decodesToArray = true): static + { + return $this->decoder(new SimdjsonDecoder($decodesToArray)); + } + + /** + * Set the JSON decoder + * + * @param Decoder $decoder + * @return static + */ + public function decoder(Decoder $decoder): static + { + $this->config->decoder = $decoder; + + return $this; + } + + /** + * Retrieve the parsing progress + * + * @return Progress + */ + public function progress(): Progress + { + return $this->parser->progress(); + } + /** * The number of bytes to read in each chunk * @@ -141,29 +176,4 @@ final class JsonParser implements IteratorAggregate return $this; } - - /** - * Traverse the lazily iterable JSON - * - * @param Closure|null $callback - * @return void - */ - public function traverse(Closure $callback = null): void 
- { - $callback ??= fn () => true; - - foreach ($this as $key => $value) { - $callback($value, $key, $this); - } - } - - /** - * Retrieve the lazily iterable JSON - * - * @return Traversable - */ - public function getIterator(): Traversable - { - return $this->parser; - } } diff --git a/src/Lexer.php b/src/Lexer.php index dac4206..1fd54a6 100644 --- a/src/Lexer.php +++ b/src/Lexer.php @@ -17,6 +17,13 @@ use Traversable; */ final class Lexer implements IteratorAggregate { + /** + * The parsing progress. + * + * @var Progress + */ + private Progress $progress; + /** * The buffer to yield. * @@ -45,6 +52,7 @@ final class Lexer implements IteratorAggregate */ public function __construct(private Source $source) { + $this->progress = new Progress(); } /** @@ -55,7 +63,7 @@ final class Lexer implements IteratorAggregate public function getIterator(): Traversable { foreach ($this->source as $chunk) { - for ($i = 0, $size = strlen($chunk); $i < $size; $i++) { + for ($i = 0, $size = strlen($chunk); $i < $size; $i++, $this->progress->advance()) { $character = $chunk[$i]; $this->inString = $this->inString($character); $this->isEscaping = $character == '\\' && !$this->isEscaping; @@ -100,4 +108,14 @@ final class Lexer implements IteratorAggregate yield Tokenizer::instance()->toToken($character); } } + + /** + * Retrieve the parsing progress + * + * @return Progress + */ + public function progress(): Progress + { + return $this->progress->setTotal($this->source->size()); + } } diff --git a/src/Parser.php b/src/Parser.php index 869ab84..37e76ad 100644 --- a/src/Parser.php +++ b/src/Parser.php @@ -80,4 +80,14 @@ final class Parser implements IteratorAggregate } } } + + /** + * Retrieve the parsing progress + * + * @return Progress + */ + public function progress(): Progress + { + return $this->lexer->progress(); + } } diff --git a/src/Progress.php b/src/Progress.php new file mode 100644 index 0000000..aff1889 --- /dev/null +++ b/src/Progress.php @@ -0,0 +1,99 @@ +current++; + + 
return $this; + } + + /** + * Retrieve the current progress + * + * @return int + */ + public function current(): int + { + return $this->current; + } + + /** + * Set the total possible progress + * + * @param int|null $total + * @return static + */ + public function setTotal(?int $total): static + { + $this->total ??= $total; + + return $this; + } + + /** + * Retrieve the total possible progress + * + * @return int|null + */ + public function total(): ?int + { + return $this->total; + } + + /** + * Retrieve the formatted percentage of the progress + * + * @return string|null + */ + public function format(): ?string + { + return is_null($percentage = $this->percentage()) ? null : number_format($percentage, 1) . '%'; + } + + /** + * Retrieve the percentage of the progress + * + * @return float|null + */ + public function percentage(): ?float + { + return is_null($fraction = $this->fraction()) ? null : $fraction * 100; + } + + /** + * Retrieve the fraction of the progress + * + * @return float|null + */ + public function fraction(): ?float + { + return $this->total ? 
$this->current / $this->total : null; + } +} From b74045e9e30312b552555c8f9ac17b81945a435b Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Sun, 22 Jan 2023 09:55:28 +1000 Subject: [PATCH 159/249] Fix style --- src/Sources/Source.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Sources/Source.php b/src/Sources/Source.php index f5d3620..f732b29 100644 --- a/src/Sources/Source.php +++ b/src/Sources/Source.php @@ -84,7 +84,7 @@ abstract class Source implements IteratorAggregate */ public function size(): ?int { - if(!$this->sizeWasSet) { + if (!$this->sizeWasSet) { $this->size = $this->calculateSize(); $this->sizeWasSet = true; } From 18ea75da4b0a3d96e7f647472e73385167bc689c Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Mon, 23 Jan 2023 20:13:01 +1000 Subject: [PATCH 160/249] Optimize time consumption --- src/Lexer.php | 84 ++++++++++++---------------------------- src/Pointers/Pointer.php | 12 ++++-- src/Progress.php | 7 ++-- src/State.php | 20 +++------- src/Tokens/Tokenizer.php | 18 ++++----- src/Tree.php | 13 +------ 6 files changed, 54 insertions(+), 100 deletions(-) diff --git a/src/Lexer.php b/src/Lexer.php index 1fd54a6..e476462 100644 --- a/src/Lexer.php +++ b/src/Lexer.php @@ -6,7 +6,6 @@ use Cerbero\JsonParser\Sources\Source; use Cerbero\JsonParser\Tokens\Token; use Cerbero\JsonParser\Tokens\Tokenizer; use Cerbero\JsonParser\Tokens\Tokens; -use Generator; use IteratorAggregate; use Traversable; @@ -25,25 +24,11 @@ final class Lexer implements IteratorAggregate private Progress $progress; /** - * The buffer to yield. + * The current position. * - * @var string + * @var int */ - private string $buffer = ''; - - /** - * Whether the current character is escaped. - * - * @var bool - */ - private bool $isEscaping = false; - - /** - * Whether the current character belongs to a string. - * - * @var bool - */ - private bool $inString = false; + private int $position = 0; /** * Instantiate the class. 
@@ -62,53 +47,34 @@ final class Lexer implements IteratorAggregate */ public function getIterator(): Traversable { + $buffer = ''; + $inString = $isEscaping = false; + foreach ($this->source as $chunk) { - for ($i = 0, $size = strlen($chunk); $i < $size; $i++, $this->progress->advance()) { + for ($i = 0, $size = strlen($chunk); $i < $size; $i++, $this->position++) { $character = $chunk[$i]; - $this->inString = $this->inString($character); - $this->isEscaping = $character == '\\' && !$this->isEscaping; + $inString = ($character == '"' && $inString && $isEscaping) + || ($character != '"' && $inString) + || ($character == '"' && !$inString); + $isEscaping = $character == '\\' && !$isEscaping; - yield from $this->yieldOrBufferCharacter($character); + if ($inString || !isset(Tokens::BOUNDARIES[$character])) { + $buffer .= $character; + continue; + } + + if ($buffer != '') { + yield Tokenizer::instance()->toToken($buffer); + $buffer = ''; + } + + if (isset(Tokens::DELIMITERS[$character])) { + yield Tokenizer::instance()->toToken($character); + } } } } - /** - * Determine whether the given character is within a string - * - * @param string $character - * @return bool - */ - private function inString(string $character): bool - { - return ($character == '"' && $this->inString && $this->isEscaping) - || ($character != '"' && $this->inString) - || ($character == '"' && !$this->inString); - } - - /** - * Yield the given character or buffer it - * - * @param string $character - * @return Generator - */ - private function yieldOrBufferCharacter(string $character): Generator - { - if ($this->inString || !isset(Tokens::BOUNDARIES[$character])) { - $this->buffer .= $character; - return; - } - - if ($this->buffer != '') { - yield Tokenizer::instance()->toToken($this->buffer); - $this->buffer = ''; - } - - if (isset(Tokens::DELIMITERS[$character])) { - yield Tokenizer::instance()->toToken($character); - } - } - /** * Retrieve the parsing progress * @@ -116,6 +82,6 @@ final class 
Lexer implements IteratorAggregate */ public function progress(): Progress { - return $this->progress->setTotal($this->source->size()); + return $this->progress->setCurrent($this->position)->setTotal($this->source->size()); } } diff --git a/src/Pointers/Pointer.php b/src/Pointers/Pointer.php index 36e867f..a1a879c 100644 --- a/src/Pointers/Pointer.php +++ b/src/Pointers/Pointer.php @@ -32,7 +32,7 @@ final class Pointer implements Stringable * * @var Closure */ - private Closure $callback; + private ?Closure $callback; /** * Whether the pointer was found. @@ -51,7 +51,7 @@ final class Pointer implements Stringable { $this->referenceTokens = $this->toReferenceTokens(); $this->depth = count($this->referenceTokens); - $this->callback = $callback ?: fn (mixed $value) => $value; + $this->callback = $callback; } /** @@ -100,6 +100,10 @@ final class Pointer implements Stringable */ public function call(mixed $value, mixed $key): mixed { + if ($this->callback === null) { + return $value; + } + return call_user_func($this->callback, $value, $key) ?? 
$value; } @@ -126,7 +130,9 @@ final class Pointer implements Stringable */ public function matchesTree(Tree $tree): bool { - return in_array($this->referenceTokens, [[], $tree->original(), $tree->wildcarded()]); + return $this->referenceTokens == [] + || $this->referenceTokens == $tree->original() + || $this->referenceTokens == $tree->wildcarded(); } /** diff --git a/src/Progress.php b/src/Progress.php index aff1889..ac1c0d0 100644 --- a/src/Progress.php +++ b/src/Progress.php @@ -23,13 +23,14 @@ final class Progress private ?int $total = null; /** - * Advance the progress + * Set the current progress * + * @param int $current * @return static */ - public function advance(): static + public function setCurrent(int $current): static { - $this->current++; + $this->current = $current; return $this; } diff --git a/src/State.php b/src/State.php index 0b947f9..fb2a800 100644 --- a/src/State.php +++ b/src/State.php @@ -146,8 +146,9 @@ final class State { $treeChanged = false; $shouldTrackTree = $this->pointer == '' || $this->tree->depth() < $this->pointer->depth(); + $tokenIsValue = $token->isValue(); - if ($shouldTrackTree && $token->isValue() && !$this->inObject()) { + if ($shouldTrackTree && $tokenIsValue && !$this->inObject()) { $this->tree->traverseArray($this->pointer->referenceTokens()); $treeChanged = true; } @@ -161,27 +162,16 @@ final class State $this->pointer = $this->pointers->matchTree($this->tree); } - $this->bufferToken($token); - - $token->mutateState($this); - } - - /** - * Buffer the given token - * - * @param Token $token - * @return void - */ - private function bufferToken(Token $token): void - { $shouldBuffer = $this->tree->depth() >= 0 && $this->pointer->matchesTree($this->tree) - && ($this->treeIsDeep() || ($token->isValue() && !$this->expectsKey)); + && (($tokenIsValue && !$this->expectsKey) || $this->treeIsDeep()); if ($shouldBuffer) { $this->buffer .= $token; $this->pointers->markAsFound($this->pointer); } + + $token->mutateState($this); } /** 
diff --git a/src/Tokens/Tokenizer.php b/src/Tokens/Tokenizer.php index 6b6c556..32351b1 100644 --- a/src/Tokens/Tokenizer.php +++ b/src/Tokens/Tokenizer.php @@ -22,15 +22,6 @@ final class Tokenizer */ private array $tokensMap; - /** - * Instantiate the class. - * - */ - private function __construct() - { - $this->setTokensMap(); - } - /** * Retrieve the singleton instance * @@ -41,6 +32,15 @@ final class Tokenizer return static::$instance ??= new static(); } + /** + * Instantiate the class. + * + */ + private function __construct() + { + $this->setTokensMap(); + } + /** * Set the tokens map * diff --git a/src/Tree.php b/src/Tree.php index 53dd722..c59a33d 100644 --- a/src/Tree.php +++ b/src/Tree.php @@ -92,16 +92,6 @@ final class Tree $this->original[$this->depth] = $trimmedKey; $this->wildcarded[$this->depth] = $trimmedKey; - $this->trim(); - } - - /** - * Trim the tree after the latest traversed key - * - * @return void - */ - private function trim(): void - { array_splice($this->original, $this->depth + 1); array_splice($this->wildcarded, $this->depth + 1); } @@ -120,7 +110,8 @@ final class Tree $this->original[$this->depth] = is_int($index) ? $index + 1 : 0; $this->wildcarded[$this->depth] = $referenceToken == '-' ? 
'-' : $this->original[$this->depth]; - $this->trim(); + array_splice($this->original, $this->depth + 1); + array_splice($this->wildcarded, $this->depth + 1); } /** From 898e1feff04c46978783b156891c51f121fe33d0 Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Tue, 24 Jan 2023 17:30:13 +1000 Subject: [PATCH 161/249] Optimize native function calls --- src/Concerns/DetectsEndpoints.php | 3 +++ src/Decoders/ConfigurableDecoder.php | 2 ++ src/Pointers/Pointer.php | 5 +++++ src/Pointers/Pointers.php | 2 ++ src/Progress.php | 2 ++ src/Sources/Endpoint.php | 2 ++ src/Sources/Filename.php | 2 ++ src/Sources/IterableSource.php | 3 +++ src/Sources/Json.php | 3 +++ src/Sources/JsonResource.php | 3 +++ src/Sources/Psr7Stream.php | 2 ++ src/State.php | 2 ++ src/Tree.php | 3 +++ 13 files changed, 34 insertions(+) diff --git a/src/Concerns/DetectsEndpoints.php b/src/Concerns/DetectsEndpoints.php index e9947d3..bc05388 100644 --- a/src/Concerns/DetectsEndpoints.php +++ b/src/Concerns/DetectsEndpoints.php @@ -2,6 +2,9 @@ namespace Cerbero\JsonParser\Concerns; +use function is_array; +use function in_array; + /** * The trait to detect endpoints. * diff --git a/src/Decoders/ConfigurableDecoder.php b/src/Decoders/ConfigurableDecoder.php index 13b7b46..58de8b9 100644 --- a/src/Decoders/ConfigurableDecoder.php +++ b/src/Decoders/ConfigurableDecoder.php @@ -4,6 +4,8 @@ namespace Cerbero\JsonParser\Decoders; use Cerbero\JsonParser\Config; +use function call_user_func; + /** * The configurable decoder. * diff --git a/src/Pointers/Pointer.php b/src/Pointers/Pointer.php index a1a879c..89d5540 100644 --- a/src/Pointers/Pointer.php +++ b/src/Pointers/Pointer.php @@ -7,6 +7,11 @@ use Cerbero\JsonParser\Tree; use Closure; use Stringable; +use function count; +use function call_user_func; +use function is_int; +use function array_slice; + /** * The JSON pointer. 
* diff --git a/src/Pointers/Pointers.php b/src/Pointers/Pointers.php index 8063918..b689e0d 100644 --- a/src/Pointers/Pointers.php +++ b/src/Pointers/Pointers.php @@ -4,6 +4,8 @@ namespace Cerbero\JsonParser\Pointers; use Cerbero\JsonParser\Tree; +use function count; + /** * The JSON pointers collection. * diff --git a/src/Progress.php b/src/Progress.php index ac1c0d0..83b5719 100644 --- a/src/Progress.php +++ b/src/Progress.php @@ -2,6 +2,8 @@ namespace Cerbero\JsonParser; +use function is_null; + /** * The parsing progress. * diff --git a/src/Sources/Endpoint.php b/src/Sources/Endpoint.php index 34d7b8e..868a165 100644 --- a/src/Sources/Endpoint.php +++ b/src/Sources/Endpoint.php @@ -8,6 +8,8 @@ use Psr\Http\Message\ResponseInterface; use Psr\Http\Message\UriInterface; use Traversable; +use function is_string; + /** * The endpoint source. * diff --git a/src/Sources/Filename.php b/src/Sources/Filename.php index 349d439..e357946 100644 --- a/src/Sources/Filename.php +++ b/src/Sources/Filename.php @@ -4,6 +4,8 @@ namespace Cerbero\JsonParser\Sources; use Traversable; +use function is_string; + /** * The filename source. * diff --git a/src/Sources/IterableSource.php b/src/Sources/IterableSource.php index 944597e..383020f 100644 --- a/src/Sources/IterableSource.php +++ b/src/Sources/IterableSource.php @@ -4,6 +4,9 @@ namespace Cerbero\JsonParser\Sources; use Traversable; +use function is_array; +use function count; + /** * The iterable source. * diff --git a/src/Sources/Json.php b/src/Sources/Json.php index c9e6303..edc27a2 100644 --- a/src/Sources/Json.php +++ b/src/Sources/Json.php @@ -5,6 +5,9 @@ namespace Cerbero\JsonParser\Sources; use Cerbero\JsonParser\Concerns\DetectsEndpoints; use Traversable; +use function is_string; +use function strlen; + /** * The JSON source. 
* diff --git a/src/Sources/JsonResource.php b/src/Sources/JsonResource.php index 635f474..ef4e601 100644 --- a/src/Sources/JsonResource.php +++ b/src/Sources/JsonResource.php @@ -4,6 +4,9 @@ namespace Cerbero\JsonParser\Sources; use Traversable; +use function is_string; +use function is_resource; + /** * The resource source. * diff --git a/src/Sources/Psr7Stream.php b/src/Sources/Psr7Stream.php index 3f28d92..f3c2604 100644 --- a/src/Sources/Psr7Stream.php +++ b/src/Sources/Psr7Stream.php @@ -5,6 +5,8 @@ namespace Cerbero\JsonParser\Sources; use Psr\Http\Message\StreamInterface; use Traversable; +use function in_array; + /** * The PSR-7 stream source. * diff --git a/src/State.php b/src/State.php index fb2a800..5c42c00 100644 --- a/src/State.php +++ b/src/State.php @@ -6,6 +6,8 @@ use Cerbero\JsonParser\Pointers\Pointer; use Cerbero\JsonParser\Pointers\Pointers; use Cerbero\JsonParser\Tokens\Token; +use function is_string; + /** * The JSON parsing state. * diff --git a/src/Tree.php b/src/Tree.php index c59a33d..01d2dca 100644 --- a/src/Tree.php +++ b/src/Tree.php @@ -2,6 +2,9 @@ namespace Cerbero\JsonParser; +use function is_int; +use function is_string; + /** * The JSON tree. 
* From 6710ef4415110c43399158182c57a29f53496fdd Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Tue, 24 Jan 2023 17:32:39 +1000 Subject: [PATCH 162/249] Move Tokenizer logic to Lexer to improve speed --- src/Lexer.php | 31 ++++++++++++++++-- src/Tokens/Tokenizer.php | 71 ---------------------------------------- 2 files changed, 28 insertions(+), 74 deletions(-) delete mode 100644 src/Tokens/Tokenizer.php diff --git a/src/Lexer.php b/src/Lexer.php index e476462..afe56c1 100644 --- a/src/Lexer.php +++ b/src/Lexer.php @@ -4,11 +4,12 @@ namespace Cerbero\JsonParser; use Cerbero\JsonParser\Sources\Source; use Cerbero\JsonParser\Tokens\Token; -use Cerbero\JsonParser\Tokens\Tokenizer; use Cerbero\JsonParser\Tokens\Tokens; use IteratorAggregate; use Traversable; +use function strlen; + /** * The JSON lexer. * @@ -23,6 +24,13 @@ final class Lexer implements IteratorAggregate */ private Progress $progress; + /** + * The map of token instances by type. + * + * @var array + */ + private array $tokensMap; + /** * The current position. 
* @@ -38,6 +46,21 @@ final class Lexer implements IteratorAggregate public function __construct(private Source $source) { $this->progress = new Progress(); + $this->setTokensMap(); + } + + /** + * Set the tokens map + * + * @return void + */ + private function setTokensMap(): void + { + $instances = []; + + foreach (Tokens::MAP as $type => $class) { + $this->tokensMap[$type] = $instances[$class] ??= new $class(); + } } /** @@ -64,12 +87,14 @@ final class Lexer implements IteratorAggregate } if ($buffer != '') { - yield Tokenizer::instance()->toToken($buffer); + $type = Tokens::TYPES[$buffer[0]]; + yield $this->tokensMap[$type]->setValue($buffer); $buffer = ''; } if (isset(Tokens::DELIMITERS[$character])) { - yield Tokenizer::instance()->toToken($character); + $type = Tokens::TYPES[$character]; + yield $this->tokensMap[$type]->setValue($character); } } } diff --git a/src/Tokens/Tokenizer.php b/src/Tokens/Tokenizer.php deleted file mode 100644 index 32351b1..0000000 --- a/src/Tokens/Tokenizer.php +++ /dev/null @@ -1,71 +0,0 @@ - - */ - private array $tokensMap; - - /** - * Retrieve the singleton instance - * - * @return static - */ - public static function instance(): static - { - return static::$instance ??= new static(); - } - - /** - * Instantiate the class. 
- * - */ - private function __construct() - { - $this->setTokensMap(); - } - - /** - * Set the tokens map - * - * @return void - */ - private function setTokensMap(): void - { - $instances = []; - - foreach (Tokens::MAP as $type => $class) { - $this->tokensMap[$type] = $instances[$class] ??= new $class(); - } - } - - /** - * Turn the given value into a token - * - * @param string $value - * @return Token - */ - public function toToken(string $value): Token - { - $character = $value[0]; - $type = Tokens::TYPES[$character]; - - return $this->tokensMap[$type]->setValue($value); - } -} From 49a315d22cc46c56cc72a957026b63021f32ac5c Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Wed, 1 Feb 2023 19:15:37 +1000 Subject: [PATCH 163/249] Splice tree only when needed --- src/Tree.php | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/src/Tree.php b/src/Tree.php index 01d2dca..7f4dd3e 100644 --- a/src/Tree.php +++ b/src/Tree.php @@ -2,9 +2,6 @@ namespace Cerbero\JsonParser; -use function is_int; -use function is_string; - /** * The JSON tree. * @@ -94,9 +91,6 @@ final class Tree $this->original[$this->depth] = $trimmedKey; $this->wildcarded[$this->depth] = $trimmedKey; - - array_splice($this->original, $this->depth + 1); - array_splice($this->wildcarded, $this->depth + 1); } /** @@ -113,8 +107,10 @@ final class Tree $this->original[$this->depth] = is_int($index) ? $index + 1 : 0; $this->wildcarded[$this->depth] = $referenceToken == '-' ? 
'-' : $this->original[$this->depth]; - array_splice($this->original, $this->depth + 1); - array_splice($this->wildcarded, $this->depth + 1); + if (count($this->original) > $this->depth) { + array_splice($this->original, $this->depth + 1); + array_splice($this->wildcarded, $this->depth + 1); + } } /** From e7b93f108fe4462d2126c6e96d3e99a5eedddd1b Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Wed, 1 Feb 2023 19:49:00 +1000 Subject: [PATCH 164/249] Optimize function call --- src/Tree.php | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/Tree.php b/src/Tree.php index 7f4dd3e..537b829 100644 --- a/src/Tree.php +++ b/src/Tree.php @@ -2,6 +2,8 @@ namespace Cerbero\JsonParser; +use function count; + /** * The JSON tree. * From 589b5e2e1851595d84670b78477c61b5e6e6528b Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Wed, 1 Feb 2023 22:25:13 +1000 Subject: [PATCH 165/249] Update README --- README.md | 29 ++++++++++++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 56b7ae8..d9090e5 100644 --- a/README.md +++ b/README.md @@ -29,6 +29,7 @@ composer require cerbero/json-parser * [Sources](#sources) * [Pointers](#pointers) * [Decoders](#decoders) +* [Progress](#progress) ### Basics @@ -126,7 +127,7 @@ The method `getIterator()` defines the logic to read the JSON source in a memory The method `matches()` determines whether the JSON source passed to the parser can be handled by our custom implementation. In other words, we are telling the parser if it should use our class for the JSON to parse. -Finally, `calculateSize()` computes the whole size of the JSON source. It's used to track the parsing progress, however it's not always possible to know the size of a JSON source. In this case, or if we don't need to track the progress, we can return `null`. +Finally, `calculateSize()` computes the whole size of the JSON source. 
It's used to track the [parsing progress](#progress), however it's not always possible to know the size of a JSON source. In this case, or if we don't need to track the progress, we can return `null`. Now that we have implemented our custom source, we can pass it to the parser: @@ -316,6 +317,32 @@ To see some implementation examples, please refer to the [already existing decod If you find yourself implementing the same custom decoder in different projects, feel free to send a PR and we will consider to support your custom decoder by default. Thank you in advance for any contribution! + +### Progress + +When processing large JSONs, we may need to know the parsing progress. JSON Parser offers convenient methods to access all the progress details: + +```php +$json = new JsonParser($source); + +$json->progress(); // +$json->progress()->current(); // the already parsed bytes e.g. 86759341 +$json->progress()->total(); // the total bytes to parse e.g. 182332642 +$json->progress()->fraction(); // the completed fraction e.g. 0.47583 +$json->progress()->percentage(); // the completed percentage e.g. 47.583 +$json->progress()->format(); // the formatted progress e.g. 47.5% +``` + +The total size of a JSON is calculated differently depending on the [source](#sources). It is not always possible to determine how large a JSON is, in these cases only the current progress is known: + +```php +$json->progress()->current(); // 86759341 +$json->progress()->total(); // null +$json->progress()->fraction(); // null +$json->progress()->percentage(); // null +$json->progress()->format(); // null +``` + ## 📆 Change log Please see [CHANGELOG](CHANGELOG.md) for more information on what has changed recently. 
From 63786a33c8c23b27e404c6ba191774549d062977 Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Sun, 5 Feb 2023 16:10:46 +1000 Subject: [PATCH 166/249] Call method directly --- src/State.php | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/State.php b/src/State.php index 5c42c00..d904f5e 100644 --- a/src/State.php +++ b/src/State.php @@ -148,9 +148,8 @@ final class State { $treeChanged = false; $shouldTrackTree = $this->pointer == '' || $this->tree->depth() < $this->pointer->depth(); - $tokenIsValue = $token->isValue(); - if ($shouldTrackTree && $tokenIsValue && !$this->inObject()) { + if ($shouldTrackTree && $token->isValue() && !$this->inObject()) { $this->tree->traverseArray($this->pointer->referenceTokens()); $treeChanged = true; } @@ -166,7 +165,7 @@ final class State $shouldBuffer = $this->tree->depth() >= 0 && $this->pointer->matchesTree($this->tree) - && (($tokenIsValue && !$this->expectsKey) || $this->treeIsDeep()); + && ((!$this->expectsKey && $token->isValue()) || $this->treeIsDeep()); if ($shouldBuffer) { $this->buffer .= $token; From f585d2bb64f716bbdf0be8ab4d3028aa0fdde2e7 Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Sun, 5 Feb 2023 16:11:36 +1000 Subject: [PATCH 167/249] Decode only strings --- src/Decoders/ConfigurableDecoder.php | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/Decoders/ConfigurableDecoder.php b/src/Decoders/ConfigurableDecoder.php index 58de8b9..8a56b72 100644 --- a/src/Decoders/ConfigurableDecoder.php +++ b/src/Decoders/ConfigurableDecoder.php @@ -29,7 +29,11 @@ final class ConfigurableDecoder */ public function decode(string|int $value): mixed { - $decoded = $this->config->decoder->decode((string) $value); + if (is_int($value)) { + return $value; + } + + $decoded = $this->config->decoder->decode($value); if (!$decoded->succeeded) { call_user_func($this->config->onError, $decoded); From 79656e3f55633cc6c87b327745df6ea19856030a Mon Sep 17 00:00:00 2001 From: 
Andrea Marco Sartori Date: Sun, 5 Feb 2023 16:12:33 +1000 Subject: [PATCH 168/249] Improve explanation --- tests/Pest.php | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/Pest.php b/tests/Pest.php index 1068c42..a29e25e 100644 --- a/tests/Pest.php +++ b/tests/Pest.php @@ -30,8 +30,8 @@ expect()->extend('toPointTo', function (array $expected) { foreach ($this->value as $parsedKey => $parsedValue) { $itemsCount[$parsedKey] = empty($itemsCount[$parsedKey]) ? 1 : $itemsCount[$parsedKey] + 1; - // the following match is required as we may deal with parsed values that are arrays - // and unpacking a parsed value that is an array may lead to unexpected results + // associate $parsedKey to $parsedValue if $parsedKey occurs once + // associate $parsedKey to an array of $parsedValue if $parsedKey occurs multiple times $actual[$parsedKey] = match ($itemsCount[$parsedKey]) { 1 => $parsedValue, 2 => [$actual[$parsedKey], $parsedValue], From 1d7a20499a572c05a58e877b02bbbaea1b84c938 Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Mon, 6 Feb 2023 18:16:31 +1000 Subject: [PATCH 169/249] Restore Tokenizer --- src/Exceptions/SyntaxException.php | 23 +++++++++++++++++++++++ src/Lexer.php | 29 +++-------------------------- 2 files changed, 26 insertions(+), 26 deletions(-) create mode 100644 src/Exceptions/SyntaxException.php diff --git a/src/Exceptions/SyntaxException.php b/src/Exceptions/SyntaxException.php new file mode 100644 index 0000000..6dae8e9 --- /dev/null +++ b/src/Exceptions/SyntaxException.php @@ -0,0 +1,23 @@ + - */ - private array $tokensMap; - /** * The current position. 
* @@ -46,21 +40,6 @@ final class Lexer implements IteratorAggregate public function __construct(private Source $source) { $this->progress = new Progress(); - $this->setTokensMap(); - } - - /** - * Set the tokens map - * - * @return void - */ - private function setTokensMap(): void - { - $instances = []; - - foreach (Tokens::MAP as $type => $class) { - $this->tokensMap[$type] = $instances[$class] ??= new $class(); - } } /** @@ -87,14 +66,12 @@ final class Lexer implements IteratorAggregate } if ($buffer != '') { - $type = Tokens::TYPES[$buffer[0]]; - yield $this->tokensMap[$type]->setValue($buffer); + yield Tokenizer::instance()->toToken($buffer, $this->position); $buffer = ''; } if (isset(Tokens::DELIMITERS[$character])) { - $type = Tokens::TYPES[$character]; - yield $this->tokensMap[$type]->setValue($character); + yield Tokenizer::instance()->toToken($character, $this->position); } } } From f103138ddfd9cef648a8c06f528b2eeee48d6db6 Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Mon, 6 Feb 2023 18:16:52 +1000 Subject: [PATCH 170/249] Restore Tokenizer --- src/Tokens/Tokenizer.php | 80 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 80 insertions(+) create mode 100644 src/Tokens/Tokenizer.php diff --git a/src/Tokens/Tokenizer.php b/src/Tokens/Tokenizer.php new file mode 100644 index 0000000..af8f421 --- /dev/null +++ b/src/Tokens/Tokenizer.php @@ -0,0 +1,80 @@ + + */ + private array $tokensMap; + + /** + * Retrieve the singleton instance + * + * @return static + */ + public static function instance(): static + { + return static::$instance ??= new static(); + } + + /** + * Instantiate the class. 
+ * + */ + private function __construct() + { + $this->setTokensMap(); + } + + /** + * Set the tokens map + * + * @return void + */ + private function setTokensMap(): void + { + $instances = []; + + foreach (Tokens::MAP as $type => $class) { + $this->tokensMap[$type] = $instances[$class] ??= new $class(); + } + } + + /** + * Turn the given value into a token + * + * @param string $value + * @param int $position + * @return Token + */ + public function toToken(string $value, int $position): Token + { + $character = $value[0]; + + if (!isset(Tokens::TYPES[$character])) { + throw new SyntaxException($value, $position); + } + + $type = Tokens::TYPES[$character]; + + return $this->tokensMap[$type]->setValue($value); + } +} From 23f1b6906fbbd02ba73897153ea85be0480a2775 Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Mon, 6 Feb 2023 18:18:14 +1000 Subject: [PATCH 171/249] Add fixture helper --- tests/Dataset.php | 12 ++++++------ tests/Pest.php | 13 +++++++++++++ 2 files changed, 19 insertions(+), 6 deletions(-) diff --git a/tests/Dataset.php b/tests/Dataset.php index 69910e8..98b92c4 100644 --- a/tests/Dataset.php +++ b/tests/Dataset.php @@ -23,7 +23,7 @@ class Dataset yield [ file_get_contents($fixture->getRealPath()), - require __DIR__ . "/fixtures/parsing/{$name}.php", + require fixture("parsing/{$name}.php"), ]; } } @@ -35,7 +35,7 @@ class Dataset */ protected static function fixtures(): Generator { - foreach (new DirectoryIterator(__DIR__ . '/fixtures/json') as $file) { + foreach (new DirectoryIterator(fixture('json')) as $file) { if (!$file->isDot()) { yield $file; } @@ -59,10 +59,10 @@ class Dataset */ public static function forSinglePointers(): Generator { - $singlePointers = require __DIR__ . '/fixtures/pointers/single_pointer.php'; + $singlePointers = require fixture('pointers/single_pointer.php'); foreach ($singlePointers as $fixture => $pointers) { - $json = file_get_contents(__DIR__ . 
"/fixtures/json/{$fixture}.json"); + $json = file_get_contents(fixture("json/{$fixture}.json")); foreach ($pointers as $pointer => $value) { yield [$json, $pointer, $value]; @@ -77,10 +77,10 @@ class Dataset */ public static function forMultiplePointers(): Generator { - $multiplePointers = require __DIR__ . '/fixtures/pointers/multiple_pointers.php'; + $multiplePointers = require fixture('pointers/multiple_pointers.php'); foreach ($multiplePointers as $fixture => $valueByPointers) { - $json = file_get_contents(__DIR__ . "/fixtures/json/{$fixture}.json"); + $json = file_get_contents(fixture("json/{$fixture}.json")); foreach ($valueByPointers as $pointers => $value) { yield [$json, explode(',', $pointers), $value]; diff --git a/tests/Pest.php b/tests/Pest.php index a29e25e..81730b8 100644 --- a/tests/Pest.php +++ b/tests/Pest.php @@ -1,5 +1,18 @@ Date: Mon, 6 Feb 2023 18:18:50 +1000 Subject: [PATCH 172/249] Add spacing --- tests/Feature/ParsingTest.php | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/Feature/ParsingTest.php b/tests/Feature/ParsingTest.php index 3134795..7e2e07f 100644 --- a/tests/Feature/ParsingTest.php +++ b/tests/Feature/ParsingTest.php @@ -5,6 +5,7 @@ use Cerbero\JsonParser\JsonParser; use function Cerbero\JsonParser\parseJson; + it('parses JSON when instantiated', function (string $json, array $parsed) { expect(new JsonParser($json))->toParseTo($parsed); })->with(Dataset::forParsing()); From 3cda37e3712d0257b253d6c3f00094bb74e1553b Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Sat, 25 Feb 2023 20:30:06 +1000 Subject: [PATCH 173/249] Install Mockery --- composer.json | 1 + 1 file changed, 1 insertion(+) diff --git a/composer.json b/composer.json index e4b82e7..b42fb9d 100644 --- a/composer.json +++ b/composer.json @@ -23,6 +23,7 @@ "require-dev": { "guzzlehttp/guzzle": "^7.2", "illuminate/http": ">=6.20", + "mockery/mockery": "^1.5", "pestphp/pest": "^1.22", "phpstan/phpstan": "^1.9", "scrutinizer/ocular": "^1.8", From 
b4683dce627a1eac2246dd4b390a409457b01f69 Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Sat, 25 Feb 2023 20:30:19 +1000 Subject: [PATCH 174/249] Update README --- README.md | 90 ++++++++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 72 insertions(+), 18 deletions(-) diff --git a/README.md b/README.md index d9090e5..9a9b409 100644 --- a/README.md +++ b/README.md @@ -25,21 +25,20 @@ composer require cerbero/json-parser ## 🔮 Usage -* [Basics](#basics) -* [Sources](#sources) -* [Pointers](#pointers) -* [Decoders](#decoders) -* [Progress](#progress) +* [👣 Basics](#basics) +* [💧 Sources](#sources) +* [🎯 Pointers](#pointers) +* [⚙️ Decoders](#decoders) +* [💢 Errors](#errors) +* [⏳ Progress](#progress) -### Basics +### 👣 Basics JSON Parser provides a minimal API to read large JSON from any source: ```php -use Cerbero\JsonParser\JsonParser; - -// the JSON source in this example is an API endpoint +// a source is anything that can provide a JSON, in this case an endpoint $source = 'https://randomuser.me/api/1.4?seed=json-parser&results=5'; foreach (new JsonParser($source) as $key => $value) { @@ -47,20 +46,20 @@ foreach (new JsonParser($source) as $key => $value) { } ``` -Depending on our taste, we can instantiate the parser in 3 different ways: +Depending on our code style, we can instantiate the parser in 3 different ways: ```php use Cerbero\JsonParser\JsonParser; +use function Cerbero\JsonParser\parseJson; + // classic object instantiation new JsonParser($source); -// static instantiation, facilitates methods chaining +// static instantiation JsonParser::parse($source); // namespaced function -use function Cerbero\JsonParser\parseJson; - parseJson($source); ``` @@ -76,12 +75,12 @@ JsonParser::parse($source)->traverse(function (mixed $value, string|int $key, Js > ⚠️ Please note the parameters order of the callback: the value is passed before the key. 
-### Sources +### 💧 Sources A wide range of JSON sources is supported, here is the full list: - **strings**, e.g. `{"foo":"bar"}` - **iterables**, i.e. arrays or instances of `Traversable` -- **files**, e.g. `/path/to/large_file.json` +- **file paths**, e.g. `/path/to/large.json` - **resources**, e.g. streams - **API endpoint URLs**, e.g. `https://endpoint.json` or any instance of `Psr\Http\Message\UriInterface` - **PSR-7 requests**, i.e. any instance of `Psr\Http\Message\RequestInterface` @@ -143,7 +142,7 @@ If you find yourself implementing the same custom source in different projects, -### Pointers +### 🎯 Pointers A JSON pointer is a [standard](https://www.rfc-editor.org/rfc/rfc6901) used to point to nodes within a JSON. This package leverages JSON pointers to extract only some sub-trees from large JSONs. @@ -246,7 +245,7 @@ JsonParser::parse($source) > ⚠️ Please note the parameters order of the callbacks: the value is passed before the key. -### Decoders +### ⚙️ Decoders By default JSON Parser uses the built-in PHP function `json_decode()` to decode one key and value at a time. @@ -318,7 +317,62 @@ If you find yourself implementing the same custom decoder in different projects, -### Progress +### 💢 Errors + +Not all JSONs are valid, some may present syntax errors due to an incorrect structure (e.g. `[}`) or decoding errors when values can't be decoded properly (e.g. `[1a]`). 
JSON Parser allows us to intervene and define the logic to run when these issues occur: + +```php +use Cerbero\JsonParser\Decoders\DecodedValue; +use Cerbero\JsonParser\Exceptions\SyntaxException; + +$json = JsonParser::parse($source) + ->onSyntaxError(fn (SyntaxException $e) => $this->handleSyntaxError($e)) + ->onDecodingError(fn (DecodedValue $decoded) => $this->handleDecodingError($decoded)); +``` + +We can even replace invalid values with placeholders to avoid that the entire JSON parsing fails because of them: + +```php +// instead of failing, replace invalid values with NULL +$json = JsonParser::parse($source)->patchDecodingError(); + +// instead of failing, replace invalid values with '' +$json = JsonParser::parse($source)->patchDecodingError(''); +``` + +For more advanced decoding errors patching, we can pass a closure that has access to the `DecodedValue` instance: + +```php +use Cerbero\JsonParser\Decoders\DecodedValue; + +$patches = ['1a' => 1, '2b' => 2]; +$json = JsonParser::parse($source) + ->patchDecodingError(fn (DecodedValue $decoded) => $patches[$decoded->json] ?? 
null); +``` + +Any exception thrown by this package implements the `JsonParserException` interface, which makes it easy to handle all exceptions in one catch: + +```php +use Cerbero\JsonParser\Exceptions\JsonParserException; + +try { + JsonParser::parse($source)->traverse(); +} catch (JsonParserException) { + // handle any exception thrown by JSON Parser +} +``` + +For reference, here is a comprehensive table of all the exceptions thrown by this package: +|`Cerbero\JsonParser\Exceptions\`|thrown when| +|---|---| +|`DecodingException`|a value in the JSON can't be decoded| +|`GuzzleRequiredException`|Guzzle is not installed and the JSON source is an endpoint| +|`InvalidPointerException`|a JSON pointer syntax is not valid| +|`SyntaxException`|the JSON structure is not valid| +|`UnsupportedSourceException`|a JSON source is not supported| + + +### ⏳ Progress When processing large JSONs, we may need to know the parsing progress. JSON Parser offers convenient methods to access all the progress details: From 7aecd1ef06111372b90d3fac5bd81d559e4e7faa Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Sat, 25 Feb 2023 20:37:11 +1000 Subject: [PATCH 175/249] Fix in-object detection logic --- src/State.php | 35 ++++++++++++----------------------- src/Tree.php | 34 +++++++++++++++++++++++++++++++--- 2 files changed, 43 insertions(+), 26 deletions(-) diff --git a/src/State.php b/src/State.php index d904f5e..1275de6 100644 --- a/src/State.php +++ b/src/State.php @@ -5,8 +5,7 @@ namespace Cerbero\JsonParser; use Cerbero\JsonParser\Pointers\Pointer; use Cerbero\JsonParser\Pointers\Pointers; use Cerbero\JsonParser\Tokens\Token; - -use function is_string; +use Cerbero\JsonParser\Tokens\Tokens; /** * The JSON parsing state. @@ -49,6 +48,13 @@ final class State */ public bool $expectsKey = false; + /** + * The expected token. + * + * @var int + */ + public int $expectedToken = Tokens::COMPOUND_BEGIN; + /** * Instantiate the class. 
* @@ -90,19 +96,6 @@ final class State return $this->tree->currentKey(); } - /** - * Determine whether the current position is within an object - * - * @return bool - */ - public function inObject(): bool - { - $tree = $this->tree->original(); - $depth = $this->tree->depth(); - - return is_string($tree[$depth] ?? null); - } - /** * Set and match the given pointers * @@ -146,20 +139,16 @@ final class State */ public function mutateByToken(Token $token): void { - $treeChanged = false; + $this->tree->changed = false; $shouldTrackTree = $this->pointer == '' || $this->tree->depth() < $this->pointer->depth(); - if ($shouldTrackTree && $token->isValue() && !$this->inObject()) { - $this->tree->traverseArray($this->pointer->referenceTokens()); - $treeChanged = true; - } - if ($shouldTrackTree && $this->expectsKey) { $this->tree->traverseKey($token); - $treeChanged = true; + } elseif ($shouldTrackTree && $token->isValue() && !$this->tree->inObject()) { + $this->tree->traverseArray($this->pointer->referenceTokens()); } - if ($treeChanged && $this->pointers->count() > 1) { + if ($this->tree->changed && $this->pointers->count() > 1) { $this->pointer = $this->pointers->matchTree($this->tree); } diff --git a/src/Tree.php b/src/Tree.php index 537b829..e558fa5 100644 --- a/src/Tree.php +++ b/src/Tree.php @@ -24,6 +24,13 @@ final class Tree */ private array $wildcarded = []; + /** + * Whether a depth is within an object. + * + * @var array + */ + private array $inObjectByDepth = []; + /** * The JSON tree depth. * @@ -31,6 +38,13 @@ final class Tree */ private int $depth = -1; + /** + * Whether the tree changed. + * + * @var bool + */ + public bool $changed = false; + /** * Retrieve the original JSON tree * @@ -51,6 +65,16 @@ final class Tree return $this->wildcarded; } + /** + * Determine whether the current depth is within an object + * + * @return bool + */ + public function inObject(): bool + { + return $this->inObjectByDepth[$this->depth] ?? 
false; + } + /** * Retrieve the JSON tree depth * @@ -62,13 +86,15 @@ final class Tree } /** - * Increase the tree depth + * Increase the tree depth by entering an object or an array * + * @param bool $inObject * @return void */ - public function deepen(): void + public function deepen(bool $inObject): void { $this->depth++; + $this->inObjectByDepth[$this->depth] = $inObject; } /** @@ -93,6 +119,7 @@ final class Tree $this->original[$this->depth] = $trimmedKey; $this->wildcarded[$this->depth] = $trimmedKey; + $this->changed = true; } /** @@ -108,8 +135,9 @@ final class Tree $this->original[$this->depth] = is_int($index) ? $index + 1 : 0; $this->wildcarded[$this->depth] = $referenceToken == '-' ? '-' : $this->original[$this->depth]; + $this->changed = true; - if (count($this->original) > $this->depth) { + if (count($this->original) > $this->depth + 1) { array_splice($this->original, $this->depth + 1); array_splice($this->wildcarded, $this->depth + 1); } From 308b15a5d800284e2c813b1ec34e162aba90e977 Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Sat, 25 Feb 2023 20:37:41 +1000 Subject: [PATCH 176/249] Update visibility --- src/Concerns/DetectsEndpoints.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Concerns/DetectsEndpoints.php b/src/Concerns/DetectsEndpoints.php index bc05388..a21d06e 100644 --- a/src/Concerns/DetectsEndpoints.php +++ b/src/Concerns/DetectsEndpoints.php @@ -17,7 +17,7 @@ trait DetectsEndpoints * @param string $value * @return bool */ - public function isEndpoint(string $value): bool + protected function isEndpoint(string $value): bool { return is_array($url = parse_url($value)) && in_array($url['scheme'] ?? 
null, ['http', 'https']) From 961b8a8278d7195dea9e3ac52d0209885da43049 Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Sat, 25 Feb 2023 20:38:35 +1000 Subject: [PATCH 177/249] Extract method to check if Guzzle is installed --- src/Concerns/GuzzleAware.php | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/src/Concerns/GuzzleAware.php b/src/Concerns/GuzzleAware.php index 7626873..3466189 100644 --- a/src/Concerns/GuzzleAware.php +++ b/src/Concerns/GuzzleAware.php @@ -2,7 +2,7 @@ namespace Cerbero\JsonParser\Concerns; -use Cerbero\JsonParser\Exceptions\SourceException; +use Cerbero\JsonParser\Exceptions\GuzzleRequiredException; use GuzzleHttp\Client; use Psr\Http\Message\ResponseInterface; use Psr\Http\Message\RequestInterface; @@ -18,15 +18,25 @@ trait GuzzleAware * Abort if Guzzle is not loaded * * @return void - * @throws SourceException + * @throws GuzzleRequiredException */ protected function requireGuzzle(): void { - if (!class_exists(Client::class)) { - throw SourceException::requireGuzzle(); + if (!$this->guzzleIsInstalled()) { + throw new GuzzleRequiredException(); } } + /** + * Determine whether Guzzle is installed + * + * @return bool + */ + protected function guzzleIsInstalled(): bool + { + return class_exists(Client::class); + } + /** * Retrieve the JSON response of the given URL * From efba19ec9b6fe597165a6a0661911cf760dc5558 Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Sat, 25 Feb 2023 20:39:46 +1000 Subject: [PATCH 178/249] Redefine exceptions --- src/Exceptions/DecodingException.php | 23 ++++++++++++ src/Exceptions/GuzzleRequiredException.php | 21 +++++++++++ src/Exceptions/InvalidPointerException.php | 22 ++++++++++++ src/Exceptions/PointerException.php | 25 ------------- src/Exceptions/SourceException.php | 35 ------------------- src/Exceptions/SyntaxException.php | 4 +-- src/Exceptions/UnsupportedSourceException.php | 22 ++++++++++++ src/Pointers/Pointer.php | 4 +-- 
src/Sources/AnySource.php | 8 ++--- src/Sources/Endpoint.php | 2 +- src/Sources/Psr7Request.php | 2 +- 11 files changed, 98 insertions(+), 70 deletions(-) create mode 100644 src/Exceptions/DecodingException.php create mode 100644 src/Exceptions/GuzzleRequiredException.php create mode 100644 src/Exceptions/InvalidPointerException.php delete mode 100644 src/Exceptions/PointerException.php delete mode 100644 src/Exceptions/SourceException.php create mode 100644 src/Exceptions/UnsupportedSourceException.php diff --git a/src/Exceptions/DecodingException.php b/src/Exceptions/DecodingException.php new file mode 100644 index 0000000..9a4e63c --- /dev/null +++ b/src/Exceptions/DecodingException.php @@ -0,0 +1,23 @@ +error, $decoded->code); + } +} diff --git a/src/Exceptions/GuzzleRequiredException.php b/src/Exceptions/GuzzleRequiredException.php new file mode 100644 index 0000000..4126ada --- /dev/null +++ b/src/Exceptions/GuzzleRequiredException.php @@ -0,0 +1,21 @@ +pointer) === 0) { - throw PointerException::invalid($this->pointer); + throw new InvalidPointerException($this->pointer); } $tokens = explode('/', $this->pointer); diff --git a/src/Sources/AnySource.php b/src/Sources/AnySource.php index 5f195c4..d44865e 100644 --- a/src/Sources/AnySource.php +++ b/src/Sources/AnySource.php @@ -2,7 +2,7 @@ namespace Cerbero\JsonParser\Sources; -use Cerbero\JsonParser\Exceptions\SourceException; +use Cerbero\JsonParser\Exceptions\UnsupportedSourceException; use Generator; use Traversable; @@ -41,7 +41,7 @@ class AnySource extends Source * Retrieve the JSON fragments * * @return Traversable - * @throws SourceException + * @throws UnsupportedSourceException */ public function getIterator(): Traversable { @@ -52,7 +52,7 @@ class AnySource extends Source * Retrieve the matching source * * @return Source - * @throws SourceException + * @throws UnsupportedSourceException */ protected function matchingSource(): Source { @@ -66,7 +66,7 @@ class AnySource extends Source } } - throw 
SourceException::unsupported(); + throw new UnsupportedSourceException($this->source); } /** diff --git a/src/Sources/Endpoint.php b/src/Sources/Endpoint.php index 868a165..180d6da 100644 --- a/src/Sources/Endpoint.php +++ b/src/Sources/Endpoint.php @@ -31,7 +31,7 @@ class Endpoint extends Source * Retrieve the JSON fragments * * @return Traversable - * @throws \Cerbero\JsonParser\Exceptions\SourceException + * @throws \Cerbero\JsonParser\Exceptions\GuzzleRequiredException */ public function getIterator(): Traversable { diff --git a/src/Sources/Psr7Request.php b/src/Sources/Psr7Request.php index 0340b96..9559e76 100644 --- a/src/Sources/Psr7Request.php +++ b/src/Sources/Psr7Request.php @@ -27,7 +27,7 @@ class Psr7Request extends Source * Retrieve the JSON fragments * * @return Traversable - * @throws \Cerbero\JsonParser\Exceptions\SourceException + * @throws \Cerbero\JsonParser\Exceptions\GuzzleRequiredException */ public function getIterator(): Traversable { From d57625ae9b490d309cde9df3b6fe337358644804 Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Sat, 25 Feb 2023 20:42:41 +1000 Subject: [PATCH 179/249] Keep track of expected tokens --- src/Tokens/Colon.php | 23 +++++++++++++++++++++++ src/Tokens/Comma.php | 13 ++----------- src/Tokens/CompoundBegin.php | 16 +++------------- src/Tokens/CompoundEnd.php | 12 ++---------- src/Tokens/Constant.php | 15 +++++++++------ src/Tokens/ScalarString.php | 14 ++++---------- src/Tokens/Token.php | 31 ++++++++++++++++--------------- src/Tokens/Tokens.php | 20 ++++++++++---------- 8 files changed, 69 insertions(+), 75 deletions(-) create mode 100644 src/Tokens/Colon.php diff --git a/src/Tokens/Colon.php b/src/Tokens/Colon.php new file mode 100644 index 0000000..a99f3c5 --- /dev/null +++ b/src/Tokens/Colon.php @@ -0,0 +1,23 @@ +expectedToken = Tokens::VALUE_ANY; + } +} diff --git a/src/Tokens/Comma.php b/src/Tokens/Comma.php index 337b0d7..6481f8d 100644 --- a/src/Tokens/Comma.php +++ b/src/Tokens/Comma.php @@ -10,16 
+10,6 @@ use Cerbero\JsonParser\State; */ final class Comma extends Token { - /** - * Retrieve the token type - * - * @return int - */ - public function type(): int - { - return Tokens::COMMA; - } - /** * Mutate the given state * @@ -28,6 +18,7 @@ final class Comma extends Token */ public function mutateState(State $state): void { - $state->expectsKey = $state->inObject(); + $state->expectsKey = $state->tree()->inObject(); + $state->expectedToken = $state->expectsKey ? Tokens::SCALAR_STRING : Tokens::VALUE_ANY; } } diff --git a/src/Tokens/CompoundBegin.php b/src/Tokens/CompoundBegin.php index f09bf91..53f8e1e 100644 --- a/src/Tokens/CompoundBegin.php +++ b/src/Tokens/CompoundBegin.php @@ -10,16 +10,6 @@ use Cerbero\JsonParser\State; */ final class CompoundBegin extends Token { - /** - * Retrieve the token type - * - * @return int - */ - public function type(): int - { - return Tokens::COMPOUND_BEGIN; - } - /** * Mutate the given state * @@ -28,8 +18,8 @@ final class CompoundBegin extends Token */ public function mutateState(State $state): void { - $state->tree()->deepen(); - - $state->expectsKey = $this->value == '{'; + $state->expectsKey = $beginsObject = $this->value == '{'; + $state->expectedToken = $beginsObject ? Tokens::AFTER_OBJECT_BEGIN : Tokens::AFTER_ARRAY_BEGIN; + $state->tree()->deepen($beginsObject); } } diff --git a/src/Tokens/CompoundEnd.php b/src/Tokens/CompoundEnd.php index eeb5bab..e7b9fd6 100644 --- a/src/Tokens/CompoundEnd.php +++ b/src/Tokens/CompoundEnd.php @@ -10,16 +10,6 @@ use Cerbero\JsonParser\State; */ final class CompoundEnd extends Token { - /** - * Retrieve the token type - * - * @return int - */ - public function type(): int - { - return Tokens::COMPOUND_END; - } - /** * Mutate the given state * @@ -29,6 +19,8 @@ final class CompoundEnd extends Token public function mutateState(State $state): void { $state->tree()->emerge(); + + $state->expectedToken = $state->tree()->inObject() ? 
Tokens::AFTER_OBJECT_VALUE : Tokens::AFTER_ARRAY_VALUE; } /** diff --git a/src/Tokens/Constant.php b/src/Tokens/Constant.php index 9bf190f..9070c3e 100644 --- a/src/Tokens/Constant.php +++ b/src/Tokens/Constant.php @@ -2,20 +2,23 @@ namespace Cerbero\JsonParser\Tokens; +use Cerbero\JsonParser\State; + /** - * The constant token, includes colons for convenience. + * The constant token. * */ final class Constant extends Token { /** - * Retrieve the token type + * Mutate the given state * - * @return int + * @param State $state + * @return void */ - public function type(): int + public function mutateState(State $state): void { - return $this->value == ':' ? Tokens::COLON : Tokens::SCALAR_CONST; + $state->expectedToken = $state->tree()->inObject() ? Tokens::AFTER_OBJECT_VALUE : Tokens::AFTER_ARRAY_VALUE; } /** @@ -25,6 +28,6 @@ final class Constant extends Token */ public function endsChunk(): bool { - return $this->value != ':'; + return true; } } diff --git a/src/Tokens/ScalarString.php b/src/Tokens/ScalarString.php index 52313df..880ede0 100644 --- a/src/Tokens/ScalarString.php +++ b/src/Tokens/ScalarString.php @@ -17,16 +17,6 @@ final class ScalarString extends Token */ private bool $isKey = false; - /** - * Retrieve the token type - * - * @return int - */ - public function type(): int - { - return Tokens::SCALAR_STRING; - } - /** * Mutate the given state * @@ -37,7 +27,11 @@ final class ScalarString extends Token { if ($this->isKey = $state->expectsKey) { $state->expectsKey = false; + $state->expectedToken = Tokens::COLON; + return; } + + $state->expectedToken = $state->tree()->inObject() ? 
Tokens::AFTER_OBJECT_VALUE : Tokens::AFTER_ARRAY_VALUE; } /** diff --git a/src/Tokens/Token.php b/src/Tokens/Token.php index 36e7f85..6093f6b 100644 --- a/src/Tokens/Token.php +++ b/src/Tokens/Token.php @@ -19,11 +19,23 @@ abstract class Token implements Stringable protected string $value; /** - * Retrieve the token type + * Mutate the given state * - * @return int + * @param State $state + * @return void */ - abstract public function type(): int; + abstract public function mutateState(State $state): void; + + /** + * Determine whether this token matches the given type + * + * @param int $type + * @return bool + */ + public function matches(int $type): bool + { + return (Tokens::TYPES[$this->value[0]] & $type) != 0; + } /** * Set the token value @@ -45,18 +57,7 @@ abstract class Token implements Stringable */ public function isValue(): bool { - return ($this->type() | Tokens::VALUE_ANY) == Tokens::VALUE_ANY; - } - - /** - * Mutate the given state - * - * @param State $state - * @return void - */ - public function mutateState(State $state): void - { - return; + return (Tokens::TYPES[$this->value[0]] | Tokens::VALUE_ANY) == Tokens::VALUE_ANY; } /** diff --git a/src/Tokens/Tokens.php b/src/Tokens/Tokens.php index fce1343..2141555 100644 --- a/src/Tokens/Tokens.php +++ b/src/Tokens/Tokens.php @@ -67,19 +67,19 @@ final class Tokens * @var array */ public const BOUNDARIES = [ - "\xEF" => true, - "\xBB" => true, - "\xBF" => true, - "\n" => true, - "\r" => true, - "\t" => true, - ' ' => true, '{' => true, '}' => true, '[' => true, ']' => true, - ':' => true, ',' => true, + ':' => true, + ' ' => true, + "\n" => true, + "\r" => true, + "\t" => true, + "\xEF" => true, + "\xBB" => true, + "\xBF" => true, ]; /** @@ -92,8 +92,8 @@ final class Tokens '}' => true, '[' => true, ']' => true, - ':' => true, ',' => true, + ':' => true, ]; /** @@ -107,7 +107,7 @@ final class Tokens self::ARRAY_BEGIN => CompoundBegin::class, self::OBJECT_END => CompoundEnd::class, self::ARRAY_END => 
CompoundEnd::class, - self::COLON => Constant::class, + self::COLON => Colon::class, self::SCALAR_CONST => Constant::class, self::SCALAR_STRING => ScalarString::class, ]; From d844e6387ad83daf219b67b2c7ad198d7664b793 Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Sat, 25 Feb 2023 20:44:56 +1000 Subject: [PATCH 180/249] Do not check for syntax errors --- src/Tokens/Tokenizer.php | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/src/Tokens/Tokenizer.php b/src/Tokens/Tokenizer.php index af8f421..9826c17 100644 --- a/src/Tokens/Tokenizer.php +++ b/src/Tokens/Tokenizer.php @@ -2,9 +2,6 @@ namespace Cerbero\JsonParser\Tokens; -use Cerbero\JsonParser\Exceptions\ParserException; -use Cerbero\JsonParser\Exceptions\SyntaxException; - /** * The tokenizer. * @@ -62,18 +59,11 @@ final class Tokenizer * Turn the given value into a token * * @param string $value - * @param int $position * @return Token */ - public function toToken(string $value, int $position): Token + public function toToken(string $value): Token { - $character = $value[0]; - - if (!isset(Tokens::TYPES[$character])) { - throw new SyntaxException($value, $position); - } - - $type = Tokens::TYPES[$character]; + $type = Tokens::TYPES[$value[0]]; return $this->tokensMap[$type]->setValue($value); } From 064c66c33615477ea1af9cb741fad7e65ee13ec0 Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Sat, 25 Feb 2023 20:45:48 +1000 Subject: [PATCH 181/249] Implement exceptions handling --- src/Config.php | 16 ++++++++++--- src/Decoders/ConfigurableDecoder.php | 2 +- src/JsonParser.php | 36 ++++++++++++++++++++++------ src/Lexer.php | 24 +++++++++++++++---- src/Parser.php | 5 ++++ 5 files changed, 68 insertions(+), 15 deletions(-) diff --git a/src/Config.php b/src/Config.php index 36fb437..97de8d4 100644 --- a/src/Config.php +++ b/src/Config.php @@ -5,6 +5,8 @@ namespace Cerbero\JsonParser; use Cerbero\JsonParser\Decoders\JsonDecoder; use 
Cerbero\JsonParser\Decoders\DecodedValue; use Cerbero\JsonParser\Decoders\Decoder; +use Cerbero\JsonParser\Exceptions\DecodingException; +use Cerbero\JsonParser\Exceptions\SyntaxException; use Cerbero\JsonParser\Pointers\Pointer; use Closure; @@ -36,11 +38,18 @@ final class Config public int $bytes = 1024 * 8; /** - * The callback to run during a parsing error. + * The callback to run during a decoding error. * * @var Closure */ - public Closure $onError; + public Closure $onDecodingError; + + /** + * The callback to run during a syntax error. + * + * @var Closure + */ + public Closure $onSyntaxError; /** * Instantiate the class @@ -49,6 +58,7 @@ final class Config public function __construct() { $this->decoder = new JsonDecoder(); - $this->onError = fn (DecodedValue $decoded) => throw $decoded->exception; + $this->onDecodingError = fn (DecodedValue $decoded) => throw new DecodingException($decoded); + $this->onSyntaxError = fn (SyntaxException $e) => throw $e; } } diff --git a/src/Decoders/ConfigurableDecoder.php b/src/Decoders/ConfigurableDecoder.php index 8a56b72..786f690 100644 --- a/src/Decoders/ConfigurableDecoder.php +++ b/src/Decoders/ConfigurableDecoder.php @@ -36,7 +36,7 @@ final class ConfigurableDecoder $decoded = $this->config->decoder->decode($value); if (!$decoded->succeeded) { - call_user_func($this->config->onError, $decoded); + call_user_func($this->config->onDecodingError, $decoded); } return $decoded->value; diff --git a/src/JsonParser.php b/src/JsonParser.php index 96a5c1a..1e9cf90 100644 --- a/src/JsonParser.php +++ b/src/JsonParser.php @@ -2,8 +2,10 @@ namespace Cerbero\JsonParser; +use Cerbero\JsonParser\Decoders\DecodedValue; use Cerbero\JsonParser\Decoders\Decoder; use Cerbero\JsonParser\Decoders\SimdjsonDecoder; +use Cerbero\JsonParser\Exceptions\SyntaxException; use Cerbero\JsonParser\Pointers\Pointer; use Cerbero\JsonParser\Sources\AnySource; use Closure; @@ -60,7 +62,11 @@ final class JsonParser implements IteratorAggregate */ public 
function getIterator(): Traversable { - return $this->parser; + try { + yield from $this->parser; + } catch (SyntaxException $e) { + call_user_func($this->config->onSyntaxError, $e); + } } /** @@ -155,24 +161,40 @@ final class JsonParser implements IteratorAggregate } /** - * Silence errors while parsing + * Set the patch to apply during a decoding error * + * @param mixed $patch * @return static */ - public function ignoreErrors(): static + public function patchDecodingError(mixed $patch = null): static { - return $this->onError(fn () => true); + return $this->onDecodingError(function (DecodedValue $decoded) use ($patch) { + $decoded->value = is_callable($patch) ? $patch($decoded) : $patch; + }); } /** - * Set the logic to run during parsing errors + * Set the logic to run during a decoding error * * @param Closure $callback * @return static */ - public function onError(Closure $callback): static + public function onDecodingError(Closure $callback): static { - $this->config->onError = $callback; + $this->config->onDecodingError = $callback; + + return $this; + } + + /** + * Set the logic to run during a syntax error + * + * @param Closure $callback + * @return static + */ + public function onSyntaxError(Closure $callback): static + { + $this->config->onSyntaxError = $callback; return $this; } diff --git a/src/Lexer.php b/src/Lexer.php index a29b5f3..3cf2243 100644 --- a/src/Lexer.php +++ b/src/Lexer.php @@ -2,6 +2,7 @@ namespace Cerbero\JsonParser; +use Cerbero\JsonParser\Exceptions\SyntaxException; use Cerbero\JsonParser\Sources\Source; use Cerbero\JsonParser\Tokens\Token; use Cerbero\JsonParser\Tokens\Tokenizer; @@ -30,7 +31,7 @@ final class Lexer implements IteratorAggregate * * @var int */ - private int $position = 0; + private int $position = 1; /** * Instantiate the class. 
@@ -59,24 +60,39 @@ final class Lexer implements IteratorAggregate || ($character != '"' && $inString) || ($character == '"' && !$inString); $isEscaping = $character == '\\' && !$isEscaping; + $shouldBuffer = $inString || !isset(Tokens::BOUNDARIES[$character]); - if ($inString || !isset(Tokens::BOUNDARIES[$character])) { + if ($shouldBuffer && $buffer == '' && !isset(Tokens::TYPES[$character])) { + throw new SyntaxException($character, $this->position); + } + + if ($shouldBuffer) { $buffer .= $character; continue; } if ($buffer != '') { - yield Tokenizer::instance()->toToken($buffer, $this->position); + yield Tokenizer::instance()->toToken($buffer); $buffer = ''; } if (isset(Tokens::DELIMITERS[$character])) { - yield Tokenizer::instance()->toToken($character, $this->position); + yield Tokenizer::instance()->toToken($character); } } } } + /** + * Retrieve the current position + * + * @return int + */ + public function position(): int + { + return $this->position; + } + /** * Retrieve the parsing progress * diff --git a/src/Parser.php b/src/Parser.php index 37e76ad..caed642 100644 --- a/src/Parser.php +++ b/src/Parser.php @@ -3,6 +3,7 @@ namespace Cerbero\JsonParser; use Cerbero\JsonParser\Decoders\ConfigurableDecoder; +use Cerbero\JsonParser\Exceptions\SyntaxException; use Cerbero\JsonParser\Sources\Source; use IteratorAggregate; use Traversable; @@ -61,6 +62,10 @@ final class Parser implements IteratorAggregate $this->state->setPointers(...$this->config->pointers); foreach ($this->lexer as $token) { + if (!$token->matches($this->state->expectedToken)) { + throw new SyntaxException($token, $this->lexer->position()); + } + $this->state->mutateByToken($token); if (!$token->endsChunk() || $this->state->treeIsDeep()) { From 49a28db2bf11a5a0b1f55b6af91c6008315987b7 Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Sat, 25 Feb 2023 20:46:41 +1000 Subject: [PATCH 182/249] Test exceptions handling --- tests/Dataset.php | 56 +++++++++++++++++++++++++++- 
tests/Feature/ErrorsHandlingTest.php | 44 ++++++++++++++++++++++ tests/Feature/PointersTest.php | 4 +- tests/Feature/SourcesTest.php | 18 +++++++++ tests/fixtures/errors/decoding.json | 1 + tests/fixtures/errors/syntax.php | 54 +++++++++++++++++++++++++++ 6 files changed, 174 insertions(+), 3 deletions(-) create mode 100644 tests/Feature/ErrorsHandlingTest.php create mode 100644 tests/Feature/SourcesTest.php create mode 100644 tests/fixtures/errors/decoding.json create mode 100644 tests/fixtures/errors/syntax.php diff --git a/tests/Dataset.php b/tests/Dataset.php index 98b92c4..0f16e84 100644 --- a/tests/Dataset.php +++ b/tests/Dataset.php @@ -2,11 +2,15 @@ namespace Cerbero\JsonParser; +use Cerbero\JsonParser\Decoders\DecodedValue; +use Cerbero\JsonParser\Sources\Endpoint; +use Cerbero\JsonParser\Sources\Psr7Request; use DirectoryIterator; use Generator; +use Mockery; /** - * The datasets entry-point. + * The dataset provider. * */ class Dataset @@ -87,4 +91,54 @@ class Dataset } } } + + /** + * Retrieve the dataset to test syntax errors + * + * @return Generator + */ + public static function forSyntaxErrors(): Generator + { + yield from require fixture('errors/syntax.php'); + } + + /** + * Retrieve the dataset to test decoding errors patching + * + * @return Generator + */ + public static function forDecodingErrorsPatching(): Generator + { + $patches = [null, 'baz', 123]; + $json = '[1a, ""b, "foo", 3.1c4, falsed, null, [1, 2e], {"bar": 1, "baz"f: 2}]'; + $patchJson = fn (mixed $patch) => [$patch, $patch, 'foo', $patch, $patch, null, $patch, $patch]; + + foreach ($patches as $patch) { + yield [$json, $patch, $patchJson($patch)]; + } + + $patch = fn (DecodedValue $decoded) => strrev($decoded->json); + $patched = ['a1', 'b""', 'foo', '4c1.3', 'deslaf', null, ']e2,1[', '}2:f"zab",1:"rab"{']; + + yield [$json, fn () => $patch, $patched]; + } + + /** + * Retrieve the dataset to test sources requiring Guzzle + * + * @return Generator + */ + public static function 
forSourcesRequiringGuzzle(): Generator + { + $sources = [Endpoint::class, Psr7Request::class]; + + foreach ($sources as $source) { + yield Mockery::mock($source) + ->makePartial() + ->shouldAllowMockingProtectedMethods() + ->shouldReceive('guzzleIsInstalled') + ->andReturn(false) + ->getMock(); + } + } } diff --git a/tests/Feature/ErrorsHandlingTest.php b/tests/Feature/ErrorsHandlingTest.php new file mode 100644 index 0000000..99f9446 --- /dev/null +++ b/tests/Feature/ErrorsHandlingTest.php @@ -0,0 +1,44 @@ + JsonParser::parse($json)->traverse()) + ->toThrow(SyntaxException::class, "Syntax error: unexpected '$unexpected' at position {$position}"); +})->with(Dataset::forSyntaxErrors()); + +it('lets the user handle syntax errors', function () { + JsonParser::parse('{a}') + ->onSyntaxError(function (SyntaxException $e) { + expect($e) + ->getMessage()->toBe("Syntax error: unexpected 'a' at position 2") + ->value->toBe('a') + ->position->toBe(2); + }) + ->traverse(); +}); + +it('throws a decoding exception if unable to decode a JSON fragment', function () { + JsonParser::parse(fixture('errors/decoding.json'))->traverse(); +})->throws(DecodingException::class, 'Decoding error: Syntax error'); + +it('lets the user handle decoding errors', function () { + $decodingErrors = []; + + JsonParser::parse(fixture('errors/decoding.json')) + ->onDecodingError(function (DecodedValue $decoded) use (&$decodingErrors) { + $decodingErrors[] = $decoded->json; + }) + ->traverse(); + + expect($decodingErrors)->toBe(['1a', '""b', '3.c14', '[f]']); +}); + +it('lets the user patch decoding errors', function (string $json, mixed $patch, array $patched) { + expect(JsonParser::parse($json)->patchDecodingError($patch))->toParseTo($patched); +})->with(Dataset::forDecodingErrorsPatching()); diff --git a/tests/Feature/PointersTest.php b/tests/Feature/PointersTest.php index d5895ee..b41b5c9 100644 --- a/tests/Feature/PointersTest.php +++ b/tests/Feature/PointersTest.php @@ -1,13 +1,13 @@ 
JsonParser::parse('{}')->pointer($pointer)->traverse()) - ->toThrow(PointerException::class, "The string [$pointer] is not a valid JSON pointer"); + ->toThrow(InvalidPointerException::class, "The string [$pointer] is not a valid JSON pointer"); })->with(Dataset::forInvalidPointers()); it('supports single JSON pointers', function (string $json, string $pointer, array $parsed) { diff --git a/tests/Feature/SourcesTest.php b/tests/Feature/SourcesTest.php new file mode 100644 index 0000000..ca28f9b --- /dev/null +++ b/tests/Feature/SourcesTest.php @@ -0,0 +1,18 @@ + JsonParser::parse(123)->traverse()) + ->toThrow(UnsupportedSourceException::class, 'Unable to load JSON from the provided source'); +}); + +it('throws an exception when Guzzle is required but not installed', function (Source $source) { + expect(fn () => JsonParser::parse($source)->traverse()) + ->toThrow(GuzzleRequiredException::class, 'Guzzle is required to load JSON from endpoints'); +})->with(Dataset::forSourcesRequiringGuzzle()); diff --git a/tests/fixtures/errors/decoding.json b/tests/fixtures/errors/decoding.json new file mode 100644 index 0000000..25bfd61 --- /dev/null +++ b/tests/fixtures/errors/decoding.json @@ -0,0 +1 @@ +[1a, ""b, 3.c14, [f]] diff --git a/tests/fixtures/errors/syntax.php b/tests/fixtures/errors/syntax.php new file mode 100644 index 0000000..1eadc68 --- /dev/null +++ b/tests/fixtures/errors/syntax.php @@ -0,0 +1,54 @@ + 'a[1, "", 3.14, [], {}]', + 'unexpected' => 'a', + 'position' => 1, + ], + [ + 'json' => '[b1, "", 3.14, [], {}]', + 'unexpected' => 'b', + 'position' => 2, + ], + [ + 'json' => '[1,c "", 3.14, [], {}]', + 'unexpected' => 'c', + 'position' => 4, + ], + [ + 'json' => '[1, d"", 3.14, [], {}]', + 'unexpected' => 'd', + 'position' => 5, + ], + [ + 'json' => '[1, "", e3.14, [], {}]', + 'unexpected' => 'e', + 'position' => 9, + ], + [ + 'json' => '[1, "", 3.14, []f, {}]', + 'unexpected' => 'f', + 'position' => 18, + ], + [ + 'json' => '[1, "", 3.14, [], g{}]', + 
'unexpected' => 'g', + 'position' => 19, + ], + [ + 'json' => '[1, "", 3.14, [], {h}]', + 'unexpected' => 'h', + 'position' => 20, + ], + [ + 'json' => '[1, "", 3.14, [], {}i]', + 'unexpected' => 'i', + 'position' => 21, + ], + [ + 'json' => '[1, "", 3.14, [], {}]j', + 'unexpected' => 'j', + 'position' => 22, + ], +]; From c193e23984a1941a15f0db4e9503cbb2e0649c8d Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Sat, 25 Feb 2023 20:57:57 +1000 Subject: [PATCH 183/249] Ensure integer type --- src/Exceptions/DecodingException.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Exceptions/DecodingException.php b/src/Exceptions/DecodingException.php index 9a4e63c..cb17067 100644 --- a/src/Exceptions/DecodingException.php +++ b/src/Exceptions/DecodingException.php @@ -18,6 +18,6 @@ final class DecodingException extends Exception implements JsonParserException */ public function __construct(public DecodedValue $decoded) { - parent::__construct('Decoding error: ' . $decoded->error, $decoded->code); + parent::__construct('Decoding error: ' . 
$decoded->error, (int) $decoded->code); } } From 43e0cc98d11c1a8239bf6ba03307dcdc0b442f1a Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Sat, 25 Feb 2023 20:58:04 +1000 Subject: [PATCH 184/249] Update readme --- README.md | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index 9a9b409..c9cb0a7 100644 --- a/README.md +++ b/README.md @@ -25,12 +25,12 @@ composer require cerbero/json-parser ## 🔮 Usage -* [👣 Basics](#basics) -* [💧 Sources](#sources) -* [🎯 Pointers](#pointers) -* [⚙️ Decoders](#decoders) -* [💢 Errors](#errors) -* [⏳ Progress](#progress) +* [👣 Basics](#-basics) +* [💧 Sources](#-sources) +* [🎯 Pointers](#-pointers) +* [⚙️ Decoders](#-decoders) +* [💢 Errors](#-errors) +* [⏳ Progress](#-progress) ### 👣 Basics From 73916689461d9daa56e7341237704a636a15329d Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Sun, 26 Feb 2023 07:37:59 +1000 Subject: [PATCH 185/249] Update readme --- README.md | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/README.md b/README.md index c9cb0a7..22a725a 100644 --- a/README.md +++ b/README.md @@ -31,6 +31,7 @@ composer require cerbero/json-parser * [⚙️ Decoders](#-decoders) * [💢 Errors](#-errors) * [⏳ Progress](#-progress) +* [🛠 Settings](#-settings) ### 👣 Basics @@ -397,6 +398,15 @@ $json->progress()->percentage(); // null $json->progress()->format(); // null ``` + +### 🛠 Settings + +JSON Parser also provides other settings to fine-tune the parsing process. For example we can set the number of bytes to read when parsing JSON strings or streams: + +```php +$json = JsonParser::parse($source)->bytes(1024 * 16); // read JSON chunks of 16KB +``` + ## 📆 Change log Please see [CHANGELOG](CHANGELOG.md) for more information on what has changed recently. 
From b018c8457e47c1376866228fc502c6ddd01c87ad Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Sun, 26 Feb 2023 08:07:53 +1000 Subject: [PATCH 186/249] Improve comment --- src/Sources/Source.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Sources/Source.php b/src/Sources/Source.php index f732b29..9dd8935 100644 --- a/src/Sources/Source.php +++ b/src/Sources/Source.php @@ -28,7 +28,7 @@ abstract class Source implements IteratorAggregate protected ?int $size; /** - * Whether the size was already calculated. + * Whether the JSON size has already been calculated. * Avoid re-calculations when the size is NULL (not computable). * * @var bool From 8580a5daf05b125428a0fcf65933d3f55ec0ded5 Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Sun, 26 Feb 2023 08:08:55 +1000 Subject: [PATCH 187/249] Let JSON size be calculated before parsing if needed --- src/Sources/Endpoint.php | 18 ++++++++++++++---- src/Sources/Psr7Request.php | 17 +++++++++++++---- 2 files changed, 27 insertions(+), 8 deletions(-) diff --git a/src/Sources/Endpoint.php b/src/Sources/Endpoint.php index 180d6da..e1b1476 100644 --- a/src/Sources/Endpoint.php +++ b/src/Sources/Endpoint.php @@ -34,12 +34,21 @@ class Endpoint extends Source * @throws \Cerbero\JsonParser\Exceptions\GuzzleRequiredException */ public function getIterator(): Traversable + { + return new Psr7Message($this->response(), $this->config); + } + + /** + * Retrieve the endpoint response + * + * @return ResponseInterface + * @throws \Cerbero\JsonParser\Exceptions\GuzzleRequiredException + */ + protected function response(): ResponseInterface { $this->requireGuzzle(); - $this->response = $this->getJson($this->source); - - return new Psr7Message($this->response, $this->config); + return $this->response ??= $this->getJson($this->source); } /** @@ -57,9 +66,10 @@ class Endpoint extends Source * Retrieve the calculated size of the JSON source * * @return int|null + * @throws 
\Cerbero\JsonParser\Exceptions\GuzzleRequiredException */ protected function calculateSize(): ?int { - return $this->response?->getBody()->getSize(); + return $this->response()->getBody()->getSize(); } } diff --git a/src/Sources/Psr7Request.php b/src/Sources/Psr7Request.php index 9559e76..3b1e320 100644 --- a/src/Sources/Psr7Request.php +++ b/src/Sources/Psr7Request.php @@ -30,12 +30,21 @@ class Psr7Request extends Source * @throws \Cerbero\JsonParser\Exceptions\GuzzleRequiredException */ public function getIterator(): Traversable + { + return new Psr7Message($this->response(), $this->config); + } + + /** + * Retrieve the response of the PSR-7 request + * + * @return ResponseInterface + * @throws \Cerbero\JsonParser\Exceptions\GuzzleRequiredException + */ + protected function response(): ResponseInterface { $this->requireGuzzle(); - $this->response = $this->sendRequest($this->source); - - return new Psr7Message($this->response, $this->config); + return $this->response ??= $this->sendRequest($this->source); } /** @@ -55,6 +64,6 @@ class Psr7Request extends Source */ protected function calculateSize(): ?int { - return $this->response?->getBody()->getSize(); + return $this->response()->getBody()->getSize(); } } From 25b40242281cd2a5afde71216066daddc9354b65 Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Sun, 26 Feb 2023 20:02:07 +1000 Subject: [PATCH 188/249] Make depth customizable --- src/Decoders/JsonDecoder.php | 5 +++-- src/Decoders/SimdjsonDecoder.php | 7 ++++--- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/src/Decoders/JsonDecoder.php b/src/Decoders/JsonDecoder.php index dcf3d27..350027f 100644 --- a/src/Decoders/JsonDecoder.php +++ b/src/Decoders/JsonDecoder.php @@ -12,8 +12,9 @@ final class JsonDecoder extends AbstractDecoder * Instantiate the class. 
* * @param bool $decodesToArray + * @param int $depth */ - public function __construct(private bool $decodesToArray = true) + public function __construct(private bool $decodesToArray = true, private int $depth = 512) { } @@ -26,6 +27,6 @@ final class JsonDecoder extends AbstractDecoder */ protected function decodeJson(string $json): mixed { - return json_decode($json, $this->decodesToArray, flags: JSON_THROW_ON_ERROR); + return json_decode($json, $this->decodesToArray, $this->depth, JSON_THROW_ON_ERROR); } } diff --git a/src/Decoders/SimdjsonDecoder.php b/src/Decoders/SimdjsonDecoder.php index 641f2a7..ecccafe 100644 --- a/src/Decoders/SimdjsonDecoder.php +++ b/src/Decoders/SimdjsonDecoder.php @@ -3,7 +3,7 @@ namespace Cerbero\JsonParser\Decoders; /** - * The decoder using the simdjson library. + * The decoder using the simdjson extension. * */ final class SimdjsonDecoder extends AbstractDecoder @@ -12,8 +12,9 @@ final class SimdjsonDecoder extends AbstractDecoder * Instantiate the class. 
* * @param bool $decodesToArray + * @param int $depth */ - public function __construct(private bool $decodesToArray = true) + public function __construct(private bool $decodesToArray = true, private int $depth = 512) { } @@ -26,6 +27,6 @@ final class SimdjsonDecoder extends AbstractDecoder */ protected function decodeJson(string $json): mixed { - return simdjson_decode($json, $this->decodesToArray); + return simdjson_decode($json, $this->decodesToArray, $this->depth); } } From 2daf876d9901c480c4c019d47678e2897cec5397 Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Sun, 26 Feb 2023 20:02:44 +1000 Subject: [PATCH 189/249] Remove simdjson helper --- src/JsonParser.php | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/src/JsonParser.php b/src/JsonParser.php index 1e9cf90..1eb6e48 100644 --- a/src/JsonParser.php +++ b/src/JsonParser.php @@ -4,7 +4,6 @@ namespace Cerbero\JsonParser; use Cerbero\JsonParser\Decoders\DecodedValue; use Cerbero\JsonParser\Decoders\Decoder; -use Cerbero\JsonParser\Decoders\SimdjsonDecoder; use Cerbero\JsonParser\Exceptions\SyntaxException; use Cerbero\JsonParser\Pointers\Pointer; use Cerbero\JsonParser\Sources\AnySource; @@ -113,17 +112,6 @@ final class JsonParser implements IteratorAggregate } } - /** - * Set the simdjson decoder - * - * @param bool $decodesToArray - * @return static - */ - public function simdjson(bool $decodesToArray = true): static - { - return $this->decoder(new SimdjsonDecoder($decodesToArray)); - } - /** * Set the JSON decoder * From 8d773bda24f2e5bee309887790b4af9ff83a58d9 Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Sun, 26 Feb 2023 20:03:11 +1000 Subject: [PATCH 190/249] Use simdjson decoder if loaded --- src/Config.php | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/Config.php b/src/Config.php index 97de8d4..4025dec 100644 --- a/src/Config.php +++ b/src/Config.php @@ -5,6 +5,7 @@ namespace Cerbero\JsonParser; use Cerbero\JsonParser\Decoders\JsonDecoder; use 
Cerbero\JsonParser\Decoders\DecodedValue; use Cerbero\JsonParser\Decoders\Decoder; +use Cerbero\JsonParser\Decoders\SimdjsonDecoder; use Cerbero\JsonParser\Exceptions\DecodingException; use Cerbero\JsonParser\Exceptions\SyntaxException; use Cerbero\JsonParser\Pointers\Pointer; @@ -57,7 +58,7 @@ final class Config */ public function __construct() { - $this->decoder = new JsonDecoder(); + $this->decoder = extension_loaded('simdjson') ? new SimdjsonDecoder() : new JsonDecoder(); $this->onDecodingError = fn (DecodedValue $decoded) => throw new DecodingException($decoded); $this->onSyntaxError = fn (SyntaxException $e) => throw $e; } From ecb14ce4fff8568fbb2d8e5fae08c96eb1ceb8bb Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Sun, 26 Feb 2023 20:03:29 +1000 Subject: [PATCH 191/249] Update thrown exception --- tests/Feature/ErrorsHandlingTest.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/Feature/ErrorsHandlingTest.php b/tests/Feature/ErrorsHandlingTest.php index 99f9446..eed7e24 100644 --- a/tests/Feature/ErrorsHandlingTest.php +++ b/tests/Feature/ErrorsHandlingTest.php @@ -25,7 +25,7 @@ it('lets the user handle syntax errors', function () { it('throws a decoding exception if unable to decode a JSON fragment', function () { JsonParser::parse(fixture('errors/decoding.json'))->traverse(); -})->throws(DecodingException::class, 'Decoding error: Syntax error'); +})->throws(DecodingException::class, 'Decoding error: Problem while parsing a number'); it('lets the user handle decoding errors', function () { $decodingErrors = []; From 7cb5d8d6ab3165fd45934f6e979247f69e074c65 Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Sun, 26 Feb 2023 20:03:45 +1000 Subject: [PATCH 192/249] Test decoders --- tests/Dataset.php | 20 +++++++++++++++++- tests/Unit/JsonDecoderTest.php | 33 ++++++++++++++++++++++++++++++ tests/Unit/SimdjsonDecoderTest.php | 33 ++++++++++++++++++++++++++++++ 3 files changed, 85 insertions(+), 1 deletion(-) create 
mode 100644 tests/Unit/JsonDecoderTest.php create mode 100644 tests/Unit/SimdjsonDecoderTest.php diff --git a/tests/Dataset.php b/tests/Dataset.php index 0f16e84..f781c43 100644 --- a/tests/Dataset.php +++ b/tests/Dataset.php @@ -13,7 +13,7 @@ use Mockery; * The dataset provider. * */ -class Dataset +final class Dataset { /** * Retrieve the dataset to test parsing @@ -141,4 +141,22 @@ class Dataset ->getMock(); } } + + /** + * Retrieve the dataset to test decoders + * + * @return Generator + */ + public static function forDecoders(): Generator + { + $json = '{"foo":"bar"}'; + $values = [ + true => ['foo' => 'bar'], + false => (object) ['foo' => 'bar'], + ]; + + foreach ([true, false] as $decodesToArray) { + yield [$decodesToArray, $json, $values[$decodesToArray]]; + } + } } diff --git a/tests/Unit/JsonDecoderTest.php b/tests/Unit/JsonDecoderTest.php new file mode 100644 index 0000000..c44d290 --- /dev/null +++ b/tests/Unit/JsonDecoderTest.php @@ -0,0 +1,33 @@ +decode($json)) + ->toBeInstanceOf(DecodedValue::class) + ->succeeded->toBeTrue() + ->value->toEqual($value) + ->error->toBeNull() + ->code->toBeNull() + ->exception->toBeNull(); + + expect($decoded->json)->toBeNull(); +})->with(Dataset::forDecoders()); + +it('reports issues when a JSON is not valid', function () { + $json = '[1a]'; + $e = new JsonException('Syntax error', 4); + + expect($decoded = (new JsonDecoder())->decode($json)) + ->toBeInstanceOf(DecodedValue::class) + ->succeeded->toBeFalse() + ->value->toBeNull() + ->error->toBe($e->getMessage()) + ->code->toBe($e->getCode()) + ->exception->toEqual($e); + + expect($decoded->json)->toBe($json); +}); diff --git a/tests/Unit/SimdjsonDecoderTest.php b/tests/Unit/SimdjsonDecoderTest.php new file mode 100644 index 0000000..112ed23 --- /dev/null +++ b/tests/Unit/SimdjsonDecoderTest.php @@ -0,0 +1,33 @@ +decode($json)) + ->toBeInstanceOf(DecodedValue::class) + ->succeeded->toBeTrue() + ->value->toEqual($value) + ->error->toBeNull() + ->code->toBeNull() + 
->exception->toBeNull(); + + expect($decoded->json)->toBeNull(); +})->with(Dataset::forDecoders()); + +it('reports issues when a JSON is not valid', function () { + $json = '[1a]'; + $e = new SimdJsonException('Problem while parsing a number', 9); + + expect($decoded = (new SimdjsonDecoder())->decode($json)) + ->toBeInstanceOf(DecodedValue::class) + ->succeeded->toBeFalse() + ->value->toBeNull() + ->error->toBe($e->getMessage()) + ->code->toBe($e->getCode()) + ->exception->toEqual($e); + + expect($decoded->json)->toBe($json); +}); From 5282ed4678dc877ca548d81b2112b43cb78b1230 Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Sun, 26 Feb 2023 20:03:52 +1000 Subject: [PATCH 193/249] Update readme --- README.md | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/README.md b/README.md index 22a725a..e220de9 100644 --- a/README.md +++ b/README.md @@ -28,7 +28,7 @@ composer require cerbero/json-parser * [👣 Basics](#-basics) * [💧 Sources](#-sources) * [🎯 Pointers](#-pointers) -* [⚙️ Decoders](#-decoders) +* [⚙️ Decoders](#%EF%B8%8F-decoders) * [💢 Errors](#-errors) * [⏳ Progress](#-progress) * [🛠 Settings](#-settings) @@ -258,15 +258,7 @@ use Cerbero\JsonParser\Decoders\JsonDecoder; JsonParser::parse($source)->decoder(new JsonDecoder(decodesToArray: false)); ``` -JSON Parser also provides a convenient method to set the [simdjson](https://github.com/crazyxman/simdjson_php#simdjson_php) decoder: - -```php -JsonParser::parse($source)->simdjson(); // decode JSON to associative arrays using simdjson - -JsonParser::parse($source)->simdjson(decodesToArray: false); // decode JSON to objects using simdjson -``` - -[Simdjson is faster](https://github.com/crazyxman/simdjson_php/tree/master/benchmark#run-phpbench-benchmark) than `json_decode()` and can be installed via `pecl install simdjson` if your server satisfies the [requirements](https://github.com/crazyxman/simdjson_php#requirement). 
+The [simdjson extension](https://github.com/crazyxman/simdjson_php#simdjson_php) offers a decoder [faster](https://github.com/crazyxman/simdjson_php/tree/master/benchmark#run-phpbench-benchmark) than `json_decode()` that can be installed via `pecl install simdjson` if your server satisfies the [requirements](https://github.com/crazyxman/simdjson_php#requirement). JSON Parser leverages the simdjson decoder by default if the extension is loaded. If we need a decoder that is not supported by default, we can implement our custom one. From 201a4de81b5f9e8e9bc07f4af358d92555445f78 Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Sun, 26 Feb 2023 20:24:34 +1000 Subject: [PATCH 194/249] Remove windows --- .github/workflows/build.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index fca9520..a27d1e1 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -13,7 +13,7 @@ jobs: matrix: php: [8.0, 8.1, 8.2] dependency-version: [prefer-lowest, prefer-stable] - os: [ubuntu-latest, windows-latest] + os: [ubuntu-latest] name: PHP ${{ matrix.php }} - ${{ matrix.dependency-version }} - ${{ matrix.os }} From aef455d4fbe5da49bd0a858154d2c665cf219845 Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Sun, 26 Feb 2023 20:24:54 +1000 Subject: [PATCH 195/249] Improve type declaration --- src/Decoders/JsonDecoder.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Decoders/JsonDecoder.php b/src/Decoders/JsonDecoder.php index 350027f..641741b 100644 --- a/src/Decoders/JsonDecoder.php +++ b/src/Decoders/JsonDecoder.php @@ -12,7 +12,7 @@ final class JsonDecoder extends AbstractDecoder * Instantiate the class. 
* * @param bool $decodesToArray - * @param int $depth + * @param int<1, max> $depth */ public function __construct(private bool $decodesToArray = true, private int $depth = 512) { From 84b4ae41f7b654c1f23cd3611f7fa9701528d2b8 Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Sun, 26 Feb 2023 21:39:51 +1000 Subject: [PATCH 196/249] Support Laravel HTTP client requests --- README.md | 3 +- src/Sources/LaravelClientRequest.php | 69 ++++++++++++++++++++++++++++ 2 files changed, 71 insertions(+), 1 deletion(-) create mode 100644 src/Sources/LaravelClientRequest.php diff --git a/README.md b/README.md index e220de9..e84830a 100644 --- a/README.md +++ b/README.md @@ -78,7 +78,7 @@ JsonParser::parse($source)->traverse(function (mixed $value, string|int $key, Js ### 💧 Sources -A wide range of JSON sources is supported, here is the full list: +A JSON source is any data point that provides a JSON. A wide range of sources is supported by default: - **strings**, e.g. `{"foo":"bar"}` - **iterables**, i.e. arrays or instances of `Traversable` - **file paths**, e.g. `/path/to/large.json` @@ -87,6 +87,7 @@ A wide range of JSON sources is supported, here is the full list: - **PSR-7 requests**, i.e. any instance of `Psr\Http\Message\RequestInterface` - **PSR-7 messages**, i.e. any instance of `Psr\Http\Message\MessageInterface` - **PSR-7 streams**, i.e. any instance of `Psr\Http\Message\StreamInterface` +- **Laravel HTTP client requests**, i.e. any instance of `Illuminate\Http\Client\Request` - **Laravel HTTP client responses**, i.e. any instance of `Illuminate\Http\Client\Response` - **user-defined sources**, i.e. 
any instance of `Cerbero\JsonParser\Sources\Source` diff --git a/src/Sources/LaravelClientRequest.php b/src/Sources/LaravelClientRequest.php new file mode 100644 index 0000000..3e823db --- /dev/null +++ b/src/Sources/LaravelClientRequest.php @@ -0,0 +1,69 @@ + + * @throws \Cerbero\JsonParser\Exceptions\GuzzleRequiredException + */ + public function getIterator(): Traversable + { + return new Psr7Message($this->response(), $this->config); + } + + /** + * Retrieve the response of the Laravel request + * + * @return ResponseInterface + * @throws \Cerbero\JsonParser\Exceptions\GuzzleRequiredException + */ + protected function response(): ResponseInterface + { + $this->requireGuzzle(); + + return $this->response ??= $this->sendRequest($this->source->toPsrRequest()); + } + + /** + * Determine whether the JSON source can be handled + * + * @return bool + */ + public function matches(): bool + { + return $this->source instanceof Request; + } + + /** + * Retrieve the calculated size of the JSON source + * + * @return int|null + */ + protected function calculateSize(): ?int + { + return $this->response()->getBody()->getSize(); + } +} From 7f3e1734ea89b98fadc62be8304fc22c4f006f34 Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Sun, 26 Feb 2023 21:48:56 +1000 Subject: [PATCH 197/249] Refactor sources communicating with endpoints --- src/Sources/Endpoint.php | 12 +++++++- src/Sources/LaravelClientRequest.php | 43 +++------------------------- src/Sources/Psr7Request.php | 43 +++------------------------- 3 files changed, 19 insertions(+), 79 deletions(-) diff --git a/src/Sources/Endpoint.php b/src/Sources/Endpoint.php index e1b1476..1ecab8e 100644 --- a/src/Sources/Endpoint.php +++ b/src/Sources/Endpoint.php @@ -48,7 +48,17 @@ class Endpoint extends Source { $this->requireGuzzle(); - return $this->response ??= $this->getJson($this->source); + return $this->response ??= $this->fetchResponse(); + } + + /** + * Retrieve the fetched HTTP response + * + * @return 
ResponseInterface + */ + protected function fetchResponse(): ResponseInterface + { + return $this->getJson($this->source); } /** diff --git a/src/Sources/LaravelClientRequest.php b/src/Sources/LaravelClientRequest.php index 3e823db..c139bfd 100644 --- a/src/Sources/LaravelClientRequest.php +++ b/src/Sources/LaravelClientRequest.php @@ -2,49 +2,24 @@ namespace Cerbero\JsonParser\Sources; -use Cerbero\JsonParser\Concerns\GuzzleAware; use Illuminate\Http\Client\Request; use Psr\Http\Message\ResponseInterface; -use Traversable; /** * The Laravel client request source. * * @property-read Request $source */ -class LaravelClientRequest extends Source +class LaravelClientRequest extends Psr7Request { - use GuzzleAware; - /** - * The endpoint response. - * - * @var ResponseInterface|null - */ - protected ?ResponseInterface $response; - - /** - * Retrieve the JSON fragments - * - * @return Traversable - * @throws \Cerbero\JsonParser\Exceptions\GuzzleRequiredException - */ - public function getIterator(): Traversable - { - return new Psr7Message($this->response(), $this->config); - } - - /** - * Retrieve the response of the Laravel request + * Retrieve the fetched HTTP response * * @return ResponseInterface - * @throws \Cerbero\JsonParser\Exceptions\GuzzleRequiredException */ - protected function response(): ResponseInterface + protected function fetchResponse(): ResponseInterface { - $this->requireGuzzle(); - - return $this->response ??= $this->sendRequest($this->source->toPsrRequest()); + return $this->sendRequest($this->source->toPsrRequest()); } /** @@ -56,14 +31,4 @@ class LaravelClientRequest extends Source { return $this->source instanceof Request; } - - /** - * Retrieve the calculated size of the JSON source - * - * @return int|null - */ - protected function calculateSize(): ?int - { - return $this->response()->getBody()->getSize(); - } } diff --git a/src/Sources/Psr7Request.php b/src/Sources/Psr7Request.php index 3b1e320..09b15f2 100644 --- 
a/src/Sources/Psr7Request.php +++ b/src/Sources/Psr7Request.php @@ -2,49 +2,24 @@ namespace Cerbero\JsonParser\Sources; -use Cerbero\JsonParser\Concerns\GuzzleAware; use Psr\Http\Message\RequestInterface; use Psr\Http\Message\ResponseInterface; -use Traversable; /** * The PSR-7 request source. * * @property-read RequestInterface $source */ -class Psr7Request extends Source +class Psr7Request extends Endpoint { - use GuzzleAware; - /** - * The endpoint response. - * - * @var ResponseInterface|null - */ - protected ?ResponseInterface $response; - - /** - * Retrieve the JSON fragments - * - * @return Traversable - * @throws \Cerbero\JsonParser\Exceptions\GuzzleRequiredException - */ - public function getIterator(): Traversable - { - return new Psr7Message($this->response(), $this->config); - } - - /** - * Retrieve the response of the PSR-7 request + * Retrieve the fetched HTTP response * * @return ResponseInterface - * @throws \Cerbero\JsonParser\Exceptions\GuzzleRequiredException */ - protected function response(): ResponseInterface + protected function fetchResponse(): ResponseInterface { - $this->requireGuzzle(); - - return $this->response ??= $this->sendRequest($this->source); + return $this->sendRequest($this->source); } /** @@ -56,14 +31,4 @@ class Psr7Request extends Source { return $this->source instanceof RequestInterface; } - - /** - * Retrieve the calculated size of the JSON source - * - * @return int|null - */ - protected function calculateSize(): ?int - { - return $this->response()->getBody()->getSize(); - } } From 66e57b4e797fc897f7fba63171dc642ae9b168bd Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Wed, 8 Mar 2023 16:54:25 +1000 Subject: [PATCH 198/249] Fix intersections among pointers with wildcards --- src/Parser.php | 26 ++++++------------ src/Pointers/Pointers.php | 50 ++++++++++++++++++++++------------ src/State.php | 49 +++++++++------------------------ src/Tree.php | 27 ++++++++++-------- tests/Dataset.php | 31 +++++++++++++++++++++ 
tests/Feature/PointersTest.php | 4 +++ 6 files changed, 105 insertions(+), 82 deletions(-) diff --git a/src/Parser.php b/src/Parser.php index caed642..e78f368 100644 --- a/src/Parser.php +++ b/src/Parser.php @@ -15,13 +15,6 @@ use Traversable; */ final class Parser implements IteratorAggregate { - /** - * The JSON parsing state. - * - * @var State - */ - private State $state; - /** * The decoder handling potential errors. * @@ -37,7 +30,6 @@ final class Parser implements IteratorAggregate */ public function __construct(private Lexer $lexer, private Config $config) { - $this->state = new State(); $this->decoder = new ConfigurableDecoder($config); } @@ -59,28 +51,28 @@ final class Parser implements IteratorAggregate */ public function getIterator(): Traversable { - $this->state->setPointers(...$this->config->pointers); + $state = new State(...$this->config->pointers); foreach ($this->lexer as $token) { - if (!$token->matches($this->state->expectedToken)) { + if (!$token->matches($state->expectedToken)) { throw new SyntaxException($token, $this->lexer->position()); } - $this->state->mutateByToken($token); + $state->mutateByToken($token); - if (!$token->endsChunk() || $this->state->treeIsDeep()) { + if (!$token->endsChunk() || $state->treeIsDeep()) { continue; } - if ($this->state->hasBuffer()) { + if ($state->hasBuffer()) { /** @var string|int $key */ - $key = $this->decoder->decode($this->state->key()); - $value = $this->decoder->decode($this->state->value()); + $key = $this->decoder->decode($state->key()); + $value = $this->decoder->decode($state->value()); - yield $key => $this->state->callPointer($value, $key); + yield $key => $state->callPointer($value, $key); } - if ($this->state->canStopParsing()) { + if ($state->canStopParsing()) { break; } } diff --git a/src/Pointers/Pointers.php b/src/Pointers/Pointers.php index b689e0d..cb4d8e6 100644 --- a/src/Pointers/Pointers.php +++ b/src/Pointers/Pointers.php @@ -19,6 +19,13 @@ final class Pointers */ private array 
$pointers; + /** + * The JSON pointer matching with the current tree. + * + * @var Pointer + */ + private Pointer $matching; + /** * The list of pointers that were found within the JSON. * @@ -34,6 +41,17 @@ final class Pointers public function __construct(Pointer ...$pointers) { $this->pointers = $pointers; + $this->matching = $pointers[0] ?? new Pointer(''); + } + + /** + * Retrieve the pointer matching the current tree + * + * @return Pointer + */ + public function matching(): Pointer + { + return $this->matching; } /** @@ -44,31 +62,37 @@ final class Pointers */ public function matchTree(Tree $tree): Pointer { + if (count($this->pointers) < 2) { + return $this->matching; + } + $pointers = []; + $originalTree = $tree->original(); foreach ($this->pointers as $pointer) { - foreach ($tree->original() as $depth => $key) { + $referenceTokens = $pointer->referenceTokens(); + + foreach ($originalTree as $depth => $key) { if (!$pointer->depthMatchesKey($depth, $key)) { continue 2; - } elseif (!isset($pointers[$depth])) { + } elseif (!isset($pointers[$depth]) || $referenceTokens == $originalTree) { $pointers[$depth] = $pointer; } } } - return end($pointers) ?: $this->pointers[0]; + return $this->matching = end($pointers) ?: $this->matching; } /** * Mark the given pointer as found * - * @param Pointer $pointer * @return void */ - public function markAsFound(Pointer $pointer): void + public function markAsFound(): void { - if (!$pointer->wasFound) { - $this->found[(string) $pointer] = $pointer->wasFound = true; + if (!$this->matching->wasFound) { + $this->found[(string) $this->matching] = $this->matching->wasFound = true; } } @@ -79,16 +103,6 @@ final class Pointers */ public function wereFound(): bool { - return $this->count() == count($this->found); - } - - /** - * Retrieve the number of JSON pointers - * - * @return int - */ - public function count(): int - { - return count($this->pointers); + return count($this->pointers) == count($this->found) && 
!empty($this->pointers); } } diff --git a/src/State.php b/src/State.php index 1275de6..2a2d2b3 100644 --- a/src/State.php +++ b/src/State.php @@ -27,13 +27,6 @@ final class State */ private Pointers $pointers; - /** - * The JSON pointer matching the tree. - * - * @var Pointer - */ - private Pointer $pointer; - /** * The JSON buffer. * @@ -58,10 +51,12 @@ final class State /** * Instantiate the class. * + * @param Pointer ...$pointers */ - public function __construct() + public function __construct(Pointer ...$pointers) { - $this->tree = new Tree(); + $this->pointers = new Pointers(...$pointers); + $this->tree = new Tree($this->pointers); } /** @@ -81,9 +76,9 @@ final class State */ public function treeIsDeep(): bool { - return $this->pointer == '' - ? $this->tree->depth() > $this->pointer->depth() - : $this->tree->depth() >= $this->pointer->depth(); + return $this->pointers->matching() == '' + ? $this->tree->depth() > $this->pointers->matching()->depth() + : $this->tree->depth() >= $this->pointers->matching()->depth(); } /** @@ -96,19 +91,6 @@ final class State return $this->tree->currentKey(); } - /** - * Set and match the given pointers - * - * @param Pointer ...$pointers - * @return void - */ - public function setPointers(Pointer ...$pointers): void - { - $this->pointers = new Pointers(...$pointers ?: [new Pointer('')]); - - $this->pointer = $this->pointers->matchTree($this->tree); - } - /** * Determine whether the parser can stop parsing * @@ -116,7 +98,7 @@ final class State */ public function canStopParsing(): bool { - return $this->pointers->wereFound() && !$this->pointer->includesTree($this->tree); + return $this->pointers->wereFound() && !$this->pointers->matching()->includesTree($this->tree); } /** @@ -128,7 +110,7 @@ final class State */ public function callPointer(mixed $value, mixed $key): mixed { - return $this->pointer->call($value, $key); + return $this->pointers->matching()->call($value, $key); } /** @@ -139,26 +121,21 @@ final class State */ 
public function mutateByToken(Token $token): void { - $this->tree->changed = false; - $shouldTrackTree = $this->pointer == '' || $this->tree->depth() < $this->pointer->depth(); + $shouldTrackTree = $this->pointers->matching() == '' || $this->tree->depth() < $this->pointers->matching()->depth(); if ($shouldTrackTree && $this->expectsKey) { $this->tree->traverseKey($token); } elseif ($shouldTrackTree && $token->isValue() && !$this->tree->inObject()) { - $this->tree->traverseArray($this->pointer->referenceTokens()); - } - - if ($this->tree->changed && $this->pointers->count() > 1) { - $this->pointer = $this->pointers->matchTree($this->tree); + $this->tree->traverseArray(); } $shouldBuffer = $this->tree->depth() >= 0 - && $this->pointer->matchesTree($this->tree) + && $this->pointers->matching()->matchesTree($this->tree) && ((!$this->expectsKey && $token->isValue()) || $this->treeIsDeep()); if ($shouldBuffer) { $this->buffer .= $token; - $this->pointers->markAsFound($this->pointer); + $this->pointers->markAsFound(); } $token->mutateState($this); diff --git a/src/Tree.php b/src/Tree.php index e558fa5..4e83de4 100644 --- a/src/Tree.php +++ b/src/Tree.php @@ -2,6 +2,8 @@ namespace Cerbero\JsonParser; +use Cerbero\JsonParser\Pointers\Pointers; + use function count; /** @@ -39,11 +41,13 @@ final class Tree private int $depth = -1; /** - * Whether the tree changed. + * Instantiate the class. 
* - * @var bool + * @param Pointers $pointers */ - public bool $changed = false; + public function __construct(private Pointers $pointers) + { + } /** * Retrieve the original JSON tree @@ -119,26 +123,27 @@ final class Tree $this->original[$this->depth] = $trimmedKey; $this->wildcarded[$this->depth] = $trimmedKey; - $this->changed = true; + $this->pointers->matchTree($this); } /** * Traverse an array * - * @param string[] $referenceTokens * @return void */ - public function traverseArray(array $referenceTokens): void + public function traverseArray(): void { - $referenceToken = $referenceTokens[$this->depth] ?? null; $index = $this->original[$this->depth] ?? null; - - $this->original[$this->depth] = is_int($index) ? $index + 1 : 0; - $this->wildcarded[$this->depth] = $referenceToken == '-' ? '-' : $this->original[$this->depth]; - $this->changed = true; + $this->original[$this->depth] = $index = is_int($index) ? $index + 1 : 0; if (count($this->original) > $this->depth + 1) { array_splice($this->original, $this->depth + 1); + } + + $referenceTokens = $this->pointers->matchTree($this)->referenceTokens(); + $this->wildcarded[$this->depth] = ($referenceTokens[$this->depth] ?? null) == '-' ? 
'-' : $index; + + if (count($this->wildcarded) > $this->depth + 1) { array_splice($this->wildcarded, $this->depth + 1); } } diff --git a/tests/Dataset.php b/tests/Dataset.php index f781c43..e11c4a1 100644 --- a/tests/Dataset.php +++ b/tests/Dataset.php @@ -92,6 +92,37 @@ final class Dataset } } + /** + * Retrieve the dataset to test intersecting pointers with wildcards + * + * @return Generator + */ + public static function forIntersectingPointersWithWildcards(): Generator + { + $json = fixture('json/complex_object.json'); + + $pointers = [ + '/topping/6/type' => fn (string $value) => "$value @ /topping/6/type", + '/topping/-/type' => fn (string $value) => "$value @ /topping/-/type", + '/topping/0/type' => fn (string $value) => "$value @ /topping/0/type", + '/topping/2/type' => fn (string $value) => "$value @ /topping/2/type", + ]; + + $parsed = [ + 'type' => [ + 'None @ /topping/0/type', + 'Glazed @ /topping/-/type', + 'Sugar @ /topping/2/type', + 'Powdered Sugar @ /topping/-/type', + 'Chocolate with Sprinkles @ /topping/-/type', + 'Chocolate @ /topping/-/type', + 'Maple @ /topping/6/type', + ] + ]; + + yield [$json, $pointers, $parsed]; + } + /** * Retrieve the dataset to test syntax errors * diff --git a/tests/Feature/PointersTest.php b/tests/Feature/PointersTest.php index b41b5c9..072ec40 100644 --- a/tests/Feature/PointersTest.php +++ b/tests/Feature/PointersTest.php @@ -17,3 +17,7 @@ it('supports single JSON pointers', function (string $json, string $pointer, arr it('supports multiple JSON pointers', function (string $json, array $pointers, array $parsed) { expect(JsonParser::parse($json)->pointers($pointers))->toPointTo($parsed); })->with(Dataset::forMultiplePointers()); + +it('can intersect pointers with wildcards', function (string $json, array $pointers, array $parsed) { + expect(JsonParser::parse($json)->pointers($pointers))->toPointTo($parsed); +})->with(Dataset::forIntersectingPointersWithWildcards()); From 987bc6a683de1edf2915c7cff558ff31e30c691e Mon 
Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Wed, 8 Mar 2023 16:59:37 +1000 Subject: [PATCH 199/249] Fix code style --- src/State.php | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/State.php b/src/State.php index 2a2d2b3..956d7ca 100644 --- a/src/State.php +++ b/src/State.php @@ -121,7 +121,8 @@ final class State */ public function mutateByToken(Token $token): void { - $shouldTrackTree = $this->pointers->matching() == '' || $this->tree->depth() < $this->pointers->matching()->depth(); + $pointer = $this->pointers->matching(); + $shouldTrackTree = $pointer == '' || $this->tree->depth() < $pointer->depth(); if ($shouldTrackTree && $this->expectsKey) { $this->tree->traverseKey($token); From 50a097ef846318fb33941d069c70b41212a68d35 Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Wed, 8 Mar 2023 17:27:23 +1000 Subject: [PATCH 200/249] Replace static with self in final classes --- src/Decoders/DecodedValue.php | 12 ++++++------ src/JsonParser.php | 34 +++++++++++++++++----------------- src/Parser.php | 6 +++--- src/Progress.php | 8 ++++---- src/Sources/StreamWrapper.php | 2 +- src/Tokens/Tokenizer.php | 8 ++++---- tests/Dataset.php | 2 +- 7 files changed, 36 insertions(+), 36 deletions(-) diff --git a/src/Decoders/DecodedValue.php b/src/Decoders/DecodedValue.php index 850b4aa..f00fd22 100644 --- a/src/Decoders/DecodedValue.php +++ b/src/Decoders/DecodedValue.php @@ -29,11 +29,11 @@ final class DecodedValue * Retrieve a successfully decoded value * * @param mixed $value - * @return static + * @return self */ - public static function succeeded(mixed $value): static + public static function succeeded(mixed $value): self { - return new static(true, $value); + return new self(true, $value); } /** @@ -41,10 +41,10 @@ final class DecodedValue * * @param Throwable $e * @param string $json - * @return static + * @return self */ - public static function failed(Throwable $e, string $json): static + public static function failed(Throwable 
$e, string $json): self { - return new static(false, null, $e->getMessage(), $e->getCode(), $e, $json); + return new self(false, null, $e->getMessage(), $e->getCode(), $e, $json); } } diff --git a/src/JsonParser.php b/src/JsonParser.php index 1eb6e48..6c91d6f 100644 --- a/src/JsonParser.php +++ b/src/JsonParser.php @@ -47,11 +47,11 @@ final class JsonParser implements IteratorAggregate * Statically instantiate the class * * @param mixed $source - * @return static + * @return self */ - public static function parse(mixed $source): static + public static function parse(mixed $source): self { - return new static($source); + return new self($source); } /** @@ -72,9 +72,9 @@ final class JsonParser implements IteratorAggregate * Set the JSON pointers * * @param string[]|array $pointers - * @return static + * @return self */ - public function pointers(array $pointers): static + public function pointers(array $pointers): self { foreach ($pointers as $pointer => $callback) { $callback instanceof Closure ? 
$this->pointer($pointer, $callback) : $this->pointer($callback); @@ -88,9 +88,9 @@ final class JsonParser implements IteratorAggregate * * @param string $pointer * @param Closure|null $callback - * @return static + * @return self */ - public function pointer(string $pointer, Closure $callback = null): static + public function pointer(string $pointer, Closure $callback = null): self { $this->config->pointers[] = new Pointer($pointer, $callback); @@ -116,9 +116,9 @@ final class JsonParser implements IteratorAggregate * Set the JSON decoder * * @param Decoder $decoder - * @return static + * @return self */ - public function decoder(Decoder $decoder): static + public function decoder(Decoder $decoder): self { $this->config->decoder = $decoder; @@ -139,9 +139,9 @@ final class JsonParser implements IteratorAggregate * The number of bytes to read in each chunk * * @param int<1, max> $bytes - * @return static + * @return self */ - public function bytes(int $bytes): static + public function bytes(int $bytes): self { $this->config->bytes = $bytes; @@ -152,9 +152,9 @@ final class JsonParser implements IteratorAggregate * Set the patch to apply during a decoding error * * @param mixed $patch - * @return static + * @return self */ - public function patchDecodingError(mixed $patch = null): static + public function patchDecodingError(mixed $patch = null): self { return $this->onDecodingError(function (DecodedValue $decoded) use ($patch) { $decoded->value = is_callable($patch) ? 
$patch($decoded) : $patch; @@ -165,9 +165,9 @@ final class JsonParser implements IteratorAggregate * Set the logic to run during a decoding error * * @param Closure $callback - * @return static + * @return self */ - public function onDecodingError(Closure $callback): static + public function onDecodingError(Closure $callback): self { $this->config->onDecodingError = $callback; @@ -178,9 +178,9 @@ final class JsonParser implements IteratorAggregate * Set the logic to run during a syntax error * * @param Closure $callback - * @return static + * @return self */ - public function onSyntaxError(Closure $callback): static + public function onSyntaxError(Closure $callback): self { $this->config->onSyntaxError = $callback; diff --git a/src/Parser.php b/src/Parser.php index e78f368..b9304ff 100644 --- a/src/Parser.php +++ b/src/Parser.php @@ -37,11 +37,11 @@ final class Parser implements IteratorAggregate * Instantiate the class statically * * @param Source $source - * @return static + * @return self */ - public static function for(Source $source): static + public static function for(Source $source): self { - return new static(new Lexer($source), $source->config()); + return new self(new Lexer($source), $source->config()); } /** diff --git a/src/Progress.php b/src/Progress.php index 83b5719..9d99247 100644 --- a/src/Progress.php +++ b/src/Progress.php @@ -28,9 +28,9 @@ final class Progress * Set the current progress * * @param int $current - * @return static + * @return self */ - public function setCurrent(int $current): static + public function setCurrent(int $current): self { $this->current = $current; @@ -51,9 +51,9 @@ final class Progress * Set the total possible progress * * @param int|null $total - * @return static + * @return self */ - public function setTotal(?int $total): static + public function setTotal(?int $total): self { $this->total ??= $total; diff --git a/src/Sources/StreamWrapper.php b/src/Sources/StreamWrapper.php index 4bc505a..c3fe7bf 100644 --- 
a/src/Sources/StreamWrapper.php +++ b/src/Sources/StreamWrapper.php @@ -47,7 +47,7 @@ final class StreamWrapper { $options = stream_context_get_options($this->context); - $this->stream = $options[static::NAME]['stream'] ?? null; + $this->stream = $options[self::NAME]['stream'] ?? null; return $this->stream instanceof StreamInterface && $this->stream->isReadable(); } diff --git a/src/Tokens/Tokenizer.php b/src/Tokens/Tokenizer.php index 9826c17..6a85e42 100644 --- a/src/Tokens/Tokenizer.php +++ b/src/Tokens/Tokenizer.php @@ -11,7 +11,7 @@ final class Tokenizer /** * The singleton instance. * - * @var static + * @var self */ private static self $instance; @@ -25,11 +25,11 @@ final class Tokenizer /** * Retrieve the singleton instance * - * @return static + * @return self */ - public static function instance(): static + public static function instance(): self { - return static::$instance ??= new static(); + return self::$instance ??= new self(); } /** diff --git a/tests/Dataset.php b/tests/Dataset.php index e11c4a1..346268d 100644 --- a/tests/Dataset.php +++ b/tests/Dataset.php @@ -22,7 +22,7 @@ final class Dataset */ public static function forParsing(): Generator { - foreach (static::fixtures() as $fixture) { + foreach (self::fixtures() as $fixture) { $name = $fixture->getBasename('.json'); yield [ From 5d7ef120131e402c2745a943ff9107459e466ff8 Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Wed, 8 Mar 2023 19:39:31 +1000 Subject: [PATCH 201/249] Check whether two pointers intersect --- src/Config.php | 7 +-- .../IntersectingPointersException.php | 24 +++++++++ src/JsonParser.php | 2 +- src/Parser.php | 2 +- src/Pointers/Pointers.php | 24 +++++---- src/State.php | 12 +---- tests/Dataset.php | 53 +++++++++++++++++++ tests/Feature/PointersTest.php | 6 +++ 8 files changed, 106 insertions(+), 24 deletions(-) create mode 100644 src/Exceptions/IntersectingPointersException.php diff --git a/src/Config.php b/src/Config.php index 4025dec..c34fbb7 100644 --- 
a/src/Config.php +++ b/src/Config.php @@ -8,7 +8,7 @@ use Cerbero\JsonParser\Decoders\Decoder; use Cerbero\JsonParser\Decoders\SimdjsonDecoder; use Cerbero\JsonParser\Exceptions\DecodingException; use Cerbero\JsonParser\Exceptions\SyntaxException; -use Cerbero\JsonParser\Pointers\Pointer; +use Cerbero\JsonParser\Pointers\Pointers; use Closure; /** @@ -27,9 +27,9 @@ final class Config /** * The JSON pointers. * - * @var Pointer[] + * @var Pointers */ - public array $pointers = []; + public Pointers $pointers; /** * The number of bytes to read in each chunk. @@ -59,6 +59,7 @@ final class Config public function __construct() { $this->decoder = extension_loaded('simdjson') ? new SimdjsonDecoder() : new JsonDecoder(); + $this->pointers = new Pointers(); $this->onDecodingError = fn (DecodedValue $decoded) => throw new DecodingException($decoded); $this->onSyntaxError = fn (SyntaxException $e) => throw $e; } diff --git a/src/Exceptions/IntersectingPointersException.php b/src/Exceptions/IntersectingPointersException.php new file mode 100644 index 0000000..cca6993 --- /dev/null +++ b/src/Exceptions/IntersectingPointersException.php @@ -0,0 +1,24 @@ +config->pointers[] = new Pointer($pointer, $callback); + $this->config->pointers->add(new Pointer($pointer, $callback)); return $this; } diff --git a/src/Parser.php b/src/Parser.php index b9304ff..5d495d6 100644 --- a/src/Parser.php +++ b/src/Parser.php @@ -51,7 +51,7 @@ final class Parser implements IteratorAggregate */ public function getIterator(): Traversable { - $state = new State(...$this->config->pointers); + $state = new State($this->config->pointers); foreach ($this->lexer as $token) { if (!$token->matches($state->expectedToken)) { diff --git a/src/Pointers/Pointers.php b/src/Pointers/Pointers.php index cb4d8e6..df6e18a 100644 --- a/src/Pointers/Pointers.php +++ b/src/Pointers/Pointers.php @@ -2,22 +2,23 @@ namespace Cerbero\JsonParser\Pointers; +use Cerbero\JsonParser\Exceptions\IntersectingPointersException; use 
Cerbero\JsonParser\Tree; use function count; /** - * The JSON pointers collection. + * The JSON pointers aggregate. * */ final class Pointers { /** - * The JSON pointers collection. + * The JSON pointers. * * @var Pointer[] */ - private array $pointers; + private array $pointers = []; /** * The JSON pointer matching with the current tree. @@ -34,14 +35,19 @@ final class Pointers private array $found = []; /** - * Instantiate the class. + * Add the given pointer * - * @param Pointer ...$pointers + * @param Pointer $pointer */ - public function __construct(Pointer ...$pointers) + public function add(Pointer $pointer): void { - $this->pointers = $pointers; - $this->matching = $pointers[0] ?? new Pointer(''); + foreach ($this->pointers as $existingPointer) { + if (str_starts_with($existingPointer, "$pointer/") || str_starts_with($pointer, "$existingPointer/")) { + throw new IntersectingPointersException($existingPointer, $pointer); + } + } + + $this->pointers[] = $pointer; } /** @@ -51,7 +57,7 @@ final class Pointers */ public function matching(): Pointer { - return $this->matching; + return $this->matching ??= $this->pointers[0] ?? new Pointer(''); } /** diff --git a/src/State.php b/src/State.php index 956d7ca..c781a65 100644 --- a/src/State.php +++ b/src/State.php @@ -20,13 +20,6 @@ final class State */ private Tree $tree; - /** - * The JSON pointers. - * - * @var Pointers - */ - private Pointers $pointers; - /** * The JSON buffer. * @@ -51,11 +44,10 @@ final class State /** * Instantiate the class. 
* - * @param Pointer ...$pointers + * @param Pointers $pointers */ - public function __construct(Pointer ...$pointers) + public function __construct(private Pointers $pointers) { - $this->pointers = new Pointers(...$pointers); $this->tree = new Tree($this->pointers); } diff --git a/tests/Dataset.php b/tests/Dataset.php index 346268d..0c415cb 100644 --- a/tests/Dataset.php +++ b/tests/Dataset.php @@ -123,6 +123,59 @@ final class Dataset yield [$json, $pointers, $parsed]; } + /** + * Retrieve the dataset to test intersecting pointers + * + * @return Generator + */ + public static function forIntersectingPointers(): Generator + { + $json = fixture('json/complex_object.json'); + $message = 'The pointers [%s] and [%s] are intersecting'; + $pointersByIntersection = [ + '/topping,/topping/0' => [ + '/topping', + '/topping/0', + ], + '/topping/0,/topping' => [ + '/topping/0', + '/topping', + ], + '/topping,/topping/-' => [ + '/topping', + '/topping/-', + ], + '/topping/-,/topping' => [ + '/topping/-', + '/topping', + ], + '/topping/0/type,/topping' => [ + '/topping/0/type', + '/topping/-/type', + '/topping', + ], + '/topping,/topping/-/type' => [ + '/topping', + '/topping/-/type', + '/topping/0/type', + ], + '/topping/-/type,/topping/-/type/baz' => [ + '/topping/-/type', + '/topping/-/types', + '/topping/-/type/baz', + ], + '/topping/-/type/baz,/topping/-/type' => [ + '/topping/-/type/baz', + '/topping/-/type', + '/topping/-/types', + ], + ]; + + foreach ($pointersByIntersection as $intersection => $pointers) { + yield [$json, $pointers, vsprintf($message, explode(',', $intersection))]; + } + } + /** * Retrieve the dataset to test syntax errors * diff --git a/tests/Feature/PointersTest.php b/tests/Feature/PointersTest.php index 072ec40..98917e5 100644 --- a/tests/Feature/PointersTest.php +++ b/tests/Feature/PointersTest.php @@ -1,6 +1,7 @@ pointers($pointers))->toPointTo($parsed); })->with(Dataset::forIntersectingPointersWithWildcards()); + +it('throws an exception when 
two pointers intersect', function (string $json, array $pointers, string $message) { + expect(fn () => JsonParser::parse($json)->pointers($pointers)->traverse()) + ->toThrow(IntersectingPointersException::class, $message); +})->with(Dataset::forIntersectingPointers()); From 2851723dc0735ae13a8069cc803443ce216f9cb0 Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Wed, 8 Mar 2023 19:49:45 +1000 Subject: [PATCH 202/249] Update readme --- README.md | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index e84830a..000ef2a 100644 --- a/README.md +++ b/README.md @@ -158,7 +158,7 @@ foreach ($json as $key => $value) { } ``` -JSON Parser takes advantage of the `-` character to point to any array index, so we can extract all the genders with the `/-/gender` pointer: +JSON Parser takes advantage of the `-` wildcard to point to any array index, so we can extract all the genders with the `/-/gender` pointer: ```php $json = JsonParser::parse($source)->pointer('/-/gender'); @@ -185,7 +185,7 @@ foreach ($json as $key => $value) { } ``` -> ⚠️ Please avoid intersecting pointers (e.g. setting both `/foo` and `/foo/bar`) as the deeper pointer won't be found and will force the parser to parse the whole JSON. +> ⚠️ Intersecting pointers like `/foo` and `/foo/bar` is not allowed but intersecting wildcards like `foo/-/bar` and `foo/0/bar` is possible. We can also specify a callback to execute when JSON pointers are found. 
This is handy when we have different pointers and we need to run custom logic for each of them: @@ -361,6 +361,7 @@ For reference, here is a comprehensive table of all the exceptions thrown by thi |---|---| |`DecodingException`|a value in the JSON can't be decoded| |`GuzzleRequiredException`|Guzzle is not installed and the JSON source is an endpoint| +|`IntersectingPointersException`|two JSON pointers intersect| |`InvalidPointerException`|a JSON pointer syntax is not valid| |`SyntaxException`|the JSON structure is not valid| |`UnsupportedSourceException`|a JSON source is not supported| From 4212f201491067b83bebb09b4fc7a7513dbfe95b Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Wed, 8 Mar 2023 21:06:22 +1000 Subject: [PATCH 203/249] Move logic related to the JSON tree --- src/Parser.php | 2 +- src/Pointers/Pointers.php | 9 ++++++--- src/State.php | 24 +++--------------------- src/Tree.php | 34 ++++++++++++++++++++++++++++++++++ 4 files changed, 44 insertions(+), 25 deletions(-) diff --git a/src/Parser.php b/src/Parser.php index 5d495d6..a08b329 100644 --- a/src/Parser.php +++ b/src/Parser.php @@ -60,7 +60,7 @@ final class Parser implements IteratorAggregate $state->mutateByToken($token); - if (!$token->endsChunk() || $state->treeIsDeep()) { + if (!$token->endsChunk() || $state->tree()->isDeep()) { continue; } diff --git a/src/Pointers/Pointers.php b/src/Pointers/Pointers.php index df6e18a..390b483 100644 --- a/src/Pointers/Pointers.php +++ b/src/Pointers/Pointers.php @@ -103,12 +103,15 @@ final class Pointers } /** - * Determine whether all pointers were found within the JSON + * Determine whether all pointers were found in the given tree * + * @param Tree $tree * @return bool */ - public function wereFound(): bool + public function wereFoundInTree(Tree $tree): bool { - return count($this->pointers) == count($this->found) && !empty($this->pointers); + return count($this->pointers) == count($this->found) + && !empty($this->pointers) + && 
!$this->matching->includesTree($tree); } } diff --git a/src/State.php b/src/State.php index c781a65..e4d55e5 100644 --- a/src/State.php +++ b/src/State.php @@ -2,7 +2,6 @@ namespace Cerbero\JsonParser; -use Cerbero\JsonParser\Pointers\Pointer; use Cerbero\JsonParser\Pointers\Pointers; use Cerbero\JsonParser\Tokens\Token; use Cerbero\JsonParser\Tokens\Tokens; @@ -61,18 +60,6 @@ final class State return $this->tree; } - /** - * Determine whether the tree is deep - * - * @return bool - */ - public function treeIsDeep(): bool - { - return $this->pointers->matching() == '' - ? $this->tree->depth() > $this->pointers->matching()->depth() - : $this->tree->depth() >= $this->pointers->matching()->depth(); - } - /** * Retrieve the current key of the JSON tree * @@ -90,7 +77,7 @@ final class State */ public function canStopParsing(): bool { - return $this->pointers->wereFound() && !$this->pointers->matching()->includesTree($this->tree); + return $this->pointers->wereFoundInTree($this->tree); } /** @@ -113,8 +100,7 @@ final class State */ public function mutateByToken(Token $token): void { - $pointer = $this->pointers->matching(); - $shouldTrackTree = $pointer == '' || $this->tree->depth() < $pointer->depth(); + $shouldTrackTree = $this->tree->shouldBeTracked(); if ($shouldTrackTree && $this->expectsKey) { $this->tree->traverseKey($token); @@ -122,11 +108,7 @@ final class State $this->tree->traverseArray(); } - $shouldBuffer = $this->tree->depth() >= 0 - && $this->pointers->matching()->matchesTree($this->tree) - && ((!$this->expectsKey && $token->isValue()) || $this->treeIsDeep()); - - if ($shouldBuffer) { + if ($this->tree->isMatched() && ((!$this->expectsKey && $token->isValue()) || $this->tree->isDeep())) { $this->buffer .= $token; $this->pointers->markAsFound(); } diff --git a/src/Tree.php b/src/Tree.php index 4e83de4..ba0913f 100644 --- a/src/Tree.php +++ b/src/Tree.php @@ -111,6 +111,40 @@ final class Tree $this->depth--; } + /** + * Determine whether the tree is deep + * 
+ * @return bool + */ + public function isDeep(): bool + { + $pointer = $this->pointers->matching(); + + return $pointer == '' ? $this->depth > $pointer->depth() : $this->depth >= $pointer->depth(); + } + + /** + * Determine whether the tree should be tracked + * + * @return bool + */ + public function shouldBeTracked(): bool + { + $pointer = $this->pointers->matching(); + + return $pointer == '' || $this->depth() < $pointer->depth(); + } + + /** + * Determine whether the tree is matched by the JSON pointer + * + * @return bool + */ + public function isMatched(): bool + { + return $this->depth >= 0 && $this->pointers->matching()->matchesTree($this); + } + /** * Traverse the given object key * From f9304236277c212c669c71960e80fe6f59cd8a4e Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Mon, 20 Mar 2023 19:23:02 +1000 Subject: [PATCH 204/249] Define a global lazy pointer when cloning the config --- src/Config.php | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/src/Config.php b/src/Config.php index c34fbb7..1adb04c 100644 --- a/src/Config.php +++ b/src/Config.php @@ -8,6 +8,7 @@ use Cerbero\JsonParser\Decoders\Decoder; use Cerbero\JsonParser\Decoders\SimdjsonDecoder; use Cerbero\JsonParser\Exceptions\DecodingException; use Cerbero\JsonParser\Exceptions\SyntaxException; +use Cerbero\JsonParser\Pointers\Pointer; use Cerbero\JsonParser\Pointers\Pointers; use Closure; @@ -63,4 +64,15 @@ final class Config $this->onDecodingError = fn (DecodedValue $decoded) => throw new DecodingException($decoded); $this->onSyntaxError = fn (SyntaxException $e) => throw $e; } + + /** + * Clone the configuration + * + * @return void + */ + public function __clone(): void + { + $this->pointers = new Pointers(); + $this->pointers->add(new Pointer('', true)); + } } From 8a69cabfd82f486216a856aa041fda2834490797 Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Mon, 20 Mar 2023 19:26:48 +1000 Subject: [PATCH 205/249] Introduce lazy pointers --- 
src/JsonParser.php | 32 +++++++++++++++++++++++++++++++- src/Pointers/Pointer.php | 14 +++++++++++++- 2 files changed, 44 insertions(+), 2 deletions(-) diff --git a/src/JsonParser.php b/src/JsonParser.php index ccacee1..6c57a9a 100644 --- a/src/JsonParser.php +++ b/src/JsonParser.php @@ -64,6 +64,7 @@ final class JsonParser implements IteratorAggregate try { yield from $this->parser; } catch (SyntaxException $e) { + $e->setPosition($this->parser->position()); call_user_func($this->config->onSyntaxError, $e); } } @@ -92,7 +93,36 @@ final class JsonParser implements IteratorAggregate */ public function pointer(string $pointer, Closure $callback = null): self { - $this->config->pointers->add(new Pointer($pointer, $callback)); + $this->config->pointers->add(new Pointer($pointer, false, $callback)); + + return $this; + } + + /** + * Set the lazy JSON pointers + * + * @param string[]|array $pointers + * @return self + */ + public function lazyPointers(array $pointers): self + { + foreach ($pointers as $pointer => $callback) { + $callback instanceof Closure ? $this->lazyPointer($pointer, $callback) : $this->lazyPointer($callback); + } + + return $this; + } + + /** + * Set a lazy JSON pointer + * + * @param string $pointer + * @param Closure|null $callback + * @return self + */ + public function lazyPointer(string $pointer, Closure $callback = null): self + { + $this->config->pointers->add(new Pointer($pointer, true, $callback)); return $this; } diff --git a/src/Pointers/Pointer.php b/src/Pointers/Pointer.php index b03d937..f0681ed 100644 --- a/src/Pointers/Pointer.php +++ b/src/Pointers/Pointer.php @@ -50,9 +50,10 @@ final class Pointer implements Stringable * Instantiate the class. 
* * @param string $pointer + * @param bool $isLazy * @param Closure|null $callback */ - public function __construct(private string $pointer, Closure $callback = null) + public function __construct(private string $pointer, private bool $isLazy = false, Closure $callback = null) { $this->referenceTokens = $this->toReferenceTokens(); $this->depth = count($this->referenceTokens); @@ -76,6 +77,16 @@ final class Pointer implements Stringable return array_slice($referenceTokens, 1); } + /** + * Determine whether the pointer is lazy + * + * @return bool + */ + public function isLazy(): bool + { + return $this->isLazy; + } + /** * Retrieve the reference tokens * @@ -148,6 +159,7 @@ final class Pointer implements Stringable */ public function includesTree(Tree $tree): bool { + // if ($this->pointer == '' && !$this->isLazy) { if ($this->pointer == '') { return true; } From d603787911dec3564aee75b3fdef45adba2d2c0f Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Mon, 20 Mar 2023 19:27:17 +1000 Subject: [PATCH 206/249] Update PHPStan comment --- src/Sources/Endpoint.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Sources/Endpoint.php b/src/Sources/Endpoint.php index 1ecab8e..8ba08a1 100644 --- a/src/Sources/Endpoint.php +++ b/src/Sources/Endpoint.php @@ -68,7 +68,7 @@ class Endpoint extends Source */ public function matches(): bool { - // @phpstan-ignore-next-line + /** @phpstan-ignore-next-line */ return (is_string($this->source) || $this->source instanceof UriInterface) && $this->isEndpoint($this->source); } From c000ce2fa36fb99b1146842f0c4943703ccbfb29 Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Mon, 20 Mar 2023 19:28:53 +1000 Subject: [PATCH 207/249] Set position when exception is thrown --- src/Exceptions/SyntaxException.php | 26 +++++++++++++++++++++++--- src/Lexer.php | 2 +- 2 files changed, 24 insertions(+), 4 deletions(-) diff --git a/src/Exceptions/SyntaxException.php b/src/Exceptions/SyntaxException.php index 
cb8c906..111f00a 100644 --- a/src/Exceptions/SyntaxException.php +++ b/src/Exceptions/SyntaxException.php @@ -10,14 +10,34 @@ use Exception; */ final class SyntaxException extends Exception implements JsonParserException { + /** + * The error position. + * + * @var int|null + */ + public ?int $position = null; + /** * Instantiate the class * * @param string $value - * @param int $position */ - public function __construct(public string $value, public int $position) + public function __construct(public string $value) { - parent::__construct("Syntax error: unexpected '$value' at position {$position}"); + parent::__construct("Syntax error: unexpected '$value'"); + } + + /** + * Set the error position + * + * @param int $position + * @return self + */ + public function setPosition(int $position): self + { + $this->position = $position; + $this->message .= " at position {$position}"; + + return $this; } } diff --git a/src/Lexer.php b/src/Lexer.php index 3cf2243..75ac2e5 100644 --- a/src/Lexer.php +++ b/src/Lexer.php @@ -63,7 +63,7 @@ final class Lexer implements IteratorAggregate $shouldBuffer = $inString || !isset(Tokens::BOUNDARIES[$character]); if ($shouldBuffer && $buffer == '' && !isset(Tokens::TYPES[$character])) { - throw new SyntaxException($character, $this->position); + throw new SyntaxException($character); } if ($shouldBuffer) { From d7bab834e53c0632af0a2d20dd855ba7b6b86741 Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Mon, 20 Mar 2023 19:31:27 +1000 Subject: [PATCH 208/249] Extract method to traverse a token --- src/Tree.php | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/src/Tree.php b/src/Tree.php index ba0913f..2626ab9 100644 --- a/src/Tree.php +++ b/src/Tree.php @@ -3,6 +3,7 @@ namespace Cerbero\JsonParser; use Cerbero\JsonParser\Pointers\Pointers; +use Cerbero\JsonParser\Tokens\Token; use function count; @@ -124,15 +125,23 @@ final class Tree } /** - * Determine whether the tree should be tracked + 
* Traverse the given token * - * @return bool + * @param Token $token + * @param bool $expectsKey + * @return void */ - public function shouldBeTracked(): bool + public function traverseToken(Token $token, bool $expectsKey): void { $pointer = $this->pointers->matching(); - return $pointer == '' || $this->depth() < $pointer->depth(); + if ($pointer != '' && $this->depth >= $pointer->depth()) { + return; + } elseif ($expectsKey) { + $this->traverseKey($token); + } elseif ($token->isValue() && !$this->inObject()) { + $this->traverseArray(); + } } /** @@ -142,7 +151,11 @@ final class Tree */ public function isMatched(): bool { - return $this->depth >= 0 && $this->pointers->matching()->matchesTree($this); + if ($isMatched = $this->depth >= 0 && $this->pointers->matching()->matchesTree($this)) { + $this->pointers->markAsFound(); + } + + return $isMatched; } /** From db626b0a31bc4f293a5d570b0dd83d4630d0b3b5 Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Mon, 20 Mar 2023 19:32:04 +1000 Subject: [PATCH 209/249] Do not decode when the value is lazy loaded --- src/Decoders/ConfigurableDecoder.php | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/Decoders/ConfigurableDecoder.php b/src/Decoders/ConfigurableDecoder.php index 786f690..d85b783 100644 --- a/src/Decoders/ConfigurableDecoder.php +++ b/src/Decoders/ConfigurableDecoder.php @@ -3,6 +3,7 @@ namespace Cerbero\JsonParser\Decoders; use Cerbero\JsonParser\Config; +use Cerbero\JsonParser\Parser; use function call_user_func; @@ -24,12 +25,12 @@ final class ConfigurableDecoder /** * Decode the given value. 
* - * @param string|int $value + * @param Parser|string|int $value * @return mixed */ - public function decode(string|int $value): mixed + public function decode(Parser|string|int $value): mixed { - if (is_int($value)) { + if (!is_string($value)) { return $value; } From b6b965d7fcd5c1fe7cb905c1426c2281ea2093f8 Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Mon, 20 Mar 2023 19:32:17 +1000 Subject: [PATCH 210/249] Implement lazy loading --- src/Parser.php | 88 +++++++++++++++++++++++++++++++++--- src/State.php | 28 ++++++------ src/Tokens/CompoundBegin.php | 39 +++++++++++++++- 3 files changed, 134 insertions(+), 21 deletions(-) diff --git a/src/Parser.php b/src/Parser.php index a08b329..71f5429 100644 --- a/src/Parser.php +++ b/src/Parser.php @@ -5,6 +5,10 @@ namespace Cerbero\JsonParser; use Cerbero\JsonParser\Decoders\ConfigurableDecoder; use Cerbero\JsonParser\Exceptions\SyntaxException; use Cerbero\JsonParser\Sources\Source; +use Cerbero\JsonParser\Tokens\CompoundBegin; +use Cerbero\JsonParser\Tokens\CompoundEnd; +use Cerbero\JsonParser\Tokens\Token; +use Generator; use IteratorAggregate; use Traversable; @@ -15,6 +19,13 @@ use Traversable; */ final class Parser implements IteratorAggregate { + /** + * The tokens to parse. + * + * @var Generator + */ + private Generator $tokens; + /** * The decoder handling potential errors. * @@ -22,14 +33,23 @@ final class Parser implements IteratorAggregate */ private ConfigurableDecoder $decoder; + /** + * Whether the parser is fast-forwarding. + * + * @var bool + */ + private bool $isFastForwarding = false; + /** * Instantiate the class. * - * @param Lexer $lexer + * @param Lexer|Generator $lexer * @param Config $config */ - public function __construct(private Lexer $lexer, private Config $config) + public function __construct(private Lexer|Generator $lexer, private Config $config) { + /** @phpstan-ignore-next-line */ + $this->tokens = $lexer instanceof Lexer ? 
$lexer->getIterator() : $lexer; $this->decoder = new ConfigurableDecoder($config); } @@ -51,11 +71,13 @@ final class Parser implements IteratorAggregate */ public function getIterator(): Traversable { - $state = new State($this->config->pointers); + $state = new State($this->config->pointers, fn () => new self($this->lazyLoad(), clone $this->config)); - foreach ($this->lexer as $token) { - if (!$token->matches($state->expectedToken)) { - throw new SyntaxException($token, $this->lexer->position()); + foreach ($this->tokens as $token) { + if ($this->isFastForwarding) { + continue; + } elseif (!$token->matches($state->expectedToken)) { + throw new SyntaxException($token); } $state->mutateByToken($token); @@ -70,6 +92,8 @@ final class Parser implements IteratorAggregate $value = $this->decoder->decode($state->value()); yield $key => $state->callPointer($value, $key); + + $value instanceof self && $value->fastForward(); } if ($state->canStopParsing()) { @@ -78,6 +102,46 @@ final class Parser implements IteratorAggregate } } + /** + * Retrieve the generator to lazy load the current compound + * + * @return Generator + */ + public function lazyLoad(): Generator + { + $depth = 0; + + do { + yield $token = $this->tokens->current(); + + if ($token instanceof CompoundBegin) { + $depth++; + } elseif ($token instanceof CompoundEnd) { + $depth--; + } + + $depth > 0 && $this->tokens->next(); + } while ($depth > 0); + } + + /** + * Fast-forward the parser + * + * @return void + */ + public function fastForward(): void + { + if (!$this->tokens->valid()) { + return; + } + + $this->isFastForwarding = true; + + foreach ($this as $value) { + $value instanceof self && $value->fastForward(); + } + } + /** * Retrieve the parsing progress * @@ -85,6 +149,18 @@ final class Parser implements IteratorAggregate */ public function progress(): Progress { + /** @phpstan-ignore-next-line */ return $this->lexer->progress(); } + + /** + * Retrieve the parsing position + * + * @return int + */ + 
public function position(): int + { + /** @phpstan-ignore-next-line */ + return $this->lexer->position(); + } } diff --git a/src/State.php b/src/State.php index e4d55e5..1b15e0d 100644 --- a/src/State.php +++ b/src/State.php @@ -3,8 +3,10 @@ namespace Cerbero\JsonParser; use Cerbero\JsonParser\Pointers\Pointers; +use Cerbero\JsonParser\Tokens\CompoundBegin; use Cerbero\JsonParser\Tokens\Token; use Cerbero\JsonParser\Tokens\Tokens; +use Closure; /** * The JSON parsing state. @@ -22,9 +24,9 @@ final class State /** * The JSON buffer. * - * @var string + * @var Parser|string */ - private string $buffer = ''; + private Parser|string $buffer = ''; /** * Whether an object key is expected. @@ -44,8 +46,9 @@ final class State * Instantiate the class. * * @param Pointers $pointers + * @param Closure $lazyLoad */ - public function __construct(private Pointers $pointers) + public function __construct(private Pointers $pointers, private Closure $lazyLoad) { $this->tree = new Tree($this->pointers); } @@ -100,17 +103,14 @@ final class State */ public function mutateByToken(Token $token): void { - $shouldTrackTree = $this->tree->shouldBeTracked(); - - if ($shouldTrackTree && $this->expectsKey) { - $this->tree->traverseKey($token); - } elseif ($shouldTrackTree && $token->isValue() && !$this->tree->inObject()) { - $this->tree->traverseArray(); - } + $this->tree->traverseToken($token, $this->expectsKey); if ($this->tree->isMatched() && ((!$this->expectsKey && $token->isValue()) || $this->tree->isDeep())) { - $this->buffer .= $token; - $this->pointers->markAsFound(); + $shouldLazyLoad = $token instanceof CompoundBegin && $this->pointers->matching()->isLazy(); + /** @phpstan-ignore-next-line */ + $this->buffer = $shouldLazyLoad ? ($this->lazyLoad)() : $this->buffer . 
$token; + /** @var CompoundBegin $token */ + $shouldLazyLoad && $token->shouldLazyLoad = true; } $token->mutateState($this); @@ -129,9 +129,9 @@ final class State /** * Retrieve the value from the buffer and reset it * - * @return string + * @return Parser|string */ - public function value(): string + public function value(): Parser|string { $buffer = $this->buffer; diff --git a/src/Tokens/CompoundBegin.php b/src/Tokens/CompoundBegin.php index 53f8e1e..5bb0579 100644 --- a/src/Tokens/CompoundBegin.php +++ b/src/Tokens/CompoundBegin.php @@ -10,6 +10,13 @@ use Cerbero\JsonParser\State; */ final class CompoundBegin extends Token { + /** + * Whether this compound should be lazy loaded. + * + * @var bool + */ + public bool $shouldLazyLoad = false; + /** * Mutate the given state * @@ -18,8 +25,38 @@ final class CompoundBegin extends Token */ public function mutateState(State $state): void { + $tree = $state->tree(); + + if ($this->shouldLazyLoad = $this->shouldLazyLoad && $tree->depth() >= 0) { + $state->expectedToken = $tree->inObject() ? Tokens::AFTER_OBJECT_VALUE : Tokens::AFTER_ARRAY_VALUE; + return; + } + $state->expectsKey = $beginsObject = $this->value == '{'; $state->expectedToken = $beginsObject ? 
Tokens::AFTER_OBJECT_BEGIN : Tokens::AFTER_ARRAY_BEGIN; - $state->tree()->deepen($beginsObject); + $tree->deepen($beginsObject); + } + + /** + * Set the token value + * + * @param string $value + * @return static + */ + public function setValue(string $value): static + { + $this->shouldLazyLoad = false; + + return parent::setValue($value); + } + + /** + * Determine whether this token ends a JSON chunk + * + * @return bool + */ + public function endsChunk(): bool + { + return $this->shouldLazyLoad; } } From 863a3eb77c81f1bff36bcabe1ecee718b40187d3 Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Mon, 20 Mar 2023 19:33:16 +1000 Subject: [PATCH 211/249] Test lazy pointers and lazy loading --- tests/Dataset.php | 102 +++++++++++++++++++++++++++++++++ tests/Feature/PointersTest.php | 18 +++++- tests/Pest.php | 20 +++++++ 3 files changed, 137 insertions(+), 3 deletions(-) diff --git a/tests/Dataset.php b/tests/Dataset.php index 0c415cb..50f1ab0 100644 --- a/tests/Dataset.php +++ b/tests/Dataset.php @@ -8,6 +8,7 @@ use Cerbero\JsonParser\Sources\Psr7Request; use DirectoryIterator; use Generator; use Mockery; +use Pest\Expectation; /** * The dataset provider. 
@@ -176,6 +177,107 @@ final class Dataset } } + /** + * Retrieve the dataset to test single lazy pointers + * + * @return Generator + */ + public static function forSingleLazyPointers(): Generator + { + $json = fixture('json/complex_object.json'); + $sequenceByPointer = [ + '' => [ + fn ($value, $key) => $key->toBe('id')->and($value->value)->toBe('0001'), + fn ($value, $key) => $key->toBe('type')->and($value->value)->toBe('donut'), + fn ($value, $key) => $key->toBe('name')->and($value->value)->toBe('Cake'), + fn ($value, $key) => $key->toBe('ppu')->and($value->value)->toBe(0.55), + fn ($value, $key) => $key->toBe('batters')->and($value->value)->toBeInstanceOf(Parser::class), + fn ($value, $key) => $key->toBe('topping')->and($value->value)->toBeInstanceOf(Parser::class), + ], + '/batters/batter/-' => [ + fn ($value, $key) => $key->toBe(0)->and($value->value)->toBeInstanceOf(Parser::class), + fn ($value, $key) => $key->toBe(1)->and($value->value)->toBeInstanceOf(Parser::class), + fn ($value, $key) => $key->toBe(2)->and($value->value)->toBeInstanceOf(Parser::class), + fn ($value, $key) => $key->toBe(3)->and($value->value)->toBeInstanceOf(Parser::class), + ], + '/topping/-' => [ + fn ($value, $key) => $key->toBe(0)->and($value->value)->toBeInstanceOf(Parser::class), + fn ($value, $key) => $key->toBe(1)->and($value->value)->toBeInstanceOf(Parser::class), + fn ($value, $key) => $key->toBe(2)->and($value->value)->toBeInstanceOf(Parser::class), + fn ($value, $key) => $key->toBe(3)->and($value->value)->toBeInstanceOf(Parser::class), + fn ($value, $key) => $key->toBe(4)->and($value->value)->toBeInstanceOf(Parser::class), + fn ($value, $key) => $key->toBe(5)->and($value->value)->toBeInstanceOf(Parser::class), + fn ($value, $key) => $key->toBe(6)->and($value->value)->toBeInstanceOf(Parser::class), + ], + ]; + + foreach ($sequenceByPointer as $pointer => $sequence) { + yield [$json, $pointer, $sequence]; + } + } + + /** + * Retrieve the dataset to test multiple lazy pointers + 
* + * @return Generator + */ + public static function forMultipleLazyPointers(): Generator + { + $json = fixture('json/complex_object.json'); + $sequenceByPointer = [ + '/topping,/batters' => [ + fn ($value, $key) => $key->toBe('batters')->and($value->value)->toBeInstanceOf(Parser::class), + fn ($value, $key) => $key->toBe('topping')->and($value->value)->toBeInstanceOf(Parser::class), + ], + '/topping/-,/batters/batter' => [ + fn ($value, $key) => $key->toBe('batter')->and($value->value)->toBeInstanceOf(Parser::class), + fn ($value, $key) => $key->toBe(0)->and($value->value)->toBeInstanceOf(Parser::class), + fn ($value, $key) => $key->toBe(1)->and($value->value)->toBeInstanceOf(Parser::class), + fn ($value, $key) => $key->toBe(2)->and($value->value)->toBeInstanceOf(Parser::class), + fn ($value, $key) => $key->toBe(3)->and($value->value)->toBeInstanceOf(Parser::class), + fn ($value, $key) => $key->toBe(4)->and($value->value)->toBeInstanceOf(Parser::class), + fn ($value, $key) => $key->toBe(5)->and($value->value)->toBeInstanceOf(Parser::class), + fn ($value, $key) => $key->toBe(6)->and($value->value)->toBeInstanceOf(Parser::class), + ], + ]; + + foreach ($sequenceByPointer as $pointers => $sequence) { + yield [$json, explode(',', $pointers), $sequence]; + } + } + + /** + * Retrieve the dataset to test recursive lazy loading + * + * @return Generator + */ + public static function forRecursiveLazyLoading(): Generator + { + $json = fixture('json/complex_object.json'); + $expectedByKeys = [ + 'batters,batter' => [ + ['id' => '1001', 'type' => 'Regular'], + ['id' => '1002', 'type' => 'Chocolate'], + ['id' => '1003', 'type' => 'Blueberry'], + ['id' => '1004', 'type' => 'Devil\'s Food'], + ], + 'topping' => [ + ['id' => '5001', 'type' => 'None'], + ['id' => '5002', 'type' => 'Glazed'], + ['id' => '5005', 'type' => 'Sugar'], + ['id' => '5007', 'type' => 'Powdered Sugar'], + ['id' => '5006', 'type' => 'Chocolate with Sprinkles'], + ['id' => '5003', 'type' => 'Chocolate'], + 
['id' => '5004', 'type' => 'Maple'], + ], + ]; + + foreach ($expectedByKeys as $keys => $expected) { + $keys = explode(',', $keys); + yield [$json, '/' . $keys[0], $keys, $expected]; + } + } + /** * Retrieve the dataset to test syntax errors * diff --git a/tests/Feature/PointersTest.php b/tests/Feature/PointersTest.php index 98917e5..f75683b 100644 --- a/tests/Feature/PointersTest.php +++ b/tests/Feature/PointersTest.php @@ -11,11 +11,11 @@ it('throws an exception when providing an invalid JSON pointer', function (strin ->toThrow(InvalidPointerException::class, "The string [$pointer] is not a valid JSON pointer"); })->with(Dataset::forInvalidPointers()); -it('supports single JSON pointers', function (string $json, string $pointer, array $parsed) { +it('loads JSON from a single JSON pointer', function (string $json, string $pointer, array $parsed) { expect(JsonParser::parse($json)->pointer($pointer))->toPointTo($parsed); })->with(Dataset::forSinglePointers()); -it('supports multiple JSON pointers', function (string $json, array $pointers, array $parsed) { +it('loads JSON from multiple JSON pointers', function (string $json, array $pointers, array $parsed) { expect(JsonParser::parse($json)->pointers($pointers))->toPointTo($parsed); })->with(Dataset::forMultiplePointers()); @@ -24,6 +24,18 @@ it('can intersect pointers with wildcards', function (string $json, array $point })->with(Dataset::forIntersectingPointersWithWildcards()); it('throws an exception when two pointers intersect', function (string $json, array $pointers, string $message) { - expect(fn () => JsonParser::parse($json)->pointers($pointers)->traverse()) + expect(fn () => JsonParser::parse($json)->pointers($pointers)) ->toThrow(IntersectingPointersException::class, $message); })->with(Dataset::forIntersectingPointers()); + +it('lazy loads JSON from a single lazy JSON pointer', function (string $json, string $pointer, array $sequence) { + 
expect(JsonParser::parse($json)->lazyPointer($pointer))->sequence(...$sequence); +})->with(Dataset::forSingleLazyPointers()); + +it('lazy loads JSON from multiple lazy JSON pointers', function (string $json, array $pointers, array $sequence) { + expect(JsonParser::parse($json)->lazyPointers($pointers))->sequence(...$sequence); +})->with(Dataset::forMultipleLazyPointers()); + +it('lazy loads JSON recursively', function (string $json, string $pointer, array $keys, array $expected) { + expect(JsonParser::parse($json)->lazyPointer($pointer))->toLazyLoadRecursively($keys, $expected); +})->with(Dataset::forRecursiveLazyLoading()); diff --git a/tests/Pest.php b/tests/Pest.php index 81730b8..a85408c 100644 --- a/tests/Pest.php +++ b/tests/Pest.php @@ -1,5 +1,7 @@ extend('toPointTo', function (array $expected) { return expect($actual)->toBe($expected); }); + +/** + * Expect that values defined by lazy JSON pointers are parsed correctly + * + * @param array $expected + * @return Expectation + */ +expect()->extend('toLazyLoadRecursively', function (array $keys, array $expected) { + foreach ($this->value as $key => $value) { + expect($value)->toBeInstanceOf(Parser::class); + + if (is_null($expectedKey = array_shift($keys))) { + expect($key)->toBeInt()->and($value)->toParseTo($expected[$key]); + } else { + expect($key)->toBe($expectedKey)->and($value)->toLazyLoadRecursively($keys, $expected); + } + } +}); From 9629c2ec8568270403f38883012332c107500970 Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Tue, 21 Mar 2023 17:32:55 +1000 Subject: [PATCH 212/249] Replace call_user_func() --- src/Decoders/ConfigurableDecoder.php | 4 +--- src/JsonParser.php | 2 +- src/Pointers/Pointer.php | 3 +-- 3 files changed, 3 insertions(+), 6 deletions(-) diff --git a/src/Decoders/ConfigurableDecoder.php b/src/Decoders/ConfigurableDecoder.php index d85b783..c99ea64 100644 --- a/src/Decoders/ConfigurableDecoder.php +++ b/src/Decoders/ConfigurableDecoder.php @@ -5,8 +5,6 @@ namespace 
Cerbero\JsonParser\Decoders; use Cerbero\JsonParser\Config; use Cerbero\JsonParser\Parser; -use function call_user_func; - /** * The configurable decoder. * @@ -37,7 +35,7 @@ final class ConfigurableDecoder $decoded = $this->config->decoder->decode($value); if (!$decoded->succeeded) { - call_user_func($this->config->onDecodingError, $decoded); + ($this->config->onDecodingError)($decoded); } return $decoded->value; diff --git a/src/JsonParser.php b/src/JsonParser.php index 6c57a9a..203a313 100644 --- a/src/JsonParser.php +++ b/src/JsonParser.php @@ -65,7 +65,7 @@ final class JsonParser implements IteratorAggregate yield from $this->parser; } catch (SyntaxException $e) { $e->setPosition($this->parser->position()); - call_user_func($this->config->onSyntaxError, $e); + ($this->config->onSyntaxError)($e); } } diff --git a/src/Pointers/Pointer.php b/src/Pointers/Pointer.php index f0681ed..82ddbc1 100644 --- a/src/Pointers/Pointer.php +++ b/src/Pointers/Pointer.php @@ -8,7 +8,6 @@ use Closure; use Stringable; use function count; -use function call_user_func; use function is_int; use function array_slice; @@ -120,7 +119,7 @@ final class Pointer implements Stringable return $value; } - return call_user_func($this->callback, $value, $key) ?? $value; + return ($this->callback)($value, $key) ?? 
$value; } /** From a6f037833cb5f420bea0d1d7c93627316480a6fb Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Tue, 21 Mar 2023 17:33:16 +1000 Subject: [PATCH 213/249] Add missing parameter docblock --- tests/Pest.php | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/Pest.php b/tests/Pest.php index a85408c..69390b9 100644 --- a/tests/Pest.php +++ b/tests/Pest.php @@ -60,6 +60,7 @@ expect()->extend('toPointTo', function (array $expected) { /** * Expect that values defined by lazy JSON pointers are parsed correctly * + * @param array $keys * @param array $expected * @return Expectation */ From 6e9c0996c4df7123b37f442694e5657da31b8e92 Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Tue, 21 Mar 2023 17:33:52 +1000 Subject: [PATCH 214/249] Test mixed pointers --- tests/Dataset.php | 45 +++++++++++++++++++++++++++++++++- tests/Feature/PointersTest.php | 4 +++ 2 files changed, 48 insertions(+), 1 deletion(-) diff --git a/tests/Dataset.php b/tests/Dataset.php index 50f1ab0..c6b7173 100644 --- a/tests/Dataset.php +++ b/tests/Dataset.php @@ -8,7 +8,6 @@ use Cerbero\JsonParser\Sources\Psr7Request; use DirectoryIterator; use Generator; use Mockery; -use Pest\Expectation; /** * The dataset provider. 
@@ -278,6 +277,50 @@ final class Dataset } } + /** + * Retrieve the dataset to test mixed pointers + * + * @return Generator + */ + public static function forMixedPointers(): Generator + { + $json = fixture('json/complex_object.json'); + $pointersList = [ + [ + '/name' => fn (string $name) => "name_{$name}", + ], + [ + '/id' => fn (string $id) => "id_{$id}", + '/type' => fn (string $type) => "type_{$type}", + ], + ]; + $lazyPointers = [ + [ + '/batters/batter' => fn (Parser $batter) => $batter::class, + ], + [ + '/batters' => fn (Parser $batters) => $batters::class, + '/topping' => fn (Parser $topping) => $topping::class, + ], + ]; + $expected = [ + [ + 'name' => 'name_Cake', + 'batter' => Parser::class, + ], + [ + 'id' => 'id_0001', + 'type' => 'type_donut', + 'batters' => Parser::class, + 'topping' => Parser::class, + ], + ]; + + foreach ($pointersList as $index => $pointers) { + yield [$json, $pointers, $lazyPointers[$index], $expected[$index]]; + } + } + /** * Retrieve the dataset to test syntax errors * diff --git a/tests/Feature/PointersTest.php b/tests/Feature/PointersTest.php index f75683b..2a44cd3 100644 --- a/tests/Feature/PointersTest.php +++ b/tests/Feature/PointersTest.php @@ -39,3 +39,7 @@ it('lazy loads JSON from multiple lazy JSON pointers', function (string $json, a it('lazy loads JSON recursively', function (string $json, string $pointer, array $keys, array $expected) { expect(JsonParser::parse($json)->lazyPointer($pointer))->toLazyLoadRecursively($keys, $expected); })->with(Dataset::forRecursiveLazyLoading()); + +it('mixes pointers and lazy pointers', function (string $json, array $pointers, array $lazyPointers, array $expected) { + expect(JsonParser::parse($json)->pointers($pointers)->lazyPointers($lazyPointers))->toParseTo($expected); +})->with(Dataset::forMixedPointers()); From 5d23f83e692ac235f3a4cd57a36facbaf3ebe851 Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Tue, 21 Mar 2023 17:33:59 +1000 Subject: [PATCH 215/249] Update 
readme --- README.md | 95 +++++++++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 82 insertions(+), 13 deletions(-) diff --git a/README.md b/README.md index 000ef2a..846587b 100644 --- a/README.md +++ b/README.md @@ -28,8 +28,9 @@ composer require cerbero/json-parser * [👣 Basics](#-basics) * [💧 Sources](#-sources) * [🎯 Pointers](#-pointers) +* [🐼 Lazy pointers](#-lazy-pointers) * [⚙️ Decoders](#%EF%B8%8F-decoders) -* [💢 Errors](#-errors) +* [💢 Errors handling](#-errors-handling) * [⏳ Progress](#-progress) * [🛠 Settings](#-settings) @@ -148,20 +149,20 @@ If you find yourself implementing the same custom source in different projects, A JSON pointer is a [standard](https://www.rfc-editor.org/rfc/rfc6901) used to point to nodes within a JSON. This package leverages JSON pointers to extract only some sub-trees from large JSONs. -Consider [this JSON](https://randomuser.me/api/1.4?seed=json-parser&results=5) for example. To extract only the first gender and avoid parsing the rest of the JSON, we can set the `/0/gender` pointer: +Consider [this JSON](https://randomuser.me/api/1.4?seed=json-parser&results=5) for example. 
To extract only the first gender and avoid parsing the rest of the JSON, we can set the `/results/0/gender` pointer: ```php -$json = JsonParser::parse($source)->pointer('/0/gender'); +$json = JsonParser::parse($source)->pointer('/results/0/gender'); foreach ($json as $key => $value) { // 1st and only iteration: $key === 'gender', $value === 'female' } ``` -JSON Parser takes advantage of the `-` wildcard to point to any array index, so we can extract all the genders with the `/-/gender` pointer: +JSON Parser takes advantage of the `-` wildcard to point to any array index, so we can extract all the genders with the `/results/-/gender` pointer: ```php -$json = JsonParser::parse($source)->pointer('/-/gender'); +$json = JsonParser::parse($source)->pointer('/results/-/gender'); foreach ($json as $key => $value) { // 1st iteration: $key === 'gender', $value === 'female' @@ -174,7 +175,7 @@ foreach ($json as $key => $value) { If we want to extract more sub-trees, we can set multiple pointers. Let's extract all genders and countries: ```php -$json = JsonParser::parse($source)->pointers(['/-/gender', '/-/location/country']); +$json = JsonParser::parse($source)->pointers(['/results/-/gender', '/results/-/location/country']); foreach ($json as $key => $value) { // 1st iteration: $key === 'gender', $value === 'female' @@ -191,8 +192,8 @@ We can also specify a callback to execute when JSON pointers are found. 
This is ```php $json = JsonParser::parse($source)->pointers([ - '/-/gender' => fn (string $gender, string $key) => new Gender($gender), - '/-/location/country' => fn (string $country, string $key) => new Country($country), + '/results/-/gender' => fn (string $gender, string $key) => new Gender($gender), + '/results/-/location/country' => fn (string $country, string $key) => new Country($country), ]); foreach ($json as $key => $value) { @@ -208,8 +209,8 @@ The same can also be achieved by chaining the method `pointer()` multiple times: ```php $json = JsonParser::parse($source) - ->pointer('/-/gender', fn (string $gender, string $key) => new Gender($gender)) - ->pointer('/-/location/country', fn (string $country, string $key) => new Country($country)); + ->pointer('/results/-/gender', fn (string $gender, string $key) => new Gender($gender)) + ->pointer('/results/-/location/country', fn (string $country, string $key) => new Country($country)); foreach ($json as $key => $value) { // 1st iteration: $key === 'gender', $value instanceof Gender @@ -233,8 +234,8 @@ Otherwise if some common logic for all pointers is needed but we prefer methods ```php JsonParser::parse($source) - ->pointer('/-/gender', fn (string $gender, string $key) => new Gender($gender)) - ->pointer('/-/location/country', fn (string $country, string $key) => new Country($country)) + ->pointer('/results/-/gender', fn (string $gender, string $key) => new Gender($gender)) + ->pointer('/results/-/location/country', fn (string $country, string $key) => new Country($country)) ->traverse(function (Gender|Country $value, string $key, JsonParser $parser) { // 1st iteration: $key === 'gender', $value instanceof Gender // 2nd iteration: $key === 'country', $value instanceof Country @@ -246,6 +247,74 @@ JsonParser::parse($source) > ⚠️ Please note the parameters order of the callbacks: the value is passed before the key. +### 🐼 Lazy pointers + +JSON Parser keeps in memory only one key and one value at a time. 
However, if the value is a large array or a large object, we may not want to keep it all in memory. + +The solution is to use lazy pointers, which recursively keep in memory only one key and one value at a time of any nested array or object: + +```php +$json = JsonParser::parse($source)->lazyPointer('/results/0/name'); + +foreach ($json as $key => $value) { + // 1st iteration: $key === 'name', $value instanceof Parser +} +``` + +Lazy pointers return a light-weight instance of `Cerbero\JsonParser\Parser` instead of the actual large value. To lazy load nested keys and values, we can then loop through the parser: + +```php +$json = JsonParser::parse($source)->lazyPointer('/results/0/name'); + +foreach ($json as $key => $value) { + // 1st iteration: $key === 'name', $value instanceof Parser + foreach ($value as $nestedKey => $nestedValue) { + // 1st iteration: $nestedKey === 'title', $nestedValue === 'Mrs' + // 2nd iteration: $nestedKey === 'first', $nestedValue === 'Sara' + // 3rd iteration: $nestedKey === 'last', $nestedValue === 'Meder' + } +} +``` + +As mentioned above, lazy pointers are recursive. This means that no nested objects or arrays will ever be kept in memory: + +```php +$json = JsonParser::parse($source)->lazyPointer('/results/0/location'); + +foreach ($json as $key => $value) { + // 1st iteration: $key === 'location', $value instanceof Parser + foreach ($value as $nestedKey => $nestedValue) { + // 1st iteration: $nestedKey === 'street', $nestedValue instanceof Parser + // 2nd iteration: $nestedKey === 'city', $nestedValue === 'Sontra' + // ... 
+ // 6th iteration: $nestedKey === 'coordinates', $nestedValue instanceof Parser + // 7th iteration: $nestedKey === 'timezone', $nestedValue instanceof Parser + } +} +``` + +Lazy pointers also have all the other functionalities of normal pointers: they accept callbacks, they can be set one by one or all together and they can be mixed with normal pointers as well: + +```php +// set custom callback to run only when names are found +$json = JsonParser::parse($source)->lazyPointer('/results/-/name', fn (Parser $name) => $this->handleName($name)); + +// set multiple lazy pointers one by one +$json = JsonParser::parse($source) + ->lazyPointer('/results/-/name', fn (Parser $name) => $this->handleName($name)) + ->lazyPointer('/results/-/location', fn (Parser $location) => $this->handleLocation($location)); + +// set multiple lazy pointers all together +$json = JsonParser::parse($source)->lazyPointers([ + '/results/-/name' => fn (Parser $name) => $this->handleName($name), + '/results/-/location' => fn (Parser $location) => $this->handleLocation($location), +]); + +// mix pointers and lazy pointers +$json = JsonParser::parse($source) + ->pointer('/results/-/gender', fn (string $gender) => $this->handleGender($gender)) + ->lazyPointer('/results/-/name', fn (Parser $name) => $this->handleName($name)); +``` ### ⚙️ Decoders @@ -311,7 +380,7 @@ If you find yourself implementing the same custom decoder in different projects, -### 💢 Errors +### 💢 Errors handling Not all JSONs are valid, some may present syntax errors due to an incorrect structure (e.g. `[}`) or decoding errors when values can't be decoded properly (e.g. `[1a]`).
JSON Parser allows us to intervene and define the logic to run when these issues occur: From 672f6953beb0f400547a192904a2281b9e4dc793 Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Wed, 22 Mar 2023 15:28:57 +1000 Subject: [PATCH 216/249] Move classes around --- src/Decoders/ConfigurableDecoder.php | 4 ++-- src/JsonParser.php | 3 +++ src/Pointers/Pointer.php | 2 +- src/Pointers/Pointers.php | 2 +- src/Sources/Source.php | 2 +- src/Tokens/Colon.php | 2 +- src/Tokens/Comma.php | 2 +- src/Tokens/CompoundBegin.php | 2 +- src/Tokens/CompoundEnd.php | 2 +- src/Tokens/Constant.php | 2 +- src/{ => Tokens}/Lexer.php | 3 ++- src/{ => Tokens}/Parser.php | 5 ++++- src/Tokens/ScalarString.php | 2 +- src/Tokens/Token.php | 2 +- src/{ => ValueObjects}/Config.php | 2 +- src/{ => ValueObjects}/Progress.php | 2 +- src/{ => ValueObjects}/State.php | 3 ++- src/{ => ValueObjects}/Tree.php | 2 +- tests/Dataset.php | 1 + tests/Pest.php | 2 +- 20 files changed, 28 insertions(+), 19 deletions(-) rename src/{ => Tokens}/Lexer.php (96%) rename src/{ => Tokens}/Parser.php (95%) rename src/{ => ValueObjects}/Config.php (97%) rename src/{ => ValueObjects}/Progress.php (97%) rename src/{ => ValueObjects}/State.php (97%) rename src/{ => ValueObjects}/Tree.php (99%) diff --git a/src/Decoders/ConfigurableDecoder.php b/src/Decoders/ConfigurableDecoder.php index c99ea64..43a7e2b 100644 --- a/src/Decoders/ConfigurableDecoder.php +++ b/src/Decoders/ConfigurableDecoder.php @@ -2,8 +2,8 @@ namespace Cerbero\JsonParser\Decoders; -use Cerbero\JsonParser\Config; -use Cerbero\JsonParser\Parser; +use Cerbero\JsonParser\Tokens\Parser; +use Cerbero\JsonParser\ValueObjects\Config; /** * The configurable decoder. 
diff --git a/src/JsonParser.php b/src/JsonParser.php index 203a313..0236a6d 100644 --- a/src/JsonParser.php +++ b/src/JsonParser.php @@ -7,6 +7,9 @@ use Cerbero\JsonParser\Decoders\Decoder; use Cerbero\JsonParser\Exceptions\SyntaxException; use Cerbero\JsonParser\Pointers\Pointer; use Cerbero\JsonParser\Sources\AnySource; +use Cerbero\JsonParser\Tokens\Parser; +use Cerbero\JsonParser\ValueObjects\Config; +use Cerbero\JsonParser\ValueObjects\Progress; use Closure; use IteratorAggregate; use Traversable; diff --git a/src/Pointers/Pointer.php b/src/Pointers/Pointer.php index 82ddbc1..43bf9bc 100644 --- a/src/Pointers/Pointer.php +++ b/src/Pointers/Pointer.php @@ -3,7 +3,7 @@ namespace Cerbero\JsonParser\Pointers; use Cerbero\JsonParser\Exceptions\InvalidPointerException; -use Cerbero\JsonParser\Tree; +use Cerbero\JsonParser\ValueObjects\Tree; use Closure; use Stringable; diff --git a/src/Pointers/Pointers.php b/src/Pointers/Pointers.php index 390b483..7e2efcc 100644 --- a/src/Pointers/Pointers.php +++ b/src/Pointers/Pointers.php @@ -3,7 +3,7 @@ namespace Cerbero\JsonParser\Pointers; use Cerbero\JsonParser\Exceptions\IntersectingPointersException; -use Cerbero\JsonParser\Tree; +use Cerbero\JsonParser\ValueObjects\Tree; use function count; diff --git a/src/Sources/Source.php b/src/Sources/Source.php index 9dd8935..5924445 100644 --- a/src/Sources/Source.php +++ b/src/Sources/Source.php @@ -2,7 +2,7 @@ namespace Cerbero\JsonParser\Sources; -use Cerbero\JsonParser\Config; +use Cerbero\JsonParser\ValueObjects\Config; use IteratorAggregate; use Traversable; diff --git a/src/Tokens/Colon.php b/src/Tokens/Colon.php index a99f3c5..c9fe4d8 100644 --- a/src/Tokens/Colon.php +++ b/src/Tokens/Colon.php @@ -2,7 +2,7 @@ namespace Cerbero\JsonParser\Tokens; -use Cerbero\JsonParser\State; +use Cerbero\JsonParser\ValueObjects\State; /** * The colon token. 
diff --git a/src/Tokens/Comma.php b/src/Tokens/Comma.php index 6481f8d..12b092a 100644 --- a/src/Tokens/Comma.php +++ b/src/Tokens/Comma.php @@ -2,7 +2,7 @@ namespace Cerbero\JsonParser\Tokens; -use Cerbero\JsonParser\State; +use Cerbero\JsonParser\ValueObjects\State; /** * The comma token. diff --git a/src/Tokens/CompoundBegin.php b/src/Tokens/CompoundBegin.php index 5bb0579..35c75f1 100644 --- a/src/Tokens/CompoundBegin.php +++ b/src/Tokens/CompoundBegin.php @@ -2,7 +2,7 @@ namespace Cerbero\JsonParser\Tokens; -use Cerbero\JsonParser\State; +use Cerbero\JsonParser\ValueObjects\State; /** * The token that begins compound data (JSON arrays or objects). diff --git a/src/Tokens/CompoundEnd.php b/src/Tokens/CompoundEnd.php index e7b9fd6..5152eef 100644 --- a/src/Tokens/CompoundEnd.php +++ b/src/Tokens/CompoundEnd.php @@ -2,7 +2,7 @@ namespace Cerbero\JsonParser\Tokens; -use Cerbero\JsonParser\State; +use Cerbero\JsonParser\ValueObjects\State; /** * The token that ends compound data (JSON arrays or objects). diff --git a/src/Tokens/Constant.php b/src/Tokens/Constant.php index 9070c3e..97cc119 100644 --- a/src/Tokens/Constant.php +++ b/src/Tokens/Constant.php @@ -2,7 +2,7 @@ namespace Cerbero\JsonParser\Tokens; -use Cerbero\JsonParser\State; +use Cerbero\JsonParser\ValueObjects\State; /** * The constant token. 
diff --git a/src/Lexer.php b/src/Tokens/Lexer.php similarity index 96% rename from src/Lexer.php rename to src/Tokens/Lexer.php index 75ac2e5..e0677a7 100644 --- a/src/Lexer.php +++ b/src/Tokens/Lexer.php @@ -1,12 +1,13 @@ Date: Wed, 22 Mar 2023 23:35:41 +1000 Subject: [PATCH 217/249] Eager load JSON into an array --- src/JsonParser.php | 14 +++++++++++--- src/Tokens/Parser.php | 16 ++++++++++++++++ 2 files changed, 27 insertions(+), 3 deletions(-) diff --git a/src/JsonParser.php b/src/JsonParser.php index 0236a6d..ffa81e3 100644 --- a/src/JsonParser.php +++ b/src/JsonParser.php @@ -138,13 +138,21 @@ final class JsonParser implements IteratorAggregate */ public function traverse(Closure $callback = null): void { - $callback ??= fn () => true; - foreach ($this as $key => $value) { - $callback($value, $key, $this); + $callback && $callback($value, $key, $this); } } + /** + * Eager load the JSON into an array + * + * @return array + */ + public function toArray(): array + { + return $this->parser->toArray(); + } + /** * Set the JSON decoder * diff --git a/src/Tokens/Parser.php b/src/Tokens/Parser.php index f6d9e82..0f3e862 100644 --- a/src/Tokens/Parser.php +++ b/src/Tokens/Parser.php @@ -127,6 +127,22 @@ final class Parser implements IteratorAggregate } while ($depth > 0); } + /** + * Eager load the current compound into an array + * + * @return array + */ + public function toArray(): array + { + $array = []; + + foreach ($this as $key => $value) { + $array[$key] = $value instanceof self ? 
$value->toArray() : $value; + } + + return $array; + } + /** * Fast-forward the parser * From e93aacac7837509c5675be27887ac2275fa3c21b Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Wed, 22 Mar 2023 23:36:00 +1000 Subject: [PATCH 218/249] Test eager loading --- tests/Dataset.php | 46 ++++- tests/Feature/ParsingTest.php | 4 + tests/Feature/PointersTest.php | 16 ++ .../pointers/multiple_pointers_to_array.php | 94 +++++++++ .../pointers/single_pointer_to_array.php | 178 ++++++++++++++++++ 5 files changed, 336 insertions(+), 2 deletions(-) create mode 100644 tests/fixtures/pointers/multiple_pointers_to_array.php create mode 100644 tests/fixtures/pointers/single_pointer_to_array.php diff --git a/tests/Dataset.php b/tests/Dataset.php index 511abee..351c89a 100644 --- a/tests/Dataset.php +++ b/tests/Dataset.php @@ -64,7 +64,18 @@ final class Dataset */ public static function forSinglePointers(): Generator { - $singlePointers = require fixture('pointers/single_pointer.php'); + yield from self::forSinglePointersWithFixture('pointers/single_pointer.php'); + } + + /** + * Retrieve the dataset to test single pointers with the given fixture + * + * @param string $path + * @return Generator + */ + private static function forSinglePointersWithFixture(string $path): Generator + { + $singlePointers = require fixture($path); foreach ($singlePointers as $fixture => $pointers) { $json = file_get_contents(fixture("json/{$fixture}.json")); @@ -75,6 +86,16 @@ final class Dataset } } + /** + * Retrieve the dataset to test single pointers eager loading + * + * @return Generator + */ + public static function forSinglePointersToArray(): Generator + { + yield from self::forSinglePointersWithFixture('pointers/single_pointer_to_array.php'); + } + /** * Retrieve the dataset to test multiple pointers * @@ -82,7 +103,18 @@ final class Dataset */ public static function forMultiplePointers(): Generator { - $multiplePointers = require fixture('pointers/multiple_pointers.php'); + yield from 
self::forMultiplePointersWithFixture('pointers/multiple_pointers.php'); + } + + /** + * Retrieve the dataset to test multiple pointers with the given fixture + * + * @param string $path + * @return Generator + */ + private static function forMultiplePointersWithFixture(string $path): Generator + { + $multiplePointers = require fixture($path); foreach ($multiplePointers as $fixture => $valueByPointers) { $json = file_get_contents(fixture("json/{$fixture}.json")); @@ -93,6 +125,16 @@ final class Dataset } } + /** + * Retrieve the dataset to test multiple pointers eager loading + * + * @return Generator + */ + public static function forMultiplePointersToArray(): Generator + { + yield from self::forMultiplePointersWithFixture('pointers/multiple_pointers_to_array.php'); + } + /** * Retrieve the dataset to test intersecting pointers with wildcards * diff --git a/tests/Feature/ParsingTest.php b/tests/Feature/ParsingTest.php index 7e2e07f..b93b1a7 100644 --- a/tests/Feature/ParsingTest.php +++ b/tests/Feature/ParsingTest.php @@ -17,3 +17,7 @@ it('parses JSON when instantiated statically', function (string $json, array $pa it('parses JSON when calling the helper', function (string $json, array $parsed) { expect(parseJson($json))->toParseTo($parsed); })->with(Dataset::forParsing()); + +it('eager loads JSON into an array', function (string $json, array $parsed) { + expect(JsonParser::parse($json)->toArray())->toBe($parsed); +})->with(Dataset::forParsing()); diff --git a/tests/Feature/PointersTest.php b/tests/Feature/PointersTest.php index 2a44cd3..d7d7298 100644 --- a/tests/Feature/PointersTest.php +++ b/tests/Feature/PointersTest.php @@ -15,10 +15,26 @@ it('loads JSON from a single JSON pointer', function (string $json, string $poin expect(JsonParser::parse($json)->pointer($pointer))->toPointTo($parsed); })->with(Dataset::forSinglePointers()); +it('eager loads pointers into an array', function (string $json, string $pointer, array $expected) { + 
expect(JsonParser::parse($json)->pointer($pointer)->toArray())->toBe($expected); +})->with(Dataset::forSinglePointersToArray()); + +it('eager loads lazy pointers into an array', function (string $json, string $pointer, array $expected) { + expect(JsonParser::parse($json)->lazyPointer($pointer)->toArray())->toBe($expected); +})->with(Dataset::forSinglePointersToArray()); + it('loads JSON from multiple JSON pointers', function (string $json, array $pointers, array $parsed) { expect(JsonParser::parse($json)->pointers($pointers))->toPointTo($parsed); })->with(Dataset::forMultiplePointers()); +it('eager loads multiple pointers into an array', function (string $json, array $pointers, array $expected) { + expect(JsonParser::parse($json)->pointers($pointers)->toArray())->toBe($expected); +})->with(Dataset::forMultiplePointersToArray()); + +it('eager loads multiple lazy pointers into an array', function (string $json, array $pointers, array $expected) { + expect(JsonParser::parse($json)->lazyPointers($pointers)->toArray())->toBe($expected); +})->with(Dataset::forMultiplePointersToArray()); + it('can intersect pointers with wildcards', function (string $json, array $pointers, array $parsed) { expect(JsonParser::parse($json)->pointers($pointers))->toPointTo($parsed); })->with(Dataset::forIntersectingPointersWithWildcards()); diff --git a/tests/fixtures/pointers/multiple_pointers_to_array.php b/tests/fixtures/pointers/multiple_pointers_to_array.php new file mode 100644 index 0000000..a57adf3 --- /dev/null +++ b/tests/fixtures/pointers/multiple_pointers_to_array.php @@ -0,0 +1,94 @@ + [ + '/-1,/-2' => [], + '/-/id,/-/batters/batter/-/type' => [ + 'id' => '0003', + 'type' => 'Chocolate', + ], + '/-/name,/-/topping/-/type,/-/id' => [ + 'id' => '0003', + 'name' => 'Old Fashioned', + 'type' => 'Maple', + ], + '/-/batters/batter/-,/-/name' => [ + 'name' => 'Old Fashioned', + [ + "id" => "1001", + "type" => "Regular", + ], + [ + "id" => "1002", + "type" => "Chocolate", + ], + [ + 
"id" => "1003", + "type" => "Blueberry", + ], + [ + "id" => "1004", + "type" => "Devil's Food", + ], + ], + ], + 'complex_object' => [ + '/-1,/-2' => [], + '/id,/batters/batter/-/type' => [ + 'id' => '0001', + 'type' => "Devil's Food", + ], + '/name,/topping/-/type,/id' => [ + 'id' => '0001', + 'name' => 'Cake', + 'type' => 'Maple', + ], + '/batters/batter/-,/type' => [ + 'type' => 'donut', + [ + "id" => "1001", + "type" => "Regular", + ], + [ + "id" => "1002", + "type" => "Chocolate", + ], + [ + "id" => "1003", + "type" => "Blueberry", + ], + [ + "id" => "1004", + "type" => "Devil's Food", + ], + ], + ], + 'empty_array' => [ + '/-1,/-2' => [], + '/foo,/bar' => [], + ], + 'empty_object' => [ + '/-1,/-2' => [], + '/foo,/bar' => [], + ], + 'simple_array' => [ + '/-1,/-2' => [], + '/0,/1' => [0 => 1, 1 => ''], + '/1,/0' => [0 => 1, 1 => ''], + '/0,/2' => [0 => 1, 2 => 'foo'], + '/2,/3' => [2 => 'foo', 3 => '"bar"'], + '/3,/4,/5' => [3 => '"bar"', 4 => 'hej då', 5 => 3.14], + '/4,/5,/3' => [3 => '"bar"', 4 => 'hej då', 5 => 3.14], + '/6,/7,/8,/9' => [6 => false, 7 => null, 8 => [], 9 => []], + '/9,/8,/7,/6' => [6 => false, 7 => null, 8 => [], 9 => []], + ], + 'simple_object' => [ + '/-1,/-2' => [], + '/int,/empty_string' => ['int' => 1, 'empty_string' => ''], + '/empty_string,/int' => ['int' => 1, 'empty_string' => ''], + '/string,/escaped_string,/\"escaped_key\"' => ['string' => 'foo', 'escaped_string' => '"bar"', '"escaped_key"' => 'baz'], + '/unicode,/bool,/empty_array' => ['unicode' => "hej då", 'bool' => false, 'empty_array' => []], + '/,/a~1b,/c%d,/e^f,/g|h,/i\\\\j' => ['' => 0, 'a/b' => 1, 'c%d' => 2, 'e^f' => 3, 'g|h' => 4, 'i\\j' => 5], + '/k\"l,/ ,/m~0n' => ['k"l' => 6, ' ' => 7, 'm~n' => 8], + ], +]; diff --git a/tests/fixtures/pointers/single_pointer_to_array.php b/tests/fixtures/pointers/single_pointer_to_array.php new file mode 100644 index 0000000..fd116f3 --- /dev/null +++ b/tests/fixtures/pointers/single_pointer_to_array.php @@ -0,0 +1,178 @@ + [ + '' 
=> $complexArray = require __DIR__ . '/../parsing/complex_array.php', + '/-' => $complexArray, + '/-/id' => ['id' => '0003'], + '/-/batters' => [ + 'batters' => [ + 'batter' => [ + [ + "id" => "1001", + "type" => "Regular", + ], + [ + "id" => "1002", + "type" => "Chocolate", + ], + ], + ], + ], + '/-/batters/batter' => [ + 'batter' => [ + [ + "id" => "1001", + "type" => "Regular", + ], + [ + "id" => "1002", + "type" => "Chocolate", + ], + ], + ], + '/-/batters/batter/-' => [ + [ + "id" => "1001", + "type" => "Regular", + ], + [ + "id" => "1002", + "type" => "Chocolate", + ], + [ + "id" => "1003", + "type" => "Blueberry", + ], + [ + "id" => "1004", + "type" => "Devil's Food", + ], + ], + '/-/batters/batter/-/id' => ['id' => "1002"], + ], + 'complex_object' => [ + '' => require __DIR__ . '/../parsing/complex_object.php', + '/-' => [], + '/id' => ['id' => '0001'], + '/batters' => [ + 'batters' => [ + 'batter' => [ + [ + "id" => "1001", + "type" => "Regular", + ], + [ + "id" => "1002", + "type" => "Chocolate", + ], + [ + "id" => "1003", + "type" => "Blueberry", + ], + [ + "id" => "1004", + "type" => "Devil's Food", + ], + ], + ], + ], + '/batters/batter' => [ + 'batter' => [ + [ + "id" => "1001", + "type" => "Regular", + ], + [ + "id" => "1002", + "type" => "Chocolate", + ], + [ + "id" => "1003", + "type" => "Blueberry", + ], + [ + "id" => "1004", + "type" => "Devil's Food", + ], + ], + ], + '/batters/batter/-' => [ + [ + "id" => "1001", + "type" => "Regular", + ], + [ + "id" => "1002", + "type" => "Chocolate", + ], + [ + "id" => "1003", + "type" => "Blueberry", + ], + [ + "id" => "1004", + "type" => "Devil's Food", + ], + ], + '/batters/batter/-/id' => ['id' => "1004"], + ], + 'empty_array' => [ + '' => [], + '/-' => [], + '/-1' => [], + '/0' => [], + '/foo' => [], + ], + 'empty_object' => [ + '' => [], + '/-' => [], + '/-1' => [], + '/0' => [], + '/foo' => [], + ], + 'simple_array' => [ + '' => $simpleArray = require __DIR__ . 
'/../parsing/simple_array.php', + '/-' => $simpleArray, + '/-1' => [], + '/0' => [0 => 1], + '/1' => [1 => ''], + '/2' => [2 => 'foo'], + '/3' => [3 => '"bar"'], + '/4' => [4 => 'hej då'], + '/5' => [5 => 3.14], + '/6' => [6 => false], + '/7' => [7 => null], + '/8' => [8 => []], + '/9' => [9 => []], + '/10' => [], + '/foo' => [], + ], + 'simple_object' => [ + '' => require __DIR__ . '/../parsing/simple_object.php', + '/-' => [], + '/-1' => [], + '/int' => ['int' => 1], + '/empty_string' => ['empty_string' => ''], + '/string' => ['string' => 'foo'], + '/escaped_string' => ['escaped_string' => '"bar"'], + '/\"escaped_key\"' => ['"escaped_key"' => 'baz'], + '/unicode' => ['unicode' => "hej då"], + '/float' => ['float' => 3.14], + '/bool' => ['bool' => false], + '/null' => ['null' => null], + '/empty_array' => ['empty_array' => []], + '/empty_object' => ['empty_object' => []], + '/10' => [], + '/foo' => [], + '/' => ['' => 0], + '/a~1b' => ['a/b' => 1], + '/c%d' => ['c%d' => 2], + '/e^f' => ['e^f' => 3], + '/g|h' => ['g|h' => 4], + '/i\\\\j' => ['i\\j' => 5], + '/k\"l' => ['k"l' => 6], + '/ ' => [' ' => 7], + '/m~0n' => ['m~n' => 8], + ], +]; From 90b2f59f5afa951832690465685367915ff26c28 Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Wed, 22 Mar 2023 23:36:09 +1000 Subject: [PATCH 219/249] Update readme --- README.md | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 846587b..1d4cf26 100644 --- a/README.md +++ b/README.md @@ -247,6 +247,13 @@ JsonParser::parse($source) > ⚠️ Please note the parameters order of the callbacks: the value is passed before the key. +Sometimes the sub-trees extracted by pointers are small enough to be kept all in memory. 
We can chain `toArray()` to eager load the extracted sub-trees into an array: + +```php +// ['gender' => 'female', 'country' => 'Germany'] +$array = JsonParser::parse($source)->pointers(['/results/0/gender', '/results/0/location/country'])->toArray(); +``` + ### 🐼 Lazy pointers JSON Parser keeps in memory only one key and one value at a time. However, if the value is a large array or a large object, we may not want to keep it all in memory. @@ -293,7 +300,7 @@ foreach ($json as $key => $value) { } ``` -Lazy pointers also have all the other functionalities of normal pointers: they accept callbacks, they can be set one by one or all together and they can be mixed with normal pointers as well: +Lazy pointers also have all the other functionalities of normal pointers: they accept callbacks, can be set one by one or all together, can be eager loaded into an array and can be mixed with normal pointers as well: ```php // set custom callback to run only when names are found @@ -310,6 +317,10 @@ $json = JsonParser::parse($source)->lazyPointers([ '/results/-/location' => fn (Parser $location) => $this->handleLocation($location)), ]); +// eager load lazy pointers into an array +// ['name' => ['title' => 'Mrs', 'first' => 'Sara', 'last' => 'Meder'], 'street' => ['number' => 46, 'name' => 'Römerstraße']] +$array = JsonParser::parse($source)->lazyPointers(['/results/0/name', '/results/0/location/street'])->toArray(); + // mix pointers and lazy pointers $json = JsonParser::parse($source) ->pointer('/results/-/gender', fn (string $gender) => $this->handleGender($gender)) From ac41f05904391de909707fc75d51eee4eba5fa12 Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Wed, 22 Mar 2023 23:40:39 +1000 Subject: [PATCH 220/249] Specify types for iterable --- src/JsonParser.php | 2 +- src/Tokens/Parser.php | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/JsonParser.php b/src/JsonParser.php index ffa81e3..1f0858a 100644 --- a/src/JsonParser.php +++ 
b/src/JsonParser.php @@ -146,7 +146,7 @@ final class JsonParser implements IteratorAggregate /** * Eager load the JSON into an array * - * @return array + * @return array */ public function toArray(): array { diff --git a/src/Tokens/Parser.php b/src/Tokens/Parser.php index 0f3e862..cd45b56 100644 --- a/src/Tokens/Parser.php +++ b/src/Tokens/Parser.php @@ -130,7 +130,7 @@ final class Parser implements IteratorAggregate /** * Eager load the current compound into an array * - * @return array + * @return array */ public function toArray(): array { From 29e8940deaf65b2192e94bf535186ed3fafcf987 Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Sat, 25 Mar 2023 16:54:12 +1000 Subject: [PATCH 221/249] Update dependencies and simplify code --- src/JsonParser.php | 15 +++++++++++--- src/Tokens/Parser.php | 48 ++----------------------------------------- 2 files changed, 14 insertions(+), 49 deletions(-) diff --git a/src/JsonParser.php b/src/JsonParser.php index 1f0858a..13f0e68 100644 --- a/src/JsonParser.php +++ b/src/JsonParser.php @@ -7,6 +7,7 @@ use Cerbero\JsonParser\Decoders\Decoder; use Cerbero\JsonParser\Exceptions\SyntaxException; use Cerbero\JsonParser\Pointers\Pointer; use Cerbero\JsonParser\Sources\AnySource; +use Cerbero\JsonParser\Tokens\Lexer; use Cerbero\JsonParser\Tokens\Parser; use Cerbero\JsonParser\ValueObjects\Config; use Cerbero\JsonParser\ValueObjects\Progress; @@ -28,6 +29,13 @@ final class JsonParser implements IteratorAggregate */ private Config $config; + /** + * The lexer. + * + * @var Lexer + */ + private Lexer $lexer; + /** * The parser. 
* @@ -43,7 +51,8 @@ final class JsonParser implements IteratorAggregate public function __construct(mixed $source) { $this->config = new Config(); - $this->parser = Parser::for(new AnySource($source, $this->config)); + $this->lexer = new Lexer(new AnySource($source, $this->config)); + $this->parser = new Parser($this->lexer->getIterator(), $this->config); } /** @@ -67,7 +76,7 @@ final class JsonParser implements IteratorAggregate try { yield from $this->parser; } catch (SyntaxException $e) { - $e->setPosition($this->parser->position()); + $e->setPosition($this->lexer->position()); ($this->config->onSyntaxError)($e); } } @@ -173,7 +182,7 @@ final class JsonParser implements IteratorAggregate */ public function progress(): Progress { - return $this->parser->progress(); + return $this->lexer->progress(); } /** diff --git a/src/Tokens/Parser.php b/src/Tokens/Parser.php index cd45b56..a180d69 100644 --- a/src/Tokens/Parser.php +++ b/src/Tokens/Parser.php @@ -4,12 +4,10 @@ namespace Cerbero\JsonParser\Tokens; use Cerbero\JsonParser\Decoders\ConfigurableDecoder; use Cerbero\JsonParser\Exceptions\SyntaxException; -use Cerbero\JsonParser\Sources\Source; use Cerbero\JsonParser\Tokens\CompoundBegin; use Cerbero\JsonParser\Tokens\CompoundEnd; use Cerbero\JsonParser\Tokens\Token; use Cerbero\JsonParser\ValueObjects\Config; -use Cerbero\JsonParser\ValueObjects\Progress; use Cerbero\JsonParser\ValueObjects\State; use Generator; use IteratorAggregate; @@ -22,13 +20,6 @@ use Traversable; */ final class Parser implements IteratorAggregate { - /** - * The tokens to parse. - * - * @var Generator - */ - private Generator $tokens; - /** * The decoder handling potential errors. * @@ -46,27 +37,14 @@ final class Parser implements IteratorAggregate /** * Instantiate the class. 
* - * @param Lexer|Generator $lexer + * @param Generator $tokens * @param Config $config */ - public function __construct(private Lexer|Generator $lexer, private Config $config) + public function __construct(private Generator $tokens, private Config $config) { - /** @phpstan-ignore-next-line */ - $this->tokens = $lexer instanceof Lexer ? $lexer->getIterator() : $lexer; $this->decoder = new ConfigurableDecoder($config); } - /** - * Instantiate the class statically - * - * @param Source $source - * @return self - */ - public static function for(Source $source): self - { - return new self(new Lexer($source), $source->config()); - } - /** * Retrieve the JSON fragments * @@ -160,26 +138,4 @@ final class Parser implements IteratorAggregate $value instanceof self && $value->fastForward(); } } - - /** - * Retrieve the parsing progress - * - * @return Progress - */ - public function progress(): Progress - { - /** @phpstan-ignore-next-line */ - return $this->lexer->progress(); - } - - /** - * Retrieve the parsing position - * - * @return int - */ - public function position(): int - { - /** @phpstan-ignore-next-line */ - return $this->lexer->position(); - } } From 74e9a46e426bedaae3f60cc02da96f5a365f585a Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Sat, 25 Mar 2023 16:55:09 +1000 Subject: [PATCH 222/249] Simplify conditions --- src/Tokens/Lexer.php | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/src/Tokens/Lexer.php b/src/Tokens/Lexer.php index e0677a7..267b2e4 100644 --- a/src/Tokens/Lexer.php +++ b/src/Tokens/Lexer.php @@ -47,7 +47,7 @@ final class Lexer implements IteratorAggregate /** * Retrieve the JSON fragments * - * @return Traversable + * @return \Generator */ public function getIterator(): Traversable { @@ -56,10 +56,8 @@ final class Lexer implements IteratorAggregate foreach ($this->source as $chunk) { for ($i = 0, $size = strlen($chunk); $i < $size; $i++, $this->position++) { - $character = $chunk[$i]; - $inString = 
($character == '"' && $inString && $isEscaping) - || ($character != '"' && $inString) - || ($character == '"' && !$inString); + $isQuote = '"' == $character = $chunk[$i]; + $inString = $isQuote != $inString || ($isQuote && $inString && $isEscaping); $isEscaping = $character == '\\' && !$isEscaping; $shouldBuffer = $inString || !isset(Tokens::BOUNDARIES[$character]); From 627458cb692db602ca881a5526ca3e98c4e7676b Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Sat, 25 Mar 2023 17:00:37 +1000 Subject: [PATCH 223/249] Simplify conditions --- src/Tokens/Lexer.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Tokens/Lexer.php b/src/Tokens/Lexer.php index 267b2e4..31ca795 100644 --- a/src/Tokens/Lexer.php +++ b/src/Tokens/Lexer.php @@ -57,7 +57,7 @@ final class Lexer implements IteratorAggregate foreach ($this->source as $chunk) { for ($i = 0, $size = strlen($chunk); $i < $size; $i++, $this->position++) { $isQuote = '"' == $character = $chunk[$i]; - $inString = $isQuote != $inString || ($isQuote && $inString && $isEscaping); + $inString = $isQuote != $inString || $isEscaping; $isEscaping = $character == '\\' && !$isEscaping; $shouldBuffer = $inString || !isset(Tokens::BOUNDARIES[$character]); From 4f172a0fc6634de46852fa4527329a6180bcd603 Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Sat, 25 Mar 2023 17:20:09 +1000 Subject: [PATCH 224/249] Simplify conditions --- src/Tokens/Lexer.php | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Tokens/Lexer.php b/src/Tokens/Lexer.php index 31ca795..a5d6c7b 100644 --- a/src/Tokens/Lexer.php +++ b/src/Tokens/Lexer.php @@ -56,8 +56,8 @@ final class Lexer implements IteratorAggregate foreach ($this->source as $chunk) { for ($i = 0, $size = strlen($chunk); $i < $size; $i++, $this->position++) { - $isQuote = '"' == $character = $chunk[$i]; - $inString = $isQuote != $inString || $isEscaping; + $character = $chunk[$i]; + $inString = ($character == '"') != $inString || 
$isEscaping; $isEscaping = $character == '\\' && !$isEscaping; $shouldBuffer = $inString || !isset(Tokens::BOUNDARIES[$character]); From 7d841f4d779b764378584295068fc20859613425 Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Mon, 27 Mar 2023 23:03:21 +1000 Subject: [PATCH 225/249] Update pointer key by reference --- src/Pointers/Pointer.php | 2 +- src/ValueObjects/State.php | 18 +++++++++++------- tests/Dataset.php | 17 +++++++++++++++++ tests/Feature/PointersTest.php | 4 ++++ 4 files changed, 33 insertions(+), 8 deletions(-) diff --git a/src/Pointers/Pointer.php b/src/Pointers/Pointer.php index 43bf9bc..b553cb3 100644 --- a/src/Pointers/Pointer.php +++ b/src/Pointers/Pointer.php @@ -113,7 +113,7 @@ final class Pointer implements Stringable * @param mixed $key * @return mixed */ - public function call(mixed $value, mixed $key): mixed + public function call(mixed $value, mixed &$key): mixed { if ($this->callback === null) { return $value; diff --git a/src/ValueObjects/State.php b/src/ValueObjects/State.php index e88da0a..15fc273 100644 --- a/src/ValueObjects/State.php +++ b/src/ValueObjects/State.php @@ -51,7 +51,7 @@ final class State */ public function __construct(private Pointers $pointers, private Closure $lazyLoad) { - $this->tree = new Tree($this->pointers); + $this->tree = new Tree($pointers); } /** @@ -91,7 +91,7 @@ final class State * @param mixed $key * @return mixed */ - public function callPointer(mixed $value, mixed $key): mixed + public function callPointer(mixed $value, mixed &$key): mixed { return $this->pointers->matching()->call($value, $key); } @@ -107,11 +107,15 @@ final class State $this->tree->traverseToken($token, $this->expectsKey); if ($this->tree->isMatched() && ((!$this->expectsKey && $token->isValue()) || $this->tree->isDeep())) { - $shouldLazyLoad = $token instanceof CompoundBegin && $this->pointers->matching()->isLazy(); - /** @phpstan-ignore-next-line */ - $this->buffer = $shouldLazyLoad ? 
($this->lazyLoad)() : $this->buffer . $token; - /** @var CompoundBegin $token */ - $shouldLazyLoad && $token->shouldLazyLoad = true; + $this->pointers->markAsFound(); + + if ($token instanceof CompoundBegin && $this->pointers->matching()->isLazy()) { + $this->buffer = ($this->lazyLoad)(); + $token->shouldLazyLoad = true; + } else { + /** @phpstan-ignore-next-line */ + $this->buffer .= $token; + } } $token->mutateState($this); diff --git a/tests/Dataset.php b/tests/Dataset.php index 351c89a..18b0545 100644 --- a/tests/Dataset.php +++ b/tests/Dataset.php @@ -96,6 +96,23 @@ final class Dataset yield from self::forSinglePointersWithFixture('pointers/single_pointer_to_array.php'); } + /** + * Retrieve the dataset to test the key update + * + * @return Generator + */ + public static function forKeyUpdate(): Generator + { + $json = fixture('json/complex_object.json'); + $pointers = [ + '/type' => function ($value, &$key) { + $key = 'foo'; + }, + ]; + + yield [$json, $pointers, ['foo' => 'donut']]; + } + /** * Retrieve the dataset to test multiple pointers * diff --git a/tests/Feature/PointersTest.php b/tests/Feature/PointersTest.php index d7d7298..1f10475 100644 --- a/tests/Feature/PointersTest.php +++ b/tests/Feature/PointersTest.php @@ -23,6 +23,10 @@ it('eager loads lazy pointers into an array', function (string $json, string $po expect(JsonParser::parse($json)->lazyPointer($pointer)->toArray())->toBe($expected); })->with(Dataset::forSinglePointersToArray()); +it('can modify key and value of a pointer', function (string $json, array $pointers, array $expected) { + expect(JsonParser::parse($json)->pointers($pointers)->toArray())->toBe($expected); +})->with(Dataset::forKeyUpdate()); + it('loads JSON from multiple JSON pointers', function (string $json, array $pointers, array $parsed) { expect(JsonParser::parse($json)->pointers($pointers))->toPointTo($parsed); })->with(Dataset::forMultiplePointers()); From f86a27197da980327dd9a7ca09d23f459234c6ba Mon Sep 17 00:00:00 2001 
From: Andrea Marco Sartori Date: Mon, 27 Mar 2023 23:04:14 +1000 Subject: [PATCH 226/249] Optimize conditions --- src/ValueObjects/Tree.php | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/src/ValueObjects/Tree.php b/src/ValueObjects/Tree.php index 134655f..c741969 100644 --- a/src/ValueObjects/Tree.php +++ b/src/ValueObjects/Tree.php @@ -121,7 +121,7 @@ final class Tree { $pointer = $this->pointers->matching(); - return $pointer == '' ? $this->depth > $pointer->depth() : $this->depth >= $pointer->depth(); + return $pointer == '' ? $this->depth > 0 : $this->depth >= $pointer->depth(); } /** @@ -151,11 +151,7 @@ final class Tree */ public function isMatched(): bool { - if ($isMatched = $this->depth >= 0 && $this->pointers->matching()->matchesTree($this)) { - $this->pointers->markAsFound(); - } - - return $isMatched; + return $this->depth >= 0 && $this->pointers->matching()->matchesTree($this); } /** From 7743a7efd03735d0404760103d866128d6cb424e Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Mon, 27 Mar 2023 23:16:37 +1000 Subject: [PATCH 227/249] Update readme --- README.md | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/README.md b/README.md index 1d4cf26..5ccc26a 100644 --- a/README.md +++ b/README.md @@ -219,6 +219,20 @@ foreach ($json as $key => $value) { } ``` +Pointer callbacks can also be used to customize a key. We can achieve that by updating the key **reference**: + +```php +$json = JsonParser::parse($source)->pointer('/results/-/name/first', function (string $name, string &$key) { + $key = 'first_name'; +}); + +foreach ($json as $key => $value) { + // 1st iteration: $key === 'first_name', $value === 'Sara' + // 2nd iteration: $key === 'first_name', $value === 'Andrea' + // and so on for all the objects in the array... 
+} +``` + If the callbacks are enough to handle the pointers and we don't need to run any common logic for all pointers, we can avoid to manually call `foreach()` by chaining the method `traverse()`: ```php From 53f28fc6eac3a431c15a8ee3400f1263eb1d1959 Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Sun, 4 Jun 2023 17:16:11 +0200 Subject: [PATCH 228/249] Update readme --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 5ccc26a..e4d8d47 100644 --- a/README.md +++ b/README.md @@ -282,7 +282,7 @@ foreach ($json as $key => $value) { } ``` -Lazy pointers return a light-weight instance of `Cerbero\JsonParser\Parser` instead of the actual large value. To lazy load nested keys and values, we can then loop through the parser: +Lazy pointers return a light-weight instance of `Cerbero\JsonParser\Tokens\Parser` instead of the actual large value. To lazy load nested keys and values, we can then loop through the parser: ```php $json = JsonParser::parse($source)->lazyPointer('/results/0/name'); @@ -468,7 +468,7 @@ When processing large JSONs, we may need to know the parsing progress. JSON Pars ```php $json = new JsonParser($source); -$json->progress(); // +$json->progress(); // $json->progress()->current(); // the already parsed bytes e.g. 86759341 $json->progress()->total(); // the total bytes to parse e.g. 182332642 $json->progress()->fraction(); // the completed fraction e.g. 
0.47583 From edf39f9233f63c5227f99f36bb8d5eebac5c2e03 Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Sun, 4 Jun 2023 17:17:02 +0200 Subject: [PATCH 229/249] Update comments --- src/JsonParser.php | 4 ++-- src/Pointers/Pointer.php | 1 - 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/src/JsonParser.php b/src/JsonParser.php index 13f0e68..37bdc2e 100644 --- a/src/JsonParser.php +++ b/src/JsonParser.php @@ -56,7 +56,7 @@ final class JsonParser implements IteratorAggregate } /** - * Statically instantiate the class + * Instantiate the class statically * * @param mixed $source * @return self @@ -140,7 +140,7 @@ final class JsonParser implements IteratorAggregate } /** - * Traverse the lazily iterable JSON + * Traverse the JSON one key and value at a time * * @param Closure|null $callback * @return void diff --git a/src/Pointers/Pointer.php b/src/Pointers/Pointer.php index b553cb3..5fc24ef 100644 --- a/src/Pointers/Pointer.php +++ b/src/Pointers/Pointer.php @@ -158,7 +158,6 @@ final class Pointer implements Stringable */ public function includesTree(Tree $tree): bool { - // if ($this->pointer == '' && !$this->isLazy) { if ($this->pointer == '') { return true; } From dc21c4facbb5698f724cb1bdb7110487b56afda2 Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Sun, 4 Jun 2023 17:18:43 +0200 Subject: [PATCH 230/249] Reduce instructions --- src/Pointers/Pointers.php | 12 ++++++++---- src/Tokens/Lexer.php | 13 +++++-------- src/ValueObjects/State.php | 4 ++-- 3 files changed, 15 insertions(+), 14 deletions(-) diff --git a/src/Pointers/Pointers.php b/src/Pointers/Pointers.php index 7e2efcc..1f62ff3 100644 --- a/src/Pointers/Pointers.php +++ b/src/Pointers/Pointers.php @@ -76,12 +76,14 @@ final class Pointers $originalTree = $tree->original(); foreach ($this->pointers as $pointer) { - $referenceTokens = $pointer->referenceTokens(); + if ($pointer->referenceTokens() == $originalTree) { + return $this->matching = $pointer; + } foreach ($originalTree 
as $depth => $key) { if (!$pointer->depthMatchesKey($depth, $key)) { continue 2; - } elseif (!isset($pointers[$depth]) || $referenceTokens == $originalTree) { + } elseif (!isset($pointers[$depth])) { $pointers[$depth] = $pointer; } } @@ -93,13 +95,15 @@ final class Pointers /** * Mark the given pointer as found * - * @return void + * @return Pointer */ - public function markAsFound(): void + public function markAsFound(): Pointer { if (!$this->matching->wasFound) { $this->found[(string) $this->matching] = $this->matching->wasFound = true; } + + return $this->matching; } /** diff --git a/src/Tokens/Lexer.php b/src/Tokens/Lexer.php index a5d6c7b..d6b8869 100644 --- a/src/Tokens/Lexer.php +++ b/src/Tokens/Lexer.php @@ -53,30 +53,27 @@ final class Lexer implements IteratorAggregate { $buffer = ''; $inString = $isEscaping = false; + $tokenizer = Tokenizer::instance(); foreach ($this->source as $chunk) { for ($i = 0, $size = strlen($chunk); $i < $size; $i++, $this->position++) { $character = $chunk[$i]; $inString = ($character == '"') != $inString || $isEscaping; $isEscaping = $character == '\\' && !$isEscaping; - $shouldBuffer = $inString || !isset(Tokens::BOUNDARIES[$character]); - if ($shouldBuffer && $buffer == '' && !isset(Tokens::TYPES[$character])) { - throw new SyntaxException($character); - } - - if ($shouldBuffer) { + if ($inString || !isset(Tokens::BOUNDARIES[$character])) { + $buffer == '' && !isset(Tokens::TYPES[$character]) && throw new SyntaxException($character); $buffer .= $character; continue; } if ($buffer != '') { - yield Tokenizer::instance()->toToken($buffer); + yield $tokenizer->toToken($buffer); $buffer = ''; } if (isset(Tokens::DELIMITERS[$character])) { - yield Tokenizer::instance()->toToken($character); + yield $tokenizer->toToken($character); } } } diff --git a/src/ValueObjects/State.php b/src/ValueObjects/State.php index 15fc273..c1ef011 100644 --- a/src/ValueObjects/State.php +++ b/src/ValueObjects/State.php @@ -107,9 +107,9 @@ final class 
State $this->tree->traverseToken($token, $this->expectsKey); if ($this->tree->isMatched() && ((!$this->expectsKey && $token->isValue()) || $this->tree->isDeep())) { - $this->pointers->markAsFound(); + $pointer = $this->pointers->markAsFound(); - if ($token instanceof CompoundBegin && $this->pointers->matching()->isLazy()) { + if ($token instanceof CompoundBegin && $pointer->isLazy()) { $this->buffer = ($this->lazyLoad)(); $token->shouldLazyLoad = true; } else { From b324d3b5453dc12ff0cc2c23cc92b6fdb617c043 Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Sun, 4 Jun 2023 17:20:32 +0200 Subject: [PATCH 231/249] Improve tracking of tree --- src/ValueObjects/Tree.php | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/src/ValueObjects/Tree.php b/src/ValueObjects/Tree.php index c741969..91cbe39 100644 --- a/src/ValueObjects/Tree.php +++ b/src/ValueObjects/Tree.php @@ -166,6 +166,13 @@ final class Tree $this->original[$this->depth] = $trimmedKey; $this->wildcarded[$this->depth] = $trimmedKey; + + if (count($this->original) > $offset = $this->depth + 1) { + array_splice($this->original, $offset); + array_splice($this->wildcarded, $offset); + array_splice($this->inObjectByDepth, $offset); + } + $this->pointers->matchTree($this); } @@ -179,15 +186,16 @@ final class Tree $index = $this->original[$this->depth] ?? null; $this->original[$this->depth] = $index = is_int($index) ? $index + 1 : 0; - if (count($this->original) > $this->depth + 1) { - array_splice($this->original, $this->depth + 1); + if (count($this->original) > $offset = $this->depth + 1) { + array_splice($this->original, $offset); + array_splice($this->inObjectByDepth, $offset); } $referenceTokens = $this->pointers->matchTree($this)->referenceTokens(); $this->wildcarded[$this->depth] = ($referenceTokens[$this->depth] ?? null) == '-' ? 
'-' : $index; - if (count($this->wildcarded) > $this->depth + 1) { - array_splice($this->wildcarded, $this->depth + 1); + if (count($this->wildcarded) > $offset) { + array_splice($this->wildcarded, $offset); } } From 263de30577296e49a4ba457b0c3c58e5b03634fc Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Sun, 4 Jun 2023 17:24:20 +0200 Subject: [PATCH 232/249] Add fixtures --- tests/fixtures/pointers/multiple_pointers.php | 50 +++++++++++++++++++ 1 file changed, 50 insertions(+) diff --git a/tests/fixtures/pointers/multiple_pointers.php b/tests/fixtures/pointers/multiple_pointers.php index 5cfab2a..266da40 100644 --- a/tests/fixtures/pointers/multiple_pointers.php +++ b/tests/fixtures/pointers/multiple_pointers.php @@ -78,6 +78,56 @@ return [ "type" => "Devil's Food", ], ], + '/batters/batter,/topping' => [ + 'batter' => [ + [ + "id" => "1001", + "type" => "Regular", + ], + [ + "id" => "1002", + "type" => "Chocolate", + ], + [ + "id" => "1003", + "type" => "Blueberry", + ], + [ + "id" => "1004", + "type" => "Devil's Food", + ], + ], + 'topping' => [ + [ + "id" => "5001", + "type" => "None", + ], + [ + "id" => "5002", + "type" => "Glazed", + ], + [ + "id" => "5005", + "type" => "Sugar", + ], + [ + "id" => "5007", + "type" => "Powdered Sugar", + ], + [ + "id" => "5006", + "type" => "Chocolate with Sprinkles", + ], + [ + "id" => "5003", + "type" => "Chocolate", + ], + [ + "id" => "5004", + "type" => "Maple", + ], + ], + ], ], 'empty_array' => [ '/-1,/-2' => [], From c9ecf6fe6cb40d61b109da9649d20884b1664e50 Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Thu, 8 Jun 2023 17:47:10 +0200 Subject: [PATCH 233/249] Adjust lexer position --- src/Tokens/Lexer.php | 2 +- tests/Feature/ErrorsHandlingTest.php | 4 ++-- tests/fixtures/errors/syntax.php | 20 ++++++++++---------- 3 files changed, 13 insertions(+), 13 deletions(-) diff --git a/src/Tokens/Lexer.php b/src/Tokens/Lexer.php index d6b8869..0ac3e90 100644 --- a/src/Tokens/Lexer.php +++ 
b/src/Tokens/Lexer.php @@ -32,7 +32,7 @@ final class Lexer implements IteratorAggregate * * @var int */ - private int $position = 1; + private int $position = 0; /** * Instantiate the class. diff --git a/tests/Feature/ErrorsHandlingTest.php b/tests/Feature/ErrorsHandlingTest.php index eed7e24..791ea20 100644 --- a/tests/Feature/ErrorsHandlingTest.php +++ b/tests/Feature/ErrorsHandlingTest.php @@ -16,9 +16,9 @@ it('lets the user handle syntax errors', function () { JsonParser::parse('{a}') ->onSyntaxError(function (SyntaxException $e) { expect($e) - ->getMessage()->toBe("Syntax error: unexpected 'a' at position 2") + ->getMessage()->toBe("Syntax error: unexpected 'a' at position 1") ->value->toBe('a') - ->position->toBe(2); + ->position->toBe(1); }) ->traverse(); }); diff --git a/tests/fixtures/errors/syntax.php b/tests/fixtures/errors/syntax.php index 1eadc68..5bea6a3 100644 --- a/tests/fixtures/errors/syntax.php +++ b/tests/fixtures/errors/syntax.php @@ -4,51 +4,51 @@ return [ [ 'json' => 'a[1, "", 3.14, [], {}]', 'unexpected' => 'a', - 'position' => 1, + 'position' => 0, ], [ 'json' => '[b1, "", 3.14, [], {}]', 'unexpected' => 'b', - 'position' => 2, + 'position' => 1, ], [ 'json' => '[1,c "", 3.14, [], {}]', 'unexpected' => 'c', - 'position' => 4, + 'position' => 3, ], [ 'json' => '[1, d"", 3.14, [], {}]', 'unexpected' => 'd', - 'position' => 5, + 'position' => 4, ], [ 'json' => '[1, "", e3.14, [], {}]', 'unexpected' => 'e', - 'position' => 9, + 'position' => 8, ], [ 'json' => '[1, "", 3.14, []f, {}]', 'unexpected' => 'f', - 'position' => 18, + 'position' => 17, ], [ 'json' => '[1, "", 3.14, [], g{}]', 'unexpected' => 'g', - 'position' => 19, + 'position' => 18, ], [ 'json' => '[1, "", 3.14, [], {h}]', 'unexpected' => 'h', - 'position' => 20, + 'position' => 19, ], [ 'json' => '[1, "", 3.14, [], {}i]', 'unexpected' => 'i', - 'position' => 21, + 'position' => 20, ], [ 'json' => '[1, "", 3.14, [], {}]j', 'unexpected' => 'j', - 'position' => 22, + 'position' => 21, 
], ]; From 18cde0f63da259443dd868906e4752efeaaa3a66 Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Thu, 8 Jun 2023 17:47:26 +0200 Subject: [PATCH 234/249] Move tests --- tests/Unit/{ => Decoders}/JsonDecoderTest.php | 0 tests/Unit/{ => Decoders}/SimdjsonDecoderTest.php | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename tests/Unit/{ => Decoders}/JsonDecoderTest.php (100%) rename tests/Unit/{ => Decoders}/SimdjsonDecoderTest.php (100%) diff --git a/tests/Unit/JsonDecoderTest.php b/tests/Unit/Decoders/JsonDecoderTest.php similarity index 100% rename from tests/Unit/JsonDecoderTest.php rename to tests/Unit/Decoders/JsonDecoderTest.php diff --git a/tests/Unit/SimdjsonDecoderTest.php b/tests/Unit/Decoders/SimdjsonDecoderTest.php similarity index 100% rename from tests/Unit/SimdjsonDecoderTest.php rename to tests/Unit/Decoders/SimdjsonDecoderTest.php From 5a658842551c84443d17fb6f4e712622f399df37 Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Thu, 8 Jun 2023 17:47:56 +0200 Subject: [PATCH 235/249] Add tests for the progress value object --- tests/Feature/ParsingTest.php | 12 ++++++++++++ tests/Unit/ValueObjects/ProgressTest.php | 24 ++++++++++++++++++++++++ 2 files changed, 36 insertions(+) create mode 100644 tests/Unit/ValueObjects/ProgressTest.php diff --git a/tests/Feature/ParsingTest.php b/tests/Feature/ParsingTest.php index b93b1a7..5309962 100644 --- a/tests/Feature/ParsingTest.php +++ b/tests/Feature/ParsingTest.php @@ -21,3 +21,15 @@ it('parses JSON when calling the helper', function (string $json, array $parsed) it('eager loads JSON into an array', function (string $json, array $parsed) { expect(JsonParser::parse($json)->toArray())->toBe($parsed); })->with(Dataset::forParsing()); + +it('shows the progress while parsing', function () { + $parser = new JsonParser(fixture('json/simple_array.json')); + + expect($parser->progress()->percentage())->toBe($percentage = 0.0); + + foreach ($parser as $value) { + 
expect($percentage)->toBeLessThan($percentage = $parser->progress()->percentage()); + } + + expect($parser->progress()->percentage())->toBe(100.0); +}); diff --git a/tests/Unit/ValueObjects/ProgressTest.php b/tests/Unit/ValueObjects/ProgressTest.php new file mode 100644 index 0000000..0786d59 --- /dev/null +++ b/tests/Unit/ValueObjects/ProgressTest.php @@ -0,0 +1,24 @@ +current()->toBe(0) + ->total()->toBeNull() + ->format()->toBeNull() + ->percentage()->toBeNull() + ->fraction()->toBeNull(); + + $progress->setTotal(200)->setCurrent(33); + + expect($progress) + ->current()->toBe(33) + ->total()->toBe(200) + ->format()->toBe('16.5%') + ->percentage()->toBe(16.5) + ->fraction()->toBe(0.165); +}); From eee0e8bc63ae606802d80eebd137b5574ab25157 Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Tue, 13 Jun 2023 17:08:58 +0200 Subject: [PATCH 236/249] Add method to retrieve the Guzzle client --- src/Concerns/GuzzleAware.php | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/src/Concerns/GuzzleAware.php b/src/Concerns/GuzzleAware.php index 3466189..a1c8450 100644 --- a/src/Concerns/GuzzleAware.php +++ b/src/Concerns/GuzzleAware.php @@ -45,7 +45,7 @@ trait GuzzleAware */ protected function getJson(UriInterface|string $url): ResponseInterface { - return (new Client())->get($url, [ + return $this->guzzle()->get($url, [ 'headers' => [ 'Accept' => 'application/json', 'Content-Type' => 'application/json', @@ -53,6 +53,16 @@ trait GuzzleAware ]); } + /** + * Retrieve the Guzzle client + * + * @return Client + */ + protected function guzzle(): Client + { + return new Client(); + } + /** * Retrieve the JSON response of the given request * @@ -61,6 +71,6 @@ trait GuzzleAware */ protected function sendRequest(RequestInterface $request): ResponseInterface { - return (new Client())->sendRequest($request); + return $this->guzzle()->sendRequest($request); } } From 60fb5b094fdcf4f30f587409933e9b55568085f3 Mon Sep 17 00:00:00 2001 From: Andrea Marco 
Sartori Date: Tue, 13 Jun 2023 17:09:09 +0200 Subject: [PATCH 237/249] Remove unneeded method --- src/Sources/Source.php | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/src/Sources/Source.php b/src/Sources/Source.php index 5924445..55d1802 100644 --- a/src/Sources/Source.php +++ b/src/Sources/Source.php @@ -67,16 +67,6 @@ abstract class Source implements IteratorAggregate $this->config = $config ?: new Config(); } - /** - * Retrieve the underlying configuration - * - * @return Config - */ - public function config(): Config - { - return $this->config; - } - /** * Retrieve the size of the JSON source and cache it * From f081ea716e630584874ac684d93dbd56fa2b7572 Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Tue, 13 Jun 2023 17:09:16 +0200 Subject: [PATCH 238/249] Add tests for sources --- tests/Dataset.php | 87 +++++++++++++++++++++++++++++++++-- tests/Feature/SourcesTest.php | 7 +++ 2 files changed, 91 insertions(+), 3 deletions(-) diff --git a/tests/Dataset.php b/tests/Dataset.php index 18b0545..71d55b2 100644 --- a/tests/Dataset.php +++ b/tests/Dataset.php @@ -3,12 +3,18 @@ namespace Cerbero\JsonParser; use Cerbero\JsonParser\Decoders\DecodedValue; -use Cerbero\JsonParser\Sources\Endpoint; -use Cerbero\JsonParser\Sources\Psr7Request; +use Cerbero\JsonParser\Sources; use Cerbero\JsonParser\Tokens\Parser; use DirectoryIterator; use Generator; +use GuzzleHttp\Client; +use GuzzleHttp\Psr7\Request as Psr7Request; +use GuzzleHttp\Psr7\Response as Psr7Response; +use Illuminate\Http\Client\Request; +use Illuminate\Http\Client\Response; use Mockery; +use Psr\Http\Message\ResponseInterface; +use Psr\Http\Message\StreamInterface; /** * The dataset provider. 
@@ -419,7 +425,7 @@ final class Dataset */ public static function forSourcesRequiringGuzzle(): Generator { - $sources = [Endpoint::class, Psr7Request::class]; + $sources = [Sources\Endpoint::class, Sources\Psr7Request::class]; foreach ($sources as $source) { yield Mockery::mock($source) @@ -448,4 +454,79 @@ final class Dataset yield [$decodesToArray, $json, $values[$decodesToArray]]; } } + + /** + * Retrieve the dataset to test sources + * + * @return Generator + */ + public static function forSources(): Generator + { + $path = fixture('json/simple_array.json'); + $json = file_get_contents($path); + $size = strlen($json); + $request = new Psr7Request('GET', 'foo'); + + $stream = Mockery::mock(StreamInterface::class) + ->shouldReceive([ + 'getSize' => $size, + 'isReadable' => true, + ]) + ->getMock(); + + $response = Mockery::mock(ResponseInterface::class) + ->shouldReceive('getBody') + ->andReturn($stream) + ->getMock(); + + $client = Mockery::mock(Client::class) + ->shouldReceive('get', 'sendRequest') + ->andReturn($response) + ->getMock(); + + $endpoint = Mockery::mock(Sources\Endpoint::class, ['https://example.com']) + ->makePartial() + ->shouldAllowMockingProtectedMethods() + ->shouldReceive('guzzle') + ->andReturn($client) + ->getMock(); + + $laravelClientRequest = Mockery::mock(Sources\LaravelClientRequest::class, [new Request($request)]) + ->makePartial() + ->shouldAllowMockingProtectedMethods() + ->shouldReceive('guzzle') + ->andReturn($client) + ->getMock(); + + $psr7Response = Mockery::mock(Psr7Response::class) + ->shouldReceive('getBody') + ->andReturn($stream) + ->getMock(); + + $psr7Request = Mockery::mock(Sources\Psr7Request::class, [$request]) + ->makePartial() + ->shouldAllowMockingProtectedMethods() + ->shouldReceive('guzzle') + ->andReturn($client) + ->getMock(); + + $sources = [ + new Sources\AnySource(new Sources\Json($json)), + new Sources\CustomSource(new Sources\Json($json)), + $endpoint, + new Sources\Filename($path), + new 
Sources\IterableSource(str_split($json)), + new Sources\Json($json), + new Sources\JsonResource(fopen($path, 'rb')), + $laravelClientRequest, + new Sources\LaravelClientResponse(new Response($psr7Response)), + new Sources\Psr7Message($response), + $psr7Request, + new Sources\Psr7Stream($stream), + ]; + + foreach ($sources as $source) { + yield [$source, $size]; + } + } } diff --git a/tests/Feature/SourcesTest.php b/tests/Feature/SourcesTest.php index ca28f9b..f54801b 100644 --- a/tests/Feature/SourcesTest.php +++ b/tests/Feature/SourcesTest.php @@ -16,3 +16,10 @@ it('throws an exception when Guzzle is required but not installed', function (So expect(fn () => JsonParser::parse($source)->traverse()) ->toThrow(GuzzleRequiredException::class, 'Guzzle is required to load JSON from endpoints'); })->with(Dataset::forSourcesRequiringGuzzle()); + +it('supports multiple sources', function (Source $source, int $size) { + expect($source) + ->getIterator()->toBeInstanceOf(Traversable::class) + ->matches()->toBeTrue() + ->size()->toBe($size); +})->with(Dataset::forSources()); From a6e7eca23a9317bb4661031cb72b16edc1ff3ec5 Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Tue, 13 Jun 2023 22:13:42 +0200 Subject: [PATCH 239/249] Add coverage annotations --- src/Concerns/GuzzleAware.php | 1 + src/Sources/StreamWrapper.php | 1 + 2 files changed, 2 insertions(+) diff --git a/src/Concerns/GuzzleAware.php b/src/Concerns/GuzzleAware.php index a1c8450..37c4ef2 100644 --- a/src/Concerns/GuzzleAware.php +++ b/src/Concerns/GuzzleAware.php @@ -56,6 +56,7 @@ trait GuzzleAware /** * Retrieve the Guzzle client * + * @codeCoverageIgnore * @return Client */ protected function guzzle(): Client diff --git a/src/Sources/StreamWrapper.php b/src/Sources/StreamWrapper.php index c3fe7bf..86f8dd8 100644 --- a/src/Sources/StreamWrapper.php +++ b/src/Sources/StreamWrapper.php @@ -7,6 +7,7 @@ use Psr\Http\Message\StreamInterface; /** * The JSON stream wrapper. 
* + * @codeCoverageIgnore * @phpcs:disable PSR1.Methods.CamelCapsMethodName.NotCamelCaps */ final class StreamWrapper From 840469a24f7887c5c56e283b3682e4abfd008899 Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Tue, 13 Jun 2023 22:13:58 +0200 Subject: [PATCH 240/249] Add parsing tests --- tests/Feature/ParsingTest.php | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/tests/Feature/ParsingTest.php b/tests/Feature/ParsingTest.php index 5309962..63d9e96 100644 --- a/tests/Feature/ParsingTest.php +++ b/tests/Feature/ParsingTest.php @@ -1,6 +1,7 @@ toParseTo($parsed); })->with(Dataset::forParsing()); +it('parses with custom decoders', function (string $json, array $parsed) { + expect(JsonParser::parse($json)->decoder(new SimdjsonDecoder()))->toParseTo($parsed); +})->with(Dataset::forParsing()); + +it('parses a custom number of bytes', function (string $json, array $parsed) { + expect(JsonParser::parse($json)->bytes(1024))->toParseTo($parsed); +})->with(Dataset::forParsing()); + it('eager loads JSON into an array', function (string $json, array $parsed) { expect(JsonParser::parse($json)->toArray())->toBe($parsed); })->with(Dataset::forParsing()); From 3eec5fc8fbe00b312879b975ac922ef20d376fbb Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Tue, 13 Jun 2023 22:53:47 +0200 Subject: [PATCH 241/249] Test that all sources are parsed correctly --- tests/Dataset.php | 18 ++++++------------ tests/Feature/SourcesTest.php | 4 +++- 2 files changed, 9 insertions(+), 13 deletions(-) diff --git a/tests/Dataset.php b/tests/Dataset.php index 71d55b2..e1ceb52 100644 --- a/tests/Dataset.php +++ b/tests/Dataset.php @@ -10,11 +10,11 @@ use Generator; use GuzzleHttp\Client; use GuzzleHttp\Psr7\Request as Psr7Request; use GuzzleHttp\Psr7\Response as Psr7Response; +use GuzzleHttp\Psr7\Stream; use Illuminate\Http\Client\Request; use Illuminate\Http\Client\Response; use Mockery; use Psr\Http\Message\ResponseInterface; -use Psr\Http\Message\StreamInterface; /** * The 
dataset provider. @@ -462,21 +462,15 @@ final class Dataset */ public static function forSources(): Generator { + $parsed = require fixture('parsing/simple_array.php'); $path = fixture('json/simple_array.json'); $json = file_get_contents($path); $size = strlen($json); $request = new Psr7Request('GET', 'foo'); - $stream = Mockery::mock(StreamInterface::class) - ->shouldReceive([ - 'getSize' => $size, - 'isReadable' => true, - ]) - ->getMock(); - $response = Mockery::mock(ResponseInterface::class) ->shouldReceive('getBody') - ->andReturn($stream) + ->andReturnUsing(fn () => new Stream(fopen($path, 'rb'))) ->getMock(); $client = Mockery::mock(Client::class) @@ -500,7 +494,7 @@ final class Dataset $psr7Response = Mockery::mock(Psr7Response::class) ->shouldReceive('getBody') - ->andReturn($stream) + ->andReturn(new Stream(fopen($path, 'rb'))) ->getMock(); $psr7Request = Mockery::mock(Sources\Psr7Request::class, [$request]) @@ -522,11 +516,11 @@ final class Dataset new Sources\LaravelClientResponse(new Response($psr7Response)), new Sources\Psr7Message($response), $psr7Request, - new Sources\Psr7Stream($stream), + new Sources\Psr7Stream(new Stream(fopen($path, 'rb'))), ]; foreach ($sources as $source) { - yield [$source, $size]; + yield [$source, $size, $parsed]; } } } diff --git a/tests/Feature/SourcesTest.php b/tests/Feature/SourcesTest.php index f54801b..29f45ca 100644 --- a/tests/Feature/SourcesTest.php +++ b/tests/Feature/SourcesTest.php @@ -17,9 +17,11 @@ it('throws an exception when Guzzle is required but not installed', function (So ->toThrow(GuzzleRequiredException::class, 'Guzzle is required to load JSON from endpoints'); })->with(Dataset::forSourcesRequiringGuzzle()); -it('supports multiple sources', function (Source $source, int $size) { +it('supports multiple sources', function (Source $source, int $size, array $parsed) { expect($source) ->getIterator()->toBeInstanceOf(Traversable::class) ->matches()->toBeTrue() ->size()->toBe($size); + + expect(new 
JsonParser($source))->toParseTo($parsed); })->with(Dataset::forSources()); From 787882e7b5f9d4326f103f6949b3ce8fb0b9867d Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Wed, 14 Jun 2023 21:20:08 +0200 Subject: [PATCH 242/249] Add coverage annotations --- src/Tokens/Parser.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Tokens/Parser.php b/src/Tokens/Parser.php index a180d69..ea4a50a 100644 --- a/src/Tokens/Parser.php +++ b/src/Tokens/Parser.php @@ -135,7 +135,7 @@ final class Parser implements IteratorAggregate $this->isFastForwarding = true; foreach ($this as $value) { - $value instanceof self && $value->fastForward(); + $value instanceof self && $value->fastForward(); // @codeCoverageIgnore } } } From d01bdf7fa10fb438e4980bb99b884a9f33fa3fed Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Wed, 14 Jun 2023 21:21:17 +0200 Subject: [PATCH 243/249] Upgrade PHP version --- .github/workflows/build.yml | 8 +++--- composer.json | 4 +-- phpunit.xml.dist | 10 ++++--- tests/Dataset.php | 56 ++++++++++++++++++------------------- 4 files changed, 40 insertions(+), 38 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index a27d1e1..845da38 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -11,7 +11,7 @@ jobs: strategy: fail-fast: false matrix: - php: [8.0, 8.1, 8.2] + php: [8.1, 8.2, 8.3] dependency-version: [prefer-lowest, prefer-stable] os: [ubuntu-latest] @@ -49,7 +49,7 @@ jobs: - name: Setup PHP uses: shivammathur/setup-php@v2 with: - php-version: 8.0 + php-version: 8.1 extensions: simdjson tools: composer:v2 coverage: xdebug @@ -75,7 +75,7 @@ jobs: - name: Setup PHP uses: shivammathur/setup-php@v2 with: - php-version: 8.0 + php-version: 8.1 tools: phpcs coverage: none @@ -94,7 +94,7 @@ jobs: - name: Setup PHP uses: shivammathur/setup-php@v2 with: - php-version: 8.0 + php-version: 8.1 extensions: simdjson tools: composer:v2 coverage: none diff --git a/composer.json 
b/composer.json index b42fb9d..04ead94 100644 --- a/composer.json +++ b/composer.json @@ -18,13 +18,13 @@ "role": "Developer" }], "require": { - "php": "^8.0" + "php": "^8.1" }, "require-dev": { "guzzlehttp/guzzle": "^7.2", "illuminate/http": ">=6.20", "mockery/mockery": "^1.5", - "pestphp/pest": "^1.22", + "pestphp/pest": "^2.0", "phpstan/phpstan": "^1.9", "scrutinizer/ocular": "^1.8", "squizlabs/php_codesniffer": "^3.0" diff --git a/phpunit.xml.dist b/phpunit.xml.dist index 158564c..990c7b1 100644 --- a/phpunit.xml.dist +++ b/phpunit.xml.dist @@ -1,9 +1,6 @@ - + - - src/ - @@ -18,4 +15,9 @@ + + + src/ + + diff --git a/tests/Dataset.php b/tests/Dataset.php index e1ceb52..cc04fbb 100644 --- a/tests/Dataset.php +++ b/tests/Dataset.php @@ -252,27 +252,27 @@ final class Dataset $json = fixture('json/complex_object.json'); $sequenceByPointer = [ '' => [ - fn ($value, $key) => $key->toBe('id')->and($value->value)->toBe('0001'), - fn ($value, $key) => $key->toBe('type')->and($value->value)->toBe('donut'), - fn ($value, $key) => $key->toBe('name')->and($value->value)->toBe('Cake'), - fn ($value, $key) => $key->toBe('ppu')->and($value->value)->toBe(0.55), - fn ($value, $key) => $key->toBe('batters')->and($value->value)->toBeInstanceOf(Parser::class), - fn ($value, $key) => $key->toBe('topping')->and($value->value)->toBeInstanceOf(Parser::class), + fn ($value, $key) => $key->toBe('id')->and($value)->toBe('0001'), + fn ($value, $key) => $key->toBe('type')->and($value)->toBe('donut'), + fn ($value, $key) => $key->toBe('name')->and($value)->toBe('Cake'), + fn ($value, $key) => $key->toBe('ppu')->and($value)->toBe(0.55), + fn ($value, $key) => $key->toBe('batters')->and($value)->toBeInstanceOf(Parser::class), + fn ($value, $key) => $key->toBe('topping')->and($value)->toBeInstanceOf(Parser::class), ], '/batters/batter/-' => [ - fn ($value, $key) => $key->toBe(0)->and($value->value)->toBeInstanceOf(Parser::class), - fn ($value, $key) => 
$key->toBe(1)->and($value->value)->toBeInstanceOf(Parser::class), - fn ($value, $key) => $key->toBe(2)->and($value->value)->toBeInstanceOf(Parser::class), - fn ($value, $key) => $key->toBe(3)->and($value->value)->toBeInstanceOf(Parser::class), + fn ($value, $key) => $key->toBe(0)->and($value)->toBeInstanceOf(Parser::class), + fn ($value, $key) => $key->toBe(1)->and($value)->toBeInstanceOf(Parser::class), + fn ($value, $key) => $key->toBe(2)->and($value)->toBeInstanceOf(Parser::class), + fn ($value, $key) => $key->toBe(3)->and($value)->toBeInstanceOf(Parser::class), ], '/topping/-' => [ - fn ($value, $key) => $key->toBe(0)->and($value->value)->toBeInstanceOf(Parser::class), - fn ($value, $key) => $key->toBe(1)->and($value->value)->toBeInstanceOf(Parser::class), - fn ($value, $key) => $key->toBe(2)->and($value->value)->toBeInstanceOf(Parser::class), - fn ($value, $key) => $key->toBe(3)->and($value->value)->toBeInstanceOf(Parser::class), - fn ($value, $key) => $key->toBe(4)->and($value->value)->toBeInstanceOf(Parser::class), - fn ($value, $key) => $key->toBe(5)->and($value->value)->toBeInstanceOf(Parser::class), - fn ($value, $key) => $key->toBe(6)->and($value->value)->toBeInstanceOf(Parser::class), + fn ($value, $key) => $key->toBe(0)->and($value)->toBeInstanceOf(Parser::class), + fn ($value, $key) => $key->toBe(1)->and($value)->toBeInstanceOf(Parser::class), + fn ($value, $key) => $key->toBe(2)->and($value)->toBeInstanceOf(Parser::class), + fn ($value, $key) => $key->toBe(3)->and($value)->toBeInstanceOf(Parser::class), + fn ($value, $key) => $key->toBe(4)->and($value)->toBeInstanceOf(Parser::class), + fn ($value, $key) => $key->toBe(5)->and($value)->toBeInstanceOf(Parser::class), + fn ($value, $key) => $key->toBe(6)->and($value)->toBeInstanceOf(Parser::class), ], ]; @@ -291,18 +291,18 @@ final class Dataset $json = fixture('json/complex_object.json'); $sequenceByPointer = [ '/topping,/batters' => [ - fn ($value, $key) => 
$key->toBe('batters')->and($value->value)->toBeInstanceOf(Parser::class), - fn ($value, $key) => $key->toBe('topping')->and($value->value)->toBeInstanceOf(Parser::class), + fn ($value, $key) => $key->toBe('batters')->and($value)->toBeInstanceOf(Parser::class), + fn ($value, $key) => $key->toBe('topping')->and($value)->toBeInstanceOf(Parser::class), ], '/topping/-,/batters/batter' => [ - fn ($value, $key) => $key->toBe('batter')->and($value->value)->toBeInstanceOf(Parser::class), - fn ($value, $key) => $key->toBe(0)->and($value->value)->toBeInstanceOf(Parser::class), - fn ($value, $key) => $key->toBe(1)->and($value->value)->toBeInstanceOf(Parser::class), - fn ($value, $key) => $key->toBe(2)->and($value->value)->toBeInstanceOf(Parser::class), - fn ($value, $key) => $key->toBe(3)->and($value->value)->toBeInstanceOf(Parser::class), - fn ($value, $key) => $key->toBe(4)->and($value->value)->toBeInstanceOf(Parser::class), - fn ($value, $key) => $key->toBe(5)->and($value->value)->toBeInstanceOf(Parser::class), - fn ($value, $key) => $key->toBe(6)->and($value->value)->toBeInstanceOf(Parser::class), + fn ($value, $key) => $key->toBe('batter')->and($value)->toBeInstanceOf(Parser::class), + fn ($value, $key) => $key->toBe(0)->and($value)->toBeInstanceOf(Parser::class), + fn ($value, $key) => $key->toBe(1)->and($value)->toBeInstanceOf(Parser::class), + fn ($value, $key) => $key->toBe(2)->and($value)->toBeInstanceOf(Parser::class), + fn ($value, $key) => $key->toBe(3)->and($value)->toBeInstanceOf(Parser::class), + fn ($value, $key) => $key->toBe(4)->and($value)->toBeInstanceOf(Parser::class), + fn ($value, $key) => $key->toBe(5)->and($value)->toBeInstanceOf(Parser::class), + fn ($value, $key) => $key->toBe(6)->and($value)->toBeInstanceOf(Parser::class), ], ]; @@ -415,7 +415,7 @@ final class Dataset $patch = fn (DecodedValue $decoded) => strrev($decoded->json); $patched = ['a1', 'b""', 'foo', '4c1.3', 'deslaf', null, ']e2,1[', '}2:f"zab",1:"rab"{']; - yield [$json, fn () => 
$patch, $patched]; + yield [$json, $patch, $patched]; } /** From 7cb1d906e0b78ac55358f736cbdd42e8dbc3d3a5 Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Wed, 14 Jun 2023 21:25:13 +0200 Subject: [PATCH 244/249] Remove verbose option --- .github/workflows/build.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 845da38..fa75cc2 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -11,7 +11,7 @@ jobs: strategy: fail-fast: false matrix: - php: [8.1, 8.2, 8.3] + php: [8.1, 8.2] dependency-version: [prefer-lowest, prefer-stable] os: [ubuntu-latest] @@ -33,7 +33,7 @@ jobs: run: composer update --${{ matrix.dependency-version }} --prefer-dist --no-interaction - name: Execute tests - run: vendor/bin/pest --verbose + run: vendor/bin/pest coverage: runs-on: ubuntu-latest From 3a0d6fe660d3595e6f0a7bba39b1f99d790f1442 Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Wed, 14 Jun 2023 23:59:46 +0200 Subject: [PATCH 245/249] Leverage PHP 8.1 features --- src/Decoders/ConfigurableDecoder.php | 2 +- src/Decoders/DecodedValue.php | 30 ++++++------ src/Decoders/JsonDecoder.php | 2 +- src/Decoders/SimdjsonDecoder.php | 2 +- src/Exceptions/DecodingException.php | 2 +- .../IntersectingPointersException.php | 2 +- src/Exceptions/InvalidPointerException.php | 2 +- src/Exceptions/SyntaxException.php | 2 +- src/Exceptions/UnsupportedSourceException.php | 2 +- src/JsonParser.php | 28 +++++------ src/Pointers/Pointer.php | 49 +++---------------- src/Pointers/Pointers.php | 2 +- src/Sources/Source.php | 16 ++---- src/Tokens/Comma.php | 2 +- src/Tokens/CompoundBegin.php | 8 ++- src/Tokens/CompoundEnd.php | 4 +- src/Tokens/Constant.php | 2 +- src/Tokens/Lexer.php | 4 +- src/Tokens/Parser.php | 8 +-- src/Tokens/ScalarString.php | 2 +- src/Tokens/Tokenizer.php | 2 +- src/ValueObjects/State.php | 16 ++---- src/ValueObjects/Tree.php | 8 +-- 23 files changed, 72 
insertions(+), 125 deletions(-) diff --git a/src/Decoders/ConfigurableDecoder.php b/src/Decoders/ConfigurableDecoder.php index 43a7e2b..f493501 100644 --- a/src/Decoders/ConfigurableDecoder.php +++ b/src/Decoders/ConfigurableDecoder.php @@ -16,7 +16,7 @@ final class ConfigurableDecoder * * @param Config $config */ - public function __construct(private Config $config) + public function __construct(private readonly Config $config) { } diff --git a/src/Decoders/DecodedValue.php b/src/Decoders/DecodedValue.php index f00fd22..5b11313 100644 --- a/src/Decoders/DecodedValue.php +++ b/src/Decoders/DecodedValue.php @@ -10,21 +10,6 @@ use Throwable; */ final class DecodedValue { - /** - * Instantiate the class. - * - * @param mixed $value - */ - private function __construct( - public bool $succeeded, - public mixed $value = null, - public ?string $error = null, - public ?int $code = null, - public ?Throwable $exception = null, - public ?string $json = null, - ) { - } - /** * Retrieve a successfully decoded value * @@ -47,4 +32,19 @@ final class DecodedValue { return new self(false, null, $e->getMessage(), $e->getCode(), $e, $json); } + + /** + * Instantiate the class. 
+ * + * @param mixed $value + */ + private function __construct( + public readonly bool $succeeded, + public mixed $value = null, + public readonly ?string $error = null, + public readonly ?int $code = null, + public readonly ?Throwable $exception = null, + public readonly ?string $json = null, + ) { + } } diff --git a/src/Decoders/JsonDecoder.php b/src/Decoders/JsonDecoder.php index 641741b..5eb4dfd 100644 --- a/src/Decoders/JsonDecoder.php +++ b/src/Decoders/JsonDecoder.php @@ -14,7 +14,7 @@ final class JsonDecoder extends AbstractDecoder * @param bool $decodesToArray * @param int<1, max> $depth */ - public function __construct(private bool $decodesToArray = true, private int $depth = 512) + public function __construct(private readonly bool $decodesToArray = true, private readonly int $depth = 512) { } diff --git a/src/Decoders/SimdjsonDecoder.php b/src/Decoders/SimdjsonDecoder.php index ecccafe..cda6616 100644 --- a/src/Decoders/SimdjsonDecoder.php +++ b/src/Decoders/SimdjsonDecoder.php @@ -14,7 +14,7 @@ final class SimdjsonDecoder extends AbstractDecoder * @param bool $decodesToArray * @param int $depth */ - public function __construct(private bool $decodesToArray = true, private int $depth = 512) + public function __construct(private readonly bool $decodesToArray = true, private readonly int $depth = 512) { } diff --git a/src/Exceptions/DecodingException.php b/src/Exceptions/DecodingException.php index cb17067..8acbb53 100644 --- a/src/Exceptions/DecodingException.php +++ b/src/Exceptions/DecodingException.php @@ -16,7 +16,7 @@ final class DecodingException extends Exception implements JsonParserException * * @param DecodedValue $decoded */ - public function __construct(public DecodedValue $decoded) + public function __construct(public readonly DecodedValue $decoded) { parent::__construct('Decoding error: ' . 
$decoded->error, (int) $decoded->code); } diff --git a/src/Exceptions/IntersectingPointersException.php b/src/Exceptions/IntersectingPointersException.php index cca6993..0939eb6 100644 --- a/src/Exceptions/IntersectingPointersException.php +++ b/src/Exceptions/IntersectingPointersException.php @@ -17,7 +17,7 @@ class IntersectingPointersException extends Exception implements JsonParserExcep * @param Pointer $pointer1 * @param Pointer $pointer2 */ - public function __construct(public Pointer $pointer1, public Pointer $pointer2) + public function __construct(public readonly Pointer $pointer1, public readonly Pointer $pointer2) { parent::__construct("The pointers [$pointer1] and [$pointer2] are intersecting"); } diff --git a/src/Exceptions/InvalidPointerException.php b/src/Exceptions/InvalidPointerException.php index c71973e..dcfe7ea 100644 --- a/src/Exceptions/InvalidPointerException.php +++ b/src/Exceptions/InvalidPointerException.php @@ -15,7 +15,7 @@ final class InvalidPointerException extends Exception implements JsonParserExcep * * @param string $pointer */ - public function __construct(public string $pointer) + public function __construct(public readonly string $pointer) { parent::__construct("The string [$pointer] is not a valid JSON pointer"); } diff --git a/src/Exceptions/SyntaxException.php b/src/Exceptions/SyntaxException.php index 111f00a..6b7cca9 100644 --- a/src/Exceptions/SyntaxException.php +++ b/src/Exceptions/SyntaxException.php @@ -22,7 +22,7 @@ final class SyntaxException extends Exception implements JsonParserException * * @param string $value */ - public function __construct(public string $value) + public function __construct(public readonly string $value) { parent::__construct("Syntax error: unexpected '$value'"); } diff --git a/src/Exceptions/UnsupportedSourceException.php b/src/Exceptions/UnsupportedSourceException.php index 5e28be5..72e32ae 100644 --- a/src/Exceptions/UnsupportedSourceException.php +++ 
b/src/Exceptions/UnsupportedSourceException.php @@ -15,7 +15,7 @@ final class UnsupportedSourceException extends Exception implements JsonParserEx * * @param mixed $source */ - public function __construct(public mixed $source) + public function __construct(public readonly mixed $source) { parent::__construct('Unable to load JSON from the provided source'); } diff --git a/src/JsonParser.php b/src/JsonParser.php index 37bdc2e..6fe0a71 100644 --- a/src/JsonParser.php +++ b/src/JsonParser.php @@ -27,21 +27,32 @@ final class JsonParser implements IteratorAggregate * * @var Config */ - private Config $config; + private readonly Config $config; /** * The lexer. * * @var Lexer */ - private Lexer $lexer; + private readonly Lexer $lexer; /** * The parser. * * @var Parser */ - private Parser $parser; + private readonly Parser $parser; + + /** + * Instantiate the class statically + * + * @param mixed $source + * @return self + */ + public static function parse(mixed $source): self + { + return new self($source); + } /** * Instantiate the class. @@ -55,17 +66,6 @@ final class JsonParser implements IteratorAggregate $this->parser = new Parser($this->lexer->getIterator(), $this->config); } - /** - * Instantiate the class statically - * - * @param mixed $source - * @return self - */ - public static function parse(mixed $source): self - { - return new self($source); - } - /** * Retrieve the lazily iterable JSON * diff --git a/src/Pointers/Pointer.php b/src/Pointers/Pointer.php index 5fc24ef..613b343 100644 --- a/src/Pointers/Pointer.php +++ b/src/Pointers/Pointer.php @@ -22,21 +22,14 @@ final class Pointer implements Stringable * * @var string[] */ - private array $referenceTokens; + public readonly array $referenceTokens; /** * The pointer depth. * * @var int */ - private int $depth; - - /** - * The pointer callback. - * - * @var Closure - */ - private ?Closure $callback; + public readonly int $depth; /** * Whether the pointer was found. 
@@ -52,11 +45,13 @@ final class Pointer implements Stringable * @param bool $isLazy * @param Closure|null $callback */ - public function __construct(private string $pointer, private bool $isLazy = false, Closure $callback = null) - { + public function __construct( + private readonly string $pointer, + public readonly bool $isLazy = false, + private readonly ?Closure $callback = null, + ) { $this->referenceTokens = $this->toReferenceTokens(); $this->depth = count($this->referenceTokens); - $this->callback = $callback; } /** @@ -76,36 +71,6 @@ final class Pointer implements Stringable return array_slice($referenceTokens, 1); } - /** - * Determine whether the pointer is lazy - * - * @return bool - */ - public function isLazy(): bool - { - return $this->isLazy; - } - - /** - * Retrieve the reference tokens - * - * @return string[] - */ - public function referenceTokens(): array - { - return $this->referenceTokens; - } - - /** - * Retrieve the JSON pointer depth - * - * @return int - */ - public function depth(): int - { - return $this->depth; - } - /** * Call the pointer callback * diff --git a/src/Pointers/Pointers.php b/src/Pointers/Pointers.php index 1f62ff3..3398e92 100644 --- a/src/Pointers/Pointers.php +++ b/src/Pointers/Pointers.php @@ -76,7 +76,7 @@ final class Pointers $originalTree = $tree->original(); foreach ($this->pointers as $pointer) { - if ($pointer->referenceTokens() == $originalTree) { + if ($pointer->referenceTokens == $originalTree) { return $this->matching = $pointer; } diff --git a/src/Sources/Source.php b/src/Sources/Source.php index 55d1802..5590c7b 100644 --- a/src/Sources/Source.php +++ b/src/Sources/Source.php @@ -13,13 +13,6 @@ use Traversable; */ abstract class Source implements IteratorAggregate { - /** - * The configuration. - * - * @var Config - */ - protected Config $config; - /** * The cached size of the JSON source. 
* @@ -57,14 +50,15 @@ abstract class Source implements IteratorAggregate abstract protected function calculateSize(): ?int; /** - * Enforce the factory method to instantiate the class. + * Instantiate the class. * * @param mixed $source * @param Config|null $config */ - final public function __construct(protected mixed $source, Config $config = null) - { - $this->config = $config ?: new Config(); + final public function __construct( + protected readonly mixed $source, + protected readonly Config $config = new Config(), + ) { } /** diff --git a/src/Tokens/Comma.php b/src/Tokens/Comma.php index 12b092a..57c4181 100644 --- a/src/Tokens/Comma.php +++ b/src/Tokens/Comma.php @@ -18,7 +18,7 @@ final class Comma extends Token */ public function mutateState(State $state): void { - $state->expectsKey = $state->tree()->inObject(); + $state->expectsKey = $state->tree->inObject(); $state->expectedToken = $state->expectsKey ? Tokens::SCALAR_STRING : Tokens::VALUE_ANY; } } diff --git a/src/Tokens/CompoundBegin.php b/src/Tokens/CompoundBegin.php index 35c75f1..fc9c3e1 100644 --- a/src/Tokens/CompoundBegin.php +++ b/src/Tokens/CompoundBegin.php @@ -25,16 +25,14 @@ final class CompoundBegin extends Token */ public function mutateState(State $state): void { - $tree = $state->tree(); - - if ($this->shouldLazyLoad = $this->shouldLazyLoad && $tree->depth() >= 0) { - $state->expectedToken = $tree->inObject() ? Tokens::AFTER_OBJECT_VALUE : Tokens::AFTER_ARRAY_VALUE; + if ($this->shouldLazyLoad = $this->shouldLazyLoad && $state->tree->depth() >= 0) { + $state->expectedToken = $state->tree->inObject() ? Tokens::AFTER_OBJECT_VALUE : Tokens::AFTER_ARRAY_VALUE; return; } $state->expectsKey = $beginsObject = $this->value == '{'; $state->expectedToken = $beginsObject ? 
Tokens::AFTER_OBJECT_BEGIN : Tokens::AFTER_ARRAY_BEGIN; - $tree->deepen($beginsObject); + $state->tree->deepen($beginsObject); } /** diff --git a/src/Tokens/CompoundEnd.php b/src/Tokens/CompoundEnd.php index 5152eef..b78044a 100644 --- a/src/Tokens/CompoundEnd.php +++ b/src/Tokens/CompoundEnd.php @@ -18,9 +18,9 @@ final class CompoundEnd extends Token */ public function mutateState(State $state): void { - $state->tree()->emerge(); + $state->tree->emerge(); - $state->expectedToken = $state->tree()->inObject() ? Tokens::AFTER_OBJECT_VALUE : Tokens::AFTER_ARRAY_VALUE; + $state->expectedToken = $state->tree->inObject() ? Tokens::AFTER_OBJECT_VALUE : Tokens::AFTER_ARRAY_VALUE; } /** diff --git a/src/Tokens/Constant.php b/src/Tokens/Constant.php index 97cc119..c365118 100644 --- a/src/Tokens/Constant.php +++ b/src/Tokens/Constant.php @@ -18,7 +18,7 @@ final class Constant extends Token */ public function mutateState(State $state): void { - $state->expectedToken = $state->tree()->inObject() ? Tokens::AFTER_OBJECT_VALUE : Tokens::AFTER_ARRAY_VALUE; + $state->expectedToken = $state->tree->inObject() ? Tokens::AFTER_OBJECT_VALUE : Tokens::AFTER_ARRAY_VALUE; } /** diff --git a/src/Tokens/Lexer.php b/src/Tokens/Lexer.php index 0ac3e90..2ff64bb 100644 --- a/src/Tokens/Lexer.php +++ b/src/Tokens/Lexer.php @@ -25,7 +25,7 @@ final class Lexer implements IteratorAggregate * * @var Progress */ - private Progress $progress; + private readonly Progress $progress; /** * The current position. 
@@ -39,7 +39,7 @@ final class Lexer implements IteratorAggregate * * @param Source $source */ - public function __construct(private Source $source) + public function __construct(private readonly Source $source) { $this->progress = new Progress(); } diff --git a/src/Tokens/Parser.php b/src/Tokens/Parser.php index ea4a50a..b21a60c 100644 --- a/src/Tokens/Parser.php +++ b/src/Tokens/Parser.php @@ -25,7 +25,7 @@ final class Parser implements IteratorAggregate * * @var ConfigurableDecoder */ - private ConfigurableDecoder $decoder; + private readonly ConfigurableDecoder $decoder; /** * Whether the parser is fast-forwarding. @@ -40,7 +40,7 @@ final class Parser implements IteratorAggregate * @param Generator $tokens * @param Config $config */ - public function __construct(private Generator $tokens, private Config $config) + public function __construct(private readonly Generator $tokens, private readonly Config $config) { $this->decoder = new ConfigurableDecoder($config); } @@ -63,13 +63,13 @@ final class Parser implements IteratorAggregate $state->mutateByToken($token); - if (!$token->endsChunk() || $state->tree()->isDeep()) { + if (!$token->endsChunk() || $state->tree->isDeep()) { continue; } if ($state->hasBuffer()) { /** @var string|int $key */ - $key = $this->decoder->decode($state->key()); + $key = $this->decoder->decode($state->tree->currentKey()); $value = $this->decoder->decode($state->value()); yield $key => $state->callPointer($value, $key); diff --git a/src/Tokens/ScalarString.php b/src/Tokens/ScalarString.php index f811b3b..5e27d5c 100644 --- a/src/Tokens/ScalarString.php +++ b/src/Tokens/ScalarString.php @@ -31,7 +31,7 @@ final class ScalarString extends Token return; } - $state->expectedToken = $state->tree()->inObject() ? Tokens::AFTER_OBJECT_VALUE : Tokens::AFTER_ARRAY_VALUE; + $state->expectedToken = $state->tree->inObject() ? 
Tokens::AFTER_OBJECT_VALUE : Tokens::AFTER_ARRAY_VALUE; } /** diff --git a/src/Tokens/Tokenizer.php b/src/Tokens/Tokenizer.php index 6a85e42..94394b4 100644 --- a/src/Tokens/Tokenizer.php +++ b/src/Tokens/Tokenizer.php @@ -20,7 +20,7 @@ final class Tokenizer * * @var array */ - private array $tokensMap; + private array $tokensMap = []; /** * Retrieve the singleton instance diff --git a/src/ValueObjects/State.php b/src/ValueObjects/State.php index c1ef011..b4bd7b7 100644 --- a/src/ValueObjects/State.php +++ b/src/ValueObjects/State.php @@ -20,7 +20,7 @@ final class State * * @var Tree */ - private Tree $tree; + public readonly Tree $tree; /** * The JSON buffer. @@ -49,7 +49,7 @@ final class State * @param Pointers $pointers * @param Closure $lazyLoad */ - public function __construct(private Pointers $pointers, private Closure $lazyLoad) + public function __construct(private readonly Pointers $pointers, private readonly Closure $lazyLoad) { $this->tree = new Tree($pointers); } @@ -64,16 +64,6 @@ final class State return $this->tree; } - /** - * Retrieve the current key of the JSON tree - * - * @return string|int - */ - public function key(): string|int - { - return $this->tree->currentKey(); - } - /** * Determine whether the parser can stop parsing * @@ -109,7 +99,7 @@ final class State if ($this->tree->isMatched() && ((!$this->expectsKey && $token->isValue()) || $this->tree->isDeep())) { $pointer = $this->pointers->markAsFound(); - if ($token instanceof CompoundBegin && $pointer->isLazy()) { + if ($token instanceof CompoundBegin && $pointer->isLazy) { $this->buffer = ($this->lazyLoad)(); $token->shouldLazyLoad = true; } else { diff --git a/src/ValueObjects/Tree.php b/src/ValueObjects/Tree.php index 91cbe39..8430d6a 100644 --- a/src/ValueObjects/Tree.php +++ b/src/ValueObjects/Tree.php @@ -46,7 +46,7 @@ final class Tree * * @param Pointers $pointers */ - public function __construct(private Pointers $pointers) + public function __construct(private readonly Pointers 
$pointers) { } @@ -121,7 +121,7 @@ final class Tree { $pointer = $this->pointers->matching(); - return $pointer == '' ? $this->depth > 0 : $this->depth >= $pointer->depth(); + return $pointer == '' ? $this->depth > 0 : $this->depth >= $pointer->depth; } /** @@ -135,7 +135,7 @@ final class Tree { $pointer = $this->pointers->matching(); - if ($pointer != '' && $this->depth >= $pointer->depth()) { + if ($pointer != '' && $this->depth >= $pointer->depth) { return; } elseif ($expectsKey) { $this->traverseKey($token); @@ -191,7 +191,7 @@ final class Tree array_splice($this->inObjectByDepth, $offset); } - $referenceTokens = $this->pointers->matchTree($this)->referenceTokens(); + $referenceTokens = $this->pointers->matchTree($this)->referenceTokens; $this->wildcarded[$this->depth] = ($referenceTokens[$this->depth] ?? null) == '-' ? '-' : $index; if (count($this->wildcarded) > $offset) { From 6dbcf627912441f2286b172e0680d9a8051dfdc6 Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Thu, 15 Jun 2023 00:04:51 +0200 Subject: [PATCH 246/249] Update parameter type --- src/Sources/Source.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Sources/Source.php b/src/Sources/Source.php index 5590c7b..d06dec1 100644 --- a/src/Sources/Source.php +++ b/src/Sources/Source.php @@ -53,7 +53,7 @@ abstract class Source implements IteratorAggregate * Instantiate the class. 
* * @param mixed $source - * @param Config|null $config + * @param Config $config */ final public function __construct( protected readonly mixed $source, From 5424a4ef3d86191adc6f2775e1f4a87f122246e6 Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Thu, 15 Jun 2023 00:13:43 +0200 Subject: [PATCH 247/249] Update image --- .scrutinizer.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.scrutinizer.yml b/.scrutinizer.yml index 5d63b30..ab864d8 100644 --- a/.scrutinizer.yml +++ b/.scrutinizer.yml @@ -1,6 +1,7 @@ build: nodes: analysis: + image: default-bionic project_setup: override: true tests: From 95012a602dbb1fae6be09cba1faa1b34997cb3f6 Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Thu, 15 Jun 2023 18:55:44 +0200 Subject: [PATCH 248/249] Update readme --- README.md | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/README.md b/README.md index e4d8d47..86b1bd5 100644 --- a/README.md +++ b/README.md @@ -79,7 +79,7 @@ JsonParser::parse($source)->traverse(function (mixed $value, string|int $key, Js ### 💧 Sources -A JSON source is any data point that provides a JSON. A wide range of sources is supported by default: +A JSON source is any data point that provides a JSON. A wide range of sources are supported by default: - **strings**, e.g. `{"foo":"bar"}` - **iterables**, i.e. arrays or instances of `Traversable` - **file paths**, e.g. `/path/to/large.json` @@ -123,13 +123,13 @@ class CustomSource extends Source The parent class `Source` gives us access to 2 properties: - `$source`: the JSON source we pass to the parser, i.e.: `new JsonParser($source)` -- `$config`: the configuration we set by chaining methods, e.g.: `$parser->pointer('/foo')` +- `$config`: the configuration we set by chaining methods like `$parser->pointer('/foo')` The method `getIterator()` defines the logic to read the JSON source in a memory-efficient way. It feeds the parser with small pieces of JSON. 
Please refer to the [already existing sources](https://github.com/cerbero90/json-parser/tree/master/src/Sources) to see some implementations. The method `matches()` determines whether the JSON source passed to the parser can be handled by our custom implementation. In other words, we are telling the parser if it should use our class for the JSON to parse. -Finally, `calculateSize()` computes the whole size of the JSON source. It's used to track the [parsing progress](#progress), however it's not always possible to know the size of a JSON source. In this case, or if we don't need to track the progress, we can return `null`. +Finally, `calculateSize()` computes the whole size of the JSON source. It's used to track the [parsing progress](#-progress), however it's not always possible to know the size of a JSON source. In this case, or if we don't need to track the progress, we can return `null`. Now that we have implemented our custom source, we can pass it to the parser: @@ -261,7 +261,7 @@ JsonParser::parse($source) > ⚠️ Please note the parameters order of the callbacks: the value is passed before the key. -Sometimes the sub-trees extracted by pointers are small enough to be kept all in memory. We can chain `toArray()` to eager load the extracted sub-trees into an array: +Sometimes the sub-trees extracted by pointers are small enough to be kept entirely in memory. In such cases, we can chain `toArray()` to eager load the extracted sub-trees into an array: ```php // ['gender' => 'female', 'country' => 'Germany'] @@ -270,9 +270,9 @@ $array = JsonParser::parse($source)->pointers(['/results/0/gender', '/results/0/ ### 🐼 Lazy pointers -JSON Parser keeps in memory only one key and one value at a time. However, if the value is a large array or a large object, we may not want to keep it all in memory. +JSON Parser only keeps one key and one value in memory at a time. However, if the value is a large array or object, it may be inefficient to keep it all in memory. 
-The solution is to use lazy pointers, which recursively keep in memory only one key and one value at a time of any nested array or object: +To solve this problem, we can use lazy pointers. These pointers recursively keep in memory only one key and one value at a time for any nested array or object. ```php $json = JsonParser::parse($source)->lazyPointer('/results/0/name'); @@ -282,7 +282,7 @@ foreach ($json as $key => $value) { } ``` -Lazy pointers return a light-weight instance of `Cerbero\JsonParser\Tokens\Parser` instead of the actual large value. To lazy load nested keys and values, we can then loop through the parser: +Lazy pointers return a lightweight instance of `Cerbero\JsonParser\Tokens\Parser` instead of the actual large value. To lazy load nested keys and values, we can then loop through the parser: ```php $json = JsonParser::parse($source)->lazyPointer('/results/0/name'); @@ -438,7 +438,7 @@ $json = JsonParser::parse($source) ->patchDecodingError(fn (DecodedValue $decoded) => $patches[$decoded->json] ?? null); ``` -Any exception thrown by this package implements the `JsonParserException` interface, which makes it easy to handle all exceptions in one catch: +Any exception thrown by this package implements the `JsonParserException` interface. This makes it easy to handle all exceptions in a single catch block: ```php use Cerbero\JsonParser\Exceptions\JsonParserException; @@ -463,7 +463,7 @@ For reference, here is a comprehensive table of all the exceptions thrown by thi ### ⏳ Progress -When processing large JSONs, we may need to know the parsing progress. JSON Parser offers convenient methods to access all the progress details: +When processing large JSONs, it can be helpful to track the parsing progress. JSON Parser provides convenient methods for accessing all the progress details: ```php $json = new JsonParser($source); @@ -476,7 +476,7 @@ $json->progress()->percentage(); // the completed percentage e.g. 
47.583 $json->progress()->format(); // the formatted progress e.g. 47.5% ``` -The total size of a JSON is calculated differently depending on the [source](#sources). It is not always possible to determine how large a JSON is, in these cases only the current progress is known: +The total size of a JSON is calculated differently depending on the [source](#-sources). In some cases, it may not be possible to determine the size of a JSON and only the current progress is known: ```php $json->progress()->current(); // 86759341 @@ -525,7 +525,7 @@ The MIT License (MIT). Please see [License File](LICENSE.md) for more informatio [ico-author]: https://img.shields.io/static/v1?label=author&message=cerbero90&color=50ABF1&logo=twitter&style=flat-square [ico-php]: https://img.shields.io/packagist/php-v/cerbero/json-parser?color=%234F5B93&logo=php&style=flat-square [ico-version]: https://img.shields.io/packagist/v/cerbero/json-parser.svg?label=version&style=flat-square -[ico-actions]: https://img.shields.io/github/actions/workflow/status/cerbero90/json-parser/workflows/build.yml?branch=master&style=flat-square&logo=github +[ico-actions]: https://img.shields.io/github/actions/workflow/status/cerbero90/json-parser/build.yml?branch=master&style=flat-square&logo=github [ico-license]: https://img.shields.io/badge/license-MIT-brightgreen.svg?style=flat-square [ico-psr7]: https://img.shields.io/static/v1?label=compliance&message=PSR-7&color=blue&style=flat-square [ico-psr12]: https://img.shields.io/static/v1?label=compliance&message=PSR-12&color=blue&style=flat-square From 5cd0d4b90e24a927b683ea6481471531a1cda453 Mon Sep 17 00:00:00 2001 From: Andrea Marco Sartori Date: Fri, 16 Jun 2023 17:41:06 +0200 Subject: [PATCH 249/249] Implement and test method to lazily parse the entire JSON --- README.md | 9 +++++++++ src/JsonParser.php | 10 ++++++++++ tests/Dataset.php | 28 ++++++++++++++++++++++++++++ tests/Feature/PointersTest.php | 4 ++++ 4 files changed, 51 insertions(+) diff --git 
a/README.md b/README.md index 86b1bd5..3cf790e 100644 --- a/README.md +++ b/README.md @@ -314,6 +314,15 @@ foreach ($json as $key => $value) { } ``` +To lazily parse the entire JSON, we can simply chain the `lazy()` method: + +```php +foreach (JsonParser::parse($source)->lazy() as $key => $value) { + // 1st iteration: $key === 'results', $value instanceof Parser + // 2nd iteration: $key === 'info', $value instanceof Parser +} +``` + Lazy pointers also have all the other functionalities of normal pointers: they accept callbacks, can be set one by one or all together, can be eager loaded into an array and can be mixed with normal pointers as well: ```php diff --git a/src/JsonParser.php b/src/JsonParser.php index 6fe0a71..d74ae19 100644 --- a/src/JsonParser.php +++ b/src/JsonParser.php @@ -139,6 +139,16 @@ final class JsonParser implements IteratorAggregate return $this; } + /** + * Set a lazy JSON pointer for the whole JSON + * + * @return self + */ + public function lazy(): self + { + return $this->lazyPointer(''); + } + /** * Traverse the JSON one key and value at a time * diff --git a/tests/Dataset.php b/tests/Dataset.php index cc04fbb..70a7449 100644 --- a/tests/Dataset.php +++ b/tests/Dataset.php @@ -387,6 +387,34 @@ final class Dataset } } + /** + * Retrieve the dataset to test a global lazy pointer + * + * @return Generator + */ + public static function forGlobalLazyPointer(): Generator + { + $sequenceByFixture = [ + 'complex_object' => [ + fn ($value, $key) => $key->toBe('id')->and($value)->toBe('0001'), + fn ($value, $key) => $key->toBe('type')->and($value)->toBe('donut'), + fn ($value, $key) => $key->toBe('name')->and($value)->toBe('Cake'), + fn ($value, $key) => $key->toBe('ppu')->and($value)->toBe(0.55), + fn ($value, $key) => $key->toBe('batters')->and($value)->toBeInstanceOf(Parser::class), + fn ($value, $key) => $key->toBe('topping')->and($value)->toBeInstanceOf(Parser::class), + ], + 'complex_array' => [ + fn ($value, $key) => 
$key->toBe(0)->and($value)->toBeInstanceOf(Parser::class), + fn ($value, $key) => $key->toBe(1)->and($value)->toBeInstanceOf(Parser::class), + fn ($value, $key) => $key->toBe(2)->and($value)->toBeInstanceOf(Parser::class), + ], + ]; + + foreach ($sequenceByFixture as $fixture => $sequence) { + yield [fixture("json/{$fixture}.json"), $sequence]; + } + } + /** * Retrieve the dataset to test syntax errors * diff --git a/tests/Feature/PointersTest.php b/tests/Feature/PointersTest.php index 1f10475..16c94f3 100644 --- a/tests/Feature/PointersTest.php +++ b/tests/Feature/PointersTest.php @@ -63,3 +63,7 @@ it('lazy loads JSON recursively', function (string $json, string $pointer, array it('mixes pointers and lazy pointers', function (string $json, array $pointers, array $lazyPointers, array $expected) { expect(JsonParser::parse($json)->pointers($pointers)->lazyPointers($lazyPointers))->toParseTo($expected); })->with(Dataset::forMixedPointers()); + +it('lazy loads an entire JSON', function (string $json, array $sequence) { + expect(JsonParser::parse($json)->lazy())->sequence(...$sequence); +})->with(Dataset::forGlobalLazyPointer());