From 68ff966c398329eac6349b953899787e17a2ceaf Mon Sep 17 00:00:00 2001
From: Stefan de Bruijn
Date: Fri, 18 Dec 2020 22:42:01 +0100
Subject: [PATCH] Implemented parser for configuration. A small x64 test can be found in ParserTest.cpp; haven't run it on ESP32 yet.

---
 Grbl_Esp32/src/Configuration/ParseException.h |  36 +++
 Grbl_Esp32/src/Configuration/Parser.cpp       | 122 ++++++++++
 Grbl_Esp32/src/Configuration/Parser.h         |  55 +++++
 Grbl_Esp32/src/Configuration/ParserTest.cpp   | 100 +++++++++
 Grbl_Esp32/src/Configuration/Test.yaml        |  28 ++
 Grbl_Esp32/src/Configuration/TokenKind.h      |  13 +
 Grbl_Esp32/src/Configuration/Tokenizer.cpp    | 210 ++++++++++++++++++
 Grbl_Esp32/src/Configuration/Tokenizer.h      |  92 ++++++++
 8 files changed, 656 insertions(+)
 create mode 100644 Grbl_Esp32/src/Configuration/ParseException.h
 create mode 100644 Grbl_Esp32/src/Configuration/Parser.cpp
 create mode 100644 Grbl_Esp32/src/Configuration/Parser.h
 create mode 100644 Grbl_Esp32/src/Configuration/ParserTest.cpp
 create mode 100644 Grbl_Esp32/src/Configuration/Test.yaml
 create mode 100644 Grbl_Esp32/src/Configuration/TokenKind.h
 create mode 100644 Grbl_Esp32/src/Configuration/Tokenizer.cpp
 create mode 100644 Grbl_Esp32/src/Configuration/Tokenizer.h

diff --git a/Grbl_Esp32/src/Configuration/ParseException.h b/Grbl_Esp32/src/Configuration/ParseException.h
new file mode 100644
index 00000000..65f09e51
--- /dev/null
+++ b/Grbl_Esp32/src/Configuration/ParseException.h
@@ -0,0 +1,36 @@
+#pragma once
+
+namespace Configuration {
+    /// Error raised when configuration text cannot be tokenized or parsed. It carries the
+    /// 1-based line and column of the offending position plus a description.
+    class ParseException {
+        int line_ = 0;
+        int column_ = 0;
+        const char* description_ = nullptr;
+
+    public:
+        ParseException() = default;
+        ParseException(const ParseException&) = default;
+
+        /// Derives the position of 'current' by scanning the text from 'start'.
+        /// 'description' is stored by pointer, so it has to outlive the exception.
+        ParseException(const char* start, const char* current, const char* description) : description_(description) {
+            line_ = 1;
+            column_ = 1;
+            while (start != current) {
+                if (*start == '\n') {
+                    ++line_;
+                    column_ = 1;
+                } else {
+                    // Any other character moves the position one column to the right.
+                    ++column_;
+                }
+                ++start;
+            }
+        }
+
+        inline int LineNumber() const { return line_; }
+        inline int ColumnNumber() const { return column_; }
+        inline const char* What() const { return description_; }
+    };
+}
diff --git a/Grbl_Esp32/src/Configuration/Parser.cpp b/Grbl_Esp32/src/Configuration/Parser.cpp
new file mode 100644
index 00000000..aa32a6c0
--- /dev/null
+++ b/Grbl_Esp32/src/Configuration/Parser.cpp
@@ -0,0 +1,122 @@
+#include "Parser.h"
+
+#include "ParseException.h"
+
+#include <climits>
+
+namespace Configuration {
+    Parser::Parser(const char* start, const char* end) : Tokenizer(start, end), current_() {
+        // Prime both tokens: current_ is the entry being examined, token_ is the look-ahead.
+        Tokenize();
+        current_ = token_;
+        if (current_.kind_ != TokenKind::Eof) {
+            Tokenize();
+        }
+    }
+
+    void Parser::ParseError(const char* description) const {
+        // Attempt to use the correct position in the parser:
+        if (current_.keyEnd_) {
+            throw ParseException(start_, current_.keyEnd_, description);
+        } else {
+            Tokenizer::ParseError(description);
+        }
+    }
+
+    /// <summary>
+    /// MoveNext: moves to the next entry in the current section. By default we're in the
+    /// root section.
+    /// </summary>
+    bool Parser::MoveNext() {
+        // While the indent of the token is > current indent, we have to skip it. This is a
+        // sub-section, that we're apparently not interested in.
+        while (token_.indent_ > current_.indent_) {
+            Tokenize();
+        }
+
+        // If the indent is the same, we're in the same section. Update current, move to next
+        // token.
+        if (token_.indent_ == current_.indent_) {
+            current_ = token_;
+            Tokenize();
+        } else {
+            // Apparently token_.indent < current_.indent_, which means we have no more items
+            // in our tokenizer that are relevant.
+            //
+            // Note that we want to preserve current_.indent_!
+            current_.kind_ = TokenKind::Eof;
+        }
+
+        return current_.kind_ != TokenKind::Eof;
+    }
+
+    /// Enter: descends into the section that belongs to the current key. Each call has to be
+    /// paired with a call to Leave.
+    void Parser::Enter() {
+        indentStack_.push(current_.indent_);
+
+        // If we can enter, token_.indent_ > current_.indent_:
+        if (token_.indent_ > current_.indent_) {
+            current_ = token_;
+            Tokenize();
+        } else {
+            // Empty section: an Eof marker with maximum indent makes the section end at once.
+            current_ = TokenData();
+            current_.indent_ = INT_MAX;
+        }
+    }
+
+    /// Leave: skips whatever is left of the entered section and returns to the enclosing one.
+    void Parser::Leave() {
+        // While the indent of the tokenizer is >= current, we can ignore the contents:
+        while (token_.indent_ >= current_.indent_ && token_.kind_ != TokenKind::Eof) {
+            Tokenize();
+        }
+
+        // At this point, we just know the indent is smaller. We don't know if we're in
+        // the *right* section tho.
+        auto last = indentStack_.top();
+        indentStack_.pop();
+
+        if (last == token_.indent_) {
+            // Yes, the token continues where we left off:
+            current_ = token_;
+            // Tokenize(); --> No need, this is handled by MoveNext!
+        } else {
+            current_ = TokenData();
+            current_.indent_ = last;
+        }
+    }
+
+    std::string Parser::StringValue() const {
+        if (current_.kind_ != TokenKind::String) {
+            ParseError("Expected a string value (e.g. 'foo')");
+        }
+        return std::string(current_.sValueStart_, current_.sValueEnd_);
+    }
+
+    bool Parser::BoolValue() const {
+        if (current_.kind_ != TokenKind::Boolean) {
+            ParseError("Expected a boolean value (e.g. true or false)");
+        }
+        return current_.bValue_;
+    }
+
+    int Parser::IntValue() const {
+        if (current_.kind_ != TokenKind::IntegerValue) {
+            ParseError("Expected an integer value (e.g. 123456)");
+        }
+        return current_.iValue_;
+    }
+
+    double Parser::FloatValue() const {
+        // An integer literal (e.g. 3) is a perfectly valid floating point value as well.
+        if (current_.kind_ == TokenKind::IntegerValue) {
+            return current_.iValue_;
+        }
+        if (current_.kind_ != TokenKind::FloatingPoint) {
+            ParseError("Expected a float value (e.g. 123.456)");
+        }
+        return current_.fValue_;
+    }
+}
diff --git a/Grbl_Esp32/src/Configuration/Parser.h b/Grbl_Esp32/src/Configuration/Parser.h
new file mode 100644
index 00000000..f4b760ec
--- /dev/null
+++ b/Grbl_Esp32/src/Configuration/Parser.h
@@ -0,0 +1,55 @@
+#pragma once
+
+#include "Tokenizer.h"
+
+#include <cstring>
+#include <stack>
+#include <string>
+
+namespace Configuration {
+    class Parser : public Tokenizer {
+        // Parsing here might be a bit confusing, because the state of the tokenizer is one step
+        // ahead of the parser. That way we always have 2 tokens at our disposal, so we know when
+        // we're entering or exiting a section.
+
+        std::stack<int> indentStack_;
+        TokenData current_;
+
+        void ParseError(const char* description) const;
+
+    public:
+        Parser(const char* start, const char* end);
+
+        /// <summary>
+        /// MoveNext: moves to the next entry in the current section. By default we're in the
+        /// root section.
+        /// </summary>
+        bool MoveNext();
+
+        // The section ends once MoveNext / Enter / Leave have marked the current entry as Eof.
+        // The look-ahead token must not be consulted here: it can already belong to an outer
+        // section while the current entry is the last key of this one and still has to be handled.
+        inline bool IsEndSection() { return current_.kind_ == TokenKind::Eof; }
+
+        // !!! Important !!! We cannot use a scoped variable for enter & leave, because 'leave' can throw,
+        // and it could be called using stack unrolling. Destructors by definition have to be 'nothrow',
+        // so forget it: it just Won't Work. In other words, if we leave the 'leave' call up to the
+        // destructor, we end up with what we in C++ call 'undefined behavior'.
+
+        void Enter();
+        void Leave();
+
+        // Is: true when the current key equals 'expected' exactly (not merely a prefix of it).
+        inline bool Is(const char* expected) const {
+            const size_t keyLength = size_t(current_.keyEnd_ - current_.keyStart_);
+            return strlen(expected) == keyLength && !strncmp(expected, current_.keyStart_, keyLength);
+        }
+
+        inline std::string Key() const { return std::string(current_.keyStart_, current_.keyEnd_); }
+
+        std::string StringValue() const;
+        bool BoolValue() const;
+        int IntValue() const;
+        double FloatValue() const;
+    };
+}
diff --git a/Grbl_Esp32/src/Configuration/ParserTest.cpp b/Grbl_Esp32/src/Configuration/ParserTest.cpp
new file mode 100644
index 00000000..88eb9fe9
--- /dev/null
+++ b/Grbl_Esp32/src/Configuration/ParserTest.cpp
@@ -0,0 +1,100 @@
+#ifndef ESP32
+
+#    include "ParseException.h"
+#    include "Parser.h"
+
+#    include <cstring>
+#    include <fstream>
+#    include <iostream>
+#    include <iterator>
+#    include <string>
+
+namespace Configuration {
+
+    // Handles the keys of a single axis section (e.g. 'x' or 'y2').
+    void ParseSpecificAxis(Parser& parser, int axis, int ganged) {
+        const char* allAxis = "xyzabc";
+        std::cout << "Parsing axis " << allAxis[axis] << ", ganged #" << ganged << std::endl;
+
+        for (; !parser.IsEndSection(); parser.MoveNext()) {
+            if (parser.Is("limit")) {
+                auto limitPin = parser.StringValue();
+                std::cout << "Limit pin: " << limitPin << std::endl;
+            }
+            // and so on...
+        }
+    }
+
+    // Handles the 'axis' section: one sub-section per axis letter, optionally followed by a
+    // digit 1-9 that selects the ganged motor.
+    void ParseAxis(Parser& parser) {
+        std::cout << "Parsing axis." << std::endl;
+
+        const char* allAxis = "xyzabc";
+
+        for (; !parser.IsEndSection(); parser.MoveNext()) {
+            auto str = parser.Key();
+            if (str.size() == 1) {
+                auto idx = strchr(allAxis, str[0]);
+                if (idx != nullptr) {
+                    parser.Enter();
+                    ParseSpecificAxis(parser, int(idx - allAxis), 0);
+                    parser.Leave();
+                }
+            } else if (str.size() == 2) {
+                auto idx = strchr(allAxis, str[0]);
+                if (idx != nullptr && str[1] >= '1' && str[1] <= '9') {
+                    int ganged = str[1] - '1';
+
+                    parser.Enter();
+                    ParseSpecificAxis(parser, int(idx - allAxis), ganged);
+                    parser.Leave();
+                }
+            }
+        }
+    }
+
+    void ParseBus(Parser& parser) {
+        std::cout << "Parsing bus." << std::endl;
+
+        // TODO
+    }
+
+    void ParseRoot(Parser& parser) {
+        std::cout << "Parsing root." << std::endl;
+
+        for (; !parser.IsEndSection(); parser.MoveNext()) {
+            if (parser.Is("axis")) {
+                parser.Enter();
+                ParseAxis(parser);
+                parser.Leave();
+            } else if (parser.Is("bus")) {
+                parser.Enter();
+                ParseBus(parser);
+                parser.Leave();
+            }
+        }
+    }
+
+    // NOTE(review): this is Configuration::main, not the program entry point. Presumably a
+    // global main() forwards to it -- confirm, or move this function out of the namespace.
+    int main() {
+        std::ifstream t("..\\Fiddling\\Test.yaml");
+        std::string str((std::istreambuf_iterator<char>(t)), std::istreambuf_iterator<char>());
+
+        const auto begin = str.c_str();
+        const auto end = begin + str.size();
+
+        try {
+            Parser parser(begin, end);
+            ParseRoot(parser);
+        } catch (const ParseException& ex) {
+            std::cout << "Parse error: " << ex.What() << " @ " << ex.LineNumber() << ":" << ex.ColumnNumber() << std::endl;
+        } catch (...) { std::cout << "Uncaught exception" << std::endl; }
+
+        std::string s;
+        std::getline(std::cin, s);
+        return 0;
+    }
+}
+#endif
diff --git a/Grbl_Esp32/src/Configuration/Test.yaml b/Grbl_Esp32/src/Configuration/Test.yaml
new file mode 100644
index 00000000..b4d27d0a
--- /dev/null
+++ b/Grbl_Esp32/src/Configuration/Test.yaml
@@ -0,0 +1,28 @@
+axis:
+  x:
+    limit: gpio.33:low
+    stepstick:
+      step: gpio.4
+      direction: gpio.16
+  y2:
+    limit: gpio.32:low
+    stepstick:
+      step: gpio.18
+      direction: gpio.18
+  z:
+    limit: gpio.34:low
+    dynamixel:
+      channel: 3
+
+  #yeah this rocks
+bus:
+  rs485:
+    baud: 19200
+    rx: gpio.12
+    tx: gpio.13
+spindle:
+  vfd:
+    channel: 1
+    unattached: true
+  vfd:
+    channel: 2
diff --git a/Grbl_Esp32/src/Configuration/TokenKind.h b/Grbl_Esp32/src/Configuration/TokenKind.h
new file mode 100644
index 00000000..f1cb4be4
--- /dev/null
+++ b/Grbl_Esp32/src/Configuration/TokenKind.h
@@ -0,0 +1,13 @@
+#pragma once
+
+namespace Configuration {
+
+    enum struct TokenKind {
+        Section,
+        Boolean,
+        String,
+        IntegerValue,
+        FloatingPoint,
+        Eof,
+    };
+}
diff --git a/Grbl_Esp32/src/Configuration/Tokenizer.cpp b/Grbl_Esp32/src/Configuration/Tokenizer.cpp
new file mode 100644
index 00000000..5ce8129f
--- /dev/null
+++ b/Grbl_Esp32/src/Configuration/Tokenizer.cpp
@@ -0,0 +1,210 @@
+#include "Tokenizer.h"
+
+#include "ParseException.h"
+#include <cstdlib>
+
+namespace Configuration {
+    // Deliberately not 'inline': Parser.cpp calls this too, so the definition must be emitted here.
+    void Tokenizer::ParseError(const char* description) const { throw ParseException(start_, current_, description); }
+
+    /// Reads the next 'key: [value]' line into token_, skipping blank and comment-only lines.
+    /// At the end of the input, token_ becomes an Eof token with indent 0.
+    void Tokenizer::Tokenize() {
+        // We parse 1 line at a time. Each time we get here, we can assume that the cursor
+        // is at the start of the line.
+
+    parseAgain:
+        int indent = 0;
+
+        while (!Eof() && IsSpace()) {
+            Inc();
+            ++indent;
+        }
+
+        if (!Eof()) {
+            switch (Current()) {
+                case '\t':
+                    // TODO FIXME: We can do tabs or spaces, not both. However, we *could* let the user decide.
+                    ParseError("Indentation through tabs is not allowed. Convert all tabs to spaces please.");
+                    break;
+
+                case '#':  // Comment till end of line
+                    Inc();
+                    while (!Eof() && !IsEndLine()) {
+                        Inc();
+                    }
+                    // A comment line carries no token. Returning here would hand the previous
+                    // token to the caller a second time, so continue with the next line instead.
+                    goto parseAgain;
+
+                case '\r':
+                case '\n':
+                    Inc();
+                    if (!Eof() && Current() == '\n') {
+                        Inc();
+                    }  // \r\n
+                    goto parseAgain;
+
+                default:
+                    if (!IsAlpha()) {
+                        ParseError("Expected identifier.");
+                    }
+
+                    token_.keyStart_ = current_;
+                    Inc();
+                    while (!Eof() && (IsAlpha() || IsDigit() || Current() == '_')) {
+                        Inc();
+                    }
+                    token_.keyEnd_ = current_;
+
+                    // Skip whitespaces:
+                    while (!Eof() && IsWhiteSpace()) {
+                        Inc();
+                    }
+
+                    if (Current() != ':') {
+                        ParseError("After a key or section name, we expect a colon character ':'.");
+                    }
+                    Inc();
+
+                    // Skip whitespaces after the colon:
+                    while (!Eof() && IsWhiteSpace()) {
+                        Inc();
+                    }
+
+                    token_.indent_ = indent;
+                    if (Eof() || IsEndLine()) {
+                        // Nothing follows the colon (line end or end of input): this key is a section.
+                        token_.kind_ = TokenKind::Section;
+
+                        Inc();
+                        if (!Eof() && Current() == '\n') {
+                            Inc();
+                        }  // \r\n
+                    } else {
+                        switch (Current()) {
+                            case '"':
+                            case '\'': {
+                                auto delimiter = Current();
+
+                                token_.kind_ = TokenKind::String;
+                                Inc();
+                                token_.sValueStart_ = current_;
+                                while (!Eof() && Current() != delimiter && !IsEndLine()) {
+                                    Inc();
+                                }
+                                token_.sValueEnd_ = current_;
+                                if (Current() != delimiter) {
+                                    ParseError("Could not find matching delimiter in string value.");
+                                }
+                                Inc();
+                            } break;
+
+                            default:
+                                if (EqualsCaseInsensitive("true")) {
+                                    token_.kind_ = TokenKind::Boolean;
+                                    token_.bValue_ = true;
+
+                                    for (auto i = 0; i < 4; ++i) {
+                                        Inc();
+                                    }
+                                } else if (EqualsCaseInsensitive("false")) {
+                                    token_.kind_ = TokenKind::Boolean;
+                                    token_.bValue_ = false;
+
+                                    for (auto i = 0; i < 5; ++i) {
+                                        Inc();
+                                    }
+                                } else if (IsDigit() || Current() == '-') {
+                                    auto doubleOrIntStart = current_;
+
+                                    int intValue = 0;
+                                    bool negative = false;
+
+                                    if (Current() == '-') {
+                                        Inc();
+                                        negative = true;
+                                    }
+
+                                    while (IsDigit()) {
+                                        // Only accumulate while the value is guaranteed to fit in an int.
+                                        // Longer literals are handed to strtod below, so nothing is lost.
+                                        if ((current_ - doubleOrIntStart) < 9) {
+                                            intValue = intValue * 10 + int(Current() - '0');
+                                        }
+                                        Inc();
+                                    }
+
+                                    if (Current() == 'e' || Current() == 'E' || Current() == '.' ||  // markers
+                                        (current_ - doubleOrIntStart) >= 9) {  // liberal interpretation of 'out of int range'
+                                        // NOTE: strtod does not know about end_; it relies on a character that
+                                        // cannot be part of a number (e.g. a '\0' terminator) following the text.
+                                        char* floatEnd;
+                                        token_.fValue_ = strtod(doubleOrIntStart, &floatEnd);
+                                        token_.kind_ = TokenKind::FloatingPoint;
+
+                                        current_ = floatEnd;
+                                    } else {
+                                        if (negative) {
+                                            intValue = -intValue;
+                                        }
+                                        token_.iValue_ = intValue;
+                                        token_.kind_ = TokenKind::IntegerValue;
+                                    }
+                                } else {
+                                    // If it's not 'true', not 'false', and not a digit, we have a string delimited by a whitespace
+                                    token_.kind_ = TokenKind::String;
+                                    token_.sValueStart_ = current_;
+                                    while (!Eof() && !IsWhiteSpace() && !IsEndLine()) {
+                                        Inc();
+                                    }
+                                    token_.sValueEnd_ = current_;
+                                }
+                                break;
+                        }
+
+                        // Skip more whitespaces
+                        while (!Eof() && IsSpace()) {
+                            Inc();
+                        }
+
+                        // A comment after a key-value pair is allowed.
+                        if (Current() == '#') {
+                            Inc();
+                            while (!Eof() && !IsEndLine()) {
+                                Inc();
+                            }
+                        }
+
+                        // Should be EOL or EOF at this point.
+                        if (!IsEndLine() && !Eof()) {
+                            ParseError("Expected line end after key/value pair.");
+                        }
+                    }
+            }
+        } else {
+            token_.kind_ = TokenKind::Eof;
+            token_.indent_ = 0;
+        }
+    }
+
+    /// Sets up tokenizing of the character range [start, end). Not 'inline', because the
+    /// Parser constructor in Parser.cpp needs to link against this definition.
+    Tokenizer::Tokenizer(const char* start, const char* end) : current_(start), end_(end), start_(start), token_() {
+        // If start is a yaml document start ('---' [newline]), skip that first.
+        if (EqualsCaseInsensitive("---")) {
+            for (int i = 0; i < 3; ++i) {
+                Inc();
+            }
+            while (IsWhiteSpace()) {
+                Inc();
+            }
+            while (Current() == '\r' || Current() == '\n') {
+                Inc();
+            }
+
+            // start_ stays at the real start of the text: ParseException counts lines from
+            // there, so the skipped '---' line has to remain part of the reported position.
+        }
+    }
+}
diff --git a/Grbl_Esp32/src/Configuration/Tokenizer.h b/Grbl_Esp32/src/Configuration/Tokenizer.h
new file mode 100644
index 00000000..335915cb
--- /dev/null
+++ b/Grbl_Esp32/src/Configuration/Tokenizer.h
@@ -0,0 +1,92 @@
+#pragma once
+
+#include "TokenKind.h"
+
+#include <string>
+
+namespace Configuration {
+
+    class Tokenizer {
+        const char* current_;
+        const char* end_;
+
+        inline void Inc() {
+            if (current_ != end_) {
+                ++current_;
+            }
+        }
+        inline char Current() const { return Eof() ? '\0' : (*current_); }
+
+        inline bool IsAlpha() {
+            char c = Current();
+            return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');
+        }
+
+        inline bool IsSpace() { return Current() == ' '; }
+
+        inline bool IsWhiteSpace() {
+            char c = Current();
+            return c == ' ' || c == '\t' || c == '\f';
+        }
+
+        // Both '\n' and the '\r' of a Windows style '\r\n' terminate a line.
+        inline bool IsEndLine() {
+            char c = Current();
+            return c == '\n' || c == '\r';
+        }
+
+        inline bool IsDigit() {
+            char c = Current();
+            return (c >= '0' && c <= '9');
+        }
+
+        static inline char ToLower(char c) { return (c >= 'A' && c <= 'Z') ? static_cast<char>(c + 32) : c; }
+
+        // Checks if the text at the cursor starts with 'input', ignoring case. The cursor is
+        // left where it was.
+        inline bool EqualsCaseInsensitive(const char* input) {
+            const char* tmp = current_;
+            while (*input != '\0' && ToLower(*input) == ToLower(Current())) {
+                Inc();
+                ++input;  // Advance the pattern together with the cursor.
+            }
+
+            bool isSame = *input == '\0';  // Everything till the end of the input string is the same
+            current_ = tmp;                // Restore situation
+            return isSame;
+        }
+
+    protected:
+        const char* start_;
+
+        // Results:
+        struct TokenData {
+            TokenData() :
+                keyStart_(nullptr), keyEnd_(nullptr), indent_(0), kind_(TokenKind::Eof), sValueStart_(nullptr), sValueEnd_(nullptr) {}
+
+            const char* keyStart_;
+            const char* keyEnd_;
+            int indent_;
+
+            TokenKind kind_;
+            union {
+                struct {
+                    const char* sValueStart_;
+                    const char* sValueEnd_;
+                };
+                int iValue_;
+                double fValue_;
+                bool bValue_;
+            };
+        } token_;
+
+        void ParseError(const char* description) const;
+
+        inline bool Eof() const { return current_ == end_; }
+
+        void Tokenize();
+
+    public:
+        Tokenizer(const char* start, const char* end);
+    };
+}