From 68ff966c398329eac6349b953899787e17a2ceaf Mon Sep 17 00:00:00 2001
From: Stefan de Bruijn <stefan@nubilosoft.com>
Date: Fri, 18 Dec 2020 22:42:01 +0100
Subject: [PATCH] Implemented parser for configuration. A small x64 test can be
 found in ParserTest.cpp; haven't run it on ESP32 yet.

---
 Grbl_Esp32/src/Configuration/ParseException.h |  29 +++
 Grbl_Esp32/src/Configuration/Parser.cpp       | 112 ++++++++++
 Grbl_Esp32/src/Configuration/Parser.h         |  49 +++++
 Grbl_Esp32/src/Configuration/ParserTest.cpp   |  93 +++++++++
 Grbl_Esp32/src/Configuration/Test.yaml        |  28 +++
 Grbl_Esp32/src/Configuration/TokenKind.h      |  13 ++
 Grbl_Esp32/src/Configuration/Tokenizer.cpp    | 193 ++++++++++++++++++
 Grbl_Esp32/src/Configuration/Tokenizer.h      |  85 ++++++++
 8 files changed, 602 insertions(+)
 create mode 100644 Grbl_Esp32/src/Configuration/ParseException.h
 create mode 100644 Grbl_Esp32/src/Configuration/Parser.cpp
 create mode 100644 Grbl_Esp32/src/Configuration/Parser.h
 create mode 100644 Grbl_Esp32/src/Configuration/ParserTest.cpp
 create mode 100644 Grbl_Esp32/src/Configuration/Test.yaml
 create mode 100644 Grbl_Esp32/src/Configuration/TokenKind.h
 create mode 100644 Grbl_Esp32/src/Configuration/Tokenizer.cpp
 create mode 100644 Grbl_Esp32/src/Configuration/Tokenizer.h
diff --git a/Grbl_Esp32/src/Configuration/ParseException.h b/Grbl_Esp32/src/Configuration/ParseException.h
new file mode 100644
index 00000000..65f09e51
--- /dev/null
+++ b/Grbl_Esp32/src/Configuration/ParseException.h
@@ -0,0 +1,29 @@
+#pragma once
+
+namespace Configuration {
+    class ParseException {  // Parse error with a 1-based line/column position and a description.
+        int         line_        = 1;        // 1-based line of the error
+        int         column_      = 1;        // 1-based column of the error
+        const char* description_ = nullptr;  // not owned; callers in this patch pass string literals
+
+    public:
+        ParseException()                      = default;
+        ParseException(const ParseException&) = default;
+
+        // Computes the position of 'current' by scanning [start, current): each '\n' starts a new
+        // line (column 1); every other character advances the column.
+        ParseException(const char* start, const char* current, const char* description) : description_(description) {
+            for (; start != current; ++start) {
+                if (*start == '\n') {
+                    ++line_;
+                    column_ = 0;  // becomes 1 below
+                }
+                ++column_;
+            }
+        }
+
+        inline int         LineNumber() const { return line_; }
+        inline int         ColumnNumber() const { return column_; }
+        inline const char* What() const { return description_; }
+    };
+}
diff --git a/Grbl_Esp32/src/Configuration/Parser.cpp b/Grbl_Esp32/src/Configuration/Parser.cpp
new file mode 100644
index 00000000..aa32a6c0
--- /dev/null
+++ b/Grbl_Esp32/src/Configuration/Parser.cpp
@@ -0,0 +1,112 @@
+#include "Parser.h"
+
+#include "ParseException.h"
+
+namespace Configuration {
+    // Primes the two-token window: current_ takes the first token and token_ moves on to the
+    // lookahead, unless the input was already exhausted.
+    Parser::Parser(const char* start, const char* end) : Tokenizer(start, end), current_() {
+        Tokenize();
+        current_ = token_;
+        if (token_.kind_ != TokenKind::Eof) { Tokenize(); }
+    }
+
+    // Raises a ParseException for 'description'. The position reported is the end of the current
+    // key when the parser is on an entry; otherwise the tokenizer's own cursor position is used.
+    void Parser::ParseError(const char* description) const {
+        if (current_.keyEnd_ == nullptr) {
+            Tokenizer::ParseError(description);  // no current key: report at the tokenizer cursor
+            return;
+        }
+        throw ParseException(start_, current_.keyEnd_, description);
+    }
+
+    /// <summary>
+    /// MoveNext: advances to the next entry of the section the parser is currently in (the root
+    /// section by default). Entries of nested sub-sections that were not entered are skipped.
+    /// </summary>
+    /// <returns>true when positioned on an entry; false when the section is exhausted.</returns>
+    bool Parser::MoveNext() {
+        const int sectionIndent = current_.indent_;
+
+        // Anything indented deeper than the current entry belongs to a sub-section the caller
+        // did not enter, so discard it.
+        while (token_.indent_ > sectionIndent) {
+            Tokenize();
+        }
+
+        if (token_.indent_ != sectionIndent) {
+            // The lookahead is indented less than this section: nothing relevant is left. Only
+            // the kind is changed, because current_.indent_ must be preserved.
+            current_.kind_ = TokenKind::Eof;
+            return false;
+        }
+
+        // Same indent means same section: make the lookahead current and fetch a new lookahead.
+        current_ = token_;
+        Tokenize();
+        return current_.kind_ != TokenKind::Eof;
+    }
+
+    // Descends into the section headed by the current entry; its indent is saved for Leave().
+    void Parser::Enter() {
+        const int parentIndent = current_.indent_;
+        indentStack_.push(parentIndent);
+        if (token_.indent_ <= parentIndent) {
+            current_         = TokenData();  // nothing nested below: empty section (kind Eof)
+            current_.indent_ = INT_MAX;
+            return;
+        }
+        current_ = token_;  // first entry of the sub-section
+        Tokenize();
+    }
+
+    // Leaves the section entered by the matching Enter(): what remains of that section is skipped
+    // and the parser returns to the indent level that Enter() saved.
+    void Parser::Leave() {
+        // Lookahead tokens at or beyond the current indent still belong to the section we leave.
+        while (token_.kind_ != TokenKind::Eof && token_.indent_ >= current_.indent_) {
+            Tokenize();
+        }
+
+        const int parentIndent = indentStack_.top();
+        indentStack_.pop();
+
+        // The lookahead is now less indented, but not necessarily a sibling of the entry we
+        // entered on. If it is not, leave an Eof placeholder at the parent's indent instead.
+        const bool continuesParent = (token_.indent_ == parentIndent);
+        current_                   = continuesParent ? token_ : TokenData();
+        if (!continuesParent) {
+            current_.indent_ = parentIndent;
+        }
+        // Tokenize() is deliberately not called here; the caller's MoveNext() does that.
+    }
+
+    // Returns a copy of the current entry's string value; raises a parse error for any other kind.
+    std::string Parser::StringValue() const {
+        const bool isString = (current_.kind_ == TokenKind::String);
+        if (!isString) { ParseError("Expected a string value (e.g. 'foo')"); }
+        return std::string(current_.sValueStart_, current_.sValueEnd_);
+    }
+    
+    // Returns the current entry's boolean value; raises a parse error for any other kind.
+    bool Parser::BoolValue() const {
+        const bool isBoolean = (current_.kind_ == TokenKind::Boolean);
+        if (!isBoolean) { ParseError("Expected a boolean value (e.g. true or false)"); }
+        return current_.bValue_;
+    }
+    
+    // Returns the current entry's integer value; raises a parse error unless it is an IntegerValue.
+    int Parser::IntValue() const {
+        const bool isInteger = (current_.kind_ == TokenKind::IntegerValue);
+        if (!isInteger) { ParseError("Expected an integer value (e.g. 123456)"); }
+        return current_.iValue_;
+    }
+    
+    // Returns the current entry's float value; raises a parse error unless it is a FloatingPoint.
+    double Parser::FloatValue() const {
+        const bool isFloat = (current_.kind_ == TokenKind::FloatingPoint);
+        if (!isFloat) { ParseError("Expected a float value (e.g. 123.456)"); }
+        return current_.fValue_;
+    }
+}
diff --git a/Grbl_Esp32/src/Configuration/Parser.h b/Grbl_Esp32/src/Configuration/Parser.h
new file mode 100644
index 00000000..f4b760ec
--- /dev/null
+++ b/Grbl_Esp32/src/Configuration/Parser.h
@@ -0,0 +1,49 @@
+#pragma once
+
+#include "Tokenizer.h"
+
+#include <stack>
+#include <cstring>
+
+namespace Configuration {
+    class Parser : public Tokenizer {
+        // Parsing here might be a bit confusing, because the state of the tokenizer is one step
+        // ahead of the parser. That way we always have 2 tokens at our disposal, so we know when
+        // we're entering or exiting a section.
+
+        std::stack<int> indentStack_;  // indent of the entry each pending Enter() started from
+        TokenData       current_;      // entry the parser is on; token_ is the one after it
+
+        void ParseError(const char* description) const;  // throws ParseException at the current key when known
+
+    public:
+        Parser(const char* start, const char* end);
+
+        /// <summary>
+        /// MoveNext: moves to the next entry in the current section. By default we're in the
+        /// root section. Returns false once the section has no more entries.
+        /// </summary>
+        bool MoveNext();
+
+        // True once the current section is exhausted; MoveNext() and Enter() signal that through the
+        // Eof kind. The lookahead is not consulted: doing so dropped the last entry of a section.
+        inline bool IsEndSection() { return current_.kind_ == TokenKind::Eof; }
+
+        // Enter/Leave are explicit calls rather than a scoped guard: Leave() can throw, and a
+        // destructor must not throw (it may run during stack unwinding).
+        void Enter();
+        void Leave();
+
+        // True when the current key is exactly 'expected'; a mere prefix of it does not match.
+        inline bool Is(const char* expected) const {
+            const size_t length = size_t(current_.keyEnd_ - current_.keyStart_);
+            return strlen(expected) == length && !strncmp(expected, current_.keyStart_, length);
+        }
+        inline std::string Key() const { return std::string(current_.keyStart_, current_.keyEnd_); }
+
+        std::string StringValue() const;  // the *Value() accessors raise a parse error on an unexpected kind
+        bool        BoolValue() const;
+        int         IntValue() const;
+        double      FloatValue() const;
+    };
+}
diff --git a/Grbl_Esp32/src/Configuration/ParserTest.cpp b/Grbl_Esp32/src/Configuration/ParserTest.cpp
new file mode 100644
index 00000000..88eb9fe9
--- /dev/null
+++ b/Grbl_Esp32/src/Configuration/ParserTest.cpp
@@ -0,0 +1,93 @@
+#ifndef ESP32
+
+#    include "Parser.h"
+#    include "ParseException.h"
+#    include <string>
+#    include <fstream>
+#    include <iostream>
+#    include <streambuf>
+
+namespace Configuration {
+
+    // Reads one axis section; 'axis' indexes "xyzabc" and 'ganged' is the zero-based motor number.
+    void ParseSpecificAxis(Parser& parser, int axis, int ganged) {
+        static const char axisNames[] = "xyzabc";
+        std::cout << "Parsing axis " << axisNames[axis] << ", ganged #" << ganged << std::endl;
+        while (!parser.IsEndSection()) {
+            if (parser.Is("limit")) {
+                const std::string limitPin = parser.StringValue();
+                std::cout << "Limit pin: " << limitPin << std::endl;
+            }
+            parser.MoveNext();  // and so on for the other keys...
+        }
+    }
+
+    // Walks the "axis" section. A key is an axis letter from "xyzabc", optionally followed by a
+    // digit 1-9 selecting a ganged motor (passed on zero-based). Any other key is ignored.
+    void ParseAxis(Parser& parser) {
+        std::cout << "Parsing axis." << std::endl;
+        static const char axisNames[] = "xyzabc";
+        for (; !parser.IsEndSection(); parser.MoveNext()) {
+            const std::string key = parser.Key();
+            if (key.empty() || key.size() > 2) {
+                continue;
+            }
+            const char* const match = strchr(axisNames, key[0]);
+            if (match == nullptr) {
+                continue;
+            }
+            int ganged = 0;
+            if (key.size() == 2) {
+                if (key[1] < '1' || key[1] > '9') {
+                    continue;
+                }
+                ganged = key[1] - '1';
+            }
+            parser.Enter();
+            ParseSpecificAxis(parser, match - axisNames, ganged);
+            parser.Leave();
+        }
+    }
+
+    void ParseBus(Parser& parser) {  // Stub for the "bus" section: prints a marker and reads nothing.
+        std::cout << "Parsing bus." << std::endl;
+
+        // TODO: parse the entries of the bus section; until then the caller's Leave() skips them.
+    }
+
+    // Walks the root section and hands the "axis" and "bus" sections to their parsers. Other
+    // top-level keys are skipped.
+    void ParseRoot(Parser& parser) {
+        std::cout << "Parsing root." << std::endl;
+
+        while (!parser.IsEndSection()) {
+            void (*sectionParser)(Parser&) = parser.Is("axis") ? &ParseAxis : (parser.Is("bus") ? &ParseBus : nullptr);
+            if (sectionParser != nullptr) {
+                parser.Enter();
+                sectionParser(parser);
+                parser.Leave();
+            }
+            parser.MoveNext();
+        }
+    }
+
+    // Test driver: parses the Test.yaml fixture and prints what the section parsers find.
+    // NOTE(review): declared inside namespace Configuration, so this is not ::main - confirm intent.
+    int main() {
+        std::ifstream t("..\\Fiddling\\Test.yaml");
+        if (!t) { std::cout << "Could not open ..\\Fiddling\\Test.yaml" << std::endl; }
+        std::string str((std::istreambuf_iterator<char>(t)), std::istreambuf_iterator<char>());
+        const auto  begin = str.c_str();
+        const auto  end   = begin + str.size();
+        try {
+            Parser parser(begin, end);
+            ParseRoot(parser);
+        } catch (const ParseException& ex) {  // by reference: no copy of the exception object
+            std::cout << "Parse error: " << ex.What() << " @ " << ex.LineNumber() << ":" << ex.ColumnNumber() << std::endl;
+        } catch (...) { std::cout << "Uncaught exception" << std::endl; }
+        std::string s;
+        std::getline(std::cin, s);  // wait for Enter before exiting
+        return 0;
+    }
+}
+#endif
diff --git a/Grbl_Esp32/src/Configuration/Test.yaml b/Grbl_Esp32/src/Configuration/Test.yaml
new file mode 100644
index 00000000..b4d27d0a
--- /dev/null
+++ b/Grbl_Esp32/src/Configuration/Test.yaml
@@ -0,0 +1,28 @@
+axis:
+  x:
+      limit: gpio.33:low
+      stepstick:
+        step: gpio.4
+        direction: gpio.16
+  y2:
+      limit: gpio.32:low
+      stepstick:
+        step: gpio.18
+        direction: gpio.18
+  z:
+      limit: gpio.34:low
+      dynamixel:
+        channel: 3
+
+        #yeah this rocks
+bus:
+  rs485:
+    baud: 19200
+    rx: gpio.12
+    tx: gpio.13
+spindle:
+   vfd:
+      channel: 1
+      unattached: true
+   vfd:
+      channel: 2
diff --git a/Grbl_Esp32/src/Configuration/TokenKind.h b/Grbl_Esp32/src/Configuration/TokenKind.h
new file mode 100644
index 00000000..f1cb4be4
--- /dev/null
+++ b/Grbl_Esp32/src/Configuration/TokenKind.h
@@ -0,0 +1,13 @@
+#pragma once
+
+namespace Configuration {
+
+    enum struct TokenKind {  // kind of the line most recently read by the tokenizer
+        Section,        // "key:" with nothing after the colon on that line
+        Boolean,        // value matched "true" or "false" (compared case-insensitively)
+        String,         // quoted value, or any bare value that is not a boolean or a number
+        IntegerValue,   // run of digits with an optional leading '-'
+        FloatingPoint,  // number containing '.', 'e' or 'E', or spanning 9 or more characters
+        Eof,            // end of input; also the kind of a default-constructed TokenData
+    };
+}
diff --git a/Grbl_Esp32/src/Configuration/Tokenizer.cpp b/Grbl_Esp32/src/Configuration/Tokenizer.cpp
new file mode 100644
index 00000000..5ce8129f
--- /dev/null
+++ b/Grbl_Esp32/src/Configuration/Tokenizer.cpp
@@ -0,0 +1,193 @@
+#include "Tokenizer.h"
+
+#include "ParseException.h"
+#include <cstdlib>
+
+namespace Configuration {
+    void Tokenizer::ParseError(const char* description) const { throw ParseException(start_, current_, description); }  // not 'inline': also called from Parser.cpp
+    void        Tokenizer::Tokenize() {
+        // Reads the next "key:" or "key: value" line into token_. Blank lines and comment-only lines
+        // are skipped; at end of input token_ becomes Eof. Malformed input raises a ParseException.
+
+    parseAgain:
+        int indent = 0;  // leading spaces of the line being read
+
+        while (!Eof() && IsSpace()) {
+            Inc();
+            ++indent;
+        }
+
+        if (!Eof()) {
+            switch (Current()) {
+                case '\t':
+                    // TODO FIXME: We can do tabs or spaces, not both. However, we *could* let the user decide.
+                    ParseError("Indentation through tabs is not allowed. Convert all tabs to spaces please.");
+                    break;
+
+                case '#':  // Comment till end of line
+                    Inc();
+                    while (!Eof() && !IsEndLine()) {
+                        Inc();
+                    }
+                    goto parseAgain;  // a comment-only line yields no token; returning here left token_ stale
+
+                case '\r':
+                case '\n':
+                    Inc();
+                    if (!Eof() && Current() == '\n') {
+                        Inc();
+                    }  // \r\n
+                    goto parseAgain;
+
+                default:
+                    if (!IsAlpha()) {
+                        ParseError("Expected identifier.");
+                    }
+
+                    token_.keyStart_ = current_;  // key: a letter followed by letters, digits or '_'
+                    Inc();
+                    while (!Eof() && (IsAlpha() || IsDigit() || Current() == '_')) {
+                        Inc();
+                    }
+                    token_.keyEnd_ = current_;
+
+                    // Skip whitespaces:
+                    while (!Eof() && IsWhiteSpace()) {
+                        Inc();
+                    }
+
+                    if (Current() != ':') {
+                        ParseError("After a key or section name, we expect a colon character ':'.");
+                    }
+                    Inc();
+
+                    // Skip whitespaces after the colon:
+                    while (!Eof() && IsWhiteSpace()) {
+                        Inc();
+                    }
+
+                    token_.indent_ = indent;
+                    if (IsEndLine()) {
+                        token_.kind_ = TokenKind::Section;  // nothing after the colon: a section header
+
+                        Inc();
+                        if (!Eof() && Current() == '\n') {
+                            Inc();
+                        }  // \r\n
+                    } else {
+                        switch (Current()) {
+                            case '"':
+                            case '\'': {
+                                auto delimiter = Current();  // the closing quote must match the opening one
+
+                                token_.kind_ = TokenKind::String;
+                                Inc();
+                                token_.sValueStart_ = current_;
+                                while (!Eof() && Current() != delimiter && !IsEndLine()) {
+                                    Inc();
+                                }
+                                token_.sValueEnd_ = current_;
+                                if (Current() != delimiter) {
+                                    ParseError("Could not find matching delimiter in string value.");
+                                }
+                                Inc();
+                            } break;
+
+                            default:
+                                if (EqualsCaseInsensitive("true")) {
+                                    token_.kind_   = TokenKind::Boolean;
+                                    token_.bValue_ = true;
+
+                                    for (auto i = 0; i < 4; ++i) {
+                                        Inc();
+                                    }
+                                } else if (EqualsCaseInsensitive("false")) {
+                                    token_.kind_   = TokenKind::Boolean;
+                                    token_.bValue_ = false;
+
+                                    for (auto i = 0; i < 5; ++i) {
+                                        Inc();
+                                    }
+                                } else if (IsDigit() || Current() == '-') {
+                                    auto doubleOrIntStart = current_;
+
+                                    int  intValue = 0;
+                                    bool negative = false;
+
+                                    if (Current() == '-') {
+                                        Inc();
+                                        negative = true;
+                                    }
+
+                                    while (IsDigit()) {
+                                        intValue = intValue * 10 + int(Current() - '0');
+                                        Inc();
+                                    }
+
+                                    if (Current() == 'e' || Current() == 'E' || Current() == '.' ||  // markers
+                                        (current_ - doubleOrIntStart) >= 9) {  // liberal interpretation of 'out of int range'
+                                        char* floatEnd;
+                                        token_.fValue_ = strtod(doubleOrIntStart, &floatEnd);  // NOTE(review): assumes a terminated buffer - confirm
+                                        token_.kind_   = TokenKind::FloatingPoint;
+
+                                        current_ = floatEnd;
+                                    } else {
+                                        if (negative) {
+                                            intValue = -intValue;
+                                        }
+                                        token_.iValue_ = intValue;
+                                        token_.kind_   = TokenKind::IntegerValue;
+                                    }
+                                } else {
+                                    // If it's not 'true', not 'false', and not a digit, we have a string delimited by a whitespace
+                                    token_.kind_        = TokenKind::String;
+                                    token_.sValueStart_ = current_;
+                                    while (!Eof() && !IsWhiteSpace() && !IsEndLine()) {
+                                        Inc();
+                                    }
+                                    token_.sValueEnd_ = current_;
+                                }
+                                break;
+                        }
+
+                        // Skip more whitespaces
+                        while (!Eof() && IsSpace()) {
+                            Inc();
+                        }
+
+                        // A comment after a key-value pair is allowed.
+                        if (Current() == '#') {
+                            Inc();
+                            while (!Eof() && !IsEndLine()) {
+                                Inc();
+                            }
+                        }
+
+                        // Should be EOL or EOF at this point. The line end itself is consumed by the next call.
+                        if (!IsEndLine() && !Eof()) {
+                            ParseError("Expected line end after key/value pair.");
+                        }
+                    }
+            }
+        } else {
+            token_.kind_   = TokenKind::Eof;
+            token_.indent_ = 0;
+        }
+    }
+    // Tokenizer over [start, end). Not 'inline': Parser.cpp needs this definition emitted here.
+    Tokenizer::Tokenizer(const char* start, const char* end) : start_(start), current_(start), end_(end), token_() {
+        // If start is a yaml document start ('---' [newline]), skip that first.
+        if (EqualsCaseInsensitive("---")) {
+            for (int i = 0; i < 3; ++i) {
+                Inc();
+            }
+            while (IsWhiteSpace()) {
+                Inc();
+            }
+            while (Current() == '\r' || Current() == '\n') {
+                Inc();
+            }
+            start_ = current_;  // NOTE(review): error line numbers are then counted from here, not from '---'
+        }
+    }
+}
diff --git a/Grbl_Esp32/src/Configuration/Tokenizer.h b/Grbl_Esp32/src/Configuration/Tokenizer.h
new file mode 100644
index 00000000..335915cb
--- /dev/null
+++ b/Grbl_Esp32/src/Configuration/Tokenizer.h
@@ -0,0 +1,85 @@
+#pragma once
+
+#include "TokenKind.h"
+
+#include <string>
+
+namespace Configuration {
+
+    class Tokenizer {  // Line-based tokenizer: each Tokenize() call reads one "key: value" line into token_.
+        const char* current_;  // read cursor
+        const char* end_;      // one past the last input character
+
+        inline void Inc() {  // advances the cursor, never past end_
+            if (current_ != end_) {
+                ++current_;
+            }
+        }
+        inline char Current() const { return Eof() ? '\0' : (*current_); }  // '\0' at end of input
+
+        inline bool IsAlpha() {
+            char c = Current();
+            return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');
+        }
+
+        inline bool IsSpace() { return Current() == ' '; }
+
+        inline bool IsWhiteSpace() {
+            char c = Current();
+            return c == ' ' || c == '\t' || c == '\f';
+        }
+
+        // '\r' is a line end too: the callers consume the '\n' that may follow, so "\r\n" input works.
+        inline bool IsEndLine() { return Current() == '\n' || Current() == '\r'; }
+
+        inline bool IsDigit() {
+            char c = Current();
+            return (c >= '0' && c <= '9');
+        }
+
+        static inline char ToLower(char c) { return (c >= 'A' && c <= 'Z') ? (char)(c + 32) : c; }
+
+        // True when the text at the cursor starts with 'input' (ASCII case ignored); cursor unchanged.
+        inline bool EqualsCaseInsensitive(const char* input) {
+            const char* tmp = current_;
+            while (*input != '\0' && ToLower(*input) == ToLower(Current())) {
+                Inc();
+                ++input;  // advance the pattern with the cursor (it used to stay on its first character)
+            }
+            bool isSame = *input == '\0';  // Everything till the end of the input string is the same
+            current_    = tmp;             // Restore situation
+            return isSame;
+        }
+
+    protected:
+        const char* start_;  // start of the input; error positions are computed relative to it
+
+        // Results:
+        struct TokenData {
+            TokenData() :
+                keyStart_(nullptr), keyEnd_(nullptr), indent_(0), kind_(TokenKind::Eof), sValueStart_(nullptr), sValueEnd_(nullptr) {}
+
+            const char* keyStart_;  // [keyStart_, keyEnd_) is the key text
+            const char* keyEnd_;
+            int         indent_;    // number of leading spaces on the line
+
+            TokenKind kind_;
+            union {  // which member is valid depends on kind_
+                struct {
+                    const char* sValueStart_;  // [sValueStart_, sValueEnd_) is the string text
+                    const char* sValueEnd_;
+                };
+                int    iValue_;
+                double fValue_;
+                bool   bValue_;
+            };
+        } token_;
+
+        void ParseError(const char* description) const;  // throws ParseException at the cursor
+        inline bool Eof() const { return current_ == end_; }
+        void Tokenize();  // reads the next line into token_
+
+    public:
+        Tokenizer(const char* start, const char* end);
+    };
+}