First template implementation for c++

2016-05-25 15:02:22 +02:00 · 2016-05-25 15:02:22 +02:00 · d17dc608dc
parent 8a59d19e5c
commit d17dc608dc
2 changed files with 169 additions and 0 deletions
--- a/templates/c++/lexer.cpp
+++ b/templates/c++/lexer.cpp
@ -0,0 +1,108 @@
+#include "{{name}}.h"
+
+#include <sstream>
+#include <iostream>
+
+namespace { // The automaton data; internal to this translation unit
+    typedef unsigned long long State;
+
+    // State value that stops the scanning loop in nextToken().
+    const State REJECT = {{reject_state}};
+
+    // Maps every possible input byte to its column in TABLE. Stored as
+    // unsigned char so that column numbers above 127 cannot turn into
+    // negative array indices on platforms where plain char is signed.
+    unsigned char TRANS_IDX[256] = { {{#trans_idx}}{{trans}}, {{/trans_idx}} };
+
+    // Transition table: TABLE[state][column] yields the successor state.
+    const State TABLE[{{num_states}}][{{num_transitions_per_state}}] = { {{#table}}
+        { {{#row}} {{state}}, {{/row}} },
+        {{/table}} };
+
+    // Token type reported when scanning stops in a given state. An entry that
+    // converts to false (value 0) is treated by nextToken() as "no match here".
+    const {{name}}::TokenType TOKENS[{{num_states}}] = { {{#tokens}} {{name}}::{{token}}, {{/tokens}} };
+}
+
+// Bind the lexer to the stream `in`; the byte offset starts at zero.
+{{name}}::{{name}}(std::istream& in)
+    : m_offset(0),
+      m_input(in)
+{
+}
+
+// Nothing to release: the input stream is only held by reference.
+{{name}}::~{{name}}() {}
+
+/**
+ * Scan the input for the next token whose type is not `ignore`.
+ *
+ * Runs the automaton from state 0 over the input, remembering the longest
+ * prefix that ended in an accepting state. Characters read beyond that prefix
+ * are pushed back onto the stream and removed from the byte offset again.
+ * Tokens of type `ignore` are dropped and scanning restarts after them.
+ *
+ * @return The matched token (type and matched text)
+ * @throws NoMoreTokens if nothing matched and the stream is at its end
+ * @throws NoMatch if nothing matched but input is still available
+ */
+{{name}}::Token {{name}}::nextToken() {
+    TokenType type = ignore;
+    std::string token;
+
+    while (type == ignore) {
+        State state = 0;
+        std::size_t match_length = 0;
+        token.clear();
+
+        while (state != REJECT) {
+            // Keep the full int_type result so that end-of-input can be told
+            // apart from a real byte before narrowing to char.
+            const std::istream::int_type next = m_input.peek();
+            if (next == std::istream::traits_type::eof())
+                break;
+
+            const char c = std::istream::traits_type::to_char_type(next);
+            token += c;
+
+            // Index through unsigned char: a plain (possibly signed) char
+            // would produce a negative index for bytes >= 0x80.
+            state = TABLE[state][TRANS_IDX[static_cast<unsigned char>(c)]];
+
+            // Never look up REJECT in TOKENS; it may lie outside the table.
+            if (state != REJECT && TOKENS[state])
+            {
+                match_length = token.length();
+                type = TOKENS[state];
+            }
+            m_input.get();
+            ++m_offset;
+        }
+
+        // Return the over-read characters to the stream, last read first.
+        // NOTE(review): putting back several characters is not guaranteed to
+        // succeed for every stream buffer -- confirm for the streams in use.
+        const std::size_t sdiff = token.length() - match_length;
+        for (std::size_t i = 0; i < sdiff; i++)
+        {
+            m_input.putback(token[token.length() - i - 1]);
+        }
+        m_offset -= sdiff;
+
+        if (!type || match_length == 0) {
+            if (m_input.eof())
+                throw NoMoreTokens();
+            throw NoMatch();
+        }
+
+        token = token.substr(0, match_length);
+    }
+
+    Token t;
+    t.type = type;
+    t.content = token;
+    return t;
+}
+
+/**
+ * Discard up to `n` bytes from the input.
+ *
+ * Stops early when the stream has no more data, so the byte offset only
+ * advances by the number of bytes that were really consumed.
+ *
+ * @param n The number of bytes to skip
+ */
+void {{name}}::skip(std::size_t n) {
+    for (std::size_t i = 0; i < n; ++i) {
+        if (m_input.get() == std::istream::traits_type::eof())
+            break;
+        ++m_offset;
+    }
+}
+
+char {{name}}::peek() {
+    if (m_input.eof())
+        throw NoMoreTokens();
+    return m_input.peek();
+}
+
+std::size_t {{name}}::getByteOffset() {
+    return m_offset;
+}
+
+//Temporary main: tokenizes the first command-line argument and prints every
+//match, followed by a summary line once the input is exhausted or a
+//non-matching position is reached.
+int main(int argc, char** argv) {
+    // Without this check a missing argument would construct a std::string
+    // from a null pointer.
+    if (argc < 2) {
+        std::cerr << "Usage: " << (argc > 0 ? argv[0] : "lexer") << " <input>" << std::endl;
+        return 1;
+    }
+
+    std::istringstream in(argv[1]);
+    {{name}} lex(in);
+    try {
+        while (true)
+            std::cout << "Match: " << lex.nextToken().content << std::endl;
+    }
+    catch (const decltype(lex)::NoMoreTokens&) {
+        std::cout << "DONE, read " << lex.getByteOffset() << " bytes." << std::endl;
+    }
+    catch (const decltype(lex)::NoMatch&) {
+        std::cout << "No match, " << lex.getByteOffset() << std::endl;
+    }
+}
--- a/templates/c++/lexer.h
+++ b/templates/c++/lexer.h
@ -0,0 +1,61 @@
+#pragma once
+#ifndef LEXER_{{name}}_H
+#define LEXER_{{name}}_H
+
+#include <exception>
+#include <istream>
+#include <string>
+
+/**
+ * Table-driven lexer reading from a std::istream.
+ *
+ * The lexer only keeps a reference to the stream, so the stream must stay
+ * alive for as long as the lexer is used.
+ */
+class {{name}} {
+    public:
+        /** Thrown when a token is requested but the input is exhausted. */
+        class NoMoreTokens : public std::exception {};
+        /** Thrown when the upcoming input does not match any token. */
+        class NoMatch : public std::exception {};
+
+        /**
+         * All token types the lexer can report. `nonmatching` is the first
+         * enumerator (value 0), followed by the generated token types.
+         */
+        enum TokenType {
+            nonmatching,
+            {{#token_types}}
+            {{type}},
+            {{/token_types}}
+        };
+
+        /** A matched token: its type and the matched text. */
+        struct Token {
+            TokenType type;
+            std::string content;
+        };
+
+        /**
+         * Create a lexer reading from `in`.
+         *
+         * Declared explicit so that a stream is never silently converted
+         * into a lexer.
+         *
+         * @param in The stream to read from; it is stored by reference
+         */
+        explicit {{name}}(std::istream& in);
+        ~{{name}}();
+
+        /**
+         * Get the next token
+         *
+         * @throws NoMoreTokens if no more tokens are available
+         * @throws NoMatch if no match was found
+         */
+        Token nextToken();
+
+        /**
+         * Skip the following `n` bytes.
+         *
+         * @param n The number of bytes to skip
+         */
+        void skip(std::size_t n);
+
+        /**
+         * Peek at the current head of the input stream, useful in error reporting when a character mismatches for example
+         *
+         * @throws NoMoreTokens if the input stream is at an end
+         */
+        char peek();
+
+        /**
+         * Get the current byte offset
+         */
+        std::size_t getByteOffset();
+
+    private:
+        std::size_t m_offset;   // bytes counted as consumed so far
+        std::istream& m_input;  // stream the lexer reads from (not owned)
+};
+
+#endif //LEXER_{{name}}_H