// Lexesis/include/Lexesis/RegexLexer.h

#pragma once
#ifndef LEXER_RegexLexer_H
#define LEXER_RegexLexer_H

#include <exception>
#include <istream>
#include <string>

/**
 * Tokenizer over a character input stream, producing the tokens listed in
 * `TokenType`.
 *
 * Only the interface is declared here; the member functions are defined
 * elsewhere.
 */
class RegexLexer {
    public:
        /** Thrown (see `nextToken` and `peek`) when the input has no more tokens / is at its end. */
        class NoMoreTokens : public std::exception {};

        /** Thrown (see `nextToken`) when no match was found. */
        class NoMatch : public std::exception {};

        /**
         * Kinds of token that can be reported in `Token::type`.
         *
         * The enumerators are unscoped and rely on implicit numbering
         * (`nonmatching` == 0 ... `ignore` == 29), so their order must not be
         * changed without checking every user of the numeric values.
         *
         * NOTE(review): `nonmatching` and `ignore` look like internal marker
         * values rather than real token kinds -- confirm against the implementation.
         */
        enum TokenType {
            nonmatching,
            BACKSLASH,
            BACKSPACE,
            BELL,
            CARRIAGE_RETURN,
            CHAR,
            CHAR_CLASS,
            DOT,
            ERROR,
            ESCAPED_DOT,
            ESCAPED_LBRACKET,
            ESCAPED_LPAREN,
            ESCAPED_PIPE,
            ESCAPED_PLUS,
            ESCAPED_QUESTIONMARK,
            ESCAPED_RBRACKET,
            ESCAPED_RPAREN,
            ESCAPED_STAR,
            FORMFEED,
            LPAREN,
            NEWLINE,
            PIPE,
            PLUS,
            QUESTIONMARK,
            RPAREN,
            SPACE,
            STAR,
            TAB,
            VTAB,
            ignore,
        };

        /**
         * A single token: its kind plus associated text.
         *
         * NOTE(review): `content` is presumably the matched input text -- confirm
         * against the implementation.
         */
        struct Token {
            TokenType type;
            std::string content;
        };

        /**
         * Construct a lexer for the given input stream.
         *
         * Declared `explicit` so that a `std::istream` is never silently
         * converted into a lexer (e.g. when passed to a function expecting a
         * `RegexLexer`).
         *
         * NOTE(review): the class holds a `std::istream&` member, so the stream
         * presumably has to outlive the lexer -- confirm against the implementation.
         *
         * @param in The stream to tokenize
         */
        explicit RegexLexer(std::istream& in);
        ~RegexLexer();

        /**
         * Get the next token
         *
         * @throws NoMoreTokens if no more tokens are available
         * @throws NoMatch if no match was found
         */
        Token nextToken();

        /**
         * Skip the following `n` bytes.
         *
         * @param n The number of bytes to skip
         */
        void skip(std::size_t n);

        /**
         * Peek at the current head of the input stream, useful in error reporting when a character mismatches for example
         *
         * @throws NoMoreTokens if the input stream is at an end
         */
        char peek();

        /**
         * Get the current byte offset
         */
        std::size_t getByteOffset();

    private:
        // Presumably the value reported by getByteOffset() -- confirm in the implementation.
        std::size_t m_offset;
        // Reference member: it makes the implicit copy-assignment operator deleted.
        std::istream& m_input;
};

#endif //LEXER_RegexLexer_H