#pragma once
#ifndef LEXER_RegexLexer_H
#define LEXER_RegexLexer_H

#include <cstddef>
#include <exception>
#include <istream>
#include <string>

/**
 * Lexer interface that reads from a `std::istream` and hands out `Token`s.
 *
 * Only the declarations live in this header; the member functions are
 * defined elsewhere. Judging by the token names, the recognised language is
 * regular-expression syntax (operators, escaped operators, control-character
 * escapes) -- NOTE(review): confirm against the implementation.
 */
class RegexLexer {
public:
    /** Exception type documented as thrown when no more tokens are available. */
    class NoMoreTokens : public std::exception {};

    /** Exception type documented as thrown when no match was found. */
    class NoMatch : public std::exception {};

    /**
     * Kinds of token the lexer can report.
     *
     * This is an unscoped enum: the enumerators are reachable as
     * `RegexLexer::NAME` and convert implicitly to their integer values, which
     * follow declaration order starting at 0.
     *
     * NOTE(review): `nonmatching` and `ignore` look like bookkeeping values
     * rather than real token kinds -- confirm against the implementation.
     */
    enum TokenType {
        nonmatching,
        BACKSLASH,
        BACKSPACE,
        BELL,
        CARRIAGE_RETURN,
        CHAR,
        CHAR_CLASS,
        DOT,
        ERROR,
        ESCAPED_DOT,
        ESCAPED_LBRACKET,
        ESCAPED_LPAREN,
        ESCAPED_PIPE,
        ESCAPED_PLUS,
        ESCAPED_QUESTIONMARK,
        ESCAPED_RBRACKET,
        ESCAPED_RPAREN,
        ESCAPED_STAR,
        FORMFEED,
        LPAREN,
        NEWLINE,
        PIPE,
        PLUS,
        QUESTIONMARK,
        RPAREN,
        SPACE,
        STAR,
        TAB,
        VTAB,
        ignore,
    };

    /** A single token: its kind plus the associated text. */
    struct Token {
        TokenType type;      ///< Kind of token.
        std::string content; ///< Token text (presumably the matched input -- confirm).
    };

    /**
     * Create a lexer for the given input stream.
     *
     * The class stores a `std::istream&` member, so the stream presumably has
     * to outlive the lexer -- NOTE(review): confirm in the constructor
     * definition.
     *
     * @param in The stream to read from
     */
    RegexLexer(std::istream& in);

    ~RegexLexer();

    /**
     * Get the next token
     *
     * @throws NoMoreTokens if no more tokens are available
     * @throws NoMatch if no match was found
     */
    Token nextToken();

    /**
     * Skip the following `n` bytes.
     *
     * @param n The number of bytes to skip
     */
    void skip(std::size_t n);

    /**
     * Peek at the current head of the input stream. Useful for error
     * reporting, for example when a character does not match.
     *
     * @throws NoMoreTokens if the input stream is at an end
     */
    char peek();

    /**
     * Get the current byte offset
     */
    std::size_t getByteOffset();

private:
    std::size_t m_offset;  // presumably the value reported by getByteOffset() -- confirm
    std::istream& m_input; // reference member: the stream is not owned by the lexer
};

#endif //LEXER_RegexLexer_H