// Lexesis/include/Lexesis/RegexLexer.h
//
// 88 lines
// 1.9 KiB
// C++

#pragma once
#ifndef LEXER_RegexLexer_H
#define LEXER_RegexLexer_H
#include <cstddef>
#include <exception>
#include <istream>
#include <string>
/**
 * Token-producing lexer that reads from a `std::istream`.
 *
 * Only the interface is declared here; the member functions are defined in
 * another translation unit.
 */
class RegexLexer {
public:
    /**
     * Thrown when a token (or a peeked character) is requested but nothing is
     * left to read.
     */
    class NoMoreTokens : public std::exception {
    public:
        /// Human-readable description, so logs do not just show "std::exception".
        const char* what() const noexcept override {
            return "RegexLexer: no more tokens available";
        }
    };

    /**
     * Thrown when no match was found for the upcoming input.
     */
    class NoMatch : public std::exception {
    public:
        /// Human-readable description, so logs do not just show "std::exception".
        const char* what() const noexcept override {
            return "RegexLexer: no match found";
        }
    };

    /**
     * The kinds of token this lexer can report.
     *
     * The enumerators are implicitly numbered from 0 in declaration order, so
     * reordering or inserting entries changes their numeric values.
     *
     * NOTE(review): `nonmatching` and `ignore` look like internal sentinels
     * rather than real token kinds -- confirm against the implementation.
     */
    enum TokenType {
        nonmatching,
        BACKSLASH,
        BACKSPACE,
        BELL,
        CARRIAGE_RETURN,
        CHAR,
        CHAR_CLASS,
        DOT,
        ERROR,
        ESCAPED_DOT,
        ESCAPED_LBRACKET,
        ESCAPED_LPAREN,
        ESCAPED_PIPE,
        ESCAPED_PLUS,
        ESCAPED_QUESTIONMARK,
        ESCAPED_RBRACKET,
        ESCAPED_RPAREN,
        ESCAPED_STAR,
        FORMFEED,
        LPAREN,
        NEWLINE,
        PIPE,
        PLUS,
        QUESTIONMARK,
        RPAREN,
        SPACE,
        STAR,
        TAB,
        VTAB,
        ignore,
    };

    /**
     * A single token: its kind together with its text.
     */
    struct Token {
        TokenType type;      ///< Which kind of token this is
        std::string content; ///< Text of the token (presumably the matched input -- confirm)
    };

    /**
     * Create a lexer for the given stream.
     *
     * @param in The stream to tokenize. The class stores a `std::istream&`
     *           member, so presumably `in` must outlive the lexer -- confirm
     *           against the constructor definition.
     */
    RegexLexer(std::istream& in);

    ~RegexLexer();

    /**
     * Get the next token
     *
     * @throws NoMoreTokens if no more tokens are available
     * @throws NoMatch if no match was found
     */
    Token nextToken();

    /**
     * Skip the following `n` bytes.
     *
     * @param n The number of bytes to skip
     */
    void skip(std::size_t n);

    /**
     * Peek at the current head of the input stream. Useful in error
     * reporting, for example when a character mismatches.
     *
     * @throws NoMoreTokens if the input stream is at an end
     */
    char peek();

    /**
     * Get the current byte offset
     */
    std::size_t getByteOffset();

private:
    // Declaration order is kept as-is: members are initialized in this order,
    // whatever the constructor's initializer list says.
    std::size_t m_offset;  // presumably the value reported by getByteOffset() -- confirm
    std::istream& m_input; // stream being read; a reference, not owned by the lexer
};
#endif //LEXER_RegexLexer_H