#pragma once
#ifndef LEXER_RegexLexer_H
#define LEXER_RegexLexer_H

#include <cstddef>
#include <exception>
#include <istream>
#include <string>

/**
 * Tokenizer that reads from a `std::istream`.
 *
 * Judging by the token names it lexes regular-expression syntax. Only the
 * interface is declared here; the member functions are defined elsewhere.
 *
 * The class stores a reference to a stream (`m_input`), so the stream it
 * refers to must outlive the lexer.
 */
class RegexLexer {
public:
    /** Exception type documented as thrown when the input has no more tokens. */
    class NoMoreTokens : public std::exception {};

    /** Exception type documented as thrown when no match was found. */
    class NoMatch : public std::exception {};

    /**
     * Kind of a token, stored in Token::type.
     *
     * This is an unscoped enum without explicit values, so the enumerator
     * order below determines the underlying integers (`nonmatching` is 0).
     *
     * NOTE(review): `nonmatching` and `ignore` look like lexer-internal
     * markers rather than regex syntax elements -- confirm against the
     * implementation.
     */
    enum TokenType {
        nonmatching,
        BACKSLASH,
        BACKSPACE,
        BELL,
        CARRIAGE_RETURN,
        CHAR,
        CHAR_CLASS,
        DOT,
        ERROR,
        ESCAPED_DOT,
        ESCAPED_LBRACKET,
        ESCAPED_LPAREN,
        ESCAPED_PIPE,
        ESCAPED_PLUS,
        ESCAPED_QUESTIONMARK,
        ESCAPED_RBRACKET,
        ESCAPED_RPAREN,
        ESCAPED_STAR,
        FORMFEED,
        LPAREN,
        NEWLINE,
        PIPE,
        PLUS,
        QUESTIONMARK,
        RPAREN,
        SPACE,
        STAR,
        TAB,
        VTAB,
        ignore,
    };

    /** A token: its kind plus an associated string. */
    struct Token {
        TokenType type;
        // Presumably the matched input text -- confirm against nextToken().
        std::string content;
    };

    /**
     * Create a lexer for the given stream.
     *
     * NOTE(review): this single-argument constructor is not `explicit`, so a
     * `std::istream&` converts implicitly to a `RegexLexer`. It is left as-is
     * to keep existing callers compiling; consider `explicit` once callers
     * have been checked.
     *
     * @param in The input stream; presumably bound to `m_input` -- confirm
     *           against the definition
     */
    RegexLexer(std::istream& in);
    ~RegexLexer();

    /**
     * Get the next token
     *
     * @throws NoMoreTokens if no more tokens are available
     * @throws NoMatch if no match was found
     */
    Token nextToken();

    /**
     * Skip the following `n` bytes.
     *
     * @param n The number of bytes to skip
     */
    void skip(std::size_t n);

    /**
     * Peek at the current head of the input stream, useful in error reporting
     * when a character mismatches for example
     *
     * @throws NoMoreTokens if the input stream is at an end
     */
    char peek();

    /**
     * Get the current byte offset
     */
    std::size_t getByteOffset();

private:
    // Declaration order is kept as in the original: members are initialized in
    // this order regardless of how the constructor lists them.

    // Presumably the value reported by getByteOffset() -- confirm against the
    // implementation.
    std::size_t m_offset;

    // Non-owning reference to a stream; it must outlive this object.
    std::istream& m_input;
};

#endif //LEXER_RegexLexer_H