Lexesis/templates/c++/lexer.cpp

109 lines
2.5 KiB
C++
Raw Normal View History

2016-05-25 15:02:22 +02:00
#include "{{name}}.h"
#include <sstream>
#include <iostream>
namespace { //The automaton data
typedef unsigned long long State;
State REJECT = {{reject_state}};
char TRANS_IDX[256] = { {{#trans_idx}}{{trans}}, {{/trans_idx}} };
State TABLE[{{num_states}}][{{num_transitions_per_state}}] = { {{#table}}
{ {{#row}} {{state}}, {{/row}} },
{{/table}} };
{{name}}::TokenType TOKENS[{{num_states}}] = { {{#tokens}} {{name}}::{{token}}, {{/tokens}} };
}
/**
 * Create a lexer that reads from the given stream.
 *
 * The stream is kept by reference (m_input), and the byte offset counter
 * starts at zero.
 *
 * @param in The stream the lexer reads its input from.
 */
{{name}}::{{name}}(std::istream& in)
    : m_offset(0),
      m_input(in)
{}
/// Destructor; performs no work of its own.
{{name}}::~{{name}}() = default;
/**
 * Read the next token from the input stream.
 *
 * Starting in state 0, the automaton is run over the input until it reaches
 * REJECT or the input ends. The last position at which TOKENS reported a
 * token type is remembered; every character read beyond that position is put
 * back on the stream and removed from the byte offset again. Matches whose
 * type is `ignore` are dropped and scanning restarts.
 *
 * @return The matched token (its type and its text).
 * @throws NoMoreTokens when nothing could be matched and the input is exhausted.
 * @throws NoMatch      when nothing could be matched but input remains; all
 *                      characters read during the attempt have been put back.
 */
{{name}}::Token {{name}}::nextToken() {
    typedef std::istream::traits_type Traits;

    TokenType type = ignore;
    std::string token;

    while (type == ignore) {
        State state = 0;
        std::size_t match_length = 0;
        token.clear();

        // Only peek while eofbit is clear: peeking on a stream that is already
        // at EOF would additionally set failbit and break the putback below.
        while (!m_input.eof() && state != REJECT) {
            // Keep the int_type result so end-of-input is detected reliably
            // instead of being narrowed to a char first.
            const Traits::int_type next = m_input.peek();
            if (Traits::eq_int_type(next, Traits::eof()))
                break;

            const char c = Traits::to_char_type(next);
            token += c;

            // Index through unsigned char: a plain char may be negative for
            // bytes >= 0x80, which would read outside the tables.
            const unsigned char column =
                static_cast<unsigned char>(TRANS_IDX[static_cast<unsigned char>(c)]);
            state = TABLE[state][column];

            if (TOKENS[state]) {
                // Longest match so far: remember where it ends and what it is.
                match_length = token.length();
                type = TOKENS[state];
            }

            m_input.get();
            ++m_offset;
        }

        // Give back everything read past the end of the longest match,
        // last character first so the stream sees the original order again.
        const std::size_t sdiff = token.length() - match_length;
        for (std::size_t i = 0; i < sdiff; ++i) {
            m_input.putback(token[token.length() - i - 1]);
        }
        m_offset -= sdiff;

        if (!type || !match_length) {
            // Nothing was read at all means the input is exhausted;
            // otherwise the upcoming input simply matches no token.
            if (m_input.eof() || token.empty())
                throw NoMoreTokens();
            throw NoMatch();
        }

        token = token.substr(0, match_length);
    }

    Token t;
    t.type = type;
    t.content = token;
    return t;
}
/**
 * Discard up to `n` bytes from the input stream.
 *
 * The byte offset is advanced only for bytes that were actually consumed, so
 * it cannot run past the real end of the input. Skipping stops early once the
 * stream has no more data to give.
 *
 * @param n Maximum number of bytes to skip.
 */
void {{name}}::skip(std::size_t n) {
    typedef std::istream::traits_type Traits;

    for (std::size_t i = 0; i < n; ++i) {
        // get() yields the EOF sentinel when no byte could be extracted.
        if (Traits::eq_int_type(m_input.get(), Traits::eof()))
            break;
        ++m_offset;
    }
}
/**
 * Look at the next input byte without consuming it.
 *
 * @return The next byte of the input.
 * @throws NoMoreTokens when there is no further byte to look at.
 */
char {{name}}::peek() {
    typedef std::istream::traits_type Traits;

    if (m_input.eof())
        throw NoMoreTokens();

    // eof() only becomes true after a read has already hit the end, so the
    // result of peek() itself has to be checked as well; otherwise the EOF
    // sentinel would be narrowed to a char and returned as if it were data.
    const Traits::int_type next = m_input.peek();
    if (Traits::eq_int_type(next, Traits::eof()))
        throw NoMoreTokens();

    return Traits::to_char_type(next);
}
std::size_t {{name}}::getByteOffset() {
return m_offset;
}
//Temporary main
int main(int argc, char** argv) {
std::istringstream in(argv[1]);
{{name}} lex(in);
try {
while (true)
std::cout << "Match: " << lex.nextToken().content << std::endl;
}
catch (decltype(lex)::NoMoreTokens& err) {
std::cout << "DONE, read " << lex.getByteOffset() << " bytes." << std::endl;
}
catch (decltype(lex)::NoMatch& err) {
std::cout << "No match, " << lex.getByteOffset() << std::endl;
}
}