Lexesis/templates/c++/lexer.cpp

/*
 * This software is provided 'as-is', without any express or implied warranty. In no event will the authors be held liable for any damages arising from the use of this software.
 *
 * Permission is granted to anyone to use this software for any purpose, including commercial applications, and to alter it and redistribute it freely, subject to the following restrictions:
 *
 * 1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
 *
 * 2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
 *
 * 3. This notice may not be removed or altered from any source distribution.
 */
#include "{{name}}.h"

#include <sstream>
#include <iostream>

namespace { //The automaton data
    typedef std::size_t State;

    State REJECT = {{reject_state}};

    unsigned char TRANS_IDX[256] = { {{#trans_idx}}{{trans}}, {{/trans_idx}} };

    State TABLE[{{num_states}} - 1][{{num_transitions_per_state}}] = { {{#table}}
        { {{#row}} {{state}}, {{/row}} },
        {{/table}} };

    {{name}}::TokenType TOKENS[{{num_states}}] = { {{#tokens}} {{name}}::{{token}}, {{/tokens}} };
}

/**
 * Create a lexer that reads from the given stream.
 *
 * The byte offset starts at zero and m_input is initialised from `in`.
 *
 * @param in the stream to tokenize
 */
{{name}}::{{name}}(std::istream& in)
    : m_offset(0),
      m_input(in)
{
}

/**
 * Destructor. It performs no explicit work.
 */
{{name}}::~{{name}}()
{
}

/**
 * Extract the next token from the input stream.
 *
 * Starting in state 0, the automaton is fed one byte at a time until it
 * reaches REJECT or no further byte is available. The longest prefix that
 * ended in a state with a token type is kept; every byte read beyond that
 * prefix is put back into the stream and the byte offset is corrected.
 * Tokens of type `ignore` are dropped and scanning starts over.
 *
 * @return the matched token (its type and its text)
 * @throws NoMoreTokens if nothing could be matched and the stream reports
 *         end-of-file
 * @throws NoMatch if nothing could be matched for any other reason
 */
{{name}}::Token {{name}}::nextToken() {
    typedef std::istream::traits_type Traits;

    TokenType type = ignore;
    std::string token;

    while (type == ignore) {
        State state = 0;
        std::size_t match_length = 0;
        token = "";

        while (!m_input.eof() && state != REJECT) {
            // Keep the int_type result so that the "no byte available"
            // sentinel is recognised before narrowing to char. This also
            // stops the scan on a failed stream instead of feeding the
            // sentinel into the automaton as a byte.
            const Traits::int_type next = m_input.peek();
            if (Traits::eq_int_type(next, Traits::eof()))
                break;

            const char c = Traits::to_char_type(next);
            token += c;

            state = TABLE[state][TRANS_IDX[static_cast<unsigned char>(c)]];
            if (TOKENS[state])
            {
                // Remember the longest accepted prefix seen so far.
                match_length = token.length();
                type = TOKENS[state];
            }
            m_input.get();
            ++m_offset;
        }

        // Return everything read past the accepted prefix, last byte first.
        std::size_t sdiff = token.length() - match_length;
        for (std::size_t i = 0; i < sdiff; i++)
        {
            m_input.putback(token[token.length() - i - 1]);
        }
        m_offset -= sdiff;

        if (!type || !match_length) {
            if (m_input.eof())
                throw NoMoreTokens();
            throw NoMatch();
        }

        token = token.substr(0, match_length);
    }

    Token t;
    t.type = type;
    t.content = token;
    return t;
}

/**
 * Discard up to `n` bytes from the input stream.
 *
 * The byte offset is advanced only for bytes that were actually extracted;
 * skipping stops at the first failed extraction so that the offset cannot
 * run past the data that was really consumed.
 *
 * @param n the number of bytes to skip
 */
void {{name}}::skip(std::size_t n) {
    typedef std::istream::traits_type Traits;

    for (std::size_t i = 0; i < n; i++) {
        if (Traits::eq_int_type(m_input.get(), Traits::eof()))
            break;
        ++m_offset;
    }
}

/**
 * Look at the next byte of the input without consuming it.
 *
 * @return the next byte of the input
 * @throws NoMoreTokens if no byte is available
 */
char {{name}}::peek() {
    typedef std::istream::traits_type Traits;

    if (m_input.eof())
        throw NoMoreTokens();

    // The eof flag is only raised once a read has hit the end of the input,
    // so the result of the peek itself has to be checked as well; otherwise
    // the end-of-input sentinel would be returned as if it were a byte.
    const Traits::int_type next = m_input.peek();
    if (Traits::eq_int_type(next, Traits::eof()))
        throw NoMoreTokens();
    return Traits::to_char_type(next);
}

std::size_t {{name}}::getByteOffset() {
    return m_offset;
}