First implementation for c++ backend, waiting for template and main

integration
This commit is contained in:
Robin Jadoul 2016-05-25 18:49:15 +02:00
parent a1220fee64
commit 3d59a970f8
4 changed files with 140 additions and 6 deletions

View File

@ -5,7 +5,10 @@
#include "Lexesis/automata.h"
#include "Lexesis/template.h"
#include "mstch/mstch.hpp"
#include <functional>
#include <memory>
#include <string>
namespace lxs {
@ -17,7 +20,7 @@ namespace lxs {
virtual std::string getName() = 0;
virtual bool canProcessLang(std::string lang);
virtual void generateLexer(std::function<std::ostream&(std::string)> getOstreamForFileName, std::string lexerName, const DFA& dfa) = 0;
virtual void generateLexer(std::function<std::unique_ptr<std::ostream>(std::string)> getOstreamForFileName, std::string lexerName, const DFA& dfa) = 0;
protected:
void doTemplate(std::ostream& out, std::string templateName, templ::TemplateContext context);

View File

@ -0,0 +1,30 @@
#pragma once
#ifndef LEXESIS_BACKENDS_CPP_H
#define LEXESIS_BACKENDS_CPP_H
#include "Lexesis/backend.h"
namespace lxs
{
namespace backends
{
class CppBackend : public Backend {
public:
CppBackend();
virtual ~CppBackend();
virtual bool canProcessLang(std::string lang);
virtual void generateLexer(std::function<std::unique_ptr<std::ostream>(std::string)> getOstreamForFileName, std::string lexerName, const DFA& dfa);
private:
templ::TemplateContext buildTable(const DFA& dfa, const std::vector<char>& transition_idx, int num_transitions_per_state) const;
templ::TemplateContext buildTokenList(const DFA& dfa) const;
std::pair<std::vector<char>, int> buildTransitionIndices(const DFA& dfa) const;
templ::TemplateContext transformTransitionIndices(std::vector<char> transition_indices) const;
};
}
}
#endif //LEXESIS_BACKENDS_CPP_H

101
src/backends/cpp.cpp Normal file
View File

@ -0,0 +1,101 @@
#include "Lexesis/backends/cpp.h"
#include <cassert>
#include <cctype>
#include <map>
#include <set>
#include <string>
#include <utility>
#include <vector>
namespace lxs { namespace backends {
// Construct the backend; there is no state to set up beyond the Backend base.
CppBackend::CppBackend() : Backend()
{}
// The class declares no data members of its own, so there is nothing to release here.
CppBackend::~CppBackend()
{}
/**
 * Check whether this backend handles the given language.
 *
 * @param lang Language name; compared case-insensitively.
 * @return true when lang is "c++", "cpp" or "cxx" (in any letter case).
 */
bool CppBackend::canProcessLang(std::string lang) {
    for (char& c : lang) {
        // std::tolower is only defined for values representable as unsigned char
        // (or EOF); passing a negative plain char is undefined behaviour.
        c = static_cast<char>(std::tolower(static_cast<unsigned char>(c)));
    }
    return lang == "c++" || lang == "cpp" || lang == "cxx";
}
/**
 * Generate the header and implementation file of a lexer for the given DFA.
 *
 * @param getOstreamForFileName Callback returning the stream to write the file with the given name to.
 * @param lexerName Non-empty base name; "<lexerName>.h" and "<lexerName>.cpp" are requested as given,
 *                  the "name" template value gets its first character upper-cased.
 * @param dfa The automaton to generate the lexer for.
 */
void CppBackend::generateLexer(
        std::function<std::unique_ptr<std::ostream>(std::string)> getOstreamForFileName,
        std::string lexerName,
        const DFA& dfa)
{
    assert(lexerName.length());

    // The streams are requested before the name is capitalised, so the file
    // names keep the spelling the caller passed in.
    std::unique_ptr<std::ostream> headerStream = getOstreamForFileName(lexerName + ".h");
    std::unique_ptr<std::ostream> implementationStream = getOstreamForFileName(lexerName + ".cpp");
    // Both streams are dereferenced below; a null stream is a caller bug.
    assert(headerStream && implementationStream);

    std::map<std::string, templ::TemplateContext> topLevel;

    // std::toupper requires a value representable as unsigned char.
    lexerName[0] = static_cast<char>(std::toupper(static_cast<unsigned char>(lexerName[0])));
    topLevel["name"] = templ::make_string(lexerName);

    //The DEADSTATE gets a brand new state: dfa.numStates
    topLevel["reject_state"] = templ::make_string(std::to_string(dfa.numStates));
    topLevel["num_states"] = templ::make_string(std::to_string(dfa.numStates + 1));

    auto transition_indices = buildTransitionIndices(dfa);
    topLevel["trans_idx"] = transformTransitionIndices(transition_indices.first);
    topLevel["num_transitions_per_state"] = templ::make_string(std::to_string(transition_indices.second));

    // buildTable needs the number of columns as well as the index mapping.
    topLevel["table"] = buildTable(dfa, transition_indices.first, transition_indices.second);
    topLevel["token_types"] = buildTokenList(dfa);

    templ::TemplateContext topLevelMap = templ::make_map(topLevel);
    doTemplate(*headerStream, "c++/lexer.h", topLevelMap);
    doTemplate(*implementationStream, "c++/lexer.cpp", topLevelMap);
}
/**
 * Build the "table" template value.
 *
 * The result is an array with one element per DFA state; each element is a map
 * whose "row" entry is an array of {"state": <target state>} maps, one per
 * transition column, which is the shape the template's
 * {{#table}} ... {{#row}} {{state}} {{/row}} ... {{/table}} sections iterate over.
 *
 * @param dfa The automaton to tabulate.
 * @param transition_idx Mapping from byte value (0..255) to column index; must hold at least 256 entries.
 * @param num_transitions_per_state Number of columns in every row.
 * @return The array described above.
 */
templ::TemplateContext CppBackend::buildTable(const DFA& dfa, const std::vector<char>& transition_idx, int num_transitions_per_state) const {
    assert(transition_idx.size() >= 256);

    // Invert the mapping: for every column pick one byte that selects it.
    std::map<char, char> reverse_trans;
    for (int i = 0; i < 256; i++) {
        reverse_trans[transition_idx[i]] = i;
    }

    // generateLexer publishes dfa.numStates as the reject state; use it for
    // any transition the DFA does not define instead of dereferencing end().
    const State rejectState = dfa.numStates;

    // NOTE(review): only dfa.numStates rows are produced, while the template
    // sizes TABLE with num_states = dfa.numStates + 1 -- confirm that the row
    // of the reject state itself is never read.
    std::vector<templ::TemplateContext> table;
    for (State s = 0; s < dfa.numStates; s++) {
        const auto fromState = dfa.delta.find(s);

        std::vector<templ::TemplateContext> row;
        for (int i = 0; i < num_transitions_per_state; i++) {
            State to = rejectState;
            if (fromState != dfa.delta.end()) {
                const auto transition = fromState->second.find(reverse_trans[i]);
                if (transition != fromState->second.end())
                    to = transition->second;
            }
            row.push_back(templ::make_map({{"state", templ::make_string(std::to_string(to))}}));
        }

        // Without this the finished row is dropped and the table stays empty.
        table.push_back(templ::make_map({{"row", templ::make_array(row)}}));
    }
    return templ::make_array(table);
}
/**
 * Build the "token_types" template value.
 *
 * @param dfa The automaton whose accepting tokens are listed.
 * @return An array of {"type": <name>} maps holding every distinct accepting
 *         token name plus "ignore", in the sorted order of std::set.
 */
templ::TemplateContext CppBackend::buildTokenList(const DFA& dfa) const {
    // A set removes duplicates and fixes the output order.
    std::set<std::string> uniqueNames;
    uniqueNames.insert("ignore");
    for (const auto& accepting : dfa.acceptingToken)
        uniqueNames.insert(accepting.second);

    std::vector<templ::TemplateContext> entries;
    entries.reserve(uniqueNames.size());
    for (const std::string& name : uniqueNames)
        entries.push_back(templ::make_map({{"type", templ::make_string(name)}}));

    return templ::make_array(entries);
}
/**
 * Build the mapping from byte value to transition column.
 *
 * @return The mapping (256 entries) and the number of columns per state.
 *         Currently the identity mapping, so every byte gets its own column.
 */
std::pair<std::vector<char>, int> CppBackend::buildTransitionIndices(const DFA& /* dfa */) const {
    //FIXME: this is not really optimal ;-)
    const int numBytes = 256;

    std::vector<char> identity(numBytes);
    for (int byte = 0; byte < numBytes; ++byte)
        identity[byte] = static_cast<char>(byte);

    return std::pair<std::vector<char>, int>(identity, numBytes);
}
/**
 * Turn the byte -> column mapping into the "trans_idx" template value.
 *
 * @param transition_indices Column index for every byte value.
 * @return An array of {"trans": "(unsigned char)<index>"} maps, in input order.
 */
templ::TemplateContext CppBackend::transformTransitionIndices(std::vector<char> transition_indices) const {
    std::vector<templ::TemplateContext> new_trans;
    new_trans.reserve(transition_indices.size());
    for (const char idx : transition_indices) {
        // The template declares TRANS_IDX as unsigned char and brace-initialises
        // it. Where char is signed, indices >= 128 would otherwise be printed as
        // negative constants, which is a narrowing conversion in the generated code.
        const unsigned value = static_cast<unsigned char>(idx);
        new_trans.push_back(templ::make_map({{"trans", templ::make_string("(unsigned char)" + std::to_string(value))}}));
    }
    return templ::make_array(new_trans);
}
} } //namespace lxs::backends

View File

@ -4,11 +4,11 @@
#include <iostream>
namespace { //The automaton data
typedef unsigned long long State;
typedef std::size_t State;
State REJECT = {{reject_state}};
char TRANS_IDX[256] = { {{#trans_idx}}{{trans}}, {{/trans_idx}} };
unsigned char TRANS_IDX[256] = { {{#trans_idx}}{{trans}}, {{/trans_idx}} };
State TABLE[{{num_states}}][{{num_transitions_per_state}}] = { {{#table}}
{ {{#row}} {{state}}, {{/row}} },
@ -42,7 +42,7 @@ namespace { //The automaton data
token += c;
state = TABLE[state][TRANS_IDX[c]];
state = TABLE[state][TRANS_IDX[(unsigned char)c]];
if (TOKENS[state])
{
match_length = token.length();
@ -99,10 +99,10 @@ int main(int argc, char** argv) {
while (true)
std::cout << "Match: " << lex.nextToken().content << std::endl;
}
catch (decltype(lex)::NoMoreTokens& err) {
catch ({{name}}::NoMoreTokens& err) {
std::cout << "DONE, read " << lex.getByteOffset() << " bytes." << std::endl;
}
catch (decltype(lex)::NoMatch& err) {
catch ({{name}}::NoMatch& err) {
std::cout << "No match, " << lex.getByteOffset() << std::endl;
}
}