First implementation of the C++ backend; awaiting template and main
integration
This commit is contained in:
parent
a1220fee64
commit
3d59a970f8
|
@ -5,7 +5,10 @@
|
|||
#include "Lexesis/automata.h"
|
||||
#include "Lexesis/template.h"
|
||||
|
||||
#include "mstch/mstch.hpp"
|
||||
|
||||
#include <functional>
|
||||
#include <memory>
|
||||
#include <string>
|
||||
|
||||
namespace lxs {
|
||||
|
@ -17,7 +20,7 @@ namespace lxs {
|
|||
virtual std::string getName() = 0;
|
||||
virtual bool canProcessLang(std::string lang);
|
||||
|
||||
virtual void generateLexer(std::function<std::ostream&(std::string)> getOstreamForFileName, std::string lexerName, const DFA& dfa) = 0;
|
||||
virtual void generateLexer(std::function<std::unique_ptr<std::ostream>(std::string)> getOstreamForFileName, std::string lexerName, const DFA& dfa) = 0;
|
||||
|
||||
protected:
|
||||
void doTemplate(std::ostream& out, std::string templateName, templ::TemplateContext context);
|
||||
|
|
|
@ -0,0 +1,30 @@
|
|||
#pragma once
#ifndef LEXESIS_BACKENDS_CPP_H
#define LEXESIS_BACKENDS_CPP_H

#include "Lexesis/backend.h"

#include <functional>
#include <memory>
#include <ostream>
#include <string>
#include <utility>
#include <vector>

namespace lxs
{
namespace backends
{
	/**
	 * Backend that renders a DFA into a C++ lexer (a <name>.h / <name>.cpp pair)
	 * using the "c++/lexer.h" and "c++/lexer.cpp" templates.
	 */
	class CppBackend : public Backend {
		public:
			CppBackend();
			virtual ~CppBackend();

			/// Accepts "c++", "cpp" and "cxx" (case-insensitive).
			virtual bool canProcessLang(std::string lang) override;

			/// Generate <lexerName>.h and <lexerName>.cpp for `dfa`,
			/// obtaining output streams through `getOstreamForFileName`.
			virtual void generateLexer(std::function<std::unique_ptr<std::ostream>(std::string)> getOstreamForFileName, std::string lexerName, const DFA& dfa) override;

		private:
			/// Build the template context for the state-transition table.
			templ::TemplateContext buildTable(const DFA& dfa, const std::vector<char>& transition_idx, int num_transitions_per_state) const;
			/// Build the template context listing every distinct token type.
			templ::TemplateContext buildTokenList(const DFA& dfa) const;
			/// Map each of the 256 input bytes to a transition-class index;
			/// returns the mapping and the number of distinct classes.
			std::pair<std::vector<char>, int> buildTransitionIndices(const DFA& dfa) const;
			/// Wrap the transition indices for consumption by the template.
			templ::TemplateContext transformTransitionIndices(std::vector<char> transition_indices) const;
	};

} // namespace backends
} // namespace lxs

#endif //LEXESIS_BACKENDS_CPP_H
|
|
@ -0,0 +1,101 @@
|
|||
#include "Lexesis/backends/cpp.h"

#include <cassert>
#include <cctype>
#include <map>
#include <set>
#include <string>
#include <utility>
#include <vector>
namespace lxs { namespace backends {
|
||||
|
||||
// Out-of-line defaulted special members: nothing to set up or tear down.
CppBackend::CppBackend() = default;

CppBackend::~CppBackend() = default;
|
||||
|
||||
bool CppBackend::canProcessLang(std::string lang) {
	// Lowercase the language tag first so the comparison is case-insensitive.
	// BUG FIX: std::tolower has undefined behavior when passed a plain char
	// holding a negative value; go through unsigned char.
	for (char& c : lang)
		c = static_cast<char>(std::tolower(static_cast<unsigned char>(c)));
	return lang == "c++" || lang == "cpp" || lang == "cxx";
}
||||
|
||||
void CppBackend::generateLexer(
|
||||
std::function<std::unique_ptr<std::ostream>(std::string)> getOstreamForFileName,
|
||||
std::string lexerName,
|
||||
const DFA& dfa)
|
||||
{
|
||||
assert(lexerName.length());
|
||||
|
||||
std::unique_ptr<std::ostream> headerStream = getOstreamForFileName(lexerName + ".h");
|
||||
std::unique_ptr<std::ostream> implementationStream = getOstreamForFileName(lexerName + ".cpp");
|
||||
|
||||
std::map<std::string, templ::TemplateContext> topLevel;
|
||||
|
||||
lexerName[0] = std::toupper(lexerName[0]);
|
||||
topLevel["name"] = templ::make_string(lexerName);
|
||||
|
||||
//The DEADSTATE gets a brand new state: dfa.numStates
|
||||
topLevel["reject_state"] = templ::make_string(std::to_string(dfa.numStates));
|
||||
topLevel["num_states"] = templ::make_string(std::to_string(dfa.numStates + 1));
|
||||
|
||||
auto transition_indices = buildTransitionIndices(dfa);
|
||||
topLevel["trans_idx"] = transformTransitionIndices(transition_indices.first);
|
||||
topLevel["num_transitions_per_state"] = templ::make_string(std::to_string(transition_indices.second));
|
||||
|
||||
topLevel["table"] = buildTable(dfa, transition_indices.first);
|
||||
|
||||
topLevel["token_types"] = buildTokenList(dfa);
|
||||
|
||||
templ::TemplateContext topLevelMap = templ::make_map(topLevel);
|
||||
|
||||
doTemplate(*headerStream, "c++/lexer.h", topLevelMap);
|
||||
doTemplate(*implementationStream, "c++/lexer.cpp", topLevelMap);
|
||||
}
|
||||
|
||||
/**
 * Build the template context for the transition table: one {"row": [...]}
 * entry per state, each row holding one {"state": "<target>"} per
 * transition class. The template renders this as
 * `State TABLE[num_states][num_transitions_per_state]`.
 */
templ::TemplateContext CppBackend::buildTable(const DFA& dfa, const std::vector<char>& transition_idx, int num_transitions_per_state) const {
	// Invert transition_idx: for every transition class, remember one
	// representative input byte that maps to it.
	std::map<char, char> reverse_trans;
	for (int i = 0; i < 256; i++) {
		reverse_trans[transition_idx[i]] = static_cast<char>(i);
	}

	std::vector<templ::TemplateContext> table;

	for (State s = 0; s < dfa.numStates; s++) {
		std::vector<templ::TemplateContext> row;
		for (int i = 0; i < num_transitions_per_state; i++) {
			State to = dfa.delta.find(s)->second.find(reverse_trans[static_cast<char>(i)])->second;
			row.push_back(templ::make_map({{"state", templ::make_string(std::to_string(to))}}));
		}
		// BUG FIX: the row was built but never appended, so the rendered
		// table was always empty. The template iterates {{#table}}…{{#row}}.
		table.push_back(templ::make_map({{"row", templ::make_array(row)}}));
	}

	// The template declares num_states = dfa.numStates + 1 rows (the extra
	// one being the reject/dead state); without this row the dead state
	// would zero-initialize and transition to state 0. Make it self-loop.
	std::vector<templ::TemplateContext> rejectRow;
	for (int i = 0; i < num_transitions_per_state; i++) {
		rejectRow.push_back(templ::make_map({{"state", templ::make_string(std::to_string(dfa.numStates))}}));
	}
	table.push_back(templ::make_map({{"row", templ::make_array(rejectRow)}}));

	return templ::make_array(table);
}
|
||||
|
||||
/**
 * Collect the distinct token-type names of `dfa` (plus the always-present
 * "ignore" token) as a sorted template array of {"type": name} entries.
 */
templ::TemplateContext CppBackend::buildTokenList(const DFA& dfa) const {
	std::set<std::string> names;
	for (const auto& stateAndToken : dfa.acceptingToken)
		names.insert(stateAndToken.second);
	names.insert("ignore");

	std::vector<templ::TemplateContext> tokenList;
	tokenList.reserve(names.size());
	for (const std::string& name : names)
		tokenList.push_back(templ::make_map({{"type", templ::make_string(name)}}));

	return templ::make_array(tokenList);
}
|
||||
|
||||
/**
 * Map each of the 256 input bytes to a transition-class index.
 * Currently the identity mapping: every byte is its own class.
 */
std::pair<std::vector<char>, int> CppBackend::buildTransitionIndices(const DFA& /* dfa */) const {
	//FIXME: this is not really optimal ;-)
	std::vector<char> indices;
	indices.reserve(256);
	for (int b = 0; b < 256; ++b)
		indices.push_back(static_cast<char>(b));
	return std::make_pair(indices, 256);
}
|
||||
|
||||
/**
 * Wrap each transition index as a {"trans": "(char)<value>"} map so the
 * template can emit it as a C++ char-array initializer element.
 */
templ::TemplateContext CppBackend::transformTransitionIndices(std::vector<char> transition_indices) const {
	std::vector<templ::TemplateContext> wrapped;
	wrapped.reserve(transition_indices.size());
	for (char idx : transition_indices) {
		wrapped.push_back(templ::make_map({{"trans", templ::make_string("(char)" + std::to_string(idx))}}));
	}
	return templ::make_array(wrapped);
}
|
||||
|
||||
} } //namespace lxs::backends
|
|
@ -4,11 +4,11 @@
|
|||
#include <iostream>
|
||||
|
||||
namespace { //The automaton data
|
||||
typedef unsigned long long State;
|
||||
typedef std::size_t State;
|
||||
|
||||
State REJECT = {{reject_state}};
|
||||
|
||||
char TRANS_IDX[256] = { {{#trans_idx}}{{trans}}, {{/trans_idx}} };
|
||||
unsigned char TRANS_IDX[256] = { {{#trans_idx}}{{trans}}, {{/trans_idx}} };
|
||||
|
||||
State TABLE[{{num_states}}][{{num_transitions_per_state}}] = { {{#table}}
|
||||
{ {{#row}} {{state}}, {{/row}} },
|
||||
|
@ -42,7 +42,7 @@ namespace { //The automaton data
|
|||
|
||||
token += c;
|
||||
|
||||
state = TABLE[state][TRANS_IDX[c]];
|
||||
state = TABLE[state][TRANS_IDX[(unsigned char)c]];
|
||||
if (TOKENS[state])
|
||||
{
|
||||
match_length = token.length();
|
||||
|
@ -99,10 +99,10 @@ int main(int argc, char** argv) {
|
|||
while (true)
|
||||
std::cout << "Match: " << lex.nextToken().content << std::endl;
|
||||
}
|
||||
catch (decltype(lex)::NoMoreTokens& err) {
|
||||
catch ({{name}}::NoMoreTokens& err) {
|
||||
std::cout << "DONE, read " << lex.getByteOffset() << " bytes." << std::endl;
|
||||
}
|
||||
catch (decltype(lex)::NoMatch& err) {
|
||||
catch ({{name}}::NoMatch& err) {
|
||||
std::cout << "No match, " << lex.getByteOffset() << std::endl;
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue