First implementation of the C++ backend; template and main integration are
still pending.
This commit is contained in:
parent
a1220fee64
commit
3d59a970f8
|
@ -5,7 +5,10 @@
|
||||||
#include "Lexesis/automata.h"
|
#include "Lexesis/automata.h"
|
||||||
#include "Lexesis/template.h"
|
#include "Lexesis/template.h"
|
||||||
|
|
||||||
|
#include "mstch/mstch.hpp"
|
||||||
|
|
||||||
#include <functional>
|
#include <functional>
|
||||||
|
#include <memory>
|
||||||
#include <string>
|
#include <string>
|
||||||
|
|
||||||
namespace lxs {
|
namespace lxs {
|
||||||
|
@ -17,7 +20,7 @@ namespace lxs {
|
||||||
virtual std::string getName() = 0;
|
virtual std::string getName() = 0;
|
||||||
virtual bool canProcessLang(std::string lang);
|
virtual bool canProcessLang(std::string lang);
|
||||||
|
|
||||||
virtual void generateLexer(std::function<std::ostream&(std::string)> getOstreamForFileName, std::string lexerName, const DFA& dfa) = 0;
|
virtual void generateLexer(std::function<std::unique_ptr<std::ostream>(std::string)> getOstreamForFileName, std::string lexerName, const DFA& dfa) = 0;
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
void doTemplate(std::ostream& out, std::string templateName, templ::TemplateContext context);
|
void doTemplate(std::ostream& out, std::string templateName, templ::TemplateContext context);
|
||||||
|
|
|
@ -0,0 +1,30 @@
|
||||||
|
#pragma once
#ifndef LEXESIS_BACKENDS_CPP_H
#define LEXESIS_BACKENDS_CPP_H

#include "Lexesis/backend.h"

#include <functional>
#include <memory>
#include <string>
#include <utility>
#include <vector>

namespace lxs
{
namespace backends
{
    /**
     * Backend that generates a C++ lexer (<name>.h and <name>.cpp) from a DFA.
     *
     * NOTE(review): Backend::getName() is pure virtual and not overridden
     * here, so CppBackend is still abstract — confirm getName() is added
     * before this backend is instantiated.
     */
    class CppBackend : public Backend {
        public:
            CppBackend();
            virtual ~CppBackend();

            /// True for "c++", "cpp" or "cxx" (case-insensitive).
            virtual bool canProcessLang(std::string lang) override;

            /// Render the lexer header and implementation for `lexerName`
            /// from `dfa`, writing through the streams obtained from
            /// `getOstreamForFileName`.
            virtual void generateLexer(std::function<std::unique_ptr<std::ostream>(std::string)> getOstreamForFileName, std::string lexerName, const DFA& dfa) override;

        private:
            /// Build the template context for the state-transition table.
            templ::TemplateContext buildTable(const DFA& dfa, const std::vector<char>& transition_idx, int num_transitions_per_state) const;

            /// Build the template context listing every distinct token type.
            templ::TemplateContext buildTokenList(const DFA& dfa) const;

            /// Map each of the 256 input bytes to a compressed column index;
            /// returns the mapping and the number of distinct columns.
            std::pair<std::vector<char>, int> buildTransitionIndices(const DFA& dfa) const;

            /// Wrap the transition-index array for the template engine.
            templ::TemplateContext transformTransitionIndices(std::vector<char> transition_indices) const;
    };

} //namespace backends
} //namespace lxs

#endif //LEXESIS_BACKENDS_CPP_H
|
|
@ -0,0 +1,101 @@
|
||||||
|
#include "Lexesis/backends/cpp.h"

#include <cassert>
#include <cctype>
#include <functional>
#include <map>
#include <memory>
#include <set>
#include <string>
#include <utility>
#include <vector>
|
namespace lxs { namespace backends {
|
||||||
|
|
||||||
|
// Construction and destruction add nothing beyond the Backend base, so the
// compiler-generated versions are exactly equivalent.
CppBackend::CppBackend() = default;

CppBackend::~CppBackend() = default;
||||||
|
|
||||||
|
/// Case-insensitively accept the common spellings of the C++ language name.
bool CppBackend::canProcessLang(std::string lang) {
    // BUG FIX: std::tolower has undefined behaviour when fed a negative
    // char value, so route the argument through unsigned char.
    for (char& c : lang)
        c = (char)std::tolower((unsigned char)c);
    return lang == "c++" || lang == "cpp" || lang == "cxx";
}
||||||
|
|
||||||
|
void CppBackend::generateLexer(
|
||||||
|
std::function<std::unique_ptr<std::ostream>(std::string)> getOstreamForFileName,
|
||||||
|
std::string lexerName,
|
||||||
|
const DFA& dfa)
|
||||||
|
{
|
||||||
|
assert(lexerName.length());
|
||||||
|
|
||||||
|
std::unique_ptr<std::ostream> headerStream = getOstreamForFileName(lexerName + ".h");
|
||||||
|
std::unique_ptr<std::ostream> implementationStream = getOstreamForFileName(lexerName + ".cpp");
|
||||||
|
|
||||||
|
std::map<std::string, templ::TemplateContext> topLevel;
|
||||||
|
|
||||||
|
lexerName[0] = std::toupper(lexerName[0]);
|
||||||
|
topLevel["name"] = templ::make_string(lexerName);
|
||||||
|
|
||||||
|
//The DEADSTATE gets a brand new state: dfa.numStates
|
||||||
|
topLevel["reject_state"] = templ::make_string(std::to_string(dfa.numStates));
|
||||||
|
topLevel["num_states"] = templ::make_string(std::to_string(dfa.numStates + 1));
|
||||||
|
|
||||||
|
auto transition_indices = buildTransitionIndices(dfa);
|
||||||
|
topLevel["trans_idx"] = transformTransitionIndices(transition_indices.first);
|
||||||
|
topLevel["num_transitions_per_state"] = templ::make_string(std::to_string(transition_indices.second));
|
||||||
|
|
||||||
|
topLevel["table"] = buildTable(dfa, transition_indices.first);
|
||||||
|
|
||||||
|
topLevel["token_types"] = buildTokenList(dfa);
|
||||||
|
|
||||||
|
templ::TemplateContext topLevelMap = templ::make_map(topLevel);
|
||||||
|
|
||||||
|
doTemplate(*headerStream, "c++/lexer.h", topLevelMap);
|
||||||
|
doTemplate(*implementationStream, "c++/lexer.cpp", topLevelMap);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Build the {{#table}}{{#row}}{{state}}… context for the generated
/// transition table: one entry per state (plus the reject state), each
/// holding a "row" array of num_transitions_per_state target states.
templ::TemplateContext CppBackend::buildTable(const DFA& dfa, const std::vector<char>& transition_idx, int num_transitions_per_state) const {
    // Invert the byte -> column mapping: keep one representative input byte
    // for every compressed column index.
    std::map<char, char> reverse_trans;
    for (int i = 0; i < 256; i++) {
        reverse_trans[transition_idx[i]] = (char)i;
    }

    std::vector<templ::TemplateContext> table;

    for (State s = 0; s < dfa.numStates; s++) {
        std::vector<templ::TemplateContext> row;
        for (int i = 0; i < num_transitions_per_state; i++) {
            State to = dfa.delta.find(s)->second.find(reverse_trans[(char)i])->second;
            row.push_back(templ::make_map({{"state", templ::make_string(std::to_string(to))}}));
        }
        // BUG FIX: the original never appended `row`, so the rendered table
        // was always empty. The lexer template iterates {{#table}} and then
        // {{#row}}, so each entry must expose its row under the "row" key.
        table.push_back(templ::make_map({{"row", templ::make_array(row)}}));
    }

    // BUG FIX: the generated TABLE has num_states + 1 rows — the extra one
    // is the reject/dead state (see generateLexer). Emit it explicitly so it
    // self-loops on every input instead of zero-initializing to state 0.
    std::vector<templ::TemplateContext> reject_row;
    for (int i = 0; i < num_transitions_per_state; i++) {
        reject_row.push_back(templ::make_map({{"state", templ::make_string(std::to_string(dfa.numStates))}}));
    }
    table.push_back(templ::make_map({{"row", templ::make_array(reject_row)}}));

    return templ::make_array(table);
}
|
||||||
|
|
||||||
|
/// Build the {{#token_types}}{{type}} context: the sorted set of distinct
/// token names accepted by the DFA, always including "ignore".
templ::TemplateContext CppBackend::buildTokenList(const DFA& dfa) const {
    std::set<std::string> tokens;
    tokens.insert("ignore");
    for (const auto& accepting : dfa.acceptingToken)
        tokens.insert(accepting.second);

    std::vector<templ::TemplateContext> tokenList;
    for (const std::string& tok : tokens)
        tokenList.push_back(templ::make_map({{"type", templ::make_string(tok)}}));

    return templ::make_array(tokenList);
}
|
||||||
|
|
||||||
|
/// Map every input byte to a transition-table column. Currently the identity
/// mapping: each of the 256 bytes gets its own column.
std::pair<std::vector<char>, int> CppBackend::buildTransitionIndices(const DFA& /* dfa */) const {
    //FIXME: this is not really optimal ;-)
    // A smarter version would merge bytes whose columns are identical in
    // every state, shrinking the generated table.
    std::vector<char> transition_idx(256);
    for (int b = 0; b < 256; ++b)
        transition_idx[b] = (char)b;
    return std::make_pair(transition_idx, 256);
}
|
||||||
|
|
||||||
|
/// Build the {{#trans_idx}}{{trans}} context: each entry is rendered as
/// "(char)<n>" so the template emits a valid C++ char-array initializer.
templ::TemplateContext CppBackend::transformTransitionIndices(std::vector<char> transition_indices) const {
    std::vector<templ::TemplateContext> rendered;
    rendered.reserve(transition_indices.size());
    for (char idx : transition_indices) {
        rendered.push_back(templ::make_map({{"trans", templ::make_string("(char)" + std::to_string(idx))}}));
    }
    return templ::make_array(rendered);
}
|
||||||
|
|
||||||
|
} } //namespace lxs::backends
|
|
@ -4,11 +4,11 @@
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
|
|
||||||
namespace { //The automaton data
|
namespace { //The automaton data
|
||||||
typedef unsigned long long State;
|
typedef std::size_t State;
|
||||||
|
|
||||||
State REJECT = {{reject_state}};
|
State REJECT = {{reject_state}};
|
||||||
|
|
||||||
char TRANS_IDX[256] = { {{#trans_idx}}{{trans}}, {{/trans_idx}} };
|
unsigned char TRANS_IDX[256] = { {{#trans_idx}}{{trans}}, {{/trans_idx}} };
|
||||||
|
|
||||||
State TABLE[{{num_states}}][{{num_transitions_per_state}}] = { {{#table}}
|
State TABLE[{{num_states}}][{{num_transitions_per_state}}] = { {{#table}}
|
||||||
{ {{#row}} {{state}}, {{/row}} },
|
{ {{#row}} {{state}}, {{/row}} },
|
||||||
|
@ -42,7 +42,7 @@ namespace { //The automaton data
|
||||||
|
|
||||||
token += c;
|
token += c;
|
||||||
|
|
||||||
state = TABLE[state][TRANS_IDX[c]];
|
state = TABLE[state][TRANS_IDX[(unsigned char)c]];
|
||||||
if (TOKENS[state])
|
if (TOKENS[state])
|
||||||
{
|
{
|
||||||
match_length = token.length();
|
match_length = token.length();
|
||||||
|
@ -99,10 +99,10 @@ int main(int argc, char** argv) {
|
||||||
while (true)
|
while (true)
|
||||||
std::cout << "Match: " << lex.nextToken().content << std::endl;
|
std::cout << "Match: " << lex.nextToken().content << std::endl;
|
||||||
}
|
}
|
||||||
catch (decltype(lex)::NoMoreTokens& err) {
|
catch ({{name}}::NoMoreTokens& err) {
|
||||||
std::cout << "DONE, read " << lex.getByteOffset() << " bytes." << std::endl;
|
std::cout << "DONE, read " << lex.getByteOffset() << " bytes." << std::endl;
|
||||||
}
|
}
|
||||||
catch (decltype(lex)::NoMatch& err) {
|
catch ({{name}}::NoMatch& err) {
|
||||||
std::cout << "No match, " << lex.getByteOffset() << std::endl;
|
std::cout << "No match, " << lex.getByteOffset() << std::endl;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue