Moved input parsing to a separate library and moved some work to the driver

This commit is contained in:
Thomas Avé 2017-01-19 19:10:07 +01:00
parent 27ab8dc6da
commit c1befc5a76
4 changed files with 55 additions and 48 deletions

View File

@ -7,9 +7,7 @@
#include <string> #include <string>
namespace lxs { namespace lxs {
struct DFA; namespace input {
struct ENFA;
/** /**
* Used for parsing token rules * Used for parsing token rules
@ -17,19 +15,21 @@ namespace lxs {
class InputParser { class InputParser {
public: public:
/** /**
* parse the tokens rules read from `is` and return the minimized constructed dfa from those rules * parse the tokens rules read from `is`
*
* std::vector<std::pair<int, std::pair<std::string,std::string>>>
* <line number, <tokenname, regex> >
*/ */
static DFA parseInput(std::istream& is); static std::vector<std::pair<int, std::pair<std::string,std::string>>> parseInput(std::istream& is);
private:
/**
* parse the lines and return pairs of (Token type, regex)
*/
static std::vector<std::pair<int, std::pair<std::string,std::string> > > parseLines(std::istream &is);
/** /**
* Convert the lines from `parseLines` to ENFA's * Get a list of the tokens specified in the istream
*
* @param is The istream to get the tokens from
*
* @return the list of tokens
*/ */
static std::vector<ENFA> linesToEnfa(std::vector<std::pair<int, std::pair<std::string,std::string> > > &input); static std::vector<std::string> getTokens(std::istream& is);
}; };
/** /**
@ -45,5 +45,5 @@ namespace lxs {
std::string m_what; std::string m_what;
}; };
} }
}
#endif // INPUT_PARSER_H #endif // INPUT_PARSER_H

View File

@ -12,20 +12,23 @@ add_library(lxs
driver.cpp driver.cpp
RegexLexer.cpp RegexLexer.cpp
re.cpp re.cpp
inputparser.cpp
template.cpp template.cpp
) )
add_library(lxsinput
inputparser.cpp
)
add_executable(Lexesis add_executable(Lexesis
main.cpp main.cpp
) )
target_link_libraries(Lexesis Lexesis-backends lxs mstch::mstch) target_link_libraries(Lexesis Lexesis-backends lxs lxsinput mstch::mstch)
if (CMAKE_BUILD_TYPE MATCHES Debug) if (CMAKE_BUILD_TYPE MATCHES Debug)
add_executable(Lexesis-test add_executable(Lexesis-test
test.cpp test.cpp
) )
target_link_libraries(Lexesis-test Lexesis-backends lxs mstch::mstch) target_link_libraries(Lexesis-test Lexesis-backends lxs lxsinput mstch::mstch)
endif() endif()
install(TARGETS Lexesis install(TARGETS Lexesis

View File

@ -1,5 +1,7 @@
#include "Lexesis/driver.h" #include "Lexesis/driver.h"
#include "Lexesis/inputparser.h" #include "Lexesis/inputparser.h"
#include "Lexesis/automata.h"
#include "Lexesis/re.h"
#include <iostream> #include <iostream>
#include <fstream> #include <fstream>
@ -16,6 +18,22 @@ namespace {
} }
return s; return s;
} }
std::vector<lxs::ENFA> linesToEnfa(std::vector<std::pair<int, std::pair<std::string,std::string> > > &input) {
std::vector<lxs::ENFA> result;
for(unsigned int i=0;i<input.size();i++) {
std::shared_ptr<lxs::RE> re = lxs::parseRE(input[i].second.second);
lxs::ENFA enfa;
re->toENFA(enfa,0);
enfa.numStates++;
enfa.starting = 0;
enfa.priority[(lxs::State) *enfa.accepting.begin()] = (lxs::Priority) i;
enfa.acceptingToken[(lxs::State) *enfa.accepting.begin()] = input[i].second.first;
result.push_back(enfa);
}
return result;
}
} }
namespace lxs { namespace lxs {
@ -41,7 +59,10 @@ namespace lxs {
Backend* back = m_backends->findBackendForLang(m_language); Backend* back = m_backends->findBackendForLang(m_language);
if (!back) throw DriverException("Could not find a valid backend for language " + m_language ); if (!back) throw DriverException("Could not find a valid backend for language " + m_language );
DFA dfa = InputParser::parseInput(m_inputfile); auto input = input::InputParser::parseInput(m_inputfile);
auto enfas = linesToEnfa(input);
auto enfa = merge(enfas);
auto dfa = minimize(mssc(enfa));
back->generateLexer([this](std::string filename) -> std::unique_ptr<std::ostream> { back->generateLexer([this](std::string filename) -> std::unique_ptr<std::ostream> {
return std::unique_ptr<std::ostream>(new std::ofstream(m_outputdir + "/" + filename)); return std::unique_ptr<std::ostream>(new std::ofstream(m_outputdir + "/" + filename));

View File

@ -1,6 +1,4 @@
#include "Lexesis/automata.h"
#include "Lexesis/inputparser.h" #include "Lexesis/inputparser.h"
#include "Lexesis/re.h"
#include <istream> #include <istream>
#include <string> #include <string>
@ -9,21 +7,24 @@
#include <exception> #include <exception>
namespace lxs { namespace lxs {
namespace input {
InputParserException::InputParserException(std::string what): m_what(what) {} InputParserException::InputParserException(std::string what): m_what(what) {}
const char* InputParserException::what() const throw() { const char* InputParserException::what() const throw() {
return m_what.c_str(); return m_what.c_str();
} }
DFA InputParser::parseInput(std::istream &is) { std::vector<std::string> InputParser::getTokens(std::istream& is) {
auto enfavect = parseLines(is); auto lines = parseInput(is);
auto enfas = linesToEnfa(enfavect); std::vector<std::string> tokens;
auto enfa = merge(enfas); for(auto line: lines) {
auto dfa = minimize(mssc(enfa)); tokens.push_back(line.second.first);
return dfa; std::cout << line.second.first << std::endl;
}
return tokens;
} }
std::vector<std::pair<int, std::pair<std::string,std::string> > > InputParser::parseLines(std::istream &is) { std::vector<std::pair<int, std::pair<std::string,std::string>>> InputParser::parseInput(std::istream &is) {
std::string line; std::string line;
std::vector<std::pair<int, std::pair<std::string,std::string> > > result; std::vector<std::pair<int, std::pair<std::string,std::string> > > result;
unsigned int i=0; unsigned int i=0;
@ -49,23 +50,5 @@ namespace lxs {
return result; return result;
} }
std::vector<ENFA> InputParser::linesToEnfa(std::vector<std::pair<int, std::pair<std::string,std::string> > > &input) { }
std::vector<ENFA> result;
for(unsigned int i=0;i<input.size();i++) {
try {
std::shared_ptr<lxs::RE> re = lxs::parseRE(input[i].second.second);
ENFA enfa;
re->toENFA(enfa,0);
enfa.numStates++;
enfa.starting = 0;
enfa.priority[(State) *enfa.accepting.begin()] = (Priority) i;
enfa.acceptingToken[(State) *enfa.accepting.begin()] = input[i].second.first;
result.push_back(enfa);
} catch(SyntaxError &err) {
throw InputParserException("Error when parsing regex on line " + std::to_string(input[i].first) + ":\n\t" + err.what());
}
}
return result;
}
} }