Moved input parsing to a separate library and moved some work to the driver
This commit is contained in:
parent
27ab8dc6da
commit
c1befc5a76
|
@ -7,9 +7,7 @@
|
||||||
#include <string>
|
#include <string>
|
||||||
|
|
||||||
namespace lxs {
|
namespace lxs {
|
||||||
struct DFA;
|
namespace input {
|
||||||
struct ENFA;
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Used for parsing token rules
|
* Used for parsing token rules
|
||||||
|
@ -17,19 +15,21 @@ namespace lxs {
|
||||||
class InputParser {
|
class InputParser {
|
||||||
public:
|
public:
|
||||||
/**
|
/**
|
||||||
* parse the tokens rules read from `is` and return the minimized constructed dfa from those rules
|
* parse the tokens rules read from `is`
|
||||||
|
*
|
||||||
|
* std::vector<std::pair<int, std::pair<std::string,std::string>>>
|
||||||
|
* <line number, <tokenname, regex> >
|
||||||
*/
|
*/
|
||||||
static DFA parseInput(std::istream& is);
|
static std::vector<std::pair<int, std::pair<std::string,std::string>>> parseInput(std::istream& is);
|
||||||
private:
|
|
||||||
/**
|
|
||||||
* parse the lines and return pairs of (Token type, regex)
|
|
||||||
*/
|
|
||||||
static std::vector<std::pair<int, std::pair<std::string,std::string> > > parseLines(std::istream &is);
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Convert the lines from `parseLines` to ENFA's
|
* Get a list of the tokens specified in the istream
|
||||||
|
*
|
||||||
|
* @param is The istream to get the tokens from
|
||||||
|
*
|
||||||
|
* @return the list of tokens
|
||||||
*/
|
*/
|
||||||
static std::vector<ENFA> linesToEnfa(std::vector<std::pair<int, std::pair<std::string,std::string> > > &input);
|
static std::vector<std::string> getTokens(std::istream& is);
|
||||||
};
|
};
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -45,5 +45,5 @@ namespace lxs {
|
||||||
std::string m_what;
|
std::string m_what;
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
}
|
||||||
#endif // INPUT_PARSER_H
|
#endif // INPUT_PARSER_H
|
||||||
|
|
|
@ -12,20 +12,23 @@ add_library(lxs
|
||||||
driver.cpp
|
driver.cpp
|
||||||
RegexLexer.cpp
|
RegexLexer.cpp
|
||||||
re.cpp
|
re.cpp
|
||||||
inputparser.cpp
|
|
||||||
template.cpp
|
template.cpp
|
||||||
)
|
)
|
||||||
|
|
||||||
|
add_library(lxsinput
|
||||||
|
inputparser.cpp
|
||||||
|
)
|
||||||
|
|
||||||
add_executable(Lexesis
|
add_executable(Lexesis
|
||||||
main.cpp
|
main.cpp
|
||||||
)
|
)
|
||||||
target_link_libraries(Lexesis Lexesis-backends lxs mstch::mstch)
|
target_link_libraries(Lexesis Lexesis-backends lxs lxsinput mstch::mstch)
|
||||||
|
|
||||||
if (CMAKE_BUILD_TYPE MATCHES Debug)
|
if (CMAKE_BUILD_TYPE MATCHES Debug)
|
||||||
add_executable(Lexesis-test
|
add_executable(Lexesis-test
|
||||||
test.cpp
|
test.cpp
|
||||||
)
|
)
|
||||||
target_link_libraries(Lexesis-test Lexesis-backends lxs mstch::mstch)
|
target_link_libraries(Lexesis-test Lexesis-backends lxs lxsinput mstch::mstch)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
install(TARGETS Lexesis
|
install(TARGETS Lexesis
|
||||||
|
|
|
@ -1,5 +1,7 @@
|
||||||
#include "Lexesis/driver.h"
|
#include "Lexesis/driver.h"
|
||||||
#include "Lexesis/inputparser.h"
|
#include "Lexesis/inputparser.h"
|
||||||
|
#include "Lexesis/automata.h"
|
||||||
|
#include "Lexesis/re.h"
|
||||||
|
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
#include <fstream>
|
#include <fstream>
|
||||||
|
@ -16,6 +18,22 @@ namespace {
|
||||||
}
|
}
|
||||||
return s;
|
return s;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
std::vector<lxs::ENFA> linesToEnfa(std::vector<std::pair<int, std::pair<std::string,std::string> > > &input) {
|
||||||
|
std::vector<lxs::ENFA> result;
|
||||||
|
for(unsigned int i=0;i<input.size();i++) {
|
||||||
|
std::shared_ptr<lxs::RE> re = lxs::parseRE(input[i].second.second);
|
||||||
|
lxs::ENFA enfa;
|
||||||
|
re->toENFA(enfa,0);
|
||||||
|
enfa.numStates++;
|
||||||
|
enfa.starting = 0;
|
||||||
|
enfa.priority[(lxs::State) *enfa.accepting.begin()] = (lxs::Priority) i;
|
||||||
|
enfa.acceptingToken[(lxs::State) *enfa.accepting.begin()] = input[i].second.first;
|
||||||
|
result.push_back(enfa);
|
||||||
|
}
|
||||||
|
|
||||||
|
return result;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
namespace lxs {
|
namespace lxs {
|
||||||
|
@ -41,7 +59,10 @@ namespace lxs {
|
||||||
Backend* back = m_backends->findBackendForLang(m_language);
|
Backend* back = m_backends->findBackendForLang(m_language);
|
||||||
if (!back) throw DriverException("Could not find a valid backend for language " + m_language );
|
if (!back) throw DriverException("Could not find a valid backend for language " + m_language );
|
||||||
|
|
||||||
DFA dfa = InputParser::parseInput(m_inputfile);
|
auto input = input::InputParser::parseInput(m_inputfile);
|
||||||
|
auto enfas = linesToEnfa(input);
|
||||||
|
auto enfa = merge(enfas);
|
||||||
|
auto dfa = minimize(mssc(enfa));
|
||||||
|
|
||||||
back->generateLexer([this](std::string filename) -> std::unique_ptr<std::ostream> {
|
back->generateLexer([this](std::string filename) -> std::unique_ptr<std::ostream> {
|
||||||
return std::unique_ptr<std::ostream>(new std::ofstream(m_outputdir + "/" + filename));
|
return std::unique_ptr<std::ostream>(new std::ofstream(m_outputdir + "/" + filename));
|
||||||
|
|
|
@ -1,6 +1,4 @@
|
||||||
#include "Lexesis/automata.h"
|
|
||||||
#include "Lexesis/inputparser.h"
|
#include "Lexesis/inputparser.h"
|
||||||
#include "Lexesis/re.h"
|
|
||||||
|
|
||||||
#include <istream>
|
#include <istream>
|
||||||
#include <string>
|
#include <string>
|
||||||
|
@ -9,21 +7,24 @@
|
||||||
#include <exception>
|
#include <exception>
|
||||||
|
|
||||||
namespace lxs {
|
namespace lxs {
|
||||||
|
namespace input {
|
||||||
|
|
||||||
InputParserException::InputParserException(std::string what): m_what(what) {}
|
InputParserException::InputParserException(std::string what): m_what(what) {}
|
||||||
const char* InputParserException::what() const throw() {
|
const char* InputParserException::what() const throw() {
|
||||||
return m_what.c_str();
|
return m_what.c_str();
|
||||||
}
|
}
|
||||||
|
|
||||||
DFA InputParser::parseInput(std::istream &is) {
|
std::vector<std::string> InputParser::getTokens(std::istream& is) {
|
||||||
auto enfavect = parseLines(is);
|
auto lines = parseInput(is);
|
||||||
auto enfas = linesToEnfa(enfavect);
|
std::vector<std::string> tokens;
|
||||||
auto enfa = merge(enfas);
|
for(auto line: lines) {
|
||||||
auto dfa = minimize(mssc(enfa));
|
tokens.push_back(line.second.first);
|
||||||
return dfa;
|
std::cout << line.second.first << std::endl;
|
||||||
|
}
|
||||||
|
return tokens;
|
||||||
}
|
}
|
||||||
|
|
||||||
std::vector<std::pair<int, std::pair<std::string,std::string> > > InputParser::parseLines(std::istream &is) {
|
std::vector<std::pair<int, std::pair<std::string,std::string>>> InputParser::parseInput(std::istream &is) {
|
||||||
std::string line;
|
std::string line;
|
||||||
std::vector<std::pair<int, std::pair<std::string,std::string> > > result;
|
std::vector<std::pair<int, std::pair<std::string,std::string> > > result;
|
||||||
unsigned int i=0;
|
unsigned int i=0;
|
||||||
|
@ -49,23 +50,5 @@ namespace lxs {
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
std::vector<ENFA> InputParser::linesToEnfa(std::vector<std::pair<int, std::pair<std::string,std::string> > > &input) {
|
}
|
||||||
std::vector<ENFA> result;
|
|
||||||
for(unsigned int i=0;i<input.size();i++) {
|
|
||||||
try {
|
|
||||||
std::shared_ptr<lxs::RE> re = lxs::parseRE(input[i].second.second);
|
|
||||||
ENFA enfa;
|
|
||||||
re->toENFA(enfa,0);
|
|
||||||
enfa.numStates++;
|
|
||||||
enfa.starting = 0;
|
|
||||||
enfa.priority[(State) *enfa.accepting.begin()] = (Priority) i;
|
|
||||||
enfa.acceptingToken[(State) *enfa.accepting.begin()] = input[i].second.first;
|
|
||||||
result.push_back(enfa);
|
|
||||||
} catch(SyntaxError &err) {
|
|
||||||
throw InputParserException("Error when parsing regex on line " + std::to_string(input[i].first) + ":\n\t" + err.what());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue