From c1befc5a768de1c97dc7fe6c6d87dfdd89600e17 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Av=C3=A9?= <email@thomasave.be> Date: Thu, 19 Jan 2017 19:10:07 +0100 Subject: [PATCH] Moved inputparsing to separate library and moved some work to the driver --- include/Lexesis/inputparser.h | 28 ++++++++++++------------ src/CMakeLists.txt | 9 +++++--- src/driver.cpp | 25 +++++++++++++++++++-- src/inputparser.cpp | 41 ++++++++++------------------------- 4 files changed, 55 insertions(+), 48 deletions(-) diff --git a/include/Lexesis/inputparser.h b/include/Lexesis/inputparser.h index 822aba8..e90125b 100644 --- a/include/Lexesis/inputparser.h +++ b/include/Lexesis/inputparser.h @@ -7,9 +7,7 @@ #include <string> namespace lxs { - struct DFA; - struct ENFA; - +namespace input { /** * Used for parsing token rules @@ -17,19 +15,21 @@ namespace lxs { class InputParser { public: /** - * parse the tokens rules read from `is` and return the minimized constructed dfa from those rules + * parse the tokens rules read from `is` + * + * std::vector<std::pair<int, std::pair<std::string,std::string>>> + * <line number, <tokenname, regex> > */ - static DFA parseInput(std::istream& is); - private: + static std::vector<std::pair<int, std::pair<std::string,std::string>>> parseInput(std::istream& is); + /** - * parse the lines and return pairs of (Token type, regex) + * Get a list of the tokens specified in the istream + * + * @param is The istream to get the tokens from + * + * @return the list of tokens */ - static std::vector<std::pair<int, std::pair<std::string,std::string> > > parseLines(std::istream &is); - - /** - * Convert the lines from `parseLines` to ENFA's - */ - static std::vector<ENFA> linesToEnfa(std::vector<std::pair<int, std::pair<std::string,std::string> > > &input); + static std::vector<std::string> getTokens(std::istream& is); }; /** @@ -45,5 +45,5 @@ namespace lxs { std::string m_what; }; } - +} #endif // INPUT_PARSER_H diff --git a/src/CMakeLists.txt 
b/src/CMakeLists.txt index 8253ed2..091a932 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -12,20 +12,23 @@ add_library(lxs driver.cpp RegexLexer.cpp re.cpp - inputparser.cpp template.cpp ) +add_library(lxsinput + inputparser.cpp + ) + add_executable(Lexesis main.cpp ) -target_link_libraries(Lexesis Lexesis-backends lxs mstch::mstch) +target_link_libraries(Lexesis Lexesis-backends lxs lxsinput mstch::mstch) if (CMAKE_BUILD_TYPE MATCHES Debug) add_executable(Lexesis-test test.cpp ) - target_link_libraries(Lexesis-test Lexesis-backends lxs mstch::mstch) + target_link_libraries(Lexesis-test Lexesis-backends lxs lxsinput mstch::mstch) endif() install(TARGETS Lexesis diff --git a/src/driver.cpp b/src/driver.cpp index d1e1e70..0338217 100644 --- a/src/driver.cpp +++ b/src/driver.cpp @@ -1,5 +1,7 @@ #include "Lexesis/driver.h" #include "Lexesis/inputparser.h" +#include "Lexesis/automata.h" +#include "Lexesis/re.h" #include <iostream> #include <fstream> @@ -16,6 +18,22 @@ namespace { } return s; } + + std::vector<lxs::ENFA> linesToEnfa(std::vector<std::pair<int, std::pair<std::string,std::string> > > &input) { + std::vector<lxs::ENFA> result; + for(unsigned int i=0;i<input.size();i++) { + std::shared_ptr<lxs::RE> re = lxs::parseRE(input[i].second.second); + lxs::ENFA enfa; + re->toENFA(enfa,0); + enfa.numStates++; + enfa.starting = 0; + enfa.priority[(lxs::State) *enfa.accepting.begin()] = (lxs::Priority) i; + enfa.acceptingToken[(lxs::State) *enfa.accepting.begin()] = input[i].second.first; + result.push_back(enfa); + } + + return result; + } } namespace lxs { @@ -40,8 +58,11 @@ namespace lxs { if (!m_lexername.length()) throw DriverException("no valid lexer name possible"); Backend* back = m_backends->findBackendForLang(m_language); if (!back) throw DriverException("Could not find a valid backend for language " + m_language ); - - DFA dfa = InputParser::parseInput(m_inputfile); + + auto input = input::InputParser::parseInput(m_inputfile); + auto enfas = 
linesToEnfa(input); + auto enfa = merge(enfas); + auto dfa = minimize(mssc(enfa)); back->generateLexer([this](std::string filename) -> std::unique_ptr<std::ostream> { return std::unique_ptr<std::ostream>(new std::ofstream(m_outputdir + "/" + filename)); diff --git a/src/inputparser.cpp b/src/inputparser.cpp index 87bc3ae..8d4c970 100644 --- a/src/inputparser.cpp +++ b/src/inputparser.cpp @@ -1,6 +1,4 @@ -#include "Lexesis/automata.h" #include "Lexesis/inputparser.h" -#include "Lexesis/re.h" #include <istream> #include <string> @@ -9,21 +7,24 @@ #include <exception> namespace lxs { +namespace input { InputParserException::InputParserException(std::string what): m_what(what) {} const char* InputParserException::what() const throw() { return m_what.c_str(); } - - DFA InputParser::parseInput(std::istream &is) { - auto enfavect = parseLines(is); - auto enfas = linesToEnfa(enfavect); - auto enfa = merge(enfas); - auto dfa = minimize(mssc(enfa)); - return dfa; + + std::vector<std::string> InputParser::getTokens(std::istream& is) { + auto lines = parseInput(is); + std::vector<std::string> tokens; + for(auto line: lines) { + tokens.push_back(line.second.first); + std::cout << line.second.first << std::endl; + } + return tokens; } - std::vector<std::pair<int, std::pair<std::string,std::string> > > InputParser::parseLines(std::istream &is) { + std::vector<std::pair<int, std::pair<std::string,std::string>>> InputParser::parseInput(std::istream &is) { std::string line; std::vector<std::pair<int, std::pair<std::string,std::string> > > result; unsigned int i=0; @@ -49,23 +50,5 @@ namespace lxs { return result; } - std::vector<ENFA> InputParser::linesToEnfa(std::vector<std::pair<int, std::pair<std::string,std::string> > > &input) { - std::vector<ENFA> result; - for(unsigned int i=0;i<input.size();i++) { - try { - std::shared_ptr<lxs::RE> re = lxs::parseRE(input[i].second.second); - ENFA enfa; - re->toENFA(enfa,0); - enfa.numStates++; - enfa.starting = 0; - enfa.priority[(State) 
*enfa.accepting.begin()] = (Priority) i; - enfa.acceptingToken[(State) *enfa.accepting.begin()] = input[i].second.first; - result.push_back(enfa); - } catch(SyntaxError &err) { - throw InputParserException("Error when parsing regex on line " + std::to_string(input[i].first) + ":\n\t" + err.what()); - } - } - - return result; - } +} }