From c1befc5a768de1c97dc7fe6c6d87dfdd89600e17 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thomas=20Av=C3=A9?= <email@thomasave.be>
Date: Thu, 19 Jan 2017 19:10:07 +0100
Subject: [PATCH] Moved input parsing to a separate library and moved some work
 to the driver

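InputParser::parseInput no longer builds a DFA itself: it returns the raw
rules as a vector of <line number, <token name, regex>> pairs, and a new
InputParser::getTokens helper lists just the token names. The ENFA
construction, merging and DFA minimization now happen in the driver, and the
parser is built as its own library target (lxsinput).

Roughly, the driver now performs the following steps (sketch based on
src/driver.cpp as changed in this patch):

    auto input = input::InputParser::parseInput(m_inputfile);
    auto enfas = linesToEnfa(input);   // one ENFA per token rule
    auto enfa  = merge(enfas);
    auto dfa   = minimize(mssc(enfa));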
---
 include/Lexesis/inputparser.h | 28 ++++++++++++------------
 src/CMakeLists.txt            |  9 +++++---
 src/driver.cpp                | 25 +++++++++++++++++++--
 src/inputparser.cpp           | 40 ++++++++++-------------------------
 4 files changed, 54 insertions(+), 48 deletions(-)

diff --git a/include/Lexesis/inputparser.h b/include/Lexesis/inputparser.h
index 822aba8..e90125b 100644
--- a/include/Lexesis/inputparser.h
+++ b/include/Lexesis/inputparser.h
@@ -7,9 +7,7 @@
 #include <string>
 
 namespace lxs {
-    struct DFA;
-    struct ENFA; 
-
+namespace input {
 
     /**
      * Used for parsing token rules
@@ -17,19 +15,21 @@ namespace lxs {
     class InputParser {
         public:
             /**
-             * parse the tokens rules read from `is` and return the minimized constructed dfa from those rules
+             * Parse the token rules read from `is`
+             *
+             * @return a std::vector<std::pair<int, std::pair<std::string,std::string>>>
+             *         holding <line number, <token name, regex>> for each rule
              */
-            static DFA parseInput(std::istream& is);
-        private:
+            static std::vector<std::pair<int, std::pair<std::string,std::string>>> parseInput(std::istream& is);
+
             /**
-             * parse the lines and return pairs of (Token type, regex)
+             * Get a list of the token names specified in the istream
+             *
+             * @param is The istream to read the token rules from
+             *
+             * @return the list of token names
              */
-            static std::vector<std::pair<int, std::pair<std::string,std::string> > > parseLines(std::istream &is);
-
-            /**
-             * Convert the lines from `parseLines` to ENFA's
-             */
-            static std::vector<ENFA> linesToEnfa(std::vector<std::pair<int, std::pair<std::string,std::string> > > &input);
+            static std::vector<std::string> getTokens(std::istream& is);
     };
     
     /**
@@ -45,5 +45,5 @@ namespace lxs {
         std::string m_what;
     };
 }
-
+}
 #endif // INPUT_PARSER_H
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 8253ed2..091a932 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -12,20 +12,23 @@ add_library(lxs
     driver.cpp
     RegexLexer.cpp
     re.cpp
-    inputparser.cpp
     template.cpp
     )
 
+add_library(lxsinput
+    inputparser.cpp
+    )
+
 add_executable(Lexesis
     main.cpp
     )
-target_link_libraries(Lexesis Lexesis-backends lxs mstch::mstch)
+target_link_libraries(Lexesis Lexesis-backends lxs lxsinput mstch::mstch)
 
 if (CMAKE_BUILD_TYPE MATCHES Debug)
     add_executable(Lexesis-test
         test.cpp
         )
-    target_link_libraries(Lexesis-test Lexesis-backends lxs mstch::mstch)
+    target_link_libraries(Lexesis-test Lexesis-backends lxs lxsinput mstch::mstch)
 endif()
 
 install(TARGETS Lexesis
diff --git a/src/driver.cpp b/src/driver.cpp
index d1e1e70..0338217 100644
--- a/src/driver.cpp
+++ b/src/driver.cpp
@@ -1,5 +1,7 @@
 #include "Lexesis/driver.h"
 #include "Lexesis/inputparser.h"
+#include "Lexesis/automata.h"
+#include "Lexesis/re.h"
 
 #include <iostream>
 #include <fstream>
@@ -16,6 +18,22 @@ namespace {
         }
         return s;
     }
+
+    std::vector<lxs::ENFA> linesToEnfa(std::vector<std::pair<int, std::pair<std::string,std::string> > > &input) {
+        std::vector<lxs::ENFA> result;
+        for(unsigned int i=0;i<input.size();i++) {
+            std::shared_ptr<lxs::RE> re = lxs::parseRE(input[i].second.second);
+            lxs::ENFA enfa;
+            re->toENFA(enfa,0);
+            enfa.numStates++;
+            enfa.starting = 0;
+            enfa.priority[(lxs::State) *enfa.accepting.begin()] = (lxs::Priority) i;
+            enfa.acceptingToken[(lxs::State) *enfa.accepting.begin()] = input[i].second.first;
+            result.push_back(enfa);
+        }
+
+        return result;
+    }
 }
 
 namespace lxs {
@@ -40,8 +58,11 @@ namespace lxs {
         if (!m_lexername.length()) throw DriverException("no valid lexer name possible");
         Backend* back = m_backends->findBackendForLang(m_language);
         if (!back) throw DriverException("Could not find a valid backend for language " + m_language );
-
-        DFA dfa = InputParser::parseInput(m_inputfile);
+
+        auto input = input::InputParser::parseInput(m_inputfile);
+        auto enfas = linesToEnfa(input);
+        auto enfa = merge(enfas);
+        auto dfa = minimize(mssc(enfa));
 
         back->generateLexer([this](std::string filename) -> std::unique_ptr<std::ostream> {
                     return std::unique_ptr<std::ostream>(new std::ofstream(m_outputdir + "/" + filename));
diff --git a/src/inputparser.cpp b/src/inputparser.cpp
index 87bc3ae..8d4c970 100644
--- a/src/inputparser.cpp
+++ b/src/inputparser.cpp
@@ -1,6 +1,4 @@
-#include "Lexesis/automata.h"
 #include "Lexesis/inputparser.h"
-#include "Lexesis/re.h"
 
 #include <istream>
 #include <string>
@@ -9,21 +7,23 @@
 #include <exception>
 
 namespace lxs {
+namespace input {
 
     InputParserException::InputParserException(std::string what): m_what(what) {} 
     const char* InputParserException::what() const throw() {
             return m_what.c_str();
         }
-
-    DFA InputParser::parseInput(std::istream &is) {
-        auto enfavect = parseLines(is);
-        auto enfas = linesToEnfa(enfavect);
-        auto enfa = merge(enfas);
-        auto dfa = minimize(mssc(enfa));
-        return dfa;
+
+    std::vector<std::string> InputParser::getTokens(std::istream& is) {
+        auto lines = parseInput(is);
+        std::vector<std::string> tokens;
+        for(auto line: lines) {
+            tokens.push_back(line.second.first);
+        }
+        return tokens;
     }
 
-    std::vector<std::pair<int, std::pair<std::string,std::string> > > InputParser::parseLines(std::istream &is) {
+    std::vector<std::pair<int, std::pair<std::string,std::string>>> InputParser::parseInput(std::istream &is) {
         std::string line;
         std::vector<std::pair<int, std::pair<std::string,std::string> > > result;
         unsigned int i=0;
@@ -49,23 +50,5 @@ namespace lxs {
         return result;
     }
 
-    std::vector<ENFA> InputParser::linesToEnfa(std::vector<std::pair<int, std::pair<std::string,std::string> > > &input) {
-        std::vector<ENFA> result;
-        for(unsigned int i=0;i<input.size();i++) {
-            try {
-                std::shared_ptr<lxs::RE> re = lxs::parseRE(input[i].second.second);
-                ENFA enfa;
-                re->toENFA(enfa,0);
-                enfa.numStates++;
-                enfa.starting = 0;
-                enfa.priority[(State) *enfa.accepting.begin()] = (Priority) i; 
-                enfa.acceptingToken[(State) *enfa.accepting.begin()] = input[i].second.first;
-                result.push_back(enfa);
-            } catch(SyntaxError &err) {
-                throw InputParserException("Error when parsing regex on line " + std::to_string(input[i].first) + ":\n\t" + err.what());
-            }
-        }
-
-        return result;
-    }
+}
 }