diff --git a/include/Lexesis/inputparser.h b/include/Lexesis/inputparser.h
new file mode 100644
index 0000000..4a50670
--- /dev/null
+++ b/include/Lexesis/inputparser.h
@@ -0,0 +1,22 @@
+#pragma once
+#ifndef INPUT_PARSER_H
+#define INPUT_PARSER_H
+
+#include <istream>
+#include <vector>
+#include <string>
+
+namespace lxs {
+    struct DFA;
+    struct ENFA;
+
+    class InputParser {
+        public:
+            static DFA parseInput(std::istream& is);
+        private:
+            static std::vector<std::pair<std::string,std::string> > parseLines(std::istream &is);
+            static std::vector<ENFA> linesToEnfa(std::vector<std::pair<std::string,std::string> > &input);
+    };
+}
+
+#endif // INPUT_PARSER_H
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 32206e6..6ba0bb2 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -2,4 +2,5 @@ add_executable(Lexesis
     main.cpp
     automata.cpp
     re.cpp
-    )
+    inputparser.cpp
+)
diff --git a/src/inputparser.cpp b/src/inputparser.cpp
new file mode 100644
index 0000000..a2f8b3e
--- /dev/null
+++ b/src/inputparser.cpp
@@ -0,0 +1,57 @@
+#include "Lexesis/automata.h"
+#include "Lexesis/inputparser.h"
+#include "Lexesis/re.h"
+
+#include <istream>
+#include <string>
+#include <iostream>
+#include <memory>
+
+namespace lxs {
+    DFA InputParser::parseInput(std::istream &is) {
+        auto enfavect = parseLines(is);
+        auto enfas = linesToEnfa(enfavect);
+        auto enfa = merge(enfas);
+        for(auto tokens : enfa.acceptingToken) {
+            std::cout << tokens.first << "\t" << tokens.second << std::endl;
+        }
+        for(auto tokens : enfa.priority) {
+            std::cout << tokens.first << "\t" << tokens.second << std::endl;
+        }
+        auto dfa = mssc(enfa);
+        dfa = minimize(dfa);
+        return dfa;
+    }
+
+    std::vector<std::pair<std::string,std::string> > InputParser::parseLines(std::istream &is) {
+        std::string line;
+        std::vector<std::pair<std::string,std::string> > result;
+        while(std::getline(is, line)) {
+            int loc = line.find_first_of('=');
+            int start = line.find_first_not_of(" ");
+            std::string tokenname = line.substr(start, loc - start);
+            int end = tokenname.find_last_not_of(" ");
+            tokenname = tokenname.substr(0, end + 1);
+            std::string regex = line.substr(loc + 1);
+            start = regex.find_first_not_of(" ");
+            regex = regex.substr(start);
+            result.push_back(std::make_pair(tokenname, regex));
+        }
+        return result;
+    }
+    std::vector<ENFA> InputParser::linesToEnfa(std::vector<std::pair<std::string,std::string> > &input) {
+        std::vector<ENFA> result;
+        for(unsigned int i = 0; i < input.size(); i++) {
+            std::shared_ptr<lxs::RE> re = lxs::parseRE(input[i].second);
+            ENFA enfa;
+            re->toENFA(enfa, 0);
+            enfa.numStates++;
+            enfa.starting = 0;
+            enfa.priority.insert(std::make_pair((State) *enfa.accepting.begin(), (Priority) i));
+            enfa.acceptingToken.insert(std::make_pair((State) *enfa.accepting.begin(), input[i].first));
+            result.push_back(enfa);
+        }
+
+        return result;
+    }
+}
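
Usage note (not part of the patch): a minimal sketch of how the new InputParser entry point could be driven, assuming the "NAME = regex" line format that parseLines() expects. The token names and regular expressions below are made-up examples, and the exact regex syntax accepted by lxs::parseRE is not shown here; the DFA and InputParser types come from the headers added/used by this patch.

// Illustrative only: feed two hypothetical token rules to InputParser::parseInput(),
// which builds one ENFA per rule, merges them, then runs mssc() and minimize().
#include "Lexesis/automata.h"
#include "Lexesis/inputparser.h"

#include <sstream>

int main() {
    std::stringstream rules;
    rules << "IDENT = (a|b)(a|b)*\n"   // hypothetical rule, highest priority (listed first)
          << "IF = if\n";              // hypothetical rule, lower priority
    lxs::DFA dfa = lxs::InputParser::parseInput(rules);
    (void) dfa; // the DFA's interface lives in automata.h and is not shown in this patch
    return 0;
}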