From 272e9f657ab8776c4254b8065cbe49458b9f51cd Mon Sep 17 00:00:00 2001
From: Thomas Ave
Date: Tue, 24 May 2016 00:46:23 +0200
Subject: [PATCH] First version of input parser

---
Reviewer notes (this area is not part of the commit message):
 * Re-flowed from a copy in which all newlines were collapsed and every
   angle-bracketed span was stripped. Include names, template arguments and
   the indentation of the CMakeLists.txt context lines were restored from
   how the code uses them and may differ from the original commit. The
   author e-mail could not be restored.
 * parseLines(): the token name length handed to substr() is fixed, blank
   lines are skipped and malformed lines raise std::invalid_argument.
 * The blob ids on the "index" lines are those of the original commit and
   no longer match the edited content.

 include/Lexesis/inputparser.h |  29 ++++++++++
 src/CMakeLists.txt            |   3 +-
 src/inputparser.cpp           | 108 +++++++++++++++++++++++++++++++++++
 3 files changed, 139 insertions(+), 1 deletion(-)
 create mode 100644 include/Lexesis/inputparser.h
 create mode 100644 src/inputparser.cpp

diff --git a/include/Lexesis/inputparser.h b/include/Lexesis/inputparser.h
new file mode 100644
index 0000000..4a50670
--- /dev/null
+++ b/include/Lexesis/inputparser.h
@@ -0,0 +1,29 @@
+#pragma once
+#ifndef INPUT_PARSER_H
+#define INPUT_PARSER_H
+
+#include <istream>
+#include <string>
+#include <utility>
+#include <vector>
+
+namespace lxs {
+    struct DFA;
+    struct ENFA;
+
+    /**
+     * Turns a textual list of `NAME = REGEX` rules into a single DFA.
+     */
+    class InputParser {
+        public:
+            /// Read all rules from `is` and return the DFA built from them.
+            static DFA parseInput(std::istream& is);
+        private:
+            /// Split every rule line into a (token name, regex) pair.
+            static std::vector<std::pair<std::string,std::string> > parseLines(std::istream &is);
+            /// Build one ENFA per (token name, regex) pair, in input order.
+            static std::vector<ENFA> linesToEnfa(std::vector<std::pair<std::string,std::string> > &input);
+    };
+}
+
+#endif // INPUT_PARSER_H
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 32206e6..6ba0bb2 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -2,4 +2,5 @@ add_executable(Lexesis
     main.cpp
     automata.cpp
     re.cpp
-    )
+    inputparser.cpp
+)
diff --git a/src/inputparser.cpp b/src/inputparser.cpp
new file mode 100644
index 0000000..a2f8b3e
--- /dev/null
+++ b/src/inputparser.cpp
@@ -0,0 +1,108 @@
+#include "Lexesis/automata.h"
+#include "Lexesis/inputparser.h"
+#include "Lexesis/re.h"
+
+#include <cstddef>
+#include <iostream>
+#include <istream>
+#include <stdexcept>
+#include <string>
+#include <utility>
+#include <vector>
+
+namespace lxs {
+    /**
+     * Build the lexer automaton for the rules read from `is`.
+     *
+     * Every rule becomes its own ENFA; the ENFAs are combined with merge(),
+     * converted with mssc() and reduced with minimize(). The accepting-token
+     * and priority tables of the merged ENFA are printed to std::cout first.
+     *
+     * @param is  stream with one `NAME = REGEX` rule per line
+     * @return    the DFA returned by minimize()
+     * @throws std::invalid_argument  if a rule line is malformed
+     */
+    DFA InputParser::parseInput(std::istream &is) {
+        auto enfavect = parseLines(is);
+        auto enfas = linesToEnfa(enfavect);
+        auto enfa = merge(enfas);
+        // Bind by const reference so that no table entry is copied.
+        for(const auto &tokens: enfa.acceptingToken) {
+            std::cout << tokens.first << "\t" << tokens.second << std::endl;
+        }
+        for(const auto &tokens: enfa.priority) {
+            std::cout << tokens.first << "\t" << tokens.second << std::endl;
+        }
+        auto dfa = mssc(enfa);
+        dfa = minimize(dfa);
+        return dfa;
+    }
+
+    /**
+     * Split the input into (token name, regex) pairs, one per rule line.
+     *
+     * A rule has the form `NAME = REGEX` and is split at its first '='.
+     * Spaces around NAME and in front of REGEX are dropped; trailing spaces
+     * of REGEX are kept. Lines that are empty or hold only spaces are skipped.
+     *
+     * @param is  stream that is read until std::getline() fails
+     * @return    the pairs, in the order the rules were read
+     * @throws std::invalid_argument  if a non-blank line lacks the '=', the
+     *                                name or the regex
+     */
+    std::vector<std::pair<std::string,std::string> > InputParser::parseLines(std::istream &is) {
+        std::string line;
+        std::vector<std::pair<std::string,std::string> > result;
+        unsigned long lineno = 0;
+        while(std::getline(is,line)) {
+            ++lineno;
+            // size_type, not int: npos has to stay recognisable.
+            const std::string::size_type start = line.find_first_not_of(' ');
+            if(start == std::string::npos) {
+                continue; // nothing but spaces
+            }
+            const std::string::size_type loc = line.find('=');
+            if(loc == std::string::npos || loc == start) {
+                throw std::invalid_argument("line " + std::to_string(lineno) + ": expected `NAME = REGEX`");
+            }
+            // substr() takes a length, so the name spans loc - start characters.
+            std::string tokenname = line.substr(start, loc - start);
+            tokenname.erase(tokenname.find_last_not_of(' ') + 1);
+            const std::string::size_type regexStart = line.find_first_not_of(' ', loc + 1);
+            if(regexStart == std::string::npos) {
+                throw std::invalid_argument("line " + std::to_string(lineno) + ": missing regex after '='");
+            }
+            result.push_back(std::make_pair(tokenname, line.substr(regexStart)));
+        }
+        return result;
+    }
+
+    /**
+     * Build one ENFA per (token name, regex) pair.
+     *
+     * The index of a pair in `input` is stored as the priority of the ENFA's
+     * first accepting state, and the token name as that state's token.
+     *
+     * @param input  pairs as returned by parseLines(); only read here
+     * @return       the ENFAs, in the same order as `input`
+     */
+    std::vector<ENFA> InputParser::linesToEnfa(std::vector<std::pair<std::string,std::string> > &input) {
+        std::vector<ENFA> result;
+        result.reserve(input.size());
+        for(std::size_t i = 0; i < input.size(); i++) {
+            // auto: the exact pointer type is whatever parseRE() returns.
+            auto re = lxs::parseRE(input[i].second);
+            ENFA enfa;
+            re->toENFA(enfa,0);
+            enfa.numStates++;
+            enfa.starting = 0;
+            // NOTE(review): assumes toENFA() left an accepting state - confirm.
+            const State accept = static_cast<State>(*enfa.accepting.begin());
+            enfa.priority.insert(std::make_pair(accept, static_cast<Priority>(i)));
+            enfa.acceptingToken.insert(std::make_pair(accept, input[i].first));
+            result.push_back(std::move(enfa));
+        }
+
+        return result;
+    }
+}