#include "Lexesis/automata.h"
#include "Lexesis/inputparser.h"
#include "Lexesis/re.h"

#include <cstdlib>
#include <iostream>
#include <string>
#include <utility>
#include <vector>

namespace lxs {

/**
 * Parse a rule file from `is` and build the final token-matching DFA.
 *
 * Pipeline: raw lines -> (tokenname, regex) pairs -> one ENFA per rule
 * -> merged ENFA -> subset construction (mssc) -> minimized DFA.
 *
 * @param is  input stream containing "name = regex" rules
 * @return    minimized DFA recognizing all rules
 */
DFA InputParser::parseInput(std::istream &is) {
    auto enfavect = parseLines(is);
    auto enfas = linesToEnfa(enfavect);
    auto enfa = merge(enfas);
    auto dfa = minimize(mssc(enfa));
    return dfa;
}

/**
 * Read the rule stream line by line and split each rule on its first '='
 * into a (tokenname, regex) pair.
 *
 * Empty lines, whitespace-only lines and lines whose first non-whitespace
 * character is '#' are skipped. Any malformed line (no '=', empty token
 * name, empty regex) prints a diagnostic with its 1-based line number and
 * terminates the process via exit(1) — existing error-handling convention
 * of this parser, kept for caller compatibility.
 *
 * @param is  input stream with one rule per line
 * @return    non-empty vector of (tokenname, regex) pairs
 */
std::vector<std::pair<std::string, std::string> > InputParser::parseLines(std::istream &is) {
    std::string line;
    std::vector<std::pair<std::string, std::string> > result;
    unsigned int i = 0;
    while (std::getline(is, line)) {
        i++;
        size_t start = line.find_first_not_of(" \t\v\f\r");
        // Skip blank lines, whitespace-only lines (start == npos) and
        // comment lines. The old check let whitespace-only lines fall
        // through and mis-reported them as "no '=' found".
        if (line.length() == 0 || start == std::string::npos || line[start] == '#')
            continue;
        std::size_t loc = line.find_first_of('=');
        if (loc == std::string::npos) {
            std::cerr << "Invalid syntax on line " << i << ": no '=' found!" << std::endl;
            exit(1);
        }
        if (start == loc) {
            std::cerr << "Invalid syntax on line " << i << ": no valid tokenname specified!" << std::endl;
            exit(1);
        }
        // BUG FIX: substr takes (pos, count). The old code passed the '='
        // *position* as the count, so a rule with leading whitespace got a
        // token name that swallowed the '=' and part of the regex. The
        // correct count is the distance from `start` up to the '='.
        std::string tokenname = line.substr(start, loc - start);
        if (tokenname.length() == 0) {
            std::cerr << "Invalid syntax on line " << i << ": no valid tokenname specified!" << std::endl;
            exit(1);
        }
        // Trim trailing whitespace from the token name; `end` cannot be
        // npos here because line[start] is non-whitespace.
        std::size_t end = tokenname.find_last_not_of(" \t\v\f\r");
        tokenname = tokenname.substr(0, end + 1);
        // Everything after '=' is the regex; strip leading whitespace only,
        // since trailing whitespace may be significant in a pattern.
        std::string regex = line.substr(loc + 1);
        if (regex.length() == 0) {
            std::cerr << "Invalid syntax on line " << i << ": no valid regex specified!" << std::endl;
            exit(1);
        }
        start = regex.find_first_not_of(" \t\v\f\r");
        if (start == std::string::npos) {
            std::cerr << "Invalid syntax on line " << i << ": no valid regex specified!" << std::endl;
            exit(1);
        }
        regex = regex.substr(start);
        result.push_back(std::make_pair(tokenname, regex));
    }
    if (result.size() == 0) {
        std::cerr << "No valid rules found in the input file!" << std::endl;
        exit(1);
    }
    return result;
}

/**
 * Build one ENFA per (tokenname, regex) rule.
 *
 * Each rule's regex is parsed and converted to an ENFA starting at state 0;
 * the rule's index becomes its priority (earlier rules win ties) and its
 * token name is attached to the accepting state.
 *
 * @param input  (tokenname, regex) pairs as produced by parseLines()
 * @return       one ENFA per input rule, in input order
 */
std::vector<ENFA> InputParser::linesToEnfa(std::vector<std::pair<std::string, std::string> > &input) {
    std::vector<ENFA> result;
    for (unsigned int i = 0; i < input.size(); i++) {
        // `auto` because parseRE's exact (smart-)pointer return type is
        // declared in Lexesis/re.h, outside this file.
        auto re = lxs::parseRE(input[i].second);
        ENFA enfa;
        re->toENFA(enfa, 0);
        enfa.numStates++;
        enfa.starting = 0;
        // NOTE(review): assumes toENFA leaves exactly one accepting state,
        // so *accepting.begin() is the unique accept — TODO confirm in re.h.
        enfa.priority[(State)*enfa.accepting.begin()] = (Priority)i;
        enfa.acceptingToken[(State)*enfa.accepting.begin()] = input[i].first;
        result.push_back(enfa);
    }
    return result;
}

} // namespace lxs