79 lines
3.0 KiB
C++
79 lines
3.0 KiB
C++
#include "Lexesis/automata.h"
|
|
#include "Lexesis/inputparser.h"
|
|
#include "Lexesis/re.h"
|
|
|
|
#include <istream>
|
|
#include <string>
|
|
#include <iostream>
|
|
#include <memory>
|
|
|
|
namespace lxs {
|
|
/**
 * Build a DFA from the rule definitions read from `is`.
 *
 * The input goes through the following stages, in order:
 * parseLines -> linesToEnfa -> merge -> mssc -> minimize.
 *
 * @param is stream containing the rule definitions
 * @return the result of minimize() applied to the automaton produced by mssc()
 */
DFA InputParser::parseInput(std::istream &is) {
    // Raw (tokenname, regex) rules as they appear in the input.
    auto rules = parseLines(is);

    // One ENFA per rule, then a single ENFA combining all of them.
    auto perRule = linesToEnfa(rules);
    auto combined = merge(perRule);

    // NOTE(review): mssc presumably performs the ENFA -> DFA conversion — confirm in automata.h.
    return minimize(mssc(combined));
}
|
|
|
|
/**
 * Read rule definitions of the form `tokenname = regex`, one per line.
 *
 * Lines that are empty, contain only whitespace, or whose first
 * non-whitespace character is '#' are skipped. For every other line the
 * token name is the text before the first '=' with surrounding whitespace
 * removed, and the regex is the text after that '=' with leading whitespace
 * removed (anything at the end of the line is kept untouched).
 *
 * On a malformed line, or when the input contains no rule at all, a message
 * is written to std::cerr and the process is terminated with exit(1).
 *
 * @param is stream to read the rule definitions from
 * @return the (tokenname, regex) pairs in the order they appear in the input
 */
std::vector<std::pair<std::string,std::string> > InputParser::parseLines(std::istream &is) {
    static const char *const kWhitespace = " \t\v\f\r";

    std::vector<std::pair<std::string,std::string> > result;
    std::string line;
    unsigned int lineNumber = 0;

    // Report a syntax error on the current line and terminate the process.
    auto fail = [&lineNumber](const char *reason) {
        std::cerr << "Invalid syntax on line " << lineNumber << ": " << reason << std::endl;
        exit(1);
    };

    while (std::getline(is, line)) {
        ++lineNumber;

        const std::size_t start = line.find_first_not_of(kWhitespace);
        // Blank lines (including a lone '\r' left over from CRLF input) and
        // comment lines do not carry a rule.
        if (start == std::string::npos || line[start] == '#') continue;

        const std::size_t eq = line.find('=');
        if (eq == std::string::npos) fail("no '=' found!");
        // '=' is itself non-whitespace, so eq >= start; equality means there
        // is nothing in front of it.
        if (eq == start) fail("no valid tokenname specified!");

        // substr takes (position, count): the name occupies [start, eq).
        // Passing `eq` as the count would drag the '=' and part of the regex
        // into the name whenever the line starts with whitespace.
        std::string tokenname = line.substr(start, eq - start);
        tokenname.erase(tokenname.find_last_not_of(kWhitespace) + 1);

        const std::size_t regexStart = line.find_first_not_of(kWhitespace, eq + 1);
        if (regexStart == std::string::npos) fail("no valid regex specified!");

        result.push_back(std::make_pair(tokenname, line.substr(regexStart)));
    }

    if (result.empty()) {
        std::cerr << "No valid rules found in the input file!" << std::endl;
        exit(1);
    }
    return result;
}
|
|
|
|
std::vector<ENFA> InputParser::linesToEnfa(std::vector<std::pair<std::string,std::string> > &input) {
|
|
std::vector<ENFA> result;
|
|
for(unsigned int i=0;i<input.size();i++) {
|
|
std::shared_ptr<lxs::RE> re = lxs::parseRE(input[i].second);
|
|
ENFA enfa;
|
|
re->toENFA(enfa,0);
|
|
enfa.numStates++;
|
|
enfa.starting = 0;
|
|
enfa.priority[(State) *enfa.accepting.begin()] = (Priority) i;
|
|
enfa.acceptingToken[(State) *enfa.accepting.begin()] = input[i].first;
|
|
result.push_back(enfa);
|
|
}
|
|
|
|
return result;
|
|
}
|
|
}
|