Import re.cpp and modify ENFA generation to use new ENFA structures

This commit is contained in:
Robin Jadoul 2016-04-24 16:00:18 +02:00
parent 3f21513ac5
commit c80fcdb8aa
1 changed files with 206 additions and 0 deletions

206
src/re.cpp Normal file
View File

@ -0,0 +1,206 @@
#include "Lexesis/re.h"
#include <algorithm>
#include <iostream>
#include <stack>
using namespace std;
namespace lxs {
/// Textual form of the empty-language expression: an empty string.
string EmptyRE::toRe()
{
    return string();
}
/**
 * Attaches the fragment for the empty language to `enfa` at state `attach`.
 *
 * No transition is added, so the new accepting state `attach + 1` is not
 * connected to `attach` by this fragment.
 *
 * @param enfa   automaton being extended; its accepting set is replaced by
 *               `{attach + 1}` and `numStates` is raised to at least `attach + 1`
 * @param attach state at which the fragment starts
 * @return the fragment's accepting state, `attach + 1`
 */
State EmptyRE::toENFA(ENFA& enfa, State attach)
{
    const State accept = attach + 1;
    if (enfa.numStates < accept)
        enfa.numStates = accept;

    enfa.accepting.clear();
    enfa.accepting.insert(accept);
    return accept;
}
/// Textual form of the epsilon expression: the literal "ε".
string EpsilonRE::toRe()
{
    return string("ε");
}
/**
 * Attaches the fragment for the empty word to `enfa` at state `attach`.
 *
 * A single epsilon transition `attach -> attach + 1` is added.
 *
 * @param enfa   automaton being extended; its accepting set is replaced by
 *               `{attach + 1}` and `numStates` is raised to at least `attach + 1`
 * @param attach state at which the fragment starts
 * @return the fragment's accepting state, `attach + 1`
 */
State EpsilonRE::toENFA(ENFA& enfa, State attach)
{
    const State accept = attach + 1;
    if (enfa.numStates < accept)
        enfa.numStates = accept;

    enfa.epsilonTransitions[attach].insert(accept);

    enfa.accepting.clear();
    enfa.accepting.insert(accept);
    return accept;
}
/// Textual form of a single-symbol expression: a one-character string holding `c`.
string SingleRE::toRe()
{
    string symbol(1, c);
    return symbol;
}
/**
 * Attaches the fragment recognising the single symbol `c` to `enfa`.
 *
 * One transition on `c` is added from `attach` to `attach + 1`.
 *
 * @param enfa   automaton being extended; its accepting set is replaced by
 *               `{attach + 1}` and `numStates` is raised to at least `attach + 1`
 * @param attach state at which the fragment starts
 * @return the fragment's accepting state, `attach + 1`
 */
State SingleRE::toENFA(ENFA& enfa, State attach)
{
    const State accept = attach + 1;
    if (enfa.numStates < accept)
        enfa.numStates = accept;

    enfa.delta[attach][c].insert(accept);

    enfa.accepting.clear();
    enfa.accepting.insert(accept);
    return accept;
}
/// Textual form of a concatenation: the text of `e` immediately followed by the text of `f`.
string ConcatRE::toRe()
{
    string repr = e->toRe();
    repr += f->toRe();
    return repr;
}
/**
 * Attaches the fragment for the concatenation `e f` to `enfa`.
 *
 * The fragment of `e` is built starting at `attach`; its end state is linked
 * by an epsilon transition to the next state number, where the fragment of
 * `f` is then built.
 *
 * @param enfa   automaton being extended
 * @param attach state at which the fragment of `e` starts
 * @return the state returned by `f`'s fragment
 */
State ConcatRE::toENFA(ENFA& enfa, State attach)
{
    const State firstEnd = e->toENFA(enfa, attach);
    const State secondStart = firstEnd + 1;

    // Glue the two fragments together.
    enfa.epsilonTransitions[firstEnd].insert(secondStart);

    return f->toENFA(enfa, secondStart);
}
/// Textual form of a Kleene star: the operand wrapped in parentheses, followed by '*'.
string StarRE::toRe()
{
    string repr("(");
    repr += e->toRe();
    repr += ")*";
    return repr;
}
/**
 * Attaches the fragment for `e*` to `enfa` at state `attach`.
 *
 * The operand's fragment is built from `attach + 1` and ends in some state
 * `innerEnd`; the new accepting state is `innerEnd + 1`. Four epsilon
 * transitions are added:
 *   - attach   -> attach + 1    (enter the operand)
 *   - attach   -> innerEnd + 1  (skip the operand entirely)
 *   - innerEnd -> attach + 1    (repeat the operand)
 *   - innerEnd -> innerEnd + 1  (leave the operand)
 *
 * @param enfa   automaton being extended; its accepting set is replaced by
 *               `{innerEnd + 1}` and `numStates` is raised to at least that value
 * @param attach state at which the fragment starts
 * @return the fragment's accepting state, `innerEnd + 1`
 */
State StarRE::toENFA(ENFA& enfa, State attach)
{
    const State innerStart = attach + 1;
    const State innerEnd = e->toENFA(enfa, innerStart);
    const State accept = innerEnd + 1;

    if (enfa.numStates < accept)
        enfa.numStates = accept;

    // The operand's own construction set the accepting states; override them.
    enfa.accepting.clear();
    enfa.accepting.insert(accept);

    enfa.epsilonTransitions[attach].insert(innerStart);
    enfa.epsilonTransitions[attach].insert(accept);
    enfa.epsilonTransitions[innerEnd].insert(innerStart);
    enfa.epsilonTransitions[innerEnd].insert(accept);

    return accept;
}
/// Textual form of an alternation: "(" + text of `e` + "+" + text of `f` + ")".
string PlusRE::toRe()
{
    string repr("(");
    repr += e->toRe();
    repr += "+";
    repr += f->toRe();
    repr += ")";
    return repr;
}
/**
 * Attaches the fragment for the alternation `e + f` to `enfa` at state `attach`.
 *
 * The fragment of `e` is built from `attach + 1` and ends in `leftEnd`; the
 * fragment of `f` is built from `leftEnd + 1` and ends in `rightEnd`. The new
 * accepting state is `rightEnd + 1`. Epsilon transitions lead from `attach`
 * into both branches and from both branch ends into the accepting state.
 *
 * @param enfa   automaton being extended; its accepting set is replaced by
 *               `{rightEnd + 1}` and `numStates` is raised to at least that value
 * @param attach state at which the fragment starts
 * @return the fragment's accepting state, `rightEnd + 1`
 */
State PlusRE::toENFA(ENFA& enfa, State attach)
{
    const State leftStart = attach + 1;
    const State leftEnd = e->toENFA(enfa, leftStart);
    const State rightStart = leftEnd + 1;
    const State rightEnd = f->toENFA(enfa, rightStart);
    const State accept = rightEnd + 1;

    if (enfa.numStates < accept)
        enfa.numStates = accept;

    // Fork from the attach point into both branches ...
    enfa.epsilonTransitions[attach].insert(leftStart);
    enfa.epsilonTransitions[attach].insert(rightStart);
    // ... and join both branch ends in the common accepting state.
    enfa.epsilonTransitions[leftEnd].insert(accept);
    enfa.epsilonTransitions[rightEnd].insert(accept);

    // Must happen after both operands were built: each of them resets the set.
    enfa.accepting.clear();
    enfa.accepting.insert(accept);

    return accept;
}
/**
 * Replaces the two topmost expressions on `stk` by their concatenation.
 *
 * The caller must make sure that `stk` holds at least two elements.
 */
static void compress(stack<RE*>& stk)
{
    RE* const later = stk.top();
    stk.pop();
    RE* const earlier = stk.top();
    stk.pop();

    // The stack yields the operands last-first, so swap them back into reading order.
    stk.push(new ConcatRE(earlier, later));
}
/**
 * Concatenates everything below the top of `stk` into a single expression.
 *
 * The topmost element is set aside and pushed back untouched afterwards, so
 * the stack ends up with at most two elements. An empty stack is left as is.
 */
static void compactStack(stack<RE*>& stk)
{
    if (!stk.empty())
    {
        RE* const newest = stk.top();
        stk.pop();

        while (stk.size() > 1)
            compress(stk);

        stk.push(newest);
    }
}
namespace {
/**
 * Builds the expression node for the character that follows a backslash.
 *
 * `\e` yields an EpsilonRE, `\E` an EmptyRE, and an escaped `\`, `*`, `+`,
 * `(` or `)` yields a SingleRE for that literal character.
 *
 * @param escaped the character directly after the backslash
 * @return a newly allocated node
 * @throws SyntaxError for any other character
 */
RE* parseEscaped(char escaped)
{
    switch (escaped)
    {
        case 'e':
            return new EpsilonRE();
        case 'E':
            return new EmptyRE();
        case '\\':
        case '*':
        case '+':
        case '(':
        case ')':
            return new SingleRE(escaped);
        default:
            throw SyntaxError(("invalid escape sequence: \\" + string(1, escaped)).c_str());
    }
}

/**
 * Collapses the parser stack into the single resulting expression.
 *
 * @param stk          parser stack, expected to hold one or two expressions
 * @param errorMessage message of the SyntaxError thrown for any other size
 * @return the only element, or the concatenation of the two elements
 */
RE* collapseStack(stack<RE*>& stk, const char* errorMessage)
{
    if (stk.size() == 1)
        return stk.top();
    if (stk.size() == 2)
    {
        compress(stk);
        return stk.top();
    }
    throw SyntaxError(errorMessage);
}

/**
 * Recursive worker that parses `input` starting at position `idx`.
 *
 * Parsing stops either at the end of the input or at a `)`; in the latter
 * case `idx` is left pointing at that `)` so the caller can verify it.
 * Recognised syntax: literal characters, backslash escapes, postfix `*`,
 * infix `+` for alternation and parentheses for grouping. A newline is only
 * tolerated as the very last character of `input`.
 *
 * @param input the complete regular expression text
 * @param idx   in: position to start at; out: position where parsing stopped
 * @return the parsed expression
 * @throws SyntaxError on malformed input
 */
RE* parseRE(string& input, size_t& idx)
{
    stack<RE*> stk;
    for (; idx < input.length(); idx++)
    {
        switch (input[idx])
        {
            case '\n':
                if (idx != input.size() - 1)
                    throw SyntaxError("Cannot have a newline inside of a regex");
                break;
            case '\\':
                idx++;
                if (idx >= input.length())
                    throw SyntaxError("Escape sequence at the end of the string");
                stk.push(parseEscaped(input[idx]));
                break;
            case '*':
            {
                if (stk.empty())
                    throw SyntaxError("Cannot apply kleene star to empty regex");
                // The star binds to the most recently parsed expression only.
                RE* const starred = new StarRE(stk.top());
                stk.pop();
                stk.push(starred);
                break;
            }
            case '+':
            {
                if (stk.empty())
                    throw SyntaxError("Invalid regex: nothing to the left of '+'");
                // Everything parsed so far at this level forms the left operand.
                if (stk.size() > 1)
                {
                    compactStack(stk);
                    compress(stk);
                }
                // Parse the right operand before allocating the node, so that a
                // SyntaxError thrown by the recursive call does not leak a
                // half-built PlusRE.
                RE* const rhs = parseRE(input, ++idx);
                PlusRE* const alternation = new PlusRE(nullptr, nullptr);
                alternation->e = stk.top();
                alternation->f = rhs;
                stk.pop();
                stk.push(alternation);
                // The recursive call stopped on a ')' or at the end of the input;
                // step back so the loop increment lands on that position again
                // and this level sees the ')' as well.
                idx--;
                break;
            }
            case '(':
            {
                RE* const group = parseRE(input, ++idx);
                if (idx >= input.size() || input[idx] != ')')
                    throw SyntaxError("Could not parse regex, unclosed parentheses");
                stk.push(group);
                break;
            }
            case ')':
                // Leave idx on the ')' for the caller to inspect.
                return collapseStack(stk, "Could not parse regex, nothing inside parentheses");
            default:
                stk.push(new SingleRE(input[idx]));
        }
        // Keep the stack at no more than two entries: the concatenation of
        // everything older, and the newest expression (which a following '*'
        // may still bind to).
        compactStack(stk);
    }
    return collapseStack(stk, "Could not parse regex");
}
}
/**
 * Parses a complete regular expression.
 *
 * @param input the regular expression text
 * @return the parsed expression
 * @throws SyntaxError if the text is malformed, including the case where the
 *         recursive parser stopped before the end of the input
 */
RE* parseRE(string& input)
{
    size_t i = 0;
    RE* res = parseRE(input, i);
    // The recursive parser only stops early when it hits a ')'. At this level
    // there is no '(' for it to match, so any early stop is an error, even
    // when the ')' is the last character of the input.
    if (i < input.length())
        throw SyntaxError("Incorrect regex");
    return res;
}
}