Start modification/extension of RE parsing

This commit is contained in:
Robin Jadoul 2016-04-24 16:54:30 +02:00
parent c80fcdb8aa
commit efb88f2519
2 changed files with 66 additions and 60 deletions

View File

@ -4,6 +4,7 @@
#include "Lexesis/automata.h"
#include <memory>
#include <stdexcept>
namespace lxs {
@ -11,7 +12,7 @@ namespace lxs {
{
public:
virtual ~RE() {}
virtual int toENFA(ENFA& enfa, int attach) = 0;
virtual State toENFA(ENFA& enfa, State attach) = 0;
virtual std::string toRe() = 0;
};
@ -20,7 +21,7 @@ namespace lxs {
public:
EmptyRE() {}
~EmptyRE() {}
virtual int toENFA(ENFA& enfa, int attach);
virtual State toENFA(ENFA& enfa, State attach);
virtual std::string toRe();
};
@ -29,7 +30,7 @@ namespace lxs {
public:
EpsilonRE() {}
~EpsilonRE() {}
virtual int toENFA(ENFA& enfa, int attach);
virtual State toENFA(ENFA& enfa, State attach);
virtual std::string toRe();
};
@ -38,7 +39,7 @@ namespace lxs {
public:
SingleRE(char c) : c(c) {}
~SingleRE() {}
virtual int toENFA(ENFA& enfa, int attach);
virtual State toENFA(ENFA& enfa, State attach);
virtual std::string toRe();
char c;
@ -47,37 +48,37 @@ namespace lxs {
// Regular-expression node with two children `e` and `f`; the parser's
// compress() helper builds it to join two adjacent sub-expressions, with `e`
// as the left operand and `f` as the right one.
// NOTE(review): this text is a rendered diff without +/- markers, so two
// variants of each member appear below (raw `RE*` and `std::shared_ptr<RE>`).
// Presumably the raw-pointer lines are the removed ones — confirm against the
// repository before treating this as a compilable class.
class ConcatRE : public RE
{
public:
// Raw-pointer variant: the node deletes both children in its destructor.
ConcatRE(RE* e, RE* f) : e(e), f(f) {}
~ConcatRE() {delete e; delete f;}
virtual int toENFA(ENFA& enfa, int attach);
// shared_ptr variant: the destructor body is empty, the members release the
// children themselves.
ConcatRE(std::shared_ptr<RE> e, std::shared_ptr<RE> f) : e(e), f(f) {}
~ConcatRE() {}
// Declared here, defined elsewhere; presumably adds this expression to `enfa`
// starting at state `attach` — TODO confirm against the implementation.
virtual State toENFA(ENFA& enfa, State attach);
virtual std::string toRe();
// Left (`e`) and right (`f`) operands.
RE* e, *f;
std::shared_ptr<RE> e, f;
};
// Regular-expression node with a single child `e`; the parser creates it for
// the '*' operator (kleene star) and as part of the '+' operator.
// NOTE(review): this text is a rendered diff without +/- markers, so two
// variants of each member appear below (raw `RE*` and `std::shared_ptr<RE>`).
// Presumably the raw-pointer lines are the removed ones — confirm against the
// repository before treating this as a compilable class.
class StarRE : public RE
{
public:
// Raw-pointer variant: the node deletes its child in the destructor.
StarRE(RE* e) : e(e) {}
~StarRE() {delete e;}
virtual int toENFA(ENFA& enfa, int attach);
// shared_ptr variant: the destructor body is empty, the member releases the
// child itself.
StarRE(std::shared_ptr<RE> e) : e(e) {}
~StarRE() {}
// Declared here, defined elsewhere; presumably adds this expression to `enfa`
// starting at state `attach` — TODO confirm against the implementation.
virtual State toENFA(ENFA& enfa, State attach);
virtual std::string toRe();
// The repeated sub-expression.
RE* e;
std::shared_ptr<RE> e;
};
// Regular-expression node with two children `e` and `f`. Despite the name it
// is not the kleene plus: the parser creates it for the '|' operator and for
// '?' (with an EpsilonRE as second child), i.e. for a choice between `e` and
// `f`. The '+' operator is built from ConcatRE and StarRE instead.
// NOTE(review): this text is a rendered diff without +/- markers, so two
// variants of each member appear below (raw `RE*` and `std::shared_ptr<RE>`).
// Presumably the raw-pointer lines are the removed ones — confirm against the
// repository before treating this as a compilable class.
class PlusRE : public RE
{
public:
// Raw-pointer variant: the node deletes both children in its destructor.
PlusRE(RE* e, RE* f) : e(e), f(f) {}
~PlusRE() {delete e; delete f;}
virtual int toENFA(ENFA& enfa, int attach);
// shared_ptr variant: the destructor body is empty, the members release the
// children themselves.
PlusRE(std::shared_ptr<RE> e, std::shared_ptr<RE> f) : e(e), f(f) {}
~PlusRE() {}
// Declared here, defined elsewhere; presumably adds this expression to `enfa`
// starting at state `attach` — TODO confirm against the implementation.
virtual State toENFA(ENFA& enfa, State attach);
virtual std::string toRe();
// The two alternatives.
RE* e, *f;
std::shared_ptr<RE> e, f;
};
// Parses `input` as a regular expression and returns the resulting RE node.
// The definition throws SyntaxError when the input is not a valid regex.
// NOTE(review): two signatures appear because this text is a rendered diff;
// presumably the `RE*` line is the removed one and the `std::shared_ptr<RE>`
// line is its replacement — confirm against the repository.
RE* parseRE(std::string& input);
std::shared_ptr<RE> parseRE(std::string& input);
class SyntaxError : public std::runtime_error
{

View File

@ -97,51 +97,41 @@ namespace lxs {
return b + 1;
}
// Replaces the two topmost entries of `stk` with one ConcatRE node that joins
// them. The stack size is not checked here, so the caller must make sure at
// least two entries are present.
// The new node is allocated with `new`; it takes both popped pointers as its
// children.
static void compress(stack<RE*>& stk)
{
// `a` was pushed last, so it is the right-hand operand.
RE* a = stk.top();
stk.pop();
// `b` was pushed before `a`, so it is the left-hand operand.
RE* b = stk.top();
stk.pop();
stk.push(new ConcatRE(b, a)); //Attention: reversed order because of stack
}
// Folds every entry below the top of `stk` into a single ConcatRE chain while
// leaving the top entry itself untouched. Afterwards the stack holds at most
// two entries: the combined prefix (if any) and the original top.
// An empty stack is left as it is.
static void compactStack(stack<RE*>& stk)
{
if (stk.empty()) return;
// Set the top entry aside so it is not merged into the prefix; the operator
// cases in parseRE ('*', '+', '?') act on stk.top() only.
RE* tp = stk.top();
stk.pop();
// Merge the remaining entries pairwise until at most one is left.
while (stk.size() >= 2)
{
compress(stk);
}
// Put the untouched top entry back.
stk.push(tp);
}
namespace {
RE* parseRE(string& input, size_t& idx)
// Replaces the two topmost entries of `stk` with one ConcatRE node that joins
// them. The stack size is not checked here, so the caller must make sure at
// least two entries are present.
void compress(stack<std::shared_ptr<RE>>& stk)
{
// `a` was pushed last, so it is the right-hand operand.
std::shared_ptr<RE> a = stk.top();
stk.pop();
// `b` was pushed before `a`, so it is the left-hand operand.
std::shared_ptr<RE> b = stk.top();
stk.pop();
stk.push(std::make_shared<ConcatRE>(b, a)); //Attention: reversed order because of stack
}
// Folds every entry below the top of `stk` into a single ConcatRE chain while
// leaving the top entry itself untouched. Afterwards the stack holds at most
// two entries: the combined prefix (if any) and the original top.
// An empty stack is left as it is.
void compactStack(stack<std::shared_ptr<RE> >& stk)
{
if (stk.empty()) return;
// Set the top entry aside so it is not merged into the prefix; the operator
// cases in parseRE ('*', '+', '?') act on stk.top() only.
std::shared_ptr<RE> tp = stk.top();
stk.pop();
// Merge the remaining entries pairwise until at most one is left.
while (stk.size() >= 2)
{
compress(stk);
}
// Put the untouched top entry back.
stk.push(tp);
}
std::shared_ptr<RE> parseRE(string& input, size_t& idx)
{
stack<std::shared_ptr<RE> > stk;
for (; idx < input.length(); idx++)
{
RE* n;
std::shared_ptr<RE> n;
switch (input[idx])
{
case '\n':
if (idx != input.size() - 1)
throw SyntaxError("Cannot have a newline inside of a regex");
break;
case '\\':
idx++;
if (idx >= input.length())
throw SyntaxError("Escape sequence at the end of the string");
if (input[idx] == 'e')
stk.push(new EpsilonRE());
else if (input[idx] == 'E')
stk.push(new EmptyRE());
else if (input[idx] == '\\' || input[idx] == '*' || input[idx] == '+' || input[idx] == '(' || input[idx] == ')')
stk.push(new SingleRE(input[idx]));
//TODO: escape chars
else
throw SyntaxError(("invalid escape sequence: \\" + string(1, input[idx])).c_str());
break;
@ -149,19 +139,34 @@ namespace lxs {
case '*':
if (stk.empty())
throw SyntaxError("Cannot apply kleene star to empty regex");
n = new StarRE(stk.top());
n = std::make_shared<StarRE>(stk.top());
stk.pop();
stk.push(n);
break;
case '+':
if (stk.empty())
throw SyntaxError("Invalid regex: nothing to the left of '+'");
throw SyntaxError("Cannot apply kleene plus to empty regex");
n = stk.top();
stk.pop();
n = std::make_shared<ConcatRE>(n, std::make_shared<StarRE>(n));
stk.push(n);
break;
case '?':
if (stk.empty())
throw SyntaxError("Cannot apply '?' to empty regex");
n = std::make_shared<PlusRE>(stk.top(), std::make_shared<EpsilonRE>());
stk.pop();
stk.push(n);
break;
case '|':
if (stk.empty())
throw SyntaxError("Invalid regex: nothing to the left of '|'");
if (stk.size() > 1)
compactStack(stk), compress(stk);
n = new PlusRE(nullptr, nullptr);
((PlusRE*) n)->e = stk.top();
((PlusRE*) n)->f = parseRE(input, ++idx);
n = std::make_shared<PlusRE>(stk.top(), parseRE(input, ++idx));
stk.pop();
stk.push(n);
idx--;
@ -182,7 +187,7 @@ namespace lxs {
throw SyntaxError("Could not parse regex, nothing inside parentheses");
default:
stk.push(new SingleRE(input[idx]));
stk.push(std::make_shared<SingleRE>(input[idx]));
}
compactStack(stk);
}
@ -195,10 +200,10 @@ namespace lxs {
}
RE* parseRE(string& input)
std::shared_ptr<RE> parseRE(string& input)
{
size_t i = 0;
RE* res = parseRE(input, i);
std::shared_ptr<RE> res = parseRE(input, i);
if (i < input.length() - 1)
throw SyntaxError("Incorrect regex");
return res;