Start modification/extension of RE parsing

This commit is contained in:
Robin Jadoul 2016-04-24 16:54:30 +02:00
parent c80fcdb8aa
commit efb88f2519
2 changed files with 66 additions and 60 deletions

View File

@ -4,6 +4,7 @@
#include "Lexesis/automata.h" #include "Lexesis/automata.h"
#include <memory>
#include <stdexcept> #include <stdexcept>
namespace lxs { namespace lxs {
@ -11,7 +12,7 @@ namespace lxs {
{ {
public: public:
virtual ~RE() {} virtual ~RE() {}
virtual int toENFA(ENFA& enfa, int attach) = 0; virtual State toENFA(ENFA& enfa, State attach) = 0;
virtual std::string toRe() = 0; virtual std::string toRe() = 0;
}; };
@ -20,7 +21,7 @@ namespace lxs {
public: public:
EmptyRE() {} EmptyRE() {}
~EmptyRE() {} ~EmptyRE() {}
virtual int toENFA(ENFA& enfa, int attach); virtual State toENFA(ENFA& enfa, State attach);
virtual std::string toRe(); virtual std::string toRe();
}; };
@ -29,7 +30,7 @@ namespace lxs {
public: public:
EpsilonRE() {} EpsilonRE() {}
~EpsilonRE() {} ~EpsilonRE() {}
virtual int toENFA(ENFA& enfa, int attach); virtual State toENFA(ENFA& enfa, State attach);
virtual std::string toRe(); virtual std::string toRe();
}; };
@ -38,7 +39,7 @@ namespace lxs {
public: public:
SingleRE(char c) : c(c) {} SingleRE(char c) : c(c) {}
~SingleRE() {} ~SingleRE() {}
virtual int toENFA(ENFA& enfa, int attach); virtual State toENFA(ENFA& enfa, State attach);
virtual std::string toRe(); virtual std::string toRe();
char c; char c;
@ -47,37 +48,37 @@ namespace lxs {
class ConcatRE : public RE class ConcatRE : public RE
{ {
public: public:
ConcatRE(RE* e, RE* f) : e(e), f(f) {} ConcatRE(std::shared_ptr<RE> e, std::shared_ptr<RE> f) : e(e), f(f) {}
~ConcatRE() {delete e; delete f;} ~ConcatRE() {}
virtual int toENFA(ENFA& enfa, int attach); virtual State toENFA(ENFA& enfa, State attach);
virtual std::string toRe(); virtual std::string toRe();
RE* e, *f; std::shared_ptr<RE> e, f;
}; };
class StarRE : public RE class StarRE : public RE
{ {
public: public:
StarRE(RE* e) : e(e) {} StarRE(std::shared_ptr<RE> e) : e(e) {}
~StarRE() {delete e;} ~StarRE() {}
virtual int toENFA(ENFA& enfa, int attach); virtual State toENFA(ENFA& enfa, State attach);
virtual std::string toRe(); virtual std::string toRe();
RE* e; std::shared_ptr<RE> e;
}; };
class PlusRE : public RE class PlusRE : public RE
{ {
public: public:
PlusRE(RE* e, RE* f) : e(e), f(f) {} PlusRE(std::shared_ptr<RE> e, std::shared_ptr<RE> f) : e(e), f(f) {}
~PlusRE() {delete e; delete f;} ~PlusRE() {}
virtual int toENFA(ENFA& enfa, int attach); virtual State toENFA(ENFA& enfa, State attach);
virtual std::string toRe(); virtual std::string toRe();
RE* e, *f; std::shared_ptr<RE> e, f;
}; };
RE* parseRE(std::string& input); std::shared_ptr<RE> parseRE(std::string& input);
class SyntaxError : public std::runtime_error class SyntaxError : public std::runtime_error
{ {

View File

@ -97,51 +97,41 @@ namespace lxs {
return b + 1; return b + 1;
} }
static void compress(stack<RE*>& stk)
{
RE* a = stk.top();
stk.pop();
RE* b = stk.top();
stk.pop();
stk.push(new ConcatRE(b, a)); //Attention: reversed order because of stack
}
static void compactStack(stack<RE*>& stk)
{
if (stk.empty()) return;
RE* tp = stk.top();
stk.pop();
while (stk.size() >= 2)
{
compress(stk);
}
stk.push(tp);
}
namespace { namespace {
RE* parseRE(string& input, size_t& idx) void compress(stack<std::shared_ptr<RE>>& stk)
{ {
stack<RE*> stk; std::shared_ptr<RE> a = stk.top();
stk.pop();
std::shared_ptr<RE> b = stk.top();
stk.pop();
stk.push(std::make_shared<ConcatRE>(b, a)); //Attention: reversed order because of stack
}
void compactStack(stack<std::shared_ptr<RE> >& stk)
{
if (stk.empty()) return;
std::shared_ptr<RE> tp = stk.top();
stk.pop();
while (stk.size() >= 2)
{
compress(stk);
}
stk.push(tp);
}
std::shared_ptr<RE> parseRE(string& input, size_t& idx)
{
stack<std::shared_ptr<RE> > stk;
for (; idx < input.length(); idx++) for (; idx < input.length(); idx++)
{ {
RE* n; std::shared_ptr<RE> n;
switch (input[idx]) switch (input[idx])
{ {
case '\n':
if (idx != input.size() - 1)
throw SyntaxError("Cannot have a newline inside of a regex");
break;
case '\\': case '\\':
idx++; idx++;
if (idx >= input.length()) if (idx >= input.length())
throw SyntaxError("Escape sequence at the end of the string"); throw SyntaxError("Escape sequence at the end of the string");
if (input[idx] == 'e') //TODO: escape chars
stk.push(new EpsilonRE());
else if (input[idx] == 'E')
stk.push(new EmptyRE());
else if (input[idx] == '\\' || input[idx] == '*' || input[idx] == '+' || input[idx] == '(' || input[idx] == ')')
stk.push(new SingleRE(input[idx]));
else else
throw SyntaxError(("invalid escape sequence: \\" + string(1, input[idx])).c_str()); throw SyntaxError(("invalid escape sequence: \\" + string(1, input[idx])).c_str());
break; break;
@ -149,19 +139,34 @@ namespace lxs {
case '*': case '*':
if (stk.empty()) if (stk.empty())
throw SyntaxError("Cannot apply kleene star to empty regex"); throw SyntaxError("Cannot apply kleene star to empty regex");
n = new StarRE(stk.top()); n = std::make_shared<StarRE>(stk.top());
stk.pop(); stk.pop();
stk.push(n); stk.push(n);
break; break;
case '+': case '+':
if (stk.empty()) if (stk.empty())
throw SyntaxError("Invalid regex: nothing to the left of '+'"); throw SyntaxError("Cannot apply kleene plus to empty regex");
n = stk.top();
stk.pop();
n = std::make_shared<ConcatRE>(n, std::make_shared<StarRE>(n));
stk.push(n);
break;
case '?':
if (stk.empty())
throw SyntaxError("Cannot apply '?' to empty regex");
n = std::make_shared<PlusRE>(stk.top(), std::make_shared<EpsilonRE>());
stk.pop();
stk.push(n);
break;
case '|':
if (stk.empty())
throw SyntaxError("Invalid regex: nothing to the left of '|'");
if (stk.size() > 1) if (stk.size() > 1)
compactStack(stk), compress(stk); compactStack(stk), compress(stk);
n = new PlusRE(nullptr, nullptr); n = std::make_shared<PlusRE>(stk.top(), parseRE(input, ++idx));
((PlusRE*) n)->e = stk.top();
((PlusRE*) n)->f = parseRE(input, ++idx);
stk.pop(); stk.pop();
stk.push(n); stk.push(n);
idx--; idx--;
@ -182,7 +187,7 @@ namespace lxs {
throw SyntaxError("Could not parse regex, nothing inside parentheses"); throw SyntaxError("Could not parse regex, nothing inside parentheses");
default: default:
stk.push(new SingleRE(input[idx])); stk.push(std::make_shared<SingleRE>(input[idx]));
} }
compactStack(stk); compactStack(stk);
} }
@ -195,10 +200,10 @@ namespace lxs {
} }
RE* parseRE(string& input) std::shared_ptr<RE> parseRE(string& input)
{ {
size_t i = 0; size_t i = 0;
RE* res = parseRE(input, i); std::shared_ptr<RE> res = parseRE(input, i);
if (i < input.length() - 1) if (i < input.length() - 1)
throw SyntaxError("Incorrect regex"); throw SyntaxError("Incorrect regex");
return res; return res;