Start modification/extension of RE parsing
This commit is contained in:
parent
c80fcdb8aa
commit
efb88f2519
|
@ -4,6 +4,7 @@
|
|||
|
||||
#include "Lexesis/automata.h"
|
||||
|
||||
#include <memory>
|
||||
#include <stdexcept>
|
||||
|
||||
namespace lxs {
|
||||
|
@ -11,7 +12,7 @@ namespace lxs {
|
|||
{
|
||||
public:
|
||||
virtual ~RE() {}
|
||||
virtual int toENFA(ENFA& enfa, int attach) = 0;
|
||||
virtual State toENFA(ENFA& enfa, State attach) = 0;
|
||||
virtual std::string toRe() = 0;
|
||||
};
|
||||
|
||||
|
@ -20,7 +21,7 @@ namespace lxs {
|
|||
public:
|
||||
EmptyRE() {}
|
||||
~EmptyRE() {}
|
||||
virtual int toENFA(ENFA& enfa, int attach);
|
||||
virtual State toENFA(ENFA& enfa, State attach);
|
||||
virtual std::string toRe();
|
||||
};
|
||||
|
||||
|
@ -29,7 +30,7 @@ namespace lxs {
|
|||
public:
|
||||
EpsilonRE() {}
|
||||
~EpsilonRE() {}
|
||||
virtual int toENFA(ENFA& enfa, int attach);
|
||||
virtual State toENFA(ENFA& enfa, State attach);
|
||||
virtual std::string toRe();
|
||||
};
|
||||
|
||||
|
@ -38,7 +39,7 @@ namespace lxs {
|
|||
public:
|
||||
SingleRE(char c) : c(c) {}
|
||||
~SingleRE() {}
|
||||
virtual int toENFA(ENFA& enfa, int attach);
|
||||
virtual State toENFA(ENFA& enfa, State attach);
|
||||
virtual std::string toRe();
|
||||
|
||||
char c;
|
||||
|
@ -47,37 +48,37 @@ namespace lxs {
|
|||
// RE node that holds two sub-expressions, `e` and `f` (by its name, the
// concatenation of `e` followed by `f`).
// NOTE(review): this listing shows two variants of the constructor, destructor,
// toENFA() and the members (raw RE* and std::shared_ptr<RE>). They cannot
// coexist in one class and look like the before/after lines of a single
// change -- confirm which set is current.
class ConcatRE : public RE
|
||||
{
|
||||
public:
|
||||
// Raw-pointer variant: stores both pointers as given.
ConcatRE(RE* e, RE* f) : e(e), f(f) {}
|
||||
// Raw-pointer variant: deletes both stored sub-expressions.
~ConcatRE() {delete e; delete f;}
|
||||
// Raw-pointer variant of toENFA: int-based `attach` and return value.
virtual int toENFA(ENFA& enfa, int attach);
|
||||
// shared_ptr variant: copies both shared pointers into the members.
ConcatRE(std::shared_ptr<RE> e, std::shared_ptr<RE> f) : e(e), f(f) {}
|
||||
// shared_ptr variant: nothing to release by hand.
~ConcatRE() {}
|
||||
// shared_ptr variant of toENFA: State-based `attach` and return value.
// Presumably adds this expression to `enfa` starting at `attach` and returns
// the resulting end state -- the definition is not visible here, confirm.
virtual State toENFA(ENFA& enfa, State attach);
|
||||
// Returns a std::string for this node (presumably its regex text -- confirm).
virtual std::string toRe();
|
||||
|
||||
// Sub-expressions, raw-pointer variant.
RE* e, *f;
|
||||
// Sub-expressions, shared_ptr variant.
std::shared_ptr<RE> e, f;
|
||||
};
|
||||
|
||||
// RE node that wraps a single sub-expression `e`; the parser builds it when it
// meets '*' (Kleene star, per the parser's error message).
// NOTE(review): this listing shows two variants of the constructor, destructor,
// toENFA() and the member (raw RE* and std::shared_ptr<RE>). They cannot
// coexist in one class and look like the before/after lines of a single
// change -- confirm which set is current.
class StarRE : public RE
|
||||
{
|
||||
public:
|
||||
// Raw-pointer variant: stores the pointer as given.
StarRE(RE* e) : e(e) {}
|
||||
// Raw-pointer variant: deletes the stored sub-expression.
~StarRE() {delete e;}
|
||||
// Raw-pointer variant of toENFA: int-based `attach` and return value.
virtual int toENFA(ENFA& enfa, int attach);
|
||||
// shared_ptr variant: copies the shared pointer into the member.
StarRE(std::shared_ptr<RE> e) : e(e) {}
|
||||
// shared_ptr variant: nothing to release by hand.
~StarRE() {}
|
||||
// shared_ptr variant of toENFA: State-based `attach` and return value.
// Presumably adds this expression to `enfa` starting at `attach` and returns
// the resulting end state -- the definition is not visible here, confirm.
virtual State toENFA(ENFA& enfa, State attach);
|
||||
// Returns a std::string for this node (presumably its regex text -- confirm).
virtual std::string toRe();
|
||||
|
||||
// Wrapped sub-expression, raw-pointer variant.
RE* e;
|
||||
// Wrapped sub-expression, shared_ptr variant.
std::shared_ptr<RE> e;
|
||||
};
|
||||
|
||||
// RE node that holds two alternatives, `e` and `f`. The parser builds it for
// `e|f` and, with an EpsilonRE as `f`, for `e?`.
// NOTE(review): this listing shows two variants of the constructor, destructor,
// toENFA() and the members (raw RE* and std::shared_ptr<RE>). They cannot
// coexist in one class and look like the before/after lines of a single
// change -- confirm which set is current.
class PlusRE : public RE
|
||||
{
|
||||
public:
|
||||
// Raw-pointer variant: stores both pointers as given.
PlusRE(RE* e, RE* f) : e(e), f(f) {}
|
||||
// Raw-pointer variant: deletes both stored sub-expressions.
~PlusRE() {delete e; delete f;}
|
||||
// Raw-pointer variant of toENFA: int-based `attach` and return value.
virtual int toENFA(ENFA& enfa, int attach);
|
||||
// shared_ptr variant: copies both shared pointers into the members.
PlusRE(std::shared_ptr<RE> e, std::shared_ptr<RE> f) : e(e), f(f) {}
|
||||
// shared_ptr variant: nothing to release by hand.
~PlusRE() {}
|
||||
// shared_ptr variant of toENFA: State-based `attach` and return value.
// Presumably adds this expression to `enfa` starting at `attach` and returns
// the resulting end state -- the definition is not visible here, confirm.
virtual State toENFA(ENFA& enfa, State attach);
|
||||
// Returns a std::string for this node (presumably its regex text -- confirm).
virtual std::string toRe();
|
||||
|
||||
// Alternatives, raw-pointer variant.
RE* e, *f;
|
||||
// Alternatives, shared_ptr variant.
std::shared_ptr<RE> e, f;
|
||||
};
|
||||
|
||||
// Parses the regular expression text in `input` and returns the root RE node.
// Raw-pointer variant of the declaration.
// NOTE(review): two declarations that differ only in return type cannot
// coexist; these look like the before/after lines of one change -- confirm.
RE* parseRE(std::string& input);
|
||||
// shared_ptr variant of the same declaration.
std::shared_ptr<RE> parseRE(std::string& input);
|
||||
|
||||
class SyntaxError : public std::runtime_error
|
||||
{
|
||||
|
|
91
src/re.cpp
91
src/re.cpp
|
@ -97,51 +97,41 @@ namespace lxs {
|
|||
return b + 1;
|
||||
}
|
||||
|
||||
// Raw-pointer variant: replaces the two topmost stack entries with one
// ConcatRE node built from them.
// No size check is done here, so the caller must guarantee at least two
// entries; calling top() on an empty std::stack is undefined behaviour.
static void compress(stack<RE*>& stk)
|
||||
{
|
||||
// `a` is the most recently pushed entry ...
RE* a = stk.top();
|
||||
stk.pop();
|
||||
// ... and `b` is the one pushed before it.
RE* b = stk.top();
|
||||
stk.pop();
|
||||
stk.push(new ConcatRE(b, a)); //Attention: reversed order because of stack
|
||||
}
|
||||
|
||||
// Raw-pointer variant: folds everything below the top entry into a single
// ConcatRE chain while leaving the top entry untouched, so the stack ends up
// with at most two entries. Does nothing on an empty stack.
// Presumably the top is kept separate so a following postfix operator applies
// only to the most recent item -- confirm against the parser loop.
static void compactStack(stack<RE*>& stk)
|
||||
{
|
||||
if (stk.empty()) return;
|
||||
// Set the newest entry aside while the rest is folded.
RE* tp = stk.top();
|
||||
stk.pop();
|
||||
// Each compress() call merges the two topmost remaining entries into one.
while (stk.size() >= 2)
|
||||
{
|
||||
compress(stk);
|
||||
}
|
||||
// Put the newest entry back on top of the folded remainder.
stk.push(tp);
|
||||
}
|
||||
|
||||
namespace {
|
||||
RE* parseRE(string& input, size_t& idx)
|
||||
// shared_ptr variant: replaces the two topmost stack entries with one ConcatRE
// node built from them.
// No size check is done here, so the caller must guarantee at least two
// entries; calling top() on an empty std::stack is undefined behaviour.
void compress(stack<std::shared_ptr<RE>>& stk)
|
||||
{
|
||||
// NOTE(review): the next declaration reuses the parameter name `stk` and does
// not fit this function; it looks like a stray line from another function in
// this listing -- confirm.
stack<RE*> stk;
|
||||
// `a` is the most recently pushed entry ...
std::shared_ptr<RE> a = stk.top();
|
||||
stk.pop();
|
||||
// ... and `b` is the one pushed before it.
std::shared_ptr<RE> b = stk.top();
|
||||
stk.pop();
|
||||
stk.push(std::make_shared<ConcatRE>(b, a)); //Attention: reversed order because of stack
|
||||
}
|
||||
|
||||
// shared_ptr variant: folds everything below the top entry into a single
// ConcatRE chain while leaving the top entry untouched, so the stack ends up
// with at most two entries. Does nothing on an empty stack.
// Presumably the top is kept separate so a following postfix operator applies
// only to the most recent item -- confirm against the parser loop.
void compactStack(stack<std::shared_ptr<RE> >& stk)
|
||||
{
|
||||
if (stk.empty()) return;
|
||||
// Set the newest entry aside while the rest is folded.
std::shared_ptr<RE> tp = stk.top();
|
||||
stk.pop();
|
||||
// Each compress() call merges the two topmost remaining entries into one.
while (stk.size() >= 2)
|
||||
{
|
||||
compress(stk);
|
||||
}
|
||||
// Put the newest entry back on top of the folded remainder.
stk.push(tp);
|
||||
}
|
||||
|
||||
std::shared_ptr<RE> parseRE(string& input, size_t& idx)
|
||||
{
|
||||
stack<std::shared_ptr<RE> > stk;
|
||||
for (; idx < input.length(); idx++)
|
||||
{
|
||||
RE* n;
|
||||
std::shared_ptr<RE> n;
|
||||
switch (input[idx])
|
||||
{
|
||||
case '\n':
|
||||
if (idx != input.size() - 1)
|
||||
throw SyntaxError("Cannot have a newline inside of a regex");
|
||||
break;
|
||||
|
||||
case '\\':
|
||||
idx++;
|
||||
if (idx >= input.length())
|
||||
throw SyntaxError("Escape sequence at the end of the string");
|
||||
if (input[idx] == 'e')
|
||||
stk.push(new EpsilonRE());
|
||||
else if (input[idx] == 'E')
|
||||
stk.push(new EmptyRE());
|
||||
else if (input[idx] == '\\' || input[idx] == '*' || input[idx] == '+' || input[idx] == '(' || input[idx] == ')')
|
||||
stk.push(new SingleRE(input[idx]));
|
||||
//TODO: escape chars
|
||||
else
|
||||
throw SyntaxError(("invalid escape sequence: \\" + string(1, input[idx])).c_str());
|
||||
break;
|
||||
|
@ -149,19 +139,34 @@ namespace lxs {
|
|||
case '*':
|
||||
if (stk.empty())
|
||||
throw SyntaxError("Cannot apply kleene star to empty regex");
|
||||
n = new StarRE(stk.top());
|
||||
n = std::make_shared<StarRE>(stk.top());
|
||||
stk.pop();
|
||||
stk.push(n);
|
||||
break;
|
||||
|
||||
case '+':
|
||||
if (stk.empty())
|
||||
throw SyntaxError("Invalid regex: nothing to the left of '+'");
|
||||
throw SyntaxError("Cannot apply kleene plus to empty regex");
|
||||
n = stk.top();
|
||||
stk.pop();
|
||||
n = std::make_shared<ConcatRE>(n, std::make_shared<StarRE>(n));
|
||||
stk.push(n);
|
||||
break;
|
||||
|
||||
case '?':
|
||||
if (stk.empty())
|
||||
throw SyntaxError("Cannot apply '?' to empty regex");
|
||||
n = std::make_shared<PlusRE>(stk.top(), std::make_shared<EpsilonRE>());
|
||||
stk.pop();
|
||||
stk.push(n);
|
||||
break;
|
||||
|
||||
case '|':
|
||||
if (stk.empty())
|
||||
throw SyntaxError("Invalid regex: nothing to the left of '|'");
|
||||
if (stk.size() > 1)
|
||||
compactStack(stk), compress(stk);
|
||||
n = new PlusRE(nullptr, nullptr);
|
||||
((PlusRE*) n)->e = stk.top();
|
||||
((PlusRE*) n)->f = parseRE(input, ++idx);
|
||||
n = std::make_shared<PlusRE>(stk.top(), parseRE(input, ++idx));
|
||||
stk.pop();
|
||||
stk.push(n);
|
||||
idx--;
|
||||
|
@ -182,7 +187,7 @@ namespace lxs {
|
|||
throw SyntaxError("Could not parse regex, nothing inside parentheses");
|
||||
|
||||
default:
|
||||
stk.push(new SingleRE(input[idx]));
|
||||
stk.push(std::make_shared<SingleRE>(input[idx]));
|
||||
}
|
||||
compactStack(stk);
|
||||
}
|
||||
|
@ -195,10 +200,10 @@ namespace lxs {
|
|||
|
||||
}
|
||||
|
||||
RE* parseRE(string& input)
|
||||
std::shared_ptr<RE> parseRE(string& input)
|
||||
{
|
||||
size_t i = 0;
|
||||
RE* res = parseRE(input, i);
|
||||
std::shared_ptr<RE> res = parseRE(input, i);
|
||||
if (i < input.length() - 1)
|
||||
throw SyntaxError("Incorrect regex");
|
||||
return res;
|
||||
|
|
Loading…
Reference in New Issue