From efb88f25190b94b93a1c08f8a8442c5b1ec68ed4 Mon Sep 17 00:00:00 2001 From: Robin Jadoul Date: Sun, 24 Apr 2016 16:54:30 +0200 Subject: [PATCH] Start modification/extension of RE parsing --- include/Lexesis/re.h | 35 ++++++++--------- src/re.cpp | 91 +++++++++++++++++++++++--------------------- 2 files changed, 66 insertions(+), 60 deletions(-) diff --git a/include/Lexesis/re.h b/include/Lexesis/re.h index 9671a30..6625339 100644 --- a/include/Lexesis/re.h +++ b/include/Lexesis/re.h @@ -4,6 +4,7 @@ #include "Lexesis/automata.h" +#include <memory> #include namespace lxs { @@ -11,7 +12,7 @@ namespace lxs { { public: virtual ~RE() {} - virtual int toENFA(ENFA& enfa, int attach) = 0; + virtual State toENFA(ENFA& enfa, State attach) = 0; virtual std::string toRe() = 0; }; @@ -20,7 +21,7 @@ namespace lxs { public: EmptyRE() {} ~EmptyRE() {} - virtual int toENFA(ENFA& enfa, int attach); + virtual State toENFA(ENFA& enfa, State attach); virtual std::string toRe(); }; @@ -29,7 +30,7 @@ namespace lxs { public: EpsilonRE() {} ~EpsilonRE() {} - virtual int toENFA(ENFA& enfa, int attach); + virtual State toENFA(ENFA& enfa, State attach); virtual std::string toRe(); }; @@ -38,7 +39,7 @@ namespace lxs { public: SingleRE(char c) : c(c) {} ~SingleRE() {} - virtual int toENFA(ENFA& enfa, int attach); + virtual State toENFA(ENFA& enfa, State attach); virtual std::string toRe(); char c; @@ -47,37 +48,37 @@ namespace lxs { class ConcatRE : public RE { public: - ConcatRE(RE* e, RE* f) : e(e), f(f) {} - ~ConcatRE() {delete e; delete f;} - virtual int toENFA(ENFA& enfa, int attach); + ConcatRE(std::shared_ptr<RE> e, std::shared_ptr<RE> f) : e(e), f(f) {} + ~ConcatRE() {} + virtual State toENFA(ENFA& enfa, State attach); virtual std::string toRe(); - RE* e, *f; + std::shared_ptr<RE> e, f; }; class StarRE : public RE { public: - StarRE(RE* e) : e(e) {} - ~StarRE() {delete e;} - virtual int toENFA(ENFA& enfa, int attach); + StarRE(std::shared_ptr<RE> e) : e(e) {} + ~StarRE() {} + virtual State toENFA(ENFA& enfa, 
State attach); virtual std::string toRe(); - RE* e; + std::shared_ptr<RE> e; }; class PlusRE : public RE { public: - PlusRE(RE* e, RE* f) : e(e), f(f) {} - ~PlusRE() {delete e; delete f;} - virtual int toENFA(ENFA& enfa, int attach); + PlusRE(std::shared_ptr<RE> e, std::shared_ptr<RE> f) : e(e), f(f) {} + ~PlusRE() {} + virtual State toENFA(ENFA& enfa, State attach); virtual std::string toRe(); - RE* e, *f; + std::shared_ptr<RE> e, f; }; - RE* parseRE(std::string& input); + std::shared_ptr<RE> parseRE(std::string& input); class SyntaxError : public std::runtime_error { diff --git a/src/re.cpp b/src/re.cpp index e884ae4..63e4e40 100644 --- a/src/re.cpp +++ b/src/re.cpp @@ -97,51 +97,41 @@ namespace lxs { return b + 1; } - static void compress(stack<RE*>& stk) - { - RE* a = stk.top(); - stk.pop(); - RE* b = stk.top(); - stk.pop(); - stk.push(new ConcatRE(b, a)); //Attention: reversed order because of stack - } - - static void compactStack(stack<RE*>& stk) - { - if (stk.empty()) return; - RE* tp = stk.top(); - stk.pop(); - while (stk.size() >= 2) - { - compress(stk); - } - stk.push(tp); - } - namespace { - RE* parseRE(string& input, size_t& idx) + void compress(stack<std::shared_ptr<RE>>& stk) { - stack<RE*> stk; + std::shared_ptr<RE> a = stk.top(); + stk.pop(); + std::shared_ptr<RE> b = stk.top(); + stk.pop(); + stk.push(std::make_shared<ConcatRE>(b, a)); //Attention: reversed order because of stack + } + + void compactStack(stack<std::shared_ptr<RE> >& stk) + { + if (stk.empty()) return; + std::shared_ptr<RE> tp = stk.top(); + stk.pop(); + while (stk.size() >= 2) + { + compress(stk); + } + stk.push(tp); + } + + std::shared_ptr<RE> parseRE(string& input, size_t& idx) + { + stack<std::shared_ptr<RE> > stk; for (; idx < input.length(); idx++) { - RE* n; + std::shared_ptr<RE> n; switch (input[idx]) { - case '\n': - if (idx != input.size() - 1) - throw SyntaxError("Cannot have a newline inside of a regex"); - break; - case '\\': idx++; if (idx >= input.length()) throw SyntaxError("Escape sequence at the end of the string"); - if (input[idx] == 'e') - stk.push(new EpsilonRE()); - else if 
(input[idx] == 'E') - stk.push(new EmptyRE()); - else if (input[idx] == '\\' || input[idx] == '*' || input[idx] == '+' || input[idx] == '(' || input[idx] == ')') - stk.push(new SingleRE(input[idx])); + //TODO: escape chars else throw SyntaxError(("invalid escape sequence: \\" + string(1, input[idx])).c_str()); break; @@ -149,19 +139,34 @@ namespace lxs { case '*': if (stk.empty()) throw SyntaxError("Cannot apply kleene star to empty regex"); - n = new StarRE(stk.top()); + n = std::make_shared<StarRE>(stk.top()); stk.pop(); stk.push(n); break; case '+': if (stk.empty()) - throw SyntaxError("Invalid regex: nothing to the left of '+'"); + throw SyntaxError("Cannot apply kleene plus to empty regex"); + n = stk.top(); + stk.pop(); + n = std::make_shared<ConcatRE>(n, std::make_shared<StarRE>(n)); + stk.push(n); + break; + + case '?': + if (stk.empty()) + throw SyntaxError("Cannot apply '?' to empty regex"); + n = std::make_shared<PlusRE>(stk.top(), std::make_shared<EpsilonRE>()); + stk.pop(); + stk.push(n); + break; + + case '|': + if (stk.empty()) + throw SyntaxError("Invalid regex: nothing to the left of '|'"); if (stk.size() > 1) compactStack(stk), compress(stk); - n = new PlusRE(nullptr, nullptr); - ((PlusRE*) n)->e = stk.top(); - ((PlusRE*) n)->f = parseRE(input, ++idx); + n = std::make_shared<PlusRE>(stk.top(), parseRE(input, ++idx)); stk.pop(); stk.push(n); idx--; @@ -182,7 +187,7 @@ namespace lxs { throw SyntaxError("Could not parse regex, nothing inside parentheses"); default: - stk.push(new SingleRE(input[idx])); + stk.push(std::make_shared<SingleRE>(input[idx])); } compactStack(stk); } @@ -195,10 +200,10 @@ namespace lxs { } - RE* parseRE(string& input) + std::shared_ptr<RE> parseRE(string& input) { size_t i = 0; - RE* res = parseRE(input, i); + std::shared_ptr<RE> res = parseRE(input, i); if (i < input.length() - 1) throw SyntaxError("Incorrect regex"); return res;