diff --git a/include/Lexesis/re.h b/include/Lexesis/re.h index 60875b8..3d30bf3 100644 --- a/include/Lexesis/re.h +++ b/include/Lexesis/re.h @@ -45,6 +45,17 @@ namespace lxs { char c; }; + class MultiRE : public RE + { + public: + MultiRE(std::vector chars) : chars(std::move(chars)) {} + ~MultiRE() {} + virtual State toENFA(ENFA& enfa, State attach); + virtual std::string toRe(); + + std::vector chars; + }; + class ConcatRE : public RE { public: diff --git a/src/re.cpp b/src/re.cpp index e78cc80..face93f 100644 --- a/src/re.cpp +++ b/src/re.cpp @@ -48,6 +48,23 @@ namespace lxs { return attach + 1; } + string MultiRE::toRe() + { + //FIXME: this does not consider characters that need escaping + return "[" + string(chars.begin(), chars.end()) + "]"; + } + + State MultiRE::toENFA(ENFA& enfa, State attach) + { + enfa.numStates = std::max(attach + 1, enfa.numStates); + enfa.accepting.clear(); + enfa.accepting.insert(attach + 1); + for (char c : chars) { + enfa.delta[attach][c].insert(attach + 1); + } + return attach + 1; + } + string ConcatRE::toRe() { return e->toRe() + f->toRe(); @@ -158,22 +175,9 @@ namespace lxs { return c; } - void sumREs(std::vector >& res) - { - for (std::size_t step = 1; step < res.size(); step <<= 1) - { - for (std::size_t i = 0; i < res.size(); i += step * 2) - { - if (i + step < res.size()) - res[i] = std::make_shared(res[i], res[i + step]); - } - } - } - std::shared_ptr parseCharacterClass(const string& input, size_t& idx) { if (idx >= input.size()) throw SyntaxError("Unclosed character class"); - std::vector > chars; std::set used_chars; bool invert = false; @@ -243,24 +247,22 @@ namespace lxs { if (idx >= input.size()) throw SyntaxError("Unclosed character class"); + std::vector chars; for (int i = 0; i < 256; i++) { if (invert ^ (used_chars.count((char) i) > 0)) - chars.push_back(std::make_shared((char) i)); + chars.push_back((char) i); } - sumREs(chars); - - return chars[0]; + return std::make_shared(chars); } std::shared_ptr dotChar() { - std::vector > any; + std::vector any; for (int i = 0; i < 256; i++) if ((char) i != '\n') //Dot matches anything except newlines - any.push_back(std::make_shared((char) i)); - sumREs(any); - return any[0]; + any.push_back((char) i); + return std::make_shared(any); } std::shared_ptr parseRE(const string& input, size_t& idx)