Speed up: implement MultiRE

This commit is contained in:
Robin Jadoul 2016-05-27 12:16:18 +02:00
parent 81f3c022c8
commit 0735760eef
2 changed files with 34 additions and 21 deletions

View File

@ -45,6 +45,17 @@ namespace lxs {
char c; char c;
}; };
/**
 * Regular expression node that matches one character out of a set.
 *
 * The accepted characters are stored directly in `chars`; toENFA() emits one
 * transition per stored character and toRe() renders them as a bracketed
 * character class.
 */
class MultiRE : public RE
{
    public:
        /**
         * @param chars the characters this node accepts; the vector is taken
         *              by value and moved into the node.
         *
         * `explicit` so a std::vector<char> is never silently converted into
         * a MultiRE.
         */
        explicit MultiRE(std::vector<char> chars) : chars(std::move(chars)) {}
        ~MultiRE() {}

        /// Adds this node's transitions to `enfa`, starting at state `attach`.
        virtual State toENFA(ENFA& enfa, State attach);

        /// Returns the textual form of this node.
        virtual std::string toRe();

        /// Characters accepted by this node.
        std::vector<char> chars;
};
class ConcatRE : public RE class ConcatRE : public RE
{ {
public: public:

View File

@ -48,6 +48,23 @@ namespace lxs {
return attach + 1; return attach + 1;
} }
/**
 * Renders this node as a bracketed character class: '[' followed by every
 * stored character verbatim, followed by ']'.
 */
string MultiRE::toRe()
{
    //FIXME: this does not consider characters that need escaping
    string rendered;
    rendered.reserve(chars.size() + 2);
    rendered.push_back('[');
    rendered.append(chars.begin(), chars.end());
    rendered.push_back(']');
    return rendered;
}
/**
 * Wires this node into `enfa`: for every stored character a transition from
 * `attach` to `attach + 1` is added, and `attach + 1` becomes the only
 * accepting state.
 *
 * @param enfa   automaton being built; numStates, accepting and delta are updated
 * @param attach state the transitions start from
 * @return `attach + 1`, the state reached after consuming one character
 */
State MultiRE::toENFA(ENFA& enfa, State attach)
{
    const auto target = attach + 1;

    // Raise the state count only when it is below the new target state.
    if (enfa.numStates < target)
        enfa.numStates = target;

    // Any previously accepting states are dropped; only the target accepts.
    enfa.accepting.clear();
    enfa.accepting.insert(target);

    // The delta lookup stays inside the loop so an empty character set
    // leaves the transition table untouched.
    for (const char symbol : chars) {
        enfa.delta[attach][symbol].insert(target);
    }

    return target;
}
string ConcatRE::toRe() string ConcatRE::toRe()
{ {
return e->toRe() + f->toRe(); return e->toRe() + f->toRe();
@ -158,22 +175,9 @@ namespace lxs {
return c; return c;
} }
/**
 * Combines all expressions in `res` into one PlusRE tree by pairwise merging.
 *
 * Each pass merges the element at `left + stride` into the element at `left`
 * and then doubles the stride, so the combined expression ends up in res[0].
 * Vectors with fewer than two elements are left unchanged.
 */
void sumREs(std::vector<std::shared_ptr<RE> >& res)
{
    const std::size_t count = res.size();
    for (std::size_t stride = 1; stride < count; stride *= 2)
    {
        // Once left + stride runs off the end, no later slot has a partner.
        for (std::size_t left = 0; left + stride < count; left += 2 * stride)
            res[left] = std::make_shared<PlusRE>(res[left], res[left + stride]);
    }
}
std::shared_ptr<RE> parseCharacterClass(const string& input, size_t& idx) { std::shared_ptr<RE> parseCharacterClass(const string& input, size_t& idx) {
if (idx >= input.size()) if (idx >= input.size())
throw SyntaxError("Unclosed character class"); throw SyntaxError("Unclosed character class");
std::vector<std::shared_ptr<RE> > chars;
std::set<char> used_chars; std::set<char> used_chars;
bool invert = false; bool invert = false;
@ -243,24 +247,22 @@ namespace lxs {
if (idx >= input.size()) if (idx >= input.size())
throw SyntaxError("Unclosed character class"); throw SyntaxError("Unclosed character class");
std::vector<char> chars;
for (int i = 0; i < 256; i++) for (int i = 0; i < 256; i++)
{ {
if (invert ^ (used_chars.count((char) i) > 0)) if (invert ^ (used_chars.count((char) i) > 0))
chars.push_back(std::make_shared<SingleRE>((char) i)); chars.push_back((char) i);
} }
sumREs(chars); return std::make_shared<MultiRE>(chars);
return chars[0];
} }
std::shared_ptr<RE> dotChar() { std::shared_ptr<RE> dotChar() {
std::vector<std::shared_ptr<RE> > any; std::vector<char> any;
for (int i = 0; i < 256; i++) for (int i = 0; i < 256; i++)
if ((char) i != '\n') //Dot matches anything except newlines if ((char) i != '\n') //Dot matches anything except newlines
any.push_back(std::make_shared<SingleRE>((char) i)); any.push_back((char) i);
sumREs(any); return std::make_shared<MultiRE>(any);
return any[0];
} }
std::shared_ptr<RE> parseRE(const string& input, size_t& idx) std::shared_ptr<RE> parseRE(const string& input, size_t& idx)