Speed up: implement MultiRE
This commit is contained in:
parent
81f3c022c8
commit
0735760eef
|
@ -45,6 +45,17 @@ namespace lxs {
|
|||
char c;
|
||||
};
|
||||
|
||||
class MultiRE : public RE
|
||||
{
|
||||
public:
|
||||
MultiRE(std::vector<char> chars) : chars(std::move(chars)) {}
|
||||
~MultiRE() {}
|
||||
virtual State toENFA(ENFA& enfa, State attach);
|
||||
virtual std::string toRe();
|
||||
|
||||
std::vector<char> chars;
|
||||
};
|
||||
|
||||
class ConcatRE : public RE
|
||||
{
|
||||
public:
|
||||
|
|
44
src/re.cpp
44
src/re.cpp
|
@ -48,6 +48,23 @@ namespace lxs {
|
|||
return attach + 1;
|
||||
}
|
||||
|
||||
// Renders the character set as a bracketed class: '[' followed by every
// stored character in order, followed by ']'.
string MultiRE::toRe()
{
    //FIXME: this does not consider characters that need escaping
    string pattern;
    pattern.reserve(chars.size() + 2); // room for both brackets
    pattern.push_back('[');
    pattern.append(chars.begin(), chars.end());
    pattern.push_back(']');
    return pattern;
}
|
||||
|
||||
// Wires this character set into `enfa` as a single step from `attach` to
// `attach + 1`: every stored character gets its own transition between
// those two states. The previous accepting set is discarded and
// `attach + 1` becomes the only accepting state. Returns `attach + 1`.
State MultiRE::toENFA(ENFA& enfa, State attach)
{
    // `auto` keeps the exact type of the expression so std::max deduces
    // the same template argument as when the expression is written inline.
    const auto target = attach + 1;

    enfa.accepting.clear();
    enfa.accepting.insert(target);

    // Never shrink the automaton; only grow it if `target` needs it.
    enfa.numStates = std::max(target, enfa.numStates);

    for (const char symbol : chars)
        enfa.delta[attach][symbol].insert(target);

    return target;
}
|
||||
|
||||
string ConcatRE::toRe()
|
||||
{
|
||||
return e->toRe() + f->toRe();
|
||||
|
@ -158,22 +175,9 @@ namespace lxs {
|
|||
return c;
|
||||
}
|
||||
|
||||
void sumREs(std::vector<std::shared_ptr<RE> >& res)
|
||||
{
|
||||
for (std::size_t step = 1; step < res.size(); step <<= 1)
|
||||
{
|
||||
for (std::size_t i = 0; i < res.size(); i += step * 2)
|
||||
{
|
||||
if (i + step < res.size())
|
||||
res[i] = std::make_shared<PlusRE>(res[i], res[i + step]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
std::shared_ptr<RE> parseCharacterClass(const string& input, size_t& idx) {
|
||||
if (idx >= input.size())
|
||||
throw SyntaxError("Unclosed character class");
|
||||
std::vector<std::shared_ptr<RE> > chars;
|
||||
std::set<char> used_chars;
|
||||
|
||||
bool invert = false;
|
||||
|
@ -243,24 +247,22 @@ namespace lxs {
|
|||
if (idx >= input.size())
|
||||
throw SyntaxError("Unclosed character class");
|
||||
|
||||
std::vector<char> chars;
|
||||
for (int i = 0; i < 256; i++)
|
||||
{
|
||||
if (invert ^ (used_chars.count((char) i) > 0))
|
||||
chars.push_back(std::make_shared<SingleRE>((char) i));
|
||||
chars.push_back((char) i);
|
||||
}
|
||||
|
||||
sumREs(chars);
|
||||
|
||||
return chars[0];
|
||||
return std::make_shared<MultiRE>(chars);
|
||||
}
|
||||
|
||||
std::shared_ptr<RE> dotChar() {
|
||||
std::vector<std::shared_ptr<RE> > any;
|
||||
std::vector<char> any;
|
||||
for (int i = 0; i < 256; i++)
|
||||
if ((char) i != '\n') //Dot matches anything except newlines
|
||||
any.push_back(std::make_shared<SingleRE>((char) i));
|
||||
sumREs(any);
|
||||
return any[0];
|
||||
any.push_back((char) i);
|
||||
return std::make_shared<MultiRE>(any);
|
||||
}
|
||||
|
||||
std::shared_ptr<RE> parseRE(const string& input, size_t& idx)
|
||||
|
|
Loading…
Reference in New Issue