Fix re
This commit is contained in:
parent
e476591495
commit
5338d91fbf
|
@ -78,7 +78,7 @@ namespace lxs {
|
||||||
std::shared_ptr<RE> e, f;
|
std::shared_ptr<RE> e, f;
|
||||||
};
|
};
|
||||||
|
|
||||||
std::shared_ptr<RE> parseRE(std::string& input);
|
std::shared_ptr<RE> parseRE(const std::string& input);
|
||||||
|
|
||||||
class SyntaxError : public std::runtime_error
|
class SyntaxError : public std::runtime_error
|
||||||
{
|
{
|
||||||
|
|
29
src/re.cpp
29
src/re.cpp
|
@ -14,7 +14,7 @@ namespace lxs {
|
||||||
|
|
||||||
State EmptyRE::toENFA(ENFA& enfa, State attach)
|
State EmptyRE::toENFA(ENFA& enfa, State attach)
|
||||||
{
|
{
|
||||||
enfa.numStates = std::max(attach + 1, enfa.numStates);
|
enfa.numStates = attach + 1;
|
||||||
enfa.accepting.clear();
|
enfa.accepting.clear();
|
||||||
enfa.accepting.insert(attach + 1);
|
enfa.accepting.insert(attach + 1);
|
||||||
return attach + 1;
|
return attach + 1;
|
||||||
|
@ -134,6 +134,7 @@ namespace lxs {
|
||||||
case '.':
|
case '.':
|
||||||
case '\'':
|
case '\'':
|
||||||
case '"':
|
case '"':
|
||||||
|
case '-':
|
||||||
break;
|
break;
|
||||||
case 'n':
|
case 'n':
|
||||||
c = '\n'; break;
|
c = '\n'; break;
|
||||||
|
@ -159,23 +160,24 @@ namespace lxs {
|
||||||
|
|
||||||
void sumREs(std::vector<std::shared_ptr<RE> >& res)
|
void sumREs(std::vector<std::shared_ptr<RE> >& res)
|
||||||
{
|
{
|
||||||
for (std::size_t step = 2; step < res.size(); step <<= 1)
|
for (std::size_t step = 1; step < res.size(); step <<= 1)
|
||||||
{
|
{
|
||||||
for (std::size_t i = 0; i < res.size(); i += step)
|
for (std::size_t i = 0; i < res.size(); i += step * 2)
|
||||||
{
|
{
|
||||||
if (i + step / 2 < res.size())
|
if (i + step < res.size())
|
||||||
res[i] = std::make_shared<PlusRE>(res[i], res[i + step / 2]);
|
res[i] = std::make_shared<PlusRE>(res[i], res[i + step]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
std::shared_ptr<RE> parseCharacterClass(string& input, size_t& idx) {
|
std::shared_ptr<RE> parseCharacterClass(const string& input, size_t& idx) {
|
||||||
if (idx >= input.size())
|
if (idx >= input.size())
|
||||||
throw SyntaxError("Unclosed character class");
|
throw SyntaxError("Unclosed character class");
|
||||||
std::vector<std::shared_ptr<RE> > chars;
|
std::vector<std::shared_ptr<RE> > chars;
|
||||||
std::set<char> used_chars;
|
std::set<char> used_chars;
|
||||||
|
|
||||||
bool invert = false;
|
bool invert = false;
|
||||||
|
int last_char = -1;
|
||||||
|
|
||||||
if (input[idx] == '^')
|
if (input[idx] == '^')
|
||||||
{
|
{
|
||||||
|
@ -191,6 +193,7 @@ namespace lxs {
|
||||||
{
|
{
|
||||||
used_chars.insert(']');
|
used_chars.insert(']');
|
||||||
idx++;
|
idx++;
|
||||||
|
last_char = ']';
|
||||||
}
|
}
|
||||||
|
|
||||||
if (idx >= input.size())
|
if (idx >= input.size())
|
||||||
|
@ -200,6 +203,7 @@ namespace lxs {
|
||||||
{
|
{
|
||||||
used_chars.insert('-');
|
used_chars.insert('-');
|
||||||
idx++;
|
idx++;
|
||||||
|
last_char = '-';
|
||||||
}
|
}
|
||||||
|
|
||||||
if (idx >= input.size())
|
if (idx >= input.size())
|
||||||
|
@ -220,10 +224,13 @@ namespace lxs {
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
for (int i = ((SingleRE*)(chars[chars.size() - 1].get()))->c + 1; i <= input[idx]; i++)
|
if (last_char == -1)
|
||||||
|
throw SyntaxError("Nothing to apply range to");
|
||||||
|
for (int i = last_char + 1; i <= input[idx]; i++)
|
||||||
{
|
{
|
||||||
used_chars.insert((char) i);
|
used_chars.insert((char) i);
|
||||||
}
|
}
|
||||||
|
last_char = -1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else if (input[idx] == '\\')
|
else if (input[idx] == '\\')
|
||||||
|
@ -231,11 +238,13 @@ namespace lxs {
|
||||||
idx++;
|
idx++;
|
||||||
if (idx >= input.size())
|
if (idx >= input.size())
|
||||||
throw SyntaxError("Unclosed character classe");
|
throw SyntaxError("Unclosed character classe");
|
||||||
used_chars.insert(parseEscapeChar(input[idx]));
|
last_char = parseEscapeChar(input[idx]);
|
||||||
|
used_chars.insert(last_char);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
used_chars.insert(input[idx]);
|
used_chars.insert(input[idx]);
|
||||||
|
last_char = input[idx];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -263,7 +272,7 @@ namespace lxs {
|
||||||
return any[0];
|
return any[0];
|
||||||
}
|
}
|
||||||
|
|
||||||
std::shared_ptr<RE> parseRE(string& input, size_t& idx)
|
std::shared_ptr<RE> parseRE(const string& input, size_t& idx)
|
||||||
{
|
{
|
||||||
stack<std::shared_ptr<RE> > stk;
|
stack<std::shared_ptr<RE> > stk;
|
||||||
for (; idx < input.length(); idx++)
|
for (; idx < input.length(); idx++)
|
||||||
|
@ -358,7 +367,7 @@ namespace lxs {
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
std::shared_ptr<RE> parseRE(string& input)
|
std::shared_ptr<RE> parseRE(const string& input)
|
||||||
{
|
{
|
||||||
size_t i = 0;
|
size_t i = 0;
|
||||||
std::shared_ptr<RE> res = parseRE(input, i);
|
std::shared_ptr<RE> res = parseRE(input, i);
|
||||||
|
|
Loading…
Reference in New Issue