Finish regex parsing

This commit is contained in:
Robin Jadoul 2016-04-25 20:39:38 +02:00
parent 90ff409281
commit e476591495
1 changed file with 115 additions and 6 deletions

View File

@ -119,7 +119,7 @@ namespace lxs {
stk.push(tp);
}
std::shared_ptr<RE> parseEscapeChar(char c) {
char parseEscapeChar(char c) {
switch (c)
{
case '\\':
@ -154,7 +154,113 @@ namespace lxs {
default:
throw SyntaxError(("Invalid escape sequence: \\" + std::string(1, c)).c_str());
}
return std::make_shared<SingleRE>(c);
return c;
}
// Fold every expression in `res` into one PlusRE tree, left in res[0].
//
// The elements are merged pairwise, round after round, so the resulting tree
// is balanced (depth ~log2(n)) instead of a linear chain. After the call only
// res[0] is meaningful; the other slots still hold partial subtrees or their
// original value. An empty or single-element vector is left untouched.
//
// res: the expressions to combine; modified in place.
void sumREs(std::vector<std::shared_ptr<RE> >& res)
{
    // `half` is the distance between the two subtrees joined in this round.
    // Iterating while `half < res.size()` guarantees the final round (which
    // joins res[0] with res[half]) is executed; stopping at
    // `2 * half < res.size()` would leave the last subtrees unmerged.
    for (std::size_t half = 1; half < res.size(); half <<= 1)
    {
        for (std::size_t i = 0; i + half < res.size(); i += 2 * half)
        {
            res[i] = std::make_shared<PlusRE>(res[i], res[i + half]);
        }
    }
}
// Parse the body of a bracketed character class and build an expression that
// matches exactly one of the characters it denotes.
//
// input: the full pattern being parsed.
// idx:   on entry, the index of the first character after the opening '[';
//        on return, the index just past the closing ']'.
//
// Supported syntax:
//   - a leading '^' inverts the class (over all 256 char values);
//   - a ']' placed first (after the optional '^') is a literal ']';
//   - a '-' placed first (after those) or directly before the closing ']'
//     is a literal '-';
//   - "a-z" adds every character from 'a' up to and including 'z'
//     (compared as unsigned char; a reversed range adds nothing new);
//   - "\x" is translated by parseEscapeChar.
//
// Returns the combined expression (chars[0] after sumREs).
// Throws SyntaxError if the class is not closed, or if it ends up matching
// no character at all.
std::shared_ptr<RE> parseCharacterClass(string& input, size_t& idx) {
    if (idx >= input.size())
        throw SyntaxError("Unclosed character class");

    std::set<char> used_chars;
    bool invert = false;
    // Most recently parsed class member; it is the lower bound if a '-'
    // follows. A '-' can only be seen inside the loop after at least one
    // member was parsed (a leading '-' is consumed before the loop), so
    // `prev` is always set by the time it is read.
    char prev = 0;

    if (input[idx] == '^')
    {
        invert = true;
        idx++;
    }
    if (idx >= input.size())
        throw SyntaxError("Unclosed character class");

    if (input[idx] == ']')
    {
        // A ']' in first position is a literal, not the terminator.
        used_chars.insert(']');
        prev = ']';
        idx++;
    }
    if (idx >= input.size())
        throw SyntaxError("Unclosed character class");

    if (input[idx] == '-')
    {
        // A '-' in first position is a literal, not a range operator.
        used_chars.insert('-');
        prev = '-';
        idx++;
    }
    if (idx >= input.size())
        throw SyntaxError("Unclosed character class");

    for (; idx < input.size() && input[idx] != ']'; idx++)
    {
        if (input[idx] == '-')
        {
            if (idx + 1 >= input.size())
                throw SyntaxError("Unclosed character class");
            if (input[idx + 1] == ']')
            {
                // Trailing '-' is a literal. Do not consume the ']' here:
                // the loop increment moves onto it and the loop then stops.
                used_chars.insert('-');
                prev = '-';
            }
            else
            {
                idx++;
                const char last = input[idx];
                // `prev` itself is already in the set; add the rest of the range.
                for (int i = static_cast<unsigned char>(prev) + 1;
                     i <= static_cast<unsigned char>(last); i++)
                {
                    used_chars.insert(static_cast<char>(i));
                }
                prev = last;
            }
        }
        else if (input[idx] == '\\')
        {
            idx++;
            if (idx >= input.size())
                throw SyntaxError("Unclosed character class");
            prev = parseEscapeChar(input[idx]);
            used_chars.insert(prev);
        }
        else
        {
            prev = input[idx];
            used_chars.insert(prev);
        }
    }
    if (idx >= input.size())
        throw SyntaxError("Unclosed character class");
    idx++; //Eat the ]

    // Materialise the (possibly inverted) set as one SingleRE per character.
    std::vector<std::shared_ptr<RE> > chars;
    for (int i = 0; i < 256; i++)
    {
        const char c = static_cast<char>(i);
        if (invert != (used_chars.count(c) > 0))
            chars.push_back(std::make_shared<SingleRE>(c));
    }
    // An inverted class that lists every character matches nothing; there is
    // no element to return in that case.
    if (chars.empty())
        throw SyntaxError("Empty character class");
    sumREs(chars);
    return chars[0];
}
std::shared_ptr<RE> dotChar() {
std::vector<std::shared_ptr<RE> > any;
for (int i = 0; i < 256; i++)
if ((char) i != '\n') //Dot matches anything except newlines
any.push_back(std::make_shared<SingleRE>((char) i));
sumREs(any);
return any[0];
}
std::shared_ptr<RE> parseRE(string& input, size_t& idx)
@ -170,16 +276,19 @@ namespace lxs {
if (idx >= input.length())
throw SyntaxError("Escape sequence at the end of the string");
else
throw SyntaxError(("invalid escape sequence: \\" + string(1, input[idx])).c_str());
stk.push(parseEscapeChar(input[idx++]));
stk.push(std::make_shared<SingleRE>(parseEscapeChar(input[idx])));
break;
case '[':
//TODO: parse character classes
stk.push(parseCharacterClass(input, ++idx));
break;
case '.':
//TODO: any character
for (int c = 0; c <= 256; c++)
{
stk.push(dotChar());
}
break;
case ']':
throw SyntaxError("Unopened ']'");