Finish regex parsing
This commit is contained in:
parent
90ff409281
commit
e476591495
121
src/re.cpp
121
src/re.cpp
|
@ -119,7 +119,7 @@ namespace lxs {
|
|||
stk.push(tp);
|
||||
}
|
||||
|
||||
std::shared_ptr<RE> parseEscapeChar(char c) {
|
||||
char parseEscapeChar(char c) {
|
||||
switch (c)
|
||||
{
|
||||
case '\\':
|
||||
|
@ -154,7 +154,113 @@ namespace lxs {
|
|||
default:
|
||||
throw SyntaxError(("Invalid escape sequence: \\" + std::string(1, c)).c_str());
|
||||
}
|
||||
return std::make_shared<SingleRE>(c);
|
||||
return c;
|
||||
}
|
||||
|
||||
void sumREs(std::vector<std::shared_ptr<RE> >& res)
|
||||
{
|
||||
for (std::size_t step = 2; step < res.size(); step <<= 1)
|
||||
{
|
||||
for (std::size_t i = 0; i < res.size(); i += step)
|
||||
{
|
||||
if (i + step / 2 < res.size())
|
||||
res[i] = std::make_shared<PlusRE>(res[i], res[i + step / 2]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Parse the body of a bracketed character class.
///
/// Accepted syntax, in order:
///   - an optional leading '^' that inverts the class;
///   - an optional ']' immediately afterwards, taken literally;
///   - an optional '-' immediately afterwards, taken literally;
///   - any mix of literal characters, backslash escapes (decoded by
///     parseEscapeChar) and ranges `x-y`; a '-' directly before the closing
///     ']' is taken literally.
///
/// @param input  The full pattern text.
/// @param idx    On entry, the position just after the opening '['. On
///               successful return, the position just after the closing ']'.
/// @return The expression sumREs leaves in slot 0 after combining one SingleRE
///         per selected byte value.
/// @throws SyntaxError if the class is not closed before the end of `input`,
///         if a range has no starting character, or if the class would match
///         no characters at all. parseEscapeChar may also throw.
std::shared_ptr<RE> parseCharacterClass(string& input, size_t& idx) {
    // Every place that needs another character funnels through this check.
    const auto requireMore = [&input, &idx]() {
        if (idx >= input.size())
            throw SyntaxError("Unclosed character class");
    };

    std::set<char> used_chars;
    bool invert = false;

    // Most recently added member; it is the lower bound if a '-' follows.
    bool have_prev = false;
    char prev = '\0';

    requireMore();
    if (input[idx] == '^')
    {
        invert = true;
        idx++;
    }

    requireMore();
    if (input[idx] == ']')
    {
        // A ']' in first position is a literal, not the terminator.
        used_chars.insert(']');
        prev = ']';
        have_prev = true;
        idx++;
    }

    requireMore();
    if (input[idx] == '-')
    {
        // A '-' in leading position is a literal, not a range operator.
        used_chars.insert('-');
        prev = '-';
        have_prev = true;
        idx++;
    }

    requireMore();

    while (idx < input.size() && input[idx] != ']')
    {
        const char cur = input[idx];

        if (cur == '-')
        {
            idx++;
            requireMore();

            if (input[idx] == ']')
            {
                // Trailing '-' is a literal. Do not advance: the ']' must
                // stay in place so the loop condition sees it and stops.
                used_chars.insert('-');
                continue;
            }

            char hi = input[idx];
            if (hi == '\\')
            {
                idx++;
                requireMore();
                hi = parseEscapeChar(input[idx]);
            }

            if (!have_prev)
                throw SyntaxError("Range without a start in character class");

            // Compare as unsigned bytes so ranges reaching past 0x7f work
            // regardless of whether plain char is signed. The lower bound
            // itself was already inserted when it was read. A reversed range
            // adds nothing.
            const int lo = static_cast<unsigned char>(prev);
            const int last = static_cast<unsigned char>(hi);
            for (int i = lo + 1; i <= last; i++)
            {
                used_chars.insert(static_cast<char>(i));
            }
            prev = hi;
        }
        else if (cur == '\\')
        {
            idx++;
            requireMore();
            prev = parseEscapeChar(input[idx]);
            have_prev = true;
            used_chars.insert(prev);
        }
        else
        {
            prev = cur;
            have_prev = true;
            used_chars.insert(cur);
        }

        idx++;
    }

    requireMore();
    idx++; //Eat the ]

    // Materialise the class: one SingleRE per byte that is in the set
    // (or, when inverted, per byte that is not).
    std::vector<std::shared_ptr<RE> > chars;
    for (int i = 0; i < 256; i++)
    {
        const char ch = static_cast<char>(i);
        if (invert != (used_chars.count(ch) > 0))
            chars.push_back(std::make_shared<SingleRE>(ch));
    }

    // An inverted class that lists every byte selects nothing; indexing
    // chars[0] below would be out of bounds.
    if (chars.empty())
        throw SyntaxError("Character class matches no characters");

    sumREs(chars);

    return chars[0];
}
|
||||
|
||||
std::shared_ptr<RE> dotChar() {
|
||||
std::vector<std::shared_ptr<RE> > any;
|
||||
for (int i = 0; i < 256; i++)
|
||||
if ((char) i != '\n') //Dot matches anything except newlines
|
||||
any.push_back(std::make_shared<SingleRE>((char) i));
|
||||
sumREs(any);
|
||||
return any[0];
|
||||
}
|
||||
|
||||
std::shared_ptr<RE> parseRE(string& input, size_t& idx)
|
||||
|
@ -170,16 +276,19 @@ namespace lxs {
|
|||
if (idx >= input.length())
|
||||
throw SyntaxError("Escape sequence at the end of the string");
|
||||
else
|
||||
throw SyntaxError(("invalid escape sequence: \\" + string(1, input[idx])).c_str());
|
||||
stk.push(parseEscapeChar(input[idx++]));
|
||||
stk.push(std::make_shared<SingleRE>(parseEscapeChar(input[idx])));
|
||||
break;
|
||||
|
||||
case '[':
|
||||
//TODO: parse character classes
|
||||
stk.push(parseCharacterClass(input, ++idx));
|
||||
break;
|
||||
|
||||
case '.':
|
||||
//TODO: any character
|
||||
for (int c = 0; c <= 256; c++)
|
||||
{
|
||||
stk.push(dotChar());
|
||||
}
|
||||
break;
|
||||
|
||||
case ']':
|
||||
throw SyntaxError("Unopened ']'");
|
||||
|
|
Loading…
Reference in New Issue