diff --git a/include/Lexesis/re.h b/include/Lexesis/re.h index 6625339..60875b8 100644 --- a/include/Lexesis/re.h +++ b/include/Lexesis/re.h @@ -78,7 +78,7 @@ namespace lxs { std::shared_ptr e, f; }; - std::shared_ptr parseRE(std::string& input); + std::shared_ptr parseRE(const std::string& input); class SyntaxError : public std::runtime_error { diff --git a/src/re.cpp b/src/re.cpp index 84318b4..86757fe 100644 --- a/src/re.cpp +++ b/src/re.cpp @@ -14,7 +14,7 @@ namespace lxs { State EmptyRE::toENFA(ENFA& enfa, State attach) { - enfa.numStates = std::max(attach + 1, enfa.numStates); + enfa.numStates = attach + 1; enfa.accepting.clear(); enfa.accepting.insert(attach + 1); return attach + 1; @@ -134,6 +134,7 @@ namespace lxs { case '.': case '\'': case '"': + case '-': break; case 'n': c = '\n'; break; @@ -159,23 +160,24 @@ namespace lxs { void sumREs(std::vector >& res) { - for (std::size_t step = 2; step < res.size(); step <<= 1) + for (std::size_t step = 1; step < res.size(); step <<= 1) { - for (std::size_t i = 0; i < res.size(); i += step) + for (std::size_t i = 0; i < res.size(); i += step * 2) { - if (i + step / 2 < res.size()) - res[i] = std::make_shared(res[i], res[i + step / 2]); + if (i + step < res.size()) + res[i] = std::make_shared(res[i], res[i + step]); } } } - std::shared_ptr parseCharacterClass(string& input, size_t& idx) { + std::shared_ptr parseCharacterClass(const string& input, size_t& idx) { if (idx >= input.size()) throw SyntaxError("Unclosed character class"); std::vector > chars; std::set used_chars; bool invert = false; + int last_char = -1; if (input[idx] == '^') { @@ -191,6 +193,7 @@ namespace lxs { { used_chars.insert(']'); idx++; + last_char = ']'; } if (idx >= input.size()) @@ -200,6 +203,7 @@ namespace lxs { { used_chars.insert('-'); idx++; + last_char = '-'; } if (idx >= input.size()) @@ -220,10 +224,13 @@ namespace lxs { } else { - for (int i = ((SingleRE*)(chars[chars.size() - 1].get()))->c + 1; i <= input[idx]; i++) + if (last_char == -1) + throw SyntaxError("Nothing to apply range to"); + for (int i = last_char + 1; i <= input[idx]; i++) { used_chars.insert((char) i); } + last_char = -1; } } else if (input[idx] == '\\') @@ -231,11 +238,13 @@ namespace lxs { idx++; if (idx >= input.size()) throw SyntaxError("Unclosed character classe"); - used_chars.insert(parseEscapeChar(input[idx])); + last_char = parseEscapeChar(input[idx]); + used_chars.insert(last_char); } else { used_chars.insert(input[idx]); + last_char = input[idx]; } } @@ -263,7 +272,7 @@ namespace lxs { return any[0]; } - std::shared_ptr parseRE(string& input, size_t& idx) + std::shared_ptr parseRE(const string& input, size_t& idx) { stack > stk; for (; idx < input.length(); idx++) @@ -358,7 +367,7 @@ namespace lxs { } - std::shared_ptr parseRE(string& input) + std::shared_ptr parseRE(const string& input) { size_t i = 0; std::shared_ptr res = parseRE(input, i);