Give byte offset on regex parse errors
This commit is contained in:
parent
e425e9ff81
commit
652ee18461
|
@ -3,6 +3,7 @@
|
||||||
#define RE_H
|
#define RE_H
|
||||||
|
|
||||||
#include "Lexesis/automata.h"
|
#include "Lexesis/automata.h"
|
||||||
|
#include "RegexLexer.h"
|
||||||
|
|
||||||
#include <memory>
|
#include <memory>
|
||||||
#include <stdexcept>
|
#include <stdexcept>
|
||||||
|
@ -154,7 +155,7 @@ namespace lxs {
|
||||||
class SyntaxError : public std::runtime_error
|
class SyntaxError : public std::runtime_error
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
SyntaxError(const char* w) : std::runtime_error(w) {}
|
/// Constructs the error with the message "<offset>: <w>", where <offset> is the
/// value returned by lex.getByteOffset() at construction time.
/// @param lex lexer queried for the byte offset that prefixes the message
/// @param w   description of the syntax error (taken by reference to avoid a copy)
SyntaxError(RegexLexer& lex, const std::string& w) : std::runtime_error(std::to_string(lex.getByteOffset()) + ": " + w) {}
|
||||||
};
|
};
|
||||||
|
|
||||||
} //namespace lxs
|
} //namespace lxs
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/config.h.in ${CMAKE_CURRENT_BINARY_DIR}/config.h @ONLY)
|
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/config.h.in ${CMAKE_CURRENT_BINARY_DIR}/config.h @ONLY)
|
||||||
include_directories(${CMAKE_CURRENT_BINARY_DIR})
|
include_directories(${CMAKE_CURRENT_BINARY_DIR} ${CMAKE_CURRENT_SOURCE_DIR})
|
||||||
|
|
||||||
add_library(Lexesis-backends
|
add_library(Lexesis-backends
|
||||||
backends/cpp.cpp
|
backends/cpp.cpp
|
||||||
|
|
20
src/re.cpp
20
src/re.cpp
|
@ -270,7 +270,7 @@ namespace lxs {
|
||||||
|
|
||||||
case RegexLexer::STAR:
|
case RegexLexer::STAR:
|
||||||
if (stk.empty())
|
if (stk.empty())
|
||||||
throw SyntaxError("Cannot apply kleene star to empty regex");
|
throw SyntaxError(lex, "Cannot apply kleene star to empty regex");
|
||||||
n = std::make_shared<StarRE>(stk.top());
|
n = std::make_shared<StarRE>(stk.top());
|
||||||
stk.pop();
|
stk.pop();
|
||||||
stk.push(n);
|
stk.push(n);
|
||||||
|
@ -278,7 +278,7 @@ namespace lxs {
|
||||||
|
|
||||||
case RegexLexer::PLUS:
|
case RegexLexer::PLUS:
|
||||||
if (stk.empty())
|
if (stk.empty())
|
||||||
throw SyntaxError("Cannot apply kleene plus to empty regex");
|
throw SyntaxError(lex, "Cannot apply kleene plus to empty regex");
|
||||||
n = stk.top();
|
n = stk.top();
|
||||||
stk.pop();
|
stk.pop();
|
||||||
n = std::make_shared<ConcatRE>(n, std::make_shared<StarRE>(n));
|
n = std::make_shared<ConcatRE>(n, std::make_shared<StarRE>(n));
|
||||||
|
@ -287,7 +287,7 @@ namespace lxs {
|
||||||
|
|
||||||
case RegexLexer::QUESTIONMARK:
|
case RegexLexer::QUESTIONMARK:
|
||||||
if (stk.empty())
|
if (stk.empty())
|
||||||
throw SyntaxError("Cannot apply '?' to empty regex");
|
throw SyntaxError(lex, "Cannot apply '?' to empty regex");
|
||||||
n = std::make_shared<PlusRE>(stk.top(), std::make_shared<EpsilonRE>());
|
n = std::make_shared<PlusRE>(stk.top(), std::make_shared<EpsilonRE>());
|
||||||
stk.pop();
|
stk.pop();
|
||||||
stk.push(n);
|
stk.push(n);
|
||||||
|
@ -295,7 +295,7 @@ namespace lxs {
|
||||||
|
|
||||||
case RegexLexer::PIPE:
|
case RegexLexer::PIPE:
|
||||||
if (stk.empty())
|
if (stk.empty())
|
||||||
throw SyntaxError("Invalid regex: nothing to the left of '|'");
|
throw SyntaxError(lex, "Invalid regex: nothing to the left of '|'");
|
||||||
if (stk.size() > 1)
|
if (stk.size() > 1)
|
||||||
compactStack(stk), compress(stk);
|
compactStack(stk), compress(stk);
|
||||||
n = std::make_shared<PlusRE>(stk.top(), parseRE(lex, exit_by_closed_paren, inside_parens));
|
n = std::make_shared<PlusRE>(stk.top(), parseRE(lex, exit_by_closed_paren, inside_parens));
|
||||||
|
@ -307,28 +307,28 @@ namespace lxs {
|
||||||
else if (stk.size() == 2)
|
else if (stk.size() == 2)
|
||||||
return compress(stk), stk.top();
|
return compress(stk), stk.top();
|
||||||
else
|
else
|
||||||
throw SyntaxError("Invalid regex");
|
throw SyntaxError(lex, "Invalid regex");
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case RegexLexer::LPAREN:
|
case RegexLexer::LPAREN:
|
||||||
n = parseRE(lex, exit_by_closed_paren, true);
|
n = parseRE(lex, exit_by_closed_paren, true);
|
||||||
if (!exit_by_closed_paren) {
|
if (!exit_by_closed_paren) {
|
||||||
throw SyntaxError("Unclosed parenthesis");
|
throw SyntaxError(lex, "Unclosed parenthesis");
|
||||||
}
|
}
|
||||||
stk.push(n);
|
stk.push(n);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case RegexLexer::RPAREN:
|
case RegexLexer::RPAREN:
|
||||||
if (!inside_parens)
|
if (!inside_parens)
|
||||||
throw SyntaxError("Unopened parenthesis");
|
throw SyntaxError(lex, "Unopened parenthesis");
|
||||||
|
|
||||||
exit_by_closed_paren = true;
|
exit_by_closed_paren = true;
|
||||||
if (stk.size() == 1)
|
if (stk.size() == 1)
|
||||||
return stk.top();
|
return stk.top();
|
||||||
else if (stk.size() == 2)
|
else if (stk.size() == 2)
|
||||||
return compress(stk), stk.top();
|
return compress(stk), stk.top();
|
||||||
throw SyntaxError("Could not parse regex, nothing inside parentheses");
|
throw SyntaxError(lex, "Could not parse regex, nothing inside parentheses");
|
||||||
|
|
||||||
case RegexLexer::CHAR:
|
case RegexLexer::CHAR:
|
||||||
stk.push(std::make_shared<SingleRE>(tok.content[0]));
|
stk.push(std::make_shared<SingleRE>(tok.content[0]));
|
||||||
|
@ -339,7 +339,7 @@ namespace lxs {
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case RegexLexer::ERROR:
|
case RegexLexer::ERROR:
|
||||||
throw SyntaxError((std::to_string(lex.getByteOffset()) + ": Error on character: " + tok.content).c_str());
|
throw SyntaxError(lex, "Error on character: " + tok.content);
|
||||||
|
|
||||||
case RegexLexer::ignore: case RegexLexer::nonmatching:
|
case RegexLexer::ignore: case RegexLexer::nonmatching:
|
||||||
//Just ignore these
|
//Just ignore these
|
||||||
|
@ -352,7 +352,7 @@ namespace lxs {
|
||||||
return stk.top();
|
return stk.top();
|
||||||
else if (stk.size() == 2)
|
else if (stk.size() == 2)
|
||||||
return compress(stk), stk.top();
|
return compress(stk), stk.top();
|
||||||
throw SyntaxError("Could not parse regex");
|
throw SyntaxError(lex, "Could not parse regex");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue