Error handler in the generated parser

This commit is contained in:
Robin Jadoul 2017-01-18 16:11:39 +01:00
parent 3bb2004fdc
commit 4b1b0ab9e0
3 changed files with 78 additions and 14 deletions

2
TODO
View File

@ -4,7 +4,6 @@
T> Parser selection without enum (Thomas) (flexible matching (case insensitive, ...))
R> Generated code error handler
T> Parsodus regex parser in Lexesis
-> Vrijgeven in libraryformaat: mogelijkheid verschillende tokens opvragen
K> Parsodus Parsodus parser
@ -50,3 +49,4 @@ R> README
R> Precedence resolution in generator
R> man pages
R> bash completion
R> Generated code error handler

View File

@ -41,29 +41,32 @@ namespace backends {
void generateParser(std::function<std::unique_ptr<std::ostream>(std::string)> getOstreamForFileName, std::string parserName, const Config& config) {
assert(parserName.length());
Generator gen(config.grammar);
Grammar grammar(config.grammar);
grammar.variables.emplace("error");
Generator gen(grammar);
lr::LRTable table(gen.generate());
std::map<const std::string, templ::TemplateContext> topLevel;
topLevel["name"] = parserName;
topLevel["num_states"] = templ::make_string(std::to_string(table.act.size()));
topLevel["num_rules"] = templ::make_string(std::to_string(config.grammar.rules.size()));
topLevel["num_symbols"] = templ::make_string(std::to_string(config.grammar.terminals.size() + 1 + config.grammar.variables.size())); // + 1 for EOF
topLevel["num_rules"] = templ::make_string(std::to_string(grammar.rules.size()));
topLevel["num_symbols"] = templ::make_string(std::to_string(grammar.terminals.size() + 1 + grammar.variables.size())); // + 1 for EOF
std::vector<templ::TemplateContext> symbols;
symbols.push_back(templ::make_map({{"symbol", templ::make_string("T_EOF")}}));
for (auto& s : config.grammar.terminals)
for (auto& s : grammar.terminals)
symbols.push_back(templ::make_map({{"symbol", templ::make_string("T_" + s)}}));
for (auto& s : config.grammar.variables)
symbols.push_back(templ::make_map({{"symbol", templ::make_string("T_EOF")}}));
for (auto& s : grammar.variables)
symbols.push_back(templ::make_map({{"symbol", templ::make_string("V_" + s)}}));
topLevel["symbols"] = std::move(symbols);
std::vector<templ::TemplateContext> rules;
std::set<std::string> rulenames;
for (std::size_t i = 0; i < config.grammar.rules.size(); i++) {
for (std::size_t i = 0; i < grammar.rules.size(); i++) {
std::map<const std::string, templ::TemplateContext> r;
r["index"] = templ::make_string(std::to_string(i));
r["rhs_length"] = templ::make_string(std::to_string(config.grammar.rules[i]->tail.size()));
r["rhs_length"] = templ::make_string(std::to_string(grammar.rules[i]->tail.size()));
if (false /* the rule has a name */) {
r["rname"] = templ::make_string(""); //The name
rulenames.insert(""); //The name
@ -80,7 +83,7 @@ namespace backends {
}
topLevel["rulenames"] = templ::make_array(std::move(rulenamesT));
std::set<std::string> terminals = config.grammar.terminals;
std::set<std::string> terminals = grammar.terminals;
terminals.insert(util::EOF_PLACEHOLDER);
std::vector<templ::TemplateContext> states(table.act.size());
for (std::size_t i = 0; i < table.act.size(); i++) {
@ -99,7 +102,7 @@ namespace backends {
break;
case lr::Action::REDUCE:
a = "REDUCE";
data = "static_cast<std::uint64_t>(" + parserName + "_Symbol::V_" + config.grammar.rules[tmp.second]->head + ") << 31 | " + std::to_string(tmp.second);
data = "static_cast<std::uint64_t>(" + parserName + "_Symbol::V_" + grammar.rules[tmp.second]->head + ") << 31 | " + std::to_string(tmp.second);
break;
case lr::Action::ACCEPT:
a = "ACCEPT";
@ -115,7 +118,7 @@ namespace backends {
}
std::vector<templ::TemplateContext> gotos;
for (const auto& nonterm : config.grammar.variables) {
for (const auto& nonterm : grammar.variables) {
gotos.push_back(templ::make_string(std::to_string(table.goto_[i][nonterm])));
}

View File

@ -6,6 +6,8 @@
#include <cstdint>
#include <deque>
#include <stack>
#include <stdexcept>
#include <vector>
/**
* Represents the type of the symbol (both terminals and nonterminals)
@ -16,6 +18,11 @@ enum class {{name}}_Symbol : std::uint64_t {
{{/symbols}}
};
/**
 * Exception thrown by the generated parser when the input contains a syntax
 * error that is not (or cannot be) recovered from.
 */
class SyntaxError : public std::runtime_error {
public:
    /// Build the error from a C string message.
    SyntaxError(const char* c) : std::runtime_error(c) {}
    /// Build the error from a std::string message, so callers do not need .c_str().
    explicit SyntaxError(const std::string& msg) : std::runtime_error(msg) {}
};
template <typename Value>
class {{name}} {
public:
@ -43,6 +50,15 @@ class {{name}} {
* Functions to be supplied by the user *
******************************************/
/**
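* Handle a syntax error
* current is the offending Token: the one that has no action associated in the current state
* expected lists all terminals that do have an action in the current state
* The returned Value is pushed on the value stack as the value of the error symbol during recovery
*
* By default throws a SyntaxError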
*/
virtual Value error(Token current, const std::vector<{{name}}_Symbol>& expected);
/**
* Get the next token from the lexer
*/
@ -72,6 +88,32 @@ enum Action {
};
/*********************************************
* Translate a Symbol to a readable string *
*********************************************/
/**
 * Translate a symbol into the name of its enumerator.
 *
 * Declared inline so that the definition may appear in several translation
 * units. NOTE(review): this assumes the template is emitted as a header;
 * confirm against the generator.
 *
 * @param s the symbol to translate
 * @return the enumerator name, or a "<unknown symbol N>" placeholder when s
 *         does not match any enumerator
 */
inline std::string to_string({{name}}_Symbol s) {
    switch (s) {
{{#symbols}}
        case {{name}}_Symbol::{{symbol}}:
            return "{{symbol}}";
{{/symbols}}
    }
    // Only reached for values outside the enumeration. Without this return,
    // control would flow off the end of a non-void function, which is
    // undefined behaviour.
    return "<unknown symbol " + std::to_string(static_cast<std::uint64_t>(s)) + ">";
}
/**************************
* Default error method *
**************************/
/**
 * Default error hook: builds a message naming the symbol of the offending
 * token followed by every expected symbol, then throws it as a SyntaxError.
 * Never returns normally.
 */
template <typename Value>
Value {{name}}<Value>::error(Token current, const std::vector<{{name}}_Symbol>& expected) {
    std::string description = "Syntax Error: got ";
    description += to_string(current.symbol);
    description += "\n Expected any of:";
    for (const auto& candidate : expected) {
        description += "\n ";
        description += to_string(candidate);
    }
    throw SyntaxError(description.c_str());
}
/***************************
* Parser implementation *
***************************/
@ -88,8 +130,27 @@ Value {{name}}<Value>::parse() {
switch (act & 0x3) {
case ERROR:
//TODO: error handling
assert(false);
{
constexpr std::uint64_t verr = static_cast<std::uint64_t>({{name}}_Symbol::V_error);
std::vector<{{name}}_Symbol> expected;
{{#symbols}}
if ({{name}}_Symbol::{{symbol}} <= {{name}}_Symbol::T_EOF && (TABLE[stateStack.top()][static_cast<std::uint64_t>({{name}}_Symbol::{{symbol}})] & 0x3) != ERROR) {
expected.emplace_back({{name}}_Symbol::{{symbol}});
}
{{/symbols}}
Value errorVal = error(tok, expected);
while (!valueStack.empty() && (TABLE[stateStack.top()][verr] & 0x3) == ERROR) {
valueStack.pop();
stateStack.pop();
}
if ((TABLE[stateStack.top()][verr] & 0x3) == ERROR) {
throw SyntaxError("Syntax error: could not recover");
}
stateStack.push(TABLE[stateStack.top()][verr] >> 2);
valueStack.emplace(std::move(errorVal));
}
break;
case SHIFT:
valueStack.emplace(std::move(tok));