Error handler in the generated parser

This commit is contained in:
Robin Jadoul 2017-01-18 16:11:39 +01:00
parent 3bb2004fdc
commit 4b1b0ab9e0
3 changed files with 78 additions and 14 deletions

2
TODO
View File

@ -4,7 +4,6 @@
T> Parser selection without enum (Thomas) (flexible matching (case insensitive, ...)) T> Parser selection without enum (Thomas) (flexible matching (case insensitive, ...))
R> Generated code error handler
T> Parsodus regex parser in Lexesis T> Parsodus regex parser in Lexesis
-> Vrijgeven in libraryformaat: mogelijkheid verschillende tokens opvragen -> Vrijgeven in libraryformaat: mogelijkheid verschillende tokens opvragen
K> Parsodus Parsodus parser K> Parsodus Parsodus parser
@ -50,3 +49,4 @@ R> README
R> Precedence resolution in generator R> Precedence resolution in generator
R> man pages R> man pages
R> bash completion R> bash completion
R> Generated code error handler

View File

@ -41,29 +41,32 @@ namespace backends {
void generateParser(std::function<std::unique_ptr<std::ostream>(std::string)> getOstreamForFileName, std::string parserName, const Config& config) { void generateParser(std::function<std::unique_ptr<std::ostream>(std::string)> getOstreamForFileName, std::string parserName, const Config& config) {
assert(parserName.length()); assert(parserName.length());
Generator gen(config.grammar); Grammar grammar(config.grammar);
grammar.variables.emplace("error");
Generator gen(grammar);
lr::LRTable table(gen.generate()); lr::LRTable table(gen.generate());
std::map<const std::string, templ::TemplateContext> topLevel; std::map<const std::string, templ::TemplateContext> topLevel;
topLevel["name"] = parserName; topLevel["name"] = parserName;
topLevel["num_states"] = templ::make_string(std::to_string(table.act.size())); topLevel["num_states"] = templ::make_string(std::to_string(table.act.size()));
topLevel["num_rules"] = templ::make_string(std::to_string(config.grammar.rules.size())); topLevel["num_rules"] = templ::make_string(std::to_string(grammar.rules.size()));
topLevel["num_symbols"] = templ::make_string(std::to_string(config.grammar.terminals.size() + 1 + config.grammar.variables.size())); // + 1 for EOF topLevel["num_symbols"] = templ::make_string(std::to_string(grammar.terminals.size() + 1 + grammar.variables.size())); // + 1 for EOF
std::vector<templ::TemplateContext> symbols; std::vector<templ::TemplateContext> symbols;
symbols.push_back(templ::make_map({{"symbol", templ::make_string("T_EOF")}})); for (auto& s : grammar.terminals)
for (auto& s : config.grammar.terminals)
symbols.push_back(templ::make_map({{"symbol", templ::make_string("T_" + s)}})); symbols.push_back(templ::make_map({{"symbol", templ::make_string("T_" + s)}}));
for (auto& s : config.grammar.variables) symbols.push_back(templ::make_map({{"symbol", templ::make_string("T_EOF")}}));
for (auto& s : grammar.variables)
symbols.push_back(templ::make_map({{"symbol", templ::make_string("V_" + s)}})); symbols.push_back(templ::make_map({{"symbol", templ::make_string("V_" + s)}}));
topLevel["symbols"] = std::move(symbols); topLevel["symbols"] = std::move(symbols);
std::vector<templ::TemplateContext> rules; std::vector<templ::TemplateContext> rules;
std::set<std::string> rulenames; std::set<std::string> rulenames;
for (std::size_t i = 0; i < config.grammar.rules.size(); i++) { for (std::size_t i = 0; i < grammar.rules.size(); i++) {
std::map<const std::string, templ::TemplateContext> r; std::map<const std::string, templ::TemplateContext> r;
r["index"] = templ::make_string(std::to_string(i)); r["index"] = templ::make_string(std::to_string(i));
r["rhs_length"] = templ::make_string(std::to_string(config.grammar.rules[i]->tail.size())); r["rhs_length"] = templ::make_string(std::to_string(grammar.rules[i]->tail.size()));
if (false /* the rule has a name */) { if (false /* the rule has a name */) {
r["rname"] = templ::make_string(""); //The name r["rname"] = templ::make_string(""); //The name
rulenames.insert(""); //The name rulenames.insert(""); //The name
@ -80,7 +83,7 @@ namespace backends {
} }
topLevel["rulenames"] = templ::make_array(std::move(rulenamesT)); topLevel["rulenames"] = templ::make_array(std::move(rulenamesT));
std::set<std::string> terminals = config.grammar.terminals; std::set<std::string> terminals = grammar.terminals;
terminals.insert(util::EOF_PLACEHOLDER); terminals.insert(util::EOF_PLACEHOLDER);
std::vector<templ::TemplateContext> states(table.act.size()); std::vector<templ::TemplateContext> states(table.act.size());
for (std::size_t i = 0; i < table.act.size(); i++) { for (std::size_t i = 0; i < table.act.size(); i++) {
@ -99,7 +102,7 @@ namespace backends {
break; break;
case lr::Action::REDUCE: case lr::Action::REDUCE:
a = "REDUCE"; a = "REDUCE";
data = "static_cast<std::uint64_t>(" + parserName + "_Symbol::V_" + config.grammar.rules[tmp.second]->head + ") << 31 | " + std::to_string(tmp.second); data = "static_cast<std::uint64_t>(" + parserName + "_Symbol::V_" + grammar.rules[tmp.second]->head + ") << 31 | " + std::to_string(tmp.second);
break; break;
case lr::Action::ACCEPT: case lr::Action::ACCEPT:
a = "ACCEPT"; a = "ACCEPT";
@ -115,7 +118,7 @@ namespace backends {
} }
std::vector<templ::TemplateContext> gotos; std::vector<templ::TemplateContext> gotos;
for (const auto& nonterm : config.grammar.variables) { for (const auto& nonterm : grammar.variables) {
gotos.push_back(templ::make_string(std::to_string(table.goto_[i][nonterm]))); gotos.push_back(templ::make_string(std::to_string(table.goto_[i][nonterm])));
} }

View File

@ -6,6 +6,8 @@
#include <cstdint> #include <cstdint>
#include <deque> #include <deque>
#include <stack> #include <stack>
#include <stdexcept>
#include <vector>
/** /**
* Represents the type of the symbol (both terminals and nonterminals) * Represents the type of the symbol (both terminals and nonterminals)
@ -16,6 +18,11 @@ enum class {{name}}_Symbol : std::uint64_t {
{{/symbols}} {{/symbols}}
}; };
/**
 * Exception type used by the generated parser to report a syntax error.
 *
 * It is thrown by the default error() handler and by parse() when no state
 * on the stack can recover from the error. Deriving from std::runtime_error
 * lets callers catch it as a std::exception and read the message via what().
 */
class SyntaxError : public std::runtime_error {
    public:
        /**
         * Construct from a null-terminated message.
         * Kept exactly as before so existing call sites keep compiling.
         */
        SyntaxError(const char* c) : std::runtime_error(c) {}

        /**
         * Construct from a std::string, so that messages assembled at run
         * time do not have to be passed through c_str() first.
         */
        explicit SyntaxError(const std::string& message) : std::runtime_error(message) {}
};
template <typename Value> template <typename Value>
class {{name}} { class {{name}} {
public: public:
@ -43,6 +50,15 @@ class {{name}} {
* Functions to be supplied by the user * * Functions to be supplied by the user *
******************************************/ ******************************************/
/**
 * Handle a syntax error detected by parse().
 *
 * parse() calls this when the action table holds no action for the current
 * token in the current state, before it starts unwinding its stacks.
 *
 * @param current  the current Token, one that has no action associated in the current state
 * @param expected a listing of all terminals (EOF included) that do have an action in the current state
 * @return the Value that parse() pushes on the value stack for the `error`
 *         symbol once it has popped back to a state that can handle it
 *
 * By default throws a SyntaxError whose message names `current` and every
 * entry of `expected`. Override and return normally to attempt recovery.
 */
virtual Value error(Token current, const std::vector<{{name}}_Symbol>& expected);
/** /**
* Get the next token from the lexer * Get the next token from the lexer
*/ */
@ -72,6 +88,32 @@ enum Action {
}; };
/*********************************************
 *  Translate a Symbol to a readable string  *
 *********************************************/

/**
 * Return the enumerator name of a symbol (e.g. "T_EOF") as a string.
 *
 * @param s the symbol to translate
 * @return the spelling of the matching {{name}}_Symbol enumerator, or
 *         "<unknown symbol>" when s does not match any enumerator
 *
 * Declared inline because this is a namespace-scope definition that sits next
 * to class template definitions; presumably the generated file is included as
 * a header, and a non-inline definition would then be emitted once per
 * including translation unit and clash at link time.
 */
inline std::string to_string({{name}}_Symbol s) {
    switch (s) {
{{#symbols}}
        case {{name}}_Symbol::{{symbol}}:
            return "{{symbol}}";
{{/symbols}}
    }
    // An enum class can hold values without an enumerator (e.g. after a cast);
    // return a placeholder instead of running off the end of the function.
    return "<unknown symbol>";
}
/**************************
 *  Default error method  *
 **************************/

/**
 * Default syntax error handler: never returns, always throws.
 *
 * Builds a report that names the offending token's symbol followed by one
 * line per expected symbol, then raises it as a SyntaxError.
 *
 * @param current  the token for which no action exists
 * @param expected the symbols that would have been accepted instead
 * @throws SyntaxError always
 */
template <typename Value>
Value {{name}}<Value>::error(Token current, const std::vector<{{name}}_Symbol>& expected) {
    std::string report = "Syntax Error: got ";
    report += to_string(current.symbol);
    report += "\n Expected any of:";

    // One entry per acceptable symbol, each on its own line.
    for (const {{name}}_Symbol& candidate : expected) {
        report += "\n ";
        report += to_string(candidate);
    }

    throw SyntaxError(report.c_str());
}
/*************************** /***************************
* Parser implementation * * Parser implementation *
***************************/ ***************************/
@ -88,8 +130,27 @@ Value {{name}}<Value>::parse() {
switch (act & 0x3) { switch (act & 0x3) {
case ERROR: case ERROR:
//TODO: error handling {
assert(false); constexpr std::uint64_t verr = static_cast<std::uint64_t>({{name}}_Symbol::V_error);
std::vector<{{name}}_Symbol> expected;
{{#symbols}}
if ({{name}}_Symbol::{{symbol}} <= {{name}}_Symbol::T_EOF && (TABLE[stateStack.top()][static_cast<std::uint64_t>({{name}}_Symbol::{{symbol}})] & 0x3) != ERROR) {
expected.emplace_back({{name}}_Symbol::{{symbol}});
}
{{/symbols}}
Value errorVal = error(tok, expected);
while (!valueStack.empty() && (TABLE[stateStack.top()][verr] & 0x3) == ERROR) {
valueStack.pop();
stateStack.pop();
}
if ((TABLE[stateStack.top()][verr] & 0x3) == ERROR) {
throw SyntaxError("Syntax error: could not recover");
}
stateStack.push(TABLE[stateStack.top()][verr] >> 2);
valueStack.emplace(std::move(errorVal));
}
break; break;
case SHIFT: case SHIFT:
valueStack.emplace(std::move(tok)); valueStack.emplace(std::move(tok));