From 4b1b0ab9e05d49d3b78da26746aba53811a71a2f Mon Sep 17 00:00:00 2001 From: Robin Jadoul Date: Wed, 18 Jan 2017 16:11:39 +0100 Subject: [PATCH] Error handler in the generated parser --- TODO | 2 +- include/Parsodus/backends/cppLR.h | 25 ++++++------ templates/c++/lr.h | 65 ++++++++++++++++++++++++++++++- 3 files changed, 78 insertions(+), 14 deletions(-) diff --git a/TODO b/TODO index 1691eb8..ba4ffd8 100644 --- a/TODO +++ b/TODO @@ -4,7 +4,6 @@ T> Parser selection without enum (Thomas) (flexible matching (case insensitive, ...)) -R> Generated code error handler T> Parsodus regex parser in Lexesis -> Vrijgeven in libraryformaat: mogelijkheid verschillende tokens opvragen K> Parsodus Parsodus parser @@ -50,3 +49,4 @@ R> README R> Precedence resolution in generator R> man pages R> bash completion +R> Generated code error handler diff --git a/include/Parsodus/backends/cppLR.h b/include/Parsodus/backends/cppLR.h index bcbb46f..3ac016a 100644 --- a/include/Parsodus/backends/cppLR.h +++ b/include/Parsodus/backends/cppLR.h @@ -41,29 +41,32 @@ namespace backends { void generateParser(std::function(std::string)> getOstreamForFileName, std::string parserName, const Config& config) { assert(parserName.length()); - Generator gen(config.grammar); + Grammar grammar(config.grammar); + grammar.variables.emplace("error"); + Generator gen(grammar); lr::LRTable table(gen.generate()); std::map topLevel; topLevel["name"] = parserName; topLevel["num_states"] = templ::make_string(std::to_string(table.act.size())); - topLevel["num_rules"] = templ::make_string(std::to_string(config.grammar.rules.size())); - topLevel["num_symbols"] = templ::make_string(std::to_string(config.grammar.terminals.size() + 1 + config.grammar.variables.size())); // + 1 for EOF + topLevel["num_rules"] = templ::make_string(std::to_string(grammar.rules.size())); + topLevel["num_symbols"] = templ::make_string(std::to_string(grammar.terminals.size() + 1 + grammar.variables.size())); // + 1 for EOF std::vector symbols; - symbols.push_back(templ::make_map({{"symbol", templ::make_string("T_EOF")}})); - for (auto& s : config.grammar.terminals) + for (auto& s : grammar.terminals) symbols.push_back(templ::make_map({{"symbol", templ::make_string("T_" + s)}})); - for (auto& s : config.grammar.variables) + symbols.push_back(templ::make_map({{"symbol", templ::make_string("T_EOF")}})); + + for (auto& s : grammar.variables) symbols.push_back(templ::make_map({{"symbol", templ::make_string("V_" + s)}})); topLevel["symbols"] = std::move(symbols); std::vector rules; std::set rulenames; - for (std::size_t i = 0; i < config.grammar.rules.size(); i++) { + for (std::size_t i = 0; i < grammar.rules.size(); i++) { std::map r; r["index"] = templ::make_string(std::to_string(i)); - r["rhs_length"] = templ::make_string(std::to_string(config.grammar.rules[i]->tail.size())); + r["rhs_length"] = templ::make_string(std::to_string(grammar.rules[i]->tail.size())); if (false /* the rule has a name */) { r["rname"] = templ::make_string(""); //The name rulenames.insert(""); //The name @@ -80,7 +83,7 @@ namespace backends { } topLevel["rulenames"] = templ::make_array(std::move(rulenamesT)); - std::set terminals = config.grammar.terminals; + std::set terminals = grammar.terminals; terminals.insert(util::EOF_PLACEHOLDER); std::vector states(table.act.size()); for (std::size_t i = 0; i < table.act.size(); i++) { @@ -99,7 +102,7 @@ namespace backends { break; case lr::Action::REDUCE: a = "REDUCE"; - data = "static_cast(" + parserName + "_Symbol::V_" + config.grammar.rules[tmp.second]->head + ") << 31 | " + std::to_string(tmp.second); + data = "static_cast(" + parserName + "_Symbol::V_" + grammar.rules[tmp.second]->head + ") << 31 | " + std::to_string(tmp.second); break; case lr::Action::ACCEPT: a = "ACCEPT"; @@ -115,7 +118,7 @@ namespace backends { } std::vector gotos; - for (const auto& nonterm : config.grammar.variables) { + for (const auto& nonterm : grammar.variables) { gotos.push_back(templ::make_string(std::to_string(table.goto_[i][nonterm]))); } diff --git a/templates/c++/lr.h b/templates/c++/lr.h index 1d7685f..b96158b 100644 --- a/templates/c++/lr.h +++ b/templates/c++/lr.h @@ -6,6 +6,8 @@ #include #include #include +#include +#include /** * Represents the type of the symbol (both terminals and nonterminals) @@ -16,6 +18,11 @@ enum class {{name}}_Symbol : std::uint64_t { {{/symbols}} }; +class SyntaxError : public std::runtime_error { +public: + SyntaxError(const char* c) : std::runtime_error(c) {} +}; + template class {{name}} { public: @@ -43,6 +50,15 @@ class {{name}} { * Functions to be supplied by the user * ******************************************/ + /** + * Handle an error + * current is the current Token, one that has no action associated in the current state + * expected is a listing of all terminals that do have an action + * + * By default throws an error + */ + virtual Value error(Token current, const std::vector<{{name}}_Symbol>& expected); + /** * Get the next token from the lexer */ @@ -72,6 +88,32 @@ enum Action { }; +/********************************************* +* Translate a Symbol to a readable string * +*********************************************/ +std::string to_string({{name}}_Symbol s) { + switch (s) { + {{#symbols}} + case {{name}}_Symbol::{{symbol}}: + return "{{symbol}}"; + {{/symbols}} + } +} + + +/************************** +* Default error method * +**************************/ +template +Value {{name}}::error(Token current, const std::vector<{{name}}_Symbol>& expected) { + std::string msg = "Syntax Error: got " + to_string(current.symbol) + "\n Expected any of:"; + for (auto& s : expected) { + msg += "\n " + to_string(s); + } + throw SyntaxError(msg.c_str()); +} + + /*************************** * Parser implementation * ***************************/ @@ -88,8 +130,27 @@ Value {{name}}::parse() { switch (act & 0x3) { case ERROR: - //TODO: error handling - assert(false); + { + constexpr std::uint64_t verr = static_cast({{name}}_Symbol::V_error); + std::vector<{{name}}_Symbol> expected; + {{#symbols}} + if ({{name}}_Symbol::{{symbol}} <= {{name}}_Symbol::T_EOF && (TABLE[stateStack.top()][static_cast({{name}}_Symbol::{{symbol}})] & 0x3) != ERROR) { + expected.emplace_back({{name}}_Symbol::{{symbol}}); + } + {{/symbols}} + Value errorVal = error(tok, expected); + + while (!valueStack.empty() && (TABLE[stateStack.top()][verr] & 0x3) == ERROR) { + valueStack.pop(); + stateStack.pop(); + } + if ((TABLE[stateStack.top()][verr] & 0x3) == ERROR) { + throw SyntaxError("Syntax error: could not recover"); + } + + stateStack.push(TABLE[stateStack.top()][verr] >> 2); + valueStack.emplace(std::move(errorVal)); + } break; case SHIFT: valueStack.emplace(std::move(tok));