#pragma once #ifndef PARSODUS_PARSER_{{name}}_H #define PARSODUS_PARSER_{{name}}_H #include #include #include #include #include #include /** * Represents the type of the symbol (both terminals and nonterminals) */ enum class {{name}}_Symbol : std::uint64_t { {{#symbols}} {{symbol}}, {{/symbols}} }; class SyntaxError : public std::runtime_error { public: SyntaxError(const char* c) : std::runtime_error(c) {} }; template class {{name}} { public: {{name}}() {} virtual ~{{name}}() {} /** * Parse it */ Value parse(); protected: /** * A token, consisting of a Symbol type (should be a terminal) and a Value */ struct Token { Token(const {{name}}_Symbol& sym, const Value& val) : symbol(sym), value(val) {} Token(const {{name}}_Symbol& sym, Value&& val) : symbol(sym), value(std::move(val)) {} {{name}}_Symbol symbol; Value value; }; /****************************************** * Functions to be supplied by the user * ******************************************/ /** * Handle an error * current is the current Token, one that has no action associated in the current state * expected is a listing of all terminals that do have an action * * By default throws an error */ virtual Value error(Token current, const std::vector<{{name}}_Symbol>& expected); /** * Get the next token from the lexer */ virtual Token lex() = 0; /** * Apply a reduction (a grammar rule in reverse) */ {{#rulenames}} virtual Value reduce_{{rname}}(std::deque subparts) = 0; {{/rulenames}} private: }; template <> class {{name}} { public: {{name}}() {} virtual ~{{name}}() {} /** * Parse it */ bool parse(); protected: /****************************************** * Functions to be supplied by the user * ******************************************/ /** * Get the next token from the lexer */ virtual {{name}}_Symbol lex() = 0; }; #define TABLE {{name}}___Table___{{name}} #define REDUCE_COUNT {{name}}___Num_Reduces___{{name}} // Not a static member because the table should not be replicated for different instantiations of the parser extern const std::uint64_t TABLE[{{num_states}}][{{num_symbols}}]; extern const unsigned char REDUCE_COUNT[{{num_rules}}]; enum Action { ERROR = 0, SHIFT = 1, REDUCE = 2, ACCEPT = 3 }; /********************************************* * Translate a Symbol to a readable string * *********************************************/ inline std::string to_string({{name}}_Symbol s) { switch (s) { {{#symbols}} case {{name}}_Symbol::{{symbol}}: return "{{symbol}}"; {{/symbols}} default: //should not happen return "?"; } } /************************** * Default error method * **************************/ template Value {{name}}::error(Token current, const std::vector<{{name}}_Symbol>& expected) { std::string msg = "Syntax Error: got " + to_string(current.symbol) + "\n Expected any of:"; for (auto& s : expected) { msg += "\n " + to_string(s); } throw SyntaxError(msg.c_str()); } /*************************** * Parser implementation * ***************************/ template Value {{name}}::parse() { std::stack valueStack; std::stack stateStack; stateStack.push(0); Token tok = lex(); while (true) { std::uint64_t act = TABLE[stateStack.top()][static_cast(tok.symbol)]; switch (act & 0x3) { case ERROR: { constexpr std::uint64_t verr = static_cast({{name}}_Symbol::V_error); std::vector<{{name}}_Symbol> expected; std::uint64_t top = stateStack.top(); for (std::uint64_t i = 0; i <= static_cast({{name}}_Symbol::{{last_terminal}}); i++) { if ((TABLE[top][i] & 0x3) != ERROR) expected.emplace_back(static_cast<{{name}}_Symbol>(i)); } Token report = Token{tok.symbol, std::move(tok.value)}; Value errorVal = error(report, expected); while (!valueStack.empty() && (TABLE[stateStack.top()][verr] & 0x3) == ERROR) { valueStack.pop(); stateStack.pop(); } if ((TABLE[stateStack.top()][verr] & 0x3) == ERROR) { throw SyntaxError("Syntax error: could not recover"); } stateStack.push(TABLE[stateStack.top()][verr] >> 2); valueStack.emplace(Token{ {{name}}_Symbol::V_error, std::move(errorVal)}); while (tok.symbol != {{name}}_Symbol::T_EOF && (TABLE[stateStack.top()][static_cast(tok.symbol)] & 0x3) == ERROR) { tok = lex(); } if ((TABLE[stateStack.top()][static_cast(tok.symbol)] & 0x3) == ERROR) { throw SyntaxError("Syntax error: could not recover"); } } break; case SHIFT: valueStack.emplace(std::move(tok)); stateStack.push(act >> 2); tok = lex(); break; case REDUCE: { std::uint64_t tmp = act >> 2; {{name}}_Symbol symbol = static_cast<{{name}}_Symbol>(tmp >> 31); std::uint32_t rule = tmp & ((1ull << 31) - 1); std::deque dq; for (unsigned char i = 0; i < REDUCE_COUNT[rule]; i++) { dq.emplace_front(std::move(valueStack.top())); valueStack.pop(); stateStack.pop(); } switch (rule) { {{#rules}} case {{index}}: {{#rname}}valueStack.emplace(symbol, reduce_{{rname}}(std::move(dq)));{{/rname}} {{^rname}}valueStack.emplace(symbol, reduce_{{index}}(std::move(dq)));{{/rname}} break; {{/rules}} default: assert(false); //There should be no such rule break; } stateStack.push(TABLE[stateStack.top()][static_cast(valueStack.top().symbol)] >> 2); } break; case ACCEPT: assert(stateStack.size() == 2); assert(valueStack.size() == 1); return std::move(valueStack.top().value); default: //IMPOSSIBLE break; } } } #undef REDUCE_COUNT #undef TABLE #endif /* PARSODUS_PARSER_{{name}}_H */