Parsodus/templates/c++/lr.h

244 lines
8.1 KiB
C
Raw Normal View History

2017-01-30 16:34:56 +01:00
/*
* This software is provided 'as-is', without any express or implied warranty. In no event will the authors be held liable for any damages arising from the use of this software.
*
* Permission is granted to anyone to use this software for any purpose, including commercial applications, and to alter it and redistribute it freely, subject to the following restrictions:
*
* 1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
*
* 2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
*
* 3. This notice may not be removed or altered from any source distribution.
*/
2016-12-31 16:16:26 +01:00
#pragma once
#ifndef PARSODUS_PARSER_{{name}}_H
#define PARSODUS_PARSER_{{name}}_H
2017-01-01 12:44:40 +01:00
#include <cassert>
2017-01-02 17:02:43 +01:00
#include <cstdint>
2016-12-31 16:16:26 +01:00
#include <deque>
2017-01-01 12:44:40 +01:00
#include <stack>
2017-01-18 16:11:39 +01:00
#include <stdexcept>
#include <string>
#include <utility>
#include <vector>
2016-12-31 16:16:26 +01:00
2017-01-02 17:02:43 +01:00
/**
* Represents the type of the symbol (both terminals and nonterminals).
*
* One enumerator is generated for every entry of the template's symbol list.
* The numeric value of an enumerator is used directly as the column index
* into the parse table, so the underlying type is a 64 bit unsigned integer.
*
* The parser implementation in this header refers to the enumerators V_error
* and T_EOF by name, and treats every value from 0 up to the generated
* "last terminal" enumerator as a terminal when it lists expected tokens.
*/
enum class {{name}}_Symbol : std::uint64_t {
{{#symbols}}
{{symbol}},
{{/symbols}}
};
2017-01-18 16:11:39 +01:00
/**
 * Exception raised by the parser when the input cannot be parsed, either
 * from the default error handler or when error recovery fails.
 *
 * Derives from std::runtime_error, so what() returns the message passed
 * to the constructor.
 */
class SyntaxError : public std::runtime_error {
public:
    /**
     * Build the exception from a C string message.
     * Explicit, so a string literal is never silently converted into an exception object.
     */
    explicit SyntaxError(const char* c) : std::runtime_error(c) {}

    /**
     * Build the exception from a std::string message.
     * Saves callers that assemble the message dynamically from calling c_str().
     */
    explicit SyntaxError(const std::string& msg) : std::runtime_error(msg) {}
};
2016-12-31 16:16:26 +01:00
/**
 * Table driven LR parser producing a semantic value of type Value.
 *
 * The class is abstract: a user derives from it and supplies the lexer
 * and one reduce function per grammar rule. parse() then drives those
 * hooks using the generated parse table.
 */
template <typename Value>
class {{name}} {
public:
    {{name}}() = default;
    virtual ~{{name}}() = default;

    /**
     * Parse the token stream delivered by lex().
     *
     * Returns the value attached to the single entry left on the value
     * stack when the table signals acceptance.
     * Throws SyntaxError when an error cannot be recovered from (or when
     * the error hook itself throws, which the default one does).
     */
    Value parse();

protected:
    /**
     * A token, consisting of a Symbol type (should be a terminal) and a Value.
     *
     * The parser also stores reduced nonterminals and the error symbol in
     * this form on its value stack.
     */
    struct Token {
        /** Build a token by copying the value. */
        Token(const {{name}}_Symbol& sym, const Value& val) : symbol(sym), value(val) {}
        /** Build a token by moving the value in. */
        Token(const {{name}}_Symbol& sym, Value&& val) : symbol(sym), value(std::move(val)) {}

        {{name}}_Symbol symbol;
        Value value;
    };

    /******************************************
     *  Functions to be supplied by the user  *
     ******************************************/

    /**
     * Handle a syntax error.
     *
     * current  is the token for which the current state has no action.
     * expected lists every terminal that does have an action in that state.
     *
     * The returned value becomes the value of the error symbol that the
     * parser pushes while recovering.
     * The default implementation does not return: it throws SyntaxError.
     */
    virtual Value error(Token current, const std::vector<{{name}}_Symbol>& expected);

    /**
     * Get the next token from the lexer.
     */
    virtual Token lex() = 0;

    /**
     * Apply a reduction (a grammar rule in reverse).
     *
     * subparts holds the entries popped from the value stack for the rule,
     * in their original left to right order. The returned value is pushed
     * back together with the symbol the rule produces.
     */
    {{#rulenames}}
    virtual Value reduce_{{rname}}(std::deque<Token> subparts) = 0;
    {{/rulenames}}
};
/**
 * Specialisation for Value = bool.
 *
 * This variant carries no semantic values: the lexer hook yields bare
 * symbols, there are no reduce or error hooks, and parse() returns a bool.
 */
template <>
class {{name}}<bool> {
public:
    {{name}}() = default;
    virtual ~{{name}}() = default;

    /**
     * Parse the symbol stream delivered by lex().
     *
     * NOTE(review): no definition of this member is visible in this header;
     * presumably it is provided by the generated source file and reports
     * whether the input was accepted. Confirm.
     */
    bool parse();

protected:
    /******************************************
     *  Functions to be supplied by the user  *
     ******************************************/

    /**
     * Get the next symbol from the lexer.
     */
    virtual {{name}}_Symbol lex() = 0;
};
2017-01-01 12:44:40 +01:00
// Short aliases for the generated table symbols. The real identifiers embed
// the parser name (presumably so that several generated parsers can be linked
// into one program without clashing -- confirm). Both aliases are removed
// again with #undef at the end of this header.
#define TABLE {{name}}___Table___{{name}}
#define REDUCE_COUNT {{name}}___Num_Reduces___{{name}}
2016-12-31 16:16:26 +01:00
// Not a static member because the table should not be replicated for different instantiations of the parser
2017-01-02 11:45:50 +01:00
// Parse table, indexed as [state][symbol]; defined outside this header.
// As decoded by parse(): the two low bits of an entry hold the Action, the
// remaining bits (entry >> 2) hold its payload. For SHIFT, and for the lookup
// done after a reduction, the payload is the state to push. For REDUCE the low
// 31 bits of the payload are the rule index and the bits above them are the
// symbol the rule produces.
extern const std::uint64_t TABLE[{{num_states}}][{{num_symbols}}];
// Per rule: the number of stack entries parse() pops when reducing by it.
extern const unsigned char REDUCE_COUNT[{{num_rules}}];
2017-01-01 12:44:40 +01:00
/**
 * Kind of parser action, stored in the two low bits of a table entry
 * (the parser extracts it with a 0x3 mask).
 */
enum Action {
    ERROR  = 0x0,
    SHIFT  = 0x1,
    REDUCE = 0x2,
    ACCEPT = 0x3
};
2016-12-31 16:16:26 +01:00
2017-01-18 16:11:39 +01:00
/*********************************************
 *  Translate a Symbol to a readable string  *
 *********************************************/
/**
 * Returns the name of the enumerator matching s.
 *
 * The switch deliberately has no default label, so compilers can still warn
 * when a generated enumerator is missing from it. A value that matches no
 * enumerator (for example one produced by a cast from a table index) yields
 * a placeholder containing the numeric value, instead of running off the end
 * of the function, which would be undefined behaviour.
 */
inline std::string to_string({{name}}_Symbol s) {
    switch (s) {
    {{#symbols}}
    case {{name}}_Symbol::{{symbol}}:
        return "{{symbol}}";
    {{/symbols}}
    }
    // Only reached for values outside the generated enumerator list.
    return "<unknown symbol " + std::to_string(static_cast<std::uint64_t>(s)) + ">";
}
/**************************
 *  Default error method  *
 **************************/
/**
 * Default error hook: never returns, always throws SyntaxError.
 *
 * The message names the symbol of the offending token, followed by one line
 * for every terminal that would have been acceptable.
 */
template <typename Value>
Value {{name}}<Value>::error(Token current, const std::vector<{{name}}_Symbol>& expected) {
    std::string message = "Syntax Error: got ";
    message += to_string(current.symbol);
    message += "\n Expected any of:";
    for (const {{name}}_Symbol& candidate : expected) {
        message += "\n ";
        message += to_string(candidate);
    }
    throw SyntaxError(message.c_str());
}
2016-12-31 16:16:26 +01:00
/***************************
* Parser implementation *
***************************/
// Table driven LR parse loop.
//
// Two stacks are kept side by side: valueStack holds the tokens seen so far
// (shifted terminals, reduced nonterminals and error symbols), stateStack
// holds the parser states and starts with state 0. Each iteration looks up
// the table entry for the state on top of the stack and the symbol of the
// current token, and performs the action encoded in its two low bits.
//
// Returns the value of the remaining stack entry on ACCEPT.
// Throws SyntaxError when recovery from an error is impossible; anything the
// error hook throws propagates as well.
template <typename Value>
2017-01-02 17:02:43 +01:00
Value {{name}}<Value>::parse() {
2017-01-01 12:44:40 +01:00
std::stack<Token> valueStack;
std::stack<std::uint64_t> stateStack;
stateStack.push(0);
Token tok = lex();
while (true) {
2017-01-02 19:00:07 +01:00
std::uint64_t act = TABLE[stateStack.top()][static_cast<std::uint64_t>(tok.symbol)];
2017-01-01 12:44:40 +01:00
// Low two bits select the action; the payload is in act >> 2.
switch (act & 0x3) {
case ERROR:
2017-01-18 16:11:39 +01:00
{
// Column of the error symbol in the table.
constexpr std::uint64_t verr = static_cast<std::uint64_t>({{name}}_Symbol::V_error);
// Collect every terminal that has some action in the current state,
// so the error hook can report what would have been accepted.
std::vector<{{name}}_Symbol> expected;
std::uint64_t top = stateStack.top();
for (std::uint64_t i = 0; i <= static_cast<std::uint64_t>({{name}}_Symbol::{{last_terminal}}); i++) {
if ((TABLE[top][i] & 0x3) != ERROR)
expected.emplace_back(static_cast<{{name}}_Symbol>(i));
2017-01-18 16:11:39 +01:00
}
2017-01-26 12:51:43 +01:00
// Hand the offending token to the error hook. The value is moved out of
// tok rather than copied.
// NOTE(review): tok.value is moved-from after this line. If tok already
// has an action in the recovery state, the skip loop below does not
// replace it and it is later shifted with that moved-from value --
// confirm this is intended.
Token report = Token{tok.symbol, std::move(tok.value)};
2017-01-26 12:53:55 +01:00
Value errorVal = error(std::move(report), expected);
2017-01-18 16:11:39 +01:00
2017-01-28 15:52:57 +01:00
// Unwind both stacks until the state on top has a non-zero table entry
// for the error symbol, or the value stack is exhausted.
while (!valueStack.empty() && !TABLE[stateStack.top()][verr]) {
2017-01-18 16:11:39 +01:00
valueStack.pop();
stateStack.pop();
}
2017-01-28 15:52:57 +01:00
// No state on the stack can handle the error symbol: give up.
if (!TABLE[stateStack.top()][verr]) {
2017-01-18 16:11:39 +01:00
throw SyntaxError("Syntax error: could not recover");
}
// Enter the state the table gives for the error symbol and push the
// error symbol, carrying the value the hook returned.
stateStack.push(TABLE[stateStack.top()][verr] >> 2);
valueStack.emplace(Token{ {{name}}_Symbol::V_error, std::move(errorVal)});
// Discard input until a token has an action in the new state, stopping
// at end of input.
while (tok.symbol != {{name}}_Symbol::T_EOF && (TABLE[stateStack.top()][static_cast<std::uint64_t>(tok.symbol)] & 0x3) == ERROR) {
tok = lex();
}
// Still no action (only possible when the loop stopped on T_EOF).
if ((TABLE[stateStack.top()][static_cast<std::uint64_t>(tok.symbol)] & 0x3) == ERROR) {
throw SyntaxError("Syntax error: could not recover");
}
2017-01-18 16:11:39 +01:00
}
2017-01-01 12:44:40 +01:00
break;
case SHIFT:
// Consume the token: push it with the target state, then fetch the next one.
valueStack.emplace(std::move(tok));
stateStack.push(act >> 2);
tok = lex();
break;
case REDUCE:
{
// Payload layout: low 31 bits are the rule index, the bits above them
// are the symbol the rule produces.
std::uint64_t tmp = act >> 2;
2017-01-02 17:02:43 +01:00
{{name}}_Symbol symbol = static_cast<{{name}}_Symbol>(tmp >> 31);
2017-01-02 19:00:07 +01:00
std::uint32_t rule = tmp & ((1ull << 31) - 1);
2017-01-01 12:44:40 +01:00
// Pop as many entries as REDUCE_COUNT lists for the rule. Inserting at
// the front restores their original left to right order.
std::deque<Token> dq;
2017-01-02 17:02:43 +01:00
for (unsigned char i = 0; i < REDUCE_COUNT[rule]; i++) {
2017-01-01 12:44:40 +01:00
dq.emplace_front(std::move(valueStack.top()));
valueStack.pop();
stateStack.pop();
}
// Call the reduce function of the rule and push its result, tagged with
// the produced symbol.
switch (rule) {
{{#rules}}
case {{index}}:
2017-01-02 19:00:07 +01:00
{{#rname}}valueStack.emplace(symbol, reduce_{{rname}}(std::move(dq)));{{/rname}}
{{^rname}}valueStack.emplace(symbol, reduce_{{index}}(std::move(dq)));{{/rname}}
2017-01-01 12:44:40 +01:00
break;
{{/rules}}
default:
assert(false); //There should be no such rule
break;
}
2017-01-02 19:00:07 +01:00
// Push the state the table gives for the exposed state and the symbol
// that was just produced.
stateStack.push(TABLE[stateStack.top()][static_cast<std::uint64_t>(valueStack.top().symbol)] >> 2);
2017-01-01 12:44:40 +01:00
}
break;
case ACCEPT:
// On acceptance exactly one value and two states are expected to remain.
assert(stateStack.size() == 2);
assert(valueStack.size() == 1);
2017-01-26 12:51:43 +01:00
return std::move(valueStack.top().value);
2017-01-01 12:44:40 +01:00
default:
//IMPOSSIBLE
break;
}
}
2016-12-31 16:16:26 +01:00
}
2017-01-01 12:44:40 +01:00
// The short aliases are only meant for use inside this header; remove them
// so they do not leak into code that includes it.
#undef REDUCE_COUNT
#undef TABLE
2016-12-31 16:16:26 +01:00
#endif /* PARSODUS_PARSER_{{name}}_H */