General LR table generation, to be done: specific parts
This commit is contained in:
parent
b526d95750
commit
6df924a282
|
@ -5,6 +5,8 @@
|
|||
#include "Parsodus/grammar.h"
|
||||
#include "Parsodus/lrtables/table.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <cassert>
|
||||
#include <queue>
|
||||
|
||||
namespace pds {
|
||||
|
@ -15,9 +17,20 @@ const std::string EOF_PLACEHOLDER = "$";
|
|||
|
||||
/**
|
||||
* Base class for LR (and derivative) table generators (such as SLR and LALR)
|
||||
* Parametrized on the type of item to be used in the configuration sets
|
||||
* Parametrized on the type of itemset (configuration set) to be used
|
||||
*
|
||||
* An Itemset should support:
|
||||
* - A constructor taking a single Rule, that makes this a starting rule
|
||||
* - void close(const Grammar&); compute the closure
|
||||
* - Itemset succ(const Grammar&, std::string) const; compute the successor of this set, over the given symbol
|
||||
* - bool operator==(const Itemset&); are these two Itemsets equal
|
||||
* - bool canMerge(const Itemset&) const; Can the given Itemset be merged into this one
|
||||
* - void merge(const Itemset&); Merge the given Itemset into this one
|
||||
* - Action action(const std::string&) const; Determine the action to be executed on given lookahead
|
||||
* - bool empty() const; is this Itemset empty (== not useful)
|
||||
* - std::set<std::size_t> getReduces(std::string) const; get all Rule indices where a reduce should happen with given lookahead (not necessarily a set, but iterable)
|
||||
*/
|
||||
template <typename Item>
|
||||
template <typename Itemset>
|
||||
class Generator {
|
||||
public:
|
||||
/**
|
||||
|
@ -35,167 +48,102 @@ class Generator {
|
|||
*/
|
||||
LRTable generate();
|
||||
|
||||
protected:
|
||||
/**
|
||||
* Indicate whether this specific algorithm needs to have the First and Follow sets generated
|
||||
*/
|
||||
virtual bool needsFollowSet() = 0;
|
||||
|
||||
/**
|
||||
* Build the starting item to build all item sets from
|
||||
*
|
||||
* @param startrule The constructed extended starting rule
|
||||
* @param eof The token used as end of file
|
||||
*/
|
||||
virtual Item initial_item(Rule startrule, std::string eof) = 0;
|
||||
|
||||
|
||||
std::set<std::string> first(std::string s);
|
||||
std::set<std::string> follow(std::string s);
|
||||
|
||||
private:
|
||||
/**
|
||||
* Build the `First` set
|
||||
*/
|
||||
void buildFirst();
|
||||
|
||||
/**
|
||||
* Build the `Follow` set
|
||||
*/
|
||||
void buildFollow();
|
||||
|
||||
/**
|
||||
* Compute the closure of an item set
|
||||
*/
|
||||
std::set<Item> closure(const std::set<Item>& its);
|
||||
|
||||
Grammar m_gram;
|
||||
Rule m_startrule;
|
||||
std::map<std::string, std::set<std::string>> m_first;
|
||||
std::map<std::string, std::set<std::string>> m_follow;
|
||||
};
|
||||
|
||||
template <typename Item>
|
||||
Generator<Item>::Generator(const std::string& start, const Grammar& g) : m_gram(g), m_startrule(Rule{EXTENDED_START, {start}}), m_first(), m_follow() {
|
||||
template <typename Itemset>
|
||||
Generator<Itemset>::Generator(const std::string& start, const Grammar& g) : m_gram(g), m_startrule(Rule{EXTENDED_START, {start}}) {
|
||||
m_gram.terminals.insert(EOF_PLACEHOLDER); //End of file
|
||||
m_gram.variables.insert(EXTENDED_START); //modified start rule
|
||||
m_gram.rules[EXTENDED_START].insert(m_startrule);
|
||||
|
||||
if (needsFollowSet()) {
|
||||
buildFirst();
|
||||
buildFollow();
|
||||
}
|
||||
m_gram.rules[EXTENDED_START].push_back(m_startrule);
|
||||
}
|
||||
|
||||
template <typename Item>
|
||||
LRTable Generator<Item>::generate() {
|
||||
//TODO
|
||||
template <typename Itemset>
|
||||
LRTable Generator<Itemset>::generate() {
|
||||
//TODO: generate the table
|
||||
LRTable table;
|
||||
std::vector<std::set<Item>> itemsets;
|
||||
itemsets.emplace_back(closure({initial_item(m_startrule, EOF_PLACEHOLDER)}));
|
||||
|
||||
std::queue<Item> itemqueue;
|
||||
itemqueue.push(itemsets[0]);
|
||||
while (!itemqueue.empty()) {
|
||||
std::set<Item> cur = std::move(itemqueue.front());
|
||||
itemqueue.pop();
|
||||
//Start with size 1
|
||||
table.act.emplace_back();
|
||||
table.goto_.emplace_back();
|
||||
|
||||
std::vector<Itemset> itemsets;
|
||||
itemsets.emplace_back(Itemset(m_startrule));
|
||||
itemsets[0].close(m_gram);
|
||||
|
||||
std::set<std::string> symbols = std::set_union(m_gram.terminals.begin(), m_gram.terminals.end(),
|
||||
m_gram.variables.begin(), m_gram.variables.end());
|
||||
|
||||
std::queue<std::pair<std::size_t, Itemset>> q;
|
||||
while (!q.empty()) {
|
||||
auto& curP = q.front();
|
||||
std::size_t curIdx = curP.first;
|
||||
Itemset cur = curP.second;
|
||||
q.pop();
|
||||
|
||||
for (const std::string& sym : symbols) {
|
||||
Itemset s = cur.succ(m_gram, sym);
|
||||
if (s.empty())
|
||||
continue;
|
||||
s.close(m_gram);
|
||||
|
||||
std::size_t idx;
|
||||
for (idx = 0; idx < itemsets.size(); idx++) {
|
||||
if (itemsets[idx] == s) {
|
||||
break;
|
||||
} else if (itemsets[idx].canMerge(s)) {
|
||||
itemsets[idx].merge(s);
|
||||
q.emplace(idx, std::move(s));
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (idx == itemsets.size()) {
|
||||
q.push(s);
|
||||
itemsets.emplace_back(idx, std::move(s));
|
||||
|
||||
//Grow the table
|
||||
table.act.emplace_back();
|
||||
table.goto_.emplace_back();
|
||||
}
|
||||
|
||||
if (m_gram.variables.count(sym)) {
|
||||
table.goto_[curIdx][sym] = idx;
|
||||
} else {
|
||||
table.act[curIdx][sym] = std::make_pair(Action::SHIFT, idx);
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
for (std::pair<std::string, std::set<Item>> succ : successors(cur)) {
|
||||
//Add new itemset or merge
|
||||
for (std::string term : m_gram.terminals) {
|
||||
//Get reduces from the itemset, add them to the table, look for conflicts
|
||||
for (std::size_t rule_applied : cur.getReduces(term)) {
|
||||
if (rule_applied == m_gram.rules.size() - 1) { // The last added rule
|
||||
// The extended start rule
|
||||
assert(term == EOF_PLACEHOLDER);
|
||||
table.act[curIdx][term] = std::make_pair(Action::ACCEPT, 0);
|
||||
} else if (table.act[curIdx].count(term)) {
|
||||
if (table.act[curIdx][term].first == Action::SHIFT) {
|
||||
//Shift-Reduce conflict, rapport and resolve it (TODO)
|
||||
} else if (table.act[curIdx][term].first == Action::REDUCE
|
||||
&& table.act[curIdx][term].second != rule_applied) {
|
||||
//Reduce-Reduce conflict, rapport it (TODO)
|
||||
} else {
|
||||
//Reduce using the same rule, no problem, NO-OP
|
||||
}
|
||||
} else {
|
||||
// No conflicts
|
||||
table.act[curIdx][term] = std::make_pair(Action::REDUCE, rule_applied);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return table;
|
||||
}
|
||||
|
||||
template <typename Item>
|
||||
void Generator<Item>::buildFirst() {
|
||||
for (std::string term : m_gram.terminals) {
|
||||
m_first[term].insert(term);
|
||||
}
|
||||
|
||||
|
||||
bool changes = true;
|
||||
|
||||
auto update = [&changes, this](std::string head, auto& s) {
|
||||
for (std::string elem : s) {
|
||||
if (!m_first[head].count(elem)) {
|
||||
changes = true;
|
||||
m_first[head].insert(s.begin(), s.end());
|
||||
return;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
while (changes) {
|
||||
changes = false;
|
||||
|
||||
for (const auto& p : m_gram.rules) {
|
||||
const std::string& head = p.first;
|
||||
const std::set<Rule>& rules = p.second;
|
||||
for (const Rule& rule : rules) {
|
||||
if (rule.tail.size() == 0) {
|
||||
update(head, {""});
|
||||
}
|
||||
|
||||
bool ended = false;
|
||||
for (const std::string& replacement : rule.tail) {
|
||||
if (m_first[replacement].count("")) {
|
||||
std::set<std::string> tmp = m_first[replacement];
|
||||
tmp.erase("");
|
||||
update(head, tmp);
|
||||
} else {
|
||||
update(head, m_first[replacement]);
|
||||
ended = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!ended) {
|
||||
update(head, {""});
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template <typename Item>
|
||||
void Generator<Item>::buildFollow() {
|
||||
//EOF follow the added start rule.
|
||||
m_follow[EXTENDED_START].insert(EOF_PLACEHOLDER);
|
||||
|
||||
bool changes = true;
|
||||
|
||||
auto update = [&changes, this](std::string head, auto s) {
|
||||
s.erase("");
|
||||
for (std::string elem : s) {
|
||||
if (!m_follow[head].count(elem)) {
|
||||
changes = true;
|
||||
m_follow[head].insert(s.begin(), s.end());
|
||||
return;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
while (changes) {
|
||||
changes = false;
|
||||
for (const auto& p : m_gram.rules) {
|
||||
std::string& head = p.first;
|
||||
for (const auto& rule : p.second) {
|
||||
for (std::size_t i = 0; i < rule.tail.size(); i++) {
|
||||
if (!m_gram.terminals.count(rule.tail[i])) {
|
||||
if (i == rule.tail.size() - 1 || m_first[rule.tail[i + 1]].count("")) {
|
||||
update(rule.tail[i], m_follow[head]);
|
||||
}
|
||||
if (i < rule.tail.size() - 1) {
|
||||
update(rule.tail[i], m_first[rule.tail[i + 1]]);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
} /* lr */
|
||||
} /* pds */
|
||||
|
|
Loading…
Reference in New Issue