General LR table generation, to be done: specific parts
This commit is contained in:
parent
b526d95750
commit
6df924a282
|
@ -5,6 +5,8 @@
|
||||||
#include "Parsodus/grammar.h"
|
#include "Parsodus/grammar.h"
|
||||||
#include "Parsodus/lrtables/table.h"
|
#include "Parsodus/lrtables/table.h"
|
||||||
|
|
||||||
|
#include <algorithm>
|
||||||
|
#include <cassert>
|
||||||
#include <queue>
|
#include <queue>
|
||||||
|
|
||||||
namespace pds {
|
namespace pds {
|
||||||
|
@ -15,9 +17,20 @@ const std::string EOF_PLACEHOLDER = "$";
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Base class for LR (and derivative) table generators (such as SLR and LALR)
|
* Base class for LR (and derivative) table generators (such as SLR and LALR)
|
||||||
* Parametrized on the type of item to be used in the configuration sets
|
* Parametrized on the type of itemset (configuration set) to be used
|
||||||
|
*
|
||||||
|
* An Itemset should support:
|
||||||
|
* - A constructor taking a single Rule, that makes this a starting rule
|
||||||
|
* - void close(const Grammar&); compute the closure
|
||||||
|
* - Itemset succ(const Grammar&, std::string) const; compute the successor of this set, over the given symbol
|
||||||
|
* - bool operator==(const Itemset&); are these two Itemsets equal
|
||||||
|
* - bool canMerge(const Itemset&) const; Can the given Itemset be merged into this one
|
||||||
|
* - void merge(const Itemset&); Merge the given Itemset into this one
|
||||||
|
* - Action action(const std::string&) const; Determine the action to be executed on given lookahead
|
||||||
|
* - bool empty() const; is this Itemset empty (== not useful)
|
||||||
|
* - std::set<std::size_t> getReduces(std::string) const; get all Rule indices where a reduce should happen with given lookahead (not necessarily a set, but iterable)
|
||||||
*/
|
*/
|
||||||
template <typename Item>
|
template <typename Itemset>
|
||||||
class Generator {
|
class Generator {
|
||||||
public:
|
public:
|
||||||
/**
|
/**
|
||||||
|
@ -35,167 +48,102 @@ class Generator {
|
||||||
*/
|
*/
|
||||||
LRTable generate();
|
LRTable generate();
|
||||||
|
|
||||||
protected:
|
|
||||||
/**
|
|
||||||
* Indicate whether this specific algorithm needs to have the First and Follow sets generated
|
|
||||||
*/
|
|
||||||
virtual bool needsFollowSet() = 0;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Build the starting item to build all item sets from
|
|
||||||
*
|
|
||||||
* @param startrule The constructed extended starting rule
|
|
||||||
* @param eof The token used as end of file
|
|
||||||
*/
|
|
||||||
virtual Item initial_item(Rule startrule, std::string eof) = 0;
|
|
||||||
|
|
||||||
|
|
||||||
std::set<std::string> first(std::string s);
|
|
||||||
std::set<std::string> follow(std::string s);
|
|
||||||
|
|
||||||
private:
|
private:
|
||||||
/**
|
|
||||||
* Build the `First` set
|
|
||||||
*/
|
|
||||||
void buildFirst();
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Build the `Follow` set
|
|
||||||
*/
|
|
||||||
void buildFollow();
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Compute the closure of an item set
|
|
||||||
*/
|
|
||||||
std::set<Item> closure(const std::set<Item>& its);
|
|
||||||
|
|
||||||
Grammar m_gram;
|
Grammar m_gram;
|
||||||
Rule m_startrule;
|
Rule m_startrule;
|
||||||
std::map<std::string, std::set<std::string>> m_first;
|
|
||||||
std::map<std::string, std::set<std::string>> m_follow;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
template <typename Item>
|
template <typename Itemset>
|
||||||
Generator<Item>::Generator(const std::string& start, const Grammar& g) : m_gram(g), m_startrule(Rule{EXTENDED_START, {start}}), m_first(), m_follow() {
|
Generator<Itemset>::Generator(const std::string& start, const Grammar& g) : m_gram(g), m_startrule(Rule{EXTENDED_START, {start}}) {
|
||||||
m_gram.terminals.insert(EOF_PLACEHOLDER); //End of file
|
m_gram.terminals.insert(EOF_PLACEHOLDER); //End of file
|
||||||
m_gram.variables.insert(EXTENDED_START); //modified start rule
|
m_gram.variables.insert(EXTENDED_START); //modified start rule
|
||||||
m_gram.rules[EXTENDED_START].insert(m_startrule);
|
m_gram.rules[EXTENDED_START].push_back(m_startrule);
|
||||||
|
|
||||||
if (needsFollowSet()) {
|
|
||||||
buildFirst();
|
|
||||||
buildFollow();
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename Item>
|
template <typename Itemset>
|
||||||
LRTable Generator<Item>::generate() {
|
LRTable Generator<Itemset>::generate() {
|
||||||
//TODO
|
//TODO: generate the table
|
||||||
LRTable table;
|
LRTable table;
|
||||||
std::vector<std::set<Item>> itemsets;
|
|
||||||
itemsets.emplace_back(closure({initial_item(m_startrule, EOF_PLACEHOLDER)}));
|
|
||||||
|
|
||||||
std::queue<Item> itemqueue;
|
//Start with size 1
|
||||||
itemqueue.push(itemsets[0]);
|
table.act.emplace_back();
|
||||||
while (!itemqueue.empty()) {
|
table.goto_.emplace_back();
|
||||||
std::set<Item> cur = std::move(itemqueue.front());
|
|
||||||
itemqueue.pop();
|
|
||||||
|
|
||||||
for (std::pair<std::string, std::set<Item>> succ : successors(cur)) {
|
std::vector<Itemset> itemsets;
|
||||||
//Add new itemset or merge
|
itemsets.emplace_back(Itemset(m_startrule));
|
||||||
|
itemsets[0].close(m_gram);
|
||||||
|
|
||||||
|
std::set<std::string> symbols = std::set_union(m_gram.terminals.begin(), m_gram.terminals.end(),
|
||||||
|
m_gram.variables.begin(), m_gram.variables.end());
|
||||||
|
|
||||||
|
std::queue<std::pair<std::size_t, Itemset>> q;
|
||||||
|
while (!q.empty()) {
|
||||||
|
auto& curP = q.front();
|
||||||
|
std::size_t curIdx = curP.first;
|
||||||
|
Itemset cur = curP.second;
|
||||||
|
q.pop();
|
||||||
|
|
||||||
|
for (const std::string& sym : symbols) {
|
||||||
|
Itemset s = cur.succ(m_gram, sym);
|
||||||
|
if (s.empty())
|
||||||
|
continue;
|
||||||
|
s.close(m_gram);
|
||||||
|
|
||||||
|
std::size_t idx;
|
||||||
|
for (idx = 0; idx < itemsets.size(); idx++) {
|
||||||
|
if (itemsets[idx] == s) {
|
||||||
|
break;
|
||||||
|
} else if (itemsets[idx].canMerge(s)) {
|
||||||
|
itemsets[idx].merge(s);
|
||||||
|
q.emplace(idx, std::move(s));
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (idx == itemsets.size()) {
|
||||||
|
q.push(s);
|
||||||
|
itemsets.emplace_back(idx, std::move(s));
|
||||||
|
|
||||||
|
//Grow the table
|
||||||
|
table.act.emplace_back();
|
||||||
|
table.goto_.emplace_back();
|
||||||
|
}
|
||||||
|
|
||||||
|
if (m_gram.variables.count(sym)) {
|
||||||
|
table.goto_[curIdx][sym] = idx;
|
||||||
|
} else {
|
||||||
|
table.act[curIdx][sym] = std::make_pair(Action::SHIFT, idx);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
for (std::string term : m_gram.terminals) {
|
||||||
|
//Get reduces from the itemset, add them to the table, look for conflicts
|
||||||
|
for (std::size_t rule_applied : cur.getReduces(term)) {
|
||||||
|
if (rule_applied == m_gram.rules.size() - 1) { // The last added rule
|
||||||
|
// The extended start rule
|
||||||
|
assert(term == EOF_PLACEHOLDER);
|
||||||
|
table.act[curIdx][term] = std::make_pair(Action::ACCEPT, 0);
|
||||||
|
} else if (table.act[curIdx].count(term)) {
|
||||||
|
if (table.act[curIdx][term].first == Action::SHIFT) {
|
||||||
|
//Shift-Reduce conflict, report and resolve it (TODO)
|
||||||
|
} else if (table.act[curIdx][term].first == Action::REDUCE
|
||||||
|
&& table.act[curIdx][term].second != rule_applied) {
|
||||||
|
//Reduce-Reduce conflict, report it (TODO)
|
||||||
|
} else {
|
||||||
|
//Reduce using the same rule, no problem, NO-OP
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// No conflicts
|
||||||
|
table.act[curIdx][term] = std::make_pair(Action::REDUCE, rule_applied);
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return table;
|
return table;
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename Item>
|
|
||||||
void Generator<Item>::buildFirst() {
|
|
||||||
for (std::string term : m_gram.terminals) {
|
|
||||||
m_first[term].insert(term);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
bool changes = true;
|
|
||||||
|
|
||||||
auto update = [&changes, this](std::string head, auto& s) {
|
|
||||||
for (std::string elem : s) {
|
|
||||||
if (!m_first[head].count(elem)) {
|
|
||||||
changes = true;
|
|
||||||
m_first[head].insert(s.begin(), s.end());
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
while (changes) {
|
|
||||||
changes = false;
|
|
||||||
|
|
||||||
for (const auto& p : m_gram.rules) {
|
|
||||||
const std::string& head = p.first;
|
|
||||||
const std::set<Rule>& rules = p.second;
|
|
||||||
for (const Rule& rule : rules) {
|
|
||||||
if (rule.tail.size() == 0) {
|
|
||||||
update(head, {""});
|
|
||||||
}
|
|
||||||
|
|
||||||
bool ended = false;
|
|
||||||
for (const std::string& replacement : rule.tail) {
|
|
||||||
if (m_first[replacement].count("")) {
|
|
||||||
std::set<std::string> tmp = m_first[replacement];
|
|
||||||
tmp.erase("");
|
|
||||||
update(head, tmp);
|
|
||||||
} else {
|
|
||||||
update(head, m_first[replacement]);
|
|
||||||
ended = true;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (!ended) {
|
|
||||||
update(head, {""});
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
template <typename Item>
|
|
||||||
void Generator<Item>::buildFollow() {
|
|
||||||
//EOF follows the added start rule.
|
|
||||||
m_follow[EXTENDED_START].insert(EOF_PLACEHOLDER);
|
|
||||||
|
|
||||||
bool changes = true;
|
|
||||||
|
|
||||||
auto update = [&changes, this](std::string head, auto s) {
|
|
||||||
s.erase("");
|
|
||||||
for (std::string elem : s) {
|
|
||||||
if (!m_follow[head].count(elem)) {
|
|
||||||
changes = true;
|
|
||||||
m_follow[head].insert(s.begin(), s.end());
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
while (changes) {
|
|
||||||
changes = false;
|
|
||||||
for (const auto& p : m_gram.rules) {
|
|
||||||
std::string& head = p.first;
|
|
||||||
for (const auto& rule : p.second) {
|
|
||||||
for (std::size_t i = 0; i < rule.tail.size(); i++) {
|
|
||||||
if (!m_gram.terminals.count(rule.tail[i])) {
|
|
||||||
if (i == rule.tail.size() - 1 || m_first[rule.tail[i + 1]].count("")) {
|
|
||||||
update(rule.tail[i], m_follow[head]);
|
|
||||||
}
|
|
||||||
if (i < rule.tail.size() - 1) {
|
|
||||||
update(rule.tail[i], m_first[rule.tail[i + 1]]);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
} /* lr */
|
} /* lr */
|
||||||
} /* pds */
|
} /* pds */
|
||||||
|
|
Loading…
Reference in New Issue