LR(0) table generator

This commit is contained in:
Robin Jadoul 2016-12-18 15:10:54 +01:00
parent 6df924a282
commit 91fd08ba3e
9 changed files with 216 additions and 30 deletions

View File

@ -2,6 +2,7 @@
#ifndef PARSODUS_GRAMMAR_H
#include <map>
#include <memory>
#include <set>
#include <string>
#include <vector>
@ -22,6 +23,9 @@ namespace pds {
return tail < other.tail;
}
}
Rule() : head(""), tail() {}
Rule(const std::string& h, const std::vector<std::string>& t) : head(h), tail(t) {}
};
/**
@ -32,7 +36,7 @@ namespace pds {
std::string start; ///< the starting variable
std::set<std::string> variables; ///< the variables
std::set<std::string> terminals; ///< the terminals
std::vector<Rule> rules; ///< the replacement rules
std::vector<std::shared_ptr<Rule>> rules; ///< the replacement rules
};
}

View File

@ -0,0 +1,23 @@
#pragma once
#ifndef LRTABLES_LR0ITEM_H_3RNST1YA
#define LRTABLES_LR0ITEM_H_3RNST1YA
#include "Parsodus/grammar.h"
#include <memory>
namespace pds {
namespace lr {
/**
 * A single LR(0) item: a grammar rule combined with a dot position inside
 * the tail of that rule.
 */
struct LR0Item {
std::shared_ptr<Rule> rule; ///< the rule this item refers to (shared with the grammar, not copied)
std::size_t dotIdx; ///< index into rule->tail of the symbol directly after the dot; a value >= rule->tail.size() means the dot is at the end
bool operator<(const LR0Item& rhs) const; ///< Useful to keep it in a set/map
bool operator==(const LR0Item& rhs) const; ///< Same dot position and same rule pointer
};
} /* lr */
} /* pds */
#endif /* LRTABLES_LR0ITEM_H_3RNST1YA */

View File

@ -0,0 +1,39 @@
#pragma once
#ifndef LRTABLES_LR0ITEMSET_H_HTSWOGFB
#define LRTABLES_LR0ITEMSET_H_HTSWOGFB
#include "Parsodus/lrtables/generator.h"
#include "Parsodus/lrtables/LR0Item.h"
#include <memory>
namespace pds {
namespace lr {
/**
* An LR(0) itemset, @see Generator for details on the public methods
*/
class LR0Itemset {
public:
/// Create an itemset that holds no items.
LR0Itemset();
/// Create an itemset holding one item: the rule `start` with the dot at position 0.
LR0Itemset(std::shared_ptr<Rule> start);
/// Extend this set in place: for every variable of `g` found directly after a dot,
/// add the items (dot at position 0) of all rules of `g` with that variable as head.
void close(const Grammar& g);
/// Return the set of all items that have `sym` directly after the dot, with the dot
/// advanced by one position. The result is not closed.
LR0Itemset succ(std::string sym) const;
/// Two itemsets are equal when they contain exactly the same items.
bool operator==(const LR0Itemset& rhs) const;
/// Always false for LR(0) itemsets.
bool canMerge(const LR0Itemset& rhs) const;
/// Does nothing for LR(0) itemsets.
void merge(const LR0Itemset& rhs);
/// True when this set contains no items.
bool empty() const;
/// Positions in `g.rules` of every rule whose item in this set has the dot at the end.
/// The lookahead is ignored.
std::set<std::size_t> getReduces(const Grammar& g, std::string lookahead) const;
// NOTE(review): no definition of print() is visible next to the other members - confirm it exists before calling it.
void print() const; ///TODO remove me
private:
std::set<LR0Item> m_items; ///< the items that make up this set
};
} /* lr */
} /* pds */
#endif /* LRTABLES_LR0ITEMSET_H_HTSWOGFB */

View File

@ -7,6 +7,7 @@
#include <algorithm>
#include <cassert>
#include <memory>
#include <queue>
namespace pds {
@ -22,13 +23,12 @@ const std::string EOF_PLACEHOLDER = "$";
* An Itemset should support:
* - A constructor taking a single Rule, that makes this a starting rule
* - void close(const Grammar&); compute the closure
* - Itemset succ(const Grammar&, std::string) const; compute the successor of this set, over the given symbol
* - Itemset succ(std::string) const; compute the successor of this set, over the given symbol
* - bool operator==(const Itemset&); are these two Itemsets equal
* - bool canMerge(const Itemset&) const; Can the given Itemset be merged into this one
* - void merge(const Itemset&); Merge the given Itemset into this one
* - Action action(const std::string&) const; Determine the action to be executed on given lookahead
* - bool empty() const; is this Itemset empty (== not useful)
* - std::set<std::size_t> getReduces(std::string) const; get all Rule indices where a reduce should happen with given lookahead (not necessarily a set, but iterable)
* - std::set<std::size_t> getReduces(const Grammar&, std::string) const; get all Rule indices where a reduce should happen with given lookahead (not necessarily a set, but iterable)
*/
template <typename Itemset>
class Generator {
@ -39,7 +39,7 @@ class Generator {
* @param start The start symbol for the grammar
* @param g The grammar to translate
*/
Generator(const std::string& start, const Grammar& g);
Generator(const Grammar& g);
/**
* Generate an LRTable based on given grammar
@ -50,19 +50,18 @@ class Generator {
private:
Grammar m_gram;
Rule m_startrule;
std::shared_ptr<Rule> m_startrule;
};
template <typename Itemset>
Generator<Itemset>::Generator(const std::string& start, const Grammar& g) : m_gram(g), m_startrule(Rule{EXTENDED_START, {start}}) {
Generator<Itemset>::Generator(const Grammar& g) : m_gram(g), m_startrule(std::make_shared<Rule>(EXTENDED_START, std::vector<std::string>{g.start})) {
m_gram.terminals.insert(EOF_PLACEHOLDER); //End of file
m_gram.variables.insert(EXTENDED_START); //modified start rule
m_gram.rules[EXTENDED_START].push_back(m_startrule);
m_gram.rules.push_back(m_startrule);
}
template <typename Itemset>
LRTable Generator<Itemset>::generate() {
//TODO: generate the table
LRTable table;
//Start with size 1
@ -73,10 +72,13 @@ LRTable Generator<Itemset>::generate() {
itemsets.emplace_back(Itemset(m_startrule));
itemsets[0].close(m_gram);
std::set<std::string> symbols = std::set_union(m_gram.terminals.begin(), m_gram.terminals.end(),
m_gram.variables.begin(), m_gram.variables.end());
std::set<std::string> symbols;
std::set_union(m_gram.terminals.begin(), m_gram.terminals.end(),
m_gram.variables.begin(), m_gram.variables.end(),
std::inserter(symbols, symbols.end()));
std::queue<std::pair<std::size_t, Itemset>> q;
q.emplace(0, itemsets[0]);
while (!q.empty()) {
auto& curP = q.front();
std::size_t curIdx = curP.first;
@ -84,7 +86,7 @@ LRTable Generator<Itemset>::generate() {
q.pop();
for (const std::string& sym : symbols) {
Itemset s = cur.succ(m_gram, sym);
Itemset s = cur.succ(sym);
if (s.empty())
continue;
s.close(m_gram);
@ -100,8 +102,8 @@ LRTable Generator<Itemset>::generate() {
}
}
if (idx == itemsets.size()) {
q.push(s);
itemsets.emplace_back(idx, std::move(s));
q.emplace(idx, s);
itemsets.emplace_back(std::move(s));
//Grow the table
table.act.emplace_back();
@ -112,24 +114,25 @@ LRTable Generator<Itemset>::generate() {
table.goto_[curIdx][sym] = idx;
} else {
table.act[curIdx][sym] = std::make_pair(Action::SHIFT, idx);
}
}
for (std::string term : m_gram.terminals) {
//Get reduces from the itemset, add them to the table, look for conflicts
for (std::size_t rule_applied : cur.getReduces(term)) {
for (std::size_t rule_applied : cur.getReduces(m_gram, term)) {
if (rule_applied == m_gram.rules.size() - 1) { // The last added rule
// The extended start rule
assert(term == EOF_PLACEHOLDER);
table.act[curIdx][term] = std::make_pair(Action::ACCEPT, 0);
if (term == EOF_PLACEHOLDER)
table.act[curIdx][term] = std::make_pair(Action::ACCEPT, 0);
} else if (table.act[curIdx].count(term)) {
if (table.act[curIdx][term].first == Action::SHIFT) {
//Shift-Reduce conflict, report and resolve it (TODO)
throw "shift-reduce";
} else if (table.act[curIdx][term].first == Action::REDUCE
&& table.act[curIdx][term].second != rule_applied) {
//Reduce-Reduce conflict, report it (TODO)
throw 1;
} else {
//Reduce using the same rule, no problem, NO-OP
}

View File

@ -14,8 +14,10 @@ else()
endif()
include_directories("${CMAKE_CURRENT_BINARY_DIR}")
# add_library(Parsodus-tables
# )
add_library(Parsodus-tables
lrtables/LR0Item.cpp
lrtables/LR0Itemset.cpp
)
add_library(Parsodus-backends
backends/cpp.cpp
@ -32,7 +34,7 @@ add_executable(Parsodus
main.cpp
)
target_link_libraries(Parsodus
#Parsodus-tables
Parsodus-tables
Parsodus-backends
pds
mstch::mstch)

View File

@ -76,24 +76,24 @@ namespace pds {
token = lex.nextToken();
if(token.type != ParsodusLexer::ARROW)
throw InputParserException("No arrow found after '"+ current_head+"', but found '" + token.content + "' instead");
Rule rule;
rule.head = current_head;
std::shared_ptr<Rule> rule = std::make_shared<Rule>();
rule->head = current_head;
bool parsing_head = true;
while(parsing_head) {
token = lex.nextToken();
switch(token.type) {
case ParsodusLexer::VARIABLE:
rule.tail.push_back(token.content);
rule->tail.push_back(token.content);
break;
case ParsodusLexer::TERMINAL:
rule.tail.push_back(token.content);
rule->tail.push_back(token.content);
break;
case ParsodusLexer::SEMICOLON:
parsing_head = false;
case ParsodusLexer::PIPE:
rule.tail.shrink_to_fit();
config.grammar.rules.push_back(rule);
rule.tail.clear();
rule->tail.shrink_to_fit();
config.grammar.rules.push_back(std::make_shared<Rule>(*rule));
rule->tail.clear();
break;
default:
throw InputParserException("Expecting to find a variable, terminal, pipe or a semicolon, but found '" + token.content + "' instead");

19
src/lrtables/LR0Item.cpp Normal file
View File

@ -0,0 +1,19 @@
#include "Parsodus/lrtables/LR0Item.h"
namespace pds {
namespace lr {
/**
 * Strict ordering for use as a set/map key: items are ordered by dot
 * position first; items with the same dot position are ordered by their
 * rule pointer.
 */
bool LR0Item::operator<(const LR0Item& rhs) const {
    if (dotIdx == rhs.dotIdx)
        return rule < rhs.rule; // tie on the dot: fall back to the rule pointer
    return dotIdx < rhs.dotIdx;
}
/**
 * Two items are equal when they have the same dot position and point to
 * the same rule object.
 */
bool LR0Item::operator==(const LR0Item& rhs) const {
    if (dotIdx != rhs.dotIdx)
        return false;
    return rule == rhs.rule;
}
} /* lr */
} /* pds */

View File

@ -0,0 +1,79 @@
#include "Parsodus/lrtables/LR0Itemset.h"
namespace pds {
namespace lr {
/// Construct an itemset without any items.
LR0Itemset::LR0Itemset() : m_items() {}
/// Construct an itemset holding a single item: `start` with the dot before its first symbol.
LR0Itemset::LR0Itemset(std::shared_ptr<Rule> start) {
    const LR0Item initial{start, 0};
    m_items.insert(initial);
}
void LR0Itemset::close(const Grammar& g) {
bool changes = true;
std::vector<LR0Item> todo;
std::set<std::string> added;
while (changes) {
changes = false;
std::set<LR0Item> toAdd;
for (const LR0Item& i : m_items) {
if (i.dotIdx < i.rule->tail.size()) {
std::string& sym = i.rule->tail[i.dotIdx];
if (g.variables.count(sym) && !added.count(sym)) {
added.insert(sym);
changes = true;
for (const auto& rule : g.rules) {
if (rule->head == sym) {
toAdd.insert(LR0Item{rule, 0});
}
}
}
}
}
m_items.insert(toAdd.begin(), toAdd.end());
}
}
/**
 * Compute the successor of this itemset over the given symbol.
 *
 * @param sym The symbol to move the dot over
 * @return The set of all items of this set that have `sym` directly after
 *         the dot, with the dot advanced by one. The result is not closed
 *         and is empty when no item matches.
 */
LR0Itemset LR0Itemset::succ(std::string sym) const {
    LR0Itemset advanced;
    for (const LR0Item& candidate : m_items) {
        const std::vector<std::string>& tail = candidate.rule->tail;
        const bool dotBeforeSym = candidate.dotIdx < tail.size() && tail[candidate.dotIdx] == sym;
        if (!dotBeforeSym)
            continue;
        advanced.m_items.insert(LR0Item{candidate.rule, candidate.dotIdx + 1});
    }
    return advanced;
}
/// Two itemsets are equal when they contain exactly the same items.
bool LR0Itemset::operator==(const LR0Itemset& rhs) const {
    const std::set<LR0Item>& theirs = rhs.m_items;
    return m_items == theirs;
}
/// LR(0) itemsets are never merged: this returns false for every argument.
bool LR0Itemset::canMerge(const LR0Itemset&) const {
return false;
}
/// Intentionally does nothing; canMerge() never reports a mergeable itemset.
void LR0Itemset::merge(const LR0Itemset&) {
//NO-OP
}
/// True when this itemset holds no items at all.
bool LR0Itemset::empty() const {
    return m_items.size() == 0;
}
std::set<std::size_t> LR0Itemset::getReduces(const Grammar& g, std::string) const {
std::set<std::size_t> res;
for (auto& item : m_items) {
if (item.dotIdx >= item.rule->tail.size()) {
res.insert(std::find(g.rules.begin(), g.rules.end(), item.rule) - g.rules.begin());
}
}
return res;
}
} /* lr */
} /* pds */

View File

@ -3,6 +3,9 @@
#include "optparse.h"
#include "Parsodus/inputparser.h"
#include "Parsodus/lrtables/generator.h"
#include "Parsodus/lrtables/LR0Itemset.h"
int main(int argc, char** argv) {
optparse::OptionParser parser = optparse::OptionParser().description("Parsodus").usage("Parsodus [-d <outputdir>] [-l <language>] [-n <lexername>] <inputfile.pds>");
@ -37,11 +40,25 @@ int main(int argc, char** argv) {
std::cout << "Variable: " << a << std::endl;
std::cout << "Rules: " << std::endl;
for(auto a: config.grammar.rules) {
std::cout << "\t" << a.head << " -> ";
for(auto c: a.tail) {
std::cout << "\t" << a->head << " -> ";
for(auto c: a->tail) {
std::cout << c << " ";
}
std::cout << std::endl;
}
std::vector<std::string> names = {"ERROR", "SHIFT", "REDUCE", "ACCEPT"};
pds::lr::Generator<pds::lr::LR0Itemset> g(config.grammar);
auto tbl = g.generate();
for (std::size_t i = 0; i < tbl.act.size(); i++) {
std::cout << "State " << i << std::endl;
std::cout << " Action:" << std::endl;
for (auto& p : tbl.act[i]) {
std::cout << " " << p.first << ": " << names[static_cast<int>(p.second.first)] << " " << p.second.second << std::endl;
}
std::cout << " Goto:" << std::endl;
for (auto& p : tbl.goto_[i]) {
std::cout << " " << p.first << ": " << p.second << std::endl;;
}
}
}