LR(0) table generator

This commit is contained in:
Robin Jadoul 2016-12-18 15:10:54 +01:00
parent 6df924a282
commit 91fd08ba3e
9 changed files with 216 additions and 30 deletions

View File

@ -2,6 +2,7 @@
#ifndef PARSODUS_GRAMMAR_H #ifndef PARSODUS_GRAMMAR_H
#include <map> #include <map>
#include <memory>
#include <set> #include <set>
#include <string> #include <string>
#include <vector> #include <vector>
@ -22,6 +23,9 @@ namespace pds {
return tail < other.tail; return tail < other.tail;
} }
} }
Rule() : head(""), tail() {}
Rule(const std::string& h, const std::vector<std::string>& t) : head(h), tail(t) {}
}; };
/** /**
@ -32,7 +36,7 @@ namespace pds {
std::string start; ///< the starting variable std::string start; ///< the starting variable
std::set<std::string> variables; ///< the variables std::set<std::string> variables; ///< the variables
std::set<std::string> terminals; ///< the terminals std::set<std::string> terminals; ///< the terminals
std::vector<Rule> rules; ///< the replacement rules std::vector<std::shared_ptr<Rule>> rules; ///< the replacement rules
}; };
} }

View File

@ -0,0 +1,23 @@
#pragma once
#ifndef LRTABLES_LR0ITEM_H_3RNST1YA
#define LRTABLES_LR0ITEM_H_3RNST1YA
#include "Parsodus/grammar.h"
#include <memory>
namespace pds {
namespace lr {
/**
 * A single LR(0) item: a grammar rule combined with a dot position inside
 * the rule's tail.
 */
struct LR0Item {
std::shared_ptr<Rule> rule; ///< The rule this item refers to
std::size_t dotIdx; ///< Index into rule->tail of the symbol right after the dot; equal to the tail size when the dot is at the end
bool operator<(const LR0Item& rhs) const; ///< Ordering, so that items can be stored in a std::set/std::map
bool operator==(const LR0Item& rhs) const; ///< Equality of two items (same dot position and same rule pointer)
};
} /* lr */
} /* pds */
#endif /* LRTABLES_LR0ITEM_H_3RNST1YA */

View File

@ -0,0 +1,39 @@
#pragma once
#ifndef LRTABLES_LR0ITEMSET_H_HTSWOGFB
#define LRTABLES_LR0ITEMSET_H_HTSWOGFB
#include "Parsodus/lrtables/generator.h"
#include "Parsodus/lrtables/LR0Item.h"
#include <memory>
namespace pds {
namespace lr {
/**
* An LR(0) itemset, @see Generator for details on the public methods
*
* The set is stored as a std::set of LR0Item, so two itemsets compare equal
* exactly when they hold the same items.
*/
class LR0Itemset {
public:
/// Construct an empty itemset.
LR0Itemset();
/// Construct the itemset holding only the item for `start` with the dot at position 0.
LR0Itemset(std::shared_ptr<Rule> start);
/// Extend this itemset in place with the closure items derived from the rules of `g`.
void close(const Grammar& g);
/// Return the itemset reached by moving the dot over `sym`; the result is not closed.
LR0Itemset succ(std::string sym) const;
/// Are both itemsets made of the same items?
bool operator==(const LR0Itemset& rhs) const;
/// Can `rhs` be merged into this itemset?
bool canMerge(const LR0Itemset& rhs) const;
/// Merge `rhs` into this itemset.
void merge(const LR0Itemset& rhs);
/// Does this itemset contain no items?
bool empty() const;
/// Get the indices (into g.rules) of all rules that have an item with the dot at the end of the tail.
std::set<std::size_t> getReduces(const Grammar& g, std::string lookahead) const;
/// Debugging aid. TODO: remove me.
/// NOTE(review): no definition of print() is visible next to the other member definitions -- confirm one exists before calling it.
void print() const;
private:
std::set<LR0Item> m_items; ///< The items that make up this itemset
};
} /* lr */
} /* pds */
#endif /* LRTABLES_LR0ITEMSET_H_HTSWOGFB */

View File

@ -7,6 +7,7 @@
#include <algorithm> #include <algorithm>
#include <cassert> #include <cassert>
#include <memory>
#include <queue> #include <queue>
namespace pds { namespace pds {
@ -22,13 +23,12 @@ const std::string EOF_PLACEHOLDER = "$";
* An Itemset should support: * An Itemset should support:
* - A constructor taking a single Rule, that makes this a starting rule * - A constructor taking a single Rule, that makes this a starting rule
* - void close(const Grammar&); compute the closure * - void close(const Grammar&); compute the closure
* - Itemset succ(const Grammar&, std::string) const; compute the successor of this set, over the given symbol * - Itemset succ(std::string) const; compute the successor of this set, over the given symbol
* - bool operator==(const Itemset&); are these two Itemsets equal * - bool operator==(const Itemset&); are these two Itemsets equal
* - bool canMerge(const Itemset&) const; Can the given Itemset be merged into this one * - bool canMerge(const Itemset&) const; Can the given Itemset be merged into this one
* - void merge(const Itemset&); Merge the given Itemset into this one * - void merge(const Itemset&); Merge the given Itemset into this one
* - Action action(const std::string&) const; Determine the action to be executed on given lookahead
* - bool empty() const; is this Itemset empty (== not useful) * - bool empty() const; is this Itemset empty (== not useful)
* - std::set<std::size_t> getReduces(std::string) const; get all Rule indices where a reduce should happen with given lookahead (not necessarily a set, but iterable) * - std::set<std::size_t> getReduces(const Grammar&, std::string) const; get all Rule indices where a reduce should happen with given lookahead (not necessarily a set, but iterable)
*/ */
template <typename Itemset> template <typename Itemset>
class Generator { class Generator {
@ -39,7 +39,7 @@ class Generator {
* @param start The start symbol for the grammar * @param start The start symbol for the grammar
* @param g The grammar to translate * @param g The grammar to translate
*/ */
Generator(const std::string& start, const Grammar& g); Generator(const Grammar& g);
/** /**
* Generate an LRTable based on given grammar * Generate an LRTable based on given grammar
@ -50,19 +50,18 @@ class Generator {
private: private:
Grammar m_gram; Grammar m_gram;
Rule m_startrule; std::shared_ptr<Rule> m_startrule;
}; };
template <typename Itemset> template <typename Itemset>
Generator<Itemset>::Generator(const std::string& start, const Grammar& g) : m_gram(g), m_startrule(Rule{EXTENDED_START, {start}}) { Generator<Itemset>::Generator(const Grammar& g) : m_gram(g), m_startrule(std::make_shared<Rule>(EXTENDED_START, std::vector<std::string>{g.start})) {
m_gram.terminals.insert(EOF_PLACEHOLDER); //End of file m_gram.terminals.insert(EOF_PLACEHOLDER); //End of file
m_gram.variables.insert(EXTENDED_START); //modified start rule m_gram.variables.insert(EXTENDED_START); //modified start rule
m_gram.rules[EXTENDED_START].push_back(m_startrule); m_gram.rules.push_back(m_startrule);
} }
template <typename Itemset> template <typename Itemset>
LRTable Generator<Itemset>::generate() { LRTable Generator<Itemset>::generate() {
//TODO: generate the table
LRTable table; LRTable table;
//Start with size 1 //Start with size 1
@ -73,10 +72,13 @@ LRTable Generator<Itemset>::generate() {
itemsets.emplace_back(Itemset(m_startrule)); itemsets.emplace_back(Itemset(m_startrule));
itemsets[0].close(m_gram); itemsets[0].close(m_gram);
std::set<std::string> symbols = std::set_union(m_gram.terminals.begin(), m_gram.terminals.end(), std::set<std::string> symbols;
m_gram.variables.begin(), m_gram.variables.end()); std::set_union(m_gram.terminals.begin(), m_gram.terminals.end(),
m_gram.variables.begin(), m_gram.variables.end(),
std::inserter(symbols, symbols.end()));
std::queue<std::pair<std::size_t, Itemset>> q; std::queue<std::pair<std::size_t, Itemset>> q;
q.emplace(0, itemsets[0]);
while (!q.empty()) { while (!q.empty()) {
auto& curP = q.front(); auto& curP = q.front();
std::size_t curIdx = curP.first; std::size_t curIdx = curP.first;
@ -84,7 +86,7 @@ LRTable Generator<Itemset>::generate() {
q.pop(); q.pop();
for (const std::string& sym : symbols) { for (const std::string& sym : symbols) {
Itemset s = cur.succ(m_gram, sym); Itemset s = cur.succ(sym);
if (s.empty()) if (s.empty())
continue; continue;
s.close(m_gram); s.close(m_gram);
@ -100,8 +102,8 @@ LRTable Generator<Itemset>::generate() {
} }
} }
if (idx == itemsets.size()) { if (idx == itemsets.size()) {
q.push(s); q.emplace(idx, s);
itemsets.emplace_back(idx, std::move(s)); itemsets.emplace_back(std::move(s));
//Grow the table //Grow the table
table.act.emplace_back(); table.act.emplace_back();
@ -112,24 +114,25 @@ LRTable Generator<Itemset>::generate() {
table.goto_[curIdx][sym] = idx; table.goto_[curIdx][sym] = idx;
} else { } else {
table.act[curIdx][sym] = std::make_pair(Action::SHIFT, idx); table.act[curIdx][sym] = std::make_pair(Action::SHIFT, idx);
} }
} }
for (std::string term : m_gram.terminals) { for (std::string term : m_gram.terminals) {
//Get reduces from the itemset, add them to the table, look for conflicts //Get reduces from the itemset, add them to the table, look for conflicts
for (std::size_t rule_applied : cur.getReduces(term)) { for (std::size_t rule_applied : cur.getReduces(m_gram, term)) {
if (rule_applied == m_gram.rules.size() - 1) { // The last added rule if (rule_applied == m_gram.rules.size() - 1) { // The last added rule
// The extended start rule // The extended start rule
assert(term == EOF_PLACEHOLDER); if (term == EOF_PLACEHOLDER)
table.act[curIdx][term] = std::make_pair(Action::ACCEPT, 0); table.act[curIdx][term] = std::make_pair(Action::ACCEPT, 0);
} else if (table.act[curIdx].count(term)) { } else if (table.act[curIdx].count(term)) {
if (table.act[curIdx][term].first == Action::SHIFT) { if (table.act[curIdx][term].first == Action::SHIFT) {
//Shift-Reduce conflict, report and resolve it (TODO) //Shift-Reduce conflict, report and resolve it (TODO)
throw "shift-reduce";
} else if (table.act[curIdx][term].first == Action::REDUCE } else if (table.act[curIdx][term].first == Action::REDUCE
&& table.act[curIdx][term].second != rule_applied) { && table.act[curIdx][term].second != rule_applied) {
//Reduce-Reduce conflict, report it (TODO) //Reduce-Reduce conflict, report it (TODO)
throw 1;
} else { } else {
//Reduce using the same rule, no problem, NO-OP //Reduce using the same rule, no problem, NO-OP
} }

View File

@ -14,8 +14,10 @@ else()
endif() endif()
include_directories("${CMAKE_CURRENT_BINARY_DIR}") include_directories("${CMAKE_CURRENT_BINARY_DIR}")
# add_library(Parsodus-tables add_library(Parsodus-tables
# ) lrtables/LR0Item.cpp
lrtables/LR0Itemset.cpp
)
add_library(Parsodus-backends add_library(Parsodus-backends
backends/cpp.cpp backends/cpp.cpp
@ -32,7 +34,7 @@ add_executable(Parsodus
main.cpp main.cpp
) )
target_link_libraries(Parsodus target_link_libraries(Parsodus
#Parsodus-tables Parsodus-tables
Parsodus-backends Parsodus-backends
pds pds
mstch::mstch) mstch::mstch)

View File

@ -76,24 +76,24 @@ namespace pds {
token = lex.nextToken(); token = lex.nextToken();
if(token.type != ParsodusLexer::ARROW) if(token.type != ParsodusLexer::ARROW)
throw InputParserException("No arrow found after '"+ current_head+"', but found '" + token.content + "' instead"); throw InputParserException("No arrow found after '"+ current_head+"', but found '" + token.content + "' instead");
Rule rule; std::shared_ptr<Rule> rule = std::make_shared<Rule>();
rule.head = current_head; rule->head = current_head;
bool parsing_head = true; bool parsing_head = true;
while(parsing_head) { while(parsing_head) {
token = lex.nextToken(); token = lex.nextToken();
switch(token.type) { switch(token.type) {
case ParsodusLexer::VARIABLE: case ParsodusLexer::VARIABLE:
rule.tail.push_back(token.content); rule->tail.push_back(token.content);
break; break;
case ParsodusLexer::TERMINAL: case ParsodusLexer::TERMINAL:
rule.tail.push_back(token.content); rule->tail.push_back(token.content);
break; break;
case ParsodusLexer::SEMICOLON: case ParsodusLexer::SEMICOLON:
parsing_head = false; parsing_head = false;
case ParsodusLexer::PIPE: case ParsodusLexer::PIPE:
rule.tail.shrink_to_fit(); rule->tail.shrink_to_fit();
config.grammar.rules.push_back(rule); config.grammar.rules.push_back(std::make_shared<Rule>(*rule));
rule.tail.clear(); rule->tail.clear();
break; break;
default: default:
throw InputParserException("Expecting to find a variable, terminal, pipe or a semicolon, but found '" + token.content + "' instead"); throw InputParserException("Expecting to find a variable, terminal, pipe or a semicolon, but found '" + token.content + "' instead");

19
src/lrtables/LR0Item.cpp Normal file
View File

@ -0,0 +1,19 @@
#include "Parsodus/lrtables/LR0Item.h"
namespace pds {
namespace lr {
/**
 * Order two items: the dot position is the primary key, the rule pointer
 * (compared as a std::shared_ptr) breaks ties.
 */
bool LR0Item::operator<(const LR0Item& rhs) const {
    if (dotIdx == rhs.dotIdx) {
        // Same dot position: fall back to comparing the rule pointers
        return rule < rhs.rule;
    }
    return dotIdx < rhs.dotIdx;
}
/**
 * Two items are equal when they point at the same Rule object and have the
 * dot at the same position.
 */
bool LR0Item::operator==(const LR0Item& rhs) const {
    const bool sameDot = (dotIdx == rhs.dotIdx);
    const bool sameRule = (rule == rhs.rule);
    return sameDot && sameRule;
}
} /* lr */
} /* pds */

View File

@ -0,0 +1,79 @@
#include "Parsodus/lrtables/LR0Itemset.h"
namespace pds {
namespace lr {
/// Create an itemset without any items.
LR0Itemset::LR0Itemset() = default;
/**
 * Create the itemset for a starting rule: it holds exactly one item,
 * `start` with the dot in front of the first tail symbol.
 */
LR0Itemset::LR0Itemset(std::shared_ptr<Rule> start) {
    const LR0Item initial{start, 0};
    m_items.insert(initial);
}
/**
 * Compute the closure of this itemset, in place.
 *
 * For every item whose dot stands in front of a variable of `g`, an item
 * with the dot at position 0 is added for each rule of `g` that has this
 * variable as its head. This is repeated until a pass expands no new variable.
 *
 * @param g The grammar providing the variables and the rules
 */
void LR0Itemset::close(const Grammar& g) {
    // Variables whose rules were already added; each one is expanded only once
    std::set<std::string> expanded;

    bool changes = true;
    while (changes) {
        changes = false;

        // New items are collected here and merged after the pass,
        // so m_items is never modified while it is being iterated
        std::set<LR0Item> toAdd;
        for (const LR0Item& item : m_items) {
            if (item.dotIdx >= item.rule->tail.size())
                continue; // Dot at the end, nothing follows it

            const std::string& sym = item.rule->tail[item.dotIdx];
            if (!g.variables.count(sym))
                continue; // Not a variable, no rules to add
            if (!expanded.insert(sym).second)
                continue; // Already expanded earlier

            changes = true;
            for (const auto& rule : g.rules) {
                if (rule->head == sym) {
                    toAdd.insert(LR0Item{rule, 0});
                }
            }
        }
        m_items.insert(toAdd.begin(), toAdd.end());
    }
}
/**
 * Compute the successor of this itemset over the given symbol.
 *
 * Every item that has `sym` right after its dot contributes the same rule
 * with the dot moved one position further. The result is not closed.
 *
 * @param sym The symbol to move the dot over
 * @return The (possibly empty) successor itemset
 */
LR0Itemset LR0Itemset::succ(std::string sym) const {
    LR0Itemset next;
    for (const LR0Item& current : m_items) {
        const std::vector<std::string>& tail = current.rule->tail;
        const bool dotBeforeSym = current.dotIdx < tail.size() && tail[current.dotIdx] == sym;
        if (!dotBeforeSym)
            continue;
        next.m_items.insert(LR0Item{current.rule, current.dotIdx + 1});
    }
    return next;
}
/// Two itemsets are equal when their sets of items are equal.
bool LR0Itemset::operator==(const LR0Itemset& rhs) const {
    const std::set<LR0Item>& mine = m_items;
    const std::set<LR0Item>& theirs = rhs.m_items;
    return mine == theirs;
}
/// This itemset type never reports another itemset as mergeable.
bool LR0Itemset::canMerge(const LR0Itemset& /* rhs */) const {
    const bool mergeable = false;
    return mergeable;
}
/// Merging does nothing for this itemset type; the argument is ignored.
void LR0Itemset::merge(const LR0Itemset& /* rhs */) {
    // Intentionally empty
}
/// Is this itemset without any items?
bool LR0Itemset::empty() const {
    return m_items.size() == 0;
}
std::set<std::size_t> LR0Itemset::getReduces(const Grammar& g, std::string) const {
std::set<std::size_t> res;
for (auto& item : m_items) {
if (item.dotIdx >= item.rule->tail.size()) {
res.insert(std::find(g.rules.begin(), g.rules.end(), item.rule) - g.rules.begin());
}
}
return res;
}
} /* lr */
} /* pds */

View File

@ -3,6 +3,9 @@
#include "optparse.h" #include "optparse.h"
#include "Parsodus/inputparser.h" #include "Parsodus/inputparser.h"
#include "Parsodus/lrtables/generator.h"
#include "Parsodus/lrtables/LR0Itemset.h"
int main(int argc, char** argv) { int main(int argc, char** argv) {
optparse::OptionParser parser = optparse::OptionParser().description("Parsodus").usage("Parsodus [-d <outputdir>] [-l <language>] [-n <lexername>] <inputfile.pds>"); optparse::OptionParser parser = optparse::OptionParser().description("Parsodus").usage("Parsodus [-d <outputdir>] [-l <language>] [-n <lexername>] <inputfile.pds>");
@ -37,11 +40,25 @@ int main(int argc, char** argv) {
std::cout << "Variable: " << a << std::endl; std::cout << "Variable: " << a << std::endl;
std::cout << "Rules: " << std::endl; std::cout << "Rules: " << std::endl;
for(auto a: config.grammar.rules) { for(auto a: config.grammar.rules) {
std::cout << "\t" << a.head << " -> "; std::cout << "\t" << a->head << " -> ";
for(auto c: a.tail) { for(auto c: a->tail) {
std::cout << c << " "; std::cout << c << " ";
} }
std::cout << std::endl; std::cout << std::endl;
} }
std::vector<std::string> names = {"ERROR", "SHIFT", "REDUCE", "ACCEPT"};
pds::lr::Generator<pds::lr::LR0Itemset> g(config.grammar);
auto tbl = g.generate();
for (std::size_t i = 0; i < tbl.act.size(); i++) {
std::cout << "State " << i << std::endl;
std::cout << " Action:" << std::endl;
for (auto& p : tbl.act[i]) {
std::cout << " " << p.first << ": " << names[static_cast<int>(p.second.first)] << " " << p.second.second << std::endl;
}
std::cout << " Goto:" << std::endl;
for (auto& p : tbl.goto_[i]) {
std::cout << " " << p.first << ": " << p.second << std::endl;;
}
}
} }