From 91fd08ba3e72e5775c1e2017e80bf4ef89a2fbe0 Mon Sep 17 00:00:00 2001 From: Robin Jadoul Date: Sun, 18 Dec 2016 15:10:54 +0100 Subject: [PATCH] LR(0) table generator --- include/Parsodus/grammar.h | 6 +- include/Parsodus/lrtables/LR0Item.h | 23 ++++++++ include/Parsodus/lrtables/LR0Itemset.h | 39 +++++++++++++ include/Parsodus/lrtables/generator.h | 37 ++++++------ src/CMakeLists.txt | 8 ++- src/inputparser.cpp | 14 ++--- src/lrtables/LR0Item.cpp | 19 +++++++ src/lrtables/LR0Itemset.cpp | 79 ++++++++++++++++++++++++++ src/main.cpp | 21 ++++++- 9 files changed, 216 insertions(+), 30 deletions(-) create mode 100644 include/Parsodus/lrtables/LR0Item.h create mode 100644 include/Parsodus/lrtables/LR0Itemset.h create mode 100644 src/lrtables/LR0Item.cpp create mode 100644 src/lrtables/LR0Itemset.cpp diff --git a/include/Parsodus/grammar.h b/include/Parsodus/grammar.h index 532fc2a..81466ea 100644 --- a/include/Parsodus/grammar.h +++ b/include/Parsodus/grammar.h @@ -2,6 +2,7 @@ #ifndef PARSODUS_GRAMMAR_H #include +#include #include #include #include @@ -22,6 +23,9 @@ namespace pds { return tail < other.tail; } } + + Rule() : head(""), tail() {} + Rule(const std::string& h, const std::vector& t) : head(h), tail(t) {} }; /** @@ -32,7 +36,7 @@ namespace pds { std::string start; ///< the starting variable std::set variables; ///< the variables std::set terminals; ///< the terminals - std::vector rules; ///< the replacement rules + std::vector> rules; ///< the replacement rules }; } diff --git a/include/Parsodus/lrtables/LR0Item.h b/include/Parsodus/lrtables/LR0Item.h new file mode 100644 index 0000000..a6cf4e0 --- /dev/null +++ b/include/Parsodus/lrtables/LR0Item.h @@ -0,0 +1,23 @@ +#pragma once +#ifndef LRTABLES_LR0ITEM_H_3RNST1YA +#define LRTABLES_LR0ITEM_H_3RNST1YA + +#include "Parsodus/grammar.h" + +#include + +namespace pds { +namespace lr { + +struct LR0Item { + std::shared_ptr rule; + std::size_t dotIdx; + + bool operator<(const LR0Item& rhs) const; ///< Useful to keep it in a set/map + bool operator==(const LR0Item& rhs) const; +}; + +} /* lr */ +} /* pds */ + +#endif /* LRTABLES_LR0ITEM_H_3RNST1YA */ diff --git a/include/Parsodus/lrtables/LR0Itemset.h b/include/Parsodus/lrtables/LR0Itemset.h new file mode 100644 index 0000000..6cb13b4 --- /dev/null +++ b/include/Parsodus/lrtables/LR0Itemset.h @@ -0,0 +1,39 @@ +#pragma once +#ifndef LRTABLES_LR0ITEMSET_H_HTSWOGFB +#define LRTABLES_LR0ITEMSET_H_HTSWOGFB + +#include "Parsodus/lrtables/generator.h" +#include "Parsodus/lrtables/LR0Item.h" + +#include + +namespace pds { +namespace lr { + +/** + * An LR(0) itemset, @see Generator for details on the public methods + */ +class LR0Itemset { +public: + LR0Itemset(); + LR0Itemset(std::shared_ptr start); + + void close(const Grammar& g); + LR0Itemset succ(std::string sym) const; + bool operator==(const LR0Itemset& rhs) const; + bool canMerge(const LR0Itemset& rhs) const; + void merge(const LR0Itemset& rhs); + bool empty() const; + std::set getReduces(const Grammar& g, std::string lookahead) const; + + void print() const; ///TODO remove me + +private: + std::set m_items; + +}; + +} /* lr */ +} /* pds */ + +#endif /* LRTABLES_LR0ITEMSET_H_HTSWOGFB */ diff --git a/include/Parsodus/lrtables/generator.h b/include/Parsodus/lrtables/generator.h index 8fb1a96..41dd980 100644 --- a/include/Parsodus/lrtables/generator.h +++ b/include/Parsodus/lrtables/generator.h @@ -7,6 +7,7 @@ #include #include +#include #include namespace pds { @@ -22,13 +23,12 @@ const std::string EOF_PLACEHOLDER = "$"; * An Itemset should support: * - A constructor taking a single Rule, that makes this a starting rule * - void close(const Grammar&); compute the closure - * - Itemset succ(const Grammar&, std::string) const; compute the successor of this set, over the given symbol + * - Itemset succ(std::string) const; compute the successor of this set, over the given symbol * - bool operator==(const Itemset&); are these two Itemsets equal * - bool canMerge(const Itemset&) const; Can the given Itemset be merged into this one * - void merge(const Itemset&); Merge the given Itemset into this one - * - Action action(const std::string&) const; Determine the action to be executed on given lookahead * - bool empty() const; is this Itemset empty (== not useful) - * - std::set getReduces(std::string) const; get all Rule indices where a reduce should happen with given lookahead (not necessarily a set, but iterable) + * - std::set getReduces(const Grammar&, std::string) const; get all Rule indices where a reduce should happen with given lookahead (not necessarily a set, but iterable) */ template class Generator { @@ -39,7 +39,7 @@ class Generator { * @param start The start symbol for the grammar * @param g The grammar to translate */ - Generator(const std::string& start, const Grammar& g); + Generator(const Grammar& g); /** * Generate an LRTable based on given grammar @@ -50,19 +50,18 @@ class Generator { private: Grammar m_gram; - Rule m_startrule; + std::shared_ptr m_startrule; }; template -Generator::Generator(const std::string& start, const Grammar& g) : m_gram(g), m_startrule(Rule{EXTENDED_START, {start}}) { +Generator::Generator(const Grammar& g) : m_gram(g), m_startrule(std::make_shared(EXTENDED_START, std::vector{g.start})) { m_gram.terminals.insert(EOF_PLACEHOLDER); //End of file m_gram.variables.insert(EXTENDED_START); //modified start rule - m_gram.rules[EXTENDED_START].push_back(m_startrule); + m_gram.rules.push_back(m_startrule); } template LRTable Generator::generate() { - //TODO: generate the table LRTable table; //Start with size 1 @@ -73,10 +72,13 @@ LRTable Generator::generate() { itemsets.emplace_back(Itemset(m_startrule)); itemsets[0].close(m_gram); - std::set symbols = std::set_union(m_gram.terminals.begin(), m_gram.terminals.end(), - m_gram.variables.begin(), m_gram.variables.end()); + std::set symbols; + std::set_union(m_gram.terminals.begin(), m_gram.terminals.end(), + m_gram.variables.begin(), m_gram.variables.end(), + std::inserter(symbols, symbols.end())); std::queue> q; + q.emplace(0, itemsets[0]); while (!q.empty()) { auto& curP = q.front(); std::size_t curIdx = curP.first; @@ -84,7 +86,7 @@ LRTable Generator::generate() { q.pop(); for (const std::string& sym : symbols) { - Itemset s = cur.succ(m_gram, sym); + Itemset s = cur.succ(sym); if (s.empty()) continue; s.close(m_gram); @@ -100,8 +102,8 @@ LRTable Generator::generate() { } } if (idx == itemsets.size()) { - q.push(s); - itemsets.emplace_back(idx, std::move(s)); + q.emplace(idx, s); + itemsets.emplace_back(std::move(s)); //Grow the table table.act.emplace_back(); @@ -112,24 +114,25 @@ LRTable Generator::generate() { table.goto_[curIdx][sym] = idx; } else { table.act[curIdx][sym] = std::make_pair(Action::SHIFT, idx); - } } for (std::string term : m_gram.terminals) { //Get reduces from the itemset, add them to the table, look for conflicts - for (std::size_t rule_applied : cur.getReduces(term)) { + for (std::size_t rule_applied : cur.getReduces(m_gram, term)) { if (rule_applied == m_gram.rules.size() - 1) { // The last added rule // The extended start rule - assert(term == EOF_PLACEHOLDER); - table.act[curIdx][term] = std::make_pair(Action::ACCEPT, 0); + if (term == EOF_PLACEHOLDER) + table.act[curIdx][term] = std::make_pair(Action::ACCEPT, 0); } else if (table.act[curIdx].count(term)) { if (table.act[curIdx][term].first == Action::SHIFT) { //Shift-Reduce conflict, rapport and resolve it (TODO) + throw "shift-reduce"; } else if (table.act[curIdx][term].first == Action::REDUCE && table.act[curIdx][term].second != rule_applied) { //Reduce-Reduce conflict, rapport it (TODO) + throw 1; } else { //Reduce using the same rule, no problem, NO-OP } diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index c535d9d..b09328e 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -14,8 +14,10 @@ else() endif() include_directories("${CMAKE_CURRENT_BINARY_DIR}") -# add_library(Parsodus-tables - # ) +add_library(Parsodus-tables + lrtables/LR0Item.cpp + lrtables/LR0Itemset.cpp + ) add_library(Parsodus-backends backends/cpp.cpp @@ -32,7 +34,7 @@ add_executable(Parsodus main.cpp ) target_link_libraries(Parsodus - #Parsodus-tables + Parsodus-tables Parsodus-backends pds mstch::mstch) diff --git a/src/inputparser.cpp b/src/inputparser.cpp index f96bf50..434110e 100644 --- a/src/inputparser.cpp +++ b/src/inputparser.cpp @@ -76,24 +76,24 @@ namespace pds { token = lex.nextToken(); if(token.type != ParsodusLexer::ARROW) throw InputParserException("No arrow found after '"+ current_head+"', but found '" + token.content + "' instead"); - Rule rule; - rule.head = current_head; + std::shared_ptr rule = std::make_shared(); + rule->head = current_head; bool parsing_head = true; while(parsing_head) { token = lex.nextToken(); switch(token.type) { case ParsodusLexer::VARIABLE: - rule.tail.push_back(token.content); + rule->tail.push_back(token.content); break; case ParsodusLexer::TERMINAL: - rule.tail.push_back(token.content); + rule->tail.push_back(token.content); break; case ParsodusLexer::SEMICOLON: parsing_head = false; case ParsodusLexer::PIPE: - rule.tail.shrink_to_fit(); - config.grammar.rules.push_back(rule); - rule.tail.clear(); + rule->tail.shrink_to_fit(); + config.grammar.rules.push_back(std::make_shared(*rule)); + rule->tail.clear(); break; default: throw InputParserException("Expecting to find a variable, terminal, pipe or a semicolon, but found '" + token.content + "' instead"); diff --git a/src/lrtables/LR0Item.cpp b/src/lrtables/LR0Item.cpp new file mode 100644 index 0000000..54e0af3 --- /dev/null +++ b/src/lrtables/LR0Item.cpp @@ -0,0 +1,19 @@ +#include "Parsodus/lrtables/LR0Item.h" + +namespace pds { +namespace lr { + +bool LR0Item::operator<(const LR0Item& rhs) const { + if (dotIdx != rhs.dotIdx) { + return dotIdx < rhs.dotIdx; + } else { + return rule < rhs.rule; + } +} + +bool LR0Item::operator==(const LR0Item& rhs) const { + return dotIdx == rhs.dotIdx && rule == rhs.rule; +} + +} /* lr */ +} /* pds */ diff --git a/src/lrtables/LR0Itemset.cpp b/src/lrtables/LR0Itemset.cpp new file mode 100644 index 0000000..8d23c48 --- /dev/null +++ b/src/lrtables/LR0Itemset.cpp @@ -0,0 +1,79 @@ +#include "Parsodus/lrtables/LR0Itemset.h" + +namespace pds { +namespace lr { + +LR0Itemset::LR0Itemset() { +} + +LR0Itemset::LR0Itemset(std::shared_ptr start) { + m_items.emplace(LR0Item{start, 0}); +} + +void LR0Itemset::close(const Grammar& g) { + bool changes = true; + std::vector todo; + std::set added; + + while (changes) { + changes = false; + std::set toAdd; + + for (const LR0Item& i : m_items) { + if (i.dotIdx < i.rule->tail.size()) { + std::string& sym = i.rule->tail[i.dotIdx]; + if (g.variables.count(sym) && !added.count(sym)) { + added.insert(sym); + changes = true; + for (const auto& rule : g.rules) { + if (rule->head == sym) { + toAdd.insert(LR0Item{rule, 0}); + } + } + } + } + } + m_items.insert(toAdd.begin(), toAdd.end()); + } +} + +LR0Itemset LR0Itemset::succ(std::string sym) const { + LR0Itemset sc; + for (auto& item : m_items) { + if (item.dotIdx < item.rule->tail.size()) { + if (item.rule->tail[item.dotIdx] == sym) { + sc.m_items.insert(LR0Item{item.rule, item.dotIdx + 1}); + } + } + } + return sc; +} + +bool LR0Itemset::operator==(const LR0Itemset& rhs) const { + return m_items == rhs.m_items; +} + +bool LR0Itemset::canMerge(const LR0Itemset&) const { + return false; +} + +void LR0Itemset::merge(const LR0Itemset&) { + //NO-OP +} + +bool LR0Itemset::empty() const { + return m_items.empty(); +} + +std::set LR0Itemset::getReduces(const Grammar& g, std::string) const { + std::set res; + for (auto& item : m_items) { + if (item.dotIdx >= item.rule->tail.size()) { + res.insert(std::find(g.rules.begin(), g.rules.end(), item.rule) - g.rules.begin()); + } + } + return res; +} + +} /* lr */ +} /* pds */ diff --git a/src/main.cpp b/src/main.cpp index 3157d81..f11b4fa 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -3,6 +3,9 @@ #include "optparse.h" #include "Parsodus/inputparser.h" +#include "Parsodus/lrtables/generator.h" +#include "Parsodus/lrtables/LR0Itemset.h" + int main(int argc, char** argv) { optparse::OptionParser parser = optparse::OptionParser().description("Parsodus").usage("Parsodus [-d ] [-l ] [-n ] "); @@ -37,11 +40,25 @@ int main(int argc, char** argv) { std::cout << "Variable: " << a << std::endl; std::cout << "Rules: " << std::endl; for(auto a: config.grammar.rules) { - std::cout << "\t" << a.head << " -> "; - for(auto c: a.tail) { + std::cout << "\t" << a->head << " -> "; + for(auto c: a->tail) { std::cout << c << " "; } std::cout << std::endl; } + std::vector names = {"ERROR", "SHIFT", "REDUCE", "ACCEPT"}; + pds::lr::Generator g(config.grammar); + auto tbl = g.generate(); + for (std::size_t i = 0; i < tbl.act.size(); i++) { + std::cout << "State " << i << std::endl; + std::cout << " Action:" << std::endl; + for (auto& p : tbl.act[i]) { + std::cout << " " << p.first << ": " << names[static_cast(p.second.first)] << " " << p.second.second << std::endl; + } + std::cout << " Goto:" << std::endl; + for (auto& p : tbl.goto_[i]) { + std::cout << " " << p.first << ": " << p.second << std::endl;; + } + } }