From f765196417015294967cf335c796570a75116202 Mon Sep 17 00:00:00 2001 From: Robin Jadoul Date: Sun, 18 Dec 2016 17:20:59 +0100 Subject: [PATCH] SLR(1) parse table generation --- include/Parsodus/grammar.h | 18 ++++ include/Parsodus/lrtables/LR0Itemset.h | 21 ++-- include/Parsodus/lrtables/LR0ItemsetBase.h | 105 ++++++++++++++++++ include/Parsodus/lrtables/SLR1Itemset.h | 24 +++++ include/Parsodus/lrtables/generator.h | 18 ++-- include/Parsodus/util/firstset.h | 38 +++++++ include/Parsodus/util/followset.h | 31 ++++++ include/Parsodus/util/symbols.h | 16 +++ src/CMakeLists.txt | 7 ++ src/lrtables/LR0Itemset.cpp | 58 ++-------- src/lrtables/SLR1Itemset.cpp | 29 +++++ src/main.cpp | 4 +- src/util/firstset.cpp | 118 +++++++++++++++++++++ src/util/followset.cpp | 54 ++++++++++ 14 files changed, 464 insertions(+), 77 deletions(-) create mode 100644 include/Parsodus/lrtables/LR0ItemsetBase.h create mode 100644 include/Parsodus/lrtables/SLR1Itemset.h create mode 100644 include/Parsodus/util/firstset.h create mode 100644 include/Parsodus/util/followset.h create mode 100644 include/Parsodus/util/symbols.h create mode 100644 src/lrtables/SLR1Itemset.cpp create mode 100644 src/util/firstset.cpp create mode 100644 src/util/followset.cpp diff --git a/include/Parsodus/grammar.h b/include/Parsodus/grammar.h index 81466ea..72cd2d2 100644 --- a/include/Parsodus/grammar.h +++ b/include/Parsodus/grammar.h @@ -1,6 +1,9 @@ #pragma once #ifndef PARSODUS_GRAMMAR_H +#include "Parsodus/util/firstset.h" +#include "Parsodus/util/followset.h" + #include #include #include @@ -37,6 +40,21 @@ namespace pds { std::set variables; ///< the variables std::set terminals; ///< the terminals std::vector> rules; ///< the replacement rules + + std::unique_ptr first; + std::unique_ptr follow; + + Grammar() : start(""), variables(), terminals(), rules(), first(nullptr), follow(nullptr) + {} + + Grammar(const Grammar& rhs) + : start(rhs.start), variables(rhs.variables), terminals(rhs.terminals) + , 
rules(rhs.rules), first(nullptr), follow(nullptr) { + if (rhs.first) + first = std::make_unique(*rhs.first); + if (rhs.follow) + follow = std::make_unique(*rhs.follow); + } }; } diff --git a/include/Parsodus/lrtables/LR0Itemset.h b/include/Parsodus/lrtables/LR0Itemset.h index 6cb13b4..b7e485b 100644 --- a/include/Parsodus/lrtables/LR0Itemset.h +++ b/include/Parsodus/lrtables/LR0Itemset.h @@ -2,35 +2,26 @@ #ifndef LRTABLES_LR0ITEMSET_H_HTSWOGFB #define LRTABLES_LR0ITEMSET_H_HTSWOGFB -#include "Parsodus/lrtables/generator.h" -#include "Parsodus/lrtables/LR0Item.h" +#include "Parsodus/lrtables/LR0ItemsetBase.h" -#include +#include namespace pds { namespace lr { - + /** * An LR(0) itemset, @see Generator for details on the public methods */ -class LR0Itemset { +class LR0Itemset : public LR0ItemsetBase { public: LR0Itemset(); LR0Itemset(std::shared_ptr start); - void close(const Grammar& g); - LR0Itemset succ(std::string sym) const; - bool operator==(const LR0Itemset& rhs) const; - bool canMerge(const LR0Itemset& rhs) const; - void merge(const LR0Itemset& rhs); - bool empty() const; + static bool needsFollow(); + std::set getReduces(const Grammar& g, std::string lookahead) const; void print() const; ///TODO remove me - -private: - std::set m_items; - }; } /* lr */ diff --git a/include/Parsodus/lrtables/LR0ItemsetBase.h b/include/Parsodus/lrtables/LR0ItemsetBase.h new file mode 100644 index 0000000..354f437 --- /dev/null +++ b/include/Parsodus/lrtables/LR0ItemsetBase.h @@ -0,0 +1,105 @@ +#pragma once +#ifndef LRTABLES_LR0ITEMSETBASE_H_GGIPISTD +#define LRTABLES_LR0ITEMSETBASE_H_GGIPISTD + +#include "Parsodus/lrtables/LR0Item.h" + +namespace pds { +namespace lr { + +/** + * Provide common methods for Itemsets based upon LR(0) items + * (SLR(1) and LR(0)) + */ +template +class LR0ItemsetBase { +public: + LR0ItemsetBase(); + LR0ItemsetBase(std::shared_ptr start); + + void close(const Grammar& g); + Itemset succ(std::string sym) const; + bool operator==(const Itemset& rhs) 
const; + bool canMerge(const Itemset& rhs) const; + void merge(const Itemset& rhs); + bool empty() const; + +protected: + std::set m_items; +}; + +template +LR0ItemsetBase::LR0ItemsetBase() +{} + +template +LR0ItemsetBase::LR0ItemsetBase(std::shared_ptr start) { + m_items.emplace(LR0Item{start, 0}); +} + +template +void LR0ItemsetBase::close(const Grammar& g) { + bool changes = true; + std::vector todo; + std::set added; + + while (changes) { + changes = false; + std::set toAdd; + + for (const LR0Item& i : m_items) { + if (i.dotIdx < i.rule->tail.size()) { + std::string& sym = i.rule->tail[i.dotIdx]; + if (g.variables.count(sym) && !added.count(sym)) { + added.insert(sym); + changes = true; + for (const auto& rule : g.rules) { + if (rule->head == sym) { + toAdd.insert(LR0Item{rule, 0}); + } + } + } + } + } + m_items.insert(toAdd.begin(), toAdd.end()); + } +} + +template +Itemset LR0ItemsetBase::succ(std::string sym) const { + Itemset sc; + for (auto& item : m_items) { + if (item.dotIdx < item.rule->tail.size()) { + if (item.rule->tail[item.dotIdx] == sym) { + sc.m_items.insert(LR0Item{item.rule, item.dotIdx + 1}); + } + } + } + return sc; +} + +template +bool LR0ItemsetBase::operator==(const Itemset& rhs) const { + return m_items == rhs.m_items; +} + +template +bool LR0ItemsetBase::canMerge(const Itemset&) const { + return false; +} + +template +void LR0ItemsetBase::merge(const Itemset&) { + //NO-OP +} + +template +bool LR0ItemsetBase::empty() const { + return m_items.empty(); +} + + +} /* lr */ +} /* pds */ + +#endif /* LRTABLES_LR0ITEMSETBASE_H_GGIPISTD */ diff --git a/include/Parsodus/lrtables/SLR1Itemset.h b/include/Parsodus/lrtables/SLR1Itemset.h new file mode 100644 index 0000000..dc0b935 --- /dev/null +++ b/include/Parsodus/lrtables/SLR1Itemset.h @@ -0,0 +1,24 @@ +#pragma once +#ifndef LRTABLES_SLR1ITEMSET_H_PN9QZCDB +#define LRTABLES_SLR1ITEMSET_H_PN9QZCDB + +#include "Parsodus/lrtables/LR0Item.h" +#include "Parsodus/lrtables/LR0Itemset.h" + +namespace pds 
{ +namespace lr { + +class SLR1Itemset : public LR0ItemsetBase { + public: + SLR1Itemset(); + SLR1Itemset(std::shared_ptr start); + + static bool needsFollow(); + + std::set getReduces(const Grammar& g, std::string lookahead) const; +}; + +} /* lr */ +} /* pds */ + +#endif /* LRTABLES_SLR1ITEMSET_H_PN9QZCDB */ diff --git a/include/Parsodus/lrtables/generator.h b/include/Parsodus/lrtables/generator.h index 41dd980..0b38e6e 100644 --- a/include/Parsodus/lrtables/generator.h +++ b/include/Parsodus/lrtables/generator.h @@ -3,6 +3,7 @@ #define PARSODUS_LRTABLES_GENERATOR_H_YW3GIUNH #include "Parsodus/grammar.h" +#include "Parsodus/util/symbols.h" #include "Parsodus/lrtables/table.h" #include @@ -13,9 +14,6 @@ namespace pds { namespace lr { -const std::string EXTENDED_START = "^"; -const std::string EOF_PLACEHOLDER = "$"; - /** * Base class for LR (and derivative) table generators (such as SLR and LALR) * Parametrized on the type of itemset (configuration set) to be used @@ -29,6 +27,7 @@ const std::string EOF_PLACEHOLDER = "$"; * - void merge(const Itemset&); Merge the given Itemset into this one * - bool empty() const; is this Itemset empty (== not useful) * - std::set getReduces(const Grammar&, std::string) const; get all Rule indices where a reduce should happen with given lookahead (not necessarily a set, but iterable) + * - static bool needsFollow(); does this type of Itemset need Follow sets to work? If so, the first and follow unique_ptrs of the grammar passed will be initialized */ template class Generator { @@ -54,10 +53,14 @@ class Generator { }; template -Generator::Generator(const Grammar& g) : m_gram(g), m_startrule(std::make_shared(EXTENDED_START, std::vector{g.start})) { - m_gram.terminals.insert(EOF_PLACEHOLDER); //End of file - m_gram.variables.insert(EXTENDED_START); //modified start rule +Generator::Generator(const Grammar& g) : m_gram(g), m_startrule(std::make_shared(util::EXTENDED_START, std::vector{g.start})) { + 
m_gram.terminals.insert(util::EOF_PLACEHOLDER); //End of file + m_gram.variables.insert(util::EXTENDED_START); //modified start rule m_gram.rules.push_back(m_startrule); + if (Itemset::needsFollow()) { + m_gram.first = std::make_unique(m_gram); + m_gram.follow = std::make_unique(m_gram, *m_gram.first); + } } template @@ -123,7 +126,7 @@ LRTable Generator::generate() { for (std::size_t rule_applied : cur.getReduces(m_gram, term)) { if (rule_applied == m_gram.rules.size() - 1) { // The last added rule // The extended start rule - if (term == EOF_PLACEHOLDER) + if (term == util::EOF_PLACEHOLDER) table.act[curIdx][term] = std::make_pair(Action::ACCEPT, 0); } else if (table.act[curIdx].count(term)) { if (table.act[curIdx][term].first == Action::SHIFT) { @@ -147,7 +150,6 @@ LRTable Generator::generate() { return table; } - } /* lr */ } /* pdf */ diff --git a/include/Parsodus/util/firstset.h b/include/Parsodus/util/firstset.h new file mode 100644 index 0000000..227b6ac --- /dev/null +++ b/include/Parsodus/util/firstset.h @@ -0,0 +1,38 @@ +#pragma once +#ifndef PARSODUS_FIRSTSET_H_Q6U5VBG0 +#define PARSODUS_FIRSTSET_H_Q6U5VBG0 + +#include +#include +#include +#include + +namespace pds { + +struct Grammar; + +namespace util { + +class FirstSet { + public: + FirstSet(const Grammar& g); + + /** + * Get the first set for the given symbol + */ + std::set operator()(std::string key) const; + + /** + * Get the first set for the given sequence of symbols + */ + std::set operator()(std::vector sequence) const; + + + private: + std::map> m_first; +}; + +} /* util */ +} /* pds */ + +#endif /* PARSODUS_FIRSTSET_H_Q6U5VBG0 */ diff --git a/include/Parsodus/util/followset.h b/include/Parsodus/util/followset.h new file mode 100644 index 0000000..5683164 --- /dev/null +++ b/include/Parsodus/util/followset.h @@ -0,0 +1,31 @@ +#pragma once +#ifndef UTIL_FOLLOWSET_H_WIGGZMRF +#define UTIL_FOLLOWSET_H_WIGGZMRF + +#include "Parsodus/util/firstset.h" + +#include +#include +#include +#include + 
+namespace pds { +namespace util { + +class FollowSet { + public: + FollowSet(const Grammar& g, const FirstSet& first); + + /** + * Get the follow set for the given variable + */ + std::set operator()(std::string key) const; + + private: + std::map> m_follow; +}; + +} /* util */ +} /* pds */ + +#endif /* UTIL_FOLLOWSET_H_WIGGZMRF */ diff --git a/include/Parsodus/util/symbols.h b/include/Parsodus/util/symbols.h new file mode 100644 index 0000000..b501e47 --- /dev/null +++ b/include/Parsodus/util/symbols.h @@ -0,0 +1,16 @@ +#pragma once +#ifndef UTIL_SYMBOLS_H_2IFHDWBY +#define UTIL_SYMBOLS_H_2IFHDWBY + +#include + +namespace pds { +namespace util { + +const std::string EXTENDED_START = "^"; +const std::string EOF_PLACEHOLDER = "$"; + +} /* util */ +} /* pds */ + +#endif /* UTIL_SYMBOLS_H_2IFHDWBY */ diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index b09328e..8ac1da0 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -14,9 +14,15 @@ else() endif() include_directories("${CMAKE_CURRENT_BINARY_DIR}") +add_library(Parsodus-util + util/firstset.cpp + util/followset.cpp + ) + add_library(Parsodus-tables lrtables/LR0Item.cpp lrtables/LR0Itemset.cpp + lrtables/SLR1Itemset.cpp ) add_library(Parsodus-backends @@ -34,6 +40,7 @@ add_executable(Parsodus main.cpp ) target_link_libraries(Parsodus + Parsodus-util Parsodus-tables Parsodus-backends pds diff --git a/src/lrtables/LR0Itemset.cpp b/src/lrtables/LR0Itemset.cpp index 8d23c48..f22dfdc 100644 --- a/src/lrtables/LR0Itemset.cpp +++ b/src/lrtables/LR0Itemset.cpp @@ -3,67 +3,21 @@ namespace pds { namespace lr { -LR0Itemset::LR0Itemset() { -} - -LR0Itemset::LR0Itemset(std::shared_ptr start) { - m_items.emplace(LR0Item{start, 0}); -} - -void LR0Itemset::close(const Grammar& g) { - bool changes = true; - std::vector todo; - std::set added; - - while (changes) { - changes = false; - std::set toAdd; - - for (const LR0Item& i : m_items) { - if (i.dotIdx < i.rule->tail.size()) { - std::string& sym = 
i.rule->tail[i.dotIdx]; - if (g.variables.count(sym) && !added.count(sym)) { - added.insert(sym); - changes = true; - for (const auto& rule : g.rules) { - if (rule->head == sym) { - toAdd.insert(LR0Item{rule, 0}); - } - } - } - } - } - m_items.insert(toAdd.begin(), toAdd.end()); - } -} - -LR0Itemset LR0Itemset::succ(std::string sym) const { - LR0Itemset sc; for (auto& item : m_items) { - if (item.dotIdx < item.rule->tail.size()) { - if (item.rule->tail[item.dotIdx] == sym) { - sc.m_items.insert(LR0Item{item.rule, item.dotIdx + 1}); - } } } - return sc; } -bool LR0Itemset::operator==(const LR0Itemset& rhs) const { - return m_items == rhs.m_items; -} +LR0Itemset::LR0Itemset() : LR0ItemsetBase() +{} -bool LR0Itemset::canMerge(const LR0Itemset&) const { +LR0Itemset::LR0Itemset(std::shared_ptr start) : LR0ItemsetBase(start) +{} + +bool LR0Itemset::needsFollow() { return false; } -void LR0Itemset::merge(const LR0Itemset&) { - //NO-OP -} - -bool LR0Itemset::empty() const { - return m_items.empty(); -} std::set LR0Itemset::getReduces(const Grammar& g, std::string) const { std::set res; diff --git a/src/lrtables/SLR1Itemset.cpp b/src/lrtables/SLR1Itemset.cpp new file mode 100644 index 0000000..5cc5185 --- /dev/null +++ b/src/lrtables/SLR1Itemset.cpp @@ -0,0 +1,29 @@ +#include "Parsodus/lrtables/SLR1Itemset.h" + +#include + +namespace pds { +namespace lr { + +SLR1Itemset::SLR1Itemset() : LR0ItemsetBase() +{} + +SLR1Itemset::SLR1Itemset(std::shared_ptr start) : LR0ItemsetBase(start) +{} + +bool SLR1Itemset::needsFollow() { + return true; +} + +std::set SLR1Itemset::getReduces(const Grammar& g, std::string lookahead) const { + std::set res; + for (auto& item : m_items) { + if (item.dotIdx >= item.rule->tail.size() && (*g.follow)(item.rule->head).count(lookahead)) { + res.insert(std::find(g.rules.begin(), g.rules.end(), item.rule) - g.rules.begin()); + } + } + return res; +} + +} /* lr */ +} /* pds */ diff --git a/src/main.cpp b/src/main.cpp index f11b4fa..46c1c55 100644 --- 
a/src/main.cpp +++ b/src/main.cpp @@ -4,7 +4,7 @@ #include "Parsodus/inputparser.h" #include "Parsodus/lrtables/generator.h" -#include "Parsodus/lrtables/LR0Itemset.h" +#include "Parsodus/lrtables/SLR1Itemset.h" int main(int argc, char** argv) { @@ -48,7 +48,7 @@ int main(int argc, char** argv) { } std::vector names = {"ERROR", "SHIFT", "REDUCE", "ACCEPT"}; - pds::lr::Generator g(config.grammar); + pds::lr::Generator g(config.grammar); auto tbl = g.generate(); for (std::size_t i = 0; i < tbl.act.size(); i++) { std::cout << "State " << i << std::endl; diff --git a/src/util/firstset.cpp b/src/util/firstset.cpp new file mode 100644 index 0000000..3fba3f6 --- /dev/null +++ b/src/util/firstset.cpp @@ -0,0 +1,118 @@ +#include "Parsodus/util/firstset.h" +#include "Parsodus/grammar.h" + +namespace pds { +namespace util { + +FirstSet::FirstSet(const Grammar& g) +{ + for (std::string term : g.terminals) { + m_first[term].insert(term); + } + + bool changes = true; + + auto update = [&changes, this](std::string head, const std::set& s) { + for (std::string elem : s) { + if (!m_first[head].count(elem)) { + changes = true; + m_first[head].insert(s.begin(), s.end()); + return; + } + } + }; + + while (changes) { + changes = false; + + for (const auto& rule : g.rules) { + if (rule->tail.size() == 0) { + update(rule->head, {""}); + } + + bool ended = false; + const std::string& head = rule->head; + for (const std::string& replacement : rule->tail) { + if (m_first[replacement].count("")) { + std::set tmp = m_first[replacement]; + tmp.erase(""); + update(head, tmp); + } else { + update(head, m_first[replacement]); + ended = true; + break; + } + } + if (!ended) { + update(head, {""}); + } + } + } +} + +std::set FirstSet::operator()(std::string key) const { + auto tmp = m_first.find(key); + if (tmp == m_first.end()) + return {}; + return tmp->second; +} + +std::set FirstSet::operator()(std::vector sequence) const { + std::set result; + bool hasEmpty = false; + for (std::string key : 
sequence) { + std::set tmp = (*this)(key); + hasEmpty = tmp.count(""); + tmp.erase(""); + result.insert(tmp.begin(), tmp.end()); + if (!hasEmpty) + break; + } + if (hasEmpty) + result.insert(""); + + return result; +} + +/* + * template + * void Generator::buildFollow() { + * //EOF follow the added start rule. + * m_follow[EXTENDED_START].insert(EOF_PLACEHOLDER); + * + * bool changes = true; + * + * auto update = [&changes, this](std::string head, auto s) { + * s.erase(""); + * for (std::string elem : s) { + * if (!m_follow[head].count(elem)) { + * changes = true; + * m_follow[head].insert(s.begin(), s.end()); + * return; + * } + * } + * }; + * + * while (changes) { + * changes = false; + * for (const auto& p : m_gram.rules) { + * std::string& head = p.first; + * for (const auto& rule : p.second) { + * for (std::size_t i = 0; i < rule.tail.size(); i++) { + * if (!m_gram.terminals.count(rule.tail[i])) { + * if (i == rule.tail.size() - 1 || m_first[rule.tail[i + 1]].count("")) { + * update(rule.tail[i], m_follow[head]); + * } + * if (i < rule.tail.size() - 1) { + * update(rule.tail[i], m_first[rule.tail[i + 1]]); + * } + * } + * } + * } + * } + * } + * } + */ + +} /* util */ +} /* pds */ diff --git a/src/util/followset.cpp b/src/util/followset.cpp new file mode 100644 index 0000000..bd398c1 --- /dev/null +++ b/src/util/followset.cpp @@ -0,0 +1,54 @@ +#include "Parsodus/util/followset.h" +#include "Parsodus/util/symbols.h" +#include "Parsodus/grammar.h" + +namespace pds { +namespace util { + +FollowSet::FollowSet(const Grammar& g, const FirstSet& first) +{ + m_follow[EXTENDED_START].insert(EOF_PLACEHOLDER); + + bool changes = true; + + auto update = [&changes, this](std::string head, auto s) { + s.erase(""); + for (std::string elem : s) { + if (!m_follow[head].count(elem)) { + changes = true; + m_follow[head].insert(s.begin(), s.end()); + return; + } + } + }; + + while (changes) { + changes = false; + for (const auto& rule : g.rules) { + const std::string& head = 
rule->head; + auto it = rule->tail.begin(); //< Keep track of the start of the rest of the tail + it++; //< The 'rest' of the tail + for (std::size_t i = 0; i < rule->tail.size(); i++, it++) { + if (g.variables.count(rule->tail[i])) { + std::set restFirst = first(std::vector(it, rule->tail.end())); + if (i == rule->tail.size() - 1 || restFirst.count("")) { + update(rule->tail[i], m_follow[head]); + } + if (i < rule->tail.size() - 1) { + update(rule->tail[i], restFirst); + } + } + } + } + } +} + +std::set FollowSet::operator()(std::string key) const { + auto tmp = m_follow.find(key); + if (tmp == m_follow.end()) + return {}; + return tmp->second; +} + +} /* util */ +} /* pds */