From 2e95d671bffc04cf9d01850beffbff9f9999eda8 Mon Sep 17 00:00:00 2001 From: Robin Jadoul Date: Wed, 11 Jan 2017 17:24:21 +0100 Subject: [PATCH] LR(1) table generation --- include/Parsodus/backendmanager.h | 2 + include/Parsodus/lrtables/LR1Item.h | 22 ++++ include/Parsodus/lrtables/LR1Itemset.h | 24 ++++ include/Parsodus/lrtables/LR1ItemsetBase.h | 142 +++++++++++++++++++++ include/Parsodus/util/parserType.h | 2 +- src/CMakeLists.txt | 2 + src/inputparser.cpp | 2 + src/lrtables/LR1Item.cpp | 21 +++ src/lrtables/LR1Itemset.cpp | 21 +++ 9 files changed, 237 insertions(+), 1 deletion(-) create mode 100644 include/Parsodus/lrtables/LR1Item.h create mode 100644 include/Parsodus/lrtables/LR1Itemset.h create mode 100644 include/Parsodus/lrtables/LR1ItemsetBase.h create mode 100644 src/lrtables/LR1Item.cpp create mode 100644 src/lrtables/LR1Itemset.cpp diff --git a/include/Parsodus/backendmanager.h b/include/Parsodus/backendmanager.h index 20d758e..1bad134 100644 --- a/include/Parsodus/backendmanager.h +++ b/include/Parsodus/backendmanager.h @@ -10,6 +10,7 @@ #include "Parsodus/lrtables/generator.h" #include "Parsodus/lrtables/LR0Itemset.h" #include "Parsodus/lrtables/SLR1Itemset.h" +#include "Parsodus/lrtables/LR1Itemset.h" #include "Parsodus/util/parserType.h" namespace pds { @@ -30,6 +31,7 @@ namespace pds { void registerLR() { registerBackend(std::make_unique>>(util::ParserType::LR_0)); registerBackend(std::make_unique>>(util::ParserType::SLR_1)); + registerBackend(std::make_unique>>(util::ParserType::LR_1)); } /** diff --git a/include/Parsodus/lrtables/LR1Item.h b/include/Parsodus/lrtables/LR1Item.h new file mode 100644 index 0000000..528f28a --- /dev/null +++ b/include/Parsodus/lrtables/LR1Item.h @@ -0,0 +1,22 @@ +#pragma once +#ifndef PARSODUS_LRTABLES_LR1ITEM_H_OEUCDMZL +#define PARSODUS_LRTABLES_LR1ITEM_H_OEUCDMZL + +#include "Parsodus/lrtables/LR0Item.h" + +namespace pds { +namespace lr { + +struct LR1Item { + std::shared_ptr rule; + std::size_t dotIdx; + std::set lookaheads; + + bool operator<(const LR1Item& rhs) const; ///< Useful to keep it in a set/map + bool operator==(const LR1Item& rhs) const; +}; + +} /* lr */ +} /* pds */ + +#endif /* PARSODUS_LRTABLES_LR1ITEM_H_OEUCDMZL */ diff --git a/include/Parsodus/lrtables/LR1Itemset.h b/include/Parsodus/lrtables/LR1Itemset.h new file mode 100644 index 0000000..70d78f9 --- /dev/null +++ b/include/Parsodus/lrtables/LR1Itemset.h @@ -0,0 +1,24 @@ +#pragma once +#ifndef PARSODUS_LRTABLES_LR1ITEMSET_H_AFGBM4VN +#define PARSODUS_LRTABLES_LR1ITEMSET_H_AFGBM4VN + +#include "Parsodus/lrtables/LR1ItemsetBase.h" + +namespace pds { +namespace lr { + +class LR1Itemset : public LR1ItemsetBase { +public: + LR1Itemset(); + LR1Itemset(std::shared_ptr start); + + bool canMerge(const LR1Itemset& rhs) const; + void merge(const LR1Itemset& rhs); + +private: +}; + +} /* lr */ +} /* pds */ + +#endif /* PARSODUS_LRTABLES_LR1ITEMSET_H_AFGBM4VN */ diff --git a/include/Parsodus/lrtables/LR1ItemsetBase.h b/include/Parsodus/lrtables/LR1ItemsetBase.h new file mode 100644 index 0000000..738770d --- /dev/null +++ b/include/Parsodus/lrtables/LR1ItemsetBase.h @@ -0,0 +1,142 @@ +#pragma once +#ifndef PARSODUS_LRTABLES_LR1ITEMSETBASE_H_EREKWQSM +#define PARSODUS_LRTABLES_LR1ITEMSETBASE_H_EREKWQSM + +#include "Parsodus/lrtables/LR1Item.h" +#include "Parsodus/util/symbols.h" + +#include + +namespace pds { +namespace lr { + +template +class LR1ItemsetBase { +public: + LR1ItemsetBase(); + LR1ItemsetBase(std::shared_ptr start); + + static bool needsFollow(); + + void close(const Grammar& g); + Itemset succ(std::string sym) const; + bool operator==(const Itemset& rhs) const; + bool empty() const; + std::set getReduces(const Grammar& g, std::string lookahead) const; + +private: + std::set m_items; +}; + +template +LR1ItemsetBase::LR1ItemsetBase() +{} + +template +LR1ItemsetBase::LR1ItemsetBase(std::shared_ptr start) { + m_items.emplace(LR1Item{start, 0, {util::EOF_PLACEHOLDER}}); +} + +template +bool LR1ItemsetBase::needsFollow() { + return true; +} + +template +void LR1ItemsetBase::close(const Grammar& g) { + bool changes = true; + std::vector todo; + std::set> added; // (variable, lookahead) + + while (changes) { + changes = false; + std::set toAdd; + + for (const LR1Item& i : m_items) { + if (i.dotIdx < i.rule->tail.size()) { + std::string& sym = i.rule->tail[i.dotIdx]; + if (g.variables.count(sym)) { + std::vector seq(std::vector(i.rule->tail.begin() + i.dotIdx + 1, i.rule->tail.end())); + std::set first = (*g.first)(seq); + if (first.count("") || !first.size()) { + first.insert(i.lookaheads.begin(), i.lookaheads.end()); + first.erase(""); + } + + for (std::string newLookahead : first) { + if (!added.count({sym, newLookahead})) { + added.emplace(sym, newLookahead); + changes = true; + for (const auto& rule : g.rules) { + if (rule->head == sym) { + toAdd.insert(LR1Item{rule, 0, {newLookahead}}); + } + } + } + } + } + } + } + + std::vector newItems; + for (auto& it : m_items) { + newItems.emplace_back(std::move(it)); + } + for (const auto& newItem : toAdd) { + bool found = false; + for (auto& oldItem : newItems) { + if (newItem.dotIdx == oldItem.dotIdx && newItem.rule == oldItem.rule) { + found = true; + oldItem.lookaheads.insert(newItem.lookaheads.begin(), newItem.lookaheads.end()); + break; + } + } + if (!found) { + newItems.push_back(newItem); + } + } + m_items.clear(); + for (auto& it : newItems) { + m_items.emplace(std::move(it)); + } + } +} + +template +Itemset LR1ItemsetBase::succ(std::string sym) const { + Itemset sc; + for (auto& item : m_items) { + if (item.dotIdx < item.rule->tail.size()) { + if (item.rule->tail[item.dotIdx] == sym) { + sc.m_items.insert(LR1Item{item.rule, item.dotIdx + 1, item.lookaheads}); + } + } + } + return sc; +} + +template +bool LR1ItemsetBase::operator==(const Itemset& rhs) const { + return m_items == rhs.m_items; +} + +template +bool LR1ItemsetBase::empty() const { + return m_items.empty(); +} + +template +std::set LR1ItemsetBase::getReduces(const Grammar& g, std::string lookahead) const { + std::set result; + for (const auto& item : m_items) { + if (item.dotIdx >= item.rule->tail.size() && item.lookaheads.count(lookahead)) { + result.insert(std::find(g.rules.begin(), g.rules.end(), item.rule) - g.rules.begin()); + } + } + return result; +} + +} /* lr */ +} /* pds */ + +#endif /* PARSODUS_LRTABLES_LR1ITEMSETBASE_H_EREKWQSM */ diff --git a/include/Parsodus/util/parserType.h b/include/Parsodus/util/parserType.h index 287d4fc..945188f 100644 --- a/include/Parsodus/util/parserType.h +++ b/include/Parsodus/util/parserType.h @@ -5,7 +5,7 @@ namespace pds { namespace util { - enum class ParserType {LR_0, SLR_1, LALR_1}; + enum class ParserType {LR_0, SLR_1, LR_1, LALR_1}; } } diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 227e518..ef09aca 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -23,6 +23,8 @@ add_library(Parsodus-tables lrtables/LR0Item.cpp lrtables/LR0Itemset.cpp lrtables/SLR1Itemset.cpp + lrtables/LR1Item.cpp + lrtables/LR1Itemset.cpp ) # add_library(Parsodus-backends diff --git a/src/inputparser.cpp b/src/inputparser.cpp index 2db47f4..8ab75e5 100644 --- a/src/inputparser.cpp +++ b/src/inputparser.cpp @@ -36,6 +36,8 @@ namespace pds { config.parserType = util::ParserType::LALR_1; } else if(token.content == "SLR(1)") { config.parserType = util::ParserType::SLR_1; + } else if(token.content == "LR(1)") { + config.parserType = util::ParserType::LR_1; } else throw InputParserException("Unkown parser type"); } else { diff --git a/src/lrtables/LR1Item.cpp b/src/lrtables/LR1Item.cpp new file mode 100644 index 0000000..b203043 --- /dev/null +++ b/src/lrtables/LR1Item.cpp @@ -0,0 +1,21 @@ +#include "Parsodus/lrtables/LR1Item.h" + +namespace pds { +namespace lr { + +bool LR1Item::operator<(const LR1Item& rhs) const { + if (dotIdx != rhs.dotIdx) { + return dotIdx < rhs.dotIdx; + } else if (rule != rhs.rule) { + return rule < rhs.rule; + } else { + return lookaheads < rhs.lookaheads; + } +} + +bool LR1Item::operator==(const LR1Item& rhs) const { + return dotIdx == rhs.dotIdx && rule == rhs.rule && lookaheads == rhs.lookaheads; +} + +} /* lr */ +} /* pds */ diff --git a/src/lrtables/LR1Itemset.cpp b/src/lrtables/LR1Itemset.cpp new file mode 100644 index 0000000..425f0d8 --- /dev/null +++ b/src/lrtables/LR1Itemset.cpp @@ -0,0 +1,21 @@ +#include "Parsodus/lrtables/LR1Itemset.h" + +namespace pds { +namespace lr { + +LR1Itemset::LR1Itemset() : LR1ItemsetBase() +{} + +LR1Itemset::LR1Itemset(std::shared_ptr start) : LR1ItemsetBase(start) +{} + +bool LR1Itemset::canMerge(const LR1Itemset&) const { + return false; +} + +void LR1Itemset::merge(const LR1Itemset&) { + //NO-OP +} + +} /* lr */ +} /* pds */