SLR(1) parse table generation

This commit is contained in:
Robin Jadoul 2016-12-18 17:20:59 +01:00
parent 91fd08ba3e
commit f765196417
14 changed files with 464 additions and 77 deletions

View File

@ -1,6 +1,9 @@
#pragma once
#ifndef PARSODUS_GRAMMAR_H
#include "Parsodus/util/firstset.h"
#include "Parsodus/util/followset.h"
#include <map>
#include <memory>
#include <set>
@ -37,6 +40,21 @@ namespace pds {
std::set<std::string> variables; ///< the variables
std::set<std::string> terminals; ///< the terminals
std::vector<std::shared_ptr<Rule>> rules; ///< the replacement rules
std::unique_ptr<util::FirstSet> first; ///< the first sets of this grammar, nullptr unless explicitly set
std::unique_ptr<util::FollowSet> follow; ///< the follow sets of this grammar, nullptr unless explicitly set
/**
 * Create an empty grammar: empty start symbol, no variables, terminals or
 * rules, and no first/follow sets (both pointers are null).
 */
Grammar()
    : start("")
    , variables()
    , terminals()
    , rules()
    , first()
    , follow()
{}
/**
 * Copy a grammar.
 *
 * The start symbol, variables, terminals and the rule list are copied
 * member-wise (the rule list holds shared_ptrs, so the Rule objects
 * themselves are shared with rhs). The first and follow sets are owned
 * through unique_ptr and therefore duplicated when rhs has them; otherwise
 * they stay null.
 */
Grammar(const Grammar& rhs)
    : start(rhs.start)
    , variables(rhs.variables)
    , terminals(rhs.terminals)
    , rules(rhs.rules)
    , first(rhs.first ? std::make_unique<util::FirstSet>(*rhs.first) : nullptr)
    , follow(rhs.follow ? std::make_unique<util::FollowSet>(*rhs.follow) : nullptr)
{}
};
}

View File

@ -2,35 +2,26 @@
#ifndef LRTABLES_LR0ITEMSET_H_HTSWOGFB
#define LRTABLES_LR0ITEMSET_H_HTSWOGFB
#include "Parsodus/lrtables/generator.h"
#include "Parsodus/lrtables/LR0Item.h"
#include "Parsodus/lrtables/LR0ItemsetBase.h"
#include <memory>
#include <algorithm>
namespace pds {
namespace lr {
/**
* An LR(0) itemset, @see Generator for details on the public methods
*/
class LR0Itemset {
class LR0Itemset : public LR0ItemsetBase<LR0Itemset> {
public:
LR0Itemset();
LR0Itemset(std::shared_ptr<Rule> start);
void close(const Grammar& g);
LR0Itemset succ(std::string sym) const;
bool operator==(const LR0Itemset& rhs) const;
bool canMerge(const LR0Itemset& rhs) const;
void merge(const LR0Itemset& rhs);
bool empty() const;
static bool needsFollow();
std::set<std::size_t> getReduces(const Grammar& g, std::string lookahead) const;
void print() const; ///TODO remove me
private:
std::set<LR0Item> m_items;
};
} /* lr */

View File

@ -0,0 +1,105 @@
#pragma once
#ifndef LRTABLES_LR0ITEMSETBASE_H_GGIPISTD
#define LRTABLES_LR0ITEMSETBASE_H_GGIPISTD
#include "Parsodus/lrtables/LR0Item.h"
namespace pds {
namespace lr {
/**
* Provide common methods for Itemsets based upon LR(0) items
* (SLR(1) and LR(0))
*/
// Itemset is the concrete itemset type deriving from this base; it is the
// type returned by succ() and accepted by the comparison/merge methods.
template <typename Itemset>
class LR0ItemsetBase {
public:
/// Create an itemset that contains no items
LR0ItemsetBase();
/// Create an itemset containing one item: the rule `start` with the dot at index 0
LR0ItemsetBase(std::shared_ptr<Rule> start);
/// For every variable of g found directly after a dot, add an item (dot at index 0)
/// for each rule of g with that variable as head; repeats until nothing new is added
void close(const Grammar& g);
/// Build the itemset of all items that have `sym` directly after the dot,
/// with the dot moved one position further (the result is not closed)
Itemset succ(std::string sym) const;
/// Two itemsets are equal when they contain exactly the same items
bool operator==(const Itemset& rhs) const;
/// Always false in this base implementation
bool canMerge(const Itemset& rhs) const;
/// Does nothing in this base implementation
void merge(const Itemset& rhs);
/// True when this itemset contains no items
bool empty() const;
protected:
std::set<LR0Item> m_items; ///< the LR(0) items making up this itemset
};
/**
 * Create an itemset that contains no items.
 */
template <typename Itemset>
LR0ItemsetBase<Itemset>::LR0ItemsetBase() : m_items() {}
/**
 * Create an itemset seeded with a single item: the given rule with the dot
 * at index 0.
 */
template <typename Itemset>
LR0ItemsetBase<Itemset>::LR0ItemsetBase(std::shared_ptr<Rule> start) : m_items() {
    m_items.insert(LR0Item{start, 0});
}
/**
 * Close this itemset with respect to the grammar g.
 *
 * Whenever an item has a variable of g directly after its dot, an item with
 * the dot at index 0 is added for every rule of g that has this variable as
 * head. This is repeated until a full pass adds no new variable.
 *
 * @param g the grammar providing the variables and rules
 */
template <typename Itemset>
void LR0ItemsetBase<Itemset>::close(const Grammar& g) {
    // Variables whose rules were already added; each one is expanded once.
    std::set<std::string> added;

    bool changes = true;
    while (changes) {
        changes = false;

        // New items are buffered so m_items is not modified while iterating it.
        std::set<LR0Item> toAdd;
        for (const LR0Item& item : m_items) {
            if (item.dotIdx >= item.rule->tail.size())
                continue; // dot at the end: nothing follows it

            const std::string& sym = item.rule->tail[item.dotIdx];
            if (!g.variables.count(sym) || added.count(sym))
                continue; // not a variable, or already expanded

            added.insert(sym);
            changes = true;
            for (const auto& rule : g.rules) {
                if (rule->head == sym) {
                    toAdd.insert(LR0Item{rule, 0});
                }
            }
        }
        m_items.insert(toAdd.begin(), toAdd.end());
    }
}
/**
 * Build the successor itemset for the symbol sym.
 *
 * Every item that has sym directly after its dot contributes the same rule
 * with the dot advanced by one position. The result is not closed and is
 * empty when no item matches.
 */
template <typename Itemset>
Itemset LR0ItemsetBase<Itemset>::succ(std::string sym) const {
    Itemset next;
    for (const LR0Item& cur : m_items) {
        const bool dotAtEnd = cur.dotIdx >= cur.rule->tail.size();
        if (dotAtEnd || !(cur.rule->tail[cur.dotIdx] == sym))
            continue;
        next.m_items.insert(LR0Item{cur.rule, cur.dotIdx + 1});
    }
    return next;
}
/**
 * Two itemsets compare equal when they hold exactly the same set of items.
 */
template <typename Itemset>
bool LR0ItemsetBase<Itemset>::operator==(const Itemset& rhs) const {
    const std::set<LR0Item>& other = rhs.m_items;
    return m_items == other;
}
/**
 * This base implementation never reports another itemset as mergeable.
 */
template <typename Itemset>
bool LR0ItemsetBase<Itemset>::canMerge(const Itemset& /*rhs*/) const {
    return false;
}
/**
 * Merging is a no-op in this base implementation (canMerge is always false).
 */
template <typename Itemset>
void LR0ItemsetBase<Itemset>::merge(const Itemset& /*rhs*/) {
    // intentionally empty
}
/**
 * @return true when this itemset holds no items at all
 */
template <typename Itemset>
bool LR0ItemsetBase<Itemset>::empty() const {
    return m_items.size() == 0;
}
} /* lr */
} /* pds */
#endif /* LRTABLES_LR0ITEMSETBASE_H_GGIPISTD */

View File

@ -0,0 +1,24 @@
#pragma once
#ifndef LRTABLES_SLR1ITEMSET_H_PN9QZCDB
#define LRTABLES_SLR1ITEMSET_H_PN9QZCDB
#include "Parsodus/lrtables/LR0Item.h"
#include "Parsodus/lrtables/LR0Itemset.h"
namespace pds {
namespace lr {
/**
 * An itemset of LR(0) items for SLR(1) table generation: the common itemset
 * operations come from LR0ItemsetBase, reductions are selected by lookahead.
 */
class SLR1Itemset : public LR0ItemsetBase<SLR1Itemset> {
public:
/// Create an itemset without items
SLR1Itemset();
/// Create an itemset holding the rule `start` with the dot at index 0
SLR1Itemset(std::shared_ptr<Rule> start);
/// Always true: getReduces reads the follow sets stored in the grammar
static bool needsFollow();
/// Indices (into g.rules) of the rules of completed items whose head has `lookahead` in its follow set
std::set<std::size_t> getReduces(const Grammar& g, std::string lookahead) const;
};
} /* lr */
} /* pds */
#endif /* LRTABLES_SLR1ITEMSET_H_PN9QZCDB */

View File

@ -3,6 +3,7 @@
#define PARSODUS_LRTABLES_GENERATOR_H_YW3GIUNH
#include "Parsodus/grammar.h"
#include "Parsodus/util/symbols.h"
#include "Parsodus/lrtables/table.h"
#include <algorithm>
@ -13,9 +14,6 @@
namespace pds {
namespace lr {
const std::string EXTENDED_START = "^";
const std::string EOF_PLACEHOLDER = "$";
/**
* Base class for LR (and derivative) table generators (such as SLR and LALR)
* Parametrized on the type of itemset (configuration set) to be used
@ -29,6 +27,7 @@ const std::string EOF_PLACEHOLDER = "$";
* - void merge(const Itemset&); Merge the given Itemset into this one
* - bool empty() const; is this Itemset empty (== not useful)
* - std::set<std::size_t> getReduces(const Grammar&, std::string) const; get all Rule indices where a reduce should happen with given lookahead (not necessarily a set, but iterable)
* - static bool needsFollow(); does this type of Itemset need Follow sets to work? If so, the first and follow unique_ptrs of the grammar passed will be initialized
*/
template <typename Itemset>
class Generator {
@ -54,10 +53,14 @@ class Generator {
};
template <typename Itemset>
Generator<Itemset>::Generator(const Grammar& g) : m_gram(g), m_startrule(std::make_shared<Rule>(EXTENDED_START, std::vector<std::string>{g.start})) {
m_gram.terminals.insert(EOF_PLACEHOLDER); //End of file
m_gram.variables.insert(EXTENDED_START); //modified start rule
Generator<Itemset>::Generator(const Grammar& g) : m_gram(g), m_startrule(std::make_shared<Rule>(util::EXTENDED_START, std::vector<std::string>{g.start})) {
m_gram.terminals.insert(util::EOF_PLACEHOLDER); //End of file
m_gram.variables.insert(util::EXTENDED_START); //modified start rule
m_gram.rules.push_back(m_startrule);
if (Itemset::needsFollow()) {
m_gram.first = std::make_unique<util::FirstSet>(m_gram);
m_gram.follow = std::make_unique<util::FollowSet>(m_gram, *m_gram.first);
}
}
template <typename Itemset>
@ -123,7 +126,7 @@ LRTable Generator<Itemset>::generate() {
for (std::size_t rule_applied : cur.getReduces(m_gram, term)) {
if (rule_applied == m_gram.rules.size() - 1) { // The last added rule
// The extended start rule
if (term == EOF_PLACEHOLDER)
if (term == util::EOF_PLACEHOLDER)
table.act[curIdx][term] = std::make_pair(Action::ACCEPT, 0);
} else if (table.act[curIdx].count(term)) {
if (table.act[curIdx][term].first == Action::SHIFT) {
@ -147,7 +150,6 @@ LRTable Generator<Itemset>::generate() {
return table;
}
} /* lr */
} /* pds */

View File

@ -0,0 +1,38 @@
#pragma once
#ifndef PARSODUS_FIRSTSET_H_Q6U5VBG0
#define PARSODUS_FIRSTSET_H_Q6U5VBG0
#include <map>
#include <set>
#include <string>
#include <vector>
namespace pds {
struct Grammar;
namespace util {
/**
 * The first sets of the symbols of a grammar.
 * The empty string "" is used as the marker for "can derive nothing".
 */
class FirstSet {
public:
/**
 * Compute the first sets for all terminals and rule heads of g
 */
FirstSet(const Grammar& g);
/**
 * Get the first set for the given symbol
 * (an empty set when nothing is known about the symbol)
 */
std::set<std::string> operator()(std::string key) const;
/**
 * Get the first set for the given sequence of symbols
 */
std::set<std::string> operator()(std::vector<std::string> sequence) const;
private:
std::map<std::string, std::set<std::string>> m_first; ///< symbol -> its first set
};
} /* util */
} /* pds */
#endif /* PARSODUS_FIRSTSET_H_Q6U5VBG0 */

View File

@ -0,0 +1,31 @@
#pragma once
#ifndef UTIL_FOLLOWSET_H_WIGGZMRF
#define UTIL_FOLLOWSET_H_WIGGZMRF
#include "Parsodus/util/firstset.h"
#include <map>
#include <set>
#include <string>
#include <vector>
namespace pds {
namespace util {
/**
 * The follow sets of the variables of a grammar.
 */
class FollowSet {
public:
/**
 * Compute the follow sets for the variables of g, using the given first sets
 */
FollowSet(const Grammar& g, const FirstSet& first);
/**
 * Get the follow set for the given variable
 * (an empty set when nothing was recorded for it)
 */
std::set<std::string> operator()(std::string key) const;
private:
std::map<std::string, std::set<std::string>> m_follow; ///< variable -> its follow set
};
} /* util */
} /* pds */
#endif /* UTIL_FOLLOWSET_H_WIGGZMRF */

View File

@ -0,0 +1,16 @@
#pragma once
#ifndef UTIL_SYMBOLS_H_2IFHDWBY
#define UTIL_SYMBOLS_H_2IFHDWBY
#include <string>
namespace pds {
namespace util {
const std::string EXTENDED_START = "^"; ///< head of the extra start rule the table generator adds to the grammar
const std::string EOF_PLACEHOLDER = "$"; ///< terminal standing for end of file
} /* util */
} /* pds */
#endif /* UTIL_SYMBOLS_H_2IFHDWBY */

View File

@ -14,9 +14,15 @@ else()
endif()
include_directories("${CMAKE_CURRENT_BINARY_DIR}")
add_library(Parsodus-util
util/firstset.cpp
util/followset.cpp
)
add_library(Parsodus-tables
lrtables/LR0Item.cpp
lrtables/LR0Itemset.cpp
lrtables/SLR1Itemset.cpp
)
add_library(Parsodus-backends
@ -34,6 +40,7 @@ add_executable(Parsodus
main.cpp
)
target_link_libraries(Parsodus
Parsodus-util
Parsodus-tables
Parsodus-backends
pds

View File

@ -3,67 +3,21 @@
namespace pds {
namespace lr {
LR0Itemset::LR0Itemset() {
}
LR0Itemset::LR0Itemset(std::shared_ptr<Rule> start) {
m_items.emplace(LR0Item{start, 0});
}
void LR0Itemset::close(const Grammar& g) {
bool changes = true;
std::vector<LR0Item> todo;
std::set<std::string> added;
while (changes) {
changes = false;
std::set<LR0Item> toAdd;
for (const LR0Item& i : m_items) {
if (i.dotIdx < i.rule->tail.size()) {
std::string& sym = i.rule->tail[i.dotIdx];
if (g.variables.count(sym) && !added.count(sym)) {
added.insert(sym);
changes = true;
for (const auto& rule : g.rules) {
if (rule->head == sym) {
toAdd.insert(LR0Item{rule, 0});
}
}
}
}
}
m_items.insert(toAdd.begin(), toAdd.end());
}
}
LR0Itemset LR0Itemset::succ(std::string sym) const {
LR0Itemset sc;
for (auto& item : m_items) {
if (item.dotIdx < item.rule->tail.size()) {
if (item.rule->tail[item.dotIdx] == sym) {
sc.m_items.insert(LR0Item{item.rule, item.dotIdx + 1});
}
}
}
return sc;
}
bool LR0Itemset::operator==(const LR0Itemset& rhs) const {
return m_items == rhs.m_items;
}
LR0Itemset::LR0Itemset() : LR0ItemsetBase<LR0Itemset>()
{}
bool LR0Itemset::canMerge(const LR0Itemset&) const {
LR0Itemset::LR0Itemset(std::shared_ptr<Rule> start) : LR0ItemsetBase<LR0Itemset>(start)
{}
bool LR0Itemset::needsFollow() {
return false;
}
void LR0Itemset::merge(const LR0Itemset&) {
//NO-OP
}
bool LR0Itemset::empty() const {
return m_items.empty();
}
std::set<std::size_t> LR0Itemset::getReduces(const Grammar& g, std::string) const {
std::set<std::size_t> res;

View File

@ -0,0 +1,29 @@
#include "Parsodus/lrtables/SLR1Itemset.h"
#include <algorithm>
namespace pds {
namespace lr {
/**
 * Create an itemset without items.
 */
SLR1Itemset::SLR1Itemset() : LR0ItemsetBase() {}
/**
 * Create an itemset holding the rule `start` with the dot at index 0
 * (delegates to the base class constructor).
 */
SLR1Itemset::SLR1Itemset(std::shared_ptr<Rule> start) : LR0ItemsetBase(start) {}
/**
 * SLR(1) itemsets read the grammar's follow sets in getReduces, so this is
 * always true.
 */
bool SLR1Itemset::needsFollow() {
    const bool usesFollowSets = true;
    return usesFollowSets;
}
/**
 * Collect the rules that should be reduced on the given lookahead.
 *
 * A rule qualifies when one of this itemset's items has the dot at the end
 * of the rule and the lookahead is in the follow set of the rule's head.
 *
 * @param g the grammar; g.follow is dereferenced without a check, so it
 *          must be set (the Generator sets it when needsFollow() is true)
 * @param lookahead the lookahead symbol
 * @return the positions of the qualifying rules within g.rules
 */
std::set<std::size_t> SLR1Itemset::getReduces(const Grammar& g, std::string lookahead) const {
    std::set<std::size_t> reduces;
    for (const auto& item : m_items) {
        if (item.dotIdx < item.rule->tail.size())
            continue; // dot not at the end yet: no reduce for this item

        const std::set<std::string> headFollow = (*g.follow)(item.rule->head);
        if (headFollow.count(lookahead) == 0)
            continue;

        const auto pos = std::find(g.rules.begin(), g.rules.end(), item.rule);
        reduces.insert(pos - g.rules.begin());
    }
    return reduces;
}
} /* lr */
} /* pds */

View File

@ -4,7 +4,7 @@
#include "Parsodus/inputparser.h"
#include "Parsodus/lrtables/generator.h"
#include "Parsodus/lrtables/LR0Itemset.h"
#include "Parsodus/lrtables/SLR1Itemset.h"
int main(int argc, char** argv) {
@ -48,7 +48,7 @@ int main(int argc, char** argv) {
}
std::vector<std::string> names = {"ERROR", "SHIFT", "REDUCE", "ACCEPT"};
pds::lr::Generator<pds::lr::LR0Itemset> g(config.grammar);
pds::lr::Generator<pds::lr::SLR1Itemset> g(config.grammar);
auto tbl = g.generate();
for (std::size_t i = 0; i < tbl.act.size(); i++) {
std::cout << "State " << i << std::endl;

118
src/util/firstset.cpp Normal file
View File

@ -0,0 +1,118 @@
#include "Parsodus/util/firstset.h"
#include "Parsodus/grammar.h"
namespace pds {
namespace util {
/**
 * Compute the first sets for the grammar g by fixed-point iteration.
 *
 * Every terminal starts with itself as its first set. The rules are then
 * processed over and over until a full pass changes nothing: the first set
 * of each tail symbol (without "") is added to the head's first set for as
 * long as the preceding symbols have "" in their first set; "" itself is
 * added to the head when every tail symbol has it (including an empty tail).
 */
FirstSet::FirstSet(const Grammar& g)
{
    for (const std::string& terminal : g.terminals) {
        m_first[terminal].insert(terminal);
    }

    const std::set<std::string> emptyOnly{""};
    bool changes = true;

    // Add s to the first set of head and record whether anything was new.
    // The range insert only runs once a missing element is found, so it is
    // never executed when s is the very set stored for head.
    auto update = [&changes, this](const std::string& head, const std::set<std::string>& s) {
        for (const std::string& elem : s) {
            if (m_first[head].count(elem) == 0) {
                changes = true;
                m_first[head].insert(s.begin(), s.end());
                return;
            }
        }
    };

    while (changes) {
        changes = false;
        for (const auto& rule : g.rules) {
            const std::string& head = rule->head;

            // Stays true when the tail is empty or every symbol has "" in its first set
            bool allCanBeEmpty = true;
            for (const std::string& symbol : rule->tail) {
                const std::set<std::string>& symbolFirst = m_first[symbol];
                if (symbolFirst.count("") == 0) {
                    update(head, symbolFirst);
                    allCanBeEmpty = false;
                    break;
                }
                std::set<std::string> withoutEmpty = symbolFirst;
                withoutEmpty.erase("");
                update(head, withoutEmpty);
            }

            if (allCanBeEmpty) {
                update(head, emptyOnly);
            }
        }
    }
}
/**
 * Look up the first set of a single symbol.
 *
 * @return a copy of the stored set, or an empty set when the symbol is unknown
 */
std::set<std::string> FirstSet::operator()(std::string key) const {
    const auto entry = m_first.find(key);
    if (entry != m_first.end())
        return entry->second;
    return {};
}
/**
 * Compute the first set of a sequence of symbols.
 *
 * The first sets of the symbols are accumulated from left to right for as
 * long as every symbol seen so far has "" in its first set. "" is part of
 * the result only when all symbols have it; this includes the empty
 * sequence, whose first set is exactly {""}.
 *
 * @param sequence the symbols, in order
 * @return the first set of the whole sequence
 */
std::set<std::string> FirstSet::operator()(std::vector<std::string> sequence) const {
    std::set<std::string> result;

    // Starts true so that an empty sequence yields {""}: a sequence without
    // symbols can only derive the empty string.
    bool hasEmpty = true;
    for (const std::string& key : sequence) {
        std::set<std::string> tmp = (*this)(key);
        hasEmpty = tmp.erase("") > 0;
        result.insert(tmp.begin(), tmp.end());
        if (!hasEmpty)
            break; // this symbol cannot vanish: later symbols do not contribute
    }

    if (hasEmpty)
        result.insert("");
    return result;
}
/*
* template <typename Item>
* void Generator<Item>::buildFollow() {
* //EOF follow the added start rule.
* m_follow[EXTENDED_START].insert(EOF_PLACEHOLDER);
*
* bool changes = true;
*
* auto update = [&changes, this](std::string head, auto s) {
* s.erase("");
* for (std::string elem : s) {
* if (!m_follow[head].count(elem)) {
* changes = true;
* m_follow[head].insert(s.begin(), s.end());
* return;
* }
* }
* };
*
* while (changes) {
* changes = false;
* for (const auto& p : m_gram.rules) {
* std::string& head = p.first;
* for (const auto& rule : p.second) {
* for (std::size_t i = 0; i < rule.tail.size(); i++) {
* if (!m_gram.terminals.count(rule.tail[i])) {
* if (i == rule.tail.size() - 1 || m_first[rule.tail[i + 1]].count("")) {
* update(rule.tail[i], m_follow[head]);
* }
* if (i < rule.tail.size() - 1) {
* update(rule.tail[i], m_first[rule.tail[i + 1]]);
* }
* }
* }
* }
* }
* }
* }
*/
} /* util */
} /* pds */

54
src/util/followset.cpp Normal file
View File

@ -0,0 +1,54 @@
#include "Parsodus/util/followset.h"
#include "Parsodus/util/symbols.h"
#include "Parsodus/grammar.h"
namespace pds {
namespace util {
/**
 * Compute the follow sets for the variables of g by fixed-point iteration.
 *
 * The follow set of EXTENDED_START is seeded with EOF_PLACEHOLDER. For each
 * variable occurring in a rule tail, the first set of the symbols behind it
 * (without "") is added to its follow set; when the variable is the last
 * symbol, or everything behind it has "" in its first set, the follow set of
 * the rule's head is added as well. Passes repeat until nothing changes.
 *
 * @param g the grammar
 * @param first the first sets of g
 */
FollowSet::FollowSet(const Grammar& g, const FirstSet& first)
{
    m_follow[EXTENDED_START].insert(EOF_PLACEHOLDER);

    bool changes = true;

    // Add s (minus "") to the follow set of head and record whether anything
    // was new. s is taken by value on purpose: it is modified here, and the
    // caller may pass a set stored in m_follow itself.
    auto update = [&changes, this](const std::string& head, std::set<std::string> s) {
        s.erase("");
        for (const std::string& elem : s) {
            if (!m_follow[head].count(elem)) {
                changes = true;
                m_follow[head].insert(s.begin(), s.end());
                return;
            }
        }
    };

    while (changes) {
        changes = false;
        for (const auto& rule : g.rules) {
            const std::string& head = rule->head;
            const auto& tail = rule->tail;

            // Start of the symbols behind tail[i]. It is advanced only inside
            // the loop, so it is never moved past end(), not even for a rule
            // with an empty tail.
            auto rest = tail.begin();
            for (std::size_t i = 0; i < tail.size(); i++) {
                ++rest;
                if (!g.variables.count(tail[i]))
                    continue;

                const bool isLast = (i == tail.size() - 1);
                const std::set<std::string> restFirst = first(std::vector<std::string>(rest, tail.end()));
                if (isLast || restFirst.count("")) {
                    update(tail[i], m_follow[head]);
                }
                if (!isLast) {
                    update(tail[i], restFirst);
                }
            }
        }
    }
}
/**
 * Look up the follow set of a variable.
 *
 * @return a copy of the stored set, or an empty set when nothing was recorded for key
 */
std::set<std::string> FollowSet::operator()(std::string key) const {
    const auto entry = m_follow.find(key);
    if (entry != m_follow.end())
        return entry->second;
    return {};
}
} /* util */
} /* pds */