LR(0) table generator
This commit is contained in:
parent
6df924a282
commit
91fd08ba3e
|
@ -2,6 +2,7 @@
|
|||
#ifndef PARSODUS_GRAMMAR_H
|
||||
|
||||
#include <map>
|
||||
#include <memory>
|
||||
#include <set>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
@ -22,6 +23,9 @@ namespace pds {
|
|||
return tail < other.tail;
|
||||
}
|
||||
}
|
||||
|
||||
Rule() : head(""), tail() {}
|
||||
Rule(const std::string& h, const std::vector<std::string>& t) : head(h), tail(t) {}
|
||||
};
|
||||
|
||||
/**
|
||||
|
@ -32,7 +36,7 @@ namespace pds {
|
|||
std::string start; ///< the starting variable
|
||||
std::set<std::string> variables; ///< the variables
|
||||
std::set<std::string> terminals; ///< the terminals
|
||||
std::vector<Rule> rules; ///< the replacement rules
|
||||
std::vector<std::shared_ptr<Rule>> rules; ///< the replacement rules
|
||||
};
|
||||
}
|
||||
|
||||
|
|
|
@ -0,0 +1,23 @@
|
|||
#pragma once
|
||||
#ifndef LRTABLES_LR0ITEM_H_3RNST1YA
|
||||
#define LRTABLES_LR0ITEM_H_3RNST1YA
|
||||
|
||||
#include "Parsodus/grammar.h"
|
||||
|
||||
#include <memory>
|
||||
|
||||
namespace pds {
|
||||
namespace lr {
|
||||
|
||||
struct LR0Item {
|
||||
std::shared_ptr<Rule> rule;
|
||||
std::size_t dotIdx;
|
||||
|
||||
bool operator<(const LR0Item& rhs) const; ///< Useful to keep it in a set/map
|
||||
bool operator==(const LR0Item& rhs) const;
|
||||
};
|
||||
|
||||
} /* lr */
|
||||
} /* pds */
|
||||
|
||||
#endif /* LRTABLES_LR0ITEM_H_3RNST1YA */
|
|
@ -0,0 +1,39 @@
|
|||
#pragma once
|
||||
#ifndef LRTABLES_LR0ITEMSET_H_HTSWOGFB
|
||||
#define LRTABLES_LR0ITEMSET_H_HTSWOGFB
|
||||
|
||||
#include "Parsodus/lrtables/generator.h"
|
||||
#include "Parsodus/lrtables/LR0Item.h"
|
||||
|
||||
#include <memory>
|
||||
|
||||
namespace pds {
|
||||
namespace lr {
|
||||
|
||||
/**
|
||||
* An LR(0) itemset, @see Generator for details on the public methods
|
||||
*/
|
||||
class LR0Itemset {
|
||||
public:
|
||||
LR0Itemset();
|
||||
LR0Itemset(std::shared_ptr<Rule> start);
|
||||
|
||||
void close(const Grammar& g);
|
||||
LR0Itemset succ(std::string sym) const;
|
||||
bool operator==(const LR0Itemset& rhs) const;
|
||||
bool canMerge(const LR0Itemset& rhs) const;
|
||||
void merge(const LR0Itemset& rhs);
|
||||
bool empty() const;
|
||||
std::set<std::size_t> getReduces(const Grammar& g, std::string lookahead) const;
|
||||
|
||||
void print() const; ///TODO remove me
|
||||
|
||||
private:
|
||||
std::set<LR0Item> m_items;
|
||||
|
||||
};
|
||||
|
||||
} /* lr */
|
||||
} /* pds */
|
||||
|
||||
#endif /* LRTABLES_LR0ITEMSET_H_HTSWOGFB */
|
|
@ -7,6 +7,7 @@
|
|||
|
||||
#include <algorithm>
|
||||
#include <cassert>
|
||||
#include <memory>
|
||||
#include <queue>
|
||||
|
||||
namespace pds {
|
||||
|
@ -22,13 +23,12 @@ const std::string EOF_PLACEHOLDER = "$";
|
|||
* An Itemset should support:
|
||||
* - A constructor taking a single Rule, that makes this a starting rule
|
||||
* - void close(const Grammar&); compute the closure
|
||||
* - Itemset succ(const Grammar&, std::string) const; compute the successor of this set, over the given symbol
|
||||
* - Itemset succ(std::string) const; compute the successor of this set, over the given symbol
|
||||
* - bool operator==(const Itemset&); are these two Itemsets equal
|
||||
* - bool canMerge(const Itemset&) const; Can the given Itemset be merged into this one
|
||||
* - void merge(const Itemset&); Merge the given Itemset into this one
|
||||
* - Action action(const std::string&) const; Determine the action to be executed on given lookahead
|
||||
* - bool empty() const; is this Itemset empty (== not useful)
|
||||
* - std::set<std::size_t> getReduces(std::string) const; get all Rule indices where a reduce should happen with given lookahead (not necessarily a set, but iterable)
|
||||
* - std::set<std::size_t> getReduces(const Grammar&, std::string) const; get all Rule indices where a reduce should happen with given lookahead (not necessarily a set, but iterable)
|
||||
*/
|
||||
template <typename Itemset>
|
||||
class Generator {
|
||||
|
@ -39,7 +39,7 @@ class Generator {
|
|||
* @param start The start symbol for the grammar
|
||||
* @param g The grammar to translate
|
||||
*/
|
||||
Generator(const std::string& start, const Grammar& g);
|
||||
Generator(const Grammar& g);
|
||||
|
||||
/**
|
||||
* Generate an LRTable based on given grammar
|
||||
|
@ -50,19 +50,18 @@ class Generator {
|
|||
|
||||
private:
|
||||
Grammar m_gram;
|
||||
Rule m_startrule;
|
||||
std::shared_ptr<Rule> m_startrule;
|
||||
};
|
||||
|
||||
template <typename Itemset>
|
||||
Generator<Itemset>::Generator(const std::string& start, const Grammar& g) : m_gram(g), m_startrule(Rule{EXTENDED_START, {start}}) {
|
||||
Generator<Itemset>::Generator(const Grammar& g) : m_gram(g), m_startrule(std::make_shared<Rule>(EXTENDED_START, std::vector<std::string>{g.start})) {
|
||||
m_gram.terminals.insert(EOF_PLACEHOLDER); //End of file
|
||||
m_gram.variables.insert(EXTENDED_START); //modified start rule
|
||||
m_gram.rules[EXTENDED_START].push_back(m_startrule);
|
||||
m_gram.rules.push_back(m_startrule);
|
||||
}
|
||||
|
||||
template <typename Itemset>
|
||||
LRTable Generator<Itemset>::generate() {
|
||||
//TODO: generate the table
|
||||
LRTable table;
|
||||
|
||||
//Start with size 1
|
||||
|
@ -73,10 +72,13 @@ LRTable Generator<Itemset>::generate() {
|
|||
itemsets.emplace_back(Itemset(m_startrule));
|
||||
itemsets[0].close(m_gram);
|
||||
|
||||
std::set<std::string> symbols = std::set_union(m_gram.terminals.begin(), m_gram.terminals.end(),
|
||||
m_gram.variables.begin(), m_gram.variables.end());
|
||||
std::set<std::string> symbols;
|
||||
std::set_union(m_gram.terminals.begin(), m_gram.terminals.end(),
|
||||
m_gram.variables.begin(), m_gram.variables.end(),
|
||||
std::inserter(symbols, symbols.end()));
|
||||
|
||||
std::queue<std::pair<std::size_t, Itemset>> q;
|
||||
q.emplace(0, itemsets[0]);
|
||||
while (!q.empty()) {
|
||||
auto& curP = q.front();
|
||||
std::size_t curIdx = curP.first;
|
||||
|
@ -84,7 +86,7 @@ LRTable Generator<Itemset>::generate() {
|
|||
q.pop();
|
||||
|
||||
for (const std::string& sym : symbols) {
|
||||
Itemset s = cur.succ(m_gram, sym);
|
||||
Itemset s = cur.succ(sym);
|
||||
if (s.empty())
|
||||
continue;
|
||||
s.close(m_gram);
|
||||
|
@ -100,8 +102,8 @@ LRTable Generator<Itemset>::generate() {
|
|||
}
|
||||
}
|
||||
if (idx == itemsets.size()) {
|
||||
q.push(s);
|
||||
itemsets.emplace_back(idx, std::move(s));
|
||||
q.emplace(idx, s);
|
||||
itemsets.emplace_back(std::move(s));
|
||||
|
||||
//Grow the table
|
||||
table.act.emplace_back();
|
||||
|
@ -112,24 +114,25 @@ LRTable Generator<Itemset>::generate() {
|
|||
table.goto_[curIdx][sym] = idx;
|
||||
} else {
|
||||
table.act[curIdx][sym] = std::make_pair(Action::SHIFT, idx);
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
for (std::string term : m_gram.terminals) {
|
||||
//Get reduces from the itemset, add them to the table, look for conflicts
|
||||
for (std::size_t rule_applied : cur.getReduces(term)) {
|
||||
for (std::size_t rule_applied : cur.getReduces(m_gram, term)) {
|
||||
if (rule_applied == m_gram.rules.size() - 1) { // The last added rule
|
||||
// The extended start rule
|
||||
assert(term == EOF_PLACEHOLDER);
|
||||
table.act[curIdx][term] = std::make_pair(Action::ACCEPT, 0);
|
||||
if (term == EOF_PLACEHOLDER)
|
||||
table.act[curIdx][term] = std::make_pair(Action::ACCEPT, 0);
|
||||
} else if (table.act[curIdx].count(term)) {
|
||||
if (table.act[curIdx][term].first == Action::SHIFT) {
|
||||
//Shift-Reduce conflict, report and resolve it (TODO)
|
||||
throw "shift-reduce";
|
||||
} else if (table.act[curIdx][term].first == Action::REDUCE
|
||||
&& table.act[curIdx][term].second != rule_applied) {
|
||||
//Reduce-Reduce conflict, report it (TODO)
|
||||
throw 1;
|
||||
} else {
|
||||
//Reduce using the same rule, no problem, NO-OP
|
||||
}
|
||||
|
|
|
@ -14,8 +14,10 @@ else()
|
|||
endif()
|
||||
include_directories("${CMAKE_CURRENT_BINARY_DIR}")
|
||||
|
||||
# add_library(Parsodus-tables
|
||||
# )
|
||||
add_library(Parsodus-tables
|
||||
lrtables/LR0Item.cpp
|
||||
lrtables/LR0Itemset.cpp
|
||||
)
|
||||
|
||||
add_library(Parsodus-backends
|
||||
backends/cpp.cpp
|
||||
|
@ -32,7 +34,7 @@ add_executable(Parsodus
|
|||
main.cpp
|
||||
)
|
||||
target_link_libraries(Parsodus
|
||||
#Parsodus-tables
|
||||
Parsodus-tables
|
||||
Parsodus-backends
|
||||
pds
|
||||
mstch::mstch)
|
||||
|
|
|
@ -76,24 +76,24 @@ namespace pds {
|
|||
token = lex.nextToken();
|
||||
if(token.type != ParsodusLexer::ARROW)
|
||||
throw InputParserException("No arrow found after '"+ current_head+"', but found '" + token.content + "' instead");
|
||||
Rule rule;
|
||||
rule.head = current_head;
|
||||
std::shared_ptr<Rule> rule = std::make_shared<Rule>();
|
||||
rule->head = current_head;
|
||||
bool parsing_head = true;
|
||||
while(parsing_head) {
|
||||
token = lex.nextToken();
|
||||
switch(token.type) {
|
||||
case ParsodusLexer::VARIABLE:
|
||||
rule.tail.push_back(token.content);
|
||||
rule->tail.push_back(token.content);
|
||||
break;
|
||||
case ParsodusLexer::TERMINAL:
|
||||
rule.tail.push_back(token.content);
|
||||
rule->tail.push_back(token.content);
|
||||
break;
|
||||
case ParsodusLexer::SEMICOLON:
|
||||
parsing_head = false;
|
||||
case ParsodusLexer::PIPE:
|
||||
rule.tail.shrink_to_fit();
|
||||
config.grammar.rules.push_back(rule);
|
||||
rule.tail.clear();
|
||||
rule->tail.shrink_to_fit();
|
||||
config.grammar.rules.push_back(std::make_shared<Rule>(*rule));
|
||||
rule->tail.clear();
|
||||
break;
|
||||
default:
|
||||
throw InputParserException("Expecting to find a variable, terminal, pipe or a semicolon, but found '" + token.content + "' instead");
|
||||
|
|
|
@ -0,0 +1,19 @@
|
|||
#include "Parsodus/lrtables/LR0Item.h"
|
||||
|
||||
namespace pds {
|
||||
namespace lr {
|
||||
|
||||
/**
 * Order items by dot position first; items with the same dot position are
 * ordered by their rule pointer.
 */
bool LR0Item::operator<(const LR0Item& rhs) const {
    if (dotIdx == rhs.dotIdx)
        return rule < rhs.rule; // tie on the dot: fall back to the pointer order
    return dotIdx < rhs.dotIdx;
}
|
||||
|
||||
/**
 * Two items are equal when they point to the same rule object and have the
 * dot at the same index.
 */
bool LR0Item::operator==(const LR0Item& rhs) const {
    if (dotIdx != rhs.dotIdx)
        return false;
    return rule == rhs.rule;
}
|
||||
|
||||
} /* lr */
|
||||
} /* pds */
|
|
@ -0,0 +1,79 @@
|
|||
#include "Parsodus/lrtables/LR0Itemset.h"
|
||||
|
||||
namespace pds {
|
||||
namespace lr {
|
||||
|
||||
/// Construct an itemset that contains no items
LR0Itemset::LR0Itemset() = default;
|
||||
|
||||
/**
 * Construct an itemset containing exactly one item: the given rule with the
 * dot in front of its first tail symbol.
 */
LR0Itemset::LR0Itemset(std::shared_ptr<Rule> start) {
    const LR0Item kernel{start, 0};
    m_items.insert(kernel);
}
|
||||
|
||||
void LR0Itemset::close(const Grammar& g) {
|
||||
bool changes = true;
|
||||
std::vector<LR0Item> todo;
|
||||
std::set<std::string> added;
|
||||
|
||||
while (changes) {
|
||||
changes = false;
|
||||
std::set<LR0Item> toAdd;
|
||||
|
||||
for (const LR0Item& i : m_items) {
|
||||
if (i.dotIdx < i.rule->tail.size()) {
|
||||
std::string& sym = i.rule->tail[i.dotIdx];
|
||||
if (g.variables.count(sym) && !added.count(sym)) {
|
||||
added.insert(sym);
|
||||
changes = true;
|
||||
for (const auto& rule : g.rules) {
|
||||
if (rule->head == sym) {
|
||||
toAdd.insert(LR0Item{rule, 0});
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
m_items.insert(toAdd.begin(), toAdd.end());
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Compute the successor of this itemset over the given symbol.
 *
 * The result holds, for every item that has sym directly after its dot, the
 * same item with the dot moved one position further. The result is not
 * closed, and it is empty when no item has sym after the dot.
 *
 * @param sym The symbol to move the dot over
 */
LR0Itemset LR0Itemset::succ(std::string sym) const {
    LR0Itemset result;
    for (const LR0Item& cur : m_items) {
        const auto& tail = cur.rule->tail;
        if (cur.dotIdx >= tail.size() || tail[cur.dotIdx] != sym)
            continue; // dot at the end, or a different symbol follows it
        result.m_items.insert(LR0Item{cur.rule, cur.dotIdx + 1});
    }
    return result;
}
|
||||
|
||||
/// Two itemsets are equal when their sets of items compare equal
bool LR0Itemset::operator==(const LR0Itemset& rhs) const {
    const bool sameItems = (m_items == rhs.m_items);
    return sameItems;
}
|
||||
|
||||
/// This itemset type never reports another itemset as mergeable: always false
bool LR0Itemset::canMerge(const LR0Itemset& /*rhs*/) const {
    return false;
}
|
||||
|
||||
/// Does nothing; the argument is ignored
void LR0Itemset::merge(const LR0Itemset& /*rhs*/) {
    // intentionally empty
}
|
||||
|
||||
bool LR0Itemset::empty() const {
|
||||
return m_items.empty();
|
||||
}
|
||||
|
||||
std::set<std::size_t> LR0Itemset::getReduces(const Grammar& g, std::string) const {
|
||||
std::set<std::size_t> res;
|
||||
for (auto& item : m_items) {
|
||||
if (item.dotIdx >= item.rule->tail.size()) {
|
||||
res.insert(std::find(g.rules.begin(), g.rules.end(), item.rule) - g.rules.begin());
|
||||
}
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
} /* lr */
|
||||
} /* pds */
|
21
src/main.cpp
21
src/main.cpp
|
@ -3,6 +3,9 @@
|
|||
#include "optparse.h"
|
||||
#include "Parsodus/inputparser.h"
|
||||
|
||||
#include "Parsodus/lrtables/generator.h"
|
||||
#include "Parsodus/lrtables/LR0Itemset.h"
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
|
||||
optparse::OptionParser parser = optparse::OptionParser().description("Parsodus").usage("Parsodus [-d <outputdir>] [-l <language>] [-n <lexername>] <inputfile.pds>");
|
||||
|
@ -37,11 +40,25 @@ int main(int argc, char** argv) {
|
|||
std::cout << "Variable: " << a << std::endl;
|
||||
std::cout << "Rules: " << std::endl;
|
||||
for(auto a: config.grammar.rules) {
|
||||
std::cout << "\t" << a.head << " -> ";
|
||||
for(auto c: a.tail) {
|
||||
std::cout << "\t" << a->head << " -> ";
|
||||
for(auto c: a->tail) {
|
||||
std::cout << c << " ";
|
||||
}
|
||||
std::cout << std::endl;
|
||||
}
|
||||
|
||||
std::vector<std::string> names = {"ERROR", "SHIFT", "REDUCE", "ACCEPT"};
|
||||
pds::lr::Generator<pds::lr::LR0Itemset> g(config.grammar);
|
||||
auto tbl = g.generate();
|
||||
for (std::size_t i = 0; i < tbl.act.size(); i++) {
|
||||
std::cout << "State " << i << std::endl;
|
||||
std::cout << " Action:" << std::endl;
|
||||
for (auto& p : tbl.act[i]) {
|
||||
std::cout << " " << p.first << ": " << names[static_cast<int>(p.second.first)] << " " << p.second.second << std::endl;
|
||||
}
|
||||
std::cout << " Goto:" << std::endl;
|
||||
for (auto& p : tbl.goto_[i]) {
|
||||
std::cout << " " << p.first << ": " << p.second << std::endl;;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue