278 lines
11 KiB
C++
278 lines
11 KiB
C++
#include "Parsodus/parser.h"
|
|
#include <deque>
|
|
|
|
namespace pds {
|
|
|
|
/**
 * Construct a parser that reads its tokens from the given lexer.
 *
 * The generated base parser is default-constructed and the lexer is
 * copied into m_lex.
 *
 * @param lex  lexer that lex() will pull tokens from
 */
Parser::Parser(ParsodusLexer lex)
    : parsodusParser<std::unique_ptr<Config>>()
    , m_lex(lex)
{}
|
|
|
|
/**
 * Fetch the next token from the underlying lexer and translate it into a
 * parser token (symbol + optional Config payload).
 *
 * Keyword and punctuation tokens carry no payload (nullptr). Tokens that
 * carry text get a fresh Config in which the text is stored:
 *   - PARSERTYPE   -> Config::parserType
 *   - LEXESISNAME  -> Config::lexesisFile
 *   - TERMINAL     -> Config::grammar.terminals
 *   - VARIABLE     -> Config::grammar.variables
 *   - RULENAME     -> name of a single placeholder Rule in Config::grammar.rules
 *   - LEFT/RIGHT/NONASSOC -> associativity stored under the "type" key of
 *                            Config::grammar.precedence
 *
 * @return the translated token, or a T_EOF token once the lexer signals
 *         NoMoreTokens.
 * @throws const char*  when the lexer yields a token type that has no
 *         parser symbol (previously this returned an uninitialized symbol).
 */
Parser::Token Parser::lex() {
    try {
        ParsodusLexer::Token orig = m_lex.nextToken();
        std::unique_ptr<Config> cnf;

        switch (orig.type) {
            // --- payload-free keywords and punctuation ---
            case ParsodusLexer::PARSER:
                return Token{ parsodusParser_Symbol::T_PARSER, nullptr };
            case ParsodusLexer::PRECEDENCE:
                return Token{ parsodusParser_Symbol::T_PRECEDENCE, nullptr };
            case ParsodusLexer::LEXESIS:
                return Token{ parsodusParser_Symbol::T_LEXESIS, nullptr };
            case ParsodusLexer::TERMINALS:
                return Token{ parsodusParser_Symbol::T_TERMINALS, nullptr };
            case ParsodusLexer::START:
                return Token{ parsodusParser_Symbol::T_START, nullptr };
            case ParsodusLexer::GRAMMAR:
                return Token{ parsodusParser_Symbol::T_GRAMMAR, nullptr };
            case ParsodusLexer::ARROW:
                return Token{ parsodusParser_Symbol::T_ARROW, nullptr };
            case ParsodusLexer::SEMICOLON:
                return Token{ parsodusParser_Symbol::T_SEMICOLON, nullptr };
            case ParsodusLexer::COLON:
                return Token{ parsodusParser_Symbol::T_COLON, nullptr };
            case ParsodusLexer::PIPE:
                return Token{ parsodusParser_Symbol::T_PIPE, nullptr };

            // --- tokens that carry content ---
            case ParsodusLexer::PARSERTYPE:
                cnf = std::make_unique<Config>();
                cnf->parserType = orig.content;
                // unique_ptr is move-only: the payload must be moved into the token
                return Token{ parsodusParser_Symbol::T_PARSERTYPE, std::move(cnf) };
            case ParsodusLexer::LEXESISNAME:
                // reduce_2 forwards this token's value, so it must not be null
                cnf = std::make_unique<Config>();
                cnf->lexesisFile = orig.content;
                return Token{ parsodusParser_Symbol::T_LEXESISNAME, std::move(cnf) };
            case ParsodusLexer::LEFT:
                cnf = std::make_unique<Config>();
                cnf->grammar.precedence["type"] = std::make_pair(0, PrecedenceType::LEFT);
                return Token{ parsodusParser_Symbol::T_LEFT, std::move(cnf) };
            case ParsodusLexer::RIGHT:
                cnf = std::make_unique<Config>();
                cnf->grammar.precedence["type"] = std::make_pair(2, PrecedenceType::RIGHT);
                return Token{ parsodusParser_Symbol::T_RIGHT, std::move(cnf) };
            case ParsodusLexer::NONASSOC:
                cnf = std::make_unique<Config>();
                cnf->grammar.precedence["type"] = std::make_pair(1, PrecedenceType::NONASSOC);
                return Token{ parsodusParser_Symbol::T_NONASSOC, std::move(cnf) };
            case ParsodusLexer::TERMINAL:
                cnf = std::make_unique<Config>();
                cnf->grammar.terminals.insert(orig.content);
                return Token{ parsodusParser_Symbol::T_TERMINAL, std::move(cnf) };
            case ParsodusLexer::VARIABLE:
                cnf = std::make_unique<Config>();
                cnf->grammar.variables.insert(orig.content);
                return Token{ parsodusParser_Symbol::T_VARIABLE, std::move(cnf) };
            case ParsodusLexer::RULENAME:
                cnf = std::make_unique<Config>();
                cnf->grammar.rules.emplace_back(
                    std::make_shared<Rule>("", std::vector<std::string>{}, orig.content));
                return Token{ parsodusParser_Symbol::T_RULENAME, std::move(cnf) };

            default:
                // No parser symbol exists for this token type; fail loudly rather
                // than hand an indeterminate symbol to the parser.
                throw "Unexpected token type received from the lexer";
        }
    } catch (const ParsodusLexer::NoMoreTokens&) {
        return Token{ parsodusParser_Symbol::T_EOF, nullptr };
    }
}
|
|
|
|
std::unique_ptr<Config> Parser::reduce_0(std::deque<Token> subparts) {
|
|
// <section> <sections>
|
|
|
|
// Check whether there are no different parserType's given
|
|
if (subparts[0].value->parserType.empty())
|
|
subparts[0].value->parserType = subparts[1].value->parserType;
|
|
else if (!subparts[1].value->parserType.empty() &&
|
|
subparts[1].value->parserType != subparts[0].value->parserType)
|
|
throw "Found more than 1 different parser types"; // MODIFY EXCEPTION
|
|
|
|
// Check whether there are no different lexesisFile's given
|
|
if (subparts[0].value->lexesisFile.empty())
|
|
subparts[0].value->lexesisFile = subparts[1].value->lexesisFile;
|
|
else if (!subparts[1].value->lexesisFile.empty() &&
|
|
subparts[1].value->lexesisFile != subparts[0].value->lexesisFile)
|
|
throw "Found more than 1 different lexesis files"; // MODIFY EXCEPTION
|
|
|
|
// Check whether there are no different grammar's given
|
|
// Check whether there are no different start terminals given
|
|
if (subparts[0].value->grammar.start.empty())
|
|
subparts[0].value->grammar.start = subparts[1].value->grammar.start;
|
|
else if (!subparts[1].value->grammar.start.empty() &&
|
|
subparts[1].value->grammar.start != subparts[0].value->grammar.start)
|
|
throw "Found more than 1 different start terminals"; // MODIFY EXCEPTION
|
|
|
|
// Check whether there are no different variable sets given
|
|
if (subparts[0].value->grammar.variables.empty())
|
|
subparts[0].value->grammar.variables = subparts[1].value->grammar.variables;
|
|
else if (!subparts[1].value->grammar.variables.empty() &&
|
|
subparts[1].value->grammar.variables != subparts[0].value->grammar.variables)
|
|
throw "Found more than 1 different variable sets"; // MODIFY EXCEPTION
|
|
|
|
// Check whether there are no different terminal sets given
|
|
if (subparts[0].value->grammar.terminals.empty())
|
|
subparts[0].value->grammar.terminals = subparts[1].value->grammar.terminals;
|
|
else if (!subparts[1].value->grammar.terminals.empty() &&
|
|
subparts[1].value->grammar.terminals != subparts[0].value->grammar.terminals)
|
|
throw "Found more than 1 different terminal sets"; // MODIFY EXCEPTION
|
|
|
|
// Check whether there are no different rule sets given
|
|
if (subparts[0].value->grammar.rules.empty())
|
|
subparts[0].value->grammar.rules = subparts[1].value->grammar.rules;
|
|
else if (!subparts[1].value->grammar.rules.empty() &&
|
|
subparts[1].value->grammar.rules != subparts[0].value->grammar.rules)
|
|
throw "Found more than 1 different rule sets"; // MODIFY EXCEPTION
|
|
|
|
// Check whether there are no different precedence sets given
|
|
if (subparts[0].value->grammar.precedence.empty())
|
|
subparts[0].value->grammar.precedence = subparts[1].value->grammar.precedence;
|
|
else if (!subparts[1].value->grammar.precedence.empty() &&
|
|
subparts[1].value->grammar.precedence != subparts[0].value->grammar.precedence)
|
|
throw "Found more than 1 different precedence sets"; // MODIFY EXCEPTION
|
|
|
|
// REMARK: Everything is now put into subparts[0]
|
|
|
|
// Set precedence of each rule
|
|
for(std::shared_ptr<Rule>& rule : subparts[0].value->grammar.rules) {
|
|
if (rule->tail.size() == 0)
|
|
rule->precedence = std::make_pair(false,std::make_pair(-1,PrecedenceType::LEFT));
|
|
auto prec = subparts[0].value->grammar.precedence.find(rule->tail.back());
|
|
if (prec != subparts[0].value->grammar.precedence.end())
|
|
rule->precedence = std::make_pair(true,prec);
|
|
else rule->precedence = std::make_pair(false,prec);
|
|
}
|
|
// REMARK: No option yet for explicit rule precedence
|
|
|
|
return std::move(subparts[0].value);
|
|
}
|
|
std::unique_ptr<Config> Parser::reduce_1(std::deque<Token> subparts) {
|
|
// "PARSER" "COLON" "PARSERTYPE"
|
|
return std::move(subparts[2].value);
|
|
}
|
|
std::unique_ptr<Config> Parser::reduce_2(std::deque<Token> subparts) {
|
|
// "LEXESIS" "COLON" "LEXESISNAME"
|
|
return std::move(subparts[2].value);
|
|
}
|
|
std::unique_ptr<Config> Parser::reduce_3(std::deque<Token> subparts) {
|
|
// "TERMINALS" "COLON" <terminals>
|
|
return std::move(subparts[2].value);
|
|
}
|
|
std::unique_ptr<Config> Parser::reduce_4(std::deque<Token> subparts) {
|
|
// "PRECEDENCE" "COLON" <precedences>
|
|
return std::move(subparts[2].value);
|
|
}
|
|
std::unique_ptr<Config> Parser::reduce_5(std::deque<Token> subparts) {
|
|
// "START" "COLON" "TERMINAL"
|
|
auto cnf = std::make_unique<Config>();
|
|
cnf->grammar.start = *subparts[2].value->terminals.begin();
|
|
return std::move(cnf);
|
|
}
|
|
std::unique_ptr<Config> Parser::reduce_6(std::deque<Token> subparts) {
|
|
// "GRAMMAR" "COLON" <rules>
|
|
return std::move(subparts[2].value);
|
|
}
|
|
/**
 * Reduction for: "TERMINAL" <terminals>
 *
 * Adds the first terminal name of the TERMINAL token to the terminal set
 * already collected in <terminals>.
 *
 * @return the value of <terminals> (subparts[1]), moved out of subparts.
 */
std::unique_ptr<Config> Parser::reduce_7(std::deque<Token> subparts) {
    auto& collected = subparts[1].value;
    const auto& single = subparts[0].value->grammar.terminals;
    collected->grammar.terminals.insert(*single.begin());
    return std::move(collected);
}
|
|
std::unique_ptr<Config> Parser::reduce_8(std::deque<Token> subparts) {
|
|
// ""
|
|
return std::make_unique<Config>();
|
|
}
|
|
|
|
std::unique_ptr<Config> Parser::reduce_9(std::deque<Token> subparts) {
|
|
// <precedence> <terminals> <precedences>
|
|
auto other = std::move(subparts[2].value);
|
|
subparts.pop_back();
|
|
std::unique_ptr<Config> cfg = reduce_10(std::move(subparts));
|
|
|
|
for(auto& p : cfg->grammar.precedence)
|
|
other->grammar.precedence.insert(p);
|
|
return std::move(other);
|
|
|
|
}
|
|
std::unique_ptr<Config> Parser::reduce_10(std::deque<Token> subparts) {
|
|
// <precedence> <terminals>
|
|
PrecedenceType typ = subparts[0].value->grammar.precedence["type"].second;
|
|
for (std::string t : subparts[1].value->grammar.terminals) {
|
|
subparts[1].value->grammar.precedence[t] = {m_precedenceCounter, typ};
|
|
}
|
|
m_precedenceCounter++;
|
|
return std::move(subparts[1].value);
|
|
}
|
|
std::unique_ptr<Config> Parser::reduce_11(std::deque<Token> subparts) {
|
|
//"LEFT"
|
|
return std::move(subparts[0].value);
|
|
}
|
|
std::unique_ptr<Config> Parser::reduce_12(std::deque<Token> subparts) {
|
|
// "RIGHT"
|
|
return std::move(subparts[0].value);
|
|
}
|
|
std::unique_ptr<Config> Parser::reduce_13(std::deque<Token> subparts) {
|
|
// "NONASSOC"
|
|
return std::move(subparts[0].value);
|
|
}
|
|
/**
 * Reduction for: <rule> <rules>
 *
 * Appends every rule held by <rule> to the rule list of <rules>.
 * A single <rule> can hold several Rule objects (reduce_16 creates one per
 * alternative body), so all of them are transferred, not just the first.
 * An empty <rule> list is handled and simply adds nothing.
 *
 * @param subparts  [0] = <rule>, [1] = <rules>
 * @return the value of <rules> (subparts[1]) with the extra rules appended.
 */
std::unique_ptr<Config> Parser::reduce_14(std::deque<Token> subparts) {
    auto& collected = subparts[1].value->grammar.rules;
    for (auto& rule : subparts[0].value->grammar.rules)
        collected.emplace_back(std::move(rule));
    return std::move(subparts[1].value);
}
|
|
std::unique_ptr<Config> Parser::reduce_15(std::deque<Token> subparts) {
|
|
// <rule>
|
|
return std::move(subparts[0].value);
|
|
}
|
|
std::unique_ptr<Config> Parser::reduce_16(std::deque<Token> subparts) {
|
|
//"VARIABLE" "ARROW" <bodies>
|
|
std::string head = subparts[0].value->grammar.rules[0]->head;
|
|
subparts[0].value->grammar.rules.clear();
|
|
for(std::shared_ptr<Rule> rule : subparts[2].value->grammar.rules)
|
|
subparts[0].value->grammar.rules.push_back(std::make_shared(Rule(head, rule->tail, rule->name))); // SOMETHING WRONG WITH THIS STATEMENT
|
|
return std::move(subparts[0].value);
|
|
}
|
|
std::unique_ptr<Config> Parser::reduce_17(std::deque<Token> subparts) {
|
|
// <body> "PIPE" <bodies>
|
|
for(auto rule : subparts[2].value->grammar.rules)
|
|
subparts[0].value->grammar.rules.push_back(rule);
|
|
return std::move(subparts[0].value);
|
|
}
|
|
std::unique_ptr<Config> Parser::reduce_18(std::deque<Token> subparts) {
|
|
// <body>
|
|
return std::move(subparts[0].value);
|
|
}
|
|
/**
 * Reduction for: <term_var> "RULENAME"
 *
 * Copies the name held by the RULENAME token's first rule onto the first
 * rule of <term_var>.
 *
 * @return the value of <term_var> (subparts[0]), now named.
 */
std::unique_ptr<Config> Parser::reduce_19(std::deque<Token> subparts) {
    auto& body = subparts[0].value;
    const auto& label = subparts[1].value->grammar.rules[0]->name;
    body->grammar.rules[0]->name = label;
    return std::move(body);
}
|
|
std::unique_ptr<Config> Parser::reduce_20(std::deque<Token> subparts) {
|
|
// <term_var>
|
|
return std::move(subparts[0].value);
|
|
}
|
|
/**
 * Reduction for: <term_var> "VARIABLE"
 *
 * Appends the variable named by the VARIABLE token to the tail of the
 * first rule of <term_var>. lex() stores that name in the token's
 * grammar.variables set and leaves its rule list empty, so the name is
 * read from the set.
 *
 * @param subparts  [0] = <term_var>, [1] = VARIABLE token
 * @return the value of <term_var> (subparts[0]) with the longer tail.
 * @throws const char*  if the VARIABLE token carries no name
 */
std::unique_ptr<Config> Parser::reduce_21(std::deque<Token> subparts) {
    const auto& names = subparts[1].value->grammar.variables;
    if (names.empty())
        throw "VARIABLE token carries no variable name";

    subparts[0].value->grammar.rules[0]->tail.push_back(*names.begin());
    return std::move(subparts[0].value);
}
|
|
/**
 * Reduction for: <term_var> "TERMINAL"
 *
 * Appends the terminal named by the TERMINAL token to the tail of the
 * first rule of <term_var>. lex() stores that name in the token's
 * grammar.terminals set and leaves its rule list empty, so the name is
 * read from the set.
 *
 * @param subparts  [0] = <term_var>, [1] = TERMINAL token
 * @return the value of <term_var> (subparts[0]) with the longer tail.
 * @throws const char*  if the TERMINAL token carries no name
 */
std::unique_ptr<Config> Parser::reduce_22(std::deque<Token> subparts) {
    const auto& names = subparts[1].value->grammar.terminals;
    if (names.empty())
        throw "TERMINAL token carries no terminal name";

    subparts[0].value->grammar.rules[0]->tail.push_back(*names.begin());
    return std::move(subparts[0].value);
}
|
|
std::unique_ptr<Config> Parser::reduce_23(std::deque<Token> subparts) {
|
|
// ""
|
|
auto cnf = std::make_unique<Config>();
|
|
cnf->grammar.rules.emplace_back(std::make_shared<Rule>("", {}));
|
|
return std::move(subparts[0].value);
|
|
}
|
|
}
|