Parsodus/src/parser.cpp

330 lines
14 KiB
C++

/*
* Parsodus - A language agnostic parser generator
* Copyright © 2016-2017 Thomas Avé, Robin Jadoul, Kobe Wullaert
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE
* OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "Parsodus/parser.h"
#include "g3log/g3log.hpp"
#include <deque>
namespace pds {
/**
 * Construct a parser that pulls its tokens from the given lexer.
 *
 * @param lex The lexer to read tokens from. It is taken by value and moved
 *            into the parser, so no second copy of the lexer is made.
 */
Parser::Parser(ParsodusLexer lex) :
    parsodusParser<std::unique_ptr<Config>>(), m_lex(std::move(lex)) {
    // NOTE(review): m_precedenceCounter (incremented in reduce_11) is not
    // initialized here — confirm it has a default member initializer in the header.
}
/**
 * Fetch the next token from the lexer and translate it into a parser token.
 *
 * Tokens that carry information (parser type, lexesis file name, terminal,
 * variable, rule name, number, associativity keyword) get a freshly allocated
 * Config holding that information as their value. Pure keyword and punctuation
 * tokens carry a null value.
 *
 * `nonmatching` and `ignore` tokens are skipped: the next token is fetched
 * instead of returning a token whose symbol was never assigned.
 *
 * @return The translated token, or a T_EOF token once the lexer signals
 *         NoMoreTokens.
 * @throws SyntaxError when the lexer signals NoMatch.
 */
Parser::Token Parser::lex() {
    try {
        // Loop so that tokens the parser has no use for can be skipped.
        for (;;) {
            ParsodusLexer::Token orig = m_lex.nextToken();
            // Declared before the switch so no case label jumps over an initialization.
            std::unique_ptr<Config> cnf;
            parsodusParser_Symbol s;
            switch (orig.type) {
                case ParsodusLexer::PARSER:
                    s = parsodusParser_Symbol::T_PARSER;
                    break;
                case ParsodusLexer::PRECEDENCE:
                    s = parsodusParser_Symbol::T_PRECEDENCE;
                    break;
                case ParsodusLexer::LEXESIS:
                    s = parsodusParser_Symbol::T_LEXESIS;
                    break;
                case ParsodusLexer::TERMINALS:
                    s = parsodusParser_Symbol::T_TERMINALS;
                    break;
                case ParsodusLexer::START:
                    s = parsodusParser_Symbol::T_START;
                    break;
                case ParsodusLexer::GRAMMAR:
                    s = parsodusParser_Symbol::T_GRAMMAR;
                    break;
                case ParsodusLexer::PARSERTYPE:
                    cnf = std::make_unique<Config>();
                    cnf->parserType = orig.content;
                    return Token{ parsodusParser_Symbol::T_PARSERTYPE, std::move(cnf) };
                case ParsodusLexer::LEFT:
                    // The associativity is passed along under the "type" key of the precedence map.
                    cnf = std::make_unique<Config>();
                    cnf->grammar.precedence["type"] = std::make_pair(0, PrecedenceType::LEFT);
                    return Token{ parsodusParser_Symbol::T_LEFT, std::move(cnf) };
                case ParsodusLexer::RIGHT:
                    cnf = std::make_unique<Config>();
                    cnf->grammar.precedence["type"] = std::make_pair(2, PrecedenceType::RIGHT);
                    return Token{ parsodusParser_Symbol::T_RIGHT, std::move(cnf) };
                case ParsodusLexer::NONASSOC:
                    cnf = std::make_unique<Config>();
                    cnf->grammar.precedence["type"] = std::make_pair(1, PrecedenceType::NONASSOC);
                    return Token{ parsodusParser_Symbol::T_NONASSOC, std::move(cnf) };
                case ParsodusLexer::LEXESISNAME:
                    cnf = std::make_unique<Config>();
                    cnf->lexesisFile = orig.content;
                    return Token{ parsodusParser_Symbol::T_LEXESISNAME, std::move(cnf) };
                case ParsodusLexer::TERMINAL:
                    // Drop the first and last character of the lexeme (its delimiters).
                    cnf = std::make_unique<Config>();
                    cnf->grammar.terminals.insert(orig.content.substr(1, orig.content.length() - 2));
                    return { parsodusParser_Symbol::T_TERMINAL, std::move(cnf) };
                case ParsodusLexer::VARIABLE:
                    // Drop the first and last character of the lexeme (its delimiters).
                    cnf = std::make_unique<Config>();
                    cnf->grammar.variables.insert(orig.content.substr(1, orig.content.length() - 2));
                    return { parsodusParser_Symbol::T_VARIABLE, std::move(cnf) };
                case ParsodusLexer::ARROW:
                    s = parsodusParser_Symbol::T_ARROW;
                    break;
                case ParsodusLexer::SEMICOLON:
                    s = parsodusParser_Symbol::T_SEMICOLON;
                    break;
                case ParsodusLexer::COLON:
                    s = parsodusParser_Symbol::T_COLON;
                    break;
                case ParsodusLexer::PIPE:
                    s = parsodusParser_Symbol::T_PIPE;
                    break;
                case ParsodusLexer::RULENAME:
                    // A placeholder rule that carries only the rule name for now.
                    cnf = std::make_unique<Config>();
                    cnf->grammar.rules.emplace_back(std::make_shared<Rule>("", std::vector<std::string>{}, orig.content));
                    return { parsodusParser_Symbol::T_RULENAME, std::move(cnf) };
                case ParsodusLexer::NUM:
                    // Only the numeric part is read back (see reduce_25); the
                    // associativity stored next to it is a filler value.
                    cnf = std::make_unique<Config>();
                    cnf->grammar.precedence["num"] = {std::stoi(orig.content), PrecedenceType::RIGHT};
                    return { parsodusParser_Symbol::T_NUM, std::move(cnf) };
                case ParsodusLexer::LBRACKET:
                    s = parsodusParser_Symbol::T_LBRACKET;
                    break;
                case ParsodusLexer::RBRACKET:
                    s = parsodusParser_Symbol::T_RBRACKET;
                    break;
                case ParsodusLexer::COMMA:
                    s = parsodusParser_Symbol::T_COMMA;
                    break;
                case ParsodusLexer::nonmatching:
                case ParsodusLexer::ignore:
                    // Nothing to hand to the parser: move on to the next token
                    // rather than returning a token with an unassigned symbol.
                    continue;
            }
            return Token{ s, nullptr };
        }
    } catch (const ParsodusLexer::NoMoreTokens&) {
        return Token{ parsodusParser_Symbol::T_EOF, nullptr };
    } catch (const ParsodusLexer::NoMatch&) {
        LOG(WARNING) << "Unrecognized character: " << m_lex.peek() << std::endl;
        throw SyntaxError("Unrecognized character");
    }
}
std::unique_ptr<Config> Parser::reduce_0(std::deque<Token> subparts) {
// <section> <sections>
// Check whether there are no different parserType's given
if (subparts[0].value->parserType.empty()) {
subparts[0].value->parserType = subparts[1].value->parserType;
} else if (!subparts[1].value->parserType.empty() &&
subparts[1].value->parserType != subparts[0].value->parserType) {
throw SyntaxError("Found more than 1 different parser type");
}
// Check whether there are no different lexesisFile's given
if (subparts[0].value->lexesisFile.empty()){
subparts[0].value->lexesisFile = subparts[1].value->lexesisFile;
} else if (!subparts[1].value->lexesisFile.empty() &&
subparts[1].value->lexesisFile != subparts[0].value->lexesisFile){
throw SyntaxError("Found more than 1 different lexesis file");
}
// Check whether there are no different grammar's given
// Check whether there are no different start terminals given
if (subparts[0].value->grammar.start.empty()){
subparts[0].value->grammar.start = subparts[1].value->grammar.start;
} else if (!subparts[1].value->grammar.start.empty() &&
subparts[1].value->grammar.start != subparts[0].value->grammar.start){
throw SyntaxError("Found more than 1 different start terminal");
}
// Check whether there are no different variable sets given
if (subparts[0].value->grammar.variables.empty()){
subparts[0].value->grammar.variables = subparts[1].value->grammar.variables;
} else if (!subparts[1].value->grammar.variables.empty() &&
subparts[1].value->grammar.variables != subparts[0].value->grammar.variables){
throw SyntaxError("Found more than 1 different variable set");
}
// Check whether there are no different terminal sets given
if (subparts[0].value->grammar.terminals.empty()) {
subparts[0].value->grammar.terminals = subparts[1].value->grammar.terminals;
} else if (!subparts[1].value->grammar.terminals.empty() &&
subparts[1].value->grammar.terminals != subparts[0].value->grammar.terminals){
throw SyntaxError("Found more than 1 different terminal set");
}
// Check whether there are no different rule sets given
if (subparts[0].value->grammar.rules.empty()) {
subparts[0].value->grammar.rules = subparts[1].value->grammar.rules;
} else if (!subparts[1].value->grammar.rules.empty()){
throw SyntaxError("Found more than 1 different rule set");
}
// Check whether there are no different precedence sets given
if (subparts[0].value->grammar.precedence.empty()) {
subparts[0].value->grammar.precedence = subparts[1].value->grammar.precedence;
} else if (!subparts[1].value->grammar.precedence.empty() &&
subparts[1].value->grammar.precedence != subparts[0].value->grammar.precedence) {
throw SyntaxError("Found more than 1 different precedence set");
}
// REMARK: Everything is now put into subparts[0]
return std::move(subparts[0].value);
}
/**
 * Reduction that ignores its sub-parts and yields a fresh,
 * default-constructed Config.
 */
std::unique_ptr<Config> Parser::reduce_1(std::deque<Token>) {
    auto blank = std::make_unique<Config>();
    return blank;
}
/**
 * Reduction for: "PARSER" "COLON" "PARSERTYPE"
 *
 * Passes on the Config attached to the third sub-part (the PARSERTYPE token).
 */
std::unique_ptr<Config> Parser::reduce_2(std::deque<Token> subparts) {
    std::unique_ptr<Config> parserTypeConfig = std::move(subparts[2].value);
    return parserTypeConfig;
}
/**
 * Reduction for: "LEXESIS" "COLON" "LEXESISNAME"
 *
 * Passes on the Config attached to the third sub-part (the LEXESISNAME token).
 */
std::unique_ptr<Config> Parser::reduce_3(std::deque<Token> subparts) {
    std::unique_ptr<Config> lexesisConfig = std::move(subparts[2].value);
    return lexesisConfig;
}
/**
 * Reduction for: "TERMINALS" "COLON" <terminals>
 *
 * Passes on the Config attached to the third sub-part (<terminals>).
 */
std::unique_ptr<Config> Parser::reduce_4(std::deque<Token> subparts) {
    std::unique_ptr<Config> terminalsConfig = std::move(subparts[2].value);
    return terminalsConfig;
}
/**
 * Reduction for: "PRECEDENCE" "COLON" <precedences>
 *
 * Passes on the Config attached to the third sub-part (<precedences>).
 */
std::unique_ptr<Config> Parser::reduce_5(std::deque<Token> subparts) {
    std::unique_ptr<Config> precedenceConfig = std::move(subparts[2].value);
    return precedenceConfig;
}
/**
 * Reduction for: "START" "COLON" "VARIABLE"
 *
 * Builds a fresh Config whose grammar start symbol is the first variable
 * stored in the VARIABLE token's Config.
 */
std::unique_ptr<Config> Parser::reduce_6(std::deque<Token> subparts) {
    auto startConfig = std::make_unique<Config>();
    const auto& tokenVariables = subparts[2].value->grammar.variables;
    startConfig->grammar.start = *tokenVariables.begin();
    return startConfig;
}
/**
 * Reduction for: "GRAMMAR" "COLON" <rules>
 *
 * Passes on the Config attached to the third sub-part (<rules>).
 */
std::unique_ptr<Config> Parser::reduce_7(std::deque<Token> subparts) {
    std::unique_ptr<Config> rulesConfig = std::move(subparts[2].value);
    return rulesConfig;
}
/**
 * Reduction for: "TERMINAL" <terminals>
 *
 * Adds the first terminal stored in the TERMINAL token's Config to the
 * terminal set of <terminals> and returns that Config.
 */
std::unique_ptr<Config> Parser::reduce_8(std::deque<Token> subparts) {
    const auto& tokenTerminals = subparts[0].value->grammar.terminals;
    auto& collected = subparts[1].value->grammar.terminals;
    collected.insert(*tokenTerminals.begin());
    return std::move(subparts[1].value);
}
/**
 * Reduction for the empty production: yields a fresh,
 * default-constructed Config.
 */
std::unique_ptr<Config> Parser::reduce_9(std::deque<Token>) {
    auto blank = std::make_unique<Config>();
    return blank;
}
/**
 * Reduction for: <precedence> <terminals> <precedences>
 *
 * Handles the leading "<precedence> <terminals>" pair through reduce_11 and
 * adds the resulting precedence entries to those of <precedences>. Entries
 * already present in <precedences> are left untouched.
 */
std::unique_ptr<Config> Parser::reduce_10(std::deque<Token> subparts) {
    // Detach the trailing <precedences> so the remaining two parts match reduce_11's input.
    std::unique_ptr<Config> remaining = std::move(subparts[2].value);
    subparts.pop_back();

    const std::unique_ptr<Config> current = reduce_11(std::move(subparts));
    const auto& fresh = current->grammar.precedence;
    remaining->grammar.precedence.insert(fresh.begin(), fresh.end());
    return remaining;
}
std::unique_ptr<Config> Parser::reduce_11(std::deque<Token> subparts) {
// <precedence> <terminals>
PrecedenceType typ = subparts[0].value->grammar.precedence["type"].second;
for (std::string t : subparts[1].value->grammar.terminals) {
subparts[1].value->grammar.precedence[t] = {m_precedenceCounter, typ};
}
subparts[1].value->grammar.terminals.clear();
m_precedenceCounter++;
return std::move(subparts[1].value);
}
/**
 * Reduction for: "LEFT"
 *
 * Passes on the Config attached to the LEFT token.
 */
std::unique_ptr<Config> Parser::reduce_12(std::deque<Token> subparts) {
    std::unique_ptr<Config> assoc = std::move(subparts[0].value);
    return assoc;
}
/**
 * Reduction for: "RIGHT"
 *
 * Passes on the Config attached to the RIGHT token.
 */
std::unique_ptr<Config> Parser::reduce_13(std::deque<Token> subparts) {
    std::unique_ptr<Config> assoc = std::move(subparts[0].value);
    return assoc;
}
/**
 * Reduction for: "NONASSOC"
 *
 * Passes on the Config attached to the NONASSOC token.
 */
std::unique_ptr<Config> Parser::reduce_14(std::deque<Token> subparts) {
    std::unique_ptr<Config> assoc = std::move(subparts[0].value);
    return assoc;
}
/**
 * Reduction for: <rule> <rules>
 *
 * Puts the rules of <rule> in front of the rules of <rules>, keeping their
 * relative order, and adds the variables of <rule> to those of <rules>.
 *
 * @return The Config of <rules>, now holding the combined data.
 */
std::unique_ptr<Config> Parser::reduce_15(std::deque<Token> subparts) {
    auto& incomingRules = subparts[0].value->grammar.rules;
    auto& collectedRules = subparts[1].value->grammar.rules;
    // Walk backwards so that repeated front-insertion preserves the incoming order.
    for (auto rit = incomingRules.rbegin(); rit != incomingRules.rend(); ++rit) {
        collectedRules.emplace_front(std::move(*rit));
    }

    const auto& incomingVariables = subparts[0].value->grammar.variables;
    subparts[1].value->grammar.variables.insert(incomingVariables.begin(), incomingVariables.end());

    return std::move(subparts[1].value);
}
/**
 * Reduction for: <rule>
 *
 * Passes on the Config attached to <rule>.
 */
std::unique_ptr<Config> Parser::reduce_16(std::deque<Token> subparts) {
    std::unique_ptr<Config> ruleConfig = std::move(subparts[0].value);
    return ruleConfig;
}
std::unique_ptr<Config> Parser::reduce_17(std::deque<Token> subparts) {
//"VARIABLE" "ARROW" <bodies>
std::string head = *subparts[0].value->grammar.variables.begin();
for (auto& rule : subparts[2].value->grammar.rules)
rule->head = head;
subparts[2].value->grammar.variables.insert(head);
return std::move(subparts[2].value);
}
/**
 * Reduction for: <body> "PIPE" <bodies>
 *
 * Moves the first rule of <body> to the front of the rules of <bodies> and
 * returns the Config of <bodies>.
 */
std::unique_ptr<Config> Parser::reduce_18(std::deque<Token> subparts) {
    auto& leadingBody = subparts[0].value->grammar.rules[0];
    auto& alternatives = subparts[2].value->grammar.rules;
    alternatives.emplace_front(std::move(leadingBody));
    return std::move(subparts[2].value);
}
/**
 * Reduction for: <body>
 *
 * Passes on the Config attached to <body>.
 */
std::unique_ptr<Config> Parser::reduce_19(std::deque<Token> subparts) {
    std::unique_ptr<Config> bodyConfig = std::move(subparts[0].value);
    return bodyConfig;
}
/**
 * Reduction for: <term_var> "LBRACKET" "RULENAME" <opt_prec> "RBRACKET"
 * (closing bracket presumed — confirm against the grammar definition)
 *
 * Moves the tail collected in <term_var> into the rule carried by the
 * RULENAME token. When <opt_prec> has a value, the rule's precedence is set
 * to {true, <the "rule" precedence entry of opt_prec>}.
 *
 * @return The Config of the RULENAME token.
 */
std::unique_ptr<Config> Parser::reduce_20(std::deque<Token> subparts) {
    auto& namedRule = subparts[2].value->grammar.rules[0];
    auto& collectedTail = subparts[0].value->grammar.rules[0]->tail;
    namedRule->tail = std::move(collectedTail);

    const auto& optionalPrecedence = subparts[3].value;
    if (optionalPrecedence) {
        namedRule->precedence = {true, optionalPrecedence->grammar.precedence["rule"]};
    }
    return std::move(subparts[2].value);
}
/**
 * Reduction for: <term_var>
 *
 * Passes on the Config attached to <term_var>.
 */
std::unique_ptr<Config> Parser::reduce_21(std::deque<Token> subparts) {
    std::unique_ptr<Config> termVarConfig = std::move(subparts[0].value);
    return termVarConfig;
}
/**
 * Reduction for: <term_var> "VARIABLE"
 *
 * Appends the first variable stored in the VARIABLE token's Config to the
 * tail of the first rule of <term_var> and returns the Config of <term_var>.
 */
std::unique_ptr<Config> Parser::reduce_22(std::deque<Token> subparts) {
    const auto& variableName = *subparts[1].value->grammar.variables.begin();
    auto& ruleTail = subparts[0].value->grammar.rules[0]->tail;
    ruleTail.emplace_back(variableName);
    return std::move(subparts[0].value);
}
/**
 * Reduction for: <term_var> "TERMINAL"
 *
 * Appends the first terminal stored in the TERMINAL token's Config to the
 * tail of the first rule of <term_var> and returns the Config of <term_var>.
 */
std::unique_ptr<Config> Parser::reduce_23(std::deque<Token> subparts) {
    const auto& terminalName = *subparts[1].value->grammar.terminals.begin();
    auto& ruleTail = subparts[0].value->grammar.rules[0]->tail;
    ruleTail.emplace_back(terminalName);
    return std::move(subparts[0].value);
}
/**
 * Reduction for the empty production: yields a Config holding a single rule
 * with an empty head and an empty tail.
 */
std::unique_ptr<Config> Parser::reduce_24(std::deque<Token>) {
    auto seed = std::make_unique<Config>();
    auto emptyRule = std::make_shared<Rule>("", std::vector<std::string>{});
    seed->grammar.rules.emplace_back(std::move(emptyRule));
    return seed;
}
/**
 * Reduction for: <opt_prec> ::= "COMMA" <precedence> "NUM"
 *
 * Stores, under the "rule" key of the <precedence> Config, the number taken
 * from the NUM token ("num" entry) paired with the associativity taken from
 * <precedence> ("type" entry).
 *
 * @return The Config of <precedence>.
 */
std::unique_ptr<Config> Parser::reduce_25(std::deque<Token> subparts) {
    auto& assocEntries = subparts[1].value->grammar.precedence;
    auto& numberEntries = subparts[2].value->grammar.precedence;

    const auto level = numberEntries["num"].first;
    const auto associativity = assocEntries["type"].second;
    assocEntries["rule"] = {level, associativity};

    return std::move(subparts[1].value);
}
/**
 * Reduction for the empty production: <opt_prec> ::=
 *
 * Yields a null Config; reduce_20 tests for this to see whether a precedence
 * was given.
 */
std::unique_ptr<Config> Parser::reduce_26(std::deque<Token>) {
    return std::unique_ptr<Config>{};
}
} //namespace pds