330 lines
14 KiB
C++
330 lines
14 KiB
C++
/*
|
|
* Parsodus - A language agnostic parser generator
|
|
* Copyright © 2016-2017 Thomas Avé, Robin Jadoul, Kobe Wullaert
|
|
*
|
|
* Permission is hereby granted, free of charge, to any person obtaining
|
|
* a copy of this software and associated documentation files (the "Software"),
|
|
* to deal in the Software without restriction, including without limitation
|
|
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
|
* and/or sell copies of the Software, and to permit persons to whom the
|
|
* Software is furnished to do so, subject to the following conditions:
|
|
*
|
|
* The above copyright notice and this permission notice shall be included
|
|
* in all copies or substantial portions of the Software.
|
|
*
|
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
|
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
|
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
|
|
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
|
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE
|
|
* OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
|
*/
|
|
|
|
#include "Parsodus/parser.h"

#include "g3log/g3log.hpp"

#include <deque>
#include <iterator>
#include <utility>
|
|
|
|
namespace pds {
|
|
|
|
/// Constructs a Parser that reads its token stream from the given lexer.
/// The lexer parameter is a sink: it is moved into the member instead of
/// copied (falls back to a copy if ParsodusLexer has no move constructor).
Parser::Parser(ParsodusLexer lex) :
    parsodusParser<std::unique_ptr<Config>>(), m_lex(std::move(lex)) {
}
|
|
|
|
/// Fetches the next token from the lexer and translates it into a parser
/// symbol, attaching a partially-filled Config as the semantic value where
/// the token carries content (names, numbers, associativity, ...).
///
/// @return the next parser Token; T_EOF when the lexer is exhausted.
/// @throws SyntaxError when the lexer reports an unmatchable character.
Parser::Token Parser::lex() {
    try {
        // Keep pulling tokens until one is meaningful to the parser.
        // 'ignore' and 'nonmatching' tokens are skipped here instead of
        // being forwarded with an uninitialized symbol, which was
        // undefined behavior in the previous version.
        while (true) {
            ParsodusLexer::Token orig = m_lex.nextToken();
            std::unique_ptr<Config> cnf;
            parsodusParser_Symbol s;
            switch(orig.type) {
                case ParsodusLexer::PARSER:
                    s = parsodusParser_Symbol::T_PARSER;
                    break;
                case ParsodusLexer::PRECEDENCE:
                    s = parsodusParser_Symbol::T_PRECEDENCE;
                    break;
                case ParsodusLexer::LEXESIS:
                    s = parsodusParser_Symbol::T_LEXESIS;
                    break;
                case ParsodusLexer::TERMINALS:
                    s = parsodusParser_Symbol::T_TERMINALS;
                    break;
                case ParsodusLexer::START:
                    s = parsodusParser_Symbol::T_START;
                    break;
                case ParsodusLexer::GRAMMAR:
                    s = parsodusParser_Symbol::T_GRAMMAR;
                    break;
                case ParsodusLexer::PARSERTYPE:
                    // Payload: the requested parser backend name.
                    cnf = std::make_unique<Config>();
                    cnf->parserType = orig.content;
                    return Token{ parsodusParser_Symbol::T_PARSERTYPE, std::move(cnf) };
                case ParsodusLexer::LEFT:
                    // The int in the pair is a placeholder level; reductions
                    // only read the PrecedenceType — TODO confirm.
                    cnf = std::make_unique<Config>();
                    cnf->grammar.precedence["type"] = std::make_pair(0, PrecedenceType::LEFT);
                    return Token{ parsodusParser_Symbol::T_LEFT, std::move(cnf) };
                case ParsodusLexer::RIGHT:
                    cnf = std::make_unique<Config>();
                    cnf->grammar.precedence["type"] = std::make_pair(2, PrecedenceType::RIGHT);
                    return Token{ parsodusParser_Symbol::T_RIGHT, std::move(cnf) };
                case ParsodusLexer::NONASSOC:
                    cnf = std::make_unique<Config>();
                    cnf->grammar.precedence["type"] = std::make_pair(1, PrecedenceType::NONASSOC);
                    return Token{ parsodusParser_Symbol::T_NONASSOC, std::move(cnf) };
                case ParsodusLexer::LEXESISNAME:
                    cnf = std::make_unique<Config>();
                    cnf->lexesisFile = orig.content;
                    return Token{ parsodusParser_Symbol::T_LEXESISNAME, std::move(cnf) };
                case ParsodusLexer::TERMINAL:
                    // Strip the surrounding delimiter characters (first/last).
                    cnf = std::make_unique<Config>();
                    cnf->grammar.terminals.insert(orig.content.substr(1, orig.content.length() - 2));
                    return { parsodusParser_Symbol::T_TERMINAL, std::move(cnf) };
                case ParsodusLexer::VARIABLE:
                    cnf = std::make_unique<Config>();
                    cnf->grammar.variables.insert(orig.content.substr(1, orig.content.length() - 2));
                    return { parsodusParser_Symbol::T_VARIABLE, std::move(cnf) };
                case ParsodusLexer::ARROW:
                    s = parsodusParser_Symbol::T_ARROW;
                    break;
                case ParsodusLexer::SEMICOLON:
                    s = parsodusParser_Symbol::T_SEMICOLON;
                    break;
                case ParsodusLexer::COLON:
                    s = parsodusParser_Symbol::T_COLON;
                    break;
                case ParsodusLexer::PIPE:
                    s = parsodusParser_Symbol::T_PIPE;
                    break;
                case ParsodusLexer::RULENAME:
                    // Seed a rule carrying only its name; head and tail are
                    // filled in by later reductions.
                    cnf = std::make_unique<Config>();
                    cnf->grammar.rules.emplace_back(std::make_shared<Rule>("", std::vector<std::string>{}, orig.content));
                    return { parsodusParser_Symbol::T_RULENAME, std::move(cnf) };
                case ParsodusLexer::NUM:
                    // Only the numeric level matters; RIGHT is a placeholder
                    // associativity — TODO confirm.
                    cnf = std::make_unique<Config>();
                    cnf->grammar.precedence["num"] = {std::stoi(orig.content), PrecedenceType::RIGHT};
                    return { parsodusParser_Symbol::T_NUM, std::move(cnf) };
                case ParsodusLexer::LBRACKET:
                    s = parsodusParser_Symbol::T_LBRACKET;
                    break;
                case ParsodusLexer::RBRACKET:
                    s = parsodusParser_Symbol::T_RBRACKET;
                    break;
                case ParsodusLexer::COMMA:
                    s = parsodusParser_Symbol::T_COMMA;
                    break;
                case ParsodusLexer::nonmatching:
                case ParsodusLexer::ignore:
                    // Not interesting for the parser: fetch the next token.
                    continue;
            }
            return Token{ s, nullptr };
        }
    } catch(const ParsodusLexer::NoMoreTokens&) {
        return Token{ parsodusParser_Symbol::T_EOF, nullptr };
    } catch (const ParsodusLexer::NoMatch&) {
        LOG(WARNING) << "Unrecognized character: " << m_lex.peek() << std::endl;
        throw SyntaxError("Unrecognized character");
    }
}
|
|
|
|
std::unique_ptr<Config> Parser::reduce_0(std::deque<Token> subparts) {
|
|
// <section> <sections>
|
|
|
|
// Check whether there are no different parserType's given
|
|
if (subparts[0].value->parserType.empty()) {
|
|
subparts[0].value->parserType = subparts[1].value->parserType;
|
|
} else if (!subparts[1].value->parserType.empty() &&
|
|
subparts[1].value->parserType != subparts[0].value->parserType) {
|
|
throw SyntaxError("Found more than 1 different parser type");
|
|
}
|
|
|
|
// Check whether there are no different lexesisFile's given
|
|
if (subparts[0].value->lexesisFile.empty()){
|
|
subparts[0].value->lexesisFile = subparts[1].value->lexesisFile;
|
|
} else if (!subparts[1].value->lexesisFile.empty() &&
|
|
subparts[1].value->lexesisFile != subparts[0].value->lexesisFile){
|
|
throw SyntaxError("Found more than 1 different lexesis file");
|
|
}
|
|
// Check whether there are no different grammar's given
|
|
// Check whether there are no different start terminals given
|
|
if (subparts[0].value->grammar.start.empty()){
|
|
subparts[0].value->grammar.start = subparts[1].value->grammar.start;
|
|
} else if (!subparts[1].value->grammar.start.empty() &&
|
|
subparts[1].value->grammar.start != subparts[0].value->grammar.start){
|
|
throw SyntaxError("Found more than 1 different start terminal");
|
|
}
|
|
// Check whether there are no different variable sets given
|
|
if (subparts[0].value->grammar.variables.empty()){
|
|
subparts[0].value->grammar.variables = subparts[1].value->grammar.variables;
|
|
} else if (!subparts[1].value->grammar.variables.empty() &&
|
|
subparts[1].value->grammar.variables != subparts[0].value->grammar.variables){
|
|
throw SyntaxError("Found more than 1 different variable set");
|
|
}
|
|
// Check whether there are no different terminal sets given
|
|
if (subparts[0].value->grammar.terminals.empty()) {
|
|
subparts[0].value->grammar.terminals = subparts[1].value->grammar.terminals;
|
|
} else if (!subparts[1].value->grammar.terminals.empty() &&
|
|
subparts[1].value->grammar.terminals != subparts[0].value->grammar.terminals){
|
|
throw SyntaxError("Found more than 1 different terminal set");
|
|
}
|
|
// Check whether there are no different rule sets given
|
|
if (subparts[0].value->grammar.rules.empty()) {
|
|
subparts[0].value->grammar.rules = subparts[1].value->grammar.rules;
|
|
} else if (!subparts[1].value->grammar.rules.empty()){
|
|
throw SyntaxError("Found more than 1 different rule set");
|
|
}
|
|
// Check whether there are no different precedence sets given
|
|
if (subparts[0].value->grammar.precedence.empty()) {
|
|
subparts[0].value->grammar.precedence = subparts[1].value->grammar.precedence;
|
|
} else if (!subparts[1].value->grammar.precedence.empty() &&
|
|
subparts[1].value->grammar.precedence != subparts[0].value->grammar.precedence) {
|
|
throw SyntaxError("Found more than 1 different precedence set");
|
|
}
|
|
// REMARK: Everything is now put into subparts[0]
|
|
|
|
return std::move(subparts[0].value);
|
|
}
|
|
std::unique_ptr<Config> Parser::reduce_1(std::deque<Token>) {
|
|
return std::make_unique<Config>();
|
|
}
|
|
std::unique_ptr<Config> Parser::reduce_2(std::deque<Token> subparts) {
|
|
// "PARSER" "COLON" "PARSERTYPE"
|
|
return std::move(subparts[2].value);
|
|
}
|
|
std::unique_ptr<Config> Parser::reduce_3(std::deque<Token> subparts) {
|
|
// "LEXESIS" "COLON" "LEXESISNAME"
|
|
return std::move(subparts[2].value);
|
|
}
|
|
std::unique_ptr<Config> Parser::reduce_4(std::deque<Token> subparts) {
|
|
// "TERMINALS" "COLON" <terminals>
|
|
return std::move(subparts[2].value);
|
|
}
|
|
std::unique_ptr<Config> Parser::reduce_5(std::deque<Token> subparts) {
|
|
// "PRECEDENCE" "COLON" <precedences>
|
|
return std::move(subparts[2].value);
|
|
}
|
|
std::unique_ptr<Config> Parser::reduce_6(std::deque<Token> subparts) {
|
|
// "START" "COLON" "VARIABLE"
|
|
auto cnf = std::make_unique<Config>();
|
|
cnf->grammar.start = *subparts[2].value->grammar.variables.begin();
|
|
return cnf;
|
|
}
|
|
std::unique_ptr<Config> Parser::reduce_7(std::deque<Token> subparts) {
|
|
// "GRAMMAR" "COLON" <rules>
|
|
return std::move(subparts[2].value);
|
|
}
|
|
/// <terminals> ::= "TERMINAL" <terminals>
/// Fold the single terminal carried by the TERMINAL token into the set
/// built from the rest of the list.
std::unique_ptr<Config> Parser::reduce_8(std::deque<Token> subparts) {
    auto cfg = std::move(subparts[1].value);
    cfg->grammar.terminals.insert(*subparts[0].value->grammar.terminals.begin());
    return cfg;
}
|
|
std::unique_ptr<Config> Parser::reduce_9(std::deque<Token>) {
|
|
// ""
|
|
return std::make_unique<Config>();
|
|
}
|
|
|
|
std::unique_ptr<Config> Parser::reduce_10(std::deque<Token> subparts) {
|
|
// <precedence> <terminals> <precedences>
|
|
auto other = std::move(subparts[2].value);
|
|
subparts.pop_back();
|
|
std::unique_ptr<Config> cfg = reduce_11(std::move(subparts));
|
|
|
|
for(auto& p : cfg->grammar.precedence)
|
|
other->grammar.precedence.insert(p);
|
|
return other;
|
|
|
|
}
|
|
std::unique_ptr<Config> Parser::reduce_11(std::deque<Token> subparts) {
|
|
// <precedence> <terminals>
|
|
PrecedenceType typ = subparts[0].value->grammar.precedence["type"].second;
|
|
for (std::string t : subparts[1].value->grammar.terminals) {
|
|
subparts[1].value->grammar.precedence[t] = {m_precedenceCounter, typ};
|
|
}
|
|
subparts[1].value->grammar.terminals.clear();
|
|
m_precedenceCounter++;
|
|
return std::move(subparts[1].value);
|
|
}
|
|
std::unique_ptr<Config> Parser::reduce_12(std::deque<Token> subparts) {
|
|
//"LEFT"
|
|
return std::move(subparts[0].value);
|
|
}
|
|
std::unique_ptr<Config> Parser::reduce_13(std::deque<Token> subparts) {
|
|
// "RIGHT"
|
|
return std::move(subparts[0].value);
|
|
}
|
|
std::unique_ptr<Config> Parser::reduce_14(std::deque<Token> subparts) {
|
|
// "NONASSOC"
|
|
return std::move(subparts[0].value);
|
|
}
|
|
/// <rules> ::= <rule> <rules>
/// Prepends the rules of the leading <rule> (preserving their order, same
/// effect as the old reverse emplace_front loop) and merges the variable
/// sets into the Config accumulated by the tail.
std::unique_ptr<Config> Parser::reduce_15(std::deque<Token> subparts) {
    auto& dstRules = subparts[1].value->grammar.rules;
    auto& srcRules = subparts[0].value->grammar.rules;
    dstRules.insert(dstRules.begin(),
                    std::make_move_iterator(srcRules.begin()),
                    std::make_move_iterator(srcRules.end()));

    const auto& srcVars = subparts[0].value->grammar.variables;
    subparts[1].value->grammar.variables.insert(srcVars.begin(), srcVars.end());
    return std::move(subparts[1].value);
}
|
|
std::unique_ptr<Config> Parser::reduce_16(std::deque<Token> subparts) {
|
|
// <rule>
|
|
return std::move(subparts[0].value);
|
|
}
|
|
/// <rule> ::= "VARIABLE" "ARROW" <bodies>
/// Stamps the head variable onto every body produced for this rule and
/// records the variable in the grammar's variable set.
std::unique_ptr<Config> Parser::reduce_17(std::deque<Token> subparts) {
    auto cfg = std::move(subparts[2].value);
    const std::string head = *subparts[0].value->grammar.variables.begin();
    for (auto& rule : cfg->grammar.rules) {
        rule->head = head;
    }
    cfg->grammar.variables.insert(head);
    return cfg;
}
|
|
/// <bodies> ::= <body> "PIPE" <bodies>
/// Keeps source order: the newly parsed body goes in front of the bodies
/// collected by the tail.
std::unique_ptr<Config> Parser::reduce_18(std::deque<Token> subparts) {
    auto cfg = std::move(subparts[2].value);
    cfg->grammar.rules.emplace_front(std::move(subparts[0].value->grammar.rules[0]));
    return cfg;
}
|
|
std::unique_ptr<Config> Parser::reduce_19(std::deque<Token> subparts) {
|
|
// <body>
|
|
return std::move(subparts[0].value);
|
|
}
|
|
/// <body> ::= <term_var> "LBRACKET" "RULENAME" <opt_prec> "RBRACKET"
/// The RULENAME token created the rule object; attach the parsed tail and,
/// when present, the optional precedence annotation.
std::unique_ptr<Config> Parser::reduce_20(std::deque<Token> subparts) {
    auto cfg = std::move(subparts[2].value);
    cfg->grammar.rules[0]->tail = std::move(subparts[0].value->grammar.rules[0]->tail);
    if (subparts[3].value) {  // <opt_prec> yields nullptr when absent
        cfg->grammar.rules[0]->precedence = {true, subparts[3].value->grammar.precedence["rule"]};
    }
    return cfg;
}
|
|
std::unique_ptr<Config> Parser::reduce_21(std::deque<Token> subparts) {
|
|
// <term_var>
|
|
return std::move(subparts[0].value);
|
|
}
|
|
/// <term_var> ::= <term_var> "VARIABLE"
/// Appends the variable name to the tail of the rule under construction.
std::unique_ptr<Config> Parser::reduce_22(std::deque<Token> subparts) {
    auto cfg = std::move(subparts[0].value);
    cfg->grammar.rules[0]->tail.emplace_back(*subparts[1].value->grammar.variables.begin());
    return cfg;
}
|
|
/// <term_var> ::= <term_var> "TERMINAL"
/// Appends the terminal name to the tail of the rule under construction.
std::unique_ptr<Config> Parser::reduce_23(std::deque<Token> subparts) {
    auto cfg = std::move(subparts[0].value);
    cfg->grammar.rules[0]->tail.emplace_back(*subparts[1].value->grammar.terminals.begin());
    return cfg;
}
|
|
std::unique_ptr<Config> Parser::reduce_24(std::deque<Token>) {
|
|
// ""
|
|
auto cnf = std::make_unique<Config>();
|
|
cnf->grammar.rules.emplace_back(std::make_shared<Rule>("", std::vector<std::string>{}));
|
|
return cnf;
|
|
}
|
|
std::unique_ptr<Config> Parser::reduce_25(std::deque<Token> subparts) {
|
|
// <opt_prec> ::= "COMMA" <precedence> "NUM"
|
|
subparts[1].value->grammar.precedence["rule"] = {subparts[2].value->grammar.precedence["num"].first, subparts[1].value->grammar.precedence["type"].second};
|
|
return std::move(subparts[1].value);
|
|
}
|
|
/// <opt_prec> ::=
/// An absent precedence annotation is signalled with a null Config;
/// reduce_20 tests the value before using it.
std::unique_ptr<Config> Parser::reduce_26(std::deque<Token>) {
    return {};
}
|
|
|
|
} //namespace pds
|