/*
 * Parsodus - A language agnostic parser generator
 * Copyright © 2016-2017 Thomas Avé, Robin Jadoul, Kobe Wullaert
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE
 * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

#include "Parsodus/parser.h"
#include "g3log/g3log.hpp"

#include <deque>
#include <stdexcept>
#include <string>

namespace pds {

/**
 * Construct a parser that reads its tokens from a copy of `lex`.
 */
Parser::Parser(ParsodusLexer lex) :
    parsodusParser<std::unique_ptr<Config>>(), m_lex(lex) {
    // reduce_11 reads and increments this counter, so make sure it never
    // starts out indeterminate. Assigned in the body (rather than in the
    // initializer list) so the member declaration order does not matter.
    m_precedenceCounter = 0;
}

/**
 * Fetch the next lexer token and translate it into a parser token.
 *
 * Keyword and punctuation tokens carry no value (nullptr). Tokens with
 * content carry a fresh Config in which exactly one field holds the payload:
 *  - PARSERTYPE            -> parserType
 *  - LEXESISNAME           -> lexesisFile
 *  - LEFT/RIGHT/NONASSOC   -> grammar.precedence["type"]
 *  - NUM                   -> grammar.precedence["num"]
 *  - TERMINAL / VARIABLE   -> grammar.terminals / grammar.variables
 *                             (first and last character of the content stripped)
 *  - RULENAME              -> a single nameless-head rule carrying the name
 *
 * @return the translated token, or a T_EOF token once the lexer is exhausted
 * @throws SyntaxError when the lexer reports no match, or when a NUM token
 *         cannot be converted to an int
 */
Parser::Token Parser::lex() {
    try {
        // Loop so that lexer tokens without a grammar symbol are skipped
        // instead of being returned with an indeterminate symbol.
        while (true) {
            ParsodusLexer::Token orig = m_lex.nextToken();
            switch (orig.type) {
                // --- value-less keywords ---
                case ParsodusLexer::PARSER:
                    return Token{ parsodusParser_Symbol::T_PARSER, nullptr };
                case ParsodusLexer::PRECEDENCE:
                    return Token{ parsodusParser_Symbol::T_PRECEDENCE, nullptr };
                case ParsodusLexer::LEXESIS:
                    return Token{ parsodusParser_Symbol::T_LEXESIS, nullptr };
                case ParsodusLexer::TERMINALS:
                    return Token{ parsodusParser_Symbol::T_TERMINALS, nullptr };
                case ParsodusLexer::START:
                    return Token{ parsodusParser_Symbol::T_START, nullptr };
                case ParsodusLexer::GRAMMAR:
                    return Token{ parsodusParser_Symbol::T_GRAMMAR, nullptr };

                // --- tokens carrying a payload ---
                case ParsodusLexer::PARSERTYPE: {
                    auto cnf = std::make_unique<Config>();
                    cnf->parserType = orig.content;
                    return Token{ parsodusParser_Symbol::T_PARSERTYPE, std::move(cnf) };
                }
                case ParsodusLexer::LEFT: {
                    auto cnf = std::make_unique<Config>();
                    cnf->grammar.precedence["type"] = std::make_pair(0, PrecedenceType::LEFT);
                    return Token{ parsodusParser_Symbol::T_LEFT, std::move(cnf) };
                }
                case ParsodusLexer::RIGHT: {
                    auto cnf = std::make_unique<Config>();
                    cnf->grammar.precedence["type"] = std::make_pair(2, PrecedenceType::RIGHT);
                    return Token{ parsodusParser_Symbol::T_RIGHT, std::move(cnf) };
                }
                case ParsodusLexer::NONASSOC: {
                    auto cnf = std::make_unique<Config>();
                    cnf->grammar.precedence["type"] = std::make_pair(1, PrecedenceType::NONASSOC);
                    return Token{ parsodusParser_Symbol::T_NONASSOC, std::move(cnf) };
                }
                case ParsodusLexer::LEXESISNAME: {
                    auto cnf = std::make_unique<Config>();
                    cnf->lexesisFile = orig.content;
                    return Token{ parsodusParser_Symbol::T_LEXESISNAME, std::move(cnf) };
                }
                case ParsodusLexer::TERMINAL: {
                    auto cnf = std::make_unique<Config>();
                    // Drop the first and last character of the token text.
                    cnf->grammar.terminals.insert(orig.content.substr(1, orig.content.length() - 2));
                    return Token{ parsodusParser_Symbol::T_TERMINAL, std::move(cnf) };
                }
                case ParsodusLexer::VARIABLE: {
                    auto cnf = std::make_unique<Config>();
                    // Drop the first and last character of the token text.
                    cnf->grammar.variables.insert(orig.content.substr(1, orig.content.length() - 2));
                    return Token{ parsodusParser_Symbol::T_VARIABLE, std::move(cnf) };
                }
                case ParsodusLexer::RULENAME: {
                    auto cnf = std::make_unique<Config>();
                    cnf->grammar.rules.emplace_back(
                            std::make_shared<Rule>("", std::vector<std::string>{}, orig.content));
                    return Token{ parsodusParser_Symbol::T_RULENAME, std::move(cnf) };
                }
                case ParsodusLexer::NUM: {
                    int level = 0;
                    try {
                        level = std::stoi(orig.content);
                    } catch (const std::logic_error&) {
                        // std::stoi reports failures with std::invalid_argument or
                        // std::out_of_range; surface them as a syntax error.
                        throw SyntaxError("Invalid precedence number");
                    }
                    auto cnf = std::make_unique<Config>();
                    cnf->grammar.precedence["num"] = {level, PrecedenceType::RIGHT};
                    return Token{ parsodusParser_Symbol::T_NUM, std::move(cnf) };
                }

                // --- value-less punctuation ---
                case ParsodusLexer::ARROW:
                    return Token{ parsodusParser_Symbol::T_ARROW, nullptr };
                case ParsodusLexer::SEMICOLON:
                    return Token{ parsodusParser_Symbol::T_SEMICOLON, nullptr };
                case ParsodusLexer::COLON:
                    return Token{ parsodusParser_Symbol::T_COLON, nullptr };
                case ParsodusLexer::PIPE:
                    return Token{ parsodusParser_Symbol::T_PIPE, nullptr };
                case ParsodusLexer::LBRACKET:
                    return Token{ parsodusParser_Symbol::T_LBRACKET, nullptr };
                case ParsodusLexer::RBRACKET:
                    return Token{ parsodusParser_Symbol::T_RBRACKET, nullptr };
                case ParsodusLexer::COMMA:
                    return Token{ parsodusParser_Symbol::T_COMMA, nullptr };

                case ParsodusLexer::nonmatching:
                case ParsodusLexer::ignore:
                    // No grammar symbol corresponds to these: ignore the token
                    // and move on to the next one.
                    continue;
            }
            // Any token type not listed above is skipped as well.
        }
    } catch (const ParsodusLexer::NoMoreTokens&) {
        return Token{ parsodusParser_Symbol::T_EOF, nullptr };
    } catch (const ParsodusLexer::NoMatch&) {
        LOG(WARNING) << "Unrecognized character: " << m_lex.peek() << std::endl;
        throw SyntaxError("Unrecognized character");
    }
}

/**
 * <section> <sections>
 *
 * Merge the configuration of the remaining sections (subparts[1]) into the
 * configuration of the current section (subparts[0]) and return the latter.
 *
 * For every field: if it is still empty in subparts[0] it is taken over from
 * subparts[1]; if both sides are non-empty and differ, a SyntaxError is
 * thrown. Rule sets are stricter: two non-empty rule sets always conflict.
 *
 * @throws SyntaxError on the first conflicting field (checked in the order
 *         parser type, lexesis file, start, variables, terminals, rules,
 *         precedence)
 */
std::unique_ptr<Config> Parser::reduce_0(std::deque<Token> subparts) {
    Config& merged = *subparts[0].value;
    Config& rest = *subparts[1].value;

    // `rest` is discarded when this function returns, so its contents can be
    // moved instead of copied.
    const auto adopt = [](auto& target, auto& source, const char* conflict) {
        if (target.empty()) {
            target = std::move(source);
        } else if (!source.empty() && source != target) {
            throw SyntaxError(conflict);
        }
    };

    adopt(merged.parserType, rest.parserType, "Found more than 1 different parser type");
    adopt(merged.lexesisFile, rest.lexesisFile, "Found more than 1 different lexesis file");
    adopt(merged.grammar.start, rest.grammar.start, "Found more than 1 different start terminal");
    adopt(merged.grammar.variables, rest.grammar.variables, "Found more than 1 different variable set");
    adopt(merged.grammar.terminals, rest.grammar.terminals, "Found more than 1 different terminal set");

    // Rule sets are never compared: a second non-empty set is always an error.
    if (merged.grammar.rules.empty()) {
        merged.grammar.rules = std::move(rest.grammar.rules);
    } else if (!rest.grammar.rules.empty()) {
        throw SyntaxError("Found more than 1 different rule set");
    }

    adopt(merged.grammar.precedence, rest.grammar.precedence, "Found more than 1 different precedence set");

    // REMARK: Everything is now put into subparts[0]
    return std::move(subparts[0].value);
}

/// Reduction taking no input: yields an empty configuration.
std::unique_ptr<Config> Parser::reduce_1(std::deque<Token>) {
    return std::make_unique<Config>();
}

/// "PARSER" "COLON" "PARSERTYPE" -- forwards the PARSERTYPE payload.
std::unique_ptr<Config> Parser::reduce_2(std::deque<Token> subparts) {
    return std::move(subparts[2].value);
}

/// "LEXESIS" "COLON" "LEXESISNAME" -- forwards the LEXESISNAME payload.
std::unique_ptr<Config> Parser::reduce_3(std::deque<Token> subparts) {
    return std::move(subparts[2].value);
}

/// "TERMINALS" "COLON" <terminals> -- forwards the collected terminals.
std::unique_ptr<Config> Parser::reduce_4(std::deque<Token> subparts) {
    return std::move(subparts[2].value);
}

/// "PRECEDENCE" "COLON" <precedences> -- forwards the collected precedences.
std::unique_ptr<Config> Parser::reduce_5(std::deque<Token> subparts) {
    return std::move(subparts[2].value);
}

/// "START" "COLON" "VARIABLE" -- a new configuration whose grammar start is
/// the first variable stored in the VARIABLE token.
std::unique_ptr<Config> Parser::reduce_6(std::deque<Token> subparts) {
    auto cnf = std::make_unique<Config>();
    cnf->grammar.start = *subparts[2].value->grammar.variables.begin();
    return cnf;
}

/// "GRAMMAR" "COLON" <rules> -- forwards the collected rules.
std::unique_ptr<Config> Parser::reduce_7(std::deque<Token> subparts) {
    return std::move(subparts[2].value);
}

/// "TERMINAL" <terminals> -- adds the terminal to the already collected set.
std::unique_ptr<Config> Parser::reduce_8(std::deque<Token> subparts) {
    const auto& single = subparts[0].value->grammar.terminals;
    subparts[1].value->grammar.terminals.insert(*single.begin());
    return std::move(subparts[1].value);
}

/// "" -- empty production: yields an empty configuration.
std::unique_ptr<Config> Parser::reduce_9(std::deque<Token>) {
    return std::make_unique<Config>();
}

/// <precedence> <terminals> <precedences>
///
/// Handles the leading "<precedence> <terminals>" pair through reduce_11 and
/// adds its entries to the already collected precedences. Entries already
/// present in the collected set are kept (insert does not overwrite).
std::unique_ptr<Config> Parser::reduce_10(std::deque<Token> subparts) {
    auto other = std::move(subparts[2].value);
    subparts.pop_back();
    std::unique_ptr<Config> cfg = reduce_11(std::move(subparts));
    other->grammar.precedence.insert(cfg->grammar.precedence.begin(),
                                     cfg->grammar.precedence.end());
    return other;
}
std::unique_ptr<Config> Parser::reduce_11(std::deque<Token> subparts) { // <precedence> <terminals> PrecedenceType typ = subparts[0].value->grammar.precedence["type"].second; for (std::string t : subparts[1].value->grammar.terminals) { subparts[1].value->grammar.precedence[t] = {m_precedenceCounter, typ}; } subparts[1].value->grammar.terminals.clear(); m_precedenceCounter++; return std::move(subparts[1].value); } std::unique_ptr<Config> Parser::reduce_12(std::deque<Token> subparts) { //"LEFT" return std::move(subparts[0].value); } std::unique_ptr<Config> Parser::reduce_13(std::deque<Token> subparts) { // "RIGHT" return std::move(subparts[0].value); } std::unique_ptr<Config> Parser::reduce_14(std::deque<Token> subparts) { // "NONASSOC" return std::move(subparts[0].value); } std::unique_ptr<Config> Parser::reduce_15(std::deque<Token> subparts) { // <rule> <rules> for (auto it = subparts[0].value->grammar.rules.rbegin(); it != subparts[0].value->grammar.rules.rend(); it++) { subparts[1].value->grammar.rules.emplace_front(std::move(*it)); } for (auto& v : subparts[0].value->grammar.variables) { subparts[1].value->grammar.variables.insert(v); } return std::move(subparts[1].value); } std::unique_ptr<Config> Parser::reduce_16(std::deque<Token> subparts) { // <rule> return std::move(subparts[0].value); } std::unique_ptr<Config> Parser::reduce_17(std::deque<Token> subparts) { //"VARIABLE" "ARROW" <bodies> std::string head = *subparts[0].value->grammar.variables.begin(); for (auto& rule : subparts[2].value->grammar.rules) rule->head = head; subparts[2].value->grammar.variables.insert(head); return std::move(subparts[2].value); } std::unique_ptr<Config> Parser::reduce_18(std::deque<Token> subparts) { // <body> "PIPE" <bodies> subparts[2].value->grammar.rules.emplace_front(std::move(subparts[0].value->grammar.rules[0])); return std::move(subparts[2].value); } std::unique_ptr<Config> Parser::reduce_19(std::deque<Token> subparts) { // <body> return std::move(subparts[0].value); 
} std::unique_ptr<Config> Parser::reduce_20(std::deque<Token> subparts) { // <term_var> "LBRACKET" "RULENAME" <opt_prec> "LBRACKET" subparts[2].value->grammar.rules[0]->tail = std::move(subparts[0].value->grammar.rules[0]->tail); if (subparts[3].value) { subparts[2].value->grammar.rules[0]->precedence = {true, subparts[3].value->grammar.precedence["rule"]}; } return std::move(subparts[2].value); } std::unique_ptr<Config> Parser::reduce_21(std::deque<Token> subparts) { // <term_var> return std::move(subparts[0].value); } std::unique_ptr<Config> Parser::reduce_22(std::deque<Token> subparts) { // <term_var> "VARIABLE" subparts[0].value->grammar.rules[0]->tail.emplace_back(*subparts[1].value->grammar.variables.begin()); return std::move(subparts[0].value); } std::unique_ptr<Config> Parser::reduce_23(std::deque<Token> subparts) { // <term_var> "TERMINAL" subparts[0].value->grammar.rules[0]->tail.emplace_back(*subparts[1].value->grammar.terminals.begin()); return std::move(subparts[0].value); } std::unique_ptr<Config> Parser::reduce_24(std::deque<Token>) { // "" auto cnf = std::make_unique<Config>(); cnf->grammar.rules.emplace_back(std::make_shared<Rule>("", std::vector<std::string>{})); return cnf; } std::unique_ptr<Config> Parser::reduce_25(std::deque<Token> subparts) { // <opt_prec> ::= "COMMA" <precedence> "NUM" subparts[1].value->grammar.precedence["rule"] = {subparts[2].value->grammar.precedence["num"].first, subparts[1].value->grammar.precedence["type"].second}; return std::move(subparts[1].value); } std::unique_ptr<Config> Parser::reduce_26(std::deque<Token>) { // <opt_prec> ::= return nullptr; } } //namespace pds