330 lines
14 KiB
C++
330 lines
14 KiB
C++
/*
|
|
* Parsodus - A language agnostic parser generator
|
|
* Copyright © 2016-2017 Thomas Avé, Robin Jadoul, Kobe Wullaert
|
|
*
|
|
* Permission is hereby granted, free of charge, to any person obtaining
|
|
* a copy of this software and associated documentation files (the "Software"),
|
|
* to deal in the Software without restriction, including without limitation
|
|
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
|
* and/or sell copies of the Software, and to permit persons to whom the
|
|
* Software is furnished to do so, subject to the following conditions:
|
|
*
|
|
* The above copyright notice and this permission notice shall be included
|
|
* in all copies or substantial portions of the Software.
|
|
*
|
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
|
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
|
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
|
|
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
|
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE
|
|
* OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
|
*/
|
|
|
|
#include "Parsodus/parser.h"

#include "g3log/g3log.hpp"

#include <deque>
#include <iterator>
#include <utility>
|
|
|
|
namespace pds {
|
|
|
|
/// Constructs a Parser that reads its token stream from the given lexer.
/// The lexer parameter is a sink: it is moved into the member instead of
/// copied (falls back to a copy if ParsodusLexer has no move constructor).
Parser::Parser(ParsodusLexer lex) :
    parsodusParser<std::unique_ptr<Config>>(), m_lex(std::move(lex)) {
}
|
|
|
|
/// Fetches the next token from the lexer and translates it into a parser
/// symbol, attaching a partially-filled Config as the semantic value where
/// the token carries content (names, numbers, associativity, ...).
///
/// @return the next parser Token; T_EOF when the lexer is exhausted.
/// @throws SyntaxError when the lexer reports an unmatchable character.
Parser::Token Parser::lex() {
    try {
        // Keep pulling tokens until one is meaningful to the parser.
        // 'ignore' and 'nonmatching' tokens are skipped here instead of
        // being forwarded with an uninitialized symbol, which was
        // undefined behavior in the previous version.
        while (true) {
            ParsodusLexer::Token orig = m_lex.nextToken();
            std::unique_ptr<Config> cnf;
            parsodusParser_Symbol s;
            switch(orig.type) {
                case ParsodusLexer::PARSER:
                    s = parsodusParser_Symbol::T_PARSER;
                    break;
                case ParsodusLexer::PRECEDENCE:
                    s = parsodusParser_Symbol::T_PRECEDENCE;
                    break;
                case ParsodusLexer::LEXESIS:
                    s = parsodusParser_Symbol::T_LEXESIS;
                    break;
                case ParsodusLexer::TERMINALS:
                    s = parsodusParser_Symbol::T_TERMINALS;
                    break;
                case ParsodusLexer::START:
                    s = parsodusParser_Symbol::T_START;
                    break;
                case ParsodusLexer::GRAMMAR:
                    s = parsodusParser_Symbol::T_GRAMMAR;
                    break;
                case ParsodusLexer::PARSERTYPE:
                    // Payload: the requested parser backend name.
                    cnf = std::make_unique<Config>();
                    cnf->parserType = orig.content;
                    return Token{ parsodusParser_Symbol::T_PARSERTYPE, std::move(cnf) };
                case ParsodusLexer::LEFT:
                    // The int in the pair is a placeholder level; reductions
                    // only read the PrecedenceType — TODO confirm.
                    cnf = std::make_unique<Config>();
                    cnf->grammar.precedence["type"] = std::make_pair(0, PrecedenceType::LEFT);
                    return Token{ parsodusParser_Symbol::T_LEFT, std::move(cnf) };
                case ParsodusLexer::RIGHT:
                    cnf = std::make_unique<Config>();
                    cnf->grammar.precedence["type"] = std::make_pair(2, PrecedenceType::RIGHT);
                    return Token{ parsodusParser_Symbol::T_RIGHT, std::move(cnf) };
                case ParsodusLexer::NONASSOC:
                    cnf = std::make_unique<Config>();
                    cnf->grammar.precedence["type"] = std::make_pair(1, PrecedenceType::NONASSOC);
                    return Token{ parsodusParser_Symbol::T_NONASSOC, std::move(cnf) };
                case ParsodusLexer::LEXESISNAME:
                    cnf = std::make_unique<Config>();
                    cnf->lexesisFile = orig.content;
                    return Token{ parsodusParser_Symbol::T_LEXESISNAME, std::move(cnf) };
                case ParsodusLexer::TERMINAL:
                    // Strip the surrounding delimiter characters (first/last).
                    cnf = std::make_unique<Config>();
                    cnf->grammar.terminals.insert(orig.content.substr(1, orig.content.length() - 2));
                    return { parsodusParser_Symbol::T_TERMINAL, std::move(cnf) };
                case ParsodusLexer::VARIABLE:
                    cnf = std::make_unique<Config>();
                    cnf->grammar.variables.insert(orig.content.substr(1, orig.content.length() - 2));
                    return { parsodusParser_Symbol::T_VARIABLE, std::move(cnf) };
                case ParsodusLexer::ARROW:
                    s = parsodusParser_Symbol::T_ARROW;
                    break;
                case ParsodusLexer::SEMICOLON:
                    s = parsodusParser_Symbol::T_SEMICOLON;
                    break;
                case ParsodusLexer::COLON:
                    s = parsodusParser_Symbol::T_COLON;
                    break;
                case ParsodusLexer::PIPE:
                    s = parsodusParser_Symbol::T_PIPE;
                    break;
                case ParsodusLexer::RULENAME:
                    // Seed a rule carrying only its name; head and tail are
                    // filled in by later reductions.
                    cnf = std::make_unique<Config>();
                    cnf->grammar.rules.emplace_back(std::make_shared<Rule>("", std::vector<std::string>{}, orig.content));
                    return { parsodusParser_Symbol::T_RULENAME, std::move(cnf) };
                case ParsodusLexer::NUM:
                    // Only the numeric level matters; RIGHT is a placeholder
                    // associativity — TODO confirm.
                    cnf = std::make_unique<Config>();
                    cnf->grammar.precedence["num"] = {std::stoi(orig.content), PrecedenceType::RIGHT};
                    return { parsodusParser_Symbol::T_NUM, std::move(cnf) };
                case ParsodusLexer::LBRACKET:
                    s = parsodusParser_Symbol::T_LBRACKET;
                    break;
                case ParsodusLexer::RBRACKET:
                    s = parsodusParser_Symbol::T_RBRACKET;
                    break;
                case ParsodusLexer::COMMA:
                    s = parsodusParser_Symbol::T_COMMA;
                    break;
                case ParsodusLexer::nonmatching:
                case ParsodusLexer::ignore:
                    // Not interesting for the parser: fetch the next token.
                    continue;
            }
            return Token{ s, nullptr };
        }
    } catch(const ParsodusLexer::NoMoreTokens&) {
        return Token{ parsodusParser_Symbol::T_EOF, nullptr };
    } catch (const ParsodusLexer::NoMatch&) {
        LOG(WARNING) << "Unrecognized character: " << m_lex.peek() << std::endl;
        throw SyntaxError("Unrecognized character");
    }
}
|
|
|
|
std::unique_ptr<Config> Parser::reduce_0(std::deque<Token> subparts) {
|
|
// <section> <sections>
|
|
|
|
// Check whether there are no different parserType's given
|
|
if (subparts[0].value->parserType.empty()) {
|
|
subparts[0].value->parserType = subparts[1].value->parserType;
|
|
} else if (!subparts[1].value->parserType.empty() &&
|
|
subparts[1].value->parserType != subparts[0].value->parserType) {
|
|
throw SyntaxError("Found more than 1 different parser type");
|
|
}
|
|
|
|
// Check whether there are no different lexesisFile's given
|
|
if (subparts[0].value->lexesisFile.empty()){
|
|
subparts[0].value->lexesisFile = subparts[1].value->lexesisFile;
|
|
} else if (!subparts[1].value->lexesisFile.empty() &&
|
|
subparts[1].value->lexesisFile != subparts[0].value->lexesisFile){
|
|
throw SyntaxError("Found more than 1 different lexesis file");
|
|
}
|
|
// Check whether there are no different grammar's given
|
|
// Check whether there are no different start terminals given
|
|
if (subparts[0].value->grammar.start.empty()){
|
|
subparts[0].value->grammar.start = subparts[1].value->grammar.start;
|
|
} else if (!subparts[1].value->grammar.start.empty() &&
|
|
subparts[1].value->grammar.start != subparts[0].value->grammar.start){
|
|
throw SyntaxError("Found more than 1 different start terminal");
|
|
}
|
|
// Check whether there are no different variable sets given
|
|
if (subparts[0].value->grammar.variables.empty()){
|
|
subparts[0].value->grammar.variables = subparts[1].value->grammar.variables;
|
|
} else if (!subparts[1].value->grammar.variables.empty() &&
|
|
subparts[1].value->grammar.variables != subparts[0].value->grammar.variables){
|
|
throw SyntaxError("Found more than 1 different variable set");
|
|
}
|
|
// Check whether there are no different terminal sets given
|
|
if (subparts[0].value->grammar.terminals.empty()) {
|
|
subparts[0].value->grammar.terminals = subparts[1].value->grammar.terminals;
|
|
} else if (!subparts[1].value->grammar.terminals.empty() &&
|
|
subparts[1].value->grammar.terminals != subparts[0].value->grammar.terminals){
|
|
throw SyntaxError("Found more than 1 different terminal set");
|
|
}
|
|
// Check whether there are no different rule sets given
|
|
if (subparts[0].value->grammar.rules.empty()) {
|
|
subparts[0].value->grammar.rules = subparts[1].value->grammar.rules;
|
|
} else if (!subparts[1].value->grammar.rules.empty()){
|
|
throw SyntaxError("Found more than 1 different rule set");
|
|
}
|
|
// Check whether there are no different precedence sets given
|
|
if (subparts[0].value->grammar.precedence.empty()) {
|
|
subparts[0].value->grammar.precedence = subparts[1].value->grammar.precedence;
|
|
} else if (!subparts[1].value->grammar.precedence.empty() &&
|
|
subparts[1].value->grammar.precedence != subparts[0].value->grammar.precedence) {
|
|
throw SyntaxError("Found more than 1 different precedence set");
|
|
}
|
|
// REMARK: Everything is now put into subparts[0]
|
|
|
|
return std::move(subparts[0].value);
|
|
}
|
|
std::unique_ptr<Config> Parser::reduce_1(std::deque<Token>) {
|
|
return std::make_unique<Config>();
|
|
}
|
|
std::unique_ptr<Config> Parser::reduce_2(std::deque<Token> subparts) {
|
|
// "PARSER" "COLON" "PARSERTYPE"
|
|
return std::move(subparts[2].value);
|
|
}
|
|
std::unique_ptr<Config> Parser::reduce_3(std::deque<Token> subparts) {
|
|
// "LEXESIS" "COLON" "LEXESISNAME"
|
|
return std::move(subparts[2].value);
|
|
}
|
|
std::unique_ptr<Config> Parser::reduce_4(std::deque<Token> subparts) {
|
|
// "TERMINALS" "COLON" <terminals>
|
|
return std::move(subparts[2].value);
|
|
}
|
|
std::unique_ptr<Config> Parser::reduce_5(std::deque<Token> subparts) {
|
|
// "PRECEDENCE" "COLON" <precedences>
|
|
return std::move(subparts[2].value);
|
|
}
|
|
std::unique_ptr<Config> Parser::reduce_6(std::deque<Token> subparts) {
|
|
// "START" "COLON" "VARIABLE"
|
|
auto cnf = std::make_unique<Config>();
|
|
cnf->grammar.start = *subparts[2].value->grammar.variables.begin();
|
|
return cnf;
|
|
}
|
|
std::unique_ptr<Config> Parser::reduce_7(std::deque<Token> subparts) {
|
|
// "GRAMMAR" "COLON" <rules>
|
|
return std::move(subparts[2].value);
|
|
}
|
|
/// <terminals> ::= "TERMINAL" <terminals>
/// Fold the single terminal carried by the TERMINAL token into the set
/// built from the rest of the list.
std::unique_ptr<Config> Parser::reduce_8(std::deque<Token> subparts) {
    auto cfg = std::move(subparts[1].value);
    cfg->grammar.terminals.insert(*subparts[0].value->grammar.terminals.begin());
    return cfg;
}
|
|
std::unique_ptr<Config> Parser::reduce_9(std::deque<Token>) {
|
|
// ""
|
|
return std::make_unique<Config>();
|
|
}
|
|
|
|
std::unique_ptr<Config> Parser::reduce_10(std::deque<Token> subparts) {
|
|
// <precedence> <terminals> <precedences>
|
|
auto other = std::move(subparts[2].value);
|
|
subparts.pop_back();
|
|
std::unique_ptr<Config> cfg = reduce_11(std::move(subparts));
|
|
|
|
for(auto& p : cfg->grammar.precedence)
|
|
other->grammar.precedence.insert(p);
|
|
return other;
|
|
|
|
}
|
|
std::unique_ptr<Config> Parser::reduce_11(std::deque<Token> subparts) {
|
|
// <precedence> <terminals>
|
|
PrecedenceType typ = subparts[0].value->grammar.precedence["type"].second;
|
|
for (std::string t : subparts[1].value->grammar.terminals) {
|
|
subparts[1].value->grammar.precedence[t] = {m_precedenceCounter, typ};
|
|
}
|
|
subparts[1].value->grammar.terminals.clear();
|
|
m_precedenceCounter++;
|
|
return std::move(subparts[1].value);
|
|
}
|
|
std::unique_ptr<Config> Parser::reduce_12(std::deque<Token> subparts) {
|
|
//"LEFT"
|
|
return std::move(subparts[0].value);
|
|
}
|
|
std::unique_ptr<Config> Parser::reduce_13(std::deque<Token> subparts) {
|
|
// "RIGHT"
|
|
return std::move(subparts[0].value);
|
|
}
|
|
std::unique_ptr<Config> Parser::reduce_14(std::deque<Token> subparts) {
|
|
// "NONASSOC"
|
|
return std::move(subparts[0].value);
|
|
}
|
|
/// <rules> ::= <rule> <rules>
/// Prepends the rules of the leading <rule> (preserving their order, same
/// effect as the old reverse emplace_front loop) and merges the variable
/// sets into the Config accumulated by the tail.
std::unique_ptr<Config> Parser::reduce_15(std::deque<Token> subparts) {
    auto& dstRules = subparts[1].value->grammar.rules;
    auto& srcRules = subparts[0].value->grammar.rules;
    dstRules.insert(dstRules.begin(),
                    std::make_move_iterator(srcRules.begin()),
                    std::make_move_iterator(srcRules.end()));

    const auto& srcVars = subparts[0].value->grammar.variables;
    subparts[1].value->grammar.variables.insert(srcVars.begin(), srcVars.end());
    return std::move(subparts[1].value);
}
|
|
std::unique_ptr<Config> Parser::reduce_16(std::deque<Token> subparts) {
|
|
// <rule>
|
|
return std::move(subparts[0].value);
|
|
}
|
|
/// <rule> ::= "VARIABLE" "ARROW" <bodies>
/// Stamps the head variable onto every body produced for this rule and
/// records the variable in the grammar's variable set.
std::unique_ptr<Config> Parser::reduce_17(std::deque<Token> subparts) {
    auto cfg = std::move(subparts[2].value);
    const std::string head = *subparts[0].value->grammar.variables.begin();
    for (auto& rule : cfg->grammar.rules) {
        rule->head = head;
    }
    cfg->grammar.variables.insert(head);
    return cfg;
}
|
|
/// <bodies> ::= <body> "PIPE" <bodies>
/// Keeps source order: the newly parsed body goes in front of the bodies
/// collected by the tail.
std::unique_ptr<Config> Parser::reduce_18(std::deque<Token> subparts) {
    auto cfg = std::move(subparts[2].value);
    cfg->grammar.rules.emplace_front(std::move(subparts[0].value->grammar.rules[0]));
    return cfg;
}
|
|
std::unique_ptr<Config> Parser::reduce_19(std::deque<Token> subparts) {
|
|
// <body>
|
|
return std::move(subparts[0].value);
|
|
}
|
|
/// <body> ::= <term_var> "LBRACKET" "RULENAME" <opt_prec> "RBRACKET"
/// The RULENAME token created the rule object; attach the parsed tail and,
/// when present, the optional precedence annotation.
std::unique_ptr<Config> Parser::reduce_20(std::deque<Token> subparts) {
    auto cfg = std::move(subparts[2].value);
    cfg->grammar.rules[0]->tail = std::move(subparts[0].value->grammar.rules[0]->tail);
    if (subparts[3].value) {  // <opt_prec> yields nullptr when absent
        cfg->grammar.rules[0]->precedence = {true, subparts[3].value->grammar.precedence["rule"]};
    }
    return cfg;
}
|
|
std::unique_ptr<Config> Parser::reduce_21(std::deque<Token> subparts) {
|
|
// <term_var>
|
|
return std::move(subparts[0].value);
|
|
}
|
|
/// <term_var> ::= <term_var> "VARIABLE"
/// Appends the variable name to the tail of the rule under construction.
std::unique_ptr<Config> Parser::reduce_22(std::deque<Token> subparts) {
    auto cfg = std::move(subparts[0].value);
    cfg->grammar.rules[0]->tail.emplace_back(*subparts[1].value->grammar.variables.begin());
    return cfg;
}
|
|
/// <term_var> ::= <term_var> "TERMINAL"
/// Appends the terminal name to the tail of the rule under construction.
std::unique_ptr<Config> Parser::reduce_23(std::deque<Token> subparts) {
    auto cfg = std::move(subparts[0].value);
    cfg->grammar.rules[0]->tail.emplace_back(*subparts[1].value->grammar.terminals.begin());
    return cfg;
}
|
|
std::unique_ptr<Config> Parser::reduce_24(std::deque<Token>) {
|
|
// ""
|
|
auto cnf = std::make_unique<Config>();
|
|
cnf->grammar.rules.emplace_back(std::make_shared<Rule>("", std::vector<std::string>{}));
|
|
return cnf;
|
|
}
|
|
std::unique_ptr<Config> Parser::reduce_25(std::deque<Token> subparts) {
|
|
// <opt_prec> ::= "COMMA" <precedence> "NUM"
|
|
subparts[1].value->grammar.precedence["rule"] = {subparts[2].value->grammar.precedence["num"].first, subparts[1].value->grammar.precedence["type"].second};
|
|
return std::move(subparts[1].value);
|
|
}
|
|
/// <opt_prec> ::=
/// An absent precedence annotation is signalled with a null Config;
/// reduce_20 tests the value before using it.
std::unique_ptr<Config> Parser::reduce_26(std::deque<Token>) {
    return {};
}
|
|
|
|
} //namespace pds
|