160 lines
7.4 KiB
C++
160 lines
7.4 KiB
C++
#include "ParsodusLexer.h"
#include "Parsodus/inputparser.h"
#include "Parsodus/util/parserType.h"

#include <utility>
|
|
|
|
namespace pds {
|
|
|
|
/**
 * Creates an exception that carries the message `what`.
 *
 * The message is taken by value and moved into m_what, so a caller passing a
 * temporary string does not pay for a second copy of the text.
 *
 * @param what human readable description of the problem
 */
InputParserException::InputParserException(std::string what): m_what(std::move(what)) {}
|
|
const char* InputParserException::what() const throw() {
|
|
return m_what.c_str();
|
|
}
|
|
namespace {

/**
 * Returns `text` without its first and last character.
 *
 * Used to drop the delimiter character on each side of a variable or terminal
 * token. Text shorter than two characters yields an empty string instead of
 * running into std::string::substr's out_of_range case.
 */
std::string stripDelimiters(const std::string& text) {
    if(text.size() < 2)
        return std::string();
    return text.substr(1, text.size() - 2);
}

} // anonymous namespace

/**
 * Parses a Parsodus input description from `is` into a Config.
 *
 * Tokens are pulled from a ParsodusLexer until it throws
 * ParsodusLexer::NoMoreTokens, which is treated as the regular end of input.
 * Handled top-level constructs (by token type):
 *   - PARSER    ':' PARSERTYPE     -> config.parserType
 *   - LEXESIS   ':' LEXESISNAME    -> config.lexesisFile
 *   - TERMINALS ':' TERMINAL...    -> config.grammar.terminals
 *   - START     ':' VARIABLE       -> config.grammar.start
 *   - GRAMMAR   ':' followed by rules
 *         VARIABLE ARROW (VARIABLE | TERMINAL)* { PIPE ... } SEMICOLON
 *                                  -> config.grammar.variables / .rules
 * Tokens of any other type are skipped at top level.
 *
 * Variable and terminal names are stored without the delimiter character on
 * each side of the token text; this now also holds for rule heads, so that
 * heads compare equal to the entries of grammar.variables and grammar.start.
 *
 * NOTE(review): because NoMoreTokens is swallowed at the outermost level,
 * input that ends in the middle of a construct (for example a rule without
 * its closing semicolon) is accepted silently and the unfinished part is
 * dropped. Confirm whether that should become an InputParserException.
 *
 * @param is stream the description is read from
 * @return the configuration collected from the input
 * @throws InputParserException when a section or rule is malformed
 */
Config InputParser::parseInput(std::istream& is) {
    ParsodusLexer lex(is);
    Config config;

    // Section state: switched on by the section header, switched off again by
    // the first token that cannot be part of that section.
    bool readingTerminals = false;
    bool readingGrammar = false;
    // bool readingPrecedences = false; // only used by the disabled precedence support below

    try {
        while(true) {
            ParsodusLexer::Token token = lex.nextToken();

            if(token.type != ParsodusLexer::TERMINAL)
                readingTerminals = false;
            if(token.type != ParsodusLexer::TERMINAL
                    && token.type != ParsodusLexer::ARROW
                    && token.type != ParsodusLexer::VARIABLE
                    && token.type != ParsodusLexer::SEMICOLON
                    && token.type != ParsodusLexer::PIPE)
                readingGrammar = false;

            switch(token.type) {
                case ParsodusLexer::PARSER:
                    lexColon(lex, token);
                    if(token.type == ParsodusLexer::PARSERTYPE) {
                        config.parserType = token.content;
                    } else {
                        throw InputParserException("inputfile malformed, no parser type found in parser section");
                    }
                    break;
                case ParsodusLexer::LEXESIS:
                    lexColon(lex, token);
                    if(token.type != ParsodusLexer::LEXESISNAME)
                        throw InputParserException("No valid input file found after 'lexesis', found '" + token.content + "' instead.");
                    config.lexesisFile = token.content;
                    break;
                case ParsodusLexer::TERMINALS:
                    // Only the colon is consumed; the terminals themselves are
                    // picked up by the TERMINAL case on the next iterations.
                    lexColon(lex, token, false);
                    readingTerminals = true;
                    break;
                case ParsodusLexer::GRAMMAR:
                    // Only the colon is consumed; rules are picked up by the
                    // VARIABLE case on the next iterations.
                    lexColon(lex, token, false);
                    readingGrammar = true;
                    break;
                /*
                case ParsodusLexer::PRECEDENCES:
                    lexColon(lex, token, false);
                    readingPrecedences = true;
                    break;
                */
                case ParsodusLexer::TERMINAL:
                    if(readingTerminals) {
                        config.grammar.terminals.insert(stripDelimiters(token.content));
                    } else {
                        throw InputParserException("Found a terminal outside a grammar or terminals section: " + token.content);
                    }
                    break;
                case ParsodusLexer::START:
                    lexColon(lex, token);
                    if(token.type != ParsodusLexer::VARIABLE)
                        throw InputParserException("Expected to find a start variable, but found: " + token.content);
                    config.grammar.start = stripDelimiters(token.content);
                    break;
                case ParsodusLexer::VARIABLE: {
                    if(!readingGrammar)
                        throw InputParserException("Found a variable outside a grammar section: " + token.content);

                    // Keep the raw token text for diagnostics, but store the
                    // stripped name everywhere in the grammar.
                    const std::string rawHead = token.content;
                    const std::string head = stripDelimiters(rawHead);
                    config.grammar.variables.insert(head);

                    // Parsing rule
                    token = lex.nextToken();
                    if(token.type != ParsodusLexer::ARROW)
                        throw InputParserException("No arrow found after '"+ rawHead+"', but found '" + token.content + "' instead");

                    // Working copy that is reused for every alternative of
                    // this head; each finished alternative is copied into
                    // config.grammar.rules.
                    Rule rule = Rule();
                    rule.head = head;

                    bool parsingBody = true;
                    while(parsingBody) {
                        token = lex.nextToken();
                        switch(token.type) {
                            case ParsodusLexer::VARIABLE:
                            case ParsodusLexer::TERMINAL:
                                // Delimiters are stripped only for symbols, so
                                // the error below reports the token unchanged.
                                rule.tail.push_back(stripDelimiters(token.content));
                                break;
                            case ParsodusLexer::SEMICOLON:
                                // A semicolon finishes the current alternative
                                // and the whole rule.
                                parsingBody = false;
                                // fall through
                            case ParsodusLexer::PIPE:
                                rule.tail.shrink_to_fit();
                                config.grammar.rules.push_back(std::make_shared<Rule>(rule));
                                rule.tail.clear();
                                break;
                            default:
                                throw InputParserException("Expecting to find a variable, terminal, pipe or a semicolon, but found '" + token.content + "' instead");
                        }
                    }
                    break;
                }
                /*
                case ParsodusLexer::PRECEDENCE:
                    if (readingPrecedences) {

                        pds::PrecedenceType precedence_type;
                        switch(token.content) {
                            case "left":
                                precedence_type = pds::PrecedenceType::LEFT;
                                break;
                            case "nonassoc":
                                precedence_type = pds::PrecedenceType::NONASSOC;
                                break;
                            case "right":
                                precedence_type = pds::PrecedenceType::RIGHT;
                                break;
                        }

                        int counter = 0;
                        while (true) {
                            token = lex.nextToken();
                            if (token.type == ParsodusLexer::SEMICOLON)
                                break;
                            if (token.type != ParsodusLexer::TERMINAL)
                                throw InputParserException("Expecting to find a terminal, but found '" + token.content + "' instead'");
                            if (config.grammar.precedence.find(token.type) == config.grammar.precedence)
                                throw InputParserException("Found same terminal in multiple precedence rules");
                            config.grammar.precedence.insert(make_pair(token.content.substr(1, token.content.size() - 2), make_pair(counter++, precedence_type)));
                        }

                    } else
                        throw InputParserException("Found a precedence rule outside a precedence section: " + token.content);
                */
                default:
                    break;
            }
        }
    } catch(const ParsodusLexer::NoMoreTokens&) {
        // The lexer signals the end of the input by throwing; nothing to do.
    }
    return config;
}
|
|
/**
 * Consumes the colon that has to follow a section keyword.
 *
 * The next token is read from `lex` into `token`. When it is not a COLON an
 * InputParserException naming the offending token is thrown. When `nextoken`
 * is true, one further token is read into `token`, so the caller can inspect
 * the token after the colon straight away; otherwise `token` is left holding
 * the colon.
 *
 * @param lex      lexer to read from
 * @param token    receives the last token that was read
 * @param nextoken whether to also fetch the token following the colon
 * @throws InputParserException if the token read first is not a colon
 */
void InputParser::lexColon(ParsodusLexer& lex, ParsodusLexer::Token &token, bool nextoken) {
    token = lex.nextToken();

    const bool isColon = (token.type == ParsodusLexer::COLON);
    if(!isColon) {
        throw InputParserException("No colon found before '" + token.content + "'");
    }

    if(!nextoken) {
        return;
    }
    token = lex.nextToken();
}
|
|
|
|
}
|