Parsodus/src/inputparser.cpp

121 lines
5.8 KiB
C++

#include "ParsodusLexer.h"
#include "Parsodus/inputparser.h"
#include "Parsodus/util/parserType.h"

#include <utility>
namespace pds {
/**
 * Creates an exception carrying the given error message.
 *
 * @param what message that what() will later return; it is taken by value
 *             and moved into the exception, so passing a temporary costs no
 *             extra copy
 */
InputParserException::InputParserException(std::string what): m_what(std::move(what)) {}
const char* InputParserException::what() const throw() {
return m_what.c_str();
}
Config InputParser::parseInput(std::istream& is) {
ParsodusLexer lex(is);
Config config;
bool readingTerminals = false;
bool readingGrammar = false;
try {
while(true) {
ParsodusLexer::Token token = lex.nextToken();
if(token.type != ParsodusLexer::TERMINAL)
readingTerminals = false;
if(token.type != ParsodusLexer::TERMINAL
&& token.type != ParsodusLexer::ARROW
&& token.type != ParsodusLexer::VARIABLE
&& token.type != ParsodusLexer::SEMICOLON
&& token.type != ParsodusLexer::PIPE)
readingGrammar = false;
switch(token.type) {
case ParsodusLexer::PARSER:
lexColon(lex, token);
if(token.type == ParsodusLexer::PARSERTYPE) {
config.parserType = token.content;
} else {
throw InputParserException("inputfile malformed, no parser type found in parser section");
}
break;
case ParsodusLexer::LEXESIS:
lexColon(lex, token);
if(token.type != ParsodusLexer::LEXESISNAME)
throw InputParserException("No valid input file found after 'lexesis', found '" + token.content + "' instead.");
config.lexesisFile = token.content;
break;
case ParsodusLexer::TERMINALS:
lexColon(lex, token, false);
readingTerminals = true;
break;
case ParsodusLexer::GRAMMAR:
lexColon(lex, token, false);
readingGrammar = true;
break;
case ParsodusLexer::TERMINAL:
if(readingTerminals) {
config.grammar.terminals.insert(token.content);
} else {
throw InputParserException("Found a terminal outside a grammar or terminals section: " + token.content);
}
break;
case ParsodusLexer::START:
lexColon(lex, token);
if(token.type != ParsodusLexer::VARIABLE)
throw InputParserException("Expected to find a start variable, but found: " + token.content);
config.grammar.start = token.content;
break;
case ParsodusLexer::VARIABLE:
if(readingGrammar) {
if(config.grammar.variables.find(token.content) == config.grammar.variables.end())
config.grammar.variables.insert(token.content);
std::string current_head = token.content;
// Parsing rule
token = lex.nextToken();
if(token.type != ParsodusLexer::ARROW)
throw InputParserException("No arrow found after '"+ current_head+"', but found '" + token.content + "' instead");
std::shared_ptr<Rule> rule = std::make_shared<Rule>();
rule->head = current_head;
bool parsing_head = true;
while(parsing_head) {
token = lex.nextToken();
switch(token.type) {
case ParsodusLexer::VARIABLE:
rule->tail.push_back(token.content);
break;
case ParsodusLexer::TERMINAL:
rule->tail.push_back(token.content);
break;
case ParsodusLexer::SEMICOLON:
parsing_head = false;
case ParsodusLexer::PIPE:
rule->tail.shrink_to_fit();
config.grammar.rules.push_back(std::make_shared<Rule>(*rule));
rule->tail.clear();
break;
default:
throw InputParserException("Expecting to find a variable, terminal, pipe or a semicolon, but found '" + token.content + "' instead");
}
}
} else
throw InputParserException("Found a variable outside a grammar section: " + token.content);
break;
default:
break;
}
}
} catch(ParsodusLexer::NoMoreTokens& err) {}
return config;
}
/**
 * Consumes the colon that has to follow a section keyword.
 *
 * @param lex      lexer to read from
 * @param token    overwritten with the token that was read: the colon itself,
 *                 or, when `nextoken` is true, the token right after it
 * @param nextoken whether to also fetch the token following the colon
 * @throws InputParserException when the token read first is not a colon
 */
void InputParser::lexColon(ParsodusLexer& lex, ParsodusLexer::Token &token, bool nextoken) {
    token = lex.nextToken();

    const bool sawColon = (token.type == ParsodusLexer::COLON);
    if (!sawColon) {
        throw InputParserException("No colon found before '" + token.content + "'");
    }

    if (!nextoken) {
        return;
    }
    token = lex.nextToken();
}
}