First version of input parser

This commit is contained in:
Thomas Avé 2016-11-26 19:26:16 +01:00
parent 62b0c493b4
commit b2a62daed1
10 changed files with 566 additions and 12 deletions

77
include/InputLexer.h Normal file
View File

@ -0,0 +1,77 @@
#pragma once
#ifndef LEXER_InputLexer_H
#define LEXER_InputLexer_H
#include <exception>
#include <istream>
#include <string>
/**
 * Table-driven lexer that splits a Parsodus input stream into tokens.
 *
 * The lexer only keeps a reference to the stream it is given, so that stream
 * has to stay alive for as long as the lexer is used.
 */
class InputLexer {
    public:
        /// Thrown when the input is exhausted and no further token can be produced.
        class NoMoreTokens : public std::exception {};

        /// Thrown when the upcoming input does not match any token.
        class NoMatch : public std::exception {};

        /**
         * The kinds of token the lexer distinguishes.
         *
         * `nonmatching` is the zero value and stands for "no token";
         * tokens of type `ignore` are skipped by nextToken() instead of
         * being handed to the caller.
         */
        enum TokenType {
            nonmatching,
            ARROW,
            COLON,
            GRAMMAR,
            LEFT,
            LEXESIS,
            LEXESISNAME,
            NONASSOC,
            NUM,
            PARSER,
            PARSERTYPE,
            PIPE,
            PRECEDENCE,
            RIGHT,
            SEMICOLON,
            START,
            TERMINAL,
            TERMINALS,
            VARIABLE,
            ignore,
        };

        /// A single token: its kind together with the matched text.
        struct Token {
            TokenType type;
            std::string content;
        };

        /**
         * Construct a lexer reading from the given stream.
         *
         * @param in The stream to tokenize
         */
        InputLexer(std::istream& in);

        ~InputLexer();

        /**
         * Produce the next token of the input.
         *
         * @throws NoMoreTokens if no more tokens are available
         * @throws NoMatch if no match was found
         */
        Token nextToken();

        /**
         * Skip the following `n` bytes of the input.
         *
         * @param n The number of bytes to skip
         */
        void skip(std::size_t n);

        /**
         * Look at the current head of the input stream without consuming it.
         * Useful in error reporting, for example when a character mismatches.
         *
         * @throws NoMoreTokens if the input stream is at an end
         */
        char peek();

        /**
         * Get the current byte offset into the input.
         */
        std::size_t getByteOffset();

    private:
        std::size_t m_offset;
        std::istream& m_input;
};
#endif //LEXER_InputLexer_H

19
include/Parsodus/config.h Normal file
View File

@ -0,0 +1,19 @@
#pragma once
#ifndef PARSODUS_CONFIG_H
#define PARSODUS_CONFIG_H
#include "Parsodus/grammar.h"
namespace pds {
    /**
     * Everything the input parser extracts from a Parsodus input file.
     */
    struct Config {
        /// The parsing algorithms that can be selected in the input file.
        enum class ParserType {LALR_1};

        /// The selected parsing algorithm. It carries an explicit default so
        /// that a Config built from input without a parser section never
        /// holds an indeterminate value.
        ParserType parserType = ParserType::LALR_1;
        std::string lexesisFile; ///< the file named in the lexesis section
        Grammar grammar; ///< the start symbol, terminals, variables and rules
    };
}
#endif //PARSODUS_CONFIG_H

View File

@ -14,6 +14,14 @@ namespace pds {
struct Rule { struct Rule {
std::string head; ///< The replaced variable std::string head; ///< The replaced variable
std::vector<std::string> tail; ///< The replacement rule std::vector<std::string> tail; ///< The replacement rule
/// Order rules by head first and by tail second, which lets a Rule be used
/// as the element type of a std::set.
bool operator<(const Rule& other) const {
    if (head == other.head)
        return tail < other.tail;
    return head < other.head;
}
}; };
/** /**
@ -21,6 +29,7 @@ namespace pds {
* Keeps track of variables, terminals and replacement rules * Keeps track of variables, terminals and replacement rules
*/ */
struct Grammar { struct Grammar {
std::string start; ///< the starting variable
std::set<std::string> variables; ///< the variables std::set<std::string> variables; ///< the variables
std::set<std::string> terminals; ///< the terminals std::set<std::string> terminals; ///< the terminals
std::map<std::string, std::set<Rule> > rules; ///< the replacement rules std::map<std::string, std::set<Rule> > rules; ///< the replacement rules

View File

@ -0,0 +1,34 @@
#pragma once
#ifndef PARSODUS_INPUT_PARSER_H
#define PARSODUS_INPUT_PARSER_H
#include <istream>
#include "Parsodus/config.h"
#include "InputLexer.h"
namespace pds {
/**
 * Turns the textual description of a parser into a Config.
 */
class InputParser {
    public:
        /**
         * Read a complete input file from `is`.
         *
         * @param is The stream containing the input file
         * @return The configuration described by the input
         * @throws InputParserException if the input is malformed
         */
        static Config parseInput(std::istream& is);

    private:
        /**
         * Read the next token and require it to be a colon.
         *
         * @param lex The lexer to read from
         * @param token Receives the token(s) that are read
         * @param nextoken When true, the token following the colon is read
         *                 into `token` as well
         * @throws InputParserException if the token read is not a colon
         */
        static void lexColon(InputLexer& lex, InputLexer::Token &token, bool nextoken = true);
};
/**
 * Thrown when the input file is not valid.
 */
class InputParserException: public std::exception {
    public:
        /**
         * @param what Human readable description of the problem
         */
        InputParserException(std::string what);

        /**
         * @return The description passed to the constructor; the pointer
         *         stays valid for as long as this exception object lives
         */
        // `noexcept` replaces the dynamic exception specification `throw()`,
        // which is deprecated in C++17 and removed in C++20.
        virtual const char* what() const noexcept override;

    private:
        std::string m_what;
};
}
#endif // PARSODUS_INPUT_PARSER_H

View File

@ -1,16 +1,10 @@
parser: lalr(1) parser: lalr(1)
precedence: #optional
left 5 PLUS
right 6 TIMES
nonassoc 2 LT
lexesis: lexer.lxs lexesis: lexer.lxs
# of
terminals: terminals:
TERMINAL TERMINAL
... start: s
start: start
grammar: grammar:
start -> a s -> a s
| b | b
; ;

View File

@ -11,6 +11,8 @@ add_library(Parsodus-backends
add_library(pds add_library(pds
driver.cpp driver.cpp
inputparser.cpp
InputLexer.cpp
) )
add_executable(Parsodus add_executable(Parsodus

246
src/InputLexer.cpp Normal file
View File

@ -0,0 +1,246 @@
#include "InputLexer.h"
#include <sstream>
#include <iostream>
namespace { //The automaton data
// A state of the lexer automaton; values of this type index TABLE and TOKENS.
typedef std::size_t State;
// Entering this state ends the scanning loop in nextToken(). TABLE has no
// row for it (it only holds rows 0..78) and TOKENS maps it to nonmatching.
State REJECT = 79;
unsigned char TRANS_IDX[256] = { (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)1, (unsigned char)1, (unsigned char)0, (unsigned char)0, (unsigned char)1, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)1, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)2, (unsigned char)3, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)4, (unsigned char)5, (unsigned char)0, (unsigned char)6, (unsigned char)7, (unsigned char)7, (unsigned char)7, (unsigned char)7, (unsigned char)7, (unsigned char)7, (unsigned char)7, (unsigned char)7, (unsigned char)7, (unsigned char)8, (unsigned char)9, (unsigned char)0, (unsigned char)0, (unsigned char)10, (unsigned char)0, (unsigned char)0, (unsigned char)11, (unsigned char)11, (unsigned char)11, (unsigned char)11, (unsigned char)11, (unsigned char)11, (unsigned char)11, (unsigned char)11, (unsigned char)11, (unsigned char)11, (unsigned char)11, (unsigned char)11, (unsigned char)11, (unsigned char)11, (unsigned char)11, (unsigned char)11, (unsigned char)11, (unsigned char)11, (unsigned char)11, (unsigned char)11, (unsigned char)11, (unsigned char)11, (unsigned char)11, (unsigned char)11, (unsigned char)11, (unsigned char)11, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)12, (unsigned char)0, (unsigned char)13, (unsigned char)14, (unsigned char)15, (unsigned char)16, (unsigned char)17, (unsigned char)18, (unsigned char)19, (unsigned char)20, (unsigned char)21, (unsigned char)14, 
(unsigned char)14, (unsigned char)22, (unsigned char)23, (unsigned char)24, (unsigned char)25, (unsigned char)26, (unsigned char)14, (unsigned char)27, (unsigned char)28, (unsigned char)29, (unsigned char)14, (unsigned char)14, (unsigned char)14, (unsigned char)30, (unsigned char)14, (unsigned char)14, (unsigned char)0, (unsigned char)31, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)32, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)33, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, 
(unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)34, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, (unsigned char)0, };
State TABLE[80 - 1][35] = {
{ 79, 78, 79, 79, 71, 79, 79, 66, 76, 75, 79, 46, 47, 48, 48, 48, 48, 48, 48, 37, 48, 48, 16, 48, 45, 48, 1, 44, 32, 23, 48, 77, 79, 79, 73, },
{ 79, 79, 63, 79, 79, 67, 79, 79, 79, 79, 79, 62, 48, 2, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 7, 48, 48, 48, 79, 79, 79, 79, },
{ 79, 79, 63, 79, 79, 67, 79, 79, 79, 79, 79, 62, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 3, 48, 48, 48, 79, 79, 79, 79, },
{ 79, 79, 63, 79, 79, 67, 79, 79, 79, 79, 79, 62, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 4, 48, 48, 79, 79, 79, 79, },
{ 79, 79, 63, 79, 79, 67, 79, 79, 79, 79, 79, 62, 48, 48, 48, 48, 48, 5, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 79, 79, 79, 79, },
{ 79, 79, 63, 79, 79, 67, 79, 79, 79, 79, 79, 62, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 6, 48, 48, 48, 79, 79, 79, 79, },
{ 79, 79, 63, 79, 79, 67, 79, 79, 79, 79, 79, 62, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 79, 79, 79, 79, },
{ 79, 79, 63, 79, 79, 67, 79, 79, 79, 79, 79, 62, 48, 48, 48, 48, 48, 8, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 79, 79, 79, 79, },
{ 79, 79, 63, 79, 79, 67, 79, 79, 79, 79, 79, 62, 48, 48, 48, 9, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 79, 79, 79, 79, },
{ 79, 79, 63, 79, 79, 67, 79, 79, 79, 79, 79, 62, 48, 48, 48, 48, 48, 10, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 79, 79, 79, 79, },
{ 79, 79, 63, 79, 79, 67, 79, 79, 79, 79, 79, 62, 48, 48, 48, 48, 11, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 79, 79, 79, 79, },
{ 79, 79, 63, 79, 79, 67, 79, 79, 79, 79, 79, 62, 48, 48, 48, 48, 48, 12, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 79, 79, 79, 79, },
{ 79, 79, 63, 79, 79, 67, 79, 79, 79, 79, 79, 62, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 13, 48, 48, 48, 48, 48, 48, 79, 79, 79, 79, },
{ 79, 79, 63, 79, 79, 67, 79, 79, 79, 79, 79, 62, 48, 48, 48, 14, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 79, 79, 79, 79, },
{ 79, 79, 63, 79, 79, 67, 79, 79, 79, 79, 79, 62, 48, 48, 48, 48, 48, 15, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 79, 79, 79, 79, },
{ 79, 79, 63, 79, 79, 67, 79, 79, 79, 79, 79, 62, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 79, 79, 79, 79, },
{ 79, 79, 63, 79, 79, 67, 79, 79, 79, 79, 79, 62, 48, 48, 48, 48, 48, 17, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 79, 79, 79, 79, },
{ 79, 79, 63, 79, 79, 67, 79, 79, 79, 79, 79, 62, 48, 48, 48, 48, 48, 48, 49, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 18, 79, 79, 79, 79, },
{ 79, 79, 63, 79, 79, 67, 79, 79, 79, 79, 79, 62, 48, 48, 48, 48, 48, 19, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 79, 79, 79, 79, },
{ 79, 79, 63, 79, 79, 67, 79, 79, 79, 79, 79, 62, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 20, 48, 48, 79, 79, 79, 79, },
{ 79, 79, 63, 79, 79, 67, 79, 79, 79, 79, 79, 62, 48, 48, 48, 48, 48, 48, 48, 48, 48, 21, 48, 48, 48, 48, 48, 48, 48, 48, 48, 79, 79, 79, 79, },
{ 79, 79, 63, 79, 79, 67, 79, 79, 79, 79, 79, 62, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 22, 48, 48, 79, 79, 79, 79, },
{ 79, 79, 63, 79, 79, 67, 79, 79, 79, 79, 79, 62, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 79, 79, 79, 79, },
{ 79, 79, 63, 79, 79, 67, 79, 79, 79, 79, 79, 62, 48, 48, 48, 48, 48, 24, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 79, 79, 79, 79, },
{ 79, 79, 63, 79, 79, 67, 79, 79, 79, 79, 79, 62, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 25, 48, 48, 48, 79, 79, 79, 79, },
{ 79, 79, 63, 79, 79, 67, 79, 79, 79, 79, 79, 62, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 26, 48, 48, 48, 48, 48, 48, 48, 79, 79, 79, 79, },
{ 79, 79, 63, 79, 79, 67, 79, 79, 79, 79, 79, 62, 48, 48, 48, 48, 48, 48, 48, 48, 48, 27, 48, 48, 48, 48, 48, 48, 48, 48, 48, 79, 79, 79, 79, },
{ 79, 79, 63, 79, 79, 67, 79, 79, 79, 79, 79, 62, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 28, 48, 48, 48, 48, 48, 48, 79, 79, 79, 79, },
{ 79, 79, 63, 79, 79, 67, 79, 79, 79, 79, 79, 62, 48, 29, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 79, 79, 79, 79, },
{ 79, 79, 63, 79, 79, 67, 79, 79, 79, 79, 79, 62, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 30, 48, 48, 48, 48, 48, 48, 48, 48, 79, 79, 79, 79, },
{ 79, 79, 63, 79, 79, 67, 79, 79, 79, 79, 79, 62, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 31, 48, 48, 79, 79, 79, 79, },
{ 79, 79, 63, 79, 79, 67, 79, 79, 79, 79, 79, 62, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 79, 79, 79, 79, },
{ 79, 79, 63, 79, 79, 67, 79, 79, 79, 79, 79, 62, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 33, 48, 79, 79, 79, 79, },
{ 79, 79, 63, 79, 79, 67, 79, 79, 79, 79, 79, 62, 48, 34, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 79, 79, 79, 79, },
{ 79, 79, 63, 79, 79, 67, 79, 79, 79, 79, 79, 62, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 35, 48, 48, 48, 79, 79, 79, 79, },
{ 79, 79, 63, 79, 79, 67, 79, 79, 79, 79, 79, 62, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 36, 48, 79, 79, 79, 79, },
{ 79, 79, 63, 79, 79, 67, 79, 79, 79, 79, 79, 62, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 79, 79, 79, 79, },
{ 79, 79, 63, 79, 79, 67, 79, 79, 79, 79, 79, 62, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 38, 48, 48, 48, 79, 79, 79, 79, },
{ 79, 79, 63, 79, 79, 67, 79, 79, 79, 79, 79, 62, 48, 39, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 79, 79, 79, 79, },
{ 79, 79, 63, 79, 79, 67, 79, 79, 79, 79, 79, 62, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 40, 48, 48, 48, 48, 48, 48, 48, 79, 79, 79, 79, },
{ 79, 79, 63, 79, 79, 67, 79, 79, 79, 79, 79, 62, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 41, 48, 48, 48, 48, 48, 48, 48, 79, 79, 79, 79, },
{ 79, 79, 63, 79, 79, 67, 79, 79, 79, 79, 79, 62, 48, 42, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 79, 79, 79, 79, },
{ 79, 79, 63, 79, 79, 67, 79, 79, 79, 79, 79, 62, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 43, 48, 48, 48, 79, 79, 79, 79, },
{ 79, 79, 63, 79, 79, 67, 79, 79, 79, 79, 79, 62, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 79, 79, 79, 79, },
{ 79, 79, 63, 79, 79, 67, 79, 79, 79, 79, 79, 62, 48, 48, 48, 48, 48, 48, 48, 48, 48, 51, 48, 48, 48, 48, 48, 48, 48, 48, 48, 79, 79, 79, 79, },
{ 79, 79, 63, 79, 79, 67, 79, 79, 79, 79, 79, 62, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 55, 48, 48, 48, 48, 48, 79, 79, 79, 79, },
{ 79, 79, 63, 79, 79, 67, 79, 79, 79, 79, 79, 46, 46, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 79, 79, 79, 79, },
{ 79, 79, 63, 79, 79, 67, 79, 79, 79, 79, 79, 46, 47, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 79, 79, 79, 79, },
{ 79, 79, 63, 79, 79, 67, 79, 79, 79, 79, 79, 62, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 79, 79, 79, 79, },
{ 79, 79, 63, 79, 79, 67, 79, 79, 79, 79, 79, 62, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 50, 48, 79, 79, 79, 79, },
{ 79, 79, 63, 79, 79, 67, 79, 79, 79, 79, 79, 62, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 79, 79, 79, 79, },
{ 79, 79, 63, 79, 79, 67, 79, 79, 79, 79, 79, 62, 48, 48, 48, 48, 48, 48, 48, 52, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 79, 79, 79, 79, },
{ 79, 79, 63, 79, 79, 67, 79, 79, 79, 79, 79, 62, 48, 48, 48, 48, 48, 48, 48, 48, 53, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 79, 79, 79, 79, },
{ 79, 79, 63, 79, 79, 67, 79, 79, 79, 79, 79, 62, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 54, 48, 79, 79, 79, 79, },
{ 79, 79, 63, 79, 79, 67, 79, 79, 79, 79, 79, 62, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 79, 79, 79, 79, },
{ 79, 79, 63, 79, 79, 67, 79, 79, 79, 79, 79, 62, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 56, 48, 48, 48, 48, 48, 48, 79, 79, 79, 79, },
{ 79, 79, 63, 79, 79, 67, 79, 79, 79, 79, 79, 62, 48, 57, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 79, 79, 79, 79, },
{ 79, 79, 63, 79, 79, 67, 79, 79, 79, 79, 79, 62, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 58, 48, 48, 79, 79, 79, 79, },
{ 79, 79, 63, 79, 79, 67, 79, 79, 79, 79, 79, 62, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 59, 48, 48, 79, 79, 79, 79, },
{ 79, 79, 63, 79, 79, 67, 79, 79, 79, 79, 79, 62, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 60, 48, 48, 48, 48, 48, 79, 79, 79, 79, },
{ 79, 79, 63, 79, 79, 67, 79, 79, 79, 79, 79, 62, 48, 48, 48, 61, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 79, 79, 79, 79, },
{ 79, 79, 63, 79, 79, 67, 79, 79, 79, 79, 79, 62, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 79, 79, 79, 79, },
{ 79, 79, 63, 79, 79, 67, 79, 79, 79, 79, 79, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 79, 79, 79, 79, },
{ 79, 79, 79, 79, 79, 79, 64, 64, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, },
{ 79, 79, 79, 65, 79, 79, 64, 64, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, },
{ 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, },
{ 79, 79, 79, 79, 79, 79, 66, 66, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, },
{ 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 68, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, },
{ 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 69, 79, 79, 79, 79, },
{ 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 70, 79, 79, 79, 79, 79, 79, },
{ 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, },
{ 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 72, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, },
{ 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, },
{ 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 74, 79, 79, },
{ 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 72, 79, },
{ 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, },
{ 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, },
{ 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, },
{ 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, },
};
InputLexer::TokenType TOKENS[80] = { InputLexer::nonmatching, InputLexer::VARIABLE, InputLexer::VARIABLE, InputLexer::VARIABLE, InputLexer::VARIABLE, InputLexer::VARIABLE, InputLexer::PARSER, InputLexer::VARIABLE, InputLexer::VARIABLE, InputLexer::VARIABLE, InputLexer::VARIABLE, InputLexer::VARIABLE, InputLexer::VARIABLE, InputLexer::VARIABLE, InputLexer::VARIABLE, InputLexer::PRECEDENCE, InputLexer::VARIABLE, InputLexer::VARIABLE, InputLexer::VARIABLE, InputLexer::VARIABLE, InputLexer::VARIABLE, InputLexer::VARIABLE, InputLexer::LEXESIS, InputLexer::VARIABLE, InputLexer::VARIABLE, InputLexer::VARIABLE, InputLexer::VARIABLE, InputLexer::VARIABLE, InputLexer::VARIABLE, InputLexer::VARIABLE, InputLexer::VARIABLE, InputLexer::TERMINALS, InputLexer::VARIABLE, InputLexer::VARIABLE, InputLexer::VARIABLE, InputLexer::VARIABLE, InputLexer::START, InputLexer::VARIABLE, InputLexer::VARIABLE, InputLexer::VARIABLE, InputLexer::VARIABLE, InputLexer::VARIABLE, InputLexer::VARIABLE, InputLexer::GRAMMAR, InputLexer::VARIABLE, InputLexer::VARIABLE, InputLexer::TERMINAL, InputLexer::TERMINAL, InputLexer::VARIABLE, InputLexer::VARIABLE, InputLexer::LEFT, InputLexer::VARIABLE, InputLexer::VARIABLE, InputLexer::VARIABLE, InputLexer::RIGHT, InputLexer::VARIABLE, InputLexer::VARIABLE, InputLexer::VARIABLE, InputLexer::VARIABLE, InputLexer::VARIABLE, InputLexer::VARIABLE, InputLexer::NONASSOC, InputLexer::nonmatching, InputLexer::nonmatching, InputLexer::nonmatching, InputLexer::PARSERTYPE, InputLexer::NUM, InputLexer::nonmatching, InputLexer::nonmatching, InputLexer::nonmatching, InputLexer::LEXESISNAME, InputLexer::nonmatching, InputLexer::ARROW, InputLexer::nonmatching, InputLexer::nonmatching, InputLexer::SEMICOLON, InputLexer::COLON, InputLexer::PIPE, InputLexer::ignore, InputLexer::nonmatching, };
}
/**
 * Create a lexer on top of `in`, starting at byte offset 0.
 * Only a reference to the stream is kept.
 */
InputLexer::InputLexer(std::istream& in)
    : m_offset(0)
    , m_input(in)
{}
/**
 * Nothing to release: the lexer does not own the stream it reads from.
 */
InputLexer::~InputLexer()
{}
/**
 * Produce the next token that is not of type `ignore`.
 *
 * The automaton is run from state 0 until it rejects or the input ends,
 * remembering the longest prefix that ended in an accepting state. Whatever
 * was read beyond that prefix is put back into the stream.
 *
 * @throws NoMoreTokens if nothing could be matched and the stream is at its end
 * @throws NoMatch if nothing could be matched and there is still input left
 */
InputLexer::Token InputLexer::nextToken() {
    TokenType matchedType = ignore;
    std::string lexeme;

    // The body always runs at least once; it repeats for as long as the
    // token that was found has to be ignored.
    do {
        State current = 0;
        std::size_t acceptedLength = 0;
        lexeme.clear();

        while (!m_input.eof() && current != REJECT) {
            const char next = m_input.peek();
            // peek() raises eofbit when there was nothing left to look at
            if (m_input.eof())
                break;

            lexeme += next;
            current = TABLE[current][TRANS_IDX[(unsigned char)next]];

            const TokenType accepted = TOKENS[current];
            if (accepted != nonmatching) {
                // longest match so far
                acceptedLength = lexeme.length();
                matchedType = accepted;
            }

            m_input.get();
            ++m_offset;
        }

        // Return the characters read past the longest match, last one first.
        const std::size_t overshoot = lexeme.length() - acceptedLength;
        for (std::size_t pos = lexeme.length(); pos > acceptedLength; --pos) {
            m_input.putback(lexeme[pos - 1]);
        }
        m_offset -= overshoot;

        if (matchedType == nonmatching || acceptedLength == 0) {
            if (m_input.eof())
                throw NoMoreTokens();
            throw NoMatch();
        }

        lexeme.resize(acceptedLength);
    } while (matchedType == ignore);

    Token result;
    result.type = matchedType;
    result.content = lexeme;
    return result;
}
/**
 * Skip up to `n` bytes of the input.
 *
 * Skipping stops early when the stream runs out of data, so the byte offset
 * only ever counts bytes that were really consumed.
 *
 * @param n The number of bytes to skip
 */
void InputLexer::skip(std::size_t n) {
    for (std::size_t i = 0; i < n; ++i) {
        // get() returns eof() when no byte could be extracted; counting that
        // as a skipped byte would move the offset past the end of the input.
        if (m_input.get() == std::istream::traits_type::eof())
            break;
        ++m_offset;
    }
}
/**
 * Look at the next byte of the input without consuming it.
 *
 * @return The byte at the current head of the input stream
 * @throws NoMoreTokens if the input stream is at an end
 */
char InputLexer::peek() {
    if (m_input.eof())
        throw NoMoreTokens();

    // eofbit is only raised once a read has actually run into the end of the
    // stream, so the check above misses an exhausted stream that has not been
    // read from yet. Inspect the value peek() hands back as well, instead of
    // truncating the end-of-file marker to a char.
    const std::istream::int_type head = m_input.peek();
    if (head == std::istream::traits_type::eof())
        throw NoMoreTokens();
    return std::istream::traits_type::to_char_type(head);
}
/**
 * @return The byte offset tracked by the lexer: bytes consumed by
 *         nextToken() and skip(), minus the bytes that were put back
 */
std::size_t InputLexer::getByteOffset() {
    const std::size_t consumed = m_offset;
    return consumed;
}

129
src/inputparser.cpp Normal file
View File

@ -0,0 +1,129 @@
#include "InputLexer.h"
#include "Parsodus/inputparser.h"
namespace pds {
/**
 * Store the description that what() reports later on.
 */
InputParserException::InputParserException(std::string what)
    : m_what(what)
{}
/**
 * @return The description given at construction; the pointer stays valid for
 *         as long as this exception object lives
 */
// `noexcept` replaces the dynamic exception specification `throw()`, which is
// deprecated in C++17 and removed in C++20. Both forms are non-throwing, so
// this definition matches either spelling on the in-class declaration.
const char* InputParserException::what() const noexcept {
    return m_what.c_str();
}
Config InputParser::parseInput(std::istream& is) {
InputLexer lex(is);
Config config;
bool readingTerminals = false;
bool readingGrammar = false;
try {
while(true) {
InputLexer::Token token = lex.nextToken();
if(token.type != InputLexer::TERMINAL)
readingTerminals = false;
if(token.type != InputLexer::TERMINAL
&& token.type != InputLexer::ARROW
&& token.type != InputLexer::VARIABLE
&& token.type != InputLexer::SEMICOLON
&& token.type != InputLexer::PIPE)
readingGrammar = false;
switch(token.type) {
case InputLexer::PARSER:
lexColon(lex, token);
if(token.type == InputLexer::PARSERTYPE) {
if(token.content == "lalr(1)") {
config.parserType = Config::ParserType::LALR_1;
} else
throw InputParserException("Unkown parser type");
} else {
throw InputParserException("inputfile malformed, no parser type found in parser section");
}
break;
case InputLexer::LEXESIS:
lexColon(lex, token);
if(token.type != InputLexer::LEXESISNAME)
throw InputParserException("No valid input file found after 'lexesis', found '" + token.content + "' instead.");
config.lexesisFile = token.content;
break;
case InputLexer::TERMINALS:
lexColon(lex, token, false);
readingTerminals = true;
break;
case InputLexer::GRAMMAR:
lexColon(lex, token, false);
readingGrammar = true;
break;
case InputLexer::TERMINAL:
if(readingTerminals) {
config.grammar.terminals.insert(token.content);
} else {
throw InputParserException("Found a terminal outside a grammar or terminals section: " + token.content);
}
break;
case InputLexer::START:
lexColon(lex, token);
if(token.type != InputLexer::VARIABLE)
throw InputParserException("Expected to find a start variable, but found: " + token.content);
config.grammar.start = token.content;
break;
case InputLexer::VARIABLE:
if(readingGrammar) {
if(config.grammar.variables.find(token.content) == config.grammar.variables.end())
config.grammar.variables.insert(token.content);
std::string current_head = token.content;
std::set<Rule> current_rules;
if(config.grammar.rules.count(current_head)) {
current_rules = config.grammar.rules[current_head];
}
// Parsing rule
token = lex.nextToken();
if(token.type != InputLexer::ARROW)
throw InputParserException("No arrow found after '"+ current_head+"', but found '" + token.content + "' instead");
Rule rule;
rule.head = current_head;
bool parsing_head = true;
while(parsing_head) {
token = lex.nextToken();
switch(token.type) {
case InputLexer::VARIABLE:
rule.tail.push_back(token.content);
break;
case InputLexer::TERMINAL:
rule.tail.push_back(token.content);
break;
case InputLexer::SEMICOLON:
parsing_head = false;
case InputLexer::PIPE:
rule.tail.shrink_to_fit();
current_rules.insert(rule);
rule.tail.clear();
break;
default:
throw InputParserException("Expecting to find a variable, terminal, pipe or a semicolon, but found '" + token.content + "' instead");
}
}
config.grammar.rules[current_head] = current_rules;
} else
throw InputParserException("Found a variable outside a grammar section: " + token.content);
break;
default:
break;
}
}
} catch(InputLexer::NoMoreTokens& err) {
}
return config;
}
/**
 * Read the next token into `token` and insist on it being a colon.
 *
 * @param lex The lexer to read from
 * @param token Receives the token that was read last
 * @param nextoken When true, the token following the colon is read into
 *                 `token` as well
 * @throws InputParserException if the token read is not a colon
 */
void InputParser::lexColon(InputLexer& lex, InputLexer::Token &token, bool nextoken) {
    token = lex.nextToken();

    const bool isColon = (token.type == InputLexer::COLON);
    if (!isColon) {
        throw InputParserException("No colon found before '" + token.content + "'");
    }

    if (!nextoken) {
        return;
    }
    token = lex.nextToken();
}
}

View File

@ -4,7 +4,7 @@ LEXESIS = lexesis
TERMINALS = terminals TERMINALS = terminals
START = start START = start
GRAMMAR = grammar GRAMMAR = grammar
PARSERTYPE = [_a-zA-Z]+(\([1-9][0-9]*\))? PARSERTYPE = [_a-zA-Z]+(\([0-9][0-9]*\))
LEFT = left LEFT = left
RIGHT = right RIGHT = right
NONASSOC = nonassoc NONASSOC = nonassoc
@ -16,3 +16,4 @@ ARROW = ->|→
SEMICOLON = ; SEMICOLON = ;
COLON = : COLON = :
PIPE = \| PIPE = \|
ignore = \t| |\n|\r

View File

@ -1,5 +1,48 @@
#include <iostream> #include <iostream>
#include <fstream>
#include "optparse.h"
#include "Parsodus/inputparser.h"
// Entry point: parses the command line, opens the single input file given on
// it, runs pds::InputParser on that file and prints what was found.
// Returns 1 when the arguments are wrong or the file cannot be opened.
int main(int argc, char** argv) {
// Describe the command line interface.
optparse::OptionParser parser = optparse::OptionParser().description("Parsodus").usage("Parsodus [-d <outputdir>] [-l <language>] [-n <lexername>] <inputfile.pds>");
parser.add_help_option(true);
parser.version("%prog 1.0");
parser.add_option("-d", "--outputdir").dest("outputdir").help("Output the generated files to this directory\n[default: .]").metavar("<directory>").set_default(".");
parser.add_option("-l", "--lang", "--language").dest("language").help("The programming language to generate source files for\n[default: c++]").metavar("<language>").set_default("c++");
// NOTE(review): the help text talks about the parser name, but the value is
// stored under "lexername" and the usage string says <lexername> - confirm
// which name is intended.
parser.add_option("-n", "--name").dest("lexername").help("Use this name for the generated parser, the default is based on the input file name").metavar("<parsername>");
// NOTE(review): `options` is not read anywhere in the lines that follow.
optparse::Values options = parser.parse_args(argc, argv);
// Exactly one positional argument is expected: the input file.
std::vector<std::string> args = parser.args();
if (args.size() != 1) {
parser.print_usage(std::cerr);
return 1;
}
std::ifstream infile(args[0]);
if (!infile.good()) {
std::cerr << "Could not open file '" << args[0] << "' for reading" << std::endl;
return 1;
}
// NOTE(review): parseInput is called without a try/catch here, so an
// exception thrown while parsing is not handled in this function.
auto config = pds::InputParser::parseInput(infile);
// Reporting what the inputparser found, to be removed...
std::cout << "Start: " << config.grammar.start << std::endl;
for(auto a: config.grammar.terminals)
std::cout << "Terminal: " << a << std::endl;
for(auto a: config.grammar.variables)
std::cout << "Variable: " << a << std::endl;
// For every head, print each of its rules followed by the symbols of its tail.
for(auto a: config.grammar.rules) {
std::cout << "Starting rule with head: " << a.first << std::endl;
for(auto b: a.second) {
std::cout << "\tRule with head: " << b.head << std::endl;
for(auto c: b.tail) {
std::cout << "\t\tFound replacement rule: " << c << std::endl;
}
}
}
int main() {
std::cout << "Parsodus to the rescue" << std::endl;
} }