First example: a JSON parser

This commit is contained in:
Robin Jadoul 2017-01-19 14:09:51 +01:00
parent 832dcd813d
commit f373b73aa1
9 changed files with 651 additions and 0 deletions

4
examples/json/.gitignore vendored Normal file
View File

@ -0,0 +1,4 @@
JSONLexer.h
JSONLexer.cpp
JSONParser.h
JSONParser.cpp

View File

@ -0,0 +1,12 @@
LBRACE = {
RBRACE = }
LBRACKET = \[
RBRACKET = \]
COLON = :
COMMA = ,
TTRUE = true
TFALSE = false
TNULL = null
STRING = "([^\\"]|(\\(["\\\/bfnrt]|u[a-fA-F0-9][a-fA-F0-9][a-fA-F0-9][a-fA-F0-9])))*"
NUMBER = -?(0|[1-9][0-9]*)(\.[0-9]+)?([eE][+-]?[0-9]+)?
ignore = \n| |\r|\t

View File

@ -0,0 +1,40 @@
parser: SLR(1)
lexesis: JSONLexer.lxs
terminals:
"LBRACE"
"RBRACE"
"LBRACKET"
"RBRACKET"
"COLON"
"COMMA"
"TTRUE"
"TFALSE"
"TNULL"
"STRING"
"NUMBER"
start: <value>
grammar:
<value> ::= "STRING"
| "NUMBER"
| "TTRUE"
| "TFALSE"
| "TNULL"
| <object>
| <array>
;
<object> ::= "LBRACE" <keyvals> "RBRACE"
| "LBRACE" "RBRACE"
;
<keyvals> ::= "STRING" "COLON" <value>
| "STRING" "COLON" <value> "COMMA" <keyvals>
;
<array> ::= "LBRACKET" <vals> "RBRACKET"
| "LBRACKET" "RBRACKET"
;
<vals> ::= <value>
| <value> "COMMA" <vals>
;

BIN
examples/json/json Executable file

Binary file not shown.

269
examples/json/json.cpp Normal file
View File

@ -0,0 +1,269 @@
#include "json.h"
#include <algorithm>
#include <cmath>
#include <iostream>
namespace json {
/**
 * Conversion to std::string.
 *
 * @return a copy of the stored string
 * @throws JSONError when this value is not a JSON string
 */
JSON::operator std::string() const {
    if (type == String) {
        return *val.str;
    }
    throw JSONError("Not a JSON string");
}
/**
 * Conversion to double.
 *
 * @return the stored number
 * @throws JSONError when this value is not a JSON number
 */
JSON::operator double() const {
    if (type == Num) {
        return val.num;
    }
    throw JSONError("Not a JSON number");
}
/**
 * Conversion to bool.
 *
 * A JSON null converts to false, a JSON bool converts to its stored value.
 *
 * @throws JSONError for every other kind of value
 */
JSON::operator bool() const {
    switch (type) {
        case Null:
            return false;
        case Bool:
            return val.b;
        default:
            throw JSONError("Not a JSON bool");
    }
}
/**
 * Access the element at position `i` of a JSON array.
 *
 * @param i zero-based index into the array
 * @return a mutable reference to the element
 * @throws JSONError when this value is not an array, or when `i` is
 *         negative or not smaller than the number of elements
 */
JSON& JSON::operator[] (int i) const {
    if (type != Array)
        throw JSONError("Not a JSON array");
    // std::deque::operator[] is unchecked; reject bad indices explicitly
    // instead of running into undefined behaviour.
    if (i < 0 || static_cast<std::size_t>(i) >= val.arr->size())
        throw JSONError("JSON array index out of range");
    return (*val.arr)[static_cast<std::size_t>(i)];
}
/**
 * Access the member named `s` of a JSON object.
 *
 * The lookup goes through std::map::operator[], so a key that is not yet
 * present is inserted with a default-constructed (null) JSON value.
 *
 * @param s the member name
 * @return a mutable reference to the member
 * @throws JSONError when this value is not an object
 */
JSON& JSON::operator[] (std::string s) const {
    if (type == Object) {
        std::map<std::string, JSON>& members = *val.obj;
        return members[s];
    }
    throw JSONError("Not a JSON object");
}
/**
 * Access the member named by the C string `s` of a JSON object.
 *
 * The lookup goes through std::map::operator[], so a key that is not yet
 * present is inserted with a default-constructed (null) JSON value.
 *
 * @param s the member name
 * @return a mutable reference to the member
 * @throws JSONError when this value is not an object
 */
JSON& JSON::operator[] (const char* s) const {
    if (type == Object) {
        std::map<std::string, JSON>& members = *val.obj;
        return members[s];
    }
    throw JSONError("Not a JSON object");
}
/**
 * Subscript with another JSON value.
 *
 * A JSON string is used as an object key, a JSON number is rounded to the
 * nearest integer and used as an array index.
 *
 * @param idx the JSON value to subscript with
 * @throws JSONError when `idx` is neither a string nor a number (errors of
 *         the selected overload propagate as well)
 */
JSON& JSON::operator[] (const JSON& idx) const {
    switch (idx.type) {
        case String:
            return (*this)[static_cast<std::string>(idx)];
        case Num: {
            const double raw = static_cast<double>(idx);
            return (*this)[static_cast<int>(std::round(raw))];
        }
        default:
            throw JSONError("Not a subscriptable JSON value");
    }
}
/**
 * Named equivalent of the std::string conversion operator.
 *
 * @throws JSONError when this value is not a JSON string
 */
std::string JSON::to_string() const {
    return this->operator std::string();
}
/**
 * Named equivalent of the double conversion operator.
 *
 * @throws JSONError when this value is not a JSON number
 */
double JSON::to_double() const {
    return this->operator double();
}
/**
 * Named equivalent of the bool conversion operator.
 *
 * @throws JSONError when this value is neither a JSON bool nor null
 */
bool JSON::to_bool() const {
    return this->operator bool();
}
/**
 * @return true exactly when this value is the JSON null
 */
bool JSON::isNull() const {
    const bool holdsNull = (Null == type);
    return holdsNull;
}
/**
 * Number of members of an object or number of elements of an array.
 *
 * @throws JSONError when this value is neither an object nor an array
 */
std::size_t JSON::size() const {
    switch (type) {
        case Object:
            return val.obj->size();
        case Array:
            return val.arr->size();
        default:
            throw JSONError("Not a JSON value with a size");
    }
}
/**
 * Append a copy of `other` at the end of a JSON array.
 *
 * @throws JSONError when this value is not an array
 */
void JSON::push_back(const JSON& other) {
    if (type != Array) {
        throw JSONError("Not a JSON array");
    }
    val.arr->push_back(other);
}
/**
 * Insert a copy of `other` at the beginning of a JSON array.
 *
 * @throws JSONError when this value is not an array
 */
void JSON::push_front(const JSON& other) {
    if (type != Array) {
        throw JSONError("Not a JSON array");
    }
    val.arr->push_front(other);
}
/**
 * @return the tag describing which kind of JSON value is stored
 */
Type JSON::getType() const {
    return this->type;
}
/**
 * Build a JSON number holding `n`.
 */
JSON JSON::num(double n) {
    Value payload;
    payload.num = n;
    return JSON(Num, payload);
}
/**
 * Build a JSON string holding a heap-allocated copy of `s`.
 *
 * The returned JSON value owns the copy.
 */
JSON JSON::string(const std::string& s) {
    Value payload;
    payload.str = new std::string(s);
    return JSON(String, payload);
}
/**
 * Build an empty JSON object.
 */
JSON JSON::object() {
    JSON emptyObject(Object);
    return emptyObject;
}
/**
 * Build an empty JSON array.
 */
JSON JSON::array() {
    JSON emptyArray(Array);
    return emptyArray;
}
/**
 * Build a JSON bool holding `b`.
 */
JSON JSON::boolean(bool b) {
    Value payload;
    payload.b = b;
    return JSON(Bool, payload);
}
/**
 * Build a JSON null.
 */
JSON JSON::null() {
    JSON nullValue(Null);
    return nullValue;
}
/**
 * Construct an empty value of kind `t`.
 *
 * Arrays, objects and strings get a freshly allocated, empty payload;
 * for the remaining kinds the payload is left untouched.
 */
JSON::JSON(Type t) : type(t)
{
    switch (t) {
        case Array:
            val.arr = new std::deque<JSON>();
            break;
        case Object:
            val.obj = new std::map<std::string, JSON>();
            break;
        case String:
            val.str = new std::string();
            break;
        default:
            break;
    }
}
/**
 * Construct a value of kind `t` that adopts the payload `v` as-is.
 *
 * A pointer stored in `v` is later deleted by this object's destructor.
 */
JSON::JSON(Type t, Value v)
    : type(t)
    , val(v)
{
}
/**
 * Default constructor: the JSON null value.
 */
JSON::JSON()
    : type(Null)
{
}
/**
 * Copy constructor: deep-copies the string, object or array payload so
 * that both values own independent storage. Scalar payloads are copied
 * bitwise through the union.
 */
JSON::JSON(const JSON& other) : type(other.type), val(other.val) {
    switch (type) {
        case String:
            val.str = new std::string(*other.val.str);
            break;
        case Object:
            val.obj = new std::map<std::string, JSON>(*other.val.obj);
            break;
        case Array:
            val.arr = new std::deque<JSON>(*other.val.arr);
            break;
        default:
            break;
    }
}
/**
 * Move constructor: takes over the payload of `other`.
 *
 * Afterwards `other` is a JSON null. Previously it kept its original type
 * tag together with a null payload pointer, so copying, printing or asking
 * the size of a moved-from value dereferenced a null pointer.
 */
JSON::JSON(JSON&& other) : type(other.type), val(other.val) {
    // Ownership of any heap payload now lies with *this; turn the source
    // into a plain null so every later operation on it stays well-defined.
    other.type = Null;
    other.val = Value();
}
/**
 * Copy assignment: replaces the current value by a deep copy of `other`.
 *
 * The copy is made before the old payload is released, which makes the
 * operation safe for self-assignment (`a = a`), for assigning a value that
 * lives inside this one (`a = a["key"]`), and leaves `*this` untouched if
 * the copy throws.
 *
 * @return *this
 */
JSON& JSON::operator=(const JSON& other) {
    if (this == &other)
        return *this;

    // May throw (allocation); nothing of *this has been modified yet.
    JSON replacement(other);

    // Exchange payloads: `replacement` ends up owning the old payload and
    // releases it in its destructor when this function returns.
    const Type previousType = type;
    const Value previousVal = val;
    type = replacement.type;
    val = replacement.val;
    replacement.type = previousType;
    replacement.val = previousVal;
    return *this;
}
/**
 * Move assignment: releases the current payload and takes over the one
 * of `other`, which becomes a JSON null afterwards.
 *
 * Self-move is a no-op. The payload of `other` is detached before the old
 * payload is deleted, so moving from a value nested inside this one
 * (`a = std::move(a["key"])`) does not read freed memory.
 *
 * @return *this
 */
JSON& JSON::operator=(JSON&& other) {
    if (this == &other)
        return *this;

    // Detach the incoming payload first: deleting our own payload below
    // may destroy `other` when it is one of our children.
    const Type incomingType = other.type;
    const Value incomingVal = other.val;
    other.type = Null;
    other.val = Value();

    if (type == String)
        delete val.str;
    else if (type == Object)
        delete val.obj;
    else if (type == Array)
        delete val.arr;

    type = incomingType;
    val = incomingVal;
    return *this;
}
/**
 * Destructor: frees the heap payload owned by strings, objects and arrays.
 */
JSON::~JSON() {
    switch (type) {
        case String:
            delete val.str;
            break;
        case Object:
            delete val.obj;
            break;
        case Array:
            delete val.arr;
            break;
        default:
            break;
    }
}
namespace {
/**
 * Write `text` as a quoted JSON string literal.
 *
 * Quotes, backslashes and control characters below U+0020 are escaped so
 * that the output is valid JSON; all other bytes are written unchanged.
 */
void writeQuoted(std::ostream& os, const std::string& text) {
    static const char hexDigits[] = "0123456789abcdef";
    os << '"';
    for (const char ch : text) {
        switch (ch) {
            case '"':  os << "\\\""; break;
            case '\\': os << "\\\\"; break;
            case '\b': os << "\\b";  break;
            case '\f': os << "\\f";  break;
            case '\n': os << "\\n";  break;
            case '\r': os << "\\r";  break;
            case '\t': os << "\\t";  break;
            default: {
                const unsigned char byte = static_cast<unsigned char>(ch);
                if (byte < 0x20) {
                    // remaining control characters need the \u00XX form
                    os << "\\u00" << hexDigits[byte >> 4] << hexDigits[byte & 0x0F];
                } else {
                    os << ch;
                }
                break;
            }
        }
    }
    os << '"';
}
}

/**
 * Serialise `j` as JSON text onto `os`.
 *
 * Objects and arrays are written on a single line with ", " between
 * entries and ": " between a key and its value. Strings and object keys
 * are escaped. Numbers use the stream's current floating-point formatting.
 *
 * @return os, to allow chaining
 */
std::ostream& operator<<(std::ostream& os, const JSON& j) {
    switch (j.type) {
        case Num:
            os << j.val.num;
            break;
        case String:
            writeQuoted(os, *j.val.str);
            break;
        case Object: {
            os << '{';
            bool first = true;
            for (const auto& member : *j.val.obj) {
                if (!first)
                    os << ", ";
                first = false;
                // write the key directly instead of building a temporary JSON
                writeQuoted(os, member.first);
                os << ": " << member.second;
            }
            os << '}';
            break;
        }
        case Array: {
            os << '[';
            bool first = true;
            for (const auto& element : *j.val.arr) {
                if (!first)
                    os << ", ";
                first = false;
                os << element;
            }
            os << ']';
            break;
        }
        case Bool:
            os << (j.val.b ? "true" : "false");
            break;
        case Null:
            os << "null";
            break;
    }
    return os;
}
}

91
examples/json/json.h Normal file
View File

@ -0,0 +1,91 @@
#pragma once
#ifndef JSON_H
#define JSON_H
#include <deque>
#include <iostream>
#include <map>
#include <memory>
#include <stdexcept>
#include <string>
namespace json {
/**
 * Exception thrown when a JSON value is used in a way its type does not
 * support (for example subscripting a number).
 */
class JSONError : public std::runtime_error {
public:
    /**
     * @param msg human readable description, available through what()
     */
    explicit JSONError(const std::string& msg) : std::runtime_error(msg) { }
};
/**
 * Tag identifying which kind of value a JSON object currently holds.
 */
enum Type {
    Num,     ///< number, stored as a double
    String,  ///< string, stored as a heap-allocated std::string
    Object,  ///< object, stored as a heap-allocated map from name to JSON
    Array,   ///< array, stored as a heap-allocated deque of JSON
    Bool,    ///< true or false
    Null     ///< the null value; no payload
};
/**
 * A single JSON value: number, string, object, array, bool or null.
 *
 * The kind of value is recorded in a type tag; the payload lives in a
 * union. Strings, objects and arrays are held through owning pointers that
 * are deep-copied on copy and released in the destructor.
 */
class JSON {
    public:
        /**
         * Default constructor, is a JSON null value
         */
        JSON();
        /// Deep copy of `other`.
        JSON(const JSON& other);
        /// Takes over the payload of `other`.
        JSON(JSON&& other);
        /// Replaces this value by a deep copy of `other`.
        JSON& operator=(const JSON& other);
        /// Replaces this value by the payload taken from `other`.
        JSON& operator=(JSON&& other);
        /// Releases any heap payload owned by this value.
        ~JSON();

        /// Copy of the stored string; throws JSONError unless this is a string.
        operator std::string() const;
        /// The stored number; throws JSONError unless this is a number.
        operator double() const;
        /// The stored bool, or false for null; throws JSONError otherwise.
        operator bool() const;

        /// Array element by index; throws JSONError unless this is an array.
        JSON& operator[] (int) const;
        /// Object member by name (inserted as null when absent, as with
        /// std::map::operator[]); throws JSONError unless this is an object.
        JSON& operator[] (std::string) const;
        /// Same as the std::string overload, for C strings.
        JSON& operator[] (const char*) const;
        /// Subscript with a JSON string (used as key) or a JSON number
        /// (rounded and used as index); throws JSONError for other kinds.
        JSON& operator[] (const JSON&) const;

        /// Named form of the std::string conversion.
        std::string to_string() const;
        /// Named form of the double conversion.
        double to_double() const;
        /// Named form of the bool conversion.
        bool to_bool() const;

        /// Number of members/elements; throws JSONError unless this is an
        /// object or an array.
        std::size_t size() const;
        /// Appends a copy of `other`; throws JSONError unless this is an array.
        void push_back(const JSON& other);
        /// Prepends a copy of `other`; throws JSONError unless this is an array.
        void push_front(const JSON& other);

        /// True exactly when this is the JSON null.
        bool isNull() const;
        /// The kind of value currently held.
        Type getType() const;

        /// Factory for a JSON number.
        static JSON num(double n);
        /// Factory for a JSON string holding a copy of `s`.
        static JSON string(const std::string& s);
        /// Factory for an empty JSON object.
        static JSON object();
        /// Factory for an empty JSON array.
        static JSON array();
        /// Factory for a JSON bool.
        static JSON boolean(bool b);
        /// Factory for the JSON null.
        static JSON null();

        /// Serialises a JSON value as text onto a stream.
        friend std::ostream& operator<<(std::ostream&, const JSON&);
    private:
        /// Selects the active member of `val`.
        Type type;
        // Non-trivial members are stored as owning pointers because of the
        // restrictions on union members; `type` tells which one is active.
        union Value {
            double num;                        // active for Num
            std::string* str;                  // active for String
            std::map<std::string, JSON>* obj;  // active for Object
            std::deque<JSON>* arr;             // active for Array
            bool b;                            // active for Bool
        } val;

        /// Empty value of kind `t`; allocates the payload for String,
        /// Object and Array.
        JSON(Type t);
        /// Value of kind `t` adopting the payload `v` as-is.
        JSON(Type t, Value v);
};
std::ostream& operator<<(std::ostream& os, const JSON& j);
}
#endif

12
examples/json/main.cpp Normal file
View File

@ -0,0 +1,12 @@
#include "JSONLexer.h"
#include "json.h"
#include "parser.h"
#include <iostream>
using namespace std;
using namespace json;
/**
 * Reads a JSON document from standard input, parses it and writes the
 * resulting value back to standard output, followed by a newline.
 */
int main() {
    JSONLexer lex(std::cin);
    Parser p(lex);
    const auto parsed = p.parse();
    std::cout << parsed << std::endl;
}

180
examples/json/parser.cpp Normal file
View File

@ -0,0 +1,180 @@
#include "parser.h"
#include <algorithm>
#include <cassert>
#include <codecvt>
#include <deque>
#include <locale>
#include <sstream>
#include <iostream>
namespace {
/**
 * Convert the text of a NUMBER token to a double.
 *
 * The conversion always uses the classic "C" locale, so the decimal point
 * is '.' regardless of the global locale the program has installed.
 *
 * @param in the token text, e.g. "-12.5e3"
 * @return the parsed value, or 0 when nothing could be parsed
 */
double readNumber(std::string in) {
    std::istringstream iss(in);
    // JSON numbers are locale independent; do not inherit the global locale
    iss.imbue(std::locale::classic());
    double d = 0.0;
    iss >> d;
    return d;
}
/// Numeric value of the hexadecimal digit `c`, or -1 if it is not one.
int hexDigitValue(char c) {
    if (c >= '0' && c <= '9')
        return c - '0';
    if (c >= 'a' && c <= 'f')
        return c - 'a' + 10;
    if (c >= 'A' && c <= 'F')
        return c - 'A' + 10;
    return -1;
}

/// Read exactly four hex digits of `in` starting at `pos`; all of them must
/// lie before index `end`. Throws SyntaxError otherwise.
unsigned long readHex4(const std::string& in, std::size_t pos, std::size_t end) {
    if (pos + 4 > end)
        throw SyntaxError("Invalid \\u escape inside string");
    unsigned long value = 0;
    for (std::size_t k = 0; k < 4; k++) {
        const int digit = hexDigitValue(in[pos + k]);
        if (digit < 0)
            throw SyntaxError("Invalid \\u escape inside string");
        value = value * 16 + static_cast<unsigned long>(digit);
    }
    return value;
}

/// Append the UTF-8 encoding of the code point `cp` (at most U+10FFFF) to `out`.
void appendUtf8(std::string& out, unsigned long cp) {
    if (cp < 0x80) {
        out.push_back(static_cast<char>(cp));
    } else if (cp < 0x800) {
        out.push_back(static_cast<char>(0xC0 | (cp >> 6)));
        out.push_back(static_cast<char>(0x80 | (cp & 0x3F)));
    } else if (cp < 0x10000) {
        out.push_back(static_cast<char>(0xE0 | (cp >> 12)));
        out.push_back(static_cast<char>(0x80 | ((cp >> 6) & 0x3F)));
        out.push_back(static_cast<char>(0x80 | (cp & 0x3F)));
    } else {
        out.push_back(static_cast<char>(0xF0 | (cp >> 18)));
        out.push_back(static_cast<char>(0x80 | ((cp >> 12) & 0x3F)));
        out.push_back(static_cast<char>(0x80 | ((cp >> 6) & 0x3F)));
        out.push_back(static_cast<char>(0x80 | (cp & 0x3F)));
    }
}

/**
 * Decode the text of a STRING token into the string it denotes.
 *
 * The first and last character of `in` (the surrounding quotes) are
 * dropped, escape sequences are replaced by the characters they stand for
 * and \uXXXX escapes are emitted as UTF-8. Upper- and lowercase hex digits
 * are both accepted, and a high/low surrogate pair is combined into one
 * code point.
 *
 * @param in the token text including both quotes
 * @return the decoded contents; empty when `in` is shorter than two characters
 * @throws SyntaxError on a raw control character (below U+0020), an
 *         unknown or truncated escape, or an unpaired surrogate
 */
std::string readString(std::string in) {
    if (in.length() < 2)
        return std::string(); // no room for a body; avoids size_t underflow below

    const std::size_t end = in.length() - 1; // index of the closing quote
    std::string result;
    result.reserve(end - 1);

    std::size_t i = 1;
    while (i < end) {
        const char ch = in[i];
        if (ch != '\\') {
            // Compare as unsigned so that UTF-8 bytes >= 0x80 are not
            // mistaken for (negative) control characters.
            if (static_cast<unsigned char>(ch) < 0x20) {
                throw SyntaxError("Control character inside string");
            }
            result.push_back(ch);
            i++;
            continue;
        }

        // A backslash must be followed by its escape character before the
        // closing quote.
        if (i + 1 >= end)
            throw SyntaxError("Unterminated escape sequence inside string");
        const char escape = in[i + 1];
        i += 2;

        switch (escape) {
            case '"':
            case '\\':
            case '/':
                result.push_back(escape);
                break;
            case 'b':
                result.push_back('\b');
                break;
            case 'f':
                result.push_back('\f');
                break;
            case 'n':
                result.push_back('\n');
                break;
            case 'r':
                result.push_back('\r');
                break;
            case 't':
                result.push_back('\t');
                break;
            case 'u': {
                unsigned long codePoint = readHex4(in, i, end);
                i += 4;
                if (codePoint >= 0xD800 && codePoint <= 0xDBFF) {
                    // High surrogate: only meaningful together with a
                    // directly following \uDC00-\uDFFF low surrogate.
                    if (i + 1 >= end || in[i] != '\\' || in[i + 1] != 'u')
                        throw SyntaxError("Unpaired surrogate in \\u escape");
                    const unsigned long low = readHex4(in, i + 2, end);
                    if (low < 0xDC00 || low > 0xDFFF)
                        throw SyntaxError("Unpaired surrogate in \\u escape");
                    codePoint = 0x10000 + ((codePoint - 0xD800) << 10) + (low - 0xDC00);
                    i += 6;
                } else if (codePoint >= 0xDC00 && codePoint <= 0xDFFF) {
                    throw SyntaxError("Unpaired surrogate in \\u escape");
                }
                appendUtf8(result, codePoint);
                break;
            }
            default:
                throw SyntaxError("Invalid escape sequence inside string");
        }
    }
    return result;
}
}
namespace json {
/**
 * Construct a parser that pulls its tokens from `lex`.
 *
 * The lexer is taken by value and copied into the parser.
 */
Parser::Parser(JSONLexer lex) : JSONParser<JSON>(), m_lex(lex)
{}
/**
 * Fetch the next token from the lexer and translate it into a parser token.
 *
 * Punctuation tokens carry a JSON null as value. STRING, NUMBER, true,
 * false and null tokens carry the corresponding JSON value. When the lexer
 * reports that it has no more tokens, a T_EOF token is returned.
 *
 * @throws std::logic_error if the lexer yields a token type this function
 *         does not know about (not expected to happen)
 */
Parser::Token Parser::lex() {
    try {
        const JSONLexer::Token orig = m_lex.nextToken();
        switch (orig.type) {
            case JSONLexer::COLON:
                return Token{JSONParser_Symbol::T_COLON, JSON()};
            case JSONLexer::COMMA:
                return Token{JSONParser_Symbol::T_COMMA, JSON()};
            case JSONLexer::LBRACE:
                return Token{JSONParser_Symbol::T_LBRACE, JSON()};
            case JSONLexer::RBRACE:
                return Token{JSONParser_Symbol::T_RBRACE, JSON()};
            case JSONLexer::LBRACKET:
                return Token{JSONParser_Symbol::T_LBRACKET, JSON()};
            case JSONLexer::RBRACKET:
                return Token{JSONParser_Symbol::T_RBRACKET, JSON()};
            case JSONLexer::STRING:
                return Token{JSONParser_Symbol::T_STRING, JSON::string(readString(orig.content))};
            case JSONLexer::NUMBER:
                return Token{JSONParser_Symbol::T_NUMBER, JSON::num(readNumber(orig.content))};
            case JSONLexer::TTRUE:
                return Token{JSONParser_Symbol::T_TTRUE, JSON::boolean(true)};
            case JSONLexer::TFALSE:
                return Token{JSONParser_Symbol::T_TFALSE, JSON::boolean(false)};
            case JSONLexer::TNULL:
                return Token{JSONParser_Symbol::T_TNULL, JSON::null()};
            default:
                // Should be impossible; fail loudly rather than hand an
                // uninitialised symbol to the parser.
                throw std::logic_error("Parser::lex: unexpected lexer token type");
        }
    }
    catch (const JSONLexer::NoMoreTokens&) {
        return Token{JSONParser_Symbol::T_EOF, JSON()};
    }
}
/// Reduction with a single part: hands its value on unchanged.
JSON Parser::reduce_0(std::deque<Token> subparts) {
    Token& only = subparts.front();
    return std::move(only.value);
}
/// Reduction with a single part: hands its value on unchanged.
JSON Parser::reduce_1(std::deque<Token> subparts) {
    Token& only = subparts.front();
    return std::move(only.value);
}
/// Reduction with a single part: hands its value on unchanged.
JSON Parser::reduce_2(std::deque<Token> subparts) {
    Token& only = subparts.front();
    return std::move(only.value);
}
/// Reduction with a single part: hands its value on unchanged.
JSON Parser::reduce_3(std::deque<Token> subparts) {
    Token& only = subparts.front();
    return std::move(only.value);
}
/// Reduction with a single part: hands its value on unchanged.
JSON Parser::reduce_4(std::deque<Token> subparts) {
    Token& only = subparts.front();
    return std::move(only.value);
}
/// Reduction with a single part: hands its value on unchanged.
JSON Parser::reduce_5(std::deque<Token> subparts) {
    Token& only = subparts.front();
    return std::move(only.value);
}
/// Reduction with a single part: hands its value on unchanged.
JSON Parser::reduce_6(std::deque<Token> subparts) {
    Token& only = subparts.front();
    return std::move(only.value);
}
/// Reduction with three parts: yields the value of the middle one
/// (the parts on either side are dropped).
JSON Parser::reduce_7(std::deque<Token> subparts) {
    Token& inner = subparts[1];
    return std::move(inner.value);
}
/// Reduction that ignores its parts and yields an empty JSON object.
JSON Parser::reduce_8(std::deque<Token>) {
    JSON emptyObject = JSON::object();
    return emptyObject;
}
/// Reduction for a single key/value pair: builds a new object holding
/// part 2 (the value) under part 0 (the key).
JSON Parser::reduce_9(std::deque<Token> subparts) {
    JSON& key = subparts[0].value;
    JSON& member = subparts[2].value;
    JSON result = JSON::object();
    result[std::move(key)] = std::move(member);
    return result;
}
/// Reduction for a key/value pair followed by more pairs: takes the
/// object in part 4 and stores part 2 (the value) under part 0 (the key).
JSON Parser::reduce_10(std::deque<Token> subparts) {
    JSON& key = subparts[0].value;
    JSON& member = subparts[2].value;
    JSON result = std::move(subparts[4].value);
    result[std::move(key)] = std::move(member);
    return result;
}
/// Reduction with three parts: yields the value of the middle one
/// (the parts on either side are dropped).
JSON Parser::reduce_11(std::deque<Token> subparts) {
    Token& inner = subparts[1];
    return std::move(inner.value);
}
/// Reduction that ignores its parts and yields an empty JSON array.
JSON Parser::reduce_12(std::deque<Token>) {
    JSON emptyArray = JSON::array();
    return emptyArray;
}
/// Reduction for a single array element: builds a new array that
/// contains just the value of part 0.
JSON Parser::reduce_13(std::deque<Token> subparts) {
    JSON& element = subparts[0].value;
    JSON result = JSON::array();
    result.push_front(std::move(element));
    return result;
}
/// Reduction for an array element followed by more elements: takes the
/// array in part 2 and puts the value of part 0 in front of it.
JSON Parser::reduce_14(std::deque<Token> subparts) {
    JSON& element = subparts[0].value;
    JSON result = std::move(subparts[2].value);
    result.push_front(std::move(element));
    return result;
}
}

43
examples/json/parser.h Normal file
View File

@ -0,0 +1,43 @@
#pragma once
#ifndef PARSER_H
#define PARSER_H
#include "JSONLexer.h"
#include "JSONParser.h"
#include "json.h"
#include <deque>
#include <memory>
#include <stdexcept>
namespace json {
/**
 * JSON parser built on the generated JSONParser<JSON> base class.
 *
 * It supplies the base with tokens read from a JSONLexer (lex) and with
 * one reduce_N function per reduction, each combining the values of the
 * matched parts into a single JSON value.
 */
class Parser : public JSONParser<JSON> {
    public:
        /// Creates a parser reading its tokens from (a copy of) `lex`.
        Parser(JSONLexer lex);
    protected:
        /// Next token for the parser; T_EOF once the lexer is exhausted.
        Token lex() override;

        // reduce_0 .. reduce_6: pass the value of the single part through.
        JSON reduce_0(std::deque<Token> subparts) override;
        JSON reduce_1(std::deque<Token> subparts) override;
        JSON reduce_2(std::deque<Token> subparts) override;
        JSON reduce_3(std::deque<Token> subparts) override;
        JSON reduce_4(std::deque<Token> subparts) override;
        JSON reduce_5(std::deque<Token> subparts) override;
        JSON reduce_6(std::deque<Token> subparts) override;
        /// Yields the value of part 1 (the middle part).
        JSON reduce_7(std::deque<Token> subparts) override;
        /// Yields an empty object.
        JSON reduce_8(std::deque<Token> subparts) override;
        /// New object with part 2 stored under the key in part 0.
        JSON reduce_9(std::deque<Token> subparts) override;
        /// Object from part 4 with part 2 stored under the key in part 0.
        JSON reduce_10(std::deque<Token> subparts) override;
        /// Yields the value of part 1 (the middle part).
        JSON reduce_11(std::deque<Token> subparts) override;
        /// Yields an empty array.
        JSON reduce_12(std::deque<Token> subparts) override;
        /// New array containing part 0.
        JSON reduce_13(std::deque<Token> subparts) override;
        /// Array from part 2 with part 0 put in front.
        JSON reduce_14(std::deque<Token> subparts) override;
    private:
        /// Source of the tokens handed out by lex().
        JSONLexer m_lex;
};
}
#endif