168 lines
4.6 KiB
C++
168 lines
4.6 KiB
C++
/*
|
|
Lexesis - A language agnostic lexical analyser generator
|
|
Copyright © 2016-2017 Thomas Avé, Robin Jadoul
|
|
|
|
Permission is hereby granted, free of charge, to any person obtaining
|
|
a copy of this software and associated documentation files (the "Software"),
|
|
to deal in the Software without restriction, including without limitation
|
|
the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
|
and/or sell copies of the Software, and to permit persons to whom the
|
|
Software is furnished to do so, subject to the following conditions:
|
|
|
|
The above copyright notice and this permission notice shall be included
|
|
in all copies or substantial portions of the Software.
|
|
|
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
|
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
|
OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
|
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
|
|
DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
|
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE
|
|
OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
|
*/
|
|
|
|
#pragma once
|
|
#ifndef RE_H
|
|
#define RE_H
|
|
|
|
#include "Lexesis/automata.h"

#include <memory>
#include <stdexcept>
#include <string>
#include <utility>
#include <vector>
|
|
|
|
namespace lxs {
|
|
/**
|
|
* An abstract regular expression
|
|
*/
|
|
class RE
|
|
{
|
|
public:
|
|
virtual ~RE() {}
|
|
/**
|
|
* Convert this regex to an ENFA
|
|
* This extends the given enfa, and attaches itself to the given `attach` state
|
|
*/
|
|
virtual State toENFA(ENFA& enfa, State attach) = 0;
|
|
|
|
/**
|
|
* Convert this regex to a text regex (useful to check if parsing was correct)
|
|
*/
|
|
virtual std::string toRe() = 0;
|
|
};
|
|
|
|
/**
|
|
* A regex for the empty language
|
|
*/
|
|
class EmptyRE : public RE
|
|
{
|
|
public:
|
|
EmptyRE() {}
|
|
~EmptyRE() {}
|
|
virtual State toENFA(ENFA& enfa, State attach);
|
|
virtual std::string toRe();
|
|
};
|
|
|
|
/**
|
|
* A regex for the language containing only the empty string
|
|
*/
|
|
class EpsilonRE : public RE
|
|
{
|
|
public:
|
|
EpsilonRE() {}
|
|
~EpsilonRE() {}
|
|
virtual State toENFA(ENFA& enfa, State attach);
|
|
virtual std::string toRe();
|
|
};
|
|
|
|
/**
|
|
* A regex for the language containing a single character
|
|
*/
|
|
class SingleRE : public RE
|
|
{
|
|
public:
|
|
/**
|
|
* @param c The character of the language
|
|
*/
|
|
SingleRE(char c) : c(c) {}
|
|
~SingleRE() {}
|
|
virtual State toENFA(ENFA& enfa, State attach);
|
|
virtual std::string toRe();
|
|
|
|
char c;
|
|
};
|
|
|
|
/**
|
|
* A regex for the language containing multiple single-symbol strings
|
|
*/
|
|
class MultiRE : public RE
|
|
{
|
|
public:
|
|
/**
|
|
* @param chars The list of symbols contained in the language
|
|
*/
|
|
MultiRE(std::vector<char> chars) : chars(std::move(chars)) {}
|
|
~MultiRE() {}
|
|
virtual State toENFA(ENFA& enfa, State attach);
|
|
virtual std::string toRe();
|
|
|
|
std::vector<char> chars;
|
|
};
|
|
|
|
/**
|
|
* A regex for the concatenation of two languages
|
|
*/
|
|
class ConcatRE : public RE
|
|
{
|
|
public:
|
|
/**
|
|
* @param e The first language
|
|
* @param f The second language
|
|
*/
|
|
ConcatRE(std::shared_ptr<RE> e, std::shared_ptr<RE> f) : e(e), f(f) {}
|
|
~ConcatRE() {}
|
|
virtual State toENFA(ENFA& enfa, State attach);
|
|
virtual std::string toRe();
|
|
|
|
std::shared_ptr<RE> e, f;
|
|
};
|
|
|
|
/**
|
|
* The regex for the kleene star of a language
|
|
*/
|
|
class StarRE : public RE
|
|
{
|
|
public:
|
|
/**
|
|
* @param e The language to apply the star to
|
|
*/
|
|
StarRE(std::shared_ptr<RE> e) : e(e) {}
|
|
~StarRE() {}
|
|
virtual State toENFA(ENFA& enfa, State attach);
|
|
virtual std::string toRe();
|
|
|
|
std::shared_ptr<RE> e;
|
|
};
|
|
|
|
/**
|
|
* A regex for the sum/disjunction of two languages
|
|
*/
|
|
class PlusRE : public RE
|
|
{
|
|
public:
|
|
/**
|
|
* @param e The first language
|
|
* @param f The second language
|
|
*/
|
|
PlusRE(std::shared_ptr<RE> e, std::shared_ptr<RE> f) : e(e), f(f) {}
|
|
~PlusRE() {}
|
|
virtual State toENFA(ENFA& enfa, State attach);
|
|
virtual std::string toRe();
|
|
|
|
std::shared_ptr<RE> e, f;
|
|
};
|
|
|
|
|
|
} //namespace lxs
|
|
|
|
#endif //RE_H
|