145 lines
3.5 KiB
C++
145 lines
3.5 KiB
C++
#pragma once
|
|
#ifndef RE_H
|
|
#define RE_H
|
|
|
|
#include "Lexesis/automata.h"

#include <memory>
#include <stdexcept>
#include <string>
#include <utility>
#include <vector>
|
|
|
|
namespace lxs {
|
|
/**
|
|
* An abstract regular expression
|
|
*/
|
|
class RE
|
|
{
|
|
public:
|
|
virtual ~RE() {}
|
|
/**
|
|
* Convert this regex to an ENFA
|
|
* This extends the given enfa, and attaches itself to the given `attach` state
|
|
*/
|
|
virtual State toENFA(ENFA& enfa, State attach) = 0;
|
|
|
|
/**
|
|
* Convert this regex to a text regex (useful to check if parsing was correct)
|
|
*/
|
|
virtual std::string toRe() = 0;
|
|
};
|
|
|
|
/**
|
|
* A regex for the empty language
|
|
*/
|
|
class EmptyRE : public RE
|
|
{
|
|
public:
|
|
EmptyRE() {}
|
|
~EmptyRE() {}
|
|
virtual State toENFA(ENFA& enfa, State attach);
|
|
virtual std::string toRe();
|
|
};
|
|
|
|
/**
|
|
* A regex for the language containing only the empty string
|
|
*/
|
|
class EpsilonRE : public RE
|
|
{
|
|
public:
|
|
EpsilonRE() {}
|
|
~EpsilonRE() {}
|
|
virtual State toENFA(ENFA& enfa, State attach);
|
|
virtual std::string toRe();
|
|
};
|
|
|
|
/**
|
|
* A regex for the language containing a single character
|
|
*/
|
|
class SingleRE : public RE
|
|
{
|
|
public:
|
|
/**
|
|
* @param c The character of the language
|
|
*/
|
|
SingleRE(char c) : c(c) {}
|
|
~SingleRE() {}
|
|
virtual State toENFA(ENFA& enfa, State attach);
|
|
virtual std::string toRe();
|
|
|
|
char c;
|
|
};
|
|
|
|
/**
|
|
* A regex for the language containing multiple single-symbol strings
|
|
*/
|
|
class MultiRE : public RE
|
|
{
|
|
public:
|
|
/**
|
|
* @param chars The list of symbols contained in the language
|
|
*/
|
|
MultiRE(std::vector<char> chars) : chars(std::move(chars)) {}
|
|
~MultiRE() {}
|
|
virtual State toENFA(ENFA& enfa, State attach);
|
|
virtual std::string toRe();
|
|
|
|
std::vector<char> chars;
|
|
};
|
|
|
|
/**
|
|
* A regex for the concatenation of two languages
|
|
*/
|
|
class ConcatRE : public RE
|
|
{
|
|
public:
|
|
/**
|
|
* @param e The first language
|
|
* @param f The second language
|
|
*/
|
|
ConcatRE(std::shared_ptr<RE> e, std::shared_ptr<RE> f) : e(e), f(f) {}
|
|
~ConcatRE() {}
|
|
virtual State toENFA(ENFA& enfa, State attach);
|
|
virtual std::string toRe();
|
|
|
|
std::shared_ptr<RE> e, f;
|
|
};
|
|
|
|
/**
|
|
* The regex for the kleene star of a language
|
|
*/
|
|
class StarRE : public RE
|
|
{
|
|
public:
|
|
/**
|
|
* @param e The language to apply the star to
|
|
*/
|
|
StarRE(std::shared_ptr<RE> e) : e(e) {}
|
|
~StarRE() {}
|
|
virtual State toENFA(ENFA& enfa, State attach);
|
|
virtual std::string toRe();
|
|
|
|
std::shared_ptr<RE> e;
|
|
};
|
|
|
|
/**
|
|
* A regex for the sum/disjunction of two languages
|
|
*/
|
|
class PlusRE : public RE
|
|
{
|
|
public:
|
|
/**
|
|
* @param e The first language
|
|
* @param f The second language
|
|
*/
|
|
PlusRE(std::shared_ptr<RE> e, std::shared_ptr<RE> f) : e(e), f(f) {}
|
|
~PlusRE() {}
|
|
virtual State toENFA(ENFA& enfa, State attach);
|
|
virtual std::string toRe();
|
|
|
|
std::shared_ptr<RE> e, f;
|
|
};
|
|
|
|
|
|
} //namespace lxs
|
|
|
|
#endif //RE_H
|