Lexesis/include/Lexesis/automata.h

108 lines
3.1 KiB
C++

/**
* Lexesis/automata.h
*
* A file describing basic automata (DFA, NFA and e-NFA), and some operations on them.
*/
#pragma once
#ifndef AUTOMATA_H
#define AUTOMATA_H
#include <climits>
#include <map>
#include <set>
#include <string>
#include <utility>
#include <vector>
namespace lxs {
/** Identifier of a state; the states of an automaton are numbered 0 .. numStates - 1. */
using State = unsigned long long;
/** Rank attached to an accepting state; a lower value is more important. */
using Priority = unsigned long long;
/**
* Reserved State value (the largest representable one).
* NOTE(review): presumably marks a dead/trap state or a missing transition -- confirm against the implementation.
*/
const State deadState = ULLONG_MAX;
/**
* A basic automaton, the common base for DFAs, NFAs and epsilon-NFAs.
* The states are implicit, from 0 to numStates - 1.
* The starting state can be specified, though some methods probably assume it is 0;
* it therefore defaults to 0 instead of being left uninitialized.
*
* priority and acceptingToken are associations with accepting states.
* The lower the priority, the more important.
*
* No transitions are specified yet, since that is the main point of difference between the different FAs.
*
* The alphabet is always considered to be every char from 0 to 255.
*/
struct Automaton {
    /** Number of states; valid states are 0 .. numStates - 1. */
    State numStates = 0;
    /** The set of accepting states. */
    std::set<State> accepting;
    /** Priority associated with an accepting state (lower is more important). */
    std::map<State, Priority> priority;
    /** Token name associated with an accepting state. */
    std::map<State, std::string> acceptingToken;
    /** The starting state; initialized so a default-constructed automaton never holds an indeterminate value. */
    State starting = 0;
};
/**
* Deterministic finite automaton.
* delta maps a state and an input char to the single successor state;
* a well-formed DFA should carry exactly one transition per state per char.
*/
struct DFA : Automaton {
    std::map<State, std::map<char, State>> delta;
};
/**
* A nondeterministic FA.
* Has an arbitrary amount of transitions per state per char:
* delta maps a state and an input char to the set of possible successor states.
*
* NFA is a polymorphic base (ENFA overrides eClose), so it has a virtual
* destructor to make destruction through an NFA pointer well-defined.
* The remaining special members are explicitly defaulted because declaring
* the destructor would otherwise suppress the implicit move operations.
*/
struct NFA : public Automaton {
    std::map<State, std::map<char, std::set<State> > > delta;

    NFA() = default;
    NFA(const NFA&) = default;
    NFA(NFA&&) = default;
    NFA& operator=(const NFA&) = default;
    NFA& operator=(NFA&&) = default;
    virtual ~NFA() = default;

    /**
    * Compute the epsilon closure for a state.
    * Returns {s} for a normal NFA, since it has no epsilon transitions.
    *
    * @return the set of states making up the closure
    */
    virtual std::set<State> eClose(State) const;
};
/**
* An epsilon NFA
* In addition to a normal NFA, can have 'free'/epsilon transitions which do not require a char
*/
struct ENFA : public NFA {
std::map<State, std::set<State> > epsilonTransitions;
virtual std::set<State> eClose(State) const;
};
/**
* Convert a DFA to graphviz dot format, can be useful when debugging.
*
* @param d the DFA to render
* @return the dot representation as a string
*/
std::string toDot(const DFA& d);
/**
* Convert an NFA to graphviz dot format, can be useful when debugging.
*
* @param n the NFA to render
* @return the dot representation as a string
*/
std::string toDot(const NFA& n);
/**
* Convert an ENFA to graphviz dot format, can be useful when debugging.
* This is a separate overload from the NFA one, so it is selected whenever
* the argument's static type is ENFA.
*
* @param e the ENFA to render
* @return the dot representation as a string
*/
std::string toDot(const ENFA& e);
/**
* Merge a collection of ENFAs by adding a new starting state in front and
* connecting it to the old starting states with an epsilon transition.
*
* @param enfas the automata to combine; they are taken by const reference and not modified
* @return the combined ENFA
*
* NOTE(review): how the states of the inputs are renumbered, and what is
* returned for an empty collection, is not visible from this declaration.
*/
ENFA merge(const std::vector<ENFA>& enfas);
/**
* Modified subset construction: convert an (E)NFA to a DFA.
* Takes priorities and acceptingTokens into consideration.
*
* The parameter is an NFA reference, so an ENFA (which derives from NFA)
* can be passed as well.
* NOTE(review): presumably epsilon transitions are handled through the
* virtual eClose -- confirm in the implementation.
*
* @param e the (E)NFA to convert
* @return the resulting DFA
*/
DFA mssc(const NFA& e);
/**
* Minimize a DFA.
* Takes priorities and acceptingTokens into consideration to never merge
* two accepting states with a different acceptingToken.
*
* @param d the DFA to minimize; taken by const reference and not modified
* @return the minimized DFA
*/
DFA minimize(const DFA& d);
} //namespace lxs
#endif //AUTOMATA_H