// Lexesis/include/Lexesis/automata.h

/**
 * Lexesis/automata.h
 *
 * A file describing basic automata (DFA, NFA and e-NFA), and some operations on them.
 */
#pragma once
#ifndef AUTOMATA_H
#define AUTOMATA_H

#include <climits>
#include <map>
#include <set>
#include <string>
#include <utility>
#include <vector>

namespace lxs {
    /** Identifier of a state; states are numbered implicitly, starting at 0. */
    using State = unsigned long long;

    /** Rank attached to an accepting state; a lower value is more important. */
    using Priority = unsigned long long;

    /**
     * Sentinel State holding the largest representable value (ULLONG_MAX).
     * NOTE(review): presumably marks a dead/absent state -- confirm against the implementation.
     */
    const State deadState = ULLONG_MAX;

    /**
     * Common base for the DFA, NFA and epsilon-NFA types.
     *
     * States are implicit: they are the integers 0 .. numStates - 1.
     * The starting state is configurable, although some methods probably
     * assume it is 0; it therefore defaults to 0.
     *
     * `priority` and `acceptingToken` carry data associated with accepting states.
     * A lower priority value means a more important state.
     *
     * Transitions are deliberately absent here, since they are the main
     * point of difference between the different kinds of FA.
     *
     * The alphabet is always considered to be every char from 0 to 255.
     */
    struct Automaton {
        /** Number of states; the valid states are 0 .. numStates - 1. */
        State numStates = 0;

        /** The accepting states. */
        std::set<State> accepting;

        /** Priority associated with an accepting state (lower is more important). */
        std::map<State, Priority> priority;

        /** Token string associated with an accepting state. */
        std::map<State, std::string> acceptingToken;

        /**
         * The starting state.
         * Initialized to 0 so that a default-constructed automaton never
         * carries an indeterminate value.
         */
        State starting = 0;
    };

    /**
     * Deterministic finite automaton.
     *
     * Should define exactly one transition for every (state, char) pair.
     */
    struct DFA : Automaton {
        /** delta[s][c] is the state reached from state s on input char c. */
        std::map<State, std::map<char, State>> delta;
    };

    /**
     * Nondeterministic finite automaton.
     *
     * May have any number of transitions for a given (state, char) pair.
     *
     * NOTE(review): this type is polymorphic (eClose is virtual) but declares no
     * virtual destructor, so a derived object must not be deleted through an NFA pointer.
     */
    struct NFA : Automaton {
        /** delta[s][c] is the set of states reachable from state s on input char c. */
        std::map<State, std::map<char, std::set<State>>> delta;

        /**
         * Epsilon closure of a single state.
         *
         * @return for a plain NFA, which has no epsilon transitions, the set {s}
         */
        virtual std::set<State> eClose(State) const;
    };

    /**
     * An epsilon NFA
     * In addition to a normal NFA, can have 'free'/epsilon transitions which do not require a char
     */
    struct ENFA : public NFA {
        std::map<State, std::set<State> > epsilonTransitions;

        virtual std::set<State> eClose(State) const;
    };

    /**
     * Convert a DFA to Graphviz dot format; can be useful when debugging.
     *
     * @param d the automaton to convert
     * @return the dot representation as a string
     */
    std::string toDot(const DFA& d);

    /**
     * Convert an NFA to Graphviz dot format; can be useful when debugging.
     *
     * @param n the automaton to convert
     * @return the dot representation as a string
     */
    std::string toDot(const NFA& n);

    /**
     * Convert an ENFA to Graphviz dot format; can be useful when debugging.
     *
     * An argument whose static type is ENFA selects this overload rather than
     * the NFA one, since an exact match is preferred over a derived-to-base conversion.
     *
     * @param e the automaton to convert
     * @return the dot representation as a string
     */
    std::string toDot(const ENFA& e);

    /**
     * Merge a collection of ENFAs into a single one by adding a new starting state
     * in front and connecting it to each old starting state with an epsilon transition.
     *
     * @param enfas the automata to merge
     * @return the merged ENFA
     *
     * NOTE(review): the behaviour for an empty collection is not visible from this header -- confirm.
     */
    ENFA merge(const std::vector<ENFA>& enfas);

    /**
     * Modified subset construction: convert an (E)NFA to a DFA.
     * Takes priorities and acceptingTokens into consideration.
     *
     * The parameter is a reference to NFA, so an ENFA (which derives from NFA)
     * can be passed as well.
     * NOTE(review): presumably epsilon transitions are honoured through the
     * virtual eClose -- confirm against the implementation.
     *
     * @param e the automaton to convert
     * @return the resulting DFA
     */
    DFA mssc(const NFA& e);

    /**
     * Minimize a DFA.
     * Takes priorities and acceptingTokens into consideration, so that two accepting
     * states with a different acceptingToken are never merged.
     *
     * @param d the automaton to minimize; taken by const reference, so it is not modified
     * @return the minimized DFA
     */
    DFA minimize(const DFA& d);
} //namespace lxs

#endif //AUTOMATA_H