From 83018bb1f139d5d5275ca864a660c6414a02e511 Mon Sep 17 00:00:00 2001
From: Robin Jadoul <robin.jadoul@gmail.com>
Date: Sat, 30 Apr 2016 16:13:59 +0200
Subject: [PATCH] Implementation of table filling for minimalisation

---
 include/Lexesis/automata.h |   2 +-
 src/automata.cpp           | 176 ++++++++++++++++++++++++++++++++++++-
 src/main.cpp               |  13 +++
 3 files changed, 186 insertions(+), 5 deletions(-)
 create mode 100644 src/main.cpp

diff --git a/include/Lexesis/automata.h b/include/Lexesis/automata.h
index de8bd5d..7699e15 100644
--- a/include/Lexesis/automata.h
+++ b/include/Lexesis/automata.h
@@ -13,7 +13,7 @@ namespace lxs {
     typedef unsigned long long Priority;
 
     struct Automaton {
-        State numStates;
+        State numStates = 0;
         std::set<State> accepting;
         std::map<State, Priority> priority;
         std::map<State, std::string> acceptingToken;
diff --git a/src/automata.cpp b/src/automata.cpp
index e79d684..07605d7 100644
--- a/src/automata.cpp
+++ b/src/automata.cpp
@@ -1,8 +1,7 @@
-#include "automata.h"
+#include "Lexesis/automata.h"
 
-#include <cctype>
-#include <algorithm>
-#include <stdexcept>
+#include <climits>
+#include <queue>
 #include <string>
 
 namespace lxs {
@@ -94,4 +93,173 @@ namespace lxs {
         s += "in -> " + std::to_string(e.starting) + "\n}\n";
         return s;
     }
+
+    std::set<State> ENFA::eClose(State s) { // FIXME: placeholder, epsilon transitions are not followed yet
+        return {s}; // a state always belongs to its own closure; returning nothing at all was undefined behaviour
+    }
+
+    std::set<State> NFA::eClose(State s) { // the closure reported here is always the singleton {s}
+        return std::set<State>{s};
+    }
+
+    namespace { // Utility functions for minimisation
+
+        using Distinguishables = std::map<State, std::set<State> >;
+
+        /**
+         * Return the reversal of a given DFA: p --c--> q in d becomes q --c--> p.
+         * This keeps the same accepting states as the original DFA
+         * This uses an arbitrary starting state (0) for the reversal.
+         * This does not consider priorities or associated tokens.
+         */
+        NFA reverse(const DFA& d) {
+            NFA rev;
+            rev.numStates = d.numStates;
+            rev.accepting = d.accepting;
+            rev.starting = 0;
+
+            for (const auto& stateTransPair : d.delta) {
+                for (const auto& child : stateTransPair.second) {
+                    // Flip the edge: the old target becomes the key, the old source the destination
+                    rev.delta[child.second][child.first].insert(stateTransPair.first);
+                }
+            }
+
+            return rev;
+        }
+
+        void markReachable(const DFA& d, State s, std::set<State>& reachable) { // depth-first marking from s
+            // The set is taken by reference so that marks made in recursive calls are visible to the caller.
+            if (!reachable.insert(s).second) return; // s was marked before
+            const auto trans = d.delta.find(s);
+            if (trans == d.delta.end()) return; // no outgoing transitions stored for s
+            for (const auto& charStatePair : trans->second) markReachable(d, charStatePair.second, reachable);
+        }
+
+        /**
+         * Prune the reversal of d: each state number below d.numStates that is
+         * absent from the reachable set loses its accepting mark and its own
+         * entry in reversed.delta.
+         * The reachable set is whatever markReachable leaves in it when started
+         * from d.starting; that set is also the return value.
+         */
+        std::set<State> removeUnreachable(const DFA& d, NFA& reversed) {
+            std::set<State> reachable;
+            markReachable(d, d.starting, reachable);
+
+            for (State candidate = 0; candidate < d.numStates; ++candidate) {
+                if (reachable.find(candidate) != reachable.end())
+                    continue; // still needed, leave it alone
+
+                // Only the entry keyed by the state is dropped; other states'
+                // transition sets are left as they are.
+                reversed.accepting.erase(candidate);
+                reversed.delta.erase(candidate);
+            }
+
+            return reachable;
+        }
+
+        /**
+         * Table filling on the reversed automaton: seed a work list with all pairs
+         * where exactly one state is accepting, then mark each pair taken from the
+         * list in dist (both directions) and enqueue the pairs reachable from it
+         * through rev.delta on the same character.
+         */
+        void computeDistinguishable(NFA& rev, Distinguishables& dist) {
+            std::queue<std::pair<State, State> > pending;
+
+            for (State lo = 0; lo < rev.numStates; ++lo) {
+                const bool loAccepts = rev.accepting.count(lo) > 0;
+                for (State hi = lo + 1; hi < rev.numStates; ++hi) {
+                    const bool hiAccepts = rev.accepting.count(hi) > 0;
+                    if (loAccepts != hiAccepts) pending.push(std::make_pair(lo, hi));
+                }
+            }
+
+            while (!pending.empty()) {
+                const std::pair<State, State> cur = pending.front();
+                pending.pop();
+                if (!dist[cur.first].insert(cur.second).second) continue; // pair was marked before
+                dist[cur.second].insert(cur.first);
+
+                for (int code = 0; code < 256; ++code) {
+                    for (State fromFirst : rev.delta[cur.first][static_cast<char>(code)])
+                        for (State fromSecond : rev.delta[cur.second][static_cast<char>(code)])
+                            pending.push(std::make_pair(fromFirst, fromSecond));
+                }
+            }
+        }
+
+        /**
+         * Do the actual minimisation, using precomputed distinguishable pairs.
+         * Reachable states that are not marked distinguishable are merged; the
+         * starting state, accepting data and delta of the result are all expressed
+         * in the merged state numbers.
+         */
+        DFA compress(const DFA& d, const std::set<State>& reachables, Distinguishables& dist) {
+            DFA min;
+            min.starting = d.starting; // fallback, remapped below once the merged numbers are known
+            std::map<State, State> newStates; // old reachable state -> merged state
+
+            State cur = 0;
+            for (State a = 0; a < d.numStates; a++) {
+                if (reachables.count(a) == 0 || newStates.count(a) > 0) continue;
+                const bool isAccepting = d.accepting.count(a) > 0;
+                Priority prior = 0;
+                std::string acTok;
+                if (isAccepting) {
+                    prior = d.priority.at(a); // at() throws where find()->second would be undefined
+                    acTok = d.acceptingToken.at(a);
+                }
+                newStates[a] = cur;
+                for (State b = a + 1; b < d.numStates; b++) {
+                    if (reachables.count(b) == 0 || newStates.count(b) > 0 || dist[a].count(b) > 0) continue;
+                    newStates[b] = cur;
+                    // prior only holds a real value when the merged state is accepting
+                    if (isAccepting && d.accepting.count(b) > 0) {
+                        const Priority bprior = d.priority.at(b);
+                        if (bprior < prior) {
+                            prior = bprior;
+                            acTok = d.acceptingToken.at(b);
+                        }
+                    }
+                }
+
+                if (isAccepting) { // keyed by the merged number, exactly like min.delta
+                    min.accepting.insert(cur);
+                    min.priority[cur] = prior;
+                    min.acceptingToken[cur] = acTok;
+                }
+                ++min.numStates;
+                ++cur;
+            }
+
+            const auto start = newStates.find(d.starting);
+            if (start != newStates.end()) min.starting = start->second;
+            //Fill the delta function of the minimized DFA, once per merged state
+            std::set<State> done;
+            for (const auto& oldNew : newStates) {
+                if (!done.insert(oldNew.second).second) continue;
+                const auto trans = d.delta.find(oldNew.first);
+                if (trans == d.delta.end()) continue; // nothing stored for this state
+                for (const auto& charTarget : trans->second) {
+                    const auto target = newStates.find(charTarget.second);
+                    if (target != newStates.end()) // a target outside reachables has no merged number
+                        min.delta[oldNew.second][charTarget.first] = target->second;
+                }
+            }
+            return min;
+        }
+
+    } //namespace
+
+    DFA minimize(const DFA& d) { // reverse, prune, fill the table, then merge
+        NFA backwards = reverse(d);
+        std::set<State> kept = removeUnreachable(d, backwards);
+        Distinguishables table;
+        computeDistinguishable(backwards, table);
+        return compress(d, kept, table);
+    }
+
 } //namespace lxs
diff --git a/src/main.cpp b/src/main.cpp
new file mode 100644
index 0000000..257902e
--- /dev/null
+++ b/src/main.cpp
@@ -0,0 +1,13 @@
+#include "Lexesis/automata.h"
+#include "Lexesis/re.h"
+
+#include <iostream>
+
+int main() { // demo driver: builds one ENFA from a sample expression and prints lxs::toDot of it
+    lxs::ENFA automaton;
+    std::shared_ptr<lxs::RE> expression = lxs::parseRE("[]-a-dA-D]");
+    expression->toENFA(automaton, 0);
+    ++automaton.numStates;
+    automaton.starting = 0;
+    std::cout << lxs::toDot(automaton) << std::endl;
+}