NOTE(review): this patch reached review with its line breaks collapsed and all
angle-bracket text stripped (template arguments and <header> names). Line breaks
and 4-space indentation are restored below. Template arguments are restored only
on the lines added to src/automata.cpp, inferred from how the values are used.
Context lines, removed lines, the bare "#include" lines and src/main.cpp are
left as received and must be restored from the working tree before applying.
The blob hashes on the "index" line of src/automata.cpp predate the fixes made
to the added code.

diff --git a/include/Lexesis/automata.h b/include/Lexesis/automata.h
index de8bd5d..7699e15 100644
--- a/include/Lexesis/automata.h
+++ b/include/Lexesis/automata.h
@@ -13,7 +13,7 @@ namespace lxs {
     typedef unsigned long long Priority;
 
     struct Automaton {
-        State numStates;
+        State numStates = 0;
         std::set accepting;
         std::map priority;
         std::map acceptingToken;
diff --git a/src/automata.cpp b/src/automata.cpp
index e79d684..07605d7 100644
--- a/src/automata.cpp
+++ b/src/automata.cpp
@@ -1,8 +1,7 @@
-#include "automata.h"
+#include "Lexesis/automata.h"
 
-#include
-#include
-#include
+#include
+#include
 #include
 
 namespace lxs {
@@ -94,4 +93,231 @@ namespace lxs {
         s += "in -> " + std::to_string(e.starting) + "\n}\n";
         return s;
     }
+
+    std::set<State> ENFA::eClose(State) {
+        // FIXME(review): there is no return statement, so control flows off the
+        // end of a non-void function, which is undefined behaviour. How ENFA
+        // stores its epsilon transitions is not visible in this patch, so the
+        // closure is left unimplemented here.
+    }
+
+    std::set<State> NFA::eClose(State s) {
+        return {s};
+    }
+
+    namespace { // Utility functions for minimisation
+
+        using Distinguishables = std::map<State, std::set<State> >;
+
+        /**
+         * Return the reversal of a given DFA: for every transition
+         * `from --c--> to` of d, the result contains `to --c--> from`.
+         * This keeps the same accepting states as the original DFA.
+         * The starting state of the reversal is always 0 and is not meaningful.
+         * This does not consider priorities or associated tokens.
+         */
+        NFA reverse(const DFA& d) {
+            NFA rev;
+
+            rev.numStates = d.numStates;
+            rev.accepting = d.accepting;
+            rev.starting = 0;
+
+            for (const auto& stateTransPair : d.delta) {
+                for (const auto& child : stateTransPair.second) {
+                    // Keyed by the target, so rev.delta[t][c] holds the predecessors of t on c
+                    rev.delta[child.second][child.first].insert(stateTransPair.first);
+                }
+            }
+
+            return rev;
+        }
+
+        /**
+         * Add s and every state reachable from s in d to `reachable`.
+         * States that are already in `reachable` are not expanded again.
+         */
+        void markReachable(const DFA& d, State s, std::set<State>& reachable) {
+            // Explicit worklist rather than recursion, so a long chain of states
+            // cannot exhaust the call stack
+            std::vector<State> todo(1, s);
+            while (!todo.empty()) {
+                const State cur = todo.back();
+                todo.pop_back();
+                if (!reachable.insert(cur).second)
+                    continue;
+
+                // A state may have no entry in delta at all
+                const auto transitions = d.delta.find(cur);
+                if (transitions == d.delta.end())
+                    continue;
+                for (const auto& charStatePair : transitions->second)
+                    todo.push_back(charStatePair.second);
+            }
+        }
+
+        /**
+         * Remove the states that cannot be reached from d.starting from the reversal of d
+         * return a set with the reachable states
+         */
+        std::set<State> removeUnreachable(const DFA& d, NFA& reversed) {
+            std::set<State> reachable;
+            markReachable(d, d.starting, reachable);
+
+            for (State s = 0; s < d.numStates; s++) {
+                if (reachable.count(s) > 0)
+                    continue;
+
+                reversed.accepting.erase(s);
+                reversed.delta.erase(s);
+                // s may still be listed as a predecessor of a reachable state
+                for (auto& stateTransPair : reversed.delta) {
+                    for (auto& charPreds : stateTransPair.second)
+                        charPreds.second.erase(s);
+                }
+            }
+
+            return reachable;
+        }
+
+        /**
+         * Compute distinguishable pairs, using the reversal of a DFA.
+         *
+         * Every pair with exactly one accepting state is distinguishable. If (p, q)
+         * is distinguishable, so is every (a, b) where a leads to p and b leads to q
+         * on the same character; those are found through the reversed transitions.
+         * dist is filled symmetrically.
+         *
+         * NOTE(review): two accepting states with different tokens are not treated
+         * as distinguishable here, so they can be merged; confirm this is intended.
+         */
+        void computeDistinguishable(NFA& rev, Distinguishables& dist) {
+            std::queue<std::pair<State, State> > q;
+
+            for (State a = 0; a < rev.numStates; a++) {
+                for (State b = a + 1; b < rev.numStates; b++) {
+                    if (rev.accepting.count(a) != rev.accepting.count(b)) {
+                        q.push(std::make_pair(a, b));
+                    }
+                }
+            }
+
+            while (!q.empty()) {
+                std::pair<State, State> p = q.front();
+                q.pop();
+                if (dist[p.first].count(p.second) > 0) continue;
+                dist[p.first].insert(p.second);
+                dist[p.second].insert(p.first);
+
+                // find() rather than operator[], which would add empty entries to rev.delta
+                const auto predsA = rev.delta.find(p.first);
+                const auto predsB = rev.delta.find(p.second);
+                if (predsA == rev.delta.end() || predsB == rev.delta.end()) continue;
+
+                for (const auto& charPredsA : predsA->second) {
+                    const auto charPredsB = predsB->second.find(charPredsA.first);
+                    if (charPredsB == predsB->second.end()) continue;
+                    for (State prevA : charPredsA.second) {
+                        for (State prevB : charPredsB->second) {
+                            q.push(std::make_pair(prevA, prevB));
+                        }
+                    }
+                }
+            }
+        }
+
+        /**
+         * Do the actual minimisation, using precomputed distinguishable pairs.
+         *
+         * Reachable states that are not distinguishable are merged into one state.
+         * The new states are numbered from 0, in order of the lowest-numbered
+         * original state they contain. A merged accepting state gets the token of
+         * the member with the numerically lowest priority.
+         *
+         * NOTE(review): dist is only complete if d has a transition for every
+         * (state, character) pair; presumably the caller guarantees this - confirm.
+         */
+        DFA compress(const DFA& d, const std::set<State>& reachables, const Distinguishables& dist) {
+            DFA min;
+
+            // Maps each reachable state of d to its state in min
+            std::map<State, State> newStates;
+
+            State cur = 0;
+            for (State a = 0; a < d.numStates; a++) {
+                if (reachables.count(a) == 0 || newStates.count(a) > 0) continue;
+
+                const bool accepting = d.accepting.count(a) > 0;
+                Priority prior = 0;
+                std::string acTok;
+                if (accepting) {
+                    // at() throws on a missing entry instead of dereferencing end()
+                    prior = d.priority.at(a);
+                    acTok = d.acceptingToken.at(a);
+                }
+
+                const auto distA = dist.find(a);
+                newStates[a] = cur;
+                for (State b = a + 1; b < d.numStates; b++) {
+                    if (reachables.count(b) == 0 || newStates.count(b) > 0) continue;
+                    if (distA != dist.end() && distA->second.count(b) > 0) continue;
+
+                    newStates[b] = cur;
+                    if (accepting && d.accepting.count(b) > 0) {
+                        const Priority bprior = d.priority.at(b);
+                        if (bprior < prior) {
+                            prior = bprior;
+                            acTok = d.acceptingToken.at(b);
+                        }
+                    }
+                }
+
+                if (accepting) {
+                    // Keyed by the new state number, like min.delta below
+                    min.accepting.insert(cur);
+                    min.priority[cur] = prior;
+                    min.acceptingToken[cur] = acTok;
+                }
+
+                ++min.numStates;
+                ++cur;
+            }
+
+            // The starting state is renumbered like every other state
+            const auto start = newStates.find(d.starting);
+            min.starting = (start != newStates.end()) ? start->second : d.starting;
+
+            //Fill the delta function of the minimized DFA
+            std::set<State> done;
+            for (State s = 0; s < d.numStates; s++) {
+                const auto mapped = newStates.find(s);
+                if (mapped == newStates.end()) continue; // unreachable state
+                if (!done.insert(mapped->second).second) continue; // already filled
+
+                const auto transitions = d.delta.find(s);
+                if (transitions == d.delta.end()) continue;
+                for (const auto& charStatePair : transitions->second) {
+                    const auto target = newStates.find(charStatePair.second);
+                    if (target != newStates.end())
+                        min.delta[mapped->second][charStatePair.first] = target->second;
+                }
+            }
+
+            return min;
+        }
+
+    } //namespace
+
+    /**
+     * Minimise d: drop the states that are unreachable from d.starting and
+     * merge the remaining states that are not distinguishable.
+     */
+    DFA minimize(const DFA& d) {
+        NFA reversed = reverse(d);
+        std::set<State> reachable = removeUnreachable(d, reversed);
+        Distinguishables dist;
+        computeDistinguishable(reversed, dist);
+        return compress(d, reachable, dist);
+    }
+
 } //namespace lxs
diff --git a/src/main.cpp b/src/main.cpp
new file mode 100644
index 0000000..257902e
--- /dev/null
+++ b/src/main.cpp
@@ -0,0 +1,13 @@
+#include "Lexesis/automata.h"
+#include "Lexesis/re.h"
+
+#include
+
+int main() {
+    lxs::ENFA enfa;
+    std::shared_ptr re = lxs::parseRE("[]-a-dA-D]");
+    re->toENFA(enfa, 0);
+    enfa.numStates++;
+    enfa.starting = 0;
+    std::cout << lxs::toDot(enfa) << std::endl;
+}