#include "Lexesis/automata.h"

// NOTE(review): the names of the four standard headers originally included
// here could not be recovered; the list below covers every std:: name used in
// this file. Confirm against the upstream file before merging.
#include <map>
#include <queue>
#include <set>
#include <string>
#include <utility>
#include <vector>

namespace lxs {

namespace { // Utility functions for the dot export

    /**
     * Render a transition symbol so that it is safe inside a double-quoted
     * dot label: quotes and backslashes are escaped, bytes outside printable
     * ASCII are shown as \xHH (with the backslash itself escaped for dot).
     */
    std::string dotLabel(char c) {
        if (c == '"') return "\\\"";
        if (c == '\\') return "\\\\";

        const unsigned char uc = static_cast<unsigned char>(c);
        if (uc >= 0x20 && uc < 0x7f) return std::string(1, c);

        static const char hex[] = "0123456789abcdef";
        std::string out = "\\\\x";
        out += hex[uc >> 4];
        out += hex[uc & 0x0f];
        return out;
    }

    /**
     * Emit the graph header and one node per state.
     * The starting state is coloured yellow, accepting states are green
     * double circles (green wins when a state is both).
     */
    template <typename Automaton>
    std::string dotPrologue(const Automaton& a) {
        std::string s = "digraph {\nrankdir=LR\nin [shape=point style=invis]\n";
        for (State state = 0; state < a.numStates; state++) {
            const std::string id = std::to_string(state);
            s += id + " [label=\"" + id + "\"";
            if (state == a.starting) s += " color=yellow";
            if (a.accepting.count(state) > 0) s += " color=green shape=doublecircle";
            s += "]\n";
        }
        return s;
    }

    /**
     * Append a single labelled edge. The label is inserted verbatim, so the
     * caller is responsible for escaping it.
     */
    void dotEdge(std::string& s, State from, State to, const std::string& label) {
        s += std::to_string(from);
        s += " -> ";
        s += std::to_string(to);
        s += " [label=\"";
        s += label;
        s += "\"]\n";
    }

    /**
     * Append the invisible entry arrow towards the starting state and close
     * the graph.
     */
    template <typename Automaton>
    void dotEpilogue(std::string& s, const Automaton& a) {
        s += "in -> " + std::to_string(a.starting) + "\n}\n";
    }

    /**
     * Append the symbol-labelled edges of a nondeterministic transition table
     * (state -> symbol -> set of target states).
     */
    template <typename Automaton>
    void dotNondeterministicEdges(std::string& s, const Automaton& a) {
        for (const auto& stateEntry : a.delta) {
            for (const auto& trans : stateEntry.second) {
                const std::string label = dotLabel(trans.first);
                for (const auto& to : trans.second)
                    dotEdge(s, stateEntry.first, to, label);
            }
        }
    }

} //namespace

/**
 * Get a graphviz dot representation of a DFA
 *
 * @param d The DFA to render
 * @return The dot source as a string
 */
std::string toDot(const DFA& d) {
    std::string s = dotPrologue(d);
    for (const auto& stateEntry : d.delta) {
        for (const auto& trans : stateEntry.second)
            dotEdge(s, stateEntry.first, trans.second, dotLabel(trans.first));
    }
    dotEpilogue(s, d);
    return s;
}

/**
 * Get a graphviz dot representation of an NFA
 *
 * @param n The NFA to render
 * @return The dot source as a string
 */
std::string toDot(const NFA& n) {
    std::string s = dotPrologue(n);
    dotNondeterministicEdges(s, n);
    dotEpilogue(s, n);
    return s;
}

/**
 * Get a graphviz dot representation of an ENFA
 * Epsilon transitions are drawn as edges labelled "ε".
 *
 * @param e The ENFA to render
 * @return The dot source as a string
 */
std::string toDot(const ENFA& e) {
    std::string s = dotPrologue(e);
    dotNondeterministicEdges(s, e);
    for (const auto& etrans : e.epsilonTransitions) {
        for (const auto& dest : etrans.second)
            dotEdge(s, etrans.first, dest, "ε");
    }
    dotEpilogue(s, e);
    return s;
}

/**
 * Compute the epsilon closure of a state: the state itself plus every state
 * reachable from it through epsilon transitions only.
 *
 * @param s The state to start from
 * @return The set of states in the closure, always containing s
 */
std::set<State> ENFA::eClose(State s) const {
    std::set<State> closure;
    std::queue<State> pending;
    closure.insert(s);
    pending.push(s);

    while (!pending.empty()) {
        const State state = pending.front();
        pending.pop();

        // A state without outgoing epsilon transitions has no entry at all
        const auto outgoing = epsilonTransitions.find(state);
        if (outgoing == epsilonTransitions.end()) continue;

        for (State next : outgoing->second) {
            if (closure.insert(next).second) pending.push(next);
        }
    }
    return closure;
}

/**
 * An NFA has no epsilon transitions, so the closure of a state is just
 * that state.
 */
std::set<State> NFA::eClose(State s) const {
    return {s};
}

namespace { // Utility functions for minimisation

    using Distinguishables = std::map<State, std::set<State>>;

    /**
     * Return the reversal of a given DFA: every transition from --c--> to
     * of d becomes to --c--> from in the result.
     * This keeps the same accepting states as the original DFA
     * The starting state of the reversal is not meaningful, it is fixed to 0.
     * This does not consider priorities or associated tokens.
     */
    NFA reverse(const DFA& d) {
        NFA rev;
        rev.numStates = d.numStates;
        rev.accepting = d.accepting;
        rev.starting = 0;
        for (const auto& stateTransPair : d.delta) {
            for (const auto& child : stateTransPair.second) {
                rev.delta[child.second][child.first].insert(stateTransPair.first);
            }
        }
        return rev;
    }

    /**
     * Add s and every state reachable from s in d to `reachable`.
     * States that are already in `reachable` are not expanded again.
     */
    void markReachable(const DFA& d, State s, std::set<State>& reachable) {
        // Explicit stack instead of recursion, so long chains of states
        // cannot exhaust the call stack
        std::vector<State> pending(1, s);
        while (!pending.empty()) {
            const State current = pending.back();
            pending.pop_back();
            if (!reachable.insert(current).second) continue;

            const auto outgoing = d.delta.find(current);
            if (outgoing == d.delta.end()) continue;
            for (const auto& charStatePair : outgoing->second)
                pending.push_back(charStatePair.second);
        }
    }

    /**
     * Remove unreachable nodes from the reversal of d
     * Only the accepting flag and the transition table entry keyed by an
     * unreachable state are dropped, numStates is left untouched.
     *
     * @return a set with the states reachable from the starting state of d
     */
    std::set<State> removeUnreachable(const DFA& d, NFA& reversed) {
        std::set<State> reachable;
        markReachable(d, d.starting, reachable);

        for (State s = 0; s < d.numStates; s++) {
            if (reachable.count(s) > 0) continue;
            reversed.accepting.erase(s);
            reversed.delta.erase(s);
        }
        return reachable;
    }

    /**
     * Compute distinguishable pairs, using the reversal of a DFA
     *
     * Initially every (accepting, non-accepting) pair is distinguishable.
     * Whenever a pair is distinguishable, all pairs of their predecessors on
     * the same symbol are distinguishable as well.
     * Only acceptance is used for the initial distinction: accepting states
     * with different tokens or priorities are not separated here.
     *
     * @param rev The reversed automaton
     * @param dist Receives the result, stored symmetrically
     */
    void computeDistinguishable(const NFA& rev, Distinguishables& dist) {
        std::queue<std::pair<State, State>> q;
        for (State a = 0; a < rev.numStates; a++) {
            for (State b = a + 1; b < rev.numStates; b++) {
                if (rev.accepting.count(a) != rev.accepting.count(b)) {
                    q.push(std::make_pair(a, b));
                }
            }
        }

        while (!q.empty()) {
            const std::pair<State, State> p = q.front();
            q.pop();
            if (dist[p.first].count(p.second) > 0) continue;

            dist[p.first].insert(p.second);
            dist[p.second].insert(p.first);

            // Look entries up instead of indexing, so the automaton does not
            // get polluted with empty entries for symbols it never uses
            const auto predsA = rev.delta.find(p.first);
            const auto predsB = rev.delta.find(p.second);
            if (predsA == rev.delta.end() || predsB == rev.delta.end()) continue;

            for (const auto& symbolPreds : predsA->second) {
                const auto matching = predsB->second.find(symbolPreds.first);
                if (matching == predsB->second.end()) continue;
                for (State nextA : symbolPreds.second) {
                    for (State nextB : matching->second) {
                        q.push(std::make_pair(nextA, nextB));
                    }
                }
            }
        }
    }

    /**
     * Do the actual minimisation, using precomputed distinguishable pairs
     *
     * Reachable states that are not distinguishable are merged into one new
     * state. New states are numbered consecutively from 0, and the starting
     * state, accepting states, priorities, tokens and transitions of the
     * result all use this new numbering. A merged accepting state gets the
     * lowest priority found among its members, together with the token that
     * belongs to it.
     *
     * @param d The DFA to minimise
     * @param reachables The states of d reachable from its starting state
     * @param dist The distinguishable pairs of d
     * @return The minimised DFA
     */
    DFA compress(const DFA& d, std::set<State>& reachables, Distinguishables& dist) {
        DFA min;
        std::map<State, State> newStates; // old reachable state -> new state
        State cur = 0;

        for (State a = 0; a < d.numStates; a++) {
            if (reachables.count(a) == 0 || newStates.count(a) > 0) continue;

            const bool accepting = d.accepting.count(a) > 0;
            Priority prior = Priority();
            std::string acTok;
            if (accepting) {
                const auto priorIt = d.priority.find(a);
                if (priorIt != d.priority.end()) prior = priorIt->second;
                const auto tokIt = d.acceptingToken.find(a);
                if (tokIt != d.acceptingToken.end()) acTok = tokIt->second;
            }

            newStates[a] = cur;
            for (State b = a + 1; b < d.numStates; b++) {
                if (reachables.count(b) == 0 || dist[a].count(b) > 0) continue;
                newStates[b] = cur;

                if (!accepting || d.accepting.count(b) == 0) continue;
                const auto priorIt = d.priority.find(b);
                if (priorIt != d.priority.end() && priorIt->second < prior) {
                    prior = priorIt->second;
                    const auto tokIt = d.acceptingToken.find(b);
                    if (tokIt != d.acceptingToken.end()) acTok = tokIt->second;
                }
            }

            if (accepting) {
                min.accepting.insert(cur);
                min.priority[cur] = prior;
                min.acceptingToken[cur] = acTok;
            }
            ++cur;
        }
        min.numStates = cur;
        // The starting state is always reachable, so it always has a new number
        min.starting = newStates[d.starting];

        //Fill the delta function of the minimized DFA
        std::set<State> filled;
        for (State s = 0; s < d.numStates; s++) {
            const auto mapped = newStates.find(s);
            if (mapped == newStates.end()) continue; // unreachable state
            if (!filled.insert(mapped->second).second) continue; // one member per new state is enough

            const auto outgoing = d.delta.find(s);
            if (outgoing == d.delta.end()) continue;
            for (const auto& p : outgoing->second)
                min.delta[mapped->second][p.first] = newStates[p.second];
        }
        return min;
    }

} //namespace

/**
 * Minimize a DFA: drop the states that cannot be reached from the starting
 * state and merge the states that cannot be distinguished from each other.
 *
 * @param d The DFA to minimize
 * @return The minimized DFA, with states renumbered from 0
 */
DFA minimize(const DFA& d) {
    NFA reversed = reverse(d);
    std::set<State> reachable = removeUnreachable(d, reversed);
    Distinguishables dist;
    computeDistinguishable(reversed, dist);
    return compress(d, reachable, dist);
}

namespace { // Utility function for mssc

    /// Marker for the dead state: the subset reached when no transition exists
    const State deadState = static_cast<State>(-1);

    /**
     * Compute the set of states reached from any state in `oldstate` on
     * `symbol`, including the eClose of every state reached.
     *
     * @return The resulting set, or the set holding only the dead state
     *         marker if nothing can be reached
     */
    std::set<State> getNextState(const std::set<State>& oldstate, char symbol, const NFA& e) {
        std::set<State> states;
        for (State state : oldstate) {
            const auto outgoing = e.delta.find(state);
            if (outgoing == e.delta.end()) continue;

            const auto targets = outgoing->second.find(symbol);
            if (targets == outgoing->second.end()) continue;

            for (State target : targets->second) {
                const std::set<State> closure = e.eClose(target);
                states.insert(closure.begin(), closure.end());
            }
        }
        if (states.empty()) states.insert(deadState);
        return states;
    }

} //namespace

/**
 * Build a DFA from an NFA by subset construction.
 *
 * Every state of the result stands for a set of states of e, and has a
 * transition for each of the 256 possible char values. Missing transitions
 * lead to a dead state that loops on every symbol. The states of the result
 * are numbered from 0 in the ordering of their sets.
 *
 * A state of the result is accepting when at least one state in its set has
 * an entry in e.priority. It gets the lowest of those priorities and the
 * token of the state of e that carries it.
 *
 * @param e The automaton to convert
 * @return The resulting DFA
 */
DFA mssc(const NFA& e) {
    using Subset = std::set<State>;
    using SubsetTransitions = std::map<char, Subset>;

    std::map<Subset, SubsetTransitions> dfa;

    // The dead state loops to itself on every symbol
    Subset dead;
    dead.insert(deadState);
    SubsetTransitions deadTransitions;
    for (int c = 0; c < 256; c++) {
        deadTransitions.insert(std::make_pair(static_cast<char>(c), dead));
    }
    dfa.insert(std::make_pair(dead, deadTransitions));

    const Subset start = e.eClose(e.starting);
    std::queue<Subset> tocheck;
    tocheck.push(start);
    while (!tocheck.empty()) {
        const Subset state = tocheck.front();
        tocheck.pop();
        // A subset can be queued several times before it gets expanded,
        // expanding it once is enough
        if (dfa.count(state) > 0) continue;

        SubsetTransitions trans;
        for (int c = 0; c < 256; c++) {
            const char symbol = static_cast<char>(c);
            const Subset nextstate = getNextState(state, symbol, e);
            if (dfa.find(nextstate) == dfa.end()) {
                tocheck.push(nextstate);
            }
            trans.insert(std::make_pair(symbol, nextstate));
        }
        dfa.insert(std::make_pair(state, trans));
    }

    // Number the subsets
    std::map<Subset, State> lookup;
    State numStates = 0;
    for (const auto& state : dfa) {
        lookup.insert(std::make_pair(state.first, numStates++));
    }

    DFA result;
    result.numStates = numStates;
    result.starting = lookup.find(start)->second;

    for (const auto& state : dfa) {
        const State newstate = lookup.find(state.first)->second;

        // Find the member with the lowest priority, the first one wins on ties
        bool accepting = false;
        Priority best = Priority();
        State winner = State();
        for (State item : state.first) {
            const auto prio = e.priority.find(item);
            if (prio == e.priority.end()) continue;
            if (!accepting || prio->second < best) {
                accepting = true;
                best = prio->second;
                winner = item;
            }
        }

        if (accepting) {
            result.accepting.insert(newstate);
            result.priority[newstate] = best;
            // Take the token straight from the winning member, so every
            // accepting state gets one, not just one state per priority
            const auto token = e.acceptingToken.find(winner);
            if (token != e.acceptingToken.end())
                result.acceptingToken[newstate] = token->second;
        }

        auto& newtransitions = result.delta[newstate];
        for (const auto& transition : state.second) {
            newtransitions[transition.first] = lookup.find(transition.second)->second;
        }
    }

    return result;
}

} //namespace lxs