#include "Lexesis/automata.h"

#include <cctype>
#include <limits>
#include <map>
#include <queue>
#include <set>
#include <string>
#include <utility>
#include <vector>

namespace lxs {

    namespace { // Utility functions for DOT export

        /// Opening lines shared by every generated graph; `in` is an invisible node pointing at the start state.
        const char* const dotPrologue = "digraph {\nrankdir=LR\nin [shape=point style=invis]\n";

        /**
         * Render a transition symbol so that it is safe inside a double-quoted DOT label.
         * Printable characters are emitted unchanged, `"` and `\` are escaped,
         * and every other byte is shown as `\xHH` so the output never contains raw control bytes.
         */
        std::string dotSymbol(char c) {
            if (c == '"') return "\\\"";
            if (c == '\\') return "\\\\";
            const unsigned char uc = static_cast<unsigned char>(c);
            if (std::isprint(uc)) return std::string(1, c);
            static const char hexDigits[] = "0123456789abcdef";
            std::string out = "\\\\x";
            out += hexDigits[uc >> 4];
            out += hexDigits[uc & 0x0f];
            return out;
        }

        /// Build a single labelled edge statement. `label` must already be escaped.
        std::string dotEdge(State from, State to, const std::string& label) {
            return std::to_string(from) + " -> " + std::to_string(to) + " [label=\"" + label + "\"]\n";
        }

        /// Closing lines: the edge from the invisible entry node to the start state.
        std::string dotEpilogue(State starting) {
            return "in -> " + std::to_string(starting) + "\n}\n";
        }

        /**
         * Append one node statement per state of `a` to `s`.
         * The start state is yellow; accepting states are green double circles
         * whose label also shows the priority (`p=`) and the accepted token (`ac=`) when those are known.
         */
        template <typename AutomatonT>
        void appendDotNodes(const AutomatonT& a, std::string& s) {
            for (State state = 0; state < a.numStates; state++) {
                std::string label = std::to_string(state);
                s += std::to_string(state) + " [";
                if (state == a.starting)
                    s += "color=yellow ";
                if (a.accepting.count(state) > 0) {
                    s += "color=green shape=doublecircle ";
                    // Guard both lookups: an accepting state without an entry must not dereference end()
                    const auto prio = a.priority.find(state);
                    if (prio != a.priority.end())
                        label += "\\np=" + std::to_string(prio->second);
                    const auto token = a.acceptingToken.find(state);
                    if (token != a.acceptingToken.end())
                        label += "\\nac=" + token->second;
                }
                s += "label=\"" + label + "\"]\n";
            }
        }

        /// Append the edges of a non-deterministic transition table (state -> symbol -> set of states).
        template <typename DeltaT>
        void appendDotSetEdges(const DeltaT& delta, std::string& s) {
            for (const auto& row : delta) {
                for (const auto& trans : row.second) {
                    const std::string label = dotSymbol(trans.first);
                    for (const auto& to : trans.second)
                        s += dotEdge(row.first, to, label);
                }
            }
        }

    } //namespace

    /**
     * Produce a Graphviz DOT description of a DFA.
     */
    std::string toDot(const DFA& d) {
        std::string s = dotPrologue;
        appendDotNodes(d, s);
        for (const auto& row : d.delta) {
            for (const auto& trans : row.second)
                s += dotEdge(row.first, trans.second, dotSymbol(trans.first));
        }
        s += dotEpilogue(d.starting);
        return s;
    }

    /**
     * Produce a Graphviz DOT description of an NFA.
     */
    std::string toDot(const NFA& n) {
        std::string s = dotPrologue;
        appendDotNodes(n, s);
        appendDotSetEdges(n.delta, s);
        s += dotEpilogue(n.starting);
        return s;
    }

    /**
     * Produce a Graphviz DOT description of an epsilon-NFA.
     * Epsilon transitions are drawn as edges labelled `ε`.
     */
    std::string toDot(const ENFA& e) {
        std::string s = dotPrologue;
        appendDotNodes(e, s);
        appendDotSetEdges(e.delta, s);
        for (const auto& etrans : e.epsilonTransitions) {
            for (const auto& dest : etrans.second)
                s += dotEdge(etrans.first, dest, "ε");
        }
        s += dotEpilogue(e.starting);
        return s;
    }

    /**
     * Compute the epsilon closure of `s`: `s` itself plus every state
     * reachable from it by following epsilon transitions only.
     */
    std::set<State> ENFA::eClose(State s) const {
        std::set<State> states;
        std::queue<State> statequeue;
        statequeue.push(s);
        states.insert(s);

        while (!statequeue.empty()) {
            const State state = statequeue.front();
            statequeue.pop();
            const auto newStatesIt = epsilonTransitions.find(state);
            if (newStatesIt == epsilonTransitions.end()) continue;
            for (const auto& newstate : newStatesIt->second) {
                // insert() reports whether the state is new, so it is queued exactly once
                if (states.insert(newstate).second)
                    statequeue.push(newstate);
            }
        }
        return states;
    }

    /**
     * An NFA has no epsilon transitions, so the closure of `s` is just `{s}`.
     */
    std::set<State> NFA::eClose(State s) const {
        return {s};
    }

    namespace { // Utility functions for minimisation

        using Distinguishables = std::map<State, std::set<State> >;

        /**
         * Return the reversal of a given DFA
         * This keeps the same accepting states as the original DFA
         * The starting state of the reversal is arbitrary (state 0) and is not meaningful.
         * Accepting tokens are copied; priorities are not.
         */
        NFA reverse(const DFA& d) {
            NFA rev;
            rev.numStates = d.numStates;
            rev.accepting = d.accepting;
            rev.acceptingToken = d.acceptingToken;
            rev.starting = 0;
            for (const auto& stateTransPair : d.delta) {
                for (const auto& child : stateTransPair.second) {
                    rev.delta[child.second][child.first].insert(stateTransPair.first);
                }
            }
            return rev;
        }

        /**
         * Put all reachable states in `d`, starting from `s`, into `reachable`.
         * States already present in `reachable` are not explored again.
         * Uses an explicit worklist so deep automata cannot exhaust the call stack,
         * and tolerates states that have no row in `d.delta`.
         */
        void markReachable(const DFA& d, State s, std::set<State>& reachable) {
            std::vector<State> pending;
            pending.push_back(s);
            while (!pending.empty()) {
                const State current = pending.back();
                pending.pop_back();
                if (!reachable.insert(current).second) continue;
                const auto transitions = d.delta.find(current);
                if (transitions == d.delta.end()) continue;
                for (const auto& charStatePair : transitions->second)
                    pending.push_back(charStatePair.second);
            }
        }

        /**
         * Remove unreachable nodes from the reversal of d
         * return a set with the reachable states
         */
        std::set<State> removeUnreachable(const DFA& d, NFA& reversed) {
            std::set<State> reachable;
            markReachable(d, d.starting, reachable);

            std::vector<State> statesToRemove;
            for (State i = 0; i < d.numStates; i++) {
                if (reachable.count(i) == 0) {
                    statesToRemove.push_back(i);
                }
            }
            // The dead state lives outside [0, numStates) and has to be checked separately
            if (reachable.count(deadState) == 0)
                statesToRemove.push_back(deadState);

            for (State s : statesToRemove) {
                reversed.accepting.erase(s);
                reversed.delta.erase(s);
            }
            return reachable;
        }

        /**
         * Compute distinguishable pairs, using the reversal of a DFA.
         * Seeds the work queue with pairs that differ in acceptance (or in accepted token)
         * and propagates backwards: predecessors of a distinguishable pair on the same
         * symbol are distinguishable as well.
         */
        void computeDistinguishable(NFA& rev, Distinguishables& dist) {
            std::queue<std::pair<State, State> > q;
            for (State a = 0; a < rev.numStates; a++) {
                for (State b = a + 1; b < rev.numStates; b++) {
                    if (rev.accepting.count(a) != rev.accepting.count(b)) {
                        q.push(std::make_pair(a, b));
                    } else if (rev.accepting.count(a) && rev.acceptingToken[a] != rev.acceptingToken[b]) {
                        //Do not merge accepting states that define different tokens, guaranteed problems...
                        q.push(std::make_pair(a, b));
                    }
                }
                if (rev.accepting.count(a) != rev.accepting.count(deadState)) {
                    q.push(std::make_pair(a, deadState));
                }
            }

            while (!q.empty()) {
                const std::pair<State, State> p = q.front();
                q.pop();
                if (dist[p.first].count(p.second) > 0) continue;

                dist[p.first].insert(p.second);
                dist[p.second].insert(p.first);

                // Only symbols present for both states can yield predecessor pairs;
                // using find() avoids default-inserting 256 empty entries per pair.
                const auto rowA = rev.delta.find(p.first);
                const auto rowB = rev.delta.find(p.second);
                if (rowA == rev.delta.end() || rowB == rev.delta.end()) continue;
                for (const auto& symbolPredsA : rowA->second) {
                    const auto predsB = rowB->second.find(symbolPredsA.first);
                    if (predsB == rowB->second.end()) continue;
                    for (State nextA : symbolPredsA.second) {
                        for (State nextB : predsB->second) {
                            q.push(std::make_pair(nextA, nextB));
                        }
                    }
                }
            }
        }

        /**
         * Do the actual minimisation, using precomputed distinguishable pairs.
         *
         * Reachable states are visited in the order 0 .. numStates-1 followed by the dead state.
         * Each not-yet-assigned state starts a new class and absorbs every later reachable state
         * that is not distinguishable from it. The dead state always keeps the id `deadState`.
         * The starting state of the result is the class of the original starting state.
         */
        DFA compress(const DFA& d, std::set<State>& reachables, Distinguishables& dist) {
            DFA min;

            std::map<State, State> newStates;
            std::set<State> done;
            State cur = 0;

            // Position numStates stands for the dead state, so it can be handled by the same loop
            const auto stateAt = [&d](State position) {
                return position == d.numStates ? deadState : position;
            };

            for (State i = 0; i <= d.numStates; i++) {
                const State a = stateAt(i);
                if (reachables.count(a) == 0 || done.count(a) > 0) continue;

                const bool isDead = (a == deadState);
                const State id = isDead ? deadState : cur;
                newStates[a] = id;
                done.insert(a);

                if (!isDead) {
                    const std::set<State>& distinguishedFromA = dist[a];
                    for (State j = i + 1; j <= d.numStates; j++) {
                        const State b = stateAt(j);
                        if (reachables.count(b) > 0 && distinguishedFromA.count(b) == 0) {
                            done.insert(b);
                            newStates[b] = (b == deadState) ? deadState : cur;
                        }
                    }
                }

                if (d.accepting.count(a) > 0) {
                    //Since different accepting tokens should never be merged, there is no need to check the priorities
                    min.accepting.insert(id);
                    const auto prio = d.priority.find(a);
                    if (prio != d.priority.end())
                        min.priority[id] = prio->second;
                    const auto token = d.acceptingToken.find(a);
                    if (token != d.acceptingToken.end())
                        min.acceptingToken[id] = token->second;
                }

                if (!isDead) {
                    ++min.numStates;
                    ++cur;
                }
            }

            // The start state must be translated to the new numbering as well
            const auto newStart = newStates.find(d.starting);
            min.starting = (newStart != newStates.end()) ? newStart->second : d.starting;

            //Fill the delta function of the minimized DFA, using the first member of every class
            std::set<State> filled;
            for (State s = 0; s < d.numStates; s++) {
                const auto mapped = newStates.find(s);
                if (mapped == newStates.end()) continue; // unreachable: has no class
                if (!filled.insert(mapped->second).second) continue;

                const auto transitions = d.delta.find(s);
                if (transitions == d.delta.end()) continue;
                for (const auto& p : transitions->second) {
                    const auto target = newStates.find(p.second);
                    if (target != newStates.end())
                        min.delta[mapped->second][p.first] = target->second;
                }
            }
            return min;
        }

    } //namespace

    /**
     * Minimise a DFA: drop unreachable states and merge indistinguishable ones.
     * Accepting states that accept different tokens are never merged.
     */
    DFA minimize(const DFA& d) {
        NFA reversed = reverse(d);
        std::set<State> reachable = removeUnreachable(d, reversed);
        Distinguishables dist;
        computeDistinguishable(reversed, dist);
        return compress(d, reachable, dist);
    }

    namespace { // Utility function for mssc

        /**
         * Get the next state, by taking the union of the (epsilon closures of the) states
         * reached in the NFA by following `symbol` from any state in `oldstate`
         */
        std::set<State> getNextState(const std::set<State>& oldstate, char symbol, const NFA& e) {
            std::set<State> states;
            for (const auto& state : oldstate) {
                const auto row = e.delta.find(state);
                if (row == e.delta.end()) continue;
                const auto newStates = row->second.find(symbol);
                if (newStates == row->second.end()) continue;
                for (const auto& newstate : newStates->second) {
                    const std::set<State> closure = e.eClose(newstate);
                    states.insert(closure.begin(), closure.end());
                }
            }
            return states;
        }

    } //namespace

    /**
     * Modified subset construction: turn an (epsilon-)NFA into a DFA.
     *
     * The empty subset becomes the dead state. When a subset contains several accepting
     * states, the one with the numerically smallest priority decides the priority and
     * the accepted token of the resulting DFA state.
     */
    DFA mssc(const NFA& e) {
        using StateSet = std::set<State>;
        using SetTransitions = std::map<char, StateSet>;

        //Temporary, improper dfa structure
        std::map<StateSet, SetTransitions> dfa;

        // The empty subset loops to itself on every symbol
        SetTransitions deadTransitions;
        for (int c = 0; c < 256; c++) {
            deadTransitions[static_cast<char>(c)] = {};
        }
        dfa[StateSet{}] = deadTransitions;

        //Lazy evaluation, on a still implicit DFA
        const StateSet start = e.eClose(e.starting);
        std::queue<StateSet> tocheck;
        // Register the start subset up front so a transition back to it does not queue it again
        dfa.emplace(start, SetTransitions{});
        tocheck.push(start);

        while (!tocheck.empty()) {
            StateSet state = std::move(tocheck.front());
            tocheck.pop();

            SetTransitions trans;
            for (int c = 0; c < 256; c++) {
                const char symbol = static_cast<char>(c);
                StateSet nextstate = getNextState(state, symbol, e);
                if (dfa.find(nextstate) == dfa.end()) {
                    dfa[nextstate] = {};
                    tocheck.push(nextstate);
                }
                trans[symbol] = std::move(nextstate);
            }
            dfa[state] = std::move(trans);
        }

        //Assign sequential indices for the actual DFA
        std::map<StateSet, State> lookup;
        State numStates = 0;
        for (const auto& state : dfa) {
            if (state.first.empty())
                lookup[state.first] = deadState;
            else
                lookup[state.first] = numStates++;
        }

        //Setup the actual DFA
        DFA result;
        result.numStates = numStates;
        result.starting = lookup.find(start)->second;

        //Setup the transitions
        //Merge priorities and acceptingTokens
        for (const auto& state : dfa) {
            const State newstate = lookup.find(state.first)->second;

            // An explicit flag instead of a sentinel priority, so that an accepting state
            // whose priority happens to be the maximum value is not silently dropped
            bool isAccepting = false;
            Priority priority = std::numeric_limits<Priority>::max();
            std::string acTok;

            for (const auto& item : state.first) {
                if (e.accepting.count(item) == 0) continue;
                const auto prio = e.priority.find(item);
                const Priority newPrior =
                    (prio != e.priority.end()) ? prio->second : std::numeric_limits<Priority>::max();
                if (!isAccepting || newPrior < priority) {
                    isAccepting = true;
                    priority = newPrior;
                    const auto token = e.acceptingToken.find(item);
                    acTok = (token != e.acceptingToken.end()) ? token->second : std::string();
                }
            }

            //Insert the found priority
            if (isAccepting) {
                result.accepting.insert(newstate);
                result.priority[newstate] = priority;
                result.acceptingToken[newstate] = acTok;
            }

            //Do transition
            std::map<char, State> newtransitions;
            for (const auto& transition : state.second) {
                // Every target subset was registered in `dfa`, hence it has an index
                newtransitions[transition.first] = lookup.at(transition.second);
            }
            result.delta[newstate] = std::move(newtransitions);
        }

        return result;
    }

    /**
     * Merge several epsilon-NFAs into one.
     *
     * The result gets a fresh start state 0 with an epsilon transition to the start state
     * of every input automaton. The states of each input are renumbered by a running
     * offset, so the inputs occupy consecutive, disjoint ranges starting at 1.
     */
    ENFA merge(const std::vector<ENFA>& enfas) {
        ENFA result;

        // Same type as the state ids, so large automata cannot truncate the offset
        State offset = 1;

        result.starting = 0;
        result.numStates = 1;

        for (const auto& enfa : enfas) {
            result.numStates += enfa.numStates;

            for (const auto accepting : enfa.accepting) {
                result.accepting.insert(accepting + offset);
            }

            for (const auto& priority : enfa.priority) {
                result.priority[priority.first + offset] = priority.second;
            }

            for (const auto& acceptingToken : enfa.acceptingToken) {
                result.acceptingToken[acceptingToken.first + offset] = acceptingToken.second;
            }

            for (const auto& transition : enfa.delta) {
                std::map<char, std::set<State> > temptransition;
                for (const auto& trans : transition.second) {
                    std::set<State> tempset;
                    for (const auto state : trans.second) {
                        tempset.insert(state + offset);
                    }
                    temptransition[trans.first] = std::move(tempset);
                }
                result.delta[transition.first + offset] = std::move(temptransition);
            }

            for (const auto& epsilonTransition : enfa.epsilonTransitions) {
                std::set<State> tempset;
                for (const State state : epsilonTransition.second) {
                    tempset.insert(state + offset);
                }
                result.epsilonTransitions[epsilonTransition.first + offset] = std::move(tempset);
            }

            result.epsilonTransitions[0].insert(enfa.starting + offset);

            offset += enfa.numStates;
        }

        return result;
    }

} //namespace lxs