Implementation of table filling for minimisation
This commit is contained in:
parent
2f2eb1ccef
commit
83018bb1f1
|
@ -13,7 +13,7 @@ namespace lxs {
|
|||
typedef unsigned long long Priority;
|
||||
|
||||
struct Automaton {
|
||||
State numStates;
|
||||
State numStates = 0;
|
||||
std::set<State> accepting;
|
||||
std::map<State, Priority> priority;
|
||||
std::map<State, std::string> acceptingToken;
|
||||
|
|
176
src/automata.cpp
176
src/automata.cpp
|
@ -1,8 +1,7 @@
|
|||
#include "automata.h"
|
||||
#include "Lexesis/automata.h"
|
||||
|
||||
#include <cctype>
|
||||
#include <algorithm>
|
||||
#include <stdexcept>
|
||||
#include <climits>
|
||||
#include <queue>
|
||||
#include <string>
|
||||
|
||||
namespace lxs {
|
||||
|
@ -94,4 +93,173 @@ namespace lxs {
|
|||
s += "in -> " + std::to_string(e.starting) + "\n}\n";
|
||||
return s;
|
||||
}
|
||||
|
||||
/**
 * Epsilon closure of a state of an ENFA.
 *
 * TODO: following the epsilon transitions is not implemented yet.
 * Until it is, only `s` itself is returned (a state always belongs to its
 * own closure), which is an under-approximation for automata that really
 * contain epsilon transitions. Returning a value at all matters: the previous
 * empty body fell off the end of a non-void function, which is undefined
 * behaviour.
 *
 * @param s the state whose closure is requested
 * @return a set containing (at least) `s`
 */
std::set<State> ENFA::eClose(State s) {
    return {s};
}
|
||||
|
||||
/**
 * Closure of a state as computed for an NFA: the result is the singleton
 * set holding only the state that was passed in.
 */
std::set<State> NFA::eClose(State s) {
    std::set<State> closure;
    closure.insert(s);
    return closure;
}
|
||||
|
||||
namespace { // Utility functions for minimisation
|
||||
|
||||
using Distinguishables = std::map<State, std::set<State> >;
|
||||
|
||||
/**
|
||||
* Return the reversal of a given DFA
|
||||
* This keeps the same accepting states as the original DFA
|
||||
* This uses a random starting state for the reversal.
|
||||
* This does not consider priorities or associated tokens.
|
||||
*/
|
||||
NFA reverse(const DFA& d) {
|
||||
NFA rev;
|
||||
|
||||
rev.numStates = d.numStates;
|
||||
rev.accepting = d.accepting;
|
||||
rev.starting = 0;
|
||||
|
||||
for (const auto& stateTransPair : d.delta) {
|
||||
for (const auto& child : stateTransPair.second) {
|
||||
rev.delta[stateTransPair.first][child.first].insert(child.second);
|
||||
}
|
||||
}
|
||||
|
||||
return rev;
|
||||
}
|
||||
|
||||
void markReachable(const DFA& d, State s, std::set<State> reachable) {
|
||||
if (reachable.count(s) > 0)
|
||||
return;
|
||||
reachable.insert(s);
|
||||
for (const auto& charStatePair : d.delta.find(s)->second)
|
||||
markReachable(d, charStatePair.second, reachable);
|
||||
}
|
||||
|
||||
/**
|
||||
* Remove unreachable nodes from the reversal of d
|
||||
* return a set with the reachable states
|
||||
*/
|
||||
std::set<State> removeUnreachable(const DFA& d, NFA& reversed) {
|
||||
std::set<State> reachable;
|
||||
markReachable(d, d.starting, reachable);
|
||||
|
||||
std::vector<State> statesToRemove;
|
||||
|
||||
for (State i = 0; i < d.numStates; i++) {
|
||||
if (reachable.count(i) == 0) {
|
||||
statesToRemove.push_back(i);
|
||||
}
|
||||
}
|
||||
|
||||
for (State s : statesToRemove) {
|
||||
reversed.accepting.erase(s);
|
||||
reversed.delta.erase(s);
|
||||
}
|
||||
|
||||
return reachable;
|
||||
}
|
||||
|
||||
/**
|
||||
* Compute distinguishable pairs, using the reversal of a DFA
|
||||
*/
|
||||
void computeDistinguishable(NFA& rev, Distinguishables& dist) {
|
||||
std::queue<std::pair<State, State> > q;
|
||||
|
||||
for (State a = 0; a < rev.numStates; a++) {
|
||||
for (State b = a + 1; b < rev.numStates; b++) {
|
||||
if (rev.accepting.count(a) != rev.accepting.count(b)) {
|
||||
q.push(std::make_pair(a, b));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
while (!q.empty()) {
|
||||
std::pair<State, State> p = q.front();
|
||||
q.pop();
|
||||
if (dist[p.first].count(p.second) > 0) continue;
|
||||
dist[p.first].insert(p.second);
|
||||
dist[p.second].insert(p.first);
|
||||
|
||||
for (int c = 0; c < 256; c++) {
|
||||
for (State nextA : rev.delta[p.first][(char)c]) {
|
||||
for (State nextB : rev.delta[p.second][(char) c]) {
|
||||
q.push(std::make_pair(nextA, nextB));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Do the actual minimisation, using precomputed distinguishable pairs
|
||||
*/
|
||||
DFA compress(const DFA& d, std::set<State> reachables, Distinguishables& dist) {
|
||||
DFA min;
|
||||
min.starting = d.starting;
|
||||
|
||||
std::map<State, State> newStates;
|
||||
std::set<State> done;
|
||||
|
||||
State cur = 0;
|
||||
for (State a = 0; a < d.numStates; a++) {
|
||||
if (reachables.count(a) == 0 || done.count(a) > 0) continue;
|
||||
|
||||
Priority prior;
|
||||
std::string acTok;
|
||||
if (d.accepting.count(a) > 0) {
|
||||
prior = d.priority.find(a)->second;
|
||||
acTok = d.acceptingToken.find(a)->second;
|
||||
}
|
||||
|
||||
newStates[a] = cur;
|
||||
done.insert(a);
|
||||
for (State b = a + 1; b < d.numStates; b++) {
|
||||
if (reachables.count(b) > 0 && dist[a].count(b) == 0) {
|
||||
done.insert(b);
|
||||
newStates[b] = cur;
|
||||
|
||||
if (d.accepting.count(b) > 0) {
|
||||
Priority bprior = d.priority.find(b)->second;
|
||||
if (bprior < prior) {
|
||||
prior = bprior;
|
||||
acTok = d.acceptingToken.find(b)->second;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (d.accepting.count(a) > 0) {
|
||||
min.accepting.insert(a);
|
||||
min.priority[a] = prior;
|
||||
min.acceptingToken[a] = acTok;
|
||||
}
|
||||
|
||||
++min.numStates;
|
||||
++cur;
|
||||
}
|
||||
|
||||
done.clear();
|
||||
//Fill the delta function of the minimized DFA
|
||||
for (State s = 0; s < d.numStates; s++) {
|
||||
if (done.count(newStates[s]) > 0) continue;
|
||||
done.insert(newStates[s]);
|
||||
for (int i = 0; i < 256; i++) {
|
||||
min.delta[newStates[s]][(char) i] = newStates[d.delta.find(s)->second.find((char) i)->second];
|
||||
}
|
||||
}
|
||||
|
||||
return min;
|
||||
}
|
||||
|
||||
} //namespace
|
||||
|
||||
/**
 * Minimise a DFA by chaining the helper steps in order: build the reversal
 * of d, strip the unreachable states from it, compute the distinguishable
 * pairs on the result and finally compress d with that information.
 */
DFA minimize(const DFA& d) {
    NFA backwards = reverse(d);
    const std::set<State> keptStates = removeUnreachable(d, backwards);

    Distinguishables distinguishable;
    computeDistinguishable(backwards, distinguishable);

    return compress(d, keptStates, distinguishable);
}
|
||||
|
||||
} //namespace lxs
|
||||
|
|
|
@ -0,0 +1,13 @@
|
|||
#include "Lexesis/automata.h"
|
||||
#include "Lexesis/re.h"
|
||||
|
||||
#include <iostream>
|
||||
|
||||
int main() {
|
||||
lxs::ENFA enfa;
|
||||
std::shared_ptr<lxs::RE> re = lxs::parseRE("[]-a-dA-D]");
|
||||
re->toENFA(enfa, 0);
|
||||
enfa.numStates++;
|
||||
enfa.starting = 0;
|
||||
std::cout << lxs::toDot(enfa) << std::endl;
|
||||
}
|
Loading…
Reference in New Issue