365 lines
13 KiB
C++
365 lines
13 KiB
C++
#include "Lexesis/automata.h"
|
|
|
|
#include <algorithm>
|
|
#include <climits>
|
|
#include <queue>
|
|
#include <string>
|
|
|
|
namespace lxs {
|
|
std::string toDot(const DFA& d)
|
|
{
|
|
std::string s = "digraph {\nrankdir=LR\nin [shape=point style=invis]\n";
|
|
|
|
for (State state = 0; state < d.numStates; state++)
|
|
{
|
|
s += std::to_string(state) + " [label=\"" + std::to_string(state) + "\"";
|
|
if (state == d.starting)
|
|
s += " color=yellow";
|
|
if (d.accepting.count(state) > 0)
|
|
s += " color=green shape=doublecircle";
|
|
s += "]\n";
|
|
}
|
|
|
|
for (const auto& tmp : d.delta)
|
|
{
|
|
const auto& from = tmp.first;
|
|
for (const auto& trans : tmp.second)
|
|
{
|
|
s += std::to_string(from) + " -> " + std::to_string(trans.second) + " [label=\"" + trans.first + "\"]\n";
|
|
}
|
|
}
|
|
|
|
s += "in -> " + std::to_string(d.starting) + "\n}\n";
|
|
return s;
|
|
}
|
|
|
|
std::string toDot(const NFA& n)
|
|
{
|
|
std::string s = "digraph {\nrankdir=LR\nin [shape=point style=invis]\n";
|
|
|
|
for (State state = 0; state < n.numStates; state++)
|
|
{
|
|
s += std::to_string(state) + " [label=\"" + std::to_string(state) + "\"";
|
|
if (state == n.starting)
|
|
s += " color=yellow";
|
|
if (n.accepting.count(state) > 0)
|
|
s += " color=green shape=doublecircle";
|
|
s += "]\n";
|
|
}
|
|
|
|
for (const auto& tmp : n.delta)
|
|
{
|
|
const auto& from = tmp.first;
|
|
for (const auto& trans : tmp.second)
|
|
{
|
|
for (const auto& to : trans.second)
|
|
s += std::to_string(from) + " -> " + std::to_string(to) + " [label=\"" + trans.first + "\"]\n";
|
|
}
|
|
}
|
|
|
|
s += "in -> " + std::to_string(n.starting) + "\n}\n";
|
|
return s;
|
|
}
|
|
|
|
std::string toDot(const ENFA& e)
|
|
{
|
|
std::string s = "digraph {\nrankdir=LR\nin [shape=point style=invis]\n";
|
|
|
|
for (State state = 0; state < e.numStates; state++)
|
|
{
|
|
s += std::to_string(state) + " [label=\"" + std::to_string(state) + "\"";
|
|
if (state == e.starting)
|
|
s += " color=yellow";
|
|
if (e.accepting.count(state) > 0)
|
|
s += " color=green shape=doublecircle";
|
|
s += "]\n";
|
|
}
|
|
|
|
for (const auto& tmp : e.delta)
|
|
{
|
|
const auto& from = tmp.first;
|
|
for (const auto& trans : tmp.second)
|
|
{
|
|
for (const auto& to : trans.second)
|
|
s += std::to_string(from) + " -> " + std::to_string(to) + " [label=\"" + trans.first + "\"]\n";
|
|
}
|
|
}
|
|
|
|
for (const auto& etrans : e.epsilonTransitions)
|
|
{
|
|
for (const auto& dest : etrans.second)
|
|
s += std::to_string(etrans.first) + " -> " + std::to_string(dest) + " [label=\"ε\"]\n";
|
|
}
|
|
|
|
s += "in -> " + std::to_string(e.starting) + "\n}\n";
|
|
return s;
|
|
}
|
|
|
|
/**
 * Compute the epsilon-closure of a state.
 *
 * The closure contains `s` itself plus every state that can be reached from
 * it by following epsilon transitions only. States without any outgoing
 * epsilon transition are handled (they simply contribute nothing further).
 *
 * @param s the state to start from
 * @return the set of states in the closure, always containing `s`
 */
std::set<State> ENFA::eClose(State s) const {
    std::set<State> closure;
    std::queue<State> pending;
    closure.insert(s);
    pending.push(s);

    while (!pending.empty()) {
        const State current = pending.front();
        pending.pop();

        // A state need not have an entry in epsilonTransitions; dereferencing
        // the result of find() unconditionally would be undefined behaviour.
        const auto found = epsilonTransitions.find(current);
        if (found == epsilonTransitions.end())
            continue;

        for (const State next : found->second) {
            // insert() reports whether the state is new, so each state is
            // queued at most once.
            if (closure.insert(next).second)
                pending.push(next);
        }
    }
    return closure;
}
|
|
|
|
/**
 * Epsilon-closure for an automaton without epsilon transitions: the closure
 * of a state is just that state.
 *
 * @param s the state to close over
 * @return a set containing only `s`
 */
std::set<State> NFA::eClose(State s) const {
    std::set<State> closure;
    closure.insert(s);
    return closure;
}
|
|
|
|
namespace { // Utility functions for minimisation
|
|
|
|
// For each state, the set of states that have been found distinguishable from it.
// computeDistinguishable records every pair in both directions.
using Distinguishables = std::map<State, std::set<State> >;
|
|
|
|
/**
|
|
* Return the reversal of a given DFA
|
|
* This keeps the same accepting states as the original DFA
|
|
* This uses a random starting state for the reversal.
|
|
* This does not consider priorities or associated tokens.
|
|
*/
|
|
NFA reverse(const DFA& d) {
|
|
NFA rev;
|
|
|
|
rev.numStates = d.numStates;
|
|
rev.accepting = d.accepting;
|
|
rev.starting = 0;
|
|
|
|
for (const auto& stateTransPair : d.delta) {
|
|
for (const auto& child : stateTransPair.second) {
|
|
rev.delta[stateTransPair.first][child.first].insert(child.second);
|
|
}
|
|
}
|
|
|
|
return rev;
|
|
}
|
|
|
|
void markReachable(const DFA& d, State s, std::set<State>& reachable) {
|
|
if (reachable.count(s) > 0)
|
|
return;
|
|
reachable.insert(s);
|
|
for (const auto& charStatePair : d.delta.find(s)->second)
|
|
markReachable(d, charStatePair.second, reachable);
|
|
}
|
|
|
|
/**
|
|
* Remove unreachable nodes from the reversal of d
|
|
* return a set with the reachable states
|
|
*/
|
|
std::set<State> removeUnreachable(const DFA& d, NFA& reversed) {
|
|
std::set<State> reachable;
|
|
markReachable(d, d.starting, reachable);
|
|
|
|
std::vector<State> statesToRemove;
|
|
|
|
for (State i = 0; i < d.numStates; i++) {
|
|
if (reachable.count(i) == 0) {
|
|
statesToRemove.push_back(i);
|
|
}
|
|
}
|
|
|
|
for (State s : statesToRemove) {
|
|
reversed.accepting.erase(s);
|
|
reversed.delta.erase(s);
|
|
}
|
|
|
|
return reachable;
|
|
}
|
|
|
|
/**
|
|
* Compute distinguishable pairs, using the reversal of a DFA
|
|
*/
|
|
void computeDistinguishable(NFA& rev, Distinguishables& dist) {
|
|
std::queue<std::pair<State, State> > q;
|
|
|
|
for (State a = 0; a < rev.numStates; a++) {
|
|
for (State b = a + 1; b < rev.numStates; b++) {
|
|
if (rev.accepting.count(a) != rev.accepting.count(b)) {
|
|
q.push(std::make_pair(a, b));
|
|
}
|
|
}
|
|
}
|
|
|
|
while (!q.empty()) {
|
|
std::pair<State, State> p = q.front();
|
|
q.pop();
|
|
if (dist[p.first].count(p.second) > 0) continue;
|
|
dist[p.first].insert(p.second);
|
|
dist[p.second].insert(p.first);
|
|
|
|
for (int c = 0; c < 256; c++) {
|
|
for (State nextA : rev.delta[p.first][(char)c]) {
|
|
for (State nextB : rev.delta[p.second][(char) c]) {
|
|
q.push(std::make_pair(nextA, nextB));
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Do the actual minimisation, using precomputed distinguishable pairs
|
|
*/
|
|
DFA compress(const DFA& d, std::set<State>& reachables, Distinguishables& dist) {
|
|
DFA min;
|
|
min.starting = d.starting;
|
|
|
|
std::map<State, State> newStates;
|
|
std::set<State> done;
|
|
|
|
State cur = 0;
|
|
for (State a = 0; a < d.numStates; a++) {
|
|
if (reachables.count(a) == 0 || done.count(a) > 0) continue;
|
|
|
|
Priority prior;
|
|
std::string acTok;
|
|
if (d.accepting.count(a) > 0) {
|
|
prior = d.priority.find(a)->second;
|
|
acTok = d.acceptingToken.find(a)->second;
|
|
}
|
|
|
|
newStates[a] = cur;
|
|
done.insert(a);
|
|
for (State b = a + 1; b < d.numStates; b++) {
|
|
if (reachables.count(b) > 0 && dist[a].count(b) == 0) {
|
|
done.insert(b);
|
|
newStates[b] = cur;
|
|
|
|
if (d.accepting.count(b) > 0) {
|
|
Priority bprior = d.priority.find(b)->second;
|
|
if (bprior < prior) {
|
|
prior = bprior;
|
|
acTok = d.acceptingToken.find(b)->second;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
if (d.accepting.count(a) > 0) {
|
|
min.accepting.insert(a);
|
|
min.priority[a] = prior;
|
|
min.acceptingToken[a] = acTok;
|
|
}
|
|
|
|
++min.numStates;
|
|
++cur;
|
|
}
|
|
|
|
done.clear();
|
|
//Fill the delta function of the minimized DFA
|
|
for (State s = 0; s < d.numStates; s++) {
|
|
if (done.count(newStates[s]) > 0) continue;
|
|
done.insert(newStates[s]);
|
|
for (const auto& p : d.delta.find(s)->second)
|
|
min.delta[newStates[s]][p.first] = newStates[p.second];
|
|
}
|
|
|
|
return min;
|
|
}
|
|
|
|
} //namespace
|
|
|
|
/**
 * Minimise a DFA.
 *
 * Builds the reversal of d, strips the states unreachable from d.starting
 * out of it, computes the distinguishable state pairs on that reversal and
 * finally lets compress() build the resulting automaton.
 *
 * @param d the DFA to minimise
 * @return the result of compress() for d
 */
DFA minimize(const DFA& d) {
    NFA backwards = reverse(d);
    std::set<State> live = removeUnreachable(d, backwards);

    Distinguishables marked;
    computeDistinguishable(backwards, marked);

    return compress(d, live, marked);
}
|
|
|
|
namespace { // Utility function for mssc
|
|
|
|
std::set<State> getNextState(std::set<State> oldstate, char symbol, const NFA& e) {
|
|
std::set<State> states;
|
|
for(auto &state: oldstate) {
|
|
auto a = e.delta.find(state);
|
|
if(a != e.delta.end()) {
|
|
auto newStates = a->second.find(symbol);
|
|
if(newStates != a->second.end()) {
|
|
for(auto &newstate:newStates->second) {
|
|
auto eclosestates = e.eClose(newstate);
|
|
for(auto &eclosestate:eclosestates) {
|
|
states.insert(eclosestate);
|
|
}
|
|
}
|
|
|
|
}
|
|
}
|
|
}
|
|
if(states.empty()) states.insert(-1);
|
|
return states;
|
|
}
|
|
|
|
} //namespace
|
|
|
|
/**
 * Subset construction: build a DFA from an NFA.
 *
 * Each DFA state corresponds to a set of NFA states. The set containing only
 * the sentinel `static_cast<State>(-1)` is the dead state; it loops to itself
 * on every character. Starting from the epsilon-closure of e.starting, the
 * successor subset for each of the 256 character values is computed with
 * getNextState(). Subsets are numbered in their std::map ordering.
 *
 * A DFA state is accepting when at least one of its NFA states has a
 * priority entry. Its priority is the lowest such priority and its accepting
 * token is the token of the NFA state providing that priority (the first one
 * in set order on ties). Every accepting DFA state is handled independently,
 * so several DFA states may share the same priority and token.
 *
 * @param e the automaton to determinise
 * @return the resulting DFA, with a transition for every character in every state
 */
DFA mssc(const NFA& e) {
    using Subset = std::set<State>;
    using SubsetTransitions = std::map<char, Subset>;

    const Subset dead = {static_cast<State>(-1)};

    std::map<Subset, SubsetTransitions> dfa;

    // The dead state loops to itself on every input.
    SubsetTransitions deadLoop;
    for (int c = 0; c < 256; c++)
        deadLoop.emplace(static_cast<char>(c), dead);
    dfa.emplace(dead, deadLoop);

    // A subset gets its (initially empty) slot in `dfa` the moment it is
    // discovered, so it is queued and expanded exactly once.
    const Subset start = e.eClose(e.starting);
    std::queue<Subset> tocheck;
    if (dfa.emplace(start, SubsetTransitions()).second)
        tocheck.push(start);

    while (!tocheck.empty()) {
        const Subset state = tocheck.front();
        tocheck.pop();

        SubsetTransitions trans;
        for (int c = 0; c < 256; c++) {
            const char symbol = static_cast<char>(c);
            const Subset nextstate = getNextState(state, symbol, e);
            if (dfa.emplace(nextstate, SubsetTransitions()).second)
                tocheck.push(nextstate);
            trans.emplace(symbol, nextstate);
        }
        dfa[state] = trans;
    }

    // Number the subsets in map order.
    std::map<Subset, State> lookup;
    State numStates = 0;
    for (const auto& entry : dfa)
        lookup.emplace(entry.first, numStates++);

    DFA result;
    result.numStates = numStates;
    result.starting = lookup.find(start)->second;

    for (const auto& entry : dfa) {
        const State newstate = lookup.find(entry.first)->second;

        // Find the NFA state with the lowest priority inside this subset.
        bool accepts = false;
        Priority best{};
        State winner{};
        for (const State item : entry.first) {
            const auto prio = e.priority.find(item);
            if (prio == e.priority.end())
                continue;
            if (!accepts || prio->second < best) {
                accepts = true;
                best = prio->second;
                winner = item;
            }
        }

        if (accepts) {
            result.accepting.insert(newstate);
            result.priority.emplace(newstate, best);
            const auto token = e.acceptingToken.find(winner);
            if (token != e.acceptingToken.end())
                result.acceptingToken.emplace(newstate, token->second);
        }

        // Every transition target was given a slot in `dfa` above, so it has a number.
        std::map<char, State> newtransitions;
        for (const auto& transition : entry.second)
            newtransitions.emplace(transition.first, lookup.find(transition.second)->second);
        result.delta.emplace(newstate, newtransitions);
    }

    return result;
}
|
|
|
|
} //namespace lxs
|