LUCENE-6365: add Operations.topoSort

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1689046 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Michael McCandless 2015-07-03 16:33:17 +00:00
parent a4e7ab3796
commit 39a08cae76
4 changed files with 34 additions and 59 deletions

View File

@ -123,6 +123,9 @@ New Features
attributes package that can be used for TokenStreams that solely produce attributes package that can be used for TokenStreams that solely produce
binary terms. (Uwe Schindler) binary terms. (Uwe Schindler)
* LUCENE-6365: Add Operations.topoSortStates, to run a topological sort of the
states in an Automaton (Markus Heiden via Mike McCandless)
API Changes API Changes
* LUCENE-6508: Simplify Lock api, there is now just * LUCENE-6508: Simplify Lock api, there is now just

View File

@ -1420,4 +1420,33 @@ final public class Operations {
result.finishState(); result.finishState();
return result; return result;
} }
/**
 * Returns the states reachable from state 0 in topological order: for every
 * transition {@code src -> dest} between returned states, {@code src} appears
 * before {@code dest}.
 *
 * <p>States that cannot be reached from state 0 are omitted, so the returned
 * array may be shorter than {@code a.getNumStates()}. An automaton with no
 * states yields an empty array.
 *
 * <p>Behavior is undefined if this automaton has cycles; in that case no
 * topological order exists and this implementation falls back to returning the
 * reachable states in breadth-first discovery order. CPU cost is
 * O(numStates + numTransitions).
 *
 * @param a the automaton whose states should be ordered
 * @return the reachable states, predecessors first
 */
public static int[] topoSortStates(Automaton a) {
  final int numStates = a.getNumStates();
  if (numStates == 0) {
    // There is no state 0 to start from; avoid indexing into an empty array.
    return new int[0];
  }

  final Transition t = new Transition();
  final BitSet visited = new BitSet(numStates);
  // Number of incoming transitions per state, counting only transitions that
  // leave a reachable state (each reachable state is scanned exactly once below).
  final int[] inDegree = new int[numStates];

  // Pass 1: breadth-first discovery from state 0. The array doubles as the
  // work queue, which avoids boxing every state into a collection.
  final int[] discovered = new int[numStates];
  int numReachable = 0;
  discovered[numReachable++] = 0;
  visited.set(0);
  for (int head = 0; head < numReachable; head++) {
    final int count = a.initTransition(discovered[head], t);
    for (int i = 0; i < count; i++) {
      a.getNextTransition(t);
      inDegree[t.dest]++;
      if (!visited.get(t.dest)) {
        visited.set(t.dest);
        discovered[numReachable++] = t.dest;
      }
    }
  }

  // Pass 2: Kahn's algorithm. A state is emitted only once all of its
  // predecessors have been emitted; again the output array is the queue.
  final int[] sorted = new int[numReachable];
  int upto = 0;
  if (inDegree[0] == 0) {
    sorted[upto++] = 0;
  }
  for (int head = 0; head < upto; head++) {
    final int count = a.initTransition(sorted[head], t);
    for (int i = 0; i < count; i++) {
      a.getNextTransition(t);
      if (--inDegree[t.dest] == 0) {
        sorted[upto++] = t.dest;
      }
    }
  }

  if (upto == numReachable) {
    return sorted;
  }

  // Some reachable state never reached in-degree zero, i.e. there is a cycle.
  // Keep the historical result (discovery order) rather than a partial one.
  final int[] fallback = new int[numReachable];
  System.arraycopy(discovered, 0, fallback, 0, numReachable);
  return fallback;
}
} }

View File

@ -23,12 +23,10 @@ import java.io.IOException;
import java.nio.file.Files; import java.nio.file.Files;
import java.nio.file.Path; import java.nio.file.Path;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.BitSet;
import java.util.Collection; import java.util.Collection;
import java.util.Collections; import java.util.Collections;
import java.util.Comparator; import java.util.Comparator;
import java.util.HashSet; import java.util.HashSet;
import java.util.LinkedList;
import java.util.List; import java.util.List;
import java.util.Set; import java.util.Set;
@ -272,33 +270,6 @@ public class AnalyzingSuggester extends Lookup implements Accountable {
} }
} }
/**
 * Returns the states reachable from state 0 in topological order: for every
 * transition {@code src -> dest} between returned states, {@code src} appears
 * before {@code dest}. Unreachable states are omitted and an automaton with no
 * states yields an empty array.
 *
 * <p>Behavior is undefined if the automaton has cycles; this implementation
 * then falls back to breadth-first discovery order.
 */
private int[] topoSortStates(Automaton a) {
  final int numStates = a.getNumStates();
  if (numStates == 0) {
    // There is no state 0 to start from; avoid indexing into an empty array.
    return new int[0];
  }

  final Transition t = new Transition();
  final BitSet visited = new BitSet(numStates);
  // Incoming-transition count per state, counting only transitions that leave
  // a reachable state.
  final int[] inDegree = new int[numStates];

  // Pass 1: breadth-first discovery; the array itself serves as the queue.
  final int[] discovered = new int[numStates];
  int numReachable = 0;
  discovered[numReachable++] = 0;
  visited.set(0);
  for (int head = 0; head < numReachable; head++) {
    final int count = a.initTransition(discovered[head], t);
    for (int i = 0; i < count; i++) {
      a.getNextTransition(t);
      inDegree[t.dest]++;
      if (!visited.get(t.dest)) {
        visited.set(t.dest);
        discovered[numReachable++] = t.dest;
      }
    }
  }

  // Pass 2: Kahn's algorithm; a state is emitted once all predecessors are.
  final int[] sorted = new int[numReachable];
  int upto = 0;
  if (inDegree[0] == 0) {
    sorted[upto++] = 0;
  }
  for (int head = 0; head < upto; head++) {
    final int count = a.initTransition(sorted[head], t);
    for (int i = 0; i < count; i++) {
      a.getNextTransition(t);
      if (--inDegree[t.dest] == 0) {
        sorted[upto++] = t.dest;
      }
    }
  }

  if (upto == numReachable) {
    return sorted;
  }

  // A cycle prevented a full ordering; return discovery order instead of a
  // partial result.
  final int[] fallback = new int[numReachable];
  System.arraycopy(discovered, 0, fallback, 0, numReachable);
  return fallback;
}
// Replaces SEP with epsilon or remaps them if // Replaces SEP with epsilon or remaps them if
// we were asked to preserve them: // we were asked to preserve them:
private Automaton replaceSep(Automaton a) { private Automaton replaceSep(Automaton a) {
@ -311,7 +282,7 @@ public class AnalyzingSuggester extends Lookup implements Accountable {
// Go in reverse topo sort so we know we only have to // Go in reverse topo sort so we know we only have to
// make one pass: // make one pass:
Transition t = new Transition(); Transition t = new Transition();
int[] topoSortStates = topoSortStates(a); int[] topoSortStates = Operations.topoSortStates(a);
for(int i=0;i<topoSortStates.length;i++) { for(int i=0;i<topoSortStates.length;i++) {
int state = topoSortStates[topoSortStates.length-1-i]; int state = topoSortStates[topoSortStates.length-1-i];
int count = a.initTransition(state, t); int count = a.initTransition(state, t);

View File

@ -18,9 +18,7 @@ package org.apache.lucene.search.suggest.document;
*/ */
import java.io.IOException; import java.io.IOException;
import java.util.BitSet;
import java.util.Iterator; import java.util.Iterator;
import java.util.LinkedList;
import java.util.Set; import java.util.Set;
import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.TokenStream;
@ -245,7 +243,7 @@ public final class CompletionTokenStream extends TokenStream {
// Go in reverse topo sort so we know we only have to // Go in reverse topo sort so we know we only have to
// make one pass: // make one pass:
Transition t = new Transition(); Transition t = new Transition();
int[] topoSortStates = topoSortStates(a); int[] topoSortStates = Operations.topoSortStates(a);
for (int i = 0; i < topoSortStates.length; i++) { for (int i = 0; i < topoSortStates.length; i++) {
int state = topoSortStates[topoSortStates.length - 1 - i]; int state = topoSortStates[topoSortStates.length - 1 - i];
int count = a.initTransition(state, t); int count = a.initTransition(state, t);
@ -281,32 +279,6 @@ public final class CompletionTokenStream extends TokenStream {
return result; return result;
} }
/**
 * Returns the states reachable from state 0 in topological order: for every
 * transition {@code src -> dest} between returned states, {@code src} appears
 * before {@code dest}. Unreachable states are omitted and an automaton with no
 * states yields an empty array.
 *
 * <p>Behavior is undefined if the automaton has cycles; this implementation
 * then falls back to breadth-first discovery order.
 */
private static int[] topoSortStates(Automaton a) {
  final int numStates = a.getNumStates();
  if (numStates == 0) {
    // There is no state 0 to start from; avoid indexing into an empty array.
    return new int[0];
  }

  final Transition t = new Transition();
  final BitSet visited = new BitSet(numStates);
  // Incoming-transition count per state, counting only transitions that leave
  // a reachable state.
  final int[] inDegree = new int[numStates];

  // Pass 1: breadth-first discovery; the array itself serves as the queue.
  final int[] discovered = new int[numStates];
  int numReachable = 0;
  discovered[numReachable++] = 0;
  visited.set(0);
  for (int head = 0; head < numReachable; head++) {
    final int count = a.initTransition(discovered[head], t);
    for (int i = 0; i < count; i++) {
      a.getNextTransition(t);
      inDegree[t.dest]++;
      if (!visited.get(t.dest)) {
        visited.set(t.dest);
        discovered[numReachable++] = t.dest;
      }
    }
  }

  // Pass 2: Kahn's algorithm; a state is emitted once all predecessors are.
  final int[] sorted = new int[numReachable];
  int upto = 0;
  if (inDegree[0] == 0) {
    sorted[upto++] = 0;
  }
  for (int head = 0; head < upto; head++) {
    final int count = a.initTransition(sorted[head], t);
    for (int i = 0; i < count; i++) {
      a.getNextTransition(t);
      if (--inDegree[t.dest] == 0) {
        sorted[upto++] = t.dest;
      }
    }
  }

  if (upto == numReachable) {
    return sorted;
  }

  // A cycle prevented a full ordering; return discovery order instead of a
  // partial result.
  final int[] fallback = new int[numReachable];
  System.arraycopy(discovered, 0, fallback, 0, numReachable);
  return fallback;
}
/** /**
* Attribute providing access to the term builder and UTF-16 conversion * Attribute providing access to the term builder and UTF-16 conversion
*/ */