LUCENE-6365: fix buggy Operations.topoSort; add test

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1689079 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Michael McCandless 2015-07-03 20:03:24 +00:00
parent 39a08cae76
commit ae4723e0b5
3 changed files with 78 additions and 22 deletions

View File

@ -150,7 +150,7 @@ final public class Operations {
/**
* Returns an automaton that accepts the union of the empty string and the
* language of the given automaton.
* language of the given automaton. This may create a dead state.
* <p>
* Complexity: linear in number of states.
*/
@ -1130,7 +1130,6 @@ final public class Operations {
IntsRefBuilder builder = new IntsRefBuilder();
HashSet<Integer> visited = new HashSet<>();
int s = 0;
boolean done;
Transition t = new Transition();
while (true) {
visited.add(s);
@ -1421,32 +1420,49 @@ final public class Operations {
return result;
}
/** Returns the topological sort of all states. Behavior is undefined if this
* automaton has cycles. CPU cost is O(numTransitions). */
/** Returns the topological sort of all states reachable from
* the initial state. Behavior is undefined if this
* automaton has cycles. CPU cost is O(numTransitions),
* and the implementation is recursive so an automaton
* matching long strings may exhaust the java stack. */
public static int[] topoSortStates(Automaton a) {
if (a.getNumStates() == 0) {
return new int[0];
}
int numStates = a.getNumStates();
int[] states = new int[numStates];
final BitSet visited = new BitSet(numStates);
final LinkedList<Integer> worklist = new LinkedList<>();
worklist.add(0);
visited.set(0);
int upto = 0;
states[upto] = 0;
upto++;
Transition t = new Transition();
while (worklist.size() > 0) {
int s = worklist.removeFirst();
int count = a.initTransition(s, t);
for (int i=0;i<count;i++) {
a.getNextTransition(t);
if (!visited.get(t.dest)) {
visited.set(t.dest);
worklist.add(t.dest);
states[upto++] = t.dest;
}
}
int upto = topoSortStatesRecurse(a, visited, states, 0, 0);
if (upto < states.length) {
// There were dead states
int[] newStates = new int[upto];
System.arraycopy(states, 0, newStates, 0, upto);
states = newStates;
}
// Reverse the order:
for(int i=0;i<states.length/2;i++) {
int s = states[i];
states[i] = states[states.length-1-i];
states[states.length-1-i] = s;
}
return states;
}
private static int topoSortStatesRecurse(Automaton a, BitSet visited, int[] states, int upto, int state) {
Transition t = new Transition();
int count = a.initTransition(state, t);
for (int i=0;i<count;i++) {
a.getNextTransition(t);
if (!visited.get(t.dest)) {
visited.set(t.dest);
upto = topoSortStatesRecurse(a, visited, states, upto, t.dest);
}
}
states[upto] = state;
upto++;
return upto;
}
}

View File

@ -783,6 +783,7 @@ public class TestAutomaton extends LuceneTestCase {
if (VERBOSE) {
System.out.println(" op=optional");
}
// NOTE: This can add a dead state:
a = Operations.optional(a);
terms.add(new BytesRef());
}
@ -1032,11 +1033,48 @@ public class TestAutomaton extends LuceneTestCase {
assertSame(terms, a);
assertEquals(AutomatonTestUtil.isDeterministicSlow(a), a.isDeterministic());
if (random().nextInt(10) == 7) {
a = verifyTopoSort(a);
}
}
assertSame(terms, a);
}
/** Runs topo sort, verifies transitions then only "go forwards", and
* builds and returns new automaton with those remapped toposorted states. */
private Automaton verifyTopoSort(Automaton a) {
int[] sorted = Operations.topoSortStates(a);
// This can be < if we removed dead states:
assertTrue(sorted.length <= a.getNumStates());
Automaton a2 = new Automaton();
int[] stateMap = new int[a.getNumStates()];
Arrays.fill(stateMap, -1);
Transition transition = new Transition();
for(int state : sorted) {
int newState = a2.createState();
a2.setAccept(newState, a.isAccept(state));
// Each state should only appear once in the sort:
assertEquals(-1, stateMap[state]);
stateMap[state] = newState;
}
// 2nd pass: add new transitions
for(int state : sorted) {
int count = a.initTransition(state, transition);
for(int i=0;i<count;i++) {
a.getNextTransition(transition);
assert stateMap[transition.dest] > stateMap[state];
a2.addTransition(stateMap[state], stateMap[transition.dest], transition.min, transition.max);
}
}
a2.finishState();
return a2;
}
private void assertSame(Collection<BytesRef> terms, Automaton a) {
try {

View File

@ -1233,6 +1233,8 @@ public class AnalyzingSuggesterTest extends LuceneTestCase {
suggester.build(new InputArrayIterator(new Input[] {
new Input(bigString, 7)}));
fail("did not hit expected exception");
} catch (StackOverflowError soe) {
// OK
} catch (IllegalArgumentException iae) {
// expected
}