mirror of https://github.com/apache/lucene.git
LUCENE-6365: fix buggy Operations.topoSort; add test
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1689079 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
39a08cae76
commit
ae4723e0b5
|
@ -150,7 +150,7 @@ final public class Operations {
|
|||
|
||||
/**
|
||||
* Returns an automaton that accepts the union of the empty string and the
|
||||
* language of the given automaton.
|
||||
* language of the given automaton. This may create a dead state.
|
||||
* <p>
|
||||
* Complexity: linear in number of states.
|
||||
*/
|
||||
|
@ -1130,7 +1130,6 @@ final public class Operations {
|
|||
IntsRefBuilder builder = new IntsRefBuilder();
|
||||
HashSet<Integer> visited = new HashSet<>();
|
||||
int s = 0;
|
||||
boolean done;
|
||||
Transition t = new Transition();
|
||||
while (true) {
|
||||
visited.add(s);
|
||||
|
@ -1421,32 +1420,49 @@ final public class Operations {
|
|||
return result;
|
||||
}
|
||||
|
||||
/** Returns the topological sort of all states. Behavior is undefined if this
|
||||
* automaton has cycles. CPU cost is O(numTransitions). */
|
||||
/** Returns the topological sort of all states reachable from
|
||||
* the initial state. Behavior is undefined if this
|
||||
* automaton has cycles. CPU cost is O(numTransitions),
|
||||
* and the implementation is recursive so an automaton
|
||||
* matching long strings may exhaust the java stack. */
|
||||
public static int[] topoSortStates(Automaton a) {
|
||||
if (a.getNumStates() == 0) {
|
||||
return new int[0];
|
||||
}
|
||||
int numStates = a.getNumStates();
|
||||
int[] states = new int[numStates];
|
||||
final BitSet visited = new BitSet(numStates);
|
||||
final LinkedList<Integer> worklist = new LinkedList<>();
|
||||
worklist.add(0);
|
||||
visited.set(0);
|
||||
int upto = 0;
|
||||
states[upto] = 0;
|
||||
upto++;
|
||||
Transition t = new Transition();
|
||||
while (worklist.size() > 0) {
|
||||
int s = worklist.removeFirst();
|
||||
int count = a.initTransition(s, t);
|
||||
for (int i=0;i<count;i++) {
|
||||
a.getNextTransition(t);
|
||||
if (!visited.get(t.dest)) {
|
||||
visited.set(t.dest);
|
||||
worklist.add(t.dest);
|
||||
states[upto++] = t.dest;
|
||||
}
|
||||
}
|
||||
int upto = topoSortStatesRecurse(a, visited, states, 0, 0);
|
||||
|
||||
if (upto < states.length) {
|
||||
// There were dead states
|
||||
int[] newStates = new int[upto];
|
||||
System.arraycopy(states, 0, newStates, 0, upto);
|
||||
states = newStates;
|
||||
}
|
||||
|
||||
// Reverse the order:
|
||||
for(int i=0;i<states.length/2;i++) {
|
||||
int s = states[i];
|
||||
states[i] = states[states.length-1-i];
|
||||
states[states.length-1-i] = s;
|
||||
}
|
||||
|
||||
return states;
|
||||
}
|
||||
|
||||
private static int topoSortStatesRecurse(Automaton a, BitSet visited, int[] states, int upto, int state) {
|
||||
Transition t = new Transition();
|
||||
int count = a.initTransition(state, t);
|
||||
for (int i=0;i<count;i++) {
|
||||
a.getNextTransition(t);
|
||||
if (!visited.get(t.dest)) {
|
||||
visited.set(t.dest);
|
||||
upto = topoSortStatesRecurse(a, visited, states, upto, t.dest);
|
||||
}
|
||||
}
|
||||
states[upto] = state;
|
||||
upto++;
|
||||
return upto;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -783,6 +783,7 @@ public class TestAutomaton extends LuceneTestCase {
|
|||
if (VERBOSE) {
|
||||
System.out.println(" op=optional");
|
||||
}
|
||||
// NOTE: This can add a dead state:
|
||||
a = Operations.optional(a);
|
||||
terms.add(new BytesRef());
|
||||
}
|
||||
|
@ -1032,11 +1033,48 @@ public class TestAutomaton extends LuceneTestCase {
|
|||
|
||||
assertSame(terms, a);
|
||||
assertEquals(AutomatonTestUtil.isDeterministicSlow(a), a.isDeterministic());
|
||||
|
||||
if (random().nextInt(10) == 7) {
|
||||
a = verifyTopoSort(a);
|
||||
}
|
||||
}
|
||||
|
||||
assertSame(terms, a);
|
||||
}
|
||||
|
||||
/** Runs topo sort, verifies transitions then only "go forwards", and
|
||||
* builds and returns new automaton with those remapped toposorted states. */
|
||||
private Automaton verifyTopoSort(Automaton a) {
|
||||
int[] sorted = Operations.topoSortStates(a);
|
||||
// This can be < if we removed dead states:
|
||||
assertTrue(sorted.length <= a.getNumStates());
|
||||
Automaton a2 = new Automaton();
|
||||
int[] stateMap = new int[a.getNumStates()];
|
||||
Arrays.fill(stateMap, -1);
|
||||
Transition transition = new Transition();
|
||||
for(int state : sorted) {
|
||||
int newState = a2.createState();
|
||||
a2.setAccept(newState, a.isAccept(state));
|
||||
|
||||
// Each state should only appear once in the sort:
|
||||
assertEquals(-1, stateMap[state]);
|
||||
stateMap[state] = newState;
|
||||
}
|
||||
|
||||
// 2nd pass: add new transitions
|
||||
for(int state : sorted) {
|
||||
int count = a.initTransition(state, transition);
|
||||
for(int i=0;i<count;i++) {
|
||||
a.getNextTransition(transition);
|
||||
assert stateMap[transition.dest] > stateMap[state];
|
||||
a2.addTransition(stateMap[state], stateMap[transition.dest], transition.min, transition.max);
|
||||
}
|
||||
}
|
||||
|
||||
a2.finishState();
|
||||
return a2;
|
||||
}
|
||||
|
||||
private void assertSame(Collection<BytesRef> terms, Automaton a) {
|
||||
|
||||
try {
|
||||
|
|
|
@ -1233,6 +1233,8 @@ public class AnalyzingSuggesterTest extends LuceneTestCase {
|
|||
suggester.build(new InputArrayIterator(new Input[] {
|
||||
new Input(bigString, 7)}));
|
||||
fail("did not hit expected exception");
|
||||
} catch (StackOverflowError soe) {
|
||||
// OK
|
||||
} catch (IllegalArgumentException iae) {
|
||||
// expected
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue