mirror of https://github.com/apache/lucene.git
LUCENE-3094: optimize lev automata construction, don't keep around detached states
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1102875 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
993488ea07
commit
fa308d6ad6
|
@ -143,13 +143,16 @@ public class LevenshteinAutomata {
|
|||
if (dest >= 0)
|
||||
for (int r = 0; r < numRanges; r++)
|
||||
states[k].addTransition(new Transition(rangeLower[r], rangeUpper[r], states[dest]));
|
||||
// reduce the state: this doesn't appear to help anything
|
||||
//states[k].reduce();
|
||||
}
|
||||
|
||||
Automaton a = new Automaton(states[0]);
|
||||
a.setDeterministic(true);
|
||||
a.setNumberedStates(states);
|
||||
// we create some useless unconnected states, and it's a net win overall to remove these,
|
||||
// as well as to combine any adjacent transitions (it makes later algorithms more efficient).
|
||||
// so, while we could set our numberedStates here, it's actually best not to, and instead to
|
||||
// force a traversal in reduce, pruning the unconnected states while we combine adjacent transitions.
|
||||
//a.setNumberedStates(states);
|
||||
a.reduce();
|
||||
// we need not trim transitions to dead states, as they are not created.
|
||||
//a.restoreInvariant();
|
||||
return a;
|
||||
|
|
|
@ -397,4 +397,15 @@ public class AutomatonTestUtil {
|
|||
path.remove(s);
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Checks that an automaton has no detached states that are unreachable
|
||||
* from the initial state.
|
||||
*/
|
||||
public static void assertNoDetachedStates(Automaton a) {
|
||||
int numStates = a.getNumberOfStates();
|
||||
a.clearNumberedStates(); // force recomputation of cached numbered states
|
||||
assert numStates == a.getNumberOfStates() : "automaton has " + (numStates - a.getNumberOfStates()) + " detached states";
|
||||
}
|
||||
}
|
||||
|
|
|
@ -39,6 +39,11 @@ public class TestLevenshteinAutomata extends LuceneTestCase {
|
|||
assertCharVectors(2);
|
||||
}
|
||||
|
||||
// LUCENE-3094
|
||||
public void testNoWastedStates() throws Exception {
|
||||
AutomatonTestUtil.assertNoDetachedStates(new LevenshteinAutomata("abc").toAutomaton(1));
|
||||
}
|
||||
|
||||
/**
|
||||
* Tests all possible characteristic vectors for some n
|
||||
* This exhaustively tests the parametric transitions tables.
|
||||
|
@ -66,6 +71,7 @@ public class TestLevenshteinAutomata extends LuceneTestCase {
|
|||
assertNotNull(automata[n]);
|
||||
assertTrue(automata[n].isDeterministic());
|
||||
assertTrue(SpecialOperations.isFinite(automata[n]));
|
||||
AutomatonTestUtil.assertNoDetachedStates(automata[n]);
|
||||
// check that the dfa for n-1 accepts a subset of the dfa for n
|
||||
if (n > 0) {
|
||||
assertTrue(automata[n-1].subsetOf(automata[n]));
|
||||
|
|
Loading…
Reference in New Issue