LUCENE-3094: optimize lev automata construction, don't keep around detached states

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1102875 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Robert Muir 2011-05-13 19:20:05 +00:00
parent 993488ea07
commit fa308d6ad6
3 changed files with 23 additions and 3 deletions

View File

@ -143,13 +143,16 @@ public class LevenshteinAutomata {
if (dest >= 0) if (dest >= 0)
for (int r = 0; r < numRanges; r++) for (int r = 0; r < numRanges; r++)
states[k].addTransition(new Transition(rangeLower[r], rangeUpper[r], states[dest])); states[k].addTransition(new Transition(rangeLower[r], rangeUpper[r], states[dest]));
// reduce the state: this doesn't appear to help anything
//states[k].reduce();
} }
Automaton a = new Automaton(states[0]); Automaton a = new Automaton(states[0]);
a.setDeterministic(true); a.setDeterministic(true);
a.setNumberedStates(states); // we create some useless unconnected states, and it's a net win overall to remove these,
// as well as to combine any adjacent transitions (it makes later algorithms more efficient).
// so, while we could set our numberedStates here, it's actually best not to, and instead to
// force a traversal in reduce, pruning the unconnected states while we combine adjacent transitions.
//a.setNumberedStates(states);
a.reduce();
// we need not trim transitions to dead states, as they are not created. // we need not trim transitions to dead states, as they are not created.
//a.restoreInvariant(); //a.restoreInvariant();
return a; return a;

View File

@ -397,4 +397,15 @@ public class AutomatonTestUtil {
path.remove(s); path.remove(s);
return true; return true;
} }
/**
 * Asserts that the automaton carries no detached states, i.e. states that
 * cannot be reached from the initial state.
 *
 * <p>The state count is sampled once, the automaton's numbered states are
 * cleared, and the count is sampled again; the two counts must agree.
 * Note that this clears the numbered states of {@code a} as a side effect.
 *
 * @param a the automaton to check
 */
public static void assertNoDetachedStates(Automaton a) {
  final int countBeforeReset = a.getNumberOfStates();
  // drop the cached numbering so the next count has to be recomputed
  a.clearNumberedStates();
  assert a.getNumberOfStates() == countBeforeReset
      : "automaton has " + (countBeforeReset - a.getNumberOfStates()) + " detached states";
}
} }

View File

@ -39,6 +39,11 @@ public class TestLevenshteinAutomata extends LuceneTestCase {
assertCharVectors(2); assertCharVectors(2);
} }
// LUCENE-3094: the distance-1 automaton built for "abc" must not
// contain any detached states.
public void testNoWastedStates() throws Exception {
  LevenshteinAutomata builder = new LevenshteinAutomata("abc");
  AutomatonTestUtil.assertNoDetachedStates(builder.toAutomaton(1));
}
/** /**
* Tests all possible characteristic vectors for some n * Tests all possible characteristic vectors for some n
* This exhaustively tests the parametric transitions tables. * This exhaustively tests the parametric transitions tables.
@ -66,6 +71,7 @@ public class TestLevenshteinAutomata extends LuceneTestCase {
assertNotNull(automata[n]); assertNotNull(automata[n]);
assertTrue(automata[n].isDeterministic()); assertTrue(automata[n].isDeterministic());
assertTrue(SpecialOperations.isFinite(automata[n])); assertTrue(SpecialOperations.isFinite(automata[n]));
AutomatonTestUtil.assertNoDetachedStates(automata[n]);
// check that the dfa for n-1 accepts a subset of the dfa for n // check that the dfa for n-1 accepts a subset of the dfa for n
if (n > 0) { if (n > 0) {
assertTrue(automata[n-1].subsetOf(automata[n])); assertTrue(automata[n-1].subsetOf(automata[n]));