mirror of https://github.com/apache/lucene.git
LUCENE-3094: optimize lev automata construction, don't keep around detached states
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1102875 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
993488ea07
commit
fa308d6ad6
|
@ -143,13 +143,16 @@ public class LevenshteinAutomata {
|
||||||
if (dest >= 0)
|
if (dest >= 0)
|
||||||
for (int r = 0; r < numRanges; r++)
|
for (int r = 0; r < numRanges; r++)
|
||||||
states[k].addTransition(new Transition(rangeLower[r], rangeUpper[r], states[dest]));
|
states[k].addTransition(new Transition(rangeLower[r], rangeUpper[r], states[dest]));
|
||||||
// reduce the state: this doesn't appear to help anything
|
|
||||||
//states[k].reduce();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
Automaton a = new Automaton(states[0]);
|
Automaton a = new Automaton(states[0]);
|
||||||
a.setDeterministic(true);
|
a.setDeterministic(true);
|
||||||
a.setNumberedStates(states);
|
// we create some useless unconnected states, and it's a net win overall to remove these,
|
||||||
|
// as well as to combine any adjacent transitions (it makes later algorithms more efficient).
|
||||||
|
// so, while we could set our numberedStates here, it's actually best not to, and instead to
|
||||||
|
// force a traversal in reduce, pruning the unconnected states while we combine adjacent transitions.
|
||||||
|
//a.setNumberedStates(states);
|
||||||
|
a.reduce();
|
||||||
// we need not trim transitions to dead states, as they are not created.
|
// we need not trim transitions to dead states, as they are not created.
|
||||||
//a.restoreInvariant();
|
//a.restoreInvariant();
|
||||||
return a;
|
return a;
|
||||||
|
|
|
@ -397,4 +397,15 @@ public class AutomatonTestUtil {
|
||||||
path.remove(s);
|
path.remove(s);
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Checks that an automaton has no detached states that are unreachable
|
||||||
|
* from the initial state.
* <p>
* Records the state count reported by {@code a}, clears its numbered states, and
* asserts that the count reported afterwards is unchanged. The check uses the Java
* {@code assert} statement, so it only fires when assertions are enabled.
*
* @param a the automaton to check; its numbered states are cleared by this call
|
||||||
|
*/
|
||||||
|
public static void assertNoDetachedStates(Automaton a) {
|
||||||
|
// state count as reported before the numbered states are cleared
int numStates = a.getNumberOfStates();
|
||||||
|
a.clearNumberedStates(); // force recomputation of cached numbered states
|
||||||
|
// a count that differs after recomputation is reported as that many detached states
assert numStates == a.getNumberOfStates() : "automaton has " + (numStates - a.getNumberOfStates()) + " detached states";
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -39,6 +39,11 @@ public class TestLevenshteinAutomata extends LuceneTestCase {
|
||||||
assertCharVectors(2);
|
assertCharVectors(2);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// LUCENE-3094: the automaton built for "abc" via toAutomaton(1) must pass
// AutomatonTestUtil.assertNoDetachedStates.
|
||||||
|
public void testNoWastedStates() throws Exception {
|
||||||
|
AutomatonTestUtil.assertNoDetachedStates(new LevenshteinAutomata("abc").toAutomaton(1));
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Tests all possible characteristic vectors for some n
|
* Tests all possible characteristic vectors for some n
|
||||||
* This exhaustively tests the parametric transitions tables.
|
* This exhaustively tests the parametric transitions tables.
|
||||||
|
@ -66,6 +71,7 @@ public class TestLevenshteinAutomata extends LuceneTestCase {
|
||||||
assertNotNull(automata[n]);
|
assertNotNull(automata[n]);
|
||||||
assertTrue(automata[n].isDeterministic());
|
assertTrue(automata[n].isDeterministic());
|
||||||
assertTrue(SpecialOperations.isFinite(automata[n]));
|
assertTrue(SpecialOperations.isFinite(automata[n]));
|
||||||
|
AutomatonTestUtil.assertNoDetachedStates(automata[n]);
|
||||||
// check that the dfa for n-1 accepts a subset of the dfa for n
|
// check that the dfa for n-1 accepts a subset of the dfa for n
|
||||||
if (n > 0) {
|
if (n > 0) {
|
||||||
assertTrue(automata[n-1].subsetOf(automata[n]));
|
assertTrue(automata[n-1].subsetOf(automata[n]));
|
||||||
|
|
Loading…
Reference in New Issue