LUCENE-9125: Optimize Automaton.step() with binary search and introduce Automaton.next().

This commit is contained in:
Bruno Roustant 2020-01-13 11:11:35 +01:00
parent 23fab1b6eb
commit eb84c04052
No known key found for this signature in database
GPG Key ID: CD28DABB95360525
4 changed files with 78 additions and 15 deletions

View File

@ -42,6 +42,8 @@ Optimizations
* LUCENE-9113: Faster merging of SORTED/SORTED_SET doc values. (Adrien Grand) * LUCENE-9113: Faster merging of SORTED/SORTED_SET doc values. (Adrien Grand)
* LUCENE-9125: Optimize Automaton.step() with binary search and introduce Automaton.next(). (Bruno Roustant)
Bug Fixes Bug Fixes
--------------------- ---------------------

View File

@ -656,22 +656,77 @@ public class Automaton implements Accountable {
* @return destination state, -1 if no matching outgoing transition * @return destination state, -1 if no matching outgoing transition
*/ */
public int step(int state, int label) { public int step(int state, int label) {
// One-off lookup: start the search at transition index 0 so every outgoing
// transition of the state is considered, and pass null so that no
// Transition object is updated with the match.
return next(state, 0, label, null);
}
/**
* Looks for the next transition that matches the provided label, assuming determinism.
* <p>
* This method is similar to {@link #step(int, int)} but is used more efficiently
* when iterating over multiple transitions from the same source state. It keeps
* the latest reached transition index in {@code transition.transitionUpto} so
* the next call to this method can continue from there instead of restarting
* from the first transition.
* <p>
* Only transitions at or after {@code transition.transitionUpto} are examined, so a
* label that is covered only by an earlier transition is reported as not matching.
* Successive calls for the same source state should therefore pass labels in
* non-decreasing order. Before looking up labels for another source state, set
* {@link Transition#source} to that state and reset {@link Transition#transitionUpto}
* (a negative value is interpreted as 0).
*
* @param transition The transition to start the lookup from (inclusive, using its
* {@link Transition#source} and {@link Transition#transitionUpto}).
* On a match, its {@link Transition#dest}, {@link Transition#min},
* {@link Transition#max} and {@link Transition#transitionUpto} are
* set from the matched transition. On no match, only
* {@link Transition#dest} (set to -1) and
* {@link Transition#transitionUpto} (set to the index where the
* search ended) are updated.
* @param label The codepoint to look up.
* @return The destination state; or -1 if no matching outgoing transition.
*/
public int next(Transition transition, int label) {
return next(transition.source, transition.transitionUpto, label, transition);
}
/**
* Looks for the next transition that matches the provided label, assuming determinism.
* <p>
* Binary searches the outgoing transitions of {@code state}, restricted to the
* transitions at or after {@code fromTransitionIndex}, for the one whose
* {@code [min, max]} label range contains {@code label}.
*
* @param state The source state.
* @param fromTransitionIndex The transition index, counted from the first transition of {@code state},
* to start the lookup from (inclusive); negative interpreted as 0.
* @param label The codepoint to look up.
* @param transition The output transition to update with the matching transition; or null for no update.
* On a match its dest, min, max and transitionUpto are set; on no match only
* dest (set to -1) and transitionUpto (set to the index where the search ended) are set.
* @return The destination state; or -1 if no matching outgoing transition.
*/
private int next(int state, int fromTransitionIndex, int label, Transition transition) {
// NOTE(review): the lines from here down to "return -1;" appear to interleave the
// removed linear scan with the added declarations (side-by-side diff rendering);
// presumably only the binary search below is the post-commit logic -- confirm
// against the actual file.
assert state >= 0; assert state >= 0;
assert label >= 0; assert label >= 0;
int trans = states[2*state]; int stateIndex = 2 * state;
int limit = trans + 3*states[2*state+1]; int firstTransitionIndex = states[stateIndex];
// TODO: we could do bin search; transitions are sorted int numTransitions = states[stateIndex + 1];
while (trans < limit) {
int dest = transitions[trans];
int min = transitions[trans+1];
int max = transitions[trans+2];
if (min <= label && label <= max) {
return dest;
}
trans += 3;
}
return -1; // Since transitions are sorted,
// binary search the transition for which label is within [minLabel, maxLabel].
// Never search before the requested start index; a negative start (e.g. the -1
// that callers store in transitionUpto when switching source state) means
// "from the first transition".
int low = Math.max(fromTransitionIndex, 0);
int high = numTransitions - 1;
while (low <= high) {
// Unsigned shift keeps the midpoint non-negative even if low + high overflows.
int mid = (low + high) >>> 1;
// Each transition occupies three consecutive ints in the transitions array:
// destination state, min label, max label.
int transitionIndex = firstTransitionIndex + 3 * mid;
int minLabel = transitions[transitionIndex + 1];
if (minLabel > label) {
high = mid - 1;
} else {
int maxLabel = transitions[transitionIndex + 2];
if (maxLabel < label){
low = mid + 1;
} else {
// minLabel <= label <= maxLabel: this is the matching transition.
int destState = transitions[transitionIndex];
if (transition != null) {
transition.dest = destState;
transition.min = minLabel;
transition.max = maxLabel;
// Remember the matched index so a later lookup can resume from here.
transition.transitionUpto = mid;
}
return destState;
}
}
}
// No match. Given sorted transitions, low is now the index of the first searched
// transition whose label range lies above label, or numTransitions if there is
// none; storing it lets a later lookup skip everything below it. The min and max
// fields of the output transition are left as they were.
int destState = -1;
if (transition != null) {
transition.dest = destState;
transition.transitionUpto = low;
}
return destState;
} }
/** Records new states and transitions and then {@link /** Records new states and transitions and then {@link

View File

@ -94,12 +94,15 @@ final public class MinimizationOperations {
} }
} }
// find initial partition and reverse edges // find initial partition and reverse edges
Transition transition = new Transition();
for (int q = 0; q < statesLen; q++) { for (int q = 0; q < statesLen; q++) {
final int j = a.isAccept(q) ? 0 : 1; final int j = a.isAccept(q) ? 0 : 1;
partition[j].add(q); partition[j].add(q);
block[q] = j; block[q] = j;
transition.source = q;
transition.transitionUpto = -1;
for (int x = 0; x < sigmaLen; x++) { for (int x = 0; x < sigmaLen; x++) {
final ArrayList<Integer>[] r = reverse[a.step(q, sigma[x])]; final ArrayList<Integer>[] r = reverse[a.next(transition, sigma[x])];
if (r[x] == null) { if (r[x] == null) {
r[x] = new ArrayList<>(); r[x] = new ArrayList<>();
} }

View File

@ -78,10 +78,13 @@ public abstract class RunAutomaton implements Accountable {
accept = new boolean[size]; accept = new boolean[size];
transitions = new int[size * points.length]; transitions = new int[size * points.length];
Arrays.fill(transitions, -1); Arrays.fill(transitions, -1);
Transition transition = new Transition();
for (int n=0;n<size;n++) { for (int n=0;n<size;n++) {
accept[n] = a.isAccept(n); accept[n] = a.isAccept(n);
transition.source = n;
transition.transitionUpto = -1;
for (int c = 0; c < points.length; c++) { for (int c = 0; c < points.length; c++) {
int dest = a.step(n, points[c]); int dest = a.next(transition, points[c]);
assert dest == -1 || dest < size; assert dest == -1 || dest < size;
transitions[n * points.length + c] = dest; transitions[n * points.length + c] = dest;
} }