mirror of https://github.com/apache/lucene.git
LUCENE-9125: Optimize Automaton.step() with binary search and introduce Automaton.next().
Closes #1160
This commit is contained in:
parent
3bae63d215
commit
0528621d2f
|
@ -118,6 +118,8 @@ Optimizations
|
|||
|
||||
* LUCENE-9113: Faster merging of SORTED/SORTED_SET doc values. (Adrien Grand)
|
||||
|
||||
* LUCENE-9125: Optimize Automaton.step() with binary search and introduce Automaton.next(). (Bruno Roustant)
|
||||
|
||||
Bug Fixes
|
||||
---------------------
|
||||
|
||||
|
|
|
@ -656,22 +656,77 @@ public class Automaton implements Accountable {
|
|||
* @return destination state, -1 if no matching outgoing transition
|
||||
*/
|
||||
public int step(int state, int label) {
|
||||
return next(state, 0, label, null);
|
||||
}
|
||||
|
||||
/**
|
||||
* Looks for the next transition that matches the provided label, assuming determinism.
|
||||
* <p>
|
||||
* This method is similar to {@link #step(int, int)} but is used more efficiently
|
||||
* when iterating over multiple transitions from the same source state. It keeps
|
||||
* the latest reached transition index in {@code transition.transitionUpto} so
|
||||
* the next call to this method can continue from there instead of restarting
|
||||
* from the first transition.
|
||||
*
|
||||
* @param transition The transition to start the lookup from (inclusive, using its
|
||||
* {@link Transition#source} and {@link Transition#transitionUpto}).
|
||||
* It is updated with the matched transition;
|
||||
* or with {@link Transition#dest} = -1 if no match.
|
||||
* @param label The codepoint to look up.
|
||||
* @return The destination state; or -1 if no matching outgoing transition.
|
||||
*/
|
||||
public int next(Transition transition, int label) {
|
||||
return next(transition.source, transition.transitionUpto, label, transition);
|
||||
}
|
||||
|
||||
/**
|
||||
* Looks for the next transition that matches the provided label, assuming determinism.
|
||||
*
|
||||
* @param state The source state.
|
||||
* @param fromTransitionIndex The transition index to start the lookup from (inclusive); negative interpreted as 0.
|
||||
* @param label The codepoint to look up.
|
||||
* @param transition The output transition to update with the matching transition; or null for no update.
|
||||
* @return The destination state; or -1 if no matching outgoing transition.
|
||||
*/
|
||||
private int next(int state, int fromTransitionIndex, int label, Transition transition) {
|
||||
assert state >= 0;
|
||||
assert label >= 0;
|
||||
int trans = states[2*state];
|
||||
int limit = trans + 3*states[2*state+1];
|
||||
// TODO: we could do bin search; transitions are sorted
|
||||
while (trans < limit) {
|
||||
int dest = transitions[trans];
|
||||
int min = transitions[trans+1];
|
||||
int max = transitions[trans+2];
|
||||
if (min <= label && label <= max) {
|
||||
return dest;
|
||||
}
|
||||
trans += 3;
|
||||
}
|
||||
int stateIndex = 2 * state;
|
||||
int firstTransitionIndex = states[stateIndex];
|
||||
int numTransitions = states[stateIndex + 1];
|
||||
|
||||
return -1;
|
||||
// Since transitions are sorted,
|
||||
// binary search the transition for which label is within [minLabel, maxLabel].
|
||||
int low = Math.max(fromTransitionIndex, 0);
|
||||
int high = numTransitions - 1;
|
||||
while (low <= high) {
|
||||
int mid = (low + high) >>> 1;
|
||||
int transitionIndex = firstTransitionIndex + 3 * mid;
|
||||
int minLabel = transitions[transitionIndex + 1];
|
||||
if (minLabel > label) {
|
||||
high = mid - 1;
|
||||
} else {
|
||||
int maxLabel = transitions[transitionIndex + 2];
|
||||
if (maxLabel < label){
|
||||
low = mid + 1;
|
||||
} else {
|
||||
int destState = transitions[transitionIndex];
|
||||
if (transition != null) {
|
||||
transition.dest = destState;
|
||||
transition.min = minLabel;
|
||||
transition.max = maxLabel;
|
||||
transition.transitionUpto = mid;
|
||||
}
|
||||
return destState;
|
||||
}
|
||||
}
|
||||
}
|
||||
int destState = -1;
|
||||
if (transition != null) {
|
||||
transition.dest = destState;
|
||||
transition.transitionUpto = low;
|
||||
}
|
||||
return destState;
|
||||
}
|
||||
|
||||
/** Records new states and transitions and then {@link
|
||||
|
|
|
@ -94,12 +94,15 @@ final public class MinimizationOperations {
|
|||
}
|
||||
}
|
||||
// find initial partition and reverse edges
|
||||
Transition transition = new Transition();
|
||||
for (int q = 0; q < statesLen; q++) {
|
||||
final int j = a.isAccept(q) ? 0 : 1;
|
||||
partition[j].add(q);
|
||||
block[q] = j;
|
||||
transition.source = q;
|
||||
transition.transitionUpto = -1;
|
||||
for (int x = 0; x < sigmaLen; x++) {
|
||||
final ArrayList<Integer>[] r = reverse[a.step(q, sigma[x])];
|
||||
final ArrayList<Integer>[] r = reverse[a.next(transition, sigma[x])];
|
||||
if (r[x] == null) {
|
||||
r[x] = new ArrayList<>();
|
||||
}
|
||||
|
|
|
@ -78,10 +78,13 @@ public abstract class RunAutomaton implements Accountable {
|
|||
accept = new boolean[size];
|
||||
transitions = new int[size * points.length];
|
||||
Arrays.fill(transitions, -1);
|
||||
Transition transition = new Transition();
|
||||
for (int n=0;n<size;n++) {
|
||||
accept[n] = a.isAccept(n);
|
||||
transition.source = n;
|
||||
transition.transitionUpto = -1;
|
||||
for (int c = 0; c < points.length; c++) {
|
||||
int dest = a.step(n, points[c]);
|
||||
int dest = a.next(transition, points[c]);
|
||||
assert dest == -1 || dest < size;
|
||||
transitions[n * points.length + c] = dest;
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue