mirror of https://github.com/apache/lucene.git
LUCENE-9125: Optimize Automaton.step() with binary search and introduce Automaton.next().
This commit is contained in:
parent
23fab1b6eb
commit
eb84c04052
|
@ -42,6 +42,8 @@ Optimizations
|
||||||
|
|
||||||
* LUCENE-9113: Faster merging of SORTED/SORTED_SET doc values. (Adrien Grand)
|
* LUCENE-9113: Faster merging of SORTED/SORTED_SET doc values. (Adrien Grand)
|
||||||
|
|
||||||
|
* LUCENE-9125: Optimize Automaton.step() with binary search and introduce Automaton.next(). (Bruno Roustant)
|
||||||
|
|
||||||
Bug Fixes
|
Bug Fixes
|
||||||
---------------------
|
---------------------
|
||||||
|
|
||||||
|
|
|
@ -656,22 +656,77 @@ public class Automaton implements Accountable {
|
||||||
* @return destination state, -1 if no matching outgoing transition
|
* @return destination state, -1 if no matching outgoing transition
|
||||||
*/
|
*/
|
||||||
public int step(int state, int label) {
|
public int step(int state, int label) {
|
||||||
|
return next(state, 0, label, null);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Looks for the next transition that matches the provided label, assuming determinism.
|
||||||
|
* <p>
|
||||||
|
* This method is similar to {@link #step(int, int)} but is used more efficiently
|
||||||
|
* when iterating over multiple transitions from the same source state. It keeps
|
||||||
|
* the latest reached transition index in {@code transition.transitionUpto} so
|
||||||
|
* the next call to this method can continue from there instead of restarting
|
||||||
|
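* from the first transition. Since transitions before that index are not re-examined, successive calls for the same source state must look up labels in ascending order.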
|
||||||
|
*
|
||||||
|
* @param transition The transition to start the lookup from (inclusive, using its
|
||||||
|
* {@link Transition#source} and {@link Transition#transitionUpto}).
|
||||||
|
* It is updated with the matched transition;
|
||||||
|
* or with {@link Transition#dest} = -1 if no match (in which case its min and max are left unchanged).
|
||||||
|
* @param label The codepoint to look up.
|
||||||
|
* @return The destination state; or -1 if no matching outgoing transition.
|
||||||
|
*/
|
||||||
|
public int next(Transition transition, int label) {
|
||||||
|
return next(transition.source, transition.transitionUpto, label, transition);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Looks for the next transition that matches the provided label, assuming determinism.
|
||||||
|
*
|
||||||
|
* @param state The source state.
|
||||||
|
* @param fromTransitionIndex The transition index to start the lookup from (inclusive); negative interpreted as 0.
|
||||||
|
* @param label The codepoint to look up.
|
||||||
|
* @param transition The output transition to update with the matching transition; or null for no update.
|
||||||
|
* @return The destination state; or -1 if no matching outgoing transition.
|
||||||
|
*/
|
||||||
|
private int next(int state, int fromTransitionIndex, int label, Transition transition) {
|
||||||
assert state >= 0;
|
assert state >= 0;
|
||||||
assert label >= 0;
|
assert label >= 0;
|
||||||
int trans = states[2*state];
|
int stateIndex = 2 * state;
|
||||||
int limit = trans + 3*states[2*state+1];
|
int firstTransitionIndex = states[stateIndex];
|
||||||
// TODO: we could do bin search; transitions are sorted
|
int numTransitions = states[stateIndex + 1];
|
||||||
while (trans < limit) {
|
|
||||||
int dest = transitions[trans];
|
|
||||||
int min = transitions[trans+1];
|
|
||||||
int max = transitions[trans+2];
|
|
||||||
if (min <= label && label <= max) {
|
|
||||||
return dest;
|
|
||||||
}
|
|
||||||
trans += 3;
|
|
||||||
}
|
|
||||||
|
|
||||||
return -1;
|
// Since transitions are sorted,
|
||||||
|
// binary search the transition for which label is within [minLabel, maxLabel].
|
||||||
|
int low = Math.max(fromTransitionIndex, 0);
|
||||||
|
int high = numTransitions - 1;
|
||||||
|
while (low <= high) {
|
||||||
|
int mid = (low + high) >>> 1;
|
||||||
|
int transitionIndex = firstTransitionIndex + 3 * mid;
|
||||||
|
int minLabel = transitions[transitionIndex + 1];
|
||||||
|
if (minLabel > label) {
|
||||||
|
high = mid - 1;
|
||||||
|
} else {
|
||||||
|
int maxLabel = transitions[transitionIndex + 2];
|
||||||
|
if (maxLabel < label){
|
||||||
|
low = mid + 1;
|
||||||
|
} else {
|
||||||
|
int destState = transitions[transitionIndex];
|
||||||
|
if (transition != null) {
|
||||||
|
transition.dest = destState;
|
||||||
|
transition.min = minLabel;
|
||||||
|
transition.max = maxLabel;
|
||||||
|
transition.transitionUpto = mid;
|
||||||
|
}
|
||||||
|
return destState;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
int destState = -1;
|
||||||
|
if (transition != null) {
|
||||||
|
transition.dest = destState;
|
||||||
|
transition.transitionUpto = low;
|
||||||
|
}
|
||||||
|
return destState;
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Records new states and transitions and then {@link
|
/** Records new states and transitions and then {@link
|
||||||
|
|
|
@ -94,12 +94,15 @@ final public class MinimizationOperations {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// find initial partition and reverse edges
|
// find initial partition and reverse edges
|
||||||
|
Transition transition = new Transition();
|
||||||
for (int q = 0; q < statesLen; q++) {
|
for (int q = 0; q < statesLen; q++) {
|
||||||
final int j = a.isAccept(q) ? 0 : 1;
|
final int j = a.isAccept(q) ? 0 : 1;
|
||||||
partition[j].add(q);
|
partition[j].add(q);
|
||||||
block[q] = j;
|
block[q] = j;
|
||||||
|
transition.source = q;
|
||||||
|
transition.transitionUpto = -1;
|
||||||
for (int x = 0; x < sigmaLen; x++) {
|
for (int x = 0; x < sigmaLen; x++) {
|
||||||
final ArrayList<Integer>[] r = reverse[a.step(q, sigma[x])];
|
final ArrayList<Integer>[] r = reverse[a.next(transition, sigma[x])];
|
||||||
if (r[x] == null) {
|
if (r[x] == null) {
|
||||||
r[x] = new ArrayList<>();
|
r[x] = new ArrayList<>();
|
||||||
}
|
}
|
||||||
|
|
|
@ -78,10 +78,13 @@ public abstract class RunAutomaton implements Accountable {
|
||||||
accept = new boolean[size];
|
accept = new boolean[size];
|
||||||
transitions = new int[size * points.length];
|
transitions = new int[size * points.length];
|
||||||
Arrays.fill(transitions, -1);
|
Arrays.fill(transitions, -1);
|
||||||
|
Transition transition = new Transition();
|
||||||
for (int n=0;n<size;n++) {
|
for (int n=0;n<size;n++) {
|
||||||
accept[n] = a.isAccept(n);
|
accept[n] = a.isAccept(n);
|
||||||
|
transition.source = n;
|
||||||
|
transition.transitionUpto = -1;
|
||||||
for (int c = 0; c < points.length; c++) {
|
for (int c = 0; c < points.length; c++) {
|
||||||
int dest = a.step(n, points[c]);
|
int dest = a.next(transition, points[c]);
|
||||||
assert dest == -1 || dest < size;
|
assert dest == -1 || dest < size;
|
||||||
transitions[n * points.length + c] = dest;
|
transitions[n * points.length + c] = dest;
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue