diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index fcc359f7326..db1ff0b1d6c 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -118,6 +118,8 @@ Optimizations * LUCENE-9113: Faster merging of SORTED/SORTED_SET doc values. (Adrien Grand) +* LUCENE-9125: Optimize Automaton.step() with binary search and introduce Automaton.next(). (Bruno Roustant) + Bug Fixes --------------------- diff --git a/lucene/core/src/java/org/apache/lucene/util/automaton/Automaton.java b/lucene/core/src/java/org/apache/lucene/util/automaton/Automaton.java index 6c317ebdff7..a8052dca2de 100644 --- a/lucene/core/src/java/org/apache/lucene/util/automaton/Automaton.java +++ b/lucene/core/src/java/org/apache/lucene/util/automaton/Automaton.java @@ -656,22 +656,77 @@ public class Automaton implements Accountable { * @return destination state, -1 if no matching outgoing transition */ public int step(int state, int label) { + return next(state, 0, label, null); + } + + /** + * Looks for the next transition that matches the provided label, assuming determinism. + *
+ * This method is similar to {@link #step(int, int)} but is more efficient when
+ * looking up several labels from the same source state. It stores the index of
+ * the latest reached transition in {@code transition.transitionUpto} so that
+ * the next call resumes from that index instead of restarting from the first
+ * transition; transitions before that index are not examined by that call.
+ *
+ * @param transition The transition to start the lookup from (inclusive, using its
+ * {@link Transition#source} and {@link Transition#transitionUpto}).
+ * On a match, its dest, min, max and transitionUpto are updated; on no match,
+ * {@link Transition#dest} is set to -1 and transitionUpto to where the search ended.
+ * @param label The codepoint to look up.
+ * @return The destination state; or -1 if no matching outgoing transition.
+ */
+ public int next(Transition transition, int label) {
+ return next(transition.source, transition.transitionUpto, label, transition);
+ }
+
+ /**
+ * Looks for the next transition that matches the provided label, assuming determinism.
+ *
+ * @param state The source state.
+ * @param fromTransitionIndex The transition index to start the lookup from (inclusive); negative interpreted as 0. Transitions before it are not examined.
+ * @param label The codepoint to look up.
+ * @param transition The output transition to update: dest, min, max and transitionUpto on a match; dest = -1 and transitionUpto = the index where the search ended on no match. Null for no update.
+ * @return The destination state; or -1 if no matching outgoing transition.
+ */
+ private int next(int state, int fromTransitionIndex, int label, Transition transition) {
assert state >= 0;
assert label >= 0;
- int trans = states[2*state];
- int limit = trans + 3*states[2*state+1];
- // TODO: we could do bin search; transitions are sorted
- while (trans < limit) {
- int dest = transitions[trans];
- int min = transitions[trans+1];
- int max = transitions[trans+2];
- if (min <= label && label <= max) {
- return dest;
- }
- trans += 3;
- }
+ int stateIndex = 2 * state; // states[] holds two ints per state: offset of its first transition, then its transition count
+ int firstTransitionIndex = states[stateIndex];
+ int numTransitions = states[stateIndex + 1];
- return -1;
+ // Since transitions are sorted, binary search (starting at fromTransitionIndex)
+ // for the transition whose label range [minLabel, maxLabel] contains label.
+ int low = Math.max(fromTransitionIndex, 0); // a negative start index means "from the first transition"
+ int high = numTransitions - 1;
+ while (low <= high) {
+ int mid = (low + high) >>> 1; // unsigned shift keeps the midpoint correct even if low + high overflows
+ int transitionIndex = firstTransitionIndex + 3 * mid; // each transition occupies 3 ints: dest, min, max
+ int minLabel = transitions[transitionIndex + 1];
+ if (minLabel > label) {
+ high = mid - 1;
+ } else {
+ int maxLabel = transitions[transitionIndex + 2];
+ if (maxLabel < label){
+ low = mid + 1;
+ } else {
+ int destState = transitions[transitionIndex];
+ if (transition != null) {
+ transition.dest = destState;
+ transition.min = minLabel;
+ transition.max = maxLabel;
+ transition.transitionUpto = mid; // index relative to the state's first transition, so a later lookup can resume here
+ }
+ return destState;
+ }
+ }
+ }
+ int destState = -1;
+ if (transition != null) {
+ transition.dest = destState;
+ transition.transitionUpto = low; // where the search ended; may equal numTransitions, in which case a later lookup from there finds nothing
+ }
+ return destState;
}
/** Records new states and transitions and then {@link
diff --git a/lucene/core/src/java/org/apache/lucene/util/automaton/MinimizationOperations.java b/lucene/core/src/java/org/apache/lucene/util/automaton/MinimizationOperations.java
index 979f7c52988..1962731b266 100644
--- a/lucene/core/src/java/org/apache/lucene/util/automaton/MinimizationOperations.java
+++ b/lucene/core/src/java/org/apache/lucene/util/automaton/MinimizationOperations.java
@@ -94,12 +94,15 @@ final public class MinimizationOperations {
}
}
// find initial partition and reverse edges
+ Transition transition = new Transition();
for (int q = 0; q < statesLen; q++) {
final int j = a.isAccept(q) ? 0 : 1;
partition[j].add(q);
block[q] = j;
+ transition.source = q;
+ transition.transitionUpto = -1;
for (int x = 0; x < sigmaLen; x++) {
- final ArrayList