Tighten up initialization of DisjunctionDISIApproximation (#14082)

1. Add all leads to heap at once via heapify operation
2. Very minor tweaks to cost computation loops (avoid multiple iterations)
This commit is contained in:
Greg Miller 2025-01-02 16:39:54 -08:00 committed by GitHub
parent b8ae8a5bb2
commit 8c4b3702f1
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
1 changed file with 26 additions and 21 deletions

View File

@ -17,10 +17,10 @@
package org.apache.lucene.search;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Comparator;
import java.util.List;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.FixedBitSet;
@ -58,47 +58,52 @@ public final class DisjunctionDISIApproximation extends DocIdSetIterator {
// leadCost) <= 1.5, or Σ min(leadCost, cost) <= 1.5 * leadCost. Other clauses are checked
// linearly.
List<DisiWrapper> wrappers = new ArrayList<>(subIterators);
DisiWrapper[] wrappers = subIterators.toArray(DisiWrapper[]::new);
// Sort by descending cost.
wrappers.sort(Comparator.<DisiWrapper>comparingLong(w -> w.cost).reversed());
leadIterators = new DisiPriorityQueue(subIterators.size());
Arrays.sort(wrappers, Comparator.<DisiWrapper>comparingLong(w -> w.cost).reversed());
long reorderThreshold = leadCost + (leadCost >> 1);
if (reorderThreshold < 0) { // overflow
reorderThreshold = Long.MAX_VALUE;
}
long cost = 0; // track total cost
// Split `wrappers` into those that will remain out of the PQ, and those that will go in
// (PQ entries at the end). `lastIdx` is the last index of the wrappers that will remain out.
long reorderCost = 0;
while (wrappers.isEmpty() == false) {
DisiWrapper last = wrappers.getLast();
long inc = Math.min(last.cost, leadCost);
int lastIdx = wrappers.length - 1;
for (; lastIdx >= 0; lastIdx--) {
long lastCost = wrappers[lastIdx].cost;
long inc = Math.min(lastCost, leadCost);
if (reorderCost + inc < 0 || reorderCost + inc > reorderThreshold) {
break;
}
leadIterators.add(wrappers.removeLast());
reorderCost += inc;
cost += lastCost;
}
// Make leadIterators not empty. This helps save conditionals in the implementation which are
// rarely tested.
if (leadIterators.size() == 0) {
leadIterators.add(wrappers.removeLast());
if (lastIdx == wrappers.length - 1) {
cost += wrappers[lastIdx].cost;
lastIdx--;
}
otherIterators = wrappers.toArray(DisiWrapper[]::new);
// Build the PQ:
assert lastIdx >= -1 && lastIdx < wrappers.length - 1;
int pqLen = wrappers.length - lastIdx - 1;
leadIterators = new DisiPriorityQueue(pqLen);
leadIterators.addAll(wrappers, lastIdx + 1, pqLen);
long cost = 0;
for (DisiWrapper w : leadIterators) {
cost += w.cost;
}
for (DisiWrapper w : otherIterators) {
cost += w.cost;
}
this.cost = cost;
// Build the non-PQ list:
otherIterators = ArrayUtil.copyOfSubArray(wrappers, 0, lastIdx + 1);
minOtherDoc = Integer.MAX_VALUE;
for (DisiWrapper w : otherIterators) {
cost += w.cost;
minOtherDoc = Math.min(minOtherDoc, w.doc);
}
this.cost = cost;
leadTop = leadIterators.top();
}