Tighten up initialization of DisjunctionDISIApproximation (#14082)

1. Add all leads to heap at once via a heapify operation
2. Very minor tweaks to cost computation loops (avoid multiple iterations)
This commit is contained in:
Greg Miller 2025-01-02 16:39:54 -08:00 committed by GitHub
parent b8ae8a5bb2
commit 8c4b3702f1
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
1 changed file with 26 additions and 21 deletions

View File

@ -17,10 +17,10 @@
package org.apache.lucene.search; package org.apache.lucene.search;
import java.io.IOException; import java.io.IOException;
import java.util.ArrayList; import java.util.Arrays;
import java.util.Collection; import java.util.Collection;
import java.util.Comparator; import java.util.Comparator;
import java.util.List; import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.Bits; import org.apache.lucene.util.Bits;
import org.apache.lucene.util.FixedBitSet; import org.apache.lucene.util.FixedBitSet;
@ -58,47 +58,52 @@ public final class DisjunctionDISIApproximation extends DocIdSetIterator {
// leadCost) <= 1.5, or Σ min(leadCost, cost) <= 1.5 * leadCost. Other clauses are checked // leadCost) <= 1.5, or Σ min(leadCost, cost) <= 1.5 * leadCost. Other clauses are checked
// linearly. // linearly.
List<DisiWrapper> wrappers = new ArrayList<>(subIterators); DisiWrapper[] wrappers = subIterators.toArray(DisiWrapper[]::new);
// Sort by descending cost. // Sort by descending cost.
wrappers.sort(Comparator.<DisiWrapper>comparingLong(w -> w.cost).reversed()); Arrays.sort(wrappers, Comparator.<DisiWrapper>comparingLong(w -> w.cost).reversed());
leadIterators = new DisiPriorityQueue(subIterators.size());
long reorderThreshold = leadCost + (leadCost >> 1); long reorderThreshold = leadCost + (leadCost >> 1);
if (reorderThreshold < 0) { // overflow if (reorderThreshold < 0) { // overflow
reorderThreshold = Long.MAX_VALUE; reorderThreshold = Long.MAX_VALUE;
} }
long cost = 0; // track total cost
// Split `wrappers` into those that will remain out of the PQ, and those that will go in
// (PQ entries at the end). `lastIdx` is the last index of the wrappers that will remain out.
long reorderCost = 0; long reorderCost = 0;
while (wrappers.isEmpty() == false) { int lastIdx = wrappers.length - 1;
DisiWrapper last = wrappers.getLast(); for (; lastIdx >= 0; lastIdx--) {
long inc = Math.min(last.cost, leadCost); long lastCost = wrappers[lastIdx].cost;
long inc = Math.min(lastCost, leadCost);
if (reorderCost + inc < 0 || reorderCost + inc > reorderThreshold) { if (reorderCost + inc < 0 || reorderCost + inc > reorderThreshold) {
break; break;
} }
leadIterators.add(wrappers.removeLast());
reorderCost += inc; reorderCost += inc;
cost += lastCost;
} }
// Make leadIterators not empty. This helps save conditionals in the implementation which are // Make leadIterators not empty. This helps save conditionals in the implementation which are
// rarely tested. // rarely tested.
if (leadIterators.size() == 0) { if (lastIdx == wrappers.length - 1) {
leadIterators.add(wrappers.removeLast()); cost += wrappers[lastIdx].cost;
lastIdx--;
} }
otherIterators = wrappers.toArray(DisiWrapper[]::new); // Build the PQ:
assert lastIdx >= -1 && lastIdx < wrappers.length - 1;
int pqLen = wrappers.length - lastIdx - 1;
leadIterators = new DisiPriorityQueue(pqLen);
leadIterators.addAll(wrappers, lastIdx + 1, pqLen);
long cost = 0; // Build the non-PQ list:
for (DisiWrapper w : leadIterators) { otherIterators = ArrayUtil.copyOfSubArray(wrappers, 0, lastIdx + 1);
cost += w.cost;
}
for (DisiWrapper w : otherIterators) {
cost += w.cost;
}
this.cost = cost;
minOtherDoc = Integer.MAX_VALUE; minOtherDoc = Integer.MAX_VALUE;
for (DisiWrapper w : otherIterators) { for (DisiWrapper w : otherIterators) {
cost += w.cost;
minOtherDoc = Math.min(minOtherDoc, w.doc); minOtherDoc = Math.min(minOtherDoc, w.doc);
} }
this.cost = cost;
leadTop = leadIterators.top(); leadTop = leadIterators.top();
} }