Tighten up initialization of DisjunctionDISIApproximation

1. Add all leads to heap at once via heapfiy operation
2. Very minor tweaks to cost computation loops (avoid multiple iterations)
This commit is contained in:
Greg Miller 2024-12-19 13:30:24 -08:00
parent a337d14b21
commit 0a9b49af35
1 changed files with 26 additions and 21 deletions

View File

@ -17,10 +17,10 @@
package org.apache.lucene.search;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Comparator;
import java.util.List;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.FixedBitSet;
@ -57,47 +57,52 @@ public final class DisjunctionDISIApproximation extends DocIdSetIterator {
// leadCost) <= 1.5, or Σ min(leadCost, cost) <= 1.5 * leadCost. Other clauses are checked
// linearly.
List<DisiWrapper> wrappers = new ArrayList<>(subIterators);
DisiWrapper[] wrappers = subIterators.toArray(DisiWrapper[]::new);
// Sort by descending cost.
wrappers.sort(Comparator.<DisiWrapper>comparingLong(w -> w.cost).reversed());
leadIterators = new DisiPriorityQueue(subIterators.size());
Arrays.sort(wrappers, Comparator.<DisiWrapper>comparingLong(w -> w.cost).reversed());
long reorderThreshold = leadCost + (leadCost >> 1);
if (reorderThreshold < 0) { // overflow
reorderThreshold = Long.MAX_VALUE;
}
long cost = 0; // track total cost
// Split `wrappers` into those that will remain out of the PQ, and those that will go in
// (PQ entries at the end). `lastIdx` is the last index of the wrappers that will remain out.
long reorderCost = 0;
while (wrappers.isEmpty() == false) {
DisiWrapper last = wrappers.getLast();
long inc = Math.min(last.cost, leadCost);
int lastIdx = wrappers.length - 1;
for (; lastIdx >= 0; lastIdx--) {
long lastCost = wrappers[lastIdx].cost;
long inc = Math.min(lastCost, leadCost);
if (reorderCost + inc < 0 || reorderCost + inc > reorderThreshold) {
break;
}
leadIterators.add(wrappers.removeLast());
reorderCost += inc;
cost += lastCost;
}
// Make leadIterators not empty. This helps save conditionals in the implementation which are
// rarely tested.
if (leadIterators.size() == 0) {
leadIterators.add(wrappers.removeLast());
if (lastIdx == wrappers.length - 1) {
cost += wrappers[lastIdx].cost;
lastIdx--;
}
otherIterators = wrappers.toArray(DisiWrapper[]::new);
// Build the PQ:
assert lastIdx >= -1 && lastIdx < wrappers.length - 1;
int pqLen = wrappers.length - lastIdx - 1;
leadIterators = new DisiPriorityQueue(pqLen);
leadIterators.addAll(wrappers, lastIdx + 1, pqLen);
long cost = 0;
for (DisiWrapper w : leadIterators) {
cost += w.cost;
}
for (DisiWrapper w : otherIterators) {
cost += w.cost;
}
this.cost = cost;
// Build the non-PQ list:
otherIterators = ArrayUtil.copyOfSubArray(wrappers, 0, lastIdx + 1);
minOtherDoc = Integer.MAX_VALUE;
for (DisiWrapper w : otherIterators) {
cost += w.cost;
minOtherDoc = Math.min(minOtherDoc, w.doc);
}
this.cost = cost;
leadTop = leadIterators.top();
}