Tighten up initialization of DisjunctionDISIApproximation

1. Add all leads to heap at once via heapify operation
2. Very minor tweaks to cost computation loops (avoid multiple iterations)
This commit is contained in:
Greg Miller 2024-12-19 13:30:24 -08:00
parent a337d14b21
commit 0a9b49af35
1 changed file with 26 additions and 21 deletions

View File

@ -17,10 +17,10 @@
package org.apache.lucene.search; package org.apache.lucene.search;
import java.io.IOException; import java.io.IOException;
import java.util.ArrayList; import java.util.Arrays;
import java.util.Collection; import java.util.Collection;
import java.util.Comparator; import java.util.Comparator;
import java.util.List; import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.Bits; import org.apache.lucene.util.Bits;
import org.apache.lucene.util.FixedBitSet; import org.apache.lucene.util.FixedBitSet;
@ -57,47 +57,52 @@ public final class DisjunctionDISIApproximation extends DocIdSetIterator {
// leadCost) <= 1.5, or Σ min(leadCost, cost) <= 1.5 * leadCost. Other clauses are checked // leadCost) <= 1.5, or Σ min(leadCost, cost) <= 1.5 * leadCost. Other clauses are checked
// linearly. // linearly.
List<DisiWrapper> wrappers = new ArrayList<>(subIterators); DisiWrapper[] wrappers = subIterators.toArray(DisiWrapper[]::new);
// Sort by descending cost. // Sort by descending cost.
wrappers.sort(Comparator.<DisiWrapper>comparingLong(w -> w.cost).reversed()); Arrays.sort(wrappers, Comparator.<DisiWrapper>comparingLong(w -> w.cost).reversed());
leadIterators = new DisiPriorityQueue(subIterators.size());
long reorderThreshold = leadCost + (leadCost >> 1); long reorderThreshold = leadCost + (leadCost >> 1);
if (reorderThreshold < 0) { // overflow if (reorderThreshold < 0) { // overflow
reorderThreshold = Long.MAX_VALUE; reorderThreshold = Long.MAX_VALUE;
} }
long cost = 0; // track total cost
// Split `wrappers` into those that will remain out of the PQ, and those that will go in
// (PQ entries at the end). `lastIdx` is the last index of the wrappers that will remain out.
long reorderCost = 0; long reorderCost = 0;
while (wrappers.isEmpty() == false) { int lastIdx = wrappers.length - 1;
DisiWrapper last = wrappers.getLast(); for (; lastIdx >= 0; lastIdx--) {
long inc = Math.min(last.cost, leadCost); long lastCost = wrappers[lastIdx].cost;
long inc = Math.min(lastCost, leadCost);
if (reorderCost + inc < 0 || reorderCost + inc > reorderThreshold) { if (reorderCost + inc < 0 || reorderCost + inc > reorderThreshold) {
break; break;
} }
leadIterators.add(wrappers.removeLast());
reorderCost += inc; reorderCost += inc;
cost += lastCost;
} }
// Make leadIterators not empty. This helps save conditionals in the implementation which are // Make leadIterators not empty. This helps save conditionals in the implementation which are
// rarely tested. // rarely tested.
if (leadIterators.size() == 0) { if (lastIdx == wrappers.length - 1) {
leadIterators.add(wrappers.removeLast()); cost += wrappers[lastIdx].cost;
lastIdx--;
} }
otherIterators = wrappers.toArray(DisiWrapper[]::new); // Build the PQ:
assert lastIdx >= -1 && lastIdx < wrappers.length - 1;
int pqLen = wrappers.length - lastIdx - 1;
leadIterators = new DisiPriorityQueue(pqLen);
leadIterators.addAll(wrappers, lastIdx + 1, pqLen);
long cost = 0; // Build the non-PQ list:
for (DisiWrapper w : leadIterators) { otherIterators = ArrayUtil.copyOfSubArray(wrappers, 0, lastIdx + 1);
cost += w.cost;
}
for (DisiWrapper w : otherIterators) {
cost += w.cost;
}
this.cost = cost;
minOtherDoc = Integer.MAX_VALUE; minOtherDoc = Integer.MAX_VALUE;
for (DisiWrapper w : otherIterators) { for (DisiWrapper w : otherIterators) {
cost += w.cost;
minOtherDoc = Math.min(minOtherDoc, w.doc); minOtherDoc = Math.min(minOtherDoc, w.doc);
} }
this.cost = cost;
leadTop = leadIterators.top(); leadTop = leadIterators.top();
} }