mirror of https://github.com/apache/lucene.git
Tighten up initialization of DisjunctionDISIApproximation (#14082)
1. Add all leads to heap at once via heapfiy operation 2. Very minor tweaks to cost computation loops (avoid multiple iterations)
This commit is contained in:
parent
b8ae8a5bb2
commit
8c4b3702f1
|
@ -17,10 +17,10 @@
|
|||
package org.apache.lucene.search;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collection;
|
||||
import java.util.Comparator;
|
||||
import java.util.List;
|
||||
import org.apache.lucene.util.ArrayUtil;
|
||||
import org.apache.lucene.util.Bits;
|
||||
import org.apache.lucene.util.FixedBitSet;
|
||||
|
||||
|
@ -58,47 +58,52 @@ public final class DisjunctionDISIApproximation extends DocIdSetIterator {
|
|||
// leadCost) <= 1.5, or Σ min(leadCost, cost) <= 1.5 * leadCost. Other clauses are checked
|
||||
// linearly.
|
||||
|
||||
List<DisiWrapper> wrappers = new ArrayList<>(subIterators);
|
||||
DisiWrapper[] wrappers = subIterators.toArray(DisiWrapper[]::new);
|
||||
// Sort by descending cost.
|
||||
wrappers.sort(Comparator.<DisiWrapper>comparingLong(w -> w.cost).reversed());
|
||||
|
||||
leadIterators = new DisiPriorityQueue(subIterators.size());
|
||||
Arrays.sort(wrappers, Comparator.<DisiWrapper>comparingLong(w -> w.cost).reversed());
|
||||
|
||||
long reorderThreshold = leadCost + (leadCost >> 1);
|
||||
if (reorderThreshold < 0) { // overflow
|
||||
reorderThreshold = Long.MAX_VALUE;
|
||||
}
|
||||
|
||||
long cost = 0; // track total cost
|
||||
// Split `wrappers` into those that will remain out of the PQ, and those that will go in
|
||||
// (PQ entries at the end). `lastIdx` is the last index of the wrappers that will remain out.
|
||||
long reorderCost = 0;
|
||||
while (wrappers.isEmpty() == false) {
|
||||
DisiWrapper last = wrappers.getLast();
|
||||
long inc = Math.min(last.cost, leadCost);
|
||||
int lastIdx = wrappers.length - 1;
|
||||
for (; lastIdx >= 0; lastIdx--) {
|
||||
long lastCost = wrappers[lastIdx].cost;
|
||||
long inc = Math.min(lastCost, leadCost);
|
||||
if (reorderCost + inc < 0 || reorderCost + inc > reorderThreshold) {
|
||||
break;
|
||||
}
|
||||
leadIterators.add(wrappers.removeLast());
|
||||
reorderCost += inc;
|
||||
cost += lastCost;
|
||||
}
|
||||
|
||||
// Make leadIterators not empty. This helps save conditionals in the implementation which are
|
||||
// rarely tested.
|
||||
if (leadIterators.size() == 0) {
|
||||
leadIterators.add(wrappers.removeLast());
|
||||
if (lastIdx == wrappers.length - 1) {
|
||||
cost += wrappers[lastIdx].cost;
|
||||
lastIdx--;
|
||||
}
|
||||
|
||||
otherIterators = wrappers.toArray(DisiWrapper[]::new);
|
||||
// Build the PQ:
|
||||
assert lastIdx >= -1 && lastIdx < wrappers.length - 1;
|
||||
int pqLen = wrappers.length - lastIdx - 1;
|
||||
leadIterators = new DisiPriorityQueue(pqLen);
|
||||
leadIterators.addAll(wrappers, lastIdx + 1, pqLen);
|
||||
|
||||
long cost = 0;
|
||||
for (DisiWrapper w : leadIterators) {
|
||||
cost += w.cost;
|
||||
}
|
||||
for (DisiWrapper w : otherIterators) {
|
||||
cost += w.cost;
|
||||
}
|
||||
this.cost = cost;
|
||||
// Build the non-PQ list:
|
||||
otherIterators = ArrayUtil.copyOfSubArray(wrappers, 0, lastIdx + 1);
|
||||
minOtherDoc = Integer.MAX_VALUE;
|
||||
for (DisiWrapper w : otherIterators) {
|
||||
cost += w.cost;
|
||||
minOtherDoc = Math.min(minOtherDoc, w.doc);
|
||||
}
|
||||
|
||||
this.cost = cost;
|
||||
leadTop = leadIterators.top();
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue