mirror of https://github.com/apache/lucene.git
Tighten up initialization of DisjunctionDISIApproximation
1. Add all leads to heap at once via heapfiy operation 2. Very minor tweaks to cost computation loops (avoid multiple iterations)
This commit is contained in:
parent
a337d14b21
commit
0a9b49af35
|
@ -17,10 +17,10 @@
|
|||
package org.apache.lucene.search;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collection;
|
||||
import java.util.Comparator;
|
||||
import java.util.List;
|
||||
import org.apache.lucene.util.ArrayUtil;
|
||||
import org.apache.lucene.util.Bits;
|
||||
import org.apache.lucene.util.FixedBitSet;
|
||||
|
||||
|
@ -57,47 +57,52 @@ public final class DisjunctionDISIApproximation extends DocIdSetIterator {
|
|||
// leadCost) <= 1.5, or Σ min(leadCost, cost) <= 1.5 * leadCost. Other clauses are checked
|
||||
// linearly.
|
||||
|
||||
List<DisiWrapper> wrappers = new ArrayList<>(subIterators);
|
||||
DisiWrapper[] wrappers = subIterators.toArray(DisiWrapper[]::new);
|
||||
// Sort by descending cost.
|
||||
wrappers.sort(Comparator.<DisiWrapper>comparingLong(w -> w.cost).reversed());
|
||||
|
||||
leadIterators = new DisiPriorityQueue(subIterators.size());
|
||||
Arrays.sort(wrappers, Comparator.<DisiWrapper>comparingLong(w -> w.cost).reversed());
|
||||
|
||||
long reorderThreshold = leadCost + (leadCost >> 1);
|
||||
if (reorderThreshold < 0) { // overflow
|
||||
reorderThreshold = Long.MAX_VALUE;
|
||||
}
|
||||
|
||||
long cost = 0; // track total cost
|
||||
// Split `wrappers` into those that will remain out of the PQ, and those that will go in
|
||||
// (PQ entries at the end). `lastIdx` is the last index of the wrappers that will remain out.
|
||||
long reorderCost = 0;
|
||||
while (wrappers.isEmpty() == false) {
|
||||
DisiWrapper last = wrappers.getLast();
|
||||
long inc = Math.min(last.cost, leadCost);
|
||||
int lastIdx = wrappers.length - 1;
|
||||
for (; lastIdx >= 0; lastIdx--) {
|
||||
long lastCost = wrappers[lastIdx].cost;
|
||||
long inc = Math.min(lastCost, leadCost);
|
||||
if (reorderCost + inc < 0 || reorderCost + inc > reorderThreshold) {
|
||||
break;
|
||||
}
|
||||
leadIterators.add(wrappers.removeLast());
|
||||
reorderCost += inc;
|
||||
cost += lastCost;
|
||||
}
|
||||
|
||||
// Make leadIterators not empty. This helps save conditionals in the implementation which are
|
||||
// rarely tested.
|
||||
if (leadIterators.size() == 0) {
|
||||
leadIterators.add(wrappers.removeLast());
|
||||
if (lastIdx == wrappers.length - 1) {
|
||||
cost += wrappers[lastIdx].cost;
|
||||
lastIdx--;
|
||||
}
|
||||
|
||||
otherIterators = wrappers.toArray(DisiWrapper[]::new);
|
||||
// Build the PQ:
|
||||
assert lastIdx >= -1 && lastIdx < wrappers.length - 1;
|
||||
int pqLen = wrappers.length - lastIdx - 1;
|
||||
leadIterators = new DisiPriorityQueue(pqLen);
|
||||
leadIterators.addAll(wrappers, lastIdx + 1, pqLen);
|
||||
|
||||
long cost = 0;
|
||||
for (DisiWrapper w : leadIterators) {
|
||||
cost += w.cost;
|
||||
}
|
||||
for (DisiWrapper w : otherIterators) {
|
||||
cost += w.cost;
|
||||
}
|
||||
this.cost = cost;
|
||||
// Build the non-PQ list:
|
||||
otherIterators = ArrayUtil.copyOfSubArray(wrappers, 0, lastIdx + 1);
|
||||
minOtherDoc = Integer.MAX_VALUE;
|
||||
for (DisiWrapper w : otherIterators) {
|
||||
cost += w.cost;
|
||||
minOtherDoc = Math.min(minOtherDoc, w.doc);
|
||||
}
|
||||
|
||||
this.cost = cost;
|
||||
leadTop = leadIterators.top();
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue