mirror of https://github.com/apache/lucene.git
Tighten up initialization of DisjunctionDISIApproximation (#14082)
1. Add all leads to the heap at once via a heapify operation. 2. Very minor tweaks to cost computation loops (avoid multiple iterations).
This commit is contained in:
parent
b8ae8a5bb2
commit
8c4b3702f1
|
@ -17,10 +17,10 @@
|
||||||
package org.apache.lucene.search;
|
package org.apache.lucene.search;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.ArrayList;
|
import java.util.Arrays;
|
||||||
import java.util.Collection;
|
import java.util.Collection;
|
||||||
import java.util.Comparator;
|
import java.util.Comparator;
|
||||||
import java.util.List;
|
import org.apache.lucene.util.ArrayUtil;
|
||||||
import org.apache.lucene.util.Bits;
|
import org.apache.lucene.util.Bits;
|
||||||
import org.apache.lucene.util.FixedBitSet;
|
import org.apache.lucene.util.FixedBitSet;
|
||||||
|
|
||||||
|
@ -58,47 +58,52 @@ public final class DisjunctionDISIApproximation extends DocIdSetIterator {
|
||||||
// leadCost) <= 1.5, or Σ min(leadCost, cost) <= 1.5 * leadCost. Other clauses are checked
|
// leadCost) <= 1.5, or Σ min(leadCost, cost) <= 1.5 * leadCost. Other clauses are checked
|
||||||
// linearly.
|
// linearly.
|
||||||
|
|
||||||
List<DisiWrapper> wrappers = new ArrayList<>(subIterators);
|
DisiWrapper[] wrappers = subIterators.toArray(DisiWrapper[]::new);
|
||||||
// Sort by descending cost.
|
// Sort by descending cost.
|
||||||
wrappers.sort(Comparator.<DisiWrapper>comparingLong(w -> w.cost).reversed());
|
Arrays.sort(wrappers, Comparator.<DisiWrapper>comparingLong(w -> w.cost).reversed());
|
||||||
|
|
||||||
leadIterators = new DisiPriorityQueue(subIterators.size());
|
|
||||||
|
|
||||||
long reorderThreshold = leadCost + (leadCost >> 1);
|
long reorderThreshold = leadCost + (leadCost >> 1);
|
||||||
if (reorderThreshold < 0) { // overflow
|
if (reorderThreshold < 0) { // overflow
|
||||||
reorderThreshold = Long.MAX_VALUE;
|
reorderThreshold = Long.MAX_VALUE;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
long cost = 0; // track total cost
|
||||||
|
// Split `wrappers` into those that will remain out of the PQ, and those that will go in
|
||||||
|
// (PQ entries at the end). `lastIdx` is the last index of the wrappers that will remain out.
|
||||||
long reorderCost = 0;
|
long reorderCost = 0;
|
||||||
while (wrappers.isEmpty() == false) {
|
int lastIdx = wrappers.length - 1;
|
||||||
DisiWrapper last = wrappers.getLast();
|
for (; lastIdx >= 0; lastIdx--) {
|
||||||
long inc = Math.min(last.cost, leadCost);
|
long lastCost = wrappers[lastIdx].cost;
|
||||||
|
long inc = Math.min(lastCost, leadCost);
|
||||||
if (reorderCost + inc < 0 || reorderCost + inc > reorderThreshold) {
|
if (reorderCost + inc < 0 || reorderCost + inc > reorderThreshold) {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
leadIterators.add(wrappers.removeLast());
|
|
||||||
reorderCost += inc;
|
reorderCost += inc;
|
||||||
|
cost += lastCost;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Make leadIterators not empty. This helps save conditionals in the implementation which are
|
// Make leadIterators not empty. This helps save conditionals in the implementation which are
|
||||||
// rarely tested.
|
// rarely tested.
|
||||||
if (leadIterators.size() == 0) {
|
if (lastIdx == wrappers.length - 1) {
|
||||||
leadIterators.add(wrappers.removeLast());
|
cost += wrappers[lastIdx].cost;
|
||||||
|
lastIdx--;
|
||||||
}
|
}
|
||||||
|
|
||||||
otherIterators = wrappers.toArray(DisiWrapper[]::new);
|
// Build the PQ:
|
||||||
|
assert lastIdx >= -1 && lastIdx < wrappers.length - 1;
|
||||||
|
int pqLen = wrappers.length - lastIdx - 1;
|
||||||
|
leadIterators = new DisiPriorityQueue(pqLen);
|
||||||
|
leadIterators.addAll(wrappers, lastIdx + 1, pqLen);
|
||||||
|
|
||||||
long cost = 0;
|
// Build the non-PQ list:
|
||||||
for (DisiWrapper w : leadIterators) {
|
otherIterators = ArrayUtil.copyOfSubArray(wrappers, 0, lastIdx + 1);
|
||||||
cost += w.cost;
|
|
||||||
}
|
|
||||||
for (DisiWrapper w : otherIterators) {
|
|
||||||
cost += w.cost;
|
|
||||||
}
|
|
||||||
this.cost = cost;
|
|
||||||
minOtherDoc = Integer.MAX_VALUE;
|
minOtherDoc = Integer.MAX_VALUE;
|
||||||
for (DisiWrapper w : otherIterators) {
|
for (DisiWrapper w : otherIterators) {
|
||||||
|
cost += w.cost;
|
||||||
minOtherDoc = Math.min(minOtherDoc, w.doc);
|
minOtherDoc = Math.min(minOtherDoc, w.doc);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
this.cost = cost;
|
||||||
leadTop = leadIterators.top();
|
leadTop = leadIterators.top();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue