From 8c4b3702f1df3cfa34aa6751a03d6f4b26883c5f Mon Sep 17 00:00:00 2001 From: Greg Miller Date: Thu, 2 Jan 2025 16:39:54 -0800 Subject: [PATCH] Tighten up initialization of DisjunctionDISIApproximation (#14082) 1. Add all leads to heap at once via heapfiy operation 2. Very minor tweaks to cost computation loops (avoid multiple iterations) --- .../search/DisjunctionDISIApproximation.java | 47 ++++++++++--------- 1 file changed, 26 insertions(+), 21 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/search/DisjunctionDISIApproximation.java b/lucene/core/src/java/org/apache/lucene/search/DisjunctionDISIApproximation.java index f74111f7ded..6ab57c7b180 100644 --- a/lucene/core/src/java/org/apache/lucene/search/DisjunctionDISIApproximation.java +++ b/lucene/core/src/java/org/apache/lucene/search/DisjunctionDISIApproximation.java @@ -17,10 +17,10 @@ package org.apache.lucene.search; import java.io.IOException; -import java.util.ArrayList; +import java.util.Arrays; import java.util.Collection; import java.util.Comparator; -import java.util.List; +import org.apache.lucene.util.ArrayUtil; import org.apache.lucene.util.Bits; import org.apache.lucene.util.FixedBitSet; @@ -58,47 +58,52 @@ public final class DisjunctionDISIApproximation extends DocIdSetIterator { // leadCost) <= 1.5, or Σ min(leadCost, cost) <= 1.5 * leadCost. Other clauses are checked // linearly. - List wrappers = new ArrayList<>(subIterators); + DisiWrapper[] wrappers = subIterators.toArray(DisiWrapper[]::new); // Sort by descending cost. - wrappers.sort(Comparator.comparingLong(w -> w.cost).reversed()); - - leadIterators = new DisiPriorityQueue(subIterators.size()); + Arrays.sort(wrappers, Comparator.comparingLong(w -> w.cost).reversed()); long reorderThreshold = leadCost + (leadCost >> 1); if (reorderThreshold < 0) { // overflow reorderThreshold = Long.MAX_VALUE; } + + long cost = 0; // track total cost + // Split `wrappers` into those that will remain out of the PQ, and those that will go in + // (PQ entries at the end). `lastIdx` is the last index of the wrappers that will remain out. long reorderCost = 0; - while (wrappers.isEmpty() == false) { - DisiWrapper last = wrappers.getLast(); - long inc = Math.min(last.cost, leadCost); + int lastIdx = wrappers.length - 1; + for (; lastIdx >= 0; lastIdx--) { + long lastCost = wrappers[lastIdx].cost; + long inc = Math.min(lastCost, leadCost); if (reorderCost + inc < 0 || reorderCost + inc > reorderThreshold) { break; } - leadIterators.add(wrappers.removeLast()); reorderCost += inc; + cost += lastCost; } // Make leadIterators not empty. This helps save conditionals in the implementation which are // rarely tested. - if (leadIterators.size() == 0) { - leadIterators.add(wrappers.removeLast()); + if (lastIdx == wrappers.length - 1) { + cost += wrappers[lastIdx].cost; + lastIdx--; } - otherIterators = wrappers.toArray(DisiWrapper[]::new); + // Build the PQ: + assert lastIdx >= -1 && lastIdx < wrappers.length - 1; + int pqLen = wrappers.length - lastIdx - 1; + leadIterators = new DisiPriorityQueue(pqLen); + leadIterators.addAll(wrappers, lastIdx + 1, pqLen); - long cost = 0; - for (DisiWrapper w : leadIterators) { - cost += w.cost; - } - for (DisiWrapper w : otherIterators) { - cost += w.cost; - } - this.cost = cost; + // Build the non-PQ list: + otherIterators = ArrayUtil.copyOfSubArray(wrappers, 0, lastIdx + 1); minOtherDoc = Integer.MAX_VALUE; for (DisiWrapper w : otherIterators) { + cost += w.cost; minOtherDoc = Math.min(minOtherDoc, w.doc); } + + this.cost = cost; leadTop = leadIterators.top(); }