From fe63ccd6f1c81571a7c71597fda9314b657371b5 Mon Sep 17 00:00:00 2001 From: Benjamin Trent Date: Wed, 2 Oct 2024 13:58:16 -0400 Subject: [PATCH] Expose more parameters in MultiLeafKnnCollector ctor (#13826) There is currently no way to configure two parameters for the multi-leaf collector. For expert extensibility, this commit adds another ctor for advanced usage. closes: #13699 --- .../search/knn/MultiLeafKnnCollector.java | 31 +++++++++++++++++-- 1 file changed, 28 insertions(+), 3 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/search/knn/MultiLeafKnnCollector.java b/lucene/core/src/java/org/apache/lucene/search/knn/MultiLeafKnnCollector.java index 1ca979d6794..051cd9ed633 100644 --- a/lucene/core/src/java/org/apache/lucene/search/knn/MultiLeafKnnCollector.java +++ b/lucene/core/src/java/org/apache/lucene/search/knn/MultiLeafKnnCollector.java @@ -33,17 +33,17 @@ public final class MultiLeafKnnCollector implements KnnCollector { // greediness of globally non-competitive search: (0,1] private static final float DEFAULT_GREEDINESS = 0.9f; + private static final int DEFAULT_INTERVAL = 0xff; // the global queue of the highest similarities collected so far across all segments private final BlockingFloatHeap globalSimilarityQueue; // the local queue of the highest similarities if we are not competitive globally // the size of this queue is defined by greediness private final FloatHeap nonCompetitiveQueue; - private final float greediness; // the queue of the local similarities to periodically update with the global queue private final FloatHeap updatesQueue; private final float[] updatesScratch; // interval to synchronize the local and global queues, as a number of visited vectors - private final int interval = 0xff; // 255 + private final int interval; private boolean kResultsCollected = false; private float cachedGlobalMinSim = Float.NEGATIVE_INFINITY; private final AbstractKnnCollector subCollector; @@ -58,7 +58,32 @@ public final class
MultiLeafKnnCollector implements KnnCollector { */ public MultiLeafKnnCollector( int k, BlockingFloatHeap globalSimilarityQueue, AbstractKnnCollector subCollector) { - this.greediness = DEFAULT_GREEDINESS; + this(k, DEFAULT_GREEDINESS, DEFAULT_INTERVAL, globalSimilarityQueue, subCollector); + } + + /** + * Create a new MultiLeafKnnCollector. + * + * @param k the number of neighbors to collect + * @param greediness the greediness of the global search + * @param interval (by number of collected values) the interval to synchronize the local and + * global queues + * @param globalSimilarityQueue the global queue of the highest similarities collected so far + * @param subCollector the local collector + */ + public MultiLeafKnnCollector( + int k, + float greediness, + int interval, + BlockingFloatHeap globalSimilarityQueue, + AbstractKnnCollector subCollector) { + if (greediness < 0 || greediness > 1) { + throw new IllegalArgumentException("greediness must be in [0,1]"); + } + if (interval <= 0) { + throw new IllegalArgumentException("interval must be positive"); + } + this.interval = interval; this.subCollector = subCollector; this.globalSimilarityQueue = globalSimilarityQueue; this.nonCompetitiveQueue = new FloatHeap(Math.max(1, Math.round((1 - greediness) * k)));