mirror of https://github.com/apache/lucene.git
LUCENE-8428: PriorityQueue takes sentinels via a Supplier as a constructor argument.
This commit is contained in:
parent
3119fbb947
commit
914e264165
|
@ -163,6 +163,10 @@ API Changes:
|
|||
|
||||
* LUCENE-7314: Graduate LatLonPoint and query classes to core (Nick Knize)
|
||||
|
||||
* LUCENE-8428: The way that oal.util.PriorityQueue creates sentinel objects has
|
||||
been changed from a protected method to a java.util.function.Supplier as a
|
||||
constructor argument. (Adrien Grand)
|
||||
|
||||
Bug Fixes:
|
||||
|
||||
* LUCENE-8380: UTF8TaxonomyWriterCache inconsistency. (Ruslan Torobaev, Dawid Weiss)
|
||||
|
|
|
@ -58,20 +58,20 @@ final class HitQueue extends PriorityQueue<ScoreDoc> {
|
|||
* the requested size of this queue.
|
||||
* @param prePopulate
|
||||
* specifies whether to pre-populate the queue with sentinel values.
|
||||
* @see #getSentinelObject()
|
||||
*/
|
||||
HitQueue(int size, boolean prePopulate) {
|
||||
super(size, prePopulate);
|
||||
super(size, () -> {
|
||||
if (prePopulate) {
|
||||
// Always set the doc Id to MAX_VALUE so that it won't be favored by
|
||||
// lessThan. This generally should not happen since if score is not NEG_INF,
|
||||
// TopScoreDocCollector will always add the object to the queue.
|
||||
return new ScoreDoc(Integer.MAX_VALUE, Float.NEGATIVE_INFINITY);
|
||||
} else {
|
||||
return null;
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
@Override
|
||||
protected ScoreDoc getSentinelObject() {
|
||||
// Always set the doc Id to MAX_VALUE so that it won't be favored by
|
||||
// lessThan. This generally should not happen since if score is not NEG_INF,
|
||||
// TopScoreDocCollector will always add the object to the queue.
|
||||
return new ScoreDoc(Integer.MAX_VALUE, Float.NEGATIVE_INFINITY);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected final boolean lessThan(ScoreDoc hitA, ScoreDoc hitB) {
|
||||
if (hitA.score == hitB.score)
|
||||
|
|
|
@ -21,7 +21,7 @@ import org.apache.lucene.util.PriorityQueue;
|
|||
|
||||
class SpanPositionQueue extends PriorityQueue<Spans> {
|
||||
SpanPositionQueue(int maxSize) {
|
||||
super(maxSize, false); // do not prepopulate
|
||||
super(maxSize); // do not prepopulate
|
||||
}
|
||||
|
||||
protected boolean lessThan(Spans s1, Spans s2) {
|
||||
|
|
|
@ -18,6 +18,7 @@ package org.apache.lucene.util;
|
|||
|
||||
import java.util.Iterator;
|
||||
import java.util.NoSuchElementException;
|
||||
import java.util.function.Supplier;
|
||||
|
||||
|
||||
/**
|
||||
|
@ -26,10 +27,9 @@ import java.util.NoSuchElementException;
|
|||
* require log(size) time but the remove() cost implemented here is linear.
|
||||
*
|
||||
* <p>
|
||||
* <b>NOTE</b>: This class will pre-allocate a full array of length
|
||||
* <code>maxSize+1</code> if instantiated via the
|
||||
* {@link #PriorityQueue(int,boolean)} constructor with <code>prepopulate</code>
|
||||
* set to <code>true</code>.
|
||||
* <b>NOTE</b>: This class pre-allocates an array of length {@code maxSize+1}
|
||||
* and pre-fills it with elements if instantiated via the
|
||||
* {@link #PriorityQueue(int,Supplier)} constructor.
|
||||
*
|
||||
* <b>NOTE</b>: Iteration order is not specified.
|
||||
*
|
||||
|
@ -40,11 +40,47 @@ public abstract class PriorityQueue<T> implements Iterable<T> {
|
|||
private final int maxSize;
|
||||
private final T[] heap;
|
||||
|
||||
/**
|
||||
* Create an empty priority queue of the configured size.
|
||||
*/
|
||||
public PriorityQueue(int maxSize) {
|
||||
this(maxSize, true);
|
||||
this(maxSize, () -> null);
|
||||
}
|
||||
|
||||
public PriorityQueue(int maxSize, boolean prepopulate) {
|
||||
/**
|
||||
* Create a priority queue that is pre-filled with sentinel objects, so that
|
||||
* the code which uses that queue can always assume it's full and only change
|
||||
* the top without attempting to insert any new object.<br>
|
||||
*
|
||||
* Those sentinel values should always compare worse than any non-sentinel
|
||||
* value (i.e., {@link #lessThan} should always favor the
|
||||
* non-sentinel values).<br>
|
||||
*
|
||||
* By default, the supplier returns null, which means the queue will not be
|
||||
* filled with sentinel values. Otherwise, the value returned will be used to
|
||||
* pre-populate the queue.<br>
|
||||
*
|
||||
* If this method is extended to return a non-null value, then the following
|
||||
* usage pattern is recommended:
|
||||
*
|
||||
* <pre class="prettyprint">
|
||||
* PriorityQueue<MyObject> pq = new MyQueue<MyObject>(numHits);
|
||||
* // save the 'top' element, which is guaranteed to not be null.
|
||||
* MyObject pqTop = pq.top();
|
||||
* <...>
|
||||
* // now in order to add a new element, which is 'better' than top (after
|
||||
* // you've verified it is better), it is as simple as:
|
||||
* pqTop.change().
|
||||
* pqTop = pq.updateTop();
|
||||
* </pre>
|
||||
*
|
||||
* <b>NOTE:</b> the given supplier will be called {@code maxSize} times,
|
||||
* relying on a new object to be returned and will not check if it's null again.
|
||||
* Therefore you should ensure any call to this method creates a new instance and
|
||||
* behaves consistently, e.g., it cannot return null if it previously returned
|
||||
* non-null and all returned instances must {@link #lessThan compare equal}.
|
||||
*/
|
||||
public PriorityQueue(int maxSize, Supplier<T> sentinelObjectSupplier) {
|
||||
final int heapSize;
|
||||
if (0 == maxSize) {
|
||||
// We allocate 1 extra to avoid if statement in top()
|
||||
|
@ -65,16 +101,14 @@ public abstract class PriorityQueue<T> implements Iterable<T> {
|
|||
this.heap = h;
|
||||
this.maxSize = maxSize;
|
||||
|
||||
if (prepopulate) {
|
||||
// If sentinel objects are supported, populate the queue with them
|
||||
T sentinel = getSentinelObject();
|
||||
if (sentinel != null) {
|
||||
heap[1] = sentinel;
|
||||
for (int i = 2; i < heap.length; i++) {
|
||||
heap[i] = getSentinelObject();
|
||||
}
|
||||
size = maxSize;
|
||||
// If sentinel objects are supported, populate the queue with them
|
||||
T sentinel = sentinelObjectSupplier.get();
|
||||
if (sentinel != null) {
|
||||
heap[1] = sentinel;
|
||||
for (int i = 2; i < heap.length; i++) {
|
||||
heap[i] = sentinelObjectSupplier.get();
|
||||
}
|
||||
size = maxSize;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -84,50 +118,6 @@ public abstract class PriorityQueue<T> implements Iterable<T> {
|
|||
*/
|
||||
protected abstract boolean lessThan(T a, T b);
|
||||
|
||||
/**
|
||||
* This method can be overridden by extending classes to return a sentinel
|
||||
* object which will be used by the {@link PriorityQueue#PriorityQueue(int,boolean)}
|
||||
* constructor to fill the queue, so that the code which uses that queue can always
|
||||
* assume it's full and only change the top without attempting to insert any new
|
||||
* object.<br>
|
||||
*
|
||||
* Those sentinel values should always compare worse than any non-sentinel
|
||||
* value (i.e., {@link #lessThan} should always favor the
|
||||
* non-sentinel values).<br>
|
||||
*
|
||||
* By default, this method returns null, which means the queue will not be
|
||||
* filled with sentinel values. Otherwise, the value returned will be used to
|
||||
* pre-populate the queue. Adds sentinel values to the queue.<br>
|
||||
*
|
||||
* If this method is extended to return a non-null value, then the following
|
||||
* usage pattern is recommended:
|
||||
*
|
||||
* <pre class="prettyprint">
|
||||
* // extends getSentinelObject() to return a non-null value.
|
||||
* PriorityQueue<MyObject> pq = new MyQueue<MyObject>(numHits);
|
||||
* // save the 'top' element, which is guaranteed to not be null.
|
||||
* MyObject pqTop = pq.top();
|
||||
* <...>
|
||||
* // now in order to add a new element, which is 'better' than top (after
|
||||
* // you've verified it is better), it is as simple as:
|
||||
* pqTop.change().
|
||||
* pqTop = pq.updateTop();
|
||||
* </pre>
|
||||
*
|
||||
* <b>NOTE:</b> if this method returns a non-null value, it will be called by
|
||||
* the {@link PriorityQueue#PriorityQueue(int,boolean)} constructor
|
||||
* {@link #size()} times, relying on a new object to be returned and will not
|
||||
* check if it's null again. Therefore you should ensure any call to this
|
||||
* method creates a new instance and behaves consistently, e.g., it cannot
|
||||
* return null if it previously returned non-null.
|
||||
*
|
||||
* @return the sentinel object to use to pre-populate the queue, or null if
|
||||
* sentinel objects are not supported.
|
||||
*/
|
||||
protected T getSentinelObject() {
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Adds an Object to a PriorityQueue in log(size) time. If one tries to add
|
||||
* more objects than maxSize from initialize an
|
||||
|
|
|
@ -38,7 +38,7 @@ public class TopOrdAndFloatQueue extends PriorityQueue<TopOrdAndFloatQueue.OrdAn
|
|||
|
||||
/** Sole constructor. */
|
||||
public TopOrdAndFloatQueue(int topN) {
|
||||
super(topN, false);
|
||||
super(topN);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -38,7 +38,7 @@ public class TopOrdAndIntQueue extends PriorityQueue<TopOrdAndIntQueue.OrdAndVal
|
|||
|
||||
/** Sole constructor. */
|
||||
public TopOrdAndIntQueue(int topN) {
|
||||
super(topN, false);
|
||||
super(topN);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -86,7 +86,7 @@ class TermAutomatonScorer extends Scorer {
|
|||
* the same (lowest) docID. */
|
||||
private static class DocIDQueue extends PriorityQueue<EnumAndScorer> {
|
||||
public DocIDQueue(int maxSize) {
|
||||
super(maxSize, false);
|
||||
super(maxSize);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -99,7 +99,7 @@ class TermAutomatonScorer extends Scorer {
|
|||
* position. */
|
||||
private static class PositionQueue extends PriorityQueue<EnumAndScorer> {
|
||||
public PositionQueue(int maxSize) {
|
||||
super(maxSize, false);
|
||||
super(maxSize);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
Loading…
Reference in New Issue