LUCENE-8428: PriorityQueue takes sentinels via a Supplier as a constructor argument.

This commit is contained in:
Adrien Grand 2018-07-27 11:06:13 +02:00
parent 3119fbb947
commit 914e264165
7 changed files with 68 additions and 74 deletions

View File

@ -163,6 +163,10 @@ API Changes:
* LUCENE-7314: Graduate LatLonPoint and query classes to core (Nick Knize)
* LUCENE-8428: The way that oal.util.PriorityQueue creates sentinel objects has
been changed from a protected method to a java.util.function.Supplier as a
constructor argument. (Adrien Grand)
Bug Fixes:
* LUCENE-8380: UTF8TaxonomyWriterCache inconsistency. (Ruslan Torobaev, Dawid Weiss)

View File

@ -58,18 +58,18 @@ final class HitQueue extends PriorityQueue<ScoreDoc> {
* the requested size of this queue.
* @param prePopulate
* specifies whether to pre-populate the queue with sentinel values.
* @see #getSentinelObject()
*/
HitQueue(int size, boolean prePopulate) {
super(size, prePopulate);
}
@Override
protected ScoreDoc getSentinelObject() {
super(size, () -> {
if (prePopulate) {
// Always set the doc Id to MAX_VALUE so that it won't be favored by
// lessThan. This generally should not happen since if score is not NEG_INF,
// TopScoreDocCollector will always add the object to the queue.
return new ScoreDoc(Integer.MAX_VALUE, Float.NEGATIVE_INFINITY);
} else {
return null;
}
});
}
@Override

View File

@ -21,7 +21,7 @@ import org.apache.lucene.util.PriorityQueue;
class SpanPositionQueue extends PriorityQueue<Spans> {
SpanPositionQueue(int maxSize) {
super(maxSize, false); // do not prepopulate
super(maxSize); // do not prepopulate
}
protected boolean lessThan(Spans s1, Spans s2) {

View File

@ -18,6 +18,7 @@ package org.apache.lucene.util;
import java.util.Iterator;
import java.util.NoSuchElementException;
import java.util.function.Supplier;
/**
@ -26,10 +27,9 @@ import java.util.NoSuchElementException;
* require log(size) time but the remove() cost implemented here is linear.
*
* <p>
* <b>NOTE</b>: This class will pre-allocate a full array of length
* <code>maxSize+1</code> if instantiated via the
* {@link #PriorityQueue(int,boolean)} constructor with <code>prepopulate</code>
* set to <code>true</code>.
* <b>NOTE</b>: This class pre-allocates an array of length {@code maxSize+1}
* and pre-fills it with elements if instantiated via the
* {@link #PriorityQueue(int,Supplier)} constructor.
*
* <b>NOTE</b>: Iteration order is not specified.
*
@ -40,11 +40,47 @@ public abstract class PriorityQueue<T> implements Iterable<T> {
private final int maxSize;
private final T[] heap;
/**
* Create an empty priority queue of the configured size.
*/
public PriorityQueue(int maxSize) {
this(maxSize, true);
this(maxSize, () -> null);
}
public PriorityQueue(int maxSize, boolean prepopulate) {
/**
* Create a priority queue that is pre-filled with sentinel objects, so that
* the code which uses that queue can always assume it's full and only change
* the top without attempting to insert any new object.<br>
*
* Those sentinel values should always compare worse than any non-sentinel
* value (i.e., {@link #lessThan} should always favor the
* non-sentinel values).<br>
*
* If the supplier returns null (as it does when the queue is created via
* {@link #PriorityQueue(int)}), the queue will not be filled with sentinel
* values. Otherwise, the values returned will be used to pre-populate the queue.<br>
*
* If the supplier returns a non-null value, then the following
* usage pattern is recommended:
*
* <pre class="prettyprint">
* PriorityQueue&lt;MyObject&gt; pq = new MyQueue&lt;MyObject&gt;(numHits);
* // save the 'top' element, which is guaranteed to not be null.
* MyObject pqTop = pq.top();
* &lt;...&gt;
* // now in order to add a new element, which is 'better' than top (after
* // you've verified it is better), it is as simple as:
* pqTop.change();
* pqTop = pq.updateTop();
* </pre>
*
* <b>NOTE:</b> the given supplier will be called {@code maxSize} times,
* relying on a new object being returned each time; only the first result is
* checked for null. Therefore you should ensure that every call to the supplier
* creates a new instance and behaves consistently, e.g., it cannot return null if
* it previously returned non-null, and all returned instances must {@link #lessThan compare equal}.
*/
public PriorityQueue(int maxSize, Supplier<T> sentinelObjectSupplier) {
final int heapSize;
if (0 == maxSize) {
// We allocate 1 extra to avoid if statement in top()
@ -65,18 +101,16 @@ public abstract class PriorityQueue<T> implements Iterable<T> {
this.heap = h;
this.maxSize = maxSize;
if (prepopulate) {
// If sentinel objects are supported, populate the queue with them
T sentinel = getSentinelObject();
T sentinel = sentinelObjectSupplier.get();
if (sentinel != null) {
heap[1] = sentinel;
for (int i = 2; i < heap.length; i++) {
heap[i] = getSentinelObject();
heap[i] = sentinelObjectSupplier.get();
}
size = maxSize;
}
}
}
/** Determines the ordering of objects in this priority queue. Subclasses
* must define this one method.
@ -84,50 +118,6 @@ public abstract class PriorityQueue<T> implements Iterable<T> {
*/
protected abstract boolean lessThan(T a, T b);
/**
* This method can be overridden by extending classes to return a sentinel
* object which will be used by the {@link PriorityQueue#PriorityQueue(int,boolean)}
* constructor to fill the queue, so that the code which uses that queue can always
* assume it's full and only change the top without attempting to insert any new
* object.<br>
*
* Those sentinel values should always compare worse than any non-sentinel
* value (i.e., {@link #lessThan} should always favor the
* non-sentinel values).<br>
*
* By default, this method returns null, which means the queue will not be
* filled with sentinel values. Otherwise, the value returned will be used to
* pre-populate the queue. Adds sentinel values to the queue.<br>
*
* If this method is extended to return a non-null value, then the following
* usage pattern is recommended:
*
* <pre class="prettyprint">
* // extends getSentinelObject() to return a non-null value.
* PriorityQueue&lt;MyObject&gt; pq = new MyQueue&lt;MyObject&gt;(numHits);
* // save the 'top' element, which is guaranteed to not be null.
* MyObject pqTop = pq.top();
* &lt;...&gt;
* // now in order to add a new element, which is 'better' than top (after
* // you've verified it is better), it is as simple as:
* pqTop.change().
* pqTop = pq.updateTop();
* </pre>
*
* <b>NOTE:</b> if this method returns a non-null value, it will be called by
* the {@link PriorityQueue#PriorityQueue(int,boolean)} constructor
* {@link #size()} times, relying on a new object to be returned and will not
* check if it's null again. Therefore you should ensure any call to this
* method creates a new instance and behaves consistently, e.g., it cannot
* return null if it previously returned non-null.
*
* @return the sentinel object to use to pre-populate the queue, or null if
* sentinel objects are not supported.
*/
protected T getSentinelObject() {
return null;
}
/**
* Adds an Object to a PriorityQueue in log(size) time. If one tries to add
* more objects than maxSize from initialize an

View File

@ -38,7 +38,7 @@ public class TopOrdAndFloatQueue extends PriorityQueue<TopOrdAndFloatQueue.OrdAn
/** Sole constructor. */
public TopOrdAndFloatQueue(int topN) {
super(topN, false);
super(topN);
}
@Override

View File

@ -38,7 +38,7 @@ public class TopOrdAndIntQueue extends PriorityQueue<TopOrdAndIntQueue.OrdAndVal
/** Sole constructor. */
public TopOrdAndIntQueue(int topN) {
super(topN, false);
super(topN);
}
@Override

View File

@ -86,7 +86,7 @@ class TermAutomatonScorer extends Scorer {
* the same (lowest) docID. */
private static class DocIDQueue extends PriorityQueue<EnumAndScorer> {
public DocIDQueue(int maxSize) {
super(maxSize, false);
super(maxSize);
}
@Override
@ -99,7 +99,7 @@ class TermAutomatonScorer extends Scorer {
* position. */
private static class PositionQueue extends PriorityQueue<EnumAndScorer> {
public PositionQueue(int maxSize) {
super(maxSize, false);
super(maxSize);
}
@Override