mirror of https://github.com/apache/lucene.git
LUCENE-1089: add new PriorityQueue.insertWithOverflow method to allow for re-use
git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@604949 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
55d0c3a2f8
commit
10c1ec3a66
|
@ -82,6 +82,10 @@ API Changes
|
|||
11. LUCENE-1079: DocValues cleanup: constructor now has no params,
|
||||
and getInnerArray() now throws UnsupportedOperationException (Doron Cohen)
|
||||
|
||||
12. LUCENE-1089: Added PriorityQueue.insertWithOverflow, which returns
|
||||
the Object (if any) that was bumped from the queue to allow
|
||||
re-use. (Shai Erera via Mike McCandless)
|
||||
|
||||
|
||||
Bug fixes
|
||||
|
||||
|
|
|
@ -80,11 +80,16 @@ extends PriorityQueue {
|
|||
return maxscore;
|
||||
}
|
||||
|
||||
// Update maxscore.
|
||||
private final void updateMaxScore(FieldDoc fdoc) {
|
||||
maxscore = Math.max(maxscore, fdoc.score);
|
||||
}
|
||||
|
||||
// The signature of this method takes a FieldDoc in order to avoid
|
||||
// the unneeded cast to retrieve the score.
|
||||
// inherit javadoc
|
||||
public boolean insert(FieldDoc fdoc) {
|
||||
maxscore = Math.max(maxscore,fdoc.score);
|
||||
updateMaxScore(fdoc);
|
||||
return super.insert(fdoc);
|
||||
}
|
||||
|
||||
|
@ -95,6 +100,14 @@ extends PriorityQueue {
|
|||
return insert((FieldDoc)fdoc);
|
||||
}
|
||||
|
||||
// This overrides PriorityQueue.insertWithOverflow() so that
|
||||
// updateMaxScore(FieldDoc) that keeps track of the score isn't accidentally
|
||||
// bypassed.
|
||||
public Object insertWithOverflow(Object element) {
|
||||
updateMaxScore((FieldDoc) element);
|
||||
return super.insertWithOverflow(element);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns whether <code>a</code> is less relevant than <code>b</code>.
|
||||
* @param a ScoreDoc
|
||||
|
|
|
@ -104,20 +104,27 @@ public class FuzzyQuery extends MultiTermQuery {
|
|||
FilteredTermEnum enumerator = getEnum(reader);
|
||||
int maxClauseCount = BooleanQuery.getMaxClauseCount();
|
||||
ScoreTermQueue stQueue = new ScoreTermQueue(maxClauseCount);
|
||||
ScoreTerm reusableST = null;
|
||||
|
||||
try {
|
||||
do {
|
||||
float minScore = 0.0f;
|
||||
float score = 0.0f;
|
||||
Term t = enumerator.term();
|
||||
if (t != null) {
|
||||
score = enumerator.difference();
|
||||
// terms come in alphabetical order, therefore if queue is full and score
|
||||
// not bigger than minScore, we can skip
|
||||
if(stQueue.size() < maxClauseCount || score > minScore){
|
||||
stQueue.insert(new ScoreTerm(t, score));
|
||||
minScore = ((ScoreTerm)stQueue.top()).score; // maintain minScore
|
||||
if (reusableST == null) {
|
||||
reusableST = new ScoreTerm(t, score);
|
||||
} else if (score >= reusableST.score) {
|
||||
// reusableST holds the entry most recently dropped from or
|
||||
// refused by the queue; this score is at least that high,
|
||||
// so recycle the instance and try inserting it
|
||||
reusableST.score = score;
|
||||
reusableST.term = t;
|
||||
} else {
|
||||
continue;
|
||||
}
|
||||
|
||||
reusableST = (ScoreTerm) stQueue.insertWithOverflow(reusableST);
|
||||
}
|
||||
} while (enumerator.next());
|
||||
} finally {
|
||||
|
|
|
@ -28,8 +28,8 @@ import org.apache.lucene.util.PriorityQueue;
|
|||
* documents are collected.
|
||||
**/
|
||||
public class TopDocCollector extends HitCollector {
|
||||
private int numHits;
|
||||
private float minScore = 0.0f;
|
||||
|
||||
private ScoreDoc reusableSD;
|
||||
|
||||
int totalHits;
|
||||
PriorityQueue hq;
|
||||
|
@ -42,7 +42,6 @@ public class TopDocCollector extends HitCollector {
|
|||
}
|
||||
|
||||
TopDocCollector(int numHits, PriorityQueue hq) {
|
||||
this.numHits = numHits;
|
||||
this.hq = hq;
|
||||
}
|
||||
|
||||
|
@ -50,10 +49,18 @@ public class TopDocCollector extends HitCollector {
|
|||
public void collect(int doc, float score) {
|
||||
if (score > 0.0f) {
|
||||
totalHits++;
|
||||
if (hq.size() < numHits || score >= minScore) {
|
||||
hq.insert(new ScoreDoc(doc, score));
|
||||
minScore = ((ScoreDoc)hq.top()).score; // maintain minScore
|
||||
if (reusableSD == null) {
|
||||
reusableSD = new ScoreDoc(doc, score);
|
||||
} else if (score >= reusableSD.score) {
|
||||
// reusableSD holds the entry most recently dropped from or
|
||||
// refused by the queue; this score is at least that high,
|
||||
// so recycle the instance and try inserting it
|
||||
reusableSD.doc = doc;
|
||||
reusableSD.score = score;
|
||||
} else {
|
||||
return;
|
||||
}
|
||||
reusableSD = (ScoreDoc) hq.insertWithOverflow(reusableSD);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -31,6 +31,8 @@ import org.apache.lucene.index.IndexReader;
|
|||
**/
|
||||
public class TopFieldDocCollector extends TopDocCollector {
|
||||
|
||||
private FieldDoc reusableFD;
|
||||
|
||||
/** Construct to collect a given number of hits.
|
||||
* @param reader the index to be searched
|
||||
* @param sort the sort criteria
|
||||
|
@ -45,7 +47,18 @@ public class TopFieldDocCollector extends TopDocCollector {
|
|||
public void collect(int doc, float score) {
|
||||
if (score > 0.0f) {
|
||||
totalHits++;
|
||||
hq.insert(new FieldDoc(doc, score));
|
||||
if (reusableFD == null)
|
||||
reusableFD = new FieldDoc(doc, score);
|
||||
else {
|
||||
// Whereas TopDocCollector can skip this if the
|
||||
// score is not competitive, we cannot because the
|
||||
// comparators in the FieldSortedHitQueue.lessThan
|
||||
// aren't in general congruent with "higher score
|
||||
// wins"
|
||||
reusableFD.score = score;
|
||||
reusableFD.doc = doc;
|
||||
}
|
||||
reusableFD = (FieldDoc) hq.insertWithOverflow(reusableFD);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -21,9 +21,9 @@ package org.apache.lucene.util;
|
|||
least element can always be found in constant time. Put()'s and pop()'s
|
||||
require log(size) time. */
|
||||
public abstract class PriorityQueue {
|
||||
private Object[] heap;
|
||||
private int size;
|
||||
private int maxSize;
|
||||
protected Object[] heap;
|
||||
|
||||
/** Determines the ordering of objects in this priority queue. Subclasses
|
||||
must define this one method. */
|
||||
|
@ -32,7 +32,12 @@ public abstract class PriorityQueue {
|
|||
/** Subclass constructors must call this. */
|
||||
protected final void initialize(int maxSize) {
|
||||
size = 0;
|
||||
int heapSize = maxSize + 1;
|
||||
int heapSize;
|
||||
if (0 == maxSize)
|
||||
// We allocate 1 extra to avoid if statement in top()
|
||||
heapSize = 2;
|
||||
else
|
||||
heapSize = maxSize + 1;
|
||||
heap = new Object[heapSize];
|
||||
this.maxSize = maxSize;
|
||||
}
|
||||
|
@ -54,26 +59,40 @@ public abstract class PriorityQueue {
|
|||
* @param element
|
||||
* @return true if element is added, false otherwise.
|
||||
*/
|
||||
public boolean insert(Object element){
|
||||
if(size < maxSize){
|
||||
put(element);
|
||||
return true;
|
||||
public boolean insert(Object element) {
|
||||
return insertWithOverflow(element) != element;
|
||||
}
|
||||
else if(size > 0 && !lessThan(element, top())){
|
||||
|
||||
/**
|
||||
* Adds element exactly as insert() does, but reports what
|
||||
* left the queue instead of returning a boolean. Returns
|
||||
* null if the queue held fewer than maxSize elements, so
|
||||
* nothing was dropped. Returns the previous least element
|
||||
* if the queue was full and that element was replaced by
|
||||
* the given one. Returns the given element itself if it
|
||||
* was not added: the queue was full and the element is
|
||||
* less than its least element, or maxSize is 0.
|
||||
*/
|
||||
public Object insertWithOverflow(Object element) {
|
||||
if (size < maxSize) {
|
||||
put(element);
|
||||
return null;
|
||||
} else if (size > 0 && !lessThan(element, heap[1])) {
|
||||
Object ret = heap[1];
|
||||
heap[1] = element;
|
||||
adjustTop();
|
||||
return true;
|
||||
return ret;
|
||||
} else {
|
||||
return element;
|
||||
}
|
||||
else
|
||||
return false;
|
||||
}
|
||||
|
||||
/** Returns the least element of the PriorityQueue in constant time. */
|
||||
public final Object top() {
|
||||
if (size > 0)
|
||||
// We don't need to check size here: if maxSize is 0,
|
||||
// then heap is length 2 array with both entries null.
|
||||
// If size is 0 then heap[1] is already null.
|
||||
return heap[1];
|
||||
else
|
||||
return null;
|
||||
}
|
||||
|
||||
/** Removes and returns the least element of the PriorityQueue in log(size)
|
||||
|
@ -101,7 +120,6 @@ public abstract class PriorityQueue {
|
|||
downHeap();
|
||||
}
|
||||
|
||||
|
||||
/** Returns the number of elements currently stored in the PriorityQueue. */
|
||||
public final int size() {
|
||||
return size;
|
||||
|
|
|
@ -18,7 +18,6 @@ package org.apache.lucene.util;
|
|||
*/
|
||||
|
||||
import java.util.Random;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
|
||||
public class TestPriorityQueue
|
||||
extends LuceneTestCase
|
||||
|
@ -107,4 +106,25 @@ public class TestPriorityQueue
|
|||
assertEquals(3, pq.size());
|
||||
assertEquals(3, ((Integer) pq.top()).intValue());
|
||||
}
|
||||
|
||||
public void testInsertWithOverflow() {
|
||||
int size = 4;
|
||||
PriorityQueue pq = new IntegerQueue(size);
|
||||
Integer i1 = new Integer(2);
|
||||
Integer i2 = new Integer(3);
|
||||
Integer i3 = new Integer(1);
|
||||
Integer i4 = new Integer(5);
|
||||
Integer i5 = new Integer(7);
|
||||
Integer i6 = new Integer(1);
|
||||
|
||||
assertNull(pq.insertWithOverflow(i1));
|
||||
assertNull(pq.insertWithOverflow(i2));
|
||||
assertNull(pq.insertWithOverflow(i3));
|
||||
assertNull(pq.insertWithOverflow(i4));
|
||||
assertTrue(pq.insertWithOverflow(i5) == i3); // i3 should have been dropped
|
||||
assertTrue(pq.insertWithOverflow(i6) == i6); // i6 should not have been inserted
|
||||
assertEquals(size, pq.size());
|
||||
assertEquals(2, ((Integer) pq.top()).intValue());
|
||||
}
|
||||
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue