LUCENE-1089: add new PriorityQueue.insertWithOverflow method to allow for re-use

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@604949 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Michael McCandless 2007-12-17 18:05:13 +00:00
parent 55d0c3a2f8
commit 10c1ec3a66
7 changed files with 116 additions and 34 deletions

View File

@ -82,6 +82,10 @@ API Changes
11. LUCENE-1079: DocValues cleanup: constructor now has no params,
and getInnerArray() now throws UnsupportedOperationException (Doron Cohen)
12. LUCENE-1089: Added PriorityQueue.insertWithOverflow, which returns
the Object (if any) that was bumped from the queue to allow
re-use. (Shai Erera via Mike McCandless)
Bug fixes

View File

@ -80,11 +80,16 @@ extends PriorityQueue {
return maxscore;
}
// Update maxscore.
private final void updateMaxScore(FieldDoc fdoc) {
maxscore = Math.max(maxscore, fdoc.score);
}
// The signature of this method takes a FieldDoc in order to avoid
// the unneeded cast to retrieve the score.
// inherit javadoc
public boolean insert(FieldDoc fdoc) {
maxscore = Math.max(maxscore,fdoc.score);
updateMaxScore(fdoc);
return super.insert(fdoc);
}
@ -95,6 +100,14 @@ extends PriorityQueue {
return insert((FieldDoc)fdoc);
}
// This overrides PriorityQueue.insertWithOverflow() so that
// updateMaxScore(FieldDoc) that keeps track of the score isn't accidentally
// bypassed.
public Object insertWithOverflow(Object element) {
updateMaxScore((FieldDoc) element);
return super.insertWithOverflow(element);
}
/**
* Returns whether <code>a</code> is less relevant than <code>b</code>.
* @param a ScoreDoc

View File

@ -104,20 +104,27 @@ public class FuzzyQuery extends MultiTermQuery {
FilteredTermEnum enumerator = getEnum(reader);
int maxClauseCount = BooleanQuery.getMaxClauseCount();
ScoreTermQueue stQueue = new ScoreTermQueue(maxClauseCount);
ScoreTerm reusableST = null;
try {
do {
float minScore = 0.0f;
float score = 0.0f;
Term t = enumerator.term();
if (t != null) {
score = enumerator.difference();
// terms come in alphabetical order, therefore if queue is full and score
// not bigger than minScore, we can skip
if(stQueue.size() < maxClauseCount || score > minScore){
stQueue.insert(new ScoreTerm(t, score));
minScore = ((ScoreTerm)stQueue.top()).score; // maintain minScore
if (reusableST == null) {
reusableST = new ScoreTerm(t, score);
} else if (score >= reusableST.score) {
// reusableST holds the last "rejected" entry, so, if
// this new score is not better than that, there's no
// need to try inserting it
reusableST.score = score;
reusableST.term = t;
} else {
continue;
}
reusableST = (ScoreTerm) stQueue.insertWithOverflow(reusableST);
}
} while (enumerator.next());
} finally {

View File

@ -28,8 +28,8 @@ import org.apache.lucene.util.PriorityQueue;
* documents are collected.
**/
public class TopDocCollector extends HitCollector {
private int numHits;
private float minScore = 0.0f;
private ScoreDoc reusableSD;
int totalHits;
PriorityQueue hq;
@ -42,7 +42,6 @@ public class TopDocCollector extends HitCollector {
}
TopDocCollector(int numHits, PriorityQueue hq) {
this.numHits = numHits;
this.hq = hq;
}
@ -50,10 +49,18 @@ public class TopDocCollector extends HitCollector {
public void collect(int doc, float score) {
if (score > 0.0f) {
totalHits++;
if (hq.size() < numHits || score >= minScore) {
hq.insert(new ScoreDoc(doc, score));
minScore = ((ScoreDoc)hq.top()).score; // maintain minScore
if (reusableSD == null) {
reusableSD = new ScoreDoc(doc, score);
} else if (score >= reusableSD.score) {
// reusableSD holds the last "rejected" entry, so, if
// this new score is not better than that, there's no
// need to try inserting it
reusableSD.doc = doc;
reusableSD.score = score;
} else {
return;
}
reusableSD = (ScoreDoc) hq.insertWithOverflow(reusableSD);
}
}

View File

@ -31,6 +31,8 @@ import org.apache.lucene.index.IndexReader;
**/
public class TopFieldDocCollector extends TopDocCollector {
private FieldDoc reusableFD;
/** Construct to collect a given number of hits.
* @param reader the index to be searched
* @param sort the sort criteria
@ -45,7 +47,18 @@ public class TopFieldDocCollector extends TopDocCollector {
public void collect(int doc, float score) {
if (score > 0.0f) {
totalHits++;
hq.insert(new FieldDoc(doc, score));
if (reusableFD == null)
reusableFD = new FieldDoc(doc, score);
else {
// Whereas TopDocCollector can skip this if the
// score is not competitive, we cannot because the
// comparators in the FieldSortedHitQueue.lessThan
// aren't in general congruent with "higher score
// wins"
reusableFD.score = score;
reusableFD.doc = doc;
}
reusableFD = (FieldDoc) hq.insertWithOverflow(reusableFD);
}
}

View File

@ -21,9 +21,9 @@ package org.apache.lucene.util;
least element can always be found in constant time. Put()'s and pop()'s
require log(size) time. */
public abstract class PriorityQueue {
private Object[] heap;
private int size;
private int maxSize;
protected Object[] heap;
/** Determines the ordering of objects in this priority queue. Subclasses
must define this one method. */
@ -32,7 +32,12 @@ public abstract class PriorityQueue {
/** Subclass constructors must call this. */
protected final void initialize(int maxSize) {
size = 0;
int heapSize = maxSize + 1;
int heapSize;
if (0 == maxSize)
// We allocate 1 extra to avoid if statement in top()
heapSize = 2;
else
heapSize = maxSize + 1;
heap = new Object[heapSize];
this.maxSize = maxSize;
}
@ -55,25 +60,39 @@ public abstract class PriorityQueue {
* @return true if element is added, false otherwise.
*/
public boolean insert(Object element) {
return insertWithOverflow(element) != element;
}
/**
* insertWithOverflow() is the same as insert() except its
* return value: it returns the object (if any) that was
* dropped off the heap because it was full. This can be
* the given parameter (in case it is smaller than the
* full heap's minimum, and couldn't be added), or another
* object that was previously the smallest value in the
* heap and now has been replaced by a larger one, or null
* if the queue wasn't yet full with maxSize elements.
*/
public Object insertWithOverflow(Object element) {
if (size < maxSize) {
put(element);
return true;
}
else if(size > 0 && !lessThan(element, top())){
return null;
} else if (size > 0 && !lessThan(element, heap[1])) {
Object ret = heap[1];
heap[1] = element;
adjustTop();
return true;
return ret;
} else {
return element;
}
else
return false;
}
/** Returns the least element of the PriorityQueue in constant time. */
public final Object top() {
if (size > 0)
// We don't need to check size here: if maxSize is 0,
// then heap is length 2 array with both entries null.
// If size is 0 then heap[1] is already null.
return heap[1];
else
return null;
}
/** Removes and returns the least element of the PriorityQueue in log(size)
@ -101,7 +120,6 @@ public abstract class PriorityQueue {
downHeap();
}
/** Returns the number of elements currently stored in the PriorityQueue. */
public final int size() {
return size;

View File

@ -18,7 +18,6 @@ package org.apache.lucene.util;
*/
import java.util.Random;
import org.apache.lucene.util.LuceneTestCase;
public class TestPriorityQueue
extends LuceneTestCase
@ -107,4 +106,25 @@ public class TestPriorityQueue
assertEquals(3, pq.size());
assertEquals(3, ((Integer) pq.top()).intValue());
}
public void testInsertWithOverflow() {
int size = 4;
PriorityQueue pq = new IntegerQueue(size);
Integer i1 = new Integer(2);
Integer i2 = new Integer(3);
Integer i3 = new Integer(1);
Integer i4 = new Integer(5);
Integer i5 = new Integer(7);
Integer i6 = new Integer(1);
assertNull(pq.insertWithOverflow(i1));
assertNull(pq.insertWithOverflow(i2));
assertNull(pq.insertWithOverflow(i3));
assertNull(pq.insertWithOverflow(i4));
assertTrue(pq.insertWithOverflow(i5) == i3); // i3 should have been dropped
assertTrue(pq.insertWithOverflow(i6) == i6); // i6 should not have been inserted
assertEquals(size, pq.size());
assertEquals(2, ((Integer) pq.top()).intValue());
}
}