LUCENE-1089: add new PriorityQueue.insertWithOverflow method to allow for re-use

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@604949 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Michael McCandless 2007-12-17 18:05:13 +00:00
parent 55d0c3a2f8
commit 10c1ec3a66
7 changed files with 116 additions and 34 deletions

View File

@ -82,7 +82,11 @@ API Changes
11. LUCENE-1079: DocValues cleanup: constructor now has no params, 11. LUCENE-1079: DocValues cleanup: constructor now has no params,
and getInnerArray() now throws UnsupportedOperationException (Doron Cohen) and getInnerArray() now throws UnsupportedOperationException (Doron Cohen)
12. LUCENE-1089: Added PriorityQueue.insertWithOverflow, which returns
the Object (if any) that was bumped from the queue to allow
re-use. (Shai Erera via Mike McCandless)
Bug fixes Bug fixes
1. LUCENE-933: QueryParser fixed to not produce empty sub 1. LUCENE-933: QueryParser fixed to not produce empty sub

View File

@ -80,11 +80,16 @@ extends PriorityQueue {
return maxscore; return maxscore;
} }
// Update maxscore.
private final void updateMaxScore(FieldDoc fdoc) {
maxscore = Math.max(maxscore, fdoc.score);
}
// The signature of this method takes a FieldDoc in order to avoid // The signature of this method takes a FieldDoc in order to avoid
// the unneeded cast to retrieve the score. // the unneeded cast to retrieve the score.
// inherit javadoc // inherit javadoc
public boolean insert(FieldDoc fdoc) { public boolean insert(FieldDoc fdoc) {
maxscore = Math.max(maxscore,fdoc.score); updateMaxScore(fdoc);
return super.insert(fdoc); return super.insert(fdoc);
} }
@ -95,6 +100,14 @@ extends PriorityQueue {
return insert((FieldDoc)fdoc); return insert((FieldDoc)fdoc);
} }
// This overrides PriorityQueue.insertWithOverflow() so that
// updateMaxScore(FieldDoc) that keeps track of the score isn't accidentally
// bypassed.
public Object insertWithOverflow(Object element) {
updateMaxScore((FieldDoc) element);
return super.insertWithOverflow(element);
}
/** /**
* Returns whether <code>a</code> is less relevant than <code>b</code>. * Returns whether <code>a</code> is less relevant than <code>b</code>.
* @param a ScoreDoc * @param a ScoreDoc

View File

@ -104,20 +104,27 @@ public class FuzzyQuery extends MultiTermQuery {
FilteredTermEnum enumerator = getEnum(reader); FilteredTermEnum enumerator = getEnum(reader);
int maxClauseCount = BooleanQuery.getMaxClauseCount(); int maxClauseCount = BooleanQuery.getMaxClauseCount();
ScoreTermQueue stQueue = new ScoreTermQueue(maxClauseCount); ScoreTermQueue stQueue = new ScoreTermQueue(maxClauseCount);
ScoreTerm reusableST = null;
try { try {
do { do {
float minScore = 0.0f;
float score = 0.0f; float score = 0.0f;
Term t = enumerator.term(); Term t = enumerator.term();
if (t != null) { if (t != null) {
score = enumerator.difference(); score = enumerator.difference();
// terms come in alphabetical order, therefore if queue is full and score if (reusableST == null) {
// not bigger than minScore, we can skip reusableST = new ScoreTerm(t, score);
if(stQueue.size() < maxClauseCount || score > minScore){ } else if (score >= reusableST.score) {
stQueue.insert(new ScoreTerm(t, score)); // reusableST holds the last "rejected" entry, so, if
minScore = ((ScoreTerm)stQueue.top()).score; // maintain minScore // this new score is not better than that, there's no
// need to try inserting it
reusableST.score = score;
reusableST.term = t;
} else {
continue;
} }
reusableST = (ScoreTerm) stQueue.insertWithOverflow(reusableST);
} }
} while (enumerator.next()); } while (enumerator.next());
} finally { } finally {

View File

@ -28,8 +28,8 @@ import org.apache.lucene.util.PriorityQueue;
* documents are collected. * documents are collected.
**/ **/
public class TopDocCollector extends HitCollector { public class TopDocCollector extends HitCollector {
private int numHits;
private float minScore = 0.0f; private ScoreDoc reusableSD;
int totalHits; int totalHits;
PriorityQueue hq; PriorityQueue hq;
@ -42,7 +42,6 @@ public class TopDocCollector extends HitCollector {
} }
TopDocCollector(int numHits, PriorityQueue hq) { TopDocCollector(int numHits, PriorityQueue hq) {
this.numHits = numHits;
this.hq = hq; this.hq = hq;
} }
@ -50,10 +49,18 @@ public class TopDocCollector extends HitCollector {
public void collect(int doc, float score) { public void collect(int doc, float score) {
if (score > 0.0f) { if (score > 0.0f) {
totalHits++; totalHits++;
if (hq.size() < numHits || score >= minScore) { if (reusableSD == null) {
hq.insert(new ScoreDoc(doc, score)); reusableSD = new ScoreDoc(doc, score);
minScore = ((ScoreDoc)hq.top()).score; // maintain minScore } else if (score >= reusableSD.score) {
// reusableSD holds the last "rejected" entry, so, if
// this new score is not better than that, there's no
// need to try inserting it
reusableSD.doc = doc;
reusableSD.score = score;
} else {
return;
} }
reusableSD = (ScoreDoc) hq.insertWithOverflow(reusableSD);
} }
} }

View File

@ -31,6 +31,8 @@ import org.apache.lucene.index.IndexReader;
**/ **/
public class TopFieldDocCollector extends TopDocCollector { public class TopFieldDocCollector extends TopDocCollector {
private FieldDoc reusableFD;
/** Construct to collect a given number of hits. /** Construct to collect a given number of hits.
* @param reader the index to be searched * @param reader the index to be searched
* @param sort the sort criteria * @param sort the sort criteria
@ -45,7 +47,18 @@ public class TopFieldDocCollector extends TopDocCollector {
public void collect(int doc, float score) { public void collect(int doc, float score) {
if (score > 0.0f) { if (score > 0.0f) {
totalHits++; totalHits++;
hq.insert(new FieldDoc(doc, score)); if (reusableFD == null)
reusableFD = new FieldDoc(doc, score);
else {
// Whereas TopDocCollector can skip this if the
// score is not competitive, we cannot because the
// comparators in the FieldSortedHitQueue.lessThan
// aren't in general congruent with "higher score
// wins"
reusableFD.score = score;
reusableFD.doc = doc;
}
reusableFD = (FieldDoc) hq.insertWithOverflow(reusableFD);
} }
} }

View File

@ -21,9 +21,9 @@ package org.apache.lucene.util;
least element can always be found in constant time. Put()'s and pop()'s least element can always be found in constant time. Put()'s and pop()'s
require log(size) time. */ require log(size) time. */
public abstract class PriorityQueue { public abstract class PriorityQueue {
private Object[] heap;
private int size; private int size;
private int maxSize; private int maxSize;
protected Object[] heap;
/** Determines the ordering of objects in this priority queue. Subclasses /** Determines the ordering of objects in this priority queue. Subclasses
must define this one method. */ must define this one method. */
@ -32,7 +32,12 @@ public abstract class PriorityQueue {
/** Subclass constructors must call this. */ /** Subclass constructors must call this. */
protected final void initialize(int maxSize) { protected final void initialize(int maxSize) {
size = 0; size = 0;
int heapSize = maxSize + 1; int heapSize;
if (0 == maxSize)
// We allocate 1 extra to avoid if statement in top()
heapSize = 2;
else
heapSize = maxSize + 1;
heap = new Object[heapSize]; heap = new Object[heapSize];
this.maxSize = maxSize; this.maxSize = maxSize;
} }
@ -54,26 +59,40 @@ public abstract class PriorityQueue {
* @param element * @param element
* @return true if element is added, false otherwise. * @return true if element is added, false otherwise.
*/ */
public boolean insert(Object element){ public boolean insert(Object element) {
if(size < maxSize){ return insertWithOverflow(element) != element;
}
/**
* insertWithOverflow() is the same as insert() except its
* return value: it returns the object (if any) that was
* dropped off the heap because it was full. This can be
* the given parameter (in case it is smaller than the
* full heap's minimum, and couldn't be added), or another
* object that was previously the smallest value in the
* heap and now has been replaced by a larger one, or null
* if the queue wasn't yet full with maxSize elements.
*/
public Object insertWithOverflow(Object element) {
if (size < maxSize) {
put(element); put(element);
return true; return null;
} } else if (size > 0 && !lessThan(element, heap[1])) {
else if(size > 0 && !lessThan(element, top())){ Object ret = heap[1];
heap[1] = element; heap[1] = element;
adjustTop(); adjustTop();
return true; return ret;
} else {
return element;
} }
else }
return false;
}
/** Returns the least element of the PriorityQueue in constant time. */ /** Returns the least element of the PriorityQueue in constant time. */
public final Object top() { public final Object top() {
if (size > 0) // We don't need to check size here: if maxSize is 0,
return heap[1]; // then heap is length 2 array with both entries null.
else // If size is 0 then heap[1] is already null.
return null; return heap[1];
} }
/** Removes and returns the least element of the PriorityQueue in log(size) /** Removes and returns the least element of the PriorityQueue in log(size)
@ -101,7 +120,6 @@ public abstract class PriorityQueue {
downHeap(); downHeap();
} }
/** Returns the number of elements currently stored in the PriorityQueue. */ /** Returns the number of elements currently stored in the PriorityQueue. */
public final int size() { public final int size() {
return size; return size;
@ -140,7 +158,7 @@ public abstract class PriorityQueue {
j = i << 1; j = i << 1;
k = j + 1; k = j + 1;
if (k <= size && lessThan(heap[k], heap[j])) { if (k <= size && lessThan(heap[k], heap[j])) {
j = k; j = k;
} }
} }
heap[i] = node; // install saved node heap[i] = node; // install saved node

View File

@ -18,7 +18,6 @@ package org.apache.lucene.util;
*/ */
import java.util.Random; import java.util.Random;
import org.apache.lucene.util.LuceneTestCase;
public class TestPriorityQueue public class TestPriorityQueue
extends LuceneTestCase extends LuceneTestCase
@ -107,4 +106,25 @@ public class TestPriorityQueue
assertEquals(3, pq.size()); assertEquals(3, pq.size());
assertEquals(3, ((Integer) pq.top()).intValue()); assertEquals(3, ((Integer) pq.top()).intValue());
} }
public void testInsertWithOverflow() {
int size = 4;
PriorityQueue pq = new IntegerQueue(size);
Integer i1 = new Integer(2);
Integer i2 = new Integer(3);
Integer i3 = new Integer(1);
Integer i4 = new Integer(5);
Integer i5 = new Integer(7);
Integer i6 = new Integer(1);
assertNull(pq.insertWithOverflow(i1));
assertNull(pq.insertWithOverflow(i2));
assertNull(pq.insertWithOverflow(i3));
assertNull(pq.insertWithOverflow(i4));
assertTrue(pq.insertWithOverflow(i5) == i3); // i3 should have been dropped
assertTrue(pq.insertWithOverflow(i6) == i6); // i6 should not have been inserted
assertEquals(size, pq.size());
assertEquals(2, ((Integer) pq.top()).intValue());
}
} }