mirror of https://github.com/apache/lucene.git
LUCENE-1089: add new PriorityQueue.insertWithOverflow method to allow for re-use
git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@604949 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
55d0c3a2f8
commit
10c1ec3a66
|
@ -82,7 +82,11 @@ API Changes
|
||||||
11. LUCENE-1079: DocValues cleanup: constructor now has no params,
|
11. LUCENE-1079: DocValues cleanup: constructor now has no params,
|
||||||
and getInnerArray() now throws UnsupportedOperationException (Doron Cohen)
|
and getInnerArray() now throws UnsupportedOperationException (Doron Cohen)
|
||||||
|
|
||||||
|
12. LUCENE-1089: Added PriorityQueue.insertWithOverflow, which returns
|
||||||
|
the Object (if any) that was bumped from the queue to allow
|
||||||
|
re-use. (Shai Erera via Mike McCandless)
|
||||||
|
|
||||||
|
|
||||||
Bug fixes
|
Bug fixes
|
||||||
|
|
||||||
1. LUCENE-933: QueryParser fixed to not produce empty sub
|
1. LUCENE-933: QueryParser fixed to not produce empty sub
|
||||||
|
|
|
@ -80,11 +80,16 @@ extends PriorityQueue {
|
||||||
return maxscore;
|
return maxscore;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Update maxscore.
|
||||||
|
private final void updateMaxScore(FieldDoc fdoc) {
|
||||||
|
maxscore = Math.max(maxscore, fdoc.score);
|
||||||
|
}
|
||||||
|
|
||||||
// The signature of this method takes a FieldDoc in order to avoid
|
// The signature of this method takes a FieldDoc in order to avoid
|
||||||
// the unneeded cast to retrieve the score.
|
// the unneeded cast to retrieve the score.
|
||||||
// inherit javadoc
|
// inherit javadoc
|
||||||
public boolean insert(FieldDoc fdoc) {
|
public boolean insert(FieldDoc fdoc) {
|
||||||
maxscore = Math.max(maxscore,fdoc.score);
|
updateMaxScore(fdoc);
|
||||||
return super.insert(fdoc);
|
return super.insert(fdoc);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -95,6 +100,14 @@ extends PriorityQueue {
|
||||||
return insert((FieldDoc)fdoc);
|
return insert((FieldDoc)fdoc);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// This overrides PriorityQueue.insertWithOverflow() so that
|
||||||
|
// updateMaxScore(FieldDoc) that keeps track of the score isn't accidentally
|
||||||
|
// bypassed.
|
||||||
|
public Object insertWithOverflow(Object element) {
|
||||||
|
updateMaxScore((FieldDoc) element);
|
||||||
|
return super.insertWithOverflow(element);
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns whether <code>a</code> is less relevant than <code>b</code>.
|
* Returns whether <code>a</code> is less relevant than <code>b</code>.
|
||||||
* @param a ScoreDoc
|
* @param a ScoreDoc
|
||||||
|
|
|
@ -104,20 +104,27 @@ public class FuzzyQuery extends MultiTermQuery {
|
||||||
FilteredTermEnum enumerator = getEnum(reader);
|
FilteredTermEnum enumerator = getEnum(reader);
|
||||||
int maxClauseCount = BooleanQuery.getMaxClauseCount();
|
int maxClauseCount = BooleanQuery.getMaxClauseCount();
|
||||||
ScoreTermQueue stQueue = new ScoreTermQueue(maxClauseCount);
|
ScoreTermQueue stQueue = new ScoreTermQueue(maxClauseCount);
|
||||||
|
ScoreTerm reusableST = null;
|
||||||
|
|
||||||
try {
|
try {
|
||||||
do {
|
do {
|
||||||
float minScore = 0.0f;
|
|
||||||
float score = 0.0f;
|
float score = 0.0f;
|
||||||
Term t = enumerator.term();
|
Term t = enumerator.term();
|
||||||
if (t != null) {
|
if (t != null) {
|
||||||
score = enumerator.difference();
|
score = enumerator.difference();
|
||||||
// terms come in alphabetical order, therefore if queue is full and score
|
if (reusableST == null) {
|
||||||
// not bigger than minScore, we can skip
|
reusableST = new ScoreTerm(t, score);
|
||||||
if(stQueue.size() < maxClauseCount || score > minScore){
|
} else if (score >= reusableST.score) {
|
||||||
stQueue.insert(new ScoreTerm(t, score));
|
// reusableST holds the last "rejected" entry, so, if
|
||||||
minScore = ((ScoreTerm)stQueue.top()).score; // maintain minScore
|
// this new score is not better than that, there's no
|
||||||
|
// need to try inserting it
|
||||||
|
reusableST.score = score;
|
||||||
|
reusableST.term = t;
|
||||||
|
} else {
|
||||||
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
reusableST = (ScoreTerm) stQueue.insertWithOverflow(reusableST);
|
||||||
}
|
}
|
||||||
} while (enumerator.next());
|
} while (enumerator.next());
|
||||||
} finally {
|
} finally {
|
||||||
|
|
|
@ -28,8 +28,8 @@ import org.apache.lucene.util.PriorityQueue;
|
||||||
* documents are collected.
|
* documents are collected.
|
||||||
**/
|
**/
|
||||||
public class TopDocCollector extends HitCollector {
|
public class TopDocCollector extends HitCollector {
|
||||||
private int numHits;
|
|
||||||
private float minScore = 0.0f;
|
private ScoreDoc reusableSD;
|
||||||
|
|
||||||
int totalHits;
|
int totalHits;
|
||||||
PriorityQueue hq;
|
PriorityQueue hq;
|
||||||
|
@ -42,7 +42,6 @@ public class TopDocCollector extends HitCollector {
|
||||||
}
|
}
|
||||||
|
|
||||||
TopDocCollector(int numHits, PriorityQueue hq) {
|
TopDocCollector(int numHits, PriorityQueue hq) {
|
||||||
this.numHits = numHits;
|
|
||||||
this.hq = hq;
|
this.hq = hq;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -50,10 +49,18 @@ public class TopDocCollector extends HitCollector {
|
||||||
public void collect(int doc, float score) {
|
public void collect(int doc, float score) {
|
||||||
if (score > 0.0f) {
|
if (score > 0.0f) {
|
||||||
totalHits++;
|
totalHits++;
|
||||||
if (hq.size() < numHits || score >= minScore) {
|
if (reusableSD == null) {
|
||||||
hq.insert(new ScoreDoc(doc, score));
|
reusableSD = new ScoreDoc(doc, score);
|
||||||
minScore = ((ScoreDoc)hq.top()).score; // maintain minScore
|
} else if (score >= reusableSD.score) {
|
||||||
|
// reusableSD holds the last "rejected" entry, so, if
|
||||||
|
// this new score is not better than that, there's no
|
||||||
|
// need to try inserting it
|
||||||
|
reusableSD.doc = doc;
|
||||||
|
reusableSD.score = score;
|
||||||
|
} else {
|
||||||
|
return;
|
||||||
}
|
}
|
||||||
|
reusableSD = (ScoreDoc) hq.insertWithOverflow(reusableSD);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -31,6 +31,8 @@ import org.apache.lucene.index.IndexReader;
|
||||||
**/
|
**/
|
||||||
public class TopFieldDocCollector extends TopDocCollector {
|
public class TopFieldDocCollector extends TopDocCollector {
|
||||||
|
|
||||||
|
private FieldDoc reusableFD;
|
||||||
|
|
||||||
/** Construct to collect a given number of hits.
|
/** Construct to collect a given number of hits.
|
||||||
* @param reader the index to be searched
|
* @param reader the index to be searched
|
||||||
* @param sort the sort criteria
|
* @param sort the sort criteria
|
||||||
|
@ -45,7 +47,18 @@ public class TopFieldDocCollector extends TopDocCollector {
|
||||||
public void collect(int doc, float score) {
|
public void collect(int doc, float score) {
|
||||||
if (score > 0.0f) {
|
if (score > 0.0f) {
|
||||||
totalHits++;
|
totalHits++;
|
||||||
hq.insert(new FieldDoc(doc, score));
|
if (reusableFD == null)
|
||||||
|
reusableFD = new FieldDoc(doc, score);
|
||||||
|
else {
|
||||||
|
// Whereas TopDocCollector can skip this if the
|
||||||
|
// score is not competitive, we cannot because the
|
||||||
|
// comparators in the FieldSortedHitQueue.lessThan
|
||||||
|
// aren't in general congruent with "higher score
|
||||||
|
// wins"
|
||||||
|
reusableFD.score = score;
|
||||||
|
reusableFD.doc = doc;
|
||||||
|
}
|
||||||
|
reusableFD = (FieldDoc) hq.insertWithOverflow(reusableFD);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -21,9 +21,9 @@ package org.apache.lucene.util;
|
||||||
least element can always be found in constant time. Put()'s and pop()'s
|
least element can always be found in constant time. Put()'s and pop()'s
|
||||||
require log(size) time. */
|
require log(size) time. */
|
||||||
public abstract class PriorityQueue {
|
public abstract class PriorityQueue {
|
||||||
private Object[] heap;
|
|
||||||
private int size;
|
private int size;
|
||||||
private int maxSize;
|
private int maxSize;
|
||||||
|
protected Object[] heap;
|
||||||
|
|
||||||
/** Determines the ordering of objects in this priority queue. Subclasses
|
/** Determines the ordering of objects in this priority queue. Subclasses
|
||||||
must define this one method. */
|
must define this one method. */
|
||||||
|
@ -32,7 +32,12 @@ public abstract class PriorityQueue {
|
||||||
/** Subclass constructors must call this. */
|
/** Subclass constructors must call this. */
|
||||||
protected final void initialize(int maxSize) {
|
protected final void initialize(int maxSize) {
|
||||||
size = 0;
|
size = 0;
|
||||||
int heapSize = maxSize + 1;
|
int heapSize;
|
||||||
|
if (0 == maxSize)
|
||||||
|
// We allocate 1 extra to avoid if statement in top()
|
||||||
|
heapSize = 2;
|
||||||
|
else
|
||||||
|
heapSize = maxSize + 1;
|
||||||
heap = new Object[heapSize];
|
heap = new Object[heapSize];
|
||||||
this.maxSize = maxSize;
|
this.maxSize = maxSize;
|
||||||
}
|
}
|
||||||
|
@ -54,26 +59,40 @@ public abstract class PriorityQueue {
|
||||||
* @param element
|
* @param element
|
||||||
* @return true if element is added, false otherwise.
|
* @return true if element is added, false otherwise.
|
||||||
*/
|
*/
|
||||||
public boolean insert(Object element){
|
public boolean insert(Object element) {
|
||||||
if(size < maxSize){
|
return insertWithOverflow(element) != element;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* insertWithOverflow() is the same as insert() except its
|
||||||
|
* return value: it returns the object (if any) that was
|
||||||
|
* dropped off the heap because it was full. This can be
|
||||||
|
* the given parameter (in case it is smaller than the
|
||||||
|
* full heap's minimum, and couldn't be added), or another
|
||||||
|
* object that was previously the smallest value in the
|
||||||
|
* heap and now has been replaced by a larger one, or null
|
||||||
|
* if the queue wasn't yet full with maxSize elements.
|
||||||
|
*/
|
||||||
|
public Object insertWithOverflow(Object element) {
|
||||||
|
if (size < maxSize) {
|
||||||
put(element);
|
put(element);
|
||||||
return true;
|
return null;
|
||||||
}
|
} else if (size > 0 && !lessThan(element, heap[1])) {
|
||||||
else if(size > 0 && !lessThan(element, top())){
|
Object ret = heap[1];
|
||||||
heap[1] = element;
|
heap[1] = element;
|
||||||
adjustTop();
|
adjustTop();
|
||||||
return true;
|
return ret;
|
||||||
|
} else {
|
||||||
|
return element;
|
||||||
}
|
}
|
||||||
else
|
}
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
/** Returns the least element of the PriorityQueue in constant time. */
|
/** Returns the least element of the PriorityQueue in constant time. */
|
||||||
public final Object top() {
|
public final Object top() {
|
||||||
if (size > 0)
|
// We don't need to check size here: if maxSize is 0,
|
||||||
return heap[1];
|
// then heap is length 2 array with both entries null.
|
||||||
else
|
// If size is 0 then heap[1] is already null.
|
||||||
return null;
|
return heap[1];
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Removes and returns the least element of the PriorityQueue in log(size)
|
/** Removes and returns the least element of the PriorityQueue in log(size)
|
||||||
|
@ -101,7 +120,6 @@ public abstract class PriorityQueue {
|
||||||
downHeap();
|
downHeap();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/** Returns the number of elements currently stored in the PriorityQueue. */
|
/** Returns the number of elements currently stored in the PriorityQueue. */
|
||||||
public final int size() {
|
public final int size() {
|
||||||
return size;
|
return size;
|
||||||
|
@ -140,7 +158,7 @@ public abstract class PriorityQueue {
|
||||||
j = i << 1;
|
j = i << 1;
|
||||||
k = j + 1;
|
k = j + 1;
|
||||||
if (k <= size && lessThan(heap[k], heap[j])) {
|
if (k <= size && lessThan(heap[k], heap[j])) {
|
||||||
j = k;
|
j = k;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
heap[i] = node; // install saved node
|
heap[i] = node; // install saved node
|
||||||
|
|
|
@ -18,7 +18,6 @@ package org.apache.lucene.util;
|
||||||
*/
|
*/
|
||||||
|
|
||||||
import java.util.Random;
|
import java.util.Random;
|
||||||
import org.apache.lucene.util.LuceneTestCase;
|
|
||||||
|
|
||||||
public class TestPriorityQueue
|
public class TestPriorityQueue
|
||||||
extends LuceneTestCase
|
extends LuceneTestCase
|
||||||
|
@ -107,4 +106,25 @@ public class TestPriorityQueue
|
||||||
assertEquals(3, pq.size());
|
assertEquals(3, pq.size());
|
||||||
assertEquals(3, ((Integer) pq.top()).intValue());
|
assertEquals(3, ((Integer) pq.top()).intValue());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void testInsertWithOverflow() {
|
||||||
|
int size = 4;
|
||||||
|
PriorityQueue pq = new IntegerQueue(size);
|
||||||
|
Integer i1 = new Integer(2);
|
||||||
|
Integer i2 = new Integer(3);
|
||||||
|
Integer i3 = new Integer(1);
|
||||||
|
Integer i4 = new Integer(5);
|
||||||
|
Integer i5 = new Integer(7);
|
||||||
|
Integer i6 = new Integer(1);
|
||||||
|
|
||||||
|
assertNull(pq.insertWithOverflow(i1));
|
||||||
|
assertNull(pq.insertWithOverflow(i2));
|
||||||
|
assertNull(pq.insertWithOverflow(i3));
|
||||||
|
assertNull(pq.insertWithOverflow(i4));
|
||||||
|
assertTrue(pq.insertWithOverflow(i5) == i3); // i3 should have been dropped
|
||||||
|
assertTrue(pq.insertWithOverflow(i6) == i6); // i6 should not have been inserted
|
||||||
|
assertEquals(size, pq.size());
|
||||||
|
assertEquals(2, ((Integer) pq.top()).intValue());
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue