LUCENE-9960: Avoid unnecessary top element replacement for equal elements in PriorityQueue. (#141)

This commit is contained in:
Dawid Weiss 2021-05-18 08:49:53 +02:00 committed by GitHub
parent 406aef8a4b
commit ba9fee502b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 61 additions and 21 deletions

View File

@ -118,6 +118,8 @@ API Changes
Improvements
* LUCENE-9960: Avoid unnecessary top element replacement for equal elements in PriorityQueue. (Dawid Weiss)
* LUCENE-9687: Hunspell support improvements: add API for spell-checking and suggestions, support compound words,
fix various behavior differences between Java and C++ implementations, improve performance (Peter Gromov, Dawid Weiss)

View File

@ -21,7 +21,7 @@ import java.util.NoSuchElementException;
import java.util.function.Supplier;
/**
* A PriorityQueue maintains a partial ordering of its elements such that the least element can
* A priority queue maintains a partial ordering of its elements such that the least element can
* always be found in constant time. Put()'s and pop()'s require log(size) time but the remove()
* cost implemented here is linear.
*
@ -74,11 +74,11 @@ public abstract class PriorityQueue<T> implements Iterable<T> {
*/
public PriorityQueue(int maxSize, Supplier<T> sentinelObjectSupplier) {
final int heapSize;
if (0 == maxSize) {
// We allocate 1 extra to avoid if statement in top()
heapSize = 2;
} else {
if ((maxSize < 0) || (maxSize >= ArrayUtil.MAX_ARRAY_LENGTH)) {
// Throw exception to prevent confusing OOME:
throw new IllegalArgumentException(
@ -89,7 +89,8 @@ public abstract class PriorityQueue<T> implements Iterable<T> {
// 1-based not 0-based. heap[0] is unused.
heapSize = maxSize + 1;
}
// T is unbounded type, so this unchecked cast works always:
// T is an unbounded type, so this unchecked cast works always.
@SuppressWarnings("unchecked")
final T[] h = (T[]) new Object[heapSize];
this.heap = h;
@ -121,9 +122,11 @@ public abstract class PriorityQueue<T> implements Iterable<T> {
* @return the new 'top' element in the queue.
*/
public final T add(T element) {
size++;
heap[size] = element;
upHeap(size);
// don't modify size until we know heap access didn't throw AIOOB.
int index = size + 1;
heap[index] = element;
size = index;
upHeap(index);
return heap[1];
}
@ -138,7 +141,7 @@ public abstract class PriorityQueue<T> implements Iterable<T> {
if (size < maxSize) {
add(element);
return null;
} else if (size > 0 && !lessThan(element, heap[1])) {
} else if (size > 0 && lessThan(heap[1], element)) {
T ret = heap[1];
heap[1] = element;
updateTop();
@ -273,7 +276,7 @@ public abstract class PriorityQueue<T> implements Iterable<T> {
* @lucene.internal
*/
protected final Object[] getHeapArray() {
return (Object[]) heap;
return heap;
}
@Override

View File

@ -21,6 +21,8 @@ import java.util.Iterator;
import java.util.List;
import java.util.NoSuchElementException;
import java.util.Random;
import org.hamcrest.MatcherAssert;
import org.hamcrest.Matchers;
public class TestPriorityQueue extends LuceneTestCase {
@ -47,6 +49,50 @@ public class TestPriorityQueue extends LuceneTestCase {
}
}
public void testZeroSizedQueue() {
PriorityQueue<Integer> pq = new IntegerQueue(0);
assertEquals((Object) 1, pq.insertWithOverflow(1));
assertEquals(0, pq.size());
// should fail, but passes and modifies the top...
pq.add(1);
assertEquals((Object) 1, pq.top());
}
public void testNoExtraWorkOnEqualElements() {
class Value {
private final int index;
private final int value;
Value(int index, int value) {
this.index = index;
this.value = value;
}
}
PriorityQueue<Value> pq =
new PriorityQueue<>(5) {
@Override
protected boolean lessThan(Value a, Value b) {
return a.value < b.value;
}
};
// Make all elements equal but record insertion order.
for (int i = 0; i < 100; i++) {
pq.insertWithOverflow(new Value(i, 0));
}
ArrayList<Integer> indexes = new ArrayList<>();
for (Value e : pq) {
indexes.add(e.index);
}
// All elements are "equal" so we should have exactly the indexes of those elements that were
// added first.
MatcherAssert.assertThat(indexes, Matchers.containsInAnyOrder(0, 1, 2, 3, 4));
}
public void testPQ() throws Exception {
testPQ(atLeast(10000), random());
}
@ -61,13 +107,6 @@ public class TestPriorityQueue extends LuceneTestCase {
pq.add(next);
}
// Date end = new Date();
// System.out.print(((float)(end.getTime()-start.getTime()) / count) * 1000);
// System.out.println(" microseconds/put");
// start = new Date();
int last = Integer.MIN_VALUE;
for (int i = 0; i < count; i++) {
Integer next = pq.pop();
@ -77,10 +116,6 @@ public class TestPriorityQueue extends LuceneTestCase {
}
assertEquals(sum, sum2);
// end = new Date();
// System.out.print(((float)(end.getTime()-start.getTime()) / count) * 1000);
// System.out.println(" microseconds/pop");
}
public void testClear() {
@ -141,7 +176,7 @@ public class TestPriorityQueue extends LuceneTestCase {
if (evicted != null) {
assertTrue(sds.remove(evicted));
if (evicted != newEntry) {
assertTrue(evicted == lastLeast);
assertSame(evicted, lastLeast);
}
}
Integer newLeast = pq.top();
@ -159,7 +194,7 @@ public class TestPriorityQueue extends LuceneTestCase {
for (int p = 0; p < 500000; p++) {
int element = (int) (random.nextFloat() * (sds.size() - 1));
Integer objectToRemove = sds.get(element);
assertTrue(sds.remove(element) == objectToRemove);
assertSame(sds.remove(element), objectToRemove);
assertTrue(pq.remove(objectToRemove));
pq.checkValidity();
Integer newEntry = Math.abs(random.nextInt());