LUCENE-10653: Heapify in BMMScorer (#1022)

This commit is contained in:
Greg Miller 2022-07-19 13:49:31 -07:00 committed by GitHub
parent a35dee5b27
commit 3d7d85f245
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 209 additions and 7 deletions

View File

@ -141,6 +141,8 @@ Optimizations
* GITHUB#1007: Optimize IntersectVisitor#visit implementations for certain bulk-add cases.
(Greg Miller)
* LUCENE-10653: BlockMaxMaxscoreScorer uses heapify instead of individual adds. (Greg Miller)
Changes in runtime behavior
---------------------

View File

@ -158,9 +158,10 @@ class BlockMaxMaxscoreScorer extends Scorer {
// list adjusted
if (removedEssentialScorer) {
essentialsScorers.clear();
for (int i = firstEssentialScorerIndex; i < allScorers.length; ++i) {
essentialsScorers.add(allScorers[i]);
}
essentialsScorers.addAll(
allScorers,
firstEssentialScorerIndex,
allScorers.length - firstEssentialScorerIndex);
}
}
@ -199,7 +200,6 @@ class BlockMaxMaxscoreScorer extends Scorer {
}
private void repartitionLists() {
essentialsScorers.clear();
firstEssentialScorerIndex = 0;
Arrays.sort(allScorers, Comparator.comparingDouble(scorer -> scorer.maxScore));
@ -214,9 +214,11 @@ class BlockMaxMaxscoreScorer extends Scorer {
firstEssentialScorerIndex++;
nonEssentialMaxScoreSum += w.maxScore;
}
for (int i = firstEssentialScorerIndex; i < allScorers.length; ++i) {
essentialsScorers.add(allScorers[i]);
}
essentialsScorers.clear();
essentialsScorers.addAll(
allScorers,
firstEssentialScorerIndex,
allScorers.length - firstEssentialScorerIndex);
}
@Override

View File

@ -103,6 +103,48 @@ public final class DisiPriorityQueue implements Iterable<DisiWrapper> {
return heap[0];
}
/**
 * Adds {@code len} entries from {@code entries}, starting at {@code offset}, to this queue in one
 * step. The entries are appended to the backing array and the heap order (smallest {@code doc}
 * at the root) is then re-established over the whole array instead of sifting each entry
 * individually.
 *
 * @param entries source array of wrappers
 * @param offset index of the first entry in {@code entries} to add
 * @param len number of entries to add; {@code 0} is a no-op
 * @throws IndexOutOfBoundsException if the queue does not have room for {@code len} more entries
 */
public void addAll(DisiWrapper[] entries, int offset, int len) {
  // An empty range leaves the queue untouched:
  if (len == 0) {
    return;
  }

  // Refuse up front instead of overflowing the backing array part-way through:
  if (size + len > heap.length) {
    final int remainingCapacity = heap.length - size;
    throw new IndexOutOfBoundsException(
        "Cannot add " + len + " elements to a queue with remaining capacity " + remainingCapacity);
  }

  // Append the new entries directly behind the current content:
  System.arraycopy(entries, offset, heap, size, len);
  size += len;

  // Bottom-up heapify: sift down every node that has at least one child, starting with the
  // last such node and finishing at the root.
  final int firstLeaf = size >>> 1;
  for (int subtreeRoot = firstLeaf - 1; subtreeRoot >= 0; subtreeRoot--) {
    // "sinking" is held aside while smaller children are moved up into the hole it leaves.
    final DisiWrapper sinking = heap[subtreeRoot];
    int hole = subtreeRoot;
    while (hole < firstLeaf) {
      // Pick whichever child has the smaller doc; the right child may not exist.
      int smallest = leftNode(hole);
      final int right = rightNode(smallest);
      if (right < size && heap[right].doc < heap[smallest].doc) {
        smallest = right;
      }
      final DisiWrapper candidate = heap[smallest];
      if (candidate.doc >= sinking.doc) {
        // Neither child is smaller: the hole is the final position.
        break;
      }
      heap[hole] = candidate;
      hole = smallest;
    }
    heap[hole] = sinking;
  }
}
public DisiWrapper pop() {
final DisiWrapper[] heap = this.heap;
final DisiWrapper result = heap[0];

View File

@ -0,0 +1,156 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.search;
import java.io.IOException;
import java.util.Arrays;
import java.util.Comparator;
import java.util.PrimitiveIterator.OfInt;
import java.util.Random;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.tests.util.LuceneTestCase;
/** Randomized tests for {@link DisiPriorityQueue}. */
public class TestDisiPriorityQueue extends LuceneTestCase {

  /**
   * Fills a queue through one of three paths (individual {@code add} calls, a single bulk {@code
   * addAll}, or some {@code add} calls followed by {@code addAll} for the rest) and then drains
   * it, checking at every step that {@code top()} reports the smallest current doc among all
   * wrappers.
   */
  public void testRandom() throws Exception {
    Random r = random();

    int size = r.nextInt(TEST_NIGHTLY ? 1000 : 10);
    DisiWrapper[] all = new DisiWrapper[size];
    for (int i = 0; i < size; i++) {
      DocIdSetIterator it = randomDisi(r);
      DisiWrapper w = wrapper(it);
      all[i] = w;
    }

    DisiPriorityQueue pq = new DisiPriorityQueue(size);
    if (r.nextBoolean()) {
      for (DisiWrapper w : all) {
        pq.add(w);
      }
    } else {
      // The mixed path needs a split point in [1, size), which only exists when size > 1;
      // nextInt(1, size) throws IllegalArgumentException for size <= 1.
      if (r.nextInt(10) < 2 && size > 1) {
        int len = r.nextInt(1, size);
        for (int i = 0; i < len; i++) {
          pq.add(all[i]);
        }
        pq.addAll(all, len, size - len);
      } else {
        pq.addAll(all, 0, size);
      }
    }

    while (pq.size() > 0) {
      // Re-sort the reference array by current doc; exhausted wrappers carry NO_MORE_DOCS and
      // therefore sort last.
      Arrays.sort(all, Comparator.comparingInt(w -> w.doc));
      DisiWrapper top = pq.top();
      assertEquals(all[0].doc, top.doc);
      top.doc = top.iterator.nextDoc();
      if (top.doc == DocIdSetIterator.NO_MORE_DOCS) {
        pq.pop();
      } else {
        pq.updateTop();
      }
    }
  }

  /** Wraps {@code iterator} in a {@link DisiWrapper} via a scorer built from a {@link DummyQuery}. */
  private static DisiWrapper wrapper(DocIdSetIterator iterator) throws IOException {
    Query q = new DummyQuery(iterator);
    Scorer s = q.createWeight(null, ScoreMode.COMPLETE_NO_SCORES, 1.0f).scorer(null);
    return new DisiWrapper(s);
  }

  /**
   * Returns an iterator over at most 49 random, distinct, ascending doc IDs drawn from {@code [0,
   * NO_MORE_DOCS - 1)}. The iterator starts unpositioned at {@code -1}.
   */
  private static DocIdSetIterator randomDisi(Random r) {
    int maxSize = r.nextInt(50);
    OfInt randomInts =
        r.ints(maxSize, 0, DocIdSetIterator.NO_MORE_DOCS - 1).sorted().distinct().iterator();
    return new DocIdSetIterator() {
      private int doc = -1;

      @Override
      public int docID() {
        return doc;
      }

      @Override
      public int nextDoc() {
        if (randomInts.hasNext()) {
          return doc = randomInts.nextInt();
        } else {
          return doc = DocIdSetIterator.NO_MORE_DOCS;
        }
      }

      @Override
      public int advance(int target) {
        // Linear scan; terminates because nextDoc() eventually yields NO_MORE_DOCS.
        while (doc < target) {
          nextDoc();
        }
        return doc;
      }

      @Override
      public long cost() {
        // Upper bound: duplicates removed by distinct() may make the real count smaller.
        return maxSize;
      }
    };
  }

  /**
   * Minimal query whose scorer iterates over a caller-supplied {@link DocIdSetIterator}. Each
   * instance gets a unique id used for equality, hashing and {@code toString}.
   */
  private static class DummyQuery extends Query {

    private static int COUNTER = 0;
    private final int id;
    private final DocIdSetIterator disi;

    DummyQuery(DocIdSetIterator disi) {
      id = COUNTER++;
      this.disi = disi;
    }

    @Override
    public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost)
        throws IOException {
      return new ConstantScoreWeight(this, boost) {
        @Override
        public Scorer scorer(LeafReaderContext context) {
          // The context is ignored: the scorer always iterates the wrapped disi.
          return new ConstantScoreScorer(this, score(), scoreMode, disi);
        }

        @Override
        public boolean isCacheable(LeafReaderContext ctx) {
          return true;
        }
      };
    }

    @Override
    public String toString(String field) {
      return "DummyQuery (" + id + ")";
    }

    @Override
    public void visit(QueryVisitor visitor) {}

    @Override
    public boolean equals(Object other) {
      return sameClassAs(other) && id == ((DummyQuery) other).id;
    }

    @Override
    public int hashCode() {
      return id;
    }
  }
}