Speed up DocIDMerger on sorted indexes. (#12081)

In the case when an index is sorted on a low-cardinality field, or the index
sort order correlates with the order in which documents get ingested, we can
optimize `SortedDocIDMerger` by doing a single comparison with the doc ID on
the next sub. This single check covers, at the same time, whether the priority
queue needs reordering and whether the current sub reached `NO_MORE_DOCS`.
This commit is contained in:
Adrien Grand 2023-01-12 18:27:45 +01:00 committed by GitHub
parent 729fedcbac
commit aaab028266
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 22 additions and 1 deletions

View File

@ -280,6 +280,8 @@ Optimizations
* GITHUB#12079: Faster merging of 1D points. (Adrien Grand)
* GITHUB#12081: Small merging speedup on sorted indexes. (Adrien Grand)
Other
---------------------

View File

@ -137,6 +137,7 @@ public abstract class DocIDMerger<T extends DocIDMerger.Sub> {
// The subs being merged. NOTE(review): where this list is assigned is not visible in this
// excerpt; presumably the constructor stores its `subs` argument here.
private final List<T> subs;
// The sub that next() is currently pulling documents from; next() sets it to null once this
// sub is exhausted and the queue is empty.
private T current;
// The subs other than `current`; its top entry's mappedDocID is what next() compares against.
private final PriorityQueue<T> queue;
// Cached copy of queue.top().mappedDocID, or NO_MORE_DOCS when the queue is empty.
// Refreshed by setQueueMinDocID() so that next() can decide with a single int comparison.
private int queueMinDocID;
private SortedDocIDMerger(List<T> subs, int maxCount) throws IOException {
if (maxCount <= 1) {
@ -154,6 +155,14 @@ public abstract class DocIDMerger<T extends DocIDMerger.Sub> {
reset();
}
/**
 * Refreshes {@link #queueMinDocID} from the current state of the queue: the mapped doc ID of the
 * queue's top entry, or {@code NO_MORE_DOCS} when the queue holds no subs.
 */
private void setQueueMinDocID() {
  queueMinDocID =
      queue.size() > 0 ? queue.top().mappedDocID : DocIdSetIterator.NO_MORE_DOCS;
}
@Override
public void reset() throws IOException {
// caller may not have fully consumed the queue:
@ -171,23 +180,33 @@ public abstract class DocIDMerger<T extends DocIDMerger.Sub> {
queue.add(sub);
} // else all docs in this sub were deleted; do not add it to the queue!
}
setQueueMinDocID();
}
/**
 * Advances the current sub and returns the sub that now holds the next document.
 *
 * @return the sub positioned on the next document, or {@code null} when the current sub is
 *     exhausted and the queue is empty
 * @throws IOException propagated from advancing the current sub
 */
@Override
public T next() throws IOException {
  int nextDoc = current.nextMappedDoc();
  if (nextDoc < queueMinDocID) {
    // This should be the common case when index sorting is either disabled, or enabled on a
    // low-cardinality field, or enabled on a field that correlates with index order.
    // A single comparison tells us both that the current sub is not exhausted and that the
    // queue does not need reordering.
    return current;
  }
  if (nextDoc == NO_MORE_DOCS) {
    // The current sub is exhausted: move on to the queue's top sub, if there is one.
    if (queue.size() == 0) {
      current = null;
    } else {
      current = queue.pop();
    }
  } else if (queue.size() > 0) {
    // The current sub moved past the queue's top entry: swap the two so that `current` is again
    // the sub with the smallest mapped doc ID.
    assert queueMinDocID == queue.top().mappedDocID;
    assert nextDoc > queueMinDocID;
    T newCurrent = queue.top();
    queue.updateTop(current);
    current = newCurrent;
  }
  // The queue's top may have changed above; refresh the cached value for the next call.
  setQueueMinDocID();
  return current;
}
}