mirror of https://github.com/apache/lucene.git
Speed up DocIdMerger on sorted indexes. (#12081)
In the case when an index is sorted on a low-cardinality field, or the index sort order correlates with the order in which documents get ingested, we can optimize `SortedDocIDMerger` by doing a single comparison with the doc ID on the next sub. This check covers at the same time whether the priority queue needs reordering and whether the current sub has reached `NO_MORE_DOCS`.
This commit is contained in:
parent
729fedcbac
commit
aaab028266
|
@ -280,6 +280,8 @@ Optimizations
|
|||
|
||||
* GITHUB#12079: Faster merging of 1D points. (Adrien Grand)
|
||||
|
||||
* GITHUB#12081: Small merging speedup on sorted indexes. (Adrien Grand)
|
||||
|
||||
|
||||
Other
|
||||
---------------------
|
||||
|
|
|
@ -137,6 +137,7 @@ public abstract class DocIDMerger<T extends DocIDMerger.Sub> {
|
|||
private final List<T> subs;
|
||||
private T current;
|
||||
private final PriorityQueue<T> queue;
|
||||
private int queueMinDocID;
|
||||
|
||||
private SortedDocIDMerger(List<T> subs, int maxCount) throws IOException {
|
||||
if (maxCount <= 1) {
|
||||
|
@ -154,6 +155,14 @@ public abstract class DocIDMerger<T extends DocIDMerger.Sub> {
|
|||
reset();
|
||||
}
|
||||
|
||||
/**
 * Refreshes the cached {@code queueMinDocID}: the mapped doc ID of the sub currently at the top
 * of the queue, or {@code NO_MORE_DOCS} when the queue is empty.
 */
private void setQueueMinDocID() {
|
||||
if (queue.size() > 0) {
|
||||
queueMinDocID = queue.top().mappedDocID;
|
||||
} else {
|
||||
// Empty queue: store the sentinel; next() compares each new doc ID against this value.
queueMinDocID = DocIdSetIterator.NO_MORE_DOCS;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void reset() throws IOException {
|
||||
// caller may not have fully consumed the queue:
|
||||
|
@ -171,23 +180,33 @@ public abstract class DocIDMerger<T extends DocIDMerger.Sub> {
|
|||
queue.add(sub);
|
||||
} // else all docs in this sub were deleted; do not add it to the queue!
|
||||
}
|
||||
setQueueMinDocID();
|
||||
}
|
||||
|
||||
/**
 * Advances the current sub and returns the sub that now holds the next document, or {@code null}
 * once the current sub is exhausted and the queue is empty.
 */
@Override
|
||||
public T next() throws IOException {
|
||||
// Advance the sub we returned last time.
int nextDoc = current.nextMappedDoc();
|
||||
// Fast path: one comparison against the cached doc ID of the queue's top sub.
if (nextDoc < queueMinDocID) {
|
||||
// This should be the common case when index sorting is either disabled, or enabled on a
|
||||
// low-cardinality field, or enabled on a field that correlates with index order.
|
||||
return current;
|
||||
}
|
||||
|
||||
// Slow path: the current sub is either exhausted or no longer ahead of the queue's top.
if (nextDoc == NO_MORE_DOCS) {
|
||||
if (queue.size() == 0) {
|
||||
// Nothing left anywhere: null is what gets returned below.
current = null;
|
||||
} else {
|
||||
// Current sub is exhausted: continue with the sub at the top of the queue.
current = queue.pop();
|
||||
}
|
||||
// NOTE(review): the next two `else if` lines look like the removed/added pair of one diff line
// rendered without +/- markers; presumably only the second one exists after the commit - confirm.
} else if (queue.size() > 0 && nextDoc > queue.top().mappedDocID) {
|
||||
} else if (queue.size() > 0) {
|
||||
assert queueMinDocID == queue.top().mappedDocID;
|
||||
assert nextDoc > queueMinDocID;
|
||||
// Swap roles: the queue's top becomes current, and the old current takes its slot in the queue.
T newCurrent = queue.top();
|
||||
queue.updateTop(current);
|
||||
current = newCurrent;
|
||||
}
|
||||
|
||||
// The queue changed (or may have changed), so refresh the cached top doc ID.
setQueueMinDocID();
|
||||
return current;
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue