diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 09f90114442..bbb550b9c23 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -286,6 +286,10 @@ Optimizations * LUCENE-8599: Use sparse bitset to store docs in SingleValueDocValuesFieldUpdates. (Simon Willnauer, Adrien Grand) +* LUCENE-8600: Doc-value updates get applied faster by sorting with quicksort, + rather than an in-place mergesort, which needs to perform fewer swaps. + (Adrien Grand) + Other * LUCENE-8573: BKDWriter now uses FutureArrays#mismatch to compute shared prefixes. diff --git a/lucene/core/src/java/org/apache/lucene/index/DocValuesFieldUpdates.java b/lucene/core/src/java/org/apache/lucene/index/DocValuesFieldUpdates.java index 464a7f2f219..883cf525589 100644 --- a/lucene/core/src/java/org/apache/lucene/index/DocValuesFieldUpdates.java +++ b/lucene/core/src/java/org/apache/lucene/index/DocValuesFieldUpdates.java @@ -21,7 +21,7 @@ import org.apache.lucene.util.Accountable; import org.apache.lucene.util.BitSet; import org.apache.lucene.util.BitSetIterator; import org.apache.lucene.util.BytesRef; -import org.apache.lucene.util.InPlaceMergeSorter; +import org.apache.lucene.util.IntroSorter; import org.apache.lucene.util.PriorityQueue; import org.apache.lucene.util.RamUsageEstimator; import org.apache.lucene.util.SparseFixedBitSet; @@ -289,20 +289,57 @@ abstract class DocValuesFieldUpdates implements Accountable { if (size < docs.size()) { resize(size); } - new InPlaceMergeSorter() { - @Override - protected void swap(int i, int j) { - DocValuesFieldUpdates.this.swap(i, j); + if (size > 0) { + // We need a stable sort but InPlaceMergeSorter performs lots of swaps + // which hurts performance due to all the packed ints we are using. + // Another option would be TimSorter, but it needs additional API (copy to + // temp storage, compare with item in temp storage, etc.) so we instead + // use quicksort and record ords of each update to guarantee stability. + final PackedInts.Mutable ords = PackedInts.getMutable(size, PackedInts.bitsRequired(size - 1), PackedInts.DEFAULT); + for (int i = 0; i < size; ++i) { + ords.set(i, i); } + new IntroSorter() { + @Override + protected void swap(int i, int j) { + final long tmpOrd = ords.get(i); + ords.set(i, ords.get(j)); + ords.set(j, tmpOrd); - @Override - protected int compare(int i, int j) { - // increasing docID order: - // NOTE: we can have ties here, when the same docID was updated in the same segment, in which case we rely on sort being - // stable and preserving original order so the last update to that docID wins - return Long.compare(docs.get(i)>>>1, docs.get(j)>>>1); - } - }.sort(0, size); + DocValuesFieldUpdates.this.swap(i, j); + } + + @Override + protected int compare(int i, int j) { + // increasing docID order: + // NOTE: we can have ties here, when the same docID was updated in the same segment, in which case we rely on sort being + // stable and preserving original order so the last update to that docID wins + int cmp = Long.compare(docs.get(i)>>>1, docs.get(j)>>>1); + if (cmp == 0) { + cmp = (int) (ords.get(i) - ords.get(j)); + } + return cmp; + } + + long pivotDoc; + int pivotOrd; + + @Override + protected void setPivot(int i) { + pivotDoc = docs.get(i) >>> 1; + pivotOrd = (int) ords.get(i); + } + + @Override + protected int comparePivot(int j) { + int cmp = Long.compare(pivotDoc, docs.get(j) >>> 1); + if (cmp == 0) { + cmp = pivotOrd - (int) ords.get(j); + } + return cmp; + } + }.sort(0, size); + } } /** Returns true if this instance contains any updates. */