LUCENE-8600: Use a faster sort in DocValuesFieldUpdates.

This commit is contained in:
Adrien Grand 2018-12-17 15:21:17 +01:00
parent d185ba99de
commit dcd4a288b4
2 changed files with 54 additions and 13 deletions

View File

@ -286,6 +286,10 @@ Optimizations
* LUCENE-8599: Use sparse bitset to store docs in SingleValueDocValuesFieldUpdates.
(Simon Willnauer, Adrien Grand)
* LUCENE-8600: Doc-value updates get applied faster by sorting with quicksort,
which needs to perform fewer swaps than the in-place mergesort used previously.
(Adrien Grand)
Other
* LUCENE-8573: BKDWriter now uses FutureArrays#mismatch to compute shared prefixes.

View File

@ -21,7 +21,7 @@ import org.apache.lucene.util.Accountable;
import org.apache.lucene.util.BitSet;
import org.apache.lucene.util.BitSetIterator;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.InPlaceMergeSorter;
import org.apache.lucene.util.IntroSorter;
import org.apache.lucene.util.PriorityQueue;
import org.apache.lucene.util.RamUsageEstimator;
import org.apache.lucene.util.SparseFixedBitSet;
@ -289,9 +289,23 @@ abstract class DocValuesFieldUpdates implements Accountable {
if (size < docs.size()) {
resize(size);
}
new InPlaceMergeSorter() {
if (size > 0) {
// We need a stable sort but InPlaceMergeSorter performs lots of swaps
// which hurts performance due to all the packed ints we are using.
// Another option would be TimSorter, but it needs additional API (copy to
// temp storage, compare with item in temp storage, etc.) so we instead
// use quicksort and record ords of each update to guarantee stability.
final PackedInts.Mutable ords = PackedInts.getMutable(size, PackedInts.bitsRequired(size - 1), PackedInts.DEFAULT);
for (int i = 0; i < size; ++i) {
ords.set(i, i);
}
new IntroSorter() {
@Override
protected void swap(int i, int j) {
  // Exchange the recorded ordinals first so that they keep travelling
  // together with the updates they belong to.
  final long ordAtI = ords.get(i);
  final long ordAtJ = ords.get(j);
  ords.set(i, ordAtJ);
  ords.set(j, ordAtI);
  // Then let the enclosing instance exchange the two updates themselves.
  DocValuesFieldUpdates.this.swap(i, j);
}
@ -300,10 +314,33 @@ abstract class DocValuesFieldUpdates implements Accountable {
// increasing docID order:
// NOTE: we can have ties here, when the same docID was updated in the same segment, in which case we rely on sort being
// stable and preserving original order so the last update to that docID wins
return Long.compare(docs.get(i)>>>1, docs.get(j)>>>1);
int cmp = Long.compare(docs.get(i)>>>1, docs.get(j)>>>1);
if (cmp == 0) {
cmp = (int) (ords.get(i) - ords.get(j));
}
return cmp;
}
long pivotDoc;
int pivotOrd;
@Override
protected void setPivot(int i) {
  // Cache both sort keys of slot i: the doc entry shifted right by one bit
  // (the same key that comparePivot derives for the other slot) and the
  // ordinal recorded for the slot, which serves as the tie-breaker.
  final long encodedDoc = docs.get(i);
  pivotDoc = encodedDoc >>> 1;
  pivotOrd = (int) ords.get(i);
}
@Override
protected int comparePivot(int j) {
  // Primary key: the doc entry shifted right by one bit, compared against the cached pivot.
  final int byDoc = Long.compare(pivotDoc, docs.get(j) >>> 1);
  if (byDoc != 0) {
    return byDoc;
  }
  // Tie on the primary key: fall back to the recorded ordinals. Ordinals are
  // initialised to 0..size-1, so this int subtraction cannot overflow.
  return pivotOrd - (int) ords.get(j);
}
}.sort(0, size);
}
}
/** Returns true if this instance contains any updates. */
synchronized boolean any() {