diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 5a347d88e44..86d06bc8329 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -225,6 +225,9 @@ Improvements * LUCENE-8463: TopFieldCollector can now early-terminates queries when sorting by SortField.DOC. (Christophe Bismuth via Jim Ferenczi) +* LUCENE-8562: Speed up merging segments of points with data dimensions by only sorting on the indexed + dimensions. (Ignacio Vera) + Optimizations * LUCENE-8552: FieldInfos.getMergedFieldInfos no longer does any merging if there is <= 1 segment. diff --git a/lucene/core/src/java/org/apache/lucene/util/bkd/BKDWriter.java b/lucene/core/src/java/org/apache/lucene/util/bkd/BKDWriter.java index c4ac04eac63..1ffa27542e6 100644 --- a/lucene/core/src/java/org/apache/lucene/util/bkd/BKDWriter.java +++ b/lucene/core/src/java/org/apache/lucene/util/bkd/BKDWriter.java @@ -767,6 +767,10 @@ public class BKDWriter implements Closeable { /** Sort the heap writer by the specified dim */ private void sortHeapPointWriter(final HeapPointWriter writer, int dim) { final int pointCount = Math.toIntExact(this.pointCount); + sortHeapPointWriter(writer, pointCount, dim); + } + /** Sort the heap writer by the specified dim, using the caller-supplied pointCount instead of this.pointCount */ + private void sortHeapPointWriter(final HeapPointWriter writer, int pointCount, int dim) { // Tie-break by docID: // No need to tie break on ord, for the case where the same doc has the same value in a given dimension indexed more than once: it @@ -959,7 +963,7 @@ public class BKDWriter implements Closeable { } LongBitSet ordBitSet; - if (numDataDims > 1) { + if (numIndexDims > 1) { if (singleValuePerDoc) { ordBitSet = new LongBitSet(maxDoc); } else { @@ -994,7 +998,7 @@ public class BKDWriter implements Closeable { assert pointCount / numLeaves <= maxPointsInLeafNode: "pointCount=" + pointCount + " numLeaves=" + numLeaves + " maxPointsInLeafNode=" + maxPointsInLeafNode; // Sort all docs once by each dimension: - PathSlice[] sortedPointWriters = new 
PathSlice[numDataDims]; + PathSlice[] sortedPointWriters = new PathSlice[numIndexDims]; // This is only used on exception; on normal code paths we close all files we opened: List<Closeable> toCloseHeroically = new ArrayList<>(); @@ -1002,9 +1006,7 @@ public class BKDWriter implements Closeable { boolean success = false; try { //long t0 = System.nanoTime(); - // even with selective indexing we create the sortedPointWriters so we can compress - // the leaf node data by common prefix - for(int dim=0;dim