diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 6853ea9b073..736b7f2f4ef 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -176,6 +176,8 @@ Optimizations * GITHUB#12604: Estimate the block size of FST BytesStore in BlockTreeTermsWriter to reduce GC load during indexing. (Guo Feng) +* GITHUB#12623: Use a MergeSorter taking advantage of extra storage for StableMSBRadixSorter. (Guo Feng) + Changes in runtime behavior --------------------- diff --git a/lucene/core/src/java/org/apache/lucene/util/StableMSBRadixSorter.java b/lucene/core/src/java/org/apache/lucene/util/StableMSBRadixSorter.java index c6ec7744775..ecccbc58f53 100644 --- a/lucene/core/src/java/org/apache/lucene/util/StableMSBRadixSorter.java +++ b/lucene/core/src/java/org/apache/lucene/util/StableMSBRadixSorter.java @@ -38,7 +38,7 @@ public abstract class StableMSBRadixSorter extends MSBRadixSorter { @Override protected Sorter getFallbackSorter(int k) { - return new InPlaceMergeSorter() { + return new MergeSorter() { @Override protected void swap(int i, int j) { StableMSBRadixSorter.this.swap(i, j); @@ -78,4 +78,64 @@ public abstract class StableMSBRadixSorter extends MSBRadixSorter { } restore(from, to); } + + /** A merge sorter that merges runs via save/restore temporary storage rather than in place. */ + protected abstract class MergeSorter extends Sorter { + @Override + public void sort(int from, int to) { + checkRange(from, to); + mergeSort(from, to); + } + + private void mergeSort(int from, int to) { + if (to - from < BINARY_SORT_THRESHOLD) { + binarySort(from, to); + } else { + final int mid = (from + to) >>> 1; /* unsigned shift: safe if from + to overflows */ + mergeSort(from, mid); + mergeSort(mid, to); + merge(from, to, mid); + } + } + + /** + * Saves {@code len} slots one by one. Exposing a bulk copy to implementations was tried but + * brought no noticeable benchmark improvement, as {@code len} is usually small. 
+ */ + private void bulkSave(int from, int tmpFrom, int len) { + for (int i = 0; i < len; i++) { + save(from + i, tmpFrom + i); + } + } + + private void merge(int from, int to, int mid) { /* merges sorted [from, mid) and [mid, to) */ + assert to > mid && mid > from; + if (compare(mid - 1, mid) <= 0) { + // last of left run <= first of right run: already sorted. + return; + } + int left = from; + int right = mid; + int index = from; + while (true) { + int cmp = compare(left, right); + if (cmp <= 0) { /* tie goes left, preserving the order of equal elements */ + save(left++, index++); + if (left == mid) { /* left run exhausted: save the rest of the right run */ + assert index == right; + bulkSave(right, index, to - right); + break; + } + } else { + save(right++, index++); + if (right == to) { /* right run exhausted: save the rest of the left run */ + assert to - index == mid - left; + bulkSave(left, index, mid - left); + break; + } + } + } + restore(from, to); /* presumably copies the saved range back; confirm */ + } + } } diff --git a/lucene/core/src/test/org/apache/lucene/util/bkd/TestBKD.java b/lucene/core/src/test/org/apache/lucene/util/bkd/TestBKD.java index 4552cd97cce..b464971b393 100644 --- a/lucene/core/src/test/org/apache/lucene/util/bkd/TestBKD.java +++ b/lucene/core/src/test/org/apache/lucene/util/bkd/TestBKD.java @@ -1597,12 +1597,12 @@ public class TestBKD extends LuceneTestCase { @Override public void save(int i, int j) { - throw new UnsupportedOperationException(); + // do nothing } @Override public void restore(int i, int j) { - throw new UnsupportedOperationException(); + // do nothing } @Override @@ -1689,6 +1689,10 @@ public class TestBKD extends LuceneTestCase { } MutablePointTree val = new MutablePointTree() { + + final byte[][] tmpValues = new byte[numValues][]; + final int[] tmpDocs = new int[numValues]; + @Override public void getValue(int i, BytesRef packedValue) { packedValue.bytes = pointValue[i]; @@ -1718,12 +1722,14 @@ public class TestBKD extends LuceneTestCase { @Override public void save(int i, int j) { - throw new UnsupportedOperationException(); + tmpValues[j] = pointValue[i]; + tmpDocs[j] = docId[i]; } @Override public void restore(int i, int j) { - throw new UnsupportedOperationException(); + System.arraycopy(tmpValues, i, pointValue, i, 
j - i); + System.arraycopy(tmpDocs, i, docId, i, j - i); } @Override