Use a MergeSorter taking advantage of extra storage for StableMSBRadixSorter (#12623)

This commit is contained in:
gf2121 2023-10-05 04:31:23 -05:00 committed by GitHub
parent 7371493478
commit 28f0885bdf
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 73 additions and 5 deletions

View File

@ -176,6 +176,8 @@ Optimizations
* GITHUB#12604: Estimate the block size of FST BytesStore in BlockTreeTermsWriter * GITHUB#12604: Estimate the block size of FST BytesStore in BlockTreeTermsWriter
to reduce GC load during indexing. (Guo Feng) to reduce GC load during indexing. (Guo Feng)
* GITHUB#12623: Use a MergeSorter taking advantage of extra storage for StableMSBRadixSorter. (Guo Feng)
Changes in runtime behavior Changes in runtime behavior
--------------------- ---------------------

View File

@ -38,7 +38,7 @@ public abstract class StableMSBRadixSorter extends MSBRadixSorter {
@Override @Override
protected Sorter getFallbackSorter(int k) { protected Sorter getFallbackSorter(int k) {
return new InPlaceMergeSorter() { return new MergeSorter() {
@Override @Override
protected void swap(int i, int j) { protected void swap(int i, int j) {
StableMSBRadixSorter.this.swap(i, j); StableMSBRadixSorter.this.swap(i, j);
@ -78,4 +78,64 @@ public abstract class StableMSBRadixSorter extends MSBRadixSorter {
} }
restore(from, to); restore(from, to);
} }
/**
 * A stable merge sorter that merges through temporary storage rather than in place.
 *
 * <p>Each merge writes its output with {@code save} and then copies the merged range back with
 * {@code restore}.
 */
protected abstract class MergeSorter extends Sorter {

  @Override
  public void sort(int from, int to) {
    checkRange(from, to);
    mergeSort(from, to);
  }

  /**
   * Recursively sorts {@code [from, to)}: ranges shorter than {@code BINARY_SORT_THRESHOLD} are
   * handed to {@code binarySort}; larger ranges are split at the midpoint, sorted, then merged.
   */
  private void mergeSort(int from, int to) {
    if (to - from < BINARY_SORT_THRESHOLD) {
      binarySort(from, to);
      return;
    }
    // Unsigned shift keeps the midpoint correct even if from + to overflows.
    final int mid = (from + to) >>> 1;
    mergeSort(from, mid);
    mergeSort(mid, to);
    merge(from, to, mid);
  }

  /**
   * Saves {@code len} consecutive entries starting at {@code from} into the temporary slots
   * starting at {@code tmpFrom}, one entry per {@code save} call.
   *
   * <p>Exposing this to implementations as a bulk-copy hook was tried, but it did not bring a
   * noticeable benchmark improvement because {@code len} is usually small.
   */
  private void bulkSave(int from, int tmpFrom, int len) {
    int src = from;
    int dst = tmpFrom;
    for (int remaining = len; remaining > 0; remaining--) {
      save(src++, dst++);
    }
  }

  /**
   * Merges the sorted runs {@code [from, mid)} and {@code [mid, to)} into temporary storage and
   * then restores the merged result over {@code [from, to)}.
   */
  private void merge(int from, int to, int mid) {
    assert to > mid && mid > from;
    if (compare(mid - 1, mid) <= 0) {
      // The last entry of the left run does not exceed the first of the right run: nothing to do.
      return;
    }
    int lo = from; // next unread entry of the left run
    int hi = mid; // next unread entry of the right run
    int out = from; // next temporary slot to fill
    for (; ; ) {
      if (compare(lo, hi) > 0) {
        // Right entry is strictly smaller, so it goes first.
        save(hi++, out++);
        if (hi == to) {
          // Right run exhausted: flush what is left of the left run.
          assert to - out == mid - lo;
          bulkSave(lo, out, mid - lo);
          break;
        }
      } else {
        // Ties take the left entry, which keeps the merge stable.
        save(lo++, out++);
        if (lo == mid) {
          // Left run exhausted: flush what is left of the right run.
          assert out == hi;
          bulkSave(hi, out, to - hi);
          break;
        }
      }
    }
    restore(from, to);
  }
}
} }

View File

@ -1597,12 +1597,12 @@ public class TestBKD extends LuceneTestCase {
@Override @Override
public void save(int i, int j) { public void save(int i, int j) {
throw new UnsupportedOperationException(); // do nothing
} }
@Override @Override
public void restore(int i, int j) { public void restore(int i, int j) {
throw new UnsupportedOperationException(); // do nothing
} }
@Override @Override
@ -1689,6 +1689,10 @@ public class TestBKD extends LuceneTestCase {
} }
MutablePointTree val = MutablePointTree val =
new MutablePointTree() { new MutablePointTree() {
final byte[][] tmpValues = new byte[numValues][];
final int[] tmpDocs = new int[numValues];
@Override @Override
public void getValue(int i, BytesRef packedValue) { public void getValue(int i, BytesRef packedValue) {
packedValue.bytes = pointValue[i]; packedValue.bytes = pointValue[i];
@ -1718,12 +1722,14 @@ public class TestBKD extends LuceneTestCase {
@Override @Override
public void save(int i, int j) { public void save(int i, int j) {
throw new UnsupportedOperationException(); tmpValues[j] = pointValue[i];
tmpDocs[j] = docId[i];
} }
@Override @Override
public void restore(int i, int j) { public void restore(int i, int j) {
throw new UnsupportedOperationException(); System.arraycopy(tmpValues, i, pointValue, i, j - i);
System.arraycopy(tmpDocs, i, docId, i, j - i);
} }
@Override @Override