Use singleton for all-zeros DirectMonotonicReader.Meta (#13224)

Having a single block of all zeros is a fairly common case, and in some ES
use cases the duplicate instances for it use a lot of heap.
=> Return a shared singleton for it to avoid the duplication.
This commit is contained in:
Armin Braun 2024-03-29 07:23:00 +01:00 committed by GitHub
parent 8f4e449669
commit 6cba773318
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 63 additions and 10 deletions

View File

@ -254,6 +254,8 @@ Optimizations
* GITHUB#13203: Speed up writeGroupVInts (Zhang Chao)
* GITHUB#13224: Use singleton for all-zeros DirectMonotonicReader.Meta (Armin Braun)
Bug Fixes
---------------------

View File

@ -39,15 +39,19 @@ public final class DirectMonotonicReader extends LongValues implements Accountab
* from disk.
*/
public static class Meta implements Accountable {
// Use a shift of 63 so that there would be a single block regardless of the number of values.
private static final Meta SINGLE_ZERO_BLOCK = new Meta(1L, 63);
private static final long BASE_RAM_BYTES_USED =
RamUsageEstimator.shallowSizeOfInstance(Meta.class);
final int blockShift;
final int numBlocks;
final long[] mins;
final float[] avgs;
final byte[] bpvs;
final long[] offsets;
private final int blockShift;
private final int numBlocks;
private final long[] mins;
private final float[] avgs;
private final byte[] bpvs;
private final long[] offsets;
Meta(long numValues, int blockShift) {
this.blockShift = blockShift;
@ -79,14 +83,20 @@ public final class DirectMonotonicReader extends LongValues implements Accountab
*/
public static Meta loadMeta(IndexInput metaIn, long numValues, int blockShift)
throws IOException {
// NOTE(review): this excerpt looks like a diff rendered without +/- markers, so statements from
// before and after the change appear interleaved below — confirm against the actual file.
// Stays true only while every block read so far has min == 0, raw avg bits == 0 and bpv == 0.
boolean allValuesZero = true;
Meta meta = new Meta(numValues, blockShift);
// Per block, the fields are read from metaIn in this order: min, avg bits, offset, bpv.
for (int i = 0; i < meta.numBlocks; ++i) {
// Presumably the pre-change form: the four statements after these two store the same fields,
// but keep the raw values in locals so they can be tested for zero.
meta.mins[i] = metaIn.readLong();
meta.avgs[i] = Float.intBitsToFloat(metaIn.readInt());
long min = metaIn.readLong();
meta.mins[i] = min;
// The average is kept as raw int bits so the zero check below compares the bit pattern, not a
// float value.
int avgInt = metaIn.readInt();
meta.avgs[i] = Float.intBitsToFloat(avgInt);
meta.offsets[i] = metaIn.readLong();
// Presumably the pre-change form of the two statements that follow it.
meta.bpvs[i] = metaIn.readByte();
byte bpvs = metaIn.readByte();
meta.bpvs[i] = bpvs;
// The offset is deliberately not part of the check; only min, avg bits and bpv must be zero.
allValuesZero = allValuesZero && min == 0L && avgInt == 0 && bpvs == 0;
}
// Presumably the pre-change return, superseded by the conditional return below.
return meta;
// save heap in case all values are zero: hand back the shared Meta.SINGLE_ZERO_BLOCK instance
// instead of the freshly populated meta
return allValuesZero ? Meta.SINGLE_ZERO_BLOCK : meta;
}
/** Retrieves a non-merging instance from the specified slice. */

View File

@ -154,6 +154,47 @@ public class TestDirectMonotonic extends LuceneTestCase {
dir.close();
}
/**
 * Writes {@code numValues} zeros with a block shift chosen below log2(numValues), then checks
 * that {@code DirectMonotonicReader.loadMeta} returns the very same {@code Meta} instance on
 * repeated loads and that every value reads back as zero.
 *
 * @throws IOException if writing or reading the test files fails
 */
public void testZeroValuesSmallBlobShift() throws IOException {
  // try-with-resources so the directory is closed even if an assertion or I/O call below throws;
  // a trailing dir.close() would be skipped in that case.
  try (Directory dir = newDirectory()) {
    final int numValues = TestUtil.nextInt(random(), 8, 1 << 20);
    // use blockShift < log2(numValues)
    final int blockShift =
        TestUtil.nextInt(
            random(),
            DirectMonotonicWriter.MIN_BLOCK_SHIFT,
            Math.toIntExact(Math.round(Math.log(numValues) / Math.log(2))) - 1);

    final long dataLength;
    try (IndexOutput metaOut = dir.createOutput("meta", IOContext.DEFAULT);
        IndexOutput dataOut = dir.createOutput("data", IOContext.DEFAULT)) {
      DirectMonotonicWriter w =
          DirectMonotonicWriter.getInstance(metaOut, dataOut, numValues, blockShift);
      for (int i = 0; i < numValues; i++) {
        w.add(0);
      }
      w.finish();
      dataLength = dataOut.getFilePointer();
    }

    try (IndexInput metaIn = dir.openInput("meta", IOContext.READONCE);
        IndexInput dataIn = dir.openInput("data", IOContext.DEFAULT)) {
      DirectMonotonicReader.Meta meta =
          DirectMonotonicReader.loadMeta(metaIn, numValues, blockShift);
      // loadMeta must have consumed the whole meta file
      assertEquals(metaIn.length(), metaIn.getFilePointer());
      // read meta again and assert singleton Meta#SINGLE_ZERO_BLOCK instance is read every time
      metaIn.seek(0L);
      assertSame(meta, DirectMonotonicReader.loadMeta(metaIn, numValues, blockShift));

      LongValues values =
          DirectMonotonicReader.getInstance(meta, dataIn.randomAccessSlice(0, dataLength));
      for (int i = 0; i < numValues; ++i) {
        assertEquals(0, values.get(i));
      }
      // the file pointer of dataIn itself must not have moved while reading the values
      assertEquals(0, dataIn.getFilePointer());
    }
  }
}
/**
 * Delegates to {@code doTestRandom(false)}.
 *
 * <p>NOTE(review): the meaning of the boolean argument is defined by {@code doTestRandom}, which
 * is outside this excerpt — confirm there.
 */
public void testRandom() throws IOException {
doTestRandom(false);
}