mirror of https://github.com/apache/lucene.git
Use singleton for all-zeros DirectMonotonicReader.Meta (#13224)
A single block of all zeros is a fairly common case, and in some ES use cases the duplicate Meta instances created for it consume a lot of heap. Return a shared singleton when such a block is read, to avoid the duplication.
This commit is contained in:
parent
8f4e449669
commit
6cba773318
|
@ -254,6 +254,8 @@ Optimizations
|
|||
|
||||
* GITHUB#13203: Speed up writeGroupVInts (Zhang Chao)
|
||||
|
||||
* GITHUB#13224: Use singleton for all-zeros DirectMonotonicReader.Meta (Armin Braun)
|
||||
|
||||
Bug Fixes
|
||||
---------------------
|
||||
|
||||
|
|
|
@ -39,15 +39,19 @@ public final class DirectMonotonicReader extends LongValues implements Accountab
|
|||
* from disk.
|
||||
*/
|
||||
public static class Meta implements Accountable {
|
||||
|
||||
// Use a shift of 63 so that there would be a single block regardless of the number of values.
|
||||
private static final Meta SINGLE_ZERO_BLOCK = new Meta(1L, 63);
|
||||
|
||||
private static final long BASE_RAM_BYTES_USED =
|
||||
RamUsageEstimator.shallowSizeOfInstance(Meta.class);
|
||||
|
||||
final int blockShift;
|
||||
final int numBlocks;
|
||||
final long[] mins;
|
||||
final float[] avgs;
|
||||
final byte[] bpvs;
|
||||
final long[] offsets;
|
||||
private final int blockShift;
|
||||
private final int numBlocks;
|
||||
private final long[] mins;
|
||||
private final float[] avgs;
|
||||
private final byte[] bpvs;
|
||||
private final long[] offsets;
|
||||
|
||||
Meta(long numValues, int blockShift) {
|
||||
this.blockShift = blockShift;
|
||||
|
@ -79,14 +83,20 @@ public final class DirectMonotonicReader extends LongValues implements Accountab
|
|||
*/
|
||||
public static Meta loadMeta(IndexInput metaIn, long numValues, int blockShift)
|
||||
throws IOException {
|
||||
boolean allValuesZero = true;
|
||||
Meta meta = new Meta(numValues, blockShift);
|
||||
for (int i = 0; i < meta.numBlocks; ++i) {
|
||||
meta.mins[i] = metaIn.readLong();
|
||||
meta.avgs[i] = Float.intBitsToFloat(metaIn.readInt());
|
||||
long min = metaIn.readLong();
|
||||
meta.mins[i] = min;
|
||||
int avgInt = metaIn.readInt();
|
||||
meta.avgs[i] = Float.intBitsToFloat(avgInt);
|
||||
meta.offsets[i] = metaIn.readLong();
|
||||
meta.bpvs[i] = metaIn.readByte();
|
||||
byte bpvs = metaIn.readByte();
|
||||
meta.bpvs[i] = bpvs;
|
||||
allValuesZero = allValuesZero && min == 0L && avgInt == 0 && bpvs == 0;
|
||||
}
|
||||
return meta;
|
||||
// save heap in case all values are zero
|
||||
return allValuesZero ? Meta.SINGLE_ZERO_BLOCK : meta;
|
||||
}
|
||||
|
||||
/** Retrieves a non-merging instance from the specified slice. */
|
||||
|
|
|
@ -154,6 +154,47 @@ public class TestDirectMonotonic extends LuceneTestCase {
|
|||
dir.close();
|
||||
}
|
||||
|
||||
public void testZeroValuesSmallBlobShift() throws IOException {
|
||||
Directory dir = newDirectory();
|
||||
final int numValues = TestUtil.nextInt(random(), 8, 1 << 20);
|
||||
// use blockShift < log2(numValues)
|
||||
final int blockShift =
|
||||
TestUtil.nextInt(
|
||||
random(),
|
||||
DirectMonotonicWriter.MIN_BLOCK_SHIFT,
|
||||
Math.toIntExact(Math.round(Math.log(numValues) / Math.log(2))) - 1);
|
||||
|
||||
final long dataLength;
|
||||
try (IndexOutput metaOut = dir.createOutput("meta", IOContext.DEFAULT);
|
||||
IndexOutput dataOut = dir.createOutput("data", IOContext.DEFAULT)) {
|
||||
DirectMonotonicWriter w =
|
||||
DirectMonotonicWriter.getInstance(metaOut, dataOut, numValues, blockShift);
|
||||
for (int i = 0; i < numValues; i++) {
|
||||
w.add(0);
|
||||
}
|
||||
w.finish();
|
||||
dataLength = dataOut.getFilePointer();
|
||||
}
|
||||
|
||||
try (IndexInput metaIn = dir.openInput("meta", IOContext.READONCE);
|
||||
IndexInput dataIn = dir.openInput("data", IOContext.DEFAULT)) {
|
||||
DirectMonotonicReader.Meta meta =
|
||||
DirectMonotonicReader.loadMeta(metaIn, numValues, blockShift);
|
||||
assertEquals(metaIn.length(), metaIn.getFilePointer());
|
||||
// read meta again and assert singleton Meta#SINGLE_ZERO_BLOCK instance is read every time
|
||||
metaIn.seek(0L);
|
||||
assertSame(meta, DirectMonotonicReader.loadMeta(metaIn, numValues, blockShift));
|
||||
LongValues values =
|
||||
DirectMonotonicReader.getInstance(meta, dataIn.randomAccessSlice(0, dataLength));
|
||||
for (int i = 0; i < numValues; ++i) {
|
||||
assertEquals(0, values.get(i));
|
||||
}
|
||||
assertEquals(0, dataIn.getFilePointer());
|
||||
}
|
||||
|
||||
dir.close();
|
||||
}
|
||||
|
||||
/**
 * Delegates to {@code doTestRandom} with its boolean argument set to {@code false}.
 * NOTE(review): the meaning of that flag is defined by {@code doTestRandom}, which is not visible
 * in this hunk — confirm before relying on it.
 */
public void testRandom() throws IOException {
|
||||
doTestRandom(false);
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue