mirror of https://github.com/apache/lucene.git
Use singleton for all-zeros DirectMonotonicReader.Meta (#13224)
Having a single block of all zeros is a fairly common case, and in some ES use cases the duplicate instances consume a lot of heap. Return a shared singleton for this case to avoid the duplication.
This commit is contained in:
parent
8f4e449669
commit
6cba773318
|
@ -254,6 +254,8 @@ Optimizations
|
||||||
|
|
||||||
* GITHUB#13203: Speed up writeGroupVInts (Zhang Chao)
|
* GITHUB#13203: Speed up writeGroupVInts (Zhang Chao)
|
||||||
|
|
||||||
|
* GITHUB#13224: Use singleton for all-zeros DirectMonotonicReader.Meta (Armin Braun)
|
||||||
|
|
||||||
Bug Fixes
|
Bug Fixes
|
||||||
---------------------
|
---------------------
|
||||||
|
|
||||||
|
|
|
@ -39,15 +39,19 @@ public final class DirectMonotonicReader extends LongValues implements Accountab
|
||||||
* from disk.
|
* from disk.
|
||||||
*/
|
*/
|
||||||
public static class Meta implements Accountable {
|
public static class Meta implements Accountable {
|
||||||
|
|
||||||
|
// Use a shift of 63 so that there would be a single block regardless of the number of values.
|
||||||
|
private static final Meta SINGLE_ZERO_BLOCK = new Meta(1L, 63);
|
||||||
|
|
||||||
private static final long BASE_RAM_BYTES_USED =
|
private static final long BASE_RAM_BYTES_USED =
|
||||||
RamUsageEstimator.shallowSizeOfInstance(Meta.class);
|
RamUsageEstimator.shallowSizeOfInstance(Meta.class);
|
||||||
|
|
||||||
final int blockShift;
|
private final int blockShift;
|
||||||
final int numBlocks;
|
private final int numBlocks;
|
||||||
final long[] mins;
|
private final long[] mins;
|
||||||
final float[] avgs;
|
private final float[] avgs;
|
||||||
final byte[] bpvs;
|
private final byte[] bpvs;
|
||||||
final long[] offsets;
|
private final long[] offsets;
|
||||||
|
|
||||||
Meta(long numValues, int blockShift) {
|
Meta(long numValues, int blockShift) {
|
||||||
this.blockShift = blockShift;
|
this.blockShift = blockShift;
|
||||||
|
@ -79,14 +83,20 @@ public final class DirectMonotonicReader extends LongValues implements Accountab
|
||||||
*/
|
*/
|
||||||
public static Meta loadMeta(IndexInput metaIn, long numValues, int blockShift)
|
public static Meta loadMeta(IndexInput metaIn, long numValues, int blockShift)
|
||||||
throws IOException {
|
throws IOException {
|
||||||
|
boolean allValuesZero = true;
|
||||||
Meta meta = new Meta(numValues, blockShift);
|
Meta meta = new Meta(numValues, blockShift);
|
||||||
for (int i = 0; i < meta.numBlocks; ++i) {
|
for (int i = 0; i < meta.numBlocks; ++i) {
|
||||||
meta.mins[i] = metaIn.readLong();
|
long min = metaIn.readLong();
|
||||||
meta.avgs[i] = Float.intBitsToFloat(metaIn.readInt());
|
meta.mins[i] = min;
|
||||||
|
int avgInt = metaIn.readInt();
|
||||||
|
meta.avgs[i] = Float.intBitsToFloat(avgInt);
|
||||||
meta.offsets[i] = metaIn.readLong();
|
meta.offsets[i] = metaIn.readLong();
|
||||||
meta.bpvs[i] = metaIn.readByte();
|
byte bpvs = metaIn.readByte();
|
||||||
|
meta.bpvs[i] = bpvs;
|
||||||
|
allValuesZero = allValuesZero && min == 0L && avgInt == 0 && bpvs == 0;
|
||||||
}
|
}
|
||||||
return meta;
|
// save heap in case all values are zero
|
||||||
|
return allValuesZero ? Meta.SINGLE_ZERO_BLOCK : meta;
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Retrieves a non-merging instance from the specified slice. */
|
/** Retrieves a non-merging instance from the specified slice. */
|
||||||
|
|
|
@ -154,6 +154,47 @@ public class TestDirectMonotonic extends LuceneTestCase {
|
||||||
dir.close();
|
dir.close();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void testZeroValuesSmallBlobShift() throws IOException {
|
||||||
|
Directory dir = newDirectory();
|
||||||
|
final int numValues = TestUtil.nextInt(random(), 8, 1 << 20);
|
||||||
|
// use blockShift < log2(numValues)
|
||||||
|
final int blockShift =
|
||||||
|
TestUtil.nextInt(
|
||||||
|
random(),
|
||||||
|
DirectMonotonicWriter.MIN_BLOCK_SHIFT,
|
||||||
|
Math.toIntExact(Math.round(Math.log(numValues) / Math.log(2))) - 1);
|
||||||
|
|
||||||
|
final long dataLength;
|
||||||
|
try (IndexOutput metaOut = dir.createOutput("meta", IOContext.DEFAULT);
|
||||||
|
IndexOutput dataOut = dir.createOutput("data", IOContext.DEFAULT)) {
|
||||||
|
DirectMonotonicWriter w =
|
||||||
|
DirectMonotonicWriter.getInstance(metaOut, dataOut, numValues, blockShift);
|
||||||
|
for (int i = 0; i < numValues; i++) {
|
||||||
|
w.add(0);
|
||||||
|
}
|
||||||
|
w.finish();
|
||||||
|
dataLength = dataOut.getFilePointer();
|
||||||
|
}
|
||||||
|
|
||||||
|
try (IndexInput metaIn = dir.openInput("meta", IOContext.READONCE);
|
||||||
|
IndexInput dataIn = dir.openInput("data", IOContext.DEFAULT)) {
|
||||||
|
DirectMonotonicReader.Meta meta =
|
||||||
|
DirectMonotonicReader.loadMeta(metaIn, numValues, blockShift);
|
||||||
|
assertEquals(metaIn.length(), metaIn.getFilePointer());
|
||||||
|
// read meta again and assert singleton Meta#SINGLE_ZERO_BLOCK instance is read every time
|
||||||
|
metaIn.seek(0L);
|
||||||
|
assertSame(meta, DirectMonotonicReader.loadMeta(metaIn, numValues, blockShift));
|
||||||
|
LongValues values =
|
||||||
|
DirectMonotonicReader.getInstance(meta, dataIn.randomAccessSlice(0, dataLength));
|
||||||
|
for (int i = 0; i < numValues; ++i) {
|
||||||
|
assertEquals(0, values.get(i));
|
||||||
|
}
|
||||||
|
assertEquals(0, dataIn.getFilePointer());
|
||||||
|
}
|
||||||
|
|
||||||
|
dir.close();
|
||||||
|
}
|
||||||
|
|
||||||
public void testRandom() throws IOException {
|
public void testRandom() throws IOException {
|
||||||
doTestRandom(false);
|
doTestRandom(false);
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue