diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index fbd9c36e1a4..dcce957ff54 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -254,6 +254,8 @@ Optimizations * GITHUB#13203: Speed up writeGroupVInts (Zhang Chao) +* GITHUB#13224: Use singleton for all-zeros DirectMonotonicReader.Meta (Armin Braun) + Bug Fixes --------------------- diff --git a/lucene/core/src/java/org/apache/lucene/util/packed/DirectMonotonicReader.java b/lucene/core/src/java/org/apache/lucene/util/packed/DirectMonotonicReader.java index 1f5954ec05f..66a7db9ab03 100644 --- a/lucene/core/src/java/org/apache/lucene/util/packed/DirectMonotonicReader.java +++ b/lucene/core/src/java/org/apache/lucene/util/packed/DirectMonotonicReader.java @@ -39,15 +39,19 @@ public final class DirectMonotonicReader extends LongValues implements Accountab * from disk. */ public static class Meta implements Accountable { + + // Use a shift of 63 so that there would be a single block regardless of the number of values. + private static final Meta SINGLE_ZERO_BLOCK = new Meta(1L, 63); + private static final long BASE_RAM_BYTES_USED = RamUsageEstimator.shallowSizeOfInstance(Meta.class); - final int blockShift; - final int numBlocks; - final long[] mins; - final float[] avgs; - final byte[] bpvs; - final long[] offsets; + private final int blockShift; + private final int numBlocks; + private final long[] mins; + private final float[] avgs; + private final byte[] bpvs; + private final long[] offsets; Meta(long numValues, int blockShift) { this.blockShift = blockShift; @@ -79,14 +83,20 @@ public final class DirectMonotonicReader extends LongValues implements Accountab */ public static Meta loadMeta(IndexInput metaIn, long numValues, int blockShift) throws IOException { + boolean allValuesZero = true; Meta meta = new Meta(numValues, blockShift); for (int i = 0; i < meta.numBlocks; ++i) { - meta.mins[i] = metaIn.readLong(); - meta.avgs[i] = Float.intBitsToFloat(metaIn.readInt()); + long min = metaIn.readLong(); + 
meta.mins[i] = min; + int avgInt = metaIn.readInt(); + meta.avgs[i] = Float.intBitsToFloat(avgInt); meta.offsets[i] = metaIn.readLong(); - meta.bpvs[i] = metaIn.readByte(); + byte bpvs = metaIn.readByte(); + meta.bpvs[i] = bpvs; + allValuesZero = allValuesZero && min == 0L && avgInt == 0 && bpvs == 0; } - return meta; + // save heap: when every block has min == 0, avg bits == 0 and bpv == 0, share the singleton + return allValuesZero ? Meta.SINGLE_ZERO_BLOCK : meta; } /** Retrieves a non-merging instance from the specified slice. */ diff --git a/lucene/core/src/test/org/apache/lucene/util/packed/TestDirectMonotonic.java b/lucene/core/src/test/org/apache/lucene/util/packed/TestDirectMonotonic.java index ff7c3ccce22..62a8f41b011 100644 --- a/lucene/core/src/test/org/apache/lucene/util/packed/TestDirectMonotonic.java +++ b/lucene/core/src/test/org/apache/lucene/util/packed/TestDirectMonotonic.java @@ -154,6 +154,47 @@ public class TestDirectMonotonic extends LuceneTestCase { dir.close(); } + public void testZeroValuesSmallBlobShift() throws IOException { + Directory dir = newDirectory(); + final int numValues = TestUtil.nextInt(random(), 8, 1 << 20); + // use blockShift < log2(numValues), presumably so that the values span several blocks + final int blockShift = + TestUtil.nextInt( + random(), + DirectMonotonicWriter.MIN_BLOCK_SHIFT, + Math.toIntExact(Math.round(Math.log(numValues) / Math.log(2))) - 1); + + final long dataLength; + try (IndexOutput metaOut = dir.createOutput("meta", IOContext.DEFAULT); + IndexOutput dataOut = dir.createOutput("data", IOContext.DEFAULT)) { + DirectMonotonicWriter w = + DirectMonotonicWriter.getInstance(metaOut, dataOut, numValues, blockShift); + for (int i = 0; i < numValues; i++) { + w.add(0); + } + w.finish(); + dataLength = dataOut.getFilePointer(); + } + + try (IndexInput metaIn = dir.openInput("meta", IOContext.READONCE); + IndexInput dataIn = dir.openInput("data", IOContext.DEFAULT)) { + DirectMonotonicReader.Meta meta = + DirectMonotonicReader.loadMeta(metaIn, numValues, blockShift); + assertEquals(metaIn.length(), 
metaIn.getFilePointer()); + // read meta again and assert singleton Meta#SINGLE_ZERO_BLOCK instance is read every time + metaIn.seek(0L); + assertSame(meta, DirectMonotonicReader.loadMeta(metaIn, numValues, blockShift)); + LongValues values = + DirectMonotonicReader.getInstance(meta, dataIn.randomAccessSlice(0, dataLength)); + for (int i = 0; i < numValues; ++i) { + assertEquals(0, values.get(i)); + } + assertEquals(0, dataIn.getFilePointer()); + } + + dir.close(); + } + public void testRandom() throws IOException { doTestRandom(false); }