From b4619d87ed1fb960a280f78b171a3a08b8abd673 Mon Sep 17 00:00:00 2001 From: Stefan Vodita <41467371+stefanvodita@users.noreply.github.com> Date: Mon, 10 Jul 2023 15:18:28 +0100 Subject: [PATCH] Move sliced int buffer functionality to MemoryIndex (#11248) (#12409) * [WIP] Move IntBlockPool slices to MemoryIndex * [WIP] Working TestMemoryIndex * [WIP} Working TestSlicedIntBlockPool * Working many allocations tests * Add basic IntBlockPool test * SlicedIntBlockPool inherits from IntBlockPool * Tidy --- lucene/CHANGES.txt | 6 +- .../org/apache/lucene/util/IntBlockPool.java | 207 +--------------- .../apache/lucene/index/TestIntBlockPool.java | 166 +++++-------- .../apache/lucene/util/TestByteBlockPool.java | 5 +- .../lucene/index/memory/MemoryIndex.java | 234 ++++++++++++++++-- .../lucene/index/memory/TestMemoryIndex.java | 1 + .../index/memory/TestSlicedIntBlockPool.java | 150 +++++++++++ 7 files changed, 437 insertions(+), 332 deletions(-) create mode 100644 lucene/memory/src/test/org/apache/lucene/index/memory/TestSlicedIntBlockPool.java diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 26b1c692479..919f16ad255 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -120,7 +120,9 @@ Other API Changes --------------------- -(No changes) + +* GITHUB#11248: IntBlockPool's SliceReader, SliceWriter, and all int slice functionality are moved out to MemoryIndex. + (Stefan Vodita) New Features --------------------- @@ -151,7 +153,7 @@ Optimizations Bug Fixes --------------------- -* GITHUB#9660: Throw and ArithmeticException when the offset overflows in a ByteBlockPool. (Stefan Vodita) +* GITHUB#9660: Throw an ArithmeticException when the offset overflows in a ByteBlockPool. (Stefan Vodita) * GITHUB#12388: JoinUtil queries were ignoring boosts. (Alan Woodward) diff --git a/lucene/core/src/java/org/apache/lucene/util/IntBlockPool.java b/lucene/core/src/java/org/apache/lucene/util/IntBlockPool.java index 598f55ace1c..c0403c8e253 100644 --- a/lucene/core/src/java/org/apache/lucene/util/IntBlockPool.java +++ b/lucene/core/src/java/org/apache/lucene/util/IntBlockPool.java @@ -23,7 +23,7 @@ import java.util.Arrays; * * @lucene.internal */ -public final class IntBlockPool { +public class IntBlockPool { public static final int INT_BLOCK_SHIFT = 13; public static final int INT_BLOCK_SIZE = 1 << INT_BLOCK_SHIFT; public static final int INT_BLOCK_MASK = INT_BLOCK_SIZE - 1; @@ -101,8 +101,7 @@ public final class IntBlockPool { /** * Expert: Resets the pool to its initial state reusing the first buffer. * - * @param zeroFillBuffers if true the buffers are filled with 0. This - * should be set to true if this pool is used with {@link SliceWriter}. + * @param zeroFillBuffers if true the buffers are filled with 0. * @param reuseFirst if true the first buffer will be reused and calling {@link * IntBlockPool#nextBuffer()} is not needed after reset iff the block pool was used before ie. * {@link IntBlockPool#nextBuffer()} was called before. @@ -156,206 +155,6 @@ public final class IntBlockPool { bufferUpto++; intUpto = 0; - intOffset += INT_BLOCK_SIZE; - } - - /** - * Creates a new int slice with the given starting size and returns the slices offset in the pool. - * - * @see SliceReader - */ - private int newSlice(final int size) { - if (intUpto > INT_BLOCK_SIZE - size) { - nextBuffer(); - assert assertSliceBuffer(buffer); - } - - final int upto = intUpto; - intUpto += size; - buffer[intUpto - 1] = 16; - return upto; - } - - private static boolean assertSliceBuffer(int[] buffer) { - int count = 0; - for (int i = 0; i < buffer.length; i++) { - count += buffer[i]; // for slices the buffer must only have 0 values - } - return count == 0; - } - - // no need to make this public unless we support different sizes - - /** - * An array holding the offset into the {@link IntBlockPool#LEVEL_SIZE_ARRAY} to quickly navigate - * to the next slice level. - */ - private static final int[] NEXT_LEVEL_ARRAY = {1, 2, 3, 4, 5, 6, 7, 8, 9, 9}; - - /** An array holding the level sizes for int slices. */ - private static final int[] LEVEL_SIZE_ARRAY = {2, 4, 8, 16, 16, 32, 32, 64, 64, 128}; - - /** The first level size for new slices */ - private static final int FIRST_LEVEL_SIZE = LEVEL_SIZE_ARRAY[0]; - - /** Allocates a new slice from the given offset */ - private int allocSlice(final int[] slice, final int sliceOffset) { - final int level = slice[sliceOffset] & 15; - final int newLevel = NEXT_LEVEL_ARRAY[level]; - final int newSize = LEVEL_SIZE_ARRAY[newLevel]; - // Maybe allocate another block - if (intUpto > INT_BLOCK_SIZE - newSize) { - nextBuffer(); - assert assertSliceBuffer(buffer); - } - - final int newUpto = intUpto; - final int offset = newUpto + intOffset; - intUpto += newSize; - // Write forwarding address at end of last slice: - slice[sliceOffset] = offset; - - // Write new level: - buffer[intUpto - 1] = 16 | newLevel; - - return newUpto; - } - - /** - * A {@link SliceWriter} that allows to write multiple integer slices into a given {@link - * IntBlockPool}. - * - * @see SliceReader - * @lucene.internal - */ - public static class SliceWriter { - - private int offset; - private final IntBlockPool pool; - - public SliceWriter(IntBlockPool pool) { - this.pool = pool; - } - /** */ - public void reset(int sliceOffset) { - this.offset = sliceOffset; - } - - /** Writes the given value into the slice and resizes the slice if needed */ - public void writeInt(int value) { - int[] ints = pool.buffers[offset >> INT_BLOCK_SHIFT]; - assert ints != null; - int relativeOffset = offset & INT_BLOCK_MASK; - if (ints[relativeOffset] != 0) { - // End of slice; allocate a new one - relativeOffset = pool.allocSlice(ints, relativeOffset); - ints = pool.buffer; - offset = relativeOffset + pool.intOffset; - } - ints[relativeOffset] = value; - offset++; - } - - /** - * starts a new slice and returns the start offset. The returned value should be used as the - * start offset to initialize a {@link SliceReader}. - */ - public int startNewSlice() { - return offset = pool.newSlice(FIRST_LEVEL_SIZE) + pool.intOffset; - } - - /** - * Returns the offset of the currently written slice. The returned value should be used as the - * end offset to initialize a {@link SliceReader} once this slice is fully written or to reset - * the this writer if another slice needs to be written. - */ - public int getCurrentOffset() { - return offset; - } - } - - /** - * A {@link SliceReader} that can read int slices written by a {@link SliceWriter} - * - * @lucene.internal - */ - public static final class SliceReader { - - private final IntBlockPool pool; - private int upto; - private int bufferUpto; - private int bufferOffset; - private int[] buffer; - private int limit; - private int level; - private int end; - - /** Creates a new {@link SliceReader} on the given pool */ - public SliceReader(IntBlockPool pool) { - this.pool = pool; - } - - /** Resets the reader to a slice give the slices absolute start and end offset in the pool */ - public void reset(int startOffset, int endOffset) { - bufferUpto = startOffset / INT_BLOCK_SIZE; - bufferOffset = bufferUpto * INT_BLOCK_SIZE; - this.end = endOffset; - level = 0; - - buffer = pool.buffers[bufferUpto]; - upto = startOffset & INT_BLOCK_MASK; - - final int firstSize = IntBlockPool.LEVEL_SIZE_ARRAY[0]; - if (startOffset + firstSize >= endOffset) { - // There is only this one slice to read - limit = endOffset & INT_BLOCK_MASK; - } else { - limit = upto + firstSize - 1; - } - } - - /** - * Returns true iff the current slice is fully read. If this method returns - * true {@link SliceReader#readInt()} should not be called again on this slice. - */ - public boolean endOfSlice() { - assert upto + bufferOffset <= end; - return upto + bufferOffset == end; - } - - /** - * Reads the next int from the current slice and returns it. - * - * @see SliceReader#endOfSlice() - */ - public int readInt() { - assert !endOfSlice(); - assert upto <= limit; - if (upto == limit) nextSlice(); - return buffer[upto++]; - } - - private void nextSlice() { - // Skip to our next slice - final int nextIndex = buffer[limit]; - level = NEXT_LEVEL_ARRAY[level]; - final int newSize = LEVEL_SIZE_ARRAY[level]; - - bufferUpto = nextIndex / INT_BLOCK_SIZE; - bufferOffset = bufferUpto * INT_BLOCK_SIZE; - - buffer = pool.buffers[bufferUpto]; - upto = nextIndex & INT_BLOCK_MASK; - - if (nextIndex + newSize >= end) { - // We are advancing to the final slice - assert end - nextIndex > 0; - limit = end - bufferOffset; - } else { - // This is not the final slice (subtract 4 for the - // forwarding address at the end of this new slice) - limit = upto + newSize - 1; - } - } + intOffset = Math.addExact(intOffset, INT_BLOCK_SIZE); } } diff --git a/lucene/core/src/test/org/apache/lucene/index/TestIntBlockPool.java b/lucene/core/src/test/org/apache/lucene/index/TestIntBlockPool.java index 7be4ac25fd1..833a8d29144 100644 --- a/lucene/core/src/test/org/apache/lucene/index/TestIntBlockPool.java +++ b/lucene/core/src/test/org/apache/lucene/index/TestIntBlockPool.java @@ -16,130 +16,76 @@ */ package org.apache.lucene.index; -import java.util.ArrayList; -import java.util.List; +import static org.apache.lucene.util.IntBlockPool.INT_BLOCK_SIZE; + import org.apache.lucene.tests.util.LuceneTestCase; -import org.apache.lucene.util.Counter; import org.apache.lucene.util.IntBlockPool; /** tests basic {@link IntBlockPool} functionality */ public class TestIntBlockPool extends LuceneTestCase { + public void testWriteReadReset() { + IntBlockPool pool = new IntBlockPool(new IntBlockPool.DirectAllocator()); + pool.nextBuffer(); - public void testSingleWriterReader() { - Counter bytesUsed = Counter.newCounter(); - IntBlockPool pool = new IntBlockPool(new ByteTrackingAllocator(bytesUsed)); + // Write consecutive ints to the buffer, possibly allocating a new buffer + int count = random().nextInt(2 * INT_BLOCK_SIZE); + for (int i = 0; i < count; i++) { + if (pool.intUpto == INT_BLOCK_SIZE) { + pool.nextBuffer(); + } + pool.buffer[pool.intUpto++] = i; + } - for (int j = 0; j < 2; j++) { - IntBlockPool.SliceWriter writer = new IntBlockPool.SliceWriter(pool); - int start = writer.startNewSlice(); - int num = atLeast(100); - for (int i = 0; i < num; i++) { - writer.writeInt(i); - } + // Check that all the ints are present in th buffer pool + for (int i = 0; i < count; i++) { + assertEquals(i, pool.buffers[i / INT_BLOCK_SIZE][i % INT_BLOCK_SIZE]); + } - int upto = writer.getCurrentOffset(); - IntBlockPool.SliceReader reader = new IntBlockPool.SliceReader(pool); - reader.reset(start, upto); - for (int i = 0; i < num; i++) { - assertEquals(i, reader.readInt()); - } - assertTrue(reader.endOfSlice()); - if (random().nextBoolean()) { - pool.reset(true, false); - assertEquals(0, bytesUsed.get()); - } else { - pool.reset(true, true); - assertEquals(IntBlockPool.INT_BLOCK_SIZE * Integer.BYTES, bytesUsed.get()); - } + // Reset without filling with zeros and check that the first buffer still has the ints + count = Math.min(count, INT_BLOCK_SIZE); + pool.reset(false, true); + for (int i = 0; i < count; i++) { + assertEquals(i, pool.buffers[0][i]); + } + + // Reset and fill with zeros, then check there is no data left + pool.intUpto = count; + pool.reset(); + for (int i = 0; i < count; i++) { + assertEquals(0, pool.buffers[0][i]); } } - public void testMultipleWriterReader() { - Counter bytesUsed = Counter.newCounter(); - IntBlockPool pool = new IntBlockPool(new ByteTrackingAllocator(bytesUsed)); - for (int j = 0; j < 2; j++) { - List holders = new ArrayList<>(); - int num = atLeast(4); - for (int i = 0; i < num; i++) { - holders.add(new StartEndAndValues(random().nextInt(1000))); - } - IntBlockPool.SliceWriter writer = new IntBlockPool.SliceWriter(pool); - IntBlockPool.SliceReader reader = new IntBlockPool.SliceReader(pool); + public void testTooManyAllocs() { + // Use a mock allocator that doesn't waste memory + IntBlockPool pool = + new IntBlockPool( + new IntBlockPool.Allocator(0) { + final int[] buffer = new int[0]; - int numValuesToWrite = atLeast(10000); - for (int i = 0; i < numValuesToWrite; i++) { - StartEndAndValues values = holders.get(random().nextInt(holders.size())); - if (values.valueCount == 0) { - values.start = writer.startNewSlice(); - } else { - writer.reset(values.end); - } - writer.writeInt(values.nextValue()); - values.end = writer.getCurrentOffset(); - if (random().nextInt(5) == 0) { - // pick one and reader the ints - assertReader(reader, holders.get(random().nextInt(holders.size()))); - } - } + @Override + public void recycleIntBlocks(int[][] blocks, int start, int end) {} - while (!holders.isEmpty()) { - StartEndAndValues values = holders.remove(random().nextInt(holders.size())); - assertReader(reader, values); - } - if (random().nextBoolean()) { - pool.reset(true, false); - assertEquals(0, bytesUsed.get()); - } else { - pool.reset(true, true); - assertEquals(IntBlockPool.INT_BLOCK_SIZE * Integer.BYTES, bytesUsed.get()); + @Override + public int[] getIntBlock() { + return buffer; + } + }); + pool.nextBuffer(); + + boolean throwsException = false; + for (int i = 0; i < Integer.MAX_VALUE / INT_BLOCK_SIZE + 1; i++) { + try { + pool.nextBuffer(); + } catch ( + @SuppressWarnings("unused") + ArithmeticException ignored) { + // The offset overflows on the last attempt to call nextBuffer() + throwsException = true; + break; } } - } - - private static class ByteTrackingAllocator extends IntBlockPool.Allocator { - private final Counter bytesUsed; - - public ByteTrackingAllocator(Counter bytesUsed) { - this(IntBlockPool.INT_BLOCK_SIZE, bytesUsed); - } - - public ByteTrackingAllocator(int blockSize, Counter bytesUsed) { - super(blockSize); - this.bytesUsed = bytesUsed; - } - - @Override - public int[] getIntBlock() { - bytesUsed.addAndGet(blockSize * Integer.BYTES); - return new int[blockSize]; - } - - @Override - public void recycleIntBlocks(int[][] blocks, int start, int end) { - bytesUsed.addAndGet(-((end - start) * blockSize * Integer.BYTES)); - } - } - - private void assertReader(IntBlockPool.SliceReader reader, StartEndAndValues values) { - reader.reset(values.start, values.end); - for (int i = 0; i < values.valueCount; i++) { - assertEquals(values.valueOffset + i, reader.readInt()); - } - assertTrue(reader.endOfSlice()); - } - - private static class StartEndAndValues { - int valueOffset; - int valueCount; - int start; - int end; - - public StartEndAndValues(int valueOffset) { - this.valueOffset = valueOffset; - } - - public int nextValue() { - return valueOffset + valueCount++; - } + assertTrue(throwsException); + assertTrue(pool.intOffset + INT_BLOCK_SIZE < pool.intOffset); } } diff --git a/lucene/core/src/test/org/apache/lucene/util/TestByteBlockPool.java b/lucene/core/src/test/org/apache/lucene/util/TestByteBlockPool.java index 03f4e00bebc..9a2af17fe6d 100644 --- a/lucene/core/src/test/org/apache/lucene/util/TestByteBlockPool.java +++ b/lucene/core/src/test/org/apache/lucene/util/TestByteBlockPool.java @@ -153,6 +153,7 @@ public class TestByteBlockPool extends LuceneTestCase { }); pool.nextBuffer(); + boolean throwsException = false; for (int i = 0; i < Integer.MAX_VALUE / ByteBlockPool.BYTE_BLOCK_SIZE + 1; i++) { try { pool.nextBuffer(); @@ -160,9 +161,11 @@ public class TestByteBlockPool extends LuceneTestCase { @SuppressWarnings("unused") ArithmeticException ignored) { // The offset overflows on the last attempt to call nextBuffer() + throwsException = true; break; } } - assertTrue(pool.byteOffset + ByteBlockPool.BYTE_BLOCK_SIZE < 0); + assertTrue(throwsException); + assertTrue(pool.byteOffset + ByteBlockPool.BYTE_BLOCK_SIZE < pool.byteOffset); } } diff --git a/lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java b/lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java index 6a2ea20f525..e4a368d0af1 100644 --- a/lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java +++ b/lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java @@ -57,8 +57,6 @@ import org.apache.lucene.util.BytesRefHash; import org.apache.lucene.util.BytesRefHash.DirectBytesStartArray; import org.apache.lucene.util.Counter; import org.apache.lucene.util.IntBlockPool; -import org.apache.lucene.util.IntBlockPool.SliceReader; -import org.apache.lucene.util.IntBlockPool.SliceWriter; import org.apache.lucene.util.RecyclingByteBlockAllocator; import org.apache.lucene.util.RecyclingIntBlockAllocator; import org.apache.lucene.util.Version; @@ -164,6 +162,212 @@ import org.apache.lucene.util.Version; * ). */ public class MemoryIndex { + static class SlicedIntBlockPool extends IntBlockPool { + /** + * An array holding the offset into the {@link SlicedIntBlockPool#LEVEL_SIZE_ARRAY} to quickly + * navigate to the next slice level. + */ + private static final int[] NEXT_LEVEL_ARRAY = {1, 2, 3, 4, 5, 6, 7, 8, 9, 9}; + + /** An array holding the level sizes for int slices. */ + private static final int[] LEVEL_SIZE_ARRAY = {2, 4, 8, 16, 16, 32, 32, 64, 64, 128}; + + /** The first level size for new slices */ + private static final int FIRST_LEVEL_SIZE = LEVEL_SIZE_ARRAY[0]; + + SlicedIntBlockPool(Allocator allocator) { + super(allocator); + } + + /** + * Creates a new int slice with the given starting size and returns the slices offset in the + * pool. + * + * @see SliceReader + */ + private int newSlice(final int size) { + if (intUpto > INT_BLOCK_SIZE - size) { + nextBuffer(); + assert assertSliceBuffer(buffer); + } + + final int upto = intUpto; + intUpto += size; + buffer[intUpto - 1] = 16; + return upto; + } + + private static boolean assertSliceBuffer(int[] buffer) { + int count = 0; + for (int i = 0; i < buffer.length; i++) { + count += buffer[i]; // for slices the buffer must only have 0 values + } + return count == 0; + } + + /** Allocates a new slice from the given offset */ + private int allocSlice(final int[] slice, final int sliceOffset) { + final int level = slice[sliceOffset] & 15; + final int newLevel = NEXT_LEVEL_ARRAY[level]; + final int newSize = LEVEL_SIZE_ARRAY[newLevel]; + // Maybe allocate another block + if (intUpto > INT_BLOCK_SIZE - newSize) { + nextBuffer(); + assert assertSliceBuffer(buffer); + } + + final int newUpto = intUpto; + final int offset = newUpto + intOffset; + intUpto += newSize; + // Write forwarding address at end of last slice: + slice[sliceOffset] = offset; + + // Write new level: + buffer[intUpto - 1] = 16 | newLevel; + + return newUpto; + } + + /** + * A {@link SliceWriter} that allows to write multiple integer slices into a given {@link + * IntBlockPool}. + * + * @see SliceReader + * @lucene.internal + */ + static class SliceWriter { + + private int offset; + private final SlicedIntBlockPool slicedIntBlockPool; + + public SliceWriter(SlicedIntBlockPool slicedIntBlockPool) { + this.slicedIntBlockPool = slicedIntBlockPool; + } + /** */ + public void reset(int sliceOffset) { + this.offset = sliceOffset; + } + + /** Writes the given value into the slice and resizes the slice if needed */ + public void writeInt(int value) { + int[] ints = slicedIntBlockPool.buffers[offset >> INT_BLOCK_SHIFT]; + assert ints != null; + int relativeOffset = offset & INT_BLOCK_MASK; + if (ints[relativeOffset] != 0) { + // End of slice; allocate a new one + relativeOffset = slicedIntBlockPool.allocSlice(ints, relativeOffset); + ints = slicedIntBlockPool.buffer; + offset = relativeOffset + slicedIntBlockPool.intOffset; + } + ints[relativeOffset] = value; + offset++; + } + + /** + * starts a new slice and returns the start offset. The returned value should be used as the + * start offset to initialize a {@link SliceReader}. + */ + public int startNewSlice() { + return offset = + slicedIntBlockPool.newSlice(FIRST_LEVEL_SIZE) + slicedIntBlockPool.intOffset; + } + + /** + * Returns the offset of the currently written slice. The returned value should be used as the + * end offset to initialize a {@link SliceReader} once this slice is fully written or to reset + * the this writer if another slice needs to be written. + */ + public int getCurrentOffset() { + return offset; + } + } + + /** + * A {@link SliceReader} that can read int slices written by a {@link SliceWriter} + * + * @lucene.internal + */ + static class SliceReader { + + private final SlicedIntBlockPool slicedIntBlockPool; + private int upto; + private int bufferUpto; + private int bufferOffset; + private int[] buffer; + private int limit; + private int level; + private int end; + + /** Creates a new {@link SliceReader} on the given pool */ + public SliceReader(SlicedIntBlockPool slicedIntBlockPool) { + this.slicedIntBlockPool = slicedIntBlockPool; + } + + /** Resets the reader to a slice give the slices absolute start and end offset in the pool */ + public void reset(int startOffset, int endOffset) { + bufferUpto = startOffset / INT_BLOCK_SIZE; + bufferOffset = bufferUpto * INT_BLOCK_SIZE; + this.end = endOffset; + level = 0; + + buffer = slicedIntBlockPool.buffers[bufferUpto]; + upto = startOffset & INT_BLOCK_MASK; + + final int firstSize = LEVEL_SIZE_ARRAY[0]; + if (startOffset + firstSize >= endOffset) { + // There is only this one slice to read + limit = endOffset & INT_BLOCK_MASK; + } else { + limit = upto + firstSize - 1; + } + } + + /** + * Returns true iff the current slice is fully read. If this method returns + * + * true {@link SliceReader#readInt()} should not be called again on this slice. + */ + public boolean endOfSlice() { + assert upto + bufferOffset <= end; + return upto + bufferOffset == end; + } + + /** + * Reads the next int from the current slice and returns it. + * + * @see SliceReader#endOfSlice() + */ + public int readInt() { + assert !endOfSlice(); + assert upto <= limit; + if (upto == limit) nextSlice(); + return buffer[upto++]; + } + + private void nextSlice() { + // Skip to our next slice + final int nextIndex = buffer[limit]; + level = NEXT_LEVEL_ARRAY[level]; + final int newSize = LEVEL_SIZE_ARRAY[level]; + + bufferUpto = nextIndex / INT_BLOCK_SIZE; + bufferOffset = bufferUpto * INT_BLOCK_SIZE; + + buffer = slicedIntBlockPool.buffers[bufferUpto]; + upto = nextIndex & INT_BLOCK_MASK; + + if (nextIndex + newSize >= end) { + // We are advancing to the final slice + assert end - nextIndex > 0; + limit = end - bufferOffset; + } else { + // This is not the final slice (subtract 4 for the + // forwarding address at the end of this new slice) + limit = upto + newSize - 1; + } + } + } + } private static final boolean DEBUG = false; @@ -174,9 +378,8 @@ public class MemoryIndex { private final boolean storePayloads; private final ByteBlockPool byteBlockPool; - private final IntBlockPool intBlockPool; - // private final IntBlockPool.SliceReader postingsReader; - private final IntBlockPool.SliceWriter postingsWriter; + private final SlicedIntBlockPool slicedIntBlockPool; + private final SlicedIntBlockPool.SliceWriter postingsWriter; private final BytesRefArray payloadsBytesRefs; // non null only when storePayloads private Counter bytesUsed; @@ -237,19 +440,19 @@ public class MemoryIndex { final int maxBufferedIntBlocks = (int) ((maxReusedBytes - (maxBufferedByteBlocks * (long) ByteBlockPool.BYTE_BLOCK_SIZE)) - / (IntBlockPool.INT_BLOCK_SIZE * (long) Integer.BYTES)); + / (SlicedIntBlockPool.INT_BLOCK_SIZE * (long) Integer.BYTES)); assert (maxBufferedByteBlocks * ByteBlockPool.BYTE_BLOCK_SIZE) - + (maxBufferedIntBlocks * IntBlockPool.INT_BLOCK_SIZE * Integer.BYTES) + + (maxBufferedIntBlocks * SlicedIntBlockPool.INT_BLOCK_SIZE * Integer.BYTES) <= maxReusedBytes; byteBlockPool = new ByteBlockPool( new RecyclingByteBlockAllocator( ByteBlockPool.BYTE_BLOCK_SIZE, maxBufferedByteBlocks, bytesUsed)); - intBlockPool = - new IntBlockPool( + slicedIntBlockPool = + new SlicedIntBlockPool( new RecyclingIntBlockAllocator( - IntBlockPool.INT_BLOCK_SIZE, maxBufferedIntBlocks, bytesUsed)); - postingsWriter = new SliceWriter(intBlockPool); + SlicedIntBlockPool.INT_BLOCK_SIZE, maxBufferedIntBlocks, bytesUsed)); + postingsWriter = new SlicedIntBlockPool.SliceWriter(slicedIntBlockPool); // TODO refactor BytesRefArray to allow us to apply maxReusedBytes option payloadsBytesRefs = storePayloads ? new BytesRefArray(bytesUsed) : null; } @@ -842,7 +1045,8 @@ public class MemoryIndex { result.append(fieldName).append(":\n"); SliceByteStartArray sliceArray = info.sliceArray; int numPositions = 0; - SliceReader postingsReader = new SliceReader(intBlockPool); + SlicedIntBlockPool.SliceReader postingsReader = + new SlicedIntBlockPool.SliceReader(slicedIntBlockPool); for (int j = 0; j < info.terms.size(); j++) { int ord = info.sortedTerms[j]; info.terms.get(ord, spare); @@ -1652,7 +1856,7 @@ public class MemoryIndex { private class MemoryPostingsEnum extends PostingsEnum { - private final SliceReader sliceReader; + private final SlicedIntBlockPool.SliceReader sliceReader; private int posUpto; // for assert private boolean hasNext; private int doc = -1; @@ -1663,7 +1867,7 @@ public class MemoryIndex { private final BytesRefBuilder payloadBuilder; // only non-null when storePayloads public MemoryPostingsEnum() { - this.sliceReader = new SliceReader(intBlockPool); + this.sliceReader = new SlicedIntBlockPool.SliceReader(slicedIntBlockPool); this.payloadBuilder = storePayloads ? new BytesRefBuilder() : null; } @@ -1942,7 +2146,7 @@ public class MemoryIndex { fields.clear(); this.normSimilarity = IndexSearcher.getDefaultSimilarity(); byteBlockPool.reset(false, false); // no need to 0-fill the buffers - intBlockPool.reset(true, false); // here must must 0-fill since we use slices + slicedIntBlockPool.reset(true, false); // here must must 0-fill since we use slices if (payloadsBytesRefs != null) { payloadsBytesRefs.clear(); } diff --git a/lucene/memory/src/test/org/apache/lucene/index/memory/TestMemoryIndex.java b/lucene/memory/src/test/org/apache/lucene/index/memory/TestMemoryIndex.java index ac2e4148a27..55712f6b890 100644 --- a/lucene/memory/src/test/org/apache/lucene/index/memory/TestMemoryIndex.java +++ b/lucene/memory/src/test/org/apache/lucene/index/memory/TestMemoryIndex.java @@ -260,6 +260,7 @@ public class TestMemoryIndex extends LuceneTestCase { assertThat(mi.search(new TermQuery(new Term("field2", "text"))), is(0.0f)); assertThat(mi.search(new TermQuery(new Term("field2", "untokenized text"))), not(0.0f)); + assertThat(mi.search(new TermQuery(new Term("field1", "some more text"))), is(0.0f)); assertThat(mi.search(new PhraseQuery("field1", "some", "more", "text")), not(0.0f)); assertThat(mi.search(new PhraseQuery("field1", "some", "text")), not(0.0f)); assertThat(mi.search(new PhraseQuery("field1", "text", "some")), is(0.0f)); diff --git a/lucene/memory/src/test/org/apache/lucene/index/memory/TestSlicedIntBlockPool.java b/lucene/memory/src/test/org/apache/lucene/index/memory/TestSlicedIntBlockPool.java new file mode 100644 index 00000000000..d8e7c420fea --- /dev/null +++ b/lucene/memory/src/test/org/apache/lucene/index/memory/TestSlicedIntBlockPool.java @@ -0,0 +1,150 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.lucene.index.memory; + +import java.util.ArrayList; +import java.util.List; +import org.apache.lucene.tests.util.LuceneTestCase; +import org.apache.lucene.util.Counter; +import org.apache.lucene.util.IntBlockPool; + +public class TestSlicedIntBlockPool extends LuceneTestCase { + public void testSingleWriterReader() { + Counter bytesUsed = Counter.newCounter(); + MemoryIndex.SlicedIntBlockPool slicedIntBlockPool = + new MemoryIndex.SlicedIntBlockPool(new ByteTrackingAllocator(bytesUsed)); + + for (int j = 0; j < 2; j++) { + MemoryIndex.SlicedIntBlockPool.SliceWriter writer = + new MemoryIndex.SlicedIntBlockPool.SliceWriter(slicedIntBlockPool); + int start = writer.startNewSlice(); + int num = atLeast(100); + for (int i = 0; i < num; i++) { + writer.writeInt(i); + } + + int upto = writer.getCurrentOffset(); + MemoryIndex.SlicedIntBlockPool.SliceReader reader = + new MemoryIndex.SlicedIntBlockPool.SliceReader(slicedIntBlockPool); + reader.reset(start, upto); + for (int i = 0; i < num; i++) { + assertEquals(i, reader.readInt()); + } + assertTrue(reader.endOfSlice()); + if (random().nextBoolean()) { + slicedIntBlockPool.reset(true, false); + assertEquals(0, bytesUsed.get()); + } else { + slicedIntBlockPool.reset(true, true); + assertEquals(IntBlockPool.INT_BLOCK_SIZE * Integer.BYTES, bytesUsed.get()); + } + } + } + + public void testMultipleWriterReader() { + Counter bytesUsed = Counter.newCounter(); + MemoryIndex.SlicedIntBlockPool slicedIntBlockPool = + new MemoryIndex.SlicedIntBlockPool(new ByteTrackingAllocator(bytesUsed)); + for (int j = 0; j < 2; j++) { + List holders = new ArrayList<>(); + int num = atLeast(4); + for (int i = 0; i < num; i++) { + holders.add(new StartEndAndValues(random().nextInt(1000))); + } + MemoryIndex.SlicedIntBlockPool.SliceWriter writer = + new MemoryIndex.SlicedIntBlockPool.SliceWriter(slicedIntBlockPool); + MemoryIndex.SlicedIntBlockPool.SliceReader reader = + new MemoryIndex.SlicedIntBlockPool.SliceReader(slicedIntBlockPool); + + int numValuesToWrite = atLeast(10000); + for (int i = 0; i < numValuesToWrite; i++) { + StartEndAndValues values = holders.get(random().nextInt(holders.size())); + if (values.valueCount == 0) { + values.start = writer.startNewSlice(); + } else { + writer.reset(values.end); + } + writer.writeInt(values.nextValue()); + values.end = writer.getCurrentOffset(); + if (random().nextInt(5) == 0) { + // pick one and reader the ints + assertReader(reader, holders.get(random().nextInt(holders.size()))); + } + } + + while (!holders.isEmpty()) { + StartEndAndValues values = holders.remove(random().nextInt(holders.size())); + assertReader(reader, values); + } + if (random().nextBoolean()) { + slicedIntBlockPool.reset(true, false); + assertEquals(0, bytesUsed.get()); + } else { + slicedIntBlockPool.reset(true, true); + assertEquals(IntBlockPool.INT_BLOCK_SIZE * Integer.BYTES, bytesUsed.get()); + } + } + } + + private static class ByteTrackingAllocator extends IntBlockPool.Allocator { + private final Counter bytesUsed; + + public ByteTrackingAllocator(Counter bytesUsed) { + this(IntBlockPool.INT_BLOCK_SIZE, bytesUsed); + } + + public ByteTrackingAllocator(int blockSize, Counter bytesUsed) { + super(blockSize); + this.bytesUsed = bytesUsed; + } + + @Override + public int[] getIntBlock() { + bytesUsed.addAndGet(blockSize * Integer.BYTES); + return new int[blockSize]; + } + + @Override + public void recycleIntBlocks(int[][] blocks, int start, int end) { + bytesUsed.addAndGet(-((end - start) * blockSize * Integer.BYTES)); + } + } + + private void assertReader( + MemoryIndex.SlicedIntBlockPool.SliceReader reader, StartEndAndValues values) { + reader.reset(values.start, values.end); + for (int i = 0; i < values.valueCount; i++) { + assertEquals(values.valueOffset + i, reader.readInt()); + } + assertTrue(reader.endOfSlice()); + } + + private static class StartEndAndValues { + int valueOffset; + int valueCount; + int start; + int end; + + public StartEndAndValues(int valueOffset) { + this.valueOffset = valueOffset; + } + + public int nextValue() { + return valueOffset + valueCount++; + } + } +}