From b4619d87ed1fb960a280f78b171a3a08b8abd673 Mon Sep 17 00:00:00 2001
From: Stefan Vodita <41467371+stefanvodita@users.noreply.github.com>
Date: Mon, 10 Jul 2023 15:18:28 +0100
Subject: [PATCH] Move sliced int buffer functionality to MemoryIndex (#11248)
(#12409)
* [WIP] Move IntBlockPool slices to MemoryIndex
* [WIP] Working TestMemoryIndex
* [WIP} Working TestSlicedIntBlockPool
* Working many allocations tests
* Add basic IntBlockPool test
* SlicedIntBlockPool inherits from IntBlockPool
* Tidy
---
lucene/CHANGES.txt | 6 +-
.../org/apache/lucene/util/IntBlockPool.java | 207 +---------------
.../apache/lucene/index/TestIntBlockPool.java | 166 +++++--------
.../apache/lucene/util/TestByteBlockPool.java | 5 +-
.../lucene/index/memory/MemoryIndex.java | 234 ++++++++++++++++--
.../lucene/index/memory/TestMemoryIndex.java | 1 +
.../index/memory/TestSlicedIntBlockPool.java | 150 +++++++++++
7 files changed, 437 insertions(+), 332 deletions(-)
create mode 100644 lucene/memory/src/test/org/apache/lucene/index/memory/TestSlicedIntBlockPool.java
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 26b1c692479..919f16ad255 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -120,7 +120,9 @@ Other
API Changes
---------------------
-(No changes)
+
+* GITHUB#11248: IntBlockPool's SliceReader, SliceWriter, and all int slice functionality are moved out to MemoryIndex.
+ (Stefan Vodita)
New Features
---------------------
@@ -151,7 +153,7 @@ Optimizations
Bug Fixes
---------------------
-* GITHUB#9660: Throw and ArithmeticException when the offset overflows in a ByteBlockPool. (Stefan Vodita)
+* GITHUB#9660: Throw an ArithmeticException when the offset overflows in a ByteBlockPool. (Stefan Vodita)
* GITHUB#12388: JoinUtil queries were ignoring boosts. (Alan Woodward)
diff --git a/lucene/core/src/java/org/apache/lucene/util/IntBlockPool.java b/lucene/core/src/java/org/apache/lucene/util/IntBlockPool.java
index 598f55ace1c..c0403c8e253 100644
--- a/lucene/core/src/java/org/apache/lucene/util/IntBlockPool.java
+++ b/lucene/core/src/java/org/apache/lucene/util/IntBlockPool.java
@@ -23,7 +23,7 @@ import java.util.Arrays;
*
* @lucene.internal
*/
-public final class IntBlockPool {
+public class IntBlockPool {
public static final int INT_BLOCK_SHIFT = 13;
public static final int INT_BLOCK_SIZE = 1 << INT_BLOCK_SHIFT;
public static final int INT_BLOCK_MASK = INT_BLOCK_SIZE - 1;
@@ -101,8 +101,7 @@ public final class IntBlockPool {
/**
* Expert: Resets the pool to its initial state reusing the first buffer.
*
- * @param zeroFillBuffers if true
the buffers are filled with 0
. This
- * should be set to true
if this pool is used with {@link SliceWriter}.
+ * @param zeroFillBuffers if true
the buffers are filled with 0
.
* @param reuseFirst if true
the first buffer will be reused and calling {@link
* IntBlockPool#nextBuffer()} is not needed after reset iff the block pool was used before ie.
* {@link IntBlockPool#nextBuffer()} was called before.
@@ -156,206 +155,6 @@ public final class IntBlockPool {
bufferUpto++;
intUpto = 0;
- intOffset += INT_BLOCK_SIZE;
- }
-
- /**
- * Creates a new int slice with the given starting size and returns the slices offset in the pool.
- *
- * @see SliceReader
- */
- private int newSlice(final int size) {
- if (intUpto > INT_BLOCK_SIZE - size) {
- nextBuffer();
- assert assertSliceBuffer(buffer);
- }
-
- final int upto = intUpto;
- intUpto += size;
- buffer[intUpto - 1] = 16;
- return upto;
- }
-
- private static boolean assertSliceBuffer(int[] buffer) {
- int count = 0;
- for (int i = 0; i < buffer.length; i++) {
- count += buffer[i]; // for slices the buffer must only have 0 values
- }
- return count == 0;
- }
-
- // no need to make this public unless we support different sizes
-
- /**
- * An array holding the offset into the {@link IntBlockPool#LEVEL_SIZE_ARRAY} to quickly navigate
- * to the next slice level.
- */
- private static final int[] NEXT_LEVEL_ARRAY = {1, 2, 3, 4, 5, 6, 7, 8, 9, 9};
-
- /** An array holding the level sizes for int slices. */
- private static final int[] LEVEL_SIZE_ARRAY = {2, 4, 8, 16, 16, 32, 32, 64, 64, 128};
-
- /** The first level size for new slices */
- private static final int FIRST_LEVEL_SIZE = LEVEL_SIZE_ARRAY[0];
-
- /** Allocates a new slice from the given offset */
- private int allocSlice(final int[] slice, final int sliceOffset) {
- final int level = slice[sliceOffset] & 15;
- final int newLevel = NEXT_LEVEL_ARRAY[level];
- final int newSize = LEVEL_SIZE_ARRAY[newLevel];
- // Maybe allocate another block
- if (intUpto > INT_BLOCK_SIZE - newSize) {
- nextBuffer();
- assert assertSliceBuffer(buffer);
- }
-
- final int newUpto = intUpto;
- final int offset = newUpto + intOffset;
- intUpto += newSize;
- // Write forwarding address at end of last slice:
- slice[sliceOffset] = offset;
-
- // Write new level:
- buffer[intUpto - 1] = 16 | newLevel;
-
- return newUpto;
- }
-
- /**
- * A {@link SliceWriter} that allows to write multiple integer slices into a given {@link
- * IntBlockPool}.
- *
- * @see SliceReader
- * @lucene.internal
- */
- public static class SliceWriter {
-
- private int offset;
- private final IntBlockPool pool;
-
- public SliceWriter(IntBlockPool pool) {
- this.pool = pool;
- }
- /** */
- public void reset(int sliceOffset) {
- this.offset = sliceOffset;
- }
-
- /** Writes the given value into the slice and resizes the slice if needed */
- public void writeInt(int value) {
- int[] ints = pool.buffers[offset >> INT_BLOCK_SHIFT];
- assert ints != null;
- int relativeOffset = offset & INT_BLOCK_MASK;
- if (ints[relativeOffset] != 0) {
- // End of slice; allocate a new one
- relativeOffset = pool.allocSlice(ints, relativeOffset);
- ints = pool.buffer;
- offset = relativeOffset + pool.intOffset;
- }
- ints[relativeOffset] = value;
- offset++;
- }
-
- /**
- * starts a new slice and returns the start offset. The returned value should be used as the
- * start offset to initialize a {@link SliceReader}.
- */
- public int startNewSlice() {
- return offset = pool.newSlice(FIRST_LEVEL_SIZE) + pool.intOffset;
- }
-
- /**
- * Returns the offset of the currently written slice. The returned value should be used as the
- * end offset to initialize a {@link SliceReader} once this slice is fully written or to reset
- * the this writer if another slice needs to be written.
- */
- public int getCurrentOffset() {
- return offset;
- }
- }
-
- /**
- * A {@link SliceReader} that can read int slices written by a {@link SliceWriter}
- *
- * @lucene.internal
- */
- public static final class SliceReader {
-
- private final IntBlockPool pool;
- private int upto;
- private int bufferUpto;
- private int bufferOffset;
- private int[] buffer;
- private int limit;
- private int level;
- private int end;
-
- /** Creates a new {@link SliceReader} on the given pool */
- public SliceReader(IntBlockPool pool) {
- this.pool = pool;
- }
-
- /** Resets the reader to a slice give the slices absolute start and end offset in the pool */
- public void reset(int startOffset, int endOffset) {
- bufferUpto = startOffset / INT_BLOCK_SIZE;
- bufferOffset = bufferUpto * INT_BLOCK_SIZE;
- this.end = endOffset;
- level = 0;
-
- buffer = pool.buffers[bufferUpto];
- upto = startOffset & INT_BLOCK_MASK;
-
- final int firstSize = IntBlockPool.LEVEL_SIZE_ARRAY[0];
- if (startOffset + firstSize >= endOffset) {
- // There is only this one slice to read
- limit = endOffset & INT_BLOCK_MASK;
- } else {
- limit = upto + firstSize - 1;
- }
- }
-
- /**
- * Returns true
iff the current slice is fully read. If this method returns
- * true
{@link SliceReader#readInt()} should not be called again on this slice.
- */
- public boolean endOfSlice() {
- assert upto + bufferOffset <= end;
- return upto + bufferOffset == end;
- }
-
- /**
- * Reads the next int from the current slice and returns it.
- *
- * @see SliceReader#endOfSlice()
- */
- public int readInt() {
- assert !endOfSlice();
- assert upto <= limit;
- if (upto == limit) nextSlice();
- return buffer[upto++];
- }
-
- private void nextSlice() {
- // Skip to our next slice
- final int nextIndex = buffer[limit];
- level = NEXT_LEVEL_ARRAY[level];
- final int newSize = LEVEL_SIZE_ARRAY[level];
-
- bufferUpto = nextIndex / INT_BLOCK_SIZE;
- bufferOffset = bufferUpto * INT_BLOCK_SIZE;
-
- buffer = pool.buffers[bufferUpto];
- upto = nextIndex & INT_BLOCK_MASK;
-
- if (nextIndex + newSize >= end) {
- // We are advancing to the final slice
- assert end - nextIndex > 0;
- limit = end - bufferOffset;
- } else {
- // This is not the final slice (subtract 4 for the
- // forwarding address at the end of this new slice)
- limit = upto + newSize - 1;
- }
- }
+ intOffset = Math.addExact(intOffset, INT_BLOCK_SIZE);
}
}
diff --git a/lucene/core/src/test/org/apache/lucene/index/TestIntBlockPool.java b/lucene/core/src/test/org/apache/lucene/index/TestIntBlockPool.java
index 7be4ac25fd1..833a8d29144 100644
--- a/lucene/core/src/test/org/apache/lucene/index/TestIntBlockPool.java
+++ b/lucene/core/src/test/org/apache/lucene/index/TestIntBlockPool.java
@@ -16,130 +16,76 @@
*/
package org.apache.lucene.index;
-import java.util.ArrayList;
-import java.util.List;
+import static org.apache.lucene.util.IntBlockPool.INT_BLOCK_SIZE;
+
import org.apache.lucene.tests.util.LuceneTestCase;
-import org.apache.lucene.util.Counter;
import org.apache.lucene.util.IntBlockPool;
/** tests basic {@link IntBlockPool} functionality */
public class TestIntBlockPool extends LuceneTestCase {
+ public void testWriteReadReset() {
+ IntBlockPool pool = new IntBlockPool(new IntBlockPool.DirectAllocator());
+ pool.nextBuffer();
- public void testSingleWriterReader() {
- Counter bytesUsed = Counter.newCounter();
- IntBlockPool pool = new IntBlockPool(new ByteTrackingAllocator(bytesUsed));
+ // Write consecutive ints to the buffer, possibly allocating a new buffer
+ int count = random().nextInt(2 * INT_BLOCK_SIZE);
+ for (int i = 0; i < count; i++) {
+ if (pool.intUpto == INT_BLOCK_SIZE) {
+ pool.nextBuffer();
+ }
+ pool.buffer[pool.intUpto++] = i;
+ }
- for (int j = 0; j < 2; j++) {
- IntBlockPool.SliceWriter writer = new IntBlockPool.SliceWriter(pool);
- int start = writer.startNewSlice();
- int num = atLeast(100);
- for (int i = 0; i < num; i++) {
- writer.writeInt(i);
- }
+ // Check that all the ints are present in th buffer pool
+ for (int i = 0; i < count; i++) {
+ assertEquals(i, pool.buffers[i / INT_BLOCK_SIZE][i % INT_BLOCK_SIZE]);
+ }
- int upto = writer.getCurrentOffset();
- IntBlockPool.SliceReader reader = new IntBlockPool.SliceReader(pool);
- reader.reset(start, upto);
- for (int i = 0; i < num; i++) {
- assertEquals(i, reader.readInt());
- }
- assertTrue(reader.endOfSlice());
- if (random().nextBoolean()) {
- pool.reset(true, false);
- assertEquals(0, bytesUsed.get());
- } else {
- pool.reset(true, true);
- assertEquals(IntBlockPool.INT_BLOCK_SIZE * Integer.BYTES, bytesUsed.get());
- }
+ // Reset without filling with zeros and check that the first buffer still has the ints
+ count = Math.min(count, INT_BLOCK_SIZE);
+ pool.reset(false, true);
+ for (int i = 0; i < count; i++) {
+ assertEquals(i, pool.buffers[0][i]);
+ }
+
+ // Reset and fill with zeros, then check there is no data left
+ pool.intUpto = count;
+ pool.reset();
+ for (int i = 0; i < count; i++) {
+ assertEquals(0, pool.buffers[0][i]);
}
}
- public void testMultipleWriterReader() {
- Counter bytesUsed = Counter.newCounter();
- IntBlockPool pool = new IntBlockPool(new ByteTrackingAllocator(bytesUsed));
- for (int j = 0; j < 2; j++) {
- List holders = new ArrayList<>();
- int num = atLeast(4);
- for (int i = 0; i < num; i++) {
- holders.add(new StartEndAndValues(random().nextInt(1000)));
- }
- IntBlockPool.SliceWriter writer = new IntBlockPool.SliceWriter(pool);
- IntBlockPool.SliceReader reader = new IntBlockPool.SliceReader(pool);
+ public void testTooManyAllocs() {
+ // Use a mock allocator that doesn't waste memory
+ IntBlockPool pool =
+ new IntBlockPool(
+ new IntBlockPool.Allocator(0) {
+ final int[] buffer = new int[0];
- int numValuesToWrite = atLeast(10000);
- for (int i = 0; i < numValuesToWrite; i++) {
- StartEndAndValues values = holders.get(random().nextInt(holders.size()));
- if (values.valueCount == 0) {
- values.start = writer.startNewSlice();
- } else {
- writer.reset(values.end);
- }
- writer.writeInt(values.nextValue());
- values.end = writer.getCurrentOffset();
- if (random().nextInt(5) == 0) {
- // pick one and reader the ints
- assertReader(reader, holders.get(random().nextInt(holders.size())));
- }
- }
+ @Override
+ public void recycleIntBlocks(int[][] blocks, int start, int end) {}
- while (!holders.isEmpty()) {
- StartEndAndValues values = holders.remove(random().nextInt(holders.size()));
- assertReader(reader, values);
- }
- if (random().nextBoolean()) {
- pool.reset(true, false);
- assertEquals(0, bytesUsed.get());
- } else {
- pool.reset(true, true);
- assertEquals(IntBlockPool.INT_BLOCK_SIZE * Integer.BYTES, bytesUsed.get());
+ @Override
+ public int[] getIntBlock() {
+ return buffer;
+ }
+ });
+ pool.nextBuffer();
+
+ boolean throwsException = false;
+ for (int i = 0; i < Integer.MAX_VALUE / INT_BLOCK_SIZE + 1; i++) {
+ try {
+ pool.nextBuffer();
+ } catch (
+ @SuppressWarnings("unused")
+ ArithmeticException ignored) {
+ // The offset overflows on the last attempt to call nextBuffer()
+ throwsException = true;
+ break;
}
}
- }
-
- private static class ByteTrackingAllocator extends IntBlockPool.Allocator {
- private final Counter bytesUsed;
-
- public ByteTrackingAllocator(Counter bytesUsed) {
- this(IntBlockPool.INT_BLOCK_SIZE, bytesUsed);
- }
-
- public ByteTrackingAllocator(int blockSize, Counter bytesUsed) {
- super(blockSize);
- this.bytesUsed = bytesUsed;
- }
-
- @Override
- public int[] getIntBlock() {
- bytesUsed.addAndGet(blockSize * Integer.BYTES);
- return new int[blockSize];
- }
-
- @Override
- public void recycleIntBlocks(int[][] blocks, int start, int end) {
- bytesUsed.addAndGet(-((end - start) * blockSize * Integer.BYTES));
- }
- }
-
- private void assertReader(IntBlockPool.SliceReader reader, StartEndAndValues values) {
- reader.reset(values.start, values.end);
- for (int i = 0; i < values.valueCount; i++) {
- assertEquals(values.valueOffset + i, reader.readInt());
- }
- assertTrue(reader.endOfSlice());
- }
-
- private static class StartEndAndValues {
- int valueOffset;
- int valueCount;
- int start;
- int end;
-
- public StartEndAndValues(int valueOffset) {
- this.valueOffset = valueOffset;
- }
-
- public int nextValue() {
- return valueOffset + valueCount++;
- }
+ assertTrue(throwsException);
+ assertTrue(pool.intOffset + INT_BLOCK_SIZE < pool.intOffset);
}
}
diff --git a/lucene/core/src/test/org/apache/lucene/util/TestByteBlockPool.java b/lucene/core/src/test/org/apache/lucene/util/TestByteBlockPool.java
index 03f4e00bebc..9a2af17fe6d 100644
--- a/lucene/core/src/test/org/apache/lucene/util/TestByteBlockPool.java
+++ b/lucene/core/src/test/org/apache/lucene/util/TestByteBlockPool.java
@@ -153,6 +153,7 @@ public class TestByteBlockPool extends LuceneTestCase {
});
pool.nextBuffer();
+ boolean throwsException = false;
for (int i = 0; i < Integer.MAX_VALUE / ByteBlockPool.BYTE_BLOCK_SIZE + 1; i++) {
try {
pool.nextBuffer();
@@ -160,9 +161,11 @@ public class TestByteBlockPool extends LuceneTestCase {
@SuppressWarnings("unused")
ArithmeticException ignored) {
// The offset overflows on the last attempt to call nextBuffer()
+ throwsException = true;
break;
}
}
- assertTrue(pool.byteOffset + ByteBlockPool.BYTE_BLOCK_SIZE < 0);
+ assertTrue(throwsException);
+ assertTrue(pool.byteOffset + ByteBlockPool.BYTE_BLOCK_SIZE < pool.byteOffset);
}
}
diff --git a/lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java b/lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java
index 6a2ea20f525..e4a368d0af1 100644
--- a/lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java
+++ b/lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java
@@ -57,8 +57,6 @@ import org.apache.lucene.util.BytesRefHash;
import org.apache.lucene.util.BytesRefHash.DirectBytesStartArray;
import org.apache.lucene.util.Counter;
import org.apache.lucene.util.IntBlockPool;
-import org.apache.lucene.util.IntBlockPool.SliceReader;
-import org.apache.lucene.util.IntBlockPool.SliceWriter;
import org.apache.lucene.util.RecyclingByteBlockAllocator;
import org.apache.lucene.util.RecyclingIntBlockAllocator;
import org.apache.lucene.util.Version;
@@ -164,6 +162,212 @@ import org.apache.lucene.util.Version;
* ).
*/
public class MemoryIndex {
+ static class SlicedIntBlockPool extends IntBlockPool {
+ /**
+ * An array holding the offset into the {@link SlicedIntBlockPool#LEVEL_SIZE_ARRAY} to quickly
+ * navigate to the next slice level.
+ */
+ private static final int[] NEXT_LEVEL_ARRAY = {1, 2, 3, 4, 5, 6, 7, 8, 9, 9};
+
+ /** An array holding the level sizes for int slices. */
+ private static final int[] LEVEL_SIZE_ARRAY = {2, 4, 8, 16, 16, 32, 32, 64, 64, 128};
+
+ /** The first level size for new slices */
+ private static final int FIRST_LEVEL_SIZE = LEVEL_SIZE_ARRAY[0];
+
+ SlicedIntBlockPool(Allocator allocator) {
+ super(allocator);
+ }
+
+ /**
+ * Creates a new int slice with the given starting size and returns the slices offset in the
+ * pool.
+ *
+ * @see SliceReader
+ */
+ private int newSlice(final int size) {
+ if (intUpto > INT_BLOCK_SIZE - size) {
+ nextBuffer();
+ assert assertSliceBuffer(buffer);
+ }
+
+ final int upto = intUpto;
+ intUpto += size;
+ buffer[intUpto - 1] = 16;
+ return upto;
+ }
+
+ private static boolean assertSliceBuffer(int[] buffer) {
+ int count = 0;
+ for (int i = 0; i < buffer.length; i++) {
+ count += buffer[i]; // for slices the buffer must only have 0 values
+ }
+ return count == 0;
+ }
+
+ /** Allocates a new slice from the given offset */
+ private int allocSlice(final int[] slice, final int sliceOffset) {
+ final int level = slice[sliceOffset] & 15;
+ final int newLevel = NEXT_LEVEL_ARRAY[level];
+ final int newSize = LEVEL_SIZE_ARRAY[newLevel];
+ // Maybe allocate another block
+ if (intUpto > INT_BLOCK_SIZE - newSize) {
+ nextBuffer();
+ assert assertSliceBuffer(buffer);
+ }
+
+ final int newUpto = intUpto;
+ final int offset = newUpto + intOffset;
+ intUpto += newSize;
+ // Write forwarding address at end of last slice:
+ slice[sliceOffset] = offset;
+
+ // Write new level:
+ buffer[intUpto - 1] = 16 | newLevel;
+
+ return newUpto;
+ }
+
+ /**
+ * A {@link SliceWriter} that allows to write multiple integer slices into a given {@link
+ * IntBlockPool}.
+ *
+ * @see SliceReader
+ * @lucene.internal
+ */
+ static class SliceWriter {
+
+ private int offset;
+ private final SlicedIntBlockPool slicedIntBlockPool;
+
+ public SliceWriter(SlicedIntBlockPool slicedIntBlockPool) {
+ this.slicedIntBlockPool = slicedIntBlockPool;
+ }
+ /** */
+ public void reset(int sliceOffset) {
+ this.offset = sliceOffset;
+ }
+
+ /** Writes the given value into the slice and resizes the slice if needed */
+ public void writeInt(int value) {
+ int[] ints = slicedIntBlockPool.buffers[offset >> INT_BLOCK_SHIFT];
+ assert ints != null;
+ int relativeOffset = offset & INT_BLOCK_MASK;
+ if (ints[relativeOffset] != 0) {
+ // End of slice; allocate a new one
+ relativeOffset = slicedIntBlockPool.allocSlice(ints, relativeOffset);
+ ints = slicedIntBlockPool.buffer;
+ offset = relativeOffset + slicedIntBlockPool.intOffset;
+ }
+ ints[relativeOffset] = value;
+ offset++;
+ }
+
+ /**
+ * starts a new slice and returns the start offset. The returned value should be used as the
+ * start offset to initialize a {@link SliceReader}.
+ */
+ public int startNewSlice() {
+ return offset =
+ slicedIntBlockPool.newSlice(FIRST_LEVEL_SIZE) + slicedIntBlockPool.intOffset;
+ }
+
+ /**
+ * Returns the offset of the currently written slice. The returned value should be used as the
+ * end offset to initialize a {@link SliceReader} once this slice is fully written or to reset
+ * the this writer if another slice needs to be written.
+ */
+ public int getCurrentOffset() {
+ return offset;
+ }
+ }
+
+ /**
+ * A {@link SliceReader} that can read int slices written by a {@link SliceWriter}
+ *
+ * @lucene.internal
+ */
+ static class SliceReader {
+
+ private final SlicedIntBlockPool slicedIntBlockPool;
+ private int upto;
+ private int bufferUpto;
+ private int bufferOffset;
+ private int[] buffer;
+ private int limit;
+ private int level;
+ private int end;
+
+ /** Creates a new {@link SliceReader} on the given pool */
+ public SliceReader(SlicedIntBlockPool slicedIntBlockPool) {
+ this.slicedIntBlockPool = slicedIntBlockPool;
+ }
+
+ /** Resets the reader to a slice give the slices absolute start and end offset in the pool */
+ public void reset(int startOffset, int endOffset) {
+ bufferUpto = startOffset / INT_BLOCK_SIZE;
+ bufferOffset = bufferUpto * INT_BLOCK_SIZE;
+ this.end = endOffset;
+ level = 0;
+
+ buffer = slicedIntBlockPool.buffers[bufferUpto];
+ upto = startOffset & INT_BLOCK_MASK;
+
+ final int firstSize = LEVEL_SIZE_ARRAY[0];
+ if (startOffset + firstSize >= endOffset) {
+ // There is only this one slice to read
+ limit = endOffset & INT_BLOCK_MASK;
+ } else {
+ limit = upto + firstSize - 1;
+ }
+ }
+
+ /**
+ * Returns true
iff the current slice is fully read. If this method returns
+ *
+ * true
{@link SliceReader#readInt()} should not be called again on this slice.
+ */
+ public boolean endOfSlice() {
+ assert upto + bufferOffset <= end;
+ return upto + bufferOffset == end;
+ }
+
+ /**
+ * Reads the next int from the current slice and returns it.
+ *
+ * @see SliceReader#endOfSlice()
+ */
+ public int readInt() {
+ assert !endOfSlice();
+ assert upto <= limit;
+ if (upto == limit) nextSlice();
+ return buffer[upto++];
+ }
+
+ private void nextSlice() {
+ // Skip to our next slice
+ final int nextIndex = buffer[limit];
+ level = NEXT_LEVEL_ARRAY[level];
+ final int newSize = LEVEL_SIZE_ARRAY[level];
+
+ bufferUpto = nextIndex / INT_BLOCK_SIZE;
+ bufferOffset = bufferUpto * INT_BLOCK_SIZE;
+
+ buffer = slicedIntBlockPool.buffers[bufferUpto];
+ upto = nextIndex & INT_BLOCK_MASK;
+
+ if (nextIndex + newSize >= end) {
+ // We are advancing to the final slice
+ assert end - nextIndex > 0;
+ limit = end - bufferOffset;
+ } else {
+ // This is not the final slice (subtract 4 for the
+ // forwarding address at the end of this new slice)
+ limit = upto + newSize - 1;
+ }
+ }
+ }
+ }
private static final boolean DEBUG = false;
@@ -174,9 +378,8 @@ public class MemoryIndex {
private final boolean storePayloads;
private final ByteBlockPool byteBlockPool;
- private final IntBlockPool intBlockPool;
- // private final IntBlockPool.SliceReader postingsReader;
- private final IntBlockPool.SliceWriter postingsWriter;
+ private final SlicedIntBlockPool slicedIntBlockPool;
+ private final SlicedIntBlockPool.SliceWriter postingsWriter;
private final BytesRefArray payloadsBytesRefs; // non null only when storePayloads
private Counter bytesUsed;
@@ -237,19 +440,19 @@ public class MemoryIndex {
final int maxBufferedIntBlocks =
(int)
((maxReusedBytes - (maxBufferedByteBlocks * (long) ByteBlockPool.BYTE_BLOCK_SIZE))
- / (IntBlockPool.INT_BLOCK_SIZE * (long) Integer.BYTES));
+ / (SlicedIntBlockPool.INT_BLOCK_SIZE * (long) Integer.BYTES));
assert (maxBufferedByteBlocks * ByteBlockPool.BYTE_BLOCK_SIZE)
- + (maxBufferedIntBlocks * IntBlockPool.INT_BLOCK_SIZE * Integer.BYTES)
+ + (maxBufferedIntBlocks * SlicedIntBlockPool.INT_BLOCK_SIZE * Integer.BYTES)
<= maxReusedBytes;
byteBlockPool =
new ByteBlockPool(
new RecyclingByteBlockAllocator(
ByteBlockPool.BYTE_BLOCK_SIZE, maxBufferedByteBlocks, bytesUsed));
- intBlockPool =
- new IntBlockPool(
+ slicedIntBlockPool =
+ new SlicedIntBlockPool(
new RecyclingIntBlockAllocator(
- IntBlockPool.INT_BLOCK_SIZE, maxBufferedIntBlocks, bytesUsed));
- postingsWriter = new SliceWriter(intBlockPool);
+ SlicedIntBlockPool.INT_BLOCK_SIZE, maxBufferedIntBlocks, bytesUsed));
+ postingsWriter = new SlicedIntBlockPool.SliceWriter(slicedIntBlockPool);
// TODO refactor BytesRefArray to allow us to apply maxReusedBytes option
payloadsBytesRefs = storePayloads ? new BytesRefArray(bytesUsed) : null;
}
@@ -842,7 +1045,8 @@ public class MemoryIndex {
result.append(fieldName).append(":\n");
SliceByteStartArray sliceArray = info.sliceArray;
int numPositions = 0;
- SliceReader postingsReader = new SliceReader(intBlockPool);
+ SlicedIntBlockPool.SliceReader postingsReader =
+ new SlicedIntBlockPool.SliceReader(slicedIntBlockPool);
for (int j = 0; j < info.terms.size(); j++) {
int ord = info.sortedTerms[j];
info.terms.get(ord, spare);
@@ -1652,7 +1856,7 @@ public class MemoryIndex {
private class MemoryPostingsEnum extends PostingsEnum {
- private final SliceReader sliceReader;
+ private final SlicedIntBlockPool.SliceReader sliceReader;
private int posUpto; // for assert
private boolean hasNext;
private int doc = -1;
@@ -1663,7 +1867,7 @@ public class MemoryIndex {
private final BytesRefBuilder payloadBuilder; // only non-null when storePayloads
public MemoryPostingsEnum() {
- this.sliceReader = new SliceReader(intBlockPool);
+ this.sliceReader = new SlicedIntBlockPool.SliceReader(slicedIntBlockPool);
this.payloadBuilder = storePayloads ? new BytesRefBuilder() : null;
}
@@ -1942,7 +2146,7 @@ public class MemoryIndex {
fields.clear();
this.normSimilarity = IndexSearcher.getDefaultSimilarity();
byteBlockPool.reset(false, false); // no need to 0-fill the buffers
- intBlockPool.reset(true, false); // here must must 0-fill since we use slices
+ slicedIntBlockPool.reset(true, false); // here must must 0-fill since we use slices
if (payloadsBytesRefs != null) {
payloadsBytesRefs.clear();
}
diff --git a/lucene/memory/src/test/org/apache/lucene/index/memory/TestMemoryIndex.java b/lucene/memory/src/test/org/apache/lucene/index/memory/TestMemoryIndex.java
index ac2e4148a27..55712f6b890 100644
--- a/lucene/memory/src/test/org/apache/lucene/index/memory/TestMemoryIndex.java
+++ b/lucene/memory/src/test/org/apache/lucene/index/memory/TestMemoryIndex.java
@@ -260,6 +260,7 @@ public class TestMemoryIndex extends LuceneTestCase {
assertThat(mi.search(new TermQuery(new Term("field2", "text"))), is(0.0f));
assertThat(mi.search(new TermQuery(new Term("field2", "untokenized text"))), not(0.0f));
+ assertThat(mi.search(new TermQuery(new Term("field1", "some more text"))), is(0.0f));
assertThat(mi.search(new PhraseQuery("field1", "some", "more", "text")), not(0.0f));
assertThat(mi.search(new PhraseQuery("field1", "some", "text")), not(0.0f));
assertThat(mi.search(new PhraseQuery("field1", "text", "some")), is(0.0f));
diff --git a/lucene/memory/src/test/org/apache/lucene/index/memory/TestSlicedIntBlockPool.java b/lucene/memory/src/test/org/apache/lucene/index/memory/TestSlicedIntBlockPool.java
new file mode 100644
index 00000000000..d8e7c420fea
--- /dev/null
+++ b/lucene/memory/src/test/org/apache/lucene/index/memory/TestSlicedIntBlockPool.java
@@ -0,0 +1,150 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.index.memory;
+
+import java.util.ArrayList;
+import java.util.List;
+import org.apache.lucene.tests.util.LuceneTestCase;
+import org.apache.lucene.util.Counter;
+import org.apache.lucene.util.IntBlockPool;
+
+public class TestSlicedIntBlockPool extends LuceneTestCase {
+ public void testSingleWriterReader() {
+ Counter bytesUsed = Counter.newCounter();
+ MemoryIndex.SlicedIntBlockPool slicedIntBlockPool =
+ new MemoryIndex.SlicedIntBlockPool(new ByteTrackingAllocator(bytesUsed));
+
+ for (int j = 0; j < 2; j++) {
+ MemoryIndex.SlicedIntBlockPool.SliceWriter writer =
+ new MemoryIndex.SlicedIntBlockPool.SliceWriter(slicedIntBlockPool);
+ int start = writer.startNewSlice();
+ int num = atLeast(100);
+ for (int i = 0; i < num; i++) {
+ writer.writeInt(i);
+ }
+
+ int upto = writer.getCurrentOffset();
+ MemoryIndex.SlicedIntBlockPool.SliceReader reader =
+ new MemoryIndex.SlicedIntBlockPool.SliceReader(slicedIntBlockPool);
+ reader.reset(start, upto);
+ for (int i = 0; i < num; i++) {
+ assertEquals(i, reader.readInt());
+ }
+ assertTrue(reader.endOfSlice());
+ if (random().nextBoolean()) {
+ slicedIntBlockPool.reset(true, false);
+ assertEquals(0, bytesUsed.get());
+ } else {
+ slicedIntBlockPool.reset(true, true);
+ assertEquals(IntBlockPool.INT_BLOCK_SIZE * Integer.BYTES, bytesUsed.get());
+ }
+ }
+ }
+
+ public void testMultipleWriterReader() {
+ Counter bytesUsed = Counter.newCounter();
+ MemoryIndex.SlicedIntBlockPool slicedIntBlockPool =
+ new MemoryIndex.SlicedIntBlockPool(new ByteTrackingAllocator(bytesUsed));
+ for (int j = 0; j < 2; j++) {
+ List holders = new ArrayList<>();
+ int num = atLeast(4);
+ for (int i = 0; i < num; i++) {
+ holders.add(new StartEndAndValues(random().nextInt(1000)));
+ }
+ MemoryIndex.SlicedIntBlockPool.SliceWriter writer =
+ new MemoryIndex.SlicedIntBlockPool.SliceWriter(slicedIntBlockPool);
+ MemoryIndex.SlicedIntBlockPool.SliceReader reader =
+ new MemoryIndex.SlicedIntBlockPool.SliceReader(slicedIntBlockPool);
+
+ int numValuesToWrite = atLeast(10000);
+ for (int i = 0; i < numValuesToWrite; i++) {
+ StartEndAndValues values = holders.get(random().nextInt(holders.size()));
+ if (values.valueCount == 0) {
+ values.start = writer.startNewSlice();
+ } else {
+ writer.reset(values.end);
+ }
+ writer.writeInt(values.nextValue());
+ values.end = writer.getCurrentOffset();
+ if (random().nextInt(5) == 0) {
+ // pick one and reader the ints
+ assertReader(reader, holders.get(random().nextInt(holders.size())));
+ }
+ }
+
+ while (!holders.isEmpty()) {
+ StartEndAndValues values = holders.remove(random().nextInt(holders.size()));
+ assertReader(reader, values);
+ }
+ if (random().nextBoolean()) {
+ slicedIntBlockPool.reset(true, false);
+ assertEquals(0, bytesUsed.get());
+ } else {
+ slicedIntBlockPool.reset(true, true);
+ assertEquals(IntBlockPool.INT_BLOCK_SIZE * Integer.BYTES, bytesUsed.get());
+ }
+ }
+ }
+
+ private static class ByteTrackingAllocator extends IntBlockPool.Allocator {
+ private final Counter bytesUsed;
+
+ public ByteTrackingAllocator(Counter bytesUsed) {
+ this(IntBlockPool.INT_BLOCK_SIZE, bytesUsed);
+ }
+
+ public ByteTrackingAllocator(int blockSize, Counter bytesUsed) {
+ super(blockSize);
+ this.bytesUsed = bytesUsed;
+ }
+
+ @Override
+ public int[] getIntBlock() {
+ bytesUsed.addAndGet(blockSize * Integer.BYTES);
+ return new int[blockSize];
+ }
+
+ @Override
+ public void recycleIntBlocks(int[][] blocks, int start, int end) {
+ bytesUsed.addAndGet(-((end - start) * blockSize * Integer.BYTES));
+ }
+ }
+
+ private void assertReader(
+ MemoryIndex.SlicedIntBlockPool.SliceReader reader, StartEndAndValues values) {
+ reader.reset(values.start, values.end);
+ for (int i = 0; i < values.valueCount; i++) {
+ assertEquals(values.valueOffset + i, reader.readInt());
+ }
+ assertTrue(reader.endOfSlice());
+ }
+
+ private static class StartEndAndValues {
+ int valueOffset;
+ int valueCount;
+ int start;
+ int end;
+
+ public StartEndAndValues(int valueOffset) {
+ this.valueOffset = valueOffset;
+ }
+
+ public int nextValue() {
+ return valueOffset + valueCount++;
+ }
+ }
+}