mirror of https://github.com/apache/lucene.git
* [WIP] Move IntBlockPool slices to MemoryIndex * [WIP] Working TestMemoryIndex * [WIP} Working TestSlicedIntBlockPool * Working many allocations tests * Add basic IntBlockPool test * SlicedIntBlockPool inherits from IntBlockPool * Tidy
This commit is contained in:
parent
d03c8f16d9
commit
b4619d87ed
|
@ -120,7 +120,9 @@ Other
|
|||
|
||||
API Changes
|
||||
---------------------
|
||||
(No changes)
|
||||
|
||||
* GITHUB#11248: IntBlockPool's SliceReader, SliceWriter, and all int slice functionality are moved out to MemoryIndex.
|
||||
(Stefan Vodita)
|
||||
|
||||
New Features
|
||||
---------------------
|
||||
|
@ -151,7 +153,7 @@ Optimizations
|
|||
Bug Fixes
|
||||
---------------------
|
||||
|
||||
* GITHUB#9660: Throw and ArithmeticException when the offset overflows in a ByteBlockPool. (Stefan Vodita)
|
||||
* GITHUB#9660: Throw an ArithmeticException when the offset overflows in a ByteBlockPool. (Stefan Vodita)
|
||||
|
||||
* GITHUB#12388: JoinUtil queries were ignoring boosts. (Alan Woodward)
|
||||
|
||||
|
|
|
@ -23,7 +23,7 @@ import java.util.Arrays;
|
|||
*
|
||||
* @lucene.internal
|
||||
*/
|
||||
public final class IntBlockPool {
|
||||
public class IntBlockPool {
|
||||
public static final int INT_BLOCK_SHIFT = 13;
|
||||
public static final int INT_BLOCK_SIZE = 1 << INT_BLOCK_SHIFT;
|
||||
public static final int INT_BLOCK_MASK = INT_BLOCK_SIZE - 1;
|
||||
|
@ -101,8 +101,7 @@ public final class IntBlockPool {
|
|||
/**
|
||||
* Expert: Resets the pool to its initial state reusing the first buffer.
|
||||
*
|
||||
* @param zeroFillBuffers if <code>true</code> the buffers are filled with <code>0</code>. This
|
||||
* should be set to <code>true</code> if this pool is used with {@link SliceWriter}.
|
||||
* @param zeroFillBuffers if <code>true</code> the buffers are filled with <code>0</code>.
|
||||
* @param reuseFirst if <code>true</code> the first buffer will be reused and calling {@link
|
||||
* IntBlockPool#nextBuffer()} is not needed after reset iff the block pool was used before ie.
|
||||
* {@link IntBlockPool#nextBuffer()} was called before.
|
||||
|
@ -156,206 +155,6 @@ public final class IntBlockPool {
|
|||
bufferUpto++;
|
||||
|
||||
intUpto = 0;
|
||||
intOffset += INT_BLOCK_SIZE;
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a new int slice with the given starting size and returns the slices offset in the pool.
|
||||
*
|
||||
* @see SliceReader
|
||||
*/
|
||||
private int newSlice(final int size) {
|
||||
if (intUpto > INT_BLOCK_SIZE - size) {
|
||||
nextBuffer();
|
||||
assert assertSliceBuffer(buffer);
|
||||
}
|
||||
|
||||
final int upto = intUpto;
|
||||
intUpto += size;
|
||||
buffer[intUpto - 1] = 16;
|
||||
return upto;
|
||||
}
|
||||
|
||||
private static boolean assertSliceBuffer(int[] buffer) {
|
||||
int count = 0;
|
||||
for (int i = 0; i < buffer.length; i++) {
|
||||
count += buffer[i]; // for slices the buffer must only have 0 values
|
||||
}
|
||||
return count == 0;
|
||||
}
|
||||
|
||||
// no need to make this public unless we support different sizes
|
||||
|
||||
/**
|
||||
* An array holding the offset into the {@link IntBlockPool#LEVEL_SIZE_ARRAY} to quickly navigate
|
||||
* to the next slice level.
|
||||
*/
|
||||
private static final int[] NEXT_LEVEL_ARRAY = {1, 2, 3, 4, 5, 6, 7, 8, 9, 9};
|
||||
|
||||
/** An array holding the level sizes for int slices. */
|
||||
private static final int[] LEVEL_SIZE_ARRAY = {2, 4, 8, 16, 16, 32, 32, 64, 64, 128};
|
||||
|
||||
/** The first level size for new slices */
|
||||
private static final int FIRST_LEVEL_SIZE = LEVEL_SIZE_ARRAY[0];
|
||||
|
||||
/** Allocates a new slice from the given offset */
|
||||
private int allocSlice(final int[] slice, final int sliceOffset) {
|
||||
final int level = slice[sliceOffset] & 15;
|
||||
final int newLevel = NEXT_LEVEL_ARRAY[level];
|
||||
final int newSize = LEVEL_SIZE_ARRAY[newLevel];
|
||||
// Maybe allocate another block
|
||||
if (intUpto > INT_BLOCK_SIZE - newSize) {
|
||||
nextBuffer();
|
||||
assert assertSliceBuffer(buffer);
|
||||
}
|
||||
|
||||
final int newUpto = intUpto;
|
||||
final int offset = newUpto + intOffset;
|
||||
intUpto += newSize;
|
||||
// Write forwarding address at end of last slice:
|
||||
slice[sliceOffset] = offset;
|
||||
|
||||
// Write new level:
|
||||
buffer[intUpto - 1] = 16 | newLevel;
|
||||
|
||||
return newUpto;
|
||||
}
|
||||
|
||||
/**
|
||||
* A {@link SliceWriter} that allows to write multiple integer slices into a given {@link
|
||||
* IntBlockPool}.
|
||||
*
|
||||
* @see SliceReader
|
||||
* @lucene.internal
|
||||
*/
|
||||
public static class SliceWriter {
|
||||
|
||||
private int offset;
|
||||
private final IntBlockPool pool;
|
||||
|
||||
public SliceWriter(IntBlockPool pool) {
|
||||
this.pool = pool;
|
||||
}
|
||||
/** */
|
||||
public void reset(int sliceOffset) {
|
||||
this.offset = sliceOffset;
|
||||
}
|
||||
|
||||
/** Writes the given value into the slice and resizes the slice if needed */
|
||||
public void writeInt(int value) {
|
||||
int[] ints = pool.buffers[offset >> INT_BLOCK_SHIFT];
|
||||
assert ints != null;
|
||||
int relativeOffset = offset & INT_BLOCK_MASK;
|
||||
if (ints[relativeOffset] != 0) {
|
||||
// End of slice; allocate a new one
|
||||
relativeOffset = pool.allocSlice(ints, relativeOffset);
|
||||
ints = pool.buffer;
|
||||
offset = relativeOffset + pool.intOffset;
|
||||
}
|
||||
ints[relativeOffset] = value;
|
||||
offset++;
|
||||
}
|
||||
|
||||
/**
|
||||
* starts a new slice and returns the start offset. The returned value should be used as the
|
||||
* start offset to initialize a {@link SliceReader}.
|
||||
*/
|
||||
public int startNewSlice() {
|
||||
return offset = pool.newSlice(FIRST_LEVEL_SIZE) + pool.intOffset;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the offset of the currently written slice. The returned value should be used as the
|
||||
* end offset to initialize a {@link SliceReader} once this slice is fully written or to reset
|
||||
* the this writer if another slice needs to be written.
|
||||
*/
|
||||
public int getCurrentOffset() {
|
||||
return offset;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* A {@link SliceReader} that can read int slices written by a {@link SliceWriter}
|
||||
*
|
||||
* @lucene.internal
|
||||
*/
|
||||
public static final class SliceReader {
|
||||
|
||||
private final IntBlockPool pool;
|
||||
private int upto;
|
||||
private int bufferUpto;
|
||||
private int bufferOffset;
|
||||
private int[] buffer;
|
||||
private int limit;
|
||||
private int level;
|
||||
private int end;
|
||||
|
||||
/** Creates a new {@link SliceReader} on the given pool */
|
||||
public SliceReader(IntBlockPool pool) {
|
||||
this.pool = pool;
|
||||
}
|
||||
|
||||
/** Resets the reader to a slice give the slices absolute start and end offset in the pool */
|
||||
public void reset(int startOffset, int endOffset) {
|
||||
bufferUpto = startOffset / INT_BLOCK_SIZE;
|
||||
bufferOffset = bufferUpto * INT_BLOCK_SIZE;
|
||||
this.end = endOffset;
|
||||
level = 0;
|
||||
|
||||
buffer = pool.buffers[bufferUpto];
|
||||
upto = startOffset & INT_BLOCK_MASK;
|
||||
|
||||
final int firstSize = IntBlockPool.LEVEL_SIZE_ARRAY[0];
|
||||
if (startOffset + firstSize >= endOffset) {
|
||||
// There is only this one slice to read
|
||||
limit = endOffset & INT_BLOCK_MASK;
|
||||
} else {
|
||||
limit = upto + firstSize - 1;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns <code>true</code> iff the current slice is fully read. If this method returns <code>
|
||||
* true</code> {@link SliceReader#readInt()} should not be called again on this slice.
|
||||
*/
|
||||
public boolean endOfSlice() {
|
||||
assert upto + bufferOffset <= end;
|
||||
return upto + bufferOffset == end;
|
||||
}
|
||||
|
||||
/**
|
||||
* Reads the next int from the current slice and returns it.
|
||||
*
|
||||
* @see SliceReader#endOfSlice()
|
||||
*/
|
||||
public int readInt() {
|
||||
assert !endOfSlice();
|
||||
assert upto <= limit;
|
||||
if (upto == limit) nextSlice();
|
||||
return buffer[upto++];
|
||||
}
|
||||
|
||||
private void nextSlice() {
|
||||
// Skip to our next slice
|
||||
final int nextIndex = buffer[limit];
|
||||
level = NEXT_LEVEL_ARRAY[level];
|
||||
final int newSize = LEVEL_SIZE_ARRAY[level];
|
||||
|
||||
bufferUpto = nextIndex / INT_BLOCK_SIZE;
|
||||
bufferOffset = bufferUpto * INT_BLOCK_SIZE;
|
||||
|
||||
buffer = pool.buffers[bufferUpto];
|
||||
upto = nextIndex & INT_BLOCK_MASK;
|
||||
|
||||
if (nextIndex + newSize >= end) {
|
||||
// We are advancing to the final slice
|
||||
assert end - nextIndex > 0;
|
||||
limit = end - bufferOffset;
|
||||
} else {
|
||||
// This is not the final slice (subtract 4 for the
|
||||
// forwarding address at the end of this new slice)
|
||||
limit = upto + newSize - 1;
|
||||
}
|
||||
}
|
||||
intOffset = Math.addExact(intOffset, INT_BLOCK_SIZE);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -16,130 +16,76 @@
|
|||
*/
|
||||
package org.apache.lucene.index;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import static org.apache.lucene.util.IntBlockPool.INT_BLOCK_SIZE;
|
||||
|
||||
import org.apache.lucene.tests.util.LuceneTestCase;
|
||||
import org.apache.lucene.util.Counter;
|
||||
import org.apache.lucene.util.IntBlockPool;
|
||||
|
||||
/** tests basic {@link IntBlockPool} functionality */
|
||||
public class TestIntBlockPool extends LuceneTestCase {
|
||||
public void testWriteReadReset() {
|
||||
IntBlockPool pool = new IntBlockPool(new IntBlockPool.DirectAllocator());
|
||||
pool.nextBuffer();
|
||||
|
||||
public void testSingleWriterReader() {
|
||||
Counter bytesUsed = Counter.newCounter();
|
||||
IntBlockPool pool = new IntBlockPool(new ByteTrackingAllocator(bytesUsed));
|
||||
|
||||
for (int j = 0; j < 2; j++) {
|
||||
IntBlockPool.SliceWriter writer = new IntBlockPool.SliceWriter(pool);
|
||||
int start = writer.startNewSlice();
|
||||
int num = atLeast(100);
|
||||
for (int i = 0; i < num; i++) {
|
||||
writer.writeInt(i);
|
||||
// Write <count> consecutive ints to the buffer, possibly allocating a new buffer
|
||||
int count = random().nextInt(2 * INT_BLOCK_SIZE);
|
||||
for (int i = 0; i < count; i++) {
|
||||
if (pool.intUpto == INT_BLOCK_SIZE) {
|
||||
pool.nextBuffer();
|
||||
}
|
||||
pool.buffer[pool.intUpto++] = i;
|
||||
}
|
||||
|
||||
int upto = writer.getCurrentOffset();
|
||||
IntBlockPool.SliceReader reader = new IntBlockPool.SliceReader(pool);
|
||||
reader.reset(start, upto);
|
||||
for (int i = 0; i < num; i++) {
|
||||
assertEquals(i, reader.readInt());
|
||||
// Check that all the ints are present in th buffer pool
|
||||
for (int i = 0; i < count; i++) {
|
||||
assertEquals(i, pool.buffers[i / INT_BLOCK_SIZE][i % INT_BLOCK_SIZE]);
|
||||
}
|
||||
assertTrue(reader.endOfSlice());
|
||||
if (random().nextBoolean()) {
|
||||
pool.reset(true, false);
|
||||
assertEquals(0, bytesUsed.get());
|
||||
} else {
|
||||
pool.reset(true, true);
|
||||
assertEquals(IntBlockPool.INT_BLOCK_SIZE * Integer.BYTES, bytesUsed.get());
|
||||
|
||||
// Reset without filling with zeros and check that the first buffer still has the ints
|
||||
count = Math.min(count, INT_BLOCK_SIZE);
|
||||
pool.reset(false, true);
|
||||
for (int i = 0; i < count; i++) {
|
||||
assertEquals(i, pool.buffers[0][i]);
|
||||
}
|
||||
|
||||
// Reset and fill with zeros, then check there is no data left
|
||||
pool.intUpto = count;
|
||||
pool.reset();
|
||||
for (int i = 0; i < count; i++) {
|
||||
assertEquals(0, pool.buffers[0][i]);
|
||||
}
|
||||
}
|
||||
|
||||
public void testMultipleWriterReader() {
|
||||
Counter bytesUsed = Counter.newCounter();
|
||||
IntBlockPool pool = new IntBlockPool(new ByteTrackingAllocator(bytesUsed));
|
||||
for (int j = 0; j < 2; j++) {
|
||||
List<StartEndAndValues> holders = new ArrayList<>();
|
||||
int num = atLeast(4);
|
||||
for (int i = 0; i < num; i++) {
|
||||
holders.add(new StartEndAndValues(random().nextInt(1000)));
|
||||
}
|
||||
IntBlockPool.SliceWriter writer = new IntBlockPool.SliceWriter(pool);
|
||||
IntBlockPool.SliceReader reader = new IntBlockPool.SliceReader(pool);
|
||||
public void testTooManyAllocs() {
|
||||
// Use a mock allocator that doesn't waste memory
|
||||
IntBlockPool pool =
|
||||
new IntBlockPool(
|
||||
new IntBlockPool.Allocator(0) {
|
||||
final int[] buffer = new int[0];
|
||||
|
||||
int numValuesToWrite = atLeast(10000);
|
||||
for (int i = 0; i < numValuesToWrite; i++) {
|
||||
StartEndAndValues values = holders.get(random().nextInt(holders.size()));
|
||||
if (values.valueCount == 0) {
|
||||
values.start = writer.startNewSlice();
|
||||
} else {
|
||||
writer.reset(values.end);
|
||||
}
|
||||
writer.writeInt(values.nextValue());
|
||||
values.end = writer.getCurrentOffset();
|
||||
if (random().nextInt(5) == 0) {
|
||||
// pick one and reader the ints
|
||||
assertReader(reader, holders.get(random().nextInt(holders.size())));
|
||||
}
|
||||
}
|
||||
|
||||
while (!holders.isEmpty()) {
|
||||
StartEndAndValues values = holders.remove(random().nextInt(holders.size()));
|
||||
assertReader(reader, values);
|
||||
}
|
||||
if (random().nextBoolean()) {
|
||||
pool.reset(true, false);
|
||||
assertEquals(0, bytesUsed.get());
|
||||
} else {
|
||||
pool.reset(true, true);
|
||||
assertEquals(IntBlockPool.INT_BLOCK_SIZE * Integer.BYTES, bytesUsed.get());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private static class ByteTrackingAllocator extends IntBlockPool.Allocator {
|
||||
private final Counter bytesUsed;
|
||||
|
||||
public ByteTrackingAllocator(Counter bytesUsed) {
|
||||
this(IntBlockPool.INT_BLOCK_SIZE, bytesUsed);
|
||||
}
|
||||
|
||||
public ByteTrackingAllocator(int blockSize, Counter bytesUsed) {
|
||||
super(blockSize);
|
||||
this.bytesUsed = bytesUsed;
|
||||
}
|
||||
@Override
|
||||
public void recycleIntBlocks(int[][] blocks, int start, int end) {}
|
||||
|
||||
@Override
|
||||
public int[] getIntBlock() {
|
||||
bytesUsed.addAndGet(blockSize * Integer.BYTES);
|
||||
return new int[blockSize];
|
||||
return buffer;
|
||||
}
|
||||
});
|
||||
pool.nextBuffer();
|
||||
|
||||
@Override
|
||||
public void recycleIntBlocks(int[][] blocks, int start, int end) {
|
||||
bytesUsed.addAndGet(-((end - start) * blockSize * Integer.BYTES));
|
||||
boolean throwsException = false;
|
||||
for (int i = 0; i < Integer.MAX_VALUE / INT_BLOCK_SIZE + 1; i++) {
|
||||
try {
|
||||
pool.nextBuffer();
|
||||
} catch (
|
||||
@SuppressWarnings("unused")
|
||||
ArithmeticException ignored) {
|
||||
// The offset overflows on the last attempt to call nextBuffer()
|
||||
throwsException = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
private void assertReader(IntBlockPool.SliceReader reader, StartEndAndValues values) {
|
||||
reader.reset(values.start, values.end);
|
||||
for (int i = 0; i < values.valueCount; i++) {
|
||||
assertEquals(values.valueOffset + i, reader.readInt());
|
||||
}
|
||||
assertTrue(reader.endOfSlice());
|
||||
}
|
||||
|
||||
private static class StartEndAndValues {
|
||||
int valueOffset;
|
||||
int valueCount;
|
||||
int start;
|
||||
int end;
|
||||
|
||||
public StartEndAndValues(int valueOffset) {
|
||||
this.valueOffset = valueOffset;
|
||||
}
|
||||
|
||||
public int nextValue() {
|
||||
return valueOffset + valueCount++;
|
||||
}
|
||||
assertTrue(throwsException);
|
||||
assertTrue(pool.intOffset + INT_BLOCK_SIZE < pool.intOffset);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -153,6 +153,7 @@ public class TestByteBlockPool extends LuceneTestCase {
|
|||
});
|
||||
pool.nextBuffer();
|
||||
|
||||
boolean throwsException = false;
|
||||
for (int i = 0; i < Integer.MAX_VALUE / ByteBlockPool.BYTE_BLOCK_SIZE + 1; i++) {
|
||||
try {
|
||||
pool.nextBuffer();
|
||||
|
@ -160,9 +161,11 @@ public class TestByteBlockPool extends LuceneTestCase {
|
|||
@SuppressWarnings("unused")
|
||||
ArithmeticException ignored) {
|
||||
// The offset overflows on the last attempt to call nextBuffer()
|
||||
throwsException = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
assertTrue(pool.byteOffset + ByteBlockPool.BYTE_BLOCK_SIZE < 0);
|
||||
assertTrue(throwsException);
|
||||
assertTrue(pool.byteOffset + ByteBlockPool.BYTE_BLOCK_SIZE < pool.byteOffset);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -57,8 +57,6 @@ import org.apache.lucene.util.BytesRefHash;
|
|||
import org.apache.lucene.util.BytesRefHash.DirectBytesStartArray;
|
||||
import org.apache.lucene.util.Counter;
|
||||
import org.apache.lucene.util.IntBlockPool;
|
||||
import org.apache.lucene.util.IntBlockPool.SliceReader;
|
||||
import org.apache.lucene.util.IntBlockPool.SliceWriter;
|
||||
import org.apache.lucene.util.RecyclingByteBlockAllocator;
|
||||
import org.apache.lucene.util.RecyclingIntBlockAllocator;
|
||||
import org.apache.lucene.util.Version;
|
||||
|
@ -164,6 +162,212 @@ import org.apache.lucene.util.Version;
|
|||
* </a>).
|
||||
*/
|
||||
public class MemoryIndex {
|
||||
static class SlicedIntBlockPool extends IntBlockPool {
|
||||
/**
|
||||
* An array holding the offset into the {@link SlicedIntBlockPool#LEVEL_SIZE_ARRAY} to quickly
|
||||
* navigate to the next slice level.
|
||||
*/
|
||||
private static final int[] NEXT_LEVEL_ARRAY = {1, 2, 3, 4, 5, 6, 7, 8, 9, 9};
|
||||
|
||||
/** An array holding the level sizes for int slices. */
|
||||
private static final int[] LEVEL_SIZE_ARRAY = {2, 4, 8, 16, 16, 32, 32, 64, 64, 128};
|
||||
|
||||
/** The first level size for new slices */
|
||||
private static final int FIRST_LEVEL_SIZE = LEVEL_SIZE_ARRAY[0];
|
||||
|
||||
SlicedIntBlockPool(Allocator allocator) {
|
||||
super(allocator);
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a new int slice with the given starting size and returns the slices offset in the
|
||||
* pool.
|
||||
*
|
||||
* @see SliceReader
|
||||
*/
|
||||
private int newSlice(final int size) {
|
||||
if (intUpto > INT_BLOCK_SIZE - size) {
|
||||
nextBuffer();
|
||||
assert assertSliceBuffer(buffer);
|
||||
}
|
||||
|
||||
final int upto = intUpto;
|
||||
intUpto += size;
|
||||
buffer[intUpto - 1] = 16;
|
||||
return upto;
|
||||
}
|
||||
|
||||
private static boolean assertSliceBuffer(int[] buffer) {
|
||||
int count = 0;
|
||||
for (int i = 0; i < buffer.length; i++) {
|
||||
count += buffer[i]; // for slices the buffer must only have 0 values
|
||||
}
|
||||
return count == 0;
|
||||
}
|
||||
|
||||
/** Allocates a new slice from the given offset */
|
||||
private int allocSlice(final int[] slice, final int sliceOffset) {
|
||||
final int level = slice[sliceOffset] & 15;
|
||||
final int newLevel = NEXT_LEVEL_ARRAY[level];
|
||||
final int newSize = LEVEL_SIZE_ARRAY[newLevel];
|
||||
// Maybe allocate another block
|
||||
if (intUpto > INT_BLOCK_SIZE - newSize) {
|
||||
nextBuffer();
|
||||
assert assertSliceBuffer(buffer);
|
||||
}
|
||||
|
||||
final int newUpto = intUpto;
|
||||
final int offset = newUpto + intOffset;
|
||||
intUpto += newSize;
|
||||
// Write forwarding address at end of last slice:
|
||||
slice[sliceOffset] = offset;
|
||||
|
||||
// Write new level:
|
||||
buffer[intUpto - 1] = 16 | newLevel;
|
||||
|
||||
return newUpto;
|
||||
}
|
||||
|
||||
/**
|
||||
* A {@link SliceWriter} that allows to write multiple integer slices into a given {@link
|
||||
* IntBlockPool}.
|
||||
*
|
||||
* @see SliceReader
|
||||
* @lucene.internal
|
||||
*/
|
||||
static class SliceWriter {
|
||||
|
||||
private int offset;
|
||||
private final SlicedIntBlockPool slicedIntBlockPool;
|
||||
|
||||
public SliceWriter(SlicedIntBlockPool slicedIntBlockPool) {
|
||||
this.slicedIntBlockPool = slicedIntBlockPool;
|
||||
}
|
||||
/** */
|
||||
public void reset(int sliceOffset) {
|
||||
this.offset = sliceOffset;
|
||||
}
|
||||
|
||||
/** Writes the given value into the slice and resizes the slice if needed */
|
||||
public void writeInt(int value) {
|
||||
int[] ints = slicedIntBlockPool.buffers[offset >> INT_BLOCK_SHIFT];
|
||||
assert ints != null;
|
||||
int relativeOffset = offset & INT_BLOCK_MASK;
|
||||
if (ints[relativeOffset] != 0) {
|
||||
// End of slice; allocate a new one
|
||||
relativeOffset = slicedIntBlockPool.allocSlice(ints, relativeOffset);
|
||||
ints = slicedIntBlockPool.buffer;
|
||||
offset = relativeOffset + slicedIntBlockPool.intOffset;
|
||||
}
|
||||
ints[relativeOffset] = value;
|
||||
offset++;
|
||||
}
|
||||
|
||||
/**
|
||||
* starts a new slice and returns the start offset. The returned value should be used as the
|
||||
* start offset to initialize a {@link SliceReader}.
|
||||
*/
|
||||
public int startNewSlice() {
|
||||
return offset =
|
||||
slicedIntBlockPool.newSlice(FIRST_LEVEL_SIZE) + slicedIntBlockPool.intOffset;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the offset of the currently written slice. The returned value should be used as the
|
||||
* end offset to initialize a {@link SliceReader} once this slice is fully written or to reset
|
||||
* the this writer if another slice needs to be written.
|
||||
*/
|
||||
public int getCurrentOffset() {
|
||||
return offset;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* A {@link SliceReader} that can read int slices written by a {@link SliceWriter}
|
||||
*
|
||||
* @lucene.internal
|
||||
*/
|
||||
static class SliceReader {
|
||||
|
||||
private final SlicedIntBlockPool slicedIntBlockPool;
|
||||
private int upto;
|
||||
private int bufferUpto;
|
||||
private int bufferOffset;
|
||||
private int[] buffer;
|
||||
private int limit;
|
||||
private int level;
|
||||
private int end;
|
||||
|
||||
/** Creates a new {@link SliceReader} on the given pool */
|
||||
public SliceReader(SlicedIntBlockPool slicedIntBlockPool) {
|
||||
this.slicedIntBlockPool = slicedIntBlockPool;
|
||||
}
|
||||
|
||||
/** Resets the reader to a slice give the slices absolute start and end offset in the pool */
|
||||
public void reset(int startOffset, int endOffset) {
|
||||
bufferUpto = startOffset / INT_BLOCK_SIZE;
|
||||
bufferOffset = bufferUpto * INT_BLOCK_SIZE;
|
||||
this.end = endOffset;
|
||||
level = 0;
|
||||
|
||||
buffer = slicedIntBlockPool.buffers[bufferUpto];
|
||||
upto = startOffset & INT_BLOCK_MASK;
|
||||
|
||||
final int firstSize = LEVEL_SIZE_ARRAY[0];
|
||||
if (startOffset + firstSize >= endOffset) {
|
||||
// There is only this one slice to read
|
||||
limit = endOffset & INT_BLOCK_MASK;
|
||||
} else {
|
||||
limit = upto + firstSize - 1;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns <code>true</code> iff the current slice is fully read. If this method returns
|
||||
* <code>
|
||||
* true</code> {@link SliceReader#readInt()} should not be called again on this slice.
|
||||
*/
|
||||
public boolean endOfSlice() {
|
||||
assert upto + bufferOffset <= end;
|
||||
return upto + bufferOffset == end;
|
||||
}
|
||||
|
||||
/**
|
||||
* Reads the next int from the current slice and returns it.
|
||||
*
|
||||
* @see SliceReader#endOfSlice()
|
||||
*/
|
||||
public int readInt() {
|
||||
assert !endOfSlice();
|
||||
assert upto <= limit;
|
||||
if (upto == limit) nextSlice();
|
||||
return buffer[upto++];
|
||||
}
|
||||
|
||||
private void nextSlice() {
|
||||
// Skip to our next slice
|
||||
final int nextIndex = buffer[limit];
|
||||
level = NEXT_LEVEL_ARRAY[level];
|
||||
final int newSize = LEVEL_SIZE_ARRAY[level];
|
||||
|
||||
bufferUpto = nextIndex / INT_BLOCK_SIZE;
|
||||
bufferOffset = bufferUpto * INT_BLOCK_SIZE;
|
||||
|
||||
buffer = slicedIntBlockPool.buffers[bufferUpto];
|
||||
upto = nextIndex & INT_BLOCK_MASK;
|
||||
|
||||
if (nextIndex + newSize >= end) {
|
||||
// We are advancing to the final slice
|
||||
assert end - nextIndex > 0;
|
||||
limit = end - bufferOffset;
|
||||
} else {
|
||||
// This is not the final slice (subtract 4 for the
|
||||
// forwarding address at the end of this new slice)
|
||||
limit = upto + newSize - 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private static final boolean DEBUG = false;
|
||||
|
||||
|
@ -174,9 +378,8 @@ public class MemoryIndex {
|
|||
private final boolean storePayloads;
|
||||
|
||||
private final ByteBlockPool byteBlockPool;
|
||||
private final IntBlockPool intBlockPool;
|
||||
// private final IntBlockPool.SliceReader postingsReader;
|
||||
private final IntBlockPool.SliceWriter postingsWriter;
|
||||
private final SlicedIntBlockPool slicedIntBlockPool;
|
||||
private final SlicedIntBlockPool.SliceWriter postingsWriter;
|
||||
private final BytesRefArray payloadsBytesRefs; // non null only when storePayloads
|
||||
|
||||
private Counter bytesUsed;
|
||||
|
@ -237,19 +440,19 @@ public class MemoryIndex {
|
|||
final int maxBufferedIntBlocks =
|
||||
(int)
|
||||
((maxReusedBytes - (maxBufferedByteBlocks * (long) ByteBlockPool.BYTE_BLOCK_SIZE))
|
||||
/ (IntBlockPool.INT_BLOCK_SIZE * (long) Integer.BYTES));
|
||||
/ (SlicedIntBlockPool.INT_BLOCK_SIZE * (long) Integer.BYTES));
|
||||
assert (maxBufferedByteBlocks * ByteBlockPool.BYTE_BLOCK_SIZE)
|
||||
+ (maxBufferedIntBlocks * IntBlockPool.INT_BLOCK_SIZE * Integer.BYTES)
|
||||
+ (maxBufferedIntBlocks * SlicedIntBlockPool.INT_BLOCK_SIZE * Integer.BYTES)
|
||||
<= maxReusedBytes;
|
||||
byteBlockPool =
|
||||
new ByteBlockPool(
|
||||
new RecyclingByteBlockAllocator(
|
||||
ByteBlockPool.BYTE_BLOCK_SIZE, maxBufferedByteBlocks, bytesUsed));
|
||||
intBlockPool =
|
||||
new IntBlockPool(
|
||||
slicedIntBlockPool =
|
||||
new SlicedIntBlockPool(
|
||||
new RecyclingIntBlockAllocator(
|
||||
IntBlockPool.INT_BLOCK_SIZE, maxBufferedIntBlocks, bytesUsed));
|
||||
postingsWriter = new SliceWriter(intBlockPool);
|
||||
SlicedIntBlockPool.INT_BLOCK_SIZE, maxBufferedIntBlocks, bytesUsed));
|
||||
postingsWriter = new SlicedIntBlockPool.SliceWriter(slicedIntBlockPool);
|
||||
// TODO refactor BytesRefArray to allow us to apply maxReusedBytes option
|
||||
payloadsBytesRefs = storePayloads ? new BytesRefArray(bytesUsed) : null;
|
||||
}
|
||||
|
@ -842,7 +1045,8 @@ public class MemoryIndex {
|
|||
result.append(fieldName).append(":\n");
|
||||
SliceByteStartArray sliceArray = info.sliceArray;
|
||||
int numPositions = 0;
|
||||
SliceReader postingsReader = new SliceReader(intBlockPool);
|
||||
SlicedIntBlockPool.SliceReader postingsReader =
|
||||
new SlicedIntBlockPool.SliceReader(slicedIntBlockPool);
|
||||
for (int j = 0; j < info.terms.size(); j++) {
|
||||
int ord = info.sortedTerms[j];
|
||||
info.terms.get(ord, spare);
|
||||
|
@ -1652,7 +1856,7 @@ public class MemoryIndex {
|
|||
|
||||
private class MemoryPostingsEnum extends PostingsEnum {
|
||||
|
||||
private final SliceReader sliceReader;
|
||||
private final SlicedIntBlockPool.SliceReader sliceReader;
|
||||
private int posUpto; // for assert
|
||||
private boolean hasNext;
|
||||
private int doc = -1;
|
||||
|
@ -1663,7 +1867,7 @@ public class MemoryIndex {
|
|||
private final BytesRefBuilder payloadBuilder; // only non-null when storePayloads
|
||||
|
||||
public MemoryPostingsEnum() {
|
||||
this.sliceReader = new SliceReader(intBlockPool);
|
||||
this.sliceReader = new SlicedIntBlockPool.SliceReader(slicedIntBlockPool);
|
||||
this.payloadBuilder = storePayloads ? new BytesRefBuilder() : null;
|
||||
}
|
||||
|
||||
|
@ -1942,7 +2146,7 @@ public class MemoryIndex {
|
|||
fields.clear();
|
||||
this.normSimilarity = IndexSearcher.getDefaultSimilarity();
|
||||
byteBlockPool.reset(false, false); // no need to 0-fill the buffers
|
||||
intBlockPool.reset(true, false); // here must must 0-fill since we use slices
|
||||
slicedIntBlockPool.reset(true, false); // here must must 0-fill since we use slices
|
||||
if (payloadsBytesRefs != null) {
|
||||
payloadsBytesRefs.clear();
|
||||
}
|
||||
|
|
|
@ -260,6 +260,7 @@ public class TestMemoryIndex extends LuceneTestCase {
|
|||
assertThat(mi.search(new TermQuery(new Term("field2", "text"))), is(0.0f));
|
||||
assertThat(mi.search(new TermQuery(new Term("field2", "untokenized text"))), not(0.0f));
|
||||
|
||||
assertThat(mi.search(new TermQuery(new Term("field1", "some more text"))), is(0.0f));
|
||||
assertThat(mi.search(new PhraseQuery("field1", "some", "more", "text")), not(0.0f));
|
||||
assertThat(mi.search(new PhraseQuery("field1", "some", "text")), not(0.0f));
|
||||
assertThat(mi.search(new PhraseQuery("field1", "text", "some")), is(0.0f));
|
||||
|
|
|
@ -0,0 +1,150 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.index.memory;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import org.apache.lucene.tests.util.LuceneTestCase;
|
||||
import org.apache.lucene.util.Counter;
|
||||
import org.apache.lucene.util.IntBlockPool;
|
||||
|
||||
public class TestSlicedIntBlockPool extends LuceneTestCase {
|
||||
public void testSingleWriterReader() {
|
||||
Counter bytesUsed = Counter.newCounter();
|
||||
MemoryIndex.SlicedIntBlockPool slicedIntBlockPool =
|
||||
new MemoryIndex.SlicedIntBlockPool(new ByteTrackingAllocator(bytesUsed));
|
||||
|
||||
for (int j = 0; j < 2; j++) {
|
||||
MemoryIndex.SlicedIntBlockPool.SliceWriter writer =
|
||||
new MemoryIndex.SlicedIntBlockPool.SliceWriter(slicedIntBlockPool);
|
||||
int start = writer.startNewSlice();
|
||||
int num = atLeast(100);
|
||||
for (int i = 0; i < num; i++) {
|
||||
writer.writeInt(i);
|
||||
}
|
||||
|
||||
int upto = writer.getCurrentOffset();
|
||||
MemoryIndex.SlicedIntBlockPool.SliceReader reader =
|
||||
new MemoryIndex.SlicedIntBlockPool.SliceReader(slicedIntBlockPool);
|
||||
reader.reset(start, upto);
|
||||
for (int i = 0; i < num; i++) {
|
||||
assertEquals(i, reader.readInt());
|
||||
}
|
||||
assertTrue(reader.endOfSlice());
|
||||
if (random().nextBoolean()) {
|
||||
slicedIntBlockPool.reset(true, false);
|
||||
assertEquals(0, bytesUsed.get());
|
||||
} else {
|
||||
slicedIntBlockPool.reset(true, true);
|
||||
assertEquals(IntBlockPool.INT_BLOCK_SIZE * Integer.BYTES, bytesUsed.get());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public void testMultipleWriterReader() {
|
||||
Counter bytesUsed = Counter.newCounter();
|
||||
MemoryIndex.SlicedIntBlockPool slicedIntBlockPool =
|
||||
new MemoryIndex.SlicedIntBlockPool(new ByteTrackingAllocator(bytesUsed));
|
||||
for (int j = 0; j < 2; j++) {
|
||||
List<StartEndAndValues> holders = new ArrayList<>();
|
||||
int num = atLeast(4);
|
||||
for (int i = 0; i < num; i++) {
|
||||
holders.add(new StartEndAndValues(random().nextInt(1000)));
|
||||
}
|
||||
MemoryIndex.SlicedIntBlockPool.SliceWriter writer =
|
||||
new MemoryIndex.SlicedIntBlockPool.SliceWriter(slicedIntBlockPool);
|
||||
MemoryIndex.SlicedIntBlockPool.SliceReader reader =
|
||||
new MemoryIndex.SlicedIntBlockPool.SliceReader(slicedIntBlockPool);
|
||||
|
||||
int numValuesToWrite = atLeast(10000);
|
||||
for (int i = 0; i < numValuesToWrite; i++) {
|
||||
StartEndAndValues values = holders.get(random().nextInt(holders.size()));
|
||||
if (values.valueCount == 0) {
|
||||
values.start = writer.startNewSlice();
|
||||
} else {
|
||||
writer.reset(values.end);
|
||||
}
|
||||
writer.writeInt(values.nextValue());
|
||||
values.end = writer.getCurrentOffset();
|
||||
if (random().nextInt(5) == 0) {
|
||||
// pick one and reader the ints
|
||||
assertReader(reader, holders.get(random().nextInt(holders.size())));
|
||||
}
|
||||
}
|
||||
|
||||
while (!holders.isEmpty()) {
|
||||
StartEndAndValues values = holders.remove(random().nextInt(holders.size()));
|
||||
assertReader(reader, values);
|
||||
}
|
||||
if (random().nextBoolean()) {
|
||||
slicedIntBlockPool.reset(true, false);
|
||||
assertEquals(0, bytesUsed.get());
|
||||
} else {
|
||||
slicedIntBlockPool.reset(true, true);
|
||||
assertEquals(IntBlockPool.INT_BLOCK_SIZE * Integer.BYTES, bytesUsed.get());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private static class ByteTrackingAllocator extends IntBlockPool.Allocator {
|
||||
private final Counter bytesUsed;
|
||||
|
||||
public ByteTrackingAllocator(Counter bytesUsed) {
|
||||
this(IntBlockPool.INT_BLOCK_SIZE, bytesUsed);
|
||||
}
|
||||
|
||||
public ByteTrackingAllocator(int blockSize, Counter bytesUsed) {
|
||||
super(blockSize);
|
||||
this.bytesUsed = bytesUsed;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int[] getIntBlock() {
|
||||
bytesUsed.addAndGet(blockSize * Integer.BYTES);
|
||||
return new int[blockSize];
|
||||
}
|
||||
|
||||
@Override
|
||||
public void recycleIntBlocks(int[][] blocks, int start, int end) {
|
||||
bytesUsed.addAndGet(-((end - start) * blockSize * Integer.BYTES));
|
||||
}
|
||||
}
|
||||
|
||||
private void assertReader(
|
||||
MemoryIndex.SlicedIntBlockPool.SliceReader reader, StartEndAndValues values) {
|
||||
reader.reset(values.start, values.end);
|
||||
for (int i = 0; i < values.valueCount; i++) {
|
||||
assertEquals(values.valueOffset + i, reader.readInt());
|
||||
}
|
||||
assertTrue(reader.endOfSlice());
|
||||
}
|
||||
|
||||
private static class StartEndAndValues {
|
||||
int valueOffset;
|
||||
int valueCount;
|
||||
int start;
|
||||
int end;
|
||||
|
||||
public StartEndAndValues(int valueOffset) {
|
||||
this.valueOffset = valueOffset;
|
||||
}
|
||||
|
||||
public int nextValue() {
|
||||
return valueOffset + valueCount++;
|
||||
}
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue