Move sliced int buffer functionality to MemoryIndex (#11248) (#12409)

* [WIP] Move IntBlockPool slices to MemoryIndex

* [WIP] Working TestMemoryIndex

* [WIP] Working TestSlicedIntBlockPool

* Working many allocations tests

* Add basic IntBlockPool test

* SlicedIntBlockPool inherits from IntBlockPool

* Tidy
Stefan Vodita 2023-07-10 15:18:28 +01:00 committed by GitHub
parent d03c8f16d9
commit b4619d87ed
7 changed files with 437 additions and 332 deletions

CHANGES.txt

@@ -120,7 +120,9 @@ Other
API Changes
---------------------
(No changes)
* GITHUB#11248: IntBlockPool's SliceReader, SliceWriter, and all int slice functionality are moved out to MemoryIndex.
(Stefan Vodita)
New Features
---------------------
@@ -151,7 +153,7 @@ Optimizations
Bug Fixes
---------------------
* GITHUB#9660: Throw and ArithmeticException when the offset overflows in a ByteBlockPool. (Stefan Vodita)
* GITHUB#9660: Throw an ArithmeticException when the offset overflows in a ByteBlockPool. (Stefan Vodita)
* GITHUB#12388: JoinUtil queries were ignoring boosts. (Alan Woodward)
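For callers outside Lucene that used the now-removed public API, the round trip below is what lucene-core no longer offers after this change. It is a minimal sketch of the old slice write/read pattern (loop bound chosen arbitrarily), not code from this commit; the equivalent functionality now lives in MemoryIndex's SlicedIntBlockPool shown further down.

    IntBlockPool pool = new IntBlockPool();                      // default DirectAllocator
    IntBlockPool.SliceWriter writer = new IntBlockPool.SliceWriter(pool);
    int start = writer.startNewSlice();                          // begin a growable int slice
    for (int i = 0; i < 100; i++) {
      writer.writeInt(i);                                        // slice grows through the level sizes as needed
    }
    int end = writer.getCurrentOffset();
    IntBlockPool.SliceReader reader = new IntBlockPool.SliceReader(pool);
    reader.reset(start, end);                                    // absolute start/end offsets in the pool
    while (!reader.endOfSlice()) {
      int value = reader.readInt();                              // values come back in write order
    }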

IntBlockPool.java (org.apache.lucene.util)

@@ -23,7 +23,7 @@ import java.util.Arrays;
*
* @lucene.internal
*/
public final class IntBlockPool {
public class IntBlockPool {
public static final int INT_BLOCK_SHIFT = 13;
public static final int INT_BLOCK_SIZE = 1 << INT_BLOCK_SHIFT;
public static final int INT_BLOCK_MASK = INT_BLOCK_SIZE - 1;
@@ -101,8 +101,7 @@ public final class IntBlockPool {
/**
* Expert: Resets the pool to its initial state reusing the first buffer.
*
* @param zeroFillBuffers if <code>true</code> the buffers are filled with <code>0</code>. This
* should be set to <code>true</code> if this pool is used with {@link SliceWriter}.
* @param zeroFillBuffers if <code>true</code> the buffers are filled with <code>0</code>.
* @param reuseFirst if <code>true</code> the first buffer will be reused and calling {@link
* IntBlockPool#nextBuffer()} is not needed after reset iff the block pool was used before ie.
* {@link IntBlockPool#nextBuffer()} was called before.
@@ -156,206 +155,6 @@ public final class IntBlockPool {
bufferUpto++;
intUpto = 0;
intOffset += INT_BLOCK_SIZE;
}
/**
* Creates a new int slice with the given starting size and returns the slice's offset in the pool.
*
* @see SliceReader
*/
private int newSlice(final int size) {
if (intUpto > INT_BLOCK_SIZE - size) {
nextBuffer();
assert assertSliceBuffer(buffer);
}
final int upto = intUpto;
intUpto += size;
buffer[intUpto - 1] = 16;
return upto;
}
private static boolean assertSliceBuffer(int[] buffer) {
int count = 0;
for (int i = 0; i < buffer.length; i++) {
count += buffer[i]; // for slices the buffer must only have 0 values
}
return count == 0;
}
// no need to make this public unless we support different sizes
/**
* An array holding the offset into the {@link IntBlockPool#LEVEL_SIZE_ARRAY} to quickly navigate
* to the next slice level.
*/
private static final int[] NEXT_LEVEL_ARRAY = {1, 2, 3, 4, 5, 6, 7, 8, 9, 9};
/** An array holding the level sizes for int slices. */
private static final int[] LEVEL_SIZE_ARRAY = {2, 4, 8, 16, 16, 32, 32, 64, 64, 128};
/** The first level size for new slices */
private static final int FIRST_LEVEL_SIZE = LEVEL_SIZE_ARRAY[0];
/** Allocates a new slice from the given offset */
private int allocSlice(final int[] slice, final int sliceOffset) {
final int level = slice[sliceOffset] & 15;
final int newLevel = NEXT_LEVEL_ARRAY[level];
final int newSize = LEVEL_SIZE_ARRAY[newLevel];
// Maybe allocate another block
if (intUpto > INT_BLOCK_SIZE - newSize) {
nextBuffer();
assert assertSliceBuffer(buffer);
}
final int newUpto = intUpto;
final int offset = newUpto + intOffset;
intUpto += newSize;
// Write forwarding address at end of last slice:
slice[sliceOffset] = offset;
// Write new level:
buffer[intUpto - 1] = 16 | newLevel;
return newUpto;
}
/**
* A {@link SliceWriter} that allows writing multiple integer slices into a given {@link
* IntBlockPool}.
*
* @see SliceReader
* @lucene.internal
*/
public static class SliceWriter {
private int offset;
private final IntBlockPool pool;
public SliceWriter(IntBlockPool pool) {
this.pool = pool;
}
/** Resets the writer to continue writing at the given offset, e.g. a value previously returned by {@link SliceWriter#getCurrentOffset()}. */
public void reset(int sliceOffset) {
this.offset = sliceOffset;
}
/** Writes the given value into the slice and resizes the slice if needed */
public void writeInt(int value) {
int[] ints = pool.buffers[offset >> INT_BLOCK_SHIFT];
assert ints != null;
int relativeOffset = offset & INT_BLOCK_MASK;
if (ints[relativeOffset] != 0) {
// End of slice; allocate a new one
relativeOffset = pool.allocSlice(ints, relativeOffset);
ints = pool.buffer;
offset = relativeOffset + pool.intOffset;
}
ints[relativeOffset] = value;
offset++;
}
/**
* starts a new slice and returns the start offset. The returned value should be used as the
* start offset to initialize a {@link SliceReader}.
*/
public int startNewSlice() {
return offset = pool.newSlice(FIRST_LEVEL_SIZE) + pool.intOffset;
}
/**
* Returns the offset of the currently written slice. The returned value should be used as the
* end offset to initialize a {@link SliceReader} once this slice is fully written or to reset
* this writer if another slice needs to be written.
*/
public int getCurrentOffset() {
return offset;
}
}
/**
* A {@link SliceReader} that can read int slices written by a {@link SliceWriter}
*
* @lucene.internal
*/
public static final class SliceReader {
private final IntBlockPool pool;
private int upto;
private int bufferUpto;
private int bufferOffset;
private int[] buffer;
private int limit;
private int level;
private int end;
/** Creates a new {@link SliceReader} on the given pool */
public SliceReader(IntBlockPool pool) {
this.pool = pool;
}
/** Resets the reader to a slice given the slice's absolute start and end offsets in the pool */
public void reset(int startOffset, int endOffset) {
bufferUpto = startOffset / INT_BLOCK_SIZE;
bufferOffset = bufferUpto * INT_BLOCK_SIZE;
this.end = endOffset;
level = 0;
buffer = pool.buffers[bufferUpto];
upto = startOffset & INT_BLOCK_MASK;
final int firstSize = IntBlockPool.LEVEL_SIZE_ARRAY[0];
if (startOffset + firstSize >= endOffset) {
// There is only this one slice to read
limit = endOffset & INT_BLOCK_MASK;
} else {
limit = upto + firstSize - 1;
}
}
/**
* Returns <code>true</code> iff the current slice is fully read. If this method returns <code>
* true</code> {@link SliceReader#readInt()} should not be called again on this slice.
*/
public boolean endOfSlice() {
assert upto + bufferOffset <= end;
return upto + bufferOffset == end;
}
/**
* Reads the next int from the current slice and returns it.
*
* @see SliceReader#endOfSlice()
*/
public int readInt() {
assert !endOfSlice();
assert upto <= limit;
if (upto == limit) nextSlice();
return buffer[upto++];
}
private void nextSlice() {
// Skip to our next slice
final int nextIndex = buffer[limit];
level = NEXT_LEVEL_ARRAY[level];
final int newSize = LEVEL_SIZE_ARRAY[level];
bufferUpto = nextIndex / INT_BLOCK_SIZE;
bufferOffset = bufferUpto * INT_BLOCK_SIZE;
buffer = pool.buffers[bufferUpto];
upto = nextIndex & INT_BLOCK_MASK;
if (nextIndex + newSize >= end) {
// We are advancing to the final slice
assert end - nextIndex > 0;
limit = end - bufferOffset;
} else {
// This is not the final slice (subtract 4 for the
// forwarding address at the end of this new slice)
limit = upto + newSize - 1;
}
}
intOffset = Math.addExact(intOffset, INT_BLOCK_SIZE);
}
}
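The slice machinery removed above (and re-added to MemoryIndex below) encodes the slice level in the low 4 bits of a non-zero sentinel written at the last position of each slice. A small worked sketch of that arithmetic, using the level arrays shown in the diff:

    int[] LEVEL_SIZE_ARRAY = {2, 4, 8, 16, 16, 32, 32, 64, 64, 128};
    int[] NEXT_LEVEL_ARRAY = {1, 2, 3, 4, 5, 6, 7, 8, 9, 9};
    int sentinel = 16;                         // newSlice() stamps 16 at the end of a level-0 slice
    int level = sentinel & 15;                 // 0: the low 4 bits hold the level
    int newLevel = NEXT_LEVEL_ARRAY[level];    // 1
    int newSize = LEVEL_SIZE_ARRAY[newLevel];  // 4: sizes grow until they cap at 128
    int newSentinel = 16 | newLevel;           // 17: stamped at the end of the newly allocated slice
    // writeInt() treats any non-zero value at the write position as "end of slice";
    // allocSlice() then overwrites the sentinel with a forwarding address to the next slice.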

TestIntBlockPool.java (org.apache.lucene.index)

@@ -16,130 +16,76 @@
*/
package org.apache.lucene.index;
import java.util.ArrayList;
import java.util.List;
import static org.apache.lucene.util.IntBlockPool.INT_BLOCK_SIZE;
import org.apache.lucene.tests.util.LuceneTestCase;
import org.apache.lucene.util.Counter;
import org.apache.lucene.util.IntBlockPool;
/** tests basic {@link IntBlockPool} functionality */
public class TestIntBlockPool extends LuceneTestCase {
public void testWriteReadReset() {
IntBlockPool pool = new IntBlockPool(new IntBlockPool.DirectAllocator());
pool.nextBuffer();
public void testSingleWriterReader() {
Counter bytesUsed = Counter.newCounter();
IntBlockPool pool = new IntBlockPool(new ByteTrackingAllocator(bytesUsed));
// Write <count> consecutive ints to the buffer, possibly allocating a new buffer
int count = random().nextInt(2 * INT_BLOCK_SIZE);
for (int i = 0; i < count; i++) {
if (pool.intUpto == INT_BLOCK_SIZE) {
pool.nextBuffer();
}
pool.buffer[pool.intUpto++] = i;
}
for (int j = 0; j < 2; j++) {
IntBlockPool.SliceWriter writer = new IntBlockPool.SliceWriter(pool);
int start = writer.startNewSlice();
int num = atLeast(100);
for (int i = 0; i < num; i++) {
writer.writeInt(i);
}
// Check that all the ints are present in the buffer pool
for (int i = 0; i < count; i++) {
assertEquals(i, pool.buffers[i / INT_BLOCK_SIZE][i % INT_BLOCK_SIZE]);
}
int upto = writer.getCurrentOffset();
IntBlockPool.SliceReader reader = new IntBlockPool.SliceReader(pool);
reader.reset(start, upto);
for (int i = 0; i < num; i++) {
assertEquals(i, reader.readInt());
}
assertTrue(reader.endOfSlice());
if (random().nextBoolean()) {
pool.reset(true, false);
assertEquals(0, bytesUsed.get());
} else {
pool.reset(true, true);
assertEquals(IntBlockPool.INT_BLOCK_SIZE * Integer.BYTES, bytesUsed.get());
}
// Reset without filling with zeros and check that the first buffer still has the ints
count = Math.min(count, INT_BLOCK_SIZE);
pool.reset(false, true);
for (int i = 0; i < count; i++) {
assertEquals(i, pool.buffers[0][i]);
}
// Reset and fill with zeros, then check there is no data left
pool.intUpto = count;
pool.reset();
for (int i = 0; i < count; i++) {
assertEquals(0, pool.buffers[0][i]);
}
}
public void testMultipleWriterReader() {
Counter bytesUsed = Counter.newCounter();
IntBlockPool pool = new IntBlockPool(new ByteTrackingAllocator(bytesUsed));
for (int j = 0; j < 2; j++) {
List<StartEndAndValues> holders = new ArrayList<>();
int num = atLeast(4);
for (int i = 0; i < num; i++) {
holders.add(new StartEndAndValues(random().nextInt(1000)));
}
IntBlockPool.SliceWriter writer = new IntBlockPool.SliceWriter(pool);
IntBlockPool.SliceReader reader = new IntBlockPool.SliceReader(pool);
public void testTooManyAllocs() {
// Use a mock allocator that doesn't waste memory
IntBlockPool pool =
new IntBlockPool(
new IntBlockPool.Allocator(0) {
final int[] buffer = new int[0];
int numValuesToWrite = atLeast(10000);
for (int i = 0; i < numValuesToWrite; i++) {
StartEndAndValues values = holders.get(random().nextInt(holders.size()));
if (values.valueCount == 0) {
values.start = writer.startNewSlice();
} else {
writer.reset(values.end);
}
writer.writeInt(values.nextValue());
values.end = writer.getCurrentOffset();
if (random().nextInt(5) == 0) {
// pick one and read the ints
assertReader(reader, holders.get(random().nextInt(holders.size())));
}
}
@Override
public void recycleIntBlocks(int[][] blocks, int start, int end) {}
while (!holders.isEmpty()) {
StartEndAndValues values = holders.remove(random().nextInt(holders.size()));
assertReader(reader, values);
}
if (random().nextBoolean()) {
pool.reset(true, false);
assertEquals(0, bytesUsed.get());
} else {
pool.reset(true, true);
assertEquals(IntBlockPool.INT_BLOCK_SIZE * Integer.BYTES, bytesUsed.get());
@Override
public int[] getIntBlock() {
return buffer;
}
});
pool.nextBuffer();
boolean throwsException = false;
for (int i = 0; i < Integer.MAX_VALUE / INT_BLOCK_SIZE + 1; i++) {
try {
pool.nextBuffer();
} catch (
@SuppressWarnings("unused")
ArithmeticException ignored) {
// The offset overflows on the last attempt to call nextBuffer()
throwsException = true;
break;
}
}
}
private static class ByteTrackingAllocator extends IntBlockPool.Allocator {
private final Counter bytesUsed;
public ByteTrackingAllocator(Counter bytesUsed) {
this(IntBlockPool.INT_BLOCK_SIZE, bytesUsed);
}
public ByteTrackingAllocator(int blockSize, Counter bytesUsed) {
super(blockSize);
this.bytesUsed = bytesUsed;
}
@Override
public int[] getIntBlock() {
bytesUsed.addAndGet(blockSize * Integer.BYTES);
return new int[blockSize];
}
@Override
public void recycleIntBlocks(int[][] blocks, int start, int end) {
bytesUsed.addAndGet(-((end - start) * blockSize * Integer.BYTES));
}
}
private void assertReader(IntBlockPool.SliceReader reader, StartEndAndValues values) {
reader.reset(values.start, values.end);
for (int i = 0; i < values.valueCount; i++) {
assertEquals(values.valueOffset + i, reader.readInt());
}
assertTrue(reader.endOfSlice());
}
private static class StartEndAndValues {
int valueOffset;
int valueCount;
int start;
int end;
public StartEndAndValues(int valueOffset) {
this.valueOffset = valueOffset;
}
public int nextValue() {
return valueOffset + valueCount++;
}
assertTrue(throwsException);
assertTrue(pool.intOffset + INT_BLOCK_SIZE < pool.intOffset);
}
}
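With the slice classes gone, the new test above exercises only what remains of IntBlockPool in lucene-core: raw block paging and reset. A minimal sketch of that surface (the loop bound is arbitrary), mirroring the new testWriteReadReset:

    IntBlockPool pool = new IntBlockPool(new IntBlockPool.DirectAllocator());
    pool.nextBuffer();                                     // allocate the first block before writing
    for (int i = 0; i < 3 * IntBlockPool.INT_BLOCK_SIZE / 2; i++) {
      if (pool.intUpto == IntBlockPool.INT_BLOCK_SIZE) {
        pool.nextBuffer();                                 // page to a fresh block when the current one is full
      }
      pool.buffer[pool.intUpto++] = i;
    }
    pool.reset(true, true);                                // zero-fill and keep the first buffer for reuse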

TestByteBlockPool.java

@@ -153,6 +153,7 @@ public class TestByteBlockPool extends LuceneTestCase {
});
pool.nextBuffer();
boolean throwsException = false;
for (int i = 0; i < Integer.MAX_VALUE / ByteBlockPool.BYTE_BLOCK_SIZE + 1; i++) {
try {
pool.nextBuffer();
@@ -160,9 +161,11 @@ public class TestByteBlockPool extends LuceneTestCase {
@SuppressWarnings("unused")
ArithmeticException ignored) {
// The offset overflows on the last attempt to call nextBuffer()
throwsException = true;
break;
}
}
assertTrue(pool.byteOffset + ByteBlockPool.BYTE_BLOCK_SIZE < 0);
assertTrue(throwsException);
assertTrue(pool.byteOffset + ByteBlockPool.BYTE_BLOCK_SIZE < pool.byteOffset);
}
}
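Both overflow tests rely on the same two facts: Math.addExact throws ArithmeticException where plain int addition would wrap, and a wrapped sum of two positive ints is smaller than either operand, which is what the final assertion checks. A tiny illustration (the block size value is just an example):

    int offset = Integer.MAX_VALUE - 100;
    int blockSize = 8192;
    boolean wouldWrap = offset + blockSize < offset;       // true: silent two's-complement wrap-around
    // Math.addExact(offset, blockSize)                     // throws ArithmeticException instead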

MemoryIndex.java (org.apache.lucene.index.memory)

@@ -57,8 +57,6 @@ import org.apache.lucene.util.BytesRefHash;
import org.apache.lucene.util.BytesRefHash.DirectBytesStartArray;
import org.apache.lucene.util.Counter;
import org.apache.lucene.util.IntBlockPool;
import org.apache.lucene.util.IntBlockPool.SliceReader;
import org.apache.lucene.util.IntBlockPool.SliceWriter;
import org.apache.lucene.util.RecyclingByteBlockAllocator;
import org.apache.lucene.util.RecyclingIntBlockAllocator;
import org.apache.lucene.util.Version;
@@ -164,6 +162,212 @@ import org.apache.lucene.util.Version;
* </a>).
*/
public class MemoryIndex {
static class SlicedIntBlockPool extends IntBlockPool {
/**
* An array holding the offset into the {@link SlicedIntBlockPool#LEVEL_SIZE_ARRAY} to quickly
* navigate to the next slice level.
*/
private static final int[] NEXT_LEVEL_ARRAY = {1, 2, 3, 4, 5, 6, 7, 8, 9, 9};
/** An array holding the level sizes for int slices. */
private static final int[] LEVEL_SIZE_ARRAY = {2, 4, 8, 16, 16, 32, 32, 64, 64, 128};
/** The first level size for new slices */
private static final int FIRST_LEVEL_SIZE = LEVEL_SIZE_ARRAY[0];
SlicedIntBlockPool(Allocator allocator) {
super(allocator);
}
/**
* Creates a new int slice with the given starting size and returns the slice's offset in the
* pool.
*
* @see SliceReader
*/
private int newSlice(final int size) {
if (intUpto > INT_BLOCK_SIZE - size) {
nextBuffer();
assert assertSliceBuffer(buffer);
}
final int upto = intUpto;
intUpto += size;
buffer[intUpto - 1] = 16;
return upto;
}
private static boolean assertSliceBuffer(int[] buffer) {
int count = 0;
for (int i = 0; i < buffer.length; i++) {
count += buffer[i]; // for slices the buffer must only have 0 values
}
return count == 0;
}
/** Allocates a new slice from the given offset */
private int allocSlice(final int[] slice, final int sliceOffset) {
final int level = slice[sliceOffset] & 15;
final int newLevel = NEXT_LEVEL_ARRAY[level];
final int newSize = LEVEL_SIZE_ARRAY[newLevel];
// Maybe allocate another block
if (intUpto > INT_BLOCK_SIZE - newSize) {
nextBuffer();
assert assertSliceBuffer(buffer);
}
final int newUpto = intUpto;
final int offset = newUpto + intOffset;
intUpto += newSize;
// Write forwarding address at end of last slice:
slice[sliceOffset] = offset;
// Write new level:
buffer[intUpto - 1] = 16 | newLevel;
return newUpto;
}
/**
* A {@link SliceWriter} that allows writing multiple integer slices into a given {@link
* IntBlockPool}.
*
* @see SliceReader
* @lucene.internal
*/
static class SliceWriter {
private int offset;
private final SlicedIntBlockPool slicedIntBlockPool;
public SliceWriter(SlicedIntBlockPool slicedIntBlockPool) {
this.slicedIntBlockPool = slicedIntBlockPool;
}
/** Resets the writer to continue writing at the given offset, e.g. a value previously returned by {@link SliceWriter#getCurrentOffset()}. */
public void reset(int sliceOffset) {
this.offset = sliceOffset;
}
/** Writes the given value into the slice and resizes the slice if needed */
public void writeInt(int value) {
int[] ints = slicedIntBlockPool.buffers[offset >> INT_BLOCK_SHIFT];
assert ints != null;
int relativeOffset = offset & INT_BLOCK_MASK;
if (ints[relativeOffset] != 0) {
// End of slice; allocate a new one
relativeOffset = slicedIntBlockPool.allocSlice(ints, relativeOffset);
ints = slicedIntBlockPool.buffer;
offset = relativeOffset + slicedIntBlockPool.intOffset;
}
ints[relativeOffset] = value;
offset++;
}
/**
* starts a new slice and returns the start offset. The returned value should be used as the
* start offset to initialize a {@link SliceReader}.
*/
public int startNewSlice() {
return offset =
slicedIntBlockPool.newSlice(FIRST_LEVEL_SIZE) + slicedIntBlockPool.intOffset;
}
/**
* Returns the offset of the currently written slice. The returned value should be used as the
* end offset to initialize a {@link SliceReader} once this slice is fully written or to reset
* this writer if another slice needs to be written.
*/
public int getCurrentOffset() {
return offset;
}
}
/**
* A {@link SliceReader} that can read int slices written by a {@link SliceWriter}
*
* @lucene.internal
*/
static class SliceReader {
private final SlicedIntBlockPool slicedIntBlockPool;
private int upto;
private int bufferUpto;
private int bufferOffset;
private int[] buffer;
private int limit;
private int level;
private int end;
/** Creates a new {@link SliceReader} on the given pool */
public SliceReader(SlicedIntBlockPool slicedIntBlockPool) {
this.slicedIntBlockPool = slicedIntBlockPool;
}
/** Resets the reader to a slice given the slice's absolute start and end offsets in the pool */
public void reset(int startOffset, int endOffset) {
bufferUpto = startOffset / INT_BLOCK_SIZE;
bufferOffset = bufferUpto * INT_BLOCK_SIZE;
this.end = endOffset;
level = 0;
buffer = slicedIntBlockPool.buffers[bufferUpto];
upto = startOffset & INT_BLOCK_MASK;
final int firstSize = LEVEL_SIZE_ARRAY[0];
if (startOffset + firstSize >= endOffset) {
// There is only this one slice to read
limit = endOffset & INT_BLOCK_MASK;
} else {
limit = upto + firstSize - 1;
}
}
/**
* Returns <code>true</code> iff the current slice is fully read. If this method returns
* <code>
* true</code> {@link SliceReader#readInt()} should not be called again on this slice.
*/
public boolean endOfSlice() {
assert upto + bufferOffset <= end;
return upto + bufferOffset == end;
}
/**
* Reads the next int from the current slice and returns it.
*
* @see SliceReader#endOfSlice()
*/
public int readInt() {
assert !endOfSlice();
assert upto <= limit;
if (upto == limit) nextSlice();
return buffer[upto++];
}
private void nextSlice() {
// Skip to our next slice
final int nextIndex = buffer[limit];
level = NEXT_LEVEL_ARRAY[level];
final int newSize = LEVEL_SIZE_ARRAY[level];
bufferUpto = nextIndex / INT_BLOCK_SIZE;
bufferOffset = bufferUpto * INT_BLOCK_SIZE;
buffer = slicedIntBlockPool.buffers[bufferUpto];
upto = nextIndex & INT_BLOCK_MASK;
if (nextIndex + newSize >= end) {
// We are advancing to the final slice
assert end - nextIndex > 0;
limit = end - bufferOffset;
} else {
// This is not the final slice (subtract 4 for the
// forwarding address at the end of this new slice)
limit = upto + newSize - 1;
}
}
}
}
private static final boolean DEBUG = false;
@@ -174,9 +378,8 @@ public class MemoryIndex {
private final boolean storePayloads;
private final ByteBlockPool byteBlockPool;
private final IntBlockPool intBlockPool;
// private final IntBlockPool.SliceReader postingsReader;
private final IntBlockPool.SliceWriter postingsWriter;
private final SlicedIntBlockPool slicedIntBlockPool;
private final SlicedIntBlockPool.SliceWriter postingsWriter;
private final BytesRefArray payloadsBytesRefs; // non null only when storePayloads
private Counter bytesUsed;
@@ -237,19 +440,19 @@ public class MemoryIndex {
final int maxBufferedIntBlocks =
(int)
((maxReusedBytes - (maxBufferedByteBlocks * (long) ByteBlockPool.BYTE_BLOCK_SIZE))
/ (IntBlockPool.INT_BLOCK_SIZE * (long) Integer.BYTES));
/ (SlicedIntBlockPool.INT_BLOCK_SIZE * (long) Integer.BYTES));
assert (maxBufferedByteBlocks * ByteBlockPool.BYTE_BLOCK_SIZE)
+ (maxBufferedIntBlocks * IntBlockPool.INT_BLOCK_SIZE * Integer.BYTES)
+ (maxBufferedIntBlocks * SlicedIntBlockPool.INT_BLOCK_SIZE * Integer.BYTES)
<= maxReusedBytes;
byteBlockPool =
new ByteBlockPool(
new RecyclingByteBlockAllocator(
ByteBlockPool.BYTE_BLOCK_SIZE, maxBufferedByteBlocks, bytesUsed));
intBlockPool =
new IntBlockPool(
slicedIntBlockPool =
new SlicedIntBlockPool(
new RecyclingIntBlockAllocator(
IntBlockPool.INT_BLOCK_SIZE, maxBufferedIntBlocks, bytesUsed));
postingsWriter = new SliceWriter(intBlockPool);
SlicedIntBlockPool.INT_BLOCK_SIZE, maxBufferedIntBlocks, bytesUsed));
postingsWriter = new SlicedIntBlockPool.SliceWriter(slicedIntBlockPool);
// TODO refactor BytesRefArray to allow us to apply maxReusedBytes option
payloadsBytesRefs = storePayloads ? new BytesRefArray(bytesUsed) : null;
}
@@ -842,7 +1045,8 @@ public class MemoryIndex {
result.append(fieldName).append(":\n");
SliceByteStartArray sliceArray = info.sliceArray;
int numPositions = 0;
SliceReader postingsReader = new SliceReader(intBlockPool);
SlicedIntBlockPool.SliceReader postingsReader =
new SlicedIntBlockPool.SliceReader(slicedIntBlockPool);
for (int j = 0; j < info.terms.size(); j++) {
int ord = info.sortedTerms[j];
info.terms.get(ord, spare);
@@ -1652,7 +1856,7 @@ public class MemoryIndex {
private class MemoryPostingsEnum extends PostingsEnum {
private final SliceReader sliceReader;
private final SlicedIntBlockPool.SliceReader sliceReader;
private int posUpto; // for assert
private boolean hasNext;
private int doc = -1;
@@ -1663,7 +1867,7 @@ public class MemoryIndex {
private final BytesRefBuilder payloadBuilder; // only non-null when storePayloads
public MemoryPostingsEnum() {
this.sliceReader = new SliceReader(intBlockPool);
this.sliceReader = new SlicedIntBlockPool.SliceReader(slicedIntBlockPool);
this.payloadBuilder = storePayloads ? new BytesRefBuilder() : null;
}
@@ -1942,7 +2146,7 @@ public class MemoryIndex {
fields.clear();
this.normSimilarity = IndexSearcher.getDefaultSimilarity();
byteBlockPool.reset(false, false); // no need to 0-fill the buffers
intBlockPool.reset(true, false); // here we must 0-fill since we use slices
slicedIntBlockPool.reset(true, false); // here we must 0-fill since we use slices
if (payloadsBytesRefs != null) {
payloadsBytesRefs.clear();
}
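The refactor is internal to MemoryIndex; its public API and behavior are unchanged, and any phrase search still exercises the positions written through the sliced pool. A minimal usage sketch (the analyzer choice is only an example, not part of this commit):

    import org.apache.lucene.analysis.standard.StandardAnalyzer;
    import org.apache.lucene.index.memory.MemoryIndex;
    import org.apache.lucene.search.PhraseQuery;

    MemoryIndex mi = new MemoryIndex();
    mi.addField("field1", "some more text", new StandardAnalyzer());
    float score = mi.search(new PhraseQuery("field1", "some", "more", "text"));
    // score > 0: positions were read back through the new SlicedIntBlockPool.SliceReader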

TestMemoryIndex.java

@@ -260,6 +260,7 @@ public class TestMemoryIndex extends LuceneTestCase {
assertThat(mi.search(new TermQuery(new Term("field2", "text"))), is(0.0f));
assertThat(mi.search(new TermQuery(new Term("field2", "untokenized text"))), not(0.0f));
assertThat(mi.search(new TermQuery(new Term("field1", "some more text"))), is(0.0f));
assertThat(mi.search(new PhraseQuery("field1", "some", "more", "text")), not(0.0f));
assertThat(mi.search(new PhraseQuery("field1", "some", "text")), not(0.0f));
assertThat(mi.search(new PhraseQuery("field1", "text", "some")), is(0.0f));

TestSlicedIntBlockPool.java (new file, org.apache.lucene.index.memory)

@@ -0,0 +1,150 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.index.memory;
import java.util.ArrayList;
import java.util.List;
import org.apache.lucene.tests.util.LuceneTestCase;
import org.apache.lucene.util.Counter;
import org.apache.lucene.util.IntBlockPool;
public class TestSlicedIntBlockPool extends LuceneTestCase {
public void testSingleWriterReader() {
Counter bytesUsed = Counter.newCounter();
MemoryIndex.SlicedIntBlockPool slicedIntBlockPool =
new MemoryIndex.SlicedIntBlockPool(new ByteTrackingAllocator(bytesUsed));
for (int j = 0; j < 2; j++) {
MemoryIndex.SlicedIntBlockPool.SliceWriter writer =
new MemoryIndex.SlicedIntBlockPool.SliceWriter(slicedIntBlockPool);
int start = writer.startNewSlice();
int num = atLeast(100);
for (int i = 0; i < num; i++) {
writer.writeInt(i);
}
int upto = writer.getCurrentOffset();
MemoryIndex.SlicedIntBlockPool.SliceReader reader =
new MemoryIndex.SlicedIntBlockPool.SliceReader(slicedIntBlockPool);
reader.reset(start, upto);
for (int i = 0; i < num; i++) {
assertEquals(i, reader.readInt());
}
assertTrue(reader.endOfSlice());
if (random().nextBoolean()) {
slicedIntBlockPool.reset(true, false);
assertEquals(0, bytesUsed.get());
} else {
slicedIntBlockPool.reset(true, true);
assertEquals(IntBlockPool.INT_BLOCK_SIZE * Integer.BYTES, bytesUsed.get());
}
}
}
public void testMultipleWriterReader() {
Counter bytesUsed = Counter.newCounter();
MemoryIndex.SlicedIntBlockPool slicedIntBlockPool =
new MemoryIndex.SlicedIntBlockPool(new ByteTrackingAllocator(bytesUsed));
for (int j = 0; j < 2; j++) {
List<StartEndAndValues> holders = new ArrayList<>();
int num = atLeast(4);
for (int i = 0; i < num; i++) {
holders.add(new StartEndAndValues(random().nextInt(1000)));
}
MemoryIndex.SlicedIntBlockPool.SliceWriter writer =
new MemoryIndex.SlicedIntBlockPool.SliceWriter(slicedIntBlockPool);
MemoryIndex.SlicedIntBlockPool.SliceReader reader =
new MemoryIndex.SlicedIntBlockPool.SliceReader(slicedIntBlockPool);
int numValuesToWrite = atLeast(10000);
for (int i = 0; i < numValuesToWrite; i++) {
StartEndAndValues values = holders.get(random().nextInt(holders.size()));
if (values.valueCount == 0) {
values.start = writer.startNewSlice();
} else {
writer.reset(values.end);
}
writer.writeInt(values.nextValue());
values.end = writer.getCurrentOffset();
if (random().nextInt(5) == 0) {
// pick one and read the ints
assertReader(reader, holders.get(random().nextInt(holders.size())));
}
}
while (!holders.isEmpty()) {
StartEndAndValues values = holders.remove(random().nextInt(holders.size()));
assertReader(reader, values);
}
if (random().nextBoolean()) {
slicedIntBlockPool.reset(true, false);
assertEquals(0, bytesUsed.get());
} else {
slicedIntBlockPool.reset(true, true);
assertEquals(IntBlockPool.INT_BLOCK_SIZE * Integer.BYTES, bytesUsed.get());
}
}
}
private static class ByteTrackingAllocator extends IntBlockPool.Allocator {
private final Counter bytesUsed;
public ByteTrackingAllocator(Counter bytesUsed) {
this(IntBlockPool.INT_BLOCK_SIZE, bytesUsed);
}
public ByteTrackingAllocator(int blockSize, Counter bytesUsed) {
super(blockSize);
this.bytesUsed = bytesUsed;
}
@Override
public int[] getIntBlock() {
bytesUsed.addAndGet(blockSize * Integer.BYTES);
return new int[blockSize];
}
@Override
public void recycleIntBlocks(int[][] blocks, int start, int end) {
bytesUsed.addAndGet(-((end - start) * blockSize * Integer.BYTES));
}
}
private void assertReader(
MemoryIndex.SlicedIntBlockPool.SliceReader reader, StartEndAndValues values) {
reader.reset(values.start, values.end);
for (int i = 0; i < values.valueCount; i++) {
assertEquals(values.valueOffset + i, reader.readInt());
}
assertTrue(reader.endOfSlice());
}
private static class StartEndAndValues {
int valueOffset;
int valueCount;
int start;
int end;
public StartEndAndValues(int valueOffset) {
this.valueOffset = valueOffset;
}
public int nextValue() {
return valueOffset + valueCount++;
}
}
}