mirror of https://github.com/apache/lucene.git
LUCENE-9673: fix IntBlockPool's slice allocator so that chained int[] slices actually grow to larger and larger sizes; excise RAM wasted by unused (over-allocated) int[] space used to track in-memory postings
This commit is contained in:
parent
727c6b1e0b
commit
512cad0e01
|
@ -465,7 +465,10 @@ Improvements
|
||||||
|
|
||||||
Optimizations
|
Optimizations
|
||||||
---------------------
|
---------------------
|
||||||
(No changes)
|
|
||||||
|
* LUCENE-9673: Substantially improve RAM efficiency of how MemoryIndex stores
|
||||||
|
postings in memory, and reduce a bit of RAM overhead in
|
||||||
|
IndexWriter's internal postings book-keeping (mashudong)
|
||||||
|
|
||||||
Bug Fixes
|
Bug Fixes
|
||||||
---------------------
|
---------------------
|
||||||
|
|
|
@ -140,11 +140,16 @@ abstract class TermsHashPerField implements Comparable<TermsHashPerField> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Called when we first encounter a new term. We must allocate slices to store the postings (vInt
|
||||||
|
* compressed doc/freq/prox), and also the int pointers to where (in our ByteBlockPool storage)
|
||||||
|
* the postings for this term begin.
|
||||||
|
*/
|
||||||
private void initStreamSlices(int termID, int docID) throws IOException {
|
private void initStreamSlices(int termID, int docID) throws IOException {
|
||||||
// Init stream slices
|
// Init stream slices
|
||||||
// TODO: figure out why this is 2*streamCount here. streamCount should be enough?
|
if (streamCount + intPool.intUpto > IntBlockPool.INT_BLOCK_SIZE) {
|
||||||
if ((2 * streamCount) + intPool.intUpto > IntBlockPool.INT_BLOCK_SIZE) {
|
// not enough space remaining in this buffer -- jump to next buffer and lose this remaining
|
||||||
// can we fit all the streams in the current buffer?
|
// piece
|
||||||
intPool.nextBuffer();
|
intPool.nextBuffer();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -172,7 +172,7 @@ public final class IntBlockPool {
|
||||||
|
|
||||||
final int upto = intUpto;
|
final int upto = intUpto;
|
||||||
intUpto += size;
|
intUpto += size;
|
||||||
buffer[intUpto - 1] = 1;
|
buffer[intUpto - 1] = 16;
|
||||||
return upto;
|
return upto;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -185,7 +185,7 @@ public final class IntBlockPool {
|
||||||
}
|
}
|
||||||
|
|
||||||
// no need to make this public unless we support different sizes
|
// no need to make this public unless we support different sizes
|
||||||
// TODO make the levels and the sizes configurable
|
|
||||||
/**
|
/**
|
||||||
* An array holding the offset into the {@link IntBlockPool#LEVEL_SIZE_ARRAY} to quickly navigate
|
* An array holding the offset into the {@link IntBlockPool#LEVEL_SIZE_ARRAY} to quickly navigate
|
||||||
* to the next slice level.
|
* to the next slice level.
|
||||||
|
@ -193,15 +193,15 @@ public final class IntBlockPool {
|
||||||
private static final int[] NEXT_LEVEL_ARRAY = {1, 2, 3, 4, 5, 6, 7, 8, 9, 9};
|
private static final int[] NEXT_LEVEL_ARRAY = {1, 2, 3, 4, 5, 6, 7, 8, 9, 9};
|
||||||
|
|
||||||
/** An array holding the level sizes for int slices. */
|
/** An array holding the level sizes for int slices. */
|
||||||
private static final int[] LEVEL_SIZE_ARRAY = {2, 4, 8, 16, 32, 64, 128, 256, 512, 1024};
|
private static final int[] LEVEL_SIZE_ARRAY = {2, 4, 8, 16, 16, 32, 32, 64, 64, 128};
|
||||||
|
|
||||||
/** The first level size for new slices */
|
/** The first level size for new slices */
|
||||||
private static final int FIRST_LEVEL_SIZE = LEVEL_SIZE_ARRAY[0];
|
private static final int FIRST_LEVEL_SIZE = LEVEL_SIZE_ARRAY[0];
|
||||||
|
|
||||||
/** Allocates a new slice from the given offset */
|
/** Allocates a new slice from the given offset */
|
||||||
private int allocSlice(final int[] slice, final int sliceOffset) {
|
private int allocSlice(final int[] slice, final int sliceOffset) {
|
||||||
final int level = slice[sliceOffset];
|
final int level = slice[sliceOffset] & 15;
|
||||||
final int newLevel = NEXT_LEVEL_ARRAY[level - 1];
|
final int newLevel = NEXT_LEVEL_ARRAY[level];
|
||||||
final int newSize = LEVEL_SIZE_ARRAY[newLevel];
|
final int newSize = LEVEL_SIZE_ARRAY[newLevel];
|
||||||
// Maybe allocate another block
|
// Maybe allocate another block
|
||||||
if (intUpto > INT_BLOCK_SIZE - newSize) {
|
if (intUpto > INT_BLOCK_SIZE - newSize) {
|
||||||
|
@ -216,7 +216,7 @@ public final class IntBlockPool {
|
||||||
slice[sliceOffset] = offset;
|
slice[sliceOffset] = offset;
|
||||||
|
|
||||||
// Write new level:
|
// Write new level:
|
||||||
buffer[intUpto - 1] = newLevel;
|
buffer[intUpto - 1] = 16 | newLevel;
|
||||||
|
|
||||||
return newUpto;
|
return newUpto;
|
||||||
}
|
}
|
||||||
|
@ -300,8 +300,7 @@ public final class IntBlockPool {
|
||||||
bufferUpto = startOffset / INT_BLOCK_SIZE;
|
bufferUpto = startOffset / INT_BLOCK_SIZE;
|
||||||
bufferOffset = bufferUpto * INT_BLOCK_SIZE;
|
bufferOffset = bufferUpto * INT_BLOCK_SIZE;
|
||||||
this.end = endOffset;
|
this.end = endOffset;
|
||||||
upto = startOffset;
|
level = 0;
|
||||||
level = 1;
|
|
||||||
|
|
||||||
buffer = pool.buffers[bufferUpto];
|
buffer = pool.buffers[bufferUpto];
|
||||||
upto = startOffset & INT_BLOCK_MASK;
|
upto = startOffset & INT_BLOCK_MASK;
|
||||||
|
@ -339,7 +338,7 @@ public final class IntBlockPool {
|
||||||
private void nextSlice() {
|
private void nextSlice() {
|
||||||
// Skip to our next slice
|
// Skip to our next slice
|
||||||
final int nextIndex = buffer[limit];
|
final int nextIndex = buffer[limit];
|
||||||
level = NEXT_LEVEL_ARRAY[level - 1];
|
level = NEXT_LEVEL_ARRAY[level];
|
||||||
final int newSize = LEVEL_SIZE_ARRAY[level];
|
final int newSize = LEVEL_SIZE_ARRAY[level];
|
||||||
|
|
||||||
bufferUpto = nextIndex / INT_BLOCK_SIZE;
|
bufferUpto = nextIndex / INT_BLOCK_SIZE;
|
||||||
|
|
Loading…
Reference in New Issue