LUCENE-9673: fix IntBlockPool's slice allocator so that chained int[] slices actually grow to larger and larger sizes; eliminate RAM wasted by over-allocating the int[] used to track in-memory postings

This commit is contained in:
Mike McCandless 2021-10-28 09:37:36 -04:00
parent 727c6b1e0b
commit 512cad0e01
3 changed files with 20 additions and 13 deletions

View File

@ -465,7 +465,10 @@ Improvements
Optimizations Optimizations
--------------------- ---------------------
(No changes)
* LUCENE-9673: Substantially improve RAM efficiency of how MemoryIndex stores
postings in memory, and reduce a bit of RAM overhead in
IndexWriter's internal postings book-keeping (mashudong)
Bug Fixes Bug Fixes
--------------------- ---------------------

View File

@ -140,11 +140,16 @@ abstract class TermsHashPerField implements Comparable<TermsHashPerField> {
} }
} }
/**
* Called when we first encounter a new term. We must allocate slices to store the postings (vInt
* compressed doc/freq/prox), and also the int pointers to where (in our ByteBlockPool storage)
* the postings for this term begin.
*/
private void initStreamSlices(int termID, int docID) throws IOException { private void initStreamSlices(int termID, int docID) throws IOException {
// Init stream slices // Init stream slices
// TODO: figure out why this is 2*streamCount here. streamCount should be enough? if (streamCount + intPool.intUpto > IntBlockPool.INT_BLOCK_SIZE) {
if ((2 * streamCount) + intPool.intUpto > IntBlockPool.INT_BLOCK_SIZE) { // not enough space remaining in this buffer -- jump to next buffer and lose this remaining
// can we fit all the streams in the current buffer? // piece
intPool.nextBuffer(); intPool.nextBuffer();
} }

View File

@ -172,7 +172,7 @@ public final class IntBlockPool {
final int upto = intUpto; final int upto = intUpto;
intUpto += size; intUpto += size;
buffer[intUpto - 1] = 1; buffer[intUpto - 1] = 16;
return upto; return upto;
} }
@ -185,7 +185,7 @@ public final class IntBlockPool {
} }
// no need to make this public unless we support different sizes // no need to make this public unless we support different sizes
// TODO make the levels and the sizes configurable
/** /**
* An array holding the offset into the {@link IntBlockPool#LEVEL_SIZE_ARRAY} to quickly navigate * An array holding the offset into the {@link IntBlockPool#LEVEL_SIZE_ARRAY} to quickly navigate
* to the next slice level. * to the next slice level.
@ -193,15 +193,15 @@ public final class IntBlockPool {
private static final int[] NEXT_LEVEL_ARRAY = {1, 2, 3, 4, 5, 6, 7, 8, 9, 9}; private static final int[] NEXT_LEVEL_ARRAY = {1, 2, 3, 4, 5, 6, 7, 8, 9, 9};
/** An array holding the level sizes for int slices. */ /** An array holding the level sizes for int slices. */
private static final int[] LEVEL_SIZE_ARRAY = {2, 4, 8, 16, 32, 64, 128, 256, 512, 1024}; private static final int[] LEVEL_SIZE_ARRAY = {2, 4, 8, 16, 16, 32, 32, 64, 64, 128};
/** The first level size for new slices */ /** The first level size for new slices */
private static final int FIRST_LEVEL_SIZE = LEVEL_SIZE_ARRAY[0]; private static final int FIRST_LEVEL_SIZE = LEVEL_SIZE_ARRAY[0];
/** Allocates a new slice from the given offset */ /** Allocates a new slice from the given offset */
private int allocSlice(final int[] slice, final int sliceOffset) { private int allocSlice(final int[] slice, final int sliceOffset) {
final int level = slice[sliceOffset]; final int level = slice[sliceOffset] & 15;
final int newLevel = NEXT_LEVEL_ARRAY[level - 1]; final int newLevel = NEXT_LEVEL_ARRAY[level];
final int newSize = LEVEL_SIZE_ARRAY[newLevel]; final int newSize = LEVEL_SIZE_ARRAY[newLevel];
// Maybe allocate another block // Maybe allocate another block
if (intUpto > INT_BLOCK_SIZE - newSize) { if (intUpto > INT_BLOCK_SIZE - newSize) {
@ -216,7 +216,7 @@ public final class IntBlockPool {
slice[sliceOffset] = offset; slice[sliceOffset] = offset;
// Write new level: // Write new level:
buffer[intUpto - 1] = newLevel; buffer[intUpto - 1] = 16 | newLevel;
return newUpto; return newUpto;
} }
@ -300,8 +300,7 @@ public final class IntBlockPool {
bufferUpto = startOffset / INT_BLOCK_SIZE; bufferUpto = startOffset / INT_BLOCK_SIZE;
bufferOffset = bufferUpto * INT_BLOCK_SIZE; bufferOffset = bufferUpto * INT_BLOCK_SIZE;
this.end = endOffset; this.end = endOffset;
upto = startOffset; level = 0;
level = 1;
buffer = pool.buffers[bufferUpto]; buffer = pool.buffers[bufferUpto];
upto = startOffset & INT_BLOCK_MASK; upto = startOffset & INT_BLOCK_MASK;
@ -339,7 +338,7 @@ public final class IntBlockPool {
private void nextSlice() { private void nextSlice() {
// Skip to our next slice // Skip to our next slice
final int nextIndex = buffer[limit]; final int nextIndex = buffer[limit];
level = NEXT_LEVEL_ARRAY[level - 1]; level = NEXT_LEVEL_ARRAY[level];
final int newSize = LEVEL_SIZE_ARRAY[level]; final int newSize = LEVEL_SIZE_ARRAY[level];
bufferUpto = nextIndex / INT_BLOCK_SIZE; bufferUpto = nextIndex / INT_BLOCK_SIZE;