From 512cad0e019d165f07062657d98987f0a38fbde9 Mon Sep 17 00:00:00 2001 From: Mike McCandless Date: Thu, 28 Oct 2021 09:37:36 -0400 Subject: [PATCH] LUCENE-9673: fix IntBlockPool's slice allocator to actually grow properly with larger and larger slice-chained int[]; excise wasted RAM due to unused (overallocation) of int[] to track in-memory postings --- lucene/CHANGES.txt | 5 ++++- .../apache/lucene/index/TermsHashPerField.java | 11 ++++++++--- .../org/apache/lucene/util/IntBlockPool.java | 17 ++++++++--------- 3 files changed, 20 insertions(+), 13 deletions(-) diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 44800ea0bf8..44f0cc1b1ea 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -465,7 +465,10 @@ Improvements Optimizations --------------------- -(No changes) + +* LUCENE-9673: Substantially improve RAM efficiency of how MemoryIndex stores + postings in memory, and reduced a bit of RAM overhead in + IndexWriter's internal postings book-keeping (mashudong) Bug Fixes --------------------- diff --git a/lucene/core/src/java/org/apache/lucene/index/TermsHashPerField.java b/lucene/core/src/java/org/apache/lucene/index/TermsHashPerField.java index 1067bc7fc03..b8120035c8d 100644 --- a/lucene/core/src/java/org/apache/lucene/index/TermsHashPerField.java +++ b/lucene/core/src/java/org/apache/lucene/index/TermsHashPerField.java @@ -140,11 +140,16 @@ abstract class TermsHashPerField implements Comparable { } } + /** + * Called when we first encounter a new term. We must allocate slices to store the postings (vInt + * compressed doc/freq/prox), and also the int pointers to where (in our ByteBlockPool storage) + * the postings for this term begin. + */ private void initStreamSlices(int termID, int docID) throws IOException { // Init stream slices - // TODO: figure out why this is 2*streamCount here. streamCount should be enough? - if ((2 * streamCount) + intPool.intUpto > IntBlockPool.INT_BLOCK_SIZE) { - // can we fit all the streams in the current buffer? 
+ if (streamCount + intPool.intUpto > IntBlockPool.INT_BLOCK_SIZE) { + // not enough space remaining in this buffer -- jump to next buffer and lose this remaining + // piece intPool.nextBuffer(); } diff --git a/lucene/core/src/java/org/apache/lucene/util/IntBlockPool.java b/lucene/core/src/java/org/apache/lucene/util/IntBlockPool.java index b4575ade9f5..598f55ace1c 100644 --- a/lucene/core/src/java/org/apache/lucene/util/IntBlockPool.java +++ b/lucene/core/src/java/org/apache/lucene/util/IntBlockPool.java @@ -172,7 +172,7 @@ public final class IntBlockPool { final int upto = intUpto; intUpto += size; - buffer[intUpto - 1] = 1; + buffer[intUpto - 1] = 16; return upto; } @@ -185,7 +185,7 @@ public final class IntBlockPool { } // no need to make this public unless we support different sizes - // TODO make the levels and the sizes configurable + /** * An array holding the offset into the {@link IntBlockPool#LEVEL_SIZE_ARRAY} to quickly navigate * to the next slice level. @@ -193,15 +193,15 @@ public final class IntBlockPool { private static final int[] NEXT_LEVEL_ARRAY = {1, 2, 3, 4, 5, 6, 7, 8, 9, 9}; /** An array holding the level sizes for int slices. 
*/ - private static final int[] LEVEL_SIZE_ARRAY = {2, 4, 8, 16, 32, 64, 128, 256, 512, 1024}; + private static final int[] LEVEL_SIZE_ARRAY = {2, 4, 8, 16, 16, 32, 32, 64, 64, 128}; /** The first level size for new slices */ private static final int FIRST_LEVEL_SIZE = LEVEL_SIZE_ARRAY[0]; /** Allocates a new slice from the given offset */ private int allocSlice(final int[] slice, final int sliceOffset) { - final int level = slice[sliceOffset]; - final int newLevel = NEXT_LEVEL_ARRAY[level - 1]; + final int level = slice[sliceOffset] & 15; + final int newLevel = NEXT_LEVEL_ARRAY[level]; final int newSize = LEVEL_SIZE_ARRAY[newLevel]; // Maybe allocate another block if (intUpto > INT_BLOCK_SIZE - newSize) { @@ -216,7 +216,7 @@ public final class IntBlockPool { slice[sliceOffset] = offset; // Write new level: - buffer[intUpto - 1] = newLevel; + buffer[intUpto - 1] = 16 | newLevel; return newUpto; } @@ -300,8 +300,7 @@ public final class IntBlockPool { bufferUpto = startOffset / INT_BLOCK_SIZE; bufferOffset = bufferUpto * INT_BLOCK_SIZE; this.end = endOffset; - upto = startOffset; - level = 1; + level = 0; buffer = pool.buffers[bufferUpto]; upto = startOffset & INT_BLOCK_MASK; @@ -339,7 +338,7 @@ public final class IntBlockPool { private void nextSlice() { // Skip to our next slice final int nextIndex = buffer[limit]; - level = NEXT_LEVEL_ARRAY[level - 1]; + level = NEXT_LEVEL_ARRAY[level]; final int newSize = LEVEL_SIZE_ARRAY[level]; bufferUpto = nextIndex / INT_BLOCK_SIZE;