From 14196cfcf75bdc15d8f9ada5489961f0255d5058 Mon Sep 17 00:00:00 2001 From: Mike McCandless Date: Mon, 13 Nov 2023 08:26:50 -0500 Subject: [PATCH 01/80] remove errant lurking semicolon --- .../src/java/org/apache/lucene/store/ByteBuffersDataOutput.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lucene/core/src/java/org/apache/lucene/store/ByteBuffersDataOutput.java b/lucene/core/src/java/org/apache/lucene/store/ByteBuffersDataOutput.java index aa729d83c0b..08894d6c4ec 100644 --- a/lucene/core/src/java/org/apache/lucene/store/ByteBuffersDataOutput.java +++ b/lucene/core/src/java/org/apache/lucene/store/ByteBuffersDataOutput.java @@ -38,7 +38,7 @@ import org.apache.lucene.util.UnicodeUtil; /** A {@link DataOutput} storing data in a list of {@link ByteBuffer}s. */ public final class ByteBuffersDataOutput extends DataOutput implements Accountable { private static final ByteBuffer EMPTY = ByteBuffer.allocate(0).order(ByteOrder.LITTLE_ENDIAN); - ; + private static final byte[] EMPTY_BYTE_ARRAY = {}; public static final IntFunction ALLOCATE_BB_ON_HEAP = ByteBuffer::allocate; From a70432c110c404f3ce7817300113194103ad727d Mon Sep 17 00:00:00 2001 From: Stefan Vodita <41467371+stefanvodita@users.noreply.github.com> Date: Mon, 13 Nov 2023 14:42:01 +0000 Subject: [PATCH 02/80] [Minor] Improvements to slice pools (#12795) 1. Remove reset method used only in tests. 2. Update Javadocs. 3. Make interleaved slices test a bit more evil by adding pool resets. 
--- .../apache/lucene/index/ByteSliceReader.java | 10 +- .../org/apache/lucene/util/ByteBlockPool.java | 20 ++-- .../org/apache/lucene/util/IntBlockPool.java | 18 ++-- .../lucene/index/TestByteSlicePool.java | 96 ++++++++++--------- .../apache/lucene/index/TestIntBlockPool.java | 2 +- 5 files changed, 73 insertions(+), 73 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/index/ByteSliceReader.java b/lucene/core/src/java/org/apache/lucene/index/ByteSliceReader.java index 118aaf04c2b..d923130bb49 100644 --- a/lucene/core/src/java/org/apache/lucene/index/ByteSliceReader.java +++ b/lucene/core/src/java/org/apache/lucene/index/ByteSliceReader.java @@ -22,11 +22,11 @@ import org.apache.lucene.store.DataOutput; import org.apache.lucene.util.BitUtil; import org.apache.lucene.util.ByteBlockPool; -/* IndexInput that knows how to read the byte slices written - * by Posting and PostingVector. We read the bytes in - * each slice until we hit the end of that slice at which - * point we read the forwarding address of the next slice - * and then jump to it.*/ +/** + * IndexInput that knows how to read the byte slices written by Posting and PostingVector. We read + * the bytes in each slice until we hit the end of that slice at which point we read the forwarding + * address of the next slice and then jump to it. 
+ */ final class ByteSliceReader extends DataInput { ByteBlockPool pool; int bufferUpto; diff --git a/lucene/core/src/java/org/apache/lucene/util/ByteBlockPool.java b/lucene/core/src/java/org/apache/lucene/util/ByteBlockPool.java index 73203c07fbc..d9777d358a4 100644 --- a/lucene/core/src/java/org/apache/lucene/util/ByteBlockPool.java +++ b/lucene/core/src/java/org/apache/lucene/util/ByteBlockPool.java @@ -132,18 +132,11 @@ public final class ByteBlockPool implements Accountable { } /** - * Resets the pool to its initial state, reusing the first buffer and filling all buffers with - * {@code 0} bytes before they are reused or passed to {@link - * Allocator#recycleByteBlocks(byte[][], int, int)}. Calling {@link ByteBlockPool#nextBuffer()} is - * not needed after reset. - */ - public void reset() { - reset(true, true); - } - - /** - * Expert: Resets the pool to its initial state, while reusing the first buffer. Calling {@link - * ByteBlockPool#nextBuffer()} is not needed after reset. + * Expert: Resets the pool to its initial state, while optionally reusing the first buffer. + * Buffers that are not reused are reclaimed by {@link Allocator#recycleByteBlocks(byte[][], int, + * int)}. Buffers can be filled with zeros before recycling them. This is useful if a slice pool + * works on top of this byte pool and relies on the buffers being filled with zeros to find the + * non-zero end of slices. * * @param zeroFillBuffers if {@code true} the buffers are filled with {@code 0}. This should be * set to {@code true} if this pool is used with slices. @@ -188,7 +181,8 @@ public final class ByteBlockPool implements Accountable { /** * Allocates a new buffer and advances the pool to it. This method should be called once after the * constructor to initialize the pool. In contrast to the constructor, a {@link - * ByteBlockPool#reset()} call will advance the pool to its first buffer immediately. 
+ * ByteBlockPool#reset(boolean, boolean)} call will advance the pool to its first buffer + * immediately. */ public void nextBuffer() { if (1 + bufferUpto == buffers.length) { diff --git a/lucene/core/src/java/org/apache/lucene/util/IntBlockPool.java b/lucene/core/src/java/org/apache/lucene/util/IntBlockPool.java index 9bd1b9a8c88..2524706979a 100644 --- a/lucene/core/src/java/org/apache/lucene/util/IntBlockPool.java +++ b/lucene/core/src/java/org/apache/lucene/util/IntBlockPool.java @@ -94,15 +94,11 @@ public class IntBlockPool { } /** - * Resets the pool to its initial state reusing the first buffer. Calling {@link - * IntBlockPool#nextBuffer()} is not needed after reset. - */ - public void reset() { - this.reset(true, true); - } - - /** - * Expert: Resets the pool to its initial state reusing the first buffer. + * Expert: Resets the pool to its initial state, while optionally reusing the first buffer. + * Buffers that are not reused are reclaimed by {@link + * ByteBlockPool.Allocator#recycleByteBlocks(byte[][], int, int)}. Buffers can be filled with + * zeros before recycling them. This is useful if a slice pool works on top of this int pool and + * relies on the buffers being filled with zeros to find the non-zero end of slices. * * @param zeroFillBuffers if true the buffers are filled with 0. * @param reuseFirst if true the first buffer will be reused and calling {@link @@ -145,8 +141,8 @@ public class IntBlockPool { /** * Advances the pool to its next buffer. This method should be called once after the constructor - * to initialize the pool. In contrast to the constructor a {@link IntBlockPool#reset()} call will - * advance the pool to its first buffer immediately. + * to initialize the pool. In contrast to the constructor a {@link IntBlockPool#reset(boolean, + * boolean)} call will advance the pool to its first buffer immediately. 
*/ public void nextBuffer() { if (1 + bufferUpto == buffers.length) { diff --git a/lucene/core/src/test/org/apache/lucene/index/TestByteSlicePool.java b/lucene/core/src/test/org/apache/lucene/index/TestByteSlicePool.java index 4e7593960da..ac9559991ce 100644 --- a/lucene/core/src/test/org/apache/lucene/index/TestByteSlicePool.java +++ b/lucene/core/src/test/org/apache/lucene/index/TestByteSlicePool.java @@ -21,6 +21,7 @@ import org.apache.lucene.tests.util.TestUtil; import org.apache.lucene.util.BitUtil; import org.apache.lucene.util.ByteBlockPool; import org.apache.lucene.util.Counter; +import org.apache.lucene.util.RecyclingByteBlockAllocator; public class TestByteSlicePool extends LuceneTestCase { public void testAllocKnownSizeSlice() { @@ -223,57 +224,66 @@ public class TestByteSlicePool extends LuceneTestCase { * that we read back the same data we wrote. */ public void testRandomInterleavedSlices() { - ByteBlockPool blockPool = new ByteBlockPool(new ByteBlockPool.DirectAllocator()); + ByteBlockPool blockPool = new ByteBlockPool(new RecyclingByteBlockAllocator()); ByteSlicePool slicePool = new ByteSlicePool(blockPool); - int n = TestUtil.nextInt(random(), 2, 3); // 2 or 3 writers and readers - SliceWriter[] sliceWriters = new SliceWriter[n]; - SliceReader[] sliceReaders = new SliceReader[n]; + int nIterations = + TestUtil.nextInt(random(), 1, 3); // 1-3 iterations with buffer resets in between + for (int iter = 0; iter < nIterations; iter++) { + int n = TestUtil.nextInt(random(), 2, 3); // 2 or 3 writers and readers + SliceWriter[] sliceWriters = new SliceWriter[n]; + SliceReader[] sliceReaders = new SliceReader[n]; - // Init slice writers - for (int i = 0; i < n; i++) { - sliceWriters[i] = new SliceWriter(slicePool); - } - - // Write slices - while (true) { - int i = random().nextInt(n); - boolean succeeded = sliceWriters[i].writeSlice(); - if (succeeded == false) { - for (int j = 0; j < n; j++) { - while (sliceWriters[j].writeSlice()) - ; - } - break; + // 
Init slice writers + for (int i = 0; i < n; i++) { + sliceWriters[i] = new SliceWriter(slicePool); } - } - // Init slice readers - for (int i = 0; i < n; i++) { - sliceReaders[i] = - new SliceReader( - slicePool, - sliceWriters[i].size, - sliceWriters[i].firstSliceOffset, - sliceWriters[i].firstSlice); - } - - // Read slices - while (true) { - int i = random().nextInt(n); - boolean succeeded = sliceReaders[i].readSlice(); - if (succeeded == false) { - for (int j = 0; j < n; j++) { - while (sliceReaders[j].readSlice()) - ; + // Write slices + while (true) { + int i = random().nextInt(n); + boolean succeeded = sliceWriters[i].writeSlice(); + if (succeeded == false) { + for (int j = 0; j < n; j++) { + while (sliceWriters[j].writeSlice()) + ; + } + break; } - break; } - } - // Compare written data with read data - for (int i = 0; i < n; i++) { - assertArrayEquals(sliceWriters[i].randomData, sliceReaders[i].readData); + // Init slice readers + for (int i = 0; i < n; i++) { + sliceReaders[i] = + new SliceReader( + slicePool, + sliceWriters[i].size, + sliceWriters[i].firstSliceOffset, + sliceWriters[i].firstSlice); + } + + // Read slices + while (true) { + int i = random().nextInt(n); + boolean succeeded = sliceReaders[i].readSlice(); + if (succeeded == false) { + for (int j = 0; j < n; j++) { + while (sliceReaders[j].readSlice()) + ; + } + break; + } + } + + // Compare written data with read data + for (int i = 0; i < n; i++) { + assertArrayEquals(sliceWriters[i].randomData, sliceReaders[i].readData); + } + + // We don't rely on the buffers being filled with zeros because the SliceWriter keeps the + // slice length as state, but ByteSlicePool.allocKnownSizeSlice asserts on zeros in the + // buffer. 
+ blockPool.reset(true, random().nextBoolean()); } } } diff --git a/lucene/core/src/test/org/apache/lucene/index/TestIntBlockPool.java b/lucene/core/src/test/org/apache/lucene/index/TestIntBlockPool.java index 833a8d29144..e8f23080835 100644 --- a/lucene/core/src/test/org/apache/lucene/index/TestIntBlockPool.java +++ b/lucene/core/src/test/org/apache/lucene/index/TestIntBlockPool.java @@ -50,7 +50,7 @@ public class TestIntBlockPool extends LuceneTestCase { // Reset and fill with zeros, then check there is no data left pool.intUpto = count; - pool.reset(); + pool.reset(true, true); for (int i = 0; i < count; i++) { assertEquals(0, pool.buffers[0][i]); } From be27303e3ace53b8c5be9a480d5ad7d2c609a28f Mon Sep 17 00:00:00 2001 From: Shubham Chaudhary <36742242+shubhamvishu@users.noreply.github.com> Date: Mon, 13 Nov 2023 20:31:32 +0530 Subject: [PATCH 03/80] Minor change to IndexOrDocValuesQuery#toString (#12791) --- .../org/apache/lucene/search/IndexOrDocValuesQuery.java | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/lucene/core/src/java/org/apache/lucene/search/IndexOrDocValuesQuery.java b/lucene/core/src/java/org/apache/lucene/search/IndexOrDocValuesQuery.java index 9ccc45f5434..c8f0a51cda8 100644 --- a/lucene/core/src/java/org/apache/lucene/search/IndexOrDocValuesQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/IndexOrDocValuesQuery.java @@ -85,7 +85,11 @@ public final class IndexOrDocValuesQuery extends Query { @Override public String toString(String field) { - return indexQuery.toString(field); + return "IndexOrDocValuesQuery(indexQuery=" + + indexQuery.toString(field) + + ", dvQuery=" + + dvQuery.toString(field) + + ")"; } @Override From fcf687814445f3185276aad075ed7302ff4405d8 Mon Sep 17 00:00:00 2001 From: Jakub Slowinski <32519034+slow-J@users.noreply.github.com> Date: Mon, 13 Nov 2023 16:07:21 +0000 Subject: [PATCH 04/80] javadocs cleanup in Lucene99PostingsFormat (#12776) Addressing the last comments from 
https://github.com/apache/lucene/pull/12741 --- .../lucene/codecs/lucene99/Lucene99PostingsFormat.java | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99PostingsFormat.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99PostingsFormat.java index f233276c6c5..877746641b4 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99PostingsFormat.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99PostingsFormat.java @@ -158,8 +158,8 @@ import org.apache.lucene.util.packed.PackedInts; *
Frequencies and Skip Data *

The .doc file contains the lists of documents which contain each term, along with the * frequency of the term in that document (except when frequencies are omitted: {@link - * IndexOptions#DOCS}). It also saves skip data to the beginning of each packed or VInt block, - * when the length of document list is larger than packed block size. + * IndexOptions#DOCS}). Skip data is saved at the end of each term's postings. The skip data + * is saved once for the entire postings list. *