From 8433352eec405d53f6defbe134b198849a8adc9b Mon Sep 17 00:00:00 2001 From: Zhang Chao <80152403@qq.com> Date: Mon, 30 Sep 2024 17:04:18 +0800 Subject: [PATCH] Move DataInput.readGroupVInts into GroupVIntUtil (#13830) --- .../lucene99/PostingsUtil.java | 3 ++- .../benchmark/jmh/GroupVIntBenchmark.java | 8 +++---- .../lucene/codecs/lucene912/PostingsUtil.java | 3 ++- .../lucene/store/BufferedIndexInput.java | 2 +- .../lucene/store/ByteBuffersDataInput.java | 2 +- .../lucene/store/ByteBuffersIndexInput.java | 2 +- .../org/apache/lucene/store/DataInput.java | 22 ++---------------- .../org/apache/lucene/util/GroupVIntUtil.java | 23 +++++++++++++++++-- .../lucene/store/MemorySegmentIndexInput.java | 2 +- .../store/BaseChunkedDirectoryTestCase.java | 5 ++-- .../tests/store/BaseDirectoryTestCase.java | 7 +++--- 11 files changed, 42 insertions(+), 37 deletions(-) diff --git a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene99/PostingsUtil.java b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene99/PostingsUtil.java index 7b95bada5bc..dce8c2b145d 100644 --- a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene99/PostingsUtil.java +++ b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene99/PostingsUtil.java @@ -19,6 +19,7 @@ package org.apache.lucene.backward_codecs.lucene99; import java.io.IOException; import org.apache.lucene.store.IndexInput; import org.apache.lucene.store.IndexOutput; +import org.apache.lucene.util.GroupVIntUtil; /** Utility class to encode/decode postings block. */ final class PostingsUtil { @@ -35,7 +36,7 @@ final class PostingsUtil { boolean indexHasFreq, boolean decodeFreq) throws IOException { - docIn.readGroupVInts(docBuffer, num); + GroupVIntUtil.readGroupVInts(docIn, docBuffer, num); if (indexHasFreq && decodeFreq) { for (int i = 0; i < num; ++i) { freqBuffer[i] = docBuffer[i] & 0x01; diff --git a/lucene/benchmark-jmh/src/java/org/apache/lucene/benchmark/jmh/GroupVIntBenchmark.java b/lucene/benchmark-jmh/src/java/org/apache/lucene/benchmark/jmh/GroupVIntBenchmark.java index 0df0d7ecf50..48b95570694 100644 --- a/lucene/benchmark-jmh/src/java/org/apache/lucene/benchmark/jmh/GroupVIntBenchmark.java +++ b/lucene/benchmark-jmh/src/java/org/apache/lucene/benchmark/jmh/GroupVIntBenchmark.java @@ -186,7 +186,7 @@ public class GroupVIntBenchmark { @Benchmark public void benchMMapDirectoryInputs_readGroupVInt(Blackhole bh) throws IOException { byteBufferGVIntIn.seek(0); - byteBufferGVIntIn.readGroupVInts(values, size); + GroupVIntUtil.readGroupVInts(byteBufferGVIntIn, values, size); bh.consume(values); } @@ -209,14 +209,14 @@ public class GroupVIntBenchmark { @Benchmark public void benchByteArrayDataInput_readGroupVInt(Blackhole bh) throws IOException { byteArrayGVIntIn.rewind(); - byteArrayGVIntIn.readGroupVInts(values, size); + GroupVIntUtil.readGroupVInts(byteArrayGVIntIn, values, size); bh.consume(values); } @Benchmark public void benchNIOFSDirectoryInputs_readGroupVInt(Blackhole bh) throws IOException { nioGVIntIn.seek(0); - nioGVIntIn.readGroupVInts(values, size); + GroupVIntUtil.readGroupVInts(nioGVIntIn, values, size); bh.consume(values); } @@ -230,7 +230,7 @@ public class GroupVIntBenchmark { @Benchmark public void benchByteBuffersIndexInput_readGroupVInt(Blackhole bh) throws IOException { byteBuffersGVIntIn.seek(0); - byteBuffersGVIntIn.readGroupVInts(values, size); + GroupVIntUtil.readGroupVInts(byteBuffersGVIntIn, values, size); bh.consume(values); } diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene912/PostingsUtil.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene912/PostingsUtil.java index 4834dd73e22..1ae808d308f 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene912/PostingsUtil.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene912/PostingsUtil.java @@ -19,6 +19,7 @@ package org.apache.lucene.codecs.lucene912; import java.io.IOException; import org.apache.lucene.store.DataOutput; import org.apache.lucene.store.IndexInput; +import org.apache.lucene.util.GroupVIntUtil; /** Utility class to encode/decode postings block. */ final class PostingsUtil { @@ -35,7 +36,7 @@ final class PostingsUtil { boolean indexHasFreq, boolean decodeFreq) throws IOException { - docIn.readGroupVInts(docBuffer, num); + GroupVIntUtil.readGroupVInts(docIn, docBuffer, num); if (indexHasFreq && decodeFreq) { for (int i = 0; i < num; ++i) { freqBuffer[i] = docBuffer[i] & 0x01; diff --git a/lucene/core/src/java/org/apache/lucene/store/BufferedIndexInput.java b/lucene/core/src/java/org/apache/lucene/store/BufferedIndexInput.java index 13151692bc0..7f2aadf54a5 100644 --- a/lucene/core/src/java/org/apache/lucene/store/BufferedIndexInput.java +++ b/lucene/core/src/java/org/apache/lucene/store/BufferedIndexInput.java @@ -151,7 +151,7 @@ public abstract class BufferedIndexInput extends IndexInput implements RandomAcc } @Override - protected void readGroupVInt(long[] dst, int offset) throws IOException { + public void readGroupVInt(long[] dst, int offset) throws IOException { final int len = GroupVIntUtil.readGroupVInt( this, buffer.remaining(), p -> buffer.getInt((int) p), buffer.position(), dst, offset); diff --git a/lucene/core/src/java/org/apache/lucene/store/ByteBuffersDataInput.java b/lucene/core/src/java/org/apache/lucene/store/ByteBuffersDataInput.java index 4b722b61689..a09f78e5f3a 100644 --- a/lucene/core/src/java/org/apache/lucene/store/ByteBuffersDataInput.java +++ b/lucene/core/src/java/org/apache/lucene/store/ByteBuffersDataInput.java @@ -204,7 +204,7 @@ public final class ByteBuffersDataInput extends DataInput } @Override - protected void readGroupVInt(long[] dst, int offset) throws IOException { + public void readGroupVInt(long[] dst, int offset) throws IOException { final ByteBuffer block = blocks[blockIndex(pos)]; final int blockOffset = blockOffset(pos); // We MUST save the return value to local variable, could not use pos += readGroupVInt(...). diff --git a/lucene/core/src/java/org/apache/lucene/store/ByteBuffersIndexInput.java b/lucene/core/src/java/org/apache/lucene/store/ByteBuffersIndexInput.java index c66d864d570..6aebb771b68 100644 --- a/lucene/core/src/java/org/apache/lucene/store/ByteBuffersIndexInput.java +++ b/lucene/core/src/java/org/apache/lucene/store/ByteBuffersIndexInput.java @@ -206,7 +206,7 @@ public final class ByteBuffersIndexInput extends IndexInput implements RandomAcc } @Override - protected void readGroupVInt(long[] dst, int offset) throws IOException { + public void readGroupVInt(long[] dst, int offset) throws IOException { ensureOpen(); in.readGroupVInt(dst, offset); } diff --git a/lucene/core/src/java/org/apache/lucene/store/DataInput.java b/lucene/core/src/java/org/apache/lucene/store/DataInput.java index 427e81f2df2..70f9a96db9c 100644 --- a/lucene/core/src/java/org/apache/lucene/store/DataInput.java +++ b/lucene/core/src/java/org/apache/lucene/store/DataInput.java @@ -100,28 +100,10 @@ public abstract class DataInput implements Cloneable { } /** - * Read all the group varints, including the tail vints. we need a long[] because this is what - * postings are using, all longs are actually required to be integers. - * - * @param dst the array to read ints into. - * @param limit the number of int values to read. - * @lucene.experimental - */ - public final void readGroupVInts(long[] dst, int limit) throws IOException { - int i; - for (i = 0; i <= limit - 4; i += 4) { - readGroupVInt(dst, i); - } - for (; i < limit; ++i) { - dst[i] = readVInt() & 0xFFFFFFFFL; - } - } - - /** - * Override if you have a efficient implementation. In general this is when the input supports + * Override if you have an efficient implementation. In general this is when the input supports * random access. */ - protected void readGroupVInt(long[] dst, int offset) throws IOException { + public void readGroupVInt(long[] dst, int offset) throws IOException { GroupVIntUtil.readGroupVInt(this, dst, offset); } diff --git a/lucene/core/src/java/org/apache/lucene/util/GroupVIntUtil.java b/lucene/core/src/java/org/apache/lucene/util/GroupVIntUtil.java index e1b5466342a..1c5033172db 100644 --- a/lucene/core/src/java/org/apache/lucene/util/GroupVIntUtil.java +++ b/lucene/core/src/java/org/apache/lucene/util/GroupVIntUtil.java @@ -33,10 +33,29 @@ public final class GroupVIntUtil { private static final long[] MASKS = new long[] {0xFFL, 0xFFFFL, 0xFFFFFFL, 0xFFFFFFFFL}; /** - * Default implementation of read single group, for optimal performance, you should use {@link - * DataInput#readGroupVInts(long[], int)} instead. + * Read all the group varints, including the tail vints. we need a long[] because this is what + * postings are using, all longs are actually required to be integers. * * @param dst the array to read ints into. + * @param limit the number of int values to read. + * @lucene.experimental + */ + public static void readGroupVInts(DataInput in, long[] dst, int limit) throws IOException { + int i; + for (i = 0; i <= limit - 4; i += 4) { + in.readGroupVInt(dst, i); + } + for (; i < limit; ++i) { + dst[i] = in.readVInt() & 0xFFFFFFFFL; + } + } + + /** + * Default implementation of read single group, for optimal performance, you should use {@link + * GroupVIntUtil#readGroupVInts(DataInput, long[], int)} instead. + * + * @param in the input to use to read data. + * @param dst the array to read ints into. * @param offset the offset in the array to start storing ints. */ public static void readGroupVInt(DataInput in, long[] dst, int offset) throws IOException { diff --git a/lucene/core/src/java21/org/apache/lucene/store/MemorySegmentIndexInput.java b/lucene/core/src/java21/org/apache/lucene/store/MemorySegmentIndexInput.java index c6ac3d23a12..8bb70ba009d 100644 --- a/lucene/core/src/java21/org/apache/lucene/store/MemorySegmentIndexInput.java +++ b/lucene/core/src/java21/org/apache/lucene/store/MemorySegmentIndexInput.java @@ -419,7 +419,7 @@ abstract class MemorySegmentIndexInput extends IndexInput } @Override - protected void readGroupVInt(long[] dst, int offset) throws IOException { + public void readGroupVInt(long[] dst, int offset) throws IOException { try { final int len = GroupVIntUtil.readGroupVInt( diff --git a/lucene/test-framework/src/java/org/apache/lucene/tests/store/BaseChunkedDirectoryTestCase.java b/lucene/test-framework/src/java/org/apache/lucene/tests/store/BaseChunkedDirectoryTestCase.java index dd956c6c3fd..8de332eeec9 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/tests/store/BaseChunkedDirectoryTestCase.java +++ b/lucene/test-framework/src/java/org/apache/lucene/tests/store/BaseChunkedDirectoryTestCase.java @@ -33,6 +33,7 @@ import org.apache.lucene.tests.analysis.MockAnalyzer; import org.apache.lucene.tests.index.RandomIndexWriter; import org.apache.lucene.tests.util.TestUtil; import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.GroupVIntUtil; /** * Base class for Directories that "chunk" the input into blocks. @@ -77,7 +78,7 @@ public abstract class BaseChunkedDirectoryTestCase extends BaseDirectoryTestCase expectThrows( AlreadyClosedException.class, () -> { - two.readGroupVInts(values, values.length); + GroupVIntUtil.readGroupVInts(two, values, values.length); }); assertEquals(5, three.readVInt()); one.close(); @@ -105,7 +106,7 @@ public abstract class BaseChunkedDirectoryTestCase extends BaseDirectoryTestCase expectThrows( AlreadyClosedException.class, () -> { - one.readGroupVInts(values, values.length); + GroupVIntUtil.readGroupVInts(one, values, values.length); }); assertEquals(2, two.readInt()); // reopen a new slice "another": diff --git a/lucene/test-framework/src/java/org/apache/lucene/tests/store/BaseDirectoryTestCase.java b/lucene/test-framework/src/java/org/apache/lucene/tests/store/BaseDirectoryTestCase.java index 9cc271a9d61..41d72c509db 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/tests/store/BaseDirectoryTestCase.java +++ b/lucene/test-framework/src/java/org/apache/lucene/tests/store/BaseDirectoryTestCase.java @@ -59,6 +59,7 @@ import org.apache.lucene.tests.util.LuceneTestCase; import org.apache.lucene.tests.util.TestUtil; import org.apache.lucene.util.ArrayUtil; import org.apache.lucene.util.BitUtil; +import org.apache.lucene.util.GroupVIntUtil; import org.apache.lucene.util.IOUtils; import org.apache.lucene.util.packed.PackedInts; import org.junit.Assert; @@ -1458,7 +1459,7 @@ public abstract class BaseDirectoryTestCase extends LuceneTestCase { assertEquals(43, in.readByte()); assertEquals(12345, in.readShort()); assertEquals(1234567890, in.readInt()); - in.readGroupVInts(restored, 4); + GroupVIntUtil.readGroupVInts(in, restored, 4); assertArrayEquals(values, restored); assertEquals(1234567890123456789L, in.readLong()); in.close(); @@ -1485,7 +1486,7 @@ public abstract class BaseDirectoryTestCase extends LuceneTestCase { out.writeGroupVInts(values, limit); out.close(); try (IndexInput in = dir.openInput("test", IOContext.DEFAULT)) { - in.readGroupVInts(restore, limit); + GroupVIntUtil.readGroupVInts(in, restore, limit); for (int i = 0; i < limit; i++) { assertEquals(values[i], restore[i]); } @@ -1533,7 +1534,7 @@ public abstract class BaseDirectoryTestCase extends LuceneTestCase { IndexInput groupVIntIn = dir.openInput("group-varint", IOContext.DEFAULT); IndexInput vIntIn = dir.openInput("vint", IOContext.DEFAULT); for (int iter = 0; iter < iterations; iter++) { - groupVIntIn.readGroupVInts(values, numValuesArray[iter]); + GroupVIntUtil.readGroupVInts(groupVIntIn, values, numValuesArray[iter]); for (int j = 0; j < numValuesArray[iter]; j++) { assertEquals(vIntIn.readVInt(), values[j]); }