Move DataInput.readGroupVInts into GroupVIntUtil (#13830)

This commit is contained in:
Zhang Chao 2024-09-30 17:04:18 +08:00 committed by GitHub
parent 22ac47c07a
commit 8433352eec
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
11 changed files with 42 additions and 37 deletions

View File

@ -19,6 +19,7 @@ package org.apache.lucene.backward_codecs.lucene99;
import java.io.IOException;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.GroupVIntUtil;
/** Utility class to encode/decode postings block. */
final class PostingsUtil {
@ -35,7 +36,7 @@ final class PostingsUtil {
boolean indexHasFreq,
boolean decodeFreq)
throws IOException {
docIn.readGroupVInts(docBuffer, num);
GroupVIntUtil.readGroupVInts(docIn, docBuffer, num);
if (indexHasFreq && decodeFreq) {
for (int i = 0; i < num; ++i) {
freqBuffer[i] = docBuffer[i] & 0x01;

View File

@ -186,7 +186,7 @@ public class GroupVIntBenchmark {
@Benchmark
public void benchMMapDirectoryInputs_readGroupVInt(Blackhole bh) throws IOException {
byteBufferGVIntIn.seek(0);
byteBufferGVIntIn.readGroupVInts(values, size);
GroupVIntUtil.readGroupVInts(byteBufferGVIntIn, values, size);
bh.consume(values);
}
@ -209,14 +209,14 @@ public class GroupVIntBenchmark {
@Benchmark
public void benchByteArrayDataInput_readGroupVInt(Blackhole bh) throws IOException {
byteArrayGVIntIn.rewind();
byteArrayGVIntIn.readGroupVInts(values, size);
GroupVIntUtil.readGroupVInts(byteArrayGVIntIn, values, size);
bh.consume(values);
}
@Benchmark
public void benchNIOFSDirectoryInputs_readGroupVInt(Blackhole bh) throws IOException {
nioGVIntIn.seek(0);
nioGVIntIn.readGroupVInts(values, size);
GroupVIntUtil.readGroupVInts(nioGVIntIn, values, size);
bh.consume(values);
}
@ -230,7 +230,7 @@ public class GroupVIntBenchmark {
@Benchmark
public void benchByteBuffersIndexInput_readGroupVInt(Blackhole bh) throws IOException {
byteBuffersGVIntIn.seek(0);
byteBuffersGVIntIn.readGroupVInts(values, size);
GroupVIntUtil.readGroupVInts(byteBuffersGVIntIn, values, size);
bh.consume(values);
}

View File

@ -19,6 +19,7 @@ package org.apache.lucene.codecs.lucene912;
import java.io.IOException;
import org.apache.lucene.store.DataOutput;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.GroupVIntUtil;
/** Utility class to encode/decode postings block. */
final class PostingsUtil {
@ -35,7 +36,7 @@ final class PostingsUtil {
boolean indexHasFreq,
boolean decodeFreq)
throws IOException {
docIn.readGroupVInts(docBuffer, num);
GroupVIntUtil.readGroupVInts(docIn, docBuffer, num);
if (indexHasFreq && decodeFreq) {
for (int i = 0; i < num; ++i) {
freqBuffer[i] = docBuffer[i] & 0x01;

View File

@ -151,7 +151,7 @@ public abstract class BufferedIndexInput extends IndexInput implements RandomAcc
}
@Override
protected void readGroupVInt(long[] dst, int offset) throws IOException {
public void readGroupVInt(long[] dst, int offset) throws IOException {
final int len =
GroupVIntUtil.readGroupVInt(
this, buffer.remaining(), p -> buffer.getInt((int) p), buffer.position(), dst, offset);

View File

@ -204,7 +204,7 @@ public final class ByteBuffersDataInput extends DataInput
}
@Override
protected void readGroupVInt(long[] dst, int offset) throws IOException {
public void readGroupVInt(long[] dst, int offset) throws IOException {
final ByteBuffer block = blocks[blockIndex(pos)];
final int blockOffset = blockOffset(pos);
// We MUST save the return value to local variable, could not use pos += readGroupVInt(...).

View File

@ -206,7 +206,7 @@ public final class ByteBuffersIndexInput extends IndexInput implements RandomAcc
}
@Override
protected void readGroupVInt(long[] dst, int offset) throws IOException {
public void readGroupVInt(long[] dst, int offset) throws IOException {
ensureOpen();
in.readGroupVInt(dst, offset);
}

View File

@ -100,28 +100,10 @@ public abstract class DataInput implements Cloneable {
}
/**
 * Reads {@code limit} group-varint encoded values into {@code dst}, including the trailing vints
 * that do not make up a complete group. The destination is a long[] because that is what postings
 * use; all stored longs are nevertheless required to be integers.
 *
 * @param dst the array to read ints into.
 * @param limit the number of int values to read.
 * @lucene.experimental
 */
public final void readGroupVInts(long[] dst, int limit) throws IOException {
  int cursor = 0;
  // Complete groups first: one readGroupVInt call per group, four destination slots per group.
  while (cursor <= limit - 4) {
    readGroupVInt(dst, cursor);
    cursor += 4;
  }
  // Fewer than four values are left; they are read one vint at a time. The mask keeps only the
  // low 32 bits so the int returned by readVInt() is widened to long without sign extension.
  while (cursor < limit) {
    dst[cursor] = readVInt() & 0xFFFFFFFFL;
    cursor++;
  }
}
/**
* Override if you have a efficient implementation. In general this is when the input supports
* Override if you have an efficient implementation. In general this is when the input supports
* random access.
*/
protected void readGroupVInt(long[] dst, int offset) throws IOException {
public void readGroupVInt(long[] dst, int offset) throws IOException {
GroupVIntUtil.readGroupVInt(this, dst, offset);
}

View File

@ -33,10 +33,29 @@ public final class GroupVIntUtil {
private static final long[] MASKS = new long[] {0xFFL, 0xFFFFL, 0xFFFFFFL, 0xFFFFFFFFL};
/**
* Default implementation of read single group, for optimal performance, you should use {@link
* DataInput#readGroupVInts(long[], int)} instead.
* Read all the group varints, including the tail vints. We need a long[] because this is what
* postings are using; all longs are actually required to be integers.
*
* @param dst the array to read ints into.
* @param limit the number of int values to read.
* @lucene.experimental
*/
public static void readGroupVInts(DataInput in, long[] dst, int limit) throws IOException {
  // Largest offset at which a complete group of four values still fits below limit.
  final int lastGroupStart = limit - 4;
  int pos = 0;
  // Complete groups go through in.readGroupVInt, one call per group of four destination slots.
  for (; pos <= lastGroupStart; pos += 4) {
    in.readGroupVInt(dst, pos);
  }
  // The remaining (fewer than four) tail values are read as individual vints. The mask keeps
  // only the low 32 bits so the int is widened to long without sign extension.
  while (pos < limit) {
    dst[pos] = in.readVInt() & 0xFFFFFFFFL;
    pos++;
  }
}
/**
* Default implementation for reading a single group. For optimal performance, you should use {@link
* GroupVIntUtil#readGroupVInts(DataInput, long[], int)} instead.
*
* @param in the input to use to read data.
* @param dst the array to read ints into.
* @param offset the offset in the array to start storing ints.
*/
public static void readGroupVInt(DataInput in, long[] dst, int offset) throws IOException {

View File

@ -419,7 +419,7 @@ abstract class MemorySegmentIndexInput extends IndexInput
}
@Override
protected void readGroupVInt(long[] dst, int offset) throws IOException {
public void readGroupVInt(long[] dst, int offset) throws IOException {
try {
final int len =
GroupVIntUtil.readGroupVInt(

View File

@ -33,6 +33,7 @@ import org.apache.lucene.tests.analysis.MockAnalyzer;
import org.apache.lucene.tests.index.RandomIndexWriter;
import org.apache.lucene.tests.util.TestUtil;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.GroupVIntUtil;
/**
* Base class for Directories that "chunk" the input into blocks.
@ -77,7 +78,7 @@ public abstract class BaseChunkedDirectoryTestCase extends BaseDirectoryTestCase
expectThrows(
AlreadyClosedException.class,
() -> {
two.readGroupVInts(values, values.length);
GroupVIntUtil.readGroupVInts(two, values, values.length);
});
assertEquals(5, three.readVInt());
one.close();
@ -105,7 +106,7 @@ public abstract class BaseChunkedDirectoryTestCase extends BaseDirectoryTestCase
expectThrows(
AlreadyClosedException.class,
() -> {
one.readGroupVInts(values, values.length);
GroupVIntUtil.readGroupVInts(one, values, values.length);
});
assertEquals(2, two.readInt());
// reopen a new slice "another":

View File

@ -59,6 +59,7 @@ import org.apache.lucene.tests.util.LuceneTestCase;
import org.apache.lucene.tests.util.TestUtil;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BitUtil;
import org.apache.lucene.util.GroupVIntUtil;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.packed.PackedInts;
import org.junit.Assert;
@ -1458,7 +1459,7 @@ public abstract class BaseDirectoryTestCase extends LuceneTestCase {
assertEquals(43, in.readByte());
assertEquals(12345, in.readShort());
assertEquals(1234567890, in.readInt());
in.readGroupVInts(restored, 4);
GroupVIntUtil.readGroupVInts(in, restored, 4);
assertArrayEquals(values, restored);
assertEquals(1234567890123456789L, in.readLong());
in.close();
@ -1485,7 +1486,7 @@ public abstract class BaseDirectoryTestCase extends LuceneTestCase {
out.writeGroupVInts(values, limit);
out.close();
try (IndexInput in = dir.openInput("test", IOContext.DEFAULT)) {
in.readGroupVInts(restore, limit);
GroupVIntUtil.readGroupVInts(in, restore, limit);
for (int i = 0; i < limit; i++) {
assertEquals(values[i], restore[i]);
}
@ -1533,7 +1534,7 @@ public abstract class BaseDirectoryTestCase extends LuceneTestCase {
IndexInput groupVIntIn = dir.openInput("group-varint", IOContext.DEFAULT);
IndexInput vIntIn = dir.openInput("vint", IOContext.DEFAULT);
for (int iter = 0; iter < iterations; iter++) {
groupVIntIn.readGroupVInts(values, numValuesArray[iter]);
GroupVIntUtil.readGroupVInts(groupVIntIn, values, numValuesArray[iter]);
for (int j = 0; j < numValuesArray[iter]; j++) {
assertEquals(vIntIn.readVInt(), values[j]);
}