Move DataInput.readGroupVInts into GroupVIntUtil (#13830)

This commit is contained in:
Zhang Chao 2024-09-30 17:04:18 +08:00 committed by Dawid Weiss
parent c85458887c
commit 56c9d1cbeb
11 changed files with 42 additions and 37 deletions

View File

@ -19,6 +19,7 @@ package org.apache.lucene.backward_codecs.lucene99;
import java.io.IOException; import java.io.IOException;
import org.apache.lucene.store.IndexInput; import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput; import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.GroupVIntUtil;
/** Utility class to encode/decode postings block. */ /** Utility class to encode/decode postings block. */
final class PostingsUtil { final class PostingsUtil {
@ -35,7 +36,7 @@ final class PostingsUtil {
boolean indexHasFreq, boolean indexHasFreq,
boolean decodeFreq) boolean decodeFreq)
throws IOException { throws IOException {
docIn.readGroupVInts(docBuffer, num); GroupVIntUtil.readGroupVInts(docIn, docBuffer, num);
if (indexHasFreq && decodeFreq) { if (indexHasFreq && decodeFreq) {
for (int i = 0; i < num; ++i) { for (int i = 0; i < num; ++i) {
freqBuffer[i] = docBuffer[i] & 0x01; freqBuffer[i] = docBuffer[i] & 0x01;

View File

@ -186,7 +186,7 @@ public class GroupVIntBenchmark {
@Benchmark @Benchmark
public void benchMMapDirectoryInputs_readGroupVInt(Blackhole bh) throws IOException { public void benchMMapDirectoryInputs_readGroupVInt(Blackhole bh) throws IOException {
byteBufferGVIntIn.seek(0); byteBufferGVIntIn.seek(0);
byteBufferGVIntIn.readGroupVInts(values, size); GroupVIntUtil.readGroupVInts(byteBufferGVIntIn, values, size);
bh.consume(values); bh.consume(values);
} }
@ -209,14 +209,14 @@ public class GroupVIntBenchmark {
@Benchmark @Benchmark
public void benchByteArrayDataInput_readGroupVInt(Blackhole bh) throws IOException { public void benchByteArrayDataInput_readGroupVInt(Blackhole bh) throws IOException {
byteArrayGVIntIn.rewind(); byteArrayGVIntIn.rewind();
byteArrayGVIntIn.readGroupVInts(values, size); GroupVIntUtil.readGroupVInts(byteArrayGVIntIn, values, size);
bh.consume(values); bh.consume(values);
} }
@Benchmark @Benchmark
public void benchNIOFSDirectoryInputs_readGroupVInt(Blackhole bh) throws IOException { public void benchNIOFSDirectoryInputs_readGroupVInt(Blackhole bh) throws IOException {
nioGVIntIn.seek(0); nioGVIntIn.seek(0);
nioGVIntIn.readGroupVInts(values, size); GroupVIntUtil.readGroupVInts(nioGVIntIn, values, size);
bh.consume(values); bh.consume(values);
} }
@ -230,7 +230,7 @@ public class GroupVIntBenchmark {
@Benchmark @Benchmark
public void benchByteBuffersIndexInput_readGroupVInt(Blackhole bh) throws IOException { public void benchByteBuffersIndexInput_readGroupVInt(Blackhole bh) throws IOException {
byteBuffersGVIntIn.seek(0); byteBuffersGVIntIn.seek(0);
byteBuffersGVIntIn.readGroupVInts(values, size); GroupVIntUtil.readGroupVInts(byteBuffersGVIntIn, values, size);
bh.consume(values); bh.consume(values);
} }

View File

@ -19,6 +19,7 @@ package org.apache.lucene.codecs.lucene912;
import java.io.IOException; import java.io.IOException;
import org.apache.lucene.store.DataOutput; import org.apache.lucene.store.DataOutput;
import org.apache.lucene.store.IndexInput; import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.GroupVIntUtil;
/** Utility class to encode/decode postings block. */ /** Utility class to encode/decode postings block. */
final class PostingsUtil { final class PostingsUtil {
@ -35,7 +36,7 @@ final class PostingsUtil {
boolean indexHasFreq, boolean indexHasFreq,
boolean decodeFreq) boolean decodeFreq)
throws IOException { throws IOException {
docIn.readGroupVInts(docBuffer, num); GroupVIntUtil.readGroupVInts(docIn, docBuffer, num);
if (indexHasFreq && decodeFreq) { if (indexHasFreq && decodeFreq) {
for (int i = 0; i < num; ++i) { for (int i = 0; i < num; ++i) {
freqBuffer[i] = docBuffer[i] & 0x01; freqBuffer[i] = docBuffer[i] & 0x01;

View File

@ -151,7 +151,7 @@ public abstract class BufferedIndexInput extends IndexInput implements RandomAcc
} }
@Override @Override
protected void readGroupVInt(long[] dst, int offset) throws IOException { public void readGroupVInt(long[] dst, int offset) throws IOException {
final int len = final int len =
GroupVIntUtil.readGroupVInt( GroupVIntUtil.readGroupVInt(
this, buffer.remaining(), p -> buffer.getInt((int) p), buffer.position(), dst, offset); this, buffer.remaining(), p -> buffer.getInt((int) p), buffer.position(), dst, offset);

View File

@ -204,7 +204,7 @@ public final class ByteBuffersDataInput extends DataInput
} }
@Override @Override
protected void readGroupVInt(long[] dst, int offset) throws IOException { public void readGroupVInt(long[] dst, int offset) throws IOException {
final ByteBuffer block = blocks[blockIndex(pos)]; final ByteBuffer block = blocks[blockIndex(pos)];
final int blockOffset = blockOffset(pos); final int blockOffset = blockOffset(pos);
// We MUST save the return value to local variable, could not use pos += readGroupVInt(...). // We MUST save the return value to local variable, could not use pos += readGroupVInt(...).

View File

@ -206,7 +206,7 @@ public final class ByteBuffersIndexInput extends IndexInput implements RandomAcc
} }
@Override @Override
protected void readGroupVInt(long[] dst, int offset) throws IOException { public void readGroupVInt(long[] dst, int offset) throws IOException {
ensureOpen(); ensureOpen();
in.readGroupVInt(dst, offset); in.readGroupVInt(dst, offset);
} }

View File

@ -100,28 +100,10 @@ public abstract class DataInput implements Cloneable {
} }
/** /**
* Read all the group varints, including the tail vints. we need a long[] because this is what * Override if you have an efficient implementation. In general this is when the input supports
* postings are using, all longs are actually required to be integers.
*
* @param dst the array to read ints into.
* @param limit the number of int values to read.
* @lucene.experimental
*/
public final void readGroupVInts(long[] dst, int limit) throws IOException {
int i;
for (i = 0; i <= limit - 4; i += 4) {
readGroupVInt(dst, i);
}
for (; i < limit; ++i) {
dst[i] = readVInt() & 0xFFFFFFFFL;
}
}
/**
* Override if you have a efficient implementation. In general this is when the input supports
* random access. * random access.
*/ */
protected void readGroupVInt(long[] dst, int offset) throws IOException { public void readGroupVInt(long[] dst, int offset) throws IOException {
GroupVIntUtil.readGroupVInt(this, dst, offset); GroupVIntUtil.readGroupVInt(this, dst, offset);
} }

View File

@ -33,10 +33,29 @@ public final class GroupVIntUtil {
private static final long[] MASKS = new long[] {0xFFL, 0xFFFFL, 0xFFFFFFL, 0xFFFFFFFFL}; private static final long[] MASKS = new long[] {0xFFL, 0xFFFFL, 0xFFFFFFL, 0xFFFFFFFFL};
/** /**
* Default implementation of read single group, for optimal performance, you should use {@link * Read all the group varints, including the tail vints. we need a long[] because this is what
* DataInput#readGroupVInts(long[], int)} instead. * postings are using, all longs are actually required to be integers.
* *
* @param dst the array to read ints into. * @param dst the array to read ints into.
* @param limit the number of int values to read.
* @lucene.experimental
*/
public static void readGroupVInts(DataInput in, long[] dst, int limit) throws IOException {
// Phase 1: consume complete groups. The loop runs while at least four values are still
// wanted (i <= limit - 4); each iteration hands the current offset i to in.readGroupVInt
// and then advances i by 4. DataInput subclasses may override readGroupVInt, so the
// decoding used here depends on the concrete input.
int i;
for (i = 0; i <= limit - 4; i += 4) {
in.readGroupVInt(dst, i);
}
// Phase 2: the tail. Any remaining values (fewer than four) are read one at a time as
// plain vints. The 0xFFFFFFFFL mask keeps only the low 32 bits of each value before it is
// stored in the long[] (readVInt presumably returns an int, so this avoids sign extension).
for (; i < limit; ++i) {
dst[i] = in.readVInt() & 0xFFFFFFFFL;
}
}
/**
* Default implementation of read single group, for optimal performance, you should use {@link
* GroupVIntUtil#readGroupVInts(DataInput, long[], int)} instead.
*
* @param in the input to use to read data.
* @param dst the array to read ints into.
* @param offset the offset in the array to start storing ints. * @param offset the offset in the array to start storing ints.
*/ */
public static void readGroupVInt(DataInput in, long[] dst, int offset) throws IOException { public static void readGroupVInt(DataInput in, long[] dst, int offset) throws IOException {

View File

@ -419,7 +419,7 @@ abstract class MemorySegmentIndexInput extends IndexInput
} }
@Override @Override
protected void readGroupVInt(long[] dst, int offset) throws IOException { public void readGroupVInt(long[] dst, int offset) throws IOException {
try { try {
final int len = final int len =
GroupVIntUtil.readGroupVInt( GroupVIntUtil.readGroupVInt(

View File

@ -33,6 +33,7 @@ import org.apache.lucene.tests.analysis.MockAnalyzer;
import org.apache.lucene.tests.index.RandomIndexWriter; import org.apache.lucene.tests.index.RandomIndexWriter;
import org.apache.lucene.tests.util.TestUtil; import org.apache.lucene.tests.util.TestUtil;
import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.GroupVIntUtil;
/** /**
* Base class for Directories that "chunk" the input into blocks. * Base class for Directories that "chunk" the input into blocks.
@ -77,7 +78,7 @@ public abstract class BaseChunkedDirectoryTestCase extends BaseDirectoryTestCase
expectThrows( expectThrows(
AlreadyClosedException.class, AlreadyClosedException.class,
() -> { () -> {
two.readGroupVInts(values, values.length); GroupVIntUtil.readGroupVInts(two, values, values.length);
}); });
assertEquals(5, three.readVInt()); assertEquals(5, three.readVInt());
one.close(); one.close();
@ -105,7 +106,7 @@ public abstract class BaseChunkedDirectoryTestCase extends BaseDirectoryTestCase
expectThrows( expectThrows(
AlreadyClosedException.class, AlreadyClosedException.class,
() -> { () -> {
one.readGroupVInts(values, values.length); GroupVIntUtil.readGroupVInts(one, values, values.length);
}); });
assertEquals(2, two.readInt()); assertEquals(2, two.readInt());
// reopen a new slice "another": // reopen a new slice "another":

View File

@ -59,6 +59,7 @@ import org.apache.lucene.tests.util.LuceneTestCase;
import org.apache.lucene.tests.util.TestUtil; import org.apache.lucene.tests.util.TestUtil;
import org.apache.lucene.util.ArrayUtil; import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BitUtil; import org.apache.lucene.util.BitUtil;
import org.apache.lucene.util.GroupVIntUtil;
import org.apache.lucene.util.IOUtils; import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.packed.PackedInts; import org.apache.lucene.util.packed.PackedInts;
import org.junit.Assert; import org.junit.Assert;
@ -1458,7 +1459,7 @@ public abstract class BaseDirectoryTestCase extends LuceneTestCase {
assertEquals(43, in.readByte()); assertEquals(43, in.readByte());
assertEquals(12345, in.readShort()); assertEquals(12345, in.readShort());
assertEquals(1234567890, in.readInt()); assertEquals(1234567890, in.readInt());
in.readGroupVInts(restored, 4); GroupVIntUtil.readGroupVInts(in, restored, 4);
assertArrayEquals(values, restored); assertArrayEquals(values, restored);
assertEquals(1234567890123456789L, in.readLong()); assertEquals(1234567890123456789L, in.readLong());
in.close(); in.close();
@ -1485,7 +1486,7 @@ public abstract class BaseDirectoryTestCase extends LuceneTestCase {
out.writeGroupVInts(values, limit); out.writeGroupVInts(values, limit);
out.close(); out.close();
try (IndexInput in = dir.openInput("test", IOContext.DEFAULT)) { try (IndexInput in = dir.openInput("test", IOContext.DEFAULT)) {
in.readGroupVInts(restore, limit); GroupVIntUtil.readGroupVInts(in, restore, limit);
for (int i = 0; i < limit; i++) { for (int i = 0; i < limit; i++) {
assertEquals(values[i], restore[i]); assertEquals(values[i], restore[i]);
} }
@ -1533,7 +1534,7 @@ public abstract class BaseDirectoryTestCase extends LuceneTestCase {
IndexInput groupVIntIn = dir.openInput("group-varint", IOContext.DEFAULT); IndexInput groupVIntIn = dir.openInput("group-varint", IOContext.DEFAULT);
IndexInput vIntIn = dir.openInput("vint", IOContext.DEFAULT); IndexInput vIntIn = dir.openInput("vint", IOContext.DEFAULT);
for (int iter = 0; iter < iterations; iter++) { for (int iter = 0; iter < iterations; iter++) {
groupVIntIn.readGroupVInts(values, numValuesArray[iter]); GroupVIntUtil.readGroupVInts(groupVIntIn, values, numValuesArray[iter]);
for (int j = 0; j < numValuesArray[iter]; j++) { for (int j = 0; j < numValuesArray[iter]; j++) {
assertEquals(vIntIn.readVInt(), values[j]); assertEquals(vIntIn.readVInt(), values[j]);
} }