Improve group-varint benchmark to reproduce value distribution of wikibigall. (#12833)

Instead of using a fixed number of bits per value, the group-varint benchmark
now tries to reproduce the distribution of the number of bits per value that
can be observed on tail postings of wikibigall.
Adrien Grand 2023-11-22 14:57:48 +01:00 committed by GitHub
parent e04793d651
commit 79cad27a42
1 changed file with 40 additions and 11 deletions

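The new init() method (see the diff below) draws each benchmark doc delta by inverse-transform sampling: a uniform float in [0, 1) is mapped to a bit width via binary search over the cumulative frequency table, and a value that needs exactly that many bits is then drawn uniformly from [2^(bits-1), 2^bits). The standalone class below is only a minimal sketch of that technique; the class name, the shortened example table, and the main method are illustrative and not part of the commit, and the two-argument Random.nextInt it mirrors requires Java 17 or newer.

import java.util.Arrays;
import java.util.Random;

/** Illustrative sketch of the sampling scheme used by the benchmark (not part of the commit). */
public class BitWidthSamplingSketch {

  // Shortened example table; the real benchmark uses the 22-entry table measured on wikibigall.
  private static final float[] CUMULATIVE_FREQUENCY_BY_BITS_REQUIRED =
      new float[] {0.0f, 0.05f, 0.15f, 0.40f, 0.75f, 1.0f};

  static long sample(Random r) {
    float f = r.nextFloat();
    // When the key is absent, binarySearch returns (-(insertion point) - 1), so adding 1 and
    // negating yields the insertion point, i.e. the sampled number of bits.
    int numBits = 1 + Arrays.binarySearch(CUMULATIVE_FREQUENCY_BY_BITS_REQUIRED, f);
    if (numBits < 0) {
      numBits = -numBits;
    }
    // Draw a value that needs exactly numBits bits: uniform in [2^(numBits-1), 2^numBits).
    return r.nextInt(1 << (numBits - 1), 1 << numBits); // two-arg nextInt needs Java 17+
  }

  public static void main(String[] args) {
    Random r = new Random(0);
    for (int i = 0; i < 8; i++) {
      System.out.println(sample(r));
    }
  }
}

With this scheme the probability of drawing a b-bit value is CUMULATIVE_FREQUENCY_BY_BITS_REQUIRED[b] - CUMULATIVE_FREQUENCY_BY_BITS_REQUIRED[b - 1], i.e. exactly the measured frequency the table encodes.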

@@ -18,7 +18,8 @@ package org.apache.lucene.benchmark.jmh;
 import java.io.IOException;
 import java.nio.file.Files;
-import java.util.concurrent.ThreadLocalRandom;
+import java.util.Arrays;
+import java.util.Random;
 import java.util.concurrent.TimeUnit;
 import org.apache.lucene.codecs.lucene99.GroupVIntReader;
 import org.apache.lucene.codecs.lucene99.GroupVIntWriter;
@@ -53,6 +54,34 @@ import org.openjdk.jmh.infra.Blackhole;
     jvmArgsPrepend = {"--add-modules=jdk.unsupported"})
 public class GroupVIntBenchmark {
+
+  // Cumulative frequency for each number of bits per value used by doc deltas of tail postings on
+  // wikibigall.
+  private static final float[] CUMULATIVE_FREQUENCY_BY_BITS_REQUIRED =
+      new float[] {
+        0.0f,
+        0.01026574f,
+        0.021453038f,
+        0.03342156f,
+        0.046476692f,
+        0.060890317f,
+        0.07644147f,
+        0.093718216f,
+        0.11424741f,
+        0.13989712f,
+        0.17366524f,
+        0.22071244f,
+        0.2815692f,
+        0.3537585f,
+        0.43655503f,
+        0.52308f,
+        0.6104675f,
+        0.7047371f,
+        0.78155357f,
+        0.8671179f,
+        0.9740598f,
+        1.0f
+      };
 
   final int maxSize = 256;
   final long[] values = new long[maxSize];
@@ -66,11 +95,6 @@ public class GroupVIntBenchmark {
   @Param({"64"})
   public int size;
 
-  @Param({"1", "2", "3", "4"})
-  public int numBytesPerInt;
-
-  private final int[] maxValues = new int[] {0, 1 << 4, 1 << 12, 1 << 18, 1 << 25};
-
   void initArrayInput(long[] docs) throws Exception {
     byte[] gVIntBytes = new byte[Integer.BYTES * maxSize * 2];
     byte[] vIntBytes = new byte[Integer.BYTES * maxSize * 2];
@@ -103,11 +127,16 @@ public class GroupVIntBenchmark {
   @Setup(Level.Trial)
   public void init() throws Exception {
     long[] docs = new long[maxSize];
-    int max = maxValues[numBytesPerInt];
-    int min = max >> 1;
-    for (int i = 0; i < maxSize; i++) {
-      long v = ThreadLocalRandom.current().nextInt(min, max);
-      docs[i] = v;
+    Random r = new Random(0);
+    for (int i = 0; i < maxSize; ++i) {
+      float randomFloat = r.nextFloat();
+      // Reproduce the distribution of the number of bits per values that we're observing for tail
+      // postings on wikibigall.
+      int numBits = 1 + Arrays.binarySearch(CUMULATIVE_FREQUENCY_BY_BITS_REQUIRED, randomFloat);
+      if (numBits < 0) {
+        numBits = -numBits;
+      }
+      docs[i] = r.nextInt(1 << (numBits - 1), 1 << numBits);
     }
     initByteBufferInput(docs);
     initArrayInput(docs);
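
A quick, hypothetical way to sanity-check the sampled values (not part of the commit; it assumes the lines are appended at the end of init(), where docs and the cumulative table are in scope) is to bucket the deltas by the number of bits they require and print the observed fraction per bucket, which should roughly match the differences between consecutive table entries given only 256 samples:

// Hypothetical check, not part of the diff above: histogram of bits required per sampled delta.
long[] countsByBits = new long[CUMULATIVE_FREQUENCY_BY_BITS_REQUIRED.length];
for (long v : docs) {
  countsByBits[64 - Long.numberOfLeadingZeros(v)]++;
}
for (int bits = 1; bits < countsByBits.length; bits++) {
  System.out.printf("%2d bits: %.4f%n", bits, countsByBits[bits] / (double) docs.length);
}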