Add new encoders that use only readLong/writeLong (instead of mixing in readInt/writeInt) so their performance can be compared easily

This commit is contained in:
expani 2024-10-24 10:17:59 +05:30
parent af8b914a9f
commit 4d10d9d5eb
1 changed files with 128 additions and 10 deletions

View File

@ -68,7 +68,13 @@ public class DocIdEncodingBenchmark {
parseInput(); parseInput();
} }
@Param({"Bit21With3StepsEncoder", "Bit21With2StepsEncoder", "Bit24Encoder", "Bit21HybridEncoder"}) @Param({
"Bit21With3StepsEncoder",
"Bit21With2StepsEncoder",
"Bit24Encoder",
"Bit21With2StepsOnlyRWLongEncoder",
"Bit21With3StepsEncoderOnlyRWLong"
})
String encoderName; String encoderName;
@Param({"encode", "decode"}) @Param({"encode", "decode"})
@ -323,6 +329,37 @@ public class DocIdEncodingBenchmark {
} }
} }
/**
 * Encoder that packs three doc ids into a single {@code long} and performs all I/O through
 * {@code writeLong}/{@code readLong} only, including the 0-2 leftover ids at the tail (each
 * leftover id occupies a full {@code long} of its own).
 *
 * <p>Each packed id is masked with {@code BPV_21_MASK} and placed at bit offsets 42, 21 and 0.
 * NOTE(review): the mask is declared outside this class; it is presumably the low 21 bits —
 * confirm against its declaration.
 *
 * <p>The {@code start} argument is accepted for interface compatibility but is not read by this
 * implementation: ids are always taken from / stored to indices {@code 0..count-1}.
 */
class Bit21With2StepsOnlyRWLongEncoder implements DocIdEncoder {

  /**
   * Writes {@code count} doc ids to {@code out}.
   *
   * @param out destination of the encoded longs
   * @param start unused by this implementation
   * @param count number of ids to encode, read from {@code docIds[0..count-1]}
   * @param docIds source ids
   * @throws IOException if writing to {@code out} fails
   */
  @Override
  public void encode(IndexOutput out, int start, int count, int[] docIds) throws IOException {
    int i = 0;
    // Step 1: full groups of three ids, one long per group.
    for (; i < count - 2; i += 3) {
      out.writeLong(pack(docIds[i], docIds[i + 1], docIds[i + 2]));
    }
    // Step 2: leftover ids, one long each so that decode never needs readInt.
    for (; i < count; i++) {
      out.writeLong(docIds[i]);
    }
  }

  /**
   * Reads {@code count} doc ids from {@code in}; {@code count} must match the value that was
   * passed to {@link #encode} for the same data, because it determines where the packed groups
   * end and the one-id-per-long tail begins.
   *
   * @param in source of the encoded longs
   * @param start unused by this implementation
   * @param count number of ids to decode, stored to {@code docIDs[0..count-1]}
   * @param docIDs destination array
   * @throws IOException if reading from {@code in} fails
   */
  @Override
  public void decode(IndexInput in, int start, int count, int[] docIDs) throws IOException {
    int i = 0;
    for (; i < count - 2; i += 3) {
      unpack(in.readLong(), docIDs, i);
    }
    for (; i < count; i++) {
      docIDs[i] = (int) in.readLong();
    }
  }

  /**
   * Packs three ids into one long. Each id is widened to {@code long} before masking so the
   * shifts by 42 and 21 are always 64-bit shifts, whatever the declared type of the mask.
   */
  private static long pack(int first, int second, int third) {
    return (((long) first & BPV_21_MASK) << 42)
        | (((long) second & BPV_21_MASK) << 21)
        | ((long) third & BPV_21_MASK);
  }

  /** Splits one packed long into three ids stored at {@code docIDs[offset..offset+2]}. */
  private static void unpack(long packed, int[] docIDs, int offset) {
    // The highest slot needs no mask: nothing is ever packed above it.
    docIDs[offset] = (int) (packed >>> 42);
    docIDs[offset + 1] = (int) ((packed >>> 21) & BPV_21_MASK);
    docIDs[offset + 2] = (int) (packed & BPV_21_MASK);
  }
}
/** /**
* Variation of @{@link Bit21With2StepsEncoder} but uses 3 loops to decode the array of DocIds. * Variation of @{@link Bit21With2StepsEncoder} but uses 3 loops to decode the array of DocIds.
* Comparatively better in decoding than @{@link Bit21With2StepsEncoder} on aarch64 with JDK 22 * Comparatively better in decoding than @{@link Bit21With2StepsEncoder} on aarch64 with JDK 22
@ -391,6 +428,69 @@ public class DocIdEncodingBenchmark {
} }
} }
/**
 * Three-loop variant of the long-only 21-bit encoder: ids are processed nine at a time (three
 * longs per iteration), then three at a time (one long per iteration), and finally one at a time
 * with each leftover id stored in a full {@code long}. All I/O goes through
 * {@code writeLong}/{@code readLong}.
 *
 * <p>Each packed id is masked with {@code BPV_21_MASK} and placed at bit offsets 42, 21 and 0.
 * NOTE(review): the mask is declared outside this class; it is presumably the low 21 bits —
 * confirm against its declaration.
 *
 * <p>The {@code start} argument is accepted for interface compatibility but is not read by this
 * implementation: ids are always taken from / stored to indices {@code 0..count-1}.
 */
class Bit21With3StepsEncoderOnlyRWLong implements DocIdEncoder {

  /**
   * Writes {@code count} doc ids to {@code out}.
   *
   * @param out destination of the encoded longs
   * @param start unused by this implementation
   * @param count number of ids to encode, read from {@code docIds[0..count-1]}
   * @param docIds source ids
   * @throws IOException if writing to {@code out} fails
   */
  @Override
  public void encode(IndexOutput out, int start, int count, int[] docIds) throws IOException {
    int i = 0;
    // Step 1: nine ids per iteration. All three longs are built before any is written.
    for (; i < count - 8; i += 9) {
      long l1 = pack(docIds[i], docIds[i + 1], docIds[i + 2]);
      long l2 = pack(docIds[i + 3], docIds[i + 4], docIds[i + 5]);
      long l3 = pack(docIds[i + 6], docIds[i + 7], docIds[i + 8]);
      out.writeLong(l1);
      out.writeLong(l2);
      out.writeLong(l3);
    }
    // Step 2: remaining full groups of three ids.
    for (; i < count - 2; i += 3) {
      out.writeLong(pack(docIds[i], docIds[i + 1], docIds[i + 2]));
    }
    // Step 3: leftover ids, one long each so that decode never needs readInt.
    for (; i < count; i++) {
      out.writeLong(docIds[i]);
    }
  }

  /**
   * Reads {@code count} doc ids from {@code in}; {@code count} must match the value that was
   * passed to {@link #encode} for the same data, because it determines where each of the three
   * stages begins and ends.
   *
   * @param in source of the encoded longs
   * @param start unused by this implementation
   * @param count number of ids to decode, stored to {@code docIDs[0..count-1]}
   * @param docIDs destination array
   * @throws IOException if reading from {@code in} fails
   */
  @Override
  public void decode(IndexInput in, int start, int count, int[] docIDs) throws IOException {
    int i = 0;
    for (; i < count - 8; i += 9) {
      // Issue the three reads back to back before touching the output array.
      long l1 = in.readLong();
      long l2 = in.readLong();
      long l3 = in.readLong();
      unpack(l1, docIDs, i);
      unpack(l2, docIDs, i + 3);
      unpack(l3, docIDs, i + 6);
    }
    for (; i < count - 2; i += 3) {
      unpack(in.readLong(), docIDs, i);
    }
    for (; i < count; i++) {
      docIDs[i] = (int) in.readLong();
    }
  }

  /**
   * Packs three ids into one long. Each id is widened to {@code long} before masking so the
   * shifts by 42 and 21 are always 64-bit shifts, whatever the declared type of the mask.
   */
  private static long pack(int first, int second, int third) {
    return (((long) first & BPV_21_MASK) << 42)
        | (((long) second & BPV_21_MASK) << 21)
        | ((long) third & BPV_21_MASK);
  }

  /** Splits one packed long into three ids stored at {@code docIDs[offset..offset+2]}. */
  private static void unpack(long packed, int[] docIDs, int offset) {
    // The highest slot needs no mask: nothing is ever packed above it.
    docIDs[offset] = (int) (packed >>> 42);
    docIDs[offset + 1] = (int) ((packed >>> 21) & BPV_21_MASK);
    docIDs[offset + 2] = (int) (packed & BPV_21_MASK);
  }
}
class Bit21HybridEncoder implements DocIdEncoder { class Bit21HybridEncoder implements DocIdEncoder {
private final DocIdEncoder encoder; private final DocIdEncoder encoder;
@ -443,11 +543,20 @@ public class DocIdEncodingBenchmark {
Map<Class<? extends DocIdEncodingBenchmark.DocIdEncoder>, Integer> ENCODER_TO_BPV_MAPPING = Map<Class<? extends DocIdEncodingBenchmark.DocIdEncoder>, Integer> ENCODER_TO_BPV_MAPPING =
Map.of( Map.of(
DocIdEncodingBenchmark.DocIdEncoder.Bit21With2StepsEncoder.class, 21, DocIdEncodingBenchmark.DocIdEncoder.Bit21With2StepsEncoder.class,
DocIdEncodingBenchmark.DocIdEncoder.Bit21With3StepsEncoder.class, 21, 21,
DocIdEncodingBenchmark.DocIdEncoder.Bit21HybridEncoder.class, 21, DocIdEncodingBenchmark.DocIdEncoder.Bit21With3StepsEncoder.class,
DocIdEncodingBenchmark.DocIdEncoder.Bit24Encoder.class, 24, 21,
DocIdEncodingBenchmark.DocIdEncoder.Bit32Encoder.class, 32); DocIdEncodingBenchmark.DocIdEncoder.Bit21With2StepsOnlyRWLongEncoder.class,
21,
DocIdEncodingBenchmark.DocIdEncoder.Bit21With3StepsEncoderOnlyRWLong.class,
21,
DocIdEncodingBenchmark.DocIdEncoder.Bit21HybridEncoder.class,
21,
DocIdEncodingBenchmark.DocIdEncoder.Bit24Encoder.class,
24,
DocIdEncodingBenchmark.DocIdEncoder.Bit32Encoder.class,
32);
/** /**
* We want to load all the docId sequences completely in memory to avoid including the time * We want to load all the docId sequences completely in memory to avoid including the time
@ -484,7 +593,16 @@ public class DocIdEncodingBenchmark {
static class FixedBPVRandomDocIdProvider implements DocIdEncodingBenchmark.DocIdProvider { static class FixedBPVRandomDocIdProvider implements DocIdEncodingBenchmark.DocIdProvider {
private final Random random = new Random(); private static final Random RANDOM = new Random();
private static final Map<Class<? extends DocIdEncoder>, Double> ENCODER_POWERS_OF_2;
static {
ENCODER_POWERS_OF_2 = new HashMap<>(ENCODER_TO_BPV_MAPPING.size());
ENCODER_TO_BPV_MAPPING.forEach(
(encoderClazz, bitsUsed) ->
ENCODER_POWERS_OF_2.put(encoderClazz, Math.pow(2, bitsUsed) - 1));
}
@SuppressWarnings("unchecked") @SuppressWarnings("unchecked")
@Override @Override
@ -499,10 +617,10 @@ public class DocIdEncodingBenchmark {
for (int i = 1; i <= capacity; i++) { for (int i = 1; i <= capacity; i++) {
docIdSequences.add( docIdSequences.add(
random RANDOM
.ints(0, (int) Math.pow(2, ENCODER_TO_BPV_MAPPING.get(encoderClass)) - 1) .ints(0, ENCODER_POWERS_OF_2.get(encoderClass).intValue())
.distinct() .distinct()
.limit(random.nextInt(low, high)) .limit(RANDOM.nextInt(low, high))
.toArray()); .toArray());
} }
return docIdSequences; return docIdSequences;