From fd862b7d0adf90f72d448c00e81bfacd14152025 Mon Sep 17 00:00:00 2001 From: expani Date: Tue, 15 Oct 2024 17:43:50 +0530 Subject: [PATCH] Added unit tests to randomly generate docIds for all encoders --- .../benchmark/jmh/DocIdEncodingBenchmark.java | 18 ++-- .../benchmark/jmh/TestDocIdEncoding.java | 94 +++++++++++++++++++ 2 files changed, 103 insertions(+), 9 deletions(-) create mode 100644 lucene/benchmark-jmh/src/test/org/apache/lucene/benchmark/jmh/TestDocIdEncoding.java diff --git a/lucene/benchmark-jmh/src/java/org/apache/lucene/benchmark/jmh/DocIdEncodingBenchmark.java b/lucene/benchmark-jmh/src/java/org/apache/lucene/benchmark/jmh/DocIdEncodingBenchmark.java index 7a6da246968..2a5a3900082 100644 --- a/lucene/benchmark-jmh/src/java/org/apache/lucene/benchmark/jmh/DocIdEncodingBenchmark.java +++ b/lucene/benchmark-jmh/src/java/org/apache/lucene/benchmark/jmh/DocIdEncodingBenchmark.java @@ -210,15 +210,6 @@ public class DocIdEncodingBenchmark { } } - private static DocIdEncoder getInternal(String parsedEncoderName) { - if (ENCODER_NAME_TO_INSTANCE_MAPPING.containsKey(parsedEncoderName)) { - return ENCODER_NAME_TO_INSTANCE_MAPPING.get(parsedEncoderName); - } else { - throw new IllegalArgumentException( - String.format(Locale.ROOT, "Unknown DocIdEncoder [%s]", parsedEncoderName)); - } - } - public static DocIdEncoder fromName(String encoderName) { String parsedEncoderName = encoderName.trim().toLowerCase(Locale.ROOT); return getInternal(parsedEncoderName); @@ -235,6 +226,15 @@ public class DocIdEncodingBenchmark { String parsedEncoderName = parsedClazzName(clazz); return getInternal(parsedEncoderName); } + + private static DocIdEncoder getInternal(String parsedEncoderName) { + if (ENCODER_NAME_TO_INSTANCE_MAPPING.containsKey(parsedEncoderName)) { + return ENCODER_NAME_TO_INSTANCE_MAPPING.get(parsedEncoderName); + } else { + throw new IllegalArgumentException( + String.format(Locale.ROOT, "Unknown DocIdEncoder [%s]", parsedEncoderName)); + } + } } class 
Bit24Encoder implements DocIdEncoder {
diff --git a/lucene/benchmark-jmh/src/test/org/apache/lucene/benchmark/jmh/TestDocIdEncoding.java b/lucene/benchmark-jmh/src/test/org/apache/lucene/benchmark/jmh/TestDocIdEncoding.java
new file mode 100644
index 00000000000..52ea72440d1
--- /dev/null
+++ b/lucene/benchmark-jmh/src/test/org/apache/lucene/benchmark/jmh/TestDocIdEncoding.java
@@ -0,0 +1,133 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.benchmark.jmh;
+
+import java.io.IOException;
+import java.nio.file.Path;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.List;
+import java.util.Map;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.FSDirectory;
+import org.apache.lucene.store.IOContext;
+import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.store.IndexOutput;
+import org.apache.lucene.tests.util.LuceneTestCase;
+
+/**
+ * Round-trip test for every {@link DocIdEncodingBenchmark.DocIdEncoder}: random doc id sequences
+ * are written with the encoder, read back with the same encoder and compared with the input.
+ */
+public class TestDocIdEncoding extends LuceneTestCase {
+
+  /** Number of random sequences generated per encoder. */
+  private static final int SEQUENCE_COUNT = 50;
+
+  /** Smallest sequence length generated (inclusive). */
+  private static final int MIN_SEQUENCE_LENGTH = 100;
+
+  /** Largest sequence length generated (exclusive); also the size of the decode buffer. */
+  private static final int MAX_SEQUENCE_LENGTH = 512;
+
+  /** Bits per value used to bound the random doc ids generated for each encoder. */
+  private static final Map<Class<? extends DocIdEncodingBenchmark.DocIdEncoder>, Integer>
+      ENCODER_TO_BPV_MAPPING =
+          Map.of(
+              DocIdEncodingBenchmark.DocIdEncoder.Bit21With2StepsEncoder.class, 21,
+              DocIdEncodingBenchmark.DocIdEncoder.Bit21With3StepsEncoder.class, 21,
+              DocIdEncodingBenchmark.DocIdEncoder.Bit21HybridEncoder.class, 21,
+              DocIdEncodingBenchmark.DocIdEncoder.Bit24Encoder.class, 24,
+              DocIdEncodingBenchmark.DocIdEncoder.Bit32Encoder.class, 32);
+
+  /**
+   * Generates random doc id sequences bounded by the bits per value registered for the encoder in
+   * {@code ENCODER_TO_BPV_MAPPING}.
+   */
+  static class FixedBPVRandomDocIdProvider implements DocIdEncodingBenchmark.DocIdProvider {
+
+    /**
+     * @param args {@code [0]} the {@link DocIdEncodingBenchmark.DocIdEncoder} to generate for,
+     *     {@code [1]} the number of sequences, {@code [2]} the minimum sequence length (inclusive),
+     *     {@code [3]} the maximum sequence length (exclusive)
+     * @return {@code args[1]} arrays of distinct, non-negative random doc ids
+     * @throws IllegalArgumentException if the encoder has no entry in the bits-per-value mapping
+     */
+    @Override
+    public List<int[]> getDocIds(Object... args) {
+      DocIdEncodingBenchmark.DocIdEncoder encoder = (DocIdEncodingBenchmark.DocIdEncoder) args[0];
+      int capacity = (int) args[1];
+      int low = (int) args[2];
+      int high = (int) args[3];
+
+      Integer bpv = ENCODER_TO_BPV_MAPPING.get(encoder.getClass());
+      if (bpv == null) {
+        throw new IllegalArgumentException(
+            "No bits per value registered for encoder " + encoder.getClass().getName());
+      }
+      // Exclusive upper bound in integer arithmetic so the largest bpv-bit value can be drawn;
+      // doc ids are non-negative ints, so 31 bits or more is capped at Integer.MAX_VALUE.
+      int maxDocIdExclusive = bpv >= Integer.SIZE - 1 ? Integer.MAX_VALUE : 1 << bpv;
+
+      List<int[]> docIdSequences = new ArrayList<>(capacity);
+      for (int i = 0; i < capacity; i++) {
+        docIdSequences.add(
+            random()
+                .ints(0, maxDocIdExclusive)
+                .distinct()
+                .limit(random().nextInt(low, high))
+                .toArray());
+      }
+      return docIdSequences;
+    }
+  }
+
+  /** Encodes and decodes random sequences with every encoder and checks they round-trip. */
+  public void testBPV21AndAbove() throws IOException {
+    List<DocIdEncodingBenchmark.DocIdEncoder> encoders =
+        DocIdEncodingBenchmark.DocIdEncoder.SingletonFactory.getAllExcept(Collections.emptyList());
+    DocIdEncodingBenchmark.DocIdProvider docIdProvider = new FixedBPVRandomDocIdProvider();
+    final int[] scratch = new int[MAX_SEQUENCE_LENGTH];
+
+    // LuceneTestCase#createTempDir hands cleanup of the directory to the test framework;
+    // Files.createTempDirectory would leave it behind after every run.
+    Path tempDir = createTempDir("DocIdEncoding_testBPV21AndAbove_");
+
+    try (Directory dir = FSDirectory.open(tempDir)) {
+      for (DocIdEncodingBenchmark.DocIdEncoder encoder : encoders) {
+        List<int[]> docIdSequences =
+            docIdProvider.getDocIds(
+                encoder, SEQUENCE_COUNT, MIN_SEQUENCE_LENGTH, MAX_SEQUENCE_LENGTH);
+        String encoderFileName = "Encoder_" + encoder.getClass().getSimpleName();
+
+        try (IndexOutput out = dir.createOutput(encoderFileName, IOContext.DEFAULT)) {
+          for (int[] sequence : docIdSequences) {
+            encoder.encode(out, 0, sequence.length, sequence);
+          }
+        }
+
+        try (IndexInput in = dir.openInput(encoderFileName, IOContext.DEFAULT)) {
+          for (int[] sequence : docIdSequences) {
+            encoder.decode(in, 0, sequence.length, scratch);
+            assertArrayEquals(sequence, Arrays.copyOf(scratch, sequence.length));
+          }
+        }
+      }
+    }
+  }
+}