Used reflection for generating singleton instances and addressed PR Comments

This commit is contained in:
expani 2024-10-06 17:00:17 +05:30
parent e309031c7f
commit 7dd5d80774

View File

@ -16,24 +16,27 @@
*/ */
package org.apache.lucene.benchmark.jmh; package org.apache.lucene.benchmark.jmh;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException; import java.io.IOException;
import java.io.InputStream;
import java.lang.reflect.InvocationTargetException;
import java.nio.charset.Charset; import java.nio.charset.Charset;
import java.nio.file.Files; import java.nio.file.Files;
import java.nio.file.Path; import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Arrays; import java.util.Arrays;
import java.util.HashMap;
import java.util.List; import java.util.List;
import java.util.Locale; import java.util.Locale;
import java.util.Map; import java.util.Map;
import java.util.Objects;
import java.util.Scanner; import java.util.Scanner;
import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeUnit;
import org.apache.lucene.store.Directory; import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.IOContext; import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput; import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput; import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.store.NIOFSDirectory;
import org.openjdk.jmh.annotations.Benchmark; import org.openjdk.jmh.annotations.Benchmark;
import org.openjdk.jmh.annotations.BenchmarkMode; import org.openjdk.jmh.annotations.BenchmarkMode;
import org.openjdk.jmh.annotations.Fork; import org.openjdk.jmh.annotations.Fork;
@ -56,7 +59,7 @@ import org.openjdk.jmh.annotations.Warmup;
@Fork(value = 1) @Fork(value = 1)
public class DocIdEncodingBenchmark { public class DocIdEncodingBenchmark {
private static final List<int[]> DOC_ID_SEQUENCES = new ArrayList<>(); private static List<int[]> DOC_ID_SEQUENCES = new ArrayList<>();
private static int INPUT_SCALE_FACTOR; private static int INPUT_SCALE_FACTOR;
@ -80,19 +83,19 @@ public class DocIdEncodingBenchmark {
private final int[] scratch = new int[512]; private final int[] scratch = new int[512];
private String decoderInputFile;
@Setup(Level.Trial) @Setup(Level.Trial)
public void init() throws IOException { public void init() throws IOException {
tmpDir = Files.createTempDirectory("docIdJmh"); tmpDir = Files.createTempDirectory("docIdJmh");
docIdEncoder = DocIdEncoder.SingletonFactory.fromName(encoderName); docIdEncoder = DocIdEncoder.SingletonFactory.fromName(encoderName);
// Create file once for decoders to read from in every iteration decoderInputFile =
if (methodName.equalsIgnoreCase("decode")) {
String dataFile =
String.join("_", "docIdJmhData", docIdEncoder.getClass().getSimpleName(), "DecoderInput"); String.join("_", "docIdJmhData", docIdEncoder.getClass().getSimpleName(), "DecoderInput");
try (Directory dir = new NIOFSDirectory(tmpDir)) { // Create a file for decoders ( once per trial ) to read in every JMH iteration
out = dir.createOutput(dataFile, IOContext.DEFAULT); if (methodName.equalsIgnoreCase("decode")) {
encode(); try (Directory dir = FSDirectory.open(tmpDir);
} finally { IndexOutput out = dir.createOutput(decoderInputFile, IOContext.DEFAULT)) {
out.close(); encode(out, docIdEncoder, DOC_ID_SEQUENCES, INPUT_SCALE_FACTOR);
} }
} }
} }
@ -100,9 +103,7 @@ public class DocIdEncodingBenchmark {
@TearDown(Level.Trial) @TearDown(Level.Trial)
public void finish() throws IOException { public void finish() throws IOException {
if (methodName.equalsIgnoreCase("decode")) { if (methodName.equalsIgnoreCase("decode")) {
String dataFile = Files.delete(tmpDir.resolve(decoderInputFile));
String.join("_", "docIdJmhData", docIdEncoder.getClass().getSimpleName(), "DecoderInput");
Files.delete(tmpDir.resolve(dataFile));
} }
Files.delete(tmpDir); Files.delete(tmpDir);
} }
@ -110,45 +111,50 @@ public class DocIdEncodingBenchmark {
@Benchmark @Benchmark
public void executeEncodeOrDecode() throws IOException { public void executeEncodeOrDecode() throws IOException {
if (methodName.equalsIgnoreCase("encode")) { if (methodName.equalsIgnoreCase("encode")) {
String dataFile = String outputFile =
String.join( String.join(
"_", "_",
"docIdJmhData", "docIdJmhData",
docIdEncoder.getClass().getSimpleName(), docIdEncoder.getClass().getSimpleName(),
String.valueOf(System.nanoTime())); String.valueOf(System.nanoTime()));
try (Directory dir = new NIOFSDirectory(tmpDir)) { try (Directory dir = FSDirectory.open(tmpDir);
out = dir.createOutput(dataFile, IOContext.DEFAULT); IndexOutput out = dir.createOutput(outputFile, IOContext.DEFAULT)) {
encode(); encode(out, docIdEncoder, DOC_ID_SEQUENCES, INPUT_SCALE_FACTOR);
} finally { } finally {
Files.delete(tmpDir.resolve(dataFile)); Files.delete(tmpDir.resolve(outputFile));
out.close();
} }
} else if (methodName.equalsIgnoreCase("decode")) { } else if (methodName.equalsIgnoreCase("decode")) {
String inputFile = try (Directory dir = FSDirectory.open(tmpDir)) {
String.join("_", "docIdJmhData", docIdEncoder.getClass().getSimpleName(), "DecoderInput"); in = dir.openInput(decoderInputFile, IOContext.DEFAULT);
try (Directory dir = new NIOFSDirectory(tmpDir)) { decode(in, docIdEncoder, DOC_ID_SEQUENCES, INPUT_SCALE_FACTOR, scratch);
in = dir.openInput(inputFile, IOContext.DEFAULT);
decode();
} finally {
in.close();
} }
} else { } else {
throw new IllegalArgumentException("Unknown method: " + methodName); throw new IllegalArgumentException("Unknown method: " + methodName);
} }
} }
public void encode() throws IOException { public void encode(
for (int[] docIdSequence : DOC_ID_SEQUENCES) { IndexOutput out, DocIdEncoder docIdEncoder, List<int[]> docIdSequences, int inputScaleFactor)
for (int i = 1; i <= INPUT_SCALE_FACTOR; i++) { throws IOException {
for (int[] docIdSequence : docIdSequences) {
for (int i = 1; i <= inputScaleFactor; i++) {
docIdEncoder.encode(out, 0, docIdSequence.length, docIdSequence); docIdEncoder.encode(out, 0, docIdSequence.length, docIdSequence);
} }
} }
} }
public void decode() throws IOException { public void decode(
for (int[] docIdSequence : DOC_ID_SEQUENCES) { IndexInput in,
for (int i = 1; i <= INPUT_SCALE_FACTOR; i++) { DocIdEncoder docIdEncoder,
List<int[]> docIdSequences,
int inputScaleFactor,
int[] scratch)
throws IOException {
for (int[] docIdSequence : docIdSequences) {
for (int i = 1; i <= inputScaleFactor; i++) {
docIdEncoder.decode(in, 0, docIdSequence.length, scratch); docIdEncoder.decode(in, 0, docIdSequence.length, scratch);
// TODO Use a unit test with a DocIdProvider that generates a few random sequences based on
// given BPV.
// Uncomment to test the output of Encoder // Uncomment to test the output of Encoder
// if (!Arrays.equals( // if (!Arrays.equals(
// docIdSequence, Arrays.copyOfRange(scratch, 0, docIdSequence.length))) // docIdSequence, Arrays.copyOfRange(scratch, 0, docIdSequence.length)))
@ -175,16 +181,27 @@ public class DocIdEncodingBenchmark {
class SingletonFactory { class SingletonFactory {
static final Map<String, DocIdEncoder> ENCODER_NAME_TO_INSTANCE_MAPPING = static final Map<String, DocIdEncoder> ENCODER_NAME_TO_INSTANCE_MAPPING = new HashMap<>();
Map.of(
Bit24Encoder.class.getSimpleName().toLowerCase(Locale.ROOT), static {
new Bit24Encoder(), Class<?>[] allImplementations = DocIdEncoder.class.getDeclaredClasses();
Bit21With2StepsEncoder.class.getSimpleName().toLowerCase(Locale.ROOT), for (Class<?> clazz : allImplementations) {
new Bit21With2StepsEncoder(), boolean isADocIdEncoder =
Bit21With3StepsEncoder.class.getSimpleName().toLowerCase(Locale.ROOT), Arrays.asList(clazz.getInterfaces()).contains(DocIdEncoder.class);
new Bit21With3StepsEncoder(), if (isADocIdEncoder) {
Bit32Encoder.class.getSimpleName().toLowerCase(Locale.ROOT), try {
new Bit32Encoder()); ENCODER_NAME_TO_INSTANCE_MAPPING.put(
clazz.getSimpleName().toLowerCase(Locale.ROOT),
(DocIdEncoder) clazz.getConstructor().newInstance());
} catch (InstantiationException
| IllegalAccessException
| InvocationTargetException
| NoSuchMethodException e) {
throw new RuntimeException(e);
}
}
}
}
public static DocIdEncoder fromName(String encoderName) { public static DocIdEncoder fromName(String encoderName) {
String parsedEncoderName = encoderName.trim().toLowerCase(Locale.ROOT); String parsedEncoderName = encoderName.trim().toLowerCase(Locale.ROOT);
@ -195,9 +212,8 @@ public class DocIdEncodingBenchmark {
} }
} }
} }
}
static class Bit24Encoder implements DocIdEncoder { class Bit24Encoder implements DocIdEncoder {
@Override @Override
public void encode(IndexOutput out, int start, int count, int[] docIds) throws IOException { public void encode(IndexOutput out, int start, int count, int[] docIds) throws IOException {
int i; int i;
@ -244,12 +260,13 @@ public class DocIdEncodingBenchmark {
docIDs[i + 7] = (int) l3 & 0xffffff; docIDs[i + 7] = (int) l3 & 0xffffff;
} }
for (; i < count; ++i) { for (; i < count; ++i) {
docIDs[i] = (Short.toUnsignedInt(in.readShort()) << 8) | Byte.toUnsignedInt(in.readByte()); docIDs[i] =
(Short.toUnsignedInt(in.readShort()) << 8) | Byte.toUnsignedInt(in.readByte());
} }
} }
} }
static class Bit21With2StepsEncoder implements DocIdEncoder { class Bit21With2StepsEncoder implements DocIdEncoder {
@Override @Override
public void encode(IndexOutput out, int start, int count, int[] docIds) throws IOException { public void encode(IndexOutput out, int start, int count, int[] docIds) throws IOException {
int i = 0; int i = 0;
@ -284,7 +301,7 @@ public class DocIdEncodingBenchmark {
* Variation of @{@link Bit21With2StepsEncoder} but uses 3 loops to decode the array of DocIds. * Variation of @{@link Bit21With2StepsEncoder} but uses 3 loops to decode the array of DocIds.
* Comparatively better than @{@link Bit21With2StepsEncoder} on aarch64 with JDK 22 * Comparatively better than @{@link Bit21With2StepsEncoder} on aarch64 with JDK 22
*/ */
static class Bit21With3StepsEncoder implements DocIdEncoder { class Bit21With3StepsEncoder implements DocIdEncoder {
@Override @Override
public void encode(IndexOutput out, int start, int count, int[] docIds) throws IOException { public void encode(IndexOutput out, int start, int count, int[] docIds) throws IOException {
@ -347,7 +364,7 @@ public class DocIdEncodingBenchmark {
} }
} }
static class Bit32Encoder implements DocIdEncoder { class Bit32Encoder implements DocIdEncoder {
@Override @Override
public void encode(IndexOutput out, int start, int count, int[] docIds) throws IOException { public void encode(IndexOutput out, int start, int count, int[] docIds) throws IOException {
@ -363,41 +380,81 @@ public class DocIdEncodingBenchmark {
} }
} }
} }
}
/**
* Supplies the docId sequences that the benchmark encodes and decodes.
*/
interface DocIdProvider {
/**
* We want to load all the docId sequences completely in memory to avoid including the time
* spent in fetching from disk. <br>
*
* @param args implementation-specific inputs; their number and types are not constrained by
*     this interface. For example, {@code DocIdsFromLocalFS} reads {@code args[0]} as an
*     {@code InputStream}.
* @return All the docId sequences or empty list.
*/
List<int[]> getDocIds(Object... args);
}
/**
 * {@link DocIdProvider} that parses docId sequences out of a text stream.
 *
 * <p>Every line is trimmed first. Blank lines and lines starting with {@code #} are skipped; each
 * remaining line is read as one comma-separated sequence of ints.
 */
static class DocIdsFromLocalFS implements DocIdProvider {

  /**
   * Reads all docId sequences from the supplied stream into memory.
   *
   * @param args only {@code args[0]} is read; it must be a non-null {@link InputStream} holding
   *     the sequences. The stream is closed before this method returns.
   * @return the parsed sequences in the order they appear in the stream; empty if the stream has
   *     no data lines
   * @throws IllegalArgumentException if {@code args} is null or empty, or if its first element is
   *     null or not an {@link InputStream}
   * @throws NumberFormatException if a value on a data line is not a valid int
   * @throws java.io.UncheckedIOException if reading from the stream failed part-way through
   */
  @Override
  public List<int[]> getDocIds(Object... args) {
    // instanceof is false for null, so this also rejects a missing classpath resource
    // (Class#getResourceAsStream returns null in that case).
    if (args == null || args.length == 0 || !(args[0] instanceof InputStream)) {
      throw new IllegalArgumentException(
          "Expected a non-null InputStream with docId sequences as the first argument");
    }
    InputStream fileContents = (InputStream) args[0];

    List<int[]> docIds = new ArrayList<>();
    try (Scanner fileReader = new Scanner(fileContents, Charset.defaultCharset())) {
      while (fileReader.hasNextLine()) {
        String sequence = fileReader.nextLine().trim();
        if (sequence.isEmpty() || sequence.startsWith("#")) {
          continue;
        }
        docIds.add(
            Arrays.stream(sequence.split(","))
                .map(String::trim)
                .mapToInt(Integer::parseInt)
                .toArray());
      }
      // Scanner swallows I/O errors and just reports "no more lines"; surface them so the
      // benchmark never runs on a silently truncated data set.
      IOException readFailure = fileReader.ioException();
      if (readFailure != null) {
        throw new java.io.UncheckedIOException("Failed to read docId sequences", readFailure);
      }
    }
    return docIds;
  }
}
private static void parseInput() { private static void parseInput() {
String inputScaleFactor = System.getProperty("docIdEncoding.inputScaleFactor"); String inputScaleFactor = System.getProperty("docIdEncoding.inputScaleFactor");
if (inputScaleFactor != null) { if (inputScaleFactor != null && !inputScaleFactor.isEmpty()) {
INPUT_SCALE_FACTOR = Integer.parseInt(inputScaleFactor); INPUT_SCALE_FACTOR = Integer.parseInt(inputScaleFactor);
} else { } else {
INPUT_SCALE_FACTOR = 2_00_000; INPUT_SCALE_FACTOR = 2_00_000;
} }
String inputFilePath = System.getProperty("docIdEncoding.inputFile"); String docProviderFQDN = System.getProperty("docIdEncoding.docIdProviderFQDN");
Scanner fileReader = null;
DocIdProvider docIdProvider = new DocIdsFromLocalFS();
if (docProviderFQDN != null && !docProviderFQDN.isEmpty()) {
try { try {
if (inputFilePath != null) { docIdProvider =
fileReader = new Scanner(Paths.get(inputFilePath), Charset.defaultCharset()); (DocIdProvider) Class.forName(docProviderFQDN).getConstructor().newInstance();
} else { } catch (InstantiationException
fileReader = | IllegalAccessException
new Scanner( | InvocationTargetException
Objects.requireNonNull( | NoSuchMethodException
DocIdEncodingBenchmark.class.getResourceAsStream( | ClassNotFoundException e) {
"/org.apache.lucene.benchmark.jmh/docIds_bpv21.txt")), throw new RuntimeException(e);
Charset.defaultCharset()); }
} }
while (fileReader.hasNextLine()) {
String sequence = fileReader.nextLine().trim(); if (docIdProvider instanceof DocIdsFromLocalFS) {
if (!sequence.startsWith("#") && !sequence.isEmpty()) { String inputFilePath = System.getProperty("docIdEncoding.inputFile");
DOC_ID_SEQUENCES.add( try {
Arrays.stream(sequence.split(",")).map(String::trim).mapToInt(Integer::parseInt).toArray());
} if (inputFilePath != null && !inputFilePath.isEmpty()) {
} DOC_ID_SEQUENCES = docIdProvider.getDocIds(new FileInputStream(inputFilePath));
} catch (IOException e) { } else {
DOC_ID_SEQUENCES =
docIdProvider.getDocIds(
DocIdEncodingBenchmark.class.getResourceAsStream(
"/org.apache.lucene.benchmark.jmh/docIds_bpv21.txt"));
}
} catch (FileNotFoundException e) {
throw new RuntimeException(e); throw new RuntimeException(e);
} finally {
if (fileReader != null) {
fileReader.close();
} }
} }
} }