mirror of
https://github.com/apache/lucene.git
synced 2025-02-07 18:49:03 +00:00
Used reflection for generating singleton instances and addressed PR Comments
This commit is contained in:
parent
e309031c7f
commit
7dd5d80774
@ -16,24 +16,27 @@
|
|||||||
*/
|
*/
|
||||||
package org.apache.lucene.benchmark.jmh;
|
package org.apache.lucene.benchmark.jmh;
|
||||||
|
|
||||||
|
import java.io.FileInputStream;
|
||||||
|
import java.io.FileNotFoundException;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
import java.io.InputStream;
|
||||||
|
import java.lang.reflect.InvocationTargetException;
|
||||||
import java.nio.charset.Charset;
|
import java.nio.charset.Charset;
|
||||||
import java.nio.file.Files;
|
import java.nio.file.Files;
|
||||||
import java.nio.file.Path;
|
import java.nio.file.Path;
|
||||||
import java.nio.file.Paths;
|
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
|
import java.util.HashMap;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Locale;
|
import java.util.Locale;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
import java.util.Objects;
|
|
||||||
import java.util.Scanner;
|
import java.util.Scanner;
|
||||||
import java.util.concurrent.TimeUnit;
|
import java.util.concurrent.TimeUnit;
|
||||||
import org.apache.lucene.store.Directory;
|
import org.apache.lucene.store.Directory;
|
||||||
|
import org.apache.lucene.store.FSDirectory;
|
||||||
import org.apache.lucene.store.IOContext;
|
import org.apache.lucene.store.IOContext;
|
||||||
import org.apache.lucene.store.IndexInput;
|
import org.apache.lucene.store.IndexInput;
|
||||||
import org.apache.lucene.store.IndexOutput;
|
import org.apache.lucene.store.IndexOutput;
|
||||||
import org.apache.lucene.store.NIOFSDirectory;
|
|
||||||
import org.openjdk.jmh.annotations.Benchmark;
|
import org.openjdk.jmh.annotations.Benchmark;
|
||||||
import org.openjdk.jmh.annotations.BenchmarkMode;
|
import org.openjdk.jmh.annotations.BenchmarkMode;
|
||||||
import org.openjdk.jmh.annotations.Fork;
|
import org.openjdk.jmh.annotations.Fork;
|
||||||
@ -56,7 +59,7 @@ import org.openjdk.jmh.annotations.Warmup;
|
|||||||
@Fork(value = 1)
|
@Fork(value = 1)
|
||||||
public class DocIdEncodingBenchmark {
|
public class DocIdEncodingBenchmark {
|
||||||
|
|
||||||
private static final List<int[]> DOC_ID_SEQUENCES = new ArrayList<>();
|
private static List<int[]> DOC_ID_SEQUENCES = new ArrayList<>();
|
||||||
|
|
||||||
private static int INPUT_SCALE_FACTOR;
|
private static int INPUT_SCALE_FACTOR;
|
||||||
|
|
||||||
@ -80,19 +83,19 @@ public class DocIdEncodingBenchmark {
|
|||||||
|
|
||||||
private final int[] scratch = new int[512];
|
private final int[] scratch = new int[512];
|
||||||
|
|
||||||
|
private String decoderInputFile;
|
||||||
|
|
||||||
@Setup(Level.Trial)
|
@Setup(Level.Trial)
|
||||||
public void init() throws IOException {
|
public void init() throws IOException {
|
||||||
tmpDir = Files.createTempDirectory("docIdJmh");
|
tmpDir = Files.createTempDirectory("docIdJmh");
|
||||||
docIdEncoder = DocIdEncoder.SingletonFactory.fromName(encoderName);
|
docIdEncoder = DocIdEncoder.SingletonFactory.fromName(encoderName);
|
||||||
// Create file once for decoders to read from in every iteration
|
decoderInputFile =
|
||||||
if (methodName.equalsIgnoreCase("decode")) {
|
|
||||||
String dataFile =
|
|
||||||
String.join("_", "docIdJmhData", docIdEncoder.getClass().getSimpleName(), "DecoderInput");
|
String.join("_", "docIdJmhData", docIdEncoder.getClass().getSimpleName(), "DecoderInput");
|
||||||
try (Directory dir = new NIOFSDirectory(tmpDir)) {
|
// Create a file (once per trial) for decoders to read in every JMH iteration
|
||||||
out = dir.createOutput(dataFile, IOContext.DEFAULT);
|
if (methodName.equalsIgnoreCase("decode")) {
|
||||||
encode();
|
try (Directory dir = FSDirectory.open(tmpDir);
|
||||||
} finally {
|
IndexOutput out = dir.createOutput(decoderInputFile, IOContext.DEFAULT)) {
|
||||||
out.close();
|
encode(out, docIdEncoder, DOC_ID_SEQUENCES, INPUT_SCALE_FACTOR);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -100,9 +103,7 @@ public class DocIdEncodingBenchmark {
|
|||||||
@TearDown(Level.Trial)
|
@TearDown(Level.Trial)
|
||||||
public void finish() throws IOException {
|
public void finish() throws IOException {
|
||||||
if (methodName.equalsIgnoreCase("decode")) {
|
if (methodName.equalsIgnoreCase("decode")) {
|
||||||
String dataFile =
|
Files.delete(tmpDir.resolve(decoderInputFile));
|
||||||
String.join("_", "docIdJmhData", docIdEncoder.getClass().getSimpleName(), "DecoderInput");
|
|
||||||
Files.delete(tmpDir.resolve(dataFile));
|
|
||||||
}
|
}
|
||||||
Files.delete(tmpDir);
|
Files.delete(tmpDir);
|
||||||
}
|
}
|
||||||
@ -110,45 +111,50 @@ public class DocIdEncodingBenchmark {
|
|||||||
@Benchmark
|
@Benchmark
|
||||||
public void executeEncodeOrDecode() throws IOException {
|
public void executeEncodeOrDecode() throws IOException {
|
||||||
if (methodName.equalsIgnoreCase("encode")) {
|
if (methodName.equalsIgnoreCase("encode")) {
|
||||||
String dataFile =
|
String outputFile =
|
||||||
String.join(
|
String.join(
|
||||||
"_",
|
"_",
|
||||||
"docIdJmhData",
|
"docIdJmhData",
|
||||||
docIdEncoder.getClass().getSimpleName(),
|
docIdEncoder.getClass().getSimpleName(),
|
||||||
String.valueOf(System.nanoTime()));
|
String.valueOf(System.nanoTime()));
|
||||||
try (Directory dir = new NIOFSDirectory(tmpDir)) {
|
try (Directory dir = FSDirectory.open(tmpDir);
|
||||||
out = dir.createOutput(dataFile, IOContext.DEFAULT);
|
IndexOutput out = dir.createOutput(outputFile, IOContext.DEFAULT)) {
|
||||||
encode();
|
encode(out, docIdEncoder, DOC_ID_SEQUENCES, INPUT_SCALE_FACTOR);
|
||||||
} finally {
|
} finally {
|
||||||
Files.delete(tmpDir.resolve(dataFile));
|
Files.delete(tmpDir.resolve(outputFile));
|
||||||
out.close();
|
|
||||||
}
|
}
|
||||||
} else if (methodName.equalsIgnoreCase("decode")) {
|
} else if (methodName.equalsIgnoreCase("decode")) {
|
||||||
String inputFile =
|
try (Directory dir = FSDirectory.open(tmpDir)) {
|
||||||
String.join("_", "docIdJmhData", docIdEncoder.getClass().getSimpleName(), "DecoderInput");
|
in = dir.openInput(decoderInputFile, IOContext.DEFAULT);
|
||||||
try (Directory dir = new NIOFSDirectory(tmpDir)) {
|
decode(in, docIdEncoder, DOC_ID_SEQUENCES, INPUT_SCALE_FACTOR, scratch);
|
||||||
in = dir.openInput(inputFile, IOContext.DEFAULT);
|
|
||||||
decode();
|
|
||||||
} finally {
|
|
||||||
in.close();
|
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
throw new IllegalArgumentException("Unknown method: " + methodName);
|
throw new IllegalArgumentException("Unknown method: " + methodName);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public void encode() throws IOException {
|
public void encode(
|
||||||
for (int[] docIdSequence : DOC_ID_SEQUENCES) {
|
IndexOutput out, DocIdEncoder docIdEncoder, List<int[]> docIdSequences, int inputScaleFactor)
|
||||||
for (int i = 1; i <= INPUT_SCALE_FACTOR; i++) {
|
throws IOException {
|
||||||
|
for (int[] docIdSequence : docIdSequences) {
|
||||||
|
for (int i = 1; i <= inputScaleFactor; i++) {
|
||||||
docIdEncoder.encode(out, 0, docIdSequence.length, docIdSequence);
|
docIdEncoder.encode(out, 0, docIdSequence.length, docIdSequence);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public void decode() throws IOException {
|
public void decode(
|
||||||
for (int[] docIdSequence : DOC_ID_SEQUENCES) {
|
IndexInput in,
|
||||||
for (int i = 1; i <= INPUT_SCALE_FACTOR; i++) {
|
DocIdEncoder docIdEncoder,
|
||||||
|
List<int[]> docIdSequences,
|
||||||
|
int inputScaleFactor,
|
||||||
|
int[] scratch)
|
||||||
|
throws IOException {
|
||||||
|
for (int[] docIdSequence : docIdSequences) {
|
||||||
|
for (int i = 1; i <= inputScaleFactor; i++) {
|
||||||
docIdEncoder.decode(in, 0, docIdSequence.length, scratch);
|
docIdEncoder.decode(in, 0, docIdSequence.length, scratch);
|
||||||
|
// TODO Use a unit test with a DocIdProvider that generates a few random sequences based on
|
||||||
|
// given BPV.
|
||||||
// Uncomment to test the output of Encoder
|
// Uncomment to test the output of Encoder
|
||||||
// if (!Arrays.equals(
|
// if (!Arrays.equals(
|
||||||
// docIdSequence, Arrays.copyOfRange(scratch, 0, docIdSequence.length)))
|
// docIdSequence, Arrays.copyOfRange(scratch, 0, docIdSequence.length)))
|
||||||
@ -175,16 +181,27 @@ public class DocIdEncodingBenchmark {
|
|||||||
|
|
||||||
class SingletonFactory {
|
class SingletonFactory {
|
||||||
|
|
||||||
static final Map<String, DocIdEncoder> ENCODER_NAME_TO_INSTANCE_MAPPING =
|
static final Map<String, DocIdEncoder> ENCODER_NAME_TO_INSTANCE_MAPPING = new HashMap<>();
|
||||||
Map.of(
|
|
||||||
Bit24Encoder.class.getSimpleName().toLowerCase(Locale.ROOT),
|
static {
|
||||||
new Bit24Encoder(),
|
Class<?>[] allImplementations = DocIdEncoder.class.getDeclaredClasses();
|
||||||
Bit21With2StepsEncoder.class.getSimpleName().toLowerCase(Locale.ROOT),
|
for (Class<?> clazz : allImplementations) {
|
||||||
new Bit21With2StepsEncoder(),
|
boolean isADocIdEncoder =
|
||||||
Bit21With3StepsEncoder.class.getSimpleName().toLowerCase(Locale.ROOT),
|
Arrays.asList(clazz.getInterfaces()).contains(DocIdEncoder.class);
|
||||||
new Bit21With3StepsEncoder(),
|
if (isADocIdEncoder) {
|
||||||
Bit32Encoder.class.getSimpleName().toLowerCase(Locale.ROOT),
|
try {
|
||||||
new Bit32Encoder());
|
ENCODER_NAME_TO_INSTANCE_MAPPING.put(
|
||||||
|
clazz.getSimpleName().toLowerCase(Locale.ROOT),
|
||||||
|
(DocIdEncoder) clazz.getConstructor().newInstance());
|
||||||
|
} catch (InstantiationException
|
||||||
|
| IllegalAccessException
|
||||||
|
| InvocationTargetException
|
||||||
|
| NoSuchMethodException e) {
|
||||||
|
throw new RuntimeException(e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
public static DocIdEncoder fromName(String encoderName) {
|
public static DocIdEncoder fromName(String encoderName) {
|
||||||
String parsedEncoderName = encoderName.trim().toLowerCase(Locale.ROOT);
|
String parsedEncoderName = encoderName.trim().toLowerCase(Locale.ROOT);
|
||||||
@ -195,9 +212,8 @@ public class DocIdEncodingBenchmark {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
static class Bit24Encoder implements DocIdEncoder {
|
class Bit24Encoder implements DocIdEncoder {
|
||||||
@Override
|
@Override
|
||||||
public void encode(IndexOutput out, int start, int count, int[] docIds) throws IOException {
|
public void encode(IndexOutput out, int start, int count, int[] docIds) throws IOException {
|
||||||
int i;
|
int i;
|
||||||
@ -244,12 +260,13 @@ public class DocIdEncodingBenchmark {
|
|||||||
docIDs[i + 7] = (int) l3 & 0xffffff;
|
docIDs[i + 7] = (int) l3 & 0xffffff;
|
||||||
}
|
}
|
||||||
for (; i < count; ++i) {
|
for (; i < count; ++i) {
|
||||||
docIDs[i] = (Short.toUnsignedInt(in.readShort()) << 8) | Byte.toUnsignedInt(in.readByte());
|
docIDs[i] =
|
||||||
|
(Short.toUnsignedInt(in.readShort()) << 8) | Byte.toUnsignedInt(in.readByte());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static class Bit21With2StepsEncoder implements DocIdEncoder {
|
class Bit21With2StepsEncoder implements DocIdEncoder {
|
||||||
@Override
|
@Override
|
||||||
public void encode(IndexOutput out, int start, int count, int[] docIds) throws IOException {
|
public void encode(IndexOutput out, int start, int count, int[] docIds) throws IOException {
|
||||||
int i = 0;
|
int i = 0;
|
||||||
@ -284,7 +301,7 @@ public class DocIdEncodingBenchmark {
|
|||||||
* Variation of {@link Bit21With2StepsEncoder} but uses 3 loops to decode the array of DocIds.
|
* Variation of {@link Bit21With2StepsEncoder} but uses 3 loops to decode the array of DocIds.
|
||||||
* Comparatively better than {@link Bit21With2StepsEncoder} on aarch64 with JDK 22
|
* Comparatively better than {@link Bit21With2StepsEncoder} on aarch64 with JDK 22
|
||||||
*/
|
*/
|
||||||
static class Bit21With3StepsEncoder implements DocIdEncoder {
|
class Bit21With3StepsEncoder implements DocIdEncoder {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void encode(IndexOutput out, int start, int count, int[] docIds) throws IOException {
|
public void encode(IndexOutput out, int start, int count, int[] docIds) throws IOException {
|
||||||
@ -347,7 +364,7 @@ public class DocIdEncodingBenchmark {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static class Bit32Encoder implements DocIdEncoder {
|
class Bit32Encoder implements DocIdEncoder {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void encode(IndexOutput out, int start, int count, int[] docIds) throws IOException {
|
public void encode(IndexOutput out, int start, int count, int[] docIds) throws IOException {
|
||||||
@ -363,41 +380,81 @@ public class DocIdEncodingBenchmark {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
interface DocIdProvider {
|
||||||
|
/**
|
||||||
|
* Loads all the docId sequences completely into memory, to avoid including the time
|
||||||
|
* spent fetching them from disk. <br>
|
||||||
|
*
|
||||||
|
* @return all the docId sequences, or an empty list.
|
||||||
|
*/
|
||||||
|
List<int[]> getDocIds(Object... args);
|
||||||
|
}
|
||||||
|
|
||||||
|
static class DocIdsFromLocalFS implements DocIdProvider {
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public List<int[]> getDocIds(Object... args) {
|
||||||
|
List<int[]> docIds = new ArrayList<>();
|
||||||
|
InputStream fileContents = (InputStream) args[0];
|
||||||
|
try (Scanner fileReader = new Scanner(fileContents, Charset.defaultCharset())) {
|
||||||
|
while (fileReader.hasNextLine()) {
|
||||||
|
String sequence = fileReader.nextLine().trim();
|
||||||
|
if (!sequence.startsWith("#") && !sequence.isEmpty()) {
|
||||||
|
docIds.add(
|
||||||
|
Arrays.stream(sequence.split(","))
|
||||||
|
.map(String::trim)
|
||||||
|
.mapToInt(Integer::parseInt)
|
||||||
|
.toArray());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return docIds;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
private static void parseInput() {
|
private static void parseInput() {
|
||||||
|
|
||||||
String inputScaleFactor = System.getProperty("docIdEncoding.inputScaleFactor");
|
String inputScaleFactor = System.getProperty("docIdEncoding.inputScaleFactor");
|
||||||
|
|
||||||
if (inputScaleFactor != null) {
|
if (inputScaleFactor != null && !inputScaleFactor.isEmpty()) {
|
||||||
INPUT_SCALE_FACTOR = Integer.parseInt(inputScaleFactor);
|
INPUT_SCALE_FACTOR = Integer.parseInt(inputScaleFactor);
|
||||||
} else {
|
} else {
|
||||||
INPUT_SCALE_FACTOR = 2_00_000;
|
INPUT_SCALE_FACTOR = 2_00_000;
|
||||||
}
|
}
|
||||||
|
|
||||||
String inputFilePath = System.getProperty("docIdEncoding.inputFile");
|
String docProviderFQDN = System.getProperty("docIdEncoding.docIdProviderFQDN");
|
||||||
Scanner fileReader = null;
|
|
||||||
|
DocIdProvider docIdProvider = new DocIdsFromLocalFS();
|
||||||
|
|
||||||
|
if (docProviderFQDN != null && !docProviderFQDN.isEmpty()) {
|
||||||
try {
|
try {
|
||||||
if (inputFilePath != null) {
|
docIdProvider =
|
||||||
fileReader = new Scanner(Paths.get(inputFilePath), Charset.defaultCharset());
|
(DocIdProvider) Class.forName(docProviderFQDN).getConstructor().newInstance();
|
||||||
} else {
|
} catch (InstantiationException
|
||||||
fileReader =
|
| IllegalAccessException
|
||||||
new Scanner(
|
| InvocationTargetException
|
||||||
Objects.requireNonNull(
|
| NoSuchMethodException
|
||||||
DocIdEncodingBenchmark.class.getResourceAsStream(
|
| ClassNotFoundException e) {
|
||||||
"/org.apache.lucene.benchmark.jmh/docIds_bpv21.txt")),
|
throw new RuntimeException(e);
|
||||||
Charset.defaultCharset());
|
}
|
||||||
}
|
}
|
||||||
while (fileReader.hasNextLine()) {
|
|
||||||
String sequence = fileReader.nextLine().trim();
|
if (docIdProvider instanceof DocIdsFromLocalFS) {
|
||||||
if (!sequence.startsWith("#") && !sequence.isEmpty()) {
|
String inputFilePath = System.getProperty("docIdEncoding.inputFile");
|
||||||
DOC_ID_SEQUENCES.add(
|
try {
|
||||||
Arrays.stream(sequence.split(",")).map(String::trim).mapToInt(Integer::parseInt).toArray());
|
|
||||||
}
|
if (inputFilePath != null && !inputFilePath.isEmpty()) {
|
||||||
}
|
DOC_ID_SEQUENCES = docIdProvider.getDocIds(new FileInputStream(inputFilePath));
|
||||||
} catch (IOException e) {
|
} else {
|
||||||
|
DOC_ID_SEQUENCES =
|
||||||
|
docIdProvider.getDocIds(
|
||||||
|
DocIdEncodingBenchmark.class.getResourceAsStream(
|
||||||
|
"/org.apache.lucene.benchmark.jmh/docIds_bpv21.txt"));
|
||||||
|
}
|
||||||
|
} catch (FileNotFoundException e) {
|
||||||
throw new RuntimeException(e);
|
throw new RuntimeException(e);
|
||||||
} finally {
|
|
||||||
if (fileReader != null) {
|
|
||||||
fileReader.close();
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user