Made loading docIdSequences parallel to reduce benchmark time

expani 2024-10-20 18:13:22 +05:30
parent b2c45e53b8
commit ecf53d921d
1 changed file with 20 additions and 37 deletions


@@ -17,14 +17,9 @@
 package org.apache.lucene.benchmark.jmh;
 
 import java.io.IOException;
-import java.io.InputStream;
 import java.lang.reflect.InvocationTargetException;
-import java.nio.charset.Charset;
 import java.nio.file.Files;
 import java.nio.file.Path;
-import java.nio.file.StandardOpenOption;
-import java.time.LocalDateTime;
-import java.time.format.DateTimeFormatter;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.HashMap;
@@ -32,9 +27,9 @@ import java.util.List;
 import java.util.Locale;
 import java.util.Map;
 import java.util.Random;
-import java.util.Scanner;
 import java.util.Set;
 import java.util.concurrent.TimeUnit;
+import java.util.stream.Stream;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.FSDirectory;
 import org.apache.lucene.store.IOContext;
@@ -67,9 +62,6 @@ public class DocIdEncodingBenchmark {
   private static List<int[]> DOC_ID_SEQUENCES = new ArrayList<>();
 
-  private static final DateTimeFormatter DATE_TIME_FORMATTER =
-      DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss");
-
   private static int INPUT_SCALE_FACTOR;
 
   static {
@@ -477,23 +469,21 @@ public class DocIdEncodingBenchmark {
     @Override
     public List<int[]> getDocIds(Object... args) {
-      List<int[]> docIds = new ArrayList<>();
-      InputStream fileContents = (InputStream) args[0];
-      try (Scanner fileReader = new Scanner(fileContents, Charset.defaultCharset())) {
-        while (fileReader.hasNextLine()) {
-          String sequence = fileReader.nextLine().trim();
-          if (!sequence.startsWith("#") && !sequence.isEmpty()) {
-            docIds.add(
-                Arrays.stream(sequence.split(","))
-                    .map(String::trim)
-                    .mapToInt(Integer::parseInt)
-                    .toArray());
-          }
-        }
+      try (Stream<String> lines = Files.lines(Path.of((String) args[0]))) {
+        return lines
+            .parallel()
+            .filter(x -> !x.trim().startsWith("#"))
+            .map(
+                x ->
+                    Arrays.stream(x.split(","))
+                        .mapToInt((y -> Integer.parseInt(y.trim())))
+                        .toArray())
+            .toList();
+      } catch (IOException e) {
+        throw new RuntimeException(e);
       }
-      return docIds;
     }
   }
 
   static class FixedBPVRandomDocIdProvider implements DocIdEncodingBenchmark.DocIdProvider {
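
The rewrite above swaps a sequential Scanner loop for a parallel stream over the file's lines. A minimal standalone sketch of the same pattern, runnable outside the benchmark (the class name, main harness, and UncheckedIOException wrapper are illustrative, not part of the commit):

import java.io.IOException;
import java.io.UncheckedIOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.Arrays;
import java.util.List;
import java.util.stream.Stream;

// Sketch of the loading pattern the commit adopts: one comma-separated
// doc ID sequence per line; lines starting with '#' are comments.
public class ParallelDocIdLoad {

  static List<int[]> load(Path input) {
    try (Stream<String> lines = Files.lines(input)) {
      return lines
          .parallel() // each line parses independently, so the work splits cleanly across cores
          .filter(line -> !line.trim().startsWith("#"))
          .map(
              line ->
                  Arrays.stream(line.split(","))
                      .mapToInt(token -> Integer.parseInt(token.trim()))
                      .toArray())
          .toList(); // toList() preserves encounter order even for a parallel stream
    } catch (IOException e) {
      throw new UncheckedIOException(e);
    }
  }

  public static void main(String[] args) {
    List<int[]> sequences = load(Path.of(args[0]));
    System.out.println("loaded " + sequences.size() + " sequences");
  }
}

One behavioral difference to be aware of: the old Scanner loop skipped empty lines as well as '#' comments, while the new filter only skips comments, so a blank line in the input file now surfaces as a NumberFormatException from Integer.parseInt.
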
@@ -532,20 +522,13 @@
       INPUT_SCALE_FACTOR = 10;
     }
-    try {
-      String inputFilePath = System.getProperty("docIdEncoding.inputFile");
-      if (inputFilePath != null && !inputFilePath.isEmpty()) {
-        DOC_ID_SEQUENCES =
-            new DocIdsFromLocalFS()
-                .getDocIds(Files.newInputStream(Path.of(inputFilePath), StandardOpenOption.READ));
-      } else {
-        DOC_ID_SEQUENCES =
-            new FixedBPVRandomDocIdProvider()
-                .getDocIds(DocIdEncoder.Bit21With3StepsEncoder.class, 100, 100, 512);
-      }
-    } catch (IOException e) {
-      throw new RuntimeException(e);
-    }
+    String inputFilePath = System.getProperty("docIdEncoding.inputFile");
+    if (inputFilePath != null && !inputFilePath.isEmpty()) {
+      DOC_ID_SEQUENCES = new DocIdsFromLocalFS().getDocIds(inputFilePath);
+    } else {
+      DOC_ID_SEQUENCES =
+          new FixedBPVRandomDocIdProvider()
+              .getDocIds(DocIdEncoder.Bit21With3StepsEncoder.class, 100, 100, 512);
+    }
   }
 }
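
With the InputStream plumbing gone, DocIdsFromLocalFS takes the file path directly and opens the file itself; the input is still selected through the docIdEncoding.inputFile system property read in the static initializer. A hypothetical input file in the format the parser expects (comma-separated integers, '#' starts a comment, no blank lines):

# docIdSequences.txt (illustrative contents)
1, 5, 9, 42
7, 8, 1024, 99999
3, 3, 3

The exact launch command depends on how the JMH suite is packaged, but it would be along these lines (the jar name and path are assumptions):

java -DdocIdEncoding.inputFile=/tmp/docIdSequences.txt -jar lucene-benchmark-jmh.jar DocIdEncodingBenchmark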