Made loading docIdSequences parallel to reduce benchmark time

expani 2024-10-20 18:13:22 +05:30
parent b2c45e53b8
commit ecf53d921d
1 changed file with 20 additions and 37 deletions


@@ -17,14 +17,9 @@
 package org.apache.lucene.benchmark.jmh;
 
 import java.io.IOException;
-import java.io.InputStream;
 import java.lang.reflect.InvocationTargetException;
-import java.nio.charset.Charset;
 import java.nio.file.Files;
 import java.nio.file.Path;
-import java.nio.file.StandardOpenOption;
-import java.time.LocalDateTime;
-import java.time.format.DateTimeFormatter;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.HashMap;
@@ -32,9 +27,9 @@ import java.util.List;
 import java.util.Locale;
 import java.util.Map;
 import java.util.Random;
-import java.util.Scanner;
 import java.util.Set;
 import java.util.concurrent.TimeUnit;
+import java.util.stream.Stream;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.FSDirectory;
 import org.apache.lucene.store.IOContext;
@@ -67,9 +62,6 @@ public class DocIdEncodingBenchmark {
   private static List<int[]> DOC_ID_SEQUENCES = new ArrayList<>();
 
-  private static final DateTimeFormatter DATE_TIME_FORMATTER =
-      DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss");
-
   private static int INPUT_SCALE_FACTOR;
 
   static {
@@ -477,23 +469,21 @@ public class DocIdEncodingBenchmark {
     @Override
     public List<int[]> getDocIds(Object... args) {
-      List<int[]> docIds = new ArrayList<>();
-      InputStream fileContents = (InputStream) args[0];
-      try (Scanner fileReader = new Scanner(fileContents, Charset.defaultCharset())) {
-        while (fileReader.hasNextLine()) {
-          String sequence = fileReader.nextLine().trim();
-          if (!sequence.startsWith("#") && !sequence.isEmpty()) {
-            docIds.add(
-                Arrays.stream(sequence.split(","))
-                    .map(String::trim)
-                    .mapToInt(Integer::parseInt)
-                    .toArray());
-          }
-        }
+      try (Stream<String> lines = Files.lines(Path.of((String) args[0]))) {
+        return lines
+            .parallel()
+            .filter(x -> !x.trim().startsWith("#"))
+            .map(
+                x ->
+                    Arrays.stream(x.split(","))
+                        .mapToInt((y -> Integer.parseInt(y.trim())))
+                        .toArray())
+            .toList();
+      } catch (IOException e) {
+        throw new RuntimeException(e);
       }
-      return docIds;
     }
   }
 
   static class FixedBPVRandomDocIdProvider implements DocIdEncodingBenchmark.DocIdProvider {
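
The rewrite above swaps a sequential Scanner loop for a parallel stream over the file's lines. A minimal standalone sketch of the same pattern, runnable outside the benchmark (the class name, main harness, and UncheckedIOException wrapper are illustrative, not part of the commit):

import java.io.IOException;
import java.io.UncheckedIOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.Arrays;
import java.util.List;
import java.util.stream.Stream;

// Sketch of the loading pattern the commit adopts: one comma-separated
// doc ID sequence per line; lines starting with '#' are comments.
public class ParallelDocIdLoad {

  static List<int[]> load(Path input) {
    try (Stream<String> lines = Files.lines(input)) {
      return lines
          .parallel() // each line parses independently, so the work splits cleanly across cores
          .filter(line -> !line.trim().startsWith("#"))
          .map(
              line ->
                  Arrays.stream(line.split(","))
                      .mapToInt(token -> Integer.parseInt(token.trim()))
                      .toArray())
          .toList(); // toList() preserves encounter order even for a parallel stream
    } catch (IOException e) {
      throw new UncheckedIOException(e);
    }
  }

  public static void main(String[] args) {
    List<int[]> sequences = load(Path.of(args[0]));
    System.out.println("loaded " + sequences.size() + " sequences");
  }
}

One behavioral difference to be aware of: the old Scanner loop skipped empty lines as well as '#' comments, while the new filter only skips comments, so a blank line in the input file now surfaces as a NumberFormatException from Integer.parseInt.
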
@@ -532,20 +522,13 @@
       INPUT_SCALE_FACTOR = 10;
     }
-    try {
-      String inputFilePath = System.getProperty("docIdEncoding.inputFile");
-      if (inputFilePath != null && !inputFilePath.isEmpty()) {
-        DOC_ID_SEQUENCES =
-            new DocIdsFromLocalFS()
-                .getDocIds(Files.newInputStream(Path.of(inputFilePath), StandardOpenOption.READ));
-      } else {
-        DOC_ID_SEQUENCES =
-            new FixedBPVRandomDocIdProvider()
-                .getDocIds(DocIdEncoder.Bit21With3StepsEncoder.class, 100, 100, 512);
-      }
-    } catch (IOException e) {
-      throw new RuntimeException(e);
-    }
+    String inputFilePath = System.getProperty("docIdEncoding.inputFile");
+    if (inputFilePath != null && !inputFilePath.isEmpty()) {
+      DOC_ID_SEQUENCES = new DocIdsFromLocalFS().getDocIds(inputFilePath);
+    } else {
+      DOC_ID_SEQUENCES =
+          new FixedBPVRandomDocIdProvider()
+              .getDocIds(DocIdEncoder.Bit21With3StepsEncoder.class, 100, 100, 512);
+    }
   }
 }
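
With the InputStream plumbing gone, DocIdsFromLocalFS takes the file path directly and opens the file itself; the input is still selected through the docIdEncoding.inputFile system property read in the static initializer. A hypothetical input file in the format the parser expects (comma-separated integers, '#' starts a comment, no blank lines):

# docIdSequences.txt (illustrative contents)
1, 5, 9, 42
7, 8, 1024, 99999
3, 3, 3

The exact launch command depends on how the JMH suite is packaged, but it would be along these lines (the jar name and path are assumptions):

java -DdocIdEncoding.inputFile=/tmp/docIdSequences.txt -jar lucene-benchmark-jmh.jar DocIdEncodingBenchmark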