Added JMH micro-benchmarks submodule (#12663)

2023-10-12 20:25:34 +02:00 · 2023-10-12 20:25:34 +02:00 · 603cd41ca1
parent 52dfe50e8f
commit 603cd41ca1
13 changed files with 506 additions and 6 deletions
--- a/gradle/help.gradle
+++ b/gradle/help.gradle
@ -27,6 +27,7 @@ configure(rootProject) {
      ["ForbiddenApis", "help/forbiddenApis.txt", "How to add/apply rules for forbidden APIs."],
      ["LocalSettings", "help/localSettings.txt", "Local settings, overrides and build performance tweaks."],
      ["Regeneration", "help/regeneration.txt", "How to refresh generated and derived resources."],
+      ["Jmh", "lucene/benchmark-jmh/README.txt", "JMH micro-benchmarks."],
      ["Git", "help/git.txt", "Git assistance and guides."],
      ["IDEs", "help/IDEs.txt", "IDE support."],
      ["Publishing", "help/publishing.txt", "Maven and other artifact publishing, signing, etc."],
--- a/gradle/java/javac.gradle
+++ b/gradle/java/javac.gradle
@ -17,7 +17,7 @@

 // Configure Java project defaults.

-allprojects {
+allprojects { project ->
  plugins.withType(JavaPlugin) {
    sourceCompatibility = rootProject.minJavaVersion
    targetCompatibility = rootProject.minJavaVersion
@ -69,12 +69,20 @@ allprojects {
        "-Xlint:preview",
        "-Xdoclint:all/protected",
        "-Xdoclint:-missing",
-        "-Xdoclint:-accessibility",
-        "-proc:none",  // proc:none was added because of LOG4J2-1925 / JDK-8186647
+        "-Xdoclint:-accessibility"
      ]

-      if (propertyOrDefault("javac.failOnWarnings", true).toBoolean()) {
-        options.compilerArgs += "-Werror"
+      if (project.path == ":lucene:benchmark-jmh") {
+        // JMH benchmarks use JMH preprocessor and incubating modules.
+      } else {
+        // proc:none was added because of LOG4J2-1925 / JDK-8186647
+        options.compilerArgs += [
+            "-proc:none"
+        ]
+
+        if (propertyOrDefault("javac.failOnWarnings", true).toBoolean()) {
+          options.compilerArgs += "-Werror"
+        }
      }
    }
  }
--- a/gradle/maven/publications.gradle
+++ b/gradle/maven/publications.gradle
@ -40,7 +40,9 @@ configure(rootProject) {
          // Exclude the native module.
          ":lucene:misc:native",
          // Exclude test fixtures.
-          ":lucene:spatial-test-fixtures"
+          ":lucene:spatial-test-fixtures",
+          // Exclude JMH benchmarks (path must match the project name in settings.gradle).
+          ":lucene:benchmark-jmh",
      ]

      // Exclude all subprojects that are modular test projects and those explicitly
--- a/help/jmh.txt
+++ b/help/jmh.txt
--- a/lucene/benchmark-jmh/README.txt
+++ b/lucene/benchmark-jmh/README.txt
@ -0,0 +1,21 @@
+The :lucene:benchmark-jmh module can be used to compile
+and execute JMH (https://github.com/openjdk/jmh) micro-benchmarks.
+
+Look at existing classes and JMH documentation for inspiration on how
+to write good micro-benchmarks.
+
+To compile the project and prepare the JMH launcher, run:
+
+gradlew :lucene:benchmark-jmh:assemble
+
+The above target will display exact commands to execute JMH from
+command line, for example:
+
+java --module-path lucene\benchmark-jmh\build\benchmarks --module org.apache.lucene.benchmark.jmh
+
+You can pass any JMH options to the above command, for example:
+
+  -h      displays verbose help for all options
+  -l      list available benchmarks
+  -lp     list benchmarks that pass the filter and their parameters
+  regexp  execute all benchmark containing regexp
--- a/lucene/benchmark-jmh/build.gradle
+++ b/lucene/benchmark-jmh/build.gradle
@ -0,0 +1,107 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+plugins {
+  id "java-library"
+}
+
+description = 'Lucene JMH micro-benchmarking module'
+
+dependencies {
+  // The code under benchmark.
+  moduleImplementation project(':lucene:core')
+
+  // JMH runtime and its annotation processor (both at the same version).
+  moduleImplementation "org.openjdk.jmh:jmh-core:1.37"
+  annotationProcessor "org.openjdk.jmh:jmh-generator-annprocess:1.37"
+}
+
+
+// Exclude JMH-generated files and certain classes that require incubating classes
+// from forbiddenapis validation.
+tasks.matching { it.name == "forbiddenApisMain" }.configureEach {
+  // Do not fail the check when a referenced class cannot be resolved
+  // (presumably the incubating vector classes -- confirm).
+  failOnMissingClasses = false
+  // Skip everything under the package the JMH annotation processor generates into.
+  patternSet.exclude([
+      "**/jmh_generated/**"
+  ])
+}
+
+
+// Skip certain infrastructure tasks that we can't use or don't care about.
+tasks.matching { it.name in [
+    // Turn off JMH dependency checksums and licensing (it's GPL w/ classpath exception
+    // but this seems fine for test/build only tools).
+    "validateJarChecksums", "validateJarLicenses",
+    // No special javadocs for JMH benchmarks.
+    "renderSiteJavadoc",
+    "renderJavadoc",
+]}.configureEach {
+  it.enabled = false
+}
+
+
+// Assemble benchmark JAR and its dependencies here.
+// dependenciesDir is the "benchmarks" folder under this project's build directory.
+File dependenciesDir = project.layout.buildDirectory.dir("benchmarks").get().asFile
+// Sync task: copies the runtime classpath and this project's own JAR into dependenciesDir.
+def syncDependencies = tasks.register("copyDependencies", Sync, {
+  from configurations.runtimeClasspath
+  from jar
+
+  into dependenciesDir
+})
+
+// Module entry point. The main class has to live in a package of this module
+// (JMH's own Main is outside of it), so org.apache.lucene.benchmark.jmh.Main is
+// a simple delegate that redirects to JMH.
+tasks.compileJava.configure {
+  options.javaModuleMainClass.set("org.apache.lucene.benchmark.jmh.Main")
+}
+
+// Make the JAR directly runnable with "java -jar": set the main class and a Class-Path
+// that lists the runtime dependencies by file name only (they are copied next to the JAR
+// by the copyDependencies task).
+jar.configure { Jar jarTask ->
+  dependsOn configurations.runtimeClasspath
+
+  manifest {
+    attributes([
+        "Main-Class": "org.apache.lucene.benchmark.jmh.Main",
+        // The runtime classpath is only read when toString() is called on this object,
+        // not when this configuration block runs.
+        "Class-Path": new Object () {
+          @Override
+          String toString() {
+            return configurations.runtimeClasspath.collect { f -> f.name }.join(" ")
+          }
+        }
+    ])
+  }
+}
+
+// Assembling also populates the benchmarks directory and then prints ready-to-paste
+// launch commands, with paths shown relative to the root project directory.
+assemble {
+  dependsOn syncDependencies
+
+  doLast {
+    logger.lifecycle("""
+JMH benchmarks compiled. Run them with:
+
+java -jar ${rootDir.toPath().relativize(dependenciesDir.toPath().resolve(jar.archiveFile.get().asFile.name))}
+
+or
+
+java --module-path ${rootDir.toPath().relativize(dependenciesDir.toPath())} --module org.apache.lucene.benchmark.jmh
+
+JMH options you can use with the above:
+
+  -h      displays verbose help for all options 
+  -l      list available benchmarks
+  -lp     list benchmarks that pass the filter and their parameters
+  regexp  execute all benchmark containing regexp
+""")
+  }
+}
--- a/lucene/benchmark-jmh/src/java/module-info.java
+++ b/lucene/benchmark-jmh/src/java/module-info.java
@ -0,0 +1,27 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/** Lucene JMH benchmarks. */
+module org.apache.lucene.benchmark.jmh {
+  requires jmh.core;
+  // Vector API, used directly by the benchmark classes in this module.
+  requires jdk.incubator.vector;
+  // NOTE(review): no source in this module references jdk.unsupported directly;
+  // presumably required by JMH at runtime -- confirm.
+  requires jdk.unsupported;
+  requires org.apache.lucene.core;
+
+  exports org.apache.lucene.benchmark.jmh;
+  // Package holding JMH-generated classes; DummyExport keeps it present in the sources.
+  exports org.apache.lucene.benchmark.jmh.jmh_generated;
+}
--- a/lucene/benchmark-jmh/src/java/org/apache/lucene/benchmark/jmh/BinaryCosineBenchmark.java
+++ b/lucene/benchmark-jmh/src/java/org/apache/lucene/benchmark/jmh/BinaryCosineBenchmark.java
@ -0,0 +1,282 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.benchmark.jmh;
+
+import java.util.concurrent.ThreadLocalRandom;
+import java.util.concurrent.TimeUnit;
+import jdk.incubator.vector.ByteVector;
+import jdk.incubator.vector.IntVector;
+import jdk.incubator.vector.ShortVector;
+import jdk.incubator.vector.Vector;
+import jdk.incubator.vector.VectorOperators;
+import jdk.incubator.vector.VectorShape;
+import jdk.incubator.vector.VectorSpecies;
+import org.apache.lucene.util.VectorUtil;
+import org.openjdk.jmh.annotations.*;
+
+@BenchmarkMode(Mode.Throughput)
+@OutputTimeUnit(TimeUnit.MICROSECONDS)
+@State(Scope.Benchmark)
+@Warmup(iterations = 3, time = 3)
+@Measurement(iterations = 5, time = 3)
+@Fork(
+    value = 1,
+    jvmArgsPrepend = {"--add-modules=jdk.incubator.vector"})
+public class BinaryCosineBenchmark {
+
+  private byte[] a;
+  private byte[] b;
+
+  @Param({"1", "128", "207", "256", "300", "512", "702", "1024"})
+  // @Param({"1", "4", "6", "8", "13", "16", "25", "32", "64", "100" })
+  // @Param({"1024"})
+  // @Param({"16", "32", "64"})
+  int size;
+
+  /**
+   * Fills both input vectors with {@code size} random bytes and sanity-checks the hand-written
+   * vectorized variants against the scalar baseline before anything is measured.
+   *
+   * <p>All variants accumulate exact {@code int} sums and apply the same final expression, so
+   * their results must be identical, including the NaN produced when a vector has zero norm.
+   *
+   * @throws RuntimeException if a vectorized variant disagrees with {@link #cosineDistanceOld()}
+   */
+  @Setup(Level.Trial)
+  public void init() {
+    a = new byte[size];
+    b = new byte[size];
+    ThreadLocalRandom.current().nextBytes(a);
+    ThreadLocalRandom.current().nextBytes(b);
+
+    final float expected = cosineDistanceOld();
+    // Float.compare treats NaN as equal to NaN. A plain != would fail spuriously whenever one of
+    // the random vectors is all zeros (quite possible for size = 1), since both sides are NaN.
+    if (Float.compare(cosineDistanceNew(), expected) != 0) {
+      throw new RuntimeException("New is wrong");
+    }
+    if (Float.compare(cosineDistanceNewNew(), expected) != 0) {
+      throw new RuntimeException("NewNew is wrong");
+    }
+  }
+
+  // Byte and short species sized so that they have the same number of lanes as
+  // IntVector.SPECIES_PREFERRED. Both are null when the preferred int vector is narrower than
+  // 256 bits; callers must check the vector size before using them.
+  static final VectorSpecies<Byte> PREFERRED_BYTE_SPECIES;
+  static final VectorSpecies<Short> PREFERRED_SHORT_SPECIES;
+
+  static {
+    if (IntVector.SPECIES_PREFERRED.vectorBitSize() >= 256) {
+      // a byte is 1/4 the width of an int: use a shape with 1/4 of the preferred bit size
+      PREFERRED_BYTE_SPECIES =
+          ByteVector.SPECIES_MAX.withShape(
+              VectorShape.forBitSize(IntVector.SPECIES_PREFERRED.vectorBitSize() >> 2));
+      // a short is 1/2 the width of an int: use a shape with 1/2 of the preferred bit size
+      PREFERRED_SHORT_SPECIES =
+          ShortVector.SPECIES_MAX.withShape(
+              VectorShape.forBitSize(IntVector.SPECIES_PREFERRED.vectorBitSize() >> 1));
+    } else {
+      PREFERRED_BYTE_SPECIES = null;
+      PREFERRED_SHORT_SPECIES = null;
+    }
+  }
+
+  // True when os.arch is "amd64" and the preferred int vector is narrower than 256 bits. The
+  // hand-written benchmarks fall back to the scalar loop in that case.
+  private static final boolean IS_AMD64_WITHOUT_AVX2 =
+      System.getProperty("os.arch").equals("amd64")
+          && IntVector.SPECIES_PREFERRED.vectorBitSize() < 256;
+
+  /** Measures {@code VectorUtil.cosine} from lucene-core on the same two input vectors. */
+  @Benchmark
+  public float cosineDistanceVectorUtil() {
+    return VectorUtil.cosine(a, b);
+  }
+
+  /**
+   * Vectorized cosine variant. Uses the same 256/512-bit loop as {@link #cosineDistanceNew()};
+   * for 128-bit vectors it does overlapping 64-bit loads, widens only the first half of each
+   * load and advances by half a vector per iteration.
+   *
+   * @return the cosine of the two input vectors, computed from {@code int} accumulators
+   */
+  @Benchmark
+  public float cosineDistanceNewNew() {
+    int i = 0;
+    int sum = 0;
+    int norm1 = 0;
+    int norm2 = 0;
+    final int vectorSize = IntVector.SPECIES_PREFERRED.vectorBitSize();
+    // only vectorize if we'll at least enter the loop a single time, and we have at least 128-bit
+    // vectors
+    if (a.length >= 16 && vectorSize >= 128 && IS_AMD64_WITHOUT_AVX2 == false) {
+      if (vectorSize >= 256) {
+        // optimized 256/512 bit implementation, processes 8/16 bytes at a time
+        int upperBound = PREFERRED_BYTE_SPECIES.loopBound(a.length);
+        IntVector accSum = IntVector.zero(IntVector.SPECIES_PREFERRED);
+        IntVector accNorm1 = IntVector.zero(IntVector.SPECIES_PREFERRED);
+        IntVector accNorm2 = IntVector.zero(IntVector.SPECIES_PREFERRED);
+        for (; i < upperBound; i += PREFERRED_BYTE_SPECIES.length()) {
+          ByteVector va8 = ByteVector.fromArray(PREFERRED_BYTE_SPECIES, a, i);
+          ByteVector vb8 = ByteVector.fromArray(PREFERRED_BYTE_SPECIES, b, i);
+          // widen bytes to shorts; a product of two bytes always fits in a short
+          Vector<Short> va16 = va8.convertShape(VectorOperators.B2S, PREFERRED_SHORT_SPECIES, 0);
+          Vector<Short> vb16 = vb8.convertShape(VectorOperators.B2S, PREFERRED_SHORT_SPECIES, 0);
+          Vector<Short> prod16 = va16.mul(vb16);
+          Vector<Short> norm1_16 = va16.mul(va16);
+          Vector<Short> norm2_16 = vb16.mul(vb16);
+          // widen the short products to ints before accumulating
+          Vector<Integer> prod32 =
+              prod16.convertShape(VectorOperators.S2I, IntVector.SPECIES_PREFERRED, 0);
+          Vector<Integer> norm1_32 =
+              norm1_16.convertShape(VectorOperators.S2I, IntVector.SPECIES_PREFERRED, 0);
+          Vector<Integer> norm2_32 =
+              norm2_16.convertShape(VectorOperators.S2I, IntVector.SPECIES_PREFERRED, 0);
+          accSum = accSum.add(prod32);
+          accNorm1 = accNorm1.add(norm1_32);
+          accNorm2 = accNorm2.add(norm2_32);
+        }
+        // reduce
+        sum += accSum.reduceLanes(VectorOperators.ADD);
+        norm1 += accNorm1.reduceLanes(VectorOperators.ADD);
+        norm2 += accNorm2.reduceLanes(VectorOperators.ADD);
+      } else {
+        // 128-bit impl, which is tricky since we don't have SPECIES_32, it does "overlapping read"
+        // the bound stops one full 64-bit vector early so that every load stays inside the array
+        int upperBound = ByteVector.SPECIES_64.loopBound(a.length - ByteVector.SPECIES_64.length());
+        IntVector accSum = IntVector.zero(IntVector.SPECIES_128);
+        IntVector accNorm1 = IntVector.zero(IntVector.SPECIES_128);
+        IntVector accNorm2 = IntVector.zero(IntVector.SPECIES_128);
+        // advance by half a vector: only the first half of each load is consumed below
+        for (; i < upperBound; i += ByteVector.SPECIES_64.length() >> 1) {
+          ByteVector va8 = ByteVector.fromArray(ByteVector.SPECIES_64, a, i);
+          ByteVector vb8 = ByteVector.fromArray(ByteVector.SPECIES_64, b, i);
+
+          // process first half only
+          Vector<Short> va16 = va8.convert(VectorOperators.B2S, 0);
+          Vector<Short> vb16 = vb8.convert(VectorOperators.B2S, 0);
+          Vector<Short> norm1_16 = va16.mul(va16);
+          Vector<Short> norm2_16 = vb16.mul(vb16);
+          Vector<Short> prod16 = va16.mul(vb16);
+
+          // sum into accumulators
+          accNorm1 =
+              accNorm1.add(norm1_16.convertShape(VectorOperators.S2I, IntVector.SPECIES_128, 0));
+          accNorm2 =
+              accNorm2.add(norm2_16.convertShape(VectorOperators.S2I, IntVector.SPECIES_128, 0));
+          accSum = accSum.add(prod16.convertShape(VectorOperators.S2I, IntVector.SPECIES_128, 0));
+        }
+        // reduce
+        sum += accSum.reduceLanes(VectorOperators.ADD);
+        norm1 += accNorm1.reduceLanes(VectorOperators.ADD);
+        norm2 += accNorm2.reduceLanes(VectorOperators.ADD);
+      }
+    }
+
+    // scalar tail: the remaining elements, or the whole array when the vector path was skipped
+    for (; i < a.length; i++) {
+      byte elem1 = a[i];
+      byte elem2 = b[i];
+      sum += elem1 * elem2;
+      norm1 += elem1 * elem1;
+      norm2 += elem2 * elem2;
+    }
+    return (float) (sum / Math.sqrt((double) norm1 * (double) norm2));
+  }
+
+  /**
+   * Vectorized cosine variant. For 128-bit vectors each 64-bit byte load is widened to shorts and
+   * the short products are then split into two int vectors, so every loaded byte is consumed.
+   *
+   * @return the cosine of the two input vectors, computed from {@code int} accumulators
+   */
+  @Benchmark
+  public float cosineDistanceNew() {
+    int i = 0;
+    int sum = 0;
+    int norm1 = 0;
+    int norm2 = 0;
+    final int vectorSize = IntVector.SPECIES_PREFERRED.vectorBitSize();
+    // only vectorize if we'll at least enter the loop a single time, and we have at least 128-bit
+    // vectors
+    if (a.length >= 16 && vectorSize >= 128 && IS_AMD64_WITHOUT_AVX2 == false) {
+      // acts like:
+      // int sum = 0, norm1 = 0, norm2 = 0;
+      // for (...) {
+      //   sum += a[i] * b[i];
+      //   norm1 += a[i] * a[i];
+      //   norm2 += b[i] * b[i];
+      // }
+      if (vectorSize >= 256) {
+        // optimized 256/512 bit implementation, processes 8/16 bytes at a time
+        int upperBound = PREFERRED_BYTE_SPECIES.loopBound(a.length);
+        IntVector accSum = IntVector.zero(IntVector.SPECIES_PREFERRED);
+        IntVector accNorm1 = IntVector.zero(IntVector.SPECIES_PREFERRED);
+        IntVector accNorm2 = IntVector.zero(IntVector.SPECIES_PREFERRED);
+        for (; i < upperBound; i += PREFERRED_BYTE_SPECIES.length()) {
+          ByteVector va8 = ByteVector.fromArray(PREFERRED_BYTE_SPECIES, a, i);
+          ByteVector vb8 = ByteVector.fromArray(PREFERRED_BYTE_SPECIES, b, i);
+          // widen bytes to shorts; a product of two bytes always fits in a short
+          Vector<Short> va16 = va8.convertShape(VectorOperators.B2S, PREFERRED_SHORT_SPECIES, 0);
+          Vector<Short> vb16 = vb8.convertShape(VectorOperators.B2S, PREFERRED_SHORT_SPECIES, 0);
+          Vector<Short> prod16 = va16.mul(vb16);
+          Vector<Short> norm1_16 = va16.mul(va16);
+          Vector<Short> norm2_16 = vb16.mul(vb16);
+          // widen the short products to ints before accumulating
+          Vector<Integer> prod32 =
+              prod16.convertShape(VectorOperators.S2I, IntVector.SPECIES_PREFERRED, 0);
+          Vector<Integer> norm1_32 =
+              norm1_16.convertShape(VectorOperators.S2I, IntVector.SPECIES_PREFERRED, 0);
+          Vector<Integer> norm2_32 =
+              norm2_16.convertShape(VectorOperators.S2I, IntVector.SPECIES_PREFERRED, 0);
+          accSum = accSum.add(prod32);
+          accNorm1 = accNorm1.add(norm1_32);
+          accNorm2 = accNorm2.add(norm2_32);
+        }
+        // reduce
+        sum += accSum.reduceLanes(VectorOperators.ADD);
+        norm1 += accNorm1.reduceLanes(VectorOperators.ADD);
+        norm2 += accNorm2.reduceLanes(VectorOperators.ADD);
+      } else {
+        // 128-bit implementation, which must "split up" vectors due to widening conversions
+        int upperBound = ByteVector.SPECIES_64.loopBound(a.length);
+        IntVector accSum1 = IntVector.zero(IntVector.SPECIES_128);
+        IntVector accSum2 = IntVector.zero(IntVector.SPECIES_128);
+        IntVector accNorm1_1 = IntVector.zero(IntVector.SPECIES_128);
+        IntVector accNorm1_2 = IntVector.zero(IntVector.SPECIES_128);
+        IntVector accNorm2_1 = IntVector.zero(IntVector.SPECIES_128);
+        IntVector accNorm2_2 = IntVector.zero(IntVector.SPECIES_128);
+        for (; i < upperBound; i += ByteVector.SPECIES_64.length()) {
+          ByteVector va8 = ByteVector.fromArray(ByteVector.SPECIES_64, a, i);
+          ByteVector vb8 = ByteVector.fromArray(ByteVector.SPECIES_64, b, i);
+          // expand each byte vector into short vector and perform multiplications
+          Vector<Short> va16 = va8.convertShape(VectorOperators.B2S, ShortVector.SPECIES_128, 0);
+          Vector<Short> vb16 = vb8.convertShape(VectorOperators.B2S, ShortVector.SPECIES_128, 0);
+          Vector<Short> prod16 = va16.mul(vb16);
+          Vector<Short> norm1_16 = va16.mul(va16);
+          Vector<Short> norm2_16 = vb16.mul(vb16);
+          // split each short vector into two int vectors and add
+          Vector<Integer> prod32_1 =
+              prod16.convertShape(VectorOperators.S2I, IntVector.SPECIES_128, 0);
+          Vector<Integer> prod32_2 =
+              prod16.convertShape(VectorOperators.S2I, IntVector.SPECIES_128, 1);
+          Vector<Integer> norm1_32_1 =
+              norm1_16.convertShape(VectorOperators.S2I, IntVector.SPECIES_128, 0);
+          Vector<Integer> norm1_32_2 =
+              norm1_16.convertShape(VectorOperators.S2I, IntVector.SPECIES_128, 1);
+          Vector<Integer> norm2_32_1 =
+              norm2_16.convertShape(VectorOperators.S2I, IntVector.SPECIES_128, 0);
+          Vector<Integer> norm2_32_2 =
+              norm2_16.convertShape(VectorOperators.S2I, IntVector.SPECIES_128, 1);
+          accSum1 = accSum1.add(prod32_1);
+          accSum2 = accSum2.add(prod32_2);
+          accNorm1_1 = accNorm1_1.add(norm1_32_1);
+          accNorm1_2 = accNorm1_2.add(norm1_32_2);
+          accNorm2_1 = accNorm2_1.add(norm2_32_1);
+          accNorm2_2 = accNorm2_2.add(norm2_32_2);
+        }
+        // reduce
+        sum += accSum1.add(accSum2).reduceLanes(VectorOperators.ADD);
+        norm1 += accNorm1_1.add(accNorm1_2).reduceLanes(VectorOperators.ADD);
+        norm2 += accNorm2_1.add(accNorm2_2).reduceLanes(VectorOperators.ADD);
+      }
+    }
+
+    // scalar tail: the remaining elements, or the whole array when the vector path was skipped
+    for (; i < a.length; i++) {
+      byte elem1 = a[i];
+      byte elem2 = b[i];
+      sum += elem1 * elem2;
+      norm1 += elem1 * elem1;
+      norm2 += elem2 * elem2;
+    }
+    return (float) (sum / Math.sqrt((double) norm1 * (double) norm2));
+  }
+
+  /**
+   * Returns the cosine similarity between the two vectors. This is the plain scalar loop; {@code
+   * init()} uses it as the reference result for the vectorized variant.
+   */
+  @Benchmark
+  public float cosineDistanceOld() {
+    // Note: the int accumulators will not overflow if dim < 2^17, since max(byte * byte) = 2^14
+    // (-128 * -128) and 2^17 * 2^14 = 2^31 already exceeds Integer.MAX_VALUE.
+    int sum = 0;
+    int norm1 = 0;
+    int norm2 = 0;
+
+    for (int i = 0; i < a.length; i++) {
+      byte elem1 = a[i];
+      byte elem2 = b[i];
+      sum += elem1 * elem2;
+      norm1 += elem1 * elem1;
+      norm2 += elem2 * elem2;
+    }
+    // the product of the norms is computed in double; 0 / 0 yields NaN for a zero vector
+    return (float) (sum / Math.sqrt((double) norm1 * (double) norm2));
+  }
+}
--- a/lucene/benchmark-jmh/src/java/org/apache/lucene/benchmark/jmh/Main.java
+++ b/lucene/benchmark-jmh/src/java/org/apache/lucene/benchmark/jmh/Main.java
@ -0,0 +1,27 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.benchmark.jmh;
+
+/**
+ * Module entry point that only forwards to {@code org.openjdk.jmh.Main}. Just redirect to JMH so
+ * that the package is within the initially launched module, otherwise {@code --module xyz} does
+ * not work.
+ */
+public class Main {
+  /**
+   * Delegates to the JMH command-line launcher.
+   *
+   * @param args JMH command-line arguments, passed through unchanged
+   * @throws Exception anything thrown by {@code org.openjdk.jmh.Main.main}
+   */
+  public static void main(String[] args) throws Exception {
+    org.openjdk.jmh.Main.main(args);
+  }
+}
--- a/lucene/benchmark-jmh/src/java/org/apache/lucene/benchmark/jmh/jmh_generated/DummyExport.java
+++ b/lucene/benchmark-jmh/src/java/org/apache/lucene/benchmark/jmh/jmh_generated/DummyExport.java
@ -0,0 +1,20 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.benchmark.jmh.jmh_generated;
+
+/**
+ * Empty placeholder class in the {@code jmh_generated} package. Just to keep the module
+ * descriptor happy (generated classes must be exported).
+ */
+public class DummyExport {}
--- a/lucene/distribution.tests/src/test/org/apache/lucene/distribution/TestModularLayer.java
+++ b/lucene/distribution.tests/src/test/org/apache/lucene/distribution/TestModularLayer.java
@ -128,6 +128,7 @@ public class TestModularLayer extends AbstractLuceneDistributionTest {
            "org.apache.lucene.analysis.stempel",
            "org.apache.lucene.backward_codecs",
            "org.apache.lucene.benchmark",
+            "org.apache.lucene.benchmark.jmh",
            "org.apache.lucene.classification",
            "org.apache.lucene.codecs",
            "org.apache.lucene.core",
--- a/settings.gradle
+++ b/settings.gradle
@ -43,6 +43,7 @@ include "lucene:analysis:stempel"
 include "lucene:analysis.tests"
 include "lucene:backward-codecs"
 include "lucene:benchmark"
+include "lucene:benchmark-jmh"
 include "lucene:classification"
 include "lucene:codecs"
 include "lucene:core"
--- a/versions.lock
+++ b/versions.lock
@ -5,15 +5,18 @@ com.ibm.icu:icu4j:70.1 (1 constraints: dc040a31)
 commons-codec:commons-codec:1.13 (1 constraints: d904f430)
 io.sgr:s2-geometry-library-java:1.0.0 (1 constraints: 0305f035)
 junit:junit:4.13.1 (1 constraints: 3b05453b)
+net.sf.jopt-simple:jopt-simple:5.0.4 (1 constraints: be0ad6cc)
 net.sourceforge.nekohtml:nekohtml:1.9.17 (1 constraints: 4405503b)
 org.antlr:antlr4-runtime:4.11.1 (1 constraints: 39053f3b)
 org.apache.commons:commons-compress:1.19 (1 constraints: df04fa30)
+org.apache.commons:commons-math3:3.6.1 (1 constraints: bf0adbcc)
 org.apache.opennlp:opennlp-tools:1.9.1 (1 constraints: 0d050c36)
 org.carrot2:morfologik-fsa:2.1.9 (1 constraints: db0d9c36)
 org.carrot2:morfologik-polish:2.1.9 (1 constraints: 0e050136)
 org.carrot2:morfologik-stemming:2.1.9 (2 constraints: 1312040d)
 org.hamcrest:hamcrest:2.2 (1 constraints: a8041f2c)
 org.locationtech.spatial4j:spatial4j:0.8 (1 constraints: ac041f2c)
+org.openjdk.jmh:jmh-core:1.37 (1 constraints: df04fc30)
 org.ow2.asm:asm:7.2 (3 constraints: 2717d96b)
 org.ow2.asm:asm-analysis:7.2 (1 constraints: e409d9a5)
 org.ow2.asm:asm-commons:7.2 (1 constraints: ad042e2c)